# query_config.py
"""
Query configuration constants.

Since all tenants share the same ES mapping, we can hardcode field lists here.
"""

import os
from typing import Dict, List, Optional

# Default index name
DEFAULT_INDEX_NAME: str = "search_products"

# Names of the embedding fields (text and image)
TEXT_EMBEDDING_FIELD: str = "title_embedding"
IMAGE_EMBEDDING_FIELD: str = "image_embedding"

# Fields matched by default in text search.
# Each entry is written as "<field>^<boost>".
DEFAULT_MATCH_FIELDS: List[str] = [
    "title_zh^3.0",
    "brief_zh^1.5",
    "description_zh^1.0",
    "vendor_zh^1.5",
    "tags^1.0",
    "category_path_zh^1.5",
    "category_name_zh^1.5",
]

# Match fields per search domain.
# NOTE: "default" refers to the very same list object as DEFAULT_MATCH_FIELDS,
# so an in-place change to one is visible through the other.
DOMAIN_FIELDS: Dict[str, List[str]] = {
    "default": DEFAULT_MATCH_FIELDS,
    "title": ["title_zh^2.0"],
    "vendor": ["vendor_zh^1.5"],
    "category": [
        "category_path_zh^1.5",
        "category_name_zh^1.5",
    ],
    "tags": ["tags^1.0"],
}

# Source fields to return in search results.
# Note: the _zh / _en fields still have to be fetched from ES so that the
# backend can choose a language. They are not passed through to the frontend
# as-is; they are mapped onto unified fields such as title / description /
# vendor instead.

# Basic identifiers
_IDENTITY_SOURCE_FIELDS = (
    "tenant_id",
    "spu_id",
    "create_time",
    "update_time",
)

# Multilingual text fields (used only for backend language selection,
# not returned to the frontend directly)
_MULTILINGUAL_SOURCE_FIELDS = (
    "title_zh",
    "title_en",
    "brief_zh",
    "brief_en",
    "description_zh",
    "description_en",
    "vendor_zh",
    "vendor_en",
    "category_path_zh",
    "category_path_en",
    "category_name_zh",
    "category_name_en",
)

# Language-independent fields (returned to the frontend directly)
_LANGUAGE_NEUTRAL_SOURCE_FIELDS = (
    "tags",
    "image_url",
    "category_id",
    "category_name",
    "category_level",
    "category1_name",
    "category2_name",
    "category3_name",
    "option1_name",
    "option2_name",
    "option3_name",
    "min_price",
    "max_price",
    "compare_at_price",
    "sku_prices",
    "sku_weights",
    "sku_weight_units",
    "total_inventory",
    "skus",
    "specifications",
)

# Final list, in the order: identifiers, multilingual text, language-neutral.
SOURCE_FIELDS = [
    *_IDENTITY_SOURCE_FIELDS,
    *_MULTILINGUAL_SOURCE_FIELDS,
    *_LANGUAGE_NEUTRAL_SOURCE_FIELDS,
]

# Query processing settings
ENABLE_TRANSLATION = os.environ.get("ENABLE_TRANSLATION", "true").lower() == "true"
ENABLE_TEXT_EMBEDDING = os.environ.get("ENABLE_TEXT_EMBEDDING", "true").lower() == "true"
TRANSLATION_API_KEY = os.environ.get("DEEPL_API_KEY")
TRANSLATION_SERVICE = "deepl"

# Ranking expression (currently disabled)
RANKING_EXPRESSION = "bm25() + 0.2*text_embedding_relevance()"

# Function score config
FUNCTION_SCORE_CONFIG = {
    "score_mode": "sum",
    "boost_mode": "multiply",
    "functions": []
}

# Load rewrite dictionary from file if exists
def load_rewrite_dictionary(rewrite_file: Optional[str] = None) -> Dict[str, str]:
    """Load the query rewrite dictionary from a tab-separated text file.

    Each entry is one line made of two parts separated by a single TAB: the
    key first, then its replacement. Blank lines and lines starting with
    ``#`` are ignored, and so is any line that does not split into exactly
    two TAB-separated parts. Surrounding whitespace is stripped from both
    parts. When a key occurs more than once, the last occurrence wins.

    Args:
        rewrite_file: Path of the dictionary file. When omitted, the file
            ``config/query_rewrite.dict`` under the parent of this module's
            directory is used.

    Returns:
        The key -> replacement mapping. An empty dict when the file does not
        exist. If reading or decoding fails, a warning is printed and the
        entries parsed up to that point are returned.
    """
    if rewrite_file is None:
        rewrite_file = os.path.join(
            os.path.dirname(os.path.dirname(__file__)),
            "config",
            "query_rewrite.dict",
        )

    rewrite_dict: Dict[str, str] = {}
    try:
        # "utf-8-sig" reads plain UTF-8 as well, but drops a leading BOM;
        # with plain "utf-8" the BOM would end up glued to the first key
        # (str.strip() does not remove it).
        with open(rewrite_file, "r", encoding="utf-8-sig") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                parts = line.split("\t")
                if len(parts) == 2:
                    rewrite_dict[parts[0].strip()] = parts[1].strip()
    except FileNotFoundError:
        # No dictionary file simply means there is nothing to rewrite.
        return {}
    except (OSError, ValueError) as e:
        # OSError: unreadable file; ValueError covers UnicodeDecodeError.
        # Best effort: warn and keep whatever was parsed so far.
        print(f"Warning: Failed to load rewrite dictionary: {e}")

    return rewrite_dict

# Rewrite dictionary, loaded once when this module is imported.
# Empty when the dictionary file does not exist.
REWRITE_DICTIONARY: Dict[str, str] = load_rewrite_dictionary()

# Default facets for faceted search.
# - Category facets use category1_name, category2_name and category3_name.
# - The specifications facet uses a nested aggregation: grouped by name
#   first, then aggregated by value.
DEFAULT_FACETS: List[str] = [
    # Category levels 1 to 3
    "category1_name",
    "category2_name",
    "category3_name",
    # Specifications facet (special handling: nested aggregation)
    "specifications",
]