""" Query configuration constants. Since all tenants share the same ES mapping, we can hardcode field lists here. """ import os from typing import Dict, List # Default index name DEFAULT_INDEX_NAME = "search_products" # Text embedding field TEXT_EMBEDDING_FIELD = "title_embedding" # Image embedding field IMAGE_EMBEDDING_FIELD = "image_embedding" # Default match fields for text search (with boost) # 文本召回:同时搜索中英文字段,两者相互补充 DEFAULT_MATCH_FIELDS = [ # 中文字段 "title_zh^3.0", "brief_zh^1.5", "description_zh^1.0", "vendor_zh^1.5", "category_path_zh^1.5", "category_name_zh^1.5", # 英文字段 "title_en^3.0", "brief_en^1.5", "description_en^1.0", "vendor_en^1.5", "category_path_en^1.5", "category_name_en^1.5", # 语言无关字段 "tags^1.0", ] # Domain-specific match fields DOMAIN_FIELDS: Dict[str, List[str]] = { "default": DEFAULT_MATCH_FIELDS, "title": ["title_zh^2.0"], "vendor": ["vendor_zh^1.5"], "category": ["category_path_zh^1.5", "category_name_zh^1.5"], "tags": ["tags^1.0"] } # Source fields to return in search results # 注意:为了在后端做多语言选择,_zh / _en 字段仍然需要从 ES 取出, # 但不会原样透出给前端,而是统一映射到 title / description / vendor 等字段。 SOURCE_FIELDS = [ # 基本标识 "tenant_id", "spu_id", "create_time", "update_time", # 多语言文本字段(仅用于后端选择,不直接返回给前端) "title_zh", "title_en", "brief_zh", "brief_en", "description_zh", "description_en", "vendor_zh", "vendor_en", "category_path_zh", "category_path_en", "category_name_zh", "category_name_en", # 语言无关字段(直接返回给前端) "tags", "image_url", "category_id", "category_name", "category_level", "category1_name", "category2_name", "category3_name", "option1_name", "option2_name", "option3_name", "min_price", "max_price", "compare_at_price", "sku_prices", "sku_weights", "sku_weight_units", "total_inventory", "skus", "specifications", ] # Query processing settings ENABLE_TRANSLATION = os.environ.get("ENABLE_TRANSLATION", "true").lower() == "true" ENABLE_TEXT_EMBEDDING = os.environ.get("ENABLE_TEXT_EMBEDDING", "true").lower() == "true" TRANSLATION_API_KEY = os.environ.get("DEEPL_API_KEY") 
TRANSLATION_SERVICE = "deepl"

# Ranking expression (currently disabled)
RANKING_EXPRESSION = "bm25() + 0.2*text_embedding_relevance()"

# Function score config
FUNCTION_SCORE_CONFIG = {
    "score_mode": "sum",
    "boost_mode": "multiply",
    "functions": [],
}

# Default location of the rewrite dictionary: <parent of this file's
# directory>/config/query_rewrite.dict
_DEFAULT_REWRITE_FILE = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    "config",
    "query_rewrite.dict",
)


# Load rewrite dictionary from file if exists
def load_rewrite_dictionary(rewrite_file: str = _DEFAULT_REWRITE_FILE) -> Dict[str, str]:
    """Load the query rewrite dictionary from a tab-separated file.

    Each useful line has the form ``<query>\\t<rewrite>``. Blank lines and
    lines starting with ``#`` are ignored, as are lines that do not split
    into exactly two tab-separated parts. Key and value are stripped of
    surrounding whitespace; a later duplicate key overrides an earlier one.

    Args:
        rewrite_file: Path of the dictionary file. Defaults to
            ``config/query_rewrite.dict`` next to this file's parent
            directory.

    Returns:
        Mapping of original query to rewritten query. Empty when the file
        does not exist. If reading fails part-way, a warning is printed and
        the entries parsed so far are returned (best effort).
    """
    rewrite_dict: Dict[str, str] = {}
    try:
        # "utf-8-sig" decodes plain UTF-8 too, but also drops a leading BOM;
        # with plain "utf-8" the BOM would become part of the first key.
        with open(rewrite_file, "r", encoding="utf-8-sig") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                parts = line.split("\t")
                if len(parts) == 2:
                    rewrite_dict[parts[0].strip()] = parts[1].strip()
    except (FileNotFoundError, NotADirectoryError):
        # A missing dictionary simply means "no rewrites configured".
        return {}
    except (OSError, UnicodeDecodeError) as e:
        # Best effort: an unreadable or badly encoded file must not break
        # module import; keep whatever was parsed before the failure.
        print(f"Warning: Failed to load rewrite dictionary: {e}")
    return rewrite_dict


REWRITE_DICTIONARY = load_rewrite_dictionary()

# Default facets for faceted search.
# Category facets: use category1_name, category2_name, category3_name.
# Specifications facet: uses a nested aggregation, grouped by name and then
# aggregated by value.
DEFAULT_FACETS = [
    "category1_name",  # level-1 category
    "category2_name",  # level-2 category
    "category3_name",  # level-3 category
    "specifications",  # specification facet (special handling: nested aggregation)
]