Commit 2739b28126a5621b1d03d7eb40dfa4f433d45579

Authored by tangwang
1 parent d7d48f52

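多语言索引调整:默认语言由 zh 改为 en;pick_lang_field 不再兼容旧结构 {base}_zh / {base}_en;搜索不再硬编码 index_name = "search_products",改用租户专属索引名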

api/result_formatter.py
@@ -13,7 +13,7 @@ class ResultFormatter: @@ -13,7 +13,7 @@ class ResultFormatter:
13 def format_search_results( 13 def format_search_results(
14 es_hits: List[Dict[str, Any]], 14 es_hits: List[Dict[str, Any]],
15 max_score: float = 1.0, 15 max_score: float = 1.0,
16 - language: str = "zh", 16 + language: str = "en",
17 sku_filter_dimension: Optional[List[str]] = None 17 sku_filter_dimension: Optional[List[str]] = None
18 ) -> List[SpuResult]: 18 ) -> List[SpuResult]:
19 """ 19 """
@@ -27,24 +27,17 @@ class ResultFormatter: @@ -27,24 +27,17 @@ class ResultFormatter:
27 List of SpuResult objects 27 List of SpuResult objects
28 """ 28 """
29 results = [] 29 results = []
30 - lang = (language or "zh").lower() 30 + lang = (language or "en").lower()
31 if lang not in ("zh", "en"): 31 if lang not in ("zh", "en"):
32 lang = "en" 32 lang = "en"
33 33
34 def pick_lang_field(src: Dict[str, Any], base: str) -> Optional[str]: 34 def pick_lang_field(src: Dict[str, Any], base: str) -> Optional[str]:
35 - """  
36 - 从多语言对象字段中按语言选择一个值:  
37 - - 新结构: {base: {"zh": "...", "en": "...", ...}}  
38 - - 兼容旧结构: {base_zh: "...", base_en: "..."}  
39 - 若目标语言缺失则回退到另一种。  
40 - """ 35 + """从多语言对象字段中按语言选择一个值:{base: {"zh": "...", "en": "...", ...}}"""
41 obj = src.get(base) 36 obj = src.get(base)
42 - if isinstance(obj, dict):  
43 - zh_val = obj.get("zh")  
44 - en_val = obj.get("en")  
45 - else:  
46 - zh_val = src.get(f"{base}_zh")  
47 - en_val = src.get(f"{base}_en") 37 + if not isinstance(obj, dict):
  38 + return None
  39 + zh_val = obj.get("zh")
  40 + en_val = obj.get("en")
48 if lang == "zh": 41 if lang == "zh":
49 return zh_val or en_val 42 return zh_val or en_val
50 return en_val or zh_val 43 return en_val or zh_val
@@ -319,12 +312,10 @@ class ResultFormatter: @@ -319,12 +312,10 @@ class ResultFormatter:
319 is_selected = (name, value_bucket['key']) in selected_specs 312 is_selected = (name, value_bucket['key']) in selected_specs
320 313
321 # 使用 reverse_nested 的 product_count 统计产品数量(而不是规格条目数量) 314 # 使用 reverse_nested 的 product_count 统计产品数量(而不是规格条目数量)
322 - # 如果没有 product_count(兼容旧格式),回退到 doc_count  
323 product_count_agg = value_bucket.get('product_count', {}) 315 product_count_agg = value_bucket.get('product_count', {})
324 if product_count_agg and 'doc_count' in product_count_agg: 316 if product_count_agg and 'doc_count' in product_count_agg:
325 count = product_count_agg['doc_count'] 317 count = product_count_agg['doc_count']
326 else: 318 else:
327 - # 回退到 doc_count(兼容旧格式,但这不是我们想要的计数方式)  
328 count = value_bucket.get('doc_count', 0) 319 count = value_bucket.get('doc_count', 0)
329 320
330 value = FacetValue( 321 value = FacetValue(
config/config.yaml
@@ -83,7 +83,7 @@ query_config: @@ -83,7 +83,7 @@ query_config:
83 supported_languages: 83 supported_languages:
84 - "zh" 84 - "zh"
85 - "en" 85 - "en"
86 - default_language: "zh" 86 + default_language: "en"
87 87
88 # 功能开关(翻译开关由tenant_config控制) 88 # 功能开关(翻译开关由tenant_config控制)
89 enable_text_embedding: true 89 enable_text_embedding: true
@@ -153,7 +153,7 @@ spu_config: @@ -153,7 +153,7 @@ spu_config:
153 tenant_config: 153 tenant_config:
154 # 默认配置(未配置的租户使用此配置) 154 # 默认配置(未配置的租户使用此配置)
155 default: 155 default:
156 - primary_language: "zh" 156 + primary_language: "en"
157 translate_to_en: true 157 translate_to_en: true
158 translate_to_zh: false 158 translate_to_zh: false
159 # 租户特定配置 159 # 租户特定配置
config/config_loader.py
@@ -29,7 +29,7 @@ class IndexConfig: @@ -29,7 +29,7 @@ class IndexConfig:
29 class QueryConfig: 29 class QueryConfig:
30 """Configuration for query processing.""" 30 """Configuration for query processing."""
31 supported_languages: List[str] = field(default_factory=lambda: ["zh", "en"]) 31 supported_languages: List[str] = field(default_factory=lambda: ["zh", "en"])
32 - default_language: str = "zh" 32 + default_language: str = "en"
33 33
34 # Feature flags 34 # Feature flags
35 enable_text_embedding: bool = True 35 enable_text_embedding: bool = True
@@ -231,7 +231,7 @@ class ConfigLoader: @@ -231,7 +231,7 @@ class ConfigLoader:
231 231
232 query_config = QueryConfig( 232 query_config = QueryConfig(
233 supported_languages=query_config_data.get("supported_languages") or ["zh", "en"], 233 supported_languages=query_config_data.get("supported_languages") or ["zh", "en"],
234 - default_language=query_config_data.get("default_language") or "zh", 234 + default_language=query_config_data.get("default_language") or "en",
235 enable_text_embedding=query_config_data.get("enable_text_embedding", True), 235 enable_text_embedding=query_config_data.get("enable_text_embedding", True),
236 enable_query_rewrite=query_config_data.get("enable_query_rewrite", True), 236 enable_query_rewrite=query_config_data.get("enable_query_rewrite", True),
237 rewrite_dictionary=rewrite_dictionary, 237 rewrite_dictionary=rewrite_dictionary,
config/tenant_config_loader.py
@@ -39,7 +39,7 @@ class TenantConfigLoader: @@ -39,7 +39,7 @@ class TenantConfigLoader:
39 # 返回默认配置 39 # 返回默认配置
40 self._config = { 40 self._config = {
41 "default": { 41 "default": {
42 - "primary_language": "zh", 42 + "primary_language": "en",
43 "translate_to_en": True, 43 "translate_to_en": True,
44 "translate_to_zh": False 44 "translate_to_zh": False
45 }, 45 },
@@ -66,7 +66,7 @@ class TenantConfigLoader: @@ -66,7 +66,7 @@ class TenantConfigLoader:
66 else: 66 else:
67 logger.debug(f"Tenant {tenant_id} not found in config, using default") 67 logger.debug(f"Tenant {tenant_id} not found in config, using default")
68 return config.get("default", { 68 return config.get("default", {
69 - "primary_language": "zh", 69 + "primary_language": "en",
70 "translate_to_en": True, 70 "translate_to_en": True,
71 "translate_to_zh": False 71 "translate_to_zh": False
72 }) 72 })
indexer/document_transformer.py
@@ -87,7 +87,7 @@ class SPUDocumentTransformer: @@ -87,7 +87,7 @@ class SPUDocumentTransformer:
87 logger.error(f"SPU {spu_id} has no title, this may cause search issues") 87 logger.error(f"SPU {spu_id} has no title, this may cause search issues")
88 88
89 # 获取租户配置 89 # 获取租户配置
90 - primary_lang = self.tenant_config.get('primary_language', 'zh') 90 + primary_lang = self.tenant_config.get('primary_language', 'en')
91 91
92 # 文本字段处理(使用translator的内部逻辑自动处理多语言翻译) 92 # 文本字段处理(使用translator的内部逻辑自动处理多语言翻译)
93 self._fill_text_fields(doc, spu_row, primary_lang) 93 self._fill_text_fields(doc, spu_row, primary_lang)
@@ -283,7 +283,7 @@ class SPUDocumentTransformer: @@ -283,7 +283,7 @@ class SPUDocumentTransformer:
283 # - 当商品的类目ID在映射中不存在时,视为“不合法类目”,整条类目相关字段都不写入(当成没有类目) 283 # - 当商品的类目ID在映射中不存在时,视为“不合法类目”,整条类目相关字段都不写入(当成没有类目)
284 # - 仅记录错误日志,不阻塞索引流程 284 # - 仅记录错误日志,不阻塞索引流程
285 285
286 - primary_lang = self.tenant_config.get('primary_language', 'zh') 286 + primary_lang = self.tenant_config.get('primary_language', 'en')
287 287
288 if pd.notna(spu_row.get('category_path')): 288 if pd.notna(spu_row.get('category_path')):
289 category_path = str(spu_row['category_path']) 289 category_path = str(spu_row['category_path'])
search/es_query_builder.py
@@ -26,7 +26,7 @@ class ESQueryBuilder: @@ -26,7 +26,7 @@ class ESQueryBuilder:
26 source_fields: Optional[List[str]] = None, 26 source_fields: Optional[List[str]] = None,
27 function_score_config: Optional[FunctionScoreConfig] = None, 27 function_score_config: Optional[FunctionScoreConfig] = None,
28 enable_multilang_search: bool = True, 28 enable_multilang_search: bool = True,
29 - default_language: str = "zh", 29 + default_language: str = "en",
30 knn_boost: float = 0.25 30 knn_boost: float = 0.25
31 ): 31 ):
32 """ 32 """
search/searcher.py
@@ -135,7 +135,7 @@ class Searcher: @@ -135,7 +135,7 @@ class Searcher:
135 sort_by: Optional[str] = None, 135 sort_by: Optional[str] = None,
136 sort_order: Optional[str] = "desc", 136 sort_order: Optional[str] = "desc",
137 debug: bool = False, 137 debug: bool = False,
138 - language: str = "zh", 138 + language: str = "en",
139 sku_filter_dimension: Optional[List[str]] = None, 139 sku_filter_dimension: Optional[List[str]] = None,
140 ) -> SearchResult: 140 ) -> SearchResult:
141 """ 141 """
@@ -275,7 +275,7 @@ class Searcher: @@ -275,7 +275,7 @@ class Searcher:
275 try: 275 try:
276 # Generate tenant-specific index name 276 # Generate tenant-specific index name
277 index_name = get_tenant_index_name(tenant_id) 277 index_name = get_tenant_index_name(tenant_id)
278 - index_name = "search_products" 278 + # index_name = "search_products"
279 279
280 # No longer need to add tenant_id to filters since each tenant has its own index 280 # No longer need to add tenant_id to filters since each tenant has its own index
281 281
@@ -556,7 +556,7 @@ class Searcher: @@ -556,7 +556,7 @@ class Searcher:
556 formatted_results = ResultFormatter.format_search_results( 556 formatted_results = ResultFormatter.format_search_results(
557 es_hits, 557 es_hits,
558 max_score, 558 max_score,
559 - language="zh", # Default language for image search 559 + language="en", # Default language for image search
560 sku_filter_dimension=None # Image search doesn't support SKU filtering 560 sku_filter_dimension=None # Image search doesn't support SKU filtering
561 ) 561 )
562 562