Commit 2739b28126a5621b1d03d7eb40dfa4f433d45579

Authored by tangwang
1 parent d7d48f52

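多语言索引调整 (multilingual index adjustments: switch the default/primary language from "zh" to "en" across config, indexer and search; drop the legacy `{base}_zh` / `{base}_en` flat-field fallback in `pick_lang_field`; stop overriding the tenant index name with the hard-coded "search_products")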

api/result_formatter.py
... ... @@ -13,7 +13,7 @@ class ResultFormatter:
13 13 def format_search_results(
14 14 es_hits: List[Dict[str, Any]],
15 15 max_score: float = 1.0,
16   - language: str = "zh",
  16 + language: str = "en",
17 17 sku_filter_dimension: Optional[List[str]] = None
18 18 ) -> List[SpuResult]:
19 19 """
... ... @@ -27,24 +27,17 @@ class ResultFormatter:
27 27 List of SpuResult objects
28 28 """
29 29 results = []
30   - lang = (language or "zh").lower()
  30 + lang = (language or "en").lower()
31 31 if lang not in ("zh", "en"):
32 32 lang = "en"
33 33  
34 34 def pick_lang_field(src: Dict[str, Any], base: str) -> Optional[str]:
35   - """
36   - 从多语言对象字段中按语言选择一个值:
37   - - 新结构: {base: {"zh": "...", "en": "...", ...}}
38   - - 兼容旧结构: {base_zh: "...", base_en: "..."}
39   - 若目标语言缺失则回退到另一种。
40   - """
  35 + """从多语言对象字段中按语言选择一个值:{base: {"zh": "...", "en": "...", ...}}"""
41 36 obj = src.get(base)
42   - if isinstance(obj, dict):
43   - zh_val = obj.get("zh")
44   - en_val = obj.get("en")
45   - else:
46   - zh_val = src.get(f"{base}_zh")
47   - en_val = src.get(f"{base}_en")
  37 + if not isinstance(obj, dict):
  38 + return None
  39 + zh_val = obj.get("zh")
  40 + en_val = obj.get("en")
48 41 if lang == "zh":
49 42 return zh_val or en_val
50 43 return en_val or zh_val
... ... @@ -319,12 +312,10 @@ class ResultFormatter:
319 312 is_selected = (name, value_bucket['key']) in selected_specs
320 313  
321 314 # 使用 reverse_nested 的 product_count 统计产品数量(而不是规格条目数量)
322   - # 如果没有 product_count(兼容旧格式),回退到 doc_count
323 315 product_count_agg = value_bucket.get('product_count', {})
324 316 if product_count_agg and 'doc_count' in product_count_agg:
325 317 count = product_count_agg['doc_count']
326 318 else:
327   - # 回退到 doc_count(兼容旧格式,但这不是我们想要的计数方式)
328 319 count = value_bucket.get('doc_count', 0)
329 320  
330 321 value = FacetValue(
... ...
config/config.yaml
... ... @@ -83,7 +83,7 @@ query_config:
83 83 supported_languages:
84 84 - "zh"
85 85 - "en"
86   - default_language: "zh"
  86 + default_language: "en"
87 87  
88 88 # 功能开关(翻译开关由tenant_config控制)
89 89 enable_text_embedding: true
... ... @@ -153,7 +153,7 @@ spu_config:
153 153 tenant_config:
154 154 # 默认配置(未配置的租户使用此配置)
155 155 default:
156   - primary_language: "zh"
  156 + primary_language: "en"
157 157 translate_to_en: true
158 158 translate_to_zh: false
159 159 # 租户特定配置
... ...
config/config_loader.py
... ... @@ -29,7 +29,7 @@ class IndexConfig:
29 29 class QueryConfig:
30 30 """Configuration for query processing."""
31 31 supported_languages: List[str] = field(default_factory=lambda: ["zh", "en"])
32   - default_language: str = "zh"
  32 + default_language: str = "en"
33 33  
34 34 # Feature flags
35 35 enable_text_embedding: bool = True
... ... @@ -231,7 +231,7 @@ class ConfigLoader:
231 231  
232 232 query_config = QueryConfig(
233 233 supported_languages=query_config_data.get("supported_languages") or ["zh", "en"],
234   - default_language=query_config_data.get("default_language") or "zh",
  234 + default_language=query_config_data.get("default_language") or "en",
235 235 enable_text_embedding=query_config_data.get("enable_text_embedding", True),
236 236 enable_query_rewrite=query_config_data.get("enable_query_rewrite", True),
237 237 rewrite_dictionary=rewrite_dictionary,
... ...
config/tenant_config_loader.py
... ... @@ -39,7 +39,7 @@ class TenantConfigLoader:
39 39 # 返回默认配置
40 40 self._config = {
41 41 "default": {
42   - "primary_language": "zh",
  42 + "primary_language": "en",
43 43 "translate_to_en": True,
44 44 "translate_to_zh": False
45 45 },
... ... @@ -66,7 +66,7 @@ class TenantConfigLoader:
66 66 else:
67 67 logger.debug(f"Tenant {tenant_id} not found in config, using default")
68 68 return config.get("default", {
69   - "primary_language": "zh",
  69 + "primary_language": "en",
70 70 "translate_to_en": True,
71 71 "translate_to_zh": False
72 72 })
... ...
indexer/document_transformer.py
... ... @@ -87,7 +87,7 @@ class SPUDocumentTransformer:
87 87 logger.error(f"SPU {spu_id} has no title, this may cause search issues")
88 88  
89 89 # 获取租户配置
90   - primary_lang = self.tenant_config.get('primary_language', 'zh')
  90 + primary_lang = self.tenant_config.get('primary_language', 'en')
91 91  
92 92 # 文本字段处理(使用translator的内部逻辑自动处理多语言翻译)
93 93 self._fill_text_fields(doc, spu_row, primary_lang)
... ... @@ -283,7 +283,7 @@ class SPUDocumentTransformer:
283 283 # - 当商品的类目ID在映射中不存在时,视为“不合法类目”,整条类目相关字段都不写入(当成没有类目)
284 284 # - 仅记录错误日志,不阻塞索引流程
285 285  
286   - primary_lang = self.tenant_config.get('primary_language', 'zh')
  286 + primary_lang = self.tenant_config.get('primary_language', 'en')
287 287  
288 288 if pd.notna(spu_row.get('category_path')):
289 289 category_path = str(spu_row['category_path'])
... ...
search/es_query_builder.py
... ... @@ -26,7 +26,7 @@ class ESQueryBuilder:
26 26 source_fields: Optional[List[str]] = None,
27 27 function_score_config: Optional[FunctionScoreConfig] = None,
28 28 enable_multilang_search: bool = True,
29   - default_language: str = "zh",
  29 + default_language: str = "en",
30 30 knn_boost: float = 0.25
31 31 ):
32 32 """
... ...
search/searcher.py
... ... @@ -135,7 +135,7 @@ class Searcher:
135 135 sort_by: Optional[str] = None,
136 136 sort_order: Optional[str] = "desc",
137 137 debug: bool = False,
138   - language: str = "zh",
  138 + language: str = "en",
139 139 sku_filter_dimension: Optional[List[str]] = None,
140 140 ) -> SearchResult:
141 141 """
... ... @@ -275,7 +275,7 @@ class Searcher:
275 275 try:
276 276 # Generate tenant-specific index name
277 277 index_name = get_tenant_index_name(tenant_id)
278   - index_name = "search_products"
  278 + # index_name = "search_products"
279 279  
280 280 # No longer need to add tenant_id to filters since each tenant has its own index
281 281  
... ... @@ -556,7 +556,7 @@ class Searcher:
556 556 formatted_results = ResultFormatter.format_search_results(
557 557 es_hits,
558 558 max_score,
559   - language="zh", # Default language for image search
  559 + language="en", # Default language for image search
560 560 sku_filter_dimension=None # Image search doesn't support SKU filtering
561 561 )
562 562  
... ...