Commit 2739b28126a5621b1d03d7eb40dfa4f433d45579
1 parent: d7d48f52
多语言索引调整
Showing 7 changed files with 19 additions and 28 deletions
Show diff stats
api/result_formatter.py
| @@ -13,7 +13,7 @@ class ResultFormatter: | @@ -13,7 +13,7 @@ class ResultFormatter: | ||
| 13 | def format_search_results( | 13 | def format_search_results( |
| 14 | es_hits: List[Dict[str, Any]], | 14 | es_hits: List[Dict[str, Any]], |
| 15 | max_score: float = 1.0, | 15 | max_score: float = 1.0, |
| 16 | - language: str = "zh", | 16 | + language: str = "en", |
| 17 | sku_filter_dimension: Optional[List[str]] = None | 17 | sku_filter_dimension: Optional[List[str]] = None |
| 18 | ) -> List[SpuResult]: | 18 | ) -> List[SpuResult]: |
| 19 | """ | 19 | """ |
| @@ -27,24 +27,17 @@ class ResultFormatter: | @@ -27,24 +27,17 @@ class ResultFormatter: | ||
| 27 | List of SpuResult objects | 27 | List of SpuResult objects |
| 28 | """ | 28 | """ |
| 29 | results = [] | 29 | results = [] |
| 30 | - lang = (language or "zh").lower() | 30 | + lang = (language or "en").lower() |
| 31 | if lang not in ("zh", "en"): | 31 | if lang not in ("zh", "en"): |
| 32 | lang = "en" | 32 | lang = "en" |
| 33 | 33 | ||
| 34 | def pick_lang_field(src: Dict[str, Any], base: str) -> Optional[str]: | 34 | def pick_lang_field(src: Dict[str, Any], base: str) -> Optional[str]: |
| 35 | - """ | ||
| 36 | - 从多语言对象字段中按语言选择一个值: | ||
| 37 | - - 新结构: {base: {"zh": "...", "en": "...", ...}} | ||
| 38 | - - 兼容旧结构: {base_zh: "...", base_en: "..."} | ||
| 39 | - 若目标语言缺失则回退到另一种。 | ||
| 40 | - """ | 35 | + """从多语言对象字段中按语言选择一个值:{base: {"zh": "...", "en": "...", ...}}""" |
| 41 | obj = src.get(base) | 36 | obj = src.get(base) |
| 42 | - if isinstance(obj, dict): | ||
| 43 | - zh_val = obj.get("zh") | ||
| 44 | - en_val = obj.get("en") | ||
| 45 | - else: | ||
| 46 | - zh_val = src.get(f"{base}_zh") | ||
| 47 | - en_val = src.get(f"{base}_en") | 37 | + if not isinstance(obj, dict): |
| 38 | + return None | ||
| 39 | + zh_val = obj.get("zh") | ||
| 40 | + en_val = obj.get("en") | ||
| 48 | if lang == "zh": | 41 | if lang == "zh": |
| 49 | return zh_val or en_val | 42 | return zh_val or en_val |
| 50 | return en_val or zh_val | 43 | return en_val or zh_val |
| @@ -319,12 +312,10 @@ class ResultFormatter: | @@ -319,12 +312,10 @@ class ResultFormatter: | ||
| 319 | is_selected = (name, value_bucket['key']) in selected_specs | 312 | is_selected = (name, value_bucket['key']) in selected_specs |
| 320 | 313 | ||
| 321 | # 使用 reverse_nested 的 product_count 统计产品数量(而不是规格条目数量) | 314 | # 使用 reverse_nested 的 product_count 统计产品数量(而不是规格条目数量) |
| 322 | - # 如果没有 product_count(兼容旧格式),回退到 doc_count | ||
| 323 | product_count_agg = value_bucket.get('product_count', {}) | 315 | product_count_agg = value_bucket.get('product_count', {}) |
| 324 | if product_count_agg and 'doc_count' in product_count_agg: | 316 | if product_count_agg and 'doc_count' in product_count_agg: |
| 325 | count = product_count_agg['doc_count'] | 317 | count = product_count_agg['doc_count'] |
| 326 | else: | 318 | else: |
| 327 | - # 回退到 doc_count(兼容旧格式,但这不是我们想要的计数方式) | ||
| 328 | count = value_bucket.get('doc_count', 0) | 319 | count = value_bucket.get('doc_count', 0) |
| 329 | 320 | ||
| 330 | value = FacetValue( | 321 | value = FacetValue( |
config/config.yaml
| @@ -83,7 +83,7 @@ query_config: | @@ -83,7 +83,7 @@ query_config: | ||
| 83 | supported_languages: | 83 | supported_languages: |
| 84 | - "zh" | 84 | - "zh" |
| 85 | - "en" | 85 | - "en" |
| 86 | - default_language: "zh" | 86 | + default_language: "en" |
| 87 | 87 | ||
| 88 | # 功能开关(翻译开关由tenant_config控制) | 88 | # 功能开关(翻译开关由tenant_config控制) |
| 89 | enable_text_embedding: true | 89 | enable_text_embedding: true |
| @@ -153,7 +153,7 @@ spu_config: | @@ -153,7 +153,7 @@ spu_config: | ||
| 153 | tenant_config: | 153 | tenant_config: |
| 154 | # 默认配置(未配置的租户使用此配置) | 154 | # 默认配置(未配置的租户使用此配置) |
| 155 | default: | 155 | default: |
| 156 | - primary_language: "zh" | 156 | + primary_language: "en" |
| 157 | translate_to_en: true | 157 | translate_to_en: true |
| 158 | translate_to_zh: false | 158 | translate_to_zh: false |
| 159 | # 租户特定配置 | 159 | # 租户特定配置 |
config/config_loader.py
| @@ -29,7 +29,7 @@ class IndexConfig: | @@ -29,7 +29,7 @@ class IndexConfig: | ||
| 29 | class QueryConfig: | 29 | class QueryConfig: |
| 30 | """Configuration for query processing.""" | 30 | """Configuration for query processing.""" |
| 31 | supported_languages: List[str] = field(default_factory=lambda: ["zh", "en"]) | 31 | supported_languages: List[str] = field(default_factory=lambda: ["zh", "en"]) |
| 32 | - default_language: str = "zh" | 32 | + default_language: str = "en" |
| 33 | 33 | ||
| 34 | # Feature flags | 34 | # Feature flags |
| 35 | enable_text_embedding: bool = True | 35 | enable_text_embedding: bool = True |
| @@ -231,7 +231,7 @@ class ConfigLoader: | @@ -231,7 +231,7 @@ class ConfigLoader: | ||
| 231 | 231 | ||
| 232 | query_config = QueryConfig( | 232 | query_config = QueryConfig( |
| 233 | supported_languages=query_config_data.get("supported_languages") or ["zh", "en"], | 233 | supported_languages=query_config_data.get("supported_languages") or ["zh", "en"], |
| 234 | - default_language=query_config_data.get("default_language") or "zh", | 234 | + default_language=query_config_data.get("default_language") or "en", |
| 235 | enable_text_embedding=query_config_data.get("enable_text_embedding", True), | 235 | enable_text_embedding=query_config_data.get("enable_text_embedding", True), |
| 236 | enable_query_rewrite=query_config_data.get("enable_query_rewrite", True), | 236 | enable_query_rewrite=query_config_data.get("enable_query_rewrite", True), |
| 237 | rewrite_dictionary=rewrite_dictionary, | 237 | rewrite_dictionary=rewrite_dictionary, |
config/tenant_config_loader.py
| @@ -39,7 +39,7 @@ class TenantConfigLoader: | @@ -39,7 +39,7 @@ class TenantConfigLoader: | ||
| 39 | # 返回默认配置 | 39 | # 返回默认配置 |
| 40 | self._config = { | 40 | self._config = { |
| 41 | "default": { | 41 | "default": { |
| 42 | - "primary_language": "zh", | 42 | + "primary_language": "en", |
| 43 | "translate_to_en": True, | 43 | "translate_to_en": True, |
| 44 | "translate_to_zh": False | 44 | "translate_to_zh": False |
| 45 | }, | 45 | }, |
| @@ -66,7 +66,7 @@ class TenantConfigLoader: | @@ -66,7 +66,7 @@ class TenantConfigLoader: | ||
| 66 | else: | 66 | else: |
| 67 | logger.debug(f"Tenant {tenant_id} not found in config, using default") | 67 | logger.debug(f"Tenant {tenant_id} not found in config, using default") |
| 68 | return config.get("default", { | 68 | return config.get("default", { |
| 69 | - "primary_language": "zh", | 69 | + "primary_language": "en", |
| 70 | "translate_to_en": True, | 70 | "translate_to_en": True, |
| 71 | "translate_to_zh": False | 71 | "translate_to_zh": False |
| 72 | }) | 72 | }) |
indexer/document_transformer.py
| @@ -87,7 +87,7 @@ class SPUDocumentTransformer: | @@ -87,7 +87,7 @@ class SPUDocumentTransformer: | ||
| 87 | logger.error(f"SPU {spu_id} has no title, this may cause search issues") | 87 | logger.error(f"SPU {spu_id} has no title, this may cause search issues") |
| 88 | 88 | ||
| 89 | # 获取租户配置 | 89 | # 获取租户配置 |
| 90 | - primary_lang = self.tenant_config.get('primary_language', 'zh') | 90 | + primary_lang = self.tenant_config.get('primary_language', 'en') |
| 91 | 91 | ||
| 92 | # 文本字段处理(使用translator的内部逻辑自动处理多语言翻译) | 92 | # 文本字段处理(使用translator的内部逻辑自动处理多语言翻译) |
| 93 | self._fill_text_fields(doc, spu_row, primary_lang) | 93 | self._fill_text_fields(doc, spu_row, primary_lang) |
| @@ -283,7 +283,7 @@ class SPUDocumentTransformer: | @@ -283,7 +283,7 @@ class SPUDocumentTransformer: | ||
| 283 | # - 当商品的类目ID在映射中不存在时,视为“不合法类目”,整条类目相关字段都不写入(当成没有类目) | 283 | # - 当商品的类目ID在映射中不存在时,视为“不合法类目”,整条类目相关字段都不写入(当成没有类目) |
| 284 | # - 仅记录错误日志,不阻塞索引流程 | 284 | # - 仅记录错误日志,不阻塞索引流程 |
| 285 | 285 | ||
| 286 | - primary_lang = self.tenant_config.get('primary_language', 'zh') | 286 | + primary_lang = self.tenant_config.get('primary_language', 'en') |
| 287 | 287 | ||
| 288 | if pd.notna(spu_row.get('category_path')): | 288 | if pd.notna(spu_row.get('category_path')): |
| 289 | category_path = str(spu_row['category_path']) | 289 | category_path = str(spu_row['category_path']) |
search/es_query_builder.py
| @@ -26,7 +26,7 @@ class ESQueryBuilder: | @@ -26,7 +26,7 @@ class ESQueryBuilder: | ||
| 26 | source_fields: Optional[List[str]] = None, | 26 | source_fields: Optional[List[str]] = None, |
| 27 | function_score_config: Optional[FunctionScoreConfig] = None, | 27 | function_score_config: Optional[FunctionScoreConfig] = None, |
| 28 | enable_multilang_search: bool = True, | 28 | enable_multilang_search: bool = True, |
| 29 | - default_language: str = "zh", | 29 | + default_language: str = "en", |
| 30 | knn_boost: float = 0.25 | 30 | knn_boost: float = 0.25 |
| 31 | ): | 31 | ): |
| 32 | """ | 32 | """ |
search/searcher.py
| @@ -135,7 +135,7 @@ class Searcher: | @@ -135,7 +135,7 @@ class Searcher: | ||
| 135 | sort_by: Optional[str] = None, | 135 | sort_by: Optional[str] = None, |
| 136 | sort_order: Optional[str] = "desc", | 136 | sort_order: Optional[str] = "desc", |
| 137 | debug: bool = False, | 137 | debug: bool = False, |
| 138 | - language: str = "zh", | 138 | + language: str = "en", |
| 139 | sku_filter_dimension: Optional[List[str]] = None, | 139 | sku_filter_dimension: Optional[List[str]] = None, |
| 140 | ) -> SearchResult: | 140 | ) -> SearchResult: |
| 141 | """ | 141 | """ |
| @@ -275,7 +275,7 @@ class Searcher: | @@ -275,7 +275,7 @@ class Searcher: | ||
| 275 | try: | 275 | try: |
| 276 | # Generate tenant-specific index name | 276 | # Generate tenant-specific index name |
| 277 | index_name = get_tenant_index_name(tenant_id) | 277 | index_name = get_tenant_index_name(tenant_id) |
| 278 | - index_name = "search_products" | 278 | + # index_name = "search_products" |
| 279 | 279 | ||
| 280 | # No longer need to add tenant_id to filters since each tenant has its own index | 280 | # No longer need to add tenant_id to filters since each tenant has its own index |
| 281 | 281 | ||
| @@ -556,7 +556,7 @@ class Searcher: | @@ -556,7 +556,7 @@ class Searcher: | ||
| 556 | formatted_results = ResultFormatter.format_search_results( | 556 | formatted_results = ResultFormatter.format_search_results( |
| 557 | es_hits, | 557 | es_hits, |
| 558 | max_score, | 558 | max_score, |
| 559 | - language="zh", # Default language for image search | 559 | + language="en", # Default language for image search |
| 560 | sku_filter_dimension=None # Image search doesn't support SKU filtering | 560 | sku_filter_dimension=None # Image search doesn't support SKU filtering |
| 561 | ) | 561 | ) |
| 562 | 562 |