diff --git a/config/config.yaml b/config/config.yaml
index 198dad5..2bf6ddc 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -88,7 +88,6 @@ query_config:
   # 功能开关(翻译开关由tenant_config控制)
   enable_text_embedding: true
   enable_query_rewrite: true
-  enable_multilang_search: true # 启用多语言搜索(使用翻译进行跨语言检索)
 
   # Embedding字段名称
   text_embedding_field: "title_embedding"
diff --git a/config/config_loader.py b/config/config_loader.py
index 6bd06af..b53f8e1 100644
--- a/config/config_loader.py
+++ b/config/config_loader.py
@@ -33,8 +33,7 @@ class QueryConfig:
     # Feature flags
     enable_text_embedding: bool = True
     enable_query_rewrite: bool = True
-    enable_multilang_search: bool = True # Enable multi-language search using translations
-    
+
     # Query rewrite dictionary (loaded from external file)
     rewrite_dictionary: Dict[str, str] = field(default_factory=dict)
 
diff --git a/search/es_query_builder.py b/search/es_query_builder.py
index ae88d21..c767374 100644
--- a/search/es_query_builder.py
+++ b/search/es_query_builder.py
@@ -24,20 +24,21 @@ class ESQueryBuilder:
         image_embedding_field: Optional[str] = None,
         source_fields: Optional[List[str]] = None,
         function_score_config: Optional[FunctionScoreConfig] = None,
-        enable_multilang_search: bool = True,
         default_language: str = "en",
         knn_boost: float = 0.25
     ):
         """
         Initialize query builder.
 
+        Multi-language search (translation-based cross-language recall) is always enabled:
+        queries are matched against both detected-language and translated zh/en clauses.
+
         Args:
             match_fields: Fields to search for text matching
             text_embedding_field: Field name for text embeddings
             image_embedding_field: Field name for image embeddings
             source_fields: Fields to return in search results (_source includes)
             function_score_config: Function score configuration
-            enable_multilang_search: Enable multi-language search using translations
             default_language: Default language to use when detection fails or returns "unknown"
             knn_boost: Boost value for KNN (embedding recall)
         """
@@ -46,7 +47,6 @@ class ESQueryBuilder:
         self.image_embedding_field = image_embedding_field
         self.source_fields = source_fields
         self.function_score_config = function_score_config
-        self.enable_multilang_search = enable_multilang_search
         self.default_language = default_language
         self.knn_boost = knn_boost
 
@@ -488,35 +488,31 @@ class ESQueryBuilder:
             }
         })
 
-        # 2. Translation queries - lower boost (0.4) for other languages
-        if self.enable_multilang_search:
-            if language != 'zh' and translations.get('zh'):
-                zh_fields, _ = self._get_match_fields('zh')
-                should_clauses.append({
-                    "multi_match": {
-                        "query": translations['zh'],
-                        "fields": zh_fields,
-                        # "operator": "AND",
-                        "minimum_should_match": "75%",
-                        "tie_breaker": tie_breaker_base_query,
-                        "boost": 0.4,
-                        "_name": "base_query_trans_zh"
-                    }
-                })
-
-            if language != 'en' and translations.get('en'):
-                en_fields, _ = self._get_match_fields('en')
-                should_clauses.append({
-                    "multi_match": {
-                        "query": translations['en'],
-                        "fields": en_fields,
-                        # "operator": "AND",
-                        "minimum_should_match": "75%",
-                        "tie_breaker": tie_breaker_base_query,
-                        "boost": 0.4,
-                        "_name": "base_query_trans_en"
-                    }
-                })
+        # 2. Translation queries - lower boost (0.4) for other languages (multi-language search always on)
+        if language != 'zh' and translations.get('zh'):
+            zh_fields, _ = self._get_match_fields('zh')
+            should_clauses.append({
+                "multi_match": {
+                    "query": translations['zh'],
+                    "fields": zh_fields,
+                    "minimum_should_match": "75%",
+                    "tie_breaker": tie_breaker_base_query,
+                    "boost": 0.4,
+                    "_name": "base_query_trans_zh"
+                }
+            })
+        if language != 'en' and translations.get('en'):
+            en_fields, _ = self._get_match_fields('en')
+            should_clauses.append({
+                "multi_match": {
+                    "query": translations['en'],
+                    "fields": en_fields,
+                    "minimum_should_match": "75%",
+                    "tie_breaker": tie_breaker_base_query,
+                    "boost": 0.4,
+                    "_name": "base_query_trans_en"
+                }
+            })
 
         if False and is_long_query:
             boost = 0.5 * pow(min(1.0, token_count / 10.0), 0.9)
diff --git a/search/searcher.py b/search/searcher.py
index 5536e21..4949da9 100644
--- a/search/searcher.py
+++ b/search/searcher.py
@@ -119,7 +119,6 @@ class Searcher:
             image_embedding_field=self.image_embedding_field,
             source_fields=self.source_fields,
             function_score_config=self.config.function_score,
-            enable_multilang_search=self.config.query_config.enable_multilang_search,
             default_language=self.config.query_config.default_language,
             knn_boost=self.config.query_config.knn_boost
         )
diff --git a/tests/test_embedding_pipeline.py b/tests/test_embedding_pipeline.py
index f524b38..1eab72c 100644
--- a/tests/test_embedding_pipeline.py
+++ b/tests/test_embedding_pipeline.py
@@ -77,7 +77,6 @@ def _build_test_config() -> SearchConfig:
         default_language="en",
         enable_text_embedding=True,
         enable_query_rewrite=False,
-        enable_multilang_search=True,
         rewrite_dictionary={},
         translation_prompts={"query_zh": "e-commerce domain", "query_en": "e-commerce domain"},
         text_embedding_field="title_embedding",
-- 
libgit2 0.21.2