Commit 24e921414ea12e1e6fa123c0b33eb5333bcdecd2
1 parent
26b910bd
Remove the enable_multilang_search feature flag (multi-language search is now always enabled)
Showing
5 changed files
with
29 additions
and
37 deletions
Show diff stats
config/config.yaml
config/config_loader.py
| ... | ... | @@ -33,8 +33,7 @@ class QueryConfig: |
| 33 | 33 | # Feature flags |
| 34 | 34 | enable_text_embedding: bool = True |
| 35 | 35 | enable_query_rewrite: bool = True |
| 36 | - enable_multilang_search: bool = True # Enable multi-language search using translations | |
| 37 | - | |
| 36 | + | |
| 38 | 37 | # Query rewrite dictionary (loaded from external file) |
| 39 | 38 | rewrite_dictionary: Dict[str, str] = field(default_factory=dict) |
| 40 | 39 | ... | ... |
search/es_query_builder.py
| ... | ... | @@ -24,20 +24,21 @@ class ESQueryBuilder: |
| 24 | 24 | image_embedding_field: Optional[str] = None, |
| 25 | 25 | source_fields: Optional[List[str]] = None, |
| 26 | 26 | function_score_config: Optional[FunctionScoreConfig] = None, |
| 27 | - enable_multilang_search: bool = True, | |
| 28 | 27 | default_language: str = "en", |
| 29 | 28 | knn_boost: float = 0.25 |
| 30 | 29 | ): |
| 31 | 30 | """ |
| 32 | 31 | Initialize query builder. |
| 33 | 32 | |
| 33 | + Multi-language search (translation-based cross-language recall) is always enabled: | |
| 34 | + queries are matched against both detected-language and translated zh/en clauses. | |
| 35 | + | |
| 34 | 36 | Args: |
| 35 | 37 | match_fields: Fields to search for text matching |
| 36 | 38 | text_embedding_field: Field name for text embeddings |
| 37 | 39 | image_embedding_field: Field name for image embeddings |
| 38 | 40 | source_fields: Fields to return in search results (_source includes) |
| 39 | 41 | function_score_config: Function score configuration |
| 40 | - enable_multilang_search: Enable multi-language search using translations | |
| 41 | 42 | default_language: Default language to use when detection fails or returns "unknown" |
| 42 | 43 | knn_boost: Boost value for KNN (embedding recall) |
| 43 | 44 | """ |
| ... | ... | @@ -46,7 +47,6 @@ class ESQueryBuilder: |
| 46 | 47 | self.image_embedding_field = image_embedding_field |
| 47 | 48 | self.source_fields = source_fields |
| 48 | 49 | self.function_score_config = function_score_config |
| 49 | - self.enable_multilang_search = enable_multilang_search | |
| 50 | 50 | self.default_language = default_language |
| 51 | 51 | self.knn_boost = knn_boost |
| 52 | 52 | |
| ... | ... | @@ -488,35 +488,31 @@ class ESQueryBuilder: |
| 488 | 488 | } |
| 489 | 489 | }) |
| 490 | 490 | |
| 491 | - # 2. Translation queries - lower boost (0.4) for other languages | |
| 492 | - if self.enable_multilang_search: | |
| 493 | - if language != 'zh' and translations.get('zh'): | |
| 494 | - zh_fields, _ = self._get_match_fields('zh') | |
| 495 | - should_clauses.append({ | |
| 496 | - "multi_match": { | |
| 497 | - "query": translations['zh'], | |
| 498 | - "fields": zh_fields, | |
| 499 | - # "operator": "AND", | |
| 500 | - "minimum_should_match": "75%", | |
| 501 | - "tie_breaker": tie_breaker_base_query, | |
| 502 | - "boost": 0.4, | |
| 503 | - "_name": "base_query_trans_zh" | |
| 504 | - } | |
| 505 | - }) | |
| 506 | - | |
| 507 | - if language != 'en' and translations.get('en'): | |
| 508 | - en_fields, _ = self._get_match_fields('en') | |
| 509 | - should_clauses.append({ | |
| 510 | - "multi_match": { | |
| 511 | - "query": translations['en'], | |
| 512 | - "fields": en_fields, | |
| 513 | - # "operator": "AND", | |
| 514 | - "minimum_should_match": "75%", | |
| 515 | - "tie_breaker": tie_breaker_base_query, | |
| 516 | - "boost": 0.4, | |
| 517 | - "_name": "base_query_trans_en" | |
| 518 | - } | |
| 519 | - }) | |
| 491 | + # 2. Translation queries - lower boost (0.4) for other languages (multi-language search always on) | |
| 492 | + if language != 'zh' and translations.get('zh'): | |
| 493 | + zh_fields, _ = self._get_match_fields('zh') | |
| 494 | + should_clauses.append({ | |
| 495 | + "multi_match": { | |
| 496 | + "query": translations['zh'], | |
| 497 | + "fields": zh_fields, | |
| 498 | + "minimum_should_match": "75%", | |
| 499 | + "tie_breaker": tie_breaker_base_query, | |
| 500 | + "boost": 0.4, | |
| 501 | + "_name": "base_query_trans_zh" | |
| 502 | + } | |
| 503 | + }) | |
| 504 | + if language != 'en' and translations.get('en'): | |
| 505 | + en_fields, _ = self._get_match_fields('en') | |
| 506 | + should_clauses.append({ | |
| 507 | + "multi_match": { | |
| 508 | + "query": translations['en'], | |
| 509 | + "fields": en_fields, | |
| 510 | + "minimum_should_match": "75%", | |
| 511 | + "tie_breaker": tie_breaker_base_query, | |
| 512 | + "boost": 0.4, | |
| 513 | + "_name": "base_query_trans_en" | |
| 514 | + } | |
| 515 | + }) | |
| 520 | 516 | |
| 521 | 517 | if False and is_long_query: |
| 522 | 518 | boost = 0.5 * pow(min(1.0, token_count / 10.0), 0.9) | ... | ... |
search/searcher.py
| ... | ... | @@ -119,7 +119,6 @@ class Searcher: |
| 119 | 119 | image_embedding_field=self.image_embedding_field, |
| 120 | 120 | source_fields=self.source_fields, |
| 121 | 121 | function_score_config=self.config.function_score, |
| 122 | - enable_multilang_search=self.config.query_config.enable_multilang_search, | |
| 123 | 122 | default_language=self.config.query_config.default_language, |
| 124 | 123 | knn_boost=self.config.query_config.knn_boost |
| 125 | 124 | ) | ... | ... |
tests/test_embedding_pipeline.py
| ... | ... | @@ -77,7 +77,6 @@ def _build_test_config() -> SearchConfig: |
| 77 | 77 | default_language="en", |
| 78 | 78 | enable_text_embedding=True, |
| 79 | 79 | enable_query_rewrite=False, |
| 80 | - enable_multilang_search=True, | |
| 81 | 80 | rewrite_dictionary={}, |
| 82 | 81 | translation_prompts={"query_zh": "e-commerce domain", "query_en": "e-commerce domain"}, |
| 83 | 82 | text_embedding_field="title_embedding", | ... | ... |