Commit 24e921414ea12e1e6fa123c0b33eb5333bcdecd2

Authored by tangwang
1 parent 26b910bd

Remove the enable_multilang_search flag (multi-language search is now always enabled)

config/config.yaml
@@ -88,7 +88,6 @@ query_config: @@ -88,7 +88,6 @@ query_config:
88 # 功能开关(翻译开关由tenant_config控制) 88 # 功能开关(翻译开关由tenant_config控制)
89 enable_text_embedding: true 89 enable_text_embedding: true
90 enable_query_rewrite: true 90 enable_query_rewrite: true
91 - enable_multilang_search: true # 启用多语言搜索(使用翻译进行跨语言检索)  
92 91
93 # Embedding字段名称 92 # Embedding字段名称
94 text_embedding_field: "title_embedding" 93 text_embedding_field: "title_embedding"
config/config_loader.py
@@ -33,8 +33,7 @@ class QueryConfig: @@ -33,8 +33,7 @@ class QueryConfig:
33 # Feature flags 33 # Feature flags
34 enable_text_embedding: bool = True 34 enable_text_embedding: bool = True
35 enable_query_rewrite: bool = True 35 enable_query_rewrite: bool = True
36 - enable_multilang_search: bool = True # Enable multi-language search using translations  
37 - 36 +
38 # Query rewrite dictionary (loaded from external file) 37 # Query rewrite dictionary (loaded from external file)
39 rewrite_dictionary: Dict[str, str] = field(default_factory=dict) 38 rewrite_dictionary: Dict[str, str] = field(default_factory=dict)
40 39
search/es_query_builder.py
@@ -24,20 +24,21 @@ class ESQueryBuilder: @@ -24,20 +24,21 @@ class ESQueryBuilder:
24 image_embedding_field: Optional[str] = None, 24 image_embedding_field: Optional[str] = None,
25 source_fields: Optional[List[str]] = None, 25 source_fields: Optional[List[str]] = None,
26 function_score_config: Optional[FunctionScoreConfig] = None, 26 function_score_config: Optional[FunctionScoreConfig] = None,
27 - enable_multilang_search: bool = True,  
28 default_language: str = "en", 27 default_language: str = "en",
29 knn_boost: float = 0.25 28 knn_boost: float = 0.25
30 ): 29 ):
31 """ 30 """
32 Initialize query builder. 31 Initialize query builder.
33 32
  33 + Multi-language search (translation-based cross-language recall) is always enabled:
  34 + queries are matched against both detected-language and translated zh/en clauses.
  35 +
34 Args: 36 Args:
35 match_fields: Fields to search for text matching 37 match_fields: Fields to search for text matching
36 text_embedding_field: Field name for text embeddings 38 text_embedding_field: Field name for text embeddings
37 image_embedding_field: Field name for image embeddings 39 image_embedding_field: Field name for image embeddings
38 source_fields: Fields to return in search results (_source includes) 40 source_fields: Fields to return in search results (_source includes)
39 function_score_config: Function score configuration 41 function_score_config: Function score configuration
40 - enable_multilang_search: Enable multi-language search using translations  
41 default_language: Default language to use when detection fails or returns "unknown" 42 default_language: Default language to use when detection fails or returns "unknown"
42 knn_boost: Boost value for KNN (embedding recall) 43 knn_boost: Boost value for KNN (embedding recall)
43 """ 44 """
@@ -46,7 +47,6 @@ class ESQueryBuilder: @@ -46,7 +47,6 @@ class ESQueryBuilder:
46 self.image_embedding_field = image_embedding_field 47 self.image_embedding_field = image_embedding_field
47 self.source_fields = source_fields 48 self.source_fields = source_fields
48 self.function_score_config = function_score_config 49 self.function_score_config = function_score_config
49 - self.enable_multilang_search = enable_multilang_search  
50 self.default_language = default_language 50 self.default_language = default_language
51 self.knn_boost = knn_boost 51 self.knn_boost = knn_boost
52 52
@@ -488,35 +488,31 @@ class ESQueryBuilder: @@ -488,35 +488,31 @@ class ESQueryBuilder:
488 } 488 }
489 }) 489 })
490 490
491 - # 2. Translation queries - lower boost (0.4) for other languages  
492 - if self.enable_multilang_search:  
493 - if language != 'zh' and translations.get('zh'):  
494 - zh_fields, _ = self._get_match_fields('zh')  
495 - should_clauses.append({  
496 - "multi_match": {  
497 - "query": translations['zh'],  
498 - "fields": zh_fields,  
499 - # "operator": "AND",  
500 - "minimum_should_match": "75%",  
501 - "tie_breaker": tie_breaker_base_query,  
502 - "boost": 0.4,  
503 - "_name": "base_query_trans_zh"  
504 - }  
505 - })  
506 -  
507 - if language != 'en' and translations.get('en'):  
508 - en_fields, _ = self._get_match_fields('en')  
509 - should_clauses.append({  
510 - "multi_match": {  
511 - "query": translations['en'],  
512 - "fields": en_fields,  
513 - # "operator": "AND",  
514 - "minimum_should_match": "75%",  
515 - "tie_breaker": tie_breaker_base_query,  
516 - "boost": 0.4,  
517 - "_name": "base_query_trans_en"  
518 - }  
519 - }) 491 + # 2. Translation queries - lower boost (0.4) for other languages (multi-language search always on)
  492 + if language != 'zh' and translations.get('zh'):
  493 + zh_fields, _ = self._get_match_fields('zh')
  494 + should_clauses.append({
  495 + "multi_match": {
  496 + "query": translations['zh'],
  497 + "fields": zh_fields,
  498 + "minimum_should_match": "75%",
  499 + "tie_breaker": tie_breaker_base_query,
  500 + "boost": 0.4,
  501 + "_name": "base_query_trans_zh"
  502 + }
  503 + })
  504 + if language != 'en' and translations.get('en'):
  505 + en_fields, _ = self._get_match_fields('en')
  506 + should_clauses.append({
  507 + "multi_match": {
  508 + "query": translations['en'],
  509 + "fields": en_fields,
  510 + "minimum_should_match": "75%",
  511 + "tie_breaker": tie_breaker_base_query,
  512 + "boost": 0.4,
  513 + "_name": "base_query_trans_en"
  514 + }
  515 + })
520 516
521 if False and is_long_query: 517 if False and is_long_query:
522 boost = 0.5 * pow(min(1.0, token_count / 10.0), 0.9) 518 boost = 0.5 * pow(min(1.0, token_count / 10.0), 0.9)
search/searcher.py
@@ -119,7 +119,6 @@ class Searcher: @@ -119,7 +119,6 @@ class Searcher:
119 image_embedding_field=self.image_embedding_field, 119 image_embedding_field=self.image_embedding_field,
120 source_fields=self.source_fields, 120 source_fields=self.source_fields,
121 function_score_config=self.config.function_score, 121 function_score_config=self.config.function_score,
122 - enable_multilang_search=self.config.query_config.enable_multilang_search,  
123 default_language=self.config.query_config.default_language, 122 default_language=self.config.query_config.default_language,
124 knn_boost=self.config.query_config.knn_boost 123 knn_boost=self.config.query_config.knn_boost
125 ) 124 )
tests/test_embedding_pipeline.py
@@ -77,7 +77,6 @@ def _build_test_config() -> SearchConfig: @@ -77,7 +77,6 @@ def _build_test_config() -> SearchConfig:
77 default_language="en", 77 default_language="en",
78 enable_text_embedding=True, 78 enable_text_embedding=True,
79 enable_query_rewrite=False, 79 enable_query_rewrite=False,
80 - enable_multilang_search=True,  
81 rewrite_dictionary={}, 80 rewrite_dictionary={},
82 translation_prompts={"query_zh": "e-commerce domain", "query_en": "e-commerce domain"}, 81 translation_prompts={"query_zh": "e-commerce domain", "query_en": "e-commerce domain"},
83 text_embedding_field="title_embedding", 82 text_embedding_field="title_embedding",