Commit 24e921414ea12e1e6fa123c0b33eb5333bcdecd2

Authored by tangwang
1 parent 26b910bd

Remove the enable_multilang_search flag (multi-language search is now always enabled)

config/config.yaml
... ... @@ -88,7 +88,6 @@ query_config:
88 88 # 功能开关(翻译开关由tenant_config控制)
89 89 enable_text_embedding: true
90 90 enable_query_rewrite: true
91   - enable_multilang_search: true # 启用多语言搜索(使用翻译进行跨语言检索)
92 91  
93 92 # Embedding字段名称
94 93 text_embedding_field: "title_embedding"
... ...
config/config_loader.py
... ... @@ -33,8 +33,7 @@ class QueryConfig:
33 33 # Feature flags
34 34 enable_text_embedding: bool = True
35 35 enable_query_rewrite: bool = True
36   - enable_multilang_search: bool = True # Enable multi-language search using translations
37   -
  36 +
38 37 # Query rewrite dictionary (loaded from external file)
39 38 rewrite_dictionary: Dict[str, str] = field(default_factory=dict)
40 39  
... ...
search/es_query_builder.py
... ... @@ -24,20 +24,21 @@ class ESQueryBuilder:
24 24 image_embedding_field: Optional[str] = None,
25 25 source_fields: Optional[List[str]] = None,
26 26 function_score_config: Optional[FunctionScoreConfig] = None,
27   - enable_multilang_search: bool = True,
28 27 default_language: str = "en",
29 28 knn_boost: float = 0.25
30 29 ):
31 30 """
32 31 Initialize query builder.
33 32  
  33 + Multi-language search (translation-based cross-language recall) is always enabled:
  34 + queries are matched against both detected-language and translated zh/en clauses.
  35 +
34 36 Args:
35 37 match_fields: Fields to search for text matching
36 38 text_embedding_field: Field name for text embeddings
37 39 image_embedding_field: Field name for image embeddings
38 40 source_fields: Fields to return in search results (_source includes)
39 41 function_score_config: Function score configuration
40   - enable_multilang_search: Enable multi-language search using translations
41 42 default_language: Default language to use when detection fails or returns "unknown"
42 43 knn_boost: Boost value for KNN (embedding recall)
43 44 """
... ... @@ -46,7 +47,6 @@ class ESQueryBuilder:
46 47 self.image_embedding_field = image_embedding_field
47 48 self.source_fields = source_fields
48 49 self.function_score_config = function_score_config
49   - self.enable_multilang_search = enable_multilang_search
50 50 self.default_language = default_language
51 51 self.knn_boost = knn_boost
52 52  
... ... @@ -488,35 +488,31 @@ class ESQueryBuilder:
488 488 }
489 489 })
490 490  
491   - # 2. Translation queries - lower boost (0.4) for other languages
492   - if self.enable_multilang_search:
493   - if language != 'zh' and translations.get('zh'):
494   - zh_fields, _ = self._get_match_fields('zh')
495   - should_clauses.append({
496   - "multi_match": {
497   - "query": translations['zh'],
498   - "fields": zh_fields,
499   - # "operator": "AND",
500   - "minimum_should_match": "75%",
501   - "tie_breaker": tie_breaker_base_query,
502   - "boost": 0.4,
503   - "_name": "base_query_trans_zh"
504   - }
505   - })
506   -
507   - if language != 'en' and translations.get('en'):
508   - en_fields, _ = self._get_match_fields('en')
509   - should_clauses.append({
510   - "multi_match": {
511   - "query": translations['en'],
512   - "fields": en_fields,
513   - # "operator": "AND",
514   - "minimum_should_match": "75%",
515   - "tie_breaker": tie_breaker_base_query,
516   - "boost": 0.4,
517   - "_name": "base_query_trans_en"
518   - }
519   - })
  491 + # 2. Translation queries - lower boost (0.4) for other languages (multi-language search always on)
  492 + if language != 'zh' and translations.get('zh'):
  493 + zh_fields, _ = self._get_match_fields('zh')
  494 + should_clauses.append({
  495 + "multi_match": {
  496 + "query": translations['zh'],
  497 + "fields": zh_fields,
  498 + "minimum_should_match": "75%",
  499 + "tie_breaker": tie_breaker_base_query,
  500 + "boost": 0.4,
  501 + "_name": "base_query_trans_zh"
  502 + }
  503 + })
  504 + if language != 'en' and translations.get('en'):
  505 + en_fields, _ = self._get_match_fields('en')
  506 + should_clauses.append({
  507 + "multi_match": {
  508 + "query": translations['en'],
  509 + "fields": en_fields,
  510 + "minimum_should_match": "75%",
  511 + "tie_breaker": tie_breaker_base_query,
  512 + "boost": 0.4,
  513 + "_name": "base_query_trans_en"
  514 + }
  515 + })
520 516  
521 517 if False and is_long_query:
522 518 boost = 0.5 * pow(min(1.0, token_count / 10.0), 0.9)
... ...
search/searcher.py
... ... @@ -119,7 +119,6 @@ class Searcher:
119 119 image_embedding_field=self.image_embedding_field,
120 120 source_fields=self.source_fields,
121 121 function_score_config=self.config.function_score,
122   - enable_multilang_search=self.config.query_config.enable_multilang_search,
123 122 default_language=self.config.query_config.default_language,
124 123 knn_boost=self.config.query_config.knn_boost
125 124 )
... ...
tests/test_embedding_pipeline.py
... ... @@ -77,7 +77,6 @@ def _build_test_config() -> SearchConfig:
77 77 default_language="en",
78 78 enable_text_embedding=True,
79 79 enable_query_rewrite=False,
80   - enable_multilang_search=True,
81 80 rewrite_dictionary={},
82 81 translation_prompts={"query_zh": "e-commerce domain", "query_en": "e-commerce domain"},
83 82 text_embedding_field="title_embedding",
... ...