Commit 345d960b849cffe9486a6be7121b5adf87d4528c

Authored by tangwang
1 parent cff5e86f

1. 删除全局 enable_translation 配置

config/config_loader.py: 从 QueryConfig 类中删除 enable_translation 字段
config/config.yaml: 删除 enable_translation: true 配置项
config/config_loader.py: 从 to_dict() 方法中删除相关输出
2. 索引阶段(离线)- 使用租户配置
indexer/indexing_utils.py:
根据 tenant_config.translate_to_en 和 translate_to_zh 决定是否初始化 translator
仅当调用方未传入 translator 且任一翻译方向(translate_to_en / translate_to_zh)开启时,才创建 translator
indexer/document_transformer.py:
_fill_text_fields 从 tenant_config 读取 translate_to_en 和 translate_to_zh
调用 translate_for_indexing 时传递这两个参数
更新了文档注释
3. 查询阶段(在线)- 使用租户配置
query/query_parser.py:
parse() 方法新增可选参数 tenant_id(默认 None,为空时读取 "default" 租户的配置);该参数位于 generate_vector 之前,按位置传参的旧调用需改用关键字参数
根据租户配置决定翻译目标语言(translate_to_zh / translate_to_en),且不会翻译到与检测到的查询语言相同的语言
如果两个都是 false,跳过翻译阶段
translator 属性不再依赖 enable_translation,首次访问时总会懒加载初始化
search/searcher.py:
search() 方法中根据租户配置计算 enable_translation(用于日志和 metadata)
调用 query_parser.parse() 时传递 tenant_id
4. 翻译器方法更新
query/translator.py:
translate_for_indexing() 新增 translate_to_en 和 translate_to_zh 参数(默认 True 保持向后兼容)
根据这两个参数决定翻译目标
更新了文档注释
config/config.yaml
@@ -85,8 +85,7 @@ query_config: @@ -85,8 +85,7 @@ query_config:
85 - "en" 85 - "en"
86 default_language: "zh" 86 default_language: "zh"
87 87
88 - # 功能开关  
89 - enable_translation: true 88 + # 功能开关(翻译开关由tenant_config控制)
90 enable_text_embedding: true 89 enable_text_embedding: true
91 enable_query_rewrite: true 90 enable_query_rewrite: true
92 enable_multilang_search: true # 启用多语言搜索(使用翻译进行跨语言检索) 91 enable_multilang_search: true # 启用多语言搜索(使用翻译进行跨语言检索)
config/config_loader.py
@@ -32,7 +32,6 @@ class QueryConfig: @@ -32,7 +32,6 @@ class QueryConfig:
32 default_language: str = "zh" 32 default_language: str = "zh"
33 33
34 # Feature flags 34 # Feature flags
35 - enable_translation: bool = True  
36 enable_text_embedding: bool = True 35 enable_text_embedding: bool = True
37 enable_query_rewrite: bool = True 36 enable_query_rewrite: bool = True
38 enable_multilang_search: bool = True # Enable multi-language search using translations 37 enable_multilang_search: bool = True # Enable multi-language search using translations
@@ -230,7 +229,6 @@ class ConfigLoader: @@ -230,7 +229,6 @@ class ConfigLoader:
230 query_config = QueryConfig( 229 query_config = QueryConfig(
231 supported_languages=query_config_data.get("supported_languages") or ["zh", "en"], 230 supported_languages=query_config_data.get("supported_languages") or ["zh", "en"],
232 default_language=query_config_data.get("default_language") or "zh", 231 default_language=query_config_data.get("default_language") or "zh",
233 - enable_translation=query_config_data.get("enable_translation", True),  
234 enable_text_embedding=query_config_data.get("enable_text_embedding", True), 232 enable_text_embedding=query_config_data.get("enable_text_embedding", True),
235 enable_query_rewrite=query_config_data.get("enable_query_rewrite", True), 233 enable_query_rewrite=query_config_data.get("enable_query_rewrite", True),
236 rewrite_dictionary=rewrite_dictionary, 234 rewrite_dictionary=rewrite_dictionary,
@@ -369,7 +367,6 @@ class ConfigLoader: @@ -369,7 +367,6 @@ class ConfigLoader:
369 query_config_dict = { 367 query_config_dict = {
370 "supported_languages": config.query_config.supported_languages, 368 "supported_languages": config.query_config.supported_languages,
371 "default_language": config.query_config.default_language, 369 "default_language": config.query_config.default_language,
372 - "enable_translation": config.query_config.enable_translation,  
373 "enable_text_embedding": config.query_config.enable_text_embedding, 370 "enable_text_embedding": config.query_config.enable_text_embedding,
374 "enable_query_rewrite": config.query_config.enable_query_rewrite, 371 "enable_query_rewrite": config.query_config.enable_query_rewrite,
375 "translation_service": config.query_config.translation_service, 372 "translation_service": config.query_config.translation_service,
context/request_context.py
@@ -83,7 +83,7 @@ class RequestContext: @@ -83,7 +83,7 @@ class RequestContext:
83 # 元数据 83 # 元数据
84 self.metadata = { 84 self.metadata = {
85 'search_params': {}, # size, from_, filters等 85 'search_params': {}, # size, from_, filters等
86 - 'feature_flags': {}, # enable_translation, enable_embedding等 86 + 'feature_flags': {}, # translation_enabled (由tenant_config控制), enable_embedding等
87 'config_info': {}, # 索引配置、字段映射等 87 'config_info': {}, # 索引配置、字段映射等
88 'error_info': None, 88 'error_info': None,
89 'warnings': [] 89 'warnings': []
@@ -89,10 +89,11 @@ def example_with_searcher(): @@ -89,10 +89,11 @@ def example_with_searcher():
89 'from': 0 89 'from': 0
90 } 90 }
91 91
  92 + # 示例中写死 feature flags,实际环境由 tenant_config / 后端配置控制
92 context.metadata['feature_flags'] = { 93 context.metadata['feature_flags'] = {
93 - 'enable_translation': True,  
94 - 'enable_embedding': True,  
95 - 'enable_rerank': True 94 + 'translation_enabled': True,
  95 + 'embedding_enabled': True,
  96 + 'rerank_enabled': True
96 } 97 }
97 98
98 # 模拟搜索流程 99 # 模拟搜索流程
indexer/document_transformer.py
@@ -171,12 +171,18 @@ class SPUDocumentTransformer: @@ -171,12 +171,18 @@ class SPUDocumentTransformer:
171 primary_lang: str 171 primary_lang: str
172 ): 172 ):
173 """ 173 """
174 - 填充文本字段(根据主语言自动处理多语言翻译)。 174 + 填充文本字段(根据租户配置处理多语言翻译)。
175 175
176 - 翻译逻辑在translator内部处理:  
177 - - 如果店铺语言不等于zh,自动翻译成zh  
178 - - 如果店铺语言不等于en,自动翻译成en 176 + 翻译逻辑:
  177 + - 根据 tenant_config 中的 translate_to_zh 和 translate_to_en 决定翻译方向
  178 + - 如果 translate_to_zh=true,且店铺语言不是zh,则翻译到中文
  179 + - 如果 translate_to_en=true,且店铺语言不是en,则翻译到英文
  180 + - 如果两个都是false,则不进行翻译,只填充主语言字段
179 """ 181 """
  182 + # 从租户配置中读取翻译方向
  183 + translate_to_en = bool(self.tenant_config.get('translate_to_en'))
  184 + translate_to_zh = bool(self.tenant_config.get('translate_to_zh'))
  185 +
180 # Title 186 # Title
181 if pd.notna(spu_row.get('title')): 187 if pd.notna(spu_row.get('title')):
182 title_text = str(spu_row['title']) 188 title_text = str(spu_row['title'])
@@ -192,7 +198,9 @@ class SPUDocumentTransformer: @@ -192,7 +198,9 @@ class SPUDocumentTransformer:
192 title_text, 198 title_text,
193 shop_language=primary_lang, 199 shop_language=primary_lang,
194 source_lang=primary_lang, 200 source_lang=primary_lang,
195 - prompt=prompt_zh if primary_lang == 'zh' else prompt_en 201 + prompt=prompt_zh if primary_lang == 'zh' else prompt_en,
  202 + translate_to_en=translate_to_en,
  203 + translate_to_zh=translate_to_zh,
196 ) 204 )
197 205
198 # 填充翻译结果 206 # 填充翻译结果
@@ -219,7 +227,9 @@ class SPUDocumentTransformer: @@ -219,7 +227,9 @@ class SPUDocumentTransformer:
219 brief_text, 227 brief_text,
220 shop_language=primary_lang, 228 shop_language=primary_lang,
221 source_lang=primary_lang, 229 source_lang=primary_lang,
222 - prompt=prompt 230 + prompt=prompt,
  231 + translate_to_en=translate_to_en,
  232 + translate_to_zh=translate_to_zh,
223 ) 233 )
224 doc['brief_zh'] = translations.get('zh') or (brief_text if primary_lang == 'zh' else None) 234 doc['brief_zh'] = translations.get('zh') or (brief_text if primary_lang == 'zh' else None)
225 doc['brief_en'] = translations.get('en') or (brief_text if primary_lang == 'en' else None) 235 doc['brief_en'] = translations.get('en') or (brief_text if primary_lang == 'en' else None)
@@ -243,7 +253,9 @@ class SPUDocumentTransformer: @@ -243,7 +253,9 @@ class SPUDocumentTransformer:
243 desc_text, 253 desc_text,
244 shop_language=primary_lang, 254 shop_language=primary_lang,
245 source_lang=primary_lang, 255 source_lang=primary_lang,
246 - prompt=prompt 256 + prompt=prompt,
  257 + translate_to_en=translate_to_en,
  258 + translate_to_zh=translate_to_zh,
247 ) 259 )
248 doc['description_zh'] = translations.get('zh') or (desc_text if primary_lang == 'zh' else None) 260 doc['description_zh'] = translations.get('zh') or (desc_text if primary_lang == 'zh' else None)
249 doc['description_en'] = translations.get('en') or (desc_text if primary_lang == 'en' else None) 261 doc['description_en'] = translations.get('en') or (desc_text if primary_lang == 'en' else None)
@@ -267,7 +279,9 @@ class SPUDocumentTransformer: @@ -267,7 +279,9 @@ class SPUDocumentTransformer:
267 vendor_text, 279 vendor_text,
268 shop_language=primary_lang, 280 shop_language=primary_lang,
269 source_lang=primary_lang, 281 source_lang=primary_lang,
270 - prompt=prompt 282 + prompt=prompt,
  283 + translate_to_en=translate_to_en,
  284 + translate_to_zh=translate_to_zh,
271 ) 285 )
272 doc['vendor_zh'] = translations.get('zh') or (vendor_text if primary_lang == 'zh' else None) 286 doc['vendor_zh'] = translations.get('zh') or (vendor_text if primary_lang == 'zh' else None)
273 doc['vendor_en'] = translations.get('en') or (vendor_text if primary_lang == 'en' else None) 287 doc['vendor_en'] = translations.get('en') or (vendor_text if primary_lang == 'en' else None)
indexer/indexing_utils.py
@@ -88,7 +88,11 @@ def create_document_transformer( @@ -88,7 +88,11 @@ def create_document_transformer(
88 if searchable_option_dimensions is None: 88 if searchable_option_dimensions is None:
89 searchable_option_dimensions = config.spu_config.searchable_option_dimensions 89 searchable_option_dimensions = config.spu_config.searchable_option_dimensions
90 90
91 - if translator is None and config.query_config.enable_translation: 91 + # 根据租户配置决定是否需要翻译:只要开启任一方向的翻译,就初始化翻译器
  92 + translate_to_en = bool(tenant_config.get("translate_to_en"))
  93 + translate_to_zh = bool(tenant_config.get("translate_to_zh"))
  94 +
  95 + if translator is None and (translate_to_en or translate_to_zh):
92 from query.translator import Translator 96 from query.translator import Translator
93 translator = Translator( 97 translator = Translator(
94 api_key=config.query_config.translation_api_key, 98 api_key=config.query_config.translation_api_key,
indexer/test_indexing.py
@@ -265,14 +265,12 @@ def test_document_transformer(): @@ -265,14 +265,12 @@ def test_document_transformer():
265 tenant_config_loader = get_tenant_config_loader() 265 tenant_config_loader = get_tenant_config_loader()
266 tenant_config = tenant_config_loader.get_tenant_config('162') 266 tenant_config = tenant_config_loader.get_tenant_config('162')
267 267
268 - # 初始化翻译器(如果启用)  
269 - translator = None  
270 - if config.query_config.enable_translation:  
271 - from query.translator import Translator  
272 - translator = Translator(  
273 - api_key=config.query_config.translation_api_key,  
274 - use_cache=True  
275 - ) 268 + # 初始化翻译器(测试环境总是启用,具体翻译方向由tenant_config控制)
  269 + from query.translator import Translator
  270 + translator = Translator(
  271 + api_key=config.query_config.translation_api_key,
  272 + use_cache=True
  273 + )
276 274
277 # 创建转换器 275 # 创建转换器
278 transformer = SPUDocumentTransformer( 276 transformer = SPUDocumentTransformer(
query/query_parser.py
@@ -115,7 +115,7 @@ class QueryParser: @@ -115,7 +115,7 @@ class QueryParser:
115 @property 115 @property
116 def translator(self) -> Translator: 116 def translator(self) -> Translator:
117 """Lazy load translator.""" 117 """Lazy load translator."""
118 - if self._translator is None and self.config.query_config.enable_translation: 118 + if self._translator is None:
119 logger.info("Initializing translator (lazy load)...") 119 logger.info("Initializing translator (lazy load)...")
120 self._translator = Translator( 120 self._translator = Translator(
121 api_key=self.config.query_config.translation_api_key, 121 api_key=self.config.query_config.translation_api_key,
@@ -153,7 +153,13 @@ class QueryParser: @@ -153,7 +153,13 @@ class QueryParser:
153 is_long = token_count >= 4 153 is_long = token_count >= 4
154 return is_short, is_long 154 return is_short, is_long
155 155
156 - def parse(self, query: str, generate_vector: bool = True, context: Optional[Any] = None) -> ParsedQuery: 156 + def parse(
  157 + self,
  158 + query: str,
  159 + tenant_id: Optional[str] = None,
  160 + generate_vector: bool = True,
  161 + context: Optional[Any] = None
  162 + ) -> ParsedQuery:
157 """ 163 """
158 Parse query through all processing stages. 164 Parse query through all processing stages.
159 165
@@ -221,12 +227,23 @@ class QueryParser: @@ -221,12 +227,23 @@ class QueryParser:
221 # Stage 4: Translation (with async support and conditional waiting) 227 # Stage 4: Translation (with async support and conditional waiting)
222 translations = {} 228 translations = {}
223 translation_futures = {} 229 translation_futures = {}
224 - if self.config.query_config.enable_translation:  
225 - try:  
226 - # Determine target languages for translation  
227 - # Simplified: always translate to Chinese and English  
228 - target_langs_for_translation = ['zh', 'en']  
229 - 230 + try:
  231 + # 根据租户配置决定翻译目标语言
  232 + from config.tenant_config_loader import get_tenant_config_loader
  233 + tenant_loader = get_tenant_config_loader()
  234 + tenant_cfg = tenant_loader.get_tenant_config(tenant_id or "default")
  235 +
  236 + translate_to_zh = bool(tenant_cfg.get("translate_to_zh"))
  237 + translate_to_en = bool(tenant_cfg.get("translate_to_en"))
  238 +
  239 + target_langs_for_translation = []
  240 + if translate_to_zh:
  241 + target_langs_for_translation.append('zh')
  242 + if translate_to_en:
  243 + target_langs_for_translation.append('en')
  244 +
  245 + # 如果该租户未开启任何翻译方向,则直接跳过翻译阶段
  246 + if target_langs_for_translation:
230 target_langs = [lang for lang in target_langs_for_translation if detected_lang != lang] 247 target_langs = [lang for lang in target_langs_for_translation if detected_lang != lang]
231 248
232 if target_langs: 249 if target_langs:
@@ -279,11 +296,11 @@ class QueryParser: @@ -279,11 +296,11 @@ class QueryParser:
279 if translation: 296 if translation:
280 context.store_intermediate_result(f'translation_{lang}', translation) 297 context.store_intermediate_result(f'translation_{lang}', translation)
281 298
282 - except Exception as e:  
283 - error_msg = f"翻译失败 | 错误: {str(e)}"  
284 - log_info(error_msg)  
285 - if context:  
286 - context.add_warning(error_msg) 299 + except Exception as e:
  300 + error_msg = f"翻译失败 | 错误: {str(e)}"
  301 + log_info(error_msg)
  302 + if context:
  303 + context.add_warning(error_msg)
287 304
288 # Stage 5: Query analysis (keywords, token count, query type) 305 # Stage 5: Query analysis (keywords, token count, query type)
289 keywords = self._extract_keywords(query_text) 306 keywords = self._extract_keywords(query_text)
query/translator.py
@@ -584,17 +584,17 @@ class Translator: @@ -584,17 +584,17 @@ class Translator:
584 shop_language: str, 584 shop_language: str,
585 source_lang: Optional[str] = None, 585 source_lang: Optional[str] = None,
586 context: Optional[str] = None, 586 context: Optional[str] = None,
587 - prompt: Optional[str] = None 587 + prompt: Optional[str] = None,
  588 + translate_to_en: bool = True,
  589 + translate_to_zh: bool = True,
588 ) -> Dict[str, Optional[str]]: 590 ) -> Dict[str, Optional[str]]:
589 """ 591 """
590 - Translate text for indexing based on shop language configuration.  
591 -  
592 - This method automatically handles multi-language translation:  
593 - - If shop language is not 'zh', translate to Chinese (zh)  
594 - - If shop language is not 'en', translate to English (en) 592 + Translate text for indexing based on shop language and tenant configuration.
595 593
596 - All translation logic is internal - callers don't need to worry about  
597 - which languages to translate to. 594 + Translation behavior:
  595 + - If translate_to_zh=True and shop language is not 'zh', translate to Chinese (zh)
  596 + - If translate_to_en=True and shop language is not 'en', translate to English (en)
  597 + - If both flags are False, no translation is performed (returns None for both)
598 598
599 Args: 599 Args:
600 text: Text to translate 600 text: Text to translate
@@ -602,10 +602,12 @@ class Translator: @@ -602,10 +602,12 @@ class Translator:
602 source_lang: Source language code (optional, auto-detect if None) 602 source_lang: Source language code (optional, auto-detect if None)
603 context: Additional context for translation (optional) 603 context: Additional context for translation (optional)
604 prompt: Translation prompt/instruction (optional) 604 prompt: Translation prompt/instruction (optional)
  605 + translate_to_en: Whether to translate to English (from tenant_config)
  606 + translate_to_zh: Whether to translate to Chinese (from tenant_config)
605 607
606 Returns: 608 Returns:
607 - Dictionary with 'zh' and 'en' keys containing translated text (or None if not needed)  
608 - Example: {'zh': '中文翻译', 'en': 'English translation'} 609 + Dictionary with 'zh' and 'en' keys containing translated text (or None if not needed/not enabled)
  610 + Example: {'zh': '中文翻译', 'en': 'English translation'} or {'zh': None, 'en': None}
609 """ 611 """
610 if not text or not text.strip(): 612 if not text or not text.strip():
611 return {'zh': None, 'en': None} 613 return {'zh': None, 'en': None}
@@ -618,11 +620,11 @@ class Translator: @@ -618,11 +620,11 @@ class Translator:
618 results = {'zh': None, 'en': None} 620 results = {'zh': None, 'en': None}
619 shop_lang_lower = shop_language.lower() if shop_language else "" 621 shop_lang_lower = shop_language.lower() if shop_language else ""
620 622
621 - # Determine which languages need translation 623 + # Determine which languages need translation based on tenant configuration
622 targets = [] 624 targets = []
623 - if "zh" not in shop_lang_lower: 625 + if translate_to_zh and "zh" not in shop_lang_lower:
624 targets.append("zh") 626 targets.append("zh")
625 - if "en" not in shop_lang_lower: 627 + if translate_to_en and "en" not in shop_lang_lower:
626 targets.append("en") 628 targets.append("en")
627 629
628 # If shop language is already zh and en, no translation needed 630 # If shop language is already zh and en, no translation needed
search/searcher.py
@@ -15,6 +15,7 @@ from .boolean_parser import BooleanParser, QueryNode @@ -15,6 +15,7 @@ from .boolean_parser import BooleanParser, QueryNode
15 from .es_query_builder import ESQueryBuilder 15 from .es_query_builder import ESQueryBuilder
16 from .rerank_engine import RerankEngine 16 from .rerank_engine import RerankEngine
17 from config import SearchConfig 17 from config import SearchConfig
  18 +from config.tenant_config_loader import get_tenant_config_loader
18 from config.utils import get_match_fields_for_index 19 from config.utils import get_match_fields_for_index
19 from context.request_context import RequestContext, RequestContextStage, create_request_context 20 from context.request_context import RequestContext, RequestContextStage, create_request_context
20 from api.models import FacetResult, FacetValue, FacetConfig 21 from api.models import FacetResult, FacetValue, FacetConfig
@@ -158,8 +159,10 @@ class Searcher: @@ -158,8 +159,10 @@ class Searcher:
158 if context is None: 159 if context is None:
159 context = create_request_context() 160 context = create_request_context()
160 161
161 - # Always use config defaults (these are backend configuration, not user parameters)  
162 - enable_translation = self.config.query_config.enable_translation 162 + # 根据租户配置决定翻译开关(离线/在线统一)
  163 + tenant_loader = get_tenant_config_loader()
  164 + tenant_cfg = tenant_loader.get_tenant_config(tenant_id)
  165 + enable_translation = bool(tenant_cfg.get("translate_to_en") or tenant_cfg.get("translate_to_zh"))
163 enable_embedding = self.config.query_config.enable_text_embedding 166 enable_embedding = self.config.query_config.enable_text_embedding
164 enable_rerank = False # Temporarily disabled 167 enable_rerank = False # Temporarily disabled
165 168
@@ -199,6 +202,7 @@ class Searcher: @@ -199,6 +202,7 @@ class Searcher:
199 try: 202 try:
200 parsed_query = self.query_parser.parse( 203 parsed_query = self.query_parser.parse(
201 query, 204 query,
  205 + tenant_id=tenant_id,
202 generate_vector=enable_embedding, 206 generate_vector=enable_embedding,
203 context=context 207 context=context
204 ) 208 )
@@ -38,7 +38,6 @@ def sample_search_config(sample_index_config) -> SearchConfig: @@ -38,7 +38,6 @@ def sample_search_config(sample_index_config) -> SearchConfig:
38 """样例搜索配置""" 38 """样例搜索配置"""
39 query_config = QueryConfig( 39 query_config = QueryConfig(
40 enable_query_rewrite=True, 40 enable_query_rewrite=True,
41 - enable_translation=True,  
42 enable_text_embedding=True, 41 enable_text_embedding=True,
43 supported_languages=["zh", "en"] 42 supported_languages=["zh", "en"]
44 ) 43 )
@@ -174,7 +173,6 @@ def temp_config_file() -> Generator[str, None, None]: @@ -174,7 +173,6 @@ def temp_config_file() -> Generator[str, None, None]:
174 "query_config": { 173 "query_config": {
175 "supported_languages": ["zh", "en"], 174 "supported_languages": ["zh", "en"],
176 "default_language": "zh", 175 "default_language": "zh",
177 - "enable_translation": True,  
178 "enable_text_embedding": True, 176 "enable_text_embedding": True,
179 "enable_query_rewrite": True 177 "enable_query_rewrite": True
180 }, 178 },