Commit 345d960b849cffe9486a6be7121b5adf87d4528c

Authored by tangwang
1 parent cff5e86f

1. 删除全局 enable_translation 配置

config/config_loader.py: 从 QueryConfig 类中删除 enable_translation 字段
config/config.yaml: 删除 enable_translation: true 配置项
config/config_loader.py: 从 to_dict() 方法中删除相关输出
2. 索引阶段(离线)- 使用租户配置
indexer/indexing_utils.py:
根据 tenant_config.translate_to_en 和 translate_to_zh 决定是否初始化 translator
只有调用方未传入 translator 且任一方向开启时才创建 translator
indexer/document_transformer.py:
_fill_text_fields 从 tenant_config 读取 translate_to_en 和 translate_to_zh
调用 translate_for_indexing 时传递这两个参数
更新了文档注释
3. 查询阶段(在线)- 使用租户配置
query/query_parser.py:
parse() 方法新增可选的 tenant_id 参数(位于 query 之后、generate_vector 之前,按位置传入 generate_vector 的调用方需改用关键字参数;未传时使用 "default" 租户配置)
根据租户配置决定翻译目标语言(translate_to_zh / translate_to_en)
如果两个都是 false,跳过翻译阶段
translator 属性不再依赖 enable_translation,首次访问时总是惰性初始化
search/searcher.py:
search() 方法中根据租户配置计算局部变量 enable_translation(任一翻译方向开启即为 True,用于日志和 metadata)
调用 query_parser.parse() 时传递 tenant_id
4. 翻译器方法更新
query/translator.py:
translate_for_indexing() 新增 translate_to_en 和 translate_to_zh 参数(默认 True 保持向后兼容)
根据这两个参数决定翻译目标
更新了文档注释
config/config.yaml
... ... @@ -85,8 +85,7 @@ query_config:
85 85 - "en"
86 86 default_language: "zh"
87 87  
88   - # 功能开关
89   - enable_translation: true
  88 + # 功能开关(翻译开关由tenant_config控制)
90 89 enable_text_embedding: true
91 90 enable_query_rewrite: true
92 91 enable_multilang_search: true # 启用多语言搜索(使用翻译进行跨语言检索)
... ...
config/config_loader.py
... ... @@ -32,7 +32,6 @@ class QueryConfig:
32 32 default_language: str = "zh"
33 33  
34 34 # Feature flags
35   - enable_translation: bool = True
36 35 enable_text_embedding: bool = True
37 36 enable_query_rewrite: bool = True
38 37 enable_multilang_search: bool = True # Enable multi-language search using translations
... ... @@ -230,7 +229,6 @@ class ConfigLoader:
230 229 query_config = QueryConfig(
231 230 supported_languages=query_config_data.get("supported_languages") or ["zh", "en"],
232 231 default_language=query_config_data.get("default_language") or "zh",
233   - enable_translation=query_config_data.get("enable_translation", True),
234 232 enable_text_embedding=query_config_data.get("enable_text_embedding", True),
235 233 enable_query_rewrite=query_config_data.get("enable_query_rewrite", True),
236 234 rewrite_dictionary=rewrite_dictionary,
... ... @@ -369,7 +367,6 @@ class ConfigLoader:
369 367 query_config_dict = {
370 368 "supported_languages": config.query_config.supported_languages,
371 369 "default_language": config.query_config.default_language,
372   - "enable_translation": config.query_config.enable_translation,
373 370 "enable_text_embedding": config.query_config.enable_text_embedding,
374 371 "enable_query_rewrite": config.query_config.enable_query_rewrite,
375 372 "translation_service": config.query_config.translation_service,
... ...
context/request_context.py
... ... @@ -83,7 +83,7 @@ class RequestContext:
83 83 # 元数据
84 84 self.metadata = {
85 85 'search_params': {}, # size, from_, filters等
86   - 'feature_flags': {}, # enable_translation, enable_embedding等
  86 + 'feature_flags': {}, # translation_enabled (由tenant_config控制), enable_embedding等
87 87 'config_info': {}, # 索引配置、字段映射等
88 88 'error_info': None,
89 89 'warnings': []
... ...
example_usage.py
... ... @@ -89,10 +89,11 @@ def example_with_searcher():
89 89 'from': 0
90 90 }
91 91  
  92 + # 示例中写死 feature flags,实际环境由 tenant_config / 后端配置控制
92 93 context.metadata['feature_flags'] = {
93   - 'enable_translation': True,
94   - 'enable_embedding': True,
95   - 'enable_rerank': True
  94 + 'translation_enabled': True,
  95 + 'embedding_enabled': True,
  96 + 'rerank_enabled': True
96 97 }
97 98  
98 99 # 模拟搜索流程
... ...
indexer/document_transformer.py
... ... @@ -171,12 +171,18 @@ class SPUDocumentTransformer:
171 171 primary_lang: str
172 172 ):
173 173 """
174   - 填充文本字段(根据主语言自动处理多语言翻译)。
  174 + 填充文本字段(根据租户配置处理多语言翻译)。
175 175  
176   - 翻译逻辑在translator内部处理:
177   - - 如果店铺语言不等于zh,自动翻译成zh
178   - - 如果店铺语言不等于en,自动翻译成en
  176 + 翻译逻辑:
  177 + - 根据 tenant_config 中的 translate_to_zh 和 translate_to_en 决定翻译方向
  178 + - 如果 translate_to_zh=true,且店铺语言不是zh,则翻译到中文
  179 + - 如果 translate_to_en=true,且店铺语言不是en,则翻译到英文
  180 + - 如果两个都是false,则不进行翻译,只填充主语言字段
179 181 """
  182 + # 从租户配置中读取翻译方向
  183 + translate_to_en = bool(self.tenant_config.get('translate_to_en'))
  184 + translate_to_zh = bool(self.tenant_config.get('translate_to_zh'))
  185 +
180 186 # Title
181 187 if pd.notna(spu_row.get('title')):
182 188 title_text = str(spu_row['title'])
... ... @@ -192,7 +198,9 @@ class SPUDocumentTransformer:
192 198 title_text,
193 199 shop_language=primary_lang,
194 200 source_lang=primary_lang,
195   - prompt=prompt_zh if primary_lang == 'zh' else prompt_en
  201 + prompt=prompt_zh if primary_lang == 'zh' else prompt_en,
  202 + translate_to_en=translate_to_en,
  203 + translate_to_zh=translate_to_zh,
196 204 )
197 205  
198 206 # 填充翻译结果
... ... @@ -219,7 +227,9 @@ class SPUDocumentTransformer:
219 227 brief_text,
220 228 shop_language=primary_lang,
221 229 source_lang=primary_lang,
222   - prompt=prompt
  230 + prompt=prompt,
  231 + translate_to_en=translate_to_en,
  232 + translate_to_zh=translate_to_zh,
223 233 )
224 234 doc['brief_zh'] = translations.get('zh') or (brief_text if primary_lang == 'zh' else None)
225 235 doc['brief_en'] = translations.get('en') or (brief_text if primary_lang == 'en' else None)
... ... @@ -243,7 +253,9 @@ class SPUDocumentTransformer:
243 253 desc_text,
244 254 shop_language=primary_lang,
245 255 source_lang=primary_lang,
246   - prompt=prompt
  256 + prompt=prompt,
  257 + translate_to_en=translate_to_en,
  258 + translate_to_zh=translate_to_zh,
247 259 )
248 260 doc['description_zh'] = translations.get('zh') or (desc_text if primary_lang == 'zh' else None)
249 261 doc['description_en'] = translations.get('en') or (desc_text if primary_lang == 'en' else None)
... ... @@ -267,7 +279,9 @@ class SPUDocumentTransformer:
267 279 vendor_text,
268 280 shop_language=primary_lang,
269 281 source_lang=primary_lang,
270   - prompt=prompt
  282 + prompt=prompt,
  283 + translate_to_en=translate_to_en,
  284 + translate_to_zh=translate_to_zh,
271 285 )
272 286 doc['vendor_zh'] = translations.get('zh') or (vendor_text if primary_lang == 'zh' else None)
273 287 doc['vendor_en'] = translations.get('en') or (vendor_text if primary_lang == 'en' else None)
... ...
indexer/indexing_utils.py
... ... @@ -88,7 +88,11 @@ def create_document_transformer(
88 88 if searchable_option_dimensions is None:
89 89 searchable_option_dimensions = config.spu_config.searchable_option_dimensions
90 90  
91   - if translator is None and config.query_config.enable_translation:
  91 + # 根据租户配置决定是否需要翻译:只要开启任一方向的翻译,就初始化翻译器
  92 + translate_to_en = bool(tenant_config.get("translate_to_en"))
  93 + translate_to_zh = bool(tenant_config.get("translate_to_zh"))
  94 +
  95 + if translator is None and (translate_to_en or translate_to_zh):
92 96 from query.translator import Translator
93 97 translator = Translator(
94 98 api_key=config.query_config.translation_api_key,
... ...
indexer/test_indexing.py
... ... @@ -265,14 +265,12 @@ def test_document_transformer():
265 265 tenant_config_loader = get_tenant_config_loader()
266 266 tenant_config = tenant_config_loader.get_tenant_config('162')
267 267  
268   - # 初始化翻译器(如果启用)
269   - translator = None
270   - if config.query_config.enable_translation:
271   - from query.translator import Translator
272   - translator = Translator(
273   - api_key=config.query_config.translation_api_key,
274   - use_cache=True
275   - )
  268 + # 初始化翻译器(测试环境总是启用,具体翻译方向由tenant_config控制)
  269 + from query.translator import Translator
  270 + translator = Translator(
  271 + api_key=config.query_config.translation_api_key,
  272 + use_cache=True
  273 + )
276 274  
277 275 # 创建转换器
278 276 transformer = SPUDocumentTransformer(
... ...
query/query_parser.py
... ... @@ -115,7 +115,7 @@ class QueryParser:
115 115 @property
116 116 def translator(self) -> Translator:
117 117 """Lazy load translator."""
118   - if self._translator is None and self.config.query_config.enable_translation:
  118 + if self._translator is None:
119 119 logger.info("Initializing translator (lazy load)...")
120 120 self._translator = Translator(
121 121 api_key=self.config.query_config.translation_api_key,
... ... @@ -153,7 +153,13 @@ class QueryParser:
153 153 is_long = token_count >= 4
154 154 return is_short, is_long
155 155  
156   - def parse(self, query: str, generate_vector: bool = True, context: Optional[Any] = None) -> ParsedQuery:
  156 + def parse(
  157 + self,
  158 + query: str,
  159 + tenant_id: Optional[str] = None,
  160 + generate_vector: bool = True,
  161 + context: Optional[Any] = None
  162 + ) -> ParsedQuery:
157 163 """
158 164 Parse query through all processing stages.
159 165  
... ... @@ -221,12 +227,23 @@ class QueryParser:
221 227 # Stage 4: Translation (with async support and conditional waiting)
222 228 translations = {}
223 229 translation_futures = {}
224   - if self.config.query_config.enable_translation:
225   - try:
226   - # Determine target languages for translation
227   - # Simplified: always translate to Chinese and English
228   - target_langs_for_translation = ['zh', 'en']
229   -
  230 + try:
  231 + # 根据租户配置决定翻译目标语言
  232 + from config.tenant_config_loader import get_tenant_config_loader
  233 + tenant_loader = get_tenant_config_loader()
  234 + tenant_cfg = tenant_loader.get_tenant_config(tenant_id or "default")
  235 +
  236 + translate_to_zh = bool(tenant_cfg.get("translate_to_zh"))
  237 + translate_to_en = bool(tenant_cfg.get("translate_to_en"))
  238 +
  239 + target_langs_for_translation = []
  240 + if translate_to_zh:
  241 + target_langs_for_translation.append('zh')
  242 + if translate_to_en:
  243 + target_langs_for_translation.append('en')
  244 +
  245 + # 如果该租户未开启任何翻译方向,则直接跳过翻译阶段
  246 + if target_langs_for_translation:
230 247 target_langs = [lang for lang in target_langs_for_translation if detected_lang != lang]
231 248  
232 249 if target_langs:
... ... @@ -279,11 +296,11 @@ class QueryParser:
279 296 if translation:
280 297 context.store_intermediate_result(f'translation_{lang}', translation)
281 298  
282   - except Exception as e:
283   - error_msg = f"翻译失败 | 错误: {str(e)}"
284   - log_info(error_msg)
285   - if context:
286   - context.add_warning(error_msg)
  299 + except Exception as e:
  300 + error_msg = f"翻译失败 | 错误: {str(e)}"
  301 + log_info(error_msg)
  302 + if context:
  303 + context.add_warning(error_msg)
287 304  
288 305 # Stage 5: Query analysis (keywords, token count, query type)
289 306 keywords = self._extract_keywords(query_text)
... ...
query/translator.py
... ... @@ -584,17 +584,17 @@ class Translator:
584 584 shop_language: str,
585 585 source_lang: Optional[str] = None,
586 586 context: Optional[str] = None,
587   - prompt: Optional[str] = None
  587 + prompt: Optional[str] = None,
  588 + translate_to_en: bool = True,
  589 + translate_to_zh: bool = True,
588 590 ) -> Dict[str, Optional[str]]:
589 591 """
590   - Translate text for indexing based on shop language configuration.
591   -
592   - This method automatically handles multi-language translation:
593   - - If shop language is not 'zh', translate to Chinese (zh)
594   - - If shop language is not 'en', translate to English (en)
  592 + Translate text for indexing based on shop language and tenant configuration.
595 593  
596   - All translation logic is internal - callers don't need to worry about
597   - which languages to translate to.
  594 + Translation behavior:
  595 + - If translate_to_zh=True and shop language is not 'zh', translate to Chinese (zh)
  596 + - If translate_to_en=True and shop language is not 'en', translate to English (en)
  597 + - If both flags are False, no translation is performed (returns None for both)
598 598  
599 599 Args:
600 600 text: Text to translate
... ... @@ -602,10 +602,12 @@ class Translator:
602 602 source_lang: Source language code (optional, auto-detect if None)
603 603 context: Additional context for translation (optional)
604 604 prompt: Translation prompt/instruction (optional)
  605 + translate_to_en: Whether to translate to English (from tenant_config)
  606 + translate_to_zh: Whether to translate to Chinese (from tenant_config)
605 607  
606 608 Returns:
607   - Dictionary with 'zh' and 'en' keys containing translated text (or None if not needed)
608   - Example: {'zh': '中文翻译', 'en': 'English translation'}
  609 + Dictionary with 'zh' and 'en' keys containing translated text (or None if not needed/not enabled)
  610 + Example: {'zh': '中文翻译', 'en': 'English translation'} or {'zh': None, 'en': None}
609 611 """
610 612 if not text or not text.strip():
611 613 return {'zh': None, 'en': None}
... ... @@ -618,11 +620,11 @@ class Translator:
618 620 results = {'zh': None, 'en': None}
619 621 shop_lang_lower = shop_language.lower() if shop_language else ""
620 622  
621   - # Determine which languages need translation
  623 + # Determine which languages need translation based on tenant configuration
622 624 targets = []
623   - if "zh" not in shop_lang_lower:
  625 + if translate_to_zh and "zh" not in shop_lang_lower:
624 626 targets.append("zh")
625   - if "en" not in shop_lang_lower:
  627 + if translate_to_en and "en" not in shop_lang_lower:
626 628 targets.append("en")
627 629  
628 630 # If shop language is already zh and en, no translation needed
... ...
search/searcher.py
... ... @@ -15,6 +15,7 @@ from .boolean_parser import BooleanParser, QueryNode
15 15 from .es_query_builder import ESQueryBuilder
16 16 from .rerank_engine import RerankEngine
17 17 from config import SearchConfig
  18 +from config.tenant_config_loader import get_tenant_config_loader
18 19 from config.utils import get_match_fields_for_index
19 20 from context.request_context import RequestContext, RequestContextStage, create_request_context
20 21 from api.models import FacetResult, FacetValue, FacetConfig
... ... @@ -158,8 +159,10 @@ class Searcher:
158 159 if context is None:
159 160 context = create_request_context()
160 161  
161   - # Always use config defaults (these are backend configuration, not user parameters)
162   - enable_translation = self.config.query_config.enable_translation
  162 + # 根据租户配置决定翻译开关(离线/在线统一)
  163 + tenant_loader = get_tenant_config_loader()
  164 + tenant_cfg = tenant_loader.get_tenant_config(tenant_id)
  165 + enable_translation = bool(tenant_cfg.get("translate_to_en") or tenant_cfg.get("translate_to_zh"))
163 166 enable_embedding = self.config.query_config.enable_text_embedding
164 167 enable_rerank = False # Temporarily disabled
165 168  
... ... @@ -199,6 +202,7 @@ class Searcher:
199 202 try:
200 203 parsed_query = self.query_parser.parse(
201 204 query,
  205 + tenant_id=tenant_id,
202 206 generate_vector=enable_embedding,
203 207 context=context
204 208 )
... ...
tests/conftest.py
... ... @@ -38,7 +38,6 @@ def sample_search_config(sample_index_config) -> SearchConfig:
38 38 """样例搜索配置"""
39 39 query_config = QueryConfig(
40 40 enable_query_rewrite=True,
41   - enable_translation=True,
42 41 enable_text_embedding=True,
43 42 supported_languages=["zh", "en"]
44 43 )
... ... @@ -174,7 +173,6 @@ def temp_config_file() -> Generator[str, None, None]:
174 173 "query_config": {
175 174 "supported_languages": ["zh", "en"],
176 175 "default_language": "zh",
177   - "enable_translation": True,
178 176 "enable_text_embedding": True,
179 177 "enable_query_rewrite": True
180 178 },
... ...