Commit 345d960b849cffe9486a6be7121b5adf87d4528c
1 parent
cff5e86f
1. 删除全局 enable_translation 配置
config/config_loader.py: 从 QueryConfig 类中删除 enable_translation 字段
config/config.yaml: 删除 enable_translation: true 配置项
config/config_loader.py: 从 to_dict() 方法中删除相关输出
2. 索引阶段(离线)- 使用租户配置
indexer/indexing_utils.py: 根据 tenant_config.translate_to_en 和 translate_to_zh 决定是否初始化 translator;只有任一方向开启时才创建 translator
indexer/document_transformer.py: _fill_text_fields 从 tenant_config 读取 translate_to_en 和 translate_to_zh;调用 translate_for_indexing 时传递这两个参数;更新了文档注释
3. 查询阶段(在线)- 使用租户配置
query/query_parser.py: parse() 方法新增 tenant_id 参数;根据租户配置决定翻译目标语言(translate_to_zh / translate_to_en);如果两个都是 false,跳过翻译阶段;translator 属性不再依赖 enable_translation,总是可以初始化
search/searcher.py: search() 方法中根据租户配置计算 enable_translation(用于日志和 metadata);调用 query_parser.parse() 时传递 tenant_id
4. 翻译器方法更新
query/translator.py: translate_for_indexing() 新增 translate_to_en 和 translate_to_zh 参数(默认 True 保持向后兼容);根据这两个参数决定翻译目标;更新了文档注释
Showing
11 changed files
with
90 additions
and
56 deletions
Show diff stats
config/config.yaml
config/config_loader.py
| ... | ... | @@ -32,7 +32,6 @@ class QueryConfig: |
| 32 | 32 | default_language: str = "zh" |
| 33 | 33 | |
| 34 | 34 | # Feature flags |
| 35 | - enable_translation: bool = True | |
| 36 | 35 | enable_text_embedding: bool = True |
| 37 | 36 | enable_query_rewrite: bool = True |
| 38 | 37 | enable_multilang_search: bool = True # Enable multi-language search using translations |
| ... | ... | @@ -230,7 +229,6 @@ class ConfigLoader: |
| 230 | 229 | query_config = QueryConfig( |
| 231 | 230 | supported_languages=query_config_data.get("supported_languages") or ["zh", "en"], |
| 232 | 231 | default_language=query_config_data.get("default_language") or "zh", |
| 233 | - enable_translation=query_config_data.get("enable_translation", True), | |
| 234 | 232 | enable_text_embedding=query_config_data.get("enable_text_embedding", True), |
| 235 | 233 | enable_query_rewrite=query_config_data.get("enable_query_rewrite", True), |
| 236 | 234 | rewrite_dictionary=rewrite_dictionary, |
| ... | ... | @@ -369,7 +367,6 @@ class ConfigLoader: |
| 369 | 367 | query_config_dict = { |
| 370 | 368 | "supported_languages": config.query_config.supported_languages, |
| 371 | 369 | "default_language": config.query_config.default_language, |
| 372 | - "enable_translation": config.query_config.enable_translation, | |
| 373 | 370 | "enable_text_embedding": config.query_config.enable_text_embedding, |
| 374 | 371 | "enable_query_rewrite": config.query_config.enable_query_rewrite, |
| 375 | 372 | "translation_service": config.query_config.translation_service, | ... | ... |
context/request_context.py
| ... | ... | @@ -83,7 +83,7 @@ class RequestContext: |
| 83 | 83 | # 元数据 |
| 84 | 84 | self.metadata = { |
| 85 | 85 | 'search_params': {}, # size, from_, filters等 |
| 86 | - 'feature_flags': {}, # enable_translation, enable_embedding等 | |
| 86 | + 'feature_flags': {}, # translation_enabled (由tenant_config控制), enable_embedding等 | |
| 87 | 87 | 'config_info': {}, # 索引配置、字段映射等 |
| 88 | 88 | 'error_info': None, |
| 89 | 89 | 'warnings': [] | ... | ... |
example_usage.py
| ... | ... | @@ -89,10 +89,11 @@ def example_with_searcher(): |
| 89 | 89 | 'from': 0 |
| 90 | 90 | } |
| 91 | 91 | |
| 92 | + # 示例中写死 feature flags,实际环境由 tenant_config / 后端配置控制 | |
| 92 | 93 | context.metadata['feature_flags'] = { |
| 93 | - 'enable_translation': True, | |
| 94 | - 'enable_embedding': True, | |
| 95 | - 'enable_rerank': True | |
| 94 | + 'translation_enabled': True, | |
| 95 | + 'embedding_enabled': True, | |
| 96 | + 'rerank_enabled': True | |
| 96 | 97 | } |
| 97 | 98 | |
| 98 | 99 | # 模拟搜索流程 | ... | ... |
indexer/document_transformer.py
| ... | ... | @@ -171,12 +171,18 @@ class SPUDocumentTransformer: |
| 171 | 171 | primary_lang: str |
| 172 | 172 | ): |
| 173 | 173 | """ |
| 174 | - 填充文本字段(根据主语言自动处理多语言翻译)。 | |
| 174 | + 填充文本字段(根据租户配置处理多语言翻译)。 | |
| 175 | 175 | |
| 176 | - 翻译逻辑在translator内部处理: | |
| 177 | - - 如果店铺语言不等于zh,自动翻译成zh | |
| 178 | - - 如果店铺语言不等于en,自动翻译成en | |
| 176 | + 翻译逻辑: | |
| 177 | + - 根据 tenant_config 中的 translate_to_zh 和 translate_to_en 决定翻译方向 | |
| 178 | + - 如果 translate_to_zh=true,且店铺语言不是zh,则翻译到中文 | |
| 179 | + - 如果 translate_to_en=true,且店铺语言不是en,则翻译到英文 | |
| 180 | + - 如果两个都是false,则不进行翻译,只填充主语言字段 | |
| 179 | 181 | """ |
| 182 | + # 从租户配置中读取翻译方向 | |
| 183 | + translate_to_en = bool(self.tenant_config.get('translate_to_en')) | |
| 184 | + translate_to_zh = bool(self.tenant_config.get('translate_to_zh')) | |
| 185 | + | |
| 180 | 186 | # Title |
| 181 | 187 | if pd.notna(spu_row.get('title')): |
| 182 | 188 | title_text = str(spu_row['title']) |
| ... | ... | @@ -192,7 +198,9 @@ class SPUDocumentTransformer: |
| 192 | 198 | title_text, |
| 193 | 199 | shop_language=primary_lang, |
| 194 | 200 | source_lang=primary_lang, |
| 195 | - prompt=prompt_zh if primary_lang == 'zh' else prompt_en | |
| 201 | + prompt=prompt_zh if primary_lang == 'zh' else prompt_en, | |
| 202 | + translate_to_en=translate_to_en, | |
| 203 | + translate_to_zh=translate_to_zh, | |
| 196 | 204 | ) |
| 197 | 205 | |
| 198 | 206 | # 填充翻译结果 |
| ... | ... | @@ -219,7 +227,9 @@ class SPUDocumentTransformer: |
| 219 | 227 | brief_text, |
| 220 | 228 | shop_language=primary_lang, |
| 221 | 229 | source_lang=primary_lang, |
| 222 | - prompt=prompt | |
| 230 | + prompt=prompt, | |
| 231 | + translate_to_en=translate_to_en, | |
| 232 | + translate_to_zh=translate_to_zh, | |
| 223 | 233 | ) |
| 224 | 234 | doc['brief_zh'] = translations.get('zh') or (brief_text if primary_lang == 'zh' else None) |
| 225 | 235 | doc['brief_en'] = translations.get('en') or (brief_text if primary_lang == 'en' else None) |
| ... | ... | @@ -243,7 +253,9 @@ class SPUDocumentTransformer: |
| 243 | 253 | desc_text, |
| 244 | 254 | shop_language=primary_lang, |
| 245 | 255 | source_lang=primary_lang, |
| 246 | - prompt=prompt | |
| 256 | + prompt=prompt, | |
| 257 | + translate_to_en=translate_to_en, | |
| 258 | + translate_to_zh=translate_to_zh, | |
| 247 | 259 | ) |
| 248 | 260 | doc['description_zh'] = translations.get('zh') or (desc_text if primary_lang == 'zh' else None) |
| 249 | 261 | doc['description_en'] = translations.get('en') or (desc_text if primary_lang == 'en' else None) |
| ... | ... | @@ -267,7 +279,9 @@ class SPUDocumentTransformer: |
| 267 | 279 | vendor_text, |
| 268 | 280 | shop_language=primary_lang, |
| 269 | 281 | source_lang=primary_lang, |
| 270 | - prompt=prompt | |
| 282 | + prompt=prompt, | |
| 283 | + translate_to_en=translate_to_en, | |
| 284 | + translate_to_zh=translate_to_zh, | |
| 271 | 285 | ) |
| 272 | 286 | doc['vendor_zh'] = translations.get('zh') or (vendor_text if primary_lang == 'zh' else None) |
| 273 | 287 | doc['vendor_en'] = translations.get('en') or (vendor_text if primary_lang == 'en' else None) | ... | ... |
indexer/indexing_utils.py
| ... | ... | @@ -88,7 +88,11 @@ def create_document_transformer( |
| 88 | 88 | if searchable_option_dimensions is None: |
| 89 | 89 | searchable_option_dimensions = config.spu_config.searchable_option_dimensions |
| 90 | 90 | |
| 91 | - if translator is None and config.query_config.enable_translation: | |
| 91 | + # 根据租户配置决定是否需要翻译:只要开启任一方向的翻译,就初始化翻译器 | |
| 92 | + translate_to_en = bool(tenant_config.get("translate_to_en")) | |
| 93 | + translate_to_zh = bool(tenant_config.get("translate_to_zh")) | |
| 94 | + | |
| 95 | + if translator is None and (translate_to_en or translate_to_zh): | |
| 92 | 96 | from query.translator import Translator |
| 93 | 97 | translator = Translator( |
| 94 | 98 | api_key=config.query_config.translation_api_key, | ... | ... |
indexer/test_indexing.py
| ... | ... | @@ -265,14 +265,12 @@ def test_document_transformer(): |
| 265 | 265 | tenant_config_loader = get_tenant_config_loader() |
| 266 | 266 | tenant_config = tenant_config_loader.get_tenant_config('162') |
| 267 | 267 | |
| 268 | - # 初始化翻译器(如果启用) | |
| 269 | - translator = None | |
| 270 | - if config.query_config.enable_translation: | |
| 271 | - from query.translator import Translator | |
| 272 | - translator = Translator( | |
| 273 | - api_key=config.query_config.translation_api_key, | |
| 274 | - use_cache=True | |
| 275 | - ) | |
| 268 | + # 初始化翻译器(测试环境总是启用,具体翻译方向由tenant_config控制) | |
| 269 | + from query.translator import Translator | |
| 270 | + translator = Translator( | |
| 271 | + api_key=config.query_config.translation_api_key, | |
| 272 | + use_cache=True | |
| 273 | + ) | |
| 276 | 274 | |
| 277 | 275 | # 创建转换器 |
| 278 | 276 | transformer = SPUDocumentTransformer( | ... | ... |
query/query_parser.py
| ... | ... | @@ -115,7 +115,7 @@ class QueryParser: |
| 115 | 115 | @property |
| 116 | 116 | def translator(self) -> Translator: |
| 117 | 117 | """Lazy load translator.""" |
| 118 | - if self._translator is None and self.config.query_config.enable_translation: | |
| 118 | + if self._translator is None: | |
| 119 | 119 | logger.info("Initializing translator (lazy load)...") |
| 120 | 120 | self._translator = Translator( |
| 121 | 121 | api_key=self.config.query_config.translation_api_key, |
| ... | ... | @@ -153,7 +153,13 @@ class QueryParser: |
| 153 | 153 | is_long = token_count >= 4 |
| 154 | 154 | return is_short, is_long |
| 155 | 155 | |
| 156 | - def parse(self, query: str, generate_vector: bool = True, context: Optional[Any] = None) -> ParsedQuery: | |
| 156 | + def parse( | |
| 157 | + self, | |
| 158 | + query: str, | |
| 159 | + tenant_id: Optional[str] = None, | |
| 160 | + generate_vector: bool = True, | |
| 161 | + context: Optional[Any] = None | |
| 162 | + ) -> ParsedQuery: | |
| 157 | 163 | """ |
| 158 | 164 | Parse query through all processing stages. |
| 159 | 165 | |
| ... | ... | @@ -221,12 +227,23 @@ class QueryParser: |
| 221 | 227 | # Stage 4: Translation (with async support and conditional waiting) |
| 222 | 228 | translations = {} |
| 223 | 229 | translation_futures = {} |
| 224 | - if self.config.query_config.enable_translation: | |
| 225 | - try: | |
| 226 | - # Determine target languages for translation | |
| 227 | - # Simplified: always translate to Chinese and English | |
| 228 | - target_langs_for_translation = ['zh', 'en'] | |
| 229 | - | |
| 230 | + try: | |
| 231 | + # 根据租户配置决定翻译目标语言 | |
| 232 | + from config.tenant_config_loader import get_tenant_config_loader | |
| 233 | + tenant_loader = get_tenant_config_loader() | |
| 234 | + tenant_cfg = tenant_loader.get_tenant_config(tenant_id or "default") | |
| 235 | + | |
| 236 | + translate_to_zh = bool(tenant_cfg.get("translate_to_zh")) | |
| 237 | + translate_to_en = bool(tenant_cfg.get("translate_to_en")) | |
| 238 | + | |
| 239 | + target_langs_for_translation = [] | |
| 240 | + if translate_to_zh: | |
| 241 | + target_langs_for_translation.append('zh') | |
| 242 | + if translate_to_en: | |
| 243 | + target_langs_for_translation.append('en') | |
| 244 | + | |
| 245 | + # 如果该租户未开启任何翻译方向,则直接跳过翻译阶段 | |
| 246 | + if target_langs_for_translation: | |
| 230 | 247 | target_langs = [lang for lang in target_langs_for_translation if detected_lang != lang] |
| 231 | 248 | |
| 232 | 249 | if target_langs: |
| ... | ... | @@ -279,11 +296,11 @@ class QueryParser: |
| 279 | 296 | if translation: |
| 280 | 297 | context.store_intermediate_result(f'translation_{lang}', translation) |
| 281 | 298 | |
| 282 | - except Exception as e: | |
| 283 | - error_msg = f"翻译失败 | 错误: {str(e)}" | |
| 284 | - log_info(error_msg) | |
| 285 | - if context: | |
| 286 | - context.add_warning(error_msg) | |
| 299 | + except Exception as e: | |
| 300 | + error_msg = f"翻译失败 | 错误: {str(e)}" | |
| 301 | + log_info(error_msg) | |
| 302 | + if context: | |
| 303 | + context.add_warning(error_msg) | |
| 287 | 304 | |
| 288 | 305 | # Stage 5: Query analysis (keywords, token count, query type) |
| 289 | 306 | keywords = self._extract_keywords(query_text) | ... | ... |
query/translator.py
| ... | ... | @@ -584,17 +584,17 @@ class Translator: |
| 584 | 584 | shop_language: str, |
| 585 | 585 | source_lang: Optional[str] = None, |
| 586 | 586 | context: Optional[str] = None, |
| 587 | - prompt: Optional[str] = None | |
| 587 | + prompt: Optional[str] = None, | |
| 588 | + translate_to_en: bool = True, | |
| 589 | + translate_to_zh: bool = True, | |
| 588 | 590 | ) -> Dict[str, Optional[str]]: |
| 589 | 591 | """ |
| 590 | - Translate text for indexing based on shop language configuration. | |
| 591 | - | |
| 592 | - This method automatically handles multi-language translation: | |
| 593 | - - If shop language is not 'zh', translate to Chinese (zh) | |
| 594 | - - If shop language is not 'en', translate to English (en) | |
| 592 | + Translate text for indexing based on shop language and tenant configuration. | |
| 595 | 593 | |
| 596 | - All translation logic is internal - callers don't need to worry about | |
| 597 | - which languages to translate to. | |
| 594 | + Translation behavior: | |
| 595 | + - If translate_to_zh=True and shop language is not 'zh', translate to Chinese (zh) | |
| 596 | + - If translate_to_en=True and shop language is not 'en', translate to English (en) | |
| 597 | + - If both flags are False, no translation is performed (returns None for both) | |
| 598 | 598 | |
| 599 | 599 | Args: |
| 600 | 600 | text: Text to translate |
| ... | ... | @@ -602,10 +602,12 @@ class Translator: |
| 602 | 602 | source_lang: Source language code (optional, auto-detect if None) |
| 603 | 603 | context: Additional context for translation (optional) |
| 604 | 604 | prompt: Translation prompt/instruction (optional) |
| 605 | + translate_to_en: Whether to translate to English (from tenant_config) | |
| 606 | + translate_to_zh: Whether to translate to Chinese (from tenant_config) | |
| 605 | 607 | |
| 606 | 608 | Returns: |
| 607 | - Dictionary with 'zh' and 'en' keys containing translated text (or None if not needed) | |
| 608 | - Example: {'zh': '中文翻译', 'en': 'English translation'} | |
| 609 | + Dictionary with 'zh' and 'en' keys containing translated text (or None if not needed/not enabled) | |
| 610 | + Example: {'zh': '中文翻译', 'en': 'English translation'} or {'zh': None, 'en': None} | |
| 609 | 611 | """ |
| 610 | 612 | if not text or not text.strip(): |
| 611 | 613 | return {'zh': None, 'en': None} |
| ... | ... | @@ -618,11 +620,11 @@ class Translator: |
| 618 | 620 | results = {'zh': None, 'en': None} |
| 619 | 621 | shop_lang_lower = shop_language.lower() if shop_language else "" |
| 620 | 622 | |
| 621 | - # Determine which languages need translation | |
| 623 | + # Determine which languages need translation based on tenant configuration | |
| 622 | 624 | targets = [] |
| 623 | - if "zh" not in shop_lang_lower: | |
| 625 | + if translate_to_zh and "zh" not in shop_lang_lower: | |
| 624 | 626 | targets.append("zh") |
| 625 | - if "en" not in shop_lang_lower: | |
| 627 | + if translate_to_en and "en" not in shop_lang_lower: | |
| 626 | 628 | targets.append("en") |
| 627 | 629 | |
| 628 | 630 | # If shop language is already zh and en, no translation needed | ... | ... |
search/searcher.py
| ... | ... | @@ -15,6 +15,7 @@ from .boolean_parser import BooleanParser, QueryNode |
| 15 | 15 | from .es_query_builder import ESQueryBuilder |
| 16 | 16 | from .rerank_engine import RerankEngine |
| 17 | 17 | from config import SearchConfig |
| 18 | +from config.tenant_config_loader import get_tenant_config_loader | |
| 18 | 19 | from config.utils import get_match_fields_for_index |
| 19 | 20 | from context.request_context import RequestContext, RequestContextStage, create_request_context |
| 20 | 21 | from api.models import FacetResult, FacetValue, FacetConfig |
| ... | ... | @@ -158,8 +159,10 @@ class Searcher: |
| 158 | 159 | if context is None: |
| 159 | 160 | context = create_request_context() |
| 160 | 161 | |
| 161 | - # Always use config defaults (these are backend configuration, not user parameters) | |
| 162 | - enable_translation = self.config.query_config.enable_translation | |
| 162 | + # 根据租户配置决定翻译开关(离线/在线统一) | |
| 163 | + tenant_loader = get_tenant_config_loader() | |
| 164 | + tenant_cfg = tenant_loader.get_tenant_config(tenant_id) | |
| 165 | + enable_translation = bool(tenant_cfg.get("translate_to_en") or tenant_cfg.get("translate_to_zh")) | |
| 163 | 166 | enable_embedding = self.config.query_config.enable_text_embedding |
| 164 | 167 | enable_rerank = False # Temporarily disabled |
| 165 | 168 | |
| ... | ... | @@ -199,6 +202,7 @@ class Searcher: |
| 199 | 202 | try: |
| 200 | 203 | parsed_query = self.query_parser.parse( |
| 201 | 204 | query, |
| 205 | + tenant_id=tenant_id, | |
| 202 | 206 | generate_vector=enable_embedding, |
| 203 | 207 | context=context |
| 204 | 208 | ) | ... | ... |
tests/conftest.py
| ... | ... | @@ -38,7 +38,6 @@ def sample_search_config(sample_index_config) -> SearchConfig: |
| 38 | 38 | """样例搜索配置""" |
| 39 | 39 | query_config = QueryConfig( |
| 40 | 40 | enable_query_rewrite=True, |
| 41 | - enable_translation=True, | |
| 42 | 41 | enable_text_embedding=True, |
| 43 | 42 | supported_languages=["zh", "en"] |
| 44 | 43 | ) |
| ... | ... | @@ -174,7 +173,6 @@ def temp_config_file() -> Generator[str, None, None]: |
| 174 | 173 | "query_config": { |
| 175 | 174 | "supported_languages": ["zh", "en"], |
| 176 | 175 | "default_language": "zh", |
| 177 | - "enable_translation": True, | |
| 178 | 176 | "enable_text_embedding": True, |
| 179 | 177 | "enable_query_rewrite": True |
| 180 | 178 | }, | ... | ... |