From 345d960b849cffe9486a6be7121b5adf87d4528c Mon Sep 17 00:00:00 2001 From: tangwang Date: Thu, 18 Dec 2025 19:18:28 +0800 Subject: [PATCH] 1. 删除全局 enable_translation 配置 config/config_loader.py: 从 QueryConfig 类中删除 enable_translation 字段 config/config.yaml: 删除 enable_translation: true 配置项 config/config_loader.py: 从 to_dict() 方法中删除相关输出 2. 索引阶段(离线)- 使用租户配置 indexer/indexing_utils.py: 根据 tenant_config.translate_to_en 和 translate_to_zh 决定是否初始化 translator 只有任一方向开启时才创建 translator indexer/document_transformer.py: _fill_text_fields 从 tenant_config 读取 translate_to_en 和 translate_to_zh 调用 translate_for_indexing 时传递这两个参数 更新了文档注释 3. 查询阶段(在线)- 使用租户配置 query/query_parser.py: parse() 方法新增 tenant_id 参数 根据租户配置决定翻译目标语言(translate_to_zh / translate_to_en) 如果两个都是 false,跳过翻译阶段 translator 属性不再依赖 enable_translation,总是可以初始化 search/searcher.py: search() 方法中根据租户配置计算 enable_translation(用于日志和 metadata) 调用 query_parser.parse() 时传递 tenant_id 4. 翻译器方法更新 query/translator.py: translate_for_indexing() 新增 translate_to_en 和 translate_to_zh 参数(默认 True 保持向后兼容) 根据这两个参数决定翻译目标 更新了文档注释 --- config/config.yaml | 3 +-- config/config_loader.py | 3 --- context/request_context.py | 2 +- example_usage.py | 7 ++++--- indexer/document_transformer.py | 30 ++++++++++++++++++++++-------- indexer/indexing_utils.py | 6 +++++- indexer/test_indexing.py | 14 ++++++-------- query/query_parser.py | 43 ++++++++++++++++++++++++++++++------------- query/translator.py | 28 +++++++++++++++------------- search/searcher.py | 8 ++++++-- tests/conftest.py | 2 -- 11 files changed, 90 insertions(+), 56 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 206ab3b..faaa95f 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -85,8 +85,7 @@ query_config: - "en" default_language: "zh" - # 功能开关 - enable_translation: true + # 功能开关(翻译开关由tenant_config控制) enable_text_embedding: true enable_query_rewrite: true enable_multilang_search: true # 启用多语言搜索(使用翻译进行跨语言检索) diff --git a/config/config_loader.py b/config/config_loader.py index f24f3e6..6de1b05 100644 --- a/config/config_loader.py +++ b/config/config_loader.py @@ -32,7 +32,6 @@ class QueryConfig: default_language: str = "zh" # Feature flags - enable_translation: bool = True enable_text_embedding: bool = True enable_query_rewrite: bool = True enable_multilang_search: bool = True # Enable multi-language search using translations @@ -230,7 +229,6 @@ class ConfigLoader: query_config = QueryConfig( supported_languages=query_config_data.get("supported_languages") or ["zh", "en"], default_language=query_config_data.get("default_language") or "zh", - enable_translation=query_config_data.get("enable_translation", True), enable_text_embedding=query_config_data.get("enable_text_embedding", True), enable_query_rewrite=query_config_data.get("enable_query_rewrite", True), rewrite_dictionary=rewrite_dictionary, @@ -369,7 +367,6 @@ class ConfigLoader: query_config_dict = { "supported_languages": config.query_config.supported_languages, "default_language": config.query_config.default_language, - "enable_translation": config.query_config.enable_translation, "enable_text_embedding": config.query_config.enable_text_embedding, "enable_query_rewrite": config.query_config.enable_query_rewrite, "translation_service": config.query_config.translation_service, diff --git a/context/request_context.py b/context/request_context.py index b37e82b..71dc60e 100644 --- a/context/request_context.py +++ b/context/request_context.py @@ -83,7 +83,7 @@ class RequestContext: # 元数据 self.metadata = { 'search_params': {}, # size, from_, filters等 - 'feature_flags': {}, # enable_translation, enable_embedding等 + 'feature_flags': {}, # translation_enabled (由tenant_config控制), enable_embedding等 'config_info': {}, # 索引配置、字段映射等 'error_info': None, 'warnings': [] diff --git a/example_usage.py b/example_usage.py index 1e48da2..a053ecf 100644 --- a/example_usage.py +++ b/example_usage.py @@ -89,10 +89,11 @@ def example_with_searcher(): 'from': 0 } + # 示例中写死 feature flags,实际环境由 tenant_config / 后端配置控制 context.metadata['feature_flags'] = { - 'enable_translation': True, - 'enable_embedding': True, - 'enable_rerank': True + 'translation_enabled': True, + 'embedding_enabled': True, + 'rerank_enabled': True } # 模拟搜索流程 diff --git a/indexer/document_transformer.py b/indexer/document_transformer.py index 168b40e..1d5a34f 100644 --- a/indexer/document_transformer.py +++ b/indexer/document_transformer.py @@ -171,12 +171,18 @@ class SPUDocumentTransformer: primary_lang: str ): """ - 填充文本字段(根据主语言自动处理多语言翻译)。 + 填充文本字段(根据租户配置处理多语言翻译)。 - 翻译逻辑在translator内部处理: - - 如果店铺语言不等于zh,自动翻译成zh - - 如果店铺语言不等于en,自动翻译成en + 翻译逻辑: + - 根据 tenant_config 中的 translate_to_zh 和 translate_to_en 决定翻译方向 + - 如果 translate_to_zh=true,且店铺语言不是zh,则翻译到中文 + - 如果 translate_to_en=true,且店铺语言不是en,则翻译到英文 + - 如果两个都是false,则不进行翻译,只填充主语言字段 """ + # 从租户配置中读取翻译方向 + translate_to_en = bool(self.tenant_config.get('translate_to_en')) + translate_to_zh = bool(self.tenant_config.get('translate_to_zh')) + # Title if pd.notna(spu_row.get('title')): title_text = str(spu_row['title']) @@ -192,7 +198,9 @@ class SPUDocumentTransformer: title_text, shop_language=primary_lang, source_lang=primary_lang, - prompt=prompt_zh if primary_lang == 'zh' else prompt_en + prompt=prompt_zh if primary_lang == 'zh' else prompt_en, + translate_to_en=translate_to_en, + translate_to_zh=translate_to_zh, ) # 填充翻译结果 @@ -219,7 +227,9 @@ class SPUDocumentTransformer: brief_text, shop_language=primary_lang, source_lang=primary_lang, - prompt=prompt + prompt=prompt, + translate_to_en=translate_to_en, + translate_to_zh=translate_to_zh, ) doc['brief_zh'] = translations.get('zh') or (brief_text if primary_lang == 'zh' else None) doc['brief_en'] = translations.get('en') or (brief_text if primary_lang == 'en' else None) @@ -243,7 +253,9 @@ class SPUDocumentTransformer: desc_text, shop_language=primary_lang, source_lang=primary_lang, - prompt=prompt + prompt=prompt, + translate_to_en=translate_to_en, + translate_to_zh=translate_to_zh, ) doc['description_zh'] = translations.get('zh') or (desc_text if primary_lang == 'zh' else None) doc['description_en'] = translations.get('en') or (desc_text if primary_lang == 'en' else None) @@ -267,7 +279,9 @@ class SPUDocumentTransformer: vendor_text, shop_language=primary_lang, source_lang=primary_lang, - prompt=prompt + prompt=prompt, + translate_to_en=translate_to_en, + translate_to_zh=translate_to_zh, ) doc['vendor_zh'] = translations.get('zh') or (vendor_text if primary_lang == 'zh' else None) doc['vendor_en'] = translations.get('en') or (vendor_text if primary_lang == 'en' else None) diff --git a/indexer/indexing_utils.py b/indexer/indexing_utils.py index 62fce55..d82634f 100644 --- a/indexer/indexing_utils.py +++ b/indexer/indexing_utils.py @@ -88,7 +88,11 @@ def create_document_transformer( if searchable_option_dimensions is None: searchable_option_dimensions = config.spu_config.searchable_option_dimensions - if translator is None and config.query_config.enable_translation: + # 根据租户配置决定是否需要翻译:只要开启任一方向的翻译,就初始化翻译器 + translate_to_en = bool(tenant_config.get("translate_to_en")) + translate_to_zh = bool(tenant_config.get("translate_to_zh")) + + if translator is None and (translate_to_en or translate_to_zh): from query.translator import Translator translator = Translator( api_key=config.query_config.translation_api_key, diff --git a/indexer/test_indexing.py b/indexer/test_indexing.py index 13b892c..c018054 100755 --- a/indexer/test_indexing.py +++ b/indexer/test_indexing.py @@ -265,14 +265,12 @@ def test_document_transformer(): tenant_config_loader = get_tenant_config_loader() tenant_config = tenant_config_loader.get_tenant_config('162') - # 初始化翻译器(如果启用) - translator = None - if config.query_config.enable_translation: - from query.translator import Translator - translator = Translator( - api_key=config.query_config.translation_api_key, - use_cache=True - ) + # 初始化翻译器(测试环境总是启用,具体翻译方向由tenant_config控制) + from query.translator import Translator + translator = Translator( + api_key=config.query_config.translation_api_key, + use_cache=True + ) # 创建转换器 transformer = SPUDocumentTransformer( diff --git a/query/query_parser.py b/query/query_parser.py index 010b458..d0c6112 100644 --- a/query/query_parser.py +++ b/query/query_parser.py @@ -115,7 +115,7 @@ class QueryParser: @property def translator(self) -> Translator: """Lazy load translator.""" - if self._translator is None and self.config.query_config.enable_translation: + if self._translator is None: logger.info("Initializing translator (lazy load)...") self._translator = Translator( api_key=self.config.query_config.translation_api_key, @@ -153,7 +153,13 @@ class QueryParser: is_long = token_count >= 4 return is_short, is_long - def parse(self, query: str, generate_vector: bool = True, context: Optional[Any] = None) -> ParsedQuery: + def parse( + self, + query: str, + tenant_id: Optional[str] = None, + generate_vector: bool = True, + context: Optional[Any] = None + ) -> ParsedQuery: """ Parse query through all processing stages. @@ -221,12 +227,23 @@ class QueryParser: # Stage 4: Translation (with async support and conditional waiting) translations = {} translation_futures = {} - if self.config.query_config.enable_translation: - try: - # Determine target languages for translation - # Simplified: always translate to Chinese and English - target_langs_for_translation = ['zh', 'en'] - + try: + # 根据租户配置决定翻译目标语言 + from config.tenant_config_loader import get_tenant_config_loader + tenant_loader = get_tenant_config_loader() + tenant_cfg = tenant_loader.get_tenant_config(tenant_id or "default") + + translate_to_zh = bool(tenant_cfg.get("translate_to_zh")) + translate_to_en = bool(tenant_cfg.get("translate_to_en")) + + target_langs_for_translation = [] + if translate_to_zh: + target_langs_for_translation.append('zh') + if translate_to_en: + target_langs_for_translation.append('en') + + # 如果该租户未开启任何翻译方向,则直接跳过翻译阶段 + if target_langs_for_translation: target_langs = [lang for lang in target_langs_for_translation if detected_lang != lang] if target_langs: @@ -279,11 +296,11 @@ class QueryParser: if translation: context.store_intermediate_result(f'translation_{lang}', translation) - except Exception as e: - error_msg = f"翻译失败 | 错误: {str(e)}" - log_info(error_msg) - if context: - context.add_warning(error_msg) + except Exception as e: + error_msg = f"翻译失败 | 错误: {str(e)}" + log_info(error_msg) + if context: + context.add_warning(error_msg) # Stage 5: Query analysis (keywords, token count, query type) keywords = self._extract_keywords(query_text) diff --git a/query/translator.py b/query/translator.py index 0e77f77..16f3c99 100644 --- a/query/translator.py +++ b/query/translator.py @@ -584,17 +584,17 @@ class Translator: shop_language: str, source_lang: Optional[str] = None, context: Optional[str] = None, - prompt: Optional[str] = None + prompt: Optional[str] = None, + translate_to_en: bool = True, + translate_to_zh: bool = True, ) -> Dict[str, Optional[str]]: """ - Translate text for indexing based on shop language configuration. - - This method automatically handles multi-language translation: - - If shop language is not 'zh', translate to Chinese (zh) - - If shop language is not 'en', translate to English (en) + Translate text for indexing based on shop language and tenant configuration. - All translation logic is internal - callers don't need to worry about - which languages to translate to. + Translation behavior: + - If translate_to_zh=True and shop language is not 'zh', translate to Chinese (zh) + - If translate_to_en=True and shop language is not 'en', translate to English (en) + - If both flags are False, no translation is performed (returns None for both) Args: text: Text to translate @@ -602,10 +602,12 @@ class Translator: source_lang: Source language code (optional, auto-detect if None) context: Additional context for translation (optional) prompt: Translation prompt/instruction (optional) + translate_to_en: Whether to translate to English (from tenant_config) + translate_to_zh: Whether to translate to Chinese (from tenant_config) Returns: - Dictionary with 'zh' and 'en' keys containing translated text (or None if not needed) - Example: {'zh': '中文翻译', 'en': 'English translation'} + Dictionary with 'zh' and 'en' keys containing translated text (or None if not needed/not enabled) + Example: {'zh': '中文翻译', 'en': 'English translation'} or {'zh': None, 'en': None} """ if not text or not text.strip(): return {'zh': None, 'en': None} @@ -618,11 +620,11 @@ class Translator: results = {'zh': None, 'en': None} shop_lang_lower = shop_language.lower() if shop_language else "" - # Determine which languages need translation + # Determine which languages need translation based on tenant configuration targets = [] - if "zh" not in shop_lang_lower: + if translate_to_zh and "zh" not in shop_lang_lower: targets.append("zh") - if "en" not in shop_lang_lower: + if translate_to_en and "en" not in shop_lang_lower: targets.append("en") # If shop language is already zh and en, no translation needed diff --git a/search/searcher.py b/search/searcher.py index cd262f8..ae8e83e 100644 --- a/search/searcher.py +++ b/search/searcher.py @@ -15,6 +15,7 @@ from .boolean_parser import BooleanParser, QueryNode from .es_query_builder import ESQueryBuilder from .rerank_engine import RerankEngine from config import SearchConfig +from config.tenant_config_loader import get_tenant_config_loader from config.utils import get_match_fields_for_index from context.request_context import RequestContext, RequestContextStage, create_request_context from api.models import FacetResult, FacetValue, FacetConfig @@ -158,8 +159,10 @@ class Searcher: if context is None: context = create_request_context() - # Always use config defaults (these are backend configuration, not user parameters) - enable_translation = self.config.query_config.enable_translation + # 根据租户配置决定翻译开关(离线/在线统一) + tenant_loader = get_tenant_config_loader() + tenant_cfg = tenant_loader.get_tenant_config(tenant_id) + enable_translation = bool(tenant_cfg.get("translate_to_en") or tenant_cfg.get("translate_to_zh")) enable_embedding = self.config.query_config.enable_text_embedding enable_rerank = False # Temporarily disabled @@ -199,6 +202,7 @@ class Searcher: try: parsed_query = self.query_parser.parse( query, + tenant_id=tenant_id, generate_vector=enable_embedding, context=context ) diff --git a/tests/conftest.py b/tests/conftest.py index d95206f..573d53e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,7 +38,6 @@ def sample_search_config(sample_index_config) -> SearchConfig: """样例搜索配置""" query_config = QueryConfig( enable_query_rewrite=True, - enable_translation=True, enable_text_embedding=True, supported_languages=["zh", "en"] ) @@ -174,7 +173,6 @@ def temp_config_file() -> Generator[str, None, None]: "query_config": { "supported_languages": ["zh", "en"], "default_language": "zh", - "enable_translation": True, "enable_text_embedding": True, "enable_query_rewrite": True }, -- libgit2 0.21.2