Commit 038e4e2facaffdb4f3d6e824379232f801bf250d
1 parent
3a5fda00
refactor(i18n): translate_to_en/zh 改为可配置 index_languages,默认 [en,zh]
- config: 新增 SUPPORTED_INDEX_LANGUAGES(37 种语言)、DEFAULT_INDEX_LANGUAGES、 normalize_index_languages、resolve_index_languages;get_tenant_config 统一注入 index_languages - config.yaml: 租户配置改用 index_languages,默认 [en,zh],保留 translate_to_* 兼容解析 - query/translator: translate_for_indexing 改为接收 index_languages,返回多语言 Dict - query/query_parser: 翻译目标从 index_languages 解析,need_wait_translation 按 index_langs 判断 - search/searcher: enable_translation 改为基于 index_languages 是否非空 - indexer: document_transformer 按 index_languages 填多语言字段;indexing_utils 仅多语言时初始化翻译器 - tests: 租户配置与索引测试改为断言 index_languages - README: 更新 TODO 说明已支持 index_languages
Showing
9 changed files
with
205 additions
and
164 deletions
Show diff stats
README.md
| 1 | - | |
| 2 | 1 | # TODO |
| 3 | 2 | |
| 3 | +**多语言索引**:已改为可配置的 `index_languages`(默认为 `["en", "zh"]`),商家可勾选主市场语言。支持语言见 `config.tenant_config_loader.SUPPORTED_INDEX_LANGUAGES`(含 en, zh, zh_tw, ru, ja, ko, es, fr, pt, de, it, th, vi, id, ms, ar, hi, he, my, ta, ur, bn, pl, nl, ro, tr, km, lo, yue, cs, el, sv, hu, da, fi, uk, bg 等)。 | |
| 4 | + | |
| 4 | 5 | 前端: |
| 5 | 6 | 搜索模态框 |
| 6 | 7 | 点击搜索的时候,弹出 搜索模态框,参考 react、AJAX等技术来实现,搜索模态框的页面宽度和原始页面相同(占满),左侧是suggestions,右侧是即时刷新的搜索结果(每输入一个字母都刷新一次结果)。 | ... | ... |
config/config.yaml
| ... | ... | @@ -149,32 +149,25 @@ spu_config: |
| 149 | 149 | searchable_option_dimensions: ['option1', 'option2', 'option3'] |
| 150 | 150 | |
| 151 | 151 | # 租户配置(Tenant Configuration) |
| 152 | -# 每个租户可以配置主语言和翻译选项 | |
| 152 | +# 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选) | |
| 153 | +# 默认 index_languages: [en, zh],可配置为任意 SUPPORTED_INDEX_LANGUAGES 的子集 | |
| 153 | 154 | tenant_config: |
| 154 | - # 默认配置(未配置的租户使用此配置) | |
| 155 | 155 | default: |
| 156 | 156 | primary_language: "en" |
| 157 | - translate_to_en: true | |
| 158 | - translate_to_zh: false | |
| 159 | - # 租户特定配置 | |
| 157 | + index_languages: ["en", "zh"] | |
| 160 | 158 | tenants: |
| 161 | 159 | "1": |
| 162 | 160 | primary_language: "zh" |
| 163 | - translate_to_en: true | |
| 164 | - translate_to_zh: false | |
| 161 | + index_languages: ["zh", "en"] | |
| 165 | 162 | "2": |
| 166 | 163 | primary_language: "en" |
| 167 | - translate_to_en: false | |
| 168 | - translate_to_zh: true | |
| 164 | + index_languages: ["en", "zh"] | |
| 169 | 165 | "3": |
| 170 | 166 | primary_language: "zh" |
| 171 | - translate_to_en: true | |
| 172 | - translate_to_zh: false | |
| 167 | + index_languages: ["zh", "en"] | |
| 173 | 168 | "162": |
| 174 | 169 | primary_language: "zh" |
| 175 | - translate_to_en: true | |
| 176 | - translate_to_zh: true | |
| 170 | + index_languages: ["zh", "en"] | |
| 177 | 171 | "170": |
| 178 | 172 | primary_language: "en" |
| 179 | - translate_to_en: true | |
| 180 | - translate_to_zh: true | |
| 173 | + index_languages: ["en", "zh"] | ... | ... |
config/tenant_config_loader.py
| 1 | 1 | """ |
| 2 | 2 | 租户配置加载器。 |
| 3 | 3 | |
| 4 | -从统一配置文件(config.yaml)加载租户配置,包括主语言和翻译配置。 | |
| 4 | +从统一配置文件(config.yaml)加载租户配置,包括主语言和索引语言(index_languages)。 | |
| 5 | +支持旧配置 translate_to_en / translate_to_zh 的兼容解析。 | |
| 5 | 6 | """ |
| 6 | 7 | |
| 7 | 8 | import logging |
| 8 | -from typing import Dict, Any, Optional | |
| 9 | +from typing import Dict, Any, Optional, List | |
| 9 | 10 | |
| 10 | 11 | logger = logging.getLogger(__name__) |
| 11 | 12 | |
# Supported index languages: language code -> display name.
# Used wherever a tenant (merchant) picks the languages of its main markets.
SUPPORTED_INDEX_LANGUAGES: Dict[str, str] = {
    "en": "English",
    "zh": "Chinese",
    "zh_tw": "Traditional Chinese",
    "ru": "Russian",
    "ja": "Japanese",
    "ko": "Korean",
    "es": "Spanish",
    "fr": "French",
    "pt": "Portuguese",
    "de": "German",
    "it": "Italian",
    "th": "Thai",
    "vi": "Vietnamese",
    "id": "Indonesian",
    "ms": "Malay",
    "ar": "Arabic",
    "hi": "Hindi",
    "he": "Hebrew",
    "my": "Burmese",
    "ta": "Tamil",
    "ur": "Urdu",
    "bn": "Bengali",
    "pl": "Polish",
    "nl": "Dutch",
    "ro": "Romanian",
    "tr": "Turkish",
    "km": "Khmer",
    "lo": "Lao",
    "yue": "Cantonese",
    "cs": "Czech",
    "el": "Greek",
    "sv": "Swedish",
    "hu": "Hungarian",
    "da": "Danish",
    "fi": "Finnish",
    "uk": "Ukrainian",
    "bg": "Bulgarian",
}

# Languages indexed when a tenant does not configure any (or configures none
# that are supported).
DEFAULT_INDEX_LANGUAGES: List[str] = ["en", "zh"]


def normalize_index_languages(value: Any, primary_language: str = "en") -> List[str]:
    """
    Normalize an ``index_languages`` setting into a list of supported codes.

    Codes are stripped, lower-cased, and hyphens are folded to underscores
    (so ``"zh-TW"`` becomes ``"zh_tw"``). Unsupported codes, duplicates and
    non-string items are dropped; the first-seen order is preserved.

    Args:
        value: Raw configuration value. A list/tuple of codes is expected; a
            single string (optionally comma-separated, e.g. ``"en, zh"``) is
            also accepted. Anything else is treated as "not configured".
        primary_language: Accepted for interface compatibility; currently
            not used by the normalization.

    Returns:
        A new list of supported language codes. Falls back to a copy of
        ``DEFAULT_INDEX_LANGUAGES`` when ``value`` is ``None``, has an
        unexpected type, or contains no supported code.
    """
    if isinstance(value, str):
        # A scalar such as `index_languages: ja` is an easy YAML slip; honor it
        # instead of silently replacing it with the defaults.
        value = value.split(",")
    if not isinstance(value, (list, tuple)):
        return list(DEFAULT_INDEX_LANGUAGES)

    valid: List[str] = []
    seen: set = set()
    for item in value:
        # YAML may yield ints/bools/None inside the list; skip them rather
        # than crashing on `.strip()`.
        if not isinstance(item, str):
            continue
        code = item.strip().lower().replace("-", "_")
        if code in seen or code not in SUPPORTED_INDEX_LANGUAGES:
            continue
        seen.add(code)
        valid.append(code)
    return valid if valid else list(DEFAULT_INDEX_LANGUAGES)
| 76 | + | |
| 77 | + | |
def resolve_index_languages(tenant_config: Dict[str, Any]) -> List[str]:
    """
    Resolve the effective index languages for one tenant configuration.

    When the ``index_languages`` key is present it is normalized and returned.
    Otherwise the list is derived from the legacy settings: the primary
    language (if supported) first, then ``en`` / ``zh`` when
    ``translate_to_en`` / ``translate_to_zh`` are truthy.

    Args:
        tenant_config: Raw tenant configuration mapping.

    Returns:
        A non-empty list of language codes; a copy of
        ``DEFAULT_INDEX_LANGUAGES`` when nothing could be derived.
    """
    primary_raw = tenant_config.get("primary_language") or "en"

    # New-style configuration takes precedence over the legacy flags.
    if "index_languages" in tenant_config:
        return normalize_index_languages(tenant_config["index_languages"], primary_raw)

    primary = primary_raw.strip().lower()
    legacy_flags = {
        "en": bool(tenant_config.get("translate_to_en")),
        "zh": bool(tenant_config.get("translate_to_zh")),
    }

    resolved: List[str] = []
    if primary and primary in SUPPORTED_INDEX_LANGUAGES:
        resolved.append(primary)
    for code, enabled in legacy_flags.items():
        if enabled and code not in resolved and code in SUPPORTED_INDEX_LANGUAGES:
            resolved.append(code)

    if not resolved:
        return list(DEFAULT_INDEX_LANGUAGES)
    return resolved
| 99 | + | |
| 12 | 100 | |
| 13 | 101 | class TenantConfigLoader: |
| 14 | 102 | """租户配置加载器。""" |
| ... | ... | @@ -36,14 +124,9 @@ class TenantConfigLoader: |
| 36 | 124 | return self._config |
| 37 | 125 | except Exception as e: |
| 38 | 126 | logger.error(f"Failed to load tenant config: {e}", exc_info=True) |
| 39 | - # 返回默认配置 | |
| 40 | 127 | self._config = { |
| 41 | - "default": { | |
| 42 | - "primary_language": "en", | |
| 43 | - "translate_to_en": True, | |
| 44 | - "translate_to_zh": False | |
| 45 | - }, | |
| 46 | - "tenants": {} | |
| 128 | + "default": {"primary_language": "en", "index_languages": ["en", "zh"]}, | |
| 129 | + "tenants": {}, | |
| 47 | 130 | } |
| 48 | 131 | return self._config |
| 49 | 132 | |
| ... | ... | @@ -55,21 +138,18 @@ class TenantConfigLoader: |
| 55 | 138 | tenant_id: 租户ID |
| 56 | 139 | |
| 57 | 140 | Returns: |
| 58 | - 租户配置字典,如果租户不存在则返回默认配置 | |
| 141 | + 租户配置字典,若租户不存在则用默认配置。始终包含已解析的 index_languages。 | |
| 59 | 142 | """ |
| 60 | 143 | config = self.load_config() |
| 61 | 144 | tenant_id_str = str(tenant_id) |
| 62 | - | |
| 145 | + default = config.get("default", {"primary_language": "en", "index_languages": ["en", "zh"]}) | |
| 63 | 146 | tenants = config.get("tenants", {}) |
| 64 | - if tenant_id_str in tenants: | |
| 65 | - return tenants[tenant_id_str] | |
| 66 | - else: | |
| 147 | + raw = tenants[tenant_id_str] if tenant_id_str in tenants else default | |
| 148 | + if tenant_id_str not in tenants: | |
| 67 | 149 | logger.debug(f"Tenant {tenant_id} not found in config, using default") |
| 68 | - return config.get("default", { | |
| 69 | - "primary_language": "en", | |
| 70 | - "translate_to_en": True, | |
| 71 | - "translate_to_zh": False | |
| 72 | - }) | |
| 150 | + out = dict(raw) | |
| 151 | + out["index_languages"] = resolve_index_languages(raw) | |
| 152 | + return out | |
| 73 | 153 | |
| 74 | 154 | def reload(self): |
| 75 | 155 | """重新加载配置(用于配置更新)。""" | ... | ... |
indexer/document_transformer.py
| ... | ... | @@ -172,49 +172,32 @@ class SPUDocumentTransformer: |
| 172 | 172 | primary_lang: str |
| 173 | 173 | ): |
| 174 | 174 | """ |
| 175 | - 填充文本字段(根据租户配置处理多语言翻译)。 | |
| 176 | - | |
| 177 | - 翻译逻辑: | |
| 178 | - - 根据 tenant_config 中的 translate_to_zh 和 translate_to_en 决定翻译方向 | |
| 179 | - - 如果 translate_to_zh=true,且店铺语言不是zh,则翻译到中文 | |
| 180 | - - 如果 translate_to_en=true,且店铺语言不是en,则翻译到英文 | |
| 181 | - - 如果两个都是false,则不进行翻译,只填充主语言字段 | |
| 175 | + 填充文本字段(根据租户 index_languages 处理多语言翻译)。 | |
| 176 | + 仅写入 primary_language 及 index_languages 中配置的语言。 | |
| 182 | 177 | """ |
| 183 | - # 从租户配置中读取翻译方向 | |
| 184 | - translate_to_en = bool(self.tenant_config.get('translate_to_en')) | |
| 185 | - translate_to_zh = bool(self.tenant_config.get('translate_to_zh')) | |
| 186 | - | |
| 187 | - def _set_lang_obj(field_name: str, source_text: Optional[str], translations: Optional[Dict[str, str]] = None): | |
| 188 | - """ | |
| 189 | - Write multilingual text field as an object, e.g.: | |
| 190 | - doc[field_name] = {"zh": "...", "en": "..."} | |
| 191 | - Only writes keys based on tenant primary_language + translate_to_en/translate_to_zh. | |
| 192 | - """ | |
| 178 | + index_langs = self.tenant_config.get("index_languages") or ["en", "zh"] | |
| 179 | + | |
| 180 | + def _set_lang_obj(field_name: str, source_text: Optional[str], translations: Optional[Dict[str, Optional[str]]] = None): | |
| 181 | + """写入多语言对象 doc[field_name] = {"zh": "...", "en": "...", ...},仅包含 index_languages。""" | |
| 193 | 182 | if not source_text or not str(source_text).strip(): |
| 194 | 183 | return |
| 195 | - | |
| 196 | 184 | obj: Dict[str, str] = {} |
| 197 | 185 | src = str(source_text) |
| 198 | 186 | obj[primary_lang] = src |
| 199 | - | |
| 200 | 187 | tr = translations or {} |
| 201 | - if translate_to_en and primary_lang != "en": | |
| 202 | - en_text = tr.get("en") | |
| 203 | - if en_text and str(en_text).strip(): | |
| 204 | - obj["en"] = str(en_text) | |
| 205 | - if translate_to_zh and primary_lang != "zh": | |
| 206 | - zh_text = tr.get("zh") | |
| 207 | - if zh_text and str(zh_text).strip(): | |
| 208 | - obj["zh"] = str(zh_text) | |
| 209 | - | |
| 188 | + for lang in index_langs: | |
| 189 | + if lang == primary_lang: | |
| 190 | + continue | |
| 191 | + val = tr.get(lang) | |
| 192 | + if val and str(val).strip(): | |
| 193 | + obj[lang] = str(val) | |
| 210 | 194 | if obj: |
| 211 | 195 | doc[field_name] = obj |
| 212 | 196 | |
| 213 | 197 | # Title |
| 214 | 198 | if pd.notna(spu_row.get('title')): |
| 215 | 199 | title_text = str(spu_row['title']) |
| 216 | - | |
| 217 | - translations: Dict[str, str] = {} | |
| 200 | + translations: Dict[str, Optional[str]] = {} | |
| 218 | 201 | if self.translator: |
| 219 | 202 | prompt_zh = self.translation_prompts.get('product_title_zh') or self.translation_prompts.get('default_zh') |
| 220 | 203 | prompt_en = self.translation_prompts.get('product_title_en') or self.translation_prompts.get('default_en') |
| ... | ... | @@ -223,16 +206,14 @@ class SPUDocumentTransformer: |
| 223 | 206 | shop_language=primary_lang, |
| 224 | 207 | source_lang=primary_lang, |
| 225 | 208 | prompt=prompt_zh if primary_lang == 'zh' else prompt_en, |
| 226 | - translate_to_en=translate_to_en, | |
| 227 | - translate_to_zh=translate_to_zh, | |
| 209 | + index_languages=index_langs, | |
| 228 | 210 | ) or {} |
| 229 | - | |
| 230 | 211 | _set_lang_obj("title", title_text, translations) |
| 231 | 212 | |
| 232 | 213 | # Brief |
| 233 | 214 | if pd.notna(spu_row.get('brief')): |
| 234 | 215 | brief_text = str(spu_row['brief']) |
| 235 | - translations: Dict[str, str] = {} | |
| 216 | + translations = {} | |
| 236 | 217 | if self.translator: |
| 237 | 218 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') |
| 238 | 219 | translations = self.translator.translate_for_indexing( |
| ... | ... | @@ -240,15 +221,14 @@ class SPUDocumentTransformer: |
| 240 | 221 | shop_language=primary_lang, |
| 241 | 222 | source_lang=primary_lang, |
| 242 | 223 | prompt=prompt, |
| 243 | - translate_to_en=translate_to_en, | |
| 244 | - translate_to_zh=translate_to_zh, | |
| 224 | + index_languages=index_langs, | |
| 245 | 225 | ) or {} |
| 246 | 226 | _set_lang_obj("brief", brief_text, translations) |
| 247 | 227 | |
| 248 | 228 | # Description |
| 249 | 229 | if pd.notna(spu_row.get('description')): |
| 250 | 230 | desc_text = str(spu_row['description']) |
| 251 | - translations: Dict[str, str] = {} | |
| 231 | + translations = {} | |
| 252 | 232 | if self.translator: |
| 253 | 233 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') |
| 254 | 234 | translations = self.translator.translate_for_indexing( |
| ... | ... | @@ -256,15 +236,14 @@ class SPUDocumentTransformer: |
| 256 | 236 | shop_language=primary_lang, |
| 257 | 237 | source_lang=primary_lang, |
| 258 | 238 | prompt=prompt, |
| 259 | - translate_to_en=translate_to_en, | |
| 260 | - translate_to_zh=translate_to_zh, | |
| 239 | + index_languages=index_langs, | |
| 261 | 240 | ) or {} |
| 262 | 241 | _set_lang_obj("description", desc_text, translations) |
| 263 | 242 | |
| 264 | 243 | # Vendor |
| 265 | 244 | if pd.notna(spu_row.get('vendor')): |
| 266 | 245 | vendor_text = str(spu_row['vendor']) |
| 267 | - translations: Dict[str, str] = {} | |
| 246 | + translations = {} | |
| 268 | 247 | if self.translator: |
| 269 | 248 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') |
| 270 | 249 | translations = self.translator.translate_for_indexing( |
| ... | ... | @@ -272,8 +251,7 @@ class SPUDocumentTransformer: |
| 272 | 251 | shop_language=primary_lang, |
| 273 | 252 | source_lang=primary_lang, |
| 274 | 253 | prompt=prompt, |
| 275 | - translate_to_en=translate_to_en, | |
| 276 | - translate_to_zh=translate_to_zh, | |
| 254 | + index_languages=index_langs, | |
| 277 | 255 | ) or {} |
| 278 | 256 | _set_lang_obj("vendor", vendor_text, translations) |
| 279 | 257 | ... | ... |
indexer/indexing_utils.py
| ... | ... | @@ -96,11 +96,9 @@ def create_document_transformer( |
| 96 | 96 | if searchable_option_dimensions is None: |
| 97 | 97 | searchable_option_dimensions = config.spu_config.searchable_option_dimensions |
| 98 | 98 | |
| 99 | - # 根据租户配置决定是否需要翻译:只要开启任一方向的翻译,就初始化翻译器 | |
| 100 | - translate_to_en = bool(tenant_config.get("translate_to_en")) | |
| 101 | - translate_to_zh = bool(tenant_config.get("translate_to_zh")) | |
| 102 | - | |
| 103 | - if translator is None and (translate_to_en or translate_to_zh): | |
| 99 | + index_langs = tenant_config.get("index_languages") or [] | |
| 100 | + need_translator = len(index_langs) > 1 | |
| 101 | + if translator is None and need_translator: | |
| 104 | 102 | from query.translator import Translator |
| 105 | 103 | translator = Translator( |
| 106 | 104 | api_key=config.query_config.translation_api_key, | ... | ... |
indexer/test_indexing.py
| ... | ... | @@ -44,19 +44,25 @@ def test_tenant_config(): |
| 44 | 44 | # 测试默认配置 |
| 45 | 45 | default_config = tenant_config_loader.get_tenant_config("999") |
| 46 | 46 | print(f"默认配置: {default_config}") |
| 47 | + assert "index_languages" in default_config, "默认配置应包含 index_languages" | |
| 48 | + assert "en" in default_config["index_languages"] and "zh" in default_config["index_languages"], \ | |
| 49 | + "默认 index_languages 应包含 en, zh" | |
| 50 | + print("✓ 默认配置正确(index_languages 含 en, zh)") | |
| 47 | 51 | |
| 48 | - # 测试租户162(翻译关闭) | |
| 52 | + # 测试租户162(index_languages: zh, en) | |
| 49 | 53 | tenant_162_config = tenant_config_loader.get_tenant_config("162") |
| 50 | 54 | print(f"租户162配置: {tenant_162_config}") |
| 51 | - assert tenant_162_config['translate_to_en'] == False, "租户162翻译应该关闭" | |
| 52 | - assert tenant_162_config['translate_to_zh'] == False, "租户162翻译应该关闭" | |
| 53 | - print("✓ 租户162配置正确(翻译关闭)") | |
| 55 | + idx = tenant_162_config.get("index_languages") or [] | |
| 56 | + assert "zh" in idx and "en" in idx, "租户162 index_languages 应包含 zh, en" | |
| 57 | + print("✓ 租户162配置正确(index_languages 含 zh, en)") | |
| 54 | 58 | |
| 55 | - # 测试其他租户 | |
| 59 | + # 测试租户1 | |
| 56 | 60 | tenant_1_config = tenant_config_loader.get_tenant_config("1") |
| 57 | 61 | print(f"租户1配置: {tenant_1_config}") |
| 58 | - assert tenant_1_config['translate_to_en'] == True, "租户1应该启用英文翻译" | |
| 59 | - print("✓ 租户1配置正确(翻译开启)") | |
| 62 | + idx1 = tenant_1_config.get("index_languages") or [] | |
| 63 | + assert "zh" in idx1 and "en" in idx1, "租户1 index_languages 应包含 zh, en" | |
| 64 | + assert tenant_1_config.get("primary_language") == "zh", "租户1 主语言为 zh" | |
| 65 | + print("✓ 租户1配置正确(index_languages 含 zh, en,主语言 zh)") | |
| 60 | 66 | |
| 61 | 67 | return True |
| 62 | 68 | except Exception as e: |
| ... | ... | @@ -118,13 +124,12 @@ def test_full_indexing(tenant_id: str = "162"): |
| 118 | 124 | print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}") |
| 119 | 125 | print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}") |
| 120 | 126 | |
| 121 | - # 检查租户162的翻译状态 | |
| 127 | + # 租户162 index_languages [zh, en],应有 title.en | |
| 122 | 128 | if tenant_id == "162": |
| 123 | - # 租户162翻译应该关闭:只写入主语言,不应出现 title.en | |
| 124 | - if isinstance(title_obj, dict) and title_obj.get("en") is None: | |
| 125 | - print(f" ✓ 翻译已关闭(title.en为空)") | |
| 129 | + if isinstance(title_obj, dict) and title_obj.get("en"): | |
| 130 | + print(f" ✓ 多语言索引正常(title.en 已填充)") | |
| 126 | 131 | else: |
| 127 | - print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}") | |
| 132 | + print(f" ⚠ 警告:租户162 配置 [zh,en],但 title.en 为空") | |
| 128 | 133 | |
| 129 | 134 | return True |
| 130 | 135 | |
| ... | ... | @@ -199,12 +204,12 @@ def test_incremental_indexing(tenant_id: str = "162"): |
| 199 | 204 | print(f" SKU数量: {len(doc.get('skus', []))}") |
| 200 | 205 | print(f" 规格数量: {len(doc.get('specifications', []))}") |
| 201 | 206 | |
| 202 | - # 检查租户162的翻译状态 | |
| 207 | + # 租户162 配置了 index_languages [zh, en],应有 title.en | |
| 203 | 208 | if tenant_id == "162": |
| 204 | - if isinstance(title_obj, dict) and title_obj.get("en") is None: | |
| 205 | - print(f" ✓ 翻译已关闭(title.en为空)") | |
| 209 | + if isinstance(title_obj, dict) and title_obj.get("en"): | |
| 210 | + print(f" ✓ 多语言索引正常(title.en 已填充)") | |
| 206 | 211 | else: |
| 207 | - print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}") | |
| 212 | + print(f" ⚠ 警告:租户162 配置 [zh,en],但 title.en 为空") | |
| 208 | 213 | |
| 209 | 214 | return True |
| 210 | 215 | |
| ... | ... | @@ -298,11 +303,13 @@ def test_document_transformer(): |
| 298 | 303 | print(f" title.en: {title_obj.get('en') if isinstance(title_obj, dict) else None}") |
| 299 | 304 | print(f" SKU数量: {len(doc.get('skus', []))}") |
| 300 | 305 | |
| 301 | - # 验证租户162翻译关闭 | |
| 302 | - if isinstance(title_obj, dict) and title_obj.get("en") is None: | |
| 303 | - print(f" ✓ 翻译已关闭(符合租户162配置)") | |
| 306 | + # 租户162 index_languages [zh, en],主语言 zh,应有 zh(原文)与 en(翻译) | |
| 307 | + if isinstance(title_obj, dict) and title_obj.get("zh") and title_obj.get("en"): | |
| 308 | + print(f" ✓ 多语言字段正确(zh + en)") | |
| 309 | + elif isinstance(title_obj, dict) and title_obj.get("zh"): | |
| 310 | + print(f" ⚠ 仅有 zh(若未配置翻译或翻译未调用可接受)") | |
| 304 | 311 | else: |
| 305 | - print(f" ⚠ 警告:翻译应该关闭") | |
| 312 | + print(f" ⚠ 未发现预期多语言字段") | |
| 306 | 313 | |
| 307 | 314 | return True |
| 308 | 315 | else: | ... | ... |
query/query_parser.py
| ... | ... | @@ -58,8 +58,7 @@ class ParsedQuery: |
| 58 | 58 | "rewritten_query": self.rewritten_query, |
| 59 | 59 | "detected_language": self.detected_language, |
| 60 | 60 | "translations": self.translations, |
| 61 | - "domain": self.domain, | |
| 62 | - "has_vector": self.query_vector is not None | |
| 61 | + "domain": self.domain | |
| 63 | 62 | } |
| 64 | 63 | return result |
| 65 | 64 | |
| ... | ... | @@ -228,23 +227,16 @@ class QueryParser: |
| 228 | 227 | translations = {} |
| 229 | 228 | translation_futures = {} |
| 230 | 229 | try: |
| 231 | - # 根据租户配置决定翻译目标语言 | |
| 230 | + # 根据租户配置的 index_languages 决定翻译目标语言 | |
| 232 | 231 | from config.tenant_config_loader import get_tenant_config_loader |
| 233 | 232 | tenant_loader = get_tenant_config_loader() |
| 234 | 233 | tenant_cfg = tenant_loader.get_tenant_config(tenant_id or "default") |
| 234 | + index_langs = tenant_cfg.get("index_languages") or ["en", "zh"] | |
| 235 | 235 | |
| 236 | - translate_to_zh = bool(tenant_cfg.get("translate_to_zh")) | |
| 237 | - translate_to_en = bool(tenant_cfg.get("translate_to_en")) | |
| 236 | + target_langs_for_translation = [lang for lang in index_langs if lang != detected_lang] | |
| 238 | 237 | |
| 239 | - target_langs_for_translation = [] | |
| 240 | - if translate_to_zh: | |
| 241 | - target_langs_for_translation.append('zh') | |
| 242 | - if translate_to_en: | |
| 243 | - target_langs_for_translation.append('en') | |
| 244 | - | |
| 245 | - # 如果该租户未开启任何翻译方向,则直接跳过翻译阶段 | |
| 246 | 238 | if target_langs_for_translation: |
| 247 | - target_langs = [lang for lang in target_langs_for_translation if detected_lang != lang] | |
| 239 | + target_langs = target_langs_for_translation | |
| 248 | 240 | |
| 249 | 241 | if target_langs: |
| 250 | 242 | # Use e-commerce context for better disambiguation |
| ... | ... | @@ -254,8 +246,8 @@ class QueryParser: |
| 254 | 246 | self.config.query_config.translation_prompts.get('default_zh') |
| 255 | 247 | |
| 256 | 248 | # Determine if we need to wait for translation results |
| 257 | - # If detected_lang is neither 'en' nor 'zh', we must wait for translation | |
| 258 | - need_wait_translation = detected_lang not in ['en', 'zh'] | |
| 249 | + # If detected_lang is not in index_languages, we must wait for translation | |
| 250 | + need_wait_translation = detected_lang not in index_langs | |
| 259 | 251 | |
| 260 | 252 | if need_wait_translation: |
| 261 | 253 | # Use async method that returns Futures, so we can wait for results | ... | ... |
query/translator.py
| ... | ... | @@ -792,6 +792,18 @@ class Translator: |
| 792 | 792 | # The user can configure a glossary for better results |
| 793 | 793 | return translated_text |
| 794 | 794 | |
def _shop_lang_matches(self, shop_lang_lower: str, lang_code: str) -> bool:
    """
    Tell whether the shop language already is the given index language.

    When this returns True the source text can be indexed as-is for
    ``lang_code`` and no translation is needed.

    Both values are compared case-insensitively with ``-`` and ``_`` treated
    alike. A match is either an exact match (``zh-TW`` vs ``zh_tw``) or a
    regional variant of the index language (``en_us`` vs ``en``, ``pt_br``
    vs ``pt``). Matching is done on the primary subtag rather than by
    substring, so unrelated values that merely contain ``en`` or ``zh``
    (e.g. ``french``) do not match.

    Args:
        shop_lang_lower: Shop language code (callers pass it lower-cased).
        lang_code: Index language code to compare against.

    Returns:
        True if the shop language matches ``lang_code``; False otherwise,
        including when either argument is empty.
    """
    if not shop_lang_lower or not lang_code:
        return False
    shop = shop_lang_lower.strip().lower().replace("-", "_")
    target = lang_code.strip().lower().replace("-", "_")
    if not shop or not target:
        return False
    if shop == target:
        return True
    # Regional variant of the target language: compare the primary subtag only.
    return shop.split("_", 1)[0] == target
| 806 | + | |
| 795 | 807 | def translate_for_indexing( |
| 796 | 808 | self, |
| 797 | 809 | text: str, |
| ... | ... | @@ -799,76 +811,55 @@ class Translator: |
| 799 | 811 | source_lang: Optional[str] = None, |
| 800 | 812 | context: Optional[str] = None, |
| 801 | 813 | prompt: Optional[str] = None, |
| 802 | - translate_to_en: bool = True, | |
| 803 | - translate_to_zh: bool = True, | |
| 814 | + index_languages: Optional[List[str]] = None, | |
| 804 | 815 | ) -> Dict[str, Optional[str]]: |
| 805 | 816 | """ |
| 806 | - Translate text for indexing based on shop language and tenant configuration. | |
| 807 | - | |
| 808 | - Translation behavior: | |
| 809 | - - If translate_to_zh=True and shop language is not 'zh', translate to Chinese (zh) | |
| 810 | - - If translate_to_en=True and shop language is not 'en', translate to English (en) | |
| 811 | - - If both flags are False, no translation is performed (returns None for both) | |
| 812 | - | |
| 817 | + Translate text for indexing based on shop language and tenant index_languages. | |
| 818 | + | |
| 819 | + For each language in index_languages: use source text if shop language matches, | |
| 820 | + otherwise translate to that language. | |
| 821 | + | |
| 813 | 822 | Args: |
| 814 | 823 | text: Text to translate |
| 815 | - shop_language: Shop's configured language (e.g., 'zh', 'en', 'ru') | |
| 816 | - source_lang: Source language code (optional, auto-detect if None) | |
| 824 | + shop_language: Shop primary language (e.g. 'zh', 'en', 'ru') | |
| 825 | + source_lang: Source language code (optional) | |
| 817 | 826 | context: Additional context for translation (optional) |
| 818 | - prompt: Translation prompt/instruction (optional) | |
| 819 | - translate_to_en: Whether to translate to English (from tenant_config) | |
| 820 | - translate_to_zh: Whether to translate to Chinese (from tenant_config) | |
| 821 | - | |
| 827 | + prompt: Translation prompt (optional) | |
| 828 | + index_languages: Languages to index (from tenant_config). Default ["en", "zh"]. | |
| 829 | + | |
| 822 | 830 | Returns: |
| 823 | - Dictionary with 'zh' and 'en' keys containing translated text (or None if not needed/not enabled) | |
| 824 | - Example: {'zh': '中文翻译', 'en': 'English translation'} or {'zh': None, 'en': None} | |
| 831 | + Dict keyed by each index_language with translated or source text (or None). | |
| 825 | 832 | """ |
| 833 | + langs = index_languages if index_languages else ["en", "zh"] | |
| 834 | + results = {lang: None for lang in langs} | |
| 826 | 835 | if not text or not text.strip(): |
| 827 | - return {'zh': None, 'en': None} | |
| 828 | - | |
| 829 | - # Skip translation for symbol-only queries | |
| 836 | + return results | |
| 830 | 837 | if re.match(r'^[\d\s_-]+$', text): |
| 831 | 838 | logger.info(f"[Translator] Skip translation for symbol-only query: '{text}'") |
| 832 | - return {'zh': None, 'en': None} | |
| 833 | - | |
| 834 | - results = {'zh': None, 'en': None} | |
| 835 | - shop_lang_lower = shop_language.lower() if shop_language else "" | |
| 836 | - | |
| 837 | - # Determine which languages need translation based on tenant configuration | |
| 838 | - targets = [] | |
| 839 | - if translate_to_zh and "zh" not in shop_lang_lower: | |
| 840 | - targets.append("zh") | |
| 841 | - if translate_to_en and "en" not in shop_lang_lower: | |
| 842 | - targets.append("en") | |
| 843 | - | |
| 844 | - # If shop language is already zh and en, no translation needed | |
| 845 | - if not targets: | |
| 846 | - # Use original text for both languages | |
| 847 | - if "zh" in shop_lang_lower: | |
| 848 | - results['zh'] = text | |
| 849 | - if "en" in shop_lang_lower: | |
| 850 | - results['en'] = text | |
| 851 | 839 | return results |
| 852 | - | |
| 853 | - # Translate to each target language | |
| 840 | + | |
| 841 | + shop_lang_lower = (shop_language or "").strip().lower() | |
| 842 | + targets = [] | |
| 843 | + for lang in langs: | |
| 844 | + if self._shop_lang_matches(shop_lang_lower, lang): | |
| 845 | + results[lang] = text | |
| 846 | + else: | |
| 847 | + targets.append(lang) | |
| 848 | + | |
| 854 | 849 | for target_lang in targets: |
| 855 | - # Check cache first | |
| 856 | 850 | cached = self._get_cached_translation_redis(text, target_lang, source_lang, context, prompt) |
| 857 | 851 | if cached: |
| 858 | 852 | results[target_lang] = cached |
| 859 | 853 | logger.debug(f"[Translator] Cache hit for indexing: '{text}' -> {target_lang}: {cached}") |
| 860 | 854 | continue |
| 861 | - | |
| 862 | - # Translate synchronously for indexing (we need the result immediately) | |
| 863 | 855 | translated = self.translate( |
| 864 | 856 | text, |
| 865 | 857 | target_lang=target_lang, |
| 866 | 858 | source_lang=source_lang or shop_language, |
| 867 | 859 | context=context, |
| 868 | - prompt=prompt | |
| 860 | + prompt=prompt, | |
| 869 | 861 | ) |
| 870 | 862 | results[target_lang] = translated |
| 871 | - | |
| 872 | 863 | return results |
| 873 | 864 | |
| 874 | 865 | def get_translation_needs( | ... | ... |
search/searcher.py
| ... | ... | @@ -165,7 +165,8 @@ class Searcher: |
| 165 | 165 | # 根据租户配置决定翻译开关(离线/在线统一) |
| 166 | 166 | tenant_loader = get_tenant_config_loader() |
| 167 | 167 | tenant_cfg = tenant_loader.get_tenant_config(tenant_id) |
| 168 | - enable_translation = bool(tenant_cfg.get("translate_to_en") or tenant_cfg.get("translate_to_zh")) | |
| 168 | + index_langs = tenant_cfg.get("index_languages") or [] | |
| 169 | + enable_translation = len(index_langs) > 0 | |
| 169 | 170 | enable_embedding = self.config.query_config.enable_text_embedding |
| 170 | 171 | enable_rerank = False # Temporarily disabled |
| 171 | 172 | ... | ... |