Commit 038e4e2facaffdb4f3d6e824379232f801bf250d

Authored by tangwang
1 parent 3a5fda00

refactor(i18n): translate_to_en/zh 改为可配置 index_languages,默认 [en,zh]

- config: 新增 SUPPORTED_INDEX_LANGUAGES(37 种语言)、DEFAULT_INDEX_LANGUAGES、
  normalize_index_languages、resolve_index_languages;get_tenant_config 统一注入 index_languages
- config.yaml: 租户配置改用 index_languages,默认 [en,zh],保留 translate_to_* 兼容解析
- query/translator: translate_for_indexing 改为接收 index_languages,返回多语言 Dict
- query/query_parser: 翻译目标从 index_languages 解析,need_wait_translation 按 index_langs 判断
- search/searcher: enable_translation 改为基于 index_languages 是否非空
- indexer: document_transformer 按 index_languages 填多语言字段;indexing_utils 仅多语言时初始化翻译器
- tests: 租户配置与索引测试改为断言 index_languages
- README: 更新 TODO 说明已支持 index_languages
1 -  
2 # TODO 1 # TODO
3 2
  3 +**多语言索引**:已改为可配置的 `index_languages`(默认为 `["en", "zh"]`),商家可勾选主市场语言。支持语言见 `config.tenant_config_loader.SUPPORTED_INDEX_LANGUAGES`(含 en, zh, zh_tw, ru, ja, ko, es, fr, pt, de, it, th, vi, id, ms, ar, hi, he, my, ta, ur, bn, pl, nl, ro, tr, km, lo, yue, cs, el, sv, hu, da, fi, uk, bg 等)。
  4 +
4 前端: 5 前端:
5 搜索模态框 6 搜索模态框
6 点击搜索的时候,弹出 搜索模态框,参考 react、AJAX等技术来实现,搜索模态框的页面宽度和原始页面相同(占满),左侧是suggestions,右侧是即时刷新的搜索结果(每输入一个字母都刷新一次结果)。 7 点击搜索的时候,弹出 搜索模态框,参考 react、AJAX等技术来实现,搜索模态框的页面宽度和原始页面相同(占满),左侧是suggestions,右侧是即时刷新的搜索结果(每输入一个字母都刷新一次结果)。
config/config.yaml
@@ -149,32 +149,25 @@ spu_config: @@ -149,32 +149,25 @@ spu_config:
149 searchable_option_dimensions: ['option1', 'option2', 'option3'] 149 searchable_option_dimensions: ['option1', 'option2', 'option3']
150 150
151 # 租户配置(Tenant Configuration) 151 # 租户配置(Tenant Configuration)
152 -# 每个租户可以配置主语言和翻译选项 152 +# 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选)
  153 +# 默认 index_languages: [en, zh],可配置为任意 SUPPORTED_INDEX_LANGUAGES 的子集
153 tenant_config: 154 tenant_config:
154 - # 默认配置(未配置的租户使用此配置)  
155 default: 155 default:
156 primary_language: "en" 156 primary_language: "en"
157 - translate_to_en: true  
158 - translate_to_zh: false  
159 - # 租户特定配置 157 + index_languages: ["en", "zh"]
160 tenants: 158 tenants:
161 "1": 159 "1":
162 primary_language: "zh" 160 primary_language: "zh"
163 - translate_to_en: true  
164 - translate_to_zh: false 161 + index_languages: ["zh", "en"]
165 "2": 162 "2":
166 primary_language: "en" 163 primary_language: "en"
167 - translate_to_en: false  
168 - translate_to_zh: true 164 + index_languages: ["en", "zh"]
169 "3": 165 "3":
170 primary_language: "zh" 166 primary_language: "zh"
171 - translate_to_en: true  
172 - translate_to_zh: false 167 + index_languages: ["zh", "en"]
173 "162": 168 "162":
174 primary_language: "zh" 169 primary_language: "zh"
175 - translate_to_en: true  
176 - translate_to_zh: true 170 + index_languages: ["zh", "en"]
177 "170": 171 "170":
178 primary_language: "en" 172 primary_language: "en"
179 - translate_to_en: true  
180 - translate_to_zh: true 173 + index_languages: ["en", "zh"]
config/tenant_config_loader.py
1 """ 1 """
2 租户配置加载器。 2 租户配置加载器。
3 3
4 -从统一配置文件(config.yaml)加载租户配置,包括主语言和翻译配置。 4 +从统一配置文件(config.yaml)加载租户配置,包括主语言和索引语言(index_languages)。
  5 +支持旧配置 translate_to_en / translate_to_zh 的兼容解析。
5 """ 6 """
6 7
7 import logging 8 import logging
8 -from typing import Dict, Any, Optional 9 +from typing import Dict, Any, Optional, List
9 10
10 logger = logging.getLogger(__name__) 11 logger = logging.getLogger(__name__)
11 12
  13 +# 支持的索引语言:code -> display name(供商家勾选主市场语言等场景使用)
  14 +SUPPORTED_INDEX_LANGUAGES: Dict[str, str] = {
  15 + "en": "English",
  16 + "zh": "Chinese",
  17 + "zh_tw": "Traditional Chinese",
  18 + "ru": "Russian",
  19 + "ja": "Japanese",
  20 + "ko": "Korean",
  21 + "es": "Spanish",
  22 + "fr": "French",
  23 + "pt": "Portuguese",
  24 + "de": "German",
  25 + "it": "Italian",
  26 + "th": "Thai",
  27 + "vi": "Vietnamese",
  28 + "id": "Indonesian",
  29 + "ms": "Malay",
  30 + "ar": "Arabic",
  31 + "hi": "Hindi",
  32 + "he": "Hebrew",
  33 + "my": "Burmese",
  34 + "ta": "Tamil",
  35 + "ur": "Urdu",
  36 + "bn": "Bengali",
  37 + "pl": "Polish",
  38 + "nl": "Dutch",
  39 + "ro": "Romanian",
  40 + "tr": "Turkish",
  41 + "km": "Khmer",
  42 + "lo": "Lao",
  43 + "yue": "Cantonese",
  44 + "cs": "Czech",
  45 + "el": "Greek",
  46 + "sv": "Swedish",
  47 + "hu": "Hungarian",
  48 + "da": "Danish",
  49 + "fi": "Finnish",
  50 + "uk": "Ukrainian",
  51 + "bg": "Bulgarian",
  52 +}
  53 +
  54 +DEFAULT_INDEX_LANGUAGES: List[str] = ["en", "zh"]
  55 +
  56 +
  57 +def normalize_index_languages(value: Any, primary_language: str = "en") -> List[str]:
  58 + """
  59 + 将 index_languages 配置规范化为合法语言代码列表。
  60 + None 或空时返回 DEFAULT_INDEX_LANGUAGES。
  61 + """
  62 + if value is None:
  63 + return list(DEFAULT_INDEX_LANGUAGES)
  64 + if not isinstance(value, (list, tuple)):
  65 + return list(DEFAULT_INDEX_LANGUAGES)
  66 + valid: List[str] = []
  67 + seen: set = set()
  68 + for item in value:
  69 + code = (item or "").strip().lower()
  70 + if not code or code in seen:
  71 + continue
  72 + if code in SUPPORTED_INDEX_LANGUAGES:
  73 + valid.append(code)
  74 + seen.add(code)
  75 + return valid if valid else list(DEFAULT_INDEX_LANGUAGES)
  76 +
  77 +
  78 +def resolve_index_languages(tenant_config: Dict[str, Any]) -> List[str]:
  79 + """
  80 + 从租户配置解析 index_languages。
  81 + 若存在 index_languages 则用之;否则按旧配置 translate_to_en / translate_to_zh 推导。
  82 + """
  83 + if "index_languages" in tenant_config:
  84 + return normalize_index_languages(
  85 + tenant_config["index_languages"],
  86 + tenant_config.get("primary_language") or "en",
  87 + )
  88 + primary = (tenant_config.get("primary_language") or "en").strip().lower()
  89 + to_en = bool(tenant_config.get("translate_to_en"))
  90 + to_zh = bool(tenant_config.get("translate_to_zh"))
  91 + langs: List[str] = []
  92 + if primary and primary in SUPPORTED_INDEX_LANGUAGES:
  93 + langs.append(primary)
  94 + for code in ("en", "zh"):
  95 + if code not in langs and ((code == "en" and to_en) or (code == "zh" and to_zh)):
  96 + if code in SUPPORTED_INDEX_LANGUAGES:
  97 + langs.append(code)
  98 + return langs if langs else list(DEFAULT_INDEX_LANGUAGES)
  99 +
12 100
13 class TenantConfigLoader: 101 class TenantConfigLoader:
14 """租户配置加载器。""" 102 """租户配置加载器。"""
@@ -36,14 +124,9 @@ class TenantConfigLoader: @@ -36,14 +124,9 @@ class TenantConfigLoader:
36 return self._config 124 return self._config
37 except Exception as e: 125 except Exception as e:
38 logger.error(f"Failed to load tenant config: {e}", exc_info=True) 126 logger.error(f"Failed to load tenant config: {e}", exc_info=True)
39 - # 返回默认配置  
40 self._config = { 127 self._config = {
41 - "default": {  
42 - "primary_language": "en",  
43 - "translate_to_en": True,  
44 - "translate_to_zh": False  
45 - },  
46 - "tenants": {} 128 + "default": {"primary_language": "en", "index_languages": ["en", "zh"]},
  129 + "tenants": {},
47 } 130 }
48 return self._config 131 return self._config
49 132
@@ -55,21 +138,18 @@ class TenantConfigLoader: @@ -55,21 +138,18 @@ class TenantConfigLoader:
55 tenant_id: 租户ID 138 tenant_id: 租户ID
56 139
57 Returns: 140 Returns:
58 - 租户配置字典,如果租户不存在则返回默认配置 141 + 租户配置字典,若租户不存在则用默认配置。始终包含已解析的 index_languages。
59 """ 142 """
60 config = self.load_config() 143 config = self.load_config()
61 tenant_id_str = str(tenant_id) 144 tenant_id_str = str(tenant_id)
62 - 145 + default = config.get("default", {"primary_language": "en", "index_languages": ["en", "zh"]})
63 tenants = config.get("tenants", {}) 146 tenants = config.get("tenants", {})
64 - if tenant_id_str in tenants:  
65 - return tenants[tenant_id_str]  
66 - else: 147 + raw = tenants[tenant_id_str] if tenant_id_str in tenants else default
  148 + if tenant_id_str not in tenants:
67 logger.debug(f"Tenant {tenant_id} not found in config, using default") 149 logger.debug(f"Tenant {tenant_id} not found in config, using default")
68 - return config.get("default", {  
69 - "primary_language": "en",  
70 - "translate_to_en": True,  
71 - "translate_to_zh": False  
72 - }) 150 + out = dict(raw)
  151 + out["index_languages"] = resolve_index_languages(raw)
  152 + return out
73 153
74 def reload(self): 154 def reload(self):
75 """重新加载配置(用于配置更新)。""" 155 """重新加载配置(用于配置更新)。"""
indexer/document_transformer.py
@@ -172,49 +172,32 @@ class SPUDocumentTransformer: @@ -172,49 +172,32 @@ class SPUDocumentTransformer:
172 primary_lang: str 172 primary_lang: str
173 ): 173 ):
174 """ 174 """
175 - 填充文本字段(根据租户配置处理多语言翻译)。  
176 -  
177 - 翻译逻辑:  
178 - - 根据 tenant_config 中的 translate_to_zh 和 translate_to_en 决定翻译方向  
179 - - 如果 translate_to_zh=true,且店铺语言不是zh,则翻译到中文  
180 - - 如果 translate_to_en=true,且店铺语言不是en,则翻译到英文  
181 - - 如果两个都是false,则不进行翻译,只填充主语言字段 175 + 填充文本字段(根据租户 index_languages 处理多语言翻译)。
  176 + 仅写入 primary_language 及 index_languages 中配置的语言。
182 """ 177 """
183 - # 从租户配置中读取翻译方向  
184 - translate_to_en = bool(self.tenant_config.get('translate_to_en'))  
185 - translate_to_zh = bool(self.tenant_config.get('translate_to_zh'))  
186 -  
187 - def _set_lang_obj(field_name: str, source_text: Optional[str], translations: Optional[Dict[str, str]] = None):  
188 - """  
189 - Write multilingual text field as an object, e.g.:  
190 - doc[field_name] = {"zh": "...", "en": "..."}  
191 - Only writes keys based on tenant primary_language + translate_to_en/translate_to_zh.  
192 - """ 178 + index_langs = self.tenant_config.get("index_languages") or ["en", "zh"]
  179 +
  180 + def _set_lang_obj(field_name: str, source_text: Optional[str], translations: Optional[Dict[str, Optional[str]]] = None):
  181 + """写入多语言对象 doc[field_name] = {"zh": "...", "en": "...", ...},仅包含 index_languages。"""
193 if not source_text or not str(source_text).strip(): 182 if not source_text or not str(source_text).strip():
194 return 183 return
195 -  
196 obj: Dict[str, str] = {} 184 obj: Dict[str, str] = {}
197 src = str(source_text) 185 src = str(source_text)
198 obj[primary_lang] = src 186 obj[primary_lang] = src
199 -  
200 tr = translations or {} 187 tr = translations or {}
201 - if translate_to_en and primary_lang != "en":  
202 - en_text = tr.get("en")  
203 - if en_text and str(en_text).strip():  
204 - obj["en"] = str(en_text)  
205 - if translate_to_zh and primary_lang != "zh":  
206 - zh_text = tr.get("zh")  
207 - if zh_text and str(zh_text).strip():  
208 - obj["zh"] = str(zh_text)  
209 - 188 + for lang in index_langs:
  189 + if lang == primary_lang:
  190 + continue
  191 + val = tr.get(lang)
  192 + if val and str(val).strip():
  193 + obj[lang] = str(val)
210 if obj: 194 if obj:
211 doc[field_name] = obj 195 doc[field_name] = obj
212 196
213 # Title 197 # Title
214 if pd.notna(spu_row.get('title')): 198 if pd.notna(spu_row.get('title')):
215 title_text = str(spu_row['title']) 199 title_text = str(spu_row['title'])
216 -  
217 - translations: Dict[str, str] = {} 200 + translations: Dict[str, Optional[str]] = {}
218 if self.translator: 201 if self.translator:
219 prompt_zh = self.translation_prompts.get('product_title_zh') or self.translation_prompts.get('default_zh') 202 prompt_zh = self.translation_prompts.get('product_title_zh') or self.translation_prompts.get('default_zh')
220 prompt_en = self.translation_prompts.get('product_title_en') or self.translation_prompts.get('default_en') 203 prompt_en = self.translation_prompts.get('product_title_en') or self.translation_prompts.get('default_en')
@@ -223,16 +206,14 @@ class SPUDocumentTransformer: @@ -223,16 +206,14 @@ class SPUDocumentTransformer:
223 shop_language=primary_lang, 206 shop_language=primary_lang,
224 source_lang=primary_lang, 207 source_lang=primary_lang,
225 prompt=prompt_zh if primary_lang == 'zh' else prompt_en, 208 prompt=prompt_zh if primary_lang == 'zh' else prompt_en,
226 - translate_to_en=translate_to_en,  
227 - translate_to_zh=translate_to_zh, 209 + index_languages=index_langs,
228 ) or {} 210 ) or {}
229 -  
230 _set_lang_obj("title", title_text, translations) 211 _set_lang_obj("title", title_text, translations)
231 212
232 # Brief 213 # Brief
233 if pd.notna(spu_row.get('brief')): 214 if pd.notna(spu_row.get('brief')):
234 brief_text = str(spu_row['brief']) 215 brief_text = str(spu_row['brief'])
235 - translations: Dict[str, str] = {} 216 + translations = {}
236 if self.translator: 217 if self.translator:
237 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') 218 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
238 translations = self.translator.translate_for_indexing( 219 translations = self.translator.translate_for_indexing(
@@ -240,15 +221,14 @@ class SPUDocumentTransformer: @@ -240,15 +221,14 @@ class SPUDocumentTransformer:
240 shop_language=primary_lang, 221 shop_language=primary_lang,
241 source_lang=primary_lang, 222 source_lang=primary_lang,
242 prompt=prompt, 223 prompt=prompt,
243 - translate_to_en=translate_to_en,  
244 - translate_to_zh=translate_to_zh, 224 + index_languages=index_langs,
245 ) or {} 225 ) or {}
246 _set_lang_obj("brief", brief_text, translations) 226 _set_lang_obj("brief", brief_text, translations)
247 227
248 # Description 228 # Description
249 if pd.notna(spu_row.get('description')): 229 if pd.notna(spu_row.get('description')):
250 desc_text = str(spu_row['description']) 230 desc_text = str(spu_row['description'])
251 - translations: Dict[str, str] = {} 231 + translations = {}
252 if self.translator: 232 if self.translator:
253 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') 233 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
254 translations = self.translator.translate_for_indexing( 234 translations = self.translator.translate_for_indexing(
@@ -256,15 +236,14 @@ class SPUDocumentTransformer: @@ -256,15 +236,14 @@ class SPUDocumentTransformer:
256 shop_language=primary_lang, 236 shop_language=primary_lang,
257 source_lang=primary_lang, 237 source_lang=primary_lang,
258 prompt=prompt, 238 prompt=prompt,
259 - translate_to_en=translate_to_en,  
260 - translate_to_zh=translate_to_zh, 239 + index_languages=index_langs,
261 ) or {} 240 ) or {}
262 _set_lang_obj("description", desc_text, translations) 241 _set_lang_obj("description", desc_text, translations)
263 242
264 # Vendor 243 # Vendor
265 if pd.notna(spu_row.get('vendor')): 244 if pd.notna(spu_row.get('vendor')):
266 vendor_text = str(spu_row['vendor']) 245 vendor_text = str(spu_row['vendor'])
267 - translations: Dict[str, str] = {} 246 + translations = {}
268 if self.translator: 247 if self.translator:
269 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') 248 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
270 translations = self.translator.translate_for_indexing( 249 translations = self.translator.translate_for_indexing(
@@ -272,8 +251,7 @@ class SPUDocumentTransformer: @@ -272,8 +251,7 @@ class SPUDocumentTransformer:
272 shop_language=primary_lang, 251 shop_language=primary_lang,
273 source_lang=primary_lang, 252 source_lang=primary_lang,
274 prompt=prompt, 253 prompt=prompt,
275 - translate_to_en=translate_to_en,  
276 - translate_to_zh=translate_to_zh, 254 + index_languages=index_langs,
277 ) or {} 255 ) or {}
278 _set_lang_obj("vendor", vendor_text, translations) 256 _set_lang_obj("vendor", vendor_text, translations)
279 257
indexer/indexing_utils.py
@@ -96,11 +96,9 @@ def create_document_transformer( @@ -96,11 +96,9 @@ def create_document_transformer(
96 if searchable_option_dimensions is None: 96 if searchable_option_dimensions is None:
97 searchable_option_dimensions = config.spu_config.searchable_option_dimensions 97 searchable_option_dimensions = config.spu_config.searchable_option_dimensions
98 98
99 - # 根据租户配置决定是否需要翻译:只要开启任一方向的翻译,就初始化翻译器  
100 - translate_to_en = bool(tenant_config.get("translate_to_en"))  
101 - translate_to_zh = bool(tenant_config.get("translate_to_zh"))  
102 -  
103 - if translator is None and (translate_to_en or translate_to_zh): 99 + index_langs = tenant_config.get("index_languages") or []
  100 + need_translator = len(index_langs) > 1
  101 + if translator is None and need_translator:
104 from query.translator import Translator 102 from query.translator import Translator
105 translator = Translator( 103 translator = Translator(
106 api_key=config.query_config.translation_api_key, 104 api_key=config.query_config.translation_api_key,
indexer/test_indexing.py
@@ -44,19 +44,25 @@ def test_tenant_config(): @@ -44,19 +44,25 @@ def test_tenant_config():
44 # 测试默认配置 44 # 测试默认配置
45 default_config = tenant_config_loader.get_tenant_config("999") 45 default_config = tenant_config_loader.get_tenant_config("999")
46 print(f"默认配置: {default_config}") 46 print(f"默认配置: {default_config}")
  47 + assert "index_languages" in default_config, "默认配置应包含 index_languages"
  48 + assert "en" in default_config["index_languages"] and "zh" in default_config["index_languages"], \
  49 + "默认 index_languages 应包含 en, zh"
  50 + print("✓ 默认配置正确(index_languages 含 en, zh)")
47 51
48 - # 测试租户162(翻译关闭) 52 + # 测试租户162(index_languages: zh, en)
49 tenant_162_config = tenant_config_loader.get_tenant_config("162") 53 tenant_162_config = tenant_config_loader.get_tenant_config("162")
50 print(f"租户162配置: {tenant_162_config}") 54 print(f"租户162配置: {tenant_162_config}")
51 - assert tenant_162_config['translate_to_en'] == False, "租户162翻译应该关闭"  
52 - assert tenant_162_config['translate_to_zh'] == False, "租户162翻译应该关闭"  
53 - print("✓ 租户162配置正确(翻译关闭)") 55 + idx = tenant_162_config.get("index_languages") or []
  56 + assert "zh" in idx and "en" in idx, "租户162 index_languages 应包含 zh, en"
  57 + print("✓ 租户162配置正确(index_languages 含 zh, en)")
54 58
55 - # 测试其他租户 59 + # 测试租户1
56 tenant_1_config = tenant_config_loader.get_tenant_config("1") 60 tenant_1_config = tenant_config_loader.get_tenant_config("1")
57 print(f"租户1配置: {tenant_1_config}") 61 print(f"租户1配置: {tenant_1_config}")
58 - assert tenant_1_config['translate_to_en'] == True, "租户1应该启用英文翻译"  
59 - print("✓ 租户1配置正确(翻译开启)") 62 + idx1 = tenant_1_config.get("index_languages") or []
  63 + assert "zh" in idx1 and "en" in idx1, "租户1 index_languages 应包含 zh, en"
  64 + assert tenant_1_config.get("primary_language") == "zh", "租户1 主语言为 zh"
  65 + print("✓ 租户1配置正确(index_languages 含 zh, en,主语言 zh)")
60 66
61 return True 67 return True
62 except Exception as e: 68 except Exception as e:
@@ -118,13 +124,12 @@ def test_full_indexing(tenant_id: str = "162"): @@ -118,13 +124,12 @@ def test_full_indexing(tenant_id: str = "162"):
118 print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}") 124 print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
119 print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}") 125 print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
120 126
121 - # 检查租户162的翻译状态 127 + # 租户162 index_languages [zh, en],应有 title.en
122 if tenant_id == "162": 128 if tenant_id == "162":
123 - # 租户162翻译应该关闭:只写入主语言,不应出现 title.en  
124 - if isinstance(title_obj, dict) and title_obj.get("en") is None:  
125 - print(f" ✓ 翻译已关闭(title.en为空)") 129 + if isinstance(title_obj, dict) and title_obj.get("en"):
  130 + print(f" ✓ 多语言索引正常(title.en 已填充)")
126 else: 131 else:
127 - print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}") 132 + print(f" ⚠ 警告:租户162 配置 [zh,en],但 title.en 为空")
128 133
129 return True 134 return True
130 135
@@ -199,12 +204,12 @@ def test_incremental_indexing(tenant_id: str = "162"): @@ -199,12 +204,12 @@ def test_incremental_indexing(tenant_id: str = "162"):
199 print(f" SKU数量: {len(doc.get('skus', []))}") 204 print(f" SKU数量: {len(doc.get('skus', []))}")
200 print(f" 规格数量: {len(doc.get('specifications', []))}") 205 print(f" 规格数量: {len(doc.get('specifications', []))}")
201 206
202 - # 检查租户162的翻译状态 207 + # 租户162 配置了 index_languages [zh, en],应有 title.en
203 if tenant_id == "162": 208 if tenant_id == "162":
204 - if isinstance(title_obj, dict) and title_obj.get("en") is None:  
205 - print(f" ✓ 翻译已关闭(title.en为空)") 209 + if isinstance(title_obj, dict) and title_obj.get("en"):
  210 + print(f" ✓ 多语言索引正常(title.en 已填充)")
206 else: 211 else:
207 - print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}") 212 + print(f" ⚠ 警告:租户162 配置 [zh,en],但 title.en 为空")
208 213
209 return True 214 return True
210 215
@@ -298,11 +303,13 @@ def test_document_transformer(): @@ -298,11 +303,13 @@ def test_document_transformer():
298 print(f" title.en: {title_obj.get('en') if isinstance(title_obj, dict) else None}") 303 print(f" title.en: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
299 print(f" SKU数量: {len(doc.get('skus', []))}") 304 print(f" SKU数量: {len(doc.get('skus', []))}")
300 305
301 - # 验证租户162翻译关闭  
302 - if isinstance(title_obj, dict) and title_obj.get("en") is None:  
303 - print(f" ✓ 翻译已关闭(符合租户162配置)") 306 + # 租户162 index_languages [zh, en],主语言 zh,应有 zh(原文)与 en(翻译)
  307 + if isinstance(title_obj, dict) and title_obj.get("zh") and title_obj.get("en"):
  308 + print(f" ✓ 多语言字段正确(zh + en)")
  309 + elif isinstance(title_obj, dict) and title_obj.get("zh"):
  310 + print(f" ⚠ 仅有 zh(若未配置翻译或翻译未调用可接受)")
304 else: 311 else:
305 - print(f" ⚠ 警告:翻译应该关闭") 312 + print(f" ⚠ 未发现预期多语言字段")
306 313
307 return True 314 return True
308 else: 315 else:
query/query_parser.py
@@ -58,8 +58,7 @@ class ParsedQuery: @@ -58,8 +58,7 @@ class ParsedQuery:
58 "rewritten_query": self.rewritten_query, 58 "rewritten_query": self.rewritten_query,
59 "detected_language": self.detected_language, 59 "detected_language": self.detected_language,
60 "translations": self.translations, 60 "translations": self.translations,
61 - "domain": self.domain,  
62 - "has_vector": self.query_vector is not None 61 + "domain": self.domain
63 } 62 }
64 return result 63 return result
65 64
@@ -228,23 +227,16 @@ class QueryParser: @@ -228,23 +227,16 @@ class QueryParser:
228 translations = {} 227 translations = {}
229 translation_futures = {} 228 translation_futures = {}
230 try: 229 try:
231 - # 根据租户配置决定翻译目标语言 230 + # 根据租户配置的 index_languages 决定翻译目标语言
232 from config.tenant_config_loader import get_tenant_config_loader 231 from config.tenant_config_loader import get_tenant_config_loader
233 tenant_loader = get_tenant_config_loader() 232 tenant_loader = get_tenant_config_loader()
234 tenant_cfg = tenant_loader.get_tenant_config(tenant_id or "default") 233 tenant_cfg = tenant_loader.get_tenant_config(tenant_id or "default")
  234 + index_langs = tenant_cfg.get("index_languages") or ["en", "zh"]
235 235
236 - translate_to_zh = bool(tenant_cfg.get("translate_to_zh"))  
237 - translate_to_en = bool(tenant_cfg.get("translate_to_en")) 236 + target_langs_for_translation = [lang for lang in index_langs if lang != detected_lang]
238 237
239 - target_langs_for_translation = []  
240 - if translate_to_zh:  
241 - target_langs_for_translation.append('zh')  
242 - if translate_to_en:  
243 - target_langs_for_translation.append('en')  
244 -  
245 - # 如果该租户未开启任何翻译方向,则直接跳过翻译阶段  
246 if target_langs_for_translation: 238 if target_langs_for_translation:
247 - target_langs = [lang for lang in target_langs_for_translation if detected_lang != lang] 239 + target_langs = target_langs_for_translation
248 240
249 if target_langs: 241 if target_langs:
250 # Use e-commerce context for better disambiguation 242 # Use e-commerce context for better disambiguation
@@ -254,8 +246,8 @@ class QueryParser: @@ -254,8 +246,8 @@ class QueryParser:
254 self.config.query_config.translation_prompts.get('default_zh') 246 self.config.query_config.translation_prompts.get('default_zh')
255 247
256 # Determine if we need to wait for translation results 248 # Determine if we need to wait for translation results
257 - # If detected_lang is neither 'en' nor 'zh', we must wait for translation  
258 - need_wait_translation = detected_lang not in ['en', 'zh'] 249 + # If detected_lang is not in index_languages, we must wait for translation
  250 + need_wait_translation = detected_lang not in index_langs
259 251
260 if need_wait_translation: 252 if need_wait_translation:
261 # Use async method that returns Futures, so we can wait for results 253 # Use async method that returns Futures, so we can wait for results
query/translator.py
@@ -792,6 +792,18 @@ class Translator: @@ -792,6 +792,18 @@ class Translator:
792 # The user can configure a glossary for better results 792 # The user can configure a glossary for better results
793 return translated_text 793 return translated_text
794 794
  795 + def _shop_lang_matches(self, shop_lang_lower: str, lang_code: str) -> bool:
  796 + """True if shop language matches index language (use source, no translate)."""
  797 + if not shop_lang_lower or not lang_code:
  798 + return False
  799 + if shop_lang_lower == lang_code:
  800 + return True
  801 + if lang_code == "zh" and "zh" in shop_lang_lower:
  802 + return True
  803 + if lang_code == "en" and "en" in shop_lang_lower:
  804 + return True
  805 + return False
  806 +
795 def translate_for_indexing( 807 def translate_for_indexing(
796 self, 808 self,
797 text: str, 809 text: str,
@@ -799,76 +811,55 @@ class Translator: @@ -799,76 +811,55 @@ class Translator:
799 source_lang: Optional[str] = None, 811 source_lang: Optional[str] = None,
800 context: Optional[str] = None, 812 context: Optional[str] = None,
801 prompt: Optional[str] = None, 813 prompt: Optional[str] = None,
802 - translate_to_en: bool = True,  
803 - translate_to_zh: bool = True, 814 + index_languages: Optional[List[str]] = None,
804 ) -> Dict[str, Optional[str]]: 815 ) -> Dict[str, Optional[str]]:
805 """ 816 """
806 - Translate text for indexing based on shop language and tenant configuration.  
807 -  
808 - Translation behavior:  
809 - - If translate_to_zh=True and shop language is not 'zh', translate to Chinese (zh)  
810 - - If translate_to_en=True and shop language is not 'en', translate to English (en)  
811 - - If both flags are False, no translation is performed (returns None for both)  
812 - 817 + Translate text for indexing based on shop language and tenant index_languages.
  818 +
  819 + For each language in index_languages: use source text if shop language matches,
  820 + otherwise translate to that language.
  821 +
813 Args: 822 Args:
814 text: Text to translate 823 text: Text to translate
815 - shop_language: Shop's configured language (e.g., 'zh', 'en', 'ru')  
816 - source_lang: Source language code (optional, auto-detect if None) 824 + shop_language: Shop primary language (e.g. 'zh', 'en', 'ru')
  825 + source_lang: Source language code (optional)
817 context: Additional context for translation (optional) 826 context: Additional context for translation (optional)
818 - prompt: Translation prompt/instruction (optional)  
819 - translate_to_en: Whether to translate to English (from tenant_config)  
820 - translate_to_zh: Whether to translate to Chinese (from tenant_config)  
821 - 827 + prompt: Translation prompt (optional)
  828 + index_languages: Languages to index (from tenant_config). Default ["en", "zh"].
  829 +
822 Returns: 830 Returns:
823 - Dictionary with 'zh' and 'en' keys containing translated text (or None if not needed/not enabled)  
824 - Example: {'zh': '中文翻译', 'en': 'English translation'} or {'zh': None, 'en': None} 831 + Dict keyed by each index_language with translated or source text (or None).
825 """ 832 """
  833 + langs = index_languages if index_languages else ["en", "zh"]
  834 + results = {lang: None for lang in langs}
826 if not text or not text.strip(): 835 if not text or not text.strip():
827 - return {'zh': None, 'en': None}  
828 -  
829 - # Skip translation for symbol-only queries 836 + return results
830 if re.match(r'^[\d\s_-]+$', text): 837 if re.match(r'^[\d\s_-]+$', text):
831 logger.info(f"[Translator] Skip translation for symbol-only query: '{text}'") 838 logger.info(f"[Translator] Skip translation for symbol-only query: '{text}'")
832 - return {'zh': None, 'en': None}  
833 -  
834 - results = {'zh': None, 'en': None}  
835 - shop_lang_lower = shop_language.lower() if shop_language else ""  
836 -  
837 - # Determine which languages need translation based on tenant configuration  
838 - targets = []  
839 - if translate_to_zh and "zh" not in shop_lang_lower:  
840 - targets.append("zh")  
841 - if translate_to_en and "en" not in shop_lang_lower:  
842 - targets.append("en")  
843 -  
844 - # If shop language is already zh and en, no translation needed  
845 - if not targets:  
846 - # Use original text for both languages  
847 - if "zh" in shop_lang_lower:  
848 - results['zh'] = text  
849 - if "en" in shop_lang_lower:  
850 - results['en'] = text  
851 return results 839 return results
852 -  
853 - # Translate to each target language 840 +
  841 + shop_lang_lower = (shop_language or "").strip().lower()
  842 + targets = []
  843 + for lang in langs:
  844 + if self._shop_lang_matches(shop_lang_lower, lang):
  845 + results[lang] = text
  846 + else:
  847 + targets.append(lang)
  848 +
854 for target_lang in targets: 849 for target_lang in targets:
855 - # Check cache first  
856 cached = self._get_cached_translation_redis(text, target_lang, source_lang, context, prompt) 850 cached = self._get_cached_translation_redis(text, target_lang, source_lang, context, prompt)
857 if cached: 851 if cached:
858 results[target_lang] = cached 852 results[target_lang] = cached
859 logger.debug(f"[Translator] Cache hit for indexing: '{text}' -> {target_lang}: {cached}") 853 logger.debug(f"[Translator] Cache hit for indexing: '{text}' -> {target_lang}: {cached}")
860 continue 854 continue
861 -  
862 - # Translate synchronously for indexing (we need the result immediately)  
863 translated = self.translate( 855 translated = self.translate(
864 text, 856 text,
865 target_lang=target_lang, 857 target_lang=target_lang,
866 source_lang=source_lang or shop_language, 858 source_lang=source_lang or shop_language,
867 context=context, 859 context=context,
868 - prompt=prompt 860 + prompt=prompt,
869 ) 861 )
870 results[target_lang] = translated 862 results[target_lang] = translated
871 -  
872 return results 863 return results
873 864
874 def get_translation_needs( 865 def get_translation_needs(
search/searcher.py
@@ -165,7 +165,8 @@ class Searcher: @@ -165,7 +165,8 @@ class Searcher:
165 # 根据租户配置决定翻译开关(离线/在线统一) 165 # 根据租户配置决定翻译开关(离线/在线统一)
166 tenant_loader = get_tenant_config_loader() 166 tenant_loader = get_tenant_config_loader()
167 tenant_cfg = tenant_loader.get_tenant_config(tenant_id) 167 tenant_cfg = tenant_loader.get_tenant_config(tenant_id)
168 - enable_translation = bool(tenant_cfg.get("translate_to_en") or tenant_cfg.get("translate_to_zh")) 168 + index_langs = tenant_cfg.get("index_languages") or []
  169 + enable_translation = len(index_langs) > 0
169 enable_embedding = self.config.query_config.enable_text_embedding 170 enable_embedding = self.config.query_config.enable_text_embedding
170 enable_rerank = False # Temporarily disabled 171 enable_rerank = False # Temporarily disabled
171 172