Commit 038e4e2facaffdb4f3d6e824379232f801bf250d

Authored by tangwang
1 parent 3a5fda00

refactor(i18n): translate_to_en/zh 改为可配置 index_languages,默认 [en,zh]

- config: 新增 SUPPORTED_INDEX_LANGUAGES(37 种语言)、DEFAULT_INDEX_LANGUAGES、
  normalize_index_languages、resolve_index_languages;get_tenant_config 统一注入 index_languages
- config.yaml: 租户配置改用 index_languages,默认 [en,zh],保留 translate_to_* 兼容解析
- query/translator: translate_for_indexing 改为接收 index_languages,返回多语言 Dict
- query/query_parser: 翻译目标从 index_languages 解析,need_wait_translation 按 index_langs 判断
- search/searcher: enable_translation 改为基于 index_languages 是否非空
- indexer: document_transformer 按 index_languages 填多语言字段;indexing_utils 仅多语言时初始化翻译器
- tests: 租户配置与索引测试改为断言 index_languages
- README: 更新 TODO 说明已支持 index_languages
README.md
1   -
2 1 # TODO
3 2  
  3 +**多语言索引**:已改为可配置的 `index_languages`(默认为 `["en", "zh"]`),商家可勾选主市场语言。支持语言见 `config.tenant_config_loader.SUPPORTED_INDEX_LANGUAGES`(含 en, zh, zh_tw, ru, ja, ko, es, fr, pt, de, it, th, vi, id, ms, ar, hi, he, my, ta, ur, bn, pl, nl, ro, tr, km, lo, yue, cs, el, sv, hu, da, fi, uk, bg 等)。
  4 +
4 5 前端:
5 6 搜索模态框
6 7 点击搜索的时候,弹出 搜索模态框,参考 react、AJAX等技术来实现,搜索模态框的页面宽度和原始页面相同(占满),左侧是suggestions,右侧是即时刷新的搜索结果(每输入一个字母都刷新一次结果)。
... ...
config/config.yaml
... ... @@ -149,32 +149,25 @@ spu_config:
149 149 searchable_option_dimensions: ['option1', 'option2', 'option3']
150 150  
151 151 # 租户配置(Tenant Configuration)
152   -# 每个租户可以配置主语言和翻译选项
  152 +# 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选)
  153 +# 默认 index_languages: [en, zh],可配置为任意 SUPPORTED_INDEX_LANGUAGES 的子集
153 154 tenant_config:
154   - # 默认配置(未配置的租户使用此配置)
155 155 default:
156 156 primary_language: "en"
157   - translate_to_en: true
158   - translate_to_zh: false
159   - # 租户特定配置
  157 + index_languages: ["en", "zh"]
160 158 tenants:
161 159 "1":
162 160 primary_language: "zh"
163   - translate_to_en: true
164   - translate_to_zh: false
  161 + index_languages: ["zh", "en"]
165 162 "2":
166 163 primary_language: "en"
167   - translate_to_en: false
168   - translate_to_zh: true
  164 + index_languages: ["en", "zh"]
169 165 "3":
170 166 primary_language: "zh"
171   - translate_to_en: true
172   - translate_to_zh: false
  167 + index_languages: ["zh", "en"]
173 168 "162":
174 169 primary_language: "zh"
175   - translate_to_en: true
176   - translate_to_zh: true
  170 + index_languages: ["zh", "en"]
177 171 "170":
178 172 primary_language: "en"
179   - translate_to_en: true
180   - translate_to_zh: true
  173 + index_languages: ["en", "zh"]
... ...
config/tenant_config_loader.py
1 1 """
2 2 租户配置加载器。
3 3  
4   -从统一配置文件(config.yaml)加载租户配置,包括主语言和翻译配置。
  4 +从统一配置文件(config.yaml)加载租户配置,包括主语言和索引语言(index_languages)。
  5 +支持旧配置 translate_to_en / translate_to_zh 的兼容解析。
5 6 """
6 7  
7 8 import logging
8   -from typing import Dict, Any, Optional
  9 +from typing import Dict, Any, Optional, List
9 10  
10 11 logger = logging.getLogger(__name__)
11 12  
# Supported index languages: code -> display name (used, for example, when a
# merchant ticks the languages of their main markets).
SUPPORTED_INDEX_LANGUAGES: Dict[str, str] = {
    "en": "English",
    "zh": "Chinese",
    "zh_tw": "Traditional Chinese",
    "ru": "Russian",
    "ja": "Japanese",
    "ko": "Korean",
    "es": "Spanish",
    "fr": "French",
    "pt": "Portuguese",
    "de": "German",
    "it": "Italian",
    "th": "Thai",
    "vi": "Vietnamese",
    "id": "Indonesian",
    "ms": "Malay",
    "ar": "Arabic",
    "hi": "Hindi",
    "he": "Hebrew",
    "my": "Burmese",
    "ta": "Tamil",
    "ur": "Urdu",
    "bn": "Bengali",
    "pl": "Polish",
    "nl": "Dutch",
    "ro": "Romanian",
    "tr": "Turkish",
    "km": "Khmer",
    "lo": "Lao",
    "yue": "Cantonese",
    "cs": "Czech",
    "el": "Greek",
    "sv": "Swedish",
    "hu": "Hungarian",
    "da": "Danish",
    "fi": "Finnish",
    "uk": "Ukrainian",
    "bg": "Bulgarian",
}

# Languages used whenever a tenant supplies no valid index language.
DEFAULT_INDEX_LANGUAGES: List[str] = ["en", "zh"]


def normalize_index_languages(value: Any, primary_language: str = "en") -> List[str]:
    """
    Normalize an ``index_languages`` setting into a list of supported codes.

    Args:
        value: Raw configuration value. A list/tuple of codes is expected; a
            bare string is also accepted and split on commas (so the YAML
            scalar ``index_languages: ru`` behaves like ``["ru"]``).
        primary_language: Accepted for interface compatibility; it is not
            consulted by the normalization.

    Returns:
        Supported codes, lower-cased, de-duplicated, in first-seen order.
        A fresh copy of DEFAULT_INDEX_LANGUAGES is returned when ``value`` is
        None, is of an unsupported type, or contains no supported code.
    """
    if isinstance(value, str):
        value = value.split(",")
    if not isinstance(value, (list, tuple)):
        # Covers None as well as dicts, numbers, etc.
        return list(DEFAULT_INDEX_LANGUAGES)

    valid: List[str] = []
    seen: set = set()
    for item in value:
        # Config scalars may parse as int/bool/None; skip them instead of
        # crashing on ``.strip()``.
        if not isinstance(item, str):
            continue
        # Accept hyphenated tags ("zh-TW") for the underscore codes used here.
        code = item.strip().lower().replace("-", "_")
        if code in SUPPORTED_INDEX_LANGUAGES and code not in seen:
            seen.add(code)
            valid.append(code)
    return valid or list(DEFAULT_INDEX_LANGUAGES)
  76 +
  77 +
def resolve_index_languages(tenant_config: Dict[str, Any]) -> List[str]:
    """
    Resolve the effective index languages for one tenant configuration.

    If the configuration has an ``index_languages`` key, its value is passed
    through ``normalize_index_languages``. Otherwise the list is derived from
    the legacy keys: the primary language first (when supported), then "en" if
    ``translate_to_en`` is truthy and "zh" if ``translate_to_zh`` is truthy.

    Args:
        tenant_config: Raw tenant configuration mapping.

    Returns:
        A non-empty list of language codes; a copy of DEFAULT_INDEX_LANGUAGES
        when nothing can be derived.
    """
    raw_primary = tenant_config.get("primary_language") or "en"
    if "index_languages" in tenant_config:
        return normalize_index_languages(tenant_config["index_languages"], raw_primary)

    # str() keeps a non-string scalar (e.g. a number) from crashing on
    # ``.strip()``; such a value is simply not a supported code.
    primary = str(raw_primary).strip().lower()
    langs: List[str] = [primary] if primary in SUPPORTED_INDEX_LANGUAGES else []

    # Legacy flags, checked in a fixed en-then-zh order.
    for code, flag in (("en", "translate_to_en"), ("zh", "translate_to_zh")):
        if tenant_config.get(flag) and code not in langs:
            langs.append(code)
    return langs or list(DEFAULT_INDEX_LANGUAGES)
  99 +
12 100  
13 101 class TenantConfigLoader:
14 102 """租户配置加载器。"""
... ... @@ -36,14 +124,9 @@ class TenantConfigLoader:
36 124 return self._config
37 125 except Exception as e:
38 126 logger.error(f"Failed to load tenant config: {e}", exc_info=True)
39   - # 返回默认配置
40 127 self._config = {
41   - "default": {
42   - "primary_language": "en",
43   - "translate_to_en": True,
44   - "translate_to_zh": False
45   - },
46   - "tenants": {}
  128 + "default": {"primary_language": "en", "index_languages": ["en", "zh"]},
  129 + "tenants": {},
47 130 }
48 131 return self._config
49 132  
... ... @@ -55,21 +138,18 @@ class TenantConfigLoader:
55 138 tenant_id: 租户ID
56 139  
57 140 Returns:
58   - 租户配置字典,如果租户不存在则返回默认配置
  141 + 租户配置字典,若租户不存在则用默认配置。始终包含已解析的 index_languages。
59 142 """
60 143 config = self.load_config()
61 144 tenant_id_str = str(tenant_id)
62   -
  145 + default = config.get("default", {"primary_language": "en", "index_languages": ["en", "zh"]})
63 146 tenants = config.get("tenants", {})
64   - if tenant_id_str in tenants:
65   - return tenants[tenant_id_str]
66   - else:
  147 + raw = tenants[tenant_id_str] if tenant_id_str in tenants else default
  148 + if tenant_id_str not in tenants:
67 149 logger.debug(f"Tenant {tenant_id} not found in config, using default")
68   - return config.get("default", {
69   - "primary_language": "en",
70   - "translate_to_en": True,
71   - "translate_to_zh": False
72   - })
  150 + out = dict(raw)
  151 + out["index_languages"] = resolve_index_languages(raw)
  152 + return out
73 153  
74 154 def reload(self):
75 155 """重新加载配置(用于配置更新)。"""
... ...
indexer/document_transformer.py
... ... @@ -172,49 +172,32 @@ class SPUDocumentTransformer:
172 172 primary_lang: str
173 173 ):
174 174 """
175   - 填充文本字段(根据租户配置处理多语言翻译)。
176   -
177   - 翻译逻辑:
178   - - 根据 tenant_config 中的 translate_to_zh 和 translate_to_en 决定翻译方向
179   - - 如果 translate_to_zh=true,且店铺语言不是zh,则翻译到中文
180   - - 如果 translate_to_en=true,且店铺语言不是en,则翻译到英文
181   - - 如果两个都是false,则不进行翻译,只填充主语言字段
  175 + 填充文本字段(根据租户 index_languages 处理多语言翻译)。
  176 + 仅写入 primary_language 及 index_languages 中配置的语言。
182 177 """
183   - # 从租户配置中读取翻译方向
184   - translate_to_en = bool(self.tenant_config.get('translate_to_en'))
185   - translate_to_zh = bool(self.tenant_config.get('translate_to_zh'))
186   -
187   - def _set_lang_obj(field_name: str, source_text: Optional[str], translations: Optional[Dict[str, str]] = None):
188   - """
189   - Write multilingual text field as an object, e.g.:
190   - doc[field_name] = {"zh": "...", "en": "..."}
191   - Only writes keys based on tenant primary_language + translate_to_en/translate_to_zh.
192   - """
  178 + index_langs = self.tenant_config.get("index_languages") or ["en", "zh"]
  179 +
  180 + def _set_lang_obj(field_name: str, source_text: Optional[str], translations: Optional[Dict[str, Optional[str]]] = None):
  181 + """写入多语言对象 doc[field_name] = {"zh": "...", "en": "...", ...},仅包含 index_languages。"""
193 182 if not source_text or not str(source_text).strip():
194 183 return
195   -
196 184 obj: Dict[str, str] = {}
197 185 src = str(source_text)
198 186 obj[primary_lang] = src
199   -
200 187 tr = translations or {}
201   - if translate_to_en and primary_lang != "en":
202   - en_text = tr.get("en")
203   - if en_text and str(en_text).strip():
204   - obj["en"] = str(en_text)
205   - if translate_to_zh and primary_lang != "zh":
206   - zh_text = tr.get("zh")
207   - if zh_text and str(zh_text).strip():
208   - obj["zh"] = str(zh_text)
209   -
  188 + for lang in index_langs:
  189 + if lang == primary_lang:
  190 + continue
  191 + val = tr.get(lang)
  192 + if val and str(val).strip():
  193 + obj[lang] = str(val)
210 194 if obj:
211 195 doc[field_name] = obj
212 196  
213 197 # Title
214 198 if pd.notna(spu_row.get('title')):
215 199 title_text = str(spu_row['title'])
216   -
217   - translations: Dict[str, str] = {}
  200 + translations: Dict[str, Optional[str]] = {}
218 201 if self.translator:
219 202 prompt_zh = self.translation_prompts.get('product_title_zh') or self.translation_prompts.get('default_zh')
220 203 prompt_en = self.translation_prompts.get('product_title_en') or self.translation_prompts.get('default_en')
... ... @@ -223,16 +206,14 @@ class SPUDocumentTransformer:
223 206 shop_language=primary_lang,
224 207 source_lang=primary_lang,
225 208 prompt=prompt_zh if primary_lang == 'zh' else prompt_en,
226   - translate_to_en=translate_to_en,
227   - translate_to_zh=translate_to_zh,
  209 + index_languages=index_langs,
228 210 ) or {}
229   -
230 211 _set_lang_obj("title", title_text, translations)
231 212  
232 213 # Brief
233 214 if pd.notna(spu_row.get('brief')):
234 215 brief_text = str(spu_row['brief'])
235   - translations: Dict[str, str] = {}
  216 + translations = {}
236 217 if self.translator:
237 218 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
238 219 translations = self.translator.translate_for_indexing(
... ... @@ -240,15 +221,14 @@ class SPUDocumentTransformer:
240 221 shop_language=primary_lang,
241 222 source_lang=primary_lang,
242 223 prompt=prompt,
243   - translate_to_en=translate_to_en,
244   - translate_to_zh=translate_to_zh,
  224 + index_languages=index_langs,
245 225 ) or {}
246 226 _set_lang_obj("brief", brief_text, translations)
247 227  
248 228 # Description
249 229 if pd.notna(spu_row.get('description')):
250 230 desc_text = str(spu_row['description'])
251   - translations: Dict[str, str] = {}
  231 + translations = {}
252 232 if self.translator:
253 233 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
254 234 translations = self.translator.translate_for_indexing(
... ... @@ -256,15 +236,14 @@ class SPUDocumentTransformer:
256 236 shop_language=primary_lang,
257 237 source_lang=primary_lang,
258 238 prompt=prompt,
259   - translate_to_en=translate_to_en,
260   - translate_to_zh=translate_to_zh,
  239 + index_languages=index_langs,
261 240 ) or {}
262 241 _set_lang_obj("description", desc_text, translations)
263 242  
264 243 # Vendor
265 244 if pd.notna(spu_row.get('vendor')):
266 245 vendor_text = str(spu_row['vendor'])
267   - translations: Dict[str, str] = {}
  246 + translations = {}
268 247 if self.translator:
269 248 prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en')
270 249 translations = self.translator.translate_for_indexing(
... ... @@ -272,8 +251,7 @@ class SPUDocumentTransformer:
272 251 shop_language=primary_lang,
273 252 source_lang=primary_lang,
274 253 prompt=prompt,
275   - translate_to_en=translate_to_en,
276   - translate_to_zh=translate_to_zh,
  254 + index_languages=index_langs,
277 255 ) or {}
278 256 _set_lang_obj("vendor", vendor_text, translations)
279 257  
... ...
indexer/indexing_utils.py
... ... @@ -96,11 +96,9 @@ def create_document_transformer(
96 96 if searchable_option_dimensions is None:
97 97 searchable_option_dimensions = config.spu_config.searchable_option_dimensions
98 98  
99   - # 根据租户配置决定是否需要翻译:只要开启任一方向的翻译,就初始化翻译器
100   - translate_to_en = bool(tenant_config.get("translate_to_en"))
101   - translate_to_zh = bool(tenant_config.get("translate_to_zh"))
102   -
103   - if translator is None and (translate_to_en or translate_to_zh):
  99 + index_langs = tenant_config.get("index_languages") or []
  100 + need_translator = len(index_langs) > 1
  101 + if translator is None and need_translator:
104 102 from query.translator import Translator
105 103 translator = Translator(
106 104 api_key=config.query_config.translation_api_key,
... ...
indexer/test_indexing.py
... ... @@ -44,19 +44,25 @@ def test_tenant_config():
44 44 # 测试默认配置
45 45 default_config = tenant_config_loader.get_tenant_config("999")
46 46 print(f"默认配置: {default_config}")
  47 + assert "index_languages" in default_config, "默认配置应包含 index_languages"
  48 + assert "en" in default_config["index_languages"] and "zh" in default_config["index_languages"], \
  49 + "默认 index_languages 应包含 en, zh"
  50 + print("✓ 默认配置正确(index_languages 含 en, zh)")
47 51  
48   - # 测试租户162(翻译关闭)
  52 + # 测试租户162(index_languages: zh, en)
49 53 tenant_162_config = tenant_config_loader.get_tenant_config("162")
50 54 print(f"租户162配置: {tenant_162_config}")
51   - assert tenant_162_config['translate_to_en'] == False, "租户162翻译应该关闭"
52   - assert tenant_162_config['translate_to_zh'] == False, "租户162翻译应该关闭"
53   - print("✓ 租户162配置正确(翻译关闭)")
  55 + idx = tenant_162_config.get("index_languages") or []
  56 + assert "zh" in idx and "en" in idx, "租户162 index_languages 应包含 zh, en"
  57 + print("✓ 租户162配置正确(index_languages 含 zh, en)")
54 58  
55   - # 测试其他租户
  59 + # 测试租户1
56 60 tenant_1_config = tenant_config_loader.get_tenant_config("1")
57 61 print(f"租户1配置: {tenant_1_config}")
58   - assert tenant_1_config['translate_to_en'] == True, "租户1应该启用英文翻译"
59   - print("✓ 租户1配置正确(翻译开启)")
  62 + idx1 = tenant_1_config.get("index_languages") or []
  63 + assert "zh" in idx1 and "en" in idx1, "租户1 index_languages 应包含 zh, en"
  64 + assert tenant_1_config.get("primary_language") == "zh", "租户1 主语言为 zh"
  65 + print("✓ 租户1配置正确(index_languages 含 zh, en,主语言 zh)")
60 66  
61 67 return True
62 68 except Exception as e:
... ... @@ -118,13 +124,12 @@ def test_full_indexing(tenant_id: str = "162"):
118 124 print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
119 125 print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}")
120 126  
121   - # 检查租户162的翻译状态
  127 + # 租户162 index_languages [zh, en],应有 title.en
122 128 if tenant_id == "162":
123   - # 租户162翻译应该关闭:只写入主语言,不应出现 title.en
124   - if isinstance(title_obj, dict) and title_obj.get("en") is None:
125   - print(f" ✓ 翻译已关闭(title.en为空)")
  129 + if isinstance(title_obj, dict) and title_obj.get("en"):
  130 + print(f" ✓ 多语言索引正常(title.en 已填充)")
126 131 else:
127   - print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
  132 + print(f" ⚠ 警告:租户162 配置 [zh,en],但 title.en 为空")
128 133  
129 134 return True
130 135  
... ... @@ -199,12 +204,12 @@ def test_incremental_indexing(tenant_id: str = "162"):
199 204 print(f" SKU数量: {len(doc.get('skus', []))}")
200 205 print(f" 规格数量: {len(doc.get('specifications', []))}")
201 206  
202   - # 检查租户162的翻译状态
  207 + # 租户162 配置了 index_languages [zh, en],应有 title.en
203 208 if tenant_id == "162":
204   - if isinstance(title_obj, dict) and title_obj.get("en") is None:
205   - print(f" ✓ 翻译已关闭(title.en为空)")
  209 + if isinstance(title_obj, dict) and title_obj.get("en"):
  210 + print(f" ✓ 多语言索引正常(title.en 已填充)")
206 211 else:
207   - print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
  212 + print(f" ⚠ 警告:租户162 配置 [zh,en],但 title.en 为空")
208 213  
209 214 return True
210 215  
... ... @@ -298,11 +303,13 @@ def test_document_transformer():
298 303 print(f" title.en: {title_obj.get('en') if isinstance(title_obj, dict) else None}")
299 304 print(f" SKU数量: {len(doc.get('skus', []))}")
300 305  
301   - # 验证租户162翻译关闭
302   - if isinstance(title_obj, dict) and title_obj.get("en") is None:
303   - print(f" ✓ 翻译已关闭(符合租户162配置)")
  306 + # 租户162 index_languages [zh, en],主语言 zh,应有 zh(原文)与 en(翻译)
  307 + if isinstance(title_obj, dict) and title_obj.get("zh") and title_obj.get("en"):
  308 + print(f" ✓ 多语言字段正确(zh + en)")
  309 + elif isinstance(title_obj, dict) and title_obj.get("zh"):
  310 + print(f" ⚠ 仅有 zh(若未配置翻译或翻译未调用可接受)")
304 311 else:
305   - print(f" ⚠ 警告:翻译应该关闭")
  312 + print(f" ⚠ 未发现预期多语言字段")
306 313  
307 314 return True
308 315 else:
... ...
query/query_parser.py
... ... @@ -58,8 +58,7 @@ class ParsedQuery:
58 58 "rewritten_query": self.rewritten_query,
59 59 "detected_language": self.detected_language,
60 60 "translations": self.translations,
61   - "domain": self.domain,
62   - "has_vector": self.query_vector is not None
  61 + "domain": self.domain
63 62 }
64 63 return result
65 64  
... ... @@ -228,23 +227,16 @@ class QueryParser:
228 227 translations = {}
229 228 translation_futures = {}
230 229 try:
231   - # 根据租户配置决定翻译目标语言
  230 + # 根据租户配置的 index_languages 决定翻译目标语言
232 231 from config.tenant_config_loader import get_tenant_config_loader
233 232 tenant_loader = get_tenant_config_loader()
234 233 tenant_cfg = tenant_loader.get_tenant_config(tenant_id or "default")
  234 + index_langs = tenant_cfg.get("index_languages") or ["en", "zh"]
235 235  
236   - translate_to_zh = bool(tenant_cfg.get("translate_to_zh"))
237   - translate_to_en = bool(tenant_cfg.get("translate_to_en"))
  236 + target_langs_for_translation = [lang for lang in index_langs if lang != detected_lang]
238 237  
239   - target_langs_for_translation = []
240   - if translate_to_zh:
241   - target_langs_for_translation.append('zh')
242   - if translate_to_en:
243   - target_langs_for_translation.append('en')
244   -
245   - # 如果该租户未开启任何翻译方向,则直接跳过翻译阶段
246 238 if target_langs_for_translation:
247   - target_langs = [lang for lang in target_langs_for_translation if detected_lang != lang]
  239 + target_langs = target_langs_for_translation
248 240  
249 241 if target_langs:
250 242 # Use e-commerce context for better disambiguation
... ... @@ -254,8 +246,8 @@ class QueryParser:
254 246 self.config.query_config.translation_prompts.get('default_zh')
255 247  
256 248 # Determine if we need to wait for translation results
257   - # If detected_lang is neither 'en' nor 'zh', we must wait for translation
258   - need_wait_translation = detected_lang not in ['en', 'zh']
  249 + # If detected_lang is not in index_languages, we must wait for translation
  250 + need_wait_translation = detected_lang not in index_langs
259 251  
260 252 if need_wait_translation:
261 253 # Use async method that returns Futures, so we can wait for results
... ...
query/translator.py
... ... @@ -792,6 +792,18 @@ class Translator:
792 792 # The user can configure a glossary for better results
793 793 return translated_text
794 794  
  795 + def _shop_lang_matches(self, shop_lang_lower: str, lang_code: str) -> bool:
  796 + """True if shop language matches index language (use source, no translate)."""
  797 + if not shop_lang_lower or not lang_code:
  798 + return False
  799 + if shop_lang_lower == lang_code:
  800 + return True
  801 + if lang_code == "zh" and "zh" in shop_lang_lower:
  802 + return True
  803 + if lang_code == "en" and "en" in shop_lang_lower:
  804 + return True
  805 + return False
  806 +
795 807 def translate_for_indexing(
796 808 self,
797 809 text: str,
... ... @@ -799,76 +811,55 @@ class Translator:
799 811 source_lang: Optional[str] = None,
800 812 context: Optional[str] = None,
801 813 prompt: Optional[str] = None,
802   - translate_to_en: bool = True,
803   - translate_to_zh: bool = True,
  814 + index_languages: Optional[List[str]] = None,
804 815 ) -> Dict[str, Optional[str]]:
805 816 """
806   - Translate text for indexing based on shop language and tenant configuration.
807   -
808   - Translation behavior:
809   - - If translate_to_zh=True and shop language is not 'zh', translate to Chinese (zh)
810   - - If translate_to_en=True and shop language is not 'en', translate to English (en)
811   - - If both flags are False, no translation is performed (returns None for both)
812   -
  817 + Translate text for indexing based on shop language and tenant index_languages.
  818 +
  819 + For each language in index_languages: use source text if shop language matches,
  820 + otherwise translate to that language.
  821 +
813 822 Args:
814 823 text: Text to translate
815   - shop_language: Shop's configured language (e.g., 'zh', 'en', 'ru')
816   - source_lang: Source language code (optional, auto-detect if None)
  824 + shop_language: Shop primary language (e.g. 'zh', 'en', 'ru')
  825 + source_lang: Source language code (optional)
817 826 context: Additional context for translation (optional)
818   - prompt: Translation prompt/instruction (optional)
819   - translate_to_en: Whether to translate to English (from tenant_config)
820   - translate_to_zh: Whether to translate to Chinese (from tenant_config)
821   -
  827 + prompt: Translation prompt (optional)
  828 + index_languages: Languages to index (from tenant_config). Default ["en", "zh"].
  829 +
822 830 Returns:
823   - Dictionary with 'zh' and 'en' keys containing translated text (or None if not needed/not enabled)
824   - Example: {'zh': '中文翻译', 'en': 'English translation'} or {'zh': None, 'en': None}
  831 + Dict keyed by each index_language with translated or source text (or None).
825 832 """
  833 + langs = index_languages if index_languages else ["en", "zh"]
  834 + results = {lang: None for lang in langs}
826 835 if not text or not text.strip():
827   - return {'zh': None, 'en': None}
828   -
829   - # Skip translation for symbol-only queries
  836 + return results
830 837 if re.match(r'^[\d\s_-]+$', text):
831 838 logger.info(f"[Translator] Skip translation for symbol-only query: '{text}'")
832   - return {'zh': None, 'en': None}
833   -
834   - results = {'zh': None, 'en': None}
835   - shop_lang_lower = shop_language.lower() if shop_language else ""
836   -
837   - # Determine which languages need translation based on tenant configuration
838   - targets = []
839   - if translate_to_zh and "zh" not in shop_lang_lower:
840   - targets.append("zh")
841   - if translate_to_en and "en" not in shop_lang_lower:
842   - targets.append("en")
843   -
844   - # If shop language is already zh and en, no translation needed
845   - if not targets:
846   - # Use original text for both languages
847   - if "zh" in shop_lang_lower:
848   - results['zh'] = text
849   - if "en" in shop_lang_lower:
850   - results['en'] = text
851 839 return results
852   -
853   - # Translate to each target language
  840 +
  841 + shop_lang_lower = (shop_language or "").strip().lower()
  842 + targets = []
  843 + for lang in langs:
  844 + if self._shop_lang_matches(shop_lang_lower, lang):
  845 + results[lang] = text
  846 + else:
  847 + targets.append(lang)
  848 +
854 849 for target_lang in targets:
855   - # Check cache first
856 850 cached = self._get_cached_translation_redis(text, target_lang, source_lang, context, prompt)
857 851 if cached:
858 852 results[target_lang] = cached
859 853 logger.debug(f"[Translator] Cache hit for indexing: '{text}' -> {target_lang}: {cached}")
860 854 continue
861   -
862   - # Translate synchronously for indexing (we need the result immediately)
863 855 translated = self.translate(
864 856 text,
865 857 target_lang=target_lang,
866 858 source_lang=source_lang or shop_language,
867 859 context=context,
868   - prompt=prompt
  860 + prompt=prompt,
869 861 )
870 862 results[target_lang] = translated
871   -
872 863 return results
873 864  
874 865 def get_translation_needs(
... ...
search/searcher.py
... ... @@ -165,7 +165,8 @@ class Searcher:
165 165 # 根据租户配置决定翻译开关(离线/在线统一)
166 166 tenant_loader = get_tenant_config_loader()
167 167 tenant_cfg = tenant_loader.get_tenant_config(tenant_id)
168   - enable_translation = bool(tenant_cfg.get("translate_to_en") or tenant_cfg.get("translate_to_zh"))
  168 + index_langs = tenant_cfg.get("index_languages") or []
  169 + enable_translation = len(index_langs) > 0
169 170 enable_embedding = self.config.query_config.enable_text_embedding
170 171 enable_rerank = False # Temporarily disabled
171 172  
... ...