Commit 038e4e2facaffdb4f3d6e824379232f801bf250d
1 parent
3a5fda00
refactor(i18n): translate_to_en/zh 改为可配置 index_languages,默认 [en,zh]
- config: 新增 SUPPORTED_INDEX_LANGUAGES(37 种语言)、DEFAULT_INDEX_LANGUAGES、normalize_index_languages、resolve_index_languages;get_tenant_config 统一注入 index_languages - config.yaml: 租户配置改用 index_languages,默认 [en,zh],保留 translate_to_* 兼容解析 - query/translator: translate_for_indexing 改为接收 index_languages,返回多语言 Dict - query/query_parser: 翻译目标从 index_languages 解析,need_wait_translation 按 index_langs 判断 - search/searcher: enable_translation 改为基于 index_languages 是否非空 - indexer: document_transformer 按 index_languages 填多语言字段;indexing_utils 仅多语言时初始化翻译器 - tests: 租户配置与索引测试改为断言 index_languages - README: 更新 TODO 说明已支持 index_languages
Showing
9 changed files
with
205 additions
and
164 deletions
Show diff stats
README.md
| 1 | - | ||
| 2 | # TODO | 1 | # TODO |
| 3 | 2 | ||
| 3 | +**多语言索引**:已改为可配置的 `index_languages`(默认为 `["en", "zh"]`),商家可勾选主市场语言。支持语言见 `config.tenant_config_loader.SUPPORTED_INDEX_LANGUAGES`(含 en, zh, zh_tw, ru, ja, ko, es, fr, pt, de, it, th, vi, id, ms, ar, hi, he, my, ta, ur, bn, pl, nl, ro, tr, km, lo, yue, cs, el, sv, hu, da, fi, uk, bg 等)。 | ||
| 4 | + | ||
| 4 | 前端: | 5 | 前端: |
| 5 | 搜索模态框 | 6 | 搜索模态框 |
| 6 | 点击搜索的时候,弹出 搜索模态框,参考 react、AJAX等技术来实现,搜索模态框的页面宽度和原始页面相同(占满),左侧是suggestions,右侧是即时刷新的搜索结果(每输入一个字母都刷新一次结果)。 | 7 | 点击搜索的时候,弹出 搜索模态框,参考 react、AJAX等技术来实现,搜索模态框的页面宽度和原始页面相同(占满),左侧是suggestions,右侧是即时刷新的搜索结果(每输入一个字母都刷新一次结果)。 |
config/config.yaml
| @@ -149,32 +149,25 @@ spu_config: | @@ -149,32 +149,25 @@ spu_config: | ||
| 149 | searchable_option_dimensions: ['option1', 'option2', 'option3'] | 149 | searchable_option_dimensions: ['option1', 'option2', 'option3'] |
| 150 | 150 | ||
| 151 | # 租户配置(Tenant Configuration) | 151 | # 租户配置(Tenant Configuration) |
| 152 | -# 每个租户可以配置主语言和翻译选项 | 152 | +# 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选) |
| 153 | +# 默认 index_languages: [en, zh],可配置为任意 SUPPORTED_INDEX_LANGUAGES 的子集 | ||
| 153 | tenant_config: | 154 | tenant_config: |
| 154 | - # 默认配置(未配置的租户使用此配置) | ||
| 155 | default: | 155 | default: |
| 156 | primary_language: "en" | 156 | primary_language: "en" |
| 157 | - translate_to_en: true | ||
| 158 | - translate_to_zh: false | ||
| 159 | - # 租户特定配置 | 157 | + index_languages: ["en", "zh"] |
| 160 | tenants: | 158 | tenants: |
| 161 | "1": | 159 | "1": |
| 162 | primary_language: "zh" | 160 | primary_language: "zh" |
| 163 | - translate_to_en: true | ||
| 164 | - translate_to_zh: false | 161 | + index_languages: ["zh", "en"] |
| 165 | "2": | 162 | "2": |
| 166 | primary_language: "en" | 163 | primary_language: "en" |
| 167 | - translate_to_en: false | ||
| 168 | - translate_to_zh: true | 164 | + index_languages: ["en", "zh"] |
| 169 | "3": | 165 | "3": |
| 170 | primary_language: "zh" | 166 | primary_language: "zh" |
| 171 | - translate_to_en: true | ||
| 172 | - translate_to_zh: false | 167 | + index_languages: ["zh", "en"] |
| 173 | "162": | 168 | "162": |
| 174 | primary_language: "zh" | 169 | primary_language: "zh" |
| 175 | - translate_to_en: true | ||
| 176 | - translate_to_zh: true | 170 | + index_languages: ["zh", "en"] |
| 177 | "170": | 171 | "170": |
| 178 | primary_language: "en" | 172 | primary_language: "en" |
| 179 | - translate_to_en: true | ||
| 180 | - translate_to_zh: true | 173 | + index_languages: ["en", "zh"] |
config/tenant_config_loader.py
| 1 | """ | 1 | """ |
| 2 | 租户配置加载器。 | 2 | 租户配置加载器。 |
| 3 | 3 | ||
| 4 | -从统一配置文件(config.yaml)加载租户配置,包括主语言和翻译配置。 | 4 | +从统一配置文件(config.yaml)加载租户配置,包括主语言和索引语言(index_languages)。 |
| 5 | +支持旧配置 translate_to_en / translate_to_zh 的兼容解析。 | ||
| 5 | """ | 6 | """ |
| 6 | 7 | ||
| 7 | import logging | 8 | import logging |
| 8 | -from typing import Dict, Any, Optional | 9 | +from typing import Dict, Any, Optional, List |
| 9 | 10 | ||
| 10 | logger = logging.getLogger(__name__) | 11 | logger = logging.getLogger(__name__) |
| 11 | 12 | ||
| 13 | +# 支持的索引语言:code -> display name(供商家勾选主市场语言等场景使用) | ||
| 14 | +SUPPORTED_INDEX_LANGUAGES: Dict[str, str] = { | ||
| 15 | + "en": "English", | ||
| 16 | + "zh": "Chinese", | ||
| 17 | + "zh_tw": "Traditional Chinese", | ||
| 18 | + "ru": "Russian", | ||
| 19 | + "ja": "Japanese", | ||
| 20 | + "ko": "Korean", | ||
| 21 | + "es": "Spanish", | ||
| 22 | + "fr": "French", | ||
| 23 | + "pt": "Portuguese", | ||
| 24 | + "de": "German", | ||
| 25 | + "it": "Italian", | ||
| 26 | + "th": "Thai", | ||
| 27 | + "vi": "Vietnamese", | ||
| 28 | + "id": "Indonesian", | ||
| 29 | + "ms": "Malay", | ||
| 30 | + "ar": "Arabic", | ||
| 31 | + "hi": "Hindi", | ||
| 32 | + "he": "Hebrew", | ||
| 33 | + "my": "Burmese", | ||
| 34 | + "ta": "Tamil", | ||
| 35 | + "ur": "Urdu", | ||
| 36 | + "bn": "Bengali", | ||
| 37 | + "pl": "Polish", | ||
| 38 | + "nl": "Dutch", | ||
| 39 | + "ro": "Romanian", | ||
| 40 | + "tr": "Turkish", | ||
| 41 | + "km": "Khmer", | ||
| 42 | + "lo": "Lao", | ||
| 43 | + "yue": "Cantonese", | ||
| 44 | + "cs": "Czech", | ||
| 45 | + "el": "Greek", | ||
| 46 | + "sv": "Swedish", | ||
| 47 | + "hu": "Hungarian", | ||
| 48 | + "da": "Danish", | ||
| 49 | + "fi": "Finnish", | ||
| 50 | + "uk": "Ukrainian", | ||
| 51 | + "bg": "Bulgarian", | ||
| 52 | +} | ||
| 53 | + | ||
| 54 | +DEFAULT_INDEX_LANGUAGES: List[str] = ["en", "zh"] | ||
| 55 | + | ||
| 56 | + | ||
| 57 | +def normalize_index_languages(value: Any, primary_language: str = "en") -> List[str]: | ||
| 58 | + """ | ||
| 59 | + 将 index_languages 配置规范化为合法语言代码列表(去除首尾空白、转小写、去重,并过滤不在 SUPPORTED_INDEX_LANGUAGES 中的代码)。 | ||
| 60 | + 当取值为 None、不是 list/tuple,或过滤后没有任何合法语言代码时,返回 DEFAULT_INDEX_LANGUAGES 的副本。 | ||
| 61 | + """ | ||
| 62 | + if value is None: | ||
| 63 | + return list(DEFAULT_INDEX_LANGUAGES) | ||
| 64 | + if not isinstance(value, (list, tuple)): | ||
| 65 | + return list(DEFAULT_INDEX_LANGUAGES) | ||
| 66 | + valid: List[str] = [] | ||
| 67 | + seen: set = set() | ||
| 68 | + for item in value: | ||
| 69 | + code = (item or "").strip().lower() | ||
| 70 | + if not code or code in seen: | ||
| 71 | + continue | ||
| 72 | + if code in SUPPORTED_INDEX_LANGUAGES: | ||
| 73 | + valid.append(code) | ||
| 74 | + seen.add(code) | ||
| 75 | + return valid if valid else list(DEFAULT_INDEX_LANGUAGES) | ||
| 76 | + | ||
| 77 | + | ||
| 78 | +def resolve_index_languages(tenant_config: Dict[str, Any]) -> List[str]: | ||
| 79 | + """ | ||
| 80 | + 从租户配置解析 index_languages。 | ||
| 81 | + 若存在 index_languages 则用之;否则按旧配置 translate_to_en / translate_to_zh 推导。 | ||
| 82 | + """ | ||
| 83 | + if "index_languages" in tenant_config: | ||
| 84 | + return normalize_index_languages( | ||
| 85 | + tenant_config["index_languages"], | ||
| 86 | + tenant_config.get("primary_language") or "en", | ||
| 87 | + ) | ||
| 88 | + primary = (tenant_config.get("primary_language") or "en").strip().lower() | ||
| 89 | + to_en = bool(tenant_config.get("translate_to_en")) | ||
| 90 | + to_zh = bool(tenant_config.get("translate_to_zh")) | ||
| 91 | + langs: List[str] = [] | ||
| 92 | + if primary and primary in SUPPORTED_INDEX_LANGUAGES: | ||
| 93 | + langs.append(primary) | ||
| 94 | + for code in ("en", "zh"): | ||
| 95 | + if code not in langs and ((code == "en" and to_en) or (code == "zh" and to_zh)): | ||
| 96 | + if code in SUPPORTED_INDEX_LANGUAGES: | ||
| 97 | + langs.append(code) | ||
| 98 | + return langs if langs else list(DEFAULT_INDEX_LANGUAGES) | ||
| 99 | + | ||
| 12 | 100 | ||
| 13 | class TenantConfigLoader: | 101 | class TenantConfigLoader: |
| 14 | """租户配置加载器。""" | 102 | """租户配置加载器。""" |
| @@ -36,14 +124,9 @@ class TenantConfigLoader: | @@ -36,14 +124,9 @@ class TenantConfigLoader: | ||
| 36 | return self._config | 124 | return self._config |
| 37 | except Exception as e: | 125 | except Exception as e: |
| 38 | logger.error(f"Failed to load tenant config: {e}", exc_info=True) | 126 | logger.error(f"Failed to load tenant config: {e}", exc_info=True) |
| 39 | - # 返回默认配置 | ||
| 40 | self._config = { | 127 | self._config = { |
| 41 | - "default": { | ||
| 42 | - "primary_language": "en", | ||
| 43 | - "translate_to_en": True, | ||
| 44 | - "translate_to_zh": False | ||
| 45 | - }, | ||
| 46 | - "tenants": {} | 128 | + "default": {"primary_language": "en", "index_languages": ["en", "zh"]}, |
| 129 | + "tenants": {}, | ||
| 47 | } | 130 | } |
| 48 | return self._config | 131 | return self._config |
| 49 | 132 | ||
| @@ -55,21 +138,18 @@ class TenantConfigLoader: | @@ -55,21 +138,18 @@ class TenantConfigLoader: | ||
| 55 | tenant_id: 租户ID | 138 | tenant_id: 租户ID |
| 56 | 139 | ||
| 57 | Returns: | 140 | Returns: |
| 58 | - 租户配置字典,如果租户不存在则返回默认配置 | 141 | + 租户配置字典,若租户不存在则用默认配置。始终包含已解析的 index_languages。 |
| 59 | """ | 142 | """ |
| 60 | config = self.load_config() | 143 | config = self.load_config() |
| 61 | tenant_id_str = str(tenant_id) | 144 | tenant_id_str = str(tenant_id) |
| 62 | - | 145 | + default = config.get("default", {"primary_language": "en", "index_languages": ["en", "zh"]}) |
| 63 | tenants = config.get("tenants", {}) | 146 | tenants = config.get("tenants", {}) |
| 64 | - if tenant_id_str in tenants: | ||
| 65 | - return tenants[tenant_id_str] | ||
| 66 | - else: | 147 | + raw = tenants[tenant_id_str] if tenant_id_str in tenants else default |
| 148 | + if tenant_id_str not in tenants: | ||
| 67 | logger.debug(f"Tenant {tenant_id} not found in config, using default") | 149 | logger.debug(f"Tenant {tenant_id} not found in config, using default") |
| 68 | - return config.get("default", { | ||
| 69 | - "primary_language": "en", | ||
| 70 | - "translate_to_en": True, | ||
| 71 | - "translate_to_zh": False | ||
| 72 | - }) | 150 | + out = dict(raw) |
| 151 | + out["index_languages"] = resolve_index_languages(raw) | ||
| 152 | + return out | ||
| 73 | 153 | ||
| 74 | def reload(self): | 154 | def reload(self): |
| 75 | """重新加载配置(用于配置更新)。""" | 155 | """重新加载配置(用于配置更新)。""" |
indexer/document_transformer.py
| @@ -172,49 +172,32 @@ class SPUDocumentTransformer: | @@ -172,49 +172,32 @@ class SPUDocumentTransformer: | ||
| 172 | primary_lang: str | 172 | primary_lang: str |
| 173 | ): | 173 | ): |
| 174 | """ | 174 | """ |
| 175 | - 填充文本字段(根据租户配置处理多语言翻译)。 | ||
| 176 | - | ||
| 177 | - 翻译逻辑: | ||
| 178 | - - 根据 tenant_config 中的 translate_to_zh 和 translate_to_en 决定翻译方向 | ||
| 179 | - - 如果 translate_to_zh=true,且店铺语言不是zh,则翻译到中文 | ||
| 180 | - - 如果 translate_to_en=true,且店铺语言不是en,则翻译到英文 | ||
| 181 | - - 如果两个都是false,则不进行翻译,只填充主语言字段 | 175 | + 填充文本字段(根据租户 index_languages 处理多语言翻译)。 |
| 176 | + 仅写入 primary_language 及 index_languages 中配置的语言。 | ||
| 182 | """ | 177 | """ |
| 183 | - # 从租户配置中读取翻译方向 | ||
| 184 | - translate_to_en = bool(self.tenant_config.get('translate_to_en')) | ||
| 185 | - translate_to_zh = bool(self.tenant_config.get('translate_to_zh')) | ||
| 186 | - | ||
| 187 | - def _set_lang_obj(field_name: str, source_text: Optional[str], translations: Optional[Dict[str, str]] = None): | ||
| 188 | - """ | ||
| 189 | - Write multilingual text field as an object, e.g.: | ||
| 190 | - doc[field_name] = {"zh": "...", "en": "..."} | ||
| 191 | - Only writes keys based on tenant primary_language + translate_to_en/translate_to_zh. | ||
| 192 | - """ | 178 | + index_langs = self.tenant_config.get("index_languages") or ["en", "zh"] |
| 179 | + | ||
| 180 | + def _set_lang_obj(field_name: str, source_text: Optional[str], translations: Optional[Dict[str, Optional[str]]] = None): | ||
| 181 | + """写入多语言对象 doc[field_name] = {"zh": "...", "en": "...", ...},仅包含 index_languages。""" | ||
| 193 | if not source_text or not str(source_text).strip(): | 182 | if not source_text or not str(source_text).strip(): |
| 194 | return | 183 | return |
| 195 | - | ||
| 196 | obj: Dict[str, str] = {} | 184 | obj: Dict[str, str] = {} |
| 197 | src = str(source_text) | 185 | src = str(source_text) |
| 198 | obj[primary_lang] = src | 186 | obj[primary_lang] = src |
| 199 | - | ||
| 200 | tr = translations or {} | 187 | tr = translations or {} |
| 201 | - if translate_to_en and primary_lang != "en": | ||
| 202 | - en_text = tr.get("en") | ||
| 203 | - if en_text and str(en_text).strip(): | ||
| 204 | - obj["en"] = str(en_text) | ||
| 205 | - if translate_to_zh and primary_lang != "zh": | ||
| 206 | - zh_text = tr.get("zh") | ||
| 207 | - if zh_text and str(zh_text).strip(): | ||
| 208 | - obj["zh"] = str(zh_text) | ||
| 209 | - | 188 | + for lang in index_langs: |
| 189 | + if lang == primary_lang: | ||
| 190 | + continue | ||
| 191 | + val = tr.get(lang) | ||
| 192 | + if val and str(val).strip(): | ||
| 193 | + obj[lang] = str(val) | ||
| 210 | if obj: | 194 | if obj: |
| 211 | doc[field_name] = obj | 195 | doc[field_name] = obj |
| 212 | 196 | ||
| 213 | # Title | 197 | # Title |
| 214 | if pd.notna(spu_row.get('title')): | 198 | if pd.notna(spu_row.get('title')): |
| 215 | title_text = str(spu_row['title']) | 199 | title_text = str(spu_row['title']) |
| 216 | - | ||
| 217 | - translations: Dict[str, str] = {} | 200 | + translations: Dict[str, Optional[str]] = {} |
| 218 | if self.translator: | 201 | if self.translator: |
| 219 | prompt_zh = self.translation_prompts.get('product_title_zh') or self.translation_prompts.get('default_zh') | 202 | prompt_zh = self.translation_prompts.get('product_title_zh') or self.translation_prompts.get('default_zh') |
| 220 | prompt_en = self.translation_prompts.get('product_title_en') or self.translation_prompts.get('default_en') | 203 | prompt_en = self.translation_prompts.get('product_title_en') or self.translation_prompts.get('default_en') |
| @@ -223,16 +206,14 @@ class SPUDocumentTransformer: | @@ -223,16 +206,14 @@ class SPUDocumentTransformer: | ||
| 223 | shop_language=primary_lang, | 206 | shop_language=primary_lang, |
| 224 | source_lang=primary_lang, | 207 | source_lang=primary_lang, |
| 225 | prompt=prompt_zh if primary_lang == 'zh' else prompt_en, | 208 | prompt=prompt_zh if primary_lang == 'zh' else prompt_en, |
| 226 | - translate_to_en=translate_to_en, | ||
| 227 | - translate_to_zh=translate_to_zh, | 209 | + index_languages=index_langs, |
| 228 | ) or {} | 210 | ) or {} |
| 229 | - | ||
| 230 | _set_lang_obj("title", title_text, translations) | 211 | _set_lang_obj("title", title_text, translations) |
| 231 | 212 | ||
| 232 | # Brief | 213 | # Brief |
| 233 | if pd.notna(spu_row.get('brief')): | 214 | if pd.notna(spu_row.get('brief')): |
| 234 | brief_text = str(spu_row['brief']) | 215 | brief_text = str(spu_row['brief']) |
| 235 | - translations: Dict[str, str] = {} | 216 | + translations = {} |
| 236 | if self.translator: | 217 | if self.translator: |
| 237 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') | 218 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') |
| 238 | translations = self.translator.translate_for_indexing( | 219 | translations = self.translator.translate_for_indexing( |
| @@ -240,15 +221,14 @@ class SPUDocumentTransformer: | @@ -240,15 +221,14 @@ class SPUDocumentTransformer: | ||
| 240 | shop_language=primary_lang, | 221 | shop_language=primary_lang, |
| 241 | source_lang=primary_lang, | 222 | source_lang=primary_lang, |
| 242 | prompt=prompt, | 223 | prompt=prompt, |
| 243 | - translate_to_en=translate_to_en, | ||
| 244 | - translate_to_zh=translate_to_zh, | 224 | + index_languages=index_langs, |
| 245 | ) or {} | 225 | ) or {} |
| 246 | _set_lang_obj("brief", brief_text, translations) | 226 | _set_lang_obj("brief", brief_text, translations) |
| 247 | 227 | ||
| 248 | # Description | 228 | # Description |
| 249 | if pd.notna(spu_row.get('description')): | 229 | if pd.notna(spu_row.get('description')): |
| 250 | desc_text = str(spu_row['description']) | 230 | desc_text = str(spu_row['description']) |
| 251 | - translations: Dict[str, str] = {} | 231 | + translations = {} |
| 252 | if self.translator: | 232 | if self.translator: |
| 253 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') | 233 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') |
| 254 | translations = self.translator.translate_for_indexing( | 234 | translations = self.translator.translate_for_indexing( |
| @@ -256,15 +236,14 @@ class SPUDocumentTransformer: | @@ -256,15 +236,14 @@ class SPUDocumentTransformer: | ||
| 256 | shop_language=primary_lang, | 236 | shop_language=primary_lang, |
| 257 | source_lang=primary_lang, | 237 | source_lang=primary_lang, |
| 258 | prompt=prompt, | 238 | prompt=prompt, |
| 259 | - translate_to_en=translate_to_en, | ||
| 260 | - translate_to_zh=translate_to_zh, | 239 | + index_languages=index_langs, |
| 261 | ) or {} | 240 | ) or {} |
| 262 | _set_lang_obj("description", desc_text, translations) | 241 | _set_lang_obj("description", desc_text, translations) |
| 263 | 242 | ||
| 264 | # Vendor | 243 | # Vendor |
| 265 | if pd.notna(spu_row.get('vendor')): | 244 | if pd.notna(spu_row.get('vendor')): |
| 266 | vendor_text = str(spu_row['vendor']) | 245 | vendor_text = str(spu_row['vendor']) |
| 267 | - translations: Dict[str, str] = {} | 246 | + translations = {} |
| 268 | if self.translator: | 247 | if self.translator: |
| 269 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') | 248 | prompt = self.translation_prompts.get('default_zh') or self.translation_prompts.get('default_en') |
| 270 | translations = self.translator.translate_for_indexing( | 249 | translations = self.translator.translate_for_indexing( |
| @@ -272,8 +251,7 @@ class SPUDocumentTransformer: | @@ -272,8 +251,7 @@ class SPUDocumentTransformer: | ||
| 272 | shop_language=primary_lang, | 251 | shop_language=primary_lang, |
| 273 | source_lang=primary_lang, | 252 | source_lang=primary_lang, |
| 274 | prompt=prompt, | 253 | prompt=prompt, |
| 275 | - translate_to_en=translate_to_en, | ||
| 276 | - translate_to_zh=translate_to_zh, | 254 | + index_languages=index_langs, |
| 277 | ) or {} | 255 | ) or {} |
| 278 | _set_lang_obj("vendor", vendor_text, translations) | 256 | _set_lang_obj("vendor", vendor_text, translations) |
| 279 | 257 |
indexer/indexing_utils.py
| @@ -96,11 +96,9 @@ def create_document_transformer( | @@ -96,11 +96,9 @@ def create_document_transformer( | ||
| 96 | if searchable_option_dimensions is None: | 96 | if searchable_option_dimensions is None: |
| 97 | searchable_option_dimensions = config.spu_config.searchable_option_dimensions | 97 | searchable_option_dimensions = config.spu_config.searchable_option_dimensions |
| 98 | 98 | ||
| 99 | - # 根据租户配置决定是否需要翻译:只要开启任一方向的翻译,就初始化翻译器 | ||
| 100 | - translate_to_en = bool(tenant_config.get("translate_to_en")) | ||
| 101 | - translate_to_zh = bool(tenant_config.get("translate_to_zh")) | ||
| 102 | - | ||
| 103 | - if translator is None and (translate_to_en or translate_to_zh): | 99 | + index_langs = tenant_config.get("index_languages") or [] |
| 100 | + need_translator = len(index_langs) > 1 | ||
| 101 | + if translator is None and need_translator: | ||
| 104 | from query.translator import Translator | 102 | from query.translator import Translator |
| 105 | translator = Translator( | 103 | translator = Translator( |
| 106 | api_key=config.query_config.translation_api_key, | 104 | api_key=config.query_config.translation_api_key, |
indexer/test_indexing.py
| @@ -44,19 +44,25 @@ def test_tenant_config(): | @@ -44,19 +44,25 @@ def test_tenant_config(): | ||
| 44 | # 测试默认配置 | 44 | # 测试默认配置 |
| 45 | default_config = tenant_config_loader.get_tenant_config("999") | 45 | default_config = tenant_config_loader.get_tenant_config("999") |
| 46 | print(f"默认配置: {default_config}") | 46 | print(f"默认配置: {default_config}") |
| 47 | + assert "index_languages" in default_config, "默认配置应包含 index_languages" | ||
| 48 | + assert "en" in default_config["index_languages"] and "zh" in default_config["index_languages"], \ | ||
| 49 | + "默认 index_languages 应包含 en, zh" | ||
| 50 | + print("✓ 默认配置正确(index_languages 含 en, zh)") | ||
| 47 | 51 | ||
| 48 | - # 测试租户162(翻译关闭) | 52 | + # 测试租户162(index_languages: zh, en) |
| 49 | tenant_162_config = tenant_config_loader.get_tenant_config("162") | 53 | tenant_162_config = tenant_config_loader.get_tenant_config("162") |
| 50 | print(f"租户162配置: {tenant_162_config}") | 54 | print(f"租户162配置: {tenant_162_config}") |
| 51 | - assert tenant_162_config['translate_to_en'] == False, "租户162翻译应该关闭" | ||
| 52 | - assert tenant_162_config['translate_to_zh'] == False, "租户162翻译应该关闭" | ||
| 53 | - print("✓ 租户162配置正确(翻译关闭)") | 55 | + idx = tenant_162_config.get("index_languages") or [] |
| 56 | + assert "zh" in idx and "en" in idx, "租户162 index_languages 应包含 zh, en" | ||
| 57 | + print("✓ 租户162配置正确(index_languages 含 zh, en)") | ||
| 54 | 58 | ||
| 55 | - # 测试其他租户 | 59 | + # 测试租户1 |
| 56 | tenant_1_config = tenant_config_loader.get_tenant_config("1") | 60 | tenant_1_config = tenant_config_loader.get_tenant_config("1") |
| 57 | print(f"租户1配置: {tenant_1_config}") | 61 | print(f"租户1配置: {tenant_1_config}") |
| 58 | - assert tenant_1_config['translate_to_en'] == True, "租户1应该启用英文翻译" | ||
| 59 | - print("✓ 租户1配置正确(翻译开启)") | 62 | + idx1 = tenant_1_config.get("index_languages") or [] |
| 63 | + assert "zh" in idx1 and "en" in idx1, "租户1 index_languages 应包含 zh, en" | ||
| 64 | + assert tenant_1_config.get("primary_language") == "zh", "租户1 主语言为 zh" | ||
| 65 | + print("✓ 租户1配置正确(index_languages 含 zh, en,主语言 zh)") | ||
| 60 | 66 | ||
| 61 | return True | 67 | return True |
| 62 | except Exception as e: | 68 | except Exception as e: |
| @@ -118,13 +124,12 @@ def test_full_indexing(tenant_id: str = "162"): | @@ -118,13 +124,12 @@ def test_full_indexing(tenant_id: str = "162"): | ||
| 118 | print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}") | 124 | print(f" 标题 (中文): {title_obj.get('zh', 'N/A') if isinstance(title_obj, dict) else 'N/A'}") |
| 119 | print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}") | 125 | print(f" 标题 (英文): {title_obj.get('en', 'N/A') if isinstance(title_obj, dict) else 'N/A'}") |
| 120 | 126 | ||
| 121 | - # 检查租户162的翻译状态 | 127 | + # 租户162 index_languages [zh, en],应有 title.en |
| 122 | if tenant_id == "162": | 128 | if tenant_id == "162": |
| 123 | - # 租户162翻译应该关闭:只写入主语言,不应出现 title.en | ||
| 124 | - if isinstance(title_obj, dict) and title_obj.get("en") is None: | ||
| 125 | - print(f" ✓ 翻译已关闭(title.en为空)") | 129 | + if isinstance(title_obj, dict) and title_obj.get("en"): |
| 130 | + print(f" ✓ 多语言索引正常(title.en 已填充)") | ||
| 126 | else: | 131 | else: |
| 127 | - print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}") | 132 | + print(f" ⚠ 警告:租户162 配置 [zh,en],但 title.en 为空") |
| 128 | 133 | ||
| 129 | return True | 134 | return True |
| 130 | 135 | ||
| @@ -199,12 +204,12 @@ def test_incremental_indexing(tenant_id: str = "162"): | @@ -199,12 +204,12 @@ def test_incremental_indexing(tenant_id: str = "162"): | ||
| 199 | print(f" SKU数量: {len(doc.get('skus', []))}") | 204 | print(f" SKU数量: {len(doc.get('skus', []))}") |
| 200 | print(f" 规格数量: {len(doc.get('specifications', []))}") | 205 | print(f" 规格数量: {len(doc.get('specifications', []))}") |
| 201 | 206 | ||
| 202 | - # 检查租户162的翻译状态 | 207 | + # 租户162 配置了 index_languages [zh, en],应有 title.en |
| 203 | if tenant_id == "162": | 208 | if tenant_id == "162": |
| 204 | - if isinstance(title_obj, dict) and title_obj.get("en") is None: | ||
| 205 | - print(f" ✓ 翻译已关闭(title.en为空)") | 209 | + if isinstance(title_obj, dict) and title_obj.get("en"): |
| 210 | + print(f" ✓ 多语言索引正常(title.en 已填充)") | ||
| 206 | else: | 211 | else: |
| 207 | - print(f" ⚠ 警告:翻译应该关闭,但title.en有值: {title_obj.get('en') if isinstance(title_obj, dict) else None}") | 212 | + print(f" ⚠ 警告:租户162 配置 [zh,en],但 title.en 为空") |
| 208 | 213 | ||
| 209 | return True | 214 | return True |
| 210 | 215 | ||
| @@ -298,11 +303,13 @@ def test_document_transformer(): | @@ -298,11 +303,13 @@ def test_document_transformer(): | ||
| 298 | print(f" title.en: {title_obj.get('en') if isinstance(title_obj, dict) else None}") | 303 | print(f" title.en: {title_obj.get('en') if isinstance(title_obj, dict) else None}") |
| 299 | print(f" SKU数量: {len(doc.get('skus', []))}") | 304 | print(f" SKU数量: {len(doc.get('skus', []))}") |
| 300 | 305 | ||
| 301 | - # 验证租户162翻译关闭 | ||
| 302 | - if isinstance(title_obj, dict) and title_obj.get("en") is None: | ||
| 303 | - print(f" ✓ 翻译已关闭(符合租户162配置)") | 306 | + # 租户162 index_languages [zh, en],主语言 zh,应有 zh(原文)与 en(翻译) |
| 307 | + if isinstance(title_obj, dict) and title_obj.get("zh") and title_obj.get("en"): | ||
| 308 | + print(f" ✓ 多语言字段正确(zh + en)") | ||
| 309 | + elif isinstance(title_obj, dict) and title_obj.get("zh"): | ||
| 310 | + print(f" ⚠ 仅有 zh(若未配置翻译或翻译未调用可接受)") | ||
| 304 | else: | 311 | else: |
| 305 | - print(f" ⚠ 警告:翻译应该关闭") | 312 | + print(f" ⚠ 未发现预期多语言字段") |
| 306 | 313 | ||
| 307 | return True | 314 | return True |
| 308 | else: | 315 | else: |
query/query_parser.py
| @@ -58,8 +58,7 @@ class ParsedQuery: | @@ -58,8 +58,7 @@ class ParsedQuery: | ||
| 58 | "rewritten_query": self.rewritten_query, | 58 | "rewritten_query": self.rewritten_query, |
| 59 | "detected_language": self.detected_language, | 59 | "detected_language": self.detected_language, |
| 60 | "translations": self.translations, | 60 | "translations": self.translations, |
| 61 | - "domain": self.domain, | ||
| 62 | - "has_vector": self.query_vector is not None | 61 | + "domain": self.domain |
| 63 | } | 62 | } |
| 64 | return result | 63 | return result |
| 65 | 64 | ||
| @@ -228,23 +227,16 @@ class QueryParser: | @@ -228,23 +227,16 @@ class QueryParser: | ||
| 228 | translations = {} | 227 | translations = {} |
| 229 | translation_futures = {} | 228 | translation_futures = {} |
| 230 | try: | 229 | try: |
| 231 | - # 根据租户配置决定翻译目标语言 | 230 | + # 根据租户配置的 index_languages 决定翻译目标语言 |
| 232 | from config.tenant_config_loader import get_tenant_config_loader | 231 | from config.tenant_config_loader import get_tenant_config_loader |
| 233 | tenant_loader = get_tenant_config_loader() | 232 | tenant_loader = get_tenant_config_loader() |
| 234 | tenant_cfg = tenant_loader.get_tenant_config(tenant_id or "default") | 233 | tenant_cfg = tenant_loader.get_tenant_config(tenant_id or "default") |
| 234 | + index_langs = tenant_cfg.get("index_languages") or ["en", "zh"] | ||
| 235 | 235 | ||
| 236 | - translate_to_zh = bool(tenant_cfg.get("translate_to_zh")) | ||
| 237 | - translate_to_en = bool(tenant_cfg.get("translate_to_en")) | 236 | + target_langs_for_translation = [lang for lang in index_langs if lang != detected_lang] |
| 238 | 237 | ||
| 239 | - target_langs_for_translation = [] | ||
| 240 | - if translate_to_zh: | ||
| 241 | - target_langs_for_translation.append('zh') | ||
| 242 | - if translate_to_en: | ||
| 243 | - target_langs_for_translation.append('en') | ||
| 244 | - | ||
| 245 | - # 如果该租户未开启任何翻译方向,则直接跳过翻译阶段 | ||
| 246 | if target_langs_for_translation: | 238 | if target_langs_for_translation: |
| 247 | - target_langs = [lang for lang in target_langs_for_translation if detected_lang != lang] | 239 | + target_langs = target_langs_for_translation |
| 248 | 240 | ||
| 249 | if target_langs: | 241 | if target_langs: |
| 250 | # Use e-commerce context for better disambiguation | 242 | # Use e-commerce context for better disambiguation |
| @@ -254,8 +246,8 @@ class QueryParser: | @@ -254,8 +246,8 @@ class QueryParser: | ||
| 254 | self.config.query_config.translation_prompts.get('default_zh') | 246 | self.config.query_config.translation_prompts.get('default_zh') |
| 255 | 247 | ||
| 256 | # Determine if we need to wait for translation results | 248 | # Determine if we need to wait for translation results |
| 257 | - # If detected_lang is neither 'en' nor 'zh', we must wait for translation | ||
| 258 | - need_wait_translation = detected_lang not in ['en', 'zh'] | 249 | + # If detected_lang is not in index_languages, we must wait for translation |
| 250 | + need_wait_translation = detected_lang not in index_langs | ||
| 259 | 251 | ||
| 260 | if need_wait_translation: | 252 | if need_wait_translation: |
| 261 | # Use async method that returns Futures, so we can wait for results | 253 | # Use async method that returns Futures, so we can wait for results |
query/translator.py
| @@ -792,6 +792,18 @@ class Translator: | @@ -792,6 +792,18 @@ class Translator: | ||
| 792 | # The user can configure a glossary for better results | 792 | # The user can configure a glossary for better results |
| 793 | return translated_text | 793 | return translated_text |
| 794 | 794 | ||
| 795 | + def _shop_lang_matches(self, shop_lang_lower: str, lang_code: str) -> bool: | ||
| 796 | + """True if shop language matches index language (use source, no translate).""" | ||
| 797 | + if not shop_lang_lower or not lang_code: | ||
| 798 | + return False | ||
| 799 | + if shop_lang_lower == lang_code: | ||
| 800 | + return True | ||
| 801 | + if lang_code == "zh" and "zh" in shop_lang_lower: | ||
| 802 | + return True | ||
| 803 | + if lang_code == "en" and "en" in shop_lang_lower: | ||
| 804 | + return True | ||
| 805 | + return False | ||
| 806 | + | ||
| 795 | def translate_for_indexing( | 807 | def translate_for_indexing( |
| 796 | self, | 808 | self, |
| 797 | text: str, | 809 | text: str, |
| @@ -799,76 +811,55 @@ class Translator: | @@ -799,76 +811,55 @@ class Translator: | ||
| 799 | source_lang: Optional[str] = None, | 811 | source_lang: Optional[str] = None, |
| 800 | context: Optional[str] = None, | 812 | context: Optional[str] = None, |
| 801 | prompt: Optional[str] = None, | 813 | prompt: Optional[str] = None, |
| 802 | - translate_to_en: bool = True, | ||
| 803 | - translate_to_zh: bool = True, | 814 | + index_languages: Optional[List[str]] = None, |
| 804 | ) -> Dict[str, Optional[str]]: | 815 | ) -> Dict[str, Optional[str]]: |
| 805 | """ | 816 | """ |
| 806 | - Translate text for indexing based on shop language and tenant configuration. | ||
| 807 | - | ||
| 808 | - Translation behavior: | ||
| 809 | - - If translate_to_zh=True and shop language is not 'zh', translate to Chinese (zh) | ||
| 810 | - - If translate_to_en=True and shop language is not 'en', translate to English (en) | ||
| 811 | - - If both flags are False, no translation is performed (returns None for both) | ||
| 812 | - | 817 | + Translate text for indexing based on shop language and tenant index_languages. |
| 818 | + | ||
| 819 | + For each language in index_languages: use source text if shop language matches, | ||
| 820 | + otherwise translate to that language. | ||
| 821 | + | ||
| 813 | Args: | 822 | Args: |
| 814 | text: Text to translate | 823 | text: Text to translate |
| 815 | - shop_language: Shop's configured language (e.g., 'zh', 'en', 'ru') | ||
| 816 | - source_lang: Source language code (optional, auto-detect if None) | 824 | + shop_language: Shop primary language (e.g. 'zh', 'en', 'ru') |
| 825 | + source_lang: Source language code (optional) | ||
| 817 | context: Additional context for translation (optional) | 826 | context: Additional context for translation (optional) |
| 818 | - prompt: Translation prompt/instruction (optional) | ||
| 819 | - translate_to_en: Whether to translate to English (from tenant_config) | ||
| 820 | - translate_to_zh: Whether to translate to Chinese (from tenant_config) | ||
| 821 | - | 827 | + prompt: Translation prompt (optional) |
| 828 | + index_languages: Languages to index (from tenant_config). Default ["en", "zh"]. | ||
| 829 | + | ||
| 822 | Returns: | 830 | Returns: |
| 823 | - Dictionary with 'zh' and 'en' keys containing translated text (or None if not needed/not enabled) | ||
| 824 | - Example: {'zh': '中文翻译', 'en': 'English translation'} or {'zh': None, 'en': None} | 831 | + Dict keyed by each index_language with translated or source text (or None). |
| 825 | """ | 832 | """ |
| 833 | + langs = index_languages if index_languages else ["en", "zh"] | ||
| 834 | + results = {lang: None for lang in langs} | ||
| 826 | if not text or not text.strip(): | 835 | if not text or not text.strip(): |
| 827 | - return {'zh': None, 'en': None} | ||
| 828 | - | ||
| 829 | - # Skip translation for symbol-only queries | 836 | + return results |
| 830 | if re.match(r'^[\d\s_-]+$', text): | 837 | if re.match(r'^[\d\s_-]+$', text): |
| 831 | logger.info(f"[Translator] Skip translation for symbol-only query: '{text}'") | 838 | logger.info(f"[Translator] Skip translation for symbol-only query: '{text}'") |
| 832 | - return {'zh': None, 'en': None} | ||
| 833 | - | ||
| 834 | - results = {'zh': None, 'en': None} | ||
| 835 | - shop_lang_lower = shop_language.lower() if shop_language else "" | ||
| 836 | - | ||
| 837 | - # Determine which languages need translation based on tenant configuration | ||
| 838 | - targets = [] | ||
| 839 | - if translate_to_zh and "zh" not in shop_lang_lower: | ||
| 840 | - targets.append("zh") | ||
| 841 | - if translate_to_en and "en" not in shop_lang_lower: | ||
| 842 | - targets.append("en") | ||
| 843 | - | ||
| 844 | - # If shop language is already zh and en, no translation needed | ||
| 845 | - if not targets: | ||
| 846 | - # Use original text for both languages | ||
| 847 | - if "zh" in shop_lang_lower: | ||
| 848 | - results['zh'] = text | ||
| 849 | - if "en" in shop_lang_lower: | ||
| 850 | - results['en'] = text | ||
| 851 | return results | 839 | return results |
| 852 | - | ||
| 853 | - # Translate to each target language | 840 | + |
| 841 | + shop_lang_lower = (shop_language or "").strip().lower() | ||
| 842 | + targets = [] | ||
| 843 | + for lang in langs: | ||
| 844 | + if self._shop_lang_matches(shop_lang_lower, lang): | ||
| 845 | + results[lang] = text | ||
| 846 | + else: | ||
| 847 | + targets.append(lang) | ||
| 848 | + | ||
| 854 | for target_lang in targets: | 849 | for target_lang in targets: |
| 855 | - # Check cache first | ||
| 856 | cached = self._get_cached_translation_redis(text, target_lang, source_lang, context, prompt) | 850 | cached = self._get_cached_translation_redis(text, target_lang, source_lang, context, prompt) |
| 857 | if cached: | 851 | if cached: |
| 858 | results[target_lang] = cached | 852 | results[target_lang] = cached |
| 859 | logger.debug(f"[Translator] Cache hit for indexing: '{text}' -> {target_lang}: {cached}") | 853 | logger.debug(f"[Translator] Cache hit for indexing: '{text}' -> {target_lang}: {cached}") |
| 860 | continue | 854 | continue |
| 861 | - | ||
| 862 | - # Translate synchronously for indexing (we need the result immediately) | ||
| 863 | translated = self.translate( | 855 | translated = self.translate( |
| 864 | text, | 856 | text, |
| 865 | target_lang=target_lang, | 857 | target_lang=target_lang, |
| 866 | source_lang=source_lang or shop_language, | 858 | source_lang=source_lang or shop_language, |
| 867 | context=context, | 859 | context=context, |
| 868 | - prompt=prompt | 860 | + prompt=prompt, |
| 869 | ) | 861 | ) |
| 870 | results[target_lang] = translated | 862 | results[target_lang] = translated |
| 871 | - | ||
| 872 | return results | 863 | return results |
| 873 | 864 | ||
| 874 | def get_translation_needs( | 865 | def get_translation_needs( |
search/searcher.py
| @@ -165,7 +165,8 @@ class Searcher: | @@ -165,7 +165,8 @@ class Searcher: | ||
| 165 | # 根据租户配置决定翻译开关(离线/在线统一) | 165 | # 根据租户配置决定翻译开关(离线/在线统一) |
| 166 | tenant_loader = get_tenant_config_loader() | 166 | tenant_loader = get_tenant_config_loader() |
| 167 | tenant_cfg = tenant_loader.get_tenant_config(tenant_id) | 167 | tenant_cfg = tenant_loader.get_tenant_config(tenant_id) |
| 168 | - enable_translation = bool(tenant_cfg.get("translate_to_en") or tenant_cfg.get("translate_to_zh")) | 168 | + index_langs = tenant_cfg.get("index_languages") or [] |
| 169 | + enable_translation = len(index_langs) > 0 | ||
| 169 | enable_embedding = self.config.query_config.enable_text_embedding | 170 | enable_embedding = self.config.query_config.enable_text_embedding |
| 170 | enable_rerank = False # Temporarily disabled | 171 | enable_rerank = False # Temporarily disabled |
| 171 | 172 |