Commit e56fbdc11793ad7ec31042bc4e63a7f2a1c7780e
1 parent: 0410cf4d
query trans
Showing 1 changed file with 21 additions and 0 deletions
Show diff stats
query/query_parser.py
| @@ -151,6 +151,17 @@ class QueryParser: | @@ -151,6 +151,17 @@ class QueryParser: | ||
| 151 | """Return pre-initialized translator.""" | 151 | """Return pre-initialized translator.""" |
| 152 | return self._translator | 152 | return self._translator |
| 153 | 153 | ||
| 154 | + @staticmethod | ||
| 155 | + def _pick_query_translation_model(source_lang: str, target_lang: str) -> str: | ||
| 156 | + """Pick the translation capability for query-time translation.""" | ||
| 157 | + src = str(source_lang or "").strip().lower() | ||
| 158 | + tgt = str(target_lang or "").strip().lower() | ||
| 159 | + if src == "zh" and tgt == "en": | ||
| 160 | + return "opus-mt-zh-en" | ||
| 161 | + if src == "en" and tgt == "zh": | ||
| 162 | + return "opus-mt-en-zh" | ||
| 163 | + return "deepl" | ||
| 164 | + | ||
| 154 | def _simple_tokenize(self, text: str) -> List[str]: | 165 | def _simple_tokenize(self, text: str) -> List[str]: |
| 155 | """ | 166 | """ |
| 156 | Lightweight tokenizer fallback. | 167 | Lightweight tokenizer fallback. |
| @@ -303,21 +314,31 @@ class QueryParser: | @@ -303,21 +314,31 @@ class QueryParser: | ||
| 303 | thread_name_prefix="query-translation-wait", | 314 | thread_name_prefix="query-translation-wait", |
| 304 | ) | 315 | ) |
| 305 | for lang in target_langs: | 316 | for lang in target_langs: |
| 317 | + model_name = self._pick_query_translation_model(detected_lang, lang) | ||
| 318 | + log_debug( | ||
| 319 | + f"Submitting query translation | source={detected_lang} target={lang} model={model_name}" | ||
| 320 | + ) | ||
| 306 | translation_futures[lang] = translation_executor.submit( | 321 | translation_futures[lang] = translation_executor.submit( |
| 307 | self.translator.translate, | 322 | self.translator.translate, |
| 308 | query_text, | 323 | query_text, |
| 309 | lang, | 324 | lang, |
| 310 | detected_lang, | 325 | detected_lang, |
| 311 | "ecommerce_search_query", | 326 | "ecommerce_search_query", |
| 327 | + model_name, | ||
| 312 | ) | 328 | ) |
| 313 | else: | 329 | else: |
| 314 | for lang in target_langs: | 330 | for lang in target_langs: |
| 331 | + model_name = self._pick_query_translation_model(detected_lang, lang) | ||
| 332 | + log_debug( | ||
| 333 | + f"Submitting query translation | source={detected_lang} target={lang} model={model_name}" | ||
| 334 | + ) | ||
| 315 | self._translation_executor.submit( | 335 | self._translation_executor.submit( |
| 316 | self.translator.translate, | 336 | self.translator.translate, |
| 317 | query_text, | 337 | query_text, |
| 318 | lang, | 338 | lang, |
| 319 | detected_lang, | 339 | detected_lang, |
| 320 | "ecommerce_search_query", | 340 | "ecommerce_search_query", |
| 341 | + model_name, | ||
| 321 | ) | 342 | ) |
| 322 | 343 | ||
| 323 | if translations: | 344 | if translations: |