Commit e56fbdc11793ad7ec31042bc4e63a7f2a1c7780e

Authored by tangwang
1 parent 0410cf4d

query trans

Showing 1 changed file with 21 additions and 0 deletions   Show diff stats
query/query_parser.py
... ... @@ -151,6 +151,17 @@ class QueryParser:
151 151 """Return pre-initialized translator."""
152 152 return self._translator
153 153  
  154 + @staticmethod
  155 + def _pick_query_translation_model(source_lang: str, target_lang: str) -> str:
  156 + """Pick the translation capability for query-time translation."""
  157 + src = str(source_lang or "").strip().lower()
  158 + tgt = str(target_lang or "").strip().lower()
  159 + if src == "zh" and tgt == "en":
  160 + return "opus-mt-zh-en"
  161 + if src == "en" and tgt == "zh":
  162 + return "opus-mt-en-zh"
  163 + return "deepl"
  164 +
154 165 def _simple_tokenize(self, text: str) -> List[str]:
155 166 """
156 167 Lightweight tokenizer fallback.
... ... @@ -303,21 +314,31 @@ class QueryParser:
303 314 thread_name_prefix="query-translation-wait",
304 315 )
305 316 for lang in target_langs:
  317 + model_name = self._pick_query_translation_model(detected_lang, lang)
  318 + log_debug(
  319 + f"Submitting query translation | source={detected_lang} target={lang} model={model_name}"
  320 + )
306 321 translation_futures[lang] = translation_executor.submit(
307 322 self.translator.translate,
308 323 query_text,
309 324 lang,
310 325 detected_lang,
311 326 "ecommerce_search_query",
  327 + model_name,
312 328 )
313 329 else:
314 330 for lang in target_langs:
  331 + model_name = self._pick_query_translation_model(detected_lang, lang)
  332 + log_debug(
  333 + f"Submitting query translation | source={detected_lang} target={lang} model={model_name}"
  334 + )
315 335 self._translation_executor.submit(
316 336 self.translator.translate,
317 337 query_text,
318 338 lang,
319 339 detected_lang,
320 340 "ecommerce_search_query",
  341 + model_name,
321 342 )
322 343  
323 344 if translations:
... ...