diff --git a/query/query_parser.py b/query/query_parser.py index 364ddd4..d24e4fa 100644 --- a/query/query_parser.py +++ b/query/query_parser.py @@ -214,7 +214,7 @@ class QueryParser: if context: context.store_intermediate_result('detected_language', detected_lang) - # Stage 4: Translation + # Stage 4: Translation (async mode - only returns cached results, missing ones translated in background) translations = {} if self.config.query_config.enable_translation: try: @@ -228,16 +228,24 @@ class QueryParser: ) if target_langs: - log_info(f"开始翻译 | 源语言: {detected_lang} | 目标语言: {target_langs}") # Use e-commerce context for better disambiguation translation_context = 'e-commerce product search' + # Use async mode: returns cached translations immediately, missing ones translated in background translations = self.translator.translate_multi( query_text, target_langs, source_lang=detected_lang, - context=translation_context + context=translation_context, + async_mode=True ) - log_info(f"翻译完成 | 结果: {translations}") + # Filter out None values (missing translations that are being processed async) + translations = {k: v for k, v in translations.items() if v is not None} + + if translations: + log_info(f"翻译完成(缓存命中) | 结果: {translations}") + else: + log_debug(f"翻译未命中缓存,异步翻译中...") + if context: context.store_intermediate_result('translations', translations) for lang, translation in translations.items(): diff --git a/query/translator.py b/query/translator.py index 2f99f75..2f99310 100644 --- a/query/translator.py +++ b/query/translator.py @@ -5,8 +5,13 @@ Supports DeepL API for high-quality translations. 
# Translator methods reconstructed from the collapsed diff hunk (shown
# dedented; the enclosing `class Translator:` header is outside this view).


def translate_multi(
    self,
    text: str,
    target_langs: List[str],
    source_lang: Optional[str] = None,
    context: Optional[str] = None,
    async_mode: bool = True
) -> Dict[str, Optional[str]]:
    """
    Translate text to multiple target languages.

    In async_mode=True (default):
    - Returns cached translations immediately if available
    - Schedules a background job for every language missing from the cache
      (non-blocking) and reports that language as None in the result
    - If the translator has no cache at all, falls back to blocking
      translation: a background result could never be read back by the
      cache lookup, so the caller would otherwise receive None forever

    In async_mode=False:
    - Cached languages are served from the cache; the rest are translated
      synchronously via ``self.translate`` (blocking)

    Args:
        text: Text to translate
        target_langs: List of target language codes; duplicates are ignored
        source_lang: Source language code (optional)
        context: Context hint for translation (optional)
        async_mode: If True, return cached results immediately and translate
            missing ones in the background

    Returns:
        Dictionary mapping each requested language code to its translation,
        in the order the codes first appear in ``target_langs``. A value is
        None when the translation was deferred to a background job (or when
        ``self.translate`` itself returned None).
    """
    # Background work is only useful when its result can be looked up later.
    # NOTE(review): this assumes self.translate stores its result in
    # self.cache under the key built by _get_cached_translation - confirm.
    run_in_background = async_mode and self.cache is not None

    results: Dict[str, Optional[str]] = {}
    # dict.fromkeys de-duplicates while keeping caller order, so a repeated
    # language code cannot queue the same background job twice.
    for lang in dict.fromkeys(target_langs):
        cached = self._get_cached_translation(text, lang, source_lang, context)
        if cached is not None:
            results[lang] = cached
        elif run_in_background:
            # NOTE(review): concurrent requests for the same uncached text
            # each queue their own job; consider tracking in-flight keys.
            self._translate_async(text, lang, source_lang, context)
            results[lang] = None
        else:
            results[lang] = self.translate(text, lang, source_lang, context)

    return results


def _get_cached_translation(
    self,
    text: str,
    target_lang: str,
    source_lang: Optional[str] = None,
    context: Optional[str] = None
) -> Optional[str]:
    """
    Get translation from cache if available.

    Args:
        text: Text whose translation is looked up
        target_lang: Target language code
        source_lang: Source language code; 'auto' is used in the key when omitted
        context: Context hint; ``self.translation_context`` is used when omitted

    Returns:
        Whatever ``self.cache.get`` returns for the key, or None when the
        translator has no cache.
    """
    # Compare against None explicitly so that a cache object which happens
    # to be falsy (e.g. an empty container) is still consulted.
    if self.cache is None:
        return None

    translation_context = context or self.translation_context
    # NOTE(review): this key layout must stay identical to the one used
    # where translations are written to the cache - verify against translate().
    cache_key = f"{source_lang or 'auto'}:{target_lang}:{translation_context}:{text}"
    return self.cache.get(cache_key, category="translations")


def _translate_async(
    self,
    text: str,
    target_lang: str,
    source_lang: Optional[str] = None,
    context: Optional[str] = None
):
    """
    Launch async translation task.

    Submits a call to ``self.translate`` on ``self.executor``. Any exception
    raised by the translation is logged as a warning inside the worker
    instead of being left unobserved on the future.

    Args:
        text: Text to translate
        target_lang: Target language code
        source_lang: Source language code (optional)
        context: Context hint for translation (optional)

    Returns:
        The object returned by ``self.executor.submit`` so that callers
        which care (e.g. tests) can wait on it; existing callers ignore it.
    """
    def _do_translate():
        # Same logger as a module-level logging.getLogger(__name__).
        log = logging.getLogger(__name__)
        try:
            result = self.translate(text, target_lang, source_lang, context)
        except Exception as e:
            # Worker-thread boundary: report the failure, never propagate.
            log.warning(
                "Async translation failed: %s -> %s: %s", text, target_lang, e
            )
        else:
            if result:
                log.debug(
                    "Async translation completed: %s -> %s: %s",
                    text, target_lang, result
                )

    return self.executor.submit(_do_translate)