"""
Translation service for multi-language query support.

Supports DeepL API for high-quality translations.

Official documentation:
https://developers.deepl.com/api-reference/translate/request-translation
"""

import logging
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Tuple

import requests

from utils.cache import DictCache

logger = logging.getLogger(__name__)

# DEEPL_AUTH_KEY is optional: the module must stay importable when the
# project configuration is not available.
try:
    from config.env_config import DEEPL_AUTH_KEY
except ImportError:
    DEEPL_AUTH_KEY = None


class Translator:
    """Multi-language translator using DeepL API."""

    DEEPL_API_URL = "https://api.deepl.com/v2/translate"  # Pro tier
    DEEPL_FREE_API_URL = "https://api-free.deepl.com/v2/translate"  # Free tier

    # Language code mapping (internal lowercase code -> DeepL code)
    LANG_CODE_MAP = {
        'zh': 'ZH',
        'en': 'EN',
        'ru': 'RU',
        'ar': 'AR',
        'ja': 'JA',
        'es': 'ES',
        'de': 'DE',
        'fr': 'FR',
        'it': 'IT',
        'pt': 'PT',
    }

    # Words introduced by the disambiguation phrase ("buy <term>") that must
    # be skipped when extracting the real term from an English translation.
    _CONTEXT_WORDS = frozenset(
        {"buy", "purchase", "product", "item", "commodity", "goods"}
    )
    _TRAILING_PUNCTUATION = '.,!?;:'

    # Log message templates per endpoint, keyed by "is free tier":
    # (HTTP error, timeout, unexpected failure).
    _LOG_MESSAGES = {
        False: (
            "[Translator] DeepL API error: %s - %s",
            "[Translator] Translation request timed out",
            "[Translator] Translation failed: %s",
        ),
        True: (
            "[Translator] DeepL Free API error: %s - %s",
            "[Translator] Free API request timed out",
            "[Translator] Free API translation failed: %s",
        ),
    }

    def __init__(
        self,
        api_key: Optional[str] = None,
        use_cache: bool = True,
        timeout: int = 10,
        glossary_id: Optional[str] = None,
        translation_context: Optional[str] = None
    ):
        """
        Initialize translator.

        Args:
            api_key: DeepL API key (or None to use from config/env)
            use_cache: Whether to cache translations
            timeout: Request timeout in seconds
            glossary_id: DeepL glossary ID for custom terminology (optional)
            translation_context: Context hint for translation
                (e.g., "e-commerce", "product search")
        """
        # Fall back to the configured key when none is passed explicitly.
        if api_key is None and DEEPL_AUTH_KEY:
            api_key = DEEPL_AUTH_KEY

        self.api_key = api_key
        self.timeout = timeout
        self.use_cache = use_cache
        self.glossary_id = glossary_id
        self.translation_context = translation_context or "e-commerce product search"

        if use_cache:
            self.cache = DictCache(".cache/translations.json")
        else:
            self.cache = None

        # Thread pool used by translate_multi() for background translations.
        self.executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="translator")

    def close(self, wait: bool = True) -> None:
        """
        Shut down the background translation thread pool.

        Args:
            wait: If True, block until already-submitted translations finish.
        """
        self.executor.shutdown(wait=wait)

    def __enter__(self) -> "Translator":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def _cache_enabled(self) -> bool:
        """Return True when caching is requested and a cache object exists."""
        return bool(self.use_cache) and self.cache is not None

    def _build_cache_key(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> str:
        """
        Build the cache key shared by translate() and _get_cached_translation().

        Language codes are lowercased and the default context is applied here
        so that both read and write paths always agree on the key.
        """
        key_parts = [
            source_lang.lower() if source_lang else 'auto',
            target_lang.lower(),
            context or self.translation_context,
        ]
        if prompt:
            key_parts.append(prompt)
        key_parts.append(text)
        return ':'.join(key_parts)

    def translate(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> Optional[str]:
        """
        Translate text to target language (synchronous mode).

        Args:
            text: Text to translate
            target_lang: Target language code ('zh', 'en', 'ru', etc.)
            source_lang: Source language code (optional, auto-detect if None)
            context: Additional context for translation (overrides default context)
            prompt: Translation prompt/instruction (optional, for better
                translation quality)

        Returns:
            The translated text. The input text is returned unchanged when it
            is empty/blank, when no API key is configured (mock mode), or when
            every translation attempt fails.
        """
        if not text or not text.strip():
            return text

        # Normalize language codes
        target_lang = target_lang.lower()
        if source_lang:
            source_lang = source_lang.lower()

        # Use provided context or default context
        translation_context = context or self.translation_context

        # Context and prompt are part of the key because they influence the result.
        cache_key = self._build_cache_key(
            text, target_lang, source_lang, translation_context, prompt
        )

        if self._cache_enabled():
            cached = self.cache.get(cache_key, category="translations")
            if cached:
                return cached

        # If no API key, return mock translation (for testing)
        if not self.api_key:
            logger.debug("[Translator] No API key, returning original text (mock mode)")
            return text

        result = self._translate_deepl(
            text, target_lang, source_lang, translation_context, prompt
        )

        # If the Pro endpoint failed, retry once against the Free endpoint.
        if result is None and "api.deepl.com" in self.DEEPL_API_URL:
            logger.debug("[Translator] Pro API failed, trying free API...")
            result = self._translate_deepl_free(
                text, target_lang, source_lang, translation_context, prompt
            )

        if result is None:
            logger.warning(
                "[Translator] Translation failed for '%s...', returning original text",
                text[:50],
            )
            # Deliberately NOT cached: storing the untranslated text would
            # make a transient failure permanent for this key.
            return text

        if result and self._cache_enabled():
            self.cache.set(cache_key, result, category="translations")

        return result

    def _build_payload(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str],
        api_context: Optional[str]
    ) -> dict:
        """Build the JSON body common to the Pro and Free translate endpoints."""
        payload = {
            "text": [text],
            "target_lang": self.LANG_CODE_MAP.get(target_lang, target_lang.upper()),
        }
        if source_lang:
            payload["source_lang"] = self.LANG_CODE_MAP.get(
                source_lang, source_lang.upper()
            )
        # "context" influences the translation but is not translated itself.
        if api_context:
            payload["context"] = api_context
        return payload

    def _post_translation(
        self,
        url: str,
        payload: dict,
        free_tier: bool = False
    ) -> Optional[str]:
        """
        POST a translate request and return the first translated text.

        Args:
            url: Endpoint URL to call
            payload: JSON request body
            free_tier: Selects the log messages used for this endpoint

        Returns:
            The first translation's text, or None on a non-200 status, a
            timeout, a response without translations, or any other error.
        """
        error_msg, timeout_msg, failure_msg = self._LOG_MESSAGES[bool(free_tier)]
        headers = {
            "Authorization": f"DeepL-Auth-Key {self.api_key}",
            "Content-Type": "application/json",
        }

        try:
            response = requests.post(
                url,
                headers=headers,
                json=payload,
                timeout=self.timeout
            )
            if response.status_code != 200:
                logger.error(error_msg, response.status_code, response.text)
                return None

            data = response.json()
            translations = data.get("translations") if isinstance(data, dict) else None
            if not translations:
                return None

            translated = translations[0]["text"]
            # Callers post-process the value as a string; reject anything else.
            return translated if isinstance(translated, str) else None
        except requests.Timeout:
            logger.warning(timeout_msg)
            return None
        except Exception as e:
            # Best-effort boundary: translation problems must never propagate
            # to the caller, which falls back to the original text.
            logger.error(failure_msg, e, exc_info=True)
            return None

    def _translate_deepl(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str],
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> Optional[str]:
        """
        Translate using DeepL API with context and glossary support.

        Args:
            text: Text to translate
            target_lang: Target language code
            source_lang: Source language code (optional)
            context: Context hint for translation (e.g., "e-commerce product search")
            prompt: Translation prompt; when given it is sent as the DeepL
                "context" parameter instead of `context`

        Returns:
            Translated text, or None if the request failed.
        """
        # The prompt is sent as the API "context" parameter, not as a text prefix.
        api_context = prompt if prompt else context

        # Very short Chinese terms such as "车" (car vs rook) are ambiguous;
        # they are wrapped in a short phrase and the term is extracted afterwards.
        text_to_translate, needs_extraction = self._add_ecommerce_context(
            text, source_lang, api_context
        )

        payload = self._build_payload(
            text_to_translate, target_lang, source_lang, api_context
        )
        if self.glossary_id:
            payload["glossary_id"] = self.glossary_id

        translated = self._post_translation(self.DEEPL_API_URL, payload, free_tier=False)

        if translated is not None and needs_extraction:
            translated = self._extract_term_from_translation(
                translated, text, payload["target_lang"]
            )
        return translated

    def _translate_deepl_free(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str],
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> Optional[str]:
        """
        Translate using DeepL Free API.

        The glossary ID is intentionally not sent, and the text is sent as-is
        (no disambiguation phrase is added).

        Returns:
            Translated text, or None if the request failed.
        """
        api_context = prompt if prompt else context
        payload = self._build_payload(text, target_lang, source_lang, api_context)
        return self._post_translation(self.DEEPL_FREE_API_URL, payload, free_tier=True)

    def translate_multi(
        self,
        text: str,
        target_langs: List[str],
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        async_mode: bool = True,
        prompt: Optional[str] = None
    ) -> Dict[str, Optional[str]]:
        """
        Translate text to multiple target languages.

        In async_mode=True (default):
        - Returns cached translations immediately if available
        - Launches background tasks for missing translations (non-blocking)
        - Returns None for missing translations (they are written to the
          cache by the background task when it succeeds)

        In async_mode=False:
        - Waits for all translations to complete (blocking)

        Args:
            text: Text to translate
            target_langs: List of target language codes
            source_lang: Source language code (optional)
            context: Context hint for translation (optional)
            async_mode: If True, return cached results immediately and
                translate missing ones in the background
            prompt: Translation prompt/instruction (optional)

        Returns:
            Dictionary mapping each requested language code to its translated
            text (only cached results are filled in async mode).
        """
        results: Dict[str, Optional[str]] = {}
        missing_langs: List[str] = []

        # First, collect whatever is already cached.
        for lang in target_langs:
            cached = self._get_cached_translation(text, lang, source_lang, context, prompt)
            if cached is not None:
                results[lang] = cached
            else:
                missing_langs.append(lang)

        if async_mode and missing_langs:
            for lang in missing_langs:
                self._translate_async(text, lang, source_lang, context, prompt)
                results[lang] = None
        else:
            # Synchronous mode: wait for all translations
            for lang in missing_langs:
                results[lang] = self.translate(text, lang, source_lang, context, prompt)

        return results

    def _get_cached_translation(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> Optional[str]:
        """Get translation from cache if available (None when caching is off)."""
        if not self._cache_enabled():
            return None

        # Same key builder as translate(), so mixed-case language codes hit
        # the entries that translate() stored.
        cache_key = self._build_cache_key(text, target_lang, source_lang, context, prompt)
        return self.cache.get(cache_key, category="translations")

    def _translate_async(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ):
        """Launch a background translation task on the thread pool."""
        def _do_translate():
            try:
                result = self.translate(text, target_lang, source_lang, context, prompt)
                if result:
                    logger.debug(
                        "Async translation completed: %s -> %s: %s",
                        text, target_lang, result,
                    )
            except Exception as e:
                # Exceptions raised inside a pool task would otherwise be
                # stored on the discarded Future and never surface.
                logger.warning(
                    "Async translation failed: %s -> %s: %s", text, target_lang, e
                )

        self.executor.submit(_do_translate)

    def _add_ecommerce_context(
        self,
        text: str,
        source_lang: Optional[str],
        context: Optional[str]
    ) -> Tuple[str, bool]:
        """
        Add e-commerce context to text for better disambiguation.

        For very short single-token Chinese terms, a context phrase is added
        to help DeepL understand this is an e-commerce/product search query.

        Args:
            text: Original text to translate
            source_lang: Source language code
            context: Context hint

        Returns:
            Tuple of (text_with_context, needs_extraction)
            - text_with_context: Text to send to DeepL
            - needs_extraction: Whether the term must be extracted from the result
        """
        # Only apply for e-commerce context and Chinese source
        if not context or "e-commerce" not in context.lower():
            return text, False
        if not source_lang or source_lang.lower() != 'zh':
            return text, False

        text_stripped = text.strip()
        if len(text_stripped.split()) == 1 and len(text_stripped) <= 2:
            # "购买 <term>" means "buy <term>"; the translated prefix is
            # removed again by _extract_term_from_translation().
            return f"购买 {text_stripped}", True

        # Longer queries usually carry enough context on their own.
        return text, False

    def _extract_term_from_translation(
        self,
        translated_text: str,
        original_text: str,
        target_lang_code: str
    ) -> str:
        """
        Extract the actual term from a translation that included context.

        For example, if we translated "购买 车" (buy car) and got "buy car",
        we want to extract just "car".

        Args:
            translated_text: Full translation result
            original_text: Original single-word query
            target_lang_code: Target language code (EN, EN-US, ZH, etc.)

        Returns:
            Extracted (lowercased) term for English targets, otherwise the
            translation unchanged.
        """
        # English variants (EN, EN-US, EN-GB) are all handled the same way.
        if target_lang_code and target_lang_code.upper().startswith("EN"):
            words = translated_text.strip().split()
            if len(words) > 1:
                cleaned = [
                    word.lower().rstrip(self._TRAILING_PUNCTUATION) for word in words
                ]
                # Scan from the end: the term normally follows the context word.
                # Punctuation-only tokens clean to "" and are skipped.
                for candidate in reversed(cleaned):
                    if candidate and candidate not in self._CONTEXT_WORDS:
                        return candidate
                # Every word is a context word: fall back to the last one.
                return cleaned[-1]

        # For other languages or single-word results, return as-is.
        # A glossary can be configured for better results.
        return translated_text

    def get_translation_needs(
        self,
        detected_lang: str,
        supported_langs: List[str]
    ) -> List[str]:
        """
        Determine which languages need translation.

        Args:
            detected_lang: Detected query language
            supported_langs: List of supported languages

        Returns:
            New list of language codes to translate to: every supported
            language except the detected one.
        """
        # When detected_lang is not supported nothing is filtered out, so the
        # result is a copy of the full list (never the caller's own object).
        return [lang for lang in supported_langs if lang != detected_lang]