""" Translation service for multi-language query support. Supports DeepL API for high-quality translations. """ import requests from typing import Dict, List, Optional from utils.cache import DictCache # Try to import DEEPL_AUTH_KEY, but allow import to fail try: from config.env_config import DEEPL_AUTH_KEY except ImportError: DEEPL_AUTH_KEY = None class Translator: """Multi-language translator using DeepL API.""" DEEPL_API_URL = "https://api.deepl.com/v2/translate" # Pro tier # Language code mapping LANG_CODE_MAP = { 'zh': 'ZH', 'en': 'EN', 'ru': 'RU', 'ar': 'AR', 'ja': 'JA', 'es': 'ES', 'de': 'DE', 'fr': 'FR', 'it': 'IT', 'pt': 'PT', } def __init__( self, api_key: Optional[str] = None, use_cache: bool = True, timeout: int = 10, glossary_id: Optional[str] = None, translation_context: Optional[str] = None ): """ Initialize translator. Args: api_key: DeepL API key (or None to use from config/env) use_cache: Whether to cache translations timeout: Request timeout in seconds glossary_id: DeepL glossary ID for custom terminology (optional) translation_context: Context hint for translation (e.g., "e-commerce", "product search") """ # Get API key from config if not provided if api_key is None and DEEPL_AUTH_KEY: api_key = DEEPL_AUTH_KEY self.api_key = api_key self.timeout = timeout self.use_cache = use_cache self.glossary_id = glossary_id self.translation_context = translation_context or "e-commerce product search" if use_cache: self.cache = DictCache(".cache/translations.json") else: self.cache = None def translate( self, text: str, target_lang: str, source_lang: Optional[str] = None, context: Optional[str] = None ) -> Optional[str]: """ Translate text to target language. Args: text: Text to translate target_lang: Target language code ('zh', 'en', 'ru', etc.) source_lang: Source language code (optional, auto-detect if None) context: Additional context for translation (overrides default context) Returns: Translated text or None if translation fails """ if not text or not text.strip(): return text # Normalize language codes target_lang = target_lang.lower() if source_lang: source_lang = source_lang.lower() # Use provided context or default context translation_context = context or self.translation_context # Check cache (include context in cache key for accuracy) if self.use_cache: cache_key = f"{source_lang or 'auto'}:{target_lang}:{translation_context}:{text}" cached = self.cache.get(cache_key, category="translations") if cached: return cached # If no API key, return mock translation (for testing) if not self.api_key: print(f"[Translator] No API key, returning original text (mock mode)") return text # Translate using DeepL with fallback result = self._translate_deepl(text, target_lang, source_lang, translation_context) # If translation failed, try fallback to free API if result is None and "api.deepl.com" in self.DEEPL_API_URL: print(f"[Translator] Pro API failed, trying free API...") result = self._translate_deepl_free(text, target_lang, source_lang, translation_context) # If still failed, return original text with warning if result is None: print(f"[Translator] Translation failed, returning original text") result = text # Cache result if result and self.use_cache: cache_key = f"{source_lang or 'auto'}:{target_lang}:{translation_context}:{text}" self.cache.set(cache_key, result, category="translations") return result def _translate_deepl( self, text: str, target_lang: str, source_lang: Optional[str], context: Optional[str] = None ) -> Optional[str]: """ Translate using DeepL API with context and glossary support. Args: text: Text to translate target_lang: Target language code source_lang: Source language code (optional) context: Context hint for translation (e.g., "e-commerce product search") """ # Map to DeepL language codes target_code = self.LANG_CODE_MAP.get(target_lang, target_lang.upper()) headers = { "Authorization": f"DeepL-Auth-Key {self.api_key}", "Content-Type": "application/json", } # Build text with context for better disambiguation # For e-commerce, add context words to help DeepL understand the domain # This is especially important for single-word ambiguous terms like "车" (car vs rook) text_to_translate, needs_extraction = self._add_ecommerce_context(text, source_lang, context) payload = { "text": [text_to_translate], "target_lang": target_code, } if source_lang: source_code = self.LANG_CODE_MAP.get(source_lang, source_lang.upper()) payload["source_lang"] = source_code # Add glossary if configured if self.glossary_id: payload["glossary_id"] = self.glossary_id # Note: DeepL API v2 doesn't have a direct "context" parameter, # but we can improve translation by: # 1. Using glossary for domain-specific terms (best solution) # 2. Adding context words to the text (for single-word queries) - implemented in _add_ecommerce_context # 3. Using more specific source language detection try: response = requests.post( self.DEEPL_API_URL, headers=headers, json=payload, timeout=self.timeout ) if response.status_code == 200: data = response.json() if "translations" in data and len(data["translations"]) > 0: translated_text = data["translations"][0]["text"] # If we added context, extract just the term from the result if needs_extraction: translated_text = self._extract_term_from_translation( translated_text, text, target_code ) return translated_text else: print(f"[Translator] DeepL API error: {response.status_code} - {response.text}") return None except requests.Timeout: print(f"[Translator] Translation request timed out") return None except Exception as e: print(f"[Translator] Translation failed: {e}") return None def _translate_deepl_free( self, text: str, target_lang: str, source_lang: Optional[str], context: Optional[str] = None ) -> Optional[str]: """ Translate using DeepL Free API. Note: Free API may not support glossary_id parameter. """ # Map to DeepL language codes target_code = self.LANG_CODE_MAP.get(target_lang, target_lang.upper()) headers = { "Authorization": f"DeepL-Auth-Key {self.api_key}", "Content-Type": "application/json", } payload = { "text": [text], "target_lang": target_code, } if source_lang: source_code = self.LANG_CODE_MAP.get(source_lang, source_lang.upper()) payload["source_lang"] = source_code # Note: Free API typically doesn't support glossary_id # But we can still use context hints in the text try: response = requests.post( "https://api-free.deepl.com/v2/translate", headers=headers, json=payload, timeout=self.timeout ) if response.status_code == 200: data = response.json() if "translations" in data and len(data["translations"]) > 0: return data["translations"][0]["text"] else: print(f"[Translator] DeepL Free API error: {response.status_code} - {response.text}") return None except requests.Timeout: print(f"[Translator] Free API request timed out") return None except Exception as e: print(f"[Translator] Free API translation failed: {e}") return None def translate_multi( self, text: str, target_langs: List[str], source_lang: Optional[str] = None, context: Optional[str] = None ) -> Dict[str, Optional[str]]: """ Translate text to multiple target languages. Args: text: Text to translate target_langs: List of target language codes source_lang: Source language code (optional) context: Context hint for translation (optional) Returns: Dictionary mapping language code to translated text """ results = {} for lang in target_langs: results[lang] = self.translate(text, lang, source_lang, context) return results def _add_ecommerce_context( self, text: str, source_lang: Optional[str], context: Optional[str] ) -> tuple: """ Add e-commerce context to text for better disambiguation. For single-word ambiguous Chinese terms, we add context words that help DeepL understand this is an e-commerce/product search context. Args: text: Original text to translate source_lang: Source language code context: Context hint Returns: Tuple of (text_with_context, needs_extraction) - text_with_context: Text to send to DeepL - needs_extraction: Whether we need to extract the term from the result """ # Only apply for e-commerce context and Chinese source if not context or "e-commerce" not in context.lower(): return text, False if not source_lang or source_lang.lower() != 'zh': return text, False # For single-word queries, add context to help disambiguation text_stripped = text.strip() if len(text_stripped.split()) == 1 and len(text_stripped) <= 2: # Common ambiguous Chinese e-commerce terms like "车" (car vs rook) # We add a context phrase: "购买 [term]" (buy [term]) or "商品 [term]" (product [term]) # This helps DeepL understand the e-commerce context # We'll need to extract just the term from the translation result context_phrase = f"购买 {text_stripped}" return context_phrase, True # For multi-word queries, DeepL usually has enough context return text, False def _extract_term_from_translation( self, translated_text: str, original_text: str, target_lang_code: str ) -> str: """ Extract the actual term from a translation that included context. For example, if we translated "购买 车" (buy car) and got "buy car", we want to extract just "car". Args: translated_text: Full translation result original_text: Original single-word query target_lang_code: Target language code (EN, ZH, etc.) Returns: Extracted term or original translation if extraction fails """ # For English target, try to extract the last word (the actual term) if target_lang_code == "EN": words = translated_text.strip().split() if len(words) > 1: # Usually the last word is the term we want # But we need to be smart - if it's "buy car", we want "car" # Common context words to skip: buy, purchase, product, item, etc. context_words = {"buy", "purchase", "product", "item", "commodity", "goods"} # Try to find the term (not a context word) for word in reversed(words): word_lower = word.lower().rstrip('.,!?;:') if word_lower not in context_words: return word_lower # If all words are context words, return the last one return words[-1].lower().rstrip('.,!?;:') # For other languages or if extraction fails, return as-is # The user can configure a glossary for better results return translated_text def get_translation_needs( self, detected_lang: str, supported_langs: List[str] ) -> List[str]: """ Determine which languages need translation. Args: detected_lang: Detected query language supported_langs: List of supported languages Returns: List of language codes to translate to """ # If detected language is in supported list, translate to others if detected_lang in supported_langs: return [lang for lang in supported_langs if lang != detected_lang] # Otherwise, translate to all supported languages return supported_langs