""" DeepL backend provider. This module only handles network calls to DeepL. It does not handle cache, async fanout, or fallback semantics. """ from __future__ import annotations import logging import os import re from typing import Dict, Optional, Tuple import requests from config.services_config import get_translation_config logger = logging.getLogger(__name__) DEFAULT_CONTEXTS: Dict[str, Dict[str, str]] = { "sku_name": { "zh": "商品SKU名称", "en": "product SKU name", }, "ecommerce_search_query": { "zh": "电商", "en": "e-commerce", }, "general": { "zh": "", "en": "", }, } SCENE_NAMES = frozenset(DEFAULT_CONTEXTS.keys()) def _merge_contexts(raw: object) -> Dict[str, Dict[str, str]]: merged: Dict[str, Dict[str, str]] = { scene: dict(lang_map) for scene, lang_map in DEFAULT_CONTEXTS.items() } if not isinstance(raw, dict): return merged for scene, lang_map in raw.items(): if not isinstance(lang_map, dict): continue scene_name = str(scene or "").strip() if not scene_name: continue merged.setdefault(scene_name, {}) for lang, value in lang_map.items(): lang_key = str(lang or "").strip().lower() context_value = str(value or "").strip() if lang_key and context_value: merged[scene_name][lang_key] = context_value return merged class DeepLProvider: API_URL = "https://api.deepl.com/v2/translate" # Pro tier LANG_CODE_MAP = { "zh": "ZH", "en": "EN", "ru": "RU", "ar": "AR", "ja": "JA", "es": "ES", "de": "DE", "fr": "FR", "it": "IT", "pt": "PT", } def __init__( self, api_key: Optional[str], *, timeout: float = 10.0, glossary_id: Optional[str] = None, ) -> None: cfg = get_translation_config() provider_cfg = cfg.providers.get("deepl", {}) if isinstance(cfg.providers, dict) else {} self.api_key = api_key or os.getenv("DEEPL_AUTH_KEY") self.timeout = float(provider_cfg.get("timeout_sec") or timeout or 10.0) self.glossary_id = glossary_id or provider_cfg.get("glossary_id") self.model = "deepl" self.context_presets = _merge_contexts(provider_cfg.get("contexts")) if not self.api_key: logger.warning("DEEPL_AUTH_KEY not set; DeepL translation is unavailable") def _resolve_request_context( self, target_lang: str, context: Optional[str], prompt: Optional[str], ) -> Optional[str]: if prompt: return prompt if context in SCENE_NAMES: scene_map = self.context_presets.get(context) or self.context_presets.get("default") or {} tgt = (target_lang or "").strip().lower() return scene_map.get(tgt) or scene_map.get("en") if context: return context scene_map = self.context_presets.get("default") or {} tgt = (target_lang or "").strip().lower() return scene_map.get(tgt) or scene_map.get("en") def translate( self, text: str, target_lang: str, source_lang: Optional[str] = None, context: Optional[str] = None, prompt: Optional[str] = None, ) -> Optional[str]: if not self.api_key: return None target_code = self.LANG_CODE_MAP.get((target_lang or "").lower(), (target_lang or "").upper()) headers = { "Authorization": f"DeepL-Auth-Key {self.api_key}", "Content-Type": "application/json", } api_context = self._resolve_request_context(target_lang, context, prompt) text_to_translate, needs_extraction = self._add_ecommerce_context(text, source_lang, api_context) payload = { "text": [text_to_translate], "target_lang": target_code, } if source_lang: payload["source_lang"] = self.LANG_CODE_MAP.get(source_lang.lower(), source_lang.upper()) if api_context: payload["context"] = api_context if self.glossary_id: payload["glossary_id"] = self.glossary_id try: response = requests.post(self.API_URL, headers=headers, json=payload, timeout=self.timeout) if response.status_code != 200: logger.warning( "[deepl] Failed | status=%s tgt=%s body=%s", response.status_code, target_code, (response.text or "")[:200], ) return None data = response.json() translations = data.get("translations") or [] if not translations: return None translated = translations[0].get("text") if not translated: return None if needs_extraction: translated = self._extract_term_from_translation(translated, text, target_code) return translated except requests.Timeout: logger.warning("[deepl] Timeout | tgt=%s timeout=%.1fs", target_code, self.timeout) return None except Exception as exc: logger.warning("[deepl] Exception | tgt=%s error=%s", target_code, exc, exc_info=True) return None def _add_ecommerce_context( self, text: str, source_lang: Optional[str], context: Optional[str], ) -> Tuple[str, bool]: if not context or "e-commerce" not in context.lower(): return text, False if (source_lang or "").lower() != "zh": return text, False term = (text or "").strip() if len(term.split()) == 1 and len(term) <= 2: return f"购买 {term}", True return text, False def _extract_term_from_translation( self, translated_text: str, original_text: str, target_lang_code: str, ) -> str: del original_text if target_lang_code != "EN": return translated_text words = translated_text.strip().split() if len(words) <= 1: return translated_text context_words = {"buy", "purchase", "product", "item", "commodity", "goods"} for word in reversed(words): normalized = re.sub(r"[.,!?;:]+$", "", word.lower()) if normalized not in context_words: return normalized return re.sub(r"[.,!?;:]+$", "", words[-1].lower())