# translator.py
"""
Translation service for multi-language query support.

Supports DeepL API for high-quality translations.
"""

import requests
from typing import Dict, List, Optional
from utils.cache import DictCache


class Translator:
    """Multi-language translator using the DeepL REST API.

    Translations can optionally be cached in a ``DictCache`` stored at
    ``.cache/translations.json``. When no API key is available the
    translator runs in "mock mode": ``translate`` returns its input text
    unchanged instead of calling the API.
    """

    # Free-tier endpoint. Pro-tier accounts use
    # "https://api.deepl.com/v2/translate" instead.
    DEEPL_API_URL = "https://api-free.deepl.com/v2/translate"

    # Maps the lower-case codes used by callers to the codes sent to DeepL.
    # Codes missing from this table are sent upper-cased as-is.
    LANG_CODE_MAP = {
        'zh': 'ZH',
        'en': 'EN',
        'ru': 'RU',
        'ar': 'AR',
        'ja': 'JA',
        'es': 'ES',
        'de': 'DE',
        'fr': 'FR',
        'it': 'IT',
        'pt': 'PT',
    }

    def __init__(
        self,
        api_key: Optional[str] = None,
        use_cache: bool = True,
        timeout: int = 10
    ):
        """
        Initialize translator.

        Args:
            api_key: DeepL API key. When None, the key is looked up through
                ``config.env_config.get_deepl_key`` if that module can be
                imported; otherwise the translator stays in mock mode.
            use_cache: Whether to cache translations
            timeout: Request timeout in seconds
        """
        if api_key is None:
            try:
                from config.env_config import get_deepl_key
                api_key = get_deepl_key()
            except ImportError:
                # Best effort: the config module is optional. Without it the
                # key stays None and translate() falls back to mock mode.
                pass

        self.api_key = api_key
        self.timeout = timeout
        self.use_cache = use_cache
        self.cache = DictCache(".cache/translations.json") if use_cache else None

    @staticmethod
    def _cache_key(text: str, target_lang: str, source_lang: Optional[str]) -> str:
        """Build the cache key for one (source, target, text) combination."""
        return f"{source_lang or 'auto'}:{target_lang}:{text}"

    def translate(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str] = None
    ) -> Optional[str]:
        """
        Translate text to target language.

        Args:
            text: Text to translate
            target_lang: Target language code ('zh', 'en', 'ru', etc.)
            source_lang: Source language code (optional, auto-detect if None)

        Returns:
            The translated text. Empty or whitespace-only input is returned
            unchanged, as is any input when no API key is configured (mock
            mode). None is returned when the API call fails.
        """
        if not text or not text.strip():
            return text

        # Normalize language codes so cache keys and lookups in
        # LANG_CODE_MAP are case-insensitive.
        target_lang = target_lang.lower()
        if source_lang:
            source_lang = source_lang.lower()

        caching = self.use_cache and self.cache is not None
        cache_key = self._cache_key(text, target_lang, source_lang)

        if caching:
            cached = self.cache.get(cache_key, category="translations")
            if cached:
                return cached

        # If no API key, return mock translation (for testing)
        if not self.api_key:
            print("[Translator] No API key, returning original text (mock mode)")
            return text

        result = self._translate_deepl(text, target_lang, source_lang)

        # Only successful, non-empty translations are cached, so a failure
        # is retried on the next call.
        if result and caching:
            self.cache.set(cache_key, result, category="translations")

        return result

    def _translate_deepl(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str]
    ) -> Optional[str]:
        """
        Translate using DeepL API.

        Args:
            text: Text to translate
            target_lang: Lower-case target language code
            source_lang: Lower-case source language code, or None to omit
                ``source_lang`` from the request

        Returns:
            The first translation in the response, or None on any failure
            (non-200 status, timeout, missing translations, or any other
            exception). This method logs failures and never raises.
        """
        headers = {
            "Authorization": f"DeepL-Auth-Key {self.api_key}",
            "Content-Type": "application/json",
        }

        payload = {
            "text": [text],
            "target_lang": self.LANG_CODE_MAP.get(target_lang, target_lang.upper()),
        }
        if source_lang:
            payload["source_lang"] = self.LANG_CODE_MAP.get(
                source_lang, source_lang.upper()
            )

        try:
            response = requests.post(
                self.DEEPL_API_URL,
                headers=headers,
                json=payload,
                timeout=self.timeout
            )

            if response.status_code != 200:
                print(f"[Translator] DeepL API error: {response.status_code} - {response.text}")
                return None

            data = response.json()
            translations = data.get("translations") if isinstance(data, dict) else None
            if translations:
                return translations[0]["text"]

            # A 200 response without translations is still a failure; report
            # it explicitly rather than falling off the end of the function.
            print("[Translator] DeepL API returned no translations")
            return None

        except requests.Timeout:
            print("[Translator] Translation request timed out")
            return None
        except Exception as e:
            # Boundary handler: callers rely on None instead of an exception.
            print(f"[Translator] Translation failed: {e}")
            return None

    def translate_multi(
        self,
        text: str,
        target_langs: List[str],
        source_lang: Optional[str] = None
    ) -> Dict[str, Optional[str]]:
        """
        Translate text to multiple target languages.

        Args:
            text: Text to translate
            target_langs: List of target language codes
            source_lang: Source language code (optional)

        Returns:
            Dictionary mapping each language code, exactly as given in
            ``target_langs``, to the result of ``translate`` for it.
        """
        return {
            lang: self.translate(text, lang, source_lang)
            for lang in target_langs
        }

    def get_translation_needs(
        self,
        detected_lang: str,
        supported_langs: List[str]
    ) -> List[str]:
        """
        Determine which languages need translation.

        Language codes are compared case-insensitively, matching the
        normalization done by ``translate``.

        Args:
            detected_lang: Detected query language
            supported_langs: List of supported languages

        Returns:
            A new list with every supported language except the detected
            one. If the detected language is not supported, this is a copy
            of the whole list; the input list is never returned itself.
        """
        detected = detected_lang.lower() if detected_lang else None
        return [lang for lang in supported_langs if lang.lower() != detected]