"""
Translation service for multi-language query support.

Supports multiple translation models:
- Qwen (default): Alibaba Cloud DashScope API using qwen-mt-flash model
- DeepL: DeepL API for high-quality translations

Usage:

```python
from query.translator import Translator

# Use the default qwen model (recommended)
translator = Translator()  # defaults to the qwen model

# Or select the model explicitly
translator = Translator(model='qwen')  # use the qwen model
translator = Translator(model='deepl')  # use the DeepL model

# Translate text
result = translator.translate(
    text="我看到这个视频后没有笑",
    target_lang="en",
    source_lang="auto"  # auto-detect the source language
)
```

Configuration:
- The Qwen model requires the DASHSCOPE_API_KEY environment variable (set in the .env file)
- The DeepL model requires the DEEPL_AUTH_KEY environment variable (set in the .env file)

Qwen reference documentation:
- Official docs: https://help.aliyun.com/zh/model-studio/get-api-key
- Model: qwen-mt-flash (fast translation model)

DeepL official documentation:
https://developers.deepl.com/api-reference/translate/request-translation
"""

import os
import requests
import re
import redis
from concurrent.futures import ThreadPoolExecutor, Future
from datetime import timedelta
from typing import Dict, List, Optional, Union
import logging

logger = logging.getLogger(__name__)

from config.env_config import DEEPL_AUTH_KEY, DASHSCOPE_API_KEY, REDIS_CONFIG
from openai import OpenAI


class Translator:
    """
    Multi-language translator supporting Qwen and DeepL APIs.

    Default model is 'qwen' which uses Alibaba Cloud DashScope API.
    """

    DEEPL_API_URL = "https://api.deepl.com/v2/translate"  # Pro tier
    QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"  # Beijing region
    # For models hosted in the Singapore region, replace base_url with: https://dashscope-intl.aliyuncs.com/compatible-mode/v1
    QWEN_MODEL = "qwen-mt-flash"  # Fast translation model

    # Lower-case language code -> DeepL language code, used when building DeepL requests.
    # Codes missing from this map are upper-cased as-is by the DeepL request builder.
    LANG_CODE_MAP = {
        'zh': 'ZH',
        'en': 'EN',
        'ru': 'RU',
        'ar': 'AR',
        'ja': 'JA',
        'es': 'ES',
        'de': 'DE',
        'fr': 'FR',
        'it': 'IT',
        'pt': 'PT',
    }

    def __init__(
        self,
        model: str = "qwen",
        api_key: Optional[str] = None,
        use_cache: bool = True,
        timeout: int = 10,
        glossary_id: Optional[str] = None,
        translation_context: Optional[str] = None
    ):
        """
        Initialize translator.

        Args:
            model: Translation model to use. Options: 'qwen' (default) or 'deepl'
            api_key: API key for the selected model (or None to use from config/env)
            use_cache: Whether to cache translations
            timeout: Request timeout in seconds
            glossary_id: DeepL glossary ID for custom terminology (optional, only for DeepL)
            translation_context: Context hint for translation (e.g., "e-commerce", "product search")
        """
        self.model = model.lower()
        if self.model not in ['qwen', 'deepl']:
            raise ValueError(f"Unsupported model: {model}. Supported models: 'qwen', 'deepl'")
        
        # Get API key from config if not provided
        if api_key is None:
            if self.model == 'qwen':
                api_key = DASHSCOPE_API_KEY or os.getenv("DASHSCOPE_API_KEY")
            else:  # deepl
                api_key = DEEPL_AUTH_KEY or os.getenv("DEEPL_AUTH_KEY")

        self.api_key = api_key
        self.timeout = timeout
        self.use_cache = use_cache
        self.glossary_id = glossary_id
        self.translation_context = translation_context or "e-commerce product search"
        
        # Initialize OpenAI client for Qwen if needed
        self.qwen_client = None
        if self.model == 'qwen':
            if not self.api_key:
                logger.warning("DASHSCOPE_API_KEY not set. Qwen translation will not work.")
            else:
                self.qwen_client = OpenAI(
                    api_key=self.api_key,
                    base_url=self.QWEN_BASE_URL,
                )

        # Initialize Redis cache if enabled
        if use_cache:
            try:
                self.redis_client = redis.Redis(
                    host=REDIS_CONFIG.get('host', 'localhost'),
                    port=REDIS_CONFIG.get('port', 6479),
                    password=REDIS_CONFIG.get('password'),
                    decode_responses=True,  # Return str instead of bytes
                    socket_timeout=REDIS_CONFIG.get('socket_timeout', 1),
                    socket_connect_timeout=REDIS_CONFIG.get('socket_connect_timeout', 1),
                    retry_on_timeout=REDIS_CONFIG.get('retry_on_timeout', False),
                    health_check_interval=10,  # 避免复用坏连接
                )
                # Test connection
                self.redis_client.ping()
                expire_days = REDIS_CONFIG.get('translation_cache_expire_days', 360)
                self.expire_time = timedelta(days=expire_days)
                self.expire_seconds = int(self.expire_time.total_seconds())  # Redis 需要秒数
                self.cache_prefix = REDIS_CONFIG.get('translation_cache_prefix', 'trans')
                logger.info("Redis cache initialized for translations")
            except Exception as e:
                logger.warning(f"Failed to initialize Redis cache: {e}, falling back to no cache")
                self.redis_client = None
                self.cache = None
        else:
            self.redis_client = None
            self.cache = None
        
        # Thread pool for async translation
        self.executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="translator")

    def translate(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> Optional[str]:
        """
        Translate text to target language (synchronous mode).

        The Redis cache is consulted first when enabled. The cache key is built
        from the target language and the text only, so context and prompt do
        not affect cache hits.

        Args:
            text: Text to translate
            target_lang: Target language code ('zh', 'en', 'ru', etc.)
            source_lang: Source language code (optional, auto-detect if None)
            context: Additional context for translation (overrides default context)
            prompt: Translation prompt/instruction (optional, for better translation quality)

        Returns:
            The translated text. The original text is returned unchanged when it
            is empty/blank, when translation is skipped (already English for
            'en'; contains Chinese or is a pure number for 'zh'), when no API
            key is configured, or when the backend call fails.
        """
        if not text or not text.strip():
            return text

        # Normalize language codes
        target_lang = target_lang.lower()
        if source_lang:
            source_lang = source_lang.lower()

        # Optimization: Skip translation if not needed
        if target_lang == 'en' and self._is_english_text(text):
            logger.info(f"[Translator] Text is already English, skipping translation: '{text[:50]}...'")
            return text

        if target_lang == 'zh' and (self._contains_chinese(text) or self._is_pure_number(text)):
            logger.info(
                f"[Translator] Translation request | Original text: '{text}' | Target language: {target_lang} | "
                f"Source language: {source_lang or 'auto'} | Result: Skip translation (contains Chinese or pure number)"
            )
            return text

        # Use provided context or default context
        translation_context = context or self.translation_context

        cache_enabled = bool(self.use_cache and self.redis_client)

        # Check cache
        if cache_enabled:
            cached = self._get_cached_translation_redis(text, target_lang, source_lang, translation_context, prompt)
            if cached:
                logger.info(
                    f"[Translator] Translation request | Original text: '{text}' | Target language: {target_lang} | "
                    f"Source language: {source_lang or 'auto'} | Result: '{cached}' | Source: Cache hit"
                )
                return cached

        # If no API key, return mock translation (for testing)
        if not self.api_key:
            logger.info(
                f"[Translator] Translation request | Original text: '{text}' | Target language: {target_lang} | "
                f"Source language: {source_lang or 'auto'} | Result: '{text}' | Source: Mock mode (no API key)"
            )
            return text

        # Translate using selected model
        logger.info(
            f"[Translator] Translation request | Model: {self.model} | Original text: '{text}' | Target language: {target_lang} | "
            f"Source language: {source_lang or 'auto'} | Context: {translation_context} | "
            f"Prompt: {'yes' if prompt else 'no'} | Status: Starting translation"
        )

        if self.model == 'qwen':
            result = self._translate_qwen(text, target_lang, source_lang, translation_context, prompt)
        else:  # deepl
            result = self._translate_deepl(text, target_lang, source_lang, translation_context, prompt)

        # Backend failure: fall back to the original text, but do NOT cache it.
        # Caching the fallback would store the untranslated text as the
        # "translation" for the whole cache TTL and hide later successful calls.
        if result is None:
            logger.warning(
                f"[Translator] Translation request | Original text: '{text}' | Target language: {target_lang} | "
                f"Source language: {source_lang or 'auto'} | Result: '{text}' | Status: Translation failed, returning original"
            )
            return text

        logger.info(
            f"[Translator] Translation request | Original text: '{text}' | Target language: {target_lang} | "
            f"Source language: {source_lang or 'auto'} | Result: '{result}' | Status: Translation successful"
        )

        # Cache only genuine, non-empty translations
        if result and cache_enabled:
            self._set_cached_translation_redis(text, target_lang, result, source_lang, translation_context, prompt)

        return result

    def _translate_qwen(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str],
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> Optional[str]:
        """
        Translate using Qwen MT Flash model via Alibaba Cloud DashScope API.
        
        Args:
            text: Text to translate
            target_lang: Target language code ('zh', 'en', 'ru', etc.)
            source_lang: Source language code (optional, 'auto' if None)
            context: Context hint for translation (optional)
            prompt: Translation prompt/instruction (optional)
            
        Returns:
            Translated text or None if translation fails
        """
        if not self.qwen_client:
            logger.error("[Translator] Qwen client not initialized. Check DASHSCOPE_API_KEY.")
            return None
        
        # Qwen (qwen-mt-plus/flash/turbo) supported languages mapping
        # 标准来自：你提供的“语言 / 英文名 / 代码”表
        qwen_lang_map = {
            "en": "English",
            "zh": "Chinese",
            "zh_tw": "Traditional Chinese",
            "ru": "Russian",
            "ja": "Japanese",
            "ko": "Korean",
            "es": "Spanish",
            "fr": "French",
            "pt": "Portuguese",
            "de": "German",
            "it": "Italian",
            "th": "Thai",
            "vi": "Vietnamese",
            "id": "Indonesian",
            "ms": "Malay",
            "ar": "Arabic",
            "hi": "Hindi",
            "he": "Hebrew",
            "my": "Burmese",
            "ta": "Tamil",
            "ur": "Urdu",
            "bn": "Bengali",
            "pl": "Polish",
            "nl": "Dutch",
            "ro": "Romanian",
            "tr": "Turkish",
            "km": "Khmer",
            "lo": "Lao",
            "yue": "Cantonese",
            "cs": "Czech",
            "el": "Greek",
            "sv": "Swedish",
            "hu": "Hungarian",
            "da": "Danish",
            "fi": "Finnish",
            "uk": "Ukrainian",
            "bg": "Bulgarian",
        }
        
        # Convert target language
        target_lang_normalized = target_lang.lower()
        target_lang_qwen = qwen_lang_map.get(target_lang_normalized, target_lang.capitalize())

        # Convert source language
        source_lang_normalized = (source_lang or "").strip().lower()
        if not source_lang_normalized or source_lang_normalized == "auto":
            source_lang_qwen = "auto"
        else:
            source_lang_qwen = qwen_lang_map.get(source_lang_normalized, source_lang.capitalize())
        
        # Prepare translation options
        translation_options = {
            "source_lang": source_lang_qwen,
            "target_lang": target_lang_qwen,
        }
        
        # Prepare messages
        messages = [
            {
                "role": "user",
                "content": text
            }
        ]
        
        try:
            completion = self.qwen_client.chat.completions.create(
                model=self.QWEN_MODEL,
                messages=messages,
                extra_body={
                    "translation_options": translation_options
                }
            )
            
            translated_text = completion.choices[0].message.content.strip()
            
            logger.debug(
                f"[Translator] Qwen API response success | Original text: '{text}' | Target language: {target_lang_qwen} | "
                f"Translation result: '{translated_text}'"
            )
            return translated_text
            
        except Exception as e:
            logger.error(
                f"[Translator] Qwen API request exception | Original text: '{text}' | Target language: {target_lang_qwen} | "
                f"Error: {e}", exc_info=True
            )
            return None

    def _translate_deepl(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str],
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> Optional[str]:
        """
        Translate using DeepL API with context and glossary support.

        Args:
            text: Text to translate
            target_lang: Target language code
            source_lang: Source language code (optional). None, blank or "auto"
                means the ``source_lang`` field is omitted from the request.
            context: Context hint for translation (e.g., "e-commerce product search")
            prompt: Optional instruction; when given it is sent as the DeepL
                ``context`` parameter instead of ``context``.

        Returns:
            Translated text, or None on a non-200 response, a response without
            translations, a timeout, or any other request error.
        """
        # Map to DeepL language codes
        target_code = self.LANG_CODE_MAP.get(target_lang, target_lang.upper())

        headers = {
            "Authorization": f"DeepL-Auth-Key {self.api_key}",
            "Content-Type": "application/json",
        }

        # Use prompt as the DeepL "context" parameter (not as a text prefix);
        # otherwise fall back to the supplied context.
        api_context = prompt if prompt else context

        # "auto" is this module's convention for auto-detection. Sending it to
        # DeepL as the literal code "AUTO" is not a valid source language, so
        # treat it like "no source language" and omit the field.
        explicit_source = source_lang
        if explicit_source and explicit_source.strip().lower() == "auto":
            explicit_source = None

        # For e-commerce, short single-token Chinese queries get a context
        # prefix to help disambiguation (e.g. "车": car vs. rook).
        text_to_translate, needs_extraction = self._add_ecommerce_context(text, explicit_source, api_context)

        payload = {
            "text": [text_to_translate],
            "target_lang": target_code,
        }

        if explicit_source:
            payload["source_lang"] = self.LANG_CODE_MAP.get(explicit_source, explicit_source.upper())

        # Context influences the translation but is not translated itself
        if api_context:
            payload["context"] = api_context

        # Add glossary if configured
        if self.glossary_id:
            payload["glossary_id"] = self.glossary_id

        try:
            response = requests.post(
                self.DEEPL_API_URL,
                headers=headers,
                json=payload,
                timeout=self.timeout
            )

            if response.status_code != 200:
                logger.error(
                    f"[Translator] DeepL API error | Original text: '{text}' | Target language: {target_code} | "
                    f"Status code: {response.status_code} | Error message: {response.text}"
                )
                return None

            data = response.json()
            translations = data.get("translations") if isinstance(data, dict) else None
            if not translations:
                # Previously this case returned None silently; log it so an
                # unexpected response shape is visible.
                logger.error(
                    f"[Translator] DeepL API returned no translations | Original text: '{text}' | "
                    f"Target language: {target_code}"
                )
                return None

            translated_text = translations[0]["text"]
            # If we added a context prefix, extract just the term from the result
            if needs_extraction:
                translated_text = self._extract_term_from_translation(
                    translated_text, text, target_code
                )
            logger.debug(
                f"[Translator] DeepL API response success | Original text: '{text}' | Target language: {target_code} | "
                f"Translation result: '{translated_text}'"
            )
            return translated_text

        except requests.Timeout:
            logger.warning(
                f"[Translator] DeepL API request timeout | Original text: '{text}' | Target language: {target_code} | "
                f"Timeout: {self.timeout}s"
            )
            return None
        except Exception as e:
            logger.error(
                f"[Translator] DeepL API request exception | Original text: '{text}' | Target language: {target_code} | "
                f"Error: {e}", exc_info=True
            )
            return None

    # NOTE: _translate_deepl_free is intentionally not implemented.
    # We do not support automatic fallback to the free endpoint, to avoid
    # mixing Pro keys with https://api-free.deepl.com and related 403 errors.

    def translate_multi(
        self,
        text: str,
        target_langs: List[str],
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        async_mode: bool = True,
        prompt: Optional[str] = None
    ) -> Dict[str, Optional[str]]:
        """
        Translate one text into several target languages.

        Cached translations are always returned directly. For languages
        without a cached value:

        - ``async_mode=True`` (default): languages whose translation can be
          skipped (English text for 'en'; text containing Chinese or a pure
          number for 'zh') are resolved immediately via ``translate``. Every
          other language is handed to a background task and mapped to None in
          the returned dict.
        - ``async_mode=False``: each missing language is translated with a
          blocking ``translate`` call.

        Args:
            text: Text to translate
            target_langs: List of target language codes
            source_lang: Source language code (optional)
            context: Context hint for translation (optional)
            async_mode: Whether missing translations run in the background
            prompt: Translation prompt/instruction (optional)

        Returns:
            Dictionary mapping each language code to its translation, or to
            None when the translation was only scheduled in the background.
        """
        # Phase 1: look every language up in the cache.
        lookups = [
            (lang, self._get_cached_translation(text, lang, source_lang, context, prompt))
            for lang in target_langs
        ]
        results = {lang: hit for lang, hit in lookups if hit is not None}
        missing_langs = [lang for lang, hit in lookups if hit is None]

        # Blocking path (also covers "nothing missing", where the loop is a no-op).
        if not (async_mode and missing_langs):
            for lang in missing_langs:
                results[lang] = self.translate(text, lang, source_lang, context, prompt)
            return results

        # Phase 2 (async): resolve skippable languages now, defer the rest.
        deferred = []
        for lang in missing_langs:
            code = lang.lower()
            # Same skip conditions as in translate()
            skippable = (
                (code == 'en' and self._is_english_text(text))
                or (code == 'zh' and (self._contains_chinese(text) or self._is_pure_number(text)))
            )
            if skippable:
                results[lang] = self.translate(text, lang, source_lang, context, prompt)
            else:
                deferred.append(lang)

        # Fire-and-forget background translations; callers see None for these.
        for lang in deferred:
            self._translate_async(text, lang, source_lang, context, prompt)
        results.update(dict.fromkeys(deferred))

        return results
    
    def translate_multi_async(
        self,
        text: str,
        target_langs: List[str],
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> Dict[str, Union[str, Future]]:
        """
        Translate text to multiple target languages asynchronously, returning Futures that can be awaited.
        
        This method returns a dictionary where:
        - If translation is cached, the value is the translation string (immediate)
        - If translation needs to be done, the value is a Future object that can be awaited
        
        Args:
            text: Text to translate
            target_langs: List of target language codes
            source_lang: Source language code (optional)
            context: Context hint for translation (optional)
            prompt: Translation prompt/instruction (optional)

        Returns:
            Dictionary mapping language code to either translation string (cached) or Future object
        """
        results = {}
        missing_langs = []
        
        # First, get cached translations
        for lang in target_langs:
            cached = self._get_cached_translation(text, lang, source_lang, context, prompt)
            if cached is not None:
                results[lang] = cached
            else:
                missing_langs.append(lang)
        
        # For missing translations, submit async tasks and return Futures
        for lang in missing_langs:
            future = self.executor.submit(
                self.translate,
                text,
                lang,
                source_lang,
                context,
                prompt
            )
            results[lang] = future
        
        return results
    
    def _get_cached_translation(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> Optional[str]:
        """Get translation from cache if available."""
        if not self.redis_client:
            return None
        return self._get_cached_translation_redis(text, target_lang, source_lang, context, prompt)
    
    def _get_cached_translation_redis(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> Optional[str]:
        """
        Get translation from Redis cache with sliding expiration.
        
        滑动过期机制：每次访问缓存时，重置过期时间为配置的过期时间（默认720天）。
        这样缓存会在最后一次访问后的720天才过期，而不是写入后的720天。
        这确保了常用的翻译缓存不会被过早删除。
        """
        if not self.redis_client:
            return None
        
        try:
            # Build cache key: prefix:target_lang:text
            # For simplicity, we use target_lang and text as key
            # Context and prompt are not included in key to maximize cache hits
            cache_key = f"{self.cache_prefix}:{target_lang.upper()}:{text}"
            value = self.redis_client.get(cache_key)
            if value:
                # Sliding expiration: reset expiration time on access
                # 每次读取缓存时，重置过期时间为配置的过期时间（最后一次访问后的N天才过期）
                try:
                    self.redis_client.expire(cache_key, self.expire_seconds)
                except Exception as expire_error:
                    # 即使 expire 失败，也返回缓存值（不影响功能）
                    logger.warning(
                        f"[Translator] Failed to update cache expiration for key {cache_key}: {expire_error}"
                    )
                
                logger.debug(
                    f"[Translator] Redis cache hit | Original text: '{text}' | Target language: {target_lang} | "
                    f"Cache key: {cache_key} | Translation result: '{value}' | TTL reset to {self.expire_seconds}s"
                )
                return value
            logger.debug(
                f"[Translator] Redis cache miss | Original text: '{text}' | Target language: {target_lang} | "
                f"Cache key: {cache_key}"
            )
            return None
        except Exception as e:
            logger.error(f"[Translator] Redis error during get translation cache | Original text: '{text}' | Target language: {target_lang} | Error: {e}")
            return None
    
    def _set_cached_translation_redis(
        self,
        text: str,
        target_lang: str,
        translation: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> None:
        """Store translation in Redis cache."""
        if not self.redis_client:
            return
        
        try:
            cache_key = f"{self.cache_prefix}:{target_lang.upper()}:{text}"
            self.redis_client.setex(cache_key, self.expire_seconds, translation)
            logger.info(
                f"[Translator] Redis cache write | Original text: '{text}' | Target language: {target_lang} | "
                f"Cache key: {cache_key} | Translation result: '{translation}'"
            )
        except Exception as e:
            logger.error(
                f"[Translator] Redis cache write failed | Original text: '{text}' | Target language: {target_lang} | "
                f"Error: {e}"
            )
    
    def _translate_async(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None
    ):
        """Launch async translation task."""
        def _do_translate():
            try:
                result = self.translate(text, target_lang, source_lang, context, prompt)
                if result:
                    logger.debug(f"Async translation completed: {text} -> {target_lang}: {result}")
            except Exception as e:
                logger.warning(f"Async translation failed: {text} -> {target_lang}: {e}")
        
        self.executor.submit(_do_translate)

    def _add_ecommerce_context(
        self,
        text: str,
        source_lang: Optional[str],
        context: Optional[str]
    ) -> tuple:
        """
        Optionally wrap a very short Chinese query in a disambiguating phrase.

        When the context mentions "e-commerce" (case-insensitive), the source
        language is 'zh', and the stripped text is a single token of at most
        two characters, the text is rewritten to "购买 <term>" ("buy <term>")
        so an ambiguous term such as "车" (car vs. rook) is translated in a
        shopping sense. In every other case the text is returned untouched.

        Args:
            text: Original text to translate
            source_lang: Source language code
            context: Context hint

        Returns:
            Tuple ``(text_to_send, needs_extraction)``, where
            ``needs_extraction`` is True only when the phrase was added and the
            term must later be extracted from the translated result.
        """
        in_ecommerce = bool(context) and "e-commerce" in context.lower()
        from_chinese = bool(source_lang) and source_lang.lower() == 'zh'
        if not (in_ecommerce and from_chinese):
            return text, False

        term = text.strip()
        is_short_single_token = len(term.split()) == 1 and len(term) <= 2
        if is_short_single_token:
            # Prefix with "购买" (buy) to signal a shopping context.
            return f"购买 {term}", True

        # Longer queries are sent as-is.
        return text, False

    def _extract_term_from_translation(
        self,
        translated_text: str,
        original_text: str,
        target_lang_code: str
    ) -> str:
        """
        Recover the bare term from a translation that included a context phrase.

        Example: "购买 车" translated as "buy car" yields "car".

        Only English output ("EN") with more than one word is processed. Each
        word is lower-cased and stripped of trailing punctuation, and the last
        word that is not a known context word (buy, purchase, product, ...) is
        returned. If every word is a context word, the last word is returned.
        Any other input is returned unchanged.

        Args:
            translated_text: Full translation result
            original_text: Original single-word query (not used by the extraction)
            target_lang_code: Target language code (EN, ZH, etc.)

        Returns:
            The extracted term, or ``translated_text`` unchanged when no
            extraction is attempted.
        """
        if target_lang_code != "EN":
            return translated_text

        tokens = translated_text.strip().split()
        if len(tokens) <= 1:
            return translated_text

        # Words introduced by the context phrase rather than by the term.
        filler_words = {"buy", "purchase", "product", "item", "commodity", "goods"}
        cleaned = [token.lower().rstrip('.,!?;:') for token in tokens]

        # Scan from the end: the term normally follows the context word.
        return next(
            (token for token in reversed(cleaned) if token not in filler_words),
            cleaned[-1],
        )

    def _shop_lang_matches(self, shop_lang_lower: str, lang_code: str) -> bool:
        """
        Tell whether the shop language already covers an index language.

        True when both values are non-empty and either they are equal, or
        ``lang_code`` is "zh" or "en" and occurs as a substring of
        ``shop_lang_lower`` (so "zh-cn" matches "zh"). A match means the source
        text is used for that language without translating.
        """
        if not (shop_lang_lower and lang_code):
            return False
        if shop_lang_lower == lang_code:
            return True
        # Only zh/en accept regional variants through substring matching.
        return lang_code in ("zh", "en") and lang_code in shop_lang_lower

    def translate_for_indexing(
        self,
        text: str,
        shop_language: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None,
        index_languages: Optional[List[str]] = None,
    ) -> Dict[str, Optional[str]]:
        """
        Build the per-language variants of a text that should be indexed.

        Every index language that the shop language already covers receives
        the source text unchanged. Every other language is filled from the
        translation cache when possible, otherwise by calling translate().

        Args:
            text: Text to translate
            shop_language: Shop primary language (e.g. 'zh', 'en', 'ru')
            source_lang: Source language code (optional); translate() falls
                back to shop_language when this is not given
            context: Additional context for translation (optional)
            prompt: Translation prompt (optional)
            index_languages: Languages to index (from tenant_config).
                Defaults to ["en", "zh"] when missing or empty.

        Returns:
            Dict with one key per index language. Values are the source text,
            a translation, or None. All values are None when text is blank or
            consists only of digits, whitespace, '_' and '-'.
        """
        langs = index_languages or ["en", "zh"]
        results: Dict[str, Optional[str]] = dict.fromkeys(langs)

        # Blank input: nothing to index in any language.
        if not (text and text.strip()):
            return results
        # Digits / separators only: translating would be meaningless.
        if re.match(r'^[\d\s_-]+$', text):
            logger.info(f"[Translator] Skip translation for symbol-only query: '{text}'")
            return results

        shop_lang_lower = (shop_language or "").strip().lower()

        # Phase 1: reuse the source text wherever the shop language matches,
        # and collect the languages that still need a translation.
        pending = []
        for lang in langs:
            if self._shop_lang_matches(shop_lang_lower, lang):
                results[lang] = text
                continue
            pending.append(lang)

        # Phase 2: cache first, translation call second.
        # The cache lookup receives the caller's source_lang as given, while
        # translate() receives the shop-language fallback.
        translate_source = source_lang or shop_language
        for target_lang in pending:
            cached = self._get_cached_translation_redis(
                text, target_lang, source_lang, context, prompt
            )
            if not cached:
                results[target_lang] = self.translate(
                    text,
                    target_lang=target_lang,
                    source_lang=translate_source,
                    context=context,
                    prompt=prompt,
                )
                continue
            results[target_lang] = cached
            logger.debug(f"[Translator] Cache hit for indexing: '{text}' -> {target_lang}: {cached}")
        return results

    def get_translation_needs(
        self,
        detected_lang: str,
        supported_langs: List[str]
    ) -> List[str]:
        """
        Determine which languages a query needs to be translated into.

        The detected language never needs a translation of itself, so it is
        dropped from the result when it is one of the supported languages.
        When it is not supported, every supported language is a target.

        Args:
            detected_lang: Detected query language
            supported_langs: List of supported languages

        Returns:
            A new list of language codes to translate to, in the order they
            appear in supported_langs. The input list object is never handed
            back, so callers may mutate the result without affecting
            supported_langs. Empty list when supported_langs is empty or None.
        """
        if not supported_langs:
            return []

        # One filter covers both cases: if detected_lang is not supported the
        # filter removes nothing, yet still yields a fresh list instead of
        # aliasing the caller's list.
        return [lang for lang in supported_langs if lang != detected_lang]
    
    def _is_english_text(self, text: str) -> bool:
        """
        Check if text is primarily English (ASCII letters, numbers, common punctuation).
        
        Args:
            text: Text to check
            
        Returns:
            True if text appears to be English
        """
        if not text or not text.strip():
            return True
        
        # Remove whitespace and common punctuation
        text_clean = re.sub(r'[\s\.,!?;:\-\'\"\(\)\[\]{}]', '', text)
        if not text_clean:
            return True
        
        # Check if all remaining characters are ASCII (letters, numbers)
        # This is a simple heuristic: if most characters are ASCII, it's likely English
        ascii_count = sum(1 for c in text_clean if ord(c) < 128)
        ratio = ascii_count / len(text_clean) if text_clean else 0
        
        # If more than 80% are ASCII characters, consider it English
        return ratio > 0.8
    
    def _contains_chinese(self, text: str) -> bool:
        """
        Check if text contains Chinese characters (Han characters).
        
        Args:
            text: Text to check
            
        Returns:
            True if text contains Chinese characters
        """
        if not text:
            return False
        
        # Check for Chinese characters (Unicode range: \u4e00-\u9fff)
        chinese_pattern = re.compile(r'[\u4e00-\u9fff]')
        return bool(chinese_pattern.search(text))
    
    def _is_pure_number(self, text: str) -> bool:
        """
        Check if text is purely numeric (digits, possibly with spaces, dots, commas).
        
        Args:
            text: Text to check
            
        Returns:
            True if text is purely numeric
        """
        if not text or not text.strip():
            return False
        
        # Remove whitespace, dots, commas (common number separators)
        text_clean = re.sub(r'[\s\.,]', '', text.strip())
        if not text_clean:
            return False
        
        # Check if all remaining characters are digits
        return text_clean.isdigit()