"""
LLM-based translation backend (DashScope-compatible OpenAI API).

Failure semantics are strict:
- success: translated string
- failure: None
"""

from __future__ import annotations

import logging
import os
import time
from typing import Optional

from openai import OpenAI

from config.env_config import DASHSCOPE_API_KEY
from config.services_config import get_translation_config
from config.translate_prompts import TRANSLATION_PROMPTS, SOURCE_LANG_CODE_MAP

logger = logging.getLogger(__name__)

DEFAULT_QWEN_BASE_URL = "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
DEFAULT_LLM_MODEL = "qwen-flash"


def _build_prompt(
    text: str,
    *,
    source_lang: Optional[str],
    target_lang: str,
    scene: Optional[str],
) -> str:
    """Build the user prompt from ``TRANSLATION_PROMPTS``.

    Templates are formatted with the placeholders ``{source_lang}``,
    ``{src_lang_code}``, ``{target_lang}``, ``{tgt_lang_code}`` and ``{text}``.
    Callers only pass language codes; the human-readable labels are looked up
    in ``SOURCE_LANG_CODE_MAP`` and fall back to the code itself.

    Args:
        text: The text to translate.
        source_lang: Source language code; ``None``/empty becomes ``"auto"``.
        target_lang: Target language code; empty becomes ``"en"``.
        scene: Business context used to select the template group;
            ``None``/blank becomes ``"general"``.

    Returns:
        The fully formatted prompt string.

    Raises:
        KeyError: If the selected template references an unknown placeholder,
            or if the requested group is missing/empty and there is no
            ``"general"`` group to fall back to.
    """
    tgt = (target_lang or "").lower() or "en"
    src = (source_lang or "auto").lower()

    # Map the business scene onto a template group name.
    normalized_scene = (scene or "").strip() or "general"
    # Legacy scene names are folded into their current group names.
    if normalized_scene in {"query", "ecommerce_search", "ecommerce_search_query"}:
        group_key = "ecommerce_search_query"
    elif normalized_scene in {"product_title", "sku_name"}:
        group_key = "sku_name"
    else:
        group_key = normalized_scene

    # Unknown (or empty) groups fall back to the "general" group.
    group = TRANSLATION_PROMPTS.get(group_key) or TRANSLATION_PROMPTS["general"]

    # Pick the template by target language code, falling back to English.
    template = group.get(tgt) or group.get("en")
    if not template:
        # Should not happen in practice; last-resort minimal template.
        template = (
            "You are a professional {source_lang} ({src_lang_code}) to "
            "{target_lang} ({tgt_lang_code}) translator, output only the translation: {text}"
        )

    # Resolve display labels; codes without a mapping are used verbatim.
    source_lang_label = SOURCE_LANG_CODE_MAP.get(src, src)
    target_lang_label = SOURCE_LANG_CODE_MAP.get(tgt, tgt)

    return template.format(
        source_lang=source_lang_label,
        src_lang_code=src,
        target_lang=target_lang_label,
        tgt_lang_code=tgt,
        text=text,
    )


class LLMTranslatorProvider:
    """Translation provider that sends a single-turn chat completion request.

    The client is created once at construction time. When no API key is
    available (or client construction fails) ``self.client`` is ``None`` and
    every ``translate`` call on non-blank text returns ``None``.
    """

    def __init__(
        self,
        *,
        model: Optional[str] = None,
        timeout_sec: float = 30.0,
        base_url: Optional[str] = None,
    ) -> None:
        """Resolve model, timeout and base URL, then create the client.

        Args:
            model: Model name; falls back to the ``llm`` provider config, then
                ``DEFAULT_LLM_MODEL``.
            timeout_sec: Request timeout in seconds. NOTE: a truthy
                ``timeout_sec`` in the ``llm`` provider config takes
                precedence over this argument (unlike ``model``/``base_url``).
            base_url: API base URL; falls back to the provider config, the
                ``DASHSCOPE_BASE_URL`` environment variable, then
                ``DEFAULT_QWEN_BASE_URL``.
        """
        cfg = get_translation_config()
        providers = cfg.providers
        # `or {}` also covers an "llm" key that is present but null.
        llm_cfg = (providers.get("llm") or {}) if isinstance(providers, dict) else {}

        self.model = model or llm_cfg.get("model") or DEFAULT_LLM_MODEL
        self.timeout_sec = float(llm_cfg.get("timeout_sec") or timeout_sec or 30.0)
        self.base_url = (
            (base_url or "").strip()
            or (llm_cfg.get("base_url") or "").strip()
            or os.getenv("DASHSCOPE_BASE_URL")
            or DEFAULT_QWEN_BASE_URL
        )
        self.client = self._create_client()

    def _create_client(self) -> Optional[OpenAI]:
        """Return an ``OpenAI`` client, or ``None`` if it cannot be created."""
        api_key = DASHSCOPE_API_KEY or os.getenv("DASHSCOPE_API_KEY")
        if not api_key:
            logger.warning("DASHSCOPE_API_KEY not set; llm translation unavailable")
            return None
        try:
            return OpenAI(api_key=api_key, base_url=self.base_url)
        except Exception as exc:
            logger.error("Failed to initialize llm translation client: %s", exc, exc_info=True)
            return None

    def translate(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None,
    ) -> Optional[str]:
        """Translate ``text`` into ``target_lang``.

        Args:
            text: Text to translate. Empty or whitespace-only input is
                returned unchanged without calling the API.
            target_lang: Target language code; empty becomes ``"en"``.
            source_lang: Source language code; ``None`` becomes ``"auto"``.
            context: Scene name used to select the prompt template group.
            prompt: Fully formed prompt; when truthy it is sent as-is and no
                template is used.

        Returns:
            The stripped model output on success; ``None`` when the client is
            unavailable, the model returns empty content, or any error occurs
            while building the prompt or calling the API.
        """
        if not text or not str(text).strip():
            return text
        if not self.client:
            return None

        tgt = (target_lang or "").lower() or "en"
        src = (source_lang or "auto").lower()
        scene = context or "default"

        # Monotonic clock: latency must not be skewed by wall-clock changes.
        start = time.perf_counter()
        try:
            # Built inside the try so a broken template yields None instead
            # of an exception, matching the module's failure contract.
            user_prompt = prompt or _build_prompt(
                text=text,
                source_lang=src,
                target_lang=tgt,
                scene=scene,
            )
            logger.info(
                "[llm] Request | src=%s tgt=%s model=%s prompt=%s",
                src,
                tgt,
                self.model,
                user_prompt,
            )
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": user_prompt}],
                timeout=self.timeout_sec,
            )
            content = (completion.choices[0].message.content or "").strip()
            latency_ms = (time.perf_counter() - start) * 1000
            if not content:
                logger.warning("[llm] Empty result | src=%s tgt=%s latency=%.1fms", src, tgt, latency_ms)
                return None
            logger.info("[llm] Success | src=%s tgt=%s src_text=%s response=%s latency=%.1fms", src, tgt, text, content, latency_ms)
            return content
        except Exception as exc:
            latency_ms = (time.perf_counter() - start) * 1000
            logger.warning(
                "[llm] Failed | src=%s tgt=%s latency=%.1fms error=%s",
                src,
                tgt,
                latency_ms,
                exc,
                exc_info=True,
            )
            return None


def llm_translate(
    text: str,
    target_lang: str,
    *,
    source_lang: Optional[str] = None,
    source_lang_label: Optional[str] = None,
    target_lang_label: Optional[str] = None,
    timeout_sec: Optional[float] = None,
) -> Optional[str]:
    """Translate ``text`` with a freshly constructed ``LLMTranslatorProvider``.

    A new provider (and client) is created on every call.

    Args:
        text: Text to translate.
        target_lang: Target language code.
        source_lang: Optional source language code.
        source_lang_label: Accepted but not used by this function.
        target_lang_label: Accepted but not used by this function.
        timeout_sec: Request timeout in seconds; ``None``/``0`` becomes 30.0.

    Returns:
        The translated string, or ``None`` on failure.
    """
    provider = LLMTranslatorProvider(timeout_sec=timeout_sec or 30.0)
    return provider.translate(
        text=text,
        target_lang=target_lang,
        source_lang=source_lang,
        context=None,
    )


__all__ = ["LLMTranslatorProvider", "llm_translate"]