Blame view

providers/translation.py 5.67 KB
42e3aea6   tangwang   tidy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
  """
  Translation provider - direct (in-process) or HTTP service.
  """
  
  from __future__ import annotations
  
  import logging
  from typing import Any, Dict, List, Optional, Union
  
  from concurrent.futures import Future, ThreadPoolExecutor
  import requests
  
  from config.services_config import get_translation_config, get_translation_base_url
  
  logger = logging.getLogger(__name__)
  
  
  class HttpTranslationProvider:
      """Translation provider backed by a remote HTTP translation service.

      Each translation is a JSON POST to ``{base_url}/translate``. The public
      ``translate*`` methods never return ``None`` for a failed request: they
      fall back to the original, untranslated text instead.
      """

      def __init__(
          self,
          base_url: str,
          model: str = "qwen",
          timeout_sec: float = 10.0,
          translation_context: Optional[str] = None,
      ):
          """Store the connection settings, substituting defaults for falsy values.

          Args:
              base_url: Service root URL; trailing slashes are stripped.
              model: Model name sent with every request (falsy -> ``"qwen"``).
              timeout_sec: Per-request timeout (falsy -> ``10.0``).
              translation_context: Stored on the instance
                  (falsy -> ``"e-commerce product search"``).
          """
          normalized_url = (base_url or "").rstrip("/")
          self.base_url = normalized_url
          self.model = model or "qwen"
          self.timeout_sec = float(timeout_sec or 10.0)
          self.translation_context = translation_context or "e-commerce product search"
          # Worker pool used by translate_multi_async to run requests in the background.
          self.executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="http-translator")

      def _translate_once(
          self,
          text: str,
          target_lang: str,
          source_lang: Optional[str] = None,
      ) -> Optional[str]:
          """Issue a single translation request.

          Returns:
              ``text`` unchanged when it is empty or whitespace-only; the
              ``translated_text`` field of the JSON response on HTTP 200;
              ``None`` on any other status code or on any exception.
          """
          nothing_to_translate = not text or not str(text).strip()
          if nothing_to_translate:
              return text
          try:
              response = requests.post(
                  f"{self.base_url}/translate",
                  json={
                      "text": text,
                      "target_lang": target_lang,
                      "source_lang": source_lang or "auto",
                      "model": self.model,
                  },
                  timeout=self.timeout_sec,
              )
              if response.status_code == 200:
                  # A missing key yields None, which callers treat as a failure.
                  return response.json().get("translated_text")
              logger.warning(
                  "HTTP translator failed: status=%s body=%s",
                  response.status_code,
                  (response.text or "")[:200],
              )
          except Exception as exc:
              # Best-effort: log the failure and report "no translation".
              logger.warning("HTTP translator request failed: %s", exc, exc_info=True)
          return None

      def translate(
          self,
          text: str,
          target_lang: str,
          source_lang: Optional[str] = None,
          context: Optional[str] = None,
          prompt: Optional[str] = None,
      ) -> Optional[str]:
          """Translate ``text`` into ``target_lang``, falling back to ``text`` on failure.

          ``context`` and ``prompt`` are accepted but ignored.
          """
          del context, prompt
          translated = self._translate_once(
              text=text, target_lang=target_lang, source_lang=source_lang
          )
          if translated is None:
              return text
          return translated

      def translate_multi(
          self,
          text: str,
          target_langs: List[str],
          source_lang: Optional[str] = None,
          context: Optional[str] = None,
          async_mode: bool = True,
          prompt: Optional[str] = None,
      ) -> Dict[str, Optional[str]]:
          """Translate ``text`` into every language of ``target_langs``, one after another.

          ``context``, ``async_mode`` and ``prompt`` are accepted but ignored.

          Returns:
              Mapping of language code to the result of :meth:`translate`.
          """
          del context, async_mode, prompt
          return {
              lang: self.translate(text, lang, source_lang=source_lang)
              for lang in target_langs
          }

      def translate_multi_async(
          self,
          text: str,
          target_langs: List[str],
          source_lang: Optional[str] = None,
          context: Optional[str] = None,
          prompt: Optional[str] = None,
      ) -> Dict[str, Union[str, Future]]:
          """Schedule one :meth:`translate` call per target language on the executor.

          ``context`` and ``prompt`` are accepted but ignored.

          Returns:
              Mapping of language code to the ``Future`` of its translation.
          """
          del context, prompt
          return {
              lang: self.executor.submit(self.translate, text, lang, source_lang)
              for lang in target_langs
          }

      def translate_for_indexing(
          self,
          text: str,
          shop_language: str,
          source_lang: Optional[str] = None,
          context: Optional[str] = None,
          prompt: Optional[str] = None,
          index_languages: Optional[List[str]] = None,
      ) -> Dict[str, Optional[str]]:
          """Produce per-language text for indexing.

          The entry for ``shop_language`` is ``text`` itself; every other
          language in ``index_languages`` (default ``["en", "zh"]``) is
          obtained through :meth:`translate`. The source language passed on is
          ``source_lang``, else ``shop_language``, else ``"auto"``.
          ``context`` and ``prompt`` are accepted but ignored.
          """
          del context, prompt
          effective_source = source_lang or shop_language or "auto"
          languages = index_languages or ["en", "zh"]
          return {
              lang: (
                  text
                  if lang == shop_language
                  else self.translate(text, target_lang=lang, source_lang=effective_source)
              )
              for lang in languages
          }
  
  
  def create_translation_provider(query_config: Any = None) -> Any:
      """
      Create translation provider from services config.

      The provider name is read from ``get_translation_config().provider``:

      - ``"direct"`` / ``"local"`` / ``"inprocess"``: an in-process
        ``query.translator.Translator`` (imported lazily inside this branch).
      - ``"http"`` / ``"service"``: an ``HttpTranslationProvider`` pointed at
        ``get_translation_base_url()``.

      query_config: optional, for api_key/glossary_id/context (used by direct provider).
      When it is missing or falsy, an object carrying the defaults is used.

      Raises:
          ValueError: if the configured provider name is not one of the above.
      """
      cfg = get_translation_config()
      provider = cfg.provider
      pc = cfg.get_provider_cfg()

      if provider in ("direct", "local", "inprocess"):
          from query.translator import Translator
          model = pc.get("model") or "qwen"
          qc = query_config or _empty_query_config()
          return Translator(
              model=model,
              api_key=getattr(qc, "translation_api_key", None),
              use_cache=True,
              glossary_id=getattr(qc, "translation_glossary_id", None),
              translation_context=getattr(qc, "translation_context", "e-commerce product search"),
          )

      if provider in ("http", "service"):
          base_url = get_translation_base_url()
          model = pc.get("model") or "qwen"
          # `or` (not a .get default) so that an explicit null/empty
          # `timeout_sec` in the config falls back to 10s instead of making
          # float() raise TypeError/ValueError.
          timeout = float(pc.get("timeout_sec") or 10.0)
          qc = query_config or _empty_query_config()
          return HttpTranslationProvider(
              base_url=base_url,
              model=model,
              timeout_sec=timeout,
              translation_context=getattr(qc, "translation_context", "e-commerce product search"),
          )

      raise ValueError(f"Unsupported translation provider: {provider}")
42e3aea6   tangwang   tidy
162
163
164
165
166
167
168
169
170
  
  
  def _empty_query_config() -> Any:
      """Minimal object with default translation attrs."""
      class _QC:
          translation_api_key = None
          translation_glossary_id = None
          translation_context = "e-commerce product search"
      return _QC()