Blame view

providers/translation.py 6.13 KB
d4cadc13   tangwang   翻译重构
1
  """Translation provider factory and HTTP provider implementation."""
a0a173ae   tangwang   last
2
3
4
  from __future__ import annotations
  
  import logging
6f7840cf   tangwang   refactor: rename ...
5
  from typing import Any, Dict, List, Optional, Sequence, Union
a0a173ae   tangwang   last
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
  import requests
  
  from config.services_config import get_translation_config, get_translation_base_url
  
  logger = logging.getLogger(__name__)
  
  
  class HttpTranslationProvider:
      """Translation provider backed by a remote HTTP translation service.

      Each text is sent as a JSON POST to ``{base_url}/translate``. The
      provider never raises on a failed translation: the failure is logged
      and ``None`` is returned for that text.
      """

      def __init__(
          self,
          base_url: str,
          model: str = "qwen",
          timeout_sec: float = 10.0,
      ):
          """Store the connection settings.

          Args:
              base_url: Root URL of the service; trailing slashes are removed.
                  A falsy value is stored as an empty string.
              model: Model name sent with every request; falsy values fall
                  back to ``"qwen"``.
              timeout_sec: Per-request timeout in seconds; falsy values
                  (``None``, ``0``) fall back to ``10.0``.
          """
          self.base_url = (base_url or "").rstrip("/")
          self.model = model or "qwen"
          self.timeout_sec = float(timeout_sec or 10.0)

      @property
      def supports_batch(self) -> bool:
          """Whether this provider accepts list input directly.

          Always ``True``: ``_translate_once`` handles list/tuple input itself
          (by issuing one request per item), so callers may pass a batch.
          """
          return True

      def _translate_single(
          self,
          text: Any,
          target_lang: str,
          source_lang: Optional[str],
          context: Optional[str],
          prompt: Optional[str],
      ) -> Optional[str]:
          """POST one text to the service and return its translation.

          Returns:
              The ``translated_text`` field of the JSON response, or ``None``
              when the service answers with a non-200 status, the response
              cannot be decoded, or the request itself fails.
          """
          payload: Dict[str, Any] = {
              "text": text,
              "target_lang": target_lang,
              # The payload always carries a source language; "auto" is sent
              # when the caller did not specify one.
              "source_lang": source_lang or "auto",
              "model": self.model,
          }
          # Optional hints are only included when they carry a value.
          if context:
              payload["context"] = context
          if prompt:
              payload["prompt"] = prompt

          try:
              response = requests.post(
                  f"{self.base_url}/translate",
                  json=payload,
                  timeout=self.timeout_sec,
              )
              if response.status_code != 200:
                  logger.warning(
                      "HTTP translator failed: status=%s body=%s",
                      response.status_code,
                      (response.text or "")[:200],
                  )
                  return None
              return response.json().get("translated_text")
          except Exception as exc:
              # Deliberate best-effort boundary: any transport, decoding or
              # response-shape error is logged and reported as "no translation".
              logger.warning("HTTP translator request failed: %s", exc, exc_info=True)
              return None

      def _translate_once(
          self,
          text: Union[str, Sequence[str]],
          target_lang: str,
          source_lang: Optional[str] = None,
          context: Optional[str] = None,
          prompt: Optional[str] = None,
      ) -> Union[Optional[str], List[Optional[str]]]:
          """Translate a single text or a list/tuple of texts.

          Args:
              text: One string, or a list/tuple of strings.
              target_lang: Target language sent to the service.
              source_lang: Source language; ``"auto"`` is sent when omitted.
              context: Optional context hint forwarded to the service.
              prompt: Optional prompt forwarded to the service.

          Returns:
              For list/tuple input, a list of the same length whose entries
              line up with the input; a failed item is ``None``. Blank or
              ``None`` items are not treated as failures and are returned
              unchanged. For single input, the translation, ``None`` on
              failure, or the input itself when it is empty/blank.
          """
          if isinstance(text, (list, tuple)):
              results: List[Optional[str]] = []
              for item in text:
                  if item is None or not str(item).strip():
                      # Blank/None is passed through as-is to keep its meaning.
                      results.append(item)  # type: ignore[arg-type]
                      continue
                  try:
                      translated = self._translate_single(
                          str(item), target_lang, source_lang, context, prompt
                      )
                  except Exception:
                      # Not expected (the helper already catches everything);
                      # kept as a safety net so the output length always
                      # matches the input length.
                      translated = None
                  results.append(translated)
              return results

          if not text or not str(text).strip():
              return text  # type: ignore[return-value]

          return self._translate_single(text, target_lang, source_lang, context, prompt)

      def translate(
          self,
          text: Union[str, Sequence[str]],
          target_lang: str,
          source_lang: Optional[str] = None,
          context: Optional[str] = None,
          prompt: Optional[str] = None,
      ) -> Union[Optional[str], List[Optional[str]]]:
          """Public entry point; see ``_translate_once`` for the contract."""
          return self._translate_once(
              text=text,
              target_lang=target_lang,
              source_lang=source_lang,
              context=context,
              prompt=prompt,
          )
a0a173ae   tangwang   last
111
112
113
114
115
116
117
118
119
120
121
122
  
  
  def create_translation_provider(query_config: Any = None) -> Any:
      """Build the translation provider selected by the services config.

      The provider name and its settings come from ``get_translation_config()``.
      Backend modules are imported lazily inside each branch so that only the
      selected backend is imported.

      Args:
          query_config: Optional object whose ``translation_api_key``,
              ``translation_glossary_id`` and ``translation_context``
              attributes are read by the qwen-mt and DeepL branches. When
              omitted (or falsy), defaults from ``_empty_query_config()`` are
              used.

      Returns:
          A provider instance for the configured backend.

      Raises:
          ValueError: If the configured provider name is not supported.
      """
      cfg = get_translation_config()
      provider = cfg.provider
      pc = cfg.get_provider_cfg()

      def _timeout(default: float) -> float:
          # A key that is present but null must fall back to the default
          # instead of crashing in float(None).
          raw = pc.get("timeout_sec")
          return default if raw is None else float(raw)

      if provider in ("qwen-mt", "direct", "local", "inprocess"):
          from query.qwen_mt_translate import Translator

          qc = query_config or _empty_query_config()
          return Translator(
              model=pc.get("model") or "qwen-mt-flash",
              api_key=getattr(qc, "translation_api_key", None),
              use_cache=True,
              glossary_id=getattr(qc, "translation_glossary_id", None),
              translation_context=getattr(qc, "translation_context", "e-commerce product search"),
          )

      if provider in ("http", "service"):
          # This branch needs nothing from query_config.
          return HttpTranslationProvider(
              base_url=get_translation_base_url(),
              model=pc.get("model") or "qwen",
              timeout_sec=_timeout(10.0),
          )

      if provider == "llm":
          from query.llm_translate import LLMTranslatorProvider

          # A blank base_url is passed on as None.
          base_url = (pc.get("base_url") or "").strip() or None
          return LLMTranslatorProvider(
              model=pc.get("model"),
              timeout_sec=_timeout(30.0),
              base_url=base_url,
          )

      if provider == "deepl":
          from query.deepl_provider import DeepLProvider

          qc = query_config or _empty_query_config()
          return DeepLProvider(
              api_key=getattr(qc, "translation_api_key", None),
              timeout=_timeout(10.0),
              # The provider-level glossary wins over the query-level one.
              glossary_id=pc.get("glossary_id") or getattr(qc, "translation_glossary_id", None),
          )

      raise ValueError(f"Unsupported translation provider: {provider}")
  
  
  def _empty_query_config() -> Any:
      """Minimal object with default translation attrs."""
      class _QC:
          translation_api_key = None
          translation_glossary_id = None
          translation_context = "e-commerce product search"
      return _QC()