Blame view

translation/backends/deepl.py 7.03 KB
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
1
2
3
4
5
  """DeepL translation backend."""
  
  from __future__ import annotations
  
  import logging
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
6
  import re
d6c29734   tangwang   translation optim...
7
  import time
0fd2f875   tangwang   translate
8
  from typing import List, Optional, Sequence, Tuple, Union
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
9
10
  
  import requests
d6c29734   tangwang   translation optim...
11
  from requests.adapters import HTTPAdapter
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
12
  
0fd2f875   tangwang   translate
13
14
  from translation.languages import DEEPL_LANGUAGE_CODES
  from translation.scenes import SCENE_DEEPL_CONTEXTS, normalize_scene_name
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
15
16
17
  
  logger = logging.getLogger(__name__)
  
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
18
19
  
  class DeepLTranslationBackend:
d6c29734   tangwang   translation optim...
20
21
22
23
      # Seconds allowed for connecting; passed as the first element of the
      # ``timeout`` tuple given to the session's ``post`` call (the second
      # element is the per-instance ``self.timeout``).
      _CONNECT_TIMEOUT_SEC = 2.0
      # Extra attempts after the first one: at most ``_MAX_RETRIES + 1`` POSTs
      # are issued per ``_post_with_retries`` call.
      _MAX_RETRIES = 2
      # HTTP status codes for which ``_post_with_retries`` sleeps briefly and
      # tries again; any other non-200 status fails immediately.
      _RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}
  
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
24
25
26
27
      def __init__(
          self,
          api_key: Optional[str],
          *,
          api_url: str,
          timeout: float,
          glossary_id: Optional[str] = None,
      ) -> None:
          """Store the DeepL connection settings and prepare a pooled HTTP session.

          Args:
              api_key: DeepL auth key. When falsy a warning is logged and
                  ``translate`` returns ``None`` for non-blank scalar input.
              api_url: Endpoint that translation requests are POSTed to.
              timeout: Second element of the request timeout tuple, in seconds;
                  coerced with ``float()``.
              glossary_id: Optional glossary id added to every request payload.
          """
          self.api_key = api_key
          self.api_url = api_url
          self.timeout = float(timeout)
          self.glossary_id = glossary_id
          self.model = "deepl"

          # One dedicated adapter per scheme; adapter-level retries are disabled
          # because retrying is done explicitly in _post_with_retries.
          session = requests.Session()
          for scheme in ("http://", "https://"):
              session.mount(scheme, HTTPAdapter(pool_connections=32, pool_maxsize=32, max_retries=0))
          self._session = session

          if not api_key:
              logger.warning("DEEPL_AUTH_KEY not set; DeepL translation is unavailable")
  
      @property
      def supports_batch(self) -> bool:
          """Always ``False``.

          ``translate`` handles list/tuple input by calling itself once per item
          rather than sending several texts in one request.
          """
          return False
  
      def _resolve_request_context(
          self,
          target_lang: str,
0fd2f875   tangwang   translate
50
          scene: Optional[str],
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
51
      ) -> Optional[str]:
0fd2f875   tangwang   translate
52
53
54
55
56
          if scene is None:
              raise ValueError("deepl translation scene is required")
          normalized_scene = normalize_scene_name(scene)
          scene_map = SCENE_DEEPL_CONTEXTS[normalized_scene]
          tgt = str(target_lang or "").strip().lower()
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
57
58
          return scene_map.get(tgt) or scene_map.get("en")
  
d6c29734   tangwang   translation optim...
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
      def _build_headers(self) -> dict:
          return {
              "Authorization": f"DeepL-Auth-Key {self.api_key}",
              "Content-Type": "application/json",
          }
  
      def _build_payload(
          self,
          *,
          texts: List[str],
          target_code: str,
          source_lang: Optional[str],
          api_context: Optional[str],
      ) -> dict:
          payload: dict = {"text": texts, "target_lang": target_code}
          if source_lang:
              payload["source_lang"] = DEEPL_LANGUAGE_CODES.get(source_lang.lower(), source_lang.upper())
          if api_context:
              payload["context"] = api_context
          if self.glossary_id:
              payload["glossary_id"] = self.glossary_id
          return payload
  
      def _post_with_retries(self, *, headers: dict, payload: dict, target_code: str) -> Optional[dict]:
          for attempt in range(self._MAX_RETRIES + 1):
              try:
                  response = self._session.post(
                      self.api_url,
                      headers=headers,
                      json=payload,
                      timeout=(self._CONNECT_TIMEOUT_SEC, self.timeout),
                  )
              except requests.Timeout:
                  logger.warning("[deepl] Timeout | tgt=%s timeout=%.1fs", target_code, self.timeout)
                  return None
              except Exception as exc:
                  logger.warning("[deepl] Exception | tgt=%s error=%s", target_code, exc, exc_info=True)
                  return None
  
              if response.status_code == 200:
                  try:
                      return response.json()
                  except Exception as exc:
                      logger.warning("[deepl] Bad JSON | tgt=%s error=%s body=%s", target_code, exc, (response.text or "")[:200])
                      return None
  
              retryable = response.status_code in self._RETRYABLE_STATUS_CODES
              logger.warning(
                  "[deepl] Failed | status=%s tgt=%s attempt=%s/%s body=%s",
                  response.status_code,
                  target_code,
                  attempt + 1,
                  self._MAX_RETRIES + 1,
                  (response.text or "")[:200],
              )
              if (not retryable) or attempt >= self._MAX_RETRIES:
                  return None
              sleep_s = min(1.0, 0.1 * (2 ** attempt))
              time.sleep(sleep_s)
  
          return None
  
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
121
122
123
124
125
      def translate(
          self,
          text: Union[str, Sequence[str]],
          target_lang: str,
          source_lang: Optional[str] = None,
0fd2f875   tangwang   translate
126
          scene: Optional[str] = None,
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
127
128
129
130
131
132
133
134
135
136
137
      ) -> Union[Optional[str], List[Optional[str]]]:
          if isinstance(text, (list, tuple)):
              results: List[Optional[str]] = []
              for item in text:
                  if item is None or not str(item).strip():
                      results.append(item)  # type: ignore[arg-type]
                      continue
                  out = self.translate(
                      text=str(item),
                      target_lang=target_lang,
                      source_lang=source_lang,
0fd2f875   tangwang   translate
138
                      scene=scene,
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
139
140
141
142
143
144
145
                  )
                  results.append(out)
              return results
  
          if not self.api_key:
              return None
  
0fd2f875   tangwang   translate
146
          target_code = DEEPL_LANGUAGE_CODES.get((target_lang or "").lower(), (target_lang or "").upper())
0fd2f875   tangwang   translate
147
          api_context = self._resolve_request_context(target_lang, scene)
d6c29734   tangwang   translation optim...
148
149
150
151
152
153
154
155
156
157
158
159
160
          headers = self._build_headers()
  
          text_to_translate, needs_extraction = self._add_ecommerce_context(str(text), source_lang, api_context)
  
          payload = self._build_payload(
              texts=[text_to_translate],
              target_code=target_code,
              source_lang=source_lang,
              api_context=api_context,
          )
          data = self._post_with_retries(headers=headers, payload=payload, target_code=target_code)
          translations = (data or {}).get("translations") or []
          if not translations:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
161
              return None
d6c29734   tangwang   translation optim...
162
163
          translated = translations[0].get("text") if isinstance(translations[0], dict) else None
          if not translated:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
164
              return None
d6c29734   tangwang   translation optim...
165
166
167
          if needs_extraction:
              translated = self._extract_term_from_translation(translated, text, target_code)
          return translated
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
168
169
170
171
172
  
      def _add_ecommerce_context(
          self,
          text: str,
          source_lang: Optional[str],
0fd2f875   tangwang   translate
173
          scene: Optional[str],
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
174
      ) -> Tuple[str, bool]:
0fd2f875   tangwang   translate
175
          if not scene or "e-commerce" not in scene.lower():
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
              return text, False
          if (source_lang or "").lower() != "zh":
              return text, False
  
          term = (text or "").strip()
          if len(term.split()) == 1 and len(term) <= 2:
              return f"购买 {term}", True
          return text, False
  
      def _extract_term_from_translation(
          self,
          translated_text: str,
          original_text: str,
          target_lang_code: str,
      ) -> str:
          del original_text
          if target_lang_code != "EN":
              return translated_text
  
          words = translated_text.strip().split()
          if len(words) <= 1:
              return translated_text
          context_words = {"buy", "purchase", "product", "item", "commodity", "goods"}
          for word in reversed(words):
              normalized = re.sub(r"[.,!?;:]+$", "", word.lower())
              if normalized not in context_words:
                  return normalized
          return re.sub(r"[.,!?;:]+$", "", words[-1].lower())