deepl.py
7.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
"""DeepL translation backend."""
from __future__ import annotations
import logging
import re
import time
from typing import List, Optional, Sequence, Tuple, Union
import requests
from requests.adapters import HTTPAdapter
from translation.languages import DEEPL_LANGUAGE_CODES
from translation.scenes import SCENE_DEEPL_CONTEXTS, normalize_scene_name
logger = logging.getLogger(__name__)
class DeepLTranslationBackend:
    """Translation backend that posts texts to a DeepL-style HTTP endpoint.

    Texts are sent one per request through a pooled ``requests.Session``.
    Operational failures (missing API key, timeout, non-200 status, malformed
    response body) are logged and surfaced to the caller as ``None`` instead
    of being raised. A missing ``scene`` is a caller error and raises
    ``ValueError``.
    """

    # Seconds allowed for establishing the connection; the read timeout is
    # configured per instance via ``timeout``.
    _CONNECT_TIMEOUT_SEC = 2.0
    # Retries performed after the first attempt (so up to 3 requests total).
    _MAX_RETRIES = 2
    # HTTP statuses for which the request is attempted again.
    _RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}
    # Words that come from the injected purchase hint rather than from the
    # term being translated.
    _CONTEXT_WORDS = frozenset({"buy", "purchase", "product", "item", "commodity", "goods"})
    # Trailing sentence punctuation removed from a token before comparison.
    _TRAILING_PUNCT_RE = re.compile(r"[.,!?;:]+$")

    def __init__(
        self,
        api_key: Optional[str],
        *,
        api_url: str,
        timeout: float,
        glossary_id: Optional[str] = None,
    ) -> None:
        """Store the configuration and create the pooled HTTP session.

        Args:
            api_key: DeepL auth key. When falsy, a warning is logged and
                scalar ``translate`` calls return ``None``.
            api_url: URL the translate requests are posted to.
            timeout: Read timeout in seconds (coerced with ``float``).
            glossary_id: Optional glossary id added to every request payload.
        """
        self.api_key = api_key
        self.api_url = api_url
        self.timeout = float(timeout)
        self.glossary_id = glossary_id
        self.model = "deepl"
        self._session = requests.Session()
        # Adapter-level retries are disabled (max_retries=0); retrying is
        # handled explicitly in _post_with_retries.
        self._session.mount("http://", HTTPAdapter(pool_connections=32, pool_maxsize=32, max_retries=0))
        self._session.mount("https://", HTTPAdapter(pool_connections=32, pool_maxsize=32, max_retries=0))
        if not self.api_key:
            logger.warning("DEEPL_AUTH_KEY not set; DeepL translation is unavailable")

    @property
    def supports_batch(self) -> bool:
        """Always ``False``: sequences are translated item by item."""
        return False

    def _resolve_request_context(
        self,
        target_lang: str,
        scene: Optional[str],
    ) -> Optional[str]:
        """Look up the context string for ``scene`` and ``target_lang``.

        The scene's entry for the lower-cased target language is used, falling
        back to the scene's ``"en"`` entry when that is missing or empty.

        Raises:
            ValueError: If ``scene`` is ``None``.
        """
        if scene is None:
            raise ValueError("deepl translation scene is required")
        normalized_scene = normalize_scene_name(scene)
        # NOTE(review): subscript lookup -- an unknown scene name propagates
        # whatever SCENE_DEEPL_CONTEXTS raises for a missing key.
        scene_map = SCENE_DEEPL_CONTEXTS[normalized_scene]
        tgt = str(target_lang or "").strip().lower()
        return scene_map.get(tgt) or scene_map.get("en")

    def _build_headers(self) -> dict:
        """Return the HTTP headers (auth key and JSON content type)."""
        return {
            "Authorization": f"DeepL-Auth-Key {self.api_key}",
            "Content-Type": "application/json",
        }

    def _build_payload(
        self,
        *,
        texts: List[str],
        target_code: str,
        source_lang: Optional[str],
        api_context: Optional[str],
    ) -> dict:
        """Assemble the JSON request body.

        ``source_lang``, ``context`` and ``glossary_id`` are only included
        when a truthy value is available for them.
        """
        payload: dict = {"text": texts, "target_lang": target_code}
        if source_lang:
            payload["source_lang"] = DEEPL_LANGUAGE_CODES.get(source_lang.lower(), source_lang.upper())
        if api_context:
            payload["context"] = api_context
        if self.glossary_id:
            payload["glossary_id"] = self.glossary_id
        return payload

    def _post_with_retries(self, *, headers: dict, payload: dict, target_code: str) -> Optional[dict]:
        """POST the payload and return the decoded JSON object, or ``None``.

        Statuses in ``_RETRYABLE_STATUS_CODES`` are retried up to
        ``_MAX_RETRIES`` times with a short exponential backoff capped at one
        second. Timeouts and other request exceptions are not retried. Every
        failure is logged as a warning.

        Args:
            headers: HTTP headers for the request.
            payload: JSON-serialisable request body.
            target_code: Target language code, used for log messages only.

        Returns:
            The response body as a ``dict`` on HTTP 200 with a JSON object
            body; ``None`` in every other case.
        """
        total_attempts = self._MAX_RETRIES + 1
        for attempt in range(total_attempts):
            try:
                response = self._session.post(
                    self.api_url,
                    headers=headers,
                    json=payload,
                    timeout=(self._CONNECT_TIMEOUT_SEC, self.timeout),
                )
            except requests.Timeout:
                logger.warning("[deepl] Timeout | tgt=%s timeout=%.1fs", target_code, self.timeout)
                return None
            except Exception as exc:
                # Best-effort boundary: any transport failure means "no translation".
                logger.warning("[deepl] Exception | tgt=%s error=%s", target_code, exc, exc_info=True)
                return None

            body_preview = (response.text or "")[:200]

            if response.status_code == 200:
                try:
                    data = response.json()
                except ValueError as exc:
                    logger.warning("[deepl] Bad JSON | tgt=%s error=%s body=%s", target_code, exc, body_preview)
                    return None
                if not isinstance(data, dict):
                    # Valid JSON but not an object; callers rely on dict access.
                    logger.warning(
                        "[deepl] Bad JSON | tgt=%s error=%s body=%s",
                        target_code,
                        "top-level JSON value is not an object",
                        body_preview,
                    )
                    return None
                return data

            retryable = response.status_code in self._RETRYABLE_STATUS_CODES
            logger.warning(
                "[deepl] Failed | status=%s tgt=%s attempt=%s/%s body=%s",
                response.status_code,
                target_code,
                attempt + 1,
                total_attempts,
                body_preview,
            )
            if (not retryable) or attempt >= self._MAX_RETRIES:
                return None
            # Backoff of 0.1s, 0.2s, ... capped at 1s.
            sleep_s = min(1.0, 0.1 * (2 ** attempt))
            time.sleep(sleep_s)
        return None

    def translate(
        self,
        text: Union[str, Sequence[str]],
        target_lang: str,
        source_lang: Optional[str] = None,
        scene: Optional[str] = None,
    ) -> Union[Optional[str], List[Optional[str]]]:
        """Translate one text, or each item of a list/tuple of texts.

        Args:
            text: A single text, or a ``list``/``tuple`` of texts. For a
                sequence, ``None`` and blank items are passed through
                unchanged and every other item is translated with its own
                request.
            target_lang: Target language; mapped through
                ``DEEPL_LANGUAGE_CODES`` or upper-cased when unmapped.
            source_lang: Optional source language.
            scene: Scene name used to select the request context.

        Returns:
            The translated string, or ``None`` when no translation could be
            obtained. For a sequence input, a list with one entry per item.

        Raises:
            ValueError: If a text is actually sent for translation (API key
                configured) and ``scene`` is ``None``.
        """
        if isinstance(text, (list, tuple)):
            results: List[Optional[str]] = []
            for item in text:
                if item is None or not str(item).strip():
                    # Nothing to translate; keep the item as given.
                    results.append(item)  # type: ignore[arg-type]
                    continue
                results.append(
                    self.translate(
                        text=str(item),
                        target_lang=target_lang,
                        source_lang=source_lang,
                        scene=scene,
                    )
                )
            return results

        if not self.api_key:
            return None

        target_code = DEEPL_LANGUAGE_CODES.get((target_lang or "").lower(), (target_lang or "").upper())
        api_context = self._resolve_request_context(target_lang, scene)
        headers = self._build_headers()
        # The hint decision is made on the resolved context string, not on the
        # raw scene name.
        text_to_translate, needs_extraction = self._add_ecommerce_context(str(text), source_lang, api_context)
        payload = self._build_payload(
            texts=[text_to_translate],
            target_code=target_code,
            source_lang=source_lang,
            api_context=api_context,
        )
        data = self._post_with_retries(headers=headers, payload=payload, target_code=target_code)

        # Validate the response shape instead of assuming it.
        translations = data.get("translations") if isinstance(data, dict) else None
        if not isinstance(translations, list) or not translations:
            return None
        first = translations[0]
        translated = first.get("text") if isinstance(first, dict) else None
        if not translated:
            return None
        if needs_extraction:
            translated = self._extract_term_from_translation(translated, text, target_code)
        return translated

    def _add_ecommerce_context(
        self,
        text: str,
        source_lang: Optional[str],
        scene: Optional[str],
    ) -> Tuple[str, bool]:
        """Prefix very short Chinese terms with a purchase hint.

        The hint is added only when ``scene`` contains ``"e-commerce"``
        (case-insensitive), ``source_lang`` is ``"zh"``, and the stripped text
        is a single whitespace-free token of at most two characters.

        Returns:
            ``(text_to_send, hint_added)``. When ``hint_added`` is ``True`` the
            translation needs the hint removed again afterwards.
        """
        if not scene or "e-commerce" not in scene.lower():
            return text, False
        if (source_lang or "").lower() != "zh":
            return text, False
        term = (text or "").strip()
        if len(term.split()) == 1 and len(term) <= 2:
            return f"购买 {term}", True
        return text, False

    def _extract_term_from_translation(
        self,
        translated_text: str,
        original_text: str,
        target_lang_code: str,
    ) -> str:
        """Strip the purchase hint from an English translation.

        Scans the translated words from the end and returns the first one
        (lower-cased, trailing punctuation removed) that is not a hint word
        such as "buy" or "product". Non-English targets and single-word
        translations are returned unchanged.

        Args:
            translated_text: Translation of the hinted text.
            original_text: Unused; kept for interface compatibility.
            target_lang_code: Target code; ``EN`` and regional variants such
                as ``EN-US`` / ``EN-GB`` are treated as English.
        """
        del original_text
        # Compare on the base language so regional English codes also match.
        base_code = (target_lang_code or "").upper().split("-", 1)[0]
        if base_code != "EN":
            return translated_text
        words = translated_text.strip().split()
        if len(words) <= 1:
            return translated_text

        fallback = ""
        for word in reversed(words):
            normalized = self._TRAILING_PUNCT_RE.sub("", word.lower())
            if not normalized:
                # Token was punctuation only; it cannot be the term.
                continue
            if not fallback:
                fallback = normalized
            if normalized not in self._CONTEXT_WORDS:
                return normalized
        # Every usable word was a hint word: return the last of them, or the
        # untouched translation when nothing usable was found.
        return fallback or translated_text