5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
1
2
3
4
5
|
"""DeepL translation backend."""
from __future__ import annotations
import logging
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
6
|
import re
|
d6c29734
tangwang
translation optim...
|
7
|
import time
|
0fd2f875
tangwang
translate
|
8
|
from typing import List, Optional, Sequence, Tuple, Union
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
9
10
|
import requests
|
d6c29734
tangwang
translation optim...
|
11
|
from requests.adapters import HTTPAdapter
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
12
|
|
0fd2f875
tangwang
translate
|
13
14
|
from translation.languages import DEEPL_LANGUAGE_CODES
from translation.scenes import SCENE_DEEPL_CONTEXTS, normalize_scene_name
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
15
16
17
|
# Module-level logger, named after this module so handlers/filters can target it.
logger = logging.getLogger(__name__)
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
18
19
|
class DeepLTranslationBackend:
    """Translation backend that posts texts to a DeepL-style HTTP endpoint.

    One ``requests.Session`` with pooled adapters is created per instance and
    reused for every call. All failures on the request path (missing API key,
    timeout, transport error, non-200 status, malformed body) are logged and
    reported to the caller as ``None`` rather than raised; the only exception
    raised on purpose is ``ValueError`` when no scene is supplied.
    """

    # Connect part of the (connect, read) timeout tuple handed to requests.
    _CONNECT_TIMEOUT_SEC = 2.0
    # Extra attempts after the first one, used only for retryable statuses.
    _MAX_RETRIES = 2
    _RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}
    # Trailing punctuation removed from a token before it is compared/returned.
    _TRAILING_PUNCT_RE = re.compile(r"[.,!?;:]+$")
    # English words that may come from the injected purchase prefix and must
    # not be returned as the extracted term.
    _CONTEXT_WORDS = frozenset(
        {"buy", "purchase", "product", "item", "commodity", "goods"}
    )

    def __init__(
        self,
        api_key: Optional[str],
        *,
        api_url: str,
        timeout: float,
        glossary_id: Optional[str] = None,
    ) -> None:
        """Store the configuration and build the pooled HTTP session.

        Args:
            api_key: DeepL auth key. When falsy a warning is logged and
                ``translate`` returns ``None`` for every single text.
            api_url: Endpoint URL that translation requests are posted to.
            timeout: Read timeout in seconds (coerced with ``float``).
            glossary_id: Optional glossary id added to every request payload.
        """
        self.api_key = api_key
        self.api_url = api_url
        self.timeout = float(timeout)
        self.glossary_id = glossary_id
        self.model = "deepl"
        self._session = requests.Session()
        # Adapter-level retries are disabled (max_retries=0); retrying is done
        # explicitly in _post_with_retries.
        for scheme in ("http://", "https://"):
            self._session.mount(
                scheme,
                HTTPAdapter(pool_connections=32, pool_maxsize=32, max_retries=0),
            )
        if not self.api_key:
            logger.warning("DEEPL_AUTH_KEY not set; DeepL translation is unavailable")

    @property
    def supports_batch(self) -> bool:
        """Always ``False``: ``translate`` issues one request per text."""
        return False

    def _resolve_request_context(
        self,
        target_lang: str,
        scene: Optional[str],
    ) -> Optional[str]:
        """Return the context string configured for ``scene`` and ``target_lang``.

        Looks up the normalized scene in ``SCENE_DEEPL_CONTEXTS`` and prefers
        the entry for the lower-cased target language, falling back to the
        ``"en"`` entry.

        Raises:
            ValueError: if ``scene`` is ``None``.
        """
        if scene is None:
            raise ValueError("deepl translation scene is required")
        normalized_scene = normalize_scene_name(scene)
        scene_map = SCENE_DEEPL_CONTEXTS[normalized_scene]
        tgt = str(target_lang or "").strip().lower()
        return scene_map.get(tgt) or scene_map.get("en")

    def _build_headers(self) -> dict:
        """Return the auth and content-type headers for a JSON request."""
        return {
            "Authorization": f"DeepL-Auth-Key {self.api_key}",
            "Content-Type": "application/json",
        }

    def _build_payload(
        self,
        *,
        texts: List[str],
        target_code: str,
        source_lang: Optional[str],
        api_context: Optional[str],
    ) -> dict:
        """Assemble the JSON request body.

        ``source_lang``, ``context`` and ``glossary_id`` are only included when
        a truthy value is available for them.
        """
        payload: dict = {"text": texts, "target_lang": target_code}
        if source_lang:
            payload["source_lang"] = DEEPL_LANGUAGE_CODES.get(
                source_lang.lower(), source_lang.upper()
            )
        if api_context:
            payload["context"] = api_context
        if self.glossary_id:
            payload["glossary_id"] = self.glossary_id
        return payload

    def _post_with_retries(
        self, *, headers: dict, payload: dict, target_code: str
    ) -> Optional[dict]:
        """POST ``payload`` and return the decoded JSON body, or ``None``.

        Only responses whose status is in ``_RETRYABLE_STATUS_CODES`` are
        retried, up to ``_MAX_RETRIES`` extra attempts with a short exponential
        back-off capped at one second. Timeouts, transport errors, other
        statuses and undecodable bodies end the call immediately with ``None``.
        ``target_code`` is used for log messages only.
        """
        total_attempts = self._MAX_RETRIES + 1
        for attempt in range(total_attempts):
            try:
                response = self._session.post(
                    self.api_url,
                    headers=headers,
                    json=payload,
                    timeout=(self._CONNECT_TIMEOUT_SEC, self.timeout),
                )
            except requests.Timeout:
                logger.warning(
                    "[deepl] Timeout | tgt=%s timeout=%.1fs", target_code, self.timeout
                )
                return None
            except Exception as exc:
                logger.warning(
                    "[deepl] Exception | tgt=%s error=%s",
                    target_code,
                    exc,
                    exc_info=True,
                )
                return None

            if response.status_code == 200:
                try:
                    return response.json()
                except Exception as exc:
                    logger.warning(
                        "[deepl] Bad JSON | tgt=%s error=%s body=%s",
                        target_code,
                        exc,
                        (response.text or "")[:200],
                    )
                    return None

            logger.warning(
                "[deepl] Failed | status=%s tgt=%s attempt=%s/%s body=%s",
                response.status_code,
                target_code,
                attempt + 1,
                total_attempts,
                (response.text or "")[:200],
            )
            retryable = response.status_code in self._RETRYABLE_STATUS_CODES
            if (not retryable) or attempt >= self._MAX_RETRIES:
                return None
            # Back-off: 0.1s, 0.2s, 0.4s, ... never more than 1s.
            time.sleep(min(1.0, 0.1 * (2 ** attempt)))
        return None

    @staticmethod
    def _first_translation(data: object) -> Optional[str]:
        """Return the ``text`` of the first entry in ``data["translations"]``.

        Returns ``None`` when ``data`` is not a dict, ``translations`` is not a
        non-empty list, the first entry is not a dict, or its text is empty.
        """
        if not isinstance(data, dict):
            return None
        translations = data.get("translations")
        if not isinstance(translations, list) or not translations:
            return None
        first = translations[0]
        if not isinstance(first, dict):
            return None
        return first.get("text") or None

    def translate(
        self,
        text: Union[str, Sequence[str]],
        target_lang: str,
        source_lang: Optional[str] = None,
        scene: Optional[str] = None,
    ) -> Union[Optional[str], List[Optional[str]]]:
        """Translate one text, or each element of a list/tuple of texts.

        Args:
            text: A single string, or a list/tuple of strings. Sequence items
                that are ``None`` or blank are passed through unchanged without
                a request; every other item is translated individually.
            target_lang: Target language; mapped through
                ``DEEPL_LANGUAGE_CODES`` and upper-cased when unmapped.
            source_lang: Optional source language.
            scene: Scene name used to look up the request context.

        Returns:
            For a single text: the translation, or ``None`` when the API key
            is missing, the text is ``None``/blank, or the request failed.
            For a list/tuple: a list with one entry per input item.

        Raises:
            ValueError: if ``scene`` is ``None`` and an API key is configured.
        """
        if isinstance(text, (list, tuple)):
            results: List[Optional[str]] = []
            for item in text:
                if item is None or not str(item).strip():
                    results.append(item)  # type: ignore[arg-type]
                    continue
                results.append(
                    self.translate(
                        text=str(item),
                        target_lang=target_lang,
                        source_lang=source_lang,
                        scene=scene,
                    )
                )
            return results

        if not self.api_key:
            return None

        target_code = DEEPL_LANGUAGE_CODES.get(
            (target_lang or "").lower(), (target_lang or "").upper()
        )
        api_context = self._resolve_request_context(target_lang, scene)

        # Nothing to translate: avoid sending "" (or the literal "None" that
        # str(None) would produce) to the API. Checked after scene resolution
        # so a missing scene is still reported via ValueError.
        if text is None or not str(text).strip():
            return None
        original = str(text)

        text_to_translate, needs_extraction = self._add_ecommerce_context(
            original, source_lang, api_context
        )
        payload = self._build_payload(
            texts=[text_to_translate],
            target_code=target_code,
            source_lang=source_lang,
            api_context=api_context,
        )
        data = self._post_with_retries(
            headers=self._build_headers(), payload=payload, target_code=target_code
        )
        translated = self._first_translation(data)
        if not translated:
            return None
        if needs_extraction:
            translated = self._extract_term_from_translation(
                translated, original, target_code
            )
        return translated

    def _add_ecommerce_context(
        self,
        text: str,
        source_lang: Optional[str],
        scene: Optional[str],
    ) -> Tuple[str, bool]:
        """Prefix very short Chinese terms with a purchase verb.

        The prefix is applied only when ``scene`` contains ``"e-commerce"``
        (case-insensitive), ``source_lang`` is ``"zh"`` and the stripped text
        is a single token of at most two characters. Note that ``translate``
        passes the resolved request-context string as ``scene``.

        Returns:
            ``(text_to_send, needs_extraction)``; the flag is ``True`` only
            when the prefix was added.
        """
        if not scene or "e-commerce" not in scene.lower():
            return text, False
        if (source_lang or "").lower() != "zh":
            return text, False
        term = (text or "").strip()
        if len(term.split()) == 1 and len(term) <= 2:
            return f"购买 {term}", True
        return text, False

    def _extract_term_from_translation(
        self,
        translated_text: str,
        original_text: str,
        target_lang_code: str,
    ) -> str:
        """Strip the injected purchase wording from an English translation.

        Scans the words from last to first and returns the first one (lower-
        cased, trailing punctuation removed) that is not in ``_CONTEXT_WORDS``.
        Tokens that are nothing but punctuation are skipped so the result is
        never an empty string. If every word is a context word, the last such
        word is returned. Non-English targets and single-word translations are
        returned unchanged.

        Args:
            translated_text: Translation of the prefixed text.
            original_text: Unused; kept for interface compatibility.
            target_lang_code: Target code; ``EN`` and regional variants such
                as ``EN-US`` / ``EN-GB`` are treated as English.
        """
        base_code = (target_lang_code or "").upper().split("-", 1)[0]
        if base_code != "EN":
            return translated_text
        words = translated_text.strip().split()
        if len(words) <= 1:
            return translated_text

        fallback: Optional[str] = None
        for word in reversed(words):
            normalized = self._TRAILING_PUNCT_RE.sub("", word.lower())
            if not normalized:
                # Stand-alone punctuation such as "!" carries no term.
                continue
            if normalized not in self._CONTEXT_WORDS:
                return normalized
            if fallback is None:
                fallback = normalized
        return fallback if fallback is not None else translated_text
|