5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
1
2
3
4
5
|
"""DeepL translation backend."""
from __future__ import annotations
import logging
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
6
|
import re
|
0fd2f875
tangwang
translate
|
7
|
from typing import List, Optional, Sequence, Tuple, Union
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
8
9
10
|
import requests
|
0fd2f875
tangwang
translate
|
11
12
|
from translation.languages import DEEPL_LANGUAGE_CODES
from translation.scenes import SCENE_DEEPL_CONTEXTS, normalize_scene_name
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
13
14
15
|
logger = logging.getLogger(__name__)
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
16
17
|
class DeepLTranslationBackend:
    """Translation backend that posts one text per request to a DeepL HTTP endpoint.

    Failures (missing key, non-200 response, empty result, timeout, any other
    exception during the request) are logged and reported to the caller as
    ``None`` rather than raised. The one exception is a missing ``scene``,
    which raises ``ValueError``.
    """

    # Words that only appear in a translation because of the "购买" ("buy")
    # prefix injected by `_add_ecommerce_context`; they are skipped when the
    # actual term is recovered from the translated phrase.
    _CONTEXT_WORDS = frozenset(
        {"buy", "purchase", "product", "item", "commodity", "goods"}
    )
    # Trailing sentence punctuation stripped from each translated word.
    _TRAILING_PUNCT_RE = re.compile(r"[.,!?;:]+$")

    def __init__(
        self,
        api_key: Optional[str],
        *,
        api_url: str,
        timeout: float,
        glossary_id: Optional[str] = None,
    ) -> None:
        """Store the connection settings.

        Args:
            api_key: DeepL auth key. When falsy, a warning is logged and
                every later `translate` call returns ``None``.
            api_url: URL the translation request is POSTed to.
            timeout: Request timeout in seconds (coerced to ``float``).
            glossary_id: Optional glossary id added to every request payload.
        """
        self.api_key = api_key
        self.api_url = api_url
        self.timeout = float(timeout)
        self.glossary_id = glossary_id
        self.model = "deepl"
        if not self.api_key:
            logger.warning("DEEPL_AUTH_KEY not set; DeepL translation is unavailable")

    @property
    def supports_batch(self) -> bool:
        """Always ``False``: `translate` issues one request per text."""
        return False

    def _resolve_request_context(
        self,
        target_lang: str,
        scene: Optional[str],
    ) -> Optional[str]:
        """Return the DeepL ``context`` string for *scene* and *target_lang*.

        Looks up the normalized scene in ``SCENE_DEEPL_CONTEXTS`` and picks the
        entry for the lower-cased target language, falling back to the ``"en"``
        entry when there is none (or it is empty).

        Raises:
            ValueError: If *scene* is ``None``.
        """
        if scene is None:
            raise ValueError("deepl translation scene is required")
        normalized_scene = normalize_scene_name(scene)
        scene_map = SCENE_DEEPL_CONTEXTS[normalized_scene]
        tgt = str(target_lang or "").strip().lower()
        return scene_map.get(tgt) or scene_map.get("en")

    def translate(
        self,
        text: Union[str, Sequence[str]],
        target_lang: str,
        source_lang: Optional[str] = None,
        scene: Optional[str] = None,
    ) -> Union[Optional[str], List[Optional[str]]]:
        """Translate a single text, or each text of a list/tuple.

        Args:
            text: One string, or a list/tuple of strings. For a list/tuple the
                result is a list of the same length; ``None`` and blank items
                are passed through untouched without a request.
            target_lang: Target language; mapped through
                ``DEEPL_LANGUAGE_CODES`` (lower-cased key), else upper-cased.
            source_lang: Optional source language, mapped the same way.
            scene: Scene name used to select the DeepL ``context`` string.

        Returns:
            The translated string, or ``None`` when no API key is configured,
            the response is not HTTP 200, the response carries no translation,
            or the request times out / raises. A list of such values for
            list/tuple input.

        Raises:
            ValueError: If an API key is configured, a non-blank text is
                given, and *scene* is ``None``.
        """
        if isinstance(text, (list, tuple)):
            results: List[Optional[str]] = []
            for item in text:
                if item is None or not str(item).strip():
                    # Nothing to translate: keep the slot so positions line up.
                    results.append(item)  # type: ignore[arg-type]
                    continue
                out = self.translate(
                    text=str(item),
                    target_lang=target_lang,
                    source_lang=source_lang,
                    scene=scene,
                )
                results.append(out)
            return results

        if not self.api_key:
            return None

        target_code = DEEPL_LANGUAGE_CODES.get(
            (target_lang or "").lower(), (target_lang or "").upper()
        )
        headers = {
            "Authorization": f"DeepL-Auth-Key {self.api_key}",
            "Content-Type": "application/json",
        }
        api_context = self._resolve_request_context(target_lang, scene)
        # The resolved context string (not the scene name) decides whether a
        # short term gets the "购买" prefix.
        text_to_translate, needs_extraction = self._add_ecommerce_context(
            text, source_lang, api_context
        )
        payload = {
            "text": [text_to_translate],
            "target_lang": target_code,
        }
        if source_lang:
            payload["source_lang"] = DEEPL_LANGUAGE_CODES.get(
                source_lang.lower(), source_lang.upper()
            )
        if api_context:
            payload["context"] = api_context
        if self.glossary_id:
            payload["glossary_id"] = self.glossary_id

        try:
            response = requests.post(
                self.api_url, headers=headers, json=payload, timeout=self.timeout
            )
            if response.status_code != 200:
                logger.warning(
                    "[deepl] Failed | status=%s tgt=%s body=%s",
                    response.status_code,
                    target_code,
                    (response.text or "")[:200],
                )
                return None
            data = response.json()
            translations = data.get("translations") or []
            if not translations:
                return None
            translated = translations[0].get("text")
            if not translated:
                return None
            if needs_extraction:
                # Undo the effect of the injected prefix on the translation.
                translated = self._extract_term_from_translation(
                    translated, text, target_code
                )
            return translated
        except requests.Timeout:
            logger.warning(
                "[deepl] Timeout | tgt=%s timeout=%.1fs", target_code, self.timeout
            )
            return None
        except Exception as exc:
            # Best-effort backend: any other failure is logged, not raised.
            logger.warning(
                "[deepl] Exception | tgt=%s error=%s", target_code, exc, exc_info=True
            )
            return None

    def _add_ecommerce_context(
        self,
        text: str,
        source_lang: Optional[str],
        scene: Optional[str],
    ) -> Tuple[str, bool]:
        """Prefix very short Chinese terms with "购买" ("buy").

        The prefix is added only when *scene* contains ``"e-commerce"``
        (case-insensitive), *source_lang* is ``"zh"``, and the stripped text is
        a single whitespace-free token of at most two characters.

        Returns:
            ``(text_to_send, needs_extraction)``. ``needs_extraction`` is
            ``True`` only when the prefix was added.
        """
        if not scene or "e-commerce" not in scene.lower():
            return text, False
        if (source_lang or "").lower() != "zh":
            return text, False
        term = (text or "").strip()
        if len(term.split()) == 1 and len(term) <= 2:
            return f"购买 {term}", True
        return text, False

    def _extract_term_from_translation(
        self,
        translated_text: str,
        original_text: str,
        target_lang_code: str,
    ) -> str:
        """Recover the bare term from a translation of a "购买 <term>" phrase.

        Only English targets are processed; any ``EN`` code, including regional
        variants such as ``EN-US`` or ``EN-GB``, qualifies. Other targets and
        single-word translations are returned unchanged.

        For a multi-word English translation, each word is lower-cased and
        stripped of trailing punctuation; the last word that is not a known
        context word ("buy", "purchase", ...) is returned. If every word is a
        context word, the last word is returned. Punctuation-only tokens are
        ignored, and if nothing but punctuation remains the translation is
        returned unchanged.

        Args:
            translated_text: Text returned by the API.
            original_text: Unused; kept for interface compatibility.
            target_lang_code: Target language code sent to the API.
        """
        del original_text  # intentionally unused
        # Compare the base language so "EN-US" / "EN-GB" are handled like "EN".
        base_lang = (target_lang_code or "").upper().split("-")[0]
        if base_lang != "EN":
            return translated_text
        words = translated_text.strip().split()
        if len(words) <= 1:
            return translated_text
        cleaned = [self._TRAILING_PUNCT_RE.sub("", word.lower()) for word in words]
        # Drop tokens that were pure punctuation; they would otherwise be
        # returned as an empty "term".
        tokens = [token for token in cleaned if token]
        if not tokens:
            return translated_text
        for token in reversed(tokens):
            if token not in self._CONTEXT_WORDS:
                return token
        return tokens[-1]
|