5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
1
2
3
4
5
6
7
|
"""DeepL translation backend."""
from __future__ import annotations
import logging
import os
import re
|
0fd2f875
tangwang
translate
|
8
|
from typing import List, Optional, Sequence, Tuple, Union
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
9
10
11
|
import requests
|
0fd2f875
tangwang
translate
|
12
13
|
from translation.languages import DEEPL_LANGUAGE_CODES
from translation.scenes import SCENE_DEEPL_CONTEXTS, normalize_scene_name
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
14
15
16
|
logger = logging.getLogger(__name__)
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
17
18
|
class DeepLTranslationBackend:
    """Translation backend that sends texts to a DeepL HTTP endpoint.

    Each text is translated with its own POST request; sequences passed to
    :meth:`translate` are handled item by item. Failures of the HTTP call are
    logged and reported as ``None`` rather than raised.
    """

    # Words dropped when recovering a single term from an English translation
    # of a hinted input (see ``_add_ecommerce_context``).
    _CONTEXT_WORDS = frozenset({"buy", "purchase", "product", "item", "commodity", "goods"})
    # Punctuation stripped from the end of each translated word.
    _TRAILING_PUNCTUATION = re.compile(r"[.,!?;:]+$")

    def __init__(
        self,
        api_key: Optional[str],
        *,
        api_url: str,
        timeout: float,
        glossary_id: Optional[str] = None,
    ) -> None:
        """Store the connection settings for the DeepL endpoint.

        Args:
            api_key: DeepL auth key. When falsy, the ``DEEPL_AUTH_KEY``
                environment variable is used instead.
            api_url: URL that translation requests are POSTed to.
            timeout: Request timeout in seconds; coerced to ``float``.
            glossary_id: Optional glossary id added to every request payload.
        """
        self.api_key = api_key or os.getenv("DEEPL_AUTH_KEY")
        self.api_url = api_url
        self.timeout = float(timeout)
        self.glossary_id = glossary_id
        self.model = "deepl"
        if not self.api_key:
            # Construction still succeeds; translate() then returns None.
            logger.warning("DEEPL_AUTH_KEY not set; DeepL translation is unavailable")

    @property
    def supports_batch(self) -> bool:
        """Always ``False``: every text is sent in its own request."""
        return False

    def _resolve_request_context(
        self,
        target_lang: str,
        scene: Optional[str],
    ) -> Optional[str]:
        """Return the DeepL ``context`` string for a scene and target language.

        The scene name is normalized with ``normalize_scene_name`` and looked
        up in ``SCENE_DEEPL_CONTEXTS``. The entry for the stripped, lowercased
        target language is preferred; the ``"en"`` entry is the fallback.

        Raises:
            ValueError: If ``scene`` is ``None``.
        """
        if scene is None:
            raise ValueError("deepl translation scene is required")
        normalized_scene = normalize_scene_name(scene)
        # NOTE(review): direct indexing raises KeyError if the normalized scene
        # has no entry -- confirm normalize_scene_name rejects unknown scenes.
        scene_map = SCENE_DEEPL_CONTEXTS[normalized_scene]
        tgt = str(target_lang or "").strip().lower()
        return scene_map.get(tgt) or scene_map.get("en")

    def translate(
        self,
        text: Union[str, Sequence[str]],
        target_lang: str,
        source_lang: Optional[str] = None,
        scene: Optional[str] = None,
    ) -> Union[Optional[str], List[Optional[str]]]:
        """Translate one text, or each text of a list/tuple, via DeepL.

        Args:
            text: A single string, or a list/tuple of strings. ``None`` and
                blank items of a list/tuple are passed through untranslated.
            target_lang: Target language; mapped through
                ``DEEPL_LANGUAGE_CODES`` and upper-cased when not found there.
            source_lang: Optional source language, mapped the same way.
            scene: Scene name used to pick the DeepL ``context`` string.

        Returns:
            For a single text, the translated string, or ``None`` when no API
            key is configured, the response status is not 200, the response
            holds no translation, or the request times out or raises. For a
            list/tuple, a list with one such result per item.

        Raises:
            ValueError: If an API key is configured and ``scene`` is ``None``.
        """
        if isinstance(text, (list, tuple)):
            results: List[Optional[str]] = []
            for item in text:
                if item is None or not str(item).strip():
                    # Nothing to translate; keep the position in the output.
                    results.append(item)  # type: ignore[arg-type]
                    continue
                out = self.translate(
                    text=str(item),
                    target_lang=target_lang,
                    source_lang=source_lang,
                    scene=scene,
                )
                results.append(out)
            return results

        if not self.api_key:
            return None

        target_code = DEEPL_LANGUAGE_CODES.get((target_lang or "").lower(), (target_lang or "").upper())
        headers = {
            "Authorization": f"DeepL-Auth-Key {self.api_key}",
            "Content-Type": "application/json",
        }
        api_context = self._resolve_request_context(target_lang, scene)
        # The resolved context string (not the scene name) decides whether the
        # short-term hint is applied.
        text_to_translate, needs_extraction = self._add_ecommerce_context(text, source_lang, api_context)
        payload = {
            "text": [text_to_translate],
            "target_lang": target_code,
        }
        if source_lang:
            payload["source_lang"] = DEEPL_LANGUAGE_CODES.get(source_lang.lower(), source_lang.upper())
        if api_context:
            payload["context"] = api_context
        if self.glossary_id:
            payload["glossary_id"] = self.glossary_id

        try:
            response = requests.post(self.api_url, headers=headers, json=payload, timeout=self.timeout)
            if response.status_code != 200:
                logger.warning(
                    "[deepl] Failed | status=%s tgt=%s body=%s",
                    response.status_code,
                    target_code,
                    (response.text or "")[:200],
                )
                return None
            data = response.json()
            translations = data.get("translations") or []
            if not translations:
                return None
            translated = translations[0].get("text")
            if not translated:
                return None
            if needs_extraction:
                # Undo the hint that was prepended before the request.
                translated = self._extract_term_from_translation(translated, text, target_code)
            return translated
        except requests.Timeout:
            logger.warning("[deepl] Timeout | tgt=%s timeout=%.1fs", target_code, self.timeout)
            return None
        except Exception as exc:
            # Best-effort backend: any other failure is logged, not raised.
            logger.warning("[deepl] Exception | tgt=%s error=%s", target_code, exc, exc_info=True)
            return None

    def _add_ecommerce_context(
        self,
        text: str,
        source_lang: Optional[str],
        scene: Optional[str],
    ) -> Tuple[str, bool]:
        """Prefix very short Chinese terms with "购买 " for e-commerce contexts.

        Args:
            text: Text about to be sent for translation.
            source_lang: Source language; only ``"zh"`` (case-insensitive)
                qualifies.
            scene: String searched for ``"e-commerce"`` (case-insensitive).
                ``translate`` passes the resolved DeepL context string here.

        Returns:
            ``(text_to_send, needs_extraction)``. The flag is ``True`` only
            when the prefix was added, i.e. the stripped text is a single
            whitespace-free token of at most two characters.
        """
        if not scene or "e-commerce" not in scene.lower():
            return text, False
        if (source_lang or "").lower() != "zh":
            return text, False
        term = (text or "").strip()
        if len(term.split()) == 1 and len(term) <= 2:
            return f"购买 {term}", True
        return text, False

    def _extract_term_from_translation(
        self,
        translated_text: str,
        original_text: str,
        target_lang_code: str,
    ) -> str:
        """Recover the bare term from the translation of a hinted input.

        Only English output (``target_lang_code == "EN"``) with more than one
        word is processed; anything else is returned unchanged. Words are
        lowercased and stripped of trailing punctuation, then the last word
        that is not a context word is returned. If every word is a context
        word, the last word is returned.

        Tokens that consist only of punctuation are ignored, so a detached
        trailing "." no longer produces an empty result.
        """
        del original_text  # kept in the signature for callers; not needed here
        if target_lang_code != "EN":
            return translated_text
        words = translated_text.strip().split()
        if len(words) <= 1:
            return translated_text

        cleaned = [self._TRAILING_PUNCTUATION.sub("", word.lower()) for word in words]
        cleaned = [word for word in cleaned if word]
        if not cleaned:
            # Nothing but punctuation: hand back the translation untouched.
            return translated_text
        for word in reversed(cleaned):
            if word not in self._CONTEXT_WORDS:
                return word
        return cleaned[-1]
|