cd4ce66d
tangwang
trans logs
|
1
|
"""Qwen-MT translation backend."""
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
2
3
4
|
from __future__ import annotations
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
5
6
7
8
9
10
|
import logging
import os
import re
import time
from typing import List, Optional, Sequence, Union
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
11
12
|
from openai import OpenAI
|
cd4ce66d
tangwang
trans logs
|
13
|
from config.env_config import DASHSCOPE_API_KEY
|
0fd2f875
tangwang
translate
|
14
|
from translation.languages import QWEN_LANGUAGE_CODES
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
15
16
17
18
19
|
logger = logging.getLogger(__name__)
class QwenMTTranslationBackend:
    """Translation backend that sends text to a Qwen-MT model via the OpenAI client.

    The backend builds an ``OpenAI`` client pointed at ``base_url`` and issues
    one chat-completion request per text, passing the language pair through
    ``extra_body["translation_options"]``.  When no API key is available, or
    the client cannot be constructed, the instance is still created but every
    translation request returns ``None``.

    Attributes:
        capability_name: The capability name exactly as supplied by the caller.
        model: The normalized capability name (always ``"qwen-mt"``).
        qwen_model_name: The stripped model name sent in each request.
        base_url: Endpoint handed to the ``OpenAI`` client.
        timeout: Per-request timeout passed to the completion call.
        glossary_id: Stored as given; not referenced by any method visible in
            this class.
    """

    # Compiled once at class creation; used by the short-circuit checks in
    # ``translate`` so the patterns are not looked up on every call.
    _CHINESE_RE = re.compile(r"[\u4e00-\u9fff]")
    _ENGLISH_RE = re.compile(r"[A-Za-z0-9\s\W]+")
    _PURE_NUMBER_RE = re.compile(r"[\d.\-+%/,: ]+")

    def __init__(
        self,
        capability_name: str,
        model: str,
        base_url: str,
        api_key: Optional[str] = None,
        timeout: int = 10,
        glossary_id: Optional[str] = None,
    ):
        """Validate the configuration and try to build the API client.

        Args:
            capability_name: Must normalize to ``"qwen-mt"``.
            model: Non-empty model name to request.
            base_url: Base URL for the ``OpenAI`` client.
            api_key: Explicit API key; when falsy the key is taken from
                ``DASHSCOPE_API_KEY`` (config value, then environment).
            timeout: Request timeout; coerced with ``int()``.
            glossary_id: Optional glossary identifier, stored unchanged.

        Raises:
            ValueError: If ``capability_name`` is not ``"qwen-mt"`` or
                ``model`` is empty.
        """
        self.capability_name = capability_name
        self.model = self._normalize_capability_name(capability_name)
        self.qwen_model_name = self._normalize_model_name(model)
        self.base_url = base_url
        self.timeout = int(timeout)
        self.glossary_id = glossary_id

        self._api_key = api_key or self._default_api_key(self.model)
        self._qwen_client: Optional[OpenAI] = None
        if self._api_key:
            try:
                self._qwen_client = OpenAI(api_key=self._api_key, base_url=self.base_url)
            except Exception as exc:
                # Best effort: a broken client must not prevent construction;
                # translations will simply return None.
                logger.warning("Failed to initialize qwen-mt client: %s", exc, exc_info=True)
        else:
            logger.warning("DASHSCOPE_API_KEY not set; qwen-mt translation unavailable")

    @property
    def supports_batch(self) -> bool:
        """Report that ``translate`` accepts a list or tuple of texts."""
        return True

    @staticmethod
    def _normalize_capability_name(name: str) -> str:
        """Return ``name`` stripped and lower-cased, requiring it to be ``"qwen-mt"``.

        Raises:
            ValueError: If the normalized name is anything else.
        """
        normalized = str(name or "").strip().lower()
        if normalized != "qwen-mt":
            raise ValueError(f"Qwen-MT backend capability must be 'qwen-mt', got '{name}'")
        return normalized

    @staticmethod
    def _normalize_model_name(model: str) -> str:
        """Return ``model`` stripped of surrounding whitespace.

        Raises:
            ValueError: If the result is empty.
        """
        normalized = str(model or "").strip()
        if not normalized:
            raise ValueError("qwen-mt backend model is required")
        return normalized

    @staticmethod
    def _default_api_key(model: str) -> Optional[str]:
        """Return the fallback API key: the config constant, else the environment variable."""
        del model  # Unused: the same key is returned for every model.
        return DASHSCOPE_API_KEY or os.getenv("DASHSCOPE_API_KEY")

    def translate(
        self,
        text: Union[str, Sequence[str]],
        target_lang: str,
        source_lang: Optional[str] = None,
        scene: Optional[str] = None,
    ) -> Union[Optional[str], List[Optional[str]]]:
        """Translate one text, or each text of a list/tuple, into ``target_lang``.

        Args:
            text: A single text, or a list/tuple of texts.
            target_lang: Target language code; stripped and lower-cased.
            source_lang: Source language code; empty or ``None`` means the
                request is sent with source ``"auto"``.
            scene: Accepted and propagated to per-item calls, but not
                forwarded to the API request.

        Returns:
            For a list/tuple: a list of the same length, where ``None`` or
            blank items are returned unchanged and every other item is the
            result of translating ``str(item)``.
            For a single text: the input unchanged when it is empty/blank;
            the text itself when the target is ``"en"`` and it already looks
            English, or the target is ``"zh"`` and it contains Chinese or is
            purely numeric; otherwise the translated string, or ``None`` when
            the request fails or no client is available.
        """
        if isinstance(text, (list, tuple)):
            results: List[Optional[str]] = []
            for item in text:
                if item is None or not str(item).strip():
                    results.append(item)  # type: ignore[arg-type]
                    continue
                results.append(
                    self.translate(  # type: ignore[arg-type]
                        text=str(item),
                        target_lang=target_lang,
                        source_lang=source_lang,
                        scene=scene,
                    )
                )
            return results

        if not text or not str(text).strip():
            return text  # type: ignore[return-value]

        # The batch path coerces items with str(); do the same here so a
        # non-string scalar does not crash in the regex/strip helpers below.
        source_text = text if isinstance(text, str) else str(text)

        tgt = (target_lang or "").strip().lower()
        src = (source_lang or "").strip().lower() or None

        # Skip the API call when the text already appears to be in the target language.
        if tgt == "en" and self._is_english_text(source_text):
            return source_text
        if tgt == "zh" and (self._contains_chinese(source_text) or self._is_pure_number(source_text)):
            return source_text

        return self._translate_qwen(source_text, tgt, src)

    def _translate_qwen(
        self,
        text: str,
        target_lang: str,
        source_lang: Optional[str],
    ) -> Optional[str]:
        """Send a single translation request and return the stripped result.

        Language codes are mapped through ``QWEN_LANGUAGE_CODES``; an unknown
        code falls back to its capitalized form, and a missing or ``"auto"``
        source is sent as ``"auto"``.

        Returns:
            The translated text, or ``None`` when there is no client, the
            response content is empty, or the request raises.
        """
        if not self._qwen_client:
            return None

        tgt_norm = (target_lang or "").strip().lower()
        src_norm = (source_lang or "").strip().lower()
        tgt_qwen = QWEN_LANGUAGE_CODES.get(tgt_norm, tgt_norm.capitalize())
        if not src_norm or src_norm == "auto":
            src_qwen = "auto"
        else:
            src_qwen = QWEN_LANGUAGE_CODES.get(src_norm, src_norm.capitalize())

        # perf_counter is monotonic, so the reported latency cannot be skewed
        # by wall-clock adjustments during the request.
        started = time.perf_counter()
        try:
            completion = self._qwen_client.chat.completions.create(
                model=self.qwen_model_name,
                messages=[{"role": "user", "content": text}],
                extra_body={
                    "translation_options": {
                        "source_lang": src_qwen,
                        "target_lang": tgt_qwen,
                    }
                },
                timeout=self.timeout,
            )
            content = (completion.choices[0].message.content or "").strip()
            elapsed_ms = (time.perf_counter() - started) * 1000
            if not content:
                # Make empty responses visible instead of failing silently.
                logger.warning(
                    "[qwen-mt] Empty response | src=%s tgt=%s latency=%.1fms",
                    src_qwen,
                    tgt_qwen,
                    elapsed_ms,
                )
                return None
            logger.info("[qwen-mt] Success | src=%s tgt=%s latency=%.1fms", src_qwen, tgt_qwen, elapsed_ms)
            return content
        except Exception as exc:
            # Best effort by design: callers receive None rather than an exception.
            logger.warning(
                "[qwen-mt] Failed | src=%s tgt=%s latency=%.1fms error=%s",
                src_qwen,
                tgt_qwen,
                (time.perf_counter() - started) * 1000,
                exc,
                exc_info=True,
            )
            return None

    @staticmethod
    def _contains_chinese(text: str) -> bool:
        """Return True if ``text`` has at least one character in U+4E00..U+9FFF."""
        return bool(QwenMTTranslationBackend._CHINESE_RE.search(text or ""))

    @staticmethod
    def _is_english_text(text: str) -> bool:
        """Return True if the stripped text is non-empty and made only of ASCII
        letters, digits, whitespace and non-word characters, with no Chinese."""
        stripped = (text or "").strip()
        if not stripped:
            return False
        if not QwenMTTranslationBackend._ENGLISH_RE.fullmatch(stripped):
            return False
        return not QwenMTTranslationBackend._contains_chinese(stripped)

    @staticmethod
    def _is_pure_number(text: str) -> bool:
        """Return True if the stripped text consists only of digits and ``.-+%/,:`` or spaces."""
        return bool(QwenMTTranslationBackend._PURE_NUMBER_RE.fullmatch((text or "").strip()))
|