translation.py
6.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""Translation provider factory and HTTP provider implementation."""
from __future__ import annotations
import logging
from typing import Any, Dict, List, Optional, Sequence, Union
import requests
from config.services_config import get_translation_config, get_translation_base_url
logger = logging.getLogger(__name__)
class HttpTranslationProvider:
    """Translation provider that delegates to an HTTP translation service.

    Every non-blank text is sent as a JSON POST to ``{base_url}/translate``.
    Failures are logged and reported as ``None`` instead of being raised.
    """

    def __init__(
        self,
        base_url: str,
        model: str = "qwen",
        timeout_sec: float = 10.0,
    ):
        """Store connection settings, substituting defaults for falsy values.

        Args:
            base_url: Service root URL. Trailing slashes are stripped; a falsy
                value becomes the empty string.
            model: Model name sent with every request; falsy means ``"qwen"``.
            timeout_sec: Per-request timeout in seconds; falsy means ``10.0``.
        """
        root = base_url if base_url else ""
        self.base_url = root.rstrip("/")
        self.model = model if model else "qwen"
        self.timeout_sec = float(timeout_sec if timeout_sec else 10.0)

    @property
    def supports_batch(self) -> bool:
        """Whether this provider accepts list input.

        Always ``True``: `_translate_once` already handles lists/tuples
        (one item at a time), so callers may treat the provider as
        batch-capable.
        """
        return True

    def _translate_once(
        self,
        text: Union[str, Sequence[str]],
        target_lang: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None,
    ) -> Union[Optional[str], List[Optional[str]]]:
        """Translate a single string, or each element of a list/tuple.

        Args:
            text: One string, or a list/tuple of strings.
            target_lang: Target language sent to the service.
            source_lang: Source language; ``"auto"`` is sent when falsy.
            context: Optional context, included in the payload only if truthy.
            prompt: Optional prompt, included in the payload only if truthy.

        Returns:
            For list/tuple input, a list of the same length where blank or
            ``None`` items are passed through unchanged and failed items are
            ``None``. For a single input, the service's ``translated_text``
            value, the input itself when it is blank/falsy, or ``None`` on any
            failure (non-200 status or exception).
        """
        if isinstance(text, (list, tuple)):
            # Contract with callers: output positions line up one-to-one with
            # the input; a failed position holds None.
            shared_kwargs = {
                "target_lang": target_lang,
                "source_lang": source_lang,
                "context": context,
                "prompt": prompt,
            }
            translated_items: List[Optional[str]] = []
            for entry in text:
                is_blank = entry is None or not str(entry).strip()
                if is_blank:
                    # Blank strings / None are not failures; hand them back
                    # untouched so their meaning is preserved.
                    translated_items.append(entry)  # type: ignore[arg-type]
                    continue
                try:
                    outcome = self._translate_once(text=str(entry), **shared_kwargs)
                except Exception:
                    # Not expected to trigger because the single-text path
                    # catches its own errors; kept so lengths always match.
                    outcome = None
                translated_items.append(outcome)  # type: ignore[arg-type]
            return translated_items

        if not (text and str(text).strip()):
            # Nothing to translate: return the input as-is.
            return text  # type: ignore[return-value]

        try:
            endpoint = f"{self.base_url}/translate"
            body = {
                "text": text,
                "target_lang": target_lang,
                "source_lang": source_lang or "auto",
                "model": self.model,
            }
            # Optional fields are only transmitted when they carry a value.
            if context:
                body["context"] = context
            if prompt:
                body["prompt"] = prompt

            response = requests.post(endpoint, json=body, timeout=self.timeout_sec)
            if response.status_code == 200:
                # A missing "translated_text" key yields None.
                return response.json().get("translated_text")

            logger.warning(
                "HTTP translator failed: status=%s body=%s",
                response.status_code,
                (response.text or "")[:200],
            )
            return None
        except Exception as exc:
            logger.warning("HTTP translator request failed: %s", exc, exc_info=True)
            return None

    def translate(
        self,
        text: Union[str, Sequence[str]],
        target_lang: str,
        source_lang: Optional[str] = None,
        context: Optional[str] = None,
        prompt: Optional[str] = None,
    ) -> Union[Optional[str], List[Optional[str]]]:
        """Public entry point; forwards all arguments to `_translate_once`."""
        outcome = self._translate_once(
            text=text,
            target_lang=target_lang,
            source_lang=source_lang,
            context=context,
            prompt=prompt,
        )
        return outcome
def _timeout_from_cfg(provider_cfg: Any, default: float) -> float:
    """Return ``timeout_sec`` from *provider_cfg* as a float.

    Falls back to *default* when the key is absent or explicitly ``None``
    (e.g. ``timeout_sec: null`` in the config), which would otherwise make
    ``float()`` raise ``TypeError``.
    """
    raw = provider_cfg.get("timeout_sec")
    return float(default if raw is None else raw)


def create_translation_provider(query_config: Any = None) -> Any:
    """Create the translation provider selected by the services config.

    The provider name and its settings come from `get_translation_config()`.
    Backend modules are imported inside their branch, so only the selected
    backend is imported.

    Args:
        query_config: Optional object supplying ``translation_api_key``,
            ``translation_glossary_id`` and ``translation_context``. Read only
            by the "qwen-mt"/"direct"/"local"/"inprocess" and "deepl"
            providers; when falsy, `_empty_query_config()` defaults are used.

    Returns:
        A ``Translator``, ``HttpTranslationProvider``,
        ``LLMTranslatorProvider`` or ``DeepLProvider`` instance, depending on
        the configured provider name.

    Raises:
        ValueError: If the configured provider name is not recognised.
    """
    cfg = get_translation_config()
    provider = cfg.provider
    pc = cfg.get_provider_cfg()

    if provider in ("qwen-mt", "direct", "local", "inprocess"):
        from query.qwen_mt_translate import Translator

        model = pc.get("model") or "qwen-mt-flash"
        qc = query_config or _empty_query_config()
        return Translator(
            model=model,
            api_key=getattr(qc, "translation_api_key", None),
            use_cache=True,
            glossary_id=getattr(qc, "translation_glossary_id", None),
            translation_context=getattr(qc, "translation_context", "e-commerce product search"),
        )

    if provider in ("http", "service"):
        # The HTTP provider takes nothing from query_config, so none is built.
        return HttpTranslationProvider(
            base_url=get_translation_base_url(),
            model=pc.get("model") or "qwen",
            timeout_sec=_timeout_from_cfg(pc, 10.0),
        )

    if provider == "llm":
        from query.llm_translate import LLMTranslatorProvider

        # A blank or whitespace-only base_url is passed on as None.
        base_url = (pc.get("base_url") or "").strip() or None
        return LLMTranslatorProvider(
            model=pc.get("model"),
            timeout_sec=_timeout_from_cfg(pc, 30.0),
            base_url=base_url,
        )

    if provider == "deepl":
        from query.deepl_provider import DeepLProvider

        qc = query_config or _empty_query_config()
        return DeepLProvider(
            api_key=getattr(qc, "translation_api_key", None),
            timeout=_timeout_from_cfg(pc, 10.0),
            # Provider-level glossary wins over the per-query one.
            glossary_id=pc.get("glossary_id") or getattr(qc, "translation_glossary_id", None),
        )

    raise ValueError(f"Unsupported translation provider: {provider}")
def _empty_query_config() -> Any:
    """Build a stand-in query config carrying only the default translation attrs.

    Returns:
        A fresh instance of a local ``_QC`` class whose
        ``translation_api_key`` and ``translation_glossary_id`` are ``None``
        and whose ``translation_context`` is ``"e-commerce product search"``.
    """

    class _QC:
        # Defaults live on the class; instances carry no state of their own.
        translation_context = "e-commerce product search"
        translation_glossary_id = None
        translation_api_key = None

    defaults = _QC()
    return defaults