5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
1
2
3
4
5
|
"""Translation service orchestration."""
from __future__ import annotations
import logging
|
0fd2f875
tangwang
translate
|
6
|
import threading
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
7
8
|
from typing import Dict, List, Optional
|
0fd2f875
tangwang
translate
|
9
|
from config.services_config import get_translation_config
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
10
|
from translation.protocols import TranslateInput, TranslateOutput, TranslationBackendProtocol
|
0fd2f875
tangwang
translate
|
11
12
13
14
15
16
17
|
from translation.settings import (
TranslationConfig,
get_enabled_translation_models,
get_translation_capability,
normalize_translation_model,
normalize_translation_scene,
)
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
18
19
20
21
22
23
24
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class TranslationService:
    """Owns translation backends and routes calls by model and scene.

    The enabled capabilities are read from the translation config when the
    service is constructed. Backend objects are not built up front: each one
    is created the first time :meth:`get_backend` is asked for it and is then
    reused for later calls.
    """

    def __init__(self, config: Optional[TranslationConfig] = None) -> None:
        """Load the config and validate the enabled capabilities.

        Args:
            config: Translation config to use. When omitted (or falsy),
                ``get_translation_config()`` supplies it.

        Raises:
            ValueError: If no capability is enabled, or if an enabled
                capability does not define a backend.
        """
        self.config = config or get_translation_config()
        self._enabled_capabilities = self._collect_enabled_capabilities()
        # Backends created so far, keyed by normalized model name.
        self._backends: Dict[str, TranslationBackendProtocol] = {}
        # Guards creation and registration of entries in ``_backends``.
        self._backend_lock = threading.Lock()
        if not self._enabled_capabilities:
            raise ValueError("No enabled translation backends found in services.translation.capabilities")

    def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]:
        """Return the config of every enabled capability, keyed by name.

        Raises:
            ValueError: If a capability has no (or an empty) ``backend`` entry.
        """
        enabled: Dict[str, Dict[str, object]] = {}
        for name in get_enabled_translation_models(self.config):
            capability = get_translation_capability(self.config, name, require_enabled=True)
            backend_type = capability.get("backend")
            if not backend_type:
                raise ValueError(f"Translation capability '{name}' must define a backend")
            enabled[name] = capability
        return enabled

    def _create_backend(
        self,
        *,
        name: str,
        backend_type: str,
        cfg: Dict[str, object],
    ) -> TranslationBackendProtocol:
        """Build the backend for one capability.

        Args:
            name: Capability name, forwarded to the backend factory.
            backend_type: Value of the capability's ``backend`` entry; selects
                which factory method is used.
            cfg: The capability's config mapping.

        Raises:
            ValueError: If ``backend_type`` has no registered factory.
        """
        # Maps the ``backend`` config value to the factory method that builds it.
        registry = {
            "qwen_mt": self._create_qwen_mt_backend,
            "deepl": self._create_deepl_backend,
            "llm": self._create_llm_backend,
            "local_nllb": self._create_local_nllb_backend,
            "local_marian": self._create_local_marian_backend,
        }
        factory = registry.get(backend_type)
        if factory is None:
            raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'")
        return factory(name=name, cfg=cfg)

    def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
        """Build a ``QwenMTTranslationBackend`` from ``cfg``.

        ``model``, ``base_url``, ``use_cache`` and ``timeout_sec`` must be
        present in ``cfg`` (a missing key raises ``KeyError``); ``api_key`` and
        ``glossary_id`` are optional and passed as ``None`` when absent.
        """
        # Imported here so the backend module is only loaded when it is used.
        from translation.backends.qwen_mt import QwenMTTranslationBackend

        return QwenMTTranslationBackend(
            capability_name=name,
            model=str(cfg["model"]).strip(),
            base_url=str(cfg["base_url"]).strip(),
            api_key=cfg.get("api_key"),
            use_cache=bool(cfg["use_cache"]),
            timeout=int(cfg["timeout_sec"]),
            glossary_id=cfg.get("glossary_id"),
        )

    def _create_deepl_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
        """Build a ``DeepLTranslationBackend`` from ``cfg``.

        ``api_url`` and ``timeout_sec`` must be present in ``cfg``; ``api_key``
        and ``glossary_id`` are optional. ``name`` is accepted for a uniform
        factory signature but is not forwarded to the backend.
        """
        # Imported here so the backend module is only loaded when it is used.
        from translation.backends.deepl import DeepLTranslationBackend

        return DeepLTranslationBackend(
            api_key=cfg.get("api_key"),
            api_url=str(cfg["api_url"]).strip(),
            timeout=float(cfg["timeout_sec"]),
            glossary_id=cfg.get("glossary_id"),
        )

    def _create_llm_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
        """Build an ``LLMTranslationBackend`` from ``cfg``.

        ``model``, ``timeout_sec`` and ``base_url`` must be present in ``cfg``.
        """
        # Imported here so the backend module is only loaded when it is used.
        from translation.backends.llm import LLMTranslationBackend

        return LLMTranslationBackend(
            capability_name=name,
            model=str(cfg["model"]).strip(),
            timeout_sec=float(cfg["timeout_sec"]),
            base_url=str(cfg["base_url"]).strip(),
        )

    def _create_local_nllb_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
        """Build an ``NLLBTranslationBackend`` from ``cfg``.

        All model and generation settings are required keys of ``cfg``;
        only ``attn_implementation`` is optional.
        """
        # Imported here so the backend module is only loaded when it is used.
        from translation.backends.local_seq2seq import NLLBTranslationBackend

        return NLLBTranslationBackend(
            name=name,
            model_id=str(cfg["model_id"]).strip(),
            model_dir=str(cfg["model_dir"]).strip(),
            device=str(cfg["device"]).strip(),
            torch_dtype=str(cfg["torch_dtype"]).strip(),
            batch_size=int(cfg["batch_size"]),
            max_input_length=int(cfg["max_input_length"]),
            max_new_tokens=int(cfg["max_new_tokens"]),
            num_beams=int(cfg["num_beams"]),
            attn_implementation=cfg.get("attn_implementation"),
        )

    def _create_local_marian_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
        """Build a ``MarianMTTranslationBackend`` from ``cfg``.

        The language pair is obtained from ``get_marian_language_direction``
        using the capability name, and passed on as single-item lists. All
        model and generation settings are required keys of ``cfg``; only
        ``attn_implementation`` is optional.
        """
        # Imported here so the backend module is only loaded when it is used.
        from translation.backends.local_seq2seq import MarianMTTranslationBackend, get_marian_language_direction

        source_lang, target_lang = get_marian_language_direction(name)
        return MarianMTTranslationBackend(
            name=name,
            model_id=str(cfg["model_id"]).strip(),
            model_dir=str(cfg["model_dir"]).strip(),
            device=str(cfg["device"]).strip(),
            torch_dtype=str(cfg["torch_dtype"]).strip(),
            batch_size=int(cfg["batch_size"]),
            max_input_length=int(cfg["max_input_length"]),
            max_new_tokens=int(cfg["max_new_tokens"]),
            num_beams=int(cfg["num_beams"]),
            source_langs=[source_lang],
            target_langs=[target_lang],
            attn_implementation=cfg.get("attn_implementation"),
        )

    @property
    def available_models(self) -> List[str]:
        """Names of all enabled capabilities, in config iteration order."""
        return list(self._enabled_capabilities)

    @property
    def loaded_models(self) -> List[str]:
        """Names of the capabilities whose backend has already been created."""
        return list(self._backends)

    def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol:
        """Return the backend for ``model``, creating it on first use.

        Args:
            model: Requested model name. It is resolved through
                ``normalize_translation_model`` before lookup.
                NOTE(review): presumably ``None`` resolves to a configured
                default - confirm in ``translation.settings``.

        Raises:
            ValueError: If the resolved model is not an enabled capability,
                or its backend type has no registered factory.
        """
        normalized = normalize_translation_model(self.config, model)
        capability_cfg = self._enabled_capabilities.get(normalized)
        if capability_cfg is None:
            raise ValueError(
                f"Translation model '{normalized}' is not enabled. "
                f"Available models: {', '.join(self.available_models) or 'none'}"
            )
        # Fast path: an already-created backend is returned without locking.
        backend = self._backends.get(normalized)
        if backend is not None:
            return backend
        with self._backend_lock:
            # Look again under the lock: another caller may have created the
            # backend between the unlocked lookup above and acquiring the lock.
            backend = self._backends.get(normalized)
            if backend is None:
                backend_type = str(capability_cfg["backend"])
                logger.info("Initializing translation backend | model=%s backend=%s", normalized, backend_type)
                backend = self._create_backend(
                    name=normalized,
                    backend_type=backend_type,
                    cfg=capability_cfg,
                )
                self._backends[normalized] = backend
        return backend

    def translate(
        self,
        text: TranslateInput,
        target_lang: str,
        source_lang: Optional[str] = None,
        *,
        model: Optional[str] = None,
        scene: Optional[str] = None,
    ) -> TranslateOutput:
        """Translate ``text`` with the backend selected by ``model``.

        The backend is resolved first (so an unknown model fails before the
        scene is examined), then ``scene`` is normalized through
        ``normalize_translation_scene`` and the call is delegated to the
        backend's ``translate`` method.

        Args:
            text: Input to translate, as accepted by the backend.
            target_lang: Target language passed through to the backend.
            source_lang: Optional source language passed through unchanged.
            model: Optional model name; see :meth:`get_backend`.
            scene: Optional scene name, normalized before being passed on.

        Returns:
            Whatever the backend's ``translate`` returns.

        Raises:
            ValueError: If the resolved model is not enabled.
        """
        backend = self.get_backend(model)
        active_scene = normalize_translation_scene(self.config, scene)
        return backend.translate(
            text=text,
            target_lang=target_lang,
            source_lang=source_lang,
            scene=active_scene,
        )
|