Blame view

translation/service.py 7.05 KB
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
1
2
3
4
5
  """Translation service orchestration."""
  
  from __future__ import annotations
  
  import logging
0fd2f875   tangwang   translate
6
  import threading
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
7
8
  from typing import Dict, List, Optional
  
0fd2f875   tangwang   translate
9
  from config.services_config import get_translation_config
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
10
  from translation.protocols import TranslateInput, TranslateOutput, TranslationBackendProtocol
0fd2f875   tangwang   translate
11
12
13
14
15
16
17
  from translation.settings import (
      TranslationConfig,
      get_enabled_translation_models,
      get_translation_capability,
      normalize_translation_model,
      normalize_translation_scene,
  )
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
18
19
20
21
22
23
24
  
  logger = logging.getLogger(__name__)
  
  
  class TranslationService:
      """Own the translation backends and dispatch calls by model and scene.

      The enabled capabilities are read from the configuration once, at
      construction time. The backend object for a model is only built the
      first time that model is requested and is then kept for later calls.
      """

      def __init__(self, config: Optional[TranslationConfig] = None) -> None:
          """Resolve the configuration and index the enabled capabilities.

          Args:
              config: Translation configuration. Any falsy value (including
                  ``None``) is replaced by ``get_translation_config()``.

          Raises:
              ValueError: If no capability is enabled, or if an enabled
                  capability does not define a backend.
          """
          self.config = config or get_translation_config()
          self._enabled_capabilities = self._collect_enabled_capabilities()
          # Cache of already constructed backends, keyed by normalized model name.
          self._backends: Dict[str, TranslationBackendProtocol] = {}
          # Serializes backend construction in ``get_backend``.
          self._backend_lock = threading.Lock()
          if self._enabled_capabilities:
              return
          raise ValueError("No enabled translation backends found in services.translation.capabilities")

      def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]:
          """Return the capability settings of every enabled model, keyed by name.

          Raises:
              ValueError: If a capability has no (or an empty) ``backend`` entry.
          """
          capabilities: Dict[str, Dict[str, object]] = {}
          for model_name in get_enabled_translation_models(self.config):
              settings = get_translation_capability(self.config, model_name, require_enabled=True)
              if not settings.get("backend"):
                  raise ValueError(f"Translation capability '{model_name}' must define a backend")
              capabilities[model_name] = settings
          return capabilities

      def _create_backend(
          self,
          *,
          name: str,
          backend_type: str,
          cfg: Dict[str, object],
      ) -> TranslationBackendProtocol:
          """Build the backend for capability ``name`` using the matching factory.

          Args:
              name: Capability name, forwarded to the factory.
              backend_type: Key selecting the factory method.
              cfg: Capability settings, forwarded to the factory.

          Raises:
              ValueError: If ``backend_type`` has no registered factory.
          """
          factories = {
              "qwen_mt": self._create_qwen_mt_backend,
              "deepl": self._create_deepl_backend,
              "llm": self._create_llm_backend,
              "local_nllb": self._create_local_nllb_backend,
              "local_marian": self._create_local_marian_backend,
          }
          if backend_type not in factories:
              raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'")
          build = factories[backend_type]
          return build(name=name, cfg=cfg)

      def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
          """Build a ``QwenMTTranslationBackend`` from the capability settings.

          ``model``, ``base_url``, ``use_cache`` and ``timeout_sec`` are looked up
          with ``cfg[...]`` and must be present; ``api_key`` and ``glossary_id``
          are read with ``cfg.get`` and may be absent.
          """
          # Imported here rather than at module level, so the backend module is
          # only loaded when this factory actually runs.
          from translation.backends.qwen_mt import QwenMTTranslationBackend

          model_name = str(cfg["model"]).strip()
          endpoint = str(cfg["base_url"]).strip()
          cache_enabled = bool(cfg["use_cache"])
          timeout_seconds = int(cfg["timeout_sec"])
          return QwenMTTranslationBackend(
              capability_name=name,
              model=model_name,
              base_url=endpoint,
              api_key=cfg.get("api_key"),
              use_cache=cache_enabled,
              timeout=timeout_seconds,
              glossary_id=cfg.get("glossary_id"),
          )

      def _create_deepl_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
          """Build a ``DeepLTranslationBackend`` from the capability settings.

          ``api_url`` and ``timeout_sec`` are looked up with ``cfg[...]`` and must
          be present; ``api_key`` and ``glossary_id`` may be absent. The
          capability ``name`` is accepted for a uniform factory signature but is
          not forwarded to the backend.
          """
          from translation.backends.deepl import DeepLTranslationBackend

          endpoint = str(cfg["api_url"]).strip()
          timeout_seconds = float(cfg["timeout_sec"])
          return DeepLTranslationBackend(
              api_key=cfg.get("api_key"),
              api_url=endpoint,
              timeout=timeout_seconds,
              glossary_id=cfg.get("glossary_id"),
          )

      def _create_llm_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
          """Build an ``LLMTranslationBackend`` from the capability settings.

          ``model``, ``timeout_sec`` and ``base_url`` are looked up with
          ``cfg[...]`` and must be present.
          """
          from translation.backends.llm import LLMTranslationBackend

          model_name = str(cfg["model"]).strip()
          timeout_seconds = float(cfg["timeout_sec"])
          endpoint = str(cfg["base_url"]).strip()
          return LLMTranslationBackend(
              capability_name=name,
              model=model_name,
              timeout_sec=timeout_seconds,
              base_url=endpoint,
          )

      @staticmethod
      def _local_seq2seq_options(name: str, cfg: Dict[str, object]) -> Dict[str, object]:
          """Return the constructor options shared by the local NLLB and Marian backends.

          Every key is looked up with ``cfg[...]`` and must be present.
          """
          return {
              "name": name,
              "model_id": str(cfg["model_id"]).strip(),
              "model_dir": str(cfg["model_dir"]).strip(),
              "device": str(cfg["device"]).strip(),
              "torch_dtype": str(cfg["torch_dtype"]).strip(),
              "batch_size": int(cfg["batch_size"]),
              "max_input_length": int(cfg["max_input_length"]),
              "max_new_tokens": int(cfg["max_new_tokens"]),
              "num_beams": int(cfg["num_beams"]),
          }

      def _create_local_nllb_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
          """Build an ``NLLBTranslationBackend`` from the capability settings.

          ``attn_implementation`` is read with ``cfg.get`` and may be absent.
          """
          from translation.backends.local_seq2seq import NLLBTranslationBackend

          options = self._local_seq2seq_options(name, cfg)
          return NLLBTranslationBackend(
              **options,
              attn_implementation=cfg.get("attn_implementation"),
          )

      def _create_local_marian_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
          """Build a ``MarianMTTranslationBackend`` from the capability settings.

          The single source/target language pair is obtained from
          ``get_marian_language_direction(name)``; ``attn_implementation`` is
          read with ``cfg.get`` and may be absent.
          """
          from translation.backends.local_seq2seq import MarianMTTranslationBackend, get_marian_language_direction

          # Resolve the direction before touching ``cfg`` so a failure here
          # surfaces ahead of any missing-key error.
          source_lang, target_lang = get_marian_language_direction(name)
          options = self._local_seq2seq_options(name, cfg)
          return MarianMTTranslationBackend(
              **options,
              source_langs=[source_lang],
              target_langs=[target_lang],
              attn_implementation=cfg.get("attn_implementation"),
          )

      @property
      def available_models(self) -> List[str]:
          """Names of all enabled capabilities, whether or not their backend is built yet."""
          return list(self._enabled_capabilities)

      @property
      def loaded_models(self) -> List[str]:
          """Names of the capabilities whose backend has already been constructed."""
          return list(self._backends)

      def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol:
          """Return the backend for ``model``, constructing and caching it on first use.

          Args:
              model: Requested model name; it is passed through
                  ``normalize_translation_model`` before the lookup.

          Raises:
              ValueError: If the normalized model is not among the enabled
                  capabilities.
          """
          model_key = normalize_translation_model(self.config, model)
          capability = self._enabled_capabilities.get(model_key)
          if capability is None:
              raise ValueError(
                  f"Translation model '{model_key}' is not enabled. "
                  f"Available models: {', '.join(self.available_models) or 'none'}"
              )

          # First look without taking the lock; an existing backend is returned as is.
          cached = self._backends.get(model_key)
          if cached is not None:
              return cached

          with self._backend_lock:
              # Look again while holding the lock: another caller may have
              # stored the backend between the check above and acquiring it.
              cached = self._backends.get(model_key)
              if cached is not None:
                  return cached
              kind = str(capability["backend"])
              logger.info("Initializing translation backend | model=%s backend=%s", model_key, kind)
              created = self._create_backend(
                  name=model_key,
                  backend_type=kind,
                  cfg=capability,
              )
              self._backends[model_key] = created
              return created

      def translate(
          self,
          text: TranslateInput,
          target_lang: str,
          source_lang: Optional[str] = None,
          *,
          model: Optional[str] = None,
          scene: Optional[str] = None,
      ) -> TranslateOutput:
          """Translate ``text`` with the backend selected by ``model``.

          Args:
              text: Input forwarded unchanged to the backend.
              target_lang: Target language forwarded to the backend.
              source_lang: Optional source language forwarded to the backend.
              model: Model name used to pick the backend via ``get_backend``.
              scene: Scene name; it is passed through
                  ``normalize_translation_scene`` before being forwarded.

          Returns:
              Whatever the backend's ``translate`` call returns.

          Raises:
              ValueError: If ``model`` does not resolve to an enabled capability.
          """
          selected = self.get_backend(model)
          resolved_scene = normalize_translation_scene(self.config, scene)
          return selected.translate(
              text=text,
              target_lang=target_lang,
              source_lang=source_lang,
              scene=resolved_scene,
          )