Blame view

translation/service.py 6.92 KB
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
1
2
3
4
5
  """Translation service orchestration."""
  
  from __future__ import annotations
  
  import logging
0fd2f875   tangwang   translate
6
  import threading
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
7
8
  from typing import Dict, List, Optional
  
0fd2f875   tangwang   translate
9
  from config.services_config import get_translation_config
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
10
  from translation.protocols import TranslateInput, TranslateOutput, TranslationBackendProtocol
0fd2f875   tangwang   translate
11
12
13
14
15
16
17
  from translation.settings import (
      TranslationConfig,
      get_enabled_translation_models,
      get_translation_capability,
      normalize_translation_model,
      normalize_translation_scene,
  )
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
18
19
20
21
22
23
24
  
  logger = logging.getLogger(__name__)
  
  
  class TranslationService:
      """Owns translation backends and routes calls by model and scene."""
  
0fd2f875   tangwang   translate
25
      def __init__(self, config: Optional[TranslationConfig] = None) -> None:
          """Set up the service state from a translation configuration.

          Args:
              config: Translation configuration to use. When it is falsy
                  (for example ``None``), ``get_translation_config()`` supplies it.

          Raises:
              ValueError: If no enabled capability was collected.
          """
          if config:
              self.config = config
          else:
              self.config = get_translation_config()

          capabilities = self._collect_enabled_capabilities()
          self._enabled_capabilities = capabilities

          # Backends are not built here; get_backend() fills this mapping on
          # first use and takes the lock while doing so.
          self._backends: Dict[str, TranslationBackendProtocol] = {}
          self._backend_lock = threading.Lock()

          if not capabilities:
              raise ValueError("No enabled translation backends found in services.translation.capabilities")
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
32
  
0fd2f875   tangwang   translate
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
      def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]:
          """Return the config of every enabled translation model, keyed by name.

          Names come from ``get_enabled_translation_models(self.config)`` and each
          config from ``get_translation_capability(..., require_enabled=True)``.

          Raises:
              ValueError: If a capability has no truthy ``"backend"`` entry.
          """
          capabilities_by_name: Dict[str, Dict[str, object]] = {}
          for model_name in get_enabled_translation_models(self.config):
              entry = get_translation_capability(self.config, model_name, require_enabled=True)
              if entry.get("backend"):
                  capabilities_by_name[model_name] = entry
                  continue
              # A capability without a backend type cannot be routed anywhere.
              raise ValueError(f"Translation capability '{model_name}' must define a backend")
          return capabilities_by_name
  
      def _create_backend(
          self,
          *,
          name: str,
          backend_type: str,
          cfg: Dict[str, object],
      ) -> TranslationBackendProtocol:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
50
          registry = {
0fd2f875   tangwang   translate
51
              "qwen_mt": self._create_qwen_mt_backend,
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
52
53
              "deepl": self._create_deepl_backend,
              "llm": self._create_llm_backend,
0fd2f875   tangwang   translate
54
55
              "local_nllb": self._create_local_nllb_backend,
              "local_marian": self._create_local_marian_backend,
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
56
          }
0fd2f875   tangwang   translate
57
58
59
60
          factory = registry.get(backend_type)
          if factory is None:
              raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'")
          return factory(name=name, cfg=cfg)
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
61
  
0fd2f875   tangwang   translate
62
      def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
          """Build a ``QwenMTTranslationBackend`` for capability *name*.

          Reads ``model``, ``base_url``, ``use_cache`` and ``timeout_sec`` from
          *cfg* by subscript, so a missing key raises ``KeyError``. ``api_key``
          and ``glossary_id`` are read with ``.get`` and may be absent.
          """
          # Imported inside the method, so the backend module is only loaded
          # when this factory actually runs.
          from translation.backends.qwen_mt import QwenMTTranslationBackend

          model_name = str(cfg["model"]).strip()
          endpoint = str(cfg["base_url"]).strip()
          api_key = cfg.get("api_key")
          cache_enabled = bool(cfg["use_cache"])
          timeout_seconds = int(cfg["timeout_sec"])
          glossary = cfg.get("glossary_id")

          return QwenMTTranslationBackend(
              capability_name=name,
              model=model_name,
              base_url=endpoint,
              api_key=api_key,
              use_cache=cache_enabled,
              timeout=timeout_seconds,
              glossary_id=glossary,
          )
  
0fd2f875   tangwang   translate
75
      def _create_deepl_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
          """Build a ``DeepLTranslationBackend`` from *cfg*.

          ``api_url`` and ``timeout_sec`` are read by subscript, so a missing key
          raises ``KeyError``. ``api_key`` and ``glossary_id`` are read with
          ``.get``. *name* is accepted so all factories share one signature; it
          is not passed to the backend.
          """
          # Imported inside the method, so the backend module is only loaded
          # when this factory actually runs.
          from translation.backends.deepl import DeepLTranslationBackend

          api_key = cfg.get("api_key")
          endpoint = str(cfg["api_url"]).strip()
          timeout_seconds = float(cfg["timeout_sec"])
          glossary = cfg.get("glossary_id")

          return DeepLTranslationBackend(
              api_key=api_key,
              api_url=endpoint,
              timeout=timeout_seconds,
              glossary_id=glossary,
          )
  
0fd2f875   tangwang   translate
85
      def _create_llm_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
          """Build an ``LLMTranslationBackend`` for capability *name*.

          Reads ``model``, ``timeout_sec`` and ``base_url`` from *cfg* by
          subscript, so a missing key raises ``KeyError``.
          """
          # Imported inside the method, so the backend module is only loaded
          # when this factory actually runs.
          from translation.backends.llm import LLMTranslationBackend

          model_name = str(cfg["model"]).strip()
          timeout_seconds = float(cfg["timeout_sec"])
          endpoint = str(cfg["base_url"]).strip()

          return LLMTranslationBackend(
              capability_name=name,
              model=model_name,
              timeout_sec=timeout_seconds,
              base_url=endpoint,
          )
  
      def _create_local_nllb_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
          """Build an ``NLLBTranslationBackend`` for capability *name*.

          Every setting is read from *cfg* by subscript, so a missing key raises
          ``KeyError``. Text settings are stringified and stripped; numeric
          settings are converted with ``int``.
          """
          # Imported inside the method, so the backend module is only loaded
          # when this factory actually runs.
          from translation.backends.local_seq2seq import NLLBTranslationBackend

          text_keys = ("model_id", "model_dir", "device", "torch_dtype")
          int_keys = ("batch_size", "max_input_length", "max_new_tokens", "num_beams")

          # Keys are read in the order listed above.
          settings: Dict[str, object] = {key: str(cfg[key]).strip() for key in text_keys}
          settings.update({key: int(cfg[key]) for key in int_keys})

          return NLLBTranslationBackend(name=name, **settings)
  
      def _create_local_marian_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
          """Build a ``MarianMTTranslationBackend`` for capability *name*.

          The language pair comes from ``get_marian_language_direction(name)``
          and is passed as single-element ``source_langs`` / ``target_langs``
          lists. Every other setting is read from *cfg* by subscript, so a
          missing key raises ``KeyError``.
          """
          # Imported inside the method, so the backend module is only loaded
          # when this factory actually runs.
          from translation.backends.local_seq2seq import MarianMTTranslationBackend, get_marian_language_direction

          # The direction is resolved before any cfg key is read.
          direction = get_marian_language_direction(name)
          source_lang, target_lang = direction

          text_keys = ("model_id", "model_dir", "device", "torch_dtype")
          int_keys = ("batch_size", "max_input_length", "max_new_tokens", "num_beams")

          settings: Dict[str, object] = {key: str(cfg[key]).strip() for key in text_keys}
          settings.update({key: int(cfg[key]) for key in int_keys})

          return MarianMTTranslationBackend(
              name=name,
              **settings,
              source_langs=[source_lang],
              target_langs=[target_lang],
          )
  
      @property
      def available_models(self) -> List[str]:
0fd2f875   tangwang   translate
131
132
133
134
          return list(self._enabled_capabilities.keys())
  
      @property
      def loaded_models(self) -> List[str]:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
135
136
137
          return list(self._backends.keys())
  
      def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol:
          """Return the backend for *model*, creating and caching it on first use.

          Args:
              model: Requested model name; it is passed through
                  ``normalize_translation_model`` before lookup.

          Returns:
              The cached backend, or a newly created one that is now cached.

          Raises:
              ValueError: If the normalized model is not an enabled capability.
          """
          model_name = normalize_translation_model(self.config, model)
          capability_cfg = self._enabled_capabilities.get(model_name)
          if capability_cfg is None:
              enabled_names = ", ".join(self.available_models) or "none"
              raise ValueError(
                  f"Translation model '{model_name}' is not enabled. "
                  f"Available models: {enabled_names}"
              )

          # First look without the lock: an already-built backend is returned as is.
          cached = self._backends.get(model_name)
          if cached is not None:
              return cached

          with self._backend_lock:
              # Look again under the lock; another caller may have built the
              # backend between the first look and acquiring the lock.
              cached = self._backends.get(model_name)
              if cached is not None:
                  return cached

              backend_type = str(capability_cfg["backend"])
              logger.info("Initializing translation backend | model=%s backend=%s", model_name, backend_type)
              created = self._create_backend(
                  name=model_name,
                  backend_type=backend_type,
                  cfg=capability_cfg,
              )
              self._backends[model_name] = created
              return created
  
      def translate(
          self,
          text: TranslateInput,
          target_lang: str,
          source_lang: Optional[str] = None,
          *,
          model: Optional[str] = None,
          scene: Optional[str] = None,
      ) -> TranslateOutput:
          """Translate *text* with the backend selected by *model*.

          Args:
              text: Input forwarded unchanged to the backend.
              target_lang: Target language, forwarded to the backend.
              source_lang: Optional source language, forwarded to the backend.
              model: Model name used to pick the backend via ``get_backend``.
              scene: Scene name; it is passed through
                  ``normalize_translation_scene`` before reaching the backend.

          Returns:
              Whatever the backend's ``translate`` call returns.
          """
          # The backend is resolved first, then the scene is normalized.
          selected_backend = self.get_backend(model)
          resolved_scene = normalize_translation_scene(self.config, scene)

          request = {
              "text": text,
              "target_lang": target_lang,
              "source_lang": source_lang,
              "scene": resolved_scene,
          }
          return selected_backend.translate(**request)