Blame view

translation/backends/qwen_mt.py 5.27 KB
cd4ce66d   tangwang   trans logs
1
  """Qwen-MT translation backend."""
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
2
3
4
  
  from __future__ import annotations
  
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
5
  import logging
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
6
7
8
9
  import re
  import time
  from typing import List, Optional, Sequence, Union
  
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
10
11
  from openai import OpenAI
  
0fd2f875   tangwang   translate
12
  from translation.languages import QWEN_LANGUAGE_CODES
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
13
14
15
16
17
  
  logger = logging.getLogger(__name__)
  
  
  class QwenMTTranslationBackend:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
18
19
      def __init__(
          self,
          capability_name: str,
          model: str,
          base_url: str,
          api_key: Optional[str] = None,
          timeout: int = 10,
          glossary_id: Optional[str] = None,
      ):
          """Validate the configuration and try to build the OpenAI-compatible client.

          Args:
              capability_name: Capability identifier; must normalize to ``"qwen-mt"``.
              model: Model name sent with each completion request; must be non-blank.
              base_url: Endpoint URL handed to the ``OpenAI`` client.
              api_key: API key. When falsy, ``_default_api_key`` is consulted.
              timeout: Per-request timeout, coerced with ``int()``.
              glossary_id: Stored on the instance; not used elsewhere in this class.

          Raises:
              ValueError: If ``capability_name`` or ``model`` fails validation.

          A missing key or a client construction failure is only logged; the
          instance is still created with ``_qwen_client`` left as ``None``.
          """
          self.capability_name = capability_name
          # NOTE: ``model`` holds the normalized capability name; the actual
          # model name sent to the API lives in ``qwen_model_name``.
          self.model = self._normalize_capability_name(capability_name)
          self.qwen_model_name = self._normalize_model_name(model)
          self.base_url = base_url
          self.timeout = int(timeout)
          self.glossary_id = glossary_id

          self._api_key = api_key or self._default_api_key(self.model)
          self._qwen_client: Optional[OpenAI] = None

          if not self._api_key:
              logger.warning("DASHSCOPE_API_KEY not set; qwen-mt translation unavailable")
              return

          try:
              self._qwen_client = OpenAI(api_key=self._api_key, base_url=self.base_url)
          except Exception as exc:
              # Best effort: keep the backend constructible even when the client is not.
              logger.warning("Failed to initialize qwen-mt client: %s", exc, exc_info=True)
  
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
44
45
46
47
48
      @property
      def supports_batch(self) -> bool:
          """Always True.

          ``translate`` accepts a list or tuple and translates its items one by
          one. Presumably callers read this flag to decide whether they may pass
          a sequence -- verify against the caller.
          """
          return True
  
      @staticmethod
0fd2f875   tangwang   translate
49
50
51
52
53
      def _normalize_capability_name(name: str) -> str:
          normalized = str(name or "").strip().lower()
          if normalized != "qwen-mt":
              raise ValueError(f"Qwen-MT backend capability must be 'qwen-mt', got '{name}'")
          return normalized
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
54
55
  
      @staticmethod
0fd2f875   tangwang   translate
56
57
58
59
60
      def _normalize_model_name(model: str) -> str:
          normalized = str(model or "").strip()
          if not normalized:
              raise ValueError("qwen-mt backend model is required")
          return normalized
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
61
62
63
64
  
      @staticmethod
      def _default_api_key(model: str) -> Optional[str]:
          del model
86d8358b   tangwang   config optimize
65
          return None
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
66
  
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
67
68
69
70
71
      def translate(
          self,
          text: Union[str, Sequence[str]],
          target_lang: str,
          source_lang: Optional[str] = None,
0fd2f875   tangwang   translate
72
          scene: Optional[str] = None,
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
73
74
75
76
77
78
79
80
81
82
83
      ) -> Union[Optional[str], List[Optional[str]]]:
          if isinstance(text, (list, tuple)):
              results: List[Optional[str]] = []
              for item in text:
                  if item is None or not str(item).strip():
                      results.append(item)  # type: ignore[arg-type]
                      continue
                  out = self.translate(
                      text=str(item),
                      target_lang=target_lang,
                      source_lang=source_lang,
0fd2f875   tangwang   translate
84
                      scene=scene,
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
85
86
87
88
89
90
91
92
93
94
95
96
97
98
                  )
                  results.append(out)
              return results
  
          if not text or not str(text).strip():
              return text  # type: ignore[return-value]
  
          tgt = (target_lang or "").strip().lower()
          src = (source_lang or "").strip().lower() or None
          if tgt == "en" and self._is_english_text(text):
              return text
          if tgt == "zh" and (self._contains_chinese(text) or self._is_pure_number(text)):
              return text
  
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
99
          result = self._translate_qwen(text, tgt, src)
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
100
101
102
103
104
105
106
107
108
109
110
111
          return result
  
      def _translate_qwen(
          self,
          text: str,
          target_lang: str,
          source_lang: Optional[str],
      ) -> Optional[str]:
          if not self._qwen_client:
              return None
          tgt_norm = (target_lang or "").strip().lower()
          src_norm = (source_lang or "").strip().lower()
0fd2f875   tangwang   translate
112
113
          tgt_qwen = QWEN_LANGUAGE_CODES.get(tgt_norm, tgt_norm.capitalize())
          src_qwen = "auto" if not src_norm or src_norm == "auto" else QWEN_LANGUAGE_CODES.get(src_norm, src_norm.capitalize())
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
          start = time.time()
          try:
              completion = self._qwen_client.chat.completions.create(
                  model=self.qwen_model_name,
                  messages=[{"role": "user", "content": text}],
                  extra_body={
                      "translation_options": {
                          "source_lang": src_qwen,
                          "target_lang": tgt_qwen,
                      }
                  },
                  timeout=self.timeout,
              )
              content = (completion.choices[0].message.content or "").strip()
              if not content:
                  return None
              logger.info("[qwen-mt] Success | src=%s tgt=%s latency=%.1fms", src_qwen, tgt_qwen, (time.time() - start) * 1000)
              return content
          except Exception as exc:
              logger.warning(
                  "[qwen-mt] Failed | src=%s tgt=%s latency=%.1fms error=%s",
                  src_qwen,
                  tgt_qwen,
                  (time.time() - start) * 1000,
                  exc,
                  exc_info=True,
              )
              return None
  
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
143
144
145
146
147
148
149
150
151
152
153
154
      @staticmethod
      def _contains_chinese(text: str) -> bool:
          return bool(re.search(r"[\u4e00-\u9fff]", text or ""))
  
      @staticmethod
      def _is_english_text(text: str) -> bool:
          stripped = (text or "").strip()
          return bool(stripped) and bool(re.fullmatch(r"[A-Za-z0-9\s\W]+", stripped)) and not QwenMTTranslationBackend._contains_chinese(stripped)
  
      @staticmethod
      def _is_pure_number(text: str) -> bool:
          return bool(re.fullmatch(r"[\d.\-+%/,: ]+", (text or "").strip()))