Blame view

translation/backends/qwen_mt.py 5.37 KB
cd4ce66d   tangwang   trans logs
1
  """Qwen-MT translation backend."""
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
2
3
4
  
  from __future__ import annotations
  
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
5
6
7
8
9
10
  import logging
  import os
  import re
  import time
  from typing import List, Optional, Sequence, Union
  
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
11
12
  from openai import OpenAI
  
cd4ce66d   tangwang   trans logs
13
  from config.env_config import DASHSCOPE_API_KEY
0fd2f875   tangwang   translate
14
  from translation.languages import QWEN_LANGUAGE_CODES
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
15
16
17
18
19
  
  # Module-level logger, named after this module via __name__.
  logger = logging.getLogger(__name__)
  
  
  class QwenMTTranslationBackend:
      """Translation backend that calls a Qwen-MT model via an OpenAI client.

      The backend never raises from ``translate``: when the client is
      unavailable, the request fails, or the model returns empty content, the
      result is ``None`` and a warning is logged.
      """

      # Compiled once at class creation so the per-call heuristics below do not
      # go through the ``re`` module pattern cache on every invocation.
      _CJK_PATTERN = re.compile(r"[\u4e00-\u9fff]")
      # ``\W`` does not cover "_" (it is a word character), so it is listed
      # explicitly; otherwise ASCII text such as "hello_world" is rejected.
      _ENGLISH_PATTERN = re.compile(r"[A-Za-z0-9_\s\W]+")
      _NUMERIC_PATTERN = re.compile(r"[\d.\-+%/,: ]+")

      def __init__(
          self,
          capability_name: str,
          model: str,
          base_url: str,
          api_key: Optional[str] = None,
          timeout: int = 10,
          glossary_id: Optional[str] = None,
      ):
          """Validate the configuration and try to build the OpenAI client.

          Args:
              capability_name: Must normalize to ``"qwen-mt"``.
              model: Model name sent with every request; must be non-blank.
              base_url: Endpoint passed to the ``OpenAI`` client.
              api_key: Explicit key; when falsy, the DASHSCOPE_API_KEY setting
                  or environment variable is used instead.
              timeout: Per-request timeout, coerced with ``int()``.
              glossary_id: Stored on the instance; no method of this class
                  reads it.

          Raises:
              ValueError: If ``capability_name`` or ``model`` is invalid.
          """
          self.capability_name = capability_name
          # NOTE: ``self.model`` holds the normalized *capability* name; the
          # actual model identifier lives in ``self.qwen_model_name``.
          self.model = self._normalize_capability_name(capability_name)
          self.qwen_model_name = self._normalize_model_name(model)
          self.base_url = base_url
          self.timeout = int(timeout)
          self.glossary_id = glossary_id

          self._api_key = api_key or self._default_api_key(self.model)
          self._qwen_client: Optional[OpenAI] = None
          if not self._api_key:
              logger.warning("DASHSCOPE_API_KEY not set; qwen-mt translation unavailable")
              return
          try:
              self._qwen_client = OpenAI(api_key=self._api_key, base_url=self.base_url)
          except Exception as exc:
              # Best effort: a broken client leaves the backend constructed but
              # inert (every translation then returns None).
              logger.warning("Failed to initialize qwen-mt client: %s", exc, exc_info=True)

      @property
      def supports_batch(self) -> bool:
          """Always ``True``: ``translate`` accepts a list or tuple of texts."""
          return True

      @staticmethod
      def _normalize_capability_name(name: str) -> str:
          """Return ``name`` stripped and lower-cased; it must equal ``"qwen-mt"``.

          Raises:
              ValueError: If the normalized name is anything else.
          """
          normalized = str(name or "").strip().lower()
          if normalized != "qwen-mt":
              raise ValueError(f"Qwen-MT backend capability must be 'qwen-mt', got '{name}'")
          return normalized

      @staticmethod
      def _normalize_model_name(model: str) -> str:
          """Return ``model`` stripped of surrounding whitespace.

          Raises:
              ValueError: If the result is empty.
          """
          normalized = str(model or "").strip()
          if not normalized:
              raise ValueError("qwen-mt backend model is required")
          return normalized

      @staticmethod
      def _default_api_key(model: str) -> Optional[str]:
          """Return the configured DASHSCOPE_API_KEY, falling back to the environment.

          ``model`` is accepted for signature compatibility and ignored.
          """
          del model
          return DASHSCOPE_API_KEY or os.getenv("DASHSCOPE_API_KEY")

      def translate(
          self,
          text: Union[str, Sequence[str]],
          target_lang: str,
          source_lang: Optional[str] = None,
          scene: Optional[str] = None,
      ) -> Union[Optional[str], List[Optional[str]]]:
          """Translate one text, or each element of a list/tuple of texts.

          Args:
              text: A string, or a list/tuple of strings.
              target_lang: Target language code; stripped and lower-cased.
              source_lang: Source language code; blank or ``None`` means the
                  backend asks the model to auto-detect.
              scene: Forwarded to the per-item calls in batch mode; not used
                  for the request itself.

          Returns:
              For a list/tuple: a list of the same length where ``None`` or
              blank items are passed through unchanged and every other item
              is the single-text result. For a single text: the input itself
              when it is blank or one of the skip heuristics applies,
              otherwise the translated string, or ``None`` on failure.
          """
          if isinstance(text, (list, tuple)):
              return [
                  item  # type: ignore[misc]
                  if item is None or not str(item).strip()
                  else self.translate(
                      text=str(item),
                      target_lang=target_lang,
                      source_lang=source_lang,
                      scene=scene,
                  )
                  for item in text
              ]

          if not text or not str(text).strip():
              return text  # type: ignore[return-value]

          tgt = (target_lang or "").strip().lower()
          src = (source_lang or "").strip().lower() or None

          # Skip heuristics: avoid an API call when the text already looks like
          # the target language. They intentionally ignore ``source_lang``.
          if tgt == "en" and self._is_english_text(text):
              return text
          if tgt == "zh" and (self._contains_chinese(text) or self._is_pure_number(text)):
              return text

          return self._translate_qwen(text, tgt, src)

      def _translate_qwen(
          self,
          text: str,
          target_lang: str,
          source_lang: Optional[str],
      ) -> Optional[str]:
          """Send one text to the model and return the stripped reply.

          Language codes are mapped through ``QWEN_LANGUAGE_CODES``; unknown
          codes fall back to their capitalized form, and a missing or
          ``"auto"`` source is sent as ``"auto"``.

          Returns:
              The translated text, or ``None`` when no client is configured,
              the request raises, or the reply is empty.
          """
          if not self._qwen_client:
              return None

          tgt_norm = (target_lang or "").strip().lower()
          src_norm = (source_lang or "").strip().lower()
          tgt_qwen = QWEN_LANGUAGE_CODES.get(tgt_norm, tgt_norm.capitalize())
          if not src_norm or src_norm == "auto":
              src_qwen = "auto"
          else:
              src_qwen = QWEN_LANGUAGE_CODES.get(src_norm, src_norm.capitalize())

          # perf_counter is monotonic, so the reported latency cannot go
          # negative or jump when the wall clock is adjusted.
          start = time.perf_counter()
          try:
              completion = self._qwen_client.chat.completions.create(
                  model=self.qwen_model_name,
                  messages=[{"role": "user", "content": text}],
                  extra_body={
                      "translation_options": {
                          "source_lang": src_qwen,
                          "target_lang": tgt_qwen,
                      }
                  },
                  timeout=self.timeout,
              )
              content = (completion.choices[0].message.content or "").strip()
          except Exception as exc:
              # Deliberate best effort: any client/transport/shape error is
              # logged and reported to the caller as "no translation".
              logger.warning(
                  "[qwen-mt] Failed | src=%s tgt=%s latency=%.1fms error=%s",
                  src_qwen,
                  tgt_qwen,
                  (time.perf_counter() - start) * 1000,
                  exc,
                  exc_info=True,
              )
              return None

          latency_ms = (time.perf_counter() - start) * 1000
          if not content:
              # Log the empty reply so it is distinguishable from other None results.
              logger.warning(
                  "[qwen-mt] Empty response | src=%s tgt=%s latency=%.1fms",
                  src_qwen,
                  tgt_qwen,
                  latency_ms,
              )
              return None
          logger.info("[qwen-mt] Success | src=%s tgt=%s latency=%.1fms", src_qwen, tgt_qwen, latency_ms)
          return content

      @staticmethod
      def _contains_chinese(text: str) -> bool:
          """Return True if ``text`` has any character in U+4E00..U+9FFF."""
          return bool(QwenMTTranslationBackend._CJK_PATTERN.search(text or ""))

      @staticmethod
      def _is_english_text(text: str) -> bool:
          """Return True if the stripped text is non-empty and looks English.

          "Looks English" means every character is an ASCII letter, digit,
          underscore, whitespace, or a non-word character, and none is in the
          U+4E00..U+9FFF range.
          """
          stripped = (text or "").strip()
          if not stripped:
              return False
          if not QwenMTTranslationBackend._ENGLISH_PATTERN.fullmatch(stripped):
              return False
          return not QwenMTTranslationBackend._contains_chinese(stripped)

      @staticmethod
      def _is_pure_number(text: str) -> bool:
          """Return True if the stripped text has only digits and ``. - + % / , :`` or spaces."""
          return bool(QwenMTTranslationBackend._NUMERIC_PATTERN.fullmatch((text or "").strip()))