Commit 3cd09b3bf456bd8cefb7e62750c215f0137ef974

Authored by tangwang
1 parent 001b4889

翻译接口改为调用qwen-mt-flash

文档: 翻译模块说明.md
... ... @@ -31,3 +31,7 @@ CACHE_DIR=.cache
31 31  
32 32 # Frontend API Base URL
33 33 API_BASE_URL=http://120.76.41.98:6002
  34 +
  35 +# SECURITY: never commit real API keys — the keys previously pasted here must be rotated
  36 +DASHSCOPE_API_KEY=<your-dashscope-api-key>
  37 +OPENAI_API_KEY=<your-openai-api-key>
... ...
api/routes/search.py
... ... @@ -117,7 +117,7 @@ async def search(request: SearchRequest, http_request: Request):
117 117 performance_summary = context.get_summary() if context else None
118 118  
119 119 # Convert to response model
120   - return SearchResponse(
  120 + response = SearchResponse(
121 121 results=result.results,
122 122 total=result.total,
123 123 max_score=result.max_score,
... ... @@ -130,6 +130,18 @@ async def search(request: SearchRequest, http_request: Request):
130 130 debug_info=result.debug_info
131 131 )
132 132  
  133 + # Log complete response JSON
  134 + context.logger.info(
  135 + "Search response | "
  136 + f"Total results: {response.total} | "
  137 + f"Max score: {response.max_score:.4f} | "
  138 + f"Time: {response.took_ms}ms | "
  139 + f"Response: {response.model_dump_json()}",
  140 + extra={'reqid': context.reqid, 'uid': context.uid}
  141 + )
  142 +
  143 + return response
  144 +
133 145 except Exception as e:
134 146 # Log error in context
135 147 if context:
... ... @@ -203,7 +215,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
203 215 # Include performance summary in response
204 216 performance_summary = context.get_summary() if context else None
205 217  
206   - return SearchResponse(
  218 + response = SearchResponse(
207 219 results=result.results,
208 220 total=result.total,
209 221 max_score=result.max_score,
... ... @@ -215,6 +227,18 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
215 227 performance_info=performance_summary
216 228 )
217 229  
  230 + # Log complete response JSON
  231 + context.logger.info(
  232 + "Image search response | "
  233 + f"Total results: {response.total} | "
  234 + f"Max score: {response.max_score:.4f} | "
  235 + f"Time: {response.took_ms}ms | "
  236 + f"Response: {response.model_dump_json()}",
  237 + extra={'reqid': context.reqid, 'uid': context.uid}
  238 + )
  239 +
  240 + return response
  241 +
218 242 except ValueError as e:
219 243 if context:
220 244 context.set_error(e)
... ...
api/translator_app.py
... ... @@ -11,7 +11,7 @@ uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload
11 11 使用说明:
12 12 Translation HTTP Service
13 13  
14   -This service provides a RESTful API for text translation using DeepL API.
  14 +This service provides a RESTful API for text translation using Qwen (default) or DeepL API.
15 15 The service runs on port 6006 and provides a simple translation endpoint.
16 16  
17 17 API Endpoint:
... ... @@ -21,7 +21,8 @@ Request Body (JSON):
21 21 {
22 22 "text": "要翻译的文本",
23 23 "target_lang": "en", # Required: target language code (zh, en, ru, etc.)
24   - "source_lang": "zh" # Optional: source language code (auto-detect if not provided)
  24 + "source_lang": "zh", # Optional: source language code (auto-detect if not provided)
  25 + "model": "qwen" # Optional: translation model ("qwen" or "deepl", default: "qwen")
25 26 }
26 27  
27 28 Response (JSON):
... ... @@ -52,7 +53,17 @@ Usage Examples:
52 53 "target_lang": "zh"
53 54 }'
54 55  
55   -3. Translate Russian to English:
  56 +3. Translate using DeepL model:
  57 + curl -X POST http://localhost:6006/translate \
  58 + -H "Content-Type: application/json" \
  59 + -d '{
  60 + "text": "商品名称",
  61 + "target_lang": "en",
  62 + "source_lang": "zh",
  63 + "model": "deepl"
  64 + }'
  65 +
  66 +4. Translate Russian to English:
56 67 curl -X POST http://localhost:6006/translate \
57 68 -H "Content-Type: application/json" \
58 69 -d '{
... ... @@ -77,7 +88,7 @@ import sys
77 88 import logging
78 89 import argparse
79 90 import uvicorn
80   -from typing import Optional
  91 +from typing import Optional, Dict
81 92 from fastapi import FastAPI, HTTPException
82 93 from fastapi.responses import JSONResponse
83 94 from fastapi.middleware.cors import CORSMiddleware
... ... @@ -87,7 +98,7 @@ from pydantic import BaseModel, Field
87 98 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
88 99  
89 100 from query.translator import Translator
90   -from config.env_config import DEEPL_AUTH_KEY, REDIS_CONFIG
  101 +from config.env_config import DEEPL_AUTH_KEY, DASHSCOPE_API_KEY, REDIS_CONFIG
91 102  
92 103 # Configure logging
93 104 logging.basicConfig(
... ... @@ -99,22 +110,22 @@ logger = logging.getLogger(__name__)
99 110 # Fixed translation prompt
100 111 TRANSLATION_PROMPT = "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language."
101 112  
102   -# Global translator instance
103   -_translator: Optional[Translator] = None
  113 +# Global translator instances cache (keyed by model)
  114 +_translators: Dict[str, Translator] = {}
104 115  
105 116  
106   -def init_translator():
107   - """Initialize translator instance."""
108   - global _translator
109   - if _translator is None:
110   - logger.info("Initializing translator...")
111   - _translator = Translator(
112   - api_key=DEEPL_AUTH_KEY,
  117 +def get_translator(model: str = "qwen") -> Translator:
  118 + """Get or create translator instance for the specified model."""
  119 + global _translators
  120 + if model not in _translators:
  121 + logger.info(f"Initializing translator with model: {model}...")
  122 + _translators[model] = Translator(
  123 + model=model,
113 124 use_cache=True,
114 125 timeout=10
115 126 )
116   - logger.info("Translator initialized")
117   - return _translator
  127 + logger.info(f"Translator initialized with model: {model}")
  128 + return _translators[model]
118 129  
119 130  
120 131 # Request/Response models
... ... @@ -123,13 +134,15 @@ class TranslationRequest(BaseModel):
123 134 text: str = Field(..., description="Text to translate")
124 135 target_lang: str = Field(..., description="Target language code (zh, en, ru, etc.)")
125 136 source_lang: Optional[str] = Field(None, description="Source language code (optional, auto-detect if not provided)")
  137 + model: Optional[str] = Field("qwen", description="Translation model: 'qwen' (default) or 'deepl'")
126 138  
127 139 class Config:
128 140 json_schema_extra = {
129 141 "example": {
130 142 "text": "商品名称",
131 143 "target_lang": "en",
132   - "source_lang": "zh"
  144 + "source_lang": "zh",
  145 + "model": "qwen"
133 146 }
134 147 }
135 148  
... ... @@ -141,12 +154,13 @@ class TranslationResponse(BaseModel):
141 154 source_lang: Optional[str] = Field(None, description="Source language code (detected or provided)")
142 155 translated_text: str = Field(..., description="Translated text")
143 156 status: str = Field(..., description="Translation status")
  157 + model: str = Field(..., description="Translation model used")
144 158  
145 159  
146 160 # Create FastAPI app
147 161 app = FastAPI(
148 162 title="Translation Service API",
149   - description="RESTful API for text translation using DeepL",
  163 + description="RESTful API for text translation using Qwen (default) or DeepL",
150 164 version="1.0.0",
151 165 docs_url="/docs",
152 166 redoc_url="/redoc"
... ... @@ -166,9 +180,11 @@ app.add_middleware(
166 180 async def startup_event():
167 181 """Initialize translator on startup."""
168 182 logger.info("Starting Translation Service API on port 6006")
  183 + # Get default model from environment variable or use 'qwen'
  184 + default_model = os.getenv("TRANSLATION_MODEL", "qwen")
169 185 try:
170   - init_translator()
171   - logger.info("Translation service ready")
  186 + get_translator(model=default_model)
  187 + logger.info(f"Translation service ready with default model: {default_model}")
172 188 except Exception as e:
173 189 logger.error(f"Failed to initialize translator: {e}", exc_info=True)
174 190 logger.warning("Service will start but translation may not work correctly")
... ... @@ -178,10 +194,13 @@ async def startup_event():
178 194 async def health_check():
179 195 """Health check endpoint."""
180 196 try:
181   - translator = init_translator()
  197 + default_model = os.getenv("TRANSLATION_MODEL", "qwen")
  198 + translator = get_translator(model=default_model)
182 199 return {
183 200 "status": "healthy",
184 201 "service": "translation",
  202 + "default_model": default_model,
  203 + "available_models": list(_translators.keys()),
185 204 "translator_initialized": translator is not None,
186 205 "cache_enabled": translator.use_cache if translator else False
187 206 }
... ... @@ -203,6 +222,8 @@ async def translate(request: TranslationRequest):
203 222  
204 223 Uses a fixed prompt optimized for product SKU name translation.
205 224 The translation is cached in Redis for performance.
  225 +
  226 + Supports both Qwen (default) and DeepL models via the 'model' parameter.
206 227 """
207 228 if not request.text or not request.text.strip():
208 229 raise HTTPException(
... ... @@ -216,8 +237,17 @@ async def translate(request: TranslationRequest):
216 237 detail="target_lang is required"
217 238 )
218 239  
  240 + # Validate model parameter
  241 + model = request.model.lower() if request.model else "qwen"
  242 + if model not in ['qwen', 'deepl']:
  243 + raise HTTPException(
  244 + status_code=400,
  245 + detail=f"Invalid model: {model}. Supported models: 'qwen', 'deepl'"
  246 + )
  247 +
219 248 try:
220   - translator = init_translator()
  249 + # Get translator instance for the specified model
  250 + translator = get_translator(model=model)
221 251  
222 252 # Translate using the fixed prompt
223 253 translated_text = translator.translate(
... ... @@ -238,7 +268,8 @@ async def translate(request: TranslationRequest):
238 268 target_lang=request.target_lang,
239 269 source_lang=request.source_lang,
240 270 translated_text=translated_text,
241   - status="success"
  271 + status="success",
  272 + model=translator.model
242 273 )
243 274  
244 275 except HTTPException:
... ...
config/env_config.py
... ... @@ -39,6 +39,9 @@ REDIS_CONFIG = {
39 39 # DeepL API Key
40 40 DEEPL_AUTH_KEY = os.getenv('DEEPL_AUTH_KEY')
41 41  
  42 +# DashScope API Key (for Qwen models)
  43 +DASHSCOPE_API_KEY = os.getenv('DASHSCOPE_API_KEY')
  44 +
42 45 # API Service Configuration
43 46 API_HOST = os.getenv('API_HOST', '0.0.0.0')
44 47 API_PORT = int(os.getenv('API_PORT', 6002))
... ...
docs/翻译模块说明.md 0 → 100644
... ... @@ -0,0 +1,256 @@
  1 +# 翻译模块说明(Qwen / DeepL)
  2 +
  3 +本文档汇总翻译模块的**接口使用说明**与**Python 模块用法**,对应代码:
  4 +
  5 +- HTTP 服务:`api/translator_app.py`
  6 +- Python 模块:`query/translator.py`
  7 +
  8 +---
  9 +
  10 +## 1. 功能概述
  11 +
  12 +当前翻译模块支持两种后端:
  13 +
  14 +- **Qwen(默认)**:通过阿里云百炼 DashScope 的 OpenAI 兼容接口调用 `qwen-mt-flash`
  15 +- **DeepL**:通过 DeepL API 调用翻译(保留原有能力)
  16 +
  17 +两种方式均支持:
  18 +
  19 +- **Redis 缓存**(如启用):同文案同目标语言命中缓存直接返回
  20 +- **`source_lang` 自动检测**:当 `source_lang` 为空或 `"auto"` 时启用自动检测(Qwen 使用 `"auto"`)
  21 +
  22 +---
  23 +
  24 +## 2. 环境变量与配置
  25 +
  26 +项目会在 `config/env_config.py` 中加载项目根目录的 `.env`,常用变量如下:
  27 +
  28 +```env
  29 +# Qwen / DashScope
  30 +DASHSCOPE_API_KEY=sk-xxx
  31 +
  32 +# DeepL
  33 +DEEPL_AUTH_KEY=xxx
  34 +
  35 +# 可选:翻译服务默认模型(HTTP 服务启动后若请求不传 model,则使用此默认值)
  36 +TRANSLATION_MODEL=qwen # 或 deepl
  37 +```
  38 +
  39 +说明:
  40 +
  41 +- **Qwen** 使用 `DASHSCOPE_API_KEY`
  42 +- **DeepL** 使用 `DEEPL_AUTH_KEY`
  43 +- `.env` 中的 `OPENAI_API_KEY` 不是本翻译模块必须项(当前实现用的是 `DASHSCOPE_API_KEY`)
  44 +
  45 +---
  46 +
  47 +## 3. HTTP 翻译服务(`api/translator_app.py`)
  48 +
  49 +### 3.1 启动命令
  50 +
  51 +推荐(热更新):
  52 +
  53 +```bash
  54 +cd /home/tw/SearchEngine
  55 +uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload
  56 +```
  57 +
  58 +指定默认模型(不传请求 `model` 时生效):
  59 +
  60 +```bash
  61 +cd /home/tw/SearchEngine
  62 +export TRANSLATION_MODEL=qwen # 或 deepl
  63 +uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload
  64 +```
  65 +
  66 +### 3.2 接口列表
  67 +
  68 +- **GET** `/health`:健康检查(返回默认模型、已初始化模型列表等)
  69 +- **POST** `/translate`:翻译文本
  70 +- **GET** `/docs`:Swagger UI
  71 +
  72 +### 3.3 `/translate` 请求参数
  73 +
  74 +请求体(JSON):
  75 +
  76 +```json
  77 +{
  78 + "text": "要翻译的文本",
  79 + "target_lang": "en",
  80 + "source_lang": "auto",
  81 + "model": "qwen"
  82 +}
  83 +```
  84 +
  85 +- **text**:必填,待翻译文本
  86 +- **target_lang**:必填,目标语言代码(见“语言支持”)
  87 +- **source_lang**:可选,源语言代码;不传或传 `"auto"` 时自动检测
  88 +- **model**:可选,`"qwen"` 或 `"deepl"`;默认 `"qwen"`
  89 +
  90 +### 3.4 `/translate` 返回参数
  91 +
  92 +响应体(JSON,成功时):
  93 +
  94 +```json
  95 +{
  96 + "text": "商品名称",
  97 + "target_lang": "en",
  98 + "source_lang": "zh",
  99 + "translated_text": "Product name",
  100 + "status": "success",
  101 + "model": "qwen"
  102 +}
  103 +```
  104 +
  105 +### 3.5 请求示例(curl)
  106 +
  107 +健康检查:
  108 +
  109 +```bash
  110 +curl http://localhost:6006/health
  111 +```
  112 +
  113 +默认(qwen)中文 → 英文:
  114 +
  115 +```bash
  116 +curl -X POST http://localhost:6006/translate \
  117 + -H "Content-Type: application/json" \
  118 + -d '{"text":"我看到这个视频后没有笑","target_lang":"en","source_lang":"auto"}'
  119 +```
  120 +
  121 +显式指定 qwen,英文 → 简体中文:
  122 +
  123 +```bash
  124 +curl -X POST http://localhost:6006/translate \
  125 + -H "Content-Type: application/json" \
  126 + -d '{"text":"Product name","target_lang":"zh","source_lang":"en","model":"qwen"}'
  127 +```
  128 +
  129 +繁体中文(`zh_tw`)测试:
  130 +
  131 +```bash
  132 +curl -X POST http://localhost:6006/translate \
  133 + -H "Content-Type: application/json" \
  134 + -d '{"text":"商品名稱","target_lang":"zh_tw","source_lang":"auto","model":"qwen"}'
  135 +```
  136 +
  137 +切换 DeepL:
  138 +
  139 +```bash
  140 +curl -X POST http://localhost:6006/translate \
  141 + -H "Content-Type: application/json" \
  142 + -d '{"text":"商品名称","target_lang":"en","source_lang":"zh","model":"deepl"}'
  143 +```
  144 +
  145 +### 3.6 关于提示词(Prompt)
  146 +
  147 +HTTP 服务内部使用了固定提示词 `TRANSLATION_PROMPT`(适用于“商品 SKU 英文名”场景),并通过 `prompt` 参数传入 `Translator.translate()`。
  148 +
  149 +- **DeepL**:`prompt` 会作为 DeepL 的 `context` 使用(影响翻译但不被翻译)
  150 +- **Qwen**:当前实现未将 `prompt/context` 传给 Qwen 的 `translation_options`(即对 Qwen 不生效)
  151 +
  152 +---
  153 +
  154 +## 4. Python 翻译模块(`query/translator.py`)
  155 +
  156 +### 4.1 基本用法
  157 +
  158 +```python
  159 +from query.translator import Translator
  160 +
  161 +# 默认使用 qwen
  162 +translator = Translator()
  163 +
  164 +result = translator.translate(
  165 + text="我看到这个视频后没有笑",
  166 + target_lang="en",
  167 + source_lang="auto",
  168 +)
  169 +print(result)
  170 +```
  171 +
  172 +显式选择模型:
  173 +
  174 +```python
  175 +translator_qwen = Translator(model="qwen")
  176 +translator_deepl = Translator(model="deepl")
  177 +```
  178 +
  179 +### 4.2 关键参数
  180 +
  181 +- `Translator(model="qwen" | "deepl")`:选择翻译模型,默认 `"qwen"`
  182 +- `translate(text, target_lang, source_lang=None, context=None, prompt=None)`:
  183 + - `target_lang` / `source_lang`:语言代码(见“语言支持”)
  184 + - `source_lang` 为空或 `"auto"`:自动检测
  185 + - `prompt`:
  186 + - DeepL:作为 `context` 使用
  187 + - Qwen:当前未使用
  188 +
  189 +### 4.3 缓存(Redis)
  190 +
  191 +`Translator(use_cache=True)` 时会连接 Redis 并缓存翻译结果。
  192 +
  193 +- Redis 连接配置来自 `config/env_config.py` 的 `REDIS_CONFIG`
  194 +- 缓存 key 前缀默认 `trans`(可用 `REDIS_TRANSLATION_CACHE_PREFIX` 覆盖)
  195 +
  196 +---
  197 +
  198 +## 5. Qwen 语言支持(按 qwen-mt-plus/flash/turbo 标准)
  199 +
  200 +> 以下为 Qwen 翻译模型支持的语言(**代码 → 英文名**),并已用于 `query/translator.py` 的映射。
  201 +
  202 +| 代码 | 英文名 |
  203 +|------|--------|
  204 +| en | English |
  205 +| zh | Chinese |
  206 +| zh_tw | Traditional Chinese |
  207 +| ru | Russian |
  208 +| ja | Japanese |
  209 +| ko | Korean |
  210 +| es | Spanish |
  211 +| fr | French |
  212 +| pt | Portuguese |
  213 +| de | German |
  214 +| it | Italian |
  215 +| th | Thai |
  216 +| vi | Vietnamese |
  217 +| id | Indonesian |
  218 +| ms | Malay |
  219 +| ar | Arabic |
  220 +| hi | Hindi |
  221 +| he | Hebrew |
  222 +| my | Burmese |
  223 +| ta | Tamil |
  224 +| ur | Urdu |
  225 +| bn | Bengali |
  226 +| pl | Polish |
  227 +| nl | Dutch |
  228 +| ro | Romanian |
  229 +| tr | Turkish |
  230 +| km | Khmer |
  231 +| lo | Lao |
  232 +| yue | Cantonese |
  233 +| cs | Czech |
  234 +| el | Greek |
  235 +| sv | Swedish |
  236 +| hu | Hungarian |
  237 +| da | Danish |
  238 +| fi | Finnish |
  239 +| uk | Ukrainian |
  240 +| bg | Bulgarian |
  241 +
  242 +---
  243 +
  244 +## 6. 常见问题(FAQ)
  245 +
  246 +### 6.1 Qwen 调用报错 / 无法初始化
  247 +
  248 +- 确认 `.env` 中已配置 `DASHSCOPE_API_KEY`
  249 +- 确认安装依赖:`openai`(Python 包)
  250 +- 如在海外地域使用模型,将 `base_url` 切换为 `https://dashscope-intl.aliyuncs.com/compatible-mode/v1`
  251 +
  252 +### 6.2 DeepL 返回 403 / 翻译失败
  253 +
  254 +- 确认 `.env` 中已配置 `DEEPL_AUTH_KEY`
  255 +- 若使用的是 Pro key,请使用 `https://api.deepl.com/v2/translate`(当前代码即为该地址)
  256 +
... ...
query/translator.py
1 1 """
2 2 Translation service for multi-language query support.
3 3  
4   -Supports DeepL API for high-quality translations.
  4 +Supports multiple translation models:
  5 +- Qwen (default): Alibaba Cloud DashScope API using qwen-mt-flash model
  6 +- DeepL: DeepL API for high-quality translations
5 7  
  8 +使用方法 (Usage):
6 9  
7   -#### 官方文档:
8   -https://developers.deepl.com/api-reference/translate/request-translation
9   -#####
  10 +```python
  11 +from query.translator import Translator
  12 +
  13 +# 使用默认的 qwen 模型(推荐)
  14 +translator = Translator() # 默认使用 qwen 模型
  15 +
  16 +# 或显式指定模型
  17 +translator = Translator(model='qwen') # 使用 qwen 模型
  18 +translator = Translator(model='deepl') # 使用 DeepL 模型
  19 +
  20 +# 翻译文本
  21 +result = translator.translate(
  22 + text="我看到这个视频后没有笑",
  23 + target_lang="en",
  24 + source_lang="auto" # 自动检测源语言
  25 +)
  26 +```
10 27  
  28 +配置说明 (Configuration):
  29 +- Qwen 模型需要设置 DASHSCOPE_API_KEY 环境变量(在 .env 文件中)
  30 +- DeepL 模型需要设置 DEEPL_AUTH_KEY 环境变量(在 .env 文件中)
11 31  
  32 +Qwen 模型参考文档:
  33 +- 官方文档:https://help.aliyun.com/zh/model-studio/get-api-key
  34 +- 模型:qwen-mt-flash(快速翻译模型)
  35 +
  36 +DeepL 官方文档:
  37 +https://developers.deepl.com/api-reference/translate/request-translation
12 38 """
13 39  
  40 +import os
14 41 import requests
15 42 import re
16 43 import redis
... ... @@ -21,18 +48,21 @@ import logging
21 48  
22 49 logger = logging.getLogger(__name__)
23 50  
24   -# Try to import DEEPL_AUTH_KEY and REDIS_CONFIG, but allow import to fail
25   -try:
26   - from config.env_config import DEEPL_AUTH_KEY, REDIS_CONFIG
27   -except ImportError:
28   - DEEPL_AUTH_KEY = None
29   - REDIS_CONFIG = {}
  51 +from config.env_config import DEEPL_AUTH_KEY, DASHSCOPE_API_KEY, REDIS_CONFIG
  52 +from openai import OpenAI
30 53  
31 54  
32 55 class Translator:
33   - """Multi-language translator using DeepL API."""
  56 + """
  57 + Multi-language translator supporting Qwen and DeepL APIs.
  58 +
  59 + Default model is 'qwen' which uses Alibaba Cloud DashScope API.
  60 + """
34 61  
35 62 DEEPL_API_URL = "https://api.deepl.com/v2/translate" # Pro tier
  63 + QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1" # 北京地域
  64 + # 如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/compatible-mode/v1
  65 + QWEN_MODEL = "qwen-mt-flash" # 快速翻译模型
36 66  
37 67 # Language code mapping
38 68 LANG_CODE_MAP = {
... ... @@ -50,6 +80,7 @@ class Translator:
50 80  
51 81 def __init__(
52 82 self,
  83 + model: str = "qwen",
53 84 api_key: Optional[str] = None,
54 85 use_cache: bool = True,
55 86 timeout: int = 10,
... ... @@ -60,21 +91,40 @@ class Translator:
60 91 Initialize translator.
61 92  
62 93 Args:
63   - api_key: DeepL API key (or None to use from config/env)
  94 + model: Translation model to use. Options: 'qwen' (default) or 'deepl'
  95 + api_key: API key for the selected model (or None to use from config/env)
64 96 use_cache: Whether to cache translations
65 97 timeout: Request timeout in seconds
66   - glossary_id: DeepL glossary ID for custom terminology (optional)
  98 + glossary_id: DeepL glossary ID for custom terminology (optional, only for DeepL)
67 99 translation_context: Context hint for translation (e.g., "e-commerce", "product search")
68 100 """
  101 + self.model = model.lower()
  102 + if self.model not in ['qwen', 'deepl']:
  103 + raise ValueError(f"Unsupported model: {model}. Supported models: 'qwen', 'deepl'")
  104 +
69 105 # Get API key from config if not provided
70   - if api_key is None and DEEPL_AUTH_KEY:
71   - api_key = DEEPL_AUTH_KEY
  106 + if api_key is None:
  107 + if self.model == 'qwen':
  108 + api_key = DASHSCOPE_API_KEY or os.getenv("DASHSCOPE_API_KEY")
  109 + else: # deepl
  110 + api_key = DEEPL_AUTH_KEY or os.getenv("DEEPL_AUTH_KEY")
72 111  
73 112 self.api_key = api_key
74 113 self.timeout = timeout
75 114 self.use_cache = use_cache
76 115 self.glossary_id = glossary_id
77 116 self.translation_context = translation_context or "e-commerce product search"
  117 +
  118 + # Initialize OpenAI client for Qwen if needed
  119 + self.qwen_client = None
  120 + if self.model == 'qwen':
  121 + if not self.api_key:
  122 + logger.warning("DASHSCOPE_API_KEY not set. Qwen translation will not work.")
  123 + else:
  124 + self.qwen_client = OpenAI(
  125 + api_key=self.api_key,
  126 + base_url=self.QWEN_BASE_URL,
  127 + )
78 128  
79 129 # Initialize Redis cache if enabled
80 130 if use_cache:
... ... @@ -119,7 +169,7 @@ class Translator:
119 169 Args:
120 170 text: Text to translate
121 171 target_lang: Target language code ('zh', 'en', 'ru', etc.)
122   - source_lang: Source language code (optional, auto-detect if None)
  172 +            source_lang: Source language code (optional, auto-detect if None)
123 173 context: Additional context for translation (overrides default context)
124 174 prompt: Translation prompt/instruction (optional, for better translation quality)
125 175  
... ... @@ -174,13 +224,17 @@ class Translator:
174 224 )
175 225 return text
176 226  
177   - # Translate using DeepL (Pro endpoint only, no free fallback)
  227 + # Translate using selected model
178 228 logger.info(
179   - f"[Translator] Translation request | Original text: '{text}' | Target language: {target_lang} | "
  229 + f"[Translator] Translation request | Model: {self.model} | Original text: '{text}' | Target language: {target_lang} | "
180 230 f"Source language: {source_lang or 'auto'} | Context: {translation_context} | "
181 231 f"Prompt: {'yes' if prompt else 'no'} | Status: Starting translation"
182 232 )
183   - result = self._translate_deepl(text, target_lang, source_lang, translation_context, prompt)
  233 +
  234 + if self.model == 'qwen':
  235 + result = self._translate_qwen(text, target_lang, source_lang, translation_context, prompt)
  236 + else: # deepl
  237 + result = self._translate_deepl(text, target_lang, source_lang, translation_context, prompt)
184 238  
185 239 # If still failed, return original text with warning
186 240 if result is None:
... ... @@ -201,6 +255,122 @@ class Translator:
201 255  
202 256 return result
203 257  
  258 + def _translate_qwen(
  259 + self,
  260 + text: str,
  261 + target_lang: str,
  262 + source_lang: Optional[str],
  263 + context: Optional[str] = None,
  264 + prompt: Optional[str] = None
  265 + ) -> Optional[str]:
  266 + """
  267 + Translate using Qwen MT Flash model via Alibaba Cloud DashScope API.
  268 +
  269 + Args:
  270 + text: Text to translate
  271 + target_lang: Target language code ('zh', 'en', 'ru', etc.)
  272 + source_lang: Source language code (optional, 'auto' if None)
  273 + context: Context hint for translation (optional)
  274 + prompt: Translation prompt/instruction (optional)
  275 +
  276 + Returns:
  277 + Translated text or None if translation fails
  278 + """
  279 + if not self.qwen_client:
  280 + logger.error("[Translator] Qwen client not initialized. Check DASHSCOPE_API_KEY.")
  281 + return None
  282 +
  283 + # Qwen (qwen-mt-plus/flash/turbo) supported languages mapping
  284 +        # Language names follow the official qwen-mt (plus/flash/turbo) supported-language list
  285 + qwen_lang_map = {
  286 + "en": "English",
  287 + "zh": "Chinese",
  288 + "zh_tw": "Traditional Chinese",
  289 + "ru": "Russian",
  290 + "ja": "Japanese",
  291 + "ko": "Korean",
  292 + "es": "Spanish",
  293 + "fr": "French",
  294 + "pt": "Portuguese",
  295 + "de": "German",
  296 + "it": "Italian",
  297 + "th": "Thai",
  298 + "vi": "Vietnamese",
  299 + "id": "Indonesian",
  300 + "ms": "Malay",
  301 + "ar": "Arabic",
  302 + "hi": "Hindi",
  303 + "he": "Hebrew",
  304 + "my": "Burmese",
  305 + "ta": "Tamil",
  306 + "ur": "Urdu",
  307 + "bn": "Bengali",
  308 + "pl": "Polish",
  309 + "nl": "Dutch",
  310 + "ro": "Romanian",
  311 + "tr": "Turkish",
  312 + "km": "Khmer",
  313 + "lo": "Lao",
  314 + "yue": "Cantonese",
  315 + "cs": "Czech",
  316 + "el": "Greek",
  317 + "sv": "Swedish",
  318 + "hu": "Hungarian",
  319 + "da": "Danish",
  320 + "fi": "Finnish",
  321 + "uk": "Ukrainian",
  322 + "bg": "Bulgarian",
  323 + }
  324 +
  325 + # Convert target language
  326 + target_lang_normalized = target_lang.lower()
  327 + target_lang_qwen = qwen_lang_map.get(target_lang_normalized, target_lang.capitalize())
  328 +
  329 + # Convert source language
  330 + source_lang_normalized = (source_lang or "").strip().lower()
  331 + if not source_lang_normalized or source_lang_normalized == "auto":
  332 + source_lang_qwen = "auto"
  333 + else:
  334 + source_lang_qwen = qwen_lang_map.get(source_lang_normalized, source_lang.capitalize())
  335 +
  336 + # Prepare translation options
  337 + translation_options = {
  338 + "source_lang": source_lang_qwen,
  339 + "target_lang": target_lang_qwen,
  340 + }
  341 +
  342 + # Prepare messages
  343 + messages = [
  344 + {
  345 + "role": "user",
  346 + "content": text
  347 + }
  348 + ]
  349 +
  350 + try:
  351 + completion = self.qwen_client.chat.completions.create(
  352 + model=self.QWEN_MODEL,
  353 + messages=messages,
  354 + extra_body={
  355 + "translation_options": translation_options
  356 + }
  357 + )
  358 +
  359 + translated_text = completion.choices[0].message.content.strip()
  360 +
  361 + logger.debug(
  362 + f"[Translator] Qwen API response success | Original text: '{text}' | Target language: {target_lang_qwen} | "
  363 + f"Translation result: '{translated_text}'"
  364 + )
  365 + return translated_text
  366 +
  367 + except Exception as e:
  368 + logger.error(
  369 + f"[Translator] Qwen API request exception | Original text: '{text}' | Target language: {target_lang_qwen} | "
  370 + f"Error: {e}", exc_info=True
  371 + )
  372 + return None
  373 +
204 374 def _translate_deepl(
205 375 self,
206 376 text: str,
... ...