Commit 3cd09b3bf456bd8cefb7e62750c215f0137ef974

Authored by tangwang
1 parent 001b4889

翻译接口改为调用qwen-mt-flash

文档: 翻译模块说明.md
@@ -31,3 +31,7 @@ CACHE_DIR=.cache @@ -31,3 +31,7 @@ CACHE_DIR=.cache
31 31
32 # Frontend API Base URL 32 # Frontend API Base URL
33 API_BASE_URL=http://120.76.41.98:6002 33 API_BASE_URL=http://120.76.41.98:6002
  34 +
  35 +
  36 +DASHSCOPE_API_KEY=sk-***REDACTED***   # NOTE(review): a real API key was committed here — rotate it immediately and keep secrets out of version control (.env should be gitignored)
  37 +OPENAI_API_KEY=sk-***REDACTED***   # NOTE(review): a real API key was committed here — rotate it immediately; per docs/翻译模块说明.md this key is not even required by the translation module
api/routes/search.py
@@ -117,7 +117,7 @@ async def search(request: SearchRequest, http_request: Request): @@ -117,7 +117,7 @@ async def search(request: SearchRequest, http_request: Request):
117 performance_summary = context.get_summary() if context else None 117 performance_summary = context.get_summary() if context else None
118 118
119 # Convert to response model 119 # Convert to response model
120 - return SearchResponse( 120 + response = SearchResponse(
121 results=result.results, 121 results=result.results,
122 total=result.total, 122 total=result.total,
123 max_score=result.max_score, 123 max_score=result.max_score,
@@ -130,6 +130,18 @@ async def search(request: SearchRequest, http_request: Request): @@ -130,6 +130,18 @@ async def search(request: SearchRequest, http_request: Request):
130 debug_info=result.debug_info 130 debug_info=result.debug_info
131 ) 131 )
132 132
  133 + # Log complete response JSON
  134 + context.logger.info(  # NOTE(review): `context` may be None (see `context.get_summary() if context else None` above) — guard this block with `if context:`; also `{response.max_score:.4f}` raises TypeError when max_score is None
  135 + "Search response | "
  136 + f"Total results: {response.total} | "
  137 + f"Max score: {response.max_score:.4f} | "
  138 + f"Time: {response.took_ms}ms | "
  139 + f"Response: {response.model_dump_json()}",
  140 + extra={'reqid': context.reqid, 'uid': context.uid}
  141 + )
  142 +
  143 + return response
  144 +
133 except Exception as e: 145 except Exception as e:
134 # Log error in context 146 # Log error in context
135 if context: 147 if context:
@@ -203,7 +215,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): @@ -203,7 +215,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
203 # Include performance summary in response 215 # Include performance summary in response
204 performance_summary = context.get_summary() if context else None 216 performance_summary = context.get_summary() if context else None
205 217
206 - return SearchResponse( 218 + response = SearchResponse(
207 results=result.results, 219 results=result.results,
208 total=result.total, 220 total=result.total,
209 max_score=result.max_score, 221 max_score=result.max_score,
@@ -215,6 +227,18 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): @@ -215,6 +227,18 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request):
215 performance_info=performance_summary 227 performance_info=performance_summary
216 ) 228 )
217 229
  230 + # Log complete response JSON
  231 + context.logger.info(  # NOTE(review): same issue as the text-search handler — `context` may be None and `max_score` may be None; guard before logging
  232 + "Image search response | "
  233 + f"Total results: {response.total} | "
  234 + f"Max score: {response.max_score:.4f} | "
  235 + f"Time: {response.took_ms}ms | "
  236 + f"Response: {response.model_dump_json()}",
  237 + extra={'reqid': context.reqid, 'uid': context.uid}
  238 + )
  239 +
  240 + return response
  241 +
218 except ValueError as e: 242 except ValueError as e:
219 if context: 243 if context:
220 context.set_error(e) 244 context.set_error(e)
api/translator_app.py
@@ -11,7 +11,7 @@ uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload @@ -11,7 +11,7 @@ uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload
11 使用说明: 11 使用说明:
12 Translation HTTP Service 12 Translation HTTP Service
13 13
14 -This service provides a RESTful API for text translation using DeepL API. 14 +This service provides a RESTful API for text translation using Qwen (default) or DeepL API.
15 The service runs on port 6006 and provides a simple translation endpoint. 15 The service runs on port 6006 and provides a simple translation endpoint.
16 16
17 API Endpoint: 17 API Endpoint:
@@ -21,7 +21,8 @@ Request Body (JSON): @@ -21,7 +21,8 @@ Request Body (JSON):
21 { 21 {
22 "text": "要翻译的文本", 22 "text": "要翻译的文本",
23 "target_lang": "en", # Required: target language code (zh, en, ru, etc.) 23 "target_lang": "en", # Required: target language code (zh, en, ru, etc.)
24 - "source_lang": "zh" # Optional: source language code (auto-detect if not provided) 24 + "source_lang": "zh", # Optional: source language code (auto-detect if not provided)
  25 + "model": "qwen" # Optional: translation model ("qwen" or "deepl", default: "qwen")
25 } 26 }
26 27
27 Response (JSON): 28 Response (JSON):
@@ -52,7 +53,17 @@ Usage Examples: @@ -52,7 +53,17 @@ Usage Examples:
52 "target_lang": "zh" 53 "target_lang": "zh"
53 }' 54 }'
54 55
55 -3. Translate Russian to English: 56 +3. Translate using DeepL model:
  57 + curl -X POST http://localhost:6006/translate \
  58 + -H "Content-Type: application/json" \
  59 + -d '{
  60 + "text": "商品名称",
  61 + "target_lang": "en",
  62 + "source_lang": "zh",
  63 + "model": "deepl"
  64 + }'
  65 +
  66 +4. Translate Russian to English:
56 curl -X POST http://localhost:6006/translate \ 67 curl -X POST http://localhost:6006/translate \
57 -H "Content-Type: application/json" \ 68 -H "Content-Type: application/json" \
58 -d '{ 69 -d '{
@@ -77,7 +88,7 @@ import sys @@ -77,7 +88,7 @@ import sys
77 import logging 88 import logging
78 import argparse 89 import argparse
79 import uvicorn 90 import uvicorn
80 -from typing import Optional 91 +from typing import Optional, Dict
81 from fastapi import FastAPI, HTTPException 92 from fastapi import FastAPI, HTTPException
82 from fastapi.responses import JSONResponse 93 from fastapi.responses import JSONResponse
83 from fastapi.middleware.cors import CORSMiddleware 94 from fastapi.middleware.cors import CORSMiddleware
@@ -87,7 +98,7 @@ from pydantic import BaseModel, Field @@ -87,7 +98,7 @@ from pydantic import BaseModel, Field
87 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 98 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
88 99
89 from query.translator import Translator 100 from query.translator import Translator
90 -from config.env_config import DEEPL_AUTH_KEY, REDIS_CONFIG 101 +from config.env_config import DEEPL_AUTH_KEY, DASHSCOPE_API_KEY, REDIS_CONFIG
91 102
92 # Configure logging 103 # Configure logging
93 logging.basicConfig( 104 logging.basicConfig(
@@ -99,22 +110,22 @@ logger = logging.getLogger(__name__) @@ -99,22 +110,22 @@ logger = logging.getLogger(__name__)
99 # Fixed translation prompt 110 # Fixed translation prompt
100 TRANSLATION_PROMPT = "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language." 111 TRANSLATION_PROMPT = "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language."
101 112
102 -# Global translator instance  
103 -_translator: Optional[Translator] = None 113 +# Global translator instances cache (keyed by model)
  114 +_translators: Dict[str, Translator] = {}
104 115
105 116
106 -def init_translator():  
107 - """Initialize translator instance."""  
108 - global _translator  
109 - if _translator is None:  
110 - logger.info("Initializing translator...")  
111 - _translator = Translator(  
112 - api_key=DEEPL_AUTH_KEY, 117 +def get_translator(model: str = "qwen") -> Translator:
  118 + """Get or create translator instance for the specified model."""
  119 + global _translators
  120 + if model not in _translators:
  121 + logger.info(f"Initializing translator with model: {model}...")
  122 + _translators[model] = Translator(
  123 + model=model,
113 use_cache=True, 124 use_cache=True,
114 timeout=10 125 timeout=10
115 ) 126 )
116 - logger.info("Translator initialized")  
117 - return _translator 127 + logger.info(f"Translator initialized with model: {model}")
  128 + return _translators[model]
118 129
119 130
120 # Request/Response models 131 # Request/Response models
@@ -123,13 +134,15 @@ class TranslationRequest(BaseModel): @@ -123,13 +134,15 @@ class TranslationRequest(BaseModel):
123 text: str = Field(..., description="Text to translate") 134 text: str = Field(..., description="Text to translate")
124 target_lang: str = Field(..., description="Target language code (zh, en, ru, etc.)") 135 target_lang: str = Field(..., description="Target language code (zh, en, ru, etc.)")
125 source_lang: Optional[str] = Field(None, description="Source language code (optional, auto-detect if not provided)") 136 source_lang: Optional[str] = Field(None, description="Source language code (optional, auto-detect if not provided)")
  137 + model: Optional[str] = Field("qwen", description="Translation model: 'qwen' (default) or 'deepl'")
126 138
127 class Config: 139 class Config:
128 json_schema_extra = { 140 json_schema_extra = {
129 "example": { 141 "example": {
130 "text": "商品名称", 142 "text": "商品名称",
131 "target_lang": "en", 143 "target_lang": "en",
132 - "source_lang": "zh" 144 + "source_lang": "zh",
  145 + "model": "qwen"
133 } 146 }
134 } 147 }
135 148
@@ -141,12 +154,13 @@ class TranslationResponse(BaseModel): @@ -141,12 +154,13 @@ class TranslationResponse(BaseModel):
141 source_lang: Optional[str] = Field(None, description="Source language code (detected or provided)") 154 source_lang: Optional[str] = Field(None, description="Source language code (detected or provided)")
142 translated_text: str = Field(..., description="Translated text") 155 translated_text: str = Field(..., description="Translated text")
143 status: str = Field(..., description="Translation status") 156 status: str = Field(..., description="Translation status")
  157 + model: str = Field(..., description="Translation model used")
144 158
145 159
146 # Create FastAPI app 160 # Create FastAPI app
147 app = FastAPI( 161 app = FastAPI(
148 title="Translation Service API", 162 title="Translation Service API",
149 - description="RESTful API for text translation using DeepL", 163 + description="RESTful API for text translation using Qwen (default) or DeepL",
150 version="1.0.0", 164 version="1.0.0",
151 docs_url="/docs", 165 docs_url="/docs",
152 redoc_url="/redoc" 166 redoc_url="/redoc"
@@ -166,9 +180,11 @@ app.add_middleware( @@ -166,9 +180,11 @@ app.add_middleware(
166 async def startup_event(): 180 async def startup_event():
167 """Initialize translator on startup.""" 181 """Initialize translator on startup."""
168 logger.info("Starting Translation Service API on port 6006") 182 logger.info("Starting Translation Service API on port 6006")
  183 + # Get default model from environment variable or use 'qwen'
  184 + default_model = os.getenv("TRANSLATION_MODEL", "qwen")
169 try: 185 try:
170 - init_translator()  
171 - logger.info("Translation service ready") 186 + get_translator(model=default_model)
  187 + logger.info(f"Translation service ready with default model: {default_model}")
172 except Exception as e: 188 except Exception as e:
173 logger.error(f"Failed to initialize translator: {e}", exc_info=True) 189 logger.error(f"Failed to initialize translator: {e}", exc_info=True)
174 logger.warning("Service will start but translation may not work correctly") 190 logger.warning("Service will start but translation may not work correctly")
@@ -178,10 +194,13 @@ async def startup_event(): @@ -178,10 +194,13 @@ async def startup_event():
178 async def health_check(): 194 async def health_check():
179 """Health check endpoint.""" 195 """Health check endpoint."""
180 try: 196 try:
181 - translator = init_translator() 197 + default_model = os.getenv("TRANSLATION_MODEL", "qwen")
  198 + translator = get_translator(model=default_model)
182 return { 199 return {
183 "status": "healthy", 200 "status": "healthy",
184 "service": "translation", 201 "service": "translation",
  202 + "default_model": default_model,
  203 + "available_models": list(_translators.keys()),
185 "translator_initialized": translator is not None, 204 "translator_initialized": translator is not None,
186 "cache_enabled": translator.use_cache if translator else False 205 "cache_enabled": translator.use_cache if translator else False
187 } 206 }
@@ -203,6 +222,8 @@ async def translate(request: TranslationRequest): @@ -203,6 +222,8 @@ async def translate(request: TranslationRequest):
203 222
204 Uses a fixed prompt optimized for product SKU name translation. 223 Uses a fixed prompt optimized for product SKU name translation.
205 The translation is cached in Redis for performance. 224 The translation is cached in Redis for performance.
  225 +
  226 + Supports both Qwen (default) and DeepL models via the 'model' parameter.
206 """ 227 """
207 if not request.text or not request.text.strip(): 228 if not request.text or not request.text.strip():
208 raise HTTPException( 229 raise HTTPException(
@@ -216,8 +237,17 @@ async def translate(request: TranslationRequest): @@ -216,8 +237,17 @@ async def translate(request: TranslationRequest):
216 detail="target_lang is required" 237 detail="target_lang is required"
217 ) 238 )
218 239
  240 + # Validate model parameter
  241 + model = request.model.lower() if request.model else "qwen"
  242 + if model not in ['qwen', 'deepl']:
  243 + raise HTTPException(
  244 + status_code=400,
  245 + detail=f"Invalid model: {model}. Supported models: 'qwen', 'deepl'"
  246 + )
  247 +
219 try: 248 try:
220 - translator = init_translator() 249 + # Get translator instance for the specified model
  250 + translator = get_translator(model=model)
221 251
222 # Translate using the fixed prompt 252 # Translate using the fixed prompt
223 translated_text = translator.translate( 253 translated_text = translator.translate(
@@ -238,7 +268,8 @@ async def translate(request: TranslationRequest): @@ -238,7 +268,8 @@ async def translate(request: TranslationRequest):
238 target_lang=request.target_lang, 268 target_lang=request.target_lang,
239 source_lang=request.source_lang, 269 source_lang=request.source_lang,
240 translated_text=translated_text, 270 translated_text=translated_text,
241 - status="success" 271 + status="success",
  272 + model=translator.model
242 ) 273 )
243 274
244 except HTTPException: 275 except HTTPException:
config/env_config.py
@@ -39,6 +39,9 @@ REDIS_CONFIG = { @@ -39,6 +39,9 @@ REDIS_CONFIG = {
39 # DeepL API Key 39 # DeepL API Key
40 DEEPL_AUTH_KEY = os.getenv('DEEPL_AUTH_KEY') 40 DEEPL_AUTH_KEY = os.getenv('DEEPL_AUTH_KEY')
41 41
  42 +# DashScope API Key (for Qwen models)
  43 +DASHSCOPE_API_KEY = os.getenv('DASHSCOPE_API_KEY')
  44 +
42 # API Service Configuration 45 # API Service Configuration
43 API_HOST = os.getenv('API_HOST', '0.0.0.0') 46 API_HOST = os.getenv('API_HOST', '0.0.0.0')
44 API_PORT = int(os.getenv('API_PORT', 6002)) 47 API_PORT = int(os.getenv('API_PORT', 6002))
docs/翻译模块说明.md 0 → 100644
@@ -0,0 +1,256 @@ @@ -0,0 +1,256 @@
  1 +# 翻译模块说明(Qwen / DeepL)
  2 +
  3 +本文档汇总翻译模块的**接口使用说明**与**Python 模块用法**,对应代码:
  4 +
  5 +- HTTP 服务:`api/translator_app.py`
  6 +- Python 模块:`query/translator.py`
  7 +
  8 +---
  9 +
  10 +## 1. 功能概述
  11 +
  12 +当前翻译模块支持两种后端:
  13 +
  14 +- **Qwen(默认)**:通过阿里云百炼 DashScope 的 OpenAI 兼容接口调用 `qwen-mt-flash`
  15 +- **DeepL**:通过 DeepL API 调用翻译(保留原有能力)
  16 +
  17 +两种方式均支持:
  18 +
  19 +- **Redis 缓存**(如启用):同文案同目标语言命中缓存直接返回
  20 +- **`source_lang` 自动检测**:当 `source_lang` 为空或 `"auto"` 时启用自动检测(Qwen 使用 `"auto"`)
  21 +
  22 +---
  23 +
  24 +## 2. 环境变量与配置
  25 +
  26 +项目会在 `config/env_config.py` 中加载项目根目录的 `.env`,常用变量如下:
  27 +
  28 +```env
  29 +# Qwen / DashScope
  30 +DASHSCOPE_API_KEY=sk-xxx
  31 +
  32 +# DeepL
  33 +DEEPL_AUTH_KEY=xxx
  34 +
  35 +# 可选:翻译服务默认模型(HTTP 服务启动后若请求不传 model,则使用此默认值)
  36 +TRANSLATION_MODEL=qwen # 或 deepl
  37 +```
  38 +
  39 +说明:
  40 +
  41 +- **Qwen** 使用 `DASHSCOPE_API_KEY`
  42 +- **DeepL** 使用 `DEEPL_AUTH_KEY`
  43 +- `.env` 中的 `OPENAI_API_KEY` 不是本翻译模块必须项(当前实现用的是 `DASHSCOPE_API_KEY`)
  44 +
  45 +---
  46 +
  47 +## 3. HTTP 翻译服务(`api/translator_app.py`)
  48 +
  49 +### 3.1 启动命令
  50 +
  51 +推荐(热更新):
  52 +
  53 +```bash
  54 +cd /home/tw/SearchEngine
  55 +uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload
  56 +```
  57 +
  58 +指定默认模型(不传请求 `model` 时生效):
  59 +
  60 +```bash
  61 +cd /home/tw/SearchEngine
  62 +export TRANSLATION_MODEL=qwen # 或 deepl
  63 +uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload
  64 +```
  65 +
  66 +### 3.2 接口列表
  67 +
  68 +- **GET** `/health`:健康检查(返回默认模型、已初始化模型列表等)
  69 +- **POST** `/translate`:翻译文本
  70 +- **GET** `/docs`:Swagger UI
  71 +
  72 +### 3.3 `/translate` 请求参数
  73 +
  74 +请求体(JSON):
  75 +
  76 +```json
  77 +{
  78 + "text": "要翻译的文本",
  79 + "target_lang": "en",
  80 + "source_lang": "auto",
  81 + "model": "qwen"
  82 +}
  83 +```
  84 +
  85 +- **text**:必填,待翻译文本
  86 +- **target_lang**:必填,目标语言代码(见“语言支持”)
  87 +- **source_lang**:可选,源语言代码;不传或传 `"auto"` 时自动检测
  88 +- **model**:可选,`"qwen"` 或 `"deepl"`;默认 `"qwen"`
  89 +
  90 +### 3.4 `/translate` 返回参数
  91 +
  92 +响应体(JSON,成功时):
  93 +
  94 +```json
  95 +{
  96 + "text": "商品名称",
  97 + "target_lang": "en",
  98 + "source_lang": "zh",
  99 + "translated_text": "Product name",
  100 + "status": "success",
  101 + "model": "qwen"
  102 +}
  103 +```
  104 +
  105 +### 3.5 请求示例(curl)
  106 +
  107 +健康检查:
  108 +
  109 +```bash
  110 +curl http://localhost:6006/health
  111 +```
  112 +
  113 +默认(qwen)中文 → 英文:
  114 +
  115 +```bash
  116 +curl -X POST http://localhost:6006/translate \
  117 + -H "Content-Type: application/json" \
  118 + -d '{"text":"我看到这个视频后没有笑","target_lang":"en","source_lang":"auto"}'
  119 +```
  120 +
  121 +显式指定 qwen,英文 → 简体中文:
  122 +
  123 +```bash
  124 +curl -X POST http://localhost:6006/translate \
  125 + -H "Content-Type: application/json" \
  126 + -d '{"text":"Product name","target_lang":"zh","source_lang":"en","model":"qwen"}'
  127 +```
  128 +
  129 +繁体中文(`zh_tw`)测试:
  130 +
  131 +```bash
  132 +curl -X POST http://localhost:6006/translate \
  133 + -H "Content-Type: application/json" \
  134 + -d '{"text":"商品名稱","target_lang":"zh_tw","source_lang":"auto","model":"qwen"}'
  135 +```
  136 +
  137 +切换 DeepL:
  138 +
  139 +```bash
  140 +curl -X POST http://localhost:6006/translate \
  141 + -H "Content-Type: application/json" \
  142 + -d '{"text":"商品名称","target_lang":"en","source_lang":"zh","model":"deepl"}'
  143 +```
  144 +
  145 +### 3.6 关于提示词(Prompt)
  146 +
  147 +HTTP 服务内部使用了固定提示词 `TRANSLATION_PROMPT`(适用于“商品 SKU 英文名”场景),并通过 `prompt` 参数传入 `Translator.translate()`。
  148 +
  149 +- **DeepL**:`prompt` 会作为 DeepL 的 `context` 使用(影响翻译但不被翻译)
  150 +- **Qwen**:当前实现未将 `prompt/context` 传给 Qwen 的 `translation_options`(即对 Qwen 不生效)
  151 +
  152 +---
  153 +
  154 +## 4. Python 翻译模块(`query/translator.py`)
  155 +
  156 +### 4.1 基本用法
  157 +
  158 +```python
  159 +from query.translator import Translator
  160 +
  161 +# 默认使用 qwen
  162 +translator = Translator()
  163 +
  164 +result = translator.translate(
  165 + text="我看到这个视频后没有笑",
  166 + target_lang="en",
  167 + source_lang="auto",
  168 +)
  169 +print(result)
  170 +```
  171 +
  172 +显式选择模型:
  173 +
  174 +```python
  175 +translator_qwen = Translator(model="qwen")
  176 +translator_deepl = Translator(model="deepl")
  177 +```
  178 +
  179 +### 4.2 关键参数
  180 +
  181 +- `Translator(model="qwen" | "deepl")`:选择翻译模型,默认 `"qwen"`
  182 +- `translate(text, target_lang, source_lang=None, context=None, prompt=None)`:
  183 + - `target_lang` / `source_lang`:语言代码(见“语言支持”)
  184 + - `source_lang` 为空或 `"auto"`:自动检测
  185 + - `prompt`:
  186 + - DeepL:作为 `context` 使用
  187 + - Qwen:当前未使用
  188 +
  189 +### 4.3 缓存(Redis)
  190 +
  191 +`Translator(use_cache=True)` 时会连接 Redis 并缓存翻译结果。
  192 +
  193 +- Redis 连接配置来自 `config/env_config.py` 的 `REDIS_CONFIG`
  194 +- 缓存 key 前缀默认 `trans`(可用 `REDIS_TRANSLATION_CACHE_PREFIX` 覆盖)
  195 +
  196 +---
  197 +
  198 +## 5. Qwen 语言支持(按 qwen-mt-plus/flash/turbo 标准)
  199 +
  200 +> 以下为 Qwen 翻译模型支持的语言(**代码 → 英文名**),并已用于 `query/translator.py` 的映射。
  201 +
  202 +| 代码 | 英文名 |
  203 +|------|--------|
  204 +| en | English |
  205 +| zh | Chinese |
  206 +| zh_tw | Traditional Chinese |
  207 +| ru | Russian |
  208 +| ja | Japanese |
  209 +| ko | Korean |
  210 +| es | Spanish |
  211 +| fr | French |
  212 +| pt | Portuguese |
  213 +| de | German |
  214 +| it | Italian |
  215 +| th | Thai |
  216 +| vi | Vietnamese |
  217 +| id | Indonesian |
  218 +| ms | Malay |
  219 +| ar | Arabic |
  220 +| hi | Hindi |
  221 +| he | Hebrew |
  222 +| my | Burmese |
  223 +| ta | Tamil |
  224 +| ur | Urdu |
  225 +| bn | Bengali |
  226 +| pl | Polish |
  227 +| nl | Dutch |
  228 +| ro | Romanian |
  229 +| tr | Turkish |
  230 +| km | Khmer |
  231 +| lo | Lao |
  232 +| yue | Cantonese |
  233 +| cs | Czech |
  234 +| el | Greek |
  235 +| sv | Swedish |
  236 +| hu | Hungarian |
  237 +| da | Danish |
  238 +| fi | Finnish |
  239 +| uk | Ukrainian |
  240 +| bg | Bulgarian |
  241 +
  242 +---
  243 +
  244 +## 6. 常见问题(FAQ)
  245 +
  246 +### 6.1 Qwen 调用报错 / 无法初始化
  247 +
  248 +- 确认 `.env` 中已配置 `DASHSCOPE_API_KEY`
  249 +- 确认安装依赖:`openai`(Python 包)
  250 +- 如在海外地域使用模型,将 `base_url` 切换为 `https://dashscope-intl.aliyuncs.com/compatible-mode/v1`
  251 +
  252 +### 6.2 DeepL 返回 403 / 翻译失败
  253 +
  254 +- 确认 `.env` 中已配置 `DEEPL_AUTH_KEY`
  255 +- 若使用的是 Pro key,请使用 `https://api.deepl.com/v2/translate`(当前代码即为该地址)
  256 +
query/translator.py
1 """ 1 """
2 Translation service for multi-language query support. 2 Translation service for multi-language query support.
3 3
4 -Supports DeepL API for high-quality translations. 4 +Supports multiple translation models:
  5 +- Qwen (default): Alibaba Cloud DashScope API using qwen-mt-flash model
  6 +- DeepL: DeepL API for high-quality translations
5 7
  8 +使用方法 (Usage):
6 9
7 -#### 官方文档:  
8 -https://developers.deepl.com/api-reference/translate/request-translation  
9 -##### 10 +```python
  11 +from query.translator import Translator
  12 +
  13 +# 使用默认的 qwen 模型(推荐)
  14 +translator = Translator() # 默认使用 qwen 模型
  15 +
  16 +# 或显式指定模型
  17 +translator = Translator(model='qwen') # 使用 qwen 模型
  18 +translator = Translator(model='deepl') # 使用 DeepL 模型
  19 +
  20 +# 翻译文本
  21 +result = translator.translate(
  22 + text="我看到这个视频后没有笑",
  23 + target_lang="en",
  24 + source_lang="auto" # 自动检测源语言
  25 +)
  26 +```
10 27
  28 +配置说明 (Configuration):
  29 +- Qwen 模型需要设置 DASHSCOPE_API_KEY 环境变量(在 .env 文件中)
  30 +- DeepL 模型需要设置 DEEPL_AUTH_KEY 环境变量(在 .env 文件中)
11 31
  32 +Qwen 模型参考文档:
  33 +- 官方文档:https://help.aliyun.com/zh/model-studio/get-api-key
  34 +- 模型:qwen-mt-flash(快速翻译模型)
  35 +
  36 +DeepL 官方文档:
  37 +https://developers.deepl.com/api-reference/translate/request-translation
12 """ 38 """
13 39
  40 +import os
14 import requests 41 import requests
15 import re 42 import re
16 import redis 43 import redis
@@ -21,18 +48,21 @@ import logging @@ -21,18 +48,21 @@ import logging
21 48
22 logger = logging.getLogger(__name__) 49 logger = logging.getLogger(__name__)
23 50
24 -# Try to import DEEPL_AUTH_KEY and REDIS_CONFIG, but allow import to fail  
25 -try:  
26 - from config.env_config import DEEPL_AUTH_KEY, REDIS_CONFIG  
27 -except ImportError:  
28 - DEEPL_AUTH_KEY = None  
29 - REDIS_CONFIG = {} 51 +from config.env_config import DEEPL_AUTH_KEY, DASHSCOPE_API_KEY, REDIS_CONFIG
  52 +from openai import OpenAI
30 53
31 54
32 class Translator: 55 class Translator:
33 - """Multi-language translator using DeepL API.""" 56 + """
  57 + Multi-language translator supporting Qwen and DeepL APIs.
  58 +
  59 + Default model is 'qwen' which uses Alibaba Cloud DashScope API.
  60 + """
34 61
35 DEEPL_API_URL = "https://api.deepl.com/v2/translate" # Pro tier 62 DEEPL_API_URL = "https://api.deepl.com/v2/translate" # Pro tier
  63 + QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1" # 北京地域
  64 + # 如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/compatible-mode/v1
  65 + QWEN_MODEL = "qwen-mt-flash" # 快速翻译模型
36 66
37 # Language code mapping 67 # Language code mapping
38 LANG_CODE_MAP = { 68 LANG_CODE_MAP = {
@@ -50,6 +80,7 @@ class Translator: @@ -50,6 +80,7 @@ class Translator:
50 80
51 def __init__( 81 def __init__(
52 self, 82 self,
  83 + model: str = "qwen",
53 api_key: Optional[str] = None, 84 api_key: Optional[str] = None,
54 use_cache: bool = True, 85 use_cache: bool = True,
55 timeout: int = 10, 86 timeout: int = 10,
@@ -60,21 +91,40 @@ class Translator: @@ -60,21 +91,40 @@ class Translator:
60 Initialize translator. 91 Initialize translator.
61 92
62 Args: 93 Args:
63 - api_key: DeepL API key (or None to use from config/env) 94 + model: Translation model to use. Options: 'qwen' (default) or 'deepl'
  95 + api_key: API key for the selected model (or None to use from config/env)
64 use_cache: Whether to cache translations 96 use_cache: Whether to cache translations
65 timeout: Request timeout in seconds 97 timeout: Request timeout in seconds
66 - glossary_id: DeepL glossary ID for custom terminology (optional) 98 + glossary_id: DeepL glossary ID for custom terminology (optional, only for DeepL)
67 translation_context: Context hint for translation (e.g., "e-commerce", "product search") 99 translation_context: Context hint for translation (e.g., "e-commerce", "product search")
68 """ 100 """
  101 + self.model = model.lower()
  102 + if self.model not in ['qwen', 'deepl']:
  103 + raise ValueError(f"Unsupported model: {model}. Supported models: 'qwen', 'deepl'")
  104 +
69 # Get API key from config if not provided 105 # Get API key from config if not provided
70 - if api_key is None and DEEPL_AUTH_KEY:  
71 - api_key = DEEPL_AUTH_KEY 106 + if api_key is None:
  107 + if self.model == 'qwen':
  108 + api_key = DASHSCOPE_API_KEY or os.getenv("DASHSCOPE_API_KEY")
  109 + else: # deepl
  110 + api_key = DEEPL_AUTH_KEY or os.getenv("DEEPL_AUTH_KEY")
72 111
73 self.api_key = api_key 112 self.api_key = api_key
74 self.timeout = timeout 113 self.timeout = timeout
75 self.use_cache = use_cache 114 self.use_cache = use_cache
76 self.glossary_id = glossary_id 115 self.glossary_id = glossary_id
77 self.translation_context = translation_context or "e-commerce product search" 116 self.translation_context = translation_context or "e-commerce product search"
  117 +
  118 + # Initialize OpenAI client for Qwen if needed
  119 + self.qwen_client = None
  120 + if self.model == 'qwen':
  121 + if not self.api_key:
  122 + logger.warning("DASHSCOPE_API_KEY not set. Qwen translation will not work.")
  123 + else:
  124 + self.qwen_client = OpenAI(
  125 + api_key=self.api_key,
  126 + base_url=self.QWEN_BASE_URL,
  127 + )
78 128
79 # Initialize Redis cache if enabled 129 # Initialize Redis cache if enabled
80 if use_cache: 130 if use_cache:
@@ -119,7 +169,7 @@ class Translator: @@ -119,7 +169,7 @@ class Translator:
119 Args: 169 Args:
120 text: Text to translate 170 text: Text to translate
121 target_lang: Target language code ('zh', 'en', 'ru', etc.) 171 target_lang: Target language code ('zh', 'en', 'ru', etc.)
122 - source_lang: Source language code (optional, auto-detect if None) 172 + source_lang: Source language code (optional, auto-detect if None)
123 context: Additional context for translation (overrides default context) 173 context: Additional context for translation (overrides default context)
124 prompt: Translation prompt/instruction (optional, for better translation quality) 174 prompt: Translation prompt/instruction (optional, for better translation quality)
125 175
@@ -174,13 +224,17 @@ class Translator: @@ -174,13 +224,17 @@ class Translator:
174 ) 224 )
175 return text 225 return text
176 226
177 - # Translate using DeepL (Pro endpoint only, no free fallback) 227 + # Translate using selected model
178 logger.info( 228 logger.info(
179 - f"[Translator] Translation request | Original text: '{text}' | Target language: {target_lang} | " 229 + f"[Translator] Translation request | Model: {self.model} | Original text: '{text}' | Target language: {target_lang} | "
180 f"Source language: {source_lang or 'auto'} | Context: {translation_context} | " 230 f"Source language: {source_lang or 'auto'} | Context: {translation_context} | "
181 f"Prompt: {'yes' if prompt else 'no'} | Status: Starting translation" 231 f"Prompt: {'yes' if prompt else 'no'} | Status: Starting translation"
182 ) 232 )
183 - result = self._translate_deepl(text, target_lang, source_lang, translation_context, prompt) 233 +
  234 + if self.model == 'qwen':
  235 + result = self._translate_qwen(text, target_lang, source_lang, translation_context, prompt)
  236 + else: # deepl
  237 + result = self._translate_deepl(text, target_lang, source_lang, translation_context, prompt)
184 238
185 # If still failed, return original text with warning 239 # If still failed, return original text with warning
186 if result is None: 240 if result is None:
@@ -201,6 +255,122 @@ class Translator: @@ -201,6 +255,122 @@ class Translator:
201 255
202 return result 256 return result
203 257
  258 + def _translate_qwen(
  259 + self,
  260 + text: str,
  261 + target_lang: str,
  262 + source_lang: Optional[str],
  263 + context: Optional[str] = None,
  264 + prompt: Optional[str] = None
  265 + ) -> Optional[str]:
  266 + """
  267 + Translate using Qwen MT Flash model via Alibaba Cloud DashScope API.
  268 +
  269 + Args:
  270 + text: Text to translate
  271 + target_lang: Target language code ('zh', 'en', 'ru', etc.)
  272 + source_lang: Source language code (optional, 'auto' if None)
  273 + context: Context hint for translation (optional)
  274 + prompt: Translation prompt/instruction (optional)
  275 +
  276 + Returns:
  277 + Translated text or None if translation fails
  278 + """
  279 + if not self.qwen_client:
  280 + logger.error("[Translator] Qwen client not initialized. Check DASHSCOPE_API_KEY.")
  281 + return None
  282 +
  283 + # Qwen (qwen-mt-plus/flash/turbo) supported languages mapping
  284 + # 标准来自:你提供的“语言 / 英文名 / 代码”表
  285 + qwen_lang_map = {
  286 + "en": "English",
  287 + "zh": "Chinese",
  288 + "zh_tw": "Traditional Chinese",
  289 + "ru": "Russian",
  290 + "ja": "Japanese",
  291 + "ko": "Korean",
  292 + "es": "Spanish",
  293 + "fr": "French",
  294 + "pt": "Portuguese",
  295 + "de": "German",
  296 + "it": "Italian",
  297 + "th": "Thai",
  298 + "vi": "Vietnamese",
  299 + "id": "Indonesian",
  300 + "ms": "Malay",
  301 + "ar": "Arabic",
  302 + "hi": "Hindi",
  303 + "he": "Hebrew",
  304 + "my": "Burmese",
  305 + "ta": "Tamil",
  306 + "ur": "Urdu",
  307 + "bn": "Bengali",
  308 + "pl": "Polish",
  309 + "nl": "Dutch",
  310 + "ro": "Romanian",
  311 + "tr": "Turkish",
  312 + "km": "Khmer",
  313 + "lo": "Lao",
  314 + "yue": "Cantonese",
  315 + "cs": "Czech",
  316 + "el": "Greek",
  317 + "sv": "Swedish",
  318 + "hu": "Hungarian",
  319 + "da": "Danish",
  320 + "fi": "Finnish",
  321 + "uk": "Ukrainian",
  322 + "bg": "Bulgarian",
  323 + }
  324 +
  325 + # Convert target language
  326 + target_lang_normalized = target_lang.lower()
  327 + target_lang_qwen = qwen_lang_map.get(target_lang_normalized, target_lang.capitalize())
  328 +
  329 + # Convert source language
  330 + source_lang_normalized = (source_lang or "").strip().lower()
  331 + if not source_lang_normalized or source_lang_normalized == "auto":
  332 + source_lang_qwen = "auto"
  333 + else:
  334 + source_lang_qwen = qwen_lang_map.get(source_lang_normalized, source_lang.capitalize())
  335 +
  336 + # Prepare translation options
  337 + translation_options = {
  338 + "source_lang": source_lang_qwen,
  339 + "target_lang": target_lang_qwen,
  340 + }
  341 +
  342 + # Prepare messages
  343 + messages = [
  344 + {
  345 + "role": "user",
  346 + "content": text
  347 + }
  348 + ]
  349 +
  350 + try:
  351 + completion = self.qwen_client.chat.completions.create(
  352 + model=self.QWEN_MODEL,
  353 + messages=messages,
  354 + extra_body={
  355 + "translation_options": translation_options
  356 + }
  357 + )
  358 +
  359 + translated_text = completion.choices[0].message.content.strip()
  360 +
  361 + logger.debug(
  362 + f"[Translator] Qwen API response success | Original text: '{text}' | Target language: {target_lang_qwen} | "
  363 + f"Translation result: '{translated_text}'"
  364 + )
  365 + return translated_text
  366 +
  367 + except Exception as e:
  368 + logger.error(
  369 + f"[Translator] Qwen API request exception | Original text: '{text}' | Target language: {target_lang_qwen} | "
  370 + f"Error: {e}", exc_info=True
  371 + )
  372 + return None
  373 +
204 def _translate_deepl( 374 def _translate_deepl(
205 self, 375 self,
206 text: str, 376 text: str,