Commit 3cd09b3bf456bd8cefb7e62750c215f0137ef974
1 parent
001b4889
翻译接口改为调用qwen-mt-flash
文档: 翻译模块说明.md
Showing
6 changed files
with
532 additions
and
44 deletions
Show diff stats
| @@ -31,3 +31,7 @@ CACHE_DIR=.cache | @@ -31,3 +31,7 @@ CACHE_DIR=.cache | ||
| 31 | 31 | ||
| 32 | # Frontend API Base URL | 32 | # Frontend API Base URL |
| 33 | API_BASE_URL=http://120.76.41.98:6002 | 33 | API_BASE_URL=http://120.76.41.98:6002 |
| 34 | + | ||
| 35 | + | ||
| 36 | +DASHSCOPE_API_KEY=sk-REDACTED  # SECURITY: a real key was committed here — rotate it immediately and keep .env out of version control | ||
| 37 | +OPENAI_API_KEY=sk-REDACTED  # SECURITY: a real key was committed here — rotate it immediately and keep .env out of version control |
api/routes/search.py
| @@ -117,7 +117,7 @@ async def search(request: SearchRequest, http_request: Request): | @@ -117,7 +117,7 @@ async def search(request: SearchRequest, http_request: Request): | ||
| 117 | performance_summary = context.get_summary() if context else None | 117 | performance_summary = context.get_summary() if context else None |
| 118 | 118 | ||
| 119 | # Convert to response model | 119 | # Convert to response model |
| 120 | - return SearchResponse( | 120 | + response = SearchResponse( |
| 121 | results=result.results, | 121 | results=result.results, |
| 122 | total=result.total, | 122 | total=result.total, |
| 123 | max_score=result.max_score, | 123 | max_score=result.max_score, |
| @@ -130,6 +130,18 @@ async def search(request: SearchRequest, http_request: Request): | @@ -130,6 +130,18 @@ async def search(request: SearchRequest, http_request: Request): | ||
| 130 | debug_info=result.debug_info | 130 | debug_info=result.debug_info |
| 131 | ) | 131 | ) |
| 132 | 132 | ||
| 133 | + # Log complete response JSON | ||
| 134 | + context.logger.info( | ||
| 135 | + "Search response | " | ||
| 136 | + f"Total results: {response.total} | " | ||
| 137 | + f"Max score: {response.max_score:.4f} | " | ||
| 138 | + f"Time: {response.took_ms}ms | " | ||
| 139 | + f"Response: {response.model_dump_json()}", | ||
| 140 | + extra={'reqid': context.reqid, 'uid': context.uid} | ||
| 141 | + ) | ||
| 142 | + | ||
| 143 | + return response | ||
| 144 | + | ||
| 133 | except Exception as e: | 145 | except Exception as e: |
| 134 | # Log error in context | 146 | # Log error in context |
| 135 | if context: | 147 | if context: |
| @@ -203,7 +215,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | @@ -203,7 +215,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | ||
| 203 | # Include performance summary in response | 215 | # Include performance summary in response |
| 204 | performance_summary = context.get_summary() if context else None | 216 | performance_summary = context.get_summary() if context else None |
| 205 | 217 | ||
| 206 | - return SearchResponse( | 218 | + response = SearchResponse( |
| 207 | results=result.results, | 219 | results=result.results, |
| 208 | total=result.total, | 220 | total=result.total, |
| 209 | max_score=result.max_score, | 221 | max_score=result.max_score, |
| @@ -215,6 +227,18 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | @@ -215,6 +227,18 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | ||
| 215 | performance_info=performance_summary | 227 | performance_info=performance_summary |
| 216 | ) | 228 | ) |
| 217 | 229 | ||
| 230 | + # Log complete response JSON | ||
| 231 | + context.logger.info( | ||
| 232 | + "Image search response | " | ||
| 233 | + f"Total results: {response.total} | " | ||
| 234 | + f"Max score: {response.max_score:.4f} | " | ||
| 235 | + f"Time: {response.took_ms}ms | " | ||
| 236 | + f"Response: {response.model_dump_json()}", | ||
| 237 | + extra={'reqid': context.reqid, 'uid': context.uid} | ||
| 238 | + ) | ||
| 239 | + | ||
| 240 | + return response | ||
| 241 | + | ||
| 218 | except ValueError as e: | 242 | except ValueError as e: |
| 219 | if context: | 243 | if context: |
| 220 | context.set_error(e) | 244 | context.set_error(e) |
api/translator_app.py
| @@ -11,7 +11,7 @@ uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload | @@ -11,7 +11,7 @@ uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload | ||
| 11 | 使用说明: | 11 | 使用说明: |
| 12 | Translation HTTP Service | 12 | Translation HTTP Service |
| 13 | 13 | ||
| 14 | -This service provides a RESTful API for text translation using DeepL API. | 14 | +This service provides a RESTful API for text translation using Qwen (default) or DeepL API. |
| 15 | The service runs on port 6006 and provides a simple translation endpoint. | 15 | The service runs on port 6006 and provides a simple translation endpoint. |
| 16 | 16 | ||
| 17 | API Endpoint: | 17 | API Endpoint: |
| @@ -21,7 +21,8 @@ Request Body (JSON): | @@ -21,7 +21,8 @@ Request Body (JSON): | ||
| 21 | { | 21 | { |
| 22 | "text": "要翻译的文本", | 22 | "text": "要翻译的文本", |
| 23 | "target_lang": "en", # Required: target language code (zh, en, ru, etc.) | 23 | "target_lang": "en", # Required: target language code (zh, en, ru, etc.) |
| 24 | - "source_lang": "zh" # Optional: source language code (auto-detect if not provided) | 24 | + "source_lang": "zh", # Optional: source language code (auto-detect if not provided) |
| 25 | + "model": "qwen" # Optional: translation model ("qwen" or "deepl", default: "qwen") | ||
| 25 | } | 26 | } |
| 26 | 27 | ||
| 27 | Response (JSON): | 28 | Response (JSON): |
| @@ -52,7 +53,17 @@ Usage Examples: | @@ -52,7 +53,17 @@ Usage Examples: | ||
| 52 | "target_lang": "zh" | 53 | "target_lang": "zh" |
| 53 | }' | 54 | }' |
| 54 | 55 | ||
| 55 | -3. Translate Russian to English: | 56 | +3. Translate using DeepL model: |
| 57 | + curl -X POST http://localhost:6006/translate \ | ||
| 58 | + -H "Content-Type: application/json" \ | ||
| 59 | + -d '{ | ||
| 60 | + "text": "商品名称", | ||
| 61 | + "target_lang": "en", | ||
| 62 | + "source_lang": "zh", | ||
| 63 | + "model": "deepl" | ||
| 64 | + }' | ||
| 65 | + | ||
| 66 | +4. Translate Russian to English: | ||
| 56 | curl -X POST http://localhost:6006/translate \ | 67 | curl -X POST http://localhost:6006/translate \ |
| 57 | -H "Content-Type: application/json" \ | 68 | -H "Content-Type: application/json" \ |
| 58 | -d '{ | 69 | -d '{ |
| @@ -77,7 +88,7 @@ import sys | @@ -77,7 +88,7 @@ import sys | ||
| 77 | import logging | 88 | import logging |
| 78 | import argparse | 89 | import argparse |
| 79 | import uvicorn | 90 | import uvicorn |
| 80 | -from typing import Optional | 91 | +from typing import Optional, Dict |
| 81 | from fastapi import FastAPI, HTTPException | 92 | from fastapi import FastAPI, HTTPException |
| 82 | from fastapi.responses import JSONResponse | 93 | from fastapi.responses import JSONResponse |
| 83 | from fastapi.middleware.cors import CORSMiddleware | 94 | from fastapi.middleware.cors import CORSMiddleware |
| @@ -87,7 +98,7 @@ from pydantic import BaseModel, Field | @@ -87,7 +98,7 @@ from pydantic import BaseModel, Field | ||
| 87 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | 98 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| 88 | 99 | ||
| 89 | from query.translator import Translator | 100 | from query.translator import Translator |
| 90 | -from config.env_config import DEEPL_AUTH_KEY, REDIS_CONFIG | 101 | +from config.env_config import DEEPL_AUTH_KEY, DASHSCOPE_API_KEY, REDIS_CONFIG |
| 91 | 102 | ||
| 92 | # Configure logging | 103 | # Configure logging |
| 93 | logging.basicConfig( | 104 | logging.basicConfig( |
| @@ -99,22 +110,22 @@ logger = logging.getLogger(__name__) | @@ -99,22 +110,22 @@ logger = logging.getLogger(__name__) | ||
| 99 | # Fixed translation prompt | 110 | # Fixed translation prompt |
| 100 | TRANSLATION_PROMPT = "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language." | 111 | TRANSLATION_PROMPT = "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language." |
| 101 | 112 | ||
| 102 | -# Global translator instance | ||
| 103 | -_translator: Optional[Translator] = None | 113 | +# Global translator instances cache (keyed by model) |
| 114 | +_translators: Dict[str, Translator] = {} | ||
| 104 | 115 | ||
| 105 | 116 | ||
| 106 | -def init_translator(): | ||
| 107 | - """Initialize translator instance.""" | ||
| 108 | - global _translator | ||
| 109 | - if _translator is None: | ||
| 110 | - logger.info("Initializing translator...") | ||
| 111 | - _translator = Translator( | ||
| 112 | - api_key=DEEPL_AUTH_KEY, | 117 | +def get_translator(model: str = "qwen") -> Translator: |
| 118 | + """Get or create translator instance for the specified model.""" | ||
| 119 | + global _translators | ||
| 120 | + if model not in _translators: | ||
| 121 | + logger.info(f"Initializing translator with model: {model}...") | ||
| 122 | + _translators[model] = Translator( | ||
| 123 | + model=model, | ||
| 113 | use_cache=True, | 124 | use_cache=True, |
| 114 | timeout=10 | 125 | timeout=10 |
| 115 | ) | 126 | ) |
| 116 | - logger.info("Translator initialized") | ||
| 117 | - return _translator | 127 | + logger.info(f"Translator initialized with model: {model}") |
| 128 | + return _translators[model] | ||
| 118 | 129 | ||
| 119 | 130 | ||
| 120 | # Request/Response models | 131 | # Request/Response models |
| @@ -123,13 +134,15 @@ class TranslationRequest(BaseModel): | @@ -123,13 +134,15 @@ class TranslationRequest(BaseModel): | ||
| 123 | text: str = Field(..., description="Text to translate") | 134 | text: str = Field(..., description="Text to translate") |
| 124 | target_lang: str = Field(..., description="Target language code (zh, en, ru, etc.)") | 135 | target_lang: str = Field(..., description="Target language code (zh, en, ru, etc.)") |
| 125 | source_lang: Optional[str] = Field(None, description="Source language code (optional, auto-detect if not provided)") | 136 | source_lang: Optional[str] = Field(None, description="Source language code (optional, auto-detect if not provided)") |
| 137 | + model: Optional[str] = Field("qwen", description="Translation model: 'qwen' (default) or 'deepl'") | ||
| 126 | 138 | ||
| 127 | class Config: | 139 | class Config: |
| 128 | json_schema_extra = { | 140 | json_schema_extra = { |
| 129 | "example": { | 141 | "example": { |
| 130 | "text": "商品名称", | 142 | "text": "商品名称", |
| 131 | "target_lang": "en", | 143 | "target_lang": "en", |
| 132 | - "source_lang": "zh" | 144 | + "source_lang": "zh", |
| 145 | + "model": "qwen" | ||
| 133 | } | 146 | } |
| 134 | } | 147 | } |
| 135 | 148 | ||
| @@ -141,12 +154,13 @@ class TranslationResponse(BaseModel): | @@ -141,12 +154,13 @@ class TranslationResponse(BaseModel): | ||
| 141 | source_lang: Optional[str] = Field(None, description="Source language code (detected or provided)") | 154 | source_lang: Optional[str] = Field(None, description="Source language code (detected or provided)") |
| 142 | translated_text: str = Field(..., description="Translated text") | 155 | translated_text: str = Field(..., description="Translated text") |
| 143 | status: str = Field(..., description="Translation status") | 156 | status: str = Field(..., description="Translation status") |
| 157 | + model: str = Field(..., description="Translation model used") | ||
| 144 | 158 | ||
| 145 | 159 | ||
| 146 | # Create FastAPI app | 160 | # Create FastAPI app |
| 147 | app = FastAPI( | 161 | app = FastAPI( |
| 148 | title="Translation Service API", | 162 | title="Translation Service API", |
| 149 | - description="RESTful API for text translation using DeepL", | 163 | + description="RESTful API for text translation using Qwen (default) or DeepL", |
| 150 | version="1.0.0", | 164 | version="1.0.0", |
| 151 | docs_url="/docs", | 165 | docs_url="/docs", |
| 152 | redoc_url="/redoc" | 166 | redoc_url="/redoc" |
| @@ -166,9 +180,11 @@ app.add_middleware( | @@ -166,9 +180,11 @@ app.add_middleware( | ||
| 166 | async def startup_event(): | 180 | async def startup_event(): |
| 167 | """Initialize translator on startup.""" | 181 | """Initialize translator on startup.""" |
| 168 | logger.info("Starting Translation Service API on port 6006") | 182 | logger.info("Starting Translation Service API on port 6006") |
| 183 | + # Get default model from environment variable or use 'qwen' | ||
| 184 | + default_model = os.getenv("TRANSLATION_MODEL", "qwen") | ||
| 169 | try: | 185 | try: |
| 170 | - init_translator() | ||
| 171 | - logger.info("Translation service ready") | 186 | + get_translator(model=default_model) |
| 187 | + logger.info(f"Translation service ready with default model: {default_model}") | ||
| 172 | except Exception as e: | 188 | except Exception as e: |
| 173 | logger.error(f"Failed to initialize translator: {e}", exc_info=True) | 189 | logger.error(f"Failed to initialize translator: {e}", exc_info=True) |
| 174 | logger.warning("Service will start but translation may not work correctly") | 190 | logger.warning("Service will start but translation may not work correctly") |
| @@ -178,10 +194,13 @@ async def startup_event(): | @@ -178,10 +194,13 @@ async def startup_event(): | ||
| 178 | async def health_check(): | 194 | async def health_check(): |
| 179 | """Health check endpoint.""" | 195 | """Health check endpoint.""" |
| 180 | try: | 196 | try: |
| 181 | - translator = init_translator() | 197 | + default_model = os.getenv("TRANSLATION_MODEL", "qwen") |
| 198 | + translator = get_translator(model=default_model) | ||
| 182 | return { | 199 | return { |
| 183 | "status": "healthy", | 200 | "status": "healthy", |
| 184 | "service": "translation", | 201 | "service": "translation", |
| 202 | + "default_model": default_model, | ||
| 203 | + "available_models": list(_translators.keys()), | ||
| 185 | "translator_initialized": translator is not None, | 204 | "translator_initialized": translator is not None, |
| 186 | "cache_enabled": translator.use_cache if translator else False | 205 | "cache_enabled": translator.use_cache if translator else False |
| 187 | } | 206 | } |
| @@ -203,6 +222,8 @@ async def translate(request: TranslationRequest): | @@ -203,6 +222,8 @@ async def translate(request: TranslationRequest): | ||
| 203 | 222 | ||
| 204 | Uses a fixed prompt optimized for product SKU name translation. | 223 | Uses a fixed prompt optimized for product SKU name translation. |
| 205 | The translation is cached in Redis for performance. | 224 | The translation is cached in Redis for performance. |
| 225 | + | ||
| 226 | + Supports both Qwen (default) and DeepL models via the 'model' parameter. | ||
| 206 | """ | 227 | """ |
| 207 | if not request.text or not request.text.strip(): | 228 | if not request.text or not request.text.strip(): |
| 208 | raise HTTPException( | 229 | raise HTTPException( |
| @@ -216,8 +237,17 @@ async def translate(request: TranslationRequest): | @@ -216,8 +237,17 @@ async def translate(request: TranslationRequest): | ||
| 216 | detail="target_lang is required" | 237 | detail="target_lang is required" |
| 217 | ) | 238 | ) |
| 218 | 239 | ||
| 240 | + # Validate model parameter | ||
| 241 | + model = request.model.lower() if request.model else "qwen" | ||
| 242 | + if model not in ['qwen', 'deepl']: | ||
| 243 | + raise HTTPException( | ||
| 244 | + status_code=400, | ||
| 245 | + detail=f"Invalid model: {model}. Supported models: 'qwen', 'deepl'" | ||
| 246 | + ) | ||
| 247 | + | ||
| 219 | try: | 248 | try: |
| 220 | - translator = init_translator() | 249 | + # Get translator instance for the specified model |
| 250 | + translator = get_translator(model=model) | ||
| 221 | 251 | ||
| 222 | # Translate using the fixed prompt | 252 | # Translate using the fixed prompt |
| 223 | translated_text = translator.translate( | 253 | translated_text = translator.translate( |
| @@ -238,7 +268,8 @@ async def translate(request: TranslationRequest): | @@ -238,7 +268,8 @@ async def translate(request: TranslationRequest): | ||
| 238 | target_lang=request.target_lang, | 268 | target_lang=request.target_lang, |
| 239 | source_lang=request.source_lang, | 269 | source_lang=request.source_lang, |
| 240 | translated_text=translated_text, | 270 | translated_text=translated_text, |
| 241 | - status="success" | 271 | + status="success", |
| 272 | + model=translator.model | ||
| 242 | ) | 273 | ) |
| 243 | 274 | ||
| 244 | except HTTPException: | 275 | except HTTPException: |
config/env_config.py
| @@ -39,6 +39,9 @@ REDIS_CONFIG = { | @@ -39,6 +39,9 @@ REDIS_CONFIG = { | ||
| 39 | # DeepL API Key | 39 | # DeepL API Key |
| 40 | DEEPL_AUTH_KEY = os.getenv('DEEPL_AUTH_KEY') | 40 | DEEPL_AUTH_KEY = os.getenv('DEEPL_AUTH_KEY') |
| 41 | 41 | ||
| 42 | +# DashScope API Key (for Qwen models) | ||
| 43 | +DASHSCOPE_API_KEY = os.getenv('DASHSCOPE_API_KEY') | ||
| 44 | + | ||
| 42 | # API Service Configuration | 45 | # API Service Configuration |
| 43 | API_HOST = os.getenv('API_HOST', '0.0.0.0') | 46 | API_HOST = os.getenv('API_HOST', '0.0.0.0') |
| 44 | API_PORT = int(os.getenv('API_PORT', 6002)) | 47 | API_PORT = int(os.getenv('API_PORT', 6002)) |
| @@ -0,0 +1,256 @@ | @@ -0,0 +1,256 @@ | ||
| 1 | +# 翻译模块说明(Qwen / DeepL) | ||
| 2 | + | ||
| 3 | +本文档汇总翻译模块的**接口使用说明**与**Python 模块用法**,对应代码: | ||
| 4 | + | ||
| 5 | +- HTTP 服务:`api/translator_app.py` | ||
| 6 | +- Python 模块:`query/translator.py` | ||
| 7 | + | ||
| 8 | +--- | ||
| 9 | + | ||
| 10 | +## 1. 功能概述 | ||
| 11 | + | ||
| 12 | +当前翻译模块支持两种后端: | ||
| 13 | + | ||
| 14 | +- **Qwen(默认)**:通过阿里云百炼 DashScope 的 OpenAI 兼容接口调用 `qwen-mt-flash` | ||
| 15 | +- **DeepL**:通过 DeepL API 调用翻译(保留原有能力) | ||
| 16 | + | ||
| 17 | +两种方式均支持: | ||
| 18 | + | ||
| 19 | +- **Redis 缓存**(如启用):同文案同目标语言命中缓存直接返回 | ||
| 20 | +- **`source_lang` 自动检测**:当 `source_lang` 为空或 `"auto"` 时启用自动检测(Qwen 使用 `"auto"`) | ||
| 21 | + | ||
| 22 | +--- | ||
| 23 | + | ||
| 24 | +## 2. 环境变量与配置 | ||
| 25 | + | ||
| 26 | +项目会在 `config/env_config.py` 中加载项目根目录的 `.env`,常用变量如下: | ||
| 27 | + | ||
| 28 | +```env | ||
| 29 | +# Qwen / DashScope | ||
| 30 | +DASHSCOPE_API_KEY=sk-xxx | ||
| 31 | + | ||
| 32 | +# DeepL | ||
| 33 | +DEEPL_AUTH_KEY=xxx | ||
| 34 | + | ||
| 35 | +# 可选:翻译服务默认模型(HTTP 服务启动后若请求不传 model,则使用此默认值) | ||
| 36 | +TRANSLATION_MODEL=qwen # 或 deepl | ||
| 37 | +``` | ||
| 38 | + | ||
| 39 | +说明: | ||
| 40 | + | ||
| 41 | +- **Qwen** 使用 `DASHSCOPE_API_KEY` | ||
| 42 | +- **DeepL** 使用 `DEEPL_AUTH_KEY` | ||
| 43 | +- `.env` 中的 `OPENAI_API_KEY` 不是本翻译模块必须项(当前实现用的是 `DASHSCOPE_API_KEY`) | ||
| 44 | + | ||
| 45 | +--- | ||
| 46 | + | ||
| 47 | +## 3. HTTP 翻译服务(`api/translator_app.py`) | ||
| 48 | + | ||
| 49 | +### 3.1 启动命令 | ||
| 50 | + | ||
| 51 | +推荐(热更新): | ||
| 52 | + | ||
| 53 | +```bash | ||
| 54 | +cd /home/tw/SearchEngine | ||
| 55 | +uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload | ||
| 56 | +``` | ||
| 57 | + | ||
| 58 | +指定默认模型(不传请求 `model` 时生效): | ||
| 59 | + | ||
| 60 | +```bash | ||
| 61 | +cd /home/tw/SearchEngine | ||
| 62 | +export TRANSLATION_MODEL=qwen # 或 deepl | ||
| 63 | +uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload | ||
| 64 | +``` | ||
| 65 | + | ||
| 66 | +### 3.2 接口列表 | ||
| 67 | + | ||
| 68 | +- **GET** `/health`:健康检查(返回默认模型、已初始化模型列表等) | ||
| 69 | +- **POST** `/translate`:翻译文本 | ||
| 70 | +- **GET** `/docs`:Swagger UI | ||
| 71 | + | ||
| 72 | +### 3.3 `/translate` 请求参数 | ||
| 73 | + | ||
| 74 | +请求体(JSON): | ||
| 75 | + | ||
| 76 | +```json | ||
| 77 | +{ | ||
| 78 | + "text": "要翻译的文本", | ||
| 79 | + "target_lang": "en", | ||
| 80 | + "source_lang": "auto", | ||
| 81 | + "model": "qwen" | ||
| 82 | +} | ||
| 83 | +``` | ||
| 84 | + | ||
| 85 | +- **text**:必填,待翻译文本 | ||
| 86 | +- **target_lang**:必填,目标语言代码(见“语言支持”) | ||
| 87 | +- **source_lang**:可选,源语言代码;不传或传 `"auto"` 时自动检测 | ||
| 88 | +- **model**:可选,`"qwen"` 或 `"deepl"`;默认 `"qwen"` | ||
| 89 | + | ||
| 90 | +### 3.4 `/translate` 返回参数 | ||
| 91 | + | ||
| 92 | +响应体(JSON,成功时): | ||
| 93 | + | ||
| 94 | +```json | ||
| 95 | +{ | ||
| 96 | + "text": "商品名称", | ||
| 97 | + "target_lang": "en", | ||
| 98 | + "source_lang": "zh", | ||
| 99 | + "translated_text": "Product name", | ||
| 100 | + "status": "success", | ||
| 101 | + "model": "qwen" | ||
| 102 | +} | ||
| 103 | +``` | ||
| 104 | + | ||
| 105 | +### 3.5 请求示例(curl) | ||
| 106 | + | ||
| 107 | +健康检查: | ||
| 108 | + | ||
| 109 | +```bash | ||
| 110 | +curl http://localhost:6006/health | ||
| 111 | +``` | ||
| 112 | + | ||
| 113 | +默认(qwen)中文 → 英文: | ||
| 114 | + | ||
| 115 | +```bash | ||
| 116 | +curl -X POST http://localhost:6006/translate \ | ||
| 117 | + -H "Content-Type: application/json" \ | ||
| 118 | + -d '{"text":"我看到这个视频后没有笑","target_lang":"en","source_lang":"auto"}' | ||
| 119 | +``` | ||
| 120 | + | ||
| 121 | +显式指定 qwen,英文 → 简体中文: | ||
| 122 | + | ||
| 123 | +```bash | ||
| 124 | +curl -X POST http://localhost:6006/translate \ | ||
| 125 | + -H "Content-Type: application/json" \ | ||
| 126 | + -d '{"text":"Product name","target_lang":"zh","source_lang":"en","model":"qwen"}' | ||
| 127 | +``` | ||
| 128 | + | ||
| 129 | +繁体中文(`zh_tw`)测试: | ||
| 130 | + | ||
| 131 | +```bash | ||
| 132 | +curl -X POST http://localhost:6006/translate \ | ||
| 133 | + -H "Content-Type: application/json" \ | ||
| 134 | + -d '{"text":"商品名稱","target_lang":"zh_tw","source_lang":"auto","model":"qwen"}' | ||
| 135 | +``` | ||
| 136 | + | ||
| 137 | +切换 DeepL: | ||
| 138 | + | ||
| 139 | +```bash | ||
| 140 | +curl -X POST http://localhost:6006/translate \ | ||
| 141 | + -H "Content-Type: application/json" \ | ||
| 142 | + -d '{"text":"商品名称","target_lang":"en","source_lang":"zh","model":"deepl"}' | ||
| 143 | +``` | ||
| 144 | + | ||
| 145 | +### 3.6 关于提示词(Prompt) | ||
| 146 | + | ||
| 147 | +HTTP 服务内部使用了固定提示词 `TRANSLATION_PROMPT`(适用于“商品 SKU 英文名”场景),并通过 `prompt` 参数传入 `Translator.translate()`。 | ||
| 148 | + | ||
| 149 | +- **DeepL**:`prompt` 会作为 DeepL 的 `context` 使用(影响翻译但不被翻译) | ||
| 150 | +- **Qwen**:当前实现未将 `prompt/context` 传给 Qwen 的 `translation_options`(即对 Qwen 不生效) | ||
| 151 | + | ||
| 152 | +--- | ||
| 153 | + | ||
| 154 | +## 4. Python 翻译模块(`query/translator.py`) | ||
| 155 | + | ||
| 156 | +### 4.1 基本用法 | ||
| 157 | + | ||
| 158 | +```python | ||
| 159 | +from query.translator import Translator | ||
| 160 | + | ||
| 161 | +# 默认使用 qwen | ||
| 162 | +translator = Translator() | ||
| 163 | + | ||
| 164 | +result = translator.translate( | ||
| 165 | + text="我看到这个视频后没有笑", | ||
| 166 | + target_lang="en", | ||
| 167 | + source_lang="auto", | ||
| 168 | +) | ||
| 169 | +print(result) | ||
| 170 | +``` | ||
| 171 | + | ||
| 172 | +显式选择模型: | ||
| 173 | + | ||
| 174 | +```python | ||
| 175 | +translator_qwen = Translator(model="qwen") | ||
| 176 | +translator_deepl = Translator(model="deepl") | ||
| 177 | +``` | ||
| 178 | + | ||
| 179 | +### 4.2 关键参数 | ||
| 180 | + | ||
| 181 | +- `Translator(model="qwen" | "deepl")`:选择翻译模型,默认 `"qwen"` | ||
| 182 | +- `translate(text, target_lang, source_lang=None, context=None, prompt=None)`: | ||
| 183 | + - `target_lang` / `source_lang`:语言代码(见“语言支持”) | ||
| 184 | + - `source_lang` 为空或 `"auto"`:自动检测 | ||
| 185 | + - `prompt`: | ||
| 186 | + - DeepL:作为 `context` 使用 | ||
| 187 | + - Qwen:当前未使用 | ||
| 188 | + | ||
| 189 | +### 4.3 缓存(Redis) | ||
| 190 | + | ||
| 191 | +`Translator(use_cache=True)` 时会连接 Redis 并缓存翻译结果。 | ||
| 192 | + | ||
| 193 | +- Redis 连接配置来自 `config/env_config.py` 的 `REDIS_CONFIG` | ||
| 194 | +- 缓存 key 前缀默认 `trans`(可用 `REDIS_TRANSLATION_CACHE_PREFIX` 覆盖) | ||
| 195 | + | ||
| 196 | +--- | ||
| 197 | + | ||
| 198 | +## 5. Qwen 语言支持(按 qwen-mt-plus/flash/turbo 标准) | ||
| 199 | + | ||
| 200 | +> 以下为 Qwen 翻译模型支持的语言(**代码 → 英文名**),并已用于 `query/translator.py` 的映射。 | ||
| 201 | + | ||
| 202 | +| 代码 | 英文名 | | ||
| 203 | +|------|--------| | ||
| 204 | +| en | English | | ||
| 205 | +| zh | Chinese | | ||
| 206 | +| zh_tw | Traditional Chinese | | ||
| 207 | +| ru | Russian | | ||
| 208 | +| ja | Japanese | | ||
| 209 | +| ko | Korean | | ||
| 210 | +| es | Spanish | | ||
| 211 | +| fr | French | | ||
| 212 | +| pt | Portuguese | | ||
| 213 | +| de | German | | ||
| 214 | +| it | Italian | | ||
| 215 | +| th | Thai | | ||
| 216 | +| vi | Vietnamese | | ||
| 217 | +| id | Indonesian | | ||
| 218 | +| ms | Malay | | ||
| 219 | +| ar | Arabic | | ||
| 220 | +| hi | Hindi | | ||
| 221 | +| he | Hebrew | | ||
| 222 | +| my | Burmese | | ||
| 223 | +| ta | Tamil | | ||
| 224 | +| ur | Urdu | | ||
| 225 | +| bn | Bengali | | ||
| 226 | +| pl | Polish | | ||
| 227 | +| nl | Dutch | | ||
| 228 | +| ro | Romanian | | ||
| 229 | +| tr | Turkish | | ||
| 230 | +| km | Khmer | | ||
| 231 | +| lo | Lao | | ||
| 232 | +| yue | Cantonese | | ||
| 233 | +| cs | Czech | | ||
| 234 | +| el | Greek | | ||
| 235 | +| sv | Swedish | | ||
| 236 | +| hu | Hungarian | | ||
| 237 | +| da | Danish | | ||
| 238 | +| fi | Finnish | | ||
| 239 | +| uk | Ukrainian | | ||
| 240 | +| bg | Bulgarian | | ||
| 241 | + | ||
| 242 | +--- | ||
| 243 | + | ||
| 244 | +## 6. 常见问题(FAQ) | ||
| 245 | + | ||
| 246 | +### 6.1 Qwen 调用报错 / 无法初始化 | ||
| 247 | + | ||
| 248 | +- 确认 `.env` 中已配置 `DASHSCOPE_API_KEY` | ||
| 249 | +- 确认安装依赖:`openai`(Python 包) | ||
| 250 | +- 如在海外地域使用模型,将 `base_url` 切换为 `https://dashscope-intl.aliyuncs.com/compatible-mode/v1` | ||
| 251 | + | ||
| 252 | +### 6.2 DeepL 返回 403 / 翻译失败 | ||
| 253 | + | ||
| 254 | +- 确认 `.env` 中已配置 `DEEPL_AUTH_KEY` | ||
| 255 | +- 若使用的是 Pro key,请使用 `https://api.deepl.com/v2/translate`(当前代码即为该地址) | ||
| 256 | + |
query/translator.py
| 1 | """ | 1 | """ |
| 2 | Translation service for multi-language query support. | 2 | Translation service for multi-language query support. |
| 3 | 3 | ||
| 4 | -Supports DeepL API for high-quality translations. | 4 | +Supports multiple translation models: |
| 5 | +- Qwen (default): Alibaba Cloud DashScope API using qwen-mt-flash model | ||
| 6 | +- DeepL: DeepL API for high-quality translations | ||
| 5 | 7 | ||
| 8 | +使用方法 (Usage): | ||
| 6 | 9 | ||
| 7 | -#### 官方文档: | ||
| 8 | -https://developers.deepl.com/api-reference/translate/request-translation | ||
| 9 | -##### | 10 | +```python |
| 11 | +from query.translator import Translator | ||
| 12 | + | ||
| 13 | +# 使用默认的 qwen 模型(推荐) | ||
| 14 | +translator = Translator() # 默认使用 qwen 模型 | ||
| 15 | + | ||
| 16 | +# 或显式指定模型 | ||
| 17 | +translator = Translator(model='qwen') # 使用 qwen 模型 | ||
| 18 | +translator = Translator(model='deepl') # 使用 DeepL 模型 | ||
| 19 | + | ||
| 20 | +# 翻译文本 | ||
| 21 | +result = translator.translate( | ||
| 22 | + text="我看到这个视频后没有笑", | ||
| 23 | + target_lang="en", | ||
| 24 | + source_lang="auto" # 自动检测源语言 | ||
| 25 | +) | ||
| 26 | +``` | ||
| 10 | 27 | ||
| 28 | +配置说明 (Configuration): | ||
| 29 | +- Qwen 模型需要设置 DASHSCOPE_API_KEY 环境变量(在 .env 文件中) | ||
| 30 | +- DeepL 模型需要设置 DEEPL_AUTH_KEY 环境变量(在 .env 文件中) | ||
| 11 | 31 | ||
| 32 | +Qwen 模型参考文档: | ||
| 33 | +- 官方文档:https://help.aliyun.com/zh/model-studio/get-api-key | ||
| 34 | +- 模型:qwen-mt-flash(快速翻译模型) | ||
| 35 | + | ||
| 36 | +DeepL 官方文档: | ||
| 37 | +https://developers.deepl.com/api-reference/translate/request-translation | ||
| 12 | """ | 38 | """ |
| 13 | 39 | ||
| 40 | +import os | ||
| 14 | import requests | 41 | import requests |
| 15 | import re | 42 | import re |
| 16 | import redis | 43 | import redis |
| @@ -21,18 +48,21 @@ import logging | @@ -21,18 +48,21 @@ import logging | ||
| 21 | 48 | ||
| 22 | logger = logging.getLogger(__name__) | 49 | logger = logging.getLogger(__name__) |
| 23 | 50 | ||
| 24 | -# Try to import DEEPL_AUTH_KEY and REDIS_CONFIG, but allow import to fail | ||
| 25 | -try: | ||
| 26 | - from config.env_config import DEEPL_AUTH_KEY, REDIS_CONFIG | ||
| 27 | -except ImportError: | ||
| 28 | - DEEPL_AUTH_KEY = None | ||
| 29 | - REDIS_CONFIG = {} | 51 | +from config.env_config import DEEPL_AUTH_KEY, DASHSCOPE_API_KEY, REDIS_CONFIG |
| 52 | +from openai import OpenAI | ||
| 30 | 53 | ||
| 31 | 54 | ||
| 32 | class Translator: | 55 | class Translator: |
| 33 | - """Multi-language translator using DeepL API.""" | 56 | + """ |
| 57 | + Multi-language translator supporting Qwen and DeepL APIs. | ||
| 58 | + | ||
| 59 | + Default model is 'qwen' which uses Alibaba Cloud DashScope API. | ||
| 60 | + """ | ||
| 34 | 61 | ||
| 35 | DEEPL_API_URL = "https://api.deepl.com/v2/translate" # Pro tier | 62 | DEEPL_API_URL = "https://api.deepl.com/v2/translate" # Pro tier |
| 63 | + QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1" # 北京地域 | ||
| 64 | + # 如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/compatible-mode/v1 | ||
| 65 | + QWEN_MODEL = "qwen-mt-flash" # 快速翻译模型 | ||
| 36 | 66 | ||
| 37 | # Language code mapping | 67 | # Language code mapping |
| 38 | LANG_CODE_MAP = { | 68 | LANG_CODE_MAP = { |
| @@ -50,6 +80,7 @@ class Translator: | @@ -50,6 +80,7 @@ class Translator: | ||
| 50 | 80 | ||
| 51 | def __init__( | 81 | def __init__( |
| 52 | self, | 82 | self, |
| 83 | + model: str = "qwen", | ||
| 53 | api_key: Optional[str] = None, | 84 | api_key: Optional[str] = None, |
| 54 | use_cache: bool = True, | 85 | use_cache: bool = True, |
| 55 | timeout: int = 10, | 86 | timeout: int = 10, |
| @@ -60,21 +91,40 @@ class Translator: | @@ -60,21 +91,40 @@ class Translator: | ||
| 60 | Initialize translator. | 91 | Initialize translator. |
| 61 | 92 | ||
| 62 | Args: | 93 | Args: |
| 63 | - api_key: DeepL API key (or None to use from config/env) | 94 | + model: Translation model to use. Options: 'qwen' (default) or 'deepl' |
| 95 | + api_key: API key for the selected model (or None to use from config/env) | ||
| 64 | use_cache: Whether to cache translations | 96 | use_cache: Whether to cache translations |
| 65 | timeout: Request timeout in seconds | 97 | timeout: Request timeout in seconds |
| 66 | - glossary_id: DeepL glossary ID for custom terminology (optional) | 98 | + glossary_id: DeepL glossary ID for custom terminology (optional, only for DeepL) |
| 67 | translation_context: Context hint for translation (e.g., "e-commerce", "product search") | 99 | translation_context: Context hint for translation (e.g., "e-commerce", "product search") |
| 68 | """ | 100 | """ |
| 101 | + self.model = model.lower() | ||
| 102 | + if self.model not in ['qwen', 'deepl']: | ||
| 103 | + raise ValueError(f"Unsupported model: {model}. Supported models: 'qwen', 'deepl'") | ||
| 104 | + | ||
| 69 | # Get API key from config if not provided | 105 | # Get API key from config if not provided |
| 70 | - if api_key is None and DEEPL_AUTH_KEY: | ||
| 71 | - api_key = DEEPL_AUTH_KEY | 106 | + if api_key is None: |
| 107 | + if self.model == 'qwen': | ||
| 108 | + api_key = DASHSCOPE_API_KEY or os.getenv("DASHSCOPE_API_KEY") | ||
| 109 | + else: # deepl | ||
| 110 | + api_key = DEEPL_AUTH_KEY or os.getenv("DEEPL_AUTH_KEY") | ||
| 72 | 111 | ||
| 73 | self.api_key = api_key | 112 | self.api_key = api_key |
| 74 | self.timeout = timeout | 113 | self.timeout = timeout |
| 75 | self.use_cache = use_cache | 114 | self.use_cache = use_cache |
| 76 | self.glossary_id = glossary_id | 115 | self.glossary_id = glossary_id |
| 77 | self.translation_context = translation_context or "e-commerce product search" | 116 | self.translation_context = translation_context or "e-commerce product search" |
| 117 | + | ||
| 118 | + # Initialize OpenAI client for Qwen if needed | ||
| 119 | + self.qwen_client = None | ||
| 120 | + if self.model == 'qwen': | ||
| 121 | + if not self.api_key: | ||
| 122 | + logger.warning("DASHSCOPE_API_KEY not set. Qwen translation will not work.") | ||
| 123 | + else: | ||
| 124 | + self.qwen_client = OpenAI( | ||
| 125 | + api_key=self.api_key, | ||
| 126 | + base_url=self.QWEN_BASE_URL, | ||
| 127 | + ) | ||
| 78 | 128 | ||
| 79 | # Initialize Redis cache if enabled | 129 | # Initialize Redis cache if enabled |
| 80 | if use_cache: | 130 | if use_cache: |
| @@ -119,7 +169,7 @@ class Translator: | @@ -119,7 +169,7 @@ class Translator: | ||
| 119 | Args: | 169 | Args: |
| 120 | text: Text to translate | 170 | text: Text to translate |
| 121 | target_lang: Target language code ('zh', 'en', 'ru', etc.) | 171 | target_lang: Target language code ('zh', 'en', 'ru', etc.) |
| 122 | - source_lang: Source language code (optional, auto-detect if None) | 172 | + source_lang: Source language code (optional, auto-detect if None) |
| 123 | context: Additional context for translation (overrides default context) | 173 | context: Additional context for translation (overrides default context) |
| 124 | prompt: Translation prompt/instruction (optional, for better translation quality) | 174 | prompt: Translation prompt/instruction (optional, for better translation quality) |
| 125 | 175 | ||
| @@ -174,13 +224,17 @@ class Translator: | @@ -174,13 +224,17 @@ class Translator: | ||
| 174 | ) | 224 | ) |
| 175 | return text | 225 | return text |
| 176 | 226 | ||
| 177 | - # Translate using DeepL (Pro endpoint only, no free fallback) | 227 | + # Translate using selected model |
| 178 | logger.info( | 228 | logger.info( |
| 179 | - f"[Translator] Translation request | Original text: '{text}' | Target language: {target_lang} | " | 229 | + f"[Translator] Translation request | Model: {self.model} | Original text: '{text}' | Target language: {target_lang} | " |
| 180 | f"Source language: {source_lang or 'auto'} | Context: {translation_context} | " | 230 | f"Source language: {source_lang or 'auto'} | Context: {translation_context} | " |
| 181 | f"Prompt: {'yes' if prompt else 'no'} | Status: Starting translation" | 231 | f"Prompt: {'yes' if prompt else 'no'} | Status: Starting translation" |
| 182 | ) | 232 | ) |
| 183 | - result = self._translate_deepl(text, target_lang, source_lang, translation_context, prompt) | 233 | + |
| 234 | + if self.model == 'qwen': | ||
| 235 | + result = self._translate_qwen(text, target_lang, source_lang, translation_context, prompt) | ||
| 236 | + else: # deepl | ||
| 237 | + result = self._translate_deepl(text, target_lang, source_lang, translation_context, prompt) | ||
| 184 | 238 | ||
| 185 | # If still failed, return original text with warning | 239 | # If still failed, return original text with warning |
| 186 | if result is None: | 240 | if result is None: |
| @@ -201,6 +255,122 @@ class Translator: | @@ -201,6 +255,122 @@ class Translator: | ||
| 201 | 255 | ||
| 202 | return result | 256 | return result |
| 203 | 257 | ||
| 258 | + def _translate_qwen( | ||
| 259 | + self, | ||
| 260 | + text: str, | ||
| 261 | + target_lang: str, | ||
| 262 | + source_lang: Optional[str], | ||
| 263 | + context: Optional[str] = None, | ||
| 264 | + prompt: Optional[str] = None | ||
| 265 | + ) -> Optional[str]: | ||
| 266 | + """ | ||
| 267 | + Translate using Qwen MT Flash model via Alibaba Cloud DashScope API. | ||
| 268 | + | ||
| 269 | + Args: | ||
| 270 | + text: Text to translate | ||
| 271 | + target_lang: Target language code ('zh', 'en', 'ru', etc.) | ||
| 272 | + source_lang: Source language code (optional, 'auto' if None) | ||
| 273 | + context: Context hint for translation (optional) | ||
| 274 | + prompt: Translation prompt/instruction (optional) | ||
| 275 | + | ||
| 276 | + Returns: | ||
| 277 | + Translated text or None if translation fails | ||
| 278 | + """ | ||
| 279 | + if not self.qwen_client: | ||
| 280 | + logger.error("[Translator] Qwen client not initialized. Check DASHSCOPE_API_KEY.") | ||
| 281 | + return None | ||
| 282 | + | ||
| 283 | + # Qwen (qwen-mt-plus/flash/turbo) supported languages mapping | ||
| 284 | + # 标准来自:你提供的“语言 / 英文名 / 代码”表 | ||
| 285 | + qwen_lang_map = { | ||
| 286 | + "en": "English", | ||
| 287 | + "zh": "Chinese", | ||
| 288 | + "zh_tw": "Traditional Chinese", | ||
| 289 | + "ru": "Russian", | ||
| 290 | + "ja": "Japanese", | ||
| 291 | + "ko": "Korean", | ||
| 292 | + "es": "Spanish", | ||
| 293 | + "fr": "French", | ||
| 294 | + "pt": "Portuguese", | ||
| 295 | + "de": "German", | ||
| 296 | + "it": "Italian", | ||
| 297 | + "th": "Thai", | ||
| 298 | + "vi": "Vietnamese", | ||
| 299 | + "id": "Indonesian", | ||
| 300 | + "ms": "Malay", | ||
| 301 | + "ar": "Arabic", | ||
| 302 | + "hi": "Hindi", | ||
| 303 | + "he": "Hebrew", | ||
| 304 | + "my": "Burmese", | ||
| 305 | + "ta": "Tamil", | ||
| 306 | + "ur": "Urdu", | ||
| 307 | + "bn": "Bengali", | ||
| 308 | + "pl": "Polish", | ||
| 309 | + "nl": "Dutch", | ||
| 310 | + "ro": "Romanian", | ||
| 311 | + "tr": "Turkish", | ||
| 312 | + "km": "Khmer", | ||
| 313 | + "lo": "Lao", | ||
| 314 | + "yue": "Cantonese", | ||
| 315 | + "cs": "Czech", | ||
| 316 | + "el": "Greek", | ||
| 317 | + "sv": "Swedish", | ||
| 318 | + "hu": "Hungarian", | ||
| 319 | + "da": "Danish", | ||
| 320 | + "fi": "Finnish", | ||
| 321 | + "uk": "Ukrainian", | ||
| 322 | + "bg": "Bulgarian", | ||
| 323 | + } | ||
| 324 | + | ||
| 325 | + # Convert target language | ||
| 326 | + target_lang_normalized = target_lang.lower() | ||
| 327 | + target_lang_qwen = qwen_lang_map.get(target_lang_normalized, target_lang.capitalize()) | ||
| 328 | + | ||
| 329 | + # Convert source language | ||
| 330 | + source_lang_normalized = (source_lang or "").strip().lower() | ||
| 331 | + if not source_lang_normalized or source_lang_normalized == "auto": | ||
| 332 | + source_lang_qwen = "auto" | ||
| 333 | + else: | ||
| 334 | + source_lang_qwen = qwen_lang_map.get(source_lang_normalized, source_lang.capitalize()) | ||
| 335 | + | ||
| 336 | + # Prepare translation options | ||
| 337 | + translation_options = { | ||
| 338 | + "source_lang": source_lang_qwen, | ||
| 339 | + "target_lang": target_lang_qwen, | ||
| 340 | + } | ||
| 341 | + | ||
| 342 | + # Prepare messages | ||
| 343 | + messages = [ | ||
| 344 | + { | ||
| 345 | + "role": "user", | ||
| 346 | + "content": text | ||
| 347 | + } | ||
| 348 | + ] | ||
| 349 | + | ||
| 350 | + try: | ||
| 351 | + completion = self.qwen_client.chat.completions.create( | ||
| 352 | + model=self.QWEN_MODEL, | ||
| 353 | + messages=messages, | ||
| 354 | + extra_body={ | ||
| 355 | + "translation_options": translation_options | ||
| 356 | + } | ||
| 357 | + ) | ||
| 358 | + | ||
| 359 | + translated_text = completion.choices[0].message.content.strip() | ||
| 360 | + | ||
| 361 | + logger.debug( | ||
| 362 | + f"[Translator] Qwen API response success | Original text: '{text}' | Target language: {target_lang_qwen} | " | ||
| 363 | + f"Translation result: '{translated_text}'" | ||
| 364 | + ) | ||
| 365 | + return translated_text | ||
| 366 | + | ||
| 367 | + except Exception as e: | ||
| 368 | + logger.error( | ||
| 369 | + f"[Translator] Qwen API request exception | Original text: '{text}' | Target language: {target_lang_qwen} | " | ||
| 370 | + f"Error: {e}", exc_info=True | ||
| 371 | + ) | ||
| 372 | + return None | ||
| 373 | + | ||
| 204 | def _translate_deepl( | 374 | def _translate_deepl( |
| 205 | self, | 375 | self, |
| 206 | text: str, | 376 | text: str, |