From 0fd2f875d0489b437589ee50a0bd108bdbbda1d9 Mon Sep 17 00:00:00 2001 From: tangwang Date: Tue, 17 Mar 2026 19:21:34 +0800 Subject: [PATCH] translate --- .gitignore | 4 +++- README.md | 15 ++++++++++----- api/translator_app.py | 372 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ config/config.yaml | 56 ++++++++++++++++++++++++++++++++++++++++---------------- config/config_loader.py | 12 ------------ config/env_config.py | 4 ---- config/services_config.py | 116 ++++++-------------------------------------------------------------------------------------------------------------- config/translate_prompts.py | 42 ------------------------------------------ docs/DEVELOPER_GUIDE.md | 40 +++++++++++++++++++++++----------------- docs/QUICKSTART.md | 59 +++++++++++++++++++++++++++++++++++++++-------------------- docs/TODO.txt | 56 ++++++++++++-------------------------------------------- docs/工作总结-微服务性能优化与架构.md | 22 +++++++++++----------- docs/搜索API对接指南.md | 60 ++++++++++++++++++++++++++++++++++++++++++++++-------------- docs/系统设计文档.md | 11 +---------- docs/缓存与Redis使用说明.md | 15 ++++++--------- docs/翻译模块说明.md | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- indexer/README.md | 10 +++++++--- indexer/document_transformer.py | 11 ++++------- indexer/incremental_service.py | 5 ++--- indexer/indexing_utils.py | 5 ++--- indexer/test_indexing.py | 8 ++------ providers/__init__.py | 8 +------- providers/translation.py | 28 ---------------------------- query/__init__.py | 2 -- query/deepl_provider.py | 3 --- query/llm_translate.py | 5 ----- query/query_parser.py | 4 ++-- query/qwen_mt_translate.py | 5 ----- query/test_translation.py | 299 ++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- requirements_translator_service.txt | 20 ++++++++++++++++++++ scripts/download_translation_models.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/setup_translator_venv.sh | 43 +++++++++++++++++++++++++++++++++++++++++++ scripts/start_translator.sh | 37 +++++++++++++++++++++++++++++++------ tests/ci/test_service_api_contracts.py | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------- tests/test_translation_local_backends.py | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ tests/test_translator_failure_semantics.py | 12 +++++++++--- translation/__init__.py | 27 +++++++++++++++++++++------ translation/backends/__init__.py | 12 +----------- translation/backends/deepl.py | 106 ++++++++++++++++++++++------------------------------------------------------------------------------------ translation/backends/llm.py | 101 ++++++++++++++++++++++++++++++----------------------------------------------------------------------- translation/backends/local_seq2seq.py | 277 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ translation/backends/qwen_mt.py | 90 ++++++++++++++++++++++++++++++++++++++---------------------------------------------------- translation/client.py | 24 +++++++++++++----------- translation/languages.py | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ translation/prompts.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ translation/protocols.py | 3 +-- translation/scenes.py | 36 ++++++++++++++++++++++++++++++++++++ translation/service.py | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------- translation/settings.py | 210 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 49 files changed, 1765 insertions(+), 1180 deletions(-) delete mode 100644 config/translate_prompts.py delete mode 100644 providers/translation.py delete mode 100644 query/deepl_provider.py delete mode 100644 query/llm_translate.py delete mode 100644 query/qwen_mt_translate.py mode change 100755 => 100644 query/test_translation.py create mode 100644 requirements_translator_service.txt create mode 100755 scripts/download_translation_models.py create mode 100755 scripts/setup_translator_venv.sh create mode 100644 tests/test_translation_local_backends.py create mode 100644 translation/backends/local_seq2seq.py create mode 100644 translation/languages.py create mode 100644 translation/prompts.py create mode 100644 translation/scenes.py create mode 100644 translation/settings.py diff --git a/.gitignore b/.gitignore index a75c88b..6a6ec5c 100644 --- a/.gitignore +++ b/.gitignore @@ -73,4 +73,6 @@ logs_*/ .runtime/ .venv* -.pytest_cache \ No newline at end of file +.pytest_cache + +models/ diff --git a/README.md b/README.md index 41e27f8..0aac059 100644 --- a/README.md +++ b/README.md @@ -60,12 +60,16 @@ source activate.sh - `search/`:召回、排序、结果组织 - `query/`:查询解析、多语言处理、改写 - `indexer/`:MySQL 行数据 -> ES 文档的转换与索引流程 -- `providers/`:能力调用抽象(translation/embedding/rerank) +- `providers/`:能力调用抽象(embedding/rerank) +- `translation/`:翻译服务客户端、服务编排与后端实现 - `reranker/`:重排服务及后端实现 - `embeddings/`:向量服务(文本/图像) - `config/`:配置加载与服务配置解析 -关键设计:**Provider(调用方式)与 Backend(推理实现)分离**,新增能力优先在协议与工厂注册,不改调用方主流程。 +关键设计: + +- embedding / rerank 继续采用 **Provider(调用方式)与 Backend(推理实现)分离** +- translation 采用 **一个 translator service + 多个 capability backend**,业务侧统一调用 6006,不再做翻译 provider 选择 --- @@ -89,9 +93,10 @@ source activate.sh | 2. 运行与排障 | `docs/Usage-Guide.md` | | 3. API 详细说明 | `docs/搜索API对接指南.md` | | 4. 快速参数速查 | `docs/搜索API速查表.md` | -| 5. 首次环境搭建、生产凭证 | `docs/QUICKSTART.md` §1.4–1.8 | -| 6. TEI 文本向量专项 | `docs/TEI_SERVICE说明文档.md` | -| 7. CN-CLIP 图片向量专项 | `docs/CNCLIP_SERVICE说明文档.md` | +| 5. 翻译专项 | `docs/翻译模块说明.md` | +| 6. 首次环境搭建、生产凭证 | `docs/QUICKSTART.md` §1.4–1.8 | +| 7. TEI 文本向量专项 | `docs/TEI_SERVICE说明文档.md` | +| 8. 
CN-CLIP 图片向量专项 | `docs/CNCLIP_SERVICE说明文档.md` | --- diff --git a/api/translator_app.py b/api/translator_app.py index a93aeaf..fc90a74 100644 --- a/api/translator_app.py +++ b/api/translator_app.py @@ -1,99 +1,24 @@ +"""Translator service HTTP app.""" -""" - -# 方式1:直接运行 -python api/translator_app.py - -# 方式2:使用 uvicorn -uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload - - -使用说明: -Translation HTTP Service - -This service provides a RESTful API for text translation using Qwen (default) or DeepL API. -The service runs on port 6006 and provides a simple translation endpoint. - -API Endpoint: - POST /translate - -Request Body (JSON): - { - "text": "要翻译的文本", - "target_lang": "en", # Required: target language code (zh, en, ru, etc.) - "source_lang": "zh", # Optional: source language code (auto-detect if not provided) - "model": "qwen" # Optional: translation model ("qwen" or "deepl", default: "qwen") - } - -Response (JSON): - { - "text": "要翻译的文本", - "target_lang": "en", - "source_lang": "zh", - "translated_text": "Text to translate", - "status": "success" - } - -Usage Examples: - -1. Translate Chinese to English: - curl -X POST http://localhost:6006/translate \ - -H "Content-Type: application/json" \ - -d '{ - "text": "商品名称", - "target_lang": "en", - "source_lang": "zh" - }' - -2. Translate with auto-detection: - curl -X POST http://localhost:6006/translate \ - -H "Content-Type: application/json" \ - -d '{ - "text": "Product name", - "target_lang": "zh" - }' - -3. Translate using DeepL model: - curl -X POST http://localhost:6006/translate \ - -H "Content-Type: application/json" \ - -d '{ - "text": "商品名称", - "target_lang": "en", - "source_lang": "zh", - "model": "deepl" - }' - -4. Translate Russian to English: - curl -X POST http://localhost:6006/translate \ - -H "Content-Type: application/json" \ - -d '{ - "text": "Название товара", - "target_lang": "en", - "source_lang": "ru" - }' - -Health Check: - GET /health - - curl http://localhost:6006/health - -Start the service: - python api/translator_app.py - # or - uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload -""" - -import logging import argparse +import logging +from contextlib import asynccontextmanager +from functools import lru_cache +from typing import List, Optional, Union + import uvicorn -from typing import Dict, List, Optional, Union from fastapi import FastAPI, HTTPException -from fastapi.responses import JSONResponse from fastapi.middleware.cors import CORSMiddleware -from pydantic import BaseModel, Field +from fastapi.responses import JSONResponse +from pydantic import BaseModel, ConfigDict, Field from config.services_config import get_translation_config from translation.service import TranslationService +from translation.settings import ( + get_enabled_translation_models, + normalize_translation_model, + normalize_translation_scene, +) # Configure logging logging.basicConfig( @@ -102,37 +27,33 @@ logging.basicConfig( ) logger = logging.getLogger(__name__) -_translation_service: Optional[TranslationService] = None - +@lru_cache(maxsize=1) def get_translation_service() -> TranslationService: - global _translation_service - if _translation_service is None: - _translation_service = TranslationService(get_translation_config()) - return _translation_service + return TranslationService(get_translation_config()) # Request/Response models class TranslationRequest(BaseModel): """Translation request model.""" - text: Union[str, List[str]] = Field(..., description="Text to translate (string or list of 
strings)") - target_lang: str = Field(..., description="Target language code (zh, en, ru, etc.)") - source_lang: Optional[str] = Field(None, description="Source language code (optional, auto-detect if not provided)") - model: Optional[str] = Field(None, description="Translation model: qwen-mt | deepl | llm") - scene: Optional[str] = Field(None, description="Translation scene, paired with model routing") - context: Optional[str] = Field(None, description="Deprecated alias of scene") - prompt: Optional[str] = Field(None, description="Optional prompt override") - class Config: - json_schema_extra = { + model_config = ConfigDict( + json_schema_extra={ "example": { "text": "商品名称", "target_lang": "en", "source_lang": "zh", "model": "llm", - "scene": "sku_name" + "scene": "sku_name", } } + ) + + text: Union[str, List[str]] = Field(..., description="Text to translate (string or list of strings)") + target_lang: str = Field(..., description="Target language code (zh, en, ru, etc.)") + source_lang: Optional[str] = Field(None, description="Source language code (optional, auto-detect if not provided)") + model: Optional[str] = Field(None, description="Enabled translation capability name") + scene: Optional[str] = Field(None, description="Translation scene, paired with model routing") class TranslationResponse(BaseModel): @@ -149,13 +70,108 @@ class TranslationResponse(BaseModel): scene: str = Field(..., description="Translation scene used") +def _normalize_scene(service: TranslationService, scene: Optional[str]) -> str: + return normalize_translation_scene(service.config, scene) + + +def _normalize_model(service: TranslationService, model: Optional[str]) -> str: + return normalize_translation_model(service.config, model or service.config["default_model"]) + + +def _ensure_valid_text(text: Union[str, List[str]]) -> None: + if isinstance(text, list): + if not text: + raise HTTPException(status_code=400, detail="Text list cannot be empty") + return + if not text or not text.strip(): + raise HTTPException(status_code=400, detail="Text cannot be empty") + + +def _normalize_batch_result( + original: List[str], + translated: Union[str, List[Optional[str]], None], +) -> List[Optional[str]]: + if translated is None: + return [None for _ in original] + if not isinstance(translated, list): + raise HTTPException(status_code=500, detail="Batch translation provider returned non-list result") + return [translated[idx] if idx < len(translated) else None for idx, _ in enumerate(original)] + + +def _translate_batch( + service: TranslationService, + raw_text: List[str], + *, + target_lang: str, + source_lang: Optional[str], + model: str, + scene: str, +) -> List[Optional[str]]: + backend = service.get_backend(model) + if getattr(backend, "supports_batch", False): + try: + translated = service.translate( + text=raw_text, + target_lang=target_lang, + source_lang=source_lang, + model=model, + scene=scene, + ) + return _normalize_batch_result(raw_text, translated) + except ValueError: + raise + except Exception as exc: + logger.error("Batch translation failed: %s", exc, exc_info=True) + + results: List[Optional[str]] = [] + for item in raw_text: + if item is None or not str(item).strip(): + results.append(item) # type: ignore[arg-type] + continue + try: + out = service.translate( + text=str(item), + target_lang=target_lang, + source_lang=source_lang, + model=model, + scene=scene, + ) + except ValueError: + raise + except Exception as exc: + logger.warning("Per-item translation failed: %s", exc, exc_info=True) + out = None + 
results.append(out) + return results + + +@asynccontextmanager +async def lifespan(_: FastAPI): + """Warm the default backend on process startup.""" + logger.info("Starting Translation Service API") + service = get_translation_service() + default_backend = service.get_backend(service.config["default_model"]) + logger.info( + "Translation service ready | default_model=%s available_models=%s loaded_models=%s", + service.config["default_model"], + service.available_models, + service.loaded_models, + ) + logger.info( + "Default translation backend warmed up | model=%s", + getattr(default_backend, "model", service.config["default_model"]), + ) + yield + + # Create FastAPI app app = FastAPI( title="Translation Service API", - description="RESTful API for text translation using Qwen (default) or DeepL", + description="Translation service with pluggable capabilities and scene routing", version="1.0.0", docs_url="/docs", - redoc_url="/redoc" + redoc_url="/redoc", + lifespan=lifespan, ) # Add CORS middleware @@ -168,22 +184,6 @@ app.add_middleware( ) -@app.on_event("startup") -async def startup_event(): - """Initialize translator on startup.""" - logger.info("Starting Translation Service API on port 6006") - try: - service = get_translation_service() - logger.info( - "Translation service ready | default_model=%s available_models=%s", - service.config.default_model, - service.available_models, - ) - except Exception as e: - logger.error(f"Failed to initialize translator: {e}", exc_info=True) - raise - - @app.get("/health") async def health_check(): """Health check endpoint.""" @@ -192,10 +192,11 @@ async def health_check(): return { "status": "healthy", "service": "translation", - "default_model": service.config.default_model, - "default_scene": service.config.default_scene, + "default_model": service.config["default_model"], + "default_scene": service.config["default_scene"], "available_models": service.available_models, - "enabled_capabilities": service.config.enabled_models, + "enabled_capabilities": get_enabled_translation_models(service.config), + "loaded_models": service.loaded_models, } except Exception as e: logger.error(f"Health check failed: {e}") @@ -210,106 +211,27 @@ async def health_check(): @app.post("/translate", response_model=TranslationResponse) async def translate(request: TranslationRequest): - """ - Translate text to target language. - - Uses a fixed prompt optimized for product SKU name translation. - The translation is cached in Redis for performance. - - Supports both Qwen (default) and DeepL models via the 'model' parameter. 
- """ - # 允许 text 为字符串或字符串列表 - if isinstance(request.text, list): - if not request.text: - raise HTTPException( - status_code=400, - detail="Text list cannot be empty" - ) - else: - if not request.text or not request.text.strip(): - raise HTTPException( - status_code=400, - detail="Text cannot be empty" - ) - + _ensure_valid_text(request.text) + if not request.target_lang: - raise HTTPException( - status_code=400, - detail="target_lang is required" - ) - + raise HTTPException(status_code=400, detail="target_lang is required") + try: service = get_translation_service() - scene = (request.scene or request.context or service.config.default_scene).strip() or "general" - model = service.config.normalize_model_name(request.model or service.config.default_model) + scene = _normalize_scene(service, request.scene) + model = _normalize_model(service, request.model) translator = service.get_backend(model) raw_text = request.text - # 如果是列表,并且底层 provider 声明支持 batch,则直接传 list - if isinstance(raw_text, list) and getattr(translator, "supports_batch", False): - try: - translated_list = service.translate( - text=raw_text, - target_lang=request.target_lang, - source_lang=request.source_lang, - model=model, - scene=scene, - prompt=request.prompt, - ) - except Exception as exc: - logger.error("Batch translation failed: %s", exc, exc_info=True) - # 回退到逐条拆分逻辑 - translated_list = None - - if translated_list is not None: - # 规范化为 List[Optional[str]],并保证长度对应 - if not isinstance(translated_list, list): - raise HTTPException( - status_code=500, - detail="Batch translation provider returned non-list result", - ) - normalized: List[Optional[str]] = [] - for idx, item in enumerate(raw_text): - if idx < len(translated_list): - val = translated_list[idx] - else: - val = None - # 失败语义:失败位置为 None - normalized.append(val) - - return TranslationResponse( - text=raw_text, - target_lang=request.target_lang, - source_lang=request.source_lang, - translated_text=normalized, - status="success", - model=str(getattr(translator, "model", model)), - scene=scene, - ) - - # 否则:统一走逐条拆分逻辑(包括不支持 batch 的 provider) if isinstance(raw_text, list): - results: List[Optional[str]] = [] - for item in raw_text: - if item is None or not str(item).strip(): - # 空元素不视为失败,直接返回原值 - results.append(item) # type: ignore[arg-type] - continue - try: - out = service.translate( - text=str(item), - target_lang=request.target_lang, - source_lang=request.source_lang, - model=model, - scene=scene, - prompt=request.prompt, - ) - except Exception as exc: - logger.warning("Per-item translation failed: %s", exc, exc_info=True) - out = None - # 失败语义:该元素为 None - results.append(out) - + results = _translate_batch( + service, + raw_text, + target_lang=request.target_lang, + source_lang=request.source_lang, + model=model, + scene=scene, + ) return TranslationResponse( text=raw_text, target_lang=request.target_lang, @@ -320,21 +242,16 @@ async def translate(request: TranslationRequest): scene=scene, ) - # 单文本模式:保持原有严格失败语义 translated_text = service.translate( text=raw_text, target_lang=request.target_lang, source_lang=request.source_lang, model=model, scene=scene, - prompt=request.prompt, ) if translated_text is None: - raise HTTPException( - status_code=500, - detail="Translation failed" - ) + raise HTTPException(status_code=500, detail="Translation failed") return TranslationResponse( text=raw_text, @@ -348,12 +265,11 @@ async def translate(request: TranslationRequest): except HTTPException: raise + except ValueError as e: + raise HTTPException(status_code=400, 
detail=str(e)) from e except Exception as e: logger.error(f"Translation error: {e}", exc_info=True) - raise HTTPException( - status_code=500, - detail=f"Translation error: {str(e)}" - ) + raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}") @app.get("/") diff --git a/config/config.yaml b/config/config.yaml index a421abb..ef70ab7 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -77,10 +77,6 @@ query_config: text_embedding_field: "title_embedding" image_embedding_field: null - # 翻译API配置(provider/URL 在 services.translation) - translation_service: "deepl" - translation_api_key: null # 通过环境变量设置 - # 返回字段配置(_source includes) # null表示返回所有字段,[]表示不返回任何字段,列表表示只返回指定字段 source_fields: null @@ -116,33 +112,61 @@ services: key_prefix: "trans:v2" ttl_seconds: 62208000 sliding_expiration: true - key_include_context: true - key_include_prompt: true + key_include_scene: true key_include_source_lang: true capabilities: qwen-mt: enabled: true + backend: "qwen_mt" model: "qwen-mt-flash" + base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1" timeout_sec: 10.0 use_cache: true llm: enabled: true + backend: "llm" model: "qwen-flash" - # 可选:覆盖 DashScope 兼容模式的 Endpoint 与超时 - # base_url 留空则使用 DASHSCOPE_BASE_URL 或默认地域 - base_url: "" + base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1" timeout_sec: 30.0 deepl: enabled: false - model: "deepl" + backend: "deepl" + api_url: "https://api.deepl.com/v2/translate" timeout_sec: 10.0 - # 可选:用于术语表翻译(由 query_config.translation_glossary_id 衔接) glossary_id: "" - google: - enabled: false - project_id: "" - location: "global" - model: "" + nllb-200-distilled-600m: + enabled: true + backend: "local_nllb" + model_id: "facebook/nllb-200-distilled-600M" + model_dir: "./models/translation/facebook/nllb-200-distilled-600M" + device: "cuda" + torch_dtype: "float16" + batch_size: 8 + max_input_length: 256 + max_new_tokens: 256 + num_beams: 1 + opus-mt-zh-en: + enabled: true + backend: "local_marian" + model_id: "Helsinki-NLP/opus-mt-zh-en" + model_dir: "./models/translation/Helsinki-NLP/opus-mt-zh-en" + device: "cuda" + torch_dtype: "float16" + batch_size: 16 + max_input_length: 256 + max_new_tokens: 256 + num_beams: 1 + opus-mt-en-zh: + enabled: true + backend: "local_marian" + model_id: "Helsinki-NLP/opus-mt-en-zh" + model_dir: "./models/translation/Helsinki-NLP/opus-mt-en-zh" + device: "cuda" + torch_dtype: "float16" + batch_size: 16 + max_input_length: 256 + max_new_tokens: 256 + num_beams: 1 embedding: provider: "http" # http base_url: "http://127.0.0.1:6005" diff --git a/config/config_loader.py b/config/config_loader.py index 10be1ae..e64c198 100644 --- a/config/config_loader.py +++ b/config/config_loader.py @@ -37,12 +37,6 @@ class QueryConfig: # Query rewrite dictionary (loaded from external file) rewrite_dictionary: Dict[str, str] = field(default_factory=dict) - # Translation settings (provider/URL in services.translation) - translation_service: str = "deepl" - translation_api_key: Optional[str] = None - translation_glossary_id: Optional[str] = None - translation_context: str = "e-commerce product search" - # Embedding field names text_embedding_field: Optional[str] = "title_embedding" image_embedding_field: Optional[str] = None @@ -234,7 +228,6 @@ class ConfigLoader: # Parse query config query_config_data = config_data.get("query_config", {}) - services_data = config_data.get("services", {}) if isinstance(config_data.get("services", {}), dict) else {} rewrite_dictionary = self._load_rewrite_dictionary() search_fields_cfg = 
query_config_data.get("search_fields", {}) text_strategy_cfg = query_config_data.get("text_query_strategy", {}) @@ -245,10 +238,6 @@ class ConfigLoader: enable_text_embedding=query_config_data.get("enable_text_embedding", True), enable_query_rewrite=query_config_data.get("enable_query_rewrite", True), rewrite_dictionary=rewrite_dictionary, - translation_api_key=query_config_data.get("translation_api_key"), - translation_service=query_config_data.get("translation_service") or "deepl", - translation_glossary_id=query_config_data.get("translation_glossary_id"), - translation_context=query_config_data.get("translation_context") or "e-commerce product search", text_embedding_field=query_config_data.get("text_embedding_field"), image_embedding_field=query_config_data.get("image_embedding_field"), source_fields=query_config_data.get("source_fields"), @@ -459,7 +448,6 @@ class ConfigLoader: "default_language": config.query_config.default_language, "enable_text_embedding": config.query_config.enable_text_embedding, "enable_query_rewrite": config.query_config.enable_query_rewrite, - "translation_service": config.query_config.translation_service, "text_embedding_field": config.query_config.text_embedding_field, "image_embedding_field": config.query_config.image_embedding_field, "source_fields": config.query_config.source_fields, diff --git a/config/env_config.py b/config/env_config.py index 50cad06..92274d3 100644 --- a/config/env_config.py +++ b/config/env_config.py @@ -65,9 +65,6 @@ EMBEDDING_HOST = os.getenv('EMBEDDING_HOST', '127.0.0.1') EMBEDDING_PORT = int(os.getenv('EMBEDDING_PORT', 6005)) TRANSLATION_HOST = os.getenv('TRANSLATION_HOST', '127.0.0.1') TRANSLATION_PORT = int(os.getenv('TRANSLATION_PORT', 6006)) -TRANSLATION_PROVIDER = os.getenv('TRANSLATION_PROVIDER', 'direct') # deprecated -TRANSLATION_MODEL = os.getenv('TRANSLATION_MODEL', 'llm') -TRANSLATION_SCENE = os.getenv('TRANSLATION_SCENE', 'general') RERANKER_HOST = os.getenv('RERANKER_HOST', '127.0.0.1') RERANKER_PORT = int(os.getenv('RERANKER_PORT', 6007)) RERANK_PROVIDER = os.getenv('RERANK_PROVIDER', 'http') @@ -79,7 +76,6 @@ INDEXER_BASE_URL = os.getenv('INDEXER_BASE_URL') or ( f'http://localhost:{INDEXER_PORT}' if INDEXER_HOST == '0.0.0.0' else f'http://{INDEXER_HOST}:{INDEXER_PORT}' ) EMBEDDING_SERVICE_URL = os.getenv('EMBEDDING_SERVICE_URL') or f'http://{EMBEDDING_HOST}:{EMBEDDING_PORT}' -TRANSLATION_SERVICE_URL = os.getenv('TRANSLATION_SERVICE_URL') or f'http://{TRANSLATION_HOST}:{TRANSLATION_PORT}' RERANKER_SERVICE_URL = os.getenv('RERANKER_SERVICE_URL') or f'http://{RERANKER_HOST}:{RERANKER_PORT}/rerank' # Model IDs / paths diff --git a/config/services_config.py b/config/services_config.py index cce5e80..9141322 100644 --- a/config/services_config.py +++ b/config/services_config.py @@ -15,6 +15,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional import yaml +from translation.settings import TranslationConfig, build_translation_config, get_translation_cache @dataclass @@ -29,42 +30,6 @@ class ServiceConfig: return self.providers.get(p, {}) if isinstance(self.providers, dict) else {} -@dataclass -class TranslationServiceConfig: - """Dedicated config model for the translation service.""" - - service_url: str - timeout_sec: float - default_model: str - default_scene: str - capabilities: Dict[str, Dict[str, Any]] = field(default_factory=dict) - cache: Dict[str, Any] = field(default_factory=dict) - - def normalize_model_name(self, model: Optional[str]) -> str: - normalized = str(model or 
self.default_model).strip().lower() - aliases = { - "qwen": "qwen-mt", - "qwen-mt-flash": "qwen-mt", - "qwen-mt-flush": "qwen-mt", - "service": self.default_model, - "default": self.default_model, - } - return aliases.get(normalized, normalized) - - @property - def enabled_models(self) -> List[str]: - items: List[str] = [] - for name, cfg in self.capabilities.items(): - if isinstance(cfg, dict) and bool(cfg.get("enabled", False)): - items.append(str(name).strip().lower()) - return items - - def get_capability_cfg(self, model: Optional[str]) -> Dict[str, Any]: - normalized = self.normalize_model_name(model) - value = self.capabilities.get(normalized) - return dict(value) if isinstance(value, dict) else {} - - def _load_services_raw(config_path: Optional[Path] = None) -> Dict[str, Any]: if config_path is None: config_path = Path(__file__).parent / "config.yaml" @@ -94,70 +59,10 @@ def _resolve_provider_name(env_name: str, config_provider: Any, capability: str) return str(provider).strip().lower() -def _resolve_translation() -> TranslationServiceConfig: +def _resolve_translation() -> TranslationConfig: raw = _load_services_raw() cfg = raw.get("translation", {}) if isinstance(raw.get("translation"), dict) else {} - - service_url = ( - os.getenv("TRANSLATION_SERVICE_URL") - or cfg.get("service_url") - or cfg.get("base_url") - or "http://127.0.0.1:6006" - ) - timeout_sec = float(os.getenv("TRANSLATION_TIMEOUT_SEC") or cfg.get("timeout_sec") or 10.0) - - raw_capabilities = cfg.get("capabilities") - if not isinstance(raw_capabilities, dict): - raw_capabilities = cfg.get("providers") - capabilities = raw_capabilities if isinstance(raw_capabilities, dict) else {} - - default_model = str( - os.getenv("TRANSLATION_MODEL") - or cfg.get("default_model") - or cfg.get("provider") - or "qwen-mt" - ).strip().lower() - default_scene = str( - os.getenv("TRANSLATION_SCENE") - or cfg.get("default_scene") - or "general" - ).strip() or "general" - - resolved_capabilities: Dict[str, Dict[str, Any]] = {} - for name, value in capabilities.items(): - if not isinstance(value, dict): - continue - normalized = str(name or "").strip().lower() - if not normalized: - continue - copied = dict(value) - copied.setdefault("enabled", normalized == default_model) - resolved_capabilities[normalized] = copied - - aliases = { - "qwen": "qwen-mt", - "qwen-mt-flash": "qwen-mt", - "qwen-mt-flush": "qwen-mt", - } - default_model = aliases.get(default_model, default_model) - - if default_model not in resolved_capabilities: - raise ValueError( - f"services.translation.default_model '{default_model}' is not defined in capabilities" - ) - if not bool(resolved_capabilities[default_model].get("enabled", False)): - resolved_capabilities[default_model]["enabled"] = True - - cache_cfg = cfg.get("cache", {}) if isinstance(cfg.get("cache"), dict) else {} - - return TranslationServiceConfig( - service_url=str(service_url).rstrip("/"), - timeout_sec=timeout_sec, - default_model=default_model, - default_scene=default_scene, - capabilities=resolved_capabilities, - cache=cache_cfg, - ) + return build_translation_config(cfg) def _resolve_embedding() -> ServiceConfig: @@ -237,7 +142,7 @@ def get_embedding_backend_config() -> tuple[str, dict]: @lru_cache(maxsize=1) -def get_translation_config() -> TranslationServiceConfig: +def get_translation_config() -> TranslationConfig: return _resolve_translation() @@ -252,20 +157,11 @@ def get_rerank_config() -> ServiceConfig: def get_translation_base_url() -> str: - return get_translation_config().service_url + 
return str(get_translation_config()["service_url"]) def get_translation_cache_config() -> Dict[str, Any]: - cache_cfg = get_translation_config().cache - return { - "enabled": bool(cache_cfg.get("enabled", True)), - "key_prefix": str(cache_cfg.get("key_prefix", "trans:v2")), - "ttl_seconds": int(cache_cfg.get("ttl_seconds", 360 * 24 * 3600)), - "sliding_expiration": bool(cache_cfg.get("sliding_expiration", True)), - "key_include_context": bool(cache_cfg.get("key_include_context", True)), - "key_include_prompt": bool(cache_cfg.get("key_include_prompt", True)), - "key_include_source_lang": bool(cache_cfg.get("key_include_source_lang", True)), - } + return get_translation_cache(get_translation_config()) def get_embedding_base_url() -> str: diff --git a/config/translate_prompts.py b/config/translate_prompts.py deleted file mode 100644 index 2637f8e..0000000 --- a/config/translate_prompts.py +++ /dev/null @@ -1,42 +0,0 @@ -from config.tenant_config_loader import SOURCE_LANG_CODE_MAP, TARGET_LANG_CODE_MAP - -TRANSLATION_PROMPTS = { - "general": { - "zh": "你是一名专业的 {source_lang}({src_lang_code})到 {target_lang}({tgt_lang_code})翻译专家,请准确传达原文含义并符合{target_lang}语言习惯,只输出翻译结果:{text}", - "en": "You are a professional {source_lang} ({src_lang_code}) to {target_lang} ({tgt_lang_code}) translator. Accurately convey the meaning following {target_lang} grammar and usage, output only the translation: {text}", - "ru": "Вы профессиональный переводчик с {source_lang} ({src_lang_code}) на {target_lang} ({tgt_lang_code}). Точно передайте смысл текста, соблюдая нормы {target_lang}, выводите только перевод: {text}", - "ar": "أنت مترجم محترف من {source_lang} ({src_lang_code}) إلى {target_lang} ({tgt_lang_code}). انقل المعنى بدقة وفق قواعد {target_lang} وأخرج الترجمة فقط: {text}", - "ja": "あなたは {source_lang}({src_lang_code})から {target_lang}({tgt_lang_code})へのプロ翻訳者です。意味を正確に伝え、{target_lang}の表現に従い、翻訳のみ出力してください:{text}", - "es": "Eres un traductor profesional de {source_lang} ({src_lang_code}) a {target_lang} ({tgt_lang_code}). Transmite con precisión el significado y devuelve solo la traducción: {text}", - "de": "Du bist ein professioneller Übersetzer von {source_lang} ({src_lang_code}) nach {target_lang} ({tgt_lang_code}). Gib die Bedeutung korrekt wieder und gib nur die Übersetzung aus: {text}", - "fr": "Vous êtes un traducteur professionnel de {source_lang} ({src_lang_code}) vers {target_lang} ({tgt_lang_code}). Transmettez fidèlement le sens et produisez uniquement la traduction : {text}", - "it": "Sei un traduttore professionista da {source_lang} ({src_lang_code}) a {target_lang} ({tgt_lang_code}). Trasmetti accuratamente il significato e restituisci solo la traduzione: {text}", - "pt": "Você é um tradutor profissional de {source_lang} ({src_lang_code}) para {target_lang} ({tgt_lang_code}). Transmita o significado com precisão e produza apenas a tradução: {text}" - }, - - "sku_name": { - "zh": "你是一名专业的 {source_lang}({src_lang_code})到 {target_lang}({tgt_lang_code})电商翻译专家,请将原文翻译为{target_lang}商品SKU名称,要求准确完整、简洁专业,只输出结果:{text}", - "en": "You are a professional {source_lang} ({src_lang_code}) to {target_lang} ({tgt_lang_code}) ecommerce translator. Translate into a concise and accurate {target_lang} product SKU name, output only the result: {text}", - "ru": "Вы переводчик e-commerce с {source_lang} ({src_lang_code}) на {target_lang} ({tgt_lang_code}). 
Переведите в краткое и точное название SKU товара на {target_lang}, выводите только результат: {text}", - "ar": "أنت مترجم تجارة إلكترونية من {source_lang} ({src_lang_code}) إلى {target_lang} ({tgt_lang_code}). ترجم إلى اسم SKU للمنتج بلغة {target_lang} بدقة واختصار، وأخرج النتيجة فقط: {text}", - "ja": "{source_lang}({src_lang_code})から {target_lang}({tgt_lang_code})へのEC翻訳者として、簡潔で正確な{target_lang}の商品SKU名に翻訳し、結果のみ出力してください:{text}", - "es": "Eres un traductor ecommerce de {source_lang} ({src_lang_code}) a {target_lang} ({tgt_lang_code}). Traduce a un nombre SKU de producto en {target_lang}, preciso y conciso, devuelve solo el resultado: {text}", - "de": "Du bist ein E-Commerce-Übersetzer von {source_lang} ({src_lang_code}) nach {target_lang} ({tgt_lang_code}). Übersetze in einen präzisen und kurzen {target_lang} Produkt-SKU-Namen, nur Ergebnis ausgeben: {text}", - "fr": "Vous êtes un traducteur e-commerce de {source_lang} ({src_lang_code}) vers {target_lang} ({tgt_lang_code}). Traduisez en un nom SKU produit {target_lang} précis et concis, sortie uniquement : {text}", - "it": "Sei un traduttore ecommerce da {source_lang} ({src_lang_code}) a {target_lang} ({tgt_lang_code}). Traduce in un nome SKU prodotto {target_lang} conciso e accurato, restituisci solo il risultato: {text}", - "pt": "Você é um tradutor de e-commerce de {source_lang} ({src_lang_code}) para {target_lang} ({tgt_lang_code}). Traduza para um nome SKU de produto {target_lang} conciso e preciso, produza apenas o resultado: {text}" - }, - - "ecommerce_search_query": { - "zh": "你是一名专业的 {source_lang}({src_lang_code})到 {target_lang}({tgt_lang_code})翻译助手,请将电商搜索词准确翻译为{target_lang}并符合搜索习惯,只输出结果:{text}", - "en": "You are a professional {source_lang} ({src_lang_code}) to {target_lang} ({tgt_lang_code}) translator. Translate the ecommerce search query accurately following {target_lang} search habits, output only the result: {text}", - "ru": "Вы переводчик с {source_lang} ({src_lang_code}) на {target_lang} ({tgt_lang_code}). Переведите поисковый запрос e-commerce с учётом привычек поиска, выводите только результат: {text}", - "ar": "أنت مترجم من {source_lang} ({src_lang_code}) إلى {target_lang} ({tgt_lang_code}). ترجم عبارة البحث للتجارة الإلكترونية بما يناسب عادات البحث وأخرج النتيجة فقط: {text}", - "ja": "{source_lang}({src_lang_code})から {target_lang}({tgt_lang_code})への翻訳者として、EC検索キーワードを{target_lang}の検索習慣に合わせて翻訳し、結果のみ出力してください:{text}", - "es": "Eres un traductor de {source_lang} ({src_lang_code}) a {target_lang} ({tgt_lang_code}). Traduce la consulta de búsqueda ecommerce según los hábitos de búsqueda y devuelve solo el resultado: {text}", - "de": "Du bist ein Übersetzer von {source_lang} ({src_lang_code}) nach {target_lang} ({tgt_lang_code}). Übersetze die E-Commerce-Suchanfrage entsprechend den Suchgewohnheiten, nur Ergebnis ausgeben: {text}", - "fr": "Vous êtes un traducteur de {source_lang} ({src_lang_code}) vers {target_lang} ({tgt_lang_code}). Traduisez la requête de recherche e-commerce selon les habitudes de recherche, sortie uniquement : {text}", - "it": "Sei un traduttore da {source_lang} ({src_lang_code}) a {target_lang} ({tgt_lang_code}). Traduce la query di ricerca ecommerce secondo le abitudini di ricerca e restituisci solo il risultato: {text}", - "pt": "Você é um tradutor de {source_lang} ({src_lang_code}) para {target_lang} ({tgt_lang_code}). 
Traduza a consulta de busca de ecommerce conforme os hábitos de busca e produza apenas o resultado: {text}" - } -} diff --git a/docs/DEVELOPER_GUIDE.md b/docs/DEVELOPER_GUIDE.md index 736bc2c..c0e6e8a 100644 --- a/docs/DEVELOPER_GUIDE.md +++ b/docs/DEVELOPER_GUIDE.md @@ -43,6 +43,7 @@ 以下文档由本指南引用,按需深入: - [QUICKSTART.md](./QUICKSTART.md) — 环境、服务、模块、请求示例;§2–§4 含基础配置与 Provider/模块扩展 +- [翻译模块说明.md](./翻译模块说明.md) — translator service、capability 配置、本地模型部署与接口契约 - [系统设计文档.md](./系统设计文档.md) — 索引结构、数据流、通用化设计 - [搜索API对接指南.md](./搜索API对接指南.md) — 搜索/索引/管理接口完整说明 - [QUICKSTART.md](./QUICKSTART.md) §1.4–1.8 — 系统要求、Python 环境、外部服务与生产凭证、店匠数据源 @@ -64,7 +65,7 @@ - **多租户**:单套代码与索引结构,通过 `tenant_id` 隔离数据;租户级配置(如主语言、索引语言)由配置与 tenant_config 支持。 - **可配置**:字段权重、搜索域、排序表达式、查询改写、功能开关等由配置驱动,避免硬编码业务逻辑。 -- **可扩展**:翻译/向量/重排采用 Provider + 后端可插拔设计,新增实现时遵循协议与配置规范,不破坏现有调用方。 +- **可扩展**:embedding / rerank 采用 Provider + 后端可插拔设计;translation 采用 translator service + capability backend 设计。新增实现时遵循协议与配置规范,不破坏现有调用方。 - **不负责**:商品主数据同步、店铺配置写库、全量/增量调度策略由上游(如 Java 索引程序)负责;本仓库专注“如何查、如何建 doc”。 --- @@ -109,7 +110,8 @@ query/ # 查询解析:规范化、改写、翻译、embedding search/ # 搜索执行:多语言查询构建、Searcher、重排客户端、分数融合 embeddings/ # 向量化:服务端(server)、文本/图像后端、协议与配置 reranker/ # 重排:服务端(server)、后端(backends)、配置 -providers/ # 能力提供者:翻译/向量/重排的客户端抽象与工厂 +providers/ # 能力提供者:向量/重排的客户端抽象与工厂 +translation/ # 翻译:服务客户端、服务编排、后端实现、本地模型接入 suggestion/ # 建议:索引构建、建议检索 utils/ # 共享工具:ES 客户端、DB 连接等 mappings/ # ES 索引 mapping 定义(如 search_products.json) @@ -119,7 +121,7 @@ tests/ # 单元与集成测试 docs/ # 文档(含本指南) ``` -- **约定**:业务逻辑按能力放入对应顶层包;新增“能力”时优先考虑是否属于现有某包或 providers,避免随意新建顶层包导致分叉。 +- **约定**:业务逻辑按能力放入对应顶层包;新增“能力”时优先考虑是否属于现有某包、`translation/` 或 providers,避免随意新建顶层包导致分叉。 --- @@ -166,7 +168,7 @@ docs/ # 文档(含本指南) ### 4.8 providers -- **职责**:统一“能力”的调用方式。向量、重排仍是标准 provider 工厂;翻译侧的 `create_translation_provider()` 现在固定返回 translator service client,由 6006 服务统一承接后端选择与路由。 +- **职责**:统一“能力”的调用方式。向量、重排仍是标准 provider 工厂;翻译侧通过 `translation.create_translation_client()` 获取 translator service client,由 6006 服务统一承接后端选择与路由。 - **原则**:业务代码只依赖调用接口,不依赖具体 URL 或服务内后端类型;翻译能力新增时优先扩展 `translation/backends/` 与 `services.translation.capabilities`,而不是在业务侧新增 provider 分支。 - **详见**:本指南 §7.2;[QUICKSTART.md](./QUICKSTART.md) §3。 @@ -197,14 +199,14 @@ docs/ # 文档(含本指南) ### 5.2 配置驱动 - 搜索行为(字段权重、搜索域、排序、function_score、重排融合参数等)来自 `config/config.yaml`,由 `ConfigLoader` 加载。 -- 能力访问来自 `config.yaml` 的 `services` 块及环境变量,由 `config/services_config` 解析。 +- 能力访问来自 `config.yaml` 的 `services` 块,由 `config/services_config` 解析。 - 其中翻译单独采用“service + capabilities”模型:调用方只配 `service_url` / `default_model` / `default_scene`,服务内通过 `capabilities` 控制启用哪些翻译能力。 - 新增开关或参数时,优先在现有 config 结构下扩展,避免新增散落配置文件。 ### 5.3 单一配置源与优先级 -- 同一类配置只在一个地方定义默认值;覆盖顺序约定为:**环境变量 > config 文件**。 -- 服务 URL、后端类型等均在 `services.` 下配置;环境变量用于部署态覆盖(如 `TRANSLATION_SERVICE_URL`、`TRANSLATION_MODEL`、`RERANKER_SERVICE_URL`、`RERANK_BACKEND`)。 +- 同一类配置只在一个地方定义默认值;业务行为以 `config/config.yaml` 为唯一来源,敏感信息与端口等部署变量放在环境变量。 +- 服务 URL、后端类型等均在 `services.` 下配置;翻译的 `service_url` / `default_model` / `default_scene` 不再接受环境变量覆盖,避免出现“看配置和实际行为不一致”。 ### 5.4 调用方与实现解耦(Client + Backend) @@ -232,7 +234,7 @@ docs/ # 文档(含本指南) ### 5.8 启动初始化约束 -- 重资源与关键依赖(如 translator、text/image encoder)应在服务启动期初始化一次并复用,避免请求期懒加载。 +- translator service 在进程启动时应完成配置校验并预热默认 backend;其余已启用 capability 可按首次请求懒加载,避免多个本地翻译模型在启动阶段一次性占满显存。 - 若配置声明启用某能力(例如 GPU 后端),但运行资源不满足,应直接启动失败,不自动降级为其它后端。 ### 5.9 环境隔离 @@ -276,21 +278,23 @@ services: default_scene: "general" timeout_sec: 10.0 capabilities: - llm: { enabled: true, model: "qwen-flash" } - qwen-mt: { enabled: true, model: 
"qwen-mt-flash" } - deepl: { enabled: false, timeout_sec: 10.0 } + llm: { enabled: true, backend: "llm", model: "qwen-flash", base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1", timeout_sec: 30.0 } + qwen-mt: { enabled: true, backend: "qwen_mt", model: "qwen-mt-flash", base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1", timeout_sec: 10.0, use_cache: true } + deepl: { enabled: false, backend: "deepl", api_url: "https://api.deepl.com/v2/translate", timeout_sec: 10.0 } ``` - **provider**:调用方如何访问(如 HTTP)。 - **backend / backends**:当能力由本仓库内服务提供时,该服务加载哪个后端及参数。 - **translation.service_url**:业务侧统一调用的翻译服务地址。 - **translation.capabilities**:翻译服务内部可启用的能力注册表。 +- **translation 内部静态规则**:scene 集合、语言码映射、LLM prompt 模板、本地模型方向约束统一位于 `translation/`,不是外部 YAML 配置。 - 解析入口:`config/services_config.py` 的 `get_*_config()` 及 `get_*_base_url()` / `get_rerank_service_url()` 等。 ### 6.3 环境变量(常用) -- 能力 URL:`TRANSLATION_SERVICE_URL`、`EMBEDDING_SERVICE_URL`、`RERANKER_SERVICE_URL` -- 能力选择:`TRANSLATION_MODEL`、`TRANSLATION_SCENE`、`EMBEDDING_PROVIDER`、`EMBEDDING_BACKEND`、`RERANK_PROVIDER`、`RERANK_BACKEND` +- 能力 URL:`EMBEDDING_SERVICE_URL`、`RERANKER_SERVICE_URL` +- 能力选择:`EMBEDDING_PROVIDER`、`EMBEDDING_BACKEND`、`RERANK_PROVIDER`、`RERANK_BACKEND` +- 翻译服务行为:统一查看 `config/config.yaml -> services.translation` - 环境与索引:`ES_HOST`、`ES_INDEX_NAMESPACE`、`RUNTIME_ENV`、DB 与 Redis 等 详见 [QUICKSTART.md](./QUICKSTART.md) §1.6(.env 与生产凭证)、[Usage-Guide.md](./Usage-Guide.md)。 @@ -301,7 +305,8 @@ services: ### 7.1 何时看扩展规范 -- 新增或替换**翻译/向量/重排**的调用方式(如新的 HTTP 客户端、gRPC):见本指南 §7.2、[QUICKSTART.md](./QUICKSTART.md) §3。 +- 新增或替换**向量/重排**的调用方式(如新的 HTTP 客户端、gRPC):见本指南 §7.2、[QUICKSTART.md](./QUICKSTART.md) §3。 +- 新增翻译能力(如新云端模型或本地模型):见本指南 §7.2 中的 translation 特例说明。 - 新增或替换**向量/重排**的推理实现(如新模型、vLLM):见本指南 §7.3–§7.6。 ### 7.2 新增 Provider(调用方式) @@ -316,7 +321,7 @@ services: 1. 在 `translation/backends/` 中实现新 backend。 2. 在 `translation/service.py` 中注册工厂。 3. 在 `services.translation.capabilities.` 下增加配置,并用 `enabled` 控制是否启用。 -4. 业务调用方保持不变,仍只通过 `create_translation_provider()` 调 6006。 +4. 
业务调用方保持不变,仍只通过 `create_translation_client()` 调 6006。 ### 7.3 新增 Backend(推理实现) @@ -331,7 +336,7 @@ services: ### 7.4 禁止做法 - 在业务代码中硬编码服务 URL 或后端类型。 -- 新增能力时复制一套独立配置体系或新顶层包,而不纳入 `services` 与 providers/backends。 +- 新增能力时复制一套独立配置体系或新顶层包,而不纳入 `services` 与 providers/backends;translation 也必须纳入 `services.translation.capabilities` 与 `translation/backends/`。 - 新增后端时破坏现有协议(如修改返回长度、顺序或 meta 约定)。 ### 7.5 重排与向量化协议与配置速查 @@ -404,7 +409,7 @@ services: - [ ] 新逻辑放在合适的现有包中,未随意新建与现有能力平行的顶层包。 - [ ] 未在业务代码中硬编码服务 URL、后端类型或租户 ID。 -- [ ] 调用外部能力(翻译/向量/重排)时通过 providers 工厂获取实例,配置来自 `services_config`。 +- [ ] 调用外部能力时遵循统一入口:translation 使用 `translation.create_translation_client()`,embedding / rerank 使用 providers 工厂,配置来自 `services_config`。 ### 9.2 配置与扩展 @@ -441,6 +446,7 @@ services: | Provider 与基础配置、模块扩展(协议与后端) | [QUICKSTART.md](./QUICKSTART.md) §2–§4、本指南 §7 | | 索引结构、数据流、通用化设计 | [系统设计文档.md](./系统设计文档.md) | | 搜索/索引 API 完整说明 | [搜索API对接指南.md](./搜索API对接指南.md) | +| 翻译模块与本地模型 | [翻译模块说明.md](./翻译模块说明.md) | | 搜索 API 参数速查 | [搜索API速查表.md](./搜索API速查表.md) | | 首次部署、新机器环境、生产凭证 | [QUICKSTART.md](./QUICKSTART.md) §1.4–1.8 | | 运维、日志、多环境、故障 | [Usage-Guide.md](./Usage-Guide.md) | diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index a394346..b129f70 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -162,13 +162,19 @@ curl -X POST http://localhost:6005/embed/image \ #### Translator 服务(6006) ```bash +./scripts/setup_translator_venv.sh +./.venv-translator/bin/python scripts/download_translation_models.py --all-local # 如需本地模型 ./scripts/start_translator.sh curl -X POST http://localhost:6006/translate \ -H "Content-Type: application/json" \ - -d '{"text":"商品名称","target_lang":"en","source_lang":"zh"}' + -d '{"text":"商品名称","target_lang":"en","source_lang":"zh","model":"qwen-mt","scene":"sku_name"}' ``` +说明: +- translator service 是翻译统一入口,业务侧不再直接选择翻译 provider。 +- 本地模型默认关闭;需先在 `config/config.yaml -> services.translation.capabilities` 中启用,再通过 `model` 指定。 + #### Reranker 服务(6007) ```bash @@ -372,25 +378,25 @@ saas-search 以 MySQL 中的店匠标准表为权威数据源: |--------|------| | 索引结构(mapping) | 修改 `mappings/search_products.json` → `./scripts/create_tenant_index.sh ` → 重新导入 | | 搜索字段/权重/排序/重排 | 修改 `config/config.yaml` 对应块 | -| provider 与服务 URL | 修改 `config/config.yaml` 的 `services` 块,或用环境变量覆盖 | +| provider 与服务 URL | 修改 `config/config.yaml` 的 `services` 块;translation 的 `service_url/default_model/default_scene` 只认 YAML,embedding/rerank 仍可按需用环境变量覆盖 | --- -## 3. Provider 架构 +## 3. 
能力接入架构 目标:调用方稳定、配置可切换、单一配置源。 ### 3.1 当前代码结构 -- 模块:`providers/` -- 工厂:`create_translation_provider()`、`create_embedding_provider()`、`create_rerank_provider()` +- 模块:`providers/` + `translation/` +- 工厂:`translation.create_translation_client()`、`create_embedding_provider()`、`create_rerank_provider()` - 配置解析:`config/services_config.py` -| 能力 | Provider 实现 | 调用方 | -|------|---------------|--------| -| translation | `providers/translation.py`(direct/http) | `query/query_parser.py`、索引链路 | -| embedding | `providers/embedding.py`(http) | 文本/图像编码调用 | -| rerank | `providers/rerank.py`(http) | `search/rerank_client.py` | +| 能力 | 调用入口 | 服务内实现 | +|------|----------|------------| +| translation | `translation/client.py` | `translation/service.py` + `translation/backends/` | +| embedding | `providers/embedding.py`(http) | embedding 服务内 backend | +| rerank | `providers/rerank.py`(http) | reranker 服务内 backend | ### 3.2 配置与覆盖 @@ -399,10 +405,17 @@ saas-search 以 MySQL 中的店匠标准表为权威数据源: ```yaml services: translation: - provider: "direct" - providers: - direct: { model: "qwen" } - http: { base_url: "http://127.0.0.1:6006", model: "qwen", timeout_sec: 10.0 } + service_url: "http://127.0.0.1:6006" + default_model: "llm" + default_scene: "general" + timeout_sec: 10.0 + capabilities: + qwen-mt: { enabled: true, backend: "qwen_mt", model: "qwen-mt-flash", base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1", timeout_sec: 10.0, use_cache: true } + llm: { enabled: true, backend: "llm", model: "qwen-flash", base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1", timeout_sec: 30.0 } + deepl: { enabled: false, backend: "deepl", api_url: "https://api.deepl.com/v2/translate", timeout_sec: 10.0 } + nllb-200-distilled-600m: { enabled: false, backend: "local_nllb", model_id: "facebook/nllb-200-distilled-600M" } + opus-mt-zh-en: { enabled: false, backend: "local_marian", model_id: "Helsinki-NLP/opus-mt-zh-en" } + opus-mt-en-zh: { enabled: false, backend: "local_marian", model_id: "Helsinki-NLP/opus-mt-en-zh" } embedding: provider: "http" backend: "tei" @@ -419,8 +432,6 @@ services: 环境变量覆盖(优先级更高): -- `TRANSLATION_PROVIDER` -- `TRANSLATION_SERVICE_URL` - `EMBEDDING_SERVICE_URL` - `EMBEDDING_BACKEND` - `TEI_BASE_URL` @@ -429,11 +440,19 @@ services: - `RERANK_DASHSCOPE_API_KEY_CN` / `RERANK_DASHSCOPE_API_KEY_US`(`dashscope_rerank` 后端鉴权) - `RERANK_DASHSCOPE_ENDPOINT`(`dashscope_rerank` 地域 endpoint 覆盖) -### 3.3 新增 provider 的最小步骤 +### 3.3 新增接入能力的最小步骤 -1. 在 `providers/.py` 实现 provider 类 -2. 在 `create_*_provider()` 注册 -3. 在 `config/config.yaml` 的 `services..providers` 新增配置 +1. translation 新增能力: + 在 `translation/backends/` 实现 backend,在 `translation/service.py` 注册,并在 `services.translation.capabilities` 增加配置。 +2. embedding / rerank 新增调用方式: + 在 `providers/.py` 实现 provider 类,并在 `create_*_provider()` 注册。 +3. 
embedding / rerank 新增服务内模型: + 在对应服务的 `backends/` 下实现并注册,在 `services..backends` 新增配置。 + +说明: +- translation 的 scene 规则、语言码映射、prompt 模板、模型方向约束位于 `translation/` 内部,不再放到 `config/`。 +- 翻译公共接口只暴露 `model + scene`,不暴露 `prompt`。 +- translation 的 `service_url`、`default_model`、`default_scene` 来自 `config/config.yaml -> services.translation`,不再由环境变量静默覆盖。 --- diff --git a/docs/TODO.txt b/docs/TODO.txt index 1f04326..d784be6 100644 --- a/docs/TODO.txt +++ b/docs/TODO.txt @@ -86,52 +86,21 @@ translator的设计 : QueryParser 里面 并不是调用的6006,目前是把6006做了一个provider,然后translate的总体配置又有6006的baseurl,很混乱。 -config.yaml 里面的 翻译的配置 不是“6006 专用配置”,而是搜索服务的 -6006本来之前是做一个provider。 -结果后面改造成了综合体,但是还没改完,改到一半发现之前的实现跟我的设计或者想法有偏差。 +翻译模块重构已完成。以下旧结论已失效,不再适用: +- 业务侧不再把 translation 当 provider 选择。 +- `QueryParser` / indexer 统一通过 `translation.create_translation_client()` 调用 6006 translator service。 +- 翻译配置统一为 `services.translation`: + - 外部配置只保留部署相关项,如 `service_url`、`default_model`、`default_scene`、各 capability 的 `backend/base_url/api_url/model_dir` 等。 + - scene 规则、语言码映射、LLM prompt 模板、本地模型方向约束统一收口在 `translation/` 内部。 +- 外部接口统一使用 `model + scene`,不再对外暴露 `prompt`。 -需要继续改完!!!!!!!! +以以下文档为准: - -- `config.yaml` **不是“6006 专用配置”**,而是整个系统的 **统一 services 配置**,由 `config/services_config.py` 读取,**搜索 API 进程和翻译服务进程都会用到它**。 -- 关键决定行为的是这一行: - -```yaml -translation: - provider: "llm" -``` - -在当前配置下: - -- 搜索 API 进程里,`QueryParser` 初始化翻译器时走的是: - -```python -create_translation_provider(...) # provider == "llm" -``` - -进而返回的是 `LLMTranslatorProvider`(本进程内调用),**不会走 `base_url`,也不会走 6006 端口**。 -- `base_url: "http://127.0.0.1:6006"` 只在 `provider: "http"` / `"service"` 时被 `HttpTranslationProvider` 使用;在 `provider: "llm"` 时,这个字段对 `QueryParser` 是完全被忽略的。 - -所以现在的实际情况是: - -- **QueryParser 中的翻译是“本进程直连 LLM API”**,所以日志在搜索后端自己的日志文件里。 -- 如果你希望「QueryParser 永远通过 6006 端口的翻译服务」,需要把 provider 改成 HTTP: - -```yaml -translation: - provider: "http" # ← 改成 http 或 service - cache: ... - providers: - http: - base_url: "http://127.0.0.1:6006" - model: "llm" # 或 "qwen-mt-flush",看你想用哪个 - timeout_sec: 10.0 - llm:. - model: "qwen-flash" # 留给翻译服务自身内部使用 - qwen-mt: ... - deepl: ... 
-``` +- `docs/翻译模块说明.md` +- `docs/DEVELOPER_GUIDE.md` +- `docs/QUICKSTART.md` +- `docs/搜索API对接指南.md` @@ -259,4 +228,3 @@ https://cloud.tencent.com/document/product/1729/113395#4.-.E7.A4.BA.E4.BE.8B 登录 百炼美国地域控制台:https://modelstudio.console.aliyun.com/us-east-1?spm=5176.2020520104.0.0.6b383a98WjpXff 在 API Key 管理 中创建或复制一个适用于美国地域的 Key - diff --git a/docs/工作总结-微服务性能优化与架构.md b/docs/工作总结-微服务性能优化与架构.md index 2659c33..accce52 100644 --- a/docs/工作总结-微服务性能优化与架构.md +++ b/docs/工作总结-微服务性能优化与架构.md @@ -84,11 +84,10 @@ instruction: "Given a shopping query, rank product titles by relevance" **背景**:原使用 DeepL,后迁移至 **qwen-mt**(如 `qwen-mt-flash`)。qwen-mt 云端限速约 **RPM=60(每分钟 60 请求)**,此前未做大商品量压测,未暴露问题;高并发索引或查询场景下易触限。 **当前方案**: -- **迁移至 qwen-flash**:在配置中将翻译改为 **LLM provider + qwen-flash 模型**,由 DashScope 兼容 API 调用,可配置化切换。 -- **可配置化(具体配置)**: - - **入口**:`config/config.yaml` → `services.translation`;`provider: "llm"` 时使用 `providers.llm`,`model: "qwen-flash"`,`timeout_sec: 30`,`base_url` 可选(为空则用 `DASHSCOPE_BASE_URL`);环境变量 `DASHSCOPE_API_KEY` 注入 Key。 - - **Provider 取值**:`provider` 可为 `http`(走翻译服务 6006)、`qwen-mt`(直连 qwen-mt-flush 等)、`deepl`(DeepL API)、`llm`(对话模型 qwen-flash 等);工厂函数 `providers/translation.py` 的 `create_translation_provider(query_config)` 根据 `get_translation_config()` 解析结果返回对应实现。 - - **调用位置**:QueryParser(`query/query_parser.py`)与 Indexer(`indexer/incremental_service.py`、`indexer/indexing_utils.py`)均通过 `create_translation_provider(...)` 获取实例,不写死 URL 或模型名。 +- **统一 translator service**:业务侧统一走 6006,按 `model + scene` 选择能力,不再存在翻译 provider 分支。 +- **配置入口**:`config/config.yaml` → `services.translation`,显式声明 `service_url`、`default_model`、`default_scene`、各 capability 的 `backend`、`base_url/api_url`、timeout 与本地模型运行参数。 +- **内部规则收口**:scene 集合、语言码映射、LLM prompt 模板、本地模型方向约束统一放在 `translation/` 内部,不再散落在 `config/`、`query/` 等位置。 +- **调用位置**:QueryParser 与 Indexer 均通过 `translation.create_translation_client()` 获取客户端,不写死 URL 或模型名。 - **缓存**:`services.translation.cache` 支持 `key_prefix: "trans:v2"`、`ttl_seconds`、`sliding_expiration` 等,翻译结果写 Redis,减轻重复请求对限速的影响。 - **场景支撑**:在线索引(indexer)与 query 请求(QueryParser)共用同一套 provider 配置;可按环境或租户通过修改 `config.yaml` 或环境变量切换 provider/model。 - **待配合**:**金伟侧对索引侧翻译调用做流量控制**(限流/排队/批量聚合),避免索引高峰打满 qwen 限速,影响在线 query 翻译。 @@ -113,14 +112,15 @@ instruction: "Given a shopping query, rank product titles by relevance" ## 二、架构 -### 1. Provider 与动态选择翻译 +### 1. 
Translator Service 与动态选择翻译 -- **设计**:参考 `docs/系统设计文档.md`、`docs/DEVELOPER_GUIDE.md`,翻译/向量/重排均采用 **Provider + Backend** 解耦;配置单一来源为 `config/config.yaml` 的 `services` 块,环境变量可覆盖。 +- **设计**:翻译已从 provider 架构中独立出来,采用 **一个 translator service + 多个 capability backend**;配置单一来源为 `config/config.yaml` 的 `services.translation` 块,`service_url` / `default_model` / `default_scene` 不再接受环境变量静默覆盖。 - **翻译(具体实现)**: - - **工厂**:`providers/translation.py` 的 `create_translation_provider(query_config)`;内部调用 `config/services_config.get_translation_config()` 得到 `provider` 与 `providers.` 参数。 - - **分支**:`provider in ("qwen-mt", "direct", "local", "inprocess")` → 使用 `query/qwen_mt_translate.py` 的 `Translator`(model 如 qwen-mt-flush);`provider == "http"` 或 `"service"` → `HttpTranslationProvider`(base_url 为翻译服务 6006,model 如 qwen);`provider == "llm"` → `query/llm_translate.py` 的 `LLMTranslatorProvider`(model 如 qwen-flash,base_url 可选);`provider == "deepl"` → `query/deepl_provider.py` 的 `DeepLProvider`。 - - **调用方**:`query/query_parser.py`(搜索前翻译)、`indexer/incremental_service.py`、`indexer/indexing_utils.py`(索引时翻译)均通过上述工厂获取实例,不写死 URL 或模型名。 -- **效果**:仅改 `config.yaml` 的 `services.translation.provider` 及对应 `providers.` 即可切换 DeepL、qwen-mt、qwen-flash(llm)、HTTP 翻译服务等。 + - **业务入口**:`translation.create_translation_client()` + - **服务编排**:`translation/service.py` + - **后端实现**:`translation/backends/qwen_mt.py`、`translation/backends/llm.py`、`translation/backends/deepl.py`、`translation/backends/local_seq2seq.py` + - **调用方**:`query/query_parser.py`、`indexer/incremental_service.py`、`indexer/indexing_utils.py` +- **效果**:仅改 `services.translation.default_model` 或启用的 capability,即可切换云端/本地翻译能力;调用方始终只连 6006。 ### 2. 服务的监控与拉起机制 diff --git a/docs/搜索API对接指南.md b/docs/搜索API对接指南.md index 5621b4f..dfacfa4 100644 --- a/docs/搜索API对接指南.md +++ b/docs/搜索API对接指南.md @@ -159,7 +159,7 @@ curl -X POST "http://43.166.252.75:6002/search/" \ |------|------|------|------| | 向量服务 | 6005 | `POST /embed/text` | 文本向量化 | | 向量服务 | 6005 | `POST /embed/image` | 图片向量化 | -| 翻译服务 | 6006 | `POST /translate` | 文本翻译(Qwen/DeepL) | +| 翻译服务 | 6006 | `POST /translate` | 文本翻译(支持 qwen-mt / llm / deepl / 本地模型) | | 重排服务 | 6007 | `POST /rerank` | 检索结果重排 | | 内容理解(Indexer 内) | 6004 | `POST /indexer/enrich-content` | 根据商品标题生成 qanchors、tags 等,供 indexer 微服务组合方式使用 | @@ -1650,7 +1650,7 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \ | 服务 | 默认端口 | Base URL | 说明 | |------|----------|----------|------| | 向量服务 | 6005 | `http://localhost:6005` | 文本/图片向量化,用于语义搜索与以图搜图 | -| 翻译服务 | 6006 | `http://localhost:6006` | 多语言翻译(Qwen/DeepL) | +| 翻译服务 | 6006 | `http://localhost:6006` | 多语言翻译(云端与本地模型统一入口) | | 重排服务 | 6007 | `http://localhost:6007` | 对检索结果进行二次排序 | 生产环境请将 `localhost` 替换为实际服务地址。 @@ -1801,12 +1801,12 @@ curl "http://localhost:6007/health" ### 7.3 翻译服务(Translation) -- **Base URL**: `http://localhost:6006`(可通过 `TRANSLATION_SERVICE_URL` 覆盖) +- **Base URL**: `http://localhost:6006`(以 `config/config.yaml -> services.translation.service_url` 为准) - **启动**: `./scripts/start_translator.sh` #### 7.3.1 `POST /translate` — 文本翻译 -支持 Qwen(默认)与 DeepL 模型,适用于商品名称、描述等电商场景。 +支持 translator service 内所有已启用 capability,适用于商品名称、描述、query 等电商场景。当前可配置能力包括 `qwen-mt`、`llm`、`deepl` 以及本地模型 `nllb-200-distilled-600m`、`opus-mt-zh-en`、`opus-mt-en-zh`。 **请求体**(支持单条字符串或字符串列表): ```json @@ -1814,8 +1814,8 @@ curl "http://localhost:6007/health" "text": "商品名称", "target_lang": "en", "source_lang": "zh", - "model": "qwen", - "context": "sku_name" + "model": "qwen-mt", + "scene": "sku_name" } ``` @@ -1825,8 +1825,8 @@ curl 
"http://localhost:6007/health" "text": ["商品名称1", "商品名称2"], "target_lang": "en", "source_lang": "zh", - "model": "qwen", - "context": "sku_name" + "model": "qwen-mt", + "scene": "sku_name" } ``` @@ -1834,9 +1834,13 @@ curl "http://localhost:6007/health" |------|------|------|------| | `text` | string \| string[] | Y | 待翻译文本,既支持单条字符串,也支持字符串列表(批量翻译) | | `target_lang` | string | Y | 目标语言:`zh`、`en`、`ru` 等 | -| `source_lang` | string | N | 源语言,不传则自动检测 | -| `model` | string | N | `qwen`(默认)、`deepl` 或 `llm` | -| `context` | string | N | 翻译场景参数:商品标题翻译使用 `sku_name`,搜索请求中的 query 翻译使用 `ecommerce_search_query`,其它通用场景可不传或使用 `general` | +| `source_lang` | string | N | 源语言。云端模型可不传;`nllb-200-distilled-600m` 建议显式传入 | +| `model` | string | N | 已启用 capability 名称,如 `qwen-mt`、`llm`、`deepl`、`nllb-200-distilled-600m`、`opus-mt-zh-en`、`opus-mt-en-zh` | +| `scene` | string | N | 翻译场景参数,与 `model` 配套使用;当前标准值为 `sku_name`、`ecommerce_search_query`、`general` | + +说明: +- 外部接口不接受 `prompt`;LLM prompt 由服务端按 `scene` 自动生成。 +- 传入未定义的 `scene` 或未启用的 `model` 会返回 `400`。 **响应**: ```json @@ -1846,7 +1850,8 @@ curl "http://localhost:6007/health" "source_lang": "zh", "translated_text": "Product name", "status": "success", - "model": "qwen" + "model": "qwen-mt", + "scene": "sku_name" } ``` @@ -1858,13 +1863,14 @@ curl "http://localhost:6007/health" "source_lang": "zh", "translated_text": ["Product name 1", "Product name 2"], "status": "success", - "model": "qwen" + "model": "qwen-mt", + "scene": "sku_name" } ``` > **失败语义(批量)**:当 `text` 为列表时,如果其中某条翻译失败,对应位置返回 `null`(即 `translated_text[i] = null`),并保持数组长度与顺序不变;接口整体仍返回 `status="success"`,用于避免“部分失败”导致整批请求失败。 -> **实现提示(可忽略)**:服务端会尽可能使用底层翻译 provider 的批量能力(若支持),否则自动拆分逐条翻译;无论采用哪种方式,上述批量契约保持一致。 +> **实现提示(可忽略)**:服务端会尽可能使用底层 backend 的批量能力(若支持),否则自动拆分逐条翻译;无论采用哪种方式,上述批量契约保持一致。 **完整 curl 示例**: @@ -1902,12 +1908,38 @@ curl -X POST "http://localhost:6006/translate" \ }' ``` +使用本地 OPUS 模型(中文 → 英文): +```bash +curl -X POST "http://localhost:6006/translate" \ + -H "Content-Type: application/json" \ + -d '{ + "text": "蓝牙耳机", + "target_lang": "en", + "source_lang": "zh", + "model": "opus-mt-zh-en", + "scene": "sku_name" + }' +``` + #### 7.3.2 `GET /health` — 健康检查 ```bash curl "http://localhost:6006/health" ``` +典型响应: +```json +{ + "status": "healthy", + "service": "translation", + "default_model": "llm", + "default_scene": "general", + "available_models": ["qwen-mt", "llm", "opus-mt-zh-en"], + "enabled_capabilities": ["qwen-mt", "llm", "opus-mt-zh-en"], + "loaded_models": ["llm"] +} +``` + ### 7.4 内容理解字段生成(Indexer 服务内) 内容理解字段生成接口部署在 **Indexer 服务**(默认端口 6004)内,与「翻译、向量化」等独立端口微服务并列,供采用**微服务组合**方式的 indexer 调用。 diff --git a/docs/系统设计文档.md b/docs/系统设计文档.md index e7b6001..f2668a4 100644 --- a/docs/系统设计文档.md +++ b/docs/系统设计文档.md @@ -382,16 +382,7 @@ query_config: # 实际翻译 provider 与模型在通用 services 配置中定义 ``` -实际代码中,通过通用的 translation provider 抽象来选择具体后端和模型,文档不固定绑定某一个具体翻译服务或模型名称,以保持可配置性。 - -此外,为了支持**高质量、提示词可控的 LLM 翻译**(例如商品富化脚本、离线分析工具),在 `query/llm_translate.py` 中提供了一个独立的 LLM 翻译辅助模块: - -- **配置入口**:`config/config.yaml -> services.translation.providers.llm`,用于指定: - - `model`: 例如 `qwen-flash`(DashScope 兼容模式的对话模型) - - `base_url`: 可选;为空时使用环境变量 `DASHSCOPE_BASE_URL` 或默认 Endpoint - - `timeout_sec`: LLM 调用超时 -- **环境变量**:仍通过 `DASHSCOPE_API_KEY` 注入 DashScope API Key。 -- **使用方式**:主查询路径继续使用 machine translation(`query.translator.Translator`),只在需要更强表达控制的场景(如批量标注、产品分类脚本)中显式调用 `llm_translate()`。 +实际代码中,翻译已改为统一的 translator service 架构:业务侧通过 `translation.create_translation_client()` 访问 6006,由 `translation/service.py` 在服务内按 `model + scene` 
路由到具体 backend。scene 集合、语言码映射、LLM prompt 模板、本地模型方向约束等翻译域知识位于 `translation/` 内部,不再通过外部 provider 抽象分散管理。 #### 功能特性 1. **语言检测**:自动检测查询语言 diff --git a/docs/缓存与Redis使用说明.md b/docs/缓存与Redis使用说明.md index f5677c9..6373636 100644 --- a/docs/缓存与Redis使用说明.md +++ b/docs/缓存与Redis使用说明.md @@ -21,7 +21,7 @@ | 模块 / 场景 | Key 模板 | Value 内容示例 | 过期策略 | 备注 | |------------|----------|----------------|----------|------| | 向量缓存(text/image embedding) | `{EMBEDDING_CACHE_PREFIX}:{query_or_url}` / `{EMBEDDING_CACHE_PREFIX}:image:{url_or_path}` | **BF16 bytes**(每维 2 字节大端存储),读取后恢复为 `np.float32` | TTL=`REDIS_CONFIG["cache_expire_days"]` 天;访问时滑动过期 | 见 `embeddings/text_encoder.py`(文本)与 `embeddings/image_encoder.py`(图片);前缀由 `REDIS_CONFIG["embedding_cache_prefix"]` 控制 | -| 翻译结果缓存(Qwen-MT 翻译) | `{cache_prefix}:{model}:{src}:{tgt}:{sha256(payload)}` | 机翻后的单条字符串 | TTL=`services.translation.cache.ttl_seconds` 秒;可配置滑动过期 | 见 `query/qwen_mt_translate.py` + `config/config.yaml` | +| 翻译结果缓存(Qwen-MT 翻译) | `{cache_prefix}:{model}:{src}:{tgt}:{sha256(payload)}` | 机翻后的单条字符串 | TTL=`services.translation.cache.ttl_seconds` 秒;可配置滑动过期 | 见 `translation/backends/qwen_mt.py` + `config/config.yaml` | | 商品内容理解缓存(anchors / 语义属性 / tags) | `{ANCHOR_CACHE_PREFIX}:{tenant_or_global}:{target_lang}:{md5(title)}` | `json.dumps(dict)`,包含 id/title/category/tags/anchor_text 等 | TTL=`ANCHOR_CACHE_EXPIRE_DAYS` 天 | 见 `indexer/product_enrich.py` | 下面按模块详细说明。 @@ -71,9 +71,9 @@ --- -## 3. 翻译结果缓存(query/qwen_mt_translate.py) +## 3. 翻译结果缓存(translation/backends/qwen_mt.py) -- **代码位置**:`query/qwen_mt_translate.py` 中 `Translator` 类 +- **代码位置**:`translation/backends/qwen_mt.py` 中 `QwenMTTranslationBackend` - **用途**:缓存 Qwen-MT 翻译(及 translator service 复用的翻译)结果,减少云端请求,遵守限速。 - **配置入口**:`config/config.yaml -> services.translation.cache`,统一由 `config/services_config.get_translation_cache_config()` 解析。 @@ -95,8 +95,7 @@ - `sha256(payload)`:对以下内容整体做 SHA-256: - `model` - `src` / `tgt` - - `context`(受 `key_include_context` 控制) - - `prompt`(受 `key_include_prompt` 控制) + - `scene`(受 `key_include_scene` 控制) - 原始 `text` > 注意:所有 key 设计集中在 `_build_cache_key`,**不要在其他位置手动拼翻译缓存 key**。 @@ -120,8 +119,7 @@ services: key_prefix: "trans:v2" ttl_seconds: 62208000 # 默认约 720 天 sliding_expiration: true - key_include_context: true - key_include_prompt: true + key_include_scene: true key_include_source_lang: true ``` @@ -138,7 +136,7 @@ services: ### 3.4 关联模块 -- `api/translator_app.py` 会通过 `query.qwen_mt_translate.Translator` 复用同一套缓存逻辑; +- `api/translator_app.py` 会通过 `translation.backends.qwen_mt.QwenMTTranslationBackend` 复用同一套缓存逻辑; - 文档说明:`docs/翻译模块说明.md` 中提到“推荐通过 Redis 翻译缓存复用结果”。 --- @@ -345,4 +343,3 @@ python scripts/redis/redis_memory_heavy_keys.py --top 100 - **文档同步**: - 新增缓存后,应在本文件中补充一行总览表 + 详细小节; - 若缓存与外部系统/历史实现兼容(如 Java 侧翻译缓存),需在说明中显式标注。 - diff --git a/docs/翻译模块说明.md b/docs/翻译模块说明.md index 8b3c1e6..9734723 100644 --- a/docs/翻译模块说明.md +++ b/docs/翻译模块说明.md @@ -10,11 +10,6 @@ DASHSCOPE_API_KEY=sk-xxx # DeepL DEEPL_AUTH_KEY=xxx - -# 可选 -TRANSLATION_SERVICE_URL=http://127.0.0.1:6006 -TRANSLATION_MODEL=llm # 默认能力;也可传 qwen-mt / deepl -TRANSLATION_SCENE=general ``` > **重要限速说明(Qwen 机翻)** @@ -29,7 +24,11 @@ TRANSLATION_SCENE=general - 业务侧(`QueryParser` / indexer)统一调用 `http://127.0.0.1:6006` - 服务内按 `services.translation.capabilities` 加载并管理各翻译能力 -- 每种能力独立配置 `enabled`、`model`、`timeout` 等参数 +- 已启用 capability 统一注册,后端实例按首次调用懒加载,避免多个本地模型在启动阶段一次性占满显存 +- `config.yaml` 只保留部署相关配置;scene 规则、语言码映射、prompt 模板、模型方向约束等翻译域知识统一收口在 `translation/` 内部 +- 每种能力独立配置 `enabled`、`model`、`base_url/api_url`、`timeout`、本地模型运行参数等部署项 +- 
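+
+“翻译域知识收口在 `translation/` 内部”可以用 `translation/scenes.py` 的形态来说明(示意性片段,常量与函数签名以实际代码为准):
+
+```python
+# 示意:标准 scene 集合与统一的归一化入口
+STANDARD_SCENES = frozenset({"general", "sku_name", "ecommerce_search_query"})
+
+def normalize_scene_name(scene):
+    name = str(scene or "").strip().lower() or "general"
+    if name not in STANDARD_SCENES:
+        raise ValueError(f"Unknown translation scene: {scene}")  # 接口层对应 400
+    return name
+```
+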
每种能力显式声明 `backend` 类型,例如 `qwen_mt`、`llm`、`deepl`、`local_nllb`、`local_marian` +- `service_url`、`default_model`、`default_scene` 只从 `config/config.yaml` 读取,不再接受环境变量静默覆盖 - 外部接口通过 `model + scene` 指定本次使用哪种能力、哪个场景 配置入口在 `config/config.yaml -> services.translation`,核心字段示例: @@ -44,19 +43,65 @@ services: capabilities: qwen-mt: enabled: true + backend: "qwen_mt" model: "qwen-mt-flash" + base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1" llm: enabled: true + backend: "llm" model: "qwen-flash" + base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1" deepl: enabled: false + backend: "deepl" + api_url: "https://api.deepl.com/v2/translate" + nllb-200-distilled-600m: + enabled: false + backend: "local_nllb" + model_id: "facebook/nllb-200-distilled-600M" + opus-mt-zh-en: + enabled: false + backend: "local_marian" + model_id: "Helsinki-NLP/opus-mt-zh-en" + opus-mt-en-zh: + enabled: false + backend: "local_marian" + model_id: "Helsinki-NLP/opus-mt-en-zh" ``` +## 本地模型部署 + +本仓库已内置 3 个本地机翻 capability: + +- `nllb-200-distilled-600m` +- `opus-mt-zh-en` +- `opus-mt-en-zh` + +推荐流程: + +1. 创建独立运行环境:`./scripts/setup_translator_venv.sh` +2. 下载本地模型:`./.venv-translator/bin/python scripts/download_translation_models.py --all-local` +3. 在 `config/config.yaml` 中把对应 capability 的 `enabled` 改为 `true` +4. 启动服务:`./scripts/start_translator.sh` + +默认模型目录: + +- `models/translation/facebook/nllb-200-distilled-600M` +- `models/translation/Helsinki-NLP/opus-mt-zh-en` +- `models/translation/Helsinki-NLP/opus-mt-en-zh` + +说明: + +- 目前只支持 3 个标准 scene:`general`、`sku_name`、`ecommerce_search_query` +- `nllb-200-distilled-600m` 支持多语,但依赖明确的 `source_lang` +- 两个 OPUS 模型分别只支持 `zh -> en` 与 `en -> zh` +- 本地模型建议单 worker 运行,避免重复加载占用显存 + ## HTTP 接口契约(translator service,端口 6006) 服务默认监听 `http://localhost:6006`,提供: -- `POST /translate`: 文本翻译(支持 `qwen/qwen-mt`、`deepl`、`llm`) +- `POST /translate`: 文本翻译(支持所有已启用 capability) - `GET /health`: 健康检查 ### `POST /translate` @@ -69,8 +114,7 @@ services: "target_lang": "en", "source_lang": "zh", "model": "qwen-mt", - "scene": "sku_name", - "prompt": null + "scene": "sku_name" } ``` @@ -110,15 +154,16 @@ services: 说明: -- `scene` 是标准字段,`context` 仅保留为兼容别名 +- `scene` 是标准字段 +- `prompt` 不属于外部接口;LLM prompt 由 translator service 内部根据 `scene` 生成 - `model` 只能选择已在 `services.translation.capabilities` 中启用的能力 -- `/health` 会返回 `default_model`、`default_scene` 与 `enabled_capabilities` +- `/health` 会返回 `default_model`、`default_scene`、`enabled_capabilities` 与 `loaded_models` --- ## 开发者接口约定(代码调用) -代码侧(如 query/indexer)仍通过 `providers.translation.create_translation_provider()` 获取实例并调用 `translate()`,但该实例现在固定是 **translator service client**,不再在业务侧做翻译 provider 选择。 +代码侧(如 query/indexer)通过 `translation.create_translation_client()` 获取实例并调用 `translate()`;业务侧不再存在翻译 provider 选择逻辑。 ### 输入输出形状(Shape) @@ -131,6 +176,6 @@ services: 服务客户端与服务内后端都可以暴露 `supports_batch`。若后端不支持批量,服务端会逐条拆分并保持 shape。 -为便于上层(如 `api/translator_app.py`)做最优调用,provider 可暴露: +为便于上层(如 `api/translator_app.py`)做最优调用,client / backend 可暴露: - `supports_batch: bool`(property) diff --git a/indexer/README.md b/indexer/README.md index 5ae2a7a..e1b0df9 100644 --- a/indexer/README.md +++ b/indexer/README.md @@ -204,17 +204,21 @@ categoryPath.set(categoryLang, translationCategoryPath) 你当前要使用的翻译接口(Python 侧): ```bash -curl -X POST http://43.166.252.75:6006/translate \ +curl -X POST http://127.0.0.1:6006/translate \ -H "Content-Type: application/json" \ -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣", "target_lang":"en", - "source_lang":"auto"}' + "source_lang":"zh", + 
"model":"qwen-mt", + "scene":"sku_name"}' ``` - 请求参数: - `text`:待翻译文本; - `target_lang`:目标语言(如 `"en"`、`"zh"` 等); - - `source_lang`:源语言(支持 `"auto"` 自动检测)。 + - `source_lang`:源语言; + - `model`:启用的翻译能力名称; + - `scene`:翻译场景(如 `sku_name`、`general`)。 - 响应(参考 Java `TranslationServiceImpl.querySaasTranslate`): - JSON 里包含 `status` 字段,如果是 `"success"`,且 `translated_text` 非空,则返回翻译结果。 diff --git a/indexer/document_transformer.py b/indexer/document_transformer.py index 1dfea72..b4885bd 100644 --- a/indexer/document_transformer.py +++ b/indexer/document_transformer.py @@ -18,9 +18,6 @@ from indexer.product_enrich import analyze_products logger = logging.getLogger(__name__) -from query.qwen_mt_translate import Translator - - class SPUDocumentTransformer: """SPU文档转换器,将SPU、SKU、Option数据转换为ES文档格式。""" @@ -75,7 +72,7 @@ class SPUDocumentTransformer: text=text, target_lang=lang, source_lang=source_lang, - context=scene, + scene=scene, ) return translations @@ -351,7 +348,7 @@ class SPUDocumentTransformer: text=brief_text, source_lang=primary_lang, index_languages=index_langs, - scene="default", + scene="general", ) _set_lang_obj("brief", brief_text, translations) @@ -364,7 +361,7 @@ class SPUDocumentTransformer: text=desc_text, source_lang=primary_lang, index_languages=index_langs, - scene="default", + scene="general", ) _set_lang_obj("description", desc_text, translations) @@ -377,7 +374,7 @@ class SPUDocumentTransformer: text=vendor_text, source_lang=primary_lang, index_languages=index_langs, - scene="default", + scene="general", ) _set_lang_obj("vendor", vendor_text, translations) diff --git a/indexer/incremental_service.py b/indexer/incremental_service.py index a00d880..5da37b3 100644 --- a/indexer/incremental_service.py +++ b/indexer/incremental_service.py @@ -14,6 +14,7 @@ from indexer.indexer_logger import ( get_indexer_logger, log_index_request, log_index_result, log_spu_processing ) from config import ConfigLoader +from translation import create_translation_client # Configure logger logger = logging.getLogger(__name__) @@ -56,9 +57,7 @@ class IncrementalIndexerService: or ["option1", "option2", "option3"] ) - from providers import create_translation_provider - - self._translator = create_translation_provider(self._config.query_config) + self._translator = create_translation_client() # Text embedding encoder (strict when enabled) if bool(getattr(self._config.query_config, "enable_text_embedding", False)): diff --git a/indexer/indexing_utils.py b/indexer/indexing_utils.py index 89cb5a7..c63ccac 100644 --- a/indexer/indexing_utils.py +++ b/indexer/indexing_utils.py @@ -10,6 +10,7 @@ from sqlalchemy import Engine, text from config import ConfigLoader from config.tenant_config_loader import get_tenant_config_loader from indexer.document_transformer import SPUDocumentTransformer +from translation import create_translation_client logger = logging.getLogger(__name__) @@ -100,9 +101,7 @@ def create_document_transformer( index_langs = tenant_config.get("index_languages") or [] need_translator = len(index_langs) > 1 if translator is None and need_translator: - from providers import create_translation_provider - - translator = create_translation_provider(config.query_config) + translator = create_translation_client() # 初始化encoder(如果启用标题向量化且未提供encoder) if encoder is None and enable_title_embedding and config.query_config.enable_text_embedding: diff --git a/indexer/test_indexing.py b/indexer/test_indexing.py index 1d2aef2..a7d9f4e 100755 --- a/indexer/test_indexing.py +++ b/indexer/test_indexing.py @@ -273,11 +273,8 @@ 
def test_document_transformer(): tenant_config = tenant_config_loader.get_tenant_config('162') # 初始化翻译器(测试环境总是启用,具体翻译方向由tenant_config控制) - from query.qwen_mt_translate import Translator - translator = Translator( - api_key=config.query_config.translation_api_key, - use_cache=True - ) + from translation.backends.qwen_mt import QwenMTTranslationBackend + translator = QwenMTTranslationBackend(use_cache=True) # 创建转换器 transformer = SPUDocumentTransformer( @@ -366,4 +363,3 @@ def main(): if __name__ == '__main__': sys.exit(main()) - diff --git a/providers/__init__.py b/providers/__init__.py index 0dbba48..4a0f1d3 100644 --- a/providers/__init__.py +++ b/providers/__init__.py @@ -1,15 +1,9 @@ -""" -Pluggable providers for translation, embedding, rerank. +"""Pluggable providers for embedding and rerank.""" -All provider selection is driven by config/services_config (services block). -""" - -from .translation import create_translation_provider from .rerank import create_rerank_provider from .embedding import create_embedding_provider __all__ = [ - "create_translation_provider", "create_rerank_provider", "create_embedding_provider", ] diff --git a/providers/translation.py b/providers/translation.py deleted file mode 100644 index 11154a1..0000000 --- a/providers/translation.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Translation client factory for business callers.""" - -from __future__ import annotations - -from typing import Any - -from config.services_config import get_translation_config -from translation.client import TranslationServiceClient - - -def create_translation_provider(query_config: Any = None) -> TranslationServiceClient: - """ - Create a translation client. - - Translation is no longer selected via provider mechanism on the caller side. - Search / indexer always talk to the translator service, while the service - itself decides which translation capabilities are enabled and how to route. 
- """ - - cfg = get_translation_config() - qc = query_config - default_scene = getattr(qc, "translation_context", None) if qc is not None else None - return TranslationServiceClient( - base_url=cfg.service_url, - default_model=cfg.default_model, - default_scene=default_scene or cfg.default_scene, - timeout_sec=cfg.timeout_sec, - ) diff --git a/query/__init__.py b/query/__init__.py index 26c9c4a..4a3bea2 100644 --- a/query/__init__.py +++ b/query/__init__.py @@ -1,13 +1,11 @@ """Query package initialization.""" from .language_detector import LanguageDetector -from .qwen_mt_translate import Translator from .query_rewriter import QueryRewriter, QueryNormalizer from .query_parser import QueryParser, ParsedQuery __all__ = [ 'LanguageDetector', - 'Translator', 'QueryRewriter', 'QueryNormalizer', 'QueryParser', diff --git a/query/deepl_provider.py b/query/deepl_provider.py deleted file mode 100644 index 6932288..0000000 --- a/query/deepl_provider.py +++ /dev/null @@ -1,3 +0,0 @@ -"""Backward-compatible import for DeepL translation backend.""" - -from translation.backends.deepl import DeepLProvider, DeepLTranslationBackend diff --git a/query/llm_translate.py b/query/llm_translate.py deleted file mode 100644 index 6723a86..0000000 --- a/query/llm_translate.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Backward-compatible import for LLM translation backend.""" - -from translation.backends.llm import LLMTranslationBackend, LLMTranslatorProvider, llm_translate - -__all__ = ["LLMTranslationBackend", "LLMTranslatorProvider", "llm_translate"] diff --git a/query/query_parser.py b/query/query_parser.py index 1ff110e..6435aa5 100644 --- a/query/query_parser.py +++ b/query/query_parser.py @@ -12,8 +12,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed, wait from embeddings.text_encoder import TextEmbeddingEncoder from config import SearchConfig +from translation import create_translation_client from .language_detector import LanguageDetector -from providers import create_translation_provider from .query_rewriter import QueryRewriter, QueryNormalizer logger = logging.getLogger(__name__) @@ -138,7 +138,7 @@ class QueryParser: cfg.service_url, cfg.default_model, ) - self._translator = create_translation_provider(self.config.query_config) + self._translator = create_translation_client() self._translation_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="query-translation") @property diff --git a/query/qwen_mt_translate.py b/query/qwen_mt_translate.py deleted file mode 100644 index 03e9552..0000000 --- a/query/qwen_mt_translate.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Backward-compatible import for Qwen-MT translation backend.""" - -from translation.backends.qwen_mt import QwenMTTranslationBackend, Translator - -__all__ = ["QwenMTTranslationBackend", "Translator"] diff --git a/query/test_translation.py b/query/test_translation.py old mode 100755 new mode 100644 index 3e69676..738b277 --- a/query/test_translation.py +++ b/query/test_translation.py @@ -1,261 +1,42 @@ #!/usr/bin/env python3 -""" -Translation function test script. - -Test content: -1. Translation prompt configuration loading -2. Synchronous translation (indexing scenario) -3. Asynchronous translation (query scenario) -4. Usage of different prompts -5. Cache functionality -6. 
DeepL Context parameter usage -""" - -import sys -import os -from pathlib import Path -from concurrent.futures import ThreadPoolExecutor - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from config import ConfigLoader -from query.qwen_mt_translate import Translator -import logging - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) - - -def test_config_loading(): - """Test configuration loading""" - print("\n" + "="*60) - print("Test 1: Configuration loading") - print("="*60) - - try: - config_loader = ConfigLoader() - config = config_loader.load_config() - - print(f"✓ Configuration loaded successfully") - print(f" Translation service: {config.query_config.translation_service}") - - return config - except Exception as e: - print(f"✗ Configuration loading failed: {e}") - import traceback - traceback.print_exc() - return None - - -def test_translator_sync(config): - """Test synchronous translation (indexing scenario)""" - print("\n" + "="*60) - print("Test 2: Synchronous translation (indexing scenario)") - print("="*60) - - if not config: - print("✗ Skipped: Configuration not loaded") - return None - - try: - translator = Translator( - api_key=config.query_config.translation_api_key, - use_cache=True, - glossary_id=config.query_config.translation_glossary_id, - translation_context=config.query_config.translation_context - ) - - # 测试商品标题翻译(使用sku_name提示词) - test_texts = [ - ("蓝牙耳机", "zh", "en", "sku_name"), - ("Wireless Headphones", "en", "zh", "sku_name"), - ] - - for text, source_lang, target_lang, scene in test_texts: - print(f"\nTranslation test:") - print(f" Original text ({source_lang}): {text}") - print(f" Target language: {target_lang}") - print(f" Scene: {scene}") - - result = translator.translate( - text, - target_lang=target_lang, - source_lang=source_lang, - context=scene, - ) - - if result: - print(f" Result: {result}") - print(f" ✓ Translation successful") - else: - print(f" ⚠ Translation returned None (possibly mock mode or no API key)") - - return translator - - except Exception as e: - print(f"✗ Synchronous translation test failed: {e}") - import traceback - traceback.print_exc() - return None - - -def test_translator_async(config, translator): - """Test asynchronous translation (query scenario)""" - print("\n" + "="*60) - print("Test 3: Asynchronous translation (query scenario)") - print("="*60) - - if not config or not translator: - print("✗ Skipped: Configuration or translator not initialized") - return - - try: - query_text = "手机" - target_langs = ['en'] - source_lang = 'zh' - - print(f"Query text: {query_text}") - print(f"Target languages: {target_langs}") - print("Scene: ecommerce_search_query") - - print(f"\nConcurrent translation via generic translate():") - with ThreadPoolExecutor(max_workers=len(target_langs)) as executor: - futures = { - lang: executor.submit( - translator.translate, - query_text, - lang, - source_lang, - "ecommerce_search_query", - ) - for lang in target_langs - } - for lang, future in futures.items(): - print(f" {lang}: {future.result()}") - - except Exception as e: - print(f"✗ Asynchronous translation test failed: {e}") - import traceback - traceback.print_exc() - - -def test_cache(): - """测试缓存功能""" - print("\n" + "="*60) - print("Test 4: Cache functionality") - print("="*60) - - try: - config_loader = ConfigLoader() - config = config_loader.load_config() - - translator = Translator( - 
api_key=config.query_config.translation_api_key, - use_cache=True - ) - - test_text = "测试文本" - target_lang = "en" - source_lang = "zh" - - print(f"First translation (should call API or return mock):") - result1 = translator.translate(test_text, target_lang, source_lang, context="default") - print(f" Result: {result1}") - - print(f"\nSecond translation (should use cache):") - result2 = translator.translate(test_text, target_lang, source_lang, context="default") - print(f" Result: {result2}") - - if result1 == result2: - print(f" ✓ Cache functionality working properly") - else: - print(f" ⚠ Cache might have issues") - - except Exception as e: - print(f"✗ Cache test failed: {e}") - import traceback - traceback.print_exc() - - -def test_context_parameter(): - """Test DeepL Context parameter usage""" - print("\n" + "="*60) - print("Test 5: DeepL Context parameter") - print("="*60) - - try: - config_loader = ConfigLoader() - config = config_loader.load_config() - - translator = Translator( - api_key=config.query_config.translation_api_key, - use_cache=False # 禁用缓存以便测试 - ) - - # 测试带context和不带context的翻译 - text = "手机" - - print(f"Test text: {text}") - print("Scene: ecommerce_search_query") - - # 带context的翻译 - result_with_context = translator.translate( - text, - target_lang='en', - source_lang='zh', - context="ecommerce_search_query", - ) - print(f"\nTranslation result with context: {result_with_context}") - - # 不带context的翻译 - result_without_context = translator.translate( - text, - target_lang='en', - source_lang='zh', - prompt=None - ) - print(f"Translation result without context: {result_without_context}") - - print(f"\n✓ Context parameter test completed") - print(f" Note: According to DeepL API, context parameter affects translation but does not participate in translation itself") - - except Exception as e: - print(f"✗ Context parameter test failed: {e}") - import traceback - traceback.print_exc() - - -def main(): - """Main test function""" - print("="*60) - print("Translation function test") - print("="*60) - - # 测试1: 配置加载 - config = test_config_loading() - - # 测试2: 同步翻译 - translator = test_translator_sync(config) - - # 测试3: 异步翻译 - test_translator_async(config, translator) - - # 测试4: 缓存功能 - test_cache() - - # 测试5: Context参数 - test_context_parameter() - - print("\n" + "="*60) - print("Test completed") - print("="*60) - - -if __name__ == '__main__': +"""Manual smoke test for the translator service.""" + +from __future__ import annotations + +import argparse +import json +from typing import Optional + +from translation import create_translation_client + + +def main() -> None: + parser = argparse.ArgumentParser(description="Smoke test the translator service") + parser.add_argument("--text", default="蓝牙耳机", help="Text to translate") + parser.add_argument("--source-lang", default="zh", help="Source language") + parser.add_argument("--target-lang", default="en", help="Target language") + parser.add_argument("--model", default=None, help="Enabled translation capability name") + parser.add_argument("--scene", default="sku_name", help="Translation scene") + args = parser.parse_args() + + client = create_translation_client() + result: Optional[str] = client.translate( + text=args.text, + target_lang=args.target_lang, + source_lang=args.source_lang, + model=args.model, + scene=args.scene, + ) + payload = { + "text": args.text, + "source_lang": args.source_lang, + "target_lang": args.target_lang, + "model": args.model or client.default_model, + "scene": args.scene, + "translated_text": result, + } + 
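+    # A None translated_text signals a backend failure (same null-on-failure
+    # semantics as the /translate batch contract); dump a summary for inspection.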
print(json.dumps(payload, ensure_ascii=False, indent=2)) + + +if __name__ == "__main__": main() - diff --git a/requirements_translator_service.txt b/requirements_translator_service.txt new file mode 100644 index 0000000..a6d72df --- /dev/null +++ b/requirements_translator_service.txt @@ -0,0 +1,20 @@ +# Dependencies for isolated translator service venv. + +pyyaml>=6.0 +python-dotenv>=1.0.0 +redis>=5.0.0 +numpy>=1.24.0 +openai>=1.0.0 +fastapi>=0.100.0 +uvicorn[standard]>=0.23.0 +pydantic>=2.0.0 +requests>=2.31.0 +httpx>=0.24.0 +tqdm>=4.65.0 + +torch>=2.0.0 +transformers>=4.30.0 +sentencepiece>=0.2.0 +sacremoses>=0.1.1 +safetensors>=0.4.0 +huggingface_hub>=0.24.0 diff --git a/scripts/download_translation_models.py b/scripts/download_translation_models.py new file mode 100755 index 0000000..7f6558f --- /dev/null +++ b/scripts/download_translation_models.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +"""Download local translation models declared in services.translation.capabilities.""" + +from __future__ import annotations + +import argparse +from pathlib import Path +import os +import sys +from typing import Iterable + +from huggingface_hub import snapshot_download + +PROJECT_ROOT = Path(__file__).resolve().parent.parent +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) +os.environ.setdefault("HF_HUB_DISABLE_XET", "1") + +from config.services_config import get_translation_config + + +LOCAL_BACKENDS = {"local_nllb", "local_marian"} + + +def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]: + cfg = get_translation_config() + for name, capability in cfg.capabilities.items(): + backend = str(capability.get("backend") or "").strip().lower() + if backend not in LOCAL_BACKENDS: + continue + if selected and name not in selected: + continue + yield name, capability + + +def main() -> None: + parser = argparse.ArgumentParser(description="Download local translation models") + parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models") + parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download") + args = parser.parse_args() + + selected = {item.strip().lower() for item in args.models if item.strip()} or None + if not args.all_local and not selected: + parser.error("pass --all-local or --models ...") + + for name, capability in iter_local_capabilities(selected): + model_id = str(capability.get("model_id") or "").strip() + model_dir = Path(str(capability.get("model_dir") or "")).expanduser() + if not model_id or not model_dir: + raise ValueError(f"Capability '{name}' must define model_id and model_dir") + model_dir.parent.mkdir(parents=True, exist_ok=True) + print(f"[download] {name} -> {model_dir} ({model_id})") + snapshot_download( + repo_id=model_id, + local_dir=str(model_dir), + ) + print(f"[done] {name}") + + +if __name__ == "__main__": + main() diff --git a/scripts/setup_translator_venv.sh b/scripts/setup_translator_venv.sh new file mode 100755 index 0000000..760b4e8 --- /dev/null +++ b/scripts/setup_translator_venv.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# +# Create isolated venv for translator service (.venv-translator). +# +set -euo pipefail + +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "${PROJECT_ROOT}" + +VENV_DIR="${PROJECT_ROOT}/.venv-translator" +PYTHON_BIN="${PYTHON_BIN:-python3}" +TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}" + +if ! 
command -v "${PYTHON_BIN}" >/dev/null 2>&1; then + echo "ERROR: python not found: ${PYTHON_BIN}" >&2 + exit 1 +fi + +if [[ -d "${VENV_DIR}" && ! -f "${VENV_DIR}/bin/activate" ]]; then + echo "Found broken venv at ${VENV_DIR}, recreating..." + rm -rf "${VENV_DIR}" +fi + +if [[ ! -d "${VENV_DIR}" ]]; then + echo "Creating ${VENV_DIR}" + "${PYTHON_BIN}" -m venv "${VENV_DIR}" +else + echo "Reusing ${VENV_DIR}" +fi + +mkdir -p "${TMP_DIR}" +export TMPDIR="${TMP_DIR}" +PIP_ARGS=(--no-cache-dir) + +echo "Using TMPDIR=${TMPDIR}" +"${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel +"${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt + +echo +echo "Done." +echo "Translator venv: ${VENV_DIR}" +echo "Download local models: ./.venv-translator/bin/python scripts/download_translation_models.py --all-local" +echo "Start service: ./scripts/start_translator.sh" diff --git a/scripts/start_translator.sh b/scripts/start_translator.sh index 4f45adc..7f2a8fa 100755 --- a/scripts/start_translator.sh +++ b/scripts/start_translator.sh @@ -1,25 +1,50 @@ #!/bin/bash # -# Start Translation Service +# Start Translation Service (port 6006). +# +# Design: +# - Run in isolated venv `.venv-translator` +# - Load enabled translation capabilities at startup +# - Local models should be downloaded ahead of time into configured model_dir # - set -euo pipefail -cd "$(dirname "$0")/.." -source ./activate.sh +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "${PROJECT_ROOT}" + +TRANSLATOR_VENV="${TRANSLATOR_VENV:-${PROJECT_ROOT}/.venv-translator}" +PYTHON_BIN="${TRANSLATOR_VENV}/bin/python" + +if [[ ! -x "${PYTHON_BIN}" ]]; then + echo "ERROR: translator venv not found: ${TRANSLATOR_VENV}" >&2 + echo "Please run: ./scripts/setup_translator_venv.sh" >&2 + exit 1 +fi + +# shellcheck source=scripts/lib/load_env.sh +source "${PROJECT_ROOT}/scripts/lib/load_env.sh" +load_env_file "${PROJECT_ROOT}/.env" TRANSLATION_HOST="${TRANSLATION_HOST:-0.0.0.0}" TRANSLATION_PORT="${TRANSLATION_PORT:-6006}" +DEFAULT_MODEL=$("${PYTHON_BIN}" -c "from config.services_config import get_translation_config; print(get_translation_config()['default_model'])") +ENABLED_MODELS=$("${PYTHON_BIN}" -c "from config.services_config import get_translation_config; from translation.settings import get_enabled_translation_models; print(','.join(get_enabled_translation_models(get_translation_config())))") echo "========================================" echo "Starting Translation Service" echo "========================================" +echo "Python: ${PYTHON_BIN}" echo "Host: ${TRANSLATION_HOST}" echo "Port: ${TRANSLATION_PORT}" -echo "Default model: ${TRANSLATION_MODEL:-qwen}" +echo "Default model: ${DEFAULT_MODEL}" +echo "Enabled models: ${ENABLED_MODELS}" +echo +echo "Tips:" +echo " - Use a single worker so local models are loaded once." +echo " - Download local models first if you enable them in config." 
echo -exec python -m uvicorn api.translator_app:app \ +exec "${PYTHON_BIN}" -m uvicorn api.translator_app:app \ --host "${TRANSLATION_HOST}" \ --port "${TRANSLATION_PORT}" \ --workers 1 diff --git a/tests/ci/test_service_api_contracts.py b/tests/ci/test_service_api_contracts.py index 3668818..98be804 100644 --- a/tests/ci/test_service_api_contracts.py +++ b/tests/ci/test_service_api_contracts.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd import pytest from fastapi.testclient import TestClient +from translation.scenes import normalize_scene_name class _FakeSearcher: @@ -571,18 +572,34 @@ def test_embedding_image_contract(embedding_module): class _FakeTranslator: - model = "qwen" - use_cache = True - - def translate(self, text: str, target_lang: str, source_lang: str | None = None, prompt: str | None = None): + model = "qwen-mt" + supports_batch = True + + def translate( + self, + text: str | List[str], + target_lang: str, + source_lang: str | None = None, + scene: str | None = None, + ): + del source_lang, scene + if isinstance(text, list): + return [f"{item}-{target_lang}" for item in text] return f"{text}-{target_lang}" class _FailingTranslator: - model = "qwen" - use_cache = True - - def translate(self, text: str, target_lang: str, source_lang: str | None = None, prompt: str | None = None): + model = "qwen-mt" + supports_batch = True + + def translate( + self, + text: str | List[str], + target_lang: str, + source_lang: str | None = None, + scene: str | None = None, + ): + del text, target_lang, source_lang, scene return None @@ -591,7 +608,44 @@ def translator_client(monkeypatch): import api.translator_app as translator_app translator_app.app.router.on_startup.clear() - monkeypatch.setattr(translator_app, "get_translator", lambda model="qwen": _FakeTranslator()) + + class _FakeService: + def __init__(self, translator): + self._translator = translator + self.config = { + "default_model": "qwen-mt", + "default_scene": "general", + "capabilities": { + "qwen-mt": { + "enabled": True, + "backend": "qwen_mt", + "model": "qwen-mt-flash", + "base_url": "https://example.com", + "timeout_sec": 10.0, + "use_cache": True, + } + }, + "cache": { + "enabled": True, + "key_prefix": "trans:v2", + "ttl_seconds": 60, + "sliding_expiration": True, + "key_include_scene": True, + "key_include_source_lang": True, + }, + } + self.available_models = ["qwen-mt"] + self.loaded_models = ["qwen-mt"] + + def get_backend(self, model=None): + del model + return self._translator + + def translate(self, **kwargs): + kwargs.pop("model", None) + return self._translator.translate(**kwargs) + + monkeypatch.setattr(translator_app, "get_translation_service", lambda: _FakeService(_FakeTranslator())) with TestClient(translator_app.app) as client: yield client @@ -610,7 +664,44 @@ def test_translator_api_failure_returns_500(monkeypatch): import api.translator_app as translator_app translator_app.app.router.on_startup.clear() - monkeypatch.setattr(translator_app, "get_translator", lambda model="qwen": _FailingTranslator()) + + class _FakeService: + def __init__(self, translator): + self._translator = translator + self.config = { + "default_model": "qwen-mt", + "default_scene": "general", + "capabilities": { + "qwen-mt": { + "enabled": True, + "backend": "qwen_mt", + "model": "qwen-mt-flash", + "base_url": "https://example.com", + "timeout_sec": 10.0, + "use_cache": True, + } + }, + "cache": { + "enabled": True, + "key_prefix": "trans:v2", + "ttl_seconds": 60, + "sliding_expiration": True, + "key_include_scene": True, + 
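+                # These cache flags mirror services.translation.cache in config.yaml.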
"key_include_source_lang": True, + }, + } + self.available_models = ["qwen-mt"] + self.loaded_models = ["qwen-mt"] + + def get_backend(self, model=None): + del model + return self._translator + + def translate(self, **kwargs): + kwargs.pop("model", None) + return self._translator.translate(**kwargs) + + monkeypatch.setattr(translator_app, "get_translation_service", lambda: _FakeService(_FailingTranslator())) with TestClient(translator_app.app) as client: response = client.post( @@ -626,6 +717,7 @@ def test_translator_health_contract(translator_client: TestClient): response = translator_client.get("/health") assert response.status_code == 200 assert response.json()["status"] == "healthy" + assert response.json()["loaded_models"] == ["qwen-mt"] class _FakeReranker: diff --git a/tests/test_translation_local_backends.py b/tests/test_translation_local_backends.py new file mode 100644 index 0000000..37f74d3 --- /dev/null +++ b/tests/test_translation_local_backends.py @@ -0,0 +1,170 @@ +import torch + +from translation.backends.local_seq2seq import MarianMTTranslationBackend, NLLBTranslationBackend +from translation.service import TranslationService + + +class _FakeBatch(dict): + def to(self, device): + self["device"] = device + return self + + +class _FakeTokenizer: + def __init__(self): + self.src_lang = None + self.pad_token = "" + self.eos_token = "" + self.lang_code_to_id = {"eng_Latn": 101, "zho_Hans": 202} + self.last_call = None + + def __call__(self, texts, **kwargs): + self.last_call = {"texts": list(texts), **kwargs} + return _FakeBatch({"input_ids": torch.tensor([[1, 2, 3]])}) + + def batch_decode(self, generated, skip_special_tokens=True): + del generated, skip_special_tokens + return ["translated" for _ in range(len(self.last_call["texts"]))] + + def convert_tokens_to_ids(self, token): + return self.lang_code_to_id[token] + + +class _FakeModel: + def to(self, device): + self.device = device + return self + + def eval(self): + return self + + def generate(self, **kwargs): + self.last_generate_kwargs = kwargs + return [[42]] + + +def _stub_load_model(self): + self.tokenizer = _FakeTokenizer() + self.seq2seq_model = _FakeModel() + + +def test_marian_language_validation(monkeypatch): + monkeypatch.setattr(MarianMTTranslationBackend, "_load_model", _stub_load_model) + backend = MarianMTTranslationBackend( + name="opus-mt-zh-en", + model_id="Helsinki-NLP/opus-mt-zh-en", + model_dir="./models/translation/Helsinki-NLP/opus-mt-zh-en", + device="cpu", + torch_dtype="float32", + batch_size=1, + max_input_length=16, + max_new_tokens=16, + num_beams=1, + source_langs=["zh"], + target_langs=["en"], + ) + + result = backend.translate("测试", source_lang="zh", target_lang="en") + assert result == "translated" + + try: + backend.translate("test", source_lang="en", target_lang="zh") + except ValueError as exc: + assert "source languages" in str(exc) + else: + raise AssertionError("Expected unsupported source language to raise") + + +def test_nllb_uses_src_lang_and_forced_bos(monkeypatch): + monkeypatch.setattr(NLLBTranslationBackend, "_load_model", _stub_load_model) + backend = NLLBTranslationBackend( + name="nllb-200-distilled-600m", + model_id="facebook/nllb-200-distilled-600M", + model_dir="./models/translation/facebook/nllb-200-distilled-600M", + device="cpu", + torch_dtype="float32", + batch_size=1, + max_input_length=16, + max_new_tokens=16, + num_beams=1, + ) + + result = backend.translate("test", source_lang="en", target_lang="zh") + + assert result == "translated" + assert 
backend.tokenizer.src_lang == "eng_Latn" + assert backend.seq2seq_model.last_generate_kwargs["forced_bos_token_id"] == 202 + + +def test_translation_service_lazy_loads_enabled_backends(monkeypatch): + created = [] + + def _fake_create_backend(self, *, name, backend_type, cfg): + del self, cfg + created.append((name, backend_type)) + + class _Backend: + model = name + + @property + def supports_batch(self): + return True + + def translate(self, text, target_lang, source_lang=None, scene=None): + del target_lang, source_lang, scene + return text + + return _Backend() + + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend) + config = { + "service_url": "http://127.0.0.1:6006", + "timeout_sec": 10.0, + "default_model": "opus-mt-en-zh", + "default_scene": "general", + "capabilities": { + "opus-mt-en-zh": { + "enabled": True, + "backend": "local_marian", + "model_id": "dummy", + "model_dir": "dummy", + "device": "cpu", + "torch_dtype": "float32", + "batch_size": 1, + "max_input_length": 8, + "max_new_tokens": 8, + "num_beams": 1, + }, + "nllb-200-distilled-600m": { + "enabled": True, + "backend": "local_nllb", + "model_id": "dummy", + "model_dir": "dummy", + "device": "cpu", + "torch_dtype": "float32", + "batch_size": 1, + "max_input_length": 8, + "max_new_tokens": 8, + "num_beams": 1, + }, + }, + "cache": { + "enabled": True, + "key_prefix": "trans:v2", + "ttl_seconds": 60, + "sliding_expiration": True, + "key_include_scene": True, + "key_include_source_lang": True, + }, + } + + service = TranslationService(config) + + assert service.available_models == ["opus-mt-en-zh", "nllb-200-distilled-600m"] + assert service.loaded_models == [] + + backend = service.get_backend("opus-mt-en-zh") + + assert backend.model == "opus-mt-en-zh" + assert created == [("opus-mt-en-zh", "local_marian")] + assert service.loaded_models == ["opus-mt-en-zh"] diff --git a/tests/test_translator_failure_semantics.py b/tests/test_translator_failure_semantics.py index 5f8fde8..728f9cd 100644 --- a/tests/test_translator_failure_semantics.py +++ b/tests/test_translator_failure_semantics.py @@ -1,4 +1,4 @@ -from query.qwen_mt_translate import Translator +from translation.backends.qwen_mt import QwenMTTranslationBackend class _RecordingRedis: @@ -10,7 +10,13 @@ class _RecordingRedis: def test_translate_failure_returns_none_and_skips_cache(monkeypatch): - translator = Translator(model="qwen", api_key="dummy-key", use_cache=False) + translator = QwenMTTranslationBackend( + capability_name="qwen-mt", + model="qwen-mt-flash", + base_url="https://dashscope-us.aliyuncs.com/compatible-mode/v1", + api_key="dummy-key", + use_cache=False, + ) fake_redis = _RecordingRedis() translator.use_cache = True translator.redis_client = fake_redis @@ -23,7 +29,7 @@ def test_translate_failure_returns_none_and_skips_cache(monkeypatch): text="商品标题", target_lang="en", source_lang="zh", - prompt="translate for product search", + scene="sku_name", ) assert result is None diff --git a/translation/__init__.py b/translation/__init__.py index 546253d..5f2c740 100644 --- a/translation/__init__.py +++ b/translation/__init__.py @@ -1,8 +1,23 @@ """Translation package.""" -__all__ = [ - "client", - "service", - "protocols", - "backends", -] +from __future__ import annotations + +from typing import Any + +__all__ = ["TranslationServiceClient", "create_translation_client", "TranslationService"] + + +def __getattr__(name: str) -> Any: + if name in {"TranslationServiceClient", "create_translation_client"}: + from .client import 
TranslationServiceClient, create_translation_client + + exports = { + "TranslationServiceClient": TranslationServiceClient, + "create_translation_client": create_translation_client, + } + return exports[name] + if name == "TranslationService": + from .service import TranslationService + + return TranslationService + raise AttributeError(name) diff --git a/translation/backends/__init__.py b/translation/backends/__init__.py index 2e0af75..5d42c5d 100644 --- a/translation/backends/__init__.py +++ b/translation/backends/__init__.py @@ -1,11 +1 @@ -"""Translation backend registry.""" - -from .deepl import DeepLTranslationBackend -from .llm import LLMTranslationBackend -from .qwen_mt import QwenMTTranslationBackend - -__all__ = [ - "DeepLTranslationBackend", - "LLMTranslationBackend", - "QwenMTTranslationBackend", -] +"""Translation backend implementations.""" diff --git a/translation/backends/deepl.py b/translation/backends/deepl.py index 92173ef..85dfbeb 100644 --- a/translation/backends/deepl.py +++ b/translation/backends/deepl.py @@ -5,81 +5,30 @@ from __future__ import annotations import logging import os import re -from typing import Dict, List, Optional, Sequence, Tuple, Union +from typing import List, Optional, Sequence, Tuple, Union import requests -from config.services_config import get_translation_config +from translation.languages import DEEPL_LANGUAGE_CODES +from translation.scenes import SCENE_DEEPL_CONTEXTS, normalize_scene_name logger = logging.getLogger(__name__) -DEFAULT_CONTEXTS: Dict[str, Dict[str, str]] = { - "sku_name": { - "zh": "商品SKU名称", - "en": "product SKU name", - }, - "ecommerce_search_query": { - "zh": "电商", - "en": "e-commerce", - }, - "general": { - "zh": "", - "en": "", - }, -} -SCENE_NAMES = frozenset(DEFAULT_CONTEXTS.keys()) - - -def _merge_contexts(raw: object) -> Dict[str, Dict[str, str]]: - merged: Dict[str, Dict[str, str]] = { - scene: dict(lang_map) for scene, lang_map in DEFAULT_CONTEXTS.items() - } - if not isinstance(raw, dict): - return merged - for scene, lang_map in raw.items(): - if not isinstance(lang_map, dict): - continue - scene_name = str(scene or "").strip() - if not scene_name: - continue - merged.setdefault(scene_name, {}) - for lang, value in lang_map.items(): - lang_key = str(lang or "").strip().lower() - context_value = str(value or "").strip() - if lang_key and context_value: - merged[scene_name][lang_key] = context_value - return merged - class DeepLTranslationBackend: - API_URL = "https://api.deepl.com/v2/translate" - LANG_CODE_MAP = { - "zh": "ZH", - "en": "EN", - "ru": "RU", - "ar": "AR", - "ja": "JA", - "es": "ES", - "de": "DE", - "fr": "FR", - "it": "IT", - "pt": "PT", - } - def __init__( self, api_key: Optional[str], *, - timeout: float = 10.0, + api_url: str, + timeout: float, glossary_id: Optional[str] = None, ) -> None: - cfg = get_translation_config() - provider_cfg = cfg.get_capability_cfg("deepl") self.api_key = api_key or os.getenv("DEEPL_AUTH_KEY") - self.timeout = float(provider_cfg.get("timeout_sec") or timeout or 10.0) - self.glossary_id = glossary_id or provider_cfg.get("glossary_id") + self.api_url = api_url + self.timeout = float(timeout) + self.glossary_id = glossary_id self.model = "deepl" - self.context_presets = _merge_contexts(provider_cfg.get("contexts")) if not self.api_key: logger.warning("DEEPL_AUTH_KEY not set; DeepL translation is unavailable") @@ -90,19 +39,13 @@ class DeepLTranslationBackend: def _resolve_request_context( self, target_lang: str, - context: Optional[str], - prompt: Optional[str], + scene: 
Optional[str], ) -> Optional[str]: - if prompt: - return prompt - if context in SCENE_NAMES: - scene_map = self.context_presets.get(context) or self.context_presets.get("default") or {} - tgt = (target_lang or "").strip().lower() - return scene_map.get(tgt) or scene_map.get("en") - if context: - return context - scene_map = self.context_presets.get("default") or {} - tgt = (target_lang or "").strip().lower() + if scene is None: + raise ValueError("deepl translation scene is required") + normalized_scene = normalize_scene_name(scene) + scene_map = SCENE_DEEPL_CONTEXTS[normalized_scene] + tgt = str(target_lang or "").strip().lower() return scene_map.get(tgt) or scene_map.get("en") def translate( @@ -110,8 +53,7 @@ class DeepLTranslationBackend: text: Union[str, Sequence[str]], target_lang: str, source_lang: Optional[str] = None, - context: Optional[str] = None, - prompt: Optional[str] = None, + scene: Optional[str] = None, ) -> Union[Optional[str], List[Optional[str]]]: if isinstance(text, (list, tuple)): results: List[Optional[str]] = [] @@ -123,8 +65,7 @@ class DeepLTranslationBackend: text=str(item), target_lang=target_lang, source_lang=source_lang, - context=context, - prompt=prompt, + scene=scene, ) results.append(out) return results @@ -132,13 +73,13 @@ class DeepLTranslationBackend: if not self.api_key: return None - target_code = self.LANG_CODE_MAP.get((target_lang or "").lower(), (target_lang or "").upper()) + target_code = DEEPL_LANGUAGE_CODES.get((target_lang or "").lower(), (target_lang or "").upper()) headers = { "Authorization": f"DeepL-Auth-Key {self.api_key}", "Content-Type": "application/json", } - api_context = self._resolve_request_context(target_lang, context, prompt) + api_context = self._resolve_request_context(target_lang, scene) text_to_translate, needs_extraction = self._add_ecommerce_context(text, source_lang, api_context) payload = { @@ -146,14 +87,14 @@ class DeepLTranslationBackend: "target_lang": target_code, } if source_lang: - payload["source_lang"] = self.LANG_CODE_MAP.get(source_lang.lower(), source_lang.upper()) + payload["source_lang"] = DEEPL_LANGUAGE_CODES.get(source_lang.lower(), source_lang.upper()) if api_context: payload["context"] = api_context if self.glossary_id: payload["glossary_id"] = self.glossary_id try: - response = requests.post(self.API_URL, headers=headers, json=payload, timeout=self.timeout) + response = requests.post(self.api_url, headers=headers, json=payload, timeout=self.timeout) if response.status_code != 200: logger.warning( "[deepl] Failed | status=%s tgt=%s body=%s", @@ -184,9 +125,9 @@ class DeepLTranslationBackend: self, text: str, source_lang: Optional[str], - context: Optional[str], + scene: Optional[str], ) -> Tuple[str, bool]: - if not context or "e-commerce" not in context.lower(): + if not scene or "e-commerce" not in scene.lower(): return text, False if (source_lang or "").lower() != "zh": return text, False @@ -215,6 +156,3 @@ class DeepLTranslationBackend: if normalized not in context_words: return normalized return re.sub(r"[.,!?;:]+$", "", words[-1].lower()) - - -DeepLProvider = DeepLTranslationBackend diff --git a/translation/backends/llm.py b/translation/backends/llm.py index 939b06d..2cdaa63 100644 --- a/translation/backends/llm.py +++ b/translation/backends/llm.py @@ -10,15 +10,12 @@ from typing import List, Optional, Sequence, Union from openai import OpenAI from config.env_config import DASHSCOPE_API_KEY -from config.services_config import get_translation_config -from config.translate_prompts import 
TRANSLATION_PROMPTS -from config.tenant_config_loader import SOURCE_LANG_CODE_MAP +from translation.languages import LANGUAGE_LABELS +from translation.prompts import TRANSLATION_PROMPTS +from translation.scenes import normalize_scene_name logger = logging.getLogger(__name__) -DEFAULT_QWEN_BASE_URL = "https://dashscope-us.aliyuncs.com/compatible-mode/v1" -DEFAULT_LLM_MODEL = "qwen-flash" - def _build_prompt( text: str, @@ -27,25 +24,16 @@ def _build_prompt( target_lang: str, scene: Optional[str], ) -> str: - tgt = (target_lang or "").lower() or "en" - src = (source_lang or "auto").lower() - normalized_scene = (scene or "").strip() or "general" - if normalized_scene in {"query", "ecommerce_search", "ecommerce_search_query"}: - group_key = "ecommerce_search_query" - elif normalized_scene in {"product_title", "sku_name"}: - group_key = "sku_name" - else: - group_key = normalized_scene - group = TRANSLATION_PROMPTS.get(group_key) or TRANSLATION_PROMPTS["general"] + tgt = str(target_lang or "").strip().lower() + src = str(source_lang or "auto").strip().lower() or "auto" + normalized_scene = normalize_scene_name(scene) + group = TRANSLATION_PROMPTS[normalized_scene] template = group.get(tgt) or group.get("en") - if not template: - template = ( - "You are a professional {source_lang} ({src_lang_code}) to " - "{target_lang} ({tgt_lang_code}) translator, output only the translation: {text}" - ) + if template is None: + raise ValueError(f"Missing llm translation prompt for scene='{normalized_scene}' target_lang='{tgt}'") - source_lang_label = SOURCE_LANG_CODE_MAP.get(src, src) - target_lang_label = SOURCE_LANG_CODE_MAP.get(tgt, tgt) + source_lang_label = LANGUAGE_LABELS.get(src, src) + target_lang_label = LANGUAGE_LABELS.get(tgt, tgt) return template.format( source_lang=source_lang_label, @@ -60,20 +48,15 @@ class LLMTranslationBackend: def __init__( self, *, - model: Optional[str] = None, - timeout_sec: float = 30.0, - base_url: Optional[str] = None, + capability_name: str, + model: str, + timeout_sec: float, + base_url: str, ) -> None: - cfg = get_translation_config() - llm_cfg = cfg.get_capability_cfg("llm") - self.model = model or llm_cfg.get("model") or DEFAULT_LLM_MODEL - self.timeout_sec = float(llm_cfg.get("timeout_sec") or timeout_sec or 30.0) - self.base_url = ( - (base_url or "").strip() - or (llm_cfg.get("base_url") or "").strip() - or os.getenv("DASHSCOPE_BASE_URL") - or DEFAULT_QWEN_BASE_URL - ) + self.capability_name = capability_name + self.model = model + self.timeout_sec = float(timeout_sec) + self.base_url = base_url self.client = self._create_client() @property @@ -96,22 +79,23 @@ class LLMTranslationBackend: text: str, target_lang: str, source_lang: Optional[str] = None, - context: Optional[str] = None, - prompt: Optional[str] = None, + scene: Optional[str] = None, ) -> Optional[str]: if not text or not str(text).strip(): return text if not self.client: return None - tgt = (target_lang or "").lower() or "en" - src = (source_lang or "auto").lower() - scene = context or "default" - user_prompt = prompt or _build_prompt( + tgt = str(target_lang or "").strip().lower() + src = str(source_lang or "auto").strip().lower() or "auto" + if scene is None: + raise ValueError("llm translation scene is required") + normalized_scene = normalize_scene_name(scene) + user_prompt = _build_prompt( text=text, source_lang=src, target_lang=tgt, - scene=scene, + scene=normalized_scene, ) start = time.time() try: @@ -158,8 +142,7 @@ class LLMTranslationBackend: text: Union[str, Sequence[str]], 
target_lang: str, source_lang: Optional[str] = None, - context: Optional[str] = None, - prompt: Optional[str] = None, + scene: Optional[str] = None, ) -> Union[Optional[str], List[Optional[str]]]: if isinstance(text, (list, tuple)): results: List[Optional[str]] = [] @@ -172,8 +155,7 @@ class LLMTranslationBackend: text=str(item), target_lang=target_lang, source_lang=source_lang, - context=context, - prompt=prompt, + scene=scene, ) ) return results @@ -182,28 +164,5 @@ class LLMTranslationBackend: text=str(text), target_lang=target_lang, source_lang=source_lang, - context=context, - prompt=prompt, + scene=scene, ) - - -LLMTranslatorProvider = LLMTranslationBackend - - -def llm_translate( - text: Union[str, Sequence[str]], - target_lang: str, - *, - source_lang: Optional[str] = None, - source_lang_label: Optional[str] = None, - target_lang_label: Optional[str] = None, - timeout_sec: Optional[float] = None, -) -> Union[Optional[str], List[Optional[str]]]: - del source_lang_label, target_lang_label - provider = LLMTranslationBackend(timeout_sec=timeout_sec or 30.0) - return provider.translate( - text=text, - target_lang=target_lang, - source_lang=source_lang, - context=None, - ) diff --git a/translation/backends/local_seq2seq.py b/translation/backends/local_seq2seq.py new file mode 100644 index 0000000..5ef9475 --- /dev/null +++ b/translation/backends/local_seq2seq.py @@ -0,0 +1,277 @@ +"""Local seq2seq translation backends powered by Transformers.""" + +from __future__ import annotations + +import logging +import os +import threading +from typing import Dict, List, Optional, Sequence, Union + +import torch +from transformers import AutoModelForSeq2SeqLM, AutoTokenizer + +from translation.languages import MARIAN_LANGUAGE_DIRECTIONS, NLLB_LANGUAGE_CODES + +logger = logging.getLogger(__name__) + + +def _resolve_device(device: Optional[str]) -> str: + value = str(device or "auto").strip().lower() + if value == "auto": + return "cuda" if torch.cuda.is_available() else "cpu" + return value + + +def _resolve_dtype(dtype: Optional[str], device: str) -> Optional[torch.dtype]: + value = str(dtype or "auto").strip().lower() + if value == "auto": + return torch.float16 if device.startswith("cuda") else None + if value in {"float16", "fp16", "half"}: + return torch.float16 if device.startswith("cuda") else None + if value in {"bfloat16", "bf16"}: + return torch.bfloat16 + if value in {"float32", "fp32"}: + return torch.float32 + raise ValueError(f"Unsupported torch dtype: {dtype}") + + +class LocalSeq2SeqTranslationBackend: + """Base backend for local Hugging Face seq2seq translation models.""" + + def __init__( + self, + *, + name: str, + model_id: str, + model_dir: str, + device: str, + torch_dtype: str, + batch_size: int, + max_input_length: int, + max_new_tokens: int, + num_beams: int, + ) -> None: + self.model = name + self.model_id = model_id + self.model_dir = model_dir + self.device = _resolve_device(device) + self.torch_dtype = _resolve_dtype(torch_dtype, self.device) + self.batch_size = int(batch_size) + self.max_input_length = int(max_input_length) + self.max_new_tokens = int(max_new_tokens) + self.num_beams = int(num_beams) + self._lock = threading.Lock() + self._load_model() + + @property + def supports_batch(self) -> bool: + return True + + def _load_model(self) -> None: + model_path = self.model_dir if os.path.exists(self.model_dir) else self.model_id + logger.info( + "Loading local translation model | name=%s source=%s device=%s dtype=%s", + self.model, + model_path, + self.device, + 
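+            # torch_dtype comes from _resolve_dtype(): fp16 on CUDA for "auto"/"fp16",
+            # None (framework default) on CPU; bf16/fp32 are honored as configured.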
self.torch_dtype, + ) + tokenizer_kwargs = self._tokenizer_kwargs() + model_kwargs = self._model_kwargs() + self.tokenizer = AutoTokenizer.from_pretrained(model_path, **tokenizer_kwargs) + self.seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(model_path, **model_kwargs) + self.seq2seq_model.to(self.device) + self.seq2seq_model.eval() + if self.tokenizer.pad_token is None and self.tokenizer.eos_token is not None: + self.tokenizer.pad_token = self.tokenizer.eos_token + + def _tokenizer_kwargs(self) -> Dict[str, object]: + return {} + + def _model_kwargs(self) -> Dict[str, object]: + kwargs: Dict[str, object] = {} + if self.torch_dtype is not None: + kwargs["dtype"] = self.torch_dtype + return kwargs + + def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]: + if isinstance(text, str): + return [text] + return ["" if item is None else str(item) for item in text] + + def _validate_languages(self, source_lang: Optional[str], target_lang: str) -> None: + del source_lang, target_lang + + def _prepare_tokenizer(self, source_lang: Optional[str], target_lang: str) -> Dict[str, object]: + del source_lang, target_lang + return {} + + def _build_generate_kwargs(self, source_lang: Optional[str], target_lang: str) -> Dict[str, object]: + del source_lang, target_lang + return { + "num_beams": self.num_beams, + } + + def _translate_batch( + self, + texts: List[str], + target_lang: str, + source_lang: Optional[str] = None, + ) -> List[Optional[str]]: + self._validate_languages(source_lang, target_lang) + tokenizer_kwargs = self._prepare_tokenizer(source_lang, target_lang) + with self._lock, torch.inference_mode(): + encoded = self.tokenizer( + texts, + return_tensors="pt", + padding=True, + truncation=True, + max_length=self.max_input_length, + **tokenizer_kwargs, + ) + encoded = {key: value.to(self.device) for key, value in encoded.items()} + generate_kwargs = self._build_generate_kwargs(source_lang, target_lang) + input_ids = encoded.get("input_ids") + if input_ids is not None and "max_length" not in generate_kwargs: + generate_kwargs["max_length"] = int(input_ids.shape[-1]) + self.max_new_tokens + generated = self.seq2seq_model.generate( + **encoded, + **generate_kwargs, + ) + outputs = self.tokenizer.batch_decode(generated, skip_special_tokens=True) + return [item.strip() if item and item.strip() else None for item in outputs] + + def translate( + self, + text: Union[str, Sequence[str]], + target_lang: str, + source_lang: Optional[str] = None, + scene: Optional[str] = None, + ) -> Union[Optional[str], List[Optional[str]]]: + del scene + is_single = isinstance(text, str) + texts = self._normalize_texts(text) + outputs: List[Optional[str]] = [] + for start in range(0, len(texts), self.batch_size): + chunk = texts[start:start + self.batch_size] + if not any(item.strip() for item in chunk): + outputs.extend([None if not item.strip() else item for item in chunk]) # type: ignore[list-item] + continue + outputs.extend(self._translate_batch(chunk, target_lang=target_lang, source_lang=source_lang)) + return outputs[0] if is_single else outputs + + +class MarianMTTranslationBackend(LocalSeq2SeqTranslationBackend): + """Local backend for Marian/OPUS MT models.""" + + def __init__( + self, + *, + name: str, + model_id: str, + model_dir: str, + device: str, + torch_dtype: str, + batch_size: int, + max_input_length: int, + max_new_tokens: int, + num_beams: int, + source_langs: Sequence[str], + target_langs: Sequence[str], + ) -> None: + self.source_langs = {str(lang).strip().lower() for lang 
in source_langs if str(lang).strip()} + self.target_langs = {str(lang).strip().lower() for lang in target_langs if str(lang).strip()} + super().__init__( + name=name, + model_id=model_id, + model_dir=model_dir, + device=device, + torch_dtype=torch_dtype, + batch_size=batch_size, + max_input_length=max_input_length, + max_new_tokens=max_new_tokens, + num_beams=num_beams, + ) + + def _validate_languages(self, source_lang: Optional[str], target_lang: str) -> None: + src = str(source_lang or "").strip().lower() + tgt = str(target_lang or "").strip().lower() + if self.source_langs and src not in self.source_langs: + raise ValueError( + f"Model '{self.model}' only supports source languages: {sorted(self.source_langs)}" + ) + if self.target_langs and tgt not in self.target_langs: + raise ValueError( + f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}" + ) + + +class NLLBTranslationBackend(LocalSeq2SeqTranslationBackend): + """Local backend for NLLB translation models.""" + + def __init__( + self, + *, + name: str, + model_id: str, + model_dir: str, + device: str, + torch_dtype: str, + batch_size: int, + max_input_length: int, + max_new_tokens: int, + num_beams: int, + language_codes: Optional[Dict[str, str]] = None, + ) -> None: + overrides = language_codes or {} + self.language_codes = { + **NLLB_LANGUAGE_CODES, + **{str(k).strip().lower(): str(v).strip() for k, v in overrides.items() if str(k).strip()}, + } + super().__init__( + name=name, + model_id=model_id, + model_dir=model_dir, + device=device, + torch_dtype=torch_dtype, + batch_size=batch_size, + max_input_length=max_input_length, + max_new_tokens=max_new_tokens, + num_beams=num_beams, + ) + + def _validate_languages(self, source_lang: Optional[str], target_lang: str) -> None: + src = str(source_lang or "").strip().lower() + tgt = str(target_lang or "").strip().lower() + if not src: + raise ValueError(f"Model '{self.model}' requires source_lang") + if src not in self.language_codes: + raise ValueError(f"Unsupported NLLB source language: {source_lang}") + if tgt not in self.language_codes: + raise ValueError(f"Unsupported NLLB target language: {target_lang}") + + def _prepare_tokenizer(self, source_lang: Optional[str], target_lang: str) -> Dict[str, object]: + del target_lang + src_code = self.language_codes[str(source_lang).strip().lower()] + self.tokenizer.src_lang = src_code + return {} + + def _build_generate_kwargs(self, source_lang: Optional[str], target_lang: str) -> Dict[str, object]: + del source_lang + tgt_code = self.language_codes[str(target_lang).strip().lower()] + forced_bos_token_id = None + if hasattr(self.tokenizer, "lang_code_to_id"): + forced_bos_token_id = self.tokenizer.lang_code_to_id.get(tgt_code) + if forced_bos_token_id is None: + forced_bos_token_id = self.tokenizer.convert_tokens_to_ids(tgt_code) + return { + "num_beams": self.num_beams, + "forced_bos_token_id": forced_bos_token_id, + } + + +def get_marian_language_direction(model_name: str) -> tuple[str, str]: + direction = MARIAN_LANGUAGE_DIRECTIONS.get(model_name) + if direction is None: + raise ValueError(f"Translation capability '{model_name}' is not registered with Marian language directions") + return direction diff --git a/translation/backends/qwen_mt.py b/translation/backends/qwen_mt.py index e35e8ad..751d553 100644 --- a/translation/backends/qwen_mt.py +++ b/translation/backends/qwen_mt.py @@ -14,53 +14,49 @@ from openai import OpenAI from config.env_config import DASHSCOPE_API_KEY, REDIS_CONFIG from 
config.services_config import get_translation_cache_config -from config.tenant_config_loader import SOURCE_LANG_CODE_MAP +from translation.languages import QWEN_LANGUAGE_CODES logger = logging.getLogger(__name__) class QwenMTTranslationBackend: - QWEN_DEFAULT_BASE_URL = "https://dashscope-us.aliyuncs.com/compatible-mode/v1" - QWEN_MODEL = "qwen-mt-flash" - SOURCE_LANG_CODE_MAP = SOURCE_LANG_CODE_MAP - def __init__( self, - model: str = "qwen", + capability_name: str, + model: str, + base_url: str, api_key: Optional[str] = None, use_cache: bool = True, timeout: int = 10, glossary_id: Optional[str] = None, - translation_context: Optional[str] = None, ): - self.model = self._normalize_model(model) + self.capability_name = capability_name + self.model = self._normalize_capability_name(capability_name) + self.qwen_model_name = self._normalize_model_name(model) + self.base_url = base_url self.timeout = int(timeout) self.use_cache = bool(use_cache) self.glossary_id = glossary_id - self.translation_context = translation_context or "e-commerce product search" cache_cfg = get_translation_cache_config() - self.cache_prefix = str(cache_cfg.get("key_prefix", "trans:v2")) - self.expire_seconds = int(cache_cfg.get("ttl_seconds", 360 * 24 * 3600)) - self.cache_sliding_expiration = bool(cache_cfg.get("sliding_expiration", True)) - self.cache_include_context = bool(cache_cfg.get("key_include_context", True)) - self.cache_include_prompt = bool(cache_cfg.get("key_include_prompt", True)) - self.cache_include_source_lang = bool(cache_cfg.get("key_include_source_lang", True)) + self.cache_prefix = str(cache_cfg["key_prefix"]) + self.expire_seconds = int(cache_cfg["ttl_seconds"]) + self.cache_sliding_expiration = bool(cache_cfg["sliding_expiration"]) + self.cache_include_scene = bool(cache_cfg["key_include_scene"]) + self.cache_include_source_lang = bool(cache_cfg["key_include_source_lang"]) - self.qwen_model_name = self._resolve_qwen_model_name(model) self._api_key = api_key or self._default_api_key(self.model) self._qwen_client: Optional[OpenAI] = None - base_url = os.getenv("DASHSCOPE_BASE_URL") or self.QWEN_DEFAULT_BASE_URL if self._api_key: try: - self._qwen_client = OpenAI(api_key=self._api_key, base_url=base_url) + self._qwen_client = OpenAI(api_key=self._api_key, base_url=self.base_url) except Exception as exc: logger.warning("Failed to initialize qwen-mt client: %s", exc, exc_info=True) else: logger.warning("DASHSCOPE_API_KEY not set; qwen-mt translation unavailable") self.redis_client = None - if self.use_cache and bool(cache_cfg.get("enabled", True)): + if self.use_cache and bool(cache_cfg["enabled"]): self.redis_client = self._init_redis_client() @property @@ -68,18 +64,18 @@ class QwenMTTranslationBackend: return True @staticmethod - def _normalize_model(model: str) -> str: - m = (model or "qwen").strip().lower() - if m.startswith("qwen"): - return "qwen-mt" - raise ValueError(f"Unsupported model: {model}. 
Supported models: 'qwen', 'qwen-mt', 'qwen-mt-flash'") + def _normalize_capability_name(name: str) -> str: + normalized = str(name or "").strip().lower() + if normalized != "qwen-mt": + raise ValueError(f"Qwen-MT backend capability must be 'qwen-mt', got '{name}'") + return normalized @staticmethod - def _resolve_qwen_model_name(model: str) -> str: - m = (model or "qwen").strip().lower() - if m in {"qwen", "qwen-mt"}: - return "qwen-mt-flash" - return m + def _normalize_model_name(model: str) -> str: + normalized = str(model or "").strip() + if not normalized: + raise ValueError("qwen-mt backend model is required") + return normalized @staticmethod def _default_api_key(model: str) -> Optional[str]: @@ -109,14 +105,12 @@ class QwenMTTranslationBackend: text: str, target_lang: str, source_lang: Optional[str], - context: Optional[str], - prompt: Optional[str], + scene: Optional[str], ) -> str: src = (source_lang or "auto").strip().lower() if self.cache_include_source_lang else "-" tgt = (target_lang or "").strip().lower() - ctx = (context or "").strip() if self.cache_include_context else "" - prm = (prompt or "").strip() if self.cache_include_prompt else "" - payload = f"model={self.model}\nsrc={src}\ntgt={tgt}\nctx={ctx}\nprm={prm}\ntext={text}" + scn = (scene or "").strip() if self.cache_include_scene else "" + payload = f"model={self.model}\nsrc={src}\ntgt={tgt}\nscene={scn}\ntext={text}" digest = hashlib.sha256(payload.encode("utf-8")).hexdigest() return f"{self.cache_prefix}:{self.model}:{src}:{tgt}:{digest}" @@ -125,8 +119,7 @@ class QwenMTTranslationBackend: text: Union[str, Sequence[str]], target_lang: str, source_lang: Optional[str] = None, - context: Optional[str] = None, - prompt: Optional[str] = None, + scene: Optional[str] = None, ) -> Union[Optional[str], List[Optional[str]]]: if isinstance(text, (list, tuple)): results: List[Optional[str]] = [] @@ -138,8 +131,7 @@ class QwenMTTranslationBackend: text=str(item), target_lang=target_lang, source_lang=source_lang, - context=context, - prompt=prompt, + scene=scene, ) results.append(out) return results @@ -154,15 +146,14 @@ class QwenMTTranslationBackend: if tgt == "zh" and (self._contains_chinese(text) or self._is_pure_number(text)): return text - translation_context = context or self.translation_context - cached = self._get_cached_translation_redis(text, tgt, src, translation_context, prompt) + cached = self._get_cached_translation_redis(text, tgt, src, scene) if cached is not None: return cached result = self._translate_qwen(text, tgt, src) if result is not None: - self._set_cached_translation_redis(text, tgt, result, src, translation_context, prompt) + self._set_cached_translation_redis(text, tgt, result, src, scene) return result def _translate_qwen( @@ -175,8 +166,8 @@ class QwenMTTranslationBackend: return None tgt_norm = (target_lang or "").strip().lower() src_norm = (source_lang or "").strip().lower() - tgt_qwen = self.SOURCE_LANG_CODE_MAP.get(tgt_norm, tgt_norm.capitalize()) - src_qwen = "auto" if not src_norm or src_norm == "auto" else self.SOURCE_LANG_CODE_MAP.get(src_norm, src_norm.capitalize()) + tgt_qwen = QWEN_LANGUAGE_CODES.get(tgt_norm, tgt_norm.capitalize()) + src_qwen = "auto" if not src_norm or src_norm == "auto" else QWEN_LANGUAGE_CODES.get(src_norm, src_norm.capitalize()) start = time.time() try: completion = self._qwen_client.chat.completions.create( @@ -211,12 +202,11 @@ class QwenMTTranslationBackend: text: str, target_lang: str, source_lang: Optional[str] = None, - context: Optional[str] = None, - prompt: 
Optional[str] = None, + scene: Optional[str] = None, ) -> Optional[str]: if not self.redis_client: return None - key = self._build_cache_key(text, target_lang, source_lang, context, prompt) + key = self._build_cache_key(text, target_lang, source_lang, scene) try: value = self.redis_client.get(key) if value and self.cache_sliding_expiration: @@ -232,12 +222,11 @@ class QwenMTTranslationBackend: target_lang: str, translation: str, source_lang: Optional[str] = None, - context: Optional[str] = None, - prompt: Optional[str] = None, + scene: Optional[str] = None, ) -> None: if not self.redis_client: return - key = self._build_cache_key(text, target_lang, source_lang, context, prompt) + key = self._build_cache_key(text, target_lang, source_lang, scene) try: self.redis_client.setex(key, self.expire_seconds, translation) except Exception as exc: @@ -255,6 +244,3 @@ class QwenMTTranslationBackend: @staticmethod def _is_pure_number(text: str) -> bool: return bool(re.fullmatch(r"[\d.\-+%/,: ]+", (text or "").strip())) - - -Translator = QwenMTTranslationBackend diff --git a/translation/client.py b/translation/client.py index ede2858..6896f5c 100644 --- a/translation/client.py +++ b/translation/client.py @@ -8,6 +8,7 @@ from typing import List, Optional, Sequence, Union import requests from config.services_config import get_translation_config +from translation.settings import normalize_translation_model, normalize_translation_scene logger = logging.getLogger(__name__) @@ -24,10 +25,10 @@ class TranslationServiceClient: timeout_sec: Optional[float] = None, ) -> None: cfg = get_translation_config() - self.base_url = (base_url or cfg.service_url).rstrip("/") - self.default_model = cfg.normalize_model_name(default_model or cfg.default_model) - self.default_scene = (default_scene or cfg.default_scene or "general").strip() or "general" - self.timeout_sec = float(timeout_sec or cfg.timeout_sec or 10.0) + self.base_url = str(base_url or cfg["service_url"]).rstrip("/") + self.default_model = normalize_translation_model(cfg, default_model or cfg["default_model"]) + self.default_scene = normalize_translation_scene(cfg, default_scene or cfg["default_scene"]) + self.timeout_sec = float(cfg["timeout_sec"] if timeout_sec is None else timeout_sec) @property def model(self) -> str: @@ -42,22 +43,18 @@ class TranslationServiceClient: text: Union[str, Sequence[str]], target_lang: str, source_lang: Optional[str] = None, - context: Optional[str] = None, - prompt: Optional[str] = None, - model: Optional[str] = None, scene: Optional[str] = None, + model: Optional[str] = None, ) -> Union[Optional[str], List[Optional[str]]]: if isinstance(text, tuple): text = list(text) payload = { "text": text, "target_lang": target_lang, - "source_lang": source_lang or "auto", + "source_lang": source_lang, "model": (model or self.default_model), - "scene": (scene or context or self.default_scene), + "scene": self.default_scene if scene is None else scene, } - if prompt: - payload["prompt"] = prompt try: response = requests.post( f"{self.base_url}/translate", @@ -84,3 +81,8 @@ class TranslationServiceClient: if isinstance(text, (list, tuple)): return [None for _ in text] return None + + +def create_translation_client() -> TranslationServiceClient: + """Create the business-side translation client.""" + return TranslationServiceClient() diff --git a/translation/languages.py b/translation/languages.py new file mode 100644 index 0000000..79ee64e --- /dev/null +++ b/translation/languages.py @@ -0,0 +1,67 @@ +"""Translation-internal language 
metadata.""" + +from __future__ import annotations + +from typing import Dict, Tuple + + +LANGUAGE_LABELS: Dict[str, str] = { + "zh": "Chinese", + "en": "English", + "ru": "Russian", + "ar": "Arabic", + "ja": "Japanese", + "es": "Spanish", + "de": "German", + "fr": "French", + "it": "Italian", + "pt": "Portuguese", +} + + +QWEN_LANGUAGE_CODES: Dict[str, str] = { + "zh": "Chinese", + "en": "English", + "ru": "Russian", + "ar": "Arabic", + "ja": "Japanese", + "es": "Spanish", + "de": "German", + "fr": "French", + "it": "Italian", + "pt": "Portuguese", +} + + +DEEPL_LANGUAGE_CODES: Dict[str, str] = { + "zh": "ZH", + "en": "EN", + "ru": "RU", + "ar": "AR", + "ja": "JA", + "es": "ES", + "de": "DE", + "fr": "FR", + "it": "IT", + "pt": "PT", +} + + +NLLB_LANGUAGE_CODES: Dict[str, str] = { + "en": "eng_Latn", + "zh": "zho_Hans", + "ru": "rus_Cyrl", + "ar": "arb_Arab", + "ja": "jpn_Jpan", + "es": "spa_Latn", + "de": "deu_Latn", + "fr": "fra_Latn", + "it": "ita_Latn", + "pt": "por_Latn", +} + + +MARIAN_LANGUAGE_DIRECTIONS: Dict[str, Tuple[str, str]] = { + "opus-mt-zh-en": ("zh", "en"), + "opus-mt-en-zh": ("en", "zh"), +} diff --git a/translation/prompts.py b/translation/prompts.py new file mode 100644 index 0000000..becd7d3 --- /dev/null +++ b/translation/prompts.py @@ -0,0 +1,45 @@ +"""Prompt templates for llm-based translation.""" + +from __future__ import annotations + +from typing import Dict + + +TRANSLATION_PROMPTS: Dict[str, Dict[str, str]] = { + "general": { + "zh": "你是一名专业的 {source_lang}({src_lang_code})到 {target_lang}({tgt_lang_code})翻译专家,请准确传达原文含义并符合{target_lang}语言习惯,只输出翻译结果:{text}", + "en": "You are a professional {source_lang} ({src_lang_code}) to {target_lang} ({tgt_lang_code}) translator. Accurately convey the meaning following {target_lang} grammar and usage, output only the translation: {text}", + "ru": "Вы профессиональный переводчик с {source_lang} ({src_lang_code}) на {target_lang} ({tgt_lang_code}). Точно передайте смысл текста, соблюдая нормы {target_lang}, выводите только перевод: {text}", + "ar": "أنت مترجم محترف من {source_lang} ({src_lang_code}) إلى {target_lang} ({tgt_lang_code}). انقل المعنى بدقة وفق قواعد {target_lang} وأخرج الترجمة فقط: {text}", + "ja": "あなたは {source_lang}({src_lang_code})から {target_lang}({tgt_lang_code})へのプロ翻訳者です。意味を正確に伝え、{target_lang}の表現に従い、翻訳のみ出力してください:{text}", + "es": "Eres un traductor profesional de {source_lang} ({src_lang_code}) a {target_lang} ({tgt_lang_code}). Transmite con precisión el significado y devuelve solo la traducción: {text}", + "de": "Du bist ein professioneller Übersetzer von {source_lang} ({src_lang_code}) nach {target_lang} ({tgt_lang_code}). Gib die Bedeutung korrekt wieder und gib nur die Übersetzung aus: {text}", + "fr": "Vous êtes un traducteur professionnel de {source_lang} ({src_lang_code}) vers {target_lang} ({tgt_lang_code}). Transmettez fidèlement le sens et produisez uniquement la traduction : {text}", + "it": "Sei un traduttore professionista da {source_lang} ({src_lang_code}) a {target_lang} ({tgt_lang_code}). Trasmetti accuratamente il significato e restituisci solo la traduzione: {text}", + "pt": "Você é um tradutor profissional de {source_lang} ({src_lang_code}) para {target_lang} ({tgt_lang_code}). 
Transmita o significado com precisão e produza apenas a tradução: {text}",
+    },
+    "sku_name": {
+        "zh": "你是一名专业的 {source_lang}({src_lang_code})到 {target_lang}({tgt_lang_code})电商翻译专家,请将原文翻译为{target_lang}商品SKU名称,要求准确完整、简洁专业,只输出结果:{text}",
+        "en": "You are a professional {source_lang} ({src_lang_code}) to {target_lang} ({tgt_lang_code}) ecommerce translator. Translate into a concise and accurate {target_lang} product SKU name, output only the result: {text}",
+        "ru": "Вы переводчик e-commerce с {source_lang} ({src_lang_code}) на {target_lang} ({tgt_lang_code}). Переведите в краткое и точное название SKU товара на {target_lang}, выводите только результат: {text}",
+        "ar": "أنت مترجم تجارة إلكترونية من {source_lang} ({src_lang_code}) إلى {target_lang} ({tgt_lang_code}). ترجم إلى اسم SKU للمنتج بلغة {target_lang} بدقة واختصار، وأخرج النتيجة فقط: {text}",
+        "ja": "{source_lang}({src_lang_code})から {target_lang}({tgt_lang_code})へのEC翻訳者として、簡潔で正確な{target_lang}の商品SKU名に翻訳し、結果のみ出力してください:{text}",
+        "es": "Eres un traductor ecommerce de {source_lang} ({src_lang_code}) a {target_lang} ({tgt_lang_code}). Traduce a un nombre SKU de producto en {target_lang}, preciso y conciso, devuelve solo el resultado: {text}",
+        "de": "Du bist ein E-Commerce-Übersetzer von {source_lang} ({src_lang_code}) nach {target_lang} ({tgt_lang_code}). Übersetze in einen präzisen und kurzen {target_lang} Produkt-SKU-Namen, nur Ergebnis ausgeben: {text}",
+        "fr": "Vous êtes un traducteur e-commerce de {source_lang} ({src_lang_code}) vers {target_lang} ({tgt_lang_code}). Traduisez en un nom SKU produit {target_lang} précis et concis, sortie uniquement : {text}",
+        "it": "Sei un traduttore ecommerce da {source_lang} ({src_lang_code}) a {target_lang} ({tgt_lang_code}). Traduci in un nome SKU prodotto {target_lang} conciso e accurato, restituisci solo il risultato: {text}",
+        "pt": "Você é um tradutor de e-commerce de {source_lang} ({src_lang_code}) para {target_lang} ({tgt_lang_code}). Traduza para um nome SKU de produto {target_lang} conciso e preciso, produza apenas o resultado: {text}",
+    },
+    "ecommerce_search_query": {
+        "zh": "你是一名专业的 {source_lang}({src_lang_code})到 {target_lang}({tgt_lang_code})翻译助手,请将电商搜索词准确翻译为{target_lang}并符合搜索习惯,只输出结果:{text}",
+        "en": "You are a professional {source_lang} ({src_lang_code}) to {target_lang} ({tgt_lang_code}) translator. Translate the ecommerce search query accurately following {target_lang} search habits, output only the result: {text}",
+        "ru": "Вы переводчик с {source_lang} ({src_lang_code}) на {target_lang} ({tgt_lang_code}). Переведите поисковый запрос e-commerce с учётом привычек поиска, выводите только результат: {text}",
+        "ar": "أنت مترجم من {source_lang} ({src_lang_code}) إلى {target_lang} ({tgt_lang_code}). ترجم عبارة البحث للتجارة الإلكترونية بما يناسب عادات البحث وأخرج النتيجة فقط: {text}",
+        "ja": "{source_lang}({src_lang_code})から {target_lang}({tgt_lang_code})への翻訳者として、EC検索キーワードを{target_lang}の検索習慣に合わせて翻訳し、結果のみ出力してください:{text}",
+        "es": "Eres un traductor de {source_lang} ({src_lang_code}) a {target_lang} ({tgt_lang_code}). Traduce la consulta de búsqueda ecommerce según los hábitos de búsqueda y devuelve solo el resultado: {text}",
+        "de": "Du bist ein Übersetzer von {source_lang} ({src_lang_code}) nach {target_lang} ({tgt_lang_code}). Übersetze die E-Commerce-Suchanfrage entsprechend den Suchgewohnheiten, nur Ergebnis ausgeben: {text}",
+        "fr": "Vous êtes un traducteur de {source_lang} ({src_lang_code}) vers {target_lang} ({tgt_lang_code}). Traduisez la requête de recherche e-commerce selon les habitudes de recherche, sortie uniquement : {text}",
+        "it": "Sei un traduttore da {source_lang} ({src_lang_code}) a {target_lang} ({tgt_lang_code}). Traduci la query di ricerca ecommerce secondo le abitudini di ricerca e restituisci solo il risultato: {text}",
+        "pt": "Você é um tradutor de {source_lang} ({src_lang_code}) para {target_lang} ({tgt_lang_code}). Traduza a consulta de busca de ecommerce conforme os hábitos de busca e produza apenas o resultado: {text}",
+    },
+}
diff --git a/translation/protocols.py b/translation/protocols.py
index 82bf6f9..db8926b 100644
--- a/translation/protocols.py
+++ b/translation/protocols.py
@@ -24,7 +24,6 @@ class TranslationBackendProtocol(Protocol):
         text: TranslateInput,
         target_lang: str,
         source_lang: Optional[str] = None,
-        context: Optional[str] = None,
-        prompt: Optional[str] = None,
+        scene: Optional[str] = None,
     ) -> TranslateOutput:
         ...
diff --git a/translation/scenes.py b/translation/scenes.py
new file mode 100644
index 0000000..4e56ebe
--- /dev/null
+++ b/translation/scenes.py
@@ -0,0 +1,36 @@
+"""Canonical translation scenes and scene-specific metadata."""
+
+from __future__ import annotations
+
+from typing import Dict
+
+
+SCENE_DEEPL_CONTEXTS: Dict[str, Dict[str, str]] = {
+    "general": {
+        "zh": "",
+        "en": "",
+    },
+    "sku_name": {
+        "zh": "商品SKU名称",
+        "en": "product SKU name",
+    },
+    "ecommerce_search_query": {
+        "zh": "电商搜索词",
+        "en": "e-commerce search query",
+    },
+}
+
+
+SUPPORTED_SCENES = frozenset(SCENE_DEEPL_CONTEXTS.keys())
+
+
+def normalize_scene_name(scene: str) -> str:
+    normalized = str(scene or "").strip()
+    if not normalized:
+        raise ValueError("translation scene cannot be empty")
+    if normalized not in SUPPORTED_SCENES:
+        raise ValueError(
+            f"Unsupported translation scene '{normalized}'. 
" + f"Supported scenes: {', '.join(sorted(SUPPORTED_SCENES))}" + ) + return normalized diff --git a/translation/service.py b/translation/service.py index 10ed49e..91ba0de 100644 --- a/translation/service.py +++ b/translation/service.py @@ -3,10 +3,18 @@ from __future__ import annotations import logging +import threading from typing import Dict, List, Optional -from config.services_config import TranslationServiceConfig, get_translation_config +from config.services_config import get_translation_config from translation.protocols import TranslateInput, TranslateOutput, TranslationBackendProtocol +from translation.settings import ( + TranslationConfig, + get_enabled_translation_models, + get_translation_capability, + normalize_translation_model, + normalize_translation_scene, +) logger = logging.getLogger(__name__) @@ -14,72 +22,140 @@ logger = logging.getLogger(__name__) class TranslationService: """Owns translation backends and routes calls by model and scene.""" - def __init__(self, config: Optional[TranslationServiceConfig] = None) -> None: + def __init__(self, config: Optional[TranslationConfig] = None) -> None: self.config = config or get_translation_config() + self._enabled_capabilities = self._collect_enabled_capabilities() self._backends: Dict[str, TranslationBackendProtocol] = {} - self._init_enabled_backends() + self._backend_lock = threading.Lock() + if not self._enabled_capabilities: + raise ValueError("No enabled translation backends found in services.translation.capabilities") - def _init_enabled_backends(self) -> None: + def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]: + enabled: Dict[str, Dict[str, object]] = {} + for name in get_enabled_translation_models(self.config): + capability = get_translation_capability(self.config, name, require_enabled=True) + backend_type = capability.get("backend") + if not backend_type: + raise ValueError(f"Translation capability '{name}' must define a backend") + enabled[name] = capability + return enabled + + def _create_backend( + self, + *, + name: str, + backend_type: str, + cfg: Dict[str, object], + ) -> TranslationBackendProtocol: registry = { - "qwen-mt": self._create_qwen_mt_backend, + "qwen_mt": self._create_qwen_mt_backend, "deepl": self._create_deepl_backend, "llm": self._create_llm_backend, + "local_nllb": self._create_local_nllb_backend, + "local_marian": self._create_local_marian_backend, } - for name in self.config.enabled_models: - factory = registry.get(name) - if factory is None: - logger.warning("Translation backend '%s' is enabled but not registered", name) - continue - self._backends[name] = factory() - - if not self._backends: - raise ValueError("No enabled translation backends found in services.translation.capabilities") + factory = registry.get(backend_type) + if factory is None: + raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'") + return factory(name=name, cfg=cfg) - def _create_qwen_mt_backend(self) -> TranslationBackendProtocol: + def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: from translation.backends.qwen_mt import QwenMTTranslationBackend - cfg = self.config.get_capability_cfg("qwen-mt") return QwenMTTranslationBackend( - model=cfg.get("model") or "qwen-mt-flash", + capability_name=name, + model=str(cfg["model"]).strip(), + base_url=str(cfg["base_url"]).strip(), api_key=cfg.get("api_key"), - use_cache=bool(cfg.get("use_cache", True)), - timeout=int(cfg.get("timeout_sec", 10)), + 
use_cache=bool(cfg["use_cache"]), + timeout=int(cfg["timeout_sec"]), glossary_id=cfg.get("glossary_id"), - translation_context=cfg.get("translation_context"), ) - def _create_deepl_backend(self) -> TranslationBackendProtocol: + def _create_deepl_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: from translation.backends.deepl import DeepLTranslationBackend - cfg = self.config.get_capability_cfg("deepl") return DeepLTranslationBackend( api_key=cfg.get("api_key"), - timeout=float(cfg.get("timeout_sec", 10.0)), + api_url=str(cfg["api_url"]).strip(), + timeout=float(cfg["timeout_sec"]), glossary_id=cfg.get("glossary_id"), ) - def _create_llm_backend(self) -> TranslationBackendProtocol: + def _create_llm_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: from translation.backends.llm import LLMTranslationBackend - cfg = self.config.get_capability_cfg("llm") return LLMTranslationBackend( - model=cfg.get("model"), - timeout_sec=float(cfg.get("timeout_sec", 30.0)), - base_url=cfg.get("base_url"), + capability_name=name, + model=str(cfg["model"]).strip(), + timeout_sec=float(cfg["timeout_sec"]), + base_url=str(cfg["base_url"]).strip(), + ) + + def _create_local_nllb_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: + from translation.backends.local_seq2seq import NLLBTranslationBackend + + return NLLBTranslationBackend( + name=name, + model_id=str(cfg["model_id"]).strip(), + model_dir=str(cfg["model_dir"]).strip(), + device=str(cfg["device"]).strip(), + torch_dtype=str(cfg["torch_dtype"]).strip(), + batch_size=int(cfg["batch_size"]), + max_input_length=int(cfg["max_input_length"]), + max_new_tokens=int(cfg["max_new_tokens"]), + num_beams=int(cfg["num_beams"]), + ) + + def _create_local_marian_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: + from translation.backends.local_seq2seq import MarianMTTranslationBackend, get_marian_language_direction + + source_lang, target_lang = get_marian_language_direction(name) + + return MarianMTTranslationBackend( + name=name, + model_id=str(cfg["model_id"]).strip(), + model_dir=str(cfg["model_dir"]).strip(), + device=str(cfg["device"]).strip(), + torch_dtype=str(cfg["torch_dtype"]).strip(), + batch_size=int(cfg["batch_size"]), + max_input_length=int(cfg["max_input_length"]), + max_new_tokens=int(cfg["max_new_tokens"]), + num_beams=int(cfg["num_beams"]), + source_langs=[source_lang], + target_langs=[target_lang], ) @property def available_models(self) -> List[str]: + return list(self._enabled_capabilities.keys()) + + @property + def loaded_models(self) -> List[str]: return list(self._backends.keys()) def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: - normalized = self.config.normalize_model_name(model) - backend = self._backends.get(normalized) - if backend is None: + normalized = normalize_translation_model(self.config, model) + capability_cfg = self._enabled_capabilities.get(normalized) + if capability_cfg is None: raise ValueError( f"Translation model '{normalized}' is not enabled. 
" f"Available models: {', '.join(self.available_models) or 'none'}" ) + backend = self._backends.get(normalized) + if backend is not None: + return backend + with self._backend_lock: + backend = self._backends.get(normalized) + if backend is None: + backend_type = str(capability_cfg["backend"]) + logger.info("Initializing translation backend | model=%s backend=%s", normalized, backend_type) + backend = self._create_backend( + name=normalized, + backend_type=backend_type, + cfg=capability_cfg, + ) + self._backends[normalized] = backend return backend def translate( @@ -90,14 +166,12 @@ class TranslationService: *, model: Optional[str] = None, scene: Optional[str] = None, - prompt: Optional[str] = None, ) -> TranslateOutput: backend = self.get_backend(model) - active_scene = (scene or self.config.default_scene or "general").strip() or "general" + active_scene = normalize_translation_scene(self.config, scene) return backend.translate( text=text, target_lang=target_lang, source_lang=source_lang, - context=active_scene, - prompt=prompt, + scene=active_scene, ) diff --git a/translation/settings.py b/translation/settings.py new file mode 100644 index 0000000..780a72e --- /dev/null +++ b/translation/settings.py @@ -0,0 +1,210 @@ +"""Translation config normalization and validation helpers.""" + +from __future__ import annotations + +from typing import Any, Dict, List, Mapping, Optional + +from translation.scenes import normalize_scene_name + + +TranslationConfig = Dict[str, Any] + + +def build_translation_config(raw_cfg: Mapping[str, Any]) -> TranslationConfig: + if not isinstance(raw_cfg, Mapping): + raise ValueError("services.translation must be a mapping") + + config: TranslationConfig = { + "service_url": _require_http_url(raw_cfg.get("service_url"), "services.translation.service_url").rstrip("/"), + "timeout_sec": _require_positive_float(raw_cfg.get("timeout_sec"), "services.translation.timeout_sec"), + "default_model": _require_string(raw_cfg.get("default_model"), "services.translation.default_model").lower(), + "default_scene": normalize_scene_name( + _require_string(raw_cfg.get("default_scene"), "services.translation.default_scene") + ), + "cache": _build_cache_config(raw_cfg.get("cache")), + "capabilities": _build_capabilities(raw_cfg.get("capabilities")), + } + + default_model = config["default_model"] + capabilities = config["capabilities"] + if default_model not in capabilities: + raise ValueError( + f"services.translation.default_model '{default_model}' is not defined in services.translation.capabilities" + ) + if not capabilities[default_model]["enabled"]: + raise ValueError( + f"services.translation.default_model '{default_model}' must reference an enabled capability" + ) + if not get_enabled_translation_models(config): + raise ValueError("At least one translation capability must be enabled") + + return config + + +def normalize_translation_model(config: Mapping[str, Any], model: Optional[str]) -> str: + normalized = str(model or config.get("default_model") or "").strip().lower() + if not normalized: + raise ValueError("translation model cannot be empty") + return normalized + + +def normalize_translation_scene(config: Mapping[str, Any], scene: Optional[str]) -> str: + return normalize_scene_name(scene or config.get("default_scene")) + + +def get_enabled_translation_models(config: Mapping[str, Any]) -> List[str]: + capabilities = config.get("capabilities") + if not isinstance(capabilities, Mapping): + raise ValueError("translation config missing capabilities") + return [name for 
name, capability in capabilities.items() if isinstance(capability, Mapping) and capability.get("enabled") is True] + + +def get_translation_capability( + config: Mapping[str, Any], + model: Optional[str], + *, + require_enabled: bool = False, +) -> Dict[str, Any]: + normalized = normalize_translation_model(config, model) + capabilities = config.get("capabilities") + if not isinstance(capabilities, Mapping): + raise ValueError("translation config missing capabilities") + + capability = capabilities.get(normalized) + if not isinstance(capability, Mapping): + raise ValueError(f"Translation capability '{normalized}' is not defined") + if require_enabled and capability.get("enabled") is not True: + enabled = ", ".join(get_enabled_translation_models(config)) or "none" + raise ValueError(f"Translation model '{normalized}' is not enabled. Available models: {enabled}") + return dict(capability) + + +def get_translation_cache(config: Mapping[str, Any]) -> Dict[str, Any]: + cache = config.get("cache") + if not isinstance(cache, Mapping): + raise ValueError("translation config missing cache") + return dict(cache) + + +def _build_cache_config(raw_cache: Any) -> Dict[str, Any]: + if not isinstance(raw_cache, Mapping): + raise ValueError("services.translation.cache must be a mapping") + return { + "enabled": _require_bool(raw_cache.get("enabled"), "services.translation.cache.enabled"), + "key_prefix": _require_string(raw_cache.get("key_prefix"), "services.translation.cache.key_prefix"), + "ttl_seconds": _require_positive_int(raw_cache.get("ttl_seconds"), "services.translation.cache.ttl_seconds"), + "sliding_expiration": _require_bool( + raw_cache.get("sliding_expiration"), + "services.translation.cache.sliding_expiration", + ), + "key_include_scene": _require_bool( + raw_cache.get("key_include_scene"), + "services.translation.cache.key_include_scene", + ), + "key_include_source_lang": _require_bool( + raw_cache.get("key_include_source_lang"), + "services.translation.cache.key_include_source_lang", + ), + } + + +def _build_capabilities(raw_capabilities: Any) -> Dict[str, Dict[str, Any]]: + if not isinstance(raw_capabilities, Mapping): + raise ValueError("services.translation.capabilities must be a mapping") + + resolved: Dict[str, Dict[str, Any]] = {} + for name, raw_capability in raw_capabilities.items(): + if not isinstance(raw_capability, Mapping): + raise ValueError(f"services.translation.capabilities.{name} must be a mapping") + + capability_name = _require_string(name, "translation capability name").lower() + prefix = f"services.translation.capabilities.{capability_name}" + capability = dict(raw_capability) + capability["enabled"] = _require_bool(capability.get("enabled"), f"{prefix}.enabled") + capability["backend"] = _require_string(capability.get("backend"), f"{prefix}.backend").lower() + _validate_capability(capability_name, capability) + resolved[capability_name] = capability + + return resolved + + +def _validate_capability(name: str, capability: Mapping[str, Any]) -> None: + prefix = f"services.translation.capabilities.{name}" + backend = capability.get("backend") + + if backend == "qwen_mt": + _require_string(capability.get("model"), f"{prefix}.model") + _require_http_url(capability.get("base_url"), f"{prefix}.base_url") + _require_positive_float(capability.get("timeout_sec"), f"{prefix}.timeout_sec") + _require_bool(capability.get("use_cache"), f"{prefix}.use_cache") + return + + if backend == "llm": + _require_string(capability.get("model"), f"{prefix}.model") + 
_require_http_url(capability.get("base_url"), f"{prefix}.base_url") + _require_positive_float(capability.get("timeout_sec"), f"{prefix}.timeout_sec") + return + + if backend == "deepl": + _require_http_url(capability.get("api_url"), f"{prefix}.api_url") + _require_positive_float(capability.get("timeout_sec"), f"{prefix}.timeout_sec") + return + + if backend in {"local_nllb", "local_marian"}: + _require_string(capability.get("model_id"), f"{prefix}.model_id") + _require_string(capability.get("model_dir"), f"{prefix}.model_dir") + _require_string(capability.get("device"), f"{prefix}.device") + _require_string(capability.get("torch_dtype"), f"{prefix}.torch_dtype") + _require_positive_int(capability.get("batch_size"), f"{prefix}.batch_size") + _require_positive_int(capability.get("max_input_length"), f"{prefix}.max_input_length") + _require_positive_int(capability.get("max_new_tokens"), f"{prefix}.max_new_tokens") + _require_positive_int(capability.get("num_beams"), f"{prefix}.num_beams") + return + + raise ValueError(f"Unsupported translation backend '{backend}' for capability '{name}'") + + +def _require_string(value: Any, field_name: str) -> str: + text = str(value or "").strip() + if not text: + raise ValueError(f"{field_name} is required") + return text + + +def _require_float(value: Any, field_name: str) -> float: + if value in (None, ""): + raise ValueError(f"{field_name} is required") + return float(value) + + +def _require_positive_float(value: Any, field_name: str) -> float: + parsed = _require_float(value, field_name) + if parsed <= 0: + raise ValueError(f"{field_name} must be greater than 0") + return parsed + + +def _require_int(value: Any, field_name: str) -> int: + if value in (None, ""): + raise ValueError(f"{field_name} is required") + return int(value) + + +def _require_positive_int(value: Any, field_name: str) -> int: + parsed = _require_int(value, field_name) + if parsed <= 0: + raise ValueError(f"{field_name} must be greater than 0") + return parsed + + +def _require_bool(value: Any, field_name: str) -> bool: + if not isinstance(value, bool): + raise ValueError(f"{field_name} must be a boolean") + return value + + +def _require_http_url(value: Any, field_name: str) -> str: + text = _require_string(value, field_name) + if not (text.startswith("http://") or text.startswith("https://")): + raise ValueError(f"{field_name} must start with http:// or https://") + return text -- libgit2 0.21.2
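
A minimal sketch of the services.translation mapping that build_translation_config
(translation/settings.py) accepts. Every concrete value below (address, model names,
TTL, paths) is an illustrative assumption, not the repository's actual config.yaml:

# Sketch only: field names mirror the validators in translation/settings.py;
# the values are assumptions for illustration.
from translation.settings import build_translation_config

raw_cfg = {
    "service_url": "http://127.0.0.1:6006",  # assumed translator service address
    "timeout_sec": 10.0,
    "default_model": "qwen-mt",
    "default_scene": "general",
    "cache": {
        "enabled": True,
        "key_prefix": "trans:v2",
        "ttl_seconds": 360 * 24 * 3600,
        "sliding_expiration": True,
        "key_include_scene": True,
        "key_include_source_lang": True,
    },
    "capabilities": {
        "qwen-mt": {
            "enabled": True,
            "backend": "qwen_mt",
            "model": "qwen-mt-flash",
            "base_url": "https://dashscope-us.aliyuncs.com/compatible-mode/v1",
            "timeout_sec": 10,
            "use_cache": True,
        },
        # Capability name must be registered in MARIAN_LANGUAGE_DIRECTIONS.
        "opus-mt-zh-en": {
            "enabled": False,
            "backend": "local_marian",
            "model_id": "Helsinki-NLP/opus-mt-zh-en",
            "model_dir": "models/opus-mt-zh-en",
            "device": "auto",
            "torch_dtype": "auto",
            "batch_size": 16,
            "max_input_length": 512,
            "max_new_tokens": 256,
            "num_beams": 4,
        },
    },
}

config = build_translation_config(raw_cfg)  # raises ValueError on any missing/invalid field

Note that build_translation_config also rejects a default_model that is missing
or disabled, and requires at least one enabled capability.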
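
Given such a config, backends are constructed lazily: TranslationService registers
every enabled capability up front but defers model loading to the first
get_backend call (double-checked under _backend_lock). A hypothetical session,
assuming "qwen-mt" is enabled:

from translation.service import TranslationService

service = TranslationService()
print(service.available_models)  # all enabled capabilities from config
print(service.loaded_models)     # empty until a backend is first requested
service.get_backend("qwen-mt")   # builds and caches the backend on first use
print(service.loaded_models)     # now contains "qwen-mt"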
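
On the business side, callers go through the HTTP client only. A hedged usage
sketch, assuming the translator service is reachable at the configured
service_url (sample inputs are illustrative):

from translation.client import create_translation_client

client = create_translation_client()

# Single string in, Optional[str] out; the client degrades to None on errors.
title = client.translate("不锈钢保温杯", target_lang="en", scene="sku_name")

# Sequence in, List[Optional[str]] out (tuples are converted to lists).
queries = client.translate(
    ["蓝牙耳机", "机械键盘"],
    target_lang="en",
    scene="ecommerce_search_query",
)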
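
The qwen-mt Redis key scheme keeps a readable model/src/tgt prefix plus a sha256
digest of the full payload, so a scene change produces a different key even for
identical text. A standalone sketch of _build_cache_key (the key_include_scene /
key_include_source_lang toggles are omitted here):

import hashlib
from typing import Optional

def build_cache_key(prefix: str, model: str, text: str, target_lang: str,
                    source_lang: Optional[str], scene: Optional[str]) -> str:
    src = (source_lang or "auto").strip().lower()
    tgt = (target_lang or "").strip().lower()
    scn = (scene or "").strip()
    # Digest covers the full payload; the readable prefix aids manual inspection.
    payload = f"model={model}\nsrc={src}\ntgt={tgt}\nscene={scn}\ntext={text}"
    digest = hashlib.sha256(payload.encode("utf-8")).hexdigest()
    return f"{prefix}:{model}:{src}:{tgt}:{digest}"

print(build_cache_key("trans:v2", "qwen-mt", "hello", "zh", None, "general"))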
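
The scene-keyed prompt tables are consumed by translation/backends/llm.py, whose
selection logic this patch shows only in part; rendering one template by hand,
assuming the English variant is chosen and the labels come from LANGUAGE_LABELS:

from translation.languages import LANGUAGE_LABELS
from translation.prompts import TRANSLATION_PROMPTS

template = TRANSLATION_PROMPTS["sku_name"]["en"]
prompt = template.format(
    source_lang=LANGUAGE_LABELS["zh"],
    src_lang_code="zh",
    target_lang=LANGUAGE_LABELS["en"],
    tgt_lang_code="en",
    text="不锈钢保温杯 500ml",  # illustrative input
)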