Commit cd4ce66dc8c34567248091bc97356f0f00d32062

Authored by tangwang
1 parent c90f80ed

trans logs

api/translator_app.py
... ... @@ -2,8 +2,12 @@
2 2  
3 3 import argparse
4 4 import logging
  5 +import os
  6 +import pathlib
  7 +import time
5 8 from contextlib import asynccontextmanager
6 9 from functools import lru_cache
  10 +from logging.handlers import TimedRotatingFileHandler
7 11 from typing import List, Optional, Union
8 12  
9 13 import uvicorn
... ... @@ -20,12 +24,57 @@ from translation.settings import (
20 24 normalize_translation_scene,
21 25 )
22 26  
23   -# Configure logging
24   -logging.basicConfig(
25   - level=logging.INFO,
26   - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
27   -)
  27 +
  28 +def configure_translator_logging() -> None:
  29 + log_dir = pathlib.Path("logs")
  30 + verbose_dir = log_dir / "verbose"
  31 + log_dir.mkdir(exist_ok=True)
  32 + verbose_dir.mkdir(parents=True, exist_ok=True)
  33 +
  34 + log_level = os.getenv("LOG_LEVEL", "INFO").upper()
  35 + numeric_level = getattr(logging, log_level, logging.INFO)
  36 + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
  37 +
  38 + root_logger = logging.getLogger()
  39 + root_logger.setLevel(numeric_level)
  40 + root_logger.handlers.clear()
  41 +
  42 + console_handler = logging.StreamHandler()
  43 + console_handler.setLevel(numeric_level)
  44 + console_handler.setFormatter(formatter)
  45 + root_logger.addHandler(console_handler)
  46 +
  47 + file_handler = TimedRotatingFileHandler(
  48 + filename=log_dir / "translator_api.log",
  49 + when="midnight",
  50 + interval=1,
  51 + backupCount=30,
  52 + encoding="utf-8",
  53 + )
  54 + file_handler.setLevel(numeric_level)
  55 + file_handler.setFormatter(formatter)
  56 + root_logger.addHandler(file_handler)
  57 +
  58 + verbose_logger = logging.getLogger("translator.verbose")
  59 + verbose_logger.setLevel(numeric_level)
  60 + verbose_logger.handlers.clear()
  61 + verbose_logger.propagate = False
  62 +
  63 + verbose_handler = TimedRotatingFileHandler(
  64 + filename=verbose_dir / "translator_verbose.log",
  65 + when="midnight",
  66 + interval=1,
  67 + backupCount=30,
  68 + encoding="utf-8",
  69 + )
  70 + verbose_handler.setLevel(numeric_level)
  71 + verbose_handler.setFormatter(formatter)
  72 + verbose_logger.addHandler(verbose_handler)
  73 +
  74 +
  75 +configure_translator_logging()
28 76 logger = logging.getLogger(__name__)
  77 +verbose_logger = logging.getLogger("translator.verbose")
29 78  
30 79  
31 80 @lru_cache(maxsize=1)
... ... @@ -98,6 +147,37 @@ def _normalize_batch_result(
98 147 return [translated[idx] if idx < len(translated) else None for idx, _ in enumerate(original)]
99 148  
100 149  
  150 +def _text_preview(text: Optional[str], limit: int = 20) -> str:
  151 + normalized = str(text or "").replace("\n", "\\n")
  152 + return normalized[:limit]
  153 +
  154 +
  155 +def _request_metrics(text: Union[str, List[str]]) -> dict:
  156 + if isinstance(text, list):
  157 + lengths = [len(str(item or "")) for item in text]
  158 + return {
  159 + "request_count": len(text),
  160 + "lengths": lengths,
  161 + "first_preview": _text_preview(text[0] if text else ""),
  162 + }
  163 + return {
  164 + "request_count": 1,
  165 + "lengths": [len(str(text or ""))],
  166 + "first_preview": _text_preview(str(text or "")),
  167 + }
  168 +
  169 +
  170 +def _result_preview(translated: Union[str, List[Optional[str]], None]) -> str:
  171 + if isinstance(translated, list):
  172 + if not translated:
  173 + return ""
  174 + first = translated[0]
  175 + return _text_preview("" if first is None else str(first))
  176 + if translated is None:
  177 + return ""
  178 + return _text_preview(str(translated))
  179 +
  180 +
101 181 def _translate_batch(
102 182 service: TranslationService,
103 183 raw_text: List[str],
... ... @@ -108,6 +188,17 @@ def _translate_batch(
108 188 scene: str,
109 189 ) -> List[Optional[str]]:
110 190 backend = service.get_backend(model)
  191 + logger.info(
  192 + "Translation batch dispatch | model=%s scene=%s target_lang=%s source_lang=%s count=%s lengths=%s first_preview=%s supports_batch=%s",
  193 + model,
  194 + scene,
  195 + target_lang,
  196 + source_lang or "auto",
  197 + len(raw_text),
  198 + [len(str(item or "")) for item in raw_text],
  199 + _text_preview(raw_text[0] if raw_text else ""),
  200 + bool(getattr(backend, "supports_batch", False)),
  201 + )
111 202 if getattr(backend, "supports_batch", False):
112 203 try:
113 204 translated = service.translate(
... ... @@ -117,6 +208,13 @@ def _translate_batch(
117 208 model=model,
118 209 scene=scene,
119 210 )
  211 + verbose_logger.info(
  212 + "Translation batch result | model=%s scene=%s count=%s first_result=%s",
  213 + model,
  214 + scene,
  215 + len(raw_text),
  216 + _result_preview(translated),
  217 + )
120 218 return _normalize_batch_result(raw_text, translated)
121 219 except ValueError:
122 220 raise
... ... @@ -139,7 +237,17 @@ def _translate_batch(
139 237 except ValueError:
140 238 raise
141 239 except Exception as exc:
142   - logger.warning("Per-item translation failed: %s", exc, exc_info=True)
  240 + logger.warning(
  241 + "Per-item translation failed | model=%s scene=%s target_lang=%s source_lang=%s item_len=%s item_preview=%s error=%s",
  242 + model,
  243 + scene,
  244 + target_lang,
  245 + source_lang or "auto",
  246 + len(str(item or "")),
  247 + _text_preview(str(item or "")),
  248 + exc,
  249 + exc_info=True,
  250 + )
143 251 out = None
144 252 results.append(out)
145 253 return results
... ... @@ -147,19 +255,25 @@ def _translate_batch(
147 255  
148 256 @asynccontextmanager
149 257 async def lifespan(_: FastAPI):
150   - """Warm the default backend on process startup."""
  258 + """Initialize all enabled translation backends on process startup."""
151 259 logger.info("Starting Translation Service API")
152 260 service = get_translation_service()
153   - default_backend = service.get_backend(service.config["default_model"])
154 261 logger.info(
155   - "Translation service ready | default_model=%s available_models=%s loaded_models=%s",
  262 + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s",
156 263 service.config["default_model"],
  264 + service.config["default_scene"],
157 265 service.available_models,
158 266 service.loaded_models,
159 267 )
160 268 logger.info(
161   - "Default translation backend warmed up | model=%s",
162   - getattr(default_backend, "model", service.config["default_model"]),
  269 + "Translation backends initialized on startup | models=%s",
  270 + service.loaded_models,
  271 + )
  272 + verbose_logger.info(
  273 + "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s",
  274 + service.available_models,
  275 + service.config["cache"]["ttl_seconds"],
  276 + service.config["cache"]["sliding_expiration"],
163 277 )
164 278 yield
165 279  
... ... @@ -189,6 +303,12 @@ async def health_check():
189 303 """Health check endpoint."""
190 304 try:
191 305 service = get_translation_service()
  306 + logger.info(
  307 + "Health check | default_model=%s default_scene=%s loaded_models=%s",
  308 + service.config["default_model"],
  309 + service.config["default_scene"],
  310 + service.loaded_models,
  311 + )
192 312 return {
193 313 "status": "healthy",
194 314 "service": "translation",
... ... @@ -216,12 +336,33 @@ async def translate(request: TranslationRequest):
216 336 if not request.target_lang:
217 337 raise HTTPException(status_code=400, detail="target_lang is required")
218 338  
  339 + request_started = time.perf_counter()
219 340 try:
220 341 service = get_translation_service()
221 342 scene = _normalize_scene(service, request.scene)
222 343 model = _normalize_model(service, request.model)
223 344 translator = service.get_backend(model)
224 345 raw_text = request.text
  346 + metrics = _request_metrics(raw_text)
  347 + logger.info(
  348 + "Translation request | model=%s scene=%s target_lang=%s source_lang=%s count=%s lengths=%s first_preview=%s backend=%s",
  349 + model,
  350 + scene,
  351 + request.target_lang,
  352 + request.source_lang or "auto",
  353 + metrics["request_count"],
  354 + metrics["lengths"],
  355 + metrics["first_preview"],
  356 + getattr(translator, "model", model),
  357 + )
  358 + verbose_logger.info(
  359 + "Translation request detail | model=%s scene=%s target_lang=%s source_lang=%s payload=%s",
  360 + model,
  361 + scene,
  362 + request.target_lang,
  363 + request.source_lang or "auto",
  364 + raw_text,
  365 + )
225 366  
226 367 if isinstance(raw_text, list):
227 368 results = _translate_batch(
... ... @@ -232,6 +373,22 @@ async def translate(request: TranslationRequest):
232 373 model=model,
233 374 scene=scene,
234 375 )
  376 + latency_ms = (time.perf_counter() - request_started) * 1000
  377 + logger.info(
  378 + "Translation response | model=%s scene=%s count=%s first_result=%s latency_ms=%.2f",
  379 + model,
  380 + scene,
  381 + len(raw_text),
  382 + _result_preview(results),
  383 + latency_ms,
  384 + )
  385 + verbose_logger.info(
  386 + "Translation response detail | model=%s scene=%s translated=%s latency_ms=%.2f",
  387 + model,
  388 + scene,
  389 + results,
  390 + latency_ms,
  391 + )
235 392 return TranslationResponse(
236 393 text=raw_text,
237 394 target_lang=request.target_lang,
... ... @@ -253,6 +410,22 @@ async def translate(request: TranslationRequest):
253 410 if translated_text is None:
254 411 raise HTTPException(status_code=500, detail="Translation failed")
255 412  
  413 + latency_ms = (time.perf_counter() - request_started) * 1000
  414 + logger.info(
  415 + "Translation response | model=%s scene=%s count=1 first_result=%s latency_ms=%.2f",
  416 + model,
  417 + scene,
  418 + _result_preview(translated_text),
  419 + latency_ms,
  420 + )
  421 + verbose_logger.info(
  422 + "Translation response detail | model=%s scene=%s translated=%s latency_ms=%.2f",
  423 + model,
  424 + scene,
  425 + translated_text,
  426 + latency_ms,
  427 + )
  428 +
256 429 return TranslationResponse(
257 430 text=raw_text,
258 431 target_lang=request.target_lang,
... ... @@ -263,12 +436,22 @@ async def translate(request: TranslationRequest):
263 436 scene=scene,
264 437 )
265 438  
266   - except HTTPException:
  439 + except HTTPException as exc:
  440 + latency_ms = (time.perf_counter() - request_started) * 1000
  441 + logger.warning(
  442 + "Translation request failed | status_code=%s detail=%s latency_ms=%.2f",
  443 + exc.status_code,
  444 + exc.detail,
  445 + latency_ms,
  446 + )
267 447 raise
268 448 except ValueError as e:
  449 + latency_ms = (time.perf_counter() - request_started) * 1000
  450 + logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True)
269 451 raise HTTPException(status_code=400, detail=str(e)) from e
270 452 except Exception as e:
271   - logger.error(f"Translation error: {e}", exc_info=True)
  453 + latency_ms = (time.perf_counter() - request_started) * 1000
  454 + logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True)
272 455 raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
273 456  
274 457  
... ...
config/config.yaml
... ... @@ -106,12 +106,8 @@ services:
106 106 default_scene: "general"
107 107 timeout_sec: 10.0
108 108 cache:
109   - enabled: true
110   - key_prefix: "trans:v2"
111 109 ttl_seconds: 62208000
112 110 sliding_expiration: true
113   - key_include_scene: true
114   - key_include_source_lang: true
115 111 capabilities:
116 112 qwen-mt:
117 113 enabled: true
... ... @@ -126,12 +122,14 @@ services:
126 122 model: "qwen-flash"
127 123 base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
128 124 timeout_sec: 30.0
  125 + use_cache: true
129 126 deepl:
130   - enabled: false
  127 + enabled: true
131 128 backend: "deepl"
132 129 api_url: "https://api.deepl.com/v2/translate"
133 130 timeout_sec: 10.0
134 131 glossary_id: ""
  132 + use_cache: true
135 133 nllb-200-distilled-600m:
136 134 enabled: true
137 135 backend: "local_nllb"
... ... @@ -144,6 +142,7 @@ services:
144 142 max_new_tokens: 64
145 143 num_beams: 1
146 144 attn_implementation: "sdpa"
  145 + use_cache: true
147 146 opus-mt-zh-en:
148 147 enabled: true
149 148 backend: "local_marian"
... ... @@ -155,6 +154,7 @@ services:
155 154 max_input_length: 256
156 155 max_new_tokens: 256
157 156 num_beams: 1
  157 + use_cache: true
158 158 opus-mt-en-zh:
159 159 enabled: true
160 160 backend: "local_marian"
... ... @@ -166,6 +166,7 @@ services:
166 166 max_input_length: 256
167 167 max_new_tokens: 256
168 168 num_beams: 1
  169 + use_cache: true
169 170 embedding:
170 171 provider: "http" # http
171 172 base_url: "http://127.0.0.1:6005"
... ...
config/env_config.py
... ... @@ -42,8 +42,6 @@ REDIS_CONFIG = {
42 42 'socket_connect_timeout': int(os.getenv('REDIS_SOCKET_CONNECT_TIMEOUT', 1)),
43 43 'retry_on_timeout': os.getenv('REDIS_RETRY_ON_TIMEOUT', 'False').lower() == 'true',
44 44 'cache_expire_days': int(os.getenv('REDIS_CACHE_EXPIRE_DAYS', 360*2)), # 6 months
45   - 'translation_cache_expire_days': int(os.getenv('REDIS_TRANSLATION_CACHE_EXPIRE_DAYS', 360*2)),
46   - 'translation_cache_prefix': os.getenv('REDIS_TRANSLATION_CACHE_PREFIX', 'trans'),
47 45 # Embedding 缓存 key 前缀,例如 "embedding"
48 46 'embedding_cache_prefix': os.getenv('REDIS_EMBEDDING_CACHE_PREFIX', 'embedding'),
49 47 }
... ...
docs/工作总结-微服务性能优化与架构.md
... ... @@ -88,7 +88,7 @@ instruction: "Given a shopping query, rank product titles by relevance"
88 88 - **配置入口**:`config/config.yaml` → `services.translation`,显式声明 `service_url`、`default_model`、`default_scene`、各 capability 的 `backend`、`base_url/api_url`、timeout 与本地模型运行参数。
89 89 - **内部规则收口**:scene 集合、语言码映射、LLM prompt 模板、本地模型方向约束统一放在 `translation/` 内部,不再散落在 `config/`、`query/` 等位置。
90 90 - **调用位置**:QueryParser 与 Indexer 均通过 `translation.create_translation_client()` 获取客户端,不写死 URL 或模型名。
91   -- **缓存**:`services.translation.cache` 支持 `key_prefix: "trans:v2"`、`ttl_seconds`、`sliding_expiration` 等,翻译结果写 Redis,减轻重复请求对限速的影响
  91 +- **缓存**:translator service 对所有 translation capability 统一接入 Redis 缓存;每个 capability 通过 `use_cache` 控制开关,key 格式固定为 `trans:{model}:{target_lang}:{source_text[:4]}{sha256(source_text)}`
92 92 - **场景支撑**:在线索引(indexer)与 query 请求(QueryParser)共用同一套 provider 配置;可按环境或租户通过修改 `config.yaml` 或环境变量切换 provider/model。
93 93 - **待配合**:**金伟侧对索引侧翻译调用做流量控制**(限流/排队/批量聚合),避免索引高峰打满 qwen 限速,影响在线 query 翻译。
94 94  
... ...
docs/缓存与Redis使用说明.md
... ... @@ -12,7 +12,6 @@
12 12 - **Password**:`REDIS_PASSWORD`
13 13 - **Socket & 超时**:`REDIS_SOCKET_TIMEOUT` / `REDIS_SOCKET_CONNECT_TIMEOUT` / `REDIS_RETRY_ON_TIMEOUT`
14 14 - **通用缓存 TTL**:`REDIS_CACHE_EXPIRE_DAYS`(默认 `360*2` 天,代码注释为 “6 months”)
15   -- **翻译缓存 TTL & 前缀**:`REDIS_TRANSLATION_CACHE_EXPIRE_DAYS`、`REDIS_TRANSLATION_CACHE_PREFIX`
16 15  
17 16 ---
18 17  
... ... @@ -21,7 +20,7 @@
21 20 | 模块 / 场景 | Key 模板 | Value 内容示例 | 过期策略 | 备注 |
22 21 |------------|----------|----------------|----------|------|
23 22 | 向量缓存(text/image embedding) | `{EMBEDDING_CACHE_PREFIX}:{query_or_url}` / `{EMBEDDING_CACHE_PREFIX}:image:{url_or_path}` | **BF16 bytes**(每维 2 字节大端存储),读取后恢复为 `np.float32` | TTL=`REDIS_CONFIG["cache_expire_days"]` 天;访问时滑动过期 | 见 `embeddings/text_encoder.py`(文本)与 `embeddings/image_encoder.py`(图片);前缀由 `REDIS_CONFIG["embedding_cache_prefix"]` 控制 |
24   -| 翻译结果缓存(Qwen-MT 翻译) | `{cache_prefix}:{model}:{src}:{tgt}:{sha256(payload)}` | 机翻后的单条字符串 | TTL=`services.translation.cache.ttl_seconds` 秒;可配置滑动过期 | 见 `translation/backends/qwen_mt.py` + `config/config.yaml` |
  23 +| 翻译结果缓存(translator service) | `trans:{model}:{target_lang}:{source_text[:4]}{sha256(source_text)}` | 机翻后的单条字符串 | TTL=`services.translation.cache.ttl_seconds` 秒;可配置滑动过期 | 见 `translation/service.py` + `config/config.yaml` |
25 24 | 商品内容理解缓存(anchors / 语义属性 / tags) | `{ANCHOR_CACHE_PREFIX}:{tenant_or_global}:{target_lang}:{md5(title)}` | `json.dumps(dict)`,包含 id/title/category/tags/anchor_text 等 | TTL=`ANCHOR_CACHE_EXPIRE_DAYS` 天 | 见 `indexer/product_enrich.py` |
26 25  
27 26 下面按模块详细说明。
... ... @@ -71,34 +70,29 @@
71 70  
72 71 ---
73 72  
74   -## 3. 翻译结果缓存(translation/backends/qwen_mt.py)
  73 +## 3. 翻译结果缓存(translation/service.py)
75 74  
76   -- **代码位置**:`translation/backends/qwen_mt.py` 中 `QwenMTTranslationBackend`
77   -- **用途**:缓存 Qwen-MT 翻译(及 translator service 复用的翻译)结果,减少云端请求,遵守限速。
78   -- **配置入口**:`config/config.yaml -> services.translation.cache`,统一由 `config/services_config.get_translation_cache_config()` 解析。
  75 +- **代码位置**:`translation/service.py`
  76 +- **用途**:统一缓存所有 translation capability 的翻译结果。
  77 +- **配置入口**:
  78 + - `config/config.yaml -> services.translation.cache`
  79 + - `config/config.yaml -> services.translation.capabilities.*.use_cache`
79 80  
80 81 ### 3.1 Key 设计
81 82  
82   -- 内部构造函数:`_build_cache_key(...)`
  83 +- 内部构造函数:`TranslationCache.build_key(...)`
83 84 - 模板:
84 85  
85 86 ```text
86   -{cache_prefix}:{model}:{src}:{tgt}:{sha256(payload)}
  87 +trans:{model}:{target_lang}:{source_text[:4]}{sha256(source_text)}
87 88 ```
88 89  
89 90 其中:
90 91  
91   -- `cache_prefix`:来自 `services.translation.cache.key_prefix`,默认 `trans:v2`;
92   -- `model`:如 `"qwen-mt"`;
93   -- `src`:源语言(如 `zh` / `en` / `auto`),是否包含在 key 中由 `key_include_source_lang` 控制;
94   -- `tgt`:目标语言,如 `en` / `zh`;
95   -- `sha256(payload)`:对以下内容整体做 SHA-256:
96   - - `model`
97   - - `src` / `tgt`
98   - - `scene`(受 `key_include_scene` 控制)
99   - - 原始 `text`
100   -
101   -> 注意:所有 key 设计集中在 `_build_cache_key`,**不要在其他位置手动拼翻译缓存 key**。
  92 +- `model`:capability 名称,如 `qwen-mt`、`llm`、`opus-mt-zh-en`
  93 +- `target_lang`:目标语言,如 `en` / `zh`
  94 +- `source_text[:4]`:原文前 4 个字符
  95 +- `sha256(source_text)`:对完整原文做 SHA-256
102 96  
103 97 ### 3.2 Value 与类型
104 98  
... ... @@ -115,20 +109,25 @@
115 109 services:
116 110 translation:
117 111 cache:
118   - enabled: true
119   - key_prefix: "trans:v2"
120 112 ttl_seconds: 62208000 # 默认约 720 天
121 113 sliding_expiration: true
122   - key_include_scene: true
123   - key_include_source_lang: true
  114 + capabilities:
  115 + qwen-mt:
  116 + use_cache: true
  117 + llm:
  118 + use_cache: true
  119 + deepl:
  120 + use_cache: true
  121 + nllb-200-distilled-600m:
  122 + use_cache: true
  123 + opus-mt-zh-en:
  124 + use_cache: true
  125 + opus-mt-en-zh:
  126 + use_cache: true
124 127 ```
125 128  
126 129 - 运行时行为:
127   - - 创建 `Translator` 时,从 `cache_cfg` 读取:
128   - - `self.cache_prefix`
129   - - `self.expire_seconds`
130   - - `self.cache_sliding_expiration`
131   - - `self.cache_include_*` 一系列布尔开关;
  130 + - translator service 启动时初始化共享 Redis cache;
132 131 - **读缓存**:
133 132 - 命中后,若 `sliding_expiration=True`,会调用 `redis.expire(key, expire_seconds)`;
134 133 - **写缓存**:
... ... @@ -136,8 +135,8 @@ services:
136 135  
137 136 ### 3.4 关联模块
138 137  
139   -- `api/translator_app.py` 会通过 `translation.backends.qwen_mt.QwenMTTranslationBackend` 复用同一套缓存逻辑;
140   -- 文档说明:`docs/翻译模块说明.md` 中提到“推荐通过 Redis 翻译缓存复用结果”。
  138 +- `api/translator_app.py` 通过 `TranslationService` 统一复用同一套缓存逻辑;
  139 +- 所有翻译后端都通过 `TranslationService` 接入缓存。
141 140  
142 141 ---
143 142  
... ...
scripts/redis/redis_cache_health_check.py
... ... @@ -43,7 +43,6 @@ PROJECT_ROOT = Path(__file__).parent.parent.parent
43 43 sys.path.insert(0, str(PROJECT_ROOT))
44 44  
45 45 from config.env_config import REDIS_CONFIG # type: ignore
46   -from config.services_config import get_translation_cache_config # type: ignore
47 46 from embeddings.bf16 import decode_embedding_from_redis # type: ignore
48 47  
49 48  
... ... @@ -66,13 +65,11 @@ def _load_known_cache_types() -> Dict[str, CacheTypeConfig]:
66 65 description="文本向量缓存(embeddings/text_encoder.py)",
67 66 )
68 67  
69   - # translation 缓存:prefix 来自 services.translation.cache.key_prefix
70   - cache_cfg = get_translation_cache_config()
71   - trans_prefix = cache_cfg.get("key_prefix", "trans:v2")
  68 + # translation 缓存:统一前缀 trans
72 69 cache_types["translation"] = CacheTypeConfig(
73 70 name="translation",
74   - pattern=f"{trans_prefix}:*",
75   - description="翻译结果缓存(query/qwen_mt_translate.Translator)",
  71 + pattern="trans:*",
  72 + description="翻译结果缓存(translation/service.py)",
76 73 )
77 74  
78 75 # anchors 缓存:prefix 来自 REDIS_CONFIG['anchor_cache_prefix'](若存在),否则 product_anchors
... ... @@ -400,4 +397,3 @@ def main() -&gt; None:
400 397  
401 398 if __name__ == "__main__":
402 399 main()
403   -
... ...
tests/ci/test_service_api_contracts.py
... ... @@ -625,12 +625,8 @@ def translator_client(monkeypatch):
625 625 }
626 626 },
627 627 "cache": {
628   - "enabled": True,
629   - "key_prefix": "trans:v2",
630 628 "ttl_seconds": 60,
631 629 "sliding_expiration": True,
632   - "key_include_scene": True,
633   - "key_include_source_lang": True,
634 630 },
635 631 }
636 632 self.available_models = ["qwen-mt"]
... ... @@ -681,12 +677,8 @@ def test_translator_api_failure_returns_500(monkeypatch):
681 677 }
682 678 },
683 679 "cache": {
684   - "enabled": True,
685   - "key_prefix": "trans:v2",
686 680 "ttl_seconds": 60,
687 681 "sliding_expiration": True,
688   - "key_include_scene": True,
689   - "key_include_source_lang": True,
690 682 },
691 683 }
692 684 self.available_models = ["qwen-mt"]
... ...
tests/test_translation_local_backends.py
... ... @@ -96,7 +96,7 @@ def test_nllb_uses_src_lang_and_forced_bos(monkeypatch):
96 96 assert backend.seq2seq_model.last_generate_kwargs["forced_bos_token_id"] == 202
97 97  
98 98  
99   -def test_translation_service_lazy_loads_enabled_backends(monkeypatch):
  99 +def test_translation_service_preloads_enabled_backends(monkeypatch):
100 100 created = []
101 101  
102 102 def _fake_create_backend(self, *, name, backend_type, cfg):
... ... @@ -126,6 +126,7 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch):
126 126 "opus-mt-en-zh": {
127 127 "enabled": True,
128 128 "backend": "local_marian",
  129 + "use_cache": True,
129 130 "model_id": "dummy",
130 131 "model_dir": "dummy",
131 132 "device": "cpu",
... ... @@ -138,6 +139,7 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch):
138 139 "nllb-200-distilled-600m": {
139 140 "enabled": True,
140 141 "backend": "local_nllb",
  142 + "use_cache": True,
141 143 "model_id": "dummy",
142 144 "model_dir": "dummy",
143 145 "device": "cpu",
... ... @@ -149,22 +151,19 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch):
149 151 },
150 152 },
151 153 "cache": {
152   - "enabled": True,
153   - "key_prefix": "trans:v2",
154 154 "ttl_seconds": 60,
155 155 "sliding_expiration": True,
156   - "key_include_scene": True,
157   - "key_include_source_lang": True,
158 156 },
159 157 }
160 158  
161 159 service = TranslationService(config)
162 160  
163 161 assert service.available_models == ["opus-mt-en-zh", "nllb-200-distilled-600m"]
164   - assert service.loaded_models == []
  162 + assert service.loaded_models == ["opus-mt-en-zh", "nllb-200-distilled-600m"]
  163 + assert created == [
  164 + ("opus-mt-en-zh", "local_marian"),
  165 + ("nllb-200-distilled-600m", "local_nllb"),
  166 + ]
165 167  
166 168 backend = service.get_backend("opus-mt-en-zh")
167   -
168 169 assert backend.model == "opus-mt-en-zh"
169   - assert created == [("opus-mt-en-zh", "local_marian")]
170   - assert service.loaded_models == ["opus-mt-en-zh"]
... ...
tests/test_translator_failure_semantics.py
1   -from translation.backends.qwen_mt import QwenMTTranslationBackend
  1 +from translation.cache import TranslationCache
  2 +from translation.service import TranslationService
2 3  
3 4  
4   -class _RecordingRedis:
  5 +class _FakeCache:
5 6 def __init__(self):
6   - self.setex_calls = []
7   -
8   - def setex(self, key, ttl, value):
9   - self.setex_calls.append((key, ttl, value))
10   -
11   -
12   -def test_translate_failure_returns_none_and_skips_cache(monkeypatch):
13   - translator = QwenMTTranslationBackend(
14   - capability_name="qwen-mt",
15   - model="qwen-mt-flash",
16   - base_url="https://dashscope-us.aliyuncs.com/compatible-mode/v1",
17   - api_key="dummy-key",
18   - use_cache=False,
19   - )
20   - fake_redis = _RecordingRedis()
21   - translator.use_cache = True
22   - translator.redis_client = fake_redis
23   - translator.cache_prefix = "trans"
24   - translator.expire_seconds = 60
25   -
26   - monkeypatch.setattr(translator, "_translate_qwen", lambda *args, **kwargs: None)
27   -
28   - result = translator.translate(
29   - text="商品标题",
30   - target_lang="en",
31   - source_lang="zh",
32   - scene="sku_name",
33   - )
34   -
35   - assert result is None
36   - assert fake_redis.setex_calls == []
  7 + self.available = True
  8 + self.storage = {}
  9 + self.get_calls = []
  10 + self.set_calls = []
  11 +
  12 + def get(self, *, model, target_lang, source_text):
  13 + self.get_calls.append((model, target_lang, source_text))
  14 + return self.storage.get((model, target_lang, source_text))
  15 +
  16 + def set(self, *, model, target_lang, source_text, translated_text):
  17 + self.set_calls.append((model, target_lang, source_text, translated_text))
  18 + self.storage[(model, target_lang, source_text)] = translated_text
  19 +
  20 +
  21 +def test_translation_cache_key_format(monkeypatch):
  22 + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
  23 + cache = TranslationCache({"ttl_seconds": 60, "sliding_expiration": True})
  24 + key = cache.build_key(model="llm", target_lang="en", source_text="商品标题")
  25 + assert key.startswith("trans:llm:en:商品标题")
  26 + assert len(key) == len("trans:llm:en:商品标题") + 64
  27 +
  28 +
  29 +def test_service_caches_all_capabilities(monkeypatch):
  30 + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
  31 + created = {}
  32 +
  33 + def _fake_create_backend(self, *, name, backend_type, cfg):
  34 + del self, backend_type, cfg
  35 +
  36 + class _Backend:
  37 + model = name
  38 +
  39 + @property
  40 + def supports_batch(self):
  41 + return True
  42 +
  43 + def translate(self, text, target_lang, source_lang=None, scene=None):
  44 + del target_lang, source_lang, scene
  45 + if isinstance(text, list):
  46 + return [f"{name}:{item}" for item in text]
  47 + return f"{name}:{text}"
  48 +
  49 + backend = _Backend()
  50 + created[name] = backend
  51 + return backend
  52 +
  53 + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend)
  54 + config = {
  55 + "service_url": "http://127.0.0.1:6006",
  56 + "timeout_sec": 10.0,
  57 + "default_model": "llm",
  58 + "default_scene": "general",
  59 + "capabilities": {
  60 + "llm": {
  61 + "enabled": True,
  62 + "backend": "llm",
  63 + "model": "dummy-llm",
  64 + "base_url": "https://example.com",
  65 + "timeout_sec": 10.0,
  66 + "use_cache": True,
  67 + },
  68 + "opus-mt-zh-en": {
  69 + "enabled": True,
  70 + "backend": "local_marian",
  71 + "model_id": "dummy",
  72 + "model_dir": "dummy",
  73 + "device": "cpu",
  74 + "torch_dtype": "float32",
  75 + "batch_size": 8,
  76 + "max_input_length": 16,
  77 + "max_new_tokens": 16,
  78 + "num_beams": 1,
  79 + "use_cache": True,
  80 + },
  81 + },
  82 + "cache": {
  83 + "ttl_seconds": 60,
  84 + "sliding_expiration": True,
  85 + },
  86 + }
  87 +
  88 + service = TranslationService(config)
  89 + fake_cache = _FakeCache()
  90 + service._translation_cache = fake_cache
  91 +
  92 + first = service.translate("商品标题", target_lang="en", source_lang="zh", model="llm")
  93 + second = service.translate("商品标题", target_lang="en", source_lang="zh", model="llm")
  94 + batch = service.translate(["连衣裙", "衬衫"], target_lang="en", source_lang="zh", model="opus-mt-zh-en")
  95 +
  96 + assert first == "llm:商品标题"
  97 + assert second == "llm:商品标题"
  98 + assert batch == ["opus-mt-zh-en:连衣裙", "opus-mt-zh-en:衬衫"]
  99 + assert fake_cache.get_calls == [
  100 + ("llm", "en", "商品标题"),
  101 + ("llm", "en", "商品标题"),
  102 + ("opus-mt-zh-en", "en", "连衣裙"),
  103 + ("opus-mt-zh-en", "en", "衬衫"),
  104 + ]
  105 + assert fake_cache.set_calls == [
  106 + ("llm", "en", "商品标题", "llm:商品标题"),
  107 + ("opus-mt-zh-en", "en", "连衣裙", "opus-mt-zh-en:连衣裙"),
  108 + ("opus-mt-zh-en", "en", "衬衫", "opus-mt-zh-en:衬衫"),
  109 + ]
... ...
translation/README.md
... ... @@ -75,12 +75,8 @@ services:
75 75 default_scene: "general"
76 76 timeout_sec: 10.0
77 77 cache:
78   - enabled: true
79   - key_prefix: "trans:v2"
80 78 ttl_seconds: 62208000
81 79 sliding_expiration: true
82   - key_include_scene: true
83   - key_include_source_lang: true
84 80 capabilities:
85 81 qwen-mt:
86 82 enabled: true
... ... @@ -95,11 +91,13 @@ services:
95 91 model: "qwen-flash"
96 92 base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
97 93 timeout_sec: 30.0
  94 + use_cache: true
98 95 deepl:
99 96 enabled: false
100 97 backend: "deepl"
101 98 api_url: "https://api.deepl.com/v2/translate"
102 99 timeout_sec: 10.0
  100 + use_cache: true
103 101 nllb-200-distilled-600m:
104 102 enabled: true
105 103 backend: "local_nllb"
... ... @@ -112,6 +110,7 @@ services:
112 110 max_new_tokens: 64
113 111 num_beams: 1
114 112 attn_implementation: "sdpa"
  113 + use_cache: true
115 114 opus-mt-zh-en:
116 115 enabled: true
117 116 backend: "local_marian"
... ... @@ -123,6 +122,7 @@ services:
123 122 max_input_length: 256
124 123 max_new_tokens: 256
125 124 num_beams: 1
  125 + use_cache: true
126 126 opus-mt-en-zh:
127 127 enabled: true
128 128 backend: "local_marian"
... ... @@ -134,6 +134,7 @@ services:
134 134 max_input_length: 256
135 135 max_new_tokens: 256
136 136 num_beams: 1
  137 + use_cache: true
137 138 ```
138 139  
139 140 配置边界:
... ... @@ -247,16 +248,20 @@ TRANSLATION_PORT=6006
247 248  
248 249 ```json
249 250 {
250   - "status": "healthy",
  251 + "status": "healthy",
251 252 "service": "translation",
252 253 "default_model": "llm",
253 254 "default_scene": "general",
254 255 "available_models": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"],
255 256 "enabled_capabilities": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"],
256   - "loaded_models": ["llm"]
  257 + "loaded_models": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"]
257 258 }
258 259 ```
259 260  
  261 +说明:
  262 +- translator service 进程启动时会一次性初始化全部已启用 capability
  263 +- 因此本地模型加载失败、依赖缺失、配置错误会在启动阶段直接暴露,而不是拖到首个在线请求
  264 +
260 265 ## 7. 代码调用方式
261 266  
262 267 业务侧统一这样调用:
... ... @@ -317,6 +322,7 @@ results = translator.translate(
317 322 - 通用大模型翻译
318 323 - 根据 `scene` 生成内部 prompt
319 324 - 更灵活,但成本和稳定性取决于上游模型
  325 +- 支持 Redis 翻译缓存
320 326  
321 327 ### 8.3 DeepL
322 328  
... ... @@ -327,6 +333,7 @@ results = translator.translate(
327 333 - 商业翻译 API
328 334 - scene 会映射到内部上下文
329 335 - 当前默认关闭
  336 +- 支持 Redis 翻译缓存
330 337  
331 338 ### 8.4 `facebook/nllb-200-distilled-600M`
332 339  
... ... @@ -338,6 +345,7 @@ results = translator.translate(
338 345 - 简介:多语种翻译:覆盖约 200 种语言。作为NLLB-200系列的蒸馏版本,该模型通过知识蒸馏技术将原130亿参数模型压缩至600M,同时保持了80%以上的翻译质量。
339 346 - 本地目录:`models/translation/facebook/nllb-200-distilled-600M`
340 347 - 当前磁盘占用:约 `2.4G`
  348 +- 支持 Redis 翻译缓存
341 349 - 模型类型:多语种 Seq2Seq 机器翻译模型
342 350 - 来源:Meta NLLB(No Language Left Behind)系列的 600M 蒸馏版
343 351 - 结构特点:
... ... @@ -424,6 +432,7 @@ results = translator.translate(
424 432 - encoder-decoder Seq2Seq
425 433 - 聚焦特定语言对
426 434 - 模型更小、加载更轻、吞吐更高
  435 +- 支持 Redis 翻译缓存
427 436  
428 437 ### 8.6 `opus-mt-en-zh`
429 438  
... ... @@ -441,6 +450,13 @@ results = translator.translate(
441 450 - encoder-decoder Seq2Seq
442 451 - 双语定向模型
443 452 - 更适合中英双向拆分部署
  453 +- 支持 Redis 翻译缓存
  454 +
  455 +### 8.7 翻译缓存
  456 +
  457 +- 所有 translation capability 都使用统一的 Redis 缓存层
  458 +- 每个 capability 通过各自的 `use_cache` 控制是否启用缓存
  459 +- 缓存 key 格式固定为 `trans:{model}:{target_lang}:{source_text[:4]}{sha256(source_text)}`
444 460  
445 461 ## 9. 本地模型安装与部署
446 462  
... ...
translation/backends/qwen_mt.py
1   -"""Qwen-MT translation backend with cache support."""
  1 +"""Qwen-MT translation backend."""
2 2  
3 3 from __future__ import annotations
4 4  
5   -import hashlib
6 5 import logging
7 6 import os
8 7 import re
9 8 import time
10 9 from typing import List, Optional, Sequence, Union
11 10  
12   -import redis
13 11 from openai import OpenAI
14 12  
15   -from config.env_config import DASHSCOPE_API_KEY, REDIS_CONFIG
16   -from config.services_config import get_translation_cache_config
  13 +from config.env_config import DASHSCOPE_API_KEY
17 14 from translation.languages import QWEN_LANGUAGE_CODES
18 15  
19 16 logger = logging.getLogger(__name__)
... ... @@ -26,7 +23,6 @@ class QwenMTTranslationBackend:
26 23 model: str,
27 24 base_url: str,
28 25 api_key: Optional[str] = None,
29   - use_cache: bool = True,
30 26 timeout: int = 10,
31 27 glossary_id: Optional[str] = None,
32 28 ):
... ... @@ -35,16 +31,8 @@ class QwenMTTranslationBackend:
35 31 self.qwen_model_name = self._normalize_model_name(model)
36 32 self.base_url = base_url
37 33 self.timeout = int(timeout)
38   - self.use_cache = bool(use_cache)
39 34 self.glossary_id = glossary_id
40 35  
41   - cache_cfg = get_translation_cache_config()
42   - self.cache_prefix = str(cache_cfg["key_prefix"])
43   - self.expire_seconds = int(cache_cfg["ttl_seconds"])
44   - self.cache_sliding_expiration = bool(cache_cfg["sliding_expiration"])
45   - self.cache_include_scene = bool(cache_cfg["key_include_scene"])
46   - self.cache_include_source_lang = bool(cache_cfg["key_include_source_lang"])
47   -
48 36 self._api_key = api_key or self._default_api_key(self.model)
49 37 self._qwen_client: Optional[OpenAI] = None
50 38 if self._api_key:
... ... @@ -55,10 +43,6 @@ class QwenMTTranslationBackend:
55 43 else:
56 44 logger.warning("DASHSCOPE_API_KEY not set; qwen-mt translation unavailable")
57 45  
58   - self.redis_client = None
59   - if self.use_cache and bool(cache_cfg["enabled"]):
60   - self.redis_client = self._init_redis_client()
61   -
62 46 @property
63 47 def supports_batch(self) -> bool:
64 48 return True
... ... @@ -82,38 +66,6 @@ class QwenMTTranslationBackend:
82 66 del model
83 67 return DASHSCOPE_API_KEY or os.getenv("DASHSCOPE_API_KEY")
84 68  
85   - def _init_redis_client(self):
86   - try:
87   - client = redis.Redis(
88   - host=REDIS_CONFIG.get("host", "localhost"),
89   - port=REDIS_CONFIG.get("port", 6479),
90   - password=REDIS_CONFIG.get("password"),
91   - decode_responses=True,
92   - socket_timeout=REDIS_CONFIG.get("socket_timeout", 1),
93   - socket_connect_timeout=REDIS_CONFIG.get("socket_connect_timeout", 1),
94   - retry_on_timeout=REDIS_CONFIG.get("retry_on_timeout", False),
95   - health_check_interval=10,
96   - )
97   - client.ping()
98   - return client
99   - except Exception as exc:
100   - logger.warning("Failed to initialize translation redis cache: %s", exc)
101   - return None
102   -
103   - def _build_cache_key(
104   - self,
105   - text: str,
106   - target_lang: str,
107   - source_lang: Optional[str],
108   - scene: Optional[str],
109   - ) -> str:
110   - src = (source_lang or "auto").strip().lower() if self.cache_include_source_lang else "-"
111   - tgt = (target_lang or "").strip().lower()
112   - scn = (scene or "").strip() if self.cache_include_scene else ""
113   - payload = f"model={self.model}\nsrc={src}\ntgt={tgt}\nscene={scn}\ntext={text}"
114   - digest = hashlib.sha256(payload.encode("utf-8")).hexdigest()
115   - return f"{self.cache_prefix}:{self.model}:{src}:{tgt}:{digest}"
116   -
117 69 def translate(
118 70 self,
119 71 text: Union[str, Sequence[str]],
... ... @@ -146,14 +98,7 @@ class QwenMTTranslationBackend:
146 98 if tgt == "zh" and (self._contains_chinese(text) or self._is_pure_number(text)):
147 99 return text
148 100  
149   - cached = self._get_cached_translation_redis(text, tgt, src, scene)
150   - if cached is not None:
151   - return cached
152   -
153 101 result = self._translate_qwen(text, tgt, src)
154   -
155   - if result is not None:
156   - self._set_cached_translation_redis(text, tgt, result, src, scene)
157 102 return result
158 103  
159 104 def _translate_qwen(
... ... @@ -197,41 +142,6 @@ class QwenMTTranslationBackend:
197 142 )
198 143 return None
199 144  
200   - def _get_cached_translation_redis(
201   - self,
202   - text: str,
203   - target_lang: str,
204   - source_lang: Optional[str] = None,
205   - scene: Optional[str] = None,
206   - ) -> Optional[str]:
207   - if not self.redis_client:
208   - return None
209   - key = self._build_cache_key(text, target_lang, source_lang, scene)
210   - try:
211   - value = self.redis_client.get(key)
212   - if value and self.cache_sliding_expiration:
213   - self.redis_client.expire(key, self.expire_seconds)
214   - return value
215   - except Exception as exc:
216   - logger.warning("Redis get translation cache failed: %s", exc)
217   - return None
218   -
219   - def _set_cached_translation_redis(
220   - self,
221   - text: str,
222   - target_lang: str,
223   - translation: str,
224   - source_lang: Optional[str] = None,
225   - scene: Optional[str] = None,
226   - ) -> None:
227   - if not self.redis_client:
228   - return
229   - key = self._build_cache_key(text, target_lang, source_lang, scene)
230   - try:
231   - self.redis_client.setex(key, self.expire_seconds, translation)
232   - except Exception as exc:
233   - logger.warning("Redis set translation cache failed: %s", exc)
234   -
235 145 @staticmethod
236 146 def _contains_chinese(text: str) -> bool:
237 147 return bool(re.search(r"[\u4e00-\u9fff]", text or ""))
... ...
translation/cache.py 0 → 100644
... ... @@ -0,0 +1,92 @@
  1 +"""Shared translation cache utilities."""
  2 +
  3 +from __future__ import annotations
  4 +
  5 +import hashlib
  6 +import logging
  7 +from typing import Mapping, Optional
  8 +
  9 +import redis
  10 +
  11 +from config.env_config import REDIS_CONFIG
  12 +
  13 +logger = logging.getLogger(__name__)
  14 +
  15 +
  16 +class TranslationCache:
  17 + """Redis-backed cache shared by all translation capabilities."""
  18 +
  19 + def __init__(self, config: Mapping[str, object]) -> None:
  20 + self.ttl_seconds = int(config["ttl_seconds"])
  21 + self.sliding_expiration = bool(config["sliding_expiration"])
  22 + self.redis_client = self._init_redis_client()
  23 +
  24 + @property
  25 + def available(self) -> bool:
  26 + return self.redis_client is not None
  27 +
  28 + def build_key(self, *, model: str, target_lang: str, source_text: str) -> str:
  29 + normalized_model = str(model or "").strip().lower()
  30 + normalized_target_lang = str(target_lang or "").strip().lower()
  31 + text = str(source_text or "")
  32 + text_prefix = text[:4]
  33 + digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
  34 + return f"trans:{normalized_model}:{normalized_target_lang}:{text_prefix}{digest}"
  35 +
  36 + def get(self, *, model: str, target_lang: str, source_text: str) -> Optional[str]:
  37 + if self.redis_client is None:
  38 + return None
  39 + key = self.build_key(model=model, target_lang=target_lang, source_text=source_text)
  40 + try:
  41 + value = self.redis_client.get(key)
  42 + logger.info(
  43 + "Translation cache %s | model=%s target_lang=%s text_len=%s key=%s",
  44 + "hit" if value is not None else "miss",
  45 + model,
  46 + target_lang,
  47 + len(str(source_text or "")),
  48 + key,
  49 + )
  50 + if value and self.sliding_expiration:
  51 + self.redis_client.expire(key, self.ttl_seconds)
  52 + return value
  53 + except Exception as exc:
  54 + logger.warning("Redis get translation cache failed: %s", exc)
  55 + return None
  56 +
  57 + def set(self, *, model: str, target_lang: str, source_text: str, translated_text: str) -> None:
  58 + if self.redis_client is None:
  59 + return
  60 + key = self.build_key(model=model, target_lang=target_lang, source_text=source_text)
  61 + try:
  62 + self.redis_client.setex(key, self.ttl_seconds, translated_text)
  63 + logger.info(
  64 + "Translation cache write | model=%s target_lang=%s text_len=%s result_len=%s ttl_seconds=%s key=%s",
  65 + model,
  66 + target_lang,
  67 + len(str(source_text or "")),
  68 + len(str(translated_text or "")),
  69 + self.ttl_seconds,
  70 + key,
  71 + )
  72 + except Exception as exc:
  73 + logger.warning("Redis set translation cache failed: %s", exc)
  74 +
  75 + @staticmethod
  76 + def _init_redis_client() -> Optional[redis.Redis]:
  77 + try:
  78 + client = redis.Redis(
  79 + host=REDIS_CONFIG.get("host", "localhost"),
  80 + port=REDIS_CONFIG.get("port", 6479),
  81 + password=REDIS_CONFIG.get("password"),
  82 + decode_responses=True,
  83 + socket_timeout=REDIS_CONFIG.get("socket_timeout", 1),
  84 + socket_connect_timeout=REDIS_CONFIG.get("socket_connect_timeout", 1),
  85 + retry_on_timeout=REDIS_CONFIG.get("retry_on_timeout", False),
  86 + health_check_interval=10,
  87 + )
  88 + client.ping()
  89 + return client
  90 + except Exception as exc:
  91 + logger.warning("Failed to initialize translation redis cache: %s", exc)
  92 + return None
... ...
translation/service.py
... ... @@ -3,10 +3,10 @@
3 3 from __future__ import annotations
4 4  
5 5 import logging
6   -import threading
7 6 from typing import Dict, List, Optional
8 7  
9 8 from config.services_config import get_translation_config
  9 +from translation.cache import TranslationCache
10 10 from translation.protocols import TranslateInput, TranslateOutput, TranslationBackendProtocol
11 11 from translation.settings import (
12 12 TranslationConfig,
... ... @@ -25,10 +25,10 @@ class TranslationService:
25 25 def __init__(self, config: Optional[TranslationConfig] = None) -> None:
26 26 self.config = config or get_translation_config()
27 27 self._enabled_capabilities = self._collect_enabled_capabilities()
28   - self._backends: Dict[str, TranslationBackendProtocol] = {}
29   - self._backend_lock = threading.Lock()
30 28 if not self._enabled_capabilities:
31 29 raise ValueError("No enabled translation backends found in services.translation.capabilities")
  30 + self._translation_cache = TranslationCache(self.config["cache"])
  31 + self._backends = self._initialize_backends()
32 32  
33 33 def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]:
34 34 enabled: Dict[str, Dict[str, object]] = {}
... ... @@ -59,6 +59,25 @@ class TranslationService:
59 59 raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'")
60 60 return factory(name=name, cfg=cfg)
61 61  
  62 + def _initialize_backends(self) -> Dict[str, TranslationBackendProtocol]:
  63 + backends: Dict[str, TranslationBackendProtocol] = {}
  64 + for name, capability_cfg in self._enabled_capabilities.items():
  65 + backend_type = str(capability_cfg["backend"])
  66 + logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type)
  67 + backends[name] = self._create_backend(
  68 + name=name,
  69 + backend_type=backend_type,
  70 + cfg=capability_cfg,
  71 + )
  72 + logger.info(
  73 + "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s",
  74 + name,
  75 + backend_type,
  76 + bool(capability_cfg.get("use_cache")),
  77 + getattr(backends[name], "model", name),
  78 + )
  79 + return backends
  80 +
62 81 def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
63 82 from translation.backends.qwen_mt import QwenMTTranslationBackend
64 83  
... ... @@ -67,7 +86,6 @@ class TranslationService:
67 86 model=str(cfg["model"]).strip(),
68 87 base_url=str(cfg["base_url"]).strip(),
69 88 api_key=cfg.get("api_key"),
70   - use_cache=bool(cfg["use_cache"]),
71 89 timeout=int(cfg["timeout_sec"]),
72 90 glossary_id=cfg.get("glossary_id"),
73 91 )
... ... @@ -138,26 +156,12 @@ class TranslationService:
138 156  
139 157 def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol:
140 158 normalized = normalize_translation_model(self.config, model)
141   - capability_cfg = self._enabled_capabilities.get(normalized)
142   - if capability_cfg is None:
  159 + backend = self._backends.get(normalized)
  160 + if backend is None:
143 161 raise ValueError(
144 162 f"Translation model '{normalized}' is not enabled. "
145 163 f"Available models: {', '.join(self.available_models) or 'none'}"
146 164 )
147   - backend = self._backends.get(normalized)
148   - if backend is not None:
149   - return backend
150   - with self._backend_lock:
151   - backend = self._backends.get(normalized)
152   - if backend is None:
153   - backend_type = str(capability_cfg["backend"])
154   - logger.info("Initializing translation backend | model=%s backend=%s", normalized, backend_type)
155   - backend = self._create_backend(
156   - name=normalized,
157   - backend_type=backend_type,
158   - cfg=capability_cfg,
159   - )
160   - self._backends[normalized] = backend
161 165 return backend
162 166  
163 167 def translate(
... ... @@ -169,11 +173,176 @@ class TranslationService:
169 173 model: Optional[str] = None,
170 174 scene: Optional[str] = None,
171 175 ) -> TranslateOutput:
172   - backend = self.get_backend(model)
  176 + normalized_model = normalize_translation_model(self.config, model)
  177 + backend = self.get_backend(normalized_model)
173 178 active_scene = normalize_translation_scene(self.config, scene)
174   - return backend.translate(
  179 + capability_cfg = self._enabled_capabilities[normalized_model]
  180 + use_cache = bool(capability_cfg.get("use_cache"))
  181 + text_count = 1 if isinstance(text, str) else len(list(text))
  182 + logger.info(
  183 + "Translation route | model=%s backend=%s scene=%s target_lang=%s source_lang=%s count=%s use_cache=%s cache_available=%s",
  184 + normalized_model,
  185 + getattr(backend, "model", normalized_model),
  186 + active_scene,
  187 + target_lang,
  188 + source_lang or "auto",
  189 + text_count,
  190 + use_cache,
  191 + self._translation_cache.available,
  192 + )
  193 + if not use_cache or not self._translation_cache.available:
  194 + return backend.translate(
  195 + text=text,
  196 + target_lang=target_lang,
  197 + source_lang=source_lang,
  198 + scene=active_scene,
  199 + )
  200 +
  201 + if isinstance(text, str):
  202 + return self._translate_with_cache(
  203 + backend,
  204 + text=text,
  205 + target_lang=target_lang,
  206 + source_lang=source_lang,
  207 + scene=active_scene,
  208 + model=normalized_model,
  209 + )
  210 +
  211 + return self._translate_batch_with_cache(
175 212 text=text,
176 213 target_lang=target_lang,
177 214 source_lang=source_lang,
  215 + backend=backend,
178 216 scene=active_scene,
  217 + model=normalized_model,
  218 + )
  219 +
  220 + def _translate_with_cache(
  221 + self,
  222 + backend: TranslationBackendProtocol,
  223 + *,
  224 + text: str,
  225 + target_lang: str,
  226 + source_lang: Optional[str],
  227 + scene: str,
  228 + model: str,
  229 + ) -> Optional[str]:
  230 + if not text.strip():
  231 + return text
  232 + cached = self._translation_cache.get(model=model, target_lang=target_lang, source_text=text)
  233 + if cached is not None:
  234 + logger.info(
  235 + "Translation cache served | model=%s scene=%s target_lang=%s source_lang=%s text_len=%s",
  236 + model,
  237 + scene,
  238 + target_lang,
  239 + source_lang or "auto",
  240 + len(text),
  241 + )
  242 + return cached
  243 + translated = backend.translate(
  244 + text=text,
  245 + target_lang=target_lang,
  246 + source_lang=source_lang,
  247 + scene=scene,
179 248 )
  249 + if translated is not None:
  250 + self._translation_cache.set(
  251 + model=model,
  252 + target_lang=target_lang,
  253 + source_text=text,
  254 + translated_text=translated,
  255 + )
  256 + logger.info(
  257 + "Translation backend result cached | model=%s scene=%s target_lang=%s source_lang=%s text_len=%s result_len=%s",
  258 + model,
  259 + scene,
  260 + target_lang,
  261 + source_lang or "auto",
  262 + len(text),
  263 + len(str(translated)),
  264 + )
  265 + else:
  266 + logger.warning(
  267 + "Translation backend returned empty result | model=%s scene=%s target_lang=%s source_lang=%s text_len=%s",
  268 + model,
  269 + scene,
  270 + target_lang,
  271 + source_lang or "auto",
  272 + len(text),
  273 + )
  274 + return translated
  275 +
  276 + def _translate_batch_with_cache(
  277 + self,
  278 + *,
  279 + text: TranslateInput,
  280 + target_lang: str,
  281 + source_lang: Optional[str],
  282 + backend: TranslationBackendProtocol,
  283 + scene: str,
  284 + model: str,
  285 + ) -> List[Optional[str]]:
  286 + texts = list(text)
  287 + results: List[Optional[str]] = [None] * len(texts)
  288 + misses: List[str] = []
  289 + miss_indices: List[int] = []
  290 + cache_hits = 0
  291 +
  292 + for idx, item in enumerate(texts):
  293 + normalized_text = "" if item is None else str(item)
  294 + if not normalized_text.strip():
  295 + results[idx] = normalized_text
  296 + continue
  297 + cached = self._translation_cache.get(
  298 + model=model,
  299 + target_lang=target_lang,
  300 + source_text=normalized_text,
  301 + )
  302 + if cached is not None:
  303 + results[idx] = cached
  304 + cache_hits += 1
  305 + continue
  306 + misses.append(normalized_text)
  307 + miss_indices.append(idx)
  308 +
  309 + logger.info(
  310 + "Translation batch cache summary | model=%s scene=%s target_lang=%s source_lang=%s total=%s cache_hits=%s cache_misses=%s",
  311 + model,
  312 + scene,
  313 + target_lang,
  314 + source_lang or "auto",
  315 + len(texts),
  316 + cache_hits,
  317 + len(misses),
  318 + )
  319 +
  320 + if misses:
  321 + translated = backend.translate(
  322 + text=misses,
  323 + target_lang=target_lang,
  324 + source_lang=source_lang,
  325 + scene=scene,
  326 + )
  327 + translated_list = translated if isinstance(translated, list) else [translated]
  328 + for idx, original_text, translated_text in zip(miss_indices, misses, translated_list):
  329 + results[idx] = translated_text
  330 + if translated_text is not None:
  331 + self._translation_cache.set(
  332 + model=model,
  333 + target_lang=target_lang,
  334 + source_text=original_text,
  335 + translated_text=translated_text,
  336 + )
  337 + else:
  338 + logger.warning(
  339 + "Translation batch item returned empty result | model=%s scene=%s target_lang=%s source_lang=%s item_index=%s text_len=%s",
  340 + model,
  341 + scene,
  342 + target_lang,
  343 + source_lang or "auto",
  344 + idx,
  345 + len(original_text),
  346 + )
  347 +
  348 + return results
... ...
translation/settings.py
... ... @@ -90,21 +90,11 @@ def _build_cache_config(raw_cache: Any) -> Dict[str, Any]:
90 90 if not isinstance(raw_cache, Mapping):
91 91 raise ValueError("services.translation.cache must be a mapping")
92 92 return {
93   - "enabled": _require_bool(raw_cache.get("enabled"), "services.translation.cache.enabled"),
94   - "key_prefix": _require_string(raw_cache.get("key_prefix"), "services.translation.cache.key_prefix"),
95 93 "ttl_seconds": _require_positive_int(raw_cache.get("ttl_seconds"), "services.translation.cache.ttl_seconds"),
96 94 "sliding_expiration": _require_bool(
97 95 raw_cache.get("sliding_expiration"),
98 96 "services.translation.cache.sliding_expiration",
99 97 ),
100   - "key_include_scene": _require_bool(
101   - raw_cache.get("key_include_scene"),
102   - "services.translation.cache.key_include_scene",
103   - ),
104   - "key_include_source_lang": _require_bool(
105   - raw_cache.get("key_include_source_lang"),
106   - "services.translation.cache.key_include_source_lang",
107   - ),
108 98 }
109 99  
110 100  
... ... @@ -131,12 +121,12 @@ def _build_capabilities(raw_capabilities: Any) -> Dict[str, Dict[str, Any]]:
131 121 def _validate_capability(name: str, capability: Mapping[str, Any]) -> None:
132 122 prefix = f"services.translation.capabilities.{name}"
133 123 backend = capability.get("backend")
  124 + _require_bool(capability.get("use_cache"), f"{prefix}.use_cache")
134 125  
135 126 if backend == "qwen_mt":
136 127 _require_string(capability.get("model"), f"{prefix}.model")
137 128 _require_http_url(capability.get("base_url"), f"{prefix}.base_url")
138 129 _require_positive_float(capability.get("timeout_sec"), f"{prefix}.timeout_sec")
139   - _require_bool(capability.get("use_cache"), f"{prefix}.use_cache")
140 130 return
141 131  
142 132 if backend == "llm":
... ...