Commit cd4ce66dc8c34567248091bc97356f0f00d32062

Authored by tangwang
1 parent c90f80ed

trans logs

api/translator_app.py
@@ -2,8 +2,12 @@ @@ -2,8 +2,12 @@
2 2
3 import argparse 3 import argparse
4 import logging 4 import logging
  5 +import os
  6 +import pathlib
  7 +import time
5 from contextlib import asynccontextmanager 8 from contextlib import asynccontextmanager
6 from functools import lru_cache 9 from functools import lru_cache
  10 +from logging.handlers import TimedRotatingFileHandler
7 from typing import List, Optional, Union 11 from typing import List, Optional, Union
8 12
9 import uvicorn 13 import uvicorn
@@ -20,12 +24,57 @@ from translation.settings import ( @@ -20,12 +24,57 @@ from translation.settings import (
20 normalize_translation_scene, 24 normalize_translation_scene,
21 ) 25 )
22 26
23 -# Configure logging  
24 -logging.basicConfig(  
25 - level=logging.INFO,  
26 - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'  
27 -) 27 +
  28 +def configure_translator_logging() -> None:
  29 + log_dir = pathlib.Path("logs")
  30 + verbose_dir = log_dir / "verbose"
  31 + log_dir.mkdir(exist_ok=True)
  32 + verbose_dir.mkdir(parents=True, exist_ok=True)
  33 +
  34 + log_level = os.getenv("LOG_LEVEL", "INFO").upper()
  35 + numeric_level = getattr(logging, log_level, logging.INFO)
  36 + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
  37 +
  38 + root_logger = logging.getLogger()
  39 + root_logger.setLevel(numeric_level)
  40 + root_logger.handlers.clear()
  41 +
  42 + console_handler = logging.StreamHandler()
  43 + console_handler.setLevel(numeric_level)
  44 + console_handler.setFormatter(formatter)
  45 + root_logger.addHandler(console_handler)
  46 +
  47 + file_handler = TimedRotatingFileHandler(
  48 + filename=log_dir / "translator_api.log",
  49 + when="midnight",
  50 + interval=1,
  51 + backupCount=30,
  52 + encoding="utf-8",
  53 + )
  54 + file_handler.setLevel(numeric_level)
  55 + file_handler.setFormatter(formatter)
  56 + root_logger.addHandler(file_handler)
  57 +
  58 + verbose_logger = logging.getLogger("translator.verbose")
  59 + verbose_logger.setLevel(numeric_level)
  60 + verbose_logger.handlers.clear()
  61 + verbose_logger.propagate = False
  62 +
  63 + verbose_handler = TimedRotatingFileHandler(
  64 + filename=verbose_dir / "translator_verbose.log",
  65 + when="midnight",
  66 + interval=1,
  67 + backupCount=30,
  68 + encoding="utf-8",
  69 + )
  70 + verbose_handler.setLevel(numeric_level)
  71 + verbose_handler.setFormatter(formatter)
  72 + verbose_logger.addHandler(verbose_handler)
  73 +
  74 +
  75 +configure_translator_logging()
28 logger = logging.getLogger(__name__) 76 logger = logging.getLogger(__name__)
  77 +verbose_logger = logging.getLogger("translator.verbose")
29 78
30 79
31 @lru_cache(maxsize=1) 80 @lru_cache(maxsize=1)
@@ -98,6 +147,37 @@ def _normalize_batch_result( @@ -98,6 +147,37 @@ def _normalize_batch_result(
98 return [translated[idx] if idx < len(translated) else None for idx, _ in enumerate(original)] 147 return [translated[idx] if idx < len(translated) else None for idx, _ in enumerate(original)]
99 148
100 149
  150 +def _text_preview(text: Optional[str], limit: int = 20) -> str:
  151 + normalized = str(text or "").replace("\n", "\\n")
  152 + return normalized[:limit]
  153 +
  154 +
  155 +def _request_metrics(text: Union[str, List[str]]) -> dict:
  156 + if isinstance(text, list):
  157 + lengths = [len(str(item or "")) for item in text]
  158 + return {
  159 + "request_count": len(text),
  160 + "lengths": lengths,
  161 + "first_preview": _text_preview(text[0] if text else ""),
  162 + }
  163 + return {
  164 + "request_count": 1,
  165 + "lengths": [len(str(text or ""))],
  166 + "first_preview": _text_preview(str(text or "")),
  167 + }
  168 +
  169 +
  170 +def _result_preview(translated: Union[str, List[Optional[str]], None]) -> str:
  171 + if isinstance(translated, list):
  172 + if not translated:
  173 + return ""
  174 + first = translated[0]
  175 + return _text_preview("" if first is None else str(first))
  176 + if translated is None:
  177 + return ""
  178 + return _text_preview(str(translated))
  179 +
  180 +
101 def _translate_batch( 181 def _translate_batch(
102 service: TranslationService, 182 service: TranslationService,
103 raw_text: List[str], 183 raw_text: List[str],
@@ -108,6 +188,17 @@ def _translate_batch( @@ -108,6 +188,17 @@ def _translate_batch(
108 scene: str, 188 scene: str,
109 ) -> List[Optional[str]]: 189 ) -> List[Optional[str]]:
110 backend = service.get_backend(model) 190 backend = service.get_backend(model)
  191 + logger.info(
  192 + "Translation batch dispatch | model=%s scene=%s target_lang=%s source_lang=%s count=%s lengths=%s first_preview=%s supports_batch=%s",
  193 + model,
  194 + scene,
  195 + target_lang,
  196 + source_lang or "auto",
  197 + len(raw_text),
  198 + [len(str(item or "")) for item in raw_text],
  199 + _text_preview(raw_text[0] if raw_text else ""),
  200 + bool(getattr(backend, "supports_batch", False)),
  201 + )
111 if getattr(backend, "supports_batch", False): 202 if getattr(backend, "supports_batch", False):
112 try: 203 try:
113 translated = service.translate( 204 translated = service.translate(
@@ -117,6 +208,13 @@ def _translate_batch( @@ -117,6 +208,13 @@ def _translate_batch(
117 model=model, 208 model=model,
118 scene=scene, 209 scene=scene,
119 ) 210 )
  211 + verbose_logger.info(
  212 + "Translation batch result | model=%s scene=%s count=%s first_result=%s",
  213 + model,
  214 + scene,
  215 + len(raw_text),
  216 + _result_preview(translated),
  217 + )
120 return _normalize_batch_result(raw_text, translated) 218 return _normalize_batch_result(raw_text, translated)
121 except ValueError: 219 except ValueError:
122 raise 220 raise
@@ -139,7 +237,17 @@ def _translate_batch( @@ -139,7 +237,17 @@ def _translate_batch(
139 except ValueError: 237 except ValueError:
140 raise 238 raise
141 except Exception as exc: 239 except Exception as exc:
142 - logger.warning("Per-item translation failed: %s", exc, exc_info=True) 240 + logger.warning(
  241 + "Per-item translation failed | model=%s scene=%s target_lang=%s source_lang=%s item_len=%s item_preview=%s error=%s",
  242 + model,
  243 + scene,
  244 + target_lang,
  245 + source_lang or "auto",
  246 + len(str(item or "")),
  247 + _text_preview(str(item or "")),
  248 + exc,
  249 + exc_info=True,
  250 + )
143 out = None 251 out = None
144 results.append(out) 252 results.append(out)
145 return results 253 return results
@@ -147,19 +255,25 @@ def _translate_batch( @@ -147,19 +255,25 @@ def _translate_batch(
147 255
148 @asynccontextmanager 256 @asynccontextmanager
149 async def lifespan(_: FastAPI): 257 async def lifespan(_: FastAPI):
150 - """Warm the default backend on process startup.""" 258 + """Initialize all enabled translation backends on process startup."""
151 logger.info("Starting Translation Service API") 259 logger.info("Starting Translation Service API")
152 service = get_translation_service() 260 service = get_translation_service()
153 - default_backend = service.get_backend(service.config["default_model"])  
154 logger.info( 261 logger.info(
155 - "Translation service ready | default_model=%s available_models=%s loaded_models=%s", 262 + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s",
156 service.config["default_model"], 263 service.config["default_model"],
  264 + service.config["default_scene"],
157 service.available_models, 265 service.available_models,
158 service.loaded_models, 266 service.loaded_models,
159 ) 267 )
160 logger.info( 268 logger.info(
161 - "Default translation backend warmed up | model=%s",  
162 - getattr(default_backend, "model", service.config["default_model"]), 269 + "Translation backends initialized on startup | models=%s",
  270 + service.loaded_models,
  271 + )
  272 + verbose_logger.info(
  273 + "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s",
  274 + service.available_models,
  275 + service.config["cache"]["ttl_seconds"],
  276 + service.config["cache"]["sliding_expiration"],
163 ) 277 )
164 yield 278 yield
165 279
@@ -189,6 +303,12 @@ async def health_check(): @@ -189,6 +303,12 @@ async def health_check():
189 """Health check endpoint.""" 303 """Health check endpoint."""
190 try: 304 try:
191 service = get_translation_service() 305 service = get_translation_service()
  306 + logger.info(
  307 + "Health check | default_model=%s default_scene=%s loaded_models=%s",
  308 + service.config["default_model"],
  309 + service.config["default_scene"],
  310 + service.loaded_models,
  311 + )
192 return { 312 return {
193 "status": "healthy", 313 "status": "healthy",
194 "service": "translation", 314 "service": "translation",
@@ -216,12 +336,33 @@ async def translate(request: TranslationRequest): @@ -216,12 +336,33 @@ async def translate(request: TranslationRequest):
216 if not request.target_lang: 336 if not request.target_lang:
217 raise HTTPException(status_code=400, detail="target_lang is required") 337 raise HTTPException(status_code=400, detail="target_lang is required")
218 338
  339 + request_started = time.perf_counter()
219 try: 340 try:
220 service = get_translation_service() 341 service = get_translation_service()
221 scene = _normalize_scene(service, request.scene) 342 scene = _normalize_scene(service, request.scene)
222 model = _normalize_model(service, request.model) 343 model = _normalize_model(service, request.model)
223 translator = service.get_backend(model) 344 translator = service.get_backend(model)
224 raw_text = request.text 345 raw_text = request.text
  346 + metrics = _request_metrics(raw_text)
  347 + logger.info(
  348 + "Translation request | model=%s scene=%s target_lang=%s source_lang=%s count=%s lengths=%s first_preview=%s backend=%s",
  349 + model,
  350 + scene,
  351 + request.target_lang,
  352 + request.source_lang or "auto",
  353 + metrics["request_count"],
  354 + metrics["lengths"],
  355 + metrics["first_preview"],
  356 + getattr(translator, "model", model),
  357 + )
  358 + verbose_logger.info(
  359 + "Translation request detail | model=%s scene=%s target_lang=%s source_lang=%s payload=%s",
  360 + model,
  361 + scene,
  362 + request.target_lang,
  363 + request.source_lang or "auto",
  364 + raw_text,
  365 + )
225 366
226 if isinstance(raw_text, list): 367 if isinstance(raw_text, list):
227 results = _translate_batch( 368 results = _translate_batch(
@@ -232,6 +373,22 @@ async def translate(request: TranslationRequest): @@ -232,6 +373,22 @@ async def translate(request: TranslationRequest):
232 model=model, 373 model=model,
233 scene=scene, 374 scene=scene,
234 ) 375 )
  376 + latency_ms = (time.perf_counter() - request_started) * 1000
  377 + logger.info(
  378 + "Translation response | model=%s scene=%s count=%s first_result=%s latency_ms=%.2f",
  379 + model,
  380 + scene,
  381 + len(raw_text),
  382 + _result_preview(results),
  383 + latency_ms,
  384 + )
  385 + verbose_logger.info(
  386 + "Translation response detail | model=%s scene=%s translated=%s latency_ms=%.2f",
  387 + model,
  388 + scene,
  389 + results,
  390 + latency_ms,
  391 + )
235 return TranslationResponse( 392 return TranslationResponse(
236 text=raw_text, 393 text=raw_text,
237 target_lang=request.target_lang, 394 target_lang=request.target_lang,
@@ -253,6 +410,22 @@ async def translate(request: TranslationRequest): @@ -253,6 +410,22 @@ async def translate(request: TranslationRequest):
253 if translated_text is None: 410 if translated_text is None:
254 raise HTTPException(status_code=500, detail="Translation failed") 411 raise HTTPException(status_code=500, detail="Translation failed")
255 412
  413 + latency_ms = (time.perf_counter() - request_started) * 1000
  414 + logger.info(
  415 + "Translation response | model=%s scene=%s count=1 first_result=%s latency_ms=%.2f",
  416 + model,
  417 + scene,
  418 + _result_preview(translated_text),
  419 + latency_ms,
  420 + )
  421 + verbose_logger.info(
  422 + "Translation response detail | model=%s scene=%s translated=%s latency_ms=%.2f",
  423 + model,
  424 + scene,
  425 + translated_text,
  426 + latency_ms,
  427 + )
  428 +
256 return TranslationResponse( 429 return TranslationResponse(
257 text=raw_text, 430 text=raw_text,
258 target_lang=request.target_lang, 431 target_lang=request.target_lang,
@@ -263,12 +436,22 @@ async def translate(request: TranslationRequest): @@ -263,12 +436,22 @@ async def translate(request: TranslationRequest):
263 scene=scene, 436 scene=scene,
264 ) 437 )
265 438
266 - except HTTPException: 439 + except HTTPException as exc:
  440 + latency_ms = (time.perf_counter() - request_started) * 1000
  441 + logger.warning(
  442 + "Translation request failed | status_code=%s detail=%s latency_ms=%.2f",
  443 + exc.status_code,
  444 + exc.detail,
  445 + latency_ms,
  446 + )
267 raise 447 raise
268 except ValueError as e: 448 except ValueError as e:
  449 + latency_ms = (time.perf_counter() - request_started) * 1000
  450 + logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True)
269 raise HTTPException(status_code=400, detail=str(e)) from e 451 raise HTTPException(status_code=400, detail=str(e)) from e
270 except Exception as e: 452 except Exception as e:
271 - logger.error(f"Translation error: {e}", exc_info=True) 453 + latency_ms = (time.perf_counter() - request_started) * 1000
  454 + logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True)
272 raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}") 455 raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
273 456
274 457
config/config.yaml
@@ -106,12 +106,8 @@ services: @@ -106,12 +106,8 @@ services:
106 default_scene: "general" 106 default_scene: "general"
107 timeout_sec: 10.0 107 timeout_sec: 10.0
108 cache: 108 cache:
109 - enabled: true  
110 - key_prefix: "trans:v2"  
111 ttl_seconds: 62208000 109 ttl_seconds: 62208000
112 sliding_expiration: true 110 sliding_expiration: true
113 - key_include_scene: true  
114 - key_include_source_lang: true  
115 capabilities: 111 capabilities:
116 qwen-mt: 112 qwen-mt:
117 enabled: true 113 enabled: true
@@ -126,12 +122,14 @@ services: @@ -126,12 +122,14 @@ services:
126 model: "qwen-flash" 122 model: "qwen-flash"
127 base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1" 123 base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
128 timeout_sec: 30.0 124 timeout_sec: 30.0
  125 + use_cache: true
129 deepl: 126 deepl:
130 - enabled: false 127 + enabled: true
131 backend: "deepl" 128 backend: "deepl"
132 api_url: "https://api.deepl.com/v2/translate" 129 api_url: "https://api.deepl.com/v2/translate"
133 timeout_sec: 10.0 130 timeout_sec: 10.0
134 glossary_id: "" 131 glossary_id: ""
  132 + use_cache: true
135 nllb-200-distilled-600m: 133 nllb-200-distilled-600m:
136 enabled: true 134 enabled: true
137 backend: "local_nllb" 135 backend: "local_nllb"
@@ -144,6 +142,7 @@ services: @@ -144,6 +142,7 @@ services:
144 max_new_tokens: 64 142 max_new_tokens: 64
145 num_beams: 1 143 num_beams: 1
146 attn_implementation: "sdpa" 144 attn_implementation: "sdpa"
  145 + use_cache: true
147 opus-mt-zh-en: 146 opus-mt-zh-en:
148 enabled: true 147 enabled: true
149 backend: "local_marian" 148 backend: "local_marian"
@@ -155,6 +154,7 @@ services: @@ -155,6 +154,7 @@ services:
155 max_input_length: 256 154 max_input_length: 256
156 max_new_tokens: 256 155 max_new_tokens: 256
157 num_beams: 1 156 num_beams: 1
  157 + use_cache: true
158 opus-mt-en-zh: 158 opus-mt-en-zh:
159 enabled: true 159 enabled: true
160 backend: "local_marian" 160 backend: "local_marian"
@@ -166,6 +166,7 @@ services: @@ -166,6 +166,7 @@ services:
166 max_input_length: 256 166 max_input_length: 256
167 max_new_tokens: 256 167 max_new_tokens: 256
168 num_beams: 1 168 num_beams: 1
  169 + use_cache: true
169 embedding: 170 embedding:
170 provider: "http" # http 171 provider: "http" # http
171 base_url: "http://127.0.0.1:6005" 172 base_url: "http://127.0.0.1:6005"
config/env_config.py
@@ -42,8 +42,6 @@ REDIS_CONFIG = { @@ -42,8 +42,6 @@ REDIS_CONFIG = {
42 'socket_connect_timeout': int(os.getenv('REDIS_SOCKET_CONNECT_TIMEOUT', 1)), 42 'socket_connect_timeout': int(os.getenv('REDIS_SOCKET_CONNECT_TIMEOUT', 1)),
43 'retry_on_timeout': os.getenv('REDIS_RETRY_ON_TIMEOUT', 'False').lower() == 'true', 43 'retry_on_timeout': os.getenv('REDIS_RETRY_ON_TIMEOUT', 'False').lower() == 'true',
44 'cache_expire_days': int(os.getenv('REDIS_CACHE_EXPIRE_DAYS', 360*2)), # 6 months 44 'cache_expire_days': int(os.getenv('REDIS_CACHE_EXPIRE_DAYS', 360*2)), # 6 months
45 - 'translation_cache_expire_days': int(os.getenv('REDIS_TRANSLATION_CACHE_EXPIRE_DAYS', 360*2)),  
46 - 'translation_cache_prefix': os.getenv('REDIS_TRANSLATION_CACHE_PREFIX', 'trans'),  
47 # Embedding 缓存 key 前缀,例如 "embedding" 45 # Embedding 缓存 key 前缀,例如 "embedding"
48 'embedding_cache_prefix': os.getenv('REDIS_EMBEDDING_CACHE_PREFIX', 'embedding'), 46 'embedding_cache_prefix': os.getenv('REDIS_EMBEDDING_CACHE_PREFIX', 'embedding'),
49 } 47 }
docs/工作总结-微服务性能优化与架构.md
@@ -88,7 +88,7 @@ instruction: "Given a shopping query, rank product titles by relevance" @@ -88,7 +88,7 @@
88 - **配置入口**:`config/config.yaml` → `services.translation`,显式声明 `service_url`、`default_model`、`default_scene`、各 capability 的 `backend`、`base_url/api_url`、timeout 与本地模型运行参数。 88 - **配置入口**:`config/config.yaml` → `services.translation`,显式声明 `service_url`、`default_model`、`default_scene`、各 capability 的 `backend`、`base_url/api_url`、timeout 与本地模型运行参数。
89 - **内部规则收口**:scene 集合、语言码映射、LLM prompt 模板、本地模型方向约束统一放在 `translation/` 内部,不再散落在 `config/`、`query/` 等位置。 89 - **内部规则收口**:scene 集合、语言码映射、LLM prompt 模板、本地模型方向约束统一放在 `translation/` 内部,不再散落在 `config/`、`query/` 等位置。
90 - **调用位置**:QueryParser 与 Indexer 均通过 `translation.create_translation_client()` 获取客户端,不写死 URL 或模型名。 90 - **调用位置**:QueryParser 与 Indexer 均通过 `translation.create_translation_client()` 获取客户端,不写死 URL 或模型名。
91 -- **缓存**:`services.translation.cache` 支持 `key_prefix: "trans:v2"`、`ttl_seconds`、`sliding_expiration` 等,翻译结果写 Redis,减轻重复请求对限速的影响 91 +- **缓存**:translator service 对所有 translation capability 统一接入 Redis 缓存;每个 capability 通过 `use_cache` 控制开关,key 格式固定为 `trans:{model}:{target_lang}:{source_text[:4]}{sha256}`
92 - **场景支撑**:在线索引(indexer)与 query 请求(QueryParser)共用同一套 provider 配置;可按环境或租户通过修改 `config.yaml` 或环境变量切换 provider/model。 92 - **场景支撑**:在线索引(indexer)与 query 请求(QueryParser)共用同一套 provider 配置;可按环境或租户通过修改 `config.yaml` 或环境变量切换 provider/model。
93 - **待配合**:**金伟侧对索引侧翻译调用做流量控制**(限流/排队/批量聚合),避免索引高峰打满 qwen 限速,影响在线 query 翻译。 93 - **待配合**:**金伟侧对索引侧翻译调用做流量控制**(限流/排队/批量聚合),避免索引高峰打满 qwen 限速,影响在线 query 翻译。
94 94
docs/缓存与Redis使用说明.md
@@ -12,7 +12,6 @@ @@ -12,7 +12,6 @@
12 - **Password**:`REDIS_PASSWORD` 12 - **Password**:`REDIS_PASSWORD`
13 - **Socket & 超时**:`REDIS_SOCKET_TIMEOUT` / `REDIS_SOCKET_CONNECT_TIMEOUT` / `REDIS_RETRY_ON_TIMEOUT` 13 - **Socket & 超时**:`REDIS_SOCKET_TIMEOUT` / `REDIS_SOCKET_CONNECT_TIMEOUT` / `REDIS_RETRY_ON_TIMEOUT`
14 - **通用缓存 TTL**:`REDIS_CACHE_EXPIRE_DAYS`(默认 `360*2` 天,代码注释为 “6 months”) 14 - **通用缓存 TTL**:`REDIS_CACHE_EXPIRE_DAYS`(默认 `360*2` 天,代码注释为 “6 months”)
15 -- **翻译缓存 TTL & 前缀**:`REDIS_TRANSLATION_CACHE_EXPIRE_DAYS`、`REDIS_TRANSLATION_CACHE_PREFIX`  
16 15
17 --- 16 ---
18 17
@@ -21,7 +20,7 @@ @@ -21,7 +20,7 @@
21 | 模块 / 场景 | Key 模板 | Value 内容示例 | 过期策略 | 备注 | 20 | 模块 / 场景 | Key 模板 | Value 内容示例 | 过期策略 | 备注 |
22 |------------|----------|----------------|----------|------| 21 |------------|----------|----------------|----------|------|
23 | 向量缓存(text/image embedding) | `{EMBEDDING_CACHE_PREFIX}:{query_or_url}` / `{EMBEDDING_CACHE_PREFIX}:image:{url_or_path}` | **BF16 bytes**(每维 2 字节大端存储),读取后恢复为 `np.float32` | TTL=`REDIS_CONFIG["cache_expire_days"]` 天;访问时滑动过期 | 见 `embeddings/text_encoder.py`(文本)与 `embeddings/image_encoder.py`(图片);前缀由 `REDIS_CONFIG["embedding_cache_prefix"]` 控制 | 22 | 向量缓存(text/image embedding) | `{EMBEDDING_CACHE_PREFIX}:{query_or_url}` / `{EMBEDDING_CACHE_PREFIX}:image:{url_or_path}` | **BF16 bytes**(每维 2 字节大端存储),读取后恢复为 `np.float32` | TTL=`REDIS_CONFIG["cache_expire_days"]` 天;访问时滑动过期 | 见 `embeddings/text_encoder.py`(文本)与 `embeddings/image_encoder.py`(图片);前缀由 `REDIS_CONFIG["embedding_cache_prefix"]` 控制 |
24 -| 翻译结果缓存(Qwen-MT 翻译) | `{cache_prefix}:{model}:{src}:{tgt}:{sha256(payload)}` | 机翻后的单条字符串 | TTL=`services.translation.cache.ttl_seconds` 秒;可配置滑动过期 | 见 `translation/backends/qwen_mt.py` + `config/config.yaml` | 23 +| 翻译结果缓存(translator service) | `trans:{model}:{target_lang}:{source_text[:4]}{sha256(source_text)}` | 机翻后的单条字符串 | TTL=`services.translation.cache.ttl_seconds` 秒;可配置滑动过期 | 见 `translation/service.py` + `config/config.yaml` |
25 | 商品内容理解缓存(anchors / 语义属性 / tags) | `{ANCHOR_CACHE_PREFIX}:{tenant_or_global}:{target_lang}:{md5(title)}` | `json.dumps(dict)`,包含 id/title/category/tags/anchor_text 等 | TTL=`ANCHOR_CACHE_EXPIRE_DAYS` 天 | 见 `indexer/product_enrich.py` | 24 | 商品内容理解缓存(anchors / 语义属性 / tags) | `{ANCHOR_CACHE_PREFIX}:{tenant_or_global}:{target_lang}:{md5(title)}` | `json.dumps(dict)`,包含 id/title/category/tags/anchor_text 等 | TTL=`ANCHOR_CACHE_EXPIRE_DAYS` 天 | 见 `indexer/product_enrich.py` |
26 25
27 下面按模块详细说明。 26 下面按模块详细说明。
@@ -71,34 +70,29 @@ @@ -71,34 +70,29 @@
71 70
72 --- 71 ---
73 72
74 -## 3. 翻译结果缓存(translation/backends/qwen_mt.py) 73 +## 3. 翻译结果缓存(translation/service.py)
75 74
76 -- **代码位置**:`translation/backends/qwen_mt.py` 中 `QwenMTTranslationBackend`  
77 -- **用途**:缓存 Qwen-MT 翻译(及 translator service 复用的翻译)结果,减少云端请求,遵守限速。  
78 -- **配置入口**:`config/config.yaml -> services.translation.cache`,统一由 `config/services_config.get_translation_cache_config()` 解析。 75 +- **代码位置**:`translation/service.py`
  76 +- **用途**:统一缓存所有 translation capability 的翻译结果。
  77 +- **配置入口**:
  78 + - `config/config.yaml -> services.translation.cache`
  79 + - `config/config.yaml -> services.translation.capabilities.*.use_cache`
79 80
80 ### 3.1 Key 设计 81 ### 3.1 Key 设计
81 82
82 -- 内部构造函数:`_build_cache_key(...)` 83 +- 内部构造函数:`TranslationCache.build_key(...)`
83 - 模板: 84 - 模板:
84 85
85 ```text 86 ```text
86 -{cache_prefix}:{model}:{src}:{tgt}:{sha256(payload)} 87 +trans:{model}:{target_lang}:{source_text[:4]}{sha256(source_text)}
87 ``` 88 ```
88 89
89 其中: 90 其中:
90 91
91 -- `cache_prefix`:来自 `services.translation.cache.key_prefix`,默认 `trans:v2`;  
92 -- `model`:如 `"qwen-mt"`;  
93 -- `src`:源语言(如 `zh` / `en` / `auto`),是否包含在 key 中由 `key_include_source_lang` 控制;  
94 -- `tgt`:目标语言,如 `en` / `zh`;  
95 -- `sha256(payload)`:对以下内容整体做 SHA-256:  
96 - - `model`  
97 - - `src` / `tgt`  
98 - - `scene`(受 `key_include_scene` 控制)  
99 - - 原始 `text`  
100 -  
101 -> 注意:所有 key 设计集中在 `_build_cache_key`,**不要在其他位置手动拼翻译缓存 key**。 92 +- `model`:capability 名称,如 `qwen-mt`、`llm`、`opus-mt-zh-en`
  93 +- `target_lang`:目标语言,如 `en` / `zh`
  94 +- `source_text[:4]`:原文前 4 个字符
  95 +- `sha256(source_text)`:对完整原文做 SHA-256
102 96
103 ### 3.2 Value 与类型 97 ### 3.2 Value 与类型
104 98
@@ -115,20 +109,25 @@ @@ -115,20 +109,25 @@
115 services: 109 services:
116 translation: 110 translation:
117 cache: 111 cache:
118 - enabled: true  
119 - key_prefix: "trans:v2"  
120 ttl_seconds: 62208000 # 默认约 720 天 112 ttl_seconds: 62208000 # 默认约 720 天
121 sliding_expiration: true 113 sliding_expiration: true
122 - key_include_scene: true  
123 - key_include_source_lang: true 114 + capabilities:
  115 + qwen-mt:
  116 + use_cache: true
  117 + llm:
  118 + use_cache: true
  119 + deepl:
  120 + use_cache: true
  121 + nllb-200-distilled-600m:
  122 + use_cache: true
  123 + opus-mt-zh-en:
  124 + use_cache: true
  125 + opus-mt-en-zh:
  126 + use_cache: true
124 ``` 127 ```
125 128
126 - 运行时行为: 129 - 运行时行为:
127 - - 创建 `Translator` 时,从 `cache_cfg` 读取:  
128 - - `self.cache_prefix`  
129 - - `self.expire_seconds`  
130 - - `self.cache_sliding_expiration`  
131 - - `self.cache_include_*` 一系列布尔开关; 130 + - translator service 启动时初始化共享 Redis cache;
132 - **读缓存**: 131 - **读缓存**:
133 - 命中后,若 `sliding_expiration=True`,会调用 `redis.expire(key, expire_seconds)`; 132 - 命中后,若 `sliding_expiration=True`,会调用 `redis.expire(key, expire_seconds)`;
134 - **写缓存**: 133 - **写缓存**:
@@ -136,8 +135,8 @@ services: @@ -136,8 +135,8 @@ services:
136 135
137 ### 3.4 关联模块 136 ### 3.4 关联模块
138 137
139 -- `api/translator_app.py` 会通过 `translation.backends.qwen_mt.QwenMTTranslationBackend` 复用同一套缓存逻辑;  
140 -- 文档说明:`docs/翻译模块说明.md` 中提到“推荐通过 Redis 翻译缓存复用结果”。 138 +- `api/translator_app.py` 通过 `TranslationService` 统一复用同一套缓存逻辑;
  139 +- 所有翻译后端都通过 `TranslationService` 接入缓存。
141 140
142 --- 141 ---
143 142
scripts/redis/redis_cache_health_check.py
@@ -43,7 +43,6 @@ PROJECT_ROOT = Path(__file__).parent.parent.parent @@ -43,7 +43,6 @@ PROJECT_ROOT = Path(__file__).parent.parent.parent
43 sys.path.insert(0, str(PROJECT_ROOT)) 43 sys.path.insert(0, str(PROJECT_ROOT))
44 44
45 from config.env_config import REDIS_CONFIG # type: ignore 45 from config.env_config import REDIS_CONFIG # type: ignore
46 -from config.services_config import get_translation_cache_config # type: ignore  
47 from embeddings.bf16 import decode_embedding_from_redis # type: ignore 46 from embeddings.bf16 import decode_embedding_from_redis # type: ignore
48 47
49 48
@@ -66,13 +65,11 @@ def _load_known_cache_types() -> Dict[str, CacheTypeConfig]: @@ -66,13 +65,11 @@
66 description="文本向量缓存(embeddings/text_encoder.py)", 65 description="文本向量缓存(embeddings/text_encoder.py)",
67 ) 66 )
68 67
69 - # translation 缓存:prefix 来自 services.translation.cache.key_prefix  
70 - cache_cfg = get_translation_cache_config()  
71 - trans_prefix = cache_cfg.get("key_prefix", "trans:v2") 68 + # translation 缓存:统一前缀 trans
72 cache_types["translation"] = CacheTypeConfig( 69 cache_types["translation"] = CacheTypeConfig(
73 name="translation", 70 name="translation",
74 - pattern=f"{trans_prefix}:*",  
75 - description="翻译结果缓存(query/qwen_mt_translate.Translator)", 71 + pattern="trans:*",
  72 + description="翻译结果缓存(translation/service.py)",
76 ) 73 )
77 74
78 # anchors 缓存:prefix 来自 REDIS_CONFIG['anchor_cache_prefix'](若存在),否则 product_anchors 75 # anchors 缓存:prefix 来自 REDIS_CONFIG['anchor_cache_prefix'](若存在),否则 product_anchors
@@ -400,4 +397,3 @@ def main() -> None: @@ -400,4 +397,3 @@
400 397
401 if __name__ == "__main__": 398 if __name__ == "__main__":
402 main() 399 main()
403 -  
tests/ci/test_service_api_contracts.py
@@ -625,12 +625,8 @@ def translator_client(monkeypatch): @@ -625,12 +625,8 @@ def translator_client(monkeypatch):
625 } 625 }
626 }, 626 },
627 "cache": { 627 "cache": {
628 - "enabled": True,  
629 - "key_prefix": "trans:v2",  
630 "ttl_seconds": 60, 628 "ttl_seconds": 60,
631 "sliding_expiration": True, 629 "sliding_expiration": True,
632 - "key_include_scene": True,  
633 - "key_include_source_lang": True,  
634 }, 630 },
635 } 631 }
636 self.available_models = ["qwen-mt"] 632 self.available_models = ["qwen-mt"]
@@ -681,12 +677,8 @@ def test_translator_api_failure_returns_500(monkeypatch): @@ -681,12 +677,8 @@ def test_translator_api_failure_returns_500(monkeypatch):
681 } 677 }
682 }, 678 },
683 "cache": { 679 "cache": {
684 - "enabled": True,  
685 - "key_prefix": "trans:v2",  
686 "ttl_seconds": 60, 680 "ttl_seconds": 60,
687 "sliding_expiration": True, 681 "sliding_expiration": True,
688 - "key_include_scene": True,  
689 - "key_include_source_lang": True,  
690 }, 682 },
691 } 683 }
692 self.available_models = ["qwen-mt"] 684 self.available_models = ["qwen-mt"]
tests/test_translation_local_backends.py
@@ -96,7 +96,7 @@ def test_nllb_uses_src_lang_and_forced_bos(monkeypatch): @@ -96,7 +96,7 @@ def test_nllb_uses_src_lang_and_forced_bos(monkeypatch):
96 assert backend.seq2seq_model.last_generate_kwargs["forced_bos_token_id"] == 202 96 assert backend.seq2seq_model.last_generate_kwargs["forced_bos_token_id"] == 202
97 97
98 98
99 -def test_translation_service_lazy_loads_enabled_backends(monkeypatch): 99 +def test_translation_service_preloads_enabled_backends(monkeypatch):
100 created = [] 100 created = []
101 101
102 def _fake_create_backend(self, *, name, backend_type, cfg): 102 def _fake_create_backend(self, *, name, backend_type, cfg):
@@ -126,6 +126,7 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch): @@ -126,6 +126,7 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch):
126 "opus-mt-en-zh": { 126 "opus-mt-en-zh": {
127 "enabled": True, 127 "enabled": True,
128 "backend": "local_marian", 128 "backend": "local_marian",
  129 + "use_cache": True,
129 "model_id": "dummy", 130 "model_id": "dummy",
130 "model_dir": "dummy", 131 "model_dir": "dummy",
131 "device": "cpu", 132 "device": "cpu",
@@ -138,6 +139,7 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch): @@ -138,6 +139,7 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch):
138 "nllb-200-distilled-600m": { 139 "nllb-200-distilled-600m": {
139 "enabled": True, 140 "enabled": True,
140 "backend": "local_nllb", 141 "backend": "local_nllb",
  142 + "use_cache": True,
141 "model_id": "dummy", 143 "model_id": "dummy",
142 "model_dir": "dummy", 144 "model_dir": "dummy",
143 "device": "cpu", 145 "device": "cpu",
@@ -149,22 +151,19 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch): @@ -149,22 +151,19 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch):
149 }, 151 },
150 }, 152 },
151 "cache": { 153 "cache": {
152 - "enabled": True,  
153 - "key_prefix": "trans:v2",  
154 "ttl_seconds": 60, 154 "ttl_seconds": 60,
155 "sliding_expiration": True, 155 "sliding_expiration": True,
156 - "key_include_scene": True,  
157 - "key_include_source_lang": True,  
158 }, 156 },
159 } 157 }
160 158
161 service = TranslationService(config) 159 service = TranslationService(config)
162 160
163 assert service.available_models == ["opus-mt-en-zh", "nllb-200-distilled-600m"] 161 assert service.available_models == ["opus-mt-en-zh", "nllb-200-distilled-600m"]
164 - assert service.loaded_models == [] 162 + assert service.loaded_models == ["opus-mt-en-zh", "nllb-200-distilled-600m"]
  163 + assert created == [
  164 + ("opus-mt-en-zh", "local_marian"),
  165 + ("nllb-200-distilled-600m", "local_nllb"),
  166 + ]
165 167
166 backend = service.get_backend("opus-mt-en-zh") 168 backend = service.get_backend("opus-mt-en-zh")
167 -  
168 assert backend.model == "opus-mt-en-zh" 169 assert backend.model == "opus-mt-en-zh"
169 - assert created == [("opus-mt-en-zh", "local_marian")]  
170 - assert service.loaded_models == ["opus-mt-en-zh"]  
tests/test_translator_failure_semantics.py
1 -from translation.backends.qwen_mt import QwenMTTranslationBackend 1 +from translation.cache import TranslationCache
  2 +from translation.service import TranslationService
2 3
3 4
4 -class _RecordingRedis: 5 +class _FakeCache:
5 def __init__(self): 6 def __init__(self):
6 - self.setex_calls = []  
7 -  
8 - def setex(self, key, ttl, value):  
9 - self.setex_calls.append((key, ttl, value))  
10 -  
11 -  
12 -def test_translate_failure_returns_none_and_skips_cache(monkeypatch):  
13 - translator = QwenMTTranslationBackend(  
14 - capability_name="qwen-mt",  
15 - model="qwen-mt-flash",  
16 - base_url="https://dashscope-us.aliyuncs.com/compatible-mode/v1",  
17 - api_key="dummy-key",  
18 - use_cache=False,  
19 - )  
20 - fake_redis = _RecordingRedis()  
21 - translator.use_cache = True  
22 - translator.redis_client = fake_redis  
23 - translator.cache_prefix = "trans"  
24 - translator.expire_seconds = 60  
25 -  
26 - monkeypatch.setattr(translator, "_translate_qwen", lambda *args, **kwargs: None)  
27 -  
28 - result = translator.translate(  
29 - text="商品标题",  
30 - target_lang="en",  
31 - source_lang="zh",  
32 - scene="sku_name",  
33 - )  
34 -  
35 - assert result is None  
36 - assert fake_redis.setex_calls == [] 7 + self.available = True
  8 + self.storage = {}
  9 + self.get_calls = []
  10 + self.set_calls = []
  11 +
  12 + def get(self, *, model, target_lang, source_text):
  13 + self.get_calls.append((model, target_lang, source_text))
  14 + return self.storage.get((model, target_lang, source_text))
  15 +
  16 + def set(self, *, model, target_lang, source_text, translated_text):
  17 + self.set_calls.append((model, target_lang, source_text, translated_text))
  18 + self.storage[(model, target_lang, source_text)] = translated_text
  19 +
  20 +
  21 +def test_translation_cache_key_format(monkeypatch):
  22 + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
  23 + cache = TranslationCache({"ttl_seconds": 60, "sliding_expiration": True})
  24 + key = cache.build_key(model="llm", target_lang="en", source_text="商品标题")
  25 + assert key.startswith("trans:llm:en:商品标题")
  26 + assert len(key) == len("trans:llm:en:商品标题") + 64
  27 +
  28 +
  29 +def test_service_caches_all_capabilities(monkeypatch):
  30 + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
  31 + created = {}
  32 +
  33 + def _fake_create_backend(self, *, name, backend_type, cfg):
  34 + del self, backend_type, cfg
  35 +
  36 + class _Backend:
  37 + model = name
  38 +
  39 + @property
  40 + def supports_batch(self):
  41 + return True
  42 +
  43 + def translate(self, text, target_lang, source_lang=None, scene=None):
  44 + del target_lang, source_lang, scene
  45 + if isinstance(text, list):
  46 + return [f"{name}:{item}" for item in text]
  47 + return f"{name}:{text}"
  48 +
  49 + backend = _Backend()
  50 + created[name] = backend
  51 + return backend
  52 +
  53 + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend)
  54 + config = {
  55 + "service_url": "http://127.0.0.1:6006",
  56 + "timeout_sec": 10.0,
  57 + "default_model": "llm",
  58 + "default_scene": "general",
  59 + "capabilities": {
  60 + "llm": {
  61 + "enabled": True,
  62 + "backend": "llm",
  63 + "model": "dummy-llm",
  64 + "base_url": "https://example.com",
  65 + "timeout_sec": 10.0,
  66 + "use_cache": True,
  67 + },
  68 + "opus-mt-zh-en": {
  69 + "enabled": True,
  70 + "backend": "local_marian",
  71 + "model_id": "dummy",
  72 + "model_dir": "dummy",
  73 + "device": "cpu",
  74 + "torch_dtype": "float32",
  75 + "batch_size": 8,
  76 + "max_input_length": 16,
  77 + "max_new_tokens": 16,
  78 + "num_beams": 1,
  79 + "use_cache": True,
  80 + },
  81 + },
  82 + "cache": {
  83 + "ttl_seconds": 60,
  84 + "sliding_expiration": True,
  85 + },
  86 + }
  87 +
  88 + service = TranslationService(config)
  89 + fake_cache = _FakeCache()
  90 + service._translation_cache = fake_cache
  91 +
  92 + first = service.translate("商品标题", target_lang="en", source_lang="zh", model="llm")
  93 + second = service.translate("商品标题", target_lang="en", source_lang="zh", model="llm")
  94 + batch = service.translate(["连衣裙", "衬衫"], target_lang="en", source_lang="zh", model="opus-mt-zh-en")
  95 +
  96 + assert first == "llm:商品标题"
  97 + assert second == "llm:商品标题"
  98 + assert batch == ["opus-mt-zh-en:连衣裙", "opus-mt-zh-en:衬衫"]
  99 + assert fake_cache.get_calls == [
  100 + ("llm", "en", "商品标题"),
  101 + ("llm", "en", "商品标题"),
  102 + ("opus-mt-zh-en", "en", "连衣裙"),
  103 + ("opus-mt-zh-en", "en", "衬衫"),
  104 + ]
  105 + assert fake_cache.set_calls == [
  106 + ("llm", "en", "商品标题", "llm:商品标题"),
  107 + ("opus-mt-zh-en", "en", "连衣裙", "opus-mt-zh-en:连衣裙"),
  108 + ("opus-mt-zh-en", "en", "衬衫", "opus-mt-zh-en:衬衫"),
  109 + ]
translation/README.md
@@ -75,12 +75,8 @@ services: @@ -75,12 +75,8 @@ services:
75 default_scene: "general" 75 default_scene: "general"
76 timeout_sec: 10.0 76 timeout_sec: 10.0
77 cache: 77 cache:
78 - enabled: true  
79 - key_prefix: "trans:v2"  
80 ttl_seconds: 62208000 78 ttl_seconds: 62208000
81 sliding_expiration: true 79 sliding_expiration: true
82 - key_include_scene: true  
83 - key_include_source_lang: true  
84 capabilities: 80 capabilities:
85 qwen-mt: 81 qwen-mt:
86 enabled: true 82 enabled: true
@@ -95,11 +91,13 @@ services: @@ -95,11 +91,13 @@ services:
95 model: "qwen-flash" 91 model: "qwen-flash"
96 base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1" 92 base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
97 timeout_sec: 30.0 93 timeout_sec: 30.0
  94 + use_cache: true
98 deepl: 95 deepl:
99 enabled: false 96 enabled: false
100 backend: "deepl" 97 backend: "deepl"
101 api_url: "https://api.deepl.com/v2/translate" 98 api_url: "https://api.deepl.com/v2/translate"
102 timeout_sec: 10.0 99 timeout_sec: 10.0
  100 + use_cache: true
103 nllb-200-distilled-600m: 101 nllb-200-distilled-600m:
104 enabled: true 102 enabled: true
105 backend: "local_nllb" 103 backend: "local_nllb"
@@ -112,6 +110,7 @@ services: @@ -112,6 +110,7 @@ services:
112 max_new_tokens: 64 110 max_new_tokens: 64
113 num_beams: 1 111 num_beams: 1
114 attn_implementation: "sdpa" 112 attn_implementation: "sdpa"
  113 + use_cache: true
115 opus-mt-zh-en: 114 opus-mt-zh-en:
116 enabled: true 115 enabled: true
117 backend: "local_marian" 116 backend: "local_marian"
@@ -123,6 +122,7 @@ services: @@ -123,6 +122,7 @@ services:
123 max_input_length: 256 122 max_input_length: 256
124 max_new_tokens: 256 123 max_new_tokens: 256
125 num_beams: 1 124 num_beams: 1
  125 + use_cache: true
126 opus-mt-en-zh: 126 opus-mt-en-zh:
127 enabled: true 127 enabled: true
128 backend: "local_marian" 128 backend: "local_marian"
@@ -134,6 +134,7 @@ services: @@ -134,6 +134,7 @@ services:
134 max_input_length: 256 134 max_input_length: 256
135 max_new_tokens: 256 135 max_new_tokens: 256
136 num_beams: 1 136 num_beams: 1
  137 + use_cache: true
137 ``` 138 ```
138 139
139 配置边界: 140 配置边界:
@@ -247,16 +248,20 @@ TRANSLATION_PORT=6006 @@ -247,16 +248,20 @@ TRANSLATION_PORT=6006
247 248
248 ```json 249 ```json
249 { 250 {
250 - "status": "healthy", 251 + "status": "healthy",
251 "service": "translation", 252 "service": "translation",
252 "default_model": "llm", 253 "default_model": "llm",
253 "default_scene": "general", 254 "default_scene": "general",
254 "available_models": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"], 255 "available_models": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"],
255 "enabled_capabilities": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"], 256 "enabled_capabilities": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"],
256 - "loaded_models": ["llm"] 257 + "loaded_models": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"]
257 } 258 }
258 ``` 259 ```
259 260
  261 +说明:
  262 +- translator service 进程启动时会一次性初始化全部已启用 capability
  263 +- 因此本地模型加载失败、依赖缺失、配置错误会在启动阶段直接暴露,而不是拖到首个在线请求
  264 +
260 ## 7. 代码调用方式 265 ## 7. 代码调用方式
261 266
262 业务侧统一这样调用: 267 业务侧统一这样调用:
@@ -317,6 +322,7 @@ results = translator.translate( @@ -317,6 +322,7 @@ results = translator.translate(
317 - 通用大模型翻译 322 - 通用大模型翻译
318 - 根据 `scene` 生成内部 prompt 323 - 根据 `scene` 生成内部 prompt
319 - 更灵活,但成本和稳定性取决于上游模型 324 - 更灵活,但成本和稳定性取决于上游模型
  325 +- 支持 Redis 翻译缓存
320 326
321 ### 8.3 DeepL 327 ### 8.3 DeepL
322 328
@@ -327,6 +333,7 @@ results = translator.translate( @@ -327,6 +333,7 @@ results = translator.translate(
327 - 商业翻译 API 333 - 商业翻译 API
328 - scene 会映射到内部上下文 334 - scene 会映射到内部上下文
329 - 当前默认关闭 335 - 当前默认关闭
  336 +- 支持 Redis 翻译缓存
330 337
331 ### 8.4 `facebook/nllb-200-distilled-600M` 338 ### 8.4 `facebook/nllb-200-distilled-600M`
332 339
@@ -338,6 +345,7 @@ results = translator.translate( @@ -338,6 +345,7 @@ results = translator.translate(
338 - 简介:多语种翻译:覆盖约 200 种语言。作为NLLB-200系列的蒸馏版本,该模型通过知识蒸馏技术将原130亿参数模型压缩至600M,同时保持了80%以上的翻译质量。 345 - 简介:多语种翻译:覆盖约 200 种语言。作为NLLB-200系列的蒸馏版本,该模型通过知识蒸馏技术将原130亿参数模型压缩至600M,同时保持了80%以上的翻译质量。
339 - 本地目录:`models/translation/facebook/nllb-200-distilled-600M` 346 - 本地目录:`models/translation/facebook/nllb-200-distilled-600M`
340 - 当前磁盘占用:约 `2.4G` 347 - 当前磁盘占用:约 `2.4G`
  348 +- 支持 Redis 翻译缓存
341 - 模型类型:多语种 Seq2Seq 机器翻译模型 349 - 模型类型:多语种 Seq2Seq 机器翻译模型
342 - 来源:Meta NLLB(No Language Left Behind)系列的 600M 蒸馏版 350 - 来源:Meta NLLB(No Language Left Behind)系列的 600M 蒸馏版
343 - 结构特点: 351 - 结构特点:
@@ -424,6 +432,7 @@ results = translator.translate( @@ -424,6 +432,7 @@ results = translator.translate(
424 - encoder-decoder Seq2Seq 432 - encoder-decoder Seq2Seq
425 - 聚焦特定语言对 433 - 聚焦特定语言对
426 - 模型更小、加载更轻、吞吐更高 434 - 模型更小、加载更轻、吞吐更高
  435 +- 支持 Redis 翻译缓存
427 436
428 ### 8.6 `opus-mt-en-zh` 437 ### 8.6 `opus-mt-en-zh`
429 438
@@ -441,6 +450,13 @@ results = translator.translate( @@ -441,6 +450,13 @@ results = translator.translate(
441 - encoder-decoder Seq2Seq 450 - encoder-decoder Seq2Seq
442 - 双语定向模型 451 - 双语定向模型
443 - 更适合中英双向拆分部署 452 - 更适合中英双向拆分部署
  453 +- 支持 Redis 翻译缓存
  454 +
  455 +### 8.7 翻译缓存
  456 +
  457 +- 所有 translation capability 都使用统一的 Redis 缓存层
  458 +- 每个 capability 通过各自的 `use_cache` 控制是否启用缓存
  459 +- 缓存 key 格式固定为 `trans:{model}:{target_lang}:{source_text[:4]}{sha256}`
444 460
445 ## 9. 本地模型安装与部署 461 ## 9. 本地模型安装与部署
446 462
translation/backends/qwen_mt.py
1 -"""Qwen-MT translation backend with cache support.""" 1 +"""Qwen-MT translation backend."""
2 2
3 from __future__ import annotations 3 from __future__ import annotations
4 4
5 -import hashlib  
6 import logging 5 import logging
7 import os 6 import os
8 import re 7 import re
9 import time 8 import time
10 from typing import List, Optional, Sequence, Union 9 from typing import List, Optional, Sequence, Union
11 10
12 -import redis  
13 from openai import OpenAI 11 from openai import OpenAI
14 12
15 -from config.env_config import DASHSCOPE_API_KEY, REDIS_CONFIG  
16 -from config.services_config import get_translation_cache_config 13 +from config.env_config import DASHSCOPE_API_KEY
17 from translation.languages import QWEN_LANGUAGE_CODES 14 from translation.languages import QWEN_LANGUAGE_CODES
18 15
19 logger = logging.getLogger(__name__) 16 logger = logging.getLogger(__name__)
@@ -26,7 +23,6 @@ class QwenMTTranslationBackend: @@ -26,7 +23,6 @@ class QwenMTTranslationBackend:
26 model: str, 23 model: str,
27 base_url: str, 24 base_url: str,
28 api_key: Optional[str] = None, 25 api_key: Optional[str] = None,
29 - use_cache: bool = True,  
30 timeout: int = 10, 26 timeout: int = 10,
31 glossary_id: Optional[str] = None, 27 glossary_id: Optional[str] = None,
32 ): 28 ):
@@ -35,16 +31,8 @@ class QwenMTTranslationBackend: @@ -35,16 +31,8 @@ class QwenMTTranslationBackend:
35 self.qwen_model_name = self._normalize_model_name(model) 31 self.qwen_model_name = self._normalize_model_name(model)
36 self.base_url = base_url 32 self.base_url = base_url
37 self.timeout = int(timeout) 33 self.timeout = int(timeout)
38 - self.use_cache = bool(use_cache)  
39 self.glossary_id = glossary_id 34 self.glossary_id = glossary_id
40 35
41 - cache_cfg = get_translation_cache_config()  
42 - self.cache_prefix = str(cache_cfg["key_prefix"])  
43 - self.expire_seconds = int(cache_cfg["ttl_seconds"])  
44 - self.cache_sliding_expiration = bool(cache_cfg["sliding_expiration"])  
45 - self.cache_include_scene = bool(cache_cfg["key_include_scene"])  
46 - self.cache_include_source_lang = bool(cache_cfg["key_include_source_lang"])  
47 -  
48 self._api_key = api_key or self._default_api_key(self.model) 36 self._api_key = api_key or self._default_api_key(self.model)
49 self._qwen_client: Optional[OpenAI] = None 37 self._qwen_client: Optional[OpenAI] = None
50 if self._api_key: 38 if self._api_key:
@@ -55,10 +43,6 @@ class QwenMTTranslationBackend: @@ -55,10 +43,6 @@ class QwenMTTranslationBackend:
55 else: 43 else:
56 logger.warning("DASHSCOPE_API_KEY not set; qwen-mt translation unavailable") 44 logger.warning("DASHSCOPE_API_KEY not set; qwen-mt translation unavailable")
57 45
58 - self.redis_client = None  
59 - if self.use_cache and bool(cache_cfg["enabled"]):  
60 - self.redis_client = self._init_redis_client()  
61 -  
62 @property 46 @property
63 def supports_batch(self) -> bool: 47 def supports_batch(self) -> bool:
64 return True 48 return True
@@ -82,38 +66,6 @@ class QwenMTTranslationBackend: @@ -82,38 +66,6 @@ class QwenMTTranslationBackend:
82 del model 66 del model
83 return DASHSCOPE_API_KEY or os.getenv("DASHSCOPE_API_KEY") 67 return DASHSCOPE_API_KEY or os.getenv("DASHSCOPE_API_KEY")
84 68
85 - def _init_redis_client(self):  
86 - try:  
87 - client = redis.Redis(  
88 - host=REDIS_CONFIG.get("host", "localhost"),  
89 - port=REDIS_CONFIG.get("port", 6479),  
90 - password=REDIS_CONFIG.get("password"),  
91 - decode_responses=True,  
92 - socket_timeout=REDIS_CONFIG.get("socket_timeout", 1),  
93 - socket_connect_timeout=REDIS_CONFIG.get("socket_connect_timeout", 1),  
94 - retry_on_timeout=REDIS_CONFIG.get("retry_on_timeout", False),  
95 - health_check_interval=10,  
96 - )  
97 - client.ping()  
98 - return client  
99 - except Exception as exc:  
100 - logger.warning("Failed to initialize translation redis cache: %s", exc)  
101 - return None  
102 -  
103 - def _build_cache_key(  
104 - self,  
105 - text: str,  
106 - target_lang: str,  
107 - source_lang: Optional[str],  
108 - scene: Optional[str],  
109 - ) -> str:  
110 - src = (source_lang or "auto").strip().lower() if self.cache_include_source_lang else "-"  
111 - tgt = (target_lang or "").strip().lower()  
112 - scn = (scene or "").strip() if self.cache_include_scene else ""  
113 - payload = f"model={self.model}\nsrc={src}\ntgt={tgt}\nscene={scn}\ntext={text}"  
114 - digest = hashlib.sha256(payload.encode("utf-8")).hexdigest()  
115 - return f"{self.cache_prefix}:{self.model}:{src}:{tgt}:{digest}"  
116 -  
117 def translate( 69 def translate(
118 self, 70 self,
119 text: Union[str, Sequence[str]], 71 text: Union[str, Sequence[str]],
@@ -146,14 +98,7 @@ class QwenMTTranslationBackend: @@ -146,14 +98,7 @@ class QwenMTTranslationBackend:
146 if tgt == "zh" and (self._contains_chinese(text) or self._is_pure_number(text)): 98 if tgt == "zh" and (self._contains_chinese(text) or self._is_pure_number(text)):
147 return text 99 return text
148 100
149 - cached = self._get_cached_translation_redis(text, tgt, src, scene)  
150 - if cached is not None:  
151 - return cached  
152 -  
153 result = self._translate_qwen(text, tgt, src) 101 result = self._translate_qwen(text, tgt, src)
154 -  
155 - if result is not None:  
156 - self._set_cached_translation_redis(text, tgt, result, src, scene)  
157 return result 102 return result
158 103
159 def _translate_qwen( 104 def _translate_qwen(
@@ -197,41 +142,6 @@ class QwenMTTranslationBackend: @@ -197,41 +142,6 @@ class QwenMTTranslationBackend:
197 ) 142 )
198 return None 143 return None
199 144
200 - def _get_cached_translation_redis(  
201 - self,  
202 - text: str,  
203 - target_lang: str,  
204 - source_lang: Optional[str] = None,  
205 - scene: Optional[str] = None,  
206 - ) -> Optional[str]:  
207 - if not self.redis_client:  
208 - return None  
209 - key = self._build_cache_key(text, target_lang, source_lang, scene)  
210 - try:  
211 - value = self.redis_client.get(key)  
212 - if value and self.cache_sliding_expiration:  
213 - self.redis_client.expire(key, self.expire_seconds)  
214 - return value  
215 - except Exception as exc:  
216 - logger.warning("Redis get translation cache failed: %s", exc)  
217 - return None  
218 -  
219 - def _set_cached_translation_redis(  
220 - self,  
221 - text: str,  
222 - target_lang: str,  
223 - translation: str,  
224 - source_lang: Optional[str] = None,  
225 - scene: Optional[str] = None,  
226 - ) -> None:  
227 - if not self.redis_client:  
228 - return  
229 - key = self._build_cache_key(text, target_lang, source_lang, scene)  
230 - try:  
231 - self.redis_client.setex(key, self.expire_seconds, translation)  
232 - except Exception as exc:  
233 - logger.warning("Redis set translation cache failed: %s", exc)  
234 -  
235 @staticmethod 145 @staticmethod
236 def _contains_chinese(text: str) -> bool: 146 def _contains_chinese(text: str) -> bool:
237 return bool(re.search(r"[\u4e00-\u9fff]", text or "")) 147 return bool(re.search(r"[\u4e00-\u9fff]", text or ""))
translation/cache.py 0 → 100644
@@ -0,0 +1,92 @@ @@ -0,0 +1,92 @@
  1 +"""Shared translation cache utilities."""
  2 +
  3 +from __future__ import annotations
  4 +
  5 +import hashlib
  6 +import logging
  7 +from typing import Mapping, Optional
  8 +
  9 +import redis
  10 +
  11 +from config.env_config import REDIS_CONFIG
  12 +
  13 +logger = logging.getLogger(__name__)
  14 +
  15 +
  16 +class TranslationCache:
  17 + """Redis-backed cache shared by all translation capabilities."""
  18 +
  19 + def __init__(self, config: Mapping[str, object]) -> None:
  20 + self.ttl_seconds = int(config["ttl_seconds"])
  21 + self.sliding_expiration = bool(config["sliding_expiration"])
  22 + self.redis_client = self._init_redis_client()
  23 +
  24 + @property
  25 + def available(self) -> bool:
  26 + return self.redis_client is not None
  27 +
  28 + def build_key(self, *, model: str, target_lang: str, source_text: str) -> str:
  29 + normalized_model = str(model or "").strip().lower()
  30 + normalized_target_lang = str(target_lang or "").strip().lower()
  31 + text = str(source_text or "")
  32 + text_prefix = text[:4]
  33 + digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
  34 + return f"trans:{normalized_model}:{normalized_target_lang}:{text_prefix}{digest}"
  35 +
  36 + def get(self, *, model: str, target_lang: str, source_text: str) -> Optional[str]:
  37 + if self.redis_client is None:
  38 + return None
  39 + key = self.build_key(model=model, target_lang=target_lang, source_text=source_text)
  40 + try:
  41 + value = self.redis_client.get(key)
  42 + logger.info(
  43 + "Translation cache %s | model=%s target_lang=%s text_len=%s key=%s",
  44 + "hit" if value is not None else "miss",
  45 + model,
  46 + target_lang,
  47 + len(str(source_text or "")),
  48 + key,
  49 + )
  50 + if value and self.sliding_expiration:
  51 + self.redis_client.expire(key, self.ttl_seconds)
  52 + return value
  53 + except Exception as exc:
  54 + logger.warning("Redis get translation cache failed: %s", exc)
  55 + return None
  56 +
  57 + def set(self, *, model: str, target_lang: str, source_text: str, translated_text: str) -> None:
  58 + if self.redis_client is None:
  59 + return
  60 + key = self.build_key(model=model, target_lang=target_lang, source_text=source_text)
  61 + try:
  62 + self.redis_client.setex(key, self.ttl_seconds, translated_text)
  63 + logger.info(
  64 + "Translation cache write | model=%s target_lang=%s text_len=%s result_len=%s ttl_seconds=%s key=%s",
  65 + model,
  66 + target_lang,
  67 + len(str(source_text or "")),
  68 + len(str(translated_text or "")),
  69 + self.ttl_seconds,
  70 + key,
  71 + )
  72 + except Exception as exc:
  73 + logger.warning("Redis set translation cache failed: %s", exc)
  74 +
  75 + @staticmethod
  76 + def _init_redis_client() -> Optional[redis.Redis]:
  77 + try:
  78 + client = redis.Redis(
  79 + host=REDIS_CONFIG.get("host", "localhost"),
  80 + port=REDIS_CONFIG.get("port", 6479),
  81 + password=REDIS_CONFIG.get("password"),
  82 + decode_responses=True,
  83 + socket_timeout=REDIS_CONFIG.get("socket_timeout", 1),
  84 + socket_connect_timeout=REDIS_CONFIG.get("socket_connect_timeout", 1),
  85 + retry_on_timeout=REDIS_CONFIG.get("retry_on_timeout", False),
  86 + health_check_interval=10,
  87 + )
  88 + client.ping()
  89 + return client
  90 + except Exception as exc:
  91 + logger.warning("Failed to initialize translation redis cache: %s", exc)
  92 + return None
translation/service.py
@@ -3,10 +3,10 @@ @@ -3,10 +3,10 @@
3 from __future__ import annotations 3 from __future__ import annotations
4 4
5 import logging 5 import logging
6 -import threading  
7 from typing import Dict, List, Optional 6 from typing import Dict, List, Optional
8 7
9 from config.services_config import get_translation_config 8 from config.services_config import get_translation_config
  9 +from translation.cache import TranslationCache
10 from translation.protocols import TranslateInput, TranslateOutput, TranslationBackendProtocol 10 from translation.protocols import TranslateInput, TranslateOutput, TranslationBackendProtocol
11 from translation.settings import ( 11 from translation.settings import (
12 TranslationConfig, 12 TranslationConfig,
@@ -25,10 +25,10 @@ class TranslationService: @@ -25,10 +25,10 @@ class TranslationService:
25 def __init__(self, config: Optional[TranslationConfig] = None) -> None: 25 def __init__(self, config: Optional[TranslationConfig] = None) -> None:
26 self.config = config or get_translation_config() 26 self.config = config or get_translation_config()
27 self._enabled_capabilities = self._collect_enabled_capabilities() 27 self._enabled_capabilities = self._collect_enabled_capabilities()
28 - self._backends: Dict[str, TranslationBackendProtocol] = {}  
29 - self._backend_lock = threading.Lock()  
30 if not self._enabled_capabilities: 28 if not self._enabled_capabilities:
31 raise ValueError("No enabled translation backends found in services.translation.capabilities") 29 raise ValueError("No enabled translation backends found in services.translation.capabilities")
  30 + self._translation_cache = TranslationCache(self.config["cache"])
  31 + self._backends = self._initialize_backends()
32 32
33 def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]: 33 def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]:
34 enabled: Dict[str, Dict[str, object]] = {} 34 enabled: Dict[str, Dict[str, object]] = {}
@@ -59,6 +59,25 @@ class TranslationService: @@ -59,6 +59,25 @@ class TranslationService:
59 raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'") 59 raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'")
60 return factory(name=name, cfg=cfg) 60 return factory(name=name, cfg=cfg)
61 61
  62 + def _initialize_backends(self) -> Dict[str, TranslationBackendProtocol]:
  63 + backends: Dict[str, TranslationBackendProtocol] = {}
  64 + for name, capability_cfg in self._enabled_capabilities.items():
  65 + backend_type = str(capability_cfg["backend"])
  66 + logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type)
  67 + backends[name] = self._create_backend(
  68 + name=name,
  69 + backend_type=backend_type,
  70 + cfg=capability_cfg,
  71 + )
  72 + logger.info(
  73 + "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s",
  74 + name,
  75 + backend_type,
  76 + bool(capability_cfg.get("use_cache")),
  77 + getattr(backends[name], "model", name),
  78 + )
  79 + return backends
  80 +
62 def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: 81 def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
63 from translation.backends.qwen_mt import QwenMTTranslationBackend 82 from translation.backends.qwen_mt import QwenMTTranslationBackend
64 83
@@ -67,7 +86,6 @@ class TranslationService: @@ -67,7 +86,6 @@ class TranslationService:
67 model=str(cfg["model"]).strip(), 86 model=str(cfg["model"]).strip(),
68 base_url=str(cfg["base_url"]).strip(), 87 base_url=str(cfg["base_url"]).strip(),
69 api_key=cfg.get("api_key"), 88 api_key=cfg.get("api_key"),
70 - use_cache=bool(cfg["use_cache"]),  
71 timeout=int(cfg["timeout_sec"]), 89 timeout=int(cfg["timeout_sec"]),
72 glossary_id=cfg.get("glossary_id"), 90 glossary_id=cfg.get("glossary_id"),
73 ) 91 )
@@ -138,26 +156,12 @@ class TranslationService: @@ -138,26 +156,12 @@ class TranslationService:
138 156
139 def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: 157 def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol:
140 normalized = normalize_translation_model(self.config, model) 158 normalized = normalize_translation_model(self.config, model)
141 - capability_cfg = self._enabled_capabilities.get(normalized)  
142 - if capability_cfg is None: 159 + backend = self._backends.get(normalized)
  160 + if backend is None:
143 raise ValueError( 161 raise ValueError(
144 f"Translation model '{normalized}' is not enabled. " 162 f"Translation model '{normalized}' is not enabled. "
145 f"Available models: {', '.join(self.available_models) or 'none'}" 163 f"Available models: {', '.join(self.available_models) or 'none'}"
146 ) 164 )
147 - backend = self._backends.get(normalized)  
148 - if backend is not None:  
149 - return backend  
150 - with self._backend_lock:  
151 - backend = self._backends.get(normalized)  
152 - if backend is None:  
153 - backend_type = str(capability_cfg["backend"])  
154 - logger.info("Initializing translation backend | model=%s backend=%s", normalized, backend_type)  
155 - backend = self._create_backend(  
156 - name=normalized,  
157 - backend_type=backend_type,  
158 - cfg=capability_cfg,  
159 - )  
160 - self._backends[normalized] = backend  
161 return backend 165 return backend
162 166
163 def translate( 167 def translate(
@@ -169,11 +173,176 @@ class TranslationService: @@ -169,11 +173,176 @@ class TranslationService:
169 model: Optional[str] = None, 173 model: Optional[str] = None,
170 scene: Optional[str] = None, 174 scene: Optional[str] = None,
171 ) -> TranslateOutput: 175 ) -> TranslateOutput:
172 - backend = self.get_backend(model) 176 + normalized_model = normalize_translation_model(self.config, model)
  177 + backend = self.get_backend(normalized_model)
173 active_scene = normalize_translation_scene(self.config, scene) 178 active_scene = normalize_translation_scene(self.config, scene)
174 - return backend.translate( 179 + capability_cfg = self._enabled_capabilities[normalized_model]
  180 + use_cache = bool(capability_cfg.get("use_cache"))
  181 + text_count = 1 if isinstance(text, str) else len(list(text))
  182 + logger.info(
  183 + "Translation route | model=%s backend=%s scene=%s target_lang=%s source_lang=%s count=%s use_cache=%s cache_available=%s",
  184 + normalized_model,
  185 + getattr(backend, "model", normalized_model),
  186 + active_scene,
  187 + target_lang,
  188 + source_lang or "auto",
  189 + text_count,
  190 + use_cache,
  191 + self._translation_cache.available,
  192 + )
  193 + if not use_cache or not self._translation_cache.available:
  194 + return backend.translate(
  195 + text=text,
  196 + target_lang=target_lang,
  197 + source_lang=source_lang,
  198 + scene=active_scene,
  199 + )
  200 +
  201 + if isinstance(text, str):
  202 + return self._translate_with_cache(
  203 + backend,
  204 + text=text,
  205 + target_lang=target_lang,
  206 + source_lang=source_lang,
  207 + scene=active_scene,
  208 + model=normalized_model,
  209 + )
  210 +
  211 + return self._translate_batch_with_cache(
175 text=text, 212 text=text,
176 target_lang=target_lang, 213 target_lang=target_lang,
177 source_lang=source_lang, 214 source_lang=source_lang,
  215 + backend=backend,
178 scene=active_scene, 216 scene=active_scene,
  217 + model=normalized_model,
  218 + )
  219 +
  220 + def _translate_with_cache(
  221 + self,
  222 + backend: TranslationBackendProtocol,
  223 + *,
  224 + text: str,
  225 + target_lang: str,
  226 + source_lang: Optional[str],
  227 + scene: str,
  228 + model: str,
  229 + ) -> Optional[str]:
  230 + if not text.strip():
  231 + return text
  232 + cached = self._translation_cache.get(model=model, target_lang=target_lang, source_text=text)
  233 + if cached is not None:
  234 + logger.info(
  235 + "Translation cache served | model=%s scene=%s target_lang=%s source_lang=%s text_len=%s",
  236 + model,
  237 + scene,
  238 + target_lang,
  239 + source_lang or "auto",
  240 + len(text),
  241 + )
  242 + return cached
  243 + translated = backend.translate(
  244 + text=text,
  245 + target_lang=target_lang,
  246 + source_lang=source_lang,
  247 + scene=scene,
179 ) 248 )
  249 + if translated is not None:
  250 + self._translation_cache.set(
  251 + model=model,
  252 + target_lang=target_lang,
  253 + source_text=text,
  254 + translated_text=translated,
  255 + )
  256 + logger.info(
  257 + "Translation backend result cached | model=%s scene=%s target_lang=%s source_lang=%s text_len=%s result_len=%s",
  258 + model,
  259 + scene,
  260 + target_lang,
  261 + source_lang or "auto",
  262 + len(text),
  263 + len(str(translated)),
  264 + )
  265 + else:
  266 + logger.warning(
  267 + "Translation backend returned empty result | model=%s scene=%s target_lang=%s source_lang=%s text_len=%s",
  268 + model,
  269 + scene,
  270 + target_lang,
  271 + source_lang or "auto",
  272 + len(text),
  273 + )
  274 + return translated
  275 +
  276 + def _translate_batch_with_cache(
  277 + self,
  278 + *,
  279 + text: TranslateInput,
  280 + target_lang: str,
  281 + source_lang: Optional[str],
  282 + backend: TranslationBackendProtocol,
  283 + scene: str,
  284 + model: str,
  285 + ) -> List[Optional[str]]:
  286 + texts = list(text)
  287 + results: List[Optional[str]] = [None] * len(texts)
  288 + misses: List[str] = []
  289 + miss_indices: List[int] = []
  290 + cache_hits = 0
  291 +
  292 + for idx, item in enumerate(texts):
  293 + normalized_text = "" if item is None else str(item)
  294 + if not normalized_text.strip():
  295 + results[idx] = normalized_text
  296 + continue
  297 + cached = self._translation_cache.get(
  298 + model=model,
  299 + target_lang=target_lang,
  300 + source_text=normalized_text,
  301 + )
  302 + if cached is not None:
  303 + results[idx] = cached
  304 + cache_hits += 1
  305 + continue
  306 + misses.append(normalized_text)
  307 + miss_indices.append(idx)
  308 +
  309 + logger.info(
  310 + "Translation batch cache summary | model=%s scene=%s target_lang=%s source_lang=%s total=%s cache_hits=%s cache_misses=%s",
  311 + model,
  312 + scene,
  313 + target_lang,
  314 + source_lang or "auto",
  315 + len(texts),
  316 + cache_hits,
  317 + len(misses),
  318 + )
  319 +
  320 + if misses:
  321 + translated = backend.translate(
  322 + text=misses,
  323 + target_lang=target_lang,
  324 + source_lang=source_lang,
  325 + scene=scene,
  326 + )
  327 + translated_list = translated if isinstance(translated, list) else [translated]
  328 + for idx, original_text, translated_text in zip(miss_indices, misses, translated_list):
  329 + results[idx] = translated_text
  330 + if translated_text is not None:
  331 + self._translation_cache.set(
  332 + model=model,
  333 + target_lang=target_lang,
  334 + source_text=original_text,
  335 + translated_text=translated_text,
  336 + )
  337 + else:
  338 + logger.warning(
  339 + "Translation batch item returned empty result | model=%s scene=%s target_lang=%s source_lang=%s item_index=%s text_len=%s",
  340 + model,
  341 + scene,
  342 + target_lang,
  343 + source_lang or "auto",
  344 + idx,
  345 + len(original_text),
  346 + )
  347 +
  348 + return results
translation/settings.py
@@ -90,21 +90,11 @@ def _build_cache_config(raw_cache: Any) -> Dict[str, Any]: @@ -90,21 +90,11 @@ def _build_cache_config(raw_cache: Any) -> Dict[str, Any]:
90 if not isinstance(raw_cache, Mapping): 90 if not isinstance(raw_cache, Mapping):
91 raise ValueError("services.translation.cache must be a mapping") 91 raise ValueError("services.translation.cache must be a mapping")
92 return { 92 return {
93 - "enabled": _require_bool(raw_cache.get("enabled"), "services.translation.cache.enabled"),  
94 - "key_prefix": _require_string(raw_cache.get("key_prefix"), "services.translation.cache.key_prefix"),  
95 "ttl_seconds": _require_positive_int(raw_cache.get("ttl_seconds"), "services.translation.cache.ttl_seconds"), 93 "ttl_seconds": _require_positive_int(raw_cache.get("ttl_seconds"), "services.translation.cache.ttl_seconds"),
96 "sliding_expiration": _require_bool( 94 "sliding_expiration": _require_bool(
97 raw_cache.get("sliding_expiration"), 95 raw_cache.get("sliding_expiration"),
98 "services.translation.cache.sliding_expiration", 96 "services.translation.cache.sliding_expiration",
99 ), 97 ),
100 - "key_include_scene": _require_bool(  
101 - raw_cache.get("key_include_scene"),  
102 - "services.translation.cache.key_include_scene",  
103 - ),  
104 - "key_include_source_lang": _require_bool(  
105 - raw_cache.get("key_include_source_lang"),  
106 - "services.translation.cache.key_include_source_lang",  
107 - ),  
108 } 98 }
109 99
110 100
@@ -131,12 +121,12 @@ def _build_capabilities(raw_capabilities: Any) -> Dict[str, Dict[str, Any]]: @@ -131,12 +121,12 @@ def _build_capabilities(raw_capabilities: Any) -> Dict[str, Dict[str, Any]]:
131 def _validate_capability(name: str, capability: Mapping[str, Any]) -> None: 121 def _validate_capability(name: str, capability: Mapping[str, Any]) -> None:
132 prefix = f"services.translation.capabilities.{name}" 122 prefix = f"services.translation.capabilities.{name}"
133 backend = capability.get("backend") 123 backend = capability.get("backend")
  124 + _require_bool(capability.get("use_cache"), f"{prefix}.use_cache")
134 125
135 if backend == "qwen_mt": 126 if backend == "qwen_mt":
136 _require_string(capability.get("model"), f"{prefix}.model") 127 _require_string(capability.get("model"), f"{prefix}.model")
137 _require_http_url(capability.get("base_url"), f"{prefix}.base_url") 128 _require_http_url(capability.get("base_url"), f"{prefix}.base_url")
138 _require_positive_float(capability.get("timeout_sec"), f"{prefix}.timeout_sec") 129 _require_positive_float(capability.get("timeout_sec"), f"{prefix}.timeout_sec")
139 - _require_bool(capability.get("use_cache"), f"{prefix}.use_cache")  
140 return 130 return
141 131
142 if backend == "llm": 132 if backend == "llm":