Commit cd4ce66dc8c34567248091bc97356f0f00d32062
1 parent: c90f80ed
trans logs
Showing 14 changed files with 657 additions and 239 deletions
Show diff stats
api/translator_app.py
| @@ -2,8 +2,12 @@ | @@ -2,8 +2,12 @@ | ||
| 2 | 2 | ||
| 3 | import argparse | 3 | import argparse |
| 4 | import logging | 4 | import logging |
| 5 | +import os | ||
| 6 | +import pathlib | ||
| 7 | +import time | ||
| 5 | from contextlib import asynccontextmanager | 8 | from contextlib import asynccontextmanager |
| 6 | from functools import lru_cache | 9 | from functools import lru_cache |
| 10 | +from logging.handlers import TimedRotatingFileHandler | ||
| 7 | from typing import List, Optional, Union | 11 | from typing import List, Optional, Union |
| 8 | 12 | ||
| 9 | import uvicorn | 13 | import uvicorn |
| @@ -20,12 +24,57 @@ from translation.settings import ( | @@ -20,12 +24,57 @@ from translation.settings import ( | ||
| 20 | normalize_translation_scene, | 24 | normalize_translation_scene, |
| 21 | ) | 25 | ) |
| 22 | 26 | ||
| 23 | -# Configure logging | ||
| 24 | -logging.basicConfig( | ||
| 25 | - level=logging.INFO, | ||
| 26 | - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' | ||
| 27 | -) | 27 | + |
| 28 | +def configure_translator_logging() -> None: | ||
| 29 | + log_dir = pathlib.Path("logs") | ||
| 30 | + verbose_dir = log_dir / "verbose" | ||
| 31 | + log_dir.mkdir(exist_ok=True) | ||
| 32 | + verbose_dir.mkdir(parents=True, exist_ok=True) | ||
| 33 | + | ||
| 34 | + log_level = os.getenv("LOG_LEVEL", "INFO").upper() | ||
| 35 | + numeric_level = getattr(logging, log_level, logging.INFO) | ||
| 36 | + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") | ||
| 37 | + | ||
| 38 | + root_logger = logging.getLogger() | ||
| 39 | + root_logger.setLevel(numeric_level) | ||
| 40 | + root_logger.handlers.clear() | ||
| 41 | + | ||
| 42 | + console_handler = logging.StreamHandler() | ||
| 43 | + console_handler.setLevel(numeric_level) | ||
| 44 | + console_handler.setFormatter(formatter) | ||
| 45 | + root_logger.addHandler(console_handler) | ||
| 46 | + | ||
| 47 | + file_handler = TimedRotatingFileHandler( | ||
| 48 | + filename=log_dir / "translator_api.log", | ||
| 49 | + when="midnight", | ||
| 50 | + interval=1, | ||
| 51 | + backupCount=30, | ||
| 52 | + encoding="utf-8", | ||
| 53 | + ) | ||
| 54 | + file_handler.setLevel(numeric_level) | ||
| 55 | + file_handler.setFormatter(formatter) | ||
| 56 | + root_logger.addHandler(file_handler) | ||
| 57 | + | ||
| 58 | + verbose_logger = logging.getLogger("translator.verbose") | ||
| 59 | + verbose_logger.setLevel(numeric_level) | ||
| 60 | + verbose_logger.handlers.clear() | ||
| 61 | + verbose_logger.propagate = False | ||
| 62 | + | ||
| 63 | + verbose_handler = TimedRotatingFileHandler( | ||
| 64 | + filename=verbose_dir / "translator_verbose.log", | ||
| 65 | + when="midnight", | ||
| 66 | + interval=1, | ||
| 67 | + backupCount=30, | ||
| 68 | + encoding="utf-8", | ||
| 69 | + ) | ||
| 70 | + verbose_handler.setLevel(numeric_level) | ||
| 71 | + verbose_handler.setFormatter(formatter) | ||
| 72 | + verbose_logger.addHandler(verbose_handler) | ||
| 73 | + | ||
| 74 | + | ||
| 75 | +configure_translator_logging() | ||
| 28 | logger = logging.getLogger(__name__) | 76 | logger = logging.getLogger(__name__) |
| 77 | +verbose_logger = logging.getLogger("translator.verbose") | ||
| 29 | 78 | ||
| 30 | 79 | ||
| 31 | @lru_cache(maxsize=1) | 80 | @lru_cache(maxsize=1) |
| @@ -98,6 +147,37 @@ def _normalize_batch_result( | @@ -98,6 +147,37 @@ def _normalize_batch_result( | ||
| 98 | return [translated[idx] if idx < len(translated) else None for idx, _ in enumerate(original)] | 147 | return [translated[idx] if idx < len(translated) else None for idx, _ in enumerate(original)] |
| 99 | 148 | ||
| 100 | 149 | ||
| 150 | +def _text_preview(text: Optional[str], limit: int = 20) -> str: | ||
| 151 | + normalized = str(text or "").replace("\n", "\\n") | ||
| 152 | + return normalized[:limit] | ||
| 153 | + | ||
| 154 | + | ||
| 155 | +def _request_metrics(text: Union[str, List[str]]) -> dict: | ||
| 156 | + if isinstance(text, list): | ||
| 157 | + lengths = [len(str(item or "")) for item in text] | ||
| 158 | + return { | ||
| 159 | + "request_count": len(text), | ||
| 160 | + "lengths": lengths, | ||
| 161 | + "first_preview": _text_preview(text[0] if text else ""), | ||
| 162 | + } | ||
| 163 | + return { | ||
| 164 | + "request_count": 1, | ||
| 165 | + "lengths": [len(str(text or ""))], | ||
| 166 | + "first_preview": _text_preview(str(text or "")), | ||
| 167 | + } | ||
| 168 | + | ||
| 169 | + | ||
| 170 | +def _result_preview(translated: Union[str, List[Optional[str]], None]) -> str: | ||
| 171 | + if isinstance(translated, list): | ||
| 172 | + if not translated: | ||
| 173 | + return "" | ||
| 174 | + first = translated[0] | ||
| 175 | + return _text_preview("" if first is None else str(first)) | ||
| 176 | + if translated is None: | ||
| 177 | + return "" | ||
| 178 | + return _text_preview(str(translated)) | ||
| 179 | + | ||
| 180 | + | ||
| 101 | def _translate_batch( | 181 | def _translate_batch( |
| 102 | service: TranslationService, | 182 | service: TranslationService, |
| 103 | raw_text: List[str], | 183 | raw_text: List[str], |
| @@ -108,6 +188,17 @@ def _translate_batch( | @@ -108,6 +188,17 @@ def _translate_batch( | ||
| 108 | scene: str, | 188 | scene: str, |
| 109 | ) -> List[Optional[str]]: | 189 | ) -> List[Optional[str]]: |
| 110 | backend = service.get_backend(model) | 190 | backend = service.get_backend(model) |
| 191 | + logger.info( | ||
| 192 | + "Translation batch dispatch | model=%s scene=%s target_lang=%s source_lang=%s count=%s lengths=%s first_preview=%s supports_batch=%s", | ||
| 193 | + model, | ||
| 194 | + scene, | ||
| 195 | + target_lang, | ||
| 196 | + source_lang or "auto", | ||
| 197 | + len(raw_text), | ||
| 198 | + [len(str(item or "")) for item in raw_text], | ||
| 199 | + _text_preview(raw_text[0] if raw_text else ""), | ||
| 200 | + bool(getattr(backend, "supports_batch", False)), | ||
| 201 | + ) | ||
| 111 | if getattr(backend, "supports_batch", False): | 202 | if getattr(backend, "supports_batch", False): |
| 112 | try: | 203 | try: |
| 113 | translated = service.translate( | 204 | translated = service.translate( |
| @@ -117,6 +208,13 @@ def _translate_batch( | @@ -117,6 +208,13 @@ def _translate_batch( | ||
| 117 | model=model, | 208 | model=model, |
| 118 | scene=scene, | 209 | scene=scene, |
| 119 | ) | 210 | ) |
| 211 | + verbose_logger.info( | ||
| 212 | + "Translation batch result | model=%s scene=%s count=%s first_result=%s", | ||
| 213 | + model, | ||
| 214 | + scene, | ||
| 215 | + len(raw_text), | ||
| 216 | + _result_preview(translated), | ||
| 217 | + ) | ||
| 120 | return _normalize_batch_result(raw_text, translated) | 218 | return _normalize_batch_result(raw_text, translated) |
| 121 | except ValueError: | 219 | except ValueError: |
| 122 | raise | 220 | raise |
| @@ -139,7 +237,17 @@ def _translate_batch( | @@ -139,7 +237,17 @@ def _translate_batch( | ||
| 139 | except ValueError: | 237 | except ValueError: |
| 140 | raise | 238 | raise |
| 141 | except Exception as exc: | 239 | except Exception as exc: |
| 142 | - logger.warning("Per-item translation failed: %s", exc, exc_info=True) | 240 | + logger.warning( |
| 241 | + "Per-item translation failed | model=%s scene=%s target_lang=%s source_lang=%s item_len=%s item_preview=%s error=%s", | ||
| 242 | + model, | ||
| 243 | + scene, | ||
| 244 | + target_lang, | ||
| 245 | + source_lang or "auto", | ||
| 246 | + len(str(item or "")), | ||
| 247 | + _text_preview(str(item or "")), | ||
| 248 | + exc, | ||
| 249 | + exc_info=True, | ||
| 250 | + ) | ||
| 143 | out = None | 251 | out = None |
| 144 | results.append(out) | 252 | results.append(out) |
| 145 | return results | 253 | return results |
| @@ -147,19 +255,25 @@ def _translate_batch( | @@ -147,19 +255,25 @@ def _translate_batch( | ||
| 147 | 255 | ||
| 148 | @asynccontextmanager | 256 | @asynccontextmanager |
| 149 | async def lifespan(_: FastAPI): | 257 | async def lifespan(_: FastAPI): |
| 150 | - """Warm the default backend on process startup.""" | 258 | + """Initialize all enabled translation backends on process startup.""" |
| 151 | logger.info("Starting Translation Service API") | 259 | logger.info("Starting Translation Service API") |
| 152 | service = get_translation_service() | 260 | service = get_translation_service() |
| 153 | - default_backend = service.get_backend(service.config["default_model"]) | ||
| 154 | logger.info( | 261 | logger.info( |
| 155 | - "Translation service ready | default_model=%s available_models=%s loaded_models=%s", | 262 | + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s", |
| 156 | service.config["default_model"], | 263 | service.config["default_model"], |
| 264 | + service.config["default_scene"], | ||
| 157 | service.available_models, | 265 | service.available_models, |
| 158 | service.loaded_models, | 266 | service.loaded_models, |
| 159 | ) | 267 | ) |
| 160 | logger.info( | 268 | logger.info( |
| 161 | - "Default translation backend warmed up | model=%s", | ||
| 162 | - getattr(default_backend, "model", service.config["default_model"]), | 269 | + "Translation backends initialized on startup | models=%s", |
| 270 | + service.loaded_models, | ||
| 271 | + ) | ||
| 272 | + verbose_logger.info( | ||
| 273 | + "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s", | ||
| 274 | + service.available_models, | ||
| 275 | + service.config["cache"]["ttl_seconds"], | ||
| 276 | + service.config["cache"]["sliding_expiration"], | ||
| 163 | ) | 277 | ) |
| 164 | yield | 278 | yield |
| 165 | 279 | ||
| @@ -189,6 +303,12 @@ async def health_check(): | @@ -189,6 +303,12 @@ async def health_check(): | ||
| 189 | """Health check endpoint.""" | 303 | """Health check endpoint.""" |
| 190 | try: | 304 | try: |
| 191 | service = get_translation_service() | 305 | service = get_translation_service() |
| 306 | + logger.info( | ||
| 307 | + "Health check | default_model=%s default_scene=%s loaded_models=%s", | ||
| 308 | + service.config["default_model"], | ||
| 309 | + service.config["default_scene"], | ||
| 310 | + service.loaded_models, | ||
| 311 | + ) | ||
| 192 | return { | 312 | return { |
| 193 | "status": "healthy", | 313 | "status": "healthy", |
| 194 | "service": "translation", | 314 | "service": "translation", |
| @@ -216,12 +336,33 @@ async def translate(request: TranslationRequest): | @@ -216,12 +336,33 @@ async def translate(request: TranslationRequest): | ||
| 216 | if not request.target_lang: | 336 | if not request.target_lang: |
| 217 | raise HTTPException(status_code=400, detail="target_lang is required") | 337 | raise HTTPException(status_code=400, detail="target_lang is required") |
| 218 | 338 | ||
| 339 | + request_started = time.perf_counter() | ||
| 219 | try: | 340 | try: |
| 220 | service = get_translation_service() | 341 | service = get_translation_service() |
| 221 | scene = _normalize_scene(service, request.scene) | 342 | scene = _normalize_scene(service, request.scene) |
| 222 | model = _normalize_model(service, request.model) | 343 | model = _normalize_model(service, request.model) |
| 223 | translator = service.get_backend(model) | 344 | translator = service.get_backend(model) |
| 224 | raw_text = request.text | 345 | raw_text = request.text |
| 346 | + metrics = _request_metrics(raw_text) | ||
| 347 | + logger.info( | ||
| 348 | + "Translation request | model=%s scene=%s target_lang=%s source_lang=%s count=%s lengths=%s first_preview=%s backend=%s", | ||
| 349 | + model, | ||
| 350 | + scene, | ||
| 351 | + request.target_lang, | ||
| 352 | + request.source_lang or "auto", | ||
| 353 | + metrics["request_count"], | ||
| 354 | + metrics["lengths"], | ||
| 355 | + metrics["first_preview"], | ||
| 356 | + getattr(translator, "model", model), | ||
| 357 | + ) | ||
| 358 | + verbose_logger.info( | ||
| 359 | + "Translation request detail | model=%s scene=%s target_lang=%s source_lang=%s payload=%s", | ||
| 360 | + model, | ||
| 361 | + scene, | ||
| 362 | + request.target_lang, | ||
| 363 | + request.source_lang or "auto", | ||
| 364 | + raw_text, | ||
| 365 | + ) | ||
| 225 | 366 | ||
| 226 | if isinstance(raw_text, list): | 367 | if isinstance(raw_text, list): |
| 227 | results = _translate_batch( | 368 | results = _translate_batch( |
| @@ -232,6 +373,22 @@ async def translate(request: TranslationRequest): | @@ -232,6 +373,22 @@ async def translate(request: TranslationRequest): | ||
| 232 | model=model, | 373 | model=model, |
| 233 | scene=scene, | 374 | scene=scene, |
| 234 | ) | 375 | ) |
| 376 | + latency_ms = (time.perf_counter() - request_started) * 1000 | ||
| 377 | + logger.info( | ||
| 378 | + "Translation response | model=%s scene=%s count=%s first_result=%s latency_ms=%.2f", | ||
| 379 | + model, | ||
| 380 | + scene, | ||
| 381 | + len(raw_text), | ||
| 382 | + _result_preview(results), | ||
| 383 | + latency_ms, | ||
| 384 | + ) | ||
| 385 | + verbose_logger.info( | ||
| 386 | + "Translation response detail | model=%s scene=%s translated=%s latency_ms=%.2f", | ||
| 387 | + model, | ||
| 388 | + scene, | ||
| 389 | + results, | ||
| 390 | + latency_ms, | ||
| 391 | + ) | ||
| 235 | return TranslationResponse( | 392 | return TranslationResponse( |
| 236 | text=raw_text, | 393 | text=raw_text, |
| 237 | target_lang=request.target_lang, | 394 | target_lang=request.target_lang, |
| @@ -253,6 +410,22 @@ async def translate(request: TranslationRequest): | @@ -253,6 +410,22 @@ async def translate(request: TranslationRequest): | ||
| 253 | if translated_text is None: | 410 | if translated_text is None: |
| 254 | raise HTTPException(status_code=500, detail="Translation failed") | 411 | raise HTTPException(status_code=500, detail="Translation failed") |
| 255 | 412 | ||
| 413 | + latency_ms = (time.perf_counter() - request_started) * 1000 | ||
| 414 | + logger.info( | ||
| 415 | + "Translation response | model=%s scene=%s count=1 first_result=%s latency_ms=%.2f", | ||
| 416 | + model, | ||
| 417 | + scene, | ||
| 418 | + _result_preview(translated_text), | ||
| 419 | + latency_ms, | ||
| 420 | + ) | ||
| 421 | + verbose_logger.info( | ||
| 422 | + "Translation response detail | model=%s scene=%s translated=%s latency_ms=%.2f", | ||
| 423 | + model, | ||
| 424 | + scene, | ||
| 425 | + translated_text, | ||
| 426 | + latency_ms, | ||
| 427 | + ) | ||
| 428 | + | ||
| 256 | return TranslationResponse( | 429 | return TranslationResponse( |
| 257 | text=raw_text, | 430 | text=raw_text, |
| 258 | target_lang=request.target_lang, | 431 | target_lang=request.target_lang, |
| @@ -263,12 +436,22 @@ async def translate(request: TranslationRequest): | @@ -263,12 +436,22 @@ async def translate(request: TranslationRequest): | ||
| 263 | scene=scene, | 436 | scene=scene, |
| 264 | ) | 437 | ) |
| 265 | 438 | ||
| 266 | - except HTTPException: | 439 | + except HTTPException as exc: |
| 440 | + latency_ms = (time.perf_counter() - request_started) * 1000 | ||
| 441 | + logger.warning( | ||
| 442 | + "Translation request failed | status_code=%s detail=%s latency_ms=%.2f", | ||
| 443 | + exc.status_code, | ||
| 444 | + exc.detail, | ||
| 445 | + latency_ms, | ||
| 446 | + ) | ||
| 267 | raise | 447 | raise |
| 268 | except ValueError as e: | 448 | except ValueError as e: |
| 449 | + latency_ms = (time.perf_counter() - request_started) * 1000 | ||
| 450 | + logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True) | ||
| 269 | raise HTTPException(status_code=400, detail=str(e)) from e | 451 | raise HTTPException(status_code=400, detail=str(e)) from e |
| 270 | except Exception as e: | 452 | except Exception as e: |
| 271 | - logger.error(f"Translation error: {e}", exc_info=True) | 453 | + latency_ms = (time.perf_counter() - request_started) * 1000 |
| 454 | + logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True) | ||
| 272 | raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}") | 455 | raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}") |
| 273 | 456 | ||
| 274 | 457 |
config/config.yaml
| @@ -106,12 +106,8 @@ services: | @@ -106,12 +106,8 @@ services: | ||
| 106 | default_scene: "general" | 106 | default_scene: "general" |
| 107 | timeout_sec: 10.0 | 107 | timeout_sec: 10.0 |
| 108 | cache: | 108 | cache: |
| 109 | - enabled: true | ||
| 110 | - key_prefix: "trans:v2" | ||
| 111 | ttl_seconds: 62208000 | 109 | ttl_seconds: 62208000 |
| 112 | sliding_expiration: true | 110 | sliding_expiration: true |
| 113 | - key_include_scene: true | ||
| 114 | - key_include_source_lang: true | ||
| 115 | capabilities: | 111 | capabilities: |
| 116 | qwen-mt: | 112 | qwen-mt: |
| 117 | enabled: true | 113 | enabled: true |
| @@ -126,12 +122,14 @@ services: | @@ -126,12 +122,14 @@ services: | ||
| 126 | model: "qwen-flash" | 122 | model: "qwen-flash" |
| 127 | base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1" | 123 | base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1" |
| 128 | timeout_sec: 30.0 | 124 | timeout_sec: 30.0 |
| 125 | + use_cache: true | ||
| 129 | deepl: | 126 | deepl: |
| 130 | - enabled: false | 127 | + enabled: true |
| 131 | backend: "deepl" | 128 | backend: "deepl" |
| 132 | api_url: "https://api.deepl.com/v2/translate" | 129 | api_url: "https://api.deepl.com/v2/translate" |
| 133 | timeout_sec: 10.0 | 130 | timeout_sec: 10.0 |
| 134 | glossary_id: "" | 131 | glossary_id: "" |
| 132 | + use_cache: true | ||
| 135 | nllb-200-distilled-600m: | 133 | nllb-200-distilled-600m: |
| 136 | enabled: true | 134 | enabled: true |
| 137 | backend: "local_nllb" | 135 | backend: "local_nllb" |
| @@ -144,6 +142,7 @@ services: | @@ -144,6 +142,7 @@ services: | ||
| 144 | max_new_tokens: 64 | 142 | max_new_tokens: 64 |
| 145 | num_beams: 1 | 143 | num_beams: 1 |
| 146 | attn_implementation: "sdpa" | 144 | attn_implementation: "sdpa" |
| 145 | + use_cache: true | ||
| 147 | opus-mt-zh-en: | 146 | opus-mt-zh-en: |
| 148 | enabled: true | 147 | enabled: true |
| 149 | backend: "local_marian" | 148 | backend: "local_marian" |
| @@ -155,6 +154,7 @@ services: | @@ -155,6 +154,7 @@ services: | ||
| 155 | max_input_length: 256 | 154 | max_input_length: 256 |
| 156 | max_new_tokens: 256 | 155 | max_new_tokens: 256 |
| 157 | num_beams: 1 | 156 | num_beams: 1 |
| 157 | + use_cache: true | ||
| 158 | opus-mt-en-zh: | 158 | opus-mt-en-zh: |
| 159 | enabled: true | 159 | enabled: true |
| 160 | backend: "local_marian" | 160 | backend: "local_marian" |
| @@ -166,6 +166,7 @@ services: | @@ -166,6 +166,7 @@ services: | ||
| 166 | max_input_length: 256 | 166 | max_input_length: 256 |
| 167 | max_new_tokens: 256 | 167 | max_new_tokens: 256 |
| 168 | num_beams: 1 | 168 | num_beams: 1 |
| 169 | + use_cache: true | ||
| 169 | embedding: | 170 | embedding: |
| 170 | provider: "http" # http | 171 | provider: "http" # http |
| 171 | base_url: "http://127.0.0.1:6005" | 172 | base_url: "http://127.0.0.1:6005" |
config/env_config.py
| @@ -42,8 +42,6 @@ REDIS_CONFIG = { | @@ -42,8 +42,6 @@ REDIS_CONFIG = { | ||
| 42 | 'socket_connect_timeout': int(os.getenv('REDIS_SOCKET_CONNECT_TIMEOUT', 1)), | 42 | 'socket_connect_timeout': int(os.getenv('REDIS_SOCKET_CONNECT_TIMEOUT', 1)), |
| 43 | 'retry_on_timeout': os.getenv('REDIS_RETRY_ON_TIMEOUT', 'False').lower() == 'true', | 43 | 'retry_on_timeout': os.getenv('REDIS_RETRY_ON_TIMEOUT', 'False').lower() == 'true', |
| 44 | 'cache_expire_days': int(os.getenv('REDIS_CACHE_EXPIRE_DAYS', 360*2)), # 6 months | 44 | 'cache_expire_days': int(os.getenv('REDIS_CACHE_EXPIRE_DAYS', 360*2)), # 6 months |
| 45 | - 'translation_cache_expire_days': int(os.getenv('REDIS_TRANSLATION_CACHE_EXPIRE_DAYS', 360*2)), | ||
| 46 | - 'translation_cache_prefix': os.getenv('REDIS_TRANSLATION_CACHE_PREFIX', 'trans'), | ||
| 47 | # Embedding 缓存 key 前缀,例如 "embedding" | 45 | # Embedding 缓存 key 前缀,例如 "embedding" |
| 48 | 'embedding_cache_prefix': os.getenv('REDIS_EMBEDDING_CACHE_PREFIX', 'embedding'), | 46 | 'embedding_cache_prefix': os.getenv('REDIS_EMBEDDING_CACHE_PREFIX', 'embedding'), |
| 49 | } | 47 | } |
docs/工作总结-微服务性能优化与架构.md
| @@ -88,7 +88,7 @@ instruction: "Given a shopping query, rank product titles by relevance" | @@ -88,7 +88,7 @@ instruction: "Given a shopping query, rank product titles by relevance" | ||
| 88 | - **配置入口**:`config/config.yaml` → `services.translation`,显式声明 `service_url`、`default_model`、`default_scene`、各 capability 的 `backend`、`base_url/api_url`、timeout 与本地模型运行参数。 | 88 | - **配置入口**:`config/config.yaml` → `services.translation`,显式声明 `service_url`、`default_model`、`default_scene`、各 capability 的 `backend`、`base_url/api_url`、timeout 与本地模型运行参数。 |
| 89 | - **内部规则收口**:scene 集合、语言码映射、LLM prompt 模板、本地模型方向约束统一放在 `translation/` 内部,不再散落在 `config/`、`query/` 等位置。 | 89 | - **内部规则收口**:scene 集合、语言码映射、LLM prompt 模板、本地模型方向约束统一放在 `translation/` 内部,不再散落在 `config/`、`query/` 等位置。 |
| 90 | - **调用位置**:QueryParser 与 Indexer 均通过 `translation.create_translation_client()` 获取客户端,不写死 URL 或模型名。 | 90 | - **调用位置**:QueryParser 与 Indexer 均通过 `translation.create_translation_client()` 获取客户端,不写死 URL 或模型名。 |
| 91 | -- **缓存**:`services.translation.cache` 支持 `key_prefix: "trans:v2"`、`ttl_seconds`、`sliding_expiration` 等,翻译结果写 Redis,减轻重复请求对限速的影响。 | 91 | +- **缓存**:translator service 对所有 translation capability 统一接入 Redis 缓存;每个 capability 通过 `use_cache` 控制开关,key 格式固定为 `trans:{model}:{target_lang}:{source_text[:4]}{sha256}`。 |
| 92 | - **场景支撑**:在线索引(indexer)与 query 请求(QueryParser)共用同一套 provider 配置;可按环境或租户通过修改 `config.yaml` 或环境变量切换 provider/model。 | 92 | - **场景支撑**:在线索引(indexer)与 query 请求(QueryParser)共用同一套 provider 配置;可按环境或租户通过修改 `config.yaml` 或环境变量切换 provider/model。 |
| 93 | - **待配合**:**金伟侧对索引侧翻译调用做流量控制**(限流/排队/批量聚合),避免索引高峰打满 qwen 限速,影响在线 query 翻译。 | 93 | - **待配合**:**金伟侧对索引侧翻译调用做流量控制**(限流/排队/批量聚合),避免索引高峰打满 qwen 限速,影响在线 query 翻译。 |
| 94 | 94 |
docs/缓存与Redis使用说明.md
| @@ -12,7 +12,6 @@ | @@ -12,7 +12,6 @@ | ||
| 12 | - **Password**:`REDIS_PASSWORD` | 12 | - **Password**:`REDIS_PASSWORD` |
| 13 | - **Socket & 超时**:`REDIS_SOCKET_TIMEOUT` / `REDIS_SOCKET_CONNECT_TIMEOUT` / `REDIS_RETRY_ON_TIMEOUT` | 13 | - **Socket & 超时**:`REDIS_SOCKET_TIMEOUT` / `REDIS_SOCKET_CONNECT_TIMEOUT` / `REDIS_RETRY_ON_TIMEOUT` |
| 14 | - **通用缓存 TTL**:`REDIS_CACHE_EXPIRE_DAYS`(默认 `360*2` 天,代码注释为 “6 months”) | 14 | - **通用缓存 TTL**:`REDIS_CACHE_EXPIRE_DAYS`(默认 `360*2` 天,代码注释为 “6 months”) |
| 15 | -- **翻译缓存 TTL & 前缀**:`REDIS_TRANSLATION_CACHE_EXPIRE_DAYS`、`REDIS_TRANSLATION_CACHE_PREFIX` | ||
| 16 | 15 | ||
| 17 | --- | 16 | --- |
| 18 | 17 | ||
| @@ -21,7 +20,7 @@ | @@ -21,7 +20,7 @@ | ||
| 21 | | 模块 / 场景 | Key 模板 | Value 内容示例 | 过期策略 | 备注 | | 20 | | 模块 / 场景 | Key 模板 | Value 内容示例 | 过期策略 | 备注 | |
| 22 | |------------|----------|----------------|----------|------| | 21 | |------------|----------|----------------|----------|------| |
| 23 | | 向量缓存(text/image embedding) | `{EMBEDDING_CACHE_PREFIX}:{query_or_url}` / `{EMBEDDING_CACHE_PREFIX}:image:{url_or_path}` | **BF16 bytes**(每维 2 字节大端存储),读取后恢复为 `np.float32` | TTL=`REDIS_CONFIG["cache_expire_days"]` 天;访问时滑动过期 | 见 `embeddings/text_encoder.py`(文本)与 `embeddings/image_encoder.py`(图片);前缀由 `REDIS_CONFIG["embedding_cache_prefix"]` 控制 | | 22 | | 向量缓存(text/image embedding) | `{EMBEDDING_CACHE_PREFIX}:{query_or_url}` / `{EMBEDDING_CACHE_PREFIX}:image:{url_or_path}` | **BF16 bytes**(每维 2 字节大端存储),读取后恢复为 `np.float32` | TTL=`REDIS_CONFIG["cache_expire_days"]` 天;访问时滑动过期 | 见 `embeddings/text_encoder.py`(文本)与 `embeddings/image_encoder.py`(图片);前缀由 `REDIS_CONFIG["embedding_cache_prefix"]` 控制 | |
| 24 | -| 翻译结果缓存(Qwen-MT 翻译) | `{cache_prefix}:{model}:{src}:{tgt}:{sha256(payload)}` | 机翻后的单条字符串 | TTL=`services.translation.cache.ttl_seconds` 秒;可配置滑动过期 | 见 `translation/backends/qwen_mt.py` + `config/config.yaml` | | 23 | +| 翻译结果缓存(translator service) | `trans:{model}:{target_lang}:{source_text[:4]}{sha256(source_text)}` | 机翻后的单条字符串 | TTL=`services.translation.cache.ttl_seconds` 秒;可配置滑动过期 | 见 `translation/service.py` + `config/config.yaml` | |
| 25 | | 商品内容理解缓存(anchors / 语义属性 / tags) | `{ANCHOR_CACHE_PREFIX}:{tenant_or_global}:{target_lang}:{md5(title)}` | `json.dumps(dict)`,包含 id/title/category/tags/anchor_text 等 | TTL=`ANCHOR_CACHE_EXPIRE_DAYS` 天 | 见 `indexer/product_enrich.py` | | 24 | | 商品内容理解缓存(anchors / 语义属性 / tags) | `{ANCHOR_CACHE_PREFIX}:{tenant_or_global}:{target_lang}:{md5(title)}` | `json.dumps(dict)`,包含 id/title/category/tags/anchor_text 等 | TTL=`ANCHOR_CACHE_EXPIRE_DAYS` 天 | 见 `indexer/product_enrich.py` | |
| 26 | 25 | ||
| 27 | 下面按模块详细说明。 | 26 | 下面按模块详细说明。 |
| @@ -71,34 +70,29 @@ | @@ -71,34 +70,29 @@ | ||
| 71 | 70 | ||
| 72 | --- | 71 | --- |
| 73 | 72 | ||
| 74 | -## 3. 翻译结果缓存(translation/backends/qwen_mt.py) | 73 | +## 3. 翻译结果缓存(translation/service.py) |
| 75 | 74 | ||
| 76 | -- **代码位置**:`translation/backends/qwen_mt.py` 中 `QwenMTTranslationBackend` | ||
| 77 | -- **用途**:缓存 Qwen-MT 翻译(及 translator service 复用的翻译)结果,减少云端请求,遵守限速。 | ||
| 78 | -- **配置入口**:`config/config.yaml -> services.translation.cache`,统一由 `config/services_config.get_translation_cache_config()` 解析。 | 75 | +- **代码位置**:`translation/service.py` |
| 76 | +- **用途**:统一缓存所有 translation capability 的翻译结果。 | ||
| 77 | +- **配置入口**: | ||
| 78 | + - `config/config.yaml -> services.translation.cache` | ||
| 79 | + - `config/config.yaml -> services.translation.capabilities.*.use_cache` | ||
| 79 | 80 | ||
| 80 | ### 3.1 Key 设计 | 81 | ### 3.1 Key 设计 |
| 81 | 82 | ||
| 82 | -- 内部构造函数:`_build_cache_key(...)` | 83 | +- 内部构造函数:`TranslationCache.build_key(...)` |
| 83 | - 模板: | 84 | - 模板: |
| 84 | 85 | ||
| 85 | ```text | 86 | ```text |
| 86 | -{cache_prefix}:{model}:{src}:{tgt}:{sha256(payload)} | 87 | +trans:{model}:{target_lang}:{source_text[:4]}{sha256(source_text)} |
| 87 | ``` | 88 | ``` |
| 88 | 89 | ||
| 89 | 其中: | 90 | 其中: |
| 90 | 91 | ||
| 91 | -- `cache_prefix`:来自 `services.translation.cache.key_prefix`,默认 `trans:v2`; | ||
| 92 | -- `model`:如 `"qwen-mt"`; | ||
| 93 | -- `src`:源语言(如 `zh` / `en` / `auto`),是否包含在 key 中由 `key_include_source_lang` 控制; | ||
| 94 | -- `tgt`:目标语言,如 `en` / `zh`; | ||
| 95 | -- `sha256(payload)`:对以下内容整体做 SHA-256: | ||
| 96 | - - `model` | ||
| 97 | - - `src` / `tgt` | ||
| 98 | - - `scene`(受 `key_include_scene` 控制) | ||
| 99 | - - 原始 `text` | ||
| 100 | - | ||
| 101 | -> 注意:所有 key 设计集中在 `_build_cache_key`,**不要在其他位置手动拼翻译缓存 key**。 | 92 | +- `model`:capability 名称,如 `qwen-mt`、`llm`、`opus-mt-zh-en` |
| 93 | +- `target_lang`:目标语言,如 `en` / `zh` | ||
| 94 | +- `source_text[:4]`:原文前 4 个字符 | ||
| 95 | +- `sha256(source_text)`:对完整原文做 SHA-256 | ||
| 102 | 96 | ||
| 103 | ### 3.2 Value 与类型 | 97 | ### 3.2 Value 与类型 |
| 104 | 98 | ||
| @@ -115,20 +109,25 @@ | @@ -115,20 +109,25 @@ | ||
| 115 | services: | 109 | services: |
| 116 | translation: | 110 | translation: |
| 117 | cache: | 111 | cache: |
| 118 | - enabled: true | ||
| 119 | - key_prefix: "trans:v2" | ||
| 120 | ttl_seconds: 62208000 # 默认约 720 天 | 112 | ttl_seconds: 62208000 # 默认约 720 天 |
| 121 | sliding_expiration: true | 113 | sliding_expiration: true |
| 122 | - key_include_scene: true | ||
| 123 | - key_include_source_lang: true | 114 | + capabilities: |
| 115 | + qwen-mt: | ||
| 116 | + use_cache: true | ||
| 117 | + llm: | ||
| 118 | + use_cache: true | ||
| 119 | + deepl: | ||
| 120 | + use_cache: true | ||
| 121 | + nllb-200-distilled-600m: | ||
| 122 | + use_cache: true | ||
| 123 | + opus-mt-zh-en: | ||
| 124 | + use_cache: true | ||
| 125 | + opus-mt-en-zh: | ||
| 126 | + use_cache: true | ||
| 124 | ``` | 127 | ``` |
| 125 | 128 | ||
| 126 | - 运行时行为: | 129 | - 运行时行为: |
| 127 | - - 创建 `Translator` 时,从 `cache_cfg` 读取: | ||
| 128 | - - `self.cache_prefix` | ||
| 129 | - - `self.expire_seconds` | ||
| 130 | - - `self.cache_sliding_expiration` | ||
| 131 | - - `self.cache_include_*` 一系列布尔开关; | 130 | + - translator service 启动时初始化共享 Redis cache; |
| 132 | - **读缓存**: | 131 | - **读缓存**: |
| 133 | - 命中后,若 `sliding_expiration=True`,会调用 `redis.expire(key, expire_seconds)`; | 132 | - 命中后,若 `sliding_expiration=True`,会调用 `redis.expire(key, expire_seconds)`; |
| 134 | - **写缓存**: | 133 | - **写缓存**: |
| @@ -136,8 +135,8 @@ services: | @@ -136,8 +135,8 @@ services: | ||
| 136 | 135 | ||
| 137 | ### 3.4 关联模块 | 136 | ### 3.4 关联模块 |
| 138 | 137 | ||
| 139 | -- `api/translator_app.py` 会通过 `translation.backends.qwen_mt.QwenMTTranslationBackend` 复用同一套缓存逻辑; | ||
| 140 | -- 文档说明:`docs/翻译模块说明.md` 中提到“推荐通过 Redis 翻译缓存复用结果”。 | 138 | +- `api/translator_app.py` 通过 `TranslationService` 统一复用同一套缓存逻辑; |
| 139 | +- 所有翻译后端都通过 `TranslationService` 接入缓存。 | ||
| 141 | 140 | ||
| 142 | --- | 141 | --- |
| 143 | 142 |
scripts/redis/redis_cache_health_check.py
| @@ -43,7 +43,6 @@ PROJECT_ROOT = Path(__file__).parent.parent.parent | @@ -43,7 +43,6 @@ PROJECT_ROOT = Path(__file__).parent.parent.parent | ||
| 43 | sys.path.insert(0, str(PROJECT_ROOT)) | 43 | sys.path.insert(0, str(PROJECT_ROOT)) |
| 44 | 44 | ||
| 45 | from config.env_config import REDIS_CONFIG # type: ignore | 45 | from config.env_config import REDIS_CONFIG # type: ignore |
| 46 | -from config.services_config import get_translation_cache_config # type: ignore | ||
| 47 | from embeddings.bf16 import decode_embedding_from_redis # type: ignore | 46 | from embeddings.bf16 import decode_embedding_from_redis # type: ignore |
| 48 | 47 | ||
| 49 | 48 | ||
| @@ -66,13 +65,11 @@ def _load_known_cache_types() -> Dict[str, CacheTypeConfig]: | @@ -66,13 +65,11 @@ def _load_known_cache_types() -> Dict[str, CacheTypeConfig]: | ||
| 66 | description="文本向量缓存(embeddings/text_encoder.py)", | 65 | description="文本向量缓存(embeddings/text_encoder.py)", |
| 67 | ) | 66 | ) |
| 68 | 67 | ||
| 69 | - # translation 缓存:prefix 来自 services.translation.cache.key_prefix | ||
| 70 | - cache_cfg = get_translation_cache_config() | ||
| 71 | - trans_prefix = cache_cfg.get("key_prefix", "trans:v2") | 68 | + # translation 缓存:统一前缀 trans |
| 72 | cache_types["translation"] = CacheTypeConfig( | 69 | cache_types["translation"] = CacheTypeConfig( |
| 73 | name="translation", | 70 | name="translation", |
| 74 | - pattern=f"{trans_prefix}:*", | ||
| 75 | - description="翻译结果缓存(query/qwen_mt_translate.Translator)", | 71 | + pattern="trans:*", |
| 72 | + description="翻译结果缓存(translation/service.py)", | ||
| 76 | ) | 73 | ) |
| 77 | 74 | ||
| 78 | # anchors 缓存:prefix 来自 REDIS_CONFIG['anchor_cache_prefix'](若存在),否则 product_anchors | 75 | # anchors 缓存:prefix 来自 REDIS_CONFIG['anchor_cache_prefix'](若存在),否则 product_anchors |
| @@ -400,4 +397,3 @@ def main() -> None: | @@ -400,4 +397,3 @@ def main() -> None: | ||
| 400 | 397 | ||
| 401 | if __name__ == "__main__": | 398 | if __name__ == "__main__": |
| 402 | main() | 399 | main() |
| 403 | - |
tests/ci/test_service_api_contracts.py
| @@ -625,12 +625,8 @@ def translator_client(monkeypatch): | @@ -625,12 +625,8 @@ def translator_client(monkeypatch): | ||
| 625 | } | 625 | } |
| 626 | }, | 626 | }, |
| 627 | "cache": { | 627 | "cache": { |
| 628 | - "enabled": True, | ||
| 629 | - "key_prefix": "trans:v2", | ||
| 630 | "ttl_seconds": 60, | 628 | "ttl_seconds": 60, |
| 631 | "sliding_expiration": True, | 629 | "sliding_expiration": True, |
| 632 | - "key_include_scene": True, | ||
| 633 | - "key_include_source_lang": True, | ||
| 634 | }, | 630 | }, |
| 635 | } | 631 | } |
| 636 | self.available_models = ["qwen-mt"] | 632 | self.available_models = ["qwen-mt"] |
| @@ -681,12 +677,8 @@ def test_translator_api_failure_returns_500(monkeypatch): | @@ -681,12 +677,8 @@ def test_translator_api_failure_returns_500(monkeypatch): | ||
| 681 | } | 677 | } |
| 682 | }, | 678 | }, |
| 683 | "cache": { | 679 | "cache": { |
| 684 | - "enabled": True, | ||
| 685 | - "key_prefix": "trans:v2", | ||
| 686 | "ttl_seconds": 60, | 680 | "ttl_seconds": 60, |
| 687 | "sliding_expiration": True, | 681 | "sliding_expiration": True, |
| 688 | - "key_include_scene": True, | ||
| 689 | - "key_include_source_lang": True, | ||
| 690 | }, | 682 | }, |
| 691 | } | 683 | } |
| 692 | self.available_models = ["qwen-mt"] | 684 | self.available_models = ["qwen-mt"] |
tests/test_translation_local_backends.py
| @@ -96,7 +96,7 @@ def test_nllb_uses_src_lang_and_forced_bos(monkeypatch): | @@ -96,7 +96,7 @@ def test_nllb_uses_src_lang_and_forced_bos(monkeypatch): | ||
| 96 | assert backend.seq2seq_model.last_generate_kwargs["forced_bos_token_id"] == 202 | 96 | assert backend.seq2seq_model.last_generate_kwargs["forced_bos_token_id"] == 202 |
| 97 | 97 | ||
| 98 | 98 | ||
| 99 | -def test_translation_service_lazy_loads_enabled_backends(monkeypatch): | 99 | +def test_translation_service_preloads_enabled_backends(monkeypatch): |
| 100 | created = [] | 100 | created = [] |
| 101 | 101 | ||
| 102 | def _fake_create_backend(self, *, name, backend_type, cfg): | 102 | def _fake_create_backend(self, *, name, backend_type, cfg): |
| @@ -126,6 +126,7 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch): | @@ -126,6 +126,7 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch): | ||
| 126 | "opus-mt-en-zh": { | 126 | "opus-mt-en-zh": { |
| 127 | "enabled": True, | 127 | "enabled": True, |
| 128 | "backend": "local_marian", | 128 | "backend": "local_marian", |
| 129 | + "use_cache": True, | ||
| 129 | "model_id": "dummy", | 130 | "model_id": "dummy", |
| 130 | "model_dir": "dummy", | 131 | "model_dir": "dummy", |
| 131 | "device": "cpu", | 132 | "device": "cpu", |
| @@ -138,6 +139,7 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch): | @@ -138,6 +139,7 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch): | ||
| 138 | "nllb-200-distilled-600m": { | 139 | "nllb-200-distilled-600m": { |
| 139 | "enabled": True, | 140 | "enabled": True, |
| 140 | "backend": "local_nllb", | 141 | "backend": "local_nllb", |
| 142 | + "use_cache": True, | ||
| 141 | "model_id": "dummy", | 143 | "model_id": "dummy", |
| 142 | "model_dir": "dummy", | 144 | "model_dir": "dummy", |
| 143 | "device": "cpu", | 145 | "device": "cpu", |
| @@ -149,22 +151,19 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch): | @@ -149,22 +151,19 @@ def test_translation_service_lazy_loads_enabled_backends(monkeypatch): | ||
| 149 | }, | 151 | }, |
| 150 | }, | 152 | }, |
| 151 | "cache": { | 153 | "cache": { |
| 152 | - "enabled": True, | ||
| 153 | - "key_prefix": "trans:v2", | ||
| 154 | "ttl_seconds": 60, | 154 | "ttl_seconds": 60, |
| 155 | "sliding_expiration": True, | 155 | "sliding_expiration": True, |
| 156 | - "key_include_scene": True, | ||
| 157 | - "key_include_source_lang": True, | ||
| 158 | }, | 156 | }, |
| 159 | } | 157 | } |
| 160 | 158 | ||
| 161 | service = TranslationService(config) | 159 | service = TranslationService(config) |
| 162 | 160 | ||
| 163 | assert service.available_models == ["opus-mt-en-zh", "nllb-200-distilled-600m"] | 161 | assert service.available_models == ["opus-mt-en-zh", "nllb-200-distilled-600m"] |
| 164 | - assert service.loaded_models == [] | 162 | + assert service.loaded_models == ["opus-mt-en-zh", "nllb-200-distilled-600m"] |
| 163 | + assert created == [ | ||
| 164 | + ("opus-mt-en-zh", "local_marian"), | ||
| 165 | + ("nllb-200-distilled-600m", "local_nllb"), | ||
| 166 | + ] | ||
| 165 | 167 | ||
| 166 | backend = service.get_backend("opus-mt-en-zh") | 168 | backend = service.get_backend("opus-mt-en-zh") |
| 167 | - | ||
| 168 | assert backend.model == "opus-mt-en-zh" | 169 | assert backend.model == "opus-mt-en-zh" |
| 169 | - assert created == [("opus-mt-en-zh", "local_marian")] | ||
| 170 | - assert service.loaded_models == ["opus-mt-en-zh"] |
tests/test_translator_failure_semantics.py
| 1 | -from translation.backends.qwen_mt import QwenMTTranslationBackend | 1 | +from translation.cache import TranslationCache |
| 2 | +from translation.service import TranslationService | ||
| 2 | 3 | ||
| 3 | 4 | ||
| 4 | -class _RecordingRedis: | 5 | +class _FakeCache: |
| 5 | def __init__(self): | 6 | def __init__(self): |
| 6 | - self.setex_calls = [] | ||
| 7 | - | ||
| 8 | - def setex(self, key, ttl, value): | ||
| 9 | - self.setex_calls.append((key, ttl, value)) | ||
| 10 | - | ||
| 11 | - | ||
| 12 | -def test_translate_failure_returns_none_and_skips_cache(monkeypatch): | ||
| 13 | - translator = QwenMTTranslationBackend( | ||
| 14 | - capability_name="qwen-mt", | ||
| 15 | - model="qwen-mt-flash", | ||
| 16 | - base_url="https://dashscope-us.aliyuncs.com/compatible-mode/v1", | ||
| 17 | - api_key="dummy-key", | ||
| 18 | - use_cache=False, | ||
| 19 | - ) | ||
| 20 | - fake_redis = _RecordingRedis() | ||
| 21 | - translator.use_cache = True | ||
| 22 | - translator.redis_client = fake_redis | ||
| 23 | - translator.cache_prefix = "trans" | ||
| 24 | - translator.expire_seconds = 60 | ||
| 25 | - | ||
| 26 | - monkeypatch.setattr(translator, "_translate_qwen", lambda *args, **kwargs: None) | ||
| 27 | - | ||
| 28 | - result = translator.translate( | ||
| 29 | - text="商品标题", | ||
| 30 | - target_lang="en", | ||
| 31 | - source_lang="zh", | ||
| 32 | - scene="sku_name", | ||
| 33 | - ) | ||
| 34 | - | ||
| 35 | - assert result is None | ||
| 36 | - assert fake_redis.setex_calls == [] | 7 | + self.available = True |
| 8 | + self.storage = {} | ||
| 9 | + self.get_calls = [] | ||
| 10 | + self.set_calls = [] | ||
| 11 | + | ||
| 12 | + def get(self, *, model, target_lang, source_text): | ||
| 13 | + self.get_calls.append((model, target_lang, source_text)) | ||
| 14 | + return self.storage.get((model, target_lang, source_text)) | ||
| 15 | + | ||
| 16 | + def set(self, *, model, target_lang, source_text, translated_text): | ||
| 17 | + self.set_calls.append((model, target_lang, source_text, translated_text)) | ||
| 18 | + self.storage[(model, target_lang, source_text)] = translated_text | ||
| 19 | + | ||
| 20 | + | ||
def test_translation_cache_key_format(monkeypatch):
    """Pin the exact Redis key layout produced by ``TranslationCache.build_key``.

    The key must be ``trans:{model}:{target_lang}:{text[:4]}{sha256(text)}``.
    Checking only a prefix and the total length would also accept a digest
    computed from the wrong input, so the full key is compared here.
    """
    import hashlib

    # Avoid opening a real Redis connection while constructing the cache.
    monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
    cache = TranslationCache({"ttl_seconds": 60, "sliding_expiration": True})

    # Short text: the readable prefix is the whole text.
    short_text = "商品标题"
    short_digest = hashlib.sha256(short_text.encode("utf-8")).hexdigest()
    key = cache.build_key(model="llm", target_lang="en", source_text=short_text)
    assert key == f"trans:llm:en:{short_text}{short_digest}"
    assert len(key) == len("trans:llm:en:商品标题") + 64

    # Longer text: only the first four characters appear in clear, while the
    # digest still covers the complete text.
    long_text = "hello world"
    long_digest = hashlib.sha256(long_text.encode("utf-8")).hexdigest()
    long_key = cache.build_key(model="llm", target_lang="en", source_text=long_text)
    assert long_key == f"trans:llm:en:hell{long_digest}"
| 27 | + | ||
| 28 | + | ||
def test_service_caches_all_capabilities(monkeypatch):
    """Check that the service-level cache is consulted for every capability.

    A remote-style capability ("llm") and a local one ("opus-mt-zh-en") are
    both routed through the same fake cache; the recorded get/set calls show
    that single and batch requests are looked up and written back per text.
    """
    # Never touch a real Redis instance while the service builds its cache.
    monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
    created = {}

    def _fake_create_backend(self, *, name, backend_type, cfg):
        del self, backend_type, cfg

        class _Backend:
            model = name

            @property
            def supports_batch(self):
                return True

            def translate(self, text, target_lang, source_lang=None, scene=None):
                del target_lang, source_lang, scene
                # Echo the capability name so results reveal which backend ran.
                if not isinstance(text, list):
                    return f"{name}:{text}"
                return [f"{name}:{item}" for item in text]

        instance = _Backend()
        created[name] = instance
        return instance

    monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend)

    llm_capability = {
        "enabled": True,
        "backend": "llm",
        "model": "dummy-llm",
        "base_url": "https://example.com",
        "timeout_sec": 10.0,
        "use_cache": True,
    }
    marian_capability = {
        "enabled": True,
        "backend": "local_marian",
        "model_id": "dummy",
        "model_dir": "dummy",
        "device": "cpu",
        "torch_dtype": "float32",
        "batch_size": 8,
        "max_input_length": 16,
        "max_new_tokens": 16,
        "num_beams": 1,
        "use_cache": True,
    }
    config = {
        "service_url": "http://127.0.0.1:6006",
        "timeout_sec": 10.0,
        "default_model": "llm",
        "default_scene": "general",
        "capabilities": {
            "llm": llm_capability,
            "opus-mt-zh-en": marian_capability,
        },
        "cache": {
            "ttl_seconds": 60,
            "sliding_expiration": True,
        },
    }

    service = TranslationService(config)
    recording_cache = _FakeCache()
    service._translation_cache = recording_cache

    single_kwargs = {"target_lang": "en", "source_lang": "zh", "model": "llm"}
    first = service.translate("商品标题", **single_kwargs)
    second = service.translate("商品标题", **single_kwargs)
    batch = service.translate(
        ["连衣裙", "衬衫"],
        target_lang="en",
        source_lang="zh",
        model="opus-mt-zh-en",
    )

    assert first == "llm:商品标题"
    assert second == "llm:商品标题"
    assert batch == ["opus-mt-zh-en:连衣裙", "opus-mt-zh-en:衬衫"]

    # Every request performs a lookup, including the repeated one.
    expected_lookups = [
        ("llm", "en", "商品标题"),
        ("llm", "en", "商品标题"),
        ("opus-mt-zh-en", "en", "连衣裙"),
        ("opus-mt-zh-en", "en", "衬衫"),
    ]
    assert recording_cache.get_calls == expected_lookups

    # The repeated llm request produces no second write.
    expected_writes = [
        ("llm", "en", "商品标题", "llm:商品标题"),
        ("opus-mt-zh-en", "en", "连衣裙", "opus-mt-zh-en:连衣裙"),
        ("opus-mt-zh-en", "en", "衬衫", "opus-mt-zh-en:衬衫"),
    ]
    assert recording_cache.set_calls == expected_writes
translation/README.md
| @@ -75,12 +75,8 @@ services: | @@ -75,12 +75,8 @@ services: | ||
| 75 | default_scene: "general" | 75 | default_scene: "general" |
| 76 | timeout_sec: 10.0 | 76 | timeout_sec: 10.0 |
| 77 | cache: | 77 | cache: |
| 78 | - enabled: true | ||
| 79 | - key_prefix: "trans:v2" | ||
| 80 | ttl_seconds: 62208000 | 78 | ttl_seconds: 62208000 |
| 81 | sliding_expiration: true | 79 | sliding_expiration: true |
| 82 | - key_include_scene: true | ||
| 83 | - key_include_source_lang: true | ||
| 84 | capabilities: | 80 | capabilities: |
| 85 | qwen-mt: | 81 | qwen-mt: |
| 86 | enabled: true | 82 | enabled: true |
| @@ -95,11 +91,13 @@ services: | @@ -95,11 +91,13 @@ services: | ||
| 95 | model: "qwen-flash" | 91 | model: "qwen-flash" |
| 96 | base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1" | 92 | base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1" |
| 97 | timeout_sec: 30.0 | 93 | timeout_sec: 30.0 |
| 94 | + use_cache: true | ||
| 98 | deepl: | 95 | deepl: |
| 99 | enabled: false | 96 | enabled: false |
| 100 | backend: "deepl" | 97 | backend: "deepl" |
| 101 | api_url: "https://api.deepl.com/v2/translate" | 98 | api_url: "https://api.deepl.com/v2/translate" |
| 102 | timeout_sec: 10.0 | 99 | timeout_sec: 10.0 |
| 100 | + use_cache: true | ||
| 103 | nllb-200-distilled-600m: | 101 | nllb-200-distilled-600m: |
| 104 | enabled: true | 102 | enabled: true |
| 105 | backend: "local_nllb" | 103 | backend: "local_nllb" |
| @@ -112,6 +110,7 @@ services: | @@ -112,6 +110,7 @@ services: | ||
| 112 | max_new_tokens: 64 | 110 | max_new_tokens: 64 |
| 113 | num_beams: 1 | 111 | num_beams: 1 |
| 114 | attn_implementation: "sdpa" | 112 | attn_implementation: "sdpa" |
| 113 | + use_cache: true | ||
| 115 | opus-mt-zh-en: | 114 | opus-mt-zh-en: |
| 116 | enabled: true | 115 | enabled: true |
| 117 | backend: "local_marian" | 116 | backend: "local_marian" |
| @@ -123,6 +122,7 @@ services: | @@ -123,6 +122,7 @@ services: | ||
| 123 | max_input_length: 256 | 122 | max_input_length: 256 |
| 124 | max_new_tokens: 256 | 123 | max_new_tokens: 256 |
| 125 | num_beams: 1 | 124 | num_beams: 1 |
| 125 | + use_cache: true | ||
| 126 | opus-mt-en-zh: | 126 | opus-mt-en-zh: |
| 127 | enabled: true | 127 | enabled: true |
| 128 | backend: "local_marian" | 128 | backend: "local_marian" |
| @@ -134,6 +134,7 @@ services: | @@ -134,6 +134,7 @@ services: | ||
| 134 | max_input_length: 256 | 134 | max_input_length: 256 |
| 135 | max_new_tokens: 256 | 135 | max_new_tokens: 256 |
| 136 | num_beams: 1 | 136 | num_beams: 1 |
| 137 | + use_cache: true | ||
| 137 | ``` | 138 | ``` |
| 138 | 139 | ||
| 139 | 配置边界: | 140 | 配置边界: |
| @@ -247,16 +248,20 @@ TRANSLATION_PORT=6006 | @@ -247,16 +248,20 @@ TRANSLATION_PORT=6006 | ||
| 247 | 248 | ||
| 248 | ```json | 249 | ```json |
| 249 | { | 250 | { |
| 250 | - "status": "healthy", | 251 | + "status": "healthy", |
| 251 | "service": "translation", | 252 | "service": "translation", |
| 252 | "default_model": "llm", | 253 | "default_model": "llm", |
| 253 | "default_scene": "general", | 254 | "default_scene": "general", |
| 254 | "available_models": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"], | 255 | "available_models": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"], |
| 255 | "enabled_capabilities": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"], | 256 | "enabled_capabilities": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"], |
| 256 | - "loaded_models": ["llm"] | 257 | + "loaded_models": ["qwen-mt", "llm", "nllb-200-distilled-600m", "opus-mt-zh-en", "opus-mt-en-zh"] |
| 257 | } | 258 | } |
| 258 | ``` | 259 | ``` |
| 259 | 260 | ||
| 261 | +说明: | ||
| 262 | +- translator service 进程启动时会一次性初始化全部已启用 capability | ||
| 263 | +- 因此本地模型加载失败、依赖缺失、配置错误会在启动阶段直接暴露,而不是拖到首个在线请求 | ||
| 264 | + | ||
| 260 | ## 7. 代码调用方式 | 265 | ## 7. 代码调用方式 |
| 261 | 266 | ||
| 262 | 业务侧统一这样调用: | 267 | 业务侧统一这样调用: |
| @@ -317,6 +322,7 @@ results = translator.translate( | @@ -317,6 +322,7 @@ results = translator.translate( | ||
| 317 | - 通用大模型翻译 | 322 | - 通用大模型翻译 |
| 318 | - 根据 `scene` 生成内部 prompt | 323 | - 根据 `scene` 生成内部 prompt |
| 319 | - 更灵活,但成本和稳定性取决于上游模型 | 324 | - 更灵活,但成本和稳定性取决于上游模型 |
| 325 | +- 支持 Redis 翻译缓存 | ||
| 320 | 326 | ||
| 321 | ### 8.3 DeepL | 327 | ### 8.3 DeepL |
| 322 | 328 | ||
| @@ -327,6 +333,7 @@ results = translator.translate( | @@ -327,6 +333,7 @@ results = translator.translate( | ||
| 327 | - 商业翻译 API | 333 | - 商业翻译 API |
| 328 | - scene 会映射到内部上下文 | 334 | - scene 会映射到内部上下文 |
| 329 | - 当前默认关闭 | 335 | - 当前默认关闭 |
| 336 | +- 支持 Redis 翻译缓存 | ||
| 330 | 337 | ||
| 331 | ### 8.4 `facebook/nllb-200-distilled-600M` | 338 | ### 8.4 `facebook/nllb-200-distilled-600M` |
| 332 | 339 | ||
| @@ -338,6 +345,7 @@ results = translator.translate( | @@ -338,6 +345,7 @@ results = translator.translate( | ||
| 338 | - 简介:多语种翻译:覆盖约 200 种语言。作为NLLB-200系列的蒸馏版本,该模型通过知识蒸馏技术将原130亿参数模型压缩至600M,同时保持了80%以上的翻译质量。 | 345 | - 简介:多语种翻译:覆盖约 200 种语言。作为NLLB-200系列的蒸馏版本,该模型通过知识蒸馏技术将原130亿参数模型压缩至600M,同时保持了80%以上的翻译质量。 |
| 339 | - 本地目录:`models/translation/facebook/nllb-200-distilled-600M` | 346 | - 本地目录:`models/translation/facebook/nllb-200-distilled-600M` |
| 340 | - 当前磁盘占用:约 `2.4G` | 347 | - 当前磁盘占用:约 `2.4G` |
| 348 | +- 支持 Redis 翻译缓存 | ||
| 341 | - 模型类型:多语种 Seq2Seq 机器翻译模型 | 349 | - 模型类型:多语种 Seq2Seq 机器翻译模型 |
| 342 | - 来源:Meta NLLB(No Language Left Behind)系列的 600M 蒸馏版 | 350 | - 来源:Meta NLLB(No Language Left Behind)系列的 600M 蒸馏版 |
| 343 | - 结构特点: | 351 | - 结构特点: |
| @@ -424,6 +432,7 @@ results = translator.translate( | @@ -424,6 +432,7 @@ results = translator.translate( | ||
| 424 | - encoder-decoder Seq2Seq | 432 | - encoder-decoder Seq2Seq |
| 425 | - 聚焦特定语言对 | 433 | - 聚焦特定语言对 |
| 426 | - 模型更小、加载更轻、吞吐更高 | 434 | - 模型更小、加载更轻、吞吐更高 |
| 435 | +- 支持 Redis 翻译缓存 | ||
| 427 | 436 | ||
| 428 | ### 8.6 `opus-mt-en-zh` | 437 | ### 8.6 `opus-mt-en-zh` |
| 429 | 438 | ||
| @@ -441,6 +450,13 @@ results = translator.translate( | @@ -441,6 +450,13 @@ results = translator.translate( | ||
| 441 | - encoder-decoder Seq2Seq | 450 | - encoder-decoder Seq2Seq |
| 442 | - 双语定向模型 | 451 | - 双语定向模型 |
| 443 | - 更适合中英双向拆分部署 | 452 | - 更适合中英双向拆分部署 |
| 453 | +- 支持 Redis 翻译缓存 | ||
| 454 | + | ||
| 455 | +### 8.7 翻译缓存 | ||
| 456 | + | ||
| 457 | +- 所有 translation capability 都使用统一的 Redis 缓存层 | ||
| 458 | +- 每个 capability 通过各自的 `use_cache` 控制是否启用缓存 | ||
| 459 | +- 缓存 key 格式固定为 `trans:{model}:{target_lang}:{source_text[:4]}{sha256}` | ||
| 444 | 460 | ||
| 445 | ## 9. 本地模型安装与部署 | 461 | ## 9. 本地模型安装与部署 |
| 446 | 462 |
translation/backends/qwen_mt.py
| 1 | -"""Qwen-MT translation backend with cache support.""" | 1 | +"""Qwen-MT translation backend.""" |
| 2 | 2 | ||
| 3 | from __future__ import annotations | 3 | from __future__ import annotations |
| 4 | 4 | ||
| 5 | -import hashlib | ||
| 6 | import logging | 5 | import logging |
| 7 | import os | 6 | import os |
| 8 | import re | 7 | import re |
| 9 | import time | 8 | import time |
| 10 | from typing import List, Optional, Sequence, Union | 9 | from typing import List, Optional, Sequence, Union |
| 11 | 10 | ||
| 12 | -import redis | ||
| 13 | from openai import OpenAI | 11 | from openai import OpenAI |
| 14 | 12 | ||
| 15 | -from config.env_config import DASHSCOPE_API_KEY, REDIS_CONFIG | ||
| 16 | -from config.services_config import get_translation_cache_config | 13 | +from config.env_config import DASHSCOPE_API_KEY |
| 17 | from translation.languages import QWEN_LANGUAGE_CODES | 14 | from translation.languages import QWEN_LANGUAGE_CODES |
| 18 | 15 | ||
| 19 | logger = logging.getLogger(__name__) | 16 | logger = logging.getLogger(__name__) |
| @@ -26,7 +23,6 @@ class QwenMTTranslationBackend: | @@ -26,7 +23,6 @@ class QwenMTTranslationBackend: | ||
| 26 | model: str, | 23 | model: str, |
| 27 | base_url: str, | 24 | base_url: str, |
| 28 | api_key: Optional[str] = None, | 25 | api_key: Optional[str] = None, |
| 29 | - use_cache: bool = True, | ||
| 30 | timeout: int = 10, | 26 | timeout: int = 10, |
| 31 | glossary_id: Optional[str] = None, | 27 | glossary_id: Optional[str] = None, |
| 32 | ): | 28 | ): |
| @@ -35,16 +31,8 @@ class QwenMTTranslationBackend: | @@ -35,16 +31,8 @@ class QwenMTTranslationBackend: | ||
| 35 | self.qwen_model_name = self._normalize_model_name(model) | 31 | self.qwen_model_name = self._normalize_model_name(model) |
| 36 | self.base_url = base_url | 32 | self.base_url = base_url |
| 37 | self.timeout = int(timeout) | 33 | self.timeout = int(timeout) |
| 38 | - self.use_cache = bool(use_cache) | ||
| 39 | self.glossary_id = glossary_id | 34 | self.glossary_id = glossary_id |
| 40 | 35 | ||
| 41 | - cache_cfg = get_translation_cache_config() | ||
| 42 | - self.cache_prefix = str(cache_cfg["key_prefix"]) | ||
| 43 | - self.expire_seconds = int(cache_cfg["ttl_seconds"]) | ||
| 44 | - self.cache_sliding_expiration = bool(cache_cfg["sliding_expiration"]) | ||
| 45 | - self.cache_include_scene = bool(cache_cfg["key_include_scene"]) | ||
| 46 | - self.cache_include_source_lang = bool(cache_cfg["key_include_source_lang"]) | ||
| 47 | - | ||
| 48 | self._api_key = api_key or self._default_api_key(self.model) | 36 | self._api_key = api_key or self._default_api_key(self.model) |
| 49 | self._qwen_client: Optional[OpenAI] = None | 37 | self._qwen_client: Optional[OpenAI] = None |
| 50 | if self._api_key: | 38 | if self._api_key: |
| @@ -55,10 +43,6 @@ class QwenMTTranslationBackend: | @@ -55,10 +43,6 @@ class QwenMTTranslationBackend: | ||
| 55 | else: | 43 | else: |
| 56 | logger.warning("DASHSCOPE_API_KEY not set; qwen-mt translation unavailable") | 44 | logger.warning("DASHSCOPE_API_KEY not set; qwen-mt translation unavailable") |
| 57 | 45 | ||
| 58 | - self.redis_client = None | ||
| 59 | - if self.use_cache and bool(cache_cfg["enabled"]): | ||
| 60 | - self.redis_client = self._init_redis_client() | ||
| 61 | - | ||
| 62 | @property | 46 | @property |
| 63 | def supports_batch(self) -> bool: | 47 | def supports_batch(self) -> bool: |
| 64 | return True | 48 | return True |
| @@ -82,38 +66,6 @@ class QwenMTTranslationBackend: | @@ -82,38 +66,6 @@ class QwenMTTranslationBackend: | ||
| 82 | del model | 66 | del model |
| 83 | return DASHSCOPE_API_KEY or os.getenv("DASHSCOPE_API_KEY") | 67 | return DASHSCOPE_API_KEY or os.getenv("DASHSCOPE_API_KEY") |
| 84 | 68 | ||
| 85 | - def _init_redis_client(self): | ||
| 86 | - try: | ||
| 87 | - client = redis.Redis( | ||
| 88 | - host=REDIS_CONFIG.get("host", "localhost"), | ||
| 89 | - port=REDIS_CONFIG.get("port", 6479), | ||
| 90 | - password=REDIS_CONFIG.get("password"), | ||
| 91 | - decode_responses=True, | ||
| 92 | - socket_timeout=REDIS_CONFIG.get("socket_timeout", 1), | ||
| 93 | - socket_connect_timeout=REDIS_CONFIG.get("socket_connect_timeout", 1), | ||
| 94 | - retry_on_timeout=REDIS_CONFIG.get("retry_on_timeout", False), | ||
| 95 | - health_check_interval=10, | ||
| 96 | - ) | ||
| 97 | - client.ping() | ||
| 98 | - return client | ||
| 99 | - except Exception as exc: | ||
| 100 | - logger.warning("Failed to initialize translation redis cache: %s", exc) | ||
| 101 | - return None | ||
| 102 | - | ||
| 103 | - def _build_cache_key( | ||
| 104 | - self, | ||
| 105 | - text: str, | ||
| 106 | - target_lang: str, | ||
| 107 | - source_lang: Optional[str], | ||
| 108 | - scene: Optional[str], | ||
| 109 | - ) -> str: | ||
| 110 | - src = (source_lang or "auto").strip().lower() if self.cache_include_source_lang else "-" | ||
| 111 | - tgt = (target_lang or "").strip().lower() | ||
| 112 | - scn = (scene or "").strip() if self.cache_include_scene else "" | ||
| 113 | - payload = f"model={self.model}\nsrc={src}\ntgt={tgt}\nscene={scn}\ntext={text}" | ||
| 114 | - digest = hashlib.sha256(payload.encode("utf-8")).hexdigest() | ||
| 115 | - return f"{self.cache_prefix}:{self.model}:{src}:{tgt}:{digest}" | ||
| 116 | - | ||
| 117 | def translate( | 69 | def translate( |
| 118 | self, | 70 | self, |
| 119 | text: Union[str, Sequence[str]], | 71 | text: Union[str, Sequence[str]], |
| @@ -146,14 +98,7 @@ class QwenMTTranslationBackend: | @@ -146,14 +98,7 @@ class QwenMTTranslationBackend: | ||
| 146 | if tgt == "zh" and (self._contains_chinese(text) or self._is_pure_number(text)): | 98 | if tgt == "zh" and (self._contains_chinese(text) or self._is_pure_number(text)): |
| 147 | return text | 99 | return text |
| 148 | 100 | ||
| 149 | - cached = self._get_cached_translation_redis(text, tgt, src, scene) | ||
| 150 | - if cached is not None: | ||
| 151 | - return cached | ||
| 152 | - | ||
| 153 | result = self._translate_qwen(text, tgt, src) | 101 | result = self._translate_qwen(text, tgt, src) |
| 154 | - | ||
| 155 | - if result is not None: | ||
| 156 | - self._set_cached_translation_redis(text, tgt, result, src, scene) | ||
| 157 | return result | 102 | return result |
| 158 | 103 | ||
| 159 | def _translate_qwen( | 104 | def _translate_qwen( |
| @@ -197,41 +142,6 @@ class QwenMTTranslationBackend: | @@ -197,41 +142,6 @@ class QwenMTTranslationBackend: | ||
| 197 | ) | 142 | ) |
| 198 | return None | 143 | return None |
| 199 | 144 | ||
| 200 | - def _get_cached_translation_redis( | ||
| 201 | - self, | ||
| 202 | - text: str, | ||
| 203 | - target_lang: str, | ||
| 204 | - source_lang: Optional[str] = None, | ||
| 205 | - scene: Optional[str] = None, | ||
| 206 | - ) -> Optional[str]: | ||
| 207 | - if not self.redis_client: | ||
| 208 | - return None | ||
| 209 | - key = self._build_cache_key(text, target_lang, source_lang, scene) | ||
| 210 | - try: | ||
| 211 | - value = self.redis_client.get(key) | ||
| 212 | - if value and self.cache_sliding_expiration: | ||
| 213 | - self.redis_client.expire(key, self.expire_seconds) | ||
| 214 | - return value | ||
| 215 | - except Exception as exc: | ||
| 216 | - logger.warning("Redis get translation cache failed: %s", exc) | ||
| 217 | - return None | ||
| 218 | - | ||
| 219 | - def _set_cached_translation_redis( | ||
| 220 | - self, | ||
| 221 | - text: str, | ||
| 222 | - target_lang: str, | ||
| 223 | - translation: str, | ||
| 224 | - source_lang: Optional[str] = None, | ||
| 225 | - scene: Optional[str] = None, | ||
| 226 | - ) -> None: | ||
| 227 | - if not self.redis_client: | ||
| 228 | - return | ||
| 229 | - key = self._build_cache_key(text, target_lang, source_lang, scene) | ||
| 230 | - try: | ||
| 231 | - self.redis_client.setex(key, self.expire_seconds, translation) | ||
| 232 | - except Exception as exc: | ||
| 233 | - logger.warning("Redis set translation cache failed: %s", exc) | ||
| 234 | - | ||
| 235 | @staticmethod | 145 | @staticmethod |
| 236 | def _contains_chinese(text: str) -> bool: | 146 | def _contains_chinese(text: str) -> bool: |
| 237 | return bool(re.search(r"[\u4e00-\u9fff]", text or "")) | 147 | return bool(re.search(r"[\u4e00-\u9fff]", text or "")) |
| @@ -0,0 +1,92 @@ | @@ -0,0 +1,92 @@ | ||
| 1 | +"""Shared translation cache utilities.""" | ||
| 2 | + | ||
| 3 | +from __future__ import annotations | ||
| 4 | + | ||
| 5 | +import hashlib | ||
| 6 | +import logging | ||
| 7 | +from typing import Mapping, Optional | ||
| 8 | + | ||
| 9 | +import redis | ||
| 10 | + | ||
| 11 | +from config.env_config import REDIS_CONFIG | ||
| 12 | + | ||
logger = logging.getLogger(__name__)


class TranslationCache:
    """Redis-backed cache shared by all translation capabilities.

    Keys have the form ``trans:{model}:{target_lang}:{text[:4]}{sha256(text)}``.
    Every Redis failure is logged and swallowed: a broken cache degrades to
    "no cache" instead of failing the translation request.
    """

    def __init__(self, config: Mapping[str, object]) -> None:
        """Read TTL settings from *config* and try to connect to Redis.

        Args:
            config: Mapping that must contain ``ttl_seconds`` and
                ``sliding_expiration``.

        Raises:
            KeyError: If either required key is missing.
        """
        self.ttl_seconds = int(config["ttl_seconds"])
        self.sliding_expiration = bool(config["sliding_expiration"])
        # None when the connection attempt failed; all operations become no-ops.
        self.redis_client = self._init_redis_client()

    @property
    def available(self) -> bool:
        """Return True when a Redis client was successfully created."""
        return self.redis_client is not None

    def build_key(self, *, model: str, target_lang: str, source_text: str) -> str:
        """Return the cache key for one (model, target language, text) triple.

        Model and target language are stripped and lower-cased. The text
        contributes its first four characters in clear followed by the
        SHA-256 hex digest of the full UTF-8 encoded text.
        """
        normalized_model = str(model or "").strip().lower()
        normalized_target_lang = str(target_lang or "").strip().lower()
        text = str(source_text or "")
        text_prefix = text[:4]
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        return f"trans:{normalized_model}:{normalized_target_lang}:{text_prefix}{digest}"

    def get(self, *, model: str, target_lang: str, source_text: str) -> Optional[str]:
        """Return the cached translation, or None on a miss or Redis failure.

        With sliding expiration enabled, a hit also refreshes the key's TTL.
        A failure of that refresh is logged but does not discard the value
        that was already read.
        """
        if self.redis_client is None:
            return None
        key = self.build_key(model=model, target_lang=target_lang, source_text=source_text)
        try:
            value = self.redis_client.get(key)
        except Exception as exc:
            logger.warning("Redis get translation cache failed: %s", exc)
            return None
        logger.info(
            "Translation cache %s | model=%s target_lang=%s text_len=%s key=%s",
            "hit" if value is not None else "miss",
            model,
            target_lang,
            len(str(source_text or "")),
            key,
        )
        # Use the same "is not None" notion of a hit as the log line above, and
        # keep the refresh in its own try so a TTL error cannot turn a hit into
        # a miss.
        if value is not None and self.sliding_expiration:
            try:
                self.redis_client.expire(key, self.ttl_seconds)
            except Exception as exc:
                logger.warning("Redis refresh translation cache TTL failed: %s", exc)
        return value

    def set(self, *, model: str, target_lang: str, source_text: str, translated_text: str) -> None:
        """Store *translated_text* under the computed key with the configured TTL.

        Does nothing when Redis is unavailable; write failures are logged only.
        """
        if self.redis_client is None:
            return
        key = self.build_key(model=model, target_lang=target_lang, source_text=source_text)
        try:
            self.redis_client.setex(key, self.ttl_seconds, translated_text)
            logger.info(
                "Translation cache write | model=%s target_lang=%s text_len=%s result_len=%s ttl_seconds=%s key=%s",
                model,
                target_lang,
                len(str(source_text or "")),
                len(str(translated_text or "")),
                self.ttl_seconds,
                key,
            )
        except Exception as exc:
            logger.warning("Redis set translation cache failed: %s", exc)

    @staticmethod
    def _init_redis_client() -> "Optional[redis.Redis]":
        """Create a Redis client from ``REDIS_CONFIG`` and verify it with a ping.

        Returns:
            The connected client, or None when creation or the ping fails.
        """
        try:
            client = redis.Redis(
                host=REDIS_CONFIG.get("host", "localhost"),
                port=REDIS_CONFIG.get("port", 6479),
                password=REDIS_CONFIG.get("password"),
                decode_responses=True,
                socket_timeout=REDIS_CONFIG.get("socket_timeout", 1),
                socket_connect_timeout=REDIS_CONFIG.get("socket_connect_timeout", 1),
                retry_on_timeout=REDIS_CONFIG.get("retry_on_timeout", False),
                health_check_interval=10,
            )
            client.ping()
            return client
        except Exception as exc:
            logger.warning("Failed to initialize translation redis cache: %s", exc)
            return None
translation/service.py
| @@ -3,10 +3,10 @@ | @@ -3,10 +3,10 @@ | ||
| 3 | from __future__ import annotations | 3 | from __future__ import annotations |
| 4 | 4 | ||
| 5 | import logging | 5 | import logging |
| 6 | -import threading | ||
| 7 | from typing import Dict, List, Optional | 6 | from typing import Dict, List, Optional |
| 8 | 7 | ||
| 9 | from config.services_config import get_translation_config | 8 | from config.services_config import get_translation_config |
| 9 | +from translation.cache import TranslationCache | ||
| 10 | from translation.protocols import TranslateInput, TranslateOutput, TranslationBackendProtocol | 10 | from translation.protocols import TranslateInput, TranslateOutput, TranslationBackendProtocol |
| 11 | from translation.settings import ( | 11 | from translation.settings import ( |
| 12 | TranslationConfig, | 12 | TranslationConfig, |
| @@ -25,10 +25,10 @@ class TranslationService: | @@ -25,10 +25,10 @@ class TranslationService: | ||
| 25 | def __init__(self, config: Optional[TranslationConfig] = None) -> None: | 25 | def __init__(self, config: Optional[TranslationConfig] = None) -> None: |
| 26 | self.config = config or get_translation_config() | 26 | self.config = config or get_translation_config() |
| 27 | self._enabled_capabilities = self._collect_enabled_capabilities() | 27 | self._enabled_capabilities = self._collect_enabled_capabilities() |
| 28 | - self._backends: Dict[str, TranslationBackendProtocol] = {} | ||
| 29 | - self._backend_lock = threading.Lock() | ||
| 30 | if not self._enabled_capabilities: | 28 | if not self._enabled_capabilities: |
| 31 | raise ValueError("No enabled translation backends found in services.translation.capabilities") | 29 | raise ValueError("No enabled translation backends found in services.translation.capabilities") |
| 30 | + self._translation_cache = TranslationCache(self.config["cache"]) | ||
| 31 | + self._backends = self._initialize_backends() | ||
| 32 | 32 | ||
| 33 | def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]: | 33 | def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]: |
| 34 | enabled: Dict[str, Dict[str, object]] = {} | 34 | enabled: Dict[str, Dict[str, object]] = {} |
| @@ -59,6 +59,25 @@ class TranslationService: | @@ -59,6 +59,25 @@ class TranslationService: | ||
| 59 | raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'") | 59 | raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'") |
| 60 | return factory(name=name, cfg=cfg) | 60 | return factory(name=name, cfg=cfg) |
| 61 | 61 | ||
def _initialize_backends(self) -> Dict[str, TranslationBackendProtocol]:
    """Eagerly create one backend per enabled capability.

    Capabilities are processed in the iteration order of
    ``self._enabled_capabilities``. Any exception raised while creating a
    backend propagates to the caller.

    Returns:
        Mapping from capability name to its created backend.
    """
    initialized: Dict[str, TranslationBackendProtocol] = {}
    for model_name, cfg in self._enabled_capabilities.items():
        kind = str(cfg["backend"])
        logger.info("Initializing translation backend | model=%s backend=%s", model_name, kind)
        backend = self._create_backend(name=model_name, backend_type=kind, cfg=cfg)
        initialized[model_name] = backend
        logger.info(
            "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s",
            model_name,
            kind,
            bool(cfg.get("use_cache")),
            # Fall back to the capability name for backends without a ``model`` attribute.
            getattr(backend, "model", model_name),
        )
    return initialized
| 80 | + | ||
| 62 | def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: | 81 | def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: |
| 63 | from translation.backends.qwen_mt import QwenMTTranslationBackend | 82 | from translation.backends.qwen_mt import QwenMTTranslationBackend |
| 64 | 83 | ||
| @@ -67,7 +86,6 @@ class TranslationService: | @@ -67,7 +86,6 @@ class TranslationService: | ||
| 67 | model=str(cfg["model"]).strip(), | 86 | model=str(cfg["model"]).strip(), |
| 68 | base_url=str(cfg["base_url"]).strip(), | 87 | base_url=str(cfg["base_url"]).strip(), |
| 69 | api_key=cfg.get("api_key"), | 88 | api_key=cfg.get("api_key"), |
| 70 | - use_cache=bool(cfg["use_cache"]), | ||
| 71 | timeout=int(cfg["timeout_sec"]), | 89 | timeout=int(cfg["timeout_sec"]), |
| 72 | glossary_id=cfg.get("glossary_id"), | 90 | glossary_id=cfg.get("glossary_id"), |
| 73 | ) | 91 | ) |
| @@ -138,26 +156,12 @@ class TranslationService: | @@ -138,26 +156,12 @@ class TranslationService: | ||
| 138 | 156 | ||
| 139 | def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: | 157 | def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: |
| 140 | normalized = normalize_translation_model(self.config, model) | 158 | normalized = normalize_translation_model(self.config, model) |
| 141 | - capability_cfg = self._enabled_capabilities.get(normalized) | ||
| 142 | - if capability_cfg is None: | 159 | + backend = self._backends.get(normalized) |
| 160 | + if backend is None: | ||
| 143 | raise ValueError( | 161 | raise ValueError( |
| 144 | f"Translation model '{normalized}' is not enabled. " | 162 | f"Translation model '{normalized}' is not enabled. " |
| 145 | f"Available models: {', '.join(self.available_models) or 'none'}" | 163 | f"Available models: {', '.join(self.available_models) or 'none'}" |
| 146 | ) | 164 | ) |
| 147 | - backend = self._backends.get(normalized) | ||
| 148 | - if backend is not None: | ||
| 149 | - return backend | ||
| 150 | - with self._backend_lock: | ||
| 151 | - backend = self._backends.get(normalized) | ||
| 152 | - if backend is None: | ||
| 153 | - backend_type = str(capability_cfg["backend"]) | ||
| 154 | - logger.info("Initializing translation backend | model=%s backend=%s", normalized, backend_type) | ||
| 155 | - backend = self._create_backend( | ||
| 156 | - name=normalized, | ||
| 157 | - backend_type=backend_type, | ||
| 158 | - cfg=capability_cfg, | ||
| 159 | - ) | ||
| 160 | - self._backends[normalized] = backend | ||
| 161 | return backend | 165 | return backend |
| 162 | 166 | ||
| 163 | def translate( | 167 | def translate( |
| @@ -169,11 +173,176 @@ class TranslationService: | @@ -169,11 +173,176 @@ class TranslationService: | ||
| 169 | model: Optional[str] = None, | 173 | model: Optional[str] = None, |
| 170 | scene: Optional[str] = None, | 174 | scene: Optional[str] = None, |
| 171 | ) -> TranslateOutput: | 175 | ) -> TranslateOutput: |
| 172 | - backend = self.get_backend(model) | 176 | + normalized_model = normalize_translation_model(self.config, model) |
| 177 | + backend = self.get_backend(normalized_model) | ||
| 173 | active_scene = normalize_translation_scene(self.config, scene) | 178 | active_scene = normalize_translation_scene(self.config, scene) |
| 174 | - return backend.translate( | 179 | + capability_cfg = self._enabled_capabilities[normalized_model] |
| 180 | + use_cache = bool(capability_cfg.get("use_cache")) | ||
| 181 | + text_count = 1 if isinstance(text, str) else len(list(text)) | ||
| 182 | + logger.info( | ||
| 183 | + "Translation route | model=%s backend=%s scene=%s target_lang=%s source_lang=%s count=%s use_cache=%s cache_available=%s", | ||
| 184 | + normalized_model, | ||
| 185 | + getattr(backend, "model", normalized_model), | ||
| 186 | + active_scene, | ||
| 187 | + target_lang, | ||
| 188 | + source_lang or "auto", | ||
| 189 | + text_count, | ||
| 190 | + use_cache, | ||
| 191 | + self._translation_cache.available, | ||
| 192 | + ) | ||
| 193 | + if not use_cache or not self._translation_cache.available: | ||
| 194 | + return backend.translate( | ||
| 195 | + text=text, | ||
| 196 | + target_lang=target_lang, | ||
| 197 | + source_lang=source_lang, | ||
| 198 | + scene=active_scene, | ||
| 199 | + ) | ||
| 200 | + | ||
| 201 | + if isinstance(text, str): | ||
| 202 | + return self._translate_with_cache( | ||
| 203 | + backend, | ||
| 204 | + text=text, | ||
| 205 | + target_lang=target_lang, | ||
| 206 | + source_lang=source_lang, | ||
| 207 | + scene=active_scene, | ||
| 208 | + model=normalized_model, | ||
| 209 | + ) | ||
| 210 | + | ||
| 211 | + return self._translate_batch_with_cache( | ||
| 175 | text=text, | 212 | text=text, |
| 176 | target_lang=target_lang, | 213 | target_lang=target_lang, |
| 177 | source_lang=source_lang, | 214 | source_lang=source_lang, |
| 215 | + backend=backend, | ||
| 178 | scene=active_scene, | 216 | scene=active_scene, |
| 217 | + model=normalized_model, | ||
| 218 | + ) | ||
| 219 | + | ||
| 220 | + def _translate_with_cache( | ||
| 221 | + self, | ||
| 222 | + backend: TranslationBackendProtocol, | ||
| 223 | + *, | ||
| 224 | + text: str, | ||
| 225 | + target_lang: str, | ||
| 226 | + source_lang: Optional[str], | ||
| 227 | + scene: str, | ||
| 228 | + model: str, | ||
| 229 | + ) -> Optional[str]: | ||
| 230 | + if not text.strip(): | ||
| 231 | + return text | ||
| 232 | + cached = self._translation_cache.get(model=model, target_lang=target_lang, source_text=text) | ||
| 233 | + if cached is not None: | ||
| 234 | + logger.info( | ||
| 235 | + "Translation cache served | model=%s scene=%s target_lang=%s source_lang=%s text_len=%s", | ||
| 236 | + model, | ||
| 237 | + scene, | ||
| 238 | + target_lang, | ||
| 239 | + source_lang or "auto", | ||
| 240 | + len(text), | ||
| 241 | + ) | ||
| 242 | + return cached | ||
| 243 | + translated = backend.translate( | ||
| 244 | + text=text, | ||
| 245 | + target_lang=target_lang, | ||
| 246 | + source_lang=source_lang, | ||
| 247 | + scene=scene, | ||
| 179 | ) | 248 | ) |
| 249 | + if translated is not None: | ||
| 250 | + self._translation_cache.set( | ||
| 251 | + model=model, | ||
| 252 | + target_lang=target_lang, | ||
| 253 | + source_text=text, | ||
| 254 | + translated_text=translated, | ||
| 255 | + ) | ||
| 256 | + logger.info( | ||
| 257 | + "Translation backend result cached | model=%s scene=%s target_lang=%s source_lang=%s text_len=%s result_len=%s", | ||
| 258 | + model, | ||
| 259 | + scene, | ||
| 260 | + target_lang, | ||
| 261 | + source_lang or "auto", | ||
| 262 | + len(text), | ||
| 263 | + len(str(translated)), | ||
| 264 | + ) | ||
| 265 | + else: | ||
| 266 | + logger.warning( | ||
| 267 | + "Translation backend returned empty result | model=%s scene=%s target_lang=%s source_lang=%s text_len=%s", | ||
| 268 | + model, | ||
| 269 | + scene, | ||
| 270 | + target_lang, | ||
| 271 | + source_lang or "auto", | ||
| 272 | + len(text), | ||
| 273 | + ) | ||
| 274 | + return translated | ||
| 275 | + | ||
| 276 | + def _translate_batch_with_cache( | ||
| 277 | + self, | ||
| 278 | + *, | ||
| 279 | + text: TranslateInput, | ||
| 280 | + target_lang: str, | ||
| 281 | + source_lang: Optional[str], | ||
| 282 | + backend: TranslationBackendProtocol, | ||
| 283 | + scene: str, | ||
| 284 | + model: str, | ||
| 285 | + ) -> List[Optional[str]]: | ||
| 286 | + texts = list(text) | ||
| 287 | + results: List[Optional[str]] = [None] * len(texts) | ||
| 288 | + misses: List[str] = [] | ||
| 289 | + miss_indices: List[int] = [] | ||
| 290 | + cache_hits = 0 | ||
| 291 | + | ||
| 292 | + for idx, item in enumerate(texts): | ||
| 293 | + normalized_text = "" if item is None else str(item) | ||
| 294 | + if not normalized_text.strip(): | ||
| 295 | + results[idx] = normalized_text | ||
| 296 | + continue | ||
| 297 | + cached = self._translation_cache.get( | ||
| 298 | + model=model, | ||
| 299 | + target_lang=target_lang, | ||
| 300 | + source_text=normalized_text, | ||
| 301 | + ) | ||
| 302 | + if cached is not None: | ||
| 303 | + results[idx] = cached | ||
| 304 | + cache_hits += 1 | ||
| 305 | + continue | ||
| 306 | + misses.append(normalized_text) | ||
| 307 | + miss_indices.append(idx) | ||
| 308 | + | ||
| 309 | + logger.info( | ||
| 310 | + "Translation batch cache summary | model=%s scene=%s target_lang=%s source_lang=%s total=%s cache_hits=%s cache_misses=%s", | ||
| 311 | + model, | ||
| 312 | + scene, | ||
| 313 | + target_lang, | ||
| 314 | + source_lang or "auto", | ||
| 315 | + len(texts), | ||
| 316 | + cache_hits, | ||
| 317 | + len(misses), | ||
| 318 | + ) | ||
| 319 | + | ||
| 320 | + if misses: | ||
| 321 | + translated = backend.translate( | ||
| 322 | + text=misses, | ||
| 323 | + target_lang=target_lang, | ||
| 324 | + source_lang=source_lang, | ||
| 325 | + scene=scene, | ||
| 326 | + ) | ||
| 327 | + translated_list = translated if isinstance(translated, list) else [translated] | ||
| 328 | + for idx, original_text, translated_text in zip(miss_indices, misses, translated_list): | ||
| 329 | + results[idx] = translated_text | ||
| 330 | + if translated_text is not None: | ||
| 331 | + self._translation_cache.set( | ||
| 332 | + model=model, | ||
| 333 | + target_lang=target_lang, | ||
| 334 | + source_text=original_text, | ||
| 335 | + translated_text=translated_text, | ||
| 336 | + ) | ||
| 337 | + else: | ||
| 338 | + logger.warning( | ||
| 339 | + "Translation batch item returned empty result | model=%s scene=%s target_lang=%s source_lang=%s item_index=%s text_len=%s", | ||
| 340 | + model, | ||
| 341 | + scene, | ||
| 342 | + target_lang, | ||
| 343 | + source_lang or "auto", | ||
| 344 | + idx, | ||
| 345 | + len(original_text), | ||
| 346 | + ) | ||
| 347 | + | ||
| 348 | + return results |
translation/settings.py
| @@ -90,21 +90,11 @@ def _build_cache_config(raw_cache: Any) -> Dict[str, Any]: | @@ -90,21 +90,11 @@ def _build_cache_config(raw_cache: Any) -> Dict[str, Any]: | ||
| 90 | if not isinstance(raw_cache, Mapping): | 90 | if not isinstance(raw_cache, Mapping): |
| 91 | raise ValueError("services.translation.cache must be a mapping") | 91 | raise ValueError("services.translation.cache must be a mapping") |
| 92 | return { | 92 | return { |
| 93 | - "enabled": _require_bool(raw_cache.get("enabled"), "services.translation.cache.enabled"), | ||
| 94 | - "key_prefix": _require_string(raw_cache.get("key_prefix"), "services.translation.cache.key_prefix"), | ||
| 95 | "ttl_seconds": _require_positive_int(raw_cache.get("ttl_seconds"), "services.translation.cache.ttl_seconds"), | 93 | "ttl_seconds": _require_positive_int(raw_cache.get("ttl_seconds"), "services.translation.cache.ttl_seconds"), |
| 96 | "sliding_expiration": _require_bool( | 94 | "sliding_expiration": _require_bool( |
| 97 | raw_cache.get("sliding_expiration"), | 95 | raw_cache.get("sliding_expiration"), |
| 98 | "services.translation.cache.sliding_expiration", | 96 | "services.translation.cache.sliding_expiration", |
| 99 | ), | 97 | ), |
| 100 | - "key_include_scene": _require_bool( | ||
| 101 | - raw_cache.get("key_include_scene"), | ||
| 102 | - "services.translation.cache.key_include_scene", | ||
| 103 | - ), | ||
| 104 | - "key_include_source_lang": _require_bool( | ||
| 105 | - raw_cache.get("key_include_source_lang"), | ||
| 106 | - "services.translation.cache.key_include_source_lang", | ||
| 107 | - ), | ||
| 108 | } | 98 | } |
| 109 | 99 | ||
| 110 | 100 | ||
| @@ -131,12 +121,12 @@ def _build_capabilities(raw_capabilities: Any) -> Dict[str, Dict[str, Any]]: | @@ -131,12 +121,12 @@ def _build_capabilities(raw_capabilities: Any) -> Dict[str, Dict[str, Any]]: | ||
| 131 | def _validate_capability(name: str, capability: Mapping[str, Any]) -> None: | 121 | def _validate_capability(name: str, capability: Mapping[str, Any]) -> None: |
| 132 | prefix = f"services.translation.capabilities.{name}" | 122 | prefix = f"services.translation.capabilities.{name}" |
| 133 | backend = capability.get("backend") | 123 | backend = capability.get("backend") |
| 124 | + _require_bool(capability.get("use_cache"), f"{prefix}.use_cache") | ||
| 134 | 125 | ||
| 135 | if backend == "qwen_mt": | 126 | if backend == "qwen_mt": |
| 136 | _require_string(capability.get("model"), f"{prefix}.model") | 127 | _require_string(capability.get("model"), f"{prefix}.model") |
| 137 | _require_http_url(capability.get("base_url"), f"{prefix}.base_url") | 128 | _require_http_url(capability.get("base_url"), f"{prefix}.base_url") |
| 138 | _require_positive_float(capability.get("timeout_sec"), f"{prefix}.timeout_sec") | 129 | _require_positive_float(capability.get("timeout_sec"), f"{prefix}.timeout_sec") |
| 139 | - _require_bool(capability.get("use_cache"), f"{prefix}.use_cache") | ||
| 140 | return | 130 | return |
| 141 | 131 | ||
| 142 | if backend == "llm": | 132 | if backend == "llm": |