Commit 89fa3f3ccb7d7815460c21ea52ecca110435d61e
1 parent
778c299a
Sync master portability fixes from f07947a
Showing 14 changed files with 832 additions and 183 deletions.
api/translator_app.py
| @@ -271,16 +271,20 @@ async def lifespan(_: FastAPI): | @@ -271,16 +271,20 @@ async def lifespan(_: FastAPI): | ||
| 271 | """Initialize all enabled translation backends on process startup.""" | 271 | """Initialize all enabled translation backends on process startup.""" |
| 272 | logger.info("Starting Translation Service API") | 272 | logger.info("Starting Translation Service API") |
| 273 | service = get_translation_service() | 273 | service = get_translation_service() |
| 274 | + failed_models = list(getattr(service, "failed_models", [])) | ||
| 275 | + backend_errors = dict(getattr(service, "backend_errors", {})) | ||
| 274 | logger.info( | 276 | logger.info( |
| 275 | - "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s", | 277 | + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s failed_models=%s", |
| 276 | service.config["default_model"], | 278 | service.config["default_model"], |
| 277 | service.config["default_scene"], | 279 | service.config["default_scene"], |
| 278 | service.available_models, | 280 | service.available_models, |
| 279 | service.loaded_models, | 281 | service.loaded_models, |
| 282 | + failed_models, | ||
| 280 | ) | 283 | ) |
| 281 | logger.info( | 284 | logger.info( |
| 282 | - "Translation backends initialized on startup | models=%s", | 285 | + "Translation backends initialized on startup | loaded=%s failed=%s", |
| 283 | service.loaded_models, | 286 | service.loaded_models, |
| 287 | + backend_errors, | ||
| 284 | ) | 288 | ) |
| 285 | verbose_logger.info( | 289 | verbose_logger.info( |
| 286 | "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s", | 290 | "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s", |
| @@ -316,11 +320,14 @@ async def health_check(): | @@ -316,11 +320,14 @@ async def health_check(): | ||
| 316 | """Health check endpoint.""" | 320 | """Health check endpoint.""" |
| 317 | try: | 321 | try: |
| 318 | service = get_translation_service() | 322 | service = get_translation_service() |
| 323 | + failed_models = list(getattr(service, "failed_models", [])) | ||
| 324 | + backend_errors = dict(getattr(service, "backend_errors", {})) | ||
| 319 | logger.info( | 325 | logger.info( |
| 320 | - "Health check | default_model=%s default_scene=%s loaded_models=%s", | 326 | + "Health check | default_model=%s default_scene=%s loaded_models=%s failed_models=%s", |
| 321 | service.config["default_model"], | 327 | service.config["default_model"], |
| 322 | service.config["default_scene"], | 328 | service.config["default_scene"], |
| 323 | service.loaded_models, | 329 | service.loaded_models, |
| 330 | + failed_models, | ||
| 324 | ) | 331 | ) |
| 325 | return { | 332 | return { |
| 326 | "status": "healthy", | 333 | "status": "healthy", |
| @@ -330,6 +337,8 @@ async def health_check(): | @@ -330,6 +337,8 @@ async def health_check(): | ||
| 330 | "available_models": service.available_models, | 337 | "available_models": service.available_models, |
| 331 | "enabled_capabilities": get_enabled_translation_models(service.config), | 338 | "enabled_capabilities": get_enabled_translation_models(service.config), |
| 332 | "loaded_models": service.loaded_models, | 339 | "loaded_models": service.loaded_models, |
| 340 | + "failed_models": failed_models, | ||
| 341 | + "backend_errors": backend_errors, | ||
| 333 | } | 342 | } |
| 334 | except Exception as e: | 343 | except Exception as e: |
| 335 | logger.error(f"Health check failed: {e}") | 344 | logger.error(f"Health check failed: {e}") |
| @@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request): | @@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request): | ||
| 463 | latency_ms = (time.perf_counter() - request_started) * 1000 | 472 | latency_ms = (time.perf_counter() - request_started) * 1000 |
| 464 | logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms) | 473 | logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms) |
| 465 | raise HTTPException(status_code=400, detail=str(e)) from e | 474 | raise HTTPException(status_code=400, detail=str(e)) from e |
| 475 | + except RuntimeError as e: | ||
| 476 | + latency_ms = (time.perf_counter() - request_started) * 1000 | ||
| 477 | + logger.warning("Translation backend unavailable | error=%s latency_ms=%.2f", e, latency_ms) | ||
| 478 | + raise HTTPException(status_code=503, detail=str(e)) from e | ||
| 466 | except Exception as e: | 479 | except Exception as e: |
| 467 | latency_ms = (time.perf_counter() - request_started) * 1000 | 480 | latency_ms = (time.perf_counter() - request_started) * 1000 |
| 468 | logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True) | 481 | logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True) |
config/loader.py
| @@ -655,6 +655,14 @@ class AppConfigLoader: | @@ -655,6 +655,14 @@ class AppConfigLoader: | ||
| 655 | 655 | ||
| 656 | translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {} | 656 | translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {} |
| 657 | normalized_translation = build_translation_config(translation_raw) | 657 | normalized_translation = build_translation_config(translation_raw) |
| 658 | + local_translation_backends = {"local_nllb", "local_marian"} | ||
| 659 | + for capability_name, capability_cfg in normalized_translation["capabilities"].items(): | ||
| 660 | + backend_name = str(capability_cfg.get("backend") or "").strip().lower() | ||
| 661 | + if backend_name not in local_translation_backends: | ||
| 662 | + continue | ||
| 663 | + for path_key in ("model_dir", "ct2_model_dir"): | ||
| 664 | + if capability_cfg.get(path_key) not in (None, ""): | ||
| 665 | + capability_cfg[path_key] = str(self._resolve_project_path_value(capability_cfg[path_key]).resolve()) | ||
| 658 | translation_config = TranslationServiceConfig( | 666 | translation_config = TranslationServiceConfig( |
| 659 | endpoint=str(normalized_translation["service_url"]).rstrip("/"), | 667 | endpoint=str(normalized_translation["service_url"]).rstrip("/"), |
| 660 | timeout_sec=float(normalized_translation["timeout_sec"]), | 668 | timeout_sec=float(normalized_translation["timeout_sec"]), |
| @@ -749,7 +757,7 @@ class AppConfigLoader: | @@ -749,7 +757,7 @@ class AppConfigLoader: | ||
| 749 | port=port, | 757 | port=port, |
| 750 | backend=backend_name, | 758 | backend=backend_name, |
| 751 | runtime_dir=( | 759 | runtime_dir=( |
| 752 | - str(v) | 760 | + str(self._resolve_project_path_value(v).resolve()) |
| 753 | if (v := instance_raw.get("runtime_dir")) not in (None, "") | 761 | if (v := instance_raw.get("runtime_dir")) not in (None, "") |
| 754 | else None | 762 | else None |
| 755 | ), | 763 | ), |
| @@ -787,6 +795,12 @@ class AppConfigLoader: | @@ -787,6 +795,12 @@ class AppConfigLoader: | ||
| 787 | rerank=rerank_config, | 795 | rerank=rerank_config, |
| 788 | ) | 796 | ) |
| 789 | 797 | ||
| 798 | + def _resolve_project_path_value(self, value: Any) -> Path: | ||
| 799 | + candidate = Path(str(value)).expanduser() | ||
| 800 | + if candidate.is_absolute(): | ||
| 801 | + return candidate | ||
| 802 | + return self.project_root / candidate | ||
| 803 | + | ||
| 790 | def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig: | 804 | def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig: |
| 791 | if not isinstance(raw, dict): | 805 | if not isinstance(raw, dict): |
| 792 | raise ConfigurationError("tenant_config must be a mapping") | 806 | raise ConfigurationError("tenant_config must be a mapping") |
| @@ -822,13 +836,6 @@ class AppConfigLoader: | @@ -822,13 +836,6 @@ class AppConfigLoader: | ||
| 822 | 836 | ||
| 823 | def _build_infrastructure_config(self, environment: str) -> InfrastructureConfig: | 837 | def _build_infrastructure_config(self, environment: str) -> InfrastructureConfig: |
| 824 | del environment | 838 | del environment |
| 825 | - _redis_db_raw = os.getenv("REDIS_DB") or os.getenv("REDIS_SNAPSHOT_DB") | ||
| 826 | - _redis_db = 0 | ||
| 827 | - if _redis_db_raw is not None and str(_redis_db_raw).strip() != "": | ||
| 828 | - try: | ||
| 829 | - _redis_db = int(str(_redis_db_raw).strip()) | ||
| 830 | - except ValueError: | ||
| 831 | - _redis_db = 0 | ||
| 832 | return InfrastructureConfig( | 839 | return InfrastructureConfig( |
| 833 | elasticsearch=ElasticsearchSettings( | 840 | elasticsearch=ElasticsearchSettings( |
| 834 | host=os.getenv("ES_HOST", "http://localhost:9200"), | 841 | host=os.getenv("ES_HOST", "http://localhost:9200"), |
| @@ -838,7 +845,7 @@ class AppConfigLoader: | @@ -838,7 +845,7 @@ class AppConfigLoader: | ||
| 838 | redis=RedisSettings( | 845 | redis=RedisSettings( |
| 839 | host=os.getenv("REDIS_HOST", "localhost"), | 846 | host=os.getenv("REDIS_HOST", "localhost"), |
| 840 | port=int(os.getenv("REDIS_PORT", 6479)), | 847 | port=int(os.getenv("REDIS_PORT", 6479)), |
| 841 | - snapshot_db=_redis_db, | 848 | + snapshot_db=int(os.getenv("REDIS_SNAPSHOT_DB", 0)), |
| 842 | password=os.getenv("REDIS_PASSWORD"), | 849 | password=os.getenv("REDIS_PASSWORD"), |
| 843 | socket_timeout=int(os.getenv("REDIS_SOCKET_TIMEOUT", 1)), | 850 | socket_timeout=int(os.getenv("REDIS_SOCKET_TIMEOUT", 1)), |
| 844 | socket_connect_timeout=int(os.getenv("REDIS_SOCKET_CONNECT_TIMEOUT", 1)), | 851 | socket_connect_timeout=int(os.getenv("REDIS_SOCKET_CONNECT_TIMEOUT", 1)), |
frontend/static/js/app.js
| @@ -316,7 +316,10 @@ async function performSearch(page = 1) { | @@ -316,7 +316,10 @@ async function performSearch(page = 1) { | ||
| 316 | document.getElementById('productGrid').innerHTML = ''; | 316 | document.getElementById('productGrid').innerHTML = ''; |
| 317 | 317 | ||
| 318 | try { | 318 | try { |
| 319 | - const response = await fetch(`${API_BASE_URL}/search/`, { | 319 | + const searchUrl = new URL(`${API_BASE_URL}/search/`, window.location.origin); |
| 320 | + searchUrl.searchParams.set('tenant_id', tenantId); | ||
| 321 | + | ||
| 322 | + const response = await fetch(searchUrl.toString(), { | ||
| 320 | method: 'POST', | 323 | method: 'POST', |
| 321 | headers: { | 324 | headers: { |
| 322 | 'Content-Type': 'application/json', | 325 | 'Content-Type': 'application/json', |
requirements_translator_service.txt
| @@ -13,7 +13,8 @@ httpx>=0.24.0 | @@ -13,7 +13,8 @@ httpx>=0.24.0 | ||
| 13 | tqdm>=4.65.0 | 13 | tqdm>=4.65.0 |
| 14 | 14 | ||
| 15 | torch>=2.0.0 | 15 | torch>=2.0.0 |
| 16 | -transformers>=4.30.0 | 16 | +# Keep translator conversions on the last verified NLLB-compatible release line. |
| 17 | +transformers>=4.51.0,<4.52.0 | ||
| 17 | ctranslate2>=4.7.0 | 18 | ctranslate2>=4.7.0 |
| 18 | sentencepiece>=0.2.0 | 19 | sentencepiece>=0.2.0 |
| 19 | sacremoses>=0.1.1 | 20 | sacremoses>=0.1.1 |
| 1 | #!/usr/bin/env python3 | 1 | #!/usr/bin/env python3 |
| 2 | -"""Download local translation models declared in services.translation.capabilities.""" | 2 | +"""Backward-compatible entrypoint for translation model downloads.""" |
| 3 | 3 | ||
| 4 | from __future__ import annotations | 4 | from __future__ import annotations |
| 5 | 5 | ||
| 6 | -import argparse | ||
| 7 | -import os | 6 | +import runpy |
| 8 | from pathlib import Path | 7 | from pathlib import Path |
| 9 | -import shutil | ||
| 10 | -import subprocess | ||
| 11 | -import sys | ||
| 12 | -from typing import Iterable | ||
| 13 | - | ||
| 14 | -from huggingface_hub import snapshot_download | ||
| 15 | - | ||
| 16 | -PROJECT_ROOT = Path(__file__).resolve().parent.parent | ||
| 17 | -if str(PROJECT_ROOT) not in sys.path: | ||
| 18 | - sys.path.insert(0, str(PROJECT_ROOT)) | ||
| 19 | -os.environ.setdefault("HF_HUB_DISABLE_XET", "1") | ||
| 20 | - | ||
| 21 | -from config.services_config import get_translation_config | ||
| 22 | - | ||
| 23 | - | ||
| 24 | -LOCAL_BACKENDS = {"local_nllb", "local_marian"} | ||
| 25 | - | ||
| 26 | - | ||
| 27 | -def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]: | ||
| 28 | - cfg = get_translation_config() | ||
| 29 | - capabilities = cfg.get("capabilities", {}) if isinstance(cfg, dict) else {} | ||
| 30 | - for name, capability in capabilities.items(): | ||
| 31 | - backend = str(capability.get("backend") or "").strip().lower() | ||
| 32 | - if backend not in LOCAL_BACKENDS: | ||
| 33 | - continue | ||
| 34 | - if selected and name not in selected: | ||
| 35 | - continue | ||
| 36 | - yield name, capability | ||
| 37 | - | ||
| 38 | - | ||
| 39 | -def _compute_ct2_output_dir(capability: dict) -> Path: | ||
| 40 | - custom = str(capability.get("ct2_model_dir") or "").strip() | ||
| 41 | - if custom: | ||
| 42 | - return Path(custom).expanduser() | ||
| 43 | - model_dir = Path(str(capability.get("model_dir") or "")).expanduser() | ||
| 44 | - compute_type = str(capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default").strip().lower() | ||
| 45 | - normalized = compute_type.replace("_", "-") | ||
| 46 | - return model_dir / f"ctranslate2-{normalized}" | ||
| 47 | - | ||
| 48 | - | ||
| 49 | -def _resolve_converter_binary() -> str: | ||
| 50 | - candidate = shutil.which("ct2-transformers-converter") | ||
| 51 | - if candidate: | ||
| 52 | - return candidate | ||
| 53 | - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter" | ||
| 54 | - if venv_candidate.exists(): | ||
| 55 | - return str(venv_candidate) | ||
| 56 | - raise RuntimeError( | ||
| 57 | - "ct2-transformers-converter was not found. " | ||
| 58 | - "Install ctranslate2 in the active Python environment first." | ||
| 59 | - ) | ||
| 60 | - | ||
| 61 | - | ||
| 62 | -def convert_to_ctranslate2(name: str, capability: dict) -> None: | ||
| 63 | - model_id = str(capability.get("model_id") or "").strip() | ||
| 64 | - model_dir = Path(str(capability.get("model_dir") or "")).expanduser() | ||
| 65 | - model_source = str(model_dir if model_dir.exists() else model_id) | ||
| 66 | - output_dir = _compute_ct2_output_dir(capability) | ||
| 67 | - if (output_dir / "model.bin").exists(): | ||
| 68 | - print(f"[skip-convert] {name} -> {output_dir}") | ||
| 69 | - return | ||
| 70 | - quantization = str( | ||
| 71 | - capability.get("ct2_conversion_quantization") | ||
| 72 | - or capability.get("ct2_compute_type") | ||
| 73 | - or capability.get("torch_dtype") | ||
| 74 | - or "default" | ||
| 75 | - ).strip() | ||
| 76 | - output_dir.parent.mkdir(parents=True, exist_ok=True) | ||
| 77 | - print(f"[convert] {name} -> {output_dir} ({quantization})") | ||
| 78 | - subprocess.run( | ||
| 79 | - [ | ||
| 80 | - _resolve_converter_binary(), | ||
| 81 | - "--model", | ||
| 82 | - model_source, | ||
| 83 | - "--output_dir", | ||
| 84 | - str(output_dir), | ||
| 85 | - "--quantization", | ||
| 86 | - quantization, | ||
| 87 | - ], | ||
| 88 | - check=True, | ||
| 89 | - ) | ||
| 90 | - print(f"[converted] {name}") | ||
| 91 | - | ||
| 92 | - | ||
| 93 | -def main() -> None: | ||
| 94 | - parser = argparse.ArgumentParser(description="Download local translation models") | ||
| 95 | - parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models") | ||
| 96 | - parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download") | ||
| 97 | - parser.add_argument( | ||
| 98 | - "--convert-ctranslate2", | ||
| 99 | - action="store_true", | ||
| 100 | - help="Also convert the downloaded Hugging Face models into CTranslate2 format", | ||
| 101 | - ) | ||
| 102 | - args = parser.parse_args() | ||
| 103 | - | ||
| 104 | - selected = {item.strip().lower() for item in args.models if item.strip()} or None | ||
| 105 | - if not args.all_local and not selected: | ||
| 106 | - parser.error("pass --all-local or --models <name> ...") | ||
| 107 | - | ||
| 108 | - for name, capability in iter_local_capabilities(selected): | ||
| 109 | - model_id = str(capability.get("model_id") or "").strip() | ||
| 110 | - model_dir = Path(str(capability.get("model_dir") or "")).expanduser() | ||
| 111 | - if not model_id or not model_dir: | ||
| 112 | - raise ValueError(f"Capability '{name}' must define model_id and model_dir") | ||
| 113 | - model_dir.parent.mkdir(parents=True, exist_ok=True) | ||
| 114 | - print(f"[download] {name} -> {model_dir} ({model_id})") | ||
| 115 | - snapshot_download( | ||
| 116 | - repo_id=model_id, | ||
| 117 | - local_dir=str(model_dir), | ||
| 118 | - ) | ||
| 119 | - print(f"[done] {name}") | ||
| 120 | - if args.convert_ctranslate2: | ||
| 121 | - convert_to_ctranslate2(name, capability) | ||
| 122 | 8 | ||
| 123 | 9 | ||
| 124 | if __name__ == "__main__": | 10 | if __name__ == "__main__": |
| 125 | - main() | 11 | + target = Path(__file__).resolve().parent / "translation" / "download_translation_models.py" |
| 12 | + runpy.run_path(str(target), run_name="__main__") |
| @@ -0,0 +1,278 @@ | @@ -0,0 +1,278 @@ | ||
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +""" | ||
| 3 | +Simple HTTP server for saas-search frontend. | ||
| 4 | +""" | ||
| 5 | + | ||
| 6 | +import http.server | ||
| 7 | +import socketserver | ||
| 8 | +import os | ||
| 9 | +import sys | ||
| 10 | +import logging | ||
| 11 | +import time | ||
| 12 | +import urllib.request | ||
| 13 | +import urllib.error | ||
| 14 | +from collections import defaultdict, deque | ||
| 15 | +from pathlib import Path | ||
| 16 | +from dotenv import load_dotenv | ||
| 17 | + | ||
# Load environment overrides from the project-level .env file.
project_root = Path(__file__).resolve().parents[2]
load_dotenv(project_root / '.env')

# API_BASE_URL is read from the environment but NOT injected by default, so
# a stale .env value (e.g. http://host:6002) cannot override the same-origin
# proxying strategy. window.API_BASE_URL is only injected into served HTML
# when FRONTEND_INJECT_API_BASE_URL=1 is set explicitly.
API_BASE_URL = os.getenv('API_BASE_URL') or None
INJECT_API_BASE_URL = os.getenv('FRONTEND_INJECT_API_BASE_URL', '0') == '1'
# Backend proxy target for same-origin API forwarding
BACKEND_PROXY_URL = os.getenv('BACKEND_PROXY_URL', 'http://127.0.0.1:6002').rstrip('/')

# Serve static assets from the frontend directory (handlers use relative paths).
frontend_dir = os.path.join(project_root, 'frontend')
os.chdir(frontend_dir)

# FRONTEND_PORT is the canonical config; keep PORT as a secondary fallback.
PORT = int(os.getenv('FRONTEND_PORT', os.getenv('PORT', 6003)))

# Configure logging to suppress scanner noise
logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')
| 38 | + | ||
class RateLimitingMixin:
    """Mixin providing a sliding-window, per-IP request rate limiter.

    State lives on the class, so every handler instance (and every worker
    thread of the threaded server) shares the same counters.
    NOTE(review): the prune/append sequence below is not lock-protected;
    concurrent requests from the same IP can race on one deque — confirm
    whether strict accuracy matters before adding locking.
    """

    # Timestamps of recent requests, keyed by client IP. One key is kept per
    # distinct IP and idle keys are never pruned (growth bounded by traffic mix).
    request_counts = defaultdict(deque)
    rate_limit = 100  # max requests allowed per window
    window = 60  # window length in seconds

    @classmethod
    def is_rate_limited(cls, ip):
        """Return True if *ip* already made ``rate_limit`` requests in the window.

        A non-limited call records the current request's timestamp; a limited
        call records nothing, so throttled traffic does not extend the penalty.
        """
        now = time.time()
        bucket = cls.request_counts[ip]

        # Drop timestamps that have aged out of the sliding window.
        while bucket and bucket[0] < now - cls.window:
            bucket.popleft()

        # BUG FIX: the original compared with ``>``, which let a client make
        # rate_limit + 1 requests per window before being throttled.
        if len(bucket) >= cls.rate_limit:
            return True

        bucket.append(now)
        return False
| 59 | + | ||
class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin):
    """Static-file handler with CORS headers, per-IP rate limiting, and a
    same-origin reverse proxy for backend API paths.

    ``/search/``, ``/admin/`` and ``/indexer/`` requests are forwarded to
    ``BACKEND_PROXY_URL``; HTML responses optionally get
    ``window.API_BASE_URL`` injected (see ``INJECT_API_BASE_URL``).
    """

    _ALLOWED_CORS_HEADERS = "Content-Type, X-Tenant-ID, X-Request-ID, Referer"

    def _is_proxy_path(self, path: str) -> bool:
        """Return True for API paths that should be forwarded to backend service."""
        return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/')

    def _proxy_to_backend(self):
        """Proxy the current request (method, body, most headers) to the backend.

        Hop-by-hop headers are stripped in both directions; backend error
        responses are relayed verbatim, and transport failures become a 502.
        """
        target_url = f"{BACKEND_PROXY_URL}{self.path}"
        method = self.command.upper()

        try:
            content_length = int(self.headers.get('Content-Length', '0'))
        except ValueError:
            content_length = 0
        body = self.rfile.read(content_length) if content_length > 0 else None

        # Forward client headers except those tied to this hop/connection.
        forward_headers = {}
        for key, value in self.headers.items():
            lk = key.lower()
            if lk in ('host', 'content-length', 'connection'):
                continue
            forward_headers[key] = value

        req = urllib.request.Request(
            target_url,
            data=body,
            headers=forward_headers,
            method=method,
        )

        try:
            with urllib.request.urlopen(req, timeout=30) as resp:
                resp_body = resp.read()
                self.send_response(resp.getcode())
                for header, value in resp.getheaders():
                    lh = header.lower()
                    if lh in ('transfer-encoding', 'connection', 'content-length'):
                        continue
                    self.send_header(header, value)
                self.end_headers()
                self.wfile.write(resp_body)
        except urllib.error.HTTPError as e:
            # Backend answered with a 4xx/5xx: relay status, headers and body.
            err_body = e.read() if hasattr(e, 'read') else b''
            self.send_response(e.code)
            if e.headers:
                for header, value in e.headers.items():
                    lh = header.lower()
                    if lh in ('transfer-encoding', 'connection', 'content-length'):
                        continue
                    self.send_header(header, value)
            self.end_headers()
            if err_body:
                self.wfile.write(err_body)
        except Exception as e:
            logging.error(f"Backend proxy error for {method} {self.path}: {e}")
            self.send_response(502)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.end_headers()
            self.wfile.write(b'{"error":"Bad Gateway: backend proxy failed"}')

    def do_GET(self):
        """Serve static files, inject config into HTML, proxy API paths."""
        path = self.path.split('?')[0]

        # Proxy API paths to backend first
        if self._is_proxy_path(path):
            self._proxy_to_backend()
            return

        # Route / to index.html
        if path == '/' or path == '':
            # BUG FIX: the original concatenated the query string without its
            # '?', producing paths like '/index.htmlq=1' that then 404'd.
            query = self.path.split('?', 1)[1] if '?' in self.path else ''
            self.path = '/index.html' + ('?' + query if query else '')

        # Inject API config for HTML files
        if self.path.endswith('.html'):
            self._serve_html_with_config()
        else:
            super().do_GET()

    def _serve_html_with_config(self):
        """Serve an HTML file, optionally injecting ``window.API_BASE_URL``."""
        try:
            file_path = self.path.lstrip('/')
            # SECURITY FIX: the original opened the raw request path, so
            # "GET /../secret.html" could read files outside the served
            # directory. Resolve against the current directory and reject
            # anything that escapes it.
            base_dir = os.path.realpath(os.getcwd())
            resolved = os.path.realpath(os.path.join(base_dir, file_path))
            if os.path.commonpath([base_dir, resolved]) != base_dir:
                self.send_error(404)
                return
            if not os.path.exists(resolved):
                self.send_error(404)
                return

            with open(resolved, 'r', encoding='utf-8') as f:
                html = f.read()

            # API_BASE_URL is NOT injected by default, so a stale .env value
            # cannot override same-origin calls. Inject window.API_BASE_URL
            # only when FRONTEND_INJECT_API_BASE_URL=1 and a value is set.
            if INJECT_API_BASE_URL and API_BASE_URL:
                config_script = f'<script>window.API_BASE_URL="{API_BASE_URL}";</script>\n    '
                html = html.replace('<script src="/static/js/app.js', config_script + '<script src="/static/js/app.js', 1)

            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.end_headers()
            self.wfile.write(html.encode('utf-8'))
        except Exception as e:
            logging.error(f"Error serving HTML: {e}")
            self.send_error(500)

    def do_POST(self):
        """Handle POST requests. Proxy API requests to backend."""
        path = self.path.split('?')[0]
        if self._is_proxy_path(path):
            self._proxy_to_backend()
            return
        # Static files never accept POST.
        self.send_error(405, "Method Not Allowed")

    def setup(self):
        """Setup with error handling."""
        try:
            super().setup()
        except Exception:
            pass  # Silently handle setup errors from scanners

    def handle_one_request(self):
        """Handle a single request, enforcing the rate limit and absorbing
        the connection noise produced by port scanners."""
        try:
            # Check rate limiting
            client_ip = self.client_address[0]
            if self.is_rate_limited(client_ip):
                logging.warning(f"Rate limiting IP: {client_ip}")
                self.send_error(429, "Too Many Requests")
                return

            super().handle_one_request()
        except (ConnectionResetError, BrokenPipeError):
            # Client disconnected prematurely - common with scanners
            pass
        except UnicodeDecodeError:
            # Binary data received - not HTTP
            pass
        except Exception as e:
            # Log unexpected errors but don't crash
            logging.debug(f"Request handling error: {e}")

    def log_message(self, format, *args):
        """Suppress logging for malformed requests from scanners."""
        message = format % args
        # Filter out scanner noise
        noise_patterns = [
            "code 400",
            "Bad request",
            "Bad request version",
            "Bad HTTP/0.9 request type",
            "Bad request syntax"
        ]
        if any(pattern in message for pattern in noise_patterns):
            return
        # Only log legitimate requests
        if message and not message.startswith(" ") and len(message) > 10:
            super().log_message(format, *args)

    def end_headers(self):
        # Add CORS headers
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', self._ALLOWED_CORS_HEADERS)
        # Add security headers
        self.send_header('X-Content-Type-Options', 'nosniff')
        self.send_header('X-Frame-Options', 'DENY')
        self.send_header('X-XSS-Protection', '1; mode=block')
        super().end_headers()

    def do_OPTIONS(self):
        """Handle CORS preflight; proxy paths get a bodyless 204."""
        try:
            path = self.path.split('?')[0]
            if self._is_proxy_path(path):
                self.send_response(204)
                self.end_headers()
                return
            self.send_response(200)
            self.end_headers()
        except Exception:
            pass
| 244 | + | ||
class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
    """TCP server that handles each connection on its own daemon thread.

    ``allow_reuse_address`` lets the listener rebind quickly after a restart;
    ``daemon_threads`` keeps worker threads from blocking interpreter shutdown.
    """

    allow_reuse_address = True
    daemon_threads = True
| 249 | + | ||
if __name__ == '__main__':
    import socket

    # Probe the port first so a conflict yields a friendly message instead
    # of a raw bind traceback from the server constructor below.
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("", PORT))
        probe.close()
    except OSError:
        print(f"ERROR: Port {PORT} is already in use.")
        print(f"Please stop the existing server or use a different port.")
        print(f"To stop existing server: kill $(lsof -t -i:{PORT})")
        sys.exit(1)

    # Threaded server keeps slow or proxied requests from blocking others.
    with ThreadedTCPServer(("", PORT), MyHTTPRequestHandler) as httpd:
        print(f"Frontend server started at http://localhost:{PORT}")
        print(f"Serving files from: {os.getcwd()}")
        print("\nPress Ctrl+C to stop the server")

        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nShutting down server...")
            httpd.shutdown()
            print("Server stopped")
            sys.exit(0)
        except Exception as e:
            print(f"Server error: {e}")
            sys.exit(1)
scripts/setup_translator_venv.sh
| @@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" | @@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" | ||
| 8 | cd "${PROJECT_ROOT}" | 8 | cd "${PROJECT_ROOT}" |
| 9 | 9 | ||
| 10 | VENV_DIR="${PROJECT_ROOT}/.venv-translator" | 10 | VENV_DIR="${PROJECT_ROOT}/.venv-translator" |
| 11 | -PYTHON_BIN="${PYTHON_BIN:-python3}" | ||
| 12 | TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}" | 11 | TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}" |
| 12 | +MIN_PYTHON_MAJOR=3 | ||
| 13 | +MIN_PYTHON_MINOR=10 | ||
| 14 | + | ||
| 15 | +python_meets_minimum() { | ||
| 16 | + local bin="$1" | ||
| 17 | + "${bin}" - <<'PY' "${MIN_PYTHON_MAJOR}" "${MIN_PYTHON_MINOR}" | ||
| 18 | +import sys | ||
| 19 | + | ||
| 20 | +required = tuple(int(value) for value in sys.argv[1:]) | ||
| 21 | +sys.exit(0 if sys.version_info[:2] >= required else 1) | ||
| 22 | +PY | ||
| 23 | +} | ||
| 24 | + | ||
| 25 | +discover_python_bin() { | ||
| 26 | + local candidates=() | ||
| 27 | + | ||
| 28 | + if [[ -n "${PYTHON_BIN:-}" ]]; then | ||
| 29 | + candidates+=("${PYTHON_BIN}") | ||
| 30 | + fi | ||
| 31 | + candidates+=("python3.12" "python3.11" "python3.10" "python3") | ||
| 32 | + | ||
| 33 | + local candidate | ||
| 34 | + for candidate in "${candidates[@]}"; do | ||
| 35 | + if ! command -v "${candidate}" >/dev/null 2>&1; then | ||
| 36 | + continue | ||
| 37 | + fi | ||
| 38 | + if python_meets_minimum "${candidate}"; then | ||
| 39 | + echo "${candidate}" | ||
| 40 | + return 0 | ||
| 41 | + fi | ||
| 42 | + done | ||
| 43 | + | ||
| 44 | + return 1 | ||
| 45 | +} | ||
| 46 | + | ||
| 47 | +if ! PYTHON_BIN="$(discover_python_bin)"; then | ||
| 48 | + echo "ERROR: unable to find Python >= ${MIN_PYTHON_MAJOR}.${MIN_PYTHON_MINOR}." >&2 | ||
| 49 | + echo "Set PYTHON_BIN to a compatible interpreter and rerun." >&2 | ||
| 50 | + exit 1 | ||
| 51 | +fi | ||
| 13 | 52 | ||
| 14 | if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then | 53 | if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then |
| 15 | echo "ERROR: python not found: ${PYTHON_BIN}" >&2 | 54 | echo "ERROR: python not found: ${PYTHON_BIN}" >&2 |
| @@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}" | @@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}" | ||
| 32 | export TMPDIR="${TMP_DIR}" | 71 | export TMPDIR="${TMP_DIR}" |
| 33 | PIP_ARGS=(--no-cache-dir) | 72 | PIP_ARGS=(--no-cache-dir) |
| 34 | 73 | ||
| 74 | +echo "Using Python=${PYTHON_BIN}" | ||
| 35 | echo "Using TMPDIR=${TMPDIR}" | 75 | echo "Using TMPDIR=${TMPDIR}" |
| 36 | "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel | 76 | "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel |
| 37 | "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt | 77 | "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt |
| @@ -39,5 +79,5 @@ echo "Using TMPDIR=${TMPDIR}" | @@ -39,5 +79,5 @@ echo "Using TMPDIR=${TMPDIR}" | ||
| 39 | echo | 79 | echo |
| 40 | echo "Done." | 80 | echo "Done." |
| 41 | echo "Translator venv: ${VENV_DIR}" | 81 | echo "Translator venv: ${VENV_DIR}" |
| 42 | -echo "Download local models: ./.venv-translator/bin/python scripts/download_translation_models.py --all-local" | 82 | +echo "Download local models: ./.venv-translator/bin/python scripts/translation/download_translation_models.py --all-local" |
| 43 | echo "Start service: ./scripts/start_translator.sh" | 83 | echo "Start service: ./scripts/start_translator.sh" |
scripts/translation/download_translation_models.py
0 โ 100755
| @@ -0,0 +1,100 @@ | @@ -0,0 +1,100 @@ | ||
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +"""Download local translation models declared in services.translation.capabilities.""" | ||
| 3 | + | ||
| 4 | +from __future__ import annotations | ||
| 5 | + | ||
| 6 | +import argparse | ||
| 7 | +import os | ||
| 8 | +from pathlib import Path | ||
| 9 | +import sys | ||
| 10 | +from typing import Iterable | ||
| 11 | + | ||
| 12 | +from huggingface_hub import snapshot_download | ||
| 13 | + | ||
| 14 | +PROJECT_ROOT = Path(__file__).resolve().parents[2] | ||
| 15 | +if str(PROJECT_ROOT) not in sys.path: | ||
| 16 | + sys.path.insert(0, str(PROJECT_ROOT)) | ||
| 17 | +os.environ.setdefault("HF_HUB_DISABLE_XET", "1") | ||
| 18 | + | ||
| 19 | +from config.services_config import get_translation_config | ||
| 20 | +from translation.ct2_conversion import convert_transformers_model | ||
| 21 | + | ||
| 22 | + | ||
| 23 | +LOCAL_BACKENDS = {"local_nllb", "local_marian"} | ||
| 24 | + | ||
| 25 | + | ||
| 26 | +def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]: | ||
| 27 | + cfg = get_translation_config() | ||
| 28 | + capabilities = cfg.get("capabilities", {}) if isinstance(cfg, dict) else {} | ||
| 29 | + for name, capability in capabilities.items(): | ||
| 30 | + backend = str(capability.get("backend") or "").strip().lower() | ||
| 31 | + if backend not in LOCAL_BACKENDS: | ||
| 32 | + continue | ||
| 33 | + if selected and name not in selected: | ||
| 34 | + continue | ||
| 35 | + yield name, capability | ||
| 36 | + | ||
| 37 | + | ||
| 38 | +def _compute_ct2_output_dir(capability: dict) -> Path: | ||
| 39 | + custom = str(capability.get("ct2_model_dir") or "").strip() | ||
| 40 | + if custom: | ||
| 41 | + return Path(custom).expanduser() | ||
| 42 | + model_dir = Path(str(capability.get("model_dir") or "")).expanduser() | ||
| 43 | + compute_type = str(capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default").strip().lower() | ||
| 44 | + normalized = compute_type.replace("_", "-") | ||
| 45 | + return model_dir / f"ctranslate2-{normalized}" | ||
| 46 | + | ||
| 47 | + | ||
| 48 | +def convert_to_ctranslate2(name: str, capability: dict) -> None: | ||
| 49 | + model_id = str(capability.get("model_id") or "").strip() | ||
| 50 | + model_dir = Path(str(capability.get("model_dir") or "")).expanduser() | ||
| 51 | + model_source = str(model_dir if model_dir.exists() else model_id) | ||
| 52 | + output_dir = _compute_ct2_output_dir(capability) | ||
| 53 | + if (output_dir / "model.bin").exists(): | ||
| 54 | + print(f"[skip-convert] {name} -> {output_dir}") | ||
| 55 | + return | ||
| 56 | + quantization = str( | ||
| 57 | + capability.get("ct2_conversion_quantization") | ||
| 58 | + or capability.get("ct2_compute_type") | ||
| 59 | + or capability.get("torch_dtype") | ||
| 60 | + or "default" | ||
| 61 | + ).strip() | ||
| 62 | + output_dir.parent.mkdir(parents=True, exist_ok=True) | ||
| 63 | + print(f"[convert] {name} -> {output_dir} ({quantization})") | ||
| 64 | + convert_transformers_model(model_source, str(output_dir), quantization) | ||
| 65 | + print(f"[converted] {name}") | ||
| 66 | + | ||
| 67 | + | ||
| 68 | +def main() -> None: | ||
| 69 | + parser = argparse.ArgumentParser(description="Download local translation models") | ||
| 70 | + parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models") | ||
| 71 | + parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download") | ||
| 72 | + parser.add_argument( | ||
| 73 | + "--convert-ctranslate2", | ||
| 74 | + action="store_true", | ||
| 75 | + help="Also convert the downloaded Hugging Face models into CTranslate2 format", | ||
| 76 | + ) | ||
| 77 | + args = parser.parse_args() | ||
| 78 | + | ||
| 79 | + selected = {item.strip().lower() for item in args.models if item.strip()} or None | ||
| 80 | + if not args.all_local and not selected: | ||
| 81 | + parser.error("pass --all-local or --models <name> ...") | ||
| 82 | + | ||
| 83 | + for name, capability in iter_local_capabilities(selected): | ||
| 84 | + model_id = str(capability.get("model_id") or "").strip() | ||
| 85 | + model_dir = Path(str(capability.get("model_dir") or "")).expanduser() | ||
| 86 | + if not model_id or not model_dir: | ||
| 87 | + raise ValueError(f"Capability '{name}' must define model_id and model_dir") | ||
| 88 | + model_dir.parent.mkdir(parents=True, exist_ok=True) | ||
| 89 | + print(f"[download] {name} -> {model_dir} ({model_id})") | ||
| 90 | + snapshot_download( | ||
| 91 | + repo_id=model_id, | ||
| 92 | + local_dir=str(model_dir), | ||
| 93 | + ) | ||
| 94 | + print(f"[done] {name}") | ||
| 95 | + if args.convert_ctranslate2: | ||
| 96 | + convert_to_ctranslate2(name, capability) | ||
| 97 | + | ||
| 98 | + | ||
| 99 | +if __name__ == "__main__": | ||
| 100 | + main() |
| @@ -0,0 +1,85 @@ | @@ -0,0 +1,85 @@ | ||
| 1 | +from __future__ import annotations | ||
| 2 | + | ||
| 3 | +import sys | ||
| 4 | +import types | ||
| 5 | + | ||
| 6 | +import pytest | ||
| 7 | + | ||
| 8 | +import translation.ct2_conversion as ct2_conversion | ||
| 9 | + | ||
| 10 | + | ||
| 11 | +class _FakeTransformersConverter: | ||
| 12 | + def __init__(self, model_name_or_path): | ||
| 13 | + self.model_name_or_path = model_name_or_path | ||
| 14 | + self.load_calls = [] | ||
| 15 | + | ||
| 16 | + def load_model(self, model_class, resolved_model_name_or_path, **kwargs): | ||
| 17 | + self.load_calls.append( | ||
| 18 | + { | ||
| 19 | + "model_class": model_class, | ||
| 20 | + "resolved_model_name_or_path": resolved_model_name_or_path, | ||
| 21 | + "kwargs": dict(kwargs), | ||
| 22 | + } | ||
| 23 | + ) | ||
| 24 | + if "dtype" in kwargs or "torch_dtype" in kwargs: | ||
| 25 | + raise TypeError("M2M100ForConditionalGeneration.__init__() got an unexpected keyword argument 'dtype'") | ||
| 26 | + return {"loaded": True, "path": resolved_model_name_or_path} | ||
| 27 | + | ||
| 28 | + def convert(self, output_dir, quantization=None, force=False): | ||
| 29 | + loaded = self.load_model("FakeModel", self.model_name_or_path, dtype="float32") | ||
| 30 | + return { | ||
| 31 | + "loaded": loaded, | ||
| 32 | + "output_dir": output_dir, | ||
| 33 | + "quantization": quantization, | ||
| 34 | + "force": force, | ||
| 35 | + "load_calls": list(self.load_calls), | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + | ||
| 39 | +def _install_fake_ctranslate2(monkeypatch, base_converter): | ||
| 40 | + converters_module = types.ModuleType("ctranslate2.converters") | ||
| 41 | + converters_module.TransformersConverter = base_converter | ||
| 42 | + ctranslate2_module = types.ModuleType("ctranslate2") | ||
| 43 | + ctranslate2_module.converters = converters_module | ||
| 44 | + | ||
| 45 | + monkeypatch.setitem(sys.modules, "ctranslate2", ctranslate2_module) | ||
| 46 | + monkeypatch.setitem(sys.modules, "ctranslate2.converters", converters_module) | ||
| 47 | + | ||
| 48 | + | ||
| 49 | +def test_convert_transformers_model_retries_without_torch_dtype(monkeypatch): | ||
| 50 | + _install_fake_ctranslate2(monkeypatch, _FakeTransformersConverter) | ||
| 51 | + fake_transformers = types.ModuleType("transformers") | ||
| 52 | + fake_transformers.AutoConfig = types.SimpleNamespace( | ||
| 53 | + from_pretrained=lambda path: types.SimpleNamespace(torch_dtype="float32", path=path) | ||
| 54 | + ) | ||
| 55 | + monkeypatch.setitem(sys.modules, "transformers", fake_transformers) | ||
| 56 | + | ||
| 57 | + result = ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16") | ||
| 58 | + | ||
| 59 | + assert result["loaded"] == {"loaded": True, "path": "fake-model"} | ||
| 60 | + assert result["output_dir"] == "/tmp/out" | ||
| 61 | + assert result["quantization"] == "float16" | ||
| 62 | + assert result["force"] is False | ||
| 63 | + assert len(result["load_calls"]) == 2 | ||
| 64 | + assert result["load_calls"][0] == { | ||
| 65 | + "model_class": "FakeModel", | ||
| 66 | + "resolved_model_name_or_path": "fake-model", | ||
| 67 | + "kwargs": {"dtype": "float32"}, | ||
| 68 | + } | ||
| 69 | + assert result["load_calls"][1]["model_class"] == "FakeModel" | ||
| 70 | + assert result["load_calls"][1]["resolved_model_name_or_path"] == "fake-model" | ||
| 71 | + assert getattr(result["load_calls"][1]["kwargs"]["config"], "torch_dtype", "missing") is None | ||
| 72 | + | ||
| 73 | + | ||
| 74 | +def test_convert_transformers_model_preserves_unrelated_type_errors(monkeypatch): | ||
| 75 | + class _AlwaysFailingConverter(_FakeTransformersConverter): | ||
| 76 | + def load_model(self, model_class, resolved_model_name_or_path, **kwargs): | ||
| 77 | + raise TypeError("different constructor error") | ||
| 78 | + | ||
| 79 | + _install_fake_ctranslate2(monkeypatch, _AlwaysFailingConverter) | ||
| 80 | + fake_transformers = types.ModuleType("transformers") | ||
| 81 | + fake_transformers.AutoConfig = types.SimpleNamespace(from_pretrained=lambda path: types.SimpleNamespace(path=path)) | ||
| 82 | + monkeypatch.setitem(sys.modules, "transformers", fake_transformers) | ||
| 83 | + | ||
| 84 | + with pytest.raises(TypeError, match="different constructor error"): | ||
| 85 | + ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16") |
tests/test_translation_local_backends.py
| @@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch): | @@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch): | ||
| 201 | assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]] | 201 | assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]] |
| 202 | 202 | ||
| 203 | 203 | ||
| 204 | +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_wrong_type(tmp_path, monkeypatch): | ||
| 205 | + wrong_dir = tmp_path / "wrong-nllb" | ||
| 206 | + wrong_dir.mkdir() | ||
| 207 | + (wrong_dir / "config.json").write_text('{"model_type":"led"}', encoding="utf-8") | ||
| 208 | + | ||
| 209 | + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime) | ||
| 210 | + | ||
| 211 | + backend = NLLBCTranslate2TranslationBackend( | ||
| 212 | + name="nllb-200-distilled-600m", | ||
| 213 | + model_id="facebook/nllb-200-distilled-600M", | ||
| 214 | + model_dir=str(wrong_dir), | ||
| 215 | + device="cpu", | ||
| 216 | + torch_dtype="float32", | ||
| 217 | + batch_size=1, | ||
| 218 | + max_input_length=16, | ||
| 219 | + max_new_tokens=16, | ||
| 220 | + num_beams=1, | ||
| 221 | + ) | ||
| 222 | + | ||
| 223 | + assert backend._model_source() == "facebook/nllb-200-distilled-600M" | ||
| 224 | + assert backend._tokenizer_source() == "facebook/nllb-200-distilled-600M" | ||
| 225 | + | ||
| 226 | + | ||
| 227 | +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_incomplete(tmp_path, monkeypatch): | ||
| 228 | + incomplete_dir = tmp_path / "incomplete-nllb" | ||
| 229 | + incomplete_dir.mkdir() | ||
| 230 | + (incomplete_dir / "ctranslate2-float16").mkdir() | ||
| 231 | + | ||
| 232 | + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime) | ||
| 233 | + | ||
| 234 | + backend = NLLBCTranslate2TranslationBackend( | ||
| 235 | + name="nllb-200-distilled-600m", | ||
| 236 | + model_id="facebook/nllb-200-distilled-600M", | ||
| 237 | + model_dir=str(incomplete_dir), | ||
| 238 | + device="cpu", | ||
| 239 | + torch_dtype="float32", | ||
| 240 | + batch_size=1, | ||
| 241 | + max_input_length=16, | ||
| 242 | + max_new_tokens=16, | ||
| 243 | + num_beams=1, | ||
| 244 | + ) | ||
| 245 | + | ||
| 246 | + assert backend._model_source() == "facebook/nllb-200-distilled-600M" | ||
| 247 | + | ||
| 248 | + | ||
| 204 | def test_nllb_resolves_flores_short_tags_and_iso_no(): | 249 | def test_nllb_resolves_flores_short_tags_and_iso_no(): |
| 205 | cat = build_nllb_language_catalog(None) | 250 | cat = build_nllb_language_catalog(None) |
| 206 | assert resolve_nllb_language_code("ca", cat) == "cat_Latn" | 251 | assert resolve_nllb_language_code("ca", cat) == "cat_Latn" |
tests/test_translator_failure_semantics.py
| @@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog): | @@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog): | ||
| 197 | ] | 197 | ] |
| 198 | 198 | ||
| 199 | 199 | ||
| 200 | +def test_service_skips_failed_backend_but_keeps_healthy_capabilities(monkeypatch): | ||
| 201 | + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None)) | ||
| 202 | + | ||
| 203 | + def _fake_create_backend(self, *, name, backend_type, cfg): | ||
| 204 | + del self, backend_type, cfg | ||
| 205 | + if name == "broken-nllb": | ||
| 206 | + raise RuntimeError("broken model dir") | ||
| 207 | + | ||
| 208 | + class _Backend: | ||
| 209 | + model = name | ||
| 210 | + | ||
| 211 | + @property | ||
| 212 | + def supports_batch(self): | ||
| 213 | + return True | ||
| 214 | + | ||
| 215 | + def translate(self, text, target_lang, source_lang=None, scene=None): | ||
| 216 | + del target_lang, source_lang, scene | ||
| 217 | + return text | ||
| 218 | + | ||
| 219 | + return _Backend() | ||
| 220 | + | ||
| 221 | + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend) | ||
| 222 | + service = TranslationService( | ||
| 223 | + { | ||
| 224 | + "service_url": "http://127.0.0.1:6006", | ||
| 225 | + "timeout_sec": 10.0, | ||
| 226 | + "default_model": "llm", | ||
| 227 | + "default_scene": "general", | ||
| 228 | + "capabilities": { | ||
| 229 | + "llm": { | ||
| 230 | + "enabled": True, | ||
| 231 | + "backend": "llm", | ||
| 232 | + "model": "dummy-llm", | ||
| 233 | + "base_url": "https://example.com", | ||
| 234 | + "timeout_sec": 10.0, | ||
| 235 | + "use_cache": True, | ||
| 236 | + }, | ||
| 237 | + "broken-nllb": { | ||
| 238 | + "enabled": True, | ||
| 239 | + "backend": "local_nllb", | ||
| 240 | + "model_id": "dummy", | ||
| 241 | + "model_dir": "dummy", | ||
| 242 | + "device": "cpu", | ||
| 243 | + "torch_dtype": "float32", | ||
| 244 | + "batch_size": 8, | ||
| 245 | + "max_input_length": 16, | ||
| 246 | + "max_new_tokens": 16, | ||
| 247 | + "num_beams": 1, | ||
| 248 | + "use_cache": True, | ||
| 249 | + }, | ||
| 250 | + }, | ||
| 251 | + "cache": { | ||
| 252 | + "ttl_seconds": 60, | ||
| 253 | + "sliding_expiration": True, | ||
| 254 | + }, | ||
| 255 | + } | ||
| 256 | + ) | ||
| 257 | + | ||
| 258 | + assert service.available_models == ["llm", "broken-nllb"] | ||
| 259 | + assert service.loaded_models == ["llm"] | ||
| 260 | + assert service.failed_models == ["broken-nllb"] | ||
| 261 | + assert service.backend_errors["broken-nllb"] == "broken model dir" | ||
| 262 | + | ||
| 263 | + with pytest.raises(RuntimeError, match="failed to initialize"): | ||
| 264 | + service.get_backend("broken-nllb") | ||
| 265 | + | ||
| 266 | + | ||
| 200 | def test_translation_cache_probe_models_order(): | 267 | def test_translation_cache_probe_models_order(): |
| 201 | cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}} | 268 | cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}} |
| 202 | assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"] | 269 | assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"] |
translation/backends/local_ctranslate2.py
| @@ -4,9 +4,7 @@ from __future__ import annotations | @@ -4,9 +4,7 @@ from __future__ import annotations | ||
| 4 | 4 | ||
| 5 | import logging | 5 | import logging |
| 6 | import os | 6 | import os |
| 7 | -import shutil | ||
| 8 | -import subprocess | ||
| 9 | -import sys | 7 | +import json |
| 10 | import threading | 8 | import threading |
| 11 | from pathlib import Path | 9 | from pathlib import Path |
| 12 | from typing import Dict, List, Optional, Sequence, Union | 10 | from typing import Dict, List, Optional, Sequence, Union |
| @@ -24,6 +22,7 @@ from translation.text_splitter import ( | @@ -24,6 +22,7 @@ from translation.text_splitter import ( | ||
| 24 | join_translated_segments, | 22 | join_translated_segments, |
| 25 | split_text_for_translation, | 23 | split_text_for_translation, |
| 26 | ) | 24 | ) |
| 25 | +from translation.ct2_conversion import convert_transformers_model | ||
| 27 | 26 | ||
| 28 | logger = logging.getLogger(__name__) | 27 | logger = logging.getLogger(__name__) |
| 29 | 28 | ||
| @@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -> str: | @@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -> str: | ||
| 76 | return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}") | 75 | return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}") |
| 77 | 76 | ||
| 78 | 77 | ||
| 79 | -def _resolve_converter_binary() -> str: | ||
| 80 | - candidate = shutil.which("ct2-transformers-converter") | ||
| 81 | - if candidate: | ||
| 82 | - return candidate | ||
| 83 | - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter" | ||
| 84 | - if venv_candidate.exists(): | ||
| 85 | - return str(venv_candidate) | ||
| 86 | - raise RuntimeError( | ||
| 87 | - "ct2-transformers-converter was not found. " | ||
| 88 | - "Ensure ctranslate2 is installed in the active translator environment." | ||
| 89 | - ) | 78 | +def _detect_local_model_type(model_dir: str) -> Optional[str]: |
| 79 | + config_path = Path(model_dir).expanduser() / "config.json" | ||
| 80 | + if not config_path.exists(): | ||
| 81 | + return None | ||
| 82 | + try: | ||
| 83 | + with open(config_path, "r", encoding="utf-8") as handle: | ||
| 84 | + payload = json.load(handle) or {} | ||
| 85 | + except Exception as exc: | ||
| 86 | + logger.warning("Failed to inspect local translation config %s: %s", config_path, exc) | ||
| 87 | + return None | ||
| 88 | + model_type = str(payload.get("model_type") or "").strip().lower() | ||
| 89 | + return model_type or None | ||
| 90 | 90 | ||
| 91 | 91 | ||
| 92 | class LocalCTranslate2TranslationBackend: | 92 | class LocalCTranslate2TranslationBackend: |
| @@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend: | @@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend: | ||
| 144 | self.ct2_decoding_length_extra = int(ct2_decoding_length_extra) | 144 | self.ct2_decoding_length_extra = int(ct2_decoding_length_extra) |
| 145 | self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min)) | 145 | self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min)) |
| 146 | self._tokenizer_lock = threading.Lock() | 146 | self._tokenizer_lock = threading.Lock() |
| 147 | + self._local_model_source = self._resolve_local_model_source() | ||
| 147 | self._load_runtime() | 148 | self._load_runtime() |
| 148 | 149 | ||
| 149 | @property | 150 | @property |
| @@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend: | @@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend: | ||
| 151 | return True | 152 | return True |
| 152 | 153 | ||
| 153 | def _tokenizer_source(self) -> str: | 154 | def _tokenizer_source(self) -> str: |
| 154 | - return self.model_dir if os.path.exists(self.model_dir) else self.model_id | 155 | + return self._local_model_source or self.model_id |
| 155 | 156 | ||
| 156 | def _model_source(self) -> str: | 157 | def _model_source(self) -> str: |
| 157 | - return self.model_dir if os.path.exists(self.model_dir) else self.model_id | 158 | + return self._local_model_source or self.model_id |
| 159 | + | ||
| 160 | + def _expected_local_model_types(self) -> Optional[set[str]]: | ||
| 161 | + return None | ||
| 162 | + | ||
| 163 | + def _resolve_local_model_source(self) -> Optional[str]: | ||
| 164 | + model_path = Path(self.model_dir).expanduser() | ||
| 165 | + if not model_path.exists(): | ||
| 166 | + return None | ||
| 167 | + if not (model_path / "config.json").exists(): | ||
| 168 | + logger.warning( | ||
| 169 | + "Local translation model_dir is incomplete | model=%s model_dir=%s missing=config.json fallback=model_id", | ||
| 170 | + self.model, | ||
| 171 | + model_path, | ||
| 172 | + ) | ||
| 173 | + return None | ||
| 174 | + | ||
| 175 | + expected_types = self._expected_local_model_types() | ||
| 176 | + if not expected_types: | ||
| 177 | + return str(model_path) | ||
| 178 | + | ||
| 179 | + detected_type = _detect_local_model_type(str(model_path)) | ||
| 180 | + if detected_type is None: | ||
| 181 | + return str(model_path) | ||
| 182 | + if detected_type in expected_types: | ||
| 183 | + return str(model_path) | ||
| 184 | + | ||
| 185 | + logger.warning( | ||
| 186 | + "Local translation model_dir has unexpected model_type | model=%s model_dir=%s detected=%s expected=%s fallback=model_id", | ||
| 187 | + self.model, | ||
| 188 | + model_path, | ||
| 189 | + detected_type, | ||
| 190 | + sorted(expected_types), | ||
| 191 | + ) | ||
| 192 | + return None | ||
| 158 | 193 | ||
| 159 | def _tokenizer_kwargs(self) -> Dict[str, object]: | 194 | def _tokenizer_kwargs(self) -> Dict[str, object]: |
| 160 | return {} | 195 | return {} |
| @@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend: | @@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend: | ||
| 204 | ) | 239 | ) |
| 205 | 240 | ||
| 206 | ct2_path.parent.mkdir(parents=True, exist_ok=True) | 241 | ct2_path.parent.mkdir(parents=True, exist_ok=True) |
| 207 | - converter = _resolve_converter_binary() | ||
| 208 | logger.info( | 242 | logger.info( |
| 209 | "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s", | 243 | "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s", |
| 210 | self.model, | 244 | self.model, |
| @@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend: | @@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend: | ||
| 213 | self.ct2_conversion_quantization, | 247 | self.ct2_conversion_quantization, |
| 214 | ) | 248 | ) |
| 215 | try: | 249 | try: |
| 216 | - subprocess.run( | ||
| 217 | - [ | ||
| 218 | - converter, | ||
| 219 | - "--model", | ||
| 220 | - model_source, | ||
| 221 | - "--output_dir", | ||
| 222 | - str(ct2_path), | ||
| 223 | - "--quantization", | ||
| 224 | - self.ct2_conversion_quantization, | ||
| 225 | - ], | ||
| 226 | - check=True, | ||
| 227 | - stdout=subprocess.PIPE, | ||
| 228 | - stderr=subprocess.PIPE, | ||
| 229 | - text=True, | 250 | + convert_transformers_model( |
| 251 | + model_source, | ||
| 252 | + str(ct2_path), | ||
| 253 | + self.ct2_conversion_quantization, | ||
| 230 | ) | 254 | ) |
| 231 | - except subprocess.CalledProcessError as exc: | ||
| 232 | - stderr = exc.stderr.strip() | 255 | + except Exception as exc: |
| 233 | raise RuntimeError( | 256 | raise RuntimeError( |
| 234 | - f"Failed to convert model '{self.model}' to CTranslate2: {stderr or exc}" | 257 | + f"Failed to convert model '{self.model}' to CTranslate2: {exc}" |
| 235 | ) from exc | 258 | ) from exc |
| 236 | 259 | ||
| 237 | def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]: | 260 | def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]: |
| @@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): | @@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): | ||
| 557 | f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}" | 580 | f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}" |
| 558 | ) | 581 | ) |
| 559 | 582 | ||
| 583 | + def _expected_local_model_types(self) -> Optional[set[str]]: | ||
| 584 | + return {"marian"} | ||
| 585 | + | ||
| 560 | 586 | ||
| 561 | class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): | 587 | class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): |
| 562 | """Local backend for NLLB models on CTranslate2.""" | 588 | """Local backend for NLLB models on CTranslate2.""" |
| @@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): | @@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): | ||
| 619 | if resolve_nllb_language_code(target_lang, self.language_codes) is None: | 645 | if resolve_nllb_language_code(target_lang, self.language_codes) is None: |
| 620 | raise ValueError(f"Unsupported NLLB target language: {target_lang}") | 646 | raise ValueError(f"Unsupported NLLB target language: {target_lang}") |
| 621 | 647 | ||
| 648 | + def _expected_local_model_types(self) -> Optional[set[str]]: | ||
| 649 | + return {"m2m_100", "nllb_moe"} | ||
| 650 | + | ||
| 622 | def _get_tokenizer_for_source(self, source_lang: str): | 651 | def _get_tokenizer_for_source(self, source_lang: str): |
| 623 | src_code = resolve_nllb_language_code(source_lang, self.language_codes) | 652 | src_code = resolve_nllb_language_code(source_lang, self.language_codes) |
| 624 | if src_code is None: | 653 | if src_code is None: |
| @@ -0,0 +1,52 @@ | @@ -0,0 +1,52 @@ | ||
| 1 | +"""Helpers for converting Hugging Face translation models to CTranslate2.""" | ||
| 2 | + | ||
| 3 | +from __future__ import annotations | ||
| 4 | + | ||
| 5 | +import copy | ||
| 6 | +import logging | ||
| 7 | + | ||
| 8 | +logger = logging.getLogger(__name__) | ||
| 9 | + | ||
| 10 | + | ||
| 11 | +def convert_transformers_model( | ||
| 12 | + model_name_or_path: str, | ||
| 13 | + output_dir: str, | ||
| 14 | + quantization: str, | ||
| 15 | + *, | ||
| 16 | + force: bool = False, | ||
| 17 | +) -> str: | ||
| 18 | + from ctranslate2.converters import TransformersConverter | ||
| 19 | + from transformers import AutoConfig | ||
| 20 | + | ||
| 21 | + class _CompatibleTransformersConverter(TransformersConverter): | ||
| 22 | + def load_model(self, model_class, resolved_model_name_or_path, **kwargs): | ||
| 23 | + try: | ||
| 24 | + return super().load_model(model_class, resolved_model_name_or_path, **kwargs) | ||
| 25 | + except TypeError as exc: | ||
| 26 | + if "unexpected keyword argument 'dtype'" not in str(exc): | ||
| 27 | + raise | ||
| 28 | + if kwargs.get("dtype") is None and kwargs.get("torch_dtype") is None: | ||
| 29 | + raise | ||
| 30 | + | ||
| 31 | + logger.warning( | ||
| 32 | + "Retrying CTranslate2 model load without dtype hints | model=%s class=%s", | ||
| 33 | + resolved_model_name_or_path, | ||
| 34 | + getattr(model_class, "__name__", model_class), | ||
| 35 | + ) | ||
| 36 | + retry_kwargs = dict(kwargs) | ||
| 37 | + retry_kwargs.pop("dtype", None) | ||
| 38 | + retry_kwargs.pop("torch_dtype", None) | ||
| 39 | + config = retry_kwargs.get("config") | ||
| 40 | + if config is None: | ||
| 41 | + config = AutoConfig.from_pretrained(resolved_model_name_or_path) | ||
| 42 | + else: | ||
| 43 | + config = copy.deepcopy(config) | ||
| 44 | + if hasattr(config, "dtype"): | ||
| 45 | + config.dtype = None | ||
| 46 | + if hasattr(config, "torch_dtype"): | ||
| 47 | + config.torch_dtype = None | ||
| 48 | + retry_kwargs["config"] = config | ||
| 49 | + return super().load_model(model_class, resolved_model_name_or_path, **retry_kwargs) | ||
| 50 | + | ||
| 51 | + converter = _CompatibleTransformersConverter(model_name_or_path) | ||
| 52 | + return converter.convert(output_dir=output_dir, quantization=quantization, force=force) |
translation/service.py
| @@ -31,7 +31,12 @@ class TranslationService: | @@ -31,7 +31,12 @@ class TranslationService: | ||
| 31 | if not self._enabled_capabilities: | 31 | if not self._enabled_capabilities: |
| 32 | raise ValueError("No enabled translation backends found in services.translation.capabilities") | 32 | raise ValueError("No enabled translation backends found in services.translation.capabilities") |
| 33 | self._translation_cache = TranslationCache(self.config["cache"]) | 33 | self._translation_cache = TranslationCache(self.config["cache"]) |
| 34 | - self._backends = self._initialize_backends() | 34 | + self._backends: Dict[str, TranslationBackendProtocol] = {} |
| 35 | + self._backend_errors: Dict[str, str] = {} | ||
| 36 | + self._initialize_backends() | ||
| 37 | + if not self._backends: | ||
| 38 | + details = ", ".join(f"{name}: {err}" for name, err in sorted(self._backend_errors.items())) or "unknown error" | ||
| 39 | + raise RuntimeError(f"No translation backends could be initialized: {details}") | ||
| 35 | 40 | ||
| 36 | def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]: | 41 | def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]: |
| 37 | enabled: Dict[str, Dict[str, object]] = {} | 42 | enabled: Dict[str, Dict[str, object]] = {} |
| @@ -62,24 +67,47 @@ class TranslationService: | @@ -62,24 +67,47 @@ class TranslationService: | ||
| 62 | raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'") | 67 | raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'") |
| 63 | return factory(name=name, cfg=cfg) | 68 | return factory(name=name, cfg=cfg) |
| 64 | 69 | ||
| 65 | - def _initialize_backends(self) -> Dict[str, TranslationBackendProtocol]: | ||
| 66 | - backends: Dict[str, TranslationBackendProtocol] = {} | ||
| 67 | - for name, capability_cfg in self._enabled_capabilities.items(): | ||
| 68 | - backend_type = str(capability_cfg["backend"]) | ||
| 69 | - logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type) | ||
| 70 | - backends[name] = self._create_backend( | 70 | + def _load_backend(self, name: str) -> Optional[TranslationBackendProtocol]: |
| 71 | + capability_cfg = self._enabled_capabilities.get(name) | ||
| 72 | + if capability_cfg is None: | ||
| 73 | + return None | ||
| 74 | + if name in self._backends: | ||
| 75 | + return self._backends[name] | ||
| 76 | + | ||
| 77 | + backend_type = str(capability_cfg["backend"]) | ||
| 78 | + logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type) | ||
| 79 | + try: | ||
| 80 | + backend = self._create_backend( | ||
| 71 | name=name, | 81 | name=name, |
| 72 | backend_type=backend_type, | 82 | backend_type=backend_type, |
| 73 | cfg=capability_cfg, | 83 | cfg=capability_cfg, |
| 74 | ) | 84 | ) |
| 75 | - logger.info( | ||
| 76 | - "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s", | 85 | + except Exception as exc: |
| 86 | + error_text = str(exc).strip() or exc.__class__.__name__ | ||
| 87 | + self._backend_errors[name] = error_text | ||
| 88 | + logger.error( | ||
| 89 | + "Translation backend initialization failed | model=%s backend=%s error=%s", | ||
| 77 | name, | 90 | name, |
| 78 | backend_type, | 91 | backend_type, |
| 79 | - bool(capability_cfg.get("use_cache")), | ||
| 80 | - getattr(backends[name], "model", name), | 92 | + error_text, |
| 93 | + exc_info=True, | ||
| 81 | ) | 94 | ) |
| 82 | - return backends | 95 | + return None |
| 96 | + | ||
| 97 | + self._backends[name] = backend | ||
| 98 | + self._backend_errors.pop(name, None) | ||
| 99 | + logger.info( | ||
| 100 | + "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s", | ||
| 101 | + name, | ||
| 102 | + backend_type, | ||
| 103 | + bool(capability_cfg.get("use_cache")), | ||
| 104 | + getattr(backend, "model", name), | ||
| 105 | + ) | ||
| 106 | + return backend | ||
| 107 | + | ||
| 108 | + def _initialize_backends(self) -> None: | ||
| 109 | + for name, capability_cfg in self._enabled_capabilities.items(): | ||
| 110 | + self._load_backend(name) | ||
| 83 | 111 | ||
| 84 | def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: | 112 | def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: |
| 85 | from translation.backends.qwen_mt import QwenMTTranslationBackend | 113 | from translation.backends.qwen_mt import QwenMTTranslationBackend |
| @@ -178,13 +206,27 @@ class TranslationService: | @@ -178,13 +206,27 @@ class TranslationService: | ||
| 178 | def loaded_models(self) -> List[str]: | 206 | def loaded_models(self) -> List[str]: |
| 179 | return list(self._backends.keys()) | 207 | return list(self._backends.keys()) |
| 180 | 208 | ||
| 209 | + @property | ||
| 210 | + def failed_models(self) -> List[str]: | ||
| 211 | + return list(self._backend_errors.keys()) | ||
| 212 | + | ||
| 213 | + @property | ||
| 214 | + def backend_errors(self) -> Dict[str, str]: | ||
| 215 | + return dict(self._backend_errors) | ||
| 216 | + | ||
| 181 | def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: | 217 | def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: |
| 182 | normalized = normalize_translation_model(self.config, model) | 218 | normalized = normalize_translation_model(self.config, model) |
| 183 | - backend = self._backends.get(normalized) | 219 | + backend = self._backends.get(normalized) or self._load_backend(normalized) |
| 184 | if backend is None: | 220 | if backend is None: |
| 185 | - raise ValueError( | ||
| 186 | - f"Translation model '{normalized}' is not enabled. " | ||
| 187 | - f"Available models: {', '.join(self.available_models) or 'none'}" | 221 | + if normalized not in self._enabled_capabilities: |
| 222 | + raise ValueError( | ||
| 223 | + f"Translation model '{normalized}' is not enabled. " | ||
| 224 | + f"Available models: {', '.join(self.available_models) or 'none'}" | ||
| 225 | + ) | ||
| 226 | + error_text = self._backend_errors.get(normalized) or "unknown initialization error" | ||
| 227 | + raise RuntimeError( | ||
| 228 | + f"Translation model '{normalized}' failed to initialize: {error_text}. " | ||
| 229 | + f"Loaded models: {', '.join(self.loaded_models) or 'none'}" | ||
| 188 | ) | 230 | ) |
| 189 | return backend | 231 | return backend |
| 190 | 232 |