Commit 89fa3f3ccb7d7815460c21ea52ecca110435d61e
1 parent
778c299a
Sync master portability fixes from f07947a
Showing
14 changed files
with
832 additions
and
183 deletions
Show diff stats
api/translator_app.py
| ... | ... | @@ -271,16 +271,20 @@ async def lifespan(_: FastAPI): |
| 271 | 271 | """Initialize all enabled translation backends on process startup.""" |
| 272 | 272 | logger.info("Starting Translation Service API") |
| 273 | 273 | service = get_translation_service() |
| 274 | + failed_models = list(getattr(service, "failed_models", [])) | |
| 275 | + backend_errors = dict(getattr(service, "backend_errors", {})) | |
| 274 | 276 | logger.info( |
| 275 | - "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s", | |
| 277 | + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s failed_models=%s", | |
| 276 | 278 | service.config["default_model"], |
| 277 | 279 | service.config["default_scene"], |
| 278 | 280 | service.available_models, |
| 279 | 281 | service.loaded_models, |
| 282 | + failed_models, | |
| 280 | 283 | ) |
| 281 | 284 | logger.info( |
| 282 | - "Translation backends initialized on startup | models=%s", | |
| 285 | + "Translation backends initialized on startup | loaded=%s failed=%s", | |
| 283 | 286 | service.loaded_models, |
| 287 | + backend_errors, | |
| 284 | 288 | ) |
| 285 | 289 | verbose_logger.info( |
| 286 | 290 | "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s", |
| ... | ... | @@ -316,11 +320,14 @@ async def health_check(): |
| 316 | 320 | """Health check endpoint.""" |
| 317 | 321 | try: |
| 318 | 322 | service = get_translation_service() |
| 323 | + failed_models = list(getattr(service, "failed_models", [])) | |
| 324 | + backend_errors = dict(getattr(service, "backend_errors", {})) | |
| 319 | 325 | logger.info( |
| 320 | - "Health check | default_model=%s default_scene=%s loaded_models=%s", | |
| 326 | + "Health check | default_model=%s default_scene=%s loaded_models=%s failed_models=%s", | |
| 321 | 327 | service.config["default_model"], |
| 322 | 328 | service.config["default_scene"], |
| 323 | 329 | service.loaded_models, |
| 330 | + failed_models, | |
| 324 | 331 | ) |
| 325 | 332 | return { |
| 326 | 333 | "status": "healthy", |
| ... | ... | @@ -330,6 +337,8 @@ async def health_check(): |
| 330 | 337 | "available_models": service.available_models, |
| 331 | 338 | "enabled_capabilities": get_enabled_translation_models(service.config), |
| 332 | 339 | "loaded_models": service.loaded_models, |
| 340 | + "failed_models": failed_models, | |
| 341 | + "backend_errors": backend_errors, | |
| 333 | 342 | } |
| 334 | 343 | except Exception as e: |
| 335 | 344 | logger.error(f"Health check failed: {e}") |
| ... | ... | @@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request): |
| 463 | 472 | latency_ms = (time.perf_counter() - request_started) * 1000 |
| 464 | 473 | logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms) |
| 465 | 474 | raise HTTPException(status_code=400, detail=str(e)) from e |
| 475 | + except RuntimeError as e: | |
| 476 | + latency_ms = (time.perf_counter() - request_started) * 1000 | |
| 477 | + logger.warning("Translation backend unavailable | error=%s latency_ms=%.2f", e, latency_ms) | |
| 478 | + raise HTTPException(status_code=503, detail=str(e)) from e | |
| 466 | 479 | except Exception as e: |
| 467 | 480 | latency_ms = (time.perf_counter() - request_started) * 1000 |
| 468 | 481 | logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True) | ... | ... |
config/loader.py
| ... | ... | @@ -655,6 +655,14 @@ class AppConfigLoader: |
| 655 | 655 | |
| 656 | 656 | translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {} |
| 657 | 657 | normalized_translation = build_translation_config(translation_raw) |
| 658 | + local_translation_backends = {"local_nllb", "local_marian"} | |
| 659 | + for capability_name, capability_cfg in normalized_translation["capabilities"].items(): | |
| 660 | + backend_name = str(capability_cfg.get("backend") or "").strip().lower() | |
| 661 | + if backend_name not in local_translation_backends: | |
| 662 | + continue | |
| 663 | + for path_key in ("model_dir", "ct2_model_dir"): | |
| 664 | + if capability_cfg.get(path_key) not in (None, ""): | |
| 665 | + capability_cfg[path_key] = str(self._resolve_project_path_value(capability_cfg[path_key]).resolve()) | |
| 658 | 666 | translation_config = TranslationServiceConfig( |
| 659 | 667 | endpoint=str(normalized_translation["service_url"]).rstrip("/"), |
| 660 | 668 | timeout_sec=float(normalized_translation["timeout_sec"]), |
| ... | ... | @@ -749,7 +757,7 @@ class AppConfigLoader: |
| 749 | 757 | port=port, |
| 750 | 758 | backend=backend_name, |
| 751 | 759 | runtime_dir=( |
| 752 | - str(v) | |
| 760 | + str(self._resolve_project_path_value(v).resolve()) | |
| 753 | 761 | if (v := instance_raw.get("runtime_dir")) not in (None, "") |
| 754 | 762 | else None |
| 755 | 763 | ), |
| ... | ... | @@ -787,6 +795,12 @@ class AppConfigLoader: |
| 787 | 795 | rerank=rerank_config, |
| 788 | 796 | ) |
| 789 | 797 | |
| 798 | + def _resolve_project_path_value(self, value: Any) -> Path: | |
| 799 | + candidate = Path(str(value)).expanduser() | |
| 800 | + if candidate.is_absolute(): | |
| 801 | + return candidate | |
| 802 | + return self.project_root / candidate | |
| 803 | + | |
| 790 | 804 | def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig: |
| 791 | 805 | if not isinstance(raw, dict): |
| 792 | 806 | raise ConfigurationError("tenant_config must be a mapping") |
| ... | ... | @@ -822,13 +836,6 @@ class AppConfigLoader: |
| 822 | 836 | |
| 823 | 837 | def _build_infrastructure_config(self, environment: str) -> InfrastructureConfig: |
| 824 | 838 | del environment |
| 825 | - _redis_db_raw = os.getenv("REDIS_DB") or os.getenv("REDIS_SNAPSHOT_DB") | |
| 826 | - _redis_db = 0 | |
| 827 | - if _redis_db_raw is not None and str(_redis_db_raw).strip() != "": | |
| 828 | - try: | |
| 829 | - _redis_db = int(str(_redis_db_raw).strip()) | |
| 830 | - except ValueError: | |
| 831 | - _redis_db = 0 | |
| 832 | 839 | return InfrastructureConfig( |
| 833 | 840 | elasticsearch=ElasticsearchSettings( |
| 834 | 841 | host=os.getenv("ES_HOST", "http://localhost:9200"), |
| ... | ... | @@ -838,7 +845,7 @@ class AppConfigLoader: |
| 838 | 845 | redis=RedisSettings( |
| 839 | 846 | host=os.getenv("REDIS_HOST", "localhost"), |
| 840 | 847 | port=int(os.getenv("REDIS_PORT", 6479)), |
| 841 | - snapshot_db=_redis_db, | |
| 848 | + snapshot_db=int(os.getenv("REDIS_SNAPSHOT_DB", 0)), | |
| 842 | 849 | password=os.getenv("REDIS_PASSWORD"), |
| 843 | 850 | socket_timeout=int(os.getenv("REDIS_SOCKET_TIMEOUT", 1)), |
| 844 | 851 | socket_connect_timeout=int(os.getenv("REDIS_SOCKET_CONNECT_TIMEOUT", 1)), | ... | ... |
frontend/static/js/app.js
| ... | ... | @@ -316,7 +316,10 @@ async function performSearch(page = 1) { |
| 316 | 316 | document.getElementById('productGrid').innerHTML = ''; |
| 317 | 317 | |
| 318 | 318 | try { |
| 319 | - const response = await fetch(`${API_BASE_URL}/search/`, { | |
| 319 | + const searchUrl = new URL(`${API_BASE_URL}/search/`, window.location.origin); | |
| 320 | + searchUrl.searchParams.set('tenant_id', tenantId); | |
| 321 | + | |
| 322 | + const response = await fetch(searchUrl.toString(), { | |
| 320 | 323 | method: 'POST', |
| 321 | 324 | headers: { |
| 322 | 325 | 'Content-Type': 'application/json', | ... | ... |
requirements_translator_service.txt
| ... | ... | @@ -13,7 +13,8 @@ httpx>=0.24.0 |
| 13 | 13 | tqdm>=4.65.0 |
| 14 | 14 | |
| 15 | 15 | torch>=2.0.0 |
| 16 | -transformers>=4.30.0 | |
| 16 | +# Keep translator conversions on the last verified NLLB-compatible release line. | |
| 17 | +transformers>=4.51.0,<4.52.0 | |
| 17 | 18 | ctranslate2>=4.7.0 |
| 18 | 19 | sentencepiece>=0.2.0 |
| 19 | 20 | sacremoses>=0.1.1 | ... | ... |
| 1 | 1 | #!/usr/bin/env python3 |
| 2 | -"""Download local translation models declared in services.translation.capabilities.""" | |
| 2 | +"""Backward-compatible entrypoint for translation model downloads.""" | |
| 3 | 3 | |
| 4 | 4 | from __future__ import annotations |
| 5 | 5 | |
| 6 | -import argparse | |
| 7 | -import os | |
| 6 | +import runpy | |
| 8 | 7 | from pathlib import Path |
| 9 | -import shutil | |
| 10 | -import subprocess | |
| 11 | -import sys | |
| 12 | -from typing import Iterable | |
| 13 | - | |
| 14 | -from huggingface_hub import snapshot_download | |
| 15 | - | |
| 16 | -PROJECT_ROOT = Path(__file__).resolve().parent.parent | |
| 17 | -if str(PROJECT_ROOT) not in sys.path: | |
| 18 | - sys.path.insert(0, str(PROJECT_ROOT)) | |
| 19 | -os.environ.setdefault("HF_HUB_DISABLE_XET", "1") | |
| 20 | - | |
| 21 | -from config.services_config import get_translation_config | |
| 22 | - | |
| 23 | - | |
| 24 | -LOCAL_BACKENDS = {"local_nllb", "local_marian"} | |
| 25 | - | |
| 26 | - | |
| 27 | -def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]: | |
| 28 | - cfg = get_translation_config() | |
| 29 | - capabilities = cfg.get("capabilities", {}) if isinstance(cfg, dict) else {} | |
| 30 | - for name, capability in capabilities.items(): | |
| 31 | - backend = str(capability.get("backend") or "").strip().lower() | |
| 32 | - if backend not in LOCAL_BACKENDS: | |
| 33 | - continue | |
| 34 | - if selected and name not in selected: | |
| 35 | - continue | |
| 36 | - yield name, capability | |
| 37 | - | |
| 38 | - | |
| 39 | -def _compute_ct2_output_dir(capability: dict) -> Path: | |
| 40 | - custom = str(capability.get("ct2_model_dir") or "").strip() | |
| 41 | - if custom: | |
| 42 | - return Path(custom).expanduser() | |
| 43 | - model_dir = Path(str(capability.get("model_dir") or "")).expanduser() | |
| 44 | - compute_type = str(capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default").strip().lower() | |
| 45 | - normalized = compute_type.replace("_", "-") | |
| 46 | - return model_dir / f"ctranslate2-{normalized}" | |
| 47 | - | |
| 48 | - | |
| 49 | -def _resolve_converter_binary() -> str: | |
| 50 | - candidate = shutil.which("ct2-transformers-converter") | |
| 51 | - if candidate: | |
| 52 | - return candidate | |
| 53 | - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter" | |
| 54 | - if venv_candidate.exists(): | |
| 55 | - return str(venv_candidate) | |
| 56 | - raise RuntimeError( | |
| 57 | - "ct2-transformers-converter was not found. " | |
| 58 | - "Install ctranslate2 in the active Python environment first." | |
| 59 | - ) | |
| 60 | - | |
| 61 | - | |
| 62 | -def convert_to_ctranslate2(name: str, capability: dict) -> None: | |
| 63 | - model_id = str(capability.get("model_id") or "").strip() | |
| 64 | - model_dir = Path(str(capability.get("model_dir") or "")).expanduser() | |
| 65 | - model_source = str(model_dir if model_dir.exists() else model_id) | |
| 66 | - output_dir = _compute_ct2_output_dir(capability) | |
| 67 | - if (output_dir / "model.bin").exists(): | |
| 68 | - print(f"[skip-convert] {name} -> {output_dir}") | |
| 69 | - return | |
| 70 | - quantization = str( | |
| 71 | - capability.get("ct2_conversion_quantization") | |
| 72 | - or capability.get("ct2_compute_type") | |
| 73 | - or capability.get("torch_dtype") | |
| 74 | - or "default" | |
| 75 | - ).strip() | |
| 76 | - output_dir.parent.mkdir(parents=True, exist_ok=True) | |
| 77 | - print(f"[convert] {name} -> {output_dir} ({quantization})") | |
| 78 | - subprocess.run( | |
| 79 | - [ | |
| 80 | - _resolve_converter_binary(), | |
| 81 | - "--model", | |
| 82 | - model_source, | |
| 83 | - "--output_dir", | |
| 84 | - str(output_dir), | |
| 85 | - "--quantization", | |
| 86 | - quantization, | |
| 87 | - ], | |
| 88 | - check=True, | |
| 89 | - ) | |
| 90 | - print(f"[converted] {name}") | |
| 91 | - | |
| 92 | - | |
| 93 | -def main() -> None: | |
| 94 | - parser = argparse.ArgumentParser(description="Download local translation models") | |
| 95 | - parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models") | |
| 96 | - parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download") | |
| 97 | - parser.add_argument( | |
| 98 | - "--convert-ctranslate2", | |
| 99 | - action="store_true", | |
| 100 | - help="Also convert the downloaded Hugging Face models into CTranslate2 format", | |
| 101 | - ) | |
| 102 | - args = parser.parse_args() | |
| 103 | - | |
| 104 | - selected = {item.strip().lower() for item in args.models if item.strip()} or None | |
| 105 | - if not args.all_local and not selected: | |
| 106 | - parser.error("pass --all-local or --models <name> ...") | |
| 107 | - | |
| 108 | - for name, capability in iter_local_capabilities(selected): | |
| 109 | - model_id = str(capability.get("model_id") or "").strip() | |
| 110 | - model_dir = Path(str(capability.get("model_dir") or "")).expanduser() | |
| 111 | - if not model_id or not model_dir: | |
| 112 | - raise ValueError(f"Capability '{name}' must define model_id and model_dir") | |
| 113 | - model_dir.parent.mkdir(parents=True, exist_ok=True) | |
| 114 | - print(f"[download] {name} -> {model_dir} ({model_id})") | |
| 115 | - snapshot_download( | |
| 116 | - repo_id=model_id, | |
| 117 | - local_dir=str(model_dir), | |
| 118 | - ) | |
| 119 | - print(f"[done] {name}") | |
| 120 | - if args.convert_ctranslate2: | |
| 121 | - convert_to_ctranslate2(name, capability) | |
| 122 | 8 | |
| 123 | 9 | |
| 124 | 10 | if __name__ == "__main__": |
| 125 | - main() | |
| 11 | + target = Path(__file__).resolve().parent / "translation" / "download_translation_models.py" | |
| 12 | + runpy.run_path(str(target), run_name="__main__") | ... | ... |
| ... | ... | @@ -0,0 +1,278 @@ |
#!/usr/bin/env python3
"""
Simple HTTP server for saas-search frontend.
"""

import http.server
import socketserver
import os
import sys
import logging
import time
import urllib.request
import urllib.error
from collections import defaultdict, deque
from pathlib import Path
from dotenv import load_dotenv

# Load .env from the repository root (two levels above this script).
project_root = Path(__file__).resolve().parents[2]
load_dotenv(project_root / '.env')

# Get API_BASE_URL from environment. It is NOT injected by default, to avoid a
# stale .env value overriding the same-origin proxy strategy; window.API_BASE_URL
# is only injected when FRONTEND_INJECT_API_BASE_URL=1 is set explicitly.
API_BASE_URL = os.getenv('API_BASE_URL') or None
INJECT_API_BASE_URL = os.getenv('FRONTEND_INJECT_API_BASE_URL', '0') == '1'
# Backend proxy target for same-origin API forwarding
BACKEND_PROXY_URL = os.getenv('BACKEND_PROXY_URL', 'http://127.0.0.1:6002').rstrip('/')

# Change to frontend directory so SimpleHTTPRequestHandler serves its files.
frontend_dir = os.path.join(project_root, 'frontend')
os.chdir(frontend_dir)

# FRONTEND_PORT is the canonical config; keep PORT as a secondary fallback.
PORT = int(os.getenv('FRONTEND_PORT', os.getenv('PORT', 6003)))

# Configure logging to suppress scanner noise (only ERROR and above).
logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')
| 38 | + | |
class RateLimitingMixin:
    """Mixin that rate-limits requests per client IP over a sliding window.

    State is class-level, so every handler instance shares one counter table.
    NOTE(review): the trim/check/append sequence is not atomic, so concurrent
    threads may slightly over-admit — acceptable for best-effort limiting.
    """

    # ip -> deque of request timestamps (seconds since epoch)
    request_counts = defaultdict(deque)
    rate_limit = 100  # max requests allowed per window
    window = 60  # window length in seconds

    @classmethod
    def is_rate_limited(cls, ip):
        """Record a request from *ip* and return True if it exceeds the quota."""
        now = time.time()

        # Drop timestamps that have fallen out of the sliding window.
        bucket = cls.request_counts[ip]
        while bucket and bucket[0] < now - cls.window:
            bucket.popleft()

        # Fix: use >= so exactly `rate_limit` requests pass per window.
        # The original `>` comparison admitted rate_limit + 1 requests.
        if len(bucket) >= cls.rate_limit:
            return True

        bucket.append(now)
        return False
| 59 | + | |
class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin):
    """Custom request handler with CORS support and robust error handling.

    Serves static frontend files, optionally injects an API base URL into
    HTML, and transparently proxies API paths (/search/, /admin/, /indexer/)
    to the backend so the browser only talks to this origin.
    """

    _ALLOWED_CORS_HEADERS = "Content-Type, X-Tenant-ID, X-Request-ID, Referer"

    def _is_proxy_path(self, path: str) -> bool:
        """Return True for API paths that should be forwarded to backend service."""
        return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/')

    def _proxy_to_backend(self):
        """Proxy current request to backend service on the GPU server."""
        target_url = f"{BACKEND_PROXY_URL}{self.path}"
        method = self.command.upper()

        # Read the request body (if any) so it can be forwarded verbatim.
        try:
            content_length = int(self.headers.get('Content-Length', '0'))
        except ValueError:
            content_length = 0
        body = self.rfile.read(content_length) if content_length > 0 else None

        # Copy request headers, dropping hop-by-hop ones urllib recomputes.
        forward_headers = {}
        for key, value in self.headers.items():
            lk = key.lower()
            if lk in ('host', 'content-length', 'connection'):
                continue
            forward_headers[key] = value

        req = urllib.request.Request(
            target_url,
            data=body,
            headers=forward_headers,
            method=method,
        )

        try:
            with urllib.request.urlopen(req, timeout=30) as resp:
                resp_body = resp.read()
                self.send_response(resp.getcode())
                for header, value in resp.getheaders():
                    lh = header.lower()
                    if lh in ('transfer-encoding', 'connection', 'content-length'):
                        continue
                    self.send_header(header, value)
                self.end_headers()
                self.wfile.write(resp_body)
        except urllib.error.HTTPError as e:
            # Backend answered with an HTTP error: relay status, headers, body.
            err_body = e.read() if hasattr(e, 'read') else b''
            self.send_response(e.code)
            if e.headers:
                for header, value in e.headers.items():
                    lh = header.lower()
                    if lh in ('transfer-encoding', 'connection', 'content-length'):
                        continue
                    self.send_header(header, value)
            self.end_headers()
            if err_body:
                self.wfile.write(err_body)
        except Exception as e:
            # Network failure / timeout reaching the backend: answer 502.
            logging.error(f"Backend proxy error for {method} {self.path}: {e}")
            self.send_response(502)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.end_headers()
            self.wfile.write(b'{"error":"Bad Gateway: backend proxy failed"}')

    def do_GET(self):
        """Handle GET requests with API config injection."""
        path = self.path.split('?')[0]

        # Proxy API paths to backend first
        if self._is_proxy_path(path):
            self._proxy_to_backend()
            return

        # Route / to index.html, preserving any query string.
        # Fix: the original concatenation dropped the '?', producing paths
        # like '/index.htmla=1' that could never resolve to a file.
        if path == '/' or path == '':
            self.path = '/index.html' + ('?' + self.path.split('?', 1)[1] if '?' in self.path else '')

        # Inject API config for HTML files
        if self.path.endswith('.html'):
            self._serve_html_with_config()
        else:
            super().do_GET()

    def _serve_html_with_config(self):
        """Serve HTML with optional API_BASE_URL injected."""
        try:
            file_path = self.path.lstrip('/')
            if not os.path.exists(file_path):
                self.send_error(404)
                return

            with open(file_path, 'r', encoding='utf-8') as f:
                html = f.read()

            # API_BASE_URL is not injected by default so a stale .env value
            # (e.g. http://host:6002) cannot override same-origin calls.
            # Inject only when FRONTEND_INJECT_API_BASE_URL=1 and a URL is set.
            if INJECT_API_BASE_URL and API_BASE_URL:
                config_script = f'<script>window.API_BASE_URL="{API_BASE_URL}";</script>\n    '
                html = html.replace('<script src="/static/js/app.js', config_script + '<script src="/static/js/app.js', 1)

            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.end_headers()
            self.wfile.write(html.encode('utf-8'))
        except Exception as e:
            logging.error(f"Error serving HTML: {e}")
            self.send_error(500)

    def do_POST(self):
        """Handle POST requests. Proxy API requests to backend."""
        path = self.path.split('?')[0]
        if self._is_proxy_path(path):
            self._proxy_to_backend()
            return
        self.send_error(405, "Method Not Allowed")

    def setup(self):
        """Setup with error handling."""
        try:
            super().setup()
        except Exception:
            pass  # Silently handle setup errors from scanners

    def handle_one_request(self):
        """Handle single request with error catching."""
        try:
            # Check rate limiting before parsing the request.
            client_ip = self.client_address[0]
            if self.is_rate_limited(client_ip):
                logging.warning(f"Rate limiting IP: {client_ip}")
                self.send_error(429, "Too Many Requests")
                return

            super().handle_one_request()
        except (ConnectionResetError, BrokenPipeError):
            # Client disconnected prematurely - common with scanners
            pass
        except UnicodeDecodeError:
            # Binary data received - not HTTP
            pass
        except Exception as e:
            # Log unexpected errors but don't crash
            logging.debug(f"Request handling error: {e}")

    def log_message(self, format, *args):
        """Suppress logging for malformed requests from scanners."""
        message = format % args
        # Filter out scanner noise
        noise_patterns = [
            "code 400",
            "Bad request",
            "Bad request version",
            "Bad HTTP/0.9 request type",
            "Bad request syntax"
        ]
        if any(pattern in message for pattern in noise_patterns):
            return
        # Only log legitimate requests
        if message and not message.startswith(" ") and len(message) > 10:
            super().log_message(format, *args)

    def end_headers(self):
        # Add CORS headers to every response (proxied and static alike).
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', self._ALLOWED_CORS_HEADERS)
        # Add security headers
        self.send_header('X-Content-Type-Options', 'nosniff')
        self.send_header('X-Frame-Options', 'DENY')
        self.send_header('X-XSS-Protection', '1; mode=block')
        super().end_headers()

    def do_OPTIONS(self):
        """Handle OPTIONS requests (CORS preflight)."""
        try:
            path = self.path.split('?')[0]
            if self._is_proxy_path(path):
                self.send_response(204)
                self.end_headers()
                return
            self.send_response(200)
            self.end_headers()
        except Exception:
            pass
| 244 | + | |
class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
    """TCP server that serves each request on its own daemon thread.

    ``allow_reuse_address`` lets the port be rebound quickly after a restart;
    ``daemon_threads`` keeps lingering workers from blocking interpreter exit.
    """

    daemon_threads = True
    allow_reuse_address = True
| 249 | + | |
if __name__ == '__main__':
    # Probe the port first so we can print a friendly hint instead of a
    # traceback when another instance is already listening.
    # NOTE(review): there is a small race between this probe and the real
    # bind below; allow_reuse_address makes the consequences benign.
    import socket
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind(("", PORT))
        sock.close()
    except OSError:
        print(f"ERROR: Port {PORT} is already in use.")
        print(f"Please stop the existing server or use a different port.")
        print(f"To stop existing server: kill $(lsof -t -i:{PORT})")
        sys.exit(1)

    # Create threaded server for better concurrency
    with ThreadedTCPServer(("", PORT), MyHTTPRequestHandler) as httpd:
        print(f"Frontend server started at http://localhost:{PORT}")
        print(f"Serving files from: {os.getcwd()}")
        print("\nPress Ctrl+C to stop the server")

        try:
            # Blocks until shutdown() or an unhandled exception.
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nShutting down server...")
            httpd.shutdown()
            print("Server stopped")
            sys.exit(0)
        except Exception as e:
            print(f"Server error: {e}")
            sys.exit(1)
scripts/setup_translator_venv.sh
| ... | ... | @@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" |
| 8 | 8 | cd "${PROJECT_ROOT}" |
| 9 | 9 | |
| 10 | 10 | VENV_DIR="${PROJECT_ROOT}/.venv-translator" |
| 11 | -PYTHON_BIN="${PYTHON_BIN:-python3}" | |
| 12 | 11 | TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}" |
| 12 | +MIN_PYTHON_MAJOR=3 | |
| 13 | +MIN_PYTHON_MINOR=10 | |
| 14 | + | |
# Return 0 when the interpreter at $1 is at least MIN_PYTHON_MAJOR.MIN_PYTHON_MINOR.
# The required version is passed as argv to the inline script; the heredoc tag
# is single-quoted so the Python source itself is not shell-expanded.
python_meets_minimum() {
    local bin="$1"
    "${bin}" - <<'PY' "${MIN_PYTHON_MAJOR}" "${MIN_PYTHON_MINOR}"
import sys

required = tuple(int(value) for value in sys.argv[1:])
sys.exit(0 if sys.version_info[:2] >= required else 1)
PY
}
| 24 | + | |
# Echo the first interpreter that exists on PATH and meets the minimum version.
# An explicitly-set PYTHON_BIN is tried first, then newest-first python3.x
# names. Returns 1 (and echoes nothing) when no candidate qualifies.
discover_python_bin() {
    local candidates=()

    if [[ -n "${PYTHON_BIN:-}" ]]; then
        candidates+=("${PYTHON_BIN}")
    fi
    candidates+=("python3.12" "python3.11" "python3.10" "python3")

    local candidate
    for candidate in "${candidates[@]}"; do
        if ! command -v "${candidate}" >/dev/null 2>&1; then
            continue
        fi
        if python_meets_minimum "${candidate}"; then
            echo "${candidate}"
            return 0
        fi
    done

    return 1
}
| 46 | + | |
| 47 | +if ! PYTHON_BIN="$(discover_python_bin)"; then | |
| 48 | + echo "ERROR: unable to find Python >= ${MIN_PYTHON_MAJOR}.${MIN_PYTHON_MINOR}." >&2 | |
| 49 | + echo "Set PYTHON_BIN to a compatible interpreter and rerun." >&2 | |
| 50 | + exit 1 | |
| 51 | +fi | |
| 13 | 52 | |
| 14 | 53 | if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then |
| 15 | 54 | echo "ERROR: python not found: ${PYTHON_BIN}" >&2 |
| ... | ... | @@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}" |
| 32 | 71 | export TMPDIR="${TMP_DIR}" |
| 33 | 72 | PIP_ARGS=(--no-cache-dir) |
| 34 | 73 | |
| 74 | +echo "Using Python=${PYTHON_BIN}" | |
| 35 | 75 | echo "Using TMPDIR=${TMPDIR}" |
| 36 | 76 | "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel |
| 37 | 77 | "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt |
| ... | ... | @@ -39,5 +79,5 @@ echo "Using TMPDIR=${TMPDIR}" |
| 39 | 79 | echo |
| 40 | 80 | echo "Done." |
| 41 | 81 | echo "Translator venv: ${VENV_DIR}" |
| 42 | -echo "Download local models: ./.venv-translator/bin/python scripts/download_translation_models.py --all-local" | |
| 82 | +echo "Download local models: ./.venv-translator/bin/python scripts/translation/download_translation_models.py --all-local" | |
| 43 | 83 | echo "Start service: ./scripts/start_translator.sh" | ... | ... |
scripts/translation/download_translation_models.py
0 โ 100755
| ... | ... | @@ -0,0 +1,100 @@ |
| 1 | +#!/usr/bin/env python3 | |
| 2 | +"""Download local translation models declared in services.translation.capabilities.""" | |
| 3 | + | |
| 4 | +from __future__ import annotations | |
| 5 | + | |
| 6 | +import argparse | |
| 7 | +import os | |
| 8 | +from pathlib import Path | |
| 9 | +import sys | |
| 10 | +from typing import Iterable | |
| 11 | + | |
| 12 | +from huggingface_hub import snapshot_download | |
| 13 | + | |
| 14 | +PROJECT_ROOT = Path(__file__).resolve().parents[2] | |
| 15 | +if str(PROJECT_ROOT) not in sys.path: | |
| 16 | + sys.path.insert(0, str(PROJECT_ROOT)) | |
| 17 | +os.environ.setdefault("HF_HUB_DISABLE_XET", "1") | |
| 18 | + | |
| 19 | +from config.services_config import get_translation_config | |
| 20 | +from translation.ct2_conversion import convert_transformers_model | |
| 21 | + | |
| 22 | + | |
| 23 | +LOCAL_BACKENDS = {"local_nllb", "local_marian"} | |
| 24 | + | |
| 25 | + | |
def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]:
    """Yield (name, capability) pairs whose backend is hosted locally.

    When *selected* is a non-empty set, only capabilities whose name appears
    in it are yielded; otherwise every local capability is included.
    """
    cfg = get_translation_config()
    capabilities = cfg.get("capabilities", {}) if isinstance(cfg, dict) else {}
    for name, capability in capabilities.items():
        backend = str(capability.get("backend") or "").strip().lower()
        if backend in LOCAL_BACKENDS and (not selected or name in selected):
            yield name, capability
| 36 | + | |
| 37 | + | |
| 38 | +def _compute_ct2_output_dir(capability: dict) -> Path: | |
| 39 | + custom = str(capability.get("ct2_model_dir") or "").strip() | |
| 40 | + if custom: | |
| 41 | + return Path(custom).expanduser() | |
| 42 | + model_dir = Path(str(capability.get("model_dir") or "")).expanduser() | |
| 43 | + compute_type = str(capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default").strip().lower() | |
| 44 | + normalized = compute_type.replace("_", "-") | |
| 45 | + return model_dir / f"ctranslate2-{normalized}" | |
| 46 | + | |
| 47 | + | |
def convert_to_ctranslate2(name: str, capability: dict) -> None:
    """Convert a downloaded Hugging Face model to CTranslate2 format.

    Idempotent: conversion is skipped when the target directory already
    contains ``model.bin``. Prefers the local ``model_dir`` as the source,
    falling back to the hub ``model_id`` when it has not been downloaded.
    """
    model_id = str(capability.get("model_id") or "").strip()
    model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
    output_dir = _compute_ct2_output_dir(capability)
    if (output_dir / "model.bin").exists():
        print(f"[skip-convert] {name} -> {output_dir}")
        return
    source = str(model_dir if model_dir.exists() else model_id)
    # Quantization precedence: explicit conversion setting, then runtime
    # compute type, then torch dtype, then the converter's default.
    quantization = str(
        capability.get("ct2_conversion_quantization")
        or capability.get("ct2_compute_type")
        or capability.get("torch_dtype")
        or "default"
    ).strip()
    output_dir.parent.mkdir(parents=True, exist_ok=True)
    print(f"[convert] {name} -> {output_dir} ({quantization})")
    convert_transformers_model(source, str(output_dir), quantization)
    print(f"[converted] {name}")
| 66 | + | |
| 67 | + | |
def main() -> None:
    """CLI entry: download configured local translation models, optionally
    converting each to CTranslate2 format afterwards."""
    parser = argparse.ArgumentParser(description="Download local translation models")
    parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models")
    parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download")
    parser.add_argument(
        "--convert-ctranslate2",
        action="store_true",
        help="Also convert the downloaded Hugging Face models into CTranslate2 format",
    )
    args = parser.parse_args()

    # Normalize requested names; an empty selection means "no filter".
    requested = {entry.strip().lower() for entry in args.models if entry.strip()}
    selected = requested or None
    if not args.all_local and not selected:
        parser.error("pass --all-local or --models <name> ...")

    for name, capability in iter_local_capabilities(selected):
        model_id = str(capability.get("model_id") or "").strip()
        model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
        if not model_id or not model_dir:
            raise ValueError(f"Capability '{name}' must define model_id and model_dir")
        model_dir.parent.mkdir(parents=True, exist_ok=True)
        print(f"[download] {name} -> {model_dir} ({model_id})")
        snapshot_download(repo_id=model_id, local_dir=str(model_dir))
        print(f"[done] {name}")
        if args.convert_ctranslate2:
            convert_to_ctranslate2(name, capability)


if __name__ == "__main__":
    main()
tests/test_ct2_conversion.py
| ... | ... | @@ -0,0 +1,85 @@ |
| 1 | +from __future__ import annotations | |
| 2 | + | |
| 3 | +import sys | |
| 4 | +import types | |
| 5 | + | |
| 6 | +import pytest | |
| 7 | + | |
| 8 | +import translation.ct2_conversion as ct2_conversion | |
| 9 | + | |
| 10 | + | |
| 11 | +class _FakeTransformersConverter: | |
| 12 | + def __init__(self, model_name_or_path): | |
| 13 | + self.model_name_or_path = model_name_or_path | |
| 14 | + self.load_calls = [] | |
| 15 | + | |
| 16 | + def load_model(self, model_class, resolved_model_name_or_path, **kwargs): | |
| 17 | + self.load_calls.append( | |
| 18 | + { | |
| 19 | + "model_class": model_class, | |
| 20 | + "resolved_model_name_or_path": resolved_model_name_or_path, | |
| 21 | + "kwargs": dict(kwargs), | |
| 22 | + } | |
| 23 | + ) | |
| 24 | + if "dtype" in kwargs or "torch_dtype" in kwargs: | |
| 25 | + raise TypeError("M2M100ForConditionalGeneration.__init__() got an unexpected keyword argument 'dtype'") | |
| 26 | + return {"loaded": True, "path": resolved_model_name_or_path} | |
| 27 | + | |
| 28 | + def convert(self, output_dir, quantization=None, force=False): | |
| 29 | + loaded = self.load_model("FakeModel", self.model_name_or_path, dtype="float32") | |
| 30 | + return { | |
| 31 | + "loaded": loaded, | |
| 32 | + "output_dir": output_dir, | |
| 33 | + "quantization": quantization, | |
| 34 | + "force": force, | |
| 35 | + "load_calls": list(self.load_calls), | |
| 36 | + } | |
| 37 | + | |
| 38 | + | |
| 39 | +def _install_fake_ctranslate2(monkeypatch, base_converter): | |
| 40 | + converters_module = types.ModuleType("ctranslate2.converters") | |
| 41 | + converters_module.TransformersConverter = base_converter | |
| 42 | + ctranslate2_module = types.ModuleType("ctranslate2") | |
| 43 | + ctranslate2_module.converters = converters_module | |
| 44 | + | |
| 45 | + monkeypatch.setitem(sys.modules, "ctranslate2", ctranslate2_module) | |
| 46 | + monkeypatch.setitem(sys.modules, "ctranslate2.converters", converters_module) | |
| 47 | + | |
| 48 | + | |
| 49 | +def test_convert_transformers_model_retries_without_torch_dtype(monkeypatch): | |
| 50 | + _install_fake_ctranslate2(monkeypatch, _FakeTransformersConverter) | |
| 51 | + fake_transformers = types.ModuleType("transformers") | |
| 52 | + fake_transformers.AutoConfig = types.SimpleNamespace( | |
| 53 | + from_pretrained=lambda path: types.SimpleNamespace(torch_dtype="float32", path=path) | |
| 54 | + ) | |
| 55 | + monkeypatch.setitem(sys.modules, "transformers", fake_transformers) | |
| 56 | + | |
| 57 | + result = ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16") | |
| 58 | + | |
| 59 | + assert result["loaded"] == {"loaded": True, "path": "fake-model"} | |
| 60 | + assert result["output_dir"] == "/tmp/out" | |
| 61 | + assert result["quantization"] == "float16" | |
| 62 | + assert result["force"] is False | |
| 63 | + assert len(result["load_calls"]) == 2 | |
| 64 | + assert result["load_calls"][0] == { | |
| 65 | + "model_class": "FakeModel", | |
| 66 | + "resolved_model_name_or_path": "fake-model", | |
| 67 | + "kwargs": {"dtype": "float32"}, | |
| 68 | + } | |
| 69 | + assert result["load_calls"][1]["model_class"] == "FakeModel" | |
| 70 | + assert result["load_calls"][1]["resolved_model_name_or_path"] == "fake-model" | |
| 71 | + assert getattr(result["load_calls"][1]["kwargs"]["config"], "torch_dtype", "missing") is None | |
| 72 | + | |
| 73 | + | |
| 74 | +def test_convert_transformers_model_preserves_unrelated_type_errors(monkeypatch): | |
| 75 | + class _AlwaysFailingConverter(_FakeTransformersConverter): | |
| 76 | + def load_model(self, model_class, resolved_model_name_or_path, **kwargs): | |
| 77 | + raise TypeError("different constructor error") | |
| 78 | + | |
| 79 | + _install_fake_ctranslate2(monkeypatch, _AlwaysFailingConverter) | |
| 80 | + fake_transformers = types.ModuleType("transformers") | |
| 81 | + fake_transformers.AutoConfig = types.SimpleNamespace(from_pretrained=lambda path: types.SimpleNamespace(path=path)) | |
| 82 | + monkeypatch.setitem(sys.modules, "transformers", fake_transformers) | |
| 83 | + | |
| 84 | + with pytest.raises(TypeError, match="different constructor error"): | |
| 85 | + ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16") | ... | ... |
tests/test_translation_local_backends.py
| ... | ... | @@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch): |
| 201 | 201 | assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]] |
| 202 | 202 | |
| 203 | 203 | |
| 204 | +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_wrong_type(tmp_path, monkeypatch): | |
| 205 | + wrong_dir = tmp_path / "wrong-nllb" | |
| 206 | + wrong_dir.mkdir() | |
| 207 | + (wrong_dir / "config.json").write_text('{"model_type":"led"}', encoding="utf-8") | |
| 208 | + | |
| 209 | + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime) | |
| 210 | + | |
| 211 | + backend = NLLBCTranslate2TranslationBackend( | |
| 212 | + name="nllb-200-distilled-600m", | |
| 213 | + model_id="facebook/nllb-200-distilled-600M", | |
| 214 | + model_dir=str(wrong_dir), | |
| 215 | + device="cpu", | |
| 216 | + torch_dtype="float32", | |
| 217 | + batch_size=1, | |
| 218 | + max_input_length=16, | |
| 219 | + max_new_tokens=16, | |
| 220 | + num_beams=1, | |
| 221 | + ) | |
| 222 | + | |
| 223 | + assert backend._model_source() == "facebook/nllb-200-distilled-600M" | |
| 224 | + assert backend._tokenizer_source() == "facebook/nllb-200-distilled-600M" | |
| 225 | + | |
| 226 | + | |
| 227 | +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_incomplete(tmp_path, monkeypatch): | |
| 228 | + incomplete_dir = tmp_path / "incomplete-nllb" | |
| 229 | + incomplete_dir.mkdir() | |
| 230 | + (incomplete_dir / "ctranslate2-float16").mkdir() | |
| 231 | + | |
| 232 | + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime) | |
| 233 | + | |
| 234 | + backend = NLLBCTranslate2TranslationBackend( | |
| 235 | + name="nllb-200-distilled-600m", | |
| 236 | + model_id="facebook/nllb-200-distilled-600M", | |
| 237 | + model_dir=str(incomplete_dir), | |
| 238 | + device="cpu", | |
| 239 | + torch_dtype="float32", | |
| 240 | + batch_size=1, | |
| 241 | + max_input_length=16, | |
| 242 | + max_new_tokens=16, | |
| 243 | + num_beams=1, | |
| 244 | + ) | |
| 245 | + | |
| 246 | + assert backend._model_source() == "facebook/nllb-200-distilled-600M" | |
| 247 | + | |
| 248 | + | |
| 204 | 249 | def test_nllb_resolves_flores_short_tags_and_iso_no(): |
| 205 | 250 | cat = build_nllb_language_catalog(None) |
| 206 | 251 | assert resolve_nllb_language_code("ca", cat) == "cat_Latn" | ... | ... |
tests/test_translator_failure_semantics.py
| ... | ... | @@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog): |
| 197 | 197 | ] |
| 198 | 198 | |
| 199 | 199 | |
| 200 | +def test_service_skips_failed_backend_but_keeps_healthy_capabilities(monkeypatch): | |
| 201 | + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None)) | |
| 202 | + | |
| 203 | + def _fake_create_backend(self, *, name, backend_type, cfg): | |
| 204 | + del self, backend_type, cfg | |
| 205 | + if name == "broken-nllb": | |
| 206 | + raise RuntimeError("broken model dir") | |
| 207 | + | |
| 208 | + class _Backend: | |
| 209 | + model = name | |
| 210 | + | |
| 211 | + @property | |
| 212 | + def supports_batch(self): | |
| 213 | + return True | |
| 214 | + | |
| 215 | + def translate(self, text, target_lang, source_lang=None, scene=None): | |
| 216 | + del target_lang, source_lang, scene | |
| 217 | + return text | |
| 218 | + | |
| 219 | + return _Backend() | |
| 220 | + | |
| 221 | + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend) | |
| 222 | + service = TranslationService( | |
| 223 | + { | |
| 224 | + "service_url": "http://127.0.0.1:6006", | |
| 225 | + "timeout_sec": 10.0, | |
| 226 | + "default_model": "llm", | |
| 227 | + "default_scene": "general", | |
| 228 | + "capabilities": { | |
| 229 | + "llm": { | |
| 230 | + "enabled": True, | |
| 231 | + "backend": "llm", | |
| 232 | + "model": "dummy-llm", | |
| 233 | + "base_url": "https://example.com", | |
| 234 | + "timeout_sec": 10.0, | |
| 235 | + "use_cache": True, | |
| 236 | + }, | |
| 237 | + "broken-nllb": { | |
| 238 | + "enabled": True, | |
| 239 | + "backend": "local_nllb", | |
| 240 | + "model_id": "dummy", | |
| 241 | + "model_dir": "dummy", | |
| 242 | + "device": "cpu", | |
| 243 | + "torch_dtype": "float32", | |
| 244 | + "batch_size": 8, | |
| 245 | + "max_input_length": 16, | |
| 246 | + "max_new_tokens": 16, | |
| 247 | + "num_beams": 1, | |
| 248 | + "use_cache": True, | |
| 249 | + }, | |
| 250 | + }, | |
| 251 | + "cache": { | |
| 252 | + "ttl_seconds": 60, | |
| 253 | + "sliding_expiration": True, | |
| 254 | + }, | |
| 255 | + } | |
| 256 | + ) | |
| 257 | + | |
| 258 | + assert service.available_models == ["llm", "broken-nllb"] | |
| 259 | + assert service.loaded_models == ["llm"] | |
| 260 | + assert service.failed_models == ["broken-nllb"] | |
| 261 | + assert service.backend_errors["broken-nllb"] == "broken model dir" | |
| 262 | + | |
| 263 | + with pytest.raises(RuntimeError, match="failed to initialize"): | |
| 264 | + service.get_backend("broken-nllb") | |
| 265 | + | |
| 266 | + | |
| 200 | 267 | def test_translation_cache_probe_models_order(): |
| 201 | 268 | cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}} |
| 202 | 269 | assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"] | ... | ... |
translation/backends/local_ctranslate2.py
| ... | ... | @@ -4,9 +4,7 @@ from __future__ import annotations |
| 4 | 4 | |
| 5 | 5 | import logging |
| 6 | 6 | import os |
| 7 | -import shutil | |
| 8 | -import subprocess | |
| 9 | -import sys | |
| 7 | +import json | |
| 10 | 8 | import threading |
| 11 | 9 | from pathlib import Path |
| 12 | 10 | from typing import Dict, List, Optional, Sequence, Union |
| ... | ... | @@ -24,6 +22,7 @@ from translation.text_splitter import ( |
| 24 | 22 | join_translated_segments, |
| 25 | 23 | split_text_for_translation, |
| 26 | 24 | ) |
| 25 | +from translation.ct2_conversion import convert_transformers_model | |
| 27 | 26 | |
| 28 | 27 | logger = logging.getLogger(__name__) |
| 29 | 28 | |
| ... | ... | @@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -> str: |
| 76 | 75 | return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}") |
| 77 | 76 | |
| 78 | 77 | |
| 79 | -def _resolve_converter_binary() -> str: | |
| 80 | - candidate = shutil.which("ct2-transformers-converter") | |
| 81 | - if candidate: | |
| 82 | - return candidate | |
| 83 | - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter" | |
| 84 | - if venv_candidate.exists(): | |
| 85 | - return str(venv_candidate) | |
| 86 | - raise RuntimeError( | |
| 87 | - "ct2-transformers-converter was not found. " | |
| 88 | - "Ensure ctranslate2 is installed in the active translator environment." | |
| 89 | - ) | |
| 78 | +def _detect_local_model_type(model_dir: str) -> Optional[str]: | |
| 79 | + config_path = Path(model_dir).expanduser() / "config.json" | |
| 80 | + if not config_path.exists(): | |
| 81 | + return None | |
| 82 | + try: | |
| 83 | + with open(config_path, "r", encoding="utf-8") as handle: | |
| 84 | + payload = json.load(handle) or {} | |
| 85 | + except Exception as exc: | |
| 86 | + logger.warning("Failed to inspect local translation config %s: %s", config_path, exc) | |
| 87 | + return None | |
| 88 | + model_type = str(payload.get("model_type") or "").strip().lower() | |
| 89 | + return model_type or None | |
| 90 | 90 | |
| 91 | 91 | |
| 92 | 92 | class LocalCTranslate2TranslationBackend: |
| ... | ... | @@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend: |
| 144 | 144 | self.ct2_decoding_length_extra = int(ct2_decoding_length_extra) |
| 145 | 145 | self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min)) |
| 146 | 146 | self._tokenizer_lock = threading.Lock() |
| 147 | + self._local_model_source = self._resolve_local_model_source() | |
| 147 | 148 | self._load_runtime() |
| 148 | 149 | |
| 149 | 150 | @property |
| ... | ... | @@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend: |
| 151 | 152 | return True |
| 152 | 153 | |
| 153 | 154 | def _tokenizer_source(self) -> str: |
| 154 | - return self.model_dir if os.path.exists(self.model_dir) else self.model_id | |
| 155 | + return self._local_model_source or self.model_id | |
| 155 | 156 | |
| 156 | 157 | def _model_source(self) -> str: |
| 157 | - return self.model_dir if os.path.exists(self.model_dir) else self.model_id | |
| 158 | + return self._local_model_source or self.model_id | |
| 159 | + | |
| 160 | + def _expected_local_model_types(self) -> Optional[set[str]]: | |
| 161 | + return None | |
| 162 | + | |
| 163 | + def _resolve_local_model_source(self) -> Optional[str]: | |
| 164 | + model_path = Path(self.model_dir).expanduser() | |
| 165 | + if not model_path.exists(): | |
| 166 | + return None | |
| 167 | + if not (model_path / "config.json").exists(): | |
| 168 | + logger.warning( | |
| 169 | + "Local translation model_dir is incomplete | model=%s model_dir=%s missing=config.json fallback=model_id", | |
| 170 | + self.model, | |
| 171 | + model_path, | |
| 172 | + ) | |
| 173 | + return None | |
| 174 | + | |
| 175 | + expected_types = self._expected_local_model_types() | |
| 176 | + if not expected_types: | |
| 177 | + return str(model_path) | |
| 178 | + | |
| 179 | + detected_type = _detect_local_model_type(str(model_path)) | |
| 180 | + if detected_type is None: | |
| 181 | + return str(model_path) | |
| 182 | + if detected_type in expected_types: | |
| 183 | + return str(model_path) | |
| 184 | + | |
| 185 | + logger.warning( | |
| 186 | + "Local translation model_dir has unexpected model_type | model=%s model_dir=%s detected=%s expected=%s fallback=model_id", | |
| 187 | + self.model, | |
| 188 | + model_path, | |
| 189 | + detected_type, | |
| 190 | + sorted(expected_types), | |
| 191 | + ) | |
| 192 | + return None | |
| 158 | 193 | |
| 159 | 194 | def _tokenizer_kwargs(self) -> Dict[str, object]: |
| 160 | 195 | return {} |
| ... | ... | @@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend: |
| 204 | 239 | ) |
| 205 | 240 | |
| 206 | 241 | ct2_path.parent.mkdir(parents=True, exist_ok=True) |
| 207 | - converter = _resolve_converter_binary() | |
| 208 | 242 | logger.info( |
| 209 | 243 | "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s", |
| 210 | 244 | self.model, |
| ... | ... | @@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend: |
| 213 | 247 | self.ct2_conversion_quantization, |
| 214 | 248 | ) |
| 215 | 249 | try: |
| 216 | - subprocess.run( | |
| 217 | - [ | |
| 218 | - converter, | |
| 219 | - "--model", | |
| 220 | - model_source, | |
| 221 | - "--output_dir", | |
| 222 | - str(ct2_path), | |
| 223 | - "--quantization", | |
| 224 | - self.ct2_conversion_quantization, | |
| 225 | - ], | |
| 226 | - check=True, | |
| 227 | - stdout=subprocess.PIPE, | |
| 228 | - stderr=subprocess.PIPE, | |
| 229 | - text=True, | |
| 250 | + convert_transformers_model( | |
| 251 | + model_source, | |
| 252 | + str(ct2_path), | |
| 253 | + self.ct2_conversion_quantization, | |
| 230 | 254 | ) |
| 231 | - except subprocess.CalledProcessError as exc: | |
| 232 | - stderr = exc.stderr.strip() | |
| 255 | + except Exception as exc: | |
| 233 | 256 | raise RuntimeError( |
| 234 | - f"Failed to convert model '{self.model}' to CTranslate2: {stderr or exc}" | |
| 257 | + f"Failed to convert model '{self.model}' to CTranslate2: {exc}" | |
| 235 | 258 | ) from exc |
| 236 | 259 | |
| 237 | 260 | def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]: |
| ... | ... | @@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): |
| 557 | 580 | f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}" |
| 558 | 581 | ) |
| 559 | 582 | |
| 583 | + def _expected_local_model_types(self) -> Optional[set[str]]: | |
| 584 | + return {"marian"} | |
| 585 | + | |
| 560 | 586 | |
| 561 | 587 | class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): |
| 562 | 588 | """Local backend for NLLB models on CTranslate2.""" |
| ... | ... | @@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): |
| 619 | 645 | if resolve_nllb_language_code(target_lang, self.language_codes) is None: |
| 620 | 646 | raise ValueError(f"Unsupported NLLB target language: {target_lang}") |
| 621 | 647 | |
| 648 | + def _expected_local_model_types(self) -> Optional[set[str]]: | |
| 649 | + return {"m2m_100", "nllb_moe"} | |
| 650 | + | |
| 622 | 651 | def _get_tokenizer_for_source(self, source_lang: str): |
| 623 | 652 | src_code = resolve_nllb_language_code(source_lang, self.language_codes) |
| 624 | 653 | if src_code is None: | ... | ... |
translation/ct2_conversion.py
| ... | ... | @@ -0,0 +1,52 @@ |
| 1 | +"""Helpers for converting Hugging Face translation models to CTranslate2.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import copy | |
| 6 | +import logging | |
| 7 | + | |
| 8 | +logger = logging.getLogger(__name__) | |
| 9 | + | |
| 10 | + | |
| 11 | +def convert_transformers_model( | |
| 12 | + model_name_or_path: str, | |
| 13 | + output_dir: str, | |
| 14 | + quantization: str, | |
| 15 | + *, | |
| 16 | + force: bool = False, | |
| 17 | +) -> str: | |
| 18 | + from ctranslate2.converters import TransformersConverter | |
| 19 | + from transformers import AutoConfig | |
| 20 | + | |
| 21 | + class _CompatibleTransformersConverter(TransformersConverter): | |
| 22 | + def load_model(self, model_class, resolved_model_name_or_path, **kwargs): | |
| 23 | + try: | |
| 24 | + return super().load_model(model_class, resolved_model_name_or_path, **kwargs) | |
| 25 | + except TypeError as exc: | |
| 26 | + if "unexpected keyword argument 'dtype'" not in str(exc): | |
| 27 | + raise | |
| 28 | + if kwargs.get("dtype") is None and kwargs.get("torch_dtype") is None: | |
| 29 | + raise | |
| 30 | + | |
| 31 | + logger.warning( | |
| 32 | + "Retrying CTranslate2 model load without dtype hints | model=%s class=%s", | |
| 33 | + resolved_model_name_or_path, | |
| 34 | + getattr(model_class, "__name__", model_class), | |
| 35 | + ) | |
| 36 | + retry_kwargs = dict(kwargs) | |
| 37 | + retry_kwargs.pop("dtype", None) | |
| 38 | + retry_kwargs.pop("torch_dtype", None) | |
| 39 | + config = retry_kwargs.get("config") | |
| 40 | + if config is None: | |
| 41 | + config = AutoConfig.from_pretrained(resolved_model_name_or_path) | |
| 42 | + else: | |
| 43 | + config = copy.deepcopy(config) | |
| 44 | + if hasattr(config, "dtype"): | |
| 45 | + config.dtype = None | |
| 46 | + if hasattr(config, "torch_dtype"): | |
| 47 | + config.torch_dtype = None | |
| 48 | + retry_kwargs["config"] = config | |
| 49 | + return super().load_model(model_class, resolved_model_name_or_path, **retry_kwargs) | |
| 50 | + | |
| 51 | + converter = _CompatibleTransformersConverter(model_name_or_path) | |
| 52 | + return converter.convert(output_dir=output_dir, quantization=quantization, force=force) | ... | ... |
translation/service.py
| ... | ... | @@ -31,7 +31,12 @@ class TranslationService: |
| 31 | 31 | if not self._enabled_capabilities: |
| 32 | 32 | raise ValueError("No enabled translation backends found in services.translation.capabilities") |
| 33 | 33 | self._translation_cache = TranslationCache(self.config["cache"]) |
| 34 | - self._backends = self._initialize_backends() | |
| 34 | + self._backends: Dict[str, TranslationBackendProtocol] = {} | |
| 35 | + self._backend_errors: Dict[str, str] = {} | |
| 36 | + self._initialize_backends() | |
| 37 | + if not self._backends: | |
| 38 | + details = ", ".join(f"{name}: {err}" for name, err in sorted(self._backend_errors.items())) or "unknown error" | |
| 39 | + raise RuntimeError(f"No translation backends could be initialized: {details}") | |
| 35 | 40 | |
| 36 | 41 | def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]: |
| 37 | 42 | enabled: Dict[str, Dict[str, object]] = {} |
| ... | ... | @@ -62,24 +67,47 @@ class TranslationService: |
| 62 | 67 | raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'") |
| 63 | 68 | return factory(name=name, cfg=cfg) |
| 64 | 69 | |
| 65 | - def _initialize_backends(self) -> Dict[str, TranslationBackendProtocol]: | |
| 66 | - backends: Dict[str, TranslationBackendProtocol] = {} | |
| 67 | - for name, capability_cfg in self._enabled_capabilities.items(): | |
| 68 | - backend_type = str(capability_cfg["backend"]) | |
| 69 | - logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type) | |
| 70 | - backends[name] = self._create_backend( | |
| 70 | + def _load_backend(self, name: str) -> Optional[TranslationBackendProtocol]: | |
| 71 | + capability_cfg = self._enabled_capabilities.get(name) | |
| 72 | + if capability_cfg is None: | |
| 73 | + return None | |
| 74 | + if name in self._backends: | |
| 75 | + return self._backends[name] | |
| 76 | + | |
| 77 | + backend_type = str(capability_cfg["backend"]) | |
| 78 | + logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type) | |
| 79 | + try: | |
| 80 | + backend = self._create_backend( | |
| 71 | 81 | name=name, |
| 72 | 82 | backend_type=backend_type, |
| 73 | 83 | cfg=capability_cfg, |
| 74 | 84 | ) |
| 75 | - logger.info( | |
| 76 | - "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s", | |
| 85 | + except Exception as exc: | |
| 86 | + error_text = str(exc).strip() or exc.__class__.__name__ | |
| 87 | + self._backend_errors[name] = error_text | |
| 88 | + logger.error( | |
| 89 | + "Translation backend initialization failed | model=%s backend=%s error=%s", | |
| 77 | 90 | name, |
| 78 | 91 | backend_type, |
| 79 | - bool(capability_cfg.get("use_cache")), | |
| 80 | - getattr(backends[name], "model", name), | |
| 92 | + error_text, | |
| 93 | + exc_info=True, | |
| 81 | 94 | ) |
| 82 | - return backends | |
| 95 | + return None | |
| 96 | + | |
| 97 | + self._backends[name] = backend | |
| 98 | + self._backend_errors.pop(name, None) | |
| 99 | + logger.info( | |
| 100 | + "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s", | |
| 101 | + name, | |
| 102 | + backend_type, | |
| 103 | + bool(capability_cfg.get("use_cache")), | |
| 104 | + getattr(backend, "model", name), | |
| 105 | + ) | |
| 106 | + return backend | |
| 107 | + | |
| 108 | + def _initialize_backends(self) -> None: | |
| 109 | + for name, capability_cfg in self._enabled_capabilities.items(): | |
| 110 | + self._load_backend(name) | |
| 83 | 111 | |
| 84 | 112 | def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: |
| 85 | 113 | from translation.backends.qwen_mt import QwenMTTranslationBackend |
| ... | ... | @@ -178,13 +206,27 @@ class TranslationService: |
| 178 | 206 | def loaded_models(self) -> List[str]: |
| 179 | 207 | return list(self._backends.keys()) |
| 180 | 208 | |
| 209 | + @property | |
| 210 | + def failed_models(self) -> List[str]: | |
| 211 | + return list(self._backend_errors.keys()) | |
| 212 | + | |
| 213 | + @property | |
| 214 | + def backend_errors(self) -> Dict[str, str]: | |
| 215 | + return dict(self._backend_errors) | |
| 216 | + | |
| 181 | 217 | def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: |
| 182 | 218 | normalized = normalize_translation_model(self.config, model) |
| 183 | - backend = self._backends.get(normalized) | |
| 219 | + backend = self._backends.get(normalized) or self._load_backend(normalized) | |
| 184 | 220 | if backend is None: |
| 185 | - raise ValueError( | |
| 186 | - f"Translation model '{normalized}' is not enabled. " | |
| 187 | - f"Available models: {', '.join(self.available_models) or 'none'}" | |
| 221 | + if normalized not in self._enabled_capabilities: | |
| 222 | + raise ValueError( | |
| 223 | + f"Translation model '{normalized}' is not enabled. " | |
| 224 | + f"Available models: {', '.join(self.available_models) or 'none'}" | |
| 225 | + ) | |
| 226 | + error_text = self._backend_errors.get(normalized) or "unknown initialization error" | |
| 227 | + raise RuntimeError( | |
| 228 | + f"Translation model '{normalized}' failed to initialize: {error_text}. " | |
| 229 | + f"Loaded models: {', '.join(self.loaded_models) or 'none'}" | |
| 188 | 230 | ) |
| 189 | 231 | return backend |
| 190 | 232 | ... | ... |