Commit 89fa3f3ccb7d7815460c21ea52ecca110435d61e

Authored by tangwang
1 parent 778c299a

Sync master portability fixes from f07947a

api/translator_app.py
... ... @@ -271,16 +271,20 @@ async def lifespan(_: FastAPI):
271 271 """Initialize all enabled translation backends on process startup."""
272 272 logger.info("Starting Translation Service API")
273 273 service = get_translation_service()
  274 + failed_models = list(getattr(service, "failed_models", []))
  275 + backend_errors = dict(getattr(service, "backend_errors", {}))
274 276 logger.info(
275   - "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s",
  277 + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s failed_models=%s",
276 278 service.config["default_model"],
277 279 service.config["default_scene"],
278 280 service.available_models,
279 281 service.loaded_models,
  282 + failed_models,
280 283 )
281 284 logger.info(
282   - "Translation backends initialized on startup | models=%s",
  285 + "Translation backends initialized on startup | loaded=%s failed=%s",
283 286 service.loaded_models,
  287 + backend_errors,
284 288 )
285 289 verbose_logger.info(
286 290 "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s",
... ... @@ -316,11 +320,14 @@ async def health_check():
316 320 """Health check endpoint."""
317 321 try:
318 322 service = get_translation_service()
  323 + failed_models = list(getattr(service, "failed_models", []))
  324 + backend_errors = dict(getattr(service, "backend_errors", {}))
319 325 logger.info(
320   - "Health check | default_model=%s default_scene=%s loaded_models=%s",
  326 + "Health check | default_model=%s default_scene=%s loaded_models=%s failed_models=%s",
321 327 service.config["default_model"],
322 328 service.config["default_scene"],
323 329 service.loaded_models,
  330 + failed_models,
324 331 )
325 332 return {
326 333 "status": "healthy",
... ... @@ -330,6 +337,8 @@ async def health_check():
330 337 "available_models": service.available_models,
331 338 "enabled_capabilities": get_enabled_translation_models(service.config),
332 339 "loaded_models": service.loaded_models,
  340 + "failed_models": failed_models,
  341 + "backend_errors": backend_errors,
333 342 }
334 343 except Exception as e:
335 344 logger.error(f"Health check failed: {e}")
... ... @@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request):
463 472 latency_ms = (time.perf_counter() - request_started) * 1000
464 473 logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms)
465 474 raise HTTPException(status_code=400, detail=str(e)) from e
  475 + except RuntimeError as e:
  476 + latency_ms = (time.perf_counter() - request_started) * 1000
  477 + logger.warning("Translation backend unavailable | error=%s latency_ms=%.2f", e, latency_ms)
  478 + raise HTTPException(status_code=503, detail=str(e)) from e
466 479 except Exception as e:
467 480 latency_ms = (time.perf_counter() - request_started) * 1000
468 481 logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True)
... ...
config/loader.py
... ... @@ -655,6 +655,14 @@ class AppConfigLoader:
655 655  
656 656 translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {}
657 657 normalized_translation = build_translation_config(translation_raw)
  658 + local_translation_backends = {"local_nllb", "local_marian"}
  659 + for capability_name, capability_cfg in normalized_translation["capabilities"].items():
  660 + backend_name = str(capability_cfg.get("backend") or "").strip().lower()
  661 + if backend_name not in local_translation_backends:
  662 + continue
  663 + for path_key in ("model_dir", "ct2_model_dir"):
  664 + if capability_cfg.get(path_key) not in (None, ""):
  665 + capability_cfg[path_key] = str(self._resolve_project_path_value(capability_cfg[path_key]).resolve())
658 666 translation_config = TranslationServiceConfig(
659 667 endpoint=str(normalized_translation["service_url"]).rstrip("/"),
660 668 timeout_sec=float(normalized_translation["timeout_sec"]),
... ... @@ -749,7 +757,7 @@ class AppConfigLoader:
749 757 port=port,
750 758 backend=backend_name,
751 759 runtime_dir=(
752   - str(v)
  760 + str(self._resolve_project_path_value(v).resolve())
753 761 if (v := instance_raw.get("runtime_dir")) not in (None, "")
754 762 else None
755 763 ),
... ... @@ -787,6 +795,12 @@ class AppConfigLoader:
787 795 rerank=rerank_config,
788 796 )
789 797  
  798 + def _resolve_project_path_value(self, value: Any) -> Path:
  799 + candidate = Path(str(value)).expanduser()
  800 + if candidate.is_absolute():
  801 + return candidate
  802 + return self.project_root / candidate
  803 +
790 804 def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig:
791 805 if not isinstance(raw, dict):
792 806 raise ConfigurationError("tenant_config must be a mapping")
... ... @@ -822,13 +836,6 @@ class AppConfigLoader:
822 836  
823 837 def _build_infrastructure_config(self, environment: str) -> InfrastructureConfig:
824 838 del environment
825   - _redis_db_raw = os.getenv("REDIS_DB") or os.getenv("REDIS_SNAPSHOT_DB")
826   - _redis_db = 0
827   - if _redis_db_raw is not None and str(_redis_db_raw).strip() != "":
828   - try:
829   - _redis_db = int(str(_redis_db_raw).strip())
830   - except ValueError:
831   - _redis_db = 0
832 839 return InfrastructureConfig(
833 840 elasticsearch=ElasticsearchSettings(
834 841 host=os.getenv("ES_HOST", "http://localhost:9200"),
... ... @@ -838,7 +845,7 @@ class AppConfigLoader:
838 845 redis=RedisSettings(
839 846 host=os.getenv("REDIS_HOST", "localhost"),
840 847 port=int(os.getenv("REDIS_PORT", 6479)),
841   - snapshot_db=_redis_db,
  848 + snapshot_db=int(os.getenv("REDIS_SNAPSHOT_DB", 0)),
842 849 password=os.getenv("REDIS_PASSWORD"),
843 850 socket_timeout=int(os.getenv("REDIS_SOCKET_TIMEOUT", 1)),
844 851 socket_connect_timeout=int(os.getenv("REDIS_SOCKET_CONNECT_TIMEOUT", 1)),
... ...
frontend/static/js/app.js
... ... @@ -316,7 +316,10 @@ async function performSearch(page = 1) {
316 316 document.getElementById('productGrid').innerHTML = '';
317 317  
318 318 try {
319   - const response = await fetch(`${API_BASE_URL}/search/`, {
  319 + const searchUrl = new URL(`${API_BASE_URL}/search/`, window.location.origin);
  320 + searchUrl.searchParams.set('tenant_id', tenantId);
  321 +
  322 + const response = await fetch(searchUrl.toString(), {
320 323 method: 'POST',
321 324 headers: {
322 325 'Content-Type': 'application/json',
... ...
requirements_translator_service.txt
... ... @@ -13,7 +13,8 @@ httpx>=0.24.0
13 13 tqdm>=4.65.0
14 14  
15 15 torch>=2.0.0
16   -transformers>=4.30.0
  16 +# Keep translator conversions on the last verified NLLB-compatible release line.
  17 +transformers>=4.51.0,<4.52.0
17 18 ctranslate2>=4.7.0
18 19 sentencepiece>=0.2.0
19 20 sacremoses>=0.1.1
... ...
scripts/download_translation_models.py 100755 → 100644
1 1 #!/usr/bin/env python3
2   -"""Download local translation models declared in services.translation.capabilities."""
  2 +"""Backward-compatible entrypoint for translation model downloads."""
3 3  
4 4 from __future__ import annotations
5 5  
6   -import argparse
7   -import os
  6 +import runpy
8 7 from pathlib import Path
9   -import shutil
10   -import subprocess
11   -import sys
12   -from typing import Iterable
13   -
14   -from huggingface_hub import snapshot_download
15   -
16   -PROJECT_ROOT = Path(__file__).resolve().parent.parent
17   -if str(PROJECT_ROOT) not in sys.path:
18   - sys.path.insert(0, str(PROJECT_ROOT))
19   -os.environ.setdefault("HF_HUB_DISABLE_XET", "1")
20   -
21   -from config.services_config import get_translation_config
22   -
23   -
24   -LOCAL_BACKENDS = {"local_nllb", "local_marian"}
25   -
26   -
27   -def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]:
28   - cfg = get_translation_config()
29   - capabilities = cfg.get("capabilities", {}) if isinstance(cfg, dict) else {}
30   - for name, capability in capabilities.items():
31   - backend = str(capability.get("backend") or "").strip().lower()
32   - if backend not in LOCAL_BACKENDS:
33   - continue
34   - if selected and name not in selected:
35   - continue
36   - yield name, capability
37   -
38   -
39   -def _compute_ct2_output_dir(capability: dict) -> Path:
40   - custom = str(capability.get("ct2_model_dir") or "").strip()
41   - if custom:
42   - return Path(custom).expanduser()
43   - model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
44   - compute_type = str(capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default").strip().lower()
45   - normalized = compute_type.replace("_", "-")
46   - return model_dir / f"ctranslate2-{normalized}"
47   -
48   -
49   -def _resolve_converter_binary() -> str:
50   - candidate = shutil.which("ct2-transformers-converter")
51   - if candidate:
52   - return candidate
53   - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter"
54   - if venv_candidate.exists():
55   - return str(venv_candidate)
56   - raise RuntimeError(
57   - "ct2-transformers-converter was not found. "
58   - "Install ctranslate2 in the active Python environment first."
59   - )
60   -
61   -
62   -def convert_to_ctranslate2(name: str, capability: dict) -> None:
63   - model_id = str(capability.get("model_id") or "").strip()
64   - model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
65   - model_source = str(model_dir if model_dir.exists() else model_id)
66   - output_dir = _compute_ct2_output_dir(capability)
67   - if (output_dir / "model.bin").exists():
68   - print(f"[skip-convert] {name} -> {output_dir}")
69   - return
70   - quantization = str(
71   - capability.get("ct2_conversion_quantization")
72   - or capability.get("ct2_compute_type")
73   - or capability.get("torch_dtype")
74   - or "default"
75   - ).strip()
76   - output_dir.parent.mkdir(parents=True, exist_ok=True)
77   - print(f"[convert] {name} -> {output_dir} ({quantization})")
78   - subprocess.run(
79   - [
80   - _resolve_converter_binary(),
81   - "--model",
82   - model_source,
83   - "--output_dir",
84   - str(output_dir),
85   - "--quantization",
86   - quantization,
87   - ],
88   - check=True,
89   - )
90   - print(f"[converted] {name}")
91   -
92   -
93   -def main() -> None:
94   - parser = argparse.ArgumentParser(description="Download local translation models")
95   - parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models")
96   - parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download")
97   - parser.add_argument(
98   - "--convert-ctranslate2",
99   - action="store_true",
100   - help="Also convert the downloaded Hugging Face models into CTranslate2 format",
101   - )
102   - args = parser.parse_args()
103   -
104   - selected = {item.strip().lower() for item in args.models if item.strip()} or None
105   - if not args.all_local and not selected:
106   - parser.error("pass --all-local or --models <name> ...")
107   -
108   - for name, capability in iter_local_capabilities(selected):
109   - model_id = str(capability.get("model_id") or "").strip()
110   - model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
111   - if not model_id or not model_dir:
112   - raise ValueError(f"Capability '{name}' must define model_id and model_dir")
113   - model_dir.parent.mkdir(parents=True, exist_ok=True)
114   - print(f"[download] {name} -> {model_dir} ({model_id})")
115   - snapshot_download(
116   - repo_id=model_id,
117   - local_dir=str(model_dir),
118   - )
119   - print(f"[done] {name}")
120   - if args.convert_ctranslate2:
121   - convert_to_ctranslate2(name, capability)
122 8  
123 9  
124 10 if __name__ == "__main__":
125   - main()
  11 + target = Path(__file__).resolve().parent / "translation" / "download_translation_models.py"
  12 + runpy.run_path(str(target), run_name="__main__")
... ...
scripts/frontend/frontend_server.py 0 → 100755
... ... @@ -0,0 +1,278 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +Simple HTTP server for saas-search frontend.
  4 +"""
  5 +
  6 +import http.server
  7 +import socketserver
  8 +import os
  9 +import sys
  10 +import logging
  11 +import time
  12 +import urllib.request
  13 +import urllib.error
  14 +from collections import defaultdict, deque
  15 +from pathlib import Path
  16 +from dotenv import load_dotenv
  17 +
  18 +# Load .env file
  19 +project_root = Path(__file__).resolve().parents[2]
  20 +load_dotenv(project_root / '.env')
  21 +
  22 +# Get API_BASE_URL from environment（默认不注入，避免被旧 .env 覆盖同源策略）
  23 +# 仅当显式设置 FRONTEND_INJECT_API_BASE_URL=1 时才注入 window.API_BASE_URL。
  24 +API_BASE_URL = os.getenv('API_BASE_URL') or None
  25 +INJECT_API_BASE_URL = os.getenv('FRONTEND_INJECT_API_BASE_URL', '0') == '1'
  26 +# Backend proxy target for same-origin API forwarding
  27 +BACKEND_PROXY_URL = os.getenv('BACKEND_PROXY_URL', 'http://127.0.0.1:6002').rstrip('/')
  28 +
  29 +# Change to frontend directory
  30 +frontend_dir = os.path.join(project_root, 'frontend')
  31 +os.chdir(frontend_dir)
  32 +
  33 +# FRONTEND_PORT is the canonical config; keep PORT as a secondary fallback.
  34 +PORT = int(os.getenv('FRONTEND_PORT', os.getenv('PORT', 6003)))
  35 +
  36 +# Configure logging to suppress scanner noise
  37 +logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')
  38 +
  39 +class RateLimitingMixin:
  40 + """Mixin for rate limiting requests by IP address."""
  41 + request_counts = defaultdict(deque)
  42 + rate_limit = 100 # requests per minute
  43 + window = 60 # seconds
  44 +
  45 + @classmethod
  46 + def is_rate_limited(cls, ip):
  47 + now = time.time()
  48 +
  49 + # Clean old requests
  50 + while cls.request_counts[ip] and cls.request_counts[ip][0] < now - cls.window:
  51 + cls.request_counts[ip].popleft()
  52 +
  53 + # Check rate limit
  54 + if len(cls.request_counts[ip]) > cls.rate_limit:
  55 + return True
  56 +
  57 + cls.request_counts[ip].append(now)
  58 + return False
  59 +
  60 +class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin):
  61 + """Custom request handler with CORS support and robust error handling."""
  62 +
  63 + _ALLOWED_CORS_HEADERS = "Content-Type, X-Tenant-ID, X-Request-ID, Referer"
  64 +
  65 + def _is_proxy_path(self, path: str) -> bool:
  66 + """Return True for API paths that should be forwarded to backend service."""
  67 + return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/')
  68 +
  69 + def _proxy_to_backend(self):
  70 + """Proxy current request to backend service on the GPU server."""
  71 + target_url = f"{BACKEND_PROXY_URL}{self.path}"
  72 + method = self.command.upper()
  73 +
  74 + try:
  75 + content_length = int(self.headers.get('Content-Length', '0'))
  76 + except ValueError:
  77 + content_length = 0
  78 + body = self.rfile.read(content_length) if content_length > 0 else None
  79 +
  80 + forward_headers = {}
  81 + for key, value in self.headers.items():
  82 + lk = key.lower()
  83 + if lk in ('host', 'content-length', 'connection'):
  84 + continue
  85 + forward_headers[key] = value
  86 +
  87 + req = urllib.request.Request(
  88 + target_url,
  89 + data=body,
  90 + headers=forward_headers,
  91 + method=method,
  92 + )
  93 +
  94 + try:
  95 + with urllib.request.urlopen(req, timeout=30) as resp:
  96 + resp_body = resp.read()
  97 + self.send_response(resp.getcode())
  98 + for header, value in resp.getheaders():
  99 + lh = header.lower()
  100 + if lh in ('transfer-encoding', 'connection', 'content-length'):
  101 + continue
  102 + self.send_header(header, value)
  103 + self.end_headers()
  104 + self.wfile.write(resp_body)
  105 + except urllib.error.HTTPError as e:
  106 + err_body = e.read() if hasattr(e, 'read') else b''
  107 + self.send_response(e.code)
  108 + if e.headers:
  109 + for header, value in e.headers.items():
  110 + lh = header.lower()
  111 + if lh in ('transfer-encoding', 'connection', 'content-length'):
  112 + continue
  113 + self.send_header(header, value)
  114 + self.end_headers()
  115 + if err_body:
  116 + self.wfile.write(err_body)
  117 + except Exception as e:
  118 + logging.error(f"Backend proxy error for {method} {self.path}: {e}")
  119 + self.send_response(502)
  120 + self.send_header('Content-Type', 'application/json; charset=utf-8')
  121 + self.end_headers()
  122 + self.wfile.write(b'{"error":"Bad Gateway: backend proxy failed"}')
  123 +
  124 + def do_GET(self):
  125 + """Handle GET requests with API config injection."""
  126 + path = self.path.split('?')[0]
  127 +
  128 + # Proxy API paths to backend first
  129 + if self._is_proxy_path(path):
  130 + self._proxy_to_backend()
  131 + return
  132 +
  133 + # Route / to index.html
  134 + if path == '/' or path == '':
  135 + self.path = '/index.html' + (self.path.split('?', 1)[1] if '?' in self.path else '')
  136 +
  137 + # Inject API config for HTML files
  138 + if self.path.endswith('.html'):
  139 + self._serve_html_with_config()
  140 + else:
  141 + super().do_GET()
  142 +
  143 + def _serve_html_with_config(self):
  144 + """Serve HTML with optional API_BASE_URL injected."""
  145 + try:
  146 + file_path = self.path.lstrip('/')
  147 + if not os.path.exists(file_path):
  148 + self.send_error(404)
  149 + return
  150 +
  151 + with open(file_path, 'r', encoding='utf-8') as f:
  152 + html = f.read()
  153 +
  154 + # 默认不注入 API_BASE_URL，避免历史 .env（如 http://xx:6002）覆盖同源调用。
  155 + # 仅当 FRONTEND_INJECT_API_BASE_URL=1 且 API_BASE_URL 有值时才注入。
  156 + if INJECT_API_BASE_URL and API_BASE_URL:
  157 + config_script = f'<script>window.API_BASE_URL="{API_BASE_URL}";</script>\n '
  158 + html = html.replace('<script src="/static/js/app.js', config_script + '<script src="/static/js/app.js', 1)
  159 +
  160 + self.send_response(200)
  161 + self.send_header('Content-Type', 'text/html; charset=utf-8')
  162 + self.end_headers()
  163 + self.wfile.write(html.encode('utf-8'))
  164 + except Exception as e:
  165 + logging.error(f"Error serving HTML: {e}")
  166 + self.send_error(500)
  167 +
  168 + def do_POST(self):
  169 + """Handle POST requests. Proxy API requests to backend."""
  170 + path = self.path.split('?')[0]
  171 + if self._is_proxy_path(path):
  172 + self._proxy_to_backend()
  173 + return
  174 + self.send_error(405, "Method Not Allowed")
  175 +
  176 + def setup(self):
  177 + """Setup with error handling."""
  178 + try:
  179 + super().setup()
  180 + except Exception:
  181 + pass # Silently handle setup errors from scanners
  182 +
  183 + def handle_one_request(self):
  184 + """Handle single request with error catching."""
  185 + try:
  186 + # Check rate limiting
  187 + client_ip = self.client_address[0]
  188 + if self.is_rate_limited(client_ip):
  189 + logging.warning(f"Rate limiting IP: {client_ip}")
  190 + self.send_error(429, "Too Many Requests")
  191 + return
  192 +
  193 + super().handle_one_request()
  194 + except (ConnectionResetError, BrokenPipeError):
  195 + # Client disconnected prematurely - common with scanners
  196 + pass
  197 + except UnicodeDecodeError:
  198 + # Binary data received - not HTTP
  199 + pass
  200 + except Exception as e:
  201 + # Log unexpected errors but don't crash
  202 + logging.debug(f"Request handling error: {e}")
  203 +
  204 + def log_message(self, format, *args):
  205 + """Suppress logging for malformed requests from scanners."""
  206 + message = format % args
  207 + # Filter out scanner noise
  208 + noise_patterns = [
  209 + "code 400",
  210 + "Bad request",
  211 + "Bad request version",
  212 + "Bad HTTP/0.9 request type",
  213 + "Bad request syntax"
  214 + ]
  215 + if any(pattern in message for pattern in noise_patterns):
  216 + return
  217 + # Only log legitimate requests
  218 + if message and not message.startswith(" ") and len(message) > 10:
  219 + super().log_message(format, *args)
  220 +
  221 + def end_headers(self):
  222 + # Add CORS headers
  223 + self.send_header('Access-Control-Allow-Origin', '*')
  224 + self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
  225 + self.send_header('Access-Control-Allow-Headers', self._ALLOWED_CORS_HEADERS)
  226 + # Add security headers
  227 + self.send_header('X-Content-Type-Options', 'nosniff')
  228 + self.send_header('X-Frame-Options', 'DENY')
  229 + self.send_header('X-XSS-Protection', '1; mode=block')
  230 + super().end_headers()
  231 +
  232 + def do_OPTIONS(self):
  233 + """Handle OPTIONS requests."""
  234 + try:
  235 + path = self.path.split('?')[0]
  236 + if self._is_proxy_path(path):
  237 + self.send_response(204)
  238 + self.end_headers()
  239 + return
  240 + self.send_response(200)
  241 + self.end_headers()
  242 + except Exception:
  243 + pass
  244 +
  245 +class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
  246 + """Threaded TCP server with better error handling."""
  247 + allow_reuse_address = True
  248 + daemon_threads = True
  249 +
  250 +if __name__ == '__main__':
  251 + # Check if port is already in use
  252 + import socket
  253 + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  254 + try:
  255 + sock.bind(("", PORT))
  256 + sock.close()
  257 + except OSError:
  258 + print(f"ERROR: Port {PORT} is already in use.")
  259 + print(f"Please stop the existing server or use a different port.")
  260 + print(f"To stop existing server: kill $(lsof -t -i:{PORT})")
  261 + sys.exit(1)
  262 +
  263 + # Create threaded server for better concurrency
  264 + with ThreadedTCPServer(("", PORT), MyHTTPRequestHandler) as httpd:
  265 + print(f"Frontend server started at http://localhost:{PORT}")
  266 + print(f"Serving files from: {os.getcwd()}")
  267 + print("\nPress Ctrl+C to stop the server")
  268 +
  269 + try:
  270 + httpd.serve_forever()
  271 + except KeyboardInterrupt:
  272 + print("\nShutting down server...")
  273 + httpd.shutdown()
  274 + print("Server stopped")
  275 + sys.exit(0)
  276 + except Exception as e:
  277 + print(f"Server error: {e}")
  278 + sys.exit(1)
... ...
scripts/setup_translator_venv.sh
... ... @@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
8 8 cd "${PROJECT_ROOT}"
9 9  
10 10 VENV_DIR="${PROJECT_ROOT}/.venv-translator"
11   -PYTHON_BIN="${PYTHON_BIN:-python3}"
12 11 TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}"
  12 +MIN_PYTHON_MAJOR=3
  13 +MIN_PYTHON_MINOR=10
  14 +
  15 +python_meets_minimum() {
  16 + local bin="$1"
  17 + "${bin}" - <<'PY' "${MIN_PYTHON_MAJOR}" "${MIN_PYTHON_MINOR}"
  18 +import sys
  19 +
  20 +required = tuple(int(value) for value in sys.argv[1:])
  21 +sys.exit(0 if sys.version_info[:2] >= required else 1)
  22 +PY
  23 +}
  24 +
  25 +discover_python_bin() {
  26 + local candidates=()
  27 +
  28 + if [[ -n "${PYTHON_BIN:-}" ]]; then
  29 + candidates+=("${PYTHON_BIN}")
  30 + fi
  31 + candidates+=("python3.12" "python3.11" "python3.10" "python3")
  32 +
  33 + local candidate
  34 + for candidate in "${candidates[@]}"; do
  35 + if ! command -v "${candidate}" >/dev/null 2>&1; then
  36 + continue
  37 + fi
  38 + if python_meets_minimum "${candidate}"; then
  39 + echo "${candidate}"
  40 + return 0
  41 + fi
  42 + done
  43 +
  44 + return 1
  45 +}
  46 +
  47 +if ! PYTHON_BIN="$(discover_python_bin)"; then
  48 + echo "ERROR: unable to find Python >= ${MIN_PYTHON_MAJOR}.${MIN_PYTHON_MINOR}." >&2
  49 + echo "Set PYTHON_BIN to a compatible interpreter and rerun." >&2
  50 + exit 1
  51 +fi
13 52  
14 53 if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
15 54 echo "ERROR: python not found: ${PYTHON_BIN}" >&2
... ... @@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}"
32 71 export TMPDIR="${TMP_DIR}"
33 72 PIP_ARGS=(--no-cache-dir)
34 73  
  74 +echo "Using Python=${PYTHON_BIN}"
35 75 echo "Using TMPDIR=${TMPDIR}"
36 76 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel
37 77 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt
... ... @@ -39,5 +79,5 @@ echo "Using TMPDIR=${TMPDIR}"
39 79 echo
40 80 echo "Done."
41 81 echo "Translator venv: ${VENV_DIR}"
42   -echo "Download local models: ./.venv-translator/bin/python scripts/download_translation_models.py --all-local"
  82 +echo "Download local models: ./.venv-translator/bin/python scripts/translation/download_translation_models.py --all-local"
43 83 echo "Start service: ./scripts/start_translator.sh"
... ...
scripts/translation/download_translation_models.py 0 → 100755
... ... @@ -0,0 +1,100 @@
  1 +#!/usr/bin/env python3
  2 +"""Download local translation models declared in services.translation.capabilities."""
  3 +
  4 +from __future__ import annotations
  5 +
  6 +import argparse
  7 +import os
  8 +from pathlib import Path
  9 +import sys
  10 +from typing import Iterable
  11 +
  12 +from huggingface_hub import snapshot_download
  13 +
  14 +PROJECT_ROOT = Path(__file__).resolve().parents[2]
  15 +if str(PROJECT_ROOT) not in sys.path:
  16 + sys.path.insert(0, str(PROJECT_ROOT))
  17 +os.environ.setdefault("HF_HUB_DISABLE_XET", "1")
  18 +
  19 +from config.services_config import get_translation_config
  20 +from translation.ct2_conversion import convert_transformers_model
  21 +
  22 +
  23 +LOCAL_BACKENDS = {"local_nllb", "local_marian"}
  24 +
  25 +
  26 +def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]:
  27 + cfg = get_translation_config()
  28 + capabilities = cfg.get("capabilities", {}) if isinstance(cfg, dict) else {}
  29 + for name, capability in capabilities.items():
  30 + backend = str(capability.get("backend") or "").strip().lower()
  31 + if backend not in LOCAL_BACKENDS:
  32 + continue
  33 + if selected and name not in selected:
  34 + continue
  35 + yield name, capability
  36 +
  37 +
  38 +def _compute_ct2_output_dir(capability: dict) -> Path:
  39 + custom = str(capability.get("ct2_model_dir") or "").strip()
  40 + if custom:
  41 + return Path(custom).expanduser()
  42 + model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
  43 + compute_type = str(capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default").strip().lower()
  44 + normalized = compute_type.replace("_", "-")
  45 + return model_dir / f"ctranslate2-{normalized}"
  46 +
  47 +
  48 +def convert_to_ctranslate2(name: str, capability: dict) -> None:
  49 + model_id = str(capability.get("model_id") or "").strip()
  50 + model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
  51 + model_source = str(model_dir if model_dir.exists() else model_id)
  52 + output_dir = _compute_ct2_output_dir(capability)
  53 + if (output_dir / "model.bin").exists():
  54 + print(f"[skip-convert] {name} -> {output_dir}")
  55 + return
  56 + quantization = str(
  57 + capability.get("ct2_conversion_quantization")
  58 + or capability.get("ct2_compute_type")
  59 + or capability.get("torch_dtype")
  60 + or "default"
  61 + ).strip()
  62 + output_dir.parent.mkdir(parents=True, exist_ok=True)
  63 + print(f"[convert] {name} -> {output_dir} ({quantization})")
  64 + convert_transformers_model(model_source, str(output_dir), quantization)
  65 + print(f"[converted] {name}")
  66 +
  67 +
  68 +def main() -> None:
  69 + parser = argparse.ArgumentParser(description="Download local translation models")
  70 + parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models")
  71 + parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download")
  72 + parser.add_argument(
  73 + "--convert-ctranslate2",
  74 + action="store_true",
  75 + help="Also convert the downloaded Hugging Face models into CTranslate2 format",
  76 + )
  77 + args = parser.parse_args()
  78 +
  79 + selected = {item.strip().lower() for item in args.models if item.strip()} or None
  80 + if not args.all_local and not selected:
  81 + parser.error("pass --all-local or --models <name> ...")
  82 +
  83 + for name, capability in iter_local_capabilities(selected):
  84 + model_id = str(capability.get("model_id") or "").strip()
  85 + model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
  86 + if not model_id or not model_dir:
  87 + raise ValueError(f"Capability '{name}' must define model_id and model_dir")
  88 + model_dir.parent.mkdir(parents=True, exist_ok=True)
  89 + print(f"[download] {name} -> {model_dir} ({model_id})")
  90 + snapshot_download(
  91 + repo_id=model_id,
  92 + local_dir=str(model_dir),
  93 + )
  94 + print(f"[done] {name}")
  95 + if args.convert_ctranslate2:
  96 + convert_to_ctranslate2(name, capability)
  97 +
  98 +
  99 +if __name__ == "__main__":
  100 + main()
... ...
tests/test_translation_converter_resolution.py 0 โ†’ 100644
... ... @@ -0,0 +1,85 @@
  1 +from __future__ import annotations
  2 +
  3 +import sys
  4 +import types
  5 +
  6 +import pytest
  7 +
  8 +import translation.ct2_conversion as ct2_conversion
  9 +
  10 +
  11 +class _FakeTransformersConverter:
  12 + def __init__(self, model_name_or_path):
  13 + self.model_name_or_path = model_name_or_path
  14 + self.load_calls = []
  15 +
  16 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  17 + self.load_calls.append(
  18 + {
  19 + "model_class": model_class,
  20 + "resolved_model_name_or_path": resolved_model_name_or_path,
  21 + "kwargs": dict(kwargs),
  22 + }
  23 + )
  24 + if "dtype" in kwargs or "torch_dtype" in kwargs:
  25 + raise TypeError("M2M100ForConditionalGeneration.__init__() got an unexpected keyword argument 'dtype'")
  26 + return {"loaded": True, "path": resolved_model_name_or_path}
  27 +
  28 + def convert(self, output_dir, quantization=None, force=False):
  29 + loaded = self.load_model("FakeModel", self.model_name_or_path, dtype="float32")
  30 + return {
  31 + "loaded": loaded,
  32 + "output_dir": output_dir,
  33 + "quantization": quantization,
  34 + "force": force,
  35 + "load_calls": list(self.load_calls),
  36 + }
  37 +
  38 +
  39 +def _install_fake_ctranslate2(monkeypatch, base_converter):
  40 + converters_module = types.ModuleType("ctranslate2.converters")
  41 + converters_module.TransformersConverter = base_converter
  42 + ctranslate2_module = types.ModuleType("ctranslate2")
  43 + ctranslate2_module.converters = converters_module
  44 +
  45 + monkeypatch.setitem(sys.modules, "ctranslate2", ctranslate2_module)
  46 + monkeypatch.setitem(sys.modules, "ctranslate2.converters", converters_module)
  47 +
  48 +
  49 +def test_convert_transformers_model_retries_without_torch_dtype(monkeypatch):
  50 + _install_fake_ctranslate2(monkeypatch, _FakeTransformersConverter)
  51 + fake_transformers = types.ModuleType("transformers")
  52 + fake_transformers.AutoConfig = types.SimpleNamespace(
  53 + from_pretrained=lambda path: types.SimpleNamespace(torch_dtype="float32", path=path)
  54 + )
  55 + monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
  56 +
  57 + result = ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16")
  58 +
  59 + assert result["loaded"] == {"loaded": True, "path": "fake-model"}
  60 + assert result["output_dir"] == "/tmp/out"
  61 + assert result["quantization"] == "float16"
  62 + assert result["force"] is False
  63 + assert len(result["load_calls"]) == 2
  64 + assert result["load_calls"][0] == {
  65 + "model_class": "FakeModel",
  66 + "resolved_model_name_or_path": "fake-model",
  67 + "kwargs": {"dtype": "float32"},
  68 + }
  69 + assert result["load_calls"][1]["model_class"] == "FakeModel"
  70 + assert result["load_calls"][1]["resolved_model_name_or_path"] == "fake-model"
  71 + assert getattr(result["load_calls"][1]["kwargs"]["config"], "torch_dtype", "missing") is None
  72 +
  73 +
  74 +def test_convert_transformers_model_preserves_unrelated_type_errors(monkeypatch):
  75 + class _AlwaysFailingConverter(_FakeTransformersConverter):
  76 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  77 + raise TypeError("different constructor error")
  78 +
  79 + _install_fake_ctranslate2(monkeypatch, _AlwaysFailingConverter)
  80 + fake_transformers = types.ModuleType("transformers")
  81 + fake_transformers.AutoConfig = types.SimpleNamespace(from_pretrained=lambda path: types.SimpleNamespace(path=path))
  82 + monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
  83 +
  84 + with pytest.raises(TypeError, match="different constructor error"):
  85 + ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16")
... ...
tests/test_translation_local_backends.py
... ... @@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch):
201 201 assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]]
202 202  
203 203  
  204 +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_wrong_type(tmp_path, monkeypatch):
  205 + wrong_dir = tmp_path / "wrong-nllb"
  206 + wrong_dir.mkdir()
  207 + (wrong_dir / "config.json").write_text('{"model_type":"led"}', encoding="utf-8")
  208 +
  209 + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime)
  210 +
  211 + backend = NLLBCTranslate2TranslationBackend(
  212 + name="nllb-200-distilled-600m",
  213 + model_id="facebook/nllb-200-distilled-600M",
  214 + model_dir=str(wrong_dir),
  215 + device="cpu",
  216 + torch_dtype="float32",
  217 + batch_size=1,
  218 + max_input_length=16,
  219 + max_new_tokens=16,
  220 + num_beams=1,
  221 + )
  222 +
  223 + assert backend._model_source() == "facebook/nllb-200-distilled-600M"
  224 + assert backend._tokenizer_source() == "facebook/nllb-200-distilled-600M"
  225 +
  226 +
  227 +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_incomplete(tmp_path, monkeypatch):
  228 + incomplete_dir = tmp_path / "incomplete-nllb"
  229 + incomplete_dir.mkdir()
  230 + (incomplete_dir / "ctranslate2-float16").mkdir()
  231 +
  232 + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime)
  233 +
  234 + backend = NLLBCTranslate2TranslationBackend(
  235 + name="nllb-200-distilled-600m",
  236 + model_id="facebook/nllb-200-distilled-600M",
  237 + model_dir=str(incomplete_dir),
  238 + device="cpu",
  239 + torch_dtype="float32",
  240 + batch_size=1,
  241 + max_input_length=16,
  242 + max_new_tokens=16,
  243 + num_beams=1,
  244 + )
  245 +
  246 + assert backend._model_source() == "facebook/nllb-200-distilled-600M"
  247 +
  248 +
204 249 def test_nllb_resolves_flores_short_tags_and_iso_no():
205 250 cat = build_nllb_language_catalog(None)
206 251 assert resolve_nllb_language_code("ca", cat) == "cat_Latn"
... ...
tests/test_translator_failure_semantics.py
... ... @@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog):
197 197 ]
198 198  
199 199  
  200 +def test_service_skips_failed_backend_but_keeps_healthy_capabilities(monkeypatch):
  201 + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
  202 +
  203 + def _fake_create_backend(self, *, name, backend_type, cfg):
  204 + del self, backend_type, cfg
  205 + if name == "broken-nllb":
  206 + raise RuntimeError("broken model dir")
  207 +
  208 + class _Backend:
  209 + model = name
  210 +
  211 + @property
  212 + def supports_batch(self):
  213 + return True
  214 +
  215 + def translate(self, text, target_lang, source_lang=None, scene=None):
  216 + del target_lang, source_lang, scene
  217 + return text
  218 +
  219 + return _Backend()
  220 +
  221 + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend)
  222 + service = TranslationService(
  223 + {
  224 + "service_url": "http://127.0.0.1:6006",
  225 + "timeout_sec": 10.0,
  226 + "default_model": "llm",
  227 + "default_scene": "general",
  228 + "capabilities": {
  229 + "llm": {
  230 + "enabled": True,
  231 + "backend": "llm",
  232 + "model": "dummy-llm",
  233 + "base_url": "https://example.com",
  234 + "timeout_sec": 10.0,
  235 + "use_cache": True,
  236 + },
  237 + "broken-nllb": {
  238 + "enabled": True,
  239 + "backend": "local_nllb",
  240 + "model_id": "dummy",
  241 + "model_dir": "dummy",
  242 + "device": "cpu",
  243 + "torch_dtype": "float32",
  244 + "batch_size": 8,
  245 + "max_input_length": 16,
  246 + "max_new_tokens": 16,
  247 + "num_beams": 1,
  248 + "use_cache": True,
  249 + },
  250 + },
  251 + "cache": {
  252 + "ttl_seconds": 60,
  253 + "sliding_expiration": True,
  254 + },
  255 + }
  256 + )
  257 +
  258 + assert service.available_models == ["llm", "broken-nllb"]
  259 + assert service.loaded_models == ["llm"]
  260 + assert service.failed_models == ["broken-nllb"]
  261 + assert service.backend_errors["broken-nllb"] == "broken model dir"
  262 +
  263 + with pytest.raises(RuntimeError, match="failed to initialize"):
  264 + service.get_backend("broken-nllb")
  265 +
  266 +
200 267 def test_translation_cache_probe_models_order():
201 268 cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}}
202 269 assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"]
... ...
translation/backends/local_ctranslate2.py
... ... @@ -4,9 +4,7 @@ from __future__ import annotations
4 4  
5 5 import logging
6 6 import os
7   -import shutil
8   -import subprocess
9   -import sys
  7 +import json
10 8 import threading
11 9 from pathlib import Path
12 10 from typing import Dict, List, Optional, Sequence, Union
... ... @@ -24,6 +22,7 @@ from translation.text_splitter import (
24 22 join_translated_segments,
25 23 split_text_for_translation,
26 24 )
  25 +from translation.ct2_conversion import convert_transformers_model
27 26  
28 27 logger = logging.getLogger(__name__)
29 28  
... ... @@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -&gt; str:
76 75 return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}")
77 76  
78 77  
79   -def _resolve_converter_binary() -> str:
80   - candidate = shutil.which("ct2-transformers-converter")
81   - if candidate:
82   - return candidate
83   - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter"
84   - if venv_candidate.exists():
85   - return str(venv_candidate)
86   - raise RuntimeError(
87   - "ct2-transformers-converter was not found. "
88   - "Ensure ctranslate2 is installed in the active translator environment."
89   - )
  78 +def _detect_local_model_type(model_dir: str) -> Optional[str]:
  79 + config_path = Path(model_dir).expanduser() / "config.json"
  80 + if not config_path.exists():
  81 + return None
  82 + try:
  83 + with open(config_path, "r", encoding="utf-8") as handle:
  84 + payload = json.load(handle) or {}
  85 + except Exception as exc:
  86 + logger.warning("Failed to inspect local translation config %s: %s", config_path, exc)
  87 + return None
  88 + model_type = str(payload.get("model_type") or "").strip().lower()
  89 + return model_type or None
90 90  
91 91  
92 92 class LocalCTranslate2TranslationBackend:
... ... @@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend:
144 144 self.ct2_decoding_length_extra = int(ct2_decoding_length_extra)
145 145 self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min))
146 146 self._tokenizer_lock = threading.Lock()
  147 + self._local_model_source = self._resolve_local_model_source()
147 148 self._load_runtime()
148 149  
149 150 @property
... ... @@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend:
151 152 return True
152 153  
153 154 def _tokenizer_source(self) -> str:
154   - return self.model_dir if os.path.exists(self.model_dir) else self.model_id
  155 + return self._local_model_source or self.model_id
155 156  
156 157 def _model_source(self) -> str:
157   - return self.model_dir if os.path.exists(self.model_dir) else self.model_id
  158 + return self._local_model_source or self.model_id
  159 +
  160 + def _expected_local_model_types(self) -> Optional[set[str]]:
  161 + return None
  162 +
  163 + def _resolve_local_model_source(self) -> Optional[str]:
  164 + model_path = Path(self.model_dir).expanduser()
  165 + if not model_path.exists():
  166 + return None
  167 + if not (model_path / "config.json").exists():
  168 + logger.warning(
  169 + "Local translation model_dir is incomplete | model=%s model_dir=%s missing=config.json fallback=model_id",
  170 + self.model,
  171 + model_path,
  172 + )
  173 + return None
  174 +
  175 + expected_types = self._expected_local_model_types()
  176 + if not expected_types:
  177 + return str(model_path)
  178 +
  179 + detected_type = _detect_local_model_type(str(model_path))
  180 + if detected_type is None:
  181 + return str(model_path)
  182 + if detected_type in expected_types:
  183 + return str(model_path)
  184 +
  185 + logger.warning(
  186 + "Local translation model_dir has unexpected model_type | model=%s model_dir=%s detected=%s expected=%s fallback=model_id",
  187 + self.model,
  188 + model_path,
  189 + detected_type,
  190 + sorted(expected_types),
  191 + )
  192 + return None
158 193  
159 194 def _tokenizer_kwargs(self) -> Dict[str, object]:
160 195 return {}
... ... @@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend:
204 239 )
205 240  
206 241 ct2_path.parent.mkdir(parents=True, exist_ok=True)
207   - converter = _resolve_converter_binary()
208 242 logger.info(
209 243 "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s",
210 244 self.model,
... ... @@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend:
213 247 self.ct2_conversion_quantization,
214 248 )
215 249 try:
216   - subprocess.run(
217   - [
218   - converter,
219   - "--model",
220   - model_source,
221   - "--output_dir",
222   - str(ct2_path),
223   - "--quantization",
224   - self.ct2_conversion_quantization,
225   - ],
226   - check=True,
227   - stdout=subprocess.PIPE,
228   - stderr=subprocess.PIPE,
229   - text=True,
  250 + convert_transformers_model(
  251 + model_source,
  252 + str(ct2_path),
  253 + self.ct2_conversion_quantization,
230 254 )
231   - except subprocess.CalledProcessError as exc:
232   - stderr = exc.stderr.strip()
  255 + except Exception as exc:
233 256 raise RuntimeError(
234   - f"Failed to convert model '{self.model}' to CTranslate2: {stderr or exc}"
  257 + f"Failed to convert model '{self.model}' to CTranslate2: {exc}"
235 258 ) from exc
236 259  
237 260 def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]:
... ... @@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
557 580 f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}"
558 581 )
559 582  
  583 + def _expected_local_model_types(self) -> Optional[set[str]]:
  584 + return {"marian"}
  585 +
560 586  
561 587 class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
562 588 """Local backend for NLLB models on CTranslate2."""
... ... @@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
619 645 if resolve_nllb_language_code(target_lang, self.language_codes) is None:
620 646 raise ValueError(f"Unsupported NLLB target language: {target_lang}")
621 647  
  648 + def _expected_local_model_types(self) -> Optional[set[str]]:
  649 + return {"m2m_100", "nllb_moe"}
  650 +
622 651 def _get_tokenizer_for_source(self, source_lang: str):
623 652 src_code = resolve_nllb_language_code(source_lang, self.language_codes)
624 653 if src_code is None:
... ...
translation/ct2_conversion.py 0 โ†’ 100644
... ... @@ -0,0 +1,52 @@
  1 +"""Helpers for converting Hugging Face translation models to CTranslate2."""
  2 +
  3 +from __future__ import annotations
  4 +
  5 +import copy
  6 +import logging
  7 +
  8 +logger = logging.getLogger(__name__)
  9 +
  10 +
  11 +def convert_transformers_model(
  12 + model_name_or_path: str,
  13 + output_dir: str,
  14 + quantization: str,
  15 + *,
  16 + force: bool = False,
  17 +) -> str:
  18 + from ctranslate2.converters import TransformersConverter
  19 + from transformers import AutoConfig
  20 +
  21 + class _CompatibleTransformersConverter(TransformersConverter):
  22 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  23 + try:
  24 + return super().load_model(model_class, resolved_model_name_or_path, **kwargs)
  25 + except TypeError as exc:
  26 + if "unexpected keyword argument 'dtype'" not in str(exc):
  27 + raise
  28 + if kwargs.get("dtype") is None and kwargs.get("torch_dtype") is None:
  29 + raise
  30 +
  31 + logger.warning(
  32 + "Retrying CTranslate2 model load without dtype hints | model=%s class=%s",
  33 + resolved_model_name_or_path,
  34 + getattr(model_class, "__name__", model_class),
  35 + )
  36 + retry_kwargs = dict(kwargs)
  37 + retry_kwargs.pop("dtype", None)
  38 + retry_kwargs.pop("torch_dtype", None)
  39 + config = retry_kwargs.get("config")
  40 + if config is None:
  41 + config = AutoConfig.from_pretrained(resolved_model_name_or_path)
  42 + else:
  43 + config = copy.deepcopy(config)
  44 + if hasattr(config, "dtype"):
  45 + config.dtype = None
  46 + if hasattr(config, "torch_dtype"):
  47 + config.torch_dtype = None
  48 + retry_kwargs["config"] = config
  49 + return super().load_model(model_class, resolved_model_name_or_path, **retry_kwargs)
  50 +
  51 + converter = _CompatibleTransformersConverter(model_name_or_path)
  52 + return converter.convert(output_dir=output_dir, quantization=quantization, force=force)
... ...
translation/service.py
... ... @@ -31,7 +31,12 @@ class TranslationService:
31 31 if not self._enabled_capabilities:
32 32 raise ValueError("No enabled translation backends found in services.translation.capabilities")
33 33 self._translation_cache = TranslationCache(self.config["cache"])
34   - self._backends = self._initialize_backends()
  34 + self._backends: Dict[str, TranslationBackendProtocol] = {}
  35 + self._backend_errors: Dict[str, str] = {}
  36 + self._initialize_backends()
  37 + if not self._backends:
  38 + details = ", ".join(f"{name}: {err}" for name, err in sorted(self._backend_errors.items())) or "unknown error"
  39 + raise RuntimeError(f"No translation backends could be initialized: {details}")
35 40  
36 41 def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]:
37 42 enabled: Dict[str, Dict[str, object]] = {}
... ... @@ -62,24 +67,47 @@ class TranslationService:
62 67 raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'")
63 68 return factory(name=name, cfg=cfg)
64 69  
65   - def _initialize_backends(self) -> Dict[str, TranslationBackendProtocol]:
66   - backends: Dict[str, TranslationBackendProtocol] = {}
67   - for name, capability_cfg in self._enabled_capabilities.items():
68   - backend_type = str(capability_cfg["backend"])
69   - logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type)
70   - backends[name] = self._create_backend(
  70 + def _load_backend(self, name: str) -> Optional[TranslationBackendProtocol]:
  71 + capability_cfg = self._enabled_capabilities.get(name)
  72 + if capability_cfg is None:
  73 + return None
  74 + if name in self._backends:
  75 + return self._backends[name]
  76 +
  77 + backend_type = str(capability_cfg["backend"])
  78 + logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type)
  79 + try:
  80 + backend = self._create_backend(
71 81 name=name,
72 82 backend_type=backend_type,
73 83 cfg=capability_cfg,
74 84 )
75   - logger.info(
76   - "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s",
  85 + except Exception as exc:
  86 + error_text = str(exc).strip() or exc.__class__.__name__
  87 + self._backend_errors[name] = error_text
  88 + logger.error(
  89 + "Translation backend initialization failed | model=%s backend=%s error=%s",
77 90 name,
78 91 backend_type,
79   - bool(capability_cfg.get("use_cache")),
80   - getattr(backends[name], "model", name),
  92 + error_text,
  93 + exc_info=True,
81 94 )
82   - return backends
  95 + return None
  96 +
  97 + self._backends[name] = backend
  98 + self._backend_errors.pop(name, None)
  99 + logger.info(
  100 + "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s",
  101 + name,
  102 + backend_type,
  103 + bool(capability_cfg.get("use_cache")),
  104 + getattr(backend, "model", name),
  105 + )
  106 + return backend
  107 +
  108 + def _initialize_backends(self) -> None:
  109 + for name, capability_cfg in self._enabled_capabilities.items():
  110 + self._load_backend(name)
83 111  
84 112 def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
85 113 from translation.backends.qwen_mt import QwenMTTranslationBackend
... ... @@ -178,13 +206,27 @@ class TranslationService:
178 206 def loaded_models(self) -> List[str]:
179 207 return list(self._backends.keys())
180 208  
  209 + @property
  210 + def failed_models(self) -> List[str]:
  211 + return list(self._backend_errors.keys())
  212 +
  213 + @property
  214 + def backend_errors(self) -> Dict[str, str]:
  215 + return dict(self._backend_errors)
  216 +
181 217 def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol:
182 218 normalized = normalize_translation_model(self.config, model)
183   - backend = self._backends.get(normalized)
  219 + backend = self._backends.get(normalized) or self._load_backend(normalized)
184 220 if backend is None:
185   - raise ValueError(
186   - f"Translation model '{normalized}' is not enabled. "
187   - f"Available models: {', '.join(self.available_models) or 'none'}"
  221 + if normalized not in self._enabled_capabilities:
  222 + raise ValueError(
  223 + f"Translation model '{normalized}' is not enabled. "
  224 + f"Available models: {', '.join(self.available_models) or 'none'}"
  225 + )
  226 + error_text = self._backend_errors.get(normalized) or "unknown initialization error"
  227 + raise RuntimeError(
  228 + f"Translation model '{normalized}' failed to initialize: {error_text}. "
  229 + f"Loaded models: {', '.join(self.loaded_models) or 'none'}"
188 230 )
189 231 return backend
190 232  
... ...