Commit f07947a5d510f29a6f588b84e3734ab5f0310a38 (1 parent: 0ba0e0fc)
Improve portability and harden public frontend search
Showing 14 changed files with 465 additions and 85 deletions.
Show diff stats
api/translator_app.py
| @@ -271,16 +271,20 @@ async def lifespan(_: FastAPI): | @@ -271,16 +271,20 @@ async def lifespan(_: FastAPI): | ||
| 271 | """Initialize all enabled translation backends on process startup.""" | 271 | """Initialize all enabled translation backends on process startup.""" |
| 272 | logger.info("Starting Translation Service API") | 272 | logger.info("Starting Translation Service API") |
| 273 | service = get_translation_service() | 273 | service = get_translation_service() |
| 274 | + failed_models = list(getattr(service, "failed_models", [])) | ||
| 275 | + backend_errors = dict(getattr(service, "backend_errors", {})) | ||
| 274 | logger.info( | 276 | logger.info( |
| 275 | - "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s", | 277 | + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s failed_models=%s", |
| 276 | service.config["default_model"], | 278 | service.config["default_model"], |
| 277 | service.config["default_scene"], | 279 | service.config["default_scene"], |
| 278 | service.available_models, | 280 | service.available_models, |
| 279 | service.loaded_models, | 281 | service.loaded_models, |
| 282 | + failed_models, | ||
| 280 | ) | 283 | ) |
| 281 | logger.info( | 284 | logger.info( |
| 282 | - "Translation backends initialized on startup | models=%s", | 285 | + "Translation backends initialized on startup | loaded=%s failed=%s", |
| 283 | service.loaded_models, | 286 | service.loaded_models, |
| 287 | + backend_errors, | ||
| 284 | ) | 288 | ) |
| 285 | verbose_logger.info( | 289 | verbose_logger.info( |
| 286 | "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s", | 290 | "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s", |
| @@ -316,11 +320,14 @@ async def health_check(): | @@ -316,11 +320,14 @@ async def health_check(): | ||
| 316 | """Health check endpoint.""" | 320 | """Health check endpoint.""" |
| 317 | try: | 321 | try: |
| 318 | service = get_translation_service() | 322 | service = get_translation_service() |
| 323 | + failed_models = list(getattr(service, "failed_models", [])) | ||
| 324 | + backend_errors = dict(getattr(service, "backend_errors", {})) | ||
| 319 | logger.info( | 325 | logger.info( |
| 320 | - "Health check | default_model=%s default_scene=%s loaded_models=%s", | 326 | + "Health check | default_model=%s default_scene=%s loaded_models=%s failed_models=%s", |
| 321 | service.config["default_model"], | 327 | service.config["default_model"], |
| 322 | service.config["default_scene"], | 328 | service.config["default_scene"], |
| 323 | service.loaded_models, | 329 | service.loaded_models, |
| 330 | + failed_models, | ||
| 324 | ) | 331 | ) |
| 325 | return { | 332 | return { |
| 326 | "status": "healthy", | 333 | "status": "healthy", |
| @@ -330,6 +337,8 @@ async def health_check(): | @@ -330,6 +337,8 @@ async def health_check(): | ||
| 330 | "available_models": service.available_models, | 337 | "available_models": service.available_models, |
| 331 | "enabled_capabilities": get_enabled_translation_models(service.config), | 338 | "enabled_capabilities": get_enabled_translation_models(service.config), |
| 332 | "loaded_models": service.loaded_models, | 339 | "loaded_models": service.loaded_models, |
| 340 | + "failed_models": failed_models, | ||
| 341 | + "backend_errors": backend_errors, | ||
| 333 | } | 342 | } |
| 334 | except Exception as e: | 343 | except Exception as e: |
| 335 | logger.error(f"Health check failed: {e}") | 344 | logger.error(f"Health check failed: {e}") |
| @@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request): | @@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request): | ||
| 463 | latency_ms = (time.perf_counter() - request_started) * 1000 | 472 | latency_ms = (time.perf_counter() - request_started) * 1000 |
| 464 | logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms) | 473 | logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms) |
| 465 | raise HTTPException(status_code=400, detail=str(e)) from e | 474 | raise HTTPException(status_code=400, detail=str(e)) from e |
| 475 | + except RuntimeError as e: | ||
| 476 | + latency_ms = (time.perf_counter() - request_started) * 1000 | ||
| 477 | + logger.warning("Translation backend unavailable | error=%s latency_ms=%.2f", e, latency_ms) | ||
| 478 | + raise HTTPException(status_code=503, detail=str(e)) from e | ||
| 466 | except Exception as e: | 479 | except Exception as e: |
| 467 | latency_ms = (time.perf_counter() - request_started) * 1000 | 480 | latency_ms = (time.perf_counter() - request_started) * 1000 |
| 468 | logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True) | 481 | logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True) |
config/loader.py
| @@ -655,6 +655,14 @@ class AppConfigLoader: | @@ -655,6 +655,14 @@ class AppConfigLoader: | ||
| 655 | 655 | ||
| 656 | translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {} | 656 | translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {} |
| 657 | normalized_translation = build_translation_config(translation_raw) | 657 | normalized_translation = build_translation_config(translation_raw) |
| 658 | + local_translation_backends = {"local_nllb", "local_marian"} | ||
| 659 | + for capability_name, capability_cfg in normalized_translation["capabilities"].items(): | ||
| 660 | + backend_name = str(capability_cfg.get("backend") or "").strip().lower() | ||
| 661 | + if backend_name not in local_translation_backends: | ||
| 662 | + continue | ||
| 663 | + for path_key in ("model_dir", "ct2_model_dir"): | ||
| 664 | + if capability_cfg.get(path_key) not in (None, ""): | ||
| 665 | + capability_cfg[path_key] = str(self._resolve_project_path_value(capability_cfg[path_key]).resolve()) | ||
| 658 | translation_config = TranslationServiceConfig( | 666 | translation_config = TranslationServiceConfig( |
| 659 | endpoint=str(normalized_translation["service_url"]).rstrip("/"), | 667 | endpoint=str(normalized_translation["service_url"]).rstrip("/"), |
| 660 | timeout_sec=float(normalized_translation["timeout_sec"]), | 668 | timeout_sec=float(normalized_translation["timeout_sec"]), |
| @@ -749,7 +757,7 @@ class AppConfigLoader: | @@ -749,7 +757,7 @@ class AppConfigLoader: | ||
| 749 | port=port, | 757 | port=port, |
| 750 | backend=backend_name, | 758 | backend=backend_name, |
| 751 | runtime_dir=( | 759 | runtime_dir=( |
| 752 | - str(v) | 760 | + str(self._resolve_project_path_value(v).resolve()) |
| 753 | if (v := instance_raw.get("runtime_dir")) not in (None, "") | 761 | if (v := instance_raw.get("runtime_dir")) not in (None, "") |
| 754 | else None | 762 | else None |
| 755 | ), | 763 | ), |
| @@ -787,6 +795,12 @@ class AppConfigLoader: | @@ -787,6 +795,12 @@ class AppConfigLoader: | ||
| 787 | rerank=rerank_config, | 795 | rerank=rerank_config, |
| 788 | ) | 796 | ) |
| 789 | 797 | ||
| 798 | + def _resolve_project_path_value(self, value: Any) -> Path: | ||
| 799 | + candidate = Path(str(value)).expanduser() | ||
| 800 | + if candidate.is_absolute(): | ||
| 801 | + return candidate | ||
| 802 | + return self.project_root / candidate | ||
| 803 | + | ||
| 790 | def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig: | 804 | def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig: |
| 791 | if not isinstance(raw, dict): | 805 | if not isinstance(raw, dict): |
| 792 | raise ConfigurationError("tenant_config must be a mapping") | 806 | raise ConfigurationError("tenant_config must be a mapping") |
frontend/static/js/app.js
| @@ -316,7 +316,10 @@ async function performSearch(page = 1) { | @@ -316,7 +316,10 @@ async function performSearch(page = 1) { | ||
| 316 | document.getElementById('productGrid').innerHTML = ''; | 316 | document.getElementById('productGrid').innerHTML = ''; |
| 317 | 317 | ||
| 318 | try { | 318 | try { |
| 319 | - const response = await fetch(`${API_BASE_URL}/search/`, { | 319 | + const searchUrl = new URL(`${API_BASE_URL}/search/`, window.location.origin); |
| 320 | + searchUrl.searchParams.set('tenant_id', tenantId); | ||
| 321 | + | ||
| 322 | + const response = await fetch(searchUrl.toString(), { | ||
| 320 | method: 'POST', | 323 | method: 'POST', |
| 321 | headers: { | 324 | headers: { |
| 322 | 'Content-Type': 'application/json', | 325 | 'Content-Type': 'application/json', |
requirements_translator_service.txt
| @@ -13,7 +13,8 @@ httpx>=0.24.0 | @@ -13,7 +13,8 @@ httpx>=0.24.0 | ||
| 13 | tqdm>=4.65.0 | 13 | tqdm>=4.65.0 |
| 14 | 14 | ||
| 15 | torch>=2.0.0 | 15 | torch>=2.0.0 |
| 16 | -transformers>=4.30.0 | 16 | +# Keep translator conversions on the last verified NLLB-compatible release line. |
| 17 | +transformers>=4.51.0,<4.52.0 | ||
| 17 | ctranslate2>=4.7.0 | 18 | ctranslate2>=4.7.0 |
| 18 | sentencepiece>=0.2.0 | 19 | sentencepiece>=0.2.0 |
| 19 | sacremoses>=0.1.1 | 20 | sacremoses>=0.1.1 |
(new file — filename not captured in this diff view; presumably the backward-compatible download entrypoint shim under scripts/, to confirm against the repository)
| @@ -0,0 +1,12 @@ | @@ -0,0 +1,12 @@ | ||
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +"""Backward-compatible entrypoint for translation model downloads.""" | ||
| 3 | + | ||
| 4 | +from __future__ import annotations | ||
| 5 | + | ||
| 6 | +import runpy | ||
| 7 | +from pathlib import Path | ||
| 8 | + | ||
| 9 | + | ||
| 10 | +if __name__ == "__main__": | ||
| 11 | + target = Path(__file__).resolve().parent / "translation" / "download_translation_models.py" | ||
| 12 | + runpy.run_path(str(target), run_name="__main__") |
scripts/frontend/frontend_server.py
| @@ -60,6 +60,8 @@ class RateLimitingMixin: | @@ -60,6 +60,8 @@ class RateLimitingMixin: | ||
| 60 | class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin): | 60 | class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin): |
| 61 | """Custom request handler with CORS support and robust error handling.""" | 61 | """Custom request handler with CORS support and robust error handling.""" |
| 62 | 62 | ||
| 63 | + _ALLOWED_CORS_HEADERS = "Content-Type, X-Tenant-ID, X-Request-ID, Referer" | ||
| 64 | + | ||
| 63 | def _is_proxy_path(self, path: str) -> bool: | 65 | def _is_proxy_path(self, path: str) -> bool: |
| 64 | """Return True for API paths that should be forwarded to backend service.""" | 66 | """Return True for API paths that should be forwarded to backend service.""" |
| 65 | return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/') | 67 | return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/') |
| @@ -220,7 +222,7 @@ class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMix | @@ -220,7 +222,7 @@ class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMix | ||
| 220 | # Add CORS headers | 222 | # Add CORS headers |
| 221 | self.send_header('Access-Control-Allow-Origin', '*') | 223 | self.send_header('Access-Control-Allow-Origin', '*') |
| 222 | self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS') | 224 | self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS') |
| 223 | - self.send_header('Access-Control-Allow-Headers', 'Content-Type') | 225 | + self.send_header('Access-Control-Allow-Headers', self._ALLOWED_CORS_HEADERS) |
| 224 | # Add security headers | 226 | # Add security headers |
| 225 | self.send_header('X-Content-Type-Options', 'nosniff') | 227 | self.send_header('X-Content-Type-Options', 'nosniff') |
| 226 | self.send_header('X-Frame-Options', 'DENY') | 228 | self.send_header('X-Frame-Options', 'DENY') |
scripts/setup_translator_venv.sh
| @@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" | @@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" | ||
| 8 | cd "${PROJECT_ROOT}" | 8 | cd "${PROJECT_ROOT}" |
| 9 | 9 | ||
| 10 | VENV_DIR="${PROJECT_ROOT}/.venv-translator" | 10 | VENV_DIR="${PROJECT_ROOT}/.venv-translator" |
| 11 | -PYTHON_BIN="${PYTHON_BIN:-python3}" | ||
| 12 | TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}" | 11 | TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}" |
| 12 | +MIN_PYTHON_MAJOR=3 | ||
| 13 | +MIN_PYTHON_MINOR=10 | ||
| 14 | + | ||
| 15 | +python_meets_minimum() { | ||
| 16 | + local bin="$1" | ||
| 17 | + "${bin}" - <<'PY' "${MIN_PYTHON_MAJOR}" "${MIN_PYTHON_MINOR}" | ||
| 18 | +import sys | ||
| 19 | + | ||
| 20 | +required = tuple(int(value) for value in sys.argv[1:]) | ||
| 21 | +sys.exit(0 if sys.version_info[:2] >= required else 1) | ||
| 22 | +PY | ||
| 23 | +} | ||
| 24 | + | ||
| 25 | +discover_python_bin() { | ||
| 26 | + local candidates=() | ||
| 27 | + | ||
| 28 | + if [[ -n "${PYTHON_BIN:-}" ]]; then | ||
| 29 | + candidates+=("${PYTHON_BIN}") | ||
| 30 | + fi | ||
| 31 | + candidates+=("python3.12" "python3.11" "python3.10" "python3") | ||
| 32 | + | ||
| 33 | + local candidate | ||
| 34 | + for candidate in "${candidates[@]}"; do | ||
| 35 | + if ! command -v "${candidate}" >/dev/null 2>&1; then | ||
| 36 | + continue | ||
| 37 | + fi | ||
| 38 | + if python_meets_minimum "${candidate}"; then | ||
| 39 | + echo "${candidate}" | ||
| 40 | + return 0 | ||
| 41 | + fi | ||
| 42 | + done | ||
| 43 | + | ||
| 44 | + return 1 | ||
| 45 | +} | ||
| 46 | + | ||
| 47 | +if ! PYTHON_BIN="$(discover_python_bin)"; then | ||
| 48 | + echo "ERROR: unable to find Python >= ${MIN_PYTHON_MAJOR}.${MIN_PYTHON_MINOR}." >&2 | ||
| 49 | + echo "Set PYTHON_BIN to a compatible interpreter and rerun." >&2 | ||
| 50 | + exit 1 | ||
| 51 | +fi | ||
| 13 | 52 | ||
| 14 | if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then | 53 | if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then |
| 15 | echo "ERROR: python not found: ${PYTHON_BIN}" >&2 | 54 | echo "ERROR: python not found: ${PYTHON_BIN}" >&2 |
| @@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}" | @@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}" | ||
| 32 | export TMPDIR="${TMP_DIR}" | 71 | export TMPDIR="${TMP_DIR}" |
| 33 | PIP_ARGS=(--no-cache-dir) | 72 | PIP_ARGS=(--no-cache-dir) |
| 34 | 73 | ||
| 74 | +echo "Using Python=${PYTHON_BIN}" | ||
| 35 | echo "Using TMPDIR=${TMPDIR}" | 75 | echo "Using TMPDIR=${TMPDIR}" |
| 36 | "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel | 76 | "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel |
| 37 | "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt | 77 | "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt |
scripts/translation/download_translation_models.py
| @@ -6,8 +6,6 @@ from __future__ import annotations | @@ -6,8 +6,6 @@ from __future__ import annotations | ||
| 6 | import argparse | 6 | import argparse |
| 7 | import os | 7 | import os |
| 8 | from pathlib import Path | 8 | from pathlib import Path |
| 9 | -import shutil | ||
| 10 | -import subprocess | ||
| 11 | import sys | 9 | import sys |
| 12 | from typing import Iterable | 10 | from typing import Iterable |
| 13 | 11 | ||
| @@ -19,6 +17,7 @@ if str(PROJECT_ROOT) not in sys.path: | @@ -19,6 +17,7 @@ if str(PROJECT_ROOT) not in sys.path: | ||
| 19 | os.environ.setdefault("HF_HUB_DISABLE_XET", "1") | 17 | os.environ.setdefault("HF_HUB_DISABLE_XET", "1") |
| 20 | 18 | ||
| 21 | from config.services_config import get_translation_config | 19 | from config.services_config import get_translation_config |
| 20 | +from translation.ct2_conversion import convert_transformers_model | ||
| 22 | 21 | ||
| 23 | 22 | ||
| 24 | LOCAL_BACKENDS = {"local_nllb", "local_marian"} | 23 | LOCAL_BACKENDS = {"local_nllb", "local_marian"} |
| @@ -46,19 +45,6 @@ def _compute_ct2_output_dir(capability: dict) -> Path: | @@ -46,19 +45,6 @@ def _compute_ct2_output_dir(capability: dict) -> Path: | ||
| 46 | return model_dir / f"ctranslate2-{normalized}" | 45 | return model_dir / f"ctranslate2-{normalized}" |
| 47 | 46 | ||
| 48 | 47 | ||
| 49 | -def _resolve_converter_binary() -> str: | ||
| 50 | - candidate = shutil.which("ct2-transformers-converter") | ||
| 51 | - if candidate: | ||
| 52 | - return candidate | ||
| 53 | - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter" | ||
| 54 | - if venv_candidate.exists(): | ||
| 55 | - return str(venv_candidate) | ||
| 56 | - raise RuntimeError( | ||
| 57 | - "ct2-transformers-converter was not found. " | ||
| 58 | - "Install ctranslate2 in the active Python environment first." | ||
| 59 | - ) | ||
| 60 | - | ||
| 61 | - | ||
| 62 | def convert_to_ctranslate2(name: str, capability: dict) -> None: | 48 | def convert_to_ctranslate2(name: str, capability: dict) -> None: |
| 63 | model_id = str(capability.get("model_id") or "").strip() | 49 | model_id = str(capability.get("model_id") or "").strip() |
| 64 | model_dir = Path(str(capability.get("model_dir") or "")).expanduser() | 50 | model_dir = Path(str(capability.get("model_dir") or "")).expanduser() |
| @@ -75,18 +61,7 @@ def convert_to_ctranslate2(name: str, capability: dict) -> None: | @@ -75,18 +61,7 @@ def convert_to_ctranslate2(name: str, capability: dict) -> None: | ||
| 75 | ).strip() | 61 | ).strip() |
| 76 | output_dir.parent.mkdir(parents=True, exist_ok=True) | 62 | output_dir.parent.mkdir(parents=True, exist_ok=True) |
| 77 | print(f"[convert] {name} -> {output_dir} ({quantization})") | 63 | print(f"[convert] {name} -> {output_dir} ({quantization})") |
| 78 | - subprocess.run( | ||
| 79 | - [ | ||
| 80 | - _resolve_converter_binary(), | ||
| 81 | - "--model", | ||
| 82 | - model_source, | ||
| 83 | - "--output_dir", | ||
| 84 | - str(output_dir), | ||
| 85 | - "--quantization", | ||
| 86 | - quantization, | ||
| 87 | - ], | ||
| 88 | - check=True, | ||
| 89 | - ) | 64 | + convert_transformers_model(model_source, str(output_dir), quantization) |
| 90 | print(f"[converted] {name}") | 65 | print(f"[converted] {name}") |
| 91 | 66 | ||
| 92 | 67 |
(new file — filename not captured in this diff view; presumably a test module for translation/ct2_conversion.py, to confirm against the repository)
| @@ -0,0 +1,85 @@ | @@ -0,0 +1,85 @@ | ||
| 1 | +from __future__ import annotations | ||
| 2 | + | ||
| 3 | +import sys | ||
| 4 | +import types | ||
| 5 | + | ||
| 6 | +import pytest | ||
| 7 | + | ||
| 8 | +import translation.ct2_conversion as ct2_conversion | ||
| 9 | + | ||
| 10 | + | ||
| 11 | +class _FakeTransformersConverter: | ||
| 12 | + def __init__(self, model_name_or_path): | ||
| 13 | + self.model_name_or_path = model_name_or_path | ||
| 14 | + self.load_calls = [] | ||
| 15 | + | ||
| 16 | + def load_model(self, model_class, resolved_model_name_or_path, **kwargs): | ||
| 17 | + self.load_calls.append( | ||
| 18 | + { | ||
| 19 | + "model_class": model_class, | ||
| 20 | + "resolved_model_name_or_path": resolved_model_name_or_path, | ||
| 21 | + "kwargs": dict(kwargs), | ||
| 22 | + } | ||
| 23 | + ) | ||
| 24 | + if "dtype" in kwargs or "torch_dtype" in kwargs: | ||
| 25 | + raise TypeError("M2M100ForConditionalGeneration.__init__() got an unexpected keyword argument 'dtype'") | ||
| 26 | + return {"loaded": True, "path": resolved_model_name_or_path} | ||
| 27 | + | ||
| 28 | + def convert(self, output_dir, quantization=None, force=False): | ||
| 29 | + loaded = self.load_model("FakeModel", self.model_name_or_path, dtype="float32") | ||
| 30 | + return { | ||
| 31 | + "loaded": loaded, | ||
| 32 | + "output_dir": output_dir, | ||
| 33 | + "quantization": quantization, | ||
| 34 | + "force": force, | ||
| 35 | + "load_calls": list(self.load_calls), | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + | ||
| 39 | +def _install_fake_ctranslate2(monkeypatch, base_converter): | ||
| 40 | + converters_module = types.ModuleType("ctranslate2.converters") | ||
| 41 | + converters_module.TransformersConverter = base_converter | ||
| 42 | + ctranslate2_module = types.ModuleType("ctranslate2") | ||
| 43 | + ctranslate2_module.converters = converters_module | ||
| 44 | + | ||
| 45 | + monkeypatch.setitem(sys.modules, "ctranslate2", ctranslate2_module) | ||
| 46 | + monkeypatch.setitem(sys.modules, "ctranslate2.converters", converters_module) | ||
| 47 | + | ||
| 48 | + | ||
| 49 | +def test_convert_transformers_model_retries_without_torch_dtype(monkeypatch): | ||
| 50 | + _install_fake_ctranslate2(monkeypatch, _FakeTransformersConverter) | ||
| 51 | + fake_transformers = types.ModuleType("transformers") | ||
| 52 | + fake_transformers.AutoConfig = types.SimpleNamespace( | ||
| 53 | + from_pretrained=lambda path: types.SimpleNamespace(torch_dtype="float32", path=path) | ||
| 54 | + ) | ||
| 55 | + monkeypatch.setitem(sys.modules, "transformers", fake_transformers) | ||
| 56 | + | ||
| 57 | + result = ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16") | ||
| 58 | + | ||
| 59 | + assert result["loaded"] == {"loaded": True, "path": "fake-model"} | ||
| 60 | + assert result["output_dir"] == "/tmp/out" | ||
| 61 | + assert result["quantization"] == "float16" | ||
| 62 | + assert result["force"] is False | ||
| 63 | + assert len(result["load_calls"]) == 2 | ||
| 64 | + assert result["load_calls"][0] == { | ||
| 65 | + "model_class": "FakeModel", | ||
| 66 | + "resolved_model_name_or_path": "fake-model", | ||
| 67 | + "kwargs": {"dtype": "float32"}, | ||
| 68 | + } | ||
| 69 | + assert result["load_calls"][1]["model_class"] == "FakeModel" | ||
| 70 | + assert result["load_calls"][1]["resolved_model_name_or_path"] == "fake-model" | ||
| 71 | + assert getattr(result["load_calls"][1]["kwargs"]["config"], "torch_dtype", "missing") is None | ||
| 72 | + | ||
| 73 | + | ||
| 74 | +def test_convert_transformers_model_preserves_unrelated_type_errors(monkeypatch): | ||
| 75 | + class _AlwaysFailingConverter(_FakeTransformersConverter): | ||
| 76 | + def load_model(self, model_class, resolved_model_name_or_path, **kwargs): | ||
| 77 | + raise TypeError("different constructor error") | ||
| 78 | + | ||
| 79 | + _install_fake_ctranslate2(monkeypatch, _AlwaysFailingConverter) | ||
| 80 | + fake_transformers = types.ModuleType("transformers") | ||
| 81 | + fake_transformers.AutoConfig = types.SimpleNamespace(from_pretrained=lambda path: types.SimpleNamespace(path=path)) | ||
| 82 | + monkeypatch.setitem(sys.modules, "transformers", fake_transformers) | ||
| 83 | + | ||
| 84 | + with pytest.raises(TypeError, match="different constructor error"): | ||
| 85 | + ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16") |
tests/test_translation_local_backends.py
| @@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch): | @@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch): | ||
| 201 | assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]] | 201 | assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]] |
| 202 | 202 | ||
| 203 | 203 | ||
| 204 | +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_wrong_type(tmp_path, monkeypatch): | ||
| 205 | + wrong_dir = tmp_path / "wrong-nllb" | ||
| 206 | + wrong_dir.mkdir() | ||
| 207 | + (wrong_dir / "config.json").write_text('{"model_type":"led"}', encoding="utf-8") | ||
| 208 | + | ||
| 209 | + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime) | ||
| 210 | + | ||
| 211 | + backend = NLLBCTranslate2TranslationBackend( | ||
| 212 | + name="nllb-200-distilled-600m", | ||
| 213 | + model_id="facebook/nllb-200-distilled-600M", | ||
| 214 | + model_dir=str(wrong_dir), | ||
| 215 | + device="cpu", | ||
| 216 | + torch_dtype="float32", | ||
| 217 | + batch_size=1, | ||
| 218 | + max_input_length=16, | ||
| 219 | + max_new_tokens=16, | ||
| 220 | + num_beams=1, | ||
| 221 | + ) | ||
| 222 | + | ||
| 223 | + assert backend._model_source() == "facebook/nllb-200-distilled-600M" | ||
| 224 | + assert backend._tokenizer_source() == "facebook/nllb-200-distilled-600M" | ||
| 225 | + | ||
| 226 | + | ||
| 227 | +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_incomplete(tmp_path, monkeypatch): | ||
| 228 | + incomplete_dir = tmp_path / "incomplete-nllb" | ||
| 229 | + incomplete_dir.mkdir() | ||
| 230 | + (incomplete_dir / "ctranslate2-float16").mkdir() | ||
| 231 | + | ||
| 232 | + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime) | ||
| 233 | + | ||
| 234 | + backend = NLLBCTranslate2TranslationBackend( | ||
| 235 | + name="nllb-200-distilled-600m", | ||
| 236 | + model_id="facebook/nllb-200-distilled-600M", | ||
| 237 | + model_dir=str(incomplete_dir), | ||
| 238 | + device="cpu", | ||
| 239 | + torch_dtype="float32", | ||
| 240 | + batch_size=1, | ||
| 241 | + max_input_length=16, | ||
| 242 | + max_new_tokens=16, | ||
| 243 | + num_beams=1, | ||
| 244 | + ) | ||
| 245 | + | ||
| 246 | + assert backend._model_source() == "facebook/nllb-200-distilled-600M" | ||
| 247 | + | ||
| 248 | + | ||
| 204 | def test_nllb_resolves_flores_short_tags_and_iso_no(): | 249 | def test_nllb_resolves_flores_short_tags_and_iso_no(): |
| 205 | cat = build_nllb_language_catalog(None) | 250 | cat = build_nllb_language_catalog(None) |
| 206 | assert resolve_nllb_language_code("ca", cat) == "cat_Latn" | 251 | assert resolve_nllb_language_code("ca", cat) == "cat_Latn" |
tests/test_translator_failure_semantics.py
| @@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog): | @@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog): | ||
| 197 | ] | 197 | ] |
| 198 | 198 | ||
| 199 | 199 | ||
| 200 | +def test_service_skips_failed_backend_but_keeps_healthy_capabilities(monkeypatch): | ||
| 201 | + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None)) | ||
| 202 | + | ||
| 203 | + def _fake_create_backend(self, *, name, backend_type, cfg): | ||
| 204 | + del self, backend_type, cfg | ||
| 205 | + if name == "broken-nllb": | ||
| 206 | + raise RuntimeError("broken model dir") | ||
| 207 | + | ||
| 208 | + class _Backend: | ||
| 209 | + model = name | ||
| 210 | + | ||
| 211 | + @property | ||
| 212 | + def supports_batch(self): | ||
| 213 | + return True | ||
| 214 | + | ||
| 215 | + def translate(self, text, target_lang, source_lang=None, scene=None): | ||
| 216 | + del target_lang, source_lang, scene | ||
| 217 | + return text | ||
| 218 | + | ||
| 219 | + return _Backend() | ||
| 220 | + | ||
| 221 | + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend) | ||
| 222 | + service = TranslationService( | ||
| 223 | + { | ||
| 224 | + "service_url": "http://127.0.0.1:6006", | ||
| 225 | + "timeout_sec": 10.0, | ||
| 226 | + "default_model": "llm", | ||
| 227 | + "default_scene": "general", | ||
| 228 | + "capabilities": { | ||
| 229 | + "llm": { | ||
| 230 | + "enabled": True, | ||
| 231 | + "backend": "llm", | ||
| 232 | + "model": "dummy-llm", | ||
| 233 | + "base_url": "https://example.com", | ||
| 234 | + "timeout_sec": 10.0, | ||
| 235 | + "use_cache": True, | ||
| 236 | + }, | ||
| 237 | + "broken-nllb": { | ||
| 238 | + "enabled": True, | ||
| 239 | + "backend": "local_nllb", | ||
| 240 | + "model_id": "dummy", | ||
| 241 | + "model_dir": "dummy", | ||
| 242 | + "device": "cpu", | ||
| 243 | + "torch_dtype": "float32", | ||
| 244 | + "batch_size": 8, | ||
| 245 | + "max_input_length": 16, | ||
| 246 | + "max_new_tokens": 16, | ||
| 247 | + "num_beams": 1, | ||
| 248 | + "use_cache": True, | ||
| 249 | + }, | ||
| 250 | + }, | ||
| 251 | + "cache": { | ||
| 252 | + "ttl_seconds": 60, | ||
| 253 | + "sliding_expiration": True, | ||
| 254 | + }, | ||
| 255 | + } | ||
| 256 | + ) | ||
| 257 | + | ||
| 258 | + assert service.available_models == ["llm", "broken-nllb"] | ||
| 259 | + assert service.loaded_models == ["llm"] | ||
| 260 | + assert service.failed_models == ["broken-nllb"] | ||
| 261 | + assert service.backend_errors["broken-nllb"] == "broken model dir" | ||
| 262 | + | ||
| 263 | + with pytest.raises(RuntimeError, match="failed to initialize"): | ||
| 264 | + service.get_backend("broken-nllb") | ||
| 265 | + | ||
| 266 | + | ||
| 200 | def test_translation_cache_probe_models_order(): | 267 | def test_translation_cache_probe_models_order(): |
| 201 | cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}} | 268 | cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}} |
| 202 | assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"] | 269 | assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"] |
translation/backends/local_ctranslate2.py
| @@ -4,9 +4,7 @@ from __future__ import annotations | @@ -4,9 +4,7 @@ from __future__ import annotations | ||
| 4 | 4 | ||
| 5 | import logging | 5 | import logging |
| 6 | import os | 6 | import os |
| 7 | -import shutil | ||
| 8 | -import subprocess | ||
| 9 | -import sys | 7 | +import json |
| 10 | import threading | 8 | import threading |
| 11 | from pathlib import Path | 9 | from pathlib import Path |
| 12 | from typing import Dict, List, Optional, Sequence, Union | 10 | from typing import Dict, List, Optional, Sequence, Union |
| @@ -24,6 +22,7 @@ from translation.text_splitter import ( | @@ -24,6 +22,7 @@ from translation.text_splitter import ( | ||
| 24 | join_translated_segments, | 22 | join_translated_segments, |
| 25 | split_text_for_translation, | 23 | split_text_for_translation, |
| 26 | ) | 24 | ) |
| 25 | +from translation.ct2_conversion import convert_transformers_model | ||
| 27 | 26 | ||
| 28 | logger = logging.getLogger(__name__) | 27 | logger = logging.getLogger(__name__) |
| 29 | 28 | ||
| @@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -> str: | @@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -> str: | ||
| 76 | return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}") | 75 | return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}") |
| 77 | 76 | ||
| 78 | 77 | ||
| 79 | -def _resolve_converter_binary() -> str: | ||
| 80 | - candidate = shutil.which("ct2-transformers-converter") | ||
| 81 | - if candidate: | ||
| 82 | - return candidate | ||
| 83 | - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter" | ||
| 84 | - if venv_candidate.exists(): | ||
| 85 | - return str(venv_candidate) | ||
| 86 | - raise RuntimeError( | ||
| 87 | - "ct2-transformers-converter was not found. " | ||
| 88 | - "Ensure ctranslate2 is installed in the active translator environment." | ||
| 89 | - ) | 78 | +def _detect_local_model_type(model_dir: str) -> Optional[str]: |
| 79 | + config_path = Path(model_dir).expanduser() / "config.json" | ||
| 80 | + if not config_path.exists(): | ||
| 81 | + return None | ||
| 82 | + try: | ||
| 83 | + with open(config_path, "r", encoding="utf-8") as handle: | ||
| 84 | + payload = json.load(handle) or {} | ||
| 85 | + except Exception as exc: | ||
| 86 | + logger.warning("Failed to inspect local translation config %s: %s", config_path, exc) | ||
| 87 | + return None | ||
| 88 | + model_type = str(payload.get("model_type") or "").strip().lower() | ||
| 89 | + return model_type or None | ||
| 90 | 90 | ||
| 91 | 91 | ||
| 92 | class LocalCTranslate2TranslationBackend: | 92 | class LocalCTranslate2TranslationBackend: |
| @@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend: | @@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend: | ||
| 144 | self.ct2_decoding_length_extra = int(ct2_decoding_length_extra) | 144 | self.ct2_decoding_length_extra = int(ct2_decoding_length_extra) |
| 145 | self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min)) | 145 | self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min)) |
| 146 | self._tokenizer_lock = threading.Lock() | 146 | self._tokenizer_lock = threading.Lock() |
| 147 | + self._local_model_source = self._resolve_local_model_source() | ||
| 147 | self._load_runtime() | 148 | self._load_runtime() |
| 148 | 149 | ||
| 149 | @property | 150 | @property |
| @@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend: | @@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend: | ||
| 151 | return True | 152 | return True |
| 152 | 153 | ||
| 153 | def _tokenizer_source(self) -> str: | 154 | def _tokenizer_source(self) -> str: |
| 154 | - return self.model_dir if os.path.exists(self.model_dir) else self.model_id | 155 | + return self._local_model_source or self.model_id |
| 155 | 156 | ||
| 156 | def _model_source(self) -> str: | 157 | def _model_source(self) -> str: |
| 157 | - return self.model_dir if os.path.exists(self.model_dir) else self.model_id | 158 | + return self._local_model_source or self.model_id |
| 159 | + | ||
| 160 | + def _expected_local_model_types(self) -> Optional[set[str]]: | ||
| 161 | + return None | ||
| 162 | + | ||
| 163 | + def _resolve_local_model_source(self) -> Optional[str]: | ||
| 164 | + model_path = Path(self.model_dir).expanduser() | ||
| 165 | + if not model_path.exists(): | ||
| 166 | + return None | ||
| 167 | + if not (model_path / "config.json").exists(): | ||
| 168 | + logger.warning( | ||
| 169 | + "Local translation model_dir is incomplete | model=%s model_dir=%s missing=config.json fallback=model_id", | ||
| 170 | + self.model, | ||
| 171 | + model_path, | ||
| 172 | + ) | ||
| 173 | + return None | ||
| 174 | + | ||
| 175 | + expected_types = self._expected_local_model_types() | ||
| 176 | + if not expected_types: | ||
| 177 | + return str(model_path) | ||
| 178 | + | ||
| 179 | + detected_type = _detect_local_model_type(str(model_path)) | ||
| 180 | + if detected_type is None: | ||
| 181 | + return str(model_path) | ||
| 182 | + if detected_type in expected_types: | ||
| 183 | + return str(model_path) | ||
| 184 | + | ||
| 185 | + logger.warning( | ||
| 186 | + "Local translation model_dir has unexpected model_type | model=%s model_dir=%s detected=%s expected=%s fallback=model_id", | ||
| 187 | + self.model, | ||
| 188 | + model_path, | ||
| 189 | + detected_type, | ||
| 190 | + sorted(expected_types), | ||
| 191 | + ) | ||
| 192 | + return None | ||
| 158 | 193 | ||
| 159 | def _tokenizer_kwargs(self) -> Dict[str, object]: | 194 | def _tokenizer_kwargs(self) -> Dict[str, object]: |
| 160 | return {} | 195 | return {} |
| @@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend: | @@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend: | ||
| 204 | ) | 239 | ) |
| 205 | 240 | ||
| 206 | ct2_path.parent.mkdir(parents=True, exist_ok=True) | 241 | ct2_path.parent.mkdir(parents=True, exist_ok=True) |
| 207 | - converter = _resolve_converter_binary() | ||
| 208 | logger.info( | 242 | logger.info( |
| 209 | "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s", | 243 | "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s", |
| 210 | self.model, | 244 | self.model, |
| @@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend: | @@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend: | ||
| 213 | self.ct2_conversion_quantization, | 247 | self.ct2_conversion_quantization, |
| 214 | ) | 248 | ) |
| 215 | try: | 249 | try: |
| 216 | - subprocess.run( | ||
| 217 | - [ | ||
| 218 | - converter, | ||
| 219 | - "--model", | ||
| 220 | - model_source, | ||
| 221 | - "--output_dir", | ||
| 222 | - str(ct2_path), | ||
| 223 | - "--quantization", | ||
| 224 | - self.ct2_conversion_quantization, | ||
| 225 | - ], | ||
| 226 | - check=True, | ||
| 227 | - stdout=subprocess.PIPE, | ||
| 228 | - stderr=subprocess.PIPE, | ||
| 229 | - text=True, | 250 | + convert_transformers_model( |
| 251 | + model_source, | ||
| 252 | + str(ct2_path), | ||
| 253 | + self.ct2_conversion_quantization, | ||
| 230 | ) | 254 | ) |
| 231 | - except subprocess.CalledProcessError as exc: | ||
| 232 | - stderr = exc.stderr.strip() | 255 | + except Exception as exc: |
| 233 | raise RuntimeError( | 256 | raise RuntimeError( |
| 234 | - f"Failed to convert model '{self.model}' to CTranslate2: {stderr or exc}" | 257 | + f"Failed to convert model '{self.model}' to CTranslate2: {exc}" |
| 235 | ) from exc | 258 | ) from exc |
| 236 | 259 | ||
| 237 | def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]: | 260 | def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]: |
| @@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): | @@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): | ||
| 557 | f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}" | 580 | f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}" |
| 558 | ) | 581 | ) |
| 559 | 582 | ||
| 583 | + def _expected_local_model_types(self) -> Optional[set[str]]: | ||
| 584 | + return {"marian"} | ||
| 585 | + | ||
| 560 | 586 | ||
| 561 | class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): | 587 | class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): |
| 562 | """Local backend for NLLB models on CTranslate2.""" | 588 | """Local backend for NLLB models on CTranslate2.""" |
| @@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): | @@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): | ||
| 619 | if resolve_nllb_language_code(target_lang, self.language_codes) is None: | 645 | if resolve_nllb_language_code(target_lang, self.language_codes) is None: |
| 620 | raise ValueError(f"Unsupported NLLB target language: {target_lang}") | 646 | raise ValueError(f"Unsupported NLLB target language: {target_lang}") |
| 621 | 647 | ||
| 648 | + def _expected_local_model_types(self) -> Optional[set[str]]: | ||
| 649 | + return {"m2m_100", "nllb_moe"} | ||
| 650 | + | ||
| 622 | def _get_tokenizer_for_source(self, source_lang: str): | 651 | def _get_tokenizer_for_source(self, source_lang: str): |
| 623 | src_code = resolve_nllb_language_code(source_lang, self.language_codes) | 652 | src_code = resolve_nllb_language_code(source_lang, self.language_codes) |
| 624 | if src_code is None: | 653 | if src_code is None: |
| @@ -0,0 +1,52 @@ | @@ -0,0 +1,52 @@ | ||
| 1 | +"""Helpers for converting Hugging Face translation models to CTranslate2.""" | ||
| 2 | + | ||
| 3 | +from __future__ import annotations | ||
| 4 | + | ||
| 5 | +import copy | ||
| 6 | +import logging | ||
| 7 | + | ||
| 8 | +logger = logging.getLogger(__name__) | ||
| 9 | + | ||
| 10 | + | ||
| 11 | +def convert_transformers_model( | ||
| 12 | + model_name_or_path: str, | ||
| 13 | + output_dir: str, | ||
| 14 | + quantization: str, | ||
| 15 | + *, | ||
| 16 | + force: bool = False, | ||
| 17 | +) -> str: | ||
| 18 | + from ctranslate2.converters import TransformersConverter | ||
| 19 | + from transformers import AutoConfig | ||
| 20 | + | ||
| 21 | + class _CompatibleTransformersConverter(TransformersConverter): | ||
| 22 | + def load_model(self, model_class, resolved_model_name_or_path, **kwargs): | ||
| 23 | + try: | ||
| 24 | + return super().load_model(model_class, resolved_model_name_or_path, **kwargs) | ||
| 25 | + except TypeError as exc: | ||
| 26 | + if "unexpected keyword argument 'dtype'" not in str(exc): | ||
| 27 | + raise | ||
| 28 | + if kwargs.get("dtype") is None and kwargs.get("torch_dtype") is None: | ||
| 29 | + raise | ||
| 30 | + | ||
| 31 | + logger.warning( | ||
| 32 | + "Retrying CTranslate2 model load without dtype hints | model=%s class=%s", | ||
| 33 | + resolved_model_name_or_path, | ||
| 34 | + getattr(model_class, "__name__", model_class), | ||
| 35 | + ) | ||
| 36 | + retry_kwargs = dict(kwargs) | ||
| 37 | + retry_kwargs.pop("dtype", None) | ||
| 38 | + retry_kwargs.pop("torch_dtype", None) | ||
| 39 | + config = retry_kwargs.get("config") | ||
| 40 | + if config is None: | ||
| 41 | + config = AutoConfig.from_pretrained(resolved_model_name_or_path) | ||
| 42 | + else: | ||
| 43 | + config = copy.deepcopy(config) | ||
| 44 | + if hasattr(config, "dtype"): | ||
| 45 | + config.dtype = None | ||
| 46 | + if hasattr(config, "torch_dtype"): | ||
| 47 | + config.torch_dtype = None | ||
| 48 | + retry_kwargs["config"] = config | ||
| 49 | + return super().load_model(model_class, resolved_model_name_or_path, **retry_kwargs) | ||
| 50 | + | ||
| 51 | + converter = _CompatibleTransformersConverter(model_name_or_path) | ||
| 52 | + return converter.convert(output_dir=output_dir, quantization=quantization, force=force) |
translation/service.py
| @@ -31,7 +31,12 @@ class TranslationService: | @@ -31,7 +31,12 @@ class TranslationService: | ||
| 31 | if not self._enabled_capabilities: | 31 | if not self._enabled_capabilities: |
| 32 | raise ValueError("No enabled translation backends found in services.translation.capabilities") | 32 | raise ValueError("No enabled translation backends found in services.translation.capabilities") |
| 33 | self._translation_cache = TranslationCache(self.config["cache"]) | 33 | self._translation_cache = TranslationCache(self.config["cache"]) |
| 34 | - self._backends = self._initialize_backends() | 34 | + self._backends: Dict[str, TranslationBackendProtocol] = {} |
| 35 | + self._backend_errors: Dict[str, str] = {} | ||
| 36 | + self._initialize_backends() | ||
| 37 | + if not self._backends: | ||
| 38 | + details = ", ".join(f"{name}: {err}" for name, err in sorted(self._backend_errors.items())) or "unknown error" | ||
| 39 | + raise RuntimeError(f"No translation backends could be initialized: {details}") | ||
| 35 | 40 | ||
| 36 | def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]: | 41 | def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]: |
| 37 | enabled: Dict[str, Dict[str, object]] = {} | 42 | enabled: Dict[str, Dict[str, object]] = {} |
| @@ -62,24 +67,47 @@ class TranslationService: | @@ -62,24 +67,47 @@ class TranslationService: | ||
| 62 | raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'") | 67 | raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'") |
| 63 | return factory(name=name, cfg=cfg) | 68 | return factory(name=name, cfg=cfg) |
| 64 | 69 | ||
| 65 | - def _initialize_backends(self) -> Dict[str, TranslationBackendProtocol]: | ||
| 66 | - backends: Dict[str, TranslationBackendProtocol] = {} | ||
| 67 | - for name, capability_cfg in self._enabled_capabilities.items(): | ||
| 68 | - backend_type = str(capability_cfg["backend"]) | ||
| 69 | - logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type) | ||
| 70 | - backends[name] = self._create_backend( | 70 | + def _load_backend(self, name: str) -> Optional[TranslationBackendProtocol]: |
| 71 | + capability_cfg = self._enabled_capabilities.get(name) | ||
| 72 | + if capability_cfg is None: | ||
| 73 | + return None | ||
| 74 | + if name in self._backends: | ||
| 75 | + return self._backends[name] | ||
| 76 | + | ||
| 77 | + backend_type = str(capability_cfg["backend"]) | ||
| 78 | + logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type) | ||
| 79 | + try: | ||
| 80 | + backend = self._create_backend( | ||
| 71 | name=name, | 81 | name=name, |
| 72 | backend_type=backend_type, | 82 | backend_type=backend_type, |
| 73 | cfg=capability_cfg, | 83 | cfg=capability_cfg, |
| 74 | ) | 84 | ) |
| 75 | - logger.info( | ||
| 76 | - "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s", | 85 | + except Exception as exc: |
| 86 | + error_text = str(exc).strip() or exc.__class__.__name__ | ||
| 87 | + self._backend_errors[name] = error_text | ||
| 88 | + logger.error( | ||
| 89 | + "Translation backend initialization failed | model=%s backend=%s error=%s", | ||
| 77 | name, | 90 | name, |
| 78 | backend_type, | 91 | backend_type, |
| 79 | - bool(capability_cfg.get("use_cache")), | ||
| 80 | - getattr(backends[name], "model", name), | 92 | + error_text, |
| 93 | + exc_info=True, | ||
| 81 | ) | 94 | ) |
| 82 | - return backends | 95 | + return None |
| 96 | + | ||
| 97 | + self._backends[name] = backend | ||
| 98 | + self._backend_errors.pop(name, None) | ||
| 99 | + logger.info( | ||
| 100 | + "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s", | ||
| 101 | + name, | ||
| 102 | + backend_type, | ||
| 103 | + bool(capability_cfg.get("use_cache")), | ||
| 104 | + getattr(backend, "model", name), | ||
| 105 | + ) | ||
| 106 | + return backend | ||
| 107 | + | ||
| 108 | + def _initialize_backends(self) -> None: | ||
| 109 | + for name, capability_cfg in self._enabled_capabilities.items(): | ||
| 110 | + self._load_backend(name) | ||
| 83 | 111 | ||
| 84 | def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: | 112 | def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: |
| 85 | from translation.backends.qwen_mt import QwenMTTranslationBackend | 113 | from translation.backends.qwen_mt import QwenMTTranslationBackend |
| @@ -178,13 +206,27 @@ class TranslationService: | @@ -178,13 +206,27 @@ class TranslationService: | ||
| 178 | def loaded_models(self) -> List[str]: | 206 | def loaded_models(self) -> List[str]: |
| 179 | return list(self._backends.keys()) | 207 | return list(self._backends.keys()) |
| 180 | 208 | ||
| 209 | + @property | ||
| 210 | + def failed_models(self) -> List[str]: | ||
| 211 | + return list(self._backend_errors.keys()) | ||
| 212 | + | ||
| 213 | + @property | ||
| 214 | + def backend_errors(self) -> Dict[str, str]: | ||
| 215 | + return dict(self._backend_errors) | ||
| 216 | + | ||
| 181 | def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: | 217 | def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: |
| 182 | normalized = normalize_translation_model(self.config, model) | 218 | normalized = normalize_translation_model(self.config, model) |
| 183 | - backend = self._backends.get(normalized) | 219 | + backend = self._backends.get(normalized) or self._load_backend(normalized) |
| 184 | if backend is None: | 220 | if backend is None: |
| 185 | - raise ValueError( | ||
| 186 | - f"Translation model '{normalized}' is not enabled. " | ||
| 187 | - f"Available models: {', '.join(self.available_models) or 'none'}" | 221 | + if normalized not in self._enabled_capabilities: |
| 222 | + raise ValueError( | ||
| 223 | + f"Translation model '{normalized}' is not enabled. " | ||
| 224 | + f"Available models: {', '.join(self.available_models) or 'none'}" | ||
| 225 | + ) | ||
| 226 | + error_text = self._backend_errors.get(normalized) or "unknown initialization error" | ||
| 227 | + raise RuntimeError( | ||
| 228 | + f"Translation model '{normalized}' failed to initialize: {error_text}. " | ||
| 229 | + f"Loaded models: {', '.join(self.loaded_models) or 'none'}" | ||
| 188 | ) | 230 | ) |
| 189 | return backend | 231 | return backend |
| 190 | 232 |
Mentioned in commit 89fa3f3ccb7d7815460c21ea52ecca110435d61e.