Commit f07947a5d510f29a6f588b84e3734ab5f0310a38
1 parent
0ba0e0fc
Improve portability and harden public frontend search
Showing
14 changed files
with
465 additions
and
85 deletions
Show diff stats
api/translator_app.py
| ... | ... | @@ -271,16 +271,20 @@ async def lifespan(_: FastAPI): |
| 271 | 271 | """Initialize all enabled translation backends on process startup.""" |
| 272 | 272 | logger.info("Starting Translation Service API") |
| 273 | 273 | service = get_translation_service() |
| 274 | + failed_models = list(getattr(service, "failed_models", [])) | |
| 275 | + backend_errors = dict(getattr(service, "backend_errors", {})) | |
| 274 | 276 | logger.info( |
| 275 | - "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s", | |
| 277 | + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s failed_models=%s", | |
| 276 | 278 | service.config["default_model"], |
| 277 | 279 | service.config["default_scene"], |
| 278 | 280 | service.available_models, |
| 279 | 281 | service.loaded_models, |
| 282 | + failed_models, | |
| 280 | 283 | ) |
| 281 | 284 | logger.info( |
| 282 | - "Translation backends initialized on startup | models=%s", | |
| 285 | + "Translation backends initialized on startup | loaded=%s failed=%s", | |
| 283 | 286 | service.loaded_models, |
| 287 | + backend_errors, | |
| 284 | 288 | ) |
| 285 | 289 | verbose_logger.info( |
| 286 | 290 | "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s", |
| ... | ... | @@ -316,11 +320,14 @@ async def health_check(): |
| 316 | 320 | """Health check endpoint.""" |
| 317 | 321 | try: |
| 318 | 322 | service = get_translation_service() |
| 323 | + failed_models = list(getattr(service, "failed_models", [])) | |
| 324 | + backend_errors = dict(getattr(service, "backend_errors", {})) | |
| 319 | 325 | logger.info( |
| 320 | - "Health check | default_model=%s default_scene=%s loaded_models=%s", | |
| 326 | + "Health check | default_model=%s default_scene=%s loaded_models=%s failed_models=%s", | |
| 321 | 327 | service.config["default_model"], |
| 322 | 328 | service.config["default_scene"], |
| 323 | 329 | service.loaded_models, |
| 330 | + failed_models, | |
| 324 | 331 | ) |
| 325 | 332 | return { |
| 326 | 333 | "status": "healthy", |
| ... | ... | @@ -330,6 +337,8 @@ async def health_check(): |
| 330 | 337 | "available_models": service.available_models, |
| 331 | 338 | "enabled_capabilities": get_enabled_translation_models(service.config), |
| 332 | 339 | "loaded_models": service.loaded_models, |
| 340 | + "failed_models": failed_models, | |
| 341 | + "backend_errors": backend_errors, | |
| 333 | 342 | } |
| 334 | 343 | except Exception as e: |
| 335 | 344 | logger.error(f"Health check failed: {e}") |
| ... | ... | @@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request): |
| 463 | 472 | latency_ms = (time.perf_counter() - request_started) * 1000 |
| 464 | 473 | logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms) |
| 465 | 474 | raise HTTPException(status_code=400, detail=str(e)) from e |
| 475 | + except RuntimeError as e: | |
| 476 | + latency_ms = (time.perf_counter() - request_started) * 1000 | |
| 477 | + logger.warning("Translation backend unavailable | error=%s latency_ms=%.2f", e, latency_ms) | |
| 478 | + raise HTTPException(status_code=503, detail=str(e)) from e | |
| 466 | 479 | except Exception as e: |
| 467 | 480 | latency_ms = (time.perf_counter() - request_started) * 1000 |
| 468 | 481 | logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True) | ... | ... |
config/loader.py
| ... | ... | @@ -655,6 +655,14 @@ class AppConfigLoader: |
| 655 | 655 | |
| 656 | 656 | translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {} |
| 657 | 657 | normalized_translation = build_translation_config(translation_raw) |
| 658 | + local_translation_backends = {"local_nllb", "local_marian"} | |
| 659 | + for capability_name, capability_cfg in normalized_translation["capabilities"].items(): | |
| 660 | + backend_name = str(capability_cfg.get("backend") or "").strip().lower() | |
| 661 | + if backend_name not in local_translation_backends: | |
| 662 | + continue | |
| 663 | + for path_key in ("model_dir", "ct2_model_dir"): | |
| 664 | + if capability_cfg.get(path_key) not in (None, ""): | |
| 665 | + capability_cfg[path_key] = str(self._resolve_project_path_value(capability_cfg[path_key]).resolve()) | |
| 658 | 666 | translation_config = TranslationServiceConfig( |
| 659 | 667 | endpoint=str(normalized_translation["service_url"]).rstrip("/"), |
| 660 | 668 | timeout_sec=float(normalized_translation["timeout_sec"]), |
| ... | ... | @@ -749,7 +757,7 @@ class AppConfigLoader: |
| 749 | 757 | port=port, |
| 750 | 758 | backend=backend_name, |
| 751 | 759 | runtime_dir=( |
| 752 | - str(v) | |
| 760 | + str(self._resolve_project_path_value(v).resolve()) | |
| 753 | 761 | if (v := instance_raw.get("runtime_dir")) not in (None, "") |
| 754 | 762 | else None |
| 755 | 763 | ), |
| ... | ... | @@ -787,6 +795,12 @@ class AppConfigLoader: |
| 787 | 795 | rerank=rerank_config, |
| 788 | 796 | ) |
| 789 | 797 | |
| 798 | + def _resolve_project_path_value(self, value: Any) -> Path: | |
| 799 | + candidate = Path(str(value)).expanduser() | |
| 800 | + if candidate.is_absolute(): | |
| 801 | + return candidate | |
| 802 | + return self.project_root / candidate | |
| 803 | + | |
| 790 | 804 | def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig: |
| 791 | 805 | if not isinstance(raw, dict): |
| 792 | 806 | raise ConfigurationError("tenant_config must be a mapping") | ... | ... |
frontend/static/js/app.js
| ... | ... | @@ -316,7 +316,10 @@ async function performSearch(page = 1) { |
| 316 | 316 | document.getElementById('productGrid').innerHTML = ''; |
| 317 | 317 | |
| 318 | 318 | try { |
| 319 | - const response = await fetch(`${API_BASE_URL}/search/`, { | |
| 319 | + const searchUrl = new URL(`${API_BASE_URL}/search/`, window.location.origin); | |
| 320 | + searchUrl.searchParams.set('tenant_id', tenantId); | |
| 321 | + | |
| 322 | + const response = await fetch(searchUrl.toString(), { | |
| 320 | 323 | method: 'POST', |
| 321 | 324 | headers: { |
| 322 | 325 | 'Content-Type': 'application/json', | ... | ... |
requirements_translator_service.txt
| ... | ... | @@ -13,7 +13,8 @@ httpx>=0.24.0 |
| 13 | 13 | tqdm>=4.65.0 |
| 14 | 14 | |
| 15 | 15 | torch>=2.0.0 |
| 16 | -transformers>=4.30.0 | |
| 16 | +# Keep translator conversions on the last verified NLLB-compatible release line. | |
| 17 | +transformers>=4.51.0,<4.52.0 | |
| 17 | 18 | ctranslate2>=4.7.0 |
| 18 | 19 | sentencepiece>=0.2.0 |
| 19 | 20 | sacremoses>=0.1.1 | ... | ... |
scripts/download_translation_models.py
| ... | ... | @@ -0,0 +1,12 @@ |
| 1 | +#!/usr/bin/env python3 | |
| 2 | +"""Backward-compatible entrypoint for translation model downloads.""" | |
| 3 | + | |
| 4 | +from __future__ import annotations | |
| 5 | + | |
| 6 | +import runpy | |
| 7 | +from pathlib import Path | |
| 8 | + | |
| 9 | + | |
| 10 | +if __name__ == "__main__": | |
| 11 | + target = Path(__file__).resolve().parent / "translation" / "download_translation_models.py" | |
| 12 | + runpy.run_path(str(target), run_name="__main__") | ... | ... |
scripts/frontend/frontend_server.py
| ... | ... | @@ -60,6 +60,8 @@ class RateLimitingMixin: |
| 60 | 60 | class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin): |
| 61 | 61 | """Custom request handler with CORS support and robust error handling.""" |
| 62 | 62 | |
| 63 | + _ALLOWED_CORS_HEADERS = "Content-Type, X-Tenant-ID, X-Request-ID, Referer" | |
| 64 | + | |
| 63 | 65 | def _is_proxy_path(self, path: str) -> bool: |
| 64 | 66 | """Return True for API paths that should be forwarded to backend service.""" |
| 65 | 67 | return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/') |
| ... | ... | @@ -220,7 +222,7 @@ class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMix |
| 220 | 222 | # Add CORS headers |
| 221 | 223 | self.send_header('Access-Control-Allow-Origin', '*') |
| 222 | 224 | self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS') |
| 223 | - self.send_header('Access-Control-Allow-Headers', 'Content-Type') | |
| 225 | + self.send_header('Access-Control-Allow-Headers', self._ALLOWED_CORS_HEADERS) | |
| 224 | 226 | # Add security headers |
| 225 | 227 | self.send_header('X-Content-Type-Options', 'nosniff') |
| 226 | 228 | self.send_header('X-Frame-Options', 'DENY') | ... | ... |
scripts/setup_translator_venv.sh
| ... | ... | @@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" |
| 8 | 8 | cd "${PROJECT_ROOT}" |
| 9 | 9 | |
| 10 | 10 | VENV_DIR="${PROJECT_ROOT}/.venv-translator" |
| 11 | -PYTHON_BIN="${PYTHON_BIN:-python3}" | |
| 12 | 11 | TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}" |
| 12 | +MIN_PYTHON_MAJOR=3 | |
| 13 | +MIN_PYTHON_MINOR=10 | |
| 14 | + | |
| 15 | +python_meets_minimum() { | |
| 16 | + local bin="$1" | |
| 17 | + "${bin}" - <<'PY' "${MIN_PYTHON_MAJOR}" "${MIN_PYTHON_MINOR}" | |
| 18 | +import sys | |
| 19 | + | |
| 20 | +required = tuple(int(value) for value in sys.argv[1:]) | |
| 21 | +sys.exit(0 if sys.version_info[:2] >= required else 1) | |
| 22 | +PY | |
| 23 | +} | |
| 24 | + | |
| 25 | +discover_python_bin() { | |
| 26 | + local candidates=() | |
| 27 | + | |
| 28 | + if [[ -n "${PYTHON_BIN:-}" ]]; then | |
| 29 | + candidates+=("${PYTHON_BIN}") | |
| 30 | + fi | |
| 31 | + candidates+=("python3.12" "python3.11" "python3.10" "python3") | |
| 32 | + | |
| 33 | + local candidate | |
| 34 | + for candidate in "${candidates[@]}"; do | |
| 35 | + if ! command -v "${candidate}" >/dev/null 2>&1; then | |
| 36 | + continue | |
| 37 | + fi | |
| 38 | + if python_meets_minimum "${candidate}"; then | |
| 39 | + echo "${candidate}" | |
| 40 | + return 0 | |
| 41 | + fi | |
| 42 | + done | |
| 43 | + | |
| 44 | + return 1 | |
| 45 | +} | |
| 46 | + | |
| 47 | +if ! PYTHON_BIN="$(discover_python_bin)"; then | |
| 48 | + echo "ERROR: unable to find Python >= ${MIN_PYTHON_MAJOR}.${MIN_PYTHON_MINOR}." >&2 | |
| 49 | + echo "Set PYTHON_BIN to a compatible interpreter and rerun." >&2 | |
| 50 | + exit 1 | |
| 51 | +fi | |
| 13 | 52 | |
| 14 | 53 | if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then |
| 15 | 54 | echo "ERROR: python not found: ${PYTHON_BIN}" >&2 |
| ... | ... | @@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}" |
| 32 | 71 | export TMPDIR="${TMP_DIR}" |
| 33 | 72 | PIP_ARGS=(--no-cache-dir) |
| 34 | 73 | |
| 74 | +echo "Using Python=${PYTHON_BIN}" | |
| 35 | 75 | echo "Using TMPDIR=${TMPDIR}" |
| 36 | 76 | "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel |
| 37 | 77 | "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt | ... | ... |
scripts/translation/download_translation_models.py
| ... | ... | @@ -6,8 +6,6 @@ from __future__ import annotations |
| 6 | 6 | import argparse |
| 7 | 7 | import os |
| 8 | 8 | from pathlib import Path |
| 9 | -import shutil | |
| 10 | -import subprocess | |
| 11 | 9 | import sys |
| 12 | 10 | from typing import Iterable |
| 13 | 11 | |
| ... | ... | @@ -19,6 +17,7 @@ if str(PROJECT_ROOT) not in sys.path: |
| 19 | 17 | os.environ.setdefault("HF_HUB_DISABLE_XET", "1") |
| 20 | 18 | |
| 21 | 19 | from config.services_config import get_translation_config |
| 20 | +from translation.ct2_conversion import convert_transformers_model | |
| 22 | 21 | |
| 23 | 22 | |
| 24 | 23 | LOCAL_BACKENDS = {"local_nllb", "local_marian"} |
| ... | ... | @@ -46,19 +45,6 @@ def _compute_ct2_output_dir(capability: dict) -> Path: |
| 46 | 45 | return model_dir / f"ctranslate2-{normalized}" |
| 47 | 46 | |
| 48 | 47 | |
| 49 | -def _resolve_converter_binary() -> str: | |
| 50 | - candidate = shutil.which("ct2-transformers-converter") | |
| 51 | - if candidate: | |
| 52 | - return candidate | |
| 53 | - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter" | |
| 54 | - if venv_candidate.exists(): | |
| 55 | - return str(venv_candidate) | |
| 56 | - raise RuntimeError( | |
| 57 | - "ct2-transformers-converter was not found. " | |
| 58 | - "Install ctranslate2 in the active Python environment first." | |
| 59 | - ) | |
| 60 | - | |
| 61 | - | |
| 62 | 48 | def convert_to_ctranslate2(name: str, capability: dict) -> None: |
| 63 | 49 | model_id = str(capability.get("model_id") or "").strip() |
| 64 | 50 | model_dir = Path(str(capability.get("model_dir") or "")).expanduser() |
| ... | ... | @@ -75,18 +61,7 @@ def convert_to_ctranslate2(name: str, capability: dict) -> None: |
| 75 | 61 | ).strip() |
| 76 | 62 | output_dir.parent.mkdir(parents=True, exist_ok=True) |
| 77 | 63 | print(f"[convert] {name} -> {output_dir} ({quantization})") |
| 78 | - subprocess.run( | |
| 79 | - [ | |
| 80 | - _resolve_converter_binary(), | |
| 81 | - "--model", | |
| 82 | - model_source, | |
| 83 | - "--output_dir", | |
| 84 | - str(output_dir), | |
| 85 | - "--quantization", | |
| 86 | - quantization, | |
| 87 | - ], | |
| 88 | - check=True, | |
| 89 | - ) | |
| 64 | + convert_transformers_model(model_source, str(output_dir), quantization) | |
| 90 | 65 | print(f"[converted] {name}") |
| 91 | 66 | |
| 92 | 67 | ... | ... |
tests/test_ct2_conversion.py
| ... | ... | @@ -0,0 +1,85 @@ |
| 1 | +from __future__ import annotations | |
| 2 | + | |
| 3 | +import sys | |
| 4 | +import types | |
| 5 | + | |
| 6 | +import pytest | |
| 7 | + | |
| 8 | +import translation.ct2_conversion as ct2_conversion | |
| 9 | + | |
| 10 | + | |
| 11 | +class _FakeTransformersConverter: | |
| 12 | + def __init__(self, model_name_or_path): | |
| 13 | + self.model_name_or_path = model_name_or_path | |
| 14 | + self.load_calls = [] | |
| 15 | + | |
| 16 | + def load_model(self, model_class, resolved_model_name_or_path, **kwargs): | |
| 17 | + self.load_calls.append( | |
| 18 | + { | |
| 19 | + "model_class": model_class, | |
| 20 | + "resolved_model_name_or_path": resolved_model_name_or_path, | |
| 21 | + "kwargs": dict(kwargs), | |
| 22 | + } | |
| 23 | + ) | |
| 24 | + if "dtype" in kwargs or "torch_dtype" in kwargs: | |
| 25 | + raise TypeError("M2M100ForConditionalGeneration.__init__() got an unexpected keyword argument 'dtype'") | |
| 26 | + return {"loaded": True, "path": resolved_model_name_or_path} | |
| 27 | + | |
| 28 | + def convert(self, output_dir, quantization=None, force=False): | |
| 29 | + loaded = self.load_model("FakeModel", self.model_name_or_path, dtype="float32") | |
| 30 | + return { | |
| 31 | + "loaded": loaded, | |
| 32 | + "output_dir": output_dir, | |
| 33 | + "quantization": quantization, | |
| 34 | + "force": force, | |
| 35 | + "load_calls": list(self.load_calls), | |
| 36 | + } | |
| 37 | + | |
| 38 | + | |
| 39 | +def _install_fake_ctranslate2(monkeypatch, base_converter): | |
| 40 | + converters_module = types.ModuleType("ctranslate2.converters") | |
| 41 | + converters_module.TransformersConverter = base_converter | |
| 42 | + ctranslate2_module = types.ModuleType("ctranslate2") | |
| 43 | + ctranslate2_module.converters = converters_module | |
| 44 | + | |
| 45 | + monkeypatch.setitem(sys.modules, "ctranslate2", ctranslate2_module) | |
| 46 | + monkeypatch.setitem(sys.modules, "ctranslate2.converters", converters_module) | |
| 47 | + | |
| 48 | + | |
| 49 | +def test_convert_transformers_model_retries_without_torch_dtype(monkeypatch): | |
| 50 | + _install_fake_ctranslate2(monkeypatch, _FakeTransformersConverter) | |
| 51 | + fake_transformers = types.ModuleType("transformers") | |
| 52 | + fake_transformers.AutoConfig = types.SimpleNamespace( | |
| 53 | + from_pretrained=lambda path: types.SimpleNamespace(torch_dtype="float32", path=path) | |
| 54 | + ) | |
| 55 | + monkeypatch.setitem(sys.modules, "transformers", fake_transformers) | |
| 56 | + | |
| 57 | + result = ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16") | |
| 58 | + | |
| 59 | + assert result["loaded"] == {"loaded": True, "path": "fake-model"} | |
| 60 | + assert result["output_dir"] == "/tmp/out" | |
| 61 | + assert result["quantization"] == "float16" | |
| 62 | + assert result["force"] is False | |
| 63 | + assert len(result["load_calls"]) == 2 | |
| 64 | + assert result["load_calls"][0] == { | |
| 65 | + "model_class": "FakeModel", | |
| 66 | + "resolved_model_name_or_path": "fake-model", | |
| 67 | + "kwargs": {"dtype": "float32"}, | |
| 68 | + } | |
| 69 | + assert result["load_calls"][1]["model_class"] == "FakeModel" | |
| 70 | + assert result["load_calls"][1]["resolved_model_name_or_path"] == "fake-model" | |
| 71 | + assert getattr(result["load_calls"][1]["kwargs"]["config"], "torch_dtype", "missing") is None | |
| 72 | + | |
| 73 | + | |
| 74 | +def test_convert_transformers_model_preserves_unrelated_type_errors(monkeypatch): | |
| 75 | + class _AlwaysFailingConverter(_FakeTransformersConverter): | |
| 76 | + def load_model(self, model_class, resolved_model_name_or_path, **kwargs): | |
| 77 | + raise TypeError("different constructor error") | |
| 78 | + | |
| 79 | + _install_fake_ctranslate2(monkeypatch, _AlwaysFailingConverter) | |
| 80 | + fake_transformers = types.ModuleType("transformers") | |
| 81 | + fake_transformers.AutoConfig = types.SimpleNamespace(from_pretrained=lambda path: types.SimpleNamespace(path=path)) | |
| 82 | + monkeypatch.setitem(sys.modules, "transformers", fake_transformers) | |
| 83 | + | |
| 84 | + with pytest.raises(TypeError, match="different constructor error"): | |
| 85 | + ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16") | ... | ... |
tests/test_translation_local_backends.py
| ... | ... | @@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch): |
| 201 | 201 | assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]] |
| 202 | 202 | |
| 203 | 203 | |
| 204 | +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_wrong_type(tmp_path, monkeypatch): | |
| 205 | + wrong_dir = tmp_path / "wrong-nllb" | |
| 206 | + wrong_dir.mkdir() | |
| 207 | + (wrong_dir / "config.json").write_text('{"model_type":"led"}', encoding="utf-8") | |
| 208 | + | |
| 209 | + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime) | |
| 210 | + | |
| 211 | + backend = NLLBCTranslate2TranslationBackend( | |
| 212 | + name="nllb-200-distilled-600m", | |
| 213 | + model_id="facebook/nllb-200-distilled-600M", | |
| 214 | + model_dir=str(wrong_dir), | |
| 215 | + device="cpu", | |
| 216 | + torch_dtype="float32", | |
| 217 | + batch_size=1, | |
| 218 | + max_input_length=16, | |
| 219 | + max_new_tokens=16, | |
| 220 | + num_beams=1, | |
| 221 | + ) | |
| 222 | + | |
| 223 | + assert backend._model_source() == "facebook/nllb-200-distilled-600M" | |
| 224 | + assert backend._tokenizer_source() == "facebook/nllb-200-distilled-600M" | |
| 225 | + | |
| 226 | + | |
| 227 | +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_incomplete(tmp_path, monkeypatch): | |
| 228 | + incomplete_dir = tmp_path / "incomplete-nllb" | |
| 229 | + incomplete_dir.mkdir() | |
| 230 | + (incomplete_dir / "ctranslate2-float16").mkdir() | |
| 231 | + | |
| 232 | + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime) | |
| 233 | + | |
| 234 | + backend = NLLBCTranslate2TranslationBackend( | |
| 235 | + name="nllb-200-distilled-600m", | |
| 236 | + model_id="facebook/nllb-200-distilled-600M", | |
| 237 | + model_dir=str(incomplete_dir), | |
| 238 | + device="cpu", | |
| 239 | + torch_dtype="float32", | |
| 240 | + batch_size=1, | |
| 241 | + max_input_length=16, | |
| 242 | + max_new_tokens=16, | |
| 243 | + num_beams=1, | |
| 244 | + ) | |
| 245 | + | |
| 246 | + assert backend._model_source() == "facebook/nllb-200-distilled-600M" | |
| 247 | + | |
| 248 | + | |
| 204 | 249 | def test_nllb_resolves_flores_short_tags_and_iso_no(): |
| 205 | 250 | cat = build_nllb_language_catalog(None) |
| 206 | 251 | assert resolve_nllb_language_code("ca", cat) == "cat_Latn" | ... | ... |
tests/test_translator_failure_semantics.py
| ... | ... | @@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog): |
| 197 | 197 | ] |
| 198 | 198 | |
| 199 | 199 | |
| 200 | +def test_service_skips_failed_backend_but_keeps_healthy_capabilities(monkeypatch): | |
| 201 | + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None)) | |
| 202 | + | |
| 203 | + def _fake_create_backend(self, *, name, backend_type, cfg): | |
| 204 | + del self, backend_type, cfg | |
| 205 | + if name == "broken-nllb": | |
| 206 | + raise RuntimeError("broken model dir") | |
| 207 | + | |
| 208 | + class _Backend: | |
| 209 | + model = name | |
| 210 | + | |
| 211 | + @property | |
| 212 | + def supports_batch(self): | |
| 213 | + return True | |
| 214 | + | |
| 215 | + def translate(self, text, target_lang, source_lang=None, scene=None): | |
| 216 | + del target_lang, source_lang, scene | |
| 217 | + return text | |
| 218 | + | |
| 219 | + return _Backend() | |
| 220 | + | |
| 221 | + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend) | |
| 222 | + service = TranslationService( | |
| 223 | + { | |
| 224 | + "service_url": "http://127.0.0.1:6006", | |
| 225 | + "timeout_sec": 10.0, | |
| 226 | + "default_model": "llm", | |
| 227 | + "default_scene": "general", | |
| 228 | + "capabilities": { | |
| 229 | + "llm": { | |
| 230 | + "enabled": True, | |
| 231 | + "backend": "llm", | |
| 232 | + "model": "dummy-llm", | |
| 233 | + "base_url": "https://example.com", | |
| 234 | + "timeout_sec": 10.0, | |
| 235 | + "use_cache": True, | |
| 236 | + }, | |
| 237 | + "broken-nllb": { | |
| 238 | + "enabled": True, | |
| 239 | + "backend": "local_nllb", | |
| 240 | + "model_id": "dummy", | |
| 241 | + "model_dir": "dummy", | |
| 242 | + "device": "cpu", | |
| 243 | + "torch_dtype": "float32", | |
| 244 | + "batch_size": 8, | |
| 245 | + "max_input_length": 16, | |
| 246 | + "max_new_tokens": 16, | |
| 247 | + "num_beams": 1, | |
| 248 | + "use_cache": True, | |
| 249 | + }, | |
| 250 | + }, | |
| 251 | + "cache": { | |
| 252 | + "ttl_seconds": 60, | |
| 253 | + "sliding_expiration": True, | |
| 254 | + }, | |
| 255 | + } | |
| 256 | + ) | |
| 257 | + | |
| 258 | + assert service.available_models == ["llm", "broken-nllb"] | |
| 259 | + assert service.loaded_models == ["llm"] | |
| 260 | + assert service.failed_models == ["broken-nllb"] | |
| 261 | + assert service.backend_errors["broken-nllb"] == "broken model dir" | |
| 262 | + | |
| 263 | + with pytest.raises(RuntimeError, match="failed to initialize"): | |
| 264 | + service.get_backend("broken-nllb") | |
| 265 | + | |
| 266 | + | |
| 200 | 267 | def test_translation_cache_probe_models_order(): |
| 201 | 268 | cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}} |
| 202 | 269 | assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"] | ... | ... |
translation/backends/local_ctranslate2.py
| ... | ... | @@ -4,9 +4,7 @@ from __future__ import annotations |
| 4 | 4 | |
| 5 | 5 | import logging |
| 6 | 6 | import os |
| 7 | -import shutil | |
| 8 | -import subprocess | |
| 9 | -import sys | |
| 7 | +import json | |
| 10 | 8 | import threading |
| 11 | 9 | from pathlib import Path |
| 12 | 10 | from typing import Dict, List, Optional, Sequence, Union |
| ... | ... | @@ -24,6 +22,7 @@ from translation.text_splitter import ( |
| 24 | 22 | join_translated_segments, |
| 25 | 23 | split_text_for_translation, |
| 26 | 24 | ) |
| 25 | +from translation.ct2_conversion import convert_transformers_model | |
| 27 | 26 | |
| 28 | 27 | logger = logging.getLogger(__name__) |
| 29 | 28 | |
| ... | ... | @@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -> str: |
| 76 | 75 | return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}") |
| 77 | 76 | |
| 78 | 77 | |
| 79 | -def _resolve_converter_binary() -> str: | |
| 80 | - candidate = shutil.which("ct2-transformers-converter") | |
| 81 | - if candidate: | |
| 82 | - return candidate | |
| 83 | - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter" | |
| 84 | - if venv_candidate.exists(): | |
| 85 | - return str(venv_candidate) | |
| 86 | - raise RuntimeError( | |
| 87 | - "ct2-transformers-converter was not found. " | |
| 88 | - "Ensure ctranslate2 is installed in the active translator environment." | |
| 89 | - ) | |
| 78 | +def _detect_local_model_type(model_dir: str) -> Optional[str]: | |
| 79 | + config_path = Path(model_dir).expanduser() / "config.json" | |
| 80 | + if not config_path.exists(): | |
| 81 | + return None | |
| 82 | + try: | |
| 83 | + with open(config_path, "r", encoding="utf-8") as handle: | |
| 84 | + payload = json.load(handle) or {} | |
| 85 | + except Exception as exc: | |
| 86 | + logger.warning("Failed to inspect local translation config %s: %s", config_path, exc) | |
| 87 | + return None | |
| 88 | + model_type = str(payload.get("model_type") or "").strip().lower() | |
| 89 | + return model_type or None | |
| 90 | 90 | |
| 91 | 91 | |
| 92 | 92 | class LocalCTranslate2TranslationBackend: |
| ... | ... | @@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend: |
| 144 | 144 | self.ct2_decoding_length_extra = int(ct2_decoding_length_extra) |
| 145 | 145 | self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min)) |
| 146 | 146 | self._tokenizer_lock = threading.Lock() |
| 147 | + self._local_model_source = self._resolve_local_model_source() | |
| 147 | 148 | self._load_runtime() |
| 148 | 149 | |
| 149 | 150 | @property |
| ... | ... | @@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend: |
| 151 | 152 | return True |
| 152 | 153 | |
| 153 | 154 | def _tokenizer_source(self) -> str: |
| 154 | - return self.model_dir if os.path.exists(self.model_dir) else self.model_id | |
| 155 | + return self._local_model_source or self.model_id | |
| 155 | 156 | |
| 156 | 157 | def _model_source(self) -> str: |
| 157 | - return self.model_dir if os.path.exists(self.model_dir) else self.model_id | |
| 158 | + return self._local_model_source or self.model_id | |
| 159 | + | |
| 160 | + def _expected_local_model_types(self) -> Optional[set[str]]: | |
| 161 | + return None | |
| 162 | + | |
| 163 | + def _resolve_local_model_source(self) -> Optional[str]: | |
| 164 | + model_path = Path(self.model_dir).expanduser() | |
| 165 | + if not model_path.exists(): | |
| 166 | + return None | |
| 167 | + if not (model_path / "config.json").exists(): | |
| 168 | + logger.warning( | |
| 169 | + "Local translation model_dir is incomplete | model=%s model_dir=%s missing=config.json fallback=model_id", | |
| 170 | + self.model, | |
| 171 | + model_path, | |
| 172 | + ) | |
| 173 | + return None | |
| 174 | + | |
| 175 | + expected_types = self._expected_local_model_types() | |
| 176 | + if not expected_types: | |
| 177 | + return str(model_path) | |
| 178 | + | |
| 179 | + detected_type = _detect_local_model_type(str(model_path)) | |
| 180 | + if detected_type is None: | |
| 181 | + return str(model_path) | |
| 182 | + if detected_type in expected_types: | |
| 183 | + return str(model_path) | |
| 184 | + | |
| 185 | + logger.warning( | |
| 186 | + "Local translation model_dir has unexpected model_type | model=%s model_dir=%s detected=%s expected=%s fallback=model_id", | |
| 187 | + self.model, | |
| 188 | + model_path, | |
| 189 | + detected_type, | |
| 190 | + sorted(expected_types), | |
| 191 | + ) | |
| 192 | + return None | |
| 158 | 193 | |
| 159 | 194 | def _tokenizer_kwargs(self) -> Dict[str, object]: |
| 160 | 195 | return {} |
| ... | ... | @@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend: |
| 204 | 239 | ) |
| 205 | 240 | |
| 206 | 241 | ct2_path.parent.mkdir(parents=True, exist_ok=True) |
| 207 | - converter = _resolve_converter_binary() | |
| 208 | 242 | logger.info( |
| 209 | 243 | "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s", |
| 210 | 244 | self.model, |
| ... | ... | @@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend: |
| 213 | 247 | self.ct2_conversion_quantization, |
| 214 | 248 | ) |
| 215 | 249 | try: |
| 216 | - subprocess.run( | |
| 217 | - [ | |
| 218 | - converter, | |
| 219 | - "--model", | |
| 220 | - model_source, | |
| 221 | - "--output_dir", | |
| 222 | - str(ct2_path), | |
| 223 | - "--quantization", | |
| 224 | - self.ct2_conversion_quantization, | |
| 225 | - ], | |
| 226 | - check=True, | |
| 227 | - stdout=subprocess.PIPE, | |
| 228 | - stderr=subprocess.PIPE, | |
| 229 | - text=True, | |
| 250 | + convert_transformers_model( | |
| 251 | + model_source, | |
| 252 | + str(ct2_path), | |
| 253 | + self.ct2_conversion_quantization, | |
| 230 | 254 | ) |
| 231 | - except subprocess.CalledProcessError as exc: | |
| 232 | - stderr = exc.stderr.strip() | |
| 255 | + except Exception as exc: | |
| 233 | 256 | raise RuntimeError( |
| 234 | - f"Failed to convert model '{self.model}' to CTranslate2: {stderr or exc}" | |
| 257 | + f"Failed to convert model '{self.model}' to CTranslate2: {exc}" | |
| 235 | 258 | ) from exc |
| 236 | 259 | |
| 237 | 260 | def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]: |
| ... | ... | @@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): |
| 557 | 580 | f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}" |
| 558 | 581 | ) |
| 559 | 582 | |
| 583 | + def _expected_local_model_types(self) -> Optional[set[str]]: | |
| 584 | + return {"marian"} | |
| 585 | + | |
| 560 | 586 | |
| 561 | 587 | class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): |
| 562 | 588 | """Local backend for NLLB models on CTranslate2.""" |
| ... | ... | @@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): |
| 619 | 645 | if resolve_nllb_language_code(target_lang, self.language_codes) is None: |
| 620 | 646 | raise ValueError(f"Unsupported NLLB target language: {target_lang}") |
| 621 | 647 | |
| 648 | + def _expected_local_model_types(self) -> Optional[set[str]]: | |
| 649 | + return {"m2m_100", "nllb_moe"} | |
| 650 | + | |
| 622 | 651 | def _get_tokenizer_for_source(self, source_lang: str): |
| 623 | 652 | src_code = resolve_nllb_language_code(source_lang, self.language_codes) |
| 624 | 653 | if src_code is None: | ... | ... |
translation/ct2_conversion.py
| ... | ... | @@ -0,0 +1,52 @@ |
| 1 | +"""Helpers for converting Hugging Face translation models to CTranslate2.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import copy | |
| 6 | +import logging | |
| 7 | + | |
| 8 | +logger = logging.getLogger(__name__) | |
| 9 | + | |
| 10 | + | |
| 11 | +def convert_transformers_model( | |
| 12 | + model_name_or_path: str, | |
| 13 | + output_dir: str, | |
| 14 | + quantization: str, | |
| 15 | + *, | |
| 16 | + force: bool = False, | |
| 17 | +) -> str: | |
| 18 | + from ctranslate2.converters import TransformersConverter | |
| 19 | + from transformers import AutoConfig | |
| 20 | + | |
| 21 | + class _CompatibleTransformersConverter(TransformersConverter): | |
| 22 | + def load_model(self, model_class, resolved_model_name_or_path, **kwargs): | |
| 23 | + try: | |
| 24 | + return super().load_model(model_class, resolved_model_name_or_path, **kwargs) | |
| 25 | + except TypeError as exc: | |
| 26 | + if "unexpected keyword argument 'dtype'" not in str(exc): | |
| 27 | + raise | |
| 28 | + if kwargs.get("dtype") is None and kwargs.get("torch_dtype") is None: | |
| 29 | + raise | |
| 30 | + | |
| 31 | + logger.warning( | |
| 32 | + "Retrying CTranslate2 model load without dtype hints | model=%s class=%s", | |
| 33 | + resolved_model_name_or_path, | |
| 34 | + getattr(model_class, "__name__", model_class), | |
| 35 | + ) | |
| 36 | + retry_kwargs = dict(kwargs) | |
| 37 | + retry_kwargs.pop("dtype", None) | |
| 38 | + retry_kwargs.pop("torch_dtype", None) | |
| 39 | + config = retry_kwargs.get("config") | |
| 40 | + if config is None: | |
| 41 | + config = AutoConfig.from_pretrained(resolved_model_name_or_path) | |
| 42 | + else: | |
| 43 | + config = copy.deepcopy(config) | |
| 44 | + if hasattr(config, "dtype"): | |
| 45 | + config.dtype = None | |
| 46 | + if hasattr(config, "torch_dtype"): | |
| 47 | + config.torch_dtype = None | |
| 48 | + retry_kwargs["config"] = config | |
| 49 | + return super().load_model(model_class, resolved_model_name_or_path, **retry_kwargs) | |
| 50 | + | |
| 51 | + converter = _CompatibleTransformersConverter(model_name_or_path) | |
| 52 | + return converter.convert(output_dir=output_dir, quantization=quantization, force=force) | ... | ... |
translation/service.py
| ... | ... | @@ -31,7 +31,12 @@ class TranslationService: |
| 31 | 31 | if not self._enabled_capabilities: |
| 32 | 32 | raise ValueError("No enabled translation backends found in services.translation.capabilities") |
| 33 | 33 | self._translation_cache = TranslationCache(self.config["cache"]) |
| 34 | - self._backends = self._initialize_backends() | |
| 34 | + self._backends: Dict[str, TranslationBackendProtocol] = {} | |
| 35 | + self._backend_errors: Dict[str, str] = {} | |
| 36 | + self._initialize_backends() | |
| 37 | + if not self._backends: | |
| 38 | + details = ", ".join(f"{name}: {err}" for name, err in sorted(self._backend_errors.items())) or "unknown error" | |
| 39 | + raise RuntimeError(f"No translation backends could be initialized: {details}") | |
| 35 | 40 | |
| 36 | 41 | def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]: |
| 37 | 42 | enabled: Dict[str, Dict[str, object]] = {} |
| ... | ... | @@ -62,24 +67,47 @@ class TranslationService: |
| 62 | 67 | raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'") |
| 63 | 68 | return factory(name=name, cfg=cfg) |
| 64 | 69 | |
| 65 | - def _initialize_backends(self) -> Dict[str, TranslationBackendProtocol]: | |
| 66 | - backends: Dict[str, TranslationBackendProtocol] = {} | |
| 67 | - for name, capability_cfg in self._enabled_capabilities.items(): | |
| 68 | - backend_type = str(capability_cfg["backend"]) | |
| 69 | - logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type) | |
| 70 | - backends[name] = self._create_backend( | |
| 70 | + def _load_backend(self, name: str) -> Optional[TranslationBackendProtocol]: | |
| 71 | + capability_cfg = self._enabled_capabilities.get(name) | |
| 72 | + if capability_cfg is None: | |
| 73 | + return None | |
| 74 | + if name in self._backends: | |
| 75 | + return self._backends[name] | |
| 76 | + | |
| 77 | + backend_type = str(capability_cfg["backend"]) | |
| 78 | + logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type) | |
| 79 | + try: | |
| 80 | + backend = self._create_backend( | |
| 71 | 81 | name=name, |
| 72 | 82 | backend_type=backend_type, |
| 73 | 83 | cfg=capability_cfg, |
| 74 | 84 | ) |
| 75 | - logger.info( | |
| 76 | - "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s", | |
| 85 | + except Exception as exc: | |
| 86 | + error_text = str(exc).strip() or exc.__class__.__name__ | |
| 87 | + self._backend_errors[name] = error_text | |
| 88 | + logger.error( | |
| 89 | + "Translation backend initialization failed | model=%s backend=%s error=%s", | |
| 77 | 90 | name, |
| 78 | 91 | backend_type, |
| 79 | - bool(capability_cfg.get("use_cache")), | |
| 80 | - getattr(backends[name], "model", name), | |
| 92 | + error_text, | |
| 93 | + exc_info=True, | |
| 81 | 94 | ) |
| 82 | - return backends | |
| 95 | + return None | |
| 96 | + | |
| 97 | + self._backends[name] = backend | |
| 98 | + self._backend_errors.pop(name, None) | |
| 99 | + logger.info( | |
| 100 | + "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s", | |
| 101 | + name, | |
| 102 | + backend_type, | |
| 103 | + bool(capability_cfg.get("use_cache")), | |
| 104 | + getattr(backend, "model", name), | |
| 105 | + ) | |
| 106 | + return backend | |
| 107 | + | |
| 108 | + def _initialize_backends(self) -> None: | |
| 109 | + for name, capability_cfg in self._enabled_capabilities.items(): | |
| 110 | + self._load_backend(name) | |
| 83 | 111 | |
| 84 | 112 | def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: |
| 85 | 113 | from translation.backends.qwen_mt import QwenMTTranslationBackend |
| ... | ... | @@ -178,13 +206,27 @@ class TranslationService: |
| 178 | 206 | def loaded_models(self) -> List[str]: |
| 179 | 207 | return list(self._backends.keys()) |
| 180 | 208 | |
| 209 | + @property | |
| 210 | + def failed_models(self) -> List[str]: | |
| 211 | + return list(self._backend_errors.keys()) | |
| 212 | + | |
| 213 | + @property | |
| 214 | + def backend_errors(self) -> Dict[str, str]: | |
| 215 | + return dict(self._backend_errors) | |
| 216 | + | |
| 181 | 217 | def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: |
| 182 | 218 | normalized = normalize_translation_model(self.config, model) |
| 183 | - backend = self._backends.get(normalized) | |
| 219 | + backend = self._backends.get(normalized) or self._load_backend(normalized) | |
| 184 | 220 | if backend is None: |
| 185 | - raise ValueError( | |
| 186 | - f"Translation model '{normalized}' is not enabled. " | |
| 187 | - f"Available models: {', '.join(self.available_models) or 'none'}" | |
| 221 | + if normalized not in self._enabled_capabilities: | |
| 222 | + raise ValueError( | |
| 223 | + f"Translation model '{normalized}' is not enabled. " | |
| 224 | + f"Available models: {', '.join(self.available_models) or 'none'}" | |
| 225 | + ) | |
| 226 | + error_text = self._backend_errors.get(normalized) or "unknown initialization error" | |
| 227 | + raise RuntimeError( | |
| 228 | + f"Translation model '{normalized}' failed to initialize: {error_text}. " | |
| 229 | + f"Loaded models: {', '.join(self.loaded_models) or 'none'}" | |
| 188 | 230 | ) |
| 189 | 231 | return backend |
| 190 | 232 | ... | ... |
-
mentioned in commit 89fa3f3ccb7d7815460c21ea52ecca110435d61e