Commit f07947a5d510f29a6f588b84e3734ab5f0310a38

Authored by tangwang
1 parent 0ba0e0fc

Improve portability and harden public frontend search

api/translator_app.py
@@ -271,16 +271,20 @@ async def lifespan(_: FastAPI): @@ -271,16 +271,20 @@ async def lifespan(_: FastAPI):
271 """Initialize all enabled translation backends on process startup.""" 271 """Initialize all enabled translation backends on process startup."""
272 logger.info("Starting Translation Service API") 272 logger.info("Starting Translation Service API")
273 service = get_translation_service() 273 service = get_translation_service()
  274 + failed_models = list(getattr(service, "failed_models", []))
  275 + backend_errors = dict(getattr(service, "backend_errors", {}))
274 logger.info( 276 logger.info(
275 - "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s", 277 + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s failed_models=%s",
276 service.config["default_model"], 278 service.config["default_model"],
277 service.config["default_scene"], 279 service.config["default_scene"],
278 service.available_models, 280 service.available_models,
279 service.loaded_models, 281 service.loaded_models,
  282 + failed_models,
280 ) 283 )
281 logger.info( 284 logger.info(
282 - "Translation backends initialized on startup | models=%s", 285 + "Translation backends initialized on startup | loaded=%s failed=%s",
283 service.loaded_models, 286 service.loaded_models,
  287 + backend_errors,
284 ) 288 )
285 verbose_logger.info( 289 verbose_logger.info(
286 "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s", 290 "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s",
@@ -316,11 +320,14 @@ async def health_check(): @@ -316,11 +320,14 @@ async def health_check():
316 """Health check endpoint.""" 320 """Health check endpoint."""
317 try: 321 try:
318 service = get_translation_service() 322 service = get_translation_service()
  323 + failed_models = list(getattr(service, "failed_models", []))
  324 + backend_errors = dict(getattr(service, "backend_errors", {}))
319 logger.info( 325 logger.info(
320 - "Health check | default_model=%s default_scene=%s loaded_models=%s", 326 + "Health check | default_model=%s default_scene=%s loaded_models=%s failed_models=%s",
321 service.config["default_model"], 327 service.config["default_model"],
322 service.config["default_scene"], 328 service.config["default_scene"],
323 service.loaded_models, 329 service.loaded_models,
  330 + failed_models,
324 ) 331 )
325 return { 332 return {
326 "status": "healthy", 333 "status": "healthy",
@@ -330,6 +337,8 @@ async def health_check(): @@ -330,6 +337,8 @@ async def health_check():
330 "available_models": service.available_models, 337 "available_models": service.available_models,
331 "enabled_capabilities": get_enabled_translation_models(service.config), 338 "enabled_capabilities": get_enabled_translation_models(service.config),
332 "loaded_models": service.loaded_models, 339 "loaded_models": service.loaded_models,
  340 + "failed_models": failed_models,
  341 + "backend_errors": backend_errors,
333 } 342 }
334 except Exception as e: 343 except Exception as e:
335 logger.error(f"Health check failed: {e}") 344 logger.error(f"Health check failed: {e}")
@@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request): @@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request):
463 latency_ms = (time.perf_counter() - request_started) * 1000 472 latency_ms = (time.perf_counter() - request_started) * 1000
464 logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms) 473 logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms)
465 raise HTTPException(status_code=400, detail=str(e)) from e 474 raise HTTPException(status_code=400, detail=str(e)) from e
  475 + except RuntimeError as e:
  476 + latency_ms = (time.perf_counter() - request_started) * 1000
  477 + logger.warning("Translation backend unavailable | error=%s latency_ms=%.2f", e, latency_ms)
  478 + raise HTTPException(status_code=503, detail=str(e)) from e
466 except Exception as e: 479 except Exception as e:
467 latency_ms = (time.perf_counter() - request_started) * 1000 480 latency_ms = (time.perf_counter() - request_started) * 1000
468 logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True) 481 logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True)
@@ -655,6 +655,14 @@ class AppConfigLoader: @@ -655,6 +655,14 @@ class AppConfigLoader:
655 655
656 translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {} 656 translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {}
657 normalized_translation = build_translation_config(translation_raw) 657 normalized_translation = build_translation_config(translation_raw)
  658 + local_translation_backends = {"local_nllb", "local_marian"}
  659 + for capability_name, capability_cfg in normalized_translation["capabilities"].items():
  660 + backend_name = str(capability_cfg.get("backend") or "").strip().lower()
  661 + if backend_name not in local_translation_backends:
  662 + continue
  663 + for path_key in ("model_dir", "ct2_model_dir"):
  664 + if capability_cfg.get(path_key) not in (None, ""):
  665 + capability_cfg[path_key] = str(self._resolve_project_path_value(capability_cfg[path_key]).resolve())
658 translation_config = TranslationServiceConfig( 666 translation_config = TranslationServiceConfig(
659 endpoint=str(normalized_translation["service_url"]).rstrip("/"), 667 endpoint=str(normalized_translation["service_url"]).rstrip("/"),
660 timeout_sec=float(normalized_translation["timeout_sec"]), 668 timeout_sec=float(normalized_translation["timeout_sec"]),
@@ -749,7 +757,7 @@ class AppConfigLoader: @@ -749,7 +757,7 @@ class AppConfigLoader:
749 port=port, 757 port=port,
750 backend=backend_name, 758 backend=backend_name,
751 runtime_dir=( 759 runtime_dir=(
752 - str(v) 760 + str(self._resolve_project_path_value(v).resolve())
753 if (v := instance_raw.get("runtime_dir")) not in (None, "") 761 if (v := instance_raw.get("runtime_dir")) not in (None, "")
754 else None 762 else None
755 ), 763 ),
@@ -787,6 +795,12 @@ class AppConfigLoader: @@ -787,6 +795,12 @@ class AppConfigLoader:
787 rerank=rerank_config, 795 rerank=rerank_config,
788 ) 796 )
789 797
  798 + def _resolve_project_path_value(self, value: Any) -> Path:
  799 + candidate = Path(str(value)).expanduser()
  800 + if candidate.is_absolute():
  801 + return candidate
  802 + return self.project_root / candidate
  803 +
790 def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig: 804 def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig:
791 if not isinstance(raw, dict): 805 if not isinstance(raw, dict):
792 raise ConfigurationError("tenant_config must be a mapping") 806 raise ConfigurationError("tenant_config must be a mapping")
frontend/static/js/app.js
@@ -316,7 +316,10 @@ async function performSearch(page = 1) { @@ -316,7 +316,10 @@ async function performSearch(page = 1) {
316 document.getElementById('productGrid').innerHTML = ''; 316 document.getElementById('productGrid').innerHTML = '';
317 317
318 try { 318 try {
319 - const response = await fetch(`${API_BASE_URL}/search/`, { 319 + const searchUrl = new URL(`${API_BASE_URL}/search/`, window.location.origin);
  320 + searchUrl.searchParams.set('tenant_id', tenantId);
  321 +
  322 + const response = await fetch(searchUrl.toString(), {
320 method: 'POST', 323 method: 'POST',
321 headers: { 324 headers: {
322 'Content-Type': 'application/json', 325 'Content-Type': 'application/json',
requirements_translator_service.txt
@@ -13,7 +13,8 @@ httpx>=0.24.0 @@ -13,7 +13,8 @@ httpx>=0.24.0
13 tqdm>=4.65.0 13 tqdm>=4.65.0
14 14
15 torch>=2.0.0 15 torch>=2.0.0
16 -transformers>=4.30.0 16 +# Keep translator conversions on the last verified NLLB-compatible release line.
  17 +transformers>=4.51.0,<4.52.0
17 ctranslate2>=4.7.0 18 ctranslate2>=4.7.0
18 sentencepiece>=0.2.0 19 sentencepiece>=0.2.0
19 sacremoses>=0.1.1 20 sacremoses>=0.1.1
scripts/download_translation_models.py 0 → 100644
@@ -0,0 +1,12 @@ @@ -0,0 +1,12 @@
  1 +#!/usr/bin/env python3
  2 +"""Backward-compatible entrypoint for translation model downloads."""
  3 +
  4 +from __future__ import annotations
  5 +
  6 +import runpy
  7 +from pathlib import Path
  8 +
  9 +
  10 +if __name__ == "__main__":
  11 + target = Path(__file__).resolve().parent / "translation" / "download_translation_models.py"
  12 + runpy.run_path(str(target), run_name="__main__")
scripts/frontend/frontend_server.py
@@ -60,6 +60,8 @@ class RateLimitingMixin: @@ -60,6 +60,8 @@ class RateLimitingMixin:
60 class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin): 60 class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin):
61 """Custom request handler with CORS support and robust error handling.""" 61 """Custom request handler with CORS support and robust error handling."""
62 62
  63 + _ALLOWED_CORS_HEADERS = "Content-Type, X-Tenant-ID, X-Request-ID, Referer"
  64 +
63 def _is_proxy_path(self, path: str) -> bool: 65 def _is_proxy_path(self, path: str) -> bool:
64 """Return True for API paths that should be forwarded to backend service.""" 66 """Return True for API paths that should be forwarded to backend service."""
65 return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/') 67 return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/')
@@ -220,7 +222,7 @@ class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMix @@ -220,7 +222,7 @@ class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMix
220 # Add CORS headers 222 # Add CORS headers
221 self.send_header('Access-Control-Allow-Origin', '*') 223 self.send_header('Access-Control-Allow-Origin', '*')
222 self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS') 224 self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
223 - self.send_header('Access-Control-Allow-Headers', 'Content-Type') 225 + self.send_header('Access-Control-Allow-Headers', self._ALLOWED_CORS_HEADERS)
224 # Add security headers 226 # Add security headers
225 self.send_header('X-Content-Type-Options', 'nosniff') 227 self.send_header('X-Content-Type-Options', 'nosniff')
226 self.send_header('X-Frame-Options', 'DENY') 228 self.send_header('X-Frame-Options', 'DENY')
scripts/setup_translator_venv.sh
@@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" @@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
8 cd "${PROJECT_ROOT}" 8 cd "${PROJECT_ROOT}"
9 9
10 VENV_DIR="${PROJECT_ROOT}/.venv-translator" 10 VENV_DIR="${PROJECT_ROOT}/.venv-translator"
11 -PYTHON_BIN="${PYTHON_BIN:-python3}"  
12 TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}" 11 TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}"
  12 +MIN_PYTHON_MAJOR=3
  13 +MIN_PYTHON_MINOR=10
  14 +
  15 +python_meets_minimum() {
  16 + local bin="$1"
  17 + "${bin}" - <<'PY' "${MIN_PYTHON_MAJOR}" "${MIN_PYTHON_MINOR}"
  18 +import sys
  19 +
  20 +required = tuple(int(value) for value in sys.argv[1:])
  21 +sys.exit(0 if sys.version_info[:2] >= required else 1)
  22 +PY
  23 +}
  24 +
  25 +discover_python_bin() {
  26 + local candidates=()
  27 +
  28 + if [[ -n "${PYTHON_BIN:-}" ]]; then
  29 + candidates+=("${PYTHON_BIN}")
  30 + fi
  31 + candidates+=("python3.12" "python3.11" "python3.10" "python3")
  32 +
  33 + local candidate
  34 + for candidate in "${candidates[@]}"; do
  35 + if ! command -v "${candidate}" >/dev/null 2>&1; then
  36 + continue
  37 + fi
  38 + if python_meets_minimum "${candidate}"; then
  39 + echo "${candidate}"
  40 + return 0
  41 + fi
  42 + done
  43 +
  44 + return 1
  45 +}
  46 +
  47 +if ! PYTHON_BIN="$(discover_python_bin)"; then
  48 + echo "ERROR: unable to find Python >= ${MIN_PYTHON_MAJOR}.${MIN_PYTHON_MINOR}." >&2
  49 + echo "Set PYTHON_BIN to a compatible interpreter and rerun." >&2
  50 + exit 1
  51 +fi
13 52
14 if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then 53 if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
15 echo "ERROR: python not found: ${PYTHON_BIN}" >&2 54 echo "ERROR: python not found: ${PYTHON_BIN}" >&2
@@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}" @@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}"
32 export TMPDIR="${TMP_DIR}" 71 export TMPDIR="${TMP_DIR}"
33 PIP_ARGS=(--no-cache-dir) 72 PIP_ARGS=(--no-cache-dir)
34 73
  74 +echo "Using Python=${PYTHON_BIN}"
35 echo "Using TMPDIR=${TMPDIR}" 75 echo "Using TMPDIR=${TMPDIR}"
36 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel 76 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel
37 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt 77 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt
scripts/translation/download_translation_models.py
@@ -6,8 +6,6 @@ from __future__ import annotations @@ -6,8 +6,6 @@ from __future__ import annotations
6 import argparse 6 import argparse
7 import os 7 import os
8 from pathlib import Path 8 from pathlib import Path
9 -import shutil  
10 -import subprocess  
11 import sys 9 import sys
12 from typing import Iterable 10 from typing import Iterable
13 11
@@ -19,6 +17,7 @@ if str(PROJECT_ROOT) not in sys.path: @@ -19,6 +17,7 @@ if str(PROJECT_ROOT) not in sys.path:
19 os.environ.setdefault("HF_HUB_DISABLE_XET", "1") 17 os.environ.setdefault("HF_HUB_DISABLE_XET", "1")
20 18
21 from config.services_config import get_translation_config 19 from config.services_config import get_translation_config
  20 +from translation.ct2_conversion import convert_transformers_model
22 21
23 22
24 LOCAL_BACKENDS = {"local_nllb", "local_marian"} 23 LOCAL_BACKENDS = {"local_nllb", "local_marian"}
@@ -46,19 +45,6 @@ def _compute_ct2_output_dir(capability: dict) -> Path: @@ -46,19 +45,6 @@ def _compute_ct2_output_dir(capability: dict) -> Path:
46 return model_dir / f"ctranslate2-{normalized}" 45 return model_dir / f"ctranslate2-{normalized}"
47 46
48 47
49 -def _resolve_converter_binary() -> str:  
50 - candidate = shutil.which("ct2-transformers-converter")  
51 - if candidate:  
52 - return candidate  
53 - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter"  
54 - if venv_candidate.exists():  
55 - return str(venv_candidate)  
56 - raise RuntimeError(  
57 - "ct2-transformers-converter was not found. "  
58 - "Install ctranslate2 in the active Python environment first."  
59 - )  
60 -  
61 -  
62 def convert_to_ctranslate2(name: str, capability: dict) -> None: 48 def convert_to_ctranslate2(name: str, capability: dict) -> None:
63 model_id = str(capability.get("model_id") or "").strip() 49 model_id = str(capability.get("model_id") or "").strip()
64 model_dir = Path(str(capability.get("model_dir") or "")).expanduser() 50 model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
@@ -75,18 +61,7 @@ def convert_to_ctranslate2(name: str, capability: dict) -&gt; None: @@ -75,18 +61,7 @@ def convert_to_ctranslate2(name: str, capability: dict) -&gt; None:
75 ).strip() 61 ).strip()
76 output_dir.parent.mkdir(parents=True, exist_ok=True) 62 output_dir.parent.mkdir(parents=True, exist_ok=True)
77 print(f"[convert] {name} -> {output_dir} ({quantization})") 63 print(f"[convert] {name} -> {output_dir} ({quantization})")
78 - subprocess.run(  
79 - [  
80 - _resolve_converter_binary(),  
81 - "--model",  
82 - model_source,  
83 - "--output_dir",  
84 - str(output_dir),  
85 - "--quantization",  
86 - quantization,  
87 - ],  
88 - check=True,  
89 - ) 64 + convert_transformers_model(model_source, str(output_dir), quantization)
90 print(f"[converted] {name}") 65 print(f"[converted] {name}")
91 66
92 67
tests/test_translation_converter_resolution.py 0 → 100644
@@ -0,0 +1,85 @@ @@ -0,0 +1,85 @@
  1 +from __future__ import annotations
  2 +
  3 +import sys
  4 +import types
  5 +
  6 +import pytest
  7 +
  8 +import translation.ct2_conversion as ct2_conversion
  9 +
  10 +
  11 +class _FakeTransformersConverter:
  12 + def __init__(self, model_name_or_path):
  13 + self.model_name_or_path = model_name_or_path
  14 + self.load_calls = []
  15 +
  16 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  17 + self.load_calls.append(
  18 + {
  19 + "model_class": model_class,
  20 + "resolved_model_name_or_path": resolved_model_name_or_path,
  21 + "kwargs": dict(kwargs),
  22 + }
  23 + )
  24 + if "dtype" in kwargs or "torch_dtype" in kwargs:
  25 + raise TypeError("M2M100ForConditionalGeneration.__init__() got an unexpected keyword argument 'dtype'")
  26 + return {"loaded": True, "path": resolved_model_name_or_path}
  27 +
  28 + def convert(self, output_dir, quantization=None, force=False):
  29 + loaded = self.load_model("FakeModel", self.model_name_or_path, dtype="float32")
  30 + return {
  31 + "loaded": loaded,
  32 + "output_dir": output_dir,
  33 + "quantization": quantization,
  34 + "force": force,
  35 + "load_calls": list(self.load_calls),
  36 + }
  37 +
  38 +
  39 +def _install_fake_ctranslate2(monkeypatch, base_converter):
  40 + converters_module = types.ModuleType("ctranslate2.converters")
  41 + converters_module.TransformersConverter = base_converter
  42 + ctranslate2_module = types.ModuleType("ctranslate2")
  43 + ctranslate2_module.converters = converters_module
  44 +
  45 + monkeypatch.setitem(sys.modules, "ctranslate2", ctranslate2_module)
  46 + monkeypatch.setitem(sys.modules, "ctranslate2.converters", converters_module)
  47 +
  48 +
  49 +def test_convert_transformers_model_retries_without_torch_dtype(monkeypatch):
  50 + _install_fake_ctranslate2(monkeypatch, _FakeTransformersConverter)
  51 + fake_transformers = types.ModuleType("transformers")
  52 + fake_transformers.AutoConfig = types.SimpleNamespace(
  53 + from_pretrained=lambda path: types.SimpleNamespace(torch_dtype="float32", path=path)
  54 + )
  55 + monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
  56 +
  57 + result = ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16")
  58 +
  59 + assert result["loaded"] == {"loaded": True, "path": "fake-model"}
  60 + assert result["output_dir"] == "/tmp/out"
  61 + assert result["quantization"] == "float16"
  62 + assert result["force"] is False
  63 + assert len(result["load_calls"]) == 2
  64 + assert result["load_calls"][0] == {
  65 + "model_class": "FakeModel",
  66 + "resolved_model_name_or_path": "fake-model",
  67 + "kwargs": {"dtype": "float32"},
  68 + }
  69 + assert result["load_calls"][1]["model_class"] == "FakeModel"
  70 + assert result["load_calls"][1]["resolved_model_name_or_path"] == "fake-model"
  71 + assert getattr(result["load_calls"][1]["kwargs"]["config"], "torch_dtype", "missing") is None
  72 +
  73 +
  74 +def test_convert_transformers_model_preserves_unrelated_type_errors(monkeypatch):
  75 + class _AlwaysFailingConverter(_FakeTransformersConverter):
  76 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  77 + raise TypeError("different constructor error")
  78 +
  79 + _install_fake_ctranslate2(monkeypatch, _AlwaysFailingConverter)
  80 + fake_transformers = types.ModuleType("transformers")
  81 + fake_transformers.AutoConfig = types.SimpleNamespace(from_pretrained=lambda path: types.SimpleNamespace(path=path))
  82 + monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
  83 +
  84 + with pytest.raises(TypeError, match="different constructor error"):
  85 + ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16")
tests/test_translation_local_backends.py
@@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch): @@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch):
201 assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]] 201 assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]]
202 202
203 203
  204 +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_wrong_type(tmp_path, monkeypatch):
  205 + wrong_dir = tmp_path / "wrong-nllb"
  206 + wrong_dir.mkdir()
  207 + (wrong_dir / "config.json").write_text('{"model_type":"led"}', encoding="utf-8")
  208 +
  209 + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime)
  210 +
  211 + backend = NLLBCTranslate2TranslationBackend(
  212 + name="nllb-200-distilled-600m",
  213 + model_id="facebook/nllb-200-distilled-600M",
  214 + model_dir=str(wrong_dir),
  215 + device="cpu",
  216 + torch_dtype="float32",
  217 + batch_size=1,
  218 + max_input_length=16,
  219 + max_new_tokens=16,
  220 + num_beams=1,
  221 + )
  222 +
  223 + assert backend._model_source() == "facebook/nllb-200-distilled-600M"
  224 + assert backend._tokenizer_source() == "facebook/nllb-200-distilled-600M"
  225 +
  226 +
  227 +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_incomplete(tmp_path, monkeypatch):
  228 + incomplete_dir = tmp_path / "incomplete-nllb"
  229 + incomplete_dir.mkdir()
  230 + (incomplete_dir / "ctranslate2-float16").mkdir()
  231 +
  232 + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime)
  233 +
  234 + backend = NLLBCTranslate2TranslationBackend(
  235 + name="nllb-200-distilled-600m",
  236 + model_id="facebook/nllb-200-distilled-600M",
  237 + model_dir=str(incomplete_dir),
  238 + device="cpu",
  239 + torch_dtype="float32",
  240 + batch_size=1,
  241 + max_input_length=16,
  242 + max_new_tokens=16,
  243 + num_beams=1,
  244 + )
  245 +
  246 + assert backend._model_source() == "facebook/nllb-200-distilled-600M"
  247 +
  248 +
204 def test_nllb_resolves_flores_short_tags_and_iso_no(): 249 def test_nllb_resolves_flores_short_tags_and_iso_no():
205 cat = build_nllb_language_catalog(None) 250 cat = build_nllb_language_catalog(None)
206 assert resolve_nllb_language_code("ca", cat) == "cat_Latn" 251 assert resolve_nllb_language_code("ca", cat) == "cat_Latn"
tests/test_translator_failure_semantics.py
@@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog): @@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog):
197 ] 197 ]
198 198
199 199
  200 +def test_service_skips_failed_backend_but_keeps_healthy_capabilities(monkeypatch):
  201 + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
  202 +
  203 + def _fake_create_backend(self, *, name, backend_type, cfg):
  204 + del self, backend_type, cfg
  205 + if name == "broken-nllb":
  206 + raise RuntimeError("broken model dir")
  207 +
  208 + class _Backend:
  209 + model = name
  210 +
  211 + @property
  212 + def supports_batch(self):
  213 + return True
  214 +
  215 + def translate(self, text, target_lang, source_lang=None, scene=None):
  216 + del target_lang, source_lang, scene
  217 + return text
  218 +
  219 + return _Backend()
  220 +
  221 + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend)
  222 + service = TranslationService(
  223 + {
  224 + "service_url": "http://127.0.0.1:6006",
  225 + "timeout_sec": 10.0,
  226 + "default_model": "llm",
  227 + "default_scene": "general",
  228 + "capabilities": {
  229 + "llm": {
  230 + "enabled": True,
  231 + "backend": "llm",
  232 + "model": "dummy-llm",
  233 + "base_url": "https://example.com",
  234 + "timeout_sec": 10.0,
  235 + "use_cache": True,
  236 + },
  237 + "broken-nllb": {
  238 + "enabled": True,
  239 + "backend": "local_nllb",
  240 + "model_id": "dummy",
  241 + "model_dir": "dummy",
  242 + "device": "cpu",
  243 + "torch_dtype": "float32",
  244 + "batch_size": 8,
  245 + "max_input_length": 16,
  246 + "max_new_tokens": 16,
  247 + "num_beams": 1,
  248 + "use_cache": True,
  249 + },
  250 + },
  251 + "cache": {
  252 + "ttl_seconds": 60,
  253 + "sliding_expiration": True,
  254 + },
  255 + }
  256 + )
  257 +
  258 + assert service.available_models == ["llm", "broken-nllb"]
  259 + assert service.loaded_models == ["llm"]
  260 + assert service.failed_models == ["broken-nllb"]
  261 + assert service.backend_errors["broken-nllb"] == "broken model dir"
  262 +
  263 + with pytest.raises(RuntimeError, match="failed to initialize"):
  264 + service.get_backend("broken-nllb")
  265 +
  266 +
200 def test_translation_cache_probe_models_order(): 267 def test_translation_cache_probe_models_order():
201 cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}} 268 cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}}
202 assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"] 269 assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"]
translation/backends/local_ctranslate2.py
@@ -4,9 +4,7 @@ from __future__ import annotations @@ -4,9 +4,7 @@ from __future__ import annotations
4 4
5 import logging 5 import logging
6 import os 6 import os
7 -import shutil  
8 -import subprocess  
9 -import sys 7 +import json
10 import threading 8 import threading
11 from pathlib import Path 9 from pathlib import Path
12 from typing import Dict, List, Optional, Sequence, Union 10 from typing import Dict, List, Optional, Sequence, Union
@@ -24,6 +22,7 @@ from translation.text_splitter import ( @@ -24,6 +22,7 @@ from translation.text_splitter import (
24 join_translated_segments, 22 join_translated_segments,
25 split_text_for_translation, 23 split_text_for_translation,
26 ) 24 )
  25 +from translation.ct2_conversion import convert_transformers_model
27 26
28 logger = logging.getLogger(__name__) 27 logger = logging.getLogger(__name__)
29 28
@@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -> str: @@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -> str:
76 return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}") 75 return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}")
77 76
78 77
79 -def _resolve_converter_binary() -> str:  
80 - candidate = shutil.which("ct2-transformers-converter")  
81 - if candidate:  
82 - return candidate  
83 - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter"  
84 - if venv_candidate.exists():  
85 - return str(venv_candidate)  
86 - raise RuntimeError(  
87 - "ct2-transformers-converter was not found. "  
88 - "Ensure ctranslate2 is installed in the active translator environment."  
89 - ) 78 +def _detect_local_model_type(model_dir: str) -> Optional[str]:
  79 + config_path = Path(model_dir).expanduser() / "config.json"
  80 + if not config_path.exists():
  81 + return None
  82 + try:
  83 + with open(config_path, "r", encoding="utf-8") as handle:
  84 + payload = json.load(handle) or {}
  85 + except Exception as exc:
  86 + logger.warning("Failed to inspect local translation config %s: %s", config_path, exc)
  87 + return None
  88 + model_type = str(payload.get("model_type") or "").strip().lower()
  89 + return model_type or None
90 90
91 91
92 class LocalCTranslate2TranslationBackend: 92 class LocalCTranslate2TranslationBackend:
@@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend: @@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend:
144 self.ct2_decoding_length_extra = int(ct2_decoding_length_extra) 144 self.ct2_decoding_length_extra = int(ct2_decoding_length_extra)
145 self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min)) 145 self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min))
146 self._tokenizer_lock = threading.Lock() 146 self._tokenizer_lock = threading.Lock()
  147 + self._local_model_source = self._resolve_local_model_source()
147 self._load_runtime() 148 self._load_runtime()
148 149
149 @property 150 @property
@@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend: @@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend:
151 return True 152 return True
152 153
153 def _tokenizer_source(self) -> str: 154 def _tokenizer_source(self) -> str:
154 - return self.model_dir if os.path.exists(self.model_dir) else self.model_id 155 + return self._local_model_source or self.model_id
155 156
156 def _model_source(self) -> str: 157 def _model_source(self) -> str:
157 - return self.model_dir if os.path.exists(self.model_dir) else self.model_id 158 + return self._local_model_source or self.model_id
  159 +
  160 + def _expected_local_model_types(self) -> Optional[set[str]]:
  161 + return None
  162 +
  163 + def _resolve_local_model_source(self) -> Optional[str]:
  164 + model_path = Path(self.model_dir).expanduser()
  165 + if not model_path.exists():
  166 + return None
  167 + if not (model_path / "config.json").exists():
  168 + logger.warning(
  169 + "Local translation model_dir is incomplete | model=%s model_dir=%s missing=config.json fallback=model_id",
  170 + self.model,
  171 + model_path,
  172 + )
  173 + return None
  174 +
  175 + expected_types = self._expected_local_model_types()
  176 + if not expected_types:
  177 + return str(model_path)
  178 +
  179 + detected_type = _detect_local_model_type(str(model_path))
  180 + if detected_type is None:
  181 + return str(model_path)
  182 + if detected_type in expected_types:
  183 + return str(model_path)
  184 +
  185 + logger.warning(
  186 + "Local translation model_dir has unexpected model_type | model=%s model_dir=%s detected=%s expected=%s fallback=model_id",
  187 + self.model,
  188 + model_path,
  189 + detected_type,
  190 + sorted(expected_types),
  191 + )
  192 + return None
158 193
159 def _tokenizer_kwargs(self) -> Dict[str, object]: 194 def _tokenizer_kwargs(self) -> Dict[str, object]:
160 return {} 195 return {}
@@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend: @@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend:
204 ) 239 )
205 240
206 ct2_path.parent.mkdir(parents=True, exist_ok=True) 241 ct2_path.parent.mkdir(parents=True, exist_ok=True)
207 - converter = _resolve_converter_binary()  
208 logger.info( 242 logger.info(
209 "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s", 243 "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s",
210 self.model, 244 self.model,
@@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend: @@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend:
213 self.ct2_conversion_quantization, 247 self.ct2_conversion_quantization,
214 ) 248 )
215 try: 249 try:
216 - subprocess.run(  
217 - [  
218 - converter,  
219 - "--model",  
220 - model_source,  
221 - "--output_dir",  
222 - str(ct2_path),  
223 - "--quantization",  
224 - self.ct2_conversion_quantization,  
225 - ],  
226 - check=True,  
227 - stdout=subprocess.PIPE,  
228 - stderr=subprocess.PIPE,  
229 - text=True, 250 + convert_transformers_model(
  251 + model_source,
  252 + str(ct2_path),
  253 + self.ct2_conversion_quantization,
230 ) 254 )
231 - except subprocess.CalledProcessError as exc:  
232 - stderr = exc.stderr.strip() 255 + except Exception as exc:
233 raise RuntimeError( 256 raise RuntimeError(
234 - f"Failed to convert model '{self.model}' to CTranslate2: {stderr or exc}" 257 + f"Failed to convert model '{self.model}' to CTranslate2: {exc}"
235 ) from exc 258 ) from exc
236 259
237 def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]: 260 def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]:
@@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): @@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
557 f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}" 580 f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}"
558 ) 581 )
559 582
  583 + def _expected_local_model_types(self) -> Optional[set[str]]:
  584 + return {"marian"}
  585 +
560 586
561 class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): 587 class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
562 """Local backend for NLLB models on CTranslate2.""" 588 """Local backend for NLLB models on CTranslate2."""
@@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): @@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
619 if resolve_nllb_language_code(target_lang, self.language_codes) is None: 645 if resolve_nllb_language_code(target_lang, self.language_codes) is None:
620 raise ValueError(f"Unsupported NLLB target language: {target_lang}") 646 raise ValueError(f"Unsupported NLLB target language: {target_lang}")
621 647
  648 + def _expected_local_model_types(self) -> Optional[set[str]]:
  649 + return {"m2m_100", "nllb_moe"}
  650 +
622 def _get_tokenizer_for_source(self, source_lang: str): 651 def _get_tokenizer_for_source(self, source_lang: str):
623 src_code = resolve_nllb_language_code(source_lang, self.language_codes) 652 src_code = resolve_nllb_language_code(source_lang, self.language_codes)
624 if src_code is None: 653 if src_code is None:
translation/ct2_conversion.py 0 โ†’ 100644
@@ -0,0 +1,52 @@ @@ -0,0 +1,52 @@
  1 +"""Helpers for converting Hugging Face translation models to CTranslate2."""
  2 +
  3 +from __future__ import annotations
  4 +
  5 +import copy
  6 +import logging
  7 +
  8 +logger = logging.getLogger(__name__)
  9 +
  10 +
  11 +def convert_transformers_model(
  12 + model_name_or_path: str,
  13 + output_dir: str,
  14 + quantization: str,
  15 + *,
  16 + force: bool = False,
  17 +) -> str:
  18 + from ctranslate2.converters import TransformersConverter
  19 + from transformers import AutoConfig
  20 +
  21 + class _CompatibleTransformersConverter(TransformersConverter):
  22 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  23 + try:
  24 + return super().load_model(model_class, resolved_model_name_or_path, **kwargs)
  25 + except TypeError as exc:
  26 + if "unexpected keyword argument 'dtype'" not in str(exc):
  27 + raise
  28 + if kwargs.get("dtype") is None and kwargs.get("torch_dtype") is None:
  29 + raise
  30 +
  31 + logger.warning(
  32 + "Retrying CTranslate2 model load without dtype hints | model=%s class=%s",
  33 + resolved_model_name_or_path,
  34 + getattr(model_class, "__name__", model_class),
  35 + )
  36 + retry_kwargs = dict(kwargs)
  37 + retry_kwargs.pop("dtype", None)
  38 + retry_kwargs.pop("torch_dtype", None)
  39 + config = retry_kwargs.get("config")
  40 + if config is None:
  41 + config = AutoConfig.from_pretrained(resolved_model_name_or_path)
  42 + else:
  43 + config = copy.deepcopy(config)
  44 + if hasattr(config, "dtype"):
  45 + config.dtype = None
  46 + if hasattr(config, "torch_dtype"):
  47 + config.torch_dtype = None
  48 + retry_kwargs["config"] = config
  49 + return super().load_model(model_class, resolved_model_name_or_path, **retry_kwargs)
  50 +
  51 + converter = _CompatibleTransformersConverter(model_name_or_path)
  52 + return converter.convert(output_dir=output_dir, quantization=quantization, force=force)
translation/service.py
@@ -31,7 +31,12 @@ class TranslationService: @@ -31,7 +31,12 @@ class TranslationService:
31 if not self._enabled_capabilities: 31 if not self._enabled_capabilities:
32 raise ValueError("No enabled translation backends found in services.translation.capabilities") 32 raise ValueError("No enabled translation backends found in services.translation.capabilities")
33 self._translation_cache = TranslationCache(self.config["cache"]) 33 self._translation_cache = TranslationCache(self.config["cache"])
34 - self._backends = self._initialize_backends() 34 + self._backends: Dict[str, TranslationBackendProtocol] = {}
  35 + self._backend_errors: Dict[str, str] = {}
  36 + self._initialize_backends()
  37 + if not self._backends:
  38 + details = ", ".join(f"{name}: {err}" for name, err in sorted(self._backend_errors.items())) or "unknown error"
  39 + raise RuntimeError(f"No translation backends could be initialized: {details}")
35 40
36 def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]: 41 def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]:
37 enabled: Dict[str, Dict[str, object]] = {} 42 enabled: Dict[str, Dict[str, object]] = {}
@@ -62,24 +67,47 @@ class TranslationService: @@ -62,24 +67,47 @@ class TranslationService:
62 raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'") 67 raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'")
63 return factory(name=name, cfg=cfg) 68 return factory(name=name, cfg=cfg)
64 69
65 - def _initialize_backends(self) -> Dict[str, TranslationBackendProtocol]:  
66 - backends: Dict[str, TranslationBackendProtocol] = {}  
67 - for name, capability_cfg in self._enabled_capabilities.items():  
68 - backend_type = str(capability_cfg["backend"])  
69 - logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type)  
70 - backends[name] = self._create_backend( 70 + def _load_backend(self, name: str) -> Optional[TranslationBackendProtocol]:
  71 + capability_cfg = self._enabled_capabilities.get(name)
  72 + if capability_cfg is None:
  73 + return None
  74 + if name in self._backends:
  75 + return self._backends[name]
  76 +
  77 + backend_type = str(capability_cfg["backend"])
  78 + logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type)
  79 + try:
  80 + backend = self._create_backend(
71 name=name, 81 name=name,
72 backend_type=backend_type, 82 backend_type=backend_type,
73 cfg=capability_cfg, 83 cfg=capability_cfg,
74 ) 84 )
75 - logger.info(  
76 - "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s", 85 + except Exception as exc:
  86 + error_text = str(exc).strip() or exc.__class__.__name__
  87 + self._backend_errors[name] = error_text
  88 + logger.error(
  89 + "Translation backend initialization failed | model=%s backend=%s error=%s",
77 name, 90 name,
78 backend_type, 91 backend_type,
79 - bool(capability_cfg.get("use_cache")),  
80 - getattr(backends[name], "model", name), 92 + error_text,
  93 + exc_info=True,
81 ) 94 )
82 - return backends 95 + return None
  96 +
  97 + self._backends[name] = backend
  98 + self._backend_errors.pop(name, None)
  99 + logger.info(
  100 + "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s",
  101 + name,
  102 + backend_type,
  103 + bool(capability_cfg.get("use_cache")),
  104 + getattr(backend, "model", name),
  105 + )
  106 + return backend
  107 +
  108 + def _initialize_backends(self) -> None:
  109 + for name, capability_cfg in self._enabled_capabilities.items():
  110 + self._load_backend(name)
83 111
84 def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: 112 def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
85 from translation.backends.qwen_mt import QwenMTTranslationBackend 113 from translation.backends.qwen_mt import QwenMTTranslationBackend
@@ -178,13 +206,27 @@ class TranslationService: @@ -178,13 +206,27 @@ class TranslationService:
178 def loaded_models(self) -> List[str]: 206 def loaded_models(self) -> List[str]:
179 return list(self._backends.keys()) 207 return list(self._backends.keys())
180 208
  209 + @property
  210 + def failed_models(self) -> List[str]:
  211 + return list(self._backend_errors.keys())
  212 +
  213 + @property
  214 + def backend_errors(self) -> Dict[str, str]:
  215 + return dict(self._backend_errors)
  216 +
181 def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: 217 def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol:
182 normalized = normalize_translation_model(self.config, model) 218 normalized = normalize_translation_model(self.config, model)
183 - backend = self._backends.get(normalized) 219 + backend = self._backends.get(normalized) or self._load_backend(normalized)
184 if backend is None: 220 if backend is None:
185 - raise ValueError(  
186 - f"Translation model '{normalized}' is not enabled. "  
187 - f"Available models: {', '.join(self.available_models) or 'none'}" 221 + if normalized not in self._enabled_capabilities:
  222 + raise ValueError(
  223 + f"Translation model '{normalized}' is not enabled. "
  224 + f"Available models: {', '.join(self.available_models) or 'none'}"
  225 + )
  226 + error_text = self._backend_errors.get(normalized) or "unknown initialization error"
  227 + raise RuntimeError(
  228 + f"Translation model '{normalized}' failed to initialize: {error_text}. "
  229 + f"Loaded models: {', '.join(self.loaded_models) or 'none'}"
188 ) 230 )
189 return backend 231 return backend
190 232