Commit f07947a5d510f29a6f588b84e3734ab5f0310a38

Authored by tangwang
1 parent 0ba0e0fc

Improve portability and harden public frontend search

api/translator_app.py
... ... @@ -271,16 +271,20 @@ async def lifespan(_: FastAPI):
271 271 """Initialize all enabled translation backends on process startup."""
272 272 logger.info("Starting Translation Service API")
273 273 service = get_translation_service()
  274 + failed_models = list(getattr(service, "failed_models", []))
  275 + backend_errors = dict(getattr(service, "backend_errors", {}))
274 276 logger.info(
275   - "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s",
  277 + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s failed_models=%s",
276 278 service.config["default_model"],
277 279 service.config["default_scene"],
278 280 service.available_models,
279 281 service.loaded_models,
  282 + failed_models,
280 283 )
281 284 logger.info(
282   - "Translation backends initialized on startup | models=%s",
  285 + "Translation backends initialized on startup | loaded=%s failed=%s",
283 286 service.loaded_models,
  287 + backend_errors,
284 288 )
285 289 verbose_logger.info(
286 290 "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s",
... ... @@ -316,11 +320,14 @@ async def health_check():
316 320 """Health check endpoint."""
317 321 try:
318 322 service = get_translation_service()
  323 + failed_models = list(getattr(service, "failed_models", []))
  324 + backend_errors = dict(getattr(service, "backend_errors", {}))
319 325 logger.info(
320   - "Health check | default_model=%s default_scene=%s loaded_models=%s",
  326 + "Health check | default_model=%s default_scene=%s loaded_models=%s failed_models=%s",
321 327 service.config["default_model"],
322 328 service.config["default_scene"],
323 329 service.loaded_models,
  330 + failed_models,
324 331 )
325 332 return {
326 333 "status": "healthy",
... ... @@ -330,6 +337,8 @@ async def health_check():
330 337 "available_models": service.available_models,
331 338 "enabled_capabilities": get_enabled_translation_models(service.config),
332 339 "loaded_models": service.loaded_models,
  340 + "failed_models": failed_models,
  341 + "backend_errors": backend_errors,
333 342 }
334 343 except Exception as e:
335 344 logger.error(f"Health check failed: {e}")
... ... @@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request):
463 472 latency_ms = (time.perf_counter() - request_started) * 1000
464 473 logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms)
465 474 raise HTTPException(status_code=400, detail=str(e)) from e
  475 + except RuntimeError as e:
  476 + latency_ms = (time.perf_counter() - request_started) * 1000
  477 + logger.warning("Translation backend unavailable | error=%s latency_ms=%.2f", e, latency_ms)
  478 + raise HTTPException(status_code=503, detail=str(e)) from e
466 479 except Exception as e:
467 480 latency_ms = (time.perf_counter() - request_started) * 1000
468 481 logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True)
... ...
config/loader.py
... ... @@ -655,6 +655,14 @@ class AppConfigLoader:
655 655  
656 656 translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {}
657 657 normalized_translation = build_translation_config(translation_raw)
  658 + local_translation_backends = {"local_nllb", "local_marian"}
  659 + for capability_name, capability_cfg in normalized_translation["capabilities"].items():
  660 + backend_name = str(capability_cfg.get("backend") or "").strip().lower()
  661 + if backend_name not in local_translation_backends:
  662 + continue
  663 + for path_key in ("model_dir", "ct2_model_dir"):
  664 + if capability_cfg.get(path_key) not in (None, ""):
  665 + capability_cfg[path_key] = str(self._resolve_project_path_value(capability_cfg[path_key]).resolve())
658 666 translation_config = TranslationServiceConfig(
659 667 endpoint=str(normalized_translation["service_url"]).rstrip("/"),
660 668 timeout_sec=float(normalized_translation["timeout_sec"]),
... ... @@ -749,7 +757,7 @@ class AppConfigLoader:
749 757 port=port,
750 758 backend=backend_name,
751 759 runtime_dir=(
752   - str(v)
  760 + str(self._resolve_project_path_value(v).resolve())
753 761 if (v := instance_raw.get("runtime_dir")) not in (None, "")
754 762 else None
755 763 ),
... ... @@ -787,6 +795,12 @@ class AppConfigLoader:
787 795 rerank=rerank_config,
788 796 )
789 797  
  798 + def _resolve_project_path_value(self, value: Any) -> Path:
  799 + candidate = Path(str(value)).expanduser()
  800 + if candidate.is_absolute():
  801 + return candidate
  802 + return self.project_root / candidate
  803 +
790 804 def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig:
791 805 if not isinstance(raw, dict):
792 806 raise ConfigurationError("tenant_config must be a mapping")
... ...
frontend/static/js/app.js
... ... @@ -316,7 +316,10 @@ async function performSearch(page = 1) {
316 316 document.getElementById('productGrid').innerHTML = '';
317 317  
318 318 try {
319   - const response = await fetch(`${API_BASE_URL}/search/`, {
  319 + const searchUrl = new URL(`${API_BASE_URL}/search/`, window.location.origin);
  320 + searchUrl.searchParams.set('tenant_id', tenantId);
  321 +
  322 + const response = await fetch(searchUrl.toString(), {
320 323 method: 'POST',
321 324 headers: {
322 325 'Content-Type': 'application/json',
... ...
requirements_translator_service.txt
... ... @@ -13,7 +13,8 @@ httpx>=0.24.0
13 13 tqdm>=4.65.0
14 14  
15 15 torch>=2.0.0
16   -transformers>=4.30.0
  16 +# Keep translator conversions on the last verified NLLB-compatible release line.
  17 +transformers>=4.51.0,<4.52.0
17 18 ctranslate2>=4.7.0
18 19 sentencepiece>=0.2.0
19 20 sacremoses>=0.1.1
... ...
scripts/download_translation_models.py 0 → 100644
... ... @@ -0,0 +1,12 @@
  1 +#!/usr/bin/env python3
  2 +"""Backward-compatible entrypoint for translation model downloads."""
  3 +
  4 +from __future__ import annotations
  5 +
  6 +import runpy
  7 +from pathlib import Path
  8 +
  9 +
  10 +if __name__ == "__main__":
  11 + target = Path(__file__).resolve().parent / "translation" / "download_translation_models.py"
  12 + runpy.run_path(str(target), run_name="__main__")
... ...
scripts/frontend/frontend_server.py
... ... @@ -60,6 +60,8 @@ class RateLimitingMixin:
60 60 class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin):
61 61 """Custom request handler with CORS support and robust error handling."""
62 62  
  63 + _ALLOWED_CORS_HEADERS = "Content-Type, X-Tenant-ID, X-Request-ID, Referer"
  64 +
63 65 def _is_proxy_path(self, path: str) -> bool:
64 66 """Return True for API paths that should be forwarded to backend service."""
65 67 return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/')
... ... @@ -220,7 +222,7 @@ class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMix
220 222 # Add CORS headers
221 223 self.send_header('Access-Control-Allow-Origin', '*')
222 224 self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
223   - self.send_header('Access-Control-Allow-Headers', 'Content-Type')
  225 + self.send_header('Access-Control-Allow-Headers', self._ALLOWED_CORS_HEADERS)
224 226 # Add security headers
225 227 self.send_header('X-Content-Type-Options', 'nosniff')
226 228 self.send_header('X-Frame-Options', 'DENY')
... ...
scripts/setup_translator_venv.sh
... ... @@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
8 8 cd "${PROJECT_ROOT}"
9 9  
10 10 VENV_DIR="${PROJECT_ROOT}/.venv-translator"
11   -PYTHON_BIN="${PYTHON_BIN:-python3}"
12 11 TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}"
  12 +MIN_PYTHON_MAJOR=3
  13 +MIN_PYTHON_MINOR=10
  14 +
  15 +python_meets_minimum() {
  16 + local bin="$1"
  17 + "${bin}" - <<'PY' "${MIN_PYTHON_MAJOR}" "${MIN_PYTHON_MINOR}"
  18 +import sys
  19 +
  20 +required = tuple(int(value) for value in sys.argv[1:])
  21 +sys.exit(0 if sys.version_info[:2] >= required else 1)
  22 +PY
  23 +}
  24 +
  25 +discover_python_bin() {
  26 + local candidates=()
  27 +
  28 + if [[ -n "${PYTHON_BIN:-}" ]]; then
  29 + candidates+=("${PYTHON_BIN}")
  30 + fi
  31 + candidates+=("python3.12" "python3.11" "python3.10" "python3")
  32 +
  33 + local candidate
  34 + for candidate in "${candidates[@]}"; do
  35 + if ! command -v "${candidate}" >/dev/null 2>&1; then
  36 + continue
  37 + fi
  38 + if python_meets_minimum "${candidate}"; then
  39 + echo "${candidate}"
  40 + return 0
  41 + fi
  42 + done
  43 +
  44 + return 1
  45 +}
  46 +
  47 +if ! PYTHON_BIN="$(discover_python_bin)"; then
  48 + echo "ERROR: unable to find Python >= ${MIN_PYTHON_MAJOR}.${MIN_PYTHON_MINOR}." >&2
  49 + echo "Set PYTHON_BIN to a compatible interpreter and rerun." >&2
  50 + exit 1
  51 +fi
13 52  
14 53 if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
15 54 echo "ERROR: python not found: ${PYTHON_BIN}" >&2
... ... @@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}"
32 71 export TMPDIR="${TMP_DIR}"
33 72 PIP_ARGS=(--no-cache-dir)
34 73  
  74 +echo "Using Python=${PYTHON_BIN}"
35 75 echo "Using TMPDIR=${TMPDIR}"
36 76 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel
37 77 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt
... ...
scripts/translation/download_translation_models.py
... ... @@ -6,8 +6,6 @@ from __future__ import annotations
6 6 import argparse
7 7 import os
8 8 from pathlib import Path
9   -import shutil
10   -import subprocess
11 9 import sys
12 10 from typing import Iterable
13 11  
... ... @@ -19,6 +17,7 @@ if str(PROJECT_ROOT) not in sys.path:
19 17 os.environ.setdefault("HF_HUB_DISABLE_XET", "1")
20 18  
21 19 from config.services_config import get_translation_config
  20 +from translation.ct2_conversion import convert_transformers_model
22 21  
23 22  
24 23 LOCAL_BACKENDS = {"local_nllb", "local_marian"}
... ... @@ -46,19 +45,6 @@ def _compute_ct2_output_dir(capability: dict) -> Path:
46 45 return model_dir / f"ctranslate2-{normalized}"
47 46  
48 47  
49   -def _resolve_converter_binary() -> str:
50   - candidate = shutil.which("ct2-transformers-converter")
51   - if candidate:
52   - return candidate
53   - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter"
54   - if venv_candidate.exists():
55   - return str(venv_candidate)
56   - raise RuntimeError(
57   - "ct2-transformers-converter was not found. "
58   - "Install ctranslate2 in the active Python environment first."
59   - )
60   -
61   -
62 48 def convert_to_ctranslate2(name: str, capability: dict) -> None:
63 49 model_id = str(capability.get("model_id") or "").strip()
64 50 model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
... ... @@ -75,18 +61,7 @@ def convert_to_ctranslate2(name: str, capability: dict) -> None:
75 61 ).strip()
76 62 output_dir.parent.mkdir(parents=True, exist_ok=True)
77 63 print(f"[convert] {name} -> {output_dir} ({quantization})")
78   - subprocess.run(
79   - [
80   - _resolve_converter_binary(),
81   - "--model",
82   - model_source,
83   - "--output_dir",
84   - str(output_dir),
85   - "--quantization",
86   - quantization,
87   - ],
88   - check=True,
89   - )
  64 + convert_transformers_model(model_source, str(output_dir), quantization)
90 65 print(f"[converted] {name}")
91 66  
92 67  
... ...
tests/test_translation_converter_resolution.py 0 → 100644
... ... @@ -0,0 +1,85 @@
  1 +from __future__ import annotations
  2 +
  3 +import sys
  4 +import types
  5 +
  6 +import pytest
  7 +
  8 +import translation.ct2_conversion as ct2_conversion
  9 +
  10 +
  11 +class _FakeTransformersConverter:
  12 + def __init__(self, model_name_or_path):
  13 + self.model_name_or_path = model_name_or_path
  14 + self.load_calls = []
  15 +
  16 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  17 + self.load_calls.append(
  18 + {
  19 + "model_class": model_class,
  20 + "resolved_model_name_or_path": resolved_model_name_or_path,
  21 + "kwargs": dict(kwargs),
  22 + }
  23 + )
  24 + if "dtype" in kwargs or "torch_dtype" in kwargs:
  25 + raise TypeError("M2M100ForConditionalGeneration.__init__() got an unexpected keyword argument 'dtype'")
  26 + return {"loaded": True, "path": resolved_model_name_or_path}
  27 +
  28 + def convert(self, output_dir, quantization=None, force=False):
  29 + loaded = self.load_model("FakeModel", self.model_name_or_path, dtype="float32")
  30 + return {
  31 + "loaded": loaded,
  32 + "output_dir": output_dir,
  33 + "quantization": quantization,
  34 + "force": force,
  35 + "load_calls": list(self.load_calls),
  36 + }
  37 +
  38 +
  39 +def _install_fake_ctranslate2(monkeypatch, base_converter):
  40 + converters_module = types.ModuleType("ctranslate2.converters")
  41 + converters_module.TransformersConverter = base_converter
  42 + ctranslate2_module = types.ModuleType("ctranslate2")
  43 + ctranslate2_module.converters = converters_module
  44 +
  45 + monkeypatch.setitem(sys.modules, "ctranslate2", ctranslate2_module)
  46 + monkeypatch.setitem(sys.modules, "ctranslate2.converters", converters_module)
  47 +
  48 +
  49 +def test_convert_transformers_model_retries_without_torch_dtype(monkeypatch):
  50 + _install_fake_ctranslate2(monkeypatch, _FakeTransformersConverter)
  51 + fake_transformers = types.ModuleType("transformers")
  52 + fake_transformers.AutoConfig = types.SimpleNamespace(
  53 + from_pretrained=lambda path: types.SimpleNamespace(torch_dtype="float32", path=path)
  54 + )
  55 + monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
  56 +
  57 + result = ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16")
  58 +
  59 + assert result["loaded"] == {"loaded": True, "path": "fake-model"}
  60 + assert result["output_dir"] == "/tmp/out"
  61 + assert result["quantization"] == "float16"
  62 + assert result["force"] is False
  63 + assert len(result["load_calls"]) == 2
  64 + assert result["load_calls"][0] == {
  65 + "model_class": "FakeModel",
  66 + "resolved_model_name_or_path": "fake-model",
  67 + "kwargs": {"dtype": "float32"},
  68 + }
  69 + assert result["load_calls"][1]["model_class"] == "FakeModel"
  70 + assert result["load_calls"][1]["resolved_model_name_or_path"] == "fake-model"
  71 + assert getattr(result["load_calls"][1]["kwargs"]["config"], "torch_dtype", "missing") is None
  72 +
  73 +
  74 +def test_convert_transformers_model_preserves_unrelated_type_errors(monkeypatch):
  75 + class _AlwaysFailingConverter(_FakeTransformersConverter):
  76 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  77 + raise TypeError("different constructor error")
  78 +
  79 + _install_fake_ctranslate2(monkeypatch, _AlwaysFailingConverter)
  80 + fake_transformers = types.ModuleType("transformers")
  81 + fake_transformers.AutoConfig = types.SimpleNamespace(from_pretrained=lambda path: types.SimpleNamespace(path=path))
  82 + monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
  83 +
  84 + with pytest.raises(TypeError, match="different constructor error"):
  85 + ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16")
... ...
tests/test_translation_local_backends.py
... ... @@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch):
201 201 assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]]
202 202  
203 203  
  204 +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_wrong_type(tmp_path, monkeypatch):
  205 + wrong_dir = tmp_path / "wrong-nllb"
  206 + wrong_dir.mkdir()
  207 + (wrong_dir / "config.json").write_text('{"model_type":"led"}', encoding="utf-8")
  208 +
  209 + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime)
  210 +
  211 + backend = NLLBCTranslate2TranslationBackend(
  212 + name="nllb-200-distilled-600m",
  213 + model_id="facebook/nllb-200-distilled-600M",
  214 + model_dir=str(wrong_dir),
  215 + device="cpu",
  216 + torch_dtype="float32",
  217 + batch_size=1,
  218 + max_input_length=16,
  219 + max_new_tokens=16,
  220 + num_beams=1,
  221 + )
  222 +
  223 + assert backend._model_source() == "facebook/nllb-200-distilled-600M"
  224 + assert backend._tokenizer_source() == "facebook/nllb-200-distilled-600M"
  225 +
  226 +
  227 +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_incomplete(tmp_path, monkeypatch):
  228 + incomplete_dir = tmp_path / "incomplete-nllb"
  229 + incomplete_dir.mkdir()
  230 + (incomplete_dir / "ctranslate2-float16").mkdir()
  231 +
  232 + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime)
  233 +
  234 + backend = NLLBCTranslate2TranslationBackend(
  235 + name="nllb-200-distilled-600m",
  236 + model_id="facebook/nllb-200-distilled-600M",
  237 + model_dir=str(incomplete_dir),
  238 + device="cpu",
  239 + torch_dtype="float32",
  240 + batch_size=1,
  241 + max_input_length=16,
  242 + max_new_tokens=16,
  243 + num_beams=1,
  244 + )
  245 +
  246 + assert backend._model_source() == "facebook/nllb-200-distilled-600M"
  247 +
  248 +
204 249 def test_nllb_resolves_flores_short_tags_and_iso_no():
205 250 cat = build_nllb_language_catalog(None)
206 251 assert resolve_nllb_language_code("ca", cat) == "cat_Latn"
... ...
tests/test_translator_failure_semantics.py
... ... @@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog):
197 197 ]
198 198  
199 199  
  200 +def test_service_skips_failed_backend_but_keeps_healthy_capabilities(monkeypatch):
  201 + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
  202 +
  203 + def _fake_create_backend(self, *, name, backend_type, cfg):
  204 + del self, backend_type, cfg
  205 + if name == "broken-nllb":
  206 + raise RuntimeError("broken model dir")
  207 +
  208 + class _Backend:
  209 + model = name
  210 +
  211 + @property
  212 + def supports_batch(self):
  213 + return True
  214 +
  215 + def translate(self, text, target_lang, source_lang=None, scene=None):
  216 + del target_lang, source_lang, scene
  217 + return text
  218 +
  219 + return _Backend()
  220 +
  221 + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend)
  222 + service = TranslationService(
  223 + {
  224 + "service_url": "http://127.0.0.1:6006",
  225 + "timeout_sec": 10.0,
  226 + "default_model": "llm",
  227 + "default_scene": "general",
  228 + "capabilities": {
  229 + "llm": {
  230 + "enabled": True,
  231 + "backend": "llm",
  232 + "model": "dummy-llm",
  233 + "base_url": "https://example.com",
  234 + "timeout_sec": 10.0,
  235 + "use_cache": True,
  236 + },
  237 + "broken-nllb": {
  238 + "enabled": True,
  239 + "backend": "local_nllb",
  240 + "model_id": "dummy",
  241 + "model_dir": "dummy",
  242 + "device": "cpu",
  243 + "torch_dtype": "float32",
  244 + "batch_size": 8,
  245 + "max_input_length": 16,
  246 + "max_new_tokens": 16,
  247 + "num_beams": 1,
  248 + "use_cache": True,
  249 + },
  250 + },
  251 + "cache": {
  252 + "ttl_seconds": 60,
  253 + "sliding_expiration": True,
  254 + },
  255 + }
  256 + )
  257 +
  258 + assert service.available_models == ["llm", "broken-nllb"]
  259 + assert service.loaded_models == ["llm"]
  260 + assert service.failed_models == ["broken-nllb"]
  261 + assert service.backend_errors["broken-nllb"] == "broken model dir"
  262 +
  263 + with pytest.raises(RuntimeError, match="failed to initialize"):
  264 + service.get_backend("broken-nllb")
  265 +
  266 +
200 267 def test_translation_cache_probe_models_order():
201 268 cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}}
202 269 assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"]
... ...
translation/backends/local_ctranslate2.py
... ... @@ -4,9 +4,7 @@ from __future__ import annotations
4 4  
5 5 import logging
6 6 import os
7   -import shutil
8   -import subprocess
9   -import sys
  7 +import json
10 8 import threading
11 9 from pathlib import Path
12 10 from typing import Dict, List, Optional, Sequence, Union
... ... @@ -24,6 +22,7 @@ from translation.text_splitter import (
24 22 join_translated_segments,
25 23 split_text_for_translation,
26 24 )
  25 +from translation.ct2_conversion import convert_transformers_model
27 26  
28 27 logger = logging.getLogger(__name__)
29 28  
... ... @@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -> str:
76 75 return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}")
77 76  
78 77  
79   -def _resolve_converter_binary() -> str:
80   - candidate = shutil.which("ct2-transformers-converter")
81   - if candidate:
82   - return candidate
83   - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter"
84   - if venv_candidate.exists():
85   - return str(venv_candidate)
86   - raise RuntimeError(
87   - "ct2-transformers-converter was not found. "
88   - "Ensure ctranslate2 is installed in the active translator environment."
89   - )
  78 +def _detect_local_model_type(model_dir: str) -> Optional[str]:
  79 + config_path = Path(model_dir).expanduser() / "config.json"
  80 + if not config_path.exists():
  81 + return None
  82 + try:
  83 + with open(config_path, "r", encoding="utf-8") as handle:
  84 + payload = json.load(handle) or {}
  85 + except Exception as exc:
  86 + logger.warning("Failed to inspect local translation config %s: %s", config_path, exc)
  87 + return None
  88 + model_type = str(payload.get("model_type") or "").strip().lower()
  89 + return model_type or None
90 90  
91 91  
92 92 class LocalCTranslate2TranslationBackend:
... ... @@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend:
144 144 self.ct2_decoding_length_extra = int(ct2_decoding_length_extra)
145 145 self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min))
146 146 self._tokenizer_lock = threading.Lock()
  147 + self._local_model_source = self._resolve_local_model_source()
147 148 self._load_runtime()
148 149  
149 150 @property
... ... @@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend:
151 152 return True
152 153  
153 154 def _tokenizer_source(self) -> str:
154   - return self.model_dir if os.path.exists(self.model_dir) else self.model_id
  155 + return self._local_model_source or self.model_id
155 156  
156 157 def _model_source(self) -> str:
157   - return self.model_dir if os.path.exists(self.model_dir) else self.model_id
  158 + return self._local_model_source or self.model_id
  159 +
  160 + def _expected_local_model_types(self) -> Optional[set[str]]:
  161 + return None
  162 +
  163 + def _resolve_local_model_source(self) -> Optional[str]:
  164 + model_path = Path(self.model_dir).expanduser()
  165 + if not model_path.exists():
  166 + return None
  167 + if not (model_path / "config.json").exists():
  168 + logger.warning(
  169 + "Local translation model_dir is incomplete | model=%s model_dir=%s missing=config.json fallback=model_id",
  170 + self.model,
  171 + model_path,
  172 + )
  173 + return None
  174 +
  175 + expected_types = self._expected_local_model_types()
  176 + if not expected_types:
  177 + return str(model_path)
  178 +
  179 + detected_type = _detect_local_model_type(str(model_path))
  180 + if detected_type is None:
  181 + return str(model_path)
  182 + if detected_type in expected_types:
  183 + return str(model_path)
  184 +
  185 + logger.warning(
  186 + "Local translation model_dir has unexpected model_type | model=%s model_dir=%s detected=%s expected=%s fallback=model_id",
  187 + self.model,
  188 + model_path,
  189 + detected_type,
  190 + sorted(expected_types),
  191 + )
  192 + return None
158 193  
159 194 def _tokenizer_kwargs(self) -> Dict[str, object]:
160 195 return {}
... ... @@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend:
204 239 )
205 240  
206 241 ct2_path.parent.mkdir(parents=True, exist_ok=True)
207   - converter = _resolve_converter_binary()
208 242 logger.info(
209 243 "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s",
210 244 self.model,
... ... @@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend:
213 247 self.ct2_conversion_quantization,
214 248 )
215 249 try:
216   - subprocess.run(
217   - [
218   - converter,
219   - "--model",
220   - model_source,
221   - "--output_dir",
222   - str(ct2_path),
223   - "--quantization",
224   - self.ct2_conversion_quantization,
225   - ],
226   - check=True,
227   - stdout=subprocess.PIPE,
228   - stderr=subprocess.PIPE,
229   - text=True,
  250 + convert_transformers_model(
  251 + model_source,
  252 + str(ct2_path),
  253 + self.ct2_conversion_quantization,
230 254 )
231   - except subprocess.CalledProcessError as exc:
232   - stderr = exc.stderr.strip()
  255 + except Exception as exc:
233 256 raise RuntimeError(
234   - f"Failed to convert model '{self.model}' to CTranslate2: {stderr or exc}"
  257 + f"Failed to convert model '{self.model}' to CTranslate2: {exc}"
235 258 ) from exc
236 259  
237 260 def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]:
... ... @@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
557 580 f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}"
558 581 )
559 582  
  583 + def _expected_local_model_types(self) -> Optional[set[str]]:
  584 + return {"marian"}
  585 +
560 586  
561 587 class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
562 588 """Local backend for NLLB models on CTranslate2."""
... ... @@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
619 645 if resolve_nllb_language_code(target_lang, self.language_codes) is None:
620 646 raise ValueError(f"Unsupported NLLB target language: {target_lang}")
621 647  
  648 + def _expected_local_model_types(self) -> Optional[set[str]]:
  649 + return {"m2m_100", "nllb_moe"}
  650 +
622 651 def _get_tokenizer_for_source(self, source_lang: str):
623 652 src_code = resolve_nllb_language_code(source_lang, self.language_codes)
624 653 if src_code is None:
... ...
translation/ct2_conversion.py 0 → 100644
... ... @@ -0,0 +1,52 @@
  1 +"""Helpers for converting Hugging Face translation models to CTranslate2."""
  2 +
  3 +from __future__ import annotations
  4 +
  5 +import copy
  6 +import logging
  7 +
  8 +logger = logging.getLogger(__name__)
  9 +
  10 +
  11 +def convert_transformers_model(
  12 + model_name_or_path: str,
  13 + output_dir: str,
  14 + quantization: str,
  15 + *,
  16 + force: bool = False,
  17 +) -> str:
  18 + from ctranslate2.converters import TransformersConverter
  19 + from transformers import AutoConfig
  20 +
  21 + class _CompatibleTransformersConverter(TransformersConverter):
  22 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  23 + try:
  24 + return super().load_model(model_class, resolved_model_name_or_path, **kwargs)
  25 + except TypeError as exc:
  26 + if "unexpected keyword argument 'dtype'" not in str(exc):
  27 + raise
  28 + if kwargs.get("dtype") is None and kwargs.get("torch_dtype") is None:
  29 + raise
  30 +
  31 + logger.warning(
  32 + "Retrying CTranslate2 model load without dtype hints | model=%s class=%s",
  33 + resolved_model_name_or_path,
  34 + getattr(model_class, "__name__", model_class),
  35 + )
  36 + retry_kwargs = dict(kwargs)
  37 + retry_kwargs.pop("dtype", None)
  38 + retry_kwargs.pop("torch_dtype", None)
  39 + config = retry_kwargs.get("config")
  40 + if config is None:
  41 + config = AutoConfig.from_pretrained(resolved_model_name_or_path)
  42 + else:
  43 + config = copy.deepcopy(config)
  44 + if hasattr(config, "dtype"):
  45 + config.dtype = None
  46 + if hasattr(config, "torch_dtype"):
  47 + config.torch_dtype = None
  48 + retry_kwargs["config"] = config
  49 + return super().load_model(model_class, resolved_model_name_or_path, **retry_kwargs)
  50 +
  51 + converter = _CompatibleTransformersConverter(model_name_or_path)
  52 + return converter.convert(output_dir=output_dir, quantization=quantization, force=force)
... ...
translation/service.py
... ... @@ -31,7 +31,12 @@ class TranslationService:
31 31 if not self._enabled_capabilities:
32 32 raise ValueError("No enabled translation backends found in services.translation.capabilities")
33 33 self._translation_cache = TranslationCache(self.config["cache"])
34   - self._backends = self._initialize_backends()
  34 + self._backends: Dict[str, TranslationBackendProtocol] = {}
  35 + self._backend_errors: Dict[str, str] = {}
  36 + self._initialize_backends()
  37 + if not self._backends:
  38 + details = ", ".join(f"{name}: {err}" for name, err in sorted(self._backend_errors.items())) or "unknown error"
  39 + raise RuntimeError(f"No translation backends could be initialized: {details}")
35 40  
36 41 def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]:
37 42 enabled: Dict[str, Dict[str, object]] = {}
... ... @@ -62,24 +67,47 @@ class TranslationService:
62 67 raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'")
63 68 return factory(name=name, cfg=cfg)
64 69  
65   - def _initialize_backends(self) -> Dict[str, TranslationBackendProtocol]:
66   - backends: Dict[str, TranslationBackendProtocol] = {}
67   - for name, capability_cfg in self._enabled_capabilities.items():
68   - backend_type = str(capability_cfg["backend"])
69   - logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type)
70   - backends[name] = self._create_backend(
  70 + def _load_backend(self, name: str) -> Optional[TranslationBackendProtocol]:
  71 + capability_cfg = self._enabled_capabilities.get(name)
  72 + if capability_cfg is None:
  73 + return None
  74 + if name in self._backends:
  75 + return self._backends[name]
  76 +
  77 + backend_type = str(capability_cfg["backend"])
  78 + logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type)
  79 + try:
  80 + backend = self._create_backend(
71 81 name=name,
72 82 backend_type=backend_type,
73 83 cfg=capability_cfg,
74 84 )
75   - logger.info(
76   - "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s",
  85 + except Exception as exc:
  86 + error_text = str(exc).strip() or exc.__class__.__name__
  87 + self._backend_errors[name] = error_text
  88 + logger.error(
  89 + "Translation backend initialization failed | model=%s backend=%s error=%s",
77 90 name,
78 91 backend_type,
79   - bool(capability_cfg.get("use_cache")),
80   - getattr(backends[name], "model", name),
  92 + error_text,
  93 + exc_info=True,
81 94 )
82   - return backends
  95 + return None
  96 +
  97 + self._backends[name] = backend
  98 + self._backend_errors.pop(name, None)
  99 + logger.info(
  100 + "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s",
  101 + name,
  102 + backend_type,
  103 + bool(capability_cfg.get("use_cache")),
  104 + getattr(backend, "model", name),
  105 + )
  106 + return backend
  107 +
  108 + def _initialize_backends(self) -> None:
  109 + for name, capability_cfg in self._enabled_capabilities.items():
  110 + self._load_backend(name)
83 111  
84 112 def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
85 113 from translation.backends.qwen_mt import QwenMTTranslationBackend
... ... @@ -178,13 +206,27 @@ class TranslationService:
178 206 def loaded_models(self) -> List[str]:
179 207 return list(self._backends.keys())
180 208  
  209 + @property
  210 + def failed_models(self) -> List[str]:
  211 + return list(self._backend_errors.keys())
  212 +
  213 + @property
  214 + def backend_errors(self) -> Dict[str, str]:
  215 + return dict(self._backend_errors)
  216 +
181 217 def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol:
182 218 normalized = normalize_translation_model(self.config, model)
183   - backend = self._backends.get(normalized)
  219 + backend = self._backends.get(normalized) or self._load_backend(normalized)
184 220 if backend is None:
185   - raise ValueError(
186   - f"Translation model '{normalized}' is not enabled. "
187   - f"Available models: {', '.join(self.available_models) or 'none'}"
  221 + if normalized not in self._enabled_capabilities:
  222 + raise ValueError(
  223 + f"Translation model '{normalized}' is not enabled. "
  224 + f"Available models: {', '.join(self.available_models) or 'none'}"
  225 + )
  226 + error_text = self._backend_errors.get(normalized) or "unknown initialization error"
  227 + raise RuntimeError(
  228 + f"Translation model '{normalized}' failed to initialize: {error_text}. "
  229 + f"Loaded models: {', '.join(self.loaded_models) or 'none'}"
188 230 )
189 231 return backend
190 232  
... ...