Compare View

switch
from
...
to
 
Commits (4)
  • tangwang
     
  • tangwang
     
  • tangwang
     
  • This branch is intended to differ from master only by deployment configuration for the test machine.
    
    - Add `.env.test.example` as a secrets-free override snippet to be appended onto `.env`.
      - Pins TEI to GPU mode (`TEI_DEVICE=cuda`) with `float16` for performance.
      - Pins a Tesla T4 compatible TEI image (`text-embeddings-inference:turing-1.9`) to avoid
        compute-capability mismatch errors (T4=sm75 vs non-turing images compiled for sm80).
      - Keeps TEI request limits aligned with current service settings (`TEI_MAX_BATCH_TOKENS=2048`,
        `TEI_MAX_CLIENT_BATCH_SIZE=8`) and provides an example BGE-M3 snapshot path.
    - Extend `.env.example` with guidance on selecting the correct TEI image tag (`turing-*` for T4,
      `cuda-*` for Ampere+) and optional mirror repository override.
    
    No credentials should be committed; `.env` remains local-only. (Review note: the first hunk below shows a literal `REDIS_PASSWORD` value — confirm that file is untracked, and rotate the credential if it ever landed in history.)
    
    Made-with: Cursor
    tangwang
     
@@ -58,9 +58,10 @@ TEI_MAX_CLIENT_BATCH_SIZE=8 @@ -58,9 +58,10 @@ TEI_MAX_CLIENT_BATCH_SIZE=8
58 BACKEND_PROXY_URL=http://127.0.0.1:6002 58 BACKEND_PROXY_URL=http://127.0.0.1:6002
59 59
60 # ===== test env connectivity overrides (2026-04-12) ===== 60 # ===== test env connectivity overrides (2026-04-12) =====
61 -REDIS_HOST=127.0.0.1 61 +REDIS_HOST=localhost
62 REDIS_PORT=6479 62 REDIS_PORT=6479
63 REDIS_PASSWORD=BMfv5aI31kgHWtlx 63 REDIS_PASSWORD=BMfv5aI31kgHWtlx
  64 +REDIS_DB=6
64 DB_HOST=120.79.247.228 65 DB_HOST=120.79.247.228
65 DB_PORT=3316 66 DB_PORT=3316
66 DB_DATABASE=saas 67 DB_DATABASE=saas
@@ -12,6 +12,8 @@ ES_PASSWORD= @@ -12,6 +12,8 @@ ES_PASSWORD=
12 # Redis (生产默认 10.200.16.14:6479,密码见 docs/QUICKSTART.md §1.6) 12 # Redis (生产默认 10.200.16.14:6479,密码见 docs/QUICKSTART.md §1.6)
13 REDIS_HOST=10.200.16.14 13 REDIS_HOST=10.200.16.14
14 REDIS_PORT=6479 14 REDIS_PORT=6479
  15 +# 逻辑库编号(与 config.yaml infrastructure.redis.snapshot_db 一致;测试可与生产共用实例时用不同 db 隔离)
  16 +REDIS_DB=0
15 REDIS_PASSWORD= 17 REDIS_PASSWORD=
16 18
17 # DeepL Translation API 19 # DeepL Translation API
@@ -44,6 +46,14 @@ EMBEDDING_BACKEND=tei @@ -44,6 +46,14 @@ EMBEDDING_BACKEND=tei
44 TEI_BASE_URL=http://127.0.0.1:8080 46 TEI_BASE_URL=http://127.0.0.1:8080
45 TEI_DEVICE=cuda 47 TEI_DEVICE=cuda
46 TEI_VERSION=1.9 48 TEI_VERSION=1.9
  49 +# Optional: override TEI docker image repository (useful for mirrors).
  50 +# TEI_IMAGE_REPO=ghcr.m.daocloud.io/huggingface/text-embeddings-inference
  51 +#
  52 +# Optional: pin an explicit TEI image tag.
  53 +# - For Tesla T4 (compute capability 7.5), prefer the `turing-*` image tag, e.g.:
  54 +# TEI_IMAGE=ghcr.m.daocloud.io/huggingface/text-embeddings-inference:turing-1.9
  55 +# - For Ampere+ GPUs, prefer `cuda-*` image tag, e.g.:
  56 +# TEI_IMAGE=ghcr.m.daocloud.io/huggingface/text-embeddings-inference:cuda-1.9
47 TEI_MAX_BATCH_TOKENS=2048 57 TEI_MAX_BATCH_TOKENS=2048
48 TEI_MAX_CLIENT_BATCH_SIZE=8 58 TEI_MAX_CLIENT_BATCH_SIZE=8
49 TEI_HEALTH_TIMEOUT_SEC=300 59 TEI_HEALTH_TIMEOUT_SEC=300
.env.test.example 0 → 100644
@@ -0,0 +1,40 @@ @@ -0,0 +1,40 @@
  1 +# Test environment overrides example (no secrets).
  2 +#
  3 +# Usage:
  4 +# cp .env.example .env
  5 +# cat .env.test.example >> .env
  6 +#
  7 +# Notes:
  8 +# - This repo is multi-service; values below focus on local test deployment.
  9 +# - Keep real credentials (Redis/MySQL/ES passwords) out of VCS.
  10 +
  11 +# ===== runtime / namespace =====
  12 +RUNTIME_ENV=test
  13 +ES_INDEX_NAMESPACE=test_
  14 +
  15 +# ===== Elasticsearch (example: local docker on non-default port) =====
  16 +ES_HOST=http://127.0.0.1:19200
  17 +ES_USERNAME=
  18 +ES_PASSWORD=
  19 +ES_DOCKER_HTTP_PORT=19200
  20 +ES_DOCKER_CONTAINER_NAME=saas-search-es9-test
  21 +
  22 +# ===== HuggingFace cache =====
  23 +HF_CACHE_DIR=/data/tw/.cache/huggingface
  24 +
  25 +# ===== TEI (text embeddings inference) =====
  26 +# Service port exposed by container (host:8080 -> container:80)
  27 +TEI_PORT=8080
  28 +# Use GPU when available
  29 +TEI_DEVICE=cuda
  30 +# Use float16 for performance on GPU
  31 +TEI_DTYPE=float16
  32 +# IMPORTANT for Tesla T4 (compute capability 7.5): use turing image tag
  33 +TEI_IMAGE=ghcr.m.daocloud.io/huggingface/text-embeddings-inference:turing-1.9
  34 +# Example pinned model snapshot path (update per-machine)
  35 +TEI_MODEL_ID=/data/hub/models--BAAI--bge-m3/snapshots/5617a9f61b028005a4858fdac845db406aefb181
  36 +TEI_MAX_BATCH_TOKENS=2048
  37 +TEI_MAX_CLIENT_BATCH_SIZE=8
  38 +TEI_HEALTH_TIMEOUT_SEC=240
  39 +TEI_CONTAINER_NAME=saas-search-tei-test
  40 +
api/translator_app.py
@@ -271,16 +271,20 @@ async def lifespan(_: FastAPI): @@ -271,16 +271,20 @@ async def lifespan(_: FastAPI):
271 """Initialize all enabled translation backends on process startup.""" 271 """Initialize all enabled translation backends on process startup."""
272 logger.info("Starting Translation Service API") 272 logger.info("Starting Translation Service API")
273 service = get_translation_service() 273 service = get_translation_service()
  274 + failed_models = list(getattr(service, "failed_models", []))
  275 + backend_errors = dict(getattr(service, "backend_errors", {}))
274 logger.info( 276 logger.info(
275 - "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s", 277 + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s failed_models=%s",
276 service.config["default_model"], 278 service.config["default_model"],
277 service.config["default_scene"], 279 service.config["default_scene"],
278 service.available_models, 280 service.available_models,
279 service.loaded_models, 281 service.loaded_models,
  282 + failed_models,
280 ) 283 )
281 logger.info( 284 logger.info(
282 - "Translation backends initialized on startup | models=%s", 285 + "Translation backends initialized on startup | loaded=%s failed=%s",
283 service.loaded_models, 286 service.loaded_models,
  287 + backend_errors,
284 ) 288 )
285 verbose_logger.info( 289 verbose_logger.info(
286 "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s", 290 "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s",
@@ -316,11 +320,14 @@ async def health_check(): @@ -316,11 +320,14 @@ async def health_check():
316 """Health check endpoint.""" 320 """Health check endpoint."""
317 try: 321 try:
318 service = get_translation_service() 322 service = get_translation_service()
  323 + failed_models = list(getattr(service, "failed_models", []))
  324 + backend_errors = dict(getattr(service, "backend_errors", {}))
319 logger.info( 325 logger.info(
320 - "Health check | default_model=%s default_scene=%s loaded_models=%s", 326 + "Health check | default_model=%s default_scene=%s loaded_models=%s failed_models=%s",
321 service.config["default_model"], 327 service.config["default_model"],
322 service.config["default_scene"], 328 service.config["default_scene"],
323 service.loaded_models, 329 service.loaded_models,
  330 + failed_models,
324 ) 331 )
325 return { 332 return {
326 "status": "healthy", 333 "status": "healthy",
@@ -330,6 +337,8 @@ async def health_check(): @@ -330,6 +337,8 @@ async def health_check():
330 "available_models": service.available_models, 337 "available_models": service.available_models,
331 "enabled_capabilities": get_enabled_translation_models(service.config), 338 "enabled_capabilities": get_enabled_translation_models(service.config),
332 "loaded_models": service.loaded_models, 339 "loaded_models": service.loaded_models,
  340 + "failed_models": failed_models,
  341 + "backend_errors": backend_errors,
333 } 342 }
334 except Exception as e: 343 except Exception as e:
335 logger.error(f"Health check failed: {e}") 344 logger.error(f"Health check failed: {e}")
@@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request): @@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request):
463 latency_ms = (time.perf_counter() - request_started) * 1000 472 latency_ms = (time.perf_counter() - request_started) * 1000
464 logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms) 473 logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms)
465 raise HTTPException(status_code=400, detail=str(e)) from e 474 raise HTTPException(status_code=400, detail=str(e)) from e
  475 + except RuntimeError as e:
  476 + latency_ms = (time.perf_counter() - request_started) * 1000
  477 + logger.warning("Translation backend unavailable | error=%s latency_ms=%.2f", e, latency_ms)
  478 + raise HTTPException(status_code=503, detail=str(e)) from e
466 except Exception as e: 479 except Exception as e:
467 latency_ms = (time.perf_counter() - request_started) * 1000 480 latency_ms = (time.perf_counter() - request_started) * 1000
468 logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True) 481 logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True)
config/environments/test.yaml
1 -query_config:  
2 - enable_text_embedding: true  
3 - text_embedding_field: title_embedding  
4 - zh_to_en_model: deepl  
5 - en_to_zh_model: deepl  
6 - default_translation_model: deepl  
7 - zh_to_en_model__source_not_in_index: deepl  
8 - en_to_zh_model__source_not_in_index: deepl  
9 - default_translation_model__source_not_in_index: deepl  
10 - 1 +# 仅覆盖与主干不同的测试环境项;query / translation / redis 等与 config.yaml 一致处不写在此文件中。
11 infrastructure: 2 infrastructure:
12 elasticsearch: 3 elasticsearch:
13 host: http://127.0.0.1:19200 4 host: http://127.0.0.1:19200
@@ -28,24 +19,6 @@ services: @@ -28,24 +19,6 @@ services:
28 model_id: BAAI/bge-m3 19 model_id: BAAI/bge-m3
29 timeout_sec: 60 20 timeout_sec: 60
30 max_client_batch_size: 8 21 max_client_batch_size: 8
31 - translation:  
32 - service_url: http://127.0.0.1:6006  
33 - default_model: deepl  
34 - default_scene: general  
35 - timeout_sec: 10.0  
36 - capabilities:  
37 - qwen-mt:  
38 - enabled: false  
39 - llm:  
40 - enabled: false  
41 - deepl:  
42 - enabled: true  
43 - nllb-200-distilled-600m:  
44 - enabled: false  
45 - opus-mt-zh-en:  
46 - enabled: false  
47 - opus-mt-en-zh:  
48 - enabled: false  
49 22
50 fine_rank: 23 fine_rank:
51 enabled: false 24 enabled: false
@@ -655,6 +655,14 @@ class AppConfigLoader: @@ -655,6 +655,14 @@ class AppConfigLoader:
655 655
656 translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {} 656 translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {}
657 normalized_translation = build_translation_config(translation_raw) 657 normalized_translation = build_translation_config(translation_raw)
  658 + local_translation_backends = {"local_nllb", "local_marian"}
  659 + for capability_name, capability_cfg in normalized_translation["capabilities"].items():
  660 + backend_name = str(capability_cfg.get("backend") or "").strip().lower()
  661 + if backend_name not in local_translation_backends:
  662 + continue
  663 + for path_key in ("model_dir", "ct2_model_dir"):
  664 + if capability_cfg.get(path_key) not in (None, ""):
  665 + capability_cfg[path_key] = str(self._resolve_project_path_value(capability_cfg[path_key]).resolve())
658 translation_config = TranslationServiceConfig( 666 translation_config = TranslationServiceConfig(
659 endpoint=str(normalized_translation["service_url"]).rstrip("/"), 667 endpoint=str(normalized_translation["service_url"]).rstrip("/"),
660 timeout_sec=float(normalized_translation["timeout_sec"]), 668 timeout_sec=float(normalized_translation["timeout_sec"]),
@@ -749,7 +757,7 @@ class AppConfigLoader: @@ -749,7 +757,7 @@ class AppConfigLoader:
749 port=port, 757 port=port,
750 backend=backend_name, 758 backend=backend_name,
751 runtime_dir=( 759 runtime_dir=(
752 - str(v) 760 + str(self._resolve_project_path_value(v).resolve())
753 if (v := instance_raw.get("runtime_dir")) not in (None, "") 761 if (v := instance_raw.get("runtime_dir")) not in (None, "")
754 else None 762 else None
755 ), 763 ),
@@ -787,6 +795,12 @@ class AppConfigLoader: @@ -787,6 +795,12 @@ class AppConfigLoader:
787 rerank=rerank_config, 795 rerank=rerank_config,
788 ) 796 )
789 797
  798 + def _resolve_project_path_value(self, value: Any) -> Path:
  799 + candidate = Path(str(value)).expanduser()
  800 + if candidate.is_absolute():
  801 + return candidate
  802 + return self.project_root / candidate
  803 +
790 def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig: 804 def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig:
791 if not isinstance(raw, dict): 805 if not isinstance(raw, dict):
792 raise ConfigurationError("tenant_config must be a mapping") 806 raise ConfigurationError("tenant_config must be a mapping")
embeddings/redis_embedding_cache.py
@@ -53,6 +53,7 @@ class RedisEmbeddingCache: @@ -53,6 +53,7 @@ class RedisEmbeddingCache:
53 client = redis.Redis( 53 client = redis.Redis(
54 host=redis_config.host, 54 host=redis_config.host,
55 port=redis_config.port, 55 port=redis_config.port,
  56 + db=redis_config.snapshot_db,
56 password=redis_config.password, 57 password=redis_config.password,
57 decode_responses=False, 58 decode_responses=False,
58 socket_timeout=redis_config.socket_timeout, 59 socket_timeout=redis_config.socket_timeout,
frontend/static/js/app.js
@@ -316,7 +316,10 @@ async function performSearch(page = 1) { @@ -316,7 +316,10 @@ async function performSearch(page = 1) {
316 document.getElementById('productGrid').innerHTML = ''; 316 document.getElementById('productGrid').innerHTML = '';
317 317
318 try { 318 try {
319 - const response = await fetch(`${API_BASE_URL}/search/`, { 319 + const searchUrl = new URL(`${API_BASE_URL}/search/`, window.location.origin);
  320 + searchUrl.searchParams.set('tenant_id', tenantId);
  321 +
  322 + const response = await fetch(searchUrl.toString(), {
320 method: 'POST', 323 method: 'POST',
321 headers: { 324 headers: {
322 'Content-Type': 'application/json', 325 'Content-Type': 'application/json',
indexer/product_enrich.py
@@ -124,6 +124,7 @@ try: @@ -124,6 +124,7 @@ try:
124 _anchor_redis = redis.Redis( 124 _anchor_redis = redis.Redis(
125 host=_REDIS_CONFIG.host, 125 host=_REDIS_CONFIG.host,
126 port=_REDIS_CONFIG.port, 126 port=_REDIS_CONFIG.port,
  127 + db=_REDIS_CONFIG.snapshot_db,
127 password=_REDIS_CONFIG.password, 128 password=_REDIS_CONFIG.password,
128 decode_responses=True, 129 decode_responses=True,
129 socket_timeout=_REDIS_CONFIG.socket_timeout, 130 socket_timeout=_REDIS_CONFIG.socket_timeout,
requirements_translator_service.txt
@@ -13,7 +13,8 @@ httpx>=0.24.0 @@ -13,7 +13,8 @@ httpx>=0.24.0
13 tqdm>=4.65.0 13 tqdm>=4.65.0
14 14
15 torch>=2.0.0 15 torch>=2.0.0
16 -transformers>=4.30.0 16 +# Keep translator conversions on the last verified NLLB-compatible release line.
  17 +transformers>=4.51.0,<4.52.0
17 ctranslate2>=4.7.0 18 ctranslate2>=4.7.0
18 sentencepiece>=0.2.0 19 sentencepiece>=0.2.0
19 sacremoses>=0.1.1 20 sacremoses>=0.1.1
scripts/download_translation_models.py 100755 → 100644
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 -"""Download local translation models declared in services.translation.capabilities.""" 2 +"""Backward-compatible entrypoint for translation model downloads."""
3 3
4 from __future__ import annotations 4 from __future__ import annotations
5 5
6 -import argparse  
7 -import os 6 +import runpy
8 from pathlib import Path 7 from pathlib import Path
9 -import shutil  
10 -import subprocess  
11 -import sys  
12 -from typing import Iterable  
13 -  
14 -from huggingface_hub import snapshot_download  
15 -  
16 -PROJECT_ROOT = Path(__file__).resolve().parent.parent  
17 -if str(PROJECT_ROOT) not in sys.path:  
18 - sys.path.insert(0, str(PROJECT_ROOT))  
19 -os.environ.setdefault("HF_HUB_DISABLE_XET", "1")  
20 -  
21 -from config.services_config import get_translation_config  
22 -  
23 -  
24 -LOCAL_BACKENDS = {"local_nllb", "local_marian"}  
25 -  
26 -  
27 -def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]:  
28 - cfg = get_translation_config()  
29 - capabilities = cfg.get("capabilities", {}) if isinstance(cfg, dict) else {}  
30 - for name, capability in capabilities.items():  
31 - backend = str(capability.get("backend") or "").strip().lower()  
32 - if backend not in LOCAL_BACKENDS:  
33 - continue  
34 - if selected and name not in selected:  
35 - continue  
36 - yield name, capability  
37 -  
38 -  
39 -def _compute_ct2_output_dir(capability: dict) -> Path:  
40 - custom = str(capability.get("ct2_model_dir") or "").strip()  
41 - if custom:  
42 - return Path(custom).expanduser()  
43 - model_dir = Path(str(capability.get("model_dir") or "")).expanduser()  
44 - compute_type = str(capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default").strip().lower()  
45 - normalized = compute_type.replace("_", "-")  
46 - return model_dir / f"ctranslate2-{normalized}"  
47 -  
48 -  
49 -def _resolve_converter_binary() -> str:  
50 - candidate = shutil.which("ct2-transformers-converter")  
51 - if candidate:  
52 - return candidate  
53 - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter"  
54 - if venv_candidate.exists():  
55 - return str(venv_candidate)  
56 - raise RuntimeError(  
57 - "ct2-transformers-converter was not found. "  
58 - "Install ctranslate2 in the active Python environment first."  
59 - )  
60 -  
61 -  
62 -def convert_to_ctranslate2(name: str, capability: dict) -> None:  
63 - model_id = str(capability.get("model_id") or "").strip()  
64 - model_dir = Path(str(capability.get("model_dir") or "")).expanduser()  
65 - model_source = str(model_dir if model_dir.exists() else model_id)  
66 - output_dir = _compute_ct2_output_dir(capability)  
67 - if (output_dir / "model.bin").exists():  
68 - print(f"[skip-convert] {name} -> {output_dir}")  
69 - return  
70 - quantization = str(  
71 - capability.get("ct2_conversion_quantization")  
72 - or capability.get("ct2_compute_type")  
73 - or capability.get("torch_dtype")  
74 - or "default"  
75 - ).strip()  
76 - output_dir.parent.mkdir(parents=True, exist_ok=True)  
77 - print(f"[convert] {name} -> {output_dir} ({quantization})")  
78 - subprocess.run(  
79 - [  
80 - _resolve_converter_binary(),  
81 - "--model",  
82 - model_source,  
83 - "--output_dir",  
84 - str(output_dir),  
85 - "--quantization",  
86 - quantization,  
87 - ],  
88 - check=True,  
89 - )  
90 - print(f"[converted] {name}")  
91 -  
92 -  
93 -def main() -> None:  
94 - parser = argparse.ArgumentParser(description="Download local translation models")  
95 - parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models")  
96 - parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download")  
97 - parser.add_argument(  
98 - "--convert-ctranslate2",  
99 - action="store_true",  
100 - help="Also convert the downloaded Hugging Face models into CTranslate2 format",  
101 - )  
102 - args = parser.parse_args()  
103 -  
104 - selected = {item.strip().lower() for item in args.models if item.strip()} or None  
105 - if not args.all_local and not selected:  
106 - parser.error("pass --all-local or --models <name> ...")  
107 -  
108 - for name, capability in iter_local_capabilities(selected):  
109 - model_id = str(capability.get("model_id") or "").strip()  
110 - model_dir = Path(str(capability.get("model_dir") or "")).expanduser()  
111 - if not model_id or not model_dir:  
112 - raise ValueError(f"Capability '{name}' must define model_id and model_dir")  
113 - model_dir.parent.mkdir(parents=True, exist_ok=True)  
114 - print(f"[download] {name} -> {model_dir} ({model_id})")  
115 - snapshot_download(  
116 - repo_id=model_id,  
117 - local_dir=str(model_dir),  
118 - )  
119 - print(f"[done] {name}")  
120 - if args.convert_ctranslate2:  
121 - convert_to_ctranslate2(name, capability)  
122 8
123 9
124 if __name__ == "__main__": 10 if __name__ == "__main__":
125 - main() 11 + target = Path(__file__).resolve().parent / "translation" / "download_translation_models.py"
  12 + runpy.run_path(str(target), run_name="__main__")
scripts/frontend/frontend_server.py 0 → 100755
@@ -0,0 +1,278 @@ @@ -0,0 +1,278 @@
#!/usr/bin/env python3
"""
Simple HTTP server for saas-search frontend.

Serves static files from the repo's frontend/ directory, optionally injects
window.API_BASE_URL into HTML, and proxies /search/, /admin/ and /indexer/
requests to the backend service so the browser stays same-origin.
"""

import http.server
import socketserver
import os
import sys
import logging
import time
import urllib.request
import urllib.error
from collections import defaultdict, deque
from pathlib import Path
from dotenv import load_dotenv

# Load .env from the project root (two levels above scripts/frontend/).
project_root = Path(__file__).resolve().parents[2]
load_dotenv(project_root / '.env')

# API_BASE_URL is NOT injected by default, so a stale .env value cannot
# silently override the same-origin proxy setup. It is injected into HTML
# only when FRONTEND_INJECT_API_BASE_URL=1 is explicitly set.
API_BASE_URL = os.getenv('API_BASE_URL') or None
INJECT_API_BASE_URL = os.getenv('FRONTEND_INJECT_API_BASE_URL', '0') == '1'
# Backend proxy target for same-origin API forwarding
BACKEND_PROXY_URL = os.getenv('BACKEND_PROXY_URL', 'http://127.0.0.1:6002').rstrip('/')

# Serve static assets relative to the repo's frontend/ directory.
frontend_dir = project_root / 'frontend'
os.chdir(frontend_dir)

# FRONTEND_PORT is the canonical config; keep PORT as a secondary fallback.
# `or` guards against empty-string env values, which would crash int('').
PORT = int(os.getenv('FRONTEND_PORT') or os.getenv('PORT') or 6003)

# Configure logging to suppress scanner noise
logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')
class RateLimitingMixin:
    """Mixin for rate limiting requests by IP address over a sliding window."""

    # Shared across all handler instances: ip -> deque of request timestamps.
    request_counts = defaultdict(deque)
    rate_limit = 100  # max requests accepted per window
    window = 60  # window length in seconds

    @classmethod
    def is_rate_limited(cls, ip):
        """Return True (and do not record the request) if *ip* hit the limit.

        Timestamps older than the window are pruned on every call, so each
        per-IP deque stays bounded by rate_limit entries.
        """
        now = time.time()
        bucket = cls.request_counts[ip]

        # Clean requests that have fallen out of the sliding window.
        while bucket and bucket[0] < now - cls.window:
            bucket.popleft()

        # Use >= so at most `rate_limit` requests pass per window.
        # (The original `>` was off by one and admitted rate_limit + 1.)
        if len(bucket) >= cls.rate_limit:
            return True

        bucket.append(now)
        return False
class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin):
    """Custom request handler with CORS support and robust error handling.

    Static files are served from the current working directory (frontend/);
    API paths under /search/, /admin/ and /indexer/ are proxied to
    BACKEND_PROXY_URL so the browser never needs a cross-origin call.
    """

    _ALLOWED_CORS_HEADERS = "Content-Type, X-Tenant-ID, X-Request-ID, Referer"

    def _is_proxy_path(self, path: str) -> bool:
        """Return True for API paths that should be forwarded to backend service."""
        return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/')

    def _proxy_to_backend(self):
        """Proxy current request to backend service on the GPU server."""
        target_url = f"{BACKEND_PROXY_URL}{self.path}"
        method = self.command.upper()

        try:
            content_length = int(self.headers.get('Content-Length', '0'))
        except ValueError:
            content_length = 0
        body = self.rfile.read(content_length) if content_length > 0 else None

        # Forward everything except hop-by-hop / recomputed headers.
        forward_headers = {}
        for key, value in self.headers.items():
            lk = key.lower()
            if lk in ('host', 'content-length', 'connection'):
                continue
            forward_headers[key] = value

        req = urllib.request.Request(
            target_url,
            data=body,
            headers=forward_headers,
            method=method,
        )

        try:
            with urllib.request.urlopen(req, timeout=30) as resp:
                resp_body = resp.read()
                self.send_response(resp.getcode())
                for header, value in resp.getheaders():
                    lh = header.lower()
                    if lh in ('transfer-encoding', 'connection', 'content-length'):
                        continue
                    self.send_header(header, value)
                self.end_headers()
                self.wfile.write(resp_body)
        except urllib.error.HTTPError as e:
            # Relay backend HTTP errors (4xx/5xx) verbatim to the browser.
            err_body = e.read() if hasattr(e, 'read') else b''
            self.send_response(e.code)
            if e.headers:
                for header, value in e.headers.items():
                    lh = header.lower()
                    if lh in ('transfer-encoding', 'connection', 'content-length'):
                        continue
                    self.send_header(header, value)
            self.end_headers()
            if err_body:
                self.wfile.write(err_body)
        except Exception as e:
            logging.error(f"Backend proxy error for {method} {self.path}: {e}")
            self.send_response(502)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.end_headers()
            self.wfile.write(b'{"error":"Bad Gateway: backend proxy failed"}')

    def do_GET(self):
        """Handle GET requests with API config injection."""
        path, _, query = self.path.partition('?')

        # Proxy API paths to backend first
        if self._is_proxy_path(path):
            self._proxy_to_backend()
            return

        # Route / to index.html, keeping any query string intact.
        # (The original concatenation dropped the '?' separator, yielding
        # broken paths like '/index.htmlfoo=bar'.)
        if path in ('', '/'):
            path = '/index.html'
            self.path = path + (f'?{query}' if query else '')

        # Inject API config for HTML files (match on the query-stripped path).
        if path.endswith('.html'):
            self._serve_html_with_config()
        else:
            super().do_GET()

    def _serve_html_with_config(self):
        """Serve HTML with optional API_BASE_URL injected."""
        try:
            # Resolve against the served directory and reject traversal
            # (e.g. /../../secret.html) — a bare lstrip('/') is not safe.
            rel_path = self.path.split('?', 1)[0].lstrip('/')
            base_dir = os.path.realpath(os.getcwd())
            file_path = os.path.realpath(os.path.join(base_dir, rel_path))
            if file_path != base_dir and not file_path.startswith(base_dir + os.sep):
                self.send_error(404)
                return
            if not os.path.exists(file_path):
                self.send_error(404)
                return

            with open(file_path, 'r', encoding='utf-8') as f:
                html = f.read()

            # API_BASE_URL is injected only when explicitly enabled, so a
            # stale .env value (e.g. http://host:6002) cannot override the
            # same-origin proxy behaviour.
            if INJECT_API_BASE_URL and API_BASE_URL:
                config_script = f'<script>window.API_BASE_URL="{API_BASE_URL}";</script>\n '
                html = html.replace('<script src="/static/js/app.js', config_script + '<script src="/static/js/app.js', 1)

            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.end_headers()
            self.wfile.write(html.encode('utf-8'))
        except Exception as e:
            logging.error(f"Error serving HTML: {e}")
            self.send_error(500)

    def do_POST(self):
        """Handle POST requests. Proxy API requests to backend."""
        path = self.path.split('?')[0]
        if self._is_proxy_path(path):
            self._proxy_to_backend()
            return
        self.send_error(405, "Method Not Allowed")

    def setup(self):
        """Setup with error handling."""
        try:
            super().setup()
        except Exception:
            pass  # Silently handle setup errors from scanners

    def handle_one_request(self):
        """Handle single request with error catching."""
        try:
            # Check rate limiting
            client_ip = self.client_address[0]
            if self.is_rate_limited(client_ip):
                logging.warning(f"Rate limiting IP: {client_ip}")
                self.send_error(429, "Too Many Requests")
                return

            super().handle_one_request()
        except (ConnectionResetError, BrokenPipeError):
            # Client disconnected prematurely - common with scanners
            pass
        except UnicodeDecodeError:
            # Binary data received - not HTTP
            pass
        except Exception as e:
            # Log unexpected errors but don't crash
            logging.debug(f"Request handling error: {e}")

    def log_message(self, format, *args):
        """Suppress logging for malformed requests from scanners."""
        message = format % args
        # Filter out scanner noise
        noise_patterns = [
            "code 400",
            "Bad request",
            "Bad request version",
            "Bad HTTP/0.9 request type",
            "Bad request syntax"
        ]
        if any(pattern in message for pattern in noise_patterns):
            return
        # Only log legitimate requests
        if message and not message.startswith(" ") and len(message) > 10:
            super().log_message(format, *args)

    def end_headers(self):
        """Append CORS and basic security headers to every response."""
        # Add CORS headers
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', self._ALLOWED_CORS_HEADERS)
        # Add security headers
        self.send_header('X-Content-Type-Options', 'nosniff')
        self.send_header('X-Frame-Options', 'DENY')
        self.send_header('X-XSS-Protection', '1; mode=block')
        super().end_headers()

    def do_OPTIONS(self):
        """Handle OPTIONS (CORS preflight) requests without hitting the backend."""
        try:
            path = self.path.split('?')[0]
            if self._is_proxy_path(path):
                self.send_response(204)
                self.end_headers()
                return
            self.send_response(200)
            self.end_headers()
        except Exception:
            pass
class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
    """TCP server that dispatches each request on its own daemon thread.

    allow_reuse_address lets the server rebind quickly after a restart;
    daemon_threads ensures in-flight handlers never block interpreter exit.
    """

    allow_reuse_address = True
    daemon_threads = True
if __name__ == '__main__':
    # Fail fast with a friendly message if the port is taken. Note this
    # pre-check is best-effort (TOCTOU): the port could still be grabbed
    # between this probe and the real bind below.
    import socket
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind(("", PORT))
        sock.close()
    except OSError:
        print(f"ERROR: Port {PORT} is already in use.")
        # Plain string: the original used an f-string with no placeholders.
        print("Please stop the existing server or use a different port.")
        print(f"To stop existing server: kill $(lsof -t -i:{PORT})")
        sys.exit(1)

    # Create threaded server for better concurrency
    with ThreadedTCPServer(("", PORT), MyHTTPRequestHandler) as httpd:
        print(f"Frontend server started at http://localhost:{PORT}")
        print(f"Serving files from: {os.getcwd()}")
        print("\nPress Ctrl+C to stop the server")

        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nShutting down server...")
            httpd.shutdown()
            print("Server stopped")
            sys.exit(0)
        except Exception as e:
            print(f"Server error: {e}")
            sys.exit(1)
scripts/frontend_server.py 100755 → 100644
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 -"""  
3 -Simple HTTP server for saas-search frontend.  
4 -""" 2 +"""Backward-compatible frontend server entrypoint."""
5 3
6 -import http.server  
7 -import socketserver  
8 -import os  
9 -import sys  
10 -import logging  
11 -import time  
12 -import urllib.request  
13 -import urllib.error  
14 -from collections import defaultdict, deque  
15 -from pathlib import Path  
16 -from dotenv import load_dotenv  
17 -  
18 -# Load .env file  
19 -project_root = Path(__file__).parent.parent  
20 -load_dotenv(project_root / '.env')  
21 -  
22 -# Get API_BASE_URL from environment(默认不注入,避免被旧 .env 覆盖同源策略)  
23 -# 仅当显式设置 FRONTEND_INJECT_API_BASE_URL=1 时才注入 window.API_BASE_URL。  
24 -API_BASE_URL = os.getenv('API_BASE_URL') or None  
25 -INJECT_API_BASE_URL = os.getenv('FRONTEND_INJECT_API_BASE_URL', '0') == '1'  
26 -# Backend proxy target for same-origin API forwarding  
27 -BACKEND_PROXY_URL = os.getenv('BACKEND_PROXY_URL', 'http://127.0.0.1:6002').rstrip('/')  
28 -  
29 -# Change to frontend directory  
30 -frontend_dir = os.path.join(os.path.dirname(__file__), '../frontend')  
31 -os.chdir(frontend_dir)  
32 -  
33 -# FRONTEND_PORT is the canonical config; keep PORT as a secondary fallback.  
34 -PORT = int(os.getenv('FRONTEND_PORT', os.getenv('PORT', 6003)))  
35 -  
36 -# Configure logging to suppress scanner noise  
37 -logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')  
38 -  
39 -class RateLimitingMixin:  
40 - """Mixin for rate limiting requests by IP address."""  
41 - request_counts = defaultdict(deque)  
42 - rate_limit = 100 # requests per minute  
43 - window = 60 # seconds  
44 -  
45 - @classmethod  
46 - def is_rate_limited(cls, ip):  
47 - now = time.time()  
48 -  
49 - # Clean old requests  
50 - while cls.request_counts[ip] and cls.request_counts[ip][0] < now - cls.window:  
51 - cls.request_counts[ip].popleft()  
52 -  
53 - # Check rate limit  
54 - if len(cls.request_counts[ip]) > cls.rate_limit:  
55 - return True  
56 -  
57 - cls.request_counts[ip].append(now)  
58 - return False  
59 -  
60 -class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin):  
61 - """Custom request handler with CORS support and robust error handling."""  
62 -  
63 - def _is_proxy_path(self, path: str) -> bool:  
64 - """Return True for API paths that should be forwarded to backend service."""  
65 - return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/')  
66 -  
67 - def _proxy_to_backend(self):  
68 - """Proxy current request to backend service on the GPU server."""  
69 - target_url = f"{BACKEND_PROXY_URL}{self.path}"  
70 - method = self.command.upper()  
71 -  
72 - try:  
73 - content_length = int(self.headers.get('Content-Length', '0'))  
74 - except ValueError:  
75 - content_length = 0  
76 - body = self.rfile.read(content_length) if content_length > 0 else None 4 +from __future__ import annotations
77 5
78 - forward_headers = {}  
79 - for key, value in self.headers.items():  
80 - lk = key.lower()  
81 - if lk in ('host', 'content-length', 'connection'):  
82 - continue  
83 - forward_headers[key] = value  
84 -  
85 - req = urllib.request.Request(  
86 - target_url,  
87 - data=body,  
88 - headers=forward_headers,  
89 - method=method,  
90 - )  
91 -  
92 - try:  
93 - with urllib.request.urlopen(req, timeout=30) as resp:  
94 - resp_body = resp.read()  
95 - self.send_response(resp.getcode())  
96 - for header, value in resp.getheaders():  
97 - lh = header.lower()  
98 - if lh in ('transfer-encoding', 'connection', 'content-length'):  
99 - continue  
100 - self.send_header(header, value)  
101 - self.end_headers()  
102 - self.wfile.write(resp_body)  
103 - except urllib.error.HTTPError as e:  
104 - err_body = e.read() if hasattr(e, 'read') else b''  
105 - self.send_response(e.code)  
106 - if e.headers:  
107 - for header, value in e.headers.items():  
108 - lh = header.lower()  
109 - if lh in ('transfer-encoding', 'connection', 'content-length'):  
110 - continue  
111 - self.send_header(header, value)  
112 - self.end_headers()  
113 - if err_body:  
114 - self.wfile.write(err_body)  
115 - except Exception as e:  
116 - logging.error(f"Backend proxy error for {method} {self.path}: {e}")  
117 - self.send_response(502)  
118 - self.send_header('Content-Type', 'application/json; charset=utf-8')  
119 - self.end_headers()  
120 - self.wfile.write(b'{"error":"Bad Gateway: backend proxy failed"}')  
121 -  
122 - def do_GET(self):  
123 - """Handle GET requests with API config injection."""  
124 - path = self.path.split('?')[0]  
125 -  
126 - # Proxy API paths to backend first  
127 - if self._is_proxy_path(path):  
128 - self._proxy_to_backend()  
129 - return  
130 -  
131 - # Route / to index.html  
132 - if path == '/' or path == '':  
133 - self.path = '/index.html' + (self.path.split('?', 1)[1] if '?' in self.path else '')  
134 -  
135 - # Inject API config for HTML files  
136 - if self.path.endswith('.html'):  
137 - self._serve_html_with_config()  
138 - else:  
139 - super().do_GET()  
140 -  
141 - def _serve_html_with_config(self):  
142 - """Serve HTML with optional API_BASE_URL injected."""  
143 - try:  
144 - file_path = self.path.lstrip('/')  
145 - if not os.path.exists(file_path):  
146 - self.send_error(404)  
147 - return  
148 -  
149 - with open(file_path, 'r', encoding='utf-8') as f:  
150 - html = f.read()  
151 -  
152 - # 默认不注入 API_BASE_URL,避免历史 .env(如 http://xx:6002)覆盖同源调用。  
153 - # 仅当 FRONTEND_INJECT_API_BASE_URL=1 且 API_BASE_URL 有值时才注入。  
154 - if INJECT_API_BASE_URL and API_BASE_URL:  
155 - config_script = f'<script>window.API_BASE_URL="{API_BASE_URL}";</script>\n '  
156 - html = html.replace('<script src="/static/js/app.js', config_script + '<script src="/static/js/app.js', 1)  
157 -  
158 - self.send_response(200)  
159 - self.send_header('Content-Type', 'text/html; charset=utf-8')  
160 - self.end_headers()  
161 - self.wfile.write(html.encode('utf-8'))  
162 - except Exception as e:  
163 - logging.error(f"Error serving HTML: {e}")  
164 - self.send_error(500)  
165 -  
166 - def do_POST(self):  
167 - """Handle POST requests. Proxy API requests to backend."""  
168 - path = self.path.split('?')[0]  
169 - if self._is_proxy_path(path):  
170 - self._proxy_to_backend()  
171 - return  
172 - self.send_error(405, "Method Not Allowed")  
173 -  
174 - def setup(self):  
175 - """Setup with error handling."""  
176 - try:  
177 - super().setup()  
178 - except Exception:  
179 - pass # Silently handle setup errors from scanners  
180 -  
181 - def handle_one_request(self):  
182 - """Handle single request with error catching."""  
183 - try:  
184 - # Check rate limiting  
185 - client_ip = self.client_address[0]  
186 - if self.is_rate_limited(client_ip):  
187 - logging.warning(f"Rate limiting IP: {client_ip}")  
188 - self.send_error(429, "Too Many Requests")  
189 - return  
190 -  
191 - super().handle_one_request()  
192 - except (ConnectionResetError, BrokenPipeError):  
193 - # Client disconnected prematurely - common with scanners  
194 - pass  
195 - except UnicodeDecodeError:  
196 - # Binary data received - not HTTP  
197 - pass  
198 - except Exception as e:  
199 - # Log unexpected errors but don't crash  
200 - logging.debug(f"Request handling error: {e}")  
201 -  
202 - def log_message(self, format, *args):  
203 - """Suppress logging for malformed requests from scanners."""  
204 - message = format % args  
205 - # Filter out scanner noise  
206 - noise_patterns = [  
207 - "code 400",  
208 - "Bad request",  
209 - "Bad request version",  
210 - "Bad HTTP/0.9 request type",  
211 - "Bad request syntax"  
212 - ]  
213 - if any(pattern in message for pattern in noise_patterns):  
214 - return  
215 - # Only log legitimate requests  
216 - if message and not message.startswith(" ") and len(message) > 10:  
217 - super().log_message(format, *args)  
218 -  
219 - def end_headers(self):  
220 - # Add CORS headers  
221 - self.send_header('Access-Control-Allow-Origin', '*')  
222 - self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')  
223 - self.send_header('Access-Control-Allow-Headers', 'Content-Type')  
224 - # Add security headers  
225 - self.send_header('X-Content-Type-Options', 'nosniff')  
226 - self.send_header('X-Frame-Options', 'DENY')  
227 - self.send_header('X-XSS-Protection', '1; mode=block')  
228 - super().end_headers()  
229 -  
230 - def do_OPTIONS(self):  
231 - """Handle OPTIONS requests."""  
232 - try:  
233 - path = self.path.split('?')[0]  
234 - if self._is_proxy_path(path):  
235 - self.send_response(204)  
236 - self.end_headers()  
237 - return  
238 - self.send_response(200)  
239 - self.end_headers()  
240 - except Exception:  
241 - pass  
242 -  
243 -class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):  
244 - """Threaded TCP server with better error handling."""  
245 - allow_reuse_address = True  
246 - daemon_threads = True 6 +import runpy
  7 +from pathlib import Path
247 8
248 -if __name__ == '__main__':  
249 - # Check if port is already in use  
250 - import socket  
251 - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  
252 - try:  
253 - sock.bind(("", PORT))  
254 - sock.close()  
255 - except OSError:  
256 - print(f"ERROR: Port {PORT} is already in use.")  
257 - print(f"Please stop the existing server or use a different port.")  
258 - print(f"To stop existing server: kill $(lsof -t -i:{PORT})")  
259 - sys.exit(1)  
260 -  
261 - # Create threaded server for better concurrency  
262 - with ThreadedTCPServer(("", PORT), MyHTTPRequestHandler) as httpd:  
263 - print(f"Frontend server started at http://localhost:{PORT}")  
264 - print(f"Serving files from: {os.getcwd()}")  
265 - print("\nPress Ctrl+C to stop the server")  
266 9
267 - try:  
268 - httpd.serve_forever()  
269 - except KeyboardInterrupt:  
270 - print("\nShutting down server...")  
271 - httpd.shutdown()  
272 - print("Server stopped")  
273 - sys.exit(0)  
274 - except Exception as e:  
275 - print(f"Server error: {e}")  
276 - sys.exit(1) 10 +if __name__ == "__main__":
  11 + target = Path(__file__).resolve().parent / "frontend" / "frontend_server.py"
  12 + runpy.run_path(str(target), run_name="__main__")
scripts/setup_translator_venv.sh
@@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" @@ -8,8 +8,47 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
8 cd "${PROJECT_ROOT}" 8 cd "${PROJECT_ROOT}"
9 9
10 VENV_DIR="${PROJECT_ROOT}/.venv-translator" 10 VENV_DIR="${PROJECT_ROOT}/.venv-translator"
11 -PYTHON_BIN="${PYTHON_BIN:-python3}"  
12 TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}" 11 TMP_DIR="${TRANSLATOR_PIP_TMPDIR:-${PROJECT_ROOT}/.tmp/translator-pip}"
  12 +MIN_PYTHON_MAJOR=3
  13 +MIN_PYTHON_MINOR=10
  14 +
  15 +python_meets_minimum() {
  16 + local bin="$1"
  17 + "${bin}" - <<'PY' "${MIN_PYTHON_MAJOR}" "${MIN_PYTHON_MINOR}"
  18 +import sys
  19 +
  20 +required = tuple(int(value) for value in sys.argv[1:])
  21 +sys.exit(0 if sys.version_info[:2] >= required else 1)
  22 +PY
  23 +}
  24 +
  25 +discover_python_bin() {
  26 + local candidates=()
  27 +
  28 + if [[ -n "${PYTHON_BIN:-}" ]]; then
  29 + candidates+=("${PYTHON_BIN}")
  30 + fi
  31 + candidates+=("python3.12" "python3.11" "python3.10" "python3")
  32 +
  33 + local candidate
  34 + for candidate in "${candidates[@]}"; do
  35 + if ! command -v "${candidate}" >/dev/null 2>&1; then
  36 + continue
  37 + fi
  38 + if python_meets_minimum "${candidate}"; then
  39 + echo "${candidate}"
  40 + return 0
  41 + fi
  42 + done
  43 +
  44 + return 1
  45 +}
  46 +
  47 +if ! PYTHON_BIN="$(discover_python_bin)"; then
  48 + echo "ERROR: unable to find Python >= ${MIN_PYTHON_MAJOR}.${MIN_PYTHON_MINOR}." >&2
  49 + echo "Set PYTHON_BIN to a compatible interpreter and rerun." >&2
  50 + exit 1
  51 +fi
13 52
14 if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then 53 if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
15 echo "ERROR: python not found: ${PYTHON_BIN}" >&2 54 echo "ERROR: python not found: ${PYTHON_BIN}" >&2
@@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}" @@ -32,6 +71,7 @@ mkdir -p "${TMP_DIR}"
32 export TMPDIR="${TMP_DIR}" 71 export TMPDIR="${TMP_DIR}"
33 PIP_ARGS=(--no-cache-dir) 72 PIP_ARGS=(--no-cache-dir)
34 73
  74 +echo "Using Python=${PYTHON_BIN}"
35 echo "Using TMPDIR=${TMPDIR}" 75 echo "Using TMPDIR=${TMPDIR}"
36 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel 76 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" --upgrade pip wheel
37 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt 77 "${VENV_DIR}/bin/python" -m pip install "${PIP_ARGS[@]}" -r requirements_translator_service.txt
@@ -39,5 +79,5 @@ echo "Using TMPDIR=${TMPDIR}" @@ -39,5 +79,5 @@ echo "Using TMPDIR=${TMPDIR}"
39 echo 79 echo
40 echo "Done." 80 echo "Done."
41 echo "Translator venv: ${VENV_DIR}" 81 echo "Translator venv: ${VENV_DIR}"
42 -echo "Download local models: ./.venv-translator/bin/python scripts/download_translation_models.py --all-local" 82 +echo "Download local models: ./.venv-translator/bin/python scripts/translation/download_translation_models.py --all-local"
43 echo "Start service: ./scripts/start_translator.sh" 83 echo "Start service: ./scripts/start_translator.sh"
scripts/translation/download_translation_models.py 0 → 100755
@@ -0,0 +1,100 @@ @@ -0,0 +1,100 @@
  1 +#!/usr/bin/env python3
  2 +"""Download local translation models declared in services.translation.capabilities."""
  3 +
  4 +from __future__ import annotations
  5 +
  6 +import argparse
  7 +import os
  8 +from pathlib import Path
  9 +import sys
  10 +from typing import Iterable
  11 +
  12 +from huggingface_hub import snapshot_download
  13 +
  14 +PROJECT_ROOT = Path(__file__).resolve().parents[2]
  15 +if str(PROJECT_ROOT) not in sys.path:
  16 + sys.path.insert(0, str(PROJECT_ROOT))
  17 +os.environ.setdefault("HF_HUB_DISABLE_XET", "1")
  18 +
  19 +from config.services_config import get_translation_config
  20 +from translation.ct2_conversion import convert_transformers_model
  21 +
  22 +
  23 +LOCAL_BACKENDS = {"local_nllb", "local_marian"}
  24 +
  25 +
  26 +def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]:
  27 + cfg = get_translation_config()
  28 + capabilities = cfg.get("capabilities", {}) if isinstance(cfg, dict) else {}
  29 + for name, capability in capabilities.items():
  30 + backend = str(capability.get("backend") or "").strip().lower()
  31 + if backend not in LOCAL_BACKENDS:
  32 + continue
  33 + if selected and name not in selected:
  34 + continue
  35 + yield name, capability
  36 +
  37 +
  38 +def _compute_ct2_output_dir(capability: dict) -> Path:
  39 + custom = str(capability.get("ct2_model_dir") or "").strip()
  40 + if custom:
  41 + return Path(custom).expanduser()
  42 + model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
  43 + compute_type = str(capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default").strip().lower()
  44 + normalized = compute_type.replace("_", "-")
  45 + return model_dir / f"ctranslate2-{normalized}"
  46 +
  47 +
  48 +def convert_to_ctranslate2(name: str, capability: dict) -> None:
  49 + model_id = str(capability.get("model_id") or "").strip()
  50 + model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
  51 + model_source = str(model_dir if model_dir.exists() else model_id)
  52 + output_dir = _compute_ct2_output_dir(capability)
  53 + if (output_dir / "model.bin").exists():
  54 + print(f"[skip-convert] {name} -> {output_dir}")
  55 + return
  56 + quantization = str(
  57 + capability.get("ct2_conversion_quantization")
  58 + or capability.get("ct2_compute_type")
  59 + or capability.get("torch_dtype")
  60 + or "default"
  61 + ).strip()
  62 + output_dir.parent.mkdir(parents=True, exist_ok=True)
  63 + print(f"[convert] {name} -> {output_dir} ({quantization})")
  64 + convert_transformers_model(model_source, str(output_dir), quantization)
  65 + print(f"[converted] {name}")
  66 +
  67 +
  68 +def main() -> None:
  69 + parser = argparse.ArgumentParser(description="Download local translation models")
  70 + parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models")
  71 + parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download")
  72 + parser.add_argument(
  73 + "--convert-ctranslate2",
  74 + action="store_true",
  75 + help="Also convert the downloaded Hugging Face models into CTranslate2 format",
  76 + )
  77 + args = parser.parse_args()
  78 +
  79 + selected = {item.strip().lower() for item in args.models if item.strip()} or None
  80 + if not args.all_local and not selected:
  81 + parser.error("pass --all-local or --models <name> ...")
  82 +
  83 + for name, capability in iter_local_capabilities(selected):
  84 + model_id = str(capability.get("model_id") or "").strip()
  85 + model_dir = Path(str(capability.get("model_dir") or "")).expanduser()
  86 + if not model_id or not model_dir:
  87 + raise ValueError(f"Capability '{name}' must define model_id and model_dir")
  88 + model_dir.parent.mkdir(parents=True, exist_ok=True)
  89 + print(f"[download] {name} -> {model_dir} ({model_id})")
  90 + snapshot_download(
  91 + repo_id=model_id,
  92 + local_dir=str(model_dir),
  93 + )
  94 + print(f"[done] {name}")
  95 + if args.convert_ctranslate2:
  96 + convert_to_ctranslate2(name, capability)
  97 +
  98 +
  99 +if __name__ == "__main__":
  100 + main()
tests/test_translation_converter_resolution.py 0 → 100644
@@ -0,0 +1,85 @@ @@ -0,0 +1,85 @@
  1 +from __future__ import annotations
  2 +
  3 +import sys
  4 +import types
  5 +
  6 +import pytest
  7 +
  8 +import translation.ct2_conversion as ct2_conversion
  9 +
  10 +
  11 +class _FakeTransformersConverter:
  12 + def __init__(self, model_name_or_path):
  13 + self.model_name_or_path = model_name_or_path
  14 + self.load_calls = []
  15 +
  16 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  17 + self.load_calls.append(
  18 + {
  19 + "model_class": model_class,
  20 + "resolved_model_name_or_path": resolved_model_name_or_path,
  21 + "kwargs": dict(kwargs),
  22 + }
  23 + )
  24 + if "dtype" in kwargs or "torch_dtype" in kwargs:
  25 + raise TypeError("M2M100ForConditionalGeneration.__init__() got an unexpected keyword argument 'dtype'")
  26 + return {"loaded": True, "path": resolved_model_name_or_path}
  27 +
  28 + def convert(self, output_dir, quantization=None, force=False):
  29 + loaded = self.load_model("FakeModel", self.model_name_or_path, dtype="float32")
  30 + return {
  31 + "loaded": loaded,
  32 + "output_dir": output_dir,
  33 + "quantization": quantization,
  34 + "force": force,
  35 + "load_calls": list(self.load_calls),
  36 + }
  37 +
  38 +
  39 +def _install_fake_ctranslate2(monkeypatch, base_converter):
  40 + converters_module = types.ModuleType("ctranslate2.converters")
  41 + converters_module.TransformersConverter = base_converter
  42 + ctranslate2_module = types.ModuleType("ctranslate2")
  43 + ctranslate2_module.converters = converters_module
  44 +
  45 + monkeypatch.setitem(sys.modules, "ctranslate2", ctranslate2_module)
  46 + monkeypatch.setitem(sys.modules, "ctranslate2.converters", converters_module)
  47 +
  48 +
  49 +def test_convert_transformers_model_retries_without_torch_dtype(monkeypatch):
  50 + _install_fake_ctranslate2(monkeypatch, _FakeTransformersConverter)
  51 + fake_transformers = types.ModuleType("transformers")
  52 + fake_transformers.AutoConfig = types.SimpleNamespace(
  53 + from_pretrained=lambda path: types.SimpleNamespace(torch_dtype="float32", path=path)
  54 + )
  55 + monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
  56 +
  57 + result = ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16")
  58 +
  59 + assert result["loaded"] == {"loaded": True, "path": "fake-model"}
  60 + assert result["output_dir"] == "/tmp/out"
  61 + assert result["quantization"] == "float16"
  62 + assert result["force"] is False
  63 + assert len(result["load_calls"]) == 2
  64 + assert result["load_calls"][0] == {
  65 + "model_class": "FakeModel",
  66 + "resolved_model_name_or_path": "fake-model",
  67 + "kwargs": {"dtype": "float32"},
  68 + }
  69 + assert result["load_calls"][1]["model_class"] == "FakeModel"
  70 + assert result["load_calls"][1]["resolved_model_name_or_path"] == "fake-model"
  71 + assert getattr(result["load_calls"][1]["kwargs"]["config"], "torch_dtype", "missing") is None
  72 +
  73 +
  74 +def test_convert_transformers_model_preserves_unrelated_type_errors(monkeypatch):
  75 + class _AlwaysFailingConverter(_FakeTransformersConverter):
  76 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  77 + raise TypeError("different constructor error")
  78 +
  79 + _install_fake_ctranslate2(monkeypatch, _AlwaysFailingConverter)
  80 + fake_transformers = types.ModuleType("transformers")
  81 + fake_transformers.AutoConfig = types.SimpleNamespace(from_pretrained=lambda path: types.SimpleNamespace(path=path))
  82 + monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
  83 +
  84 + with pytest.raises(TypeError, match="different constructor error"):
  85 + ct2_conversion.convert_transformers_model("fake-model", "/tmp/out", "float16")
tests/test_translation_local_backends.py
@@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch): @@ -201,6 +201,51 @@ def test_nllb_ctranslate2_accepts_finnish_short_code(monkeypatch):
201 assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]] 201 assert backend.translator.last_translate_batch_kwargs["target_prefix"] == [["zho_Hans"]]
202 202
203 203
  204 +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_wrong_type(tmp_path, monkeypatch):
  205 + wrong_dir = tmp_path / "wrong-nllb"
  206 + wrong_dir.mkdir()
  207 + (wrong_dir / "config.json").write_text('{"model_type":"led"}', encoding="utf-8")
  208 +
  209 + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime)
  210 +
  211 + backend = NLLBCTranslate2TranslationBackend(
  212 + name="nllb-200-distilled-600m",
  213 + model_id="facebook/nllb-200-distilled-600M",
  214 + model_dir=str(wrong_dir),
  215 + device="cpu",
  216 + torch_dtype="float32",
  217 + batch_size=1,
  218 + max_input_length=16,
  219 + max_new_tokens=16,
  220 + num_beams=1,
  221 + )
  222 +
  223 + assert backend._model_source() == "facebook/nllb-200-distilled-600M"
  224 + assert backend._tokenizer_source() == "facebook/nllb-200-distilled-600M"
  225 +
  226 +
  227 +def test_nllb_ctranslate2_falls_back_to_model_id_when_local_dir_is_incomplete(tmp_path, monkeypatch):
  228 + incomplete_dir = tmp_path / "incomplete-nllb"
  229 + incomplete_dir.mkdir()
  230 + (incomplete_dir / "ctranslate2-float16").mkdir()
  231 +
  232 + monkeypatch.setattr(NLLBCTranslate2TranslationBackend, "_load_runtime", _stub_load_ct2_runtime)
  233 +
  234 + backend = NLLBCTranslate2TranslationBackend(
  235 + name="nllb-200-distilled-600m",
  236 + model_id="facebook/nllb-200-distilled-600M",
  237 + model_dir=str(incomplete_dir),
  238 + device="cpu",
  239 + torch_dtype="float32",
  240 + batch_size=1,
  241 + max_input_length=16,
  242 + max_new_tokens=16,
  243 + num_beams=1,
  244 + )
  245 +
  246 + assert backend._model_source() == "facebook/nllb-200-distilled-600M"
  247 +
  248 +
204 def test_nllb_resolves_flores_short_tags_and_iso_no(): 249 def test_nllb_resolves_flores_short_tags_and_iso_no():
205 cat = build_nllb_language_catalog(None) 250 cat = build_nllb_language_catalog(None)
206 assert resolve_nllb_language_code("ca", cat) == "cat_Latn" 251 assert resolve_nllb_language_code("ca", cat) == "cat_Latn"
tests/test_translator_failure_semantics.py
@@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog): @@ -197,6 +197,73 @@ def test_translation_route_log_focuses_on_routing_decision(monkeypatch, caplog):
197 ] 197 ]
198 198
199 199
  200 +def test_service_skips_failed_backend_but_keeps_healthy_capabilities(monkeypatch):
  201 + monkeypatch.setattr(TranslationCache, "_init_redis_client", staticmethod(lambda: None))
  202 +
  203 + def _fake_create_backend(self, *, name, backend_type, cfg):
  204 + del self, backend_type, cfg
  205 + if name == "broken-nllb":
  206 + raise RuntimeError("broken model dir")
  207 +
  208 + class _Backend:
  209 + model = name
  210 +
  211 + @property
  212 + def supports_batch(self):
  213 + return True
  214 +
  215 + def translate(self, text, target_lang, source_lang=None, scene=None):
  216 + del target_lang, source_lang, scene
  217 + return text
  218 +
  219 + return _Backend()
  220 +
  221 + monkeypatch.setattr(TranslationService, "_create_backend", _fake_create_backend)
  222 + service = TranslationService(
  223 + {
  224 + "service_url": "http://127.0.0.1:6006",
  225 + "timeout_sec": 10.0,
  226 + "default_model": "llm",
  227 + "default_scene": "general",
  228 + "capabilities": {
  229 + "llm": {
  230 + "enabled": True,
  231 + "backend": "llm",
  232 + "model": "dummy-llm",
  233 + "base_url": "https://example.com",
  234 + "timeout_sec": 10.0,
  235 + "use_cache": True,
  236 + },
  237 + "broken-nllb": {
  238 + "enabled": True,
  239 + "backend": "local_nllb",
  240 + "model_id": "dummy",
  241 + "model_dir": "dummy",
  242 + "device": "cpu",
  243 + "torch_dtype": "float32",
  244 + "batch_size": 8,
  245 + "max_input_length": 16,
  246 + "max_new_tokens": 16,
  247 + "num_beams": 1,
  248 + "use_cache": True,
  249 + },
  250 + },
  251 + "cache": {
  252 + "ttl_seconds": 60,
  253 + "sliding_expiration": True,
  254 + },
  255 + }
  256 + )
  257 +
  258 + assert service.available_models == ["llm", "broken-nllb"]
  259 + assert service.loaded_models == ["llm"]
  260 + assert service.failed_models == ["broken-nllb"]
  261 + assert service.backend_errors["broken-nllb"] == "broken model dir"
  262 +
  263 + with pytest.raises(RuntimeError, match="failed to initialize"):
  264 + service.get_backend("broken-nllb")
  265 +
  266 +
200 def test_translation_cache_probe_models_order(): 267 def test_translation_cache_probe_models_order():
201 cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}} 268 cfg = {"cache": {"model_quality_tiers": {"low": 10, "high": 50, "mid": 30}}}
202 assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"] 269 assert translation_cache_probe_models(cfg, "low") == ["high", "mid", "low"]
translation/backends/local_ctranslate2.py
@@ -4,9 +4,7 @@ from __future__ import annotations @@ -4,9 +4,7 @@ from __future__ import annotations
4 4
5 import logging 5 import logging
6 import os 6 import os
7 -import shutil  
8 -import subprocess  
9 -import sys 7 +import json
10 import threading 8 import threading
11 from pathlib import Path 9 from pathlib import Path
12 from typing import Dict, List, Optional, Sequence, Union 10 from typing import Dict, List, Optional, Sequence, Union
@@ -24,6 +22,7 @@ from translation.text_splitter import ( @@ -24,6 +22,7 @@ from translation.text_splitter import (
24 join_translated_segments, 22 join_translated_segments,
25 split_text_for_translation, 23 split_text_for_translation,
26 ) 24 )
  25 +from translation.ct2_conversion import convert_transformers_model
27 26
28 logger = logging.getLogger(__name__) 27 logger = logging.getLogger(__name__)
29 28
@@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -&gt; str: @@ -76,17 +75,18 @@ def _derive_ct2_model_dir(model_dir: str, compute_type: str) -&gt; str:
76 return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}") 75 return str(Path(model_dir).expanduser() / f"ctranslate2-{normalized}")
77 76
78 77
79 -def _resolve_converter_binary() -> str:  
80 - candidate = shutil.which("ct2-transformers-converter")  
81 - if candidate:  
82 - return candidate  
83 - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter"  
84 - if venv_candidate.exists():  
85 - return str(venv_candidate)  
86 - raise RuntimeError(  
87 - "ct2-transformers-converter was not found. "  
88 - "Ensure ctranslate2 is installed in the active translator environment."  
89 - ) 78 +def _detect_local_model_type(model_dir: str) -> Optional[str]:
  79 + config_path = Path(model_dir).expanduser() / "config.json"
  80 + if not config_path.exists():
  81 + return None
  82 + try:
  83 + with open(config_path, "r", encoding="utf-8") as handle:
  84 + payload = json.load(handle) or {}
  85 + except Exception as exc:
  86 + logger.warning("Failed to inspect local translation config %s: %s", config_path, exc)
  87 + return None
  88 + model_type = str(payload.get("model_type") or "").strip().lower()
  89 + return model_type or None
90 90
91 91
92 class LocalCTranslate2TranslationBackend: 92 class LocalCTranslate2TranslationBackend:
@@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend: @@ -144,6 +144,7 @@ class LocalCTranslate2TranslationBackend:
144 self.ct2_decoding_length_extra = int(ct2_decoding_length_extra) 144 self.ct2_decoding_length_extra = int(ct2_decoding_length_extra)
145 self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min)) 145 self.ct2_decoding_length_min = max(1, int(ct2_decoding_length_min))
146 self._tokenizer_lock = threading.Lock() 146 self._tokenizer_lock = threading.Lock()
  147 + self._local_model_source = self._resolve_local_model_source()
147 self._load_runtime() 148 self._load_runtime()
148 149
149 @property 150 @property
@@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend: @@ -151,10 +152,44 @@ class LocalCTranslate2TranslationBackend:
151 return True 152 return True
152 153
153 def _tokenizer_source(self) -> str: 154 def _tokenizer_source(self) -> str:
154 - return self.model_dir if os.path.exists(self.model_dir) else self.model_id 155 + return self._local_model_source or self.model_id
155 156
156 def _model_source(self) -> str: 157 def _model_source(self) -> str:
157 - return self.model_dir if os.path.exists(self.model_dir) else self.model_id 158 + return self._local_model_source or self.model_id
  159 +
  160 + def _expected_local_model_types(self) -> Optional[set[str]]:
  161 + return None
  162 +
  163 + def _resolve_local_model_source(self) -> Optional[str]:
  164 + model_path = Path(self.model_dir).expanduser()
  165 + if not model_path.exists():
  166 + return None
  167 + if not (model_path / "config.json").exists():
  168 + logger.warning(
  169 + "Local translation model_dir is incomplete | model=%s model_dir=%s missing=config.json fallback=model_id",
  170 + self.model,
  171 + model_path,
  172 + )
  173 + return None
  174 +
  175 + expected_types = self._expected_local_model_types()
  176 + if not expected_types:
  177 + return str(model_path)
  178 +
  179 + detected_type = _detect_local_model_type(str(model_path))
  180 + if detected_type is None:
  181 + return str(model_path)
  182 + if detected_type in expected_types:
  183 + return str(model_path)
  184 +
  185 + logger.warning(
  186 + "Local translation model_dir has unexpected model_type | model=%s model_dir=%s detected=%s expected=%s fallback=model_id",
  187 + self.model,
  188 + model_path,
  189 + detected_type,
  190 + sorted(expected_types),
  191 + )
  192 + return None
158 193
159 def _tokenizer_kwargs(self) -> Dict[str, object]: 194 def _tokenizer_kwargs(self) -> Dict[str, object]:
160 return {} 195 return {}
@@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend: @@ -204,7 +239,6 @@ class LocalCTranslate2TranslationBackend:
204 ) 239 )
205 240
206 ct2_path.parent.mkdir(parents=True, exist_ok=True) 241 ct2_path.parent.mkdir(parents=True, exist_ok=True)
207 - converter = _resolve_converter_binary()  
208 logger.info( 242 logger.info(
209 "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s", 243 "Converting translation model to CTranslate2 | name=%s source=%s output=%s quantization=%s",
210 self.model, 244 self.model,
@@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend: @@ -213,25 +247,14 @@ class LocalCTranslate2TranslationBackend:
213 self.ct2_conversion_quantization, 247 self.ct2_conversion_quantization,
214 ) 248 )
215 try: 249 try:
216 - subprocess.run(  
217 - [  
218 - converter,  
219 - "--model",  
220 - model_source,  
221 - "--output_dir",  
222 - str(ct2_path),  
223 - "--quantization",  
224 - self.ct2_conversion_quantization,  
225 - ],  
226 - check=True,  
227 - stdout=subprocess.PIPE,  
228 - stderr=subprocess.PIPE,  
229 - text=True, 250 + convert_transformers_model(
  251 + model_source,
  252 + str(ct2_path),
  253 + self.ct2_conversion_quantization,
230 ) 254 )
231 - except subprocess.CalledProcessError as exc:  
232 - stderr = exc.stderr.strip() 255 + except Exception as exc:
233 raise RuntimeError( 256 raise RuntimeError(
234 - f"Failed to convert model '{self.model}' to CTranslate2: {stderr or exc}" 257 + f"Failed to convert model '{self.model}' to CTranslate2: {exc}"
235 ) from exc 258 ) from exc
236 259
237 def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]: 260 def _normalize_texts(self, text: Union[str, Sequence[str]]) -> List[str]:
@@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): @@ -557,6 +580,9 @@ class MarianCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
557 f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}" 580 f"Model '{self.model}' only supports target languages: {sorted(self.target_langs)}"
558 ) 581 )
559 582
  583 + def _expected_local_model_types(self) -> Optional[set[str]]:
  584 + return {"marian"}
  585 +
560 586
561 class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): 587 class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
562 """Local backend for NLLB models on CTranslate2.""" 588 """Local backend for NLLB models on CTranslate2."""
@@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend): @@ -619,6 +645,9 @@ class NLLBCTranslate2TranslationBackend(LocalCTranslate2TranslationBackend):
619 if resolve_nllb_language_code(target_lang, self.language_codes) is None: 645 if resolve_nllb_language_code(target_lang, self.language_codes) is None:
620 raise ValueError(f"Unsupported NLLB target language: {target_lang}") 646 raise ValueError(f"Unsupported NLLB target language: {target_lang}")
621 647
  648 + def _expected_local_model_types(self) -> Optional[set[str]]:
  649 + return {"m2m_100", "nllb_moe"}
  650 +
622 def _get_tokenizer_for_source(self, source_lang: str): 651 def _get_tokenizer_for_source(self, source_lang: str):
623 src_code = resolve_nllb_language_code(source_lang, self.language_codes) 652 src_code = resolve_nllb_language_code(source_lang, self.language_codes)
624 if src_code is None: 653 if src_code is None:
translation/cache.py
@@ -87,6 +87,7 @@ class TranslationCache: @@ -87,6 +87,7 @@ class TranslationCache:
87 client = redis.Redis( 87 client = redis.Redis(
88 host=redis_config.host, 88 host=redis_config.host,
89 port=redis_config.port, 89 port=redis_config.port,
  90 + db=redis_config.snapshot_db,
90 password=redis_config.password, 91 password=redis_config.password,
91 decode_responses=True, 92 decode_responses=True,
92 socket_timeout=redis_config.socket_timeout, 93 socket_timeout=redis_config.socket_timeout,
translation/ct2_conversion.py 0 → 100644
@@ -0,0 +1,52 @@ @@ -0,0 +1,52 @@
  1 +"""Helpers for converting Hugging Face translation models to CTranslate2."""
  2 +
  3 +from __future__ import annotations
  4 +
  5 +import copy
  6 +import logging
  7 +
  8 +logger = logging.getLogger(__name__)
  9 +
  10 +
  11 +def convert_transformers_model(
  12 + model_name_or_path: str,
  13 + output_dir: str,
  14 + quantization: str,
  15 + *,
  16 + force: bool = False,
  17 +) -> str:
  18 + from ctranslate2.converters import TransformersConverter
  19 + from transformers import AutoConfig
  20 +
  21 + class _CompatibleTransformersConverter(TransformersConverter):
  22 + def load_model(self, model_class, resolved_model_name_or_path, **kwargs):
  23 + try:
  24 + return super().load_model(model_class, resolved_model_name_or_path, **kwargs)
  25 + except TypeError as exc:
  26 + if "unexpected keyword argument 'dtype'" not in str(exc):
  27 + raise
  28 + if kwargs.get("dtype") is None and kwargs.get("torch_dtype") is None:
  29 + raise
  30 +
  31 + logger.warning(
  32 + "Retrying CTranslate2 model load without dtype hints | model=%s class=%s",
  33 + resolved_model_name_or_path,
  34 + getattr(model_class, "__name__", model_class),
  35 + )
  36 + retry_kwargs = dict(kwargs)
  37 + retry_kwargs.pop("dtype", None)
  38 + retry_kwargs.pop("torch_dtype", None)
  39 + config = retry_kwargs.get("config")
  40 + if config is None:
  41 + config = AutoConfig.from_pretrained(resolved_model_name_or_path)
  42 + else:
  43 + config = copy.deepcopy(config)
  44 + if hasattr(config, "dtype"):
  45 + config.dtype = None
  46 + if hasattr(config, "torch_dtype"):
  47 + config.torch_dtype = None
  48 + retry_kwargs["config"] = config
  49 + return super().load_model(model_class, resolved_model_name_or_path, **retry_kwargs)
  50 +
  51 + converter = _CompatibleTransformersConverter(model_name_or_path)
  52 + return converter.convert(output_dir=output_dir, quantization=quantization, force=force)
translation/service.py
@@ -31,7 +31,12 @@ class TranslationService: @@ -31,7 +31,12 @@ class TranslationService:
31 if not self._enabled_capabilities: 31 if not self._enabled_capabilities:
32 raise ValueError("No enabled translation backends found in services.translation.capabilities") 32 raise ValueError("No enabled translation backends found in services.translation.capabilities")
33 self._translation_cache = TranslationCache(self.config["cache"]) 33 self._translation_cache = TranslationCache(self.config["cache"])
34 - self._backends = self._initialize_backends() 34 + self._backends: Dict[str, TranslationBackendProtocol] = {}
  35 + self._backend_errors: Dict[str, str] = {}
  36 + self._initialize_backends()
  37 + if not self._backends:
  38 + details = ", ".join(f"{name}: {err}" for name, err in sorted(self._backend_errors.items())) or "unknown error"
  39 + raise RuntimeError(f"No translation backends could be initialized: {details}")
35 40
36 def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]: 41 def _collect_enabled_capabilities(self) -> Dict[str, Dict[str, object]]:
37 enabled: Dict[str, Dict[str, object]] = {} 42 enabled: Dict[str, Dict[str, object]] = {}
@@ -62,24 +67,47 @@ class TranslationService: @@ -62,24 +67,47 @@ class TranslationService:
62 raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'") 67 raise ValueError(f"Unsupported translation backend '{backend_type}' for capability '{name}'")
63 return factory(name=name, cfg=cfg) 68 return factory(name=name, cfg=cfg)
64 69
65 - def _initialize_backends(self) -> Dict[str, TranslationBackendProtocol]:  
66 - backends: Dict[str, TranslationBackendProtocol] = {}  
67 - for name, capability_cfg in self._enabled_capabilities.items():  
68 - backend_type = str(capability_cfg["backend"])  
69 - logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type)  
70 - backends[name] = self._create_backend( 70 + def _load_backend(self, name: str) -> Optional[TranslationBackendProtocol]:
  71 + capability_cfg = self._enabled_capabilities.get(name)
  72 + if capability_cfg is None:
  73 + return None
  74 + if name in self._backends:
  75 + return self._backends[name]
  76 +
  77 + backend_type = str(capability_cfg["backend"])
  78 + logger.info("Initializing translation backend | model=%s backend=%s", name, backend_type)
  79 + try:
  80 + backend = self._create_backend(
71 name=name, 81 name=name,
72 backend_type=backend_type, 82 backend_type=backend_type,
73 cfg=capability_cfg, 83 cfg=capability_cfg,
74 ) 84 )
75 - logger.info(  
76 - "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s", 85 + except Exception as exc:
  86 + error_text = str(exc).strip() or exc.__class__.__name__
  87 + self._backend_errors[name] = error_text
  88 + logger.error(
  89 + "Translation backend initialization failed | model=%s backend=%s error=%s",
77 name, 90 name,
78 backend_type, 91 backend_type,
79 - bool(capability_cfg.get("use_cache")),  
80 - getattr(backends[name], "model", name), 92 + error_text,
  93 + exc_info=True,
81 ) 94 )
82 - return backends 95 + return None
  96 +
  97 + self._backends[name] = backend
  98 + self._backend_errors.pop(name, None)
  99 + logger.info(
  100 + "Translation backend initialized | model=%s backend=%s use_cache=%s backend_model=%s",
  101 + name,
  102 + backend_type,
  103 + bool(capability_cfg.get("use_cache")),
  104 + getattr(backend, "model", name),
  105 + )
  106 + return backend
  107 +
  108 + def _initialize_backends(self) -> None:
  109 + for name, capability_cfg in self._enabled_capabilities.items():
  110 + self._load_backend(name)
83 111
84 def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol: 112 def _create_qwen_mt_backend(self, *, name: str, cfg: Dict[str, object]) -> TranslationBackendProtocol:
85 from translation.backends.qwen_mt import QwenMTTranslationBackend 113 from translation.backends.qwen_mt import QwenMTTranslationBackend
@@ -178,13 +206,27 @@ class TranslationService: @@ -178,13 +206,27 @@ class TranslationService:
178 def loaded_models(self) -> List[str]: 206 def loaded_models(self) -> List[str]:
179 return list(self._backends.keys()) 207 return list(self._backends.keys())
180 208
  209 + @property
  210 + def failed_models(self) -> List[str]:
  211 + return list(self._backend_errors.keys())
  212 +
  213 + @property
  214 + def backend_errors(self) -> Dict[str, str]:
  215 + return dict(self._backend_errors)
  216 +
181 def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol: 217 def get_backend(self, model: Optional[str] = None) -> TranslationBackendProtocol:
182 normalized = normalize_translation_model(self.config, model) 218 normalized = normalize_translation_model(self.config, model)
183 - backend = self._backends.get(normalized) 219 + backend = self._backends.get(normalized) or self._load_backend(normalized)
184 if backend is None: 220 if backend is None:
185 - raise ValueError(  
186 - f"Translation model '{normalized}' is not enabled. "  
187 - f"Available models: {', '.join(self.available_models) or 'none'}" 221 + if normalized not in self._enabled_capabilities:
  222 + raise ValueError(
  223 + f"Translation model '{normalized}' is not enabled. "
  224 + f"Available models: {', '.join(self.available_models) or 'none'}"
  225 + )
  226 + error_text = self._backend_errors.get(normalized) or "unknown initialization error"
  227 + raise RuntimeError(
  228 + f"Translation model '{normalized}' failed to initialize: {error_text}. "
  229 + f"Loaded models: {', '.join(self.loaded_models) or 'none'}"
188 ) 230 )
189 return backend 231 return backend
190 232