diff --git a/api/translator_app.py b/api/translator_app.py index b4e4f87..50d8927 100644 --- a/api/translator_app.py +++ b/api/translator_app.py @@ -271,16 +271,20 @@ async def lifespan(_: FastAPI): """Initialize all enabled translation backends on process startup.""" logger.info("Starting Translation Service API") service = get_translation_service() + failed_models = list(getattr(service, "failed_models", [])) + backend_errors = dict(getattr(service, "backend_errors", {})) logger.info( - "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s", + "Translation service ready | default_model=%s default_scene=%s available_models=%s loaded_models=%s failed_models=%s", service.config["default_model"], service.config["default_scene"], service.available_models, service.loaded_models, + failed_models, ) logger.info( - "Translation backends initialized on startup | models=%s", + "Translation backends initialized on startup | loaded=%s failed=%s", service.loaded_models, + backend_errors, ) verbose_logger.info( "Translation startup detail | capabilities=%s cache_ttl_seconds=%s cache_sliding_expiration=%s", @@ -316,11 +320,14 @@ async def health_check(): """Health check endpoint.""" try: service = get_translation_service() + failed_models = list(getattr(service, "failed_models", [])) + backend_errors = dict(getattr(service, "backend_errors", {})) logger.info( - "Health check | default_model=%s default_scene=%s loaded_models=%s", + "Health check | default_model=%s default_scene=%s loaded_models=%s failed_models=%s", service.config["default_model"], service.config["default_scene"], service.loaded_models, + failed_models, ) return { "status": "healthy", @@ -330,6 +337,8 @@ async def health_check(): "available_models": service.available_models, "enabled_capabilities": get_enabled_translation_models(service.config), "loaded_models": service.loaded_models, + "failed_models": failed_models, + "backend_errors": backend_errors, } except 
Exception as e: logger.error(f"Health check failed: {e}") @@ -463,6 +472,10 @@ async def translate(request: TranslationRequest, http_request: Request): latency_ms = (time.perf_counter() - request_started) * 1000 logger.warning("Translation validation error | error=%s latency_ms=%.2f", e, latency_ms) raise HTTPException(status_code=400, detail=str(e)) from e + except RuntimeError as e: + latency_ms = (time.perf_counter() - request_started) * 1000 + logger.warning("Translation backend unavailable | error=%s latency_ms=%.2f", e, latency_ms) + raise HTTPException(status_code=503, detail=str(e)) from e except Exception as e: latency_ms = (time.perf_counter() - request_started) * 1000 logger.error("Translation error | error=%s latency_ms=%.2f", e, latency_ms, exc_info=True) diff --git a/config/loader.py b/config/loader.py index 16a4fc9..cbe635d 100644 --- a/config/loader.py +++ b/config/loader.py @@ -655,6 +655,14 @@ class AppConfigLoader: translation_raw = raw.get("translation") if isinstance(raw.get("translation"), dict) else {} normalized_translation = build_translation_config(translation_raw) + local_translation_backends = {"local_nllb", "local_marian"} + for capability_name, capability_cfg in normalized_translation["capabilities"].items(): + backend_name = str(capability_cfg.get("backend") or "").strip().lower() + if backend_name not in local_translation_backends: + continue + for path_key in ("model_dir", "ct2_model_dir"): + if capability_cfg.get(path_key) not in (None, ""): + capability_cfg[path_key] = str(self._resolve_project_path_value(capability_cfg[path_key]).resolve()) translation_config = TranslationServiceConfig( endpoint=str(normalized_translation["service_url"]).rstrip("/"), timeout_sec=float(normalized_translation["timeout_sec"]), @@ -749,7 +757,7 @@ class AppConfigLoader: port=port, backend=backend_name, runtime_dir=( - str(v) + str(self._resolve_project_path_value(v).resolve()) if (v := instance_raw.get("runtime_dir")) not in (None, "") else None ), 
@@ -787,6 +795,12 @@ class AppConfigLoader: rerank=rerank_config, ) + def _resolve_project_path_value(self, value: Any) -> Path: + candidate = Path(str(value)).expanduser() + if candidate.is_absolute(): + return candidate + return self.project_root / candidate + def _build_tenants_config(self, raw: Dict[str, Any]) -> TenantCatalogConfig: if not isinstance(raw, dict): raise ConfigurationError("tenant_config must be a mapping") @@ -822,13 +836,6 @@ class AppConfigLoader: def _build_infrastructure_config(self, environment: str) -> InfrastructureConfig: del environment - _redis_db_raw = os.getenv("REDIS_DB") or os.getenv("REDIS_SNAPSHOT_DB") - _redis_db = 0 - if _redis_db_raw is not None and str(_redis_db_raw).strip() != "": - try: - _redis_db = int(str(_redis_db_raw).strip()) - except ValueError: - _redis_db = 0 return InfrastructureConfig( elasticsearch=ElasticsearchSettings( host=os.getenv("ES_HOST", "http://localhost:9200"), @@ -838,7 +845,7 @@ class AppConfigLoader: redis=RedisSettings( host=os.getenv("REDIS_HOST", "localhost"), port=int(os.getenv("REDIS_PORT", 6479)), - snapshot_db=_redis_db, + snapshot_db=int(os.getenv("REDIS_SNAPSHOT_DB", 0)), password=os.getenv("REDIS_PASSWORD"), socket_timeout=int(os.getenv("REDIS_SOCKET_TIMEOUT", 1)), socket_connect_timeout=int(os.getenv("REDIS_SOCKET_CONNECT_TIMEOUT", 1)), diff --git a/frontend/static/js/app.js b/frontend/static/js/app.js index 435780b..ea036ba 100644 --- a/frontend/static/js/app.js +++ b/frontend/static/js/app.js @@ -316,7 +316,10 @@ async function performSearch(page = 1) { document.getElementById('productGrid').innerHTML = ''; try { - const response = await fetch(`${API_BASE_URL}/search/`, { + const searchUrl = new URL(`${API_BASE_URL}/search/`, window.location.origin); + searchUrl.searchParams.set('tenant_id', tenantId); + + const response = await fetch(searchUrl.toString(), { method: 'POST', headers: { 'Content-Type': 'application/json', diff --git a/requirements_translator_service.txt 
b/requirements_translator_service.txt index e8b8f18..d944e6c 100644 --- a/requirements_translator_service.txt +++ b/requirements_translator_service.txt @@ -13,7 +13,8 @@ httpx>=0.24.0 tqdm>=4.65.0 torch>=2.0.0 -transformers>=4.30.0 +# Keep translator conversions on the last verified NLLB-compatible release line. +transformers>=4.51.0,<4.52.0 ctranslate2>=4.7.0 sentencepiece>=0.2.0 sacremoses>=0.1.1 diff --git a/scripts/download_translation_models.py b/scripts/download_translation_models.py old mode 100755 new mode 100644 index a6fcba4..0b67f40 --- a/scripts/download_translation_models.py +++ b/scripts/download_translation_models.py @@ -1,125 +1,12 @@ #!/usr/bin/env python3 -"""Download local translation models declared in services.translation.capabilities.""" +"""Backward-compatible entrypoint for translation model downloads.""" from __future__ import annotations -import argparse -import os +import runpy from pathlib import Path -import shutil -import subprocess -import sys -from typing import Iterable - -from huggingface_hub import snapshot_download - -PROJECT_ROOT = Path(__file__).resolve().parent.parent -if str(PROJECT_ROOT) not in sys.path: - sys.path.insert(0, str(PROJECT_ROOT)) -os.environ.setdefault("HF_HUB_DISABLE_XET", "1") - -from config.services_config import get_translation_config - - -LOCAL_BACKENDS = {"local_nllb", "local_marian"} - - -def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]: - cfg = get_translation_config() - capabilities = cfg.get("capabilities", {}) if isinstance(cfg, dict) else {} - for name, capability in capabilities.items(): - backend = str(capability.get("backend") or "").strip().lower() - if backend not in LOCAL_BACKENDS: - continue - if selected and name not in selected: - continue - yield name, capability - - -def _compute_ct2_output_dir(capability: dict) -> Path: - custom = str(capability.get("ct2_model_dir") or "").strip() - if custom: - return Path(custom).expanduser() - model_dir = 
Path(str(capability.get("model_dir") or "")).expanduser() - compute_type = str(capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default").strip().lower() - normalized = compute_type.replace("_", "-") - return model_dir / f"ctranslate2-{normalized}" - - -def _resolve_converter_binary() -> str: - candidate = shutil.which("ct2-transformers-converter") - if candidate: - return candidate - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter" - if venv_candidate.exists(): - return str(venv_candidate) - raise RuntimeError( - "ct2-transformers-converter was not found. " - "Install ctranslate2 in the active Python environment first." - ) - - -def convert_to_ctranslate2(name: str, capability: dict) -> None: - model_id = str(capability.get("model_id") or "").strip() - model_dir = Path(str(capability.get("model_dir") or "")).expanduser() - model_source = str(model_dir if model_dir.exists() else model_id) - output_dir = _compute_ct2_output_dir(capability) - if (output_dir / "model.bin").exists(): - print(f"[skip-convert] {name} -> {output_dir}") - return - quantization = str( - capability.get("ct2_conversion_quantization") - or capability.get("ct2_compute_type") - or capability.get("torch_dtype") - or "default" - ).strip() - output_dir.parent.mkdir(parents=True, exist_ok=True) - print(f"[convert] {name} -> {output_dir} ({quantization})") - subprocess.run( - [ - _resolve_converter_binary(), - "--model", - model_source, - "--output_dir", - str(output_dir), - "--quantization", - quantization, - ], - check=True, - ) - print(f"[converted] {name}") - - -def main() -> None: - parser = argparse.ArgumentParser(description="Download local translation models") - parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models") - parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download") - parser.add_argument( - "--convert-ctranslate2", - 
action="store_true", - help="Also convert the downloaded Hugging Face models into CTranslate2 format", - ) - args = parser.parse_args() - - selected = {item.strip().lower() for item in args.models if item.strip()} or None - if not args.all_local and not selected: - parser.error("pass --all-local or --models ...") - - for name, capability in iter_local_capabilities(selected): - model_id = str(capability.get("model_id") or "").strip() - model_dir = Path(str(capability.get("model_dir") or "")).expanduser() - if not model_id or not model_dir: - raise ValueError(f"Capability '{name}' must define model_id and model_dir") - model_dir.parent.mkdir(parents=True, exist_ok=True) - print(f"[download] {name} -> {model_dir} ({model_id})") - snapshot_download( - repo_id=model_id, - local_dir=str(model_dir), - ) - print(f"[done] {name}") - if args.convert_ctranslate2: - convert_to_ctranslate2(name, capability) if __name__ == "__main__": - main() + target = Path(__file__).resolve().parent / "translation" / "download_translation_models.py" + runpy.run_path(str(target), run_name="__main__") diff --git a/scripts/frontend/frontend_server.py b/scripts/frontend/frontend_server.py new file mode 100755 index 0000000..0d30342 --- /dev/null +++ b/scripts/frontend/frontend_server.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +""" +Simple HTTP server for saas-search frontend. 
+""" + +import http.server +import socketserver +import os +import sys +import logging +import time +import urllib.request +import urllib.error +from collections import defaultdict, deque +from pathlib import Path +from dotenv import load_dotenv + +# Load .env file +project_root = Path(__file__).resolve().parents[2] +load_dotenv(project_root / '.env') + +# Get API_BASE_URL from environment(默认不注入,避免被旧 .env 覆盖同源策略) +# 仅当显式设置 FRONTEND_INJECT_API_BASE_URL=1 时才注入 window.API_BASE_URL。 +API_BASE_URL = os.getenv('API_BASE_URL') or None +INJECT_API_BASE_URL = os.getenv('FRONTEND_INJECT_API_BASE_URL', '0') == '1' +# Backend proxy target for same-origin API forwarding +BACKEND_PROXY_URL = os.getenv('BACKEND_PROXY_URL', 'http://127.0.0.1:6002').rstrip('/') + +# Change to frontend directory +frontend_dir = os.path.join(project_root, 'frontend') +os.chdir(frontend_dir) + +# FRONTEND_PORT is the canonical config; keep PORT as a secondary fallback. +PORT = int(os.getenv('FRONTEND_PORT', os.getenv('PORT', 6003))) + +# Configure logging to suppress scanner noise +logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s') + +class RateLimitingMixin: + """Mixin for rate limiting requests by IP address.""" + request_counts = defaultdict(deque) + rate_limit = 100 # requests per minute + window = 60 # seconds + + @classmethod + def is_rate_limited(cls, ip): + now = time.time() + + # Clean old requests + while cls.request_counts[ip] and cls.request_counts[ip][0] < now - cls.window: + cls.request_counts[ip].popleft() + + # Check rate limit + if len(cls.request_counts[ip]) > cls.rate_limit: + return True + + cls.request_counts[ip].append(now) + return False + +class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin): + """Custom request handler with CORS support and robust error handling.""" + + _ALLOWED_CORS_HEADERS = "Content-Type, X-Tenant-ID, X-Request-ID, Referer" + + def _is_proxy_path(self, path: str) -> bool: + """Return 
True for API paths that should be forwarded to backend service.""" + return path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/') + + def _proxy_to_backend(self): + """Proxy current request to backend service on the GPU server.""" + target_url = f"{BACKEND_PROXY_URL}{self.path}" + method = self.command.upper() + + try: + content_length = int(self.headers.get('Content-Length', '0')) + except ValueError: + content_length = 0 + body = self.rfile.read(content_length) if content_length > 0 else None + + forward_headers = {} + for key, value in self.headers.items(): + lk = key.lower() + if lk in ('host', 'content-length', 'connection'): + continue + forward_headers[key] = value + + req = urllib.request.Request( + target_url, + data=body, + headers=forward_headers, + method=method, + ) + + try: + with urllib.request.urlopen(req, timeout=30) as resp: + resp_body = resp.read() + self.send_response(resp.getcode()) + for header, value in resp.getheaders(): + lh = header.lower() + if lh in ('transfer-encoding', 'connection', 'content-length'): + continue + self.send_header(header, value) + self.end_headers() + self.wfile.write(resp_body) + except urllib.error.HTTPError as e: + err_body = e.read() if hasattr(e, 'read') else b'' + self.send_response(e.code) + if e.headers: + for header, value in e.headers.items(): + lh = header.lower() + if lh in ('transfer-encoding', 'connection', 'content-length'): + continue + self.send_header(header, value) + self.end_headers() + if err_body: + self.wfile.write(err_body) + except Exception as e: + logging.error(f"Backend proxy error for {method} {self.path}: {e}") + self.send_response(502) + self.send_header('Content-Type', 'application/json; charset=utf-8') + self.end_headers() + self.wfile.write(b'{"error":"Bad Gateway: backend proxy failed"}') + + def do_GET(self): + """Handle GET requests with API config injection.""" + path = self.path.split('?')[0] + + # Proxy API paths to backend first + if 
self._is_proxy_path(path): + self._proxy_to_backend() + return + + # Route / to index.html + if path == '/' or path == '': + self.path = '/index.html' + (self.path.split('?', 1)[1] if '?' in self.path else '') + + # Inject API config for HTML files + if self.path.endswith('.html'): + self._serve_html_with_config() + else: + super().do_GET() + + def _serve_html_with_config(self): + """Serve HTML with optional API_BASE_URL injected.""" + try: + file_path = self.path.lstrip('/') + if not os.path.exists(file_path): + self.send_error(404) + return + + with open(file_path, 'r', encoding='utf-8') as f: + html = f.read() + + # By default API_BASE_URL is not injected, so a legacy .env value (e.g. http://xx:6002) cannot override same-origin calls. + # Inject only when FRONTEND_INJECT_API_BASE_URL=1 and API_BASE_URL is non-empty. + if INJECT_API_BASE_URL and API_BASE_URL: + config_script = f'\n ' + html = html.replace('