#!/usr/bin/env python3
"""Download local translation models declared in services.translation.capabilities."""

from __future__ import annotations

import argparse
import os
import sys
from pathlib import Path
from typing import Iterable

# Set the default *before* huggingface_hub is imported so the value is already
# in the environment whenever the library reads its configuration.
os.environ.setdefault("HF_HUB_DISABLE_XET", "1")

from huggingface_hub import snapshot_download  # noqa: E402

# Make the project root importable so `config.*` resolves when this file is
# executed directly as a script.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from config.services_config import get_translation_config  # noqa: E402

# Backend identifiers whose models are stored on local disk.
LOCAL_BACKENDS = {"local_nllb", "local_marian"}


def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]:
    """Yield ``(name, capability)`` for every capability using a local backend.

    Args:
        selected: Optional set of capability names to restrict the result to.
            Names are compared case-insensitively and ignoring surrounding
            whitespace. ``None`` or an empty set disables the filter.

    Yields:
        Pairs of the capability name exactly as it appears in the translation
        config and its capability mapping.
    """
    # Normalise the requested names once so the comparison below does not
    # depend on how the caller (or the config) happened to case them.
    wanted = {str(item).strip().lower() for item in selected} if selected else None

    cfg = get_translation_config()
    for name, capability in cfg.capabilities.items():
        backend = str(capability.get("backend") or "").strip().lower()
        if backend not in LOCAL_BACKENDS:
            continue
        if wanted is not None and str(name).strip().lower() not in wanted:
            continue
        yield name, capability


def main() -> None:
    """Parse CLI arguments and download the requested local translation models.

    Either ``--all-local`` or a non-empty ``--models`` list is required;
    otherwise the parser reports a usage error and exits. When ``--models`` is
    given it always acts as a filter, even together with ``--all-local``.

    Raises:
        ValueError: If a selected capability lacks ``model_id`` or ``model_dir``.
    """
    parser = argparse.ArgumentParser(description="Download local translation models")
    parser.add_argument(
        "--all-local",
        action="store_true",
        help="Download all configured local translation models",
    )
    parser.add_argument(
        "--models",
        nargs="*",
        default=[],
        help="Specific capability names to download",
    )
    args = parser.parse_args()

    selected = {item.strip().lower() for item in args.models if item.strip()} or None
    if not args.all_local and not selected:
        parser.error("pass --all-local or --models ...")

    targets = list(iter_local_capabilities(selected))

    # Surface requested names that matched nothing instead of silently
    # finishing without downloading them.
    if selected:
        matched = {str(name).strip().lower() for name, _ in targets}
        for missing in sorted(selected - matched):
            print(
                f"[warn] no local translation capability named '{missing}'",
                file=sys.stderr,
            )

    for name, capability in targets:
        model_id = str(capability.get("model_id") or "").strip()
        model_dir_raw = str(capability.get("model_dir") or "").strip()
        # Validate the raw string: a Path object is always truthy and
        # Path("") silently becomes the current directory.
        if not model_id or not model_dir_raw:
            raise ValueError(f"Capability '{name}' must define model_id and model_dir")

        model_dir = Path(model_dir_raw).expanduser()
        model_dir.parent.mkdir(parents=True, exist_ok=True)
        print(f"[download] {name} -> {model_dir} ({model_id})")
        snapshot_download(
            repo_id=model_id,
            local_dir=str(model_dir),
        )
        print(f"[done] {name}")


if __name__ == "__main__":
    main()