#!/usr/bin/env python3
"""
Smoke test: load Qwen3VLLMScoreRerankerBackend (must run as a file, not stdin — vLLM spawn).

Usage (from repo root, score venv):
  PYTHONPATH=. ./.venv-reranker-score/bin/python benchmarks/reranker/smoke_qwen3_vllm_score_backend.py

Same as production: vLLM child processes need the venv's ``bin`` on PATH (for pip's ``ninja`` when
vLLM auto-selects FLASHINFER on T4/Turing). ``start_reranker.sh`` exports that; this script prepends
``sysconfig.get_path("scripts")`` (the stdlib location for this environment's console scripts,
independent of ``python`` symlink targets).
"""

from __future__ import annotations

import argparse
import logging
import os
import sys
import sysconfig
from pathlib import Path

# Repo root on sys.path when run from benchmarks/reranker/.
_ROOT = Path(__file__).resolve().parents[2]
if str(_ROOT) not in sys.path:
    sys.path.insert(0, str(_ROOT))

logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

# These imports intentionally follow the sys.path bootstrap above so that the
# project-local ``reranker`` package resolves when run from benchmarks/reranker/.
import torch  # noqa: E402

from reranker.backends.qwen3_vllm_score import (  # noqa: E402
    Qwen3VLLMScoreRerankerBackend,
)


def _prepend_scripts_dir_to_path() -> None:
    """Put this environment's console-scripts directory at the front of PATH.

    Does nothing when ``sysconfig`` reports no scripts directory. When PATH is
    unset or empty, PATH becomes just the scripts directory: joining with a
    trailing ``os.pathsep`` would create an empty PATH element, which POSIX
    shells and ``exec*p`` interpret as the current working directory.
    """
    scripts = sysconfig.get_path("scripts")
    if not scripts:
        return
    current = os.environ.get("PATH", "")
    os.environ["PATH"] = f"{scripts}{os.pathsep}{current}" if current else scripts


def main() -> int:
    """Load the backend, score two dummy documents, and print the result.

    Command-line options:
        --gpu-memory-utilization: float forwarded to the backend config as
            ``gpu_memory_utilization`` (default 0.12).

    Returns:
        Process exit code. ``0`` both when the smoke run completes and when it
        is skipped because CUDA is not available.
    """
    p = argparse.ArgumentParser()
    p.add_argument(
        "--gpu-memory-utilization",
        type=float,
        default=0.12,
        help="vLLM gpu_memory_utilization (default 0.12 for tight GPUs)",
    )
    args = p.parse_args()

    # Must happen before the backend is constructed so that any child
    # processes inherit the updated PATH.
    _prepend_scripts_dir_to_path()

    if not torch.cuda.is_available():
        print("SKIP: CUDA not available")
        return 0

    cfg = {
        "model_name": "Qwen/Qwen3-Reranker-0.6B",
        "max_model_len": 160,
        "tensor_parallel_size": 1,
        "gpu_memory_utilization": args.gpu_memory_utilization,
        "dtype": "float16",
        "enable_prefix_caching": False,
        "enforce_eager": True,
        "infer_batch_size": 4,
        "instruction_format": "compact",
    }

    print("Loading backend ...")
    backend = Qwen3VLLMScoreRerankerBackend(cfg)
    scores, meta = backend.score_with_meta(
        "smoke query", ["title one", "title two"], normalize=False
    )
    print("scores:", scores)
    # Only echo the meta keys of interest, and only those actually present.
    shown_keys = ("backend", "infer_batch_size", "instruction_format")
    print("meta:", {k: meta[k] for k in shown_keys if k in meta})
    print("OK")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())