# Dedicated high-performance venv for qwen3_vllm_score: .venv-reranker-score
#
# Create / refresh:
#   ./scripts/setup_reranker_venv.sh qwen3_vllm_score
#
# vLLM 0.17+ replaces LLM(task="score") with runner/convert auto + LLM.score().
# Pin vLLM for reproducible perf baselines; bump after validating CUDA/driver on your hosts.
# If pip cannot find a wheel for your CUDA version, edit the vllm line or install from:
#   https://docs.vllm.ai/en/latest/getting_started/installation.html

-r requirements_reranker_base.txt

# FlashInfer JIT (vLLM may select it on Turing when TRITON_ATTN is not forced) needs a ninja binary on PATH.
ninja>=1.11

vllm==0.18.0

# Match vLLM 0.18 stack; cap <5 to avoid pip prefetching incompatible transformers 5.x.
transformers>=4.51.0,<5