#!/bin/bash
#
# Start reranker service from isolated venv (.venv-reranker).
#
# Optional environment:
#   RERANKER_VENV  virtualenv to run from (default: <project>/.venv-reranker)
#   RERANKER_HOST  bind address           (default: 0.0.0.0)
#   RERANKER_PORT  bind port              (default: 6007)
# Assignments in <project>/.env are exported before the host/port defaults
# are applied, and they overwrite values already present in the environment.
#
set -euo pipefail

# The project root is the parent of the directory that contains this script.
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "${PROJECT_ROOT}"

RERANKER_VENV="${RERANKER_VENV:-${PROJECT_ROOT}/.venv-reranker}"
PYTHON_BIN="${RERANKER_VENV}/bin/python"

if [[ ! -x "${PYTHON_BIN}" ]]; then
  echo "ERROR: reranker venv not found: ${RERANKER_VENV}" >&2
  echo "Please run: ./scripts/setup_reranker_venv.sh" >&2
  exit 1
fi

#######################################
# Export KEY=VALUE assignments from a dotenv-style file.
#
# Blank lines, comment lines (first non-blank character is '#') and lines
# without '=' are ignored. A trailing CR is removed, so CRLF files work.
# An optional leading "export " is accepted. Whitespace around the key and
# the value is trimmed, and one pair of matching single or double quotes
# around the value is removed. Entries whose key is not a valid shell
# variable name are skipped with a warning instead of aborting the script
# (a failing `export` would otherwise terminate it under `set -e`).
#
# Arguments: $1 - path to the env file; a missing file is not an error.
# Outputs:   warnings on stderr (file and line number only, never values).
# Returns:   0 on success; non-zero if the file cannot be read.
#######################################
load_env_file() {
  # Locals are prefixed so that they cannot collide with keys in the file.
  local _env_file="$1"
  local _env_line _env_key _env_value _env_first _env_last
  local _env_lineno=0
  local -r _env_export_re='^export[[:space:]]+(.*)$'
  local -r _env_name_re='^[A-Za-z_][A-Za-z0-9_]*$'

  [[ -f "${_env_file}" ]] || return 0

  # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r _env_line || [[ -n "${_env_line}" ]]; do
    _env_lineno=$((_env_lineno + 1))
    _env_line="${_env_line%$'\r'}"

    if [[ -z "${_env_line//[[:space:]]/}" ]]; then
      continue
    fi
    if [[ "${_env_line}" =~ ^[[:space:]]*# ]]; then
      continue
    fi
    if [[ "${_env_line}" != *=* ]]; then
      continue
    fi

    # Split on the first '=' so the value may itself contain '='.
    _env_key="${_env_line%%=*}"
    _env_value="${_env_line#*=}"

    # Trim leading and trailing whitespace from the key.
    _env_key="${_env_key#"${_env_key%%[![:space:]]*}"}"
    _env_key="${_env_key%"${_env_key##*[![:space:]]}"}"
    if [[ "${_env_key}" =~ ${_env_export_re} ]]; then
      _env_key="${BASH_REMATCH[1]}"
    fi

    if [[ ! "${_env_key}" =~ ${_env_name_re} ]]; then
      echo "WARNING: ${_env_file}:${_env_lineno}: skipping entry with invalid variable name" >&2
      continue
    fi

    # Trim both ends of the value; trailing blanks would otherwise hide a
    # closing quote from the check below.
    _env_value="${_env_value#"${_env_value%%[![:space:]]*}"}"
    _env_value="${_env_value%"${_env_value##*[![:space:]]}"}"

    if [[ ${#_env_value} -ge 2 ]]; then
      _env_first="${_env_value:0:1}"
      _env_last="${_env_value: -1}"
      if [[ ("${_env_first}" == '"' && "${_env_last}" == '"') \
        || ("${_env_first}" == "'" && "${_env_last}" == "'") ]]; then
        _env_value="${_env_value:1:${#_env_value}-2}"
      fi
    fi

    export "${_env_key}=${_env_value}"
  done < "${_env_file}"
}

# Load .env if present (without activating main venv)
ENV_FILE="${PROJECT_ROOT}/.env"
load_env_file "${ENV_FILE}"

RERANKER_HOST="${RERANKER_HOST:-0.0.0.0}"
RERANKER_PORT="${RERANKER_PORT:-6007}"

# Ask the project's Python config which backend is selected. If this command
# fails, `set -e` aborts the script here.
RERANK_BACKEND=$("${PYTHON_BIN}" -c "from config.services_config import get_rerank_backend_config; print(get_rerank_backend_config()[0])")

# Keep vLLM/triton/torch caches out of system disk.
# Per-project runtime directory; override by setting RERANKER_RUNTIME_DIR.
RERANKER_RUNTIME_DIR="${RERANKER_RUNTIME_DIR:-${PROJECT_ROOT}/.runtime/reranker}"

# Redirect home, XDG, compiler-cache and temp locations into the runtime dir.
HOME="${RERANKER_RUNTIME_DIR}/home"
XDG_CACHE_HOME="${RERANKER_RUNTIME_DIR}/cache"
XDG_CONFIG_HOME="${RERANKER_RUNTIME_DIR}/config"
TRITON_CACHE_DIR="${RERANKER_RUNTIME_DIR}/triton"
TORCHINDUCTOR_CACHE_DIR="${RERANKER_RUNTIME_DIR}/torch_compile"
TMPDIR="${RERANKER_RUNTIME_DIR}/tmp"

mkdir -p \
  "${HOME}" \
  "${XDG_CACHE_HOME}" \
  "${XDG_CONFIG_HOME}" \
  "${TRITON_CACHE_DIR}" \
  "${TORCHINDUCTOR_CACHE_DIR}" \
  "${TMPDIR}"

export HOME XDG_CACHE_HOME XDG_CONFIG_HOME
export TRITON_CACHE_DIR TORCHINDUCTOR_CACHE_DIR TMPDIR

# A value already set by the caller wins; otherwise default to 1.
VLLM_NO_USAGE_STATS="${VLLM_NO_USAGE_STATS:-1}"
export VLLM_NO_USAGE_STATS

# Executables from the reranker venv take precedence over the system ones.
PATH="${RERANKER_VENV}/bin:${PATH}"
export PATH

# Succeeds only if the venv's Python can import vllm and torch reports CUDA
# as available. The SystemExit raised inside the `try` is not an `Exception`
# subclass, so it passes through the `except` clause unchanged.
vllm_cuda_ready() {
  "${PYTHON_BIN}" - <<'PY'
try:
    import vllm  # noqa: F401
    import torch
    if not torch.cuda.is_available():
        raise SystemExit(1)
except Exception:
    raise SystemExit(1)
PY
}

# Preflight checks that apply only to the qwen3_vllm backend.
case "${RERANK_BACKEND}" in
  qwen3_vllm)
    # nvidia-smi must both exist on PATH and run successfully.
    if ! { command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi >/dev/null 2>&1; }; then
      printf '%s\n' "ERROR: qwen3_vllm backend requires NVIDIA GPU, but nvidia-smi is unavailable." >&2
      exit 1
    fi
    if ! vllm_cuda_ready; then
      printf '%s\n' \
        "ERROR: qwen3_vllm backend requires vllm + CUDA runtime in ${RERANKER_VENV}." \
        "Please run: ./scripts/setup_reranker_venv.sh and verify CUDA is available." >&2
      exit 1
    fi
    ;;
esac

# Startup banner; the final empty argument prints the trailing blank line.
printf '%s\n' \
  "========================================" \
  "Starting Reranker Service" \
  "========================================" \
  "Python: ${PYTHON_BIN}" \
  "Host: ${RERANKER_HOST}" \
  "Port: ${RERANKER_PORT}" \
  "Backend: ${RERANK_BACKEND}" \
  "Runtime dir: ${RERANKER_RUNTIME_DIR}" \
  ""

# Replace this shell with the uvicorn server process.
uvicorn_args=(
  -m uvicorn reranker.server:app
  --host "${RERANKER_HOST}"
  --port "${RERANKER_PORT}"
  --workers 1
)
exec "${PYTHON_BIN}" "${uvicorn_args[@]}"