Blame view

scripts/start_reranker.sh 5.22 KB
d1d356f8   tangwang   脚本优化
1
2
  #!/bin/bash
  #
3d508beb   tangwang   reranker-4b-gguf
3
  # Start reranker service from its backend-specific isolated venv.
d1d356f8   tangwang   脚本优化
4
  #
07cf5a93   tangwang   START_EMBEDDING=...
5
  set -euo pipefail
d1d356f8   tangwang   脚本优化
6
  
07cf5a93   tangwang   START_EMBEDDING=...
7
8
  # Move to the parent of the directory holding this script and remember that
  # location, as an absolute path, in PROJECT_ROOT.
  cd "$(dirname "$0")/.."
  PROJECT_ROOT="$(pwd)"
d1d356f8   tangwang   脚本优化
9
  
7fbca0d7   tangwang   启动脚本优化
10
11
12
13
  # Load .env without activating main venv: source the shared loader helper,
  # then pass it the project's .env path.
  # NOTE(review): load_env_file is not defined in this script; presumably it is
  # provided by load_env.sh — confirm there (including whether it exports).
  # shellcheck source=scripts/lib/load_env.sh
  source "${PROJECT_ROOT}/scripts/lib/load_env.sh"
  load_env_file "${PROJECT_ROOT}/.env"
3d508beb   tangwang   reranker-4b-gguf
14
15
  # Backend helper library. detect_rerank_backend and reranker_backend_venv_dir
  # are called later in this script but not defined in it; presumably they come
  # from this file — confirm.
  # shellcheck source=scripts/lib/reranker_backend_env.sh
  source "${PROJECT_ROOT}/scripts/lib/reranker_backend_env.sh"
d1d356f8   tangwang   脚本优化
16
  
daa2690b   tangwang   漏斗参数调优&呈现优化
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
  # Fall back to the instance named "default" when RERANK_INSTANCE is unset or empty.
  : "${RERANK_INSTANCE:=default}"

  # Interpreter for the config lookup: the main project venv's python when it is
  # executable, otherwise $PYTHON, otherwise python3 from PATH.
  if [[ -x "${PROJECT_ROOT}/.venv/bin/python" ]]; then
    CONFIG_PYTHON="${PROJECT_ROOT}/.venv/bin/python"
  else
    CONFIG_PYTHON="${PYTHON:-python3}"
  fi
  
  # Ask the project config (config.loader.get_app_config) for the selected rerank
  # instance and read its host, port, backend and runtime dir as four
  # whitespace-separated fields. The instance name is RERANK_INSTANCE as seen in
  # the Python process's environment, else the config's default_instance; the
  # runtime dir defaults to ./.runtime/reranker/<name> when the instance sets none.
  # NOTE(review): the helper's exit status is not checked here; if it prints
  # nothing the INSTANCE_* variables end up empty — confirm that is intended.
  read -r INSTANCE_HOST INSTANCE_PORT INSTANCE_BACKEND INSTANCE_RUNTIME_DIR <<EOF
  $(
    PYTHONPATH="${PROJECT_ROOT}${PYTHONPATH:+:${PYTHONPATH}}" "${CONFIG_PYTHON}" - <<'PY'
  from config.loader import get_app_config
  import os
  
  cfg = get_app_config().services.rerank
  name = (os.getenv("RERANK_INSTANCE") or cfg.default_instance).strip() or cfg.default_instance
  instance = cfg.get_instance(name)
  runtime_dir = instance.runtime_dir or f"./.runtime/reranker/{name}"
  print(instance.host, instance.port, instance.backend, runtime_dir)
  PY
  )
  EOF
  
  # Precedence for each setting: a non-empty value already in the variable wins,
  # then the value read from the instance config, then the built-in fallback
  # (0.0.0.0, 6007, or whatever detect_rerank_backend prints).
  if [[ -z "${RERANKER_HOST:-}" ]]; then
    RERANKER_HOST="${INSTANCE_HOST:-0.0.0.0}"
  fi
  if [[ -z "${RERANKER_PORT:-}" ]]; then
    RERANKER_PORT="${INSTANCE_PORT:-6007}"
  fi
  if [[ -z "${RERANK_BACKEND:-}" ]]; then
    RERANK_BACKEND="${INSTANCE_BACKEND:-$(detect_rerank_backend "${PROJECT_ROOT}")}"
  fi
3d508beb   tangwang   reranker-4b-gguf
42
43
44
45
46
47
48
49
  # Backend-specific venv: a preset RERANKER_VENV overrides the directory that
  # reranker_backend_venv_dir reports for this backend.
  if [[ -z "${RERANKER_VENV:-}" ]]; then
    RERANKER_VENV="$(reranker_backend_venv_dir "${PROJECT_ROOT}" "${RERANK_BACKEND}")"
  fi
  PYTHON_BIN="${RERANKER_VENV}/bin/python"

  # Stop early, naming the setup command to run, when that interpreter is missing.
  [[ -x "${PYTHON_BIN}" ]] || {
    echo "ERROR: reranker venv not found for backend ${RERANK_BACKEND}: ${RERANKER_VENV}" >&2
    echo "Please run: ./scripts/setup_reranker_venv.sh ${RERANK_BACKEND}" >&2
    exit 1
  }
07cf5a93   tangwang   START_EMBEDDING=...
50
51
  
  # Keep vLLM/triton/torch caches off the system disk.
daa2690b   tangwang   漏斗参数调优&呈现优化
52
53
54
55
  # Runtime directory: preset RERANKER_RUNTIME_DIR, else the instance config's
  # value, else <project>/.runtime/reranker/<instance>.
  RERANKER_RUNTIME_DIR="${RERANKER_RUNTIME_DIR:-${INSTANCE_RUNTIME_DIR:-${PROJECT_ROOT}/.runtime/reranker/${RERANK_INSTANCE}}}"
  case "${RERANKER_RUNTIME_DIR}" in
    /*)
      # Already absolute: use as-is.
      ;;
    *)
      # Relative: anchor it at the project root, dropping one leading "./".
      RERANKER_RUNTIME_DIR="${PROJECT_ROOT}/${RERANKER_RUNTIME_DIR#./}"
      ;;
  esac
07cf5a93   tangwang   START_EMBEDDING=...
56
57
58
59
60
61
62
63
64
65
66
67
68
  # Create the per-instance scratch directories in one mkdir call.
  mkdir -p "${RERANKER_RUNTIME_DIR}"/{home,cache,config,triton,torch_compile,tmp}

  # Point HOME, the XDG dirs, the triton / torch-inductor caches and TMPDIR at
  # those directories for everything started from here on.
  export \
    HOME="${RERANKER_RUNTIME_DIR}/home" \
    XDG_CACHE_HOME="${RERANKER_RUNTIME_DIR}/cache" \
    XDG_CONFIG_HOME="${RERANKER_RUNTIME_DIR}/config" \
    TRITON_CACHE_DIR="${RERANKER_RUNTIME_DIR}/triton" \
    TORCHINDUCTOR_CACHE_DIR="${RERANKER_RUNTIME_DIR}/torch_compile" \
    TMPDIR="${RERANKER_RUNTIME_DIR}/tmp"

  # Defaults to 1; a value already present in the environment is kept.
  export VLLM_NO_USAGE_STATS="${VLLM_NO_USAGE_STATS:-1}"
b0972ff9   tangwang   qwen3_vllm_score ...
69
70
  # venv bin must be on PATH before Python starts: vLLM worker inherits it; on T4/Turing,
  # qwen3_vllm_score now relies on vLLM auto-selecting FLASHINFER, whose JIT needs pip-installed ninja.
07cf5a93   tangwang   START_EMBEDDING=...
71
72
  # Put the backend venv's bin directory first on PATH for child processes.
  PATH="${RERANKER_VENV}/bin:${PATH}"
  export PATH
  
5c21a485   tangwang   qwen3-reranker-0....
73
74
75
76
  # For any backend whose name starts with qwen3_gguf, export
  # HF_HUB_DISABLE_XET, defaulting to 1 unless a value is already set.
  case "${RERANK_BACKEND}" in
    qwen3_gguf*)
      export HF_HUB_DISABLE_XET="${HF_HUB_DISABLE_XET:-1}"
      ;;
  esac
  
4823f463   tangwang   qwen3_vllm_score ...
77
  # GPU preflight for the qwen3_vllm, qwen3_vllm_score and
  # qwen3_transformers_packed backends: abort startup unless nvidia-smi works
  # and torch inside the backend venv reports CUDA as available.
  if [[ "${RERANK_BACKEND}" == "qwen3_vllm" || "${RERANK_BACKEND}" == "qwen3_vllm_score" || "${RERANK_BACKEND}" == "qwen3_transformers_packed" ]]; then
07cf5a93   tangwang   START_EMBEDDING=...
78
    # nvidia-smi must be on PATH and must itself exit successfully.
    if ! command -v nvidia-smi >/dev/null 2>&1 || ! nvidia-smi >/dev/null 2>&1; then
9de5ef49   tangwang   qwen3_vllm_score ...
79
      echo "ERROR: ${RERANK_BACKEND} backend requires NVIDIA GPU, but nvidia-smi is unavailable." >&2
07cf5a93   tangwang   START_EMBEDDING=...
80
81
82
83
      exit 1
    fi
    # Probe with the backend venv's interpreter: importing torch must succeed
    # and torch.cuda.is_available() must be true. The probe also tries to import
    # vllm, but a failure of that import is ignored.
    if ! "${PYTHON_BIN}" - <<'PY'
  try:
07cf5a93   tangwang   START_EMBEDDING=...
84
      import torch
4823f463   tangwang   qwen3_vllm_score ...
85
86
87
88
      try:
          import vllm  # noqa: F401
      except Exception:
          pass
07cf5a93   tangwang   START_EMBEDDING=...
89
90
91
92
93
94
      if not torch.cuda.is_available():
          raise SystemExit(1)
  except Exception:
      raise SystemExit(1)
  PY
    then
4823f463   tangwang   qwen3_vllm_score ...
95
96
97
98
99
      # The probe only reports pass/fail, so the message is chosen by backend
      # name rather than by which import actually failed.
      if [[ "${RERANK_BACKEND}" == "qwen3_transformers_packed" ]]; then
        echo "ERROR: ${RERANK_BACKEND} backend requires torch + CUDA runtime in ${RERANKER_VENV}." >&2
      else
        echo "ERROR: ${RERANK_BACKEND} backend requires vllm + CUDA runtime in ${RERANKER_VENV}." >&2
      fi
3d508beb   tangwang   reranker-4b-gguf
100
101
102
103
104
      echo "Please run: ./scripts/setup_reranker_venv.sh ${RERANK_BACKEND} and verify CUDA is available." >&2
      exit 1
    fi
  fi
  
5c21a485   tangwang   qwen3-reranker-0....
105
106
107
  # llama-cpp-python preflight for backends whose name starts with qwen3_gguf.
  # The probe exits 2 when llama_cpp exposes llama_supports_gpu_offload and it
  # returns false, and exits 1 when importing or probing raises an exception.
  if [[ "${RERANK_BACKEND}" == qwen3_gguf* ]]; then
    gguf_check_status=0
    # `|| gguf_check_status=$?` records the probe's exit code without letting
    # `set -e` terminate the script before the message below is printed.
    "${PYTHON_BIN}" - <<'PY' || gguf_check_status=$?
3d508beb   tangwang   reranker-4b-gguf
108
  try:
5c21a485   tangwang   qwen3-reranker-0....
109
110
111
      import llama_cpp
      if hasattr(llama_cpp, "llama_supports_gpu_offload") and not llama_cpp.llama_supports_gpu_offload():
          raise SystemExit(2)
3d508beb   tangwang   reranker-4b-gguf
112
113
114
  except Exception:
      raise SystemExit(1)
  PY
5c21a485   tangwang   qwen3-reranker-0....
115
116
117
118
119
120
121
122
    # Status 2 is reported as a CPU-only build; any other non-zero status is
    # reported as llama-cpp-python being required in the venv.
    if [[ "${gguf_check_status}" != "0" ]]; then
      if [[ "${gguf_check_status}" == "2" ]]; then
        echo "ERROR: ${RERANK_BACKEND} backend detected a CPU-only llama-cpp-python build in ${RERANKER_VENV}." >&2
        echo "Please rerun: ./scripts/setup_reranker_venv.sh ${RERANK_BACKEND}" >&2
      else
        echo "ERROR: ${RERANK_BACKEND} backend requires llama-cpp-python in ${RERANKER_VENV}." >&2
        echo "Please run: ./scripts/setup_reranker_venv.sh ${RERANK_BACKEND}" >&2
      fi
07cf5a93   tangwang   START_EMBEDDING=...
123
124
125
      exit 1
    fi
  fi
d1d356f8   tangwang   脚本优化
126
127
128
129
  
  # Startup banner: print the resolved instance, interpreter, bind address,
  # backend and runtime directory before launching.
  echo "========================================"
  echo "Starting Reranker Service"
  echo "========================================"
daa2690b   tangwang   漏斗参数调优&呈现优化
130
  echo "Instance: ${RERANK_INSTANCE}"
07cf5a93   tangwang   START_EMBEDDING=...
131
  echo "Python: ${PYTHON_BIN}"
d1d356f8   tangwang   脚本优化
132
133
  echo "Host: ${RERANKER_HOST}"
  echo "Port: ${RERANKER_PORT}"
07cf5a93   tangwang   START_EMBEDDING=...
134
135
  echo "Backend: ${RERANK_BACKEND}"
  echo "Runtime dir: ${RERANKER_RUNTIME_DIR}"
d1d356f8   tangwang   脚本优化
136
137
  echo
  
07cf5a93   tangwang   START_EMBEDDING=...
138
  # exec replaces this shell with uvicorn (run through the backend venv's
  # python), serving reranker.server:app on the resolved host/port with a
  # single worker.
  exec "${PYTHON_BIN}" -m uvicorn reranker.server:app \
d1d356f8   tangwang   脚本优化
139
140
141
    --host "${RERANKER_HOST}" \
    --port "${RERANKER_PORT}" \
    --workers 1