d1d356f8
tangwang
脚本优化
|
1
2
|
#!/bin/bash
#
|
3d508beb
tangwang
reranker-4b-gguf
|
3
|
# Start reranker service from its backend-specific isolated venv.
|
d1d356f8
tangwang
脚本优化
|
4
|
#
|
07cf5a93
tangwang
START_EMBEDDING=...
|
5
|
# Strict mode: abort on command failure, on use of unset variables,
# and on failures anywhere in a pipeline.
set -o errexit
set -o nounset
set -o pipefail
|
d1d356f8
tangwang
脚本优化
|
6
|
|
07cf5a93
tangwang
START_EMBEDDING=...
|
7
8
|
# Resolve the repository root (the parent of this script's directory) as an
# absolute path, then make it the working directory for everything below.
PROJECT_ROOT="$(dirname "$0")/.."
PROJECT_ROOT="$(cd "${PROJECT_ROOT}" && pwd)"
cd "${PROJECT_ROOT}"
|
d1d356f8
tangwang
脚本优化
|
9
|
|
7fbca0d7
tangwang
启动脚本优化
|
10
11
12
13
|
# Load .env without activating main venv.
# load_env_file comes from the sourced library; presumably it exports the
# file's KEY=VALUE pairs into this shell — confirm in scripts/lib/load_env.sh.
# shellcheck source=scripts/lib/load_env.sh
source "${PROJECT_ROOT}/scripts/lib/load_env.sh"
load_env_file "${PROJECT_ROOT}/.env"
|
3d508beb
tangwang
reranker-4b-gguf
|
14
15
|
# Provides detect_rerank_backend and reranker_backend_venv_dir, both used
# below to pick the backend and locate its isolated virtualenv.
# shellcheck source=scripts/lib/reranker_backend_env.sh
source "${PROJECT_ROOT}/scripts/lib/reranker_backend_env.sh"
|
d1d356f8
tangwang
脚本优化
|
16
17
18
|
# Bind address and port for the reranker service; both overridable via env.
: "${RERANKER_HOST:=0.0.0.0}"
: "${RERANKER_PORT:=6007}"
|
3d508beb
tangwang
reranker-4b-gguf
|
19
20
21
22
23
24
25
26
27
|
# Pick the backend and its dedicated virtualenv: either pinned by the caller
# through env vars, or derived by the sourced backend helpers.
RERANK_BACKEND="${RERANK_BACKEND:-$(detect_rerank_backend "${PROJECT_ROOT}")}"
RERANKER_VENV="${RERANKER_VENV:-$(reranker_backend_venv_dir "${PROJECT_ROOT}" "${RERANK_BACKEND}")}"
PYTHON_BIN="${RERANKER_VENV}/bin/python"

# Guard clause: refuse to start unless the backend-specific interpreter exists.
[[ -x "${PYTHON_BIN}" ]] || {
  echo "ERROR: reranker venv not found for backend ${RERANK_BACKEND}: ${RERANKER_VENV}" >&2
  echo "Please run: ./scripts/setup_reranker_venv.sh ${RERANK_BACKEND}" >&2
  exit 1
}
|
07cf5a93
tangwang
START_EMBEDDING=...
|
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
# Keep vLLM/triton/torch caches out of system disk.
RERANKER_RUNTIME_DIR="${RERANKER_RUNTIME_DIR:-${PROJECT_ROOT}/.runtime/reranker}"
# One subdirectory per cache/work area under the runtime root.
for runtime_sub in home cache config triton torch_compile tmp; do
  mkdir -p "${RERANKER_RUNTIME_DIR}/${runtime_sub}"
done
# Redirect everything that normally lands in the user's home / /tmp.
export HOME="${RERANKER_RUNTIME_DIR}/home"
export XDG_CACHE_HOME="${RERANKER_RUNTIME_DIR}/cache"
export XDG_CONFIG_HOME="${RERANKER_RUNTIME_DIR}/config"
export TRITON_CACHE_DIR="${RERANKER_RUNTIME_DIR}/triton"
export TORCHINDUCTOR_CACHE_DIR="${RERANKER_RUNTIME_DIR}/torch_compile"
export TMPDIR="${RERANKER_RUNTIME_DIR}/tmp"
# Opt out of vLLM usage telemetry unless the caller chose otherwise.
export VLLM_NO_USAGE_STATS="${VLLM_NO_USAGE_STATS:-1}"
|
540fb5af
tangwang
添加了可关闭的开关:保留默认行为(...
|
44
45
|
# venv bin must be on PATH before Python starts: vLLM worker inherits it; FlashInfer JIT needs
# pip-installed ninja when qwen3_vllm_score does not force TRITON_ATTN (e.g. T4 + auto_triton off).
|
07cf5a93
tangwang
START_EMBEDDING=...
|
46
47
|
# Put the backend venv's executables first so they win over system tools.
PATH="${RERANKER_VENV}/bin:${PATH}"
export PATH
|
5c21a485
tangwang
qwen3-reranker-0....
|
48
49
50
51
|
# GGUF backends: default HF_HUB_DISABLE_XET=1 (skip the Xet transfer path)
# unless the caller has already chosen a value.
case "${RERANK_BACKEND}" in
  qwen3_gguf*)
    export HF_HUB_DISABLE_XET="${HF_HUB_DISABLE_XET:-1}"
    ;;
esac
|
4823f463
tangwang
qwen3_vllm_score ...
|
52
|
if [[ "${RERANK_BACKEND}" == "qwen3_vllm" || "${RERANK_BACKEND}" == "qwen3_vllm_score" || "${RERANK_BACKEND}" == "qwen3_transformers_packed" ]]; then
|
07cf5a93
tangwang
START_EMBEDDING=...
|
53
|
if ! command -v nvidia-smi >/dev/null 2>&1 || ! nvidia-smi >/dev/null 2>&1; then
|
9de5ef49
tangwang
qwen3_vllm_score ...
|
54
|
echo "ERROR: ${RERANK_BACKEND} backend requires NVIDIA GPU, but nvidia-smi is unavailable." >&2
|
07cf5a93
tangwang
START_EMBEDDING=...
|
55
56
57
58
|
exit 1
fi
if ! "${PYTHON_BIN}" - <<'PY'
try:
|
07cf5a93
tangwang
START_EMBEDDING=...
|
59
|
import torch
|
4823f463
tangwang
qwen3_vllm_score ...
|
60
61
62
63
|
try:
import vllm # noqa: F401
except Exception:
pass
|
07cf5a93
tangwang
START_EMBEDDING=...
|
64
65
66
67
68
69
|
if not torch.cuda.is_available():
raise SystemExit(1)
except Exception:
raise SystemExit(1)
PY
then
|
4823f463
tangwang
qwen3_vllm_score ...
|
70
71
72
73
74
|
# Tailor the diagnostic to what the failed backend actually needs.
case "${RERANK_BACKEND}" in
  qwen3_transformers_packed)
    echo "ERROR: ${RERANK_BACKEND} backend requires torch + CUDA runtime in ${RERANKER_VENV}." >&2
    ;;
  *)
    echo "ERROR: ${RERANK_BACKEND} backend requires vllm + CUDA runtime in ${RERANKER_VENV}." >&2
    ;;
esac
|
3d508beb
tangwang
reranker-4b-gguf
|
75
76
77
78
79
|
echo "Please run: ./scripts/setup_reranker_venv.sh ${RERANK_BACKEND} and verify CUDA is available." >&2
exit 1
fi
fi
|
5c21a485
tangwang
qwen3-reranker-0....
|
80
81
82
|
if [[ "${RERANK_BACKEND}" == qwen3_gguf* ]]; then
gguf_check_status=0
"${PYTHON_BIN}" - <<'PY' || gguf_check_status=$?
|
3d508beb
tangwang
reranker-4b-gguf
|
83
|
try:
|
5c21a485
tangwang
qwen3-reranker-0....
|
84
85
86
|
import llama_cpp
if hasattr(llama_cpp, "llama_supports_gpu_offload") and not llama_cpp.llama_supports_gpu_offload():
raise SystemExit(2)
|
3d508beb
tangwang
reranker-4b-gguf
|
87
88
89
|
except Exception:
raise SystemExit(1)
PY
|
5c21a485
tangwang
qwen3-reranker-0....
|
90
91
92
93
94
95
96
97
|
if [[ "${gguf_check_status}" != "0" ]]; then
if [[ "${gguf_check_status}" == "2" ]]; then
echo "ERROR: ${RERANK_BACKEND} backend detected a CPU-only llama-cpp-python build in ${RERANKER_VENV}." >&2
echo "Please rerun: ./scripts/setup_reranker_venv.sh ${RERANK_BACKEND}" >&2
else
echo "ERROR: ${RERANK_BACKEND} backend requires llama-cpp-python in ${RERANKER_VENV}." >&2
echo "Please run: ./scripts/setup_reranker_venv.sh ${RERANK_BACKEND}" >&2
fi
|
07cf5a93
tangwang
START_EMBEDDING=...
|
98
99
100
|
exit 1
fi
fi
|
d1d356f8
tangwang
脚本优化
|
101
102
103
104
|
# Startup banner.
printf '%s\n' \
  "========================================" \
  "Starting Reranker Service" \
  "========================================"
|
07cf5a93
tangwang
START_EMBEDDING=...
|
105
|
# Report which interpreter will serve requests.
printf 'Python: %s\n' "${PYTHON_BIN}"
|
d1d356f8
tangwang
脚本优化
|
106
107
|
# Report the bind address and port.
printf 'Host: %s\n' "${RERANKER_HOST}"
printf 'Port: %s\n' "${RERANKER_PORT}"
|
07cf5a93
tangwang
START_EMBEDDING=...
|
108
109
|
# Report the chosen backend and where its caches live.
printf 'Backend: %s\n' "${RERANK_BACKEND}"
printf 'Runtime dir: %s\n' "${RERANKER_RUNTIME_DIR}"
|
d1d356f8
tangwang
脚本优化
|
110
111
|
# Blank line separating the banner from uvicorn's own output.
printf '\n'
|
07cf5a93
tangwang
START_EMBEDDING=...
|
112
|
exec "${PYTHON_BIN}" -m uvicorn reranker.server:app \
|
d1d356f8
tangwang
脚本优化
|
113
114
115
|
--host "${RERANKER_HOST}" \
--port "${RERANKER_PORT}" \
--workers 1
|