run_reranker_vllm_instruction_benchmark.sh
2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env bash
# Patch config, restart reranker, wait for /health, run benchmark_reranker_random_titles.py.
# Requires: curl and a Python interpreter at .venv/bin/python.
# PyYAML is not needed (the config patch script is standalone Python).
set -euo pipefail

# Repository root: the parent of the directory that contains this script.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"

# Interpreter from the project virtualenv; used for the patch/benchmark scripts
# and for JSON handling of the /health response.
PYTHON="${ROOT}/.venv/bin/python"

# Fail fast on missing tools. Without curl every health probe fails, so the
# problem would only surface after the config was already patched, the service
# restarted, and the whole health-polling window had elapsed.
if ! command -v curl >/dev/null 2>&1; then
  echo "[error] curl is required but was not found in PATH" >&2
  exit 1
fi
if [[ ! -x "$PYTHON" ]]; then
  echo "[error] Python interpreter not found or not executable: ${PYTHON}" >&2
  exit 1
fi

# One report directory per calendar day (date +%F => YYYY-MM-DD).
DAY="$(date +%F)"
OUT_DIR="${ROOT}/perf_reports/reranker_vllm_instruction/${DAY}"
mkdir -p "$OUT_DIR"
#######################################
# Probe the reranker /health endpoint once and check that it reports the
# wanted configuration.
# Globals:   PYTHON (read) - interpreter used to parse the JSON response
# Arguments: $1 - expected value of the "backend" field
#            $2 - expected value of the "instruction_format" field
# Returns:   0 when status is "ok", model_loaded is truthy and both fields
#            match; non-zero otherwise (including an unreachable endpoint or
#            a response that is not a JSON object).
#######################################
health_ok() {
  local want_backend="$1"
  local want_fmt="$2"
  local body
  # curl stderr is discarded on purpose: connection failures are expected
  # while the service is still starting and this probe is called in a loop.
  if ! body="$(curl -sS --connect-timeout 2 --max-time 5 "http://127.0.0.1:6007/health" 2>/dev/null)"; then
    return 1
  fi
  # The Python program is single-quoted so the shell never expands anything
  # inside it; the expected values are passed as argv instead.
  printf '%s' "$body" | "$PYTHON" -c '
import json, sys

want_b, want_f = sys.argv[1], sys.argv[2]
try:
    d = json.load(sys.stdin)
except ValueError:
    # Not JSON (yet): treat as unhealthy without printing a traceback.
    sys.exit(1)
if not isinstance(d, dict):
    sys.exit(1)
if d.get("status") != "ok" or not d.get("model_loaded"):
    sys.exit(1)
if d.get("backend") != want_b:
    sys.exit(1)
if d.get("instruction_format") != want_f:
    sys.exit(1)
sys.exit(0)
' "$want_backend" "$want_fmt"
}
#######################################
# Poll health_ok until the reranker reports the wanted configuration, then
# pretty-print the /health payload.
# Globals:   PYTHON (read) - interpreter used for `-m json.tool`
# Arguments: $1 - expected backend
#            $2 - expected instruction format
#            $3 - optional: maximum number of probes (default 180)
#            $4 - optional: seconds to sleep between probes (default 3)
# Outputs:   progress lines and the pretty-printed payload on stdout;
#            warnings/errors on stderr
# Returns:   0 once health matches, 1 if it never matched within the window
#######################################
wait_health() {
  local want_backend="$1"
  local want_fmt="$2"
  local max_attempts="${3:-180}"
  local interval="${4:-3}"
  local i
  for ((i = 1; i <= max_attempts; i++)); do
    if health_ok "$want_backend" "$want_fmt"; then
      # Display only: the match was already established above, so a transient
      # failure of this second fetch must not abort the caller under set -e.
      # The timeouts keep it from hanging indefinitely.
      if ! curl -sS --connect-timeout 2 --max-time 5 "http://127.0.0.1:6007/health" \
        | "$PYTHON" -m json.tool; then
        echo "[warn] health matched but the payload could not be re-fetched for display" >&2
      fi
      return 0
    fi
    echo "[wait] ${i}/${max_attempts} backend=${want_backend} instruction_format=${want_fmt} ..."
    sleep "$interval"
  done
  echo "[error] health did not match in time" >&2
  return 1
}
#######################################
# Benchmark one backend / instruction-format combination: patch the config,
# restart the reranker, wait for matching health, then run the benchmark.
# Globals:   PYTHON, ROOT, OUT_DIR (read)
# Arguments: $1 - backend name
#            $2 - instruction format
# Outputs:   banner and artifact path on stdout; a warning on stderr when the
#            benchmark exits non-zero (the function still continues)
#######################################
run_one() {
  local backend="$1"
  local fmt="$2"
  local label="${backend}|${fmt}"
  local summary_json="${OUT_DIR}/${backend}_${fmt}.json"
  local -a bench_cmd=(
    "$PYTHON" "${ROOT}/scripts/benchmark_reranker_random_titles.py"
    100,200,400,600,800,1000
    --repeat 5
    --seed 42
    --quiet-runs
    --timeout 360
    --tag "$label"
    --json-summary-out "$summary_json"
  )

  printf '========== %s ==========\n' "$label"

  "$PYTHON" "${ROOT}/scripts/patch_rerank_vllm_benchmark_config.py" \
    --backend "$backend" --instruction-format "$fmt"
  "${ROOT}/restart.sh" reranker
  wait_health "$backend" "$fmt"

  # A failing benchmark is reported but does not stop the remaining runs.
  "${bench_cmd[@]}" \
    || echo "[warn] benchmark exited non-zero for ${label} (see ${summary_json} failed flag / partial runs)" >&2

  printf 'artifact: %s\n' "$summary_json"
}
# Patch the config back to the repo-default-style rerank settings
# (score + compact) and restart the reranker. Does not wait for health.
restore_default_config() {
  "$PYTHON" "${ROOT}/scripts/patch_rerank_vllm_benchmark_config.py" \
    --backend qwen3_vllm_score --instruction-format compact
  "${ROOT}/restart.sh" reranker
}

# EXIT handler: when the run aborts (set -e failure or a signal), make a
# best-effort attempt to put the default config back so the service is not
# left on whichever benchmark combination was active. The original exit
# status is preserved.
restore_on_abort() {
  local status=$?
  trap - EXIT INT TERM
  if ((status != 0)); then
    echo "[warn] aborted with status ${status}; restoring qwen3_vllm_score + compact" >&2
    restore_default_config \
      || echo "[error] could not restore the default rerank config" >&2
  fi
  exit "$status"
}
trap restore_on_abort EXIT
# Turn signals into a normal exit so the EXIT handler above always runs.
trap 'exit 130' INT
trap 'exit 143' TERM

# Benchmark matrix, in order: qwen3_vllm/compact, qwen3_vllm/standard,
# qwen3_vllm_score/compact, qwen3_vllm_score/standard.
for bench_backend in qwen3_vllm qwen3_vllm_score; do
  for bench_fmt in compact standard; do
    run_one "$bench_backend" "$bench_fmt"
  done
done

# Normal path: the handlers are cleared first so a failure of the restore
# itself is reported once instead of triggering a second restore attempt.
trap - EXIT INT TERM
# Restore repo-default-style rerank settings (score + compact).
restore_default_config
wait_health qwen3_vllm_score compact
echo "Restored config: qwen3_vllm_score + compact. Done. Artifacts under ${OUT_DIR}"