#!/bin/bash
#
# Benchmark reranker for e-commerce short-text workload:
# - query <= ~100 tokens
# - docs are short title / title+brief
# - one request contains ~1000 docs
#
# Outputs JSON reports under perf_reports/<date>/reranker_1000docs/
#
# Usage:
#   ./benchmarks/reranker/benchmark_reranker_1000docs.sh
#
# Optional env:
#   BATCH_SIZES="24 32 48 64"          infer batch sizes to sweep (space-separated)
#   C1_REQUESTS=4                      request count for the concurrency=1 run
#   C4_REQUESTS=8                      request count for the concurrency=4 run
#   TENANT_ID=162                      passed to the benchmark as --tenant-id
#   TIMEOUT_SEC=240                    passed to the benchmark as --timeout
#   RERANK_BASE=http://127.0.0.1:6007  reranker base URL (health check + benchmark)
#
set -euo pipefail

# Per the Usage line this script lives in benchmarks/reranker/, so the project
# root is two levels up. Everything below (./scripts/..., .venv/bin/python,
# benchmarks/perf_api_benchmark.py, perf_reports/...) is resolved relative to
# that root.
PROJECT_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
cd "${PROJECT_ROOT}"

TENANT_ID="${TENANT_ID:-162}"
BATCH_SIZES="${BATCH_SIZES:-24 32 48 64}"
C1_REQUESTS="${C1_REQUESTS:-4}"
C4_REQUESTS="${C4_REQUESTS:-8}"
TIMEOUT_SEC="${TIMEOUT_SEC:-240}"
RERANK_BASE="${RERANK_BASE:-http://127.0.0.1:6007}"

# Reports are grouped per calendar day (YYYYMMDD).
DATE_TAG="$(date +%Y%m%d)"
OUT_DIR="perf_reports/${DATE_TAG}/reranker_1000docs"

# Scratch file holding the generated benchmark cases.
TMP_CASES="/tmp/rerank_1000_shortdocs_cases.json"

mkdir -p "${OUT_DIR}"
# Stop the reranker service via the project's service control script.
# Best-effort by design: all output is discarded and a failing stop is ignored,
# so the function always returns 0. It is called before each reranker
# (re)start and again from the EXIT trap.
cleanup() {
  ./scripts/service_ctl.sh stop reranker >/dev/null 2>&1 || true
}
# Stop the reranker whenever the script exits, whether it finished or aborted.
trap cleanup EXIT

# Write the benchmark cases template. "docs" starts empty and is filled in by
# the Python step below.
cat > "${TMP_CASES}" <<'JSON'
{
  "scenarios": {
    "rerank": [
      {
        "method": "POST",
        "path": "/rerank",
        "json": {
          "query": "wireless ergonomic gaming mouse for office use with rechargeable battery and bluetooth",
          "docs": [],
          "normalize": true
        }
      }
    ]
  }
}
JSON

# Fill the template with 1000 synthetic short product titles. The cases file
# path is passed as an argument so it always matches TMP_CASES.
python3 - "${TMP_CASES}" <<'PY'
import json
import sys
from pathlib import Path

p = Path(sys.argv[1])
d = json.loads(p.read_text(encoding="utf-8"))

docs = []
for i in range(1000):
    # Rotate between three title templates.
    if i % 3 == 0:
        doc = f"wireless mouse model {i} ergonomic grip 2.4g bluetooth"
    elif i % 3 == 1:
        doc = f"gaming mouse {i} rgb lightweight high precision sensor"
    else:
        doc = f"office mouse {i} rechargeable silent click compact"
    # Occasional suffixes vary the document length.
    if i % 5 == 0:
        doc += " with usb receiver"
    if i % 7 == 0:
        doc += " long battery life"
    docs.append(doc)

d["scenarios"]["rerank"][0]["json"]["docs"] = docs
p.write_text(json.dumps(d, ensure_ascii=False), encoding="utf-8")
print(f"[info] generated docs={len(docs)} at {p}")
PY
# Run one benchmark pass against the reranker and print a one-line summary.
# Globals:   OUT_DIR, TENANT_ID, RERANK_BASE, TMP_CASES, TIMEOUT_SEC (read)
# Arguments: $1 - infer batch size; used here only in the report file name
#                 and the summary line
#            $2 - concurrency (--concurrency)
#            $3 - number of requests (--max-requests)
# Outputs:   JSON report at ${OUT_DIR}/rerank_bs<bs>_c<c>_r<req>.json and a
#            "[result] ..." line on stdout. The benchmark tool's own stdout
#            is discarded.
run_bench() {
  local bs="$1"
  local c="$2"
  local req="$3"
  local out="${OUT_DIR}/rerank_bs${bs}_c${c}_r${req}.json"

  .venv/bin/python benchmarks/perf_api_benchmark.py \
    --scenario rerank \
    --tenant-id "${TENANT_ID}" \
    --reranker-base "${RERANK_BASE}" \
    --cases-file "${TMP_CASES}" \
    --concurrency "${c}" \
    --max-requests "${req}" \
    --timeout "${TIMEOUT_SEC}" \
    --output "${out}" >/dev/null

  # Values are passed as arguments (not interpolated into the Python source),
  # so unusual characters in paths or env values cannot break the snippet.
  python3 - "${out}" "${bs}" "${c}" "${req}" <<'PY'
import json
import sys

path, bs, c, req = sys.argv[1:5]
with open(path) as fh:
    d = json.load(fh)
r = d["results"][0]
lat = r["latency_ms"]
print(f"[result] bs={bs} c={c} req={req} avg={lat['avg']}ms p95={lat['p95']}ms rps={r['throughput_rps']}")
PY
}
# Sweep every infer batch size: restart the reranker with that batch size,
# wait until its health endpoint responds, then benchmark at concurrency 1
# and concurrency 4.
# BATCH_SIZES is deliberately left unquoted so it splits on whitespace.
for bs in ${BATCH_SIZES}; do
  echo "[info] benchmarking infer_batch_size=${bs}"

  # Make sure no previous reranker instance is still running.
  cleanup

  # Start the reranker in the background with the batch size under test;
  # its output goes to a per-batch-size log in the report directory.
  RERANK_VLLM_INFER_BATCH_SIZE="${bs}" \
    RERANK_VLLM_SORT_BY_DOC_LENGTH="true" \
    nohup ./scripts/start_reranker.sh >"${OUT_DIR}/start_bs${bs}.log" 2>&1 &

  # Poll the health endpoint about once per second, for at most 180 attempts.
  for ((i = 1; i <= 180; i++)); do
    if curl -sf "${RERANK_BASE}/health" >/dev/null 2>&1; then
      break
    fi
    sleep 1
    if [ "${i}" -eq 180 ]; then
      echo "[error] reranker startup timeout for bs=${bs}" >&2
      # Show the end of the startup log to help diagnose the failure.
      tail -n 80 "${OUT_DIR}/start_bs${bs}.log" >&2 || true
      exit 1
    fi
  done

  run_bench "${bs}" 1 "${C1_REQUESTS}"
  run_bench "${bs}" 4 "${C4_REQUESTS}"
done

echo "[info] benchmark done: ${OUT_DIR}"