Blame view

benchmarks/reranker/run_reranker_vllm_instruction_benchmark.sh 2.51 KB
52ea6529   tangwang   性能测试:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
  #!/usr/bin/env bash
  # Benchmark the reranker across vLLM backend / instruction-format variants.
  # For each variant: patch the config, restart the reranker, wait for /health
  # to report the variant, then run benchmark_reranker_random_titles.py.
  #
  # Requires: curl and the repo virtualenv at .venv. PyYAML is not needed,
  # because the config patcher is standalone Python.

  set -euo pipefail

  # This script lives in <repo>/benchmarks/reranker/, while every path used
  # below (.venv, restart.sh, benchmarks/reranker/*.py) is relative to the
  # repository root, so the root is two directories up from the script.
  ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
  cd "$ROOT"

  PYTHON="${ROOT}/.venv/bin/python"
  # Fail fast, before any config is patched or any service is restarted.
  if [[ ! -x "$PYTHON" ]]; then
    echo "[error] python interpreter not found or not executable: ${PYTHON}" >&2
    exit 1
  fi
  if ! command -v curl >/dev/null 2>&1; then
    echo "[error] curl is required but was not found in PATH" >&2
    exit 1
  fi

  # Reports are grouped per calendar day (YYYY-MM-DD).
  DAY="$(date +%F)"
  OUT_DIR="${ROOT}/perf_reports/reranker_vllm_instruction/${DAY}"
  mkdir -p "$OUT_DIR"
  
  # health_ok BACKEND FORMAT
  #
  # Performs one probe of the reranker /health endpoint.
  # Arguments:
  #   $1 - backend name the payload must report in "backend"
  #   $2 - value the payload must report in "instruction_format"
  # Globals:
  #   PYTHON (read) - interpreter used to parse the JSON payload
  # Returns:
  #   0 when the payload is a JSON object with status == "ok", a truthy
  #   model_loaded, and the expected backend and instruction_format;
  #   non-zero otherwise (including connection failures and malformed payloads).
  health_ok() {
    local want_backend="$1"
    local want_fmt="$2"
    local body
    # curl's stderr is discarded on purpose: callers poll this function while
    # the service is still starting, so connection errors are expected noise.
    if ! body="$(curl -sS --connect-timeout 2 --max-time 5 "http://127.0.0.1:6007/health" 2>/dev/null)"; then
      return 1
    fi
    # The leading "if True:" lets the Python body be indented to match the
    # surrounding shell code without raising IndentationError.
    printf '%s' "$body" | "$PYTHON" -c 'if True:
      import json, sys
      want_b, want_f = sys.argv[1], sys.argv[2]
      try:
          d = json.load(sys.stdin)
      except ValueError:
          # Not JSON (for example an HTML error page): simply "not healthy".
          sys.exit(1)
      if not isinstance(d, dict):
          sys.exit(1)
      if d.get("status") != "ok" or not d.get("model_loaded"):
          sys.exit(1)
      if d.get("backend") != want_b:
          sys.exit(1)
      if d.get("instruction_format") != want_f:
          sys.exit(1)
      sys.exit(0)
  ' "$want_backend" "$want_fmt"
  }
  
  # wait_health BACKEND FORMAT [MAX_ATTEMPTS [INTERVAL_SECONDS]]
  #
  # Polls health_ok until it succeeds, then prints the /health payload.
  # Arguments:
  #   $1 - expected backend
  #   $2 - expected instruction_format
  #   $3 - optional maximum number of polls (default: 180)
  #   $4 - optional pause between polls, in seconds (default: 3)
  # Globals:
  #   PYTHON (read) - interpreter used for json.tool pretty-printing
  # Outputs:
  #   progress lines and the pretty-printed payload on stdout;
  #   warnings and errors on stderr
  # Returns:
  #   0 once health_ok succeeded, 1 when every attempt was used up.
  wait_health() {
    local want_backend="$1"
    local want_fmt="$2"
    local max_attempts="${3:-180}"
    local interval="${4:-3}"
    local i
    for ((i = 1; i <= max_attempts; i++)); do
      if health_ok "$want_backend" "$want_fmt"; then
        # Health is already confirmed; this second fetch is informational only.
        # It runs inside a condition so that a transient failure cannot abort
        # the caller under `set -e` / `pipefail`, and it is bounded by the same
        # timeouts as the probe so it cannot hang.
        if ! curl -sS --connect-timeout 2 --max-time 5 "http://127.0.0.1:6007/health" \
          | "$PYTHON" -m json.tool; then
          echo "[warn] health matched but the payload could not be pretty-printed" >&2
        fi
        return 0
      fi
      echo "[wait] ${i}/${max_attempts} backend=${want_backend} instruction_format=${want_fmt} ..."
      # No point in pausing after the final failed attempt.
      if ((i < max_attempts)); then
        sleep "$interval"
      fi
    done
    echo "[error] health did not match in time" >&2
    return 1
  }
  
  # run_one BACKEND FORMAT
  #
  # Benchmarks one backend / instruction-format combination: patches the
  # config, restarts the reranker, waits until /health reports the variant,
  # then runs benchmark_reranker_random_titles.py.
  # Arguments:
  #   $1 - backend name passed to the patcher via --backend
  #   $2 - instruction format passed to the patcher via --instruction-format
  # Globals:
  #   PYTHON, ROOT, OUT_DIR (read)
  # Outputs:
  #   a banner and the artifact path on stdout; a warning on stderr when the
  #   benchmark exits non-zero
  # Returns:
  #   0 even when the benchmark itself fails, so the remaining variants still
  #   run. Patch, restart and health-wait failures are not handled here.
  run_one() {
    local backend="${1:?run_one: backend is required}"
    local fmt="${2:?run_one: instruction format is required}"
    local tag="${backend}|${fmt}"
    local jf="${OUT_DIR}/${backend}_${fmt}.json"
    local bench_dir="${ROOT}/benchmarks/reranker"

    echo "========== ${tag} =========="
    "$PYTHON" "${bench_dir}/patch_rerank_vllm_benchmark_config.py" \
      --backend "$backend" --instruction-format "$fmt"

    "${ROOT}/restart.sh" reranker
    wait_health "$backend" "$fmt"

    # Positional list first, then options; kept in an array so each argument
    # stays a single word.
    local -a bench_args=(
      100,200,400,600,800,1000
      --repeat 5
      --seed 42
      --quiet-runs
      --timeout 360
      --tag "$tag"
      --json-summary-out "$jf"
    )
    if ! "$PYTHON" "${bench_dir}/benchmark_reranker_random_titles.py" "${bench_args[@]}"; then
      echo "[warn] benchmark exited non-zero for ${tag} (see ${jf} failed flag / partial runs)" >&2
    fi

    echo "artifact: $jf"
  }
  
  # If the script aborts part-way (strict mode exits on the first failing
  # patch / restart / health wait), the config file would otherwise be left on
  # whichever benchmark variant was patched last. On a non-zero exit, re-patch
  # it to the score + compact combination as a best effort. The service is not
  # restarted here because a failing restart may be what caused the abort.
  restore_config_on_failure() {
    local rc=$?
    if ((rc != 0)); then
      echo "[error] aborted with exit ${rc}; re-patching config to qwen3_vllm_score + compact (reranker not restarted)" >&2
      "$PYTHON" "${ROOT}/benchmarks/reranker/patch_rerank_vllm_benchmark_config.py" \
        --backend qwen3_vllm_score --instruction-format compact \
        || echo "[error] could not re-patch the config; restore it manually" >&2
    fi
  }
  trap restore_config_on_failure EXIT

  # Benchmark every backend x instruction-format combination, in the order
  # vllm/compact, vllm/standard, vllm_score/compact, vllm_score/standard.
  for backend in qwen3_vllm qwen3_vllm_score; do
    for fmt in compact standard; do
      run_one "$backend" "$fmt"
    done
  done

  # Restore repo-default-style rerank settings (score + compact).
  "$PYTHON" "${ROOT}/benchmarks/reranker/patch_rerank_vllm_benchmark_config.py" \
    --backend qwen3_vllm_score --instruction-format compact
  "${ROOT}/restart.sh" reranker
  wait_health qwen3_vllm_score compact
  echo "Restored config: qwen3_vllm_score + compact. Done. Artifacts under ${OUT_DIR}"