rerank_performance_compare.sh 2.55 KB
#!/bin/bash

# Abort on any reference to an unset variable. -e is not enabled, so a failing
# command does not stop the script on its own.
set -u

# Text file that is sampled line by line to build each request.
FILE="/data/saas-search/tests/data/titles.1.8w"
# Number of timed requests sent to each test target.
ROUNDS=10
# Number of lines drawn at random from FILE in every round.
SAMPLE_SIZE=400

# Fail fast when an external tool the benchmark relies on is missing; without
# this the loop would keep running and report meaningless timings.
for required_tool in shuf jq curl; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    echo "缺少依赖命令: $required_tool" >&2
    exit 1
  fi
done
unset required_tool

# Diagnostics go to stderr so they are not mixed into the report on stdout.
if [[ ! -f "$FILE" ]]; then
  echo "文件不存在: $FILE" >&2
  exit 1
fi

# Print a JSON array of strings on stdout: up to SAMPLE_SIZE lines picked at
# random from FILE, one array element per sampled line.
# Globals: FILE (read), SAMPLE_SIZE (read)
generate_docs_json() {
  # -R makes jq read every input line as a raw string; -n together with
  # [inputs] gathers all of those strings into a single array.
  shuf -n "$SAMPLE_SIZE" "$FILE" \
    | jq -R -n '[inputs]'
}

#######################################
# Print a summary report (count, total, average, min, max) for a list of
# timings.
# Arguments:
#   $1   - label shown in the report header
#   $2.. - durations as whole-number milliseconds
# Outputs:
#   Writes the report to stdout. When no durations are given, only the header
#   and a zero count are printed.
# Returns:
#   0
#######################################
summarize_times() {
  local name="${1-}"
  if (( $# > 0 )); then
    shift
  fi
  local -a samples=("$@")
  local count=${#samples[@]}

  # Without any samples there is no min/max to seed and the average would
  # divide by zero, so report the empty result and stop.
  if (( count == 0 )); then
    echo "========================================"
    echo "$name 汇总"
    echo "测试次数: 0"
    echo "========================================"
    return 0
  fi

  local total=0
  local min=${samples[0]}
  local max=${samples[0]}
  local t  # keep the loop variable out of the caller's scope

  for t in "${samples[@]}"; do
    total=$(( total + t ))
    if (( t < min )); then
      min=$t
    fi
    if (( t > max )); then
      max=$t
    fi
  done

  # Integer division: the average is truncated to whole milliseconds.
  local avg=$(( total / count ))

  echo "========================================"
  echo "$name 汇总"
  echo "测试次数: $count"
  echo "总耗时: ${total} ms"
  echo "平均耗时: ${avg} ms"
  echo "最小耗时: ${min} ms"
  echo "最大耗时: ${max} ms"
  echo "========================================"
}

#######################################
# POST a JSON body with curl and measure how long the call takes.
# Arguments:
#   $1   - endpoint URL
#   $2   - request body
#   $3.. - extra arguments handed to curl (e.g. -H headers)
# Outputs:
#   Elapsed wall-clock time in whole milliseconds on stdout; curl's own error
#   text (if any) on stderr.
# Returns:
#   curl's exit status. Because of --fail this is non-zero for HTTP responses
#   with status >= 400 as well as for transport errors.
#######################################
timed_post_ms() {
  local url="$1"
  local payload="$2"
  shift 2
  local started finished status=0

  # date +%s%N yields nanoseconds since the epoch; the difference is
  # converted to milliseconds below.
  started=$(date +%s%N)
  curl -sS --fail -o /dev/null -X POST "$url" "$@" -d "$payload" || status=$?
  finished=$(date +%s%N)

  echo $(( (finished - started) / 1000000 ))
  return "$status"
}

echo "开始测试..."
echo "数据文件: $FILE"
echo "每次随机抽样: $SAMPLE_SIZE 行"
echo "每个测试对象执行次数: $ROUNDS"
echo

times_obj1=()
times_obj2=()

# NOTE(review): the two targets are sent different query strings, and only
# target 1 receives top_n / normalize — confirm this is intended for a
# like-for-like comparison.
for ((i=1; i<=ROUNDS; i++)); do
  echo "---------- 第 $i 轮 ----------"

  # Draw one fresh random sample per round; both targets get the same docs.
  if ! DOCS_JSON=$(generate_docs_json); then
    echo "生成抽样文档失败" >&2
    exit 1
  fi

  # Test target 1
  if ! PAYLOAD1=$(jq -n \
    --arg query "健身女生T恤短袖" \
    --argjson docs "$DOCS_JSON" \
    --argjson top_n 386 \
    --argjson normalize true \
    '{
      query: $query,
      docs: $docs,
      top_n: $top_n,
      normalize: $normalize
    }'); then
    echo "构造测试对象1请求体失败" >&2
    exit 1
  fi

  status1=0
  duration1=$(timed_post_ms "http://localhost:6007/rerank" "$PAYLOAD1" \
    -H "Content-Type: application/json") || status1=$?
  # The timing is recorded even when the request failed, so every round still
  # contributes one sample; the warning below marks it as unreliable.
  times_obj1+=("$duration1")
  echo "测试对象1 第 $i 次耗时: ${duration1} ms"
  if (( status1 != 0 )); then
    echo "警告: 测试对象1 第 $i 次请求失败 (curl 退出码 ${status1}), 该耗时不代表成功请求" >&2
  fi

  # Test target 2
  if ! PAYLOAD2=$(jq -n \
    --arg model "Qwen3-Reranker-0.6B" \
    --arg query "什么是机器学习" \
    --argjson documents "$DOCS_JSON" \
    '{
      model: $model,
      query: $query,
      documents: $documents
    }'); then
    echo "构造测试对象2请求体失败" >&2
    exit 1
  fi

  status2=0
  duration2=$(timed_post_ms "http://10.200.16.14:9997/v1/rerank" "$PAYLOAD2" \
    -H "accept: application/json" \
    -H "Content-Type: application/json") || status2=$?
  times_obj2+=("$duration2")
  echo "测试对象2 第 $i 次耗时: ${duration2} ms"
  if (( status2 != 0 )); then
    echo "警告: 测试对象2 第 $i 次请求失败 (curl 退出码 ${status2}), 该耗时不代表成功请求" >&2
  fi

  echo
done

echo
echo "测试完成,开始汇总..."
echo

summarize_times "测试对象1" "${times_obj1[@]}"
summarize_times "测试对象2" "${times_obj2[@]}"