Blame view

scripts/evaluation/start_eval.sh 2.58 KB
881d338b   tangwang   评估框架
1
2
3
4
5
6
7
8
  #!/usr/bin/env bash
  # Quick entrypoints for the search evaluation tooling.
  # Can be launched from any working directory: the repo root is derived from
  # this script's own location (two directory levels up) and becomes the cwd.
  set -e -u -o pipefail

  ROOT=$(cd "$(dirname "$0")/../.." && pwd)
  cd "$ROOT"
  PY="$ROOT/.venv/bin/python"

  # Environment-overridable settings; an unset or empty value takes the default.
  : "${TENANT_ID:=163}"
  DATASET_ID=${REPO_EVAL_DATASET_ID:-core_queries}
  RETRY_COUNT=${REPO_EVAL_RETRY_COUNT:-2}

  # Extra CLI arguments shared by the subcommands: --queries-file is added only
  # when REPO_EVAL_QUERIES is non-empty, otherwise the array stays empty.
  EXTRA_QUERY_ARGS=()
  if [[ -n ${REPO_EVAL_QUERIES:-} ]]; then
    EXTRA_QUERY_ARGS+=(--queries-file "$REPO_EVAL_QUERIES")
  fi
881d338b   tangwang   评估框架
15
16
  
  # Print the subcommand summary and the recognised environment variables to
  # stdout, one entry per line. Takes no arguments; reads $0 for the script name.
  usage() {
    printf '%s\n' \
      "Usage: $0 batch|batch-rebuild|batch-rebuild-resume|serve" \
      "  batch          — batch eval: live search every query, LLM only for missing labels (top_k=50)" \
      "  batch-rebuild  — deep rebuild: build --force-refresh-labels (search recall pool + full-corpus rerank + batched LLM; expensive)" \
      "  batch-rebuild-resume — resume missing queries from dataset query_builds with retry/continue-on-error" \
      "  serve          — eval UI (default http://0.0.0.0:\${EVAL_WEB_PORT:-6010}/; also: ./scripts/start_eval_web.sh)" \
      "Env: TENANT_ID (default 163), REPO_EVAL_DATASET_ID (default core_queries), REPO_EVAL_QUERIES (optional override), REPO_EVAL_RETRY_COUNT (default 2), EVAL_WEB_HOST, EVAL_WEB_PORT (default 6010)"
  }
  
  # Dispatch on the first CLI argument. Every recognised subcommand replaces
  # this shell with the Python entrypoint via exec, so nothing runs after it;
  # a missing or unknown subcommand prints the usage text and exits 1.
  #
  # EXTRA_QUERY_ARGS is expanded as ${arr[@]+"${arr[@]}"} rather than a plain
  # "${arr[@]}": on bash older than 4.4 an empty array expanded under `set -u`
  # is reported as an unbound variable and aborts the script. The guarded form
  # yields zero words when the array is empty and the quoted elements otherwise.
  case "${1:-}" in
    batch)
      # Batch evaluation over the dataset with a fixed top-k of 50.
      exec "$PY" scripts/evaluation/build_annotation_set.py batch \
        --tenant-id "$TENANT_ID" \
        --dataset-id "$DATASET_ID" \
        --top-k 50 \
        --language en \
        ${EXTRA_QUERY_ARGS[@]+"${EXTRA_QUERY_ARGS[@]}"}
      ;;
    batch-rebuild)
      # Full rebuild: resets artifacts and forces rerank and label refresh.
      exec "$PY" scripts/evaluation/build_annotation_set.py build \
        --tenant-id "$TENANT_ID" \
        --dataset-id "$DATASET_ID" \
        --search-depth 500 \
        --rerank-depth 10000 \
        --reset-artifacts \
        --force-refresh-rerank \
        --force-refresh-labels \
        --language en \
        ${EXTRA_QUERY_ARGS[@]+"${EXTRA_QUERY_ARGS[@]}"}
      ;;
    batch-rebuild-resume)
      # Same build as batch-rebuild but without --reset-artifacts; resumes
      # missing queries, keeps going on errors, and retries each query up to
      # RETRY_COUNT times with a 10 second backoff.
      exec "$PY" scripts/evaluation/build_annotation_set.py build \
        --tenant-id "$TENANT_ID" \
        --dataset-id "$DATASET_ID" \
        --search-depth 500 \
        --rerank-depth 10000 \
        --force-refresh-rerank \
        --force-refresh-labels \
        --resume-missing \
        --continue-on-error \
        --max-retries-per-query "$RETRY_COUNT" \
        --retry-backoff-sec 10 \
        --language en \
        ${EXTRA_QUERY_ARGS[@]+"${EXTRA_QUERY_ARGS[@]}"}
      ;;
    serve)
      # Evaluation web UI; host and port come from the environment with
      # defaults 0.0.0.0 and 6010.
      EVAL_WEB_PORT="${EVAL_WEB_PORT:-6010}"
      EVAL_WEB_HOST="${EVAL_WEB_HOST:-0.0.0.0}"
      # NOTE(review): --queries-file is forwarded here too when
      # REPO_EVAL_QUERIES is set — confirm serve_eval_web.py accepts it.
      exec "$PY" scripts/evaluation/serve_eval_web.py serve \
        --tenant-id "$TENANT_ID" \
        --dataset-id "$DATASET_ID" \
        --host "$EVAL_WEB_HOST" \
        --port "$EVAL_WEB_PORT" \
        ${EXTRA_QUERY_ARGS[@]+"${EXTRA_QUERY_ARGS[@]}"}
      ;;
    *)
      usage
      exit 1
      ;;
  esac