Blame view

scripts/evaluation/start_eval.sh 1.96 KB
881d338b   tangwang   评估框架
1
2
3
4
5
6
7
8
9
  #!/usr/bin/env bash
  # Search evaluation quick entrypoints. Run from any cwd; resolves repo root.
  # Strict mode: abort on errors, on unset variables, and on failures inside pipelines.
  set -euo pipefail

  # Resolve the repository root (two directories above this script) to an absolute
  # path, then make it the working directory so the relative paths used below
  # (scripts/evaluation/...) resolve no matter where the script was invoked from.
  # CDPATH is cleared for the lookup: with CDPATH set, `cd` on a relative path may
  # switch to a different directory and print it, corrupting the captured value.
  ROOT="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
  cd -- "$ROOT"

  # Python interpreter of the repo-local virtualenv.
  PY="${ROOT}/.venv/bin/python"

  # Settings overridable from the environment:
  #   TENANT_ID                   tenant id passed as --tenant-id (default 163)
  #   REPO_EVAL_QUERIES           queries file passed as --queries-file
  #   REPO_EVAL_START_FROM_QUERY  optional; when non-empty it is forwarded as
  #                               --start-from-query by the batch subcommands
  TENANT_ID="${TENANT_ID:-163}"
  QUERIES="${REPO_EVAL_QUERIES:-scripts/evaluation/queries/queries.txt}"
  START_FROM_QUERY="${REPO_EVAL_START_FROM_QUERY:-}"
  
  #######################################
  # Print the command-line help: the available subcommands and the environment
  # variables that tune them.
  # Arguments: none ($0 is shown as the program name)
  # Outputs:   five lines of help text on stdout
  #######################################
  usage() {
    # The descriptions are single-quoted so that ${EVAL_WEB_PORT:-6010} is shown
    # literally as a hint rather than being expanded.
    printf '%s\n' \
      "Usage: $0 batch|batch-rebuild|serve" \
      '  batch          — batch eval: live search every query, LLM only for missing labels (top_k=50)' \
      '  batch-rebuild  — deep rebuild: build --force-refresh-labels (search recall pool + full-corpus rerank + batched LLM; expensive)' \
      '  serve          — eval UI (default http://0.0.0.0:${EVAL_WEB_PORT:-6010}/; also: ./scripts/start_eval_web.sh)' \
      'Env: TENANT_ID (default 163), REPO_EVAL_QUERIES, REPO_EVAL_START_FROM_QUERY, EVAL_WEB_HOST, EVAL_WEB_PORT (default 6010)'
  }
  
  # Dispatch on the first argument. Each subcommand replaces this shell (exec)
  # with the venv Python running the matching evaluation script; anything else
  # prints the usage text and exits with status 1.
  case "${1:-}" in
    batch)
      # Batch evaluation. The command is assembled as an array so every argument
      # stays a single word regardless of its content.
      cmd=(
        "$PY" scripts/evaluation/build_annotation_set.py batch
        --tenant-id "$TENANT_ID"
        --queries-file "$QUERIES"
        --top-k 50
        --language en
      )
      # Forward the optional starting query only when one was supplied.
      if [[ -n "$START_FROM_QUERY" ]]; then
        cmd+=(--start-from-query "$START_FROM_QUERY")
      fi
      exec "${cmd[@]}"
      ;;
    batch-rebuild)
      # Deep rebuild: resets artifacts and forces both the rerank and the label
      # refresh (described as expensive in the usage text).
      cmd=(
        "$PY" scripts/evaluation/build_annotation_set.py build
        --tenant-id "$TENANT_ID"
        --queries-file "$QUERIES"
        --search-depth 500
        --rerank-depth 10000
        --reset-artifacts
        --force-refresh-rerank
        --force-refresh-labels
        --language en
      )
      # Forward the optional starting query only when one was supplied.
      if [[ -n "$START_FROM_QUERY" ]]; then
        cmd+=(--start-from-query "$START_FROM_QUERY")
      fi
      exec "${cmd[@]}"
      ;;
    serve)
      # Evaluation web UI; bind address and port fall back to 0.0.0.0:6010.
      EVAL_WEB_PORT="${EVAL_WEB_PORT:-6010}"
      EVAL_WEB_HOST="${EVAL_WEB_HOST:-0.0.0.0}"
      exec "$PY" scripts/evaluation/serve_eval_web.py serve \
        --tenant-id "$TENANT_ID" \
        --queries-file "$QUERIES" \
        --host "$EVAL_WEB_HOST" \
        --port "$EVAL_WEB_PORT"
      ;;
    *)
      # Missing or unknown subcommand.
      usage
      exit 1
      ;;
  esac