Blame view

scripts/evaluation/quick_start_eval.sh 1.42 KB
881d338b   tangwang   评估框架
1
2
3
4
5
6
7
8
9
10
11
  #!/usr/bin/env bash
  # Quick entrypoints for search evaluation.
  # Can be launched from any working directory: the repo root is derived from
  # this script's own location and becomes the cwd for everything below.
  set -e -u -o pipefail

  # The repo root is two directory levels above the directory holding this script.
  ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
  cd "${ROOT}"

  # Python interpreter taken from the repo-local virtualenv.
  PY="${ROOT}/.venv/bin/python"

  # Tenant to evaluate; falls back to 163 when TENANT_ID is unset or empty.
  : "${TENANT_ID:=163}"

  # Queries file: REPO_EVAL_QUERIES wins when set and non-empty, otherwise the
  # default path below (relative to the cwd established above) is used.
  QUERIES="scripts/evaluation/queries/queries.txt"
  if [[ -n "${REPO_EVAL_QUERIES:-}" ]]; then
    QUERIES="${REPO_EVAL_QUERIES}"
  fi
  
  # Print the command summary to stdout: the three supported subcommands
  # (batch, batch-rebuild, serve) and the environment variables that tune them.
  # The first line shows the script as it was invoked ($0).
  usage() {
    printf '%s\n' \
      "Usage: $0 batch|batch-rebuild|serve" \
      "  batch          — batch eval: live search every query, LLM only for missing labels (top_k=50, simple)" \
      "  batch-rebuild  — same as batch but --force-refresh-labels (re-LLM all top_k hits; expensive, overwrites cache)" \
      "  serve          — eval UI on http://0.0.0.0:6010/" \
      "Env: TENANT_ID (default 163), REPO_EVAL_QUERIES (path to queries file)"
  }
  
  # Dispatch on the first CLI argument. Each recognised subcommand replaces this
  # shell (exec) with the Python entrypoint; script paths are relative, so they
  # rely on the cwd already being the repo root.
  subcommand="${1:-}"

  if [[ "$subcommand" == "batch" || "$subcommand" == "batch-rebuild" ]]; then
    # Both batch variants share the same flags.
    batch_args=(
      --tenant-id "$TENANT_ID"
      --queries-file "$QUERIES"
      --top-k 50
      --language en
      --labeler-mode simple
    )
    # batch-rebuild additionally passes --force-refresh-labels.
    if [[ "$subcommand" == "batch-rebuild" ]]; then
      batch_args+=(--force-refresh-labels)
    fi
    exec "$PY" scripts/evaluation/build_annotation_set.py batch "${batch_args[@]}"
  elif [[ "$subcommand" == "serve" ]]; then
    # Evaluation web UI, bound to all interfaces on port 6010.
    serve_args=(
      --tenant-id "$TENANT_ID"
      --queries-file "$QUERIES"
      --host 0.0.0.0
      --port 6010
    )
    exec "$PY" scripts/evaluation/serve_eval_web.py serve "${serve_args[@]}"
  else
    # Missing or unrecognised subcommand: show help and fail.
    usage
    exit 1
  fi