start_eval.sh
2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env bash
# Search evaluation quick entrypoints. Run from any cwd; resolves repo root.
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# The repo root is two directories above this script. Move there first so the
# relative scripts/... paths used by the subcommands resolve from any cwd,
# then record the resulting absolute path.
cd "$(dirname "$0")/../.."
ROOT="$(pwd)"

# Python interpreter inside the repo-local .venv directory.
PY="$ROOT/.venv/bin/python"

# Settings; each falls back to its default when the environment variable is
# unset or empty.
: "${TENANT_ID:=163}"
DATASET_ID="${REPO_EVAL_DATASET_ID:-core_queries}"
RETRY_COUNT="${REPO_EVAL_RETRY_COUNT:-2}"

# Optional arguments appended to every subcommand: populated with a
# --queries-file override only when REPO_EVAL_QUERIES is non-empty.
EXTRA_QUERY_ARGS=()
[[ -z "${REPO_EVAL_QUERIES:-}" ]] || EXTRA_QUERY_ARGS+=(--queries-file "${REPO_EVAL_QUERIES}")
# Print the list of subcommands and the environment variables the script
# reads. Emits six lines on stdout; only $0 is interpolated (first line).
usage() {
  # The ${EVAL_WEB_PORT:-6010} text in the serve line is meant to be shown
  # literally, hence the single quotes.
  # shellcheck disable=SC2016
  printf '%s\n' \
    "Usage: $0 batch|batch-rebuild|batch-rebuild-resume|serve" \
    ' batch — batch eval: live search every query, LLM only for missing labels (top_k=50)' \
    ' batch-rebuild — deep rebuild: build --force-refresh-labels (search recall pool + full-corpus rerank + batched LLM; expensive)' \
    ' batch-rebuild-resume — resume missing queries from dataset query_builds with retry/continue-on-error' \
    ' serve — eval UI (default http://0.0.0.0:${EVAL_WEB_PORT:-6010}/; also: ./scripts/start_eval_web.sh)' \
    'Env: TENANT_ID (default 163), REPO_EVAL_DATASET_ID (default core_queries), REPO_EVAL_QUERIES (optional override), REPO_EVAL_RETRY_COUNT (default 2), EVAL_WEB_HOST, EVAL_WEB_PORT (default 6010)'
}
# Dispatch on the first command-line argument. Each recognised subcommand
# replaces this shell with the Python entrypoint via exec, so the Python
# process's exit status is what the caller sees. Anything else (including no
# argument) prints usage to stderr and exits 1.
#
# EXTRA_QUERY_ARGS is expanded as ${arr[@]+"${arr[@]}"} rather than a plain
# "${arr[@]}": with `set -u` active, bash releases older than 4.4 treat the
# plain form on an empty array as an unbound variable and abort. The guarded
# form expands to zero words when the array is empty and to the individually
# quoted elements otherwise.
case "${1:-}" in
  batch)
    # Batch evaluation over the dataset's queries with a fixed top-k of 50.
    exec "$PY" scripts/evaluation/build_annotation_set.py batch \
      --tenant-id "$TENANT_ID" \
      --dataset-id "$DATASET_ID" \
      --top-k 50 \
      --language en \
      ${EXTRA_QUERY_ARGS[@]+"${EXTRA_QUERY_ARGS[@]}"}
    ;;
  batch-rebuild)
    # Full rebuild: resets artifacts and forces both rerank and label refresh.
    exec "$PY" scripts/evaluation/build_annotation_set.py build \
      --tenant-id "$TENANT_ID" \
      --dataset-id "$DATASET_ID" \
      --search-depth 500 \
      --rerank-depth 10000 \
      --reset-artifacts \
      --force-refresh-rerank \
      --force-refresh-labels \
      --language en \
      ${EXTRA_QUERY_ARGS[@]+"${EXTRA_QUERY_ARGS[@]}"}
    ;;
  batch-rebuild-resume)
    # Same build as batch-rebuild but without --reset-artifacts; resumes
    # missing queries, keeps going on errors, and retries each query up to
    # RETRY_COUNT times with a 10-second backoff.
    exec "$PY" scripts/evaluation/build_annotation_set.py build \
      --tenant-id "$TENANT_ID" \
      --dataset-id "$DATASET_ID" \
      --search-depth 500 \
      --rerank-depth 10000 \
      --force-refresh-rerank \
      --force-refresh-labels \
      --resume-missing \
      --continue-on-error \
      --max-retries-per-query "$RETRY_COUNT" \
      --retry-backoff-sec 10 \
      --language en \
      ${EXTRA_QUERY_ARGS[@]+"${EXTRA_QUERY_ARGS[@]}"}
    ;;
  serve)
    # Evaluation web UI; host and port come from the environment with
    # defaults of 0.0.0.0 and 6010.
    EVAL_WEB_PORT="${EVAL_WEB_PORT:-6010}"
    EVAL_WEB_HOST="${EVAL_WEB_HOST:-0.0.0.0}"
    exec "$PY" scripts/evaluation/serve_eval_web.py serve \
      --tenant-id "$TENANT_ID" \
      --dataset-id "$DATASET_ID" \
      --host "$EVAL_WEB_HOST" \
      --port "$EVAL_WEB_PORT" \
      ${EXTRA_QUERY_ARGS[@]+"${EXTRA_QUERY_ARGS[@]}"}
    ;;
  *)
    # Missing or unrecognised subcommand: this is an error path, so the help
    # text goes to stderr and stdout stays clean for callers capturing it.
    usage >&2
    exit 1
    ;;
esac