881d338b
tangwang
评估框架
|
1
2
3
4
5
6
7
8
9
10
11
|
#!/usr/bin/env bash
# Search evaluation quick entrypoints. Run from any cwd; resolves repo root.
set -euo pipefail

# The repo root is two directories above this script.
# CDPATH is cleared for this one lookup: with CDPATH set, `cd` on a relative
# path may jump to a different directory and print it, which would corrupt the
# value captured here. `--` keeps a path starting with '-' from being parsed
# as an option.
ROOT="$(CDPATH='' cd -- "$(dirname -- "$0")/../.." && pwd)"
cd "$ROOT"

# Python interpreter from the virtualenv under the repo root.
PY="${ROOT}/.venv/bin/python"

# Both values can be overridden from the environment. The default queries path
# is relative, so it is passed along relative to the working directory set by
# the cd above.
TENANT_ID="${TENANT_ID:-163}"
QUERIES="${REPO_EVAL_QUERIES:-scripts/evaluation/queries/queries.txt}"
# Print the command-line help for this script to stdout.
# Takes no arguments; reads $0 for the program name shown in the synopsis.
usage() {
  # '%s\n' prints each argument verbatim on its own line. The serve line keeps
  # the escaped \$ so the help shows the literal ${EVAL_WEB_PORT:-6010} text
  # rather than the variable's current value.
  printf '%s\n' \
    "Usage: $0 batch|batch-rebuild|serve" \
    " batch — batch eval: live search every query, LLM only for missing labels (top_k=50, simple)" \
    " batch-rebuild — same as batch but --force-refresh-labels (re-LLM all top_k hits; expensive, overwrites cache)" \
    " serve — eval UI (default http://0.0.0.0:\${EVAL_WEB_PORT:-6010}/; also: ./scripts/start_eval_web.sh)" \
    "Env: TENANT_ID (default 163), REPO_EVAL_QUERIES, EVAL_WEB_HOST, EVAL_WEB_PORT (default 6010)"
}
# Dispatch on the first command-line argument. The batch and serve arms
# replace this shell with the Python process via exec, so nothing after them
# runs and the Python exit status becomes the script's exit status.
case "${1:-}" in
  batch | batch-rebuild)
    # Both batch modes share one argument list so they cannot drift apart;
    # batch-rebuild only appends --force-refresh-labels.
    batch_args=(
      --tenant-id "$TENANT_ID"
      --queries-file "$QUERIES"
      --top-k 50
      --language en
      --labeler-mode simple
    )
    if [[ "$1" == "batch-rebuild" ]]; then
      batch_args+=(--force-refresh-labels)
    fi
    exec "$PY" scripts/evaluation/build_annotation_set.py batch "${batch_args[@]}"
    ;;
  serve)
    # Host and port fall back to defaults only when the environment leaves
    # them unset or empty.
    # NOTE(review): 0.0.0.0 conventionally means "listen on all interfaces";
    # confirm how serve_eval_web.py uses --host before exposing this on a
    # shared machine.
    EVAL_WEB_PORT="${EVAL_WEB_PORT:-6010}"
    EVAL_WEB_HOST="${EVAL_WEB_HOST:-0.0.0.0}"
    exec "$PY" scripts/evaluation/serve_eval_web.py serve \
      --tenant-id "$TENANT_ID" \
      --queries-file "$QUERIES" \
      --host "$EVAL_WEB_HOST" \
      --port "$EVAL_WEB_PORT"
    ;;
  -h | --help)
    # An explicit help request is not an error: print to stdout, exit 0.
    usage
    exit 0
    ;;
  *)
    # Missing or unknown subcommand: the help text is a diagnostic here, so it
    # goes to stderr and the script exits non-zero.
    usage >&2
    exit 1
    ;;
esac
|