Blame view

scripts/evaluation/run_coarse_fusion_tuning_resilient.sh 2.82 KB
d3dd01d3   tangwang   自动寻参:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
  #!/bin/bash
  #
  # Restart wrapper around scripts/evaluation/tune_fusion.py (--mode optimize).
  #
  # The tuner is launched repeatedly until the run directory's trials.jsonl
  # holds at least <max_evals> records with status "ok" that are not seed
  # trials. After each unsuccessful attempt the script sleeps and relaunches,
  # resuming from the run directory once it exists.
  #
  # Positional arguments: see usage() below.
  # Environment overrides (defaults in parentheses):
  #   BATCH_EVAL_TIMEOUT_SEC (0), RESTART_SLEEP_SEC (30),
  #   SEARCH_BASE_URL (http://127.0.0.1:6002),
  #   EVAL_WEB_BASE_URL (http://127.0.0.1:6010)
  
  # Abort on unhandled command failure, unset variables, and failing pipeline
  # stages.
  set -euo pipefail
  
  # Work from two levels above this script's directory (presumably the
  # repository root) so the relative paths used later resolve.
  cd "$(dirname "$0")/../.."
  # Load the project's environment setup; what activate.sh does is not visible
  # from this file.
  source ./activate.sh
  
  # Print the invocation synopsis on stderr and terminate with status 1.
  # Arguments: none
  # Outputs:   one line on stderr
  # Returns:   never returns; exits the shell with status 1
  usage() {
    printf '%s\n' "usage: $0 <run_name> <dataset_id> <max_evals> <batch_size> <candidate_pool_size> <random_seed> <search_space> <seed_report> [resume_run_dir]" >&2
    exit 1
  }
  
  # --- Positional arguments: the first eight are required, the ninth is optional.
  if [ "$#" -lt 8 ]; then
    usage
  fi
  
  RUN_NAME="$1"
  DATASET_ID="$2"
  MAX_EVALS="$3"
  BATCH_SIZE="$4"
  CANDIDATE_POOL_SIZE="$5"
  RANDOM_SEED="$6"
  SEARCH_SPACE="$7"
  SEED_REPORT="$8"
  RESUME_RUN_DIR="${9:-}"
  
  # MAX_EVALS is the target of the integer "-ge" tests in the retry loop. A
  # non-integer value makes those tests error out, which an `if` treats as
  # false, so the loop would relaunch the tuner forever. Reject it up front.
  case "$MAX_EVALS" in
    '' | *[!0-9]*)
      echo "error: <max_evals> must be a non-negative integer, got '${MAX_EVALS}'" >&2
      usage
      ;;
  esac
  
  # --- Settings that can be overridden from the environment.
  BATCH_EVAL_TIMEOUT_SEC="${BATCH_EVAL_TIMEOUT_SEC:-0}"
  RESTART_SLEEP_SEC="${RESTART_SLEEP_SEC:-30}"
  SEARCH_BASE_URL="${SEARCH_BASE_URL:-http://127.0.0.1:6002}"
  EVAL_WEB_BASE_URL="${EVAL_WEB_BASE_URL:-http://127.0.0.1:6010}"
  
  # RUN_DIR is where successes are counted and what later attempts resume from.
  # When the caller names an explicit resume directory, track that directory;
  # otherwise successes would be counted in a different place from the run
  # being resumed and the completion check could never pass.
  # NOTE(review): assumes the tuner appends resumed trials to the directory
  # given to --resume-run -- confirm against tune_fusion.py.
  if [ -n "$RESUME_RUN_DIR" ]; then
    RUN_DIR="$RESUME_RUN_DIR"
  else
    RUN_DIR="artifacts/search_evaluation/tuning_runs/${RUN_NAME}"
  fi
  
  # Only the parent is created; the run directory itself is left to the tuner,
  # because its existence is what switches the loop into resume mode.
  mkdir -p "$(dirname "$RUN_DIR")"
  
  #######################################
  # Count the successful non-seed trials recorded for the current run.
  # Globals:   RUN_DIR (read)
  # Arguments: none
  # Outputs:   stdout - number of lines in "$RUN_DIR/trials.jsonl" that are
  #            JSON objects with status == "ok" and a missing/falsy is_seed
  #            (0 when the file does not exist);
  #            stderr - a note when unparsable lines were ignored.
  # Returns:   exit status of the python3 process
  #######################################
  count_live_successes() {
    # A run that died mid-write can leave a truncated last line. Such lines
    # (and any non-object JSON values) are skipped instead of raising, so one
    # bad record cannot take the whole wrapper down under `set -e`.
    # The leading "if True:" lets the Python body be indented together with
    # the surrounding shell code; the program must not contain single quotes.
    python3 -c 'if True:
        import json
        import sys
        from pathlib import Path
  
        trials_path = Path(sys.argv[1]) / "trials.jsonl"
        count = 0
        skipped = 0
        if trials_path.is_file():
            # Iterate by real newlines only: str.splitlines() would also cut
            # at characters such as U+2028 that may occur inside JSON strings.
            with trials_path.open(encoding="utf-8", errors="replace") as fh:
                for raw in fh:
                    record = raw.strip()
                    if not record:
                        continue
                    try:
                        obj = json.loads(record)
                    except ValueError:
                        skipped += 1
                        continue
                    if not isinstance(obj, dict):
                        skipped += 1
                        continue
                    if obj.get("status") == "ok" and not obj.get("is_seed"):
                        count += 1
        if skipped:
            print(
                f"[resilient] ignored {skipped} unparsable line(s) in {trials_path}",
                file=sys.stderr,
            )
        print(count)
    ' "$RUN_DIR"
  }
  
  #######################################
  # Emit the tuner invocation as one shell-quoted line.
  # Globals:   SEARCH_SPACE, SEED_REPORT, DATASET_ID, SEARCH_BASE_URL,
  #            EVAL_WEB_BASE_URL, MAX_EVALS, BATCH_SIZE, CANDIDATE_POOL_SIZE,
  #            RANDOM_SEED, BATCH_EVAL_TIMEOUT_SEC, RESUME_RUN_DIR, RUN_NAME
  #            (all read)
  # Arguments: none
  # Outputs:   stdout - each argv word rendered with printf %q and followed
  #            by a space, then a newline
  #######################################
  build_cmd() {
    local -a argv=(python scripts/evaluation/tune_fusion.py)
  
    # Fixed evaluation settings.
    argv+=(--mode optimize)
    argv+=(--search-space "$SEARCH_SPACE" --seed-report "$SEED_REPORT")
    argv+=(--tenant-id 163 --dataset-id "$DATASET_ID")
    argv+=(--queries-file scripts/evaluation/queries/queries.txt)
    argv+=(--top-k 100 --language en)
  
    # Service endpoints.
    argv+=(--search-base-url "$SEARCH_BASE_URL")
    argv+=(--eval-web-base-url "$EVAL_WEB_BASE_URL")
  
    # Search budget and sampling knobs.
    argv+=(--max-evals "$MAX_EVALS" --batch-size "$BATCH_SIZE")
    argv+=(--candidate-pool-size "$CANDIDATE_POOL_SIZE")
    argv+=(--random-seed "$RANDOM_SEED")
    argv+=(--batch-eval-timeout-sec "$BATCH_EVAL_TIMEOUT_SEC")
  
    # A fresh run is addressed by name; a resumed one by its directory.
    if [[ -z "$RESUME_RUN_DIR" ]]; then
      argv+=(--run-name "$RUN_NAME")
    else
      argv+=(--resume-run "$RESUME_RUN_DIR")
    fi
  
    printf '%q ' "${argv[@]}"
    echo
  }
  
  # Supervisor loop: keep launching the tuner until enough successful non-seed
  # trials are recorded. The only way out is one of the two `exit 0` branches
  # (or an unhandled failure under `set -e`).
  attempt=0
  while :; do
    # Nothing to do if a previous invocation already reached the target.
    live_successes="$(count_live_successes)"
    if test "$live_successes" -ge "$MAX_EVALS"; then
      echo "[resilient] complete run_name=$RUN_NAME live_successes=$live_successes target=$MAX_EVALS"
      exit 0
    fi
  
    attempt=$((attempt + 1))
  
    # Once the run directory exists, every launch resumes from it.
    if test -d "$RUN_DIR"; then
      RESUME_RUN_DIR="$RUN_DIR"
    fi
  
    echo "[resilient] attempt=$attempt run_name=$RUN_NAME live_successes=$live_successes target=$MAX_EVALS"
    CMD_STR="$(build_cmd)"
    echo "[resilient] cmd=$CMD_STR"
  
    # A failing tuner must not trip errexit: capture its status and carry on.
    exit_code=0
    bash -lc "$CMD_STR" || exit_code=$?
  
    # Re-count after the attempt; progress is judged by recorded trials, not
    # by the tuner's exit status.
    live_successes="$(count_live_successes)"
    echo "[resilient] exit_code=$exit_code live_successes=$live_successes"
  
    if test "$live_successes" -ge "$MAX_EVALS"; then
      echo "[resilient] finished after attempt=$attempt"
      exit 0
    fi
  
    echo "[resilient] sleeping ${RESTART_SLEEP_SEC}s before resume"
    sleep "$RESTART_SLEEP_SEC"
  done