#!/bin/bash
# start_coarse_fusion_tuning_long.sh

# Launch a long-running coarse rank fusion tuning job in the background.
#
# The script changes to the directory two levels above its own location
# (presumably the repository root), sources ./activate.sh, picks
# dataset-specific defaults, and starts
# scripts/evaluation/run_coarse_fusion_tuning_resilient.sh under nohup.
# Positional arguments given to this script are appended to that command.
#
# Optional environment overrides:
#   REPO_EVAL_DATASET_ID    dataset id (default: core_queries)
#   RUN_NAME                run label; also names the .log/.pid/.cmd files
#   SEARCH_SPACE, SEED_REPORT, MAX_EVALS, BATCH_SIZE,
#   CANDIDATE_POOL_SIZE, RANDOM_SEED
#                           replace the per-dataset defaults
#   BATCH_EVAL_TIMEOUT_SEC  exported to the launched command (default: 0)

set -euo pipefail

# The paths used below are relative, so the working directory is pinned first.
# CDPATH is emptied for this one call so that a relative script path cannot be
# resolved against a CDPATH entry (which would also make cd print to stdout).
# The `--` markers keep a script path starting with '-' from being parsed as
# an option by dirname or cd.
CDPATH='' cd -- "$(dirname -- "$0")/../.."
source ./activate.sh

# Dataset selection: REPO_EVAL_DATASET_ID if set and non-empty, otherwise
# core_queries.
DATASET_ID="${REPO_EVAL_DATASET_ID:-core_queries}"

# Baseline tuning defaults, used for every dataset that has no dedicated
# profile further down.
DEFAULT_SEARCH_SPACE="scripts/evaluation/tuning/coarse_rank_fusion_space.yaml"
DEFAULT_SEED_REPORT="artifacts/search_evaluation/batch_reports/batch_20260415T150754Z_00b6a8aa3d.md"
DEFAULT_MAX_EVALS="36"
DEFAULT_BATCH_SIZE="3"
DEFAULT_CANDIDATE_POOL_SIZE="512"
DEFAULT_RANDOM_SEED="20260416"

# Dedicated profile for clothing_top771: its own search space and seed
# report, plus a smaller evaluation budget, batch size and candidate pool.
if [[ "${DATASET_ID}" == "clothing_top771" ]]; then
  DEFAULT_SEARCH_SPACE="scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771.yaml"
  DEFAULT_SEED_REPORT="artifacts/search_evaluation/datasets/clothing_top771/batch_reports/batch_20260422T014610Z_5426bba1a6/report.md"
  DEFAULT_MAX_EVALS="18"
  DEFAULT_BATCH_SIZE="2"
  DEFAULT_CANDIDATE_POOL_SIZE="160"
  DEFAULT_RANDOM_SEED="20260422"
fi

# Effective settings. A non-empty value already present in the environment is
# kept as-is; an unset or empty one falls back to the matching DEFAULT_* value.

# The run name gets a generated fallback: dataset id plus a UTC timestamp.
if [[ -z "${RUN_NAME:-}" ]]; then
  launch_stamp="$(date -u +%Y%m%dT%H%M%SZ)"
  RUN_NAME="coarse_fusion_${DATASET_ID}_${launch_stamp}"
fi

: "${SEARCH_SPACE:=${DEFAULT_SEARCH_SPACE}}"
: "${SEED_REPORT:=${DEFAULT_SEED_REPORT}}"
: "${MAX_EVALS:=${DEFAULT_MAX_EVALS}}"
: "${BATCH_SIZE:=${DEFAULT_BATCH_SIZE}}"
: "${CANDIDATE_POOL_SIZE:=${DEFAULT_CANDIDATE_POOL_SIZE}}"
: "${RANDOM_SEED:=${DEFAULT_RANDOM_SEED}}"

# The timeout has no per-dataset default; it falls back to 0.
: "${BATCH_EVAL_TIMEOUT_SEC:=0}"

# Bookkeeping files for this launch all live in one directory and share the
# run name as their stem: <run>.log, <run>.pid and <run>.cmd.
LAUNCH_DIR="artifacts/search_evaluation/tuning_launches"
mkdir -p "${LAUNCH_DIR}"

launch_stem="${LAUNCH_DIR}/${RUN_NAME}"
LOG_PATH="${launch_stem}.log"
PID_PATH="${launch_stem}.pid"
CMD_PATH="${launch_stem}.cmd"

# Assemble the command as an array so every argument survives as one word,
# whatever characters it contains.
CMD=(bash scripts/evaluation/run_coarse_fusion_tuning_resilient.sh)

# Positional arguments for the tuning script, in the order they are passed.
CMD+=("${RUN_NAME}" "${DATASET_ID}")
CMD+=("${MAX_EVALS}" "${BATCH_SIZE}" "${CANDIDATE_POOL_SIZE}" "${RANDOM_SEED}")
CMD+=("${SEARCH_SPACE}" "${SEED_REPORT}")

# Anything passed to this launcher is forwarded unchanged after those.
[[ "$#" -eq 0 ]] || CMD+=("$@")

# Exported so the launched command inherits it through the environment.
export BATCH_EVAL_TIMEOUT_SEC

# Record the exact command on one line, shell-quoted with %q so that it can
# be pasted back into a shell as-is.
{
  printf '%q ' "${CMD[@]}"
  printf '\n'
} > "${CMD_PATH}"

# Start the job in the background under nohup, with stdout and stderr both
# captured in the log file, then remember its PID on disk.
nohup "${CMD[@]}" > "${LOG_PATH}" 2>&1 &
PID=$!
printf '%s\n' "${PID}" > "${PID_PATH}"

# Print a key=value summary of the launch, a blank separator line, and two
# ready-to-copy commands for following the log and reading the leaderboard.
run_dir="artifacts/search_evaluation/tuning_runs/${RUN_NAME}"
printf '%s\n' \
  "run_name=${RUN_NAME}" \
  "pid=${PID}" \
  "log=${LOG_PATH}" \
  "pid_file=${PID_PATH}" \
  "cmd_file=${CMD_PATH}" \
  "run_dir=${run_dir}" \
  "" \
  "tail -f ${LOG_PATH}" \
  "cat ${run_dir}/leaderboard.csv"