From 935f6e1b1df162953f7ae96de7ee27702eb8f2da Mon Sep 17 00:00:00 2001 From: tangwang Date: Fri, 24 Apr 2026 17:40:02 +0800 Subject: [PATCH] coarse_rank 搜参结果 **参数列表(4 套)** - `baseline`(top771 最优,`seed_baseline`) - `es_bias: 10.0`, `es_exponent: 0.05` - `text_bias: 0.1`, `text_exponent: 0.35`, `text_translation_weight: 1.0` - `knn_text_weight: 1.0`, `knn_image_weight: 2.0`, `knn_tie_breaker: 0.3` - `knn_bias: 0.2`, `knn_exponent: 5.6` - `knn_text_bias: 0.2`, `knn_text_exponent: 0.0` - `knn_image_bias: 0.2`, `knn_image_exponent: 0.0` - `54 条上得到的极端解`(`seed_legacy_bo234`) - `es_bias: 7.214`, `es_exponent: 0.2025` - `text_bias: 4.0`, `text_exponent: 1.584`, `text_translation_weight: 1.4441` - `knn_text_weight: 0.1`, `knn_image_weight: 5.6232`, `knn_tie_breaker: 0.021` - `knn_bias: 0.0019`, `knn_exponent: 11.8477` - `knn_text_bias: 2.3125`, `knn_text_exponent: 1.1547` - `knn_image_bias: 0.9641`, `knn_image_exponent: 5.8671` - `bo_012`(`Primary_Metric_Score=0.485027`) - `es_bias: 6.6233`, `es_exponent: 0.2377` - `text_bias: 0.049`, `text_exponent: 0.4446`, `text_translation_weight: 1.6236` - `knn_text_weight: 1.0344`, `knn_image_weight: 1.3565`, `knn_tie_breaker: 0.212` - `knn_bias: 0.0052`, `knn_exponent: 4.4639` - `knn_text_bias: 0.1148`, `knn_text_exponent: 1.0926` - `knn_image_bias: 0.0114`, `knn_image_exponent: 5.2496` - `bo_018`(`Primary_Metric_Score=0.484691`) - `es_bias: 8.8861`, `es_exponent: 0.2794` - `text_bias: 0.0189`, `text_exponent: 0.2`, `text_translation_weight: 1.7178` - `knn_text_weight: 1.7459`, `knn_image_weight: 4.2658`, `knn_tie_breaker: 0.2814` - `knn_bias: 0.001`, `knn_exponent: 1.4923` - `knn_text_bias: 4.0`, `knn_text_exponent: 0.9309` - `knn_image_bias: 0.01`, `knn_image_exponent: 5.8289` --- README.md | 5 +++-- artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_knn_tail_20260424T093837Z.daemon.cmd | 1 + artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_knn_tail_20260424T093837Z.daemon.pid | 1 + 
config/config.yaml | 10 +++++----- docs/issues/issue-2026-04-16-bayes寻参-clothing_top771数据集上寻参.md | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/evaluation/resume_coarse_fusion_tuning_knn_tail.sh | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/evaluation/run_coarse_fusion_tuning_resilient.sh | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- scripts/evaluation/start_coarse_fusion_tuning_knn_tail.sh | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771_knn_tail.yaml | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 497 insertions(+), 9 deletions(-) create mode 100644 artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_knn_tail_20260424T093837Z.daemon.cmd create mode 100644 artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_knn_tail_20260424T093837Z.daemon.pid create mode 100755 scripts/evaluation/resume_coarse_fusion_tuning_knn_tail.sh create mode 100755 scripts/evaluation/start_coarse_fusion_tuning_knn_tail.sh create mode 100644 scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771_knn_tail.yaml diff --git a/README.md b/README.md index 391e135..f796ef0 100644 --- a/README.md +++ b/README.md @@ -46,10 +46,11 @@ source activate.sh - `6002` backend(`/search/*`, `/admin/*`) - `6003` frontend - `6004` indexer(`/indexer/*`) +- `6006` translator - `6005` embedding-text(可选,`POST /embed/text`;常见后端为 TEI,默认 `8080`) -- `6006` translator(可选) -- `6007` reranker(可选,`POST /rerank`;精排可与主重排分 `service_profile`,见 `config.yaml` → `fine_rank` / `services.rerank`) - `6008` embedding-image(可选,`POST /embed/image` 等) +- `6007` reranker +- `6009` fine_rank - `6010` 
eval-web(搜索评估 UI,`./scripts/service_ctl.sh` 服务名 `eval-web`) 更完整示例见 `docs/QUICKSTART.md`。 diff --git a/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_knn_tail_20260424T093837Z.daemon.cmd b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_knn_tail_20260424T093837Z.daemon.cmd new file mode 100644 index 0000000..f755d66 --- /dev/null +++ b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_knn_tail_20260424T093837Z.daemon.cmd @@ -0,0 +1 @@ +bash scripts/evaluation/run_coarse_fusion_tuning_resilient.sh coarse_fusion_clothing_top771_knn_tail_20260424T093837Z clothing_top771 20 2 96 20260424 scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771_knn_tail.yaml '' diff --git a/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_knn_tail_20260424T093837Z.daemon.pid b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_knn_tail_20260424T093837Z.daemon.pid new file mode 100644 index 0000000..a9d2142 --- /dev/null +++ b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_knn_tail_20260424T093837Z.daemon.pid @@ -0,0 +1 @@ +1355252 diff --git a/config/config.yaml b/config/config.yaml index e280f2b..ebc54ca 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -247,11 +247,11 @@ coarse_rank: knn_image_weight: 2.0 knn_tie_breaker: 0.3 knn_bias: 0.6 - knn_exponent: 0.4 - knn_text_bias: 0.2 - knn_text_exponent: 0.0 - knn_image_bias: 0.2 - knn_image_exponent: 0.0 + knn_exponent: 4.4639 + knn_text_bias: 0.1148 + knn_text_exponent: 1.0926 + knn_image_bias: 0.0114 + knn_image_exponent: 5.2496 fine_rank: enabled: false input_window: 160 diff --git a/docs/issues/issue-2026-04-16-bayes寻参-clothing_top771数据集上寻参.md b/docs/issues/issue-2026-04-16-bayes寻参-clothing_top771数据集上寻参.md index a55c958..701666a 100644 --- a/docs/issues/issue-2026-04-16-bayes寻参-clothing_top771数据集上寻参.md +++ 
b/docs/issues/issue-2026-04-16-bayes寻参-clothing_top771数据集上寻参.md @@ -87,3 +87,175 @@ Prompt - 1 Response - 1 +跑完了,这个 `run_dir` 已经完成,没有在继续运行的调参进程。 + +结果目录: +- [summary.md](/data/saas-search/artifacts/search_evaluation/tuning_runs/coarse_fusion_clothing_top771_resilient_20260422T091650Z/summary.md) +- [leaderboard.csv](/data/saas-search/artifacts/search_evaluation/tuning_runs/coarse_fusion_clothing_top771_resilient_20260422T091650Z/leaderboard.csv) +- [trials.jsonl](/data/saas-search/artifacts/search_evaluation/tuning_runs/coarse_fusion_clothing_top771_resilient_20260422T091650Z/trials.jsonl) + +**结论** +1. 这轮 18 个 live eval 全部跑满了。 + 见 [summary.md](/data/saas-search/artifacts/search_evaluation/tuning_runs/coarse_fusion_clothing_top771_resilient_20260422T091650Z/summary.md:11) + +2. 大集 `clothing_top771` 上,最优仍然是 fusion-only baseline,`Primary_Metric_Score=0.487476`。 + 见 [summary.md](/data/saas-search/artifacts/search_evaluation/tuning_runs/coarse_fusion_clothing_top771_resilient_20260422T091650Z/summary.md:17) + +3. 这轮优化器已经很接近 baseline,但没有超过。 + 最接近的是: + - `bo_012 = 0.485027` + - `bo_018 = 0.484691` + - `bo_013 = 0.483849` + 与 baseline 的差距分别是 `-0.002449`、`-0.002785`、`-0.003627` + +4. 小集上那种“极端参数”没有迁移成功。 + 典型例子: + - `seed_low_knn_global = 0.447485` + - `seed_legacy_bo340 = 0.477537` + - `seed_bigset_text_stable = 0.449802` + 说明大集不支持此前那类明显更激进的形态。 + +**怎么解读这轮** +这轮最重要的收获不是“找到新最优”,而是把大集上的有效区间摸清了。 + +大集上表现较好的候选,虽然具体值不同,但有几个共同点: + +1. `text_bias` 普遍很低。 + 排名前几的候选大多在 `0.02 ~ 0.10` 附近,baseline 的 `0.1` 并不吃亏。 + 这和小集里 `text_bias=4.0` 的方向明显相反。 + +2. `es_exponent` 往往比 baseline 更高。 + baseline 是 `0.05`,而靠前候选很多在 `0.15 ~ 0.28`。 + 这说明大集上 lexical 这一路适度增强是有价值的,但提升幅度还不足以抵消其它项的副作用。 + +3. `text_translation_weight` 往往高于 baseline。 + 靠前候选多数在 `1.3 ~ 1.7`,比 baseline 的 `1.0` 明显更高。 + 这是这轮最值得保留的信号之一。 + +4. `knn_image_weight` 可以比 baseline 更高,但不能只靠“强图像+极端非线性”硬推。 + 很多接近最优的点把 `knn_image_weight` 提到了 `4+`,但最终仍没超过 baseline。 + 说明 image 通路能帮忙,但必须和其它项配平。 + +5. 
`knn_bias` 低值是一个信号,但不是充分条件。 + 不少高分候选把 `knn_bias` 压得很低,甚至接近 `0`。 + 但单独走“低 knn_bias + 低 knn_exponent”的路线并不好,`seed_low_knn_global` 已经证伪了这一点。 + +6. `knn_text_exponent` 和 `knn_image_exponent` 从 `0` 放开后,能接近 baseline,但还没形成稳定优势。 + 靠前候选里这两个参数经常被拉高,说明它们不是完全无用;但从结果看,它们更像是“补偿项”,不是决定性增益项。 + +**对上次担心的过拟合,这轮能下什么判断** +可以比较明确地说: + +- 小集 54 条上得到的极端解,确实存在明显过拟合迹象。 +- 到 771 条大集后,最稳的仍是原 baseline 一类的“温和融合”方案。 +- 但是 baseline 也不是绝对最优形态,至少有两类方向值得继续缩圈: + - 更高的 `es_exponent` + - 更高的 `text_translation_weight` + +**下一轮缩圈建议** +我建议不要再做同样宽度的 14 维长跑了,信息已经够了。下一轮可以做“baseline 附近精修”。 + +建议缩圈到这组范围: + +- `es_bias: 7.5 ~ 11.5` +- `es_exponent: 0.12 ~ 0.28` +- `text_bias: 0.01 ~ 0.15` +- `text_exponent: 0.20 ~ 0.55` +- `text_translation_weight: 1.20 ~ 1.75` +- `knn_text_weight: 0.75 ~ 1.35` +- `knn_image_weight: 1.2 ~ 4.8` +- `knn_tie_breaker: 0.18 ~ 0.32` +- `knn_bias: 0.001 ~ 0.25` +- `knn_exponent: 3.5 ~ 8.0` +- `knn_text_bias: 0.001 ~ 0.3` +- `knn_text_exponent: 0.4 ~ 1.3` +- `knn_image_bias: 0.001 ~ 0.35` +- `knn_image_exponent: 2.0 ~ 5.8` + +如果要更激进地降成本,我建议直接冻结这几个: +- `text_bias=0.1` +- `knn_tie_breaker=0.24~0.30` 小范围 +- `knn_text_bias` 缩很小 +- `knn_image_bias` 缩很小 + +然后重点打这 6 个: +- `es_exponent` +- `text_translation_weight` +- `knn_image_weight` +- `knn_bias` +- `knn_exponent` +- `knn_text_exponent` + +**额外发现** +流程上还有一个问题,这轮后半段 `bo_018` 在成功前重复失败了很多次,主要是: +- `build_annotation_set.py batch` 非零退出 +- 少量 `restart.sh backend` 非零退出 + +不过守护逻辑最终还是把任务跑完了。这个从 [summary.md](/data/saas-search/artifacts/search_evaluation/tuning_runs/coarse_fusion_clothing_top771_resilient_20260422T091650Z/summary.md:30) 的 failures 和 daemon log 都能看到。说明“自动续跑”已起作用,但评测链路本身还不够干净。 + +如果你愿意,我下一步可以直接继续两件事里的一个: +1. 按这轮结论,给你生成一个 `clothing_top771_v2` 缩圈搜索空间并启动下一轮 +2. 
先处理 `build_annotation_set.py batch` 偶发失败的问题,减少下一轮长跑中的无效重试 + + + +**参数列表(4 套)** +- `baseline`(top771 最优,`seed_baseline`) + - `es_bias: 10.0`, `es_exponent: 0.05` + - `text_bias: 0.1`, `text_exponent: 0.35`, `text_translation_weight: 1.0` + - `knn_text_weight: 1.0`, `knn_image_weight: 2.0`, `knn_tie_breaker: 0.3` + - `knn_bias: 0.2`, `knn_exponent: 5.6` + - `knn_text_bias: 0.2`, `knn_text_exponent: 0.0` + - `knn_image_bias: 0.2`, `knn_image_exponent: 0.0` +- `54 条上得到的极端解`(`seed_legacy_bo234`) + - `es_bias: 7.214`, `es_exponent: 0.2025` + - `text_bias: 4.0`, `text_exponent: 1.584`, `text_translation_weight: 1.4441` + - `knn_text_weight: 0.1`, `knn_image_weight: 5.6232`, `knn_tie_breaker: 0.021` + - `knn_bias: 0.0019`, `knn_exponent: 11.8477` + - `knn_text_bias: 2.3125`, `knn_text_exponent: 1.1547` + - `knn_image_bias: 0.9641`, `knn_image_exponent: 5.8671` +- `bo_012`(`Primary_Metric_Score=0.485027`) + - `es_bias: 6.6233`, `es_exponent: 0.2377` + - `text_bias: 0.049`, `text_exponent: 0.4446`, `text_translation_weight: 1.6236` + - `knn_text_weight: 1.0344`, `knn_image_weight: 1.3565`, `knn_tie_breaker: 0.212` + - `knn_bias: 0.0052`, `knn_exponent: 4.4639` + - `knn_text_bias: 0.1148`, `knn_text_exponent: 1.0926` + - `knn_image_bias: 0.0114`, `knn_image_exponent: 5.2496` +- `bo_018`(`Primary_Metric_Score=0.484691`) + - `es_bias: 8.8861`, `es_exponent: 0.2794` + - `text_bias: 0.0189`, `text_exponent: 0.2`, `text_translation_weight: 1.7178` + - `knn_text_weight: 1.7459`, `knn_image_weight: 4.2658`, `knn_tie_breaker: 0.2814` + - `knn_bias: 0.001`, `knn_exponent: 1.4923` + - `knn_text_bias: 4.0`, `knn_text_exponent: 0.9309` + - `knn_image_bias: 0.01`, `knn_image_exponent: 5.8289` + +**怎么找(可复现)** +- 从 `leaderboard.csv` 找(含分数+参数一行全):`artifacts/search_evaluation/tuning_runs/coarse_fusion_clothing_top771_resilient_20260422T091650Z/leaderboard.csv` + - 例:`rg '^2,bo_012,' 
artifacts/search_evaluation/tuning_runs/coarse_fusion_clothing_top771_resilient_20260422T091650Z/leaderboard.csv` +- 从 `trials.jsonl` 找(最权威,调参器实际写入的 params):`artifacts/search_evaluation/tuning_runs/coarse_fusion_clothing_top771_resilient_20260422T091650Z/trials.jsonl` + - 例:`rg '\"name\": \"bo_012\"' artifacts/search_evaluation/tuning_runs/coarse_fusion_clothing_top771_resilient_20260422T091650Z/trials.jsonl` + - 例:`rg '\"name\": \"seed_legacy_bo234\"' artifacts/search_evaluation/tuning_runs/coarse_fusion_clothing_top771_resilient_20260422T091650Z/trials.jsonl` + +**已补到 `config.yaml`** +- 我已把这 4 套参数作为“注释 presets”补在 `coarse_rank.fusion` 旁边:`config/config.yaml:236` +- 注意:你当前 `config/config.yaml` 里 `coarse_rank.fusion` 的生效值是 `knn_bias=0.6 / knn_exponent=0.4`,更像 `seed_low_knn_global`,不是本次大集最优的 baseline。 + + + + + + +# request 2 + + +固定这些参数(不再参与寻参): +es_bias: 6.62, es_exponent: 0.24 +text_bias: 0.05 text_exponent: 0.445, text_translation_weight: 1.0 +knn_text_weight: 1.0, knn_image_weight: 1.35, knn_tie_breaker: 0.212 +knn_bias: 0.0052, + +然后对以下参数进行寻参: +knn_exponent(0.3-6.0) +knn_text_bias (0.0~0.3) knn_text_exponent (0.2 ~ 3.0) +knn_image_bias (0.0~0.3) knn_image_exponent (1.0~7.0) +设计好搜参脚本后跑起来,注意程序启动起来之后要检测是否运行稳定了,确保可以长时间运行直到全部跑完 \ No newline at end of file diff --git a/scripts/evaluation/resume_coarse_fusion_tuning_knn_tail.sh b/scripts/evaluation/resume_coarse_fusion_tuning_knn_tail.sh new file mode 100755 index 0000000..9b30bf9 --- /dev/null +++ b/scripts/evaluation/resume_coarse_fusion_tuning_knn_tail.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +set -euo pipefail + +if [ "$#" -lt 1 ]; then + echo "usage: $0 <run_name|run_dir>" >&2 + exit 1 +fi + +cd "$(dirname "$0")/../.." +source ./activate.sh + +TARGET="$1" + +if [ -d "${TARGET}" ]; then + RUN_DIR="${TARGET}" + RUN_NAME="$(basename "${RUN_DIR}")" +else + RUN_NAME="${TARGET}" + RUN_DIR="artifacts/search_evaluation/tuning_runs/${RUN_NAME}" +fi + +if [ ! 
-d "${RUN_DIR}" ]; then + echo "run dir not found: ${RUN_DIR}" >&2 + exit 1 +fi + +DATASET_ID="${REPO_EVAL_DATASET_ID:-clothing_top771}" +MAX_EVALS="${MAX_EVALS:-20}" +BATCH_SIZE="${BATCH_SIZE:-2}" +CANDIDATE_POOL_SIZE="${CANDIDATE_POOL_SIZE:-96}" +RANDOM_SEED="${RANDOM_SEED:-20260424}" +BATCH_EVAL_TIMEOUT_SEC="${BATCH_EVAL_TIMEOUT_SEC:-0}" + +LAUNCH_DIR="artifacts/search_evaluation/tuning_launches" +mkdir -p "${LAUNCH_DIR}" +LOG_PATH="${LAUNCH_DIR}/${RUN_NAME}.daemon.log" +PID_PATH="${LAUNCH_DIR}/${RUN_NAME}.daemon.pid" +CMD_PATH="${LAUNCH_DIR}/${RUN_NAME}.daemon.cmd" + +CMD=( + bash + scripts/evaluation/run_coarse_fusion_tuning_resilient.sh + "${RUN_NAME}" + "${DATASET_ID}" + "${MAX_EVALS}" + "${BATCH_SIZE}" + "${CANDIDATE_POOL_SIZE}" + "${RANDOM_SEED}" + "${RUN_DIR}/search_space.yaml" + "" + "${RUN_DIR}" +) + +export BATCH_EVAL_TIMEOUT_SEC + +printf '%q ' "${CMD[@]}" > "${CMD_PATH}" +printf '\n' >> "${CMD_PATH}" + +setsid "${CMD[@]}" > "${LOG_PATH}" 2>&1 < /dev/null & +PID=$! +echo "${PID}" > "${PID_PATH}" + +echo "run_name=${RUN_NAME}" +echo "pid=${PID}" +echo "log=${LOG_PATH}" +echo "pid_file=${PID_PATH}" +echo "cmd_file=${CMD_PATH}" +echo "run_dir=${RUN_DIR}" +echo +echo "tail -f ${LOG_PATH}" +echo "cat ${RUN_DIR}/leaderboard.csv" diff --git a/scripts/evaluation/run_coarse_fusion_tuning_resilient.sh b/scripts/evaluation/run_coarse_fusion_tuning_resilient.sh index 406face..fc2e464 100755 --- a/scripts/evaluation/run_coarse_fusion_tuning_resilient.sh +++ b/scripts/evaluation/run_coarse_fusion_tuning_resilient.sh @@ -29,9 +29,48 @@ RESTART_SLEEP_SEC="${RESTART_SLEEP_SEC:-30}" SEARCH_BASE_URL="${SEARCH_BASE_URL:-http://127.0.0.1:6002}" EVAL_WEB_BASE_URL="${EVAL_WEB_BASE_URL:-http://127.0.0.1:6010}" RUN_DIR="artifacts/search_evaluation/tuning_runs/${RUN_NAME}" +LOCK_DIR="${RUN_DIR}/.resilient_lock" +HEALTH_POLL_SEC="${HEALTH_POLL_SEC:-15}" mkdir -p "$(dirname "$RUN_DIR")" +release_lock() { + if [ -d "$LOCK_DIR" ] && [ -f "$LOCK_DIR/pid" ] && [ "$(cat 
"$LOCK_DIR/pid" 2>/dev/null || true)" = "$$" ]; then + rm -rf "$LOCK_DIR" + fi +} + +acquire_lock() { + mkdir -p "$RUN_DIR" + if mkdir "$LOCK_DIR" 2>/dev/null; then + echo "$$" > "$LOCK_DIR/pid" + date -u +%Y-%m-%dT%H:%M:%SZ > "$LOCK_DIR/started_at" + return 0 + fi + + local owner_pid="" + if [ -f "$LOCK_DIR/pid" ]; then + owner_pid="$(cat "$LOCK_DIR/pid" 2>/dev/null || true)" + fi + if [ -n "$owner_pid" ] && kill -0 "$owner_pid" 2>/dev/null; then + echo "[resilient] lock already held by pid=${owner_pid}, exiting" + exit 0 + fi + + echo "[resilient] removing stale lock at ${LOCK_DIR}" + rm -rf "$LOCK_DIR" + if mkdir "$LOCK_DIR" 2>/dev/null; then + echo "$$" > "$LOCK_DIR/pid" + date -u +%Y-%m-%dT%H:%M:%SZ > "$LOCK_DIR/started_at" + return 0 + fi + + echo "[resilient] failed to acquire lock at ${LOCK_DIR}" + exit 1 +} +# INT/TERM must exit explicitly: bash resumes the loop after a handler that returns, which would keep the daemon running without its lock. Stop the child (best effort) first; the EXIT trap then releases the lock. +trap release_lock EXIT; trap 'kill "${child_pid:-}" 2>/dev/null || true; exit 1' INT TERM + count_live_successes() { python3 - "$RUN_DIR" <<'PY' import json @@ -53,13 +92,61 @@ print(count) PY } +wait_for_health() { + local url="$1" + local timeout_sec="$2" + local deadline=$(( $(date +%s) + timeout_sec )) + while [ "$(date +%s)" -lt "$deadline" ]; do + if curl -fsS "$url" >/dev/null 2>&1; then + return 0 + fi + sleep 2 + done + return 1 +} + +ensure_services() { + if ! wait_for_health "${SEARCH_BASE_URL}/health" 20; then + echo "[resilient] backend unhealthy, restarting backend" + ./restart.sh backend || true + sleep 5 + fi + if ! wait_for_health "${SEARCH_BASE_URL}/health" 180; then + echo "[resilient] backend still unhealthy after restart" + return 1 + fi + + if ! wait_for_health "${EVAL_WEB_BASE_URL}/api/history" 20; then + echo "[resilient] eval-web unhealthy, restarting eval-web" + ./restart.sh eval-web || true + sleep 5 + fi + if ! wait_for_health "${EVAL_WEB_BASE_URL}/api/history" 180; then + echo "[resilient] eval-web still unhealthy after restart" + return 1 + fi + return 0 +} + +heal_services_nonblocking() { + if ! 
curl -fsS "${SEARCH_BASE_URL}/health" >/dev/null 2>&1; then + echo "[resilient] backend became unhealthy during run, restarting backend" + ./restart.sh backend || true + sleep 5 + fi + if ! curl -fsS "${EVAL_WEB_BASE_URL}/api/history" >/dev/null 2>&1; then + echo "[resilient] eval-web became unhealthy during run, restarting eval-web" + ./restart.sh eval-web || true + sleep 5 + fi +} + build_cmd() { local cmd=( python scripts/evaluation/tune_fusion.py --mode optimize --search-space "$SEARCH_SPACE" - --seed-report "$SEED_REPORT" --tenant-id 163 --dataset-id "$DATASET_ID" --queries-file scripts/evaluation/queries/queries.txt @@ -73,6 +160,9 @@ build_cmd() { --random-seed "$RANDOM_SEED" --batch-eval-timeout-sec "$BATCH_EVAL_TIMEOUT_SEC" ) + if [ -n "$SEED_REPORT" ]; then + cmd+=(--seed-report "$SEED_REPORT") + fi if [ -n "$RESUME_RUN_DIR" ]; then cmd+=(--resume-run "$RESUME_RUN_DIR") else @@ -83,6 +173,7 @@ build_cmd() { } attempt=0 +acquire_lock while true; do live_successes="$(count_live_successes)" if [ "$live_successes" -ge "$MAX_EVALS" ]; then @@ -96,11 +187,23 @@ while true; do fi echo "[resilient] attempt=$attempt run_name=$RUN_NAME live_successes=$live_successes target=$MAX_EVALS" + if ! ensure_services; then + echo "[resilient] service preflight failed, sleeping ${RESTART_SLEEP_SEC}s before retry" + sleep "$RESTART_SLEEP_SEC" + continue + fi CMD_STR="$(build_cmd)" echo "[resilient] cmd=$CMD_STR" set +e - bash -lc "$CMD_STR" + bash -lc "$CMD_STR" & + child_pid=$! + echo "[resilient] child_pid=${child_pid}" + while kill -0 "$child_pid" 2>/dev/null; do + heal_services_nonblocking + sleep "$HEALTH_POLL_SEC" + done + wait "$child_pid" exit_code=$? set -e @@ -112,6 +215,9 @@ while true; do exit 0 fi + if ! 
ensure_services; then + echo "[resilient] service recovery failed after exit_code=$exit_code" + fi echo "[resilient] sleeping ${RESTART_SLEEP_SEC}s before resume" sleep "$RESTART_SLEEP_SEC" done diff --git a/scripts/evaluation/start_coarse_fusion_tuning_knn_tail.sh b/scripts/evaluation/start_coarse_fusion_tuning_knn_tail.sh new file mode 100755 index 0000000..c80805f --- /dev/null +++ b/scripts/evaluation/start_coarse_fusion_tuning_knn_tail.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +set -euo pipefail + +cd "$(dirname "$0")/../.." +source ./activate.sh + +RUN_NAME="${RUN_NAME:-coarse_fusion_clothing_top771_knn_tail_$(date -u +%Y%m%dT%H%M%SZ)}" +DATASET_ID="${REPO_EVAL_DATASET_ID:-clothing_top771}" +SEARCH_SPACE="${SEARCH_SPACE:-scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771_knn_tail.yaml}" +MAX_EVALS="${MAX_EVALS:-20}" +BATCH_SIZE="${BATCH_SIZE:-2}" +CANDIDATE_POOL_SIZE="${CANDIDATE_POOL_SIZE:-96}" +RANDOM_SEED="${RANDOM_SEED:-20260424}" +BATCH_EVAL_TIMEOUT_SEC="${BATCH_EVAL_TIMEOUT_SEC:-0}" + +LAUNCH_DIR="artifacts/search_evaluation/tuning_launches" +mkdir -p "${LAUNCH_DIR}" +LOG_PATH="${LAUNCH_DIR}/${RUN_NAME}.daemon.log" +PID_PATH="${LAUNCH_DIR}/${RUN_NAME}.daemon.pid" +CMD_PATH="${LAUNCH_DIR}/${RUN_NAME}.daemon.cmd" + +CMD=( + bash + scripts/evaluation/run_coarse_fusion_tuning_resilient.sh + "${RUN_NAME}" + "${DATASET_ID}" + "${MAX_EVALS}" + "${BATCH_SIZE}" + "${CANDIDATE_POOL_SIZE}" + "${RANDOM_SEED}" + "${SEARCH_SPACE}" + "" +) + +export BATCH_EVAL_TIMEOUT_SEC + +printf '%q ' "${CMD[@]}" > "${CMD_PATH}" +printf '\n' >> "${CMD_PATH}" + +setsid "${CMD[@]}" > "${LOG_PATH}" 2>&1 < /dev/null & +PID=$! 
+echo "${PID}" > "${PID_PATH}" + +echo "run_name=${RUN_NAME}" +echo "pid=${PID}" +echo "log=${LOG_PATH}" +echo "pid_file=${PID_PATH}" +echo "cmd_file=${CMD_PATH}" +echo "run_dir=artifacts/search_evaluation/tuning_runs/${RUN_NAME}" +echo +echo "tail -f ${LOG_PATH}" +echo "cat artifacts/search_evaluation/tuning_runs/${RUN_NAME}/leaderboard.csv" diff --git a/scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771_knn_tail.yaml b/scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771_knn_tail.yaml new file mode 100644 index 0000000..bf3357e --- /dev/null +++ b/scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771_knn_tail.yaml @@ -0,0 +1,82 @@ +target_path: coarse_rank.fusion + +baseline: + es_bias: 6.62 + es_exponent: 0.24 + text_bias: 0.05 + text_exponent: 0.445 + text_translation_weight: 1.0 + knn_text_weight: 1.0 + knn_image_weight: 1.35 + knn_tie_breaker: 0.212 + knn_bias: 0.0052 + knn_exponent: 4.4639 + knn_text_bias: 0.1148 + knn_text_exponent: 1.0926 + knn_image_bias: 0.0114 + knn_image_exponent: 5.2496 + +parameters: + knn_exponent: {min: 0.3, max: 6.0, scale: linear, round: 4} + knn_text_bias: {min: 0.0, max: 0.3, scale: linear, round: 4} + knn_text_exponent: {min: 0.2, max: 3.0, scale: linear, round: 4} + knn_image_bias: {min: 0.0, max: 0.3, scale: linear, round: 4} + knn_image_exponent: {min: 1.0, max: 7.0, scale: linear, round: 4} + +seed_experiments: + - name: seed_fixed_anchor + description: 以 bo_012 的 5 维 knn 子项为锚点,在新固定参数下验证迁移。 + params: + knn_exponent: 4.4639 + knn_text_bias: 0.1148 + knn_text_exponent: 1.0926 + knn_image_bias: 0.0114 + knn_image_exponent: 5.2496 + - name: seed_knn_soft + description: 更平滑的全局 knn 指数,保留较强 image 子项指数。 + params: + knn_exponent: 1.2 + knn_text_bias: 0.06 + knn_text_exponent: 0.9 + knn_image_bias: 0.02 + knn_image_exponent: 5.4 + - name: seed_knn_balanced + description: 中等 knn 指数和中等子项非线性,作为稳健中心点。 + params: + knn_exponent: 2.8 + knn_text_bias: 0.12 + knn_text_exponent: 1.4 + knn_image_bias: 
0.05 + knn_image_exponent: 4.2 + - name: seed_knn_high + description: 更高的全局 knn 指数,检查大集是否仍偏好更陡的 top-rank 强化。 + params: + knn_exponent: 5.6 + knn_text_bias: 0.04 + knn_text_exponent: 0.8 + knn_image_bias: 0.03 + knn_image_exponent: 5.0 + - name: seed_text_branch_heavier + description: 提高 knn_text 子项偏置和指数,观察 text/image 子项间的平衡点。 + params: + knn_exponent: 3.6 + knn_text_bias: 0.22 + knn_text_exponent: 2.2 + knn_image_bias: 0.01 + knn_image_exponent: 3.2 + - name: seed_image_branch_heavier + description: 提高 knn_image 子项偏置和指数,检查 image 通路在当前固定主参数下的上限。 + params: + knn_exponent: 3.4 + knn_text_bias: 0.03 + knn_text_exponent: 0.6 + knn_image_bias: 0.16 + knn_image_exponent: 6.2 + +optimizer: + init_random: 2 + candidate_pool_size: 96 + explore_probability: 0.14 + local_jitter_probability: 0.62 + elite_fraction: 0.3 + min_normalized_distance: 0.06 -- libgit2 0.21.2