From d3dd01d3687413795804d7c9164f10d8aadc585d Mon Sep 17 00:00:00 2001 From: tangwang Date: Wed, 22 Apr 2026 17:19:54 +0800 Subject: [PATCH] 自动寻参: - 把 batch timeout 改成“可无限长跑”: - [tune_fusion.py](/data/saas-search/scripts/evaluation/tune_fusion.py:400) - 现在 `--batch-eval-timeout-sec <= 0` 时,不再给 `subprocess.run` 设置 Python 层超时 - 新增 resilient wrapper,负责自动续跑: - [run_coarse_fusion_tuning_resilient.sh](/data/saas-search/scripts/evaluation/run_coarse_fusion_tuning_resilient.sh) - 逻辑是:检查 `trials.jsonl` 里已完成的 live eval 数量,没到 `max_evals` 就继续 `resume-run` - 即使异常退出,也会 sleep 后自动从已有 `run_dir` 继续 - 启动/续跑脚本都切到 resilient 模式: - [start_coarse_fusion_tuning_long.sh](/data/saas-search/scripts/evaluation/start_coarse_fusion_tuning_long.sh) - [resume_coarse_fusion_tuning_long.sh](/data/saas-search/scripts/evaluation/resume_coarse_fusion_tuning_long.sh) --- artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023815Z.cmd | 1 + artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023815Z.pid | 1 + artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023951Z.cmd | 1 + artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023951Z.pid | 1 + artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun.cmd | 1 + artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun.pid | 1 + artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun2.cmd | 1 + artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun2.pid | 1 + artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_resilient_dryrun.cmd | 1 + artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_resilient_dryrun.pid | 1 + config/config-with-reranker.yaml | 3 ++- config/config.yaml | 93 +++++++++++---------------------------------------------------------------------------------- 
docs/issues/issue-2026-04-16-bayes寻参-clothing_top771数据集上寻参.md | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ docs/issues/issue-2026-04-16-数据集扩增&bayes寻参-TODO.md | 64 +++++----------------------------------------------------------- scripts/evaluation/resume_coarse_fusion_tuning_long.sh | 53 ++++++++++++++++++++++++++++++++++------------------- scripts/evaluation/run_coarse_fusion_tuning_resilient.sh | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/evaluation/start_coarse_fusion_tuning_long.sh | 61 +++++++++++++++++++++++++++++++++++++++---------------------- scripts/evaluation/tune_fusion.py | 6 +++++- scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771.yaml | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 19 files changed, 473 insertions(+), 184 deletions(-) create mode 100644 artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023815Z.cmd create mode 100644 artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023815Z.pid create mode 100644 artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023951Z.cmd create mode 100644 artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023951Z.pid create mode 100644 artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun.cmd create mode 100644 artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun.pid create mode 100644 artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun2.cmd create mode 100644 artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun2.pid create mode 100644 
artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_resilient_dryrun.cmd create mode 100644 artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_resilient_dryrun.pid create mode 100644 docs/issues/issue-2026-04-16-bayes寻参-clothing_top771数据集上寻参.md create mode 100755 scripts/evaluation/run_coarse_fusion_tuning_resilient.sh create mode 100644 scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771.yaml diff --git a/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023815Z.cmd b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023815Z.cmd new file mode 100644 index 0000000..7e05c09 --- /dev/null +++ b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023815Z.cmd @@ -0,0 +1 @@ +python scripts/evaluation/tune_fusion.py --mode optimize --run-name coarse_fusion_clothing_top771_20260422T023815Z --search-space scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771.yaml --seed-report artifacts/search_evaluation/datasets/clothing_top771/batch_reports/batch_20260422T014610Z_5426bba1a6/report.md --tenant-id 163 --dataset-id clothing_top771 --queries-file scripts/evaluation/queries/queries.txt --top-k 100 --language en --search-base-url http://127.0.0.1:6002 --eval-web-base-url http://127.0.0.1:6010 --max-evals 18 --batch-size 2 --candidate-pool-size 160 --random-seed 20260422 diff --git a/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023815Z.pid b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023815Z.pid new file mode 100644 index 0000000..939f80f --- /dev/null +++ b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023815Z.pid @@ -0,0 +1 @@ +3843738 diff --git a/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023951Z.cmd 
b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023951Z.cmd new file mode 100644 index 0000000..b85b39b --- /dev/null +++ b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023951Z.cmd @@ -0,0 +1 @@ +python scripts/evaluation/tune_fusion.py --mode optimize --run-name coarse_fusion_clothing_top771_20260422T023951Z --search-space scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771.yaml --seed-report artifacts/search_evaluation/datasets/clothing_top771/batch_reports/batch_20260422T014610Z_5426bba1a6/report.md --tenant-id 163 --dataset-id clothing_top771 --queries-file scripts/evaluation/queries/queries.txt --top-k 100 --language en --search-base-url http://127.0.0.1:6002 --eval-web-base-url http://127.0.0.1:6010 --max-evals 18 --batch-size 2 --candidate-pool-size 160 --random-seed 20260422 diff --git a/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023951Z.pid b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023951Z.pid new file mode 100644 index 0000000..948b194 --- /dev/null +++ b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_20260422T023951Z.pid @@ -0,0 +1 @@ +3845416 diff --git a/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun.cmd b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun.cmd new file mode 100644 index 0000000..fe6ec8c --- /dev/null +++ b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun.cmd @@ -0,0 +1 @@ +python scripts/evaluation/tune_fusion.py --mode optimize --run-name coarse_fusion_clothing_top771_dryrun --search-space scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771.yaml --seed-report artifacts/search_evaluation/datasets/clothing_top771/batch_reports/batch_20260422T021002Z_5426bba1a6/report.md --tenant-id 163 --dataset-id clothing_top771 --queries-file 
scripts/evaluation/queries/queries.txt --top-k 100 --language en --search-base-url http://127.0.0.1:6002 --eval-web-base-url http://127.0.0.1:6010 --max-evals 18 --batch-size 2 --candidate-pool-size 160 --random-seed 20260422 --help diff --git a/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun.pid b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun.pid new file mode 100644 index 0000000..eacf9d1 --- /dev/null +++ b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun.pid @@ -0,0 +1 @@ +3842050 diff --git a/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun2.cmd b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun2.cmd new file mode 100644 index 0000000..723333d --- /dev/null +++ b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun2.cmd @@ -0,0 +1 @@ +python scripts/evaluation/tune_fusion.py --mode optimize --run-name coarse_fusion_clothing_top771_dryrun2 --search-space scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771.yaml --seed-report artifacts/search_evaluation/datasets/clothing_top771/batch_reports/batch_20260422T014610Z_5426bba1a6/report.md --tenant-id 163 --dataset-id clothing_top771 --queries-file scripts/evaluation/queries/queries.txt --top-k 100 --language en --search-base-url http://127.0.0.1:6002 --eval-web-base-url http://127.0.0.1:6010 --max-evals 18 --batch-size 2 --candidate-pool-size 160 --random-seed 20260422 --help diff --git a/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun2.pid b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun2.pid new file mode 100644 index 0000000..828ac9b --- /dev/null +++ b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_dryrun2.pid @@ -0,0 +1 @@ +3843512 diff --git 
a/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_resilient_dryrun.cmd b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_resilient_dryrun.cmd new file mode 100644 index 0000000..5191fda --- /dev/null +++ b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_resilient_dryrun.cmd @@ -0,0 +1 @@ +bash scripts/evaluation/run_coarse_fusion_tuning_resilient.sh coarse_fusion_clothing_top771_resilient_dryrun clothing_top771 18 2 160 20260422 scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771.yaml artifacts/search_evaluation/datasets/clothing_top771/batch_reports/batch_20260422T014610Z_5426bba1a6/report.md --help diff --git a/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_resilient_dryrun.pid b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_resilient_dryrun.pid new file mode 100644 index 0000000..44b45fe --- /dev/null +++ b/artifacts/search_evaluation/tuning_launches/coarse_fusion_clothing_top771_resilient_dryrun.pid @@ -0,0 +1 @@ +4126011 diff --git a/config/config-with-reranker.yaml b/config/config-with-reranker.yaml index eb7e182..d07d082 100644 --- a/config/config-with-reranker.yaml +++ b/config/config-with-reranker.yaml @@ -260,6 +260,7 @@ function_score: score_mode: sum boost_mode: multiply functions: [] + coarse_rank: enabled: true input_window: 480 @@ -271,7 +272,7 @@ coarse_rank: text_exponent: 0.35 # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) # 因为es的打分已经给了trans进行了折扣,所以这里不再继续折扣 - text_translation_weight: 0.8 + text_translation_weight: 1.0 knn_text_weight: 1.0 knn_image_weight: 2.0 knn_tie_breaker: 0.3 diff --git a/config/config.yaml b/config/config.yaml index 616624c..e280f2b 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -100,11 +100,8 @@ es_settings: number_of_shards: 1 number_of_replicas: 0 refresh_interval: 30s - -# 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang} field_boosts: title: 3.0 - 
# qanchors enriched_tags 在 enriched_attributes.value中也存在,所以其实他的权重为自身权重+enriched_attributes.value的权重 qanchors: 1.0 enriched_tags: 1.0 enriched_attributes.value: 1.5 @@ -118,7 +115,6 @@ field_boosts: brief: 1.0 description: 1.0 vendor: 1.0 - query_config: supported_languages: - zh @@ -126,16 +122,12 @@ query_config: default_language: en enable_text_embedding: true enable_query_rewrite: true - - zh_to_en_model: nllb-200-distilled-600m # nllb-200-distilled-600m deepl opus-mt-zh-en / opus-mt-en-zh + zh_to_en_model: nllb-200-distilled-600m en_to_zh_model: nllb-200-distilled-600m default_translation_model: nllb-200-distilled-600m - # 源语种不在 index_languages时翻译质量比较重要,因此单独配置 zh_to_en_model__source_not_in_index: deepl en_to_zh_model__source_not_in_index: deepl default_translation_model__source_not_in_index: deepl - - # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒) translation_embedding_wait_budget_ms_source_in_index: 300 translation_embedding_wait_budget_ms_source_not_in_index: 400 style_intent: @@ -165,31 +157,22 @@ query_config: enabled: true dictionary_path: config/dictionaries/product_title_exclusion.tsv search_fields: - # 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang} multilingual_fields: - title - keywords - qanchors - enriched_tags - enriched_attributes.value - # - enriched_taxonomy_attributes.value - option1_values - option2_values - option3_values - category_path - category_name_text - # - brief - # - description - # - vendor - # shared_fields: 无语言后缀字段;示例: tags, option1_values, option2_values, option3_values - shared_fields: null core_multilingual_fields: - title - qanchors - category_name_text - - # 文本召回(主查询 + 翻译查询) text_query_strategy: base_minimum_should_match: 60% translation_minimum_should_match: 60% @@ -206,8 +189,6 @@ query_config: phrase_match_boost: 3.0 text_embedding_field: title_embedding image_embedding_field: image_embedding.vector - - # null表示返回所有字段,[]表示不返回任何字段 source_fields: - spu_id - handle @@ -223,13 +204,8 @@ query_config: - category1_name - category2_name - 
category3_name - # - tags - # - keywords - # - qanchors - # - enriched_tags - enriched_attributes - enriched_taxonomy_attributes - - min_price - compare_at_price - image_url @@ -245,17 +221,14 @@ query_config: - option3_values - specifications - skus - - # KNN:文本向量与多模态(图片)向量各自 boost 与召回(k / num_candidates) knn_text_boost: 4 knn_image_boost: 4 knn_text_k: 160 - knn_text_num_candidates: 560 # k * 3.4 + knn_text_num_candidates: 560 knn_text_k_long: 400 knn_text_num_candidates_long: 1200 knn_image_k: 400 knn_image_num_candidates: 1200 - function_score: score_mode: sum boost_mode: multiply @@ -269,20 +242,18 @@ coarse_rank: es_exponent: 0.05 text_bias: 0.1 text_exponent: 0.35 - # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) - # 因为es的打分已经给了trans进行了折扣,所以这里不再继续折扣 text_translation_weight: 1.0 knn_text_weight: 1.0 knn_image_weight: 2.0 knn_tie_breaker: 0.3 - knn_bias: 0.2 - knn_exponent: 5.6 + knn_bias: 0.6 + knn_exponent: 0.4 knn_text_bias: 0.2 knn_text_exponent: 0.0 knn_image_bias: 0.2 knn_image_exponent: 0.0 fine_rank: - enabled: false # false 时保序透传 + enabled: false input_window: 160 output_window: 80 timeout_sec: 10.0 @@ -290,7 +261,7 @@ fine_rank: rerank_doc_template: '{title}' service_profile: fine rerank: - enabled: false # false 时保序透传 + enabled: false rerank_window: 160 exact_knn_rescore_enabled: true exact_knn_rescore_window: 160 @@ -300,10 +271,6 @@ rerank: rerank_query_template: '{query}' rerank_doc_template: '{title}' service_profile: default - # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(es / rerank / fine / text / knn) - # 其中 knn_score 先做一层 dis_max: - # max(knn_text_weight * text_knn, knn_image_weight * image_knn) - # + knn_tie_breaker * 另一侧较弱信号 fusion: es_bias: 10.0 es_exponent: 0.05 @@ -312,7 +279,6 @@ rerank: fine_bias: 0.1 fine_exponent: 1.0 text_bias: 0.1 - # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) text_exponent: 0.25 text_translation_weight: 0.8 knn_text_weight: 1.0 @@ -320,7 +286,6 @@ 
rerank: knn_tie_breaker: 0.3 knn_bias: 0.0 knn_exponent: 5.6 - services: translation: service_url: http://127.0.0.1:6006 @@ -330,9 +295,6 @@ services: cache: ttl_seconds: 62208000 sliding_expiration: true - # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups) - # Higher tier = better quality. Multiple models may share one tier (同级). - # A request may reuse Redis keys from models with tier > A or tier == A (not from lower tiers). enable_model_quality_tier_cache: true model_quality_tiers: deepl: 30 @@ -443,10 +405,7 @@ services: device: cuda batch_size: 32 normalize_embeddings: true - # 服务内图片后端(embedding 进程启动时读取;cnclip gRPC 与 6008 须同一 model_name) - # Chinese-CLIP:ViT-H-14 → 1024 维,ViT-L-14 → 768 维。须与 mappings/search_products.json 中 - # image_embedding.vector.dims 一致(当前索引为 1024 → 默认 ViT-H-14)。 - image_backend: clip_as_service # clip_as_service | local_cnclip + image_backend: clip_as_service image_backends: clip_as_service: server: grpc://127.0.0.1:51000 @@ -472,7 +431,6 @@ services: request: max_docs: 1000 normalize: true - # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。 default_instance: default instances: default: @@ -515,31 +473,11 @@ services: enforce_eager: false infer_batch_size: 100 sort_by_doc_length: true - - # standard=_format_instruction__standard(固定 yes/no system);compact=_format_instruction(instruction 作 system 且 user 内重复 Instruct) - instruction_format: standard # compact standard - # instruction: "Given a query, score the product for relevance" - # "rank products by given query" 比 “Given a query, score the product for relevance” 更好点 - # instruction: "rank products by given query, category match first" - # instruction: "Rank products by query relevance, prioritizing category match" - # instruction: "Rank products by query relevance, prioritizing category and style match" - # instruction: "Rank by query relevance, prioritize category & style" - # instruction: "Relevance ranking: category & style 
match first" - # instruction: "Score product relevance by query with category & style match prioritized" - # instruction: "Rank products by query with category & style match prioritized" - # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query" + instruction_format: standard instruction: rank products by given query - - # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score - # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。 qwen3_vllm_score: model_name: Qwen/Qwen3-Reranker-0.6B - # 官方 Hub 原版需 true;若改用已转换的 seq-cls 权重(如 tomaarsen/...-seq-cls)则设为 false use_original_qwen3_hf_overrides: true - # vllm_runner: "auto" - # vllm_convert: "auto" - # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并 - # hf_overrides: {} engine: vllm max_model_len: 172 tensor_parallel_size: 1 @@ -549,10 +487,7 @@ services: enforce_eager: false infer_batch_size: 80 sort_by_doc_length: true - # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致 - instruction_format: standard # compact standard - # instruction: "Rank products by query with category & style match prioritized" - # instruction: "Given a shopping query, rank products by relevance" + instruction_format: standard instruction: Rank products by query with category & style match prioritized qwen3_transformers: model_name: Qwen/Qwen3-Reranker-0.6B @@ -620,25 +555,19 @@ services: endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks api_key_env: RERANK_DASHSCOPE_API_KEY_CN timeout_sec: 10.0 - top_n_cap: 0 # 0 表示 top_n=当前请求文档数 - batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断) + top_n_cap: 0 + batchsize: 64 instruct: Given a shopping query, rank product titles by relevance max_retries: 2 retry_backoff_sec: 0.2 - spu_config: enabled: true spu_field: spu_id inner_hits_size: 10 - # 配置哪些option维度参与检索(进索引、以及在线搜索) - # 格式为list,选择option1/option2/option3中的一个或多个 searchable_option_dimensions: - 
option1 - option2 - option3 - -# 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选) -# 默认 index_languages: [en, zh],可配置为任意 SOURCE_LANG_CODE_MAP.keys() 的子集 tenant_config: default: primary_language: en diff --git a/docs/issues/issue-2026-04-16-bayes寻参-clothing_top771数据集上寻参.md b/docs/issues/issue-2026-04-16-bayes寻参-clothing_top771数据集上寻参.md new file mode 100644 index 0000000..a55c958 --- /dev/null +++ b/docs/issues/issue-2026-04-16-bayes寻参-clothing_top771数据集上寻参.md @@ -0,0 +1,89 @@ +Prompt - 1 + +二、在大标注集上寻参 + +我以前进行过一轮调参,是基于54个评测样本(queries.txt),过程中发现的最优的参数是这一组: +0.641241 {'es_bias': '7.214', 'es_exponent': '0.2025', 'text_bias': '4.0', 'text_exponent': '1.584', 'text_translation_weight': '1.4441', 'knn_text_weight': '0.1', 'knn_image_weight': '5.6232', 'knn_tie_breaker': + '0.021', 'knn_bias': '0.0019', 'knn_exponent': '11.8477', 'knn_text_bias': '2.3125', 'knn_text_exponent': '1.1547', 'knn_image_bias': '0.9641', 'knn_image_exponent': '5.8671'} + +这一组参数分布比较极端,text_bias太大(文本项得分是0~1的,加上4被稀释的很大),图片的exponent太大,不过在这个数据集上面确实是最好的,我觉得有过拟合的可能,因此要扩大数据集,先扩展标注集,然后使用扩展的标注集,继续进行寻参。 + +因此新建了一个标注集合,标注任务也已经完成:Clothing Filtered 771。请启动寻参任务,把任务跑起来,以后等程序跑完了应该能拿到寻参结果,下次你可以结合程序执行的结果进行结论分析。 + +至于调参方式,请参考以前的一轮调参: +我当时的调参需求: + +请对coarse_rank fusion公式进行调参: + 目前的baseline是这一组,Primary_Metric_Score: 0.637642: + coarse_rank: + ... 
+ fusion: + es_bias: 10.0 + es_exponent: 0.05 + text_bias: 0.1 + text_exponent: 0.35 + text_translation_weight: 1.0 + knn_text_weight: 1.0 + knn_image_weight: 2.0 + knn_tie_breaker: 0.3 + knn_bias: 0.2 + knn_exponent: 5.6 + knn_text_bias: 0.2 + knn_text_exponent: 0.0 + knn_image_bias: 0.2 + knn_image_exponent: 0.0 + 评估指标在/data/saas-search/artifacts/search_evaluation/batch_reports/batch_20260415T150754Z_00b6a8aa3d.md + 请以这个为基准,发散思维,进行宽一点的范围调参。因为每次重启、评测都需要几分钟,请写一个调参的框架,基于框架、设定好多组参数、写好脚本,每轮收集效果自动的调整参数分布(因为笛卡尔积、多种参数调参成本太高,因此考虑贝叶斯调参等方法通 + 过多轮迭代通过脚本自动收敛) + 每次调整参数后需要重启backend (有时候重启backend后eval-web好像也挂了,如果有可以追查原因并修复) + ./restart.sh backend + 注意:请你调试脚本、进行一轮分析,最终要沉淀一套调参脚本,我下次可以重新跑(还是针对这组参数调参),能自动迭代(调整参数分布)、收集每组参数的指标、调优到最优的参数组合。 + + + +你当时给出的调参脚本( “种子实验 + 随机探索 + 基于历史最优分布的自适应采样”反复迭代。搜索范围已经放宽,新增了多组偏离 baseline 很远的 seed,也保留了 baseline 附近抖动,适合长时间慢慢收敛。): + + 长时间自动调参脚本已经配好,核心文件是: + + - 调参主程序:scripts/evaluation/tune_fusion.py + - 宽范围搜索空间:scripts/evaluation/tuning/coarse_rank_fusion_space.yaml + - 后台长跑启动脚本:scripts/evaluation/start_coarse_fusion_tuning_long.sh + - 续跑脚本:scripts/evaluation/resume_coarse_fusion_tuning_long.sh + - 使用说明:scripts/evaluation/tuning/README.md + + 直接启动一轮: + + ./scripts/evaluation/start_coarse_fusion_tuning_long.sh + + 如果想跑更久一点: + + MAX_EVALS=48 BATCH_SIZE=3 CANDIDATE_POOL_SIZE=512 RUN_NAME=coarse_fusion_long_001 \ + ./scripts/evaluation/start_coarse_fusion_tuning_long.sh + + 看日志: + + tail -f artifacts/search_evaluation/tuning_launches/<run_name>.log + + 看结果目录: + + ls artifacts/search_evaluation/tuning_runs/<run_name>/ + + 续跑: + + ./scripts/evaluation/resume_coarse_fusion_tuning_long.sh + + 每轮会自动做这些事: + + - 生成一批候选 coarse_rank.fusion 参数 + - 写入配置并重启 backend + - 检查 eval-web,必要时自愈拉起 + - 跑评测并收集 Primary_Metric_Score + - 更新 trials.jsonl、leaderboard.csv、summary.md + - 基于历史结果调整下一轮采样分布 + +上面只是历史的寻参任务的参考。现在请你使用新的数据集来进行寻参。 +要注意的是这个数据集比较大,因此每一轮寻参任务耗时会比较长,进行大范围的精细的搜索不太可取。考虑仔细分析上次寻参的结果,在已有成果上,进行精细搜索,如果已有成果还不太够,可以在小数据集上进行粗搜,然后再到大数据集上寻参。 + + + +Response - 1 diff --git 
a/docs/issues/issue-2026-04-16-数据集扩增&bayes寻参-TODO.md b/docs/issues/issue-2026-04-16-数据集扩增&bayes寻参-TODO.md index 0630e64..a61ea42 100644 --- a/docs/issues/issue-2026-04-16-数据集扩增&bayes寻参-TODO.md +++ b/docs/issues/issue-2026-04-16-数据集扩增&bayes寻参-TODO.md @@ -377,10 +377,9 @@ CLI / 启动脚本设计 这一组参数分布比较极端,text_bias太大(文本项得分事0~1的,加上4被稀释的很大),图片的exponent太大,不过在这个数据集上面确实是最好的,我觉得有过拟合的可能,因此要扩大数据集,先扩展标注集,然后使用扩展的标注集,继续进行寻参。 -我已经新建了一个标注集合,请启动寻参任务,把任务跑起来,以后等程序跑完了应该能拿到寻参结果,下次你可以结合程序执行的结果进行结论分析。 +因此新建了一个标注集合,标注任务也已经完成:Clothing Filtered 771。请启动寻参任务,把任务跑起来,以后等程序跑完了应该能拿到寻参结果,下次你可以结合程序执行的结果进行结论分析。 - -以前的一轮调参: +至于调参方式,请参考以前的一轮调参: 我当时的调参需求: 请对coarse_rank fusion公式进行调参: @@ -411,7 +410,7 @@ CLI / 启动脚本设计 -你给出的调参脚本: +你当时给出的调参脚本( “种子实验 + 随机探索 + 基于历史最优分布的自适应采样”反复迭代。搜索范围已经放宽,新增了多组偏离 baseline 很远的 seed,也保留了 baseline 附近抖动,适合长时间慢慢收敛。): 长时间自动调参脚本已经配好,核心文件是: @@ -421,8 +420,6 @@ CLI / 启动脚本设计 - 续跑脚本:scripts/evaluation/resume_coarse_fusion_tuning_long.sh - 使用说明:scripts/evaluation/tuning/README.md - 这套方案不是笛卡尔积穷举,而是“种子实验 + 随机探索 + 基于历史最优分布的自适应采样”反复迭代。搜索范围已经放宽,新增了多组偏离 baseline 很远的 seed,也保留了 baseline 附近抖动,适合长时间慢慢收敛。 - 直接启动一轮: ./scripts/evaluation/start_coarse_fusion_tuning_long.sh @@ -453,56 +450,5 @@ CLI / 启动脚本设计 - 更新 trials.jsonl、leaderboard.csv、summary.md - 基于历史结果调整下一轮采样分布 - 你先跑一轮长任务。跑完后把 run_name 或 scripts/evaluation/tuning/README.md 里对应实验目录发我,我再基于结果做下一步分析和收缩搜索空间。 - - - - -› 我运行了:MAX_EVALS=400 BATCH_SIZE=3 CANDIDATE_POOL_SIZE=512 RUN_NAME=coarse_fusion_long_001 ./scripts/evaluation/start_coarse_fusion_tuning_long.sh - - run_name=coarse_fusion_long_001 - pid=2218620 - log=artifacts/search_evaluation/tuning_launches/coarse_fusion_long_001.log - pid_file=artifacts/search_evaluation/tuning_launches/coarse_fusion_long_001.pid - cmd_file=artifacts/search_evaluation/tuning_launches/coarse_fusion_long_001.cmd - run_dir=artifacts/search_evaluation/tuning_runs/coarse_fusion_long_001 - - tail -f artifacts/search_evaluation/tuning_launches/coarse_fusion_long_001.log - cat 
artifacts/search_evaluation/tuning_runs/coarse_fusion_long_001/leaderboard.csv - - -• 我先看这次长跑的日志和当前排行榜,确认任务是否正常推进、是否有异常中断,以及目前最优参数落在哪个方向。 - -• Explored - └ Read coarse_fusion_long_001.log, leaderboard.csv - List coarse_fusion_long_001 - Read summary.md - -─── - -• 结果已经足够看出明显趋势了。我再补一层聚合分析,看看 top 区间里哪些参数方向反复出现,避免只盯着单个最优点。 - -• Ran ps -p 2218620 -o pid=,etimes=,cmd= - └ (no output) - -• Ran python - <<'PY' - │ import csv, statistics, math - │ from pathlib import Path - │ … +15 lines - └ rows 385 - - … +61 lines (ctrl + t to view transcript) - knn_image_bias: mean=0.5631 median=0.2000 min=0.0010 max=4.0000 - knn_image_exponent: mean=4.6316 median=5.8680 min=0.0000 max=6.0000 - -• Ran python - <<'PY' - │ import csv - │ from pathlib import Path - │ … +19 lines - └ baseline_score 0.637642 - - … +30 lines (ctrl + t to view transcript) - 10 bo_234 0.641241 {'es_bias': '7.214', 'es_exponent': '0.2025', 'text_bias': '4.0', 'text_exponent': '1.584', 'text_translation_weight': '1.4441', 'knn_text_weight': '0.1', 'knn_image_weight': '5.6232', 'knn_tie_breaker': - '0.021', 'knn_bias': '0.0019', 'knn_exponent': '11.8477', 'knn_text_bias': '2.3125', 'knn_text_exponent': '1.1547', 'knn_image_bias': '0.9641', 'knn_image_exponent': '5.8671'} - -这一次因为外部原因(磁盘满)终止了,以上是最好的一组参数。 +上面只是历史的寻参任务的参考。现在请你使用新的数据集来进行寻参。 +要注意的是这个数据集比较大,因此每一轮行参任务耗时会比较长,进行大范围的精细的搜索不太可取。考虑仔细分析上次寻参的结果,在已有成果上,进行精细搜索,如果已有成果还不太够,可以在小数据集上进行粗搜,然后再到大数据集上寻参。 \ No newline at end of file diff --git a/scripts/evaluation/resume_coarse_fusion_tuning_long.sh b/scripts/evaluation/resume_coarse_fusion_tuning_long.sh index 5318d80..19ea59d 100755 --- a/scripts/evaluation/resume_coarse_fusion_tuning_long.sh +++ b/scripts/evaluation/resume_coarse_fusion_tuning_long.sh @@ -26,10 +26,28 @@ if [ ! 
-d "${RUN_DIR}" ]; then exit 1 fi -MAX_EVALS="${MAX_EVALS:-36}" -BATCH_SIZE="${BATCH_SIZE:-3}" -CANDIDATE_POOL_SIZE="${CANDIDATE_POOL_SIZE:-512}" DATASET_ID="${REPO_EVAL_DATASET_ID:-core_queries}" +case "${DATASET_ID}" in + clothing_top771) + DEFAULT_SEED_REPORT="artifacts/search_evaluation/datasets/clothing_top771/batch_reports/batch_20260422T014610Z_5426bba1a6/report.md" + DEFAULT_MAX_EVALS="18" + DEFAULT_BATCH_SIZE="2" + DEFAULT_CANDIDATE_POOL_SIZE="160" + ;; + *) + DEFAULT_SEED_REPORT="artifacts/search_evaluation/batch_reports/batch_20260415T150754Z_00b6a8aa3d.md" + DEFAULT_MAX_EVALS="36" + DEFAULT_BATCH_SIZE="3" + DEFAULT_CANDIDATE_POOL_SIZE="512" + ;; +esac + +SEED_REPORT="${SEED_REPORT:-${DEFAULT_SEED_REPORT}}" +MAX_EVALS="${MAX_EVALS:-${DEFAULT_MAX_EVALS}}" +BATCH_SIZE="${BATCH_SIZE:-${DEFAULT_BATCH_SIZE}}" +CANDIDATE_POOL_SIZE="${CANDIDATE_POOL_SIZE:-${DEFAULT_CANDIDATE_POOL_SIZE}}" +BATCH_EVAL_TIMEOUT_SEC="${BATCH_EVAL_TIMEOUT_SEC:-0}" +RANDOM_SEED="${RANDOM_SEED:-20260422}" LAUNCH_DIR="artifacts/search_evaluation/tuning_launches" mkdir -p "${LAUNCH_DIR}" @@ -38,28 +56,25 @@ PID_PATH="${LAUNCH_DIR}/${RUN_NAME}.resume.pid" CMD_PATH="${LAUNCH_DIR}/${RUN_NAME}.resume.cmd" CMD=( - python - scripts/evaluation/tune_fusion.py - --mode optimize - --resume-run "${RUN_DIR}" - --search-space "${RUN_DIR}/search_space.yaml" - --seed-report artifacts/search_evaluation/batch_reports/batch_20260415T150754Z_00b6a8aa3d.md - --tenant-id 163 - --dataset-id "${DATASET_ID}" - --queries-file scripts/evaluation/queries/queries.txt - --top-k 100 - --language en - --search-base-url http://127.0.0.1:6002 - --eval-web-base-url http://127.0.0.1:6010 - --max-evals "${MAX_EVALS}" - --batch-size "${BATCH_SIZE}" - --candidate-pool-size "${CANDIDATE_POOL_SIZE}" + bash + scripts/evaluation/run_coarse_fusion_tuning_resilient.sh + "${RUN_NAME}" + "${DATASET_ID}" + "${MAX_EVALS}" + "${BATCH_SIZE}" + "${CANDIDATE_POOL_SIZE}" + "${RANDOM_SEED}" + "${RUN_DIR}/search_space.yaml" + 
"${SEED_REPORT}" + "${RUN_DIR}" ) if [ "$#" -gt 0 ]; then CMD+=("$@") fi +export BATCH_EVAL_TIMEOUT_SEC + printf '%q ' "${CMD[@]}" > "${CMD_PATH}" printf '\n' >> "${CMD_PATH}"
diff --git a/scripts/evaluation/run_coarse_fusion_tuning_resilient.sh b/scripts/evaluation/run_coarse_fusion_tuning_resilient.sh
new file mode 100755
index 0000000..406face
--- /dev/null
+++ b/scripts/evaluation/run_coarse_fusion_tuning_resilient.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+#
+# Resilient launcher for scripts/evaluation/tune_fusion.py --mode optimize.
+# Re-runs the tuner, resuming from the run directory when it exists, until
+# trials.jsonl holds MAX_EVALS records with status "ok" that are not seeds.
+
+set -euo pipefail
+
+cd "$(dirname "$0")/../.."
+source ./activate.sh
+
+usage() {
+  echo "usage: $0 <run_name> <dataset_id> <max_evals> <batch_size> <candidate_pool_size> <random_seed> <search_space> <seed_report> [resume_run_dir] [tune_fusion_args...]" >&2
+  exit 1
+}
+
+if [ "$#" -lt 8 ]; then
+  usage
+fi
+
+RUN_NAME="$1"
+DATASET_ID="$2"
+MAX_EVALS="$3"
+BATCH_SIZE="$4"
+CANDIDATE_POOL_SIZE="$5"
+RANDOM_SEED="$6"
+SEARCH_SPACE="$7"
+SEED_REPORT="$8"
+shift 8
+
+# Optional 9th positional argument: the run directory to resume. The launcher
+# scripts append their own "$@" after the positional arguments, so an
+# option-like token here (e.g. --help) is an extra tune_fusion.py argument,
+# not a directory; everything left over is forwarded to tune_fusion.py.
+RESUME_RUN_DIR=""
+if [ "$#" -gt 0 ] && [[ "$1" != -* ]]; then
+  RESUME_RUN_DIR="$1"
+  shift
+fi
+EXTRA_ARGS=("$@")
+
+BATCH_EVAL_TIMEOUT_SEC="${BATCH_EVAL_TIMEOUT_SEC:-0}"
+RESTART_SLEEP_SEC="${RESTART_SLEEP_SEC:-30}"
+SEARCH_BASE_URL="${SEARCH_BASE_URL:-http://127.0.0.1:6002}"
+EVAL_WEB_BASE_URL="${EVAL_WEB_BASE_URL:-http://127.0.0.1:6010}"
+RUN_DIR="artifacts/search_evaluation/tuning_runs/${RUN_NAME}"
+
+mkdir -p "$(dirname "$RUN_DIR")"
+
+# Print how many trials.jsonl records have status "ok" and are not seeds.
+count_live_successes() {
+  python3 - "$RUN_DIR" <<'PY'
+import json
+import sys
+from pathlib import Path
+
+run_dir = Path(sys.argv[1])
+path = run_dir / "trials.jsonl"
+count = 0
+if path.is_file():
+    for line in path.read_text(encoding="utf-8", errors="replace").splitlines():
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            obj = json.loads(line)
+        except ValueError:
+            # An interrupted write can leave a truncated record; skip it so the
+            # wrapper is not aborted by `set -e` on a failed count.
+            continue
+        if isinstance(obj, dict) and obj.get("status") == "ok" and not obj.get("is_seed"):
+            count += 1
+print(count)
+PY
+}
+
+# Print the shell-quoted tune_fusion.py command line for the next attempt.
+build_cmd() {
+  local cmd=(
+    python
+    scripts/evaluation/tune_fusion.py
+    --mode optimize
+    --search-space "$SEARCH_SPACE"
+    --seed-report "$SEED_REPORT"
+    --tenant-id 163
+    --dataset-id "$DATASET_ID"
+    --queries-file scripts/evaluation/queries/queries.txt
+    --top-k 100
+    --language en
+    --search-base-url "$SEARCH_BASE_URL"
+    --eval-web-base-url "$EVAL_WEB_BASE_URL"
+    --max-evals "$MAX_EVALS"
+    --batch-size "$BATCH_SIZE"
+    --candidate-pool-size "$CANDIDATE_POOL_SIZE"
+    --random-seed "$RANDOM_SEED"
+    --batch-eval-timeout-sec "$BATCH_EVAL_TIMEOUT_SEC"
+  )
+  if [ -n "$RESUME_RUN_DIR" ]; then
+    cmd+=(--resume-run "$RESUME_RUN_DIR")
+  else
+    cmd+=(--run-name "$RUN_NAME")
+  fi
+  if [ "${#EXTRA_ARGS[@]}" -gt 0 ]; then
+    cmd+=("${EXTRA_ARGS[@]}")
+  fi
+  printf '%q ' "${cmd[@]}"
+  printf '\n'
+}
+
+attempt=0
+while true; do
+  live_successes="$(count_live_successes)"
+  if [ "$live_successes" -ge "$MAX_EVALS" ]; then
+    echo "[resilient] complete run_name=$RUN_NAME live_successes=$live_successes target=$MAX_EVALS"
+    exit 0
+  fi
+
+  attempt=$((attempt + 1))
+  # If the run directory already exists, resume from it instead of starting over.
+  if [ -d "$RUN_DIR" ]; then
+    RESUME_RUN_DIR="$RUN_DIR"
+  fi
+
+  echo "[resilient] attempt=$attempt run_name=$RUN_NAME live_successes=$live_successes target=$MAX_EVALS"
+  CMD_STR="$(build_cmd)"
+  echo "[resilient] cmd=$CMD_STR"
+
+  # errexit is disabled around the tuner so a failed attempt reaches the retry logic.
+  # NOTE(review): -l starts a login shell, which may re-read profile files and
+  # alter the environment prepared by activate.sh; confirm this is intended.
+  set +e
+  bash -lc "$CMD_STR"
+  exit_code=$?
+  set -e
+
+  live_successes="$(count_live_successes)"
+  echo "[resilient] exit_code=$exit_code live_successes=$live_successes"
+
+  if [ "$live_successes" -ge "$MAX_EVALS" ]; then
+    echo "[resilient] finished after attempt=$attempt"
+    exit 0
+  fi
+
+  echo "[resilient] sleeping ${RESTART_SLEEP_SEC}s before resume"
+  sleep "$RESTART_SLEEP_SEC"
+done
diff --git a/scripts/evaluation/start_coarse_fusion_tuning_long.sh b/scripts/evaluation/start_coarse_fusion_tuning_long.sh index fdcb886..c1cbfb7 100755 --- a/scripts/evaluation/start_coarse_fusion_tuning_long.sh +++ b/scripts/evaluation/start_coarse_fusion_tuning_long.sh @@ -5,12 +5,34 @@ set -euo pipefail cd "$(dirname "$0")/../.." 
source ./activate.sh -RUN_NAME="${RUN_NAME:-coarse_fusion_long_$(date -u +%Y%m%dT%H%M%SZ)}" -MAX_EVALS="${MAX_EVALS:-36}" -BATCH_SIZE="${BATCH_SIZE:-3}" -CANDIDATE_POOL_SIZE="${CANDIDATE_POOL_SIZE:-512}" -RANDOM_SEED="${RANDOM_SEED:-20260416}" DATASET_ID="${REPO_EVAL_DATASET_ID:-core_queries}" +case "${DATASET_ID}" in + clothing_top771) + DEFAULT_SEARCH_SPACE="scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771.yaml" + DEFAULT_SEED_REPORT="artifacts/search_evaluation/datasets/clothing_top771/batch_reports/batch_20260422T014610Z_5426bba1a6/report.md" + DEFAULT_MAX_EVALS="18" + DEFAULT_BATCH_SIZE="2" + DEFAULT_CANDIDATE_POOL_SIZE="160" + DEFAULT_RANDOM_SEED="20260422" + ;; + *) + DEFAULT_SEARCH_SPACE="scripts/evaluation/tuning/coarse_rank_fusion_space.yaml" + DEFAULT_SEED_REPORT="artifacts/search_evaluation/batch_reports/batch_20260415T150754Z_00b6a8aa3d.md" + DEFAULT_MAX_EVALS="36" + DEFAULT_BATCH_SIZE="3" + DEFAULT_CANDIDATE_POOL_SIZE="512" + DEFAULT_RANDOM_SEED="20260416" + ;; +esac + +RUN_NAME="${RUN_NAME:-coarse_fusion_${DATASET_ID}_$(date -u +%Y%m%dT%H%M%SZ)}" +SEARCH_SPACE="${SEARCH_SPACE:-${DEFAULT_SEARCH_SPACE}}" +SEED_REPORT="${SEED_REPORT:-${DEFAULT_SEED_REPORT}}" +MAX_EVALS="${MAX_EVALS:-${DEFAULT_MAX_EVALS}}" +BATCH_SIZE="${BATCH_SIZE:-${DEFAULT_BATCH_SIZE}}" +CANDIDATE_POOL_SIZE="${CANDIDATE_POOL_SIZE:-${DEFAULT_CANDIDATE_POOL_SIZE}}" +RANDOM_SEED="${RANDOM_SEED:-${DEFAULT_RANDOM_SEED}}" +BATCH_EVAL_TIMEOUT_SEC="${BATCH_EVAL_TIMEOUT_SEC:-0}" LAUNCH_DIR="artifacts/search_evaluation/tuning_launches" mkdir -p "${LAUNCH_DIR}" @@ -19,29 +41,24 @@ PID_PATH="${LAUNCH_DIR}/${RUN_NAME}.pid" CMD_PATH="${LAUNCH_DIR}/${RUN_NAME}.cmd" CMD=( - python - scripts/evaluation/tune_fusion.py - --mode optimize - --run-name "${RUN_NAME}" - --search-space scripts/evaluation/tuning/coarse_rank_fusion_space.yaml - --seed-report artifacts/search_evaluation/batch_reports/batch_20260415T150754Z_00b6a8aa3d.md - --tenant-id 163 - --dataset-id "${DATASET_ID}" - 
--queries-file scripts/evaluation/queries/queries.txt - --top-k 100 - --language en - --search-base-url http://127.0.0.1:6002 - --eval-web-base-url http://127.0.0.1:6010 - --max-evals "${MAX_EVALS}" - --batch-size "${BATCH_SIZE}" - --candidate-pool-size "${CANDIDATE_POOL_SIZE}" - --random-seed "${RANDOM_SEED}" + bash + scripts/evaluation/run_coarse_fusion_tuning_resilient.sh + "${RUN_NAME}" + "${DATASET_ID}" + "${MAX_EVALS}" + "${BATCH_SIZE}" + "${CANDIDATE_POOL_SIZE}" + "${RANDOM_SEED}" + "${SEARCH_SPACE}" + "${SEED_REPORT}" ) if [ "$#" -gt 0 ]; then CMD+=("$@") fi +export BATCH_EVAL_TIMEOUT_SEC + printf '%q ' "${CMD[@]}" > "${CMD_PATH}" printf '\n' >> "${CMD_PATH}" diff --git a/scripts/evaluation/tune_fusion.py b/scripts/evaluation/tune_fusion.py index d99730f..1c1d35c 100644 --- a/scripts/evaluation/tune_fusion.py +++ b/scripts/evaluation/tune_fusion.py @@ -379,6 +379,7 @@ def run_batch_eval( top_k: int, language: str, force_refresh_labels: bool, + timeout_sec: int, ) -> Dict[str, Any]: cmd = [ str(PROJECT_ROOT / ".venv" / "bin" / "python"), @@ -397,13 +398,14 @@ def run_batch_eval( cmd.extend(["--queries-file", str(queries_file)]) if force_refresh_labels: cmd.append("--force-refresh-labels") + timeout = timeout_sec if timeout_sec and timeout_sec > 0 else None completed = subprocess.run( cmd, cwd=PROJECT_ROOT, check=True, capture_output=True, text=True, - timeout=7200, + timeout=timeout, ) output = (completed.stdout or "") + "\n" + (completed.stderr or "") batch_ids = re.findall(r"batch_id=([A-Za-z0-9_]+)", output) @@ -1221,6 +1223,7 @@ def run_optimize_mode(args: argparse.Namespace) -> None: top_k=args.top_k, language=args.language, force_refresh_labels=force_refresh_labels, + timeout_sec=args.batch_eval_timeout_sec, ) ensure_disk_headroom( min_free_gb=args.min_free_gb, @@ -1362,6 +1365,7 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument("--resume-run", default=None) parser.add_argument("--max-evals", type=int, default=12) 
parser.add_argument("--batch-size", type=int, default=3) + parser.add_argument("--batch-eval-timeout-sec", type=int, default=0) parser.add_argument("--init-random", type=int, default=None) parser.add_argument("--candidate-pool-size", type=int, default=None) parser.add_argument("--random-seed", type=int, default=20260415) diff --git a/scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771.yaml b/scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771.yaml new file mode 100644 index 0000000..70f60e1 --- /dev/null +++ b/scripts/evaluation/tuning/coarse_rank_fusion_space_clothing_top771.yaml @@ -0,0 +1,161 @@ +target_path: coarse_rank.fusion + +baseline: + es_bias: 10.0 + es_exponent: 0.05 + text_bias: 0.1 + text_exponent: 0.35 + text_translation_weight: 1.0 + knn_text_weight: 1.0 + knn_image_weight: 2.0 + knn_tie_breaker: 0.3 + knn_bias: 0.2 + knn_exponent: 5.6 + knn_text_bias: 0.2 + knn_text_exponent: 0.0 + knn_image_bias: 0.2 + knn_image_exponent: 0.0 + +parameters: + es_bias: {min: 2.0, max: 20.0, scale: log, round: 4} + es_exponent: {min: 0.03, max: 0.28, scale: linear, round: 4} + text_bias: {min: 0.01, max: 4.0, scale: log, round: 4} + text_exponent: {min: 0.2, max: 1.6, scale: linear, round: 4} + text_translation_weight: {min: 0.7, max: 1.8, scale: linear, round: 4} + knn_text_weight: {min: 0.05, max: 1.8, scale: linear, round: 4} + knn_image_weight: {min: 1.2, max: 6.0, scale: linear, round: 4} + knn_tie_breaker: {min: 0.0, max: 0.4, scale: linear, round: 4} + knn_bias: {min: 0.001, max: 2.5, scale: log, round: 4} + knn_exponent: {min: 0.05, max: 12.0, scale: log, round: 4} + knn_text_bias: {min: 0.001, max: 4.0, scale: log, round: 4} + knn_text_exponent: {min: 0.0, max: 2.0, scale: linear, round: 4} + knn_image_bias: {min: 0.01, max: 1.5, scale: log, round: 4} + knn_image_exponent: {min: 0.0, max: 6.0, scale: linear, round: 4} + +seed_experiments: + - name: seed_low_knn_global + description: 先验证 021002 中出现的低 knn 全局指数,去掉 reranker 
后是否仍有收益。 + params: + knn_bias: 0.6 + knn_exponent: 0.4 + - name: seed_bigset_knn_soft + description: 从低 knn 全局指数出发,继续平滑 knn 非线性。 + params: + text_exponent: 0.42 + text_translation_weight: 1.05 + knn_text_weight: 0.85 + knn_image_weight: 2.4 + knn_tie_breaker: 0.18 + knn_bias: 0.9 + knn_exponent: 0.18 + knn_image_exponent: 0.2 + - name: seed_bigset_knn_mid + description: 保留平滑 knn,但让 image 通路再强一点,验证大集是否需要适度非线性。 + params: + es_bias: 8.0 + es_exponent: 0.08 + text_bias: 0.15 + text_exponent: 0.5 + text_translation_weight: 1.15 + knn_text_weight: 0.65 + knn_image_weight: 3.1 + knn_tie_breaker: 0.12 + knn_bias: 0.45 + knn_exponent: 0.85 + knn_text_bias: 0.35 + knn_text_exponent: 0.2 + knn_image_bias: 0.22 + knn_image_exponent: 0.8 + - name: seed_bigset_text_stable + description: 提高 lexical 区分度,观察大集是否更偏好稳健文本排序。 + params: + es_bias: 7.0 + es_exponent: 0.12 + text_bias: 0.25 + text_exponent: 0.72 + text_translation_weight: 1.0 + knn_text_weight: 0.55 + knn_image_weight: 2.2 + knn_tie_breaker: 0.08 + knn_bias: 0.7 + knn_exponent: 0.35 + knn_text_bias: 0.5 + knn_text_exponent: 0.4 + knn_image_bias: 0.18 + knn_image_exponent: 0.35 + - name: seed_hybrid_transfer + description: 以大集 baseline 为主,温和吸收小集历史赢家中的 image/text 强化模式。 + params: + es_bias: 7.2 + es_exponent: 0.15 + text_bias: 0.6 + text_exponent: 0.82 + text_translation_weight: 1.28 + knn_text_weight: 0.45 + knn_image_weight: 4.0 + knn_tie_breaker: 0.08 + knn_bias: 0.2 + knn_exponent: 1.2 + knn_text_bias: 0.8 + knn_text_exponent: 0.45 + knn_image_bias: 0.3 + knn_image_exponent: 1.4 + - name: seed_legacy_bo234 + description: 直接验证 53 条集历史最优在 771 条集上的迁移表现。 + params: + es_bias: 7.214 + es_exponent: 0.2025 + text_bias: 4.0 + text_exponent: 1.584 + text_translation_weight: 1.4441 + knn_text_weight: 0.1 + knn_image_weight: 5.6232 + knn_tie_breaker: 0.021 + knn_bias: 0.0019 + knn_exponent: 11.8477 + knn_text_bias: 2.3125 + knn_text_exponent: 1.1547 + knn_image_bias: 0.9641 + knn_image_exponent: 5.8671 + - name: seed_legacy_bo340 + 
description: 验证小集冠军参数在大集上是否仍有价值。 + params: + es_bias: 5.887 + es_exponent: 0.2145 + text_bias: 4.0 + text_exponent: 1.6 + text_translation_weight: 1.4788 + knn_text_weight: 0.3693 + knn_image_weight: 5.7028 + knn_tie_breaker: 0.0174 + knn_bias: 0.0016 + knn_exponent: 12.0 + knn_text_bias: 2.6071 + knn_text_exponent: 1.0458 + knn_image_bias: 0.8282 + knn_image_exponent: 6.0 + - name: seed_image_guard + description: 控制 image 权重但允许 image 子项指数,检查 recall 与 precision 的平衡点。 + params: + es_bias: 9.0 + es_exponent: 0.09 + text_bias: 0.12 + text_exponent: 0.45 + text_translation_weight: 1.1 + knn_text_weight: 0.7 + knn_image_weight: 2.8 + knn_tie_breaker: 0.1 + knn_bias: 0.55 + knn_exponent: 0.55 + knn_text_bias: 0.25 + knn_text_exponent: 0.15 + knn_image_bias: 0.28 + knn_image_exponent: 1.0 + +optimizer: + init_random: 2 + candidate_pool_size: 160 + explore_probability: 0.12 + local_jitter_probability: 0.62 + elite_fraction: 0.25 + min_normalized_distance: 0.08 -- libgit2 0.21.2