From 4d000c94f48fcbe1d95fc255aa7842be03dad4d0 Mon Sep 17 00:00:00 2001 From: tangwang Date: Thu, 2 Apr 2026 11:29:03 +0800 Subject: [PATCH] 融合公式调参 --- config/config.yaml | 20 ++++++++++---------- scripts/evaluation/eval_framework/prompts.py | 45 ++++++++++++++++++++++----------------------- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 0b2f9c2..7a38319 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -285,7 +285,7 @@ coarse_rank: input_window: 700 output_window: 240 fusion: - es_bias: 0.1 + es_bias: 10.0 es_exponent: 0.05 text_bias: 0.1 text_exponent: 0.35 @@ -293,10 +293,10 @@ coarse_rank: # 因为es的打分已经给了trans进行了折扣,所以这里不再继续折扣 text_translation_weight: 1.0 knn_text_weight: 1.0 - knn_image_weight: 1.0 - knn_tie_breaker: 0.1 + knn_image_weight: 2.0 + knn_tie_breaker: 0.3 knn_bias: 0.6 - knn_exponent: 0.2 + knn_exponent: 0.4 # 精排配置(轻量 reranker) fine_rank: @@ -324,21 +324,21 @@ rerank: # max(knn_text_weight * text_knn, knn_image_weight * image_knn) # + knn_tie_breaker * 另一侧较弱信号 fusion: - es_bias: 0.1 + es_bias: 10.0 es_exponent: 0.05 - rerank_bias: 1.0e-05 + rerank_bias: 0.1 rerank_exponent: 1.15 - fine_bias: 1.0e-05 + fine_bias: 0.1 fine_exponent: 1.0 text_bias: 0.1 text_exponent: 0.25 # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) text_translation_weight: 0.8 knn_text_weight: 1.0 - knn_image_weight: 1.0 - knn_tie_breaker: 0.1 + knn_image_weight: 2.0 + knn_tie_breaker: 0.3 knn_bias: 0.6 - knn_exponent: 0.2 + knn_exponent: 0.4 # 可扩展服务/provider 注册表(单一配置源) services: diff --git a/scripts/evaluation/eval_framework/prompts.py b/scripts/evaluation/eval_framework/prompts.py index bee800c..f7dea90 100644 --- a/scripts/evaluation/eval_framework/prompts.py +++ b/scripts/evaluation/eval_framework/prompts.py @@ -130,21 +130,18 @@ Typical situations: - The core product type matches, but the product clearly violates an explicit and important requirement in the query, with little or no acceptable substitutability. Typical examples: -- Query: “pants” - Product: “shoes” - → Wrong product type. -- Query: “slim-fit pants” - Product: “loose wide-leg pants” - → Clear contradiction in fit, with extremely low substitutability. -- Query: “sleeveless dress” - Product: “long-sleeve dress” - → Clear contradiction in sleeve type. -- Query: “jeans” - Product: “sweatpants” - → Different core category, with significantly different style and wearing scenario. -- Query: “boots” - Product: “sneakers” - → Different core category, different function, and different usage scenario. + +1. **Different core product type with no substitutability** +- Query: "pants", Product: "shoes" +- Query: "boots", Product: "sneakers" + +2. **Close core product type but clear conflict on key attributes** +- Query: "slim-fit pants", Product: "loose wide-leg pants" +- Query: "sleeveless dress", Product: "long-sleeve dress" +- Query: "loose-fitting hoodie", Product: "slim-fit T-shirt" + +3. **Same broad category but significant difference in style or usage scenario** +- Query: "jeans", Product: "sweatpants / dress pants" ## Judgment Principles @@ -277,16 +274,18 @@ _CLASSIFY_TEMPLATE_ZH = """你是一个服饰电商搜索系统中的相关性 - 核心商品类型匹配,但商品明显违背了查询中一个明确且重要的要求,且几乎不具备可接受的替代性。 典型情况: + +**1. 核心品类不同,且无替代性** - 查询:“裤子”,商品:“鞋子” - → 商品类型错误。 -- 查询:“修身裤”,商品:“宽松阔腿裤” - → 与版型要求明显冲突,替代性极低。 -- 查询:“无袖连衣裙”,商品:“长袖连衣裙” - → 与袖型要求明显冲突。 -- 查询:“牛仔裤”,商品:“运动裤” - → 核心品类不同(牛仔裤 vs 运动裤),风格和场景差异大。 - 查询:“靴子”,商品:“运动鞋” - → 核心品类不同,功能和适用场景差异大。 + +**2. 核心品类相近,但关键属性明显冲突** +- 查询:“紧身裤”,商品:“阔腿裤” +- 查询:“无袖连衣裙”,商品:“长袖连衣裙” +- 查询:“宽松卫衣”,商品:“修身T恤” + +**3. 核心品类同属大类,但风格、场景差异巨大** +- 查询:“牛仔裤”,商品:“运动裤 / 西裤” ## 判断原则 -- libgit2 0.21.2