融合公式优化：加入意图匹配因子（已选中 SKU 的 hit 的融合分数乘以 selected_sku_boost，默认 1.2）。

tangwang
1 parent 837d5d76
Showing 6 changed files with 55 additions and 3 deletions.
config/config.yaml
config/loader.py
config/schema.py
search/rerank_client.py
search/searcher.py
tests/test_rerank_client.py
@@ -118,6 +118,7 @@ query_config:
  
   style_intent:
     enabled: true
+    selected_sku_boost: 1.2
     color_dictionary_path: "config/dictionaries/style_intent_color.csv"
     size_dictionary_path: "config/dictionaries/style_intent_size.csv"
     dimension_aliases:
@@ -439,6 +439,9 @@ class AppConfigLoader:
                 query_cfg.get("translation_embedding_wait_budget_ms_source_not_in_index", 200)
             ),
             style_intent_enabled=bool(style_intent_cfg.get("enabled", True)),
+            style_intent_selected_sku_boost=float(
+                style_intent_cfg.get("selected_sku_boost", 1.2)
+            ),
             style_intent_terms=style_intent_terms,
             style_intent_dimension_aliases=style_dimension_aliases,
             product_title_exclusion_enabled=bool(product_title_exclusion_cfg.get("enabled", True)),
@@ -65,6 +65,7 @@ class QueryConfig:
     translation_embedding_wait_budget_ms_source_in_index: int = 80
     translation_embedding_wait_budget_ms_source_not_in_index: int = 200
     style_intent_enabled: bool = True
+    style_intent_selected_sku_boost: float = 1.2
     style_intent_terms: Dict[str, List[Dict[str, List[str]]]] = field(default_factory=dict)
     style_intent_dimension_aliases: Dict[str, List[str]] = field(default_factory=dict)
     product_title_exclusion_enabled: bool = True
@@ -200,19 +200,24 @@ def _multiply_fusion_factors(
     knn_score: float,
     fusion: RerankFusionConfig,
 ) -> Tuple[float, float, float, float]:
-    """(rerank_factor, text_factor, knn_factor, fused)."""
+    """(rerank_factor, text_factor, knn_factor, fused_without_style_boost)."""
     r = (max(rerank_score, 0.0) + fusion.rerank_bias) ** fusion.rerank_exponent
     t = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent
     k = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent
     return r, t, k, r * t * k
  
  
+def _has_selected_sku(hit: Dict[str, Any]) -> bool:
+    return bool(str(hit.get("_style_rerank_suffix") or "").strip())
+
+
 def fuse_scores_and_resort(
     es_hits: List[Dict[str, Any]],
     rerank_scores: List[float],
     weight_es: float = DEFAULT_WEIGHT_ES,
     weight_ai: float = DEFAULT_WEIGHT_AI,
     fusion: Optional[RerankFusionConfig] = None,
+    style_intent_selected_sku_boost: float = 1.2,
     debug: bool = False,
     rerank_debug_rows: Optional[List[Dict[str, Any]]] = None,
 ) -> List[Dict[str, Any]]:
@@ -220,7 +225,10 @@ def fuse_scores_and_resort(
     将 ES 分数与重排分数按乘法公式融合（不修改原始 _score），并按融合分数降序重排。
  
     融合形式（由 ``fusion`` 配置 bias / exponent）::
-        fused = (max(rerank,0)+b_r)^e_r * (max(text,0)+b_t)^e_t * (max(knn,0)+b_k)^e_k
+        fused = (max(rerank,0)+b_r)^e_r * (max(text,0)+b_t)^e_t * (max(knn,0)+b_k)^e_k * sku_boost
+
+    其中 sku_boost 仅在当前 hit 已选中 SKU（``_style_rerank_suffix`` 非空）时生效，默认值为 1.2，
+    可通过 ``query_config.style_intent.selected_sku_boost`` 配置。
  
     对每条 hit 会写入：
     - _original_score: 原始 ES 分数
@@ -252,12 +260,16 @@ def fuse_scores_and_resort(
         rerank_factor, text_factor, knn_factor, fused = _multiply_fusion_factors(
             rerank_score, text_score, knn_score, f
         )
+        sku_selected = _has_selected_sku(hit)
+        style_boost = style_intent_selected_sku_boost if sku_selected else 1.0
+        fused *= style_boost
  
         hit["_original_score"] = hit.get("_score")
         hit["_rerank_score"] = rerank_score
         hit["_text_score"] = text_score
         hit["_knn_score"] = knn_score
         hit["_fused_score"] = fused
+        hit["_style_intent_selected_sku_boost"] = style_boost
         if debug:
             hit["_text_source_score"] = text_components["source_score"]
             hit["_text_translation_score"] = text_components["translation_score"]
@@ -285,6 +297,8 @@ def fuse_scores_and_resort(
                 "rerank_factor": rerank_factor,
                 "text_factor": text_factor,
                 "knn_factor": knn_factor,
+                "style_intent_selected_sku": sku_selected,
+                "style_intent_selected_sku_boost": style_boost,
                 "matched_queries": matched_queries,
                 "fused_score": fused,
             }
@@ -311,6 +325,7 @@ def run_rerank(
     top_n: Optional[int] = None,
     debug: bool = False,
     fusion: Optional[RerankFusionConfig] = None,
+    style_intent_selected_sku_boost: float = 1.2,
 ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]:
     """
     完整重排流程：从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。
@@ -345,6 +360,7 @@ def run_rerank(
         weight_es=weight_es,
         weight_ai=weight_ai,
         fusion=fusion,
+        style_intent_selected_sku_boost=style_intent_selected_sku_boost,
         debug=debug,
         rerank_debug_rows=rerank_debug_rows,
     )
@@ -594,6 +594,7 @@ class Searcher:
                     top_n=(from_ + size),
                     debug=debug,
                     fusion=rc.fusion,
+                    style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost,
                 )
  
                 if rerank_meta is not None:
@@ -1055,4 +1056,3 @@ class Searcher:
         except Exception as e:
             logger.error(f"Failed to get document {doc_id} from tenant {tenant_id}: {e}", exc_info=True)
             return None
-
@@ -118,3 +118,34 @@ def test_fuse_scores_and_resort_uses_configurable_fusion_params():
     by_id = {h["_id"]: h for h in hits}
     assert isclose(by_id["a"]["_fused_score"], 1.0, rel_tol=1e-9)
     assert isclose(by_id["b"]["_fused_score"], 0.0, rel_tol=1e-9)
+
+
+def test_fuse_scores_and_resort_boosts_hits_with_selected_sku():
+    hits = [
+        {
+            "_id": "style-selected",
+            "_score": 1.0,
+            "_style_rerank_suffix": "Blue XL",
+            "matched_queries": {"base_query": 1.0, "knn_query": 0.0},
+        },
+        {
+            "_id": "plain",
+            "_score": 1.0,
+            "matched_queries": {"base_query": 1.0, "knn_query": 0.0},
+        },
+    ]
+
+    debug = fuse_scores_and_resort(
+        hits,
+        [1.0, 1.0],
+        style_intent_selected_sku_boost=1.2,
+        debug=True,
+    )
+
+    by_id = {h["_id"]: h for h in hits}
+    assert isclose(by_id["style-selected"]["_fused_score"], by_id["plain"]["_fused_score"] * 1.2, rel_tol=1e-9)
+    assert by_id["style-selected"]["_style_intent_selected_sku_boost"] == 1.2
+    assert by_id["plain"]["_style_intent_selected_sku_boost"] == 1.0
+    assert [h["_id"] for h in hits] == ["style-selected", "plain"]
+    assert debug[0]["style_intent_selected_sku"] is True
+    assert debug[0]["style_intent_selected_sku_boost"] == 1.2