From 87cacb1b9ccaf340ae699fa6f66edf84dca0aa11 Mon Sep 17 00:00:00 2001 From: tangwang Date: Wed, 25 Mar 2026 10:58:56 +0800 Subject: [PATCH] 融合公式优化。加入意图匹配因子 --- config/config.yaml | 1 + config/loader.py | 3 +++ config/schema.py | 1 + search/rerank_client.py | 20 ++++++++++++++++++-- search/searcher.py | 2 +- tests/test_rerank_client.py | 31 +++++++++++++++++++++++++++++++ 6 files changed, 55 insertions(+), 3 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 9e10130..6cf5b49 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -118,6 +118,7 @@ query_config: style_intent: enabled: true + selected_sku_boost: 1.2 color_dictionary_path: "config/dictionaries/style_intent_color.csv" size_dictionary_path: "config/dictionaries/style_intent_size.csv" dimension_aliases: diff --git a/config/loader.py b/config/loader.py index 96ba410..f01b495 100644 --- a/config/loader.py +++ b/config/loader.py @@ -439,6 +439,9 @@ class AppConfigLoader: query_cfg.get("translation_embedding_wait_budget_ms_source_not_in_index", 200) ), style_intent_enabled=bool(style_intent_cfg.get("enabled", True)), + style_intent_selected_sku_boost=float( + style_intent_cfg.get("selected_sku_boost", 1.2) + ), style_intent_terms=style_intent_terms, style_intent_dimension_aliases=style_dimension_aliases, product_title_exclusion_enabled=bool(product_title_exclusion_cfg.get("enabled", True)), diff --git a/config/schema.py b/config/schema.py index e83d79a..2e4fabb 100644 --- a/config/schema.py +++ b/config/schema.py @@ -65,6 +65,7 @@ class QueryConfig: translation_embedding_wait_budget_ms_source_in_index: int = 80 translation_embedding_wait_budget_ms_source_not_in_index: int = 200 style_intent_enabled: bool = True + style_intent_selected_sku_boost: float = 1.2 style_intent_terms: Dict[str, List[Dict[str, List[str]]]] = field(default_factory=dict) style_intent_dimension_aliases: Dict[str, List[str]] = field(default_factory=dict) product_title_exclusion_enabled: bool = True diff --git a/search/rerank_client.py b/search/rerank_client.py index 8bd759b..09f410d 100644 --- a/search/rerank_client.py +++ b/search/rerank_client.py @@ -200,19 +200,24 @@ def _multiply_fusion_factors( knn_score: float, fusion: RerankFusionConfig, ) -> Tuple[float, float, float, float]: - """(rerank_factor, text_factor, knn_factor, fused).""" + """(rerank_factor, text_factor, knn_factor, fused_without_style_boost).""" r = (max(rerank_score, 0.0) + fusion.rerank_bias) ** fusion.rerank_exponent t = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent k = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent return r, t, k, r * t * k +def _has_selected_sku(hit: Dict[str, Any]) -> bool: + return bool(str(hit.get("_style_rerank_suffix") or "").strip()) + + def fuse_scores_and_resort( es_hits: List[Dict[str, Any]], rerank_scores: List[float], weight_es: float = DEFAULT_WEIGHT_ES, weight_ai: float = DEFAULT_WEIGHT_AI, fusion: Optional[RerankFusionConfig] = None, + style_intent_selected_sku_boost: float = 1.2, debug: bool = False, rerank_debug_rows: Optional[List[Dict[str, Any]]] = None, ) -> List[Dict[str, Any]]: @@ -220,7 +225,10 @@ def fuse_scores_and_resort( 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。 融合形式(由 ``fusion`` 配置 bias / exponent):: - fused = (max(rerank,0)+b_r)^e_r * (max(text,0)+b_t)^e_t * (max(knn,0)+b_k)^e_k + fused = (max(rerank,0)+b_r)^e_r * (max(text,0)+b_t)^e_t * (max(knn,0)+b_k)^e_k * sku_boost + + 其中 sku_boost 仅在当前 hit 已选中 SKU 时生效,默认值为 1.2,可通过 + ``query.style_intent.selected_sku_boost`` 配置。 对每条 hit 会写入: - _original_score: 原始 ES 分数 @@ -252,12 +260,16 @@ def fuse_scores_and_resort( rerank_factor, text_factor, knn_factor, fused = _multiply_fusion_factors( rerank_score, text_score, knn_score, f ) + sku_selected = _has_selected_sku(hit) + style_boost = style_intent_selected_sku_boost if sku_selected else 1.0 + fused *= style_boost hit["_original_score"] = hit.get("_score") hit["_rerank_score"] = rerank_score hit["_text_score"] = text_score hit["_knn_score"] = knn_score hit["_fused_score"] = fused + hit["_style_intent_selected_sku_boost"] = style_boost if debug: hit["_text_source_score"] = text_components["source_score"] hit["_text_translation_score"] = text_components["translation_score"] @@ -285,6 +297,8 @@ def fuse_scores_and_resort( "rerank_factor": rerank_factor, "text_factor": text_factor, "knn_factor": knn_factor, + "style_intent_selected_sku": sku_selected, + "style_intent_selected_sku_boost": style_boost, "matched_queries": matched_queries, "fused_score": fused, } @@ -311,6 +325,7 @@ def run_rerank( top_n: Optional[int] = None, debug: bool = False, fusion: Optional[RerankFusionConfig] = None, + style_intent_selected_sku_boost: float = 1.2, ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]: """ 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。 @@ -345,6 +360,7 @@ def run_rerank( weight_es=weight_es, weight_ai=weight_ai, fusion=fusion, + style_intent_selected_sku_boost=style_intent_selected_sku_boost, debug=debug, rerank_debug_rows=rerank_debug_rows, ) diff --git a/search/searcher.py b/search/searcher.py index 6ed1f72..f5fcea7 100644 --- a/search/searcher.py +++ b/search/searcher.py @@ -594,6 +594,7 @@ class Searcher: top_n=(from_ + size), debug=debug, fusion=rc.fusion, + style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost, ) if rerank_meta is not None: @@ -1055,4 +1056,3 @@ class Searcher: except Exception as e: logger.error(f"Failed to get document {doc_id} from tenant {tenant_id}: {e}", exc_info=True) return None - diff --git a/tests/test_rerank_client.py b/tests/test_rerank_client.py index 683606b..736336e 100644 --- a/tests/test_rerank_client.py +++ b/tests/test_rerank_client.py @@ -118,3 +118,34 @@ def test_fuse_scores_and_resort_uses_configurable_fusion_params(): by_id = {h["_id"]: h for h in hits} assert isclose(by_id["a"]["_fused_score"], 1.0, rel_tol=1e-9) assert isclose(by_id["b"]["_fused_score"], 0.0, rel_tol=1e-9) + + +def test_fuse_scores_and_resort_boosts_hits_with_selected_sku(): + hits = [ + { + "_id": "style-selected", + "_score": 1.0, + "_style_rerank_suffix": "Blue XL", + "matched_queries": {"base_query": 1.0, "knn_query": 0.0}, + }, + { + "_id": "plain", + "_score": 1.0, + "matched_queries": {"base_query": 1.0, "knn_query": 0.0}, + }, + ] + + debug = fuse_scores_and_resort( + hits, + [1.0, 1.0], + style_intent_selected_sku_boost=1.2, + debug=True, + ) + + by_id = {h["_id"]: h for h in hits} + assert isclose(by_id["style-selected"]["_fused_score"], by_id["plain"]["_fused_score"] * 1.2, rel_tol=1e-9) + assert by_id["style-selected"]["_style_intent_selected_sku_boost"] == 1.2 + assert by_id["plain"]["_style_intent_selected_sku_boost"] == 1.0 + assert [h["_id"] for h in hits] == ["style-selected", "plain"] + assert debug[0]["style_intent_selected_sku"] is True + assert debug[0]["style_intent_selected_sku_boost"] == 1.2 -- libgit2 0.21.2