Commit 87cacb1b9ccaf340ae699fa6f66edf84dca0aa11
1 parent
837d5d76
融合公式优化。加入意图匹配因子
Showing 6 changed files with 55 additions and 3 deletions
Show diff stats
config/config.yaml
| ... | ... | @@ -118,6 +118,7 @@ query_config: |
| 118 | 118 | |
| 119 | 119 | style_intent: |
| 120 | 120 | enabled: true |
| 121 | + selected_sku_boost: 1.2 | |
| 121 | 122 | color_dictionary_path: "config/dictionaries/style_intent_color.csv" |
| 122 | 123 | size_dictionary_path: "config/dictionaries/style_intent_size.csv" |
| 123 | 124 | dimension_aliases: | ... | ... |
config/loader.py
| ... | ... | @@ -439,6 +439,9 @@ class AppConfigLoader: |
| 439 | 439 | query_cfg.get("translation_embedding_wait_budget_ms_source_not_in_index", 200) |
| 440 | 440 | ), |
| 441 | 441 | style_intent_enabled=bool(style_intent_cfg.get("enabled", True)), |
| 442 | + style_intent_selected_sku_boost=float( | |
| 443 | + style_intent_cfg.get("selected_sku_boost", 1.2) | |
| 444 | + ), | |
| 442 | 445 | style_intent_terms=style_intent_terms, |
| 443 | 446 | style_intent_dimension_aliases=style_dimension_aliases, |
| 444 | 447 | product_title_exclusion_enabled=bool(product_title_exclusion_cfg.get("enabled", True)), | ... | ... |
config/schema.py
| ... | ... | @@ -65,6 +65,7 @@ class QueryConfig: |
| 65 | 65 | translation_embedding_wait_budget_ms_source_in_index: int = 80 |
| 66 | 66 | translation_embedding_wait_budget_ms_source_not_in_index: int = 200 |
| 67 | 67 | style_intent_enabled: bool = True |
| 68 | + style_intent_selected_sku_boost: float = 1.2 | |
| 68 | 69 | style_intent_terms: Dict[str, List[Dict[str, List[str]]]] = field(default_factory=dict) |
| 69 | 70 | style_intent_dimension_aliases: Dict[str, List[str]] = field(default_factory=dict) |
| 70 | 71 | product_title_exclusion_enabled: bool = True | ... | ... |
search/rerank_client.py
| ... | ... | @@ -200,19 +200,24 @@ def _multiply_fusion_factors( |
| 200 | 200 | knn_score: float, |
| 201 | 201 | fusion: RerankFusionConfig, |
| 202 | 202 | ) -> Tuple[float, float, float, float]: |
| 203 | - """(rerank_factor, text_factor, knn_factor, fused).""" | |
| 203 | + """(rerank_factor, text_factor, knn_factor, fused_without_style_boost).""" | |
| 204 | 204 | r = (max(rerank_score, 0.0) + fusion.rerank_bias) ** fusion.rerank_exponent |
| 205 | 205 | t = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent |
| 206 | 206 | k = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent |
| 207 | 207 | return r, t, k, r * t * k |
| 208 | 208 | |
| 209 | 209 | |
| 210 | +def _has_selected_sku(hit: Dict[str, Any]) -> bool: | |
| 211 | + return bool(str(hit.get("_style_rerank_suffix") or "").strip()) | |
| 212 | + | |
| 213 | + | |
| 210 | 214 | def fuse_scores_and_resort( |
| 211 | 215 | es_hits: List[Dict[str, Any]], |
| 212 | 216 | rerank_scores: List[float], |
| 213 | 217 | weight_es: float = DEFAULT_WEIGHT_ES, |
| 214 | 218 | weight_ai: float = DEFAULT_WEIGHT_AI, |
| 215 | 219 | fusion: Optional[RerankFusionConfig] = None, |
| 220 | + style_intent_selected_sku_boost: float = 1.2, | |
| 216 | 221 | debug: bool = False, |
| 217 | 222 | rerank_debug_rows: Optional[List[Dict[str, Any]]] = None, |
| 218 | 223 | ) -> List[Dict[str, Any]]: |
| ... | ... | @@ -220,7 +225,10 @@ def fuse_scores_and_resort( |
| 220 | 225 | 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。 |
| 221 | 226 | |
| 222 | 227 | 融合形式(由 ``fusion`` 配置 bias / exponent):: |
| 223 | - fused = (max(rerank,0)+b_r)^e_r * (max(text,0)+b_t)^e_t * (max(knn,0)+b_k)^e_k | |
| 228 | + fused = (max(rerank,0)+b_r)^e_r * (max(text,0)+b_t)^e_t * (max(knn,0)+b_k)^e_k * sku_boost | |
| 229 | + | |
| 230 | + 其中 sku_boost 仅在当前 hit 已选中 SKU 时生效,默认值为 1.2,可通过 | |
| 231 | + ``query_config.style_intent.selected_sku_boost`` 配置。 | |
| 224 | 232 | |
| 225 | 233 | 对每条 hit 会写入: |
| 226 | 234 | - _original_score: 原始 ES 分数 |
| ... | ... | @@ -252,12 +260,16 @@ def fuse_scores_and_resort( |
| 252 | 260 | rerank_factor, text_factor, knn_factor, fused = _multiply_fusion_factors( |
| 253 | 261 | rerank_score, text_score, knn_score, f |
| 254 | 262 | ) |
| 263 | + sku_selected = _has_selected_sku(hit) | |
| 264 | + style_boost = style_intent_selected_sku_boost if sku_selected else 1.0 | |
| 265 | + fused *= style_boost | |
| 255 | 266 | |
| 256 | 267 | hit["_original_score"] = hit.get("_score") |
| 257 | 268 | hit["_rerank_score"] = rerank_score |
| 258 | 269 | hit["_text_score"] = text_score |
| 259 | 270 | hit["_knn_score"] = knn_score |
| 260 | 271 | hit["_fused_score"] = fused |
| 272 | + hit["_style_intent_selected_sku_boost"] = style_boost | |
| 261 | 273 | if debug: |
| 262 | 274 | hit["_text_source_score"] = text_components["source_score"] |
| 263 | 275 | hit["_text_translation_score"] = text_components["translation_score"] |
| ... | ... | @@ -285,6 +297,8 @@ def fuse_scores_and_resort( |
| 285 | 297 | "rerank_factor": rerank_factor, |
| 286 | 298 | "text_factor": text_factor, |
| 287 | 299 | "knn_factor": knn_factor, |
| 300 | + "style_intent_selected_sku": sku_selected, | |
| 301 | + "style_intent_selected_sku_boost": style_boost, | |
| 288 | 302 | "matched_queries": matched_queries, |
| 289 | 303 | "fused_score": fused, |
| 290 | 304 | } |
| ... | ... | @@ -311,6 +325,7 @@ def run_rerank( |
| 311 | 325 | top_n: Optional[int] = None, |
| 312 | 326 | debug: bool = False, |
| 313 | 327 | fusion: Optional[RerankFusionConfig] = None, |
| 328 | + style_intent_selected_sku_boost: float = 1.2, | |
| 314 | 329 | ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]: |
| 315 | 330 | """ |
| 316 | 331 | 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。 |
| ... | ... | @@ -345,6 +360,7 @@ def run_rerank( |
| 345 | 360 | weight_es=weight_es, |
| 346 | 361 | weight_ai=weight_ai, |
| 347 | 362 | fusion=fusion, |
| 363 | + style_intent_selected_sku_boost=style_intent_selected_sku_boost, | |
| 348 | 364 | debug=debug, |
| 349 | 365 | rerank_debug_rows=rerank_debug_rows, |
| 350 | 366 | ) | ... | ... |
search/searcher.py
| ... | ... | @@ -594,6 +594,7 @@ class Searcher: |
| 594 | 594 | top_n=(from_ + size), |
| 595 | 595 | debug=debug, |
| 596 | 596 | fusion=rc.fusion, |
| 597 | + style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost, | |
| 597 | 598 | ) |
| 598 | 599 | |
| 599 | 600 | if rerank_meta is not None: |
| ... | ... | @@ -1055,4 +1056,3 @@ class Searcher: |
| 1055 | 1056 | except Exception as e: |
| 1056 | 1057 | logger.error(f"Failed to get document {doc_id} from tenant {tenant_id}: {e}", exc_info=True) |
| 1057 | 1058 | return None |
| 1058 | - | ... | ... |
tests/test_rerank_client.py
| ... | ... | @@ -118,3 +118,34 @@ def test_fuse_scores_and_resort_uses_configurable_fusion_params(): |
| 118 | 118 | by_id = {h["_id"]: h for h in hits} |
| 119 | 119 | assert isclose(by_id["a"]["_fused_score"], 1.0, rel_tol=1e-9) |
| 120 | 120 | assert isclose(by_id["b"]["_fused_score"], 0.0, rel_tol=1e-9) |
| 121 | + | |
| 122 | + | |
| 123 | +def test_fuse_scores_and_resort_boosts_hits_with_selected_sku(): | |
| 124 | + hits = [ | |
| 125 | + { | |
| 126 | + "_id": "style-selected", | |
| 127 | + "_score": 1.0, | |
| 128 | + "_style_rerank_suffix": "Blue XL", | |
| 129 | + "matched_queries": {"base_query": 1.0, "knn_query": 0.0}, | |
| 130 | + }, | |
| 131 | + { | |
| 132 | + "_id": "plain", | |
| 133 | + "_score": 1.0, | |
| 134 | + "matched_queries": {"base_query": 1.0, "knn_query": 0.0}, | |
| 135 | + }, | |
| 136 | + ] | |
| 137 | + | |
| 138 | + debug = fuse_scores_and_resort( | |
| 139 | + hits, | |
| 140 | + [1.0, 1.0], | |
| 141 | + style_intent_selected_sku_boost=1.2, | |
| 142 | + debug=True, | |
| 143 | + ) | |
| 144 | + | |
| 145 | + by_id = {h["_id"]: h for h in hits} | |
| 146 | + assert isclose(by_id["style-selected"]["_fused_score"], by_id["plain"]["_fused_score"] * 1.2, rel_tol=1e-9) | |
| 147 | + assert by_id["style-selected"]["_style_intent_selected_sku_boost"] == 1.2 | |
| 148 | + assert by_id["plain"]["_style_intent_selected_sku_boost"] == 1.0 | |
| 149 | + assert [h["_id"] for h in hits] == ["style-selected", "plain"] | |
| 150 | + assert debug[0]["style_intent_selected_sku"] is True | |
| 151 | + assert debug[0]["style_intent_selected_sku_boost"] == 1.2 | ... | ... |