Commit 87cacb1b9ccaf340ae699fa6f66edf84dca0aa11

Authored by tangwang
1 parent 837d5d76

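融合公式优化:加入款式意图匹配因子——已选中 SKU 的 hit 的融合分数额外乘以 selected_sku_boost(默认 1.2,可在 config.yaml 的 style_intent.selected_sku_boost 中配置)。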

config/config.yaml
... ... @@ -118,6 +118,7 @@ query_config:
118 118  
119 119 style_intent:
120 120 enabled: true
  121 + selected_sku_boost: 1.2
121 122 color_dictionary_path: "config/dictionaries/style_intent_color.csv"
122 123 size_dictionary_path: "config/dictionaries/style_intent_size.csv"
123 124 dimension_aliases:
... ...
config/loader.py
... ... @@ -439,6 +439,9 @@ class AppConfigLoader:
439 439 query_cfg.get("translation_embedding_wait_budget_ms_source_not_in_index", 200)
440 440 ),
441 441 style_intent_enabled=bool(style_intent_cfg.get("enabled", True)),
  442 + style_intent_selected_sku_boost=float(
  443 + style_intent_cfg.get("selected_sku_boost", 1.2)
  444 + ),
442 445 style_intent_terms=style_intent_terms,
443 446 style_intent_dimension_aliases=style_dimension_aliases,
444 447 product_title_exclusion_enabled=bool(product_title_exclusion_cfg.get("enabled", True)),
... ...
config/schema.py
... ... @@ -65,6 +65,7 @@ class QueryConfig:
65 65 translation_embedding_wait_budget_ms_source_in_index: int = 80
66 66 translation_embedding_wait_budget_ms_source_not_in_index: int = 200
67 67 style_intent_enabled: bool = True
  68 + style_intent_selected_sku_boost: float = 1.2
68 69 style_intent_terms: Dict[str, List[Dict[str, List[str]]]] = field(default_factory=dict)
69 70 style_intent_dimension_aliases: Dict[str, List[str]] = field(default_factory=dict)
70 71 product_title_exclusion_enabled: bool = True
... ...
search/rerank_client.py
... ... @@ -200,19 +200,24 @@ def _multiply_fusion_factors(
200 200 knn_score: float,
201 201 fusion: RerankFusionConfig,
202 202 ) -> Tuple[float, float, float, float]:
203   - """(rerank_factor, text_factor, knn_factor, fused)."""
  203 + """(rerank_factor, text_factor, knn_factor, fused_without_style_boost)."""
204 204 r = (max(rerank_score, 0.0) + fusion.rerank_bias) ** fusion.rerank_exponent
205 205 t = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent
206 206 k = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent
207 207 return r, t, k, r * t * k
208 208  
209 209  
  210 +def _has_selected_sku(hit: Dict[str, Any]) -> bool:
  211 + return bool(str(hit.get("_style_rerank_suffix") or "").strip())
  212 +
  213 +
210 214 def fuse_scores_and_resort(
211 215 es_hits: List[Dict[str, Any]],
212 216 rerank_scores: List[float],
213 217 weight_es: float = DEFAULT_WEIGHT_ES,
214 218 weight_ai: float = DEFAULT_WEIGHT_AI,
215 219 fusion: Optional[RerankFusionConfig] = None,
  220 + style_intent_selected_sku_boost: float = 1.2,
216 221 debug: bool = False,
217 222 rerank_debug_rows: Optional[List[Dict[str, Any]]] = None,
218 223 ) -> List[Dict[str, Any]]:
... ... @@ -220,7 +225,10 @@ def fuse_scores_and_resort(
220 225 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。
221 226  
222 227 融合形式(由 ``fusion`` 配置 bias / exponent)::
223   - fused = (max(rerank,0)+b_r)^e_r * (max(text,0)+b_t)^e_t * (max(knn,0)+b_k)^e_k
  228 + fused = (max(rerank,0)+b_r)^e_r * (max(text,0)+b_t)^e_t * (max(knn,0)+b_k)^e_k * sku_boost
  229 +
  230 + 其中 sku_boost 仅在当前 hit 已选中 SKU 时生效,默认值为 1.2,可通过
  231 + ``query_config.style_intent.selected_sku_boost`` 配置。
224 232  
225 233 对每条 hit 会写入:
226 234 - _original_score: 原始 ES 分数
... ... @@ -252,12 +260,16 @@ def fuse_scores_and_resort(
252 260 rerank_factor, text_factor, knn_factor, fused = _multiply_fusion_factors(
253 261 rerank_score, text_score, knn_score, f
254 262 )
  263 + sku_selected = _has_selected_sku(hit)
  264 + style_boost = style_intent_selected_sku_boost if sku_selected else 1.0
  265 + fused *= style_boost
255 266  
256 267 hit["_original_score"] = hit.get("_score")
257 268 hit["_rerank_score"] = rerank_score
258 269 hit["_text_score"] = text_score
259 270 hit["_knn_score"] = knn_score
260 271 hit["_fused_score"] = fused
  272 + hit["_style_intent_selected_sku_boost"] = style_boost
261 273 if debug:
262 274 hit["_text_source_score"] = text_components["source_score"]
263 275 hit["_text_translation_score"] = text_components["translation_score"]
... ... @@ -285,6 +297,8 @@ def fuse_scores_and_resort(
285 297 "rerank_factor": rerank_factor,
286 298 "text_factor": text_factor,
287 299 "knn_factor": knn_factor,
  300 + "style_intent_selected_sku": sku_selected,
  301 + "style_intent_selected_sku_boost": style_boost,
288 302 "matched_queries": matched_queries,
289 303 "fused_score": fused,
290 304 }
... ... @@ -311,6 +325,7 @@ def run_rerank(
311 325 top_n: Optional[int] = None,
312 326 debug: bool = False,
313 327 fusion: Optional[RerankFusionConfig] = None,
  328 + style_intent_selected_sku_boost: float = 1.2,
314 329 ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]:
315 330 """
316 331 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。
... ... @@ -345,6 +360,7 @@ def run_rerank(
345 360 weight_es=weight_es,
346 361 weight_ai=weight_ai,
347 362 fusion=fusion,
  363 + style_intent_selected_sku_boost=style_intent_selected_sku_boost,
348 364 debug=debug,
349 365 rerank_debug_rows=rerank_debug_rows,
350 366 )
... ...
search/searcher.py
... ... @@ -594,6 +594,7 @@ class Searcher:
594 594 top_n=(from_ + size),
595 595 debug=debug,
596 596 fusion=rc.fusion,
  597 + style_intent_selected_sku_boost=self.config.query_config.style_intent_selected_sku_boost,
597 598 )
598 599  
599 600 if rerank_meta is not None:
... ... @@ -1055,4 +1056,3 @@ class Searcher:
1055 1056 except Exception as e:
1056 1057 logger.error(f"Failed to get document {doc_id} from tenant {tenant_id}: {e}", exc_info=True)
1057 1058 return None
1058   -
... ...
tests/test_rerank_client.py
... ... @@ -118,3 +118,34 @@ def test_fuse_scores_and_resort_uses_configurable_fusion_params():
118 118 by_id = {h["_id"]: h for h in hits}
119 119 assert isclose(by_id["a"]["_fused_score"], 1.0, rel_tol=1e-9)
120 120 assert isclose(by_id["b"]["_fused_score"], 0.0, rel_tol=1e-9)
  121 +
  122 +
  123 +def test_fuse_scores_and_resort_boosts_hits_with_selected_sku():
  124 + hits = [
  125 + {
  126 + "_id": "style-selected",
  127 + "_score": 1.0,
  128 + "_style_rerank_suffix": "Blue XL",
  129 + "matched_queries": {"base_query": 1.0, "knn_query": 0.0},
  130 + },
  131 + {
  132 + "_id": "plain",
  133 + "_score": 1.0,
  134 + "matched_queries": {"base_query": 1.0, "knn_query": 0.0},
  135 + },
  136 + ]
  137 +
  138 + debug = fuse_scores_and_resort(
  139 + hits,
  140 + [1.0, 1.0],
  141 + style_intent_selected_sku_boost=1.2,
  142 + debug=True,
  143 + )
  144 +
  145 + by_id = {h["_id"]: h for h in hits}
  146 + assert isclose(by_id["style-selected"]["_fused_score"], by_id["plain"]["_fused_score"] * 1.2, rel_tol=1e-9)
  147 + assert by_id["style-selected"]["_style_intent_selected_sku_boost"] == 1.2
  148 + assert by_id["plain"]["_style_intent_selected_sku_boost"] == 1.0
  149 + assert [h["_id"] for h in hits] == ["style-selected", "plain"]
  150 + assert debug[0]["style_intent_selected_sku"] is True
  151 + assert debug[0]["style_intent_selected_sku_boost"] == 1.2
... ...