Commit 814e352b28210fc835604282a1af188d53d81d63

Authored by tangwang
1 parent 581dafae

乘法公式配置化

config/__init__.py
@@ -8,6 +8,7 @@ from config.schema import ( @@ -8,6 +8,7 @@ from config.schema import (
8 IndexConfig, 8 IndexConfig,
9 QueryConfig, 9 QueryConfig,
10 RerankConfig, 10 RerankConfig,
  11 + RerankFusionConfig,
11 SPUConfig, 12 SPUConfig,
12 SearchConfig, 13 SearchConfig,
13 ServicesConfig, 14 ServicesConfig,
@@ -36,6 +37,7 @@ __all__ = [ @@ -36,6 +37,7 @@ __all__ = [
36 "IndexConfig", 37 "IndexConfig",
37 "QueryConfig", 38 "QueryConfig",
38 "RerankConfig", 39 "RerankConfig",
  40 + "RerankFusionConfig",
39 "SPUConfig", 41 "SPUConfig",
40 "SearchConfig", 42 "SearchConfig",
41 "ServicesConfig", 43 "ServicesConfig",
config/config.yaml
@@ -219,6 +219,14 @@ rerank: @@ -219,6 +219,14 @@ rerank:
219 weight_ai: 0.6 219 weight_ai: 0.6
220 rerank_query_template: "{query}" 220 rerank_query_template: "{query}"
221 rerank_doc_template: "{title}" 221 rerank_doc_template: "{title}"
  222 + # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(rerank / text / knn 三项)
  223 + fusion:
  224 + rerank_bias: 0.00001
  225 + rerank_exponent: 1.0
  226 + text_bias: 0.1
  227 + text_exponent: 0.35
  228 + knn_bias: 0.6
  229 + knn_exponent: 0.2
222 230
223 # 可扩展服务/provider 注册表(单一配置源) 231 # 可扩展服务/provider 注册表(单一配置源)
224 services: 232 services:
@@ -37,6 +37,7 @@ from config.schema import ( @@ -37,6 +37,7 @@ from config.schema import (
37 ProductEnrichConfig, 37 ProductEnrichConfig,
38 RedisSettings, 38 RedisSettings,
39 RerankConfig, 39 RerankConfig,
  40 + RerankFusionConfig,
40 RerankServiceConfig, 41 RerankServiceConfig,
41 RuntimeConfig, 42 RuntimeConfig,
42 SearchConfig, 43 SearchConfig,
@@ -393,6 +394,7 @@ class AppConfigLoader: @@ -393,6 +394,7 @@ class AppConfigLoader:
393 394
394 function_score_cfg = raw.get("function_score") if isinstance(raw.get("function_score"), dict) else {} 395 function_score_cfg = raw.get("function_score") if isinstance(raw.get("function_score"), dict) else {}
395 rerank_cfg = raw.get("rerank") if isinstance(raw.get("rerank"), dict) else {} 396 rerank_cfg = raw.get("rerank") if isinstance(raw.get("rerank"), dict) else {}
  397 + fusion_raw = rerank_cfg.get("fusion") if isinstance(rerank_cfg.get("fusion"), dict) else {}
396 spu_cfg = raw.get("spu_config") if isinstance(raw.get("spu_config"), dict) else {} 398 spu_cfg = raw.get("spu_config") if isinstance(raw.get("spu_config"), dict) else {}
397 399
398 return SearchConfig( 400 return SearchConfig(
@@ -412,6 +414,14 @@ class AppConfigLoader: @@ -412,6 +414,14 @@ class AppConfigLoader:
412 weight_ai=float(rerank_cfg.get("weight_ai", 0.6)), 414 weight_ai=float(rerank_cfg.get("weight_ai", 0.6)),
413 rerank_query_template=str(rerank_cfg.get("rerank_query_template") or "{query}"), 415 rerank_query_template=str(rerank_cfg.get("rerank_query_template") or "{query}"),
414 rerank_doc_template=str(rerank_cfg.get("rerank_doc_template") or "{title}"), 416 rerank_doc_template=str(rerank_cfg.get("rerank_doc_template") or "{title}"),
  417 + fusion=RerankFusionConfig(
  418 + rerank_bias=float(fusion_raw.get("rerank_bias", 0.00001)),
  419 + rerank_exponent=float(fusion_raw.get("rerank_exponent", 1.0)),
  420 + text_bias=float(fusion_raw.get("text_bias", 0.1)),
  421 + text_exponent=float(fusion_raw.get("text_exponent", 0.35)),
  422 + knn_bias=float(fusion_raw.get("knn_bias", 0.6)),
  423 + knn_exponent=float(fusion_raw.get("knn_exponent", 0.2)),
  424 + ),
415 ), 425 ),
416 spu_config=SPUConfig( 426 spu_config=SPUConfig(
417 enabled=bool(spu_cfg.get("enabled", False)), 427 enabled=bool(spu_cfg.get("enabled", False)),
config/schema.py
@@ -91,6 +91,21 @@ class FunctionScoreConfig: @@ -91,6 +91,21 @@ class FunctionScoreConfig:
91 91
92 92
@dataclass(frozen=True)
class RerankFusionConfig:
    """Bias/exponent parameters for multiplicative rerank fusion.

    The fused score is the product of three factors, one each for the
    rerank, text and knn scores::

        fused = Π (max(score_i, 0) + bias_i) ** exponent_i

    Every score is clamped at zero before its bias is added, so a
    non-negative bias keeps each base non-negative and each factor a real
    number. The values are validated on construction so that an unusable
    configuration is rejected immediately instead of surfacing later as a
    complex number, a ``ZeroDivisionError`` or a NaN sort key while hits
    are being scored.

    Raises:
        ValueError: if any value is NaN or infinite, if a bias is negative,
            or if a bias is zero while its exponent is negative.
    """

    rerank_bias: float = 0.00001
    rerank_exponent: float = 1.0
    text_bias: float = 0.1
    text_exponent: float = 0.35
    knn_bias: float = 0.6
    knn_exponent: float = 0.2

    def __post_init__(self) -> None:
        # Local import keeps this block independent of the module's imports.
        from math import isfinite

        for term in ("rerank", "text", "knn"):
            bias = getattr(self, f"{term}_bias")
            exponent = getattr(self, f"{term}_exponent")
            if not (isfinite(bias) and isfinite(exponent)):
                raise ValueError(
                    f"{term}_bias and {term}_exponent must be finite numbers, "
                    f"got bias={bias!r}, exponent={exponent!r}"
                )
            if bias < 0:
                # A negative bias can make the base negative; a fractional
                # exponent would then yield a complex number.
                raise ValueError(f"{term}_bias must be >= 0, got {bias!r}")
            if bias == 0 and exponent < 0:
                # A zero score would evaluate 0 ** negative.
                raise ValueError(
                    f"{term}_exponent must be >= 0 when {term}_bias is 0, "
                    f"got {exponent!r}"
                )
  106 +
  107 +
  108 +@dataclass(frozen=True)
94 class RerankConfig: 109 class RerankConfig:
95 """Search-time rerank configuration.""" 110 """Search-time rerank configuration."""
96 111
@@ -101,6 +116,7 @@ class RerankConfig: @@ -101,6 +116,7 @@ class RerankConfig:
101 weight_ai: float = 0.6 116 weight_ai: float = 0.6
102 rerank_query_template: str = "{query}" 117 rerank_query_template: str = "{query}"
103 rerank_doc_template: str = "{title}" 118 rerank_doc_template: str = "{title}"
  119 + fusion: RerankFusionConfig = field(default_factory=RerankFusionConfig)
104 120
105 121
106 @dataclass(frozen=True) 122 @dataclass(frozen=True)
frontend/static/js/app.js
@@ -411,11 +411,6 @@ function displayResults(data) { @@ -411,11 +411,6 @@ function displayResults(data) {
411 const esNorm = typeof debug.es_score_normalized === 'number' 411 const esNorm = typeof debug.es_score_normalized === 'number'
412 ? debug.es_score_normalized.toFixed(4) 412 ? debug.es_score_normalized.toFixed(4)
413 : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized)); 413 : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized));
414 -  
415 - const esNormMinMax = typeof debug.es_score_norm === 'number'  
416 - ? debug.es_score_norm.toFixed(4)  
417 - : (debug.es_score_norm == null ? '' : String(debug.es_score_norm));  
418 -  
419 const rerankScore = typeof debug.rerank_score === 'number' 414 const rerankScore = typeof debug.rerank_score === 'number'
420 ? debug.rerank_score.toFixed(4) 415 ? debug.rerank_score.toFixed(4)
421 : (debug.rerank_score == null ? '' : String(debug.rerank_score)); 416 : (debug.rerank_score == null ? '' : String(debug.rerank_score));
@@ -437,13 +432,28 @@ function displayResults(data) { @@ -437,13 +432,28 @@ function displayResults(data) {
437 const resultJson = customStringify(result); 432 const resultJson = customStringify(result);
438 const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`; 433 const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;
439 const rerankInputHtml = debug.rerank_input 434 const rerankInputHtml = debug.rerank_input
440 - ? `<details><summary>Rerank input</summary><pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.rerank_input))}</pre></details>` 435 + ? `
  436 + <details>
  437 + <summary>Rerank input</summary>
  438 + <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.rerank_input))}</pre>
  439 + </details>
  440 + `
441 : ''; 441 : '';
442 const styleIntentHtml = debug.style_intent_sku 442 const styleIntentHtml = debug.style_intent_sku
443 - ? `<details><summary>Selected SKU</summary><pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.style_intent_sku))}</pre></details>` 443 + ? `
  444 + <details>
  445 + <summary>Selected SKU</summary>
  446 + <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.style_intent_sku))}</pre>
  447 + </details>
  448 + `
444 : ''; 449 : '';
445 const matchedQueriesHtml = debug.matched_queries 450 const matchedQueriesHtml = debug.matched_queries
446 - ? `<details><summary>matched_queries</summary><pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.matched_queries))}</pre></details>` 451 + ? `
  452 + <details>
  453 + <summary>matched_queries</summary>
  454 + <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.matched_queries))}</pre>
  455 + </details>
  456 + `
447 : ''; 457 : '';
448 458
449 debugHtml = ` 459 debugHtml = `
@@ -453,9 +463,7 @@ function displayResults(data) { @@ -453,9 +463,7 @@ function displayResults(data) {
453 <div class="product-debug-line">Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}</div> 463 <div class="product-debug-line">Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}</div>
454 <div class="product-debug-line">Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}</div> 464 <div class="product-debug-line">Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}</div>
455 <div class="product-debug-line">ES score: ${esScore}</div> 465 <div class="product-debug-line">ES score: ${esScore}</div>
456 - <div class="product-debug-line">ES normalized (score / initial ES max): ${esNorm}</div>  
457 - <div class="product-debug-line">ES norm (min-max over initial ES window): ${esNormMinMax}</div>  
458 - <div class="product-debug-line">ES score min/max: ${escapeHtml(String(debug.es_score_min ?? ''))} / ${escapeHtml(String(debug.es_score_max ?? ''))}</div> 466 + <div class="product-debug-line">ES normalized: ${esNorm}</div>
459 <div class="product-debug-line">Rerank score: ${rerankScore}</div> 467 <div class="product-debug-line">Rerank score: ${rerankScore}</div>
460 <div class="product-debug-line">rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}</div> 468 <div class="product-debug-line">rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}</div>
461 <div class="product-debug-line">text_score: ${escapeHtml(String(debug.text_score ?? ''))}</div> 469 <div class="product-debug-line">text_score: ${escapeHtml(String(debug.text_score ?? ''))}</div>
@@ -910,9 +918,6 @@ function displayDebugInfo(data) { @@ -910,9 +918,6 @@ function displayDebugInfo(data) {
910 html += `<div>query_tokens: ${escapeHtml((debugInfo.query_analysis.query_tokens || []).join(', ') || 'N/A')}</div>`; 918 html += `<div>query_tokens: ${escapeHtml((debugInfo.query_analysis.query_tokens || []).join(', ') || 'N/A')}</div>`;
911 html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`; 919 html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`;
912 html += `<div>is_simple_query: ${debugInfo.query_analysis.is_simple_query ? 'yes' : 'no'}</div>`; 920 html += `<div>is_simple_query: ${debugInfo.query_analysis.is_simple_query ? 'yes' : 'no'}</div>`;
913 - if (debugInfo.query_analysis.query_vector_summary) {  
914 - html += `<div>query_vector_summary: ${escapeHtml(customStringify(debugInfo.query_analysis.query_vector_summary))}</div>`;  
915 - }  
916 921
917 if (debugInfo.query_analysis.translations && Object.keys(debugInfo.query_analysis.translations).length > 0) { 922 if (debugInfo.query_analysis.translations && Object.keys(debugInfo.query_analysis.translations).length > 0) {
918 html += '<div>translations: '; 923 html += '<div>translations: ';
@@ -946,25 +951,21 @@ function displayDebugInfo(data) { @@ -946,25 +951,21 @@ function displayDebugInfo(data) {
946 html += `<div>took_ms: ${debugInfo.es_response.took_ms}ms</div>`; 951 html += `<div>took_ms: ${debugInfo.es_response.took_ms}ms</div>`;
947 html += `<div>total_hits: ${debugInfo.es_response.total_hits}</div>`; 952 html += `<div>total_hits: ${debugInfo.es_response.total_hits}</div>`;
948 html += `<div>max_score: ${debugInfo.es_response.max_score?.toFixed(3) || 0}</div>`; 953 html += `<div>max_score: ${debugInfo.es_response.max_score?.toFixed(3) || 0}</div>`;
949 - html += `<div>initial_es_max_score: ${escapeHtml(String(debugInfo.es_response.initial_es_max_score ?? ''))}</div>`;  
950 - html += `<div>initial_es_min_score: ${escapeHtml(String(debugInfo.es_response.initial_es_min_score ?? ''))}</div>`; 954 + html += `<div>es_score_normalization_factor: ${escapeHtml(String(debugInfo.es_response.es_score_normalization_factor ?? ''))}</div>`;
951 html += '</div>'; 955 html += '</div>';
952 } 956 }
953 957
954 if (debugInfo.rerank) { 958 if (debugInfo.rerank) {
955 html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Rerank:</strong>'; 959 html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Rerank:</strong>';
956 - html += `<div>requested: ${debugInfo.rerank.requested ? 'yes' : 'no'}</div>`;  
957 - html += `<div>executed: ${debugInfo.rerank.executed ? 'yes' : 'no'}</div>`;  
958 - html += `<div>in_rerank_window: ${debugInfo.rerank.in_rerank_window ? 'yes' : 'no'}</div>`;  
959 - html += `<div>top_n: ${escapeHtml(String(debugInfo.rerank.top_n ?? ''))}</div>`;  
960 html += `<div>query_template: ${escapeHtml(debugInfo.rerank.query_template || 'N/A')}</div>`; 960 html += `<div>query_template: ${escapeHtml(debugInfo.rerank.query_template || 'N/A')}</div>`;
961 html += `<div>doc_template: ${escapeHtml(debugInfo.rerank.doc_template || 'N/A')}</div>`; 961 html += `<div>doc_template: ${escapeHtml(debugInfo.rerank.doc_template || 'N/A')}</div>`;
962 - html += '</div>';  
963 - }  
964 -  
965 - if (debugInfo.page_fill) {  
966 - html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Page Fill:</strong>';  
967 - html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debugInfo.page_fill))}</pre>`; 962 + html += `<div>query_text: ${escapeHtml(debugInfo.rerank.query_text || 'N/A')}</div>`;
  963 + html += `<div>docs: ${escapeHtml(String(debugInfo.rerank.docs ?? ''))}</div>`;
  964 + html += `<div>top_n: ${escapeHtml(String(debugInfo.rerank.top_n ?? ''))}</div>`;
  965 + if (debugInfo.rerank.fusion) {
  966 + html += '<div>fusion:</div>';
  967 + html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 160px;">${escapeHtml(customStringify(debugInfo.rerank.fusion))}</pre>`;
  968 + }
968 html += '</div>'; 969 html += '</div>';
969 } 970 }
970 971
@@ -992,7 +993,7 @@ function displayDebugInfo(data) { @@ -992,7 +993,7 @@ function displayDebugInfo(data) {
992 993
993 if (debugInfo.es_query_context) { 994 if (debugInfo.es_query_context) {
994 html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">ES Query Context:</strong>'; 995 html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">ES Query Context:</strong>';
995 - html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 300px;">${escapeHtml(customStringify(debugInfo.es_query_context))}</pre>`; 996 + html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 240px;">${escapeHtml(customStringify(debugInfo.es_query_context))}</pre>`;
996 html += '</div>'; 997 html += '</div>';
997 } 998 }
998 999
search/rerank_client.py
@@ -10,6 +10,7 @@ @@ -10,6 +10,7 @@
10 from typing import Dict, Any, List, Optional, Tuple 10 from typing import Dict, Any, List, Optional, Tuple
11 import logging 11 import logging
12 12
  13 +from config.schema import RerankFusionConfig
13 from providers import create_rerank_provider 14 from providers import create_rerank_provider
14 15
15 logger = logging.getLogger(__name__) 16 logger = logging.getLogger(__name__)
@@ -176,17 +177,34 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa @@ -176,17 +177,34 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa
176 } 177 }
177 178
178 179
  180 +def _multiply_fusion_factors(
  181 + rerank_score: float,
  182 + text_score: float,
  183 + knn_score: float,
  184 + fusion: RerankFusionConfig,
  185 +) -> Tuple[float, float, float, float]:
  186 + """(rerank_factor, text_factor, knn_factor, fused)."""
  187 + r = (max(rerank_score, 0.0) + fusion.rerank_bias) ** fusion.rerank_exponent
  188 + t = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent
  189 + k = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent
  190 + return r, t, k, r * t * k
  191 +
  192 +
179 def fuse_scores_and_resort( 193 def fuse_scores_and_resort(
180 es_hits: List[Dict[str, Any]], 194 es_hits: List[Dict[str, Any]],
181 rerank_scores: List[float], 195 rerank_scores: List[float],
182 weight_es: float = DEFAULT_WEIGHT_ES, 196 weight_es: float = DEFAULT_WEIGHT_ES,
183 weight_ai: float = DEFAULT_WEIGHT_AI, 197 weight_ai: float = DEFAULT_WEIGHT_AI,
  198 + fusion: Optional[RerankFusionConfig] = None,
184 debug: bool = False, 199 debug: bool = False,
185 rerank_debug_rows: Optional[List[Dict[str, Any]]] = None, 200 rerank_debug_rows: Optional[List[Dict[str, Any]]] = None,
186 ) -> List[Dict[str, Any]]: 201 ) -> List[Dict[str, Any]]:
187 """ 202 """
188 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。 203 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。
189 204
  205 + 融合形式(由 ``fusion`` 配置 bias / exponent)::
  206 + fused = (max(rerank,0)+b_r)^e_r * (max(text,0)+b_t)^e_t * (max(knn,0)+b_k)^e_k
  207 +
190 对每条 hit 会写入: 208 对每条 hit 会写入:
191 - _original_score: 原始 ES 分数 209 - _original_score: 原始 ES 分数
192 - _rerank_score: 重排服务返回的分数 210 - _rerank_score: 重排服务返回的分数
@@ -199,40 +217,35 @@ def fuse_scores_and_resort( @@ -199,40 +217,35 @@ def fuse_scores_and_resort(
199 rerank_scores: 与 es_hits 等长的重排分数列表 217 rerank_scores: 与 es_hits 等长的重排分数列表
200 weight_es: 兼容保留,当前未使用 218 weight_es: 兼容保留,当前未使用
201 weight_ai: 兼容保留,当前未使用 219 weight_ai: 兼容保留,当前未使用
202 -  
203 - Returns:  
204 - 每条文档的融合调试信息列表,用于 debug_info  
205 """ 220 """
206 n = len(es_hits) 221 n = len(es_hits)
207 if n == 0 or len(rerank_scores) != n: 222 if n == 0 or len(rerank_scores) != n:
208 return [] 223 return []
209 224
210 - fused_debug: List[Dict[str, Any]] = [] 225 + f = fusion or RerankFusionConfig()
  226 + fused_debug: List[Dict[str, Any]] = [] if debug else []
211 227
212 for idx, hit in enumerate(es_hits): 228 for idx, hit in enumerate(es_hits):
213 es_score = _to_score(hit.get("_score")) 229 es_score = _to_score(hit.get("_score"))
214 -  
215 - ai_score_raw = rerank_scores[idx]  
216 - rerank_score = _to_score(ai_score_raw)  
217 - 230 + rerank_score = _to_score(rerank_scores[idx])
218 matched_queries = hit.get("matched_queries") 231 matched_queries = hit.get("matched_queries")
219 knn_score = _extract_named_query_score(matched_queries, "knn_query") 232 knn_score = _extract_named_query_score(matched_queries, "knn_query")
220 text_components = _collect_text_score_components(matched_queries, es_score) 233 text_components = _collect_text_score_components(matched_queries, es_score)
221 text_score = text_components["text_score"] 234 text_score = text_components["text_score"]
222 - rerank_factor = max(rerank_score, 0.0) + 0.00001  
223 - text_factor = (max(text_score, 0.0) + 0.1) ** 0.35  
224 - knn_factor = (max(knn_score, 0.0) + 0.6) ** 0.2  
225 - fused = rerank_factor * text_factor * knn_factor 235 + rerank_factor, text_factor, knn_factor, fused = _multiply_fusion_factors(
  236 + rerank_score, text_score, knn_score, f
  237 + )
226 238
227 hit["_original_score"] = hit.get("_score") 239 hit["_original_score"] = hit.get("_score")
228 hit["_rerank_score"] = rerank_score 240 hit["_rerank_score"] = rerank_score
229 hit["_text_score"] = text_score 241 hit["_text_score"] = text_score
230 hit["_knn_score"] = knn_score 242 hit["_knn_score"] = knn_score
231 - hit["_text_source_score"] = text_components["source_score"]  
232 - hit["_text_translation_score"] = text_components["translation_score"]  
233 - hit["_text_primary_score"] = text_components["primary_text_score"]  
234 - hit["_text_support_score"] = text_components["support_text_score"]  
235 hit["_fused_score"] = fused 243 hit["_fused_score"] = fused
  244 + if debug:
  245 + hit["_text_source_score"] = text_components["source_score"]
  246 + hit["_text_translation_score"] = text_components["translation_score"]
  247 + hit["_text_primary_score"] = text_components["primary_text_score"]
  248 + hit["_text_support_score"] = text_components["support_text_score"]
236 249
237 if debug: 250 if debug:
238 debug_entry = { 251 debug_entry = {
@@ -262,7 +275,6 @@ def fuse_scores_and_resort( @@ -262,7 +275,6 @@ def fuse_scores_and_resort(
262 debug_entry["rerank_input"] = rerank_debug_rows[idx] 275 debug_entry["rerank_input"] = rerank_debug_rows[idx]
263 fused_debug.append(debug_entry) 276 fused_debug.append(debug_entry)
264 277
265 - # 按融合分数降序重排  
266 es_hits.sort( 278 es_hits.sort(
267 key=lambda h: h.get("_fused_score", h.get("_score", 0.0)), 279 key=lambda h: h.get("_fused_score", h.get("_score", 0.0)),
268 reverse=True, 280 reverse=True,
@@ -281,6 +293,7 @@ def run_rerank( @@ -281,6 +293,7 @@ def run_rerank(
281 rerank_doc_template: str = "{title}", 293 rerank_doc_template: str = "{title}",
282 top_n: Optional[int] = None, 294 top_n: Optional[int] = None,
283 debug: bool = False, 295 debug: bool = False,
  296 + fusion: Optional[RerankFusionConfig] = None,
284 ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]: 297 ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]:
285 """ 298 """
286 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。 299 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。
@@ -314,6 +327,7 @@ def run_rerank( @@ -314,6 +327,7 @@ def run_rerank(
314 scores, 327 scores,
315 weight_es=weight_es, 328 weight_es=weight_es,
316 weight_ai=weight_ai, 329 weight_ai=weight_ai,
  330 + fusion=fusion,
317 debug=debug, 331 debug=debug,
318 rerank_debug_rows=rerank_debug_rows, 332 rerank_debug_rows=rerank_debug_rows,
319 ) 333 )
search/searcher.py
@@ -4,9 +4,8 @@ Main Searcher module - executes search queries against Elasticsearch. @@ -4,9 +4,8 @@ Main Searcher module - executes search queries against Elasticsearch.
4 Handles query parsing, ranking, and result formatting. 4 Handles query parsing, ranking, and result formatting.
5 """ 5 """
6 6
7 -from typing import Dict, Any, List, Optional, Union, Tuple  
8 -import os  
9 -import time, json 7 +from typing import Dict, Any, List, Optional
  8 +import json
10 import logging 9 import logging
11 import hashlib 10 import hashlib
12 from string import Formatter 11 from string import Formatter
@@ -20,7 +19,7 @@ from .sku_intent_selector import SkuSelectionDecision, StyleSkuSelector @@ -20,7 +19,7 @@ from .sku_intent_selector import SkuSelectionDecision, StyleSkuSelector
20 from config import SearchConfig 19 from config import SearchConfig
21 from config.tenant_config_loader import get_tenant_config_loader 20 from config.tenant_config_loader import get_tenant_config_loader
22 from context.request_context import RequestContext, RequestContextStage 21 from context.request_context import RequestContext, RequestContextStage
23 -from api.models import FacetResult, FacetValue, FacetConfig 22 +from api.models import FacetResult, FacetConfig
24 from api.result_formatter import ResultFormatter 23 from api.result_formatter import ResultFormatter
25 from indexer.mapping_generator import get_tenant_index_name 24 from indexer.mapping_generator import get_tenant_index_name
26 25
@@ -259,13 +258,7 @@ class Searcher: @@ -259,13 +258,7 @@ class Searcher:
259 if context is not None: 258 if context is not None:
260 context.start_stage(RequestContextStage.STYLE_SKU_PREPARE_HITS) 259 context.start_stage(RequestContextStage.STYLE_SKU_PREPARE_HITS)
261 try: 260 try:
262 - decisions = self.style_sku_selector.prepare_hits(es_hits, parsed_query)  
263 - if decisions and context is not None:  
264 - context.store_intermediate_result(  
265 - "style_intent_sku_decisions",  
266 - {doc_id: decision.to_dict() for doc_id, decision in decisions.items()},  
267 - )  
268 - return decisions 261 + return self.style_sku_selector.prepare_hits(es_hits, parsed_query)
269 finally: 262 finally:
270 if context is not None: 263 if context is not None:
271 context.end_stage(RequestContextStage.STYLE_SKU_PREPARE_HITS) 264 context.end_stage(RequestContextStage.STYLE_SKU_PREPARE_HITS)
@@ -339,21 +332,10 @@ class Searcher: @@ -339,21 +332,10 @@ class Searcher:
339 in_rerank_window = do_rerank and (from_ + size) <= rerank_window 332 in_rerank_window = do_rerank and (from_ + size) <= rerank_window
340 es_fetch_from = 0 if in_rerank_window else from_ 333 es_fetch_from = 0 if in_rerank_window else from_
341 es_fetch_size = rerank_window if in_rerank_window else size 334 es_fetch_size = rerank_window if in_rerank_window else size
342 - initial_es_positions: Dict[str, int] = {}  
343 - initial_es_min_score: Optional[float] = None  
344 - initial_es_max_score: Optional[float] = None  
345 - page_fill_debug: Optional[Dict[str, Any]] = None 335 +
  336 + es_score_normalization_factor: Optional[float] = None
  337 + initial_ranks_by_doc: Dict[str, int] = {}
346 rerank_debug_info: Optional[Dict[str, Any]] = None 338 rerank_debug_info: Optional[Dict[str, Any]] = None
347 - if debug:  
348 - rerank_debug_info = {  
349 - "requested": do_rerank,  
350 - "executed": False,  
351 - "in_rerank_window": in_rerank_window,  
352 - "rerank_window": rerank_window,  
353 - "top_n": from_ + size,  
354 - "query_template": effective_query_template,  
355 - "doc_template": effective_doc_template,  
356 - }  
357 339
358 # Start timing 340 # Start timing
359 context.start_stage(RequestContextStage.TOTAL) 341 context.start_stage(RequestContextStage.TOTAL)
@@ -402,8 +384,8 @@ class Searcher: @@ -402,8 +384,8 @@ class Searcher:
402 try: 384 try:
403 parsed_query = self.query_parser.parse( 385 parsed_query = self.query_parser.parse(
404 query, 386 query,
405 - tenant_id=tenant_id,  
406 generate_vector=enable_embedding, 387 generate_vector=enable_embedding,
  388 + tenant_id=tenant_id,
407 context=context, 389 context=context,
408 target_languages=index_langs if enable_translation else [], 390 target_languages=index_langs if enable_translation else [],
409 ) 391 )
@@ -493,8 +475,6 @@ class Searcher: @@ -493,8 +475,6 @@ class Searcher:
493 context.store_intermediate_result('es_query', es_query) 475 context.store_intermediate_result('es_query', es_query)
494 if in_rerank_window and rerank_prefetch_source is not None: 476 if in_rerank_window and rerank_prefetch_source is not None:
495 context.store_intermediate_result('es_query_rerank_prefetch_source', rerank_prefetch_source) 477 context.store_intermediate_result('es_query_rerank_prefetch_source', rerank_prefetch_source)
496 - context.store_intermediate_result('es_body_for_search', body_for_es)  
497 -  
498 # Serialize ES query to compute a compact size + stable digest for correlation 478 # Serialize ES query to compute a compact size + stable digest for correlation
499 es_query_compact = json.dumps(es_query_for_fetch, ensure_ascii=False, separators=(",", ":")) 479 es_query_compact = json.dumps(es_query_for_fetch, ensure_ascii=False, separators=(",", ":"))
500 es_query_digest = hashlib.sha256(es_query_compact.encode("utf-8")).hexdigest()[:16] 480 es_query_digest = hashlib.sha256(es_query_compact.encode("utf-8")).hexdigest()[:16]
@@ -548,35 +528,29 @@ class Searcher: @@ -548,35 +528,29 @@ class Searcher:
548 # Store ES response in context 528 # Store ES response in context
549 context.store_intermediate_result('es_response', es_response) 529 context.store_intermediate_result('es_response', es_response)
550 if debug: 530 if debug:
551 - initial_hits = es_response.get('hits', {}).get('hits') or []  
552 - initial_scores: List[float] = [] 531 + initial_hits = es_response.get("hits", {}).get("hits") or []
553 for rank, hit in enumerate(initial_hits, 1): 532 for rank, hit in enumerate(initial_hits, 1):
554 doc_id = hit.get("_id") 533 doc_id = hit.get("_id")
555 if doc_id is not None: 534 if doc_id is not None:
556 - initial_es_positions[str(doc_id)] = rank  
557 - raw_score = hit.get("_score")  
558 - try:  
559 - if raw_score is not None:  
560 - initial_scores.append(float(raw_score))  
561 - except (TypeError, ValueError):  
562 - pass  
563 - raw_max_score = es_response.get('hits', {}).get('max_score') 535 + initial_ranks_by_doc[str(doc_id)] = rank
  536 + raw_initial_max_score = es_response.get("hits", {}).get("max_score")
564 try: 537 try:
565 - initial_es_max_score = float(raw_max_score) if raw_max_score is not None else None 538 + es_score_normalization_factor = float(raw_initial_max_score) if raw_initial_max_score is not None else None
566 except (TypeError, ValueError): 539 except (TypeError, ValueError):
567 - initial_es_max_score = None  
568 - if initial_es_max_score is None and initial_scores:  
569 - initial_es_max_score = max(initial_scores)  
570 - initial_es_min_score = min(initial_scores) if initial_scores else None 540 + es_score_normalization_factor = None
  541 + if es_score_normalization_factor is None and initial_hits:
  542 + first_score = initial_hits[0].get("_score")
  543 + try:
  544 + es_score_normalization_factor = float(first_score) if first_score is not None else None
  545 + except (TypeError, ValueError):
  546 + es_score_normalization_factor = None
571 547
572 # Extract timing from ES response 548 # Extract timing from ES response
573 es_took = es_response.get('took', 0) 549 es_took = es_response.get('took', 0)
574 context.logger.info( 550 context.logger.info(
575 f"ES搜索完成 | 耗时: {es_took}ms | " 551 f"ES搜索完成 | 耗时: {es_took}ms | "
576 f"命中数: {es_response.get('hits', {}).get('total', {}).get('value', 0)} | " 552 f"命中数: {es_response.get('hits', {}).get('total', {}).get('value', 0)} | "
577 - f"最高分: {(es_response.get('hits', {}).get('max_score') or 0):.3f} | "  
578 - f"detected_language={parsed_query.detected_language} | "  
579 - f"translations={list((parsed_query.translations or {}).keys())}", 553 + f"最高分: {(es_response.get('hits', {}).get('max_score') or 0):.3f}",
580 extra={'reqid': context.reqid, 'uid': context.uid} 554 extra={'reqid': context.reqid, 'uid': context.uid}
581 ) 555 )
582 except Exception as e: 556 except Exception as e:
@@ -621,30 +595,31 @@ class Searcher: @@ -621,30 +595,31 @@ class Searcher:
621 rerank_doc_template=effective_doc_template, 595 rerank_doc_template=effective_doc_template,
622 top_n=(from_ + size), 596 top_n=(from_ + size),
623 debug=debug, 597 debug=debug,
  598 + fusion=rc.fusion,
624 ) 599 )
625 600
626 if rerank_meta is not None: 601 if rerank_meta is not None:
627 - from config.services_config import get_rerank_service_url  
628 - rerank_url = get_rerank_service_url()  
629 - if debug and rerank_debug_info is not None:  
630 - rerank_debug_info.update({  
631 - "executed": True,  
632 - "service_url": rerank_url, 602 + if debug:
  603 + from dataclasses import asdict
  604 + from config.services_config import get_rerank_service_url
  605 + rerank_debug_info = {
  606 + "service_url": get_rerank_service_url(),
  607 + "query_template": effective_query_template,
  608 + "doc_template": effective_doc_template,
633 "query_text": str(effective_query_template).format_map({"query": rerank_query}), 609 "query_text": str(effective_query_template).format_map({"query": rerank_query}),
634 "docs": len(es_response.get("hits", {}).get("hits") or []), 610 "docs": len(es_response.get("hits", {}).get("hits") or []),
  611 + "top_n": from_ + size,
635 "meta": rerank_meta, 612 "meta": rerank_meta,
636 - }) 613 + "fusion": asdict(rc.fusion),
  614 + }
637 context.store_intermediate_result("rerank_scores", fused_debug) 615 context.store_intermediate_result("rerank_scores", fused_debug)
638 context.logger.info( 616 context.logger.info(
639 f"重排完成 | docs={len(es_response.get('hits', {}).get('hits') or [])} | " 617 f"重排完成 | docs={len(es_response.get('hits', {}).get('hits') or [])} | "
640 - f"top_n={from_ + size} | query_template={effective_query_template} | "  
641 - f"doc_template={effective_doc_template} | meta={rerank_meta}", 618 + f"top_n={from_ + size} | meta={rerank_meta}",
642 extra={'reqid': context.reqid, 'uid': context.uid} 619 extra={'reqid': context.reqid, 'uid': context.uid}
643 ) 620 )
644 except Exception as e: 621 except Exception as e:
645 context.add_warning(f"Rerank failed: {e}") 622 context.add_warning(f"Rerank failed: {e}")
646 - if debug and rerank_debug_info is not None:  
647 - rerank_debug_info["error"] = str(e)  
648 context.logger.warning( 623 context.logger.warning(
649 f"调用重排服务失败 | error: {e}", 624 f"调用重排服务失败 | error: {e}",
650 extra={'reqid': context.reqid, 'uid': context.uid}, 625 extra={'reqid': context.reqid, 'uid': context.uid},
@@ -707,13 +682,6 @@ class Searcher: @@ -707,13 +682,6 @@ class Searcher:
707 ) 682 )
708 if fill_took: 683 if fill_took:
709 es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took) 684 es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took)
710 - if debug:  
711 - page_fill_debug = {  
712 - "requested_ids": page_ids,  
713 - "filled": filled,  
714 - "fill_took_ms": fill_took,  
715 - "response_source_spec": response_source_spec,  
716 - }  
717 context.logger.info( 685 context.logger.info(
718 f"分页详情回填 | ids={len(page_ids)} | filled={filled} | took={fill_took}ms", 686 f"分页详情回填 | ids={len(page_ids)} | filled={filled} | took={fill_took}ms",
719 extra={'reqid': context.reqid, 'uid': context.uid} 687 extra={'reqid': context.reqid, 'uid': context.uid}
@@ -781,8 +749,8 @@ class Searcher: @@ -781,8 +749,8 @@ class Searcher:
781 # Build per-result debug info (per SPU) when debug mode is enabled 749 # Build per-result debug info (per SPU) when debug mode is enabled
782 per_result_debug = [] 750 per_result_debug = []
783 if debug and es_hits and formatted_results: 751 if debug and es_hits and formatted_results:
784 - final_positions_by_doc = {  
785 - str(hit.get("_id")): (from_ + rank) 752 + final_ranks_by_doc = {
  753 + str(hit.get("_id")): from_ + rank
786 for rank, hit in enumerate(es_hits, 1) 754 for rank, hit in enumerate(es_hits, 1)
787 if hit.get("_id") is not None 755 if hit.get("_id") is not None
788 } 756 }
@@ -805,28 +773,11 @@ class Searcher: @@ -805,28 +773,11 @@ class Searcher:
805 es_score = 0.0 773 es_score = 0.0
806 try: 774 try:
807 normalized = ( 775 normalized = (
808 - float(es_score) / float(initial_es_max_score)  
809 - if initial_es_max_score  
810 - else None 776 + float(es_score) / float(es_score_normalization_factor)
  777 + if es_score_normalization_factor else None
811 ) 778 )
812 except (TypeError, ValueError, ZeroDivisionError): 779 except (TypeError, ValueError, ZeroDivisionError):
813 normalized = None 780 normalized = None
814 - try:  
815 - es_score_norm = (  
816 - (float(es_score) - float(initial_es_min_score))  
817 - / (float(initial_es_max_score) - float(initial_es_min_score))  
818 - if initial_es_min_score is not None  
819 - and initial_es_max_score is not None  
820 - and float(initial_es_max_score) > float(initial_es_min_score)  
821 - else (  
822 - 1.0  
823 - if initial_es_min_score is not None  
824 - and initial_es_max_score is not None  
825 - else None  
826 - )  
827 - )  
828 - except (TypeError, ValueError, ZeroDivisionError):  
829 - es_score_norm = None  
830 781
831 title_multilingual = source.get("title") if isinstance(source.get("title"), dict) else None 782 title_multilingual = source.get("title") if isinstance(source.get("title"), dict) else None
832 brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None 783 brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None
@@ -836,11 +787,8 @@ class Searcher: @@ -836,11 +787,8 @@ class Searcher:
836 "spu_id": spu.spu_id, 787 "spu_id": spu.spu_id,
837 "es_score": es_score, 788 "es_score": es_score,
838 "es_score_normalized": normalized, 789 "es_score_normalized": normalized,
839 - "es_score_norm": es_score_norm,  
840 - "es_score_min": initial_es_min_score,  
841 - "es_score_max": initial_es_max_score,  
842 - "initial_rank": initial_es_positions.get(str(doc_id)) if doc_id is not None else None,  
843 - "final_rank": final_positions_by_doc.get(str(doc_id)) if doc_id is not None else None, 790 + "initial_rank": initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None,
  791 + "final_rank": final_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None,
844 "title_multilingual": title_multilingual, 792 "title_multilingual": title_multilingual,
845 "brief_multilingual": brief_multilingual, 793 "brief_multilingual": brief_multilingual,
846 "vendor_multilingual": vendor_multilingual, 794 "vendor_multilingual": vendor_multilingual,
@@ -908,12 +856,6 @@ class Searcher: @@ -908,12 +856,6 @@ class Searcher:
908 # Collect debug information if requested 856 # Collect debug information if requested
909 debug_info = None 857 debug_info = None
910 if debug: 858 if debug:
911 - query_vector_summary = None  
912 - if parsed_query.query_vector is not None:  
913 - query_vector_summary = {  
914 - "dims": int(len(parsed_query.query_vector)),  
915 - "preview": [round(float(v), 6) for v in parsed_query.query_vector[:8].tolist()],  
916 - }  
917 debug_info = { 859 debug_info = {
918 "query_analysis": { 860 "query_analysis": {
919 "original_query": context.query_analysis.original_query, 861 "original_query": context.query_analysis.original_query,
@@ -923,7 +865,6 @@ class Searcher: @@ -923,7 +865,6 @@ class Searcher:
923 "index_languages": index_langs, 865 "index_languages": index_langs,
924 "translations": context.query_analysis.translations, 866 "translations": context.query_analysis.translations,
925 "has_vector": context.query_analysis.query_vector is not None, 867 "has_vector": context.query_analysis.query_vector is not None,
926 - "query_vector_summary": query_vector_summary,  
927 "query_tokens": getattr(parsed_query, "query_tokens", []), 868 "query_tokens": getattr(parsed_query, "query_tokens", []),
928 "is_simple_query": context.query_analysis.is_simple_query, 869 "is_simple_query": context.query_analysis.is_simple_query,
929 "domain": context.query_analysis.domain, 870 "domain": context.query_analysis.domain,
@@ -931,12 +872,6 @@ class Searcher: @@ -931,12 +872,6 @@ class Searcher:
931 }, 872 },
932 "es_query": context.get_intermediate_result('es_query', {}), 873 "es_query": context.get_intermediate_result('es_query', {}),
933 "es_query_context": { 874 "es_query_context": {
934 - "filters": filters,  
935 - "range_filters": range_filters,  
936 - "facets": [getattr(facet, "field", str(facet)) for facet in facets] if facets else [],  
937 - "sort_by": sort_by,  
938 - "sort_order": sort_order,  
939 - "min_score": min_score,  
940 "es_fetch_from": es_fetch_from, 875 "es_fetch_from": es_fetch_from,
941 "es_fetch_size": es_fetch_size, 876 "es_fetch_size": es_fetch_size,
942 "in_rerank_window": in_rerank_window, 877 "in_rerank_window": in_rerank_window,
@@ -948,11 +883,9 @@ class Searcher: @@ -948,11 +883,9 @@ class Searcher:
948 "total_hits": total_value, 883 "total_hits": total_value,
949 "max_score": max_score, 884 "max_score": max_score,
950 "shards": es_response.get('_shards', {}), 885 "shards": es_response.get('_shards', {}),
951 - "initial_es_max_score": initial_es_max_score,  
952 - "initial_es_min_score": initial_es_min_score, 886 + "es_score_normalization_factor": es_score_normalization_factor,
953 }, 887 },
954 "rerank": rerank_debug_info, 888 "rerank": rerank_debug_info,
955 - "page_fill": page_fill_debug,  
956 "feature_flags": context.metadata.get('feature_flags', {}), 889 "feature_flags": context.metadata.get('feature_flags', {}),
957 "stage_timings": { 890 "stage_timings": {
958 k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items() 891 k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items()
@@ -1126,76 +1059,3 @@ class Searcher: @@ -1126,76 +1059,3 @@ class Searcher:
1126 logger.error(f"Failed to get document {doc_id} from tenant {tenant_id}: {e}", exc_info=True) 1059 logger.error(f"Failed to get document {doc_id} from tenant {tenant_id}: {e}", exc_info=True)
1127 return None 1060 return None
1128 1061
1129 - def _standardize_facets(  
1130 - self,  
1131 - es_aggregations: Dict[str, Any],  
1132 - facet_configs: Optional[List[Union[str, Any]]],  
1133 - current_filters: Optional[Dict[str, Any]]  
1134 - ) -> Optional[List[FacetResult]]:  
1135 - """  
1136 - 将 ES 聚合结果转换为标准化的分面格式(返回 Pydantic 模型)。  
1137 -  
1138 - Args:  
1139 - es_aggregations: ES 原始聚合结果  
1140 - facet_configs: 分面配置列表(str 或 FacetConfig)  
1141 - current_filters: 当前应用的过滤器  
1142 -  
1143 - Returns:  
1144 - 标准化的分面结果列表(FacetResult 对象)  
1145 - """  
1146 - if not es_aggregations or not facet_configs:  
1147 - return None  
1148 -  
1149 - standardized_facets: List[FacetResult] = []  
1150 -  
1151 - for config in facet_configs:  
1152 - # 解析配置  
1153 - if isinstance(config, str):  
1154 - field = config  
1155 - facet_type = "terms"  
1156 - else:  
1157 - # FacetConfig 对象  
1158 - field = config.field  
1159 - facet_type = config.type  
1160 -  
1161 - agg_name = f"{field}_facet"  
1162 -  
1163 - if agg_name not in es_aggregations:  
1164 - continue  
1165 -  
1166 - agg_result = es_aggregations[agg_name]  
1167 -  
1168 - # 获取当前字段的选中值  
1169 - selected_values = set()  
1170 - if current_filters and field in current_filters:  
1171 - filter_value = current_filters[field]  
1172 - if isinstance(filter_value, list):  
1173 - selected_values = set(filter_value)  
1174 - else:  
1175 - selected_values = {filter_value}  
1176 -  
1177 - # 转换 buckets 为 FacetValue 对象  
1178 - facet_values: List[FacetValue] = []  
1179 - if 'buckets' in agg_result:  
1180 - for bucket in agg_result['buckets']:  
1181 - value = bucket.get('key')  
1182 - count = bucket.get('doc_count', 0)  
1183 -  
1184 - facet_values.append(FacetValue(  
1185 - value=value,  
1186 - label=str(value),  
1187 - count=count,  
1188 - selected=value in selected_values  
1189 - ))  
1190 -  
1191 - # 构建 FacetResult 对象  
1192 - facet_result = FacetResult(  
1193 - field=field,  
1194 - label=field,  
1195 - type=facet_type,  
1196 - values=facet_values  
1197 - )  
1198 -  
1199 - standardized_facets.append(facet_result)  
1200 -  
1201 - return standardized_facets if standardized_facets else None  
tests/test_rerank_client.py
1 from math import isclose 1 from math import isclose
2 2
  3 +from config.schema import RerankFusionConfig
3 from search.rerank_client import fuse_scores_and_resort 4 from search.rerank_client import fuse_scores_and_resort
4 5
5 6
@@ -88,3 +89,32 @@ def test_fuse_scores_and_resort_downweights_text_only_advantage(): @@ -88,3 +89,32 @@ def test_fuse_scores_and_resort_downweights_text_only_advantage():
88 fuse_scores_and_resort(hits, [0.72, 0.98]) 89 fuse_scores_and_resort(hits, [0.72, 0.98])
89 90
90 assert [hit["_id"] for hit in hits] == ["rerank-better", "lexical-heavy"] 91 assert [hit["_id"] for hit in hits] == ["rerank-better", "lexical-heavy"]
  92 +
  93 +
  94 +def test_fuse_scores_and_resort_uses_configurable_fusion_params():
  95 + hits = [
  96 + {
  97 + "_id": "a",
  98 + "_score": 1.0,
  99 + "matched_queries": {"base_query": 2.0, "knn_query": 0.5},
  100 + },
  101 + {
  102 + "_id": "b",
  103 + "_score": 1.0,
  104 + "matched_queries": {"base_query": 3.0, "knn_query": 0.0},
  105 + },
  106 + ]
  107 + fusion = RerankFusionConfig(
  108 + rerank_bias=0.0,
  109 + rerank_exponent=1.0,
  110 + text_bias=0.0,
  111 + text_exponent=1.0,
  112 + knn_bias=0.0,
  113 + knn_exponent=1.0,
  114 + )
  115 + fuse_scores_and_resort(hits, [1.0, 1.0], fusion=fusion)
  116 + # b has knn = 0 -> fused score is 0; a is 1 * 2 * 0.5 = 1.0
  117 + assert [h["_id"] for h in hits] == ["a", "b"]
  118 + by_id = {h["_id"]: h for h in hits}
  119 + assert isclose(by_id["a"]["_fused_score"], 1.0, rel_tol=1e-9)
  120 + assert isclose(by_id["b"]["_fused_score"], 0.0, rel_tol=1e-9)
tests/test_search_rerank_window.py
@@ -614,7 +614,7 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc @@ -614,7 +614,7 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc
614 assert result.results[0].image_url == "https://img/blue.jpg" 614 assert result.results[0].image_url == "https://img/blue.jpg"
615 615
616 616
617 -def test_searcher_debug_info_includes_es_positions_and_context(monkeypatch): 617 +def test_searcher_debug_info_uses_initial_es_max_score_for_normalization(monkeypatch):
618 es_client = _FakeESClient(total_hits=3) 618 es_client = _FakeESClient(total_hits=3)
619 searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client) 619 searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client)
620 context = create_request_context(reqid="dbg", uid="u-dbg") 620 context = create_request_context(reqid="dbg", uid="u-dbg")
@@ -635,10 +635,10 @@ def test_searcher_debug_info_includes_es_positions_and_context(monkeypatch): @@ -635,10 +635,10 @@ def test_searcher_debug_info_includes_es_positions_and_context(monkeypatch):
635 ) 635 )
636 636
637 assert result.debug_info["query_analysis"]["index_languages"] == ["en", "zh"] 637 assert result.debug_info["query_analysis"]["index_languages"] == ["en", "zh"]
  638 + assert result.debug_info["query_analysis"]["query_tokens"] == []
638 assert result.debug_info["es_query_context"]["es_fetch_size"] == 2 639 assert result.debug_info["es_query_context"]["es_fetch_size"] == 2
639 - assert result.debug_info["es_response"]["initial_es_max_score"] == 3.0  
640 - assert result.debug_info["es_response"]["initial_es_min_score"] == 2.0 640 + assert result.debug_info["es_response"]["es_score_normalization_factor"] == 3.0
641 assert result.debug_info["per_result"][0]["initial_rank"] == 1 641 assert result.debug_info["per_result"][0]["initial_rank"] == 1
642 assert result.debug_info["per_result"][0]["final_rank"] == 1 642 assert result.debug_info["per_result"][0]["final_rank"] == 1
643 assert result.debug_info["per_result"][0]["es_score_normalized"] == 1.0 643 assert result.debug_info["per_result"][0]["es_score_normalized"] == 1.0
644 - assert result.debug_info["per_result"][1]["es_score_norm"] == 0.0 644 + assert result.debug_info["per_result"][1]["es_score_normalized"] == 2.0 / 3.0