Commit 814e352b28210fc835604282a1af188d53d81d63

Authored by tangwang
1 parent 581dafae

乘法公式配置化

config/__init__.py
... ... @@ -8,6 +8,7 @@ from config.schema import (
8 8 IndexConfig,
9 9 QueryConfig,
10 10 RerankConfig,
  11 + RerankFusionConfig,
11 12 SPUConfig,
12 13 SearchConfig,
13 14 ServicesConfig,
... ... @@ -36,6 +37,7 @@ __all__ = [
36 37 "IndexConfig",
37 38 "QueryConfig",
38 39 "RerankConfig",
  40 + "RerankFusionConfig",
39 41 "SPUConfig",
40 42 "SearchConfig",
41 43 "ServicesConfig",
... ...
config/config.yaml
... ... @@ -219,6 +219,14 @@ rerank:
219 219 weight_ai: 0.6
220 220 rerank_query_template: "{query}"
221 221 rerank_doc_template: "{title}"
  # Multiplicative fusion of the rerank / text / knn scores:
  #   fused = Π (max(score, 0) + bias) ** exponent   (one term per score)
  # A bias keeps a term from collapsing to zero when its score is 0; an
  # exponent below 1 dampens that term's influence on the product.
  fusion:
    # Rerank-service score term.
    rerank_bias: 0.00001
    rerank_exponent: 1.0
    # Text-match score term.
    text_bias: 0.1
    text_exponent: 0.35
    # KNN (vector) score term.
    knn_bias: 0.6
    knn_exponent: 0.2
222 230  
223 231 # 可扩展服务/provider 注册表(单一配置源)
224 232 services:
... ...
config/loader.py
... ... @@ -37,6 +37,7 @@ from config.schema import (
37 37 ProductEnrichConfig,
38 38 RedisSettings,
39 39 RerankConfig,
  40 + RerankFusionConfig,
40 41 RerankServiceConfig,
41 42 RuntimeConfig,
42 43 SearchConfig,
... ... @@ -393,6 +394,7 @@ class AppConfigLoader:
393 394  
394 395 function_score_cfg = raw.get("function_score") if isinstance(raw.get("function_score"), dict) else {}
395 396 rerank_cfg = raw.get("rerank") if isinstance(raw.get("rerank"), dict) else {}
  397 + fusion_raw = rerank_cfg.get("fusion") if isinstance(rerank_cfg.get("fusion"), dict) else {}
396 398 spu_cfg = raw.get("spu_config") if isinstance(raw.get("spu_config"), dict) else {}
397 399  
398 400 return SearchConfig(
... ... @@ -412,6 +414,14 @@ class AppConfigLoader:
412 414 weight_ai=float(rerank_cfg.get("weight_ai", 0.6)),
413 415 rerank_query_template=str(rerank_cfg.get("rerank_query_template") or "{query}"),
414 416 rerank_doc_template=str(rerank_cfg.get("rerank_doc_template") or "{title}"),
  417 + fusion=RerankFusionConfig(
  418 + rerank_bias=float(fusion_raw.get("rerank_bias", 0.00001)),
  419 + rerank_exponent=float(fusion_raw.get("rerank_exponent", 1.0)),
  420 + text_bias=float(fusion_raw.get("text_bias", 0.1)),
  421 + text_exponent=float(fusion_raw.get("text_exponent", 0.35)),
  422 + knn_bias=float(fusion_raw.get("knn_bias", 0.6)),
  423 + knn_exponent=float(fusion_raw.get("knn_exponent", 0.2)),
  424 + ),
415 425 ),
416 426 spu_config=SPUConfig(
417 427 enabled=bool(spu_cfg.get("enabled", False)),
... ...
config/schema.py
... ... @@ -91,6 +91,21 @@ class FunctionScoreConfig:
91 91  
92 92  
@dataclass(frozen=True)
class RerankFusionConfig:
    """
    Bias / exponent parameters for multiplicative score fusion.

    The fused score is the product, over the rerank, text and knn terms, of
    ``(max(score, 0) + bias) ** exponent``.
    """

    # Rerank term.
    rerank_bias: float = 1e-5
    rerank_exponent: float = 1.0

    # Text term.
    text_bias: float = 0.1
    text_exponent: float = 0.35

    # KNN term.
    knn_bias: float = 0.6
    knn_exponent: float = 0.2
  106 +
  107 +
  108 +@dataclass(frozen=True)
94 109 class RerankConfig:
95 110 """Search-time rerank configuration."""
96 111  
... ... @@ -101,6 +116,7 @@ class RerankConfig:
101 116 weight_ai: float = 0.6
102 117 rerank_query_template: str = "{query}"
103 118 rerank_doc_template: str = "{title}"
  119 + fusion: RerankFusionConfig = field(default_factory=RerankFusionConfig)
104 120  
105 121  
106 122 @dataclass(frozen=True)
... ...
frontend/static/js/app.js
... ... @@ -411,11 +411,6 @@ function displayResults(data) {
411 411 const esNorm = typeof debug.es_score_normalized === 'number'
412 412 ? debug.es_score_normalized.toFixed(4)
413 413 : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized));
414   -
415   - const esNormMinMax = typeof debug.es_score_norm === 'number'
416   - ? debug.es_score_norm.toFixed(4)
417   - : (debug.es_score_norm == null ? '' : String(debug.es_score_norm));
418   -
419 414 const rerankScore = typeof debug.rerank_score === 'number'
420 415 ? debug.rerank_score.toFixed(4)
421 416 : (debug.rerank_score == null ? '' : String(debug.rerank_score));
... ... @@ -437,13 +432,28 @@ function displayResults(data) {
437 432 const resultJson = customStringify(result);
438 433 const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;
439 434 const rerankInputHtml = debug.rerank_input
440   - ? `<details><summary>Rerank input</summary><pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.rerank_input))}</pre></details>`
  435 + ? `
  436 + <details>
  437 + <summary>Rerank input</summary>
  438 + <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.rerank_input))}</pre>
  439 + </details>
  440 + `
441 441 : '';
442 442 const styleIntentHtml = debug.style_intent_sku
443   - ? `<details><summary>Selected SKU</summary><pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.style_intent_sku))}</pre></details>`
  443 + ? `
  444 + <details>
  445 + <summary>Selected SKU</summary>
  446 + <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.style_intent_sku))}</pre>
  447 + </details>
  448 + `
444 449 : '';
445 450 const matchedQueriesHtml = debug.matched_queries
446   - ? `<details><summary>matched_queries</summary><pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.matched_queries))}</pre></details>`
  451 + ? `
  452 + <details>
  453 + <summary>matched_queries</summary>
  454 + <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.matched_queries))}</pre>
  455 + </details>
  456 + `
447 457 : '';
448 458  
449 459 debugHtml = `
... ... @@ -453,9 +463,7 @@ function displayResults(data) {
453 463 <div class="product-debug-line">Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}</div>
454 464 <div class="product-debug-line">Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}</div>
455 465 <div class="product-debug-line">ES score: ${esScore}</div>
456   - <div class="product-debug-line">ES normalized (score / initial ES max): ${esNorm}</div>
457   - <div class="product-debug-line">ES norm (min-max over initial ES window): ${esNormMinMax}</div>
458   - <div class="product-debug-line">ES score min/max: ${escapeHtml(String(debug.es_score_min ?? ''))} / ${escapeHtml(String(debug.es_score_max ?? ''))}</div>
  466 + <div class="product-debug-line">ES normalized: ${esNorm}</div>
459 467 <div class="product-debug-line">Rerank score: ${rerankScore}</div>
460 468 <div class="product-debug-line">rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}</div>
461 469 <div class="product-debug-line">text_score: ${escapeHtml(String(debug.text_score ?? ''))}</div>
... ... @@ -910,9 +918,6 @@ function displayDebugInfo(data) {
910 918 html += `<div>query_tokens: ${escapeHtml((debugInfo.query_analysis.query_tokens || []).join(', ') || 'N/A')}</div>`;
911 919 html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`;
912 920 html += `<div>is_simple_query: ${debugInfo.query_analysis.is_simple_query ? 'yes' : 'no'}</div>`;
913   - if (debugInfo.query_analysis.query_vector_summary) {
914   - html += `<div>query_vector_summary: ${escapeHtml(customStringify(debugInfo.query_analysis.query_vector_summary))}</div>`;
915   - }
916 921  
917 922 if (debugInfo.query_analysis.translations && Object.keys(debugInfo.query_analysis.translations).length > 0) {
918 923 html += '<div>translations: ';
... ... @@ -946,25 +951,21 @@ function displayDebugInfo(data) {
946 951 html += `<div>took_ms: ${debugInfo.es_response.took_ms}ms</div>`;
947 952 html += `<div>total_hits: ${debugInfo.es_response.total_hits}</div>`;
948 953 html += `<div>max_score: ${debugInfo.es_response.max_score?.toFixed(3) || 0}</div>`;
949   - html += `<div>initial_es_max_score: ${escapeHtml(String(debugInfo.es_response.initial_es_max_score ?? ''))}</div>`;
950   - html += `<div>initial_es_min_score: ${escapeHtml(String(debugInfo.es_response.initial_es_min_score ?? ''))}</div>`;
  954 + html += `<div>es_score_normalization_factor: ${escapeHtml(String(debugInfo.es_response.es_score_normalization_factor ?? ''))}</div>`;
951 955 html += '</div>';
952 956 }
953 957  
954 958 if (debugInfo.rerank) {
955 959 html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Rerank:</strong>';
956   - html += `<div>requested: ${debugInfo.rerank.requested ? 'yes' : 'no'}</div>`;
957   - html += `<div>executed: ${debugInfo.rerank.executed ? 'yes' : 'no'}</div>`;
958   - html += `<div>in_rerank_window: ${debugInfo.rerank.in_rerank_window ? 'yes' : 'no'}</div>`;
959   - html += `<div>top_n: ${escapeHtml(String(debugInfo.rerank.top_n ?? ''))}</div>`;
960 960 html += `<div>query_template: ${escapeHtml(debugInfo.rerank.query_template || 'N/A')}</div>`;
961 961 html += `<div>doc_template: ${escapeHtml(debugInfo.rerank.doc_template || 'N/A')}</div>`;
962   - html += '</div>';
963   - }
964   -
965   - if (debugInfo.page_fill) {
966   - html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Page Fill:</strong>';
967   - html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debugInfo.page_fill))}</pre>`;
  962 + html += `<div>query_text: ${escapeHtml(debugInfo.rerank.query_text || 'N/A')}</div>`;
  963 + html += `<div>docs: ${escapeHtml(String(debugInfo.rerank.docs ?? ''))}</div>`;
  964 + html += `<div>top_n: ${escapeHtml(String(debugInfo.rerank.top_n ?? ''))}</div>`;
  965 + if (debugInfo.rerank.fusion) {
  966 + html += '<div>fusion:</div>';
  967 + html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 160px;">${escapeHtml(customStringify(debugInfo.rerank.fusion))}</pre>`;
  968 + }
968 969 html += '</div>';
969 970 }
970 971  
... ... @@ -992,7 +993,7 @@ function displayDebugInfo(data) {
992 993  
993 994 if (debugInfo.es_query_context) {
994 995 html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">ES Query Context:</strong>';
995   - html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 300px;">${escapeHtml(customStringify(debugInfo.es_query_context))}</pre>`;
  996 + html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 240px;">${escapeHtml(customStringify(debugInfo.es_query_context))}</pre>`;
996 997 html += '</div>';
997 998 }
998 999  
... ...
search/rerank_client.py
... ... @@ -10,6 +10,7 @@
10 10 from typing import Dict, Any, List, Optional, Tuple
11 11 import logging
12 12  
  13 +from config.schema import RerankFusionConfig
13 14 from providers import create_rerank_provider
14 15  
15 16 logger = logging.getLogger(__name__)
... ... @@ -176,17 +177,34 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa
176 177 }
177 178  
178 179  
def _fusion_term(score: float, bias: float, exponent: float) -> float:
    """Return ``(max(score, 0) + bias) ** exponent`` with the base floored at 0."""
    # With a negative bias the base can drop below zero, and in Python a
    # negative float raised to a fractional power is a complex number, which
    # cannot be ordered when hits are sorted by their fused score.
    base = max(max(score, 0.0) + bias, 0.0)
    return base ** exponent


def _multiply_fusion_factors(
    rerank_score: float,
    text_score: float,
    knn_score: float,
    fusion: "RerankFusionConfig",
) -> Tuple[float, float, float, float]:
    """
    Compute the three multiplicative fusion factors and their product.

    Each factor is ``(max(score, 0) + bias) ** exponent`` using the matching
    bias / exponent pair from ``fusion``. A base that a negative bias pushes
    below zero is floored at 0 so every factor stays a real ``float``.

    Args:
        rerank_score: Score returned by the rerank service.
        text_score: Text-match score of the hit.
        knn_score: KNN score of the hit.
        fusion: Bias / exponent settings for the three terms.

    Returns:
        ``(rerank_factor, text_factor, knn_factor, fused)`` where ``fused`` is
        the product of the three factors.

    Raises:
        ZeroDivisionError: If a term's base is 0 and its exponent is negative.
    """
    rerank_factor = _fusion_term(rerank_score, fusion.rerank_bias, fusion.rerank_exponent)
    text_factor = _fusion_term(text_score, fusion.text_bias, fusion.text_exponent)
    knn_factor = _fusion_term(knn_score, fusion.knn_bias, fusion.knn_exponent)
    return rerank_factor, text_factor, knn_factor, rerank_factor * text_factor * knn_factor
  191 +
  192 +
179 193 def fuse_scores_and_resort(
180 194 es_hits: List[Dict[str, Any]],
181 195 rerank_scores: List[float],
182 196 weight_es: float = DEFAULT_WEIGHT_ES,
183 197 weight_ai: float = DEFAULT_WEIGHT_AI,
  198 + fusion: Optional[RerankFusionConfig] = None,
184 199 debug: bool = False,
185 200 rerank_debug_rows: Optional[List[Dict[str, Any]]] = None,
186 201 ) -> List[Dict[str, Any]]:
187 202 """
188 203 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。
189 204  
  205 + 融合形式(由 ``fusion`` 配置 bias / exponent)::
  206 + fused = (max(rerank,0)+b_r)^e_r * (max(text,0)+b_t)^e_t * (max(knn,0)+b_k)^e_k
  207 +
190 208 对每条 hit 会写入:
191 209 - _original_score: 原始 ES 分数
192 210 - _rerank_score: 重排服务返回的分数
... ... @@ -199,40 +217,35 @@ def fuse_scores_and_resort(
199 217 rerank_scores: 与 es_hits 等长的重排分数列表
200 218 weight_es: 兼容保留,当前未使用
201 219 weight_ai: 兼容保留,当前未使用
202   -
203   - Returns:
204   - 每条文档的融合调试信息列表,用于 debug_info
205 220 """
206 221 n = len(es_hits)
207 222 if n == 0 or len(rerank_scores) != n:
208 223 return []
209 224  
210   - fused_debug: List[Dict[str, Any]] = []
  225 + f = fusion or RerankFusionConfig()
  226 + fused_debug: List[Dict[str, Any]] = [] if debug else []
211 227  
212 228 for idx, hit in enumerate(es_hits):
213 229 es_score = _to_score(hit.get("_score"))
214   -
215   - ai_score_raw = rerank_scores[idx]
216   - rerank_score = _to_score(ai_score_raw)
217   -
  230 + rerank_score = _to_score(rerank_scores[idx])
218 231 matched_queries = hit.get("matched_queries")
219 232 knn_score = _extract_named_query_score(matched_queries, "knn_query")
220 233 text_components = _collect_text_score_components(matched_queries, es_score)
221 234 text_score = text_components["text_score"]
222   - rerank_factor = max(rerank_score, 0.0) + 0.00001
223   - text_factor = (max(text_score, 0.0) + 0.1) ** 0.35
224   - knn_factor = (max(knn_score, 0.0) + 0.6) ** 0.2
225   - fused = rerank_factor * text_factor * knn_factor
  235 + rerank_factor, text_factor, knn_factor, fused = _multiply_fusion_factors(
  236 + rerank_score, text_score, knn_score, f
  237 + )
226 238  
227 239 hit["_original_score"] = hit.get("_score")
228 240 hit["_rerank_score"] = rerank_score
229 241 hit["_text_score"] = text_score
230 242 hit["_knn_score"] = knn_score
231   - hit["_text_source_score"] = text_components["source_score"]
232   - hit["_text_translation_score"] = text_components["translation_score"]
233   - hit["_text_primary_score"] = text_components["primary_text_score"]
234   - hit["_text_support_score"] = text_components["support_text_score"]
235 243 hit["_fused_score"] = fused
  244 + if debug:
  245 + hit["_text_source_score"] = text_components["source_score"]
  246 + hit["_text_translation_score"] = text_components["translation_score"]
  247 + hit["_text_primary_score"] = text_components["primary_text_score"]
  248 + hit["_text_support_score"] = text_components["support_text_score"]
236 249  
237 250 if debug:
238 251 debug_entry = {
... ... @@ -262,7 +275,6 @@ def fuse_scores_and_resort(
262 275 debug_entry["rerank_input"] = rerank_debug_rows[idx]
263 276 fused_debug.append(debug_entry)
264 277  
265   - # 按融合分数降序重排
266 278 es_hits.sort(
267 279 key=lambda h: h.get("_fused_score", h.get("_score", 0.0)),
268 280 reverse=True,
... ... @@ -281,6 +293,7 @@ def run_rerank(
281 293 rerank_doc_template: str = "{title}",
282 294 top_n: Optional[int] = None,
283 295 debug: bool = False,
  296 + fusion: Optional[RerankFusionConfig] = None,
284 297 ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]:
285 298 """
286 299 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。
... ... @@ -314,6 +327,7 @@ def run_rerank(
314 327 scores,
315 328 weight_es=weight_es,
316 329 weight_ai=weight_ai,
  330 + fusion=fusion,
317 331 debug=debug,
318 332 rerank_debug_rows=rerank_debug_rows,
319 333 )
... ...
search/searcher.py
... ... @@ -4,9 +4,8 @@ Main Searcher module - executes search queries against Elasticsearch.
4 4 Handles query parsing, ranking, and result formatting.
5 5 """
6 6  
7   -from typing import Dict, Any, List, Optional, Union, Tuple
8   -import os
9   -import time, json
  7 +from typing import Dict, Any, List, Optional
  8 +import json
10 9 import logging
11 10 import hashlib
12 11 from string import Formatter
... ... @@ -20,7 +19,7 @@ from .sku_intent_selector import SkuSelectionDecision, StyleSkuSelector
20 19 from config import SearchConfig
21 20 from config.tenant_config_loader import get_tenant_config_loader
22 21 from context.request_context import RequestContext, RequestContextStage
23   -from api.models import FacetResult, FacetValue, FacetConfig
  22 +from api.models import FacetResult, FacetConfig
24 23 from api.result_formatter import ResultFormatter
25 24 from indexer.mapping_generator import get_tenant_index_name
26 25  
... ... @@ -259,13 +258,7 @@ class Searcher:
259 258 if context is not None:
260 259 context.start_stage(RequestContextStage.STYLE_SKU_PREPARE_HITS)
261 260 try:
262   - decisions = self.style_sku_selector.prepare_hits(es_hits, parsed_query)
263   - if decisions and context is not None:
264   - context.store_intermediate_result(
265   - "style_intent_sku_decisions",
266   - {doc_id: decision.to_dict() for doc_id, decision in decisions.items()},
267   - )
268   - return decisions
  261 + return self.style_sku_selector.prepare_hits(es_hits, parsed_query)
269 262 finally:
270 263 if context is not None:
271 264 context.end_stage(RequestContextStage.STYLE_SKU_PREPARE_HITS)
... ... @@ -339,21 +332,10 @@ class Searcher:
339 332 in_rerank_window = do_rerank and (from_ + size) <= rerank_window
340 333 es_fetch_from = 0 if in_rerank_window else from_
341 334 es_fetch_size = rerank_window if in_rerank_window else size
342   - initial_es_positions: Dict[str, int] = {}
343   - initial_es_min_score: Optional[float] = None
344   - initial_es_max_score: Optional[float] = None
345   - page_fill_debug: Optional[Dict[str, Any]] = None
  335 +
  336 + es_score_normalization_factor: Optional[float] = None
  337 + initial_ranks_by_doc: Dict[str, int] = {}
346 338 rerank_debug_info: Optional[Dict[str, Any]] = None
347   - if debug:
348   - rerank_debug_info = {
349   - "requested": do_rerank,
350   - "executed": False,
351   - "in_rerank_window": in_rerank_window,
352   - "rerank_window": rerank_window,
353   - "top_n": from_ + size,
354   - "query_template": effective_query_template,
355   - "doc_template": effective_doc_template,
356   - }
357 339  
358 340 # Start timing
359 341 context.start_stage(RequestContextStage.TOTAL)
... ... @@ -402,8 +384,8 @@ class Searcher:
402 384 try:
403 385 parsed_query = self.query_parser.parse(
404 386 query,
405   - tenant_id=tenant_id,
406 387 generate_vector=enable_embedding,
  388 + tenant_id=tenant_id,
407 389 context=context,
408 390 target_languages=index_langs if enable_translation else [],
409 391 )
... ... @@ -493,8 +475,6 @@ class Searcher:
493 475 context.store_intermediate_result('es_query', es_query)
494 476 if in_rerank_window and rerank_prefetch_source is not None:
495 477 context.store_intermediate_result('es_query_rerank_prefetch_source', rerank_prefetch_source)
496   - context.store_intermediate_result('es_body_for_search', body_for_es)
497   -
498 478 # Serialize ES query to compute a compact size + stable digest for correlation
499 479 es_query_compact = json.dumps(es_query_for_fetch, ensure_ascii=False, separators=(",", ":"))
500 480 es_query_digest = hashlib.sha256(es_query_compact.encode("utf-8")).hexdigest()[:16]
... ... @@ -548,35 +528,29 @@ class Searcher:
548 528 # Store ES response in context
549 529 context.store_intermediate_result('es_response', es_response)
550 530 if debug:
551   - initial_hits = es_response.get('hits', {}).get('hits') or []
552   - initial_scores: List[float] = []
  531 + initial_hits = es_response.get("hits", {}).get("hits") or []
553 532 for rank, hit in enumerate(initial_hits, 1):
554 533 doc_id = hit.get("_id")
555 534 if doc_id is not None:
556   - initial_es_positions[str(doc_id)] = rank
557   - raw_score = hit.get("_score")
558   - try:
559   - if raw_score is not None:
560   - initial_scores.append(float(raw_score))
561   - except (TypeError, ValueError):
562   - pass
563   - raw_max_score = es_response.get('hits', {}).get('max_score')
  535 + initial_ranks_by_doc[str(doc_id)] = rank
  536 + raw_initial_max_score = es_response.get("hits", {}).get("max_score")
564 537 try:
565   - initial_es_max_score = float(raw_max_score) if raw_max_score is not None else None
  538 + es_score_normalization_factor = float(raw_initial_max_score) if raw_initial_max_score is not None else None
566 539 except (TypeError, ValueError):
567   - initial_es_max_score = None
568   - if initial_es_max_score is None and initial_scores:
569   - initial_es_max_score = max(initial_scores)
570   - initial_es_min_score = min(initial_scores) if initial_scores else None
  540 + es_score_normalization_factor = None
  541 + if es_score_normalization_factor is None and initial_hits:
  542 + first_score = initial_hits[0].get("_score")
  543 + try:
  544 + es_score_normalization_factor = float(first_score) if first_score is not None else None
  545 + except (TypeError, ValueError):
  546 + es_score_normalization_factor = None
571 547  
572 548 # Extract timing from ES response
573 549 es_took = es_response.get('took', 0)
574 550 context.logger.info(
575 551 f"ES搜索完成 | 耗时: {es_took}ms | "
576 552 f"命中数: {es_response.get('hits', {}).get('total', {}).get('value', 0)} | "
577   - f"最高分: {(es_response.get('hits', {}).get('max_score') or 0):.3f} | "
578   - f"detected_language={parsed_query.detected_language} | "
579   - f"translations={list((parsed_query.translations or {}).keys())}",
  553 + f"最高分: {(es_response.get('hits', {}).get('max_score') or 0):.3f}",
580 554 extra={'reqid': context.reqid, 'uid': context.uid}
581 555 )
582 556 except Exception as e:
... ... @@ -621,30 +595,31 @@ class Searcher:
621 595 rerank_doc_template=effective_doc_template,
622 596 top_n=(from_ + size),
623 597 debug=debug,
  598 + fusion=rc.fusion,
624 599 )
625 600  
626 601 if rerank_meta is not None:
627   - from config.services_config import get_rerank_service_url
628   - rerank_url = get_rerank_service_url()
629   - if debug and rerank_debug_info is not None:
630   - rerank_debug_info.update({
631   - "executed": True,
632   - "service_url": rerank_url,
  602 + if debug:
  603 + from dataclasses import asdict
  604 + from config.services_config import get_rerank_service_url
  605 + rerank_debug_info = {
  606 + "service_url": get_rerank_service_url(),
  607 + "query_template": effective_query_template,
  608 + "doc_template": effective_doc_template,
633 609 "query_text": str(effective_query_template).format_map({"query": rerank_query}),
634 610 "docs": len(es_response.get("hits", {}).get("hits") or []),
  611 + "top_n": from_ + size,
635 612 "meta": rerank_meta,
636   - })
  613 + "fusion": asdict(rc.fusion),
  614 + }
637 615 context.store_intermediate_result("rerank_scores", fused_debug)
638 616 context.logger.info(
639 617 f"重排完成 | docs={len(es_response.get('hits', {}).get('hits') or [])} | "
640   - f"top_n={from_ + size} | query_template={effective_query_template} | "
641   - f"doc_template={effective_doc_template} | meta={rerank_meta}",
  618 + f"top_n={from_ + size} | meta={rerank_meta}",
642 619 extra={'reqid': context.reqid, 'uid': context.uid}
643 620 )
644 621 except Exception as e:
645 622 context.add_warning(f"Rerank failed: {e}")
646   - if debug and rerank_debug_info is not None:
647   - rerank_debug_info["error"] = str(e)
648 623 context.logger.warning(
649 624 f"调用重排服务失败 | error: {e}",
650 625 extra={'reqid': context.reqid, 'uid': context.uid},
... ... @@ -707,13 +682,6 @@ class Searcher:
707 682 )
708 683 if fill_took:
709 684 es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took)
710   - if debug:
711   - page_fill_debug = {
712   - "requested_ids": page_ids,
713   - "filled": filled,
714   - "fill_took_ms": fill_took,
715   - "response_source_spec": response_source_spec,
716   - }
717 685 context.logger.info(
718 686 f"分页详情回填 | ids={len(page_ids)} | filled={filled} | took={fill_took}ms",
719 687 extra={'reqid': context.reqid, 'uid': context.uid}
... ... @@ -781,8 +749,8 @@ class Searcher:
781 749 # Build per-result debug info (per SPU) when debug mode is enabled
782 750 per_result_debug = []
783 751 if debug and es_hits and formatted_results:
784   - final_positions_by_doc = {
785   - str(hit.get("_id")): (from_ + rank)
  752 + final_ranks_by_doc = {
  753 + str(hit.get("_id")): from_ + rank
786 754 for rank, hit in enumerate(es_hits, 1)
787 755 if hit.get("_id") is not None
788 756 }
... ... @@ -805,28 +773,11 @@ class Searcher:
805 773 es_score = 0.0
806 774 try:
807 775 normalized = (
808   - float(es_score) / float(initial_es_max_score)
809   - if initial_es_max_score
810   - else None
  776 + float(es_score) / float(es_score_normalization_factor)
  777 + if es_score_normalization_factor else None
811 778 )
812 779 except (TypeError, ValueError, ZeroDivisionError):
813 780 normalized = None
814   - try:
815   - es_score_norm = (
816   - (float(es_score) - float(initial_es_min_score))
817   - / (float(initial_es_max_score) - float(initial_es_min_score))
818   - if initial_es_min_score is not None
819   - and initial_es_max_score is not None
820   - and float(initial_es_max_score) > float(initial_es_min_score)
821   - else (
822   - 1.0
823   - if initial_es_min_score is not None
824   - and initial_es_max_score is not None
825   - else None
826   - )
827   - )
828   - except (TypeError, ValueError, ZeroDivisionError):
829   - es_score_norm = None
830 781  
831 782 title_multilingual = source.get("title") if isinstance(source.get("title"), dict) else None
832 783 brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None
... ... @@ -836,11 +787,8 @@ class Searcher:
836 787 "spu_id": spu.spu_id,
837 788 "es_score": es_score,
838 789 "es_score_normalized": normalized,
839   - "es_score_norm": es_score_norm,
840   - "es_score_min": initial_es_min_score,
841   - "es_score_max": initial_es_max_score,
842   - "initial_rank": initial_es_positions.get(str(doc_id)) if doc_id is not None else None,
843   - "final_rank": final_positions_by_doc.get(str(doc_id)) if doc_id is not None else None,
  790 + "initial_rank": initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None,
  791 + "final_rank": final_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None,
844 792 "title_multilingual": title_multilingual,
845 793 "brief_multilingual": brief_multilingual,
846 794 "vendor_multilingual": vendor_multilingual,
... ... @@ -908,12 +856,6 @@ class Searcher:
908 856 # Collect debug information if requested
909 857 debug_info = None
910 858 if debug:
911   - query_vector_summary = None
912   - if parsed_query.query_vector is not None:
913   - query_vector_summary = {
914   - "dims": int(len(parsed_query.query_vector)),
915   - "preview": [round(float(v), 6) for v in parsed_query.query_vector[:8].tolist()],
916   - }
917 859 debug_info = {
918 860 "query_analysis": {
919 861 "original_query": context.query_analysis.original_query,
... ... @@ -923,7 +865,6 @@ class Searcher:
923 865 "index_languages": index_langs,
924 866 "translations": context.query_analysis.translations,
925 867 "has_vector": context.query_analysis.query_vector is not None,
926   - "query_vector_summary": query_vector_summary,
927 868 "query_tokens": getattr(parsed_query, "query_tokens", []),
928 869 "is_simple_query": context.query_analysis.is_simple_query,
929 870 "domain": context.query_analysis.domain,
... ... @@ -931,12 +872,6 @@ class Searcher:
931 872 },
932 873 "es_query": context.get_intermediate_result('es_query', {}),
933 874 "es_query_context": {
934   - "filters": filters,
935   - "range_filters": range_filters,
936   - "facets": [getattr(facet, "field", str(facet)) for facet in facets] if facets else [],
937   - "sort_by": sort_by,
938   - "sort_order": sort_order,
939   - "min_score": min_score,
940 875 "es_fetch_from": es_fetch_from,
941 876 "es_fetch_size": es_fetch_size,
942 877 "in_rerank_window": in_rerank_window,
... ... @@ -948,11 +883,9 @@ class Searcher:
948 883 "total_hits": total_value,
949 884 "max_score": max_score,
950 885 "shards": es_response.get('_shards', {}),
951   - "initial_es_max_score": initial_es_max_score,
952   - "initial_es_min_score": initial_es_min_score,
  886 + "es_score_normalization_factor": es_score_normalization_factor,
953 887 },
954 888 "rerank": rerank_debug_info,
955   - "page_fill": page_fill_debug,
956 889 "feature_flags": context.metadata.get('feature_flags', {}),
957 890 "stage_timings": {
958 891 k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items()
... ... @@ -1126,76 +1059,3 @@ class Searcher:
1126 1059 logger.error(f"Failed to get document {doc_id} from tenant {tenant_id}: {e}", exc_info=True)
1127 1060 return None
1128 1061  
1129   - def _standardize_facets(
1130   - self,
1131   - es_aggregations: Dict[str, Any],
1132   - facet_configs: Optional[List[Union[str, Any]]],
1133   - current_filters: Optional[Dict[str, Any]]
1134   - ) -> Optional[List[FacetResult]]:
1135   - """
1136   - 将 ES 聚合结果转换为标准化的分面格式(返回 Pydantic 模型)。
1137   -
1138   - Args:
1139   - es_aggregations: ES 原始聚合结果
1140   - facet_configs: 分面配置列表(str 或 FacetConfig)
1141   - current_filters: 当前应用的过滤器
1142   -
1143   - Returns:
1144   - 标准化的分面结果列表(FacetResult 对象)
1145   - """
1146   - if not es_aggregations or not facet_configs:
1147   - return None
1148   -
1149   - standardized_facets: List[FacetResult] = []
1150   -
1151   - for config in facet_configs:
1152   - # 解析配置
1153   - if isinstance(config, str):
1154   - field = config
1155   - facet_type = "terms"
1156   - else:
1157   - # FacetConfig 对象
1158   - field = config.field
1159   - facet_type = config.type
1160   -
1161   - agg_name = f"{field}_facet"
1162   -
1163   - if agg_name not in es_aggregations:
1164   - continue
1165   -
1166   - agg_result = es_aggregations[agg_name]
1167   -
1168   - # 获取当前字段的选中值
1169   - selected_values = set()
1170   - if current_filters and field in current_filters:
1171   - filter_value = current_filters[field]
1172   - if isinstance(filter_value, list):
1173   - selected_values = set(filter_value)
1174   - else:
1175   - selected_values = {filter_value}
1176   -
1177   - # 转换 buckets 为 FacetValue 对象
1178   - facet_values: List[FacetValue] = []
1179   - if 'buckets' in agg_result:
1180   - for bucket in agg_result['buckets']:
1181   - value = bucket.get('key')
1182   - count = bucket.get('doc_count', 0)
1183   -
1184   - facet_values.append(FacetValue(
1185   - value=value,
1186   - label=str(value),
1187   - count=count,
1188   - selected=value in selected_values
1189   - ))
1190   -
1191   - # 构建 FacetResult 对象
1192   - facet_result = FacetResult(
1193   - field=field,
1194   - label=field,
1195   - type=facet_type,
1196   - values=facet_values
1197   - )
1198   -
1199   - standardized_facets.append(facet_result)
1200   -
1201   - return standardized_facets if standardized_facets else None
... ...
tests/test_rerank_client.py
1 1 from math import isclose
2 2  
  3 +from config.schema import RerankFusionConfig
3 4 from search.rerank_client import fuse_scores_and_resort
4 5  
5 6  
... ... @@ -88,3 +89,32 @@ def test_fuse_scores_and_resort_downweights_text_only_advantage():
88 89 fuse_scores_and_resort(hits, [0.72, 0.98])
89 90  
90 91 assert [hit["_id"] for hit in hits] == ["rerank-better", "lexical-heavy"]
  92 +
  93 +
def test_fuse_scores_and_resort_uses_configurable_fusion_params():
    """Custom bias / exponent values must drive both the fused scores and the ordering."""
    hits = [
        {
            "_id": "a",
            "_score": 1.0,
            "matched_queries": {"base_query": 2.0, "knn_query": 0.5},
        },
        {
            "_id": "b",
            "_score": 1.0,
            "matched_queries": {"base_query": 3.0, "knn_query": 0.0},
        },
    ]
    # Zero biases and unit exponents reduce the fusion to a plain product.
    fusion = RerankFusionConfig(
        rerank_bias=0.0,
        rerank_exponent=1.0,
        text_bias=0.0,
        text_exponent=1.0,
        knn_bias=0.0,
        knn_exponent=1.0,
    )
    fuse_scores_and_resort(hits, [1.0, 1.0], fusion=fusion)
    # b has knn == 0, so its fused score is 0; a is 1 * 2 * 0.5.
    assert [h["_id"] for h in hits] == ["a", "b"]
    by_id = {h["_id"]: h for h in hits}
    assert isclose(by_id["a"]["_fused_score"], 1.0, rel_tol=1e-9)
    # A relative tolerance is always zero when comparing against 0.0, so an
    # absolute tolerance is required for this check to allow any slack.
    assert isclose(by_id["b"]["_fused_score"], 0.0, abs_tol=1e-12)
... ...
tests/test_search_rerank_window.py
... ... @@ -614,7 +614,7 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc
614 614 assert result.results[0].image_url == "https://img/blue.jpg"
615 615  
616 616  
617   -def test_searcher_debug_info_includes_es_positions_and_context(monkeypatch):
  617 +def test_searcher_debug_info_uses_initial_es_max_score_for_normalization(monkeypatch):
618 618 es_client = _FakeESClient(total_hits=3)
619 619 searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client)
620 620 context = create_request_context(reqid="dbg", uid="u-dbg")
... ... @@ -635,10 +635,10 @@ def test_searcher_debug_info_includes_es_positions_and_context(monkeypatch):
635 635 )
636 636  
637 637 assert result.debug_info["query_analysis"]["index_languages"] == ["en", "zh"]
  638 + assert result.debug_info["query_analysis"]["query_tokens"] == []
638 639 assert result.debug_info["es_query_context"]["es_fetch_size"] == 2
639   - assert result.debug_info["es_response"]["initial_es_max_score"] == 3.0
640   - assert result.debug_info["es_response"]["initial_es_min_score"] == 2.0
  640 + assert result.debug_info["es_response"]["es_score_normalization_factor"] == 3.0
641 641 assert result.debug_info["per_result"][0]["initial_rank"] == 1
642 642 assert result.debug_info["per_result"][0]["final_rank"] == 1
643 643 assert result.debug_info["per_result"][0]["es_score_normalized"] == 1.0
644   - assert result.debug_info["per_result"][1]["es_score_norm"] == 0.0
  644 + assert result.debug_info["per_result"][1]["es_score_normalized"] == 2.0 / 3.0
... ...