Commit dbe04e9e3ed87e1879d2e9830ea17eabae1d251e

Authored by tangwang
1 parent 1e370dc9

统一排序漏斗协议,精简冗余字段与前端渲染逻辑

本次提交将 Coarse Rank 完整纳入 fusion 调试体系,并彻底清理了历史兼容层,
实现了 per-result 阶段信息的单一事实来源。

1. 统一后端阶段构造器 (search/searcher.py)
   - 新增 `_build_result_stage` 方法,为 es_recall / coarse_rank / fine_rank /
     rerank / final_page 提供统一的阶段信息构造入口。
   - 公共字段 (rank, rank_change, signals, ltr_features, fusion_summary,
     fusion_inputs, fusion_factors) 不再分散手写,由构造器集中生成。
   - 为 Coarse Rank 补齐了 fusion 调试字段,使其现在能与 Fine/Rerank 一样
     展示各因子与最终分数的乘法融合公式。

2. 移除 per-result 冗余顶层字段 (search/searcher.py)
   - 删除结果字典中与 ranking_funnel 重复的 `coarse_score`, `rerank_score`,
     `fusion_summary`, `rerank_input`, `ltr_features` 等顶层字段。
   - ranking_funnel 成为阶段相关调试信息的唯一事实来源,避免数据不一致风险。
   - LTR summary 改为直接从 funnel 中的 rerank -> fine_rank -> coarse_rank
     按序读取特征,不再依赖已删除的顶层兜底字段。

3. 抽取重复调试行索引逻辑 (search/searcher.py)
   - 新增 `_index_debug_rows_by_doc` 辅助方法,消除三段重复的
     "按 doc_id 建立 debug_rows 索引" 的代码块。

4. 前端统一阶段渲染与指标清理 (frontend/static/js/app.js)
   - 新增 `renderStageFusionDetails` 公共 helper,供 Coarse/Fine/Rerank
     共用公式详情展示逻辑。
   - 新增 `buildStageStatusMetrics` 统一阶段状态指标生成函数,全局漏斗中的
     阶段状态展示不再需要特判 coarse。
   - per-result 阶段卡片全面切换至使用 `stage.fusion_summary` 等统一字段,
     移除绝大多数 `debug.xxx` 历史字段的 fallback 读取逻辑(仅保留
     `debug.es_score_normalized` 与 `debug.final_rank` 两处兜底)。
   - Fine Rank 与 Final Rerank 的 fusion 公式展示改由 `renderStageFusionDetails` 统一渲染,
     并为 Fine Rank 补上先前缺失的 Signals 展示区域。
frontend/static/js/app.js
... ... @@ -499,6 +499,33 @@ function renderJsonDetails(title, payload, open = false) {
499 499 `;
500 500 }
501 501  
  502 +function renderStageFusionDetails({ summaryTitle, summary, factors, signals, summaryOpen = false, signalsTitle = 'Signals', signalsOpen = false }) {
  503 + return `${renderJsonDetails(summaryTitle, summary || factors, summaryOpen)}${renderJsonDetails(signalsTitle, signals, signalsOpen)}`;
  504 +}
  505 +
  506 +function firstDefined(...values) {
  507 + for (const value of values) {
  508 + if (value !== undefined && value !== null) {
  509 + return value;
  510 + }
  511 + }
  512 + return null;
  513 +}
  514 +
  515 +function buildStageStatusMetrics(stageInfo, extraMetrics = []) {
  516 + const metrics = [];
  517 + if (stageInfo && Object.prototype.hasOwnProperty.call(stageInfo, 'enabled')) {
  518 + metrics.push({ label: 'enabled', value: stageInfo.enabled ? 'yes' : 'no' });
  519 + }
  520 + if (stageInfo && Object.prototype.hasOwnProperty.call(stageInfo, 'applied')) {
  521 + metrics.push({ label: 'applied', value: stageInfo.applied ? 'yes' : 'no' });
  522 + }
  523 + if (stageInfo && stageInfo.skipped_reason) {
  524 + metrics.push({ label: 'skipped_reason', value: stageInfo.skipped_reason });
  525 + }
  526 + return metrics.concat(extraMetrics);
  527 +}
  528 +
502 529 /** Multilingual title/brief/vendor from per-result debug; shown under image/price/title on the left. */
503 530 function buildMultilingualFieldsHtml(debug) {
504 531 if (!debug || typeof debug !== 'object') {
... ... @@ -532,58 +559,86 @@ function buildProductDebugHtml({ debug, result, spuId, tenantId }) {
532 559 ]);
533 560  
534 561 const stageScores = renderScorePills([
535   - { label: 'ES', value: formatDebugNumber(esStage.score ?? debug.es_score), tone: 'tone-es' },
536   - { label: 'ES Norm', value: formatDebugNumber(esStage.normalized_score ?? debug.es_score_normalized), tone: 'tone-neutral' },
537   - { label: 'Coarse', value: formatDebugNumber(coarseStage.score ?? debug.coarse_score), tone: 'tone-coarse' },
538   - { label: 'Fine', value: formatDebugNumber(fineStage.score ?? debug.fine_score), tone: 'tone-fine' },
539   - { label: 'Rerank', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score), tone: 'tone-rerank' },
540   - { label: 'Fused', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score), tone: 'tone-final' },
  562 + { label: 'ES', value: formatDebugNumber(esStage.score), tone: 'tone-es' },
  563 + { label: 'ES Norm', value: formatDebugNumber(firstDefined(esStage.normalized_score, debug.es_score_normalized)), tone: 'tone-neutral' },
  564 + { label: 'Coarse', value: formatDebugNumber(coarseStage.score), tone: 'tone-coarse' },
  565 + { label: 'Fine', value: formatDebugNumber(firstDefined(fineStage.score, fineStage.fine_score)), tone: 'tone-fine' },
  566 + { label: 'Rerank', value: formatDebugNumber(firstDefined(rerankStage.rerank_score, rerankStage.score)), tone: 'tone-rerank' },
  567 + { label: 'Fused', value: formatDebugNumber(firstDefined(rerankStage.fused_score, rerankStage.score, fineStage.score, coarseStage.score)), tone: 'tone-final' },
541 568 ]);
542 569  
543 570 const stageGrid = `
544 571 <div class="debug-stage-grid">
545 572 ${buildStageCard('ES Recall', 'Matched queries and ES raw score', [
546   - { label: 'rank', value: esStage.rank ?? debug.initial_rank ?? 'N/A' },
547   - { label: 'es_score', value: formatDebugNumber(esStage.score ?? debug.es_score) },
548   - { label: 'es_norm', value: formatDebugNumber(esStage.normalized_score ?? debug.es_score_normalized) },
549   - ], renderJsonDetails('Matched Queries', esStage.matched_queries ?? debug.matched_queries, true))}
  573 + { label: 'rank', value: esStage.rank ?? 'N/A' },
  574 + { label: 'es_score', value: formatDebugNumber(esStage.score) },
  575 + { label: 'es_norm', value: formatDebugNumber(firstDefined(esStage.normalized_score, debug.es_score_normalized)) },
  576 + ], renderJsonDetails('Matched Queries', esStage.matched_queries, true))}
550 577 ${buildStageCard('Coarse Rank', 'Text + vector fusion', [
551 578 { label: 'rank', value: coarseStage.rank ?? 'N/A' },
552 579 { label: 'rank_change', value: coarseStage.rank_change ?? 'N/A' },
553   - { label: 'coarse_score', value: formatDebugNumber(coarseStage.score ?? debug.coarse_score) },
554   - { label: 'text_score', value: formatDebugNumber(coarseStage.text_score ?? debug.text_score) },
555   - { label: 'text_source', value: formatDebugNumber(coarseStage.signals?.text_source_score ?? debug.text_source_score) },
556   - { label: 'text_translation', value: formatDebugNumber(coarseStage.signals?.text_translation_score ?? debug.text_translation_score) },
557   - { label: 'text_primary', value: formatDebugNumber(coarseStage.signals?.text_primary_score ?? debug.text_primary_score) },
558   - { label: 'text_support', value: formatDebugNumber(coarseStage.signals?.text_support_score ?? debug.text_support_score) },
559   - { label: 'knn_score', value: formatDebugNumber(coarseStage.knn_score ?? debug.knn_score) },
560   - { label: 'text_knn', value: formatDebugNumber(coarseStage.signals?.text_knn_score ?? debug.text_knn_score) },
561   - { label: 'image_knn', value: formatDebugNumber(coarseStage.signals?.image_knn_score ?? debug.image_knn_score) },
562   - { label: 'text_factor', value: formatDebugNumber(coarseStage.text_factor ?? debug.coarse_text_factor) },
563   - { label: 'knn_factor', value: formatDebugNumber(coarseStage.knn_factor ?? debug.coarse_knn_factor) },
564   - ], renderJsonDetails('Coarse Signals', coarseStage.signals, true))}
  580 + { label: 'coarse_score', value: formatDebugNumber(coarseStage.score) },
  581 + { label: 'es_score', value: formatDebugNumber(coarseStage.es_score) },
  582 + { label: 'text_score', value: formatDebugNumber(coarseStage.text_score) },
  583 + { label: 'text_source', value: formatDebugNumber(coarseStage.signals?.text_source_score) },
  584 + { label: 'text_translation', value: formatDebugNumber(coarseStage.signals?.text_translation_score) },
  585 + { label: 'text_primary', value: formatDebugNumber(coarseStage.signals?.text_primary_score) },
  586 + { label: 'text_support', value: formatDebugNumber(coarseStage.signals?.text_support_score) },
  587 + { label: 'knn_score', value: formatDebugNumber(coarseStage.knn_score) },
  588 + { label: 'text_knn', value: formatDebugNumber(coarseStage.signals?.text_knn_score) },
  589 + { label: 'image_knn', value: formatDebugNumber(coarseStage.signals?.image_knn_score) },
  590 + { label: 'es_factor', value: formatDebugNumber(coarseStage.es_factor) },
  591 + { label: 'text_factor', value: formatDebugNumber(coarseStage.text_factor) },
  592 + { label: 'knn_factor', value: formatDebugNumber(coarseStage.knn_factor) },
  593 + { label: 'text_knn_factor', value: formatDebugNumber(coarseStage.text_knn_factor) },
  594 + { label: 'image_knn_factor', value: formatDebugNumber(coarseStage.image_knn_factor) },
  595 + ], renderStageFusionDetails({
  596 + summaryTitle: 'Coarse Fusion',
  597 + summary: coarseStage.fusion_summary,
  598 + factors: coarseStage.fusion_factors,
  599 + signals: coarseStage.signals,
  600 + summaryOpen: true,
  601 + signalsTitle: 'Coarse Signals',
  602 + signalsOpen: false,
  603 + }))}
565 604 ${buildStageCard('Fine Rank', 'Lightweight reranker output', [
566 605 { label: 'rank', value: fineStage.rank ?? 'N/A' },
567 606 { label: 'rank_change', value: fineStage.rank_change ?? 'N/A' },
568   - { label: 'stage_score', value: formatDebugNumber(fineStage.score ?? debug.score) },
569   - { label: 'fine_score', value: formatDebugNumber(fineStage.fine_score ?? debug.fine_score) },
570   - { label: 'text_score', value: formatDebugNumber(fineStage.text_score ?? debug.text_score) },
571   - { label: 'knn_score', value: formatDebugNumber(fineStage.knn_score ?? debug.knn_score) },
572   - ], `${renderJsonDetails('Fine Fusion', fineStage.fusion_summary || debug.fusion_summary || fineStage.fusion_factors, true)}${renderJsonDetails('Fine Input', fineStage.rerank_input ?? debug.rerank_input, true)}`)}
  607 + { label: 'stage_score', value: formatDebugNumber(firstDefined(fineStage.score, fineStage.fine_score)) },
  608 + { label: 'fine_score', value: formatDebugNumber(fineStage.fine_score) },
  609 + { label: 'text_score', value: formatDebugNumber(fineStage.text_score) },
  610 + { label: 'knn_score', value: formatDebugNumber(fineStage.knn_score) },
  611 + ], `${renderStageFusionDetails({
  612 + summaryTitle: 'Fine Fusion',
  613 + summary: fineStage.fusion_summary,
  614 + factors: fineStage.fusion_factors,
  615 + signals: fineStage.signals,
  616 + summaryOpen: true,
  617 + signalsTitle: 'Fine Signals',
  618 + signalsOpen: false,
  619 + })}${renderJsonDetails('Fine Input', fineStage.rerank_input, true)}`)}
573 620 ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [
574   - { label: 'rank', value: rerankStage.rank ?? finalPageStage.rank ?? debug.final_rank ?? 'N/A' },
  621 + { label: 'rank', value: firstDefined(rerankStage.rank, finalPageStage.rank, debug.final_rank) ?? 'N/A' },
575 622 { label: 'rank_change', value: rerankStage.rank_change ?? finalPageStage.rank_change ?? 'N/A' },
576   - { label: 'stage_score', value: formatDebugNumber(rerankStage.score ?? rerankStage.fused_score ?? debug.score) },
577   - { label: 'rerank_score', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score) },
578   - { label: 'fine_score', value: formatDebugNumber(rerankStage.fine_score ?? debug.fine_score) },
579   - { label: 'text_score', value: formatDebugNumber(rerankStage.text_score ?? debug.text_score) },
580   - { label: 'knn_score', value: formatDebugNumber(rerankStage.knn_score ?? debug.knn_score) },
581   - { label: 'fine_factor', value: formatDebugNumber(rerankStage.fine_factor ?? debug.fine_factor) },
582   - { label: 'rerank_factor', value: formatDebugNumber(rerankStage.rerank_factor ?? debug.rerank_factor) },
583   - { label: 'text_factor', value: formatDebugNumber(rerankStage.text_factor ?? debug.text_factor) },
584   - { label: 'knn_factor', value: formatDebugNumber(rerankStage.knn_factor ?? debug.knn_factor) },
585   - { label: 'fused_score', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score) },
586   - ], `${renderJsonDetails('Final Fusion', rerankStage.fusion_summary || debug.fusion_summary || rerankStage.fusion_factors, false)}${renderJsonDetails('Rerank Signals', rerankStage.signals, false)}`)}
  623 + { label: 'stage_score', value: formatDebugNumber(firstDefined(rerankStage.score, rerankStage.fused_score)) },
  624 + { label: 'rerank_score', value: formatDebugNumber(rerankStage.rerank_score) },
  625 + { label: 'fine_score', value: formatDebugNumber(rerankStage.fine_score) },
  626 + { label: 'text_score', value: formatDebugNumber(rerankStage.text_score) },
  627 + { label: 'knn_score', value: formatDebugNumber(rerankStage.knn_score) },
  628 + { label: 'fine_factor', value: formatDebugNumber(rerankStage.fine_factor) },
  629 + { label: 'rerank_factor', value: formatDebugNumber(rerankStage.rerank_factor) },
  630 + { label: 'text_factor', value: formatDebugNumber(rerankStage.text_factor) },
  631 + { label: 'knn_factor', value: formatDebugNumber(rerankStage.knn_factor) },
  632 + { label: 'fused_score', value: formatDebugNumber(firstDefined(rerankStage.fused_score, rerankStage.score)) },
  633 + ], renderStageFusionDetails({
  634 + summaryTitle: 'Final Fusion',
  635 + summary: rerankStage.fusion_summary,
  636 + factors: rerankStage.fusion_factors,
  637 + signals: rerankStage.signals,
  638 + summaryOpen: false,
  639 + signalsTitle: 'Rerank Signals',
  640 + signalsOpen: false,
  641 + }))}
587 642 </div>
588 643 `;
589 644  
... ... @@ -1099,11 +1154,14 @@ function buildGlobalFunnelHtml(data, debugInfo) {
1099 1154 { label: 'include_named_queries_score', value: esQueryContext.include_named_queries_score ? 'yes' : 'no' },
1100 1155 ])}
1101 1156 ${buildStageCard('Coarse Rank', 'Lexical + vector fusion only', [
1102   - { label: 'docs_in', value: coarseInfo.docs_in ?? searchParams.es_fetch_size ?? 'N/A' },
1103   - { label: 'docs_out', value: coarseInfo.docs_out ?? 'N/A' },
1104   - { label: 'formula', value: 'text x knn' },
  1157 + ...buildStageStatusMetrics(coarseInfo, [
  1158 + { label: 'backend', value: coarseInfo.backend || 'N/A' },
  1159 + { label: 'docs_in', value: coarseInfo.docs_in ?? searchParams.es_fetch_size ?? 'N/A' },
  1160 + { label: 'docs_out', value: coarseInfo.docs_out ?? 'N/A' },
  1161 + { label: 'top_n', value: coarseInfo.top_n ?? 'N/A' },
  1162 + ]),
1105 1163 ], coarseInfo.fusion ? renderJsonDetails('Coarse Fusion', coarseInfo.fusion, false) : '')}
1106   - ${buildStageCard('Fine Rank', 'Lightweight reranker', [
  1164 + ${buildStageCard('Fine Rank', 'Lightweight reranker', buildStageStatusMetrics(fineInfo, [
1107 1165 { label: 'service_url', value: fineInfo.service_url || 'N/A' },
1108 1166 { label: 'docs_in', value: fineInfo.docs_in ?? 'N/A' },
1109 1167 { label: 'docs_out', value: fineInfo.docs_out ?? fineInfo.top_n ?? 'N/A' },
... ... @@ -1111,8 +1169,8 @@ function buildGlobalFunnelHtml(data, debugInfo) {
1111 1169 { label: 'backend', value: fineInfo.backend || 'N/A' },
1112 1170 { label: 'model', value: fineInfo.model || fineInfo.backend_model_name || 'N/A' },
1113 1171 { label: 'query_template', value: fineInfo.query_template || 'N/A' },
1114   - ], fineInfo.meta ? renderJsonDetails('Fine Meta', fineInfo.meta, false) : '')}
1115   - ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [
  1172 + ]), fineInfo.meta ? renderJsonDetails('Fine Meta', fineInfo.meta, false) : '')}
  1173 + ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', buildStageStatusMetrics(rerankInfo, [
1116 1174 { label: 'service_url', value: rerankInfo.service_url || 'N/A' },
1117 1175 { label: 'docs_in', value: rerankInfo.docs_in ?? 'N/A' },
1118 1176 { label: 'docs_out', value: rerankInfo.docs_out ?? 'N/A' },
... ... @@ -1120,7 +1178,7 @@ function buildGlobalFunnelHtml(data, debugInfo) {
1120 1178 { label: 'backend', value: rerankInfo.backend || 'N/A' },
1121 1179 { label: 'model', value: rerankInfo.model || rerankInfo.backend_model_name || 'N/A' },
1122 1180 { label: 'query_template', value: rerankInfo.query_template || 'N/A' },
1123   - ], `${rerankInfo.fusion ? renderJsonDetails('Final Fusion', rerankInfo.fusion, false) : ''}${rerankInfo.meta ? renderJsonDetails('Rerank Meta', rerankInfo.meta, false) : ''}`)}
  1181 + ]), `${rerankInfo.fusion ? renderJsonDetails('Final Fusion', rerankInfo.fusion, false) : ''}${rerankInfo.meta ? renderJsonDetails('Rerank Meta', rerankInfo.meta, false) : ''}`)}
1124 1182 ${buildStageCard('Page Return', 'Final slice returned to UI', [
1125 1183 { label: 'from', value: searchParams.from_ ?? 0 },
1126 1184 { label: 'size', value: searchParams.size ?? 'N/A' },
... ...
search/rerank_client.py
... ... @@ -438,7 +438,7 @@ def _compute_multiplicative_fusion(
438 438 es_score: float,
439 439 text_score: float,
440 440 knn_score: float,
441   - fusion: RerankFusionConfig,
  441 + fusion: CoarseRankFusionConfig | RerankFusionConfig,
442 442 knn_components: Optional[Dict[str, Any]] = None,
443 443 rerank_score: Optional[float] = None,
444 444 fine_score: Optional[float] = None,
... ... @@ -461,8 +461,9 @@ def _compute_multiplicative_fusion(
461 461 )
462 462  
463 463 _add_term("es_score", es_score, fusion.es_bias, fusion.es_exponent)
464   - _add_term("rerank_score", rerank_score, fusion.rerank_bias, fusion.rerank_exponent)
465   - _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent)
  464 + if isinstance(fusion, RerankFusionConfig):
  465 + _add_term("rerank_score", rerank_score, fusion.rerank_bias, fusion.rerank_exponent)
  466 + _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent)
466 467 _add_term("text_score", text_score, fusion.text_bias, fusion.text_exponent)
467 468 _add_term("knn_score", knn_score, fusion.knn_bias, fusion.knn_exponent)
468 469 _maybe_append_weighted_knn_terms(term_rows=term_rows, fusion=fusion, knn_components=knn_components)
... ... @@ -485,36 +486,6 @@ def _compute_multiplicative_fusion(
485 486 }
486 487  
487 488  
488   -def _multiply_coarse_fusion_factors(
489   - es_score: float,
490   - text_score: float,
491   - knn_score: float,
492   - knn_components: Dict[str, Any],
493   - fusion: CoarseRankFusionConfig,
494   -) -> Tuple[float, float, float, float, float, float]:
495   - es_factor = (max(es_score, 0.0) + fusion.es_bias) ** fusion.es_exponent
496   - text_factor = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent
497   - knn_factor = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent
498   - text_knn_bias = float(getattr(fusion, "knn_text_bias", fusion.knn_bias))
499   - image_knn_bias = float(getattr(fusion, "knn_image_bias", fusion.knn_bias))
500   - text_knn_factor = (
501   - (max(_to_score(knn_components.get("weighted_text_knn_score")), 0.0) + text_knn_bias)
502   - ** float(getattr(fusion, "knn_text_exponent", 0.0))
503   - )
504   - image_knn_factor = (
505   - (max(_to_score(knn_components.get("weighted_image_knn_score")), 0.0) + image_knn_bias)
506   - ** float(getattr(fusion, "knn_image_exponent", 0.0))
507   - )
508   - return (
509   - es_factor,
510   - text_factor,
511   - knn_factor,
512   - text_knn_factor,
513   - image_knn_factor,
514   - es_factor * text_factor * knn_factor * text_knn_factor * image_knn_factor,
515   - )
516   -
517   -
518 489 def _has_selected_sku(hit: Dict[str, Any]) -> bool:
519 490 return bool(str(hit.get("_style_rerank_suffix") or "").strip())
520 491  
... ... @@ -538,20 +509,14 @@ def coarse_resort_hits(
538 509 knn_components = signal_bundle["knn_components"]
539 510 text_score = signal_bundle["text_score"]
540 511 knn_score = signal_bundle["knn_score"]
541   - (
542   - es_factor,
543   - text_factor,
544   - knn_factor,
545   - text_knn_factor,
546   - image_knn_factor,
547   - coarse_score,
548   - ) = _multiply_coarse_fusion_factors(
  512 + fusion_result = _compute_multiplicative_fusion(
549 513 es_score=es_score,
550 514 text_score=text_score,
551 515 knn_score=knn_score,
552   - knn_components=knn_components,
553 516 fusion=f,
  517 + knn_components=knn_components,
554 518 )
  519 + coarse_score = fusion_result["score"]
555 520  
556 521 hit["_text_score"] = text_score
557 522 hit["_knn_score"] = knn_score
... ... @@ -597,11 +562,14 @@ def coarse_resort_hits(
597 562 "knn_primary_score": knn_components["primary_knn_score"],
598 563 "knn_support_score": knn_components["support_knn_score"],
599 564 "knn_score": knn_score,
600   - "coarse_es_factor": es_factor,
601   - "coarse_text_factor": text_factor,
602   - "coarse_knn_factor": knn_factor,
603   - "coarse_text_knn_factor": text_knn_factor,
604   - "coarse_image_knn_factor": image_knn_factor,
  565 + "fusion_inputs": fusion_result["inputs"],
  566 + "fusion_factors": fusion_result["factors"],
  567 + "fusion_summary": fusion_result["summary"],
  568 + "coarse_es_factor": fusion_result["factors"].get("es_score"),
  569 + "coarse_text_factor": fusion_result["factors"].get("text_score"),
  570 + "coarse_knn_factor": fusion_result["factors"].get("knn_score"),
  571 + "coarse_text_knn_factor": fusion_result["factors"].get("weighted_text_knn_score"),
  572 + "coarse_image_knn_factor": fusion_result["factors"].get("weighted_image_knn_score"),
605 573 "coarse_score": coarse_score,
606 574 "matched_queries": matched_queries,
607 575 "ltr_features": ltr_features,
... ...
search/searcher.py
... ... @@ -35,19 +35,31 @@ def _log_backend_verbose(payload: Dict[str, Any]) -> None:
35 35 )
36 36  
37 37  
  38 +def _index_debug_rows_by_doc(rows: Any) -> Dict[str, Dict[str, Any]]:
  39 + indexed: Dict[str, Dict[str, Any]] = {}
  40 + if not isinstance(rows, list):
  41 + return indexed
  42 + for item in rows:
  43 + if not isinstance(item, dict):
  44 + continue
  45 + doc_id = item.get("doc_id")
  46 + if doc_id is None:
  47 + continue
  48 + indexed[str(doc_id)] = item
  49 + return indexed
  50 +
  51 +
38 52 def _summarize_ltr_features(per_result_debug: List[Dict[str, Any]], top_n: int = 20) -> Dict[str, Any]:
39 53 rows = list(per_result_debug[:top_n])
40 54 if not rows:
41 55 return {"top_n": 0, "counts": {}, "averages": {}, "top_docs": []}
42 56  
43 57 def _feature(row: Dict[str, Any], key: str) -> Any:
44   - features = row.get("ltr_features")
45   - if isinstance(features, dict):
46   - return features.get(key)
47   - rerank_stage = row.get("ranking_funnel", {}).get("rerank", {})
48   - stage_features = rerank_stage.get("ltr_features")
49   - if isinstance(stage_features, dict):
50   - return stage_features.get(key)
  58 + funnel = row.get("ranking_funnel", {})
  59 + for stage_name in ("rerank", "fine_rank", "coarse_rank"):
  60 + stage_features = funnel.get(stage_name, {}).get("ltr_features")
  61 + if isinstance(stage_features, dict) and key in stage_features:
  62 + return stage_features.get(key)
51 63 return None
52 64  
53 65 def _count(flag: str) -> int:
... ... @@ -801,8 +813,8 @@ class Searcher:
801 813 applied: bool,
802 814 skipped_reason: Optional[str],
803 815 service_profile: Optional[str],
804   - query_template: str,
805   - doc_template: str,
  816 + query_template: Optional[str],
  817 + doc_template: Optional[str],
806 818 docs_in: int,
807 819 docs_out: int,
808 820 top_n: int,
... ... @@ -825,7 +837,11 @@ class Searcher:
825 837 "backend_model_name": backend_model_name,
826 838 "query_template": query_template,
827 839 "doc_template": doc_template,
828   - "query_text": str(query_template).format_map({"query": rerank_query}),
  840 + "query_text": (
  841 + str(query_template).format_map({"query": rerank_query})
  842 + if query_template is not None
  843 + else None
  844 + ),
829 845 "docs_in": docs_in,
830 846 "docs_out": docs_out,
831 847 "top_n": top_n,
... ... @@ -833,6 +849,36 @@ class Searcher:
833 849 "fusion": fusion,
834 850 }
835 851  
  852 + def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]:
  853 + if previous_rank is None or current_rank is None:
  854 + return None
  855 + return previous_rank - current_rank
  856 +
  857 + def _build_result_stage(
  858 + *,
  859 + rank: Optional[int],
  860 + previous_rank: Optional[int],
  861 + values: Optional[Dict[str, Any]] = None,
  862 + signals: Optional[Dict[str, Any]] = None,
  863 + signal_fields: Optional[Dict[str, str]] = None,
  864 + ) -> Dict[str, Any]:
  865 + stage_payload: Dict[str, Any] = {
  866 + "rank": rank,
  867 + "rank_change": _rank_change(previous_rank, rank),
  868 + }
  869 + if values:
  870 + stage_payload.update(values)
  871 + if signals:
  872 + stage_payload["signals"] = signals
  873 + stage_payload["ltr_features"] = signals.get("ltr_features")
  874 + for shared_key in ("fusion_summary", "fusion_inputs", "fusion_factors"):
  875 + if stage_payload.get(shared_key) is None:
  876 + stage_payload[shared_key] = signals.get(shared_key)
  877 + for payload_key, signal_key in (signal_fields or {}).items():
  878 + if stage_payload.get(payload_key) is None:
  879 + stage_payload[payload_key] = signals.get(signal_key)
  880 + return stage_payload
  881 +
836 882 def _run_optional_stage(
837 883 *,
838 884 stage: RequestContextStage,
... ... @@ -967,11 +1013,23 @@ class Searcher:
967 1013 es_response.setdefault("hits", {})["hits"] = hits
968 1014 if debug:
969 1015 coarse_ranks_by_doc = _rank_map(hits)
970   - coarse_debug_info = {
971   - "docs_in": es_fetch_size,
972   - "docs_out": len(hits),
973   - "fusion": coarse_fusion_debug,
974   - }
  1016 + coarse_debug_info = _stage_debug_info(
  1017 + enabled=True,
  1018 + applied=True,
  1019 + skipped_reason=None,
  1020 + service_profile=None,
  1021 + service_url=None,
  1022 + backend="local_coarse_fusion",
  1023 + backend_model_name=None,
  1024 + model=None,
  1025 + query_template=None,
  1026 + doc_template=None,
  1027 + docs_in=es_fetch_size,
  1028 + docs_out=len(hits),
  1029 + top_n=coarse_output_window,
  1030 + meta=None,
  1031 + fusion=coarse_fusion_debug,
  1032 + )
975 1033 context.store_intermediate_result("coarse_rank_scores", coarse_debug)
976 1034 context.logger.info(
977 1035 "粗排完成 | docs_in=%s | docs_out=%s",
... ... @@ -1189,36 +1247,9 @@ class Searcher:
1189 1247 max_score = es_response.get('hits', {}).get('max_score') or 0.0
1190 1248  
1191 1249 # 从上下文中取出重排调试信息(若有)
1192   - rerank_debug_raw = context.get_intermediate_result('rerank_scores', None)
1193   - rerank_debug_by_doc: Dict[str, Dict[str, Any]] = {}
1194   - if isinstance(rerank_debug_raw, list):
1195   - for item in rerank_debug_raw:
1196   - if not isinstance(item, dict):
1197   - continue
1198   - doc_id = item.get("doc_id")
1199   - if doc_id is None:
1200   - continue
1201   - rerank_debug_by_doc[str(doc_id)] = item
1202   - coarse_debug_raw = context.get_intermediate_result('coarse_rank_scores', None)
1203   - coarse_debug_by_doc: Dict[str, Dict[str, Any]] = {}
1204   - if isinstance(coarse_debug_raw, list):
1205   - for item in coarse_debug_raw:
1206   - if not isinstance(item, dict):
1207   - continue
1208   - doc_id = item.get("doc_id")
1209   - if doc_id is None:
1210   - continue
1211   - coarse_debug_by_doc[str(doc_id)] = item
1212   - fine_debug_raw = context.get_intermediate_result('fine_rank_scores', None)
1213   - fine_debug_by_doc: Dict[str, Dict[str, Any]] = {}
1214   - if isinstance(fine_debug_raw, list):
1215   - for item in fine_debug_raw:
1216   - if not isinstance(item, dict):
1217   - continue
1218   - doc_id = item.get("doc_id")
1219   - if doc_id is None:
1220   - continue
1221   - fine_debug_by_doc[str(doc_id)] = item
  1250 + rerank_debug_by_doc = _index_debug_rows_by_doc(context.get_intermediate_result('rerank_scores', None))
  1251 + coarse_debug_by_doc = _index_debug_rows_by_doc(context.get_intermediate_result('coarse_rank_scores', None))
  1252 + fine_debug_by_doc = _index_debug_rows_by_doc(context.get_intermediate_result('fine_rank_scores', None))
1222 1253  
1223 1254 if self._has_style_intent(parsed_query):
1224 1255 if style_intent_decisions:
... ... @@ -1289,47 +1320,6 @@ class Searcher:
1289 1320 "vendor_multilingual": vendor_multilingual,
1290 1321 }
1291 1322  
1292   - if coarse_debug:
1293   - debug_entry["coarse_score"] = coarse_debug.get("coarse_score")
1294   - debug_entry["coarse_es_factor"] = coarse_debug.get("coarse_es_factor")
1295   - debug_entry["coarse_text_factor"] = coarse_debug.get("coarse_text_factor")
1296   - debug_entry["coarse_knn_factor"] = coarse_debug.get("coarse_knn_factor")
1297   -
1298   - # 若存在重排调试信息,则补充 doc 级别的融合分数信息
1299   - if rerank_debug:
1300   - debug_entry["doc_id"] = rerank_debug.get("doc_id")
1301   - debug_entry["score"] = rerank_debug.get("score")
1302   - debug_entry["rerank_score"] = rerank_debug.get("rerank_score")
1303   - debug_entry["fine_score"] = rerank_debug.get("fine_score")
1304   - debug_entry["es_score"] = rerank_debug.get("es_score", es_score)
1305   - debug_entry["text_score"] = rerank_debug.get("text_score")
1306   - debug_entry["knn_score"] = rerank_debug.get("knn_score")
1307   - debug_entry["fusion_inputs"] = rerank_debug.get("fusion_inputs")
1308   - debug_entry["fusion_factors"] = rerank_debug.get("fusion_factors")
1309   - debug_entry["fusion_summary"] = rerank_debug.get("fusion_summary")
1310   - debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor")
1311   - debug_entry["fine_factor"] = rerank_debug.get("fine_factor")
1312   - debug_entry["es_factor"] = rerank_debug.get("es_factor")
1313   - debug_entry["text_factor"] = rerank_debug.get("text_factor")
1314   - debug_entry["knn_factor"] = rerank_debug.get("knn_factor")
1315   - debug_entry["fused_score"] = rerank_debug.get("fused_score")
1316   - debug_entry["rerank_input"] = rerank_debug.get("rerank_input")
1317   - debug_entry["matched_queries"] = rerank_debug.get("matched_queries")
1318   - debug_entry["ltr_features"] = rerank_debug.get("ltr_features")
1319   - elif fine_debug:
1320   - debug_entry["doc_id"] = fine_debug.get("doc_id")
1321   - debug_entry["score"] = fine_debug.get("score")
1322   - debug_entry["fine_score"] = fine_debug.get("fine_score")
1323   - debug_entry["es_score"] = fine_debug.get("es_score", es_score)
1324   - debug_entry["text_score"] = fine_debug.get("text_score")
1325   - debug_entry["knn_score"] = fine_debug.get("knn_score")
1326   - debug_entry["fusion_inputs"] = fine_debug.get("fusion_inputs")
1327   - debug_entry["fusion_factors"] = fine_debug.get("fusion_factors")
1328   - debug_entry["fusion_summary"] = fine_debug.get("fusion_summary")
1329   - debug_entry["es_factor"] = fine_debug.get("es_factor")
1330   - debug_entry["rerank_input"] = fine_debug.get("rerank_input")
1331   - debug_entry["ltr_features"] = fine_debug.get("ltr_features")
1332   -
1333 1323 initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
1334 1324 coarse_rank = coarse_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
1335 1325 fine_rank = fine_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None
... ... @@ -1344,76 +1334,79 @@ class Searcher:
1344 1334 if final_previous_rank is None:
1345 1335 final_previous_rank = initial_rank
1346 1336  
1347   - def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]:
1348   - if previous_rank is None or current_rank is None:
1349   - return None
1350   - return previous_rank - current_rank
1351   -
1352 1337 debug_entry["ranking_funnel"] = {
1353   - "es_recall": {
1354   - "rank": initial_rank,
1355   - "score": es_score,
1356   - "normalized_score": normalized,
1357   - "matched_queries": hit.get("matched_queries"),
1358   - },
1359   - "coarse_rank": {
1360   - "rank": coarse_rank,
1361   - "rank_change": _rank_change(initial_rank, coarse_rank),
1362   - "score": coarse_debug.get("coarse_score") if coarse_debug else None,
1363   - "es_score": coarse_debug.get("es_score") if coarse_debug else es_score,
1364   - "text_score": coarse_debug.get("text_score") if coarse_debug else None,
1365   - "knn_score": coarse_debug.get("knn_score") if coarse_debug else None,
1366   - "es_factor": coarse_debug.get("coarse_es_factor") if coarse_debug else None,
1367   - "text_factor": coarse_debug.get("coarse_text_factor") if coarse_debug else None,
1368   - "knn_factor": coarse_debug.get("coarse_knn_factor") if coarse_debug else None,
1369   - "signals": coarse_debug,
1370   - "ltr_features": coarse_debug.get("ltr_features") if coarse_debug else None,
1371   - },
1372   - "fine_rank": {
1373   - "rank": fine_rank,
1374   - "rank_change": _rank_change(coarse_rank, fine_rank),
1375   - "score": (
1376   - fine_debug.get("score")
1377   - if fine_debug and fine_debug.get("score") is not None
1378   - else hit.get("_fine_fused_score", hit.get("_fine_score"))
1379   - ),
1380   - "fine_score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"),
1381   - "es_score": fine_debug.get("es_score") if fine_debug else es_score,
1382   - "text_score": fine_debug.get("text_score") if fine_debug else hit.get("_text_score"),
1383   - "knn_score": fine_debug.get("knn_score") if fine_debug else hit.get("_knn_score"),
1384   - "es_factor": fine_debug.get("es_factor") if fine_debug else None,
1385   - "fusion_summary": fine_debug.get("fusion_summary") if fine_debug else None,
1386   - "fusion_inputs": fine_debug.get("fusion_inputs") if fine_debug else None,
1387   - "fusion_factors": fine_debug.get("fusion_factors") if fine_debug else None,
1388   - "rerank_input": fine_debug.get("rerank_input") if fine_debug else None,
1389   - "signals": fine_debug,
1390   - "ltr_features": fine_debug.get("ltr_features") if fine_debug else None,
1391   - },
1392   - "rerank": {
1393   - "rank": rerank_rank,
1394   - "rank_change": _rank_change(rerank_previous_rank, rerank_rank),
1395   - "score": rerank_debug.get("score") if rerank_debug else hit.get("_fused_score"),
1396   - "es_score": rerank_debug.get("es_score") if rerank_debug else es_score,
1397   - "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"),
1398   - "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"),
1399   - "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"),
1400   - "text_score": rerank_debug.get("text_score") if rerank_debug else hit.get("_text_score"),
1401   - "knn_score": rerank_debug.get("knn_score") if rerank_debug else hit.get("_knn_score"),
1402   - "fusion_summary": rerank_debug.get("fusion_summary") if rerank_debug else None,
1403   - "fusion_inputs": rerank_debug.get("fusion_inputs") if rerank_debug else None,
1404   - "fusion_factors": rerank_debug.get("fusion_factors") if rerank_debug else None,
1405   - "rerank_factor": rerank_debug.get("rerank_factor") if rerank_debug else None,
1406   - "fine_factor": rerank_debug.get("fine_factor") if rerank_debug else None,
1407   - "es_factor": rerank_debug.get("es_factor") if rerank_debug else None,
1408   - "text_factor": rerank_debug.get("text_factor") if rerank_debug else None,
1409   - "knn_factor": rerank_debug.get("knn_factor") if rerank_debug else None,
1410   - "signals": rerank_debug,
1411   - "ltr_features": rerank_debug.get("ltr_features") if rerank_debug else None,
1412   - },
1413   - "final_page": {
1414   - "rank": final_rank,
1415   - "rank_change": _rank_change(final_previous_rank, final_rank),
1416   - },
  1338 + "es_recall": _build_result_stage(
  1339 + rank=initial_rank,
  1340 + previous_rank=None,
  1341 + values={
  1342 + "score": es_score,
  1343 + "normalized_score": normalized,
  1344 + "matched_queries": hit.get("matched_queries"),
  1345 + },
  1346 + ),
  1347 + "coarse_rank": _build_result_stage(
  1348 + rank=coarse_rank,
  1349 + previous_rank=initial_rank,
  1350 + values={
  1351 + "score": coarse_debug.get("coarse_score") if coarse_debug else None,
  1352 + "es_score": coarse_debug.get("es_score") if coarse_debug else es_score,
  1353 + "text_score": coarse_debug.get("text_score") if coarse_debug else None,
  1354 + "knn_score": coarse_debug.get("knn_score") if coarse_debug else None,
  1355 + },
  1356 + signals=coarse_debug,
  1357 + signal_fields={
  1358 + "es_factor": "coarse_es_factor",
  1359 + "text_factor": "coarse_text_factor",
  1360 + "knn_factor": "coarse_knn_factor",
  1361 + "text_knn_factor": "coarse_text_knn_factor",
  1362 + "image_knn_factor": "coarse_image_knn_factor",
  1363 + },
  1364 + ),
  1365 + "fine_rank": _build_result_stage(
  1366 + rank=fine_rank,
  1367 + previous_rank=coarse_rank,
  1368 + values={
  1369 + "score": (
  1370 + fine_debug.get("score")
  1371 + if fine_debug and fine_debug.get("score") is not None
  1372 + else hit.get("_fine_fused_score", hit.get("_fine_score"))
  1373 + ),
  1374 + "fine_score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"),
  1375 + "es_score": fine_debug.get("es_score") if fine_debug else es_score,
  1376 + "text_score": fine_debug.get("text_score") if fine_debug else hit.get("_text_score"),
  1377 + "knn_score": fine_debug.get("knn_score") if fine_debug else hit.get("_knn_score"),
  1378 + "rerank_input": fine_debug.get("rerank_input") if fine_debug else None,
  1379 + },
  1380 + signals=fine_debug,
  1381 + signal_fields={
  1382 + "es_factor": "es_factor",
  1383 + },
  1384 + ),
  1385 + "rerank": _build_result_stage(
  1386 + rank=rerank_rank,
  1387 + previous_rank=rerank_previous_rank,
  1388 + values={
  1389 + "score": rerank_debug.get("score") if rerank_debug else hit.get("_fused_score"),
  1390 + "es_score": rerank_debug.get("es_score") if rerank_debug else es_score,
  1391 + "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"),
  1392 + "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"),
  1393 + "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"),
  1394 + "text_score": rerank_debug.get("text_score") if rerank_debug else hit.get("_text_score"),
  1395 + "knn_score": rerank_debug.get("knn_score") if rerank_debug else hit.get("_knn_score"),
  1396 + },
  1397 + signals=rerank_debug,
  1398 + signal_fields={
  1399 + "rerank_factor": "rerank_factor",
  1400 + "fine_factor": "fine_factor",
  1401 + "es_factor": "es_factor",
  1402 + "text_factor": "text_factor",
  1403 + "knn_factor": "knn_factor",
  1404 + },
  1405 + ),
  1406 + "final_page": _build_result_stage(
  1407 + rank=final_rank,
  1408 + previous_rank=final_previous_rank,
  1409 + ),
1417 1410 }
1418 1411  
1419 1412 if style_intent_debug:
... ...
tests/test_rerank_client.py
... ... @@ -279,7 +279,9 @@ def test_fuse_scores_and_resort_can_add_weighted_text_and_image_knn_factors():
279 279 knn_tie_breaker=0.25,
280 280 knn_bias=0.1,
281 281 knn_exponent=1.0,
  282 + knn_text_bias=0.1,
282 283 knn_text_exponent=2.0,
  284 + knn_image_bias=0.1,
283 285 knn_image_exponent=3.0,
284 286 )
285 287  
... ... @@ -325,7 +327,9 @@ def test_coarse_resort_hits_can_add_weighted_text_and_image_knn_factors():
325 327 knn_tie_breaker=0.25,
326 328 knn_bias=0.1,
327 329 knn_exponent=1.0,
  330 + knn_text_bias=0.1,
328 331 knn_text_exponent=2.0,
  332 + knn_image_bias=0.1,
329 333 knn_image_exponent=3.0,
330 334 )
331 335  
... ... @@ -345,6 +349,9 @@ def test_coarse_resort_hits_can_add_weighted_text_and_image_knn_factors():
345 349 assert isclose(hits[0]["_coarse_score"], expected_coarse, rel_tol=1e-9)
346 350 assert isclose(debug[0]["coarse_text_knn_factor"], (weighted_text_knn + 0.1) ** 2.0, rel_tol=1e-9)
347 351 assert isclose(debug[0]["coarse_image_knn_factor"], (weighted_image_knn + 0.1) ** 3.0, rel_tol=1e-9)
  352 + assert debug[0]["fusion_inputs"]["es_score"] == 1.0
  353 + assert "weighted_text_knn_score=" in debug[0]["fusion_summary"]
  354 + assert "weighted_image_knn_score=" in debug[0]["fusion_summary"]
348 355  
349 356  
350 357 def test_run_lightweight_rerank_sorts_by_fused_stage_score(monkeypatch):
... ...
tests/test_search_rerank_window.py
... ... @@ -520,9 +520,12 @@ def test_searcher_debug_info_exposes_ranking_funnel(monkeypatch):
520 520  
521 521 assert result.debug_info["ranking_funnel"]["fine_rank"]["docs_out"] == 80
522 522 assert result.debug_info["ranking_funnel"]["rerank"]["docs_out"] == 20
  523 + assert result.debug_info["ranking_funnel"]["coarse_rank"]["applied"] is True
  524 + assert result.debug_info["ranking_funnel"]["coarse_rank"]["backend"] == "local_coarse_fusion"
523 525 first = result.debug_info["per_result"][0]["ranking_funnel"]
524 526 assert first["es_recall"]["rank"] is not None
525 527 assert first["coarse_rank"]["score"] is not None
  528 + assert first["coarse_rank"]["fusion_summary"] is not None
526 529 assert first["fine_rank"]["score"] is not None
527 530 assert first["rerank"]["rerank_score"] is not None
528 531  
... ...