Commit dbe04e9e3ed87e1879d2e9830ea17eabae1d251e
1 parent: 1e370dc9
统一排序漏斗协议,精简冗余字段与前端渲染逻辑
本次提交将 Coarse Rank 完整纳入 fusion 调试体系,并彻底清理了历史兼容层,
实现了 per-result 阶段信息的单一事实来源。
1. 统一后端阶段构造器 (search/searcher.py)
- 新增 `_build_result_stage` 方法,为 es_recall / coarse_rank / fine_rank /
rerank / final_page 提供统一的阶段信息构造入口。
- 公共字段 (rank, rank_change, signals, ltr_features, fusion_summary,
fusion_inputs, fusion_factors) 不再分散手写,由构造器集中生成。
- 为 Coarse Rank 补齐了 fusion 调试字段,使其现在能与 Fine/Rerank 一样
展示各因子与最终分数的乘法融合公式。
2. 移除 per-result 冗余顶层字段 (search/searcher.py)
- 删除结果字典中与 ranking_funnel 重复的 `coarse_score`, `rerank_score`,
`fusion_summary`, `rerank_input`, `ltr_features` 等顶层字段。
- ranking_funnel 成为阶段相关调试信息的唯一事实来源,避免数据不一致风险。
- LTR summary 改为直接从 funnel 中的 rerank -> fine_rank -> coarse_rank
按序读取特征,不再依赖已删除的顶层兜底字段。
3. 抽取重复调试行索引逻辑 (search/searcher.py)
- 新增 `_index_debug_rows_by_doc` 辅助方法,消除三段重复的
"按 doc_id 建立 debug_rows 索引" 的代码块。
4. 前端统一阶段渲染与指标清理 (frontend/static/js/app.js)
- 新增 `renderStageFusionDetails` 公共 helper,供 Coarse/Fine/Rerank
共用公式详情展示逻辑。
- 新增 `buildStageStatusMetrics` 统一阶段状态指标生成函数,全局漏斗中的
阶段状态展示不再需要特判 coarse。
- per-result 阶段卡片全面切换至使用 `stage.fusion_summary` 等统一字段,
移除绝大多数 `debug.xxx` 历史字段的 fallback 读取逻辑(阶段卡片中仅保留 `debug.es_score_normalized` 与 `debug.final_rank` 两处兜底)。
- 为 Fine Rank 与 Final Rerank 补上先前缺失的 fusion 公式展示区域。
Showing 5 changed files with 277 additions and 248 deletions
Show diff stats
frontend/static/js/app.js
| @@ -499,6 +499,33 @@ function renderJsonDetails(title, payload, open = false) { | @@ -499,6 +499,33 @@ function renderJsonDetails(title, payload, open = false) { | ||
| 499 | `; | 499 | `; |
| 500 | } | 500 | } |
| 501 | 501 | ||
| 502 | +function renderStageFusionDetails({ summaryTitle, summary, factors, signals, summaryOpen = false, signalsTitle = 'Signals', signalsOpen = false }) { | ||
| 503 | + return `${renderJsonDetails(summaryTitle, summary || factors, summaryOpen)}${renderJsonDetails(signalsTitle, signals, signalsOpen)}`; | ||
| 504 | +} | ||
| 505 | + | ||
| 506 | +function firstDefined(...values) { | ||
| 507 | + for (const value of values) { | ||
| 508 | + if (value !== undefined && value !== null) { | ||
| 509 | + return value; | ||
| 510 | + } | ||
| 511 | + } | ||
| 512 | + return null; | ||
| 513 | +} | ||
| 514 | + | ||
| 515 | +function buildStageStatusMetrics(stageInfo, extraMetrics = []) { | ||
| 516 | + const metrics = []; | ||
| 517 | + if (stageInfo && Object.prototype.hasOwnProperty.call(stageInfo, 'enabled')) { | ||
| 518 | + metrics.push({ label: 'enabled', value: stageInfo.enabled ? 'yes' : 'no' }); | ||
| 519 | + } | ||
| 520 | + if (stageInfo && Object.prototype.hasOwnProperty.call(stageInfo, 'applied')) { | ||
| 521 | + metrics.push({ label: 'applied', value: stageInfo.applied ? 'yes' : 'no' }); | ||
| 522 | + } | ||
| 523 | + if (stageInfo && stageInfo.skipped_reason) { | ||
| 524 | + metrics.push({ label: 'skipped_reason', value: stageInfo.skipped_reason }); | ||
| 525 | + } | ||
| 526 | + return metrics.concat(extraMetrics); | ||
| 527 | +} | ||
| 528 | + | ||
| 502 | /** Multilingual title/brief/vendor from per-result debug; shown under image/price/title on the left. */ | 529 | /** Multilingual title/brief/vendor from per-result debug; shown under image/price/title on the left. */ |
| 503 | function buildMultilingualFieldsHtml(debug) { | 530 | function buildMultilingualFieldsHtml(debug) { |
| 504 | if (!debug || typeof debug !== 'object') { | 531 | if (!debug || typeof debug !== 'object') { |
| @@ -532,58 +559,86 @@ function buildProductDebugHtml({ debug, result, spuId, tenantId }) { | @@ -532,58 +559,86 @@ function buildProductDebugHtml({ debug, result, spuId, tenantId }) { | ||
| 532 | ]); | 559 | ]); |
| 533 | 560 | ||
| 534 | const stageScores = renderScorePills([ | 561 | const stageScores = renderScorePills([ |
| 535 | - { label: 'ES', value: formatDebugNumber(esStage.score ?? debug.es_score), tone: 'tone-es' }, | ||
| 536 | - { label: 'ES Norm', value: formatDebugNumber(esStage.normalized_score ?? debug.es_score_normalized), tone: 'tone-neutral' }, | ||
| 537 | - { label: 'Coarse', value: formatDebugNumber(coarseStage.score ?? debug.coarse_score), tone: 'tone-coarse' }, | ||
| 538 | - { label: 'Fine', value: formatDebugNumber(fineStage.score ?? debug.fine_score), tone: 'tone-fine' }, | ||
| 539 | - { label: 'Rerank', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score), tone: 'tone-rerank' }, | ||
| 540 | - { label: 'Fused', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score), tone: 'tone-final' }, | 562 | + { label: 'ES', value: formatDebugNumber(esStage.score), tone: 'tone-es' }, |
| 563 | + { label: 'ES Norm', value: formatDebugNumber(firstDefined(esStage.normalized_score, debug.es_score_normalized)), tone: 'tone-neutral' }, | ||
| 564 | + { label: 'Coarse', value: formatDebugNumber(coarseStage.score), tone: 'tone-coarse' }, | ||
| 565 | + { label: 'Fine', value: formatDebugNumber(firstDefined(fineStage.score, fineStage.fine_score)), tone: 'tone-fine' }, | ||
| 566 | + { label: 'Rerank', value: formatDebugNumber(firstDefined(rerankStage.rerank_score, rerankStage.score)), tone: 'tone-rerank' }, | ||
| 567 | + { label: 'Fused', value: formatDebugNumber(firstDefined(rerankStage.fused_score, rerankStage.score, fineStage.score, coarseStage.score)), tone: 'tone-final' }, | ||
| 541 | ]); | 568 | ]); |
| 542 | 569 | ||
| 543 | const stageGrid = ` | 570 | const stageGrid = ` |
| 544 | <div class="debug-stage-grid"> | 571 | <div class="debug-stage-grid"> |
| 545 | ${buildStageCard('ES Recall', 'Matched queries and ES raw score', [ | 572 | ${buildStageCard('ES Recall', 'Matched queries and ES raw score', [ |
| 546 | - { label: 'rank', value: esStage.rank ?? debug.initial_rank ?? 'N/A' }, | ||
| 547 | - { label: 'es_score', value: formatDebugNumber(esStage.score ?? debug.es_score) }, | ||
| 548 | - { label: 'es_norm', value: formatDebugNumber(esStage.normalized_score ?? debug.es_score_normalized) }, | ||
| 549 | - ], renderJsonDetails('Matched Queries', esStage.matched_queries ?? debug.matched_queries, true))} | 573 | + { label: 'rank', value: esStage.rank ?? 'N/A' }, |
| 574 | + { label: 'es_score', value: formatDebugNumber(esStage.score) }, | ||
| 575 | + { label: 'es_norm', value: formatDebugNumber(firstDefined(esStage.normalized_score, debug.es_score_normalized)) }, | ||
| 576 | + ], renderJsonDetails('Matched Queries', esStage.matched_queries, true))} | ||
| 550 | ${buildStageCard('Coarse Rank', 'Text + vector fusion', [ | 577 | ${buildStageCard('Coarse Rank', 'Text + vector fusion', [ |
| 551 | { label: 'rank', value: coarseStage.rank ?? 'N/A' }, | 578 | { label: 'rank', value: coarseStage.rank ?? 'N/A' }, |
| 552 | { label: 'rank_change', value: coarseStage.rank_change ?? 'N/A' }, | 579 | { label: 'rank_change', value: coarseStage.rank_change ?? 'N/A' }, |
| 553 | - { label: 'coarse_score', value: formatDebugNumber(coarseStage.score ?? debug.coarse_score) }, | ||
| 554 | - { label: 'text_score', value: formatDebugNumber(coarseStage.text_score ?? debug.text_score) }, | ||
| 555 | - { label: 'text_source', value: formatDebugNumber(coarseStage.signals?.text_source_score ?? debug.text_source_score) }, | ||
| 556 | - { label: 'text_translation', value: formatDebugNumber(coarseStage.signals?.text_translation_score ?? debug.text_translation_score) }, | ||
| 557 | - { label: 'text_primary', value: formatDebugNumber(coarseStage.signals?.text_primary_score ?? debug.text_primary_score) }, | ||
| 558 | - { label: 'text_support', value: formatDebugNumber(coarseStage.signals?.text_support_score ?? debug.text_support_score) }, | ||
| 559 | - { label: 'knn_score', value: formatDebugNumber(coarseStage.knn_score ?? debug.knn_score) }, | ||
| 560 | - { label: 'text_knn', value: formatDebugNumber(coarseStage.signals?.text_knn_score ?? debug.text_knn_score) }, | ||
| 561 | - { label: 'image_knn', value: formatDebugNumber(coarseStage.signals?.image_knn_score ?? debug.image_knn_score) }, | ||
| 562 | - { label: 'text_factor', value: formatDebugNumber(coarseStage.text_factor ?? debug.coarse_text_factor) }, | ||
| 563 | - { label: 'knn_factor', value: formatDebugNumber(coarseStage.knn_factor ?? debug.coarse_knn_factor) }, | ||
| 564 | - ], renderJsonDetails('Coarse Signals', coarseStage.signals, true))} | 580 | + { label: 'coarse_score', value: formatDebugNumber(coarseStage.score) }, |
| 581 | + { label: 'es_score', value: formatDebugNumber(coarseStage.es_score) }, | ||
| 582 | + { label: 'text_score', value: formatDebugNumber(coarseStage.text_score) }, | ||
| 583 | + { label: 'text_source', value: formatDebugNumber(coarseStage.signals?.text_source_score) }, | ||
| 584 | + { label: 'text_translation', value: formatDebugNumber(coarseStage.signals?.text_translation_score) }, | ||
| 585 | + { label: 'text_primary', value: formatDebugNumber(coarseStage.signals?.text_primary_score) }, | ||
| 586 | + { label: 'text_support', value: formatDebugNumber(coarseStage.signals?.text_support_score) }, | ||
| 587 | + { label: 'knn_score', value: formatDebugNumber(coarseStage.knn_score) }, | ||
| 588 | + { label: 'text_knn', value: formatDebugNumber(coarseStage.signals?.text_knn_score) }, | ||
| 589 | + { label: 'image_knn', value: formatDebugNumber(coarseStage.signals?.image_knn_score) }, | ||
| 590 | + { label: 'es_factor', value: formatDebugNumber(coarseStage.es_factor) }, | ||
| 591 | + { label: 'text_factor', value: formatDebugNumber(coarseStage.text_factor) }, | ||
| 592 | + { label: 'knn_factor', value: formatDebugNumber(coarseStage.knn_factor) }, | ||
| 593 | + { label: 'text_knn_factor', value: formatDebugNumber(coarseStage.text_knn_factor) }, | ||
| 594 | + { label: 'image_knn_factor', value: formatDebugNumber(coarseStage.image_knn_factor) }, | ||
| 595 | + ], renderStageFusionDetails({ | ||
| 596 | + summaryTitle: 'Coarse Fusion', | ||
| 597 | + summary: coarseStage.fusion_summary, | ||
| 598 | + factors: coarseStage.fusion_factors, | ||
| 599 | + signals: coarseStage.signals, | ||
| 600 | + summaryOpen: true, | ||
| 601 | + signalsTitle: 'Coarse Signals', | ||
| 602 | + signalsOpen: false, | ||
| 603 | + }))} | ||
| 565 | ${buildStageCard('Fine Rank', 'Lightweight reranker output', [ | 604 | ${buildStageCard('Fine Rank', 'Lightweight reranker output', [ |
| 566 | { label: 'rank', value: fineStage.rank ?? 'N/A' }, | 605 | { label: 'rank', value: fineStage.rank ?? 'N/A' }, |
| 567 | { label: 'rank_change', value: fineStage.rank_change ?? 'N/A' }, | 606 | { label: 'rank_change', value: fineStage.rank_change ?? 'N/A' }, |
| 568 | - { label: 'stage_score', value: formatDebugNumber(fineStage.score ?? debug.score) }, | ||
| 569 | - { label: 'fine_score', value: formatDebugNumber(fineStage.fine_score ?? debug.fine_score) }, | ||
| 570 | - { label: 'text_score', value: formatDebugNumber(fineStage.text_score ?? debug.text_score) }, | ||
| 571 | - { label: 'knn_score', value: formatDebugNumber(fineStage.knn_score ?? debug.knn_score) }, | ||
| 572 | - ], `${renderJsonDetails('Fine Fusion', fineStage.fusion_summary || debug.fusion_summary || fineStage.fusion_factors, true)}${renderJsonDetails('Fine Input', fineStage.rerank_input ?? debug.rerank_input, true)}`)} | 607 | + { label: 'stage_score', value: formatDebugNumber(firstDefined(fineStage.score, fineStage.fine_score)) }, |
| 608 | + { label: 'fine_score', value: formatDebugNumber(fineStage.fine_score) }, | ||
| 609 | + { label: 'text_score', value: formatDebugNumber(fineStage.text_score) }, | ||
| 610 | + { label: 'knn_score', value: formatDebugNumber(fineStage.knn_score) }, | ||
| 611 | + ], `${renderStageFusionDetails({ | ||
| 612 | + summaryTitle: 'Fine Fusion', | ||
| 613 | + summary: fineStage.fusion_summary, | ||
| 614 | + factors: fineStage.fusion_factors, | ||
| 615 | + signals: fineStage.signals, | ||
| 616 | + summaryOpen: true, | ||
| 617 | + signalsTitle: 'Fine Signals', | ||
| 618 | + signalsOpen: false, | ||
| 619 | + })}${renderJsonDetails('Fine Input', fineStage.rerank_input, true)}`)} | ||
| 573 | ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [ | 620 | ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [ |
| 574 | - { label: 'rank', value: rerankStage.rank ?? finalPageStage.rank ?? debug.final_rank ?? 'N/A' }, | 621 | + { label: 'rank', value: firstDefined(rerankStage.rank, finalPageStage.rank, debug.final_rank) ?? 'N/A' }, |
| 575 | { label: 'rank_change', value: rerankStage.rank_change ?? finalPageStage.rank_change ?? 'N/A' }, | 622 | { label: 'rank_change', value: rerankStage.rank_change ?? finalPageStage.rank_change ?? 'N/A' }, |
| 576 | - { label: 'stage_score', value: formatDebugNumber(rerankStage.score ?? rerankStage.fused_score ?? debug.score) }, | ||
| 577 | - { label: 'rerank_score', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score) }, | ||
| 578 | - { label: 'fine_score', value: formatDebugNumber(rerankStage.fine_score ?? debug.fine_score) }, | ||
| 579 | - { label: 'text_score', value: formatDebugNumber(rerankStage.text_score ?? debug.text_score) }, | ||
| 580 | - { label: 'knn_score', value: formatDebugNumber(rerankStage.knn_score ?? debug.knn_score) }, | ||
| 581 | - { label: 'fine_factor', value: formatDebugNumber(rerankStage.fine_factor ?? debug.fine_factor) }, | ||
| 582 | - { label: 'rerank_factor', value: formatDebugNumber(rerankStage.rerank_factor ?? debug.rerank_factor) }, | ||
| 583 | - { label: 'text_factor', value: formatDebugNumber(rerankStage.text_factor ?? debug.text_factor) }, | ||
| 584 | - { label: 'knn_factor', value: formatDebugNumber(rerankStage.knn_factor ?? debug.knn_factor) }, | ||
| 585 | - { label: 'fused_score', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score) }, | ||
| 586 | - ], `${renderJsonDetails('Final Fusion', rerankStage.fusion_summary || debug.fusion_summary || rerankStage.fusion_factors, false)}${renderJsonDetails('Rerank Signals', rerankStage.signals, false)}`)} | 623 | + { label: 'stage_score', value: formatDebugNumber(firstDefined(rerankStage.score, rerankStage.fused_score)) }, |
| 624 | + { label: 'rerank_score', value: formatDebugNumber(rerankStage.rerank_score) }, | ||
| 625 | + { label: 'fine_score', value: formatDebugNumber(rerankStage.fine_score) }, | ||
| 626 | + { label: 'text_score', value: formatDebugNumber(rerankStage.text_score) }, | ||
| 627 | + { label: 'knn_score', value: formatDebugNumber(rerankStage.knn_score) }, | ||
| 628 | + { label: 'fine_factor', value: formatDebugNumber(rerankStage.fine_factor) }, | ||
| 629 | + { label: 'rerank_factor', value: formatDebugNumber(rerankStage.rerank_factor) }, | ||
| 630 | + { label: 'text_factor', value: formatDebugNumber(rerankStage.text_factor) }, | ||
| 631 | + { label: 'knn_factor', value: formatDebugNumber(rerankStage.knn_factor) }, | ||
| 632 | + { label: 'fused_score', value: formatDebugNumber(firstDefined(rerankStage.fused_score, rerankStage.score)) }, | ||
| 633 | + ], renderStageFusionDetails({ | ||
| 634 | + summaryTitle: 'Final Fusion', | ||
| 635 | + summary: rerankStage.fusion_summary, | ||
| 636 | + factors: rerankStage.fusion_factors, | ||
| 637 | + signals: rerankStage.signals, | ||
| 638 | + summaryOpen: false, | ||
| 639 | + signalsTitle: 'Rerank Signals', | ||
| 640 | + signalsOpen: false, | ||
| 641 | + }))} | ||
| 587 | </div> | 642 | </div> |
| 588 | `; | 643 | `; |
| 589 | 644 | ||
| @@ -1099,11 +1154,14 @@ function buildGlobalFunnelHtml(data, debugInfo) { | @@ -1099,11 +1154,14 @@ function buildGlobalFunnelHtml(data, debugInfo) { | ||
| 1099 | { label: 'include_named_queries_score', value: esQueryContext.include_named_queries_score ? 'yes' : 'no' }, | 1154 | { label: 'include_named_queries_score', value: esQueryContext.include_named_queries_score ? 'yes' : 'no' }, |
| 1100 | ])} | 1155 | ])} |
| 1101 | ${buildStageCard('Coarse Rank', 'Lexical + vector fusion only', [ | 1156 | ${buildStageCard('Coarse Rank', 'Lexical + vector fusion only', [ |
| 1102 | - { label: 'docs_in', value: coarseInfo.docs_in ?? searchParams.es_fetch_size ?? 'N/A' }, | ||
| 1103 | - { label: 'docs_out', value: coarseInfo.docs_out ?? 'N/A' }, | ||
| 1104 | - { label: 'formula', value: 'text x knn' }, | 1157 | + ...buildStageStatusMetrics(coarseInfo, [ |
| 1158 | + { label: 'backend', value: coarseInfo.backend || 'N/A' }, | ||
| 1159 | + { label: 'docs_in', value: coarseInfo.docs_in ?? searchParams.es_fetch_size ?? 'N/A' }, | ||
| 1160 | + { label: 'docs_out', value: coarseInfo.docs_out ?? 'N/A' }, | ||
| 1161 | + { label: 'top_n', value: coarseInfo.top_n ?? 'N/A' }, | ||
| 1162 | + ]), | ||
| 1105 | ], coarseInfo.fusion ? renderJsonDetails('Coarse Fusion', coarseInfo.fusion, false) : '')} | 1163 | ], coarseInfo.fusion ? renderJsonDetails('Coarse Fusion', coarseInfo.fusion, false) : '')} |
| 1106 | - ${buildStageCard('Fine Rank', 'Lightweight reranker', [ | 1164 | + ${buildStageCard('Fine Rank', 'Lightweight reranker', buildStageStatusMetrics(fineInfo, [ |
| 1107 | { label: 'service_url', value: fineInfo.service_url || 'N/A' }, | 1165 | { label: 'service_url', value: fineInfo.service_url || 'N/A' }, |
| 1108 | { label: 'docs_in', value: fineInfo.docs_in ?? 'N/A' }, | 1166 | { label: 'docs_in', value: fineInfo.docs_in ?? 'N/A' }, |
| 1109 | { label: 'docs_out', value: fineInfo.docs_out ?? fineInfo.top_n ?? 'N/A' }, | 1167 | { label: 'docs_out', value: fineInfo.docs_out ?? fineInfo.top_n ?? 'N/A' }, |
| @@ -1111,8 +1169,8 @@ function buildGlobalFunnelHtml(data, debugInfo) { | @@ -1111,8 +1169,8 @@ function buildGlobalFunnelHtml(data, debugInfo) { | ||
| 1111 | { label: 'backend', value: fineInfo.backend || 'N/A' }, | 1169 | { label: 'backend', value: fineInfo.backend || 'N/A' }, |
| 1112 | { label: 'model', value: fineInfo.model || fineInfo.backend_model_name || 'N/A' }, | 1170 | { label: 'model', value: fineInfo.model || fineInfo.backend_model_name || 'N/A' }, |
| 1113 | { label: 'query_template', value: fineInfo.query_template || 'N/A' }, | 1171 | { label: 'query_template', value: fineInfo.query_template || 'N/A' }, |
| 1114 | - ], fineInfo.meta ? renderJsonDetails('Fine Meta', fineInfo.meta, false) : '')} | ||
| 1115 | - ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [ | 1172 | + ]), fineInfo.meta ? renderJsonDetails('Fine Meta', fineInfo.meta, false) : '')} |
| 1173 | + ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', buildStageStatusMetrics(rerankInfo, [ | ||
| 1116 | { label: 'service_url', value: rerankInfo.service_url || 'N/A' }, | 1174 | { label: 'service_url', value: rerankInfo.service_url || 'N/A' }, |
| 1117 | { label: 'docs_in', value: rerankInfo.docs_in ?? 'N/A' }, | 1175 | { label: 'docs_in', value: rerankInfo.docs_in ?? 'N/A' }, |
| 1118 | { label: 'docs_out', value: rerankInfo.docs_out ?? 'N/A' }, | 1176 | { label: 'docs_out', value: rerankInfo.docs_out ?? 'N/A' }, |
| @@ -1120,7 +1178,7 @@ function buildGlobalFunnelHtml(data, debugInfo) { | @@ -1120,7 +1178,7 @@ function buildGlobalFunnelHtml(data, debugInfo) { | ||
| 1120 | { label: 'backend', value: rerankInfo.backend || 'N/A' }, | 1178 | { label: 'backend', value: rerankInfo.backend || 'N/A' }, |
| 1121 | { label: 'model', value: rerankInfo.model || rerankInfo.backend_model_name || 'N/A' }, | 1179 | { label: 'model', value: rerankInfo.model || rerankInfo.backend_model_name || 'N/A' }, |
| 1122 | { label: 'query_template', value: rerankInfo.query_template || 'N/A' }, | 1180 | { label: 'query_template', value: rerankInfo.query_template || 'N/A' }, |
| 1123 | - ], `${rerankInfo.fusion ? renderJsonDetails('Final Fusion', rerankInfo.fusion, false) : ''}${rerankInfo.meta ? renderJsonDetails('Rerank Meta', rerankInfo.meta, false) : ''}`)} | 1181 | + ]), `${rerankInfo.fusion ? renderJsonDetails('Final Fusion', rerankInfo.fusion, false) : ''}${rerankInfo.meta ? renderJsonDetails('Rerank Meta', rerankInfo.meta, false) : ''}`)} |
| 1124 | ${buildStageCard('Page Return', 'Final slice returned to UI', [ | 1182 | ${buildStageCard('Page Return', 'Final slice returned to UI', [ |
| 1125 | { label: 'from', value: searchParams.from_ ?? 0 }, | 1183 | { label: 'from', value: searchParams.from_ ?? 0 }, |
| 1126 | { label: 'size', value: searchParams.size ?? 'N/A' }, | 1184 | { label: 'size', value: searchParams.size ?? 'N/A' }, |
search/rerank_client.py
| @@ -438,7 +438,7 @@ def _compute_multiplicative_fusion( | @@ -438,7 +438,7 @@ def _compute_multiplicative_fusion( | ||
| 438 | es_score: float, | 438 | es_score: float, |
| 439 | text_score: float, | 439 | text_score: float, |
| 440 | knn_score: float, | 440 | knn_score: float, |
| 441 | - fusion: RerankFusionConfig, | 441 | + fusion: CoarseRankFusionConfig | RerankFusionConfig, |
| 442 | knn_components: Optional[Dict[str, Any]] = None, | 442 | knn_components: Optional[Dict[str, Any]] = None, |
| 443 | rerank_score: Optional[float] = None, | 443 | rerank_score: Optional[float] = None, |
| 444 | fine_score: Optional[float] = None, | 444 | fine_score: Optional[float] = None, |
| @@ -461,8 +461,9 @@ def _compute_multiplicative_fusion( | @@ -461,8 +461,9 @@ def _compute_multiplicative_fusion( | ||
| 461 | ) | 461 | ) |
| 462 | 462 | ||
| 463 | _add_term("es_score", es_score, fusion.es_bias, fusion.es_exponent) | 463 | _add_term("es_score", es_score, fusion.es_bias, fusion.es_exponent) |
| 464 | - _add_term("rerank_score", rerank_score, fusion.rerank_bias, fusion.rerank_exponent) | ||
| 465 | - _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent) | 464 | + if isinstance(fusion, RerankFusionConfig): |
| 465 | + _add_term("rerank_score", rerank_score, fusion.rerank_bias, fusion.rerank_exponent) | ||
| 466 | + _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent) | ||
| 466 | _add_term("text_score", text_score, fusion.text_bias, fusion.text_exponent) | 467 | _add_term("text_score", text_score, fusion.text_bias, fusion.text_exponent) |
| 467 | _add_term("knn_score", knn_score, fusion.knn_bias, fusion.knn_exponent) | 468 | _add_term("knn_score", knn_score, fusion.knn_bias, fusion.knn_exponent) |
| 468 | _maybe_append_weighted_knn_terms(term_rows=term_rows, fusion=fusion, knn_components=knn_components) | 469 | _maybe_append_weighted_knn_terms(term_rows=term_rows, fusion=fusion, knn_components=knn_components) |
| @@ -485,36 +486,6 @@ def _compute_multiplicative_fusion( | @@ -485,36 +486,6 @@ def _compute_multiplicative_fusion( | ||
| 485 | } | 486 | } |
| 486 | 487 | ||
| 487 | 488 | ||
| 488 | -def _multiply_coarse_fusion_factors( | ||
| 489 | - es_score: float, | ||
| 490 | - text_score: float, | ||
| 491 | - knn_score: float, | ||
| 492 | - knn_components: Dict[str, Any], | ||
| 493 | - fusion: CoarseRankFusionConfig, | ||
| 494 | -) -> Tuple[float, float, float, float, float, float]: | ||
| 495 | - es_factor = (max(es_score, 0.0) + fusion.es_bias) ** fusion.es_exponent | ||
| 496 | - text_factor = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent | ||
| 497 | - knn_factor = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent | ||
| 498 | - text_knn_bias = float(getattr(fusion, "knn_text_bias", fusion.knn_bias)) | ||
| 499 | - image_knn_bias = float(getattr(fusion, "knn_image_bias", fusion.knn_bias)) | ||
| 500 | - text_knn_factor = ( | ||
| 501 | - (max(_to_score(knn_components.get("weighted_text_knn_score")), 0.0) + text_knn_bias) | ||
| 502 | - ** float(getattr(fusion, "knn_text_exponent", 0.0)) | ||
| 503 | - ) | ||
| 504 | - image_knn_factor = ( | ||
| 505 | - (max(_to_score(knn_components.get("weighted_image_knn_score")), 0.0) + image_knn_bias) | ||
| 506 | - ** float(getattr(fusion, "knn_image_exponent", 0.0)) | ||
| 507 | - ) | ||
| 508 | - return ( | ||
| 509 | - es_factor, | ||
| 510 | - text_factor, | ||
| 511 | - knn_factor, | ||
| 512 | - text_knn_factor, | ||
| 513 | - image_knn_factor, | ||
| 514 | - es_factor * text_factor * knn_factor * text_knn_factor * image_knn_factor, | ||
| 515 | - ) | ||
| 516 | - | ||
| 517 | - | ||
| 518 | def _has_selected_sku(hit: Dict[str, Any]) -> bool: | 489 | def _has_selected_sku(hit: Dict[str, Any]) -> bool: |
| 519 | return bool(str(hit.get("_style_rerank_suffix") or "").strip()) | 490 | return bool(str(hit.get("_style_rerank_suffix") or "").strip()) |
| 520 | 491 | ||
| @@ -538,20 +509,14 @@ def coarse_resort_hits( | @@ -538,20 +509,14 @@ def coarse_resort_hits( | ||
| 538 | knn_components = signal_bundle["knn_components"] | 509 | knn_components = signal_bundle["knn_components"] |
| 539 | text_score = signal_bundle["text_score"] | 510 | text_score = signal_bundle["text_score"] |
| 540 | knn_score = signal_bundle["knn_score"] | 511 | knn_score = signal_bundle["knn_score"] |
| 541 | - ( | ||
| 542 | - es_factor, | ||
| 543 | - text_factor, | ||
| 544 | - knn_factor, | ||
| 545 | - text_knn_factor, | ||
| 546 | - image_knn_factor, | ||
| 547 | - coarse_score, | ||
| 548 | - ) = _multiply_coarse_fusion_factors( | 512 | + fusion_result = _compute_multiplicative_fusion( |
| 549 | es_score=es_score, | 513 | es_score=es_score, |
| 550 | text_score=text_score, | 514 | text_score=text_score, |
| 551 | knn_score=knn_score, | 515 | knn_score=knn_score, |
| 552 | - knn_components=knn_components, | ||
| 553 | fusion=f, | 516 | fusion=f, |
| 517 | + knn_components=knn_components, | ||
| 554 | ) | 518 | ) |
| 519 | + coarse_score = fusion_result["score"] | ||
| 555 | 520 | ||
| 556 | hit["_text_score"] = text_score | 521 | hit["_text_score"] = text_score |
| 557 | hit["_knn_score"] = knn_score | 522 | hit["_knn_score"] = knn_score |
| @@ -597,11 +562,14 @@ def coarse_resort_hits( | @@ -597,11 +562,14 @@ def coarse_resort_hits( | ||
| 597 | "knn_primary_score": knn_components["primary_knn_score"], | 562 | "knn_primary_score": knn_components["primary_knn_score"], |
| 598 | "knn_support_score": knn_components["support_knn_score"], | 563 | "knn_support_score": knn_components["support_knn_score"], |
| 599 | "knn_score": knn_score, | 564 | "knn_score": knn_score, |
| 600 | - "coarse_es_factor": es_factor, | ||
| 601 | - "coarse_text_factor": text_factor, | ||
| 602 | - "coarse_knn_factor": knn_factor, | ||
| 603 | - "coarse_text_knn_factor": text_knn_factor, | ||
| 604 | - "coarse_image_knn_factor": image_knn_factor, | 565 | + "fusion_inputs": fusion_result["inputs"], |
| 566 | + "fusion_factors": fusion_result["factors"], | ||
| 567 | + "fusion_summary": fusion_result["summary"], | ||
| 568 | + "coarse_es_factor": fusion_result["factors"].get("es_score"), | ||
| 569 | + "coarse_text_factor": fusion_result["factors"].get("text_score"), | ||
| 570 | + "coarse_knn_factor": fusion_result["factors"].get("knn_score"), | ||
| 571 | + "coarse_text_knn_factor": fusion_result["factors"].get("weighted_text_knn_score"), | ||
| 572 | + "coarse_image_knn_factor": fusion_result["factors"].get("weighted_image_knn_score"), | ||
| 605 | "coarse_score": coarse_score, | 573 | "coarse_score": coarse_score, |
| 606 | "matched_queries": matched_queries, | 574 | "matched_queries": matched_queries, |
| 607 | "ltr_features": ltr_features, | 575 | "ltr_features": ltr_features, |
search/searcher.py
| @@ -35,19 +35,31 @@ def _log_backend_verbose(payload: Dict[str, Any]) -> None: | @@ -35,19 +35,31 @@ def _log_backend_verbose(payload: Dict[str, Any]) -> None: | ||
| 35 | ) | 35 | ) |
| 36 | 36 | ||
| 37 | 37 | ||
| 38 | +def _index_debug_rows_by_doc(rows: Any) -> Dict[str, Dict[str, Any]]: | ||
| 39 | + indexed: Dict[str, Dict[str, Any]] = {} | ||
| 40 | + if not isinstance(rows, list): | ||
| 41 | + return indexed | ||
| 42 | + for item in rows: | ||
| 43 | + if not isinstance(item, dict): | ||
| 44 | + continue | ||
| 45 | + doc_id = item.get("doc_id") | ||
| 46 | + if doc_id is None: | ||
| 47 | + continue | ||
| 48 | + indexed[str(doc_id)] = item | ||
| 49 | + return indexed | ||
| 50 | + | ||
| 51 | + | ||
| 38 | def _summarize_ltr_features(per_result_debug: List[Dict[str, Any]], top_n: int = 20) -> Dict[str, Any]: | 52 | def _summarize_ltr_features(per_result_debug: List[Dict[str, Any]], top_n: int = 20) -> Dict[str, Any]: |
| 39 | rows = list(per_result_debug[:top_n]) | 53 | rows = list(per_result_debug[:top_n]) |
| 40 | if not rows: | 54 | if not rows: |
| 41 | return {"top_n": 0, "counts": {}, "averages": {}, "top_docs": []} | 55 | return {"top_n": 0, "counts": {}, "averages": {}, "top_docs": []} |
| 42 | 56 | ||
| 43 | def _feature(row: Dict[str, Any], key: str) -> Any: | 57 | def _feature(row: Dict[str, Any], key: str) -> Any: |
| 44 | - features = row.get("ltr_features") | ||
| 45 | - if isinstance(features, dict): | ||
| 46 | - return features.get(key) | ||
| 47 | - rerank_stage = row.get("ranking_funnel", {}).get("rerank", {}) | ||
| 48 | - stage_features = rerank_stage.get("ltr_features") | ||
| 49 | - if isinstance(stage_features, dict): | ||
| 50 | - return stage_features.get(key) | 58 | + funnel = row.get("ranking_funnel", {}) |
| 59 | + for stage_name in ("rerank", "fine_rank", "coarse_rank"): | ||
| 60 | + stage_features = funnel.get(stage_name, {}).get("ltr_features") | ||
| 61 | + if isinstance(stage_features, dict) and key in stage_features: | ||
| 62 | + return stage_features.get(key) | ||
| 51 | return None | 63 | return None |
| 52 | 64 | ||
| 53 | def _count(flag: str) -> int: | 65 | def _count(flag: str) -> int: |
| @@ -801,8 +813,8 @@ class Searcher: | @@ -801,8 +813,8 @@ class Searcher: | ||
| 801 | applied: bool, | 813 | applied: bool, |
| 802 | skipped_reason: Optional[str], | 814 | skipped_reason: Optional[str], |
| 803 | service_profile: Optional[str], | 815 | service_profile: Optional[str], |
| 804 | - query_template: str, | ||
| 805 | - doc_template: str, | 816 | + query_template: Optional[str], |
| 817 | + doc_template: Optional[str], | ||
| 806 | docs_in: int, | 818 | docs_in: int, |
| 807 | docs_out: int, | 819 | docs_out: int, |
| 808 | top_n: int, | 820 | top_n: int, |
| @@ -825,7 +837,11 @@ class Searcher: | @@ -825,7 +837,11 @@ class Searcher: | ||
| 825 | "backend_model_name": backend_model_name, | 837 | "backend_model_name": backend_model_name, |
| 826 | "query_template": query_template, | 838 | "query_template": query_template, |
| 827 | "doc_template": doc_template, | 839 | "doc_template": doc_template, |
| 828 | - "query_text": str(query_template).format_map({"query": rerank_query}), | 840 | + "query_text": ( |
| 841 | + str(query_template).format_map({"query": rerank_query}) | ||
| 842 | + if query_template is not None | ||
| 843 | + else None | ||
| 844 | + ), | ||
| 829 | "docs_in": docs_in, | 845 | "docs_in": docs_in, |
| 830 | "docs_out": docs_out, | 846 | "docs_out": docs_out, |
| 831 | "top_n": top_n, | 847 | "top_n": top_n, |
| @@ -833,6 +849,36 @@ class Searcher: | @@ -833,6 +849,36 @@ class Searcher: | ||
| 833 | "fusion": fusion, | 849 | "fusion": fusion, |
| 834 | } | 850 | } |
| 835 | 851 | ||
| 852 | + def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]: | ||
| 853 | + if previous_rank is None or current_rank is None: | ||
| 854 | + return None | ||
| 855 | + return previous_rank - current_rank | ||
| 856 | + | ||
| 857 | + def _build_result_stage( | ||
| 858 | + *, | ||
| 859 | + rank: Optional[int], | ||
| 860 | + previous_rank: Optional[int], | ||
| 861 | + values: Optional[Dict[str, Any]] = None, | ||
| 862 | + signals: Optional[Dict[str, Any]] = None, | ||
| 863 | + signal_fields: Optional[Dict[str, str]] = None, | ||
| 864 | + ) -> Dict[str, Any]: | ||
| 865 | + stage_payload: Dict[str, Any] = { | ||
| 866 | + "rank": rank, | ||
| 867 | + "rank_change": _rank_change(previous_rank, rank), | ||
| 868 | + } | ||
| 869 | + if values: | ||
| 870 | + stage_payload.update(values) | ||
| 871 | + if signals: | ||
| 872 | + stage_payload["signals"] = signals | ||
| 873 | + stage_payload["ltr_features"] = signals.get("ltr_features") | ||
| 874 | + for shared_key in ("fusion_summary", "fusion_inputs", "fusion_factors"): | ||
| 875 | + if stage_payload.get(shared_key) is None: | ||
| 876 | + stage_payload[shared_key] = signals.get(shared_key) | ||
| 877 | + for payload_key, signal_key in (signal_fields or {}).items(): | ||
| 878 | + if stage_payload.get(payload_key) is None: | ||
| 879 | + stage_payload[payload_key] = signals.get(signal_key) | ||
| 880 | + return stage_payload | ||
| 881 | + | ||
| 836 | def _run_optional_stage( | 882 | def _run_optional_stage( |
| 837 | *, | 883 | *, |
| 838 | stage: RequestContextStage, | 884 | stage: RequestContextStage, |
| @@ -967,11 +1013,23 @@ class Searcher: | @@ -967,11 +1013,23 @@ class Searcher: | ||
| 967 | es_response.setdefault("hits", {})["hits"] = hits | 1013 | es_response.setdefault("hits", {})["hits"] = hits |
| 968 | if debug: | 1014 | if debug: |
| 969 | coarse_ranks_by_doc = _rank_map(hits) | 1015 | coarse_ranks_by_doc = _rank_map(hits) |
| 970 | - coarse_debug_info = { | ||
| 971 | - "docs_in": es_fetch_size, | ||
| 972 | - "docs_out": len(hits), | ||
| 973 | - "fusion": coarse_fusion_debug, | ||
| 974 | - } | 1016 | + coarse_debug_info = _stage_debug_info( |
| 1017 | + enabled=True, | ||
| 1018 | + applied=True, | ||
| 1019 | + skipped_reason=None, | ||
| 1020 | + service_profile=None, | ||
| 1021 | + service_url=None, | ||
| 1022 | + backend="local_coarse_fusion", | ||
| 1023 | + backend_model_name=None, | ||
| 1024 | + model=None, | ||
| 1025 | + query_template=None, | ||
| 1026 | + doc_template=None, | ||
| 1027 | + docs_in=es_fetch_size, | ||
| 1028 | + docs_out=len(hits), | ||
| 1029 | + top_n=coarse_output_window, | ||
| 1030 | + meta=None, | ||
| 1031 | + fusion=coarse_fusion_debug, | ||
| 1032 | + ) | ||
| 975 | context.store_intermediate_result("coarse_rank_scores", coarse_debug) | 1033 | context.store_intermediate_result("coarse_rank_scores", coarse_debug) |
| 976 | context.logger.info( | 1034 | context.logger.info( |
| 977 | "粗排完成 | docs_in=%s | docs_out=%s", | 1035 | "粗排完成 | docs_in=%s | docs_out=%s", |
| @@ -1189,36 +1247,9 @@ class Searcher: | @@ -1189,36 +1247,9 @@ class Searcher: | ||
| 1189 | max_score = es_response.get('hits', {}).get('max_score') or 0.0 | 1247 | max_score = es_response.get('hits', {}).get('max_score') or 0.0 |
| 1190 | 1248 | ||
| 1191 | # 从上下文中取出重排调试信息(若有) | 1249 | # 从上下文中取出重排调试信息(若有) |
| 1192 | - rerank_debug_raw = context.get_intermediate_result('rerank_scores', None) | ||
| 1193 | - rerank_debug_by_doc: Dict[str, Dict[str, Any]] = {} | ||
| 1194 | - if isinstance(rerank_debug_raw, list): | ||
| 1195 | - for item in rerank_debug_raw: | ||
| 1196 | - if not isinstance(item, dict): | ||
| 1197 | - continue | ||
| 1198 | - doc_id = item.get("doc_id") | ||
| 1199 | - if doc_id is None: | ||
| 1200 | - continue | ||
| 1201 | - rerank_debug_by_doc[str(doc_id)] = item | ||
| 1202 | - coarse_debug_raw = context.get_intermediate_result('coarse_rank_scores', None) | ||
| 1203 | - coarse_debug_by_doc: Dict[str, Dict[str, Any]] = {} | ||
| 1204 | - if isinstance(coarse_debug_raw, list): | ||
| 1205 | - for item in coarse_debug_raw: | ||
| 1206 | - if not isinstance(item, dict): | ||
| 1207 | - continue | ||
| 1208 | - doc_id = item.get("doc_id") | ||
| 1209 | - if doc_id is None: | ||
| 1210 | - continue | ||
| 1211 | - coarse_debug_by_doc[str(doc_id)] = item | ||
| 1212 | - fine_debug_raw = context.get_intermediate_result('fine_rank_scores', None) | ||
| 1213 | - fine_debug_by_doc: Dict[str, Dict[str, Any]] = {} | ||
| 1214 | - if isinstance(fine_debug_raw, list): | ||
| 1215 | - for item in fine_debug_raw: | ||
| 1216 | - if not isinstance(item, dict): | ||
| 1217 | - continue | ||
| 1218 | - doc_id = item.get("doc_id") | ||
| 1219 | - if doc_id is None: | ||
| 1220 | - continue | ||
| 1221 | - fine_debug_by_doc[str(doc_id)] = item | 1250 | + rerank_debug_by_doc = _index_debug_rows_by_doc(context.get_intermediate_result('rerank_scores', None)) |
| 1251 | + coarse_debug_by_doc = _index_debug_rows_by_doc(context.get_intermediate_result('coarse_rank_scores', None)) | ||
| 1252 | + fine_debug_by_doc = _index_debug_rows_by_doc(context.get_intermediate_result('fine_rank_scores', None)) | ||
| 1222 | 1253 | ||
| 1223 | if self._has_style_intent(parsed_query): | 1254 | if self._has_style_intent(parsed_query): |
| 1224 | if style_intent_decisions: | 1255 | if style_intent_decisions: |
| @@ -1289,47 +1320,6 @@ class Searcher: | @@ -1289,47 +1320,6 @@ class Searcher: | ||
| 1289 | "vendor_multilingual": vendor_multilingual, | 1320 | "vendor_multilingual": vendor_multilingual, |
| 1290 | } | 1321 | } |
| 1291 | 1322 | ||
| 1292 | - if coarse_debug: | ||
| 1293 | - debug_entry["coarse_score"] = coarse_debug.get("coarse_score") | ||
| 1294 | - debug_entry["coarse_es_factor"] = coarse_debug.get("coarse_es_factor") | ||
| 1295 | - debug_entry["coarse_text_factor"] = coarse_debug.get("coarse_text_factor") | ||
| 1296 | - debug_entry["coarse_knn_factor"] = coarse_debug.get("coarse_knn_factor") | ||
| 1297 | - | ||
| 1298 | - # 若存在重排调试信息,则补充 doc 级别的融合分数信息 | ||
| 1299 | - if rerank_debug: | ||
| 1300 | - debug_entry["doc_id"] = rerank_debug.get("doc_id") | ||
| 1301 | - debug_entry["score"] = rerank_debug.get("score") | ||
| 1302 | - debug_entry["rerank_score"] = rerank_debug.get("rerank_score") | ||
| 1303 | - debug_entry["fine_score"] = rerank_debug.get("fine_score") | ||
| 1304 | - debug_entry["es_score"] = rerank_debug.get("es_score", es_score) | ||
| 1305 | - debug_entry["text_score"] = rerank_debug.get("text_score") | ||
| 1306 | - debug_entry["knn_score"] = rerank_debug.get("knn_score") | ||
| 1307 | - debug_entry["fusion_inputs"] = rerank_debug.get("fusion_inputs") | ||
| 1308 | - debug_entry["fusion_factors"] = rerank_debug.get("fusion_factors") | ||
| 1309 | - debug_entry["fusion_summary"] = rerank_debug.get("fusion_summary") | ||
| 1310 | - debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor") | ||
| 1311 | - debug_entry["fine_factor"] = rerank_debug.get("fine_factor") | ||
| 1312 | - debug_entry["es_factor"] = rerank_debug.get("es_factor") | ||
| 1313 | - debug_entry["text_factor"] = rerank_debug.get("text_factor") | ||
| 1314 | - debug_entry["knn_factor"] = rerank_debug.get("knn_factor") | ||
| 1315 | - debug_entry["fused_score"] = rerank_debug.get("fused_score") | ||
| 1316 | - debug_entry["rerank_input"] = rerank_debug.get("rerank_input") | ||
| 1317 | - debug_entry["matched_queries"] = rerank_debug.get("matched_queries") | ||
| 1318 | - debug_entry["ltr_features"] = rerank_debug.get("ltr_features") | ||
| 1319 | - elif fine_debug: | ||
| 1320 | - debug_entry["doc_id"] = fine_debug.get("doc_id") | ||
| 1321 | - debug_entry["score"] = fine_debug.get("score") | ||
| 1322 | - debug_entry["fine_score"] = fine_debug.get("fine_score") | ||
| 1323 | - debug_entry["es_score"] = fine_debug.get("es_score", es_score) | ||
| 1324 | - debug_entry["text_score"] = fine_debug.get("text_score") | ||
| 1325 | - debug_entry["knn_score"] = fine_debug.get("knn_score") | ||
| 1326 | - debug_entry["fusion_inputs"] = fine_debug.get("fusion_inputs") | ||
| 1327 | - debug_entry["fusion_factors"] = fine_debug.get("fusion_factors") | ||
| 1328 | - debug_entry["fusion_summary"] = fine_debug.get("fusion_summary") | ||
| 1329 | - debug_entry["es_factor"] = fine_debug.get("es_factor") | ||
| 1330 | - debug_entry["rerank_input"] = fine_debug.get("rerank_input") | ||
| 1331 | - debug_entry["ltr_features"] = fine_debug.get("ltr_features") | ||
| 1332 | - | ||
| 1333 | initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None | 1323 | initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None |
| 1334 | coarse_rank = coarse_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None | 1324 | coarse_rank = coarse_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None |
| 1335 | fine_rank = fine_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None | 1325 | fine_rank = fine_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None |
| @@ -1344,76 +1334,79 @@ class Searcher: | @@ -1344,76 +1334,79 @@ class Searcher: | ||
| 1344 | if final_previous_rank is None: | 1334 | if final_previous_rank is None: |
| 1345 | final_previous_rank = initial_rank | 1335 | final_previous_rank = initial_rank |
| 1346 | 1336 | ||
| 1347 | - def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]: | ||
| 1348 | - if previous_rank is None or current_rank is None: | ||
| 1349 | - return None | ||
| 1350 | - return previous_rank - current_rank | ||
| 1351 | - | ||
| 1352 | debug_entry["ranking_funnel"] = { | 1337 | debug_entry["ranking_funnel"] = { |
| 1353 | - "es_recall": { | ||
| 1354 | - "rank": initial_rank, | ||
| 1355 | - "score": es_score, | ||
| 1356 | - "normalized_score": normalized, | ||
| 1357 | - "matched_queries": hit.get("matched_queries"), | ||
| 1358 | - }, | ||
| 1359 | - "coarse_rank": { | ||
| 1360 | - "rank": coarse_rank, | ||
| 1361 | - "rank_change": _rank_change(initial_rank, coarse_rank), | ||
| 1362 | - "score": coarse_debug.get("coarse_score") if coarse_debug else None, | ||
| 1363 | - "es_score": coarse_debug.get("es_score") if coarse_debug else es_score, | ||
| 1364 | - "text_score": coarse_debug.get("text_score") if coarse_debug else None, | ||
| 1365 | - "knn_score": coarse_debug.get("knn_score") if coarse_debug else None, | ||
| 1366 | - "es_factor": coarse_debug.get("coarse_es_factor") if coarse_debug else None, | ||
| 1367 | - "text_factor": coarse_debug.get("coarse_text_factor") if coarse_debug else None, | ||
| 1368 | - "knn_factor": coarse_debug.get("coarse_knn_factor") if coarse_debug else None, | ||
| 1369 | - "signals": coarse_debug, | ||
| 1370 | - "ltr_features": coarse_debug.get("ltr_features") if coarse_debug else None, | ||
| 1371 | - }, | ||
| 1372 | - "fine_rank": { | ||
| 1373 | - "rank": fine_rank, | ||
| 1374 | - "rank_change": _rank_change(coarse_rank, fine_rank), | ||
| 1375 | - "score": ( | ||
| 1376 | - fine_debug.get("score") | ||
| 1377 | - if fine_debug and fine_debug.get("score") is not None | ||
| 1378 | - else hit.get("_fine_fused_score", hit.get("_fine_score")) | ||
| 1379 | - ), | ||
| 1380 | - "fine_score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"), | ||
| 1381 | - "es_score": fine_debug.get("es_score") if fine_debug else es_score, | ||
| 1382 | - "text_score": fine_debug.get("text_score") if fine_debug else hit.get("_text_score"), | ||
| 1383 | - "knn_score": fine_debug.get("knn_score") if fine_debug else hit.get("_knn_score"), | ||
| 1384 | - "es_factor": fine_debug.get("es_factor") if fine_debug else None, | ||
| 1385 | - "fusion_summary": fine_debug.get("fusion_summary") if fine_debug else None, | ||
| 1386 | - "fusion_inputs": fine_debug.get("fusion_inputs") if fine_debug else None, | ||
| 1387 | - "fusion_factors": fine_debug.get("fusion_factors") if fine_debug else None, | ||
| 1388 | - "rerank_input": fine_debug.get("rerank_input") if fine_debug else None, | ||
| 1389 | - "signals": fine_debug, | ||
| 1390 | - "ltr_features": fine_debug.get("ltr_features") if fine_debug else None, | ||
| 1391 | - }, | ||
| 1392 | - "rerank": { | ||
| 1393 | - "rank": rerank_rank, | ||
| 1394 | - "rank_change": _rank_change(rerank_previous_rank, rerank_rank), | ||
| 1395 | - "score": rerank_debug.get("score") if rerank_debug else hit.get("_fused_score"), | ||
| 1396 | - "es_score": rerank_debug.get("es_score") if rerank_debug else es_score, | ||
| 1397 | - "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"), | ||
| 1398 | - "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"), | ||
| 1399 | - "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"), | ||
| 1400 | - "text_score": rerank_debug.get("text_score") if rerank_debug else hit.get("_text_score"), | ||
| 1401 | - "knn_score": rerank_debug.get("knn_score") if rerank_debug else hit.get("_knn_score"), | ||
| 1402 | - "fusion_summary": rerank_debug.get("fusion_summary") if rerank_debug else None, | ||
| 1403 | - "fusion_inputs": rerank_debug.get("fusion_inputs") if rerank_debug else None, | ||
| 1404 | - "fusion_factors": rerank_debug.get("fusion_factors") if rerank_debug else None, | ||
| 1405 | - "rerank_factor": rerank_debug.get("rerank_factor") if rerank_debug else None, | ||
| 1406 | - "fine_factor": rerank_debug.get("fine_factor") if rerank_debug else None, | ||
| 1407 | - "es_factor": rerank_debug.get("es_factor") if rerank_debug else None, | ||
| 1408 | - "text_factor": rerank_debug.get("text_factor") if rerank_debug else None, | ||
| 1409 | - "knn_factor": rerank_debug.get("knn_factor") if rerank_debug else None, | ||
| 1410 | - "signals": rerank_debug, | ||
| 1411 | - "ltr_features": rerank_debug.get("ltr_features") if rerank_debug else None, | ||
| 1412 | - }, | ||
| 1413 | - "final_page": { | ||
| 1414 | - "rank": final_rank, | ||
| 1415 | - "rank_change": _rank_change(final_previous_rank, final_rank), | ||
| 1416 | - }, | 1338 | + "es_recall": _build_result_stage( |
| 1339 | + rank=initial_rank, | ||
| 1340 | + previous_rank=None, | ||
| 1341 | + values={ | ||
| 1342 | + "score": es_score, | ||
| 1343 | + "normalized_score": normalized, | ||
| 1344 | + "matched_queries": hit.get("matched_queries"), | ||
| 1345 | + }, | ||
| 1346 | + ), | ||
| 1347 | + "coarse_rank": _build_result_stage( | ||
| 1348 | + rank=coarse_rank, | ||
| 1349 | + previous_rank=initial_rank, | ||
| 1350 | + values={ | ||
| 1351 | + "score": coarse_debug.get("coarse_score") if coarse_debug else None, | ||
| 1352 | + "es_score": coarse_debug.get("es_score") if coarse_debug else es_score, | ||
| 1353 | + "text_score": coarse_debug.get("text_score") if coarse_debug else None, | ||
| 1354 | + "knn_score": coarse_debug.get("knn_score") if coarse_debug else None, | ||
| 1355 | + }, | ||
| 1356 | + signals=coarse_debug, | ||
| 1357 | + signal_fields={ | ||
| 1358 | + "es_factor": "coarse_es_factor", | ||
| 1359 | + "text_factor": "coarse_text_factor", | ||
| 1360 | + "knn_factor": "coarse_knn_factor", | ||
| 1361 | + "text_knn_factor": "coarse_text_knn_factor", | ||
| 1362 | + "image_knn_factor": "coarse_image_knn_factor", | ||
| 1363 | + }, | ||
| 1364 | + ), | ||
| 1365 | + "fine_rank": _build_result_stage( | ||
| 1366 | + rank=fine_rank, | ||
| 1367 | + previous_rank=coarse_rank, | ||
| 1368 | + values={ | ||
| 1369 | + "score": ( | ||
| 1370 | + fine_debug.get("score") | ||
| 1371 | + if fine_debug and fine_debug.get("score") is not None | ||
| 1372 | + else hit.get("_fine_fused_score", hit.get("_fine_score")) | ||
| 1373 | + ), | ||
| 1374 | + "fine_score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"), | ||
| 1375 | + "es_score": fine_debug.get("es_score") if fine_debug else es_score, | ||
| 1376 | + "text_score": fine_debug.get("text_score") if fine_debug else hit.get("_text_score"), | ||
| 1377 | + "knn_score": fine_debug.get("knn_score") if fine_debug else hit.get("_knn_score"), | ||
| 1378 | + "rerank_input": fine_debug.get("rerank_input") if fine_debug else None, | ||
| 1379 | + }, | ||
| 1380 | + signals=fine_debug, | ||
| 1381 | + signal_fields={ | ||
| 1382 | + "es_factor": "es_factor", | ||
| 1383 | + }, | ||
| 1384 | + ), | ||
| 1385 | + "rerank": _build_result_stage( | ||
| 1386 | + rank=rerank_rank, | ||
| 1387 | + previous_rank=rerank_previous_rank, | ||
| 1388 | + values={ | ||
| 1389 | + "score": rerank_debug.get("score") if rerank_debug else hit.get("_fused_score"), | ||
| 1390 | + "es_score": rerank_debug.get("es_score") if rerank_debug else es_score, | ||
| 1391 | + "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"), | ||
| 1392 | + "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"), | ||
| 1393 | + "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"), | ||
| 1394 | + "text_score": rerank_debug.get("text_score") if rerank_debug else hit.get("_text_score"), | ||
| 1395 | + "knn_score": rerank_debug.get("knn_score") if rerank_debug else hit.get("_knn_score"), | ||
| 1396 | + }, | ||
| 1397 | + signals=rerank_debug, | ||
| 1398 | + signal_fields={ | ||
| 1399 | + "rerank_factor": "rerank_factor", | ||
| 1400 | + "fine_factor": "fine_factor", | ||
| 1401 | + "es_factor": "es_factor", | ||
| 1402 | + "text_factor": "text_factor", | ||
| 1403 | + "knn_factor": "knn_factor", | ||
| 1404 | + }, | ||
| 1405 | + ), | ||
| 1406 | + "final_page": _build_result_stage( | ||
| 1407 | + rank=final_rank, | ||
| 1408 | + previous_rank=final_previous_rank, | ||
| 1409 | + ), | ||
| 1417 | } | 1410 | } |
| 1418 | 1411 | ||
| 1419 | if style_intent_debug: | 1412 | if style_intent_debug: |
tests/test_rerank_client.py
| @@ -279,7 +279,9 @@ def test_fuse_scores_and_resort_can_add_weighted_text_and_image_knn_factors(): | @@ -279,7 +279,9 @@ def test_fuse_scores_and_resort_can_add_weighted_text_and_image_knn_factors(): | ||
| 279 | knn_tie_breaker=0.25, | 279 | knn_tie_breaker=0.25, |
| 280 | knn_bias=0.1, | 280 | knn_bias=0.1, |
| 281 | knn_exponent=1.0, | 281 | knn_exponent=1.0, |
| 282 | + knn_text_bias=0.1, | ||
| 282 | knn_text_exponent=2.0, | 283 | knn_text_exponent=2.0, |
| 284 | + knn_image_bias=0.1, | ||
| 283 | knn_image_exponent=3.0, | 285 | knn_image_exponent=3.0, |
| 284 | ) | 286 | ) |
| 285 | 287 | ||
| @@ -325,7 +327,9 @@ def test_coarse_resort_hits_can_add_weighted_text_and_image_knn_factors(): | @@ -325,7 +327,9 @@ def test_coarse_resort_hits_can_add_weighted_text_and_image_knn_factors(): | ||
| 325 | knn_tie_breaker=0.25, | 327 | knn_tie_breaker=0.25, |
| 326 | knn_bias=0.1, | 328 | knn_bias=0.1, |
| 327 | knn_exponent=1.0, | 329 | knn_exponent=1.0, |
| 330 | + knn_text_bias=0.1, | ||
| 328 | knn_text_exponent=2.0, | 331 | knn_text_exponent=2.0, |
| 332 | + knn_image_bias=0.1, | ||
| 329 | knn_image_exponent=3.0, | 333 | knn_image_exponent=3.0, |
| 330 | ) | 334 | ) |
| 331 | 335 | ||
| @@ -345,6 +349,9 @@ def test_coarse_resort_hits_can_add_weighted_text_and_image_knn_factors(): | @@ -345,6 +349,9 @@ def test_coarse_resort_hits_can_add_weighted_text_and_image_knn_factors(): | ||
| 345 | assert isclose(hits[0]["_coarse_score"], expected_coarse, rel_tol=1e-9) | 349 | assert isclose(hits[0]["_coarse_score"], expected_coarse, rel_tol=1e-9) |
| 346 | assert isclose(debug[0]["coarse_text_knn_factor"], (weighted_text_knn + 0.1) ** 2.0, rel_tol=1e-9) | 350 | assert isclose(debug[0]["coarse_text_knn_factor"], (weighted_text_knn + 0.1) ** 2.0, rel_tol=1e-9) |
| 347 | assert isclose(debug[0]["coarse_image_knn_factor"], (weighted_image_knn + 0.1) ** 3.0, rel_tol=1e-9) | 351 | assert isclose(debug[0]["coarse_image_knn_factor"], (weighted_image_knn + 0.1) ** 3.0, rel_tol=1e-9) |
| 352 | + assert debug[0]["fusion_inputs"]["es_score"] == 1.0 | ||
| 353 | + assert "weighted_text_knn_score=" in debug[0]["fusion_summary"] | ||
| 354 | + assert "weighted_image_knn_score=" in debug[0]["fusion_summary"] | ||
| 348 | 355 | ||
| 349 | 356 | ||
| 350 | def test_run_lightweight_rerank_sorts_by_fused_stage_score(monkeypatch): | 357 | def test_run_lightweight_rerank_sorts_by_fused_stage_score(monkeypatch): |
tests/test_search_rerank_window.py
| @@ -520,9 +520,12 @@ def test_searcher_debug_info_exposes_ranking_funnel(monkeypatch): | @@ -520,9 +520,12 @@ def test_searcher_debug_info_exposes_ranking_funnel(monkeypatch): | ||
| 520 | 520 | ||
| 521 | assert result.debug_info["ranking_funnel"]["fine_rank"]["docs_out"] == 80 | 521 | assert result.debug_info["ranking_funnel"]["fine_rank"]["docs_out"] == 80 |
| 522 | assert result.debug_info["ranking_funnel"]["rerank"]["docs_out"] == 20 | 522 | assert result.debug_info["ranking_funnel"]["rerank"]["docs_out"] == 20 |
| 523 | + assert result.debug_info["ranking_funnel"]["coarse_rank"]["applied"] is True | ||
| 524 | + assert result.debug_info["ranking_funnel"]["coarse_rank"]["backend"] == "local_coarse_fusion" | ||
| 523 | first = result.debug_info["per_result"][0]["ranking_funnel"] | 525 | first = result.debug_info["per_result"][0]["ranking_funnel"] |
| 524 | assert first["es_recall"]["rank"] is not None | 526 | assert first["es_recall"]["rank"] is not None |
| 525 | assert first["coarse_rank"]["score"] is not None | 527 | assert first["coarse_rank"]["score"] is not None |
| 528 | + assert first["coarse_rank"]["fusion_summary"] is not None | ||
| 526 | assert first["fine_rank"]["score"] is not None | 529 | assert first["fine_rank"]["score"] is not None |
| 527 | assert first["rerank"]["rerank_score"] is not None | 530 | assert first["rerank"]["rerank_score"] is not None |
| 528 | 531 |