diff --git a/frontend/static/js/app.js b/frontend/static/js/app.js index ea036ba..5ade0ad 100644 --- a/frontend/static/js/app.js +++ b/frontend/static/js/app.js @@ -499,6 +499,33 @@ function renderJsonDetails(title, payload, open = false) { `; } +function renderStageFusionDetails({ summaryTitle, summary, factors, signals, summaryOpen = false, signalsTitle = 'Signals', signalsOpen = false }) { + return `${renderJsonDetails(summaryTitle, summary || factors, summaryOpen)}${renderJsonDetails(signalsTitle, signals, signalsOpen)}`; +} + +function firstDefined(...values) { + for (const value of values) { + if (value !== undefined && value !== null) { + return value; + } + } + return null; +} + +function buildStageStatusMetrics(stageInfo, extraMetrics = []) { + const metrics = []; + if (stageInfo && Object.prototype.hasOwnProperty.call(stageInfo, 'enabled')) { + metrics.push({ label: 'enabled', value: stageInfo.enabled ? 'yes' : 'no' }); + } + if (stageInfo && Object.prototype.hasOwnProperty.call(stageInfo, 'applied')) { + metrics.push({ label: 'applied', value: stageInfo.applied ? 'yes' : 'no' }); + } + if (stageInfo && stageInfo.skipped_reason) { + metrics.push({ label: 'skipped_reason', value: stageInfo.skipped_reason }); + } + return metrics.concat(extraMetrics); +} + /** Multilingual title/brief/vendor from per-result debug; shown under image/price/title on the left. */ function buildMultilingualFieldsHtml(debug) { if (!debug || typeof debug !== 'object') { @@ -532,58 +559,86 @@ function buildProductDebugHtml({ debug, result, spuId, tenantId }) { ]); const stageScores = renderScorePills([ - { label: 'ES', value: formatDebugNumber(esStage.score ?? debug.es_score), tone: 'tone-es' }, - { label: 'ES Norm', value: formatDebugNumber(esStage.normalized_score ?? debug.es_score_normalized), tone: 'tone-neutral' }, - { label: 'Coarse', value: formatDebugNumber(coarseStage.score ?? debug.coarse_score), tone: 'tone-coarse' }, - { label: 'Fine', value: formatDebugNumber(fineStage.score ?? debug.fine_score), tone: 'tone-fine' }, - { label: 'Rerank', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score), tone: 'tone-rerank' }, - { label: 'Fused', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score), tone: 'tone-final' }, + { label: 'ES', value: formatDebugNumber(esStage.score), tone: 'tone-es' }, + { label: 'ES Norm', value: formatDebugNumber(firstDefined(esStage.normalized_score, debug.es_score_normalized)), tone: 'tone-neutral' }, + { label: 'Coarse', value: formatDebugNumber(coarseStage.score), tone: 'tone-coarse' }, + { label: 'Fine', value: formatDebugNumber(firstDefined(fineStage.score, fineStage.fine_score)), tone: 'tone-fine' }, + { label: 'Rerank', value: formatDebugNumber(firstDefined(rerankStage.rerank_score, rerankStage.score)), tone: 'tone-rerank' }, + { label: 'Fused', value: formatDebugNumber(firstDefined(rerankStage.fused_score, rerankStage.score, fineStage.score, coarseStage.score)), tone: 'tone-final' }, ]); const stageGrid = `
${buildStageCard('ES Recall', 'Matched queries and ES raw score', [ - { label: 'rank', value: esStage.rank ?? debug.initial_rank ?? 'N/A' }, - { label: 'es_score', value: formatDebugNumber(esStage.score ?? debug.es_score) }, - { label: 'es_norm', value: formatDebugNumber(esStage.normalized_score ?? debug.es_score_normalized) }, - ], renderJsonDetails('Matched Queries', esStage.matched_queries ?? debug.matched_queries, true))} + { label: 'rank', value: esStage.rank ?? 'N/A' }, + { label: 'es_score', value: formatDebugNumber(esStage.score) }, + { label: 'es_norm', value: formatDebugNumber(firstDefined(esStage.normalized_score, debug.es_score_normalized)) }, + ], renderJsonDetails('Matched Queries', esStage.matched_queries, true))} ${buildStageCard('Coarse Rank', 'Text + vector fusion', [ { label: 'rank', value: coarseStage.rank ?? 'N/A' }, { label: 'rank_change', value: coarseStage.rank_change ?? 'N/A' }, - { label: 'coarse_score', value: formatDebugNumber(coarseStage.score ?? debug.coarse_score) }, - { label: 'text_score', value: formatDebugNumber(coarseStage.text_score ?? debug.text_score) }, - { label: 'text_source', value: formatDebugNumber(coarseStage.signals?.text_source_score ?? debug.text_source_score) }, - { label: 'text_translation', value: formatDebugNumber(coarseStage.signals?.text_translation_score ?? debug.text_translation_score) }, - { label: 'text_primary', value: formatDebugNumber(coarseStage.signals?.text_primary_score ?? debug.text_primary_score) }, - { label: 'text_support', value: formatDebugNumber(coarseStage.signals?.text_support_score ?? debug.text_support_score) }, - { label: 'knn_score', value: formatDebugNumber(coarseStage.knn_score ?? debug.knn_score) }, - { label: 'text_knn', value: formatDebugNumber(coarseStage.signals?.text_knn_score ?? debug.text_knn_score) }, - { label: 'image_knn', value: formatDebugNumber(coarseStage.signals?.image_knn_score ?? debug.image_knn_score) }, - { label: 'text_factor', value: formatDebugNumber(coarseStage.text_factor ?? debug.coarse_text_factor) }, - { label: 'knn_factor', value: formatDebugNumber(coarseStage.knn_factor ?? debug.coarse_knn_factor) }, - ], renderJsonDetails('Coarse Signals', coarseStage.signals, true))} + { label: 'coarse_score', value: formatDebugNumber(coarseStage.score) }, + { label: 'es_score', value: formatDebugNumber(coarseStage.es_score) }, + { label: 'text_score', value: formatDebugNumber(coarseStage.text_score) }, + { label: 'text_source', value: formatDebugNumber(coarseStage.signals?.text_source_score) }, + { label: 'text_translation', value: formatDebugNumber(coarseStage.signals?.text_translation_score) }, + { label: 'text_primary', value: formatDebugNumber(coarseStage.signals?.text_primary_score) }, + { label: 'text_support', value: formatDebugNumber(coarseStage.signals?.text_support_score) }, + { label: 'knn_score', value: formatDebugNumber(coarseStage.knn_score) }, + { label: 'text_knn', value: formatDebugNumber(coarseStage.signals?.text_knn_score) }, + { label: 'image_knn', value: formatDebugNumber(coarseStage.signals?.image_knn_score) }, + { label: 'es_factor', value: formatDebugNumber(coarseStage.es_factor) }, + { label: 'text_factor', value: formatDebugNumber(coarseStage.text_factor) }, + { label: 'knn_factor', value: formatDebugNumber(coarseStage.knn_factor) }, + { label: 'text_knn_factor', value: formatDebugNumber(coarseStage.text_knn_factor) }, + { label: 'image_knn_factor', value: formatDebugNumber(coarseStage.image_knn_factor) }, + ], renderStageFusionDetails({ + summaryTitle: 'Coarse Fusion', + summary: coarseStage.fusion_summary, + factors: coarseStage.fusion_factors, + signals: coarseStage.signals, + summaryOpen: true, + signalsTitle: 'Coarse Signals', + signalsOpen: false, + }))} ${buildStageCard('Fine Rank', 'Lightweight reranker output', [ { label: 'rank', value: fineStage.rank ?? 'N/A' }, { label: 'rank_change', value: fineStage.rank_change ?? 'N/A' }, - { label: 'stage_score', value: formatDebugNumber(fineStage.score ?? debug.score) }, - { label: 'fine_score', value: formatDebugNumber(fineStage.fine_score ?? debug.fine_score) }, - { label: 'text_score', value: formatDebugNumber(fineStage.text_score ?? debug.text_score) }, - { label: 'knn_score', value: formatDebugNumber(fineStage.knn_score ?? debug.knn_score) }, - ], `${renderJsonDetails('Fine Fusion', fineStage.fusion_summary || debug.fusion_summary || fineStage.fusion_factors, true)}${renderJsonDetails('Fine Input', fineStage.rerank_input ?? debug.rerank_input, true)}`)} + { label: 'stage_score', value: formatDebugNumber(firstDefined(fineStage.score, fineStage.fine_score)) }, + { label: 'fine_score', value: formatDebugNumber(fineStage.fine_score) }, + { label: 'text_score', value: formatDebugNumber(fineStage.text_score) }, + { label: 'knn_score', value: formatDebugNumber(fineStage.knn_score) }, + ], `${renderStageFusionDetails({ + summaryTitle: 'Fine Fusion', + summary: fineStage.fusion_summary, + factors: fineStage.fusion_factors, + signals: fineStage.signals, + summaryOpen: true, + signalsTitle: 'Fine Signals', + signalsOpen: false, + })}${renderJsonDetails('Fine Input', fineStage.rerank_input, true)}`)} ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [ - { label: 'rank', value: rerankStage.rank ?? finalPageStage.rank ?? debug.final_rank ?? 'N/A' }, + { label: 'rank', value: firstDefined(rerankStage.rank, finalPageStage.rank, debug.final_rank) ?? 'N/A' }, { label: 'rank_change', value: rerankStage.rank_change ?? finalPageStage.rank_change ?? 'N/A' }, - { label: 'stage_score', value: formatDebugNumber(rerankStage.score ?? rerankStage.fused_score ?? debug.score) }, - { label: 'rerank_score', value: formatDebugNumber(rerankStage.rerank_score ?? debug.rerank_score) }, - { label: 'fine_score', value: formatDebugNumber(rerankStage.fine_score ?? debug.fine_score) }, - { label: 'text_score', value: formatDebugNumber(rerankStage.text_score ?? debug.text_score) }, - { label: 'knn_score', value: formatDebugNumber(rerankStage.knn_score ?? debug.knn_score) }, - { label: 'fine_factor', value: formatDebugNumber(rerankStage.fine_factor ?? debug.fine_factor) }, - { label: 'rerank_factor', value: formatDebugNumber(rerankStage.rerank_factor ?? debug.rerank_factor) }, - { label: 'text_factor', value: formatDebugNumber(rerankStage.text_factor ?? debug.text_factor) }, - { label: 'knn_factor', value: formatDebugNumber(rerankStage.knn_factor ?? debug.knn_factor) }, - { label: 'fused_score', value: formatDebugNumber(rerankStage.fused_score ?? debug.fused_score) }, - ], `${renderJsonDetails('Final Fusion', rerankStage.fusion_summary || debug.fusion_summary || rerankStage.fusion_factors, false)}${renderJsonDetails('Rerank Signals', rerankStage.signals, false)}`)} + { label: 'stage_score', value: formatDebugNumber(firstDefined(rerankStage.score, rerankStage.fused_score)) }, + { label: 'rerank_score', value: formatDebugNumber(rerankStage.rerank_score) }, + { label: 'fine_score', value: formatDebugNumber(rerankStage.fine_score) }, + { label: 'text_score', value: formatDebugNumber(rerankStage.text_score) }, + { label: 'knn_score', value: formatDebugNumber(rerankStage.knn_score) }, + { label: 'fine_factor', value: formatDebugNumber(rerankStage.fine_factor) }, + { label: 'rerank_factor', value: formatDebugNumber(rerankStage.rerank_factor) }, + { label: 'text_factor', value: formatDebugNumber(rerankStage.text_factor) }, + { label: 'knn_factor', value: formatDebugNumber(rerankStage.knn_factor) }, + { label: 'fused_score', value: formatDebugNumber(firstDefined(rerankStage.fused_score, rerankStage.score)) }, + ], renderStageFusionDetails({ + summaryTitle: 'Final Fusion', + summary: rerankStage.fusion_summary, + factors: rerankStage.fusion_factors, + signals: rerankStage.signals, + summaryOpen: false, + signalsTitle: 'Rerank Signals', + signalsOpen: false, + }))}
`; @@ -1099,11 +1154,14 @@ function buildGlobalFunnelHtml(data, debugInfo) { { label: 'include_named_queries_score', value: esQueryContext.include_named_queries_score ? 'yes' : 'no' }, ])} ${buildStageCard('Coarse Rank', 'Lexical + vector fusion only', [ - { label: 'docs_in', value: coarseInfo.docs_in ?? searchParams.es_fetch_size ?? 'N/A' }, - { label: 'docs_out', value: coarseInfo.docs_out ?? 'N/A' }, - { label: 'formula', value: 'text x knn' }, + ...buildStageStatusMetrics(coarseInfo, [ + { label: 'backend', value: coarseInfo.backend || 'N/A' }, + { label: 'docs_in', value: coarseInfo.docs_in ?? searchParams.es_fetch_size ?? 'N/A' }, + { label: 'docs_out', value: coarseInfo.docs_out ?? 'N/A' }, + { label: 'top_n', value: coarseInfo.top_n ?? 'N/A' }, + ]), ], coarseInfo.fusion ? renderJsonDetails('Coarse Fusion', coarseInfo.fusion, false) : '')} - ${buildStageCard('Fine Rank', 'Lightweight reranker', [ + ${buildStageCard('Fine Rank', 'Lightweight reranker', buildStageStatusMetrics(fineInfo, [ { label: 'service_url', value: fineInfo.service_url || 'N/A' }, { label: 'docs_in', value: fineInfo.docs_in ?? 'N/A' }, { label: 'docs_out', value: fineInfo.docs_out ?? fineInfo.top_n ?? 'N/A' }, @@ -1111,8 +1169,8 @@ function buildGlobalFunnelHtml(data, debugInfo) { { label: 'backend', value: fineInfo.backend || 'N/A' }, { label: 'model', value: fineInfo.model || fineInfo.backend_model_name || 'N/A' }, { label: 'query_template', value: fineInfo.query_template || 'N/A' }, - ], fineInfo.meta ? renderJsonDetails('Fine Meta', fineInfo.meta, false) : '')} - ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', [ + ]), fineInfo.meta ? renderJsonDetails('Fine Meta', fineInfo.meta, false) : '')} + ${buildStageCard('Final Rerank', 'Heavy reranker + final fusion', buildStageStatusMetrics(rerankInfo, [ { label: 'service_url', value: rerankInfo.service_url || 'N/A' }, { label: 'docs_in', value: rerankInfo.docs_in ?? 'N/A' }, { label: 'docs_out', value: rerankInfo.docs_out ?? 'N/A' }, @@ -1120,7 +1178,7 @@ function buildGlobalFunnelHtml(data, debugInfo) { { label: 'backend', value: rerankInfo.backend || 'N/A' }, { label: 'model', value: rerankInfo.model || rerankInfo.backend_model_name || 'N/A' }, { label: 'query_template', value: rerankInfo.query_template || 'N/A' }, - ], `${rerankInfo.fusion ? renderJsonDetails('Final Fusion', rerankInfo.fusion, false) : ''}${rerankInfo.meta ? renderJsonDetails('Rerank Meta', rerankInfo.meta, false) : ''}`)} + ]), `${rerankInfo.fusion ? renderJsonDetails('Final Fusion', rerankInfo.fusion, false) : ''}${rerankInfo.meta ? renderJsonDetails('Rerank Meta', rerankInfo.meta, false) : ''}`)} ${buildStageCard('Page Return', 'Final slice returned to UI', [ { label: 'from', value: searchParams.from_ ?? 0 }, { label: 'size', value: searchParams.size ?? 'N/A' }, diff --git a/search/rerank_client.py b/search/rerank_client.py index a236196..adfbc66 100644 --- a/search/rerank_client.py +++ b/search/rerank_client.py @@ -438,7 +438,7 @@ def _compute_multiplicative_fusion( es_score: float, text_score: float, knn_score: float, - fusion: RerankFusionConfig, + fusion: CoarseRankFusionConfig | RerankFusionConfig, knn_components: Optional[Dict[str, Any]] = None, rerank_score: Optional[float] = None, fine_score: Optional[float] = None, @@ -461,8 +461,9 @@ def _compute_multiplicative_fusion( ) _add_term("es_score", es_score, fusion.es_bias, fusion.es_exponent) - _add_term("rerank_score", rerank_score, fusion.rerank_bias, fusion.rerank_exponent) - _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent) + if isinstance(fusion, RerankFusionConfig): + _add_term("rerank_score", rerank_score, fusion.rerank_bias, fusion.rerank_exponent) + _add_term("fine_score", fine_score, fusion.fine_bias, fusion.fine_exponent) _add_term("text_score", text_score, fusion.text_bias, fusion.text_exponent) _add_term("knn_score", knn_score, fusion.knn_bias, fusion.knn_exponent) _maybe_append_weighted_knn_terms(term_rows=term_rows, fusion=fusion, knn_components=knn_components) @@ -485,36 +486,6 @@ def _compute_multiplicative_fusion( } -def _multiply_coarse_fusion_factors( - es_score: float, - text_score: float, - knn_score: float, - knn_components: Dict[str, Any], - fusion: CoarseRankFusionConfig, -) -> Tuple[float, float, float, float, float, float]: - es_factor = (max(es_score, 0.0) + fusion.es_bias) ** fusion.es_exponent - text_factor = (max(text_score, 0.0) + fusion.text_bias) ** fusion.text_exponent - knn_factor = (max(knn_score, 0.0) + fusion.knn_bias) ** fusion.knn_exponent - text_knn_bias = float(getattr(fusion, "knn_text_bias", fusion.knn_bias)) - image_knn_bias = float(getattr(fusion, "knn_image_bias", fusion.knn_bias)) - text_knn_factor = ( - (max(_to_score(knn_components.get("weighted_text_knn_score")), 0.0) + text_knn_bias) - ** float(getattr(fusion, "knn_text_exponent", 0.0)) - ) - image_knn_factor = ( - (max(_to_score(knn_components.get("weighted_image_knn_score")), 0.0) + image_knn_bias) - ** float(getattr(fusion, "knn_image_exponent", 0.0)) - ) - return ( - es_factor, - text_factor, - knn_factor, - text_knn_factor, - image_knn_factor, - es_factor * text_factor * knn_factor * text_knn_factor * image_knn_factor, - ) - - def _has_selected_sku(hit: Dict[str, Any]) -> bool: return bool(str(hit.get("_style_rerank_suffix") or "").strip()) @@ -538,20 +509,14 @@ def coarse_resort_hits( knn_components = signal_bundle["knn_components"] text_score = signal_bundle["text_score"] knn_score = signal_bundle["knn_score"] - ( - es_factor, - text_factor, - knn_factor, - text_knn_factor, - image_knn_factor, - coarse_score, - ) = _multiply_coarse_fusion_factors( + fusion_result = _compute_multiplicative_fusion( es_score=es_score, text_score=text_score, knn_score=knn_score, - knn_components=knn_components, fusion=f, + knn_components=knn_components, ) + coarse_score = fusion_result["score"] hit["_text_score"] = text_score hit["_knn_score"] = knn_score @@ -597,11 +562,14 @@ def coarse_resort_hits( "knn_primary_score": knn_components["primary_knn_score"], "knn_support_score": knn_components["support_knn_score"], "knn_score": knn_score, - "coarse_es_factor": es_factor, - "coarse_text_factor": text_factor, - "coarse_knn_factor": knn_factor, - "coarse_text_knn_factor": text_knn_factor, - "coarse_image_knn_factor": image_knn_factor, + "fusion_inputs": fusion_result["inputs"], + "fusion_factors": fusion_result["factors"], + "fusion_summary": fusion_result["summary"], + "coarse_es_factor": fusion_result["factors"].get("es_score"), + "coarse_text_factor": fusion_result["factors"].get("text_score"), + "coarse_knn_factor": fusion_result["factors"].get("knn_score"), + "coarse_text_knn_factor": fusion_result["factors"].get("weighted_text_knn_score"), + "coarse_image_knn_factor": fusion_result["factors"].get("weighted_image_knn_score"), "coarse_score": coarse_score, "matched_queries": matched_queries, "ltr_features": ltr_features, diff --git a/search/searcher.py b/search/searcher.py index f2e068c..0b9ce97 100644 --- a/search/searcher.py +++ b/search/searcher.py @@ -35,19 +35,31 @@ def _log_backend_verbose(payload: Dict[str, Any]) -> None: ) +def _index_debug_rows_by_doc(rows: Any) -> Dict[str, Dict[str, Any]]: + indexed: Dict[str, Dict[str, Any]] = {} + if not isinstance(rows, list): + return indexed + for item in rows: + if not isinstance(item, dict): + continue + doc_id = item.get("doc_id") + if doc_id is None: + continue + indexed[str(doc_id)] = item + return indexed + + def _summarize_ltr_features(per_result_debug: List[Dict[str, Any]], top_n: int = 20) -> Dict[str, Any]: rows = list(per_result_debug[:top_n]) if not rows: return {"top_n": 0, "counts": {}, "averages": {}, "top_docs": []} def _feature(row: Dict[str, Any], key: str) -> Any: - features = row.get("ltr_features") - if isinstance(features, dict): - return features.get(key) - rerank_stage = row.get("ranking_funnel", {}).get("rerank", {}) - stage_features = rerank_stage.get("ltr_features") - if isinstance(stage_features, dict): - return stage_features.get(key) + funnel = row.get("ranking_funnel", {}) + for stage_name in ("rerank", "fine_rank", "coarse_rank"): + stage_features = funnel.get(stage_name, {}).get("ltr_features") + if isinstance(stage_features, dict) and key in stage_features: + return stage_features.get(key) return None def _count(flag: str) -> int: @@ -801,8 +813,8 @@ class Searcher: applied: bool, skipped_reason: Optional[str], service_profile: Optional[str], - query_template: str, - doc_template: str, + query_template: Optional[str], + doc_template: Optional[str], docs_in: int, docs_out: int, top_n: int, @@ -825,7 +837,11 @@ class Searcher: "backend_model_name": backend_model_name, "query_template": query_template, "doc_template": doc_template, - "query_text": str(query_template).format_map({"query": rerank_query}), + "query_text": ( + str(query_template).format_map({"query": rerank_query}) + if query_template is not None + else None + ), "docs_in": docs_in, "docs_out": docs_out, "top_n": top_n, @@ -833,6 +849,36 @@ class Searcher: "fusion": fusion, } + def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]: + if previous_rank is None or current_rank is None: + return None + return previous_rank - current_rank + + def _build_result_stage( + *, + rank: Optional[int], + previous_rank: Optional[int], + values: Optional[Dict[str, Any]] = None, + signals: Optional[Dict[str, Any]] = None, + signal_fields: Optional[Dict[str, str]] = None, + ) -> Dict[str, Any]: + stage_payload: Dict[str, Any] = { + "rank": rank, + "rank_change": _rank_change(previous_rank, rank), + } + if values: + stage_payload.update(values) + if signals: + stage_payload["signals"] = signals + stage_payload["ltr_features"] = signals.get("ltr_features") + for shared_key in ("fusion_summary", "fusion_inputs", "fusion_factors"): + if stage_payload.get(shared_key) is None: + stage_payload[shared_key] = signals.get(shared_key) + for payload_key, signal_key in (signal_fields or {}).items(): + if stage_payload.get(payload_key) is None: + stage_payload[payload_key] = signals.get(signal_key) + return stage_payload + def _run_optional_stage( *, stage: RequestContextStage, @@ -967,11 +1013,23 @@ class Searcher: es_response.setdefault("hits", {})["hits"] = hits if debug: coarse_ranks_by_doc = _rank_map(hits) - coarse_debug_info = { - "docs_in": es_fetch_size, - "docs_out": len(hits), - "fusion": coarse_fusion_debug, - } + coarse_debug_info = _stage_debug_info( + enabled=True, + applied=True, + skipped_reason=None, + service_profile=None, + service_url=None, + backend="local_coarse_fusion", + backend_model_name=None, + model=None, + query_template=None, + doc_template=None, + docs_in=es_fetch_size, + docs_out=len(hits), + top_n=coarse_output_window, + meta=None, + fusion=coarse_fusion_debug, + ) context.store_intermediate_result("coarse_rank_scores", coarse_debug) context.logger.info( "粗排完成 | docs_in=%s | docs_out=%s", @@ -1189,36 +1247,9 @@ class Searcher: max_score = es_response.get('hits', {}).get('max_score') or 0.0 # 从上下文中取出重排调试信息(若有) - rerank_debug_raw = context.get_intermediate_result('rerank_scores', None) - rerank_debug_by_doc: Dict[str, Dict[str, Any]] = {} - if isinstance(rerank_debug_raw, list): - for item in rerank_debug_raw: - if not isinstance(item, dict): - continue - doc_id = item.get("doc_id") - if doc_id is None: - continue - rerank_debug_by_doc[str(doc_id)] = item - coarse_debug_raw = context.get_intermediate_result('coarse_rank_scores', None) - coarse_debug_by_doc: Dict[str, Dict[str, Any]] = {} - if isinstance(coarse_debug_raw, list): - for item in coarse_debug_raw: - if not isinstance(item, dict): - continue - doc_id = item.get("doc_id") - if doc_id is None: - continue - coarse_debug_by_doc[str(doc_id)] = item - fine_debug_raw = context.get_intermediate_result('fine_rank_scores', None) - fine_debug_by_doc: Dict[str, Dict[str, Any]] = {} - if isinstance(fine_debug_raw, list): - for item in fine_debug_raw: - if not isinstance(item, dict): - continue - doc_id = item.get("doc_id") - if doc_id is None: - continue - fine_debug_by_doc[str(doc_id)] = item + rerank_debug_by_doc = _index_debug_rows_by_doc(context.get_intermediate_result('rerank_scores', None)) + coarse_debug_by_doc = _index_debug_rows_by_doc(context.get_intermediate_result('coarse_rank_scores', None)) + fine_debug_by_doc = _index_debug_rows_by_doc(context.get_intermediate_result('fine_rank_scores', None)) if self._has_style_intent(parsed_query): if style_intent_decisions: @@ -1289,47 +1320,6 @@ class Searcher: "vendor_multilingual": vendor_multilingual, } - if coarse_debug: - debug_entry["coarse_score"] = coarse_debug.get("coarse_score") - debug_entry["coarse_es_factor"] = coarse_debug.get("coarse_es_factor") - debug_entry["coarse_text_factor"] = coarse_debug.get("coarse_text_factor") - debug_entry["coarse_knn_factor"] = coarse_debug.get("coarse_knn_factor") - - # 若存在重排调试信息,则补充 doc 级别的融合分数信息 - if rerank_debug: - debug_entry["doc_id"] = rerank_debug.get("doc_id") - debug_entry["score"] = rerank_debug.get("score") - debug_entry["rerank_score"] = rerank_debug.get("rerank_score") - debug_entry["fine_score"] = rerank_debug.get("fine_score") - debug_entry["es_score"] = rerank_debug.get("es_score", es_score) - debug_entry["text_score"] = rerank_debug.get("text_score") - debug_entry["knn_score"] = rerank_debug.get("knn_score") - debug_entry["fusion_inputs"] = rerank_debug.get("fusion_inputs") - debug_entry["fusion_factors"] = rerank_debug.get("fusion_factors") - debug_entry["fusion_summary"] = rerank_debug.get("fusion_summary") - debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor") - debug_entry["fine_factor"] = rerank_debug.get("fine_factor") - debug_entry["es_factor"] = rerank_debug.get("es_factor") - debug_entry["text_factor"] = rerank_debug.get("text_factor") - debug_entry["knn_factor"] = rerank_debug.get("knn_factor") - debug_entry["fused_score"] = rerank_debug.get("fused_score") - debug_entry["rerank_input"] = rerank_debug.get("rerank_input") - debug_entry["matched_queries"] = rerank_debug.get("matched_queries") - debug_entry["ltr_features"] = rerank_debug.get("ltr_features") - elif fine_debug: - debug_entry["doc_id"] = fine_debug.get("doc_id") - debug_entry["score"] = fine_debug.get("score") - debug_entry["fine_score"] = fine_debug.get("fine_score") - debug_entry["es_score"] = fine_debug.get("es_score", es_score) - debug_entry["text_score"] = fine_debug.get("text_score") - debug_entry["knn_score"] = fine_debug.get("knn_score") - debug_entry["fusion_inputs"] = fine_debug.get("fusion_inputs") - debug_entry["fusion_factors"] = fine_debug.get("fusion_factors") - debug_entry["fusion_summary"] = fine_debug.get("fusion_summary") - debug_entry["es_factor"] = fine_debug.get("es_factor") - debug_entry["rerank_input"] = fine_debug.get("rerank_input") - debug_entry["ltr_features"] = fine_debug.get("ltr_features") - initial_rank = initial_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None coarse_rank = coarse_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None fine_rank = fine_ranks_by_doc.get(str(doc_id)) if doc_id is not None else None @@ -1344,76 +1334,79 @@ class Searcher: if final_previous_rank is None: final_previous_rank = initial_rank - def _rank_change(previous_rank: Optional[int], current_rank: Optional[int]) -> Optional[int]: - if previous_rank is None or current_rank is None: - return None - return previous_rank - current_rank - debug_entry["ranking_funnel"] = { - "es_recall": { - "rank": initial_rank, - "score": es_score, - "normalized_score": normalized, - "matched_queries": hit.get("matched_queries"), - }, - "coarse_rank": { - "rank": coarse_rank, - "rank_change": _rank_change(initial_rank, coarse_rank), - "score": coarse_debug.get("coarse_score") if coarse_debug else None, - "es_score": coarse_debug.get("es_score") if coarse_debug else es_score, - "text_score": coarse_debug.get("text_score") if coarse_debug else None, - "knn_score": coarse_debug.get("knn_score") if coarse_debug else None, - "es_factor": coarse_debug.get("coarse_es_factor") if coarse_debug else None, - "text_factor": coarse_debug.get("coarse_text_factor") if coarse_debug else None, - "knn_factor": coarse_debug.get("coarse_knn_factor") if coarse_debug else None, - "signals": coarse_debug, - "ltr_features": coarse_debug.get("ltr_features") if coarse_debug else None, - }, - "fine_rank": { - "rank": fine_rank, - "rank_change": _rank_change(coarse_rank, fine_rank), - "score": ( - fine_debug.get("score") - if fine_debug and fine_debug.get("score") is not None - else hit.get("_fine_fused_score", hit.get("_fine_score")) - ), - "fine_score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"), - "es_score": fine_debug.get("es_score") if fine_debug else es_score, - "text_score": fine_debug.get("text_score") if fine_debug else hit.get("_text_score"), - "knn_score": fine_debug.get("knn_score") if fine_debug else hit.get("_knn_score"), - "es_factor": fine_debug.get("es_factor") if fine_debug else None, - "fusion_summary": fine_debug.get("fusion_summary") if fine_debug else None, - "fusion_inputs": fine_debug.get("fusion_inputs") if fine_debug else None, - "fusion_factors": fine_debug.get("fusion_factors") if fine_debug else None, - "rerank_input": fine_debug.get("rerank_input") if fine_debug else None, - "signals": fine_debug, - "ltr_features": fine_debug.get("ltr_features") if fine_debug else None, - }, - "rerank": { - "rank": rerank_rank, - "rank_change": _rank_change(rerank_previous_rank, rerank_rank), - "score": rerank_debug.get("score") if rerank_debug else hit.get("_fused_score"), - "es_score": rerank_debug.get("es_score") if rerank_debug else es_score, - "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"), - "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"), - "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"), - "text_score": rerank_debug.get("text_score") if rerank_debug else hit.get("_text_score"), - "knn_score": rerank_debug.get("knn_score") if rerank_debug else hit.get("_knn_score"), - "fusion_summary": rerank_debug.get("fusion_summary") if rerank_debug else None, - "fusion_inputs": rerank_debug.get("fusion_inputs") if rerank_debug else None, - "fusion_factors": rerank_debug.get("fusion_factors") if rerank_debug else None, - "rerank_factor": rerank_debug.get("rerank_factor") if rerank_debug else None, - "fine_factor": rerank_debug.get("fine_factor") if rerank_debug else None, - "es_factor": rerank_debug.get("es_factor") if rerank_debug else None, - "text_factor": rerank_debug.get("text_factor") if rerank_debug else None, - "knn_factor": rerank_debug.get("knn_factor") if rerank_debug else None, - "signals": rerank_debug, - "ltr_features": rerank_debug.get("ltr_features") if rerank_debug else None, - }, - "final_page": { - "rank": final_rank, - "rank_change": _rank_change(final_previous_rank, final_rank), - }, + "es_recall": _build_result_stage( + rank=initial_rank, + previous_rank=None, + values={ + "score": es_score, + "normalized_score": normalized, + "matched_queries": hit.get("matched_queries"), + }, + ), + "coarse_rank": _build_result_stage( + rank=coarse_rank, + previous_rank=initial_rank, + values={ + "score": coarse_debug.get("coarse_score") if coarse_debug else None, + "es_score": coarse_debug.get("es_score") if coarse_debug else es_score, + "text_score": coarse_debug.get("text_score") if coarse_debug else None, + "knn_score": coarse_debug.get("knn_score") if coarse_debug else None, + }, + signals=coarse_debug, + signal_fields={ + "es_factor": "coarse_es_factor", + "text_factor": "coarse_text_factor", + "knn_factor": "coarse_knn_factor", + "text_knn_factor": "coarse_text_knn_factor", + "image_knn_factor": "coarse_image_knn_factor", + }, + ), + "fine_rank": _build_result_stage( + rank=fine_rank, + previous_rank=coarse_rank, + values={ + "score": ( + fine_debug.get("score") + if fine_debug and fine_debug.get("score") is not None + else hit.get("_fine_fused_score", hit.get("_fine_score")) + ), + "fine_score": fine_debug.get("fine_score") if fine_debug else hit.get("_fine_score"), + "es_score": fine_debug.get("es_score") if fine_debug else es_score, + "text_score": fine_debug.get("text_score") if fine_debug else hit.get("_text_score"), + "knn_score": fine_debug.get("knn_score") if fine_debug else hit.get("_knn_score"), + "rerank_input": fine_debug.get("rerank_input") if fine_debug else None, + }, + signals=fine_debug, + signal_fields={ + "es_factor": "es_factor", + }, + ), + "rerank": _build_result_stage( + rank=rerank_rank, + previous_rank=rerank_previous_rank, + values={ + "score": rerank_debug.get("score") if rerank_debug else hit.get("_fused_score"), + "es_score": rerank_debug.get("es_score") if rerank_debug else es_score, + "rerank_score": rerank_debug.get("rerank_score") if rerank_debug else hit.get("_rerank_score"), + "fine_score": rerank_debug.get("fine_score") if rerank_debug else hit.get("_fine_score"), + "fused_score": rerank_debug.get("fused_score") if rerank_debug else hit.get("_fused_score"), + "text_score": rerank_debug.get("text_score") if rerank_debug else hit.get("_text_score"), + "knn_score": rerank_debug.get("knn_score") if rerank_debug else hit.get("_knn_score"), + }, + signals=rerank_debug, + signal_fields={ + "rerank_factor": "rerank_factor", + "fine_factor": "fine_factor", + "es_factor": "es_factor", + "text_factor": "text_factor", + "knn_factor": "knn_factor", + }, + ), + "final_page": _build_result_stage( + rank=final_rank, + previous_rank=final_previous_rank, + ), } if style_intent_debug: diff --git a/tests/test_rerank_client.py b/tests/test_rerank_client.py index b77a2a9..f91bb1e 100644 --- a/tests/test_rerank_client.py +++ b/tests/test_rerank_client.py @@ -279,7 +279,9 @@ def test_fuse_scores_and_resort_can_add_weighted_text_and_image_knn_factors(): knn_tie_breaker=0.25, knn_bias=0.1, knn_exponent=1.0, + knn_text_bias=0.1, knn_text_exponent=2.0, + knn_image_bias=0.1, knn_image_exponent=3.0, ) @@ -325,7 +327,9 @@ def test_coarse_resort_hits_can_add_weighted_text_and_image_knn_factors(): knn_tie_breaker=0.25, knn_bias=0.1, knn_exponent=1.0, + knn_text_bias=0.1, knn_text_exponent=2.0, + knn_image_bias=0.1, knn_image_exponent=3.0, ) @@ -345,6 +349,9 @@ def test_coarse_resort_hits_can_add_weighted_text_and_image_knn_factors(): assert isclose(hits[0]["_coarse_score"], expected_coarse, rel_tol=1e-9) assert isclose(debug[0]["coarse_text_knn_factor"], (weighted_text_knn + 0.1) ** 2.0, rel_tol=1e-9) assert isclose(debug[0]["coarse_image_knn_factor"], (weighted_image_knn + 0.1) ** 3.0, rel_tol=1e-9) + assert debug[0]["fusion_inputs"]["es_score"] == 1.0 + assert "weighted_text_knn_score=" in debug[0]["fusion_summary"] + assert "weighted_image_knn_score=" in debug[0]["fusion_summary"] def test_run_lightweight_rerank_sorts_by_fused_stage_score(monkeypatch): diff --git a/tests/test_search_rerank_window.py b/tests/test_search_rerank_window.py index 08ffd00..b336e33 100644 --- a/tests/test_search_rerank_window.py +++ b/tests/test_search_rerank_window.py @@ -520,9 +520,12 @@ def test_searcher_debug_info_exposes_ranking_funnel(monkeypatch): assert result.debug_info["ranking_funnel"]["fine_rank"]["docs_out"] == 80 assert result.debug_info["ranking_funnel"]["rerank"]["docs_out"] == 20 + assert result.debug_info["ranking_funnel"]["coarse_rank"]["applied"] is True + assert result.debug_info["ranking_funnel"]["coarse_rank"]["backend"] == "local_coarse_fusion" first = result.debug_info["per_result"][0]["ranking_funnel"] assert first["es_recall"]["rank"] is not None assert first["coarse_rank"]["score"] is not None + assert first["coarse_rank"]["fusion_summary"] is not None assert first["fine_rank"]["score"] is not None assert first["rerank"]["rerank_score"] is not None -- libgit2 0.21.2