diff --git a/frontend/static/js/app.js b/frontend/static/js/app.js index dee88e8..5466315 100644 --- a/frontend/static/js/app.js +++ b/frontend/static/js/app.js @@ -412,7 +412,7 @@ function displayResults(data) { ? debug.es_score_normalized.toFixed(4) : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized)); - const esNormRerank = typeof debug.es_score_norm === 'number' + const esNormMinMax = typeof debug.es_score_norm === 'number' ? debug.es_score_norm.toFixed(4) : (debug.es_score_norm == null ? '' : String(debug.es_score_norm)); @@ -436,17 +436,37 @@ function displayResults(data) { const resultJson = customStringify(result); const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`; + const rerankInputHtml = debug.rerank_input + ? `
Rerank input
${escapeHtml(customStringify(debug.rerank_input))}
` + : ''; + const styleIntentHtml = debug.style_intent_sku + ? `
Selected SKU
${escapeHtml(customStringify(debug.style_intent_sku))}
` + : ''; + const matchedQueriesHtml = debug.matched_queries + ? `
matched_queries
${escapeHtml(customStringify(debug.matched_queries))}
` + : ''; debugHtml = `
Ranking Debug
spu_id: ${escapeHtml(String(spuId || ''))}
+
Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}
+
Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}
ES score: ${esScore}
-
ES normalized: ${esNorm}
-
ES norm (rerank input): ${esNormRerank}
+
ES normalized (score / initial ES max): ${esNorm}
+
ES norm (min-max over initial ES window): ${esNormMinMax}
+
ES score min/max: ${escapeHtml(String(debug.es_score_min ?? ''))} / ${escapeHtml(String(debug.es_score_max ?? ''))}
Rerank score: ${rerankScore}
+
rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}
+
text_score: ${escapeHtml(String(debug.text_score ?? ''))}
+
text_factor: ${escapeHtml(String(debug.text_factor ?? ''))}
+
knn_score: ${escapeHtml(String(debug.knn_score ?? ''))}
+
knn_factor: ${escapeHtml(String(debug.knn_factor ?? ''))}
Fused score: ${fusedScore}
${titleLines} + ${rerankInputHtml} + ${styleIntentHtml} + ${matchedQueriesHtml}
`; - html += `
detected_language: ${data.query_info.detected_languag}
`; + html += `
detected_language: ${escapeHtml(data.query_info.detected_language || 'N/A')}
`; html += '
'; debugInfoDiv.innerHTML = html; } else { @@ -885,9 +905,14 @@ function displayDebugInfo(data) { html += `
original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}
`; html += `
query_normalized: ${escapeHtml(debugInfo.query_analysis.query_normalized || 'N/A')}
`; html += `
rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}
`; - html += `
detected_language: ${debugInfo.query_analysis.detected_language}
`; + html += `
detected_language: ${escapeHtml(debugInfo.query_analysis.detected_language || 'N/A')}
`; + html += `
index_languages: ${escapeHtml((debugInfo.query_analysis.index_languages || []).join(', ') || 'N/A')}
`; + html += `
query_tokens: ${escapeHtml((debugInfo.query_analysis.query_tokens || []).join(', ') || 'N/A')}
`; html += `
domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}
`; html += `
is_simple_query: ${debugInfo.query_analysis.is_simple_query ? 'yes' : 'no'}
`; + if (debugInfo.query_analysis.query_vector_summary) { + html += `
query_vector_summary: ${escapeHtml(customStringify(debugInfo.query_analysis.query_vector_summary))}
`; + } if (debugInfo.query_analysis.translations && Object.keys(debugInfo.query_analysis.translations).length > 0) { html += '
translations: '; @@ -921,6 +946,25 @@ function displayDebugInfo(data) { html += `
took_ms: ${debugInfo.es_response.took_ms}ms
`; html += `
total_hits: ${debugInfo.es_response.total_hits}
`; html += `
max_score: ${debugInfo.es_response.max_score?.toFixed(3) || 0}
`; + html += `
initial_es_max_score: ${escapeHtml(String(debugInfo.es_response.initial_es_max_score ?? ''))}
`; + html += `
initial_es_min_score: ${escapeHtml(String(debugInfo.es_response.initial_es_min_score ?? ''))}
`; + html += '
'; + } + + if (debugInfo.rerank) { + html += '
Rerank:'; + html += `
requested: ${debugInfo.rerank.requested ? 'yes' : 'no'}
`; + html += `
executed: ${debugInfo.rerank.executed ? 'yes' : 'no'}
`; + html += `
in_rerank_window: ${debugInfo.rerank.in_rerank_window ? 'yes' : 'no'}
`; + html += `
top_n: ${escapeHtml(String(debugInfo.rerank.top_n ?? ''))}
`; + html += `
query_template: ${escapeHtml(debugInfo.rerank.query_template || 'N/A')}
`; + html += `
doc_template: ${escapeHtml(debugInfo.rerank.doc_template || 'N/A')}
`; + html += '
'; + } + + if (debugInfo.page_fill) { + html += '
Page Fill:'; + html += `
${escapeHtml(customStringify(debugInfo.page_fill))}
`; html += '
'; } @@ -945,6 +989,12 @@ function displayDebugInfo(data) { html += `
${escapeHtml(customStringify(debugInfo.es_query))}
`; html += ''; } + + if (debugInfo.es_query_context) { + html += '
ES Query Context:'; + html += `
${escapeHtml(customStringify(debugInfo.es_query_context))}
`; + html += '
'; + } html += ''; debugInfoDiv.innerHTML = html; diff --git a/search/rerank_client.py b/search/rerank_client.py index 7953dac..ebddeb0 100644 --- a/search/rerank_client.py +++ b/search/rerank_client.py @@ -25,6 +25,7 @@ def build_docs_from_hits( es_hits: List[Dict[str, Any]], language: str = "zh", doc_template: str = "{title}", + debug_rows: Optional[List[Dict[str, Any]]] = None, ) -> List[str]: """ 从 ES 命中结果构造重排服务所需的文档文本列表(与 hits 一一对应)。 @@ -63,24 +64,36 @@ def build_docs_from_hits( for hit in es_hits: src = hit.get("_source") or {} title_suffix = str(hit.get("_style_rerank_suffix") or "").strip() + values = _SafeDict( + title=( + f"{pick_lang_text(src.get('title'))} {title_suffix}".strip() + if title_suffix + else pick_lang_text(src.get("title")) + ), + brief=pick_lang_text(src.get("brief")) if need_brief else "", + vendor=pick_lang_text(src.get("vendor")) if need_vendor else "", + description=pick_lang_text(src.get("description")) if need_description else "", + category_path=pick_lang_text(src.get("category_path")) if need_category_path else "", + ) if only_title: - title = pick_lang_text(src.get("title")) - if title_suffix: - title = f"{title} {title_suffix}".strip() - docs.append(title) + doc_text = values["title"] else: - values = _SafeDict( - title=( - f"{pick_lang_text(src.get('title'))} {title_suffix}".strip() - if title_suffix - else pick_lang_text(src.get("title")) - ), - brief=pick_lang_text(src.get("brief")) if need_brief else "", - vendor=pick_lang_text(src.get("vendor")) if need_vendor else "", - description=pick_lang_text(src.get("description")) if need_description else "", - category_path=pick_lang_text(src.get("category_path")) if need_category_path else "", - ) - docs.append(str(doc_template).format_map(values)) + doc_text = str(doc_template).format_map(values) + docs.append(doc_text) + if debug_rows is not None: + preview = doc_text if len(doc_text) <= 300 else f"{doc_text[:300]}..." + debug_rows.append({ + "doc_template": doc_template, + "title_suffix": title_suffix or None, + "fields": { + "title": values["title"] or None, + "brief": values["brief"] or None, + "vendor": values["vendor"] or None, + "category_path": values["category_path"] or None, + }, + "doc_preview": preview, + "doc_length": len(doc_text), + }) return docs @@ -163,18 +176,13 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa } -def _fuse_score(rerank_score: float, text_score: float, knn_score: float) -> float: - rerank_factor = max(rerank_score, 0.0) + 0.00001 - text_factor = (max(text_score, 0.0) + 0.1) ** 0.35 - knn_factor = (max(knn_score, 0.0) + 0.6) ** 0.2 - return rerank_factor * text_factor * knn_factor - - def fuse_scores_and_resort( es_hits: List[Dict[str, Any]], rerank_scores: List[float], weight_es: float = DEFAULT_WEIGHT_ES, weight_ai: float = DEFAULT_WEIGHT_AI, + debug: bool = False, + rerank_debug_rows: Optional[List[Dict[str, Any]]] = None, ) -> List[Dict[str, Any]]: """ 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。 @@ -211,7 +219,10 @@ def fuse_scores_and_resort( knn_score = _extract_named_query_score(matched_queries, "knn_query") text_components = _collect_text_score_components(matched_queries, es_score) text_score = text_components["text_score"] - fused = _fuse_score(rerank_score, text_score, knn_score) + rerank_factor = max(rerank_score, 0.0) + 0.00001 + text_factor = (max(text_score, 0.0) + 0.1) ** 0.35 + knn_factor = (max(knn_score, 0.0) + 0.6) ** 0.2 + fused = rerank_factor * text_factor * knn_factor hit["_original_score"] = hit.get("_score") hit["_rerank_score"] = rerank_score @@ -223,19 +234,33 @@ def fuse_scores_and_resort( hit["_text_support_score"] = text_components["support_text_score"] hit["_fused_score"] = fused - fused_debug.append({ - "doc_id": hit.get("_id"), - "es_score": es_score, - "rerank_score": rerank_score, - "text_score": text_score, - "text_source_score": text_components["source_score"], - "text_translation_score": text_components["translation_score"], - "text_primary_score": text_components["primary_text_score"], - "text_support_score": text_components["support_text_score"], - "knn_score": knn_score, - "matched_queries": matched_queries, - "fused_score": fused, - }) + if debug: + debug_entry = { + "doc_id": hit.get("_id"), + "es_score": es_score, + "rerank_score": rerank_score, + "text_score": text_score, + "text_source_score": text_components["source_score"], + "text_translation_score": text_components["translation_score"], + "text_weighted_source_score": text_components["weighted_source_score"], + "text_weighted_translation_score": text_components["weighted_translation_score"], + "text_primary_score": text_components["primary_text_score"], + "text_support_score": text_components["support_text_score"], + "text_score_fallback_to_es": ( + text_score == es_score + and text_components["source_score"] <= 0.0 + and text_components["translation_score"] <= 0.0 + ), + "knn_score": knn_score, + "rerank_factor": rerank_factor, + "text_factor": text_factor, + "knn_factor": knn_factor, + "matched_queries": matched_queries, + "fused_score": fused, + } + if rerank_debug_rows is not None and idx < len(rerank_debug_rows): + debug_entry["rerank_input"] = rerank_debug_rows[idx] + fused_debug.append(debug_entry) # 按融合分数降序重排 es_hits.sort( @@ -255,6 +280,7 @@ def run_rerank( rerank_query_template: str = "{query}", rerank_doc_template: str = "{title}", top_n: Optional[int] = None, + debug: bool = False, ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]: """ 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。 @@ -266,7 +292,13 @@ def run_rerank( return es_response, None, [] query_text = str(rerank_query_template).format_map({"query": query}) - docs = build_docs_from_hits(hits, language=language, doc_template=rerank_doc_template) + rerank_debug_rows: Optional[List[Dict[str, Any]]] = [] if debug else None + docs = build_docs_from_hits( + hits, + language=language, + doc_template=rerank_doc_template, + debug_rows=rerank_debug_rows, + ) scores, meta = call_rerank_service( query_text, docs, @@ -282,6 +314,8 @@ def run_rerank( scores, weight_es=weight_es, weight_ai=weight_ai, + debug=debug, + rerank_debug_rows=rerank_debug_rows, ) # 更新 max_score 为融合后的最高分 diff --git a/search/searcher.py b/search/searcher.py index 450c63d..7654a9d 100644 --- a/search/searcher.py +++ b/search/searcher.py @@ -339,6 +339,21 @@ class Searcher: in_rerank_window = do_rerank and (from_ + size) <= rerank_window es_fetch_from = 0 if in_rerank_window else from_ es_fetch_size = rerank_window if in_rerank_window else size + initial_es_positions: Dict[str, int] = {} + initial_es_min_score: Optional[float] = None + initial_es_max_score: Optional[float] = None + page_fill_debug: Optional[Dict[str, Any]] = None + rerank_debug_info: Optional[Dict[str, Any]] = None + if debug: + rerank_debug_info = { + "requested": do_rerank, + "executed": False, + "in_rerank_window": in_rerank_window, + "rerank_window": rerank_window, + "top_n": from_ + size, + "query_template": effective_query_template, + "doc_template": effective_doc_template, + } # Start timing context.start_stage(RequestContextStage.TOTAL) @@ -348,6 +363,7 @@ class Searcher: f"enable_rerank(request)={enable_rerank}, enable_rerank(config)={rerank_enabled_by_config}, " f"enable_rerank(effective)={do_rerank}, in_rerank_window={in_rerank_window}, " f"es_fetch=({es_fetch_from},{es_fetch_size}) | " + f"index_languages={index_langs} | " f"enable_translation={enable_translation}, enable_embedding={enable_embedding}, min_score={min_score}", extra={'reqid': context.reqid, 'uid': context.uid} ) @@ -531,13 +547,36 @@ class Searcher: # Store ES response in context context.store_intermediate_result('es_response', es_response) + if debug: + initial_hits = es_response.get('hits', {}).get('hits') or [] + initial_scores: List[float] = [] + for rank, hit in enumerate(initial_hits, 1): + doc_id = hit.get("_id") + if doc_id is not None: + initial_es_positions[str(doc_id)] = rank + raw_score = hit.get("_score") + try: + if raw_score is not None: + initial_scores.append(float(raw_score)) + except (TypeError, ValueError): + pass + raw_max_score = es_response.get('hits', {}).get('max_score') + try: + initial_es_max_score = float(raw_max_score) if raw_max_score is not None else None + except (TypeError, ValueError): + initial_es_max_score = None + if initial_es_max_score is None and initial_scores: + initial_es_max_score = max(initial_scores) + initial_es_min_score = min(initial_scores) if initial_scores else None # Extract timing from ES response es_took = es_response.get('took', 0) context.logger.info( f"ES搜索完成 | 耗时: {es_took}ms | " f"命中数: {es_response.get('hits', {}).get('total', {}).get('value', 0)} | " - f"最高分: {(es_response.get('hits', {}).get('max_score') or 0):.3f}", + f"最高分: {(es_response.get('hits', {}).get('max_score') or 0):.3f} | " + f"detected_language={parsed_query.detected_language} | " + f"translations={list((parsed_query.translations or {}).keys())}", extra={'reqid': context.reqid, 'uid': context.uid} ) except Exception as e: @@ -581,24 +620,31 @@ class Searcher: rerank_query_template=effective_query_template, rerank_doc_template=effective_doc_template, top_n=(from_ + size), + debug=debug, ) if rerank_meta is not None: from config.services_config import get_rerank_service_url rerank_url = get_rerank_service_url() - context.metadata.setdefault("rerank_info", {}) - context.metadata["rerank_info"].update({ - "service_url": rerank_url, - "docs": len(es_response.get("hits", {}).get("hits") or []), - "meta": rerank_meta, - }) - context.store_intermediate_result("rerank_scores", fused_debug) + if debug and rerank_debug_info is not None: + rerank_debug_info.update({ + "executed": True, + "service_url": rerank_url, + "query_text": str(effective_query_template).format_map({"query": rerank_query}), + "docs": len(es_response.get("hits", {}).get("hits") or []), + "meta": rerank_meta, + }) + context.store_intermediate_result("rerank_scores", fused_debug) context.logger.info( - f"重排完成 | docs={len(fused_debug)} | meta={rerank_meta}", + f"重排完成 | docs={len(es_response.get('hits', {}).get('hits') or [])} | " + f"top_n={from_ + size} | query_template={effective_query_template} | " + f"doc_template={effective_doc_template} | meta={rerank_meta}", extra={'reqid': context.reqid, 'uid': context.uid} ) except Exception as e: context.add_warning(f"Rerank failed: {e}") + if debug and rerank_debug_info is not None: + rerank_debug_info["error"] = str(e) context.logger.warning( f"调用重排服务失败 | error: {e}", extra={'reqid': context.reqid, 'uid': context.uid}, @@ -661,6 +707,13 @@ class Searcher: ) if fill_took: es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took) + if debug: + page_fill_debug = { + "requested_ids": page_ids, + "filled": filled, + "fill_took_ms": fill_took, + "response_source_spec": response_source_spec, + } context.logger.info( f"分页详情回填 | ids={len(page_ids)} | filled={filled} | took={fill_took}ms", extra={'reqid': context.reqid, 'uid': context.uid} @@ -728,6 +781,11 @@ class Searcher: # Build per-result debug info (per SPU) when debug mode is enabled per_result_debug = [] if debug and es_hits and formatted_results: + final_positions_by_doc = { + str(hit.get("_id")): (from_ + rank) + for rank, hit in enumerate(es_hits, 1) + if hit.get("_id") is not None + } for hit, spu in zip(es_hits, formatted_results): source = hit.get("_source", {}) or {} doc_id = hit.get("_id") @@ -746,9 +804,29 @@ class Searcher: except (TypeError, ValueError): es_score = 0.0 try: - normalized = float(es_score) / float(max_score) if max_score else None + normalized = ( + float(es_score) / float(initial_es_max_score) + if initial_es_max_score + else None + ) except (TypeError, ValueError, ZeroDivisionError): normalized = None + try: + es_score_norm = ( + (float(es_score) - float(initial_es_min_score)) + / (float(initial_es_max_score) - float(initial_es_min_score)) + if initial_es_min_score is not None + and initial_es_max_score is not None + and float(initial_es_max_score) > float(initial_es_min_score) + else ( + 1.0 + if initial_es_min_score is not None + and initial_es_max_score is not None + else None + ) + ) + except (TypeError, ValueError, ZeroDivisionError): + es_score_norm = None title_multilingual = source.get("title") if isinstance(source.get("title"), dict) else None brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None @@ -758,6 +836,11 @@ class Searcher: "spu_id": spu.spu_id, "es_score": es_score, "es_score_normalized": normalized, + "es_score_norm": es_score_norm, + "es_score_min": initial_es_min_score, + "es_score_max": initial_es_max_score, + "initial_rank": initial_es_positions.get(str(doc_id)) if doc_id is not None else None, + "final_rank": final_positions_by_doc.get(str(doc_id)) if doc_id is not None else None, "title_multilingual": title_multilingual, "brief_multilingual": brief_multilingual, "vendor_multilingual": vendor_multilingual, @@ -771,10 +854,17 @@ class Searcher: debug_entry["text_score"] = rerank_debug.get("text_score") debug_entry["text_source_score"] = rerank_debug.get("text_source_score") debug_entry["text_translation_score"] = rerank_debug.get("text_translation_score") + debug_entry["text_weighted_source_score"] = rerank_debug.get("text_weighted_source_score") + debug_entry["text_weighted_translation_score"] = rerank_debug.get("text_weighted_translation_score") debug_entry["text_primary_score"] = rerank_debug.get("text_primary_score") debug_entry["text_support_score"] = rerank_debug.get("text_support_score") + debug_entry["text_score_fallback_to_es"] = rerank_debug.get("text_score_fallback_to_es") debug_entry["knn_score"] = rerank_debug.get("knn_score") + debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor") + debug_entry["text_factor"] = rerank_debug.get("text_factor") + debug_entry["knn_factor"] = rerank_debug.get("knn_factor") debug_entry["fused_score"] = rerank_debug.get("fused_score") + debug_entry["rerank_input"] = rerank_debug.get("rerank_input") debug_entry["matched_queries"] = rerank_debug.get("matched_queries") if style_intent_debug: @@ -818,25 +908,51 @@ class Searcher: # Collect debug information if requested debug_info = None if debug: + query_vector_summary = None + if parsed_query.query_vector is not None: + query_vector_summary = { + "dims": int(len(parsed_query.query_vector)), + "preview": [round(float(v), 6) for v in parsed_query.query_vector[:8].tolist()], + } debug_info = { "query_analysis": { "original_query": context.query_analysis.original_query, "query_normalized": context.query_analysis.query_normalized, "rewritten_query": context.query_analysis.rewritten_query, "detected_language": context.query_analysis.detected_language, + "index_languages": index_langs, "translations": context.query_analysis.translations, "has_vector": context.query_analysis.query_vector is not None, + "query_vector_summary": query_vector_summary, + "query_tokens": getattr(parsed_query, "query_tokens", []), "is_simple_query": context.query_analysis.is_simple_query, "domain": context.query_analysis.domain, "style_intent_profile": context.get_intermediate_result("style_intent_profile"), }, "es_query": context.get_intermediate_result('es_query', {}), + "es_query_context": { + "filters": filters, + "range_filters": range_filters, + "facets": [getattr(facet, "field", str(facet)) for facet in facets] if facets else [], + "sort_by": sort_by, + "sort_order": sort_order, + "min_score": min_score, + "es_fetch_from": es_fetch_from, + "es_fetch_size": es_fetch_size, + "in_rerank_window": in_rerank_window, + "rerank_prefetch_source": context.get_intermediate_result('es_query_rerank_prefetch_source'), + "include_named_queries_score": bool(do_rerank and in_rerank_window), + }, "es_response": { "took_ms": es_response.get('took', 0), "total_hits": total_value, "max_score": max_score, - "shards": es_response.get('_shards', {}) + "shards": es_response.get('_shards', {}), + "initial_es_max_score": initial_es_max_score, + "initial_es_min_score": initial_es_min_score, }, + "rerank": rerank_debug_info, + "page_fill": page_fill_debug, "feature_flags": context.metadata.get('feature_flags', {}), "stage_timings": { k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items() diff --git a/tests/test_rerank_client.py b/tests/test_rerank_client.py index 950e945..bfca160 100644 --- a/tests/test_rerank_client.py +++ b/tests/test_rerank_client.py @@ -24,7 +24,7 @@ def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_prim }, ] - debug = fuse_scores_and_resort(hits, [0.9, 0.7]) + debug = fuse_scores_and_resort(hits, [0.9, 0.7], debug=True) expected_text_1 = 2.4 + 0.25 * (0.8 * 1.8) expected_fused_1 = (0.9 + 0.00001) * ((expected_text_1 + 0.1) ** 0.35) * ((0.8 + 0.6) ** 0.2) @@ -37,7 +37,9 @@ def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_prim assert isclose(by_id["2"]["_fused_score"], expected_fused_2, rel_tol=1e-9) assert debug[0]["text_source_score"] == 2.4 assert debug[0]["text_translation_score"] == 1.8 + assert isclose(debug[0]["text_weighted_translation_score"], 1.44, rel_tol=1e-9) assert debug[0]["knn_score"] == 0.8 + assert isclose(debug[0]["rerank_factor"], 0.90001, rel_tol=1e-9) assert [hit["_id"] for hit in hits] == ["2", "1"] @@ -47,7 +49,7 @@ def test_fuse_scores_and_resort_falls_back_when_matched_queries_missing(): {"_id": "2", "_score": 2.0}, ] - fuse_scores_and_resort(hits, [0.4, 0.3]) + debug = fuse_scores_and_resort(hits, [0.4, 0.3], debug=True) expected_1 = (0.4 + 0.00001) * ((0.5 + 0.1) ** 0.35) * ((0.0 + 0.6) ** 0.2) expected_2 = (0.3 + 0.00001) * ((2.0 + 0.1) ** 0.35) * ((0.0 + 0.6) ** 0.2) @@ -58,6 +60,8 @@ def test_fuse_scores_and_resort_falls_back_when_matched_queries_missing(): assert isclose(by_id["1"]["_fused_score"], expected_1, rel_tol=1e-9) assert isclose(by_id["2"]["_text_score"], 2.0, rel_tol=1e-9) assert isclose(by_id["2"]["_fused_score"], expected_2, rel_tol=1e-9) + assert debug[0]["text_score_fallback_to_es"] is True + assert debug[1]["text_score_fallback_to_es"] is True assert [hit["_id"] for hit in hits] == ["2", "1"] diff --git a/tests/test_search_rerank_window.py b/tests/test_search_rerank_window.py index c03da39..1bff506 100644 --- a/tests/test_search_rerank_window.py +++ b/tests/test_search_rerank_window.py @@ -612,3 +612,33 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc assert len(result.results) == 1 assert result.results[0].skus[0].sku_id == "sku-blue" assert result.results[0].image_url == "https://img/blue.jpg" + + +def test_searcher_debug_info_includes_es_positions_and_context(monkeypatch): + es_client = _FakeESClient(total_hits=3) + searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client) + context = create_request_context(reqid="dbg", uid="u-dbg") + + monkeypatch.setattr( + "search.searcher.get_tenant_config_loader", + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en", "zh"]}), + ) + + result = searcher.search( + query="toy", + tenant_id="162", + from_=0, + size=2, + context=context, + enable_rerank=False, + debug=True, + ) + + assert result.debug_info["query_analysis"]["index_languages"] == ["en", "zh"] + assert result.debug_info["es_query_context"]["es_fetch_size"] == 2 + assert result.debug_info["es_response"]["initial_es_max_score"] == 3.0 + assert result.debug_info["es_response"]["initial_es_min_score"] == 2.0 + assert result.debug_info["per_result"][0]["initial_rank"] == 1 + assert result.debug_info["per_result"][0]["final_rank"] == 1 + assert result.debug_info["per_result"][0]["es_score_normalized"] == 1.0 + assert result.debug_info["per_result"][1]["es_score_norm"] == 0.0 -- libgit2 0.21.2