diff --git a/frontend/static/js/app.js b/frontend/static/js/app.js
index dee88e8..5466315 100644
--- a/frontend/static/js/app.js
+++ b/frontend/static/js/app.js
@@ -412,7 +412,7 @@ function displayResults(data) {
? debug.es_score_normalized.toFixed(4)
: (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized));
- const esNormRerank = typeof debug.es_score_norm === 'number'
+ const esNormMinMax = typeof debug.es_score_norm === 'number'
? debug.es_score_norm.toFixed(4)
: (debug.es_score_norm == null ? '' : String(debug.es_score_norm));
@@ -436,17 +436,37 @@ function displayResults(data) {
const resultJson = customStringify(result);
const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;
+ const rerankInputHtml = debug.rerank_input
+ ? `Rerank input
${escapeHtml(customStringify(debug.rerank_input))} `
+ : '';
+ const styleIntentHtml = debug.style_intent_sku
+ ? `Selected SKU
${escapeHtml(customStringify(debug.style_intent_sku))} `
+ : '';
+ const matchedQueriesHtml = debug.matched_queries
+ ? `matched_queries
${escapeHtml(customStringify(debug.matched_queries))} `
+ : '';
debugHtml = `
Ranking Debug
spu_id: ${escapeHtml(String(spuId || ''))}
+
Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}
+
Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}
ES score: ${esScore}
-
ES normalized: ${esNorm}
-
ES norm (rerank input): ${esNormRerank}
+
ES normalized (score / initial ES max): ${esNorm}
+
ES norm (min-max over initial ES window): ${esNormMinMax}
+
ES score min/max: ${escapeHtml(String(debug.es_score_min ?? ''))} / ${escapeHtml(String(debug.es_score_max ?? ''))}
Rerank score: ${rerankScore}
+
rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}
+
text_score: ${escapeHtml(String(debug.text_score ?? ''))}
+
text_factor: ${escapeHtml(String(debug.text_factor ?? ''))}
+
knn_score: ${escapeHtml(String(debug.knn_score ?? ''))}
+
knn_factor: ${escapeHtml(String(debug.knn_factor ?? ''))}
Fused score: ${fusedScore}
${titleLines}
+ ${rerankInputHtml}
+ ${styleIntentHtml}
+ ${matchedQueriesHtml}
`;
- html += `
detected_language: ${data.query_info.detected_languag}
`;
+ html += `
detected_language: ${escapeHtml(data.query_info.detected_language || 'N/A')}
`;
html += '
';
debugInfoDiv.innerHTML = html;
} else {
@@ -885,9 +905,14 @@ function displayDebugInfo(data) {
html += `original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}
`;
html += `query_normalized: ${escapeHtml(debugInfo.query_analysis.query_normalized || 'N/A')}
`;
html += `rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}
`;
- html += `detected_language: ${debugInfo.query_analysis.detected_language}
`;
+ html += `detected_language: ${escapeHtml(debugInfo.query_analysis.detected_language || 'N/A')}
`;
+ html += `index_languages: ${escapeHtml((debugInfo.query_analysis.index_languages || []).join(', ') || 'N/A')}
`;
+ html += `query_tokens: ${escapeHtml((debugInfo.query_analysis.query_tokens || []).join(', ') || 'N/A')}
`;
html += `domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}
`;
html += `is_simple_query: ${debugInfo.query_analysis.is_simple_query ? 'yes' : 'no'}
`;
+ if (debugInfo.query_analysis.query_vector_summary) {
+ html += `query_vector_summary: ${escapeHtml(customStringify(debugInfo.query_analysis.query_vector_summary))}
`;
+ }
if (debugInfo.query_analysis.translations && Object.keys(debugInfo.query_analysis.translations).length > 0) {
html += 'translations: ';
@@ -921,6 +946,25 @@ function displayDebugInfo(data) {
html += `
took_ms: ${debugInfo.es_response.took_ms}ms
`;
html += `
total_hits: ${debugInfo.es_response.total_hits}
`;
html += `
max_score: ${debugInfo.es_response.max_score?.toFixed(3) || 0}
`;
+ html += `
initial_es_max_score: ${escapeHtml(String(debugInfo.es_response.initial_es_max_score ?? ''))}
`;
+ html += `
initial_es_min_score: ${escapeHtml(String(debugInfo.es_response.initial_es_min_score ?? ''))}
`;
+ html += '
';
+ }
+
+ if (debugInfo.rerank) {
+ html += 'Rerank:';
+ html += `
requested: ${debugInfo.rerank.requested ? 'yes' : 'no'}
`;
+ html += `
executed: ${debugInfo.rerank.executed ? 'yes' : 'no'}
`;
+ html += `
in_rerank_window: ${debugInfo.rerank.in_rerank_window ? 'yes' : 'no'}
`;
+ html += `
top_n: ${escapeHtml(String(debugInfo.rerank.top_n ?? ''))}
`;
+ html += `
query_template: ${escapeHtml(debugInfo.rerank.query_template || 'N/A')}
`;
+ html += `
doc_template: ${escapeHtml(debugInfo.rerank.doc_template || 'N/A')}
`;
+ html += '
';
+ }
+
+ if (debugInfo.page_fill) {
+ html += 'Page Fill:';
+ html += `
${escapeHtml(customStringify(debugInfo.page_fill))}`;
html += '
';
}
@@ -945,6 +989,12 @@ function displayDebugInfo(data) {
html += `${escapeHtml(customStringify(debugInfo.es_query))}`;
html += '';
}
+
+ if (debugInfo.es_query_context) {
+ html += 'ES Query Context:';
+ html += `
${escapeHtml(customStringify(debugInfo.es_query_context))}`;
+ html += '
';
+ }
html += '';
debugInfoDiv.innerHTML = html;
diff --git a/search/rerank_client.py b/search/rerank_client.py
index 7953dac..ebddeb0 100644
--- a/search/rerank_client.py
+++ b/search/rerank_client.py
@@ -25,6 +25,7 @@ def build_docs_from_hits(
es_hits: List[Dict[str, Any]],
language: str = "zh",
doc_template: str = "{title}",
+ debug_rows: Optional[List[Dict[str, Any]]] = None,
) -> List[str]:
"""
从 ES 命中结果构造重排服务所需的文档文本列表(与 hits 一一对应)。
@@ -63,24 +64,36 @@ def build_docs_from_hits(
for hit in es_hits:
src = hit.get("_source") or {}
title_suffix = str(hit.get("_style_rerank_suffix") or "").strip()
+ values = _SafeDict(
+ title=(
+ f"{pick_lang_text(src.get('title'))} {title_suffix}".strip()
+ if title_suffix
+ else pick_lang_text(src.get("title"))
+ ),
+ brief=pick_lang_text(src.get("brief")) if need_brief else "",
+ vendor=pick_lang_text(src.get("vendor")) if need_vendor else "",
+ description=pick_lang_text(src.get("description")) if need_description else "",
+ category_path=pick_lang_text(src.get("category_path")) if need_category_path else "",
+ )
if only_title:
- title = pick_lang_text(src.get("title"))
- if title_suffix:
- title = f"{title} {title_suffix}".strip()
- docs.append(title)
+ doc_text = values["title"]
else:
- values = _SafeDict(
- title=(
- f"{pick_lang_text(src.get('title'))} {title_suffix}".strip()
- if title_suffix
- else pick_lang_text(src.get("title"))
- ),
- brief=pick_lang_text(src.get("brief")) if need_brief else "",
- vendor=pick_lang_text(src.get("vendor")) if need_vendor else "",
- description=pick_lang_text(src.get("description")) if need_description else "",
- category_path=pick_lang_text(src.get("category_path")) if need_category_path else "",
- )
- docs.append(str(doc_template).format_map(values))
+ doc_text = str(doc_template).format_map(values)
+ docs.append(doc_text)
+ if debug_rows is not None:
+ preview = doc_text if len(doc_text) <= 300 else f"{doc_text[:300]}..."
+ debug_rows.append({
+ "doc_template": doc_template,
+ "title_suffix": title_suffix or None,
+ "fields": {
+ "title": values["title"] or None,
+ "brief": values["brief"] or None,
+ "vendor": values["vendor"] or None,
+ "category_path": values["category_path"] or None,
+ },
+ "doc_preview": preview,
+ "doc_length": len(doc_text),
+ })
return docs
@@ -163,18 +176,13 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa
}
-def _fuse_score(rerank_score: float, text_score: float, knn_score: float) -> float:
- rerank_factor = max(rerank_score, 0.0) + 0.00001
- text_factor = (max(text_score, 0.0) + 0.1) ** 0.35
- knn_factor = (max(knn_score, 0.0) + 0.6) ** 0.2
- return rerank_factor * text_factor * knn_factor
-
-
def fuse_scores_and_resort(
es_hits: List[Dict[str, Any]],
rerank_scores: List[float],
weight_es: float = DEFAULT_WEIGHT_ES,
weight_ai: float = DEFAULT_WEIGHT_AI,
+ debug: bool = False,
+ rerank_debug_rows: Optional[List[Dict[str, Any]]] = None,
) -> List[Dict[str, Any]]:
"""
将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。
@@ -211,7 +219,10 @@ def fuse_scores_and_resort(
knn_score = _extract_named_query_score(matched_queries, "knn_query")
text_components = _collect_text_score_components(matched_queries, es_score)
text_score = text_components["text_score"]
- fused = _fuse_score(rerank_score, text_score, knn_score)
+ rerank_factor = max(rerank_score, 0.0) + 0.00001
+ text_factor = (max(text_score, 0.0) + 0.1) ** 0.35
+ knn_factor = (max(knn_score, 0.0) + 0.6) ** 0.2
+ fused = rerank_factor * text_factor * knn_factor
hit["_original_score"] = hit.get("_score")
hit["_rerank_score"] = rerank_score
@@ -223,19 +234,33 @@ def fuse_scores_and_resort(
hit["_text_support_score"] = text_components["support_text_score"]
hit["_fused_score"] = fused
- fused_debug.append({
- "doc_id": hit.get("_id"),
- "es_score": es_score,
- "rerank_score": rerank_score,
- "text_score": text_score,
- "text_source_score": text_components["source_score"],
- "text_translation_score": text_components["translation_score"],
- "text_primary_score": text_components["primary_text_score"],
- "text_support_score": text_components["support_text_score"],
- "knn_score": knn_score,
- "matched_queries": matched_queries,
- "fused_score": fused,
- })
+ if debug:
+ debug_entry = {
+ "doc_id": hit.get("_id"),
+ "es_score": es_score,
+ "rerank_score": rerank_score,
+ "text_score": text_score,
+ "text_source_score": text_components["source_score"],
+ "text_translation_score": text_components["translation_score"],
+ "text_weighted_source_score": text_components["weighted_source_score"],
+ "text_weighted_translation_score": text_components["weighted_translation_score"],
+ "text_primary_score": text_components["primary_text_score"],
+ "text_support_score": text_components["support_text_score"],
+ "text_score_fallback_to_es": (
+ text_score == es_score
+ and text_components["source_score"] <= 0.0
+ and text_components["translation_score"] <= 0.0
+ ),
+ "knn_score": knn_score,
+ "rerank_factor": rerank_factor,
+ "text_factor": text_factor,
+ "knn_factor": knn_factor,
+ "matched_queries": matched_queries,
+ "fused_score": fused,
+ }
+ if rerank_debug_rows is not None and idx < len(rerank_debug_rows):
+ debug_entry["rerank_input"] = rerank_debug_rows[idx]
+ fused_debug.append(debug_entry)
# 按融合分数降序重排
es_hits.sort(
@@ -255,6 +280,7 @@ def run_rerank(
rerank_query_template: str = "{query}",
rerank_doc_template: str = "{title}",
top_n: Optional[int] = None,
+ debug: bool = False,
) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]:
"""
完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。
@@ -266,7 +292,13 @@ def run_rerank(
return es_response, None, []
query_text = str(rerank_query_template).format_map({"query": query})
- docs = build_docs_from_hits(hits, language=language, doc_template=rerank_doc_template)
+ rerank_debug_rows: Optional[List[Dict[str, Any]]] = [] if debug else None
+ docs = build_docs_from_hits(
+ hits,
+ language=language,
+ doc_template=rerank_doc_template,
+ debug_rows=rerank_debug_rows,
+ )
scores, meta = call_rerank_service(
query_text,
docs,
@@ -282,6 +314,8 @@ def run_rerank(
scores,
weight_es=weight_es,
weight_ai=weight_ai,
+ debug=debug,
+ rerank_debug_rows=rerank_debug_rows,
)
# 更新 max_score 为融合后的最高分
diff --git a/search/searcher.py b/search/searcher.py
index 450c63d..7654a9d 100644
--- a/search/searcher.py
+++ b/search/searcher.py
@@ -339,6 +339,21 @@ class Searcher:
in_rerank_window = do_rerank and (from_ + size) <= rerank_window
es_fetch_from = 0 if in_rerank_window else from_
es_fetch_size = rerank_window if in_rerank_window else size
+ initial_es_positions: Dict[str, int] = {}
+ initial_es_min_score: Optional[float] = None
+ initial_es_max_score: Optional[float] = None
+ page_fill_debug: Optional[Dict[str, Any]] = None
+ rerank_debug_info: Optional[Dict[str, Any]] = None
+ if debug:
+ rerank_debug_info = {
+ "requested": do_rerank,
+ "executed": False,
+ "in_rerank_window": in_rerank_window,
+ "rerank_window": rerank_window,
+ "top_n": from_ + size,
+ "query_template": effective_query_template,
+ "doc_template": effective_doc_template,
+ }
# Start timing
context.start_stage(RequestContextStage.TOTAL)
@@ -348,6 +363,7 @@ class Searcher:
f"enable_rerank(request)={enable_rerank}, enable_rerank(config)={rerank_enabled_by_config}, "
f"enable_rerank(effective)={do_rerank}, in_rerank_window={in_rerank_window}, "
f"es_fetch=({es_fetch_from},{es_fetch_size}) | "
+ f"index_languages={index_langs} | "
f"enable_translation={enable_translation}, enable_embedding={enable_embedding}, min_score={min_score}",
extra={'reqid': context.reqid, 'uid': context.uid}
)
@@ -531,13 +547,36 @@ class Searcher:
# Store ES response in context
context.store_intermediate_result('es_response', es_response)
+ if debug:
+ initial_hits = es_response.get('hits', {}).get('hits') or []
+ initial_scores: List[float] = []
+ for rank, hit in enumerate(initial_hits, 1):
+ doc_id = hit.get("_id")
+ if doc_id is not None:
+ initial_es_positions[str(doc_id)] = rank
+ raw_score = hit.get("_score")
+ try:
+ if raw_score is not None:
+ initial_scores.append(float(raw_score))
+ except (TypeError, ValueError):
+ pass
+ raw_max_score = es_response.get('hits', {}).get('max_score')
+ try:
+ initial_es_max_score = float(raw_max_score) if raw_max_score is not None else None
+ except (TypeError, ValueError):
+ initial_es_max_score = None
+ if initial_es_max_score is None and initial_scores:
+ initial_es_max_score = max(initial_scores)
+ initial_es_min_score = min(initial_scores) if initial_scores else None
# Extract timing from ES response
es_took = es_response.get('took', 0)
context.logger.info(
f"ES搜索完成 | 耗时: {es_took}ms | "
f"命中数: {es_response.get('hits', {}).get('total', {}).get('value', 0)} | "
- f"最高分: {(es_response.get('hits', {}).get('max_score') or 0):.3f}",
+ f"最高分: {(es_response.get('hits', {}).get('max_score') or 0):.3f} | "
+ f"detected_language={parsed_query.detected_language} | "
+ f"translations={list((parsed_query.translations or {}).keys())}",
extra={'reqid': context.reqid, 'uid': context.uid}
)
except Exception as e:
@@ -581,24 +620,31 @@ class Searcher:
rerank_query_template=effective_query_template,
rerank_doc_template=effective_doc_template,
top_n=(from_ + size),
+ debug=debug,
)
if rerank_meta is not None:
from config.services_config import get_rerank_service_url
rerank_url = get_rerank_service_url()
- context.metadata.setdefault("rerank_info", {})
- context.metadata["rerank_info"].update({
- "service_url": rerank_url,
- "docs": len(es_response.get("hits", {}).get("hits") or []),
- "meta": rerank_meta,
- })
- context.store_intermediate_result("rerank_scores", fused_debug)
+ if debug and rerank_debug_info is not None:
+ rerank_debug_info.update({
+ "executed": True,
+ "service_url": rerank_url,
+ "query_text": str(effective_query_template).format_map({"query": rerank_query}),
+ "docs": len(es_response.get("hits", {}).get("hits") or []),
+ "meta": rerank_meta,
+ })
+ context.store_intermediate_result("rerank_scores", fused_debug)
context.logger.info(
- f"重排完成 | docs={len(fused_debug)} | meta={rerank_meta}",
+ f"重排完成 | docs={len(es_response.get('hits', {}).get('hits') or [])} | "
+ f"top_n={from_ + size} | query_template={effective_query_template} | "
+ f"doc_template={effective_doc_template} | meta={rerank_meta}",
extra={'reqid': context.reqid, 'uid': context.uid}
)
except Exception as e:
context.add_warning(f"Rerank failed: {e}")
+ if debug and rerank_debug_info is not None:
+ rerank_debug_info["error"] = str(e)
context.logger.warning(
f"调用重排服务失败 | error: {e}",
extra={'reqid': context.reqid, 'uid': context.uid},
@@ -661,6 +707,13 @@ class Searcher:
)
if fill_took:
es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took)
+ if debug:
+ page_fill_debug = {
+ "requested_ids": page_ids,
+ "filled": filled,
+ "fill_took_ms": fill_took,
+ "response_source_spec": response_source_spec,
+ }
context.logger.info(
f"分页详情回填 | ids={len(page_ids)} | filled={filled} | took={fill_took}ms",
extra={'reqid': context.reqid, 'uid': context.uid}
@@ -728,6 +781,11 @@ class Searcher:
# Build per-result debug info (per SPU) when debug mode is enabled
per_result_debug = []
if debug and es_hits and formatted_results:
+ final_positions_by_doc = {
+ str(hit.get("_id")): (from_ + rank)
+ for rank, hit in enumerate(es_hits, 1)
+ if hit.get("_id") is not None
+ }
for hit, spu in zip(es_hits, formatted_results):
source = hit.get("_source", {}) or {}
doc_id = hit.get("_id")
@@ -746,9 +804,29 @@ class Searcher:
except (TypeError, ValueError):
es_score = 0.0
try:
- normalized = float(es_score) / float(max_score) if max_score else None
+ normalized = (
+ float(es_score) / float(initial_es_max_score)
+ if initial_es_max_score
+ else None
+ )
except (TypeError, ValueError, ZeroDivisionError):
normalized = None
+ try:
+ es_score_norm = (
+ (float(es_score) - float(initial_es_min_score))
+ / (float(initial_es_max_score) - float(initial_es_min_score))
+ if initial_es_min_score is not None
+ and initial_es_max_score is not None
+ and float(initial_es_max_score) > float(initial_es_min_score)
+ else (
+ 1.0
+ if initial_es_min_score is not None
+ and initial_es_max_score is not None
+ else None
+ )
+ )
+ except (TypeError, ValueError, ZeroDivisionError):
+ es_score_norm = None
title_multilingual = source.get("title") if isinstance(source.get("title"), dict) else None
brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None
@@ -758,6 +836,11 @@ class Searcher:
"spu_id": spu.spu_id,
"es_score": es_score,
"es_score_normalized": normalized,
+ "es_score_norm": es_score_norm,
+ "es_score_min": initial_es_min_score,
+ "es_score_max": initial_es_max_score,
+ "initial_rank": initial_es_positions.get(str(doc_id)) if doc_id is not None else None,
+ "final_rank": final_positions_by_doc.get(str(doc_id)) if doc_id is not None else None,
"title_multilingual": title_multilingual,
"brief_multilingual": brief_multilingual,
"vendor_multilingual": vendor_multilingual,
@@ -771,10 +854,17 @@ class Searcher:
debug_entry["text_score"] = rerank_debug.get("text_score")
debug_entry["text_source_score"] = rerank_debug.get("text_source_score")
debug_entry["text_translation_score"] = rerank_debug.get("text_translation_score")
+ debug_entry["text_weighted_source_score"] = rerank_debug.get("text_weighted_source_score")
+ debug_entry["text_weighted_translation_score"] = rerank_debug.get("text_weighted_translation_score")
debug_entry["text_primary_score"] = rerank_debug.get("text_primary_score")
debug_entry["text_support_score"] = rerank_debug.get("text_support_score")
+ debug_entry["text_score_fallback_to_es"] = rerank_debug.get("text_score_fallback_to_es")
debug_entry["knn_score"] = rerank_debug.get("knn_score")
+ debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor")
+ debug_entry["text_factor"] = rerank_debug.get("text_factor")
+ debug_entry["knn_factor"] = rerank_debug.get("knn_factor")
debug_entry["fused_score"] = rerank_debug.get("fused_score")
+ debug_entry["rerank_input"] = rerank_debug.get("rerank_input")
debug_entry["matched_queries"] = rerank_debug.get("matched_queries")
if style_intent_debug:
@@ -818,25 +908,51 @@ class Searcher:
# Collect debug information if requested
debug_info = None
if debug:
+ query_vector_summary = None
+ if parsed_query.query_vector is not None:
+ query_vector_summary = {
+ "dims": int(len(parsed_query.query_vector)),
+ "preview": [round(float(v), 6) for v in parsed_query.query_vector[:8].tolist()],
+ }
debug_info = {
"query_analysis": {
"original_query": context.query_analysis.original_query,
"query_normalized": context.query_analysis.query_normalized,
"rewritten_query": context.query_analysis.rewritten_query,
"detected_language": context.query_analysis.detected_language,
+ "index_languages": index_langs,
"translations": context.query_analysis.translations,
"has_vector": context.query_analysis.query_vector is not None,
+ "query_vector_summary": query_vector_summary,
+ "query_tokens": getattr(parsed_query, "query_tokens", []),
"is_simple_query": context.query_analysis.is_simple_query,
"domain": context.query_analysis.domain,
"style_intent_profile": context.get_intermediate_result("style_intent_profile"),
},
"es_query": context.get_intermediate_result('es_query', {}),
+ "es_query_context": {
+ "filters": filters,
+ "range_filters": range_filters,
+ "facets": [getattr(facet, "field", str(facet)) for facet in facets] if facets else [],
+ "sort_by": sort_by,
+ "sort_order": sort_order,
+ "min_score": min_score,
+ "es_fetch_from": es_fetch_from,
+ "es_fetch_size": es_fetch_size,
+ "in_rerank_window": in_rerank_window,
+ "rerank_prefetch_source": context.get_intermediate_result('es_query_rerank_prefetch_source'),
+ "include_named_queries_score": bool(do_rerank and in_rerank_window),
+ },
"es_response": {
"took_ms": es_response.get('took', 0),
"total_hits": total_value,
"max_score": max_score,
- "shards": es_response.get('_shards', {})
+ "shards": es_response.get('_shards', {}),
+ "initial_es_max_score": initial_es_max_score,
+ "initial_es_min_score": initial_es_min_score,
},
+ "rerank": rerank_debug_info,
+ "page_fill": page_fill_debug,
"feature_flags": context.metadata.get('feature_flags', {}),
"stage_timings": {
k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items()
diff --git a/tests/test_rerank_client.py b/tests/test_rerank_client.py
index 950e945..bfca160 100644
--- a/tests/test_rerank_client.py
+++ b/tests/test_rerank_client.py
@@ -24,7 +24,7 @@ def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_prim
},
]
- debug = fuse_scores_and_resort(hits, [0.9, 0.7])
+ debug = fuse_scores_and_resort(hits, [0.9, 0.7], debug=True)
expected_text_1 = 2.4 + 0.25 * (0.8 * 1.8)
expected_fused_1 = (0.9 + 0.00001) * ((expected_text_1 + 0.1) ** 0.35) * ((0.8 + 0.6) ** 0.2)
@@ -37,7 +37,9 @@ def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_prim
assert isclose(by_id["2"]["_fused_score"], expected_fused_2, rel_tol=1e-9)
assert debug[0]["text_source_score"] == 2.4
assert debug[0]["text_translation_score"] == 1.8
+ assert isclose(debug[0]["text_weighted_translation_score"], 1.44, rel_tol=1e-9)
assert debug[0]["knn_score"] == 0.8
+ assert isclose(debug[0]["rerank_factor"], 0.90001, rel_tol=1e-9)
assert [hit["_id"] for hit in hits] == ["2", "1"]
@@ -47,7 +49,7 @@ def test_fuse_scores_and_resort_falls_back_when_matched_queries_missing():
{"_id": "2", "_score": 2.0},
]
- fuse_scores_and_resort(hits, [0.4, 0.3])
+ debug = fuse_scores_and_resort(hits, [0.4, 0.3], debug=True)
expected_1 = (0.4 + 0.00001) * ((0.5 + 0.1) ** 0.35) * ((0.0 + 0.6) ** 0.2)
expected_2 = (0.3 + 0.00001) * ((2.0 + 0.1) ** 0.35) * ((0.0 + 0.6) ** 0.2)
@@ -58,6 +60,8 @@ def test_fuse_scores_and_resort_falls_back_when_matched_queries_missing():
assert isclose(by_id["1"]["_fused_score"], expected_1, rel_tol=1e-9)
assert isclose(by_id["2"]["_text_score"], 2.0, rel_tol=1e-9)
assert isclose(by_id["2"]["_fused_score"], expected_2, rel_tol=1e-9)
+ assert debug[0]["text_score_fallback_to_es"] is True
+ assert debug[1]["text_score_fallback_to_es"] is True
assert [hit["_id"] for hit in hits] == ["2", "1"]
diff --git a/tests/test_search_rerank_window.py b/tests/test_search_rerank_window.py
index c03da39..1bff506 100644
--- a/tests/test_search_rerank_window.py
+++ b/tests/test_search_rerank_window.py
@@ -612,3 +612,33 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc
assert len(result.results) == 1
assert result.results[0].skus[0].sku_id == "sku-blue"
assert result.results[0].image_url == "https://img/blue.jpg"
+
+
+def test_searcher_debug_info_includes_es_positions_and_context(monkeypatch):
+ es_client = _FakeESClient(total_hits=3)
+ searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client)
+ context = create_request_context(reqid="dbg", uid="u-dbg")
+
+ monkeypatch.setattr(
+ "search.searcher.get_tenant_config_loader",
+ lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en", "zh"]}),
+ )
+
+ result = searcher.search(
+ query="toy",
+ tenant_id="162",
+ from_=0,
+ size=2,
+ context=context,
+ enable_rerank=False,
+ debug=True,
+ )
+
+ assert result.debug_info["query_analysis"]["index_languages"] == ["en", "zh"]
+ assert result.debug_info["es_query_context"]["es_fetch_size"] == 2
+ assert result.debug_info["es_response"]["initial_es_max_score"] == 3.0
+ assert result.debug_info["es_response"]["initial_es_min_score"] == 2.0
+ assert result.debug_info["per_result"][0]["initial_rank"] == 1
+ assert result.debug_info["per_result"][0]["final_rank"] == 1
+ assert result.debug_info["per_result"][0]["es_score_normalized"] == 1.0
+ assert result.debug_info["per_result"][1]["es_score_norm"] == 0.0
--
libgit2 0.21.2