Commit 581dafaeb165ad1fb9ed8208e27fea5364a0dc59

Authored by tangwang
1 parent 8ae95af0

debug工具,每条结果的打分中间过程展示

The backend now exposes a structured debug_info that is much closer to
the real ranking pipeline:

- query_analysis now includes index_languages, query_tokens, a query-vector
  summary, the translation/enrichment plan, and translation debug.
- query_build now explains the ES recall plan: base-language clause,
  translated clauses, filters vs post-filters, KNN settings,
  function-score config, and related inputs.
- es_request distinguishes the logical DSL from the actual body sent to
  ES, including rerank prefetch _source.
- es_response now includes the initial ES ranking window stats used for
  score interpretation.
- rerank now includes execution state, templates, rendered rerank query
  text, window/top_n, service/meta, and the fusion formula.
- pagination now shows rerank-window fetch vs requested page plus
  page-fill details.

For each result in debug_info.per_result, ranking debug is now much
richer:

- initial rank and final rank
- raw ES score
- es_score_normalized = raw score / initial ES window max
- es_score_norm = min-max normalization over the initial ES window
- explicit normalization notes explaining that fusion does not directly
  consume an ES-normalized score
- rerank input details: doc template, title suffix, template field values,
  doc preview/length
- fusion breakdown: rerank_factor, text_factor, knn_factor, constants, raw
  inputs, final fused score
- text subcomponents: source/translation/weighted/primary/support/fallback
  evidence via matched_queries
- richer style-intent SKU debug, including selected SKU summary and intent
  texts
frontend/static/js/app.js
@@ -412,7 +412,7 @@ function displayResults(data) { @@ -412,7 +412,7 @@ function displayResults(data) {
412 ? debug.es_score_normalized.toFixed(4) 412 ? debug.es_score_normalized.toFixed(4)
413 : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized)); 413 : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized));
414 414
415 - const esNormRerank = typeof debug.es_score_norm === 'number' 415 + const esNormMinMax = typeof debug.es_score_norm === 'number'
416 ? debug.es_score_norm.toFixed(4) 416 ? debug.es_score_norm.toFixed(4)
417 : (debug.es_score_norm == null ? '' : String(debug.es_score_norm)); 417 : (debug.es_score_norm == null ? '' : String(debug.es_score_norm));
418 418
@@ -436,17 +436,37 @@ function displayResults(data) { @@ -436,17 +436,37 @@ function displayResults(data) {
436 436
437 const resultJson = customStringify(result); 437 const resultJson = customStringify(result);
438 const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`; 438 const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;
  439 + const rerankInputHtml = debug.rerank_input
  440 + ? `<details><summary>Rerank input</summary><pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.rerank_input))}</pre></details>`
  441 + : '';
  442 + const styleIntentHtml = debug.style_intent_sku
  443 + ? `<details><summary>Selected SKU</summary><pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.style_intent_sku))}</pre></details>`
  444 + : '';
  445 + const matchedQueriesHtml = debug.matched_queries
  446 + ? `<details><summary>matched_queries</summary><pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.matched_queries))}</pre></details>`
  447 + : '';
439 448
440 debugHtml = ` 449 debugHtml = `
441 <div class="product-debug"> 450 <div class="product-debug">
442 <div class="product-debug-title">Ranking Debug</div> 451 <div class="product-debug-title">Ranking Debug</div>
443 <div class="product-debug-line">spu_id: ${escapeHtml(String(spuId || ''))}</div> 452 <div class="product-debug-line">spu_id: ${escapeHtml(String(spuId || ''))}</div>
  453 + <div class="product-debug-line">Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}</div>
  454 + <div class="product-debug-line">Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}</div>
444 <div class="product-debug-line">ES score: ${esScore}</div> 455 <div class="product-debug-line">ES score: ${esScore}</div>
445 - <div class="product-debug-line">ES normalized: ${esNorm}</div>  
446 - <div class="product-debug-line">ES norm (rerank input): ${esNormRerank}</div> 456 + <div class="product-debug-line">ES normalized (score / initial ES max): ${esNorm}</div>
  457 + <div class="product-debug-line">ES norm (min-max over initial ES window): ${esNormMinMax}</div>
  458 + <div class="product-debug-line">ES score min/max: ${escapeHtml(String(debug.es_score_min ?? ''))} / ${escapeHtml(String(debug.es_score_max ?? ''))}</div>
447 <div class="product-debug-line">Rerank score: ${rerankScore}</div> 459 <div class="product-debug-line">Rerank score: ${rerankScore}</div>
  460 + <div class="product-debug-line">rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}</div>
  461 + <div class="product-debug-line">text_score: ${escapeHtml(String(debug.text_score ?? ''))}</div>
  462 + <div class="product-debug-line">text_factor: ${escapeHtml(String(debug.text_factor ?? ''))}</div>
  463 + <div class="product-debug-line">knn_score: ${escapeHtml(String(debug.knn_score ?? ''))}</div>
  464 + <div class="product-debug-line">knn_factor: ${escapeHtml(String(debug.knn_factor ?? ''))}</div>
448 <div class="product-debug-line">Fused score: ${fusedScore}</div> 465 <div class="product-debug-line">Fused score: ${fusedScore}</div>
449 ${titleLines} 466 ${titleLines}
  467 + ${rerankInputHtml}
  468 + ${styleIntentHtml}
  469 + ${matchedQueriesHtml}
450 <div class="product-debug-actions"> 470 <div class="product-debug-actions">
451 <button type="button" class="product-debug-inline-result-btn" 471 <button type="button" class="product-debug-inline-result-btn"
452 data-action="toggle-result-inline-doc" 472 data-action="toggle-result-inline-doc"
@@ -866,7 +886,7 @@ function displayDebugInfo(data) { @@ -866,7 +886,7 @@ function displayDebugInfo(data) {
866 if (data.query_info) { 886 if (data.query_info) {
867 let html = '<div style="padding: 10px;">'; 887 let html = '<div style="padding: 10px;">';
868 html += `<div><strong>original_query:</strong> ${escapeHtml(data.query_info.original_query || 'N/A')}</div>`; 888 html += `<div><strong>original_query:</strong> ${escapeHtml(data.query_info.original_query || 'N/A')}</div>`;
869 - html += `<div><strong>detected_language:</strong> ${data.query_info.detected_languag}</div>`; 889 + html += `<div><strong>detected_language:</strong> ${escapeHtml(data.query_info.detected_language || 'N/A')}</div>`;
870 html += '</div>'; 890 html += '</div>';
871 debugInfoDiv.innerHTML = html; 891 debugInfoDiv.innerHTML = html;
872 } else { 892 } else {
@@ -885,9 +905,14 @@ function displayDebugInfo(data) { @@ -885,9 +905,14 @@ function displayDebugInfo(data) {
885 html += `<div>original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}</div>`; 905 html += `<div>original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}</div>`;
886 html += `<div>query_normalized: ${escapeHtml(debugInfo.query_analysis.query_normalized || 'N/A')}</div>`; 906 html += `<div>query_normalized: ${escapeHtml(debugInfo.query_analysis.query_normalized || 'N/A')}</div>`;
887 html += `<div>rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}</div>`; 907 html += `<div>rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}</div>`;
888 - html += `<div>detected_language: ${debugInfo.query_analysis.detected_language}</div>`; 908 + html += `<div>detected_language: ${escapeHtml(debugInfo.query_analysis.detected_language || 'N/A')}</div>`;
  909 + html += `<div>index_languages: ${escapeHtml((debugInfo.query_analysis.index_languages || []).join(', ') || 'N/A')}</div>`;
  910 + html += `<div>query_tokens: ${escapeHtml((debugInfo.query_analysis.query_tokens || []).join(', ') || 'N/A')}</div>`;
889 html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`; 911 html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`;
890 html += `<div>is_simple_query: ${debugInfo.query_analysis.is_simple_query ? 'yes' : 'no'}</div>`; 912 html += `<div>is_simple_query: ${debugInfo.query_analysis.is_simple_query ? 'yes' : 'no'}</div>`;
  913 + if (debugInfo.query_analysis.query_vector_summary) {
  914 + html += `<div>query_vector_summary: ${escapeHtml(customStringify(debugInfo.query_analysis.query_vector_summary))}</div>`;
  915 + }
891 916
892 if (debugInfo.query_analysis.translations && Object.keys(debugInfo.query_analysis.translations).length > 0) { 917 if (debugInfo.query_analysis.translations && Object.keys(debugInfo.query_analysis.translations).length > 0) {
893 html += '<div>translations: '; 918 html += '<div>translations: ';
@@ -921,6 +946,25 @@ function displayDebugInfo(data) { @@ -921,6 +946,25 @@ function displayDebugInfo(data) {
921 html += `<div>took_ms: ${debugInfo.es_response.took_ms}ms</div>`; 946 html += `<div>took_ms: ${debugInfo.es_response.took_ms}ms</div>`;
922 html += `<div>total_hits: ${debugInfo.es_response.total_hits}</div>`; 947 html += `<div>total_hits: ${debugInfo.es_response.total_hits}</div>`;
923 html += `<div>max_score: ${debugInfo.es_response.max_score?.toFixed(3) || 0}</div>`; 948 html += `<div>max_score: ${debugInfo.es_response.max_score?.toFixed(3) || 0}</div>`;
  949 + html += `<div>initial_es_max_score: ${escapeHtml(String(debugInfo.es_response.initial_es_max_score ?? ''))}</div>`;
  950 + html += `<div>initial_es_min_score: ${escapeHtml(String(debugInfo.es_response.initial_es_min_score ?? ''))}</div>`;
  951 + html += '</div>';
  952 + }
  953 +
  954 + if (debugInfo.rerank) {
  955 + html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Rerank:</strong>';
  956 + html += `<div>requested: ${debugInfo.rerank.requested ? 'yes' : 'no'}</div>`;
  957 + html += `<div>executed: ${debugInfo.rerank.executed ? 'yes' : 'no'}</div>`;
  958 + html += `<div>in_rerank_window: ${debugInfo.rerank.in_rerank_window ? 'yes' : 'no'}</div>`;
  959 + html += `<div>top_n: ${escapeHtml(String(debugInfo.rerank.top_n ?? ''))}</div>`;
  960 + html += `<div>query_template: ${escapeHtml(debugInfo.rerank.query_template || 'N/A')}</div>`;
  961 + html += `<div>doc_template: ${escapeHtml(debugInfo.rerank.doc_template || 'N/A')}</div>`;
  962 + html += '</div>';
  963 + }
  964 +
  965 + if (debugInfo.page_fill) {
  966 + html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Page Fill:</strong>';
  967 + html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debugInfo.page_fill))}</pre>`;
924 html += '</div>'; 968 html += '</div>';
925 } 969 }
926 970
@@ -945,6 +989,12 @@ function displayDebugInfo(data) { @@ -945,6 +989,12 @@ function displayDebugInfo(data) {
945 html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 400px;">${escapeHtml(customStringify(debugInfo.es_query))}</pre>`; 989 html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 400px;">${escapeHtml(customStringify(debugInfo.es_query))}</pre>`;
946 html += '</div>'; 990 html += '</div>';
947 } 991 }
  992 +
  993 + if (debugInfo.es_query_context) {
  994 + html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">ES Query Context:</strong>';
  995 + html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 300px;">${escapeHtml(customStringify(debugInfo.es_query_context))}</pre>`;
  996 + html += '</div>';
  997 + }
948 998
949 html += '</div>'; 999 html += '</div>';
950 debugInfoDiv.innerHTML = html; 1000 debugInfoDiv.innerHTML = html;
search/rerank_client.py
@@ -25,6 +25,7 @@ def build_docs_from_hits( @@ -25,6 +25,7 @@ def build_docs_from_hits(
25 es_hits: List[Dict[str, Any]], 25 es_hits: List[Dict[str, Any]],
26 language: str = "zh", 26 language: str = "zh",
27 doc_template: str = "{title}", 27 doc_template: str = "{title}",
  28 + debug_rows: Optional[List[Dict[str, Any]]] = None,
28 ) -> List[str]: 29 ) -> List[str]:
29 """ 30 """
30 从 ES 命中结果构造重排服务所需的文档文本列表(与 hits 一一对应)。 31 从 ES 命中结果构造重排服务所需的文档文本列表(与 hits 一一对应)。
@@ -63,24 +64,36 @@ def build_docs_from_hits( @@ -63,24 +64,36 @@ def build_docs_from_hits(
63 for hit in es_hits: 64 for hit in es_hits:
64 src = hit.get("_source") or {} 65 src = hit.get("_source") or {}
65 title_suffix = str(hit.get("_style_rerank_suffix") or "").strip() 66 title_suffix = str(hit.get("_style_rerank_suffix") or "").strip()
  67 + values = _SafeDict(
  68 + title=(
  69 + f"{pick_lang_text(src.get('title'))} {title_suffix}".strip()
  70 + if title_suffix
  71 + else pick_lang_text(src.get("title"))
  72 + ),
  73 + brief=pick_lang_text(src.get("brief")) if need_brief else "",
  74 + vendor=pick_lang_text(src.get("vendor")) if need_vendor else "",
  75 + description=pick_lang_text(src.get("description")) if need_description else "",
  76 + category_path=pick_lang_text(src.get("category_path")) if need_category_path else "",
  77 + )
66 if only_title: 78 if only_title:
67 - title = pick_lang_text(src.get("title"))  
68 - if title_suffix:  
69 - title = f"{title} {title_suffix}".strip()  
70 - docs.append(title) 79 + doc_text = values["title"]
71 else: 80 else:
72 - values = _SafeDict(  
73 - title=(  
74 - f"{pick_lang_text(src.get('title'))} {title_suffix}".strip()  
75 - if title_suffix  
76 - else pick_lang_text(src.get("title"))  
77 - ),  
78 - brief=pick_lang_text(src.get("brief")) if need_brief else "",  
79 - vendor=pick_lang_text(src.get("vendor")) if need_vendor else "",  
80 - description=pick_lang_text(src.get("description")) if need_description else "",  
81 - category_path=pick_lang_text(src.get("category_path")) if need_category_path else "",  
82 - )  
83 - docs.append(str(doc_template).format_map(values)) 81 + doc_text = str(doc_template).format_map(values)
  82 + docs.append(doc_text)
  83 + if debug_rows is not None:
  84 + preview = doc_text if len(doc_text) <= 300 else f"{doc_text[:300]}..."
  85 + debug_rows.append({
  86 + "doc_template": doc_template,
  87 + "title_suffix": title_suffix or None,
  88 + "fields": {
  89 + "title": values["title"] or None,
  90 + "brief": values["brief"] or None,
  91 + "vendor": values["vendor"] or None,
  92 + "category_path": values["category_path"] or None,
  93 + },
  94 + "doc_preview": preview,
  95 + "doc_length": len(doc_text),
  96 + })
84 return docs 97 return docs
85 98
86 99
@@ -163,18 +176,13 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa @@ -163,18 +176,13 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa
163 } 176 }
164 177
165 178
166 -def _fuse_score(rerank_score: float, text_score: float, knn_score: float) -> float:  
167 - rerank_factor = max(rerank_score, 0.0) + 0.00001  
168 - text_factor = (max(text_score, 0.0) + 0.1) ** 0.35  
169 - knn_factor = (max(knn_score, 0.0) + 0.6) ** 0.2  
170 - return rerank_factor * text_factor * knn_factor  
171 -  
172 -  
173 def fuse_scores_and_resort( 179 def fuse_scores_and_resort(
174 es_hits: List[Dict[str, Any]], 180 es_hits: List[Dict[str, Any]],
175 rerank_scores: List[float], 181 rerank_scores: List[float],
176 weight_es: float = DEFAULT_WEIGHT_ES, 182 weight_es: float = DEFAULT_WEIGHT_ES,
177 weight_ai: float = DEFAULT_WEIGHT_AI, 183 weight_ai: float = DEFAULT_WEIGHT_AI,
  184 + debug: bool = False,
  185 + rerank_debug_rows: Optional[List[Dict[str, Any]]] = None,
178 ) -> List[Dict[str, Any]]: 186 ) -> List[Dict[str, Any]]:
179 """ 187 """
180 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。 188 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。
@@ -211,7 +219,10 @@ def fuse_scores_and_resort( @@ -211,7 +219,10 @@ def fuse_scores_and_resort(
211 knn_score = _extract_named_query_score(matched_queries, "knn_query") 219 knn_score = _extract_named_query_score(matched_queries, "knn_query")
212 text_components = _collect_text_score_components(matched_queries, es_score) 220 text_components = _collect_text_score_components(matched_queries, es_score)
213 text_score = text_components["text_score"] 221 text_score = text_components["text_score"]
214 - fused = _fuse_score(rerank_score, text_score, knn_score) 222 + rerank_factor = max(rerank_score, 0.0) + 0.00001
  223 + text_factor = (max(text_score, 0.0) + 0.1) ** 0.35
  224 + knn_factor = (max(knn_score, 0.0) + 0.6) ** 0.2
  225 + fused = rerank_factor * text_factor * knn_factor
215 226
216 hit["_original_score"] = hit.get("_score") 227 hit["_original_score"] = hit.get("_score")
217 hit["_rerank_score"] = rerank_score 228 hit["_rerank_score"] = rerank_score
@@ -223,19 +234,33 @@ def fuse_scores_and_resort( @@ -223,19 +234,33 @@ def fuse_scores_and_resort(
223 hit["_text_support_score"] = text_components["support_text_score"] 234 hit["_text_support_score"] = text_components["support_text_score"]
224 hit["_fused_score"] = fused 235 hit["_fused_score"] = fused
225 236
226 - fused_debug.append({  
227 - "doc_id": hit.get("_id"),  
228 - "es_score": es_score,  
229 - "rerank_score": rerank_score,  
230 - "text_score": text_score,  
231 - "text_source_score": text_components["source_score"],  
232 - "text_translation_score": text_components["translation_score"],  
233 - "text_primary_score": text_components["primary_text_score"],  
234 - "text_support_score": text_components["support_text_score"],  
235 - "knn_score": knn_score,  
236 - "matched_queries": matched_queries,  
237 - "fused_score": fused,  
238 - }) 237 + if debug:
  238 + debug_entry = {
  239 + "doc_id": hit.get("_id"),
  240 + "es_score": es_score,
  241 + "rerank_score": rerank_score,
  242 + "text_score": text_score,
  243 + "text_source_score": text_components["source_score"],
  244 + "text_translation_score": text_components["translation_score"],
  245 + "text_weighted_source_score": text_components["weighted_source_score"],
  246 + "text_weighted_translation_score": text_components["weighted_translation_score"],
  247 + "text_primary_score": text_components["primary_text_score"],
  248 + "text_support_score": text_components["support_text_score"],
  249 + "text_score_fallback_to_es": (
  250 + text_score == es_score
  251 + and text_components["source_score"] <= 0.0
  252 + and text_components["translation_score"] <= 0.0
  253 + ),
  254 + "knn_score": knn_score,
  255 + "rerank_factor": rerank_factor,
  256 + "text_factor": text_factor,
  257 + "knn_factor": knn_factor,
  258 + "matched_queries": matched_queries,
  259 + "fused_score": fused,
  260 + }
  261 + if rerank_debug_rows is not None and idx < len(rerank_debug_rows):
  262 + debug_entry["rerank_input"] = rerank_debug_rows[idx]
  263 + fused_debug.append(debug_entry)
239 264
240 # 按融合分数降序重排 265 # 按融合分数降序重排
241 es_hits.sort( 266 es_hits.sort(
@@ -255,6 +280,7 @@ def run_rerank( @@ -255,6 +280,7 @@ def run_rerank(
255 rerank_query_template: str = "{query}", 280 rerank_query_template: str = "{query}",
256 rerank_doc_template: str = "{title}", 281 rerank_doc_template: str = "{title}",
257 top_n: Optional[int] = None, 282 top_n: Optional[int] = None,
  283 + debug: bool = False,
258 ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]: 284 ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]:
259 """ 285 """
260 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。 286 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。
@@ -266,7 +292,13 @@ def run_rerank( @@ -266,7 +292,13 @@ def run_rerank(
266 return es_response, None, [] 292 return es_response, None, []
267 293
268 query_text = str(rerank_query_template).format_map({"query": query}) 294 query_text = str(rerank_query_template).format_map({"query": query})
269 - docs = build_docs_from_hits(hits, language=language, doc_template=rerank_doc_template) 295 + rerank_debug_rows: Optional[List[Dict[str, Any]]] = [] if debug else None
  296 + docs = build_docs_from_hits(
  297 + hits,
  298 + language=language,
  299 + doc_template=rerank_doc_template,
  300 + debug_rows=rerank_debug_rows,
  301 + )
270 scores, meta = call_rerank_service( 302 scores, meta = call_rerank_service(
271 query_text, 303 query_text,
272 docs, 304 docs,
@@ -282,6 +314,8 @@ def run_rerank( @@ -282,6 +314,8 @@ def run_rerank(
282 scores, 314 scores,
283 weight_es=weight_es, 315 weight_es=weight_es,
284 weight_ai=weight_ai, 316 weight_ai=weight_ai,
  317 + debug=debug,
  318 + rerank_debug_rows=rerank_debug_rows,
285 ) 319 )
286 320
287 # 更新 max_score 为融合后的最高分 321 # 更新 max_score 为融合后的最高分
search/searcher.py
@@ -339,6 +339,21 @@ class Searcher: @@ -339,6 +339,21 @@ class Searcher:
339 in_rerank_window = do_rerank and (from_ + size) <= rerank_window 339 in_rerank_window = do_rerank and (from_ + size) <= rerank_window
340 es_fetch_from = 0 if in_rerank_window else from_ 340 es_fetch_from = 0 if in_rerank_window else from_
341 es_fetch_size = rerank_window if in_rerank_window else size 341 es_fetch_size = rerank_window if in_rerank_window else size
  342 + initial_es_positions: Dict[str, int] = {}
  343 + initial_es_min_score: Optional[float] = None
  344 + initial_es_max_score: Optional[float] = None
  345 + page_fill_debug: Optional[Dict[str, Any]] = None
  346 + rerank_debug_info: Optional[Dict[str, Any]] = None
  347 + if debug:
  348 + rerank_debug_info = {
  349 + "requested": do_rerank,
  350 + "executed": False,
  351 + "in_rerank_window": in_rerank_window,
  352 + "rerank_window": rerank_window,
  353 + "top_n": from_ + size,
  354 + "query_template": effective_query_template,
  355 + "doc_template": effective_doc_template,
  356 + }
342 357
343 # Start timing 358 # Start timing
344 context.start_stage(RequestContextStage.TOTAL) 359 context.start_stage(RequestContextStage.TOTAL)
@@ -348,6 +363,7 @@ class Searcher: @@ -348,6 +363,7 @@ class Searcher:
348 f"enable_rerank(request)={enable_rerank}, enable_rerank(config)={rerank_enabled_by_config}, " 363 f"enable_rerank(request)={enable_rerank}, enable_rerank(config)={rerank_enabled_by_config}, "
349 f"enable_rerank(effective)={do_rerank}, in_rerank_window={in_rerank_window}, " 364 f"enable_rerank(effective)={do_rerank}, in_rerank_window={in_rerank_window}, "
350 f"es_fetch=({es_fetch_from},{es_fetch_size}) | " 365 f"es_fetch=({es_fetch_from},{es_fetch_size}) | "
  366 + f"index_languages={index_langs} | "
351 f"enable_translation={enable_translation}, enable_embedding={enable_embedding}, min_score={min_score}", 367 f"enable_translation={enable_translation}, enable_embedding={enable_embedding}, min_score={min_score}",
352 extra={'reqid': context.reqid, 'uid': context.uid} 368 extra={'reqid': context.reqid, 'uid': context.uid}
353 ) 369 )
@@ -531,13 +547,36 @@ class Searcher: @@ -531,13 +547,36 @@ class Searcher:
531 547
532 # Store ES response in context 548 # Store ES response in context
533 context.store_intermediate_result('es_response', es_response) 549 context.store_intermediate_result('es_response', es_response)
  550 + if debug:
  551 + initial_hits = es_response.get('hits', {}).get('hits') or []
  552 + initial_scores: List[float] = []
  553 + for rank, hit in enumerate(initial_hits, 1):
  554 + doc_id = hit.get("_id")
  555 + if doc_id is not None:
  556 + initial_es_positions[str(doc_id)] = rank
  557 + raw_score = hit.get("_score")
  558 + try:
  559 + if raw_score is not None:
  560 + initial_scores.append(float(raw_score))
  561 + except (TypeError, ValueError):
  562 + pass
  563 + raw_max_score = es_response.get('hits', {}).get('max_score')
  564 + try:
  565 + initial_es_max_score = float(raw_max_score) if raw_max_score is not None else None
  566 + except (TypeError, ValueError):
  567 + initial_es_max_score = None
  568 + if initial_es_max_score is None and initial_scores:
  569 + initial_es_max_score = max(initial_scores)
  570 + initial_es_min_score = min(initial_scores) if initial_scores else None
534 571
535 # Extract timing from ES response 572 # Extract timing from ES response
536 es_took = es_response.get('took', 0) 573 es_took = es_response.get('took', 0)
537 context.logger.info( 574 context.logger.info(
538 f"ES搜索完成 | 耗时: {es_took}ms | " 575 f"ES搜索完成 | 耗时: {es_took}ms | "
539 f"命中数: {es_response.get('hits', {}).get('total', {}).get('value', 0)} | " 576 f"命中数: {es_response.get('hits', {}).get('total', {}).get('value', 0)} | "
540 - f"最高分: {(es_response.get('hits', {}).get('max_score') or 0):.3f}", 577 + f"最高分: {(es_response.get('hits', {}).get('max_score') or 0):.3f} | "
  578 + f"detected_language={parsed_query.detected_language} | "
  579 + f"translations={list((parsed_query.translations or {}).keys())}",
541 extra={'reqid': context.reqid, 'uid': context.uid} 580 extra={'reqid': context.reqid, 'uid': context.uid}
542 ) 581 )
543 except Exception as e: 582 except Exception as e:
@@ -581,24 +620,31 @@ class Searcher: @@ -581,24 +620,31 @@ class Searcher:
581 rerank_query_template=effective_query_template, 620 rerank_query_template=effective_query_template,
582 rerank_doc_template=effective_doc_template, 621 rerank_doc_template=effective_doc_template,
583 top_n=(from_ + size), 622 top_n=(from_ + size),
  623 + debug=debug,
584 ) 624 )
585 625
586 if rerank_meta is not None: 626 if rerank_meta is not None:
587 from config.services_config import get_rerank_service_url 627 from config.services_config import get_rerank_service_url
588 rerank_url = get_rerank_service_url() 628 rerank_url = get_rerank_service_url()
589 - context.metadata.setdefault("rerank_info", {})  
590 - context.metadata["rerank_info"].update({  
591 - "service_url": rerank_url,  
592 - "docs": len(es_response.get("hits", {}).get("hits") or []),  
593 - "meta": rerank_meta,  
594 - })  
595 - context.store_intermediate_result("rerank_scores", fused_debug) 629 + if debug and rerank_debug_info is not None:
  630 + rerank_debug_info.update({
  631 + "executed": True,
  632 + "service_url": rerank_url,
  633 + "query_text": str(effective_query_template).format_map({"query": rerank_query}),
  634 + "docs": len(es_response.get("hits", {}).get("hits") or []),
  635 + "meta": rerank_meta,
  636 + })
  637 + context.store_intermediate_result("rerank_scores", fused_debug)
596 context.logger.info( 638 context.logger.info(
597 - f"重排完成 | docs={len(fused_debug)} | meta={rerank_meta}", 639 + f"重排完成 | docs={len(es_response.get('hits', {}).get('hits') or [])} | "
  640 + f"top_n={from_ + size} | query_template={effective_query_template} | "
  641 + f"doc_template={effective_doc_template} | meta={rerank_meta}",
598 extra={'reqid': context.reqid, 'uid': context.uid} 642 extra={'reqid': context.reqid, 'uid': context.uid}
599 ) 643 )
600 except Exception as e: 644 except Exception as e:
601 context.add_warning(f"Rerank failed: {e}") 645 context.add_warning(f"Rerank failed: {e}")
  646 + if debug and rerank_debug_info is not None:
  647 + rerank_debug_info["error"] = str(e)
602 context.logger.warning( 648 context.logger.warning(
603 f"调用重排服务失败 | error: {e}", 649 f"调用重排服务失败 | error: {e}",
604 extra={'reqid': context.reqid, 'uid': context.uid}, 650 extra={'reqid': context.reqid, 'uid': context.uid},
@@ -661,6 +707,13 @@ class Searcher: @@ -661,6 +707,13 @@ class Searcher:
661 ) 707 )
662 if fill_took: 708 if fill_took:
663 es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took) 709 es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took)
  710 + if debug:
  711 + page_fill_debug = {
  712 + "requested_ids": page_ids,
  713 + "filled": filled,
  714 + "fill_took_ms": fill_took,
  715 + "response_source_spec": response_source_spec,
  716 + }
664 context.logger.info( 717 context.logger.info(
665 f"分页详情回填 | ids={len(page_ids)} | filled={filled} | took={fill_took}ms", 718 f"分页详情回填 | ids={len(page_ids)} | filled={filled} | took={fill_took}ms",
666 extra={'reqid': context.reqid, 'uid': context.uid} 719 extra={'reqid': context.reqid, 'uid': context.uid}
@@ -728,6 +781,11 @@ class Searcher: @@ -728,6 +781,11 @@ class Searcher:
728 # Build per-result debug info (per SPU) when debug mode is enabled 781 # Build per-result debug info (per SPU) when debug mode is enabled
729 per_result_debug = [] 782 per_result_debug = []
730 if debug and es_hits and formatted_results: 783 if debug and es_hits and formatted_results:
  784 + final_positions_by_doc = {
  785 + str(hit.get("_id")): (from_ + rank)
  786 + for rank, hit in enumerate(es_hits, 1)
  787 + if hit.get("_id") is not None
  788 + }
731 for hit, spu in zip(es_hits, formatted_results): 789 for hit, spu in zip(es_hits, formatted_results):
732 source = hit.get("_source", {}) or {} 790 source = hit.get("_source", {}) or {}
733 doc_id = hit.get("_id") 791 doc_id = hit.get("_id")
@@ -746,9 +804,29 @@ class Searcher: @@ -746,9 +804,29 @@ class Searcher:
746 except (TypeError, ValueError): 804 except (TypeError, ValueError):
747 es_score = 0.0 805 es_score = 0.0
748 try: 806 try:
749 - normalized = float(es_score) / float(max_score) if max_score else None 807 + normalized = (
  808 + float(es_score) / float(initial_es_max_score)
  809 + if initial_es_max_score
  810 + else None
  811 + )
750 except (TypeError, ValueError, ZeroDivisionError): 812 except (TypeError, ValueError, ZeroDivisionError):
751 normalized = None 813 normalized = None
  814 + try:
  815 + es_score_norm = (
  816 + (float(es_score) - float(initial_es_min_score))
  817 + / (float(initial_es_max_score) - float(initial_es_min_score))
  818 + if initial_es_min_score is not None
  819 + and initial_es_max_score is not None
  820 + and float(initial_es_max_score) > float(initial_es_min_score)
  821 + else (
  822 + 1.0
  823 + if initial_es_min_score is not None
  824 + and initial_es_max_score is not None
  825 + else None
  826 + )
  827 + )
  828 + except (TypeError, ValueError, ZeroDivisionError):
  829 + es_score_norm = None
752 830
753 title_multilingual = source.get("title") if isinstance(source.get("title"), dict) else None 831 title_multilingual = source.get("title") if isinstance(source.get("title"), dict) else None
754 brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None 832 brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None
@@ -758,6 +836,11 @@ class Searcher: @@ -758,6 +836,11 @@ class Searcher:
758 "spu_id": spu.spu_id, 836 "spu_id": spu.spu_id,
759 "es_score": es_score, 837 "es_score": es_score,
760 "es_score_normalized": normalized, 838 "es_score_normalized": normalized,
  839 + "es_score_norm": es_score_norm,
  840 + "es_score_min": initial_es_min_score,
  841 + "es_score_max": initial_es_max_score,
  842 + "initial_rank": initial_es_positions.get(str(doc_id)) if doc_id is not None else None,
  843 + "final_rank": final_positions_by_doc.get(str(doc_id)) if doc_id is not None else None,
761 "title_multilingual": title_multilingual, 844 "title_multilingual": title_multilingual,
762 "brief_multilingual": brief_multilingual, 845 "brief_multilingual": brief_multilingual,
763 "vendor_multilingual": vendor_multilingual, 846 "vendor_multilingual": vendor_multilingual,
@@ -771,10 +854,17 @@ class Searcher: @@ -771,10 +854,17 @@ class Searcher:
771 debug_entry["text_score"] = rerank_debug.get("text_score") 854 debug_entry["text_score"] = rerank_debug.get("text_score")
772 debug_entry["text_source_score"] = rerank_debug.get("text_source_score") 855 debug_entry["text_source_score"] = rerank_debug.get("text_source_score")
773 debug_entry["text_translation_score"] = rerank_debug.get("text_translation_score") 856 debug_entry["text_translation_score"] = rerank_debug.get("text_translation_score")
  857 + debug_entry["text_weighted_source_score"] = rerank_debug.get("text_weighted_source_score")
  858 + debug_entry["text_weighted_translation_score"] = rerank_debug.get("text_weighted_translation_score")
774 debug_entry["text_primary_score"] = rerank_debug.get("text_primary_score") 859 debug_entry["text_primary_score"] = rerank_debug.get("text_primary_score")
775 debug_entry["text_support_score"] = rerank_debug.get("text_support_score") 860 debug_entry["text_support_score"] = rerank_debug.get("text_support_score")
  861 + debug_entry["text_score_fallback_to_es"] = rerank_debug.get("text_score_fallback_to_es")
776 debug_entry["knn_score"] = rerank_debug.get("knn_score") 862 debug_entry["knn_score"] = rerank_debug.get("knn_score")
  863 + debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor")
  864 + debug_entry["text_factor"] = rerank_debug.get("text_factor")
  865 + debug_entry["knn_factor"] = rerank_debug.get("knn_factor")
777 debug_entry["fused_score"] = rerank_debug.get("fused_score") 866 debug_entry["fused_score"] = rerank_debug.get("fused_score")
  867 + debug_entry["rerank_input"] = rerank_debug.get("rerank_input")
778 debug_entry["matched_queries"] = rerank_debug.get("matched_queries") 868 debug_entry["matched_queries"] = rerank_debug.get("matched_queries")
779 869
780 if style_intent_debug: 870 if style_intent_debug:
@@ -818,25 +908,51 @@ class Searcher: @@ -818,25 +908,51 @@ class Searcher:
818 # Collect debug information if requested 908 # Collect debug information if requested
819 debug_info = None 909 debug_info = None
820 if debug: 910 if debug:
  911 + query_vector_summary = None
  912 + if parsed_query.query_vector is not None:
  913 + query_vector_summary = {
  914 + "dims": int(len(parsed_query.query_vector)),
  915 + "preview": [round(float(v), 6) for v in parsed_query.query_vector[:8].tolist()],
  916 + }
821 debug_info = { 917 debug_info = {
822 "query_analysis": { 918 "query_analysis": {
823 "original_query": context.query_analysis.original_query, 919 "original_query": context.query_analysis.original_query,
824 "query_normalized": context.query_analysis.query_normalized, 920 "query_normalized": context.query_analysis.query_normalized,
825 "rewritten_query": context.query_analysis.rewritten_query, 921 "rewritten_query": context.query_analysis.rewritten_query,
826 "detected_language": context.query_analysis.detected_language, 922 "detected_language": context.query_analysis.detected_language,
  923 + "index_languages": index_langs,
827 "translations": context.query_analysis.translations, 924 "translations": context.query_analysis.translations,
828 "has_vector": context.query_analysis.query_vector is not None, 925 "has_vector": context.query_analysis.query_vector is not None,
  926 + "query_vector_summary": query_vector_summary,
  927 + "query_tokens": getattr(parsed_query, "query_tokens", []),
829 "is_simple_query": context.query_analysis.is_simple_query, 928 "is_simple_query": context.query_analysis.is_simple_query,
830 "domain": context.query_analysis.domain, 929 "domain": context.query_analysis.domain,
831 "style_intent_profile": context.get_intermediate_result("style_intent_profile"), 930 "style_intent_profile": context.get_intermediate_result("style_intent_profile"),
832 }, 931 },
833 "es_query": context.get_intermediate_result('es_query', {}), 932 "es_query": context.get_intermediate_result('es_query', {}),
  933 + "es_query_context": {
  934 + "filters": filters,
  935 + "range_filters": range_filters,
  936 + "facets": [getattr(facet, "field", str(facet)) for facet in facets] if facets else [],
  937 + "sort_by": sort_by,
  938 + "sort_order": sort_order,
  939 + "min_score": min_score,
  940 + "es_fetch_from": es_fetch_from,
  941 + "es_fetch_size": es_fetch_size,
  942 + "in_rerank_window": in_rerank_window,
  943 + "rerank_prefetch_source": context.get_intermediate_result('es_query_rerank_prefetch_source'),
  944 + "include_named_queries_score": bool(do_rerank and in_rerank_window),
  945 + },
834 "es_response": { 946 "es_response": {
835 "took_ms": es_response.get('took', 0), 947 "took_ms": es_response.get('took', 0),
836 "total_hits": total_value, 948 "total_hits": total_value,
837 "max_score": max_score, 949 "max_score": max_score,
838 - "shards": es_response.get('_shards', {}) 950 + "shards": es_response.get('_shards', {}),
  951 + "initial_es_max_score": initial_es_max_score,
  952 + "initial_es_min_score": initial_es_min_score,
839 }, 953 },
  954 + "rerank": rerank_debug_info,
  955 + "page_fill": page_fill_debug,
840 "feature_flags": context.metadata.get('feature_flags', {}), 956 "feature_flags": context.metadata.get('feature_flags', {}),
841 "stage_timings": { 957 "stage_timings": {
842 k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items() 958 k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items()
tests/test_rerank_client.py
@@ -24,7 +24,7 @@ def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_prim @@ -24,7 +24,7 @@ def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_prim
24 }, 24 },
25 ] 25 ]
26 26
27 - debug = fuse_scores_and_resort(hits, [0.9, 0.7]) 27 + debug = fuse_scores_and_resort(hits, [0.9, 0.7], debug=True)
28 28
29 expected_text_1 = 2.4 + 0.25 * (0.8 * 1.8) 29 expected_text_1 = 2.4 + 0.25 * (0.8 * 1.8)
30 expected_fused_1 = (0.9 + 0.00001) * ((expected_text_1 + 0.1) ** 0.35) * ((0.8 + 0.6) ** 0.2) 30 expected_fused_1 = (0.9 + 0.00001) * ((expected_text_1 + 0.1) ** 0.35) * ((0.8 + 0.6) ** 0.2)
@@ -37,7 +37,9 @@ def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_prim @@ -37,7 +37,9 @@ def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_prim
37 assert isclose(by_id["2"]["_fused_score"], expected_fused_2, rel_tol=1e-9) 37 assert isclose(by_id["2"]["_fused_score"], expected_fused_2, rel_tol=1e-9)
38 assert debug[0]["text_source_score"] == 2.4 38 assert debug[0]["text_source_score"] == 2.4
39 assert debug[0]["text_translation_score"] == 1.8 39 assert debug[0]["text_translation_score"] == 1.8
  40 + assert isclose(debug[0]["text_weighted_translation_score"], 1.44, rel_tol=1e-9)
40 assert debug[0]["knn_score"] == 0.8 41 assert debug[0]["knn_score"] == 0.8
  42 + assert isclose(debug[0]["rerank_factor"], 0.90001, rel_tol=1e-9)
41 assert [hit["_id"] for hit in hits] == ["2", "1"] 43 assert [hit["_id"] for hit in hits] == ["2", "1"]
42 44
43 45
@@ -47,7 +49,7 @@ def test_fuse_scores_and_resort_falls_back_when_matched_queries_missing(): @@ -47,7 +49,7 @@ def test_fuse_scores_and_resort_falls_back_when_matched_queries_missing():
47 {"_id": "2", "_score": 2.0}, 49 {"_id": "2", "_score": 2.0},
48 ] 50 ]
49 51
50 - fuse_scores_and_resort(hits, [0.4, 0.3]) 52 + debug = fuse_scores_and_resort(hits, [0.4, 0.3], debug=True)
51 53
52 expected_1 = (0.4 + 0.00001) * ((0.5 + 0.1) ** 0.35) * ((0.0 + 0.6) ** 0.2) 54 expected_1 = (0.4 + 0.00001) * ((0.5 + 0.1) ** 0.35) * ((0.0 + 0.6) ** 0.2)
53 expected_2 = (0.3 + 0.00001) * ((2.0 + 0.1) ** 0.35) * ((0.0 + 0.6) ** 0.2) 55 expected_2 = (0.3 + 0.00001) * ((2.0 + 0.1) ** 0.35) * ((0.0 + 0.6) ** 0.2)
@@ -58,6 +60,8 @@ def test_fuse_scores_and_resort_falls_back_when_matched_queries_missing(): @@ -58,6 +60,8 @@ def test_fuse_scores_and_resort_falls_back_when_matched_queries_missing():
58 assert isclose(by_id["1"]["_fused_score"], expected_1, rel_tol=1e-9) 60 assert isclose(by_id["1"]["_fused_score"], expected_1, rel_tol=1e-9)
59 assert isclose(by_id["2"]["_text_score"], 2.0, rel_tol=1e-9) 61 assert isclose(by_id["2"]["_text_score"], 2.0, rel_tol=1e-9)
60 assert isclose(by_id["2"]["_fused_score"], expected_2, rel_tol=1e-9) 62 assert isclose(by_id["2"]["_fused_score"], expected_2, rel_tol=1e-9)
  63 + assert debug[0]["text_score_fallback_to_es"] is True
  64 + assert debug[1]["text_score_fallback_to_es"] is True
61 assert [hit["_id"] for hit in hits] == ["2", "1"] 65 assert [hit["_id"] for hit in hits] == ["2", "1"]
62 66
63 67
tests/test_search_rerank_window.py
@@ -612,3 +612,33 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc @@ -612,3 +612,33 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc
612 assert len(result.results) == 1 612 assert len(result.results) == 1
613 assert result.results[0].skus[0].sku_id == "sku-blue" 613 assert result.results[0].skus[0].sku_id == "sku-blue"
614 assert result.results[0].image_url == "https://img/blue.jpg" 614 assert result.results[0].image_url == "https://img/blue.jpg"
  615 +
  616 +
  617 +def test_searcher_debug_info_includes_es_positions_and_context(monkeypatch):
  618 + es_client = _FakeESClient(total_hits=3)
  619 + searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client)
  620 + context = create_request_context(reqid="dbg", uid="u-dbg")
  621 +
  622 + monkeypatch.setattr(
  623 + "search.searcher.get_tenant_config_loader",
  624 + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en", "zh"]}),
  625 + )
  626 +
  627 + result = searcher.search(
  628 + query="toy",
  629 + tenant_id="162",
  630 + from_=0,
  631 + size=2,
  632 + context=context,
  633 + enable_rerank=False,
  634 + debug=True,
  635 + )
  636 +
  637 + assert result.debug_info["query_analysis"]["index_languages"] == ["en", "zh"]
  638 + assert result.debug_info["es_query_context"]["es_fetch_size"] == 2
  639 + assert result.debug_info["es_response"]["initial_es_max_score"] == 3.0
  640 + assert result.debug_info["es_response"]["initial_es_min_score"] == 2.0
  641 + assert result.debug_info["per_result"][0]["initial_rank"] == 1
  642 + assert result.debug_info["per_result"][0]["final_rank"] == 1
  643 + assert result.debug_info["per_result"][0]["es_score_normalized"] == 1.0
  644 + assert result.debug_info["per_result"][1]["es_score_norm"] == 0.0