Commit 581dafaeb165ad1fb9ed8208e27fea5364a0dc59
1 parent
8ae95af0
debug工具,每条结果的打分中间过程展示
The backend now exposes a structured debug_info that is much closer to the real ranking pipeline:
- query_analysis now includes index_languages, query_tokens, a query-vector summary, the translation/enrichment plan, and translation debug.
- query_build now explains the ES recall plan: base-language clause, translated clauses, filters vs. post-filters, KNN settings, function-score config, and related inputs.
- es_request distinguishes the logical DSL from the actual body sent to ES, including the rerank prefetch _source.
- es_response now includes the initial ES ranking window stats used for score interpretation.
- rerank now includes execution state, templates, rendered rerank query text, window/top_n, service/meta, and the fusion formula.
- pagination now shows the rerank-window fetch vs. the requested page, plus page-fill details.

For each result in debug_info.per_result, ranking debug is now much richer:
- initial rank and final rank
- raw ES score
- es_score_normalized = raw score / initial ES window max
- es_score_norm = min-max normalization over the initial ES window
- explicit normalization notes explaining that fusion does not directly consume an ES-normalized score
- rerank input details: doc template, title suffix, template field values, doc preview/length
- fusion breakdown: rerank_factor, text_factor, knn_factor, constants, raw inputs, final fused score
- text subcomponents: source/translation/weighted/primary/support/fallback evidence via matched_queries
- richer style-intent SKU debug, including selected SKU summary and intent texts
Showing 5 changed files with 290 additions and 56 deletions
Show diff stats
frontend/static/js/app.js
| @@ -412,7 +412,7 @@ function displayResults(data) { | @@ -412,7 +412,7 @@ function displayResults(data) { | ||
| 412 | ? debug.es_score_normalized.toFixed(4) | 412 | ? debug.es_score_normalized.toFixed(4) |
| 413 | : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized)); | 413 | : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized)); |
| 414 | 414 | ||
| 415 | - const esNormRerank = typeof debug.es_score_norm === 'number' | 415 | + const esNormMinMax = typeof debug.es_score_norm === 'number' |
| 416 | ? debug.es_score_norm.toFixed(4) | 416 | ? debug.es_score_norm.toFixed(4) |
| 417 | : (debug.es_score_norm == null ? '' : String(debug.es_score_norm)); | 417 | : (debug.es_score_norm == null ? '' : String(debug.es_score_norm)); |
| 418 | 418 | ||
| @@ -436,17 +436,37 @@ function displayResults(data) { | @@ -436,17 +436,37 @@ function displayResults(data) { | ||
| 436 | 436 | ||
| 437 | const resultJson = customStringify(result); | 437 | const resultJson = customStringify(result); |
| 438 | const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`; | 438 | const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`; |
| 439 | + const rerankInputHtml = debug.rerank_input | ||
| 440 | + ? `<details><summary>Rerank input</summary><pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.rerank_input))}</pre></details>` | ||
| 441 | + : ''; | ||
| 442 | + const styleIntentHtml = debug.style_intent_sku | ||
| 443 | + ? `<details><summary>Selected SKU</summary><pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.style_intent_sku))}</pre></details>` | ||
| 444 | + : ''; | ||
| 445 | + const matchedQueriesHtml = debug.matched_queries | ||
| 446 | + ? `<details><summary>matched_queries</summary><pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.matched_queries))}</pre></details>` | ||
| 447 | + : ''; | ||
| 439 | 448 | ||
| 440 | debugHtml = ` | 449 | debugHtml = ` |
| 441 | <div class="product-debug"> | 450 | <div class="product-debug"> |
| 442 | <div class="product-debug-title">Ranking Debug</div> | 451 | <div class="product-debug-title">Ranking Debug</div> |
| 443 | <div class="product-debug-line">spu_id: ${escapeHtml(String(spuId || ''))}</div> | 452 | <div class="product-debug-line">spu_id: ${escapeHtml(String(spuId || ''))}</div> |
| 453 | + <div class="product-debug-line">Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}</div> | ||
| 454 | + <div class="product-debug-line">Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}</div> | ||
| 444 | <div class="product-debug-line">ES score: ${esScore}</div> | 455 | <div class="product-debug-line">ES score: ${esScore}</div> |
| 445 | - <div class="product-debug-line">ES normalized: ${esNorm}</div> | ||
| 446 | - <div class="product-debug-line">ES norm (rerank input): ${esNormRerank}</div> | 456 | + <div class="product-debug-line">ES normalized (score / initial ES max): ${esNorm}</div> |
| 457 | + <div class="product-debug-line">ES norm (min-max over initial ES window): ${esNormMinMax}</div> | ||
| 458 | + <div class="product-debug-line">ES score min/max: ${escapeHtml(String(debug.es_score_min ?? ''))} / ${escapeHtml(String(debug.es_score_max ?? ''))}</div> | ||
| 447 | <div class="product-debug-line">Rerank score: ${rerankScore}</div> | 459 | <div class="product-debug-line">Rerank score: ${rerankScore}</div> |
| 460 | + <div class="product-debug-line">rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}</div> | ||
| 461 | + <div class="product-debug-line">text_score: ${escapeHtml(String(debug.text_score ?? ''))}</div> | ||
| 462 | + <div class="product-debug-line">text_factor: ${escapeHtml(String(debug.text_factor ?? ''))}</div> | ||
| 463 | + <div class="product-debug-line">knn_score: ${escapeHtml(String(debug.knn_score ?? ''))}</div> | ||
| 464 | + <div class="product-debug-line">knn_factor: ${escapeHtml(String(debug.knn_factor ?? ''))}</div> | ||
| 448 | <div class="product-debug-line">Fused score: ${fusedScore}</div> | 465 | <div class="product-debug-line">Fused score: ${fusedScore}</div> |
| 449 | ${titleLines} | 466 | ${titleLines} |
| 467 | + ${rerankInputHtml} | ||
| 468 | + ${styleIntentHtml} | ||
| 469 | + ${matchedQueriesHtml} | ||
| 450 | <div class="product-debug-actions"> | 470 | <div class="product-debug-actions"> |
| 451 | <button type="button" class="product-debug-inline-result-btn" | 471 | <button type="button" class="product-debug-inline-result-btn" |
| 452 | data-action="toggle-result-inline-doc" | 472 | data-action="toggle-result-inline-doc" |
| @@ -866,7 +886,7 @@ function displayDebugInfo(data) { | @@ -866,7 +886,7 @@ function displayDebugInfo(data) { | ||
| 866 | if (data.query_info) { | 886 | if (data.query_info) { |
| 867 | let html = '<div style="padding: 10px;">'; | 887 | let html = '<div style="padding: 10px;">'; |
| 868 | html += `<div><strong>original_query:</strong> ${escapeHtml(data.query_info.original_query || 'N/A')}</div>`; | 888 | html += `<div><strong>original_query:</strong> ${escapeHtml(data.query_info.original_query || 'N/A')}</div>`; |
| 869 | - html += `<div><strong>detected_language:</strong> ${data.query_info.detected_languag}</div>`; | 889 | + html += `<div><strong>detected_language:</strong> ${escapeHtml(data.query_info.detected_language || 'N/A')}</div>`; |
| 870 | html += '</div>'; | 890 | html += '</div>'; |
| 871 | debugInfoDiv.innerHTML = html; | 891 | debugInfoDiv.innerHTML = html; |
| 872 | } else { | 892 | } else { |
| @@ -885,9 +905,14 @@ function displayDebugInfo(data) { | @@ -885,9 +905,14 @@ function displayDebugInfo(data) { | ||
| 885 | html += `<div>original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}</div>`; | 905 | html += `<div>original_query: ${escapeHtml(debugInfo.query_analysis.original_query || 'N/A')}</div>`; |
| 886 | html += `<div>query_normalized: ${escapeHtml(debugInfo.query_analysis.query_normalized || 'N/A')}</div>`; | 906 | html += `<div>query_normalized: ${escapeHtml(debugInfo.query_analysis.query_normalized || 'N/A')}</div>`; |
| 887 | html += `<div>rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}</div>`; | 907 | html += `<div>rewritten_query: ${escapeHtml(debugInfo.query_analysis.rewritten_query || 'N/A')}</div>`; |
| 888 | - html += `<div>detected_language: ${debugInfo.query_analysis.detected_language}</div>`; | 908 | + html += `<div>detected_language: ${escapeHtml(debugInfo.query_analysis.detected_language || 'N/A')}</div>`; |
| 909 | + html += `<div>index_languages: ${escapeHtml((debugInfo.query_analysis.index_languages || []).join(', ') || 'N/A')}</div>`; | ||
| 910 | + html += `<div>query_tokens: ${escapeHtml((debugInfo.query_analysis.query_tokens || []).join(', ') || 'N/A')}</div>`; | ||
| 889 | html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`; | 911 | html += `<div>domain: ${escapeHtml(debugInfo.query_analysis.domain || 'default')}</div>`; |
| 890 | html += `<div>is_simple_query: ${debugInfo.query_analysis.is_simple_query ? 'yes' : 'no'}</div>`; | 912 | html += `<div>is_simple_query: ${debugInfo.query_analysis.is_simple_query ? 'yes' : 'no'}</div>`; |
| 913 | + if (debugInfo.query_analysis.query_vector_summary) { | ||
| 914 | + html += `<div>query_vector_summary: ${escapeHtml(customStringify(debugInfo.query_analysis.query_vector_summary))}</div>`; | ||
| 915 | + } | ||
| 891 | 916 | ||
| 892 | if (debugInfo.query_analysis.translations && Object.keys(debugInfo.query_analysis.translations).length > 0) { | 917 | if (debugInfo.query_analysis.translations && Object.keys(debugInfo.query_analysis.translations).length > 0) { |
| 893 | html += '<div>translations: '; | 918 | html += '<div>translations: '; |
| @@ -921,6 +946,25 @@ function displayDebugInfo(data) { | @@ -921,6 +946,25 @@ function displayDebugInfo(data) { | ||
| 921 | html += `<div>took_ms: ${debugInfo.es_response.took_ms}ms</div>`; | 946 | html += `<div>took_ms: ${debugInfo.es_response.took_ms}ms</div>`; |
| 922 | html += `<div>total_hits: ${debugInfo.es_response.total_hits}</div>`; | 947 | html += `<div>total_hits: ${debugInfo.es_response.total_hits}</div>`; |
| 923 | html += `<div>max_score: ${debugInfo.es_response.max_score?.toFixed(3) || 0}</div>`; | 948 | html += `<div>max_score: ${debugInfo.es_response.max_score?.toFixed(3) || 0}</div>`; |
| 949 | + html += `<div>initial_es_max_score: ${escapeHtml(String(debugInfo.es_response.initial_es_max_score ?? ''))}</div>`; | ||
| 950 | + html += `<div>initial_es_min_score: ${escapeHtml(String(debugInfo.es_response.initial_es_min_score ?? ''))}</div>`; | ||
| 951 | + html += '</div>'; | ||
| 952 | + } | ||
| 953 | + | ||
| 954 | + if (debugInfo.rerank) { | ||
| 955 | + html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Rerank:</strong>'; | ||
| 956 | + html += `<div>requested: ${debugInfo.rerank.requested ? 'yes' : 'no'}</div>`; | ||
| 957 | + html += `<div>executed: ${debugInfo.rerank.executed ? 'yes' : 'no'}</div>`; | ||
| 958 | + html += `<div>in_rerank_window: ${debugInfo.rerank.in_rerank_window ? 'yes' : 'no'}</div>`; | ||
| 959 | + html += `<div>top_n: ${escapeHtml(String(debugInfo.rerank.top_n ?? ''))}</div>`; | ||
| 960 | + html += `<div>query_template: ${escapeHtml(debugInfo.rerank.query_template || 'N/A')}</div>`; | ||
| 961 | + html += `<div>doc_template: ${escapeHtml(debugInfo.rerank.doc_template || 'N/A')}</div>`; | ||
| 962 | + html += '</div>'; | ||
| 963 | + } | ||
| 964 | + | ||
| 965 | + if (debugInfo.page_fill) { | ||
| 966 | + html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">Page Fill:</strong>'; | ||
| 967 | + html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debugInfo.page_fill))}</pre>`; | ||
| 924 | html += '</div>'; | 968 | html += '</div>'; |
| 925 | } | 969 | } |
| 926 | 970 | ||
| @@ -945,6 +989,12 @@ function displayDebugInfo(data) { | @@ -945,6 +989,12 @@ function displayDebugInfo(data) { | ||
| 945 | html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 400px;">${escapeHtml(customStringify(debugInfo.es_query))}</pre>`; | 989 | html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 400px;">${escapeHtml(customStringify(debugInfo.es_query))}</pre>`; |
| 946 | html += '</div>'; | 990 | html += '</div>'; |
| 947 | } | 991 | } |
| 992 | + | ||
| 993 | + if (debugInfo.es_query_context) { | ||
| 994 | + html += '<div style="margin-bottom: 15px;"><strong style="font-size: 14px;">ES Query Context:</strong>'; | ||
| 995 | + html += `<pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 300px;">${escapeHtml(customStringify(debugInfo.es_query_context))}</pre>`; | ||
| 996 | + html += '</div>'; | ||
| 997 | + } | ||
| 948 | 998 | ||
| 949 | html += '</div>'; | 999 | html += '</div>'; |
| 950 | debugInfoDiv.innerHTML = html; | 1000 | debugInfoDiv.innerHTML = html; |
search/rerank_client.py
| @@ -25,6 +25,7 @@ def build_docs_from_hits( | @@ -25,6 +25,7 @@ def build_docs_from_hits( | ||
| 25 | es_hits: List[Dict[str, Any]], | 25 | es_hits: List[Dict[str, Any]], |
| 26 | language: str = "zh", | 26 | language: str = "zh", |
| 27 | doc_template: str = "{title}", | 27 | doc_template: str = "{title}", |
| 28 | + debug_rows: Optional[List[Dict[str, Any]]] = None, | ||
| 28 | ) -> List[str]: | 29 | ) -> List[str]: |
| 29 | """ | 30 | """ |
| 30 | 从 ES 命中结果构造重排服务所需的文档文本列表(与 hits 一一对应)。 | 31 | 从 ES 命中结果构造重排服务所需的文档文本列表(与 hits 一一对应)。 |
| @@ -63,24 +64,36 @@ def build_docs_from_hits( | @@ -63,24 +64,36 @@ def build_docs_from_hits( | ||
| 63 | for hit in es_hits: | 64 | for hit in es_hits: |
| 64 | src = hit.get("_source") or {} | 65 | src = hit.get("_source") or {} |
| 65 | title_suffix = str(hit.get("_style_rerank_suffix") or "").strip() | 66 | title_suffix = str(hit.get("_style_rerank_suffix") or "").strip() |
| 67 | + values = _SafeDict( | ||
| 68 | + title=( | ||
| 69 | + f"{pick_lang_text(src.get('title'))} {title_suffix}".strip() | ||
| 70 | + if title_suffix | ||
| 71 | + else pick_lang_text(src.get("title")) | ||
| 72 | + ), | ||
| 73 | + brief=pick_lang_text(src.get("brief")) if need_brief else "", | ||
| 74 | + vendor=pick_lang_text(src.get("vendor")) if need_vendor else "", | ||
| 75 | + description=pick_lang_text(src.get("description")) if need_description else "", | ||
| 76 | + category_path=pick_lang_text(src.get("category_path")) if need_category_path else "", | ||
| 77 | + ) | ||
| 66 | if only_title: | 78 | if only_title: |
| 67 | - title = pick_lang_text(src.get("title")) | ||
| 68 | - if title_suffix: | ||
| 69 | - title = f"{title} {title_suffix}".strip() | ||
| 70 | - docs.append(title) | 79 | + doc_text = values["title"] |
| 71 | else: | 80 | else: |
| 72 | - values = _SafeDict( | ||
| 73 | - title=( | ||
| 74 | - f"{pick_lang_text(src.get('title'))} {title_suffix}".strip() | ||
| 75 | - if title_suffix | ||
| 76 | - else pick_lang_text(src.get("title")) | ||
| 77 | - ), | ||
| 78 | - brief=pick_lang_text(src.get("brief")) if need_brief else "", | ||
| 79 | - vendor=pick_lang_text(src.get("vendor")) if need_vendor else "", | ||
| 80 | - description=pick_lang_text(src.get("description")) if need_description else "", | ||
| 81 | - category_path=pick_lang_text(src.get("category_path")) if need_category_path else "", | ||
| 82 | - ) | ||
| 83 | - docs.append(str(doc_template).format_map(values)) | 81 | + doc_text = str(doc_template).format_map(values) |
| 82 | + docs.append(doc_text) | ||
| 83 | + if debug_rows is not None: | ||
| 84 | + preview = doc_text if len(doc_text) <= 300 else f"{doc_text[:300]}..." | ||
| 85 | + debug_rows.append({ | ||
| 86 | + "doc_template": doc_template, | ||
| 87 | + "title_suffix": title_suffix or None, | ||
| 88 | + "fields": { | ||
| 89 | + "title": values["title"] or None, | ||
| 90 | + "brief": values["brief"] or None, | ||
| 91 | + "vendor": values["vendor"] or None, | ||
| 92 | + "category_path": values["category_path"] or None, | ||
| 93 | + }, | ||
| 94 | + "doc_preview": preview, | ||
| 95 | + "doc_length": len(doc_text), | ||
| 96 | + }) | ||
| 84 | return docs | 97 | return docs |
| 85 | 98 | ||
| 86 | 99 | ||
| @@ -163,18 +176,13 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa | @@ -163,18 +176,13 @@ def _collect_text_score_components(matched_queries: Any, fallback_es_score: floa | ||
| 163 | } | 176 | } |
| 164 | 177 | ||
| 165 | 178 | ||
| 166 | -def _fuse_score(rerank_score: float, text_score: float, knn_score: float) -> float: | ||
| 167 | - rerank_factor = max(rerank_score, 0.0) + 0.00001 | ||
| 168 | - text_factor = (max(text_score, 0.0) + 0.1) ** 0.35 | ||
| 169 | - knn_factor = (max(knn_score, 0.0) + 0.6) ** 0.2 | ||
| 170 | - return rerank_factor * text_factor * knn_factor | ||
| 171 | - | ||
| 172 | - | ||
| 173 | def fuse_scores_and_resort( | 179 | def fuse_scores_and_resort( |
| 174 | es_hits: List[Dict[str, Any]], | 180 | es_hits: List[Dict[str, Any]], |
| 175 | rerank_scores: List[float], | 181 | rerank_scores: List[float], |
| 176 | weight_es: float = DEFAULT_WEIGHT_ES, | 182 | weight_es: float = DEFAULT_WEIGHT_ES, |
| 177 | weight_ai: float = DEFAULT_WEIGHT_AI, | 183 | weight_ai: float = DEFAULT_WEIGHT_AI, |
| 184 | + debug: bool = False, | ||
| 185 | + rerank_debug_rows: Optional[List[Dict[str, Any]]] = None, | ||
| 178 | ) -> List[Dict[str, Any]]: | 186 | ) -> List[Dict[str, Any]]: |
| 179 | """ | 187 | """ |
| 180 | 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。 | 188 | 将 ES 分数与重排分数按乘法公式融合(不修改原始 _score),并按融合分数降序重排。 |
| @@ -211,7 +219,10 @@ def fuse_scores_and_resort( | @@ -211,7 +219,10 @@ def fuse_scores_and_resort( | ||
| 211 | knn_score = _extract_named_query_score(matched_queries, "knn_query") | 219 | knn_score = _extract_named_query_score(matched_queries, "knn_query") |
| 212 | text_components = _collect_text_score_components(matched_queries, es_score) | 220 | text_components = _collect_text_score_components(matched_queries, es_score) |
| 213 | text_score = text_components["text_score"] | 221 | text_score = text_components["text_score"] |
| 214 | - fused = _fuse_score(rerank_score, text_score, knn_score) | 222 | + rerank_factor = max(rerank_score, 0.0) + 0.00001 |
| 223 | + text_factor = (max(text_score, 0.0) + 0.1) ** 0.35 | ||
| 224 | + knn_factor = (max(knn_score, 0.0) + 0.6) ** 0.2 | ||
| 225 | + fused = rerank_factor * text_factor * knn_factor | ||
| 215 | 226 | ||
| 216 | hit["_original_score"] = hit.get("_score") | 227 | hit["_original_score"] = hit.get("_score") |
| 217 | hit["_rerank_score"] = rerank_score | 228 | hit["_rerank_score"] = rerank_score |
| @@ -223,19 +234,33 @@ def fuse_scores_and_resort( | @@ -223,19 +234,33 @@ def fuse_scores_and_resort( | ||
| 223 | hit["_text_support_score"] = text_components["support_text_score"] | 234 | hit["_text_support_score"] = text_components["support_text_score"] |
| 224 | hit["_fused_score"] = fused | 235 | hit["_fused_score"] = fused |
| 225 | 236 | ||
| 226 | - fused_debug.append({ | ||
| 227 | - "doc_id": hit.get("_id"), | ||
| 228 | - "es_score": es_score, | ||
| 229 | - "rerank_score": rerank_score, | ||
| 230 | - "text_score": text_score, | ||
| 231 | - "text_source_score": text_components["source_score"], | ||
| 232 | - "text_translation_score": text_components["translation_score"], | ||
| 233 | - "text_primary_score": text_components["primary_text_score"], | ||
| 234 | - "text_support_score": text_components["support_text_score"], | ||
| 235 | - "knn_score": knn_score, | ||
| 236 | - "matched_queries": matched_queries, | ||
| 237 | - "fused_score": fused, | ||
| 238 | - }) | 237 | + if debug: |
| 238 | + debug_entry = { | ||
| 239 | + "doc_id": hit.get("_id"), | ||
| 240 | + "es_score": es_score, | ||
| 241 | + "rerank_score": rerank_score, | ||
| 242 | + "text_score": text_score, | ||
| 243 | + "text_source_score": text_components["source_score"], | ||
| 244 | + "text_translation_score": text_components["translation_score"], | ||
| 245 | + "text_weighted_source_score": text_components["weighted_source_score"], | ||
| 246 | + "text_weighted_translation_score": text_components["weighted_translation_score"], | ||
| 247 | + "text_primary_score": text_components["primary_text_score"], | ||
| 248 | + "text_support_score": text_components["support_text_score"], | ||
| 249 | + "text_score_fallback_to_es": ( | ||
| 250 | + text_score == es_score | ||
| 251 | + and text_components["source_score"] <= 0.0 | ||
| 252 | + and text_components["translation_score"] <= 0.0 | ||
| 253 | + ), | ||
| 254 | + "knn_score": knn_score, | ||
| 255 | + "rerank_factor": rerank_factor, | ||
| 256 | + "text_factor": text_factor, | ||
| 257 | + "knn_factor": knn_factor, | ||
| 258 | + "matched_queries": matched_queries, | ||
| 259 | + "fused_score": fused, | ||
| 260 | + } | ||
| 261 | + if rerank_debug_rows is not None and idx < len(rerank_debug_rows): | ||
| 262 | + debug_entry["rerank_input"] = rerank_debug_rows[idx] | ||
| 263 | + fused_debug.append(debug_entry) | ||
| 239 | 264 | ||
| 240 | # 按融合分数降序重排 | 265 | # 按融合分数降序重排 |
| 241 | es_hits.sort( | 266 | es_hits.sort( |
| @@ -255,6 +280,7 @@ def run_rerank( | @@ -255,6 +280,7 @@ def run_rerank( | ||
| 255 | rerank_query_template: str = "{query}", | 280 | rerank_query_template: str = "{query}", |
| 256 | rerank_doc_template: str = "{title}", | 281 | rerank_doc_template: str = "{title}", |
| 257 | top_n: Optional[int] = None, | 282 | top_n: Optional[int] = None, |
| 283 | + debug: bool = False, | ||
| 258 | ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]: | 284 | ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]: |
| 259 | """ | 285 | """ |
| 260 | 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。 | 286 | 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。 |
| @@ -266,7 +292,13 @@ def run_rerank( | @@ -266,7 +292,13 @@ def run_rerank( | ||
| 266 | return es_response, None, [] | 292 | return es_response, None, [] |
| 267 | 293 | ||
| 268 | query_text = str(rerank_query_template).format_map({"query": query}) | 294 | query_text = str(rerank_query_template).format_map({"query": query}) |
| 269 | - docs = build_docs_from_hits(hits, language=language, doc_template=rerank_doc_template) | 295 | + rerank_debug_rows: Optional[List[Dict[str, Any]]] = [] if debug else None |
| 296 | + docs = build_docs_from_hits( | ||
| 297 | + hits, | ||
| 298 | + language=language, | ||
| 299 | + doc_template=rerank_doc_template, | ||
| 300 | + debug_rows=rerank_debug_rows, | ||
| 301 | + ) | ||
| 270 | scores, meta = call_rerank_service( | 302 | scores, meta = call_rerank_service( |
| 271 | query_text, | 303 | query_text, |
| 272 | docs, | 304 | docs, |
| @@ -282,6 +314,8 @@ def run_rerank( | @@ -282,6 +314,8 @@ def run_rerank( | ||
| 282 | scores, | 314 | scores, |
| 283 | weight_es=weight_es, | 315 | weight_es=weight_es, |
| 284 | weight_ai=weight_ai, | 316 | weight_ai=weight_ai, |
| 317 | + debug=debug, | ||
| 318 | + rerank_debug_rows=rerank_debug_rows, | ||
| 285 | ) | 319 | ) |
| 286 | 320 | ||
| 287 | # 更新 max_score 为融合后的最高分 | 321 | # 更新 max_score 为融合后的最高分 |
search/searcher.py
| @@ -339,6 +339,21 @@ class Searcher: | @@ -339,6 +339,21 @@ class Searcher: | ||
| 339 | in_rerank_window = do_rerank and (from_ + size) <= rerank_window | 339 | in_rerank_window = do_rerank and (from_ + size) <= rerank_window |
| 340 | es_fetch_from = 0 if in_rerank_window else from_ | 340 | es_fetch_from = 0 if in_rerank_window else from_ |
| 341 | es_fetch_size = rerank_window if in_rerank_window else size | 341 | es_fetch_size = rerank_window if in_rerank_window else size |
| 342 | + initial_es_positions: Dict[str, int] = {} | ||
| 343 | + initial_es_min_score: Optional[float] = None | ||
| 344 | + initial_es_max_score: Optional[float] = None | ||
| 345 | + page_fill_debug: Optional[Dict[str, Any]] = None | ||
| 346 | + rerank_debug_info: Optional[Dict[str, Any]] = None | ||
| 347 | + if debug: | ||
| 348 | + rerank_debug_info = { | ||
| 349 | + "requested": do_rerank, | ||
| 350 | + "executed": False, | ||
| 351 | + "in_rerank_window": in_rerank_window, | ||
| 352 | + "rerank_window": rerank_window, | ||
| 353 | + "top_n": from_ + size, | ||
| 354 | + "query_template": effective_query_template, | ||
| 355 | + "doc_template": effective_doc_template, | ||
| 356 | + } | ||
| 342 | 357 | ||
| 343 | # Start timing | 358 | # Start timing |
| 344 | context.start_stage(RequestContextStage.TOTAL) | 359 | context.start_stage(RequestContextStage.TOTAL) |
| @@ -348,6 +363,7 @@ class Searcher: | @@ -348,6 +363,7 @@ class Searcher: | ||
| 348 | f"enable_rerank(request)={enable_rerank}, enable_rerank(config)={rerank_enabled_by_config}, " | 363 | f"enable_rerank(request)={enable_rerank}, enable_rerank(config)={rerank_enabled_by_config}, " |
| 349 | f"enable_rerank(effective)={do_rerank}, in_rerank_window={in_rerank_window}, " | 364 | f"enable_rerank(effective)={do_rerank}, in_rerank_window={in_rerank_window}, " |
| 350 | f"es_fetch=({es_fetch_from},{es_fetch_size}) | " | 365 | f"es_fetch=({es_fetch_from},{es_fetch_size}) | " |
| 366 | + f"index_languages={index_langs} | " | ||
| 351 | f"enable_translation={enable_translation}, enable_embedding={enable_embedding}, min_score={min_score}", | 367 | f"enable_translation={enable_translation}, enable_embedding={enable_embedding}, min_score={min_score}", |
| 352 | extra={'reqid': context.reqid, 'uid': context.uid} | 368 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 353 | ) | 369 | ) |
| @@ -531,13 +547,36 @@ class Searcher: | @@ -531,13 +547,36 @@ class Searcher: | ||
| 531 | 547 | ||
| 532 | # Store ES response in context | 548 | # Store ES response in context |
| 533 | context.store_intermediate_result('es_response', es_response) | 549 | context.store_intermediate_result('es_response', es_response) |
| 550 | + if debug: | ||
| 551 | + initial_hits = es_response.get('hits', {}).get('hits') or [] | ||
| 552 | + initial_scores: List[float] = [] | ||
| 553 | + for rank, hit in enumerate(initial_hits, 1): | ||
| 554 | + doc_id = hit.get("_id") | ||
| 555 | + if doc_id is not None: | ||
| 556 | + initial_es_positions[str(doc_id)] = rank | ||
| 557 | + raw_score = hit.get("_score") | ||
| 558 | + try: | ||
| 559 | + if raw_score is not None: | ||
| 560 | + initial_scores.append(float(raw_score)) | ||
| 561 | + except (TypeError, ValueError): | ||
| 562 | + pass | ||
| 563 | + raw_max_score = es_response.get('hits', {}).get('max_score') | ||
| 564 | + try: | ||
| 565 | + initial_es_max_score = float(raw_max_score) if raw_max_score is not None else None | ||
| 566 | + except (TypeError, ValueError): | ||
| 567 | + initial_es_max_score = None | ||
| 568 | + if initial_es_max_score is None and initial_scores: | ||
| 569 | + initial_es_max_score = max(initial_scores) | ||
| 570 | + initial_es_min_score = min(initial_scores) if initial_scores else None | ||
| 534 | 571 | ||
| 535 | # Extract timing from ES response | 572 | # Extract timing from ES response |
| 536 | es_took = es_response.get('took', 0) | 573 | es_took = es_response.get('took', 0) |
| 537 | context.logger.info( | 574 | context.logger.info( |
| 538 | f"ES搜索完成 | 耗时: {es_took}ms | " | 575 | f"ES搜索完成 | 耗时: {es_took}ms | " |
| 539 | f"命中数: {es_response.get('hits', {}).get('total', {}).get('value', 0)} | " | 576 | f"命中数: {es_response.get('hits', {}).get('total', {}).get('value', 0)} | " |
| 540 | - f"最高分: {(es_response.get('hits', {}).get('max_score') or 0):.3f}", | 577 | + f"最高分: {(es_response.get('hits', {}).get('max_score') or 0):.3f} | " |
| 578 | + f"detected_language={parsed_query.detected_language} | " | ||
| 579 | + f"translations={list((parsed_query.translations or {}).keys())}", | ||
| 541 | extra={'reqid': context.reqid, 'uid': context.uid} | 580 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 542 | ) | 581 | ) |
| 543 | except Exception as e: | 582 | except Exception as e: |
| @@ -581,24 +620,31 @@ class Searcher: | @@ -581,24 +620,31 @@ class Searcher: | ||
| 581 | rerank_query_template=effective_query_template, | 620 | rerank_query_template=effective_query_template, |
| 582 | rerank_doc_template=effective_doc_template, | 621 | rerank_doc_template=effective_doc_template, |
| 583 | top_n=(from_ + size), | 622 | top_n=(from_ + size), |
| 623 | + debug=debug, | ||
| 584 | ) | 624 | ) |
| 585 | 625 | ||
| 586 | if rerank_meta is not None: | 626 | if rerank_meta is not None: |
| 587 | from config.services_config import get_rerank_service_url | 627 | from config.services_config import get_rerank_service_url |
| 588 | rerank_url = get_rerank_service_url() | 628 | rerank_url = get_rerank_service_url() |
| 589 | - context.metadata.setdefault("rerank_info", {}) | ||
| 590 | - context.metadata["rerank_info"].update({ | ||
| 591 | - "service_url": rerank_url, | ||
| 592 | - "docs": len(es_response.get("hits", {}).get("hits") or []), | ||
| 593 | - "meta": rerank_meta, | ||
| 594 | - }) | ||
| 595 | - context.store_intermediate_result("rerank_scores", fused_debug) | 629 | + if debug and rerank_debug_info is not None: |
| 630 | + rerank_debug_info.update({ | ||
| 631 | + "executed": True, | ||
| 632 | + "service_url": rerank_url, | ||
| 633 | + "query_text": str(effective_query_template).format_map({"query": rerank_query}), | ||
| 634 | + "docs": len(es_response.get("hits", {}).get("hits") or []), | ||
| 635 | + "meta": rerank_meta, | ||
| 636 | + }) | ||
| 637 | + context.store_intermediate_result("rerank_scores", fused_debug) | ||
| 596 | context.logger.info( | 638 | context.logger.info( |
| 597 | - f"重排完成 | docs={len(fused_debug)} | meta={rerank_meta}", | 639 | + f"重排完成 | docs={len(es_response.get('hits', {}).get('hits') or [])} | " |
| 640 | + f"top_n={from_ + size} | query_template={effective_query_template} | " | ||
| 641 | + f"doc_template={effective_doc_template} | meta={rerank_meta}", | ||
| 598 | extra={'reqid': context.reqid, 'uid': context.uid} | 642 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 599 | ) | 643 | ) |
| 600 | except Exception as e: | 644 | except Exception as e: |
| 601 | context.add_warning(f"Rerank failed: {e}") | 645 | context.add_warning(f"Rerank failed: {e}") |
| 646 | + if debug and rerank_debug_info is not None: | ||
| 647 | + rerank_debug_info["error"] = str(e) | ||
| 602 | context.logger.warning( | 648 | context.logger.warning( |
| 603 | f"调用重排服务失败 | error: {e}", | 649 | f"调用重排服务失败 | error: {e}", |
| 604 | extra={'reqid': context.reqid, 'uid': context.uid}, | 650 | extra={'reqid': context.reqid, 'uid': context.uid}, |
| @@ -661,6 +707,13 @@ class Searcher: | @@ -661,6 +707,13 @@ class Searcher: | ||
| 661 | ) | 707 | ) |
| 662 | if fill_took: | 708 | if fill_took: |
| 663 | es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took) | 709 | es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took) |
| 710 | + if debug: | ||
| 711 | + page_fill_debug = { | ||
| 712 | + "requested_ids": page_ids, | ||
| 713 | + "filled": filled, | ||
| 714 | + "fill_took_ms": fill_took, | ||
| 715 | + "response_source_spec": response_source_spec, | ||
| 716 | + } | ||
| 664 | context.logger.info( | 717 | context.logger.info( |
| 665 | f"分页详情回填 | ids={len(page_ids)} | filled={filled} | took={fill_took}ms", | 718 | f"分页详情回填 | ids={len(page_ids)} | filled={filled} | took={fill_took}ms", |
| 666 | extra={'reqid': context.reqid, 'uid': context.uid} | 719 | extra={'reqid': context.reqid, 'uid': context.uid} |
| @@ -728,6 +781,11 @@ class Searcher: | @@ -728,6 +781,11 @@ class Searcher: | ||
| 728 | # Build per-result debug info (per SPU) when debug mode is enabled | 781 | # Build per-result debug info (per SPU) when debug mode is enabled |
| 729 | per_result_debug = [] | 782 | per_result_debug = [] |
| 730 | if debug and es_hits and formatted_results: | 783 | if debug and es_hits and formatted_results: |
| 784 | + final_positions_by_doc = { | ||
| 785 | + str(hit.get("_id")): (from_ + rank) | ||
| 786 | + for rank, hit in enumerate(es_hits, 1) | ||
| 787 | + if hit.get("_id") is not None | ||
| 788 | + } | ||
| 731 | for hit, spu in zip(es_hits, formatted_results): | 789 | for hit, spu in zip(es_hits, formatted_results): |
| 732 | source = hit.get("_source", {}) or {} | 790 | source = hit.get("_source", {}) or {} |
| 733 | doc_id = hit.get("_id") | 791 | doc_id = hit.get("_id") |
| @@ -746,9 +804,29 @@ class Searcher: | @@ -746,9 +804,29 @@ class Searcher: | ||
| 746 | except (TypeError, ValueError): | 804 | except (TypeError, ValueError): |
| 747 | es_score = 0.0 | 805 | es_score = 0.0 |
| 748 | try: | 806 | try: |
| 749 | - normalized = float(es_score) / float(max_score) if max_score else None | 807 | + normalized = ( |
| 808 | + float(es_score) / float(initial_es_max_score) | ||
| 809 | + if initial_es_max_score | ||
| 810 | + else None | ||
| 811 | + ) | ||
| 750 | except (TypeError, ValueError, ZeroDivisionError): | 812 | except (TypeError, ValueError, ZeroDivisionError): |
| 751 | normalized = None | 813 | normalized = None |
| 814 | + try: | ||
| 815 | + es_score_norm = ( | ||
| 816 | + (float(es_score) - float(initial_es_min_score)) | ||
| 817 | + / (float(initial_es_max_score) - float(initial_es_min_score)) | ||
| 818 | + if initial_es_min_score is not None | ||
| 819 | + and initial_es_max_score is not None | ||
| 820 | + and float(initial_es_max_score) > float(initial_es_min_score) | ||
| 821 | + else ( | ||
| 822 | + 1.0 | ||
| 823 | + if initial_es_min_score is not None | ||
| 824 | + and initial_es_max_score is not None | ||
| 825 | + else None | ||
| 826 | + ) | ||
| 827 | + ) | ||
| 828 | + except (TypeError, ValueError, ZeroDivisionError): | ||
| 829 | + es_score_norm = None | ||
| 752 | 830 | ||
| 753 | title_multilingual = source.get("title") if isinstance(source.get("title"), dict) else None | 831 | title_multilingual = source.get("title") if isinstance(source.get("title"), dict) else None |
| 754 | brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None | 832 | brief_multilingual = source.get("brief") if isinstance(source.get("brief"), dict) else None |
| @@ -758,6 +836,11 @@ class Searcher: | @@ -758,6 +836,11 @@ class Searcher: | ||
| 758 | "spu_id": spu.spu_id, | 836 | "spu_id": spu.spu_id, |
| 759 | "es_score": es_score, | 837 | "es_score": es_score, |
| 760 | "es_score_normalized": normalized, | 838 | "es_score_normalized": normalized, |
| 839 | + "es_score_norm": es_score_norm, | ||
| 840 | + "es_score_min": initial_es_min_score, | ||
| 841 | + "es_score_max": initial_es_max_score, | ||
| 842 | + "initial_rank": initial_es_positions.get(str(doc_id)) if doc_id is not None else None, | ||
| 843 | + "final_rank": final_positions_by_doc.get(str(doc_id)) if doc_id is not None else None, | ||
| 761 | "title_multilingual": title_multilingual, | 844 | "title_multilingual": title_multilingual, |
| 762 | "brief_multilingual": brief_multilingual, | 845 | "brief_multilingual": brief_multilingual, |
| 763 | "vendor_multilingual": vendor_multilingual, | 846 | "vendor_multilingual": vendor_multilingual, |
| @@ -771,10 +854,17 @@ class Searcher: | @@ -771,10 +854,17 @@ class Searcher: | ||
| 771 | debug_entry["text_score"] = rerank_debug.get("text_score") | 854 | debug_entry["text_score"] = rerank_debug.get("text_score") |
| 772 | debug_entry["text_source_score"] = rerank_debug.get("text_source_score") | 855 | debug_entry["text_source_score"] = rerank_debug.get("text_source_score") |
| 773 | debug_entry["text_translation_score"] = rerank_debug.get("text_translation_score") | 856 | debug_entry["text_translation_score"] = rerank_debug.get("text_translation_score") |
| 857 | + debug_entry["text_weighted_source_score"] = rerank_debug.get("text_weighted_source_score") | ||
| 858 | + debug_entry["text_weighted_translation_score"] = rerank_debug.get("text_weighted_translation_score") | ||
| 774 | debug_entry["text_primary_score"] = rerank_debug.get("text_primary_score") | 859 | debug_entry["text_primary_score"] = rerank_debug.get("text_primary_score") |
| 775 | debug_entry["text_support_score"] = rerank_debug.get("text_support_score") | 860 | debug_entry["text_support_score"] = rerank_debug.get("text_support_score") |
| 861 | + debug_entry["text_score_fallback_to_es"] = rerank_debug.get("text_score_fallback_to_es") | ||
| 776 | debug_entry["knn_score"] = rerank_debug.get("knn_score") | 862 | debug_entry["knn_score"] = rerank_debug.get("knn_score") |
| 863 | + debug_entry["rerank_factor"] = rerank_debug.get("rerank_factor") | ||
| 864 | + debug_entry["text_factor"] = rerank_debug.get("text_factor") | ||
| 865 | + debug_entry["knn_factor"] = rerank_debug.get("knn_factor") | ||
| 777 | debug_entry["fused_score"] = rerank_debug.get("fused_score") | 866 | debug_entry["fused_score"] = rerank_debug.get("fused_score") |
| 867 | + debug_entry["rerank_input"] = rerank_debug.get("rerank_input") | ||
| 778 | debug_entry["matched_queries"] = rerank_debug.get("matched_queries") | 868 | debug_entry["matched_queries"] = rerank_debug.get("matched_queries") |
| 779 | 869 | ||
| 780 | if style_intent_debug: | 870 | if style_intent_debug: |
| @@ -818,25 +908,51 @@ class Searcher: | @@ -818,25 +908,51 @@ class Searcher: | ||
| 818 | # Collect debug information if requested | 908 | # Collect debug information if requested |
| 819 | debug_info = None | 909 | debug_info = None |
| 820 | if debug: | 910 | if debug: |
| 911 | + query_vector_summary = None | ||
| 912 | + if parsed_query.query_vector is not None: | ||
| 913 | + query_vector_summary = { | ||
| 914 | + "dims": int(len(parsed_query.query_vector)), | ||
| 915 | + "preview": [round(float(v), 6) for v in parsed_query.query_vector[:8].tolist()], | ||
| 916 | + } | ||
| 821 | debug_info = { | 917 | debug_info = { |
| 822 | "query_analysis": { | 918 | "query_analysis": { |
| 823 | "original_query": context.query_analysis.original_query, | 919 | "original_query": context.query_analysis.original_query, |
| 824 | "query_normalized": context.query_analysis.query_normalized, | 920 | "query_normalized": context.query_analysis.query_normalized, |
| 825 | "rewritten_query": context.query_analysis.rewritten_query, | 921 | "rewritten_query": context.query_analysis.rewritten_query, |
| 826 | "detected_language": context.query_analysis.detected_language, | 922 | "detected_language": context.query_analysis.detected_language, |
| 923 | + "index_languages": index_langs, | ||
| 827 | "translations": context.query_analysis.translations, | 924 | "translations": context.query_analysis.translations, |
| 828 | "has_vector": context.query_analysis.query_vector is not None, | 925 | "has_vector": context.query_analysis.query_vector is not None, |
| 926 | + "query_vector_summary": query_vector_summary, | ||
| 927 | + "query_tokens": getattr(parsed_query, "query_tokens", []), | ||
| 829 | "is_simple_query": context.query_analysis.is_simple_query, | 928 | "is_simple_query": context.query_analysis.is_simple_query, |
| 830 | "domain": context.query_analysis.domain, | 929 | "domain": context.query_analysis.domain, |
| 831 | "style_intent_profile": context.get_intermediate_result("style_intent_profile"), | 930 | "style_intent_profile": context.get_intermediate_result("style_intent_profile"), |
| 832 | }, | 931 | }, |
| 833 | "es_query": context.get_intermediate_result('es_query', {}), | 932 | "es_query": context.get_intermediate_result('es_query', {}), |
| 933 | + "es_query_context": { | ||
| 934 | + "filters": filters, | ||
| 935 | + "range_filters": range_filters, | ||
| 936 | + "facets": [getattr(facet, "field", str(facet)) for facet in facets] if facets else [], | ||
| 937 | + "sort_by": sort_by, | ||
| 938 | + "sort_order": sort_order, | ||
| 939 | + "min_score": min_score, | ||
| 940 | + "es_fetch_from": es_fetch_from, | ||
| 941 | + "es_fetch_size": es_fetch_size, | ||
| 942 | + "in_rerank_window": in_rerank_window, | ||
| 943 | + "rerank_prefetch_source": context.get_intermediate_result('es_query_rerank_prefetch_source'), | ||
| 944 | + "include_named_queries_score": bool(do_rerank and in_rerank_window), | ||
| 945 | + }, | ||
| 834 | "es_response": { | 946 | "es_response": { |
| 835 | "took_ms": es_response.get('took', 0), | 947 | "took_ms": es_response.get('took', 0), |
| 836 | "total_hits": total_value, | 948 | "total_hits": total_value, |
| 837 | "max_score": max_score, | 949 | "max_score": max_score, |
| 838 | - "shards": es_response.get('_shards', {}) | 950 | + "shards": es_response.get('_shards', {}), |
| 951 | + "initial_es_max_score": initial_es_max_score, | ||
| 952 | + "initial_es_min_score": initial_es_min_score, | ||
| 839 | }, | 953 | }, |
| 954 | + "rerank": rerank_debug_info, | ||
| 955 | + "page_fill": page_fill_debug, | ||
| 840 | "feature_flags": context.metadata.get('feature_flags', {}), | 956 | "feature_flags": context.metadata.get('feature_flags', {}), |
| 841 | "stage_timings": { | 957 | "stage_timings": { |
| 842 | k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items() | 958 | k: round(v, 2) for k, v in context.performance_metrics.stage_timings.items() |
tests/test_rerank_client.py
| @@ -24,7 +24,7 @@ def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_prim | @@ -24,7 +24,7 @@ def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_prim | ||
| 24 | }, | 24 | }, |
| 25 | ] | 25 | ] |
| 26 | 26 | ||
| 27 | - debug = fuse_scores_and_resort(hits, [0.9, 0.7]) | 27 | + debug = fuse_scores_and_resort(hits, [0.9, 0.7], debug=True) |
| 28 | 28 | ||
| 29 | expected_text_1 = 2.4 + 0.25 * (0.8 * 1.8) | 29 | expected_text_1 = 2.4 + 0.25 * (0.8 * 1.8) |
| 30 | expected_fused_1 = (0.9 + 0.00001) * ((expected_text_1 + 0.1) ** 0.35) * ((0.8 + 0.6) ** 0.2) | 30 | expected_fused_1 = (0.9 + 0.00001) * ((expected_text_1 + 0.1) ** 0.35) * ((0.8 + 0.6) ** 0.2) |
| @@ -37,7 +37,9 @@ def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_prim | @@ -37,7 +37,9 @@ def test_fuse_scores_and_resort_aggregates_text_components_and_keeps_rerank_prim | ||
| 37 | assert isclose(by_id["2"]["_fused_score"], expected_fused_2, rel_tol=1e-9) | 37 | assert isclose(by_id["2"]["_fused_score"], expected_fused_2, rel_tol=1e-9) |
| 38 | assert debug[0]["text_source_score"] == 2.4 | 38 | assert debug[0]["text_source_score"] == 2.4 |
| 39 | assert debug[0]["text_translation_score"] == 1.8 | 39 | assert debug[0]["text_translation_score"] == 1.8 |
| 40 | + assert isclose(debug[0]["text_weighted_translation_score"], 1.44, rel_tol=1e-9) | ||
| 40 | assert debug[0]["knn_score"] == 0.8 | 41 | assert debug[0]["knn_score"] == 0.8 |
| 42 | + assert isclose(debug[0]["rerank_factor"], 0.90001, rel_tol=1e-9) | ||
| 41 | assert [hit["_id"] for hit in hits] == ["2", "1"] | 43 | assert [hit["_id"] for hit in hits] == ["2", "1"] |
| 42 | 44 | ||
| 43 | 45 | ||
| @@ -47,7 +49,7 @@ def test_fuse_scores_and_resort_falls_back_when_matched_queries_missing(): | @@ -47,7 +49,7 @@ def test_fuse_scores_and_resort_falls_back_when_matched_queries_missing(): | ||
| 47 | {"_id": "2", "_score": 2.0}, | 49 | {"_id": "2", "_score": 2.0}, |
| 48 | ] | 50 | ] |
| 49 | 51 | ||
| 50 | - fuse_scores_and_resort(hits, [0.4, 0.3]) | 52 | + debug = fuse_scores_and_resort(hits, [0.4, 0.3], debug=True) |
| 51 | 53 | ||
| 52 | expected_1 = (0.4 + 0.00001) * ((0.5 + 0.1) ** 0.35) * ((0.0 + 0.6) ** 0.2) | 54 | expected_1 = (0.4 + 0.00001) * ((0.5 + 0.1) ** 0.35) * ((0.0 + 0.6) ** 0.2) |
| 53 | expected_2 = (0.3 + 0.00001) * ((2.0 + 0.1) ** 0.35) * ((0.0 + 0.6) ** 0.2) | 55 | expected_2 = (0.3 + 0.00001) * ((2.0 + 0.1) ** 0.35) * ((0.0 + 0.6) ** 0.2) |
| @@ -58,6 +60,8 @@ def test_fuse_scores_and_resort_falls_back_when_matched_queries_missing(): | @@ -58,6 +60,8 @@ def test_fuse_scores_and_resort_falls_back_when_matched_queries_missing(): | ||
| 58 | assert isclose(by_id["1"]["_fused_score"], expected_1, rel_tol=1e-9) | 60 | assert isclose(by_id["1"]["_fused_score"], expected_1, rel_tol=1e-9) |
| 59 | assert isclose(by_id["2"]["_text_score"], 2.0, rel_tol=1e-9) | 61 | assert isclose(by_id["2"]["_text_score"], 2.0, rel_tol=1e-9) |
| 60 | assert isclose(by_id["2"]["_fused_score"], expected_2, rel_tol=1e-9) | 62 | assert isclose(by_id["2"]["_fused_score"], expected_2, rel_tol=1e-9) |
| 63 | + assert debug[0]["text_score_fallback_to_es"] is True | ||
| 64 | + assert debug[1]["text_score_fallback_to_es"] is True | ||
| 61 | assert [hit["_id"] for hit in hits] == ["2", "1"] | 65 | assert [hit["_id"] for hit in hits] == ["2", "1"] |
| 62 | 66 | ||
| 63 | 67 |
tests/test_search_rerank_window.py
| @@ -612,3 +612,33 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc | @@ -612,3 +612,33 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc | ||
| 612 | assert len(result.results) == 1 | 612 | assert len(result.results) == 1 |
| 613 | assert result.results[0].skus[0].sku_id == "sku-blue" | 613 | assert result.results[0].skus[0].sku_id == "sku-blue" |
| 614 | assert result.results[0].image_url == "https://img/blue.jpg" | 614 | assert result.results[0].image_url == "https://img/blue.jpg" |
| 615 | + | ||
| 616 | + | ||
| 617 | +def test_searcher_debug_info_includes_es_positions_and_context(monkeypatch): | ||
| 618 | + es_client = _FakeESClient(total_hits=3) | ||
| 619 | + searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client) | ||
| 620 | + context = create_request_context(reqid="dbg", uid="u-dbg") | ||
| 621 | + | ||
| 622 | + monkeypatch.setattr( | ||
| 623 | + "search.searcher.get_tenant_config_loader", | ||
| 624 | + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en", "zh"]}), | ||
| 625 | + ) | ||
| 626 | + | ||
| 627 | + result = searcher.search( | ||
| 628 | + query="toy", | ||
| 629 | + tenant_id="162", | ||
| 630 | + from_=0, | ||
| 631 | + size=2, | ||
| 632 | + context=context, | ||
| 633 | + enable_rerank=False, | ||
| 634 | + debug=True, | ||
| 635 | + ) | ||
| 636 | + | ||
| 637 | + assert result.debug_info["query_analysis"]["index_languages"] == ["en", "zh"] | ||
| 638 | + assert result.debug_info["es_query_context"]["es_fetch_size"] == 2 | ||
| 639 | + assert result.debug_info["es_response"]["initial_es_max_score"] == 3.0 | ||
| 640 | + assert result.debug_info["es_response"]["initial_es_min_score"] == 2.0 | ||
| 641 | + assert result.debug_info["per_result"][0]["initial_rank"] == 1 | ||
| 642 | + assert result.debug_info["per_result"][0]["final_rank"] == 1 | ||
| 643 | + assert result.debug_info["per_result"][0]["es_score_normalized"] == 1.0 | ||
| 644 | + assert result.debug_info["per_result"][1]["es_score_norm"] == 0.0 |