Commit e38dc1beeeff3d4f0abefb69b9fae39d3f47a350

Authored by tangwang
1 parent b0972ff9

融合公式参数调整、以及展示信息优化

1 #!/bin/bash 1 #!/bin/bash
2 -start=$(date +%s%N) # 开始时间,纳秒级  
3 -  
4 -time curl -X POST "http://localhost:6007/rerank" \  
5 - -H "Content-Type: application/json" \  
6 - -d '{  
7 - "query": "健身女生T恤短袖",  
8 - "docs": [ "60 Jelly Bracelets 80 s Adult Size - MAQIHAN Neon Gummy Bracelets for Women 80s Jelly Bangles Glow Silicone Bands Jewelry Wristband Rainbow Jellies Bangle Girls Boys Colored Accessories Party Favor",  
9 -"FITORY Mens Sandals",  
10 -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum",  
11 -"Merrell Mens Hydro Moc",  
12 -"FITORY Mens Sandals",  
13 -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum",  
14 -"Merrell Mens Hydro Moc",  
15 -  
16 -"FITORY Mens Sandals",  
17 -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum",  
18 -"Merrell Mens Hydro Moc",  
19 -  
20 -  
21 -"FITORY Mens Sandals",  
22 -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum",  
23 -"Merrell Mens Hydro Moc",  
24 -Superband Mermaid Tails for Swimming for Women and Adults Without Monofin",  
25 -"Pink Queen Women s 2025 Casual Pullover Sweaters Sexy V Neck Long Sleeve Twist Knot Cropped Knit Sweater Tops"  
26 - ],  
27 - "top_n":386,  
28 - "normalize": true  
29 - }'  
30 -  
31 -end=$(date +%s%N) # 结束时间,纳秒级  
32 -duration=$(( (end - start) / 1000000 )) # 转换为毫秒  
33 -echo "Command took $duration milliseconds."  
34 2
  3 +# 配置参数
  4 +FILE="/home/ubuntu/rerank_test/titles.1.8w"
  5 +LINES=400
  6 +REPEAT=10
  7 +URLS=(
  8 + "http://localhost:6007/rerank"
  9 + "http://172.19.27.9:9997/v1/rerank"
  10 +)
35 11
  12 +# 检查必要命令
  13 +if ! command -v jq &> /dev/null; then
  14 + echo "错误:jq 未安装,请先安装 jq。"
  15 + exit 1
  16 +fi
  17 +
  18 +# 检查文件是否存在
  19 +if [ ! -f "$FILE" ]; then
  20 + echo "错误:文件 $FILE 不存在。"
  21 + exit 1
  22 +fi
  23 +
  24 +# 读取前 LINES 行,构建 docs 数组的 JSON
  25 +echo "正在从 $FILE 读取前 $LINES 行..."
  26 +docs_json=$(head -n "$LINES" "$FILE" | jq -R . | jq -s .)
  27 +if [ -z "$docs_json" ]; then
  28 + echo "错误:读取文件失败或文件为空。"
  29 + exit 1
  30 +fi
  31 +
  32 +# 构建完整请求体 JSON
  33 +base_json='{"query": "健身女生T恤短袖", "top_n": 386, "normalize": true}'
  34 +full_json=$(echo "$base_json" | jq --argjson docs "$docs_json" '. + {docs: $docs}')
  35 +if [ -z "$full_json" ]; then
  36 + echo "错误:构建 JSON 失败。"
  37 + exit 1
  38 +fi
  39 +
  40 +# 创建临时文件存放请求体
  41 +temp_file=$(mktemp)
  42 +echo "$full_json" > "$temp_file"
  43 +echo "请求体已保存到临时文件: $temp_file"
  44 +
  45 +# 函数:测试单个 URL
  46 +test_url() {
  47 + local url=$1
  48 + local durations=()
  49 + local failed=0
  50 +
  51 + echo "========================================"
  52 + echo "开始测试 URL: $url"
  53 + echo "重复次数: $REPEAT"
  54 + echo "----------------------------------------"
  55 +
  56 + for i in $(seq 1 $REPEAT); do
  57 + # 开始计时(纳秒)
  58 + start=$(date +%s%N)
  59 +
  60 + # 发送请求,-s 静默,-o /dev/null 丢弃响应体,--data-binary 直接读取文件
  61 + http_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$url" \
  62 + -H "Content-Type: application/json" \
  63 + --data-binary @"$temp_file")
  64 +
  65 + # 结束计时
  66 + end=$(date +%s%N)
  67 + duration=$(( (end - start) / 1000000 )) # 毫秒
  68 +
  69 + # 判断是否成功(HTTP 2xx 视为成功)
  70 + if [[ "$http_code" -ge 200 && "$http_code" -lt 300 ]]; then
  71 + durations+=($duration)
  72 + echo "第 $i 次: 耗时 ${duration} ms (HTTP $http_code)"
  73 + else
  74 + echo "第 $i 次: 失败 (HTTP $http_code) 耗时 ${duration} ms"
  75 + failed=$((failed + 1))
  76 + fi
  77 +
  78 + # 可选:每次请求间隔一点时间,避免过载(取消注释以启用)
  79 + # sleep 0.1
  80 + done
  81 +
  82 + # 统计成功请求的耗时
  83 + local count=${#durations[@]}
  84 + echo "----------------------------------------"
  85 + if [ $count -eq 0 ]; then
  86 + echo "所有请求均失败,无法统计耗时。"
  87 + return
  88 + fi
  89 +
  90 + # 计算总和、平均值、最小值、最大值
  91 + local sum=0
  92 + local min=${durations[0]}
  93 + local max=${durations[0]}
  94 + for d in "${durations[@]}"; do
  95 + sum=$((sum + d))
  96 + (( d < min )) && min=$d
  97 + (( d > max )) && max=$d
  98 + done
  99 + local avg=$((sum / count))
  100 +
  101 + echo "成功请求数: $count / $REPEAT"
  102 + echo "失败请求数: $failed"
  103 + echo "平均耗时: $avg ms"
  104 + echo "最小耗时: $min ms"
  105 + echo "最大耗时: $max ms"
  106 + echo "========================================"
  107 +}
  108 +
  109 +# 依次测试每个 URL
  110 +for url in "${URLS[@]}"; do
  111 + test_url "$url"
  112 +done
  113 +
  114 +# 清理临时文件
  115 +rm -f "$temp_file"
  116 +echo "临时文件已删除。"
config/config.yaml
@@ -412,7 +412,9 @@ services: @@ -412,7 +412,9 @@ services:
412 # instruction: "Rank by query relevance, prioritize category & style" 412 # instruction: "Rank by query relevance, prioritize category & style"
413 # instruction: "Relevance ranking: category & style match first" 413 # instruction: "Relevance ranking: category & style match first"
414 # instruction: "Score product relevance by query with category & style match prioritized" 414 # instruction: "Score product relevance by query with category & style match prioritized"
415 - instruction: "Rank products by query with category & style match prioritized" 415 + # instruction: "Rank products by query with category & style match prioritized"
  416 + # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query"
  417 + instruction: "rank products by given query"
416 # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score 418 # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score
417 # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。 419 # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。
418 qwen3_vllm_score: 420 qwen3_vllm_score:
@@ -424,9 +426,9 @@ services: @@ -424,9 +426,9 @@ services:
424 # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并 426 # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并
425 # hf_overrides: {} 427 # hf_overrides: {}
426 engine: "vllm" 428 engine: "vllm"
427 - max_model_len: 256 429 + max_model_len: 224
428 tensor_parallel_size: 1 430 tensor_parallel_size: 1
429 - gpu_memory_utilization: 0.20 431 + gpu_memory_utilization: 0.12
430 dtype: "float16" 432 dtype: "float16"
431 enable_prefix_caching: true 433 enable_prefix_caching: true
432 enforce_eager: false 434 enforce_eager: false
@@ -434,7 +436,9 @@ services: @@ -434,7 +436,9 @@ services:
434 sort_by_doc_length: true 436 sort_by_doc_length: true
435 # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致 437 # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致
436 instruction_format: standard # compact standard 438 instruction_format: standard # compact standard
  439 + # instruction: "Rank products by query with category & style match prioritized"
437 instruction: "Rank products by query with category & style match prioritized" 440 instruction: "Rank products by query with category & style match prioritized"
  441 + # instruction: "Given a shopping query, rank products by relevance"
438 qwen3_transformers: 442 qwen3_transformers:
439 model_name: "Qwen/Qwen3-Reranker-0.6B" 443 model_name: "Qwen/Qwen3-Reranker-0.6B"
440 instruction: "rank products by given query" 444 instruction: "rank products by given query"
frontend/static/js/app.js
@@ -408,7 +408,7 @@ function displayResults(data) { @@ -408,7 +408,7 @@ function displayResults(data) {
408 let debugHtml = ''; 408 let debugHtml = '';
409 if (debug) { 409 if (debug) {
410 const esScore = typeof debug.es_score === 'number' ? debug.es_score.toFixed(4) : String(debug.es_score ?? ''); 410 const esScore = typeof debug.es_score === 'number' ? debug.es_score.toFixed(4) : String(debug.es_score ?? '');
411 - const esNorm = typeof debug.es_score_normalized === 'number' 411 + const es_score_normalized = typeof debug.es_score_normalized === 'number'
412 ? debug.es_score_normalized.toFixed(4) 412 ? debug.es_score_normalized.toFixed(4)
413 : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized)); 413 : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized));
414 const rerankScore = typeof debug.rerank_score === 'number' 414 const rerankScore = typeof debug.rerank_score === 'number'
@@ -433,7 +433,7 @@ function displayResults(data) { @@ -433,7 +433,7 @@ function displayResults(data) {
433 const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`; 433 const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;
434 const rerankInputHtml = debug.rerank_input 434 const rerankInputHtml = debug.rerank_input
435 ? ` 435 ? `
436 - <details> 436 + <details open>
437 <summary>Rerank input</summary> 437 <summary>Rerank input</summary>
438 <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.rerank_input))}</pre> 438 <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.rerank_input))}</pre>
439 </details> 439 </details>
@@ -441,7 +441,7 @@ function displayResults(data) { @@ -441,7 +441,7 @@ function displayResults(data) {
441 : ''; 441 : '';
442 const styleIntentHtml = debug.style_intent_sku 442 const styleIntentHtml = debug.style_intent_sku
443 ? ` 443 ? `
444 - <details> 444 + <details open>
445 <summary>Selected SKU</summary> 445 <summary>Selected SKU</summary>
446 <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.style_intent_sku))}</pre> 446 <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.style_intent_sku))}</pre>
447 </details> 447 </details>
@@ -449,7 +449,7 @@ function displayResults(data) { @@ -449,7 +449,7 @@ function displayResults(data) {
449 : ''; 449 : '';
450 const matchedQueriesHtml = debug.matched_queries 450 const matchedQueriesHtml = debug.matched_queries
451 ? ` 451 ? `
452 - <details> 452 + <details open>
453 <summary>matched_queries</summary> 453 <summary>matched_queries</summary>
454 <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.matched_queries))}</pre> 454 <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.matched_queries))}</pre>
455 </details> 455 </details>
@@ -463,7 +463,7 @@ function displayResults(data) { @@ -463,7 +463,7 @@ function displayResults(data) {
463 <div class="product-debug-line">Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}</div> 463 <div class="product-debug-line">Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}</div>
464 <div class="product-debug-line">Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}</div> 464 <div class="product-debug-line">Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}</div>
465 <div class="product-debug-line">ES score: ${esScore}</div> 465 <div class="product-debug-line">ES score: ${esScore}</div>
466 - <div class="product-debug-line">ES normalized: ${esNorm}</div> 466 + <div class="product-debug-line">ES normalized: ${es_score_normalized}</div>
467 <div class="product-debug-line">Rerank score: ${rerankScore}</div> 467 <div class="product-debug-line">Rerank score: ${rerankScore}</div>
468 <div class="product-debug-line">rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}</div> 468 <div class="product-debug-line">rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}</div>
469 <div class="product-debug-line">text_score: ${escapeHtml(String(debug.text_score ?? ''))}</div> 469 <div class="product-debug-line">text_score: ${escapeHtml(String(debug.text_score ?? ''))}</div>
perf_reports/reranker_vllm_instruction/2026-03-25/RESULTS.md
@@ -89,7 +89,7 @@ Micro-benchmark (same machine, isolated): **~927.5 ms → ~673.1 ms** at **n @@ -89,7 +89,7 @@ Micro-benchmark (same machine, isolated): **~927.5 ms → ~673.1 ms** at **n
89 | instruction_format | n=100 | n=200 | n=400 | n=600 | n=800 | n=1000 | vs baseline same row (approx.) | 89 | instruction_format | n=100 | n=200 | n=400 | n=600 | n=800 | n=1000 | vs baseline same row (approx.) |
90 |--------------------|------:|------:|------:|------:|------:|-------:|--------------------------------| 90 |--------------------|------:|------:|------:|------:|------:|-------:|--------------------------------|
91 | `compact` | 178.5 | 351.7 | **688.2** | 1024.0 | 1375.8 | **1752.4** | e.g. n=400 **−28.8%**, n=1000 **−27.8%** vs 966.2 / 2428.4 | 91 | `compact` | 178.5 | 351.7 | **688.2** | 1024.0 | 1375.8 | **1752.4** | e.g. n=400 **−28.8%**, n=1000 **−27.8%** vs 966.2 / 2428.4 |
92 -| `standard` | 198.4 | 386.4 | **778.8** | 1174.6 | 1548.1 | **1956.6** | e.g. n=400 **−33.9%**, n=1000 **−33.3%** vs 1178.9 / 2931.7 | 92 +| `standard` | 198.4 | 386.4 | **762.83** | 1174.6 | 1540.10 | **1970.14** | e.g. n=400 **−35.3%**, n=1000 **−32.8%** vs 1178.9 / 2931.7 |
93 93
94 **`instruction_format: standard` 的优化点(本版):** 与 `compact` **共享**同一套 vLLM attention 自动选择;不再在 T4 上单独锁死 `TRITON_ATTN`。Prompt 仍比 `compact` 更长(固定 yes/no system + 官方前缀模板),因此 **absolute 延迟仍高于 `compact`**,但相对旧版 **standard** 行降幅与 **compact** 同量级(上表)。 94 **`instruction_format: standard` 的优化点(本版):** 与 `compact` **共享**同一套 vLLM attention 自动选择;不再在 T4 上单独锁死 `TRITON_ATTN`。Prompt 仍比 `compact` 更长(固定 yes/no system + 官方前缀模板),因此 **absolute 延迟仍高于 `compact`**,但相对旧版 **standard** 行降幅与 **compact** 同量级(上表)。
95 95
search/rerank_client.py
@@ -151,7 +151,25 @@ def _extract_named_query_score(matched_queries: Any, name: str) -> float: @@ -151,7 +151,25 @@ def _extract_named_query_score(matched_queries: Any, name: str) -> float:
151 return 1.0 if name in matched_queries else 0.0 151 return 1.0 if name in matched_queries else 0.0
152 return 0.0 152 return 0.0
153 153
154 - 154 +"""
  155 +原始变量:
  156 +ES总分
  157 +source_score:从 ES 返回的 matched_queries 里取 base_query 这条 named query 的分(dict 用具体分数;list 形式则“匹配到名字就算 1.0”)。
  158 +translation_score:所有名字以 base_query_trans_ 开头的 named query 的分,在 dict 里取 最大值;在 list 里只要存在这类名字就记为 1.0。
  159 +
  160 +中间变量:计算原始query得分和翻译query得分
  161 +weighted_source :
  162 +weighted_translation : 0.8 * translation_score
  163 +
  164 +区分主信号和辅助信号:
  165 +合成primary_text_score和support_text_score,取 更强 的那一路(原文检索 vs 翻译检索)作为主信号
  166 +primary_text_score : max(weighted_source, weighted_translation)
  167 +support_text_score : weighted_source + weighted_translation - primary_text_score
  168 +
  169 +主信号和辅助信号的融合:dismax融合公式
  170 +最终text_score:主信号 + 0.25 * 辅助信号
  171 +text_score : primary_text_score + 0.25 * support_text_score
  172 +"""
155 def _collect_text_score_components(matched_queries: Any, fallback_es_score: float) -> Dict[str, float]: 173 def _collect_text_score_components(matched_queries: Any, fallback_es_score: float) -> Dict[str, float]:
156 source_score = _extract_named_query_score(matched_queries, "base_query") 174 source_score = _extract_named_query_score(matched_queries, "base_query")
157 translation_score = 0.0 175 translation_score = 0.0