Commit e38dc1beeeff3d4f0abefb69b9fae39d3f47a350

Authored by tangwang
1 parent b0972ff9

融合公式参数调整、以及展示信息优化

1 1 #!/bin/bash
2   -start=$(date +%s%N) # 开始时间,纳秒级
3   -
4   -time curl -X POST "http://localhost:6007/rerank" \
5   - -H "Content-Type: application/json" \
6   - -d '{
7   - "query": "健身女生T恤短袖",
8   - "docs": [ "60 Jelly Bracelets 80 s Adult Size - MAQIHAN Neon Gummy Bracelets for Women 80s Jelly Bangles Glow Silicone Bands Jewelry Wristband Rainbow Jellies Bangle Girls Boys Colored Accessories Party Favor",
9   -"FITORY Mens Sandals",
10   -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum",
11   -"Merrell Mens Hydro Moc",
12   -"FITORY Mens Sandals",
13   -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum",
14   -"Merrell Mens Hydro Moc",
15   -
16   -"FITORY Mens Sandals",
17   -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum",
18   -"Merrell Mens Hydro Moc",
19   -
20   -
21   -"FITORY Mens Sandals",
22   -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum",
23   -"Merrell Mens Hydro Moc",
24   -Superband Mermaid Tails for Swimming for Women and Adults Without Monofin",
25   -"Pink Queen Women s 2025 Casual Pullover Sweaters Sexy V Neck Long Sleeve Twist Knot Cropped Knit Sweater Tops"
26   - ],
27   - "top_n":386,
28   - "normalize": true
29   - }'
30   -
31   -end=$(date +%s%N) # 结束时间,纳秒级
32   -duration=$(( (end - start) / 1000000 )) # 转换为毫秒
33   -echo "Command took $duration milliseconds."
34 2  
# Benchmark configuration.
FILE="/home/ubuntu/rerank_test/titles.1.8w"   # input file; each line becomes one entry of "docs"
# Named DOC_LINES rather than LINES: Bash itself may overwrite LINES with the
# terminal height when the checkwinsize option is enabled.
DOC_LINES=400                                 # how many leading lines of FILE to send
REPEAT=10                                     # requests sent to each endpoint
URLS=(
    "http://localhost:6007/rerank"
    "http://172.19.27.9:9997/v1/rerank"
)

# jq is needed to build the JSON request body.
if ! command -v jq &> /dev/null; then
    echo "错误:jq 未安装,请先安装 jq。"
    exit 1
fi

# The input file must exist.
if [ ! -f "$FILE" ]; then
    echo "错误:文件 $FILE 不存在。"
    exit 1
fi

# Read the first DOC_LINES lines and turn them into a JSON array of strings.
echo "正在从 $FILE 读取前 $DOC_LINES 行..."
docs_json=$(head -n "$DOC_LINES" "$FILE" | jq -R . | jq -s .)
# `jq -s .` prints "[]" for empty input, so an empty file must be detected
# explicitly; the -z test only catches a failed pipeline.
if [ -z "$docs_json" ] || [ "$docs_json" = "[]" ]; then
    echo "错误:读取文件失败或文件为空。"
    exit 1
fi

# Build the complete request body. The (potentially large) docs array is fed
# on stdin and only the small base object is passed as an argument, so the
# per-argument size limit is not hit when DOC_LINES is raised.
base_json='{"query": "健身女生T恤短袖", "top_n": 386, "normalize": true}'
full_json=$(jq --argjson base "$base_json" '$base + {docs: .}' <<< "$docs_json")
if [ -z "$full_json" ]; then
    echo "错误:构建 JSON 失败。"
    exit 1
fi

# Store the request body in a temporary file that curl reads via @file.
temp_file=$(mktemp) || {
    echo "错误:创建临时文件失败。"
    exit 1
}
# Remove the temporary file whenever the script exits, not only on the
# normal path at the end of the script.
trap 'rm -f "$temp_file"' EXIT
echo "$full_json" > "$temp_file"
echo "请求体已保存到临时文件: $temp_file"
  44 +
# Benchmark a single rerank endpoint.
#
# POSTs the prepared request body to the endpoint REPEAT times, prints the
# wall-clock latency and HTTP status of every request, then prints the
# count / average / min / max latency over the requests that returned HTTP 2xx.
#
# Arguments:
#   $1 - endpoint URL
# Globals read:
#   REPEAT    - number of requests to send
#   temp_file - path of the file holding the JSON request body
# Outputs:
#   progress and summary lines on stdout
test_url() {
    local url=$1
    local durations=()
    local failed=0
    # Per-request scratch variables are local so they do not leak into the
    # caller's scope.
    local i start end duration http_code d

    echo "========================================"
    echo "开始测试 URL: $url"
    echo "重复次数: $REPEAT"
    echo "----------------------------------------"

    for (( i = 1; i <= REPEAT; i++ )); do
        # Start timestamp in nanoseconds.
        start=$(date +%s%N)

        # -s: silent, -o /dev/null: discard the response body,
        # -w: print only the HTTP status code, --data-binary @file: send the
        # file contents unmodified.
        http_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$url" \
            -H "Content-Type: application/json" \
            --data-binary @"$temp_file")

        # End timestamp; convert the difference to milliseconds.
        end=$(date +%s%N)
        duration=$(( (end - start) / 1000000 ))

        # Only HTTP 2xx responses count as successful and enter the statistics.
        if [[ "$http_code" -ge 200 && "$http_code" -lt 300 ]]; then
            durations+=("$duration")
            echo "第 $i 次: 耗时 ${duration} ms (HTTP $http_code)"
        else
            echo "第 $i 次: 失败 (HTTP $http_code) 耗时 ${duration} ms"
            failed=$((failed + 1))
        fi

        # Optional: pause between requests to avoid overloading the service
        # (uncomment to enable).
        # sleep 0.1
    done

    # Summarise the latencies of the successful requests.
    local count=${#durations[@]}
    echo "----------------------------------------"
    if [ "$count" -eq 0 ]; then
        echo "所有请求均失败,无法统计耗时。"
        return
    fi

    # Sum, minimum and maximum in one pass; the average is integer division.
    local sum=0
    local min=${durations[0]}
    local max=${durations[0]}
    for d in "${durations[@]}"; do
        sum=$((sum + d))
        if (( d < min )); then min=$d; fi
        if (( d > max )); then max=$d; fi
    done
    local avg=$((sum / count))

    echo "成功请求数: $count / $REPEAT"
    echo "失败请求数: $failed"
    echo "平均耗时: $avg ms"
    echo "最小耗时: $min ms"
    echo "最大耗时: $max ms"
    echo "========================================"
}
  108 +
# Run the benchmark against every configured endpoint, in array order.
for idx in "${!URLS[@]}"; do
    test_url "${URLS[idx]}"
done

# Remove the temporary request-body file and report it.
rm -f "$temp_file"
echo "临时文件已删除。"
... ...
config/config.yaml
... ... @@ -412,7 +412,9 @@ services:
412 412 # instruction: "Rank by query relevance, prioritize category & style"
413 413 # instruction: "Relevance ranking: category & style match first"
414 414 # instruction: "Score product relevance by query with category & style match prioritized"
415   - instruction: "Rank products by query with category & style match prioritized"
  415 + # instruction: "Rank products by query with category & style match prioritized"
  416 + # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query"
  417 + instruction: "rank products by given query"
416 418 # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score
417 419 # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。
418 420 qwen3_vllm_score:
... ... @@ -424,9 +426,9 @@ services:
424 426 # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并
425 427 # hf_overrides: {}
426 428 engine: "vllm"
427   - max_model_len: 256
  429 + max_model_len: 224
428 430 tensor_parallel_size: 1
429   - gpu_memory_utilization: 0.20
  431 + gpu_memory_utilization: 0.12
430 432 dtype: "float16"
431 433 enable_prefix_caching: true
432 434 enforce_eager: false
... ... @@ -434,7 +436,9 @@ services:
434 436 sort_by_doc_length: true
435 437 # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致
436 438 instruction_format: standard # compact standard
  439 + # instruction: "Rank products by query with category & style match prioritized"
437 440 instruction: "Rank products by query with category & style match prioritized"
  441 + # instruction: "Given a shopping query, rank products by relevance"
438 442 qwen3_transformers:
439 443 model_name: "Qwen/Qwen3-Reranker-0.6B"
440 444 instruction: "rank products by given query"
... ...
frontend/static/js/app.js
... ... @@ -408,7 +408,7 @@ function displayResults(data) {
408 408 let debugHtml = '';
409 409 if (debug) {
410 410 const esScore = typeof debug.es_score === 'number' ? debug.es_score.toFixed(4) : String(debug.es_score ?? '');
411   - const esNorm = typeof debug.es_score_normalized === 'number'
  411 + const es_score_normalized = typeof debug.es_score_normalized === 'number'
412 412 ? debug.es_score_normalized.toFixed(4)
413 413 : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized));
414 414 const rerankScore = typeof debug.rerank_score === 'number'
... ... @@ -433,7 +433,7 @@ function displayResults(data) {
433 433 const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;
434 434 const rerankInputHtml = debug.rerank_input
435 435 ? `
436   - <details>
  436 + <details open>
437 437 <summary>Rerank input</summary>
438 438 <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.rerank_input))}</pre>
439 439 </details>
... ... @@ -441,7 +441,7 @@ function displayResults(data) {
441 441 : '';
442 442 const styleIntentHtml = debug.style_intent_sku
443 443 ? `
444   - <details>
  444 + <details open>
445 445 <summary>Selected SKU</summary>
446 446 <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.style_intent_sku))}</pre>
447 447 </details>
... ... @@ -449,7 +449,7 @@ function displayResults(data) {
449 449 : '';
450 450 const matchedQueriesHtml = debug.matched_queries
451 451 ? `
452   - <details>
  452 + <details open>
453 453 <summary>matched_queries</summary>
454 454 <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.matched_queries))}</pre>
455 455 </details>
... ... @@ -463,7 +463,7 @@ function displayResults(data) {
463 463 <div class="product-debug-line">Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}</div>
464 464 <div class="product-debug-line">Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}</div>
465 465 <div class="product-debug-line">ES score: ${esScore}</div>
466   - <div class="product-debug-line">ES normalized: ${esNorm}</div>
  466 + <div class="product-debug-line">ES normalized: ${es_score_normalized}</div>
467 467 <div class="product-debug-line">Rerank score: ${rerankScore}</div>
468 468 <div class="product-debug-line">rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}</div>
469 469 <div class="product-debug-line">text_score: ${escapeHtml(String(debug.text_score ?? ''))}</div>
... ...
perf_reports/reranker_vllm_instruction/2026-03-25/RESULTS.md
... ... @@ -89,7 +89,7 @@ Micro-benchmark (same machine, isolated): **~927.5 ms → ~673.1 ms** at **n
89 89 | instruction_format | n=100 | n=200 | n=400 | n=600 | n=800 | n=1000 | vs baseline same row (approx.) |
90 90 |--------------------|------:|------:|------:|------:|------:|-------:|--------------------------------|
91 91 | `compact` | 178.5 | 351.7 | **688.2** | 1024.0 | 1375.8 | **1752.4** | e.g. n=400 **−28.8%**, n=1000 **−27.8%** vs 966.2 / 2428.4 |
92   -| `standard` | 198.4 | 386.4 | **778.8** | 1174.6 | 1548.1 | **1956.6** | e.g. n=400 **−33.9%**, n=1000 **−33.3%** vs 1178.9 / 2931.7 |
  92 +| `standard` | 198.4 | 386.4 | **762.83** | 1174.6 | 1540.10 | **1970.14** | e.g. n=400 **−35.3%**, n=1000 **−32.8%** vs 1178.9 / 2931.7 |
93 93  
94 94 **`instruction_format: standard` 的优化点(本版):** 与 `compact` **共享**同一套 vLLM attention 自动选择;不再在 T4 上单独锁死 `TRITON_ATTN`。Prompt 仍比 `compact` 更长(固定 yes/no system + 官方前缀模板),因此 **absolute 延迟仍高于 `compact`**,但相对旧版 **standard** 行降幅与 **compact** 同量级(上表)。
95 95  
... ...
search/rerank_client.py
... ... @@ -151,7 +151,25 @@ def _extract_named_query_score(matched_queries: Any, name: str) -&gt; float:
151 151 return 1.0 if name in matched_queries else 0.0
152 152 return 0.0
153 153  
154   -
  154 +"""
  155 +原始变量:
  156 +ES总分
  157 +source_score:从 ES 返回的 matched_queries 里取 base_query 这条 named query 的分(dict 用具体分数;list 形式则“匹配到名字就算 1.0”)。
  158 +translation_score:所有名字以 base_query_trans_ 开头的 named query 的分,在 dict 里取 最大值;在 list 里只要存在这类名字就记为 1.0。
  159 +
  160 +中间变量:计算原始query得分和翻译query得分
  162 +weighted_source : source_score 的加权得分(权重系数此处未注明,以下方函数实现为准)
  162 +weighted_translation : 0.8 * translation_score
  163 +
  164 +区分主信号和辅助信号:
  165 +合成primary_text_score和support_text_score,取 更强 的那一路(原文检索 vs 翻译检索)作为主信号
  166 +primary_text_score : max(weighted_source, weighted_translation)
  167 +support_text_score : weighted_source + weighted_translation - primary_text_score
  168 +
  169 +主信号和辅助信号的融合:dismax融合公式
  170 +最终text_score:主信号 + 0.25 * 辅助信号
  171 +text_score : primary_text_score + 0.25 * support_text_score
  172 +"""
155 173 def _collect_text_score_components(matched_queries: Any, fallback_es_score: float) -> Dict[str, float]:
156 174 source_score = _extract_named_query_score(matched_queries, "base_query")
157 175 translation_score = 0.0
... ...