Commit e38dc1beeeff3d4f0abefb69b9fae39d3f47a350
1 parent
b0972ff9
融合公式参数调整、以及展示信息优化
Showing
5 changed files
with
145 additions
and
42 deletions
Show diff stats
| 1 | 1 | #!/bin/bash |
| 2 | -start=$(date +%s%N) # 开始时间,纳秒级 | |
| 3 | - | |
| 4 | -time curl -X POST "http://localhost:6007/rerank" \ | |
| 5 | - -H "Content-Type: application/json" \ | |
| 6 | - -d '{ | |
| 7 | - "query": "健身女生T恤短袖", | |
| 8 | - "docs": [ "60 Jelly Bracelets 80 s Adult Size - MAQIHAN Neon Gummy Bracelets for Women 80s Jelly Bangles Glow Silicone Bands Jewelry Wristband Rainbow Jellies Bangle Girls Boys Colored Accessories Party Favor", | |
| 9 | -"FITORY Mens Sandals", | |
| 10 | -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", | |
| 11 | -"Merrell Mens Hydro Moc", | |
| 12 | -"FITORY Mens Sandals", | |
| 13 | -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", | |
| 14 | -"Merrell Mens Hydro Moc", | |
| 15 | - | |
| 16 | -"FITORY Mens Sandals", | |
| 17 | -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", | |
| 18 | -"Merrell Mens Hydro Moc", | |
| 19 | - | |
| 20 | - | |
| 21 | -"FITORY Mens Sandals", | |
| 22 | -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", | |
| 23 | -"Merrell Mens Hydro Moc", | |
| 24 | -Superband Mermaid Tails for Swimming for Women and Adults Without Monofin", | |
| 25 | -"Pink Queen Women s 2025 Casual Pullover Sweaters Sexy V Neck Long Sleeve Twist Knot Cropped Knit Sweater Tops" | |
| 26 | - ], | |
| 27 | - "top_n":386, | |
| 28 | - "normalize": true | |
| 29 | - }' | |
| 30 | - | |
| 31 | -end=$(date +%s%N) # 结束时间,纳秒级 | |
| 32 | -duration=$(( (end - start) / 1000000 )) # 转换为毫秒 | |
| 33 | -echo "Command took $duration milliseconds." | |
| 34 | 2 | |
| 3 | +# 配置参数 | |
| 4 | +FILE="/home/ubuntu/rerank_test/titles.1.8w" | |
| 5 | +LINES=400 | |
| 6 | +REPEAT=10 | |
| 7 | +URLS=( | |
| 8 | + "http://localhost:6007/rerank" | |
| 9 | + "http://172.19.27.9:9997/v1/rerank" | |
| 10 | +) | |
| 35 | 11 | |
| 12 | +# 检查必要命令 | |
| 13 | +if ! command -v jq &> /dev/null; then | |
| 14 | + echo "错误:jq 未安装,请先安装 jq。" | |
| 15 | + exit 1 | |
| 16 | +fi | |
| 17 | + | |
| 18 | +# 检查文件是否存在 | |
| 19 | +if [ ! -f "$FILE" ]; then | |
| 20 | + echo "错误:文件 $FILE 不存在。" | |
| 21 | + exit 1 | |
| 22 | +fi | |
| 23 | + | |
| 24 | +# 读取前 LINES 行,构建 docs 数组的 JSON | |
| 25 | +echo "正在从 $FILE 读取前 $LINES 行..." | |
| 26 | +docs_json=$(head -n "$LINES" "$FILE" | jq -R . | jq -s .) | |
| 27 | +if [ -z "$docs_json" ]; then | |
| 28 | + echo "错误:读取文件失败或文件为空。" | |
| 29 | + exit 1 | |
| 30 | +fi | |
| 31 | + | |
| 32 | +# 构建完整请求体 JSON | |
| 33 | +base_json='{"query": "健身女生T恤短袖", "top_n": 386, "normalize": true}' | |
| 34 | +full_json=$(echo "$base_json" | jq --argjson docs "$docs_json" '. + {docs: $docs}') | |
| 35 | +if [ -z "$full_json" ]; then | |
| 36 | + echo "错误:构建 JSON 失败。" | |
| 37 | + exit 1 | |
| 38 | +fi | |
| 39 | + | |
| 40 | +# 创建临时文件存放请求体 | |
| 41 | +temp_file=$(mktemp) | |
| 42 | +echo "$full_json" > "$temp_file" | |
| 43 | +echo "请求体已保存到临时文件: $temp_file" | |
| 44 | + | |
| 45 | +# 函数:测试单个 URL | |
| 46 | +test_url() { | |
| 47 | + local url=$1 | |
| 48 | + local durations=() | |
| 49 | + local failed=0 | |
| 50 | + | |
| 51 | + echo "========================================" | |
| 52 | + echo "开始测试 URL: $url" | |
| 53 | + echo "重复次数: $REPEAT" | |
| 54 | + echo "----------------------------------------" | |
| 55 | + | |
| 56 | + for i in $(seq 1 $REPEAT); do | |
| 57 | + # 开始计时(纳秒) | |
| 58 | + start=$(date +%s%N) | |
| 59 | + | |
| 60 | + # 发送请求,-s 静默,-o /dev/null 丢弃响应体,--data-binary 直接读取文件 | |
| 61 | + http_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$url" \ | |
| 62 | + -H "Content-Type: application/json" \ | |
| 63 | + --data-binary @"$temp_file") | |
| 64 | + | |
| 65 | + # 结束计时 | |
| 66 | + end=$(date +%s%N) | |
| 67 | + duration=$(( (end - start) / 1000000 )) # 毫秒 | |
| 68 | + | |
| 69 | + # 判断是否成功(HTTP 2xx 视为成功) | |
| 70 | + if [[ "$http_code" -ge 200 && "$http_code" -lt 300 ]]; then | |
| 71 | + durations+=($duration) | |
| 72 | + echo "第 $i 次: 耗时 ${duration} ms (HTTP $http_code)" | |
| 73 | + else | |
| 74 | + echo "第 $i 次: 失败 (HTTP $http_code) 耗时 ${duration} ms" | |
| 75 | + failed=$((failed + 1)) | |
| 76 | + fi | |
| 77 | + | |
| 78 | + # 可选:每次请求间隔一点时间,避免过载(取消注释以启用) | |
| 79 | + # sleep 0.1 | |
| 80 | + done | |
| 81 | + | |
| 82 | + # 统计成功请求的耗时 | |
| 83 | + local count=${#durations[@]} | |
| 84 | + echo "----------------------------------------" | |
| 85 | + if [ $count -eq 0 ]; then | |
| 86 | + echo "所有请求均失败,无法统计耗时。" | |
| 87 | + return | |
| 88 | + fi | |
| 89 | + | |
| 90 | + # 计算总和、平均值、最小值、最大值 | |
| 91 | + local sum=0 | |
| 92 | + local min=${durations[0]} | |
| 93 | + local max=${durations[0]} | |
| 94 | + for d in "${durations[@]}"; do | |
| 95 | + sum=$((sum + d)) | |
| 96 | + (( d < min )) && min=$d | |
| 97 | + (( d > max )) && max=$d | |
| 98 | + done | |
| 99 | + local avg=$((sum / count)) | |
| 100 | + | |
| 101 | + echo "成功请求数: $count / $REPEAT" | |
| 102 | + echo "失败请求数: $failed" | |
| 103 | + echo "平均耗时: $avg ms" | |
| 104 | + echo "最小耗时: $min ms" | |
| 105 | + echo "最大耗时: $max ms" | |
| 106 | + echo "========================================" | |
| 107 | +} | |
| 108 | + | |
| 109 | +# 依次测试每个 URL | |
| 110 | +for url in "${URLS[@]}"; do | |
| 111 | + test_url "$url" | |
| 112 | +done | |
| 113 | + | |
| 114 | +# 清理临时文件 | |
| 115 | +rm -f "$temp_file" | |
| 116 | +echo "临时文件已删除。" | ... | ... |
config/config.yaml
| ... | ... | @@ -412,7 +412,9 @@ services: |
| 412 | 412 | # instruction: "Rank by query relevance, prioritize category & style" |
| 413 | 413 | # instruction: "Relevance ranking: category & style match first" |
| 414 | 414 | # instruction: "Score product relevance by query with category & style match prioritized" |
| 415 | - instruction: "Rank products by query with category & style match prioritized" | |
| 415 | + # instruction: "Rank products by query with category & style match prioritized" | |
| 416 | + # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query" | |
| 417 | + instruction: "rank products by given query" | |
| 416 | 418 | # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score |
| 417 | 419 | # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。 |
| 418 | 420 | qwen3_vllm_score: |
| ... | ... | @@ -424,9 +426,9 @@ services: |
| 424 | 426 | # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并 |
| 425 | 427 | # hf_overrides: {} |
| 426 | 428 | engine: "vllm" |
| 427 | - max_model_len: 256 | |
| 429 | + max_model_len: 224 | |
| 428 | 430 | tensor_parallel_size: 1 |
| 429 | - gpu_memory_utilization: 0.20 | |
| 431 | + gpu_memory_utilization: 0.12 | |
| 430 | 432 | dtype: "float16" |
| 431 | 433 | enable_prefix_caching: true |
| 432 | 434 | enforce_eager: false |
| ... | ... | @@ -434,7 +436,9 @@ services: |
| 434 | 436 | sort_by_doc_length: true |
| 435 | 437 | # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致 |
| 436 | 438 | instruction_format: standard # compact standard |
| 439 | + # instruction: "Rank products by query with category & style match prioritized" | |
| 437 | 440 | instruction: "Rank products by query with category & style match prioritized" |
| 441 | + # instruction: "Given a shopping query, rank products by relevance" | |
| 438 | 442 | qwen3_transformers: |
| 439 | 443 | model_name: "Qwen/Qwen3-Reranker-0.6B" |
| 440 | 444 | instruction: "rank products by given query" | ... | ... |
frontend/static/js/app.js
| ... | ... | @@ -408,7 +408,7 @@ function displayResults(data) { |
| 408 | 408 | let debugHtml = ''; |
| 409 | 409 | if (debug) { |
| 410 | 410 | const esScore = typeof debug.es_score === 'number' ? debug.es_score.toFixed(4) : String(debug.es_score ?? ''); |
| 411 | - const esNorm = typeof debug.es_score_normalized === 'number' | |
| 411 | + const es_score_normalized = typeof debug.es_score_normalized === 'number' | |
| 412 | 412 | ? debug.es_score_normalized.toFixed(4) |
| 413 | 413 | : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized)); |
| 414 | 414 | const rerankScore = typeof debug.rerank_score === 'number' |
| ... | ... | @@ -433,7 +433,7 @@ function displayResults(data) { |
| 433 | 433 | const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`; |
| 434 | 434 | const rerankInputHtml = debug.rerank_input |
| 435 | 435 | ? ` |
| 436 | - <details> | |
| 436 | + <details open> | |
| 437 | 437 | <summary>Rerank input</summary> |
| 438 | 438 | <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.rerank_input))}</pre> |
| 439 | 439 | </details> |
| ... | ... | @@ -441,7 +441,7 @@ function displayResults(data) { |
| 441 | 441 | : ''; |
| 442 | 442 | const styleIntentHtml = debug.style_intent_sku |
| 443 | 443 | ? ` |
| 444 | - <details> | |
| 444 | + <details open> | |
| 445 | 445 | <summary>Selected SKU</summary> |
| 446 | 446 | <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.style_intent_sku))}</pre> |
| 447 | 447 | </details> |
| ... | ... | @@ -449,7 +449,7 @@ function displayResults(data) { |
| 449 | 449 | : ''; |
| 450 | 450 | const matchedQueriesHtml = debug.matched_queries |
| 451 | 451 | ? ` |
| 452 | - <details> | |
| 452 | + <details open> | |
| 453 | 453 | <summary>matched_queries</summary> |
| 454 | 454 | <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.matched_queries))}</pre> |
| 455 | 455 | </details> |
| ... | ... | @@ -463,7 +463,7 @@ function displayResults(data) { |
| 463 | 463 | <div class="product-debug-line">Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}</div> |
| 464 | 464 | <div class="product-debug-line">Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}</div> |
| 465 | 465 | <div class="product-debug-line">ES score: ${esScore}</div> |
| 466 | - <div class="product-debug-line">ES normalized: ${esNorm}</div> | |
| 466 | + <div class="product-debug-line">ES normalized: ${es_score_normalized}</div> | |
| 467 | 467 | <div class="product-debug-line">Rerank score: ${rerankScore}</div> |
| 468 | 468 | <div class="product-debug-line">rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}</div> |
| 469 | 469 | <div class="product-debug-line">text_score: ${escapeHtml(String(debug.text_score ?? ''))}</div> | ... | ... |
perf_reports/reranker_vllm_instruction/2026-03-25/RESULTS.md
| ... | ... | @@ -89,7 +89,7 @@ Micro-benchmark (same machine, isolated): **~927.5 ms → ~673.1 ms** at **n |
| 89 | 89 | | instruction_format | n=100 | n=200 | n=400 | n=600 | n=800 | n=1000 | vs baseline same row (approx.) | |
| 90 | 90 | |--------------------|------:|------:|------:|------:|------:|-------:|--------------------------------| |
| 91 | 91 | | `compact` | 178.5 | 351.7 | **688.2** | 1024.0 | 1375.8 | **1752.4** | e.g. n=400 **−28.8%**, n=1000 **−27.8%** vs 966.2 / 2428.4 | |
| 92 | -| `standard` | 198.4 | 386.4 | **778.8** | 1174.6 | 1548.1 | **1956.6** | e.g. n=400 **−33.9%**, n=1000 **−33.3%** vs 1178.9 / 2931.7 | | |
| 92 | +| `standard` | 198.4 | 386.4 | **762.83** | 1174.6 | 1540.10 | **1970.14** | e.g. n=400 **−35.3%**, n=1000 **−32.8%** vs 1178.9 / 2931.7 | |
| 93 | 93 | |
| 94 | 94 | **`instruction_format: standard` 的优化点(本版):** 与 `compact` **共享**同一套 vLLM attention 自动选择;不再在 T4 上单独锁死 `TRITON_ATTN`。Prompt 仍比 `compact` 更长(固定 yes/no system + 官方前缀模板),因此 **absolute 延迟仍高于 `compact`**,但相对旧版 **standard** 行降幅与 **compact** 同量级(上表)。 |
| 95 | 95 | ... | ... |
search/rerank_client.py
| ... | ... | @@ -151,7 +151,25 @@ def _extract_named_query_score(matched_queries: Any, name: str) -> float: |
| 151 | 151 | return 1.0 if name in matched_queries else 0.0 |
| 152 | 152 | return 0.0 |
| 153 | 153 | |
| 154 | - | |
| 154 | +""" | |
| 155 | +原始变量: | |
| 156 | +ES总分 | |
| 157 | +source_score:从 ES 返回的 matched_queries 里取 base_query 这条 named query 的分(dict 用具体分数;list 形式则“匹配到名字就算 1.0”)。 | |
| 158 | +translation_score:所有名字以 base_query_trans_ 开头的 named query 的分,在 dict 里取 最大值;在 list 里只要存在这类名字就记为 1.0。 | |
| 159 | + | |
| 160 | +中间变量:计算原始query得分和翻译query得分 | |
| 161 | +weighted_source : source_score(此处原文缺失;推测原文检索权重为 1.0,待与代码确认) | |
| 162 | +weighted_translation : 0.8 * translation_score | |
| 163 | + | |
| 164 | +区分主信号和辅助信号: | |
| 165 | +合成primary_text_score和support_text_score,取 更强 的那一路(原文检索 vs 翻译检索)作为主信号 | |
| 166 | +primary_text_score : max(weighted_source, weighted_translation) | |
| 167 | +support_text_score : weighted_source + weighted_translation - primary_text_score | |
| 168 | + | |
| 169 | +主信号和辅助信号的融合:dismax融合公式 | |
| 170 | +最终text_score:主信号 + 0.25 * 辅助信号 | |
| 171 | +text_score : primary_text_score + 0.25 * support_text_score | |
| 172 | +""" | |
| 155 | 173 | def _collect_text_score_components(matched_queries: Any, fallback_es_score: float) -> Dict[str, float]: |
| 156 | 174 | source_score = _extract_named_query_score(matched_queries, "base_query") |
| 157 | 175 | translation_score = 0.0 | ... | ... |