diff --git a/bb.sh b/bb.sh index 023590d..6eba372 100644 --- a/bb.sh +++ b/bb.sh @@ -1,35 +1,116 @@ #!/bin/bash -start=$(date +%s%N) # 开始时间,纳秒级 - -time curl -X POST "http://localhost:6007/rerank" \ - -H "Content-Type: application/json" \ - -d '{ - "query": "健身女生T恤短袖", - "docs": [ "60 Jelly Bracelets 80 s Adult Size - MAQIHAN Neon Gummy Bracelets for Women 80s Jelly Bangles Glow Silicone Bands Jewelry Wristband Rainbow Jellies Bangle Girls Boys Colored Accessories Party Favor", -"FITORY Mens Sandals", -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", -"Merrell Mens Hydro Moc", -"FITORY Mens Sandals", -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", -"Merrell Mens Hydro Moc", - -"FITORY Mens Sandals", -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", -"Merrell Mens Hydro Moc", - - -"FITORY Mens Sandals", -"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum", -"Merrell Mens Hydro Moc", -Superband Mermaid Tails for Swimming for Women and Adults Without Monofin", -"Pink Queen Women s 2025 Casual Pullover Sweaters Sexy V Neck Long Sleeve Twist Knot Cropped Knit Sweater Tops" - ], - "top_n":386, - "normalize": true - }' - -end=$(date +%s%N) # 结束时间,纳秒级 -duration=$(( (end - start) / 1000000 )) # 转换为毫秒 -echo "Command took $duration milliseconds." +# 配置参数 +FILE="/home/ubuntu/rerank_test/titles.1.8w" +LINES=400 +REPEAT=10 +URLS=( + "http://localhost:6007/rerank" + "http://172.19.27.9:9997/v1/rerank" +) +# 检查必要命令 +if ! command -v jq &> /dev/null; then + echo "错误:jq 未安装,请先安装 jq。" + exit 1 +fi + +# 检查文件是否存在 +if [ ! -f "$FILE" ]; then + echo "错误:文件 $FILE 不存在。" + exit 1 +fi + +# 读取前 LINES 行,构建 docs 数组的 JSON +echo "正在从 $FILE 读取前 $LINES 行..." +docs_json=$(head -n "$LINES" "$FILE" | jq -R . | jq -s .) 
+if [ -z "$docs_json" ]; then + echo "错误:读取文件失败或文件为空。" + exit 1 +fi + +# 构建完整请求体 JSON +base_json='{"query": "健身女生T恤短袖", "top_n": 386, "normalize": true}' +full_json=$(echo "$base_json" | jq --argjson docs "$docs_json" '. + {docs: $docs}') +if [ -z "$full_json" ]; then + echo "错误:构建 JSON 失败。" + exit 1 +fi + +# 创建临时文件存放请求体 +temp_file=$(mktemp) +echo "$full_json" > "$temp_file" +echo "请求体已保存到临时文件: $temp_file" + +# 函数:测试单个 URL +test_url() { + local url=$1 + local durations=() + local failed=0 + + echo "========================================" + echo "开始测试 URL: $url" + echo "重复次数: $REPEAT" + echo "----------------------------------------" + + for i in $(seq 1 $REPEAT); do + # 开始计时(纳秒) + start=$(date +%s%N) + + # 发送请求,-s 静默,-o /dev/null 丢弃响应体,--data-binary 直接读取文件 + http_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$url" \ + -H "Content-Type: application/json" \ + --data-binary @"$temp_file") + + # 结束计时 + end=$(date +%s%N) + duration=$(( (end - start) / 1000000 )) # 毫秒 + + # 判断是否成功(HTTP 2xx 视为成功) + if [[ "$http_code" -ge 200 && "$http_code" -lt 300 ]]; then + durations+=($duration) + echo "第 $i 次: 耗时 ${duration} ms (HTTP $http_code)" + else + echo "第 $i 次: 失败 (HTTP $http_code) 耗时 ${duration} ms" + failed=$((failed + 1)) + fi + + # 可选:每次请求间隔一点时间,避免过载(取消注释以启用) + # sleep 0.1 + done + + # 统计成功请求的耗时 + local count=${#durations[@]} + echo "----------------------------------------" + if [ $count -eq 0 ]; then + echo "所有请求均失败,无法统计耗时。" + return + fi + + # 计算总和、平均值、最小值、最大值 + local sum=0 + local min=${durations[0]} + local max=${durations[0]} + for d in "${durations[@]}"; do + sum=$((sum + d)) + (( d < min )) && min=$d + (( d > max )) && max=$d + done + local avg=$((sum / count)) + + echo "成功请求数: $count / $REPEAT" + echo "失败请求数: $failed" + echo "平均耗时: $avg ms" + echo "最小耗时: $min ms" + echo "最大耗时: $max ms" + echo "========================================" +} + +# 依次测试每个 URL +for url in "${URLS[@]}"; do + test_url "$url" +done + +# 清理临时文件 +rm -f "$temp_file" +echo "临时文件已删除。" 
diff --git a/config/config.yaml b/config/config.yaml index 1847d98..50ad9dc 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -412,7 +412,9 @@ services: # instruction: "Rank by query relevance, prioritize category & style" # instruction: "Relevance ranking: category & style match first" # instruction: "Score product relevance by query with category & style match prioritized" - instruction: "Rank products by query with category & style match prioritized" + # instruction: "Rank products by query with category & style match prioritized" + # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query" + instruction: "rank products by given query" # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。 qwen3_vllm_score: @@ -424,9 +426,9 @@ services: # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并 # hf_overrides: {} engine: "vllm" - max_model_len: 256 + max_model_len: 224 tensor_parallel_size: 1 - gpu_memory_utilization: 0.20 + gpu_memory_utilization: 0.12 dtype: "float16" enable_prefix_caching: true enforce_eager: false @@ -434,7 +436,9 @@ services: sort_by_doc_length: true # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致 instruction_format: standard # compact standard + # instruction: "Rank products by query with category & style match prioritized" instruction: "Rank products by query with category & style match prioritized" + # instruction: "Given a shopping query, rank products by relevance" qwen3_transformers: model_name: "Qwen/Qwen3-Reranker-0.6B" instruction: "rank products by given query" diff --git a/frontend/static/js/app.js b/frontend/static/js/app.js index 23363ce..1e04e45 100644 --- a/frontend/static/js/app.js +++ b/frontend/static/js/app.js @@ -408,7 +408,7 @@ function displayResults(data) { let debugHtml = ''; if (debug) { const esScore = typeof debug.es_score === 
'number' ? debug.es_score.toFixed(4) : String(debug.es_score ?? ''); - const esNorm = typeof debug.es_score_normalized === 'number' + const es_score_normalized = typeof debug.es_score_normalized === 'number' ? debug.es_score_normalized.toFixed(4) : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized)); const rerankScore = typeof debug.rerank_score === 'number' @@ -433,7 +433,7 @@ function displayResults(data) { const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`; const rerankInputHtml = debug.rerank_input ? ` -
+
Rerank input
${escapeHtml(customStringify(debug.rerank_input))}
@@ -441,7 +441,7 @@ function displayResults(data) { : ''; const styleIntentHtml = debug.style_intent_sku ? ` -
+
Selected SKU
${escapeHtml(customStringify(debug.style_intent_sku))}
@@ -449,7 +449,7 @@ function displayResults(data) { : ''; const matchedQueriesHtml = debug.matched_queries ? ` -
+
matched_queries
${escapeHtml(customStringify(debug.matched_queries))}
@@ -463,7 +463,7 @@ function displayResults(data) {
Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}
Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}
ES score: ${esScore}
-
ES normalized: ${esNorm}
+
ES normalized: ${es_score_normalized}
Rerank score: ${rerankScore}
rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}
text_score: ${escapeHtml(String(debug.text_score ?? ''))}
diff --git a/perf_reports/reranker_vllm_instruction/2026-03-25/RESULTS.md b/perf_reports/reranker_vllm_instruction/2026-03-25/RESULTS.md index 4b6767c..71a832f 100644 --- a/perf_reports/reranker_vllm_instruction/2026-03-25/RESULTS.md +++ b/perf_reports/reranker_vllm_instruction/2026-03-25/RESULTS.md @@ -89,7 +89,7 @@ Micro-benchmark (same machine, isolated): **~927.5 ms → ~673.1 ms** at **n | instruction_format | n=100 | n=200 | n=400 | n=600 | n=800 | n=1000 | vs baseline same row (approx.) | |--------------------|------:|------:|------:|------:|------:|-------:|--------------------------------| | `compact` | 178.5 | 351.7 | **688.2** | 1024.0 | 1375.8 | **1752.4** | e.g. n=400 **−28.8%**, n=1000 **−27.8%** vs 966.2 / 2428.4 | -| `standard` | 198.4 | 386.4 | **778.8** | 1174.6 | 1548.1 | **1956.6** | e.g. n=400 **−33.9%**, n=1000 **−33.3%** vs 1178.9 / 2931.7 | +| `standard` | 198.4 | 386.4 | **762.83** | 1174.6 | 1540.10 | **1970.14** | e.g. n=400 **−35.3%**, n=1000 **−32.8%** vs 1178.9 / 2931.7 | **`instruction_format: standard` 的优化点(本版):** 与 `compact` **共享**同一套 vLLM attention 自动选择;不再在 T4 上单独锁死 `TRITON_ATTN`。Prompt 仍比 `compact` 更长(固定 yes/no system + 官方前缀模板),因此 **absolute 延迟仍高于 `compact`**,但相对旧版 **standard** 行降幅与 **compact** 同量级(上表)。 diff --git a/search/rerank_client.py b/search/rerank_client.py index 09f410d..f8433d9 100644 --- a/search/rerank_client.py +++ b/search/rerank_client.py @@ -151,7 +151,25 @@ def _extract_named_query_score(matched_queries: Any, name: str) -> float: return 1.0 if name in matched_queries else 0.0 return 0.0 - +""" +原始变量: +ES总分 +source_score:从 ES 返回的 matched_queries 里取 base_query 这条 named query 的分(dict 用具体分数;list 形式则“匹配到名字就算 1.0”)。 +translation_score:所有名字以 base_query_trans_ 开头的 named query 的分,在 dict 里取 最大值;在 list 里只要存在这类名字就记为 1.0。 + +中间变量:计算原始query得分和翻译query得分 +weighted_source : +weighted_translation : 0.8 * translation_score + +区分主信号和辅助信号: +合成primary_text_score和support_text_score,取 更强 的那一路(原文检索 vs 翻译检索)作为主信号 +primary_text_score : 
max(weighted_source, weighted_translation) +support_text_score : weighted_source + weighted_translation - primary_text_score + +主信号和辅助信号的融合:dismax融合公式 +最终text_score:主信号 + 0.25 * 辅助信号 +text_score : primary_text_score + 0.25 * support_text_score +""" def _collect_text_score_components(matched_queries: Any, fallback_es_score: float) -> Dict[str, float]: source_score = _extract_named_query_score(matched_queries, "base_query") translation_score = 0.0 -- libgit2 0.21.2