融合公式参数调整、以及展示信息优化

tangwang
1 parent b0972ff9
Showing 5 changed files with 145 additions and 42 deletions Show diff stats
bb.sh
config/config.yaml
frontend/static/js/app.js
perf_reports/reranker_vllm_instruction/2026-03-25/RESULTS.md
search/rerank_client.py
 #!/bin/bash
-start=$(date +%s%N)  # 开始时间，纳秒级
-
-time curl -X POST "http://localhost:6007/rerank" \
-  -H "Content-Type: application/json" \
-  -d '{
-    "query": "健身女生T恤短袖",
-    "docs": [	"60 Jelly Bracelets 80 s Adult Size - MAQIHAN Neon Gummy Bracelets for Women 80s Jelly Bangles Glow Silicone Bands Jewelry Wristband Rainbow Jellies Bangle Girls Boys Colored Accessories Party Favor",
-"FITORY Mens Sandals",
-"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum",
-"Merrell Mens Hydro Moc",
-"FITORY Mens Sandals",
-"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum",
-"Merrell Mens Hydro Moc",
-
-"FITORY Mens Sandals",
-"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum",
-"Merrell Mens Hydro Moc",
-
-
-"FITORY Mens Sandals",
-"Lefant 3 Packs Dust Bags Replacement Kit Suitable for Lefant Base Station of M3/M3 Max Robot Vacuum",
-"Merrell Mens Hydro Moc",
-Superband Mermaid Tails for Swimming for Women and Adults Without Monofin",
-"Pink Queen Women s 2025 Casual Pullover Sweaters Sexy V Neck Long Sleeve Twist Knot Cropped Knit Sweater Tops"
-	],
-    "top_n":386,
-    "normalize": true
-  }'
-
-end=$(date +%s%N)    # 结束时间，纳秒级
-duration=$(( (end - start) / 1000000 ))  # 转换为毫秒
-echo "Command took $duration milliseconds."
  
+# Configuration parameters
+# Input file; each line is sent as one entry of the request's "docs" array.
+FILE="/home/ubuntu/rerank_test/titles.1.8w"
+# Number of leading lines of FILE to include in every request.
+# NOTE(review): LINES is also a special bash variable (terminal height) that
+# bash may overwrite after external commands when `checkwinsize` is enabled —
+# consider renaming (e.g. DOC_LINES) to avoid surprises.
+LINES=400
+# Number of requests sent to each URL.
+REPEAT=10
+# Rerank endpoints to benchmark, tested in this order.
+URLS=(
+    "http://localhost:6007/rerank"
+    "http://172.19.27.9:9997/v1/rerank"
+)
  
+# Verify required commands are available (jq builds the request JSON).
+if ! command -v jq &> /dev/null; then
+    echo "错误：jq 未安装，请先安装 jq。"
+    exit 1
+fi
+
+# Verify the input file exists.
+if [ ! -f "$FILE" ]; then
+    echo "错误：文件 $FILE 不存在。"
+    exit 1
+fi
+
+# Read the first LINES lines and turn them into a JSON array of strings:
+# `jq -R .` encodes each raw line as a JSON string, `jq -s .` slurps them
+# into a single array.
+echo "正在从 $FILE 读取前 $LINES 行..."
+docs_json=$(head -n "$LINES" "$FILE" | jq -R . | jq -s .)
+# `jq -s .` prints "[]" (not an empty string) when it receives no input, so an
+# empty file has to be detected explicitly in addition to a failed pipeline.
+if [ -z "$docs_json" ] || [ "$docs_json" = "[]" ]; then
+    echo "错误：读取文件失败或文件为空。"
+    exit 1
+fi
+
+# Build the complete request body by merging the docs array into the base object.
+base_json='{"query": "健身女生T恤短袖", "top_n": 386, "normalize": true}'
+full_json=$(printf '%s\n' "$base_json" | jq --argjson docs "$docs_json" '. + {docs: $docs}')
+if [ -z "$full_json" ]; then
+    echo "错误：构建 JSON 失败。"
+    exit 1
+fi
+
+# Store the request body in a temp file so curl can send it with --data-binary.
+temp_file=$(mktemp) || { echo "错误：无法创建临时文件。"; exit 1; }
+# Remove the temp file on every exit path, not only when the script reaches
+# its final cleanup step.
+trap 'rm -f -- "$temp_file"' EXIT
+printf '%s\n' "$full_json" > "$temp_file"
+echo "请求体已保存到临时文件: $temp_file"
+
+# Benchmark a single rerank endpoint.
+# Globals:   REPEAT (read)    - number of requests to send
+#            temp_file (read) - path of the file holding the JSON request body
+# Arguments: $1 - endpoint URL to POST the request body to
+# Outputs:   one timing line per request, then a summary (success/failure
+#            counts, average/min/max latency in ms) on stdout
+test_url() {
+    local url=$1
+    local durations=()
+    local failed=0
+    # Keep loop and timing state local so nothing leaks into the caller's scope.
+    local i d start end duration http_code
+
+    echo "========================================"
+    echo "开始测试 URL: $url"
+    echo "重复次数: $REPEAT"
+    echo "----------------------------------------"
+
+    for ((i = 1; i <= REPEAT; i++)); do
+        # Start timestamp (nanoseconds since the epoch)
+        start=$(date +%s%N)
+
+        # -s: silent; -o /dev/null: discard the response body; -w: print only
+        # the HTTP status code; --data-binary: send the file contents as-is.
+        http_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$url" \
+            -H "Content-Type: application/json" \
+            --data-binary @"$temp_file")
+
+        # End timestamp; convert the elapsed nanoseconds to milliseconds
+        end=$(date +%s%N)
+        duration=$(( (end - start) / 1000000 ))
+
+        # Only HTTP 2xx responses count as success and enter the statistics
+        if [[ "$http_code" -ge 200 && "$http_code" -lt 300 ]]; then
+            durations+=("$duration")
+            echo "第 $i 次: 耗时 ${duration} ms (HTTP $http_code)"
+        else
+            echo "第 $i 次: 失败 (HTTP $http_code) 耗时 ${duration} ms"
+            failed=$((failed + 1))
+        fi
+
+        # Optional: pause between requests to avoid overloading the server
+        # (uncomment to enable)
+        # sleep 0.1
+    done
+
+    # Summarise the latencies of the successful requests
+    local count=${#durations[@]}
+    echo "----------------------------------------"
+    if (( count == 0 )); then
+        echo "所有请求均失败，无法统计耗时。"
+        return
+    fi
+
+    # Compute sum, integer average, minimum and maximum
+    local sum=0
+    local min=${durations[0]}
+    local max=${durations[0]}
+    for d in "${durations[@]}"; do
+        sum=$((sum + d))
+        (( d < min )) && min=$d
+        (( d > max )) && max=$d
+    done
+    local avg=$((sum / count))
+
+    echo "成功请求数: $count / $REPEAT"
+    echo "失败请求数: $failed"
+    echo "平均耗时: $avg ms"
+    echo "最小耗时: $min ms"
+    echo "最大耗时: $max ms"
+    echo "========================================"
+}
+
+# Benchmark every configured endpoint, in array order
+for idx in "${!URLS[@]}"; do
+    test_url "${URLS[idx]}"
+done
+
+# Remove the temporary request-body file
+rm -f "$temp_file"
+echo "临时文件已删除。"
@@ -412,7 +412,9 @@ services:
         # instruction: "Rank by query relevance, prioritize category & style"
         # instruction: "Relevance ranking: category & style match first"
         # instruction: "Score product relevance by query with category & style match prioritized"
-        instruction: "Rank products by query with category & style match prioritized"
+        # instruction: "Rank products by query with category & style match prioritized"
+        # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query"
+        instruction: "rank products by given query"
       # vLLM LLM.score()（跨编码打分）。独立高性能环境 .venv-reranker-score（vllm 0.18 固定版）：./scripts/setup_reranker_venv.sh qwen3_vllm_score
       # 与 qwen3_vllm 可共用同一 model_name / HF 缓存；venv 分离以便升级 vLLM 而不影响 generate 后端。
       qwen3_vllm_score:
@@ -424,9 +426,9 @@ services:
         # 可选：在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并
         # hf_overrides: {}
         engine: "vllm"
-        max_model_len: 256
+        max_model_len: 224
         tensor_parallel_size: 1
-        gpu_memory_utilization: 0.20
+        gpu_memory_utilization: 0.12
         dtype: "float16"
         enable_prefix_caching: true
         enforce_eager: false
@@ -434,7 +436,9 @@ services:
         sort_by_doc_length: true
         # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致
         instruction_format: standard # compact standard
+        # instruction: "Rank products by query with category & style match prioritized"
         instruction: "Rank products by query with category & style match prioritized"
+        # instruction: "Given a shopping query, rank products by relevance"
       qwen3_transformers:
         model_name: "Qwen/Qwen3-Reranker-0.6B"
         instruction: "rank products by given query"
@@ -408,7 +408,7 @@ function displayResults(data) {
         let debugHtml = '';
         if (debug) {
             const esScore = typeof debug.es_score === 'number' ? debug.es_score.toFixed(4) : String(debug.es_score ?? '');
-            const esNorm = typeof debug.es_score_normalized === 'number'
+            const es_score_normalized = typeof debug.es_score_normalized === 'number'
                 ? debug.es_score_normalized.toFixed(4)
                 : (debug.es_score_normalized == null ? '' : String(debug.es_score_normalized));
             const rerankScore = typeof debug.rerank_score === 'number'
@@ -433,7 +433,7 @@ function displayResults(data) {
             const rawUrl = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;
             const rerankInputHtml = debug.rerank_input
                 ? `
-                    <details>
+                    <details open>
                         <summary>Rerank input</summary>
                         <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.rerank_input))}</pre>
                     </details>
@@ -441,7 +441,7 @@ function displayResults(data) {
                 : '';
             const styleIntentHtml = debug.style_intent_sku
                 ? `
-                    <details>
+                    <details open>
                         <summary>Selected SKU</summary>
                         <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.style_intent_sku))}</pre>
                     </details>
@@ -449,7 +449,7 @@ function displayResults(data) {
                 : '';
             const matchedQueriesHtml = debug.matched_queries
                 ? `
-                    <details>
+                    <details open>
                         <summary>matched_queries</summary>
                         <pre style="background: #f5f5f5; padding: 10px; overflow: auto; max-height: 220px;">${escapeHtml(customStringify(debug.matched_queries))}</pre>
                     </details>
@@ -463,7 +463,7 @@ function displayResults(data) {
                     <div class="product-debug-line">Position before rerank: ${escapeHtml(String(debug.initial_rank ?? ''))}</div>
                     <div class="product-debug-line">Position after rerank: ${escapeHtml(String(debug.final_rank ?? ''))}</div>
                     <div class="product-debug-line">ES score: ${esScore}</div>
-                    <div class="product-debug-line">ES normalized: ${esNorm}</div>
+                    <div class="product-debug-line">ES normalized: ${es_score_normalized}</div>
                     <div class="product-debug-line">Rerank score: ${rerankScore}</div>
                     <div class="product-debug-line">rerank_factor: ${escapeHtml(String(debug.rerank_factor ?? ''))}</div>
                     <div class="product-debug-line">text_score: ${escapeHtml(String(debug.text_score ?? ''))}</div>
@@ -89,7 +89,7 @@ Micro-benchmark (same machine, isolated): **~927.5 ms → ~673.1 ms** at **n
 | instruction_format | n=100 | n=200 | n=400 | n=600 | n=800 | n=1000 | vs baseline same row (approx.) |
 |--------------------|------:|------:|------:|------:|------:|-------:|--------------------------------|
 | `compact` | 178.5 | 351.7 | **688.2** | 1024.0 | 1375.8 | **1752.4** | e.g. n=400 **−28.8%**, n=1000 **−27.8%** vs 966.2 / 2428.4 |
-| `standard` | 198.4 | 386.4 | **778.8** | 1174.6 | 1548.1 | **1956.6** | e.g. n=400 **−33.9%**, n=1000 **−33.3%** vs 1178.9 / 2931.7 |
+| `standard` | 198.4 | 386.4 | **762.83** | 1174.6 | 1540.10 | **1970.14** | e.g. n=400 **−35.3%**, n=1000 **−32.8%** vs 1178.9 / 2931.7 |
  
 **`instruction_format: standard` 的优化点（本版）：** 与 `compact` **共享**同一套 vLLM attention 自动选择；不再在 T4 上单独锁死 `TRITON_ATTN`。Prompt 仍比 `compact` 更长（固定 yes/no system + 官方前缀模板），因此 **absolute 延迟仍高于 `compact`**，但相对旧版 **standard** 行降幅与 **compact** 同量级（上表）。
  
@@ -151,7 +151,25 @@ def _extract_named_query_score(matched_queries: Any, name: str) -> float:
         return 1.0 if name in matched_queries else 0.0
     return 0.0
  
-
+"""
+原始变量：
+ES总分
+source_score：从 ES 返回的 matched_queries 里取 base_query 这条 named query 的分（dict 用具体分数；list 形式则“匹配到名字就算 1.0”）。
+translation_score：所有名字以 base_query_trans_ 开头的 named query 的分，在 dict 里取 最大值；在 list 里只要存在这类名字就记为 1.0。
+
+中间变量：计算原始query得分和翻译query得分
+weighted_source :
+weighted_translation : 0.8 * translation_score
+
+区分主信号和辅助信号：
+合成primary_text_score和support_text_score，取 更强 的那一路（原文检索 vs 翻译检索）作为主信号
+primary_text_score : max(weighted_source, weighted_translation)
+support_text_score : weighted_source + weighted_translation - primary_text_score
+
+主信号和辅助信号的融合：dismax融合公式
+最终text_score：主信号 + 0.25 * 辅助信号
+text_score : primary_text_score + 0.25 * support_text_score
+"""
 def _collect_text_score_components(matched_queries: Any, fallback_es_score: float) -> Dict[str, float]:
     source_score = _extract_named_query_score(matched_queries, "base_query")
     translation_score = 0.0