Commit 149dad2b269c659869f90d45c48c8c619b27d72e

Authored by tangwang
1 parent 0d3e73ba

add rerank-cloud-perf-study

reranker/rerank-cloud-perf-study/rerank_dashscope_perf.py 0 → 100644
@@ -0,0 +1,472 @@ @@ -0,0 +1,472 @@
  1 +#!/usr/bin/env python3
  2 +from __future__ import annotations
  3 +
  4 +import argparse
  5 +import asyncio
  6 +import json
  7 +import math
  8 +import random
  9 +import statistics
  10 +import time
  11 +from dataclasses import dataclass
  12 +from pathlib import Path
  13 +from typing import Any, Dict, List, Optional
  14 +
  15 +import httpx
  16 +
  17 +
@dataclass
class RequestTemplate:
    """Describes one HTTP request to replay during the load test.

    In dynamic-docs mode only `method`, `url` and `headers` are used by the
    worker; the JSON body is regenerated per request.
    """

    # HTTP verb, e.g. "POST".
    method: str
    # Fully-qualified endpoint URL.
    url: str
    # Static request body sent as JSON; None when the body is built per request.
    json_body: Optional[Dict[str, Any]] = None
    # Extra headers (Authorization, Content-Type) sent with every request.
    headers: Optional[Dict[str, str]] = None
  24 +
  25 +
def percentile(sorted_values: List[float], p: float) -> float:
    """Linearly interpolated percentile of an ascending-sorted list.

    Returns 0.0 for an empty list; p is clamped to [0, 100].
    """
    if not sorted_values:
        return 0.0
    if p <= 0:
        return sorted_values[0]
    if p >= 100:
        return sorted_values[-1]
    # Fractional index into the sorted list for the requested percentile.
    idx = (len(sorted_values) - 1) * (p / 100.0)
    lo, hi = int(math.floor(idx)), int(math.ceil(idx))
    if lo == hi:
        return sorted_values[lo]
    frac = idx - lo
    return sorted_values[lo] * (1.0 - frac) + sorted_values[hi] * frac
  40 +
  41 +
def parse_csv_items(raw: str) -> List[str]:
    """Split a comma-separated string into stripped, non-empty tokens."""
    # `raw or ""` tolerates None; str() tolerates non-string inputs.
    stripped = (piece.strip() for piece in str(raw or "").split(","))
    return [token for token in stripped if token]
  44 +
  45 +
def parse_csv_ints(raw: str) -> List[int]:
    """Parse a CSV string into de-duplicated positive ints, order preserved.

    Raises:
        ValueError: on a non-integer token or a value <= 0.
    """
    # dict keys preserve insertion order, giving order-preserving de-dup.
    ordered: Dict[int, None] = {}
    for token in parse_csv_items(raw):
        try:
            number = int(token)
        except ValueError as exc:
            raise ValueError(f"Invalid integer in CSV list: {token}") from exc
        if number <= 0:
            raise ValueError(f"Concurrency must be > 0, got {number}")
        ordered.setdefault(number, None)
    return list(ordered)
  61 +
  62 +
def parse_args() -> argparse.Namespace:
    """Define and parse the CLI for the DashScope rerank perf test."""
    cli = argparse.ArgumentParser(description="DashScope /compatible-api/v1/reranks perf test")

    # Load-shape options.
    cli.add_argument("--duration", type=int, default=20, help="Duration seconds per concurrency; <=0 means no duration cap")
    cli.add_argument("--concurrency", type=int, default=1, help="Default concurrency if --concurrency-list is not set")
    cli.add_argument(
        "--concurrency-list",
        type=str,
        default="1,5,10,20",
        help="Comma-separated concurrency list (e.g. 1,5,10,20). If set, overrides --concurrency.",
    )
    cli.add_argument("--max-requests", type=int, default=0, help="Stop after N requests per concurrency (0 means unlimited)")
    cli.add_argument("--timeout", type=float, default=90.0, help="Request timeout seconds")
    cli.add_argument("--max-errors", type=int, default=0, help="Stop current run when accumulated errors reach this value")

    # Endpoint / auth / output options.
    cli.add_argument(
        "--base-url",
        type=str,
        default="https://dashscope.aliyuncs.com/compatible-api/v1",
        help="Base URL for DashScope compatible API",
    )
    cli.add_argument("--api-key", type=str, default="", help="DashScope API key; if omitted, read from DASHSCOPE_API_KEY env")
    cli.add_argument("--output", type=str, default="", help="Optional output JSON path")
    cli.add_argument("--pause", type=float, default=0.0, help="Pause seconds between concurrency runs")

    # Rerank payload options.
    cli.add_argument("--model", type=str, default="qwen3-rerank", help="Rerank model name")
    cli.add_argument("--rerank-dynamic-docs", action="store_true", help="Generate documents payload dynamically on every request")
    cli.add_argument("--rerank-doc-count", type=int, default=386, help="Document count per rerank request")
    cli.add_argument("--rerank-vocab-size", type=int, default=1000, help="Word pool size for synthetic document generation")
    cli.add_argument("--rerank-sentence-min-words", type=int, default=15, help="Minimum words per generated document")
    cli.add_argument("--rerank-sentence-max-words", type=int, default=40, help="Maximum words per generated document")
    cli.add_argument("--rerank-query", type=str, default="wireless mouse", help="Fixed query used for rerank dynamic docs mode")
    cli.add_argument("--rerank-seed", type=int, default=20260312, help="Base random seed for dynamic docs mode")
    cli.add_argument("--rerank-top-n", type=int, default=386, help="top_n for rerank requests; 0 means omit")
    cli.add_argument(
        "--rerank-instruct",
        type=str,
        default="Given a web search query, retrieve relevant passages that answer the query.",
        help="Instruct field for DashScope rerank",
    )
    return cli.parse_args()
  102 +
  103 +
def build_word_pool(vocab_size: int) -> List[str]:
    """Build exactly `vocab_size` synthetic two-syllable words ("alal", "alan", ...).

    Raises:
        ValueError: when the 50x50 syllable grid cannot cover `vocab_size`.
    """
    syllables = [
        "al", "an", "ar", "as", "at", "ba", "be", "bi", "bo", "ca",
        "ce", "ci", "co", "da", "de", "di", "do", "el", "en", "er",
        "fa", "fe", "fi", "fo", "ga", "ge", "gi", "go", "ha", "he",
        "hi", "ho", "ia", "ie", "il", "in", "io", "is", "ka", "ke",
        "ki", "ko", "la", "le", "li", "lo", "ma", "me", "mi", "mo",
    ]
    pool: List[str] = []
    for first in syllables:
        for second in syllables:
            pool.append(first + second)
            # Stop as soon as the pool is large enough (at most 2500 words).
            if len(pool) >= vocab_size:
                return pool
    raise ValueError(f"Unable to generate enough synthetic words: requested={vocab_size}, got={len(pool)}")
  119 +
  120 +
def build_rerank_dynamic_cfg(args: argparse.Namespace) -> Dict[str, Any]:
    """Validate dynamic-docs CLI options and materialize the generation config.

    Raises:
        ValueError: for the first invalid option, with the offending value
            embedded in the message.
    """
    wmin = int(args.rerank_sentence_min_words)
    wmax = int(args.rerank_sentence_max_words)
    n_docs = int(args.rerank_doc_count)
    n_vocab = int(args.rerank_vocab_size)
    if n_docs <= 0:
        raise ValueError(f"rerank-doc-count must be > 0, got {n_docs}")
    if n_vocab <= 0:
        raise ValueError(f"rerank-vocab-size must be > 0, got {n_vocab}")
    if wmin <= 0:
        raise ValueError(f"rerank-sentence-min-words must be > 0, got {wmin}")
    if wmax < wmin:
        raise ValueError(
            f"rerank-sentence-max-words must be >= rerank-sentence-min-words, got {wmax} < {wmin}"
        )
    if args.rerank_seed < 0:
        raise ValueError(f"rerank-seed must be >= 0, got {args.rerank_seed}")
    if int(args.rerank_top_n) < 0:
        raise ValueError(f"rerank-top-n must be >= 0, got {args.rerank_top_n}")

    # The word pool is built once here and reused for every generated request.
    return {
        "model": args.model,
        "query": args.rerank_query,
        "doc_count": n_docs,
        "min_words": wmin,
        "max_words": wmax,
        "seed": int(args.rerank_seed),
        "top_n": int(args.rerank_top_n),
        "instruct": args.rerank_instruct,
        "word_pool": build_word_pool(n_vocab),
    }
  152 +
  153 +
def build_random_rerank_payload(cfg: Dict[str, Any], rng: random.Random) -> Dict[str, Any]:
    """Build one rerank request body with freshly sampled synthetic documents.

    Document lengths are drawn uniformly from [min_words, max_words]; words are
    sampled with replacement from cfg["word_pool"]. "top_n" is included only
    when cfg["top_n"] > 0.
    """
    pool: List[str] = cfg["word_pool"]
    # randint is evaluated as the k= argument, so RNG consumption order is
    # (length, words) per document — identical to a two-statement loop.
    docs = [
        " ".join(rng.choices(pool, k=rng.randint(cfg["min_words"], cfg["max_words"])))
        for _ in range(cfg["doc_count"])
    ]
    body: Dict[str, Any] = {
        "model": cfg["model"],
        "documents": docs,
        "query": cfg["query"],
        "instruct": cfg["instruct"],
    }
    top_n = int(cfg.get("top_n", 0))
    if top_n > 0:
        body["top_n"] = top_n
    return body
  170 +
  171 +
def build_static_template(base_url: str, api_key: str, args: argparse.Namespace) -> RequestTemplate:
    """Build the fixed POST /reranks request used in static (non-dynamic) mode.

    The three demo documents and the query below are the runtime payload and
    must stay byte-identical; "top_n" is included only when it is positive.
    """
    body: Dict[str, Any] = {
        "model": args.model,
        "documents": [
            "文本排序模型广泛用于搜索引擎和推荐系统中,它们根据文本相关性对候选文本进行排序",
            "量子计算是计算科学的一个前沿领域",
            "预训练语言模型的发展给文本排序模型带来了新的进展",
        ],
        "query": "什么是文本排序模型",
        "instruct": args.rerank_instruct,
    }
    top_n = int(args.rerank_top_n)
    if top_n > 0:
        body["top_n"] = top_n

    endpoint = f"{base_url.rstrip('/')}/reranks"
    auth_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    return RequestTemplate(method="POST", url=endpoint, json_body=body, headers=auth_headers)
  195 +
  196 +
def validate_response_payload(payload: Any) -> tuple[bool, str]:
    """Check that a rerank response is a dict with a list-valued "results" key.

    Returns:
        (True, "") on success, otherwise (False, reason_code).
    """
    if not isinstance(payload, dict):
        return False, "invalid_payload_non_dict"
    try:
        results = payload["results"]
    except KeyError:
        return False, "invalid_payload_missing_results"
    if not isinstance(results, list):
        return False, "invalid_payload_results_non_list"
    return True, ""
  205 +
  206 +
async def run_single_concurrency(
    template: RequestTemplate,
    duration_sec: int,
    concurrency: int,
    max_requests: int,
    max_errors: int,
    timeout_sec: float,
    rerank_dynamic_cfg: Optional[Dict[str, Any]],
) -> Dict[str, Any]:
    """Drive `concurrency` async workers against `template` and collect stats.

    Args:
        template: Request method/URL/headers (and static JSON body) to replay.
        duration_sec: Wall-clock cap for this run; <= 0 disables the time cap.
        concurrency: Number of concurrent worker tasks sharing one client.
        max_requests: Global request cap across all workers; 0 means unlimited.
        max_errors: Stop the run once this many failures accumulate; 0 disables.
        timeout_sec: Per-request httpx timeout.
        rerank_dynamic_cfg: When set, each worker regenerates the JSON body per
            request using a per-worker RNG seeded from cfg["seed"] + worker_id.

    Returns:
        A dict with request counts, success rate, throughput (rps), latency
        percentiles in ms, a status-code histogram and an error histogram.
    """
    latencies: List[float] = []          # per-request latency in ms (failures included)
    status_counter: Dict[int, int] = {}  # HTTP status -> count (only when a response arrived)
    err_counter: Dict[str, int] = {}     # error reason -> count
    total_requests = 0
    success_requests = 0
    stop_flag = False
    # The lock serializes counter updates across the workers' await points.
    lock = asyncio.Lock()
    start = time.perf_counter()

    timeout = httpx.Timeout(timeout=timeout_sec)
    # Pool sized above the worker count so workers do not wait for connections.
    limits = httpx.Limits(max_connections=max(concurrency * 2, 20), max_keepalive_connections=max(concurrency, 10))

    async def worker(worker_id: int, client: httpx.AsyncClient) -> None:
        # One request loop per worker; shares the counters above via nonlocal.
        nonlocal total_requests, success_requests, stop_flag
        worker_rng: Optional[random.Random] = None
        if rerank_dynamic_cfg is not None:
            # Deterministic but distinct seed per worker: reproducible runs,
            # different document streams per worker.
            worker_rng = random.Random(int(rerank_dynamic_cfg["seed"]) + worker_id)

        while not stop_flag:
            elapsed = time.perf_counter() - start
            if duration_sec > 0 and elapsed >= duration_sec:
                break

            async with lock:
                # Reserve a request slot before sending so the cap is exact.
                if max_requests > 0 and total_requests >= max_requests:
                    stop_flag = True
                    break
                total_requests += 1

            payload = template.json_body
            if rerank_dynamic_cfg is not None and worker_rng is not None:
                payload = build_random_rerank_payload(rerank_dynamic_cfg, worker_rng)

            t0 = time.perf_counter()
            ok = False
            status = 0  # stays 0 when no HTTP response was received at all
            err = ""
            try:
                resp = await client.request(
                    method=template.method,
                    url=template.url,
                    headers=template.headers,
                    json=payload,
                )
                status = int(resp.status_code)
                ok = 200 <= status < 300
                if ok:
                    # A 2xx only counts as success if the body parses as JSON
                    # and contains a list-valued "results" key.
                    try:
                        body = resp.json()
                    except Exception:
                        ok = False
                        err = "invalid_json_response"
                    else:
                        valid, reason = validate_response_payload(body)
                        if not valid:
                            ok = False
                            err = reason or "invalid_payload"
                if not ok and not err:
                    err = f"http_{status}"
            except Exception as e:
                # Transport-level failure (timeout, connect error, ...):
                # record the exception class name as the reason.
                err = type(e).__name__

            # Latency covers the full request and is recorded for failures too.
            latency_ms = (time.perf_counter() - t0) * 1000.0
            async with lock:
                latencies.append(latency_ms)
                if status:
                    status_counter[status] = status_counter.get(status, 0) + 1
                if ok:
                    success_requests += 1
                else:
                    err_counter[err or "unknown"] = err_counter.get(err or "unknown", 0) + 1
                    if max_errors > 0 and sum(err_counter.values()) >= max_errors:
                        stop_flag = True

    # One shared client so all workers reuse the same connection pool.
    async with httpx.AsyncClient(timeout=timeout, limits=limits) as client:
        tasks = [asyncio.create_task(worker(i, client)) for i in range(concurrency)]
        await asyncio.gather(*tasks)

    elapsed = max(time.perf_counter() - start, 1e-9)  # guard against division by zero
    lat_sorted = sorted(latencies)
    result = {
        "scenario": "rerank_dashscope",
        "concurrency": concurrency,
        "duration_sec": round(elapsed, 3),
        "total_requests": total_requests,
        "success_requests": success_requests,
        "failed_requests": max(total_requests - success_requests, 0),
        "success_rate": round((success_requests / total_requests) * 100.0, 2) if total_requests else 0.0,
        "throughput_rps": round(total_requests / elapsed, 2),
        "latency_ms": {
            "avg": round(statistics.mean(lat_sorted), 2) if lat_sorted else 0.0,
            "p50": round(percentile(lat_sorted, 50), 2),
            "p90": round(percentile(lat_sorted, 90), 2),
            "p95": round(percentile(lat_sorted, 95), 2),
            "p99": round(percentile(lat_sorted, 99), 2),
            "max": round(max(lat_sorted), 2) if lat_sorted else 0.0,
        },
        "status_codes": dict(sorted(status_counter.items(), key=lambda x: x[0])),
        "errors": dict(sorted(err_counter.items(), key=lambda x: x[0])),
    }
    return result
  317 +
  318 +
def format_summary(result: Dict[str, Any]) -> str:
    """Render one run's result dict as a human-readable multi-line summary.

    The header starts with a newline to visually separate consecutive runs on
    stdout. An "errors:" line is appended only when failures occurred.
    """
    lat = result["latency_ms"]
    lines = [
        # FIX: newlines must be \n escapes inside the literals — the source
        # had raw line breaks inside the strings, which is a SyntaxError.
        f"\n=== Scenario: {result['scenario']} @ concurrency={result['concurrency']} ===",
        "requests={total_requests} success={success_requests} fail={failed_requests} success_rate={success_rate}% rps={throughput_rps}".format(**result),
        f"latency(ms): avg={lat['avg']} p50={lat['p50']} p90={lat['p90']} p95={lat['p95']} p99={lat['p99']} max={lat['max']}",
        f"status_codes: {result['status_codes']}",
    ]
    if result["errors"]:
        lines.append(f"errors: {result['errors']}")
    return "\n".join(lines)
  332 +
  333 +
def aggregate_results(results: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Combine per-concurrency run results into one overall summary.

    Returns {} when there are no results. RPS is computed over the summed
    wall-clock durations (runs execute sequentially); average latency is
    weighted by each run's request count.
    """
    if not results:
        return {}
    requests_total = sum(r["total_requests"] for r in results)
    requests_ok = sum(r["success_requests"] for r in results)
    requests_failed = sum(r["failed_requests"] for r in results)
    duration_total = sum(r["duration_sec"] for r in results)
    if requests_total > 0:
        avg_latency = sum(r["latency_ms"]["avg"] * r["total_requests"] for r in results) / requests_total
        ok_rate = round((requests_ok / requests_total) * 100.0, 2)
    else:
        avg_latency = 0.0
        ok_rate = 0.0

    return {
        "scenario": "ALL",
        "total_requests": requests_total,
        "success_requests": requests_ok,
        "failed_requests": requests_failed,
        "success_rate": ok_rate,
        "aggregate_rps": round(requests_total / max(duration_total, 1e-9), 2),
        "weighted_avg_latency_ms": round(avg_latency, 2),
    }
  354 +
  355 +
async def main_async() -> int:
    """CLI entry point: validate config, run each concurrency level, report.

    Returns:
        Process exit code: 0 on success, 2 on any configuration error
        (missing API key, bad concurrency list, bad dynamic-docs options).
    """
    import os  # local import: only needed for the API-key env fallback

    args = parse_args()
    api_key = (args.api_key or os.getenv("DASHSCOPE_API_KEY") or "").strip()
    if not api_key:
        print("Missing API key. Set --api-key or DASHSCOPE_API_KEY.")
        return 2

    try:
        concurrency_values = parse_csv_ints(args.concurrency_list) if args.concurrency_list else [args.concurrency]
    except ValueError as exc:
        print(str(exc))
        return 2
    if not concurrency_values:
        print("concurrency-list is empty after parsing.")
        return 2

    try:
        rerank_dynamic_cfg = build_rerank_dynamic_cfg(args) if args.rerank_dynamic_docs else None
    except ValueError as exc:
        print(str(exc))
        return 2

    # The static template is always built: in dynamic mode it still supplies
    # the method/URL/headers while the JSON body is regenerated per request.
    template = build_static_template(args.base_url, api_key, args)

    print("Load test config:")
    print(" scenario=rerank_dashscope")
    print(f" duration={args.duration}s")
    print(f" concurrency={args.concurrency}")
    print(f" concurrency_list={concurrency_values}")
    print(f" max_requests={args.max_requests}")
    print(f" timeout={args.timeout}s")
    print(f" max_errors={args.max_errors}")
    print(f" base_url={args.base_url}")
    print(f" model={args.model}")
    print(f" rerank_dynamic_docs={args.rerank_dynamic_docs}")
    if args.rerank_dynamic_docs:
        print(f" rerank_doc_count={args.rerank_doc_count}")
        print(f" rerank_vocab_size={args.rerank_vocab_size}")
        print(f" rerank_sentence_words=[{args.rerank_sentence_min_words},{args.rerank_sentence_max_words}]")
        print(f" rerank_query={args.rerank_query}")
        print(f" rerank_seed={args.rerank_seed}")
        print(f" rerank_top_n={args.rerank_top_n}")
        print(f" rerank_instruct={args.rerank_instruct}")
    else:
        print(" static_request_payload=demo_payload")

    results: List[Dict[str, Any]] = []
    total_jobs = len(concurrency_values)
    for idx, c in enumerate(concurrency_values, start=1):
        # FIX: the newline must be a \n escape inside the f-string — the
        # source had a raw line break inside the literal (SyntaxError).
        print(f"\n[{idx}/{total_jobs}] running rerank_dashscope @ concurrency={c} ...")
        result = await run_single_concurrency(
            template=template,
            duration_sec=args.duration,
            concurrency=c,
            max_requests=args.max_requests,
            max_errors=args.max_errors,
            timeout_sec=args.timeout,
            rerank_dynamic_cfg=rerank_dynamic_cfg,
        )
        print(format_summary(result))
        results.append(result)
        # Optional cool-down between runs (skipped after the last one).
        if args.pause > 0 and idx < total_jobs:
            await asyncio.sleep(args.pause)

    final = {
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
        "config": {
            "scenario": "rerank_dashscope",
            "duration_sec": args.duration,
            "concurrency": args.concurrency,
            "concurrency_list": concurrency_values,
            "max_requests": args.max_requests,
            "timeout_sec": args.timeout,
            "max_errors": args.max_errors,
            "base_url": args.base_url,
            "model": args.model,
            "output": args.output or None,
            "rerank_dynamic_docs": args.rerank_dynamic_docs,
            "rerank_doc_count": args.rerank_doc_count,
            "rerank_vocab_size": args.rerank_vocab_size,
            "rerank_sentence_min_words": args.rerank_sentence_min_words,
            "rerank_sentence_max_words": args.rerank_sentence_max_words,
            "rerank_query": args.rerank_query,
            "rerank_seed": args.rerank_seed,
            "rerank_top_n": args.rerank_top_n,
            "rerank_instruct": args.rerank_instruct,
        },
        "results": results,
        "overall": aggregate_results(results),
    }

    # FIX: same raw-line-break-in-literal issue as above.
    print("\n=== Overall ===")
    print(json.dumps(final["overall"], ensure_ascii=False, indent=2))

    if args.output:
        out_path = Path(args.output)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(json.dumps(final, ensure_ascii=False, indent=2), encoding="utf-8")
        print(f"Saved JSON report: {out_path}")

    return 0
  461 +
  462 +
def main() -> int:
    """Synchronous wrapper: run the async entry point under asyncio.

    Maps Ctrl-C to exit code 130 (128 + SIGINT), the conventional shell
    status for an interrupted process.
    """
    try:
        exit_code = asyncio.run(main_async())
    except KeyboardInterrupt:
        print("Interrupted by user")
        return 130
    return exit_code
  469 +
  470 +
if __name__ == "__main__":
    # Propagate main()'s int return value as the process exit status.
    raise SystemExit(main())
reranker/rerank-cloud-perf-study/rerank_dashscope_perf_usage.md 0 → 100644
@@ -0,0 +1,269 @@ @@ -0,0 +1,269 @@
本文档是 `rerank_dashscope_perf.py` 的使用说明,可直接放入 README 或脚本注释中。
  2 +
  3 +---
  4 +
  5 +# rerank_dashscope_perf.py 使用说明
  6 +
  7 +该脚本用于对 **DashScope `qwen3-rerank` 接口**进行并发性能测试,
  8 +测试接口:
  9 +
  10 +```
  11 +POST https://dashscope.aliyuncs.com/compatible-api/v1/reranks
  12 +```
  13 +
  14 +脚本支持:
  15 +
  16 +* 并发压测
  17 +* 固定或动态生成 documents
  18 +* 自定义 doc 数量(例如 386)
  19 +* 输出详细 latency / RPS 统计
  20 +* 多并发梯度测试(如 1,5,10,20)
  21 +
  22 +---
  23 +
  24 +# 一、环境准备
  25 +
  26 +### 1 安装依赖
  27 +
  28 +脚本依赖 `httpx`:
  29 +
  30 +```bash
  31 +pip install httpx
  32 +```
  33 +
  34 +如果使用虚拟环境:
  35 +
  36 +```bash
  37 +.venv/bin/pip install httpx
  38 +```
  39 +
  40 +---
  41 +
  42 +### 2 设置 DashScope API Key
  43 +
  44 +```bash
  45 +export DASHSCOPE_API_KEY=你的key
  46 +```
  47 +
  48 +也可以通过参数指定:
  49 +
  50 +```bash
  51 +--api-key xxx
  52 +```
  53 +
  54 +---
  55 +
  56 +# 二、基本用法
  57 +
  58 +最常见的压测方式:
  59 +
  60 +```bash
  61 +python rerank_dashscope_perf.py \
  62 + --duration 20 \
  63 + --concurrency-list 1,5,10,20 \
  64 + --timeout 90 \
  65 + --rerank-dynamic-docs \
  66 + --rerank-doc-count 386 \
  67 + --rerank-vocab-size 1000 \
  68 + --rerank-sentence-min-words 15 \
  69 + --rerank-sentence-max-words 40 \
  70 + --rerank-query "wireless mouse" \
  71 + --rerank-seed 20260312 \
  72 + --rerank-top-n 386 \
  73 + --output perf_result.json
  74 +```
  75 +
  76 +含义:
  77 +
  78 +| 参数 | 说明 |
  79 +| ------------------- | ------------- |
  80 +| duration | 每个并发测试持续时间(秒) |
  81 +| concurrency-list | 并发列表 |
  82 +| timeout | 单请求超时时间 |
  83 +| rerank-dynamic-docs | 启用动态 doc 生成 |
  84 +| rerank-doc-count | 每个请求 doc 数量 |
  85 +| rerank-top-n | 返回 top_n |
  86 +| output | 保存结果 JSON |
  87 +
  88 +---
  89 +
  90 +# 三、测试模式
  91 +
  92 +脚本有两种请求模式:
  93 +
  94 +---
  95 +
  96 +# 1 静态请求模式(默认)
  97 +
  98 +如果**不使用 `--rerank-dynamic-docs`**,请求 payload 固定为:
  99 +
  100 +```json
  101 +{
  102 + "model": "qwen3-rerank",
  103 + "documents": [
  104 + "文本排序模型广泛用于搜索引擎和推荐系统中,它们根据文本相关性对候选文本进行排序",
  105 + "量子计算是计算科学的一个前沿领域",
  106 + "预训练语言模型的发展给文本排序模型带来了新的进展"
  107 + ],
  108 + "query": "什么是文本排序模型",
  "instruct": "Given a web search query, retrieve relevant passages that answer the query.",
  "top_n": 386
  110 +}
  111 +```
  112 +
  113 +适合:
  114 +
  115 +* 验证接口
  116 +* 小规模测试
  117 +
  118 +---
  119 +
  120 +# 2 动态 documents 模式(推荐)
  121 +
  122 +启用参数:
  123 +
  124 +```
  125 +--rerank-dynamic-docs
  126 +```
  127 +
  128 +脚本会:
  129 +
  130 +* 每个请求生成 **N 条 documents**
  131 +* 每条 doc 是 **随机词拼接句子**
  132 +* 每个请求 **内容不同**
  133 +
  134 +示例 doc:
  135 +
  136 +```
  137 +alce bafi kolo dede hobe anma cigi lofi asbe erko kaci molo fadi helo
  138 +mace biro aldi kolo gace hoin doka lale cebo fafa ineri kasi hobe lomo
  139 +gifi beme koha laci anfi celi dore ioce kobo hila mefi arce enbo hega
  140 +```
  141 +
  142 +优点:
  143 +
  144 +* 不依赖真实语料
  145 +* 更接近真实 token 分布
  146 +* 压测稳定
  147 +
  148 +---
  149 +
  150 +# 四、386 documents 压测示例
  151 +
386 documents 场景的典型压测命令:
  153 +
  154 +```bash
  155 +python rerank_dashscope_perf.py \
  156 + --duration 20 \
  157 + --concurrency-list 1,5,10,20 \
  158 + --timeout 90 \
  159 + --rerank-dynamic-docs \
  160 + --rerank-doc-count 386 \
  161 + --rerank-vocab-size 1000 \
  162 + --rerank-sentence-min-words 15 \
  163 + --rerank-sentence-max-words 40 \
  164 + --rerank-query "wireless mouse" \
  165 + --rerank-seed 20260312 \
  166 + --rerank-top-n 386
  167 +```
  168 +
  169 +每个请求:
  170 +
  171 +```
  172 +query: wireless mouse
  173 +documents: 386条
  174 +每条doc长度: 15~40词
  175 +```
  176 +
  177 +---
  178 +
  179 +# 五、输出结果示例
  180 +
  181 +终端输出:
  182 +
  183 +```
  184 +[1/4] running rerank_dashscope @ concurrency=1 ...
  185 +
  186 +=== Scenario: rerank_dashscope @ concurrency=1 ===
  187 +requests=84 success=84 fail=0 success_rate=100.0% rps=4.2
  188 +latency(ms): avg=230 p50=220 p90=260 p95=280 p99=310 max=340
  189 +status_codes: {200: 84}
  190 +```
  191 +
  192 +字段说明:
  193 +
  194 +| 指标 | 说明 |
  195 +| --------------- | ---- |
  196 +| requests | 总请求数 |
  197 +| success | 成功请求 |
  198 +| fail | 失败请求 |
  199 +| success_rate | 成功率 |
  200 +| rps | 吞吐量 |
  201 +| p50/p90/p95/p99 | 延迟分位 |
  202 +| max | 最大延迟 |
  203 +
  204 +---
  205 +
  206 +# 六、JSON 报告
  207 +
  208 +如果指定:
  209 +
  210 +```
  211 +--output perf_result.json
  212 +```
  213 +
  214 +会生成报告:
  215 +
  216 +```json
  217 +{
  218 + "results": [
  219 + {
  220 + "concurrency": 1,
  221 + "throughput_rps": 4.2,
  222 + "latency_ms": {
  223 + "avg": 230,
  224 + "p95": 280
  225 + }
  226 + }
  227 + ]
  228 +}
  229 +```
  230 +
  231 +适合:
  232 +
  233 +* 性能对比
  234 +* 画图
  235 +* 压测记录
  236 +
  237 +---
  238 +
  239 +# 七、常见参数
  240 +
  241 +| 参数 | 默认值 | 说明 |
  242 +| --------------------------- | --------- | ------- |
  243 +| --duration | 20 | 单并发测试时间 |
  244 +| --concurrency-list | 1,5,10,20 | 并发梯度 |
  245 +| --timeout | 90 | 请求超时 |
  246 +| --rerank-doc-count | 386 | doc数量 |
  247 +| --rerank-vocab-size | 1000 | 词表大小 |
  248 +| --rerank-sentence-min-words | 15 | doc最小长度 |
  249 +| --rerank-sentence-max-words | 40 | doc最大长度 |
  250 +| --rerank-top-n | 386 | 返回top_n |
  251 +
  252 +---
  253 +
  254 +# 八、推荐压测方式
  255 +
  256 +推荐测试:
  257 +
  258 +```
  259 +docs = 386
  260 +query = wireless mouse
  261 +concurrency = 1,5,10,20
  262 +duration = 20~60s
  263 +```
  264 +
  265 +即可得到:
  266 +
  267 +* latency 曲线
  268 +* RPS
  269 +* 并发极限