Blame view

benchmarks/reranker/smoke_qwen3_vllm_score_backend.py 2.31 KB
540fb5af   tangwang   添加了可关闭的开关:保留默认行为(...
1
2
3
4
5
#!/usr/bin/env python3
"""
Smoke test: load Qwen3VLLMScoreRerankerBackend (must run as a file, not stdin — vLLM spawn).

Usage (from repo root, score venv):

  PYTHONPATH=. ./.venv-reranker-score/bin/python benchmarks/reranker/smoke_qwen3_vllm_score_backend.py

Same as production: vLLM child processes need the venv's ``bin`` on PATH (for pip's ``ninja`` when
vLLM auto-selects FLASHINFER on T4/Turing). ``start_reranker.sh`` exports that; this script prepends
``sysconfig.get_path("scripts")`` (the stdlib location for this environment's console scripts,
independent of ``python`` symlink targets).
"""

from __future__ import annotations

import argparse
import logging
import os
import sys
import sysconfig
from pathlib import Path
3abbc95a   tangwang   重构(scripts): 整理sc...
23
24
# Repo root on sys.path when run from benchmarks/reranker/.
# parents[2] == repo root because this file lives two directories below it.
_ROOT = Path(__file__).resolve().parents[2]
if str(_ROOT) not in sys.path:
    sys.path.insert(0, str(_ROOT))

logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

# NOTE: these imports are deliberately placed *after* the sys.path bootstrap
# above, so the project package resolves when this script is executed directly
# (not via ``python -m``). Do not move them to the top of the file.
import torch

from reranker.backends.qwen3_vllm_score import (
    Qwen3VLLMScoreRerankerBackend,
)
  
  
  def main() -> int:
      p = argparse.ArgumentParser()
      p.add_argument(
540fb5af   tangwang   添加了可关闭的开关:保留默认行为(...
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
          "--gpu-memory-utilization",
          type=float,
          default=0.12,
          help="vLLM gpu_memory_utilization (default 0.12 for tight GPUs)",
      )
      args = p.parse_args()
  
      scripts = sysconfig.get_path("scripts")
      if scripts:
          os.environ["PATH"] = scripts + os.pathsep + os.environ.get("PATH", "")
  
      if not torch.cuda.is_available():
          print("SKIP: CUDA not available")
          return 0
  
      cfg = {
          "model_name": "Qwen/Qwen3-Reranker-0.6B",
          "max_model_len": 160,
          "tensor_parallel_size": 1,
          "gpu_memory_utilization": args.gpu_memory_utilization,
          "dtype": "float16",
          "enable_prefix_caching": False,
          "enforce_eager": True,
          "infer_batch_size": 4,
b0972ff9   tangwang   qwen3_vllm_score ...
64
          "instruction_format": "compact",
540fb5af   tangwang   添加了可关闭的开关:保留默认行为(...
65
      }
540fb5af   tangwang   添加了可关闭的开关:保留默认行为(...
66
67
68
69
70
71
72
73
74
75
76
      print("Loading backend ...")
      backend = Qwen3VLLMScoreRerankerBackend(cfg)
      scores, meta = backend.score_with_meta("smoke query", ["title one", "title two"], normalize=False)
      print("scores:", scores)
      print("meta:", {k: meta[k] for k in ("backend", "infer_batch_size", "instruction_format") if k in meta})
      print("OK")
      return 0
  
  
  if __name__ == "__main__":
      raise SystemExit(main())