Blame view

reranker/server.py 6.39 KB
d90e7428   tangwang   补充重排
1
  """
d31c7f65   tangwang   补充云服务reranker
2
3
  Reranker service - unified /rerank API backed by pluggable backends
  (BGE, Qwen3-vLLM, Qwen3-Transformers, DashScope cloud rerank).
d90e7428   tangwang   补充重排
4
5
  
  POST /rerank
701ae503   tangwang   docs
6
7
8
  Request: { "query": "...", "docs": ["doc1", "doc2", ...], "normalize": optional bool, "top_n": optional int }
  Response: { "scores": [float], "meta": {...} }
  
d31c7f65   tangwang   补充云服务reranker
9
  Backend selected via config: services.rerank.backend
4823f463   tangwang   qwen3_vllm_score ...
10
  (bge | qwen3_vllm | qwen3_vllm_score | qwen3_transformers | qwen3_transformers_packed | qwen3_gguf | qwen3_gguf_06b | dashscope_rerank), env RERANK_BACKEND.
d90e7428   tangwang   补充重排
11
12
13
  """
  
  import logging
28e57bb1   tangwang   日志体系优化
14
  import os
d90e7428   tangwang   补充重排
15
16
17
18
19
20
  import time
  from typing import Any, Dict, List, Optional
  
  from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel, Field
  
701ae503   tangwang   docs
21
22
  from config.services_config import get_rerank_backend_config
  from reranker.backends import RerankBackendProtocol, get_rerank_backend
d90e7428   tangwang   补充重排
23
24
25
26
27
28
29
30
  from reranker.config import CONFIG
  
  # Configure logging at import time: INFO level, with timestamp, level and
  # logger name in front of every message.
  logging.basicConfig(
      level=logging.INFO,
      format="%(asctime)s %(levelname)s %(name)s | %(message)s",
  )
  # Logger used by everything in this module.
  logger = logging.getLogger("reranker.service")
  
a7920e17   tangwang   项目名称和部署路径修改
31
  # FastAPI application instance; the startup hook and the /health and /rerank
  # handlers in this module are registered on it via decorators.
  app = FastAPI(title="saas-search Reranker Service", version="1.0.0")
d90e7428   tangwang   补充重排
32
  
701ae503   tangwang   docs
33
34
  # Active rerank backend instance; stays None until load_model() has built it.
  _reranker: Optional[RerankBackendProtocol] = None
  # Name of the configured backend, assigned by load_model(); empty before that.
  _backend_name: str = ""
28e57bb1   tangwang   日志体系优化
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
  # How many docs/scores are previewed per log line
  # (env RERANK_LOG_DOC_PREVIEW_COUNT, default 3, clamped to at least 1).
  # NOTE: a non-integer env value makes int() raise ValueError at import time.
  _LOG_DOC_PREVIEW_COUNT = max(1, int(os.getenv("RERANK_LOG_DOC_PREVIEW_COUNT", "3")))
  # Maximum characters of query/doc text shown per preview
  # (env RERANK_LOG_TEXT_PREVIEW_CHARS, default 180, clamped to at least 32).
  _LOG_TEXT_PREVIEW_CHARS = max(32, int(os.getenv("RERANK_LOG_TEXT_PREVIEW_CHARS", "180")))
  
  
  def _compact_preview(text: str, max_chars: int) -> str:
      compact = " ".join((text or "").split())
      if len(compact) <= max_chars:
          return compact
      return compact[:max_chars] + "..."
  
  
  def _preview_docs(docs: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]:
      """Summarize the leading documents for logging.

      Args:
          docs: Documents from the request, in their original order.
          max_items: Only ``docs[:max_items]`` are summarized.
          max_chars: Character cap passed to ``_compact_preview`` for each doc.

      Returns:
          One dict per summarized doc with its position (``idx``), its raw
          length (``len``) and a whitespace-compacted ``preview``.
      """
      return [
          {
              "idx": position,
              "len": len(body),
              "preview": _compact_preview(body, max_chars),
          }
          for position, body in enumerate(docs[:max_items])
      ]
d90e7428   tangwang   补充重排
57
58
59
60
61
62
63
64
  
  
  class RerankRequest(BaseModel):
      """Request body accepted by ``POST /rerank``."""

      # Query text the documents are scored against (required).
      query: str = Field(..., description="Search query")
      # Candidate documents/passages to score (required).
      docs: List[str] = Field(..., description="Documents/passages to rerank")
      # Falls back to CONFIG.NORMALIZE when the client omits the field.
      normalize: Optional[bool] = Field(
          default=CONFIG.NORMALIZE,
          description="Apply sigmoid normalization",
      )
      # Optional hint; the /rerank handler rejects values <= 0.
      top_n: Optional[int] = Field(
          default=None,
          description="Optional top_n hint for backends that support partial ranking",
      )
d90e7428   tangwang   补充重排
69
70
71
72
73
74
75
76
77
  
  
  class RerankResponse(BaseModel):
      """Response body returned by ``POST /rerank``."""

      # One score per input document, in the same order as the request's docs.
      scores: List[float] = Field(
          ...,
          description="Scores aligned to input docs order",
      )
      # Free-form metadata about the call; defaults to an empty dict.
      meta: Dict[str, Any] = Field(
          default_factory=dict,
      )
  
  
  @app.on_event("startup")
  def load_model() -> None:
      """Build the configured rerank backend when the application starts.

      Reads the backend name and settings from ``get_rerank_backend_config()``,
      constructs the backend with ``get_rerank_backend`` and stores the result
      in the module globals ``_backend_name`` and ``_reranker``.

      Raises:
          Exception: Any failure during initialization is logged with its
              traceback and re-raised unchanged.
      """
      global _reranker, _backend_name
      logger.info("Starting reranker service on port %s", CONFIG.PORT)
      try:
          name, settings = get_rerank_backend_config()
          _backend_name = name
          _reranker = get_rerank_backend(name, settings)

          # Prefer the model name the backend exposes; otherwise use the
          # configured "model_name", defaulting to the backend name itself.
          reported_model = getattr(_reranker, "_model_name", None)
          if not reported_model:
              reported_model = settings.get("model_name", name)

          logger.info("Reranker ready | backend=%s model=%s", _backend_name, reported_model)
      except Exception as exc:
          logger.exception("Failed to initialize reranker: %s", exc)
          raise
  
  
  @app.get("/health")
  def health() -> Dict[str, Any]:
      """Report whether a rerank backend is loaded and which one it is.

      Returns:
          A dict with ``status`` (``"ok"`` or ``"unavailable"``),
          ``model_loaded``, ``model`` and ``backend``. When the loaded backend
          has a non-None ``_instruction_format`` attribute it is included as
          ``instruction_format``.
      """
      loaded = _reranker is not None

      if loaded:
          # Backend-reported model name first, then its config's "model_name",
          # and finally the backend name.
          model_info = getattr(_reranker, "_model_name", None) or getattr(
              _reranker, "_config", {}
          ).get("model_name", _backend_name)
      else:
          model_info = ""

      payload: Dict[str, Any] = {
          "status": "ok" if loaded else "unavailable",
          "model_loaded": loaded,
          "model": model_info,
          "backend": _backend_name,
      }

      if loaded:
          instruction_format = getattr(_reranker, "_instruction_format", None)
          if instruction_format is not None:
              payload["instruction_format"] = instruction_format
      return payload
d90e7428   tangwang   补充重排
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
  
  
  @app.post("/rerank", response_model=RerankResponse)
  def rerank(request: RerankRequest) -> RerankResponse:
      """Score ``request.docs`` against ``request.query`` with the loaded backend.

      Args:
          request: Validated request body (query, docs, optional normalize and
              top_n).

      Returns:
          RerankResponse carrying the backend's scores and a copy of its meta
          dict, extended with ``service_elapsed_ms`` and, when ``top_n`` was
          supplied, ``requested_top_n`` (unless the backend already set it).

      Raises:
          HTTPException: 503 when no backend is loaded; 400 when the query is
              blank, ``docs`` is empty, ``docs`` exceeds ``CONFIG.MAX_DOCS`` or
              ``top_n`` is not positive.
      """
      if _reranker is None:
          raise HTTPException(status_code=503, detail="Reranker model not loaded")

      query = (request.query or "").strip()
      if not query:
          raise HTTPException(status_code=400, detail="query cannot be empty")

      if not request.docs:
          raise HTTPException(status_code=400, detail="docs cannot be empty")

      if len(request.docs) > CONFIG.MAX_DOCS:
          raise HTTPException(
              status_code=400,
              detail=f"Too many docs: {len(request.docs)} > {CONFIG.MAX_DOCS}",
          )

      top_n = int(request.top_n) if request.top_n is not None else None
      if top_n is not None and top_n <= 0:
          raise HTTPException(status_code=400, detail="top_n must be > 0")

      normalize = CONFIG.NORMALIZE if request.normalize is None else bool(request.normalize)

      # Monotonic clock: the elapsed time must not be skewed by wall-clock
      # adjustments that happen while the backend is scoring.
      start_ts = time.perf_counter()
      logger.info(
          "Rerank request | docs=%d normalize=%s query_len=%d query=%r doc_preview=%s",
          len(request.docs),
          normalize,
          len(query),
          _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS),
          _preview_docs(request.docs, _LOG_DOC_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS),
      )

      # Use the partial-ranking entry point only when the caller asked for
      # top_n and the backend provides it; getattr keeps this optional method
      # outside the backend protocol's declared surface.
      if top_n is not None and hasattr(_reranker, "score_with_meta_topn"):
          scores, meta = getattr(_reranker, "score_with_meta_topn")(
              query,
              request.docs,
              normalize=normalize,
              top_n=top_n,
          )
      else:
          scores, meta = _reranker.score_with_meta(query, request.docs, normalize=normalize)

      # Copy so the backend's dict is never mutated; tolerate a backend that
      # returns no meta at all.
      meta = dict(meta or {})
      if top_n is not None:
          meta.setdefault("requested_top_n", top_n)
      meta["service_elapsed_ms"] = round((time.perf_counter() - start_ts) * 1000.0, 3)

      # "docs=%d" needs a number: fall back to the request size when the
      # backend does not report "input_docs", otherwise formatting the log
      # record fails and the line is lost. Only the log uses this fallback;
      # meta is returned as the backend produced it.
      input_docs = meta.get("input_docs")
      if input_docs is None:
          input_docs = len(request.docs)

      score_preview = [round(float(s), 6) for s in scores[:_LOG_DOC_PREVIEW_COUNT]]
      logger.info(
          "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s batches=%s batchsize=%s batch_concurrency=%s query=%r score_preview=%s",
          input_docs,
          meta.get("unique_docs"),
          meta.get("dedup_ratio"),
          meta.get("service_elapsed_ms"),
          meta.get("batches"),
          meta.get("batchsize"),
          meta.get("batch_concurrency"),
          _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS),
          score_preview,
      )

      return RerankResponse(scores=scores, meta=meta)
  
  
  if __name__ == "__main__":
      # Direct execution: serve the app with uvicorn on the configured
      # host/port. uvicorn is imported here so importing this module does not
      # require it.
      import uvicorn

      server_options = {
          "host": CONFIG.HOST,
          "port": CONFIG.PORT,
          "reload": False,
          "log_level": "info",
      }
      uvicorn.run("reranker.server:app", **server_options)