Blame view

reranker/server.py 6.54 KB
d90e7428   tangwang   补充重排
1
  """
d31c7f65   tangwang   补充云服务reranker
2
3
  Reranker service - unified /rerank API backed by pluggable backends
  (BGE, Qwen3-vLLM, Qwen3-Transformers, DashScope cloud rerank).
d90e7428   tangwang   补充重排
4
5
  
  POST /rerank
701ae503   tangwang   docs
6
7
8
  Request: { "query": "...", "docs": ["doc1", "doc2", ...], "normalize": optional bool, "top_n": optional int }
  Response: { "scores": [float], "meta": {...} }
  
daa2690b   tangwang   漏斗参数调优&呈现优化
9
  Backend selected via config: services.rerank.instances.<name>.backend
971a0851   tangwang   补充reranker-jina,探...
10
  (bge | jina_reranker_v3 | qwen3_vllm | qwen3_vllm_score | qwen3_transformers | qwen3_transformers_packed | qwen3_gguf | qwen3_gguf_06b | dashscope_rerank), env RERANK_BACKEND.
d90e7428   tangwang   补充重排
11
12
13
  """
  
  import logging
28e57bb1   tangwang   日志体系优化
14
  import os
d90e7428   tangwang   补充重排
15
16
17
18
19
20
  import time
  from typing import Any, Dict, List, Optional
  
  from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel, Field
  
701ae503   tangwang   docs
21
22
  from config.services_config import get_rerank_backend_config
  from reranker.backends import RerankBackendProtocol, get_rerank_backend
d90e7428   tangwang   补充重排
23
24
25
26
27
28
29
30
  from reranker.config import CONFIG
  
  # Configure root logging when this module is imported (INFO level, one-line
  # "time level logger | message" format), then create the service logger.
  logging.basicConfig(
      level=logging.INFO,
      format="%(asctime)s %(levelname)s %(name)s | %(message)s",
  )
  logger = logging.getLogger("reranker.service")
  
a7920e17   tangwang   项目名称和部署路径修改
31
  # ASGI application object; the /health and /rerank routes and the startup
  # hook below are registered on it.
  app = FastAPI(title="saas-search Reranker Service", version="1.0.0")
d90e7428   tangwang   补充重排
32
  
701ae503   tangwang   docs
33
34
  # Process-wide backend state. Both are assigned by load_model() on startup;
  # _reranker stays None (and /rerank answers 503) until that has succeeded.
  _reranker: Optional[RerankBackendProtocol] = None
  _backend_name: str = ""
28e57bb1   tangwang   日志体系优化
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
  # Limits for what the request/response log lines include. Both are read once
  # from the environment at import time and clamped to a lower bound:
  #   RERANK_LOG_DOC_PREVIEW_COUNT   -> number of docs/scores previewed (>= 1, default 3)
  #   RERANK_LOG_TEXT_PREVIEW_CHARS  -> characters kept per previewed text (>= 32, default 180)
  _LOG_DOC_PREVIEW_COUNT = max(int(os.environ.get("RERANK_LOG_DOC_PREVIEW_COUNT", "3")), 1)
  _LOG_TEXT_PREVIEW_CHARS = max(int(os.environ.get("RERANK_LOG_TEXT_PREVIEW_CHARS", "180")), 32)
  
  
  def _compact_preview(text: str, max_chars: int) -> str:
      """Return *text* on a single line, cut to at most *max_chars* characters.

      Every run of whitespace (including newlines) is collapsed to one space
      and leading/trailing whitespace is dropped. A falsy *text* yields "".
      When the collapsed text is longer than *max_chars*, the first
      *max_chars* characters are kept and "..." is appended.
      """
      single_line = " ".join((text or "").split())
      fits = len(single_line) <= max_chars
      return single_line if fits else single_line[:max_chars] + "..."
  
  
  def _preview_docs(docs: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]:
      """Build log-friendly summaries of the first *max_items* documents.

      Each summary is a dict with the document's position in *docs* ("idx"),
      its original length ("len") and a compacted, truncated text ("preview")
      produced by ``_compact_preview`` with *max_chars*.
      """
      return [
          {
              "idx": position,
              "len": len(text),
              "preview": _compact_preview(text, max_chars),
          }
          for position, text in enumerate(docs[:max_items])
      ]
d90e7428   tangwang   补充重排
57
58
59
60
61
62
63
64
  
  
  class RerankRequest(BaseModel):
      # Request body accepted by POST /rerank.
      # (Documented with comments rather than a class docstring so the
      # generated schema description is left unchanged.)

      # Query text; the handler strips it and rejects an empty result.
      query: str = Field(..., description="Search query")
      # Candidate texts; scores in the response are aligned to this order.
      docs: List[str] = Field(..., description="Documents/passages to rerank")
      # Defaults to CONFIG.NORMALIZE; an explicit null is also mapped back to
      # CONFIG.NORMALIZE by the handler.
      normalize: Optional[bool] = Field(
          default=CONFIG.NORMALIZE, description="Apply sigmoid normalization"
      )
      # Only forwarded to backends that expose score_with_meta_topn; the
      # handler rejects values <= 0.
      top_n: Optional[int] = Field(
          default=None,
          description="Optional top_n hint for backends that support partial ranking",
      )
d90e7428   tangwang   补充重排
69
70
71
72
73
74
75
76
77
  
  
  class RerankResponse(BaseModel):
      # Response body of POST /rerank.
      # scores: one float per input document, in the same order as the request's docs.
      # meta: backend-provided metadata; the handler adds "service_elapsed_ms"
      #       and, when top_n was requested, "requested_top_n".
      scores: List[float] = Field(..., description="Scores aligned to input docs order")
      meta: Dict[str, Any] = Field(default_factory=dict)
  
  
  @app.on_event("startup")
  def load_model() -> None:
      """Build the configured rerank backend when the application starts.

      Looks up the backend name and settings for ``CONFIG.INSTANCE``, creates
      the backend, and stores it in the module-level ``_reranker`` /
      ``_backend_name``.

      Raises:
          Exception: whatever the config lookup or backend construction
              raised; it is logged with its traceback and re-raised.
      """
      global _reranker, _backend_name
      logger.info("Starting reranker service | instance=%s port=%s", CONFIG.INSTANCE, CONFIG.PORT)
      try:
          backend_name, backend_cfg = get_rerank_backend_config(CONFIG.INSTANCE)
          _backend_name = backend_name
          _reranker = get_rerank_backend(backend_name, backend_cfg)
          # Prefer the name the backend reports; otherwise use the configured
          # model_name, and finally the backend name itself.
          model_info = getattr(_reranker, "_model_name", None) or backend_cfg.get("model_name", backend_name)
      except Exception as exc:
          # logger.exception logs at ERROR level and attaches the traceback.
          logger.exception("Failed to initialize reranker: %s", exc)
          raise
      else:
          logger.info(
              "Reranker ready | instance=%s backend=%s model=%s",
              CONFIG.INSTANCE,
              _backend_name,
              model_info,
          )
  
  
  @app.get("/health")
  def health() -> Dict[str, Any]:
      """Report whether the rerank backend is loaded and which model it serves.

      Returns:
          A dict with ``status`` ("ok" or "unavailable"), ``instance``,
          ``model_loaded``, ``model`` and ``backend``. ``instruction_format``
          is added when the loaded backend exposes a non-None
          ``_instruction_format`` attribute.
      """
      loaded = _reranker is not None

      model_info = ""
      if loaded:
          model_info = getattr(_reranker, "_model_name", None)
          if not model_info:
              # Fall back to the backend's own config. A backend may have no
              # _config, or one that is None / not dict-like; the health probe
              # must not fail on that, so only call .get() when it exists.
              cfg_get = getattr(getattr(_reranker, "_config", None), "get", None)
              if callable(cfg_get):
                  model_info = cfg_get("model_name", _backend_name)
              else:
                  model_info = _backend_name

      payload: Dict[str, Any] = {
          "status": "ok" if loaded else "unavailable",
          "instance": CONFIG.INSTANCE,
          "model_loaded": loaded,
          "model": model_info,
          "backend": _backend_name,
      }

      if loaded:
          instruction_format = getattr(_reranker, "_instruction_format", None)
          if instruction_format is not None:
              payload["instruction_format"] = instruction_format
      return payload
d90e7428   tangwang   补充重排
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
  
  
  @app.post("/rerank", response_model=RerankResponse)
  def rerank(request: RerankRequest) -> RerankResponse:
      """Score the request's documents against its query with the loaded backend.

      When ``top_n`` is given and the backend provides ``score_with_meta_topn``,
      that method is used; otherwise ``score_with_meta`` is called. The
      backend's metadata is returned with ``service_elapsed_ms`` added (and
      ``requested_top_n`` when ``top_n`` was supplied and not already set).

      Raises:
          HTTPException: 503 when no backend is loaded; 400 when the query is
              empty after stripping, ``docs`` is empty, ``docs`` exceeds
              ``CONFIG.MAX_DOCS``, or ``top_n`` is not positive.
      """
      if _reranker is None:
          raise HTTPException(status_code=503, detail="Reranker model not loaded")

      query = (request.query or "").strip()
      if not query:
          raise HTTPException(status_code=400, detail="query cannot be empty")

      docs = request.docs
      if not docs:
          raise HTTPException(status_code=400, detail="docs cannot be empty")

      if len(docs) > CONFIG.MAX_DOCS:
          raise HTTPException(
              status_code=400,
              detail=f"Too many docs: {len(docs)} > {CONFIG.MAX_DOCS}",
          )

      top_n = int(request.top_n) if request.top_n is not None else None
      if top_n is not None and top_n <= 0:
          raise HTTPException(status_code=400, detail="top_n must be > 0")

      normalize = CONFIG.NORMALIZE if request.normalize is None else bool(request.normalize)

      # perf_counter is monotonic, so the measured duration cannot be skewed
      # (or go negative) when the system clock is adjusted mid-request.
      started = time.perf_counter()
      logger.info(
          "Rerank request | docs=%d normalize=%s query_len=%d query=%r doc_preview=%s",
          len(docs),
          normalize,
          len(query),
          _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS),
          _preview_docs(docs, _LOG_DOC_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS),
      )

      # Use the partial-ranking entry point only when the caller asked for
      # top_n and the backend actually implements it.
      score_topn = getattr(_reranker, "score_with_meta_topn", None) if top_n is not None else None
      if score_topn is not None:
          scores, meta = score_topn(
              query,
              docs,
              normalize=normalize,
              top_n=top_n,
          )
      else:
          scores, meta = _reranker.score_with_meta(query, docs, normalize=normalize)

      # Copy before mutating so the backend's own dict is left untouched;
      # tolerate a backend that returns no metadata at all.
      meta = dict(meta or {})
      if top_n is not None:
          meta.setdefault("requested_top_n", top_n)
      meta["service_elapsed_ms"] = round((time.perf_counter() - started) * 1000.0, 3)

      score_preview = [round(float(s), 6) for s in scores[:_LOG_DOC_PREVIEW_COUNT]]
      # Every meta field is formatted with %s: meta.get() yields None for keys
      # a backend does not report, and %d with None makes the log record fail
      # to format, so the "Rerank done" line would be lost.
      logger.info(
          "Rerank done | docs=%s unique=%s dedup=%s elapsed_ms=%s batches=%s batchsize=%s batch_concurrency=%s query=%r score_preview=%s",
          meta.get("input_docs"),
          meta.get("unique_docs"),
          meta.get("dedup_ratio"),
          meta.get("service_elapsed_ms"),
          meta.get("batches"),
          meta.get("batchsize"),
          meta.get("batch_concurrency"),
          _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS),
          score_preview,
      )

      return RerankResponse(scores=scores, meta=meta)
  
  
  if __name__ == "__main__":
      # Direct-execution entry point: serve the app with uvicorn on the
      # configured host/port, without auto-reload.
      import uvicorn

      server_options = {
          "host": CONFIG.HOST,
          "port": CONFIG.PORT,
          "reload": False,
          "log_level": "info",
      }
      uvicorn.run("reranker.server:app", **server_options)