Blame view

reranker/server.py 6.12 KB
d90e7428   tangwang   补充重排
1
  """
d31c7f65   tangwang   补充云服务reranker
2
3
  Reranker service - unified /rerank API backed by pluggable backends
  (BGE, Qwen3-vLLM, Qwen3-Transformers, Qwen3-GGUF, DashScope cloud rerank).
d90e7428   tangwang   补充重排
4
5
  
  POST /rerank
701ae503   tangwang   docs
6
7
8
  Request: { "query": "...", "docs": ["doc1", "doc2", ...], "normalize": optional bool, "top_n": optional int }
  Response: { "scores": [float], "meta": {...} }
  
d31c7f65   tangwang   补充云服务reranker
9
  Backend selected via config: services.rerank.backend
3d508beb   tangwang   reranker-4b-gguf
10
  (bge | qwen3_vllm | qwen3_transformers | qwen3_gguf | dashscope_rerank), env RERANK_BACKEND.
d90e7428   tangwang   补充重排
11
12
13
  """
  
  import logging
28e57bb1   tangwang   日志体系优化
14
  import os
d90e7428   tangwang   补充重排
15
16
17
18
19
20
  import time
  from typing import Any, Dict, List, Optional
  
  from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel, Field
  
701ae503   tangwang   docs
21
22
  from config.services_config import get_rerank_backend_config
  from reranker.backends import RerankBackendProtocol, get_rerank_backend
d90e7428   tangwang   补充重排
23
24
25
26
27
28
29
30
  from reranker.config import CONFIG
  
  # Configure root logging at import time: INFO level, with timestamp, level
  # and logger name in front of every message.
  logging.basicConfig(
      level=logging.INFO,
      format="%(asctime)s %(levelname)s %(name)s | %(message)s",
  )
  # Logger used by the startup hook and request handlers in this module.
  logger = logging.getLogger("reranker.service")
  
a7920e17   tangwang   项目名称和部署路径修改
31
  # FastAPI application object; the route and startup decorators below attach to it.
  app = FastAPI(title="saas-search Reranker Service", version="1.0.0")
d90e7428   tangwang   补充重排
32
  
701ae503   tangwang   docs
33
34
  # Backend instance; stays None until load_model() assigns it.
  _reranker: Optional[RerankBackendProtocol] = None
  # Name of the configured backend, assigned by load_model(); empty until then.
  _backend_name: str = ""
28e57bb1   tangwang   日志体系优化
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
  def _env_int(name: str, default: int, minimum: int) -> int:
      """Read an integer setting from the environment.

      Args:
          name: Environment variable to read.
          default: Value used when the variable is unset or not a valid integer.
          minimum: Lower bound; the result is never smaller than this.

      Returns:
          The parsed (or default) value, clamped to at least ``minimum``.
      """
      raw = os.getenv(name)
      try:
          value = default if raw is None else int(raw)
      except ValueError:
          # A malformed logging knob should not stop the module from importing.
          logging.getLogger("reranker.service").warning(
              "Ignoring invalid integer for %s=%r; using default %d", name, raw, default
          )
          value = default
      return max(minimum, value)


  # How many docs (and scores) are included in request/response log previews.
  _LOG_DOC_PREVIEW_COUNT = _env_int("RERANK_LOG_DOC_PREVIEW_COUNT", 3, 1)
  # Maximum characters of query/doc text kept in a log preview.
  _LOG_TEXT_PREVIEW_CHARS = _env_int("RERANK_LOG_TEXT_PREVIEW_CHARS", 180, 32)
  
  
  def _compact_preview(text: str, max_chars: int) -> str:
      """Return a single-line, length-limited rendering of *text* for logging.

      Every run of whitespace is collapsed to one space and leading/trailing
      whitespace is dropped; falsy input is treated as an empty string. When
      the collapsed text is longer than ``max_chars`` it is cut to that length
      and ``"..."`` is appended.
      """
      squashed = " ".join((text or "").split())
      if len(squashed) > max_chars:
          return squashed[:max_chars] + "..."
      return squashed
  
  
  def _preview_docs(docs: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]:
      """Summarise the first ``max_items`` documents for log output.

      Each entry records the document's position (``idx``), its original
      length (``len``) and a compacted, truncated ``preview`` of its text.
      """
      return [
          {
              "idx": position,
              "len": len(text),
              "preview": _compact_preview(text, max_chars),
          }
          for position, text in enumerate(docs[:max_items])
      ]
d90e7428   tangwang   补充重排
57
58
59
60
61
62
63
64
  
  
  class RerankRequest(BaseModel):
      """Request body accepted by ``POST /rerank``."""

      query: str = Field(..., description="Search query")
      docs: List[str] = Field(..., description="Documents/passages to rerank")
      # Defaults to the service-level setting when the client omits it.
      normalize: Optional[bool] = Field(
          default=CONFIG.NORMALIZE, description="Apply sigmoid normalization"
      )
      # Only forwarded to backends exposing a top-n scoring method.
      top_n: Optional[int] = Field(
          default=None,
          description="Optional top_n hint for backends that support partial ranking",
      )
d90e7428   tangwang   补充重排
69
70
71
72
73
74
75
76
77
  
  
  class RerankResponse(BaseModel):
      """Response body returned by ``POST /rerank``."""

      scores: List[float] = Field(
          ...,
          description="Scores aligned to input docs order",
      )
      # Free-form metadata attached by the backend and the service.
      meta: Dict[str, Any] = Field(default_factory=dict)
  
  
  @app.on_event("startup")
  def load_model() -> None:
      """Instantiate the configured rerank backend when the app starts.

      Resolves the backend name and settings via ``get_rerank_backend_config``,
      builds the backend with ``get_rerank_backend`` and stores both in the
      module globals ``_reranker`` / ``_backend_name``.

      Raises:
          Exception: Any error raised during initialisation is logged with
              its traceback and then re-raised.
      """
      global _reranker, _backend_name
      logger.info("Starting reranker service on port %s", CONFIG.PORT)
      try:
          backend_name, backend_cfg = get_rerank_backend_config()
          _backend_name = backend_name
          _reranker = get_rerank_backend(backend_name, backend_cfg)
          # Prefer the model name reported by the backend instance; otherwise
          # fall back to the configured model name, then to the backend name.
          model_info = getattr(_reranker, "_model_name", None) or backend_cfg.get(
              "model_name", backend_name
          )
          logger.info(
              "Reranker ready | backend=%s model=%s",
              _backend_name,
              model_info,
          )
      except Exception as exc:
          # logger.exception logs at ERROR level and attaches the traceback.
          logger.exception("Failed to initialize reranker: %s", exc)
          raise
  
  
  @app.get("/health")
  def health() -> Dict[str, Any]:
      """Report whether a backend is loaded and which model/backend is in use.

      Returns:
          A dict with ``status`` ("ok" or "unavailable"), ``model_loaded``,
          ``model`` and ``backend``.
      """
      model_info = ""
      if _reranker is not None:
          # A backend may expose ``_config`` as None; treat that like "absent"
          # so the health probe itself never fails on it.
          backend_cfg = getattr(_reranker, "_config", None) or {}
          model_info = getattr(_reranker, "_model_name", None) or backend_cfg.get(
              "model_name", _backend_name
          )
      loaded = _reranker is not None
      return {
          "status": "ok" if loaded else "unavailable",
          "model_loaded": loaded,
          "model": model_info,
          "backend": _backend_name,
      }
  
  
  @app.post("/rerank", response_model=RerankResponse)
  def rerank(request: RerankRequest) -> RerankResponse:
      """Score ``request.docs`` against ``request.query`` using the loaded backend.

      Args:
          request: Validated request body (query, docs, optional normalize/top_n).

      Returns:
          A ``RerankResponse`` carrying the backend scores and a ``meta`` dict
          extended with ``service_elapsed_ms`` (and ``requested_top_n`` when a
          ``top_n`` was supplied and the backend did not already set it).

      Raises:
          HTTPException: 503 when no backend is loaded; 400 for an empty query,
              empty docs, more than ``CONFIG.MAX_DOCS`` docs, or ``top_n <= 0``.
      """
      if _reranker is None:
          raise HTTPException(status_code=503, detail="Reranker model not loaded")

      query = (request.query or "").strip()
      if not query:
          raise HTTPException(status_code=400, detail="query cannot be empty")

      if not request.docs:
          raise HTTPException(status_code=400, detail="docs cannot be empty")

      if len(request.docs) > CONFIG.MAX_DOCS:
          raise HTTPException(
              status_code=400,
              detail=f"Too many docs: {len(request.docs)} > {CONFIG.MAX_DOCS}",
          )

      top_n = int(request.top_n) if request.top_n is not None else None
      if top_n is not None and top_n <= 0:
          raise HTTPException(status_code=400, detail="top_n must be > 0")

      normalize = CONFIG.NORMALIZE if request.normalize is None else bool(request.normalize)

      # perf_counter is monotonic, so the elapsed time cannot be skewed by
      # wall-clock adjustments the way time.time() can.
      start_ts = time.perf_counter()
      logger.info(
          "Rerank request | docs=%d normalize=%s query_len=%d query=%r doc_preview=%s",
          len(request.docs),
          normalize,
          len(query),
          _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS),
          _preview_docs(request.docs, _LOG_DOC_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS),
      )

      # Use the partial-ranking entry point only when the caller asked for a
      # top_n and the backend actually provides it.
      score_topn = getattr(_reranker, "score_with_meta_topn", None) if top_n is not None else None
      if score_topn is not None:
          scores, meta = score_topn(
              query,
              request.docs,
              normalize=normalize,
              top_n=top_n,
          )
      else:
          scores, meta = _reranker.score_with_meta(query, request.docs, normalize=normalize)

      # Copy so the backend's dict is not mutated; tolerate a backend returning None.
      meta = dict(meta or {})
      if top_n is not None:
          meta.setdefault("requested_top_n", top_n)
      meta["service_elapsed_ms"] = round((time.perf_counter() - start_ts) * 1000.0, 3)

      score_preview = [round(float(s), 6) for s in scores[:_LOG_DOC_PREVIEW_COUNT]]
      # docs uses %s rather than %d: meta.get("input_docs") is None when the
      # backend omits the key, and %d would make the log record fail to format.
      logger.info(
          "Rerank done | docs=%s unique=%s dedup=%s elapsed_ms=%s batches=%s batchsize=%s batch_concurrency=%s query=%r score_preview=%s",
          meta.get("input_docs"),
          meta.get("unique_docs"),
          meta.get("dedup_ratio"),
          meta.get("service_elapsed_ms"),
          meta.get("batches"),
          meta.get("batchsize"),
          meta.get("batch_concurrency"),
          _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS),
          score_preview,
      )

      return RerankResponse(scores=scores, meta=meta)
  
  
  if __name__ == "__main__":
      # Imported here so uvicorn is only required when this file is run
      # directly as a script.
      import uvicorn

      # Serve the app by import string on the configured host/port, with
      # auto-reload disabled.
      uvicorn.run("reranker.server:app", host=CONFIG.HOST, port=CONFIG.PORT, reload=False, log_level="info")