Blame view

reranker/server.py 4.07 KB
d90e7428   tangwang   补充重排
1
  """
701ae503   tangwang   docs
2
  Reranker service - unified /rerank API backed by pluggable backends (BGE, Qwen3-vLLM).
d90e7428   tangwang   补充重排
3
4
  
  POST /rerank
701ae503   tangwang   docs
5
6
7
8
  Request: { "query": "...", "docs": ["doc1", "doc2", ...], "normalize": optional bool }
  Response: { "scores": [float], "meta": {...} }
  
  Backend selected via config: services.rerank.backend (bge | qwen3_vllm), env RERANK_BACKEND.
d90e7428   tangwang   补充重排
9
10
11
12
13
14
15
16
17
  """
  
  import logging
  import time
  from typing import Any, Dict, List, Optional
  
  from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel, Field
  
701ae503   tangwang   docs
18
19
  from config.services_config import get_rerank_backend_config
  from reranker.backends import RerankBackendProtocol, get_rerank_backend
d90e7428   tangwang   补充重排
20
21
22
23
24
25
26
27
  from reranker.config import CONFIG
  
  # Configure the root logger once at import time: INFO level, and every record
  # is rendered as "<timestamp> <level> <logger name> | <message>".
  logging.basicConfig(
      level=logging.INFO,
      format="%(asctime)s %(levelname)s %(name)s | %(message)s",
  )
  # Logger used for every message emitted by this module.
  logger = logging.getLogger("reranker.service")
  
a7920e17   tangwang   项目名称和部署路径修改
28
  # FastAPI application object; the startup hook and the HTTP handlers in this
  # module register themselves on it through its decorators.
  app = FastAPI(title="saas-search Reranker Service", version="1.0.0")
d90e7428   tangwang   补充重排
29
  
701ae503   tangwang   docs
30
31
  # Backend instance used to score requests. Assigned by load_model(); stays
  # None until that startup hook has built a backend.
  _reranker: Optional[RerankBackendProtocol] = None
  # Name of the backend selected by load_model(); empty string until then.
  _backend_name: str = ""
d90e7428   tangwang   补充重排
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
  
  
  # Request body accepted by POST /rerank.
  class RerankRequest(BaseModel):
      # Required search query text.
      query: str = Field(..., description="Search query")
      # Required list of documents/passages to score against the query.
      docs: List[str] = Field(..., description="Documents/passages to rerank")
      # Optional flag; defaults to CONFIG.NORMALIZE as read when this class is
      # defined. An explicit null is also accepted because the type is Optional.
      normalize: Optional[bool] = Field(
          default=CONFIG.NORMALIZE, description="Apply sigmoid normalization"
      )
  
  
  # Response body returned by POST /rerank.
  class RerankResponse(BaseModel):
      # One score per input document, in the same order as the request's docs.
      scores: List[float] = Field(..., description="Scores aligned to input docs order")
      # Free-form metadata about the scoring call; a fresh empty dict when omitted.
      meta: Dict[str, Any] = Field(default_factory=dict)
  
  
  @app.on_event("startup")
  def load_model() -> None:
      """Build the configured rerank backend when the application starts.

      Reads the backend name and settings from ``get_rerank_backend_config()``,
      creates the backend with ``get_rerank_backend()`` and stores both the
      instance and its name in the module globals ``_reranker`` and
      ``_backend_name``.

      Raises:
          Exception: any error raised while resolving the configuration or
              building the backend is logged with its traceback and re-raised.
      """
      global _reranker, _backend_name

      logger.info("Starting reranker service on port %s", CONFIG.PORT)
      try:
          name, settings = get_rerank_backend_config()
          _backend_name = name
          _reranker = get_rerank_backend(name, settings)

          # Prefer the model name exposed by the backend instance; otherwise use
          # the configured "model_name", and finally the backend name itself.
          model_label = getattr(_reranker, "_model_name", None)
          if not model_label:
              model_label = settings.get("model_name", name)

          logger.info(
              "Reranker ready | backend=%s model=%s",
              _backend_name,
              model_label,
          )
      except Exception as exc:
          # logger.exception logs at ERROR level and attaches the traceback.
          logger.exception("Failed to initialize reranker: %s", exc)
          raise
  
  
  @app.get("/health")
  def health() -> Dict[str, Any]:
      # Report whether a rerank backend is loaded, plus the model and backend
      # names. The reply always has the keys "status", "model_loaded", "model"
      # and "backend"; "status" is "ok" when a backend is loaded and
      # "unavailable" otherwise.
      loaded = _reranker is not None

      model_info = ""
      if loaded:
          # Prefer the model name exposed by the backend instance; otherwise
          # look up "model_name" in its _config, defaulting to the backend name.
          model_info = getattr(_reranker, "_model_name", None)
          if not model_info:
              backend_settings = getattr(_reranker, "_config", {})
              model_info = backend_settings.get("model_name", _backend_name)

      return {
          "status": "ok" if loaded else "unavailable",
          "model_loaded": loaded,
          "model": model_info,
          "backend": _backend_name,
      }
  
  
  @app.post("/rerank", response_model=RerankResponse)
  def rerank(request: RerankRequest) -> RerankResponse:
      """Score the request's documents against its query with the loaded backend.

      Args:
          request: Query, documents and optional ``normalize`` flag. When
              ``normalize`` is null, ``CONFIG.NORMALIZE`` is used.

      Returns:
          The backend's scores and metadata, with ``service_elapsed_ms`` (time
          spent in this handler around the backend call) added to the metadata.

      Raises:
          HTTPException: 503 when no backend is loaded; 400 when the query is
              blank, ``docs`` is empty, or ``docs`` has more than
              ``CONFIG.MAX_DOCS`` entries.
      """
      if _reranker is None:
          raise HTTPException(status_code=503, detail="Reranker model not loaded")

      query = (request.query or "").strip()
      if not query:
          raise HTTPException(status_code=400, detail="query cannot be empty")

      docs = request.docs
      if not docs:
          raise HTTPException(status_code=400, detail="docs cannot be empty")

      if len(docs) > CONFIG.MAX_DOCS:
          raise HTTPException(
              status_code=400,
              detail=f"Too many docs: {len(docs)} > {CONFIG.MAX_DOCS}",
          )

      normalize = CONFIG.NORMALIZE if request.normalize is None else bool(request.normalize)

      # perf_counter is monotonic, so the measured duration cannot be skewed by
      # wall-clock adjustments while the backend is running.
      started = time.perf_counter()
      logger.info(
          "Rerank request | docs=%d normalize=%s",
          len(docs),
          normalize,
      )
      scores, meta = _reranker.score_with_meta(query, docs, normalize=normalize)

      # Copy so the backend's own dict is never mutated; tolerate a missing meta.
      meta = dict(meta or {})
      meta["service_elapsed_ms"] = round((time.perf_counter() - started) * 1000.0, 3)

      # %s rather than %d: the backend may not report "input_docs", and a None
      # value would make %d formatting fail inside the logging call.
      logger.info(
          "Rerank done | docs=%s unique=%s dedup=%s elapsed_ms=%s",
          meta.get("input_docs", len(docs)),
          meta.get("unique_docs"),
          meta.get("dedup_ratio"),
          meta.get("service_elapsed_ms"),
      )

      return RerankResponse(scores=scores, meta=meta)
  
  
  if __name__ == "__main__":
      # Script entry point: serve the app with uvicorn on the configured host
      # and port, with auto-reload disabled.
      import uvicorn

      server_options = {
          "host": CONFIG.HOST,
          "port": CONFIG.PORT,
          "reload": False,
          "log_level": "info",
      }
      uvicorn.run("reranker.server:app", **server_options)