Blame view

reranker/server.py 5.21 KB
  """
  Reranker service - unified /rerank API backed by pluggable backends (BGE, Qwen3-vLLM).
  
  POST /rerank
  Request: { "query": "...", "docs": ["doc1", "doc2", ...], "normalize": optional bool }
  Response: { "scores": [float], "meta": {...} }
  
  Backend selected via config: services.rerank.backend (bge | qwen3_vllm), env RERANK_BACKEND.
  """
  
  import logging
28e57bb1   tangwang   日志体系优化
12
  import os
d90e7428   tangwang   补充重排
13
14
15
16
17
18
  import time
  from typing import Any, Dict, List, Optional
  
  from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel, Field
  
701ae503   tangwang   docs
19
20
  from config.services_config import get_rerank_backend_config
  from reranker.backends import RerankBackendProtocol, get_rerank_backend
d90e7428   tangwang   补充重排
21
22
23
24
25
26
27
28
  from reranker.config import CONFIG
  
  # Configure logging at import time: INFO level, one line per record in the
  # form "<timestamp> <level> <logger name> | <message>".
  logging.basicConfig(
      level=logging.INFO,
      format="%(asctime)s %(levelname)s %(name)s | %(message)s",
  )
  # Logger shared by the startup hook and the request handlers in this module.
  logger = logging.getLogger("reranker.service")
  
a7920e17   tangwang   项目名称和部署路径修改
29
  # Application object; the startup hook and the /health and /rerank routes
  # defined later in this module are registered on it.
  app = FastAPI(title="saas-search Reranker Service", version="1.0.0")
d90e7428   tangwang   补充重排
30
  
701ae503   tangwang   docs
31
32
  # Active backend instance. Stays None until load_model() assigns it at
  # startup; /rerank answers 503 and /health reports "unavailable" while None.
  _reranker: Optional[RerankBackendProtocol] = None
  # Name of the configured backend, assigned by load_model() and echoed by /health.
  _backend_name: str = ""
28e57bb1   tangwang   日志体系优化
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
  def _env_int(name: str, default: int, minimum: int) -> int:
      """Read an integer setting from the environment, clamped to ``minimum``.

      Args:
          name: Environment variable to read.
          default: Value used when the variable is unset or cannot be parsed.
          minimum: Lower bound applied to the final value.

      Returns:
          ``max(minimum, value)`` where ``value`` is the parsed variable, or
          ``default`` when the variable is missing or not a valid integer.
      """
      raw = os.getenv(name)
      if raw is None:
          return max(minimum, default)
      try:
          value = int(raw)
      except ValueError:
          # A malformed logging knob must not stop the service from importing;
          # report it and fall back to the default instead.
          logging.getLogger("reranker.service").warning(
              "Ignoring invalid %s=%r; using default %d", name, raw, default
          )
          value = default
      return max(minimum, value)


  # Number of leading docs (and scores) included in the rerank log lines.
  _LOG_DOC_PREVIEW_COUNT = _env_int("RERANK_LOG_DOC_PREVIEW_COUNT", 3, 1)
  # Maximum number of characters of query/doc text kept in a log preview.
  _LOG_TEXT_PREVIEW_CHARS = _env_int("RERANK_LOG_TEXT_PREVIEW_CHARS", 180, 32)
  
  
  def _compact_preview(text: str, max_chars: int) -> str:
      """Collapse whitespace in ``text`` and cap its length for log output.

      Every run of whitespace becomes a single space and leading/trailing
      whitespace is dropped. A falsy ``text`` (``None`` or ``""``) yields
      ``""``. When the collapsed string is longer than ``max_chars`` it is cut
      to ``max_chars`` characters and ``"..."`` is appended, so a truncated
      result is three characters longer than ``max_chars``.
      """
      collapsed = " ".join(text.split()) if text else ""
      if len(collapsed) > max_chars:
          return f"{collapsed[:max_chars]}..."
      return collapsed
  
  
  def _preview_docs(docs: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]:
      """Summarize the first ``max_items`` docs for a log line.

      Returns one dict per summarized doc, in input order, with:
        - ``"idx"``: position of the doc in ``docs``
        - ``"len"``: length of the original, untruncated doc
        - ``"preview"``: the doc passed through ``_compact_preview`` with
          ``max_chars``
      """
      return [
          {
              "idx": position,
              "len": len(text),
              "preview": _compact_preview(text, max_chars),
          }
          for position, text in enumerate(docs[:max_items])
      ]
d90e7428   tangwang   补充重排
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
  
  
  class RerankRequest(BaseModel):
      """Request body for ``POST /rerank``.

      ``query`` and ``docs`` are required fields. ``normalize`` defaults to
      ``CONFIG.NORMALIZE``; the ``rerank`` handler also resolves an explicit
      ``None`` to that same default. Emptiness of ``query``/``docs`` and the
      ``CONFIG.MAX_DOCS`` limit are checked in the handler, not here.
      """

      query: str = Field(..., description="Search query")
      docs: List[str] = Field(..., description="Documents/passages to rerank")
      normalize: Optional[bool] = Field(
          default=CONFIG.NORMALIZE, description="Apply sigmoid normalization"
      )
  
  
  class RerankResponse(BaseModel):
      """Response body for ``POST /rerank``: scores plus metadata."""

      scores: List[float] = Field(..., description="Scores aligned to input docs order")
      # Metadata returned by the backend, to which the rerank handler adds
      # "service_elapsed_ms"; defaults to an empty dict.
      meta: Dict[str, Any] = Field(default_factory=dict)
  
  
  @app.on_event("startup")
  def load_model() -> None:
      """Build the configured rerank backend when the application starts.

      Reads the backend name and settings from ``get_rerank_backend_config()``,
      constructs the backend with ``get_rerank_backend()`` and publishes both
      through the module globals ``_backend_name`` and ``_reranker``.

      Raises:
          Exception: whatever configuration loading or backend construction
              raised; it is logged with its traceback and re-raised unchanged.
      """
      global _reranker, _backend_name
      logger.info("Starting reranker service on port %s", CONFIG.PORT)
      try:
          name, cfg = get_rerank_backend_config()
          _backend_name = name
          _reranker = get_rerank_backend(name, cfg)
          # Prefer the model name the backend exposes; otherwise fall back to
          # the configured "model_name", and finally to the backend name.
          model_info = getattr(_reranker, "_model_name", None)
          if not model_info:
              model_info = cfg.get("model_name", name)
          logger.info(
              "Reranker ready | backend=%s model=%s",
              _backend_name,
              model_info,
          )
      except Exception as exc:
          logger.exception("Failed to initialize reranker: %s", exc)
          raise
  
  
  @app.get("/health")
  def health() -> Dict[str, Any]:
      """Report whether a rerank backend is loaded and which one.

      Returns:
          A dict with:
            - ``"status"``: ``"ok"`` when a backend is loaded, else
              ``"unavailable"``
            - ``"model_loaded"``: the same condition as a bool
            - ``"model"``: the backend's ``_model_name`` if set, otherwise the
              ``"model_name"`` entry of its ``_config``, otherwise the backend
              name; ``""`` while no backend is loaded
            - ``"backend"``: the configured backend name
      """
      loaded = _reranker is not None
      model_info = ""
      if loaded:
          model_info = getattr(_reranker, "_model_name", None)
          if not model_info:
              # A backend may have no `_config` attribute or leave it as None;
              # the health probe must still answer instead of raising.
              backend_cfg = getattr(_reranker, "_config", None) or {}
              model_info = backend_cfg.get("model_name", _backend_name)
      return {
          "status": "ok" if loaded else "unavailable",
          "model_loaded": loaded,
          "model": model_info,
          "backend": _backend_name,
      }
  
  
  @app.post("/rerank", response_model=RerankResponse)
  def rerank(request: RerankRequest) -> RerankResponse:
      """Score ``request.docs`` against ``request.query`` with the loaded backend.

      Args:
          request: Query, docs and optional ``normalize`` flag. A ``None``
              flag resolves to ``CONFIG.NORMALIZE``.

      Returns:
          The backend's scores together with its metadata, extended with
          ``"service_elapsed_ms"`` (time spent in this handler around the
          backend call, in milliseconds).

      Raises:
          HTTPException: 503 when no backend is loaded; 400 when the query is
              blank, ``docs`` is empty, or ``docs`` exceeds ``CONFIG.MAX_DOCS``.
      """
      if _reranker is None:
          raise HTTPException(status_code=503, detail="Reranker model not loaded")

      query = (request.query or "").strip()
      if not query:
          raise HTTPException(status_code=400, detail="query cannot be empty")

      docs = request.docs
      if not docs:
          raise HTTPException(status_code=400, detail="docs cannot be empty")

      doc_count = len(docs)
      if doc_count > CONFIG.MAX_DOCS:
          raise HTTPException(
              status_code=400,
              detail=f"Too many docs: {doc_count} > {CONFIG.MAX_DOCS}",
          )

      normalize = CONFIG.NORMALIZE if request.normalize is None else bool(request.normalize)
      query_preview = _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS)

      # perf_counter is monotonic, so the measured duration cannot be skewed
      # by wall-clock adjustments while the backend is running.
      started = time.perf_counter()
      logger.info(
          "Rerank request | docs=%d normalize=%s query_len=%d query=%r doc_preview=%s",
          doc_count,
          normalize,
          len(query),
          query_preview,
          _preview_docs(docs, _LOG_DOC_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS),
      )
      scores, meta = _reranker.score_with_meta(query, docs, normalize=normalize)

      # Copy so the backend's own dict is never mutated; tolerate a missing one.
      meta = dict(meta or {})
      meta["service_elapsed_ms"] = round((time.perf_counter() - started) * 1000.0, 3)

      # "%d" cannot format None, so fall back to the request's doc count when
      # the backend metadata carries no "input_docs" entry.
      input_docs = meta.get("input_docs")
      if input_docs is None:
          input_docs = doc_count

      score_preview = [round(float(s), 6) for s in scores[:_LOG_DOC_PREVIEW_COUNT]]
      logger.info(
          "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s query=%r score_preview=%s",
          input_docs,
          meta.get("unique_docs"),
          meta.get("dedup_ratio"),
          meta.get("service_elapsed_ms"),
          query_preview,
          score_preview,
      )

      return RerankResponse(scores=scores, meta=meta)
  
  
  if __name__ == "__main__":
      # Script entry point: serve the app with uvicorn on the configured
      # host/port, with auto-reload disabled and uvicorn logging at "info".
      import uvicorn

      server_options = {
          "host": CONFIG.HOST,
          "port": CONFIG.PORT,
          "reload": False,
          "log_level": "info",
      }
      uvicorn.run("reranker.server:app", **server_options)