d90e7428
tangwang
补充重排
|
1
|
"""
|
701ae503
tangwang
docs
|
2
|
Reranker service - unified /rerank API backed by pluggable backends (BGE, Qwen3-vLLM).
|
d90e7428
tangwang
补充重排
|
3
4
|
POST /rerank
|
701ae503
tangwang
docs
|
5
6
7
8
|
Request: { "query": "...", "docs": ["doc1", "doc2", ...], "normalize": optional bool }
Response: { "scores": [float], "meta": {...} }
Backend selected via config: services.rerank.backend (bge | qwen3_vllm), env RERANK_BACKEND.
|
d90e7428
tangwang
补充重排
|
9
10
11
|
"""
import logging
|
28e57bb1
tangwang
日志体系优化
|
12
|
import os
|
d90e7428
tangwang
补充重排
|
13
14
15
16
17
18
|
import time
from typing import Any, Dict, List, Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
|
701ae503
tangwang
docs
|
19
20
|
from config.services_config import get_rerank_backend_config
from reranker.backends import RerankBackendProtocol, get_rerank_backend
|
d90e7428
tangwang
补充重排
|
21
22
23
24
25
26
27
28
|
from reranker.config import CONFIG
# Configure the root logger at import time with a timestamped,
# level-tagged line format.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(name)s | %(message)s",
    level=logging.INFO,
)

# Named logger used for this module's startup and per-request messages.
logger = logging.getLogger("reranker.service")
|
a7920e17
tangwang
项目名称和部署路径修改
|
29
|
# FastAPI application object; the startup hook and the /health and /rerank
# routes defined later in this module register themselves on it.
app = FastAPI(title="saas-search Reranker Service", version="1.0.0")
|
d90e7428
tangwang
补充重排
|
30
|
|
701ae503
tangwang
docs
|
31
32
|
# Active rerank backend instance; stays None until the startup hook assigns it.
_reranker: Optional[RerankBackendProtocol] = None
# Name of the configured backend, assigned by the startup hook.
_backend_name: str = ""
|
28e57bb1
tangwang
日志体系优化
|
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
def _env_int(name: str, default: int, minimum: int) -> int:
    """Read an integer setting from the environment, tolerating bad values.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset, blank, or not a
            valid integer.
        minimum: Lower bound applied to the result.

    Returns:
        The parsed (or default) value, never smaller than ``minimum``.
    """
    raw = os.getenv(name)
    value = default
    if raw is not None and raw.strip():
        try:
            value = int(raw)
        except ValueError:
            # These knobs only shape log previews; a typo in one of them must
            # not prevent the module from importing, so fall back and report.
            logging.getLogger("reranker.service").warning(
                "Invalid integer for %s=%r; using default %d", name, raw, default
            )
    return max(minimum, value)


# Number of leading docs (and scores) shown in request/response log previews.
_LOG_DOC_PREVIEW_COUNT = _env_int("RERANK_LOG_DOC_PREVIEW_COUNT", 3, 1)
# Maximum number of characters of query/doc text shown per log preview.
_LOG_TEXT_PREVIEW_CHARS = _env_int("RERANK_LOG_TEXT_PREVIEW_CHARS", 180, 32)
def _compact_preview(text: str, max_chars: int) -> str:
    """Collapse whitespace in *text* and shorten it for log output.

    Every run of whitespace becomes a single space and leading/trailing
    whitespace is dropped; a falsy ``text`` is treated as the empty string.

    Args:
        text: Raw text to summarise.
        max_chars: Longest compacted text returned without truncation.

    Returns:
        The compacted text, cut to ``max_chars`` characters and suffixed
        with ``"..."`` when it was longer than that.
    """
    words = (text or "").split()
    flattened = " ".join(words)
    too_long = len(flattened) > max_chars
    return flattened[:max_chars] + "..." if too_long else flattened
def _preview_docs(docs: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]:
    """Summarise the leading documents of a request for log output.

    Args:
        docs: Documents in request order.
        max_items: Number of leading documents to include.
        max_chars: Character budget passed to ``_compact_preview`` per doc.

    Returns:
        One dict per previewed document holding its position (``"idx"``),
        the length of the untouched text (``"len"``), and the compacted
        text (``"preview"``).
    """
    return [
        {
            "idx": position,
            "len": len(text),
            "preview": _compact_preview(text, max_chars),
        }
        for position, text in enumerate(docs[:max_items])
    ]
|
d90e7428
tangwang
补充重排
|
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
class RerankRequest(BaseModel):
    """Request body accepted by ``POST /rerank``."""

    # Query text the documents are scored against.
    query: str = Field(..., description="Search query")

    # Candidate texts to score.
    docs: List[str] = Field(..., description="Documents/passages to rerank")

    # Defaults to CONFIG.NORMALIZE; the endpoint also substitutes that value
    # when an explicit null is sent.
    normalize: Optional[bool] = Field(
        description="Apply sigmoid normalization",
        default=CONFIG.NORMALIZE,
    )
class RerankResponse(BaseModel):
    """Response body returned by ``POST /rerank``."""

    # One score per input document, in the order the documents were sent.
    scores: List[float] = Field(
        ...,
        description="Scores aligned to input docs order",
    )

    # Backend-provided metadata plus the service-side elapsed time; a fresh
    # empty dict is created when none is supplied.
    meta: Dict[str, Any] = Field(default_factory=dict)
@app.on_event("startup")
def load_model() -> None:
    """Create the configured rerank backend when the application starts.

    Reads the backend name and its settings from the service configuration,
    builds the backend, and stores both in the module-level ``_reranker`` and
    ``_backend_name`` used by the endpoints.

    Raises:
        Exception: Whatever the configuration lookup or backend construction
            raises; it is logged with its traceback and then re-raised.
    """
    global _reranker, _backend_name

    logger.info("Starting reranker service on port %s", CONFIG.PORT)
    try:
        name, settings = get_rerank_backend_config()
        _backend_name = name
        _reranker = get_rerank_backend(name, settings)

        # Prefer the model name exposed by the backend itself; otherwise use
        # the configured "model_name", and finally the backend name.
        reported = getattr(_reranker, "_model_name", None)
        model_info = reported or settings.get("model_name", name)

        logger.info("Reranker ready | backend=%s model=%s", _backend_name, model_info)
    except Exception as exc:
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception("Failed to initialize reranker: %s", exc)
        raise
@app.get("/health")
def health() -> Dict[str, Any]:
    """Report whether a rerank backend is loaded and which model it serves.

    Returns:
        A dict with ``status`` ("ok" or "unavailable"), ``model_loaded``
        (bool), ``model`` (model name, empty when nothing is loaded), and
        ``backend`` (configured backend name).
    """
    # Read the global once so "status" and "model_loaded" always agree.
    backend = _reranker
    loaded = backend is not None

    model_info = ""
    if loaded:
        model_info = getattr(backend, "_model_name", None)
        if not model_info:
            # A backend may have no `_config` attribute or may set it to None;
            # treat both as an empty config so the health probe never raises.
            backend_cfg = getattr(backend, "_config", None) or {}
            model_info = backend_cfg.get("model_name", _backend_name)

    return {
        "status": "ok" if loaded else "unavailable",
        "model_loaded": loaded,
        "model": model_info,
        "backend": _backend_name,
    }
@app.post("/rerank", response_model=RerankResponse)
def rerank(request: RerankRequest) -> RerankResponse:
    """Score each document against the query using the loaded backend.

    Args:
        request: Query, documents, and optional normalization flag.

    Returns:
        The backend's scores together with its metadata, extended with
        ``service_elapsed_ms`` (time spent in this handler, in milliseconds).

    Raises:
        HTTPException: 503 when no backend is loaded; 400 when the query is
            blank, ``docs`` is empty, or ``docs`` has more than
            ``CONFIG.MAX_DOCS`` entries.
    """
    if _reranker is None:
        raise HTTPException(status_code=503, detail="Reranker model not loaded")

    query = (request.query or "").strip()
    if not query:
        raise HTTPException(status_code=400, detail="query cannot be empty")

    docs = request.docs
    if not docs:
        raise HTTPException(status_code=400, detail="docs cannot be empty")

    doc_count = len(docs)
    if doc_count > CONFIG.MAX_DOCS:
        raise HTTPException(
            status_code=400,
            detail=f"Too many docs: {doc_count} > {CONFIG.MAX_DOCS}",
        )

    # An explicit null in the request means "use the service default".
    normalize = CONFIG.NORMALIZE if request.normalize is None else bool(request.normalize)

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # wall-clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    query_preview = _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS)
    logger.info(
        "Rerank request | docs=%d normalize=%s query_len=%d query=%r doc_preview=%s",
        doc_count,
        normalize,
        len(query),
        query_preview,
        _preview_docs(docs, _LOG_DOC_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS),
    )

    scores, meta = _reranker.score_with_meta(query, docs, normalize=normalize)

    # Copy so the backend's own dict is never mutated; tolerate a None meta.
    meta = dict(meta or {})
    meta["service_elapsed_ms"] = round((time.perf_counter() - started) * 1000.0, 3)

    # The message formats this value with %d; None would make the log call
    # fail to format, so fall back to the request's doc count when the
    # backend does not report "input_docs".
    input_docs = meta.get("input_docs")
    if input_docs is None:
        input_docs = doc_count

    score_preview = [round(float(s), 6) for s in scores[:_LOG_DOC_PREVIEW_COUNT]]
    logger.info(
        "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s query=%r score_preview=%s",
        input_docs,
        meta.get("unique_docs"),
        meta.get("dedup_ratio"),
        meta.get("service_elapsed_ms"),
        query_preview,
        score_preview,
    )
    return RerankResponse(scores=scores, meta=meta)
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on the configured
    # host and port, with auto-reload disabled.
    import uvicorn

    server_options = {
        "host": CONFIG.HOST,
        "port": CONFIG.PORT,
        "reload": False,
        "log_level": "info",
    }
    uvicorn.run("reranker.server:app", **server_options)
|