d90e7428
tangwang
补充重排
|
1
|
"""
|
d31c7f65
tangwang
补充云服务reranker
|
2
3
|
Reranker service - unified /rerank API backed by pluggable backends
(BGE, Qwen3-vLLM, Qwen3-Transformers, DashScope cloud rerank).
|
d90e7428
tangwang
补充重排
|
4
5
|
POST /rerank
|
701ae503
tangwang
docs
|
6
7
8
|
Request: { "query": "...", "docs": ["doc1", "doc2", ...], "normalize": optional bool, "top_n": optional int }
Response: { "scores": [float], "meta": {...} }
|
d31c7f65
tangwang
补充云服务reranker
|
9
|
Backend selected via config: services.rerank.backend
|
5c21a485
tangwang
qwen3-reranker-0....
|
10
|
(bge | qwen3_vllm | qwen3_transformers | qwen3_gguf | qwen3_gguf_06b | dashscope_rerank), env RERANK_BACKEND.
|
d90e7428
tangwang
补充重排
|
11
12
13
|
"""
import logging
|
28e57bb1
tangwang
日志体系优化
|
14
|
import os
|
d90e7428
tangwang
补充重排
|
15
16
17
18
19
20
|
import time
from typing import Any, Dict, List, Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
|
701ae503
tangwang
docs
|
21
22
|
from config.services_config import get_rerank_backend_config
from reranker.backends import RerankBackendProtocol, get_rerank_backend
|
d90e7428
tangwang
补充重排
|
23
24
25
26
27
28
29
30
|
from reranker.config import CONFIG
# Configure the root logger at import time: INFO level, with timestamp,
# level and logger name prepended to every message.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s %(name)s | %(message)s",
)
# Logger used by every handler in this module.
logger = logging.getLogger("reranker.service")
|
a7920e17
tangwang
项目名称和部署路径修改
|
31
|
# ASGI application object; the startup hook and HTTP routes in this module register on it.
app = FastAPI(title="saas-search Reranker Service", version="1.0.0")
|
d90e7428
tangwang
补充重排
|
32
|
|
701ae503
tangwang
docs
|
33
34
|
# Active rerank backend; stays None until the startup hook assigns it.
_reranker: Optional[RerankBackendProtocol] = None
# Name of the configured backend; empty until the startup hook assigns it.
_backend_name: str = ""
|
28e57bb1
tangwang
日志体系优化
|
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
# How many documents/scores are included in log previews.
# Read from env RERANK_LOG_DOC_PREVIEW_COUNT (default 3, never below 1).
# A non-integer value raises ValueError when this module is imported.
_LOG_DOC_PREVIEW_COUNT = max(1, int(os.getenv("RERANK_LOG_DOC_PREVIEW_COUNT", "3")))
# Maximum characters kept per previewed text before truncation.
# Read from env RERANK_LOG_TEXT_PREVIEW_CHARS (default 180, never below 32).
# A non-integer value raises ValueError when this module is imported.
_LOG_TEXT_PREVIEW_CHARS = max(32, int(os.getenv("RERANK_LOG_TEXT_PREVIEW_CHARS", "180")))
def _compact_preview(text: str, max_chars: int) -> str:
compact = " ".join((text or "").split())
if len(compact) <= max_chars:
return compact
return compact[:max_chars] + "..."
def _preview_docs(docs: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]:
previews: List[Dict[str, Any]] = []
for idx, doc in enumerate(docs[:max_items]):
previews.append(
{
"idx": idx,
"len": len(doc),
"preview": _compact_preview(doc, max_chars),
}
)
return previews
|
d90e7428
tangwang
补充重排
|
57
58
59
60
61
62
63
64
|
class RerankRequest(BaseModel):
    """Request body accepted by ``POST /rerank``."""

    # Query text the documents are scored against.
    query: str = Field(..., description="Search query")

    # Candidate passages to score.
    docs: List[str] = Field(..., description="Documents/passages to rerank")

    # Falls back to the service-wide CONFIG.NORMALIZE when omitted.
    normalize: Optional[bool] = Field(
        default=CONFIG.NORMALIZE,
        description="Apply sigmoid normalization",
    )

    # Optional hint; None means no top-n was requested.
    top_n: Optional[int] = Field(
        default=None,
        description="Optional top_n hint for backends that support partial ranking",
    )
|
d90e7428
tangwang
补充重排
|
69
70
71
72
73
74
75
76
77
|
class RerankResponse(BaseModel):
    """Response body returned by ``POST /rerank``."""

    # Scores in the same order as the request's documents.
    scores: List[float] = Field(..., description="Scores aligned to input docs order")

    # Free-form diagnostics; an empty dict when nothing is supplied.
    meta: Dict[str, Any] = Field(default_factory=dict)
@app.on_event("startup")
def load_model() -> None:
    """Create the configured rerank backend when the application starts.

    Resolves the backend name and settings via ``get_rerank_backend_config``,
    builds the backend with ``get_rerank_backend`` and stores the results in
    the module-level ``_backend_name`` and ``_reranker``. Any exception is
    logged with its traceback and re-raised.
    """
    global _reranker, _backend_name

    logger.info("Starting reranker service on port %s", CONFIG.PORT)
    try:
        name, settings = get_rerank_backend_config()
        _backend_name = name
        _reranker = get_rerank_backend(name, settings)

        # Prefer the model name exposed by the backend instance; otherwise
        # use the configured model name, and finally the backend name.
        model_label = getattr(_reranker, "_model_name", None)
        if not model_label:
            model_label = settings.get("model_name", name)

        logger.info("Reranker ready | backend=%s model=%s", _backend_name, model_label)
    except Exception as exc:
        logger.exception("Failed to initialize reranker: %s", exc)
        raise
@app.get("/health")
def health() -> Dict[str, Any]:
    """Report whether a rerank backend is loaded and which model it serves.

    Returns a dict with ``status`` (``"ok"`` or ``"unavailable"``),
    ``model_loaded``, ``model`` and ``backend``.
    """
    loaded = _reranker is not None

    model_info = ""
    if loaded:
        # Prefer the backend's own model name, then its config entry, then
        # the backend name.
        model_info = getattr(_reranker, "_model_name", None)
        if not model_info:
            backend_settings = getattr(_reranker, "_config", {})
            model_info = backend_settings.get("model_name", _backend_name)

    return {
        "status": "ok" if loaded else "unavailable",
        "model_loaded": loaded,
        "model": model_info,
        "backend": _backend_name,
    }
@app.post("/rerank", response_model=RerankResponse)
def rerank(request: RerankRequest) -> RerankResponse:
    """Score ``request.docs`` against ``request.query`` with the loaded backend.

    When ``top_n`` is supplied and the backend exposes
    ``score_with_meta_topn``, that method is used; otherwise
    ``score_with_meta`` is called.

    Returns:
        A ``RerankResponse`` holding the backend's scores and a copy of its
        metadata, extended with ``service_elapsed_ms`` and, when ``top_n``
        was supplied and the backend did not set it, ``requested_top_n``.

    Raises:
        HTTPException: 503 when no backend is loaded; 400 when the query is
            blank, ``docs`` is empty or longer than ``CONFIG.MAX_DOCS``, or
            ``top_n`` is not positive.
    """
    if _reranker is None:
        raise HTTPException(status_code=503, detail="Reranker model not loaded")

    query = (request.query or "").strip()
    if not query:
        raise HTTPException(status_code=400, detail="query cannot be empty")

    docs = request.docs
    if not docs:
        raise HTTPException(status_code=400, detail="docs cannot be empty")
    if len(docs) > CONFIG.MAX_DOCS:
        raise HTTPException(
            status_code=400,
            detail=f"Too many docs: {len(docs)} > {CONFIG.MAX_DOCS}",
        )

    top_n = None if request.top_n is None else int(request.top_n)
    if top_n is not None and top_n <= 0:
        raise HTTPException(status_code=400, detail="top_n must be > 0")

    normalize = CONFIG.NORMALIZE if request.normalize is None else bool(request.normalize)

    # perf_counter is monotonic, so the reported latency cannot go negative
    # or jump when the system clock is adjusted mid-request.
    started = time.perf_counter()
    logger.info(
        "Rerank request | docs=%d normalize=%s query_len=%d query=%r doc_preview=%s",
        len(docs),
        normalize,
        len(query),
        _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS),
        _preview_docs(docs, _LOG_DOC_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS),
    )

    # Use the partial-ranking entry point only when the caller asked for
    # top_n and the backend actually provides one.
    score_topn = None
    if top_n is not None:
        score_topn = getattr(_reranker, "score_with_meta_topn", None)
    if score_topn is not None:
        scores, meta = score_topn(query, docs, normalize=normalize, top_n=top_n)
    else:
        scores, meta = _reranker.score_with_meta(query, docs, normalize=normalize)

    # Copy so the backend's own metadata object is never mutated.
    meta = dict(meta)
    if top_n is not None:
        meta.setdefault("requested_top_n", top_n)
    meta["service_elapsed_ms"] = round((time.perf_counter() - started) * 1000.0, 3)

    score_preview = [round(float(s), 6) for s in scores[:_LOG_DOC_PREVIEW_COUNT]]
    # Backend metadata keys are optional: format them with %s (a missing key
    # would break %d and drop the log line) and fall back to the request's
    # document count when the backend does not report input_docs.
    logger.info(
        "Rerank done | docs=%s unique=%s dedup=%s elapsed_ms=%s batches=%s batchsize=%s batch_concurrency=%s query=%r score_preview=%s",
        meta.get("input_docs", len(docs)),
        meta.get("unique_docs"),
        meta.get("dedup_ratio"),
        meta.get("service_elapsed_ms"),
        meta.get("batches"),
        meta.get("batchsize"),
        meta.get("batch_concurrency"),
        _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS),
        score_preview,
    )
    return RerankResponse(scores=scores, meta=meta)
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when this file is run as a script.
    import uvicorn

    server_options = {
        "host": CONFIG.HOST,
        "port": CONFIG.PORT,
        "reload": False,
        "log_level": "info",
    }
    uvicorn.run("reranker.server:app", **server_options)
|