d90e7428
tangwang
补充重排
|
1
|
"""
|
d31c7f65
tangwang
补充云服务reranker
|
2
3
|
Reranker service - unified /rerank API backed by pluggable backends
(BGE, Qwen3-vLLM, Qwen3-Transformers, DashScope cloud rerank).
|
d90e7428
tangwang
补充重排
|
4
5
|
POST /rerank
|
701ae503
tangwang
docs
|
6
7
8
|
Request: { "query": "...", "docs": ["doc1", "doc2", ...], "normalize": optional bool }
Response: { "scores": [float], "meta": {...} }
|
daa2690b
tangwang
漏斗参数调优&呈现优化
|
9
|
Backend selected via config: services.rerank.instances.<name>.backend
|
971a0851
tangwang
补充reranker-jina,探...
|
10
|
(bge | jina_reranker_v3 | qwen3_vllm | qwen3_vllm_score | qwen3_transformers | qwen3_transformers_packed | qwen3_gguf | qwen3_gguf_06b | dashscope_rerank), env RERANK_BACKEND.
|
d90e7428
tangwang
补充重排
|
11
12
13
|
"""
import logging
|
28e57bb1
tangwang
日志体系优化
|
14
|
import os
|
d90e7428
tangwang
补充重排
|
15
16
17
18
19
20
|
import time
from typing import Any, Dict, List, Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
|
701ae503
tangwang
docs
|
21
22
|
from config.services_config import get_rerank_backend_config
from reranker.backends import RerankBackendProtocol, get_rerank_backend
|
d90e7428
tangwang
补充重排
|
23
24
25
26
27
28
29
30
|
from reranker.config import CONFIG
# Configure the root logger once, at import time, so every record emitted by
# this process carries a timestamp, level and logger name.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(name)s | %(message)s",
    level=logging.INFO,
)

# Module-wide logger used by the startup hook and the request handlers.
logger = logging.getLogger("reranker.service")
|
a7920e17
tangwang
项目名称和部署路径修改
|
31
|
# ASGI application object; the route and startup decorators below attach to it.
app = FastAPI(version="1.0.0", title="saas-search Reranker Service")
|
d90e7428
tangwang
补充重排
|
32
|
|
701ae503
tangwang
docs
|
33
34
|
# Active rerank backend. Stays None until the startup hook (load_model) has
# built it; /health and /rerank treat None as "model not loaded".
_reranker: Optional[RerankBackendProtocol] = None
# Name of the backend chosen at startup; empty until load_model has run.
_backend_name: str = ""
|
28e57bb1
tangwang
日志体系优化
|
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
def _env_int(name: str, default: int, minimum: int) -> int:
    """Read an integer setting from the environment, clamped to ``minimum``.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset or is not a valid
            integer.
        minimum: Lower bound applied to the final value.

    Returns:
        ``max(minimum, value)`` where ``value`` is the parsed variable or
        ``default``.
    """
    raw = os.getenv(name)
    if raw is None:
        return max(minimum, default)
    try:
        value = int(raw)
    except ValueError:
        # These knobs only shape log output, so a malformed value should not
        # abort module import; warn and carry on with the default instead.
        logging.getLogger("reranker.service").warning(
            "Ignoring invalid integer for %s=%r; using default %d", name, raw, default
        )
        value = default
    return max(minimum, value)


# How many documents (and scores) are previewed in each request/response log line.
_LOG_DOC_PREVIEW_COUNT = _env_int("RERANK_LOG_DOC_PREVIEW_COUNT", 3, 1)
# Maximum number of characters kept from the query and from each previewed document.
_LOG_TEXT_PREVIEW_CHARS = _env_int("RERANK_LOG_TEXT_PREVIEW_CHARS", 180, 32)
def _compact_preview(text: str, max_chars: int) -> str:
    """Collapse whitespace in ``text`` and cap it for use in a log line.

    Every run of whitespace (newlines and tabs included) becomes a single
    space and leading/trailing whitespace is dropped. A falsy ``text`` yields
    an empty string. When the collapsed text is longer than ``max_chars`` it
    is cut to that many characters and ``"..."`` is appended, so a truncated
    result is three characters longer than ``max_chars``.
    """
    words = (text or "").split()
    collapsed = " ".join(words)
    if len(collapsed) > max_chars:
        return collapsed[:max_chars] + "..."
    return collapsed
def _preview_docs(docs: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]:
    """Summarise the first ``max_items`` documents for logging.

    Returns one dict per previewed document with its position in ``docs``
    (``idx``), its full length (``len``) and a whitespace-collapsed, truncated
    excerpt (``preview``) limited by ``max_chars``.
    """
    return [
        {
            "idx": position,
            "len": len(text),
            "preview": _compact_preview(text, max_chars),
        }
        for position, text in enumerate(docs[:max_items])
    ]
|
d90e7428
tangwang
补充重排
|
57
58
59
60
61
62
63
64
|
class RerankRequest(BaseModel):
    """Request body accepted by ``POST /rerank``."""

    # Query text the documents are scored against.
    query: str = Field(..., description="Search query")
    # Candidate documents/passages to score.
    docs: List[str] = Field(..., description="Documents/passages to rerank")
    # Defaults to the service-wide CONFIG.NORMALIZE setting when omitted.
    normalize: Optional[bool] = Field(
        CONFIG.NORMALIZE,
        description="Apply sigmoid normalization",
    )
    # Optional hint; only forwarded to backends exposing a top-n scoring method.
    top_n: Optional[int] = Field(
        None,
        description="Optional top_n hint for backends that support partial ranking",
    )
|
d90e7428
tangwang
补充重排
|
69
70
71
72
73
74
75
76
77
|
class RerankResponse(BaseModel):
    """Response body returned by ``POST /rerank``."""

    # One score per input document.
    scores: List[float] = Field(..., description="Scores aligned to input docs order")
    # Free-form diagnostics from the backend plus service-level timing.
    meta: Dict[str, Any] = Field(default_factory=dict)
# NOTE(review): recent FastAPI releases deprecate on_event in favour of
# lifespan handlers; consider migrating together with the app construction.
@app.on_event("startup")
def load_model() -> None:
    """Build the configured rerank backend when the application starts.

    Resolves the backend name and its settings for ``CONFIG.INSTANCE``, creates
    the backend, and stores both in the module globals ``_backend_name`` and
    ``_reranker``. Any exception raised along the way is logged with its
    traceback and re-raised.
    """
    global _reranker, _backend_name

    logger.info("Starting reranker service | instance=%s port=%s", CONFIG.INSTANCE, CONFIG.PORT)
    try:
        name, settings = get_rerank_backend_config(CONFIG.INSTANCE)
        _backend_name = name
        _reranker = get_rerank_backend(name, settings)

        # Prefer the model name reported by the backend itself; otherwise use
        # the configured one, and finally the backend name.
        model_label = getattr(_reranker, "_model_name", None)
        if not model_label:
            model_label = settings.get("model_name", name)

        logger.info(
            "Reranker ready | instance=%s backend=%s model=%s",
            CONFIG.INSTANCE,
            _backend_name,
            model_label,
        )
    except Exception as exc:
        logger.exception("Failed to initialize reranker: %s", exc)
        raise
@app.get("/health")
def health() -> Dict[str, Any]:
    """Report whether a rerank backend is loaded and which one it is.

    Returns:
        A dict with ``status`` (``"ok"`` or ``"unavailable"``), ``instance``,
        ``model_loaded``, ``model`` and ``backend``. ``instruction_format`` is
        added when the loaded backend exposes a non-None
        ``_instruction_format`` attribute.
    """
    loaded = _reranker is not None

    model_label = ""
    if loaded:
        # Same precedence as at startup: backend-reported name, then the
        # backend's own config, then the backend name.
        reported = getattr(_reranker, "_model_name", None)
        model_label = reported or getattr(_reranker, "_config", {}).get(
            "model_name", _backend_name
        )

    payload: Dict[str, Any] = {
        "status": "ok" if loaded else "unavailable",
        "instance": CONFIG.INSTANCE,
        "model_loaded": loaded,
        "model": model_label,
        "backend": _backend_name,
    }

    if loaded:
        instruction_format = getattr(_reranker, "_instruction_format", None)
        if instruction_format is not None:
            payload["instruction_format"] = instruction_format
    return payload
|
d90e7428
tangwang
补充重排
|
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
@app.post("/rerank", response_model=RerankResponse)
def rerank(request: RerankRequest) -> RerankResponse:
    """Score ``request.docs`` against ``request.query`` with the loaded backend.

    The query is stripped before use. When ``top_n`` is given and the backend
    exposes ``score_with_meta_topn`` that method is called; otherwise
    ``score_with_meta`` is used. The backend's meta is copied and extended
    with ``requested_top_n`` (if a ``top_n`` was sent and the backend did not
    set it) and ``service_elapsed_ms``.

    Args:
        request: Parsed request body.

    Returns:
        The backend's scores together with the augmented meta dict.

    Raises:
        HTTPException: 503 when no backend is loaded; 400 when the query is
            blank, ``docs`` is empty or longer than ``CONFIG.MAX_DOCS``, or
            ``top_n`` is not positive.
    """
    if _reranker is None:
        raise HTTPException(status_code=503, detail="Reranker model not loaded")

    query = (request.query or "").strip()
    if not query:
        raise HTTPException(status_code=400, detail="query cannot be empty")

    docs = request.docs
    if not docs:
        raise HTTPException(status_code=400, detail="docs cannot be empty")
    if len(docs) > CONFIG.MAX_DOCS:
        raise HTTPException(
            status_code=400,
            detail=f"Too many docs: {len(docs)} > {CONFIG.MAX_DOCS}",
        )

    top_n = int(request.top_n) if request.top_n is not None else None
    if top_n is not None and top_n <= 0:
        raise HTTPException(status_code=400, detail="top_n must be > 0")

    normalize = CONFIG.NORMALIZE if request.normalize is None else bool(request.normalize)

    # perf_counter is monotonic, so the reported latency cannot be skewed (or
    # go negative) when the wall clock is adjusted mid-request.
    started = time.perf_counter()
    query_preview = _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS)
    logger.info(
        "Rerank request | docs=%d normalize=%s query_len=%d query=%r doc_preview=%s",
        len(docs),
        normalize,
        len(query),
        query_preview,
        _preview_docs(docs, _LOG_DOC_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS),
    )

    # The top-n entry point is optional on backends, hence the dynamic lookup.
    score_topn = getattr(_reranker, "score_with_meta_topn", None) if top_n is not None else None
    if score_topn is not None:
        scores, meta = score_topn(query, docs, normalize=normalize, top_n=top_n)
    else:
        scores, meta = _reranker.score_with_meta(query, docs, normalize=normalize)

    # Copy before mutating so the backend's own dict is left untouched; also
    # tolerate a backend that returns no meta at all.
    meta = dict(meta or {})
    if top_n is not None:
        meta.setdefault("requested_top_n", top_n)
    meta["service_elapsed_ms"] = round((time.perf_counter() - started) * 1000.0, 3)

    # Backends are not guaranteed to report "input_docs"; formatting None with
    # %d would raise inside logging and drop the record, so fall back to the
    # request's own document count and format with %s.
    input_docs = meta.get("input_docs")
    if input_docs is None:
        input_docs = len(docs)

    score_preview = [round(float(s), 6) for s in scores[:_LOG_DOC_PREVIEW_COUNT]]
    logger.info(
        "Rerank done | docs=%s unique=%s dedup=%s elapsed_ms=%s batches=%s batchsize=%s batch_concurrency=%s query=%r score_preview=%s",
        input_docs,
        meta.get("unique_docs"),
        meta.get("dedup_ratio"),
        meta.get("service_elapsed_ms"),
        meta.get("batches"),
        meta.get("batchsize"),
        meta.get("batch_concurrency"),
        query_preview,
        score_preview,
    )
    return RerankResponse(scores=scores, meta=meta)
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on the configured
    # host/port. uvicorn is imported here so that merely importing this
    # module does not pull it in.
    import uvicorn

    server_options = {
        "host": CONFIG.HOST,
        "port": CONFIG.PORT,
        "reload": False,
        "log_level": "info",
    }
    uvicorn.run("reranker.server:app", **server_options)
|