7bfb9946
tangwang
向量化模块
|
1
2
3
|
"""
Embedding service (FastAPI).
|
ed948666
tangwang
tidy
|
4
5
6
|
API (simple list-in, list-out; aligned by index):
- POST /embed/text body: ["text1", "text2", ...] -> [[...], ...]
- POST /embed/image body: ["url_or_path1", ...] -> [[...], ...]
|
7bfb9946
tangwang
向量化模块
|
7
8
|
"""
|
0a3764c4
tangwang
优化embedding模型加载
|
9
|
import logging
|
07cf5a93
tangwang
START_EMBEDDING=...
|
10
|
import os
|
7bfb9946
tangwang
向量化模块
|
11
12
13
14
|
import threading
from typing import Any, Dict, List, Optional
import numpy as np
|
ed948666
tangwang
tidy
|
15
|
from fastapi import FastAPI, HTTPException
|
7bfb9946
tangwang
向量化模块
|
16
17
|
from embeddings.config import CONFIG
|
c10f90fe
tangwang
cnclip
|
18
|
from embeddings.protocols import ImageEncoderProtocol
|
07cf5a93
tangwang
START_EMBEDDING=...
|
19
|
from config.services_config import get_embedding_backend_config
|
7bfb9946
tangwang
向量化模块
|
20
|
|
0a3764c4
tangwang
优化embedding模型加载
|
21
|
logger = logging.getLogger(__name__)

app = FastAPI(title="saas-search Embedding Service", version="1.0.0")

# Models are loaded at startup, not lazily.
# Text backend handle (TEITextModel or Qwen3TextModel, chosen in load_models).
_text_model: Optional[Any] = None
# Image encoder (clip-as-service client or local CN-CLIP, chosen in load_models).
_image_model: Optional[ImageEncoderProtocol] = None
# Name of the active text backend ("tei" or "local_st"); reported by /health.
_text_backend_name: str = ""
# Feature toggles: set to False to skip loading that model family at startup.
open_text_model = True
open_image_model = True  # Enable image embedding when using clip-as-service

# Serialize encode calls per model.
# NOTE(review): presumably the underlying encoders are not safe for concurrent
# use — confirm before removing these locks.
_text_encode_lock = threading.Lock()
_image_encode_lock = threading.Lock()
|
0a3764c4
tangwang
优化embedding模型加载
|
36
37
38
|
@app.on_event("startup")
def load_models():
    """Load models at service startup to avoid first-request latency.

    Populates the module-level ``_text_model``, ``_image_model`` and
    ``_text_backend_name`` globals.  Any load failure is logged and
    re-raised so the service fails fast instead of serving requests
    without models.
    """
    global _text_model, _image_model, _text_backend_name

    logger.info("Loading embedding models at startup...")

    # Load text model
    if open_text_model:
        try:
            backend_name, backend_cfg = get_embedding_backend_config()
            _text_backend_name = backend_name
            if backend_name == "tei":
                from embeddings.tei_model import TEITextModel

                # Precedence: environment variable > backend config > CONFIG default.
                base_url = (
                    os.getenv("TEI_BASE_URL")
                    or backend_cfg.get("base_url")
                    or CONFIG.TEI_BASE_URL
                )
                timeout_sec = int(
                    os.getenv("TEI_TIMEOUT_SEC")
                    or backend_cfg.get("timeout_sec")
                    or CONFIG.TEI_TIMEOUT_SEC
                )
                logger.info("Loading text backend: tei (base_url=%s)", base_url)
                _text_model = TEITextModel(
                    base_url=str(base_url),
                    timeout_sec=timeout_sec,
                )
            elif backend_name == "local_st":
                from embeddings.qwen3_model import Qwen3TextModel

                model_id = (
                    os.getenv("TEXT_MODEL_ID")
                    or backend_cfg.get("model_id")
                    or CONFIG.TEXT_MODEL_ID
                )
                logger.info("Loading text backend: local_st (model=%s)", model_id)
                _text_model = Qwen3TextModel(model_id=str(model_id))
            else:
                raise ValueError(
                    f"Unsupported embedding backend: {backend_name}. "
                    "Supported: tei, local_st"
                )
            logger.info("Text backend loaded successfully: %s", _text_backend_name)
        except Exception as e:
            # Lazy %-style args, consistent with the rest of this module.
            logger.error("Failed to load text model: %s", e, exc_info=True)
            raise

    # Load image model: clip-as-service (recommended) or local CN-CLIP
    if open_image_model:
        try:
            if CONFIG.USE_CLIP_AS_SERVICE:
                from embeddings.clip_as_service_encoder import ClipAsServiceImageEncoder

                logger.info(
                    "Loading image encoder via clip-as-service: %s",
                    CONFIG.CLIP_AS_SERVICE_SERVER,
                )
                _image_model = ClipAsServiceImageEncoder(
                    server=CONFIG.CLIP_AS_SERVICE_SERVER,
                    batch_size=CONFIG.IMAGE_BATCH_SIZE,
                )
                logger.info("Image model (clip-as-service) loaded successfully")
            else:
                from embeddings.clip_model import ClipImageModel

                logger.info(
                    "Loading local image model: %s (device: %s)",
                    CONFIG.IMAGE_MODEL_NAME,
                    CONFIG.IMAGE_DEVICE,
                )
                _image_model = ClipImageModel(
                    model_name=CONFIG.IMAGE_MODEL_NAME,
                    device=CONFIG.IMAGE_DEVICE,
                )
                logger.info("Image model (local CN-CLIP) loaded successfully")
        except Exception as e:
            logger.error("Failed to load image model: %s", e, exc_info=True)
            raise

    logger.info("All embedding models loaded successfully, service ready")
|
7bfb9946
tangwang
向量化模块
|
113
114
|
|
200fdddf
tangwang
embed norm
|
115
116
117
118
119
120
121
122
|
def _normalize_vector(vec: np.ndarray) -> np.ndarray:
norm = float(np.linalg.norm(vec))
if not np.isfinite(norm) or norm <= 0.0:
raise RuntimeError("Embedding vector has invalid norm (must be > 0)")
return vec / norm
def _as_list(embedding: Optional[np.ndarray], normalize: bool = False) -> Optional[List[float]]:
|
7bfb9946
tangwang
向量化模块
|
123
124
125
126
127
128
|
if embedding is None:
return None
if not isinstance(embedding, np.ndarray):
embedding = np.array(embedding, dtype=np.float32)
if embedding.ndim != 1:
embedding = embedding.reshape(-1)
|
200fdddf
tangwang
embed norm
|
129
130
131
132
|
embedding = embedding.astype(np.float32, copy=False)
if normalize:
embedding = _normalize_vector(embedding).astype(np.float32, copy=False)
return embedding.tolist()
|
7bfb9946
tangwang
向量化模块
|
133
134
135
136
|
@app.get("/health")
def health() -> Dict[str, Any]:
    """Health check endpoint. Returns status and model loading state."""
    text_ready = _text_model is not None
    image_ready = _image_model is not None
    return {
        "status": "ok",
        "text_model_loaded": text_ready,
        "text_backend": _text_backend_name,
        "image_model_loaded": image_ready,
    }
|
7bfb9946
tangwang
向量化模块
|
144
145
146
|
@app.post("/embed/text")
def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optional[List[float]]]:
    """Embed a list of texts; output is aligned with input by index.

    Args:
        texts: non-empty strings to embed.
        normalize: L2-normalize output vectors; defaults to
            CONFIG.TEXT_NORMALIZE_EMBEDDINGS when omitted.

    Raises:
        HTTPException: 400 for invalid input items, 502 on backend failure,
            503 when the text model is not loaded.
    """
    if _text_model is None:
        # 503 tells clients the service is up but not ready, instead of the
        # generic 500 a bare RuntimeError would produce.
        raise HTTPException(status_code=503, detail="Text model not loaded")
    effective_normalize = bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS) if normalize is None else bool(normalize)

    # Validate and strip inputs; reject non-strings and empty strings early.
    normalized: List[str] = []
    for i, t in enumerate(texts):
        if not isinstance(t, str):
            raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: must be string")
        s = t.strip()
        if not s:
            raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string")
        normalized.append(s)

    try:
        with _text_encode_lock:
            embs = _text_model.encode_batch(
                normalized,
                batch_size=int(CONFIG.TEXT_BATCH_SIZE),
                device=CONFIG.TEXT_DEVICE,
                normalize_embeddings=effective_normalize,
            )
    except Exception as e:
        # Backend failure (e.g. TEI unreachable) maps to 502 Bad Gateway.
        logger.error("Text embedding backend failure: %s", e, exc_info=True)
        raise HTTPException(
            status_code=502,
            detail=f"Text embedding backend failure: {e}",
        ) from e

    # The backend must return exactly one vector per input, in order.
    if embs is None or len(embs) != len(normalized):
        raise RuntimeError(
            f"Text model response length mismatch: expected {len(normalized)}, "
            f"got {0 if embs is None else len(embs)}"
        )
    out: List[Optional[List[float]]] = []
    for i, emb in enumerate(embs):
        vec = _as_list(emb, normalize=effective_normalize)
        if vec is None:
            raise RuntimeError(f"Text model returned empty embedding for index {i}")
        out.append(vec)
    return out
@app.post("/embed/image")
def embed_image(images: List[str], normalize: Optional[bool] = None) -> List[Optional[List[float]]]:
    """Embed a list of image URLs/paths; output is aligned with input by index.

    Args:
        images: non-empty URL or path strings.
        normalize: L2-normalize output vectors; defaults to
            CONFIG.IMAGE_NORMALIZE_EMBEDDINGS when omitted.

    Raises:
        HTTPException: 400 for invalid input items, 502 on backend failure,
            503 when the image model is not loaded.
    """
    if _image_model is None:
        # 503 tells clients the service is up but not ready, instead of the
        # generic 500 a bare RuntimeError would produce.
        raise HTTPException(status_code=503, detail="Image model not loaded")
    effective_normalize = bool(CONFIG.IMAGE_NORMALIZE_EMBEDDINGS) if normalize is None else bool(normalize)

    # Validate inputs; reject non-strings and empty strings early.
    urls: List[str] = []
    for i, url_or_path in enumerate(images):
        if not isinstance(url_or_path, str):
            raise HTTPException(status_code=400, detail=f"Invalid image at index {i}: must be string URL/path")
        s = url_or_path.strip()
        if not s:
            raise HTTPException(status_code=400, detail=f"Invalid image at index {i}: empty URL/path")
        urls.append(s)

    # Wrap the backend call like embed_text does, so encoder failures map to
    # 502 Bad Gateway instead of an unhandled 500.
    try:
        with _image_encode_lock:
            vectors = _image_model.encode_image_urls(
                urls,
                batch_size=CONFIG.IMAGE_BATCH_SIZE,
                normalize_embeddings=effective_normalize,
            )
    except Exception as e:
        logger.error("Image embedding backend failure: %s", e, exc_info=True)
        raise HTTPException(
            status_code=502,
            detail=f"Image embedding backend failure: {e}",
        ) from e

    # The backend must return exactly one vector per input, in order.
    if vectors is None or len(vectors) != len(urls):
        raise RuntimeError(
            f"Image model response length mismatch: expected {len(urls)}, "
            f"got {0 if vectors is None else len(vectors)}"
        )
    out: List[Optional[List[float]]] = []
    for i, vec in enumerate(vectors):
        out_vec = _as_list(vec, normalize=effective_normalize)
        if out_vec is None:
            raise RuntimeError(f"Image model returned empty embedding for index {i}")
        out.append(out_vec)
    return out
|