7bfb9946
tangwang
向量化模块
|
1
2
3
4
5
6
7
8
|
"""
Embedding service (FastAPI).
API (simple list-in, list-out; aligned by index; failures -> null):
- POST /embed/text body: ["text1", "text2", ...] -> [[...], null, ...]
- POST /embed/image body: ["url_or_path1", ...] -> [[...], null, ...]
"""
|
0a3764c4
tangwang
优化embedding模型加载
|
9
|
import logging
|
7bfb9946
tangwang
向量化模块
|
10
11
12
13
14
15
16
|
import threading
from typing import Any, Dict, List, Optional
import numpy as np
from fastapi import FastAPI
from embeddings.config import CONFIG
|
c10f90fe
tangwang
cnclip
|
17
|
from embeddings.protocols import ImageEncoderProtocol
|
7bfb9946
tangwang
向量化模块
|
18
|
|
0a3764c4
tangwang
优化embedding模型加载
|
19
|
logger = logging.getLogger(__name__)

app = FastAPI(title="saas-search Embedding Service", version="1.0.0")

# Models are loaded at startup, not lazily
# Holders for the model singletons; populated by the startup hook and read by
# the endpoint handlers. None means "not loaded / unavailable".
_text_model: Optional[Any] = None
_image_model: Optional[ImageEncoderProtocol] = None
# Feature switches checked at startup to decide which models to load.
open_text_model = True
open_image_model = True  # Enable image embedding when using clip-as-service
# Serialize encode calls per modality; the underlying encoders are not assumed
# to be safe for concurrent batch calls.
_text_encode_lock = threading.Lock()
_image_encode_lock = threading.Lock()
|
0a3764c4
tangwang
优化embedding模型加载
|
33
34
35
36
|
@app.on_event("startup")
def load_models():
    """Load models at service startup to avoid first-request latency.

    Failure policy:
    - Text model failure is fatal (re-raised): /embed/text is the core API.
    - Image model failure is swallowed: /embed/image degrades to all-None
      responses while /embed/text stays fully functional.
    """
    logger.info("Loading embedding models at startup...")

    # Load text model
    if open_text_model:
        _load_text_model()

    # Load image model: clip-as-service (recommended) or local CN-CLIP
    if open_image_model:
        _load_image_model()

    logger.info("All embedding models loaded successfully, service ready")


def _load_text_model():
    """Load the Qwen3 text model into `_text_model`; re-raise on failure."""
    global _text_model
    try:
        # Imported lazily so the module stays importable without the model deps.
        from embeddings.qwen3_model import Qwen3TextModel
        logger.info(f"Loading text model: {CONFIG.TEXT_MODEL_ID}")
        _text_model = Qwen3TextModel(model_id=CONFIG.TEXT_MODEL_ID)
        logger.info("Text model loaded successfully")
    except Exception as e:
        logger.error(f"Failed to load text model: {e}", exc_info=True)
        raise


def _load_image_model():
    """Load the image encoder into `_image_model`; never raise.

    IMPORTANT: failures here must NOT prevent the whole service from starting.
    If the image model cannot be loaded, `_image_model` stays None and only
    /embed/image is disabled.
    """
    global _image_model
    try:
        if CONFIG.USE_CLIP_AS_SERVICE:
            from embeddings.clip_as_service_encoder import ClipAsServiceImageEncoder
            logger.info(f"Loading image encoder via clip-as-service: {CONFIG.CLIP_AS_SERVICE_SERVER}")
            _image_model = ClipAsServiceImageEncoder(
                server=CONFIG.CLIP_AS_SERVICE_SERVER,
                batch_size=CONFIG.IMAGE_BATCH_SIZE,
            )
            logger.info("Image model (clip-as-service) loaded successfully")
        else:
            from embeddings.clip_model import ClipImageModel
            logger.info(f"Loading local image model: {CONFIG.IMAGE_MODEL_NAME} (device: {CONFIG.IMAGE_DEVICE})")
            _image_model = ClipImageModel(
                model_name=CONFIG.IMAGE_MODEL_NAME,
                device=CONFIG.IMAGE_DEVICE,
            )
            logger.info("Image model (local CN-CLIP) loaded successfully")
    except Exception as e:
        logger.error(
            "Failed to load image model; image embeddings will be disabled but text embeddings remain available: %s",
            e,
            exc_info=True,
        )
        _image_model = None
|
7bfb9946
tangwang
向量化模块
|
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
def _as_list(embedding: Optional[np.ndarray]) -> Optional[List[float]]:
if embedding is None:
return None
if not isinstance(embedding, np.ndarray):
embedding = np.array(embedding, dtype=np.float32)
if embedding.ndim != 1:
embedding = embedding.reshape(-1)
return embedding.astype(np.float32).tolist()
@app.get("/health")
def health() -> Dict[str, Any]:
    """Health check endpoint reporting service status and model readiness."""
    text_ready = _text_model is not None
    image_ready = _image_model is not None
    return {
        "status": "ok",
        "text_model_loaded": text_ready,
        "image_model_loaded": image_ready,
    }
|
7bfb9946
tangwang
向量化模块
|
106
107
108
109
|
@app.post("/embed/text")
def embed_text(texts: List[str]) -> List[Optional[List[float]]]:
    """Embed a list of texts; output aligns with input by index.

    Blank/None entries and any batch-level encode failure map to None in the
    response (best-effort contract documented in the module docstring).

    Raises:
        RuntimeError: if the text model was never loaded at startup.
    """
    if _text_model is None:
        raise RuntimeError("Text model not loaded")
    out: List[Optional[List[float]]] = [None] * len(texts)
    # Keep (original_index, cleaned_text) pairs so results can be scattered
    # back into position after batching.
    indexed_texts: List[tuple] = []
    for i, t in enumerate(texts):
        if t is None:
            continue
        if not isinstance(t, str):
            t = str(t)
        t = t.strip()
        if not t:
            continue
        indexed_texts.append((i, t))
    if not indexed_texts:
        return out
    batch_texts = [t for _, t in indexed_texts]
    try:
        # Lock only around the encode call; the model is not assumed to be
        # safe for concurrent batch encoding.
        with _text_encode_lock:
            embs = _text_model.encode_batch(
                batch_texts,
                batch_size=int(CONFIG.TEXT_BATCH_SIZE),
                device=CONFIG.TEXT_DEVICE,
                normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS),
            )
        for j, (idx, _t) in enumerate(indexed_texts):
            out[idx] = _as_list(embs[j])
    except Exception:
        # Best-effort contract: keep Nones for the failed batch, but log the
        # failure instead of swallowing it silently (previously undiagnosable).
        logger.exception("Text embedding failed for a batch of %d texts; returning None", len(batch_texts))
    return out
@app.post("/embed/image")
def embed_image(images: List[str]) -> List[Optional[List[float]]]:
    """Embed a list of image URLs/paths; output aligns with input by index.

    Blank/None entries and any batch-level encode failure map to None. If the
    image model is unavailable the whole response is None vectors (graceful
    degradation rather than an error).
    """
    if _image_model is None:
        # Graceful degradation: keep API shape but return all None
        logger.warning("embed_image called but image model is not loaded; returning all None vectors")
        return [None] * len(images)
    out: List[Optional[List[float]]] = [None] * len(images)
    # Normalize inputs, remembering original positions for scatter-back.
    urls = []
    indices = []
    for i, url_or_path in enumerate(images):
        if url_or_path is None:
            continue
        if not isinstance(url_or_path, str):
            url_or_path = str(url_or_path)
        url_or_path = url_or_path.strip()
        if url_or_path:
            urls.append(url_or_path)
            indices.append(i)
    if not urls:
        return out
    with _image_encode_lock:
        try:
            # Both ClipAsServiceImageEncoder and ClipImageModel implement encode_image_urls(urls, batch_size)
            vectors = _image_model.encode_image_urls(urls, batch_size=CONFIG.IMAGE_BATCH_SIZE)
            for j, idx in enumerate(indices):
                out[idx] = _as_list(vectors[j] if j < len(vectors) else None)
        except Exception:
            # Log before resetting: failures were previously silent and
            # indistinguishable from per-item nulls. The reset matters because
            # the scatter loop above may have partially populated `out`.
            logger.exception("Image embedding failed for a batch of %d images; returning None", len(urls))
            for idx in indices:
                out[idx] = None
    return out
|