Blame view

embeddings/server.py 6.03 KB
7bfb9946   tangwang   向量化模块
1
2
3
4
5
6
7
8
  """
  Embedding service (FastAPI).
  
  API (simple list-in, list-out; aligned by index; failures -> null):
  - POST /embed/text   body: ["text1", "text2", ...] -> [[...], null, ...]
  - POST /embed/image  body: ["url_or_path1", ...]  -> [[...], null, ...]
  """
  
0a3764c4   tangwang   优化embedding模型加载
9
  import logging
7bfb9946   tangwang   向量化模块
10
11
12
13
14
15
16
  import threading
  from typing import Any, Dict, List, Optional
  
  import numpy as np
  from fastapi import FastAPI
  
  from embeddings.config import CONFIG
c10f90fe   tangwang   cnclip
17
  from embeddings.protocols import ImageEncoderProtocol
7bfb9946   tangwang   向量化模块
18
  
0a3764c4   tangwang   优化embedding模型加载
19
  logger = logging.getLogger(__name__)
7bfb9946   tangwang   向量化模块
20
  
a7920e17   tangwang   项目名称和部署路径修改
21
  app = FastAPI(title="saas-search Embedding Service", version="1.0.0")
7bfb9946   tangwang   向量化模块
22
  
0a3764c4   tangwang   优化embedding模型加载
23
  # Models are loaded at startup, not lazily
950a640e   tangwang   embeddings
24
  _text_model: Optional[Any] = None
c10f90fe   tangwang   cnclip
25
  _image_model: Optional[ImageEncoderProtocol] = None
40f1e391   tangwang   cnclip
26
  open_text_model = True
c10f90fe   tangwang   cnclip
27
  open_image_model = True  # Enable image embedding when using clip-as-service
7bfb9946   tangwang   向量化模块
28
29
30
31
32
  
  # One lock per model: embed_text / embed_image hold the matching lock while
  # they call into the model, so encode calls on the same model never overlap
  # across concurrently handled requests.
  _text_encode_lock = threading.Lock()
  _image_encode_lock = threading.Lock()
  
  
0a3764c4   tangwang   优化embedding模型加载
33
34
35
36
  @app.on_event("startup")
  def load_models():
      """Load the enabled embedding models once, when the service starts.

      Populates the module-level ``_text_model`` and ``_image_model`` so that
      request handlers never pay the model-loading cost.

      Failure policy:
      - Text model: the error is logged and re-raised, so startup fails.
      - Image model: the error is logged and ``_image_model`` stays ``None``;
        the service still starts and only image embedding is unavailable.

      Raises:
          Exception: whatever the text model import/constructor raised.
      """
      # NOTE(review): newer FastAPI releases deprecate ``on_event`` in favour of
      # lifespan handlers -- confirm against the pinned FastAPI version.
      global _text_model, _image_model

      logger.info("Loading embedding models at startup...")

      # Text model: mandatory when enabled, so a failure aborts startup.
      if open_text_model:
          try:
              # Imported lazily so the dependency is only needed when enabled.
              from embeddings.qwen3_model import Qwen3TextModel

              logger.info("Loading text model: %s", CONFIG.TEXT_MODEL_ID)
              _text_model = Qwen3TextModel(model_id=CONFIG.TEXT_MODEL_ID)
              logger.info("Text model loaded successfully")
          except Exception as e:
              logger.error("Failed to load text model: %s", e, exc_info=True)
              raise

      # Image model: clip-as-service (recommended) or local CN-CLIP.
      # IMPORTANT: failures here should NOT prevent the whole service from starting.
      # If the image model cannot be loaded, `_image_model` stays None, which only
      # disables /embed/image while keeping /embed/text fully functional.
      if open_image_model:
          try:
              if CONFIG.USE_CLIP_AS_SERVICE:
                  from embeddings.clip_as_service_encoder import ClipAsServiceImageEncoder

                  logger.info(
                      "Loading image encoder via clip-as-service: %s",
                      CONFIG.CLIP_AS_SERVICE_SERVER,
                  )
                  _image_model = ClipAsServiceImageEncoder(
                      server=CONFIG.CLIP_AS_SERVICE_SERVER,
                      batch_size=CONFIG.IMAGE_BATCH_SIZE,
                  )
                  logger.info("Image model (clip-as-service) loaded successfully")
              else:
                  from embeddings.clip_model import ClipImageModel

                  logger.info(
                      "Loading local image model: %s (device: %s)",
                      CONFIG.IMAGE_MODEL_NAME,
                      CONFIG.IMAGE_DEVICE,
                  )
                  _image_model = ClipImageModel(
                      model_name=CONFIG.IMAGE_MODEL_NAME,
                      device=CONFIG.IMAGE_DEVICE,
                  )
                  logger.info("Image model (local CN-CLIP) loaded successfully")
          except Exception as e:
              logger.error(
                  "Failed to load image model; image embeddings will be disabled but text embeddings remain available: %s",
                  e,
                  exc_info=True,
              )
              _image_model = None

      # Report the real outcome: the image model may have failed above, and
      # either model may be disabled by its open_* flag.
      logger.info(
          "Embedding service ready (text_model_loaded=%s, image_model_loaded=%s)",
          _text_model is not None,
          _image_model is not None,
      )
7bfb9946   tangwang   向量化模块
86
87
88
89
90
91
92
93
94
95
96
97
98
99
  
  
  def _as_list(embedding: Optional[np.ndarray]) -> Optional[List[float]]:
      if embedding is None:
          return None
      if not isinstance(embedding, np.ndarray):
          embedding = np.array(embedding, dtype=np.float32)
      if embedding.ndim != 1:
          embedding = embedding.reshape(-1)
      return embedding.astype(np.float32).tolist()
  
  
  @app.get("/health")
  def health() -> Dict[str, Any]:
      """Health check: always reports ``"ok"`` plus whether each model is loaded."""
      text_ready = _text_model is not None
      image_ready = _image_model is not None
      payload: Dict[str, Any] = {"status": "ok"}
      payload["text_model_loaded"] = text_ready
      payload["image_model_loaded"] = image_ready
      return payload
7bfb9946   tangwang   向量化模块
106
107
108
109
  
  
  @app.post("/embed/text")
  def embed_text(texts: List[str]) -> List[Optional[List[float]]]:
      """Embed a list of texts; the output is aligned with the input by index.

      Args:
          texts: input strings. ``None`` entries and entries that are empty
              after stripping whitespace are skipped; non-string entries are
              converted with ``str()``.

      Returns:
          A list with the same length as ``texts``. Each position holds the
          embedding as a list of floats, or ``None`` if the entry was skipped
          or no embedding was produced for it (encoding failure).

      Raises:
          RuntimeError: if the text model has not been loaded.
      """
      if _text_model is None:
          raise RuntimeError("Text model not loaded")

      out: List[Optional[List[float]]] = [None] * len(texts)

      # Keep (original index, cleaned text) pairs so results can be written
      # back to the right slot after the blanks have been filtered out.
      indexed_texts: List[tuple] = []
      for i, t in enumerate(texts):
          if t is None:
              continue
          if not isinstance(t, str):
              t = str(t)
          t = t.strip()
          if not t:
              continue
          indexed_texts.append((i, t))

      if not indexed_texts:
          return out

      batch_texts = [t for _, t in indexed_texts]
      try:
          # Only the model call is serialized; result conversion runs unlocked.
          with _text_encode_lock:
              embs = _text_model.encode_batch(
                  batch_texts,
                  batch_size=int(CONFIG.TEXT_BATCH_SIZE),
                  device=CONFIG.TEXT_DEVICE,
                  normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS),
              )
          for j, (idx, _t) in enumerate(indexed_texts):
              out[idx] = _as_list(embs[j])
      except Exception:
          # Best effort: slots not filled yet stay None, but the failure is
          # recorded instead of being swallowed silently.
          logger.exception(
              "Text embedding failed for a batch of %d text(s); unfilled results stay null",
              len(batch_texts),
          )
      return out
  
  
  @app.post("/embed/image")
  def embed_image(images: List[str]) -> List[Optional[List[float]]]:
      """Embed a list of image URLs/paths; the output is aligned by index.

      Args:
          images: image URLs or paths. ``None`` entries and entries that are
              empty after stripping whitespace are skipped; non-string entries
              are converted with ``str()``.

      Returns:
          A list with the same length as ``images``. Each position holds the
          embedding as a list of floats, or ``None`` if the entry was skipped,
          the image model is not loaded, the encoder returned fewer vectors
          than requested, or encoding failed (in which case every result of
          this request is ``None``).
      """
      if _image_model is None:
          # Graceful degradation: keep API shape but return all None
          logger.warning("embed_image called but image model is not loaded; returning all None vectors")
          return [None] * len(images)

      out: List[Optional[List[float]]] = [None] * len(images)

      # Normalize inputs; `indices` remembers each kept entry's original slot.
      urls = []
      indices = []
      for i, url_or_path in enumerate(images):
          if url_or_path is None:
              continue
          if not isinstance(url_or_path, str):
              url_or_path = str(url_or_path)
          url_or_path = url_or_path.strip()
          if url_or_path:
              urls.append(url_or_path)
              indices.append(i)

      if not urls:
          return out

      with _image_encode_lock:
          try:
              # Both ClipAsServiceImageEncoder and ClipImageModel implement encode_image_urls(urls, batch_size)
              vectors = _image_model.encode_image_urls(urls, batch_size=CONFIG.IMAGE_BATCH_SIZE)
              if len(vectors) < len(urls):
                  # Missing tail entries are reported as None below.
                  logger.warning(
                      "Image encoder returned %d vector(s) for %d input(s); missing results will be null",
                      len(vectors),
                      len(urls),
                  )
              for j, idx in enumerate(indices):
                  out[idx] = _as_list(vectors[j] if j < len(vectors) else None)
          except Exception:
              # Best effort: log the failure, then discard any partially
              # written results so the whole request is reported as failed.
              logger.exception(
                  "Image embedding failed for a batch of %d image(s); returning null vectors",
                  len(urls),
              )
              for idx in indices:
                  out[idx] = None
      return out