Blame view

embeddings/server.py 5.78 KB
7bfb9946   tangwang   向量化模块
1
2
3
  """
  Embedding service (FastAPI).
  
ed948666   tangwang   tidy
4
5
6
  API (simple list-in, list-out; aligned by index):
  - POST /embed/text   body: ["text1", "text2", ...] -> [[...], ...]
  - POST /embed/image  body: ["url_or_path1", ...]  -> [[...], ...]
7bfb9946   tangwang   向量化模块
7
8
  """
  
0a3764c4   tangwang   优化embedding模型加载
9
  import logging
7bfb9946   tangwang   向量化模块
10
11
12
13
  import threading
  from typing import Any, Dict, List, Optional
  
  import numpy as np
ed948666   tangwang   tidy
14
  from fastapi import FastAPI, HTTPException
7bfb9946   tangwang   向量化模块
15
16
  
  from embeddings.config import CONFIG
c10f90fe   tangwang   cnclip
17
  from embeddings.protocols import ImageEncoderProtocol
7bfb9946   tangwang   向量化模块
18
  
0a3764c4   tangwang   优化embedding模型加载
19
  logger = logging.getLogger(__name__)
7bfb9946   tangwang   向量化模块
20
  
a7920e17   tangwang   项目名称和部署路径修改
21
  app = FastAPI(title="saas-search Embedding Service", version="1.0.0")
7bfb9946   tangwang   向量化模块
22
  
0a3764c4   tangwang   优化embedding模型加载
23
  # Models are loaded at startup, not lazily
950a640e   tangwang   embeddings
24
  # Text encoder instance; stays None until load_models() assigns it at startup.
  _text_model: Optional[Any] = None
c10f90fe   tangwang   cnclip
25
  # Image encoder instance; stays None until load_models() assigns it at startup.
  _image_model: Optional[ImageEncoderProtocol] = None
40f1e391   tangwang   cnclip
26
  # When True, load_models() constructs the text model at startup.
  open_text_model = True
c10f90fe   tangwang   cnclip
27
  # When True, load_models() constructs the image encoder at startup: the
  # clip-as-service client if CONFIG.USE_CLIP_AS_SERVICE is set, otherwise the
  # local CN-CLIP model.
  open_image_model = True
7bfb9946   tangwang   向量化模块
28
29
30
31
32
  
  # Each lock is held around the corresponding model's encode call in the
  # request handlers, so a given model handles one encode call at a time
  # within this process.
  _text_encode_lock = threading.Lock()
  _image_encode_lock = threading.Lock()
  
  
0a3764c4   tangwang   优化embedding模型加载
33
34
35
36
  @app.on_event("startup")
  def load_models():
      """Build every enabled encoder once, when the application starts.

      Assigns the module-level ``_text_model`` and ``_image_model`` so that
      requests do not pay the model-loading cost. A failure while building
      either model is logged with its traceback and then re-raised.
      """
      global _text_model, _image_model

      logger.info("Loading embedding models at startup...")

      # Text encoder (skipped entirely when the flag is off).
      if open_text_model:
          try:
              _text_model = _build_text_encoder()
              logger.info("Text model loaded successfully")
          except Exception as e:
              logger.error(f"Failed to load text model: {e}", exc_info=True)
              raise

      # Image encoder: clip-as-service client or the local CN-CLIP model.
      if open_image_model:
          try:
              if CONFIG.USE_CLIP_AS_SERVICE:
                  _image_model = _build_clip_service_encoder()
                  logger.info("Image model (clip-as-service) loaded successfully")
              else:
                  _image_model = _build_local_clip_encoder()
                  logger.info("Image model (local CN-CLIP) loaded successfully")
          except Exception as e:
              logger.error("Failed to load image model: %s", e, exc_info=True)
              raise

      logger.info("All embedding models loaded successfully, service ready")


  def _build_text_encoder() -> Any:
      """Import and construct the text model named by ``CONFIG.TEXT_MODEL_ID``."""
      # Imported inside the function so the module is only loaded when the
      # text model is actually enabled.
      from embeddings.qwen3_model import Qwen3TextModel

      logger.info(f"Loading text model: {CONFIG.TEXT_MODEL_ID}")
      return Qwen3TextModel(model_id=CONFIG.TEXT_MODEL_ID)


  def _build_clip_service_encoder() -> Any:
      """Import and construct the clip-as-service image encoder from CONFIG."""
      from embeddings.clip_as_service_encoder import ClipAsServiceImageEncoder

      logger.info(f"Loading image encoder via clip-as-service: {CONFIG.CLIP_AS_SERVICE_SERVER}")
      return ClipAsServiceImageEncoder(
          server=CONFIG.CLIP_AS_SERVICE_SERVER,
          batch_size=CONFIG.IMAGE_BATCH_SIZE,
      )


  def _build_local_clip_encoder() -> Any:
      """Import and construct the local CN-CLIP image model from CONFIG."""
      from embeddings.clip_model import ClipImageModel

      logger.info(f"Loading local image model: {CONFIG.IMAGE_MODEL_NAME} (device: {CONFIG.IMAGE_DEVICE})")
      return ClipImageModel(
          model_name=CONFIG.IMAGE_MODEL_NAME,
          device=CONFIG.IMAGE_DEVICE,
      )
7bfb9946   tangwang   向量化模块
79
80
81
82
83
84
85
86
87
88
89
90
91
92
  
  
  def _as_list(embedding: Optional[np.ndarray]) -> Optional[List[float]]:
      if embedding is None:
          return None
      if not isinstance(embedding, np.ndarray):
          embedding = np.array(embedding, dtype=np.float32)
      if embedding.ndim != 1:
          embedding = embedding.reshape(-1)
      return embedding.astype(np.float32).tolist()
  
  
  @app.get("/health")
  def health() -> Dict[str, Any]:
      """Report service status and whether each model object is currently set."""
      text_ready = _text_model is not None
      image_ready = _image_model is not None
      return dict(
          status="ok",
          text_model_loaded=text_ready,
          image_model_loaded=image_ready,
      )
7bfb9946   tangwang   向量化模块
99
100
101
102
  
  
  @app.post("/embed/text")
  def embed_text(texts: List[str]) -> List[Optional[List[float]]]:
      """Embed a batch of texts, returning one vector per input, aligned by index.

      Every input is stripped of surrounding whitespace before it is encoded.

      Args:
          texts: Request body; a JSON array of non-empty strings.

      Returns:
          A list where element ``i`` is the embedding of ``texts[i]`` as a list
          of floats. An empty input list yields an empty list without calling
          the model.

      Raises:
          HTTPException: 400 if an item is not a string or is blank after
              stripping.
          RuntimeError: If the text model is not loaded, if the model returns a
              different number of embeddings than inputs, or if any embedding
              is ``None``.
      """
      if _text_model is None:
          raise RuntimeError("Text model not loaded")

      normalized: List[str] = []
      for i, t in enumerate(texts):
          if not isinstance(t, str):
              raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: must be string")
          s = t.strip()
          if not s:
              raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string")
          normalized.append(s)

      # Nothing to encode: answer directly rather than taking the lock and
      # handing the model an empty batch (whose result would otherwise surface
      # as a confusing "expected 0, got 0" mismatch if the model returned None).
      if not normalized:
          return []

      # Only one text encode call runs at a time in this process.
      with _text_encode_lock:
          embs = _text_model.encode_batch(
              normalized,
              batch_size=int(CONFIG.TEXT_BATCH_SIZE),
              device=CONFIG.TEXT_DEVICE,
              normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS),
          )
      if embs is None or len(embs) != len(normalized):
          raise RuntimeError(
              f"Text model response length mismatch: expected {len(normalized)}, "
              f"got {0 if embs is None else len(embs)}"
          )

      out: List[Optional[List[float]]] = []
      for i, emb in enumerate(embs):
          vec = _as_list(emb)
          if vec is None:
              raise RuntimeError(f"Text model returned empty embedding for index {i}")
          out.append(vec)
      return out
  
  
  @app.post("/embed/image")
  def embed_image(images: List[str]) -> List[Optional[List[float]]]:
      """Embed a batch of images, returning one vector per input, aligned by index.

      Every input is stripped of surrounding whitespace before it is passed to
      the image encoder.

      Args:
          images: Request body; a JSON array of non-empty URL or path strings.

      Returns:
          A list where element ``i`` is the embedding of ``images[i]`` as a list
          of floats. An empty input list yields an empty list without calling
          the encoder.

      Raises:
          HTTPException: 400 if an item is not a string or is blank after
              stripping.
          RuntimeError: If the image model is not loaded, if the encoder returns
              a different number of vectors than inputs, or if any vector is
              ``None``.
      """
      if _image_model is None:
          raise RuntimeError("Image model not loaded")

      urls: List[str] = []
      for i, url_or_path in enumerate(images):
          if not isinstance(url_or_path, str):
              raise HTTPException(status_code=400, detail=f"Invalid image at index {i}: must be string URL/path")
          s = url_or_path.strip()
          if not s:
              raise HTTPException(status_code=400, detail=f"Invalid image at index {i}: empty URL/path")
          urls.append(s)

      # Nothing to encode: answer directly rather than taking the lock and
      # calling the encoder with an empty batch (whose result would otherwise
      # surface as a confusing "expected 0, got 0" mismatch if it returned None).
      if not urls:
          return []

      # Only one image encode call runs at a time in this process.
      with _image_encode_lock:
          vectors = _image_model.encode_image_urls(urls, batch_size=CONFIG.IMAGE_BATCH_SIZE)
      if vectors is None or len(vectors) != len(urls):
          raise RuntimeError(
              f"Image model response length mismatch: expected {len(urls)}, "
              f"got {0 if vectors is None else len(vectors)}"
          )

      out: List[Optional[List[float]]] = []
      for i, vec in enumerate(vectors):
          out_vec = _as_list(vec)
          if out_vec is None:
              raise RuntimeError(f"Image model returned empty embedding for index {i}")
          out.append(out_vec)
      return out