Commit 54ccf28cd24af75e464c37fa02c08ddf8721a251

Authored by tangwang
1 parent c7e80cc2

tei

config/config.yaml
@@ -168,7 +168,7 @@ services:
168 http: 168 http:
169 base_url: "http://127.0.0.1:6005" 169 base_url: "http://127.0.0.1:6005"
170 # 服务内文本后端(embedding 进程启动时读取) 170 # 服务内文本后端(embedding 进程启动时读取)
171 - backend: "tei" # tei | local_st 171 + backend: "local_st" # tei | local_st
172 backends: 172 backends:
173 tei: 173 tei:
174 base_url: "http://127.0.0.1:8080" 174 base_url: "http://127.0.0.1:8080"
embeddings/server.py
@@ -146,13 +146,20 @@ def embed_text(texts: List[str]) -> List[Optional[List[float]]]:
146 raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string") 146 raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string")
147 normalized.append(s) 147 normalized.append(s)
148 148
149 - with _text_encode_lock:  
150 - embs = _text_model.encode_batch(  
151 - normalized,  
152 - batch_size=int(CONFIG.TEXT_BATCH_SIZE),  
153 - device=CONFIG.TEXT_DEVICE,  
154 - normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS),  
155 - ) 149 + try:
  150 + with _text_encode_lock:
  151 + embs = _text_model.encode_batch(
  152 + normalized,
  153 + batch_size=int(CONFIG.TEXT_BATCH_SIZE),
  154 + device=CONFIG.TEXT_DEVICE,
  155 + normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS),
  156 + )
  157 + except Exception as e:
  158 + logger.error("Text embedding backend failure: %s", e, exc_info=True)
  159 + raise HTTPException(
  160 + status_code=502,
  161 + detail=f"Text embedding backend failure: {e}",
  162 + ) from e
156 if embs is None or len(embs) != len(normalized): 163 if embs is None or len(embs) != len(normalized):
157 raise RuntimeError( 164 raise RuntimeError(
158 f"Text model response length mismatch: expected {len(normalized)}, " 165 f"Text model response length mismatch: expected {len(normalized)}, "
embeddings/tei_model.py
@@ -2,7 +2,7 @@
2 2
3 from __future__ import annotations 3 from __future__ import annotations
4 4
5 -from typing import List, Union 5 +from typing import Any, List, Union
6 6
7 import numpy as np 7 import numpy as np
8 import requests 8 import requests
@@ -30,6 +30,15 @@ class TEITextModel:
30 health_url = f"{self.base_url}/health" 30 health_url = f"{self.base_url}/health"
31 response = requests.get(health_url, timeout=5) 31 response = requests.get(health_url, timeout=5)
32 response.raise_for_status() 32 response.raise_for_status()
  33 + # Probe one tiny embedding at startup so runtime requests do not fail later
  34 + # with opaque "Invalid TEI embedding" errors.
  35 + probe_resp = requests.post(
  36 + self.endpoint,
  37 + json={"inputs": ["health check"]},
  38 + timeout=min(self.timeout_sec, 15),
  39 + )
  40 + probe_resp.raise_for_status()
  41 + self._parse_payload(probe_resp.json(), expected_len=1)
33 42
34 @staticmethod 43 @staticmethod
35 def _normalize(embedding: np.ndarray) -> np.ndarray: 44 def _normalize(embedding: np.ndarray) -> np.ndarray:
@@ -77,20 +86,41 @@ class TEITextModel:
77 ) 86 )
78 response.raise_for_status() 87 response.raise_for_status()
79 payload = response.json() 88 payload = response.json()
  89 + vectors = self._parse_payload(payload, expected_len=len(texts))
  90 + if normalize_embeddings:
  91 + vectors = [self._normalize(vec) for vec in vectors]
  92 + return np.array(vectors, dtype=object)
80 93
81 - if not isinstance(payload, list) or len(payload) != len(texts): 94 + def _parse_payload(self, payload: Any, expected_len: int) -> List[np.ndarray]:
  95 + if not isinstance(payload, list) or len(payload) != expected_len:
  96 + got = 0 if payload is None else (len(payload) if isinstance(payload, list) else "non-list")
82 raise RuntimeError( 97 raise RuntimeError(
83 - f"TEI response length mismatch: expected {len(texts)}, "  
84 - f"got {0 if payload is None else len(payload)}" 98 + f"TEI response length mismatch: expected {expected_len}, got {got}. "
  99 + f"Response type={type(payload).__name__}"
85 ) 100 )
86 101
87 vectors: List[np.ndarray] = [] 102 vectors: List[np.ndarray] = []
88 - for i, emb in enumerate(payload):  
89 - vec = np.asarray(emb, dtype=np.float32)  
90 - if vec.ndim != 1 or vec.size == 0 or not np.isfinite(vec).all():  
91 - raise RuntimeError(f"Invalid TEI embedding at index {i}")  
92 - if normalize_embeddings:  
93 - vec = self._normalize(vec) 103 + for i, item in enumerate(payload):
  104 + emb = item.get("embedding") if isinstance(item, dict) else item
  105 + try:
  106 + vec = np.asarray(emb, dtype=np.float32)
  107 + except (TypeError, ValueError) as exc:
  108 + raise RuntimeError(
  109 + f"Invalid TEI embedding at index {i}: cannot convert to float array "
  110 + f"(item_type={type(item).__name__})"
  111 + ) from exc
  112 +
  113 + if vec.ndim != 1 or vec.size == 0:
  114 + raise RuntimeError(
  115 + f"Invalid TEI embedding at index {i}: shape={vec.shape}, size={vec.size}"
  116 + )
  117 + if not np.isfinite(vec).all():
  118 + preview = vec[:8].tolist()
  119 + raise RuntimeError(
  120 + f"Invalid TEI embedding at index {i}: contains non-finite values, "
  121 + f"preview={preview}. This often indicates TEI backend/model runtime issues "
  122 + f"(for example an incompatible dtype or model config)."
  123 + )
94 vectors.append(vec) 124 vectors.append(vec)
95 - return np.array(vectors, dtype=object) 125 + return vectors
96 126