Commit 54ccf28cd24af75e464c37fa02c08ddf8721a251

Authored by tangwang
1 parent c7e80cc2

tei

config/config.yaml
... ... @@ -168,7 +168,7 @@ services:
168 168 http:
169 169 base_url: "http://127.0.0.1:6005"
170 170 # In-service text backend (read when the embedding process starts)
171   - backend: "tei" # tei | local_st
  171 + backend: "local_st" # tei | local_st
172 172 backends:
173 173 tei:
174 174 base_url: "http://127.0.0.1:8080"
... ...
embeddings/server.py
... ... @@ -146,13 +146,20 @@ def embed_text(texts: List[str]) -> List[Optional[List[float]]]:
146 146 raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string")
147 147 normalized.append(s)
148 148  
149   - with _text_encode_lock:
150   - embs = _text_model.encode_batch(
151   - normalized,
152   - batch_size=int(CONFIG.TEXT_BATCH_SIZE),
153   - device=CONFIG.TEXT_DEVICE,
154   - normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS),
155   - )
  149 + try:
  150 + with _text_encode_lock:
  151 + embs = _text_model.encode_batch(
  152 + normalized,
  153 + batch_size=int(CONFIG.TEXT_BATCH_SIZE),
  154 + device=CONFIG.TEXT_DEVICE,
  155 + normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS),
  156 + )
  157 + except Exception as e:
  158 + logger.error("Text embedding backend failure: %s", e, exc_info=True)
  159 + raise HTTPException(
  160 + status_code=502,
  161 + detail=f"Text embedding backend failure: {e}",
  162 + ) from e
156 163 if embs is None or len(embs) != len(normalized):
157 164 raise RuntimeError(
158 165 f"Text model response length mismatch: expected {len(normalized)}, "
... ...
embeddings/tei_model.py
... ... @@ -2,7 +2,7 @@
2 2  
3 3 from __future__ import annotations
4 4  
5   -from typing import List, Union
  5 +from typing import Any, List, Union
6 6  
7 7 import numpy as np
8 8 import requests
... ... @@ -30,6 +30,15 @@ class TEITextModel:
30 30 health_url = f"{self.base_url}/health"
31 31 response = requests.get(health_url, timeout=5)
32 32 response.raise_for_status()
  33 + # Probe one tiny embedding at startup so runtime requests do not fail later
  34 + # with opaque "Invalid TEI embedding" errors.
  35 + probe_resp = requests.post(
  36 + self.endpoint,
  37 + json={"inputs": ["health check"]},
  38 + timeout=min(self.timeout_sec, 15),
  39 + )
  40 + probe_resp.raise_for_status()
  41 + self._parse_payload(probe_resp.json(), expected_len=1)
33 42  
34 43 @staticmethod
35 44 def _normalize(embedding: np.ndarray) -> np.ndarray:
... ... @@ -77,20 +86,41 @@ class TEITextModel:
77 86 )
78 87 response.raise_for_status()
79 88 payload = response.json()
  89 + vectors = self._parse_payload(payload, expected_len=len(texts))
  90 + if normalize_embeddings:
  91 + vectors = [self._normalize(vec) for vec in vectors]
  92 + return np.array(vectors, dtype=object)
80 93  
81   - if not isinstance(payload, list) or len(payload) != len(texts):
  94 + def _parse_payload(self, payload: Any, expected_len: int) -> List[np.ndarray]:
  95 + if not isinstance(payload, list) or len(payload) != expected_len:
  96 + got = 0 if payload is None else (len(payload) if isinstance(payload, list) else "non-list")
82 97 raise RuntimeError(
83   - f"TEI response length mismatch: expected {len(texts)}, "
84   - f"got {0 if payload is None else len(payload)}"
  98 + f"TEI response length mismatch: expected {expected_len}, got {got}. "
  99 + f"Response type={type(payload).__name__}"
85 100 )
86 101  
87 102 vectors: List[np.ndarray] = []
88   - for i, emb in enumerate(payload):
89   - vec = np.asarray(emb, dtype=np.float32)
90   - if vec.ndim != 1 or vec.size == 0 or not np.isfinite(vec).all():
91   - raise RuntimeError(f"Invalid TEI embedding at index {i}")
92   - if normalize_embeddings:
93   - vec = self._normalize(vec)
  103 + for i, item in enumerate(payload):
  104 + emb = item.get("embedding") if isinstance(item, dict) else item
  105 + try:
  106 + vec = np.asarray(emb, dtype=np.float32)
  107 + except (TypeError, ValueError) as exc:
  108 + raise RuntimeError(
  109 + f"Invalid TEI embedding at index {i}: cannot convert to float array "
  110 + f"(item_type={type(item).__name__})"
  111 + ) from exc
  112 +
  113 + if vec.ndim != 1 or vec.size == 0:
  114 + raise RuntimeError(
  115 + f"Invalid TEI embedding at index {i}: shape={vec.shape}, size={vec.size}"
  116 + )
  117 + if not np.isfinite(vec).all():
  118 + preview = vec[:8].tolist()
  119 + raise RuntimeError(
  120 + f"Invalid TEI embedding at index {i}: contains non-finite values, "
  121 + f"preview={preview}. This often indicates TEI backend/model runtime issues "
  122 + f"(for example an incompatible dtype or model config)."
  123 + )
94 124 vectors.append(vec)
95   - return np.array(vectors, dtype=object)
  125 + return vectors
96 126  
... ...