Commit 54ccf28cd24af75e464c37fa02c08ddf8721a251
1 parent
c7e80cc2
tei
Showing
3 changed files
with
56 additions
and
19 deletions
Show diff stats
config/config.yaml
embeddings/server.py
| ... | ... | @@ -146,13 +146,20 @@ def embed_text(texts: List[str]) -> List[Optional[List[float]]]: |
| 146 | 146 | raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string") |
| 147 | 147 | normalized.append(s) |
| 148 | 148 | |
| 149 | - with _text_encode_lock: | |
| 150 | - embs = _text_model.encode_batch( | |
| 151 | - normalized, | |
| 152 | - batch_size=int(CONFIG.TEXT_BATCH_SIZE), | |
| 153 | - device=CONFIG.TEXT_DEVICE, | |
| 154 | - normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS), | |
| 155 | - ) | |
| 149 | + try: | |
| 150 | + with _text_encode_lock: | |
| 151 | + embs = _text_model.encode_batch( | |
| 152 | + normalized, | |
| 153 | + batch_size=int(CONFIG.TEXT_BATCH_SIZE), | |
| 154 | + device=CONFIG.TEXT_DEVICE, | |
| 155 | + normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS), | |
| 156 | + ) | |
| 157 | + except Exception as e: | |
| 158 | + logger.error("Text embedding backend failure: %s", e, exc_info=True) | |
| 159 | + raise HTTPException( | |
| 160 | + status_code=502, | |
| 161 | + detail=f"Text embedding backend failure: {e}", | |
| 162 | + ) from e | |
| 156 | 163 | if embs is None or len(embs) != len(normalized): |
| 157 | 164 | raise RuntimeError( |
| 158 | 165 | f"Text model response length mismatch: expected {len(normalized)}, " | ... | ... |
embeddings/tei_model.py
| ... | ... | @@ -2,7 +2,7 @@ |
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | -from typing import List, Union | |
| 5 | +from typing import Any, List, Union | |
| 6 | 6 | |
| 7 | 7 | import numpy as np |
| 8 | 8 | import requests |
| ... | ... | @@ -30,6 +30,15 @@ class TEITextModel: |
| 30 | 30 | health_url = f"{self.base_url}/health" |
| 31 | 31 | response = requests.get(health_url, timeout=5) |
| 32 | 32 | response.raise_for_status() |
| 33 | + # Probe one tiny embedding at startup so runtime requests do not fail later | |
| 34 | + # with opaque "Invalid TEI embedding" errors. | |
| 35 | + probe_resp = requests.post( | |
| 36 | + self.endpoint, | |
| 37 | + json={"inputs": ["health check"]}, | |
| 38 | + timeout=min(self.timeout_sec, 15), | |
| 39 | + ) | |
| 40 | + probe_resp.raise_for_status() | |
| 41 | + self._parse_payload(probe_resp.json(), expected_len=1) | |
| 33 | 42 | |
| 34 | 43 | @staticmethod |
| 35 | 44 | def _normalize(embedding: np.ndarray) -> np.ndarray: |
| ... | ... | @@ -77,20 +86,41 @@ class TEITextModel: |
| 77 | 86 | ) |
| 78 | 87 | response.raise_for_status() |
| 79 | 88 | payload = response.json() |
| 89 | + vectors = self._parse_payload(payload, expected_len=len(texts)) | |
| 90 | + if normalize_embeddings: | |
| 91 | + vectors = [self._normalize(vec) for vec in vectors] | |
| 92 | + return np.array(vectors, dtype=object) | |
| 80 | 93 | |
| 81 | - if not isinstance(payload, list) or len(payload) != len(texts): | |
| 94 | + def _parse_payload(self, payload: Any, expected_len: int) -> List[np.ndarray]: | |
| 95 | + if not isinstance(payload, list) or len(payload) != expected_len: | |
| 96 | + got = 0 if payload is None else (len(payload) if isinstance(payload, list) else "non-list") | |
| 82 | 97 | raise RuntimeError( |
| 83 | - f"TEI response length mismatch: expected {len(texts)}, " | |
| 84 | - f"got {0 if payload is None else len(payload)}" | |
| 98 | + f"TEI response length mismatch: expected {expected_len}, got {got}. " | |
| 99 | + f"Response type={type(payload).__name__}" | |
| 85 | 100 | ) |
| 86 | 101 | |
| 87 | 102 | vectors: List[np.ndarray] = [] |
| 88 | - for i, emb in enumerate(payload): | |
| 89 | - vec = np.asarray(emb, dtype=np.float32) | |
| 90 | - if vec.ndim != 1 or vec.size == 0 or not np.isfinite(vec).all(): | |
| 91 | - raise RuntimeError(f"Invalid TEI embedding at index {i}") | |
| 92 | - if normalize_embeddings: | |
| 93 | - vec = self._normalize(vec) | |
| 103 | + for i, item in enumerate(payload): | |
| 104 | + emb = item.get("embedding") if isinstance(item, dict) else item | |
| 105 | + try: | |
| 106 | + vec = np.asarray(emb, dtype=np.float32) | |
| 107 | + except (TypeError, ValueError) as exc: | |
| 108 | + raise RuntimeError( | |
| 109 | + f"Invalid TEI embedding at index {i}: cannot convert to float array " | |
| 110 | + f"(item_type={type(item).__name__})" | |
| 111 | + ) from exc | |
| 112 | + | |
| 113 | + if vec.ndim != 1 or vec.size == 0: | |
| 114 | + raise RuntimeError( | |
| 115 | + f"Invalid TEI embedding at index {i}: shape={vec.shape}, size={vec.size}" | |
| 116 | + ) | |
| 117 | + if not np.isfinite(vec).all(): | |
| 118 | + preview = vec[:8].tolist() | |
| 119 | + raise RuntimeError( | |
| 120 | + f"Invalid TEI embedding at index {i}: contains non-finite values, " | |
| 121 | + f"preview={preview}. This often indicates TEI backend/model runtime issues " | |
| 122 | + f"(for example an incompatible dtype or model config)." | |
| 123 | + ) | |
| 94 | 124 | vectors.append(vec) |
| 95 | - return np.array(vectors, dtype=object) | |
| 125 | + return vectors | |
| 96 | 126 | ... | ... |