"""TEI text embedding backend client."""

from __future__ import annotations

from typing import List, Union

import numpy as np
import requests


class TEITextModel:
    """Text embedding backend implemented via Hugging Face TEI HTTP API.

    Expected TEI endpoint:
      POST {base_url}/embed
      body: {"inputs": ["text1", "text2", ...]}
      response: [[...], [...], ...]
    """

    def __init__(self, base_url: str, timeout_sec: int = 60):
        """Store the endpoint configuration and probe the server once.

        Args:
            base_url: Root URL of the TEI server; trailing slashes are removed.
            timeout_sec: Timeout in seconds applied to every embed request.

        Raises:
            ValueError: If ``base_url`` is empty or only whitespace.
            requests.RequestException: If the health probe fails.
        """
        if not base_url or not str(base_url).strip():
            raise ValueError("TEI base_url must not be empty")
        self.base_url = str(base_url).rstrip("/")
        self.endpoint = f"{self.base_url}/embed"
        self.timeout_sec = int(timeout_sec)
        # Fail fast at construction time rather than on the first encode call.
        self._health_check()

    def _health_check(self) -> None:
        """GET ``{base_url}/health`` and raise on a non-success HTTP status."""
        health_url = f"{self.base_url}/health"
        response = requests.get(health_url, timeout=5)
        response.raise_for_status()

    @staticmethod
    def _normalize(embedding: np.ndarray) -> np.ndarray:
        """Return ``embedding`` scaled to unit L2 norm.

        Raises:
            RuntimeError: If the vector has zero norm and cannot be scaled.
        """
        norm = np.linalg.norm(embedding)
        if norm <= 0:
            raise RuntimeError("TEI returned zero-norm embedding")
        return embedding / norm

    @staticmethod
    def _to_vector(index: int, emb: object) -> np.ndarray:
        """Convert one response item to a validated 1-D float32 vector.

        Raises:
            RuntimeError: If the item cannot be converted to float32, is not
                one-dimensional, is empty, or contains NaN/inf values.
        """
        try:
            vec = np.asarray(emb, dtype=np.float32)
        except (TypeError, ValueError, OverflowError) as exc:
            # Ragged or non-numeric items make NumPy raise; report them the
            # same way as every other malformed embedding.
            raise RuntimeError(f"Invalid TEI embedding at index {index}") from exc
        if vec.ndim != 1 or vec.size == 0 or not np.isfinite(vec).all():
            raise RuntimeError(f"Invalid TEI embedding at index {index}")
        return vec

    def encode(
        self,
        sentences: Union[str, List[str]],
        normalize_embeddings: bool = True,
        device: str = "cuda",
        batch_size: int = 32,
    ) -> np.ndarray:
        """Embed one string or a list of strings.

        A single string is wrapped into a one-element list; everything else
        is delegated to :meth:`encode_batch`.
        """
        if isinstance(sentences, str):
            sentences = [sentences]
        return self.encode_batch(
            texts=sentences,
            batch_size=batch_size,
            device=device,
            normalize_embeddings=normalize_embeddings,
        )

    def encode_batch(
        self,
        texts: List[str],
        batch_size: int = 32,
        device: str = "cuda",
        normalize_embeddings: bool = True,
    ) -> np.ndarray:
        """Embed ``texts`` with a single POST to the TEI ``/embed`` endpoint.

        Args:
            texts: Non-empty, non-whitespace strings to embed.
            batch_size: Accepted for interface compatibility; ignored.
            device: Accepted for interface compatibility; ignored.
            normalize_embeddings: When true, scale each vector to unit L2 norm.

        Returns:
            An object-dtype array built from the per-text float32 vectors, or
            an empty object-dtype array when ``texts`` is ``None`` or empty.

        Raises:
            ValueError: If any input is not a string or is blank.
            requests.RequestException: On transport or HTTP status errors.
            RuntimeError: If the response is not a list, has the wrong number
                of items, or contains an invalid or zero-norm embedding.
        """
        del batch_size  # TEI performs its own batching.
        del device  # Not used by HTTP backend.

        if texts is None or len(texts) == 0:
            return np.array([], dtype=object)

        for i, t in enumerate(texts):
            if not isinstance(t, str) or not t.strip():
                raise ValueError(f"Invalid input text at index {i}: {t!r}")

        response = requests.post(
            self.endpoint,
            json={"inputs": texts},
            timeout=self.timeout_sec,
        )
        response.raise_for_status()
        payload = response.json()

        # Check the type before the length: calling len() on a JSON number or
        # boolean would raise TypeError while building the error message.
        if not isinstance(payload, list):
            raise RuntimeError(
                "TEI response must be a JSON list of embeddings, "
                f"got {type(payload).__name__}"
            )
        if len(payload) != len(texts):
            raise RuntimeError(
                f"TEI response length mismatch: expected {len(texts)}, "
                f"got {len(payload)}"
            )

        vectors: List[np.ndarray] = []
        for i, emb in enumerate(payload):
            vec = self._to_vector(i, emb)
            if normalize_embeddings:
                vec = self._normalize(vec)
            vectors.append(vec)
        return np.array(vectors, dtype=object)