From 54ccf28cd24af75e464c37fa02c08ddf8721a251 Mon Sep 17 00:00:00 2001 From: tangwang Date: Tue, 10 Mar 2026 12:21:46 +0800 Subject: [PATCH] tei --- config/config.yaml | 2 +- embeddings/server.py | 21 ++++++++++++++------- embeddings/tei_model.py | 52 +++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 56 insertions(+), 19 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index e492e8c..198dad5 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -168,7 +168,7 @@ services: http: base_url: "http://127.0.0.1:6005" # 服务内文本后端(embedding 进程启动时读取) - backend: "tei" # tei | local_st + backend: "local_st" # tei | local_st backends: tei: base_url: "http://127.0.0.1:8080" diff --git a/embeddings/server.py b/embeddings/server.py index 13b220e..a1cdab4 100644 --- a/embeddings/server.py +++ b/embeddings/server.py @@ -146,13 +146,20 @@ def embed_text(texts: List[str]) -> List[Optional[List[float]]]: raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string") normalized.append(s) - with _text_encode_lock: - embs = _text_model.encode_batch( - normalized, - batch_size=int(CONFIG.TEXT_BATCH_SIZE), - device=CONFIG.TEXT_DEVICE, - normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS), - ) + try: + with _text_encode_lock: + embs = _text_model.encode_batch( + normalized, + batch_size=int(CONFIG.TEXT_BATCH_SIZE), + device=CONFIG.TEXT_DEVICE, + normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS), + ) + except Exception as e: + logger.error("Text embedding backend failure: %s", e, exc_info=True) + raise HTTPException( + status_code=502, + detail=f"Text embedding backend failure: {e}", + ) from e if embs is None or len(embs) != len(normalized): raise RuntimeError( f"Text model response length mismatch: expected {len(normalized)}, " diff --git a/embeddings/tei_model.py b/embeddings/tei_model.py index 27b624d..529445a 100644 --- a/embeddings/tei_model.py +++ b/embeddings/tei_model.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import List, Union +from typing import Any, List, Union import numpy as np import requests @@ -30,6 +30,15 @@ class TEITextModel: health_url = f"{self.base_url}/health" response = requests.get(health_url, timeout=5) response.raise_for_status() + # Probe one tiny embedding at startup so runtime requests do not fail later + # with opaque "Invalid TEI embedding" errors. + probe_resp = requests.post( + self.endpoint, + json={"inputs": ["health check"]}, + timeout=min(self.timeout_sec, 15), + ) + probe_resp.raise_for_status() + self._parse_payload(probe_resp.json(), expected_len=1) @staticmethod def _normalize(embedding: np.ndarray) -> np.ndarray: @@ -77,20 +86,41 @@ class TEITextModel: ) response.raise_for_status() payload = response.json() + vectors = self._parse_payload(payload, expected_len=len(texts)) + if normalize_embeddings: + vectors = [self._normalize(vec) for vec in vectors] + return np.array(vectors, dtype=object) - if not isinstance(payload, list) or len(payload) != len(texts): + def _parse_payload(self, payload: Any, expected_len: int) -> List[np.ndarray]: + if not isinstance(payload, list) or len(payload) != expected_len: + got = 0 if payload is None else (len(payload) if isinstance(payload, list) else "non-list") raise RuntimeError( - f"TEI response length mismatch: expected {len(texts)}, " - f"got {0 if payload is None else len(payload)}" + f"TEI response length mismatch: expected {expected_len}, got {got}. " + f"Response type={type(payload).__name__}" ) vectors: List[np.ndarray] = [] - for i, emb in enumerate(payload): - vec = np.asarray(emb, dtype=np.float32) - if vec.ndim != 1 or vec.size == 0 or not np.isfinite(vec).all(): - raise RuntimeError(f"Invalid TEI embedding at index {i}") - if normalize_embeddings: - vec = self._normalize(vec) + for i, item in enumerate(payload): + emb = item.get("embedding") if isinstance(item, dict) else item + try: + vec = np.asarray(emb, dtype=np.float32) + except (TypeError, ValueError) as exc: + raise RuntimeError( + f"Invalid TEI embedding at index {i}: cannot convert to float array " + f"(item_type={type(item).__name__})" + ) from exc + + if vec.ndim != 1 or vec.size == 0: + raise RuntimeError( + f"Invalid TEI embedding at index {i}: shape={vec.shape}, size={vec.size}" + ) + if not np.isfinite(vec).all(): + preview = vec[:8].tolist() + raise RuntimeError( + f"Invalid TEI embedding at index {i}: contains non-finite values, " + f"preview={preview}. This often indicates TEI backend/model runtime issues " + f"(for example an incompatible dtype or model config)." + ) vectors.append(vec) - return np.array(vectors, dtype=object) + return vectors -- libgit2 0.21.2