Commit 54ccf28cd24af75e464c37fa02c08ddf8721a251 ("tei") — 1 parent: c7e80cc2
Showing 3 changed files with 56 additions and 19 deletions
Show diff stats
config/config.yaml
| @@ -168,7 +168,7 @@ services: | @@ -168,7 +168,7 @@ services: | ||
| 168 | http: | 168 | http: |
| 169 | base_url: "http://127.0.0.1:6005" | 169 | base_url: "http://127.0.0.1:6005" |
| 170 | # 服务内文本后端(embedding 进程启动时读取) | 170 | # 服务内文本后端(embedding 进程启动时读取) |
| 171 | - backend: "tei" # tei | local_st | 171 | + backend: "local_st" # tei | local_st |
| 172 | backends: | 172 | backends: |
| 173 | tei: | 173 | tei: |
| 174 | base_url: "http://127.0.0.1:8080" | 174 | base_url: "http://127.0.0.1:8080" |
embeddings/server.py
| @@ -146,13 +146,20 @@ def embed_text(texts: List[str]) -> List[Optional[List[float]]]: | @@ -146,13 +146,20 @@ def embed_text(texts: List[str]) -> List[Optional[List[float]]]: | ||
| 146 | raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string") | 146 | raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string") |
| 147 | normalized.append(s) | 147 | normalized.append(s) |
| 148 | 148 | ||
| 149 | - with _text_encode_lock: | ||
| 150 | - embs = _text_model.encode_batch( | ||
| 151 | - normalized, | ||
| 152 | - batch_size=int(CONFIG.TEXT_BATCH_SIZE), | ||
| 153 | - device=CONFIG.TEXT_DEVICE, | ||
| 154 | - normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS), | ||
| 155 | - ) | 149 | + try: |
| 150 | + with _text_encode_lock: | ||
| 151 | + embs = _text_model.encode_batch( | ||
| 152 | + normalized, | ||
| 153 | + batch_size=int(CONFIG.TEXT_BATCH_SIZE), | ||
| 154 | + device=CONFIG.TEXT_DEVICE, | ||
| 155 | + normalize_embeddings=bool(CONFIG.TEXT_NORMALIZE_EMBEDDINGS), | ||
| 156 | + ) | ||
| 157 | + except Exception as e: | ||
| 158 | + logger.error("Text embedding backend failure: %s", e, exc_info=True) | ||
| 159 | + raise HTTPException( | ||
| 160 | + status_code=502, | ||
| 161 | + detail=f"Text embedding backend failure: {e}", | ||
| 162 | + ) from e | ||
| 156 | if embs is None or len(embs) != len(normalized): | 163 | if embs is None or len(embs) != len(normalized): |
| 157 | raise RuntimeError( | 164 | raise RuntimeError( |
| 158 | f"Text model response length mismatch: expected {len(normalized)}, " | 165 | f"Text model response length mismatch: expected {len(normalized)}, " |
embeddings/tei_model.py
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | from __future__ import annotations | 3 | from __future__ import annotations |
| 4 | 4 | ||
| 5 | -from typing import List, Union | 5 | +from typing import Any, List, Union |
| 6 | 6 | ||
| 7 | import numpy as np | 7 | import numpy as np |
| 8 | import requests | 8 | import requests |
| @@ -30,6 +30,15 @@ class TEITextModel: | @@ -30,6 +30,15 @@ class TEITextModel: | ||
| 30 | health_url = f"{self.base_url}/health" | 30 | health_url = f"{self.base_url}/health" |
| 31 | response = requests.get(health_url, timeout=5) | 31 | response = requests.get(health_url, timeout=5) |
| 32 | response.raise_for_status() | 32 | response.raise_for_status() |
| 33 | + # Probe one tiny embedding at startup so runtime requests do not fail later | ||
| 34 | + # with opaque "Invalid TEI embedding" errors. | ||
| 35 | + probe_resp = requests.post( | ||
| 36 | + self.endpoint, | ||
| 37 | + json={"inputs": ["health check"]}, | ||
| 38 | + timeout=min(self.timeout_sec, 15), | ||
| 39 | + ) | ||
| 40 | + probe_resp.raise_for_status() | ||
| 41 | + self._parse_payload(probe_resp.json(), expected_len=1) | ||
| 33 | 42 | ||
| 34 | @staticmethod | 43 | @staticmethod |
| 35 | def _normalize(embedding: np.ndarray) -> np.ndarray: | 44 | def _normalize(embedding: np.ndarray) -> np.ndarray: |
| @@ -77,20 +86,41 @@ class TEITextModel: | @@ -77,20 +86,41 @@ class TEITextModel: | ||
| 77 | ) | 86 | ) |
| 78 | response.raise_for_status() | 87 | response.raise_for_status() |
| 79 | payload = response.json() | 88 | payload = response.json() |
| 89 | + vectors = self._parse_payload(payload, expected_len=len(texts)) | ||
| 90 | + if normalize_embeddings: | ||
| 91 | + vectors = [self._normalize(vec) for vec in vectors] | ||
| 92 | + return np.array(vectors, dtype=object) | ||
| 80 | 93 | ||
| 81 | - if not isinstance(payload, list) or len(payload) != len(texts): | 94 | + def _parse_payload(self, payload: Any, expected_len: int) -> List[np.ndarray]: |
| 95 | + if not isinstance(payload, list) or len(payload) != expected_len: | ||
| 96 | + got = 0 if payload is None else (len(payload) if isinstance(payload, list) else "non-list") | ||
| 82 | raise RuntimeError( | 97 | raise RuntimeError( |
| 83 | - f"TEI response length mismatch: expected {len(texts)}, " | ||
| 84 | - f"got {0 if payload is None else len(payload)}" | 98 | + f"TEI response length mismatch: expected {expected_len}, got {got}. " |
| 99 | + f"Response type={type(payload).__name__}" | ||
| 85 | ) | 100 | ) |
| 86 | 101 | ||
| 87 | vectors: List[np.ndarray] = [] | 102 | vectors: List[np.ndarray] = [] |
| 88 | - for i, emb in enumerate(payload): | ||
| 89 | - vec = np.asarray(emb, dtype=np.float32) | ||
| 90 | - if vec.ndim != 1 or vec.size == 0 or not np.isfinite(vec).all(): | ||
| 91 | - raise RuntimeError(f"Invalid TEI embedding at index {i}") | ||
| 92 | - if normalize_embeddings: | ||
| 93 | - vec = self._normalize(vec) | 103 | + for i, item in enumerate(payload): |
| 104 | + emb = item.get("embedding") if isinstance(item, dict) else item | ||
| 105 | + try: | ||
| 106 | + vec = np.asarray(emb, dtype=np.float32) | ||
| 107 | + except (TypeError, ValueError) as exc: | ||
| 108 | + raise RuntimeError( | ||
| 109 | + f"Invalid TEI embedding at index {i}: cannot convert to float array " | ||
| 110 | + f"(item_type={type(item).__name__})" | ||
| 111 | + ) from exc | ||
| 112 | + | ||
| 113 | + if vec.ndim != 1 or vec.size == 0: | ||
| 114 | + raise RuntimeError( | ||
| 115 | + f"Invalid TEI embedding at index {i}: shape={vec.shape}, size={vec.size}" | ||
| 116 | + ) | ||
| 117 | + if not np.isfinite(vec).all(): | ||
| 118 | + preview = vec[:8].tolist() | ||
| 119 | + raise RuntimeError( | ||
| 120 | + f"Invalid TEI embedding at index {i}: contains non-finite values, " | ||
| 121 | + f"preview={preview}. This often indicates TEI backend/model runtime issues " | ||
| 122 | + f"(for example an incompatible dtype or model config)." | ||
| 123 | + ) | ||
| 94 | vectors.append(vec) | 124 | vectors.append(vec) |
| 95 | - return np.array(vectors, dtype=object) | 125 | + return vectors |
| 96 | 126 |