Commit 7a013ca7b2030bf2b14b92eeb9f5282fea304300
1 parent
d47889b9
多模态文本向量服务ok
Showing
12 changed files
with
493 additions
and
179 deletions
Show diff stats
docs/CNCLIP_SERVICE说明文档.md
| @@ -4,9 +4,11 @@ | @@ -4,9 +4,11 @@ | ||
| 4 | 4 | ||
| 5 | ## 1. 作用与边界 | 5 | ## 1. 作用与边界 |
| 6 | 6 | ||
| 7 | -- `cnclip` 是独立 gRPC 服务(默认 `grpc://127.0.0.1:51000`)。 | ||
| 8 | -- 图片 embedding 服务(默认 `6008`)在 `USE_CLIP_AS_SERVICE=true` 时调用它完成 `/embed/image`。 | ||
| 9 | -- `cnclip` 不负责文本向量;文本向量由 TEI(8080)负责。 | 7 | +- `cnclip` 是独立 gRPC 服务(默认 `grpc://127.0.0.1:51000`),底层为 **Chinese-CLIP**:**图像与文本在同一向量空间**(图文可互检)。 |
| 8 | +- 图片 embedding 服务(默认 `6008`)在 `image_backend: clip_as_service` 时通过 gRPC 调用它完成: | ||
| 9 | + - `POST /embed/image`:图片 URL / 本地路径 → 图向量; | ||
| 10 | + - `POST /embed/clip_text`:**自然语言短语** → 文本塔向量(与上图向量同空间,用于 `image_embedding` 检索、以文搜图等)。 | ||
| 11 | +- **语义检索用的文本向量**(`title_embedding`、query 语义召回)仍由 **TEI(8080)+ `POST /embed/text`(6005)** 负责,与 CN-CLIP 不是同一模型、不是同一向量空间;请勿混淆。 | ||
| 10 | 12 | ||
| 11 | ## 2. 代码与脚本入口 | 13 | ## 2. 代码与脚本入口 |
| 12 | 14 | ||
| @@ -138,6 +140,8 @@ cat third-party/clip-as-service/server/torch-flow-temp.yml | @@ -138,6 +140,8 @@ cat third-party/clip-as-service/server/torch-flow-temp.yml | ||
| 138 | 140 | ||
| 139 | ### 7.3 发送一次编码请求(触发模型加载) | 141 | ### 7.3 发送一次编码请求(触发模型加载) |
| 140 | 142 | ||
| 143 | +**gRPC 直连(文本或图片 URL 混传时由 client 自动区分):** | ||
| 144 | + | ||
| 141 | ```bash | 145 | ```bash |
| 142 | PYTHONPATH="third-party/clip-as-service/client:${PYTHONPATH}" \ | 146 | PYTHONPATH="third-party/clip-as-service/client:${PYTHONPATH}" \ |
| 143 | NO_VERSION_CHECK=1 \ | 147 | NO_VERSION_CHECK=1 \ |
| @@ -151,6 +155,14 @@ PY | @@ -151,6 +155,14 @@ PY | ||
| 151 | 155 | ||
| 152 | 预期 `shape` 为 `(1, 1024)`。 | 156 | 预期 `shape` 为 `(1, 1024)`。 |
| 153 | 157 | ||
| 158 | +**HTTP(推荐业务侧):图片走 6008 `/embed/image`,纯文本走 `/embed/clip_text`(勿把 `http://` 图 URL 发到 `clip_text`):** | ||
| 159 | + | ||
| 160 | +```bash | ||
| 161 | +curl -sS -X POST "http://127.0.0.1:6008/embed/clip_text?normalize=true&priority=1" \ | ||
| 162 | + -H "Content-Type: application/json" \ | ||
| 163 | + -d '["纯棉T恤", "芭比娃娃"]' | ||
| 164 | +``` | ||
| 165 | + | ||
| 154 | ### 7.4 GPU 验证 | 166 | ### 7.4 GPU 验证 |
| 155 | 167 | ||
| 156 | ```bash | 168 | ```bash |
docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md
| @@ -9,7 +9,7 @@ | @@ -9,7 +9,7 @@ | ||
| 9 | | 服务 | 默认端口 | Base URL | 说明 | | 9 | | 服务 | 默认端口 | Base URL | 说明 | |
| 10 | |------|----------|----------|------| | 10 | |------|----------|----------|------| |
| 11 | | 向量服务(文本) | 6005 | `http://localhost:6005` | 文本向量化,用于 query/doc 语义检索 | | 11 | | 向量服务(文本) | 6005 | `http://localhost:6005` | 文本向量化,用于 query/doc 语义检索 | |
| 12 | -| 向量服务(图片) | 6008 | `http://localhost:6008` | 图片向量化,用于以图搜图 | | 12 | +| 向量服务(图片 / 多模态 CN-CLIP) | 6008 | `http://localhost:6008` | 图片向量 `/embed/image`;同空间文本向量 `/embed/clip_text`(以文搜图等) | |
| 13 | | 翻译服务 | 6006 | `http://localhost:6006` | 多语言翻译(云端与本地模型统一入口) | | 13 | | 翻译服务 | 6006 | `http://localhost:6006` | 多语言翻译(云端与本地模型统一入口) | |
| 14 | | 重排服务 | 6007 | `http://localhost:6007` | 对检索结果进行二次排序 | | 14 | | 重排服务 | 6007 | `http://localhost:6007` | 对检索结果进行二次排序 | |
| 15 | 15 | ||
| @@ -100,7 +100,35 @@ curl -X POST "http://localhost:6008/embed/image?normalize=true&priority=1" \ | @@ -100,7 +100,35 @@ curl -X POST "http://localhost:6008/embed/image?normalize=true&priority=1" \ | ||
| 100 | 100 | ||
| 101 | 在线以图搜图等实时场景可传 `priority=1`;离线索引回填保持默认 `priority=0`。 | 101 | 在线以图搜图等实时场景可传 `priority=1`;离线索引回填保持默认 `priority=0`。 |
| 102 | 102 | ||
| 103 | -#### 7.1.3 `GET /health` — 健康检查 | 103 | +#### 7.1.3 `POST /embed/clip_text` — CN-CLIP 文本多模态向量(与图片同空间) |
| 104 | + | ||
| 105 | +将**自然语言短语**编码为向量,与 `POST /embed/image` 输出的图向量**处于同一向量空间**(Chinese-CLIP 文本塔 / 图塔),用于 **以文搜图**、与 ES `image_embedding` 对齐的 KNN 等。 | ||
| 106 | + | ||
| 107 | +与 **7.1.1** 的 `POST /embed/text`(TEI/BGE,语义检索)**不是同一模型、不是同一空间**,请勿混用。 | ||
| 108 | + | ||
| 109 | +**请求体**(JSON 数组,每项为字符串;**不要**传入 `http://` / `https://` 图片 URL,图片请用 `/embed/image`): | ||
| 110 | + | ||
| 111 | +```json | ||
| 112 | +["纯棉短袖T恤", "芭比娃娃连衣裙"] | ||
| 113 | +``` | ||
| 114 | + | ||
| 115 | +**响应**(JSON 数组,与输入一一对应): | ||
| 116 | + | ||
| 117 | +```json | ||
| 118 | +[[0.01, -0.02, ...], [0.03, 0.01, ...], ...] | ||
| 119 | +``` | ||
| 120 | + | ||
| 121 | +**curl 示例**: | ||
| 122 | + | ||
| 123 | +```bash | ||
| 124 | +curl -X POST "http://localhost:6008/embed/clip_text?normalize=true&priority=1" \ | ||
| 125 | + -H "Content-Type: application/json" \ | ||
| 126 | + -d '["纯棉短袖", "street tee"]' | ||
| 127 | +``` | ||
| 128 | + | ||
| 129 | +说明:与 `/embed/image` 共用图片侧限流与 `IMAGE_MAX_INFLIGHT`;Redis 缓存键 namespace 为 `clip_text`,与 TEI 文本缓存区分。 | ||
| 130 | + | ||
| 131 | +#### 7.1.4 `GET /health` — 健康检查 | ||
| 104 | 132 | ||
| 105 | ```bash | 133 | ```bash |
| 106 | curl "http://localhost:6005/health" | 134 | curl "http://localhost:6005/health" |
| @@ -110,18 +138,18 @@ curl "http://localhost:6008/health" | @@ -110,18 +138,18 @@ curl "http://localhost:6008/health" | ||
| 110 | 返回中会包含: | 138 | 返回中会包含: |
| 111 | 139 | ||
| 112 | - `service_kind`:`text` / `image` / `all` | 140 | - `service_kind`:`text` / `image` / `all` |
| 113 | -- `cache_enabled`:text/image Redis 缓存是否可用 | 141 | +- `cache_enabled`:text/image/clip_text Redis 缓存是否可用 |
| 114 | - `limits`:当前 inflight limit、active、rejected_total 等 | 142 | - `limits`:当前 inflight limit、active、rejected_total 等 |
| 115 | - `stats`:request_total、cache_hits、cache_misses、avg_latency_ms 等 | 143 | - `stats`:request_total、cache_hits、cache_misses、avg_latency_ms 等 |
| 116 | 144 | ||
| 117 | -#### 7.1.4 `GET /ready` — 就绪检查 | 145 | +#### 7.1.5 `GET /ready` — 就绪检查 |
| 118 | 146 | ||
| 119 | ```bash | 147 | ```bash |
| 120 | curl "http://localhost:6005/ready" | 148 | curl "http://localhost:6005/ready" |
| 121 | curl "http://localhost:6008/ready" | 149 | curl "http://localhost:6008/ready" |
| 122 | ``` | 150 | ``` |
| 123 | 151 | ||
| 124 | -#### 7.1.5 缓存与限流说明 | 152 | +#### 7.1.6 缓存与限流说明 |
| 125 | 153 | ||
| 126 | - 文本与图片都会先查 Redis 向量缓存。 | 154 | - 文本与图片都会先查 Redis 向量缓存。 |
| 127 | - Redis 中 value 仍是 **BF16 bytes**,读取后恢复成 `float32` 返回。 | 155 | - Redis 中 value 仍是 **BF16 bytes**,读取后恢复成 `float32` 返回。 |
| @@ -130,8 +158,9 @@ curl "http://localhost:6008/ready" | @@ -130,8 +158,9 @@ curl "http://localhost:6008/ready" | ||
| 130 | - 当服务端发现超过 `TEXT_MAX_INFLIGHT` / `IMAGE_MAX_INFLIGHT` 时,会直接拒绝,而不是无限排队。 | 158 | - 当服务端发现超过 `TEXT_MAX_INFLIGHT` / `IMAGE_MAX_INFLIGHT` 时,会直接拒绝,而不是无限排队。 |
| 131 | - 其中 `POST /embed/text` 的 `priority=0` 会按上面的 inflight 规则直接拒绝;`priority>0` 不会被 admission 拒绝,但仍计入 inflight,并在服务端排队时优先于 `priority=0` 请求。 | 159 | - 其中 `POST /embed/text` 的 `priority=0` 会按上面的 inflight 规则直接拒绝;`priority>0` 不会被 admission 拒绝,但仍计入 inflight,并在服务端排队时优先于 `priority=0` 请求。 |
| 132 | - `POST /embed/image` 的 `priority=0` 受 `IMAGE_MAX_INFLIGHT` 约束;`priority>0` 不会被 admission 拒绝,但仍计入 inflight(无插队)。 | 160 | - `POST /embed/image` 的 `priority=0` 受 `IMAGE_MAX_INFLIGHT` 约束;`priority>0` 不会被 admission 拒绝,但仍计入 inflight(无插队)。 |
| 161 | +- `POST /embed/clip_text` 与 `/embed/image` 共用同一后端与 `IMAGE_MAX_INFLIGHT`(计入图片侧并发)。 | ||
| 133 | 162 | ||
| 134 | -#### 7.1.6 TEI 统一调优建议(主服务) | 163 | +#### 7.1.7 TEI 统一调优建议(主服务) |
| 135 | 164 | ||
| 136 | 使用单套主服务即可同时兼顾: | 165 | 使用单套主服务即可同时兼顾: |
| 137 | - 在线 query 向量化(低延迟,常见 `batch=1~4`) | 166 | - 在线 query 向量化(低延迟,常见 `batch=1~4`) |
| @@ -147,7 +176,7 @@ curl "http://localhost:6008/ready" | @@ -147,7 +176,7 @@ curl "http://localhost:6008/ready" | ||
| 147 | 默认端口: | 176 | 默认端口: |
| 148 | - TEI: `http://127.0.0.1:8080` | 177 | - TEI: `http://127.0.0.1:8080` |
| 149 | - 文本向量服务(`/embed/text`): `http://127.0.0.1:6005` | 178 | - 文本向量服务(`/embed/text`): `http://127.0.0.1:6005` |
| 150 | -- 图片向量服务(`/embed/image`): `http://127.0.0.1:6008` | 179 | +- 图片向量服务(`/embed/image`、`/embed/clip_text`): `http://127.0.0.1:6008` |
| 151 | 180 | ||
| 152 | 当前主 TEI 启动默认值(已按 T4/短文本场景调优): | 181 | 当前主 TEI 启动默认值(已按 T4/短文本场景调优): |
| 153 | - `TEI_MAX_BATCH_TOKENS=4096` | 182 | - `TEI_MAX_BATCH_TOKENS=4096` |
embeddings/README.md
| @@ -16,7 +16,7 @@ | @@ -16,7 +16,7 @@ | ||
| 16 | - **clip-as-service 客户端**:`clip_as_service_encoder.py`(图片向量,推荐) | 16 | - **clip-as-service 客户端**:`clip_as_service_encoder.py`(图片向量,推荐) |
| 17 | - **向量化服务(FastAPI)**:`server.py` | 17 | - **向量化服务(FastAPI)**:`server.py` |
| 18 | - **统一配置**:`config.py` | 18 | - **统一配置**:`config.py` |
| 19 | -- **接口契约**:`protocols.ImageEncoderProtocol`(图片编码统一为 `encode_image_urls(urls, batch_size, normalize_embeddings)`,本地 CN-CLIP 与 clip-as-service 均实现该接口) | 19 | +- **接口契约**:`protocols.ImageEncoderProtocol`(`encode_image_urls` + `encode_clip_texts`;本地 CN-CLIP 与 clip-as-service 均实现) |
| 20 | 20 | ||
| 21 | 说明:历史上的云端 embedding 试验实现(DashScope)已从主仓库移除。当前默认部署为**文本服务 6005** 与**图片服务 6008** 两条独立链路;`all` 模式仅作为单进程调试入口。 | 21 | 说明:历史上的云端 embedding 试验实现(DashScope)已从主仓库移除。当前默认部署为**文本服务 6005** 与**图片服务 6008** 两条独立链路;`all` 模式仅作为单进程调试入口。 |
| 22 | 22 | ||
| @@ -36,8 +36,9 @@ | @@ -36,8 +36,9 @@ | ||
| 36 | - 返回:`[[...], [...], ...]` | 36 | - 返回:`[[...], [...], ...]` |
| 37 | - 健康接口:`GET /health`、`GET /ready` | 37 | - 健康接口:`GET /health`、`GET /ready` |
| 38 | - 图片服务(默认 `6008`) | 38 | - 图片服务(默认 `6008`) |
| 39 | - - `POST /embed/image` | ||
| 40 | - - 请求体:`["url或本地路径1", ...]` | 39 | + - `POST /embed/image`:图片 URL 或本地路径 |
| 40 | + - `POST /embed/clip_text`:自然语言(CN-CLIP 文本塔,与 `/embed/image` 同向量空间;勿传 `http://` 图 URL) | ||
| 41 | + - 请求体:`["...", ...]` 字符串数组 | ||
| 41 | - 可选 query 参数:`normalize=true|false`、`priority=0|1` | 42 | - 可选 query 参数:`normalize=true|false`、`priority=0|1` |
| 42 | - 返回:`[[...], [...], ...]` | 43 | - 返回:`[[...], [...], ...]` |
| 43 | - 健康接口:`GET /health`、`GET /ready` | 44 | - 健康接口:`GET /health`、`GET /ready` |
| @@ -51,8 +52,9 @@ | @@ -51,8 +52,9 @@ | ||
| 51 | - client 侧:`text_encoder.py` / `image_encoder.py` | 52 | - client 侧:`text_encoder.py` / `image_encoder.py` |
| 52 | - service 侧:`server.py` | 53 | - service 侧:`server.py` |
| 53 | - 当前主 key 格式: | 54 | - 当前主 key 格式: |
| 54 | - - 文本:`embedding:embed:norm{0|1}:{text}` | 55 | + - 文本(TEI):`embedding:embed:norm{0|1}:{text}` |
| 55 | - 图片:`embedding:image:embed:norm{0|1}:{url_or_path}` | 56 | - 图片:`embedding:image:embed:norm{0|1}:{url_or_path}` |
| 57 | + - CN-CLIP 文本:`embedding:clip_text:clip_mm:norm{0|1}:{text}` | ||
| 56 | - 当前实现不再兼容历史 key 规则,只保留这一套格式,减少代码路径和缓存歧义。 | 58 | - 当前实现不再兼容历史 key 规则,只保留这一套格式,减少代码路径和缓存歧义。 |
| 57 | 59 | ||
| 58 | ### 压力隔离与拒绝策略 | 60 | ### 压力隔离与拒绝策略 |
embeddings/cache_keys.py
| 1 | """Shared cache key helpers for embedding inputs. | 1 | """Shared cache key helpers for embedding inputs. |
| 2 | 2 | ||
| 3 | Current canonical raw-key format: | 3 | Current canonical raw-key format: |
| 4 | -- text: ``embed:norm1:<text>`` / ``embed:norm0:<text>`` | ||
| 5 | -- image: ``embed:norm1:<url>`` / ``embed:norm0:<url>`` | 4 | +- text (TEI/BGE): ``embed:norm1:<text>`` / ``embed:norm0:<text>`` |
| 5 | +- image (CLIP): ``embed:norm1:<url>`` / ``embed:norm0:<url>`` | ||
| 6 | +- clip_text (CN-CLIP 文本,与图同空间): ``clip_mm:norm1:<text>`` / ``clip_mm:norm0:<text>`` | ||
| 6 | 7 | ||
| 7 | `RedisEmbeddingCache` adds the configured key prefix and optional namespace on top. | 8 | `RedisEmbeddingCache` adds the configured key prefix and optional namespace on top. |
| 8 | """ | 9 | """ |
| @@ -18,3 +19,9 @@ def build_text_cache_key(text: str, *, normalize: bool) -> str: | @@ -18,3 +19,9 @@ def build_text_cache_key(text: str, *, normalize: bool) -> str: | ||
| 18 | def build_image_cache_key(url: str, *, normalize: bool) -> str: | 19 | def build_image_cache_key(url: str, *, normalize: bool) -> str: |
| 19 | normalized_url = str(url or "").strip() | 20 | normalized_url = str(url or "").strip() |
| 20 | return f"embed:norm{1 if normalize else 0}:{normalized_url}" | 21 | return f"embed:norm{1 if normalize else 0}:{normalized_url}" |
| 22 | + | ||
| 23 | + | ||
| 24 | +def build_clip_text_cache_key(text: str, *, normalize: bool) -> str: | ||
| 25 | + """CN-CLIP / multimodal text (same vector space as /embed/image).""" | ||
| 26 | + normalized_text = str(text or "").strip() | ||
| 27 | + return f"clip_mm:norm{1 if normalize else 0}:{normalized_text}" |
embeddings/clip_as_service_encoder.py
| @@ -127,3 +127,41 @@ class ClipAsServiceImageEncoder: | @@ -127,3 +127,41 @@ class ClipAsServiceImageEncoder: | ||
| 127 | if not results: | 127 | if not results: |
| 128 | raise RuntimeError("clip-as-service returned empty result for single image URL") | 128 | raise RuntimeError("clip-as-service returned empty result for single image URL") |
| 129 | return results[0] | 129 | return results[0] |
| 130 | + | ||
| 131 | + def encode_clip_texts( | ||
| 132 | + self, | ||
| 133 | + texts: List[str], | ||
| 134 | + batch_size: Optional[int] = None, | ||
| 135 | + normalize_embeddings: bool = True, | ||
| 136 | + ) -> List[np.ndarray]: | ||
| 137 | + """ | ||
| 138 | + CN-CLIP 文本塔:与 encode_image_urls 输出同一向量空间(图文检索 / image_embedding)。 | ||
| 139 | + | ||
| 140 | + 仅传入自然语言字符串;HTTP 侧见 ``POST /embed/clip_text``。 | ||
| 141 | + """ | ||
| 142 | + if not texts: | ||
| 143 | + return [] | ||
| 144 | + bs = batch_size if batch_size is not None else self._batch_size | ||
| 145 | + arr = self._client.encode( | ||
| 146 | + texts, | ||
| 147 | + batch_size=bs, | ||
| 148 | + show_progress=self._show_progress, | ||
| 149 | + ) | ||
| 150 | + if arr is None or not hasattr(arr, "shape"): | ||
| 151 | + raise RuntimeError("clip-as-service encode (text) returned empty result") | ||
| 152 | + if len(arr) != len(texts): | ||
| 153 | + raise RuntimeError( | ||
| 154 | + f"clip-as-service text encode length mismatch: expected {len(texts)}, got {len(arr)}" | ||
| 155 | + ) | ||
| 156 | + out: List[np.ndarray] = [] | ||
| 157 | + for row in arr: | ||
| 158 | + vec = np.asarray(row, dtype=np.float32) | ||
| 159 | + if vec.ndim != 1 or vec.size == 0 or not np.isfinite(vec).all(): | ||
| 160 | + raise RuntimeError("clip-as-service returned invalid text embedding vector") | ||
| 161 | + if normalize_embeddings: | ||
| 162 | + norm = float(np.linalg.norm(vec)) | ||
| 163 | + if not np.isfinite(norm) or norm <= 0.0: | ||
| 164 | + raise RuntimeError("clip-as-service returned zero/invalid norm vector") | ||
| 165 | + vec = vec / norm | ||
| 166 | + out.append(vec) | ||
| 167 | + return out |
embeddings/clip_model.py
| @@ -131,3 +131,28 @@ class ClipImageModel(object): | @@ -131,3 +131,28 @@ class ClipImageModel(object): | ||
| 131 | else: | 131 | else: |
| 132 | raise ValueError(f"Unsupported image input type: {type(img)!r}") | 132 | raise ValueError(f"Unsupported image input type: {type(img)!r}") |
| 133 | return results | 133 | return results |
| 134 | + | ||
| 135 | + def encode_clip_texts( | ||
| 136 | + self, | ||
| 137 | + texts: List[str], | ||
| 138 | + batch_size: Optional[int] = None, | ||
| 139 | + normalize_embeddings: bool = True, | ||
| 140 | + ) -> List[np.ndarray]: | ||
| 141 | + """ | ||
| 142 | + CN-CLIP 文本塔向量,与 encode_image 同空间;供 ``POST /embed/clip_text`` 使用。 | ||
| 143 | + """ | ||
| 144 | + if not texts: | ||
| 145 | + return [] | ||
| 146 | + bs = batch_size or 8 | ||
| 147 | + out: List[np.ndarray] = [] | ||
| 148 | + for i in range(0, len(texts), bs): | ||
| 149 | + batch = texts[i : i + bs] | ||
| 150 | + text_data = clip.tokenize(batch).to(self.device) | ||
| 151 | + with torch.no_grad(): | ||
| 152 | + feats = self.model.encode_text(text_data) | ||
| 153 | + if normalize_embeddings: | ||
| 154 | + feats = feats / feats.norm(dim=-1, keepdim=True) | ||
| 155 | + arr = feats.cpu().numpy().astype("float32") | ||
| 156 | + for row in arr: | ||
| 157 | + out.append(np.asarray(row, dtype=np.float32)) | ||
| 158 | + return out |
embeddings/image_encoder.py
| @@ -10,8 +10,8 @@ from PIL import Image | @@ -10,8 +10,8 @@ from PIL import Image | ||
| 10 | logger = logging.getLogger(__name__) | 10 | logger = logging.getLogger(__name__) |
| 11 | 11 | ||
| 12 | from config.loader import get_app_config | 12 | from config.loader import get_app_config |
| 13 | -from config.services_config import get_embedding_image_base_url | ||
| 14 | -from embeddings.cache_keys import build_image_cache_key | 13 | +from config.services_config import get_embedding_image_backend_config, get_embedding_image_base_url |
| 14 | +from embeddings.cache_keys import build_clip_text_cache_key, build_image_cache_key | ||
| 15 | from embeddings.redis_embedding_cache import RedisEmbeddingCache | 15 | from embeddings.redis_embedding_cache import RedisEmbeddingCache |
| 16 | from request_log_context import build_downstream_request_headers, build_request_log_extra | 16 | from request_log_context import build_downstream_request_headers, build_request_log_extra |
| 17 | 17 | ||
| @@ -28,6 +28,7 @@ class CLIPImageEncoder: | @@ -28,6 +28,7 @@ class CLIPImageEncoder: | ||
| 28 | redis_config = get_app_config().infrastructure.redis | 28 | redis_config = get_app_config().infrastructure.redis |
| 29 | self.service_url = str(resolved_url).rstrip("/") | 29 | self.service_url = str(resolved_url).rstrip("/") |
| 30 | self.endpoint = f"{self.service_url}/embed/image" | 30 | self.endpoint = f"{self.service_url}/embed/image" |
| 31 | + self.clip_text_endpoint = f"{self.service_url}/embed/clip_text" | ||
| 31 | # Reuse embedding cache prefix, but separate namespace for images to avoid collisions. | 32 | # Reuse embedding cache prefix, but separate namespace for images to avoid collisions. |
| 32 | self.cache_prefix = str(redis_config.embedding_cache_prefix).strip() or "embedding" | 33 | self.cache_prefix = str(redis_config.embedding_cache_prefix).strip() or "embedding" |
| 33 | logger.info("Creating CLIPImageEncoder instance with service URL: %s", self.service_url) | 34 | logger.info("Creating CLIPImageEncoder instance with service URL: %s", self.service_url) |
| @@ -35,6 +36,10 @@ class CLIPImageEncoder: | @@ -35,6 +36,10 @@ class CLIPImageEncoder: | ||
| 35 | key_prefix=self.cache_prefix, | 36 | key_prefix=self.cache_prefix, |
| 36 | namespace="image", | 37 | namespace="image", |
| 37 | ) | 38 | ) |
| 39 | + self._clip_text_cache = RedisEmbeddingCache( | ||
| 40 | + key_prefix=self.cache_prefix, | ||
| 41 | + namespace="clip_text", | ||
| 42 | + ) | ||
| 38 | 43 | ||
| 39 | def _call_service( | 44 | def _call_service( |
| 40 | self, | 45 | self, |
| @@ -84,6 +89,108 @@ class CLIPImageEncoder: | @@ -84,6 +89,108 @@ class CLIPImageEncoder: | ||
| 84 | ) | 89 | ) |
| 85 | raise | 90 | raise |
| 86 | 91 | ||
| 92 | + def _clip_text_via_grpc( | ||
| 93 | + self, | ||
| 94 | + request_data: List[str], | ||
| 95 | + normalize_embeddings: bool, | ||
| 96 | + ) -> List[Any]: | ||
| 97 | + """旧版 6008 无 ``/embed/clip_text`` 时走 gRPC(需 ``image_backend: clip_as_service``)。""" | ||
| 98 | + backend, cfg = get_embedding_image_backend_config() | ||
| 99 | + if backend != "clip_as_service": | ||
| 100 | + raise RuntimeError( | ||
| 101 | + "POST /embed/clip_text 返回 404:请重启图片向量服务(6008)以加载新路由;" | ||
| 102 | + "或配置 services.embedding.image_backend=clip_as_service 并启动 grpc cnclip。" | ||
| 103 | + ) | ||
| 104 | + from embeddings.clip_as_service_encoder import ClipAsServiceImageEncoder | ||
| 105 | + from embeddings.config import CONFIG | ||
| 106 | + | ||
| 107 | + enc = ClipAsServiceImageEncoder( | ||
| 108 | + server=str(cfg.get("server") or CONFIG.CLIP_AS_SERVICE_SERVER), | ||
| 109 | + batch_size=int(cfg.get("batch_size") or CONFIG.IMAGE_BATCH_SIZE), | ||
| 110 | + ) | ||
| 111 | + arrs = enc.encode_clip_texts( | ||
| 112 | + request_data, | ||
| 113 | + batch_size=len(request_data), | ||
| 114 | + normalize_embeddings=normalize_embeddings, | ||
| 115 | + ) | ||
| 116 | + return [v.tolist() for v in arrs] | ||
| 117 | + | ||
| 118 | + def _call_clip_text_service( | ||
| 119 | + self, | ||
| 120 | + request_data: List[str], | ||
| 121 | + normalize_embeddings: bool = True, | ||
| 122 | + priority: int = 1, | ||
| 123 | + request_id: Optional[str] = None, | ||
| 124 | + user_id: Optional[str] = None, | ||
| 125 | + ) -> List[Any]: | ||
| 126 | + response = None | ||
| 127 | + try: | ||
| 128 | + response = requests.post( | ||
| 129 | + self.clip_text_endpoint, | ||
| 130 | + params={ | ||
| 131 | + "normalize": "true" if normalize_embeddings else "false", | ||
| 132 | + "priority": max(0, int(priority)), | ||
| 133 | + }, | ||
| 134 | + json=request_data, | ||
| 135 | + headers=build_downstream_request_headers(request_id=request_id, user_id=user_id), | ||
| 136 | + timeout=60, | ||
| 137 | + ) | ||
| 138 | + if response.status_code == 404: | ||
| 139 | + logger.warning( | ||
| 140 | + "POST %s returned 404; using clip-as-service gRPC fallback (restart 6008 after deploy to use HTTP)", | ||
| 141 | + self.clip_text_endpoint, | ||
| 142 | + ) | ||
| 143 | + return self._clip_text_via_grpc(request_data, normalize_embeddings) | ||
| 144 | + response.raise_for_status() | ||
| 145 | + return response.json() | ||
| 146 | + except requests.exceptions.RequestException as e: | ||
| 147 | + body_preview = "" | ||
| 148 | + if response is not None: | ||
| 149 | + try: | ||
| 150 | + body_preview = (response.text or "")[:300] | ||
| 151 | + except Exception: | ||
| 152 | + body_preview = "" | ||
| 153 | + logger.error( | ||
| 154 | + "CLIPImageEncoder clip_text request failed | status=%s body=%s error=%s", | ||
| 155 | + getattr(response, "status_code", "n/a"), | ||
| 156 | + body_preview, | ||
| 157 | + e, | ||
| 158 | + exc_info=True, | ||
| 159 | + extra=build_request_log_extra(request_id=request_id, user_id=user_id), | ||
| 160 | + ) | ||
| 161 | + raise | ||
| 162 | + | ||
| 163 | + def encode_clip_text( | ||
| 164 | + self, | ||
| 165 | + text: str, | ||
| 166 | + normalize_embeddings: bool = True, | ||
| 167 | + priority: int = 1, | ||
| 168 | + request_id: Optional[str] = None, | ||
| 169 | + user_id: Optional[str] = None, | ||
| 170 | + ) -> np.ndarray: | ||
| 171 | + """ | ||
| 172 | + CN-CLIP 文本塔(与 ``/embed/image`` 同向量空间),对应服务端 ``POST /embed/clip_text``。 | ||
| 173 | + """ | ||
| 174 | + cache_key = build_clip_text_cache_key(text, normalize=normalize_embeddings) | ||
| 175 | + cached = self._clip_text_cache.get(cache_key) | ||
| 176 | + if cached is not None: | ||
| 177 | + return cached | ||
| 178 | + | ||
| 179 | + response_data = self._call_clip_text_service( | ||
| 180 | + [text.strip()], | ||
| 181 | + normalize_embeddings=normalize_embeddings, | ||
| 182 | + priority=priority, | ||
| 183 | + request_id=request_id, | ||
| 184 | + user_id=user_id, | ||
| 185 | + ) | ||
| 186 | + if not response_data or len(response_data) != 1 or response_data[0] is None: | ||
| 187 | + raise RuntimeError(f"No CLIP text embedding returned for: {text[:80]!r}") | ||
| 188 | + vec = np.array(response_data[0], dtype=np.float32) | ||
| 189 | + if vec.ndim != 1 or vec.size == 0 or not np.isfinite(vec).all(): | ||
| 190 | + raise RuntimeError("Invalid CLIP text embedding returned") | ||
| 191 | + self._clip_text_cache.set(cache_key, vec) | ||
| 192 | + return vec | ||
| 193 | + | ||
| 87 | def encode_image(self, image: Image.Image) -> np.ndarray: | 194 | def encode_image(self, image: Image.Image) -> np.ndarray: |
| 88 | """ | 195 | """ |
| 89 | Encode image to embedding vector using network service. | 196 | Encode image to embedding vector using network service. |
embeddings/protocols.py
| @@ -27,3 +27,17 @@ class ImageEncoderProtocol(Protocol): | @@ -27,3 +27,17 @@ class ImageEncoderProtocol(Protocol): | ||
| 27 | of returning partial None placeholders. | 27 | of returning partial None placeholders. |
| 28 | """ | 28 | """ |
| 29 | ... | 29 | ... |
| 30 | + | ||
| 31 | + def encode_clip_texts( | ||
| 32 | + self, | ||
| 33 | + texts: List[str], | ||
| 34 | + batch_size: Optional[int] = None, | ||
| 35 | + normalize_embeddings: bool = True, | ||
| 36 | + ) -> List[np.ndarray]: | ||
| 37 | + """ | ||
| 38 | + Encode natural-language strings with the CLIP/CN-CLIP text tower (same space as images). | ||
| 39 | + | ||
| 40 | + Returns: | ||
| 41 | + List of vectors, same length as texts. | ||
| 42 | + """ | ||
| 43 | + ... |
embeddings/server.py
| @@ -2,8 +2,9 @@ | @@ -2,8 +2,9 @@ | ||
| 2 | Embedding service (FastAPI). | 2 | Embedding service (FastAPI). |
| 3 | 3 | ||
| 4 | API (simple list-in, list-out; aligned by index): | 4 | API (simple list-in, list-out; aligned by index): |
| 5 | -- POST /embed/text body: ["text1", "text2", ...] -> [[...], ...] | ||
| 6 | -- POST /embed/image body: ["url_or_path1", ...] -> [[...], ...] | 5 | +- POST /embed/text body: ["text1", "text2", ...] -> [[...], ...] (TEI/BGE,语义检索 title_embedding) |
| 6 | +- POST /embed/image body: ["url_or_path1", ...] -> [[...], ...] (CN-CLIP 图向量) | ||
| 7 | +- POST /embed/clip_text body: ["短语1", "短语2", ...] -> [[...], ...] (CN-CLIP 文本塔,与 /embed/image 同空间) | ||
| 7 | """ | 8 | """ |
| 8 | 9 | ||
| 9 | import logging | 10 | import logging |
| @@ -22,7 +23,7 @@ from fastapi.concurrency import run_in_threadpool | @@ -22,7 +23,7 @@ from fastapi.concurrency import run_in_threadpool | ||
| 22 | 23 | ||
| 23 | from config.env_config import REDIS_CONFIG | 24 | from config.env_config import REDIS_CONFIG |
| 24 | from config.services_config import get_embedding_backend_config | 25 | from config.services_config import get_embedding_backend_config |
| 25 | -from embeddings.cache_keys import build_image_cache_key, build_text_cache_key | 26 | +from embeddings.cache_keys import build_clip_text_cache_key, build_image_cache_key, build_text_cache_key |
| 26 | from embeddings.config import CONFIG | 27 | from embeddings.config import CONFIG |
| 27 | from embeddings.protocols import ImageEncoderProtocol | 28 | from embeddings.protocols import ImageEncoderProtocol |
| 28 | from embeddings.redis_embedding_cache import RedisEmbeddingCache | 29 | from embeddings.redis_embedding_cache import RedisEmbeddingCache |
| @@ -373,6 +374,7 @@ _text_stats = _EndpointStats(name="text") | @@ -373,6 +374,7 @@ _text_stats = _EndpointStats(name="text") | ||
| 373 | _image_stats = _EndpointStats(name="image") | 374 | _image_stats = _EndpointStats(name="image") |
| 374 | _text_cache = RedisEmbeddingCache(key_prefix=_CACHE_PREFIX, namespace="") | 375 | _text_cache = RedisEmbeddingCache(key_prefix=_CACHE_PREFIX, namespace="") |
| 375 | _image_cache = RedisEmbeddingCache(key_prefix=_CACHE_PREFIX, namespace="image") | 376 | _image_cache = RedisEmbeddingCache(key_prefix=_CACHE_PREFIX, namespace="image") |
| 377 | +_clip_text_cache = RedisEmbeddingCache(key_prefix=_CACHE_PREFIX, namespace="clip_text") | ||
| 376 | 378 | ||
| 377 | 379 | ||
| 378 | @dataclass | 380 | @dataclass |
| @@ -752,13 +754,20 @@ def _try_full_text_cache_hit( | @@ -752,13 +754,20 @@ def _try_full_text_cache_hit( | ||
| 752 | ) | 754 | ) |
| 753 | 755 | ||
| 754 | 756 | ||
| 755 | -def _try_full_image_cache_hit( | ||
| 756 | - urls: List[str], | 757 | +def _try_full_image_lane_cache_hit( |
| 758 | + items: List[str], | ||
| 757 | effective_normalize: bool, | 759 | effective_normalize: bool, |
| 760 | + *, | ||
| 761 | + lane: str, | ||
| 758 | ) -> Optional[_EmbedResult]: | 762 | ) -> Optional[_EmbedResult]: |
| 759 | out: List[Optional[List[float]]] = [] | 763 | out: List[Optional[List[float]]] = [] |
| 760 | - for url in urls: | ||
| 761 | - cached = _image_cache.get(build_image_cache_key(url, normalize=effective_normalize)) | 764 | + for item in items: |
| 765 | + if lane == "image": | ||
| 766 | + ck = build_image_cache_key(item, normalize=effective_normalize) | ||
| 767 | + cached = _image_cache.get(ck) | ||
| 768 | + else: | ||
| 769 | + ck = build_clip_text_cache_key(item, normalize=effective_normalize) | ||
| 770 | + cached = _clip_text_cache.get(ck) | ||
| 762 | if cached is None: | 771 | if cached is None: |
| 763 | return None | 772 | return None |
| 764 | vec = _as_list(cached, normalize=False) | 773 | vec = _as_list(cached, normalize=False) |
| @@ -774,6 +783,108 @@ def _try_full_image_cache_hit( | @@ -774,6 +783,108 @@ def _try_full_image_cache_hit( | ||
| 774 | ) | 783 | ) |
| 775 | 784 | ||
| 776 | 785 | ||
| 786 | +def _embed_image_lane_impl( | ||
| 787 | + items: List[str], | ||
| 788 | + effective_normalize: bool, | ||
| 789 | + request_id: str, | ||
| 790 | + user_id: str, | ||
| 791 | + *, | ||
| 792 | + lane: str, | ||
| 793 | +) -> _EmbedResult: | ||
| 794 | + if _image_model is None: | ||
| 795 | + raise RuntimeError("Image model not loaded") | ||
| 796 | + | ||
| 797 | + out: List[Optional[List[float]]] = [None] * len(items) | ||
| 798 | + missing_indices: List[int] = [] | ||
| 799 | + missing_items: List[str] = [] | ||
| 800 | + missing_keys: List[str] = [] | ||
| 801 | + cache_hits = 0 | ||
| 802 | + for idx, item in enumerate(items): | ||
| 803 | + if lane == "image": | ||
| 804 | + ck = build_image_cache_key(item, normalize=effective_normalize) | ||
| 805 | + cached = _image_cache.get(ck) | ||
| 806 | + else: | ||
| 807 | + ck = build_clip_text_cache_key(item, normalize=effective_normalize) | ||
| 808 | + cached = _clip_text_cache.get(ck) | ||
| 809 | + if cached is not None: | ||
| 810 | + vec = _as_list(cached, normalize=False) | ||
| 811 | + if vec is not None: | ||
| 812 | + out[idx] = vec | ||
| 813 | + cache_hits += 1 | ||
| 814 | + continue | ||
| 815 | + missing_indices.append(idx) | ||
| 816 | + missing_items.append(item) | ||
| 817 | + missing_keys.append(ck) | ||
| 818 | + | ||
| 819 | + if not missing_items: | ||
| 820 | + logger.info( | ||
| 821 | + "%s lane cache-only | inputs=%d normalize=%s dim=%d cache_hits=%d", | ||
| 822 | + lane, | ||
| 823 | + len(items), | ||
| 824 | + effective_normalize, | ||
| 825 | + len(out[0]) if out and out[0] is not None else 0, | ||
| 826 | + cache_hits, | ||
| 827 | + extra=build_request_log_extra(request_id=request_id, user_id=user_id), | ||
| 828 | + ) | ||
| 829 | + return _EmbedResult( | ||
| 830 | + vectors=out, | ||
| 831 | + cache_hits=cache_hits, | ||
| 832 | + cache_misses=0, | ||
| 833 | + backend_elapsed_ms=0.0, | ||
| 834 | + mode="cache-only", | ||
| 835 | + ) | ||
| 836 | + | ||
| 837 | + backend_t0 = time.perf_counter() | ||
| 838 | + with _image_encode_lock: | ||
| 839 | + if lane == "image": | ||
| 840 | + vectors = _image_model.encode_image_urls( | ||
| 841 | + missing_items, | ||
| 842 | + batch_size=CONFIG.IMAGE_BATCH_SIZE, | ||
| 843 | + normalize_embeddings=effective_normalize, | ||
| 844 | + ) | ||
| 845 | + else: | ||
| 846 | + vectors = _image_model.encode_clip_texts( | ||
| 847 | + missing_items, | ||
| 848 | + batch_size=CONFIG.IMAGE_BATCH_SIZE, | ||
| 849 | + normalize_embeddings=effective_normalize, | ||
| 850 | + ) | ||
| 851 | + if vectors is None or len(vectors) != len(missing_items): | ||
| 852 | + raise RuntimeError( | ||
| 853 | + f"{lane} lane length mismatch: expected {len(missing_items)}, " | ||
| 854 | + f"got {0 if vectors is None else len(vectors)}" | ||
| 855 | + ) | ||
| 856 | + | ||
| 857 | + for pos, ck, vec in zip(missing_indices, missing_keys, vectors): | ||
| 858 | + out_vec = _as_list(vec, normalize=effective_normalize) | ||
| 859 | + if out_vec is None: | ||
| 860 | + raise RuntimeError(f"{lane} lane empty embedding at position {pos}") | ||
| 861 | + out[pos] = out_vec | ||
| 862 | + if lane == "image": | ||
| 863 | + _image_cache.set(ck, np.asarray(out_vec, dtype=np.float32)) | ||
| 864 | + else: | ||
| 865 | + _clip_text_cache.set(ck, np.asarray(out_vec, dtype=np.float32)) | ||
| 866 | + | ||
| 867 | + backend_elapsed_ms = (time.perf_counter() - backend_t0) * 1000.0 | ||
| 868 | + logger.info( | ||
| 869 | + "%s lane backend-batch | inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=%d backend_elapsed_ms=%.2f", | ||
| 870 | + lane, | ||
| 871 | + len(items), | ||
| 872 | + effective_normalize, | ||
| 873 | + len(out[0]) if out and out[0] is not None else 0, | ||
| 874 | + cache_hits, | ||
| 875 | + len(missing_items), | ||
| 876 | + backend_elapsed_ms, | ||
| 877 | + extra=build_request_log_extra(request_id=request_id, user_id=user_id), | ||
| 878 | + ) | ||
| 879 | + return _EmbedResult( | ||
| 880 | + vectors=out, | ||
| 881 | + cache_hits=cache_hits, | ||
| 882 | + cache_misses=len(missing_items), | ||
| 883 | + backend_elapsed_ms=backend_elapsed_ms, | ||
| 884 | + mode="backend-batch", | ||
| 885 | + ) | ||
| 886 | + | ||
| 887 | + | ||
| 777 | @app.get("/health") | 888 | @app.get("/health") |
| 778 | def health() -> Dict[str, Any]: | 889 | def health() -> Dict[str, Any]: |
| 779 | """Health check endpoint. Returns status and current throttling stats.""" | 890 | """Health check endpoint. Returns status and current throttling stats.""" |
| @@ -789,6 +900,7 @@ def health() -> Dict[str, Any]: | @@ -789,6 +900,7 @@ def health() -> Dict[str, Any]: | ||
| 789 | "cache_enabled": { | 900 | "cache_enabled": { |
| 790 | "text": _text_cache.redis_client is not None, | 901 | "text": _text_cache.redis_client is not None, |
| 791 | "image": _image_cache.redis_client is not None, | 902 | "image": _image_cache.redis_client is not None, |
| 903 | + "clip_text": _clip_text_cache.redis_client is not None, | ||
| 792 | }, | 904 | }, |
| 793 | "limits": { | 905 | "limits": { |
| 794 | "text": _text_request_limiter.snapshot(), | 906 | "text": _text_request_limiter.snapshot(), |
| @@ -1148,102 +1260,29 @@ async def embed_text( | @@ -1148,102 +1260,29 @@ async def embed_text( | ||
| 1148 | reset_request_log_context(log_tokens) | 1260 | reset_request_log_context(log_tokens) |
| 1149 | 1261 | ||
| 1150 | 1262 | ||
| 1151 | -def _embed_image_impl( | ||
| 1152 | - urls: List[str], | ||
| 1153 | - effective_normalize: bool, | ||
| 1154 | - request_id: str, | ||
| 1155 | - user_id: str, | ||
| 1156 | -) -> _EmbedResult: | ||
| 1157 | - if _image_model is None: | ||
| 1158 | - raise RuntimeError("Image model not loaded") | ||
| 1159 | - | ||
| 1160 | - out: List[Optional[List[float]]] = [None] * len(urls) | ||
| 1161 | - missing_indices: List[int] = [] | ||
| 1162 | - missing_urls: List[str] = [] | ||
| 1163 | - missing_cache_keys: List[str] = [] | ||
| 1164 | - cache_hits = 0 | ||
| 1165 | - for idx, url in enumerate(urls): | ||
| 1166 | - cache_key = build_image_cache_key(url, normalize=effective_normalize) | ||
| 1167 | - cached = _image_cache.get(cache_key) | ||
| 1168 | - if cached is not None: | ||
| 1169 | - vec = _as_list(cached, normalize=False) | ||
| 1170 | - if vec is not None: | ||
| 1171 | - out[idx] = vec | ||
| 1172 | - cache_hits += 1 | ||
| 1173 | - continue | ||
| 1174 | - missing_indices.append(idx) | ||
| 1175 | - missing_urls.append(url) | ||
| 1176 | - missing_cache_keys.append(cache_key) | ||
| 1177 | - | ||
| 1178 | - if not missing_urls: | ||
| 1179 | - logger.info( | ||
| 1180 | - "image backend done | mode=cache-only inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=0 backend_elapsed_ms=0.00", | ||
| 1181 | - len(urls), | ||
| 1182 | - effective_normalize, | ||
| 1183 | - len(out[0]) if out and out[0] is not None else 0, | ||
| 1184 | - cache_hits, | ||
| 1185 | - extra=build_request_log_extra(request_id, user_id), | ||
| 1186 | - ) | ||
| 1187 | - return _EmbedResult( | ||
| 1188 | - vectors=out, | ||
| 1189 | - cache_hits=cache_hits, | ||
| 1190 | - cache_misses=0, | ||
| 1191 | - backend_elapsed_ms=0.0, | ||
| 1192 | - mode="cache-only", | ||
| 1193 | - ) | ||
| 1194 | - | ||
| 1195 | - backend_t0 = time.perf_counter() | ||
| 1196 | - with _image_encode_lock: | ||
| 1197 | - vectors = _image_model.encode_image_urls( | ||
| 1198 | - missing_urls, | ||
| 1199 | - batch_size=CONFIG.IMAGE_BATCH_SIZE, | ||
| 1200 | - normalize_embeddings=effective_normalize, | ||
| 1201 | - ) | ||
| 1202 | - if vectors is None or len(vectors) != len(missing_urls): | ||
| 1203 | - raise RuntimeError( | ||
| 1204 | - f"Image model response length mismatch: expected {len(missing_urls)}, " | ||
| 1205 | - f"got {0 if vectors is None else len(vectors)}" | ||
| 1206 | - ) | 1263 | +def _parse_string_inputs(raw: List[Any], *, kind: str, empty_detail: str) -> List[str]: |
| 1264 | + out: List[str] = [] | ||
| 1265 | + for i, x in enumerate(raw): | ||
| 1266 | + if not isinstance(x, str): | ||
| 1267 | + raise HTTPException(status_code=400, detail=f"Invalid {kind} at index {i}: must be string") | ||
| 1268 | + s = x.strip() | ||
| 1269 | + if not s: | ||
| 1270 | + raise HTTPException(status_code=400, detail=f"Invalid {kind} at index {i}: {empty_detail}") | ||
| 1271 | + out.append(s) | ||
| 1272 | + return out | ||
| 1207 | 1273 | ||
| 1208 | - for pos, cache_key, vec in zip(missing_indices, missing_cache_keys, vectors): | ||
| 1209 | - out_vec = _as_list(vec, normalize=effective_normalize) | ||
| 1210 | - if out_vec is None: | ||
| 1211 | - raise RuntimeError(f"Image model returned empty embedding for position {pos}") | ||
| 1212 | - out[pos] = out_vec | ||
| 1213 | - _image_cache.set(cache_key, np.asarray(out_vec, dtype=np.float32)) | ||
| 1214 | - | ||
| 1215 | - backend_elapsed_ms = (time.perf_counter() - backend_t0) * 1000.0 | ||
| 1216 | - | ||
| 1217 | - logger.info( | ||
| 1218 | - "image backend done | mode=backend-batch inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=%d backend_elapsed_ms=%.2f", | ||
| 1219 | - len(urls), | ||
| 1220 | - effective_normalize, | ||
| 1221 | - len(out[0]) if out and out[0] is not None else 0, | ||
| 1222 | - cache_hits, | ||
| 1223 | - len(missing_urls), | ||
| 1224 | - backend_elapsed_ms, | ||
| 1225 | - extra=build_request_log_extra(request_id, user_id), | ||
| 1226 | - ) | ||
| 1227 | - return _EmbedResult( | ||
| 1228 | - vectors=out, | ||
| 1229 | - cache_hits=cache_hits, | ||
| 1230 | - cache_misses=len(missing_urls), | ||
| 1231 | - backend_elapsed_ms=backend_elapsed_ms, | ||
| 1232 | - mode="backend-batch", | ||
| 1233 | - ) | ||
| 1234 | 1274 | ||
| 1235 | - | ||
| 1236 | -@app.post("/embed/image") | ||
| 1237 | -async def embed_image( | ||
| 1238 | - images: List[str], | 1275 | +async def _run_image_lane_embed( |
| 1276 | + *, | ||
| 1277 | + route: str, | ||
| 1278 | + lane: str, | ||
| 1279 | + items: List[str], | ||
| 1239 | http_request: Request, | 1280 | http_request: Request, |
| 1240 | response: Response, | 1281 | response: Response, |
| 1241 | - normalize: Optional[bool] = None, | ||
| 1242 | - priority: int = 0, | 1282 | + normalize: Optional[bool], |
| 1283 | + priority: int, | ||
| 1284 | + preview_chars: int, | ||
| 1243 | ) -> List[Optional[List[float]]]: | 1285 | ) -> List[Optional[List[float]]]: |
| 1244 | - if _image_model is None: | ||
| 1245 | - raise HTTPException(status_code=503, detail="Image embedding model not loaded in this service") | ||
| 1246 | - | ||
| 1247 | request_id = _resolve_request_id(http_request) | 1286 | request_id = _resolve_request_id(http_request) |
| 1248 | user_id = _resolve_user_id(http_request) | 1287 | user_id = _resolve_user_id(http_request) |
| 1249 | _, _, log_tokens = bind_request_log_context(request_id, user_id) | 1288 | _, _, log_tokens = bind_request_log_context(request_id, user_id) |
| @@ -1255,24 +1294,16 @@ async def embed_image( | @@ -1255,24 +1294,16 @@ async def embed_image( | ||
| 1255 | cache_hits = 0 | 1294 | cache_hits = 0 |
| 1256 | cache_misses = 0 | 1295 | cache_misses = 0 |
| 1257 | limiter_acquired = False | 1296 | limiter_acquired = False |
| 1297 | + items_in: List[str] = list(items) | ||
| 1258 | 1298 | ||
| 1259 | try: | 1299 | try: |
| 1260 | if priority < 0: | 1300 | if priority < 0: |
| 1261 | raise HTTPException(status_code=400, detail="priority must be >= 0") | 1301 | raise HTTPException(status_code=400, detail="priority must be >= 0") |
| 1262 | effective_priority = _effective_priority(priority) | 1302 | effective_priority = _effective_priority(priority) |
| 1263 | - | ||
| 1264 | effective_normalize = bool(CONFIG.IMAGE_NORMALIZE_EMBEDDINGS) if normalize is None else bool(normalize) | 1303 | effective_normalize = bool(CONFIG.IMAGE_NORMALIZE_EMBEDDINGS) if normalize is None else bool(normalize) |
| 1265 | - urls: List[str] = [] | ||
| 1266 | - for i, url_or_path in enumerate(images): | ||
| 1267 | - if not isinstance(url_or_path, str): | ||
| 1268 | - raise HTTPException(status_code=400, detail=f"Invalid image at index {i}: must be string URL/path") | ||
| 1269 | - s = url_or_path.strip() | ||
| 1270 | - if not s: | ||
| 1271 | - raise HTTPException(status_code=400, detail=f"Invalid image at index {i}: empty URL/path") | ||
| 1272 | - urls.append(s) | ||
| 1273 | 1304 | ||
| 1274 | cache_check_started = time.perf_counter() | 1305 | cache_check_started = time.perf_counter() |
| 1275 | - cache_only = _try_full_image_cache_hit(urls, effective_normalize) | 1306 | + cache_only = _try_full_image_lane_cache_hit(items, effective_normalize, lane=lane) |
| 1276 | if cache_only is not None: | 1307 | if cache_only is not None: |
| 1277 | latency_ms = (time.perf_counter() - cache_check_started) * 1000.0 | 1308 | latency_ms = (time.perf_counter() - cache_check_started) * 1000.0 |
| 1278 | _image_stats.record_completed( | 1309 | _image_stats.record_completed( |
| @@ -1283,9 +1314,10 @@ async def embed_image( | @@ -1283,9 +1314,10 @@ async def embed_image( | ||
| 1283 | cache_misses=0, | 1314 | cache_misses=0, |
| 1284 | ) | 1315 | ) |
| 1285 | logger.info( | 1316 | logger.info( |
| 1286 | - "embed_image response | mode=cache-only priority=%s inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=0 first_vector=%s latency_ms=%.2f", | 1317 | + "%s response | mode=cache-only priority=%s inputs=%d normalize=%s dim=%d cache_hits=%d first_vector=%s latency_ms=%.2f", |
| 1318 | + route, | ||
| 1287 | _priority_label(effective_priority), | 1319 | _priority_label(effective_priority), |
| 1288 | - len(urls), | 1320 | + len(items), |
| 1289 | effective_normalize, | 1321 | effective_normalize, |
| 1290 | len(cache_only.vectors[0]) if cache_only.vectors and cache_only.vectors[0] is not None else 0, | 1322 | len(cache_only.vectors[0]) if cache_only.vectors and cache_only.vectors[0] is not None else 0, |
| 1291 | cache_only.cache_hits, | 1323 | cache_only.cache_hits, |
| @@ -1299,14 +1331,15 @@ async def embed_image( | @@ -1299,14 +1331,15 @@ async def embed_image( | ||
| 1299 | if not accepted: | 1331 | if not accepted: |
| 1300 | _image_stats.record_rejected() | 1332 | _image_stats.record_rejected() |
| 1301 | logger.warning( | 1333 | logger.warning( |
| 1302 | - "embed_image rejected | client=%s priority=%s inputs=%d normalize=%s active=%d limit=%d preview=%s", | 1334 | + "%s rejected | client=%s priority=%s inputs=%d normalize=%s active=%d limit=%d preview=%s", |
| 1335 | + route, | ||
| 1303 | _request_client(http_request), | 1336 | _request_client(http_request), |
| 1304 | _priority_label(effective_priority), | 1337 | _priority_label(effective_priority), |
| 1305 | - len(urls), | 1338 | + len(items), |
| 1306 | effective_normalize, | 1339 | effective_normalize, |
| 1307 | active, | 1340 | active, |
| 1308 | _IMAGE_MAX_INFLIGHT, | 1341 | _IMAGE_MAX_INFLIGHT, |
| 1309 | - _preview_inputs(urls, _LOG_PREVIEW_COUNT, _LOG_IMAGE_PREVIEW_CHARS), | 1342 | + _preview_inputs(items, _LOG_PREVIEW_COUNT, preview_chars), |
| 1310 | extra=build_request_log_extra(request_id, user_id), | 1343 | extra=build_request_log_extra(request_id, user_id), |
| 1311 | ) | 1344 | ) |
| 1312 | raise HTTPException( | 1345 | raise HTTPException( |
| @@ -1318,24 +1351,33 @@ async def embed_image( | @@ -1318,24 +1351,33 @@ async def embed_image( | ||
| 1318 | ) | 1351 | ) |
| 1319 | limiter_acquired = True | 1352 | limiter_acquired = True |
| 1320 | logger.info( | 1353 | logger.info( |
| 1321 | - "embed_image request | client=%s priority=%s inputs=%d normalize=%s active=%d limit=%d preview=%s", | 1354 | + "%s request | client=%s priority=%s inputs=%d normalize=%s active=%d limit=%d preview=%s", |
| 1355 | + route, | ||
| 1322 | _request_client(http_request), | 1356 | _request_client(http_request), |
| 1323 | _priority_label(effective_priority), | 1357 | _priority_label(effective_priority), |
| 1324 | - len(urls), | 1358 | + len(items), |
| 1325 | effective_normalize, | 1359 | effective_normalize, |
| 1326 | active, | 1360 | active, |
| 1327 | _IMAGE_MAX_INFLIGHT, | 1361 | _IMAGE_MAX_INFLIGHT, |
| 1328 | - _preview_inputs(urls, _LOG_PREVIEW_COUNT, _LOG_IMAGE_PREVIEW_CHARS), | 1362 | + _preview_inputs(items, _LOG_PREVIEW_COUNT, preview_chars), |
| 1329 | extra=build_request_log_extra(request_id, user_id), | 1363 | extra=build_request_log_extra(request_id, user_id), |
| 1330 | ) | 1364 | ) |
| 1331 | verbose_logger.info( | 1365 | verbose_logger.info( |
| 1332 | - "embed_image detail | payload=%s normalize=%s priority=%s", | ||
| 1333 | - urls, | 1366 | + "%s detail | payload=%s normalize=%s priority=%s", |
| 1367 | + route, | ||
| 1368 | + items, | ||
| 1334 | effective_normalize, | 1369 | effective_normalize, |
| 1335 | _priority_label(effective_priority), | 1370 | _priority_label(effective_priority), |
| 1336 | extra=build_request_log_extra(request_id, user_id), | 1371 | extra=build_request_log_extra(request_id, user_id), |
| 1337 | ) | 1372 | ) |
| 1338 | - result = await run_in_threadpool(_embed_image_impl, urls, effective_normalize, request_id, user_id) | 1373 | + result = await run_in_threadpool( |
| 1374 | + _embed_image_lane_impl, | ||
| 1375 | + items, | ||
| 1376 | + effective_normalize, | ||
| 1377 | + request_id, | ||
| 1378 | + user_id, | ||
| 1379 | + lane=lane, | ||
| 1380 | + ) | ||
| 1339 | success = True | 1381 | success = True |
| 1340 | backend_elapsed_ms = result.backend_elapsed_ms | 1382 | backend_elapsed_ms = result.backend_elapsed_ms |
| 1341 | cache_hits = result.cache_hits | 1383 | cache_hits = result.cache_hits |
| @@ -1349,10 +1391,11 @@ async def embed_image( | @@ -1349,10 +1391,11 @@ async def embed_image( | ||
| 1349 | cache_misses=cache_misses, | 1391 | cache_misses=cache_misses, |
| 1350 | ) | 1392 | ) |
| 1351 | logger.info( | 1393 | logger.info( |
| 1352 | - "embed_image response | mode=%s priority=%s inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=%d first_vector=%s latency_ms=%.2f", | 1394 | + "%s response | mode=%s priority=%s inputs=%d normalize=%s dim=%d cache_hits=%d cache_misses=%d first_vector=%s latency_ms=%.2f", |
| 1395 | + route, | ||
| 1353 | result.mode, | 1396 | result.mode, |
| 1354 | _priority_label(effective_priority), | 1397 | _priority_label(effective_priority), |
| 1355 | - len(urls), | 1398 | + len(items), |
| 1356 | effective_normalize, | 1399 | effective_normalize, |
| 1357 | len(result.vectors[0]) if result.vectors and result.vectors[0] is not None else 0, | 1400 | len(result.vectors[0]) if result.vectors and result.vectors[0] is not None else 0, |
| 1358 | cache_hits, | 1401 | cache_hits, |
| @@ -1362,7 +1405,8 @@ async def embed_image( | @@ -1362,7 +1405,8 @@ async def embed_image( | ||
| 1362 | extra=build_request_log_extra(request_id, user_id), | 1405 | extra=build_request_log_extra(request_id, user_id), |
| 1363 | ) | 1406 | ) |
| 1364 | verbose_logger.info( | 1407 | verbose_logger.info( |
| 1365 | - "embed_image result detail | count=%d first_vector=%s latency_ms=%.2f", | 1408 | + "%s result detail | count=%d first_vector=%s latency_ms=%.2f", |
| 1409 | + route, | ||
| 1366 | len(result.vectors), | 1410 | len(result.vectors), |
| 1367 | result.vectors[0][: _VECTOR_PREVIEW_DIMS] | 1411 | result.vectors[0][: _VECTOR_PREVIEW_DIMS] |
| 1368 | if result.vectors and result.vectors[0] is not None | 1412 | if result.vectors and result.vectors[0] is not None |
| @@ -1383,24 +1427,73 @@ async def embed_image( | @@ -1383,24 +1427,73 @@ async def embed_image( | ||
| 1383 | cache_misses=cache_misses, | 1427 | cache_misses=cache_misses, |
| 1384 | ) | 1428 | ) |
| 1385 | logger.error( | 1429 | logger.error( |
| 1386 | - "embed_image failed | priority=%s inputs=%d normalize=%s latency_ms=%.2f error=%s", | 1430 | + "%s failed | priority=%s inputs=%d normalize=%s latency_ms=%.2f error=%s", |
| 1431 | + route, | ||
| 1387 | _priority_label(effective_priority), | 1432 | _priority_label(effective_priority), |
| 1388 | - len(urls), | 1433 | + len(items_in), |
| 1389 | effective_normalize, | 1434 | effective_normalize, |
| 1390 | latency_ms, | 1435 | latency_ms, |
| 1391 | e, | 1436 | e, |
| 1392 | exc_info=True, | 1437 | exc_info=True, |
| 1393 | extra=build_request_log_extra(request_id, user_id), | 1438 | extra=build_request_log_extra(request_id, user_id), |
| 1394 | ) | 1439 | ) |
| 1395 | - raise HTTPException(status_code=502, detail=f"Image embedding backend failure: {e}") from e | 1440 | + raise HTTPException(status_code=502, detail=f"{route} backend failure: {e}") from e |
| 1396 | finally: | 1441 | finally: |
| 1397 | if limiter_acquired: | 1442 | if limiter_acquired: |
| 1398 | remaining = _image_request_limiter.release(success=success) | 1443 | remaining = _image_request_limiter.release(success=success) |
| 1399 | logger.info( | 1444 | logger.info( |
| 1400 | - "embed_image finalize | success=%s priority=%s active_after=%d", | 1445 | + "%s finalize | success=%s priority=%s active_after=%d", |
| 1446 | + route, | ||
| 1401 | success, | 1447 | success, |
| 1402 | _priority_label(effective_priority), | 1448 | _priority_label(effective_priority), |
| 1403 | remaining, | 1449 | remaining, |
| 1404 | extra=build_request_log_extra(request_id, user_id), | 1450 | extra=build_request_log_extra(request_id, user_id), |
| 1405 | ) | 1451 | ) |
| 1406 | reset_request_log_context(log_tokens) | 1452 | reset_request_log_context(log_tokens) |
| 1453 | + | ||
| 1454 | + | ||
@app.post("/embed/image")
async def embed_image(
    images: List[str],
    http_request: Request,
    response: Response,
    normalize: Optional[bool] = None,
    priority: int = 0,
) -> List[Optional[List[float]]]:
    """Embed image URLs/local paths with the CN-CLIP image tower.

    Rejects with 503 while the image model is not loaded, validates that every
    entry is a non-empty string, then delegates throttling/caching/logging to
    the shared image-lane runner with ``lane="image"``. ``normalize=None``
    falls back to the service-configured default inside the runner.
    """
    if _image_model is None:
        raise HTTPException(status_code=503, detail="Image embedding model not loaded in this service")
    items = _parse_string_inputs(images, kind="image", empty_detail="empty URL/path")
    return await _run_image_lane_embed(
        route="embed_image",
        lane="image",
        items=items,
        http_request=http_request,
        response=response,
        normalize=normalize,
        priority=priority,
        preview_chars=_LOG_IMAGE_PREVIEW_CHARS,
    )
| 1476 | + | ||
| 1477 | + | ||
@app.post("/embed/clip_text")
async def embed_clip_text(
    texts: List[str],
    http_request: Request,
    response: Response,
    normalize: Optional[bool] = None,
    priority: int = 0,
) -> List[Optional[List[float]]]:
    """CN-CLIP text tower; vectors share the space of ``POST /embed/image``.

    This is NOT the TEI semantic-text space served by ``/embed/text`` — the two
    models are unrelated. Requires the image model (503 otherwise), validates
    that inputs are non-empty strings, and delegates to the shared image-lane
    runner with ``lane="clip_text"``.
    """
    if _image_model is None:
        raise HTTPException(status_code=503, detail="Image embedding model not loaded in this service")
    items = _parse_string_inputs(texts, kind="text", empty_detail="empty string")
    return await _run_image_lane_embed(
        route="embed_clip_text",
        lane="clip_text",
        items=items,
        http_request=http_request,
        response=response,
        normalize=normalize,
        priority=priority,
        preview_chars=_LOG_TEXT_PREVIEW_CHARS,
    )
scripts/es_debug_search.py
| @@ -412,7 +412,7 @@ def _looks_like_image_ref(url: str) -> bool: | @@ -412,7 +412,7 @@ def _looks_like_image_ref(url: str) -> bool: | ||
| 412 | 412 | ||
| 413 | def _encode_clip_query_vector(query: str) -> List[float]: | 413 | def _encode_clip_query_vector(query: str) -> List[float]: |
| 414 | """ | 414 | """ |
| 415 | - 与索引中 image_embedding 同空间:图走 HTTP /embed/image;文本走 clip-as-service gRPC encode。 | 415 | + 与索引中 image_embedding 同空间:图走 ``POST /embed/image``;文本走 ``POST /embed/clip_text``(6008)。 |
| 416 | """ | 416 | """ |
| 417 | import numpy as np | 417 | import numpy as np |
| 418 | 418 | ||
| @@ -420,40 +420,14 @@ def _encode_clip_query_vector(query: str) -> List[float]: | @@ -420,40 +420,14 @@ def _encode_clip_query_vector(query: str) -> List[float]: | ||
| 420 | if not q: | 420 | if not q: |
| 421 | raise ValueError("empty query") | 421 | raise ValueError("empty query") |
| 422 | 422 | ||
| 423 | - from config.services_config import get_embedding_image_backend_config | ||
| 424 | - | ||
| 425 | - backend, cfg = get_embedding_image_backend_config() | 423 | + from embeddings.image_encoder import CLIPImageEncoder |
| 426 | 424 | ||
| 425 | + enc = CLIPImageEncoder() | ||
| 427 | if _looks_like_image_ref(q): | 426 | if _looks_like_image_ref(q): |
| 428 | - from embeddings.image_encoder import CLIPImageEncoder | ||
| 429 | - | ||
| 430 | - enc = CLIPImageEncoder() | ||
| 431 | vec = enc.encode_image_from_url(q, normalize_embeddings=True, priority=1) | 427 | vec = enc.encode_image_from_url(q, normalize_embeddings=True, priority=1) |
| 432 | - return vec.astype(np.float32).flatten().tolist() | ||
| 433 | - | ||
| 434 | - if backend != "clip_as_service": | ||
| 435 | - raise RuntimeError( | ||
| 436 | - "mode 5 纯文本查询需要 CN-CLIP 文本向量(与 clip-as-service 同空间)。" | ||
| 437 | - "当前 image_backend 为 local_cnclip,本脚本不加载本地模型。" | ||
| 438 | - "请将 config 中 services.embedding.image_backend 设为 clip_as_service 并启动 grpc " | ||
| 439 | - "(默认 51000),或输入图片 URL/路径(将调用 POST /embed/image 到 6008)。" | ||
| 440 | - ) | ||
| 441 | - | ||
| 442 | - from embeddings.clip_as_service_encoder import _ensure_clip_client_path | ||
| 443 | - | ||
| 444 | - _ensure_clip_client_path() | ||
| 445 | - from clip_client import Client | ||
| 446 | - | ||
| 447 | - server = str(cfg.get("server") or "grpc://127.0.0.1:51000").strip() | ||
| 448 | - normalize = bool(cfg.get("normalize_embeddings", True)) | ||
| 449 | - client = Client(server) | ||
| 450 | - arr = client.encode([q], batch_size=1, show_progress=False) | ||
| 451 | - vec = np.asarray(arr[0], dtype=np.float32).flatten() | ||
| 452 | - if normalize: | ||
| 453 | - n = float(np.linalg.norm(vec)) | ||
| 454 | - if np.isfinite(n) and n > 0.0: | ||
| 455 | - vec = vec / n | ||
| 456 | - return vec.tolist() | 428 | + else: |
| 429 | + vec = enc.encode_clip_text(q, normalize_embeddings=True, priority=1) | ||
| 430 | + return vec.astype(np.float32).flatten().tolist() | ||
| 457 | 431 | ||
| 458 | 432 | ||
| 459 | def search_title_knn(es: Any, index_name: str, query: str, size: int) -> List[Dict[str, Any]]: | 433 | def search_title_knn(es: Any, index_name: str, query: str, size: int) -> List[Dict[str, Any]]: |
| @@ -467,7 +441,6 @@ def search_title_knn(es: Any, index_name: str, query: str, size: int) -> List[Di | @@ -467,7 +441,6 @@ def search_title_knn(es: Any, index_name: str, query: str, size: int) -> List[Di | ||
| 467 | qv = vec.astype("float32").flatten().tolist() | 441 | qv = vec.astype("float32").flatten().tolist() |
| 468 | num_cand = max(size * 10, 100) | 442 | num_cand = max(size * 10, 100) |
| 469 | body: Dict[str, Any] = { | 443 | body: Dict[str, Any] = { |
| 470 | - "size": size, | ||
| 471 | "knn": { | 444 | "knn": { |
| 472 | "field": "title_embedding", | 445 | "field": "title_embedding", |
| 473 | "query_vector": qv, | 446 | "query_vector": qv, |
| @@ -484,7 +457,6 @@ def search_image_knn(es: Any, index_name: str, query: str, size: int) -> List[Di | @@ -484,7 +457,6 @@ def search_image_knn(es: Any, index_name: str, query: str, size: int) -> List[Di | ||
| 484 | num_cand = max(size * 10, 100) | 457 | num_cand = max(size * 10, 100) |
| 485 | field = "image_embedding.vector" | 458 | field = "image_embedding.vector" |
| 486 | body: Dict[str, Any] = { | 459 | body: Dict[str, Any] = { |
| 487 | - "size": size, | ||
| 488 | "knn": { | 460 | "knn": { |
| 489 | "field": field, | 461 | "field": field, |
| 490 | "query_vector": qv, | 462 | "query_vector": qv, |
scripts/service_ctl.sh
| @@ -871,7 +871,7 @@ Special targets: | @@ -871,7 +871,7 @@ Special targets: | ||
| 871 | 871 | ||
| 872 | Examples: | 872 | Examples: |
| 873 | ./scripts/service_ctl.sh up all | 873 | ./scripts/service_ctl.sh up all |
| 874 | - ./scripts/service_ctl.sh up tei cnclip embedding translator reranker | 874 | + ./scripts/service_ctl.sh up tei cnclip embedding embedding-image translator reranker |
| 875 | ./scripts/service_ctl.sh up backend indexer frontend | 875 | ./scripts/service_ctl.sh up backend indexer frontend |
| 876 | ./scripts/service_ctl.sh restart | 876 | ./scripts/service_ctl.sh restart |
| 877 | ./scripts/service_ctl.sh monitor-start all | 877 | ./scripts/service_ctl.sh monitor-start all |
tests/ci/test_service_api_contracts.py
| @@ -556,6 +556,9 @@ class _FakeImageModel: | @@ -556,6 +556,9 @@ class _FakeImageModel: | ||
| 556 | def encode_image_urls(self, urls, batch_size=8, normalize_embeddings=True): | 556 | def encode_image_urls(self, urls, batch_size=8, normalize_embeddings=True): |
| 557 | return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls] | 557 | return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls] |
| 558 | 558 | ||
| 559 | + def encode_clip_texts(self, texts, batch_size=8, normalize_embeddings=True): | ||
| 560 | + return [np.array([0.31, 0.21, 0.11], dtype=np.float32) for _ in texts] | ||
| 561 | + | ||
| 559 | 562 | ||
| 560 | class _EmbeddingCacheMiss: | 563 | class _EmbeddingCacheMiss: |
| 561 | """Avoid Redis/module cache hits so contract tests exercise the encode path.""" | 564 | """Avoid Redis/module cache hits so contract tests exercise the encode path.""" |
| @@ -579,6 +582,7 @@ def embedding_module(): | @@ -579,6 +582,7 @@ def embedding_module(): | ||
| 579 | emb_server._text_backend_name = "tei" | 582 | emb_server._text_backend_name = "tei" |
| 580 | emb_server._text_cache = _EmbeddingCacheMiss() | 583 | emb_server._text_cache = _EmbeddingCacheMiss() |
| 581 | emb_server._image_cache = _EmbeddingCacheMiss() | 584 | emb_server._image_cache = _EmbeddingCacheMiss() |
| 585 | + emb_server._clip_text_cache = _EmbeddingCacheMiss() | ||
| 582 | yield emb_server | 586 | yield emb_server |
| 583 | 587 | ||
| 584 | 588 | ||
| @@ -604,6 +608,17 @@ def test_embedding_image_contract(embedding_module): | @@ -604,6 +608,17 @@ def test_embedding_image_contract(embedding_module): | ||
| 604 | assert len(data[0]) == 3 | 608 | assert len(data[0]) == 3 |
| 605 | 609 | ||
| 606 | 610 | ||
def test_embedding_clip_text_contract(embedding_module):
    """POST /embed/clip_text returns one fixed-dim vector per input text."""
    from fastapi.testclient import TestClient

    payload = ["纯棉短袖", "street tee"]
    with TestClient(embedding_module.app) as client:
        resp = client.post("/embed/clip_text", json=payload)
    assert resp.status_code == 200
    body = resp.json()
    assert len(body) == len(payload)
    assert len(body[0]) == 3
| 621 | + | ||
| 607 | class _FakeTranslator: | 622 | class _FakeTranslator: |
| 608 | model = "qwen-mt" | 623 | model = "qwen-mt" |
| 609 | supports_batch = True | 624 | supports_batch = True |