Commit c10f90fea107e1ce226cd28caa66da1f07be6104
1 parent
42e3aea6
cnclip
Showing
11 changed files
with
245 additions
and
411 deletions
Show diff stats
docs/reference/商品数据源入ES配置规范.md deleted
| @@ -1,221 +0,0 @@ | @@ -1,221 +0,0 @@ | ||
| 1 | -根据您提供的内容,我将其整理为规范的Markdown格式: | ||
| 2 | - | ||
| 3 | -# ES索引配置文档 | ||
| 4 | - | ||
| 5 | -## 1. 全局配置 | ||
| 6 | - | ||
| 7 | -### 1.1 文本字段相关性设定 | ||
| 8 | -需要修改所有text字段相关性算法-BM25算法的默认参数: | ||
| 9 | -```json | ||
| 10 | -"similarity": { | ||
| 11 | - "default": { | ||
| 12 | - "type": "BM25", | ||
| 13 | - "b": "0.0", | ||
| 14 | - "k1": "0.0" | ||
| 15 | - } | ||
| 16 | -} | ||
| 17 | -``` | ||
| 18 | - | ||
| 19 | -### 1.2 索引分片设定 | ||
| 20 | -- `number_of_replicas`:0/1 | ||
| 21 | -- `number_of_shards`:设置建议 分片数 <= ES集群的总CPU核心个数/ (副本数 + 1) | ||
| 22 | - | ||
| 23 | -### 1.3 索引刷新时间设定 | ||
| 24 | -- `refresh_interval`:默认30S,根据客户需要进行调整 | ||
| 25 | -```json | ||
| 26 | -"refresh_interval": "30s" | ||
| 27 | -``` | ||
| 28 | - | ||
| 29 | -## 2. 单个字段配置 | ||
| 30 | - | ||
| 31 | -| 分析方式 | 字段预处理和ES输入格式要求 | 对应ES mapping配置 | 备注 | | ||
| 32 | -|---------|--------------------------|-------------------|------| | ||
| 33 | -| 电商通用分析-中文 | - | ```json { "type": "text", "analyzer": "index_ansj", "search_analyzer": "query_ansj" } ``` | - | | ||
| 34 | -| 文本-多语言向量化 | 调用"文本向量化"模块得到1024维向量 | ```json { "type": "dense_vector", "dims": 1024, "index": true, "similarity": "dot_product" } ``` | 1. 依赖"文本向量化"模块<br>2. 如果定期全量,需要对向量化结果做缓存 | | ||
| 35 | -| 图片-向量化 | 调用"图片向量化"模块得到1024维向量 | ```json { "type": "nested", "properties": { "vector": { "type": "dense_vector", "dims": 1024, "similarity": "dot_product" }, "url": { "type": "text" } } } ``` | 1. 依赖"图片向量化"模块<br>2. 如果定期全量,需要对向量化结果做缓存 | | ||
| 36 | -| 关键词 | ES输入格式:list或者单个值 | ```json {"type": "keyword"} ``` | - | | ||
| 37 | -| 电商通用分析-英文 | - | ```json {"type": "text", "analyzer": "english"} ``` | - | | ||
| 38 | -| 电商通用分析-阿拉伯文 | - | ```json {"type": "text", "analyzer": "arabic"} ``` | - | | ||
| 39 | -| 电商通用分析-西班牙文 | - | ```json {"type": "text", "analyzer": "spanish"} ``` | - | | ||
| 40 | -| 电商通用分析-俄文 | - | ```json {"type": "text", "analyzer": "russian"} ``` | - | | ||
| 41 | -| 电商通用分析-日文 | - | ```json {"type": "text", "analyzer": "japanese"} ``` | - | | ||
| 42 | -| 数值-整数 | - | ```json {"type": "long"} ``` | - | | ||
| 43 | -| 数值-浮点型 | - | ```json {"type": "float"} ``` | - | | ||
| 44 | -| 分值 | 输入是float,配置处理方式:log, pow, sigmoid等 | TODO:给代码, log | - | | ||
| 45 | -| 子串 | - | 暂时不支持 | - | | ||
| 46 | -| ngram匹配或前缀匹配或边缘前缀匹配 | - | 暂时不支持 | 以后根据需要再添加 | | ||
| 47 | - | ||
| 48 | -这样整理后,文档结构更加清晰,表格格式规范,便于阅读和理解。 | ||
| 49 | - | ||
| 50 | - | ||
| 51 | -参考 opensearch: | ||
| 52 | - | ||
| 53 | -数据接口 | ||
| 54 | -文本相关性字段 | ||
| 55 | -向量相关性字段 | ||
| 56 | -3. 模块提取 | ||
| 57 | -文本向量化 | ||
| 58 | -import sys | ||
| 59 | -import torch | ||
| 60 | -from sentence_transformers import SentenceTransformer | ||
| 61 | -import time | ||
| 62 | -import threading | ||
| 63 | -from modelscope import snapshot_download | ||
| 64 | -from transformers import AutoModel | ||
| 65 | -import os | ||
| 66 | -from openai import OpenAI | ||
| 67 | -from config.logging_config import get_app_logger | ||
| 68 | - | ||
| 69 | -# Get logger for this module | ||
| 70 | -logger = get_app_logger(__name__) | ||
| 71 | - | ||
| 72 | -class BgeEncoder: | ||
| 73 | - _instance = None | ||
| 74 | - _lock = threading.Lock() | ||
| 75 | - | ||
| 76 | - def __new__(cls, model_dir='Xorbits/bge-m3'): | ||
| 77 | - with cls._lock: | ||
| 78 | - if cls._instance is None: | ||
| 79 | - cls._instance = super(BgeEncoder, cls).__new__(cls) | ||
| 80 | - logger.info("[BgeEncoder] Creating a new instance with model directory: %s", model_dir) | ||
| 81 | - cls._instance.model = SentenceTransformer(snapshot_download(model_dir)) | ||
| 82 | - logger.info("[BgeEncoder] New instance has been created") | ||
| 83 | - return cls._instance | ||
| 84 | - | ||
| 85 | - def encode(self, sentences, normalize_embeddings=True, device='cuda'): | ||
| 86 | - # Move model to specified device | ||
| 87 | - if device == 'gpu': | ||
| 88 | - device = 'cuda' | ||
| 89 | - self.model = self.model.to(device) | ||
| 90 | - embeddings = self.model.encode(sentences, normalize_embeddings=normalize_embeddings, device=device, show_progress_bar=False) | ||
| 91 | - return embeddings | ||
| 92 | -图片向量化 | ||
| 93 | -import sys | ||
| 94 | -import os | ||
| 95 | -import io | ||
| 96 | -import requests | ||
| 97 | -import torch | ||
| 98 | -import numpy as np | ||
| 99 | -from PIL import Image | ||
| 100 | -import logging | ||
| 101 | -import threading | ||
| 102 | -from typing import List, Optional, Union | ||
| 103 | -from config.logging_config import get_app_logger | ||
| 104 | -import cn_clip.clip as clip | ||
| 105 | -from cn_clip.clip import load_from_name | ||
| 106 | - | ||
| 107 | -# Get logger for this module | ||
| 108 | -logger = get_app_logger(__name__) | ||
| 109 | - | ||
| 110 | -# DEFAULT_MODEL_NAME = "ViT-L-14-336" # ["ViT-B-16", "ViT-L-14", "ViT-L-14-336", "ViT-H-14", "RN50"] | ||
| 111 | -DEFAULT_MODEL_NAME = "ViT-H-14" | ||
| 112 | -MODEL_DOWNLOAD_DIR = "/data/tw/uat/EsSearcher" | ||
| 113 | - | ||
| 114 | -class CLIPImageEncoder: | ||
| 115 | - """CLIP Image Encoder for generating image embeddings using cn_clip""" | ||
| 116 | - | ||
| 117 | - _instance = None | ||
| 118 | - _lock = threading.Lock() | ||
| 119 | - | ||
| 120 | - def __new__(cls, model_name=DEFAULT_MODEL_NAME, device=None): | ||
| 121 | - with cls._lock: | ||
| 122 | - if cls._instance is None: | ||
| 123 | - cls._instance = super(CLIPImageEncoder, cls).__new__(cls) | ||
| 124 | - logger.info(f"[CLIPImageEncoder] Creating new instance with model: {model_name}") | ||
| 125 | - cls._instance._initialize_model(model_name, device) | ||
| 126 | - return cls._instance | ||
| 127 | - | ||
| 128 | - def _initialize_model(self, model_name, device): | ||
| 129 | - """Initialize the CLIP model using cn_clip""" | ||
| 130 | - try: | ||
| 131 | - self.device = device if device else ("cuda" if torch.cuda.is_available() else "cpu") | ||
| 132 | - self.model, self.preprocess = load_from_name(model_name, device=self.device, download_root=MODEL_DOWNLOAD_DIR) | ||
| 133 | - self.model.eval() | ||
| 134 | - self.model_name = model_name | ||
| 135 | - logger.info(f"[CLIPImageEncoder] Model {model_name} initialized successfully on device {self.device}") | ||
| 136 | - | ||
| 137 | - except Exception as e: | ||
| 138 | - logger.error(f"[CLIPImageEncoder] Failed to initialize model: {str(e)}") | ||
| 139 | - raise | ||
| 140 | - | ||
| 141 | - def validate_image(self, image_data: bytes) -> Image.Image: | ||
| 142 | - """Validate image data and return PIL Image if valid""" | ||
| 143 | - try: | ||
| 144 | - image_stream = io.BytesIO(image_data) | ||
| 145 | - image = Image.open(image_stream) | ||
| 146 | - image.verify() | ||
| 147 | - image_stream.seek(0) | ||
| 148 | - image = Image.open(image_stream) | ||
| 149 | - if image.mode != 'RGB': | ||
| 150 | - image = image.convert('RGB') | ||
| 151 | - return image | ||
| 152 | - except Exception as e: | ||
| 153 | - raise ValueError(f"Invalid image data: {str(e)}") | ||
| 154 | - | ||
| 155 | - def download_image(self, url: str, timeout: int = 10) -> bytes: | ||
| 156 | - """Download image from URL""" | ||
| 157 | - try: | ||
| 158 | - if url.startswith(('http://', 'https://')): | ||
| 159 | - response = requests.get(url, timeout=timeout) | ||
| 160 | - if response.status_code != 200: | ||
| 161 | - raise ValueError(f"HTTP {response.status_code}") | ||
| 162 | - return response.content | ||
| 163 | - else: | ||
| 164 | - # Local file path | ||
| 165 | - with open(url, 'rb') as f: | ||
| 166 | - return f.read() | ||
| 167 | - except Exception as e: | ||
| 168 | - raise ValueError(f"Failed to download image from {url}: {str(e)}") | ||
| 169 | - | ||
| 170 | - def preprocess_image(self, image: Image.Image, max_size: int = 1024) -> Image.Image: | ||
| 171 | - """Preprocess image for CLIP model""" | ||
| 172 | - # Resize if too large | ||
| 173 | - if max(image.size) > max_size: | ||
| 174 | - ratio = max_size / max(image.size) | ||
| 175 | - new_size = tuple(int(dim * ratio) for dim in image.size) | ||
| 176 | - image = image.resize(new_size, Image.Resampling.LANCZOS) | ||
| 177 | - return image | ||
| 178 | - | ||
| 179 | - def encode_text(self, text): | ||
| 180 | - """Encode text to embedding vector using cn_clip""" | ||
| 181 | - text_data = clip.tokenize([text] if type(text) == str else text).to(self.device) | ||
| 182 | - with torch.no_grad(): | ||
| 183 | - text_features = self.model.encode_text(text_data) | ||
| 184 | - text_features /= text_features.norm(dim=-1, keepdim=True) | ||
| 185 | - return text_features | ||
| 186 | - | ||
| 187 | - def encode_image(self, image: Image.Image) -> Optional[np.ndarray]: | ||
| 188 | - """Encode image to embedding vector using cn_clip""" | ||
| 189 | - if not isinstance(image, Image.Image): | ||
| 190 | - raise ValueError("CLIPImageEncoder.encode_image Input must be a PIL.Image") | ||
| 191 | - | ||
| 192 | - try: | ||
| 193 | - infer_data = self.preprocess(image).unsqueeze(0).to(self.device) | ||
| 194 | - with torch.no_grad(): | ||
| 195 | - image_features = self.model.encode_image(infer_data) | ||
| 196 | - image_features /= image_features.norm(dim=-1, keepdim=True) | ||
| 197 | - return image_features.cpu().numpy().astype('float32')[0] | ||
| 198 | - except Exception as e: | ||
| 199 | - logger.error(f"Failed to process image. Reason: {str(e)}") | ||
| 200 | - return None | ||
| 201 | - | ||
| 202 | - def encode_image_from_url(self, url: str) -> Optional[np.ndarray]: | ||
| 203 | - """Complete pipeline: download, validate, preprocess and encode image from URL""" | ||
| 204 | - try: | ||
| 205 | - # Download image | ||
| 206 | - image_data = self.download_image(url) | ||
| 207 | - | ||
| 208 | - # Validate image | ||
| 209 | - image = self.validate_image(image_data) | ||
| 210 | - | ||
| 211 | - # Preprocess image | ||
| 212 | - image = self.preprocess_image(image) | ||
| 213 | - | ||
| 214 | - # Encode image | ||
| 215 | - embedding = self.encode_image(image) | ||
| 216 | - | ||
| 217 | - return embedding | ||
| 218 | - | ||
| 219 | - except Exception as e: | ||
| 220 | - logger.error(f"Error processing image from URL {url}: {str(e)}") | ||
| 221 | - return None | ||
| 222 | \ No newline at end of file | 0 | \ No newline at end of file |
docs/reference/阿里opensearch电商行业.md deleted
| @@ -1,47 +0,0 @@ | @@ -1,47 +0,0 @@ | ||
| 1 | -https://help.aliyun.com/zh/open-search/industry-algorithm-edition/e-commerce?spm=a2c4g.11186623.help-menu-29102.d_3_2_1.5a903cfbxOsaHt&scm=20140722.H_99739._.OR_help-T_cn~zh-V_1 | ||
| 2 | - | ||
| 3 | - | ||
| 4 | -## 定义应用结构 | ||
| 5 | -示例如下: | ||
| 6 | -| 字段名称 | 主键 | 字段标签 | 类型 | | ||
| 7 | -|----------------|------|------------|--------------| | ||
| 8 | -| title | | 商品标题 | TEXT | | ||
| 9 | -| text_embedding | | 文本向量 | EMBEDDING | | ||
| 10 | -| image_embedding | | 图片向量 | EMBEDDING | | ||
| 11 | -| category_name | | 类目名称 | TEXT | | ||
| 12 | -| image_url | | | LITERAL_ARRAY| | ||
| 13 | -| description | | 商品描述 | TEXT | | ||
| 14 | -| brand_name | | 品牌名称 | TEXT | | ||
| 15 | -| thumbnail_url | | | LITERAL_ARRAY| | ||
| 16 | -| is_onsale | | | INT | | ||
| 17 | -| url | | | LITERAL | | ||
| 18 | -| brand_id | | | LITERAL | | ||
| 19 | -| series_id | | | LITERAL | | ||
| 20 | -| sold_num | | 商品销量 | INT | | ||
| 21 | -| category_id | | | INT | | ||
| 22 | -| onsale_time | | 上架时间 | INT | | ||
| 23 | -| price | | | DOUBLE | | ||
| 24 | -| series_name | | | TEXT | | ||
| 25 | -| discount_price | | | DOUBLE | | ||
| 26 | -| pid | ● | | INT | | ||
| 27 | -| sale_price | | | DOUBLE | | ||
| 28 | -| act_price | | | DOUBLE | | ||
| 29 | - | ||
| 30 | - | ||
| 31 | -## 定义索引结构 | ||
| 32 | - | ||
| 33 | -| 索引名称 | 索引标签 | 包含字段 | 分析方式 | 使用示例 | | ||
| 34 | -| --- | --- | --- | --- | --- | | ||
| 35 | -| default | 默认索引 | category_name, description, brand_name, title, create_by, update_by | 行业 - 电商通用分析 | query=default:“云搜索” | | ||
| 36 | -| category_name | 类目名称索引 | category_name | 行业 - 电商通用分析 | query=category_name:“云搜索” | | ||
| 37 | -| category_id | | category_id | 关键字 | query=category_id:“云搜索” | | ||
| 38 | -| series_name | | series_name | 中文 - 通用分析 | query=series_name:“云搜索” | | ||
| 39 | -| brand_name | | brand_name | 中文 - 通用分析 | query=brand_name:“云搜索” | | ||
| 40 | -| id | | id | 关键字 | query=id:“云搜索” | | ||
| 41 | -| title | 标题索引 | title | 行业 - 电商通用分析 | query=title:“云搜索” | | ||
| 42 | -| seller_id | | seller_id | 关键字 | query=seller_id:“云搜索” | | ||
| 43 | -| brand_id | | brand_id | 关键字 | query=brand_id:“云搜索” | | ||
| 44 | -| series_id | | series_id | 关键字 | query=series_id:“云搜索” | | ||
| 45 | - | ||
| 46 | -上面的只是阿里云的opensearch的例子,我们也要有同样的一套配置,这里支持的“分析方式”为ES预先支持的多种分析器,我们要支持的分析方式参考 @商品数据源入ES配置规范.md | ||
| 47 | - |
docs/temporary/sku_image_src问题诊断报告.md deleted
| @@ -1,117 +0,0 @@ | @@ -1,117 +0,0 @@ | ||
| 1 | -# SKU image_src 字段为空问题诊断报告 | ||
| 2 | - | ||
| 3 | -## 问题描述 | ||
| 4 | - | ||
| 5 | -返回结果的每条结果中,多款式字段 `skus` 下面每个 SKU 的 `image_src` 为空。 | ||
| 6 | - | ||
| 7 | -## 问题分析 | ||
| 8 | - | ||
| 9 | -### 1. ES 数据检查 | ||
| 10 | - | ||
| 11 | -通过查询 ES 数据,发现: | ||
| 12 | -- ES 中确实有 `skus` 数据(不是空数组) | ||
| 13 | -- 但是 `skus` 数组中的每个 SKU 对象**都没有 `image_src` 字段** | ||
| 14 | - | ||
| 15 | -示例 ES 文档: | ||
| 16 | -```json | ||
| 17 | -{ | ||
| 18 | - "spu_id": "68238", | ||
| 19 | - "skus": [ | ||
| 20 | - { | ||
| 21 | - "sku_id": "3568395", | ||
| 22 | - "price": 329.61, | ||
| 23 | - "compare_at_price": 485.65, | ||
| 24 | - "sku_code": "3468269", | ||
| 25 | - "stock": 57, | ||
| 26 | - "weight": 0.26, | ||
| 27 | - "weight_unit": "kg", | ||
| 28 | - "option1_value": "", | ||
| 29 | - "option2_value": "", | ||
| 30 | - "option3_value": "" | ||
| 31 | - // 注意:这里没有 image_src 字段 | ||
| 32 | - } | ||
| 33 | - ] | ||
| 34 | -} | ||
| 35 | -``` | ||
| 36 | - | ||
| 37 | -### 2. 代码逻辑检查 | ||
| 38 | - | ||
| 39 | -在 `indexer/document_transformer.py` 的 `_transform_sku_row` 方法中(第558-560行),原有逻辑为: | ||
| 40 | - | ||
| 41 | -```python | ||
| 42 | -# Image src | ||
| 43 | -if pd.notna(sku_row.get('image_src')): | ||
| 44 | - sku_data['image_src'] = str(sku_row['image_src']) | ||
| 45 | -``` | ||
| 46 | - | ||
| 47 | -**问题根源**: | ||
| 48 | -- 只有当 MySQL 中的 `image_src` 字段**非空**时,才会将其添加到 `sku_data` 字典中 | ||
| 49 | -- 如果 MySQL 中的 `image_src` 是 `NULL` 或空字符串,这个字段就**不会出现在返回的字典中** | ||
| 50 | -- 导致 ES 文档中缺少 `image_src` 字段 | ||
| 51 | -- API 返回时,`sku_entry.get('image_src')` 返回 `None`,前端看到的就是空值 | ||
| 52 | - | ||
| 53 | -### 3. MySQL 数据情况 | ||
| 54 | - | ||
| 55 | -根据代码逻辑推断: | ||
| 56 | -- MySQL 的 `shoplazza_product_sku` 表中,`image_src` 字段可能为 `NULL` 或空字符串 | ||
| 57 | -- 这导致索引时该字段没有被写入 ES | ||
| 58 | - | ||
| 59 | -## 解决方案 | ||
| 60 | - | ||
| 61 | -### 修复方案 | ||
| 62 | - | ||
| 63 | -修改 `indexer/document_transformer.py` 中的 `_transform_sku_row` 方法,**始终包含 `image_src` 字段**,即使值为空也设置为 `None`: | ||
| 64 | - | ||
| 65 | -```python | ||
| 66 | -# Image src - always include this field, even if empty | ||
| 67 | -# This ensures the field is present in ES documents and API responses | ||
| 68 | -image_src = sku_row.get('image_src') | ||
| 69 | -if pd.notna(image_src) and str(image_src).strip(): | ||
| 70 | - sku_data['image_src'] = str(image_src).strip() | ||
| 71 | -else: | ||
| 72 | - # Set to None (will be serialized as null in JSON) instead of omitting the field | ||
| 73 | - sku_data['image_src'] = None | ||
| 74 | -``` | ||
| 75 | - | ||
| 76 | -### 修复效果 | ||
| 77 | - | ||
| 78 | -修复后: | ||
| 79 | -1. **即使 MySQL 中 `image_src` 为 NULL 或空字符串**,ES 文档中也会包含该字段(值为 `null`) | ||
| 80 | -2. API 返回时,前端可以明确知道该字段存在但值为空 | ||
| 81 | -3. 符合 API 模型定义:`image_src: Optional[str] = Field(None, ...)` | ||
| 82 | - | ||
| 83 | -## 问题分类 | ||
| 84 | - | ||
| 85 | -**问题类型**:**本项目填充的问题** | ||
| 86 | - | ||
| 87 | -- ✅ **不是 MySQL 原始数据的问题**:MySQL 中 `image_src` 字段可能确实为 NULL,但这是正常的业务数据 | ||
| 88 | -- ✅ **不是 ES 数据的问题**:ES mapping 中 `image_src` 字段定义正确 | ||
| 89 | -- ❌ **是本项目填充的问题**:代码逻辑导致当 MySQL 中 `image_src` 为空时,该字段没有被写入 ES 文档 | ||
| 90 | - | ||
| 91 | -## 后续操作 | ||
| 92 | - | ||
| 93 | -1. **重新索引数据**:修复代码后,需要重新索引数据才能生效 | ||
| 94 | - ```bash | ||
| 95 | - # 重新索引指定租户的数据 | ||
| 96 | - ./scripts/ingest.sh <tenant_id> true | ||
| 97 | - ``` | ||
| 98 | - | ||
| 99 | -2. **验证修复**:重新索引后,查询 ES 验证 `image_src` 字段是否已包含: | ||
| 100 | - ```bash | ||
| 101 | - curl -u '<es_user>:<es_password>' -X GET 'http://localhost:9200/search_products/_search?pretty' \ ||
| 102 | - -H 'Content-Type: application/json' \ | ||
| 103 | - -d '{ | ||
| 104 | - "size": 1, | ||
| 105 | - "query": {"nested": {"path": "skus", "query": {"exists": {"field": "skus"}}}}, | ||
| 106 | - "_source": ["spu_id", "skus"] | ||
| 107 | - }' | ||
| 108 | - ``` | ||
| 109 | - | ||
| 110 | -3. **可选优化**:如果业务需要,可以考虑当 SKU 的 `image_src` 为空时,使用 SPU 的主图(`image_url`)作为默认值 | ||
| 111 | - | ||
| 112 | -## 相关文件 | ||
| 113 | - | ||
| 114 | -- `indexer/document_transformer.py` - 已修复 | ||
| 115 | -- `api/models.py` - `SkuResult.image_src: Optional[str]` - 模型定义正确 | ||
| 116 | -- `api/result_formatter.py` - `image_src=sku_entry.get('image_src')` - 读取逻辑正确 | ||
| 117 | -- `mappings/search_products.json` - `skus.image_src` mapping 定义正确 |
embeddings/README.md
| @@ -8,8 +8,10 @@ | @@ -8,8 +8,10 @@ | ||
| 8 | 8 | ||
| 9 | - **HTTP 客户端**:`text_encoder.py` / `image_encoder.py`(供搜索/索引模块调用) | 9 | - **HTTP 客户端**:`text_encoder.py` / `image_encoder.py`(供搜索/索引模块调用) |
| 10 | - **本地模型实现**:`bge_model.py` / `clip_model.py` | 10 | - **本地模型实现**:`bge_model.py` / `clip_model.py` |
| 11 | +- **clip-as-service 客户端**:`clip_as_service_encoder.py`(图片向量,推荐) | ||
| 11 | - **向量化服务(FastAPI)**:`server.py` | 12 | - **向量化服务(FastAPI)**:`server.py` |
| 12 | - **统一配置**:`config.py` | 13 | - **统一配置**:`config.py` |
| 14 | +- **接口契约**:`protocols.ImageEncoderProtocol`(图片编码统一为 `encode_image_urls(urls, batch_size)`,本地 CN-CLIP 与 clip-as-service 均实现该接口) | ||
| 13 | 15 | ||
| 14 | ### 服务接口 | 16 | ### 服务接口 |
| 15 | 17 | ||
| @@ -21,6 +23,24 @@ | @@ -21,6 +23,24 @@ | ||
| 21 | - 入参:`["url或本地路径1", ...]` | 23 | - 入参:`["url或本地路径1", ...]` |
| 22 | - 出参:`[[...], null, ...]`(与输入按 index 对齐,失败为 `null`) | 24 | - 出参:`[[...], null, ...]`(与输入按 index 对齐,失败为 `null`) |
| 23 | 25 | ||
| 26 | +### 图片向量:clip-as-service(推荐) | ||
| 27 | + | ||
| 28 | +默认使用 `third-party/clip-as-service` 的 Jina CLIP 服务生成图片向量。 | ||
| 29 | + | ||
| 30 | +1. **安装 clip-client**(首次使用): | ||
| 31 | + ```bash | ||
| 32 | + pip install -e third-party/clip-as-service/client | ||
| 33 | + ``` | ||
| 34 | + | ||
| 35 | +2. **启动 CN-CLIP 服务**(独立 gRPC 服务,默认端口 51000,详见 `docs/CNCLIP_SERVICE说明文档.md`): | ||
| 36 | + ```bash | ||
| 37 | + ./scripts/start_cnclip_service.sh | ||
| 38 | + ``` | ||
| 39 | + | ||
| 40 | +3. **配置**(`embeddings/config.py` 或环境变量): | ||
| 41 | + - `USE_CLIP_AS_SERVICE=true`(默认) | ||
| 42 | + - `CLIP_AS_SERVICE_SERVER=grpc://127.0.0.1:51000` | ||
| 43 | + | ||
| 24 | ### 启动服务 | 44 | ### 启动服务 |
| 25 | 45 | ||
| 26 | 使用仓库脚本启动(默认端口 6005): | 46 | 使用仓库脚本启动(默认端口 6005): |
| @@ -35,5 +55,6 @@ | @@ -35,5 +55,6 @@ | ||
| 35 | 55 | ||
| 36 | - `PORT`: 服务端口(默认 6005) | 56 | - `PORT`: 服务端口(默认 6005) |
| 37 | - `TEXT_MODEL_DIR`, `TEXT_DEVICE`, `TEXT_BATCH_SIZE` | 57 | - `TEXT_MODEL_DIR`, `TEXT_DEVICE`, `TEXT_BATCH_SIZE` |
| 38 | -- `IMAGE_MODEL_NAME`, `IMAGE_DEVICE` | 58 | +- `USE_CLIP_AS_SERVICE`, `CLIP_AS_SERVICE_SERVER`:图片向量(clip-as-service) |
| 59 | +- `IMAGE_MODEL_NAME`, `IMAGE_DEVICE`:本地 CN-CLIP(当 `USE_CLIP_AS_SERVICE=false` 时) | ||
| 39 | 60 |
| @@ -0,0 +1,122 @@ | @@ -0,0 +1,122 @@ | ||
| 1 | +""" | ||
| 2 | +Image encoder using third-party clip-as-service (Jina CLIP server). | ||
| 3 | + | ||
| 4 | +Requires clip-as-service server to be running. The client is loaded from | ||
| 5 | +third-party/clip-as-service/client so no separate pip install is needed | ||
| 6 | +if that path is on sys.path or the package is installed in development mode. | ||
| 7 | +""" | ||
| 8 | + | ||
| 9 | +import logging | ||
| 10 | +import os | ||
| 11 | +import sys | ||
| 12 | +from typing import List, Optional | ||
| 13 | + | ||
| 14 | +import numpy as np | ||
| 15 | + | ||
| 16 | +logger = logging.getLogger(__name__) | ||
| 17 | + | ||
| 18 | +# Ensure third-party clip client is importable | ||
| 19 | +def _ensure_clip_client_path(): | ||
| 20 | + repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
| 21 | + client_path = os.path.join(repo_root, "third-party", "clip-as-service", "client") | ||
| 22 | + if os.path.isdir(client_path) and client_path not in sys.path: | ||
| 23 | + sys.path.insert(0, client_path) | ||
| 24 | + | ||
| 25 | + | ||
| 26 | +def _normalize_image_url(url: str) -> str: | ||
| 27 | + """Normalize image URL for clip-as-service (e.g. //host/path -> https://host/path).""" | ||
| 28 | + if not url or not isinstance(url, str): | ||
| 29 | + return "" | ||
| 30 | + url = url.strip() | ||
| 31 | + if url.startswith("//"): | ||
| 32 | + return "https:" + url | ||
| 33 | + return url | ||
| 34 | + | ||
| 35 | + | ||
| 36 | +class ClipAsServiceImageEncoder: | ||
| 37 | + """ | ||
| 38 | + Image embedding encoder using clip-as-service Client. | ||
| 39 | + Encodes image URLs in batch; returns 1024-dim vectors (server model must match). | ||
| 40 | + """ | ||
| 41 | + | ||
| 42 | + def __init__( | ||
| 43 | + self, | ||
| 44 | + server: str = "grpc://127.0.0.1:51000", | ||
| 45 | + batch_size: int = 8, | ||
| 46 | + show_progress: bool = False, | ||
| 47 | + ): | ||
| 48 | + """ | ||
| 49 | + Args: | ||
| 50 | + server: clip-as-service server URI (e.g. grpc://127.0.0.1:51000 or http://127.0.0.1:51000). | ||
| 51 | + batch_size: batch size for encode requests. | ||
| 52 | + show_progress: whether to show progress bar when encoding. | ||
| 53 | + """ | ||
| 54 | + _ensure_clip_client_path() | ||
| 55 | + try: | ||
| 56 | + from clip_client import Client | ||
| 57 | + except ImportError as e: | ||
| 58 | + raise ImportError( | ||
| 59 | + "clip_client not found. Add third-party/clip-as-service/client to PYTHONPATH " | ||
| 60 | + "or run: pip install -e third-party/clip-as-service/client" | ||
| 61 | + ) from e | ||
| 62 | + | ||
| 63 | + self._server = server | ||
| 64 | + self._batch_size = batch_size | ||
| 65 | + self._show_progress = show_progress | ||
| 66 | + self._client = Client(server) | ||
| 67 | + | ||
| 68 | + def encode_image_urls( | ||
| 69 | + self, | ||
| 70 | + urls: List[str], | ||
| 71 | + batch_size: Optional[int] = None, | ||
| 72 | + ) -> List[Optional[np.ndarray]]: | ||
| 73 | + """ | ||
| 74 | + Encode a list of image URLs to vectors. | ||
| 75 | + | ||
| 76 | + Args: | ||
| 77 | + urls: list of image URLs (http/https or //host/path). | ||
| 78 | + batch_size: override instance batch_size for this call. | ||
| 79 | + | ||
| 80 | + Returns: | ||
| 81 | + List of vectors (1024-dim float32) or None for failed items, same length as urls. | ||
| 82 | + """ | ||
| 83 | + if not urls: | ||
| 84 | + return [] | ||
| 85 | + | ||
| 86 | + normalized = [_normalize_image_url(u) for u in urls] | ||
| 87 | + valid_indices = [i for i, u in enumerate(normalized) if u] | ||
| 88 | + if not valid_indices: | ||
| 89 | + return [None] * len(urls) | ||
| 90 | + | ||
| 91 | + valid_urls = [normalized[i] for i in valid_indices] | ||
| 92 | + bs = batch_size if batch_size is not None else self._batch_size | ||
| 93 | + out: List[Optional[np.ndarray]] = [None] * len(urls) | ||
| 94 | + | ||
| 95 | + try: | ||
| 96 | + # Client.encode(iterable of str) returns np.ndarray [N, D] for string input | ||
| 97 | + arr = self._client.encode( | ||
| 98 | + valid_urls, | ||
| 99 | + batch_size=bs, | ||
| 100 | + show_progress=self._show_progress, | ||
| 101 | + ) | ||
| 102 | + if arr is not None and hasattr(arr, "shape") and len(arr) == len(valid_indices): | ||
| 103 | + for j, idx in enumerate(valid_indices): | ||
| 104 | + row = arr[j] | ||
| 105 | + if row is not None and hasattr(row, "tolist"): | ||
| 106 | + out[idx] = np.asarray(row, dtype=np.float32) | ||
| 107 | + else: | ||
| 108 | + out[idx] = np.array(row, dtype=np.float32) | ||
| 109 | + else: | ||
| 110 | + logger.warning( | ||
| 111 | + "clip-as-service encode returned unexpected shape/length, " | ||
| 112 | + "expected %d vectors", len(valid_indices) | ||
| 113 | + ) | ||
| 114 | + except Exception as e: | ||
| 115 | + logger.warning("clip-as-service encode failed: %s", e, exc_info=True) | ||
| 116 | + | ||
| 117 | + return out | ||
| 118 | + | ||
| 119 | + def encode_image_from_url(self, url: str) -> Optional[np.ndarray]: | ||
| 120 | + """Encode a single image URL. Returns 1024-dim vector or None.""" | ||
| 121 | + results = self.encode_image_urls([url], batch_size=1) | ||
| 122 | + return results[0] if results else None |
embeddings/clip_model.py
| @@ -17,7 +17,7 @@ import cn_clip.clip as clip | @@ -17,7 +17,7 @@ import cn_clip.clip as clip | ||
| 17 | 17 | ||
| 18 | 18 | ||
| 19 | DEFAULT_MODEL_NAME = "ViT-H-14" | 19 | DEFAULT_MODEL_NAME = "ViT-H-14" |
| 20 | -MODEL_DOWNLOAD_DIR = "/data/tw/uat/EsSearcher" | 20 | +MODEL_DOWNLOAD_DIR = "/data/" |
| 21 | 21 | ||
| 22 | 22 | ||
| 23 | class ClipImageModel(object): | 23 | class ClipImageModel(object): |
| @@ -91,6 +91,23 @@ class ClipImageModel(object): | @@ -91,6 +91,23 @@ class ClipImageModel(object): | ||
| 91 | image = self.preprocess_image(image) | 91 | image = self.preprocess_image(image) |
| 92 | return self.encode_image(image) | 92 | return self.encode_image(image) |
| 93 | 93 | ||
| 94 | + def encode_image_urls( | ||
| 95 | + self, | ||
| 96 | + urls: List[str], | ||
| 97 | + batch_size: Optional[int] = None, | ||
| 98 | + ) -> List[Optional[np.ndarray]]: | ||
| 99 | + """ | ||
| 100 | + Encode a list of image URLs to vectors. Same interface as ClipAsServiceImageEncoder. | ||
| 101 | + | ||
| 102 | + Args: | ||
| 103 | + urls: list of image URLs or local paths. | ||
| 104 | + batch_size: batch size for internal batching (default 8). | ||
| 105 | + | ||
| 106 | + Returns: | ||
| 107 | + List of vectors (or None for failed items), same length as urls. | ||
| 108 | + """ | ||
| 109 | + return self.encode_batch(urls, batch_size=batch_size or 8) | ||
| 110 | + | ||
| 94 | def encode_batch(self, images: List[Union[str, Image.Image]], batch_size: int = 8) -> List[Optional[np.ndarray]]: | 111 | def encode_batch(self, images: List[Union[str, Image.Image]], batch_size: int = 8) -> List[Optional[np.ndarray]]: |
| 95 | results: List[Optional[np.ndarray]] = [] | 112 | results: List[Optional[np.ndarray]] = [] |
| 96 | for i in range(0, len(images), batch_size): | 113 | for i in range(0, len(images), batch_size): |
embeddings/cloud_text_encoder.py
| @@ -35,7 +35,7 @@ class CloudTextEncoder: | @@ -35,7 +35,7 @@ class CloudTextEncoder: | ||
| 35 | if not api_key: | 35 | if not api_key: |
| 36 | raise ValueError("DASHSCOPE_API_KEY must be set in environment or passed as parameter") | 36 | raise ValueError("DASHSCOPE_API_KEY must be set in environment or passed as parameter") |
| 37 | 37 | ||
| 38 | - # Use Beijing region by default | 38 | + # 以下是北京地域base-url,如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/compatible-mode/v1 |
| 39 | base_url = base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1" | 39 | base_url = base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1" |
| 40 | 40 | ||
| 41 | cls._instance.client = OpenAI( | 41 | cls._instance.client = OpenAI( |
embeddings/config.py
| @@ -21,7 +21,12 @@ class EmbeddingConfig(object): | @@ -21,7 +21,12 @@ class EmbeddingConfig(object): | ||
| 21 | TEXT_DEVICE = "cuda" # "cuda" or "cpu" (model may fall back to CPU if needed) | 21 | TEXT_DEVICE = "cuda" # "cuda" or "cpu" (model may fall back to CPU if needed) |
| 22 | TEXT_BATCH_SIZE = 32 | 22 | TEXT_BATCH_SIZE = 32 |
| 23 | 23 | ||
| 24 | - # Image embeddings (CN-CLIP) | 24 | + # Image embeddings |
| 25 | + # Option A: clip-as-service (Jina CLIP server, recommended) | ||
| 26 | + USE_CLIP_AS_SERVICE = os.getenv("USE_CLIP_AS_SERVICE", "true").lower() in ("1", "true", "yes") | ||
| 27 | + CLIP_AS_SERVICE_SERVER = os.getenv("CLIP_AS_SERVICE_SERVER", "grpc://127.0.0.1:51000") | ||
| 28 | + | ||
| 29 | + # Option B: local CN-CLIP (when USE_CLIP_AS_SERVICE=false) | ||
| 25 | IMAGE_MODEL_NAME = "ViT-H-14" | 30 | IMAGE_MODEL_NAME = "ViT-H-14" |
| 26 | IMAGE_DEVICE = None # type: Optional[str] # "cuda" / "cpu" / None(auto) | 31 | IMAGE_DEVICE = None # type: Optional[str] # "cuda" / "cpu" / None(auto) |
| 27 | 32 |
| @@ -0,0 +1,27 @@ | @@ -0,0 +1,27 @@ | ||
| 1 | +""" | ||
| 2 | +Protocols for embedding backends (structural typing, no inheritance required). | ||
| 3 | + | ||
| 4 | +Used by the embedding service so that any backend (ClipAsServiceImageEncoder, | ||
| 5 | +ClipImageModel, etc.) can be used as long as it implements the same interface. | ||
| 6 | +""" | ||
| 7 | + | ||
| 8 | +from typing import List, Optional, Protocol | ||
| 9 | + | ||
| 10 | +import numpy as np | ||
| 11 | + | ||
| 12 | + | ||
| 13 | +class ImageEncoderProtocol(Protocol): | ||
| 14 | + """Contract for image encoders used by the embedding service /embed/image endpoint.""" | ||
| 15 | + | ||
| 16 | + def encode_image_urls( | ||
| 17 | + self, | ||
| 18 | + urls: List[str], | ||
| 19 | + batch_size: Optional[int] = None, | ||
| 20 | + ) -> List[Optional[np.ndarray]]: | ||
| 21 | + """ | ||
| 22 | + Encode a list of image URLs to vectors. | ||
| 23 | + | ||
| 24 | + Returns: | ||
| 25 | + List of vectors (or None for failed items), same length as urls. | ||
| 26 | + """ | ||
| 27 | + ... |
embeddings/server.py
| @@ -16,6 +16,8 @@ from fastapi import FastAPI | @@ -16,6 +16,8 @@ from fastapi import FastAPI | ||
| 16 | from embeddings.config import CONFIG | 16 | from embeddings.config import CONFIG |
| 17 | from embeddings.bge_model import BgeTextModel | 17 | from embeddings.bge_model import BgeTextModel |
| 18 | from embeddings.clip_model import ClipImageModel | 18 | from embeddings.clip_model import ClipImageModel |
| 19 | +from embeddings.clip_as_service_encoder import ClipAsServiceImageEncoder | ||
| 20 | +from embeddings.protocols import ImageEncoderProtocol | ||
| 19 | 21 | ||
| 20 | logger = logging.getLogger(__name__) | 22 | logger = logging.getLogger(__name__) |
| 21 | 23 | ||
| @@ -23,9 +25,9 @@ app = FastAPI(title="saas-search Embedding Service", version="1.0.0") | @@ -23,9 +25,9 @@ app = FastAPI(title="saas-search Embedding Service", version="1.0.0") | ||
| 23 | 25 | ||
| 24 | # Models are loaded at startup, not lazily | 26 | # Models are loaded at startup, not lazily |
| 25 | _text_model: Optional[BgeTextModel] = None | 27 | _text_model: Optional[BgeTextModel] = None |
| 26 | -_image_model: Optional[ClipImageModel] = None | 28 | +_image_model: Optional[ImageEncoderProtocol] = None |
| 27 | open_text_model = True | 29 | open_text_model = True |
| 28 | -open_image_model = False | 30 | +open_image_model = True # Enable image embedding when using clip-as-service |
| 29 | 31 | ||
| 30 | _text_encode_lock = threading.Lock() | 32 | _text_encode_lock = threading.Lock() |
| 31 | _image_encode_lock = threading.Lock() | 33 | _image_encode_lock = threading.Lock() |
| @@ -49,15 +51,23 @@ def load_models(): | @@ -49,15 +51,23 @@ def load_models(): | ||
| 49 | raise | 51 | raise |
| 50 | 52 | ||
| 51 | 53 | ||
| 52 | - # Load image model | 54 | + # Load image model: clip-as-service (recommended) or local CN-CLIP |
| 53 | if open_image_model: | 55 | if open_image_model: |
| 54 | try: | 56 | try: |
| 55 | - logger.info(f"Loading image model: {CONFIG.IMAGE_MODEL_NAME} (device: {CONFIG.IMAGE_DEVICE})") | ||
| 56 | - _image_model = ClipImageModel( | ||
| 57 | - model_name=CONFIG.IMAGE_MODEL_NAME, | ||
| 58 | - device=CONFIG.IMAGE_DEVICE, | ||
| 59 | - ) | ||
| 60 | - logger.info("Image model loaded successfully") | 57 | + if CONFIG.USE_CLIP_AS_SERVICE: |
| 58 | + logger.info(f"Loading image encoder via clip-as-service: {CONFIG.CLIP_AS_SERVICE_SERVER}") | ||
| 59 | + _image_model = ClipAsServiceImageEncoder( | ||
| 60 | + server=CONFIG.CLIP_AS_SERVICE_SERVER, | ||
| 61 | + batch_size=CONFIG.IMAGE_BATCH_SIZE, | ||
| 62 | + ) | ||
| 63 | + logger.info("Image model (clip-as-service) loaded successfully") | ||
| 64 | + else: | ||
| 65 | + logger.info(f"Loading local image model: {CONFIG.IMAGE_MODEL_NAME} (device: {CONFIG.IMAGE_DEVICE})") | ||
| 66 | + _image_model = ClipImageModel( | ||
| 67 | + model_name=CONFIG.IMAGE_MODEL_NAME, | ||
| 68 | + device=CONFIG.IMAGE_DEVICE, | ||
| 69 | + ) | ||
| 70 | + logger.info("Image model (local CN-CLIP) loaded successfully") | ||
| 61 | except Exception as e: | 71 | except Exception as e: |
| 62 | logger.error(f"Failed to load image model: {e}", exc_info=True) | 72 | logger.error(f"Failed to load image model: {e}", exc_info=True) |
| 63 | raise | 73 | raise |
| @@ -125,20 +135,31 @@ def embed_image(images: List[str]) -> List[Optional[List[float]]]: | @@ -125,20 +135,31 @@ def embed_image(images: List[str]) -> List[Optional[List[float]]]: | ||
| 125 | raise RuntimeError("Image model not loaded") | 135 | raise RuntimeError("Image model not loaded") |
| 126 | out: List[Optional[List[float]]] = [None] * len(images) | 136 | out: List[Optional[List[float]]] = [None] * len(images) |
| 127 | 137 | ||
| 138 | + # Normalize inputs | ||
| 139 | + urls = [] | ||
| 140 | + indices = [] | ||
| 141 | + for i, url_or_path in enumerate(images): | ||
| 142 | + if url_or_path is None: | ||
| 143 | + continue | ||
| 144 | + if not isinstance(url_or_path, str): | ||
| 145 | + url_or_path = str(url_or_path) | ||
| 146 | + url_or_path = url_or_path.strip() | ||
| 147 | + if url_or_path: | ||
| 148 | + urls.append(url_or_path) | ||
| 149 | + indices.append(i) | ||
| 150 | + | ||
| 151 | + if not urls: | ||
| 152 | + return out | ||
| 153 | + | ||
| 128 | with _image_encode_lock: | 154 | with _image_encode_lock: |
| 129 | - for i, url_or_path in enumerate(images): | ||
| 130 | - try: | ||
| 131 | - if url_or_path is None: | ||
| 132 | - continue | ||
| 133 | - if not isinstance(url_or_path, str): | ||
| 134 | - url_or_path = str(url_or_path) | ||
| 135 | - url_or_path = url_or_path.strip() | ||
| 136 | - if not url_or_path: | ||
| 137 | - continue | ||
| 138 | - emb = _image_model.encode_image_from_url(url_or_path) | ||
| 139 | - out[i] = _as_list(emb) | ||
| 140 | - except Exception: | ||
| 141 | - out[i] = None | 155 | + try: |
| 156 | + # Both ClipAsServiceImageEncoder and ClipImageModel implement encode_image_urls(urls, batch_size) | ||
| 157 | + vectors = _image_model.encode_image_urls(urls, batch_size=CONFIG.IMAGE_BATCH_SIZE) | ||
| 158 | + for j, idx in enumerate(indices): | ||
| 159 | + out[idx] = _as_list(vectors[j] if j < len(vectors) else None) | ||
| 160 | + except Exception: | ||
| 161 | + for idx in indices: | ||
| 162 | + out[idx] = None | ||
| 142 | return out | 163 | return out |
| 143 | 164 | ||
| 144 | 165 |
requirements.txt
| @@ -40,3 +40,9 @@ click>=8.1.0 | @@ -40,3 +40,9 @@ click>=8.1.0 | ||
| 40 | pytest>=7.4.0 | 40 | pytest>=7.4.0 |
| 41 | pytest-asyncio>=0.21.0 | 41 | pytest-asyncio>=0.21.0 |
| 42 | httpx>=0.24.0 | 42 | httpx>=0.24.0 |
| 43 | + | ||
| 44 | +# clip-as-service client (for image embeddings via clip-as-service) | ||
| 45 | +# Install with: pip install -e third-party/clip-as-service/client | ||
| 46 | +# Or: pip install jina docarray | ||
| 47 | +jina>=3.12.0 | ||
| 48 | +docarray[common]>=0.19.0,<0.30.0 |