Commit cc11ae049ef46f40dfc99065dbb82e914cf71b09
1 parent
e7a2c0b7
cnclip
Showing
17 changed files
with
468 additions
and
282 deletions
Show diff stats
| @@ -33,5 +33,9 @@ CACHE_DIR=.cache | @@ -33,5 +33,9 @@ CACHE_DIR=.cache | ||
| 33 | API_BASE_URL=http://43.166.252.75:6002 | 33 | API_BASE_URL=http://43.166.252.75:6002 |
| 34 | 34 | ||
| 35 | 35 | ||
| 36 | +# 国内 | ||
| 36 | DASHSCOPE_API_KEY=sk-c3b8d4db061840aa8effb748df2a997b | 37 | DASHSCOPE_API_KEY=sk-c3b8d4db061840aa8effb748df2a997b |
| 38 | +# 美国 | ||
| 39 | +DASHSCOPE_API_KEY=sk-482cc3ff37a8467dab134a7a46830556 | ||
| 40 | + | ||
| 37 | OPENAI_API_KEY=sk-HvmTMKtuznibZ75l7L2uF2jiaYocCthqd8Cbdkl09KTE7Ft0 | 41 | OPENAI_API_KEY=sk-HvmTMKtuznibZ75l7L2uF2jiaYocCthqd8Cbdkl09KTE7Ft0 |
.gitignore
| 1 | -# Prerequisites | ||
| 2 | -*.d | ||
| 3 | - | ||
| 4 | -# Compiled Object files | ||
| 5 | -*.slo | ||
| 6 | -*.lo | ||
| 7 | -*.o | ||
| 8 | -*.obj | ||
| 9 | - | ||
| 10 | -# Precompiled Headers | ||
| 11 | -*.gch | ||
| 12 | -*.pch | ||
| 13 | - | ||
| 14 | -# Compiled Dynamic libraries | ||
| 15 | -*.so | ||
| 16 | -*.dylib | ||
| 17 | -*.dll | ||
| 18 | - | ||
| 19 | -# Fortran module files | ||
| 20 | -*.mod | ||
| 21 | -*.smod | ||
| 22 | - | ||
| 23 | -# Compiled Static libraries | ||
| 24 | -*.lai | ||
| 25 | -*.la | ||
| 26 | -*.a | ||
| 27 | -*.lib | ||
| 28 | - | ||
| 29 | -# Executables | ||
| 30 | -*.exe | ||
| 31 | -*.out | ||
| 32 | -*.app | ||
| 33 | - | ||
| 34 | -# Projects | ||
| 35 | -.vscode | ||
| 36 | - | ||
| 37 | -model/* | ||
| 38 | -model.bin.* | ||
| 39 | -*.pyc | ||
| 40 | -*.swp | ||
| 41 | -.pydevproject | ||
| 42 | -.DS_Store | ||
| 43 | -.project | ||
| 44 | -.idea | ||
| 45 | -.data | ||
| 46 | -__pycache__ | ||
| 47 | -*.log | ||
| 48 | -*.bak*/ | ||
| 49 | -.history.txt | ||
| 50 | -log/ | ||
| 51 | -logs/ | ||
| 52 | -.venv/ | ||
| 53 | -nohup.out | ||
| 54 | -temp/ | ||
| 55 | -indexer_input* | ||
| 56 | -log.* | ||
| 57 | -output | ||
| 58 | -data.* | ||
| 59 | -*.json | ||
| 60 | -*.idx | ||
| 61 | -*.npy | ||
| 62 | -*.tgz | ||
| 63 | -*.tar.gz | ||
| 64 | -*.tar | ||
| 65 | -*.pt | ||
| 66 | - | ||
| 67 | -*.log | ||
| 68 | -log/ | ||
| 69 | -logs_*/ | ||
| 70 | - | ||
| 71 | -*.xlsx | 1 | +# Prerequisites |
| 2 | +*.d | ||
| 3 | + | ||
| 4 | +# Compiled Object files | ||
| 5 | +*.slo | ||
| 6 | +*.lo | ||
| 7 | +*.o | ||
| 8 | +*.obj | ||
| 9 | + | ||
| 10 | +# Precompiled Headers | ||
| 11 | +*.gch | ||
| 12 | +*.pch | ||
| 13 | + | ||
| 14 | +# Compiled Dynamic libraries | ||
| 15 | +*.so | ||
| 16 | +*.dylib | ||
| 17 | +*.dll | ||
| 18 | + | ||
| 19 | +# Fortran module files | ||
| 20 | +*.mod | ||
| 21 | +*.smod | ||
| 22 | + | ||
| 23 | +# Compiled Static libraries | ||
| 24 | +*.lai | ||
| 25 | +*.la | ||
| 26 | +*.a | ||
| 27 | +*.lib | ||
| 28 | + | ||
| 29 | +# Executables | ||
| 30 | +*.exe | ||
| 31 | +*.out | ||
| 32 | +*.app | ||
| 33 | + | ||
| 34 | +# Projects | ||
| 35 | +.vscode | ||
| 36 | + | ||
| 37 | +model/* | ||
| 38 | +model.bin.* | ||
| 39 | +*.pyc | ||
| 40 | +*.swp | ||
| 41 | +.pydevproject | ||
| 42 | +.DS_Store | ||
| 43 | +.project | ||
| 44 | +.idea | ||
| 45 | +.data | ||
| 46 | +__pycache__ | ||
| 47 | +*.log | ||
| 48 | +*.bak*/ | ||
| 49 | +.history.txt | ||
| 50 | +log/ | ||
| 51 | +logs/ | ||
| 52 | +.venv/ | ||
| 53 | +.venv-cnclip/ | ||
| 54 | +nohup.out | ||
| 55 | +temp/ | ||
| 56 | +indexer_input* | ||
| 57 | +log.* | ||
| 58 | +output | ||
| 59 | +data.* | ||
| 60 | +*.json | ||
| 61 | +*.idx | ||
| 62 | +*.npy | ||
| 63 | +*.tgz | ||
| 64 | +*.tar.gz | ||
| 65 | +*.tar | ||
| 66 | +*.pt | ||
| 67 | + | ||
| 68 | +*.log | ||
| 69 | +log/ | ||
| 70 | +logs_*/ | ||
| 71 | + | ||
| 72 | +*.xlsx |
api/indexer_app.py
| @@ -165,6 +165,26 @@ async def startup_event(): | @@ -165,6 +165,26 @@ async def startup_event(): | ||
| 165 | try: | 165 | try: |
| 166 | init_indexer_service(es_host=es_host) | 166 | init_indexer_service(es_host=es_host) |
| 167 | logger.info("Indexer service initialized successfully") | 167 | logger.info("Indexer service initialized successfully") |
| 168 | + | ||
| 169 | + # Eager warmup: build per-tenant transformer bundles at startup to avoid | ||
| 170 | + # first-request latency (config/provider/encoder + transformer wiring). | ||
| 171 | + try: | ||
| 172 | + if _incremental_service is not None and _config is not None: | ||
| 173 | + tenants = [] | ||
| 174 | + # config.tenant_config shape: {"default": {...}, "tenants": {"1": {...}, ...}} | ||
| 175 | + tc = getattr(_config, "tenant_config", None) or {} | ||
| 176 | + if isinstance(tc, dict): | ||
| 177 | + tmap = tc.get("tenants") | ||
| 178 | + if isinstance(tmap, dict): | ||
| 179 | + tenants = [str(k) for k in tmap.keys()] | ||
| 180 | + # If no explicit tenants configured, skip warmup. | ||
| 181 | + if tenants: | ||
| 182 | + warm = _incremental_service.warmup_transformers(tenants) | ||
| 183 | + logger.info("Indexer warmup completed: %s", warm) | ||
| 184 | + else: | ||
| 185 | + logger.info("Indexer warmup skipped (no tenant ids in config.tenant_config.tenants)") | ||
| 186 | + except Exception as e: | ||
| 187 | + logger.warning("Indexer warmup failed (service still starts): %s", e, exc_info=True) | ||
| 168 | except Exception as e: | 188 | except Exception as e: |
| 169 | logger.error(f"Failed to initialize indexer service: {e}", exc_info=True) | 189 | logger.error(f"Failed to initialize indexer service: {e}", exc_info=True) |
| 170 | logger.warning("Indexer service will start but may not function correctly") | 190 | logger.warning("Indexer service will start but may not function correctly") |
api/routes/indexer.py
| @@ -245,16 +245,30 @@ async def build_docs(request: BuildDocsRequest): | @@ -245,16 +245,30 @@ async def build_docs(request: BuildDocsRequest): | ||
| 245 | break | 245 | break |
| 246 | if title_text and str(title_text).strip(): | 246 | if title_text and str(title_text).strip(): |
| 247 | try: | 247 | try: |
| 248 | + import numpy as np | ||
| 249 | + | ||
| 248 | embeddings = encoder.encode(title_text) | 250 | embeddings = encoder.encode(title_text) |
| 249 | if embeddings is not None and len(embeddings) > 0: | 251 | if embeddings is not None and len(embeddings) > 0: |
| 250 | emb0 = embeddings[0] | 252 | emb0 = embeddings[0] |
| 251 | - import numpy as np | ||
| 252 | - | ||
| 253 | - if isinstance(emb0, np.ndarray): | 253 | + if isinstance(emb0, np.ndarray) and emb0.size > 0: |
| 254 | doc["title_embedding"] = emb0.tolist() | 254 | doc["title_embedding"] = emb0.tolist() |
| 255 | - except Exception: | 255 | + else: |
| 256 | + logger.warning( | ||
| 257 | + "build-docs: title_embedding skipped (encoder returned None/invalid for title: %s...)", | ||
| 258 | + title_text[:50], | ||
| 259 | + ) | ||
| 260 | + else: | ||
| 261 | + logger.warning( | ||
| 262 | + "build-docs: title_embedding skipped (encoder returned empty for title: %s...)", | ||
| 263 | + title_text[:50], | ||
| 264 | + ) | ||
| 265 | + except Exception as e: | ||
| 266 | + logger.warning( | ||
| 267 | + "build-docs: title_embedding failed for spu_id=%s: %s", | ||
| 268 | + doc.get("spu_id"), | ||
| 269 | + e, | ||
| 270 | + ) | ||
| 256 | # 构建 doc 接口不因为 embedding 失败而整体失败 | 271 | # 构建 doc 接口不因为 embedding 失败而整体失败 |
| 257 | - pass | ||
| 258 | 272 | ||
| 259 | docs.append(doc) | 273 | docs.append(doc) |
| 260 | except Exception as e: | 274 | except Exception as e: |
docs/CNCLIP_SERVICE说明文档.md
| 1 | -# CN-CLIP 服务(Legacy) | 1 | +# CN-CLIP 服务(clip-as-service)说明 |
| 2 | 2 | ||
| 3 | -> **注意**:当前主流程使用 embedding 服务(端口 6005),见 `docs/QUICKSTART.md` 3.3。本文档为 legacy gRPC 服务说明。 | 3 | +> 本文是本仓库的 CN-CLIP 运行手册与约束说明。主流程仍是 `embedding` 服务(`6005`);当 `embeddings/config.py` 中 `USE_CLIP_AS_SERVICE=true` 时,`embedding` 会调用本 gRPC 服务(默认 `grpc://127.0.0.1:51000`)生成图片向量。 |
| 4 | 4 | ||
| 5 | ---- | 5 | +## 1. 设计目标与官方对齐 |
| 6 | 6 | ||
| 7 | -# TODO(历史) | 7 | +- 采用 `clip-as-service` 的标准拆分:`clip-server`(服务端)与 `clip-client`(客户端)可独立安装。 |
| 8 | +- 服务协议使用 gRPC,符合官方推荐与本项目现有调用链。 | ||
| 9 | +- 保持“主项目环境”和“CN-CLIP 专用环境”解耦,避免 `grpcio/jina/docarray` 与主项目依赖互相污染。 | ||
| 8 | 10 | ||
| 9 | -现在,跟自己 cn_clip 预估的结果,有差别: | ||
| 10 | -这个比较接近: 可能是预处理逻辑有些不一样。 | ||
| 11 | -https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg | ||
| 12 | -normlize后的结果: | ||
| 13 | -0.046295166015625,0.012847900390625,-0.0299530029296875,-0.01629638671875,0.01708984375,0.00487518310546875,0.01284027099609375,0.01348876953125,0.04617632180452347, 0.012860896065831184, -0.030133124440908432, -0.0162516962736845, | ||
| 14 | -0.04617632180452347, 0.012860896065831184, -0.030133124440908432, -0.0162516962736845, 0.01708567887544632, 0.005110889207571745 | 11 | +官方仓库(安装方式、server/client 分离、基本使用示例): |
| 12 | +[jina-ai/clip-as-service](https://github.com/jina-ai/clip-as-service) | ||
| 15 | 13 | ||
| 16 | -以下两个,差别非常大,感觉不是一个模型: | ||
| 17 | -https://aisearch.cdn.bcebos.com/fileManager/GtB5doGAr1skTx38P7fb7Q/182.jpg?authorization=bce-auth-v1%2F7e22d8caf5af46cc9310f1e3021709f3%2F2025-12-30T04%3A45%3A38Z%2F86400%2Fhost%2Ffe222039926cb7ff593021af40268c782b8892598114e24773d0c1bfc976a8df | ||
| 18 | -https://oss.essa.cn/2e353867-7496-4d4e-a7c8-0af50f49f6eb.jpg?x-oss-process=image/resize,m_lfit,w_2048,h_2048 | 14 | +## 2. 当前架构(本仓库) |
| 19 | 15 | ||
| 20 | -curl -X POST "http://43.166.252.75:5000/embedding/generate_image_embeddings" -H "Content-Type: application/json" -d '[ | ||
| 21 | - { | ||
| 22 | - "id": "test_1", | ||
| 23 | - "pic_url": "https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg" | ||
| 24 | - } | ||
| 25 | - ]' | ||
| 26 | - | 16 | +- **服务启动脚本**:`scripts/start_cnclip_service.sh` |
| 17 | +- **服务停止脚本**:`scripts/stop_cnclip_service.sh` | ||
| 18 | +- **环境初始化脚本**:`scripts/setup_cnclip_venv.sh` | ||
| 19 | +- **统一编排入口**:`scripts/service_ctl.sh`(`restart.sh` 调用它) | ||
| 20 | +- **默认端口**:`51000` | ||
| 21 | +- **默认模型**:`CN-CLIP/ViT-H-14` | ||
| 22 | +- **默认协议**:gRPC | ||
| 27 | 23 | ||
| 24 | +## 3. 环境准备策略(推荐做法) | ||
| 28 | 25 | ||
| 29 | -# CN-CLIP 编码服务 | 26 | +### 3.1 推荐:专用 venv(`.venv-cnclip`) |
| 30 | 27 | ||
| 31 | -## 模块说明 | ||
| 32 | - | ||
| 33 | -CN-CLIP 编码服务基于 [clip-as-service](https://github.com/jina-ai/clip-as-service) 提供中文 CLIP 模型的文本和图像编码功能。服务使用 gRPC 协议,支持批量编码,返回固定维度的向量表示。 | 28 | +```bash |
| 29 | +./scripts/setup_cnclip_venv.sh | ||
| 30 | +``` | ||
| 34 | 31 | ||
| 35 | -### 功能特性 | 32 | +脚本会创建 `.venv-cnclip`,并处理已知兼容性问题(`grpcio`、`jina`、`docarray`、`pkg_resources` 等),避免在主 `.venv` 中反复冲突。 |
| 36 | 33 | ||
| 37 | -- 文本编码:将中文文本编码为向量 | ||
| 38 | -- 图像编码:将图像(本地文件或远程 URL)编码为向量 | ||
| 39 | -- 混合编码:同时编码文本和图像 | ||
| 40 | -- 批量处理:支持批量编码,提高效率 | 34 | +### 3.2 启动时的环境选择 |
| 41 | 35 | ||
| 42 | -### 技术架构 | 36 | +`start_cnclip_service.sh` 的优先级: |
| 43 | 37 | ||
| 44 | -- **框架**: clip-as-service (基于 Jina) | ||
| 45 | -- **模型**: CN-CLIP/ViT-L-14-336(默认) | ||
| 46 | -- **协议**: gRPC(默认,官方推荐) | ||
| 47 | -- **运行时**: PyTorch | 38 | +1. 若存在 `.venv-cnclip`,优先使用; |
| 39 | +2. 否则回退到项目统一环境(`source activate.sh`); | ||
| 40 | +3. 若两者都不可用,启动失败并提示修复动作。 | ||
| 48 | 41 | ||
| 49 | -## 启动服务 | 42 | +## 4. 服务管理方式(推荐) |
| 50 | 43 | ||
| 51 | -### 基本用法 | 44 | +### 4.1 单独启动/停止 |
| 52 | 45 | ||
| 53 | ```bash | 46 | ```bash |
| 54 | -./scripts/start_cnclip_service.sh | 47 | +./scripts/start_cnclip_service.sh --device cuda |
| 48 | +./scripts/stop_cnclip_service.sh | ||
| 55 | ``` | 49 | ``` |
| 56 | 50 | ||
| 57 | -### 启动参数 | ||
| 58 | - | ||
| 59 | -| 参数 | 说明 | 默认值 | | ||
| 60 | -|------|------|--------| | ||
| 61 | -| `--port PORT` | 服务端口 | 51000 | | ||
| 62 | -| `--device DEVICE` | 设备类型:cuda 或 cpu | 自动检测 | | ||
| 63 | -| `--batch-size SIZE` | 批处理大小 | 32 | | ||
| 64 | -| `--num-workers NUM` | 预处理线程数 | 4 | | ||
| 65 | -| `--dtype TYPE` | 数据类型:float16 或 float32 | float16 | | ||
| 66 | -| `--model-name NAME` | 模型名称 | CN-CLIP/ViT-L-14-336 | | ||
| 67 | -| `--replicas NUM` | 副本数 | 1 | | ||
| 68 | - | ||
| 69 | -### 示例 | 51 | +### 4.2 统一编排(推荐日常用法) |
| 70 | 52 | ||
| 71 | ```bash | 53 | ```bash |
| 72 | -# 使用默认配置启动 | ||
| 73 | -./scripts/start_cnclip_service.sh | ||
| 74 | - | ||
| 75 | -# 指定端口和设备 | ||
| 76 | -./scripts/start_cnclip_service.sh --port 52000 --device cpu | ||
| 77 | - | ||
| 78 | -# 使用其他模型 | ||
| 79 | -./scripts/start_cnclip_service.sh --model-name CN-CLIP/ViT-H-14 | 54 | +./scripts/service_ctl.sh restart |
| 55 | +# 或 | ||
| 56 | +./restart.sh | ||
| 80 | ``` | 57 | ``` |
| 81 | 58 | ||
| 82 | -### 停止服务 | 59 | +`service_ctl.sh` 在启动 `cnclip` 时默认注入 `CNCLIP_DEVICE=cuda`。 |
| 60 | +若机器无 GPU 或希望改用 CPU,可在 `.env` 设置: | ||
| 83 | 61 | ||
| 84 | ```bash | 62 | ```bash |
| 85 | -./scripts/stop_cnclip_service.sh | 63 | +CNCLIP_DEVICE=cpu |
| 86 | ``` | 64 | ``` |
| 87 | 65 | ||
| 88 | -## API 接口说明 | 66 | +## 5. GPU 使用与验证 |
| 89 | 67 | ||
| 90 | -### Python 客户端 | 68 | +### 5.1 必须点 |
| 91 | 69 | ||
| 92 | -服务使用 gRPC 协议,必须使用 Python 客户端: | 70 | +- 启动日志显示 `device: cuda` 仅代表配置传入成功; |
| 71 | +- 只有在**首次编码请求触发模型加载后**,`nvidia-smi` 才一定能看到显存占用。 | ||
| 93 | 72 | ||
| 94 | -```python | ||
| 95 | -from clip_client import Client | 73 | +### 5.2 推荐验证步骤 |
| 96 | 74 | ||
| 97 | -# 创建客户端(使用 grpc:// 协议) | ||
| 98 | -c = Client('grpc://localhost:51000') | ||
| 99 | -``` | 75 | +1) 启动服务: |
| 100 | 76 | ||
| 101 | -### 编码接口 | 77 | +```bash |
| 78 | +./scripts/start_cnclip_service.sh --port 51000 --device cuda | ||
| 79 | +``` | ||
| 102 | 80 | ||
| 103 | -#### 1. 文本编码 | 81 | +2) 发送一次请求(触发模型加载): |
| 104 | 82 | ||
| 105 | -```python | 83 | +```bash |
| 84 | +PYTHONPATH="third-party/clip-as-service/client:${PYTHONPATH}" NO_VERSION_CHECK=1 .venv-cnclip/bin/python -c " | ||
| 106 | from clip_client import Client | 85 | from clip_client import Client |
| 107 | - | ||
| 108 | c = Client('grpc://localhost:51000') | 86 | c = Client('grpc://localhost:51000') |
| 109 | - | ||
| 110 | -# 编码单个文本 | ||
| 111 | -result = c.encode(['这是测试文本']) | ||
| 112 | -print(result.shape) # (1, 1024) | ||
| 113 | - | ||
| 114 | -# 编码多个文本 | ||
| 115 | -result = c.encode(['文本1', '文本2', '文本3']) | ||
| 116 | -print(result.shape) # (3, 1024) | 87 | +r = c.encode(['测试']) |
| 88 | +print('shape:', r.shape) | ||
| 89 | +" | ||
| 117 | ``` | 90 | ``` |
| 118 | 91 | ||
| 119 | -#### 2. 图像编码 | 92 | +3) 观察 GPU: |
| 120 | 93 | ||
| 121 | -```python | ||
| 122 | -# 编码远程图像 URL | ||
| 123 | -result = c.encode(['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg']) | ||
| 124 | -print(result.shape) # (1, 1024) | ||
| 125 | - | ||
| 126 | -# 编码本地图像文件 | ||
| 127 | -result = c.encode(['/path/to/image.jpg']) | ||
| 128 | -print(result.shape) # (1, 1024) | ||
| 129 | -``` | ||
| 130 | - | ||
| 131 | -#### 3. 混合编码 | ||
| 132 | - | ||
| 133 | -```python | ||
| 134 | -# 同时编码文本和图像 | ||
| 135 | -result = c.encode([ | ||
| 136 | - '这是文本', | ||
| 137 | - 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg', | ||
| 138 | - '另一个文本' | ||
| 139 | -]) | ||
| 140 | -print(result.shape) # (3, 1024) | 94 | +```bash |
| 95 | +nvidia-smi | ||
| 141 | ``` | 96 | ``` |
| 142 | 97 | ||
| 143 | -### 返回格式 | 98 | +预期: |
| 99 | +- `shape` 为 `(1, 1024)`; | ||
| 100 | +- `nvidia-smi` 出现对应 `python`/`clip_server` 进程并有显存占用。 | ||
| 144 | 101 | ||
| 145 | -- **类型**: `numpy.ndarray` | ||
| 146 | -- **形状**: `(N, 1024)`,其中 N 是输入数量 | ||
| 147 | -- **数据类型**: `float32` | ||
| 148 | -- **维度**: 1024(CN-CLIP 模型的 embedding 维度) | 102 | +## 6. 使用方式(客户端) |
| 149 | 103 | ||
| 150 | -### 支持的模型 | 104 | +### 6.1 在本仓库中(推荐) |
| 151 | 105 | ||
| 152 | -| 模型名称 | 说明 | 推荐场景 | | ||
| 153 | -|---------|------|---------| | ||
| 154 | -| `CN-CLIP/ViT-B-16` | 基础版本,速度快 | 对速度要求高的场景 | | ||
| 155 | -| `CN-CLIP/ViT-L-14` | 平衡版本 | 通用场景 | | ||
| 156 | -| `CN-CLIP/ViT-L-14-336` | 高分辨率版本(默认) | 需要处理高分辨率图像 | | ||
| 157 | -| `CN-CLIP/ViT-H-14` | 大型版本,精度高 | 对精度要求高的场景 | | ||
| 158 | -| `CN-CLIP/RN50` | ResNet-50 版本 | 兼容性场景 | | 106 | +- 服务消费者一般是 `embedding` 服务,不建议业务侧直接连 `cnclip`。 |
| 107 | +- 若需手动调试,可在主 `.venv` 安装 client,或通过 `PYTHONPATH` 使用 vendored client。 | ||
| 159 | 108 | ||
| 160 | -## 测试 | 109 | +示例: |
| 161 | 110 | ||
| 162 | -运行测试脚本: | 111 | +```python |
| 112 | +from clip_client import Client | ||
| 163 | 113 | ||
| 164 | -```bash | ||
| 165 | -./scripts/test_cnclip_service.sh | 114 | +c = Client("grpc://127.0.0.1:51000") |
| 115 | +vec = c.encode(["https://example.com/a.jpg", "测试文本"]) | ||
| 116 | +print(vec.shape) # (2, 1024) | ||
| 166 | ``` | 117 | ``` |
| 167 | 118 | ||
| 168 | -测试脚本会验证: | ||
| 169 | -- 文本编码功能 | ||
| 170 | -- 图像编码功能(远程 URL) | ||
| 171 | -- 混合编码功能 | 119 | +### 6.2 常见误区 |
| 172 | 120 | ||
| 173 | -每个测试会显示 embedding 的维度和前 20 个数字。 | 121 | +- ❌ 用 `http://localhost:51000` 当成 HTTP 服务访问; |
| 122 | +- ❌ 只看“启动成功”就判断已用 GPU,不发请求不看 `nvidia-smi`; | ||
| 123 | +- ❌ 在主 `.venv` 直接安装 server 依赖导致依赖树污染。 | ||
| 174 | 124 | ||
| 175 | -## 查看日志 | 125 | +## 7. 已知兼容性说明(关键信息) |
| 176 | 126 | ||
| 177 | -```bash | ||
| 178 | -tail -f /data/tw/saas-search/logs/cnclip_service.log | ||
| 179 | -``` | ||
| 180 | - | ||
| 181 | -## 常见问题 | ||
| 182 | - | ||
| 183 | -### 1. 服务启动失败 | ||
| 184 | - | ||
| 185 | -- 检查端口是否被占用:`lsof -i :51000` | ||
| 186 | -- 检查 conda 环境是否正确激活 | ||
| 187 | -- 查看日志文件获取详细错误信息 | 127 | +- `clip-as-service` 在本项目场景下依赖链较老,`grpcio`/`jina`/`docarray` 组合在 Python 3.12 上易触发源码构建问题。 |
| 128 | +- `setuptools>=82` 移除了 `pkg_resources`;而部分依赖链仍会导入它,因此专用脚本固定了兼容范围。 | ||
| 129 | +- `setup_cnclip_venv.sh` 中存在“为可运行性而做的约束收敛”,这是有意行为,不建议手动放开。 | ||
| 188 | 130 | ||
| 189 | -### 2. 客户端连接失败 | 131 | +## 8. 排障速查 |
| 190 | 132 | ||
| 191 | -确保使用正确的协议: | 133 | +### 8.1 启动失败 |
| 192 | 134 | ||
| 193 | -```python | ||
| 194 | -# 正确:使用 grpc:// | ||
| 195 | -c = Client('grpc://localhost:51000') | 135 | +- 查看日志:`tail -f logs/cnclip_service.log` |
| 136 | +- 检查端口占用:`lsof -i :51000` | ||
| 137 | +- 重新构建环境:`rm -rf .venv-cnclip && ./scripts/setup_cnclip_venv.sh` | ||
| 196 | 138 | ||
| 197 | -# 错误:不要使用 http:// | ||
| 198 | -# c = Client('http://localhost:51000') # 会失败 | ||
| 199 | -``` | 139 | +### 8.2 连接失败 |
| 200 | 140 | ||
| 201 | -### 3. 编码失败 | 141 | +- 确认客户端使用 `grpc://` 协议; |
| 142 | +- 确认端口与服务端一致(默认 `51000`)。 | ||
| 202 | 143 | ||
| 203 | -- 检查服务是否正常运行 | ||
| 204 | -- 检查输入格式是否正确 | ||
| 205 | -- 查看服务日志排查错误 | 144 | +### 8.3 看不到 GPU 进程 |
| 206 | 145 | ||
| 207 | -### 4. 依赖安装 | 146 | +- 先发一次编码请求,再看 `nvidia-smi`; |
| 147 | +- 确认启动参数或环境变量为 `cuda`(`--device cuda` 或 `CNCLIP_DEVICE=cuda`); | ||
| 148 | +- 确认日志中无模型加载异常。 | ||
| 208 | 149 | ||
| 209 | -确保已安装必要的依赖: | 150 | +## 9. 与其他文档的关系 |
| 210 | 151 | ||
| 211 | -```bash | ||
| 212 | -pip install clip-client | ||
| 213 | -``` | 152 | +- 开发总览:`docs/QUICKSTART.md` |
| 153 | +- 系统架构:`docs/DEVELOPER_GUIDE.md` | ||
| 154 | +- 向量服务说明:`embeddings/README.md` | ||
| 214 | 155 | ||
| 215 | -服务端依赖会在启动脚本中自动检查。 | 156 | +本文件聚焦 CN-CLIP(clip-as-service)专项,不重复解释项目通用内容。 |
docs/DEVELOPER_GUIDE.md
| @@ -401,6 +401,7 @@ services: | @@ -401,6 +401,7 @@ services: | ||
| 401 | | 运维、日志、多环境、故障 | [Usage-Guide.md](./Usage-Guide.md) | | 401 | | 运维、日志、多环境、故障 | [Usage-Guide.md](./Usage-Guide.md) | |
| 402 | | 索引模块职责与 Java 对接 | [indexer/README.md](../indexer/README.md) | | 402 | | 索引模块职责与 Java 对接 | [indexer/README.md](../indexer/README.md) | |
| 403 | | 向量模块与 clip-as-service | [embeddings/README.md](../embeddings/README.md) | | 403 | | 向量模块与 clip-as-service | [embeddings/README.md](../embeddings/README.md) | |
| 404 | +| CN-CLIP 服务专项(环境/运维/GPU) | [CNCLIP_SERVICE说明文档.md](./CNCLIP_SERVICE说明文档.md) | | ||
| 404 | 405 | ||
| 405 | ### 10.2 仓库内入口 | 406 | ### 10.2 仓库内入口 |
| 406 | 407 |
docs/QUICKSTART.md
| @@ -495,6 +495,7 @@ lsof -i :6004 | @@ -495,6 +495,7 @@ lsof -i :6004 | ||
| 495 | | `docs/搜索API对接指南.md` | 搜索 API 完整说明 | | 495 | | `docs/搜索API对接指南.md` | 搜索 API 完整说明 | |
| 496 | | `indexer/README.md` | 索引模块职责与接口 | | 496 | | `indexer/README.md` | 索引模块职责与接口 | |
| 497 | | `embeddings/README.md` | 向量化服务说明 | | 497 | | `embeddings/README.md` | 向量化服务说明 | |
| 498 | +| `docs/CNCLIP_SERVICE说明文档.md` | CN-CLIP/clip-as-service 专项(环境、GPU、运维) | | ||
| 498 | | `reranker/README.md` | 重排服务说明 | | 499 | | `reranker/README.md` | 重排服务说明 | |
| 499 | 500 | ||
| 500 | --- | 501 | --- |
embeddings/clip_as_service_encoder.py
| @@ -21,6 +21,8 @@ def _ensure_clip_client_path(): | @@ -21,6 +21,8 @@ def _ensure_clip_client_path(): | ||
| 21 | client_path = os.path.join(repo_root, "third-party", "clip-as-service", "client") | 21 | client_path = os.path.join(repo_root, "third-party", "clip-as-service", "client") |
| 22 | if os.path.isdir(client_path) and client_path not in sys.path: | 22 | if os.path.isdir(client_path) and client_path not in sys.path: |
| 23 | sys.path.insert(0, client_path) | 23 | sys.path.insert(0, client_path) |
| 24 | + # Skip client version check to avoid importing helper (pkg_resources); no conda/separate env | ||
| 25 | + os.environ.setdefault("NO_VERSION_CHECK", "1") | ||
| 24 | 26 | ||
| 25 | 27 | ||
| 26 | def _normalize_image_url(url: str) -> str: | 28 | def _normalize_image_url(url: str) -> str: |
embeddings/server.py
| @@ -52,6 +52,9 @@ def load_models(): | @@ -52,6 +52,9 @@ def load_models(): | ||
| 52 | 52 | ||
| 53 | 53 | ||
| 54 | # Load image model: clip-as-service (recommended) or local CN-CLIP | 54 | # Load image model: clip-as-service (recommended) or local CN-CLIP |
| 55 | + # IMPORTANT: failures here should NOT prevent the whole service from starting. | ||
| 56 | + # If image model cannot be loaded, we keep `_image_model` as None and only | ||
| 57 | + # disable /embed/image while keeping /embed/text fully functional. | ||
| 55 | if open_image_model: | 58 | if open_image_model: |
| 56 | try: | 59 | try: |
| 57 | if CONFIG.USE_CLIP_AS_SERVICE: | 60 | if CONFIG.USE_CLIP_AS_SERVICE: |
| @@ -69,8 +72,12 @@ def load_models(): | @@ -69,8 +72,12 @@ def load_models(): | ||
| 69 | ) | 72 | ) |
| 70 | logger.info("Image model (local CN-CLIP) loaded successfully") | 73 | logger.info("Image model (local CN-CLIP) loaded successfully") |
| 71 | except Exception as e: | 74 | except Exception as e: |
| 72 | - logger.error(f"Failed to load image model: {e}", exc_info=True) | ||
| 73 | - raise | 75 | + logger.error( |
| 76 | + "Failed to load image model; image embeddings will be disabled but text embeddings remain available: %s", | ||
| 77 | + e, | ||
| 78 | + exc_info=True, | ||
| 79 | + ) | ||
| 80 | + _image_model = None | ||
| 74 | 81 | ||
| 75 | logger.info("All embedding models loaded successfully, service ready") | 82 | logger.info("All embedding models loaded successfully, service ready") |
| 76 | 83 | ||
| @@ -132,7 +139,9 @@ def embed_text(texts: List[str]) -> List[Optional[List[float]]]: | @@ -132,7 +139,9 @@ def embed_text(texts: List[str]) -> List[Optional[List[float]]]: | ||
| 132 | @app.post("/embed/image") | 139 | @app.post("/embed/image") |
| 133 | def embed_image(images: List[str]) -> List[Optional[List[float]]]: | 140 | def embed_image(images: List[str]) -> List[Optional[List[float]]]: |
| 134 | if _image_model is None: | 141 | if _image_model is None: |
| 135 | - raise RuntimeError("Image model not loaded") | 142 | + # Graceful degradation: keep API shape but return all None |
| 143 | + logger.warning("embed_image called but image model is not loaded; returning all None vectors") | ||
| 144 | + return [None] * len(images) | ||
| 136 | out: List[Optional[List[float]]] = [None] * len(images) | 145 | out: List[Optional[List[float]]] = [None] * len(images) |
| 137 | 146 | ||
| 138 | # Normalize inputs | 147 | # Normalize inputs |
indexer/incremental_service.py
| @@ -32,20 +32,77 @@ class IncrementalIndexerService: | @@ -32,20 +32,77 @@ class IncrementalIndexerService: | ||
| 32 | self.category_id_to_name = load_category_mapping(db_engine) | 32 | self.category_id_to_name = load_category_mapping(db_engine) |
| 33 | logger.info(f"Preloaded {len(self.category_id_to_name)} category mappings") | 33 | logger.info(f"Preloaded {len(self.category_id_to_name)} category mappings") |
| 34 | 34 | ||
| 35 | - # 缓存:避免频繁增量请求重复加载config / 构造transformer | 35 | + # 缓存:避免频繁增量请求重复加载 config / 构造 transformer |
| 36 | + # NOTE: 为避免“首请求”懒加载导致超时,尽量在进程启动阶段完成初始化: | ||
| 37 | + # - config.yaml 加载 | ||
| 38 | + # - translator / embedding / image encoder provider 初始化(best-effort) | ||
| 36 | self._config: Optional[Any] = None | 39 | self._config: Optional[Any] = None |
| 37 | self._config_lock = threading.Lock() | 40 | self._config_lock = threading.Lock() |
| 41 | + self._translator: Optional[Any] = None | ||
| 42 | + self._translation_prompts: Optional[Dict[str, Any]] = None | ||
| 43 | + self._searchable_option_dimensions: Optional[List[str]] = None | ||
| 44 | + self._shared_text_encoder: Optional[Any] = None | ||
| 45 | + self._shared_image_encoder: Optional[Any] = None | ||
| 46 | + | ||
| 47 | + self._eager_init() | ||
| 38 | # tenant_id -> (transformer, encoder, enable_embedding) | 48 | # tenant_id -> (transformer, encoder, enable_embedding) |
| 39 | self._transformer_cache: Dict[str, Tuple[Any, Optional[Any], bool]] = {} | 49 | self._transformer_cache: Dict[str, Tuple[Any, Optional[Any], bool]] = {} |
| 40 | self._transformer_cache_lock = threading.Lock() | 50 | self._transformer_cache_lock = threading.Lock() |
| 41 | 51 | ||
| 52 | + def _eager_init(self) -> None: | ||
| 53 | + """Best-effort eager initialization to reduce first-request latency.""" | ||
| 54 | + try: | ||
| 55 | + self._config = ConfigLoader("config/config.yaml").load_config() | ||
| 56 | + except Exception as e: | ||
| 57 | + logger.warning("Failed to eagerly load config/config.yaml: %s", e, exc_info=True) | ||
| 58 | + self._config = None | ||
| 59 | + return | ||
| 60 | + | ||
| 61 | + try: | ||
| 62 | + self._translation_prompts = getattr(self._config.query_config, "translation_prompts", {}) or {} | ||
| 63 | + self._searchable_option_dimensions = ( | ||
| 64 | + getattr(self._config.spu_config, "searchable_option_dimensions", None) | ||
| 65 | + or ["option1", "option2", "option3"] | ||
| 66 | + ) | ||
| 67 | + except Exception: | ||
| 68 | + self._translation_prompts = {} | ||
| 69 | + self._searchable_option_dimensions = ["option1", "option2", "option3"] | ||
| 70 | + | ||
| 71 | + # Translator provider (best-effort) | ||
| 72 | + try: | ||
| 73 | + from providers import create_translation_provider | ||
| 74 | + | ||
| 75 | + self._translator = create_translation_provider(self._config.query_config) | ||
| 76 | + except Exception as e: | ||
| 77 | + logger.warning("Failed to initialize translation provider at startup: %s", e) | ||
| 78 | + self._translator = None | ||
| 79 | + | ||
| 80 | + # Text embedding encoder (best-effort) | ||
| 81 | + if bool(getattr(self._config.query_config, "enable_text_embedding", False)): | ||
| 82 | + try: | ||
| 83 | + from embeddings.text_encoder import BgeEncoder | ||
| 84 | + | ||
| 85 | + self._shared_text_encoder = BgeEncoder() | ||
| 86 | + except Exception as e: | ||
| 87 | + logger.warning("Failed to initialize BgeEncoder at startup: %s", e) | ||
| 88 | + self._shared_text_encoder = None | ||
| 89 | + | ||
| 90 | + # Image embedding encoder (best-effort; may be unavailable if embedding service not running) | ||
| 91 | + try: | ||
| 92 | + from embeddings.image_encoder import CLIPImageEncoder | ||
| 93 | + | ||
| 94 | + self._shared_image_encoder = CLIPImageEncoder() | ||
| 95 | + except Exception as e: | ||
| 96 | + logger.debug("Image encoder not available for indexer startup: %s", e) | ||
| 97 | + self._shared_image_encoder = None | ||
| 98 | + | ||
| 42 | def _get_config(self) -> Any: | 99 | def _get_config(self) -> Any: |
| 43 | """Load config once per process (thread-safe).""" | 100 | """Load config once per process (thread-safe).""" |
| 44 | if self._config is not None: | 101 | if self._config is not None: |
| 45 | return self._config | 102 | return self._config |
| 46 | with self._config_lock: | 103 | with self._config_lock: |
| 47 | if self._config is None: | 104 | if self._config is None: |
| 48 | - self._config = ConfigLoader().load_config() | 105 | + self._config = ConfigLoader("config/config.yaml").load_config() |
| 49 | return self._config | 106 | return self._config |
| 50 | 107 | ||
| 51 | def _get_transformer_bundle(self, tenant_id: str) -> Tuple[Any, Optional[Any], bool]: | 108 | def _get_transformer_bundle(self, tenant_id: str) -> Tuple[Any, Optional[Any], bool]: |
| @@ -64,26 +121,39 @@ class IncrementalIndexerService: | @@ -64,26 +121,39 @@ class IncrementalIndexerService: | ||
| 64 | config = self._get_config() | 121 | config = self._get_config() |
| 65 | enable_embedding = bool(getattr(config.query_config, "enable_text_embedding", False)) | 122 | enable_embedding = bool(getattr(config.query_config, "enable_text_embedding", False)) |
| 66 | 123 | ||
| 67 | - encoder: Optional[Any] = None | ||
| 68 | - if enable_embedding: | 124 | + # Use shared encoders/providers preloaded at startup when可用; |
| 125 | + # 若启动时初始化失败,则在首次请求时做一次兜底初始化,避免永久禁用。 | ||
| 126 | + encoder: Optional[Any] = self._shared_text_encoder if enable_embedding else None | ||
| 127 | + if enable_embedding and encoder is None: | ||
| 69 | try: | 128 | try: |
| 70 | from embeddings.text_encoder import BgeEncoder | 129 | from embeddings.text_encoder import BgeEncoder |
| 130 | + | ||
| 71 | encoder = BgeEncoder() | 131 | encoder = BgeEncoder() |
| 132 | + self._shared_text_encoder = encoder | ||
| 133 | + logger.info("BgeEncoder lazily initialized in _get_transformer_bundle") | ||
| 72 | except Exception as e: | 134 | except Exception as e: |
| 73 | - logger.warning(f"Failed to initialize BgeEncoder for tenant_id={tenant_id}: {e}") | 135 | + logger.warning("Failed to lazily initialize BgeEncoder for tenant_id=%s: %s", tenant_id, e) |
| 74 | encoder = None | 136 | encoder = None |
| 75 | enable_embedding = False | 137 | enable_embedding = False |
| 76 | 138 | ||
| 77 | - image_encoder: Optional[Any] = None | ||
| 78 | - try: | ||
| 79 | - from embeddings.image_encoder import CLIPImageEncoder | ||
| 80 | - image_encoder = CLIPImageEncoder() | ||
| 81 | - except Exception as e: | ||
| 82 | - logger.debug("Image encoder not available for indexer: %s", e) | 139 | + image_encoder: Optional[Any] = self._shared_image_encoder |
| 140 | + if image_encoder is None: | ||
| 141 | + try: | ||
| 142 | + from embeddings.image_encoder import CLIPImageEncoder | ||
| 143 | + | ||
| 144 | + image_encoder = CLIPImageEncoder() | ||
| 145 | + self._shared_image_encoder = image_encoder | ||
| 146 | + logger.info("CLIPImageEncoder lazily initialized in _get_transformer_bundle") | ||
| 147 | + except Exception as e: | ||
| 148 | + logger.debug("Image encoder not available for indexer (lazy init): %s", e) | ||
| 149 | + image_encoder = None | ||
| 83 | 150 | ||
| 84 | transformer = create_document_transformer( | 151 | transformer = create_document_transformer( |
| 85 | category_id_to_name=self.category_id_to_name, | 152 | category_id_to_name=self.category_id_to_name, |
| 86 | tenant_id=tenant_id, | 153 | tenant_id=tenant_id, |
| 154 | + searchable_option_dimensions=self._searchable_option_dimensions, | ||
| 155 | + translator=self._translator, | ||
| 156 | + translation_prompts=self._translation_prompts, | ||
| 87 | encoder=encoder, | 157 | encoder=encoder, |
| 88 | enable_title_embedding=False, # batch fill later | 158 | enable_title_embedding=False, # batch fill later |
| 89 | image_encoder=image_encoder, | 159 | image_encoder=image_encoder, |
| @@ -97,6 +167,23 @@ class IncrementalIndexerService: | @@ -97,6 +167,23 @@ class IncrementalIndexerService: | ||
| 97 | self._transformer_cache[str(tenant_id)] = bundle | 167 | self._transformer_cache[str(tenant_id)] = bundle |
| 98 | return bundle | 168 | return bundle |
| 99 | 169 | ||
| 170 | + def warmup_transformers(self, tenant_ids: List[str]) -> Dict[str, Any]: | ||
| 171 | + """ | ||
| 172 | + Eagerly build transformer bundles for given tenant ids. | ||
| 173 | + This moves per-tenant initialization to startup phase, reducing first-request latency. | ||
| 174 | + """ | ||
| 175 | + start = time.time() | ||
| 176 | + ok = 0 | ||
| 177 | + failed: List[Dict[str, str]] = [] | ||
| 178 | + for tid in tenant_ids or []: | ||
| 179 | + try: | ||
| 180 | + _ = self._get_transformer_bundle(str(tid)) | ||
| 181 | + ok += 1 | ||
| 182 | + except Exception as e: | ||
| 183 | + failed.append({"tenant_id": str(tid), "error": str(e)}) | ||
| 184 | + elapsed_ms = round((time.time() - start) * 1000.0, 3) | ||
| 185 | + return {"requested": len(tenant_ids or []), "warmed": ok, "failed": failed, "elapsed_ms": elapsed_ms} | ||
| 186 | + | ||
| 100 | @staticmethod | 187 | @staticmethod |
| 101 | def _normalize_spu_ids(spu_ids: List[str]) -> List[int]: | 188 | def _normalize_spu_ids(spu_ids: List[str]) -> List[int]: |
| 102 | """Normalize SPU IDs to ints for DB queries; skip non-int IDs.""" | 189 | """Normalize SPU IDs to ints for DB queries; skip non-int IDs.""" |
indexer/process_products.py
| @@ -23,8 +23,11 @@ from config.env_config import REDIS_CONFIG | @@ -23,8 +23,11 @@ from config.env_config import REDIS_CONFIG | ||
| 23 | 23 | ||
| 24 | # 配置 | 24 | # 配置 |
| 25 | BATCH_SIZE = 20 | 25 | BATCH_SIZE = 20 |
| 26 | -API_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1" | ||
| 27 | -MODEL_NAME = "qwen-max" | 26 | +# 华北2(北京):https://dashscope.aliyuncs.com/compatible-mode/v1 |
| 27 | +# 新加坡:https://dashscope-intl.aliyuncs.com/compatible-mode/v1 | ||
| 28 | +# 美国(弗吉尼亚):https://dashscope-us.aliyuncs.com/compatible-mode/v1 | ||
| 29 | +API_BASE_URL = "https://dashscope-us.aliyuncs.com/compatible-mode/v1" | ||
| 30 | +MODEL_NAME = "qwen-flash" | ||
| 28 | API_KEY = os.environ.get("DASHSCOPE_API_KEY") | 31 | API_KEY = os.environ.get("DASHSCOPE_API_KEY") |
| 29 | MAX_RETRIES = 3 | 32 | MAX_RETRIES = 3 |
| 30 | RETRY_DELAY = 5 # 秒 | 33 | RETRY_DELAY = 5 # 秒 |
| @@ -398,8 +401,9 @@ def parse_markdown_table(markdown_content: str) -> List[Dict[str, str]]: | @@ -398,8 +401,9 @@ def parse_markdown_table(markdown_content: str) -> List[Dict[str, str]]: | ||
| 398 | 401 | ||
| 399 | # 表格行处理 | 402 | # 表格行处理 |
| 400 | if line.startswith('|'): | 403 | if line.startswith('|'): |
| 401 | - # 分隔行(----) | ||
| 402 | - if set(line.replace('|', '').strip()) <= {'-', ':'}: | 404 | + # 分隔行(---- 或 :---: 等;允许空格,如 "| ---- | ---- |") |
| 405 | + sep_chars = line.replace('|', '').strip().replace(' ', '') | ||
| 406 | + if sep_chars and set(sep_chars) <= {'-', ':'}: | ||
| 403 | data_started = True | 407 | data_started = True |
| 404 | continue | 408 | continue |
| 405 | 409 |
query/translator.py
| @@ -59,9 +59,12 @@ class Translator: | @@ -59,9 +59,12 @@ class Translator: | ||
| 59 | 59 | ||
| 60 | Default model is 'qwen' which uses Alibaba Cloud DashScope API. | 60 | Default model is 'qwen' which uses Alibaba Cloud DashScope API. |
| 61 | """ | 61 | """ |
| 62 | +# 华北2(北京):https://dashscope.aliyuncs.com/compatible-mode/v1 | ||
| 63 | +# 新加坡:https://dashscope-intl.aliyuncs.com/compatible-mode/v1 | ||
| 64 | +# 美国(弗吉尼亚):https://dashscope-us.aliyuncs.com/compatible-mode/v1 | ||
| 62 | 65 | ||
| 63 | DEEPL_API_URL = "https://api.deepl.com/v2/translate" # Pro tier | 66 | DEEPL_API_URL = "https://api.deepl.com/v2/translate" # Pro tier |
| 64 | - QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1" # 北京地域 | 67 | + QWEN_BASE_URL = "https://dashscope-us.aliyuncs.com/compatible-mode/v1" # 美国(弗吉尼亚) |
| 65 | # QWEN_BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1" # 新加坡 | 68 | # QWEN_BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1" # 新加坡 |
| 66 | # 如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/compatible-mode/v1 | 69 | # 如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/compatible-mode/v1 |
| 67 | QWEN_MODEL = "qwen-mt-flash" # 快速翻译模型 | 70 | QWEN_MODEL = "qwen-mt-flash" # 快速翻译模型 |
scripts/create_tenant_index.sh
| @@ -61,6 +61,7 @@ echo | @@ -61,6 +61,7 @@ echo | ||
| 61 | echo "删除索引: $ES_INDEX" | 61 | echo "删除索引: $ES_INDEX" |
| 62 | echo | 62 | echo |
| 63 | curl -X DELETE "${ES_HOST}/${ES_INDEX}" $AUTH_PARAM -s -o /dev/null -w "HTTP状态码: %{http_code}\n" | 63 | curl -X DELETE "${ES_HOST}/${ES_INDEX}" $AUTH_PARAM -s -o /dev/null -w "HTTP状态码: %{http_code}\n" |
| 64 | + | ||
| 64 | echo | 65 | echo |
| 65 | echo "创建索引: $ES_INDEX" | 66 | echo "创建索引: $ES_INDEX" |
| 66 | echo | 67 | echo |
scripts/service_ctl.sh
| @@ -138,7 +138,11 @@ start_one() { | @@ -138,7 +138,11 @@ start_one() { | ||
| 138 | case "${service}" in | 138 | case "${service}" in |
| 139 | clip|cnclip) | 139 | clip|cnclip) |
| 140 | echo "[start] ${service} (managed by native script)" | 140 | echo "[start] ${service} (managed by native script)" |
| 141 | - bash -lc "${cmd}" >> "${lf}" 2>&1 || true | 141 | + if [ "${service}" = "cnclip" ]; then |
| 142 | + CNCLIP_DEVICE="${CNCLIP_DEVICE:-cuda}" bash -lc "${cmd}" >> "${lf}" 2>&1 || true | ||
| 143 | + else | ||
| 144 | + bash -lc "${cmd}" >> "${lf}" 2>&1 || true | ||
| 145 | + fi | ||
| 142 | if is_running_by_pid "${service}" || is_running_by_port "${service}"; then | 146 | if is_running_by_pid "${service}" || is_running_by_port "${service}"; then |
| 143 | echo "[ok] ${service} started (log=${lf})" | 147 | echo "[ok] ${service} started (log=${lf})" |
| 144 | else | 148 | else |
| @@ -272,6 +276,7 @@ Default target set (when no service provided): | @@ -272,6 +276,7 @@ Default target set (when no service provided): | ||
| 272 | 276 | ||
| 273 | Optional startup flags: | 277 | Optional startup flags: |
| 274 | START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh | 278 | START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh |
| 279 | + CNCLIP_DEVICE=cuda|cpu ./scripts/service_ctl.sh start cnclip | ||
| 275 | EOF | 280 | EOF |
| 276 | } | 281 | } |
| 277 | 282 |
| @@ -0,0 +1,85 @@ | @@ -0,0 +1,85 @@ | ||
| 1 | +#!/bin/bash | ||
| 2 | +# | ||
| 3 | +# 创建 CN-CLIP 服务专用虚拟环境(.venv-cnclip),用于隔离 clip-server 及其依赖 | ||
| 4 | +#(如 grpcio、jina、docarray 等),避免与主项目 .venv 的依赖冲突或构建失败。 | ||
| 5 | +# | ||
| 6 | +# 使用方式: | ||
| 7 | +# ./scripts/setup_cnclip_venv.sh | ||
| 8 | +# | ||
| 9 | +# 完成后,start_cnclip_service.sh 会自动优先使用 .venv-cnclip(若存在)。 | ||
| 10 | +# | ||
| 11 | +set -e | ||
| 12 | + | ||
| 13 | +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" | ||
| 14 | +VENV_DIR="${PROJECT_ROOT}/.venv-cnclip" | ||
| 15 | +CLIP_SERVER="${PROJECT_ROOT}/third-party/clip-as-service/server" | ||
| 16 | + | ||
| 17 | +echo "==========================================" | ||
| 18 | +echo "CN-CLIP 专用环境 (.venv-cnclip)" | ||
| 19 | +echo "==========================================" | ||
| 20 | +echo "" | ||
| 21 | + | ||
| 22 | +if [ ! -d "${CLIP_SERVER}" ]; then | ||
| 23 | + echo "错误: 未找到 clip-as-service 服务目录: ${CLIP_SERVER}" >&2 | ||
| 24 | + exit 1 | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +# 使用系统或当前默认的 python3 创建 venv(不依赖主项目 .venv) | ||
| 28 | +if [ -d "${VENV_DIR}" ]; then | ||
| 29 | + echo "已存在 .venv-cnclip,将复用并更新依赖。" | ||
| 30 | +else | ||
| 31 | + echo "创建虚拟环境: ${VENV_DIR}" | ||
| 32 | + python3 -m venv "${VENV_DIR}" | ||
| 33 | +fi | ||
| 34 | + | ||
| 35 | +# 激活并升级基础工具。固定 setuptools<82,因 82 移除了 pkg_resources,而 jina/hubble 仍依赖它。 | ||
| 36 | +"${VENV_DIR}/bin/pip" install --upgrade pip wheel | ||
| 37 | +"${VENV_DIR}/bin/pip" install 'setuptools>=66,<82' | ||
| 38 | + | ||
| 39 | +# grpcio:jina 要求 <=1.68;1.57 在 Python 3.12 上无预编译 wheel(会触发源码构建与 pkg_resources 报错)。1.68.x 有 3.12 wheel,故先安装 1.68.x,避免后续安装 clip-server 时解析到 1.57。 | ||
| 40 | +echo "安装 grpcio(优先预编译 wheel,兼容 jina<=1.68)..." | ||
| 41 | +if ! "${VENV_DIR}/bin/pip" install --only-binary=grpcio 'grpcio>=1.46.0,<=1.68.1' 2>/dev/null; then | ||
| 42 | + echo "镜像无匹配 wheel,尝试 PyPI..." | ||
| 43 | + if ! "${VENV_DIR}/bin/pip" install --only-binary=grpcio -i https://pypi.org/simple 'grpcio>=1.46.0,<=1.68.1' 2>/dev/null; then | ||
| 44 | + echo "错误: 无法获取 grpcio 预编译包,请检查网络或使用 Python 3.10/3.11。" >&2 | ||
| 45 | + exit 1 | ||
| 46 | + fi | ||
| 47 | +fi | ||
| 48 | + | ||
| 49 | +# 安装 docarray==0.21(clip-server 要求) | ||
| 50 | +echo "安装 docarray(clip-server 要求 0.21)..." | ||
| 51 | +"${VENV_DIR}/bin/pip" install 'docarray==0.21.0' | ||
| 52 | + | ||
| 53 | +# jina 3.27 声明 grpcio<=1.57,会触发源码构建;先装 grpcio 配套的 reflection/health 1.68(wheel),再以 --no-deps 装 jina,最后补齐 jina 的其余依赖(不包含 grpcio) | ||
| 54 | +echo "安装 grpcio-reflection / grpcio-health-checking(与已装 grpcio 1.68 一致)..." | ||
| 55 | +"${VENV_DIR}/bin/pip" install --only-binary=:all: 'grpcio-reflection>=1.46,<=1.68' 'grpcio-health-checking>=1.46,<=1.68' 2>/dev/null || "${VENV_DIR}/bin/pip" install 'grpcio-reflection>=1.46,<=1.68' 'grpcio-health-checking>=1.46,<=1.68' | ||
| 56 | +echo "安装 jina(--no-deps,避免拉取 grpcio 1.57)..." | ||
| 57 | +"${VENV_DIR}/bin/pip" install 'jina>=3.27,<3.28' --no-deps | ||
| 58 | +# 补齐 jina 3.27 的运行时依赖(见 jina 的 install_requires,不含 grpcio)。 | ||
| 59 | +# 关键约束: | ||
| 60 | +# - pydantic<2、opentelemetry-sdk<1.20、urllib3<2:与 jina 3.27 保持一致,减少 resolver 冲突告警 | ||
| 61 | +"${VENV_DIR}/bin/pip" install 'uvicorn[standard]<=0.23.1' 'fastapi>=0.76' 'protobuf>=3.19' 'pyyaml>=5.3' 'pydantic<2' 'prometheus_client>=0.12' 'aiofiles' 'opentelemetry-api>=1.12,<1.20' 'opentelemetry-sdk>=1.14,<1.20' 'opentelemetry-exporter-otlp>=1.12,<1.20' 'opentelemetry-instrumentation-grpc>=0.35' 'opentelemetry-instrumentation-fastapi>=0.33' 'opentelemetry-instrumentation-aiohttp-client>=0.33' 'opentelemetry-exporter-prometheus>=0.33b0' 'websockets' 'python-multipart' 'urllib3<2' | ||
| 62 | + | ||
| 63 | +# 安装 CN-CLIP | ||
| 64 | +echo "安装 cn-clip..." | ||
| 65 | +"${VENV_DIR}/bin/pip" install cn-clip | ||
| 66 | + | ||
| 67 | +# 安装 clip-server 以 --no-deps 方式,避免因 docarray==0.21 解析到旧 jina 并拉取 grpcio 1.57 源码构建。依赖由前面已装的 jina/grpcio/cn-clip 与下面显式安装补齐。 | ||
| 68 | +echo "安装 clip-server[cn_clip](--no-deps,再补齐依赖)..." | ||
| 69 | +"${VENV_DIR}/bin/pip" install -e "${CLIP_SERVER}[cn_clip]" --no-deps | ||
| 70 | +# clip-server 的 install_requires:ftfy, torch, regex, torchvision, jina, docarray, prometheus-client, open_clip_torch, pillow-avif-plugin;jina/cn_clip 已装;补齐 ftfy regex open_clip_torch pillow-avif-plugin(torch/torchvision 由 cn-clip 带入,prometheus_client 由 jina 带入) | ||
| 71 | +"${VENV_DIR}/bin/pip" install 'ftfy' 'regex' 'open_clip_torch>=2.8.0,<2.9.0' 'pillow-avif-plugin' | ||
| 72 | +# grpc_health 需要 protobuf 含 runtime_version(>=4);open_clip_torch 会拉低到 3.20,此处再升级 | ||
| 73 | +"${VENV_DIR}/bin/pip" install 'protobuf>=4,<6' | ||
| 74 | +# jina 3.27 声明的可选依赖,clip 服务需用到 | ||
| 75 | +"${VENV_DIR}/bin/pip" install 'jcloud>=0.0.35' | ||
| 76 | + | ||
| 77 | +echo "" | ||
| 78 | +echo "==========================================" | ||
| 79 | +echo "✓ .venv-cnclip 已就绪" | ||
| 80 | +echo "==========================================" | ||
| 81 | +echo "启动 CN-CLIP 服务时将自动使用此环境:" | ||
| 82 | +echo " ./scripts/start_cnclip_service.sh" | ||
| 83 | +echo "或:" | ||
| 84 | +echo " ./scripts/service_ctl.sh start cnclip" | ||
| 85 | +echo "" |
scripts/start_clip_service.sh
| @@ -19,19 +19,15 @@ echo "========================================" | @@ -19,19 +19,15 @@ echo "========================================" | ||
| 19 | echo "Starting CLIP vector service (clip-server)" | 19 | echo "Starting CLIP vector service (clip-server)" |
| 20 | echo "========================================" | 20 | echo "========================================" |
| 21 | 21 | ||
| 22 | -# Load conda and activate dedicated environment, if available | ||
| 23 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | ||
| 24 | -if [ -f "$CONDA_ROOT/etc/profile.d/conda.sh" ]; then | ||
| 25 | - # shellcheck disable=SC1091 | ||
| 26 | - source "$CONDA_ROOT/etc/profile.d/conda.sh" | ||
| 27 | - conda activate clip_service || { | ||
| 28 | - echo "Failed to activate conda env 'clip_service'. Please create it first." >&2 | ||
| 29 | - echo "See CLIP_SERVICE_README.md for setup instructions." >&2 | 22 | +# Use project unified environment (same as activate.sh / service_ctl) |
| 23 | +if [ -z "${VIRTUAL_ENV}" ] && [ -z "${CONDA_DEFAULT_ENV}" ]; then | ||
| 24 | + if [ -f "$(pwd)/activate.sh" ]; then | ||
| 25 | + # shellcheck source=activate.sh | ||
| 26 | + source "$(pwd)/activate.sh" | ||
| 27 | + else | ||
| 28 | + echo "Error: activate.sh not found. Run from project root or source activate.sh first." >&2 | ||
| 30 | exit 1 | 29 | exit 1 |
| 31 | - } | ||
| 32 | -else | ||
| 33 | - echo "Warning: $CONDA_ROOT/etc/profile.d/conda.sh not found." >&2 | ||
| 34 | - echo "Please activate the 'clip_service' environment manually before running this script." >&2 | 30 | + fi |
| 35 | fi | 31 | fi |
| 36 | 32 | ||
| 37 | if [ -f "${PID_FILE}" ]; then | 33 | if [ -f "${PID_FILE}" ]; then |
scripts/start_cnclip_service.sh
| @@ -175,44 +175,55 @@ if lsof -Pi :${PORT} -sTCP:LISTEN -t >/dev/null 2>&1; then | @@ -175,44 +175,55 @@ if lsof -Pi :${PORT} -sTCP:LISTEN -t >/dev/null 2>&1; then | ||
| 175 | exit 1 | 175 | exit 1 |
| 176 | fi | 176 | fi |
| 177 | 177 | ||
| 178 | -# 检查 conda 环境 | ||
| 179 | -if [ -z "${CONDA_DEFAULT_ENV}" ] || [ "${CONDA_DEFAULT_ENV}" != "clip_service" ]; then | ||
| 180 | - echo -e "${YELLOW}警告: 当前未激活 clip_service 环境${NC}" | ||
| 181 | - echo -e "${YELLOW}正在激活环境...${NC}" | ||
| 182 | - | ||
| 183 | - CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | ||
| 184 | - if [ -f "$CONDA_ROOT/etc/profile.d/conda.sh" ]; then | ||
| 185 | - source "$CONDA_ROOT/etc/profile.d/conda.sh" | ||
| 186 | - conda activate clip_service | ||
| 187 | - echo -e "${GREEN}✓ 环境已激活${NC}" | 178 | +# 优先使用 CN-CLIP 专用环境(避免与主项目依赖冲突;需先运行 ./scripts/setup_cnclip_venv.sh) |
| 179 | +# 若无 .venv-cnclip,则使用项目统一环境(activate.sh) | ||
| 180 | +CNCLIP_VENV="${PROJECT_ROOT}/.venv-cnclip" | ||
| 181 | +if [ -x "${CNCLIP_VENV}/bin/python" ]; then | ||
| 182 | + export PATH="${CNCLIP_VENV}/bin:${PATH}" | ||
| 183 | + export VIRTUAL_ENV="${CNCLIP_VENV}" | ||
| 184 | + echo -e "${GREEN}✓ 使用 CN-CLIP 专用环境: .venv-cnclip${NC}" | ||
| 185 | +elif [ -z "${VIRTUAL_ENV}" ] && [ -z "${CONDA_DEFAULT_ENV}" ]; then | ||
| 186 | + echo -e "${BLUE}激活项目环境...${NC}" | ||
| 187 | + if [ -f "${PROJECT_ROOT}/activate.sh" ]; then | ||
| 188 | + # shellcheck source=../activate.sh | ||
| 189 | + source "${PROJECT_ROOT}/activate.sh" | ||
| 188 | else | 190 | else |
| 189 | - echo -e "${RED}错误: 无法找到 conda 初始化脚本: $CONDA_ROOT/etc/profile.d/conda.sh${NC}" | 191 | + echo -e "${RED}错误: 未找到 ${PROJECT_ROOT}/activate.sh${NC}" |
| 192 | + echo -e "${YELLOW}建议先创建 CN-CLIP 专用环境: ./scripts/setup_cnclip_venv.sh${NC}" | ||
| 190 | exit 1 | 193 | exit 1 |
| 191 | fi | 194 | fi |
| 195 | + echo -e "${GREEN}✓ 使用环境: ${VIRTUAL_ENV:-${CONDA_DEFAULT_ENV:-unknown}}${NC}" | ||
| 192 | else | 196 | else |
| 193 | - echo -e "${GREEN}✓ Conda 环境: ${CONDA_DEFAULT_ENV}${NC}" | 197 | + echo -e "${GREEN}✓ 使用当前环境: ${VIRTUAL_ENV:-${CONDA_DEFAULT_ENV:-unknown}}${NC}" |
| 194 | fi | 198 | fi |
| 195 | 199 | ||
| 196 | -# 检查 Python 依赖 | 200 | +# 检查 Python 依赖(CN-CLIP 服务端需要 cn_clip 与 clip_server) |
| 197 | echo -e "${BLUE}检查 Python 依赖...${NC}" | 201 | echo -e "${BLUE}检查 Python 依赖...${NC}" |
| 198 | python -c "import cn_clip" 2>/dev/null || { | 202 | python -c "import cn_clip" 2>/dev/null || { |
| 199 | echo -e "${RED}错误: cn_clip 未安装${NC}" | 203 | echo -e "${RED}错误: cn_clip 未安装${NC}" |
| 200 | - echo -e "${YELLOW}请运行: pip install cn-clip${NC}" | 204 | + echo -e "${YELLOW}在项目环境中安装: pip install cn-clip 或 pip install -r requirements_ml.txt${NC}" |
| 201 | exit 1 | 205 | exit 1 |
| 202 | } | 206 | } |
| 203 | 207 | ||
| 204 | -python -c "from clip_client import Client" 2>/dev/null || { | ||
| 205 | - echo -e "${RED}错误: clip_client 未安装${NC}" | ||
| 206 | - echo -e "${YELLOW}请运行: pip install clip-client${NC}" | 208 | +# clip_server 通过 PYTHONPATH 加载(见下方启动命令),此处仅做可导入性检查 |
| 209 | +export PYTHONPATH="${CLIP_SERVER_DIR}:${PYTHONPATH}" | ||
| 210 | +python -c "import clip_server" 2>/dev/null || { | ||
| 211 | + echo -e "${RED}错误: clip_server 不可用${NC}" | ||
| 212 | + echo -e "${YELLOW}推荐使用专用环境(避免与主项目依赖冲突):${NC}" | ||
| 213 | + echo -e "${YELLOW} ./scripts/setup_cnclip_venv.sh${NC}" | ||
| 214 | + echo -e "${YELLOW}或在当前环境中安装: pip install -e third-party/clip-as-service/server[cn_clip]${NC}" | ||
| 207 | exit 1 | 215 | exit 1 |
| 208 | } | 216 | } |
| 209 | 217 | ||
| 210 | -echo -e "${GREEN}✓ 所有依赖已安装${NC}" | 218 | +echo -e "${GREEN}✓ 所有依赖已就绪${NC}" |
| 211 | echo "" | 219 | echo "" |
| 212 | 220 | ||
| 213 | -# 自动检测设备 | 221 | +# 自动检测设备(可通过环境变量 CNCLIP_DEVICE 指定,供 service_ctl/restart 使用) |
| 214 | if [ "${DEVICE}" == "auto" ]; then | 222 | if [ "${DEVICE}" == "auto" ]; then |
| 215 | - if command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null; then | 223 | + if [ -n "${CNCLIP_DEVICE:-}" ]; then |
| 224 | + DEVICE="${CNCLIP_DEVICE}" | ||
| 225 | + echo -e "${GREEN}✓ 设备: ${DEVICE}(来自 CNCLIP_DEVICE)${NC}" | ||
| 226 | + elif command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null; then | ||
| 216 | DEVICE="cuda" | 227 | DEVICE="cuda" |
| 217 | echo -e "${GREEN}✓ 检测到 NVIDIA GPU,使用 CUDA${NC}" | 228 | echo -e "${GREEN}✓ 检测到 NVIDIA GPU,使用 CUDA${NC}" |
| 218 | else | 229 | else |
| @@ -263,7 +274,7 @@ if [ -f "${FLOW_FILE}" ] && [ ! -f "${FLOW_FILE}.original" ]; then | @@ -263,7 +274,7 @@ if [ -f "${FLOW_FILE}" ] && [ ! -f "${FLOW_FILE}.original" ]; then | ||
| 263 | echo -e "${YELLOW}已备份原配置文件: ${FLOW_FILE}.original${NC}" | 274 | echo -e "${YELLOW}已备份原配置文件: ${FLOW_FILE}.original${NC}" |
| 264 | fi | 275 | fi |
| 265 | 276 | ||
| 266 | -# 生成新的配置文件(使用官方默认配置,只指定模型名称) | 277 | +# 生成新的配置文件(使用官方默认配置,显式传入 device 以便使用 GPU) |
| 267 | cat > "${TEMP_FLOW_FILE}" << EOF | 278 | cat > "${TEMP_FLOW_FILE}" << EOF |
| 268 | jtype: Flow | 279 | jtype: Flow |
| 269 | version: '1' | 280 | version: '1' |
| @@ -275,6 +286,7 @@ executors: | @@ -275,6 +286,7 @@ executors: | ||
| 275 | jtype: CLIPEncoder | 286 | jtype: CLIPEncoder |
| 276 | with: | 287 | with: |
| 277 | name: '${MODEL_NAME}' | 288 | name: '${MODEL_NAME}' |
| 289 | + device: '${DEVICE}' | ||
| 278 | metas: | 290 | metas: |
| 279 | py_modules: | 291 | py_modules: |
| 280 | - clip_server.executors.clip_torch | 292 | - clip_server.executors.clip_torch |