Commit 74cca19021605ac3bc84f175293768d0b09ec7b4
1 parent
40f1e391
cnclip
Showing
15 changed files
with
147 additions
and
2628 deletions
Show diff stats
docs/CLIP_REPLICAS_GUIDE.md deleted
| @@ -1,415 +0,0 @@ | @@ -1,415 +0,0 @@ | ||
| 1 | -# CN-CLIP 服务副本数配置指南 | ||
| 2 | - | ||
| 3 | -## 📊 副本数(Replicas)详解 | ||
| 4 | - | ||
| 5 | -### 什么是副本? | ||
| 6 | - | ||
| 7 | -**副本(Replicas)** 是指同时运行的模型实例数量。每个副本都是独立的进程,可以: | ||
| 8 | -- 并行处理多个请求 | ||
| 9 | -- 提高服务吞吐量 | ||
| 10 | -- 充分利用多核 CPU 或多 GPU | ||
| 11 | - | ||
| 12 | -**关键点**: | ||
| 13 | -- ✅ 每个副本加载一份完整的模型到内存/显存 | ||
| 14 | -- ✅ 副本间共享网络端口(通过负载均衡) | ||
| 15 | -- ✅ 多副本**不会**自动分配到不同 GPU(需手动配置) | ||
| 16 | - | ||
| 17 | ---- | ||
| 18 | - | ||
| 19 | -## 💾 显存占用分析 | ||
| 20 | - | ||
| 21 | -### 问题:2个副本会比1个副本多一倍显存吗? | ||
| 22 | - | ||
| 23 | -**答案:是的!基本成倍增加。** | ||
| 24 | - | ||
| 25 | -| 副本数 | 模型显存 | 峰值显存 | 总显存占用 | | ||
| 26 | -|--------|---------|---------|-----------| | ||
| 27 | -| 1副本 | ~2-3 GB | ~4-5 GB | **~4-5 GB** | | ||
| 28 | -| 2副本 | ~4-6 GB | ~8-10 GB | **~8-10 GB** | | ||
| 29 | -| 3副本 | ~6-9 GB | ~12-15 GB | **~12-15 GB** | | ||
| 30 | -| 4副本 | ~8-12 GB | ~16-20 GB | **~16-20 GB** | | ||
| 31 | - | ||
| 32 | -**说明**: | ||
| 33 | -- **模型显存**:模型权重(固定部分) | ||
| 34 | -- **峰值显存**:模型 + 激活值 + 中间结果(批处理时) | ||
| 35 | -- **CN-CLIP/ViT-H-14**:约 2-3 GB 模型权重 | ||
| 36 | - | ||
| 37 | -### 你的 GPU 情况 | ||
| 38 | - | ||
| 39 | -``` | ||
| 40 | -GPU: Tesla T4 | ||
| 41 | -总显存: 16384 MiB (16 GB) | ||
| 42 | -当前空闲: ~2 GB | ||
| 43 | -``` | ||
| 44 | - | ||
| 45 | -**推荐配置**: | ||
| 46 | -- **1副本**:最安全,适合当前场景 | ||
| 47 | -- **2副本**:需要先停止其他占用显存的程序 | ||
| 48 | -- **3+副本**:不建议(显存不足) | ||
| 49 | - | ||
| 50 | ---- | ||
| 51 | - | ||
| 52 | -## ⚙️ 配置副本数 | ||
| 53 | - | ||
| 54 | -### 方法 1:修改配置文件(推荐) | ||
| 55 | - | ||
| 56 | -编辑 `third-party/clip-as-service/server/clip_server/torch-flow.yml`: | ||
| 57 | - | ||
| 58 | -```yaml | ||
| 59 | -jtype: Flow | ||
| 60 | -version: '1' | ||
| 61 | -with: | ||
| 62 | - port: 51000 | ||
| 63 | -executors: | ||
| 64 | - - name: clip_t | ||
| 65 | - uses: | ||
| 66 | - jtype: CLIPEncoder | ||
| 67 | - with: | ||
| 68 | - name: 'CN-CLIP/ViT-H-14' | ||
| 69 | - device: 'cuda' # 设备 | ||
| 70 | - minibatch_size: 32 # 批处理大小 | ||
| 71 | - num_worker_preprocess: 4 # 预处理线程 | ||
| 72 | - dtype: 'float16' # 数据类型 | ||
| 73 | - metas: | ||
| 74 | - py_modules: | ||
| 75 | - - clip_server.executors.clip_torch | ||
| 76 | - timeout_ready: 3000000 | ||
| 77 | - replicas: 2 # ← 修改这里(1, 2, 3...) | ||
| 78 | -``` | ||
| 79 | - | ||
| 80 | -### 方法 2:命令行参数 | ||
| 81 | - | ||
| 82 | -```bash | ||
| 83 | -# 启动时指定副本数(需要修改启动脚本) | ||
| 84 | -python -m clip_server \ | ||
| 85 | - --name 'CN-CLIP/ViT-H-14' \ | ||
| 86 | - --replicas 2 \ | ||
| 87 | - --port 51000 | ||
| 88 | -``` | ||
| 89 | - | ||
| 90 | -### 方法 3:使用改进的启动脚本 | ||
| 91 | - | ||
| 92 | -我会在下面提供一个支持副本数的启动脚本。 | ||
| 93 | - | ||
| 94 | ---- | ||
| 95 | - | ||
| 96 | -## 📈 性能对比 | ||
| 97 | - | ||
| 98 | -### 吞吐量测试(理论值) | ||
| 99 | - | ||
| 100 | -**配置**:CN-CLIP/ViT-H-14, Tesla T4, batch_size=32 | ||
| 101 | - | ||
| 102 | -| 副本数 | QPS (请求/秒) | 显存占用 | 推荐场景 | | ||
| 103 | -|--------|--------------|---------|---------| | ||
| 104 | -| 1 | ~80-100 | ~4-5 GB | 开发测试、低并发 | | ||
| 105 | -| 2 | ~150-180 | ~8-10 GB | 生产环境、中等并发 | | ||
| 106 | -| 3 | ~220-260 | ~12-15 GB | 高并发(需24GB+显存)| | ||
| 107 | -| 4 | ~300-350 | ~16-20 GB | 极高并发(需A100等)| | ||
| 108 | - | ||
| 109 | -**注意**: | ||
| 110 | -- 实际性能取决于批处理大小、请求类型(文本/图像) | ||
| 111 | -- 并发请求少时,多副本提升不明显 | ||
| 112 | -- 并发请求多时,多副本显著提升吞吐量 | ||
| 113 | - | ||
| 114 | -### 延迟对比 | ||
| 115 | - | ||
| 116 | -| 副本数 | 单请求延迟 | P99 延迟 | 说明 | | ||
| 117 | -|--------|-----------|---------|------| | ||
| 118 | -| 1 | ~50ms | ~200ms | 单请求无等待;高并发时排队最久 | | ||
| 119 | -| 2 | ~50ms | ~150ms | 高并发时仍有排队,但等待缩短 | | ||
| 120 | -| 4 | ~50ms | ~100ms | 排队最少,尾延迟最低 | | ||
| 121 | - | ||
| 122 | -**结论**: | ||
| 123 | -- ✅ 多副本**不降低**单请求延迟 | ||
| 124 | -- ✅ 多副本**显著降低** P99 延迟(高并发时) | ||
| 125 | -- ✅ 多副本**显著提升**吞吐量 | ||
| 126 | - | ||
| 127 | ---- | ||
| 128 | - | ||
| 129 | -## 🎯 配置建议 | ||
| 130 | - | ||
| 131 | -### 场景 1:开发/测试(推荐) | ||
| 132 | - | ||
| 133 | -```yaml | ||
| 134 | -replicas: 1 | ||
| 135 | -minibatch_size: 32 | ||
| 136 | -device: cuda # 或 cpu | ||
| 137 | -``` | ||
| 138 | - | ||
| 139 | -**特点**: | ||
| 140 | -- ✅ 显存占用低(~5GB) | ||
| 141 | -- ✅ 足够测试使用 | ||
| 142 | -- ✅ 启动快 | ||
| 143 | - | ||
| 144 | -### 场景 2:生产环境(中等并发) | ||
| 145 | - | ||
| 146 | -```yaml | ||
| 147 | -replicas: 2 | ||
| 148 | -minibatch_size: 32 | ||
| 149 | -device: cuda | ||
| 150 | -``` | ||
| 151 | - | ||
| 152 | -**前提条件**: | ||
| 153 | -- ✅ GPU 显存 ≥ 12GB | ||
| 154 | -- ✅ 没有其他显存占用程序 | ||
| 155 | -- ✅ 需要 ~10GB 显存 | ||
| 156 | - | ||
| 157 | -**特点**: | ||
| 158 | -- ✅ 吞吐量翻倍(~150-180 QPS) | ||
| 159 | -- ✅ 处理并发请求能力强 | ||
| 160 | -- ⚠️ 需要确保显存充足 | ||
| 161 | - | ||
| 162 | -### 场景 3:生产环境(高并发) | ||
| 163 | - | ||
| 164 | -```yaml | ||
| 165 | -replicas: 2 | ||
| 166 | -minibatch_size: 64 | ||
| 167 | -device: cuda | ||
| 168 | -``` | ||
| 169 | - | ||
| 170 | -**优化点**: | ||
| 171 | -- ✅ 增大批处理(利用多副本优势) | ||
| 172 | -- ✅ 吞吐量进一步提升 | ||
| 173 | -- ⚠️ 需要更多显存(~12-15GB) | ||
| 174 | - | ||
| 175 | -### 场景 4:多 GPU 服务器 | ||
| 176 | - | ||
| 177 | -如果你有多块 GPU: | ||
| 178 | - | ||
| 179 | -```yaml | ||
| 180 | -executors: | ||
| 181 | - - name: clip_t_gpu0 | ||
| 182 | - uses: | ||
| 183 | - jtype: CLIPEncoder | ||
| 184 | - with: | ||
| 185 | - device: 'cuda:0' | ||
| 186 | - replicas: 2 | ||
| 187 | - uses: | ||
| 188 | - jtype: CLIPEncoder | ||
| 189 | - with: | ||
| 190 | - device: 'cuda:1' | ||
| 191 | - replicas: 2 | ||
| 192 | -``` | ||
| 193 | - | ||
| 194 | -**说明**: | ||
| 195 | -- 2块GPU,每块2个副本 = 共4个副本 | ||
| 196 | -- 总吞吐量 ~300+ QPS | ||
| 197 | -- 每块GPU占用 ~8-10GB | ||
| 198 | - | ||
| 199 | ---- | ||
| 200 | - | ||
| 201 | -## 🔍 监控和调试 | ||
| 202 | - | ||
| 203 | -### 查看显存占用 | ||
| 204 | - | ||
| 205 | -```bash | ||
| 206 | -# 实时监控 | ||
| 207 | -watch -n 1 nvidia-smi | ||
| 208 | - | ||
| 209 | -# 查看详细信息 | ||
| 210 | -nvidia-smi --query-gpu=timestamp,name,memory.used,memory.free,utilization.gpu --format=csv | ||
| 211 | - | ||
| 212 | -# 持续监控(每秒刷新,共100次) | ||
| 213 | -nvidia-smi dmon -s u -c 100 | ||
| 214 | -``` | ||
| 215 | - | ||
| 216 | -### 查看副本进程 | ||
| 217 | - | ||
| 218 | -```bash | ||
| 219 | -# 查看所有 clip_server 进程 | ||
| 220 | -ps aux | grep clip_server | ||
| 221 | - | ||
| 222 | -# 应该看到: | ||
| 223 | -# root 12345 clip_server (main) | ||
| 224 | -# root 12346 clip_server (replica 1) | ||
| 225 | -# root 12347 clip_server (replica 2) | ||
| 226 | -# ... | ||
| 227 | - | ||
| 228 | -# 查看进程数 | ||
| 229 | -ps aux | grep clip_server | wc -l | ||
| 230 | - | ||
| 231 | -# 应该 = replicas + 1(注意:grep 进程自身也可能被计入,实际输出可能再多 1) | ||
| 232 | -``` | ||
| 233 | - | ||
| 234 | -### 测试吞吐量 | ||
| 235 | - | ||
| 236 | -```bash | ||
| 237 | -# 使用测试脚本 | ||
| 238 | -python scripts/test_cnclip_service.py --batch-size 100 | ||
| 239 | - | ||
| 240 | -# 观察日志 | ||
| 241 | -tail -f logs/cnclip_service.log | grep "encoded" | ||
| 242 | -``` | ||
| 243 | - | ||
| 244 | ---- | ||
| 245 | - | ||
| 246 | -## ⚠️ 常见问题 | ||
| 247 | - | ||
| 248 | -### Q1: 设置2副本后显存不足 (OOM) | ||
| 249 | - | ||
| 250 | -**错误信息**: | ||
| 251 | -``` | ||
| 252 | -RuntimeError: CUDA out of memory. Tried to allocate XXX MiB | ||
| 253 | -``` | ||
| 254 | - | ||
| 255 | -**解决方案**: | ||
| 256 | - | ||
| 257 | -**方案 A**:减小批处理大小 | ||
| 258 | -```yaml | ||
| 259 | -replicas: 2 | ||
| 260 | -minibatch_size: 16 # 从32减到16 | ||
| 261 | -``` | ||
| 262 | - | ||
| 263 | -**方案 B**:使用更小的模型 | ||
| 264 | -```yaml | ||
| 265 | -name: 'CN-CLIP/ViT-L-14' # 从 H-14 改为 L-14 | ||
| 266 | -replicas: 2 | ||
| 267 | -``` | ||
| 268 | - | ||
| 269 | -**方案 C**:减少副本数 | ||
| 270 | -```yaml | ||
| 271 | -replicas: 1 # 回退到单副本 | ||
| 272 | -``` | ||
| 273 | - | ||
| 274 | -### Q2: 多副本反而性能下降 | ||
| 275 | - | ||
| 276 | -**原因**: | ||
| 277 | -- GPU 资源竞争(显存带宽、计算单元) | ||
| 278 | -- 批处理太小(未充分利用并行) | ||
| 279 | -- CPU 预处理成为瓶颈 | ||
| 280 | - | ||
| 281 | -**解决方案**: | ||
| 282 | -```yaml | ||
| 283 | -# 增大批处理 | ||
| 284 | -minibatch_size: 64 | ||
| 285 | - | ||
| 286 | -# 增加预处理线程 | ||
| 287 | -num_worker_preprocess: 8 | ||
| 288 | - | ||
| 289 | -# 或减少副本 | ||
| 290 | -replicas: 1 | ||
| 291 | -``` | ||
| 292 | - | ||
| 293 | -### Q3: 如何知道最佳副本数? | ||
| 294 | - | ||
| 295 | -**实验方法**: | ||
| 296 | - | ||
| 297 | -```bash | ||
| 298 | -# 测试脚本 | ||
| 299 | -for replicas in 1 2 3; do | ||
| 300 | - echo "Testing $replicas replicas..." | ||
| 301 | - | ||
| 302 | - # 修改配置 | ||
| 303 | - sed -i "s/replicas: .*/replicas: $replicas/" torch-flow.yml | ||
| 304 | - | ||
| 305 | - # 重启服务 | ||
| 306 | - ./scripts/stop_cnclip_service.sh | ||
| 307 | - ./scripts/start_cnclip_service.sh | ||
| 308 | - | ||
| 309 | - # 等待启动 | ||
| 310 | - sleep 30 | ||
| 311 | - | ||
| 312 | - # 运行测试 | ||
| 313 | - python scripts/test_cnclip_service.py --batch-size 100 | ||
| 314 | -done | ||
| 315 | - | ||
| 316 | -# 对比结果,选择最优配置 | ||
| 317 | -``` | ||
| 318 | - | ||
| 319 | -**推荐配置**: | ||
| 320 | -- **Tesla T4 (16GB)**: 1-2 副本 | ||
| 321 | -- **RTX 3090 (24GB)**: 2-3 副本 | ||
| 322 | -- **A100 (40GB)**: 3-4 副本 | ||
| 323 | - | ||
| 324 | ---- | ||
| 325 | - | ||
| 326 | -## 📝 快速配置参考 | ||
| 327 | - | ||
| 328 | -### 修改启动脚本支持副本数 | ||
| 329 | - | ||
| 330 | -编辑 `scripts/start_cnclip_service.sh`,添加参数: | ||
| 331 | - | ||
| 332 | -```bash | ||
| 333 | -# 在参数解析部分添加 | ||
| 334 | ---replicas) | ||
| 335 | - REPLICAS="$2" | ||
| 336 | - shift 2 | ||
| 337 | - ;; | ||
| 338 | - | ||
| 339 | -# 在启动命令中使用(新增 --replicas 一行;注释不能写在行尾的 \ 之后,否则续行失效) | ||
| 340 | -nohup python -m clip_server \ | ||
| 341 | - --name "${MODEL_NAME}" \ | ||
| 342 | - --replicas ${REPLICAS:-1} \ | ||
| 343 | - --port ${PORT} \ | ||
| 344 | - ... | ||
| 345 | -``` | ||
| 346 | - | ||
| 347 | -### 当前最佳实践(Tesla T4) | ||
| 348 | - | ||
| 349 | -**保守配置**(推荐): | ||
| 350 | -```yaml | ||
| 351 | -replicas: 1 | ||
| 352 | -minibatch_size: 32 | ||
| 353 | -dtype: float16 | ||
| 354 | -``` | ||
| 355 | -- 显存:~5GB | ||
| 356 | -- QPS:~80-100 | ||
| 357 | -- 适合:开发、测试、小规模应用 | ||
| 358 | - | ||
| 359 | -**激进配置**(需测试): | ||
| 360 | -```yaml | ||
| 361 | -replicas: 2 | ||
| 362 | -minibatch_size: 32 | ||
| 363 | -dtype: float16 | ||
| 364 | -``` | ||
| 365 | -- 显存:~10GB | ||
| 366 | -- QPS:~150-180 | ||
| 367 | -- 适合:生产环境、中等并发 | ||
| 368 | -- 前提:清理其他显存占用 | ||
| 369 | - | ||
| 370 | ---- | ||
| 371 | - | ||
| 372 | -## 🎉 总结 | ||
| 373 | - | ||
| 374 | -### 关键要点 | ||
| 375 | - | ||
| 376 | -1. **显存占用**:副本数 × 单副本显存(基本线性增长) | ||
| 377 | -2. **性能提升**:吞吐量接近线性增长,但不完美 | ||
| 378 | -3. **推荐配置**: | ||
| 379 | - - Tesla T4 (16GB): **1-2 副本** | ||
| 380 | - - 确保显存充足 + 预留 20% 余量 | ||
| 381 | -4. **优化建议**: | ||
| 382 | - - 从小副本开始测试 | ||
| 383 | - - 根据实际负载调整 | ||
| 384 | - - 监控显存和性能指标 | ||
| 385 | - | ||
| 386 | -### 你的情况 | ||
| 387 | - | ||
| 388 | -**当前配置**: | ||
| 389 | -```yaml | ||
| 390 | -replicas: 1 # 当前配置 | ||
| 391 | -``` | ||
| 392 | - | ||
| 393 | -**建议**: | ||
| 394 | -- ✅ 先保持 `replicas: 1`,测试性能 | ||
| 395 | -- ✅ 如果吞吐量不够,再尝试 `replicas: 2` | ||
| 396 | -- ⚠️ 确保显存充足(当前空闲2GB可能不够2副本) | ||
| 397 | -- 💡 考虑先清理其他显存占用 | ||
| 398 | - | ||
| 399 | -**快速测试**: | ||
| 400 | -```bash | ||
| 401 | -# 1. 查看当前显存占用 | ||
| 402 | -nvidia-smi | ||
| 403 | - | ||
| 404 | -# 2. 启动1副本,观察显存 | ||
| 405 | -./scripts/start_cnclip_service.sh | ||
| 406 | -nvidia-smi # 应该增加 ~5GB | ||
| 407 | - | ||
| 408 | -# 3. 如果还有空间,尝试2副本 | ||
| 409 | -./scripts/stop_cnclip_service.sh | ||
| 410 | -# 修改 torch-flow.yml 中的 replicas: 2 | ||
| 411 | -./scripts/start_cnclip_service.sh | ||
| 412 | -nvidia-smi # 应该再增加 ~5GB | ||
| 413 | -``` | ||
| 414 | - | ||
| 415 | -需要我帮你修改启动脚本以支持副本数参数吗? |
docs/CNCLIP_SERVICE.md
| 1 | -# CN-CLIP 编码服务使用指南 | 1 | +# CN-CLIP 编码服务 |
| 2 | 2 | ||
| 3 | -## 简介 | 3 | +## 模块说明 |
| 4 | 4 | ||
| 5 | -本服务基于 [clip-as-service](https://github.com/jina-ai/clip-as-service) 提供 CN-CLIP 模型的文本和图像编码功能。 | 5 | +CN-CLIP 编码服务基于 [clip-as-service](https://github.com/jina-ai/clip-as-service) 提供中文 CLIP 模型的文本和图像编码功能。服务使用 gRPC 协议,支持批量编码,返回固定维度的向量表示。 |
| 6 | + | ||
| 7 | +### 功能特性 | ||
| 8 | + | ||
| 9 | +- 文本编码:将中文文本编码为向量 | ||
| 10 | +- 图像编码:将图像(本地文件或远程 URL)编码为向量 | ||
| 11 | +- 混合编码:同时编码文本和图像 | ||
| 12 | +- 批量处理:支持批量编码,提高效率 | ||
| 13 | + | ||
| 14 | +### 技术架构 | ||
| 15 | + | ||
| 16 | +- **框架**: clip-as-service (基于 Jina) | ||
| 17 | +- **模型**: CN-CLIP/ViT-L-14-336(默认) | ||
| 18 | +- **协议**: gRPC(默认,官方推荐) | ||
| 19 | +- **运行时**: PyTorch | ||
| 6 | 20 | ||
| 7 | ## 启动服务 | 21 | ## 启动服务 |
| 8 | 22 | ||
| 23 | +### 基本用法 | ||
| 24 | + | ||
| 9 | ```bash | 25 | ```bash |
| 10 | ./scripts/start_cnclip_service.sh | 26 | ./scripts/start_cnclip_service.sh |
| 11 | ``` | 27 | ``` |
| 12 | 28 | ||
| 13 | ### 启动参数 | 29 | ### 启动参数 |
| 14 | 30 | ||
| 15 | -- `--port PORT`: 服务端口(默认:51000) | ||
| 16 | -- `--device DEVICE`: 设备类型:cuda 或 cpu(默认:自动检测) | ||
| 17 | -- `--batch-size SIZE`: 批处理大小(默认:32) | ||
| 18 | -- `--num-workers NUM`: 预处理线程数(默认:4) | ||
| 19 | -- `--dtype TYPE`: 数据类型:float16 或 float32(默认:float16) | ||
| 20 | -- `--model-name NAME`: 模型名称(默认:CN-CLIP/ViT-H-14) | ||
| 21 | -- `--replicas NUM`: 副本数(默认:1) | 31 | +| 参数 | 说明 | 默认值 | |
| 32 | +|------|------|--------| | ||
| 33 | +| `--port PORT` | 服务端口 | 51000 | | ||
| 34 | +| `--device DEVICE` | 设备类型:cuda 或 cpu | 自动检测 | | ||
| 35 | +| `--batch-size SIZE` | 批处理大小 | 32 | | ||
| 36 | +| `--num-workers NUM` | 预处理线程数 | 4 | | ||
| 37 | +| `--dtype TYPE` | 数据类型:float16 或 float32 | float16 | | ||
| 38 | +| `--model-name NAME` | 模型名称 | CN-CLIP/ViT-L-14-336 | | ||
| 39 | +| `--replicas NUM` | 副本数 | 1 | | ||
| 22 | 40 | ||
| 23 | ### 示例 | 41 | ### 示例 |
| 24 | 42 | ||
| @@ -29,133 +47,102 @@ | @@ -29,133 +47,102 @@ | ||
| 29 | # 指定端口和设备 | 47 | # 指定端口和设备 |
| 30 | ./scripts/start_cnclip_service.sh --port 52000 --device cpu | 48 | ./scripts/start_cnclip_service.sh --port 52000 --device cpu |
| 31 | 49 | ||
| 32 | -# 调整批处理大小 | ||
| 33 | -./scripts/start_cnclip_service.sh --batch-size 16 --dtype float32 | 50 | +# 使用其他模型 |
| 51 | +./scripts/start_cnclip_service.sh --model-name CN-CLIP/ViT-H-14 | ||
| 34 | ``` | 52 | ``` |
| 35 | 53 | ||
| 36 | -## 停止服务 | 54 | +### 停止服务 |
| 37 | 55 | ||
| 38 | ```bash | 56 | ```bash |
| 39 | ./scripts/stop_cnclip_service.sh | 57 | ./scripts/stop_cnclip_service.sh |
| 40 | ``` | 58 | ``` |
| 41 | 59 | ||
| 42 | -## 使用 API | 60 | +## API 接口说明 |
| 43 | 61 | ||
| 44 | -### 编码文本 | 62 | +### Python 客户端 |
| 45 | 63 | ||
| 46 | -```bash | ||
| 47 | -curl -X POST http://localhost:51000/post \ | ||
| 48 | - -H 'Content-Type: application/json' \ | ||
| 49 | - -d '{ | ||
| 50 | - "data": [ | ||
| 51 | - {"text": "这是一个测试文本"}, | ||
| 52 | - {"text": "另一个文本"} | ||
| 53 | - ], | ||
| 54 | - "execEndpoint": "/" | ||
| 55 | - }' | ||
| 56 | -``` | 64 | +服务使用 gRPC 协议,必须使用 Python 客户端: |
| 57 | 65 | ||
| 58 | -### 编码图像(远程 URL) | 66 | +```python |
| 67 | +from clip_client import Client | ||
| 59 | 68 | ||
| 60 | -```bash | ||
| 61 | -curl -X POST http://localhost:51000/post \ | ||
| 62 | - -H 'Content-Type: application/json' \ | ||
| 63 | - -d '{ | ||
| 64 | - "data": [ | ||
| 65 | - {"uri": "https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg"} | ||
| 66 | - ], | ||
| 67 | - "execEndpoint": "/" | ||
| 68 | - }' | 69 | +# 创建客户端(使用 grpc:// 协议) |
| 70 | +c = Client('grpc://localhost:51000') | ||
| 69 | ``` | 71 | ``` |
| 70 | 72 | ||
| 71 | -### 编码图像(本地文件,base64) | 73 | +### 编码接口 |
| 72 | 74 | ||
| 73 | -```bash | ||
| 74 | -curl -X POST http://localhost:51000/post \ | ||
| 75 | - -H 'Content-Type: application/json' \ | ||
| 76 | - -d "{ | ||
| 77 | - \"data\": [ | ||
| 78 | - {\"blob\": \"$(base64 -w 0 /path/to/image.jpg)\"} | ||
| 79 | - ], | ||
| 80 | - \"execEndpoint\": \"/\" | ||
| 81 | - }" | ||
| 82 | -``` | 75 | +#### 1. 文本编码 |
| 83 | 76 | ||
| 84 | -### 混合编码(文本和图像) | 77 | +```python |
| 78 | +from clip_client import Client | ||
| 85 | 79 | ||
| 86 | -```bash | ||
| 87 | -curl -X POST http://localhost:51000/post \ | ||
| 88 | - -H 'Content-Type: application/json' \ | ||
| 89 | - -d '{ | ||
| 90 | - "data": [ | ||
| 91 | - {"text": "这是一段文本"}, | ||
| 92 | - {"uri": "https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg"} | ||
| 93 | - ], | ||
| 94 | - "execEndpoint": "/" | ||
| 95 | - }' | ||
| 96 | -``` | 80 | +c = Client('grpc://localhost:51000') |
| 81 | + | ||
| 82 | +# 编码单个文本 | ||
| 83 | +result = c.encode(['这是测试文本']) | ||
| 84 | +print(result.shape) # (1, 1024) | ||
| 97 | 85 | ||
| 98 | -## 响应格式 | ||
| 99 | - | ||
| 100 | -响应为 JSON 格式,编码结果在 `data[].embedding` 字段中: | ||
| 101 | - | ||
| 102 | -```json | ||
| 103 | -{ | ||
| 104 | - "header": {...}, | ||
| 105 | - "data": [ | ||
| 106 | - { | ||
| 107 | - "id": "...", | ||
| 108 | - "text": "这是一个测试文本", | ||
| 109 | - "embedding": [0.123, -0.456, ...] | ||
| 110 | - } | ||
| 111 | - ] | ||
| 112 | -} | 86 | +# 编码多个文本 |
| 87 | +result = c.encode(['文本1', '文本2', '文本3']) | ||
| 88 | +print(result.shape) # (3, 1024) | ||
| 113 | ``` | 89 | ``` |
| 114 | 90 | ||
| 115 | -### 提取 embedding | 91 | +#### 2. 图像编码 |
| 116 | 92 | ||
| 117 | -使用 `jq` 提取 embedding: | 93 | +```python |
| 94 | +# 编码远程图像 URL | ||
| 95 | +result = c.encode(['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg']) | ||
| 96 | +print(result.shape) # (1, 1024) | ||
| 118 | 97 | ||
| 119 | -```bash | ||
| 120 | -curl -X POST http://localhost:51000/post \ | ||
| 121 | - -H 'Content-Type: application/json' \ | ||
| 122 | - -d '{"data":[{"text": "测试"}], "execEndpoint":"/"}' | \ | ||
| 123 | - jq -c '.data[] | .embedding' | 98 | +# 编码本地图像文件 |
| 99 | +result = c.encode(['/path/to/image.jpg']) | ||
| 100 | +print(result.shape) # (1, 1024) | ||
| 124 | ``` | 101 | ``` |
| 125 | 102 | ||
| 126 | -## Python 客户端示例 | ||
| 127 | - | ||
| 128 | -**重要**:如果服务配置了 `protocol: http`,客户端必须使用 `http://` 而不是 `grpc://`。 | 103 | +#### 3. 混合编码 |
| 129 | 104 | ||
| 130 | ```python | 105 | ```python |
| 131 | -from clip_client import Client | 106 | +# 同时编码文本和图像 |
| 107 | +result = c.encode([ | ||
| 108 | + '这是文本', | ||
| 109 | + 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg', | ||
| 110 | + '另一个文本' | ||
| 111 | +]) | ||
| 112 | +print(result.shape) # (3, 1024) | ||
| 113 | +``` | ||
| 132 | 114 | ||
| 133 | -# 创建客户端(注意:使用 http:// 而不是 grpc://) | ||
| 134 | -c = Client('http://localhost:51000') | 115 | +### 返回格式 |
| 135 | 116 | ||
| 136 | -# 编码文本 | ||
| 137 | -result = c.encode(['这是测试文本', '另一个文本']) | ||
| 138 | -print(result.shape) # [2, 768] 或其他维度 | 117 | +- **类型**: `numpy.ndarray` |
| 118 | +- **形状**: `(N, 1024)`,其中 N 是输入数量 | ||
| 119 | +- **数据类型**: `float32` | ||
| 120 | +- **维度**: 1024(以 CN-CLIP/ViT-H-14 为例;实际维度取决于所选模型,请以 `result.shape` 为准) | ||
| 139 | 121 | ||
| 140 | -# 编码图像 | ||
| 141 | -result = c.encode(['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg']) | ||
| 142 | -print(result.shape) # [1, 768] | 122 | +### 支持的模型 |
| 143 | 123 | ||
| 144 | -# 混合编码 | ||
| 145 | -result = c.encode([ | ||
| 146 | - '这是文本', | ||
| 147 | - 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg' | ||
| 148 | -]) | ||
| 149 | -print(result.shape) # [2, 768] | 124 | +| 模型名称 | 说明 | 推荐场景 | |
| 125 | +|---------|------|---------| | ||
| 126 | +| `CN-CLIP/ViT-B-16` | 基础版本,速度快 | 对速度要求高的场景 | | ||
| 127 | +| `CN-CLIP/ViT-L-14` | 平衡版本 | 通用场景 | | ||
| 128 | +| `CN-CLIP/ViT-L-14-336` | 高分辨率版本(默认) | 需要处理高分辨率图像 | | ||
| 129 | +| `CN-CLIP/ViT-H-14` | 大型版本,精度高 | 对精度要求高的场景 | | ||
| 130 | +| `CN-CLIP/RN50` | ResNet-50 版本 | 兼容性场景 | | ||
| 131 | + | ||
| 132 | +## 测试 | ||
| 133 | + | ||
| 134 | +运行测试脚本: | ||
| 135 | + | ||
| 136 | +```bash | ||
| 137 | +./scripts/test_cnclip_service.sh | ||
| 150 | ``` | 138 | ``` |
| 151 | 139 | ||
| 152 | -## 支持的模型 | 140 | +测试脚本会验证: |
| 141 | +- 文本编码功能 | ||
| 142 | +- 图像编码功能(远程 URL) | ||
| 143 | +- 混合编码功能 | ||
| 153 | 144 | ||
| 154 | -- `CN-CLIP/ViT-B-16`: 基础版本,速度快 | ||
| 155 | -- `CN-CLIP/ViT-L-14`: 平衡版本 | ||
| 156 | -- `CN-CLIP/ViT-L-14-336`: 高分辨率版本 | ||
| 157 | -- `CN-CLIP/ViT-H-14`: 大型版本,精度高(默认) | ||
| 158 | -- `CN-CLIP/RN50`: ResNet-50 版本 | 145 | +每个测试会显示 embedding 的维度和前 20 个数字。 |
| 159 | 146 | ||
| 160 | ## 查看日志 | 147 | ## 查看日志 |
| 161 | 148 | ||
| @@ -165,36 +152,36 @@ tail -f /data/tw/SearchEngine/logs/cnclip_service.log | @@ -165,36 +152,36 @@ tail -f /data/tw/SearchEngine/logs/cnclip_service.log | ||
| 165 | 152 | ||
| 166 | ## 常见问题 | 153 | ## 常见问题 |
| 167 | 154 | ||
| 168 | -### 服务启动失败 | 155 | +### 1. 服务启动失败 |
| 169 | 156 | ||
| 170 | -1. 检查端口是否被占用:`lsof -i :51000` | ||
| 171 | -2. 检查 conda 环境是否正确激活 | ||
| 172 | -3. 查看日志文件获取详细错误信息 | 157 | +- 检查端口是否被占用:`lsof -i :51000` |
| 158 | +- 检查 conda 环境是否正确激活 | ||
| 159 | +- 查看日志文件获取详细错误信息 | ||
| 173 | 160 | ||
| 174 | -### 编码失败 | 161 | +### 2. 客户端连接失败 |
| 175 | 162 | ||
| 176 | -1. 确保请求格式正确,使用 `/post` 端点 | ||
| 177 | -2. 确保 `execEndpoint` 设置为 `"/"` | ||
| 178 | -3. 检查图像 URL 是否可访问 | ||
| 179 | -4. 查看服务日志排查错误 | ||
| 180 | - | ||
| 181 | -### 协议不匹配 | ||
| 182 | - | ||
| 183 | -如果服务配置了 `protocol: http`,客户端必须使用 `http://` 而不是 `grpc://`: | 163 | +确保使用正确的协议: |
| 184 | 164 | ||
| 185 | ```python | 165 | ```python |
| 186 | -# 正确 | ||
| 187 | -c = Client('http://localhost:51000') | ||
| 188 | - | ||
| 189 | -# 错误(会导致连接失败) | 166 | +# 正确:使用 grpc:// |
| 190 | c = Client('grpc://localhost:51000') | 167 | c = Client('grpc://localhost:51000') |
| 168 | + | ||
| 169 | +# 错误:不要使用 http:// | ||
| 170 | +# c = Client('http://localhost:51000') # 会失败 | ||
| 191 | ``` | 171 | ``` |
| 192 | 172 | ||
| 193 | -### 图像编码问题 | 173 | +### 3. 编码失败 |
| 174 | + | ||
| 175 | +- 检查服务是否正常运行 | ||
| 176 | +- 检查输入格式是否正确 | ||
| 177 | +- 查看服务日志排查错误 | ||
| 178 | + | ||
| 179 | +### 4. 依赖安装 | ||
| 194 | 180 | ||
| 195 | -CN-CLIP 模型的图像编码可能存在兼容性问题。如果遇到 `AttributeError: 'str' object has no attribute 'to'` 错误,这可能是 clip-as-service 对 CN-CLIP 图像预处理的支持问题。建议: | 181 | +确保已安装必要的依赖: |
| 196 | 182 | ||
| 197 | -1. 检查 clip-as-service 和 cn-clip 的版本兼容性 | ||
| 198 | -2. 尝试使用本地图像文件而不是远程 URL | ||
| 199 | -3. 查看 [clip-as-service 的 GitHub Issues](https://github.com/jina-ai/clip-as-service/issues) 是否有相关报告 | 183 | +```bash |
| 184 | +pip install clip-client | ||
| 185 | +``` | ||
| 200 | 186 | ||
| 187 | +服务端依赖会在启动脚本中自动检查。 |
docs/GIT_SUBMODULE_GUIDE.md deleted
| @@ -1,323 +0,0 @@ | @@ -1,323 +0,0 @@ | ||
| 1 | -# Git 子模块处理指南 | ||
| 2 | - | ||
| 3 | -## 问题说明 | ||
| 4 | - | ||
| 5 | -当前 `third-party/clip-as-service` 是一个 Git 子模块,指向远程仓库: | ||
| 6 | -- **原始仓库**: https://github.com/jina-ai/clip-as-service.git | ||
| 7 | -- **问题**: 你修改了代码,但不想/无法提交到原仓库 | ||
| 8 | -- **目标**: 将修改纳入自己的主项目管理 | ||
| 9 | - | ||
| 10 | -## 解决方案 | ||
| 11 | - | ||
| 12 | -我们提供三个方案,根据你的需求选择: | ||
| 13 | - | ||
| 14 | -### 方案 A:移除子模块,直接纳入主项目(推荐) | ||
| 15 | - | ||
| 16 | -**适用场景**: | ||
| 17 | -- 你不需要跟随上游更新 | ||
| 18 | -- 你想完全控制这些代码 | ||
| 19 | -- 你会修改这些代码 | ||
| 20 | - | ||
| 21 | -**优点**: | ||
| 22 | -- ✅ 简单直接,完全控制 | ||
| 23 | -- ✅ 所有代码在一个仓库中,易于管理 | ||
| 24 | -- ✅ 修改可以直接提交到主项目 | ||
| 25 | - | ||
| 26 | -**缺点**: | ||
| 27 | -- ❌ 无法方便地获取上游更新 | ||
| 28 | -- ❌ 会增加主仓库大小 | ||
| 29 | - | ||
| 30 | -**操作步骤**: | ||
| 31 | - | ||
| 32 | -```bash | ||
| 33 | -# 1. 进入主项目目录 | ||
| 34 | -cd /data/tw/SearchEngine | ||
| 35 | - | ||
| 36 | -# 2. 移除 Git 对子模块的追踪 | ||
| 37 | -git rm --cached third-party/clip-as-service | ||
| 38 | - | ||
| 39 | -# 3. 删除 .gitmodules 中的配置(如果有) | ||
| 40 | -# 编辑 .gitmodules 文件,删除 clip-as-service 相关部分 | ||
| 41 | - | ||
| 42 | -# 4. 删除子模块的 Git 仓库 | ||
| 43 | -rm -rf third-party/clip-as-service/.git | ||
| 44 | - | ||
| 45 | -# 5. 将子模块目录作为普通目录添加到 Git | ||
| 46 | -git add third-party/clip-as-service/ | ||
| 47 | - | ||
| 48 | -# 6. 提交更改 | ||
| 49 | -git commit -m "feat: 将 clip-as-service 从子模块转为普通目录" | ||
| 50 | - | ||
| 51 | -# 7. 推送到远程仓库 | ||
| 52 | -git push origin main | ||
| 53 | -``` | ||
| 54 | - | ||
| 55 | -**后续操作**: | ||
| 56 | -- 之后对 `third-party/clip-as-service` 的任何修改都可以正常提交 | ||
| 57 | -- 使用 `git add` 和 `git commit` 就可以了 | ||
| 58 | - | ||
| 59 | ---- | ||
| 60 | - | ||
| 61 | -### 方案 B:切换到自己的 Fork | ||
| 62 | - | ||
| 63 | -**适用场景**: | ||
| 64 | -- 你需要跟随上游更新 | ||
| 65 | -- 你想保持 Git 历史记录 | ||
| 66 | -- 你可能向原仓库贡献代码 | ||
| 67 | - | ||
| 68 | -**优点**: | ||
| 69 | -- ✅ 可以合并上游更新 | ||
| 70 | -- ✅ 保持 Git 追踪 | ||
| 71 | -- ✅ 可以参与上游社区 | ||
| 72 | - | ||
| 73 | -**缺点**: | ||
| 74 | -- ❌ 需要 Fork 并维护自己的仓库 | ||
| 75 | -- ❌ 操作相对复杂 | ||
| 76 | - | ||
| 77 | -**操作步骤**: | ||
| 78 | - | ||
| 79 | -```bash | ||
| 80 | -# 1. 在 GitHub 上 Fork 原仓库 | ||
| 81 | -# 访问 https://github.com/jina-ai/clip-as-service | ||
| 82 | -# 点击 Fork 按钮,创建你自己的副本 | ||
| 83 | - | ||
| 84 | -# 2. 克隆你的 Fork(替换为你的用户名) | ||
| 85 | -# 注意:这里使用 --mirror 保留所有分支和标签 | ||
| 86 | -git clone --mirror https://github.com/YOUR_USERNAME/clip-as-service.git | ||
| 87 | - | ||
| 88 | -# 3. 进入主项目目录 | ||
| 89 | -cd /data/tw/SearchEngine | ||
| 90 | - | ||
| 91 | -# 4. 更新 .gitmodules 文件 | ||
| 92 | -# 编辑 .gitmodules,将 URL 改为你的 Fork: | ||
| 93 | -# [submodule "third-party/clip-as-service"] | ||
| 94 | -# path = third-party/clip-as-service | ||
| 95 | -# url = https://github.com/YOUR_USERNAME/clip-as-service.git | ||
| 96 | - | ||
| 97 | -# 5. 初始化新的子模块 | ||
| 98 | -git submodule deinit -f third-party/clip-as-service | ||
| 99 | -git submodule update --init --remote third-party/clip-as-service | ||
| 100 | - | ||
| 101 | -# 6. 进入子模块目录,设置你的 fork 为默认远程 | ||
| 102 | -cd third-party/clip-as-service | ||
| 103 | -git remote set-url origin https://github.com/YOUR_USERNAME/clip-as-service.git | ||
| 104 | -git remote add upstream https://github.com/jina-ai/clip-as-service.git | ||
| 105 | - | ||
| 106 | -# 7. 创建并切换到你的分支 | ||
| 107 | -git checkout -b custom-cnclip-support | ||
| 108 | - | ||
| 109 | -# 8. 提交你的修改 | ||
| 110 | -git add . | ||
| 111 | -git commit -m "feat: 添加 CN-CLIP 自定义配置" | ||
| 112 | - | ||
| 113 | -# 9. 推送到你的 Fork | ||
| 114 | -git push origin custom-cnclip-support | ||
| 115 | - | ||
| 116 | -# 10. 回到主项目,更新子模块引用 | ||
| 117 | -cd /data/tw/SearchEngine | ||
| 118 | -git add third-party/clip-as-service | ||
| 119 | -git commit -m "chore: 更新子模块到自定义版本" | ||
| 120 | -git push origin main | ||
| 121 | -``` | ||
| 122 | - | ||
| 123 | -**后续操作**: | ||
| 124 | -- 在子模块目录中修改代码 | ||
| 125 | -- 提交到你的分支:`git push origin custom-cnclip-support` | ||
| 126 | -- 合并上游更新: | ||
| 127 | - ```bash | ||
| 128 | - cd third-party/clip-as-service | ||
| 129 | - git fetch upstream | ||
| 130 | - git merge upstream/main | ||
| 131 | - git push origin custom-cnclip-support | ||
| 132 | - cd .. | ||
| 133 | - git add third-party/clip-as-service | ||
| 134 | - git commit -m "chore: 合并上游更新" | ||
| 135 | - ``` | ||
| 136 | - | ||
| 137 | ---- | ||
| 138 | - | ||
| 139 | -### 方案 C:使用 Git Subtree 替代 Submodule | ||
| 140 | - | ||
| 141 | -**适用场景**: | ||
| 142 | -- 你想要子模块的灵活性,但不想处理 submodule 的复杂性 | ||
| 143 | -- 你想直接在主项目中管理代码 | ||
| 144 | - | ||
| 145 | -**优点**: | ||
| 146 | -- ✅ 比 submodule 更易用 | ||
| 147 | -- ✅ 可以直接提交到主项目 | ||
| 148 | -- ✅ 可以获取上游更新 | ||
| 149 | - | ||
| 150 | -**缺点**: | ||
| 151 | -- ❌ 命令相对复杂 | ||
| 152 | -- ❌ 重写现有历史 | ||
| 153 | - | ||
| 154 | -**操作步骤**: | ||
| 155 | - | ||
| 156 | -```bash | ||
| 157 | -# 1. 移除现有子模块 | ||
| 158 | -cd /data/tw/SearchEngine | ||
| 159 | -git submodule deinit -f third-party/clip-as-service | ||
| 160 | -git rm -f third-party/clip-as-service | ||
| 161 | -rm -rf .git/modules/third-party/clip-as-service | ||
| 162 | - | ||
| 163 | -# 2. 使用 subtree 添加远程仓库 | ||
| 164 | -git subtree add --prefix=third-party/clip-as-service \ | ||
| 165 | - https://github.com/jina-ai/clip-as-service.git \ | ||
| 166 | - main --squash | ||
| 167 | - | ||
| 168 | -# 3. 提交 | ||
| 169 | -git commit -m "chore: 使用 git subtree 替代 submodule 添加 clip-as-service" | ||
| 170 | - | ||
| 171 | -# 4. 推送 | ||
| 172 | -git push origin main | ||
| 173 | -``` | ||
| 174 | - | ||
| 175 | -**后续操作**: | ||
| 176 | -- 修改代码后直接在主项目中提交 | ||
| 177 | -- 获取上游更新: | ||
| 178 | - ```bash | ||
| 179 | - git subtree pull --prefix=third-party/clip-as-service \ | ||
| 180 | - https://github.com/jina-ai/clip-as-service.git \ | ||
| 181 | - main --squash | ||
| 182 | - ``` | ||
| 183 | - | ||
| 184 | ---- | ||
| 185 | - | ||
| 186 | -## 推荐方案 | ||
| 187 | - | ||
| 188 | -根据你的情况,我**推荐方案 A**,原因如下: | ||
| 189 | - | ||
| 190 | -1. **你的需求明确**:修改 CN-CLIP 配置,提供推理服务 | ||
| 191 | -2. **不需要上游更新**:CLIP-as-service 已经很稳定 | ||
| 192 | -3. **最简单直接**:不需要维护额外的仓库 | ||
| 193 | -4. **易于管理**:所有代码在一个地方 | ||
| 194 | - | ||
| 195 | -## 执行方案 A 的详细步骤 | ||
| 196 | - | ||
| 197 | -```bash | ||
| 198 | -#!/bin/bash | ||
| 199 | -# 这个脚本会自动执行方案 A 的所有步骤 | ||
| 200 | - | ||
| 201 | -set -e | ||
| 202 | - | ||
| 203 | -echo "==========================================" | ||
| 204 | -echo "将 clip-as-service 从子模块转为普通目录" | ||
| 205 | -echo "==========================================" | ||
| 206 | - | ||
| 207 | -cd /data/tw/SearchEngine | ||
| 208 | - | ||
| 209 | -# 1. 备份当前状态(可选) | ||
| 210 | -echo "步骤 1: 备份当前配置..." | ||
| 211 | -if [ -f ".gitmodules" ]; then | ||
| 212 | - cp .gitmodules .gitmodules.backup | ||
| 213 | - echo "✓ 已备份 .gitmodules" | ||
| 214 | -fi | ||
| 215 | - | ||
| 216 | -# 2. 移除 Git 对子模块的追踪 | ||
| 217 | -echo "步骤 2: 移除子模块追踪..." | ||
| 218 | -git rm --cached third-party/clip-as-service || true | ||
| 219 | - | ||
| 220 | -# 3. 删除 .gitmodules 中的子模块配置 | ||
| 221 | -echo "步骤 3: 清理 .gitmodules..." | ||
| 222 | -if [ -f ".gitmodules" ]; then | ||
| 223 | - # 使用 sed 删除 clip-as-service 相关的配置块 | ||
| 224 | - sed -i '/clip-as-service/,+3 d' .gitmodules | ||
| 225 | - | ||
| 226 | - # 如果文件为空,删除它 | ||
| 227 | - if [ ! -s ".gitmodules" ]; then | ||
| 228 | - rm .gitmodules | ||
| 229 | - echo "✓ 已删除空的 .gitmodules 文件" | ||
| 230 | - else | ||
| 231 | - echo "✓ 已更新 .gitmodules" | ||
| 232 | - fi | ||
| 233 | -fi | ||
| 234 | - | ||
| 235 | -# 4. 删除子模块的 Git 仓库 | ||
| 236 | -echo "步骤 4: 删除子模块 Git 仓库..." | ||
| 237 | -rm -rf third-party/clip-as-service/.git | ||
| 238 | -echo "✓ 已删除子模块 Git 仓库" | ||
| 239 | - | ||
| 240 | -# 5. 将子模块目录作为普通目录添加 | ||
| 241 | -echo "步骤 5: 将目录添加到 Git..." | ||
| 242 | -git add third-party/clip-as-service/ | ||
| 243 | -git add .gitmodules 2>/dev/null || true | ||
| 244 | -echo "✓ 已添加到 Git" | ||
| 245 | - | ||
| 246 | -# 6. 显示状态 | ||
| 247 | -echo "" | ||
| 248 | -echo "==========================================" | ||
| 249 | -echo "操作完成!当前状态:" | ||
| 250 | -echo "==========================================" | ||
| 251 | -git status | ||
| 252 | - | ||
| 253 | -echo "" | ||
| 254 | -echo "==========================================" | ||
| 255 | -echo "下一步:" | ||
| 256 | -echo "==========================================" | ||
| 257 | -echo "1. 检查修改: git diff --cached" | ||
| 258 | -echo "2. 提交更改: git commit -m 'feat: 将 clip-as-service 从子模块转为普通目录'" | ||
| 259 | -echo "3. 推送到远程: git push origin main" | ||
| 260 | -echo "" | ||
| 261 | -``` | ||
| 262 | - | ||
| 263 | -## 验证转换成功 | ||
| 264 | - | ||
| 265 | -转换后,你应该可以: | ||
| 266 | - | ||
| 267 | -```bash | ||
| 268 | -# 1. 正常查看文件状态 | ||
| 269 | -git status | ||
| 270 | - | ||
| 271 | -# 2. 修改文件后直接添加 | ||
| 272 | -# 编辑 third-party/clip-as-service/server/clip_server/torch-flow.yml | ||
| 273 | -git add third-party/clip-as-service/server/clip_server/torch-flow.yml | ||
| 274 | -git commit -m "config: 更新 CN-CLIP 配置" | ||
| 275 | - | ||
| 276 | -# 3. 推送到远程 | ||
| 277 | -git push origin main | ||
| 278 | - | ||
| 279 | -# 4. 其他人克隆你的仓库后,不需要特殊的子模块命令 | ||
| 280 | -# git clone https://your-repo-url | ||
| 281 | -# cd SearchEngine | ||
| 282 | -# 所有文件都已经存在,包括 third-party/clip-as-service | ||
| 283 | -``` | ||
| 284 | - | ||
| 285 | -## 常见问题 | ||
| 286 | - | ||
| 287 | -**Q: 转换后会增加仓库大小吗?** | ||
| 288 | -A: 会。clip-as-service 大约几十 MB。如果你担心仓库大小,可以使用 Git LFS 或 .gitignore 排除不必要的文件(如模型权重)。 | ||
| 289 | - | ||
| 290 | -**Q: 如何后续获取原仓库的更新?** | ||
| 291 | -A: 使用方案 A 后,需要手动合并更新。注意:方案 A 已删除 `third-party/clip-as-service/.git`,该目录不再是独立仓库,下面的命令会作用于主仓库,执行前请确认: | ||
| 292 | -```bash | ||
| 293 | -cd third-party/clip-as-service | ||
| 294 | -git remote add upstream https://github.com/jina-ai/clip-as-service.git | ||
| 295 | -git fetch upstream | ||
| 296 | -git merge upstream/main | ||
| 297 | -``` | ||
| 298 | - | ||
| 299 | -**Q: 我可以回退到子模块吗?** | ||
| 300 | -A: 可以,但会比较复杂。建议在转换前提交一个保存点: | ||
| 301 | -```bash | ||
| 302 | -git branch backup-before-submodule-removal | ||
| 303 | -``` | ||
| 304 | - | ||
| 305 | -**Q: 其他协作者需要注意什么?** | ||
| 306 | -A: 他们需要重新克隆仓库,或者: | ||
| 307 | -```bash | ||
| 308 | -# 删除旧的子模块引用 | ||
| 309 | -git submodule deinit -f third-party/clip-as-service | ||
| 310 | -rm -rf .git/modules/third-party/clip-as-service | ||
| 311 | - | ||
| 312 | -# 重新拉取 | ||
| 313 | -git pull origin main | ||
| 314 | -``` | ||
| 315 | - | ||
| 316 | -## 总结 | ||
| 317 | - | ||
| 318 | -- **推荐**: 方案 A(移除子模块) | ||
| 319 | -- **适用**: 你的使用场景 | ||
| 320 | -- **优势**: 简单、直接、易于管理 | ||
| 321 | -- **成本**: 略微增加仓库大小 | ||
| 322 | - | ||
| 323 | -如果需要帮助执行这些步骤,请告诉我! |
examples/clip_rest_api.py deleted
| @@ -1,254 +0,0 @@ | @@ -1,254 +0,0 @@ | ||
| 1 | -#!/usr/bin/env python3 | ||
| 2 | -""" | ||
| 3 | -CN-CLIP REST API 包装器 | ||
| 4 | - | ||
| 5 | -提供 HTTP 接口,支持 curl 调用 | ||
| 6 | -""" | ||
| 7 | - | ||
| 8 | -from flask import Flask, request, jsonify | ||
| 9 | -from flask_cors import CORS | ||
| 10 | -from clip_client import Client | ||
| 11 | -import numpy as np | ||
| 12 | -import traceback | ||
| 13 | - | ||
| 14 | -app = Flask(__name__) | ||
| 15 | -CORS(app) # 允许跨域请求 | ||
| 16 | - | ||
| 17 | -# 连接到 CN-CLIP 服务 | ||
| 18 | -try: | ||
| 19 | - client = Client('grpc://localhost:51000') | ||
| 20 | - print("✓ 已连接到 CN-CLIP 服务 (grpc://localhost:51000)") | ||
| 21 | -except Exception as e: | ||
| 22 | - print(f"✗ 连接失败: {e}") | ||
| 23 | - print("请先启动 CN-CLIP 服务: ./scripts/start_cnclip_service.sh") | ||
| 24 | - client = None | ||
| 25 | - | ||
| 26 | - | ||
| 27 | -@app.route('/health', methods=['GET']) | ||
| 28 | -def health(): | ||
| 29 | - """健康检查""" | ||
| 30 | - return jsonify({ | ||
| 31 | - 'status': 'ok' if client else 'error', | ||
| 32 | - 'service': 'cnclip-rest-api', | ||
| 33 | - 'backend': 'grpc://localhost:51000' | ||
| 34 | - }) | ||
| 35 | - | ||
| 36 | - | ||
| 37 | -@app.route('/encode/text', methods=['POST']) | ||
| 38 | -def encode_text(): | ||
| 39 | - """ | ||
| 40 | - 编码文本 | ||
| 41 | - | ||
| 42 | - 请求体: | ||
| 43 | - { | ||
| 44 | - "texts": ["文本1", "文本2"] | ||
| 45 | - } | ||
| 46 | - | ||
| 47 | - 返回: | ||
| 48 | - { | ||
| 49 | - "count": 2, | ||
| 50 | - "shape": [2, 1024], | ||
| 51 | - "embeddings": [[...], [...]] | ||
| 52 | - } | ||
| 53 | - """ | ||
| 54 | - if not client: | ||
| 55 | - return jsonify({'error': 'CN-CLIP 服务未连接'}), 503 | ||
| 56 | - | ||
| 57 | - try: | ||
| 58 | - data = request.json | ||
| 59 | - texts = data.get('texts', []) | ||
| 60 | - | ||
| 61 | - if not texts: | ||
| 62 | - return jsonify({'error': '缺少 texts 参数'}), 400 | ||
| 63 | - | ||
| 64 | - # 编码 | ||
| 65 | - embeddings = client.encode(texts) | ||
| 66 | - | ||
| 67 | - return jsonify({ | ||
| 68 | - 'count': len(texts), | ||
| 69 | - 'shape': embeddings.shape.tolist(), | ||
| 70 | - 'embeddings': embeddings.tolist() | ||
| 71 | - }) | ||
| 72 | - | ||
| 73 | - except Exception as e: | ||
| 74 | - print(f"错误: {e}") | ||
| 75 | - print(traceback.format_exc()) | ||
| 76 | - return jsonify({'error': str(e)}), 500 | ||
| 77 | - | ||
| 78 | - | ||
| 79 | -@app.route('/encode/image', methods=['POST']) | ||
| 80 | -def encode_image(): | ||
| 81 | - """ | ||
| 82 | - 编码图像 | ||
| 83 | - | ||
| 84 | - 请求体: | ||
| 85 | - { | ||
| 86 | - "images": ["https://example.com/image.jpg", "/path/to/local.jpg"] | ||
| 87 | - } | ||
| 88 | - | ||
| 89 | - 返回: | ||
| 90 | - { | ||
| 91 | - "count": 2, | ||
| 92 | - "shape": [2, 1024], | ||
| 93 | - "embeddings": [[...], [...]] | ||
| 94 | - } | ||
| 95 | - """ | ||
| 96 | - if not client: | ||
| 97 | - return jsonify({'error': 'CN-CLIP 服务未连接'}), 503 | ||
| 98 | - | ||
| 99 | - try: | ||
| 100 | - data = request.json | ||
| 101 | - images = data.get('images', []) | ||
| 102 | - | ||
| 103 | - if not images: | ||
| 104 | - return jsonify({'error': '缺少 images 参数'}), 400 | ||
| 105 | - | ||
| 106 | - # 编码 | ||
| 107 | - embeddings = client.encode(images) | ||
| 108 | - | ||
| 109 | - return jsonify({ | ||
| 110 | - 'count': len(images), | ||
| 111 | - 'shape': embeddings.shape.tolist(), | ||
| 112 | - 'embeddings': embeddings.tolist() | ||
| 113 | - }) | ||
| 114 | - | ||
| 115 | - except Exception as e: | ||
| 116 | - print(f"错误: {e}") | ||
| 117 | - print(traceback.format_exc()) | ||
| 118 | - return jsonify({'error': str(e)}), 500 | ||
| 119 | - | ||
| 120 | - | ||
| 121 | -@app.route('/encode/mixed', methods=['POST']) | ||
| 122 | -def encode_mixed(): | ||
| 123 | - """ | ||
| 124 | - 混合编码(文本+图像) | ||
| 125 | - | ||
| 126 | - 请求体: | ||
| 127 | - { | ||
| 128 | - "data": ["文本", "https://example.com/image.jpg"] | ||
| 129 | - } | ||
| 130 | - | ||
| 131 | - 返回: | ||
| 132 | - { | ||
| 133 | - "count": 2, | ||
| 134 | - "shape": [2, 1024], | ||
| 135 | - "embeddings": [[...], [...]] | ||
| 136 | - } | ||
| 137 | - """ | ||
| 138 | - if not client: | ||
| 139 | - return jsonify({'error': 'CN-CLIP 服务未连接'}), 503 | ||
| 140 | - | ||
| 141 | - try: | ||
| 142 | - data = request.json | ||
| 143 | - mixed_data = data.get('data', []) | ||
| 144 | - | ||
| 145 | - if not mixed_data: | ||
| 146 | - return jsonify({'error': '缺少 data 参数'}), 400 | ||
| 147 | - | ||
| 148 | - # 编码 | ||
| 149 | - embeddings = client.encode(mixed_data) | ||
| 150 | - | ||
| 151 | - return jsonify({ | ||
| 152 | - 'count': len(mixed_data), | ||
| 153 | - 'shape': embeddings.shape.tolist(), | ||
| 154 | - 'embeddings': embeddings.tolist() | ||
| 155 | - }) | ||
| 156 | - | ||
| 157 | - except Exception as e: | ||
| 158 | - print(f"错误: {e}") | ||
| 159 | - print(traceback.format_exc()) | ||
| 160 | - return jsonify({'error': str(e)}), 500 | ||
| 161 | - | ||
| 162 | - | ||
| 163 | -@app.route('/similarity', methods=['POST']) | ||
| 164 | -def similarity(): | ||
| 165 | - """ | ||
| 166 | - 计算相似度 | ||
| 167 | - | ||
| 168 | - 请求体: | ||
| 169 | - { | ||
| 170 | - "text": "查询文本", | ||
| 171 | - "images": ["url1", "url2"], | ||
| 172 | - "texts": ["文本1", "文本2"] | ||
| 173 | - } | ||
| 174 | - | ||
| 175 | - 返回: | ||
| 176 | - { | ||
| 177 | - "image_similarities": [0.8, 0.3], | ||
| 178 | - "text_similarities": [0.9, 0.2] | ||
| 179 | - } | ||
| 180 | - """ | ||
| 181 | - if not client: | ||
| 182 | - return jsonify({'error': 'CN-CLIP 服务未连接'}), 503 | ||
| 183 | - | ||
| 184 | - try: | ||
| 185 | - data = request.json | ||
| 186 | - query_text = data.get('text', '') | ||
| 187 | - images = data.get('images', []) | ||
| 188 | - texts = data.get('texts', []) | ||
| 189 | - | ||
| 190 | - if not query_text: | ||
| 191 | - return jsonify({'error': '缺少 text 参数'}), 400 | ||
| 192 | - | ||
| 193 | - from sklearn.metrics.pairwise import cosine_similarity | ||
| 194 | - | ||
| 195 | - # 编码查询文本 | ||
| 196 | - query_embedding = client.encode([query_text]) | ||
| 197 | - | ||
| 198 | - result = {} | ||
| 199 | - | ||
| 200 | - # 计算与图像的相似度 | ||
| 201 | - if images: | ||
| 202 | - image_embeddings = client.encode(images) | ||
| 203 | - similarities = cosine_similarity(query_embedding, image_embeddings)[0] | ||
| 204 | - result['image_similarities'] = similarities.tolist() | ||
| 205 | - result['image_urls'] = images | ||
| 206 | - | ||
| 207 | - # 计算与文本的相似度 | ||
| 208 | - if texts: | ||
| 209 | - text_embeddings = client.encode(texts) | ||
| 210 | - similarities = cosine_similarity(query_embedding, text_embeddings)[0] | ||
| 211 | - result['text_similarities'] = similarities.tolist() | ||
| 212 | - result['texts'] = texts | ||
| 213 | - | ||
| 214 | - return jsonify(result) | ||
| 215 | - | ||
| 216 | - except Exception as e: | ||
| 217 | - print(f"错误: {e}") | ||
| 218 | - print(traceback.format_exc()) | ||
| 219 | - return jsonify({'error': str(e)}), 500 | ||
| 220 | - | ||
| 221 | - | ||
| 222 | -@app.errorhandler(404) | ||
| 223 | -def not_found(error): | ||
| 224 | - return jsonify({'error': '接口不存在'}), 404 | ||
| 225 | - | ||
| 226 | - | ||
| 227 | -@app.errorhandler(500) | ||
| 228 | -def internal_error(error): | ||
| 229 | - return jsonify({'error': '服务器内部错误'}), 500 | ||
| 230 | - | ||
| 231 | - | ||
| 232 | -if __name__ == '__main__': | ||
| 233 | - print("\n" + "=" * 60) | ||
| 234 | - print("CN-CLIP REST API 服务") | ||
| 235 | - print("=" * 60) | ||
| 236 | - print("\n服务地址: http://localhost:6000") | ||
| 237 | - print("\n可用接口:") | ||
| 238 | - print(" POST /health - 健康检查") | ||
| 239 | - print(" POST /encode/text - 编码文本") | ||
| 240 | - print(" POST /encode/image - 编码图像") | ||
| 241 | - print(" POST /encode/mixed - 混合编码") | ||
| 242 | - print(" POST /similarity - 计算相似度") | ||
| 243 | - print("\n示例:") | ||
| 244 | - print(" curl http://localhost:6000/health") | ||
| 245 | - print(" curl -X POST http://localhost:6000/encode/text -H 'Content-Type: application/json' -d '{\"texts\": [\"测试文本\"]}'") | ||
| 246 | - print("\n" + "=" * 60) | ||
| 247 | - print() | ||
| 248 | - | ||
| 249 | - app.run( | ||
| 250 | - host='0.0.0.0', | ||
| 251 | - port=6000, | ||
| 252 | - debug=True, | ||
| 253 | - use_reloader=False # 避免重复启动 | ||
| 254 | - ) |
examples/simple_examples.py deleted
| @@ -1,93 +0,0 @@ | @@ -1,93 +0,0 @@ | ||
| 1 | -#!/usr/bin/env python3 | ||
| 2 | -""" | ||
| 3 | -CN-CLIP 简单示例 | ||
| 4 | - | ||
| 5 | -最常用的文本和图像编码示例 | ||
| 6 | -""" | ||
| 7 | - | ||
| 8 | -from clip_client import Client | ||
| 9 | - | ||
| 10 | -# 初始化客户端 | ||
| 11 | -client = Client('grpc://localhost:51000') | ||
| 12 | - | ||
| 13 | -# ============================================================================ | ||
| 14 | -# 示例 1: 编码文本 | ||
| 15 | -# ============================================================================ | ||
| 16 | -print("示例 1: 文本编码") | ||
| 17 | -print("-" * 50) | ||
| 18 | - | ||
| 19 | -texts = ['一只可爱的猫咪', '美丽的高山风景'] | ||
| 20 | -embeddings = client.encode(texts) | ||
| 21 | - | ||
| 22 | -print(f"输入: {texts}") | ||
| 23 | -print(f"输出形状: {embeddings.shape}") # (2, 1024) | ||
| 24 | -print(f"✓ 编码完成\n") | ||
| 25 | - | ||
| 26 | -# ============================================================================ | ||
| 27 | -# 示例 2: 编码图像(URL) | ||
| 28 | -# ============================================================================ | ||
| 29 | -print("示例 2: 图像编码(URL)") | ||
| 30 | -print("-" * 50) | ||
| 31 | - | ||
| 32 | -image_url = "https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg" | ||
| 33 | -embedding = client.encode([image_url]) | ||
| 34 | - | ||
| 35 | -print(f"输入: {image_url}") | ||
| 36 | -print(f"输出形状: {embedding.shape}") # (1, 1024) | ||
| 37 | -print(f"✓ 编码完成\n") | ||
| 38 | - | ||
| 39 | -# ============================================================================ | ||
| 40 | -# 示例 3: 编码图像(本地路径) | ||
| 41 | -# ============================================================================ | ||
| 42 | -print("示例 3: 图像编码(本地文件)") | ||
| 43 | -print("-" * 50) | ||
| 44 | - | ||
| 45 | -local_image = "/path/to/local/image.jpg" | ||
| 46 | -# embedding = client.encode([local_image]) | ||
| 47 | -print(f"输入: {local_image}") | ||
| 48 | -print(f"用法: client.encode(['{local_image}'])") | ||
| 49 | -print(f"✓ 编码完成\n") | ||
| 50 | - | ||
| 51 | -# ============================================================================ | ||
| 52 | -# 示例 4: 混合编码(文本+图像) | ||
| 53 | -# ============================================================================ | ||
| 54 | -print("示例 4: 混合编码") | ||
| 55 | -print("-" * 50) | ||
| 56 | - | ||
| 57 | -mixed_data = [ | ||
| 58 | - '一只可爱的猫咪', # 文本 | ||
| 59 | - 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg', # 图像URL | ||
| 60 | -] | ||
| 61 | - | ||
| 62 | -embeddings = client.encode(mixed_data) | ||
| 63 | -print(f"输入: {mixed_data}") | ||
| 64 | -print(f"输出形状: {embeddings.shape}") # (2, 1024) | ||
| 65 | -print(f"✓ 编码完成\n") | ||
| 66 | - | ||
| 67 | -# ============================================================================ | ||
| 68 | -# 示例 5: 批量编码 | ||
| 69 | -# ============================================================================ | ||
| 70 | -print("示例 5: 批量编码(推荐)") | ||
| 71 | -print("-" * 50) | ||
| 72 | - | ||
| 73 | -# 一次编码多条数据(更高效) | ||
| 74 | -batch_data = [f"文本 {i}" for i in range(10)] | ||
| 75 | -embeddings = client.encode(batch_data) | ||
| 76 | - | ||
| 77 | -print(f"输入: {len(batch_data)} 条文本") | ||
| 78 | -print(f"输出形状: {embeddings.shape}") # (10, 1024) | ||
| 79 | -print(f"✓ 批量编码完成\n") | ||
| 80 | - | ||
| 81 | -# ============================================================================ | ||
| 82 | -# 重要提示 | ||
| 83 | -# ============================================================================ | ||
| 84 | -print("重要提示:") | ||
| 85 | -print("-" * 50) | ||
| 86 | -print("1. 输入必须是列表: client.encode(['文本']) ✓") | ||
| 87 | -print(" 不是单个字符串: client.encode('文本') ✗") | ||
| 88 | -print() | ||
| 89 | -print("2. 返回值是 numpy 数组,形状为 (N, 1024)") | ||
| 90 | -print(" N = 输入数据的数量") | ||
| 91 | -print() | ||
| 92 | -print("3. 图像支持 URL 和本地文件路径") | ||
| 93 | -print() |
examples/test_cnclip_example.py deleted
| @@ -1,177 +0,0 @@ | @@ -1,177 +0,0 @@ | ||
| 1 | -#!/usr/bin/env python3 | ||
| 2 | -""" | ||
| 3 | -CN-CLIP 快速测试脚本 | ||
| 4 | - | ||
| 5 | -测试文本和图像编码功能 | ||
| 6 | -""" | ||
| 7 | - | ||
| 8 | -from clip_client import Client | ||
| 9 | -from sklearn.metrics.pairwise import cosine_similarity | ||
| 10 | -import numpy as np | ||
| 11 | - | ||
| 12 | -# 测试图片 | ||
| 13 | -TEST_IMAGE = "https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg" | ||
| 14 | - | ||
| 15 | -# 测试文本 | ||
| 16 | -TEST_TEXTS = [ | ||
| 17 | - "一只可爱的猫咪", | ||
| 18 | - "美丽的高山风景", | ||
| 19 | - "汽车在公路上行驶", | ||
| 20 | - "现代建筑", | ||
| 21 | -] | ||
| 22 | - | ||
| 23 | -def test_connection(): | ||
| 24 | - """测试服务连接""" | ||
| 25 | - print("=" * 60) | ||
| 26 | - print("测试 1: 连接服务") | ||
| 27 | - print("=" * 60) | ||
| 28 | - | ||
| 29 | - try: | ||
| 30 | - client = Client('grpc://localhost:51000') | ||
| 31 | - print("✓ 服务连接成功") | ||
| 32 | - return client | ||
| 33 | - except Exception as e: | ||
| 34 | - print(f"✗ 连接失败: {e}") | ||
| 35 | - print("\n请确保服务已启动:") | ||
| 36 | - print(" ./scripts/start_cnclip_service.sh") | ||
| 37 | - return None | ||
| 38 | - | ||
| 39 | -def test_text_encoding(client): | ||
| 40 | - """测试文本编码""" | ||
| 41 | - print("\n" + "=" * 60) | ||
| 42 | - print("测试 2: 文本编码") | ||
| 43 | - print("=" * 60) | ||
| 44 | - | ||
| 45 | - print(f"\n测试文本:") | ||
| 46 | - for i, text in enumerate(TEST_TEXTS, 1): | ||
| 47 | - print(f" {i}. {text}") | ||
| 48 | - | ||
| 49 | - try: | ||
| 50 | - embeddings = client.encode(TEST_TEXTS) | ||
| 51 | - print(f"\n✓ 文本编码成功") | ||
| 52 | - print(f" 编码数量: {len(embeddings)}") | ||
| 53 | - print(f" 向量形状: {embeddings.shape}") | ||
| 54 | - print(f" 数据类型: {embeddings.dtype}") | ||
| 55 | - print(f" 值域: [{embeddings.min():.4f}, {embeddings.max():.4f}]") | ||
| 56 | - return embeddings | ||
| 57 | - except Exception as e: | ||
| 58 | - print(f"✗ 文本编码失败: {e}") | ||
| 59 | - return None | ||
| 60 | - | ||
| 61 | -def test_image_encoding(client): | ||
| 62 | - """测试图像编码""" | ||
| 63 | - print("\n" + "=" * 60) | ||
| 64 | - print("测试 3: 图像编码") | ||
| 65 | - print("=" * 60) | ||
| 66 | - | ||
| 67 | - print(f"\n测试图片: {TEST_IMAGE}") | ||
| 68 | - | ||
| 69 | - try: | ||
| 70 | - embeddings = client.encode([TEST_IMAGE]) | ||
| 71 | - print(f"\n✓ 图像编码成功") | ||
| 72 | - print(f" 向量形状: {embeddings.shape}") | ||
| 73 | - print(f" 数据类型: {embeddings.dtype}") | ||
| 74 | - print(f" 值域: [{embeddings.min():.4f}, {embeddings.max():.4f}]") | ||
| 75 | - return embeddings | ||
| 76 | - except Exception as e: | ||
| 77 | - print(f"✗ 图像编码失败: {e}") | ||
| 78 | - return None | ||
| 79 | - | ||
| 80 | -def test_image_text_retrieval(client, image_embedding, text_embeddings): | ||
| 81 | - """测试图文检索""" | ||
| 82 | - print("\n" + "=" * 60) | ||
| 83 | - print("测试 4: 图文检索(计算相似度)") | ||
| 84 | - print("=" * 60) | ||
| 85 | - | ||
| 86 | - print(f"\n使用图片搜索最匹配的文本...") | ||
| 87 | - | ||
| 88 | - try: | ||
| 89 | - # 计算相似度 | ||
| 90 | - similarities = cosine_similarity(image_embedding, text_embeddings)[0] | ||
| 91 | - | ||
| 92 | - print(f"\n相似度排序:") | ||
| 93 | - # 按相似度排序 | ||
| 94 | - sorted_indices = np.argsort(similarities)[::-1] | ||
| 95 | - | ||
| 96 | - for rank, idx in enumerate(sorted_indices, 1): | ||
| 97 | - text = TEST_TEXTS[idx] | ||
| 98 | - score = similarities[idx] | ||
| 99 | - bar = "█" * int(score * 50) | ||
| 100 | - print(f" {rank}. {score:.4f} {bar} {text}") | ||
| 101 | - | ||
| 102 | - print(f"\n最佳匹配: {TEST_TEXTS[sorted_indices[0]]}") | ||
| 103 | - print(f"相似度分数: {similarities[sorted_indices[0]]:.4f}") | ||
| 104 | - | ||
| 105 | - return similarities | ||
| 106 | - except Exception as e: | ||
| 107 | - print(f"✗ 相似度计算失败: {e}") | ||
| 108 | - return None | ||
| 109 | - | ||
| 110 | -def test_batch_encoding(client): | ||
| 111 | - """测试批量编码""" | ||
| 112 | - print("\n" + "=" * 60) | ||
| 113 | - print("测试 5: 批量编码性能") | ||
| 114 | - print("=" * 60) | ||
| 115 | - | ||
| 116 | - import time | ||
| 117 | - | ||
| 118 | - # 准备测试数据 | ||
| 119 | - batch_texts = [f"测试文本 {i}" for i in range(50)] | ||
| 120 | - | ||
| 121 | - print(f"\n编码 {len(batch_texts)} 条文本...") | ||
| 122 | - | ||
| 123 | - try: | ||
| 124 | - start = time.time() | ||
| 125 | - embeddings = client.encode(batch_texts) | ||
| 126 | - elapsed = time.time() - start | ||
| 127 | - | ||
| 128 | - print(f"\n✓ 批量编码成功") | ||
| 129 | - print(f" 耗时: {elapsed:.2f}秒") | ||
| 130 | - print(f" 速度: {len(batch_texts)/elapsed:.2f} 条/秒") | ||
| 131 | - print(f" 平均延迟: {elapsed/len(batch_texts)*1000:.2f}ms/条") | ||
| 132 | - | ||
| 133 | - except Exception as e: | ||
| 134 | - print(f"✗ 批量编码失败: {e}") | ||
| 135 | - | ||
| 136 | -def main(): | ||
| 137 | - print("\n" + "=" * 60) | ||
| 138 | - print("CN-CLIP 服务测试") | ||
| 139 | - print("=" * 60) | ||
| 140 | - print(f"\n测试图片: {TEST_IMAGE}") | ||
| 141 | - print(f"服务地址: grpc://localhost:51000") | ||
| 142 | - | ||
| 143 | - # 测试连接 | ||
| 144 | - client = test_connection() | ||
| 145 | - if not client: | ||
| 146 | - return | ||
| 147 | - | ||
| 148 | - # 测试文本编码 | ||
| 149 | - text_embeddings = test_text_encoding(client) | ||
| 150 | - if text_embeddings is None: | ||
| 151 | - return | ||
| 152 | - | ||
| 153 | - # 测试图像编码 | ||
| 154 | - image_embeddings = test_image_encoding(client) | ||
| 155 | - if image_embeddings is None: | ||
| 156 | - return | ||
| 157 | - | ||
| 158 | - # 测试图文检索 | ||
| 159 | - test_image_text_retrieval(client, image_embeddings, text_embeddings) | ||
| 160 | - | ||
| 161 | - # 测试批量编码性能 | ||
| 162 | - test_batch_encoding(client) | ||
| 163 | - | ||
| 164 | - # 总结 | ||
| 165 | - print("\n" + "=" * 60) | ||
| 166 | - print("测试总结") | ||
| 167 | - print("=" * 60) | ||
| 168 | - print("\n✓ 所有测试通过!") | ||
| 169 | - print("\n服务运行正常,可以开始使用。") | ||
| 170 | - print("\n下一步:") | ||
| 171 | - print(" 1. 查看使用文档: cat docs/CNCLIP_USAGE_GUIDE.md") | ||
| 172 | - print(" 2. 集成到你的项目") | ||
| 173 | - print(" 3. 调整服务配置(如需要)") | ||
| 174 | - print() | ||
| 175 | - | ||
| 176 | -if __name__ == '__main__': | ||
| 177 | - main() |
examples/test_curl_examples.sh deleted
| @@ -1,159 +0,0 @@ | @@ -1,159 +0,0 @@ | ||
| 1 | -#!/bin/bash | ||
| 2 | - | ||
| 3 | -############################################################################### | ||
| 4 | -# CN-CLIP REST API 快速测试脚本 | ||
| 5 | -# | ||
| 6 | -# 用途: | ||
| 7 | -# 测试 REST API 的各种功能 | ||
| 8 | -# | ||
| 9 | -# 使用方法: | ||
| 10 | -# ./examples/test_curl_examples.sh | ||
| 11 | -# | ||
| 12 | -############################################################################### | ||
| 13 | - | ||
| 14 | -set -e | ||
| 15 | - | ||
| 16 | -# 颜色定义 | ||
| 17 | -RED='\033[0;31m' | ||
| 18 | -GREEN='\033[0;32m' | ||
| 19 | -YELLOW='\033[1;33m' | ||
| 20 | -BLUE='\033[0;34m' | ||
| 21 | -NC='\033[0m' | ||
| 22 | - | ||
| 23 | -API_URL="http://localhost:51000" | ||
| 24 | -TEST_IMAGE="https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg" | ||
| 25 | - | ||
| 26 | -echo -e "${BLUE}========================================${NC}" | ||
| 27 | -echo -e "${BLUE}CN-CLIP REST API 测试${NC}" | ||
| 28 | -echo -e "${BLUE}========================================${NC}" | ||
| 29 | -echo "" | ||
| 30 | - | ||
| 31 | -# 测试 1: 健康检查 | ||
| 32 | -echo -e "${BLUE}测试 1: 健康检查${NC}" | ||
| 33 | -echo "curl ${API_URL}/health" | ||
| 34 | -echo "" | ||
| 35 | - | ||
| 36 | -# 测试 2: 编码文本 | ||
| 37 | -echo -e "${BLUE}测试 2: 编码文本${NC}" | ||
| 38 | -echo "curl -X POST ${API_URL}/encode/text \\" | ||
| 39 | -echo " -H 'Content-Type: application/json' \\" | ||
| 40 | -echo " -d '{\"texts\": [\"一只可爱的猫咪\"]}'" | ||
| 41 | -echo "" | ||
| 42 | - | ||
| 43 | -response=$(curl -s -X POST "${API_URL}/encode/text" \ | ||
| 44 | - -H "Content-Type: application/json" \ | ||
| 45 | - -d '{"texts": ["一只可爱的猫咪"]}') | ||
| 46 | - | ||
| 47 | -echo "$response" | python3 -m json.tool 2>/dev/null | head -20 || echo "$response" | ||
| 48 | - | ||
| 49 | -if echo "$response" | grep -q '"shape": \[1, 1024\]'; then | ||
| 50 | - echo -e "${GREEN}✓ 文本编码成功${NC}" | ||
| 51 | -else | ||
| 52 | - echo -e "${RED}✗ 文本编码失败${NC}" | ||
| 53 | -fi | ||
| 54 | - | ||
| 55 | -echo "" | ||
| 56 | -echo "按 Enter 继续..." | ||
| 57 | -read | ||
| 58 | - | ||
| 59 | -# 测试 3: 编码图像 | ||
| 60 | -echo -e "${BLUE}测试 3: 编码图像${NC}" | ||
| 61 | -echo "curl -X POST ${API_URL}/encode/image \\" | ||
| 62 | -echo " -H 'Content-Type: application/json' \\" | ||
| 63 | -echo " -d '{\"images\": [\"${TEST_IMAGE}\"]}'" | ||
| 64 | -echo "" | ||
| 65 | - | ||
| 66 | -response=$(curl -s -X POST "${API_URL}/encode/image" \ | ||
| 67 | - -H "Content-Type: application/json" \ | ||
| 68 | - -d "{\"images\": [\"${TEST_IMAGE}\"]}") | ||
| 69 | - | ||
| 70 | -echo "$response" | python3 -m json.tool 2>/dev/null | head -20 || echo "$response" | ||
| 71 | - | ||
| 72 | -if echo "$response" | grep -q '"shape": \[1, 1024\]'; then | ||
| 73 | - echo -e "${GREEN}✓ 图像编码成功${NC}" | ||
| 74 | -else | ||
| 75 | - echo -e "${RED}✗ 图像编码失败${NC}" | ||
| 76 | -fi | ||
| 77 | - | ||
| 78 | -echo "" | ||
| 79 | -echo "按 Enter 继续..." | ||
| 80 | -read | ||
| 81 | - | ||
| 82 | -# 测试 4: 批量编码 | ||
| 83 | -echo -e "${BLUE}测试 4: 批量编码${NC}" | ||
| 84 | -echo "curl -X POST ${API_URL}/encode/text \\" | ||
| 85 | -echo " -H 'Content-Type: application/json' \\" | ||
| 86 | -echo " -d '{\"texts\": [\"文本1\", \"文本2\", \"文本3\"]}'" | ||
| 87 | -echo "" | ||
| 88 | - | ||
| 89 | -response=$(curl -s -X POST "${API_URL}/encode/text" \ | ||
| 90 | - -H "Content-Type: application/json" \ | ||
| 91 | - -d '{"texts": ["文本1", "文本2", "文本3"]}') | ||
| 92 | - | ||
| 93 | -echo "$response" | python3 -m json.tool 2>/dev/null | head -20 || echo "$response" | ||
| 94 | - | ||
| 95 | -if echo "$response" | grep -q '"shape": \[3, 1024\]'; then | ||
| 96 | - echo -e "${GREEN}✓ 批量编码成功${NC}" | ||
| 97 | -else | ||
| 98 | - echo -e "${RED}✗ 批量编码失败${NC}" | ||
| 99 | -fi | ||
| 100 | - | ||
| 101 | -echo "" | ||
| 102 | -echo "按 Enter 继续..." | ||
| 103 | -read | ||
| 104 | - | ||
| 105 | -# 测试 5: 相似度计算 | ||
| 106 | -echo -e "${BLUE}测试 5: 相似度计算${NC}" | ||
| 107 | -echo "curl -X POST ${API_URL}/similarity \\" | ||
| 108 | -echo " -H 'Content-Type: application/json' \\" | ||
| 109 | -echo " -d '{\"text\": \"可爱的猫咪\", \"texts\": [\"一只可爱的小猫\", \"美丽的高山\"]}'" | ||
| 110 | -echo "" | ||
| 111 | - | ||
| 112 | -response=$(curl -s -X POST "${API_URL}/similarity" \ | ||
| 113 | - -H "Content-Type: application/json" \ | ||
| 114 | - -d '{"text": "可爱的猫咪", "texts": ["一只可爱的小猫", "美丽的高山"]}') | ||
| 115 | - | ||
| 116 | -echo "$response" | python3 -m json.tool 2>/dev/null || echo "$response" | ||
| 117 | - | ||
| 118 | -if echo "$response" | grep -q '"text_similarities"'; then | ||
| 119 | - echo -e "${GREEN}✓ 相似度计算成功${NC}" | ||
| 120 | -else | ||
| 121 | - echo -e "${RED}✗ 相似度计算失败${NC}" | ||
| 122 | -fi | ||
| 123 | - | ||
| 124 | -echo "" | ||
| 125 | -echo "按 Enter 继续..." | ||
| 126 | -read | ||
| 127 | - | ||
| 128 | -# 测试 6: 混合编码 | ||
| 129 | -echo -e "${BLUE}测试 6: 混合编码(文本+图像)${NC}" | ||
| 130 | -echo "curl -X POST ${API_URL}/encode/mixed \\" | ||
| 131 | -echo " -H 'Content-Type: application/json' \\" | ||
| 132 | -echo " -d '{\"data\": [\"一只可爱的猫咪\", \"${TEST_IMAGE}\"]}'" | ||
| 133 | -echo "" | ||
| 134 | - | ||
| 135 | -response=$(curl -s -X POST "${API_URL}/encode/mixed" \ | ||
| 136 | - -H "Content-Type: application/json" \ | ||
| 137 | - -d "{\"data\": [\"一只可爱的猫咪\", \"${TEST_IMAGE}\"]}") | ||
| 138 | - | ||
| 139 | -echo "$response" | python3 -m json.tool 2>/dev/null | head -20 || echo "$response" | ||
| 140 | - | ||
| 141 | -if echo "$response" | grep -q '"shape": \[2, 1024\]'; then | ||
| 142 | - echo -e "${GREEN}✓ 混合编码成功${NC}" | ||
| 143 | -else | ||
| 144 | - echo -e "${RED}✗ 混合编码失败${NC}" | ||
| 145 | -fi | ||
| 146 | - | ||
| 147 | -# 总结 | ||
| 148 | -echo "" | ||
| 149 | -echo -e "${BLUE}========================================${NC}" | ||
| 150 | -echo -e "${BLUE}测试总结${NC}" | ||
| 151 | -echo -e "${BLUE}========================================${NC}" | ||
| 152 | -echo "" | ||
| 153 | -echo -e "${GREEN}✓ 所有测试完成!${NC}" | ||
| 154 | -echo "" | ||
| 155 | -echo -e "下一步:" | ||
| 156 | -echo " 1. 查看使用文档: cat docs/CNCLIP_CURL_GUIDE.md" | ||
| 157 | -echo " 2. 在你的代码中调用 API" | ||
| 158 | -echo " 3. 集成到你的应用" | ||
| 159 | -echo "" |
scripts/compare_index_mappings.py deleted
| @@ -1,189 +0,0 @@ | @@ -1,189 +0,0 @@ | ||
| 1 | -#!/usr/bin/env python3 | ||
| 2 | -""" | ||
| 3 | -对比不同租户索引的 mapping 结构 | ||
| 4 | -""" | ||
| 5 | - | ||
| 6 | -import os | ||
| 7 | -import sys | ||
| 8 | -import json | ||
| 9 | -from pathlib import Path | ||
| 10 | -from typing import Dict, Any | ||
| 11 | - | ||
| 12 | -sys.path.insert(0, str(Path(__file__).parent.parent)) | ||
| 13 | - | ||
| 14 | -from utils.es_client import get_es_client_from_env | ||
| 15 | - | ||
| 16 | - | ||
| 17 | -def get_field_type(mapping_dict: Dict, field_path: str) -> Dict[str, Any]: | ||
| 18 | - """递归获取字段的 mapping 信息""" | ||
| 19 | - parts = field_path.split('.') | ||
| 20 | - current = mapping_dict | ||
| 21 | - | ||
| 22 | - for part in parts: | ||
| 23 | - if isinstance(current, dict): | ||
| 24 | - current = current.get(part) | ||
| 25 | - if current is None: | ||
| 26 | - return None | ||
| 27 | - else: | ||
| 28 | - return None | ||
| 29 | - return current | ||
| 30 | - | ||
| 31 | - | ||
| 32 | -def compare_mappings(mapping1: Dict[str, Any], mapping2: Dict[str, Any], index1_name: str, index2_name: str): | ||
| 33 | - """对比两个索引的 mapping""" | ||
| 34 | - props1 = mapping1.get('mappings', {}).get('properties', {}) | ||
| 35 | - props2 = mapping2.get('mappings', {}).get('properties', {}) | ||
| 36 | - | ||
| 37 | - all_fields = set(props1.keys()) | set(props2.keys()) | ||
| 38 | - | ||
| 39 | - print(f"\n{'='*80}") | ||
| 40 | - print(f"对比索引映射结构") | ||
| 41 | - print(f"{'='*80}") | ||
| 42 | - print(f"索引1: {index1_name}") | ||
| 43 | - print(f"索引2: {index2_name}") | ||
| 44 | - print(f"{'='*80}\n") | ||
| 45 | - | ||
| 46 | - differences = [] | ||
| 47 | - same_fields = [] | ||
| 48 | - | ||
| 49 | - for field in sorted(all_fields): | ||
| 50 | - field1 = props1.get(field) | ||
| 51 | - field2 = props2.get(field) | ||
| 52 | - | ||
| 53 | - if field1 is None: | ||
| 54 | - differences.append((field, f"只在 {index2_name} 中存在", field2)) | ||
| 55 | - continue | ||
| 56 | - if field2 is None: | ||
| 57 | - differences.append((field, f"只在 {index1_name} 中存在", field1)) | ||
| 58 | - continue | ||
| 59 | - | ||
| 60 | - type1 = field1.get('type') | ||
| 61 | - type2 = field2.get('type') | ||
| 62 | - | ||
| 63 | - if type1 != type2: | ||
| 64 | - differences.append((field, f"类型不同: {index1_name}={type1}, {index2_name}={type2}", (field1, field2))) | ||
| 65 | - else: | ||
| 66 | - same_fields.append((field, type1)) | ||
| 67 | - | ||
| 68 | - # 打印相同的字段 | ||
| 69 | - print(f"✓ 相同字段 ({len(same_fields)} 个):") | ||
| 70 | - for field, field_type in same_fields[:20]: # 只显示前20个 | ||
| 71 | - print(f" - {field}: {field_type}") | ||
| 72 | - if len(same_fields) > 20: | ||
| 73 | - print(f" ... 还有 {len(same_fields) - 20} 个相同字段") | ||
| 74 | - | ||
| 75 | - # 打印不同的字段 | ||
| 76 | - if differences: | ||
| 77 | - print(f"\n✗ 不同字段 ({len(differences)} 个):") | ||
| 78 | - for field, reason, details in differences: | ||
| 79 | - print(f"\n {field}:") | ||
| 80 | - print(f" {reason}") | ||
| 81 | - if isinstance(details, tuple): | ||
| 82 | - print(f" {index1_name}: {json.dumps(details[0], indent=4, ensure_ascii=False)}") | ||
| 83 | - print(f" {index2_name}: {json.dumps(details[1], indent=4, ensure_ascii=False)}") | ||
| 84 | - else: | ||
| 85 | - print(f" 详情: {json.dumps(details, indent=4, ensure_ascii=False)}") | ||
| 86 | - else: | ||
| 87 | - print(f"\n✓ 所有字段类型一致!") | ||
| 88 | - | ||
| 89 | - # 特别检查 tags 字段 | ||
| 90 | - print(f"\n{'='*80}") | ||
| 91 | - print(f"特别检查: tags 字段") | ||
| 92 | - print(f"{'='*80}") | ||
| 93 | - | ||
| 94 | - tags1 = get_field_type(props1, 'tags') | ||
| 95 | - tags2 = get_field_type(props2, 'tags') | ||
| 96 | - | ||
| 97 | - if tags1: | ||
| 98 | - print(f"\n{index1_name}.tags:") | ||
| 99 | - print(f" 类型: {tags1.get('type')}") | ||
| 100 | - print(f" 完整定义: {json.dumps(tags1, indent=2, ensure_ascii=False)}") | ||
| 101 | - else: | ||
| 102 | - print(f"\n{index1_name}.tags: 不存在") | ||
| 103 | - | ||
| 104 | - if tags2: | ||
| 105 | - print(f"\n{index2_name}.tags:") | ||
| 106 | - print(f" 类型: {tags2.get('type')}") | ||
| 107 | - print(f" 完整定义: {json.dumps(tags2, indent=2, ensure_ascii=False)}") | ||
| 108 | - else: | ||
| 109 | - print(f"\n{index2_name}.tags: 不存在") | ||
| 110 | - | ||
| 111 | - | ||
| 112 | -def main(): | ||
| 113 | - import argparse | ||
| 114 | - | ||
| 115 | - parser = argparse.ArgumentParser(description='对比 Elasticsearch 索引的 mapping 结构') | ||
| 116 | - parser.add_argument('index1', help='第一个索引名称 (例如: search_products_tenant_171)') | ||
| 117 | - parser.add_argument('index2', nargs='?', help='第二个索引名称 (例如: search_products_tenant_162)') | ||
| 118 | - parser.add_argument('--list', action='store_true', help='列出所有以 index1 为前缀的索引') | ||
| 119 | - | ||
| 120 | - args = parser.parse_args() | ||
| 121 | - | ||
| 122 | - # 连接 ES | ||
| 123 | - try: | ||
| 124 | - es_client = get_es_client_from_env() | ||
| 125 | - if not es_client.ping(): | ||
| 126 | - print("✗ 无法连接到 Elasticsearch") | ||
| 127 | - return 1 | ||
| 128 | - print("✓ Elasticsearch 连接成功\n") | ||
| 129 | - except Exception as e: | ||
| 130 | - print(f"✗ 连接 Elasticsearch 失败: {e}") | ||
| 131 | - return 1 | ||
| 132 | - | ||
| 133 | - # 如果指定了 --list,列出所有匹配的索引 | ||
| 134 | - if args.list or not args.index2: | ||
| 135 | - try: | ||
| 136 | - # 使用 cat API 列出所有索引 | ||
| 137 | - indices = es_client.client.cat.indices(format='json') | ||
| 138 | - matching_indices = [idx['index'] for idx in indices if idx['index'].startswith(args.index1)] | ||
| 139 | - | ||
| 140 | - if matching_indices: | ||
| 141 | - print(f"找到 {len(matching_indices)} 个匹配的索引:") | ||
| 142 | - for idx in sorted(matching_indices): | ||
| 143 | - print(f" - {idx}") | ||
| 144 | - return 0 | ||
| 145 | - else: | ||
| 146 | - print(f"未找到以 '{args.index1}' 开头的索引") | ||
| 147 | - return 1 | ||
| 148 | - except Exception as e: | ||
| 149 | - print(f"✗ 列出索引失败: {e}") | ||
| 150 | - return 1 | ||
| 151 | - | ||
| 152 | - # 获取两个索引的 mapping | ||
| 153 | - index1 = args.index1 | ||
| 154 | - index2 = args.index2 | ||
| 155 | - | ||
| 156 | - print(f"正在获取索引映射...") | ||
| 157 | - print(f" 索引1: {index1}") | ||
| 158 | - print(f" 索引2: {index2}\n") | ||
| 159 | - | ||
| 160 | - # 检查索引是否存在 | ||
| 161 | - if not es_client.index_exists(index1): | ||
| 162 | - print(f"✗ 索引 '{index1}' 不存在") | ||
| 163 | - return 1 | ||
| 164 | - | ||
| 165 | - if not es_client.index_exists(index2): | ||
| 166 | - print(f"✗ 索引 '{index2}' 不存在") | ||
| 167 | - return 1 | ||
| 168 | - | ||
| 169 | - # 获取 mapping | ||
| 170 | - mapping1 = es_client.get_mapping(index1) | ||
| 171 | - mapping2 = es_client.get_mapping(index2) | ||
| 172 | - | ||
| 173 | - if not mapping1 or index1 not in mapping1: | ||
| 174 | - print(f"✗ 无法获取索引 '{index1}' 的映射") | ||
| 175 | - return 1 | ||
| 176 | - | ||
| 177 | - if not mapping2 or index2 not in mapping2: | ||
| 178 | - print(f"✗ 无法获取索引 '{index2}' 的映射") | ||
| 179 | - return 1 | ||
| 180 | - | ||
| 181 | - # 对比 mapping | ||
| 182 | - compare_mappings(mapping1[index1], mapping2[index2], index1, index2) | ||
| 183 | - | ||
| 184 | - return 0 | ||
| 185 | - | ||
| 186 | - | ||
| 187 | -if __name__ == '__main__': | ||
| 188 | - sys.exit(main()) | ||
| 189 | - |
scripts/remove_clip_submodule.sh deleted
| @@ -1,210 +0,0 @@ | @@ -1,210 +0,0 @@ | ||
| 1 | -#!/bin/bash | ||
| 2 | - | ||
| 3 | -############################################################################### | ||
| 4 | -# 将 clip-as-service 从 Git 子模块转为普通目录 | ||
| 5 | -# | ||
| 6 | -# 用途: | ||
| 7 | -# 移除 Git 子模块配置,将 clip-as-service 直接纳入主项目管理 | ||
| 8 | -# | ||
| 9 | -# 使用方法: | ||
| 10 | -# ./scripts/remove_clip_submodule.sh | ||
| 11 | -# | ||
| 12 | -# 注意: | ||
| 13 | -# - 此操作会修改 Git 配置,请确保已提交当前更改 | ||
| 14 | -# - 建议先创建备份分支 | ||
| 15 | -# - 执行前请先阅读 docs/GIT_SUBMODULE_GUIDE.md | ||
| 16 | -# | ||
| 17 | -############################################################################### | ||
| 18 | - | ||
| 19 | -set -e # 遇到错误立即退出 | ||
| 20 | - | ||
| 21 | -# 颜色定义 | ||
| 22 | -RED='\033[0;31m' | ||
| 23 | -GREEN='\033[0;32m' | ||
| 24 | -YELLOW='\033[1;33m' | ||
| 25 | -BLUE='\033[0;34m' | ||
| 26 | -NC='\033[0m' # No Color | ||
| 27 | - | ||
| 28 | -# 项目路径 | ||
| 29 | -PROJECT_ROOT="/data/tw/SearchEngine" | ||
| 30 | - | ||
| 31 | -echo -e "${BLUE}========================================${NC}" | ||
| 32 | -echo -e "${BLUE}将 clip-as-service 从子模块转为普通目录${NC}" | ||
| 33 | -echo -e "${BLUE}========================================${NC}" | ||
| 34 | -echo "" | ||
| 35 | - | ||
| 36 | -# 检查是否在项目根目录 | ||
| 37 | -if [ ! -f "${PROJECT_ROOT}/.git" ] && [ ! -d "${PROJECT_ROOT}/.git" ]; then | ||
| 38 | - echo -e "${RED}错误: 当前目录不是 Git 仓库根目录${NC}" | ||
| 39 | - echo -e "${YELLOW}请在项目根目录运行此脚本${NC}" | ||
| 40 | - exit 1 | ||
| 41 | -fi | ||
| 42 | - | ||
| 43 | -cd "${PROJECT_ROOT}" | ||
| 44 | - | ||
| 45 | -# 检查是否有未提交的更改 | ||
| 46 | -if [ -n "$(git status --porcelain)" ]; then | ||
| 47 | - echo -e "${RED}错误: Git 工作区有未提交的更改${NC}" | ||
| 48 | - echo -e "${YELLOW}请先提交或暂存所有更改${NC}" | ||
| 49 | - echo "" | ||
| 50 | - echo "未提交的文件:" | ||
| 51 | - git status --short | ||
| 52 | - echo "" | ||
| 53 | - echo -e "${YELLOW}建议操作:${NC}" | ||
| 54 | - echo " 1. 提交更改: git add . && git commit -m '保存点'" | ||
| 55 | - echo " 2. 或创建备份分支: git branch backup-before-submodule-removal" | ||
| 56 | - exit 1 | ||
| 57 | -fi | ||
| 58 | - | ||
| 59 | -# 确认操作 | ||
| 60 | -echo -e "${YELLOW}此操作将会:${NC}" | ||
| 61 | -echo " 1. 移除 clip-as-service 的子模块配置" | ||
| 62 | -echo " 2. 将其作为普通目录纳入 Git 管理" | ||
| 63 | -echo " 3. 更新 .gitmodules 文件" | ||
| 64 | -echo "" | ||
| 65 | -echo -e "${YELLOW}请确保已阅读: docs/GIT_SUBMODULE_GUIDE.md${NC}" | ||
| 66 | -echo "" | ||
| 67 | -read -p "$(echo -e ${YELLOW}是否继续? [y/N]: ${NC})" -n 1 -r | ||
| 68 | -echo | ||
| 69 | -if [[ ! $REPLY =~ ^[Yy]$ ]]; then | ||
| 70 | - echo -e "${YELLOW}已取消操作${NC}" | ||
| 71 | - exit 0 | ||
| 72 | -fi | ||
| 73 | - | ||
| 74 | -# 创建备份分支 | ||
| 75 | -BACKUP_BRANCH="backup-before-submodule-removal-$(date +%Y%m%d-%H%M%S)" | ||
| 76 | -echo -e "${BLUE}创建备份分支: ${BACKUP_BRANCH}${NC}" | ||
| 77 | -git branch "${BACKUP_BRANCH}" | ||
| 78 | -echo -e "${GREEN}✓ 备份分支已创建${NC}" | ||
| 79 | -echo "" | ||
| 80 | - | ||
| 81 | -# 步骤 1: 备份 .gitmodules | ||
| 82 | -echo -e "${BLUE}步骤 1: 备份配置文件...${NC}" | ||
| 83 | -if [ -f ".gitmodules" ]; then | ||
| 84 | - cp .gitmodules .gitmodules.backup | ||
| 85 | - echo -e "${GREEN}✓ 已备份 .gitmodules → .gitmodules.backup${NC}" | ||
| 86 | -fi | ||
| 87 | - | ||
| 88 | -# 步骤 2: 移除子模块追踪 | ||
| 89 | -echo -e "${BLUE}步骤 2: 移除子模块追踪...${NC}" | ||
| 90 | -if git ls-files --stage | grep -q "160000.*third-party/clip-as-service"; then | ||
| 91 | - git rm --cached third-party/clip-as-service | ||
| 92 | - echo -e "${GREEN}✓ 已移除子模块缓存${NC}" | ||
| 93 | -else | ||
| 94 | - echo -e "${YELLOW}⚠ 子模块未在 Git 索引中,跳过此步骤${NC}" | ||
| 95 | -fi | ||
| 96 | - | ||
| 97 | -# 步骤 3: 清理 .gitmodules | ||
| 98 | -echo -e "${BLUE}步骤 3: 清理 .gitmodules...${NC}" | ||
| 99 | -if [ -f ".gitmodules" ]; then | ||
| 100 | - # 删除 clip-as-service 相关的配置块 | ||
| 101 | - # 从 [submodule "third-party/clip-as-service"] 到下一个空行或文件末尾 | ||
| 102 | - sed -i '/\[submodule "third-party\/clip-as-service"\]/,/\s*$/d' .gitmodules | ||
| 103 | - | ||
| 104 | - # 如果文件为空或只包含空行,删除它 | ||
| 105 | - if [ ! -s ".gitmodules" ] || [ $(wc -l < .gitmodules) -eq 0 ]; then | ||
| 106 | - rm .gitmodules | ||
| 107 | - echo -e "${GREEN}✓ 已删除空的 .gitmodules 文件${NC}" | ||
| 108 | - else | ||
| 109 | - echo -e "${GREEN}✓ 已更新 .gitmodules${NC}" | ||
| 110 | - fi | ||
| 111 | -else | ||
| 112 | - echo -e "${YELLOW}⚠ .gitmodules 文件不存在,跳过此步骤${NC}" | ||
| 113 | -fi | ||
| 114 | - | ||
| 115 | -# 步骤 4: 删除子模块的 Git 仓库 | ||
| 116 | -echo -e "${BLUE}步骤 4: 删除子模块 Git 仓库...${NC}" | ||
| 117 | -if [ -d "third-party/clip-as-service/.git" ]; then | ||
| 118 | - rm -rf third-party/clip-as-service/.git | ||
| 119 | - echo -e "${GREEN}✓ 已删除子模块 Git 仓库${NC}" | ||
| 120 | -else | ||
| 121 | - echo -e "${YELLOW}⚠ 子模块 Git 仓库不存在,跳过此步骤${NC}" | ||
| 122 | -fi | ||
| 123 | - | ||
| 124 | -# 步骤 5: 清理 Git 模块缓存 | ||
| 125 | -echo -e "${BLUE}步骤 5: 清理 Git 模块缓存...${NC}" | ||
| 126 | -if [ -d ".git/modules/third-party/clip-as-service" ]; then | ||
| 127 | - rm -rf .git/modules/third-party/clip-as-service | ||
| 128 | - echo -e "${GREEN}✓ 已清理 Git 模块缓存${NC}" | ||
| 129 | -else | ||
| 130 | - echo -e "${YELLOW}⚠ Git 模块缓存不存在,跳过此步骤${NC}" | ||
| 131 | -fi | ||
| 132 | - | ||
| 133 | -# 步骤 6: 将目录作为普通文件添加 | ||
| 134 | -echo -e "${BLUE}步骤 6: 将目录添加到 Git...${NC}" | ||
| 135 | -git add third-party/clip-as-service/ | ||
| 136 | -if [ -f ".gitmodules" ]; then | ||
| 137 | - git add .gitmodules | ||
| 138 | -fi | ||
| 139 | -echo -e "${GREEN}✓ 已添加到 Git${NC}" | ||
| 140 | - | ||
| 141 | -# 显示状态 | ||
| 142 | -echo "" | ||
| 143 | -echo -e "${BLUE}========================================${NC}" | ||
| 144 | -echo -e "${BLUE}操作完成!当前状态:${NC}" | ||
| 145 | -echo -e "${BLUE}========================================${NC}" | ||
| 146 | -echo "" | ||
| 147 | -git status | ||
| 148 | -echo "" | ||
| 149 | - | ||
| 150 | -# 统计信息 | ||
| 151 | -echo -e "${BLUE}统计信息:${NC}" | ||
| 152 | -FILE_COUNT=$(find third-party/clip-as-service -type f | wc -l) | ||
| 153 | -DIR_SIZE=$(du -sh third-party/clip-as-service | cut -f1) | ||
| 154 | -echo " 文件数量: ${FILE_COUNT}" | ||
| 155 | -echo " 目录大小: ${DIR_SIZE}" | ||
| 156 | -echo "" | ||
| 157 | - | ||
| 158 | -# 下一步提示 | ||
| 159 | -echo -e "${GREEN}========================================${NC}" | ||
| 160 | -echo -e "${GREEN}✓ 子模块转换完成!${NC}" | ||
| 161 | -echo -e "${GREEN}========================================${NC}" | ||
| 162 | -echo "" | ||
| 163 | -echo -e "${BLUE}下一步操作:${NC}" | ||
| 164 | -echo "" | ||
| 165 | -echo "1. 查看将要提交的更改:" | ||
| 166 | -echo " git diff --cached --stat" | ||
| 167 | -echo "" | ||
| 168 | -echo "2. 提交更改:" | ||
| 169 | -echo " git commit -m 'feat: 将 clip-as-service 从子模块转为普通目录'" | ||
| 170 | -echo "" | ||
| 171 | -echo "3. 推送到远程:" | ||
| 172 | -echo " git push origin main" | ||
| 173 | -echo "" | ||
| 174 | -echo -e "${BLUE}注意事项:${NC}" | ||
| 175 | -echo " - 备份分支: ${BACKUP_BRANCH}" | ||
| 176 | -echo " - 如需回退: git reset --hard ${BACKUP_BRANCH}" | ||
| 177 | -echo " - 配置备份: .gitmodules.backup" | ||
| 178 | -echo "" | ||
| 179 | -echo -e "${BLUE}验证转换:${NC}" | ||
| 180 | -echo " cd third-party/clip-as-service" | ||
| 181 | -echo " git status # 应该显示 'not a git repository'" | ||
| 182 | -echo "" | ||
| 183 | - | ||
| 184 | -# 提示查看详细文档 | ||
| 185 | -echo -e "${YELLOW}📖 详细文档: docs/GIT_SUBMODULE_GUIDE.md${NC}" | ||
| 186 | -echo "" | ||
| 187 | - | ||
| 188 | -# 询问是否立即提交 | ||
| 189 | -read -p "$(echo -e ${YELLOW}是否立即提交更改? [Y/n]: ${NC})" -n 1 -r | ||
| 190 | -echo | ||
| 191 | -if [[ $REPLY =~ ^[Yy]$ ]] || [ -z "$REPLY" ]; then | ||
| 192 | - echo -e "${BLUE}正在提交更改...${NC}" | ||
| 193 | - git commit -m "feat: 将 clip-as-service 从子模块转为普通目录 | ||
| 194 | - | ||
| 195 | -- 移除子模块配置,改为普通目录 | ||
| 196 | -- 便于自定义配置和管理 | ||
| 197 | -- 备份分支: ${BACKUP_BRANCH} | ||
| 198 | -" | ||
| 199 | - echo -e "${GREEN}✓ 已提交${NC}" | ||
| 200 | - echo "" | ||
| 201 | - echo -e "${YELLOW}现在可以推送到远程仓库:${NC}" | ||
| 202 | - echo " git push origin main" | ||
| 203 | -else | ||
| 204 | - echo -e "${YELLOW}跳过提交${NC}" | ||
| 205 | - echo -e "${YELLOW}你可以稍后手动提交:${NC}" | ||
| 206 | - echo " git commit -m 'feat: 将 clip-as-service 从子模块转为普通目录'" | ||
| 207 | -fi | ||
| 208 | - | ||
| 209 | -echo "" | ||
| 210 | -echo -e "${GREEN}所有操作完成!${NC}" |
scripts/start_clip_api.sh deleted
| @@ -1,154 +0,0 @@ | @@ -1,154 +0,0 @@ | ||
| 1 | -#!/bin/bash | ||
| 2 | - | ||
| 3 | -############################################################################### | ||
| 4 | -# CN-CLIP REST API 启动脚本 | ||
| 5 | -# | ||
| 6 | -# 用途: | ||
| 7 | -# 启动 REST API 服务,提供 HTTP 接口供 curl 调用 | ||
| 8 | -# | ||
| 9 | -# 使用方法: | ||
| 10 | -# ./scripts/start_clip_api.sh | ||
| 11 | -# | ||
| 12 | -############################################################################### | ||
| 13 | - | ||
| 14 | -set -e | ||
| 15 | - | ||
| 16 | -# 颜色定义 | ||
| 17 | -RED='\033[0;31m' | ||
| 18 | -GREEN='\033[0;32m' | ||
| 19 | -YELLOW='\033[1;33m' | ||
| 20 | -BLUE='\033[0;34m' | ||
| 21 | -NC='\033[0m' | ||
| 22 | - | ||
| 23 | -# 项目路径 | ||
| 24 | -PROJECT_ROOT="/data/tw/SearchEngine" | ||
| 25 | -API_SCRIPT="${PROJECT_ROOT}/examples/clip_rest_api.py" | ||
| 26 | -PID_FILE="${PROJECT_ROOT}/logs/clip_rest_api.pid" | ||
| 27 | -LOG_FILE="${PROJECT_ROOT}/logs/clip_rest_api.log" | ||
| 28 | - | ||
| 29 | -echo -e "${BLUE}========================================${NC}" | ||
| 30 | -echo -e "${BLUE}启动 CN-CLIP REST API 服务${NC}" | ||
| 31 | -echo -e "${BLUE}========================================${NC}" | ||
| 32 | -echo "" | ||
| 33 | - | ||
| 34 | -# 检查 API 脚本 | ||
| 35 | -if [ ! -f "${API_SCRIPT}" ]; then | ||
| 36 | - echo -e "${RED}错误: API 脚本不存在: ${API_SCRIPT}${NC}" | ||
| 37 | - exit 1 | ||
| 38 | -fi | ||
| 39 | - | ||
| 40 | -# 创建日志目录 | ||
| 41 | -mkdir -p "$(dirname "${LOG_FILE}")" | ||
| 42 | - | ||
| 43 | -# 检查是否已有服务运行 | ||
| 44 | -if [ -f "${PID_FILE}" ]; then | ||
| 45 | - OLD_PID=$(cat "${PID_FILE}") | ||
| 46 | - if ps -p ${OLD_PID} > /dev/null 2>&1; then | ||
| 47 | - echo -e "${YELLOW}警告: REST API 服务已在运行 (PID: ${OLD_PID})${NC}" | ||
| 48 | - echo -e "${YELLOW}如需重启,请先运行: ./scripts/stop_clip_api.sh${NC}" | ||
| 49 | - exit 0 | ||
| 50 | - else | ||
| 51 | - echo -e "${YELLOW}清理旧的 PID 文件${NC}" | ||
| 52 | - rm -f "${PID_FILE}" | ||
| 53 | - fi | ||
| 54 | -fi | ||
| 55 | - | ||
| 56 | -# 检查端口是否被占用 | ||
| 57 | -if lsof -Pi :6000 -sTCP:LISTEN -t >/dev/null 2>&1; then | ||
| 58 | - echo -e "${RED}错误: 端口 6000 已被占用${NC}" | ||
| 59 | - echo -e "${YELLOW}请检查是否有其他服务正在使用该端口${NC}" | ||
| 60 | - lsof -i :6000 | grep LISTEN || true | ||
| 61 | - exit 1 | ||
| 62 | -fi | ||
| 63 | - | ||
| 64 | -# 检查 conda 环境 | ||
| 65 | -if [ -z "${CONDA_DEFAULT_ENV}" ] || [ "${CONDA_DEFAULT_ENV}" != "clip_service" ]; then | ||
| 66 | - echo -e "${YELLOW}激活 clip_service 环境...${NC}" | ||
| 67 | - if [ -f "/home/tw/miniconda3/etc/profile.d/conda.sh" ]; then | ||
| 68 | - source "/home/tw/miniconda3/etc/profile.d/conda.sh" | ||
| 69 | - conda activate clip_service | ||
| 70 | - else | ||
| 71 | - echo -e "${RED}错误: 无法找到 conda${NC}" | ||
| 72 | - exit 1 | ||
| 73 | - fi | ||
| 74 | -fi | ||
| 75 | - | ||
| 76 | -# 检查依赖 | ||
| 77 | -echo -e "${BLUE}检查依赖...${NC}" | ||
| 78 | -python -c "import flask" 2>/dev/null || { | ||
| 79 | - echo -e "${YELLOW}Flask 未安装,正在安装...${NC}" | ||
| 80 | - pip install flask flask-cors scikit-learn | ||
| 81 | -} | ||
| 82 | - | ||
| 83 | -# 检查 CN-CLIP 服务 | ||
| 84 | -echo -e "${BLUE}检查 CN-CLIP 服务...${NC}" | ||
| 85 | -if ! ps aux | grep "clip_server" | grep -v grep > /dev/null; then | ||
| 86 | - echo -e "${YELLOW}警告: CN-CLIP 服务未运行${NC}" | ||
| 87 | - echo -e "${YELLOW}请先启动: ./scripts/start_cnclip_service.sh${NC}" | ||
| 88 | - read -p "$(echo -e ${YELLOW}是否继续启动 REST API? [y/N]: ${NC})" -n 1 -r | ||
| 89 | - echo | ||
| 90 | - if [[ ! $REPLY =~ ^[Yy]$ ]]; then | ||
| 91 | - exit 0 | ||
| 92 | - fi | ||
| 93 | -fi | ||
| 94 | - | ||
| 95 | -# 启动 REST API 服务 | ||
| 96 | -echo -e "${BLUE}正在启动 REST API 服务...${NC}" | ||
| 97 | -cd "${PROJECT_ROOT}" | ||
| 98 | - | ||
| 99 | -nohup python "${API_SCRIPT}" > "${LOG_FILE}" 2>&1 & | ||
| 100 | -API_PID=$! | ||
| 101 | -echo ${API_PID} > "${PID_FILE}" | ||
| 102 | - | ||
| 103 | -# 等待服务启动 | ||
| 104 | -echo -e "${YELLOW}等待服务启动...${NC}" | ||
| 105 | -sleep 3 | ||
| 106 | - | ||
| 107 | -# 检查服务是否启动成功 | ||
| 108 | -if ps -p ${API_PID} > /dev/null 2>&1; then | ||
| 109 | - # 检查端口是否监听 | ||
| 110 | - if lsof -Pi :6000 -sTCP:LISTEN -t >/dev/null 2>&1; then | ||
| 111 | - echo -e "${GREEN}========================================${NC}" | ||
| 112 | - echo -e "${GREEN}✓ REST API 服务启动成功!${NC}" | ||
| 113 | - echo -e "${GREEN}========================================${NC}" | ||
| 114 | - echo "" | ||
| 115 | - echo -e "服务信息:" | ||
| 116 | - echo -e " PID: ${API_PID}" | ||
| 117 | - echo -e " 端口: 6000" | ||
| 118 | - echo -e " 日志文件: ${LOG_FILE}" | ||
| 119 | - echo "" | ||
| 120 | - echo -e "测试服务:" | ||
| 121 | - echo -e " curl http://localhost:6000/health" | ||
| 122 | - echo "" | ||
| 123 | - echo -e "使用示例:" | ||
| 124 | - echo -e " # 编码文本" | ||
| 125 | - echo -e " curl -X POST http://localhost:6000/encode/text \\" | ||
| 126 | - echo -e " -H 'Content-Type: application/json' \\" | ||
| 127 | - echo -e " -d '{\"texts\": [\"测试文本\"]}'" | ||
| 128 | - echo "" | ||
| 129 | - echo -e " # 编码图像" | ||
| 130 | - echo -e " curl -X POST http://localhost:6000/encode/image \\" | ||
| 131 | - echo -e " -H 'Content-Type: application/json' \\" | ||
| 132 | - echo -e " -d '{\"images\": [\"https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg\"]}'" | ||
| 133 | - echo "" | ||
| 134 | - echo -e "查看日志:" | ||
| 135 | - echo -e " tail -f ${LOG_FILE}" | ||
| 136 | - echo "" | ||
| 137 | - echo -e "停止服务:" | ||
| 138 | - echo -e " ./scripts/stop_clip_api.sh" | ||
| 139 | - echo "" | ||
| 140 | - else | ||
| 141 | - echo -e "${YELLOW}服务已启动,但端口尚未监听,请稍候...${NC}" | ||
| 142 | - echo -e "${YELLOW}查看日志: tail -f ${LOG_FILE}${NC}" | ||
| 143 | - fi | ||
| 144 | -else | ||
| 145 | - echo -e "${RED}========================================${NC}" | ||
| 146 | - echo -e "${RED}✗ 服务启动失败!${NC}" | ||
| 147 | - echo -e "${RED}========================================${NC}" | ||
| 148 | - echo "" | ||
| 149 | - echo -e "请查看日志获取详细错误信息:" | ||
| 150 | - echo -e " tail -f ${LOG_FILE}" | ||
| 151 | - echo "" | ||
| 152 | - rm -f "${PID_FILE}" | ||
| 153 | - exit 1 | ||
| 154 | -fi |
scripts/start_cnclip_service.sh
| @@ -42,6 +42,7 @@ DEFAULT_BATCH_SIZE=32 | @@ -42,6 +42,7 @@ DEFAULT_BATCH_SIZE=32 | ||
| 42 | DEFAULT_NUM_WORKERS=4 | 42 | DEFAULT_NUM_WORKERS=4 |
| 43 | DEFAULT_DTYPE="float16" | 43 | DEFAULT_DTYPE="float16" |
| 44 | DEFAULT_MODEL_NAME="CN-CLIP/ViT-H-14" | 44 | DEFAULT_MODEL_NAME="CN-CLIP/ViT-H-14" |
| 45 | +# DEFAULT_MODEL_NAME="CN-CLIP/ViT-L-14-336" | ||
| 45 | DEFAULT_REPLICAS=1 # 副本数 | 46 | DEFAULT_REPLICAS=1 # 副本数 |
| 46 | 47 | ||
| 47 | # 项目路径 | 48 | # 项目路径 |
scripts/stop_clip_api.sh deleted
| @@ -1,87 +0,0 @@ | @@ -1,87 +0,0 @@ | ||
| 1 | -#!/bin/bash | ||
| 2 | - | ||
| 3 | -############################################################################### | ||
| 4 | -# CN-CLIP REST API 停止脚本 | ||
| 5 | -# | ||
| 6 | -# 用途: | ||
| 7 | -# 停止 REST API 服务 | ||
| 8 | -# | ||
| 9 | -# 使用方法: | ||
| 10 | -# ./scripts/stop_clip_api.sh | ||
| 11 | -# | ||
| 12 | -############################################################################### | ||
| 13 | - | ||
| 14 | -set -e | ||
| 15 | - | ||
| 16 | -# 颜色定义 | ||
| 17 | -RED='\033[0;31m' | ||
| 18 | -GREEN='\033[0;32m' | ||
| 19 | -YELLOW='\033[1;33m' | ||
| 20 | -BLUE='\033[0;34m' | ||
| 21 | -NC='\033[0m' | ||
| 22 | - | ||
| 23 | -# 项目路径 | ||
| 24 | -PROJECT_ROOT="/data/tw/SearchEngine" | ||
| 25 | -PID_FILE="${PROJECT_ROOT}/logs/clip_rest_api.pid" | ||
| 26 | - | ||
| 27 | -echo -e "${BLUE}========================================${NC}" | ||
| 28 | -echo -e "${BLUE}停止 CN-CLIP REST API 服务${NC}" | ||
| 29 | -echo -e "${BLUE}========================================${NC}" | ||
| 30 | -echo "" | ||
| 31 | - | ||
| 32 | -# 检查 PID 文件 | ||
| 33 | -if [ ! -f "${PID_FILE}" ]; then | ||
| 34 | - echo -e "${YELLOW}警告: 未找到 PID 文件${NC}" | ||
| 35 | - echo -e "${YELLOW}REST API 服务可能未运行${NC}" | ||
| 36 | - | ||
| 37 | - # 尝试查找并终止进程 | ||
| 38 | - if ps aux | grep "clip_rest_api.py" | grep -v grep > /dev/null; then | ||
| 39 | - echo -e "${YELLOW}发现运行中的 API 进程${NC}" | ||
| 40 | - API_PIDS=$(ps aux | grep "clip_rest_api.py" | grep -v grep | awk '{print $2}') | ||
| 41 | - echo -e "${YELLOW}正在终止...${NC}" | ||
| 42 | - for PID in ${API_PIDS}; do | ||
| 43 | - kill ${PID} 2>/dev/null || true | ||
| 44 | - done | ||
| 45 | - sleep 1 | ||
| 46 | - echo -e "${GREEN}✓ 进程已终止${NC}" | ||
| 47 | - fi | ||
| 48 | - exit 0 | ||
| 49 | -fi | ||
| 50 | - | ||
| 51 | -# 读取 PID | ||
| 52 | -PID="$(cat "${PID_FILE}")" | ||
| 53 | - | ||
| 54 | -# 检查进程 | ||
| 55 | -if ps -p "${PID}" > /dev/null 2>&1; then | ||
| 56 | - echo -e "${BLUE}服务信息:${NC}" | ||
| 57 | - echo " PID: ${PID}" | ||
| 58 | - echo "" | ||
| 59 | - | ||
| 60 | - echo -e "${YELLOW}正在停止服务...${NC}" | ||
| 61 | - kill "${PID}" || true | ||
| 62 | - sleep 2 | ||
| 63 | - | ||
| 64 | - # 检查是否还在运行 | ||
| 65 | - if ps -p "${PID}" > /dev/null 2>&1; then | ||
| 66 | - echo -e "${YELLOW}进程仍在运行,强制终止...${NC}" | ||
| 67 | - kill -9 "${PID}" || true | ||
| 68 | - sleep 1 | ||
| 69 | - fi | ||
| 70 | - | ||
| 71 | - # 最终检查 | ||
| 72 | - if ps -p "${PID}" > /dev/null 2>&1; then | ||
| 73 | - echo -e "${RED}错误: 无法停止进程${NC}" | ||
| 74 | - exit 1 | ||
| 75 | - else | ||
| 76 | - echo -e "${GREEN}========================================${NC}" | ||
| 77 | - echo -e "${GREEN}✓ REST API 服务已停止${NC}" | ||
| 78 | - echo -e "${GREEN}========================================${NC}" | ||
| 79 | - fi | ||
| 80 | -else | ||
| 81 | - echo -e "${YELLOW}警告: 进程 ${PID} 不存在${NC}" | ||
| 82 | -fi | ||
| 83 | - | ||
| 84 | -# 清理 PID 文件 | ||
| 85 | -rm -f "${PID_FILE}" | ||
| 86 | -echo "" | ||
| 87 | -echo -e "${GREEN}PID 文件已删除${NC}" |
scripts/stop_cnclip_service.sh
| @@ -89,21 +89,42 @@ echo "" | @@ -89,21 +89,42 @@ echo "" | ||
| 89 | 89 | ||
| 90 | echo -e "${YELLOW}正在停止服务...${NC}" | 90 | echo -e "${YELLOW}正在停止服务...${NC}" |
| 91 | 91 | ||
| 92 | -# 发送 SIGTERM 信号 | ||
| 93 | -kill "${PID}" || true | ||
| 94 | -sleep 2 | ||
| 95 | - | ||
| 96 | -# 检查进程是否还在运行 | ||
| 97 | -if ps -p "${PID}" > /dev/null 2>&1; then | ||
| 98 | - echo -e "${YELLOW}进程仍在运行,发送 SIGKILL...${NC}" | ||
| 99 | - kill -9 "${PID}" || true | 92 | +# 查找所有相关的 clip_server 进程(通过配置文件路径) |
| 93 | +CONFIG_FILE="torch-flow-temp.yml" | ||
| 94 | +ALL_PIDS=$(ps aux | grep "clip_server.*${CONFIG_FILE}" | grep -v grep | awk '{print $2}') | ||
| 95 | + | ||
| 96 | +if [ -z "${ALL_PIDS}" ]; then | ||
| 97 | + # 如果没有找到,至少尝试停止 PID 文件中的进程 | ||
| 98 | + ALL_PIDS="${PID}" | ||
| 99 | +fi | ||
| 100 | + | ||
| 101 | +# 终止所有相关进程 | ||
| 102 | +for P in ${ALL_PIDS}; do | ||
| 103 | + if ps -p "${P}" > /dev/null 2>&1; then | ||
| 104 | + echo -e "${YELLOW}终止进程 ${P}...${NC}" | ||
| 105 | + kill "${P}" 2>/dev/null || true | ||
| 106 | + fi | ||
| 107 | +done | ||
| 108 | + | ||
| 109 | +# 等待进程退出 | ||
| 110 | +sleep 3 | ||
| 111 | + | ||
| 112 | +# 检查是否还有进程在运行,如果有则强制终止 | ||
| 113 | +REMAINING_PIDS=$(ps aux | grep "clip_server.*${CONFIG_FILE}" | grep -v grep | awk '{print $2}') | ||
| 114 | +if [ -n "${REMAINING_PIDS}" ]; then | ||
| 115 | + echo -e "${YELLOW}发现仍在运行的进程,强制终止...${NC}" | ||
| 116 | + for P in ${REMAINING_PIDS}; do | ||
| 117 | + echo -e "${YELLOW}强制终止进程 ${P}...${NC}" | ||
| 118 | + kill -9 "${P}" 2>/dev/null || true | ||
| 119 | + done | ||
| 100 | sleep 1 | 120 | sleep 1 |
| 101 | fi | 121 | fi |
| 102 | 122 | ||
| 103 | # 最终检查 | 123 | # 最终检查 |
| 104 | -if ps -p "${PID}" > /dev/null 2>&1; then | ||
| 105 | - echo -e "${RED}错误: 无法停止进程 ${PID}${NC}" | ||
| 106 | - echo -e "${YELLOW}请手动停止: kill -9 ${PID}${NC}" | 124 | +FINAL_CHECK=$(ps aux | grep "clip_server.*${CONFIG_FILE}" | grep -v grep | wc -l) |
| 125 | +if [ "${FINAL_CHECK}" -gt 0 ]; then | ||
| 126 | + echo -e "${RED}错误: 仍有进程无法停止${NC}" | ||
| 127 | + echo -e "${YELLOW}请手动检查: ps aux | grep clip_server${NC}" | ||
| 107 | exit 1 | 128 | exit 1 |
| 108 | else | 129 | else |
| 109 | echo -e "${GREEN}========================================${NC}" | 130 | echo -e "${GREEN}========================================${NC}" |
scripts/test_cnclip_client.py deleted
| @@ -1,109 +0,0 @@ | @@ -1,109 +0,0 @@ | ||
| 1 | -#!/usr/bin/env python3 | ||
| 2 | -""" | ||
| 3 | -CN-CLIP 服务客户端测试脚本 | ||
| 4 | - | ||
| 5 | -用法: | ||
| 6 | - python scripts/test_cnclip_client.py [--url URL] | ||
| 7 | - | ||
| 8 | -注意:如果服务配置了 protocol: http,必须使用 http:// 而不是 grpc:// | ||
| 9 | -""" | ||
| 10 | - | ||
| 11 | -import sys | ||
| 12 | -import argparse | ||
| 13 | -from pathlib import Path | ||
| 14 | - | ||
| 15 | -# 添加项目路径 | ||
| 16 | -project_root = Path(__file__).parent.parent | ||
| 17 | -sys.path.insert(0, str(project_root)) | ||
| 18 | - | ||
| 19 | -try: | ||
| 20 | - from clip_client import Client | ||
| 21 | -except ImportError: | ||
| 22 | - print("错误: 请先安装 clip-client: pip install clip-client") | ||
| 23 | - sys.exit(1) | ||
| 24 | - | ||
| 25 | - | ||
| 26 | -def test_text_encoding(client): | ||
| 27 | - """测试文本编码""" | ||
| 28 | - print("\n测试文本编码...") | ||
| 29 | - try: | ||
| 30 | - texts = ['这是测试文本', '另一个测试文本'] | ||
| 31 | - result = client.encode(texts) | ||
| 32 | - print(f"✓ 成功! 形状: {result.shape}") | ||
| 33 | - print(f" 输入: {len(texts)} 个文本") | ||
| 34 | - print(f" 输出维度: {result.shape[1]}") | ||
| 35 | - return True | ||
| 36 | - except Exception as e: | ||
| 37 | - print(f"✗ 失败: {e}") | ||
| 38 | - return False | ||
| 39 | - | ||
| 40 | - | ||
| 41 | -def test_image_encoding(client): | ||
| 42 | - """测试图像编码""" | ||
| 43 | - print("\n测试图像编码...") | ||
| 44 | - try: | ||
| 45 | - images = ['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] | ||
| 46 | - result = client.encode(images) | ||
| 47 | - print(f"✓ 成功! 形状: {result.shape}") | ||
| 48 | - print(f" 输入: {len(images)} 个图像") | ||
| 49 | - print(f" 输出维度: {result.shape[1]}") | ||
| 50 | - return True | ||
| 51 | - except Exception as e: | ||
| 52 | - print(f"✗ 失败: {e}") | ||
| 53 | - print(" 注意: CN-CLIP 的图像编码可能存在兼容性问题") | ||
| 54 | - return False | ||
| 55 | - | ||
| 56 | - | ||
| 57 | -def main(): | ||
| 58 | - parser = argparse.ArgumentParser(description='CN-CLIP 服务客户端测试') | ||
| 59 | - parser.add_argument( | ||
| 60 | - '--url', | ||
| 61 | - default='http://localhost:51000', | ||
| 62 | - help='服务地址(默认: http://localhost:51000)' | ||
| 63 | - ) | ||
| 64 | - | ||
| 65 | - args = parser.parse_args() | ||
| 66 | - | ||
| 67 | - print("=" * 50) | ||
| 68 | - print("CN-CLIP 服务客户端测试") | ||
| 69 | - print("=" * 50) | ||
| 70 | - print(f"服务地址: {args.url}") | ||
| 71 | - | ||
| 72 | - # 检查协议 | ||
| 73 | - if args.url.startswith('grpc://'): | ||
| 74 | - print("\n⚠ 警告: 服务配置了 protocol: http,请使用 http:// 而不是 grpc://") | ||
| 75 | - print(" 将自动转换为 http://") | ||
| 76 | - args.url = args.url.replace('grpc://', 'http://') | ||
| 77 | - print(f" 新地址: {args.url}") | ||
| 78 | - | ||
| 79 | - try: | ||
| 80 | - client = Client(args.url) | ||
| 81 | - print("✓ 客户端创建成功") | ||
| 82 | - except Exception as e: | ||
| 83 | - print(f"✗ 客户端创建失败: {e}") | ||
| 84 | - sys.exit(1) | ||
| 85 | - | ||
| 86 | - # 运行测试 | ||
| 87 | - results = [] | ||
| 88 | - results.append(test_text_encoding(client)) | ||
| 89 | - results.append(test_image_encoding(client)) | ||
| 90 | - | ||
| 91 | - # 汇总 | ||
| 92 | - print("\n" + "=" * 50) | ||
| 93 | - print("测试结果汇总") | ||
| 94 | - print("=" * 50) | ||
| 95 | - print(f"总测试数: {len(results)}") | ||
| 96 | - print(f"通过: {sum(results)}") | ||
| 97 | - print(f"失败: {len(results) - sum(results)}") | ||
| 98 | - | ||
| 99 | - if all(results): | ||
| 100 | - print("\n✓ 所有测试通过!") | ||
| 101 | - sys.exit(0) | ||
| 102 | - else: | ||
| 103 | - print("\n✗ 部分测试失败") | ||
| 104 | - sys.exit(1) | ||
| 105 | - | ||
| 106 | - | ||
| 107 | -if __name__ == '__main__': | ||
| 108 | - main() | ||
| 109 | - |
scripts/test_cnclip_service.py deleted
| @@ -1,320 +0,0 @@ | @@ -1,320 +0,0 @@ | ||
| 1 | -#!/usr/bin/env python3 | ||
| 2 | -""" | ||
| 3 | -CN-CLIP 服务测试脚本 | ||
| 4 | - | ||
| 5 | -用法: | ||
| 6 | - python scripts/test_cnclip_service.py | ||
| 7 | - | ||
| 8 | -选项: | ||
| 9 | - --url TEXT 服务地址(默认:grpc://localhost:51000) | ||
| 10 | - --text 只测试文本编码 | ||
| 11 | - --image 只测试图像编码 | ||
| 12 | - --batch-size INT 批处理大小(默认:10) | ||
| 13 | - --help 显示帮助信息 | ||
| 14 | -""" | ||
| 15 | - | ||
| 16 | -import sys | ||
| 17 | -import time | ||
| 18 | -import argparse | ||
| 19 | -from pathlib import Path | ||
| 20 | - | ||
| 21 | -# 添加项目路径到 sys.path | ||
| 22 | -project_root = Path(__file__).parent.parent | ||
| 23 | -sys.path.insert(0, str(project_root)) | ||
| 24 | - | ||
| 25 | -# 颜色输出 | ||
| 26 | -class Colors: | ||
| 27 | - GREEN = '\033[0;32m' | ||
| 28 | - RED = '\033[0;31m' | ||
| 29 | - YELLOW = '\033[1;33m' | ||
| 30 | - BLUE = '\033[0;34m' | ||
| 31 | - NC = '\033[0m' | ||
| 32 | - | ||
| 33 | - | ||
| 34 | -def print_success(msg): | ||
| 35 | - print(f"{Colors.GREEN}✓ {msg}{Colors.NC}") | ||
| 36 | - | ||
| 37 | - | ||
| 38 | -def print_error(msg): | ||
| 39 | - print(f"{Colors.RED}✗ {msg}{Colors.NC}") | ||
| 40 | - | ||
| 41 | - | ||
| 42 | -def print_warning(msg): | ||
| 43 | - print(f"{Colors.YELLOW}⚠ {msg}{Colors.NC}") | ||
| 44 | - | ||
| 45 | - | ||
| 46 | -def print_info(msg): | ||
| 47 | - print(f"{Colors.BLUE}ℹ {msg}{Colors.NC}") | ||
| 48 | - | ||
| 49 | - | ||
| 50 | -def test_imports(): | ||
| 51 | - """测试必要的依赖是否安装""" | ||
| 52 | - print("\n" + "="*50) | ||
| 53 | - print("测试 1: 检查依赖") | ||
| 54 | - print("="*50) | ||
| 55 | - | ||
| 56 | - try: | ||
| 57 | - import clip_client | ||
| 58 | - print_success("clip_client 已安装") | ||
| 59 | - except ImportError as e: | ||
| 60 | - print_error(f"clip_client 未安装: {e}") | ||
| 61 | - print_info("请运行: pip install clip-client") | ||
| 62 | - return False | ||
| 63 | - | ||
| 64 | - try: | ||
| 65 | - import numpy as np | ||
| 66 | - print_success("numpy 已安装") | ||
| 67 | - except ImportError as e: | ||
| 68 | - print_error(f"numpy 未安装: {e}") | ||
| 69 | - return False | ||
| 70 | - | ||
| 71 | - return True | ||
| 72 | - | ||
| 73 | - | ||
| 74 | -def test_connection(url): | ||
| 75 | - """测试服务连接""" | ||
| 76 | - print("\n" + "="*50) | ||
| 77 | - print("测试 2: 连接服务") | ||
| 78 | - print("="*50) | ||
| 79 | - print(f"服务地址: {url}") | ||
| 80 | - | ||
| 81 | - try: | ||
| 82 | - from clip_client import Client | ||
| 83 | - | ||
| 84 | - client = Client(url) | ||
| 85 | - print_success("客户端创建成功") | ||
| 86 | - return client | ||
| 87 | - except Exception as e: | ||
| 88 | - print_error(f"连接失败: {e}") | ||
| 89 | - print_info("请确保服务已启动: ./scripts/start_cnclip_service.sh") | ||
| 90 | - return None | ||
| 91 | - | ||
| 92 | - | ||
| 93 | -def test_text_encoding(client, batch_size=10): | ||
| 94 | - """测试文本编码""" | ||
| 95 | - print("\n" + "="*50) | ||
| 96 | - print("测试 3: 文本编码") | ||
| 97 | - print("="*50) | ||
| 98 | - | ||
| 99 | - try: | ||
| 100 | - # 准备测试数据 | ||
| 101 | - test_texts = [ | ||
| 102 | - '你好,世界', | ||
| 103 | - 'CN-CLIP 图像编码服务', | ||
| 104 | - '这是一个测试', | ||
| 105 | - '人工智能', | ||
| 106 | - '机器学习', | ||
| 107 | - '深度学习', | ||
| 108 | - '计算机视觉', | ||
| 109 | - '自然语言处理', | ||
| 110 | - '搜索引擎', | ||
| 111 | - '多模态检索', | ||
| 112 | - ][:batch_size] | ||
| 113 | - | ||
| 114 | - print(f"测试文本数量: {len(test_texts)}") | ||
| 115 | - print(f"示例文本: {test_texts[0]}") | ||
| 116 | - | ||
| 117 | - # 执行编码 | ||
| 118 | - start_time = time.time() | ||
| 119 | - embeddings = client.encode(test_texts) | ||
| 120 | - elapsed_time = time.time() - start_time | ||
| 121 | - | ||
| 122 | - # 验证结果 | ||
| 123 | - assert embeddings.shape[0] == len(test_texts), "向量数量不匹配" | ||
| 124 | - assert embeddings.shape[1] == 1024, "向量维度应该是 1024" | ||
| 125 | - | ||
| 126 | - print_success(f"编码成功") | ||
| 127 | - print(f" 向量形状: {embeddings.shape}") | ||
| 128 | - print(f" 耗时: {elapsed_time:.2f}秒") | ||
| 129 | - print(f" 速度: {len(test_texts)/elapsed_time:.2f} 条/秒") | ||
| 130 | - print(f" 数据类型: {embeddings.dtype}") | ||
| 131 | - | ||
| 132 | - return True | ||
| 133 | - | ||
| 134 | - except Exception as e: | ||
| 135 | - print_error(f"文本编码失败: {e}") | ||
| 136 | - return False | ||
| 137 | - | ||
| 138 | - | ||
| 139 | -def test_image_encoding(client, batch_size=5): | ||
| 140 | - """测试图像编码""" | ||
| 141 | - print("\n" + "="*50) | ||
| 142 | - print("测试 4: 图像编码") | ||
| 143 | - print("="*50) | ||
| 144 | - | ||
| 145 | - try: | ||
| 146 | - # 准备测试数据(使用在线图片) | ||
| 147 | - test_images = [ | ||
| 148 | - 'https://picsum.photos/224', | ||
| 149 | - 'https://picsum.photos/224?random=1', | ||
| 150 | - 'https://picsum.photos/224?random=2', | ||
| 151 | - 'https://picsum.photos/224?random=3', | ||
| 152 | - 'https://picsum.photos/224?random=4', | ||
| 153 | - ][:batch_size] | ||
| 154 | - | ||
| 155 | - print(f"测试图像数量: {len(test_images)}") | ||
| 156 | - print(f"示例 URL: {test_images[0]}") | ||
| 157 | - | ||
| 158 | - # 执行编码 | ||
| 159 | - start_time = time.time() | ||
| 160 | - embeddings = client.encode(test_images) | ||
| 161 | - elapsed_time = time.time() - start_time | ||
| 162 | - | ||
| 163 | - # 验证结果 | ||
| 164 | - assert embeddings.shape[0] == len(test_images), "向量数量不匹配" | ||
| 165 | - assert embeddings.shape[1] == 1024, "向量维度应该是 1024" | ||
| 166 | - | ||
| 167 | - print_success(f"编码成功") | ||
| 168 | - print(f" 向量形状: {embeddings.shape}") | ||
| 169 | - print(f" 耗时: {elapsed_time:.2f}秒") | ||
| 170 | - print(f" 速度: {len(test_images)/elapsed_time:.2f} 条/秒") | ||
| 171 | - print(f" 数据类型: {embeddings.dtype}") | ||
| 172 | - | ||
| 173 | - return True | ||
| 174 | - | ||
| 175 | - except Exception as e: | ||
| 176 | - print_error(f"图像编码失败: {e}") | ||
| 177 | - print_warning("可能需要网络连接来下载测试图片") | ||
| 178 | - return False | ||
| 179 | - | ||
| 180 | - | ||
| 181 | -def test_mixed_encoding(client): | ||
| 182 | - """测试混合编码(文本+图像)""" | ||
| 183 | - print("\n" + "="*50) | ||
| 184 | - print("测试 5: 混合编码") | ||
| 185 | - print("="*50) | ||
| 186 | - | ||
| 187 | - try: | ||
| 188 | - # 准备混合数据 | ||
| 189 | - mixed_data = [ | ||
| 190 | - '这是一段测试文本', | ||
| 191 | - 'https://picsum.photos/224?random=10', | ||
| 192 | - 'CN-CLIP 图像编码', | ||
| 193 | - 'https://picsum.photos/224?random=11', | ||
| 194 | - ] | ||
| 195 | - | ||
| 196 | - print(f"混合数据数量: {len(mixed_data)}") | ||
| 197 | - print(f" 文本: 2 条") | ||
| 198 | - print(f" 图像: 2 条") | ||
| 199 | - | ||
| 200 | - # 执行编码 | ||
| 201 | - start_time = time.time() | ||
| 202 | - embeddings = client.encode(mixed_data) | ||
| 203 | - elapsed_time = time.time() - start_time | ||
| 204 | - | ||
| 205 | - # 验证结果 | ||
| 206 | - assert embeddings.shape[0] == len(mixed_data), "向量数量不匹配" | ||
| 207 | - assert embeddings.shape[1] == 1024, "向量维度应该是 1024" | ||
| 208 | - | ||
| 209 | - print_success(f"混合编码成功") | ||
| 210 | - print(f" 向量形状: {embeddings.shape}") | ||
| 211 | - print(f" 耗时: {elapsed_time:.2f}秒") | ||
| 212 | - | ||
| 213 | - return True | ||
| 214 | - | ||
| 215 | - except Exception as e: | ||
| 216 | - print_error(f"混合编码失败: {e}") | ||
| 217 | - return False | ||
| 218 | - | ||
| 219 | - | ||
| 220 | -def test_single_encoding(client): | ||
| 221 | - """测试单个数据编码""" | ||
| 222 | - print("\n" + "="*50) | ||
| 223 | - print("测试 6: 单个数据编码") | ||
| 224 | - print("="*50) | ||
| 225 | - | ||
| 226 | - try: | ||
| 227 | - # 测试单个文本 | ||
| 228 | - single_text = '测试文本' | ||
| 229 | - print(f"输入: {single_text}") | ||
| 230 | - | ||
| 231 | - start_time = time.time() | ||
| 232 | - embedding = client.encode(single_text) | ||
| 233 | - elapsed_time = time.time() - start_time | ||
| 234 | - | ||
| 235 | - # 注意:单个数据会返回 (1, 1024) 的形状 | ||
| 236 | - if embedding.ndim == 1: | ||
| 237 | - embedding = embedding.reshape(1, -1) | ||
| 238 | - | ||
| 239 | - assert embedding.shape == (1, 1024), f"向量形状应该是 (1, 1024), 实际是 {embedding.shape}" | ||
| 240 | - | ||
| 241 | - print_success(f"单个文本编码成功") | ||
| 242 | - print(f" 向量形状: {embedding.shape}") | ||
| 243 | - print(f" 耗时: {elapsed_time:.2f}秒") | ||
| 244 | - | ||
| 245 | - return True | ||
| 246 | - | ||
| 247 | - except Exception as e: | ||
| 248 | - print_error(f"单个数据编码失败: {e}") | ||
| 249 | - return False | ||
| 250 | - | ||
| 251 | - | ||
def main():
    """Command-line entry point: parse options, run the checks, exit with a status.

    Options:
        --url         Service address (default ``grpc://localhost:51000``).
        --text        Run the text-encoding check.
        --image       Run the image-encoding check.
        --batch-size  Positive number of items per batch (default 10).

    With neither ``--text`` nor ``--image`` the full suite runs. When both
    are given, both selected checks run (previously ``--image`` was silently
    ignored in that case).

    Exits with status 0 when every executed check passed, 1 when the
    dependency check, the connection, or any executed check failed, and 2
    (argparse's usage-error status) for invalid arguments.
    """
    parser = argparse.ArgumentParser(description='CN-CLIP 服务测试脚本')
    parser.add_argument('--url',
                        default='grpc://localhost:51000',
                        help='服务地址(默认:grpc://localhost:51000)')
    parser.add_argument('--text',
                        action='store_true',
                        help='只测试文本编码')
    parser.add_argument('--image',
                        action='store_true',
                        help='只测试图像编码')
    parser.add_argument('--batch-size',
                        type=int,
                        default=10,
                        help='批处理大小(默认:10)')

    args = parser.parse_args()

    # A zero or negative batch cannot describe a meaningful request; reject
    # it up front as a usage error instead of failing somewhere downstream.
    if args.batch_size < 1:
        parser.error('--batch-size 必须是正整数')

    print("\n" + "="*50)
    print("CN-CLIP 服务测试")
    print("="*50)

    # Check 1: required packages are importable.
    if not test_imports():
        sys.exit(1)

    # Check 2: the service is reachable; a falsy client means it is not.
    client = test_connection(args.url)
    if not client:
        sys.exit(1)

    results = []

    if args.text or args.image:
        # Run exactly the checks that were asked for, in a fixed order.
        if args.text:
            results.append(test_text_encoding(client, args.batch_size))
        if args.image:
            results.append(test_image_encoding(client, args.batch_size))
    else:
        # Full suite. The image batch is capped at 5 items here.
        results.append(test_text_encoding(client, args.batch_size))
        results.append(test_image_encoding(client, min(args.batch_size, 5)))
        results.append(test_mixed_encoding(client))
        results.append(test_single_encoding(client))

    print("\n" + "="*50)
    print("测试结果汇总")
    print("="*50)

    total_tests = len(results)
    # Coerce to bool so a check that returns None counts as a failure
    # instead of breaking the sum with a TypeError.
    passed_tests = sum(1 for outcome in results if outcome)

    print(f"总测试数: {total_tests}")
    print(f"通过: {passed_tests}")
    print(f"失败: {total_tests - passed_tests}")

    if passed_tests == total_tests:
        print_success("\n所有测试通过!")
        sys.exit(0)
    else:
        print_error("\n部分测试失败")
        sys.exit(1)
| 317 | - | ||
| 318 | - | ||
# Run the test suite only when this file is executed as a script, so that
# importing it (for example to reuse a single check) has no side effects.
if __name__ == '__main__':
    main()