diff --git a/.env b/.env index 47cb794..e234b4f 100644 --- a/.env +++ b/.env @@ -33,5 +33,9 @@ CACHE_DIR=.cache API_BASE_URL=http://43.166.252.75:6002 +# 国内 DASHSCOPE_API_KEY=sk-c3b8d4db061840aa8effb748df2a997b +# 美国 +DASHSCOPE_API_KEY=sk-482cc3ff37a8467dab134a7a46830556 + OPENAI_API_KEY=sk-HvmTMKtuznibZ75l7L2uF2jiaYocCthqd8Cbdkl09KTE7Ft0 diff --git a/.gitignore b/.gitignore index 98f368c..f15f482 100644 --- a/.gitignore +++ b/.gitignore @@ -1,71 +1,72 @@ -# Prerequisites -*.d - -# Compiled Object files -*.slo -*.lo -*.o -*.obj - -# Precompiled Headers -*.gch -*.pch - -# Compiled Dynamic libraries -*.so -*.dylib -*.dll - -# Fortran module files -*.mod -*.smod - -# Compiled Static libraries -*.lai -*.la -*.a -*.lib - -# Executables -*.exe -*.out -*.app - -# Projects -.vscode - -model/* -model.bin.* -*.pyc -*.swp -.pydevproject -.DS_Store -.project -.idea -.data -__pycache__ -*.log -*.bak*/ -.history.txt -log/ -logs/ -.venv/ -nohup.out -temp/ -indexer_input* -log.* -output -data.* -*.json -*.idx -*.npy -*.tgz -*.tar.gz -*.tar -*.pt - -*.log -log/ -logs_*/ - -*.xlsx +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +# Projects +.vscode + +model/* +model.bin.* +*.pyc +*.swp +.pydevproject +.DS_Store +.project +.idea +.data +__pycache__ +*.log +*.bak*/ +.history.txt +log/ +logs/ +.venv/ +.venv-cnclip/ +nohup.out +temp/ +indexer_input* +log.* +output +data.* +*.json +*.idx +*.npy +*.tgz +*.tar.gz +*.tar +*.pt + +*.log +log/ +logs_*/ + +*.xlsx diff --git a/api/indexer_app.py b/api/indexer_app.py index 1401f39..30da294 100644 --- a/api/indexer_app.py +++ b/api/indexer_app.py @@ -165,6 +165,26 @@ async def startup_event(): try: init_indexer_service(es_host=es_host) logger.info("Indexer service initialized successfully") + + 
# Eager warmup: build per-tenant transformer bundles at startup to avoid + # first-request latency (config/provider/encoder + transformer wiring). + try: + if _incremental_service is not None and _config is not None: + tenants = [] + # config.tenant_config shape: {"default": {...}, "tenants": {"1": {...}, ...}} + tc = getattr(_config, "tenant_config", None) or {} + if isinstance(tc, dict): + tmap = tc.get("tenants") + if isinstance(tmap, dict): + tenants = [str(k) for k in tmap.keys()] + # If no explicit tenants configured, skip warmup. + if tenants: + warm = _incremental_service.warmup_transformers(tenants) + logger.info("Indexer warmup completed: %s", warm) + else: + logger.info("Indexer warmup skipped (no tenant ids in config.tenant_config.tenants)") + except Exception as e: + logger.warning("Indexer warmup failed (service still starts): %s", e, exc_info=True) except Exception as e: logger.error(f"Failed to initialize indexer service: {e}", exc_info=True) logger.warning("Indexer service will start but may not function correctly") diff --git a/api/routes/indexer.py b/api/routes/indexer.py index 097f879..001b1e2 100644 --- a/api/routes/indexer.py +++ b/api/routes/indexer.py @@ -245,16 +245,30 @@ async def build_docs(request: BuildDocsRequest): break if title_text and str(title_text).strip(): try: + import numpy as np + embeddings = encoder.encode(title_text) if embeddings is not None and len(embeddings) > 0: emb0 = embeddings[0] - import numpy as np - - if isinstance(emb0, np.ndarray): + if isinstance(emb0, np.ndarray) and emb0.size > 0: doc["title_embedding"] = emb0.tolist() - except Exception: + else: + logger.warning( + "build-docs: title_embedding skipped (encoder returned None/invalid for title: %s...)", + title_text[:50], + ) + else: + logger.warning( + "build-docs: title_embedding skipped (encoder returned empty for title: %s...)", + title_text[:50], + ) + except Exception as e: + logger.warning( + "build-docs: title_embedding failed for spu_id=%s: %s", + 
doc.get("spu_id"), + e, + ) # 构建 doc 接口不因为 embedding 失败而整体失败 - pass docs.append(doc) except Exception as e: diff --git a/docs/CNCLIP_SERVICE说明文档.md b/docs/CNCLIP_SERVICE说明文档.md index 6fa2d4e..4e33904 100644 --- a/docs/CNCLIP_SERVICE说明文档.md +++ b/docs/CNCLIP_SERVICE说明文档.md @@ -1,215 +1,156 @@ -# CN-CLIP 服务(Legacy) +# CN-CLIP 服务(clip-as-service)说明 -> **注意**:当前主流程使用 embedding 服务(端口 6005),见 `docs/QUICKSTART.md` 3.3。本文档为 legacy gRPC 服务说明。 +> 本文是本仓库的 CN-CLIP 运行手册与约束说明。主流程仍是 `embedding` 服务(`6005`);当 `embeddings/config.py` 中 `USE_CLIP_AS_SERVICE=true` 时,`embedding` 会调用本 gRPC 服务(默认 `grpc://127.0.0.1:51000`)生成图片向量。 ---- +## 1. 设计目标与官方对齐 -# TODO(历史) +- 采用 `clip-as-service` 的标准拆分:`clip-server`(服务端)与 `clip-client`(客户端)可独立安装。 +- 服务协议使用 gRPC,符合官方推荐与本项目现有调用链。 +- 保持“主项目环境”和“CN-CLIP 专用环境”解耦,避免 `grpcio/jina/docarray` 与主项目依赖互相污染。 -现在,跟自己 cn_clip 预估的结果,有差别: -这个比较接近: 可能是预处理逻辑有些不一样。 -https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg -normlize后的结果: -0.046295166015625,0.012847900390625,-0.0299530029296875,-0.01629638671875,0.01708984375,0.00487518310546875,0.01284027099609375,0.01348876953125,0.04617632180452347, 0.012860896065831184, -0.030133124440908432, -0.0162516962736845, -0.04617632180452347, 0.012860896065831184, -0.030133124440908432, -0.0162516962736845, 0.01708567887544632, 0.005110889207571745 +官方仓库(安装方式、server/client 分离、基本使用示例): +[jina-ai/clip-as-service](https://github.com/jina-ai/clip-as-service) -以下两个,差别非常大,感觉不是一个模型: -https://aisearch.cdn.bcebos.com/fileManager/GtB5doGAr1skTx38P7fb7Q/182.jpg?authorization=bce-auth-v1%2F7e22d8caf5af46cc9310f1e3021709f3%2F2025-12-30T04%3A45%3A38Z%2F86400%2Fhost%2Ffe222039926cb7ff593021af40268c782b8892598114e24773d0c1bfc976a8df -https://oss.essa.cn/2e353867-7496-4d4e-a7c8-0af50f49f6eb.jpg?x-oss-process=image/resize,m_lfit,w_2048,h_2048 +## 2. 
当前架构(本仓库) -curl -X POST "http://43.166.252.75:5000/embedding/generate_image_embeddings" -H "Content-Type: application/json" -d '[ - { - "id": "test_1", - "pic_url": "https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg" - } - ]' - +- **服务启动脚本**:`scripts/start_cnclip_service.sh` +- **服务停止脚本**:`scripts/stop_cnclip_service.sh` +- **环境初始化脚本**:`scripts/setup_cnclip_venv.sh` +- **统一编排入口**:`scripts/service_ctl.sh`(`restart.sh` 调用它) +- **默认端口**:`51000` +- **默认模型**:`CN-CLIP/ViT-H-14` +- **默认协议**:gRPC +## 3. 环境准备策略(推荐做法) -# CN-CLIP 编码服务 +### 3.1 推荐:专用 venv(`.venv-cnclip`) -## 模块说明 - -CN-CLIP 编码服务基于 [clip-as-service](https://github.com/jina-ai/clip-as-service) 提供中文 CLIP 模型的文本和图像编码功能。服务使用 gRPC 协议,支持批量编码,返回固定维度的向量表示。 +```bash +./scripts/setup_cnclip_venv.sh +``` -### 功能特性 +脚本会创建 `.venv-cnclip`,并处理已知兼容性问题(`grpcio`、`jina`、`docarray`、`pkg_resources` 等),避免在主 `.venv` 中反复冲突。 -- 文本编码:将中文文本编码为向量 -- 图像编码:将图像(本地文件或远程 URL)编码为向量 -- 混合编码:同时编码文本和图像 -- 批量处理:支持批量编码,提高效率 +### 3.2 启动时的环境选择 -### 技术架构 +`start_cnclip_service.sh` 的优先级: -- **框架**: clip-as-service (基于 Jina) -- **模型**: CN-CLIP/ViT-L-14-336(默认) -- **协议**: gRPC(默认,官方推荐) -- **运行时**: PyTorch +1. 若存在 `.venv-cnclip`,优先使用; +2. 否则回退到项目统一环境(`source activate.sh`); +3. 若两者都不可用,启动失败并提示修复动作。 -## 启动服务 +## 4. 
服务管理方式(推荐) -### 基本用法 +### 4.1 单独启动/停止 ```bash -./scripts/start_cnclip_service.sh +./scripts/start_cnclip_service.sh --device cuda +./scripts/stop_cnclip_service.sh ``` -### 启动参数 - -| 参数 | 说明 | 默认值 | -|------|------|--------| -| `--port PORT` | 服务端口 | 51000 | -| `--device DEVICE` | 设备类型:cuda 或 cpu | 自动检测 | -| `--batch-size SIZE` | 批处理大小 | 32 | -| `--num-workers NUM` | 预处理线程数 | 4 | -| `--dtype TYPE` | 数据类型:float16 或 float32 | float16 | -| `--model-name NAME` | 模型名称 | CN-CLIP/ViT-L-14-336 | -| `--replicas NUM` | 副本数 | 1 | - -### 示例 +### 4.2 统一编排(推荐日常用法) ```bash -# 使用默认配置启动 -./scripts/start_cnclip_service.sh - -# 指定端口和设备 -./scripts/start_cnclip_service.sh --port 52000 --device cpu - -# 使用其他模型 -./scripts/start_cnclip_service.sh --model-name CN-CLIP/ViT-H-14 +./scripts/service_ctl.sh restart +# 或 +./restart.sh ``` -### 停止服务 +`service_ctl.sh` 在启动 `cnclip` 时默认注入 `CNCLIP_DEVICE=cuda`。 +若机器无 GPU 或希望改用 CPU,可在 `.env` 设置: ```bash -./scripts/stop_cnclip_service.sh +CNCLIP_DEVICE=cpu ``` -## API 接口说明 +## 5. GPU 使用与验证 -### Python 客户端 +### 5.1 必须点 -服务使用 gRPC 协议,必须使用 Python 客户端: +- 启动日志显示 `device: cuda` 仅代表配置传入成功; +- 只有在**首次编码请求触发模型加载后**,`nvidia-smi` 才一定能看到显存占用。 -```python -from clip_client import Client +### 5.2 推荐验证步骤 -# 创建客户端(使用 grpc:// 协议) -c = Client('grpc://localhost:51000') -``` +1) 启动服务: -### 编码接口 +```bash +./scripts/start_cnclip_service.sh --port 51000 --device cuda +``` -#### 1. 文本编码 +2) 发送一次请求(触发模型加载): -```python +```bash +PYTHONPATH="third-party/clip-as-service/client:${PYTHONPATH}" NO_VERSION_CHECK=1 .venv-cnclip/bin/python -c " from clip_client import Client - c = Client('grpc://localhost:51000') - -# 编码单个文本 -result = c.encode(['这是测试文本']) -print(result.shape) # (1, 1024) - -# 编码多个文本 -result = c.encode(['文本1', '文本2', '文本3']) -print(result.shape) # (3, 1024) +r = c.encode(['测试']) +print('shape:', r.shape) +" ``` -#### 2. 
图像编码 +3) 观察 GPU: -```python -# 编码远程图像 URL -result = c.encode(['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg']) -print(result.shape) # (1, 1024) - -# 编码本地图像文件 -result = c.encode(['/path/to/image.jpg']) -print(result.shape) # (1, 1024) -``` - -#### 3. 混合编码 - -```python -# 同时编码文本和图像 -result = c.encode([ - '这是文本', - 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg', - '另一个文本' -]) -print(result.shape) # (3, 1024) +```bash +nvidia-smi ``` -### 返回格式 +预期: +- `shape` 为 `(1, 1024)`; +- `nvidia-smi` 出现对应 `python`/`clip_server` 进程并有显存占用。 -- **类型**: `numpy.ndarray` -- **形状**: `(N, 1024)`,其中 N 是输入数量 -- **数据类型**: `float32` -- **维度**: 1024(CN-CLIP 模型的 embedding 维度) +## 6. 使用方式(客户端) -### 支持的模型 +### 6.1 在本仓库中(推荐) -| 模型名称 | 说明 | 推荐场景 | -|---------|------|---------| -| `CN-CLIP/ViT-B-16` | 基础版本,速度快 | 对速度要求高的场景 | -| `CN-CLIP/ViT-L-14` | 平衡版本 | 通用场景 | -| `CN-CLIP/ViT-L-14-336` | 高分辨率版本(默认) | 需要处理高分辨率图像 | -| `CN-CLIP/ViT-H-14` | 大型版本,精度高 | 对精度要求高的场景 | -| `CN-CLIP/RN50` | ResNet-50 版本 | 兼容性场景 | +- 服务消费者一般是 `embedding` 服务,不建议业务侧直接连 `cnclip`。 +- 若需手动调试,可在主 `.venv` 安装 client,或通过 `PYTHONPATH` 使用 vendored client。 -## 测试 +示例: -运行测试脚本: +```python +from clip_client import Client -```bash -./scripts/test_cnclip_service.sh +c = Client("grpc://127.0.0.1:51000") +vec = c.encode(["https://example.com/a.jpg", "测试文本"]) +print(vec.shape) # (2, 1024) ``` -测试脚本会验证: -- 文本编码功能 -- 图像编码功能(远程 URL) -- 混合编码功能 +### 6.2 常见误区 -每个测试会显示 embedding 的维度和前 20 个数字。 +- ❌ 用 `http://localhost:51000` 当成 HTTP 服务访问; +- ❌ 只看“启动成功”就判断已用 GPU,不发请求不看 `nvidia-smi`; +- ❌ 在主 `.venv` 直接安装 server 依赖导致依赖树污染。 -## 查看日志 +## 7. 已知兼容性说明(关键信息) -```bash -tail -f /data/tw/saas-search/logs/cnclip_service.log -``` - -## 常见问题 - -### 1. 
服务启动失败 - -- 检查端口是否被占用:`lsof -i :51000` -- 检查 conda 环境是否正确激活 -- 查看日志文件获取详细错误信息 +- `clip-as-service` 在本项目场景下依赖链较老,`grpcio`/`jina`/`docarray` 组合在 Python 3.12 上易触发源码构建问题。 +- `setuptools>=82` 移除了 `pkg_resources`;而部分依赖链仍会导入它,因此专用脚本固定了兼容范围。 +- `setup_cnclip_venv.sh` 中存在“为可运行性而做的约束收敛”,这是有意行为,不建议手动放开。 -### 2. 客户端连接失败 +## 8. 排障速查 -确保使用正确的协议: +### 8.1 启动失败 -```python -# 正确:使用 grpc:// -c = Client('grpc://localhost:51000') +- 查看日志:`tail -f logs/cnclip_service.log` +- 检查端口占用:`lsof -i :51000` +- 重新构建环境:`rm -rf .venv-cnclip && ./scripts/setup_cnclip_venv.sh` -# 错误:不要使用 http:// -# c = Client('http://localhost:51000') # 会失败 -``` +### 8.2 连接失败 -### 3. 编码失败 +- 确认客户端使用 `grpc://` 协议; +- 确认端口与服务端一致(默认 `51000`)。 -- 检查服务是否正常运行 -- 检查输入格式是否正确 -- 查看服务日志排查错误 +### 8.3 看不到 GPU 进程 -### 4. 依赖安装 +- 先发一次编码请求,再看 `nvidia-smi`; +- 确认启动参数或环境变量为 `cuda`(`--device cuda` 或 `CNCLIP_DEVICE=cuda`); +- 确认日志中无模型加载异常。 -确保已安装必要的依赖: +## 9. 与其他文档的关系 -```bash -pip install clip-client -``` +- 开发总览:`docs/QUICKSTART.md` +- 系统架构:`docs/DEVELOPER_GUIDE.md` +- 向量服务说明:`embeddings/README.md` -服务端依赖会在启动脚本中自动检查。 +本文件聚焦 CN-CLIP(clip-as-service)专项,不重复解释项目通用内容。 diff --git a/docs/DEVELOPER_GUIDE.md b/docs/DEVELOPER_GUIDE.md index 6ba7b3a..90003a2 100644 --- a/docs/DEVELOPER_GUIDE.md +++ b/docs/DEVELOPER_GUIDE.md @@ -401,6 +401,7 @@ services: | 运维、日志、多环境、故障 | [Usage-Guide.md](./Usage-Guide.md) | | 索引模块职责与 Java 对接 | [indexer/README.md](../indexer/README.md) | | 向量模块与 clip-as-service | [embeddings/README.md](../embeddings/README.md) | +| CN-CLIP 服务专项(环境/运维/GPU) | [CNCLIP_SERVICE说明文档.md](./CNCLIP_SERVICE说明文档.md) | ### 10.2 仓库内入口 diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index b53fd31..47a011c 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -495,6 +495,7 @@ lsof -i :6004 | `docs/搜索API对接指南.md` | 搜索 API 完整说明 | | `indexer/README.md` | 索引模块职责与接口 | | `embeddings/README.md` | 向量化服务说明 | +| `docs/CNCLIP_SERVICE说明文档.md` | CN-CLIP/clip-as-service 专项(环境、GPU、运维) | | `reranker/README.md` | 重排服务说明 | --- diff --git 
a/embeddings/clip_as_service_encoder.py b/embeddings/clip_as_service_encoder.py index b7048c5..e016481 100644 --- a/embeddings/clip_as_service_encoder.py +++ b/embeddings/clip_as_service_encoder.py @@ -21,6 +21,8 @@ def _ensure_clip_client_path(): client_path = os.path.join(repo_root, "third-party", "clip-as-service", "client") if os.path.isdir(client_path) and client_path not in sys.path: sys.path.insert(0, client_path) + # Skip client version check to avoid importing helper (pkg_resources); no conda/separate env + os.environ.setdefault("NO_VERSION_CHECK", "1") def _normalize_image_url(url: str) -> str: diff --git a/embeddings/server.py b/embeddings/server.py index 779a1b3..5f46c7a 100644 --- a/embeddings/server.py +++ b/embeddings/server.py @@ -52,6 +52,9 @@ def load_models(): # Load image model: clip-as-service (recommended) or local CN-CLIP + # IMPORTANT: failures here should NOT prevent the whole service from starting. + # If image model cannot be loaded, we keep `_image_model` as None and only + # disable /embed/image while keeping /embed/text fully functional. 
if open_image_model: try: if CONFIG.USE_CLIP_AS_SERVICE: @@ -69,8 +72,12 @@ def load_models(): ) logger.info("Image model (local CN-CLIP) loaded successfully") except Exception as e: - logger.error(f"Failed to load image model: {e}", exc_info=True) - raise + logger.error( + "Failed to load image model; image embeddings will be disabled but text embeddings remain available: %s", + e, + exc_info=True, + ) + _image_model = None logger.info("All embedding models loaded successfully, service ready") @@ -132,7 +139,9 @@ def embed_text(texts: List[str]) -> List[Optional[List[float]]]: @app.post("/embed/image") def embed_image(images: List[str]) -> List[Optional[List[float]]]: if _image_model is None: - raise RuntimeError("Image model not loaded") + # Graceful degradation: keep API shape but return all None + logger.warning("embed_image called but image model is not loaded; returning all None vectors") + return [None] * len(images) out: List[Optional[List[float]]] = [None] * len(images) # Normalize inputs diff --git a/indexer/incremental_service.py b/indexer/incremental_service.py index cac7710..48b88f4 100644 --- a/indexer/incremental_service.py +++ b/indexer/incremental_service.py @@ -32,20 +32,77 @@ class IncrementalIndexerService: self.category_id_to_name = load_category_mapping(db_engine) logger.info(f"Preloaded {len(self.category_id_to_name)} category mappings") - # 缓存:避免频繁增量请求重复加载config / 构造transformer + # 缓存:避免频繁增量请求重复加载 config / 构造 transformer + # NOTE: 为避免“首请求”懒加载导致超时,尽量在进程启动阶段完成初始化: + # - config.yaml 加载 + # - translator / embedding / image encoder provider 初始化(best-effort) self._config: Optional[Any] = None self._config_lock = threading.Lock() + self._translator: Optional[Any] = None + self._translation_prompts: Optional[Dict[str, Any]] = None + self._searchable_option_dimensions: Optional[List[str]] = None + self._shared_text_encoder: Optional[Any] = None + self._shared_image_encoder: Optional[Any] = None + + self._eager_init() # tenant_id -> (transformer, 
encoder, enable_embedding) self._transformer_cache: Dict[str, Tuple[Any, Optional[Any], bool]] = {} self._transformer_cache_lock = threading.Lock() + def _eager_init(self) -> None: + """Best-effort eager initialization to reduce first-request latency.""" + try: + self._config = ConfigLoader("config/config.yaml").load_config() + except Exception as e: + logger.warning("Failed to eagerly load config/config.yaml: %s", e, exc_info=True) + self._config = None + return + + try: + self._translation_prompts = getattr(self._config.query_config, "translation_prompts", {}) or {} + self._searchable_option_dimensions = ( + getattr(self._config.spu_config, "searchable_option_dimensions", None) + or ["option1", "option2", "option3"] + ) + except Exception: + self._translation_prompts = {} + self._searchable_option_dimensions = ["option1", "option2", "option3"] + + # Translator provider (best-effort) + try: + from providers import create_translation_provider + + self._translator = create_translation_provider(self._config.query_config) + except Exception as e: + logger.warning("Failed to initialize translation provider at startup: %s", e) + self._translator = None + + # Text embedding encoder (best-effort) + if bool(getattr(self._config.query_config, "enable_text_embedding", False)): + try: + from embeddings.text_encoder import BgeEncoder + + self._shared_text_encoder = BgeEncoder() + except Exception as e: + logger.warning("Failed to initialize BgeEncoder at startup: %s", e) + self._shared_text_encoder = None + + # Image embedding encoder (best-effort; may be unavailable if embedding service not running) + try: + from embeddings.image_encoder import CLIPImageEncoder + + self._shared_image_encoder = CLIPImageEncoder() + except Exception as e: + logger.debug("Image encoder not available for indexer startup: %s", e) + self._shared_image_encoder = None + def _get_config(self) -> Any: """Load config once per process (thread-safe).""" if self._config is not None: return self._config 
with self._config_lock: if self._config is None: - self._config = ConfigLoader().load_config() + self._config = ConfigLoader("config/config.yaml").load_config() return self._config def _get_transformer_bundle(self, tenant_id: str) -> Tuple[Any, Optional[Any], bool]: @@ -64,26 +121,39 @@ class IncrementalIndexerService: config = self._get_config() enable_embedding = bool(getattr(config.query_config, "enable_text_embedding", False)) - encoder: Optional[Any] = None - if enable_embedding: + # Use shared encoders/providers preloaded at startup when可用; + # 若启动时初始化失败,则在首次请求时做一次兜底初始化,避免永久禁用。 + encoder: Optional[Any] = self._shared_text_encoder if enable_embedding else None + if enable_embedding and encoder is None: try: from embeddings.text_encoder import BgeEncoder + encoder = BgeEncoder() + self._shared_text_encoder = encoder + logger.info("BgeEncoder lazily initialized in _get_transformer_bundle") except Exception as e: - logger.warning(f"Failed to initialize BgeEncoder for tenant_id={tenant_id}: {e}") + logger.warning("Failed to lazily initialize BgeEncoder for tenant_id=%s: %s", tenant_id, e) encoder = None enable_embedding = False - image_encoder: Optional[Any] = None - try: - from embeddings.image_encoder import CLIPImageEncoder - image_encoder = CLIPImageEncoder() - except Exception as e: - logger.debug("Image encoder not available for indexer: %s", e) + image_encoder: Optional[Any] = self._shared_image_encoder + if image_encoder is None: + try: + from embeddings.image_encoder import CLIPImageEncoder + + image_encoder = CLIPImageEncoder() + self._shared_image_encoder = image_encoder + logger.info("CLIPImageEncoder lazily initialized in _get_transformer_bundle") + except Exception as e: + logger.debug("Image encoder not available for indexer (lazy init): %s", e) + image_encoder = None transformer = create_document_transformer( category_id_to_name=self.category_id_to_name, tenant_id=tenant_id, + searchable_option_dimensions=self._searchable_option_dimensions, + 
translator=self._translator, + translation_prompts=self._translation_prompts, encoder=encoder, enable_title_embedding=False, # batch fill later image_encoder=image_encoder, @@ -97,6 +167,23 @@ class IncrementalIndexerService: self._transformer_cache[str(tenant_id)] = bundle return bundle + def warmup_transformers(self, tenant_ids: List[str]) -> Dict[str, Any]: + """ + Eagerly build transformer bundles for given tenant ids. + This moves per-tenant initialization to startup phase, reducing first-request latency. + """ + start = time.time() + ok = 0 + failed: List[Dict[str, str]] = [] + for tid in tenant_ids or []: + try: + _ = self._get_transformer_bundle(str(tid)) + ok += 1 + except Exception as e: + failed.append({"tenant_id": str(tid), "error": str(e)}) + elapsed_ms = round((time.time() - start) * 1000.0, 3) + return {"requested": len(tenant_ids or []), "warmed": ok, "failed": failed, "elapsed_ms": elapsed_ms} + @staticmethod def _normalize_spu_ids(spu_ids: List[str]) -> List[int]: """Normalize SPU IDs to ints for DB queries; skip non-int IDs.""" diff --git a/indexer/process_products.py b/indexer/process_products.py index d9ca462..5491339 100644 --- a/indexer/process_products.py +++ b/indexer/process_products.py @@ -23,8 +23,11 @@ from config.env_config import REDIS_CONFIG # 配置 BATCH_SIZE = 20 -API_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1" -MODEL_NAME = "qwen-max" +# 华北2(北京):https://dashscope.aliyuncs.com/compatible-mode/v1 +# 新加坡:https://dashscope-intl.aliyuncs.com/compatible-mode/v1 +# 美国(弗吉尼亚):https://dashscope-us.aliyuncs.com/compatible-mode/v1 +API_BASE_URL = "https://dashscope-us.aliyuncs.com/compatible-mode/v1" +MODEL_NAME = "qwen-flash" API_KEY = os.environ.get("DASHSCOPE_API_KEY") MAX_RETRIES = 3 RETRY_DELAY = 5 # 秒 @@ -398,8 +401,9 @@ def parse_markdown_table(markdown_content: str) -> List[Dict[str, str]]: # 表格行处理 if line.startswith('|'): - # 分隔行(----) - if set(line.replace('|', '').strip()) <= {'-', ':'}: + # 分隔行(---- 或 :---: 
等;允许空格,如 "| ---- | ---- |") + sep_chars = line.replace('|', '').strip().replace(' ', '') + if sep_chars and set(sep_chars) <= {'-', ':'}: data_started = True continue diff --git a/query/translator.py b/query/translator.py index 93ac8eb..2f94019 100644 --- a/query/translator.py +++ b/query/translator.py @@ -59,9 +59,12 @@ class Translator: Default model is 'qwen' which uses Alibaba Cloud DashScope API. """ +# 华北2(北京):https://dashscope.aliyuncs.com/compatible-mode/v1 +# 新加坡:https://dashscope-intl.aliyuncs.com/compatible-mode/v1 +# 美国(弗吉尼亚):https://dashscope-us.aliyuncs.com/compatible-mode/v1 DEEPL_API_URL = "https://api.deepl.com/v2/translate" # Pro tier - QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1" # 北京地域 + QWEN_BASE_URL = "https://dashscope-us.aliyuncs.com/compatible-mode/v1" # 美国(弗吉尼亚)地域 # QWEN_BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1" # 新加坡 # 如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/compatible-mode/v1 QWEN_MODEL = "qwen-mt-flash" # 快速翻译模型 diff --git a/scripts/create_tenant_index.sh b/scripts/create_tenant_index.sh index d0a4e44..cde2b4a 100755 --- a/scripts/create_tenant_index.sh +++ b/scripts/create_tenant_index.sh @@ -61,6 +61,7 @@ echo echo "删除索引: $ES_INDEX" echo curl -X DELETE "${ES_HOST}/${ES_INDEX}" $AUTH_PARAM -s -o /dev/null -w "HTTP状态码: %{http_code}\n" + echo echo "创建索引: $ES_INDEX" echo diff --git a/scripts/service_ctl.sh b/scripts/service_ctl.sh index 1236fe7..b652ae8 100755 --- a/scripts/service_ctl.sh +++ b/scripts/service_ctl.sh @@ -138,7 +138,11 @@ start_one() { case "${service}" in clip|cnclip) echo "[start] ${service} (managed by native script)" - bash -lc "${cmd}" >> "${lf}" 2>&1 || true + if [ "${service}" = "cnclip" ]; then + CNCLIP_DEVICE="${CNCLIP_DEVICE:-cuda}" bash -lc "${cmd}" >> "${lf}" 2>&1 || true + else + bash -lc "${cmd}" >> "${lf}" 2>&1 || true + fi if is_running_by_pid "${service}" || is_running_by_port "${service}"; then echo "[ok] ${service} started (log=${lf})" 
else @@ -272,6 +276,7 @@ Default target set (when no service provided): Optional startup flags: START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh + CNCLIP_DEVICE=cuda|cpu ./scripts/service_ctl.sh start cnclip EOF } diff --git a/scripts/setup_cnclip_venv.sh b/scripts/setup_cnclip_venv.sh new file mode 100755 index 0000000..b40c0b6 --- /dev/null +++ b/scripts/setup_cnclip_venv.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# +# 创建 CN-CLIP 服务专用虚拟环境(.venv-cnclip),用于隔离 clip-server 及其依赖 +#(如 grpcio、jina、docarray 等),避免与主项目 .venv 的依赖冲突或构建失败。 +# +# 使用方式: +# ./scripts/setup_cnclip_venv.sh +# +# 完成后,start_cnclip_service.sh 会自动优先使用 .venv-cnclip(若存在)。 +# +set -e + +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" +VENV_DIR="${PROJECT_ROOT}/.venv-cnclip" +CLIP_SERVER="${PROJECT_ROOT}/third-party/clip-as-service/server" + +echo "==========================================" +echo "CN-CLIP 专用环境 (.venv-cnclip)" +echo "==========================================" +echo "" + +if [ ! -d "${CLIP_SERVER}" ]; then + echo "错误: 未找到 clip-as-service 服务目录: ${CLIP_SERVER}" >&2 + exit 1 +fi + +# 使用系统或当前默认的 python3 创建 venv(不依赖主项目 .venv) +if [ -d "${VENV_DIR}" ]; then + echo "已存在 .venv-cnclip,将复用并更新依赖。" +else + echo "创建虚拟环境: ${VENV_DIR}" + python3 -m venv "${VENV_DIR}" +fi + +# 激活并升级基础工具。固定 setuptools<82,因 82 移除了 pkg_resources,而 jina/hubble 仍依赖它。 +"${VENV_DIR}/bin/pip" install --upgrade pip wheel +"${VENV_DIR}/bin/pip" install 'setuptools>=66,<82' + +# grpcio:jina 要求 <=1.68;1.57 在 Python 3.12 上无预编译 wheel(会触发源码构建与 pkg_resources 报错)。1.68.x 有 3.12 wheel,故先安装 1.68.x,避免后续安装 clip-server 时解析到 1.57。 +echo "安装 grpcio(优先预编译 wheel,兼容 jina<=1.68)..." +if ! "${VENV_DIR}/bin/pip" install --only-binary=grpcio 'grpcio>=1.46.0,<=1.68.1' 2>/dev/null; then + echo "镜像无匹配 wheel,尝试 PyPI..." + if ! 
"${VENV_DIR}/bin/pip" install --only-binary=grpcio -i https://pypi.org/simple 'grpcio>=1.46.0,<=1.68.1' 2>/dev/null; then + echo "错误: 无法获取 grpcio 预编译包,请检查网络或使用 Python 3.10/3.11。" >&2 + exit 1 + fi +fi + +# 安装 docarray==0.21(clip-server 要求) +echo "安装 docarray(clip-server 要求 0.21)..." +"${VENV_DIR}/bin/pip" install 'docarray==0.21.0' + +# jina 3.27 声明 grpcio<=1.57,会触发源码构建;先装 grpcio 配套的 reflection/health 1.68(wheel),再以 --no-deps 装 jina,最后补齐 jina 的其余依赖(不包含 grpcio) +echo "安装 grpcio-reflection / grpcio-health-checking(与已装 grpcio 1.68 一致)..." +"${VENV_DIR}/bin/pip" install --only-binary=:all: 'grpcio-reflection>=1.46,<=1.68' 'grpcio-health-checking>=1.46,<=1.68' 2>/dev/null || "${VENV_DIR}/bin/pip" install 'grpcio-reflection>=1.46,<=1.68' 'grpcio-health-checking>=1.46,<=1.68' +echo "安装 jina(--no-deps,避免拉取 grpcio 1.57)..." +"${VENV_DIR}/bin/pip" install 'jina>=3.27,<3.28' --no-deps +# 补齐 jina 3.27 的运行时依赖(见 jina 的 install_requires,不含 grpcio)。 +# 关键约束: +# - pydantic<2、opentelemetry-sdk<1.20、urllib3<2:与 jina 3.27 保持一致,减少 resolver 冲突告警 +"${VENV_DIR}/bin/pip" install 'uvicorn[standard]<=0.23.1' 'fastapi>=0.76' 'protobuf>=3.19' 'pyyaml>=5.3' 'pydantic<2' 'prometheus_client>=0.12' 'aiofiles' 'opentelemetry-api>=1.12,<1.20' 'opentelemetry-sdk>=1.14,<1.20' 'opentelemetry-exporter-otlp>=1.12,<1.20' 'opentelemetry-instrumentation-grpc>=0.35' 'opentelemetry-instrumentation-fastapi>=0.33' 'opentelemetry-instrumentation-aiohttp-client>=0.33' 'opentelemetry-exporter-prometheus>=0.33b0' 'websockets' 'python-multipart' 'urllib3<2' + +# 安装 CN-CLIP +echo "安装 cn-clip..." +"${VENV_DIR}/bin/pip" install cn-clip + +# 安装 clip-server 以 --no-deps 方式,避免因 docarray==0.21 解析到旧 jina 并拉取 grpcio 1.57 源码构建。依赖由前面已装的 jina/grpcio/cn-clip 与下面显式安装补齐。 +echo "安装 clip-server[cn_clip](--no-deps,再补齐依赖)..." 
+"${VENV_DIR}/bin/pip" install -e "${CLIP_SERVER}[cn_clip]" --no-deps +# clip-server 的 install_requires:ftfy, torch, regex, torchvision, jina, docarray, prometheus-client, open_clip_torch, pillow-avif-plugin;jina/cn_clip 已装;补齐 ftfy regex open_clip_torch pillow-avif-plugin(torch/torchvision 由 cn-clip 带入,prometheus_client 由 jina 带入) +"${VENV_DIR}/bin/pip" install 'ftfy' 'regex' 'open_clip_torch>=2.8.0,<2.9.0' 'pillow-avif-plugin' +# grpc_health 需要 protobuf 含 runtime_version(>=4);open_clip_torch 会拉低到 3.20,此处再升级 +"${VENV_DIR}/bin/pip" install 'protobuf>=4,<6' +# jina 3.27 声明的可选依赖,clip 服务需用到 +"${VENV_DIR}/bin/pip" install 'jcloud>=0.0.35' + +echo "" +echo "==========================================" +echo "✓ .venv-cnclip 已就绪" +echo "==========================================" +echo "启动 CN-CLIP 服务时将自动使用此环境:" +echo " ./scripts/start_cnclip_service.sh" +echo "或:" +echo " ./scripts/service_ctl.sh start cnclip" +echo "" diff --git a/scripts/start_clip_service.sh b/scripts/start_clip_service.sh index 856d219..2d1655a 100755 --- a/scripts/start_clip_service.sh +++ b/scripts/start_clip_service.sh @@ -19,19 +19,15 @@ echo "========================================" echo "Starting CLIP vector service (clip-server)" echo "========================================" -# Load conda and activate dedicated environment, if available -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" -if [ -f "$CONDA_ROOT/etc/profile.d/conda.sh" ]; then - # shellcheck disable=SC1091 - source "$CONDA_ROOT/etc/profile.d/conda.sh" - conda activate clip_service || { - echo "Failed to activate conda env 'clip_service'. Please create it first." >&2 - echo "See CLIP_SERVICE_README.md for setup instructions." >&2 +# Use project unified environment (same as activate.sh / service_ctl) +if [ -z "${VIRTUAL_ENV}" ] && [ -z "${CONDA_DEFAULT_ENV}" ]; then + if [ -f "$(pwd)/activate.sh" ]; then + # shellcheck source=activate.sh + source "$(pwd)/activate.sh" + else + echo "Error: activate.sh not found. 
Run from project root or source activate.sh first." >&2 exit 1 - } -else - echo "Warning: $CONDA_ROOT/etc/profile.d/conda.sh not found." >&2 - echo "Please activate the 'clip_service' environment manually before running this script." >&2 + fi fi if [ -f "${PID_FILE}" ]; then diff --git a/scripts/start_cnclip_service.sh b/scripts/start_cnclip_service.sh index 513ed45..524f441 100755 --- a/scripts/start_cnclip_service.sh +++ b/scripts/start_cnclip_service.sh @@ -175,44 +175,55 @@ if lsof -Pi :${PORT} -sTCP:LISTEN -t >/dev/null 2>&1; then exit 1 fi -# 检查 conda 环境 -if [ -z "${CONDA_DEFAULT_ENV}" ] || [ "${CONDA_DEFAULT_ENV}" != "clip_service" ]; then - echo -e "${YELLOW}警告: 当前未激活 clip_service 环境${NC}" - echo -e "${YELLOW}正在激活环境...${NC}" - - CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" - if [ -f "$CONDA_ROOT/etc/profile.d/conda.sh" ]; then - source "$CONDA_ROOT/etc/profile.d/conda.sh" - conda activate clip_service - echo -e "${GREEN}✓ 环境已激活${NC}" +# 优先使用 CN-CLIP 专用环境(避免与主项目依赖冲突;需先运行 ./scripts/setup_cnclip_venv.sh) +# 若无 .venv-cnclip,则使用项目统一环境(activate.sh) +CNCLIP_VENV="${PROJECT_ROOT}/.venv-cnclip" +if [ -x "${CNCLIP_VENV}/bin/python" ]; then + export PATH="${CNCLIP_VENV}/bin:${PATH}" + export VIRTUAL_ENV="${CNCLIP_VENV}" + echo -e "${GREEN}✓ 使用 CN-CLIP 专用环境: .venv-cnclip${NC}" +elif [ -z "${VIRTUAL_ENV}" ] && [ -z "${CONDA_DEFAULT_ENV}" ]; then + echo -e "${BLUE}激活项目环境...${NC}" + if [ -f "${PROJECT_ROOT}/activate.sh" ]; then + # shellcheck source=../activate.sh + source "${PROJECT_ROOT}/activate.sh" else - echo -e "${RED}错误: 无法找到 conda 初始化脚本: $CONDA_ROOT/etc/profile.d/conda.sh${NC}" + echo -e "${RED}错误: 未找到 ${PROJECT_ROOT}/activate.sh${NC}" + echo -e "${YELLOW}建议先创建 CN-CLIP 专用环境: ./scripts/setup_cnclip_venv.sh${NC}" exit 1 fi + echo -e "${GREEN}✓ 使用环境: ${VIRTUAL_ENV:-${CONDA_DEFAULT_ENV:-unknown}}${NC}" else - echo -e "${GREEN}✓ Conda 环境: ${CONDA_DEFAULT_ENV}${NC}" + echo -e "${GREEN}✓ 使用当前环境: ${VIRTUAL_ENV:-${CONDA_DEFAULT_ENV:-unknown}}${NC}" fi -# 检查 Python 依赖 +# 
检查 Python 依赖(CN-CLIP 服务端需要 cn_clip 与 clip_server) echo -e "${BLUE}检查 Python 依赖...${NC}" python -c "import cn_clip" 2>/dev/null || { echo -e "${RED}错误: cn_clip 未安装${NC}" - echo -e "${YELLOW}请运行: pip install cn-clip${NC}" + echo -e "${YELLOW}在项目环境中安装: pip install cn-clip 或 pip install -r requirements_ml.txt${NC}" exit 1 } -python -c "from clip_client import Client" 2>/dev/null || { - echo -e "${RED}错误: clip_client 未安装${NC}" - echo -e "${YELLOW}请运行: pip install clip-client${NC}" +# clip_server 通过 PYTHONPATH 加载(见下方启动命令),此处仅做可导入性检查 +export PYTHONPATH="${CLIP_SERVER_DIR}:${PYTHONPATH}" +python -c "import clip_server" 2>/dev/null || { + echo -e "${RED}错误: clip_server 不可用${NC}" + echo -e "${YELLOW}推荐使用专用环境(避免与主项目依赖冲突):${NC}" + echo -e "${YELLOW} ./scripts/setup_cnclip_venv.sh${NC}" + echo -e "${YELLOW}或在当前环境中安装: pip install -e third-party/clip-as-service/server[cn_clip]${NC}" exit 1 } -echo -e "${GREEN}✓ 所有依赖已安装${NC}" +echo -e "${GREEN}✓ 所有依赖已就绪${NC}" echo "" -# 自动检测设备 +# 自动检测设备(可通过环境变量 CNCLIP_DEVICE 指定,供 service_ctl/restart 使用) if [ "${DEVICE}" == "auto" ]; then - if command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null; then + if [ -n "${CNCLIP_DEVICE:-}" ]; then + DEVICE="${CNCLIP_DEVICE}" + echo -e "${GREEN}✓ 设备: ${DEVICE}(来自 CNCLIP_DEVICE)${NC}" + elif command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null; then DEVICE="cuda" echo -e "${GREEN}✓ 检测到 NVIDIA GPU,使用 CUDA${NC}" else @@ -263,7 +274,7 @@ if [ -f "${FLOW_FILE}" ] && [ ! -f "${FLOW_FILE}.original" ]; then echo -e "${YELLOW}已备份原配置文件: ${FLOW_FILE}.original${NC}" fi -# 生成新的配置文件(使用官方默认配置,只指定模型名称) +# 生成新的配置文件(使用官方默认配置,显式传入 device 以便使用 GPU) cat > "${TEMP_FLOW_FILE}" << EOF jtype: Flow version: '1' @@ -275,6 +286,7 @@ executors: jtype: CLIPEncoder with: name: '${MODEL_NAME}' + device: '${DEVICE}' metas: py_modules: - clip_server.executors.clip_torch -- libgit2 0.21.2