Commit 42e3aea637e1c1b1daeb37cd493da5cf2808a4c1
1 parent
d1d356f8
tidy
Showing
32 changed files
with
1073 additions
and
1903 deletions
Show diff stats
.env.example
| ... | ... | @@ -27,6 +27,9 @@ RERANKER_PORT=6007 |
| 27 | 27 | EMBEDDING_SERVICE_URL=http://127.0.0.1:6005 |
| 28 | 28 | TRANSLATION_SERVICE_URL=http://127.0.0.1:6006 |
| 29 | 29 | RERANKER_SERVICE_URL=http://127.0.0.1:6007/rerank |
| 30 | +TRANSLATION_PROVIDER=direct | |
| 31 | +TRANSLATION_MODEL=qwen | |
| 32 | +RERANK_PROVIDER=http | |
| 30 | 33 | |
| 31 | 34 | # Optional startup switches (run.sh / scripts/service_ctl.sh) |
| 32 | 35 | START_EMBEDDING=0 | ... | ... |
README.md
| ... | ... | @@ -68,32 +68,6 @@ query anchor |
| 68 | 68 | |
| 69 | 69 | |
| 70 | 70 | |
| 71 | -对外: | |
| 72 | -embedding服务: | |
| 73 | - curl -X POST http://43.166.252.75:6005/embed/text \ | |
| 74 | - -H "Content-Type: application/json" \ | |
| 75 | - -d '["衣服", "Bohemian Maxi Dress"]' | |
| 76 | - | |
| 77 | - | |
| 78 | -翻译服务: | |
| 79 | -# 方式1:直接运行 | |
| 80 | -python api/translator_app.py | |
| 81 | -# 方式2:使用 uvicorn | |
| 82 | -uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload | |
| 83 | - | |
| 84 | -curl -X POST http://localhost:6006/translate -H "Content-Type: application/json" -d '{ | |
| 85 | - "text": "商品名称", | |
| 86 | - "target_lang": "en", | |
| 87 | - "source_lang": "zh" | |
| 88 | - }' | |
| 89 | - | |
| 90 | -localhost替换为 | |
| 91 | -服务器内网地址: | |
| 92 | -10.0.163.168 | |
| 93 | -公网地址: | |
| 94 | -43.166.252.75 | |
| 95 | - | |
| 96 | - | |
| 97 | 71 | # 电商搜索引擎 SaaS |
| 98 | 72 | |
| 99 | 73 | 一个针对跨境独立站(店匠 Shoplazza 等)的多租户可配置搜索平台。README 作为项目导航入口,帮助你在不同阶段定位到更详细的文档。 |
| ... | ... | @@ -149,18 +123,16 @@ python scripts/recreate_and_import.py \ |
| 149 | 123 | - **可配置化**:字段/索引域/排序表达式/查询改写全部配置驱动 |
| 150 | 124 | - **脚本化流水线**:Mock/CSV 数据 → MySQL → Elasticsearch → API/前端 |
| 151 | 125 | |
| 152 | -## 快速上手(概览) | |
| 126 | +## 新人入口 | |
| 153 | 127 | |
| 154 | -| 步骤 | 去哪里看 | 摘要 | | |
| 155 | -|------|---------|------| | |
| 156 | -| 1. 准备环境 | `docs/环境配置说明.md` / `Usage-Guide.md` | Conda、`activate.sh`、依赖、ES/MySQL、`.env` | | |
| 157 | -| 2. 构造测试数据 | `测试数据指南.md` | Tenant1 Mock、Tenant2 CSV、`mock_data.sh` | | |
| 158 | -| 3. 启动与验证 | `Usage-Guide.md` | `run.sh` 一键启动、分步脚本、日志与健康检查 | | |
| 159 | -| 4. 理解架构 | `系统设计文档.md` | 数据流、配置系统、查询/搜索/索引模块 | | |
| 160 | -| 5. 接入搜索 API | `搜索API对接指南.md` / `搜索API速查表.md` | REST 端点、参数、响应、最佳实践 | | |
| 161 | -| 6. 查字段定义 | `索引字段说明.md` | `search_products` 映射、字段来源、类型与用途 | | |
| 128 | +**→ `docs/QUICKSTART.md`**:环境、服务、模块、请求示例一页搞定。 | |
| 162 | 129 | |
| 163 | -> README 仅保留最常用命令的“索引”。细节以主题文档为准。 | |
| 130 | +| 步骤 | 文档 | | |
| 131 | +|------|------| | |
| 132 | +| 1. 环境与启动 | `docs/QUICKSTART.md` | | |
| 133 | +| 2. 搜索/索引 API | `docs/QUICKSTART.md` §3、`docs/搜索API速查表.md` | | |
| 134 | +| 3. 运维与故障 | `docs/Usage-Guide.md` | | |
| 135 | +| 4. 架构与扩展 | `docs/PROVIDER_ARCHITECTURE.md`、`docs/系统设计文档.md` | | |
| 164 | 136 | |
| 165 | 137 | ### Runtimes & 命令示例 |
| 166 | 138 | |
| ... | ... | @@ -196,28 +168,17 @@ curl -X POST http://localhost:6002/search/ \ |
| 196 | 168 | -d '{"query": "玩具", "size": 10}' |
| 197 | 169 | ``` |
| 198 | 170 | |
| 199 | -## 文档地图 | |
| 200 | - | |
| 201 | -| 文档 | 内容提要 | 适用场景 | | |
| 202 | -|------|----------|----------| | |
| 203 | -| `docs/环境配置说明.md` | 系统要求、`activate.sh`、Conda/依赖、外部服务、CONDA_ROOT | 首次部署、新机器环境 | | |
| 204 | -| `docs/SERVICE_MATRIX.md` | 服务分层、端口、统一启动/停止入口 | 运维值守、联调启动 | | |
| 205 | -| `Usage-Guide.md` | 环境准备、服务启动、配置、日志、验证手册 | 日常运维、调试 | | |
| 206 | -| `基础配置指南.md` | 统一硬编码配置说明、索引结构、查询配置 | 了解系统配置、修改配置 | | |
| 207 | -| `测试数据指南.md` | 两个租户的模拟/CSV 数据构造 & MySQL→ES 流程 | 数据准备、联调 | | |
| 208 | -| `测试Pipeline说明.md` | 测试流水线、CI 脚本、上下文说明 | 自动化测试、追踪流水线 | | |
| 209 | -| `系统设计文档.md` | 架构、配置系统、索引/查询/排序模块细节 | 研发/扩展功能 | | |
| 210 | -| `索引字段说明v2.md` | `search_products` 字段、类型、来源、嵌套结构 | 新增字段、数据对齐 | | |
| 211 | -| `搜索API对接指南.md` | REST API(文本/图片/管理)详解、示例、响应格式 | API 使用、测试 | | |
| 212 | -| `搜索API速查表.md` | 常用请求体、过滤器、分面速查表 | 支持团队快速查阅 | | |
| 213 | -| `Search-API-Examples.md` | Python/JS/cURL 端到端示例 | 客户工程、SDK 参考 | | |
| 214 | -| `环境配置说明.md` + `.env` 模板 | 运行依赖账号、端口、密钥对照表 | 交付 & 运维 | | |
| 215 | - | |
| 216 | -更多补充材料: | |
| 217 | - | |
| 218 | -- `测试数据指南.md`:包含完整工作流脚本示例 | |
| 219 | -- `商品数据源入ES配置规范.md`:数据源映射约定 | |
| 220 | -- `MULTILANG_FEATURE.md`:多语言处理细节 | |
| 171 | +## 文档索引 | |
| 172 | + | |
| 173 | +| 文档 | 用途 | | |
| 174 | +|------|------| | |
| 175 | +| `docs/QUICKSTART.md` | **新人入口**:环境、服务、模块、请求 | | |
| 176 | +| `docs/Usage-Guide.md` | 运维:日志、多环境、故障排查 | | |
| 177 | +| `docs/搜索API速查表.md` | 搜索 API 参数速查 | | |
| 178 | +| `docs/搜索API对接指南.md` | 搜索 API 完整说明 | | |
| 179 | +| `docs/PROVIDER_ARCHITECTURE.md` | 翻译/向量/重排 provider 扩展 | | |
| 180 | +| `docs/环境配置说明.md` | 首次部署、新机器环境 | | |
| 181 | +| `docs/系统设计文档.md` | 架构与模块细节 | | |
| 221 | 182 | |
| 222 | 183 | ## 关键工作流指引 |
| 223 | 184 | ... | ... |
config/__init__.py
| ... | ... | @@ -21,6 +21,20 @@ from .utils import ( |
| 21 | 21 | get_match_fields_for_index, |
| 22 | 22 | get_domain_fields |
| 23 | 23 | ) |
| 24 | +from .service_endpoints import ( | |
| 25 | + resolve_translation_service_url, | |
| 26 | + resolve_embedding_service_url, | |
| 27 | + resolve_reranker_service_url, | |
| 28 | +) | |
| 29 | +from .services_config import ( | |
| 30 | + get_translation_config, | |
| 31 | + get_embedding_config, | |
| 32 | + get_rerank_config, | |
| 33 | + get_translation_base_url, | |
| 34 | + get_embedding_base_url, | |
| 35 | + get_rerank_service_url, | |
| 36 | + ServiceConfig, | |
| 37 | +) | |
| 24 | 38 | |
| 25 | 39 | __all__ = [ |
| 26 | 40 | # Main config classes |
| ... | ... | @@ -38,4 +52,14 @@ __all__ = [ |
| 38 | 52 | 'load_tenant_config', |
| 39 | 53 | 'get_match_fields_for_index', |
| 40 | 54 | 'get_domain_fields', |
| 55 | + 'resolve_translation_service_url', | |
| 56 | + 'resolve_embedding_service_url', | |
| 57 | + 'resolve_reranker_service_url', | |
| 58 | + 'get_translation_config', | |
| 59 | + 'get_embedding_config', | |
| 60 | + 'get_rerank_config', | |
| 61 | + 'get_translation_base_url', | |
| 62 | + 'get_embedding_base_url', | |
| 63 | + 'get_rerank_service_url', | |
| 64 | + 'ServiceConfig', | |
| 41 | 65 | ] | ... | ... |
config/config.yaml
| ... | ... | @@ -99,10 +99,10 @@ query_config: |
| 99 | 99 | chinese_char_limit: 4 |
| 100 | 100 | english_word_limit: 3 |
| 101 | 101 | |
| 102 | - # 翻译API配置 | |
| 102 | + # 翻译API配置(provider/URL 在 services.translation) | |
| 103 | 103 | translation_service: "deepl" |
| 104 | 104 | translation_api_key: null # 通过环境变量设置 |
| 105 | - | |
| 105 | + | |
| 106 | 106 | # 翻译提示词配置(用于提高翻译质量,作为DeepL API的context参数) |
| 107 | 107 | translation_prompts: |
| 108 | 108 | # 商品标题翻译提示词 |
| ... | ... | @@ -133,20 +133,57 @@ function_score: |
| 133 | 133 | boost_mode: "multiply" |
| 134 | 134 | functions: [] |
| 135 | 135 | |
| 136 | -# 重排配置(唯一实现:外部 BGE 重排服务,由请求参数 enable_rerank 控制是否执行) | |
| 137 | -# enable_rerank 且 from+size<=rerank_window 时:从 ES 取前 rerank_window 条、重排后再按 from/size 分页 | |
| 136 | +# 重排配置(provider/URL 在 services.rerank) | |
| 138 | 137 | rerank: |
| 139 | 138 | rerank_window: 1000 |
| 140 | - # service_url: "http://127.0.0.1:6007/rerank" # 可选,不填则用默认端口 6007 | |
| 141 | - timeout_sec: 15.0 # 文档多时重排耗时长,可按需调大 | |
| 139 | + timeout_sec: 15.0 | |
| 142 | 140 | weight_es: 0.4 |
| 143 | 141 | weight_ai: 0.6 |
| 144 | - # 模板:用于将搜索请求/文档字段组装成重排服务输入 | |
| 145 | - # - rerank_query_template:支持 {query} | |
| 146 | - # - rerank_doc_template:支持 {title} {brief} {vendor} {description} {category_path} | |
| 147 | 142 | rerank_query_template: "{query}" |
| 148 | 143 | rerank_doc_template: "{title}" |
| 149 | 144 | |
| 145 | +# 可扩展服务/provider 注册表(单一配置源) | |
| 146 | +services: | |
| 147 | + translation: | |
| 148 | + provider: "direct" # direct | http | google(reserved) | |
| 149 | + base_url: "http://127.0.0.1:6006" | |
| 150 | + model: "qwen" | |
| 151 | + timeout_sec: 10.0 | |
| 152 | + providers: | |
| 153 | + direct: | |
| 154 | + model: "qwen" | |
| 155 | + http: | |
| 156 | + base_url: "http://127.0.0.1:6006" | |
| 157 | + model: "qwen" | |
| 158 | + timeout_sec: 10.0 | |
| 159 | + google: | |
| 160 | + enabled: false | |
| 161 | + project_id: "" | |
| 162 | + location: "global" | |
| 163 | + model: "" | |
| 164 | + embedding: | |
| 165 | + provider: "http" # http | vllm(reserved) | |
| 166 | + base_url: "http://127.0.0.1:6005" | |
| 167 | + providers: | |
| 168 | + http: | |
| 169 | + base_url: "http://127.0.0.1:6005" | |
| 170 | + vllm: | |
| 171 | + enabled: false | |
| 172 | + base_url: "" | |
| 173 | + model: "" | |
| 174 | + note: "reserved for future vLLM embedding backend" | |
| 175 | + rerank: | |
| 176 | + provider: "http" # http | vllm(reserved) | |
| 177 | + base_url: "http://127.0.0.1:6007" | |
| 178 | + providers: | |
| 179 | + http: | |
| 180 | + base_url: "http://127.0.0.1:6007" | |
| 181 | + vllm: | |
| 182 | + enabled: false | |
| 183 | + base_url: "" | |
| 184 | + model: "" | |
| 185 | + note: "reserved for future vLLM reranker backend" | |
| 186 | + | |
| 150 | 187 | # SPU配置(已启用,使用嵌套skus) |
| 151 | 188 | spu_config: |
| 152 | 189 | enabled: true | ... | ... |
config/config_loader.py
| ... | ... | @@ -9,7 +9,6 @@ that define how search should be executed (NOT how data should be indexed). |
| 9 | 9 | """ |
| 10 | 10 | |
| 11 | 11 | import yaml |
| 12 | -import os | |
| 13 | 12 | from typing import Dict, Any, List, Optional |
| 14 | 13 | from dataclasses import dataclass, field |
| 15 | 14 | from pathlib import Path |
| ... | ... | @@ -39,12 +38,12 @@ class QueryConfig: |
| 39 | 38 | # Query rewrite dictionary (loaded from external file) |
| 40 | 39 | rewrite_dictionary: Dict[str, str] = field(default_factory=dict) |
| 41 | 40 | |
| 42 | - # Translation settings | |
| 41 | + # Translation settings (provider/URL in services.translation) | |
| 43 | 42 | translation_service: str = "deepl" |
| 44 | 43 | translation_api_key: Optional[str] = None |
| 45 | 44 | translation_glossary_id: Optional[str] = None |
| 46 | 45 | translation_context: str = "e-commerce product search" |
| 47 | - translation_prompts: Dict[str, str] = field(default_factory=dict) # Translation prompts for different use cases | |
| 46 | + translation_prompts: Dict[str, str] = field(default_factory=dict) | |
| 48 | 47 | |
| 49 | 48 | # Embedding field names |
| 50 | 49 | text_embedding_field: Optional[str] = "title_embedding" |
| ... | ... | @@ -88,17 +87,11 @@ class RankingConfig: |
| 88 | 87 | |
| 89 | 88 | @dataclass |
| 90 | 89 | class RerankConfig: |
| 91 | - """重排配置(唯一实现:调用外部 BGE 重排服务,由请求参数 enable_rerank 控制是否执行)""" | |
| 92 | - # 重排窗口:enable_rerank 且 from+size<=rerank_window 时,从 ES 取前 rerank_window 条重排后再分页 | |
| 90 | + """重排配置(provider/URL 在 services.rerank)""" | |
| 93 | 91 | rerank_window: int = 1000 |
| 94 | - # 可选:重排服务 URL,为空时使用 reranker 模块默认端口 6007 | |
| 95 | - service_url: Optional[str] = None | |
| 96 | 92 | timeout_sec: float = 15.0 |
| 97 | 93 | weight_es: float = 0.4 |
| 98 | 94 | weight_ai: float = 0.6 |
| 99 | - # 模板:用于将搜索请求/文档字段组装成重排服务输入 | |
| 100 | - # - rerank_query_template:支持 {query} | |
| 101 | - # - rerank_doc_template:支持 {title} {brief} {vendor} {description} {category_path} | |
| 102 | 95 | rerank_query_template: str = "{query}" |
| 103 | 96 | rerank_doc_template: str = "{title}" |
| 104 | 97 | |
| ... | ... | @@ -136,6 +129,8 @@ class SearchConfig: |
| 136 | 129 | |
| 137 | 130 | # ES settings |
| 138 | 131 | es_settings: Dict[str, Any] = field(default_factory=dict) |
| 132 | + # Extensible service/provider registry (translation/embedding/rerank/...) | |
| 133 | + services: Dict[str, Any] = field(default_factory=dict) | |
| 139 | 134 | |
| 140 | 135 | |
| 141 | 136 | class ConfigurationError(Exception): |
| ... | ... | @@ -231,13 +226,10 @@ class ConfigLoader: |
| 231 | 226 | |
| 232 | 227 | # Parse query config |
| 233 | 228 | query_config_data = config_data.get("query_config", {}) |
| 234 | - | |
| 235 | - # Load rewrite dictionary from external file | |
| 229 | + services_data = config_data.get("services", {}) if isinstance(config_data.get("services", {}), dict) else {} | |
| 236 | 230 | rewrite_dictionary = self._load_rewrite_dictionary() |
| 237 | - | |
| 238 | - # Parse embedding disable thresholds | |
| 239 | 231 | embedding_thresholds = query_config_data.get("embedding_disable_thresholds", {}) |
| 240 | - | |
| 232 | + | |
| 241 | 233 | query_config = QueryConfig( |
| 242 | 234 | supported_languages=query_config_data.get("supported_languages") or ["zh", "en"], |
| 243 | 235 | default_language=query_config_data.get("default_language") or "en", |
| ... | ... | @@ -272,11 +264,10 @@ class ConfigLoader: |
| 272 | 264 | functions=fs_data.get("functions") or [] |
| 273 | 265 | ) |
| 274 | 266 | |
| 275 | - # Parse Rerank configuration(唯一实现:外部重排服务,由 enable_rerank 控制) | |
| 267 | + # Parse Rerank (provider/URL in services.rerank) | |
| 276 | 268 | rerank_data = config_data.get("rerank", {}) |
| 277 | 269 | rerank = RerankConfig( |
| 278 | 270 | rerank_window=int(rerank_data.get("rerank_window", 1000)), |
| 279 | - service_url=rerank_data.get("service_url") or None, | |
| 280 | 271 | timeout_sec=float(rerank_data.get("timeout_sec", 15.0)), |
| 281 | 272 | weight_es=float(rerank_data.get("weight_es", 0.4)), |
| 282 | 273 | weight_ai=float(rerank_data.get("weight_ai", 0.6)), |
| ... | ... | @@ -306,7 +297,8 @@ class ConfigLoader: |
| 306 | 297 | spu_config=spu_config, |
| 307 | 298 | tenant_config=tenant_config_data, |
| 308 | 299 | es_index_name=config_data.get("es_index_name", "search_products"), |
| 309 | - es_settings=config_data.get("es_settings", {}) | |
| 300 | + es_settings=config_data.get("es_settings", {}), | |
| 301 | + services=services_data | |
| 310 | 302 | ) |
| 311 | 303 | |
| 312 | 304 | def _parse_index_config(self, index_data: Dict[str, Any]) -> IndexConfig: |
| ... | ... | @@ -374,7 +366,7 @@ class ConfigLoader: |
| 374 | 366 | f"Default language '{config.query_config.default_language}' " |
| 375 | 367 | f"not in supported languages: {config.query_config.supported_languages}" |
| 376 | 368 | ) |
| 377 | - | |
| 369 | + | |
| 378 | 370 | return errors |
| 379 | 371 | |
| 380 | 372 | def to_dict(self, config: SearchConfig) -> Dict[str, Any]: |
| ... | ... | @@ -413,7 +405,6 @@ class ConfigLoader: |
| 413 | 405 | }, |
| 414 | 406 | "rerank": { |
| 415 | 407 | "rerank_window": config.rerank.rerank_window, |
| 416 | - "service_url": config.rerank.service_url, | |
| 417 | 408 | "timeout_sec": config.rerank.timeout_sec, |
| 418 | 409 | "weight_es": config.rerank.weight_es, |
| 419 | 410 | "weight_ai": config.rerank.weight_ai, |
| ... | ... | @@ -425,7 +416,8 @@ class ConfigLoader: |
| 425 | 416 | "spu_field": config.spu_config.spu_field, |
| 426 | 417 | "inner_hits_size": config.spu_config.inner_hits_size, |
| 427 | 418 | "searchable_option_dimensions": config.spu_config.searchable_option_dimensions |
| 428 | - } | |
| 419 | + }, | |
| 420 | + "services": config.services, | |
| 429 | 421 | } |
| 430 | 422 | |
| 431 | 423 | def _index_to_dict(self, index: IndexConfig) -> Dict[str, Any]: | ... | ... |
config/env_config.py
| ... | ... | @@ -63,8 +63,11 @@ EMBEDDING_HOST = os.getenv('EMBEDDING_HOST', '127.0.0.1') |
| 63 | 63 | EMBEDDING_PORT = int(os.getenv('EMBEDDING_PORT', 6005)) |
| 64 | 64 | TRANSLATION_HOST = os.getenv('TRANSLATION_HOST', '127.0.0.1') |
| 65 | 65 | TRANSLATION_PORT = int(os.getenv('TRANSLATION_PORT', os.getenv('TRANSLATOR_PORT', 6006))) |
| 66 | +TRANSLATION_PROVIDER = os.getenv('TRANSLATION_PROVIDER', 'direct') | |
| 67 | +TRANSLATION_MODEL = os.getenv('TRANSLATION_MODEL', 'qwen') | |
| 66 | 68 | RERANKER_HOST = os.getenv('RERANKER_HOST', '127.0.0.1') |
| 67 | 69 | RERANKER_PORT = int(os.getenv('RERANKER_PORT', 6007)) |
| 70 | +RERANK_PROVIDER = os.getenv('RERANK_PROVIDER', 'http') | |
| 68 | 71 | # API_BASE_URL: 如果未设置,根据API_HOST构建(0.0.0.0使用localhost) |
| 69 | 72 | API_BASE_URL = os.getenv('API_BASE_URL') |
| 70 | 73 | if not API_BASE_URL: | ... | ... |
| ... | ... | @@ -0,0 +1,12 @@ |
| 1 | +""" | |
| 2 | +Endpoint resolvers - delegate to services_config. | |
| 3 | + | |
| 4 | +Deprecated: use config.services_config directly. | |
| 5 | +Kept for backward compatibility. | |
| 6 | +""" | |
| 7 | + | |
| 8 | +from .services_config import ( | |
| 9 | + get_translation_base_url as resolve_translation_service_url, | |
| 10 | + get_embedding_base_url as resolve_embedding_service_url, | |
| 11 | + get_rerank_service_url as resolve_reranker_service_url, | |
| 12 | +) | ... | ... |
| ... | ... | @@ -0,0 +1,169 @@ |
| 1 | +""" | |
| 2 | +Services configuration - single source for translation, embedding, rerank providers. | |
| 3 | + | |
| 4 | +All provider selection and endpoint resolution is centralized here. | |
| 5 | +Priority: env vars > config.yaml > defaults. | |
| 6 | +""" | |
| 7 | + | |
| 8 | +from __future__ import annotations | |
| 9 | + | |
| 10 | +import os | |
| 11 | +from dataclasses import dataclass, field | |
| 12 | +from functools import lru_cache | |
| 13 | +from pathlib import Path | |
| 14 | +from typing import Any, Dict, Optional | |
| 15 | + | |
| 16 | +import yaml | |
| 17 | + | |
| 18 | + | |
| 19 | +@dataclass | |
| 20 | +class ServiceConfig: | |
| 21 | + """Config for one capability (translation/embedding/rerank).""" | |
| 22 | + provider: str | |
| 23 | + providers: Dict[str, Any] = field(default_factory=dict) | |
| 24 | + | |
| 25 | + def get_provider_cfg(self) -> Dict[str, Any]: | |
| 26 | + """Get config for current provider.""" | |
| 27 | + p = (self.provider or "").strip().lower() | |
| 28 | + return self.providers.get(p, {}) if isinstance(self.providers, dict) else {} | |
| 29 | + | |
| 30 | + | |
| 31 | +def _load_services_raw(config_path: Optional[Path] = None) -> Dict[str, Any]: | |
| 32 | + """Load services block from config.yaml.""" | |
| 33 | + if config_path is None: | |
| 34 | + config_path = Path(__file__).parent / "config.yaml" | |
| 35 | + path = Path(config_path) | |
| 36 | + if not path.exists(): | |
| 37 | + return {} | |
| 38 | + try: | |
| 39 | + with open(path, "r", encoding="utf-8") as f: | |
| 40 | + data = yaml.safe_load(f) | |
| 41 | + except Exception: | |
| 42 | + return {} | |
| 43 | + services = data.get("services") if isinstance(data, dict) else {} | |
| 44 | + return services if isinstance(services, dict) else {} | |
| 45 | + | |
| 46 | + | |
| 47 | +def _resolve_translation() -> ServiceConfig: | |
| 48 | + raw = _load_services_raw() | |
| 49 | + cfg = raw.get("translation", {}) if isinstance(raw.get("translation"), dict) else {} | |
| 50 | + providers = cfg.get("providers", {}) if isinstance(cfg.get("providers"), dict) else {} | |
| 51 | + | |
| 52 | + provider = ( | |
| 53 | + os.getenv("TRANSLATION_PROVIDER") | |
| 54 | + or cfg.get("provider") | |
| 55 | + or "direct" | |
| 56 | + ) | |
| 57 | + provider = str(provider).strip().lower() | |
| 58 | + | |
| 59 | + # Env override for http base_url | |
| 60 | + env_url = os.getenv("TRANSLATION_SERVICE_URL") | |
| 61 | + if env_url and provider == "http": | |
| 62 | + providers = dict(providers) | |
| 63 | + providers["http"] = dict(providers.get("http", {})) | |
| 64 | + providers["http"]["base_url"] = env_url.rstrip("/") | |
| 65 | + | |
| 66 | + return ServiceConfig(provider=provider, providers=providers) | |
| 67 | + | |
| 68 | + | |
| 69 | +def _resolve_embedding() -> ServiceConfig: | |
| 70 | + raw = _load_services_raw() | |
| 71 | + cfg = raw.get("embedding", {}) if isinstance(raw.get("embedding"), dict) else {} | |
| 72 | + providers = cfg.get("providers", {}) if isinstance(cfg.get("providers"), dict) else {} | |
| 73 | + | |
| 74 | + provider = ( | |
| 75 | + os.getenv("EMBEDDING_PROVIDER") | |
| 76 | + or cfg.get("provider") | |
| 77 | + or "http" | |
| 78 | + ) | |
| 79 | + provider = str(provider).strip().lower() | |
| 80 | + | |
| 81 | + env_url = os.getenv("EMBEDDING_SERVICE_URL") | |
| 82 | + if env_url and provider == "http": | |
| 83 | + providers = dict(providers) | |
| 84 | + providers["http"] = dict(providers.get("http", {})) | |
| 85 | + providers["http"]["base_url"] = env_url.rstrip("/") | |
| 86 | + | |
| 87 | + return ServiceConfig(provider=provider, providers=providers) | |
| 88 | + | |
| 89 | + | |
| 90 | +def _resolve_rerank() -> ServiceConfig: | |
| 91 | + raw = _load_services_raw() | |
| 92 | + cfg = raw.get("rerank", {}) if isinstance(raw.get("rerank"), dict) else {} | |
| 93 | + providers = cfg.get("providers", {}) if isinstance(cfg.get("providers"), dict) else {} | |
| 94 | + | |
| 95 | + provider = ( | |
| 96 | + os.getenv("RERANK_PROVIDER") | |
| 97 | + or cfg.get("provider") | |
| 98 | + or "http" | |
| 99 | + ) | |
| 100 | + provider = str(provider).strip().lower() | |
| 101 | + | |
| 102 | + env_url = os.getenv("RERANKER_SERVICE_URL") | |
| 103 | + if env_url: | |
| 104 | + url = env_url.rstrip("/") | |
| 105 | + if not url.endswith("/rerank"): | |
| 106 | + url = f"{url}/rerank" if "/rerank" not in url else url | |
| 107 | + providers = dict(providers) | |
| 108 | + providers["http"] = dict(providers.get("http", {})) | |
| 109 | + providers["http"]["base_url"] = url.replace("/rerank", "") | |
| 110 | + providers["http"]["service_url"] = url | |
| 111 | + | |
| 112 | + return ServiceConfig(provider=provider, providers=providers) | |
| 113 | + | |
| 114 | + | |
| 115 | +@lru_cache(maxsize=1) | |
| 116 | +def get_translation_config() -> ServiceConfig: | |
| 117 | + """Get translation service config.""" | |
| 118 | + return _resolve_translation() | |
| 119 | + | |
| 120 | + | |
| 121 | +@lru_cache(maxsize=1) | |
| 122 | +def get_embedding_config() -> ServiceConfig: | |
| 123 | + """Get embedding service config.""" | |
| 124 | + return _resolve_embedding() | |
| 125 | + | |
| 126 | + | |
| 127 | +@lru_cache(maxsize=1) | |
| 128 | +def get_rerank_config() -> ServiceConfig: | |
| 129 | + """Get rerank service config.""" | |
| 130 | + return _resolve_rerank() | |
| 131 | + | |
| 132 | + | |
| 133 | +def get_translation_base_url() -> str: | |
| 134 | + """Resolve translation HTTP base URL (for http provider).""" | |
| 135 | + base = ( | |
| 136 | + os.getenv("TRANSLATION_SERVICE_URL") | |
| 137 | + or get_translation_config().providers.get("http", {}).get("base_url") | |
| 138 | + or "http://127.0.0.1:6006" | |
| 139 | + ) | |
| 140 | + return str(base).rstrip("/") | |
| 141 | + | |
| 142 | + | |
| 143 | +def get_embedding_base_url() -> str: | |
| 144 | + """Resolve embedding HTTP base URL.""" | |
| 145 | + base = ( | |
| 146 | + os.getenv("EMBEDDING_SERVICE_URL") | |
| 147 | + or get_embedding_config().providers.get("http", {}).get("base_url") | |
| 148 | + or "http://127.0.0.1:6005" | |
| 149 | + ) | |
| 150 | + return str(base).rstrip("/") | |
| 151 | + | |
| 152 | + | |
| 153 | +def get_rerank_service_url() -> str: | |
| 154 | + """Resolve rerank service URL (full path including /rerank).""" | |
| 155 | + base = ( | |
| 156 | + os.getenv("RERANKER_SERVICE_URL") | |
| 157 | + or get_rerank_config().providers.get("http", {}).get("service_url") | |
| 158 | + or get_rerank_config().providers.get("http", {}).get("base_url") | |
| 159 | + or "http://127.0.0.1:6007" | |
| 160 | + ) | |
| 161 | + base = str(base).rstrip("/") | |
| 162 | + return base if base.endswith("/rerank") else f"{base}/rerank" | |
| 163 | + | |
| 164 | + | |
| 165 | +def clear_services_cache() -> None: | |
| 166 | + """Clear cached config (for tests).""" | |
| 167 | + get_translation_config.cache_clear() | |
| 168 | + get_embedding_config.cache_clear() | |
| 169 | + get_rerank_config.cache_clear() | ... | ... |
| ... | ... | @@ -0,0 +1,202 @@ |
| 1 | +# 能力提供者架构评估与统一改造方案 | |
| 2 | + | |
| 3 | +> **已落地**。实现见 `providers/`、`config/services_config.py`。使用与扩展见 `docs/PROVIDER_ARCHITECTURE.md`。 | |
| 4 | + | |
| 5 | +--- | |
| 6 | + | |
| 7 | +## 一、当前状态梳理 | |
| 8 | + | |
| 9 | +### 1.1 两种“可插拔”的辨析 | |
| 10 | + | |
| 11 | +| 模式 | 含义 | 当前是否存在 | | |
| 12 | +|------|------|--------------| | |
| 13 | +| **提供者内部可选择** | 某个 provider(如翻译)内部封装多种实现(如 qwen/deepl),内部切换 | 部分存在:`direct` 的 Translator 内部可选 qwen/deepl | | |
| 14 | +| **平台级多 provider** | 平台定义能力抽象,多个独立 provider 注册,通过配置切换 | 存在:translation 的 direct/http,rerank 的 http/vllm | | |
| 15 | + | |
| 16 | +**结论**:当前是 **平台级可插拔** 为主,但实现不统一、配置分散,造成混乱。 | |
| 17 | + | |
| 18 | +### 1.2 三种能力的实现对比 | |
| 19 | + | |
| 20 | +| 能力 | 抽象层 | Provider 实现 | 配置来源 | 问题 | | |
| 21 | +|------|--------|---------------|----------|------| | |
| 22 | +| **翻译** | `create_translation_client()` | direct, http | `query_config` + `services.translation` | 双重配置源,优先级链复杂 | | |
| 23 | +| **重排** | `create_rerank_client()` | http, vllm(reserved) | `rerank` 块 + `services.rerank` | 同上 | | |
| 24 | +| **向量化** | 无 | 仅 HTTP 直连 | `service_endpoints` 读 `services.embedding` | 无 provider 抽象,只有 endpoint 解析 | | |
| 25 | + | |
| 26 | +### 1.3 配置分散问题 | |
| 27 | + | |
| 28 | +``` | |
| 29 | +config.yaml 中: | |
| 30 | +├── query_config.translation_provider | |
| 31 | +├── query_config.translation_providers | |
| 32 | +├── query_config.translation_service_url | |
| 33 | +├── rerank.rerank_provider | |
| 34 | +├── rerank.rerank_providers | |
| 35 | +├── rerank.service_url | |
| 36 | +└── services.{translation,embedding,rerank} # 又一整套 | |
| 37 | +``` | |
| 38 | + | |
| 39 | +config_loader 用冗长的优先级链合并(env > query_config > services > defaults),维护成本高。 | |
| 40 | + | |
| 41 | +--- | |
| 42 | + | |
| 43 | +## 二、统一架构原则 | |
| 44 | + | |
| 45 | +### 2.1 设计目标 | |
| 46 | + | |
| 47 | +1. **单一配置源**:每种能力只在一个地方配置 | |
| 48 | +2. **统一抽象模式**:translation / embedding / rerank 采用相同结构 | |
| 49 | +3. **平台级可插拔**:能力 = 接口 + 多 provider 实现,通过配置切换 | |
| 50 | +4. **丢弃历史包袱**:移除冗余配置、合并重复逻辑 | |
| 51 | + | |
| 52 | +### 2.2 推荐方案:平台级 Provider Registry | |
| 53 | + | |
| 54 | +**核心思想**:平台定义“能力”,每种能力有统一接口;多个 provider 实现该接口;配置只在一个地方。 | |
| 55 | + | |
| 56 | +``` | |
| 57 | +┌─────────────────────────────────────────────────────────────┐ | |
| 58 | +│ Platform (Search Engine) │ | |
| 59 | +├─────────────────────────────────────────────────────────────┤ | |
| 60 | +│ Capability: Translation Embedding Rerank │ | |
| 61 | +│ │ │ │ │ │ | |
| 62 | +│ ▼ ▼ ▼ ▼ │ | |
| 63 | +│ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ | |
| 64 | +│ │ direct │ │ http │ │ http │ ← Provider 实现 │ | |
| 65 | +│ │ http │ │ vllm │ │ vllm │ (可扩展) │ | |
| 66 | +│ │ google │ │ │ │ │ │ | |
| 67 | +│ └─────────┘ └─────────┘ └─────────┘ │ | |
| 68 | +│ ▲ ▲ ▲ │ | |
| 69 | +│ └────────────┴────────────┴── 统一从 services.* 读取 │ | |
| 70 | +└─────────────────────────────────────────────────────────────┘ | |
| 71 | +``` | |
| 72 | + | |
| 73 | +--- | |
| 74 | + | |
| 75 | +## 三、统一改造方案 | |
| 76 | + | |
| 77 | +### 3.1 配置结构(单一源) | |
| 78 | + | |
| 79 | +**只保留 `services` 块**,移除 query_config / rerank 中的 provider 相关字段: | |
| 80 | + | |
| 81 | +```yaml | |
| 82 | +services: | |
| 83 | + translation: | |
| 84 | + provider: "direct" # 当前使用的 provider | |
| 85 | + providers: | |
| 86 | + direct: | |
| 87 | + model: "qwen" | |
| 88 | + http: | |
| 89 | + base_url: "http://127.0.0.1:6006" | |
| 90 | + model: "qwen" | |
| 91 | + timeout_sec: 10.0 | |
| 92 | + google: | |
| 93 | + enabled: false | |
| 94 | + # ... | |
| 95 | + | |
| 96 | + embedding: | |
| 97 | + provider: "http" | |
| 98 | + providers: | |
| 99 | + http: | |
| 100 | + base_url: "http://127.0.0.1:6005" | |
| 101 | + vllm: | |
| 102 | + enabled: false | |
| 103 | + # ... | |
| 104 | + | |
| 105 | + rerank: | |
| 106 | + provider: "http" | |
| 107 | + providers: | |
| 108 | + http: | |
| 109 | + base_url: "http://127.0.0.1:6007" | |
| 110 | + service_path: "/rerank" | |
| 111 | + vllm: | |
| 112 | + enabled: false | |
| 113 | + # ... | |
| 114 | +``` | |
| 115 | + | |
| 116 | +**环境变量**(部署态覆盖)保持简洁: | |
| 117 | +- `TRANSLATION_PROVIDER`, `TRANSLATION_SERVICE_URL` | |
| 118 | +- `EMBEDDING_PROVIDER`, `EMBEDDING_SERVICE_URL` | |
| 119 | +- `RERANK_PROVIDER`, `RERANKER_SERVICE_URL` | |
| 120 | + | |
| 121 | +### 3.2 统一 Provider 创建入口 | |
| 122 | + | |
| 123 | +新建 `providers/` 模块,统一工厂: | |
| 124 | + | |
| 125 | +```python | |
| 126 | +# providers/__init__.py | |
| 127 | +def create_translation_provider(config: ServicesConfig) -> TranslationProvider | |
| 128 | +def create_embedding_provider(config: ServicesConfig) -> EmbeddingProvider | |
| 129 | +def create_rerank_provider(config: ServicesConfig) -> RerankProvider | |
| 130 | +``` | |
| 131 | + | |
| 132 | +每个 factory 从 `config.services["translation"]` 等读取,不再从 query_config / rerank 块读取。 | |
| 133 | + | |
| 134 | +### 3.3 能力接口(Protocol) | |
| 135 | + | |
| 136 | +```python | |
| 137 | +# providers/base.py | |
| 138 | +class TranslationProvider(Protocol): | |
| 139 | + def translate(self, text, target_lang, ...) -> Optional[str]: ... | |
| 140 | + def translate_for_indexing(self, ...) -> Dict[str, Optional[str]]: ... | |
| 141 | + | |
| 142 | +class EmbeddingProvider(Protocol): | |
| 143 | + def encode_text(self, texts: List[str]) -> np.ndarray: ... | |
| 144 | + def encode_image(self, url: str) -> Optional[np.ndarray]: ... | |
| 145 | + | |
| 146 | +class RerankProvider(Protocol): | |
| 147 | + def rerank(self, query: str, docs: List[str], timeout: float) -> Tuple[Optional[List[float]], ...]: ... | |
| 148 | +``` | |
| 149 | + | |
| 150 | +### 3.4 迁移步骤 | |
| 151 | + | |
| 152 | +| 步骤 | 内容 | | |
| 153 | +|------|------| | |
| 154 | +| 1 | 新建 `config/services_config.py`,定义 `ServicesConfig`,只从 `services` 块加载 | | |
| 155 | +| 2 | 新建 `providers/` 目录,实现 `create_*_provider()`,迁移 translation/rerank 逻辑 | | |
| 156 | +| 3 | 为 embedding 增加 provider 抽象(HttpEmbeddingProvider),封装 BgeEncoder/CLIPImageEncoder 的 HTTP 调用 | | |
| 157 | +| 4 | 从 `query_config` 移除 translation_provider/providers/service_url 等 | | |
| 158 | +| 5 | 从 `rerank` 块移除 rerank_provider/providers/service_url 等 | | |
| 159 | +| 6 | 精简 `config_loader.py`,删除冗长的 provider 合并逻辑 | | |
| 160 | +| 7 | 更新 `config.yaml`,删除重复配置 | | |
| 161 | +| 8 | 调用方改为使用 `create_*_provider(services_config)` | | |
| 162 | + | |
| 163 | +--- | |
| 164 | + | |
| 165 | +## 四、回答核心问题 | |
| 166 | + | |
| 167 | +### Q1: 可插拔是提供者内部可选择,还是平台多 provider? | |
| 168 | + | |
| 169 | +**答**:采用 **平台级多 provider**。每种能力(translation/embedding/rerank)在平台层定义接口,多个独立 provider 实现该接口,通过配置切换。提供者内部(如 direct 的 qwen/deepl)可作为该 provider 的子选项,但不作为平台级扩展点。 | |
| 170 | + | |
| 171 | +### Q2: 现在是两者都有吗? | |
| 172 | + | |
| 173 | +**答**:之前是混合状态——配置分散、三种能力实现不一致。改造后 **只保留平台级可插拔**,结构统一。 | |
| 174 | + | |
| 175 | +### Q3: 如何减少混乱、架构清晰? | |
| 176 | + | |
| 177 | +**答**: | |
| 178 | +1. **单一配置源**:`services.{translation,embedding,rerank}` | |
| 179 | +2. **统一模式**:每种能力 = Protocol + factory + 多 provider 实现 | |
| 180 | +3. **丢弃冗余**:删除 query_config/rerank 中的 provider 配置,删除 service_endpoints 中的重复解析逻辑 | |
| 181 | + | |
| 182 | +--- | |
| 183 | + | |
| 184 | +## 五、改造后的目录结构 | |
| 185 | + | |
| 186 | +``` | |
| 187 | +providers/ | |
| 188 | +├── __init__.py # create_*_provider 导出 | |
| 189 | +├── base.py # Protocol 定义 | |
| 190 | +├── translation/ | |
| 191 | +│ ├── direct.py # 进程内 Translator | |
| 192 | +│ ├── http.py # HttpTranslationClient | |
| 193 | +│ └── ... | |
| 194 | +├── embedding/ | |
| 195 | +│ ├── http.py # HttpEmbeddingProvider (封装 BgeEncoder/CLIP 的 HTTP) | |
| 196 | +│ └── vllm.py # reserved | |
| 197 | +└── rerank/ | |
| 198 | + ├── http.py # HttpRerankClient | |
| 199 | + └── vllm.py # reserved | |
| 200 | +``` | |
| 201 | + | |
| 202 | +调用方(query_parser, searcher, indexer)只依赖 `providers.create_*_provider(services_config)`,不关心具体实现。 | ... | ... |
docs/CNCLIP_SERVICE说明文档.md
| ... | ... | @@ -0,0 +1,61 @@ |
| 1 | +# Provider 架构与扩展指南 | |
| 2 | + | |
| 3 | +本文档说明如何统一管理翻译、向量化、重排等“能力提供者(provider)”。 | |
| 4 | + | |
| 5 | +## 1. 设计目标 | |
| 6 | + | |
| 7 | +- **调用方稳定**:业务代码不关心具体供应商,只调用统一接口。 | |
| 8 | +- **配置可切换**:通过配置切换 provider,不改业务代码。 | |
| 9 | +- **单一配置源**:所有 provider 配置在 `config/config.yaml` 的 `services` 块。 | |
| 10 | + | |
| 11 | +## 2. 当前落地状态 | |
| 12 | + | |
| 13 | +### 2.1 统一入口 | |
| 14 | + | |
| 15 | +- **模块**:`providers/` | |
| 16 | +- **工厂**:`create_translation_provider()`, `create_rerank_provider()`, `create_embedding_provider()` | |
| 17 | +- **配置**:`config/services_config.py` 从 `services` 块加载,env 可覆盖 | |
| 18 | + | |
| 19 | +### 2.2 翻译 | |
| 20 | + | |
| 21 | +- `providers/translation.py`:`direct`(进程内 Translator)、`http`(HTTP 服务) | |
| 22 | +- 调用方:`query/query_parser.py`, `indexer/indexing_utils.py` | |
| 23 | + | |
| 24 | +### 2.3 重排 | |
| 25 | + | |
| 26 | +- `providers/rerank.py`:`http`(vllm 预留) | |
| 27 | +- 调用方:`search/rerank_client.py` → `run_rerank()` | |
| 28 | + | |
| 29 | +### 2.4 向量化 | |
| 30 | + | |
| 31 | +- `providers/embedding.py`:`http`(vllm 预留) | |
| 32 | +- 封装 `BgeEncoder` / `CLIPImageEncoder`,URL 来自 `services_config` | |
| 33 | + | |
| 34 | +## 3. 配置 | |
| 35 | + | |
| 36 | +**单一配置源**:`config/config.yaml` 的 `services` 块。 | |
| 37 | + | |
| 38 | +```yaml | |
| 39 | +services: | |
| 40 | + translation: | |
| 41 | + provider: "direct" # direct | http | |
| 42 | + providers: | |
| 43 | + direct: { model: "qwen" } | |
| 44 | + http: { base_url: "http://127.0.0.1:6006", model: "qwen", timeout_sec: 10.0 } | |
| 45 | + embedding: | |
| 46 | + provider: "http" | |
| 47 | + providers: | |
| 48 | + http: { base_url: "http://127.0.0.1:6005" } | |
| 49 | + rerank: | |
| 50 | + provider: "http" | |
| 51 | + providers: | |
| 52 | + http: { base_url: "http://127.0.0.1:6007" } | |
| 53 | +``` | |
| 54 | + | |
| 55 | +**环境变量**(部署态覆盖):`TRANSLATION_PROVIDER`, `TRANSLATION_SERVICE_URL`, `EMBEDDING_SERVICE_URL`, `RERANKER_SERVICE_URL` | |
| 56 | + | |
| 57 | +## 4. 新增 provider | |
| 58 | + | |
| 59 | +1. 在 `providers/<capability>.py` 中实现新 provider 类 | |
| 60 | +2. 在 `create_*_provider()` 中注册分支 | |
| 61 | +3. 在 `config/config.yaml` 的 `services.<capability>.providers` 中补充参数 | ... | ... |
| ... | ... | @@ -0,0 +1,122 @@ |
| 1 | +# 开发者快速上手 | |
| 2 | + | |
| 3 | +新人入口文档:环境、服务、模块、请求示例一页搞定。 | |
| 4 | + | |
| 5 | +## 1. 环境 | |
| 6 | + | |
| 7 | +```bash | |
| 8 | +source activate.sh | |
| 9 | +# 首次使用需先创建环境,再执行上面的 source:./scripts/create_venv.sh 或 conda env create -f environment.yml | |
| 10 | +``` | |
| 11 | + | |
| 12 | +依赖:Python 3.8+、Elasticsearch 8.x、MySQL、Redis(可选)。详见 `docs/环境配置说明.md`。 | |
| 13 | + | |
| 14 | +## 2. 服务与端口 | |
| 15 | + | |
| 16 | +| 服务 | 端口 | 默认启动 | 说明 | | |
| 17 | +|------|-----:|:--------:|------| | |
| 18 | +| backend | 6002 | ✓ | 搜索 API | | |
| 19 | +| indexer | 6004 | ✓ | 索引 API | | |
| 20 | +| frontend | 6003 | ✓ | 调试 UI | | |
| 21 | +| embedding | 6005 | - | 向量服务 | | |
| 22 | +| translator | 6006 | - | 翻译服务 | | |
| 23 | +| reranker | 6007 | - | 重排服务 | | |
| 24 | + | |
| 25 | +```bash | |
| 26 | +./run.sh | |
| 27 | +# 全功能:START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh | |
| 28 | +./scripts/service_ctl.sh status | |
| 29 | +./scripts/stop.sh | |
| 30 | +``` | |
| 31 | + | |
| 32 | +## 3. 模块与请求 | |
| 33 | + | |
| 34 | +### 3.1 搜索 API(backend 6002) | |
| 35 | + | |
| 36 | +```bash | |
| 37 | +# 文本搜索 | |
| 38 | +curl -X POST http://localhost:6002/search/ \ | |
| 39 | + -H "Content-Type: application/json" \ | |
| 40 | + -H "X-Tenant-ID: 162" \ | |
| 41 | + -d '{"query": "玩具", "size": 10}' | |
| 42 | + | |
| 43 | +# 图片搜索 | |
| 44 | +curl -X POST http://localhost:6002/search/image \ | |
| 45 | + -H "Content-Type: application/json" \ | |
| 46 | + -H "X-Tenant-ID: 162" \ | |
| 47 | + -d '{"image_url": "https://example.com/img.jpg", "size": 10}' | |
| 48 | + | |
| 49 | +# 建议 | |
| 50 | +curl "http://localhost:6002/search/suggestions?q=玩&size=5" -H "X-Tenant-ID: 162" | |
| 51 | +``` | |
| 52 | + | |
| 53 | +API 文档:http://localhost:6002/docs | |
| 54 | + | |
| 55 | +### 3.2 索引 API(indexer 6004) | |
| 56 | + | |
| 57 | +```bash | |
| 58 | +# 创建租户索引 | |
| 59 | +./scripts/create_tenant_index.sh 162 | |
| 60 | + | |
| 61 | +# 全量索引 | |
| 62 | +curl -X POST http://localhost:6004/indexer/reindex \ | |
| 63 | + -H "Content-Type: application/json" \ | |
| 64 | + -d '{"tenant_id": "162", "batch_size": 500}' | |
| 65 | + | |
| 66 | +# 构建文档(不写 ES,供上游调用) | |
| 67 | +curl -X POST http://localhost:6004/indexer/build-docs \ | |
| 68 | + -H "Content-Type: application/json" \ | |
| 69 | + -d '{"tenant_id": "162", "items": [{"spu": {...}, "skus": [...], "options": [...]}]}' | |
| 70 | +``` | |
| 71 | + | |
| 72 | +### 3.3 向量服务(embedding 6005) | |
| 73 | + | |
| 74 | +```bash | |
| 75 | +./scripts/start_embedding_service.sh | |
| 76 | + | |
| 77 | +# 文本向量 | |
| 78 | +curl -X POST http://localhost:6005/embed/text \ | |
| 79 | + -H "Content-Type: application/json" \ | |
| 80 | + -d '["衣服", "Bohemian Maxi Dress"]' | |
| 81 | + | |
| 82 | +# 图片向量(URL 列表) | |
| 83 | +curl -X POST http://localhost:6005/embed/image \ | |
| 84 | + -H "Content-Type: application/json" \ | |
| 85 | + -d '["https://example.com/img.jpg"]' | |
| 86 | +``` | |
| 87 | + | |
| 88 | +### 3.4 翻译服务(translator 6006) | |
| 89 | + | |
| 90 | +```bash | |
| 91 | +./scripts/start_translator.sh | |
| 92 | + | |
| 93 | +curl -X POST http://localhost:6006/translate \ | |
| 94 | + -H "Content-Type: application/json" \ | |
| 95 | + -d '{"text": "商品名称", "target_lang": "en", "source_lang": "zh"}' | |
| 96 | +``` | |
| 97 | + | |
| 98 | +### 3.5 重排服务(reranker 6007) | |
| 99 | + | |
| 100 | +```bash | |
| 101 | +./scripts/start_reranker.sh | |
| 102 | + | |
| 103 | +curl -X POST http://localhost:6007/rerank \ | |
| 104 | + -H "Content-Type: application/json" \ | |
| 105 | + -d '{"query": "wireless mouse", "docs": ["logitech mx master", "usb cable"]}' | |
| 106 | +``` | |
| 107 | + | |
| 108 | +## 4. 配置 | |
| 109 | + | |
| 110 | +- **主配置**:`config/config.yaml`(搜索行为、字段权重、分面等) | |
| 111 | +- **服务 provider**:`config/config.yaml` 的 `services` 块(翻译/向量/重排的 provider 与 URL) | |
| 112 | +- **环境变量**:`.env`(DB、ES、Redis、API Key 等) | |
| 113 | + | |
| 114 | +## 5. 延伸阅读 | |
| 115 | + | |
| 116 | +| 文档 | 用途 | | |
| 117 | +|------|------| | |
| 118 | +| `docs/Usage-Guide.md` | 运维:日志、多环境、故障排查 | | |
| 119 | +| `docs/搜索API速查表.md` | 搜索 API 参数速查 | | |
| 120 | +| `docs/搜索API对接指南.md` | 搜索 API 完整说明 | | |
| 121 | +| `docs/PROVIDER_ARCHITECTURE.md` | 翻译/向量/重排 provider 扩展 | | |
| 122 | +| `indexer/README.md` | 索引模块职责与接口 | | ... | ... |
docs/SERVICE_MATRIX.md deleted
| ... | ... | @@ -1,57 +0,0 @@ |
| 1 | -# 服务矩阵(Service Matrix) | |
| 2 | - | |
| 3 | -本文档定义当前项目的服务分层、默认启动策略与脚本入口。 | |
| 4 | - | |
| 5 | -## 1. 服务分层 | |
| 6 | - | |
| 7 | -| 服务 | 角色 | 默认端口 | 是否默认启动 | 启动脚本 | 停止方式 | | |
| 8 | -|---|---|---:|---|---|---| | |
| 9 | -| backend | 核心搜索 API | 6002 | 是 | `scripts/start_backend.sh` | `scripts/service_ctl.sh stop backend` | | |
| 10 | -| indexer | 核心索引 API | 6004 | 是 | `scripts/start_indexer.sh` | `scripts/service_ctl.sh stop indexer` | | |
| 11 | -| frontend | 调试 UI | 6003 | 是 | `scripts/start_frontend.sh` | `scripts/service_ctl.sh stop frontend` | | |
| 12 | -| embedding | 向量服务(文本/图片) | 6005 | 否(按需) | `scripts/start_embedding_service.sh` | `scripts/service_ctl.sh stop embedding` | | |
| 13 | -| translator | 翻译服务(qwen/deepl) | 6006 | 否(按需) | `scripts/start_translator.sh` | `scripts/service_ctl.sh stop translator` | | |
| 14 | -| reranker | 重排服务(BGE) | 6007 | 否(按需) | `scripts/start_reranker.sh` | `scripts/service_ctl.sh stop reranker` | | |
| 15 | -| clip | CLIP 替代服务(legacy/可选) | 51000 | 否(按需) | `scripts/start_clip_service.sh` | `scripts/service_ctl.sh stop clip` | | |
| 16 | -| cnclip | CN-CLIP gRPC 服务(legacy/可选) | 51000 | 否(按需) | `scripts/start_cnclip_service.sh` | `scripts/service_ctl.sh stop cnclip` | | |
| 17 | - | |
| 18 | -> 说明:`clip` 与 `cnclip` 都是 legacy 服务,脚本内部自带后台化与 PID 管理,`service_ctl.sh` 仅做编排与委托。 | |
| 19 | - | |
| 20 | -## 2. 统一控制入口 | |
| 21 | - | |
| 22 | -- 推荐统一入口:`scripts/service_ctl.sh` | |
| 23 | -- 支持命令:`start` / `stop` / `restart` / `status` | |
| 24 | - | |
| 25 | -示例: | |
| 26 | - | |
| 27 | -```bash | |
| 28 | -# 启动核心服务(backend/indexer/frontend) | |
| 29 | -./scripts/service_ctl.sh start | |
| 30 | - | |
| 31 | -# 启动指定服务 | |
| 32 | -./scripts/service_ctl.sh start backend indexer frontend translator reranker | |
| 33 | - | |
| 34 | -# 查看所有服务状态 | |
| 35 | -./scripts/service_ctl.sh status | |
| 36 | - | |
| 37 | -# 停止全部已知服务 | |
| 38 | -./scripts/service_ctl.sh stop | |
| 39 | -``` | |
| 40 | - | |
| 41 | -## 3. 默认与可选服务策略 | |
| 42 | - | |
| 43 | -- `./run.sh` 默认只启动核心服务:`backend/indexer/frontend` | |
| 44 | -- 如需启动可选能力,使用环境变量: | |
| 45 | - | |
| 46 | -```bash | |
| 47 | -START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh | |
| 48 | -``` | |
| 49 | - | |
| 50 | -## 4. 兼容入口 | |
| 51 | - | |
| 52 | -以下脚本仍保留,用于兼容旧习惯,但内部已委托到统一控制脚本: | |
| 53 | - | |
| 54 | -- `run.sh` | |
| 55 | -- `restart.sh` | |
| 56 | -- `scripts/start.sh` | |
| 57 | -- `scripts/stop.sh` |
docs/向量化模块和API说明文档.md
| 1 | -# 向量化模块和API说明文档 | |
| 1 | +# 向量化模块 | |
| 2 | 2 | |
| 3 | -本文档详细说明saas-search项目中的向量化模块架构、API接口、配置方法和使用指南。 | |
| 3 | +**快速上手**:见 `docs/QUICKSTART.md` 第 3.3 节。 | |
| 4 | 4 | |
| 5 | -## 目录 | |
| 5 | +## 服务接口 | |
| 6 | 6 | |
| 7 | -1. [概述](#概述) | |
| 8 | - - 1.1 [向量化模块简介](#11-向量化模块简介) | |
| 9 | - - 1.2 [技术选型](#12-技术选型) | |
| 10 | - - 1.3 [应用场景](#13-应用场景) | |
| 7 | +- `POST /embed/text`:文本向量,入参 `["text1", "text2"]`,出参 `[[...], [...]]` | |
| 8 | +- `POST /embed/image`:图片向量,入参 `["url1", "url2"]`,出参 `[[...], [...]]` | |
| 11 | 9 | |
| 12 | -2. [向量化服务架构](#向量化服务架构) | |
| 13 | - - 2.1 [本地向量化服务](#21-本地向量化服务) | |
| 14 | - - 2.2 [云端向量化服务](#22-云端向量化服务) | |
| 15 | - - 2.3 [架构对比](#23-架构对比) | |
| 10 | +## 配置 | |
| 16 | 11 | |
| 17 | -3. [本地向量化服务](#本地向量化服务) | |
| 18 | - - 3.1 [服务启动](#31-服务启动) | |
| 19 | - - 3.2 [服务配置](#32-服务配置) | |
| 20 | - - 3.3 [模型说明](#33-模型说明) | |
| 12 | +- Provider/URL:`config/config.yaml` 的 `services.embedding` | |
| 13 | +- 模型路径:`embeddings/config.py` 或 env `TEXT_MODEL_DIR`、`IMAGE_MODEL_DIR` | |
| 21 | 14 | |
| 22 | -4. [云端向量化服务](#云端向量化服务) | |
| 23 | - - 4.1 [阿里云DashScope](#41-阿里云dashscope) | |
| 24 | - - 4.2 [API Key配置](#42-api-key配置) | |
| 25 | - - 4.3 [使用方式](#43-使用方式) | |
| 26 | - | |
| 27 | -5. [Embedding API详细说明](#embedding-api详细说明) | |
| 28 | - - 5.1 [API概览](#51-api概览) | |
| 29 | - - 5.2 [健康检查接口](#52-健康检查接口) | |
| 30 | - - 5.3 [文本向量化接口](#53-文本向量化接口) | |
| 31 | - - 5.4 [图片向量化接口](#54-图片向量化接口) | |
| 32 | - - 5.5 [错误处理](#55-错误处理) | |
| 33 | - | |
| 34 | -6. [配置说明](#配置说明) | |
| 35 | - - 6.1 [服务配置](#61-服务配置) | |
| 36 | - - 6.2 [模型配置](#62-模型配置) | |
| 37 | - - 6.3 [批处理配置](#63-批处理配置) | |
| 38 | - | |
| 39 | -7. [客户端集成示例](#客户端集成示例) | |
| 40 | - - 7.1 [Python客户端](#71-python客户端) | |
| 41 | - - 7.2 [Java客户端](#72-java客户端) | |
| 42 | - - 7.3 [cURL示例](#73-curl示例) | |
| 43 | - | |
| 44 | -8. [性能对比与优化](#性能对比与优化) | |
| 45 | - - 8.1 [性能对比](#81-性能对比) | |
| 46 | - - 8.2 [成本对比](#82-成本对比) | |
| 47 | - - 8.3 [优化建议](#83-优化建议) | |
| 48 | - | |
| 49 | -9. [故障排查](#故障排查) | |
| 50 | - - 9.1 [常见问题](#91-常见问题) | |
| 51 | - - 9.2 [日志查看](#92-日志查看) | |
| 52 | - - 9.3 [性能调优](#93-性能调优) | |
| 53 | - | |
| 54 | -10. [附录](#附录) | |
| 55 | - - 10.1 [向量维度说明](#101-向量维度说明) | |
| 56 | - - 10.2 [模型版本信息](#102-模型版本信息) | |
| 57 | - - 10.3 [相关文档](#103-相关文档) | |
| 58 | - | |
| 59 | ---- | |
| 60 | - | |
| 61 | -## 概述 | |
| 62 | - | |
| 63 | -### 1.1 向量化模块简介 | |
| 64 | - | |
| 65 | -saas-search项目实现了完整的文本和图片向量化能力,支持两种部署方式: | |
| 66 | - | |
| 67 | -1. **本地向量化服务**:独立部署的微服务,基于本地GPU/CPU运行BGE-M3和CN-CLIP模型 | |
| 68 | -2. **云端向量化服务**:集成阿里云DashScope API,按使用量付费 | |
| 69 | - | |
| 70 | -向量化模块是搜索引擎的核心组件,为语义搜索、图片搜索提供AI驱动的相似度计算能力。 | |
| 71 | - | |
| 72 | -### 1.2 技术选型 | |
| 73 | - | |
| 74 | -| 功能 | 本地服务 | 云端服务 | | |
| 75 | -|------|---------|---------| | |
| 76 | -| **文本模型** | BGE-M3 (Xorbits/bge-m3) | text-embedding-v4 | | |
| 77 | -| **图片模型** | CN-CLIP (ViT-H-14) | - | | |
| 78 | -| **向量维度** | 1024 | 1024 | | |
| 79 | -| **服务框架** | FastAPI | 阿里云API | | |
| 80 | -| **部署方式** | Docker/本地 | 云端API | | |
| 81 | - | |
| 82 | -### 1.3 应用场景 | |
| 83 | - | |
| 84 | -- **语义搜索**:查询文本向量化,与商品向量计算相似度 | |
| 85 | -- **图片搜索**:商品图片向量化,支持以图搜图 | |
| 86 | -- **混合检索**:BM25 + 向量相似度组合排序 | |
| 87 | -- **多语言搜索**:中英文跨语言语义理解 | |
| 88 | - | |
| 89 | ---- | |
| 90 | - | |
| 91 | -## 向量化服务架构 | |
| 92 | - | |
| 93 | -### 2.1 本地向量化服务 | |
| 94 | - | |
| 95 | -``` | |
| 96 | -┌─────────────────────────────────────────┐ | |
| 97 | -│ Embedding Microservice (FastAPI) │ | |
| 98 | -│ Port: 6005, Workers: 1 │ | |
| 99 | -└──────────────┬──────────────────────────┘ | |
| 100 | - │ | |
| 101 | - ┌───────┴───────┐ | |
| 102 | - │ │ | |
| 103 | -┌──────▼──────┐ ┌────▼─────┐ | |
| 104 | -│ BGE-M3 │ │ CN-CLIP │ | |
| 105 | -│ Text Model │ │ Image │ | |
| 106 | -│ (CUDA/CPU) │ │ Model │ | |
| 107 | -└─────────────┘ └──────────┘ | |
| 108 | -``` | |
| 109 | - | |
| 110 | -**核心特性**: | |
| 111 | -- 独立部署,可横向扩展 | |
| 112 | -- GPU加速支持 | |
| 113 | -- 线程安全设计 | |
| 114 | -- 启动时预加载模型 | |
| 115 | - | |
| 116 | -### 2.2 云端向量化服务 | |
| 117 | - | |
| 118 | -``` | |
| 119 | -┌─────────────────────────────────────┐ | |
| 120 | -│ saas-search Main Service │ | |
| 121 | -│ (uses CloudTextEncoder) │ | |
| 122 | -└──────────────┬──────────────────────┘ | |
| 123 | - │ | |
| 124 | - ▼ | |
| 125 | -┌─────────────────────────────────────┐ | |
| 126 | -│ Aliyun DashScope API │ | |
| 127 | -│ text-embedding-v4 │ | |
| 128 | -│ (HTTP/REST) │ | |
| 129 | -└─────────────────────────────────────┘ | |
| 130 | -``` | |
| 131 | - | |
| 132 | -**核心特性**: | |
| 133 | -- 无需GPU资源 | |
| 134 | -- 按使用量计费 | |
| 135 | -- 自动扩展 | |
| 136 | -- 低运维成本 | |
| 137 | - | |
| 138 | -### 2.3 架构对比 | |
| 139 | - | |
| 140 | -| 维度 | 本地服务 | 云端服务 | | |
| 141 | -|------|---------|---------| | |
| 142 | -| **初始成本** | 高(GPU服务器) | 低(按需付费) | | |
| 143 | -| **运行成本** | 固定 | 变动(按调用量) | | |
| 144 | -| **延迟** | <100ms | 300-400ms | | |
| 145 | -| **吞吐量** | 高(~32 qps) | 中(~2-3 qps) | | |
| 146 | -| **离线支持** | ✅ | ❌ | | |
| 147 | -| **维护成本** | 高 | 低 | | |
| 148 | -| **扩展性** | 手动扩展 | 自动扩展 | | |
| 149 | -| **适用场景** | 大规模生产环境 | 初期开发/小规模应用 | | |
| 150 | - | |
| 151 | ---- | |
| 152 | - | |
| 153 | -## 本地向量化服务 | |
| 154 | - | |
| 155 | -### 3.1 服务启动 | |
| 156 | - | |
| 157 | -#### 方式1:使用脚本启动(推荐) | |
| 158 | - | |
| 159 | -```bash | |
| 160 | -# 启动向量化服务 | |
| 161 | -./scripts/start_embedding_service.sh | |
| 162 | -``` | |
| 163 | - | |
| 164 | -脚本特性: | |
| 165 | -- 自动激活conda环境 | |
| 166 | -- 读取配置文件获取端口 | |
| 167 | -- 单worker模式启动服务 | |
| 168 | - | |
| 169 | -#### 方式2:手动启动 | |
| 170 | - | |
| 171 | -```bash | |
| 172 | -# 激活环境(推荐使用项目根目录 activate.sh;新机器按需 export CONDA_ROOT) | |
| 173 | -# 例如你的 conda 是 ~/anaconda3/bin/conda,则 export CONDA_ROOT=$HOME/anaconda3 | |
| 174 | -cd /data/saas-search | |
| 175 | -source activate.sh | |
| 176 | - | |
| 177 | -# 启动服务 | |
| 178 | -python -m uvicorn embeddings.server:app \ | |
| 179 | - --host 0.0.0.0 \ | |
| 180 | - --port 6005 \ | |
| 181 | - --workers 1 | |
| 182 | -``` | |
| 183 | - | |
| 184 | -#### 方式3:Docker部署(生产环境) | |
| 185 | - | |
| 186 | -```bash | |
| 187 | -# 构建镜像 | |
| 188 | -docker build -t searchengine-embedding:latest . | |
| 189 | - | |
| 190 | -# 启动容器 | |
| 191 | -docker run -d \ | |
| 192 | - --name embedding-service \ | |
| 193 | - --gpus all \ | |
| 194 | - -p 6005:6005 \ | |
| 195 | - searchengine-embedding:latest | |
| 196 | -``` | |
| 197 | - | |
| 198 | -### 3.2 服务配置 | |
| 199 | - | |
| 200 | -配置文件:`embeddings/config.py` | |
| 201 | - | |
| 202 | -```python | |
| 203 | -class EmbeddingConfig: | |
| 204 | - # 服务配置 | |
| 205 | - HOST = "0.0.0.0" # 监听地址 | |
| 206 | - PORT = 6005 # 监听端口 | |
| 207 | - | |
| 208 | - # 文本模型 (BGE-M3) | |
| 209 | - TEXT_MODEL_DIR = "Xorbits/bge-m3" # 模型路径/HuggingFace ID | |
| 210 | - TEXT_DEVICE = "cuda" # 设备: "cuda" 或 "cpu" | |
| 211 | - TEXT_BATCH_SIZE = 32 # 批处理大小 | |
| 212 | - | |
| 213 | - # 图片模型 (CN-CLIP) | |
| 214 | - IMAGE_MODEL_NAME = "ViT-H-14" # 模型名称 | |
| 215 | - IMAGE_DEVICE = None # None=自动, "cuda", "cpu" | |
| 216 | - IMAGE_BATCH_SIZE = 8 # 批处理大小 | |
| 217 | -``` | |
| 218 | - | |
| 219 | -### 3.3 模型说明 | |
| 220 | - | |
| 221 | -#### BGE-M3 文本模型 | |
| 222 | - | |
| 223 | -- **模型ID**: `Xorbits/bge-m3` | |
| 224 | -- **向量维度**: 1024 | |
| 225 | -- **支持语言**: 中文、英文、多语言(100+) | |
| 226 | -- **特性**: 强大的语义理解能力,支持长文本 | |
| 227 | -- **部署**: 自动从HuggingFace下载 | |
| 228 | - | |
| 229 | -#### CN-CLIP 图片模型 | |
| 230 | - | |
| 231 | -- **模型**: ViT-H-14 (Chinese CLIP) | |
| 232 | -- **向量维度**: 1024 | |
| 233 | -- **输入**: 图片URL或本地路径 | |
| 234 | -- **特性**: 中文图文理解,适合电商场景 | |
| 235 | -- **预处理**: 自动下载、缩放、归一化 | |
| 236 | - | |
| 237 | ---- | |
| 238 | - | |
| 239 | -## 云端向量化服务 | |
| 240 | - | |
| 241 | -### 4.1 阿里云DashScope | |
| 242 | - | |
| 243 | -**服务地址**: | |
| 244 | -- 北京地域:`https://dashscope.aliyuncs.com/compatible-mode/v1` | |
| 245 | -- 新加坡地域:`https://dashscope-intl.aliyuncs.com/compatible-mode/v1` | |
| 246 | - | |
| 247 | -**模型信息**: | |
| 248 | -- **模型名**: `text-embedding-v4` | |
| 249 | -- **向量维度**: 1024 | |
| 250 | -- **输入限制**: 单次最多2048个文本,每个文本最大8192 token | |
| 251 | -- **速率限制**: 根据API套餐不同而不同 | |
| 252 | - | |
| 253 | -### 4.2 API Key配置 | |
| 254 | - | |
| 255 | -#### 方式1:环境变量(推荐) | |
| 256 | - | |
| 257 | -```bash | |
| 258 | -# 临时设置 | |
| 259 | -export DASHSCOPE_API_KEY="sk-your-api-key-here" | |
| 260 | - | |
| 261 | -# 永久设置(添加到 ~/.bashrc 或 ~/.zshrc) | |
| 262 | -echo 'export DASHSCOPE_API_KEY="sk-your-api-key-here"' >> ~/.bashrc | |
| 263 | -source ~/.bashrc | |
| 264 | -``` | |
| 265 | - | |
| 266 | -#### 方式2:.env文件 | |
| 267 | - | |
| 268 | -在项目根目录创建`.env`文件: | |
| 269 | - | |
| 270 | -```bash | |
| 271 | -DASHSCOPE_API_KEY=sk-your-api-key-here | |
| 272 | -``` | |
| 273 | - | |
| 274 | -**获取API Key**:https://help.aliyun.com/zh/model-studio/get-api-key | |
| 275 | - | |
| 276 | -### 4.3 使用方式 | |
| 277 | - | |
| 278 | -```python | |
| 279 | -from embeddings.cloud_text_encoder import CloudTextEncoder | |
| 280 | - | |
| 281 | -# 初始化编码器(自动从环境变量读取API Key) | |
| 282 | -encoder = CloudTextEncoder() | |
| 283 | - | |
| 284 | -# 单个文本向量化 | |
| 285 | -text = "衣服的质量杠杠的" | |
| 286 | -embedding = encoder.encode(text) | |
| 287 | -print(embedding.shape) # (1, 1024) | |
| 288 | - | |
| 289 | -# 批量向量化 | |
| 290 | -texts = ["文本1", "文本2", "文本3"] | |
| 291 | -embeddings = encoder.encode(texts) | |
| 292 | -print(embeddings.shape) # (3, 1024) | |
| 293 | - | |
| 294 | -# 大批量处理(自动分批) | |
| 295 | -large_texts = [f"商品 {i}" for i in range(1000)] | |
| 296 | -embeddings = encoder.encode_batch(large_texts, batch_size=32) | |
| 297 | -``` | |
| 298 | - | |
| 299 | -**自定义配置**: | |
| 300 | - | |
| 301 | -```python | |
| 302 | -# 使用新加坡地域 | |
| 303 | -encoder = CloudTextEncoder( | |
| 304 | - api_key="sk-xxx", | |
| 305 | - base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1" | |
| 306 | -) | |
| 307 | -``` | |
| 308 | - | |
| 309 | ---- | |
| 310 | - | |
| 311 | -## Embedding API详细说明 | |
| 312 | - | |
| 313 | -### 5.1 API概览 | |
| 314 | - | |
| 315 | -本地向量化服务提供RESTful API接口: | |
| 316 | - | |
| 317 | -| 端点 | 方法 | 功能 | | |
| 318 | -|------|------|------| | |
| 319 | -| `/health` | GET | 健康检查 | | |
| 320 | -| `/embed/text` | POST | 文本向量化 | | |
| 321 | -| `/embed/image` | POST | 图片向量化 | | |
| 322 | - | |
| 323 | -**服务地址**: | |
| 324 | -- 默认:`http://localhost:6005` | |
| 325 | -- 生产:`http://<your-server>:6005` | |
| 326 | - | |
| 327 | -### 5.2 健康检查接口 | |
| 328 | - | |
| 329 | -```http | |
| 330 | -GET /health | |
| 331 | -``` | |
| 332 | - | |
| 333 | -**响应示例**: | |
| 334 | -```json | |
| 335 | -{ | |
| 336 | - "status": "ok", | |
| 337 | - "text_model_loaded": true, | |
| 338 | - "image_model_loaded": true | |
| 339 | -} | |
| 340 | -``` | |
| 341 | - | |
| 342 | -**字段说明**: | |
| 343 | -- `status`: 服务状态,"ok"表示正常 | |
| 344 | -- `text_model_loaded`: 文本模型是否加载成功 | |
| 345 | -- `image_model_loaded`: 图片模型是否加载成功 | |
| 346 | - | |
| 347 | -**cURL示例**: | |
| 348 | -```bash | |
| 349 | -curl http://localhost:6005/health | |
| 350 | -``` | |
| 351 | - | |
| 352 | -### 5.3 文本向量化接口 | |
| 353 | - | |
| 354 | -```http | |
| 355 | -POST /embed/text | |
| 356 | -Content-Type: application/json | |
| 357 | -``` | |
| 358 | - | |
| 359 | -#### 请求格式 | |
| 360 | - | |
| 361 | -**请求体**(JSON数组): | |
| 362 | -```json | |
| 363 | -[ | |
| 364 | - "衣服的质量杠杠的", | |
| 365 | - "Bohemian Maxi Dress", | |
| 366 | - "Vintage Denim Jacket" | |
| 367 | -] | |
| 368 | -``` | |
| 369 | - | |
| 370 | -**参数说明**: | |
| 371 | -- 类型:`List[str]` | |
| 372 | -- 长度:建议≤100(避免超时) | |
| 373 | -- 单个文本:建议≤512个字符 | |
| 374 | - | |
| 375 | -#### 响应格式 | |
| 376 | - | |
| 377 | -**成功响应**(200 OK): | |
| 378 | -```json | |
| 379 | -[ | |
| 380 | - [0.1234, -0.5678, 0.9012, ..., 0.3456], // 1024维向量 | |
| 381 | - [0.2345, 0.6789, -0.1234, ..., 0.4567], // 1024维向量 | |
| 382 | - [0.3456, -0.7890, 0.2345, ..., 0.5678] // 1024维向量 | |
| 383 | -] | |
| 384 | -``` | |
| 385 | - | |
| 386 | -**字段说明**: | |
| 387 | -- 类型:`List[List[float]]` | |
| 388 | -- 每个向量:1024个浮点数 | |
| 389 | -- 对齐原则:输出数组与输入数组按索引一一对应 | |
| 390 | -- 失败项:返回`null` | |
| 391 | - | |
| 392 | -**错误示例**: | |
| 393 | -```json | |
| 394 | -[ | |
| 395 | - [0.1234, -0.5678, ...], // 成功 | |
| 396 | - null, // 失败(空文本或其他错误) | |
| 397 | - [0.3456, 0.7890, ...] // 成功 | |
| 398 | -] | |
| 399 | -``` | |
| 400 | - | |
| 401 | -#### cURL示例 | |
| 402 | - | |
| 403 | -```bash | |
| 404 | -# 单个文本 | |
| 405 | -curl -X POST http://localhost:6005/embed/text \ | |
| 406 | - -H "Content-Type: application/json" \ | |
| 407 | - -d '["测试查询文本"]' | |
| 408 | - | |
| 409 | -# 批量文本 | |
| 410 | -curl -X POST http://localhost:6005/embed/text \ | |
| 411 | - -H "Content-Type: application/json" \ | |
| 412 | - -d '["红色连衣裙", "blue jeans", "vintage dress"]' | |
| 413 | -``` | |
| 414 | - | |
| 415 | -#### Python示例 | |
| 416 | - | |
| 417 | -```python | |
| 418 | -import requests | |
| 419 | -import numpy as np | |
| 420 | - | |
| 421 | -def embed_texts(texts): | |
| 422 | - """文本向量化""" | |
| 423 | - response = requests.post( | |
| 424 | - "http://localhost:6005/embed/text", | |
| 425 | - json=texts, | |
| 426 | - timeout=30 | |
| 427 | - ) | |
| 428 | - response.raise_for_status() | |
| 429 | - embeddings = response.json() | |
| 430 | - | |
| 431 | - # 转换为numpy数组 | |
| 432 | - valid_embeddings = [e for e in embeddings if e is not None] | |
| 433 | - return np.array(valid_embeddings) | |
| 434 | - | |
| 435 | -# 使用 | |
| 436 | -texts = ["红色连衣裙", "blue jeans"] | |
| 437 | -embeddings = embed_texts(texts) | |
| 438 | -print(f"Shape: {embeddings.shape}") # (2, 1024) | |
| 439 | - | |
| 440 | -# 计算相似度 | |
| 441 | -similarity = np.dot(embeddings[0], embeddings[1]) | |
| 442 | -print(f"Similarity: {similarity}") | |
| 443 | -``` | |
| 444 | - | |
| 445 | -### 5.4 图片向量化接口 | |
| 446 | - | |
| 447 | -```http | |
| 448 | -POST /embed/image | |
| 449 | -Content-Type: application/json | |
| 450 | -``` | |
| 451 | - | |
| 452 | -#### 请求格式 | |
| 453 | - | |
| 454 | -**请求体**(JSON数组): | |
| 455 | -```json | |
| 456 | -[ | |
| 457 | - "https://example.com/product1.jpg", | |
| 458 | - "https://example.com/product2.png", | |
| 459 | - "/local/path/to/product3.jpg" | |
| 460 | -] | |
| 461 | -``` | |
| 462 | - | |
| 463 | -**参数说明**: | |
| 464 | -- 类型:`List[str]` | |
| 465 | -- 支持:HTTP URL或本地文件路径 | |
| 466 | -- 格式:JPG、PNG等常见图片格式 | |
| 467 | -- 长度:建议≤10(图片处理较慢) | |
| 468 | - | |
| 469 | -#### 响应格式 | |
| 470 | - | |
| 471 | -**成功响应**(200 OK): | |
| 472 | -```json | |
| 473 | -[ | |
| 474 | - [0.1234, 0.5678, 0.9012, ..., 0.3456], // 1024维向量 | |
| 475 | - null, // 失败(图片无效或下载失败) | |
| 476 | - [0.3456, 0.7890, 0.2345, ..., 0.5678] // 1024维向量 | |
| 477 | -] | |
| 478 | -``` | |
| 479 | - | |
| 480 | -**特性**: | |
| 481 | -- 自动下载:HTTP URL自动下载图片 | |
| 482 | -- 逐个处理:串行处理(带锁保证线程安全) | |
| 483 | -- 容错:单个失败不影响其他图片 | |
| 484 | - | |
| 485 | -#### cURL示例 | |
| 486 | - | |
| 487 | -```bash | |
| 488 | -# 单个图片(URL) | |
| 489 | -curl -X POST http://localhost:6005/embed/image \ | |
| 490 | - -H "Content-Type: application/json" \ | |
| 491 | - -d '["https://example.com/product.jpg"]' | |
| 492 | - | |
| 493 | -# 多个图片(混合URL和本地路径) | |
| 494 | -curl -X POST http://localhost:6005/embed/image \ | |
| 495 | - -H "Content-Type: application/json" \ | |
| 496 | - -d '["https://example.com/img1.jpg", "/data/images/img2.png"]' | |
| 497 | -``` | |
| 498 | - | |
| 499 | -#### Python示例 | |
| 500 | - | |
| 501 | -```python | |
| 502 | -import requests | |
| 503 | -import numpy as np | |
| 504 | - | |
| 505 | -def embed_images(image_urls): | |
| 506 | - """图片向量化""" | |
| 507 | - response = requests.post( | |
| 508 | - "http://localhost:6005/embed/image", | |
| 509 | - json=image_urls, | |
| 510 | - timeout=120 # 图片处理较慢,设置更长超时 | |
| 511 | - ) | |
| 512 | - response.raise_for_status() | |
| 513 | - embeddings = response.json() | |
| 514 | - | |
| 515 | - # 过滤成功的向量化结果 | |
| 516 | - valid_embeddings = [(url, emb) for url, emb in zip(image_urls, embeddings) if emb is not None] | |
| 517 | - return valid_embeddings | |
| 518 | - | |
| 519 | -# 使用 | |
| 520 | -image_urls = [ | |
| 521 | - "https://example.com/dress1.jpg", | |
| 522 | - "https://example.com/dress2.jpg" | |
| 523 | -] | |
| 524 | - | |
| 525 | -results = embed_images(image_urls) | |
| 526 | -for url, embedding in results: | |
| 527 | - print(f"{url}: {len(embedding)} dimensions") | |
| 528 | -``` | |
| 529 | - | |
| 530 | -### 5.5 错误处理 | |
| 531 | - | |
| 532 | -#### HTTP状态码 | |
| 533 | - | |
| 534 | -| 状态码 | 含义 | 处理方式 | | |
| 535 | -|--------|------|---------| | |
| 536 | -| 200 | 成功 | 正常处理响应 | | |
| 537 | -| 500 | 服务器错误 | 检查服务日志 | | |
| 538 | -| 503 | 服务不可用 | 模型未加载,检查启动日志 | | |
| 539 | - | |
| 540 | -#### 常见错误场景 | |
| 541 | - | |
| 542 | -1. **模型未加载** | |
| 543 | -```json | |
| 544 | -{ | |
| 545 | - "detail": "Runtime Error: Text model not loaded" | |
| 546 | -} | |
| 547 | -``` | |
| 548 | -**解决**:检查服务启动日志,确认模型加载成功 | |
| 549 | - | |
| 550 | -2. **无效输入** | |
| 551 | -```json | |
| 552 | -[null, null] | |
| 553 | -``` | |
| 554 | -**原因**:输入包含空字符串或None | |
| 555 | - | |
| 556 | -3. **图片下载失败** | |
| 557 | -```json | |
| 558 | -[ | |
| 559 | - [0.123, ...], | |
| 560 | - null // URL无效或网络问题 | |
| 561 | -] | |
| 562 | -``` | |
| 563 | -**解决**:检查URL是否可访问 | |
| 564 | - | |
| 565 | ---- | |
| 566 | - | |
| 567 | -## 配置说明 | |
| 568 | - | |
| 569 | -### 6.1 服务配置 | |
| 570 | - | |
| 571 | -编辑 `embeddings/config.py` 修改服务配置: | |
| 572 | - | |
| 573 | -```python | |
| 574 | -class EmbeddingConfig: | |
| 575 | - # ========== 服务配置 ========== | |
| 576 | - HOST = "0.0.0.0" # 监听所有网卡 | |
| 577 | - PORT = 6005 # 默认端口 | |
| 578 | -``` | |
| 579 | - | |
| 580 | -**生产环境建议**: | |
| 581 | -- 使用反向代理(Nginx)处理SSL | |
| 582 | -- 配置防火墙规则限制访问 | |
| 583 | -- 使用Docker容器隔离 | |
| 584 | - | |
| 585 | -### 6.2 模型配置 | |
| 586 | - | |
| 587 | -#### 文本模型配置 | |
| 588 | - | |
| 589 | -```python | |
| 590 | -# ========== BGE-M3 文本模型 ========== | |
| 591 | -TEXT_MODEL_DIR = "Xorbits/bge-m3" # HuggingFace模型ID | |
| 592 | -TEXT_DEVICE = "cuda" # 设备选择 | |
| 593 | -TEXT_BATCH_SIZE = 32 # 批处理大小 | |
| 594 | -``` | |
| 595 | - | |
| 596 | -**DEVICE选择**: | |
| 597 | -- `"cuda"`: GPU加速(推荐,需要CUDA) | |
| 598 | -- `"cpu"`: CPU模式(较慢,但兼容性好) | |
| 599 | - | |
| 600 | -**批处理大小建议**: | |
| 601 | -- GPU(16GB显存):32-64 | |
| 602 | -- GPU(8GB显存):16-32 | |
| 603 | -- CPU:8-16 | |
| 604 | - | |
| 605 | -#### 图片模型配置 | |
| 606 | - | |
| 607 | -```python | |
| 608 | -# ========== CN-CLIP 图片模型 ========== | |
| 609 | -IMAGE_MODEL_NAME = "ViT-H-14" # 模型名称 | |
| 610 | -IMAGE_DEVICE = None # None=自动检测 | |
| 611 | -IMAGE_BATCH_SIZE = 8 # 批处理大小 | |
| 612 | -``` | |
| 613 | - | |
| 614 | -**IMAGE_DEVICE选择**: | |
| 615 | -- `None`: 自动检测(推荐) | |
| 616 | -- `"cuda"`: 强制使用GPU | |
| 617 | -- `"cpu"`: 强制使用CPU | |
| 618 | - | |
| 619 | -### 6.3 批处理配置 | |
| 620 | - | |
| 621 | -**批处理大小调优**: | |
| 622 | - | |
| 623 | -| 场景 | 文本Batch Size | 图片Batch Size | 说明 | | |
| 624 | -|------|---------------|---------------|------| | |
| 625 | -| 开发测试 | 16 | 1 | 快速响应 | | |
| 626 | -| 生产环境(GPU) | 32-64 | 4-8 | 平衡性能 | | |
| 627 | -| 生产环境(CPU) | 8-16 | 1-2 | 避免内存溢出 | | |
| 628 | -| 离线批处理 | 128+ | 16+ | 最大化吞吐 | | |
| 629 | - | |
| 630 | -**批处理建议**: | |
| 631 | -1. 监控GPU内存使用:`nvidia-smi` | |
| 632 | -2. 逐步增加 batch_size,直到接近显存上限(一旦出现 OOM 就回退到上一档) | |
| 633 | -3. 预留20%内存余量 | |
| 634 | - | |
| 635 | ---- | |
| 636 | - | |
| 637 | -## 客户端集成示例 | |
| 638 | - | |
| 639 | -### 7.1 Python客户端 | |
| 640 | - | |
| 641 | -#### 基础客户端类 | |
| 642 | - | |
| 643 | -```python | |
| 644 | -import requests | |
| 645 | -from typing import List, Optional | |
| 646 | -import numpy as np | |
| 647 | - | |
| 648 | -class EmbeddingServiceClient: | |
| 649 | - """向量化服务客户端""" | |
| 650 | - | |
| 651 | - def __init__(self, base_url: str = "http://localhost:6005"): | |
| 652 | - self.base_url = base_url.rstrip('/') | |
| 653 | - self.timeout = 30 | |
| 654 | - | |
| 655 | - def health_check(self) -> dict: | |
| 656 | - """健康检查""" | |
| 657 | - response = requests.get(f"{self.base_url}/health", timeout=5) | |
| 658 | - response.raise_for_status() | |
| 659 | - return response.json() | |
| 660 | - | |
| 661 | - def embed_text(self, text: str) -> Optional[List[float]]: | |
| 662 | - """单个文本向量化""" | |
| 663 | - result = self.embed_texts([text]) | |
| 664 | - return result[0] if result else None | |
| 665 | - | |
| 666 | - def embed_texts(self, texts: List[str]) -> List[Optional[List[float]]]: | |
| 667 | - """批量文本向量化""" | |
| 668 | - if not texts: | |
| 669 | - return [] | |
| 670 | - | |
| 671 | - response = requests.post( | |
| 672 | - f"{self.base_url}/embed/text", | |
| 673 | - json=texts, | |
| 674 | - timeout=self.timeout | |
| 675 | - ) | |
| 676 | - response.raise_for_status() | |
| 677 | - return response.json() | |
| 678 | - | |
| 679 | - def embed_image(self, image_url: str) -> Optional[List[float]]: | |
| 680 | - """单个图片向量化""" | |
| 681 | - result = self.embed_images([image_url]) | |
| 682 | - return result[0] if result else None | |
| 683 | - | |
| 684 | - def embed_images(self, image_urls: List[str]) -> List[Optional[List[float]]]: | |
| 685 | - """批量图片向量化""" | |
| 686 | - if not image_urls: | |
| 687 | - return [] | |
| 688 | - | |
| 689 | - response = requests.post( | |
| 690 | - f"{self.base_url}/embed/image", | |
| 691 | - json=image_urls, | |
| 692 | - timeout=120 # 图片处理需要更长时间 | |
| 693 | - ) | |
| 694 | - response.raise_for_status() | |
| 695 | - return response.json() | |
| 696 | - | |
| 697 | - def embed_texts_to_numpy(self, texts: List[str]) -> Optional[np.ndarray]: | |
| 698 | - """批量文本向量化,返回numpy数组""" | |
| 699 | - embeddings = self.embed_texts(texts) | |
| 700 | - valid_embeddings = [e for e in embeddings if e is not None] | |
| 701 | - if not valid_embeddings: | |
| 702 | - return None | |
| 703 | - return np.array(valid_embeddings, dtype=np.float32) | |
| 704 | - | |
| 705 | -# 使用示例 | |
| 706 | -if __name__ == "__main__": | |
| 707 | - client = EmbeddingServiceClient() | |
| 708 | - | |
| 709 | - # 健康检查 | |
| 710 | - health = client.health_check() | |
| 711 | - print(f"Service status: {health}") | |
| 712 | - | |
| 713 | - # 文本向量化 | |
| 714 | - texts = ["红色连衣裙", "blue jeans", "vintage dress"] | |
| 715 | - embeddings = client.embed_texts_to_numpy(texts) | |
| 716 | - print(f"Embeddings shape: {embeddings.shape}") | |
| 717 | - | |
| 718 | - # 计算相似度 | |
| 719 | - from sklearn.metrics.pairwise import cosine_similarity | |
| 720 | - similarities = cosine_similarity(embeddings) | |
| 721 | - print(f"Similarity matrix:\n{similarities}") | |
| 722 | -``` | |
| 723 | - | |
| 724 | -#### 高级用法:异步客户端 | |
| 725 | - | |
| 726 | -```python | |
| 727 | -import aiohttp | |
| 728 | -import asyncio | |
| 729 | -from typing import List, Optional | |
| 730 | - | |
| 731 | -class AsyncEmbeddingClient: | |
| 732 | - """异步向量化服务客户端""" | |
| 733 | - | |
| 734 | - def __init__(self, base_url: str = "http://localhost:6005"): | |
| 735 | - self.base_url = base_url.rstrip('/') | |
| 736 | - self.session: Optional[aiohttp.ClientSession] = None | |
| 737 | - | |
| 738 | - async def __aenter__(self): | |
| 739 | - self.session = aiohttp.ClientSession() | |
| 740 | - return self | |
| 741 | - | |
| 742 | - async def __aexit__(self, exc_type, exc_val, exc_tb): | |
| 743 | - if self.session: | |
| 744 | - await self.session.close() | |
| 745 | - | |
| 746 | - async def embed_texts(self, texts: List[str]) -> List[Optional[List[float]]]: | |
| 747 | - """异步批量文本向量化""" | |
| 748 | - if not texts: | |
| 749 | - return [] | |
| 750 | - | |
| 751 | - if not self.session: | |
| 752 | - raise RuntimeError("Client not initialized. Use 'async with'.") | |
| 753 | - | |
| 754 | - async with self.session.post( | |
| 755 | - f"{self.base_url}/embed/text", | |
| 756 | - json=texts, | |
| 757 | - timeout=aiohttp.ClientTimeout(total=30) | |
| 758 | - ) as response: | |
| 759 | - response.raise_for_status() | |
| 760 | - return await response.json() | |
| 761 | - | |
| 762 | -# 使用示例 | |
| 763 | -async def main(): | |
| 764 | - async with AsyncEmbeddingClient() as client: | |
| 765 | - texts = ["text1", "text2", "text3"] | |
| 766 | - embeddings = await client.embed_texts(texts) | |
| 767 | - print(f"Got {len(embeddings)} embeddings") | |
| 768 | - | |
| 769 | -asyncio.run(main()) | |
| 770 | -``` | |
| 771 | - | |
| 772 | -### 7.2 Java客户端 | |
| 773 | - | |
| 774 | -#### 基础客户端类 | |
| 775 | - | |
| 776 | -```java | |
| 777 | -import java.net.URI; | |
| 778 | -import java.net.http.HttpClient; | |
| 779 | -import java.net.http.HttpRequest; | |
| 780 | -import java.net.http.HttpResponse; | |
| 781 | -import java.time.Duration; | |
| 782 | -import java.util.List; | |
| 783 | -import com.fasterxml.jackson.databind.ObjectMapper; | |
| 784 | -import com.fasterxml.jackson.databind.JsonNode; | |
| 785 | -import com.fasterxml.jackson.databind.node.ArrayNode; | |
| 786 | - | |
| 787 | -public class EmbeddingServiceClient { | |
| 788 | - private final HttpClient httpClient; | |
| 789 | - private final ObjectMapper objectMapper; | |
| 790 | - private final String baseUrl; | |
| 791 | - | |
| 792 | - public EmbeddingServiceClient(String baseUrl) { | |
| 793 | - this.baseUrl = baseUrl.replaceAll("/$", ""); | |
| 794 | - this.httpClient = HttpClient.newBuilder() | |
| 795 | - .connectTimeout(Duration.ofSeconds(10)) | |
| 796 | - .build(); | |
| 797 | - this.objectMapper = new ObjectMapper(); | |
| 798 | - } | |
| 799 | - | |
| 800 | - /** | |
| 801 | - * 健康检查 | |
| 802 | - */ | |
| 803 | - public HealthStatus healthCheck() throws Exception { | |
| 804 | - HttpRequest request = HttpRequest.newBuilder() | |
| 805 | - .uri(URI.create(baseUrl + "/health")) | |
| 806 | - .timeout(Duration.ofSeconds(5)) | |
| 807 | - .GET() | |
| 808 | - .build(); | |
| 809 | - | |
| 810 | - HttpResponse<String> response = httpClient.send( | |
| 811 | - request, | |
| 812 | - HttpResponse.BodyHandlers.ofString() | |
| 813 | - ); | |
| 814 | - | |
| 815 | - JsonNode json = objectMapper.readTree(response.body()); | |
| 816 | - return new HealthStatus( | |
| 817 | - json.get("status").asText(), | |
| 818 | - json.get("text_model_loaded").asBoolean(), | |
| 819 | - json.get("image_model_loaded").asBoolean() | |
| 820 | - ); | |
| 821 | - } | |
| 822 | - | |
| 823 | - /** | |
| 824 | - * 批量文本向量化 | |
| 825 | - */ | |
| 826 | - public List<float[]> embedTexts(List<String> texts) throws Exception { | |
| 827 | - // 构建请求体 | |
| 828 | - ArrayNode requestBody = objectMapper.createArrayNode(); | |
| 829 | - for (String text : texts) { | |
| 830 | - requestBody.add(text); | |
| 831 | - } | |
| 832 | - | |
| 833 | - HttpRequest request = HttpRequest.newBuilder() | |
| 834 | - .uri(URI.create(baseUrl + "/embed/text")) | |
| 835 | - .header("Content-Type", "application/json") | |
| 836 | - .timeout(Duration.ofSeconds(30)) | |
| 837 | - .POST(HttpRequest.BodyPublishers.ofString( | |
| 838 | - objectMapper.writeValueAsString(requestBody) | |
| 839 | - )) | |
| 840 | - .build(); | |
| 841 | - | |
| 842 | - HttpResponse<String> response = httpClient.send( | |
| 843 | - request, | |
| 844 | - HttpResponse.BodyHandlers.ofString() | |
| 845 | - ); | |
| 846 | - | |
| 847 | - if (response.statusCode() != 200) { | |
| 848 | - throw new RuntimeException("API error: " + response.body()); | |
| 849 | - } | |
| 850 | - | |
| 851 | - // 解析响应 | |
| 852 | - JsonNode root = objectMapper.readTree(response.body()); | |
| 853 | - List<float[]> embeddings = new java.util.ArrayList<>(); | |
| 854 | - | |
| 855 | - for (JsonNode item : root) { | |
| 856 | - if (item.isNull()) { | |
| 857 | - embeddings.add(null); | |
| 858 | - } else { | |
| 859 | - float[] vector = objectMapper.treeToValue(item, float[].class); | |
| 860 | - embeddings.add(vector); | |
| 861 | - } | |
| 862 | - } | |
| 863 | - | |
| 864 | - return embeddings; | |
| 865 | - } | |
| 866 | - | |
| 867 | - /** | |
| 868 | - * 计算余弦相似度 | |
| 869 | - */ | |
| 870 | - public static float cosineSimilarity(float[] v1, float[] v2) { | |
| 871 | - if (v1.length != v2.length) { | |
| 872 | - throw new IllegalArgumentException("Vectors must be same length"); | |
| 873 | - } | |
| 874 | - | |
| 875 | - float dotProduct = 0.0f; | |
| 876 | - float norm1 = 0.0f; | |
| 877 | - float norm2 = 0.0f; | |
| 878 | - | |
| 879 | - for (int i = 0; i < v1.length; i++) { | |
| 880 | - dotProduct += v1[i] * v2[i]; | |
| 881 | - norm1 += v1[i] * v1[i]; | |
| 882 | - norm2 += v2[i] * v2[i]; | |
| 883 | - } | |
| 884 | - | |
| 885 | - return (float) (dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2))); | |
| 886 | - } | |
| 887 | - | |
| 888 | - // 健康状态数据类 | |
| 889 | - public static class HealthStatus { | |
| 890 | - public final String status; | |
| 891 | - public final boolean textModelLoaded; | |
| 892 | - public final boolean imageModelLoaded; | |
| 893 | - | |
| 894 | - public HealthStatus(String status, boolean textModelLoaded, boolean imageModelLoaded) { | |
| 895 | - this.status = status; | |
| 896 | - this.textModelLoaded = textModelLoaded; | |
| 897 | - this.imageModelLoaded = imageModelLoaded; | |
| 898 | - } | |
| 899 | - | |
| 900 | - @Override | |
| 901 | - public String toString() { | |
| 902 | - return String.format("HealthStatus{status='%s', textModelLoaded=%b, imageModelLoaded=%b}", | |
| 903 | - status, textModelLoaded, imageModelLoaded); | |
| 904 | - } | |
| 905 | - } | |
| 906 | - | |
| 907 | - // 使用示例 | |
| 908 | - public static void main(String[] args) throws Exception { | |
| 909 | - EmbeddingServiceClient client = new EmbeddingServiceClient("http://localhost:6005"); | |
| 910 | - | |
| 911 | - // 健康检查 | |
| 912 | - HealthStatus health = client.healthCheck(); | |
| 913 | - System.out.println("Health: " + health); | |
| 914 | - | |
| 915 | - // 文本向量化 | |
| 916 | - List<String> texts = List.of("红色连衣裙", "blue jeans", "vintage dress"); | |
| 917 | - List<float[]> embeddings = client.embedTexts(texts); | |
| 918 | - | |
| 919 | - System.out.println("Got " + embeddings.size() + " embeddings"); | |
| 920 | - for (int i = 0; i < embeddings.size(); i++) { | |
| 921 | - System.out.println("Embedding " + i + " dimensions: " + | |
| 922 | - (embeddings.get(i) != null ? embeddings.get(i).length : "null")); | |
| 923 | - } | |
| 924 | - | |
| 925 | - // 计算相似度 | |
| 926 | - if (embeddings.get(0) != null && embeddings.get(1) != null) { | |
| 927 | - float similarity = cosineSimilarity(embeddings.get(0), embeddings.get(1)); | |
| 928 | - System.out.println("Similarity between text 0 and 1: " + similarity); | |
| 929 | - } | |
| 930 | - } | |
| 931 | -} | |
| 932 | -``` | |
| 933 | - | |
| 934 | -**Maven依赖**(`pom.xml`): | |
| 935 | - | |
| 936 | -```xml | |
| 937 | -<dependencies> | |
| 938 | - <dependency> | |
| 939 | - <groupId>com.fasterxml.jackson.core</groupId> | |
| 940 | - <artifactId>jackson-databind</artifactId> | |
| 941 | - <version>2.15.2</version> | |
| 942 | - </dependency> | |
| 943 | -</dependencies> | |
| 944 | -``` | |
| 945 | - | |
| 946 | -### 7.3 cURL示例 | |
| 947 | - | |
| 948 | -#### 健康检查 | |
| 949 | - | |
| 950 | -```bash | |
| 951 | -curl http://localhost:6005/health | |
| 952 | -``` | |
| 953 | - | |
| 954 | -#### 文本向量化 | |
| 955 | - | |
| 956 | -```bash | |
| 957 | -# 单个文本 | |
| 958 | -curl -X POST http://localhost:6005/embed/text \ | |
| 959 | - -H "Content-Type: application/json" \ | |
| 960 | - -d '["衣服的质量杠杠的"]' \ | |
| 961 | - | jq '.[0][0:10]' # 打印前10维 | |
| 962 | - | |
| 963 | -# 批量文本 | |
| 964 | -curl -X POST http://localhost:6005/embed/text \ | |
| 965 | - -H "Content-Type: application/json" \ | |
| 966 | - -d '["红色连衣裙", "blue jeans", "vintage dress"]' \ | |
| 967 | - | jq '. | length' # 检查返回数量 | |
| 968 | -``` | |
| 969 | - | |
| 970 | -#### 图片向量化 | |
| 971 | - | |
| 972 | -```bash | |
| 973 | -# URL图片 | |
| 974 | -curl -X POST http://localhost:6005/embed/image \ | |
| 975 | - -H "Content-Type: application/json" \ | |
| 976 | - -d '["https://example.com/product.jpg"]' \ | |
| 977 | - | jq '.[0][0:5]' | |
| 978 | - | |
| 979 | -# 本地图片 | |
| 980 | -curl -X POST http://localhost:6005/embed/image \ | |
| 981 | - -H "Content-Type: application/json" \ | |
| 982 | - -d '["/data/images/product.jpg"]' | |
| 983 | -``` | |
| 984 | - | |
| 985 | -#### 错误处理示例 | |
| 986 | - | |
| 987 | -```bash | |
| 988 | -# 检查服务状态 | |
| 989 | -if ! curl -f http://localhost:6005/health > /dev/null 2>&1; then | |
| 990 | - echo "Embedding service is not healthy!" | |
| 991 | - exit 1 | |
| 992 | -fi | |
| 993 | - | |
| 994 | -# 调用API并检查错误 | |
| 995 | -response=$(curl -s -X POST http://localhost:6005/embed/text \ | |
| 996 | - -H "Content-Type: application/json" \ | |
| 997 | - -d '["test query"]') | |
| 998 | - | |
| 999 | -if echo "$response" | jq -e '.[0] == null' > /dev/null; then | |
| 1000 | - echo "Embedding failed!" | |
| 1001 | - echo "$response" | |
| 1002 | - exit 1 | |
| 1003 | -fi | |
| 1004 | - | |
| 1005 | -echo "Embedding succeeded!" | |
| 1006 | -``` | |
| 1007 | - | |
| 1008 | ---- | |
| 1009 | - | |
| 1010 | -## 性能对比与优化 | |
| 1011 | - | |
| 1012 | -### 8.1 性能对比 | |
| 1013 | - | |
| 1014 | -#### 本地服务性能 | |
| 1015 | - | |
| 1016 | -| 操作 | 硬件配置 | 延迟 | 吞吐量 | | |
| 1017 | -|------|---------|------|--------| | |
| 1018 | -| 文本向量化(单个) | GPU (RTX 3090) | ~80ms | ~12 qps | | |
| 1019 | -| 文本向量化(批量32) | GPU (RTX 3090) | ~2.5s | ~256 qps | | |
| 1020 | -| 文本向量化(单个) | CPU (16核) | ~500ms | ~2 qps | | |
| 1021 | -| 图片向量化(单个) | GPU (RTX 3090) | ~150ms | ~6 qps | | |
| 1022 | -| 图片向量化(批量4) | GPU (RTX 3090) | ~600ms | ~6 qps | | |
| 1023 | - | |
| 1024 | -#### 云端服务性能 | |
| 1025 | - | |
| 1026 | -| 操作 | 指标 | 值 | | |
| 1027 | -|------|------|-----| | |
| 1028 | -| 文本向量化(单个) | 延迟 | 300-400ms | | |
| 1029 | -| 文本向量化(批量) | 吞吐量 | ~2-3 qps | | |
| 1030 | -| API限制 | 速率限制 | 取决于套餐 | | |
| 1031 | -| 可用性 | SLA | 99.9% | | |
| 1032 | - | |
| 1033 | -### 8.2 成本对比 | |
| 1034 | - | |
| 1035 | -#### 本地服务成本 | |
| 1036 | - | |
| 1037 | -| 配置 | 硬件成本(月) | 电费(月) | 总成本(月) | | |
| 1038 | -|------|--------------|-----------|------------| | |
| 1039 | -| GPU服务器 (RTX 3090) | ¥3000 | ¥500 | ¥3500 | | |
| 1040 | -| GPU服务器 (A100) | ¥8000 | ¥800 | ¥8800 | | |
| 1041 | -| CPU服务器(16核) | ¥800 | ¥200 | ¥1000 | | |
| 1042 | - | |
| 1043 | -#### 云端服务成本 | |
| 1044 | - | |
| 1045 | -阿里云DashScope定价(参考): | |
| 1046 | - | |
| 1047 | -| 套餐 | 价格 | 调用量 | 适用场景 | | |
| 1048 | -|------|------|--------|---------| | |
| 1049 | -| 按量付费 | ¥0.0007/1K tokens | 无限制 | 测试/小规模 | | |
| 1050 | -| 基础版 | ¥100/月 | 1M tokens | 小规模应用 | | |
| 1051 | -| 专业版 | ¥500/月 | 10M tokens | 中等规模 | | |
| 1052 | -| 企业版 | 定制 | 无限制 | 大规模 | | |
| 1053 | - | |
| 1054 | -**成本计算示例**: | |
| 1055 | - | |
| 1056 | -假设每天10万次搜索,每次查询平均10个token: | |
| 1057 | -- 日调用量:1M tokens | |
| 1058 | -- 月调用量:30M tokens | |
| 1059 | -- 月成本:30M tokens × ¥0.7/1M tokens(即 ¥0.0007/1K tokens)= ¥21(按量付费) | |
| 1060 | - | |
| 1061 | -### 8.3 优化建议 | |
| 1062 | - | |
| 1063 | -#### 本地服务优化 | |
| 1064 | - | |
| 1065 | -1. **GPU利用率优化** | |
| 1066 | -```python | |
| 1067 | -# 增加批处理大小 | |
| 1068 | -TEXT_BATCH_SIZE = 64 # 从32增加到64 | |
| 1069 | -``` | |
| 1070 | - | |
| 1071 | -2. **模型量化** | |
| 1072 | -```python | |
| 1073 | -# 使用半精度浮点数(节省显存) | |
| 1074 | -import torch | |
| 1075 | -model = model.half() # FP16 | |
| 1076 | -``` | |
| 1077 | - | |
| 1078 | -3. **预热模型** | |
| 1079 | -```python | |
| 1080 | -# 服务启动后预热 | |
| 1081 | -@app.on_event("startup") | |
| 1082 | -async def warmup(): | |
| 1083 | - _text_model.encode(["warmup"], device="cuda") | |
| 1084 | -``` | |
| 1085 | - | |
| 1086 | -4. **连接池优化** | |
| 1087 | -```bash | |
| 1088 | -# uvicorn配置 | |
| 1089 | ---workers 1 \ # 单worker(GPU模型限制) | |
| 1090 | ---backlog 2048 \ # 增加连接队列 | |
| 1091 | ---limit-concurrency 32 # 限制并发数 | |
| 1092 | -``` | |
| 1093 | - | |
| 1094 | -#### 云端服务优化 | |
| 1095 | - | |
| 1096 | -1. **批量合并** | |
| 1097 | -```python | |
| 1098 | -# 累积多个请求后批量调用 | |
| 1099 | -class BatchEncoder: | |
| 1100 | - def __init__(self, batch_size=32, timeout=0.1): | |
| 1101 | - self.batch_size = batch_size | |
| 1102 | - self.timeout = timeout | |
| 1103 | - self.queue = [] | |
| 1104 | - | |
| 1105 | - async def encode(self, text: str): | |
| 1106 | - # 等待批量积累 | |
| 1107 | - future = asyncio.Future() | |
| 1108 | - self.queue.append((text, future)) | |
| 1109 | - | |
| 1110 | - if len(self.queue) >= self.batch_size: | |
| 1111 | - self._flush() | |
| 1112 | - | |
| 1113 | - return await future | |
| 1114 | -``` | |
| 1115 | - | |
| 1116 | -2. **本地缓存** | |
| 1117 | -```python | |
| 1118 | -import hashlib | |
| 1119 | -import pickle | |
| 1120 | - | |
| 1121 | -class CachedEncoder: | |
| 1122 | - def __init__(self, cache_file="embedding_cache.pkl"): | |
| 1123 | - self.cache = self._load_cache(cache_file) | |
| 1124 | - | |
| 1125 | - def encode(self, text: str): | |
| 1126 | - key = hashlib.md5(text.encode()).hexdigest() | |
| 1127 | - if key in self.cache: | |
| 1128 | - return self.cache[key] | |
| 1129 | - | |
| 1130 | - embedding = self._call_api(text) | |
| 1131 | - self.cache[key] = embedding | |
| 1132 | - return embedding | |
| 1133 | -``` | |
| 1134 | - | |
| 1135 | -3. **降级策略** | |
| 1136 | -```python | |
| 1137 | -class HybridEncoder: | |
| 1138 | - def __init__(self): | |
| 1139 | - self.cloud_encoder = CloudTextEncoder() | |
| 1140 | - self.local_encoder = None # 按需加载 | |
| 1141 | - | |
| 1142 | - def encode(self, text: str): | |
| 1143 | - try: | |
| 1144 | - return self.cloud_encoder.encode(text) | |
| 1145 | - except Exception as e: | |
| 1146 | - logger.warning(f"Cloud API failed: {e}, falling back to local") | |
| 1147 | - if not self.local_encoder: | |
| 1148 | - self.local_encoder = BgeEncoder() | |
| 1149 | - return self.local_encoder.encode(text) | |
| 1150 | -``` | |
| 1151 | - | |
| 1152 | ---- | |
| 1153 | - | |
| 1154 | -## 故障排查 | |
| 1155 | - | |
| 1156 | -### 9.1 常见问题 | |
| 1157 | - | |
| 1158 | -#### 问题1:服务无法启动 | |
| 1159 | - | |
| 1160 | -**症状**: | |
| 1161 | -```bash | |
| 1162 | -$ ./scripts/start_embedding_service.sh | |
| 1163 | -Error: Port 6005 already in use | |
| 1164 | -``` | |
| 1165 | - | |
| 1166 | -**解决**: | |
| 1167 | -```bash | |
| 1168 | -# 检查端口占用 | |
| 1169 | -lsof -i :6005 | |
| 1170 | - | |
| 1171 | -# 杀死占用进程 | |
| 1172 | -kill -9 <PID> | |
| 1173 | - | |
| 1174 | -# 或者修改配置文件中的端口 | |
| 1175 | -# embeddings/config.py: PORT = 6006 | |
| 1176 | -``` | |
| 1177 | - | |
| 1178 | -#### 问题2:CUDA Out of Memory | |
| 1179 | - | |
| 1180 | -**症状**: | |
| 1181 | -``` | |
| 1182 | -RuntimeError: CUDA out of memory. Tried to allocate 2.00 GiB | |
| 1183 | -``` | |
| 1184 | - | |
| 1185 | -**解决**: | |
| 1186 | -```python | |
| 1187 | -# 减小批处理大小 | |
| 1188 | -TEXT_BATCH_SIZE = 16 # 从32减少到16 | |
| 1189 | - | |
| 1190 | -# 或者使用CPU模式 | |
| 1191 | -TEXT_DEVICE = "cpu" | |
| 1192 | -``` | |
| 1193 | - | |
| 1194 | -#### 问题3:模型下载失败 | |
| 1195 | - | |
| 1196 | -**症状**: | |
| 1197 | -``` | |
| 1198 | -OSError: Can't load tokenizer for 'Xorbits/bge-m3' | |
| 1199 | -``` | |
| 1200 | - | |
| 1201 | -**解决**: | |
| 1202 | -```bash | |
| 1203 | -# 手动下载模型 | |
| 1204 | -huggingface-cli download Xorbits/bge-m3 | |
| 1205 | - | |
| 1206 | -# 或使用镜像 | |
| 1207 | -export HF_ENDPOINT=https://hf-mirror.com | |
| 1208 | -``` | |
| 1209 | - | |
| 1210 | -#### 问题4:云端API Key无效 | |
| 1211 | - | |
| 1212 | -**症状**: | |
| 1213 | -``` | |
| 1214 | -ERROR: DASHSCOPE_API_KEY environment variable is not set! | |
| 1215 | -``` | |
| 1216 | - | |
| 1217 | -**解决**: | |
| 1218 | -```bash | |
| 1219 | -# 设置环境变量 | |
| 1220 | -export DASHSCOPE_API_KEY="sk-your-key" | |
| 1221 | - | |
| 1222 | -# 验证 | |
| 1223 | -echo $DASHSCOPE_API_KEY | |
| 1224 | -``` | |
| 1225 | - | |
| 1226 | -#### 问题5:API速率限制 | |
| 1227 | - | |
| 1228 | -**症状**: | |
| 1229 | -``` | |
| 1230 | -Rate limit exceeded. Please try again later. | |
| 1231 | -``` | |
| 1232 | - | |
| 1233 | -**解决**: | |
| 1234 | -```python | |
| 1235 | -# 添加延迟 | |
| 1236 | -import time | |
| 1237 | -for batch in batches: | |
| 1238 | - embeddings = encoder.encode_batch(batch) | |
| 1239 | - time.sleep(0.1) # 每批之间延迟100ms | |
| 1240 | -``` | |
| 1241 | - | |
| 1242 | -### 9.2 日志查看 | |
| 1243 | - | |
| 1244 | -#### 服务日志 | |
| 1245 | - | |
| 1246 | -```bash | |
| 1247 | -# 查看实时日志 | |
| 1248 | -./scripts/start_embedding_service.sh 2>&1 | tee embedding.log | |
| 1249 | - | |
| 1250 | -# 或使用systemd(如果配置了服务) | |
| 1251 | -journalctl -u embedding-service -f | |
| 1252 | -``` | |
| 1253 | - | |
| 1254 | -#### Python应用日志 | |
| 1255 | - | |
| 1256 | -```python | |
| 1257 | -import logging | |
| 1258 | - | |
| 1259 | -# 配置日志 | |
| 1260 | -logging.basicConfig( | |
| 1261 | - level=logging.INFO, | |
| 1262 | - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' | |
| 1263 | -) | |
| 1264 | - | |
| 1265 | -logger = logging.getLogger(__name__) | |
| 1266 | - | |
| 1267 | -# 使用 | |
| 1268 | -logger.info("Encoding texts...") | |
| 1269 | -logger.error("Encoding failed: %s", str(e)) | |
| 1270 | -``` | |
| 1271 | - | |
| 1272 | -#### GPU监控 | |
| 1273 | - | |
| 1274 | -```bash | |
| 1275 | -# 实时监控GPU使用 | |
| 1276 | -watch -n 1 nvidia-smi | |
| 1277 | - | |
| 1278 | -# 查看详细信息 | |
| 1279 | -nvidia-smi --query-gpu=timestamp,name,temperature.gpu,utilization.gpu,utilization.memory,memory.total,memory.used,memory.free --format=csv | |
| 1280 | -``` | |
| 1281 | - | |
| 1282 | -### 9.3 性能调优 | |
| 1283 | - | |
| 1284 | -#### 性能分析 | |
| 1285 | - | |
| 1286 | -```python | |
| 1287 | -import time | |
| 1288 | -import numpy as np | |
| 1289 | - | |
| 1290 | -def benchmark_encoder(encoder, texts, iterations=100): | |
| 1291 | - """性能基准测试""" | |
| 1292 | - times = [] | |
| 1293 | - | |
| 1294 | - for i in range(iterations): | |
| 1295 | - start = time.time() | |
| 1296 | - embeddings = encoder.encode(texts) | |
| 1297 | - end = time.time() | |
| 1298 | - times.append(end - start) | |
| 1299 | - | |
| 1300 | - times = np.array(times) | |
| 1301 | - print(f"Mean: {times.mean():.3f}s") | |
| 1302 | - print(f"Std: {times.std():.3f}s") | |
| 1303 | - print(f"Min: {times.min():.3f}s") | |
| 1304 | - print(f"Max: {times.max():.3f}s") | |
| 1305 | - print(f"QPS: {len(texts) / times.mean():.2f}") | |
| 1306 | - | |
| 1307 | -# 使用 | |
| 1308 | -benchmark_encoder(encoder, texts=["test"] * 32, iterations=100) | |
| 1309 | -``` | |
| 1310 | - | |
| 1311 | -#### 内存分析 | |
| 1312 | - | |
| 1313 | -```bash | |
| 1314 | -# Python内存分析 | |
| 1315 | -pip install memory_profiler | |
| 1316 | - | |
| 1317 | -# 在代码中添加 | |
| 1318 | -from memory_profiler import profile | |
| 1319 | - | |
| 1320 | -@profile | |
| 1321 | -def encode_batch(texts): | |
| 1322 | - return encoder.encode(texts) | |
| 1323 | - | |
| 1324 | -# 运行 | |
| 1325 | -python -m memory_profiler script.py | |
| 1326 | -``` | |
| 1327 | - | |
| 1328 | ---- | |
| 1329 | - | |
| 1330 | -## 附录 | |
| 1331 | - | |
| 1332 | -### 10.1 向量维度说明 | |
| 1333 | - | |
| 1334 | -#### 为什么是1024维? | |
| 1335 | - | |
| 1336 | -1. **表达能力**:1024维可以捕捉丰富的语义信息 | |
| 1337 | -2. **计算效率**:维度适中,计算速度快 | |
| 1338 | -3. **存储平衡**:向量大小合理(每个向量约4KB) | |
| 1339 | -4. **模型选择**:BGE-M3和text-embedding-v4都使用1024维 | |
| 1340 | - | |
| 1341 | -#### 向量存储计算 | |
| 1342 | - | |
| 1343 | -``` | |
| 1344 | -单个向量大小 = 1024 × 4字节(FP32) = 4KB | |
| 1345 | -100万向量大小 = 4KB × 1,000,000 = 4GB | |
| 1346 | -1000万向量大小 = 4KB × 10,000,000 = 40GB | |
| 1347 | -``` | |
| 1348 | - | |
| 1349 | -### 10.2 模型版本信息 | |
| 1350 | - | |
| 1351 | -#### BGE-M3 | |
| 1352 | - | |
| 1353 | -- **HuggingFace ID**: `Xorbits/bge-m3` | |
| 1354 | -- **论文**: [BGE-M3: Multi-Functionality, Multi-Linguality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation](https://arxiv.org/abs/2402.03216) | |
| 1355 | -- **GitHub**: https://github.com/FlagOpen/FlagEmbedding | |
| 1356 | -- **特性**: | |
| 1357 | - - 支持100+种语言 | |
| 1358 | - - 最大支持8192 token长度 | |
| 1359 | - - 丰富的语义表达能力 | |
| 1360 | - | |
| 1361 | -#### CN-CLIP | |
| 1362 | - | |
| 1363 | -- **模型**: ViT-H-14 | |
| 1364 | -- **论文**: [Chinese CLIP: Contrastive Language-Image Pretraining in Chinese](https://arxiv.org/abs/2211.01335) | |
| 1365 | -- **GitHub**: https://github.com/OFA-Sys/Chinese-CLIP | |
| 1366 | -- **特性**: | |
| 1367 | - - 中文图文理解 | |
| 1368 | - - 支持图片检索和文本检索 | |
| 1369 | - - 适合电商场景 | |
| 1370 | - | |
| 1371 | -#### Aliyun text-embedding-v4 | |
| 1372 | - | |
| 1373 | -- **提供商**: 阿里云DashScope | |
| 1374 | -- **文档**: https://help.aliyun.com/zh/model-studio/getting-started/models | |
| 1375 | -- **特性**: | |
| 1376 | - - 云端API,无需部署 | |
| 1377 | - - 高可用性(99.9% SLA) | |
| 1378 | - - 自动扩展 | |
| 1379 | - | |
| 1380 | -### 10.3 相关文档 | |
| 1381 | - | |
| 1382 | -#### 项目文档 | |
| 1383 | - | |
| 1384 | -- **搜索API对接指南**: `docs/搜索API对接指南.md` | |
| 1385 | -- **索引字段说明**: `docs/索引字段说明v2.md` | |
| 1386 | -- **系统设计文档**: `docs/系统设计文档.md` | |
| 1387 | -- **CLAUDE项目指南**: `CLAUDE.md` | |
| 1388 | - | |
| 1389 | -#### 外部参考 | |
| 1390 | - | |
| 1391 | -- **BGE-M3官方文档**: https://github.com/FlagOpen/FlagEmbedding/tree/master/BGE_M3 | |
| 1392 | -- **阿里云DashScope**: https://help.aliyun.com/zh/model-studio/ | |
| 1393 | -- **Elasticsearch向量搜索**: https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html | |
| 1394 | -- **FastAPI文档**: https://fastapi.tiangolo.com/ | |
| 1395 | - | |
| 1396 | -#### 测试脚本 | |
| 1397 | - | |
| 1398 | -```bash | |
| 1399 | -# 本地向量化服务测试 | |
| 1400 | -./scripts/test_embedding_service.sh | |
| 1401 | - | |
| 1402 | -# 云端向量化服务测试 | |
| 1403 | -python scripts/test_cloud_embedding.py | |
| 1404 | - | |
| 1405 | -# 性能基准测试 | |
| 1406 | -python scripts/benchmark_embeddings.py | |
| 1407 | -``` | |
| 1408 | - | |
| 1409 | ---- | |
| 1410 | - | |
| 1411 | -## 版本历史 | |
| 1412 | - | |
| 1413 | -| 版本 | 日期 | 变更说明 | | |
| 1414 | -|------|------|---------| | |
| 1415 | -| v1.0 | 2025-12-23 | 初始版本,完整的向量化模块文档 | | |
| 1416 | - | |
| 1417 | ---- | |
| 1418 | - | |
| 1419 | -## 联系方式 | |
| 1420 | - | |
| 1421 | -如有问题或建议,请联系项目维护者。 | |
| 1422 | - | |
| 1423 | -**项目仓库**: `/data/saas-search` | |
| 1424 | - | |
| 1425 | -**相关文档目录**: `docs/` | |
| 15 | +详见 `embeddings/README.md`。 | ... | ... |
docs/翻译模块说明.md
| 1 | -# 翻译模块说明(Qwen / DeepL) | |
| 1 | +# 翻译模块 | |
| 2 | 2 | |
| 3 | -本文档汇总翻译模块的**接口使用说明**与**Python 模块用法**,对应代码: | |
| 3 | +**快速上手**:见 `docs/QUICKSTART.md` 第 3.4 节。 | |
| 4 | 4 | |
| 5 | -- HTTP 服务:`api/translator_app.py` | |
| 6 | -- Python 模块:`query/translator.py` | |
| 5 | +## 环境变量 | |
| 7 | 6 | |
| 8 | ---- | |
| 9 | - | |
| 10 | -## 1. 功能概述 | |
| 11 | - | |
| 12 | -当前翻译模块支持两种后端: | |
| 13 | - | |
| 14 | -- **Qwen(默认)**:通过阿里云百炼 DashScope 的 OpenAI 兼容接口调用 `qwen-mt-flash` | |
| 15 | -- **DeepL**:通过 DeepL API 调用翻译(保留原有能力) | |
| 16 | - | |
| 17 | -两种方式均支持: | |
| 18 | - | |
| 19 | -- **Redis 缓存**(如启用):同文案同目标语言命中缓存直接返回 | |
| 20 | -- **`source_lang` 自动检测**:当 `source_lang` 为空或 `"auto"` 时启用自动检测(Qwen 使用 `"auto"`) | |
| 21 | - | |
| 22 | ---- | |
| 23 | - | |
| 24 | -## 2. 环境变量与配置 | |
| 25 | - | |
| 26 | -项目会在 `config/env_config.py` 中加载项目根目录的 `.env`,常用变量如下: | |
| 27 | - | |
| 28 | -```env | |
| 29 | -# Qwen / DashScope | |
| 7 | +```bash | |
| 8 | +# Qwen(默认) | |
| 30 | 9 | DASHSCOPE_API_KEY=sk-xxx |
| 31 | 10 | |
| 32 | 11 | # DeepL |
| 33 | 12 | DEEPL_AUTH_KEY=xxx |
| 34 | 13 | |
| 35 | -# 可选:翻译服务默认模型(HTTP 服务启动后若请求不传 model,则使用此默认值) | |
| 14 | +# 可选 | |
| 36 | 15 | TRANSLATION_MODEL=qwen # 或 deepl |
| 37 | 16 | ``` |
| 38 | 17 | |
| 39 | -说明: | |
| 40 | - | |
| 41 | -- **Qwen** 使用 `DASHSCOPE_API_KEY` | |
| 42 | -- **DeepL** 使用 `DEEPL_AUTH_KEY` | |
| 43 | -- `.env` 中的 `OPENAI_API_KEY` 不是本翻译模块必须项(当前实现用的是 `DASHSCOPE_API_KEY`) | |
| 44 | - | |
| 45 | ---- | |
| 46 | - | |
| 47 | -## 3. HTTP 翻译服务(`api/translator_app.py`) | |
| 48 | - | |
| 49 | -### 3.1 启动命令 | |
| 50 | - | |
| 51 | -推荐(热更新): | |
| 52 | - | |
| 53 | -```bash | |
| 54 | -cd /data/saas-search | |
| 55 | -uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload | |
| 56 | -``` | |
| 57 | - | |
| 58 | -指定默认模型(不传请求 `model` 时生效): | |
| 59 | - | |
| 60 | -```bash | |
| 61 | -cd /data/saas-search | |
| 62 | -export TRANSLATION_MODEL=qwen # 或 deepl | |
| 63 | -uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload | |
| 64 | -``` | |
| 65 | - | |
| 66 | -### 3.2 接口列表 | |
| 67 | - | |
| 68 | -- **GET** `/health`:健康检查(返回默认模型、已初始化模型列表等) | |
| 69 | -- **POST** `/translate`:翻译文本 | |
| 70 | -- **GET** `/docs`:Swagger UI | |
| 71 | - | |
| 72 | -### 3.3 `/translate` 请求参数 | |
| 73 | - | |
| 74 | -请求体(JSON): | |
| 75 | - | |
| 76 | -```json | |
| 77 | -{ | |
| 78 | - "text": "要翻译的文本", | |
| 79 | - "target_lang": "en", | |
| 80 | - "source_lang": "auto", | |
| 81 | - "model": "qwen" | |
| 82 | -} | |
| 83 | -``` | |
| 84 | - | |
| 85 | -- **text**:必填,待翻译文本 | |
| 86 | -- **target_lang**:必填,目标语言代码(见“语言支持”) | |
| 87 | -- **source_lang**:可选,源语言代码;不传或传 `"auto"` 时自动检测 | |
| 88 | -- **model**:可选,`"qwen"` 或 `"deepl"`;默认 `"qwen"` | |
| 89 | - | |
| 90 | -### 3.4 `/translate` 返回参数 | |
| 91 | - | |
| 92 | -响应体(JSON,成功时): | |
| 93 | - | |
| 94 | -```json | |
| 95 | -{ | |
| 96 | - "text": "商品名称", | |
| 97 | - "target_lang": "en", | |
| 98 | - "source_lang": "zh", | |
| 99 | - "translated_text": "Product name", | |
| 100 | - "status": "success", | |
| 101 | - "model": "qwen" | |
| 102 | -} | |
| 103 | -``` | |
| 104 | - | |
| 105 | -### 3.5 请求示例(curl) | |
| 106 | - | |
| 107 | -健康检查: | |
| 108 | - | |
| 109 | -```bash | |
| 110 | -curl http://localhost:6006/health | |
| 111 | -``` | |
| 112 | - | |
| 113 | -默认(qwen)中文 → 英文: | |
| 114 | - | |
| 115 | -```bash | |
| 116 | -curl -X POST http://localhost:6006/translate \ | |
| 117 | - -H "Content-Type: application/json" \ | |
| 118 | - -d '{"text":"我看到这个视频后没有笑","target_lang":"en","source_lang":"auto"}' | |
| 119 | -``` | |
| 120 | - | |
| 121 | -显式指定 qwen,英文 → 简体中文: | |
| 122 | - | |
| 123 | -```bash | |
| 124 | -curl -X POST http://localhost:6006/translate \ | |
| 125 | - -H "Content-Type: application/json" \ | |
| 126 | - -d '{"text":"Product name","target_lang":"zh","source_lang":"en","model":"qwen"}' | |
| 127 | -``` | |
| 128 | - | |
| 129 | -繁体中文(`zh_tw`)测试: | |
| 130 | - | |
| 131 | -```bash | |
| 132 | -curl -X POST http://localhost:6006/translate \ | |
| 133 | - -H "Content-Type: application/json" \ | |
| 134 | - -d '{"text":"商品名稱","target_lang":"zh_tw","source_lang":"auto","model":"qwen"}' | |
| 135 | -``` | |
| 136 | - | |
| 137 | -切换 DeepL: | |
| 138 | - | |
| 139 | -```bash | |
| 140 | -curl -X POST http://localhost:6006/translate \ | |
| 141 | - -H "Content-Type: application/json" \ | |
| 142 | - -d '{"text":"商品名称","target_lang":"en","source_lang":"zh","model":"deepl"}' | |
| 143 | -``` | |
| 144 | - | |
| 145 | -### 3.6 关于提示词(Prompt) | |
| 146 | - | |
| 147 | -HTTP 服务内部使用了固定提示词 `TRANSLATION_PROMPT`(适用于“商品 SKU 英文名”场景),并通过 `prompt` 参数传入 `Translator.translate()`。 | |
| 148 | - | |
| 149 | -- **DeepL**:`prompt` 会作为 DeepL 的 `context` 使用(影响翻译但不被翻译) | |
| 150 | -- **Qwen**:当前实现未将 `prompt/context` 传给 Qwen 的 `translation_options`(即对 Qwen 不生效) | |
| 151 | - | |
| 152 | ---- | |
| 153 | - | |
| 154 | -## 4. Python 翻译模块(`query/translator.py`) | |
| 155 | - | |
| 156 | -### 4.1 基本用法 | |
| 157 | - | |
| 158 | -```python | |
| 159 | -from query.translator import Translator | |
| 160 | - | |
| 161 | -# 默认使用 qwen | |
| 162 | -translator = Translator() | |
| 163 | - | |
| 164 | -result = translator.translate( | |
| 165 | - text="我看到这个视频后没有笑", | |
| 166 | - target_lang="en", | |
| 167 | - source_lang="auto", | |
| 168 | -) | |
| 169 | -print(result) | |
| 170 | -``` | |
| 171 | - | |
| 172 | -显式选择模型: | |
| 173 | - | |
| 174 | -```python | |
| 175 | -translator_qwen = Translator(model="qwen") | |
| 176 | -translator_deepl = Translator(model="deepl") | |
| 177 | -``` | |
| 178 | - | |
| 179 | -### 4.2 关键参数 | |
| 180 | - | |
| 181 | -- `Translator(model="qwen" | "deepl")`:选择翻译模型,默认 `"qwen"` | |
| 182 | -- `translate(text, target_lang, source_lang=None, context=None, prompt=None)`: | |
| 183 | - - `target_lang` / `source_lang`:语言代码(见“语言支持”) | |
| 184 | - - `source_lang` 为空或 `"auto"`:自动检测 | |
| 185 | - - `prompt`: | |
| 186 | - - DeepL:作为 `context` 使用 | |
| 187 | - - Qwen:当前未使用 | |
| 188 | - | |
| 189 | -### 4.3 缓存(Redis) | |
| 190 | - | |
| 191 | -`Translator(use_cache=True)` 时会连接 Redis 并缓存翻译结果。 | |
| 192 | - | |
| 193 | -- Redis 连接配置来自 `config/env_config.py` 的 `REDIS_CONFIG` | |
| 194 | -- 缓存 key 前缀默认 `trans`(可用 `REDIS_TRANSLATION_CACHE_PREFIX` 覆盖) | |
| 195 | - | |
| 196 | ---- | |
| 197 | - | |
| 198 | -## 5. Qwen 语言支持(按 qwen-mt-plus/flash/turbo 标准) | |
| 199 | - | |
| 200 | -> 以下为 Qwen 翻译模型支持的语言(**代码 → 英文名**),并已用于 `query/translator.py` 的映射。 | |
| 201 | - | |
| 202 | -| 代码 | 英文名 | | |
| 203 | -|------|--------| | |
| 204 | -| en | English | | |
| 205 | -| zh | Chinese | | |
| 206 | -| zh_tw | Traditional Chinese | | |
| 207 | -| ru | Russian | | |
| 208 | -| ja | Japanese | | |
| 209 | -| ko | Korean | | |
| 210 | -| es | Spanish | | |
| 211 | -| fr | French | | |
| 212 | -| pt | Portuguese | | |
| 213 | -| de | German | | |
| 214 | -| it | Italian | | |
| 215 | -| th | Thai | | |
| 216 | -| vi | Vietnamese | | |
| 217 | -| id | Indonesian | | |
| 218 | -| ms | Malay | | |
| 219 | -| ar | Arabic | | |
| 220 | -| hi | Hindi | | |
| 221 | -| he | Hebrew | | |
| 222 | -| my | Burmese | | |
| 223 | -| ta | Tamil | | |
| 224 | -| ur | Urdu | | |
| 225 | -| bn | Bengali | | |
| 226 | -| pl | Polish | | |
| 227 | -| nl | Dutch | | |
| 228 | -| ro | Romanian | | |
| 229 | -| tr | Turkish | | |
| 230 | -| km | Khmer | | |
| 231 | -| lo | Lao | | |
| 232 | -| yue | Cantonese | | |
| 233 | -| cs | Czech | | |
| 234 | -| el | Greek | | |
| 235 | -| sv | Swedish | | |
| 236 | -| hu | Hungarian | | |
| 237 | -| da | Danish | | |
| 238 | -| fi | Finnish | | |
| 239 | -| uk | Ukrainian | | |
| 240 | -| bg | Bulgarian | | |
| 241 | - | |
| 242 | ---- | |
| 243 | - | |
| 244 | -## 6. 常见问题(FAQ) | |
| 245 | - | |
| 246 | -### 6.1 Qwen 调用报错 / 无法初始化 | |
| 247 | - | |
| 248 | -- 确认 `.env` 中已配置 `DASHSCOPE_API_KEY` | |
| 249 | -- 确认安装依赖:`openai`(Python 包) | |
| 250 | -- 如在海外地域使用模型,将 `base_url` 切换为 `https://dashscope-intl.aliyuncs.com/compatible-mode/v1` | |
| 251 | - | |
| 252 | -### 6.2 DeepL 返回 403 / 翻译失败 | |
| 253 | - | |
| 254 | -- 确认 `.env` 中已配置 `DEEPL_AUTH_KEY` | |
| 255 | -- 若使用的是 Pro key,请使用 `https://api.deepl.com/v2/translate`(当前代码即为该地址) | |
| 18 | +## Provider 配置 | |
| 256 | 19 | |
| 20 | +Provider 与 URL 在 `config/config.yaml` 的 `services.translation`。详见 `docs/PROVIDER_ARCHITECTURE.md`。 | ... | ... |
embeddings/README.md
embeddings/image_encoder.py
| ... | ... | @@ -15,6 +15,8 @@ from typing import List, Optional, Union, Dict, Any |
| 15 | 15 | |
| 16 | 16 | logger = logging.getLogger(__name__) |
| 17 | 17 | |
| 18 | +from config.services_config import get_embedding_base_url | |
| 19 | + | |
| 18 | 20 | |
| 19 | 21 | class CLIPImageEncoder: |
| 20 | 22 | """ |
| ... | ... | @@ -30,7 +32,7 @@ class CLIPImageEncoder: |
| 30 | 32 | with cls._lock: |
| 31 | 33 | if cls._instance is None: |
| 32 | 34 | cls._instance = super(CLIPImageEncoder, cls).__new__(cls) |
| 33 | - resolved_url = service_url or os.getenv("EMBEDDING_SERVICE_URL", "http://localhost:6005") | |
| 35 | + resolved_url = service_url or os.getenv("EMBEDDING_SERVICE_URL") or get_embedding_base_url() | |
| 34 | 36 | logger.info(f"Creating CLIPImageEncoder instance with service URL: {resolved_url}") |
| 35 | 37 | cls._instance.service_url = resolved_url |
| 36 | 38 | cls._instance.endpoint = f"{resolved_url}/embed/image" | ... | ... |
embeddings/text_encoder.py
| ... | ... | @@ -18,6 +18,8 @@ import logging |
| 18 | 18 | |
| 19 | 19 | logger = logging.getLogger(__name__) |
| 20 | 20 | |
| 21 | +from config.services_config import get_embedding_base_url | |
| 22 | + | |
| 21 | 23 | # Try to import REDIS_CONFIG, but allow import to fail |
| 22 | 24 | try: |
| 23 | 25 | from config.env_config import REDIS_CONFIG |
| ... | ... | @@ -38,7 +40,7 @@ class BgeEncoder: |
| 38 | 40 | with cls._lock: |
| 39 | 41 | if cls._instance is None: |
| 40 | 42 | cls._instance = super(BgeEncoder, cls).__new__(cls) |
| 41 | - resolved_url = service_url or os.getenv("EMBEDDING_SERVICE_URL", "http://localhost:6005") | |
| 43 | + resolved_url = service_url or os.getenv("EMBEDDING_SERVICE_URL") or get_embedding_base_url() | |
| 42 | 44 | logger.info(f"Creating BgeEncoder instance with service URL: {resolved_url}") |
| 43 | 45 | cls._instance.service_url = resolved_url |
| 44 | 46 | cls._instance.endpoint = f"{resolved_url}/embed/text" | ... | ... |
indexer/README.md
indexer/indexing_utils.py
| ... | ... | @@ -99,13 +99,8 @@ def create_document_transformer( |
| 99 | 99 | index_langs = tenant_config.get("index_languages") or [] |
| 100 | 100 | need_translator = len(index_langs) > 1 |
| 101 | 101 | if translator is None and need_translator: |
| 102 | - from query.translator import Translator | |
| 103 | - translator = Translator( | |
| 104 | - api_key=config.query_config.translation_api_key, | |
| 105 | - use_cache=True, | |
| 106 | - glossary_id=config.query_config.translation_glossary_id, | |
| 107 | - translation_context=config.query_config.translation_context | |
| 108 | - ) | |
| 102 | + from providers import create_translation_provider | |
| 103 | + translator = create_translation_provider(config.query_config) | |
| 109 | 104 | |
| 110 | 105 | if translation_prompts is None: |
| 111 | 106 | translation_prompts = config.query_config.translation_prompts | ... | ... |
| ... | ... | @@ -0,0 +1,15 @@ |
| 1 | +""" | |
| 2 | +Pluggable providers for translation, embedding, rerank. | |
| 3 | + | |
| 4 | +All provider selection is driven by config/services_config (services block). | |
| 5 | +""" | |
| 6 | + | |
| 7 | +from .translation import create_translation_provider | |
| 8 | +from .rerank import create_rerank_provider | |
| 9 | +from .embedding import create_embedding_provider | |
| 10 | + | |
| 11 | +__all__ = [ | |
| 12 | + "create_translation_provider", | |
| 13 | + "create_rerank_provider", | |
| 14 | + "create_embedding_provider", | |
| 15 | +] | ... | ... |
| ... | ... | @@ -0,0 +1,41 @@ |
| 1 | +""" | |
| 2 | +Embedding provider - HTTP service (the "vllm" provider name is reserved and currently falls back to HTTP). | |
| 3 | + | |
| 4 | +Returns text/image encoders configured via services_config. | |
| 5 | +""" | |
| 6 | + | |
| 7 | +from __future__ import annotations | |
| 8 | + | |
| 9 | +from config.services_config import get_embedding_config, get_embedding_base_url | |
| 10 | + | |
| 11 | + | |
| 12 | +def create_embedding_provider() -> "EmbeddingProvider": | |
| 13 | + """Create embedding provider from services config.""" | |
| 14 | + cfg = get_embedding_config() | |
| 15 | + provider = (cfg.provider or "http").strip().lower() | |
| 16 | + if provider == "vllm": | |
| 17 | + import logging | |
| 18 | + logging.getLogger(__name__).warning("embedding provider 'vllm' is reserved, using HTTP.") | |
| 19 | + return EmbeddingProvider() | |
| 20 | + | |
| 21 | + | |
| 22 | +class EmbeddingProvider: | |
| 23 | + """ | |
| 24 | + Provides text and image encoders. Both use HTTP embedding service | |
| 25 | + configured via services_config. | |
| 26 | + """ | |
| 27 | + | |
| 28 | + def __init__(self) -> None: | |
| 29 | + self._base_url = get_embedding_base_url() | |
| 30 | + | |
| 31 | + @property | |
| 32 | + def text_encoder(self): | |
| 33 | + """Lazy-created text encoder (BgeEncoder).""" | |
| 34 | + from embeddings.text_encoder import BgeEncoder | |
| 35 | + return BgeEncoder(service_url=self._base_url) | |
| 36 | + | |
| 37 | + @property | |
| 38 | + def image_encoder(self): | |
| 39 | + """Lazy-created image encoder (CLIPImageEncoder).""" | |
| 40 | + from embeddings.image_encoder import CLIPImageEncoder | |
| 41 | + return CLIPImageEncoder(service_url=self._base_url) | ... | ... |
| ... | ... | @@ -0,0 +1,68 @@ |
| 1 | +""" | |
| 2 | +Rerank provider - HTTP service (the "vllm" provider name is reserved and currently falls back to HTTP). | |
| 3 | +""" | |
| 4 | + | |
| 5 | +from __future__ import annotations | |
| 6 | + | |
| 7 | +import logging | |
| 8 | +from typing import Any, Dict, List, Optional, Tuple | |
| 9 | + | |
| 10 | +import requests | |
| 11 | + | |
| 12 | +from config.services_config import get_rerank_config, get_rerank_service_url | |
| 13 | + | |
| 14 | +logger = logging.getLogger(__name__) | |
| 15 | + | |
| 16 | + | |
| 17 | +class HttpRerankProvider: | |
| 18 | + """Rerank via HTTP service.""" | |
| 19 | + | |
| 20 | + def __init__(self, service_url: str): | |
| 21 | + self.service_url = (service_url or "").rstrip("/") | |
| 22 | + | |
| 23 | + def rerank( | |
| 24 | + self, | |
| 25 | + query: str, | |
| 26 | + docs: List[str], | |
| 27 | + timeout_sec: float, | |
| 28 | + ) -> Tuple[Optional[List[float]], Optional[Dict[str, Any]]]: | |
| 29 | + if not docs: | |
| 30 | + return [], {} | |
| 31 | + try: | |
| 32 | + payload = {"query": (query or "").strip(), "docs": docs} | |
| 33 | + response = requests.post(self.service_url, json=payload, timeout=timeout_sec) | |
| 34 | + if response.status_code != 200: | |
| 35 | + logger.warning( | |
| 36 | + "Rerank service HTTP %s: %s", | |
| 37 | + response.status_code, | |
| 38 | + (response.text or "")[:200], | |
| 39 | + ) | |
| 40 | + return None, None | |
| 41 | + data = response.json() | |
| 42 | + scores = data.get("scores") | |
| 43 | + if not isinstance(scores, list): | |
| 44 | + return None, None | |
| 45 | + return scores, data.get("meta") or {} | |
| 46 | + except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectTimeout) as exc: | |
| 47 | + logger.warning( | |
| 48 | + "Rerank request timed out after %.1fs (docs=%d); returning ES order. %s", | |
| 49 | + timeout_sec, | |
| 50 | + len(docs), | |
| 51 | + exc, | |
| 52 | + ) | |
| 53 | + return None, None | |
| 54 | + except Exception as exc: | |
| 55 | + logger.warning("Rerank request failed: %s", exc, exc_info=True) | |
| 56 | + return None, None | |
| 57 | + | |
| 58 | + | |
| 59 | +def create_rerank_provider() -> HttpRerankProvider: | |
| 60 | + """Create rerank provider from services config.""" | |
| 61 | + cfg = get_rerank_config() | |
| 62 | + provider = (cfg.provider or "http").strip().lower() | |
| 63 | + | |
| 64 | + if provider == "vllm": | |
| 65 | + logger.warning("rerank provider 'vllm' is reserved, using HTTP.") | |
| 66 | + | |
| 67 | + url = get_rerank_service_url() | |
| 68 | + return HttpRerankProvider(service_url=url) | ... | ... |
| ... | ... | @@ -0,0 +1,182 @@ |
| 1 | +""" | |
| 2 | +Translation provider - direct (in-process) or HTTP service. | |
| 3 | +""" | |
| 4 | + | |
| 5 | +from __future__ import annotations | |
| 6 | + | |
| 7 | +import logging | |
| 8 | +from typing import Any, Dict, List, Optional, Union | |
| 9 | + | |
| 10 | +from concurrent.futures import Future, ThreadPoolExecutor | |
| 11 | +import requests | |
| 12 | + | |
| 13 | +from config.services_config import get_translation_config, get_translation_base_url | |
| 14 | + | |
| 15 | +logger = logging.getLogger(__name__) | |
| 16 | + | |
| 17 | + | |
| 18 | +class HttpTranslationProvider: | |
| 19 | + """Translation via HTTP service.""" | |
| 20 | + | |
| 21 | + def __init__( | |
| 22 | + self, | |
| 23 | + base_url: str, | |
| 24 | + model: str = "qwen", | |
| 25 | + timeout_sec: float = 10.0, | |
| 26 | + translation_context: Optional[str] = None, | |
| 27 | + ): | |
| 28 | + self.base_url = (base_url or "").rstrip("/") | |
| 29 | + self.model = model or "qwen" | |
| 30 | + self.timeout_sec = float(timeout_sec or 10.0) | |
| 31 | + self.translation_context = translation_context or "e-commerce product search" | |
| 32 | + self.executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="http-translator") | |
| 33 | + | |
| 34 | + def _translate_once( | |
| 35 | + self, | |
| 36 | + text: str, | |
| 37 | + target_lang: str, | |
| 38 | + source_lang: Optional[str] = None, | |
| 39 | + ) -> Optional[str]: | |
| 40 | + if not text or not str(text).strip(): | |
| 41 | + return text | |
| 42 | + try: | |
| 43 | + url = f"{self.base_url}/translate" | |
| 44 | + payload = { | |
| 45 | + "text": text, | |
| 46 | + "target_lang": target_lang, | |
| 47 | + "source_lang": source_lang or "auto", | |
| 48 | + "model": self.model, | |
| 49 | + } | |
| 50 | + response = requests.post(url, json=payload, timeout=self.timeout_sec) | |
| 51 | + if response.status_code != 200: | |
| 52 | + logger.warning( | |
| 53 | + "HTTP translator failed: status=%s body=%s", | |
| 54 | + response.status_code, | |
| 55 | + (response.text or "")[:200], | |
| 56 | + ) | |
| 57 | + return None | |
| 58 | + data = response.json() | |
| 59 | + translated = data.get("translated_text") | |
| 60 | + return translated if translated is not None else None | |
| 61 | + except Exception as exc: | |
| 62 | + logger.warning("HTTP translator request failed: %s", exc, exc_info=True) | |
| 63 | + return None | |
| 64 | + | |
| 65 | + def translate( | |
| 66 | + self, | |
| 67 | + text: str, | |
| 68 | + target_lang: str, | |
| 69 | + source_lang: Optional[str] = None, | |
| 70 | + context: Optional[str] = None, | |
| 71 | + prompt: Optional[str] = None, | |
| 72 | + ) -> Optional[str]: | |
| 73 | + del context, prompt | |
| 74 | + result = self._translate_once(text=text, target_lang=target_lang, source_lang=source_lang) | |
| 75 | + return result if result is not None else text | |
| 76 | + | |
| 77 | + def translate_multi( | |
| 78 | + self, | |
| 79 | + text: str, | |
| 80 | + target_langs: List[str], | |
| 81 | + source_lang: Optional[str] = None, | |
| 82 | + context: Optional[str] = None, | |
| 83 | + async_mode: bool = True, | |
| 84 | + prompt: Optional[str] = None, | |
| 85 | + ) -> Dict[str, Optional[str]]: | |
| 86 | + del context, async_mode, prompt | |
| 87 | + out: Dict[str, Optional[str]] = {} | |
| 88 | + for lang in target_langs: | |
| 89 | + out[lang] = self.translate(text, lang, source_lang=source_lang) | |
| 90 | + return out | |
| 91 | + | |
| 92 | + def translate_multi_async( | |
| 93 | + self, | |
| 94 | + text: str, | |
| 95 | + target_langs: List[str], | |
| 96 | + source_lang: Optional[str] = None, | |
| 97 | + context: Optional[str] = None, | |
| 98 | + prompt: Optional[str] = None, | |
| 99 | + ) -> Dict[str, Union[str, Future]]: | |
| 100 | + del context, prompt | |
| 101 | + out: Dict[str, Union[str, Future]] = {} | |
| 102 | + for lang in target_langs: | |
| 103 | + out[lang] = self.executor.submit(self.translate, text, lang, source_lang) | |
| 104 | + return out | |
| 105 | + | |
| 106 | + def translate_for_indexing( | |
| 107 | + self, | |
| 108 | + text: str, | |
| 109 | + shop_language: str, | |
| 110 | + source_lang: Optional[str] = None, | |
| 111 | + context: Optional[str] = None, | |
| 112 | + prompt: Optional[str] = None, | |
| 113 | + index_languages: Optional[List[str]] = None, | |
| 114 | + ) -> Dict[str, Optional[str]]: | |
| 115 | + del context, prompt | |
| 116 | + langs = index_languages if index_languages else ["en", "zh"] | |
| 117 | + source = source_lang or shop_language or "auto" | |
| 118 | + out: Dict[str, Optional[str]] = {} | |
| 119 | + for lang in langs: | |
| 120 | + if lang == shop_language: | |
| 121 | + out[lang] = text | |
| 122 | + else: | |
| 123 | + out[lang] = self.translate(text, target_lang=lang, source_lang=source) | |
| 124 | + return out | |
| 125 | + | |
| 126 | + | |
| 127 | +def create_translation_provider(query_config: Any = None) -> Any: | |
| 128 | + """ | |
| 129 | + Create translation provider from services config. | |
| 130 | + | |
| 131 | + query_config: optional; supplies translation_api_key / translation_glossary_id (direct provider only) and translation_context (direct and HTTP providers). | |
| 132 | + """ | |
| 133 | + cfg = get_translation_config() | |
| 134 | + provider = cfg.provider | |
| 135 | + pc = cfg.get_provider_cfg() | |
| 136 | + | |
| 137 | + if provider in ("direct", "local", "inprocess"): | |
| 138 | + from query.translator import Translator | |
| 139 | + model = pc.get("model") or "qwen" | |
| 140 | + qc = query_config or _empty_query_config() | |
| 141 | + return Translator( | |
| 142 | + model=model, | |
| 143 | + api_key=getattr(qc, "translation_api_key", None), | |
| 144 | + use_cache=True, | |
| 145 | + glossary_id=getattr(qc, "translation_glossary_id", None), | |
| 146 | + translation_context=getattr(qc, "translation_context", "e-commerce product search"), | |
| 147 | + ) | |
| 148 | + | |
| 149 | + if provider in ("http", "service"): | |
| 150 | + base_url = get_translation_base_url() | |
| 151 | + model = pc.get("model") or "qwen" | |
| 152 | + timeout = pc.get("timeout_sec", 10.0) | |
| 153 | + qc = query_config or _empty_query_config() | |
| 154 | + return HttpTranslationProvider( | |
| 155 | + base_url=base_url, | |
| 156 | + model=model, | |
| 157 | + timeout_sec=float(timeout), | |
| 158 | + translation_context=getattr(qc, "translation_context", "e-commerce product search"), | |
| 159 | + ) | |
| 160 | + | |
| 161 | + logger.warning( | |
| 162 | + "Unsupported translation provider '%s', fallback to direct.", | |
| 163 | + provider, | |
| 164 | + ) | |
| 165 | + from query.translator import Translator | |
| 166 | + qc = query_config or _empty_query_config() | |
| 167 | + return Translator( | |
| 168 | + model=pc.get("model") or "qwen", | |
| 169 | + api_key=getattr(qc, "translation_api_key", None), | |
| 170 | + use_cache=True, | |
| 171 | + glossary_id=getattr(qc, "translation_glossary_id", None), | |
| 172 | + translation_context=getattr(qc, "translation_context", "e-commerce product search"), | |
| 173 | + ) | |
| 174 | + | |
| 175 | + | |
| 176 | +def _empty_query_config() -> Any: | |
| 177 | + """Minimal object with default translation attrs.""" | |
| 178 | + class _QC: | |
| 179 | + translation_api_key = None | |
| 180 | + translation_glossary_id = None | |
| 181 | + translation_context = "e-commerce product search" | |
| 182 | + return _QC() | ... | ... |
query/__init__.py
| ... | ... | @@ -2,12 +2,15 @@ |
| 2 | 2 | |
| 3 | 3 | from .language_detector import LanguageDetector |
| 4 | 4 | from .translator import Translator |
| 5 | +from .translation_client import HttpTranslationClient, create_translation_client | |
| 5 | 6 | from .query_rewriter import QueryRewriter, QueryNormalizer |
| 6 | 7 | from .query_parser import QueryParser, ParsedQuery |
| 7 | 8 | |
| 8 | 9 | __all__ = [ |
| 9 | 10 | 'LanguageDetector', |
| 10 | 11 | 'Translator', |
| 12 | + 'HttpTranslationClient', | |
| 13 | + 'create_translation_client', | |
| 11 | 14 | 'QueryRewriter', |
| 12 | 15 | 'QueryNormalizer', |
| 13 | 16 | 'QueryParser', | ... | ... |
query/query_parser.py
| ... | ... | @@ -13,7 +13,7 @@ from concurrent.futures import Future, ThreadPoolExecutor, as_completed |
| 13 | 13 | from embeddings import BgeEncoder |
| 14 | 14 | from config import SearchConfig |
| 15 | 15 | from .language_detector import LanguageDetector |
| 16 | -from .translator import Translator | |
| 16 | +from providers import create_translation_provider | |
| 17 | 17 | from .query_rewriter import QueryRewriter, QueryNormalizer |
| 18 | 18 | |
| 19 | 19 | logger = logging.getLogger(__name__) |
| ... | ... | @@ -78,7 +78,7 @@ class QueryParser: |
| 78 | 78 | self, |
| 79 | 79 | config: SearchConfig, |
| 80 | 80 | text_encoder: Optional[BgeEncoder] = None, |
| 81 | - translator: Optional[Translator] = None | |
| 81 | + translator: Optional[Any] = None | |
| 82 | 82 | ): |
| 83 | 83 | """ |
| 84 | 84 | Initialize query parser. |
| ... | ... | @@ -123,16 +123,13 @@ class QueryParser: |
| 123 | 123 | return self._text_encoder |
| 124 | 124 | |
| 125 | 125 | @property |
| 126 | - def translator(self) -> Translator: | |
| 126 | + def translator(self) -> Any: | |
| 127 | 127 | """Lazy load translator.""" |
| 128 | 128 | if self._translator is None: |
| 129 | - logger.info("Initializing translator (lazy load)...") | |
| 130 | - self._translator = Translator( | |
| 131 | - api_key=self.config.query_config.translation_api_key, | |
| 132 | - use_cache=True, | |
| 133 | - glossary_id=self.config.query_config.translation_glossary_id, | |
| 134 | - translation_context=self.config.query_config.translation_context | |
| 135 | - ) | |
| 129 | + from config.services_config import get_translation_config | |
| 130 | + cfg = get_translation_config() | |
| 131 | + logger.info("Initializing translator (provider=%s)...", cfg.provider) | |
| 132 | + self._translator = create_translation_provider(self.config.query_config) | |
| 136 | 133 | return self._translator |
| 137 | 134 | |
| 138 | 135 | def _simple_tokenize(self, text: str) -> List[str]: | ... | ... |
| ... | ... | @@ -0,0 +1,20 @@ |
| 1 | +""" | |
| 2 | +Translation client - delegates to providers. | |
| 3 | + | |
| 4 | +Deprecated: use providers.create_translation_provider() instead. | |
| 5 | +Kept for backward compatibility. | |
| 6 | +""" | |
| 7 | + | |
| 8 | +from __future__ import annotations | |
| 9 | + | |
| 10 | +from typing import Any | |
| 11 | + | |
| 12 | +from providers.translation import ( | |
| 13 | + HttpTranslationProvider as HttpTranslationClient, | |
| 14 | + create_translation_provider, | |
| 15 | +) | |
| 16 | + | |
| 17 | + | |
| 18 | +def create_translation_client(query_config: Any) -> Any: | |
| 19 | + """Backward compat: delegate to create_translation_provider.""" | |
| 20 | + return create_translation_provider(query_config) | ... | ... |
reranker/README.md
search/rerank_client.py
| ... | ... | @@ -8,9 +8,10 @@ |
| 8 | 8 | """ |
| 9 | 9 | |
| 10 | 10 | from typing import Dict, Any, List, Optional, Tuple |
| 11 | -import os | |
| 12 | 11 | import logging |
| 13 | 12 | |
| 13 | +from providers import create_rerank_provider | |
| 14 | + | |
| 14 | 15 | logger = logging.getLogger(__name__) |
| 15 | 16 | |
| 16 | 17 | # 默认融合权重:ES 归一化分数权重、重排分数权重(相加为 1) |
| ... | ... | @@ -78,46 +79,17 @@ def build_docs_from_hits( |
| 78 | 79 | def call_rerank_service( |
| 79 | 80 | query: str, |
| 80 | 81 | docs: List[str], |
| 81 | - service_url: str, | |
| 82 | 82 | timeout_sec: float = DEFAULT_TIMEOUT_SEC, |
| 83 | 83 | ) -> Tuple[Optional[List[float]], Optional[Dict[str, Any]]]: |
| 84 | 84 | """ |
| 85 | 85 | 调用重排服务 POST /rerank,返回分数列表与 meta。 |
| 86 | - | |
| 87 | - Args: | |
| 88 | - query: 搜索查询字符串 | |
| 89 | - docs: 文档文本列表(与 ES hits 顺序一致) | |
| 90 | - service_url: 完整 URL,如 http://127.0.0.1:6007/rerank | |
| 91 | - timeout_sec: 请求超时秒数 | |
| 92 | - | |
| 93 | - Returns: | |
| 94 | - (scores, meta):成功时 scores 与 docs 等长,meta 为服务返回的 meta; | |
| 95 | - 失败时返回 (None, None) | |
| 86 | + Provider 和 URL 从 services_config 读取。 | |
| 96 | 87 | """ |
| 97 | 88 | if not docs: |
| 98 | 89 | return [], {} |
| 99 | 90 | try: |
| 100 | - import requests | |
| 101 | - payload = {"query": (query or "").strip(), "docs": docs} | |
| 102 | - response = requests.post(service_url, json=payload, timeout=timeout_sec) | |
| 103 | - if response.status_code != 200: | |
| 104 | - logger.warning( | |
| 105 | - "Rerank service HTTP %s: %s", | |
| 106 | - response.status_code, | |
| 107 | - (response.text or "")[:200], | |
| 108 | - ) | |
| 109 | - return None, None | |
| 110 | - data = response.json() | |
| 111 | - scores = data.get("scores") | |
| 112 | - if not isinstance(scores, list): | |
| 113 | - return None, None | |
| 114 | - return scores, data.get("meta") or {} | |
| 115 | - except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectTimeout) as e: | |
| 116 | - logger.warning( | |
| 117 | - "Rerank request timed out after %.1fs (docs=%d); returning ES order. %s", | |
| 118 | - timeout_sec, len(docs), e, | |
| 119 | - ) | |
| 120 | - return None, None | |
| 91 | + client = create_rerank_provider() | |
| 92 | + return client.rerank(query=query, docs=docs, timeout_sec=timeout_sec) | |
| 121 | 93 | except Exception as e: |
| 122 | 94 | logger.warning("Rerank request failed: %s", e, exc_info=True) |
| 123 | 95 | return None, None |
| ... | ... | @@ -199,7 +171,6 @@ def run_rerank( |
| 199 | 171 | query: str, |
| 200 | 172 | es_response: Dict[str, Any], |
| 201 | 173 | language: str = "zh", |
| 202 | - service_url: Optional[str] = None, | |
| 203 | 174 | timeout_sec: float = DEFAULT_TIMEOUT_SEC, |
| 204 | 175 | weight_es: float = DEFAULT_WEIGHT_ES, |
| 205 | 176 | weight_ai: float = DEFAULT_WEIGHT_AI, |
| ... | ... | @@ -208,41 +179,19 @@ def run_rerank( |
| 208 | 179 | ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]], List[Dict[str, Any]]]: |
| 209 | 180 | """ |
| 210 | 181 | 完整重排流程:从 es_response 取 hits -> 构造 docs -> 调服务 -> 融合分数并重排 -> 更新 max_score。 |
| 211 | - | |
| 212 | - Args: | |
| 213 | - query: 搜索查询 | |
| 214 | - es_response: ES 原始响应(其中的 hits["hits"] 会被原地修改) | |
| 215 | - language: 文档文本使用的语言 | |
| 216 | - service_url: 重排服务 URL,为 None 时使用默认 127.0.0.1:6007 | |
| 217 | - timeout_sec: 请求超时 | |
| 218 | - weight_es: ES 分数权重 | |
| 219 | - weight_ai: 重排分数权重 | |
| 220 | - | |
| 221 | - Returns: | |
| 222 | - (es_response, rerank_meta, fused_debug): | |
| 223 | - - es_response: 已更新 hits 与 max_score 的响应(同一引用) | |
| 224 | - - rerank_meta: 重排服务返回的 meta,失败时为 None | |
| 225 | - - fused_debug: 每条文档的融合信息,供 debug 使用 | |
| 182 | + Provider 和 URL 从 services_config 读取。 | |
| 226 | 183 | """ |
| 227 | - try: | |
| 228 | - from reranker.config import CONFIG as RERANKER_CONFIG | |
| 229 | - except Exception: | |
| 230 | - RERANKER_CONFIG = None | |
| 231 | - | |
| 232 | - url = service_url or os.getenv("RERANKER_SERVICE_URL") | |
| 233 | - if not url and RERANKER_CONFIG is not None: | |
| 234 | - url = f"http://127.0.0.1:{RERANKER_CONFIG.PORT}/rerank" | |
| 235 | - if not url: | |
| 236 | - url = "http://127.0.0.1:6007/rerank" | |
| 237 | - | |
| 238 | 184 | hits = es_response.get("hits", {}).get("hits") or [] |
| 239 | 185 | if not hits: |
| 240 | 186 | return es_response, None, [] |
| 241 | 187 | |
| 242 | - # Apply query template (supports {query}) | |
| 243 | 188 | query_text = str(rerank_query_template).format_map({"query": query}) |
| 244 | 189 | docs = build_docs_from_hits(hits, language=language, doc_template=rerank_doc_template) |
| 245 | - scores, meta = call_rerank_service(query_text, docs, url, timeout_sec=timeout_sec) | |
| 190 | + scores, meta = call_rerank_service( | |
| 191 | + query_text, | |
| 192 | + docs, | |
| 193 | + timeout_sec=timeout_sec, | |
| 194 | + ) | |
| 246 | 195 | |
| 247 | 196 | if scores is None or len(scores) != len(hits): |
| 248 | 197 | return es_response, None, [] | ... | ... |
search/searcher.py
| ... | ... | @@ -392,7 +392,6 @@ class Searcher: |
| 392 | 392 | query=rerank_query, |
| 393 | 393 | es_response=es_response, |
| 394 | 394 | language=language, |
| 395 | - service_url=rc.service_url, | |
| 396 | 395 | timeout_sec=rc.timeout_sec, |
| 397 | 396 | weight_es=rc.weight_es, |
| 398 | 397 | weight_ai=rc.weight_ai, |
| ... | ... | @@ -401,11 +400,8 @@ class Searcher: |
| 401 | 400 | ) |
| 402 | 401 | |
| 403 | 402 | if rerank_meta is not None: |
| 404 | - rerank_url = ( | |
| 405 | - rc.service_url | |
| 406 | - or os.getenv("RERANKER_SERVICE_URL") | |
| 407 | - or "http://127.0.0.1:6007/rerank" | |
| 408 | - ) | |
| 403 | + from config.services_config import get_rerank_service_url | |
| 404 | + rerank_url = get_rerank_service_url() | |
| 409 | 405 | context.metadata.setdefault("rerank_info", {}) |
| 410 | 406 | context.metadata["rerank_info"].update({ |
| 411 | 407 | "service_url": rerank_url, | ... | ... |
start_reranker.sh deleted
tests/test_suggestions.py
| ... | ... | @@ -122,11 +122,12 @@ def test_resolve_query_language_uses_request_params_when_log_missing(): |
| 122 | 122 | |
| 123 | 123 | @pytest.mark.unit |
| 124 | 124 | def test_resolve_query_language_fallback_to_primary(): |
| 125 | - """当无任何语言线索时,应回落到租户 primary_language。""" | |
| 125 | + """当无任何语言线索时(无 script 检测),应回落到租户 primary_language。""" | |
| 126 | 126 | fake_es = FakeESClient() |
| 127 | 127 | builder = SuggestionIndexBuilder(es_client=fake_es, db_engine=None) |
| 128 | + # "123" 无 CJK/Latin 等 script,_detect_script_language 返回 None | |
| 128 | 129 | lang, conf, source, conflict = builder._resolve_query_language( |
| 129 | - query="some text", | |
| 130 | + query="123", | |
| 130 | 131 | log_language=None, |
| 131 | 132 | request_params=None, |
| 132 | 133 | index_languages=["zh", "en"], | ... | ... |