Commit 2e3670ab2404580769ceedac7154de7144b2ebde
1 parent
7299bae6
index services
Showing
9 changed files
with
400 additions
and
209 deletions
Show diff stats
.env.example
| 1 | # Environment Configuration Template | 1 | # Environment Configuration Template |
| 2 | -# Copy this file to .env and update with your actual values | 2 | +# Copy this file to .env and update with your actual values. |
| 3 | +# 生产/测试凭证与远程登录方式见 docs/QUICKSTART.md §1.6 | ||
| 3 | 4 | ||
| 4 | -# Elasticsearch Configuration (v8.18) | 5 | +# Elasticsearch (生产默认 10.200.16.14:9200,本地可用 localhost) |
| 5 | ES_HOST=http://localhost:9200 | 6 | ES_HOST=http://localhost:9200 |
| 6 | -ES_USERNAME= | 7 | +ES_USERNAME=saas |
| 7 | ES_PASSWORD= | 8 | ES_PASSWORD= |
| 8 | 9 | ||
| 9 | -# Redis Configuration (for caching) | 10 | +# Redis (生产默认 10.200.16.14:6479) |
| 10 | REDIS_HOST=localhost | 11 | REDIS_HOST=localhost |
| 11 | REDIS_PORT=6479 | 12 | REDIS_PORT=6479 |
| 12 | REDIS_PASSWORD= | 13 | REDIS_PASSWORD= |
| @@ -43,9 +44,9 @@ IMAGE_MODEL_DIR=/data/tw/models/cn-clip # 已经改为web请求了,不使用… | @@ -43,9 +44,9 @@ IMAGE_MODEL_DIR=/data/tw/models/cn-clip # 已经改为web请求了,不使用… | ||
| 43 | # Cache Directory | 44 | # Cache Directory |
| 44 | CACHE_DIR=.cache | 45 | CACHE_DIR=.cache |
| 45 | 46 | ||
| 46 | -# MySQL Database Configuration (Shoplazza) | ||
| 47 | -DB_HOST= | ||
| 48 | -DB_PORT=3306 | ||
| 49 | -DB_DATABASE= | ||
| 50 | -DB_USERNAME= | 47 | +# MySQL (Shoplazza,生产默认 10.200.16.14:3316) |
| 48 | +DB_HOST=localhost | ||
| 49 | +DB_PORT=3316 | ||
| 50 | +DB_DATABASE=saas | ||
| 51 | +DB_USERNAME=saas | ||
| 51 | DB_PASSWORD= | 52 | DB_PASSWORD= |
CLAUDE.md
| @@ -28,7 +28,7 @@ This is a **production-ready Multi-Tenant E-Commerce Search SaaS** platform spec | @@ -28,7 +28,7 @@ This is a **production-ready Multi-Tenant E-Commerce Search SaaS** platform spec | ||
| 28 | # Optional on new machine: if conda is ~/anaconda3/bin/conda → export CONDA_ROOT=$HOME/anaconda3 | 28 | # Optional on new machine: if conda is ~/anaconda3/bin/conda → export CONDA_ROOT=$HOME/anaconda3 |
| 29 | source activate.sh | 29 | source activate.sh |
| 30 | ``` | 30 | ``` |
| 31 | -See `docs/环境配置说明.md` for first-time env creation (`conda env create -f environment.yml` or `pip install -r requirements.txt`). | 31 | +See `docs/QUICKSTART.md` §1.4–1.8 for first-time env creation and production credentials (venv: `./scripts/create_venv.sh`; conda: `conda env create -f environment.yml` or `pip install -r requirements.txt`). |
| 32 | 32 | ||
| 33 | **Database Configuration:** | 33 | **Database Configuration:** |
| 34 | ```yaml | 34 | ```yaml |
README.md
| @@ -81,7 +81,7 @@ START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh | @@ -81,7 +81,7 @@ START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh | ||
| 81 | | 2. 运行与排障 | `docs/Usage-Guide.md` | | 81 | | 2. 运行与排障 | `docs/Usage-Guide.md` | |
| 82 | | 3. API 详细说明 | `docs/搜索API对接指南.md` | | 82 | | 3. API 详细说明 | `docs/搜索API对接指南.md` | |
| 83 | | 4. 快速参数速查 | `docs/搜索API速查表.md` | | 83 | | 4. 快速参数速查 | `docs/搜索API速查表.md` | |
| 84 | -| 5. 首次环境搭建 | `docs/环境配置说明.md` | | 84 | +| 5. 首次环境搭建、生产凭证 | `docs/QUICKSTART.md` §1.4–1.8 | |
| 85 | 85 | ||
| 86 | --- | 86 | --- |
| 87 | 87 |
docs/DEVELOPER_GUIDE.md
| @@ -48,7 +48,7 @@ | @@ -48,7 +48,7 @@ | ||
| 48 | - [系统设计文档.md](./系统设计文档.md) — 索引结构、数据流、通用化设计 | 48 | - [系统设计文档.md](./系统设计文档.md) — 索引结构、数据流、通用化设计 |
| 49 | - [基础配置指南.md](./基础配置指南.md) — 索引与查询配置说明 | 49 | - [基础配置指南.md](./基础配置指南.md) — 索引与查询配置说明 |
| 50 | - [搜索API对接指南.md](./搜索API对接指南.md) — 搜索/索引/管理接口完整说明 | 50 | - [搜索API对接指南.md](./搜索API对接指南.md) — 搜索/索引/管理接口完整说明 |
| 51 | -- [环境配置说明.md](./环境配置说明.md) — 首次部署、新机器环境 | 51 | +- [QUICKSTART.md](./QUICKSTART.md) §1.4–1.8 — 系统要求、Python 环境、外部服务与生产凭证、店匠数据源(原环境配置说明已并入) |
| 52 | - [Usage-Guide.md](./Usage-Guide.md) — 运维、日志、多环境、故障排查 | 52 | - [Usage-Guide.md](./Usage-Guide.md) — 运维、日志、多环境、故障排查 |
| 53 | 53 | ||
| 54 | --- | 54 | --- |
| @@ -258,7 +258,7 @@ services: | @@ -258,7 +258,7 @@ services: | ||
| 258 | - 能力选择:`TRANSLATION_PROVIDER`、`EMBEDDING_PROVIDER`、`RERANK_PROVIDER`、`RERANK_BACKEND` | 258 | - 能力选择:`TRANSLATION_PROVIDER`、`EMBEDDING_PROVIDER`、`RERANK_PROVIDER`、`RERANK_BACKEND` |
| 259 | - 环境与索引:`ES_HOST`、`ES_INDEX_NAMESPACE`、`RUNTIME_ENV`、DB 与 Redis 等 | 259 | - 环境与索引:`ES_HOST`、`ES_INDEX_NAMESPACE`、`RUNTIME_ENV`、DB 与 Redis 等 |
| 260 | 260 | ||
| 261 | -详见 [环境配置说明.md](./环境配置说明.md)、[Usage-Guide.md](./Usage-Guide.md)。 | 261 | +详见 [QUICKSTART.md](./QUICKSTART.md) §1.6(.env 与生产凭证)、[Usage-Guide.md](./Usage-Guide.md)。 |
| 262 | 262 | ||
| 263 | --- | 263 | --- |
| 264 | 264 | ||
| @@ -370,7 +370,7 @@ services: | @@ -370,7 +370,7 @@ services: | ||
| 370 | | 索引与查询配置说明 | [基础配置指南.md](./基础配置指南.md) | | 370 | | 索引与查询配置说明 | [基础配置指南.md](./基础配置指南.md) | |
| 371 | | 搜索/索引 API 完整说明 | [搜索API对接指南.md](./搜索API对接指南.md) | | 371 | | 搜索/索引 API 完整说明 | [搜索API对接指南.md](./搜索API对接指南.md) | |
| 372 | | 搜索 API 参数速查 | [搜索API速查表.md](./搜索API速查表.md) | | 372 | | 搜索 API 参数速查 | [搜索API速查表.md](./搜索API速查表.md) | |
| 373 | -| 首次部署、新机器环境 | [环境配置说明.md](./环境配置说明.md) | | 373 | +| 首次部署、新机器环境、生产凭证 | [QUICKSTART.md](./QUICKSTART.md) §1.4–1.8 | |
| 374 | | 运维、日志、多环境、故障 | [Usage-Guide.md](./Usage-Guide.md) | | 374 | | 运维、日志、多环境、故障 | [Usage-Guide.md](./Usage-Guide.md) | |
| 375 | | 索引模块职责与 Java 对接 | [indexer/README.md](../indexer/README.md) | | 375 | | 索引模块职责与 Java 对接 | [indexer/README.md](../indexer/README.md) | |
| 376 | | 向量模块与 clip-as-service | [embeddings/README.md](../embeddings/README.md) | | 376 | | 向量模块与 clip-as-service | [embeddings/README.md](../embeddings/README.md) | |
docs/QUICKSTART.md
| @@ -22,6 +22,7 @@ | @@ -22,6 +22,7 @@ | ||
| 22 | ## 目录 | 22 | ## 目录 |
| 23 | 23 | ||
| 24 | 1. [快速上手](#1-快速上手) | 24 | 1. [快速上手](#1-快速上手) |
| 25 | + - [1.1 环境准备](#11-环境准备) / [1.2 服务与端口](#12-服务与端口) / [1.3 常用 API](#13-常用-api-请求示例) / [1.4 系统要求](#14-系统要求) / [1.5 Python 运行环境](#15-python-运行环境详细) / [1.6 外部服务与 .env](#16-外部服务与-env含生产凭证) / [1.7 店匠数据源](#17-店匠数据源说明) / [1.8 相关脚本](#18-相关脚本) / [1.9 配置入口总览](#19-配置入口总览) | ||
| 25 | 2. [基础配置与搜索行为](#2-基础配置与搜索行为) | 26 | 2. [基础配置与搜索行为](#2-基础配置与搜索行为) |
| 26 | 3. [Provider 架构](#3-provider-架构) | 27 | 3. [Provider 架构](#3-provider-架构) |
| 27 | 4. [模块扩展规范(Embedding / Rerank)](#4-模块扩展规范embedding--rerank) | 28 | 4. [模块扩展规范(Embedding / Rerank)](#4-模块扩展规范embedding--rerank) |
| @@ -37,10 +38,8 @@ | @@ -37,10 +38,8 @@ | ||
| 37 | 38 | ||
| 38 | ```bash | 39 | ```bash |
| 39 | source activate.sh | 40 | source activate.sh |
| 40 | -# 首次推荐: | 41 | +# 首次创建环境: |
| 41 | ./scripts/create_venv.sh | 42 | ./scripts/create_venv.sh |
| 42 | -# 或使用 conda: | ||
| 43 | -# conda env create -f environment.yml | ||
| 44 | ``` | 43 | ``` |
| 45 | 44 | ||
| 46 | 依赖:Python 3.8+、Elasticsearch 8.x、MySQL、Redis(可选,缓存用途)。 | 45 | 依赖:Python 3.8+、Elasticsearch 8.x、MySQL、Redis(可选,缓存用途)。 |
| @@ -52,7 +51,7 @@ INSTALL_ML=1 ./scripts/create_venv.sh | @@ -52,7 +51,7 @@ INSTALL_ML=1 ./scripts/create_venv.sh | ||
| 52 | source activate.sh | 51 | source activate.sh |
| 53 | ``` | 52 | ``` |
| 54 | 53 | ||
| 55 | -详细环境说明见 `docs/环境配置说明.md`。 | 54 | +更详细的系统要求、Python 环境、外部服务与生产凭证见 [1.4–1.8](#14-系统要求)。 |
| 56 | 55 | ||
| 57 | ### 1.2 服务与端口 | 56 | ### 1.2 服务与端口 |
| 58 | 57 | ||
| @@ -162,7 +161,134 @@ curl -X POST http://localhost:6007/rerank \ | @@ -162,7 +161,134 @@ curl -X POST http://localhost:6007/rerank \ | ||
| 162 | -d '{"query":"wireless mouse","docs":["logitech mx master","usb cable"]}' | 161 | -d '{"query":"wireless mouse","docs":["logitech mx master","usb cable"]}' |
| 163 | ``` | 162 | ``` |
| 164 | 163 | ||
| 165 | -### 1.4 配置入口总览 | 164 | +### 1.4 系统要求 |
| 165 | + | ||
| 166 | +- **操作系统**:Linux(推荐 Ubuntu 18.04+) | ||
| 167 | +- **Python**:3.10(由 venv 提供) | ||
| 168 | +- **内存**:建议 8GB+(含模型与 ES) | ||
| 169 | +- **磁盘**:10GB+(含模型与索引) | ||
| 170 | + | ||
| 171 | +### 1.5 Python 运行环境(详细) | ||
| 172 | + | ||
| 173 | +项目根目录的 `activate.sh` 激活 **`.venv`** 并加载当前目录 `.env`。 | ||
| 174 | + | ||
| 175 | +```bash | ||
| 176 | +cd /data/saas-search | ||
| 177 | +./scripts/create_venv.sh | ||
| 178 | +source activate.sh | ||
| 179 | +``` | ||
| 180 | + | ||
| 181 | +如需运行本地 embedding / 图像编码(torch/transformers 等): | ||
| 182 | + | ||
| 183 | +```bash | ||
| 184 | +INSTALL_ML=1 ./scripts/create_venv.sh | ||
| 185 | +source activate.sh | ||
| 186 | +``` | ||
| 187 | + | ||
| 188 | +### 1.6 外部服务与 .env(含生产凭证) | ||
| 189 | + | ||
| 190 | +以下为 **AI 生产环境** 统一使用的地址与凭证(Redis / ES / MySQL 均以此为准)。本地开发可将 `DB_HOST`/`ES_HOST`/`REDIS_HOST` 改为 `localhost`(服务在本机时)。 | ||
| 191 | + | ||
| 192 | +| 服务 | 地址(生产) | 端口 | 说明 | | ||
| 193 | +|------|--------------|------|------| | ||
| 194 | +| **MySQL** | 10.200.16.14 / localhost | 3316 | 店匠 SPU/SKU 数据 | | ||
| 195 | +| **Redis** | 10.200.16.14 / localhost | 6479 | Embedding/翻译缓存 | | ||
| 196 | +| **Elasticsearch** | 10.200.16.14 / localhost | 9200 | 搜索索引 | | ||
| 197 | + | ||
| 198 | +**MySQL**(3 个用户均可远程登录): | ||
| 199 | + | ||
| 200 | +| 用户 | 密码 | | ||
| 201 | +|------|------| | ||
| 202 | +| root | qY8tgodLoA&KT#yQ | | ||
| 203 | +| saas | 6dlpco6dVGuqzt^l | | ||
| 204 | +| sa | C#HU!GPps7ck8tsM | | ||
| 205 | + | ||
| 206 | +创建远程用户(如尚未创建): | ||
| 207 | + | ||
| 208 | +```sql | ||
| 209 | +mysql -uroot -p'qY8tgodLoA&KT#yQ' | ||
| 210 | +CREATE USER 'saas'@'%' IDENTIFIED BY '6dlpco6dVGuqzt^l'; | ||
| 211 | +CREATE USER 'sa'@'%' IDENTIFIED BY 'C#HU!GPps7ck8tsM'; | ||
| 212 | +``` | ||
| 213 | + | ||
| 214 | +**Redis**: | ||
| 215 | + | ||
| 216 | +- 密码:`dxEkegEZ@C5SXWKv` | ||
| 217 | +- 远程登录示例:`redis-cli -h 43.166.252.75 -p 6479`(需带 `-a` 传密码时按 redis-cli 文档操作) | ||
| 218 | + | ||
| 219 | +**Elasticsearch**: | ||
| 220 | + | ||
| 221 | +- 用户名/密码:`saas` / `4hOaLaf41y2VuI8y` | ||
| 222 | +- 访问示例: | ||
| 223 | + | ||
| 224 | +```bash | ||
| 225 | +curl -u 'saas:4hOaLaf41y2VuI8y' \ | ||
| 226 | + -X GET 'http://localhost:9200/search_products_tenant_111/_search?pretty' \ | ||
| 227 | + -H 'Content-Type: application/json' \ | ||
| 228 | + -d '{ | ||
| 229 | + "size": 11, | ||
| 230 | + "_source": ["title"], | ||
| 231 | + "query": { | ||
| 232 | + "bool": { | ||
| 233 | + "filter": [ | ||
| 234 | + { "term": {"spu_id" : 206150} } | ||
| 235 | + ] | ||
| 236 | + } | ||
| 237 | + } | ||
| 238 | + }' | ||
| 239 | +``` | ||
| 240 | + | ||
| 241 | +在项目根目录创建 `.env`(可复制 `.env.example` 后按环境修改): | ||
| 242 | + | ||
| 243 | +```env | ||
| 244 | +# MySQL(生产以 10.200.16.14 或 localhost 为准) | ||
| 245 | +DB_HOST=10.200.16.14 | ||
| 246 | +DB_PORT=3316 | ||
| 247 | +DB_DATABASE=saas | ||
| 248 | +DB_USERNAME=saas | ||
| 249 | +DB_PASSWORD=6dlpco6dVGuqzt^l | ||
| 250 | + | ||
| 251 | +# Elasticsearch | ||
| 252 | +ES_HOST=http://10.200.16.14:9200 | ||
| 253 | +ES_USERNAME=saas | ||
| 254 | +ES_PASSWORD=4hOaLaf41y2VuI8y | ||
| 255 | + | ||
| 256 | +# Redis(可选) | ||
| 257 | +REDIS_HOST=10.200.16.14 | ||
| 258 | +REDIS_PORT=6479 | ||
| 259 | +REDIS_PASSWORD=dxEkegEZ@C5SXWKv | ||
| 260 | + | ||
| 261 | +# DeepL 翻译(按需) | ||
| 262 | +DEEPL_AUTH_KEY=your-key | ||
| 263 | + | ||
| 264 | +# API | ||
| 265 | +API_HOST=0.0.0.0 | ||
| 266 | +API_PORT=6002 | ||
| 267 | +``` | ||
| 268 | + | ||
| 269 | +> 生产环境请妥善保管凭证;本地/测试可改用上述值或自建实例。 | ||
| 270 | + | ||
| 271 | +### 1.7 店匠数据源说明 | ||
| 272 | + | ||
| 273 | +saas-search 以 MySQL 中的店匠标准表为权威数据源: | ||
| 274 | + | ||
| 275 | +- `shoplazza_product_spu`:SPU 商品主表 | ||
| 276 | +- `shoplazza_product_sku`:SKU 变体表 | ||
| 277 | + | ||
| 278 | +**shoplazza_product_sku 字段节选**:`id`, `spu_id`, `shop_id`, `title`, `sku`, `price`, `compare_at_price`, `option1/2/3`, `inventory_quantity`, `image_src`, `tenant_id`, `create_time`, `update_time`, `deleted` 等。完整字段与 ES 对应关系见 `INDEX_FIELDS_DOCUMENTATION.md`(若有)。 | ||
| 279 | + | ||
| 280 | +### 1.8 相关脚本 | ||
| 281 | + | ||
| 282 | +- **`activate.sh`**(项目根目录):激活 Python 环境并加载 `.env`,日常开发/部署以本脚本为准。 | ||
| 283 | +- `scripts/create_venv.sh`:创建 venv(可选 `INSTALL_ML=1` 安装 ML 依赖) | ||
| 284 | +- `scripts/mock_data.sh`:生成 Tenant1 Mock + Tenant2 CSV 并导入 MySQL | ||
| 285 | +- `scripts/create_tenant_index.sh <tenant_id>`:创建租户 ES 索引结构 | ||
| 286 | +- `POST /indexer/reindex`:从 MySQL 全量导入到 ES | ||
| 287 | +- `run.sh` / `scripts/stop.sh`:服务启停;`scripts/service_ctl.sh`:start/stop/restart/status | ||
| 288 | + | ||
| 289 | +更多脚本与验证命令见 `docs/Usage-Guide.md`。 | ||
| 290 | + | ||
| 291 | +### 1.9 配置入口总览 | ||
| 166 | 292 | ||
| 167 | - **搜索行为配置**:`config/config.yaml` | 293 | - **搜索行为配置**:`config/config.yaml` |
| 168 | - **索引结构定义**:`mappings/search_products.json` | 294 | - **索引结构定义**:`mappings/search_products.json` |
docs/Usage-Guide.md
| @@ -29,7 +29,7 @@ | @@ -29,7 +29,7 @@ | ||
| 29 | 29 | ||
| 30 | #### 1. 安装 Python 依赖与激活环境 | 30 | #### 1. 安装 Python 依赖与激活环境 |
| 31 | 31 | ||
| 32 | -**推荐**:使用项目根目录的 `activate.sh` 激活环境(会加载 `.env`)。目前推荐 venv(`.venv`);Conda 仅作为兼容回退(需要 `CONDA_ROOT`)。详见 `docs/环境配置说明.md`。 | 32 | +**推荐**:使用项目根目录的 `activate.sh` 激活环境(会加载 `.env`)。目前推荐 venv(`.venv`);Conda 仅作为兼容回退(需要 `CONDA_ROOT`)。系统要求、Python 环境、生产凭证与 `.env` 模板见 [QUICKSTART.md](./QUICKSTART.md) §1.4–1.8。 |
| 33 | 33 | ||
| 34 | ```bash | 34 | ```bash |
| 35 | cd /data/saas-search | 35 | cd /data/saas-search |
docs/搜索API对接指南.md
| @@ -37,6 +37,8 @@ | @@ -37,6 +37,8 @@ | ||
| 37 | - 5.2 [增量索引接口](#52-增量索引接口) | 37 | - 5.2 [增量索引接口](#52-增量索引接口) |
| 38 | - 5.3 [查询文档接口](#53-查询文档接口) | 38 | - 5.3 [查询文档接口](#53-查询文档接口) |
| 39 | - 5.4 [索引健康检查接口](#54-索引健康检查接口) | 39 | - 5.4 [索引健康检查接口](#54-索引健康检查接口) |
| 40 | + - 5.5 [文档构建接口(正式对接推荐)](#55-文档构建接口正式对接推荐) | ||
| 41 | + - 5.6 [文档构建接口(测试/自测)](#56-文档构建接口测试--自测) | ||
| 40 | 42 | ||
| 41 | 6. [管理接口](#管理接口) | 43 | 6. [管理接口](#管理接口) |
| 42 | - 6.1 [健康检查](#61-健康检查) | 44 | - 6.1 [健康检查](#61-健康检查) |
| @@ -863,6 +865,17 @@ curl "http://localhost:6002/search/12345" -H "X-Tenant-ID: 162" | @@ -863,6 +865,17 @@ curl "http://localhost:6002/search/12345" -H "X-Tenant-ID: 162" | ||
| 863 | 865 | ||
| 864 | ## 索引接口 | 866 | ## 索引接口 |
| 865 | 867 | ||
| 868 | +本节内容与 `api/routes/indexer.py` 中的索引相关服务一致,包含以下接口: | ||
| 869 | + | ||
| 870 | +| 接口 | 方法 | 路径 | 说明 | | ||
| 871 | +|------|------|------|------| | ||
| 872 | +| 全量重建索引 | POST | `/indexer/reindex` | 将指定租户所有 SPU 导入 ES(不删现有索引) | | ||
| 873 | +| 增量索引 | POST | `/indexer/index` | 按 SPU ID 列表索引/删除,支持自动检测删除与显式删除 | | ||
| 874 | +| 查询文档 | POST | `/indexer/documents` | 按 SPU ID 列表查询 ES 文档,不写入 ES | | ||
| 875 | +| 构建 ES 文档(正式) | POST | `/indexer/build-docs` | 由上游提供 MySQL 行数据,返回 ES-ready 文档,不写 ES | | ||
| 876 | +| 构建 ES 文档(测试) | POST | `/indexer/build-docs-from-db` | 由本服务查库并构建文档,仅测试/调试用 | | ||
| 877 | +| 索引健康检查 | GET | `/indexer/health` | 检查索引服务与数据库连接状态 | | ||
| 878 | + | ||
| 866 | ### 5.0 为租户创建索引 | 879 | ### 5.0 为租户创建索引 |
| 867 | 880 | ||
| 868 | 为租户创建索引需要两个步骤: | 881 | 为租户创建索引需要两个步骤: |
| @@ -1259,7 +1272,7 @@ curl -X POST "http://localhost:6004/indexer/documents" \ | @@ -1259,7 +1272,7 @@ curl -X POST "http://localhost:6004/indexer/documents" \ | ||
| 1259 | ### 5.4 索引健康检查接口 | 1272 | ### 5.4 索引健康检查接口 |
| 1260 | 1273 | ||
| 1261 | - **端点**: `GET /indexer/health` | 1274 | - **端点**: `GET /indexer/health` |
| 1262 | -- **描述**: 检查索引服务的健康状态 | 1275 | +- **描述**: 检查索引服务健康状态(与 `api/routes/indexer.py` 中 `indexer_health_check` 一致) |
| 1263 | 1276 | ||
| 1264 | #### 响应格式 | 1277 | #### 响应格式 |
| 1265 | 1278 | ||
| @@ -1273,6 +1286,18 @@ curl -X POST "http://localhost:6004/indexer/documents" \ | @@ -1273,6 +1286,18 @@ curl -X POST "http://localhost:6004/indexer/documents" \ | ||
| 1273 | } | 1286 | } |
| 1274 | ``` | 1287 | ``` |
| 1275 | 1288 | ||
| 1289 | +| 字段 | 类型 | 说明 | | ||
| 1290 | +|------|------|------| | ||
| 1291 | +| `status` | string | `available`(服务可用)、`unavailable`(未初始化)、`error`(异常) | | ||
| 1292 | +| `database` | string | 数据库连接状态,如 `connected` 或 `disconnected: ...` | | ||
| 1293 | +| `preloaded_data.category_mappings` | integer | 已加载的分类映射数量 | | ||
| 1294 | + | ||
| 1295 | +#### 请求示例 | ||
| 1296 | + | ||
| 1297 | +```bash | ||
| 1298 | +curl -X GET "http://localhost:6004/indexer/health" | ||
| 1299 | +``` | ||
| 1300 | + | ||
| 1276 | ### 5.5 文档构建接口(正式对接推荐) | 1301 | ### 5.5 文档构建接口(正式对接推荐) |
| 1277 | 1302 | ||
| 1278 | #### 5.5.1 `POST /indexer/build-docs` | 1303 | #### 5.5.1 `POST /indexer/build-docs` |
| @@ -1298,6 +1323,11 @@ curl -X POST "http://localhost:6004/indexer/documents" \ | @@ -1298,6 +1323,11 @@ curl -X POST "http://localhost:6004/indexer/documents" \ | ||
| 1298 | } | 1323 | } |
| 1299 | ``` | 1324 | ``` |
| 1300 | 1325 | ||
| 1326 | +| 参数 | 类型 | 必填 | 说明 | | ||
| 1327 | +|------|------|------|------| | ||
| 1328 | +| `tenant_id` | string | Y | 租户 ID | | ||
| 1329 | +| `items` | array | Y | 需构建 doc 的 SPU 列表(每项含 `spu`、`skus`、`options`),**单次最多 200 条** | | ||
| 1330 | + | ||
| 1301 | > `spu` / `skus` / `options` 字段应当直接使用从 `shoplazza_product_spu` / `shoplazza_product_sku` / `shoplazza_product_option` 查询出的行字段。 | 1331 | > `spu` / `skus` / `options` 字段应当直接使用从 `shoplazza_product_spu` / `shoplazza_product_sku` / `shoplazza_product_option` 查询出的行字段。 |
| 1302 | 1332 | ||
| 1303 | #### 响应示例(节选) | 1333 | #### 响应示例(节选) |
| @@ -1334,6 +1364,15 @@ curl -X POST "http://localhost:6004/indexer/documents" \ | @@ -1334,6 +1364,15 @@ curl -X POST "http://localhost:6004/indexer/documents" \ | ||
| 1334 | } | 1364 | } |
| 1335 | ``` | 1365 | ``` |
| 1336 | 1366 | ||
| 1367 | +| 字段 | 类型 | 说明 | | ||
| 1368 | +|------|------|------| | ||
| 1369 | +| `tenant_id` | string | 租户 ID | | ||
| 1370 | +| `docs` | array | 构建成功的 ES 文档列表,与 `mappings/search_products.json` 一致 | | ||
| 1371 | +| `total` | integer | 请求的 items 总数 | | ||
| 1372 | +| `success_count` | integer | 成功构建数量 | | ||
| 1373 | +| `failed_count` | integer | 失败数量 | | ||
| 1374 | +| `failed` | array | 失败项列表,每项含 `spu_id`、`error` | | ||
| 1375 | + | ||
| 1337 | #### 使用建议 | 1376 | #### 使用建议 |
| 1338 | 1377 | ||
| 1339 | - **生产环境推荐流程**: | 1378 | - **生产环境推荐流程**: |
| @@ -1347,17 +1386,26 @@ curl -X POST "http://localhost:6004/indexer/documents" \ | @@ -1347,17 +1386,26 @@ curl -X POST "http://localhost:6004/indexer/documents" \ | ||
| 1347 | #### 5.6.1 `POST /indexer/build-docs-from-db` | 1386 | #### 5.6.1 `POST /indexer/build-docs-from-db` |
| 1348 | 1387 | ||
| 1349 | - **描述**: | 1388 | - **描述**: |
| 1350 | - 仅用于测试/调试:调用方只提供 `tenant_id` 和 `spu_ids`,由 indexer 服务内部从 MySQL 查询 SPU/SKU/Option,然后调用与 `/indexer/build-docs` 相同的文档构建逻辑,返回 ES-ready doc。 | 1389 | + 仅用于测试/调试:调用方只提供 `tenant_id` 和 `spu_ids`,由 indexer 服务内部从 MySQL 查询 SPU/SKU/Option,然后调用与 `/indexer/build-docs` 相同的文档构建逻辑,返回 ES-ready doc。**生产环境请使用 `/indexer/build-docs`,由上游查库并写 ES。** |
| 1351 | 1390 | ||
| 1352 | #### 请求参数 | 1391 | #### 请求参数 |
| 1353 | 1392 | ||
| 1354 | ```json | 1393 | ```json |
| 1355 | { | 1394 | { |
| 1356 | "tenant_id": "170", | 1395 | "tenant_id": "170", |
| 1357 | - "spu_ids": ["223167"] | 1396 | + "spu_ids": ["223167", "223168"] |
| 1358 | } | 1397 | } |
| 1359 | ``` | 1398 | ``` |
| 1360 | 1399 | ||
| 1400 | +| 参数 | 类型 | 必填 | 说明 | | ||
| 1401 | +|------|------|------|------| | ||
| 1402 | +| `tenant_id` | string | Y | 租户 ID | | ||
| 1403 | +| `spu_ids` | array[string] | Y | SPU ID 列表,**单次最多 200 个** | | ||
| 1404 | + | ||
| 1405 | +#### 响应格式 | ||
| 1406 | + | ||
| 1407 | +与 `/indexer/build-docs` 相同:`tenant_id`、`docs`、`total`、`success_count`、`failed_count`、`failed`。 | ||
| 1408 | + | ||
| 1361 | #### 请求示例 | 1409 | #### 请求示例 |
| 1362 | 1410 | ||
| 1363 | ```bash | 1411 | ```bash |
| @@ -1368,12 +1416,6 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ | @@ -1368,12 +1416,6 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ | ||
| 1368 | 1416 | ||
| 1369 | 返回结构与 `/indexer/build-docs` 相同,可直接用于对比 ES 实际文档或调试字段映射问题。 | 1417 | 返回结构与 `/indexer/build-docs` 相同,可直接用于对比 ES 实际文档或调试字段映射问题。 |
| 1370 | 1418 | ||
| 1371 | -#### 请求示例 | ||
| 1372 | - | ||
| 1373 | -```bash | ||
| 1374 | -curl -X GET "http://localhost:6004/indexer/health" | ||
| 1375 | -``` | ||
| 1376 | - | ||
| 1377 | --- | 1419 | --- |
| 1378 | 1420 | ||
| 1379 | ## 管理接口 | 1421 | ## 管理接口 |
docs/环境配置说明.md
| 1 | +# 环境配置说明(已并入) | ||
| 1 | 2 | ||
| 2 | -## 1. 系统要求 | ||
| 3 | - | ||
| 4 | -- **操作系统**:Linux(推荐 Ubuntu 18.04+) | ||
| 5 | -- **Conda**:Miniconda3 或 Anaconda(用于 Python 环境隔离) | ||
| 6 | -- **Python**:3.10(由 Conda 环境提供) | ||
| 7 | -- **内存**:建议 8GB+(含模型与 ES) | ||
| 8 | -- **磁盘**:10GB+(含模型与索引) | ||
| 9 | - | ||
| 10 | ---- | ||
| 11 | - | ||
| 12 | -## 2. Python 运行环境 | ||
| 13 | - | ||
| 14 | -本项目历史上使用 Conda 管理环境;目前推荐使用 **venv**(更轻量、对 CI/容器更友好)。项目根目录下的 `activate.sh` 已升级为 **优先激活 `.venv`,并兼容 Conda 回退**,且会自动加载当前目录下的 `.env`(忽略注释与空行)。 | ||
| 15 | - | ||
| 16 | -### 2.1 venv(推荐) | ||
| 17 | - | ||
| 18 | -首次创建 venv: | ||
| 19 | - | ||
| 20 | -```bash | ||
| 21 | -cd /data/saas-search | ||
| 22 | -./scripts/create_venv.sh | ||
| 23 | -source activate.sh | ||
| 24 | -``` | ||
| 25 | - | ||
| 26 | -如需运行本地 embedding / 图像编码服务(torch/transformers 等依赖较重): | ||
| 27 | - | ||
| 28 | -```bash | ||
| 29 | -cd /data/saas-search | ||
| 30 | -INSTALL_ML=1 ./scripts/create_venv.sh | ||
| 31 | -source activate.sh | ||
| 32 | -``` | ||
| 33 | - | ||
| 34 | -日常使用: | ||
| 35 | - | ||
| 36 | -```bash | ||
| 37 | -cd /data/saas-search | ||
| 38 | -source activate.sh | ||
| 39 | -``` | ||
| 40 | - | ||
| 41 | -### 2.2 Conda(兼容旧流程) | ||
| 42 | - | ||
| 43 | -`activate.sh` 会在未发现 `.venv` 时回退激活 Conda 环境 `searchengine`。若在新机器上部署,请先设置本机 Conda 路径再执行: | ||
| 44 | - | ||
| 45 | -```bash | ||
| 46 | -# 你的 conda 在 ~/anaconda3/bin/conda,则 CONDA_ROOT=~/anaconda3 | ||
| 47 | -export CONDA_ROOT=$HOME/anaconda3 # 或你的 Conda 安装路径(如 /home/ubuntu/anaconda3) | ||
| 48 | -source activate.sh | ||
| 49 | -``` | ||
| 50 | - | ||
| 51 | -**新机器首次部署(创建 Conda 环境)**:若本机尚未创建 `searchengine` 环境,任选其一: | ||
| 52 | - | ||
| 53 | -- **方式 A(推荐,与 environment.yml 一致)**: | ||
| 54 | - ```bash | ||
| 55 | - cd /data/saas-search | ||
| 56 | - export CONDA_ROOT=$HOME/anaconda3 # 或你的 Conda 安装路径 | ||
| 57 | - conda env create -f environment.yml | ||
| 58 | - source activate.sh | ||
| 59 | - ``` | ||
| 60 | -- **方式 B(仅 pip)**: | ||
| 61 | - ```bash | ||
| 62 | - conda create -n searchengine python=3.10 -y | ||
| 63 | - conda activate searchengine | ||
| 64 | - cd /data/saas-search | ||
| 65 | - pip install -r requirements.txt | ||
| 66 | - ``` | ||
| 67 | - | ||
| 68 | -之后日常使用执行 `source activate.sh` 即可(如需可先 `export CONDA_ROOT=...`)。 | ||
| 69 | - | ||
| 70 | ---- | ||
| 71 | - | ||
| 72 | -## 3. 外部服务与端口 | ||
| 73 | - | ||
| 74 | -| 服务 | 默认地址 | 说明 | | ||
| 75 | -|------|----------|------| | ||
| 76 | -| Elasticsearch | `http://localhost:9200` | 可通过 Docker 单节点启动 | | ||
| 77 | -| MySQL | `120.79.247.228:3316` | 存放店匠 SPU/SKU 数据 | | ||
| 78 | -| Redis(可选) | `localhost:6479` | Embedding/翻译缓存 | | ||
| 79 | - | ||
| 80 | -示例:使用 Docker 启动 Elasticsearch | ||
| 81 | - | ||
| 82 | -```bash | ||
| 83 | -docker run -d \ | ||
| 84 | - --name elasticsearch \ | ||
| 85 | - -p 9200:9200 \ | ||
| 86 | - -e "discovery.type=single-node" \ | ||
| 87 | - -e "ES_JAVA_OPTS=-Xms2g -Xmx2g" \ | ||
| 88 | - elasticsearch:8.11.0 | ||
| 89 | -``` | ||
| 90 | - | ||
| 91 | ---- | ||
| 92 | - | ||
| 93 | -## 4. 环境变量与 `.env` 模板 | ||
| 94 | - | ||
| 95 | -在项目根目录创建 `.env`,并根据环境替换敏感信息: | ||
| 96 | - | ||
| 97 | -```env | ||
| 98 | -# MySQL | ||
| 99 | -DB_HOST=120.79.247.228 | ||
| 100 | -DB_PORT=3316 | ||
| 101 | -DB_DATABASE=saas | ||
| 102 | -DB_USERNAME=saas | ||
| 103 | -DB_PASSWORD=P89cZHS5d7dFyc9R | ||
| 104 | - | ||
| 105 | -# Elasticsearch | ||
| 106 | -ES_HOST=http://localhost:9200 | ||
| 107 | -ES_USERNAME=saas | ||
| 108 | -ES_PASSWORD=4hOaLaf41y2VuI8y | ||
| 109 | - | ||
| 110 | -# Redis(可选) | ||
| 111 | -REDIS_HOST=localhost | ||
| 112 | -REDIS_PORT=6479 | ||
| 113 | -REDIS_PASSWORD=BMfv5aI31kgHWtlx | ||
| 114 | - | ||
| 115 | -# DeepL 翻译 | ||
| 116 | -DEEPL_AUTH_KEY=c9293ab4-ad25-479b-919f-ab4e63b429ed | ||
| 117 | - | ||
| 118 | -# API | ||
| 119 | -API_HOST=0.0.0.0 | ||
| 120 | -API_PORT=6002 | ||
| 121 | -``` | ||
| 122 | - | ||
| 123 | ---- | ||
| 124 | - | ||
| 125 | -## 5. 服务凭证速查 | ||
| 126 | - | ||
| 127 | -| 项目 | 值 | | ||
| 128 | -|------|----| | ||
| 129 | -| **MySQL** | host `120.79.247.228`, port `3316`, user `saas`, password `P89cZHS5d7dFyc9R` | | ||
| 130 | -| **Elasticsearch** | host `http://localhost:9200`, user `saas`, password `4hOaLaf41y2VuI8y` | | ||
| 131 | -| **Redis(可选)** | host `localhost`, port `6479`, password `BMfv5aI31kgHWtlx` | | ||
| 132 | -| **DeepL** | `c9293ab4-ad25-479b-919f-ab4e63b429ed` | | ||
| 133 | - | ||
| 134 | -> 所有凭证仅用于本地/测试环境,生产环境需替换并妥善保管。 | ||
| 135 | - | ||
| 136 | ---- | ||
| 137 | - | ||
| 138 | -## 6. 店匠数据源说明 | ||
| 139 | - | ||
| 140 | -saas-search 以 MySQL 中的店匠标准表为权威数据源: | ||
| 141 | - | ||
| 142 | -- `shoplazza_product_spu`:SPU 商品主表 | ||
| 143 | -- `shoplazza_product_sku`:SKU 变体表 | ||
| 144 | - | ||
| 145 | -### `shoplazza_product_sku` 字段节选 | ||
| 146 | - | ||
| 147 | -| 字段 | 类型 | 描述 | | ||
| 148 | -|------|------|------| | ||
| 149 | -| `id` | bigint(20) | SKU 主键 | | ||
| 150 | -| `spu_id` | bigint(20) | 对应 SPU | | ||
| 151 | -| `shop_id` | bigint(20) | 店铺 ID | | ||
| 152 | -| `shoplazza_product_id` | varchar(64) | 店匠商品 ID | | ||
| 153 | -| `title` | varchar(500) | 变体标题 | | ||
| 154 | -| `sku` | varchar(100) | SKU 编码 | | ||
| 155 | -| `price` | decimal(10,2) | 售价 | | ||
| 156 | -| `compare_at_price` | decimal(10,2) | 原价 | | ||
| 157 | -| `option1/2/3` | varchar(255) | 颜色/尺码等选项 | | ||
| 158 | -| `inventory_quantity` | int(11) | 库存 | | ||
| 159 | -| `image_src` | varchar(500) | 图片 | | ||
| 160 | -| `tenant_id` | bigint(20) | 租户 | | ||
| 161 | -| `create_time` | datetime | 创建时间 | | ||
| 162 | -| `update_time` | datetime | 更新时间 | | ||
| 163 | -| `deleted` | bit(1) | 逻辑删除标记 | | ||
| 164 | - | ||
| 165 | -> 完整字段、索引映射与 ES 对应关系详见 `INDEX_FIELDS_DOCUMENTATION.md`。 | ||
| 166 | - | ||
| 167 | ---- | ||
| 168 | - | ||
| 169 | -## 7. 相关脚本 | ||
| 170 | - | ||
| 171 | -- **`activate.sh`**(项目根目录):激活 Conda 环境 `searchengine` 并加载 `.env`,**日常开发/部署以本脚本为准**。 | ||
| 172 | -- `scripts/mock_data.sh`:一次性生成 Tenant1 Mock + Tenant2 CSV 数据并导入 MySQL | ||
| 173 | -- `scripts/create_tenant_index.sh <tenant_id>`:创建租户索引结构 | ||
| 174 | -- `POST /indexer/reindex`:从 MySQL 导入到 Elasticsearch(推荐) | ||
| 175 | -- `run.sh` / `restart.sh`:服务启动/重启(统一走 `scripts/service_ctl.sh`) | ||
| 176 | -- `scripts/service_ctl.sh`:统一服务管理(start/stop/restart/status) | ||
| 177 | - | ||
| 178 | -**新机器部署**:若 Conda 未安装在默认路径(如 `/home/tw/miniconda3`),请在执行上述脚本前设置 `CONDA_ROOT`。例如你的 conda 是 `~/anaconda3/bin/conda`(即 `/home/ubuntu/anaconda3/bin/conda`),则设置:`export CONDA_ROOT=$HOME/anaconda3`。可将该行写入 `~/.bashrc` 或部署说明。 | ||
| 179 | - | ||
| 180 | -更多脚本参数、日志与验证命令参见 `Usage-Guide.md` 与 `测试数据指南.md`。 | 3 | +**本文内容已合并至 [QUICKSTART.md](./QUICKSTART.md) 第 1 节「快速上手」**,包括: |
| 181 | 4 | ||
| 5 | +- **1.4 系统要求**:操作系统、Python、内存与磁盘 | ||
| 6 | +- **1.5 Python 运行环境(详细)**:venv 创建与日常使用(Conda 仅作为兼容回退,QUICKSTART 不再单独说明) | ||
| 7 | +- **1.6 外部服务与 .env(含生产凭证)**:MySQL / Redis / Elasticsearch 地址、端口、凭证及远程登录方式、`.env` 模板 | ||
| 8 | +- **1.7 店匠数据源说明**:SPU/SKU 表与字段节选 | ||
| 9 | +- **1.8 相关脚本**:activate.sh、create_venv、mock_data、create_tenant_index、run.sh 等 | ||
| 182 | 10 | ||
| 11 | +请直接查阅 [QUICKSTART.md §1.4–1.8](./QUICKSTART.md#14-系统要求)。 |
tests/ci/test_service_api_contracts.py
| @@ -4,6 +4,7 @@ from types import SimpleNamespace | @@ -4,6 +4,7 @@ from types import SimpleNamespace | ||
| 4 | from typing import Any, Dict, List | 4 | from typing import Any, Dict, List |
| 5 | 5 | ||
| 6 | import numpy as np | 6 | import numpy as np |
| 7 | +import pandas as pd | ||
| 7 | import pytest | 8 | import pytest |
| 8 | from fastapi.testclient import TestClient | 9 | from fastapi.testclient import TestClient |
| 9 | 10 | ||
| @@ -111,9 +112,31 @@ class _FakeTransformer: | @@ -111,9 +112,31 @@ class _FakeTransformer: | ||
| 111 | } | 112 | } |
| 112 | 113 | ||
| 113 | 114 | ||
| 115 | +class _FakeDbConnection: | ||
| 116 | + """Minimal fake for indexer health check: connect().execute(text('SELECT 1')).""" | ||
| 117 | + | ||
| 118 | + def __enter__(self): | ||
| 119 | + return self | ||
| 120 | + | ||
| 121 | + def __exit__(self, *args): | ||
| 122 | + pass | ||
| 123 | + | ||
| 124 | + def execute(self, stmt): | ||
| 125 | + pass | ||
| 126 | + | ||
| 127 | + | ||
| 128 | +class _FakeDbEngine: | ||
| 129 | + def connect(self): | ||
| 130 | + return _FakeDbConnection() | ||
| 131 | + | ||
| 132 | + | ||
| 114 | class _FakeIncrementalService: | 133 | class _FakeIncrementalService: |
| 134 | + def __init__(self): | ||
| 135 | + self.db_engine = _FakeDbEngine() | ||
| 136 | + self.category_id_to_name = {} | ||
| 137 | + | ||
| 115 | def index_spus_to_es(self, es_client, tenant_id: str, spu_ids: List[str], delete_spu_ids=None): | 138 | def index_spus_to_es(self, es_client, tenant_id: str, spu_ids: List[str], delete_spu_ids=None): |
| 116 | - return { | 139 | + out = { |
| 117 | "tenant_id": tenant_id, | 140 | "tenant_id": tenant_id, |
| 118 | "spu_ids": [{"spu_id": s, "status": "indexed"} for s in spu_ids], | 141 | "spu_ids": [{"spu_id": s, "status": "indexed"} for s in spu_ids], |
| 119 | "delete_spu_ids": [], | 142 | "delete_spu_ids": [], |
| @@ -121,10 +144,33 @@ class _FakeIncrementalService: | @@ -121,10 +144,33 @@ class _FakeIncrementalService: | ||
| 121 | "success_count": len(spu_ids), | 144 | "success_count": len(spu_ids), |
| 122 | "failed_count": 0, | 145 | "failed_count": 0, |
| 123 | } | 146 | } |
| 147 | + if delete_spu_ids: | ||
| 148 | + out["delete_spu_ids"] = [{"spu_id": s, "status": "deleted"} for s in delete_spu_ids] | ||
| 149 | + out["total"] += len(delete_spu_ids) | ||
| 150 | + out["success_count"] += len(delete_spu_ids) | ||
| 151 | + return out | ||
| 152 | + | ||
| 153 | + def get_spu_document(self, tenant_id: str, spu_id: str): | ||
| 154 | + return { | ||
| 155 | + "tenant_id": tenant_id, | ||
| 156 | + "spu_id": spu_id, | ||
| 157 | + "title": {"zh": "Fake doc"}, | ||
| 158 | + } | ||
| 124 | 159 | ||
| 125 | def _get_transformer_bundle(self, tenant_id: str): | 160 | def _get_transformer_bundle(self, tenant_id: str): |
| 126 | return _FakeTransformer(), None, False | 161 | return _FakeTransformer(), None, False |
| 127 | 162 | ||
| 163 | + def _load_spus_for_spu_ids(self, tenant_id: str, spu_ids: List[str], include_deleted: bool = False): | ||
| 164 | + if not spu_ids: | ||
| 165 | + return pd.DataFrame() | ||
| 166 | + return pd.DataFrame([{"id": int(s), "title": "Fake", "tenant_id": tenant_id} for s in spu_ids]) | ||
| 167 | + | ||
| 168 | + def _load_skus_for_spu_ids(self, tenant_id: str, spu_ids: List[str]): | ||
| 169 | + return pd.DataFrame() | ||
| 170 | + | ||
| 171 | + def _load_options_for_spu_ids(self, tenant_id: str, spu_ids: List[str]): | ||
| 172 | + return pd.DataFrame() | ||
| 173 | + | ||
| 128 | 174 | ||
| 129 | @pytest.fixture | 175 | @pytest.fixture |
| 130 | def indexer_client(monkeypatch): | 176 | def indexer_client(monkeypatch): |
| @@ -173,6 +219,153 @@ def test_indexer_build_docs_contract(indexer_client: TestClient): | @@ -173,6 +219,153 @@ def test_indexer_build_docs_contract(indexer_client: TestClient): | ||
| 173 | assert data["docs"][0]["spu_id"] == "1" | 219 | assert data["docs"][0]["spu_id"] == "1" |
| 174 | 220 | ||
| 175 | 221 | ||
| 222 | +def test_indexer_build_docs_from_db_contract(indexer_client: TestClient): | ||
| 223 | + """POST /indexer/build-docs-from-db: tenant_id + spu_ids, returns same shape as build-docs.""" | ||
| 224 | + response = indexer_client.post( | ||
| 225 | + "/indexer/build-docs-from-db", | ||
| 226 | + json={"tenant_id": "162", "spu_ids": ["1001", "1002"]}, | ||
| 227 | + ) | ||
| 228 | + assert response.status_code == 200 | ||
| 229 | + data = response.json() | ||
| 230 | + assert data["tenant_id"] == "162" | ||
| 231 | + assert "docs" in data | ||
| 232 | + assert data["success_count"] == 2 | ||
| 233 | + assert len(data["docs"]) == 2 | ||
| 234 | + assert data["docs"][0]["spu_id"] == "1001" | ||
| 235 | + | ||
| 236 | + | ||
| 237 | +def test_indexer_documents_contract(indexer_client: TestClient): | ||
| 238 | + """POST /indexer/documents: tenant_id + spu_ids, returns success/failed lists (no ES write).""" | ||
| 239 | + response = indexer_client.post( | ||
| 240 | + "/indexer/documents", | ||
| 241 | + json={"tenant_id": "162", "spu_ids": ["1001", "1002"]}, | ||
| 242 | + ) | ||
| 243 | + assert response.status_code == 200 | ||
| 244 | + data = response.json() | ||
| 245 | + assert "success" in data and "failed" in data | ||
| 246 | + assert data["total"] == 2 | ||
| 247 | + assert data["success_count"] == 2 | ||
| 248 | + assert data["failed_count"] == 0 | ||
| 249 | + assert len(data["success"]) == 2 | ||
| 250 | + assert data["success"][0]["spu_id"] == "1001" | ||
| 251 | + assert "document" in data["success"][0] | ||
| 252 | + assert data["success"][0]["document"]["title"]["zh"] == "Fake doc" | ||
| 253 | + | ||
| 254 | + | ||
| 255 | +def test_indexer_health_contract(indexer_client: TestClient): | ||
| 256 | + """GET /indexer/health: returns status and database/preloaded_data.""" | ||
| 257 | + response = indexer_client.get("/indexer/health") | ||
| 258 | + assert response.status_code == 200 | ||
| 259 | + data = response.json() | ||
| 260 | + assert "status" in data | ||
| 261 | + assert data["status"] in ("available", "unavailable", "error") | ||
| 262 | + assert "database" in data or "message" in data | ||
| 263 | + if "preloaded_data" in data: | ||
| 264 | + assert "category_mappings" in data["preloaded_data"] | ||
| 265 | + | ||
| 266 | + | ||
| 267 | +def test_indexer_incremental_with_delete_spu_ids(indexer_client: TestClient): | ||
| 268 | + """POST /indexer/index with delete_spu_ids: explicit delete path.""" | ||
| 269 | + response = indexer_client.post( | ||
| 270 | + "/indexer/index", | ||
| 271 | + json={ | ||
| 272 | + "tenant_id": "162", | ||
| 273 | + "spu_ids": ["1001"], | ||
| 274 | + "delete_spu_ids": ["2001", "2002"], | ||
| 275 | + }, | ||
| 276 | + ) | ||
| 277 | + assert response.status_code == 200 | ||
| 278 | + data = response.json() | ||
| 279 | + assert data["success_count"] == 3 | ||
| 280 | + assert len(data["spu_ids"]) == 1 | ||
| 281 | + assert len(data["delete_spu_ids"]) == 2 | ||
| 282 | + assert data["delete_spu_ids"][0]["status"] == "deleted" | ||
| 283 | + | ||
| 284 | + | ||
| 285 | +def test_indexer_index_validation_both_empty(indexer_client: TestClient): | ||
| 286 | + """POST /indexer/index: 400 when spu_ids and delete_spu_ids both empty.""" | ||
| 287 | + response = indexer_client.post( | ||
| 288 | + "/indexer/index", | ||
| 289 | + json={"tenant_id": "162", "spu_ids": [], "delete_spu_ids": []}, | ||
| 290 | + ) | ||
| 291 | + assert response.status_code == 400 | ||
| 292 | + | ||
| 293 | + | ||
| 294 | +def test_indexer_index_validation_max_spu_ids(indexer_client: TestClient): | ||
| 295 | + """POST /indexer/index: 400 when spu_ids > 100.""" | ||
| 296 | + response = indexer_client.post( | ||
| 297 | + "/indexer/index", | ||
| 298 | + json={"tenant_id": "162", "spu_ids": [str(i) for i in range(101)], "delete_spu_ids": []}, | ||
| 299 | + ) | ||
| 300 | + assert response.status_code == 400 | ||
| 301 | + | ||
| 302 | + | ||
| 303 | +def test_indexer_build_docs_validation_empty_items(indexer_client: TestClient): | ||
| 304 | + """POST /indexer/build-docs: 400 when items empty.""" | ||
| 305 | + response = indexer_client.post( | ||
| 306 | + "/indexer/build-docs", | ||
| 307 | + json={"tenant_id": "162", "items": []}, | ||
| 308 | + ) | ||
| 309 | + assert response.status_code == 400 | ||
| 310 | + | ||
| 311 | + | ||
| 312 | +def test_indexer_documents_validation_empty_spu_ids(indexer_client: TestClient): | ||
| 313 | + """POST /indexer/documents: 400 when spu_ids empty.""" | ||
| 314 | + response = indexer_client.post( | ||
| 315 | + "/indexer/documents", | ||
| 316 | + json={"tenant_id": "162", "spu_ids": []}, | ||
| 317 | + ) | ||
| 318 | + assert response.status_code == 400 | ||
| 319 | + | ||
| 320 | + | ||
| 321 | +def test_indexer_build_docs_from_db_validation_empty_spu_ids(indexer_client: TestClient): | ||
| 322 | + """POST /indexer/build-docs-from-db: 400 when spu_ids empty.""" | ||
| 323 | + response = indexer_client.post( | ||
| 324 | + "/indexer/build-docs-from-db", | ||
| 325 | + json={"tenant_id": "162", "spu_ids": []}, | ||
| 326 | + ) | ||
| 327 | + assert response.status_code == 400 | ||
| 328 | + | ||
| 329 | + | ||
| 330 | +def test_indexer_build_docs_validation_max_items(indexer_client: TestClient): | ||
| 331 | + """POST /indexer/build-docs: 400 when items > 200.""" | ||
| 332 | + response = indexer_client.post( | ||
| 333 | + "/indexer/build-docs", | ||
| 334 | + json={ | ||
| 335 | + "tenant_id": "162", | ||
| 336 | + "items": [{"spu": {"id": i, "title": "x"}, "skus": [], "options": []} for i in range(201)], | ||
| 337 | + }, | ||
| 338 | + ) | ||
| 339 | + assert response.status_code == 400 | ||
| 340 | + | ||
| 341 | + | ||
| 342 | +def test_indexer_build_docs_from_db_validation_max_spu_ids(indexer_client: TestClient): | ||
| 343 | + """POST /indexer/build-docs-from-db: 400 when spu_ids > 200.""" | ||
| 344 | + response = indexer_client.post( | ||
| 345 | + "/indexer/build-docs-from-db", | ||
| 346 | + json={"tenant_id": "162", "spu_ids": [str(i) for i in range(201)]}, | ||
| 347 | + ) | ||
| 348 | + assert response.status_code == 400 | ||
| 349 | + | ||
| 350 | + | ||
| 351 | +def test_indexer_documents_validation_max_spu_ids(indexer_client: TestClient): | ||
| 352 | + """POST /indexer/documents: 400 when spu_ids > 100.""" | ||
| 353 | + response = indexer_client.post( | ||
| 354 | + "/indexer/documents", | ||
| 355 | + json={"tenant_id": "162", "spu_ids": [str(i) for i in range(101)]}, | ||
| 356 | + ) | ||
| 357 | + assert response.status_code == 400 | ||
| 358 | + | ||
| 359 | + | ||
| 360 | +def test_indexer_index_validation_max_delete_spu_ids(indexer_client: TestClient): | ||
| 361 | + """POST /indexer/index: 400 when delete_spu_ids > 100.""" | ||
| 362 | + response = indexer_client.post( | ||
| 363 | + "/indexer/index", | ||
| 364 | + json={"tenant_id": "162", "spu_ids": [], "delete_spu_ids": [str(i) for i in range(101)]}, | ||
| 365 | + ) | ||
| 366 | + assert response.status_code == 400 | ||
| 367 | + | ||
| 368 | + | ||
| 176 | class _FakeTextModel: | 369 | class _FakeTextModel: |
| 177 | def encode_batch(self, texts, batch_size=32, device="cpu"): | 370 | def encode_batch(self, texts, batch_size=32, device="cpu"): |
| 178 | return [np.array([0.1, 0.2, 0.3], dtype=np.float32) for _ in texts] | 371 | return [np.array([0.1, 0.2, 0.3], dtype=np.float32) for _ in texts] |