Commit 38f530ff071f42c3bae325ebef73eb0080b4e87a
1 parent
d586fd1f
文档完善
Showing
9 changed files
with
933 additions
and
1004 deletions
Show diff stats
| @@ -11,9 +11,6 @@ REDIS_PASSWORD=BMfv5aI31kgHWtlx | @@ -11,9 +11,6 @@ REDIS_PASSWORD=BMfv5aI31kgHWtlx | ||
| 11 | # DeepL Translation API | 11 | # DeepL Translation API |
| 12 | DEEPL_AUTH_KEY=c9293ab4-ad25-479b-919f-ab4e63b429ed | 12 | DEEPL_AUTH_KEY=c9293ab4-ad25-479b-919f-ab4e63b429ed |
| 13 | 13 | ||
| 14 | -# Customer Configuration | ||
| 15 | -CUSTOMER_ID=customer1 | ||
| 16 | - | ||
| 17 | # API Service Configuration | 14 | # API Service Configuration |
| 18 | API_HOST=0.0.0.0 | 15 | API_HOST=0.0.0.0 |
| 19 | API_PORT=6002 | 16 | API_PORT=6002 |
| @@ -26,8 +23,8 @@ DB_USERNAME=saas | @@ -26,8 +23,8 @@ DB_USERNAME=saas | ||
| 26 | DB_PASSWORD=P89cZHS5d7dFyc9R | 23 | DB_PASSWORD=P89cZHS5d7dFyc9R |
| 27 | 24 | ||
| 28 | # Model Directories | 25 | # Model Directories |
| 29 | -TEXT_MODEL_DIR=/data/tw/models/bge-m3 | ||
| 30 | -IMAGE_MODEL_DIR=/data/tw/models/cn-clip | 26 | +TEXT_MODEL_DIR=/data/tw/models/bge-m3 # 已改为通过web请求获取向量,不再使用本地模型 |
| 27 | +IMAGE_MODEL_DIR=/data/tw/models/cn-clip # 已改为通过web请求获取向量,不再使用本地模型 | ||
| 31 | 28 | ||
| 32 | # Cache Directory | 29 | # Cache Directory |
| 33 | CACHE_DIR=.cache | 30 | CACHE_DIR=.cache |
.env.example
| @@ -14,16 +14,13 @@ REDIS_PASSWORD= | @@ -14,16 +14,13 @@ REDIS_PASSWORD= | ||
| 14 | # DeepL Translation API | 14 | # DeepL Translation API |
| 15 | DEEPL_AUTH_KEY= | 15 | DEEPL_AUTH_KEY= |
| 16 | 16 | ||
| 17 | -# Customer Configuration | ||
| 18 | -CUSTOMER_ID=customer1 | ||
| 19 | - | ||
| 20 | # API Service Configuration | 17 | # API Service Configuration |
| 21 | API_HOST=0.0.0.0 | 18 | API_HOST=0.0.0.0 |
| 22 | API_PORT=6002 | 19 | API_PORT=6002 |
| 23 | 20 | ||
| 24 | # Embedding Models | 21 | # Embedding Models |
| 25 | -TEXT_MODEL_DIR=/data/tw/models/bge-m3 | ||
| 26 | -IMAGE_MODEL_DIR=/data/tw/models/cn-clip | 22 | +TEXT_MODEL_DIR=/data/tw/models/bge-m3 # 已改为通过web请求获取向量,不再使用本地模型 |
| 23 | +IMAGE_MODEL_DIR=/data/tw/models/cn-clip # 已改为通过web请求获取向量,不再使用本地模型 | ||
| 27 | 24 | ||
| 28 | # Cache Directory | 25 | # Cache Directory |
| 29 | CACHE_DIR=.cache | 26 | CACHE_DIR=.cache |
| @@ -0,0 +1,481 @@ | @@ -0,0 +1,481 @@ | ||
| 1 | +# 测试数据构造指南 - SearchEngine | ||
| 2 | + | ||
| 3 | +本文档说明如何构造测试数据,包括两种数据源的准备和导入流程。 | ||
| 4 | + | ||
| 5 | +## 目录 | ||
| 6 | + | ||
| 7 | +1. [数据说明](#数据说明) | ||
| 8 | +2. [构造Mock数据(tenant_id=1)](#构造mock数据tenant_id1) | ||
| 9 | +3. [从CSV导入数据(tenant_id=2)](#从csv导入数据tenant_id2) | ||
| 10 | +4. [从MySQL导入到Elasticsearch](#从mysql导入到elasticsearch) | ||
| 11 | +5. [完整工作流程](#完整工作流程) | ||
| 12 | +6. [常见问题](#常见问题) | ||
| 13 | + | ||
| 14 | +--- | ||
| 15 | + | ||
| 16 | +## 数据说明 | ||
| 17 | + | ||
| 18 | +系统支持两种测试数据源: | ||
| 19 | + | ||
| 20 | +1. **Tenant ID = 1**: 自动生成的Mock数据(使用 `generate_test_data.py` 生成) | ||
| 21 | +2. **Tenant ID = 2**: 从CSV文件导入的真实数据(使用 `import_tenant2_csv.py` 导入) | ||
| 22 | + | ||
| 23 | +### 数据表结构 | ||
| 24 | + | ||
| 25 | +系统使用店匠标准表结构: | ||
| 26 | + | ||
| 27 | +- **SPU表**: `shoplazza_product_spu` - 商品SPU数据 | ||
| 28 | +- **SKU表**: `shoplazza_product_sku` - 商品SKU数据 | ||
| 29 | + | ||
| 30 | +表结构详见 `INDEX_FIELDS_DOCUMENTATION.md`。 | ||
| 31 | + | ||
| 32 | +--- | ||
| 33 | + | ||
| 34 | +## 构造Mock数据(tenant_id=1) | ||
| 35 | + | ||
| 36 | +### 使用一键脚本(推荐) | ||
| 37 | + | ||
| 38 | +`mock_data.sh` 脚本会自动生成并导入 tenant_id=1 的Mock数据(同一次运行也会从CSV导入 tenant_id=2 的数据): | ||
| 39 | + | ||
| 40 | +```bash | ||
| 41 | +cd /home/tw/SearchEngine | ||
| 42 | +./scripts/mock_data.sh | ||
| 43 | +``` | ||
| 44 | + | ||
| 45 | +脚本会自动: | ||
| 46 | +- 生成 1000 个SPU的Mock数据 | ||
| 47 | +- 导入数据到MySQL | ||
| 48 | +- 自动计算起始ID,避免主键冲突 | ||
| 49 | + | ||
| 50 | +### 手动分步执行 | ||
| 51 | + | ||
| 52 | +如果需要自定义参数,可以分步执行: | ||
| 53 | + | ||
| 54 | +#### 步骤1: 生成Mock测试数据 | ||
| 55 | + | ||
| 56 | +```bash | ||
| 57 | +python scripts/generate_test_data.py \ | ||
| 58 | + --num-spus 1000 \ | ||
| 59 | + --tenant-id "1" \ | ||
| 60 | + --output test_data_tenant1.sql \ | ||
| 61 | + --db-host 120.79.247.228 \ | ||
| 62 | + --db-port 3316 \ | ||
| 63 | + --db-database saas \ | ||
| 64 | + --db-username saas \ | ||
| 65 | + --db-password <密码> | ||
| 66 | +``` | ||
| 67 | + | ||
| 68 | +参数说明: | ||
| 69 | +- `--num-spus`: 生成的SPU数量(默认:1000) | ||
| 70 | +- `--tenant-id`: 租户ID(默认:1) | ||
| 71 | +- `--output`: 输出的SQL文件路径 | ||
| 72 | +- `--db-host`, `--db-port`, `--db-database`, `--db-username`, `--db-password`: 数据库连接信息 | ||
| 73 | + | ||
| 74 | +#### 步骤2: 导入数据到MySQL | ||
| 75 | + | ||
| 76 | +```bash | ||
| 77 | +python scripts/import_test_data.py \ | ||
| 78 | + --db-host 120.79.247.228 \ | ||
| 79 | + --db-port 3316 \ | ||
| 80 | + --db-database saas \ | ||
| 81 | + --db-username saas \ | ||
| 82 | + --db-password <密码> \ | ||
| 83 | + --sql-file test_data_tenant1.sql \ | ||
| 84 | + --tenant-id "1" | ||
| 85 | +``` | ||
| 86 | + | ||
| 87 | +参数说明: | ||
| 88 | +- `--sql-file`: SQL文件路径 | ||
| 89 | +- `--tenant-id`: 租户ID(用于清理旧数据) | ||
| 90 | +- 其他参数:数据库连接信息 | ||
| 91 | + | ||
| 92 | +**注意**: 导入会先清理该 tenant_id 的旧数据,再导入新数据。 | ||
| 93 | + | ||
| 94 | +--- | ||
| 95 | + | ||
| 96 | +## 从CSV导入数据(tenant_id=2) | ||
| 97 | + | ||
| 98 | +### 使用一键脚本(推荐) | ||
| 99 | + | ||
| 100 | +`mock_data.sh` 脚本会自动从CSV文件导入 tenant_id=2 的数据(与生成 tenant_id=1 的Mock数据是同一个脚本,运行一次即可同时完成两者): | ||
| 101 | + | ||
| 102 | +```bash | ||
| 103 | +cd /home/tw/SearchEngine | ||
| 104 | +./scripts/mock_data.sh | ||
| 105 | +``` | ||
| 106 | + | ||
| 107 | +**前提条件**: 确保CSV文件存在于以下路径: | ||
| 108 | +``` | ||
| 109 | +data/customer1/goods_with_pic.5years_congku.csv.shuf.1w | ||
| 110 | +``` | ||
| 111 | + | ||
| 112 | +如果CSV文件路径不同,需要修改 `scripts/mock_data.sh` 中的 `TENANT2_CSV_FILE` 变量。 | ||
| 113 | + | ||
| 114 | +### CSV文件格式要求 | ||
| 115 | + | ||
| 116 | +CSV文件需要包含以下列(列名不区分大小写): | ||
| 117 | + | ||
| 118 | +- `skuId` - SKU ID | ||
| 119 | +- `name` - 商品名称 | ||
| 120 | +- `name_pinyin` - 拼音(可选) | ||
| 121 | +- `create_time` - 创建时间(格式:YYYY-MM-DD HH:MM:SS) | ||
| 122 | +- `ruSkuName` - 俄文SKU名称(可选) | ||
| 123 | +- `enSpuName` - 英文SPU名称(可选) | ||
| 124 | +- `categoryName` - 类别名称 | ||
| 125 | +- `supplierName` - 供应商名称 | ||
| 126 | +- `brandName` - 品牌名称 | ||
| 127 | +- `file_id` - 文件ID(可选) | ||
| 128 | +- `days_since_last_update` - 更新天数(可选) | ||
| 129 | +- `id` - 商品ID(可选) | ||
| 130 | +- `imageUrl` - 图片URL(可选) | ||
| 131 | + | ||
| 132 | +### 手动分步执行 | ||
| 133 | + | ||
| 134 | +如果需要自定义参数,可以分步执行: | ||
| 135 | + | ||
| 136 | +#### 步骤1: 从CSV生成SQL文件 | ||
| 137 | + | ||
| 138 | +```bash | ||
| 139 | +python scripts/import_tenant2_csv.py \ | ||
| 140 | + --csv-file data/customer1/goods_with_pic.5years_congku.csv.shuf.1w \ | ||
| 141 | + --tenant-id "2" \ | ||
| 142 | + --output customer1_data.sql \ | ||
| 143 | + --db-host 120.79.247.228 \ | ||
| 144 | + --db-port 3316 \ | ||
| 145 | + --db-database saas \ | ||
| 146 | + --db-username saas \ | ||
| 147 | + --db-password <密码> | ||
| 148 | +``` | ||
| 149 | + | ||
| 150 | +参数说明: | ||
| 151 | +- `--csv-file`: CSV文件路径 | ||
| 152 | +- `--tenant-id`: 租户ID(默认:2) | ||
| 153 | +- `--output`: 输出的SQL文件路径 | ||
| 154 | +- 其他参数:数据库连接信息 | ||
| 155 | + | ||
| 156 | +#### 步骤2: 导入数据到MySQL | ||
| 157 | + | ||
| 158 | +```bash | ||
| 159 | +python scripts/import_test_data.py \ | ||
| 160 | + --db-host 120.79.247.228 \ | ||
| 161 | + --db-port 3316 \ | ||
| 162 | + --db-database saas \ | ||
| 163 | + --db-username saas \ | ||
| 164 | + --db-password <密码> \ | ||
| 165 | + --sql-file customer1_data.sql \ | ||
| 166 | + --tenant-id "2" | ||
| 167 | +``` | ||
| 168 | + | ||
| 169 | +**注意**: | ||
| 170 | +- CSV导入会先清理该 tenant_id 的旧数据,再导入新数据 | ||
| 171 | +- 脚本会自动计算起始ID,避免主键冲突 | ||
| 172 | + | ||
| 173 | +--- | ||
| 174 | + | ||
| 175 | +## 从MySQL导入到Elasticsearch | ||
| 176 | + | ||
| 177 | +数据导入到MySQL后,需要使用 `ingest.sh` 脚本将数据从MySQL导入到Elasticsearch。 | ||
| 178 | + | ||
| 179 | +### 基本用法 | ||
| 180 | + | ||
| 181 | +```bash | ||
| 182 | +./scripts/ingest.sh <tenant_id> [recreate_index] | ||
| 183 | +``` | ||
| 184 | + | ||
| 185 | +参数说明: | ||
| 186 | +- `tenant_id`: **必需**,租户ID,用于筛选数据库中的数据 | ||
| 187 | +- `recreate_index`: 可选,是否删除并重建索引(true/false,默认:false) | ||
| 188 | + | ||
| 189 | +### 使用示例 | ||
| 190 | + | ||
| 191 | +#### 重建索引并导入数据(推荐首次导入) | ||
| 192 | + | ||
| 193 | +```bash | ||
| 194 | +# 导入tenant_id=1的数据并重建索引 | ||
| 195 | +./scripts/ingest.sh 1 true | ||
| 196 | + | ||
| 197 | +# 导入tenant_id=2的数据并重建索引 | ||
| 198 | +./scripts/ingest.sh 2 true | ||
| 199 | +``` | ||
| 200 | + | ||
| 201 | +#### 增量导入(不重建索引) | ||
| 202 | + | ||
| 203 | +```bash | ||
| 204 | +# 增量导入tenant_id=1的数据 | ||
| 205 | +./scripts/ingest.sh 1 false | ||
| 206 | + | ||
| 207 | +# 增量导入tenant_id=2的数据 | ||
| 208 | +./scripts/ingest.sh 2 false | ||
| 209 | +``` | ||
| 210 | + | ||
| 211 | +### 手动执行 | ||
| 212 | + | ||
| 213 | +如果需要自定义参数,可以手动执行: | ||
| 214 | + | ||
| 215 | +```bash | ||
| 216 | +python scripts/ingest_shoplazza.py \ | ||
| 217 | + --db-host 120.79.247.228 \ | ||
| 218 | + --db-port 3316 \ | ||
| 219 | + --db-database saas \ | ||
| 220 | + --db-username saas \ | ||
| 221 | + --db-password <密码> \ | ||
| 222 | + --tenant-id 1 \ | ||
| 223 | + --es-host http://localhost:9200 \ | ||
| 224 | + --recreate \ | ||
| 225 | + --batch-size 500 | ||
| 226 | +``` | ||
| 227 | + | ||
| 228 | +参数说明: | ||
| 229 | +- `--db-host`, `--db-port`, `--db-database`, `--db-username`, `--db-password`: MySQL连接信息 | ||
| 230 | +- `--tenant-id`: 租户ID(必需) | ||
| 231 | +- `--es-host`: Elasticsearch地址 | ||
| 232 | +- `--recreate`: 是否重建索引 | ||
| 233 | +- `--batch-size`: 批量处理大小(默认:500) | ||
| 234 | + | ||
| 235 | +### 检查可用的 tenant_id | ||
| 236 | + | ||
| 237 | +如果导入时显示 "No documents to index",脚本会自动显示调试信息,包括: | ||
| 238 | +- 该 tenant_id 的统计信息(总数、活跃数、已删除数) | ||
| 239 | +- 数据库中存在的其他 tenant_id 列表 | ||
| 240 | + | ||
| 241 | +也可以直接查询数据库: | ||
| 242 | + | ||
| 243 | +```sql | ||
| 244 | +-- 查看有哪些 tenant_id | ||
| 245 | +SELECT tenant_id, COUNT(*) as count, | ||
| 246 | + SUM(CASE WHEN deleted = 0 THEN 1 ELSE 0 END) as active | ||
| 247 | +FROM shoplazza_product_spu | ||
| 248 | +GROUP BY tenant_id; | ||
| 249 | + | ||
| 250 | +-- 检查特定 tenant_id 的数据 | ||
| 251 | +SELECT COUNT(*) FROM shoplazza_product_spu | ||
| 252 | +WHERE tenant_id = 2 AND deleted = 0; | ||
| 253 | +``` | ||
| 254 | + | ||
| 255 | +**注意**: | ||
| 256 | +- 只有 `deleted=0` 的记录会被导入 | ||
| 257 | +- 如果仍使用本地模型,首次运行会下载模型文件(BGE-M3和CN-CLIP),大约需要10-30分钟;若向量已改为通过web请求获取,则无需下载 | ||
| 258 | +- 确保MySQL中存在对应 tenant_id 的数据 | ||
| 259 | + | ||
| 260 | +--- | ||
| 261 | + | ||
| 262 | +## 完整工作流程 | ||
| 263 | + | ||
| 264 | +### 完整示例:构造并导入所有测试数据 | ||
| 265 | + | ||
| 266 | +```bash | ||
| 267 | +# 1. 构造并导入 tenant_id=1(Mock)和 tenant_id=2(CSV)的测试数据到MySQL | ||
| 268 | +./scripts/mock_data.sh | ||
| 269 | + | ||
| 270 | +# 脚本会自动完成: | ||
| 271 | +# - 生成 tenant_id=1 的Mock数据(1000个SPU) | ||
| 272 | +# - 从CSV导入 tenant_id=2 的数据 | ||
| 273 | +# - 导入数据到MySQL | ||
| 274 | + | ||
| 275 | +# 2. 从MySQL导入 tenant_id=1 的数据到ES | ||
| 276 | +./scripts/ingest.sh 1 true | ||
| 277 | + | ||
| 278 | +# 3. 从MySQL导入 tenant_id=2 的数据到ES | ||
| 279 | +./scripts/ingest.sh 2 true | ||
| 280 | + | ||
| 281 | +# 4. 验证数据导入 | ||
| 282 | +curl http://localhost:9200/search_products/_count | ||
| 283 | +``` | ||
| 284 | + | ||
| 285 | +### 分步执行示例 | ||
| 286 | + | ||
| 287 | +如果需要更细粒度的控制,可以分步执行: | ||
| 288 | + | ||
| 289 | +```bash | ||
| 290 | +# ===== Part 1: 构造 tenant_id=1 的Mock数据 ===== | ||
| 291 | + | ||
| 292 | +# 1.1 生成Mock数据 | ||
| 293 | +python scripts/generate_test_data.py \ | ||
| 294 | + --num-spus 1000 \ | ||
| 295 | + --tenant-id "1" \ | ||
| 296 | + --output test_data_tenant1.sql \ | ||
| 297 | + --db-host 120.79.247.228 \ | ||
| 298 | + --db-port 3316 \ | ||
| 299 | + --db-database saas \ | ||
| 300 | + --db-username saas \ | ||
| 301 | + --db-password <密码> | ||
| 302 | + | ||
| 303 | +# 1.2 导入到MySQL | ||
| 304 | +python scripts/import_test_data.py \ | ||
| 305 | + --db-host 120.79.247.228 \ | ||
| 306 | + --db-port 3316 \ | ||
| 307 | + --db-database saas \ | ||
| 308 | + --db-username saas \ | ||
| 309 | + --db-password <密码> \ | ||
| 310 | + --sql-file test_data_tenant1.sql \ | ||
| 311 | + --tenant-id "1" | ||
| 312 | + | ||
| 313 | +# ===== Part 2: 从CSV导入 tenant_id=2 的数据 ===== | ||
| 314 | + | ||
| 315 | +# 2.1 从CSV生成SQL | ||
| 316 | +python scripts/import_tenant2_csv.py \ | ||
| 317 | + --csv-file data/customer1/goods_with_pic.5years_congku.csv.shuf.1w \ | ||
| 318 | + --tenant-id "2" \ | ||
| 319 | + --output customer1_data.sql \ | ||
| 320 | + --db-host 120.79.247.228 \ | ||
| 321 | + --db-port 3316 \ | ||
| 322 | + --db-database saas \ | ||
| 323 | + --db-username saas \ | ||
| 324 | + --db-password <密码> | ||
| 325 | + | ||
| 326 | +# 2.2 导入到MySQL | ||
| 327 | +python scripts/import_test_data.py \ | ||
| 328 | + --db-host 120.79.247.228 \ | ||
| 329 | + --db-port 3316 \ | ||
| 330 | + --db-database saas \ | ||
| 331 | + --db-username saas \ | ||
| 332 | + --db-password <密码> \ | ||
| 333 | + --sql-file customer1_data.sql \ | ||
| 334 | + --tenant-id "2" | ||
| 335 | + | ||
| 336 | +# ===== Part 3: 从MySQL导入到ES ===== | ||
| 337 | + | ||
| 338 | +# 3.1 导入 tenant_id=1 的数据到ES | ||
| 339 | +./scripts/ingest.sh 1 true | ||
| 340 | + | ||
| 341 | +# 3.2 导入 tenant_id=2 的数据到ES | ||
| 342 | +./scripts/ingest.sh 2 true | ||
| 343 | + | ||
| 344 | +# ===== Part 4: 验证 ===== | ||
| 345 | + | ||
| 346 | +# 4.1 检查ES中的数据量 | ||
| 347 | +curl http://localhost:9200/search_products/_count | ||
| 348 | + | ||
| 349 | +# 4.2 测试搜索 | ||
| 350 | +curl -X POST http://localhost:6002/search/ \ | ||
| 351 | + -H "Content-Type: application/json" \ | ||
| 352 | + -H "X-Tenant-ID: 1" \ | ||
| 353 | + -d '{"query": "玩具", "size": 10}' | ||
| 354 | +``` | ||
| 355 | + | ||
| 356 | +--- | ||
| 357 | + | ||
| 358 | +## 常见问题 | ||
| 359 | + | ||
| 360 | +### Q1: 数据导入失败 | ||
| 361 | + | ||
| 362 | +**症状**: `Error during data ingestion` | ||
| 363 | + | ||
| 364 | +**解决方案**: | ||
| 365 | +```bash | ||
| 366 | +# 检查MySQL数据是否存在 | ||
| 367 | +mysql -h 120.79.247.228 -P 3316 -u saas -p saas -e \ | ||
| 368 | + "SELECT COUNT(*) FROM shoplazza_product_spu WHERE tenant_id=1" | ||
| 369 | + | ||
| 370 | +# 检查ES索引是否存在 | ||
| 371 | +curl http://localhost:9200/search_products | ||
| 372 | + | ||
| 373 | +# 查看详细错误日志 | ||
| 374 | +python scripts/ingest_shoplazza.py --tenant-id 1 --recreate | ||
| 375 | +``` | ||
| 376 | + | ||
| 377 | +### Q2: CSV文件找不到 | ||
| 378 | + | ||
| 379 | +**症状**: `ERROR: CSV file not found` | ||
| 380 | + | ||
| 381 | +**解决方案**: | ||
| 382 | +```bash | ||
| 383 | +# 检查CSV文件是否存在 | ||
| 384 | +ls -lh data/customer1/goods_with_pic.5years_congku.csv.shuf.1w | ||
| 385 | + | ||
| 386 | +# 如果路径不同,修改 scripts/mock_data.sh 中的 TENANT2_CSV_FILE 变量 | ||
| 387 | +``` | ||
| 388 | + | ||
| 389 | +### Q3: 导入时没有数据 | ||
| 390 | + | ||
| 391 | +**症状**: `WARNING: No documents to index` 或 `Transformed 0 SPU documents` | ||
| 392 | + | ||
| 393 | +**可能原因**: | ||
| 394 | +1. 数据库中不存在该 tenant_id 的数据 | ||
| 395 | +2. 数据都被标记为 `deleted=1` | ||
| 396 | +3. tenant_id 类型不匹配 | ||
| 397 | + | ||
| 398 | +**解决步骤**: | ||
| 399 | + | ||
| 400 | +1. **查看调试信息**: 脚本会自动显示调试信息,包括: | ||
| 401 | + ``` | ||
| 402 | + DEBUG: tenant_id=1000: total=0, active=0, deleted=0 | ||
| 403 | + DEBUG: Available tenant_ids in shoplazza_product_spu: | ||
| 404 | + tenant_id=1: total=100, active=100 | ||
| 405 | + tenant_id=2: total=50, active=50 | ||
| 406 | + ``` | ||
| 407 | + | ||
| 408 | +2. **检查数据库**: 直接查询MySQL确认数据 | ||
| 409 | + ```sql | ||
| 410 | + -- 查看有哪些 tenant_id | ||
| 411 | + SELECT tenant_id, COUNT(*) as count, | ||
| 412 | + SUM(CASE WHEN deleted = 0 THEN 1 ELSE 0 END) as active | ||
| 413 | + FROM shoplazza_product_spu | ||
| 414 | + GROUP BY tenant_id; | ||
| 415 | + | ||
| 416 | + -- 检查特定 tenant_id 的数据 | ||
| 417 | + SELECT COUNT(*) FROM shoplazza_product_spu | ||
| 418 | + WHERE tenant_id = 2 AND deleted = 0; | ||
| 419 | + ``` | ||
| 420 | + | ||
| 421 | +3. **如果数据库中没有数据,需要先导入数据**: | ||
| 422 | + - 如果有CSV文件,使用CSV导入脚本 | ||
| 423 | + - 如果没有CSV文件,可以使用mock数据生成脚本 | ||
| 424 | + | ||
| 425 | +4. **使用正确的 tenant_id**: 根据调试信息显示的可用 tenant_id,使用正确的值重新导入 | ||
| 426 | + ```bash | ||
| 427 | + ./scripts/ingest.sh 2 true # 使用调试信息中显示的 tenant_id | ||
| 428 | + ``` | ||
| 429 | + | ||
| 430 | +### Q4: 模型下载慢或失败 | ||
| 431 | + | ||
| 432 | +**症状**: 首次运行时模型下载很慢或超时 | ||
| 433 | + | ||
| 434 | +**解决方案**: | ||
| 435 | +```bash | ||
| 436 | +# 跳过embedding快速测试(不推荐,但可以快速验证流程) | ||
| 437 | +# 注意:这会导致搜索功能不完整 | ||
| 438 | + | ||
| 439 | +# 或手动下载模型到指定目录 | ||
| 440 | +# TEXT_MODEL_DIR=/data/tw/models/bge-m3 | ||
| 441 | +# IMAGE_MODEL_DIR=/data/tw/models/cn-clip | ||
| 442 | +``` | ||
| 443 | + | ||
| 444 | +### Q5: 内存不足 | ||
| 445 | + | ||
| 446 | +**症状**: `Out of memory` | ||
| 447 | + | ||
| 448 | +**解决方案**: | ||
| 449 | +```bash | ||
| 450 | +# 减少批量大小 | ||
| 451 | +python scripts/ingest_shoplazza.py \ | ||
| 452 | + --tenant-id 1 \ | ||
| 453 | + --batch-size 200 # 默认500,可以减少到100-200 | ||
| 454 | +``` | ||
| 455 | + | ||
| 456 | +### Q6: 主键冲突 | ||
| 457 | + | ||
| 458 | +**症状**: `Duplicate entry` 错误 | ||
| 459 | + | ||
| 460 | +**解决方案**: | ||
| 461 | +- Mock数据脚本会自动计算起始ID,避免冲突 | ||
| 462 | +- 如果仍有冲突,可以手动清理旧数据: | ||
| 463 | + ```sql | ||
| 464 | + DELETE FROM shoplazza_product_spu WHERE tenant_id = 1; | ||
| 465 | + DELETE FROM shoplazza_product_sku WHERE tenant_id = 1; | ||
| 466 | + ``` | ||
| 467 | + | ||
| 468 | +--- | ||
| 469 | + | ||
| 470 | +## 相关文档 | ||
| 471 | + | ||
| 472 | +- **使用文档**: `USAGE_GUIDE.md` - 环境、启动、配置、日志查看 | ||
| 473 | +- **字段说明文档**: `INDEX_FIELDS_DOCUMENTATION.md` - 索引字段详细说明 | ||
| 474 | +- **API接口文档**: `API_INTEGRATION_GUIDE.md` - 完整的API对接指南 | ||
| 475 | +- **README**: `README.md` - 项目概述和快速开始 | ||
| 476 | + | ||
| 477 | +--- | ||
| 478 | + | ||
| 479 | +**文档版本**: v2.0 | ||
| 480 | +**最后更新**: 2024-12 | ||
| 481 | + |
| @@ -0,0 +1,441 @@ | @@ -0,0 +1,441 @@ | ||
| 1 | +# 使用指南 - SearchEngine | ||
| 2 | + | ||
| 3 | +本文档提供完整的使用指南,包括环境准备、服务启动、配置说明、日志查看等。 | ||
| 4 | + | ||
| 5 | +## 目录 | ||
| 6 | + | ||
| 7 | +1. [环境准备](#环境准备) | ||
| 8 | +2. [服务启动](#服务启动) | ||
| 9 | +3. [配置说明](#配置说明) | ||
| 10 | +4. [查看日志](#查看日志) | ||
| 11 | +5. [测试验证](#测试验证) | ||
| 12 | +6. [常见问题](#常见问题) | ||
| 13 | + | ||
| 14 | +--- | ||
| 15 | + | ||
| 16 | +## 环境准备 | ||
| 17 | + | ||
| 18 | +### 系统要求 | ||
| 19 | + | ||
| 20 | +- **操作系统**: Linux (推荐 CentOS 7+ / Ubuntu 18.04+) | ||
| 21 | +- **Python**: 3.8+ | ||
| 22 | +- **内存**: 建议 8GB+ | ||
| 23 | +- **磁盘**: 10GB+ (包含模型文件) | ||
| 24 | +- **Elasticsearch**: 8.x (可通过Docker运行) | ||
| 25 | + | ||
| 26 | +### 安装依赖 | ||
| 27 | + | ||
| 28 | +#### 1. 安装Python依赖 | ||
| 29 | + | ||
| 30 | +```bash | ||
| 31 | +cd /home/tw/SearchEngine | ||
| 32 | +pip install -r requirements.txt | ||
| 33 | +``` | ||
| 34 | + | ||
| 35 | +#### 2. 启动Elasticsearch | ||
| 36 | + | ||
| 37 | +**方式1: 使用Docker(推荐)** | ||
| 38 | + | ||
| 39 | +```bash | ||
| 40 | +docker run -d \ | ||
| 41 | + --name elasticsearch \ | ||
| 42 | + -p 9200:9200 \ | ||
| 43 | + -e "discovery.type=single-node" \ | ||
| 44 | + -e "ES_JAVA_OPTS=-Xms2g -Xmx2g" \ | ||
| 45 | + elasticsearch:8.11.0 | ||
| 46 | +``` | ||
| 47 | + | ||
| 48 | +**方式2: 本地安装** | ||
| 49 | + | ||
| 50 | +参考 [Elasticsearch官方文档](https://www.elastic.co/guide/en/elasticsearch/reference/8.11/install-elasticsearch.html) | ||
| 51 | + | ||
| 52 | +#### 3. 配置环境变量 | ||
| 53 | + | ||
| 54 | +创建 `.env` 文件: | ||
| 55 | + | ||
| 56 | +```bash | ||
| 57 | +# MySQL配置 | ||
| 58 | +DB_HOST=120.79.247.228 | ||
| 59 | +DB_PORT=3316 | ||
| 60 | +DB_DATABASE=saas | ||
| 61 | +DB_USERNAME=saas | ||
| 62 | +DB_PASSWORD=your_password | ||
| 63 | + | ||
| 64 | +# Elasticsearch配置 | ||
| 65 | +ES_HOST=http://localhost:9200 | ||
| 66 | +ES_USERNAME=essa | ||
| 67 | +ES_PASSWORD=your_es_password | ||
| 68 | + | ||
| 69 | +# Redis配置(可选,用于缓存) | ||
| 70 | +REDIS_HOST=localhost | ||
| 71 | +REDIS_PORT=6479 | ||
| 72 | +REDIS_PASSWORD=your_redis_password | ||
| 73 | + | ||
| 74 | +# DeepL翻译API(可选) | ||
| 75 | +DEEPL_AUTH_KEY=your_deepl_auth_key | ||
| 76 | + | ||
| 77 | +# API服务配置 | ||
| 78 | +API_HOST=0.0.0.0 | ||
| 79 | +API_PORT=6002 | ||
| 80 | +``` | ||
| 81 | + | ||
| 82 | +--- | ||
| 83 | + | ||
| 84 | +## 服务启动 | ||
| 85 | + | ||
| 86 | +### 方式1: 一键启动(推荐) | ||
| 87 | + | ||
| 88 | +```bash | ||
| 89 | +cd /home/tw/SearchEngine | ||
| 90 | +./run.sh | ||
| 91 | +``` | ||
| 92 | + | ||
| 93 | +这个脚本会自动: | ||
| 94 | +1. 创建日志目录 | ||
| 95 | +2. 启动后端API服务(后台运行) | ||
| 96 | +3. 启动前端Web界面(后台运行) | ||
| 97 | +4. 等待服务就绪 | ||
| 98 | + | ||
| 99 | +启动完成后,访问: | ||
| 100 | +- **前端界面**: http://localhost:6003 | ||
| 101 | +- **后端API**: http://localhost:6002 | ||
| 102 | +- **API文档**: http://localhost:6002/docs | ||
| 103 | + | ||
| 104 | +### 方式2: 分步启动 | ||
| 105 | + | ||
| 106 | +#### 启动后端服务 | ||
| 107 | + | ||
| 108 | +```bash | ||
| 109 | +./scripts/start_backend.sh | ||
| 110 | +``` | ||
| 111 | + | ||
| 112 | +后端API会在 http://localhost:6002 启动 | ||
| 113 | + | ||
| 114 | +#### 启动前端服务 | ||
| 115 | + | ||
| 116 | +```bash | ||
| 117 | +./scripts/start_frontend.sh | ||
| 118 | +``` | ||
| 119 | + | ||
| 120 | +前端界面会在 http://localhost:6003 启动 | ||
| 121 | + | ||
| 122 | +### 方式3: 手动启动 | ||
| 123 | + | ||
| 124 | +#### 启动后端API服务 | ||
| 125 | + | ||
| 126 | +```bash | ||
| 127 | +python -m api.app \ | ||
| 128 | + --host 0.0.0.0 \ | ||
| 129 | + --port 6002 \ | ||
| 130 | + --es-host http://localhost:9200 \ | ||
| 131 | + --reload | ||
| 132 | +``` | ||
| 133 | + | ||
| 134 | +#### 启动前端服务(可选) | ||
| 135 | + | ||
| 136 | +```bash | ||
| 137 | +# 使用Python简单HTTP服务器 | ||
| 138 | +cd frontend | ||
| 139 | +python -m http.server 6003 | ||
| 140 | +``` | ||
| 141 | + | ||
| 142 | +### 停止服务 | ||
| 143 | + | ||
| 144 | +```bash | ||
| 145 | +# 停止后端 | ||
| 146 | +kill $(cat logs/backend.pid) | ||
| 147 | + | ||
| 148 | +# 停止前端 | ||
| 149 | +kill $(cat logs/frontend.pid) | ||
| 150 | + | ||
| 151 | +# 或使用停止脚本 | ||
| 152 | +./scripts/stop.sh | ||
| 153 | +``` | ||
| 154 | + | ||
| 155 | +### 服务端口 | ||
| 156 | + | ||
| 157 | +| 服务 | 端口 | URL | | ||
| 158 | +|------|------|-----| | ||
| 159 | +| Elasticsearch | 9200 | http://localhost:9200 | | ||
| 160 | +| Backend API | 6002 | http://localhost:6002 | | ||
| 161 | +| Frontend Web | 6003 | http://localhost:6003 | | ||
| 162 | +| API Docs | 6002 | http://localhost:6002/docs | | ||
| 163 | + | ||
| 164 | +--- | ||
| 165 | + | ||
| 166 | +## 配置说明 | ||
| 167 | + | ||
| 168 | +### 环境配置文件 (.env) | ||
| 169 | + | ||
| 170 | +主要配置项说明: | ||
| 171 | + | ||
| 172 | +```bash | ||
| 173 | +# Elasticsearch配置 | ||
| 174 | +ES_HOST=http://localhost:9200 | ||
| 175 | +ES_USERNAME=essa | ||
| 176 | +ES_PASSWORD=your_es_password | ||
| 177 | + | ||
| 178 | +# MySQL配置 | ||
| 179 | +DB_HOST=120.79.247.228 | ||
| 180 | +DB_PORT=3316 | ||
| 181 | +DB_DATABASE=saas | ||
| 182 | +DB_USERNAME=saas | ||
| 183 | +DB_PASSWORD=your_password | ||
| 184 | + | ||
| 185 | +# Redis配置(可选,用于缓存) | ||
| 186 | +REDIS_HOST=localhost | ||
| 187 | +REDIS_PORT=6479 | ||
| 188 | +REDIS_PASSWORD=your_redis_password | ||
| 189 | + | ||
| 190 | +# DeepL翻译API | ||
| 191 | +DEEPL_AUTH_KEY=your_deepl_auth_key | ||
| 192 | + | ||
| 193 | +# API服务配置 | ||
| 194 | +API_HOST=0.0.0.0 | ||
| 195 | +API_PORT=6002 | ||
| 196 | +``` | ||
| 197 | + | ||
| 198 | +### 修改配置 | ||
| 199 | + | ||
| 200 | +1. 编辑 `.env` 文件 | ||
| 201 | +2. 重启相关服务 | ||
| 202 | + | ||
| 203 | +--- | ||
| 204 | + | ||
| 205 | +## 查看日志 | ||
| 206 | + | ||
| 207 | +### 日志文件位置 | ||
| 208 | + | ||
| 209 | +日志文件存储在 `logs/` 目录下: | ||
| 210 | + | ||
| 211 | +- `logs/backend.log` - 后端服务日志 | ||
| 212 | +- `logs/frontend.log` - 前端服务日志 | ||
| 213 | +- `logs/search_engine.log` - 应用主日志(按天轮转) | ||
| 214 | +- `logs/errors.log` - 错误日志(按天轮转) | ||
| 215 | + | ||
| 216 | +### 查看实时日志 | ||
| 217 | + | ||
| 218 | +```bash | ||
| 219 | +# 查看后端日志 | ||
| 220 | +tail -f logs/backend.log | ||
| 221 | + | ||
| 222 | +# 查看前端日志 | ||
| 223 | +tail -f logs/frontend.log | ||
| 224 | + | ||
| 225 | +# 查看应用主日志 | ||
| 226 | +tail -f logs/search_engine.log | ||
| 227 | + | ||
| 228 | +# 查看错误日志 | ||
| 229 | +tail -f logs/errors.log | ||
| 230 | +``` | ||
| 231 | + | ||
| 232 | +### 日志级别 | ||
| 233 | + | ||
| 234 | +日志级别可以通过环境变量 `LOG_LEVEL` 设置: | ||
| 235 | + | ||
| 236 | +```bash | ||
| 237 | +# 在 .env 文件中设置 | ||
| 238 | +LOG_LEVEL=DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL | ||
| 239 | +``` | ||
| 240 | + | ||
| 241 | +### 日志轮转 | ||
| 242 | + | ||
| 243 | +日志文件按天自动轮转,保留30天的历史日志。 | ||
| 244 | + | ||
| 245 | +--- | ||
| 246 | + | ||
| 247 | +## 测试验证 | ||
| 248 | + | ||
| 249 | +### 1. 健康检查 | ||
| 250 | + | ||
| 251 | +```bash | ||
| 252 | +curl http://localhost:6002/admin/health | ||
| 253 | +``` | ||
| 254 | + | ||
| 255 | +**预期响应**: | ||
| 256 | +```json | ||
| 257 | +{ | ||
| 258 | + "status": "healthy", | ||
| 259 | + "elasticsearch": "connected" | ||
| 260 | +} | ||
| 261 | +``` | ||
| 262 | + | ||
| 263 | +### 2. 索引统计 | ||
| 264 | + | ||
| 265 | +```bash | ||
| 266 | +curl http://localhost:6002/admin/stats | ||
| 267 | +``` | ||
| 268 | + | ||
| 269 | +### 3. 简单搜索测试 | ||
| 270 | + | ||
| 271 | +```bash | ||
| 272 | +curl -X POST http://localhost:6002/search/ \ | ||
| 273 | + -H "Content-Type: application/json" \ | ||
| 274 | + -H "X-Tenant-ID: 2" \ | ||
| 275 | + -d '{ | ||
| 276 | + "query": "玩具", | ||
| 277 | + "size": 10 | ||
| 278 | + }' | ||
| 279 | +``` | ||
| 280 | + | ||
| 281 | +或者通过查询参数: | ||
| 282 | + | ||
| 283 | +```bash | ||
| 284 | +curl -X POST "http://localhost:6002/search/?tenant_id=2" \ | ||
| 285 | + -H "Content-Type: application/json" \ | ||
| 286 | + -d '{ | ||
| 287 | + "query": "玩具", | ||
| 288 | + "size": 10 | ||
| 289 | + }' | ||
| 290 | +``` | ||
| 291 | + | ||
| 292 | +### 4. 带过滤器的搜索 | ||
| 293 | + | ||
| 294 | +```bash | ||
| 295 | +curl -X POST http://localhost:6002/search/ \ | ||
| 296 | + -H "Content-Type: application/json" \ | ||
| 297 | + -H "X-Tenant-ID: 2" \ | ||
| 298 | + -d '{ | ||
| 299 | + "query": "玩具", | ||
| 300 | + "size": 10, | ||
| 301 | + "filters": { | ||
| 302 | + "categoryName_keyword": ["玩具", "益智玩具"] | ||
| 303 | + }, | ||
| 304 | + "range_filters": { | ||
| 305 | + "price": {"gte": 50, "lte": 200} | ||
| 306 | + } | ||
| 307 | + }' | ||
| 308 | +``` | ||
| 309 | + | ||
| 310 | +### 5. 分面搜索测试 | ||
| 311 | + | ||
| 312 | +```bash | ||
| 313 | +curl -X POST http://localhost:6002/search/ \ | ||
| 314 | + -H "Content-Type: application/json" \ | ||
| 315 | + -H "X-Tenant-ID: 2" \ | ||
| 316 | + -d '{ | ||
| 317 | + "query": "玩具", | ||
| 318 | + "size": 10, | ||
| 319 | + "facets": [ | ||
| 320 | + {"field": "categoryName_keyword", "size": 15}, | ||
| 321 | + {"field": "brandName_keyword", "size": 15} | ||
| 322 | + ] | ||
| 323 | + }' | ||
| 324 | +``` | ||
| 325 | + | ||
| 326 | +### 6. 图片搜索测试 | ||
| 327 | + | ||
| 328 | +```bash | ||
| 329 | +curl -X POST http://localhost:6002/search/image \ | ||
| 330 | + -H "Content-Type: application/json" \ | ||
| 331 | + -H "X-Tenant-ID: 2" \ | ||
| 332 | + -d '{ | ||
| 333 | + "image_url": "https://oss.essa.cn/example.jpg", | ||
| 334 | + "size": 10 | ||
| 335 | + }' | ||
| 336 | +``` | ||
| 337 | + | ||
| 338 | +### 7. 前端界面测试 | ||
| 339 | + | ||
| 340 | +访问 http://localhost:6003 或 http://localhost:6002/ 进行可视化测试。 | ||
| 341 | + | ||
| 342 | +**注意**: 所有搜索接口都需要通过 `X-Tenant-ID` 请求头或 `tenant_id` 查询参数指定租户ID。 | ||
| 343 | + | ||
| 344 | +--- | ||
| 345 | + | ||
| 346 | +## 常见问题 | ||
| 347 | + | ||
| 348 | +### Q1: MySQL连接失败 | ||
| 349 | + | ||
| 350 | +**症状**: `Failed to connect to MySQL` | ||
| 351 | + | ||
| 352 | +**解决方案**: | ||
| 353 | +```bash | ||
| 354 | +# 检查MySQL服务状态 | ||
| 355 | +mysql -h 120.79.247.228 -P 3316 -u saas -p -e "SELECT 1" | ||
| 356 | + | ||
| 357 | +# 检查配置 | ||
| 358 | +cat .env | grep DB_ | ||
| 359 | +``` | ||
| 360 | + | ||
| 361 | +### Q2: Elasticsearch连接失败 | ||
| 362 | + | ||
| 363 | +**症状**: `Failed to connect to Elasticsearch` | ||
| 364 | + | ||
| 365 | +**解决方案**: | ||
| 366 | +```bash | ||
| 367 | +# 检查ES服务状态 | ||
| 368 | +curl http://localhost:9200 | ||
| 369 | + | ||
| 370 | +# 检查ES版本 | ||
| 371 | +curl -s http://localhost:9200 | grep number | ||
| 372 | + | ||
| 373 | +# 确认配置 | ||
| 374 | +cat .env | grep ES_ | ||
| 375 | +``` | ||
| 376 | + | ||
| 377 | +### Q3: 服务启动失败 | ||
| 378 | + | ||
| 379 | +**症状**: `Address already in use` 或端口被占用 | ||
| 380 | + | ||
| 381 | +**解决方案**: | ||
| 382 | +```bash | ||
| 383 | +# 查看占用端口的进程 | ||
| 384 | +lsof -i :6002 # 后端 | ||
| 385 | +lsof -i :6003 # 前端 | ||
| 386 | +lsof -i :9200 # ES | ||
| 387 | + | ||
| 388 | +# 杀掉进程 | ||
| 389 | +kill -9 <PID> | ||
| 390 | + | ||
| 391 | +# 或修改端口配置 | ||
| 392 | +``` | ||
| 393 | + | ||
| 394 | +### Q4: 搜索无结果 | ||
| 395 | + | ||
| 396 | +**症状**: 搜索返回空结果 | ||
| 397 | + | ||
| 398 | +**解决方案**: | ||
| 399 | +```bash | ||
| 400 | +# 检查ES中是否有数据 | ||
| 401 | +curl http://localhost:9200/search_products/_count | ||
| 402 | + | ||
| 403 | +# 检查tenant_id过滤是否正确 | ||
| 404 | +curl -X POST http://localhost:6002/search/ \ | ||
| 405 | + -H "Content-Type: application/json" \ | ||
| 406 | + -H "X-Tenant-ID: 2" \ | ||
| 407 | + -d '{"query": "*", "size": 10, "debug": true}' | ||
| 408 | +``` | ||
| 409 | + | ||
| 410 | +### Q5: 前端无法连接后端 | ||
| 411 | + | ||
| 412 | +**症状**: CORS错误 | ||
| 413 | + | ||
| 414 | +**解决方案**: | ||
| 415 | +- 确保后端在 http://localhost:6002 运行 | ||
| 416 | +- 检查浏览器控制台错误信息 | ||
| 417 | +- 检查后端的CORS配置,并查看后端日志中的相关错误 | ||
| 418 | + | ||
| 419 | +### Q6: 翻译不工作 | ||
| 420 | + | ||
| 421 | +**症状**: 翻译返回原文 | ||
| 422 | + | ||
| 423 | +**解决方案**: | ||
| 424 | +- 检查DEEPL_AUTH_KEY是否正确 | ||
| 425 | +- 如果没有API key,系统会使用mock模式(返回原文) | ||
| 426 | + | ||
| 427 | +--- | ||
| 428 | + | ||
| 429 | +## 相关文档 | ||
| 430 | + | ||
| 431 | +- **测试数据构造文档**: `TEST_DATA_GUIDE.md` - 如何构造和导入测试数据 | ||
| 432 | +- **API接口文档**: `API_INTEGRATION_GUIDE.md` - 完整的API对接指南 | ||
| 433 | +- **字段说明文档**: `INDEX_FIELDS_DOCUMENTATION.md` - 索引字段详细说明 | ||
| 434 | +- **设计文档**: `设计文档.md` - 系统架构和设计说明 | ||
| 435 | +- **README**: `README.md` - 项目概述和快速开始 | ||
| 436 | + | ||
| 437 | +--- | ||
| 438 | + | ||
| 439 | +**文档版本**: v2.0 | ||
| 440 | +**最后更新**: 2024-12 | ||
| 441 | + |
USER_GUIDE.md deleted
| @@ -1,539 +0,0 @@ | @@ -1,539 +0,0 @@ | ||
| 1 | -# 使用指南 - SearchEngine | ||
| 2 | - | ||
| 3 | -## 快速启动(推荐) | ||
| 4 | - | ||
| 5 | -### 一键启动所有服务 | ||
| 6 | - | ||
| 7 | -```bash | ||
| 8 | -cd /data/tw/SearchEngine | ||
| 9 | -./start_all.sh | ||
| 10 | -``` | ||
| 11 | - | ||
| 12 | -这个脚本会自动完成: | ||
| 13 | -1. 设置conda环境 | ||
| 14 | -2. 检查并导入测试数据(如果需要) | ||
| 15 | -3. 启动后端API服务(后台运行) | ||
| 16 | -4. 启动前端Web界面 | ||
| 17 | - | ||
| 18 | -启动完成后,访问: | ||
| 19 | -- **前端界面**: http://localhost:6003 | ||
| 20 | -- **后端API**: http://localhost:6002 | ||
| 21 | -- **API文档**: http://localhost:6002/docs | ||
| 22 | - | ||
| 23 | -### 停止服务 | ||
| 24 | - | ||
| 25 | -```bash | ||
| 26 | -# 停止后端 | ||
| 27 | -kill $(cat logs/backend.pid) | ||
| 28 | - | ||
| 29 | -# 前端按 Ctrl+C | ||
| 30 | -``` | ||
| 31 | - | ||
| 32 | ---- | ||
| 33 | - | ||
| 34 | -## 分步启动(自定义) | ||
| 35 | - | ||
| 36 | -### 1. 环境设置 | ||
| 37 | - | ||
| 38 | -```bash | ||
| 39 | -cd /data/tw/SearchEngine | ||
| 40 | -./setup.sh | ||
| 41 | -``` | ||
| 42 | - | ||
| 43 | -这会: | ||
| 44 | -- 创建/激活conda环境 `searchengine` | ||
| 45 | -- 加载配置文件 | ||
| 46 | -- 检查Elasticsearch连接 | ||
| 47 | - | ||
| 48 | -### 2. 数据导入 | ||
| 49 | - | ||
| 50 | -#### 2.1 从MySQL导入到Elasticsearch | ||
| 51 | - | ||
| 52 | -数据导入脚本需要指定 `tenant_id` 参数,用于从MySQL中筛选对应租户的数据。 | ||
| 53 | - | ||
| 54 | -**基本用法**: | ||
| 55 | -```bash | ||
| 56 | -./scripts/ingest.sh <tenant_id> [recreate_index] | ||
| 57 | -``` | ||
| 58 | - | ||
| 59 | -参数说明: | ||
| 60 | -- `tenant_id`: **必需**,租户ID,用于筛选数据库中的数据 | ||
| 61 | -- `recreate_index`: 可选,是否删除并重建索引(true/false,默认:false) | ||
| 62 | - | ||
| 63 | -**示例**: | ||
| 64 | - | ||
| 65 | -快速测试(tenant_id=2,重建索引): | ||
| 66 | -```bash | ||
| 67 | -./scripts/ingest.sh 2 true | ||
| 68 | -``` | ||
| 69 | - | ||
| 70 | -增量导入(tenant_id=2,不重建索引): | ||
| 71 | -```bash | ||
| 72 | -./scripts/ingest.sh 2 false | ||
| 73 | -``` | ||
| 74 | - | ||
| 75 | -**检查可用的 tenant_id**: | ||
| 76 | - | ||
| 77 | -如果导入时显示 "No documents to index",脚本会自动显示调试信息,包括: | ||
| 78 | -- 该 tenant_id 的统计信息(总数、活跃数、已删除数) | ||
| 79 | -- 数据库中存在的其他 tenant_id 列表 | ||
| 80 | - | ||
| 81 | -#### 2.2 构造测试数据(Mock Data) | ||
| 82 | - | ||
| 83 | -`mock_data.sh` 脚本用于构造完整的测试数据,包含两部分: | ||
| 84 | - | ||
| 85 | -1. **tenant_id=1**: 自动生成的mock数据(1000条SPU) | ||
| 86 | -2. **tenant_id=2**: 从CSV文件导入的数据(10000条SPU) | ||
| 87 | - | ||
| 88 | -**使用方法**: | ||
| 89 | - | ||
| 90 | -将CSV文件放在 `data/customer1/goods_with_pic.5years_congku.csv.shuf.1w`,然后直接运行: | ||
| 91 | -```bash | ||
| 92 | -./scripts/mock_data.sh | ||
| 93 | -``` | ||
| 94 | - | ||
| 95 | -脚本会自动: | ||
| 96 | -- 生成 tenant_id=1 的mock数据并导入MySQL | ||
| 97 | -- 从CSV文件读取数据,生成 tenant_id=2 的数据并导入MySQL | ||
| 98 | -- 自动计算起始ID,避免主键冲突 | ||
| 99 | - | ||
| 100 | -**注意**: | ||
| 101 | -- 所有配置(数据库地址、CSV路径等)都写死在脚本中,这是测试数据构造脚本,不需要配置化 | ||
| 102 | -- 如果CSV文件路径不同,需要修改 `scripts/mock_data.sh` 中的 `TENANT2_CSV_FILE` 变量 | ||
| 103 | - | ||
| 104 | -**手动分步执行**(如果需要自定义参数): | ||
| 105 | - | ||
| 106 | -1. **生成SQL文件**: | ||
| 107 | -```bash | ||
| 108 | -python scripts/import_tenant2_csv.py \ | ||
| 109 | - --csv-file data/customer1/goods_with_pic.5years_congku.csv.shuf.1w \ | ||
| 110 | - --tenant-id 2 \ | ||
| 111 | - --output customer1_data.sql \ | ||
| 112 | - --db-host 120.79.247.228 \ | ||
| 113 | - --db-port 3316 \ | ||
| 114 | - --db-database saas \ | ||
| 115 | - --db-username saas \ | ||
| 116 | - --db-password <密码> | ||
| 117 | -``` | ||
| 118 | - | ||
| 119 | -2. **导入SQL到MySQL**: | ||
| 120 | -```bash | ||
| 121 | -python scripts/import_test_data.py \ | ||
| 122 | - --db-host 120.79.247.228 \ | ||
| 123 | - --db-port 3316 \ | ||
| 124 | - --db-database saas \ | ||
| 125 | - --db-username saas \ | ||
| 126 | - --db-password <密码> \ | ||
| 127 | - --sql-file customer1_data.sql \ | ||
| 128 | - --tenant-id 2 | ||
| 129 | -``` | ||
| 130 | - | ||
| 131 | -**CSV文件格式要求**: | ||
| 132 | - | ||
| 133 | -CSV文件需要包含以下列(列名不区分大小写): | ||
| 134 | -- `skuId` - SKU ID | ||
| 135 | -- `name` - 商品名称 | ||
| 136 | -- `name_pinyin` - 拼音(可选) | ||
| 137 | -- `create_time` - 创建时间(格式:YYYY-MM-DD HH:MM:SS) | ||
| 138 | -- `ruSkuName` - 俄文SKU名称(可选) | ||
| 139 | -- `enSpuName` - 英文SPU名称(可选) | ||
| 140 | -- `categoryName` - 类别名称 | ||
| 141 | -- `supplierName` - 供应商名称 | ||
| 142 | -- `brandName` - 品牌名称 | ||
| 143 | -- `file_id` - 文件ID(可选) | ||
| 144 | -- `days_since_last_update` - 更新天数(可选) | ||
| 145 | -- `id` - 商品ID(可选) | ||
| 146 | -- `imageUrl` - 图片URL(可选) | ||
| 147 | - | ||
| 148 | -**注意**: | ||
| 149 | -- 首次运行会下载模型文件(BGE-M3和CN-CLIP),大约需要10-30分钟 | ||
| 150 | -- 确保MySQL中存在对应 tenant_id 的数据(`shoplazza_product_spu` 和 `shoplazza_product_sku` 表) | ||
| 151 | -- 只有 `deleted=0` 的记录会被导入 | ||
| 152 | -- CSV导入会先清理该 tenant_id 的旧数据,再导入新数据 | ||
| 153 | - | ||
| 154 | -### 3. 启动后端 | ||
| 155 | - | ||
| 156 | -```bash | ||
| 157 | -./scripts/start_backend.sh | ||
| 158 | -``` | ||
| 159 | - | ||
| 160 | -后端API会在 http://localhost:6002 启动 | ||
| 161 | - | ||
| 162 | -### 4. 启动前端 | ||
| 163 | - | ||
| 164 | -```bash | ||
| 165 | -./scripts/start_frontend.sh | ||
| 166 | -``` | ||
| 167 | - | ||
| 168 | -前端界面会在 http://localhost:6003 启动 | ||
| 169 | - | ||
| 170 | ---- | ||
| 171 | - | ||
| 172 | -## 配置说明 | ||
| 173 | - | ||
| 174 | -### 环境配置文件 (.env) | ||
| 175 | - | ||
| 176 | -```bash | ||
| 177 | -# Elasticsearch配置 | ||
| 178 | -ES_HOST=http://localhost:9200 | ||
| 179 | -ES_USERNAME=essa | ||
| 180 | -ES_PASSWORD=4hOaLaf41y2VuI8y | ||
| 181 | - | ||
| 182 | -# Redis配置(可选,用于缓存) | ||
| 183 | -REDIS_HOST=localhost | ||
| 184 | -REDIS_PORT=6479 | ||
| 185 | -REDIS_PASSWORD=BMfv5aI31kgHWtlx | ||
| 186 | - | ||
| 187 | -# DeepL翻译API | ||
| 188 | -DEEPL_AUTH_KEY=c9293ab4-ad25-479b-919f-ab4e63b429ed | ||
| 189 | - | ||
| 190 | -# 租户配置 | ||
| 191 | -TENANT_ID=tenant1 | ||
| 192 | - | ||
| 193 | -# API服务配置 | ||
| 194 | -API_HOST=0.0.0.0 | ||
| 195 | -API_PORT=6002 | ||
| 196 | -``` | ||
| 197 | - | ||
| 198 | -### 修改配置 | ||
| 199 | - | ||
| 200 | -1. 编辑 `.env` 文件 | ||
| 201 | -2. 重启相关服务 | ||
| 202 | - | ||
| 203 | ---- | ||
| 204 | - | ||
| 205 | -## 使用Web界面 | ||
| 206 | - | ||
| 207 | -### 搜索功能 | ||
| 208 | - | ||
| 209 | -1. **简单搜索**: 直接输入关键词 | ||
| 210 | - - 中文: "芭比娃娃" | ||
| 211 | - - 英文: "fire control set" | ||
| 212 | - - 俄文: "Наборы для пожаротушения" | ||
| 213 | - | ||
| 214 | -2. **布尔搜索**: 使用操作符 | ||
| 215 | - - AND: "toy AND barbie" | ||
| 216 | - - OR: "barbie OR doll" | ||
| 217 | - - ANDNOT: "toy ANDNOT cheap" | ||
| 218 | - - 组合: "toy AND (barbie OR doll) ANDNOT cheap" | ||
| 219 | - | ||
| 220 | -3. **域搜索**: 指定搜索域 | ||
| 221 | - - 品牌: "brand:ZHU LIN" | ||
| 222 | - - 类别: "category:玩具" | ||
| 223 | - | ||
| 224 | -### 搜索选项 | ||
| 225 | - | ||
| 226 | -- **启用翻译**: 自动翻译查询到其他语言 | ||
| 227 | -- **启用语义搜索**: 使用embedding进行语义匹配 | ||
| 228 | -- **启用自定义排序**: 使用配置的ranking表达式 | ||
| 229 | -- **结果数量**: 10/20/50条 | ||
| 230 | - | ||
| 231 | ---- | ||
| 232 | - | ||
| 233 | -## API使用 | ||
| 234 | - | ||
| 235 | -### 搜索接口(v3.0 更新) | ||
| 236 | - | ||
| 237 | -**基础搜索**(需要指定 tenant_id): | ||
| 238 | -```bash | ||
| 239 | -curl -X POST http://localhost:6002/search/ \ | ||
| 240 | - -H "Content-Type: application/json" \ | ||
| 241 | - -H "X-Tenant-ID: 2" \ | ||
| 242 | - -d '{ | ||
| 243 | - "query": "芭比娃娃", | ||
| 244 | - "size": 20 | ||
| 245 | - }' | ||
| 246 | -``` | ||
| 247 | - | ||
| 248 | -或者通过查询参数: | ||
| 249 | -```bash | ||
| 250 | -curl -X POST "http://localhost:6002/search/?tenant_id=2" \ | ||
| 251 | - -H "Content-Type: application/json" \ | ||
| 252 | - -d '{ | ||
| 253 | - "query": "芭比娃娃", | ||
| 254 | - "size": 20 | ||
| 255 | - }' | ||
| 256 | -``` | ||
| 257 | - | ||
| 258 | -**带过滤器的搜索**: | ||
| 259 | -```bash | ||
| 260 | -curl -X POST http://localhost:6002/search/ \ | ||
| 261 | - -H "Content-Type: application/json" \ | ||
| 262 | - -H "X-Tenant-ID: 2" \ | ||
| 263 | - -d '{ | ||
| 264 | - "query": "玩具", | ||
| 265 | - "size": 20, | ||
| 266 | - "filters": { | ||
| 267 | - "categoryName_keyword": ["玩具", "益智玩具"] | ||
| 268 | - }, | ||
| 269 | - "range_filters": { | ||
| 270 | - "price": {"gte": 50, "lte": 200} | ||
| 271 | - } | ||
| 272 | - }' | ||
| 273 | -``` | ||
| 274 | - | ||
| 275 | -**带分面搜索**: | ||
| 276 | -```bash | ||
| 277 | -curl -X POST http://localhost:6002/search/ \ | ||
| 278 | - -H "Content-Type: application/json" \ | ||
| 279 | - -H "X-Tenant-ID: 2" \ | ||
| 280 | - -d '{ | ||
| 281 | - "query": "玩具", | ||
| 282 | - "size": 20, | ||
| 283 | - "facets": [ | ||
| 284 | - {"field": "categoryName_keyword", "size": 15}, | ||
| 285 | - {"field": "brandName_keyword", "size": 15} | ||
| 286 | - ] | ||
| 287 | - }' | ||
| 288 | -``` | ||
| 289 | - | ||
| 290 | -### 图片搜索 | ||
| 291 | - | ||
| 292 | -```bash | ||
| 293 | -curl -X POST http://localhost:6002/search/image \ | ||
| 294 | - -H "Content-Type: application/json" \ | ||
| 295 | - -H "X-Tenant-ID: 2" \ | ||
| 296 | - -d '{ | ||
| 297 | - "image_url": "https://oss.essa.cn/example.jpg", | ||
| 298 | - "size": 10 | ||
| 299 | - }' | ||
| 300 | -``` | ||
| 301 | - | ||
| 302 | -**注意**: 所有搜索接口都需要通过 `X-Tenant-ID` 请求头或 `tenant_id` 查询参数指定租户ID。 | ||
| 303 | - | ||
| 304 | -### 健康检查 | ||
| 305 | - | ||
| 306 | -```bash | ||
| 307 | -curl http://localhost:6002/admin/health | ||
| 308 | -``` | ||
| 309 | - | ||
| 310 | -### 查看配置 | ||
| 311 | - | ||
| 312 | -```bash | ||
| 313 | -curl http://localhost:6002/admin/config | ||
| 314 | -``` | ||
| 315 | - | ||
| 316 | -### 索引统计 | ||
| 317 | - | ||
| 318 | -```bash | ||
| 319 | -curl http://localhost:6002/admin/stats | ||
| 320 | -``` | ||
| 321 | - | ||
| 322 | ---- | ||
| 323 | - | ||
| 324 | -## 常见问题 | ||
| 325 | - | ||
| 326 | -### 1. Elasticsearch连接失败 | ||
| 327 | - | ||
| 328 | -**问题**: `Failed to connect to Elasticsearch` | ||
| 329 | - | ||
| 330 | -**解决**: | ||
| 331 | -```bash | ||
| 332 | -# 检查ES是否运行 | ||
| 333 | -curl http://localhost:9200 | ||
| 334 | - | ||
| 335 | -# 检查配置 | ||
| 336 | -cat .env | grep ES_ | ||
| 337 | -``` | ||
| 338 | - | ||
| 339 | -### 2. 导入数据时内存不足 | ||
| 340 | - | ||
| 341 | -**问题**: `Out of memory` | ||
| 342 | - | ||
| 343 | -**解决**: | ||
| 344 | -```bash | ||
| 345 | -# 减少batch size或跳过embedding | ||
| 346 | -./scripts/ingest.sh 1000 true | ||
| 347 | -``` | ||
| 348 | - | ||
| 349 | -### 3. 模型下载失败 | ||
| 350 | - | ||
| 351 | -**问题**: 模型文件下载超时 | ||
| 352 | - | ||
| 353 | -**解决**: | ||
| 354 | -- 检查网络连接 | ||
| 355 | -- 使用国内镜像源 | ||
| 356 | -- 手动下载模型到指定目录 | ||
| 357 | - | ||
| 358 | -### 4. 翻译不工作 | ||
| 359 | - | ||
| 360 | -**问题**: 翻译返回原文 | ||
| 361 | - | ||
| 362 | -**解决**: | ||
| 363 | -- 检查DEEPL_AUTH_KEY是否正确 | ||
| 364 | -- 如果没有API key,系统会使用mock模式(返回原文) | ||
| 365 | - | ||
| 366 | -### 5. 前端无法连接后端 | ||
| 367 | - | ||
| 368 | -**问题**: CORS错误 | ||
| 369 | - | ||
| 370 | -**解决**: | ||
| 371 | -- 确保后端在 http://localhost:6002 运行 | ||
| 372 | -- 检查浏览器控制台错误信息 | ||
| 373 | - | ||
| 374 | -### 6. 数据导入时没有数据 | ||
| 375 | - | ||
| 376 | -**问题**: `WARNING: No documents to index` 或 `Transformed 0 SPU documents` | ||
| 377 | - | ||
| 378 | -**可能原因**: | ||
| 379 | -1. 数据库中不存在该 tenant_id 的数据 | ||
| 380 | -2. 数据都被标记为 `deleted=1` | ||
| 381 | -3. tenant_id 类型不匹配 | ||
| 382 | - | ||
| 383 | -**解决步骤**: | ||
| 384 | - | ||
| 385 | -1. **查看调试信息**: 脚本会自动显示调试信息,包括: | ||
| 386 | - ``` | ||
| 387 | - DEBUG: tenant_id=1000: total=0, active=0, deleted=0 | ||
| 388 | - DEBUG: Available tenant_ids in shoplazza_product_spu: | ||
| 389 | - tenant_id=1: total=100, active=100 | ||
| 390 | - tenant_id=2: total=50, active=50 | ||
| 391 | - ``` | ||
| 392 | - | ||
| 393 | -2. **检查数据库**: 直接查询MySQL确认数据 | ||
| 394 | - ```sql | ||
| 395 | - -- 查看有哪些 tenant_id | ||
| 396 | - SELECT tenant_id, COUNT(*) as count, | ||
| 397 | - SUM(CASE WHEN deleted = 0 THEN 1 ELSE 0 END) as active | ||
| 398 | - FROM shoplazza_product_spu | ||
| 399 | - GROUP BY tenant_id; | ||
| 400 | - | ||
| 401 | - -- 检查特定 tenant_id 的数据 | ||
| 402 | - SELECT COUNT(*) FROM shoplazza_product_spu | ||
| 403 | - WHERE tenant_id = 1000 AND deleted = 0; | ||
| 404 | - ``` | ||
| 405 | - | ||
| 406 | -3. **如果数据库中没有数据,需要先导入数据**: | ||
| 407 | - - 如果有CSV文件,使用CSV导入脚本(见"2.2 构造测试数据(Mock Data)") | ||
| 408 | - - 如果没有CSV文件,可以使用mock数据生成脚本 | ||
| 409 | - | ||
| 410 | -4. **使用正确的 tenant_id**: 根据调试信息显示的可用 tenant_id,使用正确的值重新导入 | ||
| 411 | - ```bash | ||
| 412 | - ./scripts/ingest.sh 2 true # 使用调试信息中显示的 tenant_id | ||
| 413 | - ``` | ||
| 414 | - | ||
| 415 | ---- | ||
| 416 | - | ||
| 417 | -## 开发和调试 | ||
| 418 | - | ||
| 419 | -### 查看日志 | ||
| 420 | - | ||
| 421 | -```bash | ||
| 422 | -# 后端日志 | ||
| 423 | -tail -f logs/backend.log | ||
| 424 | - | ||
| 425 | -# 实时日志(如果前台运行) | ||
| 426 | -./scripts/start_backend.sh | ||
| 427 | -``` | ||
| 428 | - | ||
| 429 | -### Python命令行测试 | ||
| 430 | - | ||
| 431 | -```bash | ||
| 432 | -# 激活环境 | ||
| 433 | -source /home/tw/miniconda3/etc/profile.d/conda.sh | ||
| 434 | -conda activate searchengine | ||
| 435 | - | ||
| 436 | -# 测试搜索(需要指定 tenant_id) | ||
| 437 | -python -c " | ||
| 438 | -from config import ConfigLoader | ||
| 439 | -from utils.es_client import ESClient | ||
| 440 | -from search.searcher import Searcher | ||
| 441 | -from config.env_config import ES_CONFIG | ||
| 442 | - | ||
| 443 | -config_loader = ConfigLoader('config/config.yaml') | ||
| 444 | -config = config_loader.load_config() | ||
| 445 | - | ||
| 446 | -es_client = ESClient(hosts=[ES_CONFIG['host']], | ||
| 447 | - username=ES_CONFIG.get('username'), | ||
| 448 | - password=ES_CONFIG.get('password')) | ||
| 449 | - | ||
| 450 | -searcher = Searcher(config, es_client) | ||
| 451 | -result = searcher.search('芭比娃娃', tenant_id='2', size=5) | ||
| 452 | - | ||
| 453 | -print(f'找到 {result.total} 个结果') | ||
| 454 | -for hit in result.hits: | ||
| 455 | - print(f' - {hit[\"title\"]} (分数: {hit[\"_score\"]:.4f})') | ||
| 456 | -" | ||
| 457 | -``` | ||
| 458 | - | ||
| 459 | -### 重新导入数据 | ||
| 460 | - | ||
| 461 | -```bash | ||
| 462 | -# 删除现有索引并重新导入(需要指定 tenant_id) | ||
| 463 | -./scripts/ingest.sh <tenant_id> true | ||
| 464 | - | ||
| 465 | -# 例如:导入 tenant_id=2 的数据并重建索引 | ||
| 466 | -./scripts/ingest.sh 2 true | ||
| 467 | -``` | ||
| 468 | - | ||
| 469 | -### 检查数据库中的 tenant_id | ||
| 470 | - | ||
| 471 | -如果不知道应该使用哪个 tenant_id,可以: | ||
| 472 | - | ||
| 473 | -1. **运行导入脚本查看调试信息**(即使没有数据也会显示): | ||
| 474 | - ```bash | ||
| 475 | - ./scripts/ingest.sh 999 true | ||
| 476 | - ``` | ||
| 477 | - 脚本会显示数据库中存在的 tenant_id 列表 | ||
| 478 | - | ||
| 479 | -2. **直接查询数据库**: | ||
| 480 | - ```bash | ||
| 481 | - mysql -h 120.79.247.228 -P 3316 -u saas -p saas -e \ | ||
| 482 | - "SELECT tenant_id, COUNT(*) as count FROM shoplazza_product_spu GROUP BY tenant_id;" | ||
| 483 | - ``` | ||
| 484 | - | ||
| 485 | ---- | ||
| 486 | - | ||
| 487 | -## 性能优化 | ||
| 488 | - | ||
| 489 | -### 1. 使用embedding缓存 | ||
| 490 | - | ||
| 491 | -首次生成embedding后会自动缓存到 `.cache/` 目录,后续导入会更快。 | ||
| 492 | - | ||
| 493 | -### 2. 批量大小调整 | ||
| 494 | - | ||
| 495 | -```bash | ||
| 496 | -# 修改批量大小(在ingest_tenant1.py中) | ||
| 497 | ---batch-size 200 # 默认100 | ||
| 498 | -``` | ||
| 499 | - | ||
| 500 | -### 3. GPU加速 | ||
| 501 | - | ||
| 502 | -确保CUDA可用以加速embedding生成: | ||
| 503 | -```bash | ||
| 504 | -python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')" | ||
| 505 | -``` | ||
| 506 | - | ||
| 507 | ---- | ||
| 508 | - | ||
| 509 | -## 项目结构 | ||
| 510 | - | ||
| 511 | -``` | ||
| 512 | -SearchEngine/ | ||
| 513 | -├── .env # 环境配置 | ||
| 514 | -├── setup.sh # 环境设置脚本 | ||
| 515 | -├── start_all.sh # 一键启动脚本 | ||
| 516 | -├── scripts/ # 运行脚本 | ||
| 517 | -│ ├── ingest.sh # 数据导入 | ||
| 518 | -│ ├── start_backend.sh # 启动后端 | ||
| 519 | -│ └── start_frontend.sh # 启动前端 | ||
| 520 | -├── frontend/ # Web前端 | ||
| 521 | -│ ├── index.html | ||
| 522 | -│ └── static/ | ||
| 523 | -├── logs/ # 日志文件 | ||
| 524 | -├── config/ # 配置模块 | ||
| 525 | -├── indexer/ # 数据导入 | ||
| 526 | -├── query/ # 查询处理 | ||
| 527 | -├── search/ # 搜索引擎 | ||
| 528 | -├── embeddings/ # 向量模型 | ||
| 529 | -└── api/ # REST API | ||
| 530 | -``` | ||
| 531 | - | ||
| 532 | ---- | ||
| 533 | - | ||
| 534 | -## 支持 | ||
| 535 | - | ||
| 536 | -遇到问题请查看: | ||
| 537 | -- **日志**: `logs/backend.log` | ||
| 538 | -- **API文档**: http://localhost:6002/docs | ||
| 539 | -- **配置**: `config/schema/tenant1_config.yaml` |
docs/店匠相关资料/python-app-demo/main.py
| @@ -69,7 +69,7 @@ def auth(): | @@ -69,7 +69,7 @@ def auth(): | ||
| 69 | if not shop: | 69 | if not shop: |
| 70 | return jsonify({"error": "shop parameter is required"}), 400 | 70 | return jsonify({"error": "shop parameter is required"}), 400 |
| 71 | 71 | ||
| 72 | - scopes = "read_customer" | 72 | + scopes = "read_tenant" |
| 73 | state = secrets.token_hex(16) # 生成随机state | 73 | state = secrets.token_hex(16) # 生成随机state |
| 74 | 74 | ||
| 75 | auth_url = ( | 75 | auth_url = ( |
| @@ -117,8 +117,8 @@ def auth_callback(): | @@ -117,8 +117,8 @@ def auth_callback(): | ||
| 117 | 117 | ||
| 118 | # 这里可以调用Shoplazza OpenAPI获取更多数据 | 118 | # 这里可以调用Shoplazza OpenAPI获取更多数据 |
| 119 | # headers = {"Access-Token": token_data.get("access_token")} | 119 | # headers = {"Access-Token": token_data.get("access_token")} |
| 120 | - # customers_url = f"https://{shop}/openapi/2022-01/customers" | ||
| 121 | - # customers_response = requests.get(customers_url, headers=headers) | 120 | + # tenants_url = f"https://{shop}/openapi/2022-01/tenants" |
| 121 | + # tenants_response = requests.get(tenants_url, headers=headers) | ||
| 122 | # return redirect(LAST_URL) | 122 | # return redirect(LAST_URL) |
| 123 | 123 | ||
| 124 | except requests.exceptions.RequestException as e: | 124 | except requests.exceptions.RequestException as e: |
scripts/import_tenant2_csv.py
| @@ -303,7 +303,7 @@ def generate_sql_inserts(spus: list, skus: list, output_file: str): | @@ -303,7 +303,7 @@ def generate_sql_inserts(spus: list, skus: list, output_file: str): | ||
| 303 | output_file: Output file path | 303 | output_file: Output file path |
| 304 | """ | 304 | """ |
| 305 | with open(output_file, 'w', encoding='utf-8') as f: | 305 | with open(output_file, 'w', encoding='utf-8') as f: |
| 306 | - f.write("-- SPU Data from customer1 CSV\n") | 306 | + f.write("-- SPU Data from tenant2 CSV\n") |
| 307 | f.write("INSERT INTO shoplazza_product_spu (\n") | 307 | f.write("INSERT INTO shoplazza_product_spu (\n") |
| 308 | f.write(" id, shop_id, shoplazza_id, handle, title, brief, description, spu,\n") | 308 | f.write(" id, shop_id, shoplazza_id, handle, title, brief, description, spu,\n") |
| 309 | f.write(" vendor, vendor_url, seo_title, seo_description, seo_keywords,\n") | 309 | f.write(" vendor, vendor_url, seo_title, seo_description, seo_keywords,\n") |
| @@ -345,7 +345,7 @@ def generate_sql_inserts(spus: list, skus: list, output_file: str): | @@ -345,7 +345,7 @@ def generate_sql_inserts(spus: list, skus: list, output_file: str): | ||
| 345 | else: | 345 | else: |
| 346 | f.write(";\n\n") | 346 | f.write(";\n\n") |
| 347 | 347 | ||
| 348 | - f.write("-- SKU Data from customer1 CSV\n") | 348 | + f.write("-- SKU Data from tenant2 CSV\n") |
| 349 | f.write("INSERT INTO shoplazza_product_sku (\n") | 349 | f.write("INSERT INTO shoplazza_product_sku (\n") |
| 350 | f.write(" id, spu_id, shop_id, shoplazza_id, shoplazza_product_id, shoplazza_image_id,\n") | 350 | f.write(" id, spu_id, shop_id, shoplazza_id, shoplazza_product_id, shoplazza_image_id,\n") |
| 351 | f.write(" title, sku, barcode, position, price, compare_at_price, cost_price,\n") | 351 | f.write(" title, sku, barcode, position, price, compare_at_price, cost_price,\n") |
| @@ -425,7 +425,7 @@ def main(): | @@ -425,7 +425,7 @@ def main(): | ||
| 425 | parser.add_argument('--csv-file', required=True, help='CSV file path') | 425 | parser.add_argument('--csv-file', required=True, help='CSV file path') |
| 426 | parser.add_argument('--tenant-id', default='2', help='Tenant ID (default: 2)') | 426 | parser.add_argument('--tenant-id', default='2', help='Tenant ID (default: 2)') |
| 427 | parser.add_argument('--start-spu-id', type=int, default=None, help='Starting SPU ID (default: auto-calculate from DB)') | 427 | parser.add_argument('--start-spu-id', type=int, default=None, help='Starting SPU ID (default: auto-calculate from DB)') |
| 428 | - parser.add_argument('--output', default='customer1_data.sql', help='Output SQL file (default: customer1_data.sql)') | 428 | + parser.add_argument('--output', default='tenant2_data.sql', help='Output SQL file (default: tenant2_data.sql)') |
| 429 | parser.add_argument('--db-host', help='Database host (for auto-calculating start IDs)') | 429 | parser.add_argument('--db-host', help='Database host (for auto-calculating start IDs)') |
| 430 | parser.add_argument('--db-port', type=int, default=3306, help='Database port (default: 3306)') | 430 | parser.add_argument('--db-port', type=int, default=3306, help='Database port (default: 3306)') |
| 431 | parser.add_argument('--db-database', help='Database name (for auto-calculating start IDs)') | 431 | parser.add_argument('--db-database', help='Database name (for auto-calculating start IDs)') |
scripts/mock_data.sh
| @@ -121,7 +121,7 @@ echo -e "\n${YELLOW}========================================${NC}" | @@ -121,7 +121,7 @@ echo -e "\n${YELLOW}========================================${NC}" | ||
| 121 | echo -e "${YELLOW}Part 2/2: 生成并导入 tenant_id=2 的CSV数据${NC}" | 121 | echo -e "${YELLOW}Part 2/2: 生成并导入 tenant_id=2 的CSV数据${NC}" |
| 122 | echo -e "${YELLOW}========================================${NC}" | 122 | echo -e "${YELLOW}========================================${NC}" |
| 123 | 123 | ||
| 124 | -TENANT2_SQL_FILE="customer1_data.sql" | 124 | +TENANT2_SQL_FILE="tenant2_data.sql" |
| 125 | 125 | ||
| 126 | echo -e "\n${YELLOW}Step 2.1: 从CSV生成数据${NC}" | 126 | echo -e "\n${YELLOW}Step 2.1: 从CSV生成数据${NC}" |
| 127 | python scripts/import_tenant2_csv.py \ | 127 | python scripts/import_tenant2_csv.py \ |
user_guide.md deleted
| @@ -1,448 +0,0 @@ | @@ -1,448 +0,0 @@ | ||
| 1 | -# 使用指南 | ||
| 2 | - | ||
| 3 | -本文档提供完整的使用指南,包括环境准备、数据导入、服务启动、测试等。 | ||
| 4 | - | ||
| 5 | -## 目录 | ||
| 6 | - | ||
| 7 | -1. [环境准备](#环境准备) | ||
| 8 | -2. [数据准备](#数据准备) | ||
| 9 | -3. [数据导入](#数据导入) | ||
| 10 | -4. [服务启动](#服务启动) | ||
| 11 | -5. [测试验证](#测试验证) | ||
| 12 | -6. [常见问题](#常见问题) | ||
| 13 | - | ||
| 14 | ---- | ||
| 15 | - | ||
| 16 | -## 环境准备 | ||
| 17 | - | ||
| 18 | -### 系统要求 | ||
| 19 | - | ||
| 20 | -- **操作系统**: Linux (推荐 CentOS 7+ / Ubuntu 18.04+) | ||
| 21 | -- **Python**: 3.8+ | ||
| 22 | -- **内存**: 建议 8GB+ | ||
| 23 | -- **磁盘**: 10GB+ (包含模型文件) | ||
| 24 | -- **Elasticsearch**: 8.x (可通过Docker运行) | ||
| 25 | - | ||
| 26 | -### 安装依赖 | ||
| 27 | - | ||
| 28 | -#### 1. 安装Python依赖 | ||
| 29 | - | ||
| 30 | -```bash | ||
| 31 | -cd /home/tw/SearchEngine | ||
| 32 | -pip install -r requirements.txt | ||
| 33 | -``` | ||
| 34 | - | ||
| 35 | -#### 2. 启动Elasticsearch | ||
| 36 | - | ||
| 37 | -**方式1: 使用Docker(推荐)** | ||
| 38 | - | ||
| 39 | -```bash | ||
| 40 | -docker run -d \ | ||
| 41 | - --name elasticsearch \ | ||
| 42 | - -p 9200:9200 \ | ||
| 43 | - -e "discovery.type=single-node" \ | ||
| 44 | - -e "ES_JAVA_OPTS=-Xms2g -Xmx2g" \ | ||
| 45 | - elasticsearch:8.11.0 | ||
| 46 | -``` | ||
| 47 | - | ||
| 48 | -**方式2: 本地安装** | ||
| 49 | - | ||
| 50 | -参考 [Elasticsearch官方文档](https://www.elastic.co/guide/en/elasticsearch/reference/8.11/install-elasticsearch.html) | ||
| 51 | - | ||
| 52 | -#### 3. 配置环境变量(可选) | ||
| 53 | - | ||
| 54 | -创建 `.env` 文件: | ||
| 55 | - | ||
| 56 | -```bash | ||
| 57 | -# MySQL配置 | ||
| 58 | -DB_HOST=120.79.247.228 | ||
| 59 | -DB_PORT=3316 | ||
| 60 | -DB_DATABASE=saas | ||
| 61 | -DB_USERNAME=saas | ||
| 62 | -DB_PASSWORD=your_password | ||
| 63 | - | ||
| 64 | -# Elasticsearch配置 | ||
| 65 | -ES_HOST=http://localhost:9200 | ||
| 66 | - | ||
| 67 | -# DeepL翻译API(可选) | ||
| 68 | -DEEPL_AUTH_KEY=your_deepl_api_key | ||
| 69 | -``` | ||
| 70 | - | ||
| 71 | ---- | ||
| 72 | - | ||
| 73 | -## 数据准备 | ||
| 74 | - | ||
| 75 | -### Mock数据说明 | ||
| 76 | - | ||
| 77 | -项目提供了两套写死的Mock数据,用于测试: | ||
| 78 | - | ||
| 79 | -1. **Tenant ID = 1**: 生成的Mock数据(100个SPU) | ||
| 80 | -2. **Tenant ID = 2**: 从CSV导入的真实数据 | ||
| 81 | - | ||
| 82 | -### 数据表结构 | ||
| 83 | - | ||
| 84 | -系统使用店匠标准表结构: | ||
| 85 | - | ||
| 86 | -- **SPU表**: `shoplazza_product_spu` - 商品SPU数据 | ||
| 87 | -- **SKU表**: `shoplazza_product_sku` - 商品SKU数据 | ||
| 88 | - | ||
| 89 | -表结构详见 `INDEX_FIELDS_DOCUMENTATION.md`。 | ||
| 90 | - | ||
| 91 | ---- | ||
| 92 | - | ||
| 93 | -## 数据导入 | ||
| 94 | - | ||
| 95 | -### 步骤1: 导入Mock数据到MySQL | ||
| 96 | - | ||
| 97 | -#### 方式1: 使用脚本(推荐) | ||
| 98 | - | ||
| 99 | -```bash | ||
| 100 | -# 导入tenant_id=1的Mock数据(默认100个SPU) | ||
| 101 | -./scripts/mock_data.sh | ||
| 102 | - | ||
| 103 | -# 指定tenant_id和SPU数量 | ||
| 104 | -./scripts/mock_data.sh 1 200 | ||
| 105 | - | ||
| 106 | -# 使用显式参数 | ||
| 107 | -./scripts/mock_data.sh --mode mock --tenant-id 1 --num-spus 200 | ||
| 108 | -``` | ||
| 109 | - | ||
| 110 | -#### 方式2: 导入CSV数据(tenant_id=2) | ||
| 111 | - | ||
| 112 | -```bash | ||
| 113 | -# 导入customer1的CSV数据 | ||
| 114 | -./scripts/mock_data.sh --mode csv \ | ||
| 115 | - --csv-file data/customer1/goods_with_pic.5years_congku.csv.shuf.1w \ | ||
| 116 | - --tenant-id 2 \ | ||
| 117 | - --start-spu-id 1 | ||
| 118 | -``` | ||
| 119 | - | ||
| 120 | -#### 方式3: 手动导入SQL | ||
| 121 | - | ||
| 122 | -```bash | ||
| 123 | -# 导入tenant_id=1的测试数据 | ||
| 124 | -mysql -h 120.79.247.228 -P 3316 -u saas -p saas < test_data_tenant1.sql | ||
| 125 | - | ||
| 126 | -# 导入tenant_id=2的CSV数据 | ||
| 127 | -mysql -h 120.79.247.228 -P 3316 -u saas -p saas < customer1_data.sql | ||
| 128 | -``` | ||
| 129 | - | ||
| 130 | -### 步骤2: 从MySQL导入数据到Elasticsearch | ||
| 131 | - | ||
| 132 | -#### 使用脚本(推荐) | ||
| 133 | - | ||
| 134 | -```bash | ||
| 135 | -# 导入tenant_id=1的数据 | ||
| 136 | -./scripts/ingest.sh 1 | ||
| 137 | - | ||
| 138 | -# 重建索引并导入数据 | ||
| 139 | -./scripts/ingest.sh 1 true | ||
| 140 | - | ||
| 141 | -# 导入tenant_id=2的数据 | ||
| 142 | -./scripts/ingest.sh 2 | ||
| 143 | -``` | ||
| 144 | - | ||
| 145 | -#### 手动执行 | ||
| 146 | - | ||
| 147 | -```bash | ||
| 148 | -python scripts/ingest_shoplazza.py \ | ||
| 149 | - --db-host 120.79.247.228 \ | ||
| 150 | - --db-port 3316 \ | ||
| 151 | - --db-database saas \ | ||
| 152 | - --db-username saas \ | ||
| 153 | - --db-password your_password \ | ||
| 154 | - --tenant-id 1 \ | ||
| 155 | - --config base \ | ||
| 156 | - --es-host http://localhost:9200 \ | ||
| 157 | - --recreate \ | ||
| 158 | - --batch-size 500 | ||
| 159 | -``` | ||
| 160 | - | ||
| 161 | -### 完整工作流程示例 | ||
| 162 | - | ||
| 163 | -```bash | ||
| 164 | -# 1. 导入tenant_id=1的Mock数据(100个SPU) | ||
| 165 | -./scripts/mock_data.sh 1 100 | ||
| 166 | - | ||
| 167 | -# 2. 导入tenant_id=2的CSV数据 | ||
| 168 | -./scripts/mock_data.sh --mode csv \ | ||
| 169 | - --csv-file data/customer1/goods_with_pic.5years_congku.csv.shuf.1w \ | ||
| 170 | - --tenant-id 2 | ||
| 171 | - | ||
| 172 | -# 3. 将两个租户的数据导入ES | ||
| 173 | -./scripts/ingest.sh 1 | ||
| 174 | -./scripts/ingest.sh 2 | ||
| 175 | - | ||
| 176 | -# 4. 验证数据导入 | ||
| 177 | -curl http://localhost:9200/search_products/_count | ||
| 178 | -``` | ||
| 179 | - | ||
| 180 | ---- | ||
| 181 | - | ||
| 182 | -## 服务启动 | ||
| 183 | - | ||
| 184 | -### 方式1: 使用启动脚本(推荐) | ||
| 185 | - | ||
| 186 | -```bash | ||
| 187 | -# 启动前端和后端服务 | ||
| 188 | -./run.sh | ||
| 189 | - | ||
| 190 | -# 重启所有服务 | ||
| 191 | -./restart.sh | ||
| 192 | -``` | ||
| 193 | - | ||
| 194 | -### 方式2: 手动启动 | ||
| 195 | - | ||
| 196 | -#### 启动后端API服务 | ||
| 197 | - | ||
| 198 | -```bash | ||
| 199 | -python -m api.app \ | ||
| 200 | - --host 0.0.0.0 \ | ||
| 201 | - --port 6002 \ | ||
| 202 | - --es-host http://localhost:9200 \ | ||
| 203 | - --reload | ||
| 204 | -``` | ||
| 205 | - | ||
| 206 | -#### 启动前端服务(可选) | ||
| 207 | - | ||
| 208 | -```bash | ||
| 209 | -# 使用Python简单HTTP服务器 | ||
| 210 | -cd frontend | ||
| 211 | -python -m http.server 6003 | ||
| 212 | -``` | ||
| 213 | - | ||
| 214 | -### 服务端口 | ||
| 215 | - | ||
| 216 | -| 服务 | 端口 | URL | | ||
| 217 | -|------|------|-----| | ||
| 218 | -| Elasticsearch | 9200 | http://localhost:9200 | | ||
| 219 | -| Backend API | 6002 | http://localhost:6002 | | ||
| 220 | -| Frontend Web | 6003 | http://localhost:6003 | | ||
| 221 | -| API Docs | 6002 | http://localhost:6002/docs | | ||
| 222 | - | ||
| 223 | ---- | ||
| 224 | - | ||
| 225 | -## 测试验证 | ||
| 226 | - | ||
| 227 | -### 1. 健康检查 | ||
| 228 | - | ||
| 229 | -```bash | ||
| 230 | -curl http://localhost:6002/admin/health | ||
| 231 | -``` | ||
| 232 | - | ||
| 233 | -**预期响应**: | ||
| 234 | -```json | ||
| 235 | -{ | ||
| 236 | - "status": "healthy", | ||
| 237 | - "elasticsearch": "connected" | ||
| 238 | -} | ||
| 239 | -``` | ||
| 240 | - | ||
| 241 | -### 2. 索引统计 | ||
| 242 | - | ||
| 243 | -```bash | ||
| 244 | -curl http://localhost:6002/admin/stats | ||
| 245 | -``` | ||
| 246 | - | ||
| 247 | -### 3. 简单搜索测试 | ||
| 248 | - | ||
| 249 | -```bash | ||
| 250 | -curl -X POST http://localhost:6002/search/ \ | ||
| 251 | - -H "Content-Type: application/json" \ | ||
| 252 | - -d '{ | ||
| 253 | - "query": "玩具", | ||
| 254 | - "size": 10 | ||
| 255 | - }' | ||
| 256 | -``` | ||
| 257 | - | ||
| 258 | -### 4. 带过滤器的搜索 | ||
| 259 | - | ||
| 260 | -```bash | ||
| 261 | -curl -X POST http://localhost:6002/search/ \ | ||
| 262 | - -H "Content-Type: application/json" \ | ||
| 263 | - -d '{ | ||
| 264 | - "query": "玩具", | ||
| 265 | - "size": 10, | ||
| 266 | - "filters": { | ||
| 267 | - "category_keyword": "益智玩具" | ||
| 268 | - }, | ||
| 269 | - "range_filters": { | ||
| 270 | - "min_price": { | ||
| 271 | - "gte": 50, | ||
| 272 | - "lte": 200 | ||
| 273 | - } | ||
| 274 | - } | ||
| 275 | - }' | ||
| 276 | -``` | ||
| 277 | - | ||
| 278 | -### 5. 分面搜索测试 | ||
| 279 | - | ||
| 280 | -```bash | ||
| 281 | -curl -X POST http://localhost:6002/search/ \ | ||
| 282 | - -H "Content-Type: application/json" \ | ||
| 283 | - -d '{ | ||
| 284 | - "query": "玩具", | ||
| 285 | - "size": 10, | ||
| 286 | - "facets": [ | ||
| 287 | - "category_keyword", | ||
| 288 | - "vendor_keyword" | ||
| 289 | - ] | ||
| 290 | - }' | ||
| 291 | -``` | ||
| 292 | - | ||
| 293 | -### 6. 布尔表达式搜索 | ||
| 294 | - | ||
| 295 | -```bash | ||
| 296 | -curl -X POST http://localhost:6002/search/ \ | ||
| 297 | - -H "Content-Type: application/json" \ | ||
| 298 | - -d '{ | ||
| 299 | - "query": "玩具 AND (乐高 OR 芭比)", | ||
| 300 | - "size": 10 | ||
| 301 | - }' | ||
| 302 | -``` | ||
| 303 | - | ||
| 304 | -### 7. 图片搜索测试 | ||
| 305 | - | ||
| 306 | -```bash | ||
| 307 | -curl -X POST http://localhost:6002/search/image \ | ||
| 308 | - -H "Content-Type: application/json" \ | ||
| 309 | - -d '{ | ||
| 310 | - "image_url": "https://example.com/image.jpg", | ||
| 311 | - "size": 10 | ||
| 312 | - }' | ||
| 313 | -``` | ||
| 314 | - | ||
| 315 | -### 8. 前端界面测试 | ||
| 316 | - | ||
| 317 | -访问 http://localhost:6003 或 http://localhost:6002/ 进行可视化测试。 | ||
| 318 | - | ||
| 319 | ---- | ||
| 320 | - | ||
| 321 | -## 常见问题 | ||
| 322 | - | ||
| 323 | -### Q1: MySQL连接失败 | ||
| 324 | - | ||
| 325 | -**症状**: `Failed to connect to MySQL` | ||
| 326 | - | ||
| 327 | -**解决方案**: | ||
| 328 | -```bash | ||
| 329 | -# 检查MySQL服务状态 | ||
| 330 | -mysql -h 120.79.247.228 -P 3316 -u saas -p -e "SELECT 1" | ||
| 331 | - | ||
| 332 | -# 检查配置 | ||
| 333 | -cat .env | grep DB_ | ||
| 334 | -``` | ||
| 335 | - | ||
| 336 | -### Q2: Elasticsearch连接失败 | ||
| 337 | - | ||
| 338 | -**症状**: `Failed to connect to Elasticsearch` | ||
| 339 | - | ||
| 340 | -**解决方案**: | ||
| 341 | -```bash | ||
| 342 | -# 检查ES服务状态 | ||
| 343 | -curl http://localhost:9200 | ||
| 344 | - | ||
| 345 | -# 检查ES版本 | ||
| 346 | -curl http://localhost:9200 | grep version | ||
| 347 | - | ||
| 348 | -# 确认配置 | ||
| 349 | -cat .env | grep ES_ | ||
| 350 | -``` | ||
| 351 | - | ||
| 352 | -### Q3: 数据导入失败 | ||
| 353 | - | ||
| 354 | -**症状**: `Error during data ingestion` | ||
| 355 | - | ||
| 356 | -**解决方案**: | ||
| 357 | -```bash | ||
| 358 | -# 检查MySQL数据是否存在 | ||
| 359 | -mysql -h 120.79.247.228 -P 3316 -u saas -p saas -e "SELECT COUNT(*) FROM shoplazza_product_spu WHERE tenant_id=1" | ||
| 360 | - | ||
| 361 | -# 检查ES索引是否存在 | ||
| 362 | -curl http://localhost:9200/search_products | ||
| 363 | - | ||
| 364 | -# 查看详细错误日志 | ||
| 365 | -python scripts/ingest_shoplazza.py --tenant-id 1 --recreate | ||
| 366 | -``` | ||
| 367 | - | ||
| 368 | -### Q4: 服务启动失败 | ||
| 369 | - | ||
| 370 | -**症状**: `Address already in use` 或端口被占用 | ||
| 371 | - | ||
| 372 | -**解决方案**: | ||
| 373 | -```bash | ||
| 374 | -# 查看占用端口的进程 | ||
| 375 | -lsof -i :6002 # 后端 | ||
| 376 | -lsof -i :6003 # 前端 | ||
| 377 | -lsof -i :9200 # ES | ||
| 378 | - | ||
| 379 | -# 杀掉进程 | ||
| 380 | -kill -9 <PID> | ||
| 381 | - | ||
| 382 | -# 或修改端口配置 | ||
| 383 | -``` | ||
| 384 | - | ||
| 385 | -### Q5: 模型下载慢或失败 | ||
| 386 | - | ||
| 387 | -**症状**: 首次运行时模型下载很慢或超时 | ||
| 388 | - | ||
| 389 | -**解决方案**: | ||
| 390 | -```bash | ||
| 391 | -# 跳过embedding快速测试 | ||
| 392 | -./scripts/ingest.sh 1 true | ||
| 393 | - | ||
| 394 | -# 或手动下载模型到指定目录 | ||
| 395 | -# TEXT_MODEL_DIR=/data/tw/models/bge-m3 | ||
| 396 | -# IMAGE_MODEL_DIR=/data/tw/models/cn-clip | ||
| 397 | -``` | ||
| 398 | - | ||
| 399 | -### Q6: 搜索无结果 | ||
| 400 | - | ||
| 401 | -**症状**: 搜索返回空结果 | ||
| 402 | - | ||
| 403 | -**解决方案**: | ||
| 404 | -```bash | ||
| 405 | -# 检查ES中是否有数据 | ||
| 406 | -curl http://localhost:9200/search_products/_count | ||
| 407 | - | ||
| 408 | -# 检查tenant_id过滤是否正确 | ||
| 409 | -curl -X POST http://localhost:6002/search/ \ | ||
| 410 | - -H "Content-Type: application/json" \ | ||
| 411 | - -d '{"query": "*", "size": 10, "debug": true}' | ||
| 412 | - | ||
| 413 | -# 查看调试信息 | ||
| 414 | -``` | ||
| 415 | - | ||
| 416 | -### Q7: 向量计算很慢 | ||
| 417 | - | ||
| 418 | -**症状**: 生成embedding很慢 | ||
| 419 | - | ||
| 420 | -**解决方案**: | ||
| 421 | -- 使用GPU加速(如果可用) | ||
| 422 | -- 减少批量大小 | ||
| 423 | -- 启用向量缓存 | ||
| 424 | -- 首次导入时跳过embedding,后续再生成 | ||
| 425 | - | ||
| 426 | ---- | ||
| 427 | - | ||
| 428 | -## 下一步 | ||
| 429 | - | ||
| 430 | -1. **查看API文档**: 参考 `API_INTEGRATION_GUIDE.md` 了解完整的API接口 | ||
| 431 | -2. **查看字段说明**: 参考 `INDEX_FIELDS_DOCUMENTATION.md` 了解索引字段 | ||
| 432 | -3. **查看设计文档**: 参考 `设计文档.md` 了解系统架构 | ||
| 433 | -4. **自定义配置**: 编辑 `config/config.yaml` 调整搜索配置 | ||
| 434 | - | ||
| 435 | ---- | ||
| 436 | - | ||
| 437 | -## 相关文档 | ||
| 438 | - | ||
| 439 | -- **API接口文档**: `API_INTEGRATION_GUIDE.md` - 完整的API对接指南 | ||
| 440 | -- **字段说明文档**: `INDEX_FIELDS_DOCUMENTATION.md` - 索引字段详细说明 | ||
| 441 | -- **设计文档**: `设计文档.md` - 系统架构和设计说明 | ||
| 442 | -- **README**: `README.md` - 项目概述和快速开始 | ||
| 443 | - | ||
| 444 | ---- | ||
| 445 | - | ||
| 446 | -**文档版本**: v1.0 | ||
| 447 | -**最后更新**: 2024-12 | ||
| 448 | - |