Commit 115047eef51c78b2081884845d157a4651f79cdf
1 parent
d79810d5
为一个租户灌入测试数据;实例的启动代码(包括前后端)
Showing
11 changed files
with
1569 additions
and
0 deletions
Show diff stats
| ... | ... | @@ -0,0 +1,317 @@ |
| 1 | +# 部署检查清单 | |
| 2 | + | |
| 3 | +## 完成情况 | |
| 4 | + | |
| 5 | +✅ **环境配置** | |
| 6 | +- [x] .env 配置文件(包含ES、Redis、DeepL配置) | |
| 7 | +- [x] environment.yml (conda环境配置) | |
| 8 | +- [x] config/env_config.py (统一配置管理) | |
| 9 | +- [x] 代码已更新使用新配置 | |
| 10 | + | |
| 11 | +✅ **启动脚本** | |
| 12 | +- [x] setup.sh (环境设置) | |
| 13 | +- [x] start_all.sh (一键启动) | |
| 14 | +- [x] scripts/ingest.sh (数据导入) | |
| 15 | +- [x] scripts/start_backend.sh (后端服务) | |
| 16 | +- [x] scripts/start_frontend.sh (前端服务) | |
| 17 | + | |
| 18 | +✅ **Web前端** | |
| 19 | +- [x] HTML界面 (frontend/index.html) | |
| 20 | +- [x] CSS样式 (frontend/static/css/style.css) | |
| 21 | +- [x] JavaScript逻辑 (frontend/static/js/app.js) | |
| 22 | +- [x] 前端服务器 (scripts/frontend_server.py) | |
| 23 | + | |
| 24 | +✅ **文档** | |
| 25 | +- [x] USER_GUIDE.md (用户指南) | |
| 26 | +- [x] README.md (项目说明) | |
| 27 | +- [x] QUICKSTART.md (快速开始) | |
| 28 | +- [x] IMPLEMENTATION_SUMMARY.md (实现总结) | |
| 29 | + | |
| 30 | +## 使用步骤 | |
| 31 | + | |
| 32 | +### 方式1: 一键启动(推荐) | |
| 33 | + | |
| 34 | +```bash | |
| 35 | +cd /data/tw/SearchEngine | |
| 36 | +./start_all.sh | |
| 37 | +``` | |
| 38 | + | |
| 39 | +然后访问: http://localhost:8080 | |
| 40 | + | |
| 41 | +### 方式2: 分步启动 | |
| 42 | + | |
| 43 | +```bash | |
| 44 | +# 1. 环境设置 | |
| 45 | +./setup.sh | |
| 46 | + | |
| 47 | +# 2. 导入数据(1000条,快速测试) | |
| 48 | +./scripts/ingest.sh 1000 true | |
| 49 | + | |
| 50 | +# 3. 启动后端(新终端) | |
| 51 | +./scripts/start_backend.sh | |
| 52 | + | |
| 53 | +# 4. 启动前端(新终端) | |
| 54 | +./scripts/start_frontend.sh | |
| 55 | +``` | |
| 56 | + | |
| 57 | +## 系统要求 | |
| 58 | + | |
| 59 | +### 软件要求 | |
| 60 | +- [x] Python 3.10 | |
| 61 | +- [x] Conda (Miniconda/Anaconda) | |
| 62 | +- [x] Elasticsearch 8.18 | |
| 63 | +- [ ] CUDA (可选,用于GPU加速) | |
| 64 | + | |
| 65 | +### 配置要求 | |
| 66 | +- [x] ES连接信息 | |
| 67 | +- [x] DeepL API Key | |
| 68 | +- [ ] Redis连接(可选) | |
| 69 | + | |
| 70 | +### 硬件要求 | |
| 71 | +- 内存: 建议8GB+ | |
| 72 | +- 磁盘: 10GB+ (包含模型文件) | |
| 73 | +- GPU: 可选(加速embedding生成) | |
| 74 | + | |
| 75 | +## 配置信息 | |
| 76 | + | |
| 77 | +当前配置(.env文件): | |
| 78 | +``` | |
| 79 | +ES_HOST=http://localhost:9200 | |
| 80 | +ES_USERNAME=essa | |
| 81 | +ES_PASSWORD=<your-es-password> | |
| 82 | + | |
| 83 | +REDIS_HOST=localhost | |
| 84 | +REDIS_PORT=6479 | |
| 85 | +REDIS_PASSWORD=<your-redis-password> | |
| 86 | + | |
| 87 | +DEEPL_AUTH_KEY=<your-deepl-auth-key> | |
| 88 | + | |
| 89 | +CUSTOMER_ID=customer1 | |
| 90 | +API_HOST=0.0.0.0 | |
| 91 | +API_PORT=8000 | |
| 92 | +``` | |
| 93 | + | |
| 94 | +## 服务端口 | |
| 95 | + | |
| 96 | +| 服务 | 端口 | URL | | |
| 97 | +|------|------|-----| | |
| 98 | +| Elasticsearch | 9200 | http://localhost:9200 | | |
| 99 | +| Backend API | 8000 | http://localhost:8000 | | |
| 100 | +| Frontend Web | 8080 | http://localhost:8080 | | |
| 101 | +| API Docs | 8000 | http://localhost:8000/docs | | |
| 102 | + | |
| 103 | +## 测试流程 | |
| 104 | + | |
| 105 | +### 1. 环境测试 | |
| 106 | + | |
| 107 | +```bash | |
| 108 | +# 激活环境 | |
| 109 | +source /home/tw/miniconda3/etc/profile.d/conda.sh | |
| 110 | +conda activate searchengine | |
| 111 | + | |
| 112 | +# 检查Python | |
| 113 | +python --version # 应该显示 Python 3.10.x | |
| 114 | + | |
| 115 | +# 检查配置 | |
| 116 | +python -c "from config.env_config import print_config; print_config()" | |
| 117 | +``` | |
| 118 | + | |
| 119 | +### 2. ES连接测试 | |
| 120 | + | |
| 121 | +```bash | |
| 122 | +# 直接测试 | |
| 123 | +curl http://localhost:9200 -u essa  # 不在命令行中写明文密码,curl会提示输入 | |
| 124 | + | |
| 125 | +# Python测试 | |
| 126 | +python -c " | |
| 127 | +from config.env_config import get_es_config | |
| 128 | +from utils.es_client import ESClient | |
| 129 | +es_config = get_es_config() | |
| 130 | +client = ESClient(hosts=[es_config['host']], username=es_config.get('username'), password=es_config.get('password')) | |
| 131 | +print('ES Connected:', client.ping()) | |
| 132 | +" | |
| 133 | +``` | |
| 134 | + | |
| 135 | +### 3. 数据导入测试 | |
| 136 | + | |
| 137 | +```bash | |
| 138 | +# 导入100条测试数据(跳过embedding以加快速度) | |
| 139 | +./scripts/ingest.sh 100 true | |
| 140 | + | |
| 141 | +# 检查导入结果 | |
| 142 | +python -c " | |
| 143 | +from config.env_config import get_es_config | |
| 144 | +from utils.es_client import ESClient | |
| 145 | +from config import ConfigLoader | |
| 146 | + | |
| 147 | +es_config = get_es_config() | |
| 148 | +es_client = ESClient(hosts=[es_config['host']], username=es_config.get('username'), password=es_config.get('password')) | |
| 149 | +config = ConfigLoader('config/schema').load_customer_config('customer1') | |
| 150 | +count = es_client.count(config.es_index_name) | |
| 151 | +print(f'Documents in index: {count}') | |
| 152 | +" | |
| 153 | +``` | |
| 154 | + | |
| 155 | +### 4. API测试 | |
| 156 | + | |
| 157 | +```bash | |
| 158 | +# 启动后端(后台) | |
| 159 | +nohup ./scripts/start_backend.sh > logs/backend.log 2>&1 & | |
| 160 | + | |
| 161 | +# 等待启动 | |
| 162 | +sleep 5 | |
| 163 | + | |
| 164 | +# 测试健康检查 | |
| 165 | +curl http://localhost:8000/admin/health | |
| 166 | + | |
| 167 | +# 测试搜索 | |
| 168 | +curl -X POST http://localhost:8000/search/ \ | |
| 169 | + -H "Content-Type: application/json" \ | |
| 170 | + -d '{"query": "消防", "size": 5}' | |
| 171 | +``` | |
| 172 | + | |
| 173 | +### 5. 前端测试 | |
| 174 | + | |
| 175 | +```bash | |
| 176 | +# 启动前端 | |
| 177 | +./scripts/start_frontend.sh | |
| 178 | + | |
| 179 | +# 然后在浏览器访问: http://localhost:8080 | |
| 180 | +``` | |
| 181 | + | |
| 182 | +## 故障排除 | |
| 183 | + | |
| 184 | +### 问题1: conda环境创建失败 | |
| 185 | + | |
| 186 | +**症状**: `conda env create` 报错 | |
| 187 | + | |
| 188 | +**解决**: | |
| 189 | +```bash | |
| 190 | +# 检查conda版本 | |
| 191 | +conda --version | |
| 192 | + | |
| 193 | +# 更新conda | |
| 194 | +conda update -n base conda | |
| 195 | + | |
| 196 | +# 重试创建 | |
| 197 | +conda env create -f environment.yml | |
| 198 | +``` | |
| 199 | + | |
| 200 | +### 问题2: ES连接失败 | |
| 201 | + | |
| 202 | +**症状**: `Failed to connect to Elasticsearch` | |
| 203 | + | |
| 204 | +**解决**: | |
| 205 | +```bash | |
| 206 | +# 检查ES状态 | |
| 207 | +curl http://localhost:9200 -u essa  # 不在命令行中写明文密码,curl会提示输入 | |
| 208 | + | |
| 209 | +# 检查ES版本 | |
| 210 | +curl -s http://localhost:9200 -u essa | grep '"number"' | |
| 211 | + | |
| 212 | +# 确认配置 | |
| 213 | +cat .env | grep ES_ | |
| 214 | +``` | |
| 215 | + | |
| 216 | +### 问题3: 模型下载慢 | |
| 217 | + | |
| 218 | +**症状**: 首次运行时模型下载很慢或超时 | |
| 219 | + | |
| 220 | +**解决**: | |
| 221 | +```bash | |
| 222 | +# 跳过embedding快速测试 | |
| 223 | +./scripts/ingest.sh 1000 true | |
| 224 | + | |
| 225 | +# 或手动下载模型到指定目录 | |
| 226 | +# TEXT_MODEL_DIR=/data/tw/models/bge-m3 | |
| 227 | +# IMAGE_MODEL_DIR=/data/tw/models/cn-clip | |
| 228 | +``` | |
| 229 | + | |
| 230 | +### 问题4: 端口被占用 | |
| 231 | + | |
| 232 | +**症状**: `Address already in use` | |
| 233 | + | |
| 234 | +**解决**: | |
| 235 | +```bash | |
| 236 | +# 查看占用端口的进程 | |
| 237 | +lsof -i :8000 # 后端 | |
| 238 | +lsof -i :8080 # 前端 | |
| 239 | + | |
| 240 | +# 杀掉进程 | |
| 241 | +kill <PID>  # 先发送SIGTERM;进程无响应时再使用 kill -9 <PID> | |
| 242 | + | |
| 243 | +# 或修改.env中的端口 | |
| 244 | +``` | |
| 245 | + | |
| 246 | +## 下一步 | |
| 247 | + | |
| 248 | +1. **测试搜索功能** | |
| 249 | + - 打开 http://localhost:8080 | |
| 250 | + - 尝试不同的搜索查询 | |
| 251 | + - 测试布尔操作符 | |
| 252 | + | |
| 253 | +2. **查看API文档** | |
| 254 | + - 访问 http://localhost:8000/docs | |
| 255 | + - 了解所有可用的API端点 | |
| 256 | + | |
| 257 | +3. **自定义配置** | |
| 258 | + - 编辑 `config/schema/customer1_config.yaml` | |
| 259 | + - 添加查询重写规则 | |
| 260 | + - 调整ranking表达式 | |
| 261 | + | |
| 262 | +4. **导入更多数据** | |
| 263 | + - 导入完整的10000条数据 | |
| 264 | + - 生成embeddings以启用语义搜索 | |
| 265 | + | |
| 266 | +5. **性能优化** | |
| 267 | + - 启用Redis缓存 | |
| 268 | + - 调整ES分片数量 | |
| 269 | + - 使用GPU加速 | |
| 270 | + | |
| 271 | +## 项目文件清单 | |
| 272 | + | |
| 273 | +**新增文件 (环境和启动相关)**: | |
| 274 | +- [x] .env - 环境配置 | |
| 275 | +- [x] .env.example - 配置模板 | |
| 276 | +- [x] environment.yml - Conda环境 | |
| 277 | +- [x] config/env_config.py - 配置管理 | |
| 278 | +- [x] setup.sh - 环境设置 | |
| 279 | +- [x] start_all.sh - 一键启动 | |
| 280 | +- [x] scripts/ingest.sh - 数据导入 | |
| 281 | +- [x] scripts/start_backend.sh - 后端启动 | |
| 282 | +- [x] scripts/start_frontend.sh - 前端启动 | |
| 283 | +- [x] scripts/frontend_server.py - 前端服务器 | |
| 284 | +- [x] frontend/index.html - 前端页面 | |
| 285 | +- [x] frontend/static/css/style.css - 样式 | |
| 286 | +- [x] frontend/static/js/app.js - 前端逻辑 | |
| 287 | +- [x] USER_GUIDE.md - 用户指南 | |
| 288 | +- [x] DEPLOYMENT.md - 本文件 | |
| 289 | + | |
| 290 | +**总计**: 15个新文件 | |
| 291 | + | |
| 292 | +## 验证完成 | |
| 293 | + | |
| 294 | +运行以下命令验证所有文件都已创建: | |
| 295 | + | |
| 296 | +```bash | |
| 297 | +cd /data/tw/SearchEngine | |
| 298 | + | |
| 299 | +# 检查关键文件 | |
| 300 | +ls -la .env setup.sh start_all.sh | |
| 301 | +ls -la scripts/*.sh scripts/*.py | |
| 302 | +ls -la frontend/index.html | |
| 303 | +ls -la frontend/static/css/style.css | |
| 304 | +ls -la frontend/static/js/app.js | |
| 305 | + | |
| 306 | +# 检查可执行权限 | |
| 307 | +ls -l *.sh scripts/*.sh | grep "x" | |
| 308 | +``` | |
| 309 | + | |
| 310 | +所有文件应该都存在且脚本有执行权限。 | |
| 311 | + | |
| 312 | +## 支持联系 | |
| 313 | + | |
| 314 | +如有问题,请检查: | |
| 315 | +1. logs/backend.log - 后端日志 | |
| 316 | +2. 浏览器控制台 - 前端错误 | |
| 317 | +3. USER_GUIDE.md - 详细使用说明 | ... | ... |
| ... | ... | @@ -0,0 +1,343 @@ |
| 1 | +# 使用指南 - SearchEngine | |
| 2 | + | |
| 3 | +## 快速启动(推荐) | |
| 4 | + | |
| 5 | +### 一键启动所有服务 | |
| 6 | + | |
| 7 | +```bash | |
| 8 | +cd /data/tw/SearchEngine | |
| 9 | +./start_all.sh | |
| 10 | +``` | |
| 11 | + | |
| 12 | +这个脚本会自动完成: | |
| 13 | +1. 设置conda环境 | |
| 14 | +2. 检查并导入测试数据(如果需要) | |
| 15 | +3. 启动后端API服务(后台运行) | |
| 16 | +4. 启动前端Web界面 | |
| 17 | + | |
| 18 | +启动完成后,访问: | |
| 19 | +- **前端界面**: http://localhost:8080 | |
| 20 | +- **后端API**: http://localhost:8000 | |
| 21 | +- **API文档**: http://localhost:8000/docs | |
| 22 | + | |
| 23 | +### 停止服务 | |
| 24 | + | |
| 25 | +```bash | |
| 26 | +# 停止后端 | |
| 27 | +kill "$(cat logs/backend.pid)" | |
| 28 | + | |
| 29 | +# 前端按 Ctrl+C | |
| 30 | +``` | |
| 31 | + | |
| 32 | +--- | |
| 33 | + | |
| 34 | +## 分步启动(自定义) | |
| 35 | + | |
| 36 | +### 1. 环境设置 | |
| 37 | + | |
| 38 | +```bash | |
| 39 | +cd /data/tw/SearchEngine | |
| 40 | +./setup.sh | |
| 41 | +``` | |
| 42 | + | |
| 43 | +这会: | |
| 44 | +- 创建/激活conda环境 `searchengine` | |
| 45 | +- 加载配置文件 | |
| 46 | +- 检查Elasticsearch连接 | |
| 47 | + | |
| 48 | +### 2. 数据导入 | |
| 49 | + | |
| 50 | +#### 快速测试(1000条,不生成embedding) | |
| 51 | +```bash | |
| 52 | +./scripts/ingest.sh 1000 true | |
| 53 | +``` | |
| 54 | + | |
| 55 | +#### 完整导入(10000条,包含embedding) | |
| 56 | +```bash | |
| 57 | +./scripts/ingest.sh 10000 false | |
| 58 | +``` | |
| 59 | + | |
| 60 | +**注意**: 首次运行会下载模型文件(BGE-M3和CN-CLIP),大约需要10-30分钟。 | |
| 61 | + | |
| 62 | +### 3. 启动后端 | |
| 63 | + | |
| 64 | +```bash | |
| 65 | +./scripts/start_backend.sh | |
| 66 | +``` | |
| 67 | + | |
| 68 | +后端API会在 http://localhost:8000 启动 | |
| 69 | + | |
| 70 | +### 4. 启动前端 | |
| 71 | + | |
| 72 | +```bash | |
| 73 | +./scripts/start_frontend.sh | |
| 74 | +``` | |
| 75 | + | |
| 76 | +前端界面会在 http://localhost:8080 启动 | |
| 77 | + | |
| 78 | +--- | |
| 79 | + | |
| 80 | +## 配置说明 | |
| 81 | + | |
| 82 | +### 环境配置文件 (.env) | |
| 83 | + | |
| 84 | +```bash | |
| 85 | +# Elasticsearch配置 | |
| 86 | +ES_HOST=http://localhost:9200 | |
| 87 | +ES_USERNAME=essa | |
| 88 | +ES_PASSWORD=<your-es-password> | |
| 89 | + | |
| 90 | +# Redis配置(可选,用于缓存) | |
| 91 | +REDIS_HOST=localhost | |
| 92 | +REDIS_PORT=6479 | |
| 93 | +REDIS_PASSWORD=<your-redis-password> | |
| 94 | + | |
| 95 | +# DeepL翻译API | |
| 96 | +DEEPL_AUTH_KEY=<your-deepl-auth-key> | |
| 97 | + | |
| 98 | +# 客户配置 | |
| 99 | +CUSTOMER_ID=customer1 | |
| 100 | + | |
| 101 | +# API服务配置 | |
| 102 | +API_HOST=0.0.0.0 | |
| 103 | +API_PORT=8000 | |
| 104 | +``` | |
| 105 | + | |
| 106 | +### 修改配置 | |
| 107 | + | |
| 108 | +1. 编辑 `.env` 文件 | |
| 109 | +2. 重启相关服务 | |
| 110 | + | |
| 111 | +--- | |
| 112 | + | |
| 113 | +## 使用Web界面 | |
| 114 | + | |
| 115 | +### 搜索功能 | |
| 116 | + | |
| 117 | +1. **简单搜索**: 直接输入关键词 | |
| 118 | + - 中文: "芭比娃娃" | |
| 119 | + - 英文: "fire control set" | |
| 120 | + - 俄文: "Наборы для пожаротушения" | |
| 121 | + | |
| 122 | +2. **布尔搜索**: 使用操作符 | |
| 123 | + - AND: "toy AND barbie" | |
| 124 | + - OR: "barbie OR doll" | |
| 125 | + - ANDNOT: "toy ANDNOT cheap" | |
| 126 | + - 组合: "toy AND (barbie OR doll) ANDNOT cheap" | |
| 127 | + | |
| 128 | +3. **域搜索**: 指定搜索域 | |
| 129 | + - 品牌: "brand:ZHU LIN" | |
| 130 | + - 类别: "category:玩具" | |
| 131 | + | |
| 132 | +### 搜索选项 | |
| 133 | + | |
| 134 | +- **启用翻译**: 自动翻译查询到其他语言 | |
| 135 | +- **启用语义搜索**: 使用embedding进行语义匹配 | |
| 136 | +- **启用自定义排序**: 使用配置的ranking表达式 | |
| 137 | +- **结果数量**: 10/20/50条 | |
| 138 | + | |
| 139 | +--- | |
| 140 | + | |
| 141 | +## API使用 | |
| 142 | + | |
| 143 | +### 搜索接口 | |
| 144 | + | |
| 145 | +```bash | |
| 146 | +curl -X POST http://localhost:8000/search/ \ | |
| 147 | + -H "Content-Type: application/json" \ | |
| 148 | + -d '{ | |
| 149 | + "query": "芭比娃娃", | |
| 150 | + "size": 10, | |
| 151 | + "enable_translation": true, | |
| 152 | + "enable_embedding": true | |
| 153 | + }' | |
| 154 | +``` | |
| 155 | + | |
| 156 | +### 图片搜索 | |
| 157 | + | |
| 158 | +```bash | |
| 159 | +curl -X POST http://localhost:8000/search/image \ | |
| 160 | + -H "Content-Type: application/json" \ | |
| 161 | + -d '{ | |
| 162 | + "image_url": "https://oss.essa.cn/example.jpg", | |
| 163 | + "size": 10 | |
| 164 | + }' | |
| 165 | +``` | |
| 166 | + | |
| 167 | +### 健康检查 | |
| 168 | + | |
| 169 | +```bash | |
| 170 | +curl http://localhost:8000/admin/health | |
| 171 | +``` | |
| 172 | + | |
| 173 | +### 查看配置 | |
| 174 | + | |
| 175 | +```bash | |
| 176 | +curl http://localhost:8000/admin/config | |
| 177 | +``` | |
| 178 | + | |
| 179 | +### 索引统计 | |
| 180 | + | |
| 181 | +```bash | |
| 182 | +curl http://localhost:8000/admin/stats | |
| 183 | +``` | |
| 184 | + | |
| 185 | +--- | |
| 186 | + | |
| 187 | +## 常见问题 | |
| 188 | + | |
| 189 | +### 1. Elasticsearch连接失败 | |
| 190 | + | |
| 191 | +**问题**: `Failed to connect to Elasticsearch` | |
| 192 | + | |
| 193 | +**解决**: | |
| 194 | +```bash | |
| 195 | +# 检查ES是否运行 | |
| 196 | +curl http://localhost:9200 | |
| 197 | + | |
| 198 | +# 检查配置 | |
| 199 | +cat .env | grep ES_ | |
| 200 | +``` | |
| 201 | + | |
| 202 | +### 2. 导入数据时内存不足 | |
| 203 | + | |
| 204 | +**问题**: `Out of memory` | |
| 205 | + | |
| 206 | +**解决**: | |
| 207 | +```bash | |
| 208 | +# 减少batch size或跳过embedding | |
| 209 | +./scripts/ingest.sh 1000 true | |
| 210 | +``` | |
| 211 | + | |
| 212 | +### 3. 模型下载失败 | |
| 213 | + | |
| 214 | +**问题**: 模型文件下载超时 | |
| 215 | + | |
| 216 | +**解决**: | |
| 217 | +- 检查网络连接 | |
| 218 | +- 使用国内镜像源 | |
| 219 | +- 手动下载模型到指定目录 | |
| 220 | + | |
| 221 | +### 4. 翻译不工作 | |
| 222 | + | |
| 223 | +**问题**: 翻译返回原文 | |
| 224 | + | |
| 225 | +**解决**: | |
| 226 | +- 检查DEEPL_AUTH_KEY是否正确 | |
| 227 | +- 如果没有API key,系统会使用mock模式(返回原文) | |
| 228 | + | |
| 229 | +### 5. 前端无法连接后端 | |
| 230 | + | |
| 231 | +**问题**: CORS错误 | |
| 232 | + | |
| 233 | +**解决**: | |
| 234 | +- 确保后端在 http://localhost:8000 运行 | |
| 235 | +- 检查浏览器控制台错误信息 | |
| 236 | + | |
| 237 | +--- | |
| 238 | + | |
| 239 | +## 开发和调试 | |
| 240 | + | |
| 241 | +### 查看日志 | |
| 242 | + | |
| 243 | +```bash | |
| 244 | +# 后端日志 | |
| 245 | +tail -f logs/backend.log | |
| 246 | + | |
| 247 | +# 实时日志(如果前台运行) | |
| 248 | +./scripts/start_backend.sh | |
| 249 | +``` | |
| 250 | + | |
| 251 | +### Python命令行测试 | |
| 252 | + | |
| 253 | +```bash | |
| 254 | +# 激活环境 | |
| 255 | +source /home/tw/miniconda3/etc/profile.d/conda.sh | |
| 256 | +conda activate searchengine | |
| 257 | + | |
| 258 | +# 测试搜索 | |
| 259 | +python -c " | |
| 260 | +from config import ConfigLoader | |
| 261 | +from utils import ESClient | |
| 262 | +from search import Searcher | |
| 263 | +from config.env_config import get_es_config | |
| 264 | + | |
| 265 | +config_loader = ConfigLoader('config/schema') | |
| 266 | +config = config_loader.load_customer_config('customer1') | |
| 267 | + | |
| 268 | +es_config = get_es_config() | |
| 269 | +es_client = ESClient(hosts=[es_config['host']], | |
| 270 | + username=es_config.get('username'), | |
| 271 | + password=es_config.get('password')) | |
| 272 | + | |
| 273 | +searcher = Searcher(config, es_client) | |
| 274 | +result = searcher.search('芭比娃娃', size=5) | |
| 275 | + | |
| 276 | +print(f'找到 {result.total} 个结果') | |
| 277 | +for hit in result.hits: | |
| 278 | + print(f' - {hit[\"_source\"][\"name\"]} (分数: {hit[\"_score\"]:.4f})') | |
| 279 | +" | |
| 280 | +``` | |
| 281 | + | |
| 282 | +### 重新导入数据 | |
| 283 | + | |
| 284 | +```bash | |
| 285 | +# 删除现有索引并重新导入 | |
| 286 | +./scripts/ingest.sh 1000 true | |
| 287 | +``` | |
| 288 | + | |
| 289 | +--- | |
| 290 | + | |
| 291 | +## 性能优化 | |
| 292 | + | |
| 293 | +### 1. 使用embedding缓存 | |
| 294 | + | |
| 295 | +首次生成embedding后会自动缓存到 `.cache/` 目录,后续导入会更快。 | |
| 296 | + | |
| 297 | +### 2. 批量大小调整 | |
| 298 | + | |
| 299 | +```bash | |
| 300 | +# 修改批量大小(在ingest_customer1.py中) | |
| 301 | +--batch-size 200 # 默认100 | |
| 302 | +``` | |
| 303 | + | |
| 304 | +### 3. GPU加速 | |
| 305 | + | |
| 306 | +确保CUDA可用以加速embedding生成: | |
| 307 | +```bash | |
| 308 | +python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')" | |
| 309 | +``` | |
| 310 | + | |
| 311 | +--- | |
| 312 | + | |
| 313 | +## 项目结构 | |
| 314 | + | |
| 315 | +``` | |
| 316 | +SearchEngine/ | |
| 317 | +├── .env # 环境配置 | |
| 318 | +├── setup.sh # 环境设置脚本 | |
| 319 | +├── start_all.sh # 一键启动脚本 | |
| 320 | +├── scripts/ # 运行脚本 | |
| 321 | +│ ├── ingest.sh # 数据导入 | |
| 322 | +│ ├── start_backend.sh # 启动后端 | |
| 323 | +│ └── start_frontend.sh # 启动前端 | |
| 324 | +├── frontend/ # Web前端 | |
| 325 | +│ ├── index.html | |
| 326 | +│ └── static/ | |
| 327 | +├── logs/ # 日志文件 | |
| 328 | +├── config/ # 配置模块 | |
| 329 | +├── indexer/ # 数据导入 | |
| 330 | +├── query/ # 查询处理 | |
| 331 | +├── search/ # 搜索引擎 | |
| 332 | +├── embeddings/ # 向量模型 | |
| 333 | +└── api/ # REST API | |
| 334 | +``` | |
| 335 | + | |
| 336 | +--- | |
| 337 | + | |
| 338 | +## 支持 | |
| 339 | + | |
| 340 | +遇到问题请查看: | |
| 341 | +- **日志**: `logs/backend.log` | |
| 342 | +- **API文档**: http://localhost:8000/docs | |
| 343 | +- **配置**: `config/schema/customer1_config.yaml` | ... | ... |
| ... | ... | @@ -0,0 +1,59 @@ |
| 1 | +<!DOCTYPE html> | |
| 2 | +<html lang="zh-CN"> | |
| 3 | +<head> | |
| 4 | + <meta charset="UTF-8"> | |
| 5 | + <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| 6 | + <title>电商搜索引擎 - SearchEngine Demo</title> | |
| 7 | + <link rel="stylesheet" href="/static/css/style.css"> | |
| 8 | +</head> | |
| 9 | +<body> | |
| 10 | + <div class="container"> | |
| 11 | + <header> | |
| 12 | + <h1>🔍 电商搜索引擎</h1> | |
| 13 | + <p class="subtitle">E-Commerce Search Engine - Customer1 Demo</p> | |
| 14 | + </header> | |
| 15 | + | |
| 16 | + <div class="search-section"> | |
| 17 | + <div class="search-box"> | |
| 18 | + <input type="text" id="searchInput" placeholder="输入搜索关键词... (支持中文、英文、俄文)" | |
| 19 | + onkeypress="handleKeyPress(event)"> | |
| 20 | + <button onclick="performSearch()" class="search-button">搜索</button> | |
| 21 | + </div> | |
| 22 | + | |
| 23 | + <div class="search-options"> | |
| 24 | + <label><input type="checkbox" id="enableTranslation" checked> 启用翻译</label> | |
| 25 | + <label><input type="checkbox" id="enableEmbedding" checked> 启用语义搜索</label> | |
| 26 | + <label><input type="checkbox" id="enableRerank" checked> 启用自定义排序</label> | |
| 27 | + <select id="resultSize"> | |
| 28 | + <option value="10">10条结果</option> | |
| 29 | + <option value="20">20条结果</option> | |
| 30 | + <option value="50">50条结果</option> | |
| 31 | + </select> | |
| 32 | + </div> | |
| 33 | + | |
| 34 | + <div class="search-examples"> | |
| 35 | + <strong>搜索示例:</strong> | |
| 36 | + <button class="example-btn" onclick="setQuery('芭比娃娃')">芭比娃娃</button> | |
| 37 | + <button class="example-btn" onclick="setQuery('toy AND (barbie OR doll)')">布尔查询</button> | |
| 38 | + <button class="example-btn" onclick="setQuery('消防')">消防</button> | |
| 39 | + <button class="example-btn" onclick="setQuery('fire control set')">英文查询</button> | |
| 40 | + </div> | |
| 41 | + </div> | |
| 42 | + | |
| 43 | + <div id="loading" class="loading" style="display: none;"> | |
| 44 | + <div class="spinner"></div> | |
| 45 | + <p>搜索中...</p> | |
| 46 | + </div> | |
| 47 | + | |
| 48 | + <div id="results" class="results-section"></div> | |
| 49 | + | |
| 50 | + <div id="queryInfo" class="query-info"></div> | |
| 51 | + </div> | |
| 52 | + | |
| 53 | + <footer> | |
| 54 | + <p>SearchEngine © 2025 | API服务地址: <span id="apiUrl">http://localhost:8000</span></p> | |
| 55 | + </footer> | |
| 56 | + | |
| 57 | + <script src="/static/js/app.js"></script> | |
| 58 | +</body> | |
| 59 | +</html> | ... | ... |
| ... | ... | @@ -0,0 +1,282 @@ |
| 1 | +/* SearchEngine Frontend Styles */ | |
| 2 | + | |
| 3 | +* { | |
| 4 | + margin: 0; | |
| 5 | + padding: 0; | |
| 6 | + box-sizing: border-box; | |
| 7 | +} | |
| 8 | + | |
| 9 | +body { | |
| 10 | + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Microsoft YaHei", sans-serif; | |
| 11 | + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); | |
| 12 | + min-height: 100vh; | |
| 13 | + padding: 20px; | |
| 14 | +} | |
| 15 | + | |
| 16 | +.container { | |
| 17 | + max-width: 1200px; | |
| 18 | + margin: 0 auto; | |
| 19 | +} | |
| 20 | + | |
| 21 | +header { | |
| 22 | + text-align: center; | |
| 23 | + color: white; | |
| 24 | + margin-bottom: 40px; | |
| 25 | +} | |
| 26 | + | |
| 27 | +header h1 { | |
| 28 | + font-size: 3em; | |
| 29 | + margin-bottom: 10px; | |
| 30 | +} | |
| 31 | + | |
| 32 | +.subtitle { | |
| 33 | + font-size: 1.2em; | |
| 34 | + opacity: 0.9; | |
| 35 | +} | |
| 36 | + | |
| 37 | +.search-section { | |
| 38 | + background: white; | |
| 39 | + border-radius: 15px; | |
| 40 | + padding: 30px; | |
| 41 | + box-shadow: 0 10px 40px rgba(0,0,0,0.2); | |
| 42 | + margin-bottom: 30px; | |
| 43 | +} | |
| 44 | + | |
| 45 | +.search-box { | |
| 46 | + display: flex; | |
| 47 | + gap: 10px; | |
| 48 | + margin-bottom: 20px; | |
| 49 | +} | |
| 50 | + | |
| 51 | +#searchInput { | |
| 52 | + flex: 1; | |
| 53 | + padding: 15px 20px; | |
| 54 | + font-size: 16px; | |
| 55 | + border: 2px solid #e0e0e0; | |
| 56 | + border-radius: 10px; | |
| 57 | + transition: border-color 0.3s; | |
| 58 | +} | |
| 59 | + | |
| 60 | +#searchInput:focus { | |
| 61 | + outline: none; | |
| 62 | + border-color: #667eea; | |
| 63 | +} | |
| 64 | + | |
| 65 | +.search-button { | |
| 66 | + padding: 15px 40px; | |
| 67 | + font-size: 16px; | |
| 68 | + font-weight: bold; | |
| 69 | + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); | |
| 70 | + color: white; | |
| 71 | + border: none; | |
| 72 | + border-radius: 10px; | |
| 73 | + cursor: pointer; | |
| 74 | + transition: transform 0.2s; | |
| 75 | +} | |
| 76 | + | |
| 77 | +.search-button:hover { | |
| 78 | + transform: translateY(-2px); | |
| 79 | +} | |
| 80 | + | |
| 81 | +.search-options { | |
| 82 | + display: flex; | |
| 83 | + gap: 20px; | |
| 84 | + align-items: center; | |
| 85 | + margin-bottom: 15px; | |
| 86 | + flex-wrap: wrap; | |
| 87 | +} | |
| 88 | + | |
| 89 | +.search-options label { | |
| 90 | + display: flex; | |
| 91 | + align-items: center; | |
| 92 | + gap: 5px; | |
| 93 | + cursor: pointer; | |
| 94 | +} | |
| 95 | + | |
| 96 | +.search-options select { | |
| 97 | + padding: 5px 10px; | |
| 98 | + border: 2px solid #e0e0e0; | |
| 99 | + border-radius: 5px; | |
| 100 | + font-size: 14px; | |
| 101 | +} | |
| 102 | + | |
| 103 | +.search-examples { | |
| 104 | + padding: 15px; | |
| 105 | + background: #f5f5f5; | |
| 106 | + border-radius: 10px; | |
| 107 | +} | |
| 108 | + | |
| 109 | +.example-btn { | |
| 110 | + padding: 8px 15px; | |
| 111 | + margin: 5px; | |
| 112 | + background: white; | |
| 113 | + border: 1px solid #ddd; | |
| 114 | + border-radius: 5px; | |
| 115 | + cursor: pointer; | |
| 116 | + transition: all 0.2s; | |
| 117 | +} | |
| 118 | + | |
| 119 | +.example-btn:hover { | |
| 120 | + background: #667eea; | |
| 121 | + color: white; | |
| 122 | + border-color: #667eea; | |
| 123 | +} | |
| 124 | + | |
| 125 | +.loading { | |
| 126 | + text-align: center; | |
| 127 | + padding: 40px; | |
| 128 | + color: white; | |
| 129 | +} | |
| 130 | + | |
| 131 | +.spinner { | |
| 132 | + width: 50px; | |
| 133 | + height: 50px; | |
| 134 | + margin: 0 auto 20px; | |
| 135 | + border: 4px solid rgba(255,255,255,0.3); | |
| 136 | + border-top-color: white; | |
| 137 | + border-radius: 50%; | |
| 138 | + animation: spin 1s linear infinite; | |
| 139 | +} | |
| 140 | + | |
| 141 | +@keyframes spin { | |
| 142 | + to { transform: rotate(360deg); } | |
| 143 | +} | |
| 144 | + | |
| 145 | +.results-section { | |
| 146 | + background: white; | |
| 147 | + border-radius: 15px; | |
| 148 | + padding: 30px; | |
| 149 | + box-shadow: 0 10px 40px rgba(0,0,0,0.2); | |
| 150 | +} | |
| 151 | + | |
| 152 | +.results-header { | |
| 153 | + margin-bottom: 20px; | |
| 154 | + padding-bottom: 15px; | |
| 155 | + border-bottom: 2px solid #e0e0e0; | |
| 156 | +} | |
| 157 | + | |
| 158 | +.results-header h2 { | |
| 159 | + color: #333; | |
| 160 | + margin-bottom: 10px; | |
| 161 | +} | |
| 162 | + | |
| 163 | +.results-stats { | |
| 164 | + color: #666; | |
| 165 | + font-size: 14px; | |
| 166 | +} | |
| 167 | + | |
| 168 | +.result-item { | |
| 169 | + padding: 20px; | |
| 170 | + margin-bottom: 15px; | |
| 171 | + border: 1px solid #e0e0e0; | |
| 172 | + border-radius: 10px; | |
| 173 | + transition: all 0.3s; | |
| 174 | +} | |
| 175 | + | |
| 176 | +.result-item:hover { | |
| 177 | + box-shadow: 0 5px 15px rgba(0,0,0,0.1); | |
| 178 | + border-color: #667eea; | |
| 179 | +} | |
| 180 | + | |
| 181 | +.result-header { | |
| 182 | + display: flex; | |
| 183 | + justify-content: space-between; | |
| 184 | + align-items: start; | |
| 185 | + margin-bottom: 10px; | |
| 186 | +} | |
| 187 | + | |
| 188 | +.result-title { | |
| 189 | + font-size: 18px; | |
| 190 | + font-weight: bold; | |
| 191 | + color: #333; | |
| 192 | + margin-bottom: 5px; | |
| 193 | +} | |
| 194 | + | |
| 195 | +.result-score { | |
| 196 | + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); | |
| 197 | + color: white; | |
| 198 | + padding: 5px 12px; | |
| 199 | + border-radius: 20px; | |
| 200 | + font-size: 12px; | |
| 201 | + font-weight: bold; | |
| 202 | +} | |
| 203 | + | |
| 204 | +.result-meta { | |
| 205 | + display: flex; | |
| 206 | + gap: 15px; | |
| 207 | + flex-wrap: wrap; | |
| 208 | + font-size: 14px; | |
| 209 | + color: #666; | |
| 210 | + margin-bottom: 10px; | |
| 211 | +} | |
| 212 | + | |
| 213 | +.result-meta span { | |
| 214 | + background: #f5f5f5; | |
| 215 | + padding: 4px 10px; | |
| 216 | + border-radius: 5px; | |
| 217 | +} | |
| 218 | + | |
| 219 | +.result-image { | |
| 220 | + max-width: 150px; | |
| 221 | + max-height: 150px; | |
| 222 | + border-radius: 8px; | |
| 223 | + margin-top: 10px; | |
| 224 | +} | |
| 225 | + | |
| 226 | +.query-info { | |
| 227 | + background: white; | |
| 228 | + border-radius: 15px; | |
| 229 | + padding: 20px; | |
| 230 | + margin-top: 20px; | |
| 231 | + box-shadow: 0 10px 40px rgba(0,0,0,0.2); | |
| 232 | +} | |
| 233 | + | |
| 234 | +.query-info h3 { | |
| 235 | + color: #333; | |
| 236 | + margin-bottom: 15px; | |
| 237 | +} | |
| 238 | + | |
| 239 | +.info-grid { | |
| 240 | + display: grid; | |
| 241 | + grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); | |
| 242 | + gap: 15px; | |
| 243 | +} | |
| 244 | + | |
| 245 | +.info-item { | |
| 246 | + padding: 15px; | |
| 247 | + background: #f5f5f5; | |
| 248 | + border-radius: 8px; | |
| 249 | +} | |
| 250 | + | |
| 251 | +.info-item strong { | |
| 252 | + display: block; | |
| 253 | + color: #667eea; | |
| 254 | + margin-bottom: 5px; | |
| 255 | +} | |
| 256 | + | |
| 257 | +footer { | |
| 258 | + text-align: center; | |
| 259 | + color: white; | |
| 260 | + margin-top: 40px; | |
| 261 | + padding: 20px; | |
| 262 | + opacity: 0.8; | |
| 263 | +} | |
| 264 | + | |
| 265 | +.error-message { | |
| 266 | + background: #ff4444; | |
| 267 | + color: white; | |
| 268 | + padding: 20px; | |
| 269 | + border-radius: 10px; | |
| 270 | + margin-bottom: 20px; | |
| 271 | +} | |
| 272 | + | |
| 273 | +.no-results { | |
| 274 | + text-align: center; | |
| 275 | + padding: 40px; | |
| 276 | + color: #666; | |
| 277 | +} | |
| 278 | + | |
| 279 | +.no-results h3 { | |
| 280 | + font-size: 24px; | |
| 281 | + margin-bottom: 10px; | |
| 282 | +} | ... | ... |
| ... | ... | @@ -0,0 +1,237 @@ |
| 1 | +// SearchEngine Frontend JavaScript | |
| 2 | + | |
| 3 | +// API endpoint | |
| 4 | +const API_BASE_URL = 'http://localhost:8000'; | |
| 5 | + | |
| 6 | +// Update API URL display | |
| 7 | +document.getElementById('apiUrl').textContent = API_BASE_URL; | |
| 8 | + | |
| 9 | +// Handle Enter key in search input | |
| 10 | +function handleKeyPress(event) { | |
| 11 | + if (event.key === 'Enter') { | |
| 12 | + performSearch(); | |
| 13 | + } | |
| 14 | +} | |
| 15 | + | |
| 16 | +// Set query from example buttons | |
| 17 | +function setQuery(query) { | |
| 18 | + document.getElementById('searchInput').value = query; | |
| 19 | + performSearch(); | |
| 20 | +} | |
| 21 | + | |
| 22 | +// Perform search | |
| 23 | +async function performSearch() { | |
| 24 | + const query = document.getElementById('searchInput').value.trim(); | |
| 25 | + | |
| 26 | + if (!query) { | |
| 27 | + alert('请输入搜索关键词'); | |
| 28 | + return; | |
| 29 | + } | |
| 30 | + | |
| 31 | + // Get options | |
| 32 | + const size = parseInt(document.getElementById('resultSize').value); | |
| 33 | + const enableTranslation = document.getElementById('enableTranslation').checked; | |
| 34 | + const enableEmbedding = document.getElementById('enableEmbedding').checked; | |
| 35 | + const enableRerank = document.getElementById('enableRerank').checked; | |
| 36 | + | |
| 37 | + // Show loading | |
| 38 | + document.getElementById('loading').style.display = 'block'; | |
| 39 | + document.getElementById('results').innerHTML = ''; | |
| 40 | + document.getElementById('queryInfo').innerHTML = ''; | |
| 41 | + | |
| 42 | + try { | |
| 43 | + const response = await fetch(`${API_BASE_URL}/search/`, { | |
| 44 | + method: 'POST', | |
| 45 | + headers: { | |
| 46 | + 'Content-Type': 'application/json', | |
| 47 | + }, | |
| 48 | + body: JSON.stringify({ | |
| 49 | + query: query, | |
| 50 | + size: size, | |
| 51 | + enable_translation: enableTranslation, | |
| 52 | + enable_embedding: enableEmbedding, | |
| 53 | + enable_rerank: enableRerank | |
| 54 | + }) | |
| 55 | + }); | |
| 56 | + | |
| 57 | + if (!response.ok) { | |
| 58 | + throw new Error(`HTTP ${response.status}: ${response.statusText}`); | |
| 59 | + } | |
| 60 | + | |
| 61 | + const data = await response.json(); | |
| 62 | + displayResults(data); | |
| 63 | + displayQueryInfo(data.query_info); | |
| 64 | + | |
| 65 | + } catch (error) { | |
| 66 | + console.error('Search error:', error); | |
| 67 | + document.getElementById('results').innerHTML = ` | |
| 68 | + <div class="error-message"> | |
| 69 | + <strong>搜索出错:</strong> ${error.message} | |
| 70 | + <br><br> | |
| 71 | + <small>请确保后端服务正在运行 (http://localhost:8000)</small> | |
| 72 | + </div> | |
| 73 | + `; | |
| 74 | + } finally { | |
| 75 | + document.getElementById('loading').style.display = 'none'; | |
| 76 | + } | |
| 77 | +} | |
| 78 | + | |
// Render a search response into the #results container.
//
// `data` is the parsed JSON body of the search request. This function reads
// data.hits (array of hits), data.total, data.took_ms and data.max_score, and
// for every hit its _source document plus _custom_score / _score.
//
// Every server-supplied value is HTML-escaped before it is placed into the
// markup, and scores that are missing or non-numeric are shown as "N/A"
// instead of throwing from toFixed() and aborting the whole render.
function displayResults(data) {
    const resultsDiv = document.getElementById('results');

    if (!data || !data.hits || data.hits.length === 0) {
        resultsDiv.innerHTML = `
            <div class="no-results">
                <h3>😔 没有找到结果</h3>
                <p>请尝试其他关键词</p>
            </div>
        `;
        return;
    }

    // Scores may be null/undefined in the response; guard before toFixed().
    const formatScore = (value) =>
        (typeof value === 'number' && isFinite(value)) ? value.toFixed(4) : 'N/A';

    // Text-context escaping for values that may be numbers (escapeHtml treats
    // falsy input such as 0 as empty, so stringify first).
    const escapeText = (value) => escapeHtml(String(value));

    // Attribute-context escaping: quotes must be encoded as well, otherwise a
    // value containing `"` could break out of src="..." / alt="...".
    const escapeAttr = (value) =>
        escapeHtml(value).replace(/"/g, '&quot;').replace(/'/g, '&#39;');

    let html = `
        <div class="results-header">
            <h2>搜索结果</h2>
            <div class="results-stats">
                找到 <strong>${escapeText(data.total)}</strong> 个结果,
                耗时 <strong>${escapeText(data.took_ms)}</strong> 毫秒,
                最高分 <strong>${formatScore(data.max_score)}</strong>
            </div>
        </div>
    `;

    data.hits.forEach((hit, index) => {
        // A hit without _source is rendered with N/A placeholders rather than crashing.
        const source = hit._source || {};
        const score = hit._custom_score || hit._score;

        html += `
            <div class="result-item">
                <div class="result-header">
                    <div>
                        <div class="result-title">${index + 1}. ${escapeHtml(source.name || 'N/A')}</div>
                        ${source.enSpuName ? `<div style="color: #666; font-size: 14px;">${escapeHtml(source.enSpuName)}</div>` : ''}
                        ${source.ruSkuName ? `<div style="color: #999; font-size: 13px;">${escapeHtml(source.ruSkuName)}</div>` : ''}
                    </div>
                    <div class="result-score">
                        ${formatScore(score)}
                    </div>
                </div>

                <div class="result-meta">
                    ${source.categoryName ? `<span>📁 ${escapeHtml(source.categoryName)}</span>` : ''}
                    ${source.brandName ? `<span>🏷️ ${escapeHtml(source.brandName)}</span>` : ''}
                    ${source.supplierName ? `<span>🏭 ${escapeHtml(source.supplierName)}</span>` : ''}
                    ${source.create_time ? `<span>📅 ${escapeText(formatDate(source.create_time))}</span>` : ''}
                </div>

                ${source.imageUrl ? `
                    <img src="${escapeAttr(source.imageUrl)}"
                         alt="${escapeAttr(source.name)}"
                         class="result-image"
                         onerror="this.style.display='none'">
                ` : ''}

                <div style="margin-top: 10px; font-size: 12px; color: #999;">
                    ID: ${escapeText(source.skuId || 'N/A')}
                </div>
            </div>
        `;
    });

    resultsDiv.innerHTML = html;
}
| 144 | + | |
// Render the query-processing details into the #queryInfo container.
//
// `queryInfo` is the `query_info` object of the search response. Fields read
// here: original_query, rewritten_query, detected_language, domain,
// translations (object keyed by language code) and has_vector. When
// `queryInfo` is missing nothing is rendered.
//
// Language labels are escaped as well: getLanguageName() hands back the raw
// code for languages it does not know, and that code comes from the server.
function displayQueryInfo(queryInfo) {
    if (!queryInfo) return;

    const queryInfoDiv = document.getElementById('queryInfo');

    // Resolve a language code to an HTML-safe display label; "N/A" when absent.
    const languageLabel = (code) => escapeHtml(String(getLanguageName(code) || 'N/A'));

    let html = `
        <h3>查询处理信息</h3>
        <div class="info-grid">
            <div class="info-item">
                <strong>原始查询</strong>
                ${escapeHtml(queryInfo.original_query || 'N/A')}
            </div>
            <div class="info-item">
                <strong>重写后查询</strong>
                ${escapeHtml(queryInfo.rewritten_query || 'N/A')}
            </div>
            <div class="info-item">
                <strong>检测语言</strong>
                ${languageLabel(queryInfo.detected_language)}
            </div>
            <div class="info-item">
                <strong>查询域</strong>
                ${escapeHtml(queryInfo.domain || 'default')}
            </div>
        </div>
    `;

    // Show translations if any (empty translations are skipped).
    if (queryInfo.translations && Object.keys(queryInfo.translations).length > 0) {
        html += '<h4 style="margin-top: 20px; margin-bottom: 10px;">翻译结果</h4><div class="info-grid">';
        for (const [lang, translation] of Object.entries(queryInfo.translations)) {
            if (translation) {
                html += `
                    <div class="info-item">
                        <strong>${languageLabel(lang)}</strong>
                        ${escapeHtml(translation)}
                    </div>
                `;
            }
        }
        html += '</div>';
    }

    // Show embedding info
    if (queryInfo.has_vector) {
        html += `
            <div style="margin-top: 15px; padding: 10px; background: #e8f5e9; border-radius: 5px;">
                ✓ 使用了语义向量搜索
            </div>
        `;
    }

    queryInfoDiv.innerHTML = html;
}
| 200 | + | |
| 201 | +// Helper functions | |
// Escape a value for safe insertion into HTML markup.
//
// Encodes &, <, >, " and ' so the result is safe both as element text and
// inside a quoted attribute value. null, undefined and the empty string yield
// ''; any other value (including the number 0) is converted with String()
// first.
function escapeHtml(text) {
    if (text === null || text === undefined) return '';
    return String(text)
        .replace(/&/g, '&amp;')   // must run first so later entities are not double-encoded
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}
| 208 | + | |
// Format a date value as a zh-CN locale date string.
//
// Returns the input unchanged when it cannot be parsed as a date. Note that
// `new Date(bad)` does not throw -- it yields an "Invalid Date" object -- so
// the parse result has to be checked explicitly; the try/catch alone never
// reached the fallback. The fallback value is returned as-is (not
// HTML-escaped); callers inserting it into markup must escape it.
function formatDate(dateStr) {
    try {
        const date = new Date(dateStr);
        if (isNaN(date.getTime())) {
            return dateStr;
        }
        return date.toLocaleDateString('zh-CN');
    } catch {
        return dateStr;
    }
}
| 217 | + | |
// Map a language code to its display name.
//
// Known codes return their native-language name; unknown codes are returned
// unchanged; a missing code (null/undefined) maps to the 'unknown' label.
// Only own keys of the table are consulted, so codes that collide with
// Object.prototype members (e.g. "constructor") are not resolved to functions.
function getLanguageName(code) {
    const names = {
        'zh': '中文',
        'en': 'English',
        'ru': 'Русский',
        'ar': 'العربية',
        'ja': '日本語',
        'unknown': '未知'
    };
    if (code === null || code === undefined) {
        return names['unknown'];
    }
    if (Object.prototype.hasOwnProperty.call(names, code)) {
        return names[code];
    }
    return code;
}
| 229 | + | |
// Page bootstrap: once the DOM is ready, log the startup banner together with
// the configured API base URL and move keyboard focus to the search box.
document.addEventListener('DOMContentLoaded', () => {
    console.log('SearchEngine Frontend loaded');
    console.log('API Base URL:', API_BASE_URL);

    // Let the user start typing a query right away.
    const searchBox = document.getElementById('searchInput');
    searchBox.focus();
});
| ... | ... | @@ -0,0 +1,40 @@ |
| 1 | +#!/usr/bin/env python3 | |
| 2 | +""" | |
| 3 | +Simple HTTP server for SearchEngine frontend. | |
| 4 | +""" | |
| 5 | + | |
| 6 | +import http.server | |
| 7 | +import socketserver | |
| 8 | +import os | |
| 9 | +import sys | |
| 10 | + | |
# Make the sibling "frontend" directory the working directory at import time;
# the startup banner below reports os.getcwd() as the directory being served.
_script_dir = os.path.dirname(__file__)
frontend_dir = os.path.join(_script_dir, '../frontend')
os.chdir(frontend_dir)

# TCP port the frontend server listens on.
PORT = 8080
| 16 | + | |
class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
    """Static-file handler that appends CORS headers to every response."""

    # (header, value) pairs written, in this order, just before the header
    # section of each response is terminated.
    _CORS_HEADERS = (
        ('Access-Control-Allow-Origin', '*'),
        ('Access-Control-Allow-Methods', 'GET, POST, OPTIONS'),
        ('Access-Control-Allow-Headers', 'Content-Type'),
    )

    def end_headers(self):
        """Emit the CORS headers, then finish the header block as usual."""
        for header_name, header_value in self._CORS_HEADERS:
            self.send_header(header_name, header_value)
        super().end_headers()

    def do_OPTIONS(self):
        """Reply to an OPTIONS request with 200 and headers only (no body)."""
        self.send_response(200)
        self.end_headers()
| 30 | + | |
class _ReusableTCPServer(socketserver.TCPServer):
    """TCPServer that enables SO_REUSEADDR on its listening socket.

    Without this, restarting the server shortly after stopping it can fail
    with "Address already in use" while the old socket is still in TIME_WAIT.
    """

    allow_reuse_address = True


if __name__ == '__main__':
    # Bind on all interfaces ("") and serve until interrupted with Ctrl+C.
    with _ReusableTCPServer(("", PORT), MyHTTPRequestHandler) as httpd:
        print(f"Frontend server started at http://localhost:{PORT}")
        print(f"Serving files from: {os.getcwd()}")
        print("\nPress Ctrl+C to stop the server")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl+C is the documented way to stop; exit cleanly with status 0.
            print("\nServer stopped")
            sys.exit(0)
| ... | ... | @@ -0,0 +1,50 @@ |
#!/bin/bash

# Data Ingestion Script for Customer1
#
# Usage: scripts/ingest.sh [LIMIT] [SKIP_EMBEDDINGS]
#   LIMIT            value passed to --limit (default: 1000)
#   SKIP_EMBEDDINGS  "true" adds --skip-embeddings (default: false)
#
# Runs data/customer1/ingest_customer1.py from the project root inside the
# "searchengine" conda environment. The index is always recreated
# (--recreate-index).

set -e

cd "$(dirname "$0")/.."
source /home/tw/miniconda3/etc/profile.d/conda.sh
conda activate searchengine

GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}Customer1 Data Ingestion${NC}"
echo -e "${GREEN}========================================${NC}"

# Default values
LIMIT=${1:-1000}
SKIP_EMBEDDINGS=${2:-false}

echo -e "\n${YELLOW}Configuration:${NC}"
echo "  Limit: $LIMIT documents"
echo "  Skip embeddings: $SKIP_EMBEDDINGS"

CSV_FILE="data/customer1/goods_with_pic.5years_congku.csv.shuf.1w"

if [ ! -f "$CSV_FILE" ]; then
    echo "Error: CSV file not found: $CSV_FILE" >&2
    exit 1
fi

# Build the command as an argv array instead of a string passed to eval:
# each argument stays a single word, so a LIMIT containing spaces or shell
# metacharacters can neither split into extra arguments nor execute code.
CMD=(
    python data/customer1/ingest_customer1.py
    --csv "$CSV_FILE"
    --limit "$LIMIT"
    --recreate-index
    --batch-size 100
)

if [ "$SKIP_EMBEDDINGS" = "true" ]; then
    CMD+=(--skip-embeddings)
fi

echo -e "\n${YELLOW}Starting ingestion...${NC}"
"${CMD[@]}"

echo -e "\n${GREEN}========================================${NC}"
echo -e "${GREEN}Ingestion Complete!${NC}"
echo -e "${GREEN}========================================${NC}"
| ... | ... | @@ -0,0 +1,34 @@ |
#!/bin/bash

# Start Backend API Service
#
# Runs `python -m api.app` from the project root inside the "searchengine"
# conda environment. CUSTOMER_ID, API_HOST, API_PORT and ES_HOST are read
# from the project's .env file and passed on the command line.

set -e

cd "$(dirname "$0")/.."
source /home/tw/miniconda3/etc/profile.d/conda.sh
conda activate searchengine

GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}Starting Backend API Service${NC}"
echo -e "${GREEN}========================================${NC}"

# Load config
if [ ! -f .env ]; then
    echo "Error: .env not found in $(pwd)" >&2
    exit 1
fi
source .env

# Fail early with a clear message instead of handing python an option that
# has lost its value because the variable was empty.
: "${CUSTOMER_ID:?CUSTOMER_ID is not set in .env}"
: "${API_HOST:?API_HOST is not set in .env}"
: "${API_PORT:?API_PORT is not set in .env}"
: "${ES_HOST:?ES_HOST is not set in .env}"

echo -e "\n${YELLOW}Configuration:${NC}"
echo "  Customer: $CUSTOMER_ID"
echo "  API Host: $API_HOST"
echo "  API Port: $API_PORT"
echo "  ES Host: $ES_HOST"

echo -e "\n${YELLOW}Starting service...${NC}"
# exec replaces this shell with the python process, so the PID of this script
# is the PID of the server itself and signals sent to it (e.g. `kill` on a
# recorded PID) reach the server instead of leaving it orphaned.
exec python -m api.app \
    --host "$API_HOST" \
    --port "$API_PORT" \
    --customer "$CUSTOMER_ID" \
    --es-host "$ES_HOST"
| 34 | + | ... | ... |
| ... | ... | @@ -0,0 +1,28 @@ |
#!/bin/bash

# Start Frontend Server
#
# Prints where the UI and the backend API are expected to be reachable, then
# runs scripts/frontend_server.py from the project root inside the
# "searchengine" conda environment.

set -e

cd "$(dirname "$0")/.."
source /home/tw/miniconda3/etc/profile.d/conda.sh
conda activate searchengine

GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Horizontal rule printed above and below the title.
RULE="${GREEN}========================================${NC}"

echo -e "$RULE"
echo -e "${GREEN}Starting Frontend Server${NC}"
echo -e "$RULE"

PORT=8080

echo -e "\n${YELLOW}Frontend will be available at:${NC}"
echo -e "  ${GREEN}http://localhost:$PORT${NC}"
echo
echo -e "${YELLOW}Make sure the backend API is running at:${NC}"
echo -e "  ${GREEN}http://localhost:8000${NC}"
echo

python scripts/frontend_server.py
| ... | ... | @@ -0,0 +1,80 @@ |
#!/bin/bash

# SearchEngine Setup Script
#
# Prepares the project for first use:
#   1. creates (if needed) and activates the "searchengine" conda environment
#   2. makes sure a .env file exists and prints the loaded configuration
#   3. verifies that Elasticsearch is reachable
# It does not start any service; see the "Next steps" printed at the end.

set -e  # Exit on error

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}SearchEngine Setup Script${NC}"
echo -e "${GREEN}========================================${NC}"

# Change to project directory (the inline python below imports project modules)
cd "$(dirname "$0")"

echo -e "\n${YELLOW}Step 1: Setting up Conda environment${NC}"
# Source conda first: in a non-interactive shell `conda` is frequently only
# available as the shell function this file defines, so checking for the
# command before sourcing would report a false "not found".
CONDA_SH=/home/tw/miniconda3/etc/profile.d/conda.sh
if [ -f "$CONDA_SH" ]; then
    source "$CONDA_SH"
fi

# Check if conda is available
if ! command -v conda &> /dev/null; then
    echo -e "${RED}Error: conda not found. Please install Miniconda or Anaconda${NC}"
    exit 1
fi

# Check if environment exists. Match the name column exactly so that an
# environment such as "searchengine-dev", or a path that merely contains the
# word, is not mistaken for it.
if conda env list | grep -Eq '^searchengine[[:space:]]'; then
    echo -e "${GREEN}Environment 'searchengine' already exists${NC}"
    conda activate searchengine
else
    echo -e "${YELLOW}Creating conda environment 'searchengine'...${NC}"
    conda env create -f environment.yml
    conda activate searchengine
    echo -e "${GREEN}Environment created successfully!${NC}"
fi

# Verify environment
echo -e "\n${YELLOW}Current Python version:${NC}"
python --version

echo -e "\n${YELLOW}Step 2: Loading configuration${NC}"
# Check if .env exists
if [ ! -f ".env" ]; then
    if [ ! -f ".env.example" ]; then
        echo -e "${RED}Error: neither .env nor .env.example found in $(pwd)${NC}"
        exit 1
    fi
    echo -e "${YELLOW}Creating .env from .env.example...${NC}"
    cp .env.example .env
    echo -e "${GREEN}.env file created. Please update it with your actual configuration.${NC}"
fi

# Display configuration
echo -e "${GREEN}Configuration loaded:${NC}"
python -c "from config.env_config import print_config; print_config()"

echo -e "\n${YELLOW}Step 3: Checking Elasticsearch connection${NC}"
# A non-zero exit from this snippet aborts the script (set -e).
python -c "
import sys
from config.env_config import get_es_config
from utils.es_client import ESClient
es_config = get_es_config()
client = ESClient(hosts=[es_config['host']], username=es_config.get('username'), password=es_config.get('password'))
if client.ping():
    print('✓ Elasticsearch is reachable')
else:
    print('✗ Elasticsearch connection failed')
    sys.exit(1)
"

echo -e "\n${GREEN}========================================${NC}"
echo -e "${GREEN}Setup Complete!${NC}"
echo -e "${GREEN}========================================${NC}"
echo ""
echo -e "Next steps:"
echo -e "  1. Ingest data:    ${YELLOW}./scripts/ingest.sh${NC}"
echo -e "  2. Start backend:  ${YELLOW}./scripts/start_backend.sh${NC}"
echo -e "  3. Start frontend: ${YELLOW}./scripts/start_frontend.sh${NC}"
echo ""
| ... | ... | @@ -0,0 +1,99 @@ |
#!/bin/bash

# One-click startup script for SearchEngine
#
# Steps:
#   1. run ./setup.sh (conda environment, .env, Elasticsearch check)
#   2. ingest 1000 test documents when the customer1 index is missing or empty
#   3. start the backend in the background (log: logs/backend.log,
#      PID: logs/backend.pid) and wait until its health endpoint answers
#   4. start the frontend in the foreground (Ctrl+C stops it)
#
# Environment:
#   BACKEND_START_TIMEOUT  seconds to wait for the backend (default: 60)

set -e

cd "$(dirname "$0")"

GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'

echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}SearchEngine一键启动脚本${NC}"
echo -e "${GREEN}========================================${NC}"

# Step 1: Setup environment
echo -e "\n${YELLOW}Step 1/4: 设置环境${NC}"
./setup.sh

# Step 2: Check if data is already ingested
echo -e "\n${YELLOW}Step 2/4: 检查数据${NC}"
source /home/tw/miniconda3/etc/profile.d/conda.sh
conda activate searchengine

# Check if index exists: prints the document count, or 0 when the index is
# missing. Any failure of the probe itself is also reported as 0.
INDEX_EXISTS=$(python -c "
from config.env_config import get_es_config
from utils.es_client import ESClient
from config import ConfigLoader

es_config = get_es_config()
es_client = ESClient(hosts=[es_config['host']], username=es_config.get('username'), password=es_config.get('password'))

config_loader = ConfigLoader('config/schema')
config = config_loader.load_customer_config('customer1')

if es_client.index_exists(config.es_index_name):
    doc_count = es_client.count(config.es_index_name)
    print(f'{doc_count}')
else:
    print('0')
" 2>/dev/null || echo "0")

if [ "$INDEX_EXISTS" = "0" ]; then
    echo -e "${YELLOW}索引不存在,开始导入数据...${NC}"
    echo -e "${YELLOW}注意: 首次导入会下载模型文件,可能需要10-30分钟${NC}"
    echo -e "${YELLOW}导入1000条数据进行快速测试(跳过embedding以加快速度)${NC}"
    ./scripts/ingest.sh 1000 true
else
    echo -e "${GREEN}数据已存在,包含 $INDEX_EXISTS 条文档${NC}"
fi

# Step 3: Start backend in background
echo -e "\n${YELLOW}Step 3/4: 启动后端服务${NC}"
echo -e "${YELLOW}后端服务将在后台运行...${NC}"
# The log and PID files live in logs/; create it so the redirections below
# cannot fail on a fresh checkout.
mkdir -p logs
nohup ./scripts/start_backend.sh > logs/backend.log 2>&1 &
BACKEND_PID=$!
echo "$BACKEND_PID" > logs/backend.pid
echo -e "${GREEN}后端服务已启动 (PID: $BACKEND_PID)${NC}"
echo -e "${GREEN}日志文件: logs/backend.log${NC}"

# Wait for backend to start: poll the health endpoint once per second rather
# than sleeping a fixed time and probing once, which reports a false failure
# whenever startup takes longer than the fixed delay.
echo -e "${YELLOW}等待后端服务启动...${NC}"
BACKEND_START_TIMEOUT=${BACKEND_START_TIMEOUT:-60}
BACKEND_READY=false
for ((elapsed = 0; elapsed < BACKEND_START_TIMEOUT; elapsed++)); do
    if curl -s http://localhost:8000/admin/health > /dev/null 2>&1; then
        BACKEND_READY=true
        break
    fi
    # Give up early when the backend process has already exited.
    if ! kill -0 "$BACKEND_PID" 2> /dev/null; then
        break
    fi
    sleep 1
done

# Check if backend is running
if [ "$BACKEND_READY" = "true" ]; then
    echo -e "${GREEN}✓ 后端服务运行正常${NC}"
else
    echo -e "${RED}✗ 后端服务启动失败,请检查日志: logs/backend.log${NC}"
    exit 1
fi

# Step 4: Start frontend
echo -e "\n${YELLOW}Step 4/4: 启动前端服务${NC}"
echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}所有服务启动完成!${NC}"
echo -e "${GREEN}========================================${NC}"
echo ""
echo -e "访问地址:"
echo -e "  ${GREEN}前端界面: http://localhost:8080${NC}"
echo -e "  ${GREEN}后端API: http://localhost:8000${NC}"
echo -e "  ${GREEN}API文档: http://localhost:8000/docs${NC}"
echo ""
echo -e "日志文件:"
echo -e "  后端: logs/backend.log"
echo ""
echo -e "停止服务:"
echo -e "  后端: kill \$(cat logs/backend.pid)"
echo -e "  前端: Ctrl+C"
echo ""
echo -e "${YELLOW}正在启动前端服务...${NC}"
echo -e "${YELLOW}按 Ctrl+C 停止前端服务${NC}"
echo ""

./scripts/start_frontend.sh