From 115047eef51c78b2081884845d157a4651f79cdf Mon Sep 17 00:00:00 2001 From: tangwang Date: Sat, 8 Nov 2025 00:24:52 +0800 Subject: [PATCH] 为一个租户灌入测试数据;实例的启动代码(包括前后端) --- DEPLOYMENT.md | 317 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ USER_GUIDE.md | 343 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ frontend/index.html | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ frontend/static/css/style.css | 282 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ frontend/static/js/app.js | 237 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/frontend_server.py | 40 ++++++++++++++++++++++++++++++++++++++++ scripts/ingest.sh | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/start_backend.sh | 34 ++++++++++++++++++++++++++++++++++ scripts/start_frontend.sh | 28 ++++++++++++++++++++++++++++ setup.sh | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ start_all.sh | 99 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 1569 insertions(+), 0 deletions(-) create mode 100644 DEPLOYMENT.md create mode 100644 USER_GUIDE.md create mode 100644 frontend/index.html create mode 100644 frontend/static/css/style.css create mode 100644 frontend/static/js/app.js create mode 100755 scripts/frontend_server.py create mode 100755 scripts/ingest.sh create mode 100755 scripts/start_backend.sh create mode 100755 scripts/start_frontend.sh create mode 100755 setup.sh create mode 100755 start_all.sh diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000..2e2ad64 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,317 @@ +# 部署检查清单 + +## 完成情况 + +✅ **环境配置** +- [x] .env 配置文件(包含ES、Redis、DeepL配置) +- [x] environment.yml (conda环境配置) +- [x] config/env_config.py (统一配置管理) +- [x] 代码已更新使用新配置 + +✅ **启动脚本** +- [x] setup.sh (环境设置) +- [x] start_all.sh (一键启动) +- [x] scripts/ingest.sh (数据导入) +- [x] scripts/start_backend.sh (后端服务) +- [x] scripts/start_frontend.sh (前端服务) + +✅ **Web前端** +- [x] HTML界面 (frontend/index.html) +- [x] CSS样式 (frontend/static/css/style.css) +- [x] JavaScript逻辑 (frontend/static/js/app.js) +- [x] 前端服务器 (scripts/frontend_server.py) + +✅ **文档** +- [x] USER_GUIDE.md (用户指南) +- [x] README.md (项目说明) +- [x] QUICKSTART.md (快速开始) +- [x] IMPLEMENTATION_SUMMARY.md (实现总结) + +## 使用步骤 + +### 方式1: 一键启动(推荐) + +```bash +cd /data/tw/SearchEngine +./start_all.sh +``` + +然后访问: http://localhost:8080 + +### 方式2: 分步启动 + +```bash +# 1. 环境设置 +./setup.sh + +# 2. 导入数据(1000条,快速测试) +./scripts/ingest.sh 1000 true + +# 3. 启动后端(新终端) +./scripts/start_backend.sh + +# 4. 
启动前端(新终端) +./scripts/start_frontend.sh +``` + +## 系统要求 + +### 软件要求 +- [x] Python 3.10 +- [x] Conda (Miniconda/Anaconda) +- [x] Elasticsearch 8.18 +- [ ] CUDA (可选,用于GPU加速) + +### 配置要求 +- [x] ES连接信息 +- [x] DeepL API Key +- [ ] Redis连接(可选) + +### 硬件要求 +- 内存: 建议8GB+ +- 磁盘: 10GB+ (包含模型文件) +- GPU: 可选(加速embedding生成) + +## 配置信息 + +当前配置(.env文件): +``` +ES_HOST=http://localhost:9200 +ES_USERNAME=essa +ES_PASSWORD=4hOaLaf41y2VuI8y + +REDIS_HOST=localhost +REDIS_PORT=6479 +REDIS_PASSWORD=BMfv5aI31kgHWtlx + +DEEPL_AUTH_KEY=c9293ab4-ad25-479b-919f-ab4e63b429ed + +CUSTOMER_ID=customer1 +API_HOST=0.0.0.0 +API_PORT=8000 +``` + +## 服务端口 + +| 服务 | 端口 | URL | +|------|------|-----| +| Elasticsearch | 9200 | http://localhost:9200 | +| Backend API | 8000 | http://localhost:8000 | +| Frontend Web | 8080 | http://localhost:8080 | +| API Docs | 8000 | http://localhost:8000/docs | + +## 测试流程 + +### 1. 环境测试 + +```bash +# 激活环境 +source /home/tw/miniconda3/etc/profile.d/conda.sh +conda activate searchengine + +# 检查Python +python --version # 应该显示 Python 3.10.x + +# 检查配置 +python -c "from config.env_config import print_config; print_config()" +``` + +### 2. ES连接测试 + +```bash +# 直接测试 +curl http://localhost:9200 -u essa:4hOaLaf41y2VuI8y + +# Python测试 +python -c " +from config.env_config import get_es_config +from utils.es_client import ESClient +es_config = get_es_config() +client = ESClient(hosts=[es_config['host']], username=es_config.get('username'), password=es_config.get('password')) +print('ES Connected:', client.ping()) +" +``` + +### 3. 
数据导入测试 + +```bash +# 导入100条测试数据(跳过embedding以加快速度) +./scripts/ingest.sh 100 true + +# 检查导入结果 +python -c " +from config.env_config import get_es_config +from utils.es_client import ESClient +from config import ConfigLoader + +es_config = get_es_config() +es_client = ESClient(hosts=[es_config['host']], username=es_config.get('username'), password=es_config.get('password')) +config = ConfigLoader('config/schema').load_customer_config('customer1') +count = es_client.count(config.es_index_name) +print(f'Documents in index: {count}') +" +``` + +### 4. API测试 + +```bash +# 启动后端(后台) +nohup ./scripts/start_backend.sh > logs/backend.log 2>&1 & + +# 等待启动 +sleep 5 + +# 测试健康检查 +curl http://localhost:8000/admin/health + +# 测试搜索 +curl -X POST http://localhost:8000/search/ \ + -H "Content-Type: application/json" \ + -d '{"query": "消防", "size": 5}' +``` + +### 5. 前端测试 + +```bash +# 启动前端 +./scripts/start_frontend.sh + +# 然后在浏览器访问: http://localhost:8080 +``` + +## 故障排除 + +### 问题1: conda环境创建失败 + +**症状**: `conda env create` 报错 + +**解决**: +```bash +# 检查conda版本 +conda --version + +# 更新conda +conda update -n base conda + +# 重试创建 +conda env create -f environment.yml +``` + +### 问题2: ES连接失败 + +**症状**: `Failed to connect to Elasticsearch` + +**解决**: +```bash +# 检查ES状态 +curl http://localhost:9200 -u essa:4hOaLaf41y2VuI8y + +# 检查ES版本 +curl http://localhost:9200 -u essa:4hOaLaf41y2VuI8y | grep version + +# 确认配置 +cat .env | grep ES_ +``` + +### 问题3: 模型下载慢 + +**症状**: 首次运行时模型下载很慢或超时 + +**解决**: +```bash +# 跳过embedding快速测试 +./scripts/ingest.sh 1000 true + +# 或手动下载模型到指定目录 +# TEXT_MODEL_DIR=/data/tw/models/bge-m3 +# IMAGE_MODEL_DIR=/data/tw/models/cn-clip +``` + +### 问题4: 端口被占用 + +**症状**: `Address already in use` + +**解决**: +```bash +# 查看占用端口的进程 +lsof -i :8000 # 后端 +lsof -i :8080 # 前端 + +# 杀掉进程 +kill -9 <PID> + +# 或修改.env中的端口 +``` + +## 下一步 + +1. **测试搜索功能** + - 打开 http://localhost:8080 + - 尝试不同的搜索查询 + - 测试布尔操作符 + +2. **查看API文档** + - 访问 http://localhost:8000/docs + - 了解所有可用的API端点 + +3. 
**自定义配置** + - 编辑 `config/schema/customer1_config.yaml` + - 添加查询重写规则 + - 调整ranking表达式 + +4. **导入更多数据** + - 导入完整的10000条数据 + - 生成embeddings以启用语义搜索 + +5. **性能优化** + - 启用Redis缓存 + - 调整ES分片数量 + - 使用GPU加速 + +## 项目文件清单 + +**新增文件 (环境和启动相关)**: +- [x] .env - 环境配置 +- [x] .env.example - 配置模板 +- [x] environment.yml - Conda环境 +- [x] config/env_config.py - 配置管理 +- [x] setup.sh - 环境设置 +- [x] start_all.sh - 一键启动 +- [x] scripts/ingest.sh - 数据导入 +- [x] scripts/start_backend.sh - 后端启动 +- [x] scripts/start_frontend.sh - 前端启动 +- [x] scripts/frontend_server.py - 前端服务器 +- [x] frontend/index.html - 前端页面 +- [x] frontend/static/css/style.css - 样式 +- [x] frontend/static/js/app.js - 前端逻辑 +- [x] USER_GUIDE.md - 用户指南 +- [x] DEPLOYMENT.md - 本文件 + +**总计**: 15个新文件 + +## 验证完成 + +运行以下命令验证所有文件都已创建: + +```bash +cd /data/tw/SearchEngine + +# 检查关键文件 +ls -la .env setup.sh start_all.sh +ls -la scripts/*.sh scripts/*.py +ls -la frontend/index.html +ls -la frontend/static/css/style.css +ls -la frontend/static/js/app.js + +# 检查可执行权限 +ls -l *.sh scripts/*.sh | grep "x" +``` + +所有文件应该都存在且脚本有执行权限。 + +## 支持联系 + +如有问题,请检查: +1. logs/backend.log - 后端日志 +2. 浏览器控制台 - 前端错误 +3. USER_GUIDE.md - 详细使用说明 diff --git a/USER_GUIDE.md b/USER_GUIDE.md new file mode 100644 index 0000000..cdadcdf --- /dev/null +++ b/USER_GUIDE.md @@ -0,0 +1,343 @@ +# 使用指南 - SearchEngine + +## 快速启动(推荐) + +### 一键启动所有服务 + +```bash +cd /data/tw/SearchEngine +./start_all.sh +``` + +这个脚本会自动完成: +1. 设置conda环境 +2. 检查并导入测试数据(如果需要) +3. 启动后端API服务(后台运行) +4. 启动前端Web界面 + +启动完成后,访问: +- **前端界面**: http://localhost:8080 +- **后端API**: http://localhost:8000 +- **API文档**: http://localhost:8000/docs + +### 停止服务 + +```bash +# 停止后端 +kill $(cat logs/backend.pid) + +# 前端按 Ctrl+C +``` + +--- + +## 分步启动(自定义) + +### 1. 环境设置 + +```bash +cd /data/tw/SearchEngine +./setup.sh +``` + +这会: +- 创建/激活conda环境 `searchengine` +- 加载配置文件 +- 检查Elasticsearch连接 + +### 2. 
数据导入 + +#### 快速测试(1000条,不生成embedding) +```bash +./scripts/ingest.sh 1000 true +``` + +#### 完整导入(10000条,包含embedding) +```bash +./scripts/ingest.sh 10000 false +``` + +**注意**: 首次运行会下载模型文件(BGE-M3和CN-CLIP),大约需要10-30分钟。 + +### 3. 启动后端 + +```bash +./scripts/start_backend.sh +``` + +后端API会在 http://localhost:8000 启动 + +### 4. 启动前端 + +```bash +./scripts/start_frontend.sh +``` + +前端界面会在 http://localhost:8080 启动 + +--- + +## 配置说明 + +### 环境配置文件 (.env) + +```bash +# Elasticsearch配置 +ES_HOST=http://localhost:9200 +ES_USERNAME=essa +ES_PASSWORD=4hOaLaf41y2VuI8y + +# Redis配置(可选,用于缓存) +REDIS_HOST=localhost +REDIS_PORT=6479 +REDIS_PASSWORD=BMfv5aI31kgHWtlx + +# DeepL翻译API +DEEPL_AUTH_KEY=c9293ab4-ad25-479b-919f-ab4e63b429ed + +# 客户配置 +CUSTOMER_ID=customer1 + +# API服务配置 +API_HOST=0.0.0.0 +API_PORT=8000 +``` + +### 修改配置 + +1. 编辑 `.env` 文件 +2. 重启相关服务 + +--- + +## 使用Web界面 + +### 搜索功能 + +1. **简单搜索**: 直接输入关键词 + - 中文: "芭比娃娃" + - 英文: "fire control set" + - 俄文: "Наборы для пожаротушения" + +2. **布尔搜索**: 使用操作符 + - AND: "toy AND barbie" + - OR: "barbie OR doll" + - ANDNOT: "toy ANDNOT cheap" + - 组合: "toy AND (barbie OR doll) ANDNOT cheap" + +3. **域搜索**: 指定搜索域 + - 品牌: "brand:ZHU LIN" + - 类别: "category:玩具" + +### 搜索选项 + +- **启用翻译**: 自动翻译查询到其他语言 +- **启用语义搜索**: 使用embedding进行语义匹配 +- **启用自定义排序**: 使用配置的ranking表达式 +- **结果数量**: 10/20/50条 + +--- + +## API使用 + +### 搜索接口 + +```bash +curl -X POST http://localhost:8000/search/ \ + -H "Content-Type: application/json" \ + -d '{ + "query": "芭比娃娃", + "size": 10, + "enable_translation": true, + "enable_embedding": true + }' +``` + +### 图片搜索 + +```bash +curl -X POST http://localhost:8000/search/image \ + -H "Content-Type: application/json" \ + -d '{ + "image_url": "https://oss.essa.cn/example.jpg", + "size": 10 + }' +``` + +### 健康检查 + +```bash +curl http://localhost:8000/admin/health +``` + +### 查看配置 + +```bash +curl http://localhost:8000/admin/config +``` + +### 索引统计 + +```bash +curl http://localhost:8000/admin/stats +``` + +--- + +## 常见问题 + +### 1. 
Elasticsearch连接失败 + +**问题**: `Failed to connect to Elasticsearch` + +**解决**: +```bash +# 检查ES是否运行 +curl http://localhost:9200 + +# 检查配置 +cat .env | grep ES_ +``` + +### 2. 导入数据时内存不足 + +**问题**: `Out of memory` + +**解决**: +```bash +# 减少batch size或跳过embedding +./scripts/ingest.sh 1000 true +``` + +### 3. 模型下载失败 + +**问题**: 模型文件下载超时 + +**解决**: +- 检查网络连接 +- 使用国内镜像源 +- 手动下载模型到指定目录 + +### 4. 翻译不工作 + +**问题**: 翻译返回原文 + +**解决**: +- 检查DEEPL_AUTH_KEY是否正确 +- 如果没有API key,系统会使用mock模式(返回原文) + +### 5. 前端无法连接后端 + +**问题**: CORS错误 + +**解决**: +- 确保后端在 http://localhost:8000 运行 +- 检查浏览器控制台错误信息 + +--- + +## 开发和调试 + +### 查看日志 + +```bash +# 后端日志 +tail -f logs/backend.log + +# 实时日志(如果前台运行) +./scripts/start_backend.sh +``` + +### Python命令行测试 + +```bash +# 激活环境 +source /home/tw/miniconda3/etc/profile.d/conda.sh +conda activate searchengine + +# 测试搜索 +python -c " +from config import ConfigLoader +from utils import ESClient +from search import Searcher +from config.env_config import get_es_config + +config_loader = ConfigLoader('config/schema') +config = config_loader.load_customer_config('customer1') + +es_config = get_es_config() +es_client = ESClient(hosts=[es_config['host']], + username=es_config.get('username'), + password=es_config.get('password')) + +searcher = Searcher(config, es_client) +result = searcher.search('芭比娃娃', size=5) + +print(f'找到 {result.total} 个结果') +for hit in result.hits: + print(f' - {hit[\"_source\"][\"name\"]} (分数: {hit[\"_score\"]:.4f})') +" +``` + +### 重新导入数据 + +```bash +# 删除现有索引并重新导入 +./scripts/ingest.sh 1000 true +``` + +--- + +## 性能优化 + +### 1. 使用embedding缓存 + +首次生成embedding后会自动缓存到 `.cache/` 目录,后续导入会更快。 + +### 2. 批量大小调整 + +```bash +# 修改批量大小(在ingest_customer1.py中) +--batch-size 200 # 默认100 +``` + +### 3. 
GPU加速 + +确保CUDA可用以加速embedding生成: +```bash +python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')" +``` + +--- + +## 项目结构 + +``` +SearchEngine/ +├── .env # 环境配置 +├── setup.sh # 环境设置脚本 +├── start_all.sh # 一键启动脚本 +├── scripts/ # 运行脚本 +│ ├── ingest.sh # 数据导入 +│ ├── start_backend.sh # 启动后端 +│ └── start_frontend.sh # 启动前端 +├── frontend/ # Web前端 +│ ├── index.html +│ └── static/ +├── logs/ # 日志文件 +├── config/ # 配置模块 +├── indexer/ # 数据导入 +├── query/ # 查询处理 +├── search/ # 搜索引擎 +├── embeddings/ # 向量模型 +└── api/ # REST API +``` + +--- + +## 支持 + +遇到问题请查看: +- **日志**: `logs/backend.log` +- **API文档**: http://localhost:8000/docs +- **配置**: `config/schema/customer1_config.yaml` diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..9bc8ca8 --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,59 @@ + + + + + + 电商搜索引擎 - SearchEngine Demo + + + +
+
+

🔍 电商搜索引擎

+

E-Commerce Search Engine - Customer1 Demo

+
+ +
+ + +
+ + + + +
+ +
+ 搜索示例: + + + + +
+
+ + + +
+ +
+
+ +
+

SearchEngine © 2025 | API服务地址: http://localhost:8000

+
+ + + + diff --git a/frontend/static/css/style.css b/frontend/static/css/style.css new file mode 100644 index 0000000..d3abda7 --- /dev/null +++ b/frontend/static/css/style.css @@ -0,0 +1,282 @@ +/* SearchEngine Frontend Styles */ + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Microsoft YaHei", sans-serif; + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + min-height: 100vh; + padding: 20px; +} + +.container { + max-width: 1200px; + margin: 0 auto; +} + +header { + text-align: center; + color: white; + margin-bottom: 40px; +} + +header h1 { + font-size: 3em; + margin-bottom: 10px; +} + +.subtitle { + font-size: 1.2em; + opacity: 0.9; +} + +.search-section { + background: white; + border-radius: 15px; + padding: 30px; + box-shadow: 0 10px 40px rgba(0,0,0,0.2); + margin-bottom: 30px; +} + +.search-box { + display: flex; + gap: 10px; + margin-bottom: 20px; +} + +#searchInput { + flex: 1; + padding: 15px 20px; + font-size: 16px; + border: 2px solid #e0e0e0; + border-radius: 10px; + transition: border-color 0.3s; +} + +#searchInput:focus { + outline: none; + border-color: #667eea; +} + +.search-button { + padding: 15px 40px; + font-size: 16px; + font-weight: bold; + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + border: none; + border-radius: 10px; + cursor: pointer; + transition: transform 0.2s; +} + +.search-button:hover { + transform: translateY(-2px); +} + +.search-options { + display: flex; + gap: 20px; + align-items: center; + margin-bottom: 15px; + flex-wrap: wrap; +} + +.search-options label { + display: flex; + align-items: center; + gap: 5px; + cursor: pointer; +} + +.search-options select { + padding: 5px 10px; + border: 2px solid #e0e0e0; + border-radius: 5px; + font-size: 14px; +} + +.search-examples { + padding: 15px; + background: #f5f5f5; + border-radius: 10px; +} + +.example-btn { + padding: 8px 15px; + margin: 5px; 
+ background: white; + border: 1px solid #ddd; + border-radius: 5px; + cursor: pointer; + transition: all 0.2s; +} + +.example-btn:hover { + background: #667eea; + color: white; + border-color: #667eea; +} + +.loading { + text-align: center; + padding: 40px; + color: white; +} + +.spinner { + width: 50px; + height: 50px; + margin: 0 auto 20px; + border: 4px solid rgba(255,255,255,0.3); + border-top-color: white; + border-radius: 50%; + animation: spin 1s linear infinite; +} + +@keyframes spin { + to { transform: rotate(360deg); } +} + +.results-section { + background: white; + border-radius: 15px; + padding: 30px; + box-shadow: 0 10px 40px rgba(0,0,0,0.2); +} + +.results-header { + margin-bottom: 20px; + padding-bottom: 15px; + border-bottom: 2px solid #e0e0e0; +} + +.results-header h2 { + color: #333; + margin-bottom: 10px; +} + +.results-stats { + color: #666; + font-size: 14px; +} + +.result-item { + padding: 20px; + margin-bottom: 15px; + border: 1px solid #e0e0e0; + border-radius: 10px; + transition: all 0.3s; +} + +.result-item:hover { + box-shadow: 0 5px 15px rgba(0,0,0,0.1); + border-color: #667eea; +} + +.result-header { + display: flex; + justify-content: space-between; + align-items: start; + margin-bottom: 10px; +} + +.result-title { + font-size: 18px; + font-weight: bold; + color: #333; + margin-bottom: 5px; +} + +.result-score { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + padding: 5px 12px; + border-radius: 20px; + font-size: 12px; + font-weight: bold; +} + +.result-meta { + display: flex; + gap: 15px; + flex-wrap: wrap; + font-size: 14px; + color: #666; + margin-bottom: 10px; +} + +.result-meta span { + background: #f5f5f5; + padding: 4px 10px; + border-radius: 5px; +} + +.result-image { + max-width: 150px; + max-height: 150px; + border-radius: 8px; + margin-top: 10px; +} + +.query-info { + background: white; + border-radius: 15px; + padding: 20px; + margin-top: 20px; + box-shadow: 0 10px 40px rgba(0,0,0,0.2); 
+} + +.query-info h3 { + color: #333; + margin-bottom: 15px; +} + +.info-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); + gap: 15px; +} + +.info-item { + padding: 15px; + background: #f5f5f5; + border-radius: 8px; +} + +.info-item strong { + display: block; + color: #667eea; + margin-bottom: 5px; +} + +footer { + text-align: center; + color: white; + margin-top: 40px; + padding: 20px; + opacity: 0.8; +} + +.error-message { + background: #ff4444; + color: white; + padding: 20px; + border-radius: 10px; + margin-bottom: 20px; +} + +.no-results { + text-align: center; + padding: 40px; + color: #666; +} + +.no-results h3 { + font-size: 24px; + margin-bottom: 10px; +} diff --git a/frontend/static/js/app.js b/frontend/static/js/app.js new file mode 100644 index 0000000..3af062d --- /dev/null +++ b/frontend/static/js/app.js @@ -0,0 +1,237 @@ +// SearchEngine Frontend JavaScript + +// API endpoint +const API_BASE_URL = 'http://localhost:8000'; + +// Update API URL display +document.getElementById('apiUrl').textContent = API_BASE_URL; + +// Handle Enter key in search input +function handleKeyPress(event) { + if (event.key === 'Enter') { + performSearch(); + } +} + +// Set query from example buttons +function setQuery(query) { + document.getElementById('searchInput').value = query; + performSearch(); +} + +// Perform search +async function performSearch() { + const query = document.getElementById('searchInput').value.trim(); + + if (!query) { + alert('请输入搜索关键词'); + return; + } + + // Get options + const size = parseInt(document.getElementById('resultSize').value); + const enableTranslation = document.getElementById('enableTranslation').checked; + const enableEmbedding = document.getElementById('enableEmbedding').checked; + const enableRerank = document.getElementById('enableRerank').checked; + + // Show loading + document.getElementById('loading').style.display = 'block'; + document.getElementById('results').innerHTML = ''; + 
document.getElementById('queryInfo').innerHTML = ''; + + try { + const response = await fetch(`${API_BASE_URL}/search/`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + query: query, + size: size, + enable_translation: enableTranslation, + enable_embedding: enableEmbedding, + enable_rerank: enableRerank + }) + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + const data = await response.json(); + displayResults(data); + displayQueryInfo(data.query_info); + + } catch (error) { + console.error('Search error:', error); + document.getElementById('results').innerHTML = ` +
+ 搜索出错: ${error.message} +

+ 请确保后端服务正在运行 (http://localhost:8000) +
+ `; + } finally { + document.getElementById('loading').style.display = 'none'; + } +} + +// Display search results +function displayResults(data) { + const resultsDiv = document.getElementById('results'); + + if (!data.hits || data.hits.length === 0) { + resultsDiv.innerHTML = ` +
+

😔 没有找到结果

+

请尝试其他关键词

+
+ `; + return; + } + + let html = ` +
+

搜索结果

+
+ 找到 ${data.total} 个结果, + 耗时 ${data.took_ms} 毫秒, + 最高分 ${data.max_score.toFixed(4)} +
+
+ `; + + data.hits.forEach((hit, index) => { + const source = hit._source; + const score = hit._custom_score || hit._score; + + html += ` +
+
+
+
${index + 1}. ${escapeHtml(source.name || 'N/A')}
+ ${source.enSpuName ? `
${escapeHtml(source.enSpuName)}
` : ''} + ${source.ruSkuName ? `
${escapeHtml(source.ruSkuName)}
` : ''} +
+
+ ${score.toFixed(4)} +
+
+ +
+ ${source.categoryName ? `📁 ${escapeHtml(source.categoryName)}` : ''} + ${source.brandName ? `🏷️ ${escapeHtml(source.brandName)}` : ''} + ${source.supplierName ? `🏭 ${escapeHtml(source.supplierName)}` : ''} + ${source.create_time ? `📅 ${formatDate(source.create_time)}` : ''} +
+ + ${source.imageUrl ? ` + ${escapeHtml(source.name)} + ` : ''} + +
+ ID: ${source.skuId || 'N/A'} +
+
+ `; + }); + + resultsDiv.innerHTML = html; +} + +// Display query processing information +function displayQueryInfo(queryInfo) { + if (!queryInfo) return; + + const queryInfoDiv = document.getElementById('queryInfo'); + + let html = ` +

查询处理信息

+
+
+ 原始查询 + ${escapeHtml(queryInfo.original_query || 'N/A')} +
+
+ 重写后查询 + ${escapeHtml(queryInfo.rewritten_query || 'N/A')} +
+
+ 检测语言 + ${getLanguageName(queryInfo.detected_language)} +
+
+ 查询域 + ${escapeHtml(queryInfo.domain || 'default')} +
+
+ `; + + // Show translations if any + if (queryInfo.translations && Object.keys(queryInfo.translations).length > 0) { + html += '

翻译结果

'; + for (const [lang, translation] of Object.entries(queryInfo.translations)) { + if (translation) { + html += ` +
+ ${getLanguageName(lang)} + ${escapeHtml(translation)} +
+ `; + } + } + html += '
'; + } + + // Show embedding info + if (queryInfo.has_vector) { + html += ` +
+ ✓ 使用了语义向量搜索 +
+ `; + } + + queryInfoDiv.innerHTML = html; +} + +// Helper functions +function escapeHtml(text) { + if (!text) return ''; + const div = document.createElement('div'); + div.textContent = text; + return div.innerHTML; +} + +function formatDate(dateStr) { + try { + const date = new Date(dateStr); + return date.toLocaleDateString('zh-CN'); + } catch { + return dateStr; + } +} + +function getLanguageName(code) { + const names = { + 'zh': '中文', + 'en': 'English', + 'ru': 'Русский', + 'ar': 'العربية', + 'ja': '日本語', + 'unknown': '未知' + }; + return names[code] || code; +} + +// Initialize page +document.addEventListener('DOMContentLoaded', function() { + console.log('SearchEngine Frontend loaded'); + console.log('API Base URL:', API_BASE_URL); + + // Focus on search input + document.getElementById('searchInput').focus(); +}); diff --git a/scripts/frontend_server.py b/scripts/frontend_server.py new file mode 100755 index 0000000..fc8bafa --- /dev/null +++ b/scripts/frontend_server.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +""" +Simple HTTP server for SearchEngine frontend. 
+"""
+
+import http.server
+import socketserver
+import os
+import sys
+
+# Change to frontend directory
+frontend_dir = os.path.join(os.path.dirname(__file__), '../frontend')
+os.chdir(frontend_dir)
+
+PORT = 8080
+
+class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
+    """Custom request handler with CORS support."""
+
+    def end_headers(self):
+        # Add CORS headers
+        self.send_header('Access-Control-Allow-Origin', '*')
+        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
+        self.send_header('Access-Control-Allow-Headers', 'Content-Type')
+        super().end_headers()
+
+    def do_OPTIONS(self):
+        self.send_response(200)
+        self.end_headers()
+
+if __name__ == '__main__':
+    with socketserver.TCPServer(("", PORT), MyHTTPRequestHandler) as httpd:
+        print(f"Frontend server started at http://localhost:{PORT}")
+        print(f"Serving files from: {os.getcwd()}")
+        print("\nPress Ctrl+C to stop the server")
+        try:
+            httpd.serve_forever()
+        except KeyboardInterrupt:
+            print("\nServer stopped")
+            sys.exit(0)
diff --git a/scripts/ingest.sh b/scripts/ingest.sh
new file mode 100755
index 0000000..f048ed0
--- /dev/null
+++ b/scripts/ingest.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Data Ingestion Script for Customer1
+#
+# Usage: ./scripts/ingest.sh [LIMIT] [SKIP_EMBEDDINGS]
+#   LIMIT            number of documents to ingest (default: 1000)
+#   SKIP_EMBEDDINGS  "true" adds --skip-embeddings (default: false)
+#
+# Environment:
+#   CONDA_SH  path to conda's profile script
+#             (default: /home/tw/miniconda3/etc/profile.d/conda.sh)
+#
+# The ingest command is always run with --recreate-index.
+
+set -e
+
+cd "$(dirname "$0")/.."
+CONDA_SH="${CONDA_SH:-/home/tw/miniconda3/etc/profile.d/conda.sh}"
+source "$CONDA_SH"
+conda activate searchengine
+
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+echo -e "${GREEN}========================================${NC}"
+echo -e "${GREEN}Customer1 Data Ingestion${NC}"
+echo -e "${GREEN}========================================${NC}"
+
+# Default values
+LIMIT=${1:-1000}
+SKIP_EMBEDDINGS=${2:-false}
+
+echo -e "\n${YELLOW}Configuration:${NC}"
+echo " Limit: $LIMIT documents"
+echo " Skip embeddings: $SKIP_EMBEDDINGS"
+
+CSV_FILE="data/customer1/goods_with_pic.5years_congku.csv.shuf.1w"
+
+if [ ! -f "$CSV_FILE" ]; then
+    echo "Error: CSV file not found: $CSV_FILE" >&2
+    exit 1
+fi
+
+# Build the command as an array and run it directly. The earlier
+# string + eval form re-parsed LIMIT as shell code, so an argument like
+# '10; some-command' would have been executed.
+CMD=(
+    python data/customer1/ingest_customer1.py
+    --csv "$CSV_FILE"
+    --limit "$LIMIT"
+    --recreate-index
+    --batch-size 100
+)
+
+if [ "$SKIP_EMBEDDINGS" = "true" ]; then
+    CMD+=(--skip-embeddings)
+fi
+
+echo -e "\n${YELLOW}Starting ingestion...${NC}"
+"${CMD[@]}"
+
+echo -e "\n${GREEN}========================================${NC}"
+echo -e "${GREEN}Ingestion Complete!${NC}"
+echo -e "${GREEN}========================================${NC}"
diff --git a/scripts/start_backend.sh b/scripts/start_backend.sh
new file mode 100755
index 0000000..416ae51
--- /dev/null
+++ b/scripts/start_backend.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Start Backend API Service
+#
+# Loads CUSTOMER_ID, API_HOST, API_PORT and ES_HOST from .env in the project
+# root, then runs the API server (python -m api.app) in the foreground.
+#
+# Environment:
+#   CONDA_SH  path to conda's profile script
+#             (default: /home/tw/miniconda3/etc/profile.d/conda.sh)
+
+set -e
+
+cd "$(dirname "$0")/.."
+CONDA_SH="${CONDA_SH:-/home/tw/miniconda3/etc/profile.d/conda.sh}"
+source "$CONDA_SH"
+conda activate searchengine
+
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+echo -e "${GREEN}========================================${NC}"
+echo -e "${GREEN}Starting Backend API Service${NC}"
+echo -e "${GREEN}========================================${NC}"
+
+# Load config
+if [ ! -f ".env" ]; then
+    echo "Error: .env not found in $(pwd); run ./setup.sh first" >&2
+    exit 1
+fi
+source .env
+
+# Fail early with a clear message instead of passing empty option values.
+: "${CUSTOMER_ID:?CUSTOMER_ID must be set in .env}"
+: "${API_HOST:?API_HOST must be set in .env}"
+: "${API_PORT:?API_PORT must be set in .env}"
+: "${ES_HOST:?ES_HOST must be set in .env}"
+
+echo -e "\n${YELLOW}Configuration:${NC}"
+echo " Customer: $CUSTOMER_ID"
+echo " API Host: $API_HOST"
+echo " API Port: $API_PORT"
+echo " ES Host: $ES_HOST"
+
+echo -e "\n${YELLOW}Starting service...${NC}"
+# exec replaces this shell with the server process, so a PID recorded by the
+# caller (start_all.sh writes $! to logs/backend.pid) is the server's own PID
+# and `kill $(cat logs/backend.pid)` really stops the server.
+exec python -m api.app \
+    --host "$API_HOST" \
+    --port "$API_PORT" \
+    --customer "$CUSTOMER_ID" \
+    --es-host "$ES_HOST"
+
diff --git a/scripts/start_frontend.sh b/scripts/start_frontend.sh
new file mode 100755
index 0000000..c596b86
--- /dev/null
+++ b/scripts/start_frontend.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Start Frontend Server
+#
+# Serves the static frontend via scripts/frontend_server.py in the foreground.
+#
+# Environment:
+#   CONDA_SH  path to conda's profile script
+#             (default: /home/tw/miniconda3/etc/profile.d/conda.sh)
+
+set -e
+
+cd "$(dirname "$0")/.."
+CONDA_SH="${CONDA_SH:-/home/tw/miniconda3/etc/profile.d/conda.sh}"
+source "$CONDA_SH"
+conda activate searchengine
+
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+echo -e "${GREEN}========================================${NC}"
+echo -e "${GREEN}Starting Frontend Server${NC}"
+echo -e "${GREEN}========================================${NC}"
+
+# Only used for the message below; the port actually bound is PORT in
+# scripts/frontend_server.py, so keep the two in sync.
+PORT=8080
+
+echo -e "\n${YELLOW}Frontend will be available at:${NC}"
+echo -e " ${GREEN}http://localhost:$PORT${NC}"
+echo ""
+echo -e "${YELLOW}Make sure the backend API is running at:${NC}"
+echo -e " ${GREEN}http://localhost:8000${NC}"
+echo ""
+
+python scripts/frontend_server.py
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..d310273
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+# SearchEngine Setup Script
+# Prepares the conda environment, makes sure .env exists and checks the
+# Elasticsearch connection. It does not start any service.
+#
+# Environment:
+#   CONDA_SH  path to conda's profile script
+#             (default: /home/tw/miniconda3/etc/profile.d/conda.sh)
+
+set -e # Exit on error
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+echo -e "${GREEN}========================================${NC}"
+echo -e "${GREEN}SearchEngine Setup Script${NC}"
+echo -e "${GREEN}========================================${NC}"
+
+# Change to project directory
+cd "$(dirname "$0")"
+
+echo -e "\n${YELLOW}Step 1: Setting up Conda environment${NC}"
+# Load conda's shell integration before probing for the command: in a
+# non-interactive shell `conda` may not be on PATH until this file has been
+# sourced, so probing first can reject a working installation.
+CONDA_SH="${CONDA_SH:-/home/tw/miniconda3/etc/profile.d/conda.sh}"
+if [ -f "$CONDA_SH" ]; then
+    source "$CONDA_SH"
+fi
+
+# Check if conda is available
+if ! command -v conda &> /dev/null; then
+    echo -e "${RED}Error: conda not found. Please install Miniconda or Anaconda${NC}" >&2
+    exit 1
+fi
+
+# Check if environment exists. Compare the name column exactly so that e.g.
+# "searchengine-dev" is not mistaken for "searchengine".
+if conda env list | awk '$1 == "searchengine" { found = 1 } END { exit !found }'; then
+    echo -e "${GREEN}Environment 'searchengine' already exists${NC}"
+    conda activate searchengine
+else
+    echo -e "${YELLOW}Creating conda environment 'searchengine'...${NC}"
+    conda env create -f environment.yml
+    conda activate searchengine
+    echo -e "${GREEN}Environment created successfully!${NC}"
+fi
+
+# Verify environment
+echo -e "\n${YELLOW}Current Python version:${NC}"
+python --version
+
+echo -e "\n${YELLOW}Step 2: Loading configuration${NC}"
+# Check if .env exists
+if [ ! -f ".env" ]; then
+    echo -e "${YELLOW}Creating .env from .env.example...${NC}"
+    cp .env.example .env
+    echo -e "${GREEN}.env file created. Please update it with your actual configuration.${NC}"
+fi
+
+# Display configuration
+echo -e "${GREEN}Configuration loaded:${NC}"
+python -c "from config.env_config import print_config; print_config()"
+
+echo -e "\n${YELLOW}Step 3: Checking Elasticsearch connection${NC}"
+python -c "
+from config.env_config import get_es_config
+from utils.es_client import ESClient
+es_config = get_es_config()
+client = ESClient(hosts=[es_config['host']], username=es_config.get('username'), password=es_config.get('password'))
+if client.ping():
+    print('✓ Elasticsearch is reachable')
+else:
+    print('✗ Elasticsearch connection failed')
+    exit(1)
+"
+
+echo -e "\n${GREEN}========================================${NC}"
+echo -e "${GREEN}Setup Complete!${NC}"
+echo -e "${GREEN}========================================${NC}"
+echo ""
+echo -e "Next steps:"
+echo -e " 1. Ingest data: ${YELLOW}./scripts/ingest.sh${NC}"
+echo -e " 2. Start backend: ${YELLOW}./scripts/start_backend.sh${NC}"
+echo -e " 3. Start frontend: ${YELLOW}./scripts/start_frontend.sh${NC}"
+echo ""
diff --git a/start_all.sh b/start_all.sh
new file mode 100755
index 0000000..d6a7166
--- /dev/null
+++ b/start_all.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+
+# One-click startup script for SearchEngine
+# Runs setup.sh, ingests test data when the index is missing or empty,
+# starts the backend in the background (PID in logs/backend.pid, output in
+# logs/backend.log) and finally runs the frontend in the foreground.
+#
+# Environment:
+#   CONDA_SH  path to conda's profile script
+#             (default: /home/tw/miniconda3/etc/profile.d/conda.sh)
+
+set -e
+
+cd "$(dirname "$0")"
+
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+RED='\033[0;31m'
+NC='\033[0m'
+
+echo -e "${GREEN}========================================${NC}"
+echo -e "${GREEN}SearchEngine一键启动脚本${NC}"
+echo -e "${GREEN}========================================${NC}"
+
+# Step 1: Setup environment
+echo -e "\n${YELLOW}Step 1/4: 设置环境${NC}"
+./setup.sh
+
+# Step 2: Check if data is already ingested
+echo -e "\n${YELLOW}Step 2/4: 检查数据${NC}"
+CONDA_SH="${CONDA_SH:-/home/tw/miniconda3/etc/profile.d/conda.sh}"
+source "$CONDA_SH"
+conda activate searchengine
+
+# Check if index exists
+# NOTE: stderr is discarded and any failure of this probe is reported as "0",
+# which triggers the ingestion below (ingest.sh recreates the index).
+INDEX_EXISTS=$(python -c "
+from config.env_config import get_es_config
+from utils.es_client import ESClient
+from config import ConfigLoader
+
+es_config = get_es_config()
+es_client = ESClient(hosts=[es_config['host']], username=es_config.get('username'), password=es_config.get('password'))
+
+config_loader = ConfigLoader('config/schema')
+config = config_loader.load_customer_config('customer1')
+
+if es_client.index_exists(config.es_index_name):
+    doc_count = es_client.count(config.es_index_name)
+    print(f'{doc_count}')
+else:
+    print('0')
+" 2>/dev/null || echo "0")
+
+if [ "$INDEX_EXISTS" = "0" ]; then
+    echo -e "${YELLOW}索引不存在,开始导入数据...${NC}"
+    echo -e "${YELLOW}注意: 首次导入会下载模型文件,可能需要10-30分钟${NC}"
+    echo -e "${YELLOW}导入1000条数据进行快速测试(跳过embedding以加快速度)${NC}"
+    ./scripts/ingest.sh 1000 true
+else
+    echo -e "${GREEN}数据已存在,包含 $INDEX_EXISTS 条文档${NC}"
+fi
+
+# Step 3: Start backend in background
+echo -e "\n${YELLOW}Step 3/4: 启动后端服务${NC}"
+echo -e "${YELLOW}后端服务将在后台运行...${NC}"
+# logs/ may not exist on a fresh checkout; without it the redirections below fail.
+mkdir -p logs
+nohup ./scripts/start_backend.sh > logs/backend.log 2>&1 &
+BACKEND_PID=$!
+echo "$BACKEND_PID" > logs/backend.pid
+echo -e "${GREEN}后端服务已启动 (PID: $BACKEND_PID)${NC}"
+echo -e "${GREEN}日志文件: logs/backend.log${NC}"
+
+# Wait for backend to start: poll the health endpoint for up to ~30 seconds
+# instead of sleeping a fixed 5 seconds, since start-up time varies. Stop
+# polling early if the backend process has already exited.
+echo -e "${YELLOW}等待后端服务启动...${NC}"
+BACKEND_READY=false
+for ((i = 0; i < 30; i++)); do
+    # -f makes curl fail on HTTP error responses, not only on connection errors
+    if curl -sf http://localhost:8000/admin/health > /dev/null 2>&1; then
+        BACKEND_READY=true
+        break
+    fi
+    if ! kill -0 "$BACKEND_PID" 2> /dev/null; then
+        break
+    fi
+    sleep 1
+done
+
+# Check if backend is running
+if [ "$BACKEND_READY" = "true" ]; then
+    echo -e "${GREEN}✓ 后端服务运行正常${NC}"
+else
+    echo -e "${RED}✗ 后端服务启动失败,请检查日志: logs/backend.log${NC}"
+    exit 1
+fi
+
+# Step 4: Start frontend
+echo -e "\n${YELLOW}Step 4/4: 启动前端服务${NC}"
+echo -e "${GREEN}========================================${NC}"
+echo -e "${GREEN}所有服务启动完成!${NC}"
+echo -e "${GREEN}========================================${NC}"
+echo ""
+echo -e "访问地址:"
+echo -e " ${GREEN}前端界面: http://localhost:8080${NC}"
+echo -e " ${GREEN}后端API: http://localhost:8000${NC}"
+echo -e " ${GREEN}API文档: http://localhost:8000/docs${NC}"
+echo ""
+echo -e "日志文件:"
+echo -e " 后端: logs/backend.log"
+echo ""
+echo -e "停止服务:"
+echo -e " 后端: kill \$(cat logs/backend.pid)"
+echo -e " 前端: Ctrl+C"
+echo ""
+echo -e "${YELLOW}正在启动前端服务...${NC}"
+echo -e "${YELLOW}按 Ctrl+C 停止前端服务${NC}"
+echo ""
+
+./scripts/start_frontend.sh
-- 
libgit2 0.21.2