Commit d1d356f88c7ffd1dbaec67f432b2a4f44344e4ab

Authored by tangwang
1 parent 3b84605d

脚本优化

@@ -17,6 +17,21 @@ DEEPL_AUTH_KEY= @@ -17,6 +17,21 @@ DEEPL_AUTH_KEY=
17 # API Service Configuration 17 # API Service Configuration
18 API_HOST=0.0.0.0 18 API_HOST=0.0.0.0
19 API_PORT=6002 19 API_PORT=6002
  20 +INDEXER_HOST=0.0.0.0
  21 +INDEXER_PORT=6004
  22 +
  23 +# Optional service ports
  24 +EMBEDDING_PORT=6005
  25 +TRANSLATION_PORT=6006
  26 +RERANKER_PORT=6007
  27 +EMBEDDING_SERVICE_URL=http://127.0.0.1:6005
  28 +TRANSLATION_SERVICE_URL=http://127.0.0.1:6006
  29 +RERANKER_SERVICE_URL=http://127.0.0.1:6007/rerank
  30 +
  31 +# Optional startup switches (run.sh / scripts/service_ctl.sh)
  32 +START_EMBEDDING=0
  33 +START_TRANSLATOR=0
  34 +START_RERANKER=0
20 35
21 # Embedding Models 36 # Embedding Models
22 TEXT_MODEL_DIR=/data/tw/models/bge-m3 # 已经改为web请求了,不使用本地模型 37 TEXT_MODEL_DIR=/data/tw/models/bge-m3 # 已经改为web请求了,不使用本地模型
@@ -154,7 +154,7 @@ python scripts/recreate_and_import.py \ @@ -154,7 +154,7 @@ python scripts/recreate_and_import.py \
154 | 步骤 | 去哪里看 | 摘要 | 154 | 步骤 | 去哪里看 | 摘要 |
155 |------|---------|------| 155 |------|---------|------|
156 | 1. 准备环境 | `docs/环境配置说明.md` / `Usage-Guide.md` | Conda、`activate.sh`、依赖、ES/MySQL、`.env` | 156 | 1. 准备环境 | `docs/环境配置说明.md` / `Usage-Guide.md` | Conda、`activate.sh`、依赖、ES/MySQL、`.env` |
157 -| 2. 构造测试数据 | `测试数据指南.md` | Tenant1 Mock、Tenant2 CSV、`mock_data.sh` / `ingest.sh` | 157 +| 2. 构造测试数据 | `测试数据指南.md` | Tenant1 Mock、Tenant2 CSV、`mock_data.sh` |
158 | 3. 启动与验证 | `Usage-Guide.md` | `run.sh` 一键启动、分步脚本、日志与健康检查 | 158 | 3. 启动与验证 | `Usage-Guide.md` | `run.sh` 一键启动、分步脚本、日志与健康检查 |
159 | 4. 理解架构 | `系统设计文档.md` | 数据流、配置系统、查询/搜索/索引模块 | 159 | 4. 理解架构 | `系统设计文档.md` | 数据流、配置系统、查询/搜索/索引模块 |
160 | 5. 接入搜索 API | `搜索API对接指南.md` / `搜索API速查表.md` | REST 端点、参数、响应、最佳实践 | 160 | 5. 接入搜索 API | `搜索API对接指南.md` / `搜索API速查表.md` | REST 端点、参数、响应、最佳实践 |
@@ -173,19 +173,21 @@ docker run -d --name es -p 9200:9200 elasticsearch:8.11.0 @@ -173,19 +173,21 @@ docker run -d --name es -p 9200:9200 elasticsearch:8.11.0
173 # 2. 构造测试数据并导入 MySQL 173 # 2. 构造测试数据并导入 MySQL
174 ./scripts/mock_data.sh # 详见 TEST_DATA_GUIDE.md 174 ./scripts/mock_data.sh # 详见 TEST_DATA_GUIDE.md
175 175
176 -# 3. 从 MySQL 注入到 Elasticsearch  
177 -./scripts/ingest.sh 1 true  
178 -./scripts/ingest.sh 2 true 176 +# 3. 创建租户索引结构并导入数据(推荐;reindex 接口由 indexer 服务提供,需先执行第 4 步启动服务,确保 6004 端口可用)
  177 +./scripts/create_tenant_index.sh 162
  178 +curl -X POST "http://localhost:6004/indexer/reindex" \
  179 + -H "Content-Type: application/json" \
  180 + -d '{"tenant_id":"162","batch_size":500}'
179 181
180 -# 4. 启动服务 182 +# 4. 启动核心服务(backend/indexer/frontend)
181 ./run.sh 183 ./run.sh
182 184
183 -# (可选)启动本地向量服务(BGE-M3 / CN-CLIP,本地模型推理)  
184 -# 提供: POST http://localhost:6005/embed/text  
185 -# POST http://localhost:6005/embed/image  
186 -./scripts/start_embedding_service.sh 185 +# (可选)附加启动 embedding / translator / reranker
  186 +START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh
187 # 187 #
188 -# 详细说明见:`embeddings/README.md` 188 +# 查看服务状态 / 停止
  189 +./scripts/service_ctl.sh status
  190 +./scripts/stop.sh
189 191
190 # 5. 调用文本搜索 API 192 # 5. 调用文本搜索 API
191 curl -X POST http://localhost:6002/search/ \ 193 curl -X POST http://localhost:6002/search/ \
@@ -199,6 +201,7 @@ curl -X POST http://localhost:6002/search/ \ @@ -199,6 +201,7 @@ curl -X POST http://localhost:6002/search/ \
199 | 文档 | 内容提要 | 适用场景 | 201 | 文档 | 内容提要 | 适用场景 |
200 |------|----------|----------| 202 |------|----------|----------|
201 | `docs/环境配置说明.md` | 系统要求、`activate.sh`、Conda/依赖、外部服务、CONDA_ROOT | 首次部署、新机器环境 | 203 | `docs/环境配置说明.md` | 系统要求、`activate.sh`、Conda/依赖、外部服务、CONDA_ROOT | 首次部署、新机器环境 |
  204 +| `docs/SERVICE_MATRIX.md` | 服务分层、端口、统一启动/停止入口 | 运维值守、联调启动 |
202 | `Usage-Guide.md` | 环境准备、服务启动、配置、日志、验证手册 | 日常运维、调试 | 205 | `Usage-Guide.md` | 环境准备、服务启动、配置、日志、验证手册 | 日常运维、调试 |
203 | `基础配置指南.md` | 统一硬编码配置说明、索引结构、查询配置 | 了解系统配置、修改配置 | 206 | `基础配置指南.md` | 统一硬编码配置说明、索引结构、查询配置 | 了解系统配置、修改配置 |
204 | `测试数据指南.md` | 两个租户的模拟/CSV 数据构造 & MySQL→ES 流程 | 数据准备、联调 | 207 | `测试数据指南.md` | 两个租户的模拟/CSV 数据构造 & MySQL→ES 流程 | 数据准备、联调 |
@@ -220,7 +223,7 @@ curl -X POST http://localhost:6002/search/ \ @@ -220,7 +223,7 @@ curl -X POST http://localhost:6002/search/ \
220 223
221 - **数据构建 → MySQL → Elasticsearch** 224 - **数据构建 → MySQL → Elasticsearch**
222 - `scripts/mock_data.sh`:Tenant1 Mock + Tenant2 CSV 一条龙 225 - `scripts/mock_data.sh`:Tenant1 Mock + Tenant2 CSV 一条龙
223 - - `scripts/ingest.sh <tenant_id> [recreate]`:驱动 `indexer/` 模块写入 `search_products` 226 + - `scripts/create_tenant_index.sh <tenant_id>` + `POST /indexer/reindex`:推荐导入链路
224 - 详解:`测试数据指南.md` 227 - 详解:`测试数据指南.md`
225 228
226 - **索引富化 & Java 对接** 229 - **索引富化 & Java 对接**
config/env_config.py
@@ -55,10 +55,26 @@ DASHSCOPE_API_KEY = os.getenv(&#39;DASHSCOPE_API_KEY&#39;) @@ -55,10 +55,26 @@ DASHSCOPE_API_KEY = os.getenv(&#39;DASHSCOPE_API_KEY&#39;)
55 # API Service Configuration 55 # API Service Configuration
56 API_HOST = os.getenv('API_HOST', '0.0.0.0') 56 API_HOST = os.getenv('API_HOST', '0.0.0.0')
57 API_PORT = int(os.getenv('API_PORT', 6002)) 57 API_PORT = int(os.getenv('API_PORT', 6002))
# Indexer service
INDEXER_HOST = os.getenv('INDEXER_HOST', '0.0.0.0')
# `or <default>` (rather than a getenv default) also covers variables that are
# present but empty, e.g. a bare `INDEXER_PORT=` line in .env, which would
# otherwise make int('') raise ValueError at import time.
INDEXER_PORT = int(os.getenv('INDEXER_PORT') or 6004)
# Optional dependent services
EMBEDDING_HOST = os.getenv('EMBEDDING_HOST', '127.0.0.1')
EMBEDDING_PORT = int(os.getenv('EMBEDDING_PORT') or 6005)
TRANSLATION_HOST = os.getenv('TRANSLATION_HOST', '127.0.0.1')
# TRANSLATOR_PORT is accepted as a fallback spelling of TRANSLATION_PORT.
TRANSLATION_PORT = int(
    os.getenv('TRANSLATION_PORT') or os.getenv('TRANSLATOR_PORT') or 6006
)
RERANKER_HOST = os.getenv('RERANKER_HOST', '127.0.0.1')
RERANKER_PORT = int(os.getenv('RERANKER_PORT') or 6007)
58 # API_BASE_URL: 如果未设置,根据API_HOST构建(0.0.0.0使用localhost) 68 # API_BASE_URL: 如果未设置,根据API_HOST构建(0.0.0.0使用localhost)
59 API_BASE_URL = os.getenv('API_BASE_URL') 69 API_BASE_URL = os.getenv('API_BASE_URL')
60 if not API_BASE_URL: 70 if not API_BASE_URL:
61 API_BASE_URL = f'http://localhost:{API_PORT}' if API_HOST == '0.0.0.0' else f'http://{API_HOST}:{API_PORT}' 71 API_BASE_URL = f'http://localhost:{API_PORT}' if API_HOST == '0.0.0.0' else f'http://{API_HOST}:{API_PORT}'
# Base URLs of the sibling services. An explicit (non-empty) environment value
# wins; otherwise the URL is assembled from the host/port constants above.
INDEXER_BASE_URL = os.getenv('INDEXER_BASE_URL')
if not INDEXER_BASE_URL:
    # 0.0.0.0 is a bind address, so clients are pointed at localhost instead.
    if INDEXER_HOST == '0.0.0.0':
        INDEXER_BASE_URL = f'http://localhost:{INDEXER_PORT}'
    else:
        INDEXER_BASE_URL = f'http://{INDEXER_HOST}:{INDEXER_PORT}'

EMBEDDING_SERVICE_URL = os.getenv('EMBEDDING_SERVICE_URL')
if not EMBEDDING_SERVICE_URL:
    EMBEDDING_SERVICE_URL = f'http://{EMBEDDING_HOST}:{EMBEDDING_PORT}'

TRANSLATION_SERVICE_URL = os.getenv('TRANSLATION_SERVICE_URL')
if not TRANSLATION_SERVICE_URL:
    TRANSLATION_SERVICE_URL = f'http://{TRANSLATION_HOST}:{TRANSLATION_PORT}'

RERANKER_SERVICE_URL = os.getenv('RERANKER_SERVICE_URL')
if not RERANKER_SERVICE_URL:
    RERANKER_SERVICE_URL = f'http://{RERANKER_HOST}:{RERANKER_PORT}/rerank'
62 78
63 # Model Directories 79 # Model Directories
64 TEXT_MODEL_DIR = os.getenv('TEXT_MODEL_DIR', '/data/tw/models/bge-m3') 80 TEXT_MODEL_DIR = os.getenv('TEXT_MODEL_DIR', '/data/tw/models/bge-m3')
@@ -244,7 +244,7 @@ http://&lt;kibana-server-ip&gt;:5601 @@ -244,7 +244,7 @@ http://&lt;kibana-server-ip&gt;:5601
244 - `ES_HOST=http://127.0.0.1:9200` 244 - `ES_HOST=http://127.0.0.1:9200`
245 - 按你项目已有脚本执行索引初始化和数据导入(在 9.3 上推荐使用 bfloat16 版 mapping): 245 - 按你项目已有脚本执行索引初始化和数据导入(在 9.3 上推荐使用 bfloat16 版 mapping):
246 - 创建索引:使用 `mappings/search_products.json`(bfloat16 版本) 246 - 创建索引:使用 `mappings/search_products.json`(bfloat16 版本)
247 - - 执行你现有的数据导入脚本:`./scripts/ingest.sh <tenant_id> true` 或 `python main.py ingest ...` 247 + - 推荐导入流程:`./scripts/create_tenant_index.sh <tenant_id>` + `POST /indexer/reindex`
248 248
249 --- 249 ---
250 250
docs/SERVICE_MATRIX.md 0 → 100644
@@ -0,0 +1,57 @@ @@ -0,0 +1,57 @@
  1 +# 服务矩阵(Service Matrix)
  2 +
  3 +本文档定义当前项目的服务分层、默认启动策略与脚本入口。
  4 +
  5 +## 1. 服务分层
  6 +
  7 +| 服务 | 角色 | 默认端口 | 是否默认启动 | 启动脚本 | 停止方式 |
  8 +|---|---|---:|---|---|---|
  9 +| backend | 核心搜索 API | 6002 | 是 | `scripts/start_backend.sh` | `scripts/service_ctl.sh stop backend` |
  10 +| indexer | 核心索引 API | 6004 | 是 | `scripts/start_indexer.sh` | `scripts/service_ctl.sh stop indexer` |
  11 +| frontend | 调试 UI | 6003 | 是 | `scripts/start_frontend.sh` | `scripts/service_ctl.sh stop frontend` |
  12 +| embedding | 向量服务(文本/图片) | 6005 | 否(按需) | `scripts/start_embedding_service.sh` | `scripts/service_ctl.sh stop embedding` |
  13 +| translator | 翻译服务(qwen/deepl) | 6006 | 否(按需) | `scripts/start_translator.sh` | `scripts/service_ctl.sh stop translator` |
  14 +| reranker | 重排服务(BGE) | 6007 | 否(按需) | `scripts/start_reranker.sh` | `scripts/service_ctl.sh stop reranker` |
  15 +| clip | CLIP 替代服务(legacy/可选) | 51000 | 否(按需) | `scripts/start_clip_service.sh` | `scripts/service_ctl.sh stop clip` |
  16 +| cnclip | CN-CLIP gRPC 服务(legacy/可选) | 51000 | 否(按需) | `scripts/start_cnclip_service.sh` | `scripts/service_ctl.sh stop cnclip` |
  17 +
  18 +> 说明:`clip` 与 `cnclip` 都是 legacy 服务,脚本内部自带后台化与 PID 管理,`service_ctl.sh` 仅做编排与委托。
  19 +
  20 +## 2. 统一控制入口
  21 +
  22 +- 推荐统一入口:`scripts/service_ctl.sh`
  23 +- 支持命令:`start` / `stop` / `restart` / `status`
  24 +
  25 +示例:
  26 +
  27 +```bash
  28 +# 启动核心服务(backend/indexer/frontend)
  29 +./scripts/service_ctl.sh start
  30 +
  31 +# 启动指定服务
  32 +./scripts/service_ctl.sh start backend indexer frontend translator reranker
  33 +
  34 +# 查看所有服务状态
  35 +./scripts/service_ctl.sh status
  36 +
  37 +# 停止全部已知服务
  38 +./scripts/service_ctl.sh stop
  39 +```
  40 +
  41 +## 3. 默认与可选服务策略
  42 +
  43 +- `./run.sh` 默认只启动核心服务:`backend/indexer/frontend`
  44 +- 如需启动可选能力,使用环境变量:
  45 +
  46 +```bash
  47 +START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh
  48 +```
  49 +
  50 +## 4. 兼容入口
  51 +
  52 +以下脚本仍保留,用于兼容旧习惯,但内部已委托到统一控制脚本:
  53 +
  54 +- `run.sh`
  55 +- `restart.sh`
  56 +- `scripts/start.sh`
  57 +- `scripts/stop.sh`
docs/Usage-Guide.md
@@ -112,16 +112,42 @@ cd /data/saas-search @@ -112,16 +112,42 @@ cd /data/saas-search
112 112
113 这个脚本会自动: 113 这个脚本会自动:
114 1. 创建日志目录 114 1. 创建日志目录
115 -2. 启动后端API服务(后台运行)  
116 -3. 启动前端Web界面(后台运行)  
117 -4. 等待服务就绪 115 +2. 启动核心服务(backend/indexer/frontend)
  116 +3. 写入 PID 到 `logs/*.pid`
  117 +4. 执行健康检查
118 118
119 启动完成后,访问: 119 启动完成后,访问:
120 - **前端界面**: http://localhost:6003 120 - **前端界面**: http://localhost:6003
121 - **后端API**: http://localhost:6002 121 - **后端API**: http://localhost:6002
122 - **API文档**: http://localhost:6002/docs 122 - **API文档**: http://localhost:6002/docs
  123 +- **索引API**: http://localhost:6004/docs
123 124
124 -### 方式2: 分步启动(单环境) 125 +可选:全功能模式(同时启动 embedding/translator/reranker):
  126 +
  127 +```bash
  128 +START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh
  129 +```
  130 +
  131 +### 方式2: 统一控制脚本(推荐)
  132 +
  133 +```bash
  134 +# 查看状态
  135 +./scripts/service_ctl.sh status
  136 +
  137 +# 启动核心服务(默认)
  138 +./scripts/service_ctl.sh start
  139 +
  140 +# 启动指定服务
  141 +./scripts/service_ctl.sh start backend indexer frontend translator reranker
  142 +
  143 +# 停止全部服务(含可选服务)
  144 +./scripts/service_ctl.sh stop
  145 +
  146 +# 重启
  147 +./scripts/service_ctl.sh restart
  148 +```
  149 +
  150 +### 方式3: 分步启动(单环境)
125 151
126 #### 启动后端服务 152 #### 启动后端服务
127 153
@@ -139,7 +165,7 @@ cd /data/saas-search @@ -139,7 +165,7 @@ cd /data/saas-search
139 165
140 前端界面会在 http://localhost:6003 启动 166 前端界面会在 http://localhost:6003 启动
141 167
142 -### 方式3: 多环境示例(prod / uat) 168 +### 方式4: 多环境示例(prod / uat)
143 169
144 假设有两套环境: 170 假设有两套环境:
145 171
@@ -182,7 +208,7 @@ cp .env.prod .env @@ -182,7 +208,7 @@ cp .env.prod .env
182 ./scripts/start_indexer.sh 208 ./scripts/start_indexer.sh
183 ``` 209 ```
184 210
185 -### 方式4: 手动启动 211 +### 方式5: 手动启动
186 212
187 #### 启动后端API服务 213 #### 启动后端API服务
188 214
@@ -205,14 +231,11 @@ python -m http.server 6003 @@ -205,14 +231,11 @@ python -m http.server 6003
205 ### 停止服务 231 ### 停止服务
206 232
207 ```bash 233 ```bash
208 -# 停止后端  
209 -kill $(cat logs/backend.pid)  
210 -  
211 -# 停止前端  
212 -kill $(cat logs/frontend.pid)  
213 -  
214 -# 或使用停止脚本 234 +# 推荐:统一停止
215 ./scripts/stop.sh 235 ./scripts/stop.sh
  236 +
  237 +# 或使用统一控制脚本
  238 +./scripts/service_ctl.sh stop
216 ``` 239 ```
217 240
218 ### 服务端口 241 ### 服务端口
@@ -221,8 +244,12 @@ kill $(cat logs/frontend.pid) @@ -221,8 +244,12 @@ kill $(cat logs/frontend.pid)
221 |------|------|-----| 244 |------|------|-----|
222 | Elasticsearch | 9200 | http://localhost:9200 | 245 | Elasticsearch | 9200 | http://localhost:9200 |
223 | Backend API | 6002 | http://localhost:6002 | 246 | Backend API | 6002 | http://localhost:6002 |
  247 +| Indexer API | 6004 | http://localhost:6004 |
224 | Frontend Web | 6003 | http://localhost:6003 | 248 | Frontend Web | 6003 | http://localhost:6003 |
225 -| API Docs | 6002 | http://localhost:6002/docs | 249 +| Embedding (optional) | 6005 | http://localhost:6005 |
  250 +| Translation (optional) | 6006 | http://localhost:6006 |
  251 +| Reranker (optional) | 6007 | http://localhost:6007 |
  252 +| API Docs | 6002 / 6004 | http://localhost:6002/docs / http://localhost:6004/docs |
226 253
227 --- 254 ---
228 255
@@ -256,6 +283,20 @@ DEEPL_AUTH_KEY=c9293ab4-ad25-479b-919f-ab4e63b429ed @@ -256,6 +283,20 @@ DEEPL_AUTH_KEY=c9293ab4-ad25-479b-919f-ab4e63b429ed
256 # API服务配置 283 # API服务配置
257 API_HOST=0.0.0.0 284 API_HOST=0.0.0.0
258 API_PORT=6002 285 API_PORT=6002
  286 +
  287 +# Indexer服务配置
  288 +INDEXER_HOST=0.0.0.0
  289 +INDEXER_PORT=6004
  290 +
  291 +# Optional service ports
  292 +EMBEDDING_PORT=6005
  293 +TRANSLATION_PORT=6006
  294 +RERANKER_PORT=6007
  295 +
  296 +# Optional startup switches (for run.sh / service_ctl.sh)
  297 +START_EMBEDDING=0
  298 +START_TRANSLATOR=0
  299 +START_RERANKER=0
259 ``` 300 ```
260 301
261 ### 修改配置 302 ### 修改配置
@@ -272,7 +313,11 @@ API_PORT=6002 @@ -272,7 +313,11 @@ API_PORT=6002
272 日志文件存储在 `logs/` 目录下: 313 日志文件存储在 `logs/` 目录下:
273 314
274 - `logs/backend.log` - 后端服务日志 315 - `logs/backend.log` - 后端服务日志
  316 +- `logs/indexer.log` - 索引服务日志
275 - `logs/frontend.log` - 前端服务日志 317 - `logs/frontend.log` - 前端服务日志
  318 +- `logs/embedding.log` - 向量服务日志(可选)
  319 +- `logs/translator.log` - 翻译服务日志(可选)
  320 +- `logs/reranker.log` - 重排服务日志(可选)
276 - `logs/search_engine.log` - 应用主日志(按天轮转) 321 - `logs/search_engine.log` - 应用主日志(按天轮转)
277 - `logs/errors.log` - 错误日志(按天轮转) 322 - `logs/errors.log` - 错误日志(按天轮转)
278 323
docs/基础配置指南.md
@@ -138,8 +138,8 @@ @@ -138,8 +138,8 @@
138 ### 修改索引结构 138 ### 修改索引结构
139 139
140 编辑 `mappings/search_products.json`,然后: 140 编辑 `mappings/search_products.json`,然后:
141 -1. 删除旧索引: `scripts/recreate_and_import.py --recreate`  
142 -2. 重新导入数据: `scripts/ingest.sh <tenant_id> true` 141 +1. 重建租户索引结构: `./scripts/create_tenant_index.sh <tenant_id>`
  142 +2. 重新导入数据: `POST /indexer/reindex`
143 143
144 ### 修改查询配置 144 ### 修改查询配置
145 145
docs/搜索API对接指南.md
@@ -958,7 +958,7 @@ tail -f logs/api.log @@ -958,7 +958,7 @@ tail -f logs/api.log
958 tail -f logs/*.log 958 tail -f logs/*.log
959 ``` 959 ```
960 960
961 -> ⚠️ **重要提示**:如需 **创建索引结构**,请参考 [5.0 为租户创建索引](#50-为租户创建索引) 章节,使用 `scripts/recreate_all_tenant_indices.py` 脚本。创建后需要调用 `/indexer/reindex` 导入数据。 961 +> ⚠️ **重要提示**:如需 **创建索引结构**,请参考 [5.0 为租户创建索引](#50-为租户创建索引) 章节,使用 `./scripts/create_tenant_index.sh <tenant_id>`。创建后需要调用 `/indexer/reindex` 导入数据。
962 962
963 **查看索引日志**: 963 **查看索引日志**:
964 964
docs/环境配置说明.md
@@ -170,8 +170,10 @@ saas-search 以 MySQL 中的店匠标准表为权威数据源: @@ -170,8 +170,10 @@ saas-search 以 MySQL 中的店匠标准表为权威数据源:
170 170
171 - **`activate.sh`**(项目根目录):激活 Conda 环境 `searchengine` 并加载 `.env`,**日常开发/部署以本脚本为准**。 171 - **`activate.sh`**(项目根目录):激活 Conda 环境 `searchengine` 并加载 `.env`,**日常开发/部署以本脚本为准**。
172 - `scripts/mock_data.sh`:一次性生成 Tenant1 Mock + Tenant2 CSV 数据并导入 MySQL 172 - `scripts/mock_data.sh`:一次性生成 Tenant1 Mock + Tenant2 CSV 数据并导入 MySQL
173 -- `scripts/ingest.sh <tenant_id> [recreate]`:从 MySQL 写入 Elasticsearch  
174 -- `run.sh` / `restart.sh`:服务启动/重启(内部会调用 `start_backend.sh` 等,同样使用 `CONDA_ROOT`) 173 +- `scripts/create_tenant_index.sh <tenant_id>`:创建租户索引结构
  174 +- `POST /indexer/reindex`:从 MySQL 导入到 Elasticsearch(推荐)
  175 +- `run.sh` / `restart.sh`:服务启动/重启(统一走 `scripts/service_ctl.sh`)
  176 +- `scripts/service_ctl.sh`:统一服务管理(start/stop/restart/status)
175 177
176 **新机器部署**:若 Conda 未安装在默认路径(如 `/home/tw/miniconda3`),请在执行上述脚本前设置 `CONDA_ROOT`。例如你的 conda 是 `~/anaconda3/bin/conda`(即 `/home/ubuntu/anaconda3/bin/conda`),则设置:`export CONDA_ROOT=$HOME/anaconda3`。可将该行写入 `~/.bashrc` 或部署说明。 178 **新机器部署**:若 Conda 未安装在默认路径(如 `/home/tw/miniconda3`),请在执行上述脚本前设置 `CONDA_ROOT`。例如你的 conda 是 `~/anaconda3/bin/conda`(即 `/home/ubuntu/anaconda3/bin/conda`),则设置:`export CONDA_ROOT=$HOME/anaconda3`。可将该行写入 `~/.bashrc` 或部署说明。
177 179
docs/翻译模块说明.md
@@ -51,14 +51,14 @@ TRANSLATION_MODEL=qwen # 或 deepl @@ -51,14 +51,14 @@ TRANSLATION_MODEL=qwen # 或 deepl
51 推荐(热更新): 51 推荐(热更新):
52 52
53 ```bash 53 ```bash
54 -cd /home/tw/saas-search 54 +cd /data/saas-search
55 uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload 55 uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload
56 ``` 56 ```
57 57
58 指定默认模型(不传请求 `model` 时生效): 58 指定默认模型(不传请求 `model` 时生效):
59 59
60 ```bash 60 ```bash
61 -cd /home/tw/saas-search 61 +cd /data/saas-search
62 export TRANSLATION_MODEL=qwen # 或 deepl 62 export TRANSLATION_MODEL=qwen # 或 deepl
63 uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload 63 uvicorn api.translator_app:app --host 0.0.0.0 --port 6006 --reload
64 ``` 64 ```
embeddings/config.py
@@ -8,12 +8,13 @@ Edit values here to configure: @@ -8,12 +8,13 @@ Edit values here to configure:
8 """ 8 """
9 9
10 from typing import Optional 10 from typing import Optional
  11 +import os
11 12
12 13
13 class EmbeddingConfig(object): 14 class EmbeddingConfig(object):
14 # Server 15 # Server
15 - HOST = "0.0.0.0"  
16 - PORT = 6005 16 + HOST = os.getenv("EMBEDDING_HOST", "0.0.0.0")
  17 + PORT = int(os.getenv("EMBEDDING_PORT", 6005))
17 18
18 # Text embeddings (BGE-M3) 19 # Text embeddings (BGE-M3)
19 TEXT_MODEL_DIR = "Xorbits/bge-m3" 20 TEXT_MODEL_DIR = "Xorbits/bge-m3"
reranker/config.py
1 """Reranker service configuration (simple Python config).""" 1 """Reranker service configuration (simple Python config)."""
2 2
  3 +import os
  4 +
3 5
4 class RerankerConfig(object): 6 class RerankerConfig(object):
5 # Server 7 # Server
6 - HOST = "0.0.0.0"  
7 - PORT = 6007 8 + HOST = os.getenv("RERANKER_HOST", "0.0.0.0")
  9 + PORT = int(os.getenv("RERANKER_PORT", 6007))
8 10
9 # Model 11 # Model
10 MODEL_NAME = "BAAI/bge-reranker-v2-m3" 12 MODEL_NAME = "BAAI/bge-reranker-v2-m3"
1 #!/bin/bash 1 #!/bin/bash
2 2
3 -# Restart script for saas-search services  
4 -# This script stops all services first, then starts them again 3 +# Unified restart script for saas-search services
5 4
6 cd "$(dirname "$0")" 5 cd "$(dirname "$0")"
7 6
8 -GREEN='\033[0;32m'  
9 -YELLOW='\033[1;33m'  
10 -RED='\033[0;31m'  
11 -NC='\033[0m'  
12 -  
13 -echo -e "${GREEN}========================================${NC}"  
14 -echo -e "${GREEN}saas-search服务重启脚本${NC}"  
15 -echo -e "${GREEN}========================================${NC}"  
16 -  
17 -# Step 1: Stop all services  
18 -echo -e "\n${YELLOW}Step 1/2: 停止现有服务${NC}"  
19 -if [ -f "./scripts/stop.sh" ]; then  
20 - ./scripts/stop.sh  
21 - if [ $? -eq 0 ]; then  
22 - echo -e "${GREEN}✓ 所有服务已成功停止${NC}"  
23 - else  
24 - echo -e "${YELLOW}⚠ 停止服务时出现警告,继续重启流程${NC}"  
25 - fi  
26 -else  
27 - echo -e "${RED}✗ 停止脚本不存在,无法安全重启${NC}"  
28 - exit 1  
29 -fi  
30 -  
31 -# Wait a moment for services to fully stop  
32 -echo -e "\n${YELLOW}等待服务完全关闭...${NC}"  
33 -sleep 3  
34 -  
35 -# Step 2: Start all services  
36 -echo -e "\n${YELLOW}Step 2/2: 重新启动服务${NC}"  
37 -if [ -f "./scripts/start.sh" ]; then  
38 - ./scripts/start.sh  
39 - if [ $? -eq 0 ]; then  
40 - echo -e "${GREEN}========================================${NC}"  
41 - echo -e "${GREEN}服务重启完成!${NC}"  
42 - echo -e "${GREEN}========================================${NC}"  
43 - else  
44 - echo -e "${RED}✗ 服务启动失败${NC}"  
45 - exit 1  
46 - fi  
47 -else  
48 - echo -e "${RED}✗ 启动脚本不存在,无法完成重启${NC}"  
49 - exit 1  
50 -fi  
51 \ No newline at end of file 7 \ No newline at end of file
  8 +./scripts/service_ctl.sh restart
1 #!/bin/bash 1 #!/bin/bash
2 2
3 -# Production startup script for saas-search services  
4 -# This script starts frontend and backend services (no data ingestion) 3 +# Unified startup script for saas-search services
5 4
6 cd "$(dirname "$0")" 5 cd "$(dirname "$0")"
7 6
8 -GREEN='\033[0;32m'  
9 -YELLOW='\033[1;33m'  
10 -RED='\033[0;31m'  
11 -NC='\033[0m'  
12 -  
13 -echo -e "${GREEN}========================================${NC}"  
14 -echo -e "${GREEN}saas-search服务启动脚本${NC}"  
15 -echo -e "${GREEN}========================================${NC}"  
16 -  
17 -# Create logs directory if it doesn't exist  
18 -mkdir -p logs  
19 -  
20 -# Call unified start script  
21 -./scripts/start.sh  
22 \ No newline at end of file 7 \ No newline at end of file
  8 +./scripts/service_ctl.sh start
scripts/indexer__old_2025_11/ingest.sh
@@ -2,6 +2,11 @@ @@ -2,6 +2,11 @@
2 2
3 # Unified data ingestion script for saas-search 3 # Unified data ingestion script for saas-search
4 # Ingests data from MySQL to Elasticsearch 4 # Ingests data from MySQL to Elasticsearch
  5 +#
  6 +# [LEGACY] 此脚本仅保留用于历史兼容,不建议新流程继续使用。
  7 +# 推荐改用:
  8 +# 1) ./scripts/create_tenant_index.sh <tenant_id>
  9 +# 2) POST /indexer/reindex
5 10
6 cd "$(dirname "$0")/.." 11 cd "$(dirname "$0")/.."
7 source /home/tw/miniconda3/etc/profile.d/conda.sh 12 source /home/tw/miniconda3/etc/profile.d/conda.sh
scripts/mock_data.sh
@@ -159,6 +159,6 @@ echo -e &quot;${GREEN}数据导入完成!${NC}&quot; @@ -159,6 +159,6 @@ echo -e &quot;${GREEN}数据导入完成!${NC}&quot;
159 echo -e "${GREEN}========================================${NC}" 159 echo -e "${GREEN}========================================${NC}"
160 echo "" 160 echo ""
161 echo -e "下一步:" 161 echo -e "下一步:"
162 -echo -e " ${YELLOW}./scripts/ingest.sh 1 true${NC} - 从MySQL灌入tenant_id=1数据到ES"  
163 -echo -e " ${YELLOW}./scripts/ingest.sh 2 true${NC} - 从MySQL灌入tenant_id=2数据到ES" 162 +echo -e " ${YELLOW}./scripts/create_tenant_index.sh <tenant_id>${NC} - 创建租户索引结构"
  163 +echo -e " ${YELLOW}curl -X POST http://localhost:6004/indexer/reindex ...${NC} - 从MySQL导入到ES"
164 echo "" 164 echo ""
scripts/service_ctl.sh 0 → 100755
@@ -0,0 +1,326 @@ @@ -0,0 +1,326 @@
  1 +#!/bin/bash
  2 +#
  3 +# Unified service lifecycle controller for saas-search.
  4 +# Supports: start / stop / restart / status
  5 +#
  6 +
  7 +set -euo pipefail
  8 +
  9 +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
  10 +LOG_DIR="${PROJECT_ROOT}/logs"
  11 +
  12 +mkdir -p "${LOG_DIR}"
  13 +
  14 +CORE_SERVICES=("backend" "indexer" "frontend")
  15 +OPTIONAL_SERVICES=("embedding" "translator" "reranker")
  16 +LEGACY_SERVICES=("clip" "cnclip")
  17 +
# Print every known service name (core, then optional, then legacy) as one
# space-separated line.
all_services() {
  local -a known=(
    "${CORE_SERVICES[@]}"
    "${OPTIONAL_SERVICES[@]}"
    "${LEGACY_SERVICES[@]}"
  )
  echo "${known[*]}"
}
  21 +
# Load ${PROJECT_ROOT}/.env into the environment, if the file exists.
#
# Variables that the caller already exported keep their value: .env only
# supplies defaults. Without this, a command such as
#   START_EMBEDDING=1 ./run.sh
# would be silently reset by a `START_EMBEDDING=0` line in .env.
load_env_file() {
  local env_file="${PROJECT_ROOT}/.env"
  if [ ! -f "${env_file}" ]; then
    return 0
  fi

  # Snapshot the caller's exported variables (names and values) before sourcing.
  local -a preset_names=()
  local -a preset_values=()
  local preset_name
  while IFS= read -r preset_name; do
    preset_names+=("${preset_name}")
    preset_values+=("${!preset_name-}")
  done < <(compgen -e)

  # `set -a` exports every assignment made while sourcing the file.
  set -a
  # shellcheck disable=SC1090
  source "${env_file}"
  set +a

  # Restore the snapshot so caller-provided values win over .env.
  # Failures (e.g. a read-only variable) are ignored on purpose.
  local idx
  for idx in "${!preset_names[@]}"; do
    export "${preset_names[idx]}=${preset_values[idx]}" 2>/dev/null || true
  done
}
  31 +
# Print the TCP port configured for a service.
#   $1  service name
# Each port comes from its environment variable, with a built-in default.
# An unknown service prints an empty line.
get_port() {
  local name="$1"
  local resolved=""

  case "${name}" in
    backend)    resolved="${API_PORT:-6002}" ;;
    indexer)    resolved="${INDEXER_PORT:-6004}" ;;
    frontend)   resolved="${FRONTEND_PORT:-6003}" ;;
    embedding)  resolved="${EMBEDDING_PORT:-6005}" ;;
    # TRANSLATOR_PORT is honoured as a fallback spelling.
    translator) resolved="${TRANSLATION_PORT:-${TRANSLATOR_PORT:-6006}}" ;;
    reranker)   resolved="${RERANKER_PORT:-6007}" ;;
    clip)       resolved="${CLIP_PORT:-51000}" ;;
    cnclip)     resolved="${CNCLIP_PORT:-51000}" ;;
  esac

  echo "${resolved}"
}
  46 +
# Print the PID-file path for a service.
#   $1  service name
# clip and cnclip use "<name>_service.pid"; every other service uses
# "<name>.pid". All files live under LOG_DIR.
pid_file() {
  local name="$1"
  local stem="${name}"

  if [ "${name}" = "clip" ] || [ "${name}" = "cnclip" ]; then
    stem="${name}_service"
  fi

  echo "${LOG_DIR}/${stem}.pid"
}
  55 +
# Print the log-file path for a service: "<LOG_DIR>/<name>.log".
#   $1  service name
log_file() {
  local name="$1"
  local target="${LOG_DIR}/${name}.log"
  echo "${target}"
}
  60 +
# Print the start-script path (relative to the project root) for a service.
#   $1  service name
# Returns 1 and prints nothing for an unknown service.
service_start_cmd() {
  local name="$1"
  local script=""

  case "${name}" in
    # Scripts named start_<service>.sh
    backend|indexer|frontend|translator|reranker)
      script="start_${name}.sh"
      ;;
    # Scripts named start_<service>_service.sh
    embedding|clip|cnclip)
      script="start_${name}_service.sh"
      ;;
    *)
      return 1
      ;;
  esac

  echo "./scripts/${script}"
}
  75 +
# Poll a service's HTTP endpoint on 127.0.0.1 until it answers successfully.
#   $1  service name
#   $2  maximum number of attempts (default 30)
#   $3  seconds to sleep after each failed attempt (default 1)
# Returns 0 once curl succeeds, 1 when all attempts are used up.
# Services without a known endpoint are reported healthy immediately.
wait_for_health() {
  local name="$1"
  local attempts_left="${2:-30}"
  local pause="${3:-1}"
  local endpoint

  case "${name}" in
    backend|indexer|embedding|translator|reranker) endpoint="/health" ;;
    frontend)                                      endpoint="/" ;;
    *)                                             return 0 ;;
  esac

  local port
  port="$(get_port "${name}")"

  while [ "${attempts_left}" -gt 0 ]; do
    if curl -sf "http://127.0.0.1:${port}${endpoint}" >/dev/null 2>&1; then
      return 0
    fi
    attempts_left=$((attempts_left - 1))
    sleep "${pause}"
  done

  return 1
}
  104 +
# Succeed when the service's PID file exists and names a process that
# `kill -0` can signal.
#   $1  service name
is_running_by_pid() {
  local name="$1"
  local pid_path
  pid_path="$(pid_file "${name}")"

  [ -f "${pid_path}" ] || return 1

  local recorded
  recorded="$(cat "${pid_path}" 2>/dev/null || true)"
  if [ -z "${recorded}" ]; then
    return 1
  fi

  kill -0 "${recorded}" 2>/dev/null
}
  116 +
# Succeed when some process is LISTENING on the service's TCP port.
#   $1  service name
#
# Only listening sockets are considered. A plain `lsof -i:PORT` also matches
# client sockets connected to that port (e.g. the backend holding a
# connection to the embedding service), which would report a stopped service
# as running.
is_running_by_port() {
  local service="$1"
  local port
  port="$(get_port "${service}")"
  # -nP skips host/port name lookups; -t prints bare PIDs.
  [ -n "${port}" ] && lsof -nP -t -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1
}
  123 +
# Start a single service unless it already appears to be running.
#   $1  service name
#
# Core and optional services are launched in the background with nohup; the
# launcher PID goes to the service's PID file and output to its log file.
# clip/cnclip are delegated to their own scripts, which daemonize themselves.
# A failed health check is only reported as a warning; the function still
# returns 0 so the remaining services are started.
start_one() {
  local service="$1"
  cd "${PROJECT_ROOT}"

  # Under `set -e` a bare failing command substitution would terminate the
  # whole script without a message, so the lookup is guarded explicitly.
  local cmd
  if ! cmd="$(service_start_cmd "${service}")"; then
    echo "[warn] ${service} unsupported start path"
    return 0
  fi

  local pf lf
  pf="$(pid_file "${service}")"
  lf="$(log_file "${service}")"

  if is_running_by_pid "${service}" || is_running_by_port "${service}"; then
    echo "[skip] ${service} already running"
    return 0
  fi

  case "${service}" in
    clip|cnclip)
      echo "[start] ${service} (managed by native script)"
      bash -lc "${cmd}" >> "${lf}" 2>&1 || true
      if is_running_by_pid "${service}" || is_running_by_port "${service}"; then
        echo "[ok] ${service} started (log=${lf})"
      else
        echo "[warn] ${service} may not be running, inspect ${lf}"
      fi
      ;;
    backend|indexer|frontend|embedding|translator|reranker)
      echo "[start] ${service}"
      nohup bash -lc "${cmd}" > "${lf}" 2>&1 &
      local pid=$!
      echo "${pid}" > "${pf}"
      if wait_for_health "${service}"; then
        echo "[ok] ${service} healthy (pid=${pid}, log=${lf})"
      else
        echo "[warn] ${service} health check timeout, inspect ${lf}"
      fi
      ;;
    *)
      # Defensive: a service known to service_start_cmd but not handled here.
      echo "[warn] ${service} unsupported start path"
      ;;
  esac
}
  164 +
# Stop a single service.
#   $1  service name
#
# clip/cnclip are delegated to their own stop scripts. For every other
# service the PID from the PID file is sent TERM, then KILL if it is still
# alive after one second. Any process still LISTENING on the service port
# gets the same treatment, which covers children that outlive the launcher.
#
# Only listening sockets are targeted. A plain `lsof -i:PORT` also returns
# processes with a client connection to that port, so stopping e.g. the
# embedding service could kill a backend that was merely connected to it.
stop_one() {
  local service="$1"
  cd "${PROJECT_ROOT}"

  case "${service}" in
    clip|cnclip)
      echo "[stop] ${service} (managed by native script)"
      bash -lc "./scripts/stop_${service}_service.sh" || true
      return 0
      ;;
  esac

  # Declared up front so the loop variable below never leaks into global scope.
  local pf pid pids port
  pf="$(pid_file "${service}")"

  if [ -f "${pf}" ]; then
    pid="$(cat "${pf}" 2>/dev/null || true)"
    if [ -n "${pid}" ] && kill -0 "${pid}" 2>/dev/null; then
      echo "[stop] ${service} pid=${pid}"
      kill -TERM "${pid}" 2>/dev/null || true
      sleep 1
      if kill -0 "${pid}" 2>/dev/null; then
        kill -KILL "${pid}" 2>/dev/null || true
      fi
    fi
    rm -f "${pf}"
  fi

  port="$(get_port "${service}")"
  if [ -n "${port}" ]; then
    pids="$(lsof -nP -t -iTCP:"${port}" -sTCP:LISTEN 2>/dev/null || true)"
    if [ -n "${pids}" ]; then
      echo "[stop] ${service} port=${port} pids=${pids}"
      for pid in ${pids}; do
        kill -TERM "${pid}" 2>/dev/null || true
      done
      sleep 1
      # Re-query so that only listeners that ignored TERM are force-killed.
      pids="$(lsof -nP -t -iTCP:"${port}" -sTCP:LISTEN 2>/dev/null || true)"
      for pid in ${pids}; do
        kill -KILL "${pid}" 2>/dev/null || true
      done
    fi
  fi
}
  214 +
# Print one status line for a service: name, running flag, port and PID(s).
#   $1  service name
#
# A service counts as running when its PID file names a live process or,
# failing that, when some process is LISTENING on its port. Client sockets
# connected to the port are ignored so they cannot produce a false
# "running=yes" or show an unrelated PID.
status_one() {
  local service="$1"
  local port
  port="$(get_port "${service}")"
  local running="no"
  local pid_info="-"
  local listener_pids=""

  if is_running_by_pid "${service}"; then
    running="yes"
    pid_info="$(cat "$(pid_file "${service}")" 2>/dev/null || echo "-")"
  elif [ -n "${port}" ]; then
    # Join multiple listener PIDs with commas; an lsof miss leaves it empty.
    listener_pids="$(lsof -nP -t -iTCP:"${port}" -sTCP:LISTEN 2>/dev/null | tr '\n' ',' | sed 's/,$//' || true)"
    if [ -n "${listener_pids}" ]; then
      running="yes"
      pid_info="${listener_pids}"
    fi
  fi

  printf "%-10s running=%-3s port=%-6s pid=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}"
}
  232 +
# Print the space-separated list of services an action applies to.
#   $1   action (start / stop / restart / status)
#   $2.. optional explicit service names
# Explicit names are echoed back unchanged. Otherwise `start` selects the
# core services plus any optional ones enabled via START_* = 1, while
# stop/restart/status select every known service. Any other action prints
# an empty line.
resolve_targets() {
  local action="$1"
  shift || true

  if [ "$#" -gt 0 ]; then
    echo "$*"
    return
  fi

  if [ "${action}" = "start" ]; then
    local -a selected=("${CORE_SERVICES[@]}")
    local pair flag_name
    # Each entry maps an opt-in switch to the service it enables.
    for pair in START_EMBEDDING:embedding START_TRANSLATOR:translator START_RERANKER:reranker; do
      flag_name="${pair%%:*}"
      if [ "${!flag_name:-0}" = "1" ]; then
        selected+=("${pair#*:}")
      fi
    done
    echo "${selected[*]}"
    return
  fi

  case "${action}" in
    stop|restart|status) all_services ;;
    *)                   echo "" ;;
  esac
}
  258 +
# Print command-line help.
usage() {
  cat <<'EOF'
Usage:
  ./scripts/service_ctl.sh start [service...]
  ./scripts/service_ctl.sh stop [service...]
  ./scripts/service_ctl.sh restart [service...]
  ./scripts/service_ctl.sh status [service...]

Default target set (when no service provided):
  start   -> backend indexer frontend (+ optional by env flags)
  stop    -> all known services
  restart -> stop all known services, then start the default start set
  status  -> all known services

Optional startup flags:
  START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh
EOF
}

# Entry point: parse the action, load .env, resolve the target services and
# dispatch to start_one / stop_one / status_one for each of them.
#   $1   action (start / stop / restart / status)
#   $2.. optional explicit service names
# Exits 1 after printing usage when the action is missing or unknown.
main() {
  if [ "$#" -lt 1 ]; then
    usage
    exit 1
  fi

  local action="$1"
  shift || true

  load_env_file
  local targets
  targets="$(resolve_targets "${action}" "$@")"
  if [ -z "${targets}" ]; then
    usage
    exit 1
  fi

  local svc
  case "${action}" in
    start)
      for svc in ${targets}; do
        start_one "${svc}"
      done
      ;;
    stop)
      for svc in ${targets}; do
        stop_one "${svc}"
      done
      ;;
    restart)
      # With no explicit services, everything is stopped but only the default
      # start set (core + START_* opt-ins) is brought back. Restarting must not
      # launch optional or legacy services that were never requested.
      local start_targets="${targets}"
      if [ "$#" -eq 0 ]; then
        start_targets="$(resolve_targets start)"
      fi
      for svc in ${targets}; do
        stop_one "${svc}"
      done
      for svc in ${start_targets}; do
        start_one "${svc}"
      done
      ;;
    status)
      for svc in ${targets}; do
        status_one "${svc}"
      done
      ;;
    *)
      usage
      exit 1
      ;;
  esac
}
  325 +
  326 +main "$@"
1 #!/bin/bash 1 #!/bin/bash
2 2
3 -# Unified startup script for saas-search services  
4 -# This script starts both frontend and backend services 3 +# Backward-compatible start entrypoint.
  4 +# Delegates to unified service controller.
5 5
6 -cd "$(dirname "$0")/.."  
7 -  
8 -GREEN='\033[0;32m'  
9 -YELLOW='\033[1;33m'  
10 -RED='\033[0;31m'  
11 -NC='\033[0m'  
12 -  
13 -echo -e "${GREEN}========================================${NC}"  
14 -echo -e "${GREEN}saas-search服务启动脚本${NC}"  
15 -echo -e "${GREEN}========================================${NC}"  
16 -  
17 -# Create logs directory if it doesn't exist  
18 -mkdir -p logs  
19 -  
20 -# Step 1: Start backend in background (search API)  
21 -echo -e "\n${YELLOW}Step 1/3: 启动后端搜索服务${NC}"  
22 -echo -e "${YELLOW}后端搜索服务将在后台运行...${NC}"  
23 -  
24 -nohup ./scripts/start_backend.sh > logs/backend.log 2>&1 &  
25 -BACKEND_PID=$!  
26 -echo $BACKEND_PID > logs/backend.pid  
27 -echo -e "${GREEN}后端搜索服务已启动 (PID: $BACKEND_PID)${NC}"  
28 -echo -e "${GREEN}日志文件: logs/backend.log${NC}"  
29 -  
30 -# Wait for backend to start  
31 -echo -e "${YELLOW}等待后端搜索服务启动...${NC}"  
32 -MAX_RETRIES=30  
33 -RETRY_COUNT=0  
34 -BACKEND_READY=false  
35 -  
36 -while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do  
37 - sleep 2  
38 - if curl -s http://localhost:6002/health > /dev/null 2>&1; then  
39 - BACKEND_READY=true  
40 - break  
41 - fi  
42 - RETRY_COUNT=$((RETRY_COUNT + 1))  
43 - echo -e "${YELLOW} 等待中... ($RETRY_COUNT/$MAX_RETRIES)${NC}"  
44 -done  
45 -  
46 -# Check if backend is running  
47 -if [ "$BACKEND_READY" = true ]; then  
48 - echo -e "${GREEN}✓ 后端搜索服务运行正常${NC}"  
49 -else  
50 - echo -e "${RED}✗ 后端搜索服务启动失败,请检查日志: logs/backend.log${NC}"  
51 - echo -e "${YELLOW}提示: 后端服务可能需要更多时间启动,或者检查端口是否被占用${NC}"  
52 - exit 1  
53 -fi 6 +set -e
54 7
55 -# Step 2: Start indexer in background  
56 -echo -e "\n${YELLOW}Step 2/3: 启动索引服务${NC}"  
57 -echo -e "${YELLOW}索引服务将在后台运行...${NC}"  
58 -  
59 -nohup ./scripts/start_indexer.sh > logs/indexer.log 2>&1 &  
60 -INDEXER_PID=$!  
61 -echo $INDEXER_PID > logs/indexer.pid  
62 -echo -e "${GREEN}索引服务已启动 (PID: $INDEXER_PID)${NC}"  
63 -echo -e "${GREEN}日志文件: logs/indexer.log${NC}"  
64 -  
65 -# Wait for indexer to start  
66 -echo -e "${YELLOW}等待索引服务启动...${NC}"  
67 -MAX_RETRIES=30  
68 -RETRY_COUNT=0  
69 -INDEXER_READY=false  
70 -  
71 -while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do  
72 - sleep 2  
73 - if curl -s http://localhost:6004/health > /dev/null 2>&1; then  
74 - INDEXER_READY=true  
75 - break  
76 - fi  
77 - RETRY_COUNT=$((RETRY_COUNT + 1))  
78 - echo -e "${YELLOW} 等待中... ($RETRY_COUNT/$MAX_RETRIES)${NC}"  
79 -done  
80 -  
81 -if [ "$INDEXER_READY" = true ]; then  
82 - echo -e "${GREEN}✓ 索引服务运行正常${NC}"  
83 -else  
84 - echo -e "${YELLOW}⚠ 索引服务可能还在启动中,请稍后访问 (日志: logs/indexer.log)${NC}"  
85 -fi  
86 -  
87 -# Step 3: Start frontend in background  
88 -echo -e "\n${YELLOW}Step 3/3: 启动前端服务${NC}"  
89 -echo -e "${YELLOW}前端服务将在后台运行...${NC}"  
90 -  
91 -nohup ./scripts/start_frontend.sh > logs/frontend.log 2>&1 &  
92 -FRONTEND_PID=$!  
93 -echo $FRONTEND_PID > logs/frontend.pid  
94 -echo -e "${GREEN}前端服务已启动 (PID: $FRONTEND_PID)${NC}"  
95 -echo -e "${GREEN}日志文件: logs/frontend.log${NC}"  
96 -  
97 -# Wait for frontend to start  
98 -echo -e "${YELLOW}等待前端服务启动...${NC}"  
99 -MAX_RETRIES=15  
100 -RETRY_COUNT=0  
101 -FRONTEND_READY=false 8 +cd "$(dirname "$0")/.."
102 9
103 -while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do  
104 - sleep 2  
105 - if curl -s http://localhost:6003/ > /dev/null 2>&1; then  
106 - FRONTEND_READY=true  
107 - break  
108 - fi  
109 - RETRY_COUNT=$((RETRY_COUNT + 1))  
110 - echo -e "${YELLOW} 等待中... ($RETRY_COUNT/$MAX_RETRIES)${NC}"  
111 -done 10 +echo "========================================"
  11 +echo "saas-search 服务启动"
  12 +echo "========================================"
  13 +echo "默认启动核心服务: backend/indexer/frontend"
  14 +echo "可选服务通过环境变量开启:"
  15 +echo " START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh"
  16 +echo
112 17
113 -# Check if frontend is running  
114 -if [ "$FRONTEND_READY" = true ]; then  
115 - echo -e "${GREEN}✓ 前端服务运行正常${NC}"  
116 -else  
117 - echo -e "${YELLOW}⚠ 前端服务可能还在启动中,请稍后访问${NC}"  
118 -fi 18 +./scripts/service_ctl.sh start
119 19
120 -echo -e "${GREEN}========================================${NC}"  
121 -echo -e "${GREEN}所有服务启动完成!${NC}"  
122 -echo -e "${GREEN}========================================${NC}"  
123 -echo ""  
124 -echo -e "访问地址:"  
125 -echo -e " ${GREEN}前端界面: http://localhost:6003${NC}"  
126 -echo -e " ${GREEN}后端API: http://localhost:6002${NC}"  
127 -echo -e " ${GREEN}API文档: http://localhost:6002/docs${NC}"  
128 -echo ""  
129 -echo -e "日志文件:"  
130 -echo -e " 后端: logs/backend.log"  
131 -echo -e " 前端: logs/frontend.log"  
132 -echo ""  
133 -echo -e "停止服务:"  
134 -echo -e " 所有服务: ./scripts/stop.sh"  
135 -echo -e " 单独停止后端: kill \$(cat logs/backend.pid)"  
136 -echo -e " 单独停止前端: kill \$(cat logs/frontend.pid)"  
137 -echo "" 20 +echo
  21 +echo "当前服务状态:"
  22 +./scripts/service_ctl.sh status backend indexer frontend embedding translator reranker
138 23
scripts/start_embedding_service.sh
@@ -14,8 +14,11 @@ cd "$(dirname "$0")/.." @@ -14,8 +14,11 @@ cd "$(dirname "$0")/.."
14 14
15 source ./activate.sh 15 source ./activate.sh
16 16
17 -EMBEDDING_SERVICE_HOST=$(python -c "from embeddings.config import CONFIG; print(CONFIG.HOST)")  
18 -EMBEDDING_SERVICE_PORT=$(python -c "from embeddings.config import CONFIG; print(CONFIG.PORT)") 17 +DEFAULT_EMBEDDING_SERVICE_HOST=$(python -c "from embeddings.config import CONFIG; print(CONFIG.HOST)")
  18 +DEFAULT_EMBEDDING_SERVICE_PORT=$(python -c "from embeddings.config import CONFIG; print(CONFIG.PORT)")
  19 +
  20 +EMBEDDING_SERVICE_HOST="${EMBEDDING_HOST:-${DEFAULT_EMBEDDING_SERVICE_HOST}}"
  21 +EMBEDDING_SERVICE_PORT="${EMBEDDING_PORT:-${DEFAULT_EMBEDDING_SERVICE_PORT}}"
19 22
20 echo "========================================" 23 echo "========================================"
21 echo "Starting Local Embedding Service" 24 echo "Starting Local Embedding Service"
scripts/start_reranker.sh 0 → 100755
@@ -0,0 +1,24 @@ @@ -0,0 +1,24 @@
  1 +#!/bin/bash
  2 +#
  3 +# Start Reranker Service
  4 +#
  5 +
  6 +set -e
  7 +
  8 +cd "$(dirname "$0")/.."
  9 +source ./activate.sh
  10 +
  11 +RERANKER_HOST="${RERANKER_HOST:-0.0.0.0}"
  12 +RERANKER_PORT="${RERANKER_PORT:-6007}"
  13 +
  14 +echo "========================================"
  15 +echo "Starting Reranker Service"
  16 +echo "========================================"
  17 +echo "Host: ${RERANKER_HOST}"
  18 +echo "Port: ${RERANKER_PORT}"
  19 +echo
  20 +
  21 +exec python -m uvicorn reranker.server:app \
  22 + --host "${RERANKER_HOST}" \
  23 + --port "${RERANKER_PORT}" \
  24 + --workers 1
scripts/start_servers.py
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 """ 2 """
3 Production-ready server startup script with proper error handling and monitoring. 3 Production-ready server startup script with proper error handling and monitoring.
  4 +
  5 +[LEGACY]
  6 +This script is kept for historical compatibility.
  7 +Preferred entrypoint is:
  8 + ./scripts/service_ctl.sh start
4 """ 9 """
5 10
6 import os 11 import os
scripts/start_translator.sh 0 → 100755
@@ -0,0 +1,25 @@ @@ -0,0 +1,25 @@
  1 +#!/bin/bash
  2 +#
  3 +# Start Translation Service
  4 +#
  5 +
  6 +set -e
  7 +
  8 +cd "$(dirname "$0")/.."
  9 +source ./activate.sh
  10 +
  11 +TRANSLATION_HOST="${TRANSLATION_HOST:-0.0.0.0}"
  12 +TRANSLATION_PORT="${TRANSLATION_PORT:-${TRANSLATOR_PORT:-6006}}"
  13 +
  14 +echo "========================================"
  15 +echo "Starting Translation Service"
  16 +echo "========================================"
  17 +echo "Host: ${TRANSLATION_HOST}"
  18 +echo "Port: ${TRANSLATION_PORT}"
  19 +echo "Default model: ${TRANSLATION_MODEL:-qwen}"
  20 +echo
  21 +
  22 +exec python -m uvicorn api.translator_app:app \
  23 + --host "${TRANSLATION_HOST}" \
  24 + --port "${TRANSLATION_PORT}" \
  25 + --workers 1
1 #!/bin/bash 1 #!/bin/bash
2 2
3 -# Stop script for Search Engine services  
4 -# This script stops both backend and frontend servers 3 +# Backward-compatible stop entrypoint.
  4 +# Delegates to unified service controller.
5 5
6 -echo "========================================"  
7 -echo "Stopping Search Engine Services"  
8 -echo "========================================"  
9 -  
10 -# Kill processes on port 6002 (backend - search API)  
11 -BACKEND_PIDS=$(lsof -ti:6002 2>/dev/null)  
12 -if [ ! -z "$BACKEND_PIDS" ]; then  
13 - echo "Stopping backend server(s) on port 6002..."  
14 - for PID in $BACKEND_PIDS; do  
15 - echo " Killing PID: $PID"  
16 - kill -TERM $PID 2>/dev/null || true  
17 - done  
18 - sleep 2  
19 - # Force kill if still running  
20 - REMAINING_PIDS=$(lsof -ti:6002 2>/dev/null)  
21 - if [ ! -z "$REMAINING_PIDS" ]; then  
22 - echo " Force killing remaining processes..."  
23 - for PID in $REMAINING_PIDS; do  
24 - kill -KILL $PID 2>/dev/null || true  
25 - done  
26 - fi  
27 - echo "Backend server stopped."  
28 -else  
29 - echo "No backend server found running on port 6002."  
30 -fi  
31 -  
32 -# Kill processes on port 6004 (indexer API)  
33 -INDEXER_PIDS=$(lsof -ti:6004 2>/dev/null)  
34 -if [ ! -z "$INDEXER_PIDS" ]; then  
35 - echo "Stopping indexer server(s) on port 6004..."  
36 - for PID in $INDEXER_PIDS; do  
37 - echo " Killing PID: $PID"  
38 - kill -TERM $PID 2>/dev/null || true  
39 - done  
40 - sleep 2  
41 - # Force kill if still running  
42 - REMAINING_PIDS=$(lsof -ti:6004 2>/dev/null)  
43 - if [ ! -z "$REMAINING_PIDS" ]; then  
44 - echo " Force killing remaining processes..."  
45 - for PID in $REMAINING_PIDS; do  
46 - kill -KILL $PID 2>/dev/null || true  
47 - done  
48 - fi  
49 - echo "Indexer server stopped."  
50 -else  
51 - echo "No indexer server found running on port 6004."  
52 -fi 6 +set -e
53 7
54 -# Kill processes on port 6003 (frontend)  
55 -FRONTEND_PIDS=$(lsof -ti:6003 2>/dev/null)  
56 -if [ ! -z "$FRONTEND_PIDS" ]; then  
57 - echo "Stopping frontend server(s) on port 6003..."  
58 - for PID in $FRONTEND_PIDS; do  
59 - echo " Killing PID: $PID"  
60 - kill -TERM $PID 2>/dev/null || true  
61 - done  
62 - sleep 2  
63 - # Force kill if still running  
64 - REMAINING_PIDS=$(lsof -ti:6003 2>/dev/null)  
65 - if [ ! -z "$REMAINING_PIDS" ]; then  
66 - echo " Force killing remaining processes..."  
67 - for PID in $REMAINING_PIDS; do  
68 - kill -KILL $PID 2>/dev/null || true  
69 - done  
70 - fi  
71 - echo "Frontend server stopped."  
72 -else  
73 - echo "No frontend server found running on port 6003."  
74 -fi 8 +cd "$(dirname "$0")/.."
75 9
76 -# Also stop any processes using PID files  
77 -if [ -f "logs/backend.pid" ]; then  
78 - BACKEND_PID=$(cat logs/backend.pid 2>/dev/null)  
79 - if [ ! -z "$BACKEND_PID" ] && kill -0 $BACKEND_PID 2>/dev/null; then  
80 - echo "Stopping backend server via PID file (PID: $BACKEND_PID)..."  
81 - kill -TERM $BACKEND_PID 2>/dev/null || true  
82 - sleep 2  
83 - kill -KILL $BACKEND_PID 2>/dev/null || true  
84 - fi  
85 - rm -f logs/backend.pid  
86 -fi  
87 -  
88 -[ -f "logs/indexer.pid" ] && INDEXER_PID=$(cat logs/indexer.pid 2>/dev/null)  
89 -if [ ! -z "$INDEXER_PID" ] && kill -0 $INDEXER_PID 2>/dev/null; then  
90 - echo "Stopping indexer server via PID file (PID: $INDEXER_PID)..."  
91 - kill -TERM $INDEXER_PID 2>/dev/null || true  
92 - sleep 2  
93 - kill -KILL $INDEXER_PID 2>/dev/null || true  
94 -fi  
95 -rm -f logs/indexer.pid 10 +echo "========================================"
  11 +echo "Stopping saas-search services"
  12 +echo "========================================"
96 13
97 -if [ -f "logs/frontend.pid" ]; then  
98 - FRONTEND_PID=$(cat logs/frontend.pid 2>/dev/null)  
99 - if [ ! -z "$FRONTEND_PID" ] && kill -0 $FRONTEND_PID 2>/dev/null; then  
100 - echo "Stopping frontend server via PID file (PID: $FRONTEND_PID)..."  
101 - kill -TERM $FRONTEND_PID 2>/dev/null || true  
102 - sleep 2  
103 - kill -KILL $FRONTEND_PID 2>/dev/null || true  
104 - fi  
105 - rm -f logs/frontend.pid  
106 -fi 14 +./scripts/service_ctl.sh stop
107 15
108 -echo "========================================"  
109 -echo "All services stopped successfully!"  
110 -echo "========================================"  
111 \ No newline at end of file 16 \ No newline at end of file
  17 +echo "Done."
scripts/stop_clip_service.sh
@@ -4,7 +4,8 @@ @@ -4,7 +4,8 @@
4 # 4 #
5 set -e 5 set -e
6 6
7 -LOG_DIR="/home/tw/saas-search/logs" 7 +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
  8 +LOG_DIR="${PROJECT_ROOT}/logs"
8 PID_FILE="${LOG_DIR}/clip_service.pid" 9 PID_FILE="${LOG_DIR}/clip_service.pid"
9 10
10 echo "========================================" 11 echo "========================================"
scripts/stop_cnclip_service.sh
@@ -21,7 +21,7 @@ BLUE='\033[0;34m' @@ -21,7 +21,7 @@ BLUE='\033[0;34m'
21 NC='\033[0m' # No Color 21 NC='\033[0m' # No Color
22 22
23 # 项目路径 23 # 项目路径
24 -PROJECT_ROOT="/data/tw/saas-search" 24 +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
25 LOG_DIR="${PROJECT_ROOT}/logs" 25 LOG_DIR="${PROJECT_ROOT}/logs"
26 PID_FILE="${LOG_DIR}/cnclip_service.pid" 26 PID_FILE="${LOG_DIR}/cnclip_service.pid"
27 27
scripts/stop_reranker.sh 0 → 100755
@@ -0,0 +1,43 @@ @@ -0,0 +1,43 @@
  1 +#!/bin/bash
  2 +#
  3 +# Stop Reranker Service
  4 +#
  5 +
  6 +set -e
  7 +
  8 +cd "$(dirname "$0")/.."
  9 +
  10 +PID_FILE="logs/reranker.pid"
  11 +RERANKER_PORT="${RERANKER_PORT:-6007}"
  12 +
  13 +echo "========================================"
  14 +echo "Stopping Reranker Service"
  15 +echo "========================================"
  16 +
  17 +if [ -f "${PID_FILE}" ]; then
  18 + PID="$(cat "${PID_FILE}" 2>/dev/null || true)"
  19 + if [ -n "${PID}" ] && kill -0 "${PID}" 2>/dev/null; then
  20 + echo "Stopping PID from file: ${PID}"
  21 + kill -TERM "${PID}" 2>/dev/null || true
  22 + sleep 1
  23 + if kill -0 "${PID}" 2>/dev/null; then
  24 + kill -KILL "${PID}" 2>/dev/null || true
  25 + fi
  26 + fi
  27 + rm -f "${PID_FILE}"
  28 +fi
  29 +
  30 +PORT_PIDS="$(lsof -ti:${RERANKER_PORT} 2>/dev/null || true)"
  31 +if [ -n "${PORT_PIDS}" ]; then
  32 + echo "Stopping process on port ${RERANKER_PORT}: ${PORT_PIDS}"
  33 + for PID in ${PORT_PIDS}; do
  34 + kill -TERM "${PID}" 2>/dev/null || true
  35 + done
  36 + sleep 1
  37 + PORT_PIDS="$(lsof -ti:${RERANKER_PORT} 2>/dev/null || true)"
  38 + for PID in ${PORT_PIDS}; do
  39 + kill -KILL "${PID}" 2>/dev/null || true
  40 + done
  41 +fi
  42 +
  43 +echo "Reranker service stopped."
scripts/stop_translator.sh 0 → 100755
@@ -0,0 +1,43 @@ @@ -0,0 +1,43 @@
  1 +#!/bin/bash
  2 +#
  3 +# Stop Translation Service
  4 +#
  5 +
  6 +set -e
  7 +
  8 +cd "$(dirname "$0")/.."
  9 +
  10 +PID_FILE="logs/translator.pid"
  11 +TRANSLATION_PORT="${TRANSLATION_PORT:-${TRANSLATOR_PORT:-6006}}"
  12 +
  13 +echo "========================================"
  14 +echo "Stopping Translation Service"
  15 +echo "========================================"
  16 +
  17 +if [ -f "${PID_FILE}" ]; then
  18 + PID="$(cat "${PID_FILE}" 2>/dev/null || true)"
  19 + if [ -n "${PID}" ] && kill -0 "${PID}" 2>/dev/null; then
  20 + echo "Stopping PID from file: ${PID}"
  21 + kill -TERM "${PID}" 2>/dev/null || true
  22 + sleep 1
  23 + if kill -0 "${PID}" 2>/dev/null; then
  24 + kill -KILL "${PID}" 2>/dev/null || true
  25 + fi
  26 + fi
  27 + rm -f "${PID_FILE}"
  28 +fi
  29 +
  30 +PORT_PIDS="$(lsof -ti:${TRANSLATION_PORT} 2>/dev/null || true)"
  31 +if [ -n "${PORT_PIDS}" ]; then
  32 + echo "Stopping process on port ${TRANSLATION_PORT}: ${PORT_PIDS}"
  33 + for PID in ${PORT_PIDS}; do
  34 + kill -TERM "${PID}" 2>/dev/null || true
  35 + done
  36 + sleep 1
  37 + PORT_PIDS="$(lsof -ti:${TRANSLATION_PORT} 2>/dev/null || true)"
  38 + for PID in ${PORT_PIDS}; do
  39 + kill -KILL "${PID}" 2>/dev/null || true
  40 + done
  41 +fi
  42 +
  43 +echo "Translation service stopped."
search/rerank_client.py
@@ -8,6 +8,7 @@ @@ -8,6 +8,7 @@
8 """ 8 """
9 9
10 from typing import Dict, Any, List, Optional, Tuple 10 from typing import Dict, Any, List, Optional, Tuple
  11 +import os
11 import logging 12 import logging
12 13
13 logger = logging.getLogger(__name__) 14 logger = logging.getLogger(__name__)
@@ -228,7 +229,7 @@ def run_rerank( @@ -228,7 +229,7 @@ def run_rerank(
228 except Exception: 229 except Exception:
229 RERANKER_CONFIG = None 230 RERANKER_CONFIG = None
230 231
231 - url = service_url 232 + url = service_url or os.getenv("RERANKER_SERVICE_URL")
232 if not url and RERANKER_CONFIG is not None: 233 if not url and RERANKER_CONFIG is not None:
233 url = f"http://127.0.0.1:{RERANKER_CONFIG.PORT}/rerank" 234 url = f"http://127.0.0.1:{RERANKER_CONFIG.PORT}/rerank"
234 if not url: 235 if not url:
search/searcher.py
@@ -5,6 +5,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting. @@ -5,6 +5,7 @@ Handles query parsing, boolean expressions, ranking, and result formatting.
5 """ 5 """
6 6
7 from typing import Dict, Any, List, Optional, Union 7 from typing import Dict, Any, List, Optional, Union
  8 +import os
8 import time, json 9 import time, json
9 import logging 10 import logging
10 11
@@ -400,11 +401,11 @@ class Searcher: @@ -400,11 +401,11 @@ class Searcher:
400 ) 401 )
401 402
402 if rerank_meta is not None: 403 if rerank_meta is not None:
403 - try:  
404 - from reranker.config import CONFIG as RERANKER_CONFIG  
405 - rerank_url = f"http://127.0.0.1:{RERANKER_CONFIG.PORT}/rerank"  
406 - except Exception:  
407 - rerank_url = "http://127.0.0.1:6007/rerank" 404 + rerank_url = (
  405 + rc.service_url
  406 + or os.getenv("RERANKER_SERVICE_URL")
  407 + or "http://127.0.0.1:6007/rerank"
  408 + )
408 context.metadata.setdefault("rerank_info", {}) 409 context.metadata.setdefault("rerank_info", {})
409 context.metadata["rerank_info"].update({ 410 context.metadata["rerank_info"].update({
410 "service_url": rerank_url, 411 "service_url": rerank_url,