diff --git a/.env.example b/.env.example index b0a3bfb..c65fca8 100644 --- a/.env.example +++ b/.env.example @@ -24,8 +24,10 @@ INDEXER_HOST=0.0.0.0 INDEXER_PORT=6004 # Optional service ports +FRONTEND_PORT=6003 EMBEDDING_PORT=6005 TEI_PORT=8080 +CNCLIP_PORT=51000 TRANSLATION_PORT=6006 RERANKER_PORT=6007 EMBEDDING_SERVICE_URL=http://127.0.0.1:6005 @@ -35,7 +37,7 @@ TRANSLATION_PROVIDER=direct TRANSLATION_MODEL=qwen EMBEDDING_BACKEND=tei TEI_BASE_URL=http://127.0.0.1:8080 -TEI_USE_GPU=1 +TEI_DEVICE=cuda TEI_VERSION=1.9 TEI_MAX_BATCH_TOKENS=2048 TEI_MAX_CLIENT_BATCH_SIZE=8 @@ -43,13 +45,6 @@ TEI_HEALTH_TIMEOUT_SEC=300 RERANK_PROVIDER=http RERANK_BACKEND=qwen3_vllm -# Optional startup switches (run.sh / scripts/service_ctl.sh) -START_EMBEDDING=0 -START_TRANSLATOR=0 -START_RERANKER=0 -START_TEI=0 -START_CNCLIP=0 - # Cache Directory CACHE_DIR=.cache diff --git a/README.md b/README.md index e6a7641..d63ce8c 100644 --- a/README.md +++ b/README.md @@ -29,12 +29,15 @@ source activate.sh ./run.sh # 可选:附加能力服务(按需开启) -START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 START_TEI=1 START_CNCLIP=1 ./run.sh +./scripts/service_ctl.sh start embedding translator reranker tei cnclip # 查看状态 ./scripts/service_ctl.sh status ``` +服务管理全盘说明(入口职责、默认行为、全量启停方式)见: +- `docs/Usage-Guide.md` -> `服务管理总览` + 核心端口: - `6002` backend(`/search/*`, `/admin/*`) diff --git a/config/env_config.py b/config/env_config.py index fcda748..936c388 100644 --- a/config/env_config.py +++ b/config/env_config.py @@ -62,7 +62,7 @@ INDEXER_PORT = int(os.getenv('INDEXER_PORT', 6004)) EMBEDDING_HOST = os.getenv('EMBEDDING_HOST', '127.0.0.1') EMBEDDING_PORT = int(os.getenv('EMBEDDING_PORT', 6005)) TRANSLATION_HOST = os.getenv('TRANSLATION_HOST', '127.0.0.1') -TRANSLATION_PORT = int(os.getenv('TRANSLATION_PORT', os.getenv('TRANSLATOR_PORT', 6006))) +TRANSLATION_PORT = int(os.getenv('TRANSLATION_PORT', 6006)) TRANSLATION_PROVIDER = os.getenv('TRANSLATION_PROVIDER', 'direct') TRANSLATION_MODEL = 
os.getenv('TRANSLATION_MODEL', 'qwen') RERANKER_HOST = os.getenv('RERANKER_HOST', '127.0.0.1') diff --git a/docs/CNCLIP_SERVICE说明文档.md b/docs/CNCLIP_SERVICE说明文档.md index 36c74c1..650630a 100644 --- a/docs/CNCLIP_SERVICE说明文档.md +++ b/docs/CNCLIP_SERVICE说明文档.md @@ -70,7 +70,8 @@ cd /data/saas-search ### 6.1 通过统一编排启动 ```bash -START_EMBEDDING=1 START_TEI=1 START_CNCLIP=1 ./scripts/service_ctl.sh start +./scripts/service_ctl.sh start cnclip +# 或一次启动可选能力:./scripts/service_ctl.sh start embedding tei cnclip ``` ### 6.2 设备选择优先级 @@ -110,6 +111,11 @@ cat third-party/clip-as-service/server/torch-flow-temp.yml - GPU 模式:`device: 'cuda'` - CPU 模式:`device: 'cpu'` +### 7.2.1 日志与 PID 文件 + +- 日志:`logs/cnclip.log` +- PID:`logs/cnclip.pid` + ### 7.3 发送一次编码请求(触发模型加载) ```bash diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index d921996..7d2d603 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -66,18 +66,20 @@ source activate.sh ```bash ./run.sh # 启动全部能力 -START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 START_TEI=1 START_CNCLIP=1 ./run.sh -# 等价方式(直接使用服务控制器) -START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 START_TEI=1 START_CNCLIP=1 ./scripts/service_ctl.sh start +# 追加可选能力服务(显式指定) +TEI_DEVICE=cuda CNCLIP_DEVICE=cuda ./scripts/service_ctl.sh start tei cnclip embedding translator reranker # 说明: # - reranker 为 GPU 强制模式(资源不足会直接启动失败) -# - TEI 默认使用 GPU;当 TEI_USE_GPU=1 且 GPU 不可用时会直接失败(不会自动降级到 CPU) +# - TEI 默认使用 GPU;当 TEI_DEVICE=cuda 且 GPU 不可用时会直接失败(不会自动降级到 CPU) # - cnclip 默认使用 cuda;若显式配置为 cuda 且 GPU 不可用会直接失败(不会自动降级到 cpu) ./scripts/service_ctl.sh status ./scripts/stop.sh ``` +服务管理方式(入口职责、默认行为、全量拉起顺序)见: +- `docs/Usage-Guide.md` -> `服务管理总览` + ### 1.3 常用 API 请求示例 #### 搜索 API(backend 6002) @@ -135,7 +137,7 @@ API 文档:`http://localhost:6004/docs` ```bash # TEI(文本向量后端,默认) # GPU(需 nvidia-container-toolkit) -TEI_USE_GPU=1 ./scripts/start_tei_service.sh +TEI_DEVICE=cuda ./scripts/start_tei_service.sh # Embedding API(会校验 TEI /health) ./scripts/start_embedding_service.sh @@ -151,7 
+153,7 @@ curl -X POST http://localhost:6005/embed/image \ 说明: - TEI 默认镜像按 `TEI_VERSION` 组装:`cuda-`(默认 `1.9`)。 -- `TEI_USE_GPU=1` 时会严格校验 Docker GPU runtime;未配置会直接报错退出。 +- `TEI_DEVICE=cuda` 时会严格校验 Docker GPU runtime;未配置会直接报错退出。 - `/embed/image` 依赖 `cnclip`(`grpc://127.0.0.1:51000`),未启动时 embedding 服务会启动失败。 #### Translator 服务(6006) @@ -514,6 +516,8 @@ curl http://localhost:6007/health - `logs/embedding.log` - `logs/translator.log` - `logs/reranker.log` +- `logs/tei.log` +- `logs/cnclip.log` - `logs/search_engine.log` - `logs/errors.log` diff --git a/docs/TEI_SERVICE说明文档.md b/docs/TEI_SERVICE说明文档.md index 103a9fa..b20d3ef 100644 --- a/docs/TEI_SERVICE说明文档.md +++ b/docs/TEI_SERVICE说明文档.md @@ -63,13 +63,13 @@ docker info --format '{{json .Runtimes}}' ### 5.1 GPU 模式启动(默认) ```bash -TEI_USE_GPU=1 ./scripts/start_tei_service.sh +TEI_DEVICE=cuda ./scripts/start_tei_service.sh ``` 预期输出包含: - `Image: ghcr.io/huggingface/text-embeddings-inference:turing-...` 或 `cuda-...`(脚本按 GPU 架构自动选择) -- `Mode: gpu` +- `Mode: cuda` - `TEI is ready and output probe passed: http://127.0.0.1:8080` 说明: @@ -79,7 +79,7 @@ TEI_USE_GPU=1 ./scripts/start_tei_service.sh ### 5.2 CPU 模式启动(显式) ```bash -TEI_USE_GPU=0 ./scripts/start_tei_service.sh +TEI_DEVICE=cpu ./scripts/start_tei_service.sh ``` 预期输出包含: @@ -135,7 +135,7 @@ curl -sS -X POST "http://127.0.0.1:6005/embed/text" \ `scripts/start_tei_service.sh` 支持下列变量: -- `TEI_USE_GPU`:`1/0`(或 `true/false`),默认 `1` +- `TEI_DEVICE`:`cuda/cpu`,默认 `cuda` - `TEI_CONTAINER_NAME`:容器名,默认 `saas-search-tei` - `TEI_PORT`:宿主机端口,默认 `8080` - `TEI_MODEL_ID`:默认 `Qwen/Qwen3-Embedding-0.6B` @@ -152,7 +152,7 @@ curl -sS -X POST "http://127.0.0.1:6005/embed/text" \ 启动全套(含 TEI): ```bash -START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 START_TEI=1 START_CNCLIP=1 TEI_USE_GPU=1 ./scripts/service_ctl.sh start +TEI_DEVICE=cuda ./scripts/service_ctl.sh start embedding translator reranker tei cnclip ``` 仅启动 TEI: @@ -167,9 +167,11 @@ START_EMBEDDING=1 START_TRANSLATOR=1 
START_RERANKER=1 START_TEI=1 START_CNCLIP=1 ./scripts/service_ctl.sh status tei ``` +日志文件:`logs/tei.log` + ## 9. 常见问题与排障 -### 9.1 `ERROR: TEI_USE_GPU=1 but Docker nvidia runtime is not configured` +### 9.1 `ERROR: TEI_DEVICE=cuda but Docker nvidia runtime is not configured` - 原因:Docker 未配置 NVIDIA runtime。 - 处理:按本文 4.2 配置后重启 Docker。 @@ -181,7 +183,7 @@ START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 START_TEI=1 START_CNCLIP=1 ```bash ./scripts/stop_tei_service.sh -TEI_USE_GPU=0 ./scripts/start_tei_service.sh # 或改为 1 +TEI_DEVICE=cpu ./scripts/start_tei_service.sh # 或改为 cuda ``` ### 9.3 embedding 服务报 TEI 不可达 diff --git a/docs/Usage-Guide.md b/docs/Usage-Guide.md index e4c53d8..9cd4786 100644 --- a/docs/Usage-Guide.md +++ b/docs/Usage-Guide.md @@ -8,10 +8,11 @@ 1. [环境准备](#环境准备) 2. [服务启动](#服务启动) -3. [配置说明](#配置说明) -4. [查看日志](#查看日志) -5. [测试验证](#测试验证) -6. [常见问题](#常见问题) +3. [服务管理总览](#服务管理总览) +4. [配置说明](#配置说明) +5. [查看日志](#查看日志) +6. [测试验证](#测试验证) +7. [常见问题](#常见问题) --- @@ -50,10 +51,10 @@ TEI 文本向量服务使用 Docker 容器: ```bash # GPU(需 nvidia-container-toolkit) -TEI_USE_GPU=1 ./scripts/start_tei_service.sh +TEI_DEVICE=cuda ./scripts/start_tei_service.sh # CPU -TEI_USE_GPU=0 ./scripts/start_tei_service.sh +TEI_DEVICE=cpu ./scripts/start_tei_service.sh ``` 专项说明: @@ -138,7 +139,7 @@ cd /data/saas-search 可选:全功能模式(同时启动 embedding/translator/reranker/tei/cnclip): ```bash -START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 START_TEI=1 START_CNCLIP=1 ./run.sh +TEI_DEVICE=cuda CNCLIP_DEVICE=cuda ./scripts/service_ctl.sh start tei cnclip embedding translator reranker ``` ### 方式2: 统一控制脚本(推荐) @@ -241,6 +242,49 @@ cd frontend python -m http.server 6003 ``` +## 服务管理总览 + +### 1) 入口脚本职责 + +- `./run.sh`:仅启动核心服务(`backend/indexer/frontend`)。 +- `./restart.sh`:重启逻辑为“先停所有已知服务,再启动核心服务”。 +- `./scripts/stop.sh`:停止所有已知服务。 +- `./scripts/service_ctl.sh`:统一控制器,支持 `start/stop/restart/status`,是唯一推荐入口。 + +### 2) `service_ctl.sh` 的默认行为 + +- `start`(不带服务名):启动核心服务 `backend/indexer/frontend`。 +- 
`stop`(不带服务名):停止全部已知服务(含可选服务)。 +- `restart`(不带服务名):先停全部,再只启动核心服务。 +- `status`(不带服务名):显示全部已知服务状态。 + +### 3) 全量服务一键拉起 + +```bash +TEI_DEVICE=cuda CNCLIP_DEVICE=cuda ./scripts/service_ctl.sh start tei cnclip embedding translator reranker +``` + +说明: +- `TEI_DEVICE` / `CNCLIP_DEVICE` 统一使用 `cuda|cpu`。 +- 显式把 `tei`、`cnclip` 放在前面,避免 `embedding` 因依赖未就绪启动失败。 + +### 4) 常用运维命令 + +```bash +# 先重启核心,再拉起可选服务(最常用) +./restart.sh +TEI_DEVICE=cuda CNCLIP_DEVICE=cuda ./scripts/service_ctl.sh start tei cnclip embedding translator reranker + +# 查看全量状态 +./scripts/service_ctl.sh status + +# 仅重启某个服务 +./scripts/service_ctl.sh restart embedding + +# 停止全部 +./scripts/service_ctl.sh stop +``` + ### 停止服务 ```bash @@ -303,16 +347,12 @@ INDEXER_HOST=0.0.0.0 INDEXER_PORT=6004 # Optional service ports +FRONTEND_PORT=6003 EMBEDDING_PORT=6005 +TEI_PORT=8080 +CNCLIP_PORT=51000 TRANSLATION_PORT=6006 RERANKER_PORT=6007 - -# Optional startup switches (for run.sh / service_ctl.sh) -START_EMBEDDING=0 -START_TRANSLATOR=0 -START_RERANKER=0 -START_TEI=0 -START_CNCLIP=0 ``` ### 修改配置 @@ -334,6 +374,8 @@ START_CNCLIP=0 - `logs/embedding.log` - 向量服务日志(可选) - `logs/translator.log` - 翻译服务日志(可选) - `logs/reranker.log` - 重排服务日志(可选) +- `logs/tei.log` - TEI 启停日志(可选) +- `logs/cnclip.log` - CN-CLIP 启停日志(可选) - `logs/search_engine.log` - 应用主日志(按天轮转) - `logs/errors.log` - 错误日志(按天轮转) diff --git a/docs/搜索API对接指南.md b/docs/搜索API对接指南.md index d98d811..e62a8c6 100644 --- a/docs/搜索API对接指南.md +++ b/docs/搜索API对接指南.md @@ -1655,6 +1655,7 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \ | 重排服务 | 6007 | `http://localhost:6007` | 对检索结果进行二次排序 | 生产环境请将 `localhost` 替换为实际服务地址。 +服务管理入口与完整启停规则见:`docs/Usage-Guide.md` -> `服务管理总览`。 ### 7.1 向量服务(Embedding) @@ -1663,7 +1664,7 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \ - **依赖**: - 文本向量后端默认走 TEI(`http://127.0.0.1:8080`) - 图片向量依赖 `cnclip`(`grpc://127.0.0.1:51000`) - - TEI 默认使用 GPU(`TEI_USE_GPU=1`);当配置为 GPU 且不可用时会启动失败(不会自动降级到 CPU) + - TEI 默认使用 
GPU(`TEI_DEVICE=cuda`);当配置为 GPU 且不可用时会启动失败(不会自动降级到 CPU) - cnclip 默认使用 `cuda`;若配置为 `cuda` 但 GPU 不可用会启动失败(不会自动降级到 `cpu`) #### 7.1.1 `POST /embed/text` — 文本向量化 diff --git a/embeddings/README.md b/embeddings/README.md index 6c3bb9b..730f515 100644 --- a/embeddings/README.md +++ b/embeddings/README.md @@ -65,10 +65,10 @@ ```bash # GPU(需 nvidia-container-toolkit) -TEI_USE_GPU=1 ./scripts/start_tei_service.sh +TEI_DEVICE=cuda ./scripts/start_tei_service.sh # CPU -TEI_USE_GPU=0 ./scripts/start_tei_service.sh +TEI_DEVICE=cpu ./scripts/start_tei_service.sh ./scripts/start_embedding_service.sh ``` @@ -82,4 +82,4 @@ TEI_USE_GPU=0 ./scripts/start_tei_service.sh - `IMAGE_NORMALIZE_EMBEDDINGS`(默认 true) - `USE_CLIP_AS_SERVICE`, `CLIP_AS_SERVICE_SERVER`:图片向量(clip-as-service) - `IMAGE_MODEL_NAME`, `IMAGE_DEVICE`:本地 CN-CLIP(当 `USE_CLIP_AS_SERVICE=false` 时) -- TEI 相关:`TEI_USE_GPU`、`TEI_VERSION`、`TEI_MAX_BATCH_TOKENS`、`TEI_MAX_CLIENT_BATCH_SIZE`、`TEI_HEALTH_TIMEOUT_SEC` +- TEI 相关:`TEI_DEVICE`、`TEI_VERSION`、`TEI_MAX_BATCH_TOKENS`、`TEI_MAX_CLIENT_BATCH_SIZE`、`TEI_HEALTH_TIMEOUT_SEC` diff --git a/reranker/DEPLOYMENT_AND_TUNING.md b/reranker/DEPLOYMENT_AND_TUNING.md index 48a82e0..54b03a2 100644 --- a/reranker/DEPLOYMENT_AND_TUNING.md +++ b/reranker/DEPLOYMENT_AND_TUNING.md @@ -78,7 +78,7 @@ services: ```bash ./scripts/start_reranker.sh -./scripts/stop_reranker.sh +./scripts/service_ctl.sh stop reranker ``` 健康检查: diff --git a/restart.sh b/restart.sh index cf2d62a..49c036f 100755 --- a/restart.sh +++ b/restart.sh @@ -2,6 +2,8 @@ # Unified restart script for saas-search services +set -euo pipefail + cd "$(dirname "$0")" ./scripts/service_ctl.sh restart diff --git a/run.sh b/run.sh index 2afccb8..403554b 100755 --- a/run.sh +++ b/run.sh @@ -2,6 +2,8 @@ # Unified startup script for saas-search services +set -euo pipefail + cd "$(dirname "$0")" ./scripts/service_ctl.sh start diff --git a/scripts/benchmark_reranker_1000docs.sh b/scripts/benchmark_reranker_1000docs.sh index 8f06e85..d898740 
100755 --- a/scripts/benchmark_reranker_1000docs.sh +++ b/scripts/benchmark_reranker_1000docs.sh @@ -33,7 +33,7 @@ TMP_CASES="/tmp/rerank_1000_shortdocs_cases.json" mkdir -p "${OUT_DIR}" cleanup() { - ./scripts/stop_reranker.sh >/dev/null 2>&1 || true + ./scripts/service_ctl.sh stop reranker >/dev/null 2>&1 || true } trap cleanup EXIT diff --git a/scripts/frontend_server.py b/scripts/frontend_server.py index 0e0ab04..77dac02 100755 --- a/scripts/frontend_server.py +++ b/scripts/frontend_server.py @@ -30,8 +30,8 @@ BACKEND_PROXY_URL = os.getenv('BACKEND_PROXY_URL', 'http://127.0.0.1:6002').rstr frontend_dir = os.path.join(os.path.dirname(__file__), '../frontend') os.chdir(frontend_dir) -# Get port from environment variable or default -PORT = int(os.getenv('PORT', 6003)) +# FRONTEND_PORT is the canonical config; keep PORT as a secondary fallback. +PORT = int(os.getenv('FRONTEND_PORT', os.getenv('PORT', 6003))) # Configure logging to suppress scanner noise logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s') diff --git a/scripts/service_ctl.sh b/scripts/service_ctl.sh index 8d99aae..a018cb9 100755 --- a/scripts/service_ctl.sh +++ b/scripts/service_ctl.sh @@ -28,7 +28,7 @@ get_port() { indexer) echo "${INDEXER_PORT:-6004}" ;; frontend) echo "${FRONTEND_PORT:-6003}" ;; embedding) echo "${EMBEDDING_PORT:-6005}" ;; - translator) echo "${TRANSLATION_PORT:-${TRANSLATOR_PORT:-6006}}" ;; + translator) echo "${TRANSLATION_PORT:-6006}" ;; reranker) echo "${RERANKER_PORT:-6007}" ;; tei) echo "${TEI_PORT:-8080}" ;; cnclip) echo "${CNCLIP_PORT:-51000}" ;; @@ -38,10 +38,7 @@ get_port() { pid_file() { local service="$1" - case "${service}" in - cnclip) echo "${LOG_DIR}/cnclip_service.pid" ;; - *) echo "${LOG_DIR}/${service}.pid" ;; - esac + echo "${LOG_DIR}/${service}.pid" } log_file() { @@ -64,6 +61,24 @@ service_start_cmd() { esac } +service_exists() { + local service="$1" + case "${service}" in + 
backend|indexer|frontend|embedding|translator|reranker|tei|cnclip) return 0 ;; + *) return 1 ;; + esac +} + +validate_targets() { + local targets="$1" + for svc in ${targets}; do + if ! service_exists "${svc}"; then + echo "[error] unknown service: ${svc}" >&2 + return 1 + fi + done +} + wait_for_health() { local service="$1" local max_retries="${2:-30}" @@ -188,9 +203,15 @@ start_one() { cnclip|tei) echo "[start] ${service} (managed by native script)" if [ "${service}" = "cnclip" ]; then - CNCLIP_DEVICE="${CNCLIP_DEVICE:-cuda}" bash -lc "${cmd}" >> "${lf}" 2>&1 + if ! CNCLIP_DEVICE="${CNCLIP_DEVICE:-cuda}" bash -lc "${cmd}" >> "${lf}" 2>&1; then + echo "[error] ${service} start script failed, inspect ${lf}" >&2 + return 1 + fi else - bash -lc "${cmd}" >> "${lf}" 2>&1 + if ! bash -lc "${cmd}" >> "${lf}" 2>&1; then + echo "[error] ${service} start script failed, inspect ${lf}" >&2 + return 1 + fi fi if [ "${service}" = "tei" ]; then if is_running_tei_container; then @@ -244,6 +265,24 @@ start_one() { esac } +cleanup_reranker_orphans() { + local engine_pids + engine_pids="$(pgrep -f 'VLLM::EngineCore' 2>/dev/null || true)" + if [ -z "${engine_pids}" ]; then + return 0 + fi + + echo "[stop] reranker orphan engines=${engine_pids}" + for pid in ${engine_pids}; do + kill -TERM "${pid}" 2>/dev/null || true + done + sleep 1 + engine_pids="$(pgrep -f 'VLLM::EngineCore' 2>/dev/null || true)" + for pid in ${engine_pids}; do + kill -KILL "${pid}" 2>/dev/null || true + done +} + stop_one() { local service="$1" cd "${PROJECT_ROOT}" @@ -257,11 +296,6 @@ stop_one() { bash -lc "./scripts/stop_tei_service.sh" || true return 0 fi - if [ "${service}" = "reranker" ]; then - echo "[stop] reranker (managed by native script)" - bash -lc "./scripts/stop_reranker.sh" || true - return 0 - fi local pf pf="$(pid_file "${service}")" @@ -297,6 +331,10 @@ stop_one() { done fi fi + + if [ "${service}" = "reranker" ]; then + cleanup_reranker_orphans + fi } status_one() { @@ -340,13 +378,7 @@ 
resolve_targets() { case "${scope}" in start) - local targets=("${CORE_SERVICES[@]}") - if [ "${START_TEI:-0}" = "1" ]; then targets+=("tei"); fi - if [ "${START_CNCLIP:-0}" = "1" ]; then targets+=("cnclip"); fi - if [ "${START_EMBEDDING:-0}" = "1" ]; then targets+=("embedding"); fi - if [ "${START_TRANSLATOR:-0}" = "1" ]; then targets+=("translator"); fi - if [ "${START_RERANKER:-0}" = "1" ]; then targets+=("reranker"); fi - echo "${targets[@]}" + echo "${CORE_SERVICES[@]}" ;; stop|status) echo "$(all_services)" @@ -370,14 +402,14 @@ Usage: ./scripts/service_ctl.sh status [service...] Default target set (when no service provided): - start -> backend indexer frontend (+ optional by env flags) + start -> backend indexer frontend stop -> all known services restart -> stop all known services, then start with start targets status -> all known services -Optional startup flags: - START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 START_TEI=1 START_CNCLIP=1 ./run.sh - START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 START_TEI=1 START_CNCLIP=1 ./scripts/service_ctl.sh start +Optional service startup: + ./scripts/service_ctl.sh start embedding translator reranker tei cnclip + TEI_DEVICE=cuda|cpu ./scripts/service_ctl.sh start tei CNCLIP_DEVICE=cuda|cpu ./scripts/service_ctl.sh start cnclip EOF } @@ -404,6 +436,7 @@ main() { usage exit 1 fi + validate_targets "${targets}" case "${action}" in start) diff --git a/scripts/start.sh b/scripts/start.sh index e7184cf..5c30615 100755 --- a/scripts/start.sh +++ b/scripts/start.sh @@ -1,9 +1,9 @@ #!/bin/bash -# Backward-compatible start entrypoint. +# Service start entrypoint. # Delegates to unified service controller. -set -e +set -euo pipefail cd "$(dirname "$0")/.." 
@@ -11,12 +11,12 @@ echo "========================================" echo "saas-search 服务启动" echo "========================================" echo "默认启动核心服务: backend/indexer/frontend" -echo "可选服务通过环境变量开启:" -echo " START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 START_TEI=1 START_CNCLIP=1 ./run.sh" +echo "可选服务请显式指定:" +echo " ./scripts/service_ctl.sh start embedding translator reranker tei cnclip" echo ./scripts/service_ctl.sh start echo echo "当前服务状态:" -./scripts/service_ctl.sh status backend indexer frontend embedding translator reranker tei +./scripts/service_ctl.sh status backend indexer frontend embedding translator reranker tei cnclip diff --git a/scripts/start_backend.sh b/scripts/start_backend.sh index 49c2d27..f6be6c0 100755 --- a/scripts/start_backend.sh +++ b/scripts/start_backend.sh @@ -2,7 +2,7 @@ # Start Backend API Service -set -e +set -euo pipefail cd "$(dirname "$0")/.." source ./activate.sh @@ -15,24 +15,25 @@ echo -e "${GREEN}========================================${NC}" echo -e "${GREEN}Starting Backend API Service${NC}" echo -e "${GREEN}========================================${NC}" +API_HOST="${API_HOST:-0.0.0.0}" +API_PORT="${API_PORT:-6002}" +ES_HOST="${ES_HOST:-http://localhost:9200}" +ES_USERNAME="${ES_USERNAME:-}" +ES_PASSWORD="${ES_PASSWORD:-}" + echo -e "\n${YELLOW}Configuration:${NC}" -echo " API Host: ${API_HOST:-0.0.0.0}" -echo " API Port: ${API_PORT:-6002}" -echo " ES Host: ${ES_HOST:-http://localhost:9200}" +echo " API Host: ${API_HOST}" +echo " API Port: ${API_PORT}" +echo " ES Host: ${ES_HOST}" echo " ES Username: ${ES_USERNAME:-not set}" echo -e "\n${YELLOW}Starting backend API service (search + admin)...${NC}" # Export environment variables for the Python process -export API_HOST=${API_HOST:-0.0.0.0} -export API_PORT=${API_PORT:-6002} -export ES_HOST=${ES_HOST:-http://localhost:9200} -export ES_USERNAME=${ES_USERNAME:-} -export ES_PASSWORD=${ES_PASSWORD:-} - -python main.py serve \ - --host $API_HOST \ - --port $API_PORT \ - 
--es-host $ES_HOST +export API_HOST API_PORT ES_HOST ES_USERNAME ES_PASSWORD +exec python main.py serve \ + --host "${API_HOST}" \ + --port "${API_PORT}" \ + --es-host "${ES_HOST}" diff --git a/scripts/start_cnclip_service.sh b/scripts/start_cnclip_service.sh index e308c38..3069b28 100755 --- a/scripts/start_cnclip_service.sh +++ b/scripts/start_cnclip_service.sh @@ -12,9 +12,6 @@ # 选项: # --port PORT 服务端口(默认:51000) # --device DEVICE 设备类型:cuda 或 cpu(默认:cuda) -# --batch-size SIZE 批处理大小(默认:32) -# --num-workers NUM 预处理线程数(默认:4) -# --dtype TYPE 数据类型:float16 或 float32(默认:float16) # --model-name NAME 模型名称(默认:CN-CLIP/ViT-H-14) # --replicas NUM 副本数(默认:1) # --help 显示帮助信息 @@ -22,11 +19,10 @@ # 示例: # ./scripts/start_cnclip_service.sh # ./scripts/start_cnclip_service.sh --port 52000 --device cuda -# ./scripts/start_cnclip_service.sh --batch-size 16 --dtype float32 # ############################################################################### -set -e # 遇到错误立即退出 +set -euo pipefail # 颜色定义 RED='\033[0;31m' @@ -38,9 +34,6 @@ NC='\033[0m' # No Color # 默认配置 DEFAULT_PORT=51000 DEFAULT_DEVICE="cuda" -DEFAULT_BATCH_SIZE=32 -DEFAULT_NUM_WORKERS=4 -DEFAULT_DTYPE="float16" DEFAULT_MODEL_NAME="CN-CLIP/ViT-H-14" # DEFAULT_MODEL_NAME="CN-CLIP/ViT-L-14-336" DEFAULT_REPLICAS=1 # 副本数 @@ -49,8 +42,8 @@ DEFAULT_REPLICAS=1 # 副本数 PROJECT_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" CLIP_SERVER_DIR="${PROJECT_ROOT}/third-party/clip-as-service/server" LOG_DIR="${PROJECT_ROOT}/logs" -PID_FILE="${LOG_DIR}/cnclip_service.pid" -LOG_FILE="${LOG_DIR}/cnclip_service.log" +PID_FILE="${LOG_DIR}/cnclip.pid" +LOG_FILE="${LOG_DIR}/cnclip.log" # 帮助信息 show_help() { @@ -59,11 +52,8 @@ show_help() { echo "用法: $0 [选项]" echo "" echo "选项:" - echo " --port PORT 服务端口(默认:${DEFAULT_PORT})" + echo " --port PORT 服务端口(默认:${CNCLIP_PORT:-${DEFAULT_PORT}})" echo " --device DEVICE 设备类型:cuda 或 cpu(默认:cuda)" - echo " --batch-size SIZE 批处理大小(默认:${DEFAULT_BATCH_SIZE})" - echo " --num-workers NUM 预处理线程数(默认:${DEFAULT_NUM_WORKERS})" - echo " --dtype TYPE 数据类型:float16 或 float32(默认:${DEFAULT_DTYPE})" echo " --model-name NAME 模型名称(默认:${DEFAULT_MODEL_NAME})" echo " --replicas NUM 副本数(默认:${DEFAULT_REPLICAS})" echo " --help 显示此帮助信息" @@ -72,23 +62,19 @@ show_help() { echo " $0 # 使用默认配置启动" echo " $0 --port 52000 --device cuda # 指定 CUDA 模式,端口 52000" echo " $0 --port 52000 --device cpu # 显式使用 CPU 模式" - echo " $0 --batch-size 16 --dtype float32 # 小批处理,float32 精度" echo " $0 --replicas 2 # 启动2个副本(需8-10GB显存)" echo "" echo "支持的模型:" echo " - CN-CLIP/ViT-B-16 基础版本,速度快" echo " - CN-CLIP/ViT-L-14 平衡版本" - echo " - CN-CLIP/ViT-L-14-336 高分辨率版本(默认)" - echo " - CN-CLIP/ViT-H-14 大型版本,精度高" + echo " - CN-CLIP/ViT-L-14-336 高分辨率版本" + echo " - CN-CLIP/ViT-H-14 大型版本,精度高(默认)" echo " - CN-CLIP/RN50 ResNet-50 版本" } # 解析命令行参数 -PORT=${DEFAULT_PORT} +PORT="${CNCLIP_PORT:-${DEFAULT_PORT}}" DEVICE=${DEFAULT_DEVICE} -BATCH_SIZE=${DEFAULT_BATCH_SIZE} -NUM_WORKERS=${DEFAULT_NUM_WORKERS} -DTYPE=${DEFAULT_DTYPE} MODEL_NAME=${DEFAULT_MODEL_NAME} REPLICAS=${DEFAULT_REPLICAS} @@ -102,18 +88,6 @@ while [[ $# -gt 0 ]]; do DEVICE="$2" shift 2 ;; - --batch-size) - BATCH_SIZE="$2" - shift 2 - ;; - --num-workers) - NUM_WORKERS="$2" - shift 2 - ;; - --dtype) - DTYPE="$2" - shift 2 - ;; --model-name) MODEL_NAME="$2" shift 2 @@ -197,7 +171,7 @@ python -c "import cn_clip" 2>/dev/null || { } # clip_server 通过 PYTHONPATH 
加载(见下方启动命令),此处仅做可导入性检查 -export PYTHONPATH="${CLIP_SERVER_DIR}:${PYTHONPATH}" +export PYTHONPATH="${CLIP_SERVER_DIR}${PYTHONPATH:+:${PYTHONPATH}}" python -c "import clip_server" 2>/dev/null || { echo -e "${RED}错误: clip_server 不可用${NC}" echo -e "${YELLOW}请重建专用环境: ./scripts/setup_cnclip_venv.sh${NC}" @@ -251,7 +225,7 @@ fi cd "${CLIP_SERVER_DIR}" # 设置环境变量 -export PYTHONPATH="${CLIP_SERVER_DIR}:${PYTHONPATH}" +export PYTHONPATH="${CLIP_SERVER_DIR}${PYTHONPATH:+:${PYTHONPATH}}" export NO_VERSION_CHECK=1 # 跳过版本检查 # 启动服务 diff --git a/scripts/start_frontend.sh b/scripts/start_frontend.sh index 5abe75f..a87b1cb 100755 --- a/scripts/start_frontend.sh +++ b/scripts/start_frontend.sh @@ -2,7 +2,7 @@ # Start Frontend Server -set -e +set -euo pipefail cd "$(dirname "$0")/.." source ./activate.sh @@ -17,6 +17,7 @@ echo -e "${GREEN}========================================${NC}" FRONTEND_PORT="${FRONTEND_PORT:-6003}" API_PORT="${API_PORT:-6002}" +PORT="${FRONTEND_PORT}" echo -e "\n${YELLOW}Frontend will be available at:${NC}" echo -e " ${GREEN}http://localhost:${FRONTEND_PORT}${NC}" @@ -25,4 +26,5 @@ echo -e "${YELLOW}Make sure the backend API is running at:${NC}" echo -e " ${GREEN}http://localhost:${API_PORT}${NC}" echo "" -python scripts/frontend_server.py +export FRONTEND_PORT API_PORT PORT +exec python scripts/frontend_server.py diff --git a/scripts/start_indexer.sh b/scripts/start_indexer.sh index 8924ae8..5878680 100755 --- a/scripts/start_indexer.sh +++ b/scripts/start_indexer.sh @@ -2,7 +2,7 @@ # Start dedicated Indexer API Service -set -e +set -euo pipefail cd "$(dirname "$0")/.." 
source ./activate.sh @@ -15,24 +15,25 @@ echo -e "${GREEN}========================================${NC}" echo -e "${GREEN}Starting Indexer API Service${NC}" echo -e "${GREEN}========================================${NC}" +INDEXER_HOST="${INDEXER_HOST:-0.0.0.0}" +INDEXER_PORT="${INDEXER_PORT:-6004}" +ES_HOST="${ES_HOST:-http://localhost:9200}" +ES_USERNAME="${ES_USERNAME:-}" +ES_PASSWORD="${ES_PASSWORD:-}" + echo -e "\n${YELLOW}Configuration:${NC}" -echo " INDEXER Host: ${INDEXER_HOST:-0.0.0.0}" -echo " INDEXER Port: ${INDEXER_PORT:-6004}" -echo " ES Host: ${ES_HOST:-http://localhost:9200}" +echo " INDEXER Host: ${INDEXER_HOST}" +echo " INDEXER Port: ${INDEXER_PORT}" +echo " ES Host: ${ES_HOST}" echo " ES Username: ${ES_USERNAME:-not set}" echo -e "\n${YELLOW}Starting indexer service...${NC}" # Export environment variables for the Python process -export INDEXER_HOST=${INDEXER_HOST:-0.0.0.0} -export INDEXER_PORT=${INDEXER_PORT:-6004} -export ES_HOST=${ES_HOST:-http://localhost:9200} -export ES_USERNAME=${ES_USERNAME:-} -export ES_PASSWORD=${ES_PASSWORD:-} - -python main.py serve-indexer \ - --host $INDEXER_HOST \ - --port $INDEXER_PORT \ - --es-host $ES_HOST +export INDEXER_HOST INDEXER_PORT ES_HOST ES_USERNAME ES_PASSWORD +exec python main.py serve-indexer \ + --host "${INDEXER_HOST}" \ + --port "${INDEXER_PORT}" \ + --es-host "${ES_HOST}" diff --git a/scripts/start_servers.py b/scripts/start_servers.py deleted file mode 100755 index baa9a5b..0000000 --- a/scripts/start_servers.py +++ /dev/null @@ -1,249 +0,0 @@ -#!/usr/bin/env python3 -""" -Production-ready server startup script with proper error handling and monitoring. - -[LEGACY] -This script is kept for historical compatibility. 
-Preferred entrypoint is: - ./scripts/service_ctl.sh start -""" - -import os -import sys -import signal -import time -import subprocess -import logging -import argparse -from typing import Dict, List, Optional -import multiprocessing -import threading - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - handlers=[ - logging.StreamHandler(), - logging.FileHandler('/tmp/search_engine_startup.log', mode='a') - ] -) -logger = logging.getLogger(__name__) - -class ServerManager: - """Manages frontend and API server processes.""" - - def __init__(self): - self.processes: Dict[str, subprocess.Popen] = {} - self.running = True - - def start_frontend_server(self) -> bool: - """Start the frontend server.""" - try: - frontend_script = os.path.join(os.path.dirname(__file__), 'frontend_server.py') - - cmd = [sys.executable, frontend_script] - env = os.environ.copy() - env['PYTHONUNBUFFERED'] = '1' - - process = subprocess.Popen( - cmd, - env=env, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - universal_newlines=True, - bufsize=1 - ) - - self.processes['frontend'] = process - logger.info(f"Frontend server started with PID: {process.pid}") - - # Start monitoring thread - threading.Thread( - target=self._monitor_output, - args=('frontend', process), - daemon=True - ).start() - - return True - - except Exception as e: - logger.error(f"Failed to start frontend server: {e}") - return False - - def start_api_server(self, es_host: str = "http://localhost:9200") -> bool: - """Start the API server.""" - try: - cmd = [ - sys.executable, 'main.py', 'serve', - '--es-host', es_host, - '--host', '0.0.0.0', - '--port', '6002' - ] - - env = os.environ.copy() - env['PYTHONUNBUFFERED'] = '1' - env['ES_HOST'] = es_host - - process = subprocess.Popen( - cmd, - env=env, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - universal_newlines=True, - bufsize=1 - ) - - self.processes['api'] = process - 
logger.info(f"API server started with PID: {process.pid}") - - # Start monitoring thread - threading.Thread( - target=self._monitor_output, - args=('api', process), - daemon=True - ).start() - - return True - - except Exception as e: - logger.error(f"Failed to start API server: {e}") - return False - - def _monitor_output(self, name: str, process: subprocess.Popen): - """Monitor process output and log appropriately.""" - try: - for line in iter(process.stdout.readline, ''): - if line.strip() and self.running: - # Filter out scanner noise for frontend server - if name == 'frontend': - noise_patterns = [ - 'code 400', - 'Bad request version', - 'Bad request syntax', - 'Bad HTTP/0.9 request type' - ] - if any(pattern in line for pattern in noise_patterns): - continue - - logger.info(f"[{name}] {line.strip()}") - - except Exception as e: - if self.running: - logger.error(f"Error monitoring {name} output: {e}") - - def check_servers(self) -> bool: - """Check if all servers are still running.""" - all_running = True - - for name, process in self.processes.items(): - if process.poll() is not None: - logger.error(f"{name} server has stopped with exit code: {process.returncode}") - all_running = False - - return all_running - - def stop_all(self): - """Stop all servers gracefully.""" - logger.info("Stopping all servers...") - self.running = False - - for name, process in self.processes.items(): - try: - logger.info(f"Stopping {name} server (PID: {process.pid})...") - - # Try graceful shutdown first - process.terminate() - - # Wait up to 10 seconds for graceful shutdown - try: - process.wait(timeout=10) - logger.info(f"{name} server stopped gracefully") - except subprocess.TimeoutExpired: - # Force kill if graceful shutdown fails - logger.warning(f"{name} server didn't stop gracefully, forcing...") - process.kill() - process.wait() - logger.info(f"{name} server stopped forcefully") - - except Exception as e: - logger.error(f"Error stopping {name} server: {e}") - - 
self.processes.clear() - logger.info("All servers stopped") - -def signal_handler(signum, frame): - """Handle shutdown signals.""" - logger.info(f"Received signal {signum}, shutting down...") - if 'manager' in globals(): - manager.stop_all() - sys.exit(0) - -def main(): - """Main function to start all servers.""" - global manager - - parser = argparse.ArgumentParser(description='Start saas-search servers (multi-tenant)') - parser.add_argument('--es-host', default='http://localhost:9200', help='Elasticsearch host') - parser.add_argument('--check-dependencies', action='store_true', help='Check dependencies before starting') - args = parser.parse_args() - - logger.info("Starting saas-search servers (multi-tenant)...") - logger.info(f"Elasticsearch: {args.es_host}") - - # Check dependencies if requested - if args.check_dependencies: - logger.info("Checking dependencies...") - try: - import slowapi - import anyio - logger.info("✓ All dependencies available") - except ImportError as e: - logger.error(f"✗ Missing dependency: {e}") - logger.info("Please run: pip install -r requirements_server.txt") - sys.exit(1) - - manager = ServerManager() - - # Set up signal handlers - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - # Start servers - if not manager.start_api_server(args.es_host): - logger.error("Failed to start API server") - sys.exit(1) - - # Wait a moment before starting frontend server - time.sleep(2) - - if not manager.start_frontend_server(): - logger.error("Failed to start frontend server") - manager.stop_all() - sys.exit(1) - - logger.info("All servers started successfully!") - logger.info("Frontend: http://localhost:6003") - logger.info("API: http://localhost:6002") - logger.info("API Docs: http://localhost:6002/docs") - logger.info("Press Ctrl+C to stop all servers") - - # Monitor servers - while manager.running: - if not manager.check_servers(): - logger.error("One or more servers have stopped 
unexpectedly") - manager.stop_all() - sys.exit(1) - - time.sleep(5) # Check every 5 seconds - - except KeyboardInterrupt: - logger.info("Received interrupt signal") - except Exception as e: - logger.error(f"Unexpected error: {e}") - finally: - manager.stop_all() - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/scripts/start_tei_service.sh b/scripts/start_tei_service.sh index aaea508..666d11e 100755 --- a/scripts/start_tei_service.sh +++ b/scripts/start_tei_service.sh @@ -27,14 +27,10 @@ TEI_DTYPE="${TEI_DTYPE:-float16}" HF_CACHE_DIR="${HF_CACHE_DIR:-$HOME/.cache/huggingface}" TEI_HEALTH_TIMEOUT_SEC="${TEI_HEALTH_TIMEOUT_SEC:-300}" -USE_GPU_RAW="${TEI_USE_GPU:-1}" -USE_GPU="$(echo "${USE_GPU_RAW}" | tr '[:upper:]' '[:lower:]')" -if [[ "${USE_GPU}" == "1" || "${USE_GPU}" == "true" || "${USE_GPU}" == "yes" ]]; then - USE_GPU="1" -elif [[ "${USE_GPU}" == "0" || "${USE_GPU}" == "false" || "${USE_GPU}" == "no" ]]; then - USE_GPU="0" -else - echo "ERROR: invalid TEI_USE_GPU=${USE_GPU_RAW}. Use 1/0 (or true/false)." >&2 +TEI_DEVICE_RAW="${TEI_DEVICE:-cuda}" +TEI_DEVICE="$(echo "${TEI_DEVICE_RAW}" | tr '[:upper:]' '[:lower:]')" +if [[ "${TEI_DEVICE}" != "cuda" && "${TEI_DEVICE}" != "cpu" ]]; then + echo "ERROR: invalid TEI_DEVICE=${TEI_DEVICE_RAW}. Use cuda/cpu." >&2 exit 1 fi @@ -50,19 +46,19 @@ detect_gpu_tei_image() { fi } -if [[ "${USE_GPU}" == "1" ]]; then +if [[ "${TEI_DEVICE}" == "cuda" ]]; then if ! command -v nvidia-smi >/dev/null 2>&1 || ! nvidia-smi >/dev/null 2>&1; then - echo "ERROR: TEI_USE_GPU=1 but NVIDIA GPU is not available. No CPU fallback." >&2 + echo "ERROR: TEI_DEVICE=cuda but NVIDIA GPU is not available. No CPU fallback." >&2 exit 1 fi if ! docker info --format '{{json .Runtimes}}' 2>/dev/null | grep -q 'nvidia'; then - echo "ERROR: TEI_USE_GPU=1 but Docker nvidia runtime is not configured." >&2 + echo "ERROR: TEI_DEVICE=cuda but Docker nvidia runtime is not configured." 
>&2 echo "Install and configure nvidia-container-toolkit, then restart Docker." >&2 exit 1 fi TEI_IMAGE="${TEI_IMAGE:-$(detect_gpu_tei_image)}" GPU_ARGS=(--gpus all) - TEI_MODE="gpu" + TEI_MODE="cuda" else TEI_IMAGE="${TEI_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:${TEI_VERSION}}" GPU_ARGS=() @@ -81,9 +77,9 @@ if [[ -n "${existing_id}" ]]; then if [[ "${current_image}" == *":cuda-"* || "${current_image}" == *":turing-"* ]]; then current_is_gpu_image=1 fi - if [[ "${USE_GPU}" == "1" ]]; then + if [[ "${TEI_DEVICE}" == "cuda" ]]; then if [[ "${current_is_gpu_image}" -eq 1 ]] && [[ "${device_req}" != "null" ]] && [[ "${current_image}" == "${TEI_IMAGE}" ]]; then - echo "TEI already running (GPU): ${TEI_CONTAINER_NAME}" + echo "TEI already running (CUDA): ${TEI_CONTAINER_NAME}" exit 0 fi echo "TEI running with different mode/image; recreating container ${TEI_CONTAINER_NAME}" diff --git a/scripts/start_translator.sh b/scripts/start_translator.sh index 5f504c2..4f45adc 100755 --- a/scripts/start_translator.sh +++ b/scripts/start_translator.sh @@ -3,13 +3,13 @@ # Start Translation Service # -set -e +set -euo pipefail cd "$(dirname "$0")/.." source ./activate.sh TRANSLATION_HOST="${TRANSLATION_HOST:-0.0.0.0}" -TRANSLATION_PORT="${TRANSLATION_PORT:-${TRANSLATOR_PORT:-6006}}" +TRANSLATION_PORT="${TRANSLATION_PORT:-6006}" echo "========================================" echo "Starting Translation Service" diff --git a/scripts/stop.sh b/scripts/stop.sh index 3b96402..f4b39da 100755 --- a/scripts/stop.sh +++ b/scripts/stop.sh @@ -1,9 +1,9 @@ #!/bin/bash -# Backward-compatible stop entrypoint. +# Service stop entrypoint. # Delegates to unified service controller. -set -e +set -euo pipefail cd "$(dirname "$0")/.." 
diff --git a/scripts/stop_cnclip_service.sh b/scripts/stop_cnclip_service.sh index df933db..24af68f 100755 --- a/scripts/stop_cnclip_service.sh +++ b/scripts/stop_cnclip_service.sh @@ -11,7 +11,7 @@ # ############################################################################### -set -e +set -euo pipefail # 颜色定义 RED='\033[0;31m' @@ -23,7 +23,7 @@ NC='\033[0m' # No Color # 项目路径 PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" LOG_DIR="${PROJECT_ROOT}/logs" -PID_FILE="${LOG_DIR}/cnclip_service.pid" +PID_FILE="${LOG_DIR}/cnclip.pid" echo -e "${BLUE}========================================${NC}" echo -e "${BLUE}停止 CN-CLIP 服务${NC}" diff --git a/scripts/stop_reranker.sh b/scripts/stop_reranker.sh deleted file mode 100755 index e397741..0000000 --- a/scripts/stop_reranker.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# -# Stop Reranker Service -# - -set -e - -cd "$(dirname "$0")/.." - -PID_FILE="logs/reranker.pid" -RERANKER_PORT="${RERANKER_PORT:-6007}" - -echo "========================================" -echo "Stopping Reranker Service" -echo "========================================" - -if [ -f "${PID_FILE}" ]; then - PID="$(cat "${PID_FILE}" 2>/dev/null || true)" - if [ -n "${PID}" ] && kill -0 "${PID}" 2>/dev/null; then - echo "Stopping PID from file: ${PID}" - kill -TERM "${PID}" 2>/dev/null || true - sleep 1 - if kill -0 "${PID}" 2>/dev/null; then - kill -KILL "${PID}" 2>/dev/null || true - fi - fi - rm -f "${PID_FILE}" -fi - -PORT_PIDS="$(lsof -ti:${RERANKER_PORT} 2>/dev/null || true)" -if [ -n "${PORT_PIDS}" ]; then - echo "Stopping process on port ${RERANKER_PORT}: ${PORT_PIDS}" - for PID in ${PORT_PIDS}; do - kill -TERM "${PID}" 2>/dev/null || true - done - sleep 1 - PORT_PIDS="$(lsof -ti:${RERANKER_PORT} 2>/dev/null || true)" - for PID in ${PORT_PIDS}; do - kill -KILL "${PID}" 2>/dev/null || true - done -fi - -# Cleanup orphaned vLLM engine workers that may survive a failed startup. 
-ENGINE_PIDS="$(pgrep -f 'VLLM::EngineCore' 2>/dev/null || true)" -if [ -n "${ENGINE_PIDS}" ]; then - echo "Stopping orphaned vLLM engine processes: ${ENGINE_PIDS}" - for PID in ${ENGINE_PIDS}; do - kill -TERM "${PID}" 2>/dev/null || true - done - sleep 1 - ENGINE_PIDS="$(pgrep -f 'VLLM::EngineCore' 2>/dev/null || true)" - for PID in ${ENGINE_PIDS}; do - kill -KILL "${PID}" 2>/dev/null || true - done -fi - -echo "Reranker service stopped." diff --git a/scripts/stop_translator.sh b/scripts/stop_translator.sh deleted file mode 100755 index 7966874..0000000 --- a/scripts/stop_translator.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -# -# Stop Translation Service -# - -set -e - -cd "$(dirname "$0")/.." - -PID_FILE="logs/translator.pid" -TRANSLATION_PORT="${TRANSLATION_PORT:-${TRANSLATOR_PORT:-6006}}" - -echo "========================================" -echo "Stopping Translation Service" -echo "========================================" - -if [ -f "${PID_FILE}" ]; then - PID="$(cat "${PID_FILE}" 2>/dev/null || true)" - if [ -n "${PID}" ] && kill -0 "${PID}" 2>/dev/null; then - echo "Stopping PID from file: ${PID}" - kill -TERM "${PID}" 2>/dev/null || true - sleep 1 - if kill -0 "${PID}" 2>/dev/null; then - kill -KILL "${PID}" 2>/dev/null || true - fi - fi - rm -f "${PID_FILE}" -fi - -PORT_PIDS="$(lsof -ti:${TRANSLATION_PORT} 2>/dev/null || true)" -if [ -n "${PORT_PIDS}" ]; then - echo "Stopping process on port ${TRANSLATION_PORT}: ${PORT_PIDS}" - for PID in ${PORT_PIDS}; do - kill -TERM "${PID}" 2>/dev/null || true - done - sleep 1 - PORT_PIDS="$(lsof -ti:${TRANSLATION_PORT} 2>/dev/null || true)" - for PID in ${PORT_PIDS}; do - kill -KILL "${PID}" 2>/dev/null || true - done -fi - -echo "Translation service stopped." -- libgit2 0.21.2