Commit c6da6bcacccd6dcd48142ad6df382cfc04890c85

Authored by tangwang
1 parent 7913e2fb

add status.sh

README.md
... ... @@ -28,8 +28,8 @@ source activate.sh
28 28 # 推荐:一键拉起全部服务(含监控守护)
29 29 ./run.sh all # 薄封装:等价于 ./scripts/service_ctl.sh up all
30 30  
31   -# 查看状态(含 monitor daemon 状态)
32   -./scripts/service_ctl.sh status
  31 +# 查看状态 + 健康检查(含 monitor daemon 状态)
  32 +./status.sh # 薄封装:等价于 ./scripts/service_ctl.sh status
33 33  
34 34 # 重启指定服务集合(示例:全部)
35 35 ./restart.sh all # 薄封装:等价于 ./scripts/service_ctl.sh restart all
... ...
docs/Usage-Guide.md
... ... @@ -249,6 +249,7 @@ python -m http.server 6003
249 249 - `./run.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh up [all|service...]`。
250 250 - `./restart.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh restart [all|service...]`。
251 251 - `./scripts/stop.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh down [all|service...]`。
  252 +- `./status.sh [service...]`:薄封装,直接调用 `./scripts/service_ctl.sh status [service...]`(参数原样透传;输出包含进程/端口 + HTTP 健康检查结果)。
252 253 - `./scripts/service_ctl.sh`:统一控制器,支持 `up/down/start/stop/restart/status/monitor*`(带参数行为完全由此脚本定义)。
253 254  
254 255 ### 2) `service_ctl.sh` 的默认行为
... ... @@ -258,7 +259,7 @@ python -m http.server 6003
258 259 - `start`:**必须显式指定** 服务或 `all`(不自动拉起 monitor daemon)。
259 260 - `stop`:**必须显式指定** 服务或 `all`;若 monitor daemon 运行会先停止它。
260 261 - `restart`:**必须显式指定** 服务或 `all`。
261   -- `status`(不带服务名):显示全部已知服务状态 + monitor daemon 状态
  262 +- `status`(不带服务名):显示全部已知服务状态 + monitor daemon 状态,并对支持的服务执行 HTTP/容器级健康检查
262 263  
263 264 ### 3) 全量服务一键拉起
264 265  
... ... @@ -276,7 +277,7 @@ python -m http.server 6003
276 277 # 一键拉起完整栈(推荐)
277 278 ./scripts/service_ctl.sh up all # 或 ./run.sh all
278 279  
279   -# 查看全量状态
  280 +# 查看全量状态 + 健康检查(进程/端口 + HTTP)
280 281 ./scripts/service_ctl.sh status
281 282  
282 283 # 仅重启某个服务
... ...
scripts/service_ctl.sh
... ... @@ -96,7 +96,6 @@ health_path_for_service() {
96 96 local service="$1"
97 97 case "${service}" in
98 98 backend|indexer|embedding|translator|reranker|tei) echo "/health" ;;
99   - frontend) echo "/" ;;
100 99 *) echo "" ;;
101 100 esac
102 101 }
... ... @@ -583,6 +582,8 @@ status_one() {
583 582 port="$(get_port "${service}")"
584 583 local running="no"
585 584 local pid_info="-"
  585 + local health="down"
  586 + local health_body=""
586 587  
587 588 if [ "${service}" = "tei" ]; then
588 589 local cid
... ... @@ -591,8 +592,22 @@ status_one() {
591 592 if [ -n "${cid}" ]; then
592 593 running="yes"
593 594 pid_info="${cid:0:12}"
  595 + # TEI: container 级别 running 后再尝试 HTTP /health
  596 + local path
  597 + path="$(health_path_for_service "${service}")"
  598 + if [ -n "${port}" ] && [ -n "${path}" ]; then
  599 + if health_body="$(curl -fsS "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then
  600 + health="ok"
  601 + else
  602 + health="fail"
  603 + fi
  604 + fi
  605 + fi
  606 + if [ -n "${health_body}" ]; then
  607 + printf "%-10s running=%-3s port=%-6s pid=%s health=%-4s body=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}" "${health}" "${health_body}"
  608 + else
  609 + printf "%-10s running=%-3s port=%-6s pid=%s health=%-4s\n" "${service}" "${running}" "${port:--}" "${pid_info}" "${health}"
594 610 fi
595   - printf "%-10s running=%-3s port=%-6s pid=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}"
596 611 return
597 612 fi
598 613  
... ... @@ -604,7 +619,26 @@ status_one() {
604 619 pid_info="$(lsof -ti:${port} 2>/dev/null | tr '\n' ',' | sed 's/,$//' || echo "-")"
605 620 fi
606 621  
607   - printf "%-10s running=%-3s port=%-6s pid=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}"
  622 + if [ "${running}" = "yes" ]; then
  623 + local path
  624 + path="$(health_path_for_service "${service}")"
  625 + if [ -n "${port}" ] && [ -n "${path}" ]; then
  626 + if health_body="$(curl -fsS "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then
  627 + health="ok"
  628 + else
  629 + health="fail"
  630 + fi
  631 + else
  632 + # 没有 HTTP 健康检查端点(如 cnclip),运行即可视为 ok
  633 + health="ok"
  634 + fi
  635 + fi
  636 +
  637 + if [ -n "${health_body}" ]; then
  638 + printf "%-10s running=%-3s port=%-6s pid=%s health=%-4s body=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}" "${health}" "${health_body}"
  639 + else
  640 + printf "%-10s running=%-3s port=%-6s pid=%s health=%-4s\n" "${service}" "${running}" "${port:--}" "${pid_info}" "${health}"
  641 + fi
608 642 }
609 643  
610 644 service_is_running() {
... ...
status.sh 0 → 100644
... ... @@ -0,0 +1,8 @@
  1 +#!/bin/bash
  2 +
  3 +set -euo pipefail
  4 +
  5 +cd "$(dirname "$0")"
  6 +
  7 +./scripts/service_ctl.sh status "$@"
  8 +
... ...