Commit c6da6bcacccd6dcd48142ad6df382cfc04890c85

Authored by tangwang
1 parent 7913e2fb

add status.sh

@@ -28,8 +28,8 @@ source activate.sh @@ -28,8 +28,8 @@ source activate.sh
28 # 推荐:一键拉起全部服务(含监控守护) 28 # 推荐:一键拉起全部服务(含监控守护)
29 ./run.sh all # 薄封装:等价于 ./scripts/service_ctl.sh up all 29 ./run.sh all # 薄封装:等价于 ./scripts/service_ctl.sh up all
30 30
31 -# 查看状态(含 monitor daemon 状态)  
32 -./scripts/service_ctl.sh status 31 +# 查看状态 + 健康检查(含 monitor daemon 状态)
  32 +./status.sh # 薄封装:等价于 ./scripts/service_ctl.sh status
33 33
34 # 重启指定服务集合(示例:全部) 34 # 重启指定服务集合(示例:全部)
35 ./restart.sh all # 薄封装:等价于 ./scripts/service_ctl.sh restart all 35 ./restart.sh all # 薄封装:等价于 ./scripts/service_ctl.sh restart all
docs/Usage-Guide.md
@@ -249,6 +249,7 @@ python -m http.server 6003 @@ -249,6 +249,7 @@ python -m http.server 6003
249 - `./run.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh up [all|service...]`。 249 - `./run.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh up [all|service...]`。
250 - `./restart.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh restart [all|service...]`。 250 - `./restart.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh restart [all|service...]`。
251 - `./scripts/stop.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh down [all|service...]`。 251 - `./scripts/stop.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh down [all|service...]`。
  252 +- `./status.sh [service...]`:薄封装,直接调用 `./scripts/service_ctl.sh status [service...]`(参数原样透传;包含进程/端口 + HTTP 健康检查结果)。
252 - `./scripts/service_ctl.sh`:统一控制器,支持 `up/down/start/stop/restart/status/monitor*`(带参数行为完全由此脚本定义)。 253 - `./scripts/service_ctl.sh`:统一控制器,支持 `up/down/start/stop/restart/status/monitor*`(带参数行为完全由此脚本定义)。
253 254
254 ### 2) `service_ctl.sh` 的默认行为 255 ### 2) `service_ctl.sh` 的默认行为
@@ -258,7 +259,7 @@ python -m http.server 6003 @@ -258,7 +259,7 @@ python -m http.server 6003
258 - `start`:**必须显式指定** 服务或 `all`(不自动拉起 monitor daemon)。 259 - `start`:**必须显式指定** 服务或 `all`(不自动拉起 monitor daemon)。
259 - `stop`:**必须显式指定** 服务或 `all`;若 monitor daemon 运行会先停止它。 260 - `stop`:**必须显式指定** 服务或 `all`;若 monitor daemon 运行会先停止它。
260 - `restart`:**必须显式指定** 服务或 `all`。 261 - `restart`:**必须显式指定** 服务或 `all`。
261 -- `status`(不带服务名):显示全部已知服务状态 + monitor daemon 状态 262 +- `status`(不带服务名):显示全部已知服务状态 + monitor daemon 状态,并对支持的服务执行 HTTP/容器级健康检查
262 263
263 ### 3) 全量服务一键拉起 264 ### 3) 全量服务一键拉起
264 265
@@ -276,7 +277,7 @@ python -m http.server 6003 @@ -276,7 +277,7 @@ python -m http.server 6003
276 # 一键拉起完整栈(推荐) 277 # 一键拉起完整栈(推荐)
277 ./scripts/service_ctl.sh up all # 或 ./run.sh all 278 ./scripts/service_ctl.sh up all # 或 ./run.sh all
278 279
279 -# 查看全量状态 280 +# 查看全量状态 + 健康检查(进程/端口 + HTTP)
280 ./scripts/service_ctl.sh status 281 ./scripts/service_ctl.sh status
281 282
282 # 仅重启某个服务 283 # 仅重启某个服务
scripts/service_ctl.sh
@@ -96,7 +96,6 @@ health_path_for_service() { @@ -96,7 +96,6 @@ health_path_for_service() {
96 local service="$1" 96 local service="$1"
97 case "${service}" in 97 case "${service}" in
98 backend|indexer|embedding|translator|reranker|tei) echo "/health" ;; 98 backend|indexer|embedding|translator|reranker|tei) echo "/health" ;;
99 - frontend) echo "/" ;;  
100 *) echo "" ;; 99 *) echo "" ;;
101 esac 100 esac
102 } 101 }
@@ -583,6 +582,8 @@ status_one() { @@ -583,6 +582,8 @@ status_one() {
583 port="$(get_port "${service}")" 582 port="$(get_port "${service}")"
584 local running="no" 583 local running="no"
585 local pid_info="-" 584 local pid_info="-"
  585 + local health="down"
  586 + local health_body=""
586 587
587 if [ "${service}" = "tei" ]; then 588 if [ "${service}" = "tei" ]; then
588 local cid 589 local cid
@@ -591,8 +592,22 @@ status_one() { @@ -591,8 +592,22 @@ status_one() {
591 if [ -n "${cid}" ]; then 592 if [ -n "${cid}" ]; then
592 running="yes" 593 running="yes"
593 pid_info="${cid:0:12}" 594 pid_info="${cid:0:12}"
  595 + # TEI: container 级别 running 后再尝试 HTTP /health
  596 + local path
  597 + path="$(health_path_for_service "${service}")"
  598 + if [ -n "${port}" ] && [ -n "${path}" ]; then
  599 + if health_body="$(curl -fsS "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then
  600 + health="ok"
  601 + else
  602 + health="fail"
  603 + fi
  604 + fi
  605 + fi
  606 + if [ -n "${health_body}" ]; then
  607 + printf "%-10s running=%-3s port=%-6s pid=%s health=%-4s body=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}" "${health}" "${health_body}"
  608 + else
  609 + printf "%-10s running=%-3s port=%-6s pid=%s health=%-4s\n" "${service}" "${running}" "${port:--}" "${pid_info}" "${health}"
594 fi 610 fi
595 - printf "%-10s running=%-3s port=%-6s pid=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}"  
596 return 611 return
597 fi 612 fi
598 613
@@ -604,7 +619,26 @@ status_one() { @@ -604,7 +619,26 @@ status_one() {
604 pid_info="$(lsof -ti:${port} 2>/dev/null | tr '\n' ',' | sed 's/,$//' || echo "-")" 619 pid_info="$(lsof -ti:${port} 2>/dev/null | tr '\n' ',' | sed 's/,$//' || echo "-")"
605 fi 620 fi
606 621
607 - printf "%-10s running=%-3s port=%-6s pid=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}" 622 + if [ "${running}" = "yes" ]; then
  623 + local path
  624 + path="$(health_path_for_service "${service}")"
  625 + if [ -n "${port}" ] && [ -n "${path}" ]; then
  626 + if health_body="$(curl -fsS "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then
  627 + health="ok"
  628 + else
  629 + health="fail"
  630 + fi
  631 + else
  632 + # 没有 HTTP 健康检查端点(如 cnclip),运行即可视为 ok
  633 + health="ok"
  634 + fi
  635 + fi
  636 +
  637 + if [ -n "${health_body}" ]; then
  638 + printf "%-10s running=%-3s port=%-6s pid=%s health=%-4s body=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}" "${health}" "${health_body}"
  639 + else
  640 + printf "%-10s running=%-3s port=%-6s pid=%s health=%-4s\n" "${service}" "${running}" "${port:--}" "${pid_info}" "${health}"
  641 + fi
608 } 642 }
609 643
610 service_is_running() { 644 service_is_running() {
status.sh 0 → 100644
@@ -0,0 +1,8 @@ @@ -0,0 +1,8 @@
  1 +#!/bin/bash
  2 +
  3 +set -euo pipefail
  4 +
  5 +cd "$(dirname "$0")"
  6 +
  7 +./scripts/service_ctl.sh status "$@"
  8 +