Commit c6da6bcacccd6dcd48142ad6df382cfc04890c85
1 parent 7913e2fb
add status.sh
Showing 4 changed files with 50 additions and 7 deletions
Show diff stats
README.md
| @@ -28,8 +28,8 @@ source activate.sh | @@ -28,8 +28,8 @@ source activate.sh | ||
| 28 | # 推荐:一键拉起全部服务(含监控守护) | 28 | # 推荐:一键拉起全部服务(含监控守护) |
| 29 | ./run.sh all # 薄封装:等价于 ./scripts/service_ctl.sh up all | 29 | ./run.sh all # 薄封装:等价于 ./scripts/service_ctl.sh up all |
| 30 | 30 | ||
| 31 | -# 查看状态(含 monitor daemon 状态) | ||
| 32 | -./scripts/service_ctl.sh status | 31 | +# 查看状态 + 健康检查(含 monitor daemon 状态) |
| 32 | +./status.sh # 薄封装:等价于 ./scripts/service_ctl.sh status | ||
| 33 | 33 | ||
| 34 | # 重启指定服务集合(示例:全部) | 34 | # 重启指定服务集合(示例:全部) |
| 35 | ./restart.sh all # 薄封装:等价于 ./scripts/service_ctl.sh restart all | 35 | ./restart.sh all # 薄封装:等价于 ./scripts/service_ctl.sh restart all |
docs/Usage-Guide.md
| @@ -249,6 +249,7 @@ python -m http.server 6003 | @@ -249,6 +249,7 @@ python -m http.server 6003 | ||
| 249 | - `./run.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh up [all|service...]`。 | 249 | - `./run.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh up [all|service...]`。 |
| 250 | - `./restart.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh restart [all|service...]`。 | 250 | - `./restart.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh restart [all|service...]`。 |
| 251 | - `./scripts/stop.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh down [all|service...]`。 | 251 | - `./scripts/stop.sh [all|service...]`:薄封装,直接调用 `./scripts/service_ctl.sh down [all|service...]`。 |
| 252 | +- `./status.sh`:薄封装,直接调用 `./scripts/service_ctl.sh status`(包含进程/端口 + HTTP 健康检查结果)。 | ||
| 252 | - `./scripts/service_ctl.sh`:统一控制器,支持 `up/down/start/stop/restart/status/monitor*`(带参数行为完全由此脚本定义)。 | 253 | - `./scripts/service_ctl.sh`:统一控制器,支持 `up/down/start/stop/restart/status/monitor*`(带参数行为完全由此脚本定义)。 |
| 253 | 254 | ||
| 254 | ### 2) `service_ctl.sh` 的默认行为 | 255 | ### 2) `service_ctl.sh` 的默认行为 |
| @@ -258,7 +259,7 @@ python -m http.server 6003 | @@ -258,7 +259,7 @@ python -m http.server 6003 | ||
| 258 | - `start`:**必须显式指定** 服务或 `all`(不自动拉起 monitor daemon)。 | 259 | - `start`:**必须显式指定** 服务或 `all`(不自动拉起 monitor daemon)。 |
| 259 | - `stop`:**必须显式指定** 服务或 `all`;若 monitor daemon 运行会先停止它。 | 260 | - `stop`:**必须显式指定** 服务或 `all`;若 monitor daemon 运行会先停止它。 |
| 260 | - `restart`:**必须显式指定** 服务或 `all`。 | 261 | - `restart`:**必须显式指定** 服务或 `all`。 |
| 261 | -- `status`(不带服务名):显示全部已知服务状态 + monitor daemon 状态。 | 262 | +- `status`(不带服务名):显示全部已知服务状态 + monitor daemon 状态,并对支持的服务执行 HTTP/容器级健康检查。 |
| 262 | 263 | ||
| 263 | ### 3) 全量服务一键拉起 | 264 | ### 3) 全量服务一键拉起 |
| 264 | 265 | ||
| @@ -276,7 +277,7 @@ python -m http.server 6003 | @@ -276,7 +277,7 @@ python -m http.server 6003 | ||
| 276 | # 一键拉起完整栈(推荐) | 277 | # 一键拉起完整栈(推荐) |
| 277 | ./scripts/service_ctl.sh up all # 或 ./run.sh all | 278 | ./scripts/service_ctl.sh up all # 或 ./run.sh all |
| 278 | 279 | ||
| 279 | -# 查看全量状态 | 280 | +# 查看全量状态 + 健康检查(进程/端口 + HTTP) |
| 280 | ./scripts/service_ctl.sh status | 281 | ./scripts/service_ctl.sh status |
| 281 | 282 | ||
| 282 | # 仅重启某个服务 | 283 | # 仅重启某个服务 |
scripts/service_ctl.sh
| @@ -96,7 +96,6 @@ health_path_for_service() { | @@ -96,7 +96,6 @@ health_path_for_service() { | ||
| 96 | local service="$1" | 96 | local service="$1" |
| 97 | case "${service}" in | 97 | case "${service}" in |
| 98 | backend|indexer|embedding|translator|reranker|tei) echo "/health" ;; | 98 | backend|indexer|embedding|translator|reranker|tei) echo "/health" ;; |
| 99 | - frontend) echo "/" ;; | ||
| 100 | *) echo "" ;; | 99 | *) echo "" ;; |
| 101 | esac | 100 | esac |
| 102 | } | 101 | } |
| @@ -583,6 +582,8 @@ status_one() { | @@ -583,6 +582,8 @@ status_one() { | ||
| 583 | port="$(get_port "${service}")" | 582 | port="$(get_port "${service}")" |
| 584 | local running="no" | 583 | local running="no" |
| 585 | local pid_info="-" | 584 | local pid_info="-" |
| 585 | + local health="down" | ||
| 586 | + local health_body="" | ||
| 586 | 587 | ||
| 587 | if [ "${service}" = "tei" ]; then | 588 | if [ "${service}" = "tei" ]; then |
| 588 | local cid | 589 | local cid |
| @@ -591,8 +592,22 @@ status_one() { | @@ -591,8 +592,22 @@ status_one() { | ||
| 591 | if [ -n "${cid}" ]; then | 592 | if [ -n "${cid}" ]; then |
| 592 | running="yes" | 593 | running="yes" |
| 593 | pid_info="${cid:0:12}" | 594 | pid_info="${cid:0:12}" |
| 595 | + # TEI: container 级别 running 后再尝试 HTTP /health | ||
| 596 | + local path | ||
| 597 | + path="$(health_path_for_service "${service}")" | ||
| 598 | + if [ -n "${port}" ] && [ -n "${path}" ]; then | ||
| 599 | + if health_body="$(curl -fsS "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then | ||
| 600 | + health="ok" | ||
| 601 | + else | ||
| 602 | + health="fail" | ||
| 603 | + fi | ||
| 604 | + fi | ||
| 605 | + fi | ||
| 606 | + if [ -n "${health_body}" ]; then | ||
| 607 | + printf "%-10s running=%-3s port=%-6s pid=%s health=%-4s body=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}" "${health}" "${health_body}" | ||
| 608 | + else | ||
| 609 | + printf "%-10s running=%-3s port=%-6s pid=%s health=%-4s\n" "${service}" "${running}" "${port:--}" "${pid_info}" "${health}" | ||
| 594 | fi | 610 | fi |
| 595 | - printf "%-10s running=%-3s port=%-6s pid=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}" | ||
| 596 | return | 611 | return |
| 597 | fi | 612 | fi |
| 598 | 613 | ||
| @@ -604,7 +619,26 @@ status_one() { | @@ -604,7 +619,26 @@ status_one() { | ||
| 604 | pid_info="$(lsof -ti:${port} 2>/dev/null | tr '\n' ',' | sed 's/,$//' || echo "-")" | 619 | pid_info="$(lsof -ti:${port} 2>/dev/null | tr '\n' ',' | sed 's/,$//' || echo "-")" |
| 605 | fi | 620 | fi |
| 606 | 621 | ||
| 607 | - printf "%-10s running=%-3s port=%-6s pid=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}" | 622 | + if [ "${running}" = "yes" ]; then |
| 623 | + local path | ||
| 624 | + path="$(health_path_for_service "${service}")" | ||
| 625 | + if [ -n "${port}" ] && [ -n "${path}" ]; then | ||
| 626 | + if health_body="$(curl -fsS "http://127.0.0.1:${port}${path}" 2>/dev/null)"; then | ||
| 627 | + health="ok" | ||
| 628 | + else | ||
| 629 | + health="fail" | ||
| 630 | + fi | ||
| 631 | + else | ||
| 632 | + # 没有 HTTP 健康检查端点(如 cnclip),运行即可视为 ok | ||
| 633 | + health="ok" | ||
| 634 | + fi | ||
| 635 | + fi | ||
| 636 | + | ||
| 637 | + if [ -n "${health_body}" ]; then | ||
| 638 | + printf "%-10s running=%-3s port=%-6s pid=%s health=%-4s body=%s\n" "${service}" "${running}" "${port:--}" "${pid_info}" "${health}" "${health_body}" | ||
| 639 | + else | ||
| 640 | + printf "%-10s running=%-3s port=%-6s pid=%s health=%-4s\n" "${service}" "${running}" "${port:--}" "${pid_info}" "${health}" | ||
| 641 | + fi | ||
| 608 | } | 642 | } |
| 609 | 643 | ||
| 610 | service_is_running() { | 644 | service_is_running() { |