docs(ops): add executable post-start quick check [skip ci]
This commit is contained in:
273
scripts/reboot-recovery/post-start-quick-check.sh
Executable file
273
scripts/reboot-recovery/post-start-quick-check.sh
Executable file
@@ -0,0 +1,273 @@
|
||||
#!/usr/bin/env bash
# No `-e`: the checks below inspect `$?` after commands that are allowed to
# fail, so a failing probe must not abort the whole run.
set -uo pipefail

# One-entry read-only post-reboot check. This wrapper intentionally delegates
# deep checks to the existing recovery scripts and does not restart, patch,
# delete, import, reload, or write runtime state.
|
||||
|
||||
# Repository root: two directories above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Value passed to `ssh -o ConnectTimeout=`; can be overridden from the environment.
SSH_CONNECT_TIMEOUT="${SSH_CONNECT_TIMEOUT:-6}"

# Section toggles: 1 = run, 0 = skip (flipped by the --skip-* options).
RUN_COLD_START=1
RUN_MOMO=1
RUN_BACKUP=1
RUN_ROUTES=1
RUN_CPU=1

# Set to 1 by --no-color.
NO_COLOR_FLAG=0

# Running tallies updated by ok / warn / blocked.
PASS_COUNT=0
WARN_COUNT=0
BLOCKED_COUNT=0

# Hosts probed with ping and a TCP connect to port 22.
HOSTS=(
  "192.168.0.110"
  "192.168.0.120"
  "192.168.0.121"
  "192.168.0.188"
)

# Public URLs fetched by the route smoke test.
ROUTES=(
  "https://awoooi.wooo.work/api/v1/health"
  "https://awoooi.wooo.work/zh-TW/iwooos"
  "https://mo.wooo.work/health"
  "https://stock.wooo.work/"
)
|
||||
|
||||
# Print the command-line help text to stdout.
# The heredoc delimiter is quoted, so the text is emitted literally.
usage() {
  cat <<'USAGE'
Usage: post-start-quick-check.sh [options]

Read-only post-reboot quick check for 110 / 120 / 121 / 188.

Options:
  --skip-cold-start   Do not run full-stack-cold-start-check.sh.
  --skip-momo         Do not run momo-drive-token-source-recovery-preflight.sh.
  --skip-backup       Do not run /backup/scripts/backup-status.sh on 110.
  --skip-routes       Do not curl public route smoke targets.
  --skip-cpu          Do not read 110 CPU / process summary.
  --no-color          Disable ANSI color.
  -h, --help          Show this help.

Exit codes:
  0 = no blockers.
  1 = warnings only.
  2 = blockers observed.

This script never reads token content and never writes runtime state.
USAGE
}
|
||||
|
||||
# Apply command-line options to the RUN_* / NO_COLOR_FLAG globals.
# Arguments: the script's own argument list.
# Exits 0 after printing help; exits 2 (help on stderr) on an unknown argument.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --skip-cold-start)
        RUN_COLD_START=0
        ;;
      --skip-momo)
        RUN_MOMO=0
        ;;
      --skip-backup)
        RUN_BACKUP=0
        ;;
      --skip-routes)
        RUN_ROUTES=0
        ;;
      --skip-cpu)
        RUN_CPU=0
        ;;
      --no-color)
        NO_COLOR_FLAG=1
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        printf 'Unknown argument: %s\n' "$1" >&2
        usage >&2
        exit 2
        ;;
    esac
    shift
  done
}

parse_args "$@"
|
||||
|
||||
# ANSI palette used by section / ok / warn / blocked.
# Colour is turned off when NO_COLOR is non-empty in the environment or when
# --no-color was given; the flag is defaulted so this also works if it was
# never initialised.
if [[ -n "${NO_COLOR:-}" || "${NO_COLOR_FLAG:-0}" -eq 1 ]]; then
  RED=""
  GREEN=""
  YELLOW=""
  BLUE=""
  NC=""
else
  RED=$'\033[0;31m'
  GREEN=$'\033[0;32m'
  YELLOW=$'\033[1;33m'
  BLUE=$'\033[0;34m'
  NC=$'\033[0m'
fi
|
||||
|
||||
# Print a section banner: a blank line, then "=== <title> ===" wrapped in the
# BLUE / NC colour codes.
# Arguments: $1 - section title
section() {
  printf '\n%s=== %s ===%s\n' "$BLUE" "$1" "$NC"
}
|
||||
|
||||
# Record a passing check: bump PASS_COUNT and print an "OK" line.
# Arguments: message words (joined with spaces via "$*")
ok() {
  PASS_COUNT=$((PASS_COUNT + 1))
  printf '%sOK%s %s\n' "$GREEN" "$NC" "$*"
}
|
||||
|
||||
# Record a warning: bump WARN_COUNT and print a "WARN" line.
# Arguments: message words (joined with spaces via "$*")
warn() {
  WARN_COUNT=$((WARN_COUNT + 1))
  printf '%sWARN%s %s\n' "$YELLOW" "$NC" "$*"
}
|
||||
|
||||
# Record a blocker: bump BLOCKED_COUNT and print a "BLOCKED" line.
# Arguments: message words (joined with spaces via "$*")
blocked() {
  BLOCKED_COUNT=$((BLOCKED_COUNT + 1))
  printf '%sBLOCKED%s %s\n' "$RED" "$NC" "$*"
}
|
||||
|
||||
# Run one command on a remote host over non-interactive ssh.
# Arguments: $1 - ssh destination (user@host)
#            $2 - command string passed to ssh as the remote command
# Returns:   ssh's exit status.
ssh_read() {
  local user_host="$1"
  local command="$2"
  # BatchMode=yes makes ssh fail instead of prompting for a password.
  ssh -o BatchMode=yes -o ConnectTimeout="$SSH_CONNECT_TIMEOUT" "$user_host" "$command"
}
|
||||
|
||||
# Run a command with stdout+stderr captured to a temp file, then replay the
# captured output. On success an OK line for the label is recorded first.
# Arguments: $1   - label reported via ok() when the command succeeds
#            $2.. - command and its arguments
# Returns:   the command's own exit status (or mktemp's status if the temp
#            file could not be created).
run_and_capture() {
  local label="$1"
  shift
  local tmp
  local rc=0
  tmp="$(mktemp -t post-start-quick-check.XXXXXX)" || return
  # Capture the status right at the call site: reading $? after an `if ... fi`
  # whose condition failed yields 0, which would hide the failure.
  "$@" >"$tmp" 2>&1 || rc=$?
  if [[ "$rc" -eq 0 ]]; then
    ok "$label"
  fi
  cat "$tmp"
  rm -f "$tmp"
  return "$rc"
}
|
||||
|
||||
# Probe every entry of HOSTS with one ICMP ping and a TCP connect to port 22.
# Each probe is recorded independently as ok or blocked.
check_hosts() {
  local host
  section "主機 / SSH"
  for host in "${HOSTS[@]}"; do
    if ping -c 1 -W 1 "$host" >/dev/null 2>&1; then
      ok "PING_OK $host"
    else
      blocked "PING_FAIL $host"
    fi

    if nc -z -w 2 "$host" 22 >/dev/null 2>&1; then
      ok "SSH_PORT_OK $host"
    else
      blocked "SSH_PORT_FAIL $host"
    fi
  done
}

check_hosts
|
||||
|
||||
# Run the full-stack cold-start check once in read-only monitor mode, replay
# its output, and fold its last "PASS=… WARN=… BLOCKED=…" line into this
# script's tallies.
# The summary line is graded by its own numbers: BLOCKED>0 is a blocker,
# WARN>0 a warning, otherwise OK. Previously any summary was counted as OK,
# even one that reported blockers.
check_cold_start() {
  local out_file summary
  local summary_re='WARN=([0-9]+) BLOCKED=([0-9]+)'

  section "Cold-start scorecard"
  out_file="$(mktemp -t post-start-cold-start.XXXXXX)"
  if bash "$ROOT_DIR/scripts/reboot-recovery/full-stack-cold-start-check.sh" \
    --monitor-read-only --no-color --watch --interval 1 --max-attempts 1 \
    >"$out_file" 2>&1; then
    ok "cold-start command exited 0"
  else
    blocked "cold-start command returned non-zero"
  fi
  cat "$out_file"

  # `|| true`: under pipefail a grep with no match would fail the pipeline.
  summary="$(grep -E 'PASS=[0-9]+ WARN=[0-9]+ BLOCKED=[0-9]+' "$out_file" | tail -n 1 || true)"
  rm -f "$out_file"

  # 10# forces base 10 so a zero-padded count is not parsed as octal.
  if [[ -z "$summary" ]]; then
    warn "cold-start summary not found"
  elif [[ "$summary" =~ $summary_re ]] && ((10#${BASH_REMATCH[2]} > 0)); then
    blocked "cold-start summary: $summary"
  elif [[ "$summary" =~ $summary_re ]] && ((10#${BASH_REMATCH[1]} > 0)); then
    warn "cold-start summary: $summary"
  else
    ok "cold-start summary: $summary"
  fi
}

if [[ "$RUN_COLD_START" -eq 1 ]]; then
  check_cold_start
fi
|
||||
|
||||
# Run the MOMO drive-token-source preflight, replay its output, grade it by
# exit status (0 = clean, 1 = warnings, anything else = blockers), then print
# the key status lines again as a digest.
check_momo() {
  local out_file rc

  section "MOMO freshness"
  out_file="$(mktemp -t post-start-momo.XXXXXX)"
  bash "$ROOT_DIR/scripts/reboot-recovery/momo-drive-token-source-recovery-preflight.sh" >"$out_file" 2>&1
  rc=$?
  cat "$out_file"

  case "$rc" in
    0)
      ok "MOMO preflight clean"
      ;;
    1)
      warn "MOMO preflight has warnings"
      ;;
    *)
      blocked "MOMO preflight has blockers"
      ;;
  esac

  # Digest of the key lines; no match is not an error.
  grep -E 'MOMO_DRIVE_TOKEN_SOURCE_PREFLIGHT|MOMO_HEALTH_VERSION|DB_MONTHLY_SYNC|DB_DAILY_FRESHNESS|DB_LATEST_DAILY_IMPORT_JOB' "$out_file" || true
  rm -f "$out_file"
}

if [[ "$RUN_MOMO" -eq 1 ]]; then
  check_momo
fi
|
||||
|
||||
# Read the backup status report from 110 over ssh, replay it, and grade it by
# grepping for the core-blocker and credential-escrow counters.
check_backup() {
  local out_file

  section "Backup / offsite / escrow"
  out_file="$(mktemp -t post-start-backup.XXXXXX)"
  if ssh_read "wooo@192.168.0.110" '/backup/scripts/backup-status.sh --no-notify --no-refresh' >"$out_file" 2>&1; then
    ok "backup-status readback succeeded"
  else
    blocked "backup-status readback failed"
  fi
  cat "$out_file"

  # Anything other than an explicit zero count is only "not confirmed".
  if grep -Eq 'core_blockers=0|CORE_BLOCKERS[ =]0' "$out_file"; then
    ok "backup core blockers are 0"
  else
    warn "backup core blocker summary not confirmed"
  fi

  # Three outcomes: explicit zero, explicit non-zero, or counter absent.
  if grep -Eq 'escrow_missing=0|ESCROW_MISSING_COUNT[ =]0' "$out_file"; then
    ok "credential escrow missing is 0"
  elif grep -Eq 'escrow_missing=[1-9]|ESCROW_MISSING_COUNT[ =][1-9]' "$out_file"; then
    warn "credential escrow still missing; DR_COMPLETE is forbidden"
  else
    warn "credential escrow count not found"
  fi
  rm -f "$out_file"
}

if [[ "$RUN_BACKUP" -eq 1 ]]; then
  check_backup
fi
|
||||
|
||||
# Fetch every URL in ROUTES and grade it by HTTP status: 2xx/3xx is ok,
# anything else (including a failed transfer) is blocked.
check_routes() {
  local url code

  section "Public routes"
  for url in "${ROUTES[@]}"; do
    # NOTE(review): -k skips TLS certificate verification, so an expired or
    # mismatched certificate is not reported here — confirm this is intended.
    code="$(curl -k -sS -o /dev/null -w '%{http_code}' --max-time 12 "$url" 2>/dev/null || true)"
    case "$code" in
      2* | 3*)
        ok "$code $url"
        ;;
      *)
        # Empty output (e.g. curl could not be run) is shown as curl_failed.
        blocked "${code:-curl_failed} $url"
        ;;
    esac
  done
}

if [[ "$RUN_ROUTES" -eq 1 ]]; then
  check_routes
fi
|
||||
|
||||
# Read uptime, a short vmstat sample and the top CPU consumers from 110,
# replay the output, and flag browser/smoke processes and CI/build load by
# keyword. A failed readback is only a warning.
check_cpu() {
  local out_file

  section "110 CPU / process attribution"
  out_file="$(mktemp -t post-start-cpu.XXXXXX)"
  if ssh_read "wooo@192.168.0.110" 'uptime; vmstat 1 5; ps -eo pid,ppid,pgid,stat,pcpu,pmem,comm,args --sort=-pcpu | head -25' >"$out_file" 2>&1; then
    ok "110 CPU/process readback succeeded"
  else
    warn "110 CPU/process readback failed"
  fi
  cat "$out_file"

  # Keyword matches are case-insensitive and apply to the whole capture.
  if grep -Eiq 'chrome|chromium|playwright' "$out_file"; then
    warn "browser/smoke process is visible; classify orphan vs active parent before action"
  fi
  if grep -Eiq 'gitea|actions|runner|npm|pnpm|pytest|pip-audit' "$out_file"; then
    ok "active CI/build/test load is visible"
  fi
  rm -f "$out_file"
}

if [[ "$RUN_CPU" -eq 1 ]]; then
  check_cpu
fi
|
||||
|
||||
# Final tally and exit status:
#   any blocker           -> RESULT=BLOCKED,  exit 2
#   warnings, no blockers -> RESULT=DEGRADED, exit 1
#   neither               -> RESULT=GREEN,    exit 0
section "總結"
printf 'POST_START_QUICK_CHECK PASS=%s WARN=%s BLOCKED=%s\n' "$PASS_COUNT" "$WARN_COUNT" "$BLOCKED_COUNT"

if [[ "$BLOCKED_COUNT" -gt 0 ]]; then
  printf 'RESULT=BLOCKED\n'
  exit 2
fi

if [[ "$WARN_COUNT" -gt 0 ]]; then
  printf 'RESULT=DEGRADED\n'
  exit 1
fi

printf 'RESULT=GREEN\n'
exit 0
|
||||
Reference in New Issue
Block a user