#!/usr/bin/env bash
# 2026-04-28 ogt + Claude Opus 4.7: P2-2 session startup health verification
# Source: tool-expert unified governance plan
# Purpose: at the start of every Claude session, quickly confirm that the
#          5 hosts and the key services are reachable.
# Intended to be read-only (curl + ssh -o BatchMode); it does not modify
# any service state.
# NOTE: StrictHostKeyChecking=accept-new lets ssh record the key of a
#       previously unknown host in known_hosts; that is the only local
#       write this script can cause.
#
# Usage:
#   bash scripts/health_check_session.sh
#   # or add an alias:
#   alias awoooi-health='bash ~/awoooi/scripts/health_check_session.sh'

# Deliberately no -e: a single failing check must not abort the others.
set -uo pipefail

# ANSI colour sequences, stored as real escape bytes so they can be passed
# to printf as data instead of being spliced into the format string.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[0;33m'
readonly NC=$'\033[0m'

# Status line helpers. Each prints one tagged, coloured line to stdout.
# Arguments: $1 - message text
ok() { printf '%s[OK]%s %s\n' "$GREEN" "$NC" "$1"; }
fail() { printf '%s[FAIL]%s %s\n' "$RED" "$NC" "$1"; }
warn() { printf '%s[WARN]%s %s\n' "$YELLOW" "$NC" "$1"; }

#######################################
# Probe an HTTP(S) endpoint and report its status.
# Arguments:
#   $1 - display name of the service
#   $2 - URL to request (TLS verification is skipped via curl -k)
# Outputs:
#   One OK / WARN / FAIL line on stdout.
# Returns:
#   0 if the server answered with any 2xx-5xx status, 1 if unreachable.
#######################################
check_url() {
  local name=$1 url=$2
  local code
  # curl itself writes "000" through -w when the transfer fails, so no
  # "|| echo 000" fallback is needed (it would append a second code).
  code=$(curl -sk --max-time 3 -o /dev/null -w '%{http_code}' "$url" 2>/dev/null)
  # Empty output means curl could not run at all; treat it as unreachable.
  code=${code:-000}

  if [[ "$code" =~ ^[23] ]]; then
    ok "$name → $url ($code)"
    return 0
  elif [[ "$code" =~ ^[45] ]]; then
    # The service answered, just not with a success/redirect status.
    warn "$name → $url ($code, 服務有回應但非 2xx/3xx)"
    return 0
  else
    fail "$name → $url (unreachable)"
    return 1
  fi
}

#######################################
# Check non-interactive SSH connectivity by running "echo ok" remotely.
# Arguments:
#   $1 - display name of the host
#   $2 - ssh destination (user@host or an ssh_config alias)
# Outputs:
#   One OK / FAIL line on stdout.
# Returns:
#   0 if the remote command printed "ok", 1 otherwise.
#######################################
check_ssh() {
  local name=$1 host=$2
  # BatchMode=yes makes ssh fail instead of prompting for a password.
  if ssh -o ConnectTimeout=3 -o BatchMode=yes \
    -o StrictHostKeyChecking=accept-new \
    "$host" "echo ok" 2>/dev/null | grep -q ok; then
    ok "SSH $name ($host)"
    return 0
  else
    fail "SSH $name ($host) — 無法連線(timeout / 認證失敗 / 主機不可達)"
    return 1
  fi
}

#######################################
# Run every health check in sequence and print the report.
# Outputs:
#   The full report on stdout.
#######################################
main() {
  local root ts branch upstream_diff drift_script

  # Repository root = parent directory of the directory holding this script.
  root="$(cd -- "$(dirname -- "$0")/.." && pwd)"
  ts="$(date '+%Y-%m-%d %H:%M %Z')"
  drift_script="$root/scripts/check_config_drift.py"

  echo "=========================================="
  echo "AWOOOI Session Health Check $ts"
  echo "=========================================="
  echo ""

  echo "--- K8s 控制平面 ---"
  check_url "K3s VIP API" "https://192.168.0.125:6443/healthz"
  check_url "ArgoCD (121)" "https://192.168.0.121:30443"
  echo ""

  echo "--- AI 推理層 ---"
  check_url "Ollama 111 GPU" "http://192.168.0.111:11434/api/tags"
  echo ""

  echo "--- 觀測層 ---"
  check_url "Prometheus 110" "http://192.168.0.110:9090/-/healthy"
  check_url "Alertmanager 110" "http://192.168.0.110:9093/-/healthy"
  check_url "Gitea 110" "http://192.168.0.110:3001"
  check_url "Langfuse 110" "http://192.168.0.110:3100"
  echo ""

  echo "--- AWOOOI 核心服務 (prod NodePort) ---"
  check_url "AWOOOI API (125)" "http://192.168.0.125:32334/api/v1/health"
  echo ""

  echo "--- SSH 連通 ---"
  check_ssh "awoooi-devops (110)" "wooo@192.168.0.110"
  check_ssh "k3s-1 (120)" "wooo@192.168.0.120"
  check_ssh "k3s-2 (121)" "wooo@192.168.0.121"
  check_ssh "ollama-111-gpu (ProxyJump 110)" "ollama-111-gpu"
  echo ""

  echo "--- Config Drift Check ---"
  # The checker is run through python3, so it only has to exist as a
  # regular file; it does not need the executable bit.
  if [[ -f "$drift_script" ]]; then
    # Any non-zero exit of the checker is reported as drift.
    python3 "$drift_script" \
      || warn "config drift detected (見上方 [DRIFT] 行)"
  else
    warn "drift checker 不存在 ($drift_script)"
  fi
  echo ""

  echo "--- Git 狀態 ---"
  # -e rather than -d: in linked worktrees and submodules .git is a file.
  if [[ -e "$root/.git" ]]; then
    # Empty branch name when HEAD is detached or git fails.
    branch=$(git -C "$root" branch --show-current 2>/dev/null || echo "")
    # "?" when no upstream is configured for the current branch.
    upstream_diff=$(git -C "$root" rev-list --count "@{u}..HEAD" \
      2>/dev/null || echo "?")
    echo " 分支: $branch (本地超前上游 $upstream_diff 個 commit)"
    # Inspect both the working tree and the index, so that staged but
    # uncommitted changes are not reported as clean.
    if git -C "$root" diff --quiet 2>/dev/null \
      && git -C "$root" diff --cached --quiet 2>/dev/null; then
      ok " 工作目錄 clean"
    else
      warn " 有未 commit 的變更(git status 自查)"
    fi
  fi
  echo ""

  echo "=========================================="
  echo "Session Health Check 結束"
  echo "=========================================="
}

main "$@"