Files
ewoooc/scripts/auto-repair/master-auto-repair.sh
ogt 1b4f3a7bbe
Some checks failed
CD Pipeline / deploy (push) Failing after 59s
feat: EwoooC 初始化 — 完整專案推版至 Gitea
- 建立 Gitea Actions CD pipeline (.gitea/workflows/cd.yaml)
- 部署模式: rsync Python 檔案至 188 → docker restart (volume mount)
- Dockerfile/requirements 變動時自動重建 Docker image
- 部署通知: Telegram (開始/成功/失敗)
- 健康檢查: https://mo.wooo.work/health (最多 5 次重試)
- 同步最新 CLAUDE.md / ADR-008 / memory (2026-04-19)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-19 01:21:13 +08:00

114 lines
3.2 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# Master auto-repair orchestration script (UAT + GCP dual environment)
# Purpose: coordinates all auto-repair scripts to keep the UAT and GCP systems stable
# Runs on: UAT host (192.168.0.110)

# Absolute path of the directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# File that log output is appended to.
LOG_FILE="/var/log/master_auto_repair.log"
# File used as the single-instance lock.
LOCK_FILE="/tmp/master_auto_repair.lock"
# Make sure gcloud is on PATH (cron does not read .bashrc)
export PATH="$PATH:/home/wooo/google-cloud-sdk/bin"
export KUBECONFIG="/home/wooo/.kube/config"
# Environment labels
ENV_UAT="🟦 UAT"
ENV_GCP="🟥 GCP"
# Write one timestamped, "[Master]"-tagged line to stdout and append the
# same line to LOG_FILE.
# Arguments: $1 - message text
# Returns:   exit status of tee
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s\n' "[${stamp}] [Master] $1" | tee -a "$LOG_FILE"
}
# Prevent overlapping runs.
# Opens LOCK_FILE on file descriptor 200 and tries to take a non-blocking
# flock on it. The descriptor is left open, so a lock that was obtained
# stays attached to this shell after the function returns.
# If the lock cannot be taken, a message is logged and the script exits
# with status 0.
acquire_lock() {
  exec 200>"$LOCK_FILE"
  if flock -n 200; then
    return 0
  fi
  log "另一個實例正在運行,退出"
  exit 0
}
# Run one sub-script under a time limit and relay its output to the log.
# Arguments:
#   $1 - display name, used as the log prefix for every relayed line
#   $2 - path of the sub-script; must exist and be executable
#   $3 - time limit handed to timeout(1); defaults to 300 (5 minutes)
# Outputs:
#   Each stdout/stderr line of the sub-script is passed to log as
#   "[name] line". A warning is logged when the script is missing or not
#   executable, and when it is stopped by the time limit.
run_script() {
  local name=$1
  local script=$2
  local limit=${3:-300}
  local exit_code line

  if [ ! -x "$script" ]; then
    log "警告: $script 不存在或不可執行"
    return
  fi

  log "執行: $name"
  # IFS= and -r keep leading whitespace and backslashes intact; the
  # "|| [ -n ... ]" clause keeps a final line that has no trailing newline.
  timeout "$limit" bash "$script" 2>&1 | while IFS= read -r line || [ -n "$line" ]; do
    log "[$name] $line"
  done
  # $? here would be the status of the while loop (the last pipeline stage).
  # The status of timeout itself is the first PIPESTATUS entry, and it must
  # be read immediately, before any other command overwrites the array.
  exit_code=${PIPESTATUS[0]}

  # timeout(1) exits with 124 when it had to stop the command.
  if [ "$exit_code" -eq 124 ]; then
    log "警告: $name 執行超時"
  fi
}
# Check whether gcloud can be found on PATH (the GCP scripts need it).
# Returns: 0 when gcloud is found; otherwise logs a warning and returns 1.
check_gcloud() {
  command -v gcloud > /dev/null 2>&1 && return 0
  log "警告: gcloud 未安裝,跳過 GCP 腳本"
  return 1
}
# Orchestrate one repair pass over both environments.
# Takes the single-instance lock, then decides from the current wall-clock
# minute which sub-scripts to run:
#   - OOM handlers         when minute % 15 == 0
#   - PostgreSQL repair    when minute % 30 == 0
#   - auto-rollback checks on every invocation
# The GCP sub-scripts are only run when check_gcloud succeeds.
main() {
  acquire_lock

  log "========================================="
  log "開始主自動修復流程 (UAT + GCP 雙環境)"
  log "========================================="

  local current_minute
  current_minute=$(date +%M)
  # date +%M is zero-padded ("08", "09"); bash arithmetic reads a leading 0
  # as octal and rejects those two values, so force base 10 explicitly.
  current_minute=$((10#$current_minute))

  # ============================================
  # UAT environment repair
  # ============================================
  log "--- $ENV_UAT 環境檢查 ---"

  # 1. UAT OOM handling (every 15 minutes)
  if (( current_minute % 15 == 0 )); then
    run_script "UAT OOM Handler" "$SCRIPT_DIR/oom-handler.sh" 120
  fi

  # 2. UAT PostgreSQL maintenance (every 30 minutes)
  if (( current_minute % 30 == 0 )); then
    run_script "UAT PostgreSQL Repair" "$SCRIPT_DIR/postgres-repair.sh" 180
  fi

  # 3. UAT auto-rollback check (every invocation)
  run_script "UAT Auto Rollback" "$SCRIPT_DIR/auto-rollback.sh" 120

  # ============================================
  # GCP environment repair (via gcloud SSH)
  # ============================================
  if check_gcloud; then
    log "--- $ENV_GCP 環境檢查 ---"

    # 4. GCP OOM handling (every 15 minutes)
    if (( current_minute % 15 == 0 )); then
      run_script "GCP OOM Handler" "$SCRIPT_DIR/oom-handler-gcp.sh" 180
    fi

    # 5. GCP PostgreSQL maintenance (every 30 minutes)
    if (( current_minute % 30 == 0 )); then
      run_script "GCP PostgreSQL Repair" "$SCRIPT_DIR/postgres-repair-gcp.sh" 240
    fi

    # 6. GCP auto-rollback check (every invocation)
    run_script "GCP Auto Rollback" "$SCRIPT_DIR/auto-rollback-gcp.sh" 180
  fi

  log "========================================="
  log "主自動修復流程完成 (UAT + GCP)"
  log "========================================="
}
# Script entry point: call main, forwarding all command-line arguments.
main "$@"