Some checks failed
CD Pipeline / deploy (push) Failing after 59s
- 建立 Gitea Actions CD pipeline (.gitea/workflows/cd.yaml) - 部署模式: rsync Python 檔案至 188 → docker restart (volume mount) - Dockerfile/requirements 變動時自動重建 Docker image - 部署通知: Telegram (開始/成功/失敗) - 健康檢查: https://mo.wooo.work/health (最多 5 次重試) - 同步最新 CLAUDE.md / ADR-008 / memory (2026-04-19) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
114 lines
3.2 KiB
Bash
Executable File
114 lines
3.2 KiB
Bash
Executable File
#!/bin/bash
#
# Master auto-repair orchestration script (UAT + GCP dual environment).
# Purpose: coordinates all auto-repair scripts to keep the UAT and GCP
#          systems stable.
# Runs on: UAT host (192.168.0.110)

# Absolute directory containing this script; the repair sub-scripts are
# looked up relative to it.
SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"
LOG_FILE="/var/log/master_auto_repair.log"
LOCK_FILE="/tmp/master_auto_repair.lock"

# Make sure gcloud is on PATH (cron does not read .bashrc).
export PATH="$PATH:/home/wooo/google-cloud-sdk/bin"
export KUBECONFIG="/home/wooo/.kube/config"

# Environment labels used in log output.
ENV_UAT="🟦 UAT"
ENV_GCP="🟥 GCP"

#######################################
# Print a timestamped, "[Master]"-tagged message and append the same line
# to the log file.
# Globals:   LOG_FILE (read) - file the line is appended to
# Arguments: $1 - message text
# Outputs:   the formatted line on stdout
#######################################
log() {
  # printf keeps the message literal regardless of its content.
  printf '[%s] [Master] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" \
    | tee -a "$LOG_FILE"
}

#######################################
# Prevent overlapping runs: open LOCK_FILE on file descriptor 200 and take
# an exclusive, non-blocking flock on it. The descriptor stays open in the
# calling shell, so the lock is held until that shell exits.
# Globals:   LOCK_FILE (read)
# Arguments: none
# Outputs:   a log line when the lock cannot be obtained
# Returns:   does not return on failure - exits 0 if another instance holds
#            the lock, exits 1 if the lock file cannot be opened
#######################################
acquire_lock() {
  # An unopenable lock file would otherwise make flock fail on a bad
  # descriptor and be misreported as "another instance is running".
  if ! exec 200>"$LOCK_FILE"; then
    log "無法開啟鎖定檔 $LOCK_FILE,退出"
    exit 1
  fi

  if ! flock -n 200; then
    log "另一個實例正在運行,退出"
    exit 0
  fi
}

#######################################
# Run one repair sub-script under a time limit, relaying every line of its
# combined stdout/stderr through log with a "[name]" prefix.
# Arguments: $1 - display name used in log lines
#            $2 - path to the sub-script (must exist and be executable)
#            $3 - time limit passed to timeout(1); defaults to 300 (5 minutes)
# Outputs:   log lines for start, relayed output, timeout and missing script
#######################################
run_script() {
  local name=$1
  local script=$2
  local timeout=${3:-300} # default timeout: 5 minutes
  local line exit_code

  if [[ ! -x "$script" ]]; then
    log "警告: $script 不存在或不可執行"
    return
  fi

  log "執行: $name"
  # IFS= and -r keep leading whitespace and backslashes intact; the
  # "|| [[ -n ... ]]" clause keeps a final line that lacks a newline.
  timeout "$timeout" bash "$script" 2>&1 \
    | while IFS= read -r line || [[ -n "$line" ]]; do
      log "[$name] $line"
    done
  # $? here would be the status of the while loop; PIPESTATUS[0] is the
  # status of timeout itself, which is 124 when the time limit was hit.
  exit_code=${PIPESTATUS[0]}

  if [[ "$exit_code" -eq 124 ]]; then
    log "警告: $name 執行超時"
  fi
}

#######################################
# Check whether the gcloud command can be found (the GCP scripts need it).
# Arguments: none
# Outputs:   a warning log line when gcloud is not found
# Returns:   0 if gcloud is available, 1 otherwise
#######################################
check_gcloud() {
  if ! command -v gcloud > /dev/null 2>&1; then
    log "警告: gcloud 未安裝,跳過 GCP 腳本"
    return 1
  fi
  return 0
}

#######################################
# Entry point: take the single-instance lock, then run the UAT repair
# scripts and, when gcloud is available, the GCP repair scripts. Which
# scripts run depends on the current minute of the hour.
# Globals:   SCRIPT_DIR, ENV_UAT, ENV_GCP (read)
# Arguments: none used
# Outputs:   progress lines via log
#######################################
main() {
  acquire_lock

  log "========================================="
  log "開始主自動修復流程 (UAT + GCP 雙環境)"
  log "========================================="

  # Declared separately so a failing date is not masked by `local`.
  local current_minute
  current_minute=$(date +%M) || current_minute=0
  # date +%M is zero-padded; "08" and "09" are invalid octal literals in
  # shell arithmetic, so force base 10 before doing any modulo checks.
  local -i minute=$((10#${current_minute:-0}))

  # ============================================
  # UAT environment repair
  # ============================================
  log "--- $ENV_UAT 環境檢查 ---"

  # 1. UAT OOM handling (when the minute is a multiple of 15)
  if ((minute % 15 == 0)); then
    run_script "UAT OOM Handler" "$SCRIPT_DIR/oom-handler.sh" 120
  fi

  # 2. UAT PostgreSQL maintenance (when the minute is a multiple of 30)
  if ((minute % 30 == 0)); then
    run_script "UAT PostgreSQL Repair" "$SCRIPT_DIR/postgres-repair.sh" 180
  fi

  # 3. UAT auto-rollback detection: runs on every invocation (the original
  #    note says "every 5 minutes" - presumably the cron interval; confirm).
  run_script "UAT Auto Rollback" "$SCRIPT_DIR/auto-rollback.sh" 120

  # ============================================
  # GCP environment repair (via gcloud SSH)
  # ============================================
  if check_gcloud; then
    log "--- $ENV_GCP 環境檢查 ---"

    # 4. GCP OOM handling (when the minute is a multiple of 15)
    if ((minute % 15 == 0)); then
      run_script "GCP OOM Handler" "$SCRIPT_DIR/oom-handler-gcp.sh" 180
    fi

    # 5. GCP PostgreSQL maintenance (when the minute is a multiple of 30)
    if ((minute % 30 == 0)); then
      run_script "GCP PostgreSQL Repair" "$SCRIPT_DIR/postgres-repair-gcp.sh" 240
    fi

    # 6. GCP auto-rollback detection: runs on every invocation.
    run_script "GCP Auto Rollback" "$SCRIPT_DIR/auto-rollback-gcp.sh" 180
  fi

  log "========================================="
  log "主自動修復流程完成 (UAT + GCP)"
  log "========================================="
}

# Script entry point: forward all command-line arguments to main.
main "$@"