#!/bin/bash
# scripts/ops/deploy-alerts.sh
# Deploy the unified alert rules to the Prometheus instance on host 110.
# 2026-04-05 Claude Code: Sprint 1 automated deployment
# Usage: bash scripts/ops/deploy-alerts.sh [--dry-run]
#
# Steps:
#   1. Check that the local rules file exists and parses as YAML.
#   2. With --dry-run: print what would be deployed and exit 0.
#   3. Back up the rules file currently on the target host (best effort).
#   4. Copy the new rules file to the target host.
#   5. Ask Prometheus to reload its configuration.
#   6. Query the loaded rules and verify the total count and the key rules.
#
# Requirements: python3 with PyYAML locally; ssh/scp access as wooo@TARGET_HOST;
# curl on the target host.
#
# Exit status: 0 on success, 1 on any validation/deployment failure,
# 2 on an unrecognised argument.

set -euo pipefail

RULES_FILE="ops/monitoring/alerts-unified.yml"
TARGET_HOST="192.168.0.110"
TARGET_PATH="/home/wooo/monitoring/alerts.yml"
PROMETHEUS_URL="http://${TARGET_HOST}:9090"
SSH_TARGET="wooo@${TARGET_HOST}"
# Fewer loaded rules than this is treated as a failed deployment.
MIN_RULE_COUNT=25
DRY_RUN="${1:-}"

# Print a timestamped progress message to stdout.
log() { echo "[$(date '+%H:%M:%S')] $*"; }

# Print an error message to stderr and abort with status 1.
die() {
  echo "ERROR: $*" >&2
  exit 1
}

# Reject anything other than "no argument" or --dry-run, so that a typo such
# as --dryrun cannot silently trigger a real deployment.
case "$DRY_RUN" in
  "" | --dry-run) ;;
  *)
    echo "ERROR: unknown argument: $DRY_RUN" >&2
    echo "Usage: bash scripts/ops/deploy-alerts.sh [--dry-run]" >&2
    exit 2
    ;;
esac

# Make sure the rules file exists.
[ -f "$RULES_FILE" ] || die "$RULES_FILE not found"

# Validate YAML syntax. The path is passed as an argument rather than being
# interpolated into the Python source.
python3 -c '
import sys, yaml
with open(sys.argv[1]) as fh:
    yaml.safe_load(fh)
' "$RULES_FILE" || die "YAML syntax error"
log "✅ YAML 語法驗證通過"

# Dry-run mode: report and stop before touching the target host.
if [ "$DRY_RUN" = "--dry-run" ]; then
  log "DRY RUN: would deploy $RULES_FILE to ${TARGET_HOST}:${TARGET_PATH}"
  # grep -c prints 0 but exits 1 when nothing matches; "|| true" keeps
  # set -e from aborting before the count is logged.
  RULE_COUNT=$(grep -c "alert:" "$RULES_FILE" || true)
  log "規則數量: $RULE_COUNT 條"
  exit 0
fi

# Back up the existing rules. This stays best effort (a missing or failed
# backup does not block the deployment), but the outcome is reported
# truthfully instead of always claiming success.
BACKUP_STATUS=$(ssh "$SSH_TARGET" "
  if [ ! -f '${TARGET_PATH}' ]; then
    echo ABSENT
  elif cp '${TARGET_PATH}' '${TARGET_PATH}.bak.'\$(date +%Y%m%d%H%M%S); then
    echo BACKED_UP
  else
    echo FAILED
  fi")
case "$BACKUP_STATUS" in
  BACKED_UP) log "✅ 現有規則已備份" ;;
  ABSENT) log "No existing rules at ${TARGET_PATH}; nothing to back up" ;;
  *) echo "WARNING: backup of ${TARGET_PATH} failed; continuing without backup" >&2 ;;
esac

# Deploy the new rules.
scp "$RULES_FILE" "${SSH_TARGET}:${TARGET_PATH}"
log "✅ 規則已複製到 ${TARGET_HOST}"

# Reload Prometheus. -f makes curl exit non-zero on an HTTP error status, so a
# rejected reload stops the script here instead of going unnoticed.
ssh "$SSH_TARGET" "curl -fsS -X POST '${PROMETHEUS_URL}/-/reload'" \
  || die "Prometheus reload failed (${PROMETHEUS_URL}/-/reload)"
# Give Prometheus a moment to load the rule groups before querying them.
sleep 3

# Fetch the loaded rules once and reuse the response for every check below.
RULES_JSON=$(ssh "$SSH_TARGET" "curl -fsS '${PROMETHEUS_URL}/api/v1/rules'") \
  || die "failed to query ${PROMETHEUS_URL}/api/v1/rules"

# One rule name per line, across all groups.
RULE_NAMES=$(python3 -c '
import json, sys
doc = json.load(sys.stdin)
for group in doc["data"]["groups"]:
    for rule in group["rules"]:
        print(rule["name"])
' <<<"$RULES_JSON") || die "could not parse the Prometheus rules API response"

# Verify the rule count. grep -c "" counts lines; it exits 1 on empty input
# (count 0), hence "|| true".
RULE_COUNT=$(printf '%s' "$RULE_NAMES" | grep -c '' || true)
log "Prometheus 已載入 ${RULE_COUNT} 條規則"
if [ "$RULE_COUNT" -lt "$MIN_RULE_COUNT" ]; then
  die "規則數量異常 ($RULE_COUNT < ${MIN_RULE_COUNT}),請檢查"
fi

# Verify that the key rules are present. Every missing rule is reported
# before exiting so one run shows the full picture.
KEY_RULES=("SentryDown" "HarborDown" "GiteaDown" "OpenClawDown" "AlertmanagerDown" "AlertChainUnhealthy")
MISSING=0
for rule in "${KEY_RULES[@]}"; do
  # -F -x: exact, whole-line match of the literal rule name.
  if grep -Fxq -- "$rule" <<<"$RULE_NAMES"; then
    log "✅ $rule"
  else
    echo "❌ $rule 未找到" >&2
    MISSING=1
  fi
done
if [ "$MISSING" -ne 0 ]; then
  exit 1
fi

log "🎉 部署完成!所有關鍵規則已生效"