#!/bin/bash
# scripts/ops/deploy-alerts.sh
# Deploy the unified alerting rules to the Prometheus instance on host 110.
# 2026-04-05 Claude Code: Sprint 1 automated deployment
# Usage: bash scripts/ops/deploy-alerts.sh [--dry-run]
#
# Origin: commit 7becdcbaf6d2be1a396a00c099ac713965d20e59 (OG T, 2026-04-05,
# Co-Authored-By: Claude Sonnet 4.6), subject "ops(scripts): add
# deploy-alerts.sh to auto-deploy Prometheus rules". Per that commit message:
#   - flow: validate YAML -> back up -> scp -> reload -> verify rule count
#     and key rules;
#   - Prometheus --web.enable-lifecycle was enabled at the same time (host 110
#     docker-compose.yml); the reload step below depends on that endpoint;
#   - deployment check at commit time: 28 rules loaded, key rules SentryDown /
#     HarborDown / GiteaDown / OpenClawDown / AlertmanagerDown /
#     AlertChainUnhealthy all live.
#
# RULES_FILE is resolved against the current directory; the usage line above
# implies running from the repository root.
# Needs locally: python3 with PyYAML, ssh, scp. Needs on the target: curl.
#
# Exit status: 0 on success (or dry run), 1 on any failed step, 2 on bad usage.

set -euo pipefail

readonly RULES_FILE="ops/monitoring/alerts-unified.yml"
readonly TARGET_USER="wooo"
readonly TARGET_HOST="192.168.0.110"
readonly TARGET_PATH="/home/wooo/monitoring/alerts.yml"
readonly PROMETHEUS_URL="http://${TARGET_HOST}:9090"
readonly SSH_TARGET="${TARGET_USER}@${TARGET_HOST}"
# Fewer loaded rules than this after a reload is treated as a failed deploy.
readonly MIN_RULE_COUNT=25
# Rules that must be present in Prometheus after the deploy.
readonly KEY_RULES=(
  "SentryDown"
  "HarborDown"
  "GiteaDown"
  "OpenClawDown"
  "AlertmanagerDown"
  "AlertChainUnhealthy"
)

# Print a timestamped progress message on stdout.
log() { printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*"; }

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the command-line synopsis on stderr.
usage() {
  printf 'Usage: bash scripts/ops/deploy-alerts.sh [--dry-run]\n' >&2
}

# Check that the rules file exists and parses as YAML.
validate_rules_file() {
  [[ -f "$RULES_FILE" ]] || die "ERROR: $RULES_FILE not found"

  # The path travels through argv rather than being spliced into the Python
  # source, so quotes in a path cannot alter the code. Binary mode lets PyYAML
  # pick the encoding itself instead of relying on the locale.
  python3 -c '
import sys, yaml
with open(sys.argv[1], "rb") as fh:
    yaml.safe_load(fh)
' "$RULES_FILE" || die "ERROR: YAML syntax error"

  log "✅ YAML 語法驗證通過"
}

# Describe what a real run would deploy, without touching the target host.
report_dry_run() {
  local alert_count

  log "DRY RUN: would deploy $RULES_FILE to ${TARGET_HOST}:${TARGET_PATH}"
  # Anchored so "alert:" inside annotation text is not counted. grep -c exits
  # 1 when the count is zero; the guard keeps set -e from aborting before the
  # count is logged.
  alert_count=$(grep -cE '^[[:space:]]*(-[[:space:]]*)?alert:' "$RULES_FILE" || true)
  log "規則數量: $alert_count 條"
}

# Copy the rules file currently on the target to a timestamped .bak sibling.
# Best-effort: a failed copy is reported but does not stop the deploy; only an
# ssh failure aborts.
backup_remote_rules() {
  local outcome

  outcome=$(ssh "$SSH_TARGET" "
    if [ ! -e '${TARGET_PATH}' ]; then
      echo absent
    elif cp '${TARGET_PATH}' '${TARGET_PATH}'.bak.\$(date +%Y%m%d%H%M%S); then
      echo saved
    else
      echo failed
    fi
  ") || die "ERROR: ssh to ${SSH_TARGET} failed while backing up rules"

  case "$outcome" in
    saved) log "✅ 現有規則已備份" ;;
    absent) log "No existing rules file at ${TARGET_PATH}; nothing to back up" ;;
    *) printf 'WARNING: could not back up %s; continuing without a backup\n' \
      "$TARGET_PATH" >&2 ;;
  esac
}

# Copy the local rules file over the one on the target host.
upload_rules() {
  # NOTE(review): scp rewrites the destination in place. If this file is
  # bind-mounted into the Prometheus container, switching to upload-then-rename
  # could leave the container reading the old file -- confirm before changing.
  scp "$RULES_FILE" "${SSH_TARGET}:${TARGET_PATH}"
  log "✅ 規則已複製到 ${TARGET_HOST}"
}

# Ask Prometheus to reload its configuration, then wait briefly.
reload_prometheus() {
  # -f turns an HTTP error status into a non-zero curl exit; without it a
  # rejected reload would go unnoticed and the old rule set would be verified.
  ssh "$SSH_TARGET" "curl -fsS -X POST '${PROMETHEUS_URL}/-/reload'" \
    || die "ERROR: Prometheus reload failed (lifecycle endpoint disabled or rules rejected?)"
  sleep 3
}

# Print the name of every rule Prometheus has loaded, one per line.
# curl runs on the target host, as before; the JSON is parsed locally.
fetch_loaded_rule_names() {
  local payload

  payload=$(ssh "$SSH_TARGET" "curl -fsS '${PROMETHEUS_URL}/api/v1/rules'") \
    || return 1

  python3 -c '
import json, sys
doc = json.load(sys.stdin)
for group in doc["data"]["groups"]:
    for rule in group["rules"]:
        print(rule["name"])
' <<<"$payload"
}

# Verify that enough rules are loaded and that every key rule is among them.
verify_loaded_rules() {
  local names count rule missing=0

  # One fetch serves both the count and every key-rule lookup.
  names=$(fetch_loaded_rule_names) \
    || die "ERROR: could not read loaded rules from ${PROMETHEUS_URL}"

  count=$(grep -c . <<<"$names" || true)
  [[ "$count" =~ ^[0-9]+$ ]] || die "ERROR: could not count loaded rules"
  log "Prometheus 已載入 ${count} 條規則"

  if (( count < MIN_RULE_COUNT )); then
    die "ERROR: 規則數量異常 ($count < ${MIN_RULE_COUNT}),請檢查"
  fi

  # Report every missing key rule before failing, not just the first.
  for rule in "${KEY_RULES[@]}"; do
    if grep -Fxq -- "$rule" <<<"$names"; then
      log "✅ $rule"
    else
      printf '%s\n' "❌ $rule 未找到" >&2
      missing=$((missing + 1))
    fi
  done

  (( missing == 0 )) || die "ERROR: ${missing} key rule(s) missing after reload"
}

# Entry point. Arguments: $1 - optional "--dry-run".
main() {
  local dry_run=0

  # Anything other than the one known flag is rejected, so a typo such as
  # "--dryrun" cannot fall through to a real deployment.
  case "${1:-}" in
    "") ;;
    --dry-run) dry_run=1 ;;
    *)
      usage
      exit 2
      ;;
  esac

  validate_rules_file

  if (( dry_run )); then
    report_dry_run
    exit 0
  fi

  backup_remote_rules
  upload_rules
  reload_prometheus
  verify_loaded_rules

  log "🎉 部署完成!所有關鍵規則已生效"
}

main "$@"