diff --git a/ops/alertmanager/alertmanager.yml b/ops/alertmanager/alertmanager.yml
new file mode 100644
index 00000000..b3f59768
--- /dev/null
+++ b/ops/alertmanager/alertmanager.yml
@@ -0,0 +1,103 @@
+# AWOOOI Alertmanager configuration
+# 2026-04-05 Claude Code: corrected the webhook URL
+#   before: http://192.168.0.188:8088/api/v1/webhook/alertmanager (OpenClaw, legacy system, wrong)
+#   after:  http://192.168.0.121:32334/api/v1/webhooks/alertmanager (AWOOOI API, plural "webhooks", correct)
+#   per the hard rule in feedback_alertmanager_awoooi_flow.md
+# 2026-04-09 Claude Sonnet 4.6 Asia/Taipei: added Telegram fallback (ADR-035)
+#   layout: awoooi-webhook (primary path) + telegram-direct (fallback, separate route)
+#   intent: critical alerts still reach the Telegram Bot API when the AWOOOI API cannot respond
+# ⚠️ bot_token/chat_id are substituted from secrets at deploy time; this file is a template
+#
+# Label matching uses matchers / source_matchers / target_matchers; the older
+# match, match_re, source_match and target_match(_re) fields are deprecated.
+
+global:
+  resolve_timeout: 5m
+
+# Routing tree: alerts not claimed by a child route go to awoooi-webhook.
+route:
+  receiver: 'awoooi-webhook'
+  group_by: ['alertname', 'severity']
+  group_wait: 30s
+  group_interval: 5m
+  repeat_interval: 4h
+  routes:
+    - matchers:
+        - 'severity = "critical"'
+      receiver: 'awoooi-webhook'
+      group_wait: 10s
+      # continue: true keeps evaluating the following sibling routes, so
+      # critical alerts are also delivered to telegram-direct (fallback).
+      continue: true
+    # Same matcher as the route above: every critical alert is sent here too;
+    # routing does not depend on whether the webhook delivery succeeded.
+    - matchers:
+        - 'severity = "critical"'
+      receiver: 'telegram-direct'
+      group_wait: 10s
+    - matchers:
+        - 'severity = "warning"'
+      receiver: 'awoooi-webhook'
+    # NOTE(review): only alerts that are neither critical nor warning get this
+    # far, because the routes above match first - confirm that is intended.
+    - matchers:
+        - 'alertname =~ "Zombie.*|Container.*"'
+      receiver: 'awoooi-webhook'
+      group_wait: 1m
+
+receivers:
+  # Primary path: the AWOOOI API handles all alerts (AI analysis + dedup + Telegram).
+  - name: 'awoooi-webhook'
+    webhook_configs:
+      - url: 'http://192.168.0.121:32334/api/v1/webhooks/alertmanager'
+        send_resolved: true
+
+  # Fallback path: critical alerts go straight to Telegram in case the AWOOOI API is down.
+  # Only severity=critical takes this path (avoids double notifications for warnings).
+  - name: 'telegram-direct'
+    telegram_configs:
+      - bot_token: 'TELEGRAM_BOT_TOKEN_PLACEHOLDER'
+        # Left unquoted: the value substituted at deploy time must parse as an integer.
+        chat_id: TELEGRAM_CHAT_ID_PLACEHOLDER
+        parse_mode: 'HTML'
+        # The " / " separator is emitted only when both host and instance are
+        # set, so the two label values never run together.
+        message: |
+          🚨 [Alertmanager Fallback]
+          {{ range .Alerts }}
+          ├ {{ .Labels.alertname }}
+          ├ 嚴重度: {{ .Labels.severity }}
+          ├ 主機: {{ .Labels.host }}{{ if and .Labels.host .Labels.instance }} / {{ end }}{{ .Labels.instance }}
+          └ {{ .Annotations.summary }}
+          {{ end }}
+          ⚠️ AWOOOI API 可能離線,此為直接告警
+        send_resolved: false
+
+# Inhibition: while a source alert fires, matching target alerts are muted.
+inhibit_rules:
+  # A critical alert mutes the warning with the same alertname and instance.
+  - source_matchers:
+      - 'severity = "critical"'
+    target_matchers:
+      - 'severity = "warning"'
+    equal: ['alertname', 'instance']
+  - source_matchers:
+      - 'alertname = "HostDown"'
+    target_matchers:
+      - 'alertname =~ "HostHighCpuLoad|HostOutOfMemory|HostOutOfDiskSpace"'
+    equal: ['host']
+  - source_matchers:
+      - 'alertname = "KubeNodeNotReady"'
+    target_matchers:
+      - 'alertname =~ "KubePodCrashLooping|KubePodNotReady|KubeDeploymentReplicasMismatch"'
+    equal: ['node']
+  # NOTE(review): the two rules below have no equal list, so one firing *Down
+  # alert mutes the target alert on every instance - confirm this is intended.
+  - source_matchers:
+      - 'alertname = "PostgreSQLDown"'
+    target_matchers:
+      - 'alertname = "PostgreSQLHighConnections"'
+  - source_matchers:
+      - 'alertname = "RedisDown"'
+    target_matchers:
+      - 'alertname = "RedisMemoryHigh"'