From 7e327c806efc981f5147fd7f982cf9fbcfb862e2 Mon Sep 17 00:00:00 2001
From: OG T
Date: Thu, 9 Apr 2026 09:04:46 +0800
Subject: [PATCH] =?UTF-8?q?feat(alertmanager):=20Telegram=20Fallback=20?=
 =?UTF-8?q?=E7=9B=B4=E9=80=81=E8=B7=AF=E5=BE=91=20(ADR-035)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

新增 telegram-direct receiver,critical 告警同時走:
1. awoooi-webhook (主路徑: AI 分析 + 去重)
2. telegram-direct (fallback: AWOOOI API 掛時直接通知)

continue:true 讓 critical route 同時匹配兩個 receiver。
warning 僅走 awoooi-webhook,避免雙重通知。

已在 110 熱重載驗證 (receivers: awoooi-webhook + telegram-direct)。

Co-Authored-By: Claude Sonnet 4.6
---
 ops/alertmanager/alertmanager.yml | 98 +++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)
 create mode 100644 ops/alertmanager/alertmanager.yml

diff --git a/ops/alertmanager/alertmanager.yml b/ops/alertmanager/alertmanager.yml
new file mode 100644
index 00000000..b3f59768
--- /dev/null
+++ b/ops/alertmanager/alertmanager.yml
@@ -0,0 +1,98 @@
+# AWOOOI Alertmanager configuration
+# 2026-04-05 Claude Code: fixed the webhook URL
+#   before: http://192.168.0.188:8088/api/v1/webhook/alertmanager (OpenClaw, legacy system, wrong)
+#   after:  http://192.168.0.121:32334/api/v1/webhooks/alertmanager (AWOOOI API, plural "webhooks", correct)
+#   per the hard rule in feedback_alertmanager_awoooi_flow.md
+# 2026-04-09 Claude Sonnet 4.6 Asia/Taipei: added Telegram fallback (ADR-035)
+#   Architecture: awoooi-webhook (primary path) + telegram-direct (fallback, separate route)
+#   When the AWOOOI API cannot respond, critical alerts go straight to the Telegram Bot API
+#   ⚠️ bot_token/chat_id are replaced from secrets at deploy time; this file is a template
+#   Matching uses matchers / source_matchers / target_matchers; the older
+#   match, match_re, source_match and target_match* keys are deprecated.
+
+global:
+  resolve_timeout: 5m
+
+route:
+  # Default receiver for alerts that no child route claims.
+  receiver: 'awoooi-webhook'
+  group_by: ['alertname', 'severity']
+  group_wait: 30s
+  group_interval: 5m
+  repeat_interval: 4h
+  routes:
+    # Critical alerts, primary path.
+    - matchers:
+        - 'severity = "critical"'
+      receiver: 'awoooi-webhook'
+      group_wait: 10s
+      # continue: true keeps evaluating sibling routes, so critical alerts
+      # are also delivered to telegram-direct (fallback).
+      continue: true
+    # Critical alerts, fallback path.
+    - matchers:
+        - 'severity = "critical"'
+      receiver: 'telegram-direct'
+      group_wait: 10s
+    - matchers:
+        - 'severity = "warning"'
+      receiver: 'awoooi-webhook'
+    - matchers:
+        - 'alertname =~ "Zombie.*|Container.*"'
+      receiver: 'awoooi-webhook'
+      group_wait: 1m
+
+receivers:
+  # Primary path: the AWOOOI API handles all alerts (AI analysis + dedup + Telegram)
+  - name: 'awoooi-webhook'
+    webhook_configs:
+      - url: 'http://192.168.0.121:32334/api/v1/webhooks/alertmanager'
+        send_resolved: true
+
+  # Fallback path: when the AWOOOI API is down, critical alerts go straight to Telegram
+  # Only critical severity takes this path (avoids duplicate notifications for warnings)
+  - name: 'telegram-direct'
+    telegram_configs:
+      - bot_token: 'TELEGRAM_BOT_TOKEN_PLACEHOLDER'
+        # Left unquoted: Alertmanager expects an integer chat ID once substituted.
+        chat_id: TELEGRAM_CHAT_ID_PLACEHOLDER
+        parse_mode: 'HTML'
+        # host and instance are joined with " / " only when both labels are set.
+        message: |
+          🚨 [Alertmanager Fallback]
+          {{ range .Alerts }}
+          ├ {{ .Labels.alertname }}
+          ├ 嚴重度: {{ .Labels.severity }}
+          ├ 主機: {{ .Labels.host }}{{ if and .Labels.host .Labels.instance }} / {{ end }}{{ .Labels.instance }}
+          └ {{ .Annotations.summary }}
+          {{ end }}
+          ⚠️ AWOOOI API 可能離線,此為直接告警
+        send_resolved: false
+
+inhibit_rules:
+  # A critical alert mutes the warning with the same alertname and instance.
+  - source_matchers:
+      - 'severity = "critical"'
+    target_matchers:
+      - 'severity = "warning"'
+    equal: ['alertname', 'instance']
+  - source_matchers:
+      - 'alertname = "HostDown"'
+    target_matchers:
+      - 'alertname =~ "HostHighCpuLoad|HostOutOfMemory|HostOutOfDiskSpace"'
+    equal: ['host']
+  - source_matchers:
+      - 'alertname = "KubeNodeNotReady"'
+    target_matchers:
+      - 'alertname =~ "KubePodCrashLooping|KubePodNotReady|KubeDeploymentReplicasMismatch"'
+    equal: ['node']
+  # NOTE(review): no equal list below, so these two rules mute the target
+  # alert on every instance while the source fires; confirm that is intended.
+  - source_matchers:
+      - 'alertname = "PostgreSQLDown"'
+    target_matchers:
+      - 'alertname = "PostgreSQLHighConnections"'
+  - source_matchers:
+      - 'alertname = "RedisDown"'
+    target_matchers:
+      - 'alertname = "RedisMemoryHigh"'