feat(alertmanager): Telegram Fallback 直送路徑 (ADR-035)
新增 telegram-direct receiver,critical 告警同時走: 1. awoooi-webhook (主路徑: AI 分析 + 去重) 2. telegram-direct (fallback: AWOOOI API 掛時直接通知) continue:true 讓 critical route 同時匹配兩個 receiver。 warning 僅走 awoooi-webhook,避免雙重通知。 已在 110 熱重載驗證 (receivers: awoooi-webhook + telegram-direct)。 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
87
ops/alertmanager/alertmanager.yml
Normal file
87
ops/alertmanager/alertmanager.yml
Normal file
@@ -0,0 +1,87 @@
|
||||
# AWOOOI Alertmanager 配置
|
||||
# 2026-04-05 Claude Code: 修正 webhook URL
|
||||
# 修正前: http://192.168.0.188:8088/api/v1/webhook/alertmanager (OpenClaw,舊系統,錯誤)
|
||||
# 修正後: http://192.168.0.121:32334/api/v1/webhooks/alertmanager (AWOOOI API,複數,正確)
|
||||
# 根據 feedback_alertmanager_awoooi_flow.md 鐵律
|
||||
# 2026-04-09 Claude Sonnet 4.6 Asia/Taipei: 新增 Telegram Fallback (ADR-035)
|
||||
# 架構: awoooi-webhook (主路徑) + telegram-direct (fallback,獨立路由)
|
||||
# 當 AWOOOI API 無法回應時,critical 告警直接送 Telegram Bot API
|
||||
# ⚠️ bot_token/chat_id 部署時由 secrets 替換,此檔為模板
|
||||
|
||||
# Global defaults applied to the whole Alertmanager instance.
global:
  # Alerts that stop being updated are declared resolved after this long.
  resolve_timeout: 5m
|
||||
|
||||
# Routing tree. Alerts that match no child route are delivered to the root
# receiver ('awoooi-webhook') using the root grouping/timing settings.
route:
  receiver: 'awoooi-webhook'
  group_by: ['alertname', 'severity']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h
  routes:
    # Critical alerts, primary path. `continue: true` keeps evaluating the
    # sibling routes below, so the same alert also reaches 'telegram-direct'.
    - matchers:
        - 'severity = "critical"'
      receiver: 'awoooi-webhook'
      group_wait: 10s
      continue: true
    # Critical alerts, fallback path: delivered directly to Telegram.
    # No `continue`, so route evaluation for critical alerts stops here.
    - matchers:
        - 'severity = "critical"'
      receiver: 'telegram-direct'
      group_wait: 10s
    # Warning alerts go to the webhook only (no direct Telegram copy).
    - matchers:
        - 'severity = "warning"'
      receiver: 'awoooi-webhook'
    # Zombie*/Container* alerts use a longer group_wait.
    # NOTE(review): critical and warning alerts are already consumed by the
    # routes above, so this route only sees alerts with another (or no)
    # severity - confirm that this ordering is intended.
    - matchers:
        - 'alertname =~ "Zombie.*|Container.*"'
      receiver: 'awoooi-webhook'
      group_wait: 1m
|
||||
|
||||
receivers:
  # Primary path: the AWOOOI API handles every alert
  # (AI analysis + de-duplication + Telegram delivery).
  - name: 'awoooi-webhook'
    webhook_configs:
      - url: 'http://192.168.0.121:32334/api/v1/webhooks/alertmanager'
        send_resolved: true

  # Fallback path: when the AWOOOI API is down, critical alerts are sent
  # straight to Telegram. Only the critical route targets this receiver,
  # which avoids duplicate notifications for warnings.
  - name: 'telegram-direct'
    telegram_configs:
      # Both placeholders are replaced from secrets at deploy time.
      # chat_id must end up as a bare integer, so it is left unquoted.
      - bot_token: 'TELEGRAM_BOT_TOKEN_PLACEHOLDER'
        chat_id: TELEGRAM_CHAT_ID_PLACEHOLDER
        parse_mode: 'HTML'
        # The host line prints `host` and/or `instance`; the " / " separator
        # is emitted only when both labels are present so the two values do
        # not run together.
        message: |
          🚨 <b>[Alertmanager Fallback]</b>
          {{ range .Alerts }}
          ├ <b>{{ .Labels.alertname }}</b>
          ├ 嚴重度: {{ .Labels.severity }}
          ├ 主機: {{ .Labels.host }}{{ if and .Labels.host .Labels.instance }} / {{ end }}{{ .Labels.instance }}
          └ {{ .Annotations.summary }}
          {{ end }}
          <i>⚠️ AWOOOI API 可能離線,此為直接告警</i>
        # Resolved notifications are not sent on the fallback path.
        send_resolved: false
|
||||
|
||||
# Inhibition rules: while a source alert is firing, matching target alerts
# are muted. When `equal` is set, source and target must carry the same
# values for the listed labels.
inhibit_rules:
  # A critical alert mutes the warning alert with the same alertname/instance.
  - source_matchers:
      - 'severity = "critical"'
    target_matchers:
      - 'severity = "warning"'
    equal: ['alertname', 'instance']
  # A host that is down mutes its own resource alerts.
  - source_matchers:
      - 'alertname = "HostDown"'
    target_matchers:
      - 'alertname =~ "HostHighCpuLoad|HostOutOfMemory|HostOutOfDiskSpace"'
    equal: ['host']
  # A not-ready Kubernetes node mutes workload alerts carrying the same node.
  - source_matchers:
      - 'alertname = "KubeNodeNotReady"'
    target_matchers:
      - 'alertname =~ "KubePodCrashLooping|KubePodNotReady|KubeDeploymentReplicasMismatch"'
    equal: ['node']
  # NOTE(review): no `equal` list here, so any firing PostgreSQLDown mutes
  # every PostgreSQLHighConnections alert - confirm this is intended.
  - source_matchers:
      - 'alertname = "PostgreSQLDown"'
    target_matchers:
      - 'alertname = "PostgreSQLHighConnections"'
  # NOTE(review): no `equal` list here either, so any firing RedisDown mutes
  # every RedisMemoryHigh alert - confirm this is intended.
  - source_matchers:
      - 'alertname = "RedisDown"'
    target_matchers:
      - 'alertname = "RedisMemoryHigh"'
|
||||
Reference in New Issue
Block a user