新增 telegram-direct receiver,critical 告警同時走: 1. awoooi-webhook (主路徑: AI 分析 + 去重) 2. telegram-direct (fallback: AWOOOI API 掛時直接通知) continue:true 讓 critical route 同時匹配兩個 receiver。 warning 僅走 awoooi-webhook,避免雙重通知。 已在 110 熱重載驗證 (receivers: awoooi-webhook + telegram-direct)。 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
88 lines
2.9 KiB
YAML
88 lines
2.9 KiB
YAML
# AWOOOI Alertmanager 配置
# 2026-04-05 Claude Code: 修正 webhook URL
# 修正前: http://192.168.0.188:8088/api/v1/webhook/alertmanager (OpenClaw,舊系統,錯誤)
# 修正後: http://192.168.0.121:32334/api/v1/webhooks/alertmanager (AWOOOI API,複數,正確)
# 根據 feedback_alertmanager_awoooi_flow.md 鐵律
# 2026-04-09 Claude Sonnet 4.6 Asia/Taipei: 新增 Telegram Fallback (ADR-035)
# 架構: awoooi-webhook (主路徑) + telegram-direct (fallback,獨立路由)
# 當 AWOOOI API 無法回應時,critical 告警直接送 Telegram Bot API
# ⚠️ bot_token/chat_id 部署時由 secrets 替換,此檔為模板
|
||
# Global defaults shared by all receivers.
global:
  # Per the Alertmanager docs, an alert that carries no end time is treated
  # as resolved once it has not been updated for this long.
  resolve_timeout: 5m
|
||
|
||
# Routing tree. Alerts that match none of the child routes are delivered to
# the root receiver (awoooi-webhook) with the root grouping/timing settings.
route:
  receiver: 'awoooi-webhook'
  group_by: ['alertname', 'severity']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h
  routes:
    # Critical alerts, primary path: deliver to the AWOOOI webhook.
    # continue: true makes Alertmanager keep evaluating the sibling routes
    # after this match, so the same alert also hits the telegram-direct
    # route below.
    - matchers:
        - 'severity = "critical"'
      receiver: 'awoooi-webhook'
      group_wait: 10s
      continue: true
    # Critical alerts, fallback path: send straight to Telegram.
    # This route matches every critical alert; nothing here makes it
    # conditional on the webhook delivery above having failed.
    - matchers:
        - 'severity = "critical"'
      receiver: 'telegram-direct'
      group_wait: 10s
    # Warning alerts go to the webhook only (no direct Telegram copy).
    - matchers:
        - 'severity = "warning"'
      receiver: 'awoooi-webhook'
    # NOTE(review): routes are evaluated top to bottom, so Zombie*/Container*
    # alerts with severity critical or warning are already claimed by the
    # routes above and never get this 1m group_wait. Confirm whether this
    # route is meant to sit first.
    - matchers:
        - 'alertname =~ "Zombie.*|Container.*"'
      receiver: 'awoooi-webhook'
      group_wait: 1m
|
||
|
||
receivers:
  # Primary path: every alert is posted to the AWOOOI API webhook, which
  # (per the original note) handles AI analysis, de-duplication and the
  # regular Telegram notification.
  - name: 'awoooi-webhook'
    webhook_configs:
      - url: 'http://192.168.0.121:32334/api/v1/webhooks/alertmanager'
        send_resolved: true

  # Fallback path: critical alerts are sent directly to the Telegram Bot API
  # so they still arrive when the AWOOOI API is down. Only the critical
  # route references this receiver, which keeps warnings from being
  # notified twice.
  - name: 'telegram-direct'
    telegram_configs:
      # Both placeholders are substituted from secrets at deploy time.
      - bot_token: 'TELEGRAM_BOT_TOKEN_PLACEHOLDER'
        # Left unquoted on purpose: Alertmanager expects an integer chat ID,
        # so the substituted value must parse as a number.
        chat_id: TELEGRAM_CHAT_ID_PLACEHOLDER
        parse_mode: 'HTML'
        # host and instance are rendered with a space between them; printed
        # back to back, the two label values run together when an alert
        # carries both.
        message: |
          🚨 <b>[Alertmanager Fallback]</b>
          {{ range .Alerts }}
          ├ <b>{{ .Labels.alertname }}</b>
          ├ 嚴重度: {{ .Labels.severity }}
          ├ 主機: {{ .Labels.host }} {{ .Labels.instance }}
          └ {{ .Annotations.summary }}
          {{ end }}
          <i>⚠️ AWOOOI API 可能離線,此為直接告警</i>
        # Resolved notifications are left to the primary webhook path.
        send_resolved: false
|
||
|
||
# Inhibition: while an alert matching source_matchers is firing, alerts
# matching target_matchers are muted, provided both carry identical values
# for every label listed under `equal`.
inhibit_rules:
  # A critical alert mutes the warning of the same alertname on the same
  # instance.
  - source_matchers:
      - 'severity = "critical"'
    target_matchers:
      - 'severity = "warning"'
    equal: ['alertname', 'instance']
  # A host that is down mutes its own resource alerts.
  - source_matchers:
      - 'alertname = "HostDown"'
    target_matchers:
      - 'alertname =~ "HostHighCpuLoad|HostOutOfMemory|HostOutOfDiskSpace"'
    equal: ['host']
  # A node that is not ready mutes pod/deployment alerts for that node.
  - source_matchers:
      - 'alertname = "KubeNodeNotReady"'
    target_matchers:
      - 'alertname =~ "KubePodCrashLooping|KubePodNotReady|KubeDeploymentReplicasMismatch"'
    equal: ['node']
  # NOTE(review): the two rules below have no `equal`, so one firing
  # PostgreSQLDown / RedisDown mutes the target alert for every instance.
  # Confirm that is intended (e.g. a single database per environment).
  - source_matchers:
      - 'alertname = "PostgreSQLDown"'
    target_matchers:
      - 'alertname = "PostgreSQLHighConnections"'
  - source_matchers:
      - 'alertname = "RedisDown"'
    target_matchers:
      - 'alertname = "RedisMemoryHigh"'
|