根因: Alertmanager 打 120:32334 → Connection Refused
120/121 NodePort 直接訪問不通,只有 VIP 125:32334 可通
影響: 告警完全無法送達 AWOOOI API,鏈路靜默失效 (自 2026-04-12 起)
修復: url → http://192.168.0.125:32334/api/v1/webhooks/alertmanager
驗證: 手動 inject 測試告警,API 端收到並觸發完整 LLM 分析流程
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
91 lines
3.1 KiB
YAML
91 lines
3.1 KiB
YAML
# AWOOOI Alertmanager 配置
|
||
# 2026-04-05 Claude Code: 修正 webhook URL
|
||
# 修正前: http://192.168.0.188:8088/api/v1/webhook/alertmanager (OpenClaw,舊系統,錯誤)
|
||
# 修正後: http://192.168.0.121:32334/api/v1/webhooks/alertmanager (AWOOOI API,複數,當時正確;2026-04-16 起已改指向 VIP 192.168.0.125,見下方 receivers 註解)
|
||
# 根據 feedback_alertmanager_awoooi_flow.md 鐵律
|
||
# 2026-04-09 Claude Sonnet 4.6 Asia/Taipei: 新增 Telegram Fallback (ADR-035)
|
||
# 架構: awoooi-webhook (主路徑) + telegram-direct (fallback,獨立路由)
|
||
# 當 AWOOOI API 無法回應時,critical 告警直接送 Telegram Bot API
|
||
# ⚠️ bot_token/chat_id 部署時由 secrets 替換,此檔為模板
|
||
|
||
# Instance-wide defaults.
global:
  # Fallback resolve window used for alerts that arrive without an end time.
  resolve_timeout: 5m
|
||
|
||
# Routing tree. Every alert enters at the root route and is then tested
# against the child routes in order; the first match wins unless that
# route sets `continue: true`.
#
# The child routes use `matchers`, which replaces the deprecated
# `match` / `match_re` keys (available since Alertmanager 0.22; this file
# already requires 0.24+ because it uses `telegram_configs`).
route:
  # Default receiver for alerts that match none of the child routes.
  receiver: 'awoooi-webhook'
  group_by: ['alertname', 'severity']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h
  routes:
    # Critical alerts go to the AWOOOI API with a shorter group_wait.
    # `continue: true` lets the same alert fall through to the next
    # matching route (telegram-direct).
    - matchers:
        - 'severity="critical"'
      receiver: 'awoooi-webhook'
      group_wait: 10s
      continue: true
    # Second delivery for critical alerts, straight to Telegram.
    # NOTE: this route matches every critical alert, so telegram-direct
    # fires in parallel with the webhook, not only when the webhook fails.
    - matchers:
        - 'severity="critical"'
      receiver: 'telegram-direct'
      group_wait: 10s
    # Warnings go to the AWOOOI API only (no Telegram duplicate).
    - matchers:
        - 'severity="warning"'
      receiver: 'awoooi-webhook'
    # Zombie*/Container* alerts that are neither critical nor warning
    # (those were already consumed above) wait longer before first send.
    - matchers:
        - 'alertname=~"Zombie.*|Container.*"'
      receiver: 'awoooi-webhook'
      group_wait: 1m
|
||
|
||
# Notification targets referenced by name from the routing tree.
receivers:
  # Primary path: the AWOOOI API handles all alerts
  # (AI analysis + de-duplication + Telegram).
  # 2026-04-16 ogt + Claude Sonnet 4.6: re-pointed at VIP 192.168.0.125.
  # Root cause: 121:32334 returned Connection Refused, and so did 120:32334.
  # Only VIP 125:32334 is reachable (kube-proxy NodePort routing works there).
  - name: 'awoooi-webhook'
    webhook_configs:
      - url: 'http://192.168.0.125:32334/api/v1/webhooks/alertmanager'
        send_resolved: true

  # Fallback path: when the AWOOOI API is down, critical alerts still reach
  # Telegram directly.
  # Only critical severity is routed here (avoids double-notifying warnings).
  - name: 'telegram-direct'
    telegram_configs:
      # Both values are placeholders that the deploy step replaces from
      # secrets. chat_id is left unquoted, presumably so that the substituted
      # value parses as a number; verify against the deploy script.
      - bot_token: 'TELEGRAM_BOT_TOKEN_PLACEHOLDER'
        chat_id: TELEGRAM_CHAT_ID_PLACEHOLDER
        parse_mode: 'HTML'
        # The host line prints `host` and `instance`; the " / " separator is
        # emitted only when both labels are set, so the two values no longer
        # run together. With a single label the output is unchanged.
        message: |
          🚨 <b>[Alertmanager Fallback]</b>
          {{ range .Alerts }}
          ├ <b>{{ .Labels.alertname }}</b>
          ├ 嚴重度: {{ .Labels.severity }}
          ├ 主機: {{ .Labels.host }}{{ if and .Labels.host .Labels.instance }} / {{ end }}{{ .Labels.instance }}
          └ {{ .Annotations.summary }}
          {{ end }}
          <i>⚠️ AWOOOI API 可能離線,此為直接告警</i>
        send_resolved: false
|
||
|
||
# Inhibition rules: while an alert matching the source matchers is firing,
# alerts matching the target matchers are muted. `equal` lists the labels
# whose values must be identical on both alerts for the rule to apply.
#
# Uses `source_matchers` / `target_matchers`, which replace the deprecated
# `source_match` / `target_match` / `target_match_re` keys
# (available since Alertmanager 0.22).
inhibit_rules:
  # A critical alert mutes the warning with the same alertname and instance.
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="warning"'
    equal: ['alertname', 'instance']
  # A host that is down mutes its own resource alerts.
  - source_matchers:
      - 'alertname="HostDown"'
    target_matchers:
      - 'alertname=~"HostHighCpuLoad|HostOutOfMemory|HostOutOfDiskSpace"'
    equal: ['host']
  # A not-ready node mutes pod/deployment alerts carrying the same node label.
  - source_matchers:
      - 'alertname="KubeNodeNotReady"'
    target_matchers:
      - 'alertname=~"KubePodCrashLooping|KubePodNotReady|KubeDeploymentReplicasMismatch"'
    equal: ['node']
  # NOTE(review): the two rules below have no `equal`, so one firing *Down
  # alert mutes the target alert on every instance. Confirm this is intended
  # (e.g. only one PostgreSQL / Redis instance exists).
  - source_matchers:
      - 'alertname="PostgreSQLDown"'
    target_matchers:
      - 'alertname="PostgreSQLHighConnections"'
  - source_matchers:
      - 'alertname="RedisDown"'
    target_matchers:
      - 'alertname="RedisMemoryHigh"'
|