# =============================================================================
# WOOO TECH - Momo Pro System
# Kube-Prometheus-Stack Values
# =============================================================================
# Alertmanager: alert routing, the Telegram receiver, and pod sizing/storage.
alertmanager:
  enabled: true
  config:
    global:
      resolve_timeout: 5m
    route:
      group_by: ['alertname', 'namespace']
      group_wait: 30s
      group_interval: 5m
      repeat_interval: 4h
      # Silent by default; only alerts matched explicitly below are delivered.
      receiver: 'null'
      # Child routes are evaluated top to bottom, so the silencing routes
      # must stay above the delivery routes.
      routes:
        # ============================================
        # Silencing rules (no notification is sent for these alerts)
        # ============================================
        # Built-in system alerts
        - match:
            alertname: InfoInhibitor
          receiver: 'null'
        - match:
            alertname: Watchdog
          receiver: 'null'
        # API error-budget alerts (frequent false positives)
        - match:
            alertname: KubeAPIErrorBudgetBurn
          receiver: 'null'
        - match:
            alertname: KubeAPILatencyHigh
          receiver: 'null'
        # Alerts with severity info/none
        - match:
            severity: none
          receiver: 'null'
        - match:
            severity: info
          receiver: 'null'
        # Rancher system alerts
        - match:
            namespace: cattle-system
          receiver: 'null'
        - match:
            namespace: cattle-fleet-system
          receiver: 'null'
        # kube-system alerts
        - match:
            namespace: kube-system
          receiver: 'null'
        # CPU throttling alerts in the monitoring namespace (common)
        - match:
            namespace: monitoring
            alertname: CPUThrottlingHigh
          receiver: 'null'
        # ============================================
        # Delivery rules (only important alerts from the momo namespace)
        # ============================================
        - match:
            namespace: momo
            severity: critical
          receiver: 'telegram'
          repeat_interval: 1h
        - match:
            namespace: momo
            severity: warning
          receiver: 'telegram'
          repeat_interval: 4h
    receivers:
      # Receiver with no integrations: alerts routed here are dropped.
      - name: 'null'
      - name: 'telegram'
        telegram_configs:
          # The bot token is read from a file so that it never lives in this
          # values file. The file is provided by the Secret listed under
          # alertmanagerSpec.secrets below; create it before installing, e.g.
          #   kubectl -n <namespace> create secret generic alertmanager-telegram \
          #     --from-literal=bot-token='<token>'
          # Needs an Alertmanager release that supports bot_token_file
          # (TODO confirm the deployed version). Any token that was previously
          # committed in plain text should be revoked and reissued.
          - bot_token_file: '/etc/alertmanager/secrets/alertmanager-telegram/bot-token'
            chat_id: 5619078117
            parse_mode: 'HTML'
            message: |
              {{ if eq .Status "firing" }}🚨🔥 告警觸發 🔥🚨{{ else }}✅💚 告警恢復 💚✅{{ end }}
              🏢 環境: 🟦 UAT (mo.wooo.work)
              📋 告警名稱: {{ .CommonLabels.alertname }}
              {{ if eq .CommonLabels.severity "critical" }}🔴{{ else if eq .CommonLabels.severity "warning" }}🟡{{ else }}🔵{{ end }} 嚴重程度: {{ .CommonLabels.severity }}
              📦 命名空間: {{ .CommonLabels.namespace }}
              {{ range .Alerts }}
              📝 摘要: {{ .Annotations.summary }}
              💬 詳情: {{ .Annotations.description }}
              ⏰ 時間: {{ .StartsAt.Local.Format "2006-01-02 15:04:05" }}
              {{ end }}
              🏷️ MOMO Pro K8s 監控系統 (UAT)
    # A firing critical alert mutes the warning alert that shares its
    # alertname and namespace.
    inhibit_rules:
      - source_match:
          severity: 'critical'
        target_match:
          severity: 'warning'
        equal: ['alertname', 'namespace']
  alertmanagerSpec:
    # Secrets listed here are mounted into the Alertmanager pod under
    # /etc/alertmanager/secrets/<secret-name>/.
    secrets:
      - alertmanager-telegram
    resources:
      requests:
        memory: 64Mi
        cpu: 10m
      limits:
        memory: 128Mi
        cpu: 100m
    storage:
      volumeClaimTemplate:
        spec:
          storageClassName: local-path
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 1Gi
# Prometheus server: retention, sizing, storage, and monitor discovery.
prometheus:
  enabled: true
  prometheusSpec:
    retention: 7d
    resources:
      requests:
        memory: 256Mi
        cpu: 100m
      limits:
        memory: 1Gi
        cpu: 500m
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: local-path
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 10Gi
    # Monitor the momo namespace: pick up ServiceMonitors and PodMonitors from
    # every namespace, whatever their labels.
    # The chart treats an empty selector ({}) as "unset" and, unless the two
    # *NilUsesHelmValues flags are false, falls back to selecting only
    # monitors labelled release=<helm release name>. Without these flags the
    # empty selectors below would not match unlabelled monitors in momo.
    serviceMonitorSelectorNilUsesHelmValues: false
    podMonitorSelectorNilUsesHelmValues: false
    serviceMonitorNamespaceSelector: {}
    serviceMonitorSelector: {}
    podMonitorNamespaceSelector: {}
    podMonitorSelector: {}
# Grafana: admin credentials, persistence, sizing, and default dashboards.
grafana:
  enabled: true
  # Admin credentials come from a pre-created Secret instead of a plain-text
  # password in this file. Create it before installing, e.g.
  #   kubectl -n <namespace> create secret generic grafana-admin-credentials \
  #     --from-literal=admin-user=admin \
  #     --from-literal=admin-password='<password>'
  # Any password that was previously committed in plain text should be rotated.
  admin:
    existingSecret: grafana-admin-credentials
    userKey: admin-user
    passwordKey: admin-password
  persistence:
    enabled: true
    storageClassName: local-path
    size: 2Gi
  resources:
    requests:
      memory: 128Mi
      cpu: 50m
    limits:
      memory: 256Mi
      cpu: 200m
  # Default dashboards
  defaultDashboardsEnabled: true
  defaultDashboardsTimezone: Asia/Taipei
# Node Exporter: this key only toggles deployment of the exporter.
nodeExporter:
  enabled: true

# Pod settings for Node Exporter belong to the prometheus-node-exporter
# subchart; resources placed under nodeExporter are not read by the chart.
prometheus-node-exporter:
  resources:
    requests:
      memory: 32Mi
      cpu: 10m
    limits:
      memory: 64Mi
      cpu: 100m
# Kube State Metrics: this key only toggles deployment of the component.
kubeStateMetrics:
  enabled: true

# Pod settings for Kube State Metrics belong to the kube-state-metrics
# subchart; resources placed under kubeStateMetrics are not read by the chart.
kube-state-metrics:
  resources:
    requests:
      memory: 32Mi
      cpu: 10m
    limits:
      memory: 128Mi
      cpu: 100m
# Components that are not needed here are switched off (built into K3s).
kubeControllerManager:
  enabled: false

kubeScheduler:
  enabled: false

kubeProxy:
  enabled: false

kubeEtcd:
  enabled: false
# Prometheus Operator: resource requests and limits for the operator pod.
prometheusOperator:
  resources:
    requests:
      cpu: 10m
      memory: 64Mi
    limits:
      cpu: 200m
      memory: 256Mi