# =============================================================================
# WOOO TECH - Momo Pro System
# Kube-Prometheus-Stack Values
# =============================================================================

# Alertmanager configuration
#
# Routing model: the top-level route delivers to the no-op 'null' receiver, so
# an alert only produces a notification when one of the child routes sends it
# to 'telegram'. Child routes are listed silencing rules first, delivery rules
# last, and none of them sets `continue`.
alertmanager:
  enabled: true
  config:
    global:
      resolve_timeout: 5m

    route:
      group_by:
        - alertname
        - namespace
      group_wait: 30s
      group_interval: 5m
      repeat_interval: 4h
      # Silent by default; only explicitly matched alerts are sent.
      # The quotes are required: a bare null would be parsed as YAML null.
      receiver: 'null'
      routes:
        # ============================================
        # Silencing rules (these alerts send no notification)
        # ============================================
        # Built-in system alerts
        - matchers:
            - 'alertname = "InfoInhibitor"'
          receiver: 'null'
        - matchers:
            - 'alertname = "Watchdog"'
          receiver: 'null'
        # API error-budget alerts (frequent false positives)
        - matchers:
            - 'alertname = "KubeAPIErrorBudgetBurn"'
          receiver: 'null'
        - matchers:
            - 'alertname = "KubeAPILatencyHigh"'
          receiver: 'null'
        # info/none severity alerts
        - matchers:
            - 'severity = "none"'
          receiver: 'null'
        - matchers:
            - 'severity = "info"'
          receiver: 'null'
        # Rancher system alerts
        - matchers:
            - 'namespace = "cattle-system"'
          receiver: 'null'
        - matchers:
            - 'namespace = "cattle-fleet-system"'
          receiver: 'null'
        # kube-system system alerts
        - matchers:
            - 'namespace = "kube-system"'
          receiver: 'null'
        # CPU throttling alerts in the monitoring namespace (common)
        - matchers:
            - 'namespace = "monitoring"'
            - 'alertname = "CPUThrottlingHigh"'
          receiver: 'null'
        # ============================================
        # Delivery rules (only important alerts from the momo namespace)
        # ============================================
        - matchers:
            - 'namespace = "momo"'
            - 'severity = "critical"'
          receiver: 'telegram'
          repeat_interval: 1h
        - matchers:
            - 'namespace = "momo"'
            - 'severity = "warning"'
          receiver: 'telegram'
          repeat_interval: 4h

    receivers:
      # Receiver with no integrations: anything routed here is dropped.
      - name: 'null'
      - name: 'telegram'
        telegram_configs:
          - bot_token: '<TELEGRAM_BOT_TOKEN>'
            # Alertmanager declares chat_id as an integer. Keep the value
            # unquoted so that the substituted chat id is emitted as a number;
            # a quoted string is rejected when the configuration is loaded.
            chat_id: <TELEGRAM_CHAT_ID>
            parse_mode: 'HTML'
            message: |
              {{ if eq .Status "firing" }}🚨🔥 <b>告警觸發</b> 🔥🚨{{ else }}✅💚 <b>告警恢復</b> 💚✅{{ end }}

              🏢 <b>環境:</b> 🟦 <code>UAT</code> (mo.wooo.work)
              📋 <b>告警名稱:</b> {{ .CommonLabels.alertname }}
              {{ if eq .CommonLabels.severity "critical" }}🔴{{ else if eq .CommonLabels.severity "warning" }}🟡{{ else }}🔵{{ end }} <b>嚴重程度:</b> {{ .CommonLabels.severity }}
              📦 <b>命名空間:</b> {{ .CommonLabels.namespace }}

              {{ range .Alerts }}
              📝 <b>摘要:</b> {{ .Annotations.summary }}
              💬 <b>詳情:</b> {{ .Annotations.description }}
              ⏰ <i>時間: {{ .StartsAt.Local.Format "2006-01-02 15:04:05" }}</i>
              {{ end }}

              🏷️ <i>MOMO Pro K8s 監控系統 (UAT)</i>

    # A firing critical alert mutes the warning alert that carries the same
    # alertname and namespace.
    inhibit_rules:
      - source_matchers:
          - 'severity = "critical"'
        target_matchers:
          - 'severity = "warning"'
        equal:
          - alertname
          - namespace

  alertmanagerSpec:
    resources:
      requests:
        memory: 64Mi
        cpu: 10m
      limits:
        memory: 128Mi
        cpu: 100m
    storage:
      volumeClaimTemplate:
        spec:
          storageClassName: local-path
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 1Gi

# Prometheus configuration
prometheus:
  enabled: true
  prometheusSpec:
    retention: 7d
    resources:
      requests:
        memory: 256Mi
        cpu: 100m
      limits:
        memory: 1Gi
        cpu: 500m
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: local-path
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 10Gi

    # Monitor the momo namespace.
    # Empty selectors mean "select everything", but by default the chart
    # replaces an empty ServiceMonitor/PodMonitor selector with a selector on
    # its own release label. The two *NilUsesHelmValues switches below turn
    # that substitution off so the empty selectors take effect and monitors
    # without the release label are discovered as well.
    serviceMonitorSelectorNilUsesHelmValues: false
    podMonitorSelectorNilUsesHelmValues: false
    serviceMonitorNamespaceSelector: {}
    serviceMonitorSelector: {}
    podMonitorNamespaceSelector: {}
    podMonitorSelector: {}

# Grafana configuration
grafana:
  enabled: true

  # Default dashboards, with the dashboard timezone set to Asia/Taipei.
  defaultDashboardsEnabled: true
  defaultDashboardsTimezone: Asia/Taipei

  # NOTE(review): this admin password is stored in plain text in the values
  # file. Consider creating a Kubernetes Secret out of band and pointing the
  # Grafana chart at it with `admin.existingSecret` instead, then rotating
  # this value.
  adminPassword: 'Wooo_Grafana_2026'

  persistence:
    enabled: true
    storageClassName: local-path
    size: 2Gi

  resources:
    limits:
      cpu: 200m
      memory: 256Mi
    requests:
      cpu: 50m
      memory: 128Mi

# Node Exporter
# `nodeExporter` only toggles deployment of the exporter. Settings for the
# exporter itself are read from the `prometheus-node-exporter` sub-chart key,
# so the resource requests/limits are declared there; under `nodeExporter`
# they are not applied.
nodeExporter:
  enabled: true

prometheus-node-exporter:
  resources:
    requests:
      memory: 32Mi
      cpu: 10m
    limits:
      memory: 64Mi
      cpu: 100m

# Kube State Metrics
# `kubeStateMetrics` only toggles deployment of the component. Settings for
# the component itself are read from the `kube-state-metrics` sub-chart key,
# so the resource requests/limits are declared there; under
# `kubeStateMetrics` they are not applied.
kubeStateMetrics:
  enabled: true

kube-state-metrics:
  resources:
    requests:
      memory: 32Mi
      cpu: 10m
    limits:
      memory: 128Mi
      cpu: 100m

# Disable components that are not needed (already built into K3s).
kubeControllerManager:
  enabled: false

kubeEtcd:
  enabled: false

kubeProxy:
  enabled: false

kubeScheduler:
  enabled: false

# Prometheus Operator: resource requests and limits for the operator pod.
prometheusOperator:
  resources:
    limits:
      cpu: 200m
      memory: 256Mi
    requests:
      cpu: 10m
      memory: 64Mi