Files
ewoooc/k8s/monitoring/values-prometheus.yaml
OoO d6d8777e41
All checks were successful
CD Pipeline / deploy (push) Successful in 1m12s
V10.601 收斂 Gemini 與密鑰治理
2026-06-06 14:52:46 +08:00

209 lines
5.3 KiB
YAML

# =============================================================================
# WOOO TECH - Momo Pro System
# Kube-Prometheus-Stack Values
# =============================================================================
# Alertmanager configuration
alertmanager:
  enabled: true
  config:
    global:
      resolve_timeout: 5m
    route:
      group_by: ['alertname', 'namespace']
      group_wait: 30s
      group_interval: 5m
      repeat_interval: 4h
      # Silent by default: only alerts matched by an explicit route below
      # are delivered anywhere.
      receiver: 'null'
      # Child routes are evaluated top to bottom and the first match wins,
      # so the silencing routes must stay above the delivery routes.
      # `matchers` replaces the deprecated `match` field; several matchers
      # in one route are ANDed together, exactly like `match` entries were.
      routes:
        # ============================================
        # Silencing rules (no notification is sent for these alerts)
        # ============================================
        # Built-in system alerts
        - matchers:
            - 'alertname = "InfoInhibitor"'
          receiver: 'null'
        - matchers:
            - 'alertname = "Watchdog"'
          receiver: 'null'
        # API error-budget alerts (frequent false positives)
        - matchers:
            - 'alertname = "KubeAPIErrorBudgetBurn"'
          receiver: 'null'
        - matchers:
            - 'alertname = "KubeAPILatencyHigh"'
          receiver: 'null'
        # Alerts with severity info / none
        - matchers:
            - 'severity = "none"'
          receiver: 'null'
        - matchers:
            - 'severity = "info"'
          receiver: 'null'
        # Rancher system alerts
        - matchers:
            - 'namespace = "cattle-system"'
          receiver: 'null'
        - matchers:
            - 'namespace = "cattle-fleet-system"'
          receiver: 'null'
        # kube-system alerts
        - matchers:
            - 'namespace = "kube-system"'
          receiver: 'null'
        # CPU throttling alerts in the monitoring namespace (common)
        - matchers:
            - 'namespace = "monitoring"'
            - 'alertname = "CPUThrottlingHigh"'
          receiver: 'null'
        # ============================================
        # Delivery rules (only important alerts from the momo namespace)
        # ============================================
        - matchers:
            - 'namespace = "momo"'
            - 'severity = "critical"'
          receiver: 'telegram'
          repeat_interval: 1h
        - matchers:
            - 'namespace = "momo"'
            - 'severity = "warning"'
          receiver: 'telegram'
          repeat_interval: 4h
    receivers:
      # Receiver with no integrations: anything routed here is dropped.
      - name: 'null'
      - name: 'telegram'
        telegram_configs:
          # The two placeholders below are not real credentials.
          # NOTE(review): presumably they are substituted at deploy time -
          # confirm. Alertmanager declares chat_id as an integer, so also
          # confirm the substituted value reaches Alertmanager unquoted.
          - bot_token: '<TELEGRAM_BOT_TOKEN>'
            chat_id: '<TELEGRAM_CHAT_ID>'
            parse_mode: 'HTML'
            message: |
              {{ if eq .Status "firing" }}🚨🔥 <b>告警觸發</b> 🔥🚨{{ else }}✅💚 <b>告警恢復</b> 💚✅{{ end }}
              🏢 <b>環境:</b> 🟦 <code>UAT</code> (mo.wooo.work)
              📋 <b>告警名稱:</b> {{ .CommonLabels.alertname }}
              {{ if eq .CommonLabels.severity "critical" }}🔴{{ else if eq .CommonLabels.severity "warning" }}🟡{{ else }}🔵{{ end }} <b>嚴重程度:</b> {{ .CommonLabels.severity }}
              📦 <b>命名空間:</b> {{ .CommonLabels.namespace }}
              {{ range .Alerts }}
              📝 <b>摘要:</b> {{ .Annotations.summary }}
              💬 <b>詳情:</b> {{ .Annotations.description }}
              ⏰ <i>時間: {{ .StartsAt.Local.Format "2006-01-02 15:04:05" }}</i>
              {{ end }}
              🏷️ <i>MOMO Pro K8s 監控系統 (UAT)</i>
    # A firing critical alert mutes the warning alert that shares the same
    # alertname and namespace. `source_matchers` / `target_matchers` replace
    # the deprecated `source_match` / `target_match` fields.
    inhibit_rules:
      - source_matchers:
          - 'severity = "critical"'
        target_matchers:
          - 'severity = "warning"'
        equal: ['alertname', 'namespace']
  alertmanagerSpec:
    resources:
      requests:
        memory: 64Mi
        cpu: 10m
      limits:
        memory: 128Mi
        cpu: 100m
    storage:
      volumeClaimTemplate:
        spec:
          storageClassName: local-path
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 1Gi
# Prometheus configuration
prometheus:
  enabled: true
  prometheusSpec:
    retention: 7d
    resources:
      requests:
        memory: 256Mi
        cpu: 100m
      limits:
        memory: 1Gi
        cpu: 500m
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: local-path
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 10Gi
    # Monitor the momo namespace.
    # With the chart defaults (`*SelectorNilUsesHelmValues: true`) an empty
    # selector is rendered as a `release=<helm release>` label match, so
    # ServiceMonitors / PodMonitors without that label would be ignored.
    # Setting the flags to false makes the empty selectors below mean
    # "select everything in every namespace".
    # NOTE(review): remove these two flags if only release-labelled
    # monitors are supposed to be scraped.
    serviceMonitorSelectorNilUsesHelmValues: false
    podMonitorSelectorNilUsesHelmValues: false
    serviceMonitorNamespaceSelector: {}
    serviceMonitorSelector: {}
    podMonitorNamespaceSelector: {}
    podMonitorSelector: {}
# Grafana configuration
grafana:
  enabled: true
  # Admin credentials are read from a pre-existing Secret instead of being
  # committed in this file. The Secret must live in the release namespace,
  # carry the two keys named below, and be created out of band before the
  # chart is installed or upgraded.
  # NOTE(review): the password that used to be hard-coded here is in VCS
  # history and should be treated as leaked - rotate it.
  admin:
    existingSecret: grafana-admin-credentials
    userKey: admin-user
    passwordKey: admin-password
  persistence:
    enabled: true
    storageClassName: local-path
    size: 2Gi
  resources:
    requests:
      memory: 128Mi
      cpu: 50m
    limits:
      memory: 256Mi
      cpu: 200m
  # Default dashboards
  defaultDashboardsEnabled: true
  defaultDashboardsTimezone: Asia/Taipei
# Node Exporter
# `nodeExporter` only toggles the component on or off.
nodeExporter:
  enabled: true
# Container resources are a setting of the prometheus-node-exporter
# subchart, so they must sit under the subchart's own key; placed under
# `nodeExporter` they are not applied.
prometheus-node-exporter:
  resources:
    requests:
      memory: 32Mi
      cpu: 10m
    limits:
      memory: 64Mi
      cpu: 100m
# Kube State Metrics
# `kubeStateMetrics` only toggles the component on or off.
kubeStateMetrics:
  enabled: true
# Container resources are a setting of the kube-state-metrics subchart, so
# they must sit under the subchart's own key; placed under
# `kubeStateMetrics` they are not applied.
kube-state-metrics:
  resources:
    requests:
      memory: 32Mi
      cpu: 10m
    limits:
      memory: 128Mi
      cpu: 100m
# Disable components that are not needed (already built into K3s).
# Each key below switches off one control-plane monitoring target.
kubeControllerManager:
  enabled: false

kubeScheduler:
  enabled: false

kubeProxy:
  enabled: false

kubeEtcd:
  enabled: false
# Prometheus Operator
# Resource requests and limits for the operator.
prometheusOperator:
  resources:
    requests:
      cpu: 10m
      memory: 64Mi
    limits:
      cpu: 200m
      memory: 256Mi