From 0b68352fc204ac22dfea8426099058356f3cb59e Mon Sep 17 00:00:00 2001 From: OG T Date: Sat, 28 Mar 2026 22:23:42 +0800 Subject: [PATCH] =?UTF-8?q?feat(k3s):=20P2/P3=20=E6=94=B9=E9=80=B2=20-=20k?= =?UTF-8?q?ube-state-metrics=20+=20Kured=20=E6=99=82=E5=8D=80=E4=BF=AE?= =?UTF-8?q?=E5=BE=A9=20+=20Descheduler=20=E8=AA=BF=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P2 改進: - 新增 kube-state-metrics v2.10.1 (NodePort:30888) - 新增 7 條 kube-state-metrics 告警規則 (NPD 整合) P3 改進: - 修復 Kured 維護窗口時區 (18:00→02:00 台北時間) - Descheduler threshold 20%→30% (避免過度遷移) 首席架構師審查建議執行項目 Co-Authored-By: Claude Opus 4.5 --- k8s/descheduler/descheduler.yaml | 8 +- k8s/kube-state-metrics/DEPLOY.md | 98 +++++++++ .../kube-state-metrics.yaml | 202 ++++++++++++++++++ k8s/kured/kured.yaml | 6 +- k8s/monitoring/k3s-alerts.yaml | 86 ++++++++ 5 files changed, 394 insertions(+), 6 deletions(-) create mode 100644 k8s/kube-state-metrics/DEPLOY.md create mode 100644 k8s/kube-state-metrics/kube-state-metrics.yaml diff --git a/k8s/descheduler/descheduler.yaml b/k8s/descheduler/descheduler.yaml index 61b6485b..b8cdd299 100644 --- a/k8s/descheduler/descheduler.yaml +++ b/k8s/descheduler/descheduler.yaml @@ -68,10 +68,12 @@ data: cpu: 50 memory: 50 pods: 50 + # 2026-03-29: 從 20% 調整為 30% (首席架構師建議) + # 避免小型叢集過度頻繁遷移 thresholds: - cpu: 20 - memory: 20 - pods: 20 + cpu: 30 + memory: 30 + pods: 30 - name: RemovePodsViolatingNodeAffinity args: nodeAffinityType: diff --git a/k8s/kube-state-metrics/DEPLOY.md b/k8s/kube-state-metrics/DEPLOY.md new file mode 100644 index 00000000..084b7226 --- /dev/null +++ b/k8s/kube-state-metrics/DEPLOY.md @@ -0,0 +1,98 @@ +# kube-state-metrics 部署指南 + +> **版本**: v2.10.1 +> **用途**: 提供 K8s 物件狀態指標給 Prometheus +> **建立日期**: 2026-03-29 (台北時間) + +--- + +## 1. 
部署 kube-state-metrics + +```bash +# 從 Git repo 部署 +kubectl apply -f k8s/kube-state-metrics/kube-state-metrics.yaml + +# 驗證 +kubectl get pods -n kube-state-metrics +kubectl get svc -n kube-state-metrics +``` + +## 2. 更新 Prometheus 配置 + +在 `192.168.0.188` 的 Prometheus 配置中新增抓取目標: + +```yaml +# /home/ollama/momo-pro/monitoring/prometheus.yml + +scrape_configs: + # ... 現有配置 ... + + # kube-state-metrics + - job_name: 'kube-state-metrics' + static_configs: + - targets: ['192.168.0.121:30888'] # NodePort + metrics_path: /metrics + scrape_interval: 30s +``` + +## 3. 更新告警規則 + +複製告警規則到 Prometheus: + +```bash +# 在存放此 repo 的工作站執行 (將規則檔複製到 188 主機) +scp k8s/monitoring/k3s-alerts.yaml ollama@192.168.0.188:/home/ollama/momo-pro/monitoring/alerts.yml + +# 重載 Prometheus (需在 188 主機上執行) +docker restart prometheus +``` + +## 4. 驗證 + +```bash +# 檢查 metrics 端點 +curl http://192.168.0.121:30888/metrics | head -50 + +# 在 Prometheus 查詢 +# kube_deployment_status_replicas_available +# kube_pod_container_status_restarts_total +# kube_node_status_condition +``` + +## 5. 
相關告警規則 + +| 告警名稱 | 觸發條件 | 嚴重度 | +|---------|---------|--------| +| PodRestartingTooMuch | 1 小時內重啟 > 3 次 | warning | +| DeploymentReplicasMismatch | 副本數不符 > 5 分鐘 | warning | +| PodPendingTooLong | Pending > 10 分鐘 | warning | +| NodeMemoryPressure | NPD 記憶體壓力 | critical | +| NodeDiskPressure | NPD 磁碟壓力 | critical | +| NodePIDPressure | NPD PID 壓力 | warning | +| NodeNotReady | 節點未就緒 > 2 分鐘 | critical | + +--- + +## 架構圖 + +``` +┌─────────────────────────────────────────────────────────────┐ +│ K3s 叢集 (120/121) │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ kube-state-metrics namespace │ │ +│ │ ┌───────────────────┐ ┌─────────────────────────┐ │ │ +│ │ │ kube-state-metrics│ │ Service (NodePort:30888)│ │ │ +│ │ │ Pod │←─│ │ │ │ +│ │ └───────────────────┘ └─────────────────────────┘ │ │ +│ └─────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ :30888 +┌─────────────────────────────────────────────────────────────┐ +│ 192.168.0.188 (ollama) │ +│ ┌────────────┐ ┌──────────────┐ ┌──────────────────┐ │ +│ │ Prometheus │───►│ Alertmanager │───►│ AWOOOI Telegram │ │ +│ │ :9090 │ │ :9093 │ │ │ │ +│ └────────────┘ └──────────────┘ └──────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` diff --git a/k8s/kube-state-metrics/kube-state-metrics.yaml b/k8s/kube-state-metrics/kube-state-metrics.yaml new file mode 100644 index 00000000..68e7bf56 --- /dev/null +++ b/k8s/kube-state-metrics/kube-state-metrics.yaml @@ -0,0 +1,202 @@ +# ============================================================================= +# kube-state-metrics +# ============================================================================= +# K-MON.P2 2026-03-29: 部署 kube-state-metrics 啟用 NPD 告警 +# Deployed by: Claude Code (首席架構師) +# 用途: 提供 K8s 物件狀態指標 (Pods, Nodes, Deployments, etc.) 
+# ============================================================================= +--- +apiVersion: v1 +kind: Namespace +metadata: + name: kube-state-metrics + labels: + app.kubernetes.io/name: kube-state-metrics +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics + namespace: kube-state-metrics + labels: + app.kubernetes.io/name: kube-state-metrics +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kube-state-metrics + labels: + app.kubernetes.io/name: kube-state-metrics +rules: + - apiGroups: [""] + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: ["list", "watch"] + - apiGroups: ["apps"] + resources: + - statefulsets + - daemonsets + - deployments + - replicasets + verbs: ["list", "watch"] + - apiGroups: ["batch"] + resources: + - cronjobs + - jobs + verbs: ["list", "watch"] + - apiGroups: ["autoscaling"] + resources: + - horizontalpodautoscalers + verbs: ["list", "watch"] + - apiGroups: ["policy"] + resources: + - poddisruptionbudgets + verbs: ["list", "watch"] + - apiGroups: ["certificates.k8s.io"] + resources: + - certificatesigningrequests + verbs: ["list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: + - storageclasses + - volumeattachments + verbs: ["list", "watch"] + - apiGroups: ["admissionregistration.k8s.io"] + resources: + - mutatingwebhookconfigurations + - validatingwebhookconfigurations + verbs: ["list", "watch"] + - apiGroups: ["networking.k8s.io"] + resources: + - networkpolicies + - ingresses + verbs: ["list", "watch"] + - apiGroups: ["coordination.k8s.io"] + resources: + - leases + verbs: ["list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kube-state-metrics + labels: + app.kubernetes.io/name: kube-state-metrics +roleRef: + apiGroup: 
rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: + - kind: ServiceAccount + name: kube-state-metrics + namespace: kube-state-metrics +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kube-state-metrics + namespace: kube-state-metrics + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: "2.10.1" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + template: + metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + spec: + serviceAccountName: kube-state-metrics + securityContext: + runAsNonRoot: true + runAsUser: 65534 + fsGroup: 65534 + containers: + - name: kube-state-metrics + image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.10.1 + imagePullPolicy: IfNotPresent + ports: + - name: http-metrics + containerPort: 8080 + protocol: TCP + - name: telemetry + containerPort: 8081 + protocol: TCP + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: / + port: 8081 + initialDelaySeconds: 5 + timeoutSeconds: 5 + resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi +--- +apiVersion: v1 +kind: Service +metadata: + name: kube-state-metrics + namespace: kube-state-metrics + labels: + app.kubernetes.io/name: kube-state-metrics + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" +spec: + type: ClusterIP + ports: + - name: http-metrics + port: 8080 + targetPort: http-metrics + protocol: TCP + - name: telemetry + port: 8081 + targetPort: telemetry + protocol: TCP + selector: + app.kubernetes.io/name: kube-state-metrics +--- +# NodePort 服務供外部 Prometheus 抓取 +apiVersion: v1 +kind: Service +metadata: + name: kube-state-metrics-external + namespace: kube-state-metrics + labels: + app.kubernetes.io/name: kube-state-metrics + annotations: + description: "外部 Prometheus 抓取端點 (192.168.0.188:9090)" +spec: + 
type: NodePort + ports: + - name: http-metrics + port: 8080 + targetPort: http-metrics + nodePort: 30888 + protocol: TCP + selector: + app.kubernetes.io/name: kube-state-metrics diff --git a/k8s/kured/kured.yaml b/k8s/kured/kured.yaml index 0a2017e0..5b90c671 100644 --- a/k8s/kured/kured.yaml +++ b/k8s/kured/kured.yaml @@ -117,9 +117,9 @@ spec: command: - /usr/bin/kured - --reboot-sentinel=/var/run/reboot-required - # 維護窗口: 02:00-04:00 UTC+8 = 18:00-20:00 UTC (前一天) - - --start-time=18:00 - - --end-time=20:00 + # Maintenance window: 02:00-04:00 Taipei time (UTC+8); expressed in the zone given by --time-zone + - --start-time=02:00 + - --end-time=04:00 - --time-zone=Asia/Taipei # 重啟間隔: 1小時 - --period=1h diff --git a/k8s/monitoring/k3s-alerts.yaml b/k8s/monitoring/k3s-alerts.yaml index e365cb49..b6bc7130 100644 --- a/k8s/monitoring/k3s-alerts.yaml +++ b/k8s/monitoring/k3s-alerts.yaml @@ -115,3 +115,89 @@ groups: annotations: summary: "⚠️ Sentry 服務離線" description: "Sentry (192.168.0.110:9000) 已離線超過 2 分鐘" + + # ===== kube-state-metrics alerts (K8s object state) ===== + - name: kube_state_metrics_alerts + rules: + # Pod restarted more than 3 times within 1h (awoooi-prod namespace only) + - alert: PodRestartingTooMuch + expr: increase(kube_pod_container_status_restarts_total{namespace="awoooi-prod"}[1h]) > 3 + labels: + severity: warning + team: ops + component: k8s + annotations: + summary: "⚠️ Pod 重啟過多" + description: "{{ $labels.namespace }}/{{ $labels.pod }} 過去 1 小時重啟 {{ $value }} 次" + + # Deployment available replicas differ from spec for 5m; $value is the AVAILABLE count (left-hand side of !=) + - alert: DeploymentReplicasMismatch + expr: kube_deployment_status_replicas_available{namespace="awoooi-prod"} != kube_deployment_spec_replicas{namespace="awoooi-prod"} + for: 5m + labels: + severity: warning + team: ops + component: k8s + annotations: + summary: "⚠️ Deployment 副本不足" + description: "{{ $labels.deployment }} 可用副本數為 {{ $value }}，與期望副本數不符" + + # Pod stuck in Pending for more than 10m + - alert: PodPendingTooLong + expr: kube_pod_status_phase{phase="Pending", namespace="awoooi-prod"} == 1 + for: 10m + labels: + severity: warning + team: ops + component: k8s + annotations: + summary: "⚠️ Pod 卡在 Pending" + 
description: "{{ $labels.pod }} 已處於 Pending 狀態超過 10 分鐘" + + # Node memory pressure: MemoryPressure condition true for 2m (NOTE(review): normally set by the kubelet, not NPD; confirm the NPD attribution) + - alert: NodeMemoryPressure + expr: kube_node_status_condition{condition="MemoryPressure", status="true"} == 1 + for: 2m + labels: + severity: critical + team: ops + component: k8s + annotations: + summary: "🔴 節點記憶體壓力" + description: "節點 {{ $labels.node }} 記憶體不足" + + # Node disk pressure: DiskPressure condition true for 2m (NOTE(review): normally set by the kubelet, not NPD; confirm) + - alert: NodeDiskPressure + expr: kube_node_status_condition{condition="DiskPressure", status="true"} == 1 + for: 2m + labels: + severity: critical + team: ops + component: k8s + annotations: + summary: "🔴 節點磁碟壓力" + description: "節點 {{ $labels.node }} 磁碟空間不足" + + # Node PID pressure: PIDPressure condition true for 2m (NOTE(review): normally set by the kubelet, not NPD; confirm) + - alert: NodePIDPressure + expr: kube_node_status_condition{condition="PIDPressure", status="true"} == 1 + for: 2m + labels: + severity: warning + team: ops + component: k8s + annotations: + summary: "⚠️ 節點 PID 壓力" + description: "節點 {{ $labels.node }} PID 資源不足" + + # Node not Ready: the Ready condition's status="true" series has been 0 for 2m + - alert: NodeNotReady + expr: kube_node_status_condition{condition="Ready", status="true"} == 0 + for: 2m + labels: + severity: critical + team: ops + component: k8s + annotations: + summary: "🔴 節點未就緒" + description: "節點 {{ $labels.node }} 狀態不是 Ready"