From c6bef20a97849be96b0cd06480ad8d2fcdf00b44 Mon Sep 17 00:00:00 2001
From: OG T
Date: Sat, 28 Mar 2026 22:03:05 +0800
Subject: [PATCH] feat(k4.1): Kured automatic node reboot daemon

- Deploy Kured v1.15.1 as DaemonSet in the kured namespace
- Maintenance window: 02:00-04:00 Taipei time
- Sentinel check period: 1 hour
- Reboot is held while AWOOOI pods run on the node
- Prometheus integration: reboot is held while alerts are active

Co-Authored-By: Claude Opus 4.5
---
 k8s/kured/kured.yaml | 156 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 k8s/kured/kured.yaml

diff --git a/k8s/kured/kured.yaml b/k8s/kured/kured.yaml
new file mode 100644
index 00000000..0a2017e0
--- /dev/null
+++ b/k8s/kured/kured.yaml
@@ -0,0 +1,156 @@
+# =============================================================================
+# Kured (KUbernetes REboot Daemon)
+# =============================================================================
+# K4.1 2026-03-28: Automatic node reboot for OS updates
+# Deployed by: Claude Code (Chief Architect)
+# Maintenance window: 02:00-04:00 (UTC+8, Taipei time)
+# =============================================================================
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: kured
+  labels:
+    app.kubernetes.io/name: kured
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: kured
+  namespace: kured
+---
+# Cluster-wide permissions used to cordon, drain and uncordon nodes.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: kured
+rules:
+  - apiGroups: [""]
+    resources: ["nodes"]
+    verbs: ["get", "patch"]
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["list", "delete", "get"]
+  - apiGroups: ["apps"]
+    resources: ["daemonsets"]
+    verbs: ["get"]
+  # Pods are evicted through the eviction API, which honours PDBs.
+  - apiGroups: [""]
+    resources: ["pods/eviction"]
+    verbs: ["create"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: kured
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: kured
+subjects:
+  - kind: ServiceAccount
+    name: kured
+    namespace: kured
+---
+# Namespaced permission to update the kured DaemonSet, which holds the
+# reboot lock. Limited to that one object.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: kured
+  namespace: kured
+rules:
+  - apiGroups: ["apps"]
+    resources: ["daemonsets"]
+    resourceNames: ["kured"]
+    verbs: ["update"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: kured
+  namespace: kured
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: kured
+subjects:
+  - kind: ServiceAccount
+    name: kured
+    namespace: kured
+---
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: kured
+  namespace: kured
+  labels:
+    app.kubernetes.io/name: kured
+    app.kubernetes.io/version: "1.15.1"
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: kured
+  updateStrategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: kured
+    spec:
+      serviceAccountName: kured
+      # Run on control-plane nodes as well, under either taint name.
+      tolerations:
+        - key: node-role.kubernetes.io/control-plane
+          operator: Exists
+          effect: NoSchedule
+        - key: node-role.kubernetes.io/master
+          operator: Exists
+          effect: NoSchedule
+      hostPID: true
+      restartPolicy: Always
+      containers:
+        - name: kured
+          image: ghcr.io/kubereboot/kured:1.15.1
+          imagePullPolicy: IfNotPresent
+          securityContext:
+            privileged: true
+          env:
+            # Node name, injected through the downward API.
+            - name: KURED_NODE_ID
+              valueFrom:
+                fieldRef:
+                  fieldPath: spec.nodeName
+          command:
+            - /usr/bin/kured
+            # The lock DaemonSet is in the "kured" namespace; without this
+            # flag kured looks for it in kube-system.
+            - --ds-namespace=kured
+            - --reboot-sentinel=/var/run/reboot-required
+            # Maintenance window 02:00-04:00. Start and end are read in the
+            # zone set by --time-zone, so they are Taipei local times.
+            - --start-time=02:00
+            - --end-time=04:00
+            - --time-zone=Asia/Taipei
+            # Interval between reboot-sentinel checks.
+            - --period=1h
+            # Hold the reboot while a pod with this label runs on the node.
+            - --blocking-pod-selector=app.kubernetes.io/name=awoooi
+            # Hold the reboot while this Prometheus has active alerts.
+            - --prometheus-url=http://192.168.0.188:9090
+          resources:
+            limits:
+              cpu: 100m
+              memory: 64Mi
+            requests:
+              cpu: 10m
+              memory: 32Mi
+          volumeMounts:
+            # Host /var/run, where the reboot-required sentinel appears.
+            - name: hostrun
+              mountPath: /var/run
+              readOnly: true
+      volumes:
+        - name: hostrun
+          hostPath:
+            path: /var/run