feat(k4.1): Kured automatic node reboot daemon
- Deploy Kured v1.15.1 as DaemonSet - Maintenance window: 02:00-04:00 Taipei time - Reboot period: 1 hour between node reboots - PDB-aware: checks AWOOOI pods before draining - Prometheus integration for metrics Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
143
k8s/kured/kured.yaml
Normal file
143
k8s/kured/kured.yaml
Normal file
@@ -0,0 +1,143 @@
|
||||
# =============================================================================
|
||||
# Kured (KUbernetes REboot Daemon)
|
||||
# =============================================================================
|
||||
# K4.1 2026-03-28: Automatic node reboot for OS updates
|
||||
# Deployed by: Claude Code (Chief Architect)
|
||||
# Maintenance window: 02:00-04:00 Asia/Taipei local time (UTC+8)
|
||||
# =============================================================================
|
||||
---
# Namespace that contains the namespaced Kured objects declared in this file
# (ServiceAccount, Role, RoleBinding, DaemonSet).
apiVersion: v1
kind: Namespace
metadata:
  name: kured
  labels:
    app.kubernetes.io/name: kured
|
||||
---
# Identity the Kured pods run as. The ClusterRoleBinding and RoleBinding in
# this file both name this account as their subject.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: kured
  name: kured
|
||||
---
# Cluster-scoped permissions granted to Kured. The pod and eviction rules are
# presumably what the drain step relies on; verify against the Kured release
# in use before tightening them.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kured
rules:
  # Nodes: read and patch.
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
      - patch
  # Pods: enumerate, read and delete.
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - list
      - delete
      - get
  # DaemonSets: read-only.
  - apiGroups:
      - apps
    resources:
      - daemonsets
    verbs:
      - get
  # Pod evictions: create.
  - apiGroups:
      - ""
    resources:
      - pods/eviction
    verbs:
      - create
|
||||
---
# Binds the "kured" ClusterRole to the "kured" ServiceAccount that lives in
# the "kured" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kured
subjects:
  - kind: ServiceAccount
    namespace: kured
    name: kured
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kured
|
||||
---
# Namespaced write permission for Kured inside the "kured" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: kured
  namespace: kured
rules:
  # Kured keeps its cluster-wide reboot lock on its own DaemonSet, so "update"
  # is limited to the DaemonSet named "kured" instead of every DaemonSet in
  # the namespace (least privilege; same shape as the upstream RBAC manifest).
  - apiGroups: ["apps"]
    resources: ["daemonsets"]
    resourceNames: ["kured"]
    verbs: ["update"]
|
||||
---
# Binds the namespaced "kured" Role to the "kured" ServiceAccount, both in the
# "kured" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: kured
  name: kured
subjects:
  - kind: ServiceAccount
    namespace: kured
    name: kured
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: kured
|
||||
---
# Kured DaemonSet: runs one Kured pod on every node, control-plane nodes
# included via the tolerations below. Each pod is configured to watch the
# reboot sentinel file and to act only inside the maintenance window.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kured
  namespace: kured
  labels:
    app.kubernetes.io/name: kured
    app.kubernetes.io/version: "1.15.1"
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: kured
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kured
    spec:
      serviceAccountName: kured
      # Allow scheduling onto control-plane nodes (both the current and the
      # legacy "master" taint keys) so they are rebooted as well.
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      # Host PID namespace plus a privileged container: presumably required so
      # Kured can reach the host to issue the reboot -- confirm against the
      # Kured documentation before restricting either setting.
      hostPID: true
      restartPolicy: Always
      containers:
        - name: kured
          image: ghcr.io/kubereboot/kured:1.15.1
          imagePullPolicy: IfNotPresent
          securityContext:
            privileged: true
          env:
            # Node name injected through the downward API.
            - name: KURED_NODE_ID
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          command:
            - /usr/bin/kured
            # The lock lives on this DaemonSet. Kured looks in "kube-system"
            # unless told otherwise, and the "update" Role exists only in the
            # "kured" namespace, so the namespace must be given explicitly.
            - --ds-namespace=kured
            # A reboot is requested when this file exists on the node.
            - --reboot-sentinel=/var/run/reboot-required
            # Maintenance window: 02:00-04:00 local time in the zone set by
            # --time-zone. The start/end times are interpreted in that zone,
            # so they must NOT be pre-converted to UTC.
            - --start-time=02:00
            - --end-time=04:00
            - --time-zone=Asia/Taipei
            # How often each pod checks the sentinel file.
            - --period=1h
            # Hold off rebooting a node while any pod matching this selector
            # is running on it. This is not a PodDisruptionBudget check.
            # NOTE(review): if matching pods run permanently on a node, that
            # node will never be rebooted -- confirm this is intended.
            - --blocking-pod-selector=app.kubernetes.io/name=awoooi
            # Prometheus instance Kured queries for active alerts; firing
            # alerts block reboots. This does not export Kured's own metrics.
            # NOTE(review): an always-firing alert (e.g. a watchdog) would
            # block every reboot; consider --alert-filter-regexp.
            - --prometheus-url=http://192.168.0.188:9090
          resources:
            limits:
              cpu: 100m
              memory: 64Mi
            requests:
              cpu: 10m
              memory: 32Mi
          volumeMounts:
            # Host /var/run, where the sentinel file is expected.
            - name: hostrun
              mountPath: /var/run
      volumes:
        - name: hostrun
          hostPath:
            path: /var/run
|
||||
Reference in New Issue
Block a user