- K2.4 NPD: Node Problem Detector (DaemonSet)
- K2.3 VPA: 3 Vertical Pod Autoscaler (Off 模式)
- K2.1 ArgoCD: v3.3.6 @ :30443 (GitOps)
- K2.2 Sealed Secrets: v0.26.0 (加密 Secrets)

新增檔案:
- k8s/npd/node-problem-detector.yaml
- k8s/awoooi-prod/11-vpa.yaml

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
115 lines
2.6 KiB
YAML
115 lines
2.6 KiB
YAML
# Node Problem Detector - full deployment
# Author: Claude Code (Chief Architect)
# Date: 2026-03-28 (Taipei)
# Purpose: detect node problems (OOM, disk pressure, kernel issues, etc.)

---
# Namespace that holds the namespaced Node Problem Detector resources
# (the ServiceAccount and the DaemonSet defined later in this file).
apiVersion: v1
kind: Namespace
metadata:
  labels:
    app: node-problem-detector
  name: node-problem-detector

---
# Service account used by the DaemonSet pods (see serviceAccountName in the
# DaemonSet) and bound to the ClusterRole by the ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: node-problem-detector
  name: node-problem-detector

---
# Cluster-wide permissions granted to the detector. All three rules target
# the core ("") API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: node-problem-detector
rules:
  # Read Node objects.
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
  # Patch the status subresource of Nodes.
  - apiGroups:
      - ""
    resources:
      - nodes/status
    verbs:
      - patch
  # Create and modify Events.
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - create
      - patch
      - update

---
# Grants the node-problem-detector ClusterRole to the ServiceAccount of the
# same name in the node-problem-detector namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: node-problem-detector
subjects:
  - kind: ServiceAccount
    namespace: node-problem-detector
    name: node-problem-detector
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: node-problem-detector

---
# DaemonSet that runs the node-problem-detector container with host log
# paths mounted read-only.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app: node-problem-detector
  name: node-problem-detector
  namespace: node-problem-detector
spec:
  selector:
    matchLabels:
      app: node-problem-detector
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: node-problem-detector
    spec:
      serviceAccountName: node-problem-detector
      # No key is given, so each toleration matches every taint that has the
      # listed effect.
      tolerations:
        - effect: NoSchedule
          operator: Exists
        - effect: NoExecute
          operator: Exists
      containers:
        - name: node-problem-detector
          image: registry.k8s.io/node-problem-detector/node-problem-detector:v0.8.17
          command:
            - /node-problem-detector
            - --logtostderr
            # NOTE(review): no volume in this manifest is mounted at /config,
            # so these two files are presumably provided by the image itself
            # - confirm.
            - --config.system-log-monitor=/config/kernel-monitor.json,/config/docker-monitor.json
          env:
            # Name of the node this pod is scheduled on (downward API).
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          securityContext:
            privileged: true
          resources:
            requests:
              cpu: 20m
              memory: 50Mi
            limits:
              cpu: 100m
              memory: 100Mi
          # Every host path below is mounted read-only.
          volumeMounts:
            - name: log
              mountPath: /var/log
              readOnly: true
            - name: kmsg
              mountPath: /dev/kmsg
              readOnly: true
            - name: localtime
              mountPath: /etc/localtime
              readOnly: true
      volumes:
        # Host log directory.
        - name: log
          hostPath:
            path: /var/log/
        # Host kernel message device.
        - name: kmsg
          hostPath:
            path: /dev/kmsg
        # Host timezone file.
        - name: localtime
          hostPath:
            path: /etc/localtime