Files
awoooi/k8s/npd/node-problem-detector.yaml
OG T 66fb56c691 feat(k8s): Phase K2 自動化維運完成
- K2.4 NPD: Node Problem Detector (DaemonSet)
- K2.3 VPA: 3 Vertical Pod Autoscaler (Off 模式)
- K2.1 ArgoCD: v3.3.6 @ :30443 (GitOps)
- K2.2 Sealed Secrets: v0.26.0 (加密 Secrets)

新增檔案:
- k8s/npd/node-problem-detector.yaml
- k8s/awoooi-prod/11-vpa.yaml

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-28 21:27:05 +08:00

115 lines
2.6 KiB
YAML

# Node Problem Detector 完整部署
# 建立者: Claude Code (首席架構師)
# 日期: 2026-03-28 (台北)
# 用途: 偵測節點問題 (OOM, 磁碟壓力, 內核問題等)
---
# Namespace that the ServiceAccount and DaemonSet in this file are created in.
apiVersion: v1
kind: Namespace
metadata:
  labels:
    app: node-problem-detector
  name: node-problem-detector
---
# Service account used by the DaemonSet pods (see its serviceAccountName) and
# bound to the ClusterRole by the ClusterRoleBinding in this file.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: node-problem-detector
  name: node-problem-detector
---
# Cluster-scoped permissions for node-problem-detector.
# All three rules target the core API group (the empty string).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: node-problem-detector
rules:
  # Read Node objects.
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
  # Patch the status subresource of Node objects.
  - apiGroups:
      - ""
    resources:
      - nodes/status
    verbs:
      - patch
  # Create and modify Event objects.
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - create
      - patch
      - update
---
# Grants the node-problem-detector ClusterRole to the ServiceAccount of the
# same name in the node-problem-detector namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: node-problem-detector
subjects:
  - kind: ServiceAccount
    namespace: node-problem-detector
    name: node-problem-detector
roleRef:
  kind: ClusterRole
  name: node-problem-detector
  apiGroup: rbac.authorization.k8s.io
---
# DaemonSet that runs the node-problem-detector agent as a pod on the nodes of
# the cluster. Per the file header, its purpose is to detect node problems
# such as OOM, disk pressure and kernel issues.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  namespace: node-problem-detector
  name: node-problem-detector
  labels:
    app: node-problem-detector
spec:
  selector:
    matchLabels:
      app: node-problem-detector
  template:
    metadata:
      labels:
        # Has to match spec.selector.matchLabels above.
        app: node-problem-detector
    spec:
      serviceAccountName: node-problem-detector
      # No key is given, so each entry matches every taint that carries the
      # listed effect.
      tolerations:
        - effect: NoSchedule
          operator: Exists
        - effect: NoExecute
          operator: Exists
      # Host paths exposed to the container; all of them are mounted
      # read-only below.
      volumes:
        - name: log
          hostPath:
            path: /var/log/
        - name: kmsg
          hostPath:
            path: /dev/kmsg
        - name: localtime
          hostPath:
            path: /etc/localtime
      containers:
        - name: node-problem-detector
          image: registry.k8s.io/node-problem-detector/node-problem-detector:v0.8.17
          # NOTE(review): nothing in this manifest mounts a volume at /config,
          # so the two monitor configs are presumably the ones shipped inside
          # the image - confirm.
          command:
            - /node-problem-detector
            - --logtostderr
            - --config.system-log-monitor=/config/kernel-monitor.json,/config/docker-monitor.json
          resources:
            requests:
              cpu: 20m
              memory: 50Mi
            limits:
              cpu: 100m
              memory: 100Mi
          # NOTE(review): presumably privileged so the agent can read
          # /dev/kmsg - confirm before tightening.
          securityContext:
            privileged: true
          env:
            # Name of the node this pod is scheduled on, via the downward API.
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          volumeMounts:
            - name: log
              readOnly: true
              mountPath: /var/log
            - name: kmsg
              readOnly: true
              mountPath: /dev/kmsg
            # Presumably keeps the container in the host's timezone - confirm.
            - name: localtime
              readOnly: true
              mountPath: /etc/localtime