Critical: Blackbox Exporter replacement 從 K8s DNS 改為主機 IP (192.168.0.188:9115) Important: Descheduler namespace 顯式宣告 PSA restricted labels Suggestion: failedJobsHistoryLimit 3→1, 新增 MinioDiskUsageCritical 5% 告警 R1 Review by: 首席架構師 (Phase O-1) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
163 lines
4.7 KiB
YAML
163 lines
4.7 KiB
YAML
# =============================================================================
# Descheduler
# =============================================================================
# K4.2 2026-03-28: Pod rebalancing for load distribution
# Deployed by: Claude Code (首席架構師)
# Schedule: Every 2 hours (cron)
# =============================================================================
---
# Namespace that holds the descheduler ServiceAccount, ConfigMap and CronJob.
apiVersion: v1
kind: Namespace
metadata:
  labels:
    app.kubernetes.io/name: descheduler
    # R1 review: explicitly declare the Pod Security Admission level.
    # All three modes (warn, audit, enforce) are set to "restricted".
    pod-security.kubernetes.io/warn: restricted
    pod-security.kubernetes.io/audit: restricted
    pod-security.kubernetes.io/enforce: restricted
  name: descheduler
---
# Identity used by the descheduler pods; it is the subject of the
# "descheduler" ClusterRoleBinding and the CronJob's serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: descheduler
  name: descheduler
---
# Cluster-wide permissions for the descheduler: read nodes, namespaces,
# priority classes and ReplicaSets; read/delete pods; create evictions; and
# record events.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: descheduler
rules:
  # Matches the upstream descheduler RBAC manifest: lets the descheduler
  # record eviction events instead of having them rejected by RBAC.
  - apiGroups: ["events.k8s.io"]
    resources: ["events"]
    verbs: ["create", "update"]
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "watch", "list"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "watch", "list", "delete"]
  - apiGroups: [""]
    resources: ["pods/eviction"]
    verbs: ["create"]
  # Phase O-1.1 2026-04-02: the descheduler must be able to list namespaces
  # in order to balance pods across namespaces.
  - apiGroups: [""]
    resources: ["namespaces"]
    verbs: ["get", "watch", "list"]
  - apiGroups: ["scheduling.k8s.io"]
    resources: ["priorityclasses"]
    verbs: ["get", "watch", "list"]
  # Phase O-1.1: ReplicaSet information is used by the RemoveDuplicates
  # strategy.
  - apiGroups: ["apps"]
    resources: ["replicasets"]
    verbs: ["get", "watch", "list"]
---
# Grants the "descheduler" ClusterRole to the "descheduler" ServiceAccount
# in the "descheduler" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: descheduler
subjects:
  - kind: ServiceAccount
    namespace: descheduler
    name: descheduler
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: descheduler
---
# Descheduler policy, mounted into the CronJob pod at /policy-dir/policy.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: descheduler
  name: descheduler-policy
data:
  # The embedded policy defines one profile ("default") that enables
  # LowNodeUtilization and RemoveDuplicates as balance plugins and
  # RemovePodsViolatingNodeAffinity as a deschedule plugin.
  # LowNodeUtilization uses thresholds of 30 and targetThresholds of 50 for
  # cpu, memory and pods. The note inside the policy (dated 2026-03-29)
  # records that thresholds were raised from 20% to 30% to avoid overly
  # frequent migrations on a small cluster.
  # The literal below is data handed to the descheduler; keep it verbatim.
  policy.yaml: |
    apiVersion: descheduler/v1alpha2
    kind: DeschedulerPolicy
    profiles:
      - name: default
        pluginConfig:
          - name: LowNodeUtilization
            args:
              targetThresholds:
                cpu: 50
                memory: 50
                pods: 50
              # 2026-03-29: 從 20% 調整為 30% (首席架構師建議)
              # 避免小型叢集過度頻繁遷移
              thresholds:
                cpu: 30
                memory: 30
                pods: 30
          - name: RemovePodsViolatingNodeAffinity
            args:
              nodeAffinityType:
                - requiredDuringSchedulingIgnoredDuringExecution
          - name: RemoveDuplicates
            args:
              excludeOwnerKinds:
                - DaemonSet
        plugins:
          balance:
            enabled:
              - LowNodeUtilization
              - RemoveDuplicates
          deschedule:
            enabled:
              - RemovePodsViolatingNodeAffinity
---
# Periodic descheduler run: one Job every two hours, using the policy from
# the "descheduler-policy" ConfigMap.
apiVersion: batch/v1
kind: CronJob
metadata:
  namespace: descheduler
  name: descheduler
  labels:
    app.kubernetes.io/name: descheduler
    app.kubernetes.io/version: '0.30.1'
spec:
  # Every 2 hours, at minute 0.
  schedule: '0 */2 * * *'
  concurrencyPolicy: Forbid
  failedJobsHistoryLimit: 1
  successfulJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app.kubernetes.io/name: descheduler
        spec:
          restartPolicy: Never
          serviceAccountName: descheduler
          # Phase O-1.1 2026-04-02: fixes a PodSecurity "restricted"
          # violation. The missing securityContext caused the CronJob to
          # fail continuously with FailedCreate.
          securityContext:
            seccompProfile:
              type: RuntimeDefault
            runAsUser: 65534
            runAsNonRoot: true
          volumes:
            - name: policy-volume
              configMap:
                name: descheduler-policy
          containers:
            - name: descheduler
              image: registry.k8s.io/descheduler/descheduler:v0.30.1
              imagePullPolicy: IfNotPresent
              command:
                - /bin/descheduler
                - --policy-config-file=/policy-dir/policy.yaml
                - --v=3
              volumeMounts:
                - mountPath: /policy-dir
                  name: policy-volume
              resources:
                requests:
                  cpu: 100m
                  memory: 64Mi
                limits:
                  cpu: 200m
                  memory: 128Mi
              # Phase O-1.1: fields required by the PSA "restricted" level.
              securityContext:
                readOnlyRootFilesystem: true
                allowPrivilegeEscalation: false
                capabilities:
                  drop:
                    - ALL