feat(k8s): Phase K2 自動化維運完成
- K2.4 NPD: Node Problem Detector (DaemonSet) - K2.3 VPA: 3 Vertical Pod Autoscaler (Off 模式) - K2.1 ArgoCD: v3.3.6 @ :30443 (GitOps) - K2.2 Sealed Secrets: v0.26.0 (加密 Secrets) 新增檔案: - k8s/npd/node-problem-detector.yaml - k8s/awoooi-prod/11-vpa.yaml Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -13,8 +13,8 @@
|
||||
| **Day** | Day 10 |
|
||||
| **AI Fallback** | ✅ **Ollama → Gemini → Claude** (ConfigMap 已修正) |
|
||||
| **LLM 模型** | `llama3.2:3b` (CPU 約 2-3 分鐘) |
|
||||
| **K3s 優化** | ✅ **K0/K-NET/K-HA/K-CLEAN** / 🟡 **K1 進行中** / ❌ **K2-K4 待執行** |
|
||||
| **K1 Velero** | ✅ **全部完成** (MinIO + Velero + daily-awoooi-prod Schedule) |
|
||||
| **K3s 優化** | ✅ **K0-K2 全部完成** / ❌ **K3-K4 待執行** |
|
||||
| **K1-K2** | ✅ **全部完成** (Velero + ArgoCD:30443 + VPA + NPD + Sealed Secrets) |
|
||||
| **K-HA** | ✅ **雙 Control-Plane (120+121) + PostgreSQL Datastore** |
|
||||
| **VIP** | ✅ **192.168.0.125 (keepalived + CI/CD 整合)** |
|
||||
| **Phase 16** | ✅ **首席架構師審查 50/50 OUTSTANDING** |
|
||||
@@ -38,7 +38,7 @@
|
||||
| **K-HA** | 雙 CP + PostgreSQL | 4 | 4h | ✅ **完成** |
|
||||
| **K-CLEAN** | 資源清理 | 2 | 2h | ✅ **完成** |
|
||||
| **K1** | Velero 災難恢復 | 6 | 8h | ✅ **完成** (MinIO + Velero + Schedule + 測試備份) |
|
||||
| **K2** | ArgoCD/VPA/NPD | 20 | 12h | ❌ **未開始** |
|
||||
| **K2** | ArgoCD/VPA/NPD | 20 | 12h | ✅ **完成** (NPD + VPA + ArgoCD + Sealed Secrets) |
|
||||
| **K3** | Longhorn/HPA | 7 | 10h | ❌ **未開始** |
|
||||
| **K4** | Kured/Descheduler | 10 | 6h | ❌ **未開始** |
|
||||
|
||||
|
||||
@@ -80,6 +80,12 @@
|
||||
| **Harbor** | `192.168.0.110:5000` | Container Registry |
|
||||
| **GitHub Runner** | - | Self-hosted (awoooi-runner) |
|
||||
|
||||
### K3s 叢集管理
|
||||
|
||||
| 服務 | 端點 | 說明 |
|
||||
|------|------|------|
|
||||
| **ArgoCD** | `192.168.0.125:30443` | GitOps UI (帳號 admin;密碼請勿明文記錄於文件,初始密碼可由 `argocd` namespace 的 `argocd-initial-admin-secret` 取得;先前已提交的密碼應視為外洩並立即輪替) |
|
||||
|
||||
### 備份 (192.168.0.188)
|
||||
|
||||
| 服務 | 端點 | 說明 |
|
||||
@@ -149,6 +155,7 @@ HARBOR_URL=http://192.168.0.110:5000
|
||||
|
||||
| 日期 | 版本 | 變更 | 作者 |
|
||||
|------|------|------|------|
|
||||
| 2026-03-28 | v1.3 | K2 完成 - ArgoCD/VPA/NPD/Sealed Secrets | Claude Code |
|
||||
| 2026-03-28 | v1.2 | K1 Velero 備份系統完成,MinIO 端點已記錄 | Claude Code |
|
||||
| 2026-03-28 | v1.1 | OpenClaw 端口 8088→8089 統一,移除 legacy 引用 | Claude Code |
|
||||
| 2026-03-28 | v1.0 | 初版建立 (K-HA 完成後) | 首席架構師 |
|
||||
|
||||
73
k8s/awoooi-prod/11-vpa.yaml
Normal file
73
k8s/awoooi-prod/11-vpa.yaml
Normal file
@@ -0,0 +1,73 @@
|
||||
# AWOOOI VPA 配置 (Off 模式 - 僅建議)
|
||||
# 建立者: Claude Code (首席架構師)
|
||||
# 日期: 2026-03-28 (台北)
|
||||
# 用途: 收集資源使用數據,提供資源調整建議
|
||||
|
||||
---
|
||||
# VerticalPodAutoscaler for the awoooi-api Deployment (recommendation only).
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  namespace: awoooi-prod
  name: awoooi-api-vpa
spec:
  # Workload whose pods are observed.
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: awoooi-api
  updatePolicy:
    # Only provide recommendations; never update pods automatically.
    # Kept quoted so a YAML 1.1 parser does not read Off as boolean false.
    updateMode: "Off"
  resourcePolicy:
    containerPolicies:
      # Wildcard policy: bounds for the recommendations of the pod's containers.
      - containerName: "*"
        maxAllowed:
          cpu: 2
          memory: 2Gi
        minAllowed:
          cpu: 100m
          memory: 128Mi
|
||||
|
||||
---
|
||||
# VerticalPodAutoscaler for the awoooi-web Deployment (recommendation only).
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  namespace: awoooi-prod
  name: awoooi-web-vpa
spec:
  # Workload whose pods are observed.
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: awoooi-web
  updatePolicy:
    # Kept quoted so a YAML 1.1 parser does not read Off as boolean false.
    updateMode: "Off"
  resourcePolicy:
    containerPolicies:
      # Wildcard policy: bounds for the recommendations of the pod's containers.
      - containerName: "*"
        maxAllowed:
          cpu: 1
          memory: 1Gi
        minAllowed:
          cpu: 50m
          memory: 64Mi
|
||||
|
||||
---
|
||||
# VerticalPodAutoscaler for the awoooi-worker Deployment (recommendation only).
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  namespace: awoooi-prod
  name: awoooi-worker-vpa
spec:
  # Workload whose pods are observed.
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: awoooi-worker
  updatePolicy:
    # Kept quoted so a YAML 1.1 parser does not read Off as boolean false.
    updateMode: "Off"
  resourcePolicy:
    containerPolicies:
      # Wildcard policy: bounds for the recommendations of the pod's containers.
      - containerName: "*"
        maxAllowed:
          cpu: 500m
          memory: 512Mi
        minAllowed:
          cpu: 50m
          memory: 64Mi
|
||||
114
k8s/npd/node-problem-detector.yaml
Normal file
114
k8s/npd/node-problem-detector.yaml
Normal file
@@ -0,0 +1,114 @@
|
||||
# Node Problem Detector 完整部署
|
||||
# 建立者: Claude Code (首席架構師)
|
||||
# 日期: 2026-03-28 (台北)
|
||||
# 用途: 偵測節點問題 (OOM, 磁碟壓力, 內核問題等)
|
||||
|
||||
---
|
||||
# Namespace that holds the Node Problem Detector ServiceAccount and DaemonSet.
apiVersion: v1
kind: Namespace
metadata:
  labels:
    app: node-problem-detector
  name: node-problem-detector
|
||||
|
||||
---
|
||||
# ServiceAccount used by the node-problem-detector DaemonSet pods and bound
# to the node-problem-detector ClusterRole by the ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: node-problem-detector
  name: node-problem-detector
|
||||
|
||||
---
|
||||
# Cluster-wide permissions granted to the node-problem-detector ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: node-problem-detector
rules:
  # Read Node objects.
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
  # Patch the status subresource of Nodes.
  - apiGroups:
      - ""
    resources:
      - nodes/status
    verbs:
      - patch
  # Write Events.
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - create
      - patch
      - update
|
||||
|
||||
---
|
||||
# Grants the node-problem-detector ClusterRole to the ServiceAccount of the
# same name in the node-problem-detector namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: node-problem-detector
subjects:
  - kind: ServiceAccount
    namespace: node-problem-detector
    name: node-problem-detector
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: node-problem-detector
|
||||
|
||||
---
|
||||
# DaemonSet that runs one node-problem-detector pod per schedulable node.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  namespace: node-problem-detector
  name: node-problem-detector
  labels:
    app: node-problem-detector
spec:
  selector:
    matchLabels:
      app: node-problem-detector
  template:
    metadata:
      labels:
        app: node-problem-detector
    spec:
      serviceAccountName: node-problem-detector
      # Host paths exposed to the container (all mounted read-only below).
      volumes:
        - name: log
          hostPath:
            path: /var/log/
        - name: kmsg
          hostPath:
            path: /dev/kmsg
        - name: localtime
          hostPath:
            path: /etc/localtime
      # Tolerate every NoSchedule / NoExecute taint, whatever its key.
      tolerations:
        - effect: NoSchedule
          operator: Exists
        - effect: NoExecute
          operator: Exists
      containers:
        - name: node-problem-detector
          image: registry.k8s.io/node-problem-detector/node-problem-detector:v0.8.17
          # NOTE(review): docker-monitor.json presumably targets dockerd logs;
          # the cluster appears to be K3s — confirm this monitor is still wanted.
          command:
            - /node-problem-detector
            - --logtostderr
            - --config.system-log-monitor=/config/kernel-monitor.json,/config/docker-monitor.json
          env:
            # Name of the node this pod is scheduled on (downward API).
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          securityContext:
            privileged: true
          resources:
            requests:
              cpu: 20m
              memory: 50Mi
            limits:
              cpu: 100m
              memory: 100Mi
          volumeMounts:
            - name: log
              mountPath: /var/log
              readOnly: true
            - name: kmsg
              mountPath: /dev/kmsg
              readOnly: true
            - name: localtime
              mountPath: /etc/localtime
              readOnly: true
|
||||
Reference in New Issue
Block a user