diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 4ae8dbea..f4c5e427 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -13,8 +13,8 @@ | **Day** | Day 10 | | **AI Fallback** | ✅ **Ollama → Gemini → Claude** (ConfigMap 已修正) | | **LLM 模型** | `llama3.2:3b` (CPU 約 2-3 分鐘) | -| **K3s 優化** | ✅ **K0/K-NET/K-HA/K-CLEAN** / 🟡 **K1 進行中** / ❌ **K2-K4 待執行** | -| **K1 Velero** | ✅ **全部完成** (MinIO + Velero + daily-awoooi-prod Schedule) | +| **K3s 優化** | ✅ **K0-K2 全部完成** / ❌ **K3-K4 待執行** | +| **K1-K2** | ✅ **全部完成** (Velero + ArgoCD:30443 + VPA + NPD + Sealed Secrets) | | **K-HA** | ✅ **雙 Control-Plane (120+121) + PostgreSQL Datastore** | | **VIP** | ✅ **192.168.0.125 (keepalived + CI/CD 整合)** | | **Phase 16** | ✅ **首席架構師審查 50/50 OUTSTANDING** | @@ -38,7 +38,7 @@ | **K-HA** | 雙 CP + PostgreSQL | 4 | 4h | ✅ **完成** | | **K-CLEAN** | 資源清理 | 2 | 2h | ✅ **完成** | | **K1** | Velero 災難恢復 | 6 | 8h | ✅ **完成** (MinIO + Velero + Schedule + 測試備份) | -| **K2** | ArgoCD/VPA/NPD | 20 | 12h | ❌ **未開始** | +| **K2** | ArgoCD/VPA/NPD | 20 | 12h | ✅ **完成** (NPD + VPA + ArgoCD + Sealed Secrets) | | **K3** | Longhorn/HPA | 7 | 10h | ❌ **未開始** | | **K4** | Kured/Descheduler | 10 | 6h | ❌ **未開始** | diff --git a/docs/reference/SERVICE-ENDPOINTS.md b/docs/reference/SERVICE-ENDPOINTS.md index da17a616..1df2e97a 100644 --- a/docs/reference/SERVICE-ENDPOINTS.md +++ b/docs/reference/SERVICE-ENDPOINTS.md @@ -80,6 +80,12 @@ | **Harbor** | `192.168.0.110:5000` | Container Registry | | **GitHub Runner** | - | Self-hosted (awoooi-runner) | +### K3s 叢集管理 + +| 服務 | 端點 | 說明 | +|------|------|------| +| **ArgoCD** | `192.168.0.125:30443` | GitOps UI (帳號 admin;密碼見 Secret argocd-initial-admin-secret,請勿明文記錄) | + ### 備份 (192.168.0.188) | 服務 | 端點 | 說明 | @@ -149,6 +155,7 @@ HARBOR_URL=http://192.168.0.110:5000 | 日期 | 版本 | 變更 | 作者 | |------|------|------|------| +| 2026-03-28 | v1.3 | K2 完成 - ArgoCD/VPA/NPD/Sealed Secrets | Claude Code | | 2026-03-28 | v1.2 | K1 Velero 備份系統完成,MinIO 端點已記錄 | Claude Code | | 2026-03-28 | v1.1 | OpenClaw 端口 8088→8089 
統一,移除 legacy 引用 | Claude Code | | 2026-03-28 | v1.0 | 初版建立 (K-HA 完成後) | 首席架構師 | diff --git a/k8s/awoooi-prod/11-vpa.yaml b/k8s/awoooi-prod/11-vpa.yaml new file mode 100644 index 00000000..0f0238eb --- /dev/null +++ b/k8s/awoooi-prod/11-vpa.yaml @@ -0,0 +1,73 @@ +# AWOOOI VPA configuration (Off mode - recommendations only) +# Author: Claude Code (Chief Architect) +# Date: 2026-03-28 (Taipei) +# Purpose: collect resource usage data and provide resource adjustment recommendations + +--- +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: awoooi-api-vpa + namespace: awoooi-prod +spec: + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: awoooi-api + updatePolicy: + updateMode: "Off" # Recommendations only; pods are not updated automatically + resourcePolicy: + containerPolicies: + - containerName: "*" + minAllowed: + cpu: 100m + memory: 128Mi + maxAllowed: + cpu: 2 + memory: 2Gi + +--- +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: awoooi-web-vpa + namespace: awoooi-prod +spec: + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: awoooi-web + updatePolicy: + updateMode: "Off" + resourcePolicy: + containerPolicies: + - containerName: "*" + minAllowed: + cpu: 50m + memory: 64Mi + maxAllowed: + cpu: 1 + memory: 1Gi + +--- +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: awoooi-worker-vpa + namespace: awoooi-prod +spec: + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: awoooi-worker + updatePolicy: + updateMode: "Off" + resourcePolicy: + containerPolicies: + - containerName: "*" + minAllowed: + cpu: 50m + memory: 64Mi + maxAllowed: + cpu: 500m + memory: 512Mi diff --git a/k8s/npd/node-problem-detector.yaml b/k8s/npd/node-problem-detector.yaml new file mode 100644 index 00000000..33e4d9d1 --- /dev/null +++ b/k8s/npd/node-problem-detector.yaml @@ -0,0 +1,114 @@ +# Node Problem Detector full deployment +# Author: Claude Code (Chief Architect) +# Date: 2026-03-28 (Taipei) +# Purpose: detect node problems (OOM, disk pressure, kernel issues, etc.) + +--- +apiVersion: v1 +kind: Namespace +metadata: + name: node-problem-detector + labels: + app: node-problem-detector + +--- 
+apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-problem-detector + namespace: node-problem-detector + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: node-problem-detector +rules: +- apiGroups: [""] + resources: ["nodes"] + verbs: ["get"] +- apiGroups: [""] + resources: ["nodes/status"] + verbs: ["patch"] +- apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch", "update"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: node-problem-detector +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: node-problem-detector +subjects: +- kind: ServiceAccount + name: node-problem-detector + namespace: node-problem-detector + +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-problem-detector + namespace: node-problem-detector + labels: + app: node-problem-detector +spec: + selector: + matchLabels: + app: node-problem-detector + template: + metadata: + labels: + app: node-problem-detector + spec: + serviceAccountName: node-problem-detector + containers: + - name: node-problem-detector + image: registry.k8s.io/node-problem-detector/node-problem-detector:v0.8.17 + command: + - /node-problem-detector + - --logtostderr + - --config.system-log-monitor=/config/kernel-monitor.json,/config/docker-monitor.json + resources: + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 20m + memory: 50Mi + securityContext: + privileged: true + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - name: log + mountPath: /var/log + readOnly: true + - name: kmsg + mountPath: /dev/kmsg + readOnly: true + - name: localtime + mountPath: /etc/localtime + readOnly: true + tolerations: + - operator: Exists + effect: NoSchedule + - operator: Exists + effect: NoExecute + volumes: + - name: log + hostPath: + path: /var/log/ + - name: kmsg + hostPath: + path: /dev/kmsg + - name: localtime + hostPath: + path: 
/etc/localtime