From 2c6ed4e9cf798cc37e2e6df331f339d952fa82d7 Mon Sep 17 00:00:00 2001 From: OG T Date: Tue, 14 Apr 2026 15:28:52 +0800 Subject: [PATCH] =?UTF-8?q?fix(k8s):=20=E4=BF=AE=E5=BE=A9=20ArgoCD=20probe?= =?UTF-8?q?=20=E5=A4=B1=E6=95=97=20+=20drift-scanner=20egress=20=E5=B0=81?= =?UTF-8?q?=E9=8E=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 問題 1 — ArgoCD "All connection attempts failed": - ARGOCD_URL 指向 192.168.0.120:30443,但 node 120 kube-proxy 對 30443 有路由 bug(ArgoCD pod 在 121) - 修復: ARGOCD_URL → 192.168.0.121:30443 - NetworkPolicy: 補白名單 192.168.0.121/32:30443 - NetworkPolicy: 補白名單 192.168.0.125/32:30443 (keepalived VIP) 問題 2 — drift-scanner Error x5 / 系統沉默 9.4h: - CronJob pod template 缺少 system=awoooi label - default-deny-all 封鎖所有 egress,allow-required-egress 僅對 system=awoooi pods 生效 - 修復: drift-cronjob pod template 新增 system: awoooi Co-Authored-By: Claude Sonnet 4.6 --- k8s/awoooi-prod/02-network-policy.yaml | 18 ++++++++++++++++-- k8s/awoooi-prod/04-configmap.yaml | 4 +++- k8s/drift-cronjob.yaml | 3 +++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/k8s/awoooi-prod/02-network-policy.yaml b/k8s/awoooi-prod/02-network-policy.yaml index 91fab952..404e88a6 100644 --- a/k8s/awoooi-prod/02-network-policy.yaml +++ b/k8s/awoooi-prod/02-network-policy.yaml @@ -1,8 +1,9 @@ # AWOOOI 正式環境零信任網路策略 # 負責人: CIO -# 版本: v1.4 -# 日期: 2026-04-11 +# Version: v1.5 +# Date: 2026-04-14 # 變更: +# - v1.5: allow ArgoCD NodePort 30443 egress to 192.168.0.121/32 and to keepalived VIP 192.168.0.125/32 (fixes heartbeat probe) # - v1.4: 新增 ArgoCD MCP egress(argocd namespace port 80/443) # - v1.3: 新增 192.168.0.111 Ollama 主機 (M1 Pro),移除 188 的 Ollama port # - v1.2: 修復 DNS 規則使用 namespaceSelector (ADR-011 Appendix B) @@ -187,6 +188,16 @@ spec: - protocol: TCP port: 30443 + # keepalived VIP — ArgoCD NodePort access point + # 2026-04-14 Claude Sonnet 4.6: VIP=192.168.0.125 currently sits on mon(120). NOTE: this same change points ARGOCD_URL at 192.168.0.121 (04-configmap.yaml), not at the VIP, because VIP:30443 was observed failing via node 120; this rule only keeps the VIP path permitted + # Fix: heartbeat _probe_argocd_sync() connecting to VIP:30443 was blocked by NetworkPolicy (All connection attempts 
failed) + - to: + - ipBlock: + cidr: 192.168.0.125/32 # keepalived VIP + ports: + - protocol: TCP + port: 30443 + # 允許訪問 ArgoCD MCP(MCP Phase 3,2026-04-11) # ArgoCD Server Pod 在 argocd namespace (10.42.0.252),但 DNS 解析到 ClusterIP (10.43.16.201) # 必須同時允許 namespace+pod selector(Pod IP)和 ClusterIP @@ -205,12 +216,15 @@ spec: # 允許訪問 192.168.0.121 K3s Worker (mon1) # 2026-04-09 新增: NodePort 32334(API)/32335(Web) 在 121 上,host probe 需要 + # 2026-04-14 added: 30443 — ArgoCD server pod runs on 121; kube-proxy DNAT is done from this node - to: - ipBlock: cidr: 192.168.0.121/32 ports: - protocol: TCP port: 6443 + - protocol: TCP + port: 30443 - protocol: TCP port: 32334 - protocol: TCP diff --git a/k8s/awoooi-prod/04-configmap.yaml b/k8s/awoooi-prod/04-configmap.yaml index 1a7240e1..2fe3f537 100644 --- a/k8s/awoooi-prod/04-configmap.yaml +++ b/k8s/awoooi-prod/04-configmap.yaml @@ -113,7 +113,9 @@ data: # MCP Phase 3 (2026-04-11 Claude Sonnet 4.6): ArgoCD + Sentry MCP 啟用 # ARGOCD_API_TOKEN 在 Secrets 中配置 ARGOCD_MCP_ENABLED: "true" - ARGOCD_URL: "https://192.168.0.120:30443" + # 2026-04-14 Claude Sonnet 4.6: point at 121 instead — kube-proxy on node 120 has a routing bug for 30443 + # Test results: 120:30443 → 000 (FAIL), 121:30443 → 405 (ArgoCD OK), VIP:30443 → 000 (fails via 120) + ARGOCD_URL: "https://192.168.0.121:30443" SENTRY_MCP_ENABLED: "true" # Prometheus server 在 110:9090 (非 188) PROMETHEUS_URL: "http://192.168.0.110:9090" diff --git a/k8s/drift-cronjob.yaml b/k8s/drift-cronjob.yaml index b5e33906..9c17e4f7 100644 --- a/k8s/drift-cronjob.yaml +++ b/k8s/drift-cronjob.yaml @@ -38,6 +38,9 @@ spec: labels: app: awoooi component: drift-scanner + # 2026-04-14 Claude Sonnet 4.6: add system=awoooi — allow-required-egress selects pods by this label + # Without this label default-deny-all blocks all egress (root cause of the 9.4h system silence) + system: awoooi spec: restartPolicy: Never # 2026-04-09 Claude Sonnet 4.6: awoooi-api SA 不存在,改用 default(只需呼叫內部 API,不需 K8s 權限)