From 8558ac2d20de2d9f463c3d0a682d7453d504c586 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Sun, 24 May 2026 10:11:20 +0800
Subject: [PATCH] fix(k8s): use lightweight api probes

---
Reviewer notes (text between the three-dash line and the diffstat is
commentary only; it does not become part of the commit message):

* Rollout strategy: maxUnavailable changes from 0 to 1; maxSurge stays 1.
* livenessProbe and startupProbe now GET /api/v1/health/live, and
  readinessProbe now GETs /api/v1/health/ready. All three previously
  used /api/v1/health.
* The Chinese text on the startupProbe comment line reads "allow up to
  60 seconds of startup time". It is an unchanged context line, so it
  must stay byte-for-byte as it is in the target file or the hunk will
  not apply; it is intentionally not translated in place.
* NOTE(review): the author header shows "Your Name" with no e-mail
  address; presumably git user.name/user.email were unset when the
  commit was made. Confirm before applying.
* NOTE(review): this patch was recovered from a copy whose line breaks
  and indentation had been collapsed. Leading whitespace in the hunks
  below is reconstructed from conventional 2-space Kubernetes manifest
  nesting with indented sequences. Verify it against the target file.
* NOTE(review): the /live and /ready endpoints are assumed to already
  exist in the API service; this patch does not add them. Confirm.

 k8s/awoooi-prod/06-deployment-api.yaml | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/k8s/awoooi-prod/06-deployment-api.yaml b/k8s/awoooi-prod/06-deployment-api.yaml
index e1550ff1..3c3ddfed 100644
--- a/k8s/awoooi-prod/06-deployment-api.yaml
+++ b/k8s/awoooi-prod/06-deployment-api.yaml
@@ -25,7 +25,9 @@ spec:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
-      maxUnavailable: 0
+      # 2026-05-24 Codex: allow one unavailable replica so rollout can replace
+      # a bad old ReplicaSet instead of deadlocking at 1/2 when probes regress.
+      maxUnavailable: 1
   template:
     metadata:
       labels:
@@ -137,7 +139,10 @@ spec:
               memory: "1Gi"
           livenessProbe:
             httpGet:
-              path: /api/v1/health
+              # 2026-05-24 Codex: K8s probes must stay lightweight. The full
+              # health endpoint checks Ollama/OpenClaw/SigNoz and can exceed
+              # kubelet timeout when an external provider is degraded.
+              path: /api/v1/health/live
               port: 8000
             initialDelaySeconds: 30
             periodSeconds: 10
@@ -145,7 +150,7 @@ spec:
             failureThreshold: 3
           readinessProbe:
             httpGet:
-              path: /api/v1/health
+              path: /api/v1/health/ready
               port: 8000
             initialDelaySeconds: 5
             periodSeconds: 5
@@ -154,7 +159,7 @@ spec:
           # Phase K0.5: Startup Probe (允許最多 60 秒啟動時間)
           startupProbe:
             httpGet:
-              path: /api/v1/health
+              path: /api/v1/health/live
               port: 8000
             initialDelaySeconds: 5
             periodSeconds: 5