diff --git a/apps/web/src/app/api/health/route.ts b/apps/web/src/app/api/health/route.ts new file mode 100644 index 00000000..21ea539e --- /dev/null +++ b/apps/web/src/app/api/health/route.ts @@ -0,0 +1,10 @@ +import { NextResponse } from 'next/server'; + +export const dynamic = 'force-dynamic'; + +export async function GET() { + return NextResponse.json({ + status: 'ok', + service: 'awoooi-web', + }); +} diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 6f0057ee..e4f88a2e 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,43 @@ +## 2026-05-31|Web probe health endpoint 收斂 rollout Smoke 警告 + +**背景**: + +- Work Items 稽核鏈 production 驗證後,`59b4943b` post-deploy 雖然成功,但 CI/CD summary 顯示 `Smoke=⚠️`。 +- 追 K8s events 後確認 rollout 期間 `awoooi-web` 兩個 pod 各重啟 2 次,曾出現 readiness / startup / liveness probe timeout,瀏覽器短暫命中 `502 Bad Gateway`;rollout settled 後頁面恢復。 +- live deployment 的 web probes 打 `/`,而 `/` 會走 Next middleware 與 locale redirect / app shell,這不是最小健康檢查面。 + +**本次調整**: + +- 新增 `apps/web/src/app/api/health/route.ts`: + - 回傳 `{"status":"ok","service":"awoooi-web"}`。 + - 不讀 production API、不碰資料庫、不經 localized app shell。 +- `k8s/awoooi-prod/05-deployment-web.yaml`: + - `livenessProbe` / `readinessProbe` / `startupProbe` 全部改打 `/api/health`。 + - 保留原本 timeout / threshold,先降低 probe surface,不直接放寬重啟條件。 + +**驗證**: + +```text +python3 -m json.tool apps/web/messages/zh-TW.json / en.json -> pass +git diff --check -> pass +pnpm --dir apps/web exec tsc --noEmit --tsBuildInfoFile /tmp/awoooi-web-health-probe-20260531.tsbuildinfo -> pass +python3 scripts/security/security-mirror-progress-guard.py -> SECURITY_MIRROR_PROGRESS_GUARD_OK +kubectl kustomize k8s/awoooi-prod: + awoooi-web liveness/readiness/startup path=/api/health +kubectl kustomize k8s/awoooi-prod | kubectl apply --dry-run=server -f - -> pass +NEXT_PUBLIC_API_URL=https://awoooi.wooo.work pnpm --dir apps/web run build -> pass +local production server: + GET http://127.0.0.1:3107/api/health -> 200 {"status":"ok","service":"awoooi-web"} + GET http://127.0.0.1:3107/ -> 307 /zh-TW +``` + +**目前整體進度(pre-deploy)**: + +- Web rollout / Smoke 穩定性:約 82%;已完成 probe surface 修正,待 Gitea deploy 後驗證 restartCount 是否不再增加。 +- 前端 AI 自動化管理介面同步:約 89%;Work Items 已 production 驗證,probe 修正避免下一輪 UI rollout 造成短暫 502。 +- 整體 AI 自動化飛輪:約 74%;本輪是部署穩定性收斂,不改 24h auto-repair claim。 +- 24h 完整 AI Agent 自動修復 production claim:0%;仍不可宣稱全自動修復閉環。 + ## 2026-05-31|IwoooS 視覺化資安指揮板與 Kali 維護就緒度 **背景**: diff --git a/k8s/awoooi-prod/05-deployment-web.yaml b/k8s/awoooi-prod/05-deployment-web.yaml index 3b504791..640274d2 100644 --- a/k8s/awoooi-prod/05-deployment-web.yaml +++ b/k8s/awoooi-prod/05-deployment-web.yaml @@ -70,7 +70,10 @@ spec: memory: "512Mi" livenessProbe: httpGet: - path: / + # 2026-05-31 Codex: probe a cheap API route instead of the + # localized app shell. The shell can block on SSR/middleware + # during rollout and caused transient 502 + liveness restarts. + path: /api/health port: 3000 initialDelaySeconds: 30 periodSeconds: 10 @@ -78,7 +81,7 @@ spec: failureThreshold: 3 readinessProbe: httpGet: - path: / + path: /api/health port: 3000 initialDelaySeconds: 5 periodSeconds: 5 @@ -87,7 +90,7 @@ spec: # Phase K0.5: Startup Probe (允許最多 60 秒啟動時間) startupProbe: httpGet: - path: / + path: /api/health port: 3000 initialDelaySeconds: 5 periodSeconds: 5