From a38d911213e9a29035b519850c2f4b29ae7ffedc Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 2 May 2026 23:48:31 +0800 Subject: [PATCH] fix(heartbeat): exclude Succeeded/Completed CronJob pods from warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 統帥 23:30 截圖鐵證:每日系統報告永遠列「需關注 3 項: Pod drift-scanner-* 未就緒 (Succeeded)」,讓人誤以為告警重複。 實際上 Succeeded/Completed 是 CronJob/Job 跑完的成功狀態, ready=False 是設計(容器已退出)— 不該算 warning。 修法:heartbeat_report_service.py:704 加判斷跳過 Succeeded/Completed pods。 (因為使用 continue,這些 Pod 在同一迴圈內後續的檢查,例如 restarts >= 3,也會一併跳過。) 預期效果:今天 23:30 的「需關注 3 項」明天起會降為 0 項,daily report header 從「需關注 N 項」變回「全系統正常」。 Tests: 50 passed (heartbeat 相關) 注意:working tree 還有 statq Codex 未 commit 的 7 個檔案改動 (approval_execution.py 有 indentation error 半成品),本 commit 只動 heartbeat_report_service.py 單檔,不誤碰其他。 Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/api/src/services/heartbeat_report_service.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/apps/api/src/services/heartbeat_report_service.py b/apps/api/src/services/heartbeat_report_service.py index 926953ed..338985e9 100644 --- a/apps/api/src/services/heartbeat_report_service.py +++ b/apps/api/src/services/heartbeat_report_service.py @@ -701,7 +701,12 @@ class HeartbeatReportService: warnings.append(f"PENDING 積壓 {report.alert_pipeline.pending_approval} 筆,需人工處理") # Pod 異常 + # 2026-05-02 Claude Opus 4.7 + 統帥 ogt:CronJob/Job 跑完的 Pod (Succeeded/Completed) + # ready=False 是設計(容器已退出),不是異常。原本邏輯每天推「Pod drift-scanner-* 未就緒 + # (Succeeded)」3 條 false positive,讓統帥誤以為告警重複。 for pod in report.pods: + if pod.status in ("Succeeded", "Completed"): + continue # CronJob/Job 跑完是成功,不算未就緒 if not pod.ready: warnings.append(f"Pod {pod.name} 未就緒({pod.status})") if pod.restarts >= 3: