""" P0 #3 K8s pod state machine 測試 2026-05-03 Claude Opus 4.7 + 統帥 ogt 驗證 _build_warnings() 對 5 種 K8s pod phase 的判斷邏輯: Pending — 短暫 OK,>5min 警告 Running — ready=True OK;ready=False 短暫 OK,>2min 警告 Succeeded/Completed — CronJob 跑完,跳過(不算未就緒) Failed — 必告警 Unknown — 必告警 restarts >= 3 — 必告警(無關 phase) 加 test 同時保護未來重構不誤砍 K8s lifecycle 處理。 """ from datetime import datetime, timedelta, timezone import pytest from src.services.heartbeat_report_service import ( HeartbeatReport, HeartbeatReportService, PodInfo, ) def _make_report(pods: list[PodInfo]) -> HeartbeatReport: """構造最小 HeartbeatReport,只填 pods 欄位""" return HeartbeatReport( timestamp=datetime.now(timezone.utc), pods=pods, ) def _start_time(minutes_ago: int) -> str: """構造 N 分鐘前的 ISO 8601 startTime(K8s 格式)""" dt = datetime.now(timezone.utc) - timedelta(minutes=minutes_ago) return dt.strftime("%Y-%m-%dT%H:%M:%SZ") class TestPodStateMachine: """K8s pod phase 完整 state machine 覆蓋""" def setup_method(self): self.svc = HeartbeatReportService() # --- Succeeded / Completed: CronJob 跑完,不算未就緒 --- def test_succeeded_pod_no_warning(self): pod = PodInfo(name="drift-scanner-abc", ready=False, status="Succeeded") warnings = self.svc._build_warnings(_make_report([pod])) assert not any("drift-scanner" in w for w in warnings), \ f"Succeeded pod 不該被當未就緒,實際 warnings: {warnings}" def test_completed_pod_no_warning(self): pod = PodInfo(name="job-xyz", ready=False, status="Completed") warnings = self.svc._build_warnings(_make_report([pod])) assert not any("job-xyz" in w for w in warnings) # --- Failed: 必告警 --- def test_failed_pod_warns(self): pod = PodInfo(name="api-bad", ready=False, status="Failed") warnings = self.svc._build_warnings(_make_report([pod])) assert any("api-bad" in w and "Failed" in w for w in warnings), \ f"Failed pod 必告警,實際 warnings: {warnings}" # --- Unknown: 必告警 --- def test_unknown_pod_warns(self): pod = PodInfo(name="api-unknown", ready=False, status="Unknown") warnings = self.svc._build_warnings(_make_report([pod])) assert any("api-unknown" in w and "Unknown" in w for w in warnings) # --- Pending: 短暫 OK,>5min 警告 --- def test_pending_short_no_warning(self): # 剛建立 1 分鐘的 Pending pod 不該告警 pod = PodInfo( name="api-starting", ready=False, status="Pending", start_time=_start_time(minutes_ago=1), ) warnings = self.svc._build_warnings(_make_report([pod])) assert not any("api-starting" in w for w in warnings), \ f"Pending <5min 不該告警,實際: {warnings}" def test_pending_long_warns(self): # Pending 10 分鐘 → image pull / scheduling 卡住 pod = PodInfo( name="api-stuck", ready=False, status="Pending", start_time=_start_time(minutes_ago=10), ) warnings = self.svc._build_warnings(_make_report([pod])) assert any("api-stuck" in w and "Pending" in w for w in warnings), \ f"Pending >5min 必告警,實際: {warnings}" def test_pending_no_starttime_warns(self): # 拿不到 start_time 時保守告警 pod = PodInfo( name="api-no-time", ready=False, status="Pending", start_time=None, ) warnings = self.svc._build_warnings(_make_report([pod])) assert any("api-no-time" in w for w in warnings) # --- Running ready=True: 健康,跳過 --- def test_running_ready_no_warning(self): pod = PodInfo( name="api-healthy", ready=True, status="Running", start_time=_start_time(minutes_ago=60), ) warnings = self.svc._build_warnings(_make_report([pod])) assert not any("api-healthy" in w for w in warnings), \ f"Running+Ready 不該告警,實際: {warnings}" # --- Running ready=False: 短暫 OK,>2min 警告 --- def test_running_not_ready_short_no_warning(self): # Running 但 not ready 1 分鐘(剛起來,probe 還沒過) pod = PodInfo( name="api-warming", ready=False, status="Running", start_time=_start_time(minutes_ago=1), ) warnings = self.svc._build_warnings(_make_report([pod])) assert not any("api-warming" in w for w in warnings), \ f"Running NotReady <2min 不該告警,實際: {warnings}" def test_running_not_ready_long_warns(self): # Running 但 not ready 5 分鐘 → readiness probe fail pod = PodInfo( name="api-stale", ready=False, status="Running", start_time=_start_time(minutes_ago=5), ) warnings = self.svc._build_warnings(_make_report([pod])) assert any("api-stale" in w and "NotReady" in w for w in warnings), \ f"Running NotReady >2min 必告警,實際: {warnings}" # --- restarts >= 3: 必告警,無關 phase --- def test_high_restarts_warns_even_if_running(self): pod = PodInfo( name="api-flaky", ready=True, status="Running", start_time=_start_time(minutes_ago=30), restarts=5, ) warnings = self.svc._build_warnings(_make_report([pod])) assert any("api-flaky" in w and "重啟" in w for w in warnings) def test_high_restarts_warns_for_succeeded_pod(self): # 即使 Succeeded 也應該因為 restart 過多而告警(CrashLoop 跑完) pod = PodInfo( name="job-crashy", ready=False, status="Succeeded", restarts=10, ) warnings = self.svc._build_warnings(_make_report([pod])) assert any("job-crashy" in w and "重啟" in w for w in warnings) class TestRegression: """Regression:保護 daily report 不再出現 24h 連續同樣 false positive""" def setup_method(self): self.svc = HeartbeatReportService() def test_3_drift_scanner_succeeded_pods_zero_warning(self): """2026-05-02 統帥截圖鐵證:3 個 drift-scanner Succeeded pod 每天造成 「需關注 3 項」假警報。修復後同樣 fixture 應 0 warning。""" pods = [ PodInfo(name=f"drift-scanner-{i}-pvbst", ready=False, status="Succeeded") for i in range(3) ] warnings = self.svc._build_warnings(_make_report(pods)) assert not any("drift-scanner" in w for w in warnings), \ f"Codex stash + 我的修法後 Succeeded CronJob pod 不該告警,實際: {warnings}"