""" 飛輪閉環 E2E 測試 — auto_repair → PostExecutionVerifier → LearningService → EWMA ================================================================================ 2026-04-26 Wave4 P1.3+P1.4 by Claude Engineer-B3 — 飛輪閉環最後一哩 測試範圍: - execute_auto_repair 成功 → verifier 被呼叫 → record_verification_result 被呼叫 - execute_auto_repair 失敗 → verifier 不被呼叫(主 except 路徑) - matched_playbook_id=None 的 record_verification_result → log warning 不 crash - verifier 拋例外 → 修復仍回傳成功,trust 不更新 🔴 遵循 feedback_no_mock_testing.md: - 禁止 MagicMock/AsyncMock/unittest.mock.patch - 使用純 Python Stub 類別 + pytest monkeypatch(替換 module-level getter) """ from __future__ import annotations import asyncio import pytest from src.models.incident import Incident, IncidentStatus, Severity, Signal from src.models.playbook import ( ActionType, Playbook, PlaybookStatus, RepairStep, RiskLevel, SymptomPattern, ) from src.services.auto_repair_service import AutoRepairService from src.utils.timezone import now_taipei # ============================================================================= # Stubs # ============================================================================= class StubVerifier: """PostExecutionVerifier 的輕量 Stub — 記錄呼叫,不真正等 K8s""" def __init__(self, result: str = "success", raise_exc: Exception | None = None): self.result = result self.raise_exc = raise_exc self.calls: list[dict] = [] async def verify( self, incident, snapshot, action_taken: str, warmup_sec: float = 0.0, ) -> str: self.calls.append( {"incident_id": incident.incident_id, "snapshot": snapshot, "action_taken": action_taken} ) if self.raise_exc is not None: raise self.raise_exc return self.result class StubLearningService: """LearningService 的輕量 Stub — 記錄 record_verification_result 呼叫""" def __init__(self) -> None: self.verification_calls: list[dict] = [] async def record_verification_result( self, incident_id: str, action_taken: str, verification_result: str, matched_playbook_id: str | None = None, ) -> None: self.verification_calls.append( { "incident_id": incident_id, "action_taken": action_taken, "verification_result": verification_result, "matched_playbook_id": matched_playbook_id, } ) class StubPlaybookService: """PlaybookService 的輕量 Stub — 支援 record_execution + get_recommendations""" def __init__(self) -> None: self._playbooks: dict[str, Playbook] = {} self._recommendations: list = [] def add_playbook(self, playbook: Playbook) -> None: self._playbooks[playbook.playbook_id] = playbook def set_recommendations(self, recommendations: list) -> None: self._recommendations = recommendations async def get_recommendations(self, symptoms, top_k: int = 3) -> list: return self._recommendations async def get_by_id(self, playbook_id: str) -> Playbook | None: return self._playbooks.get(playbook_id) async def record_execution(self, playbook_id: str, success: bool) -> bool: playbook = self._playbooks.get(playbook_id) if playbook is not None: if success: playbook.success_count += 1 else: playbook.failure_count += 1 return playbook is not None class StubRecommendation: def __init__(self, playbook: Playbook, similarity_score: float = 0.9) -> None: self.playbook = playbook self.similarity_score = similarity_score # ============================================================================= # Factories # ============================================================================= def _make_incident( incident_id: str = "INC-E2E-001", severity: Severity = Severity.P2, ) -> Incident: now = now_taipei() return Incident( incident_id=incident_id, status=IncidentStatus.INVESTIGATING, severity=severity, affected_services=["e2e-service"], signals=[ Signal( alert_name="TestAlert", severity=severity, source="prometheus", fired_at=now, labels={"namespace": "awoooi-prod", "alertname": "TestAlert"}, ) ], ) def _make_playbook( playbook_id: str = "PB-E2E-001", trust_score: float = 0.5, ) -> Playbook: pb = Playbook( playbook_id=playbook_id, name="E2E 測試 Playbook", description="飛輪閉環 E2E 測試用", status=PlaybookStatus.APPROVED, symptom_pattern=SymptomPattern( alert_names=["TestAlert"], affected_services=["e2e-service"], severity_range=["P2"], ), repair_steps=[ RepairStep( step_number=1, action_type=ActionType.MANUAL, command="echo test", risk_level=RiskLevel.LOW, ) ], trust_score=trust_score, success_count=5, failure_count=1, ) return pb async def _no_cooldown(*args, **kwargs) -> tuple[bool, str]: return True, "允許修復 (test bypass)" # ============================================================================= # Tests # ============================================================================= @pytest.mark.asyncio async def test_auto_repair_success_triggers_verify_and_learn(monkeypatch): """ 執行成功 → verifier.verify() 被呼叫 → record_verification_result 被呼叫 驗證飛輪鏈路的前兩段接通。 """ stub_verifier = StubVerifier(result="success") stub_learning = StubLearningService() # 替換 module-level getters(pure Python, no MagicMock) import src.services.auto_repair_service as _ars_mod monkeypatch.setattr(_ars_mod, "_verifier_getter", None, raising=False) import src.services.post_execution_verifier as _pev_mod monkeypatch.setattr(_pev_mod, "_verifier", stub_verifier) import src.services.learning_service as _ls_mod monkeypatch.setattr(_ls_mod, "_learning_service", stub_learning) playbook = _make_playbook() pb_service = StubPlaybookService() pb_service.add_playbook(playbook) pb_service.set_recommendations([StubRecommendation(playbook)]) service = AutoRepairService( playbook_service=pb_service, cooldown_checker=_no_cooldown, ) incident = _make_incident() result = await service.execute_auto_repair(incident, playbook) assert result.success is True # fire-and-forget task — 讓 event loop 執行完 # verifier 有 warmup_sec,但 Stub 忽略 warmup(不 sleep) await asyncio.sleep(0.05) assert len(stub_verifier.calls) == 1, "verifier.verify() 應被呼叫一次" assert stub_verifier.calls[0]["incident_id"] == incident.incident_id assert stub_verifier.calls[0]["snapshot"] is None assert stub_verifier.calls[0]["action_taken"].startswith(f"auto_repair:{playbook.playbook_id}") assert "steps=Step 1:" in stub_verifier.calls[0]["action_taken"] assert len(stub_learning.verification_calls) == 1, "record_verification_result 應被呼叫一次" call = stub_learning.verification_calls[0] assert call["incident_id"] == incident.incident_id assert call["action_taken"] == stub_verifier.calls[0]["action_taken"] assert call["verification_result"] == "success" assert call["matched_playbook_id"] == playbook.playbook_id @pytest.mark.asyncio async def test_auto_repair_can_delegate_post_verification(monkeypatch): """ webhook 路徑會自行 await PostExecutionVerifier;service 層需可跳過內部 fire-and-forget 驗證,避免同一個修復產生兩組驗證與 Telegram 升級。 """ stub_verifier = StubVerifier(result="success") stub_learning = StubLearningService() import src.services.post_execution_verifier as _pev_mod monkeypatch.setattr(_pev_mod, "_verifier", stub_verifier) import src.services.learning_service as _ls_mod monkeypatch.setattr(_ls_mod, "_learning_service", stub_learning) playbook = _make_playbook() pb_service = StubPlaybookService() pb_service.add_playbook(playbook) service = AutoRepairService( playbook_service=pb_service, cooldown_checker=_no_cooldown, ) incident = _make_incident() result = await service.execute_auto_repair( incident, playbook, run_post_verification=False, ) assert result.success is True await asyncio.sleep(0.05) assert stub_verifier.calls == [] assert stub_learning.verification_calls == [] @pytest.mark.asyncio async def test_auto_repair_failure_does_not_call_verifier(monkeypatch): """ 執行失敗(步驟拋例外)→ verifier 不被呼叫(失敗路徑不進入 verify-and-learn 區塊) """ stub_verifier = StubVerifier(result="success") import src.services.post_execution_verifier as _pev_mod monkeypatch.setattr(_pev_mod, "_verifier", stub_verifier) import src.services.learning_service as _ls_mod stub_learning = StubLearningService() monkeypatch.setattr(_ls_mod, "_learning_service", stub_learning) # 建立一個會讓 _execute_step raise 的 playbook(KUBECTL 步驟,executor 不可用時只 skip,不 raise) # 直接讓 playbook_service.record_execution 正常工作,驗證失敗路徑不呼叫 verifier class FailingPlaybookService(StubPlaybookService): async def record_execution(self, playbook_id: str, success: bool) -> bool: # 正常記錄,不 raise return True playbook = _make_playbook() pb_service = FailingPlaybookService() pb_service.add_playbook(playbook) async def _always_fail(self_inner, incident_arg, step_arg) -> str: raise RuntimeError("強制測試失敗") service = AutoRepairService( playbook_service=pb_service, cooldown_checker=_no_cooldown, ) # Monkeypatch instance method monkeypatch.setattr(AutoRepairService, "_execute_step", _always_fail) incident = _make_incident() result = await service.execute_auto_repair(incident, playbook) assert result.success is False await asyncio.sleep(0.05) # 失敗路徑不進入 verify-and-learn 塊 assert len(stub_verifier.calls) == 0, "執行失敗時不應呼叫 verifier" assert len(stub_learning.verification_calls) == 0, "執行失敗時不應呼叫 record_verification_result" @pytest.mark.asyncio async def test_auto_repair_failed_step_string_marks_execution_failure(monkeypatch): """Executor returned FAILED text must not be stored as successful repair.""" stub_verifier = StubVerifier(result="success") stub_learning = StubLearningService() import src.services.post_execution_verifier as _pev_mod monkeypatch.setattr(_pev_mod, "_verifier", stub_verifier) import src.services.learning_service as _ls_mod monkeypatch.setattr(_ls_mod, "_learning_service", stub_learning) playbook = _make_playbook() pb_service = StubPlaybookService() pb_service.add_playbook(playbook) async def _returns_failed(self_inner, incident_arg, step_arg) -> str: return "FAILED: simulated executor failure" monkeypatch.setattr(AutoRepairService, "_execute_step", _returns_failed) service = AutoRepairService( playbook_service=pb_service, cooldown_checker=_no_cooldown, ) result = await service.execute_auto_repair(_make_incident(), playbook) assert result.success is False assert "simulated executor failure" in (result.error or "") assert len(stub_verifier.calls) == 0 assert len(stub_learning.verification_calls) == 0 @pytest.mark.asyncio async def test_record_verification_result_no_playbook_id_does_not_crash(): """ matched_playbook_id=None → record_verification_result 正常執行,不 crash。 驗證 learning_service 對 None playbook_id 的防禦性。 """ from src.services.learning_service import LearningService class NullLearningRepo: async def record_repair(self, **kwargs) -> bool: return True async def get_repair_stats(self, *a, **kw): return {} async def get_all_repair_stats(self, *a, **kw): return {} async def record_disposition(self, *a, **kw): return True async def get_dispositions(self, *a, **kw): return {} class NullTrustRepo: async def save_trust_record(self, *a, **kw): pass async def load_trust_record(self, *a, **kw): return None async def get_all_trust_records(self, *a, **kw): return [] # 直接呼叫 record_verification_result(matched_playbook_id=None) # 不應 raise,只應 log warning 並略過 _update_playbook_stats svc = LearningService( repository=NullLearningRepo(), trust_repository=NullTrustRepo(), ) # 不應拋例外 await svc.record_verification_result( incident_id="INC-NULL-PB-001", action_taken="auto_repair:none", verification_result="success", matched_playbook_id=None, ) # 只要不 crash 即通過 @pytest.mark.asyncio async def test_verifier_exception_does_not_block_repair(monkeypatch): """ verifier 拋例外 → 修復結果仍回傳 success=True,learning 不被呼叫。 驗證 _verify_and_learn 的 exception 隔離。 """ stub_verifier = StubVerifier( result="success", raise_exc=RuntimeError("verifier 模擬故障"), ) stub_learning = StubLearningService() import src.services.post_execution_verifier as _pev_mod monkeypatch.setattr(_pev_mod, "_verifier", stub_verifier) import src.services.learning_service as _ls_mod monkeypatch.setattr(_ls_mod, "_learning_service", stub_learning) playbook = _make_playbook() pb_service = StubPlaybookService() pb_service.add_playbook(playbook) service = AutoRepairService( playbook_service=pb_service, cooldown_checker=_no_cooldown, ) incident = _make_incident() result = await service.execute_auto_repair(incident, playbook) # 主路徑成功回傳 assert result.success is True await asyncio.sleep(0.05) # verifier 被呼叫(但拋了例外) assert len(stub_verifier.calls) == 1 # learning 不應被呼叫(因為 verifier raise 中斷了 _verify_and_learn) assert len(stub_learning.verification_calls) == 0, "verifier 拋例外後 learning 不應被呼叫"