"""
Flywheel closed-loop E2E tests — auto_repair → PostExecutionVerifier → LearningService → EWMA
================================================================================
2026-04-26 Wave4 P1.3+P1.4 by Claude Engineer-B3 — the last mile of the flywheel closed loop

Test scope:
- execute_auto_repair succeeds → verifier is called → record_verification_result is called
- execute_auto_repair fails → verifier is not called (main except path)
- record_verification_result with matched_playbook_id=None → logs a warning, does not crash
- verifier raises → the repair still returns success, trust is not updated

🔴 Follows feedback_no_mock_testing.md:
- MagicMock / AsyncMock / unittest.mock.patch are forbidden
- Use pure-Python stub classes + pytest monkeypatch (replacing module-level getters)
"""
from __future__ import annotations

import asyncio

import pytest

from src.models.incident import Incident, IncidentStatus, Severity, Signal
from src.models.playbook import (
    ActionType,
    Playbook,
    PlaybookStatus,
    RepairStep,
    RiskLevel,
    SymptomPattern,
)
from src.services.auto_repair_service import AutoRepairService
from src.utils.timezone import now_taipei
# =============================================================================
# Stubs
# =============================================================================
class StubVerifier:
    """Lightweight stub for PostExecutionVerifier.

    Records every ``verify`` call and immediately returns (or raises) a
    preconfigured outcome; it never waits on anything.
    """

    def __init__(self, result: str = "success", raise_exc: Exception | None = None) -> None:
        """Configure the stub.

        Args:
            result: Value returned by ``verify`` when no exception is configured.
            raise_exc: If not None, ``verify`` raises this exception instead of
                returning ``result``.
        """
        self.result = result
        self.raise_exc = raise_exc
        # One dict per verify() call, in call order.
        self.calls: list[dict] = []

    async def verify(
        self,
        incident,
        snapshot,
        action_taken: str,
        warmup_sec: float = 0.0,
    ) -> str:
        """Record the call, then raise ``raise_exc`` or return ``result``.

        ``warmup_sec`` is accepted for signature compatibility but ignored:
        the stub never sleeps.
        """
        # The call is recorded before raising, so failing calls are counted too.
        self.calls.append(
            {
                "incident_id": incident.incident_id,
                "snapshot": snapshot,
                "action_taken": action_taken,
            }
        )
        if self.raise_exc is not None:
            raise self.raise_exc
        return self.result
class StubLearningService:
    """Lightweight stub for LearningService.

    Only ``record_verification_result`` is provided; each call is appended to
    ``verification_calls`` so tests can inspect what was reported.
    """

    def __init__(self) -> None:
        # One dict per record_verification_result() call, in call order.
        self.verification_calls: list[dict] = []

    async def record_verification_result(
        self,
        incident_id: str,
        action_taken: str,
        verification_result: str,
        matched_playbook_id: str | None = None,
    ) -> None:
        """Store the arguments of this call; no other side effects."""
        self.verification_calls.append(
            {
                "incident_id": incident_id,
                "action_taken": action_taken,
                "verification_result": verification_result,
                "matched_playbook_id": matched_playbook_id,
            }
        )
class StubPlaybookService:
    """Lightweight in-memory stub for PlaybookService.

    Supports registering playbooks, returning canned recommendations, lookup
    by id, and ``record_execution`` counter updates.
    """

    def __init__(self) -> None:
        # Registered playbooks keyed by playbook_id.
        self._playbooks: dict[str, Playbook] = {}
        # Canned list handed back by get_recommendations().
        self._recommendations: list = []

    def add_playbook(self, playbook: Playbook) -> None:
        """Register ``playbook`` under its ``playbook_id`` (overwrites an existing entry)."""
        self._playbooks[playbook.playbook_id] = playbook

    def set_recommendations(self, recommendations: list) -> None:
        """Set the list that ``get_recommendations`` will return."""
        self._recommendations = recommendations

    async def get_recommendations(self, symptoms, top_k: int = 3) -> list:
        """Return the canned recommendations; ``symptoms`` and ``top_k`` are ignored."""
        return self._recommendations

    async def get_by_id(self, playbook_id: str) -> Playbook | None:
        """Return the registered playbook, or None when the id is unknown."""
        return self._playbooks.get(playbook_id)

    async def record_execution(self, playbook_id: str, success: bool) -> bool:
        """Bump the playbook's success or failure counter.

        Returns:
            True when the playbook is registered (and was updated), False otherwise.
        """
        playbook = self._playbooks.get(playbook_id)
        if playbook is None:
            return False
        if success:
            playbook.success_count += 1
        else:
            playbook.failure_count += 1
        return True
class StubRecommendation:
    """Minimal recommendation object: a playbook paired with a similarity score."""

    def __init__(self, playbook: Playbook, similarity_score: float = 0.9) -> None:
        self.playbook = playbook
        self.similarity_score = similarity_score
# =============================================================================
# Factories
# =============================================================================
def _make_incident(
    incident_id: str = "INC-E2E-001",
    severity: Severity = Severity.P2,
) -> Incident:
    """Build an INVESTIGATING incident with a single Prometheus signal.

    Args:
        incident_id: Identifier assigned to the incident.
        severity: Severity used for both the incident and its signal.

    Returns:
        An ``Incident`` affecting ``e2e-service`` whose one signal
        (``TestAlert``) fired at the current Taipei time.
    """
    fired_at = now_taipei()
    signal = Signal(
        alert_name="TestAlert",
        severity=severity,
        source="prometheus",
        fired_at=fired_at,
        labels={"namespace": "awoooi-prod", "alertname": "TestAlert"},
    )
    return Incident(
        incident_id=incident_id,
        status=IncidentStatus.INVESTIGATING,
        severity=severity,
        affected_services=["e2e-service"],
        signals=[signal],
    )
def _make_playbook(
    playbook_id: str = "PB-E2E-001",
    trust_score: float = 0.5,
) -> Playbook:
    """Build an APPROVED playbook matching the incident from ``_make_incident``.

    Args:
        playbook_id: Identifier assigned to the playbook.
        trust_score: Initial trust score.

    Returns:
        A ``Playbook`` with one low-risk MANUAL step (``echo test``) and a
        starting history of 5 successes and 1 failure.
    """
    return Playbook(
        playbook_id=playbook_id,
        name="E2E 測試 Playbook",
        description="飛輪閉環 E2E 測試用",
        status=PlaybookStatus.APPROVED,
        symptom_pattern=SymptomPattern(
            alert_names=["TestAlert"],
            affected_services=["e2e-service"],
            severity_range=["P2"],
        ),
        repair_steps=[
            RepairStep(
                step_number=1,
                action_type=ActionType.MANUAL,
                command="echo test",
                risk_level=RiskLevel.LOW,
            )
        ],
        trust_score=trust_score,
        success_count=5,
        failure_count=1,
    )
async def _no_cooldown(*args, **kwargs) -> tuple[bool, str]:
|
||
return True, "允許修復 (test bypass)"
|
||
|
||
|
||
# =============================================================================
# Tests
# =============================================================================
@pytest.mark.asyncio
async def test_auto_repair_success_triggers_verify_and_learn(monkeypatch):
    """
    Successful execution → verifier.verify() is called →
    record_verification_result is called.

    Verifies that the first two segments of the flywheel chain are connected.
    """
    stub_verifier = StubVerifier(result="success")
    stub_learning = StubLearningService()

    # Replace module-level getters/singletons (pure Python, no MagicMock).
    import src.services.auto_repair_service as _ars_mod

    # raising=False: do not fail if the module has no `_verifier_getter` attribute.
    monkeypatch.setattr(_ars_mod, "_verifier_getter", None, raising=False)

    import src.services.post_execution_verifier as _pev_mod

    monkeypatch.setattr(_pev_mod, "_verifier", stub_verifier)

    import src.services.learning_service as _ls_mod

    monkeypatch.setattr(_ls_mod, "_learning_service", stub_learning)

    playbook = _make_playbook()
    pb_service = StubPlaybookService()
    pb_service.add_playbook(playbook)
    pb_service.set_recommendations([StubRecommendation(playbook)])

    service = AutoRepairService(
        playbook_service=pb_service,
        cooldown_checker=_no_cooldown,
    )

    incident = _make_incident()
    result = await service.execute_auto_repair(incident, playbook)

    assert result.success is True

    # Fire-and-forget task: yield so the event loop can run it to completion.
    # The verifier takes a warmup_sec, but the stub ignores it (no sleep).
    await asyncio.sleep(0.05)

    assert len(stub_verifier.calls) == 1, "verifier.verify() 應被呼叫一次"
    assert stub_verifier.calls[0]["incident_id"] == incident.incident_id
    assert stub_verifier.calls[0]["snapshot"] is None

    assert len(stub_learning.verification_calls) == 1, "record_verification_result 應被呼叫一次"
    call = stub_learning.verification_calls[0]
    assert call["incident_id"] == incident.incident_id
    assert call["verification_result"] == "success"
    assert call["matched_playbook_id"] == playbook.playbook_id
@pytest.mark.asyncio
async def test_auto_repair_can_delegate_post_verification(monkeypatch):
    """
    The webhook path awaits PostExecutionVerifier on its own, so the service
    layer must be able to skip its internal fire-and-forget verification;
    otherwise one repair would produce two verification runs and duplicate
    Telegram escalations.
    """
    stub_verifier = StubVerifier(result="success")
    stub_learning = StubLearningService()

    import src.services.post_execution_verifier as _pev_mod

    monkeypatch.setattr(_pev_mod, "_verifier", stub_verifier)

    import src.services.learning_service as _ls_mod

    monkeypatch.setattr(_ls_mod, "_learning_service", stub_learning)

    playbook = _make_playbook()
    pb_service = StubPlaybookService()
    pb_service.add_playbook(playbook)

    service = AutoRepairService(
        playbook_service=pb_service,
        cooldown_checker=_no_cooldown,
    )

    incident = _make_incident()
    result = await service.execute_auto_repair(
        incident,
        playbook,
        run_post_verification=False,
    )

    assert result.success is True

    # Give any (unexpected) background task a chance to run before asserting.
    await asyncio.sleep(0.05)

    assert stub_verifier.calls == []
    assert stub_learning.verification_calls == []
@pytest.mark.asyncio
async def test_auto_repair_failure_does_not_call_verifier(monkeypatch):
    """
    Execution fails (a step raises) → the verifier is not called; the failure
    path does not enter the verify-and-learn block.
    """
    stub_verifier = StubVerifier(result="success")
    stub_learning = StubLearningService()

    import src.services.post_execution_verifier as _pev_mod

    monkeypatch.setattr(_pev_mod, "_verifier", stub_verifier)

    import src.services.learning_service as _ls_mod

    monkeypatch.setattr(_ls_mod, "_learning_service", stub_learning)

    # Original author's note: a KUBECTL step would only be skipped (not raise)
    # when the executor is unavailable, so the failure is forced by patching
    # _execute_step below. record_execution is kept working normally so that
    # only the step failure drives the failure path.
    class AlwaysRecordsPlaybookService(StubPlaybookService):
        async def record_execution(self, playbook_id: str, success: bool) -> bool:
            # Report the execution as recorded; never raises.
            return True

    playbook = _make_playbook()
    pb_service = AlwaysRecordsPlaybookService()
    pb_service.add_playbook(playbook)

    async def _always_fail(self_inner, incident_arg, step_arg) -> str:
        raise RuntimeError("強制測試失敗")

    service = AutoRepairService(
        playbook_service=pb_service,
        cooldown_checker=_no_cooldown,
    )
    # Patch the method on the class (not the instance); monkeypatch restores
    # the original at teardown.
    monkeypatch.setattr(AutoRepairService, "_execute_step", _always_fail)

    incident = _make_incident()
    result = await service.execute_auto_repair(incident, playbook)

    assert result.success is False

    # Give any (unexpected) background task a chance to run before asserting.
    await asyncio.sleep(0.05)

    # The failure path must not enter the verify-and-learn block.
    assert stub_verifier.calls == [], "執行失敗時不應呼叫 verifier"
    assert stub_learning.verification_calls == [], "執行失敗時不應呼叫 record_verification_result"
@pytest.mark.asyncio
async def test_record_verification_result_no_playbook_id_does_not_crash():
    """
    matched_playbook_id=None → record_verification_result runs normally and
    does not crash.

    Verifies that learning_service is defensive against a None playbook id.
    """
    from src.services.learning_service import LearningService

    # Duck-typed no-op repositories. NOTE(review): presumably these mirror
    # ILearningRepository / ITrustRepository from src.repositories.interfaces
    # — confirm the method set against those interfaces.
    class NullLearningRepo:
        async def record_repair(self, **kwargs) -> bool:
            return True

        async def get_repair_stats(self, *a, **kw):
            return {}

        async def get_all_repair_stats(self, *a, **kw):
            return {}

        async def record_disposition(self, *a, **kw):
            return True

        async def get_dispositions(self, *a, **kw):
            return {}

    class NullTrustRepo:
        async def save_trust_record(self, *a, **kw):
            pass

        async def load_trust_record(self, *a, **kw):
            return None

        async def get_all_trust_records(self, *a, **kw):
            return []

    # Call record_verification_result directly with matched_playbook_id=None.
    # Per the original author it should only log a warning and skip
    # _update_playbook_stats; this test only checks that nothing is raised.
    svc = LearningService(
        repository=NullLearningRepo(),
        trust_repository=NullTrustRepo(),
    )

    # Must not raise; the test passes as long as this call does not crash.
    await svc.record_verification_result(
        incident_id="INC-NULL-PB-001",
        action_taken="auto_repair:none",
        verification_result="success",
        matched_playbook_id=None,
    )
@pytest.mark.asyncio
async def test_verifier_exception_does_not_block_repair(monkeypatch):
    """
    The verifier raises → the repair result is still success=True and the
    learning service is not called.

    Verifies the exception isolation of _verify_and_learn.
    """
    stub_verifier = StubVerifier(
        result="success",
        raise_exc=RuntimeError("verifier 模擬故障"),
    )
    stub_learning = StubLearningService()

    import src.services.post_execution_verifier as _pev_mod

    monkeypatch.setattr(_pev_mod, "_verifier", stub_verifier)

    import src.services.learning_service as _ls_mod

    monkeypatch.setattr(_ls_mod, "_learning_service", stub_learning)

    playbook = _make_playbook()
    pb_service = StubPlaybookService()
    pb_service.add_playbook(playbook)

    service = AutoRepairService(
        playbook_service=pb_service,
        cooldown_checker=_no_cooldown,
    )

    incident = _make_incident()
    result = await service.execute_auto_repair(incident, playbook)

    # The main path still reports success.
    assert result.success is True

    # Yield so the background verification task can run (and fail).
    await asyncio.sleep(0.05)

    # The verifier was called (and raised).
    assert len(stub_verifier.calls) == 1
    # Learning must not be called: the verifier's exception aborts _verify_and_learn.
    assert len(stub_learning.verification_calls) == 0, "verifier 拋例外後 learning 不應被呼叫"