# awoooi/apps/api/tests/test_learning_chain_e2e.py

"""
飛輪閉環 E2E 測試 — auto_repair → PostExecutionVerifier → LearningService → EWMA
================================================================================
2026-04-26 Wave4 P1.3+P1.4 by Claude Engineer-B3 — 飛輪閉環最後一哩

測試範圍:
  - execute_auto_repair 成功 → verifier 被呼叫 → record_verification_result 被呼叫
  - execute_auto_repair 失敗 → verifier 不被呼叫（主 except 路徑）
  - matched_playbook_id=None 的 record_verification_result → log warning 不 crash
  - verifier 拋例外 → 修復仍回傳成功，trust 不更新

🔴 遵循 feedback_no_mock_testing.md:
  - 禁止 MagicMock/AsyncMock/unittest.mock.patch
  - 使用純 Python Stub 類別 + pytest monkeypatch（替換 module-level getter）
"""

from __future__ import annotations

import asyncio

import pytest

from src.models.incident import Incident, IncidentStatus, Severity, Signal
from src.models.playbook import (
    ActionType,
    Playbook,
    PlaybookStatus,
    RepairStep,
    RiskLevel,
    SymptomPattern,
)
from src.services.auto_repair_service import AutoRepairService
from src.utils.timezone import now_taipei


# =============================================================================
# Stubs
# =============================================================================


class StubVerifier:
    """PostExecutionVerifier 的輕量 Stub — 記錄呼叫，不真正等 K8s"""

    def __init__(self, result: str = "success", raise_exc: Exception | None = None):
        self.result = result
        self.raise_exc = raise_exc
        self.calls: list[dict] = []

    async def verify(
        self,
        incident,
        snapshot,
        action_taken: str,
        warmup_sec: float = 0.0,
    ) -> str:
        self.calls.append(
            {"incident_id": incident.incident_id, "snapshot": snapshot, "action_taken": action_taken}
        )
        if self.raise_exc is not None:
            raise self.raise_exc
        return self.result


class StubLearningService:
    """LearningService 的輕量 Stub — 記錄 record_verification_result 呼叫"""

    def __init__(self) -> None:
        self.verification_calls: list[dict] = []

    async def record_verification_result(
        self,
        incident_id: str,
        action_taken: str,
        verification_result: str,
        matched_playbook_id: str | None = None,
    ) -> None:
        self.verification_calls.append(
            {
                "incident_id": incident_id,
                "action_taken": action_taken,
                "verification_result": verification_result,
                "matched_playbook_id": matched_playbook_id,
            }
        )


class StubPlaybookService:
    """In-memory stand-in for PlaybookService.

    Supports registering playbooks, canned recommendations, lookup by id and
    success/failure counting through ``record_execution``.
    """

    def __init__(self) -> None:
        self._recommendations: list = []
        self._playbooks: dict[str, Playbook] = {}

    def add_playbook(self, playbook: Playbook) -> None:
        """Register ``playbook`` under its ``playbook_id``, replacing any earlier entry."""
        self._playbooks[playbook.playbook_id] = playbook

    def set_recommendations(self, recommendations: list) -> None:
        """Set the list that ``get_recommendations`` hands back."""
        self._recommendations = recommendations

    async def get_recommendations(self, symptoms, top_k: int = 3) -> list:
        """Return the canned recommendations; ``symptoms`` and ``top_k`` are ignored."""
        return self._recommendations

    async def get_by_id(self, playbook_id: str) -> Playbook | None:
        """Return the registered playbook, or ``None`` for an unknown id."""
        return self._playbooks.get(playbook_id)

    async def record_execution(self, playbook_id: str, success: bool) -> bool:
        """Increment the playbook's success or failure counter.

        Returns ``True`` when the playbook is registered, ``False`` otherwise
        (in which case nothing is counted).
        """
        target = self._playbooks.get(playbook_id)
        if target is None:
            return False
        if success:
            target.success_count += 1
        else:
            target.failure_count += 1
        return True


class StubRecommendation:
    """Minimal recommendation object exposing ``playbook`` and ``similarity_score``."""

    def __init__(self, playbook: Playbook, similarity_score: float = 0.9) -> None:
        self.similarity_score = similarity_score
        self.playbook = playbook


# =============================================================================
# Factories
# =============================================================================


def _make_incident(
    incident_id: str = "INC-E2E-001",
    severity: Severity = Severity.P2,
) -> Incident:
    """Build an INVESTIGATING incident on "e2e-service" carrying one Prometheus signal.

    ``severity`` is applied to both the incident and its single "TestAlert"
    signal; the signal's ``fired_at`` is whatever ``now_taipei()`` returns at
    call time.
    """
    test_alert = Signal(
        alert_name="TestAlert",
        severity=severity,
        source="prometheus",
        fired_at=now_taipei(),
        labels={"namespace": "awoooi-prod", "alertname": "TestAlert"},
    )
    return Incident(
        incident_id=incident_id,
        status=IncidentStatus.INVESTIGATING,
        severity=severity,
        affected_services=["e2e-service"],
        signals=[test_alert],
    )


def _make_playbook(
    playbook_id: str = "PB-E2E-001",
    trust_score: float = 0.5,
) -> Playbook:
    """Build an APPROVED playbook matching the incident from ``_make_incident``.

    The playbook has a single low-risk MANUAL step (``echo test``) and starts
    with 5 recorded successes and 1 recorded failure.
    """
    pattern = SymptomPattern(
        alert_names=["TestAlert"],
        affected_services=["e2e-service"],
        severity_range=["P2"],
    )
    only_step = RepairStep(
        step_number=1,
        action_type=ActionType.MANUAL,
        command="echo test",
        risk_level=RiskLevel.LOW,
    )
    return Playbook(
        playbook_id=playbook_id,
        name="E2E 測試 Playbook",
        description="飛輪閉環 E2E 測試用",
        status=PlaybookStatus.APPROVED,
        symptom_pattern=pattern,
        repair_steps=[only_step],
        trust_score=trust_score,
        success_count=5,
        failure_count=1,
    )


async def _no_cooldown(*args, **kwargs) -> tuple[bool, str]:
    """Cooldown checker for tests: always allows the repair, ignoring every argument."""
    allowed = True
    reason = "允許修復 (test bypass)"
    return allowed, reason


# =============================================================================
# Tests
# =============================================================================


@pytest.mark.asyncio
async def test_auto_repair_success_triggers_verify_and_learn(monkeypatch):
    """
    Successful repair -> verifier.verify() is called -> record_verification_result
    is called. Confirms the first two links of the flywheel chain are connected.
    """
    verifier = StubVerifier(result="success")
    learning = StubLearningService()

    # Replace module-level state with plain Python stubs (no MagicMock).
    import src.services.auto_repair_service as ars_module
    monkeypatch.setattr(ars_module, "_verifier_getter", None, raising=False)

    import src.services.post_execution_verifier as verifier_module
    monkeypatch.setattr(verifier_module, "_verifier", verifier)

    import src.services.learning_service as learning_module
    monkeypatch.setattr(learning_module, "_learning_service", learning)

    playbook = _make_playbook()
    playbooks = StubPlaybookService()
    playbooks.add_playbook(playbook)
    playbooks.set_recommendations([StubRecommendation(playbook)])

    repair_service = AutoRepairService(
        playbook_service=playbooks,
        cooldown_checker=_no_cooldown,
    )

    incident = _make_incident()
    outcome = await repair_service.execute_auto_repair(incident, playbook)

    assert outcome.success is True

    # Verification runs as a fire-and-forget task, so yield to the event loop
    # and let it finish. The real verifier has a warm-up delay, but the stub
    # ignores warmup_sec and never sleeps.
    await asyncio.sleep(0.05)

    assert len(verifier.calls) == 1, "verifier.verify() 應被呼叫一次"
    verify_call = verifier.calls[0]
    assert verify_call["incident_id"] == incident.incident_id
    assert verify_call["snapshot"] is None

    assert len(learning.verification_calls) == 1, "record_verification_result 應被呼叫一次"
    learn_call = learning.verification_calls[0]
    assert learn_call["incident_id"] == incident.incident_id
    assert learn_call["verification_result"] == "success"
    assert learn_call["matched_playbook_id"] == playbook.playbook_id


@pytest.mark.asyncio
async def test_auto_repair_can_delegate_post_verification(monkeypatch):
    """
    The webhook path awaits PostExecutionVerifier on its own, so the service
    layer must be able to skip its internal fire-and-forget verification;
    otherwise one repair would produce two verification runs and two Telegram
    escalations.
    """
    verifier = StubVerifier(result="success")
    learning = StubLearningService()

    import src.services.post_execution_verifier as verifier_module
    monkeypatch.setattr(verifier_module, "_verifier", verifier)

    import src.services.learning_service as learning_module
    monkeypatch.setattr(learning_module, "_learning_service", learning)

    playbook = _make_playbook()
    playbooks = StubPlaybookService()
    playbooks.add_playbook(playbook)

    repair_service = AutoRepairService(
        playbook_service=playbooks,
        cooldown_checker=_no_cooldown,
    )

    incident = _make_incident()
    outcome = await repair_service.execute_auto_repair(
        incident,
        playbook,
        run_post_verification=False,
    )

    assert outcome.success is True

    # Give any (unexpected) background task a chance to run before checking
    # that neither stub was touched.
    await asyncio.sleep(0.05)

    assert verifier.calls == []
    assert learning.verification_calls == []


@pytest.mark.asyncio
async def test_auto_repair_failure_does_not_call_verifier(monkeypatch):
    """
    Failed execution (a step raises) -> the verifier is never called, because
    the failure path does not enter the verify-and-learn stage.
    """
    stub_verifier = StubVerifier(result="success")

    import src.services.post_execution_verifier as _pev_mod
    monkeypatch.setattr(_pev_mod, "_verifier", stub_verifier)

    import src.services.learning_service as _ls_mod
    stub_learning = StubLearningService()
    monkeypatch.setattr(_ls_mod, "_learning_service", stub_learning)

    class FailingPlaybookService(StubPlaybookService):
        """Playbook service for the failure scenario.

        ``record_execution`` reports success without raising and without
        touching the playbook counters, so bookkeeping cannot interfere with
        the failure path under test.
        """

        async def record_execution(self, playbook_id: str, success: bool) -> bool:
            return True

    playbook = _make_playbook()
    pb_service = FailingPlaybookService()
    pb_service.add_playbook(playbook)

    async def _always_fail(*_args, **_kwargs) -> str:
        # Accept any call shape so the intended RuntimeError is raised no
        # matter how the service invokes the step (positional or keyword).
        raise RuntimeError("強制測試失敗")

    service = AutoRepairService(
        playbook_service=pb_service,
        cooldown_checker=_no_cooldown,
    )
    # Patch the method on the class so every step execution raises and the
    # failure path is taken.
    monkeypatch.setattr(AutoRepairService, "_execute_step", _always_fail)

    incident = _make_incident()
    result = await service.execute_auto_repair(incident, playbook)

    assert result.success is False

    # Let any (unexpected) background task run before asserting nothing fired.
    await asyncio.sleep(0.05)

    # The failure path must not enter the verify-and-learn stage.
    assert len(stub_verifier.calls) == 0, "執行失敗時不應呼叫 verifier"
    assert len(stub_learning.verification_calls) == 0, "執行失敗時不應呼叫 record_verification_result"


@pytest.mark.asyncio
async def test_record_verification_result_no_playbook_id_does_not_crash():
    """
    matched_playbook_id=None -> record_verification_result completes without
    raising. Checks that LearningService is defensive about a missing
    playbook id.
    """
    from src.services.learning_service import LearningService

    class NullLearningRepo:
        """Learning repository stub: accepts any call and returns an empty/OK value."""

        async def record_repair(self, **kwargs) -> bool:
            return True

        async def get_repair_stats(self, *a, **kw):
            return {}

        async def get_all_repair_stats(self, *a, **kw):
            return {}

        async def record_disposition(self, *a, **kw):
            return True

        async def get_dispositions(self, *a, **kw):
            return {}

    class NullTrustRepo:
        """Trust repository stub: saves nothing and never has a stored record."""

        async def save_trust_record(self, *a, **kw):
            pass

        async def load_trust_record(self, *a, **kw):
            return None

        async def get_all_trust_records(self, *a, **kw):
            return []

    svc = LearningService(
        repository=NullLearningRepo(),
        trust_repository=NullTrustRepo(),
    )

    # Call with matched_playbook_id=None directly. The service is expected to
    # log a warning and skip the playbook-stats update rather than raise.
    await svc.record_verification_result(
        incident_id="INC-NULL-PB-001",
        action_taken="auto_repair:none",
        verification_result="success",
        matched_playbook_id=None,
    )
    # Reaching this point without an exception is the pass condition.


@pytest.mark.asyncio
async def test_verifier_exception_does_not_block_repair(monkeypatch):
    """
    Verifier raises -> the repair still returns success=True and the learning
    service is not called. Checks that verification errors are isolated from
    the main repair result.
    """
    verifier_failure = RuntimeError("verifier 模擬故障")
    verifier = StubVerifier(result="success", raise_exc=verifier_failure)
    learning = StubLearningService()

    import src.services.post_execution_verifier as verifier_module
    monkeypatch.setattr(verifier_module, "_verifier", verifier)

    import src.services.learning_service as learning_module
    monkeypatch.setattr(learning_module, "_learning_service", learning)

    playbook = _make_playbook()
    playbooks = StubPlaybookService()
    playbooks.add_playbook(playbook)

    repair_service = AutoRepairService(
        playbook_service=playbooks,
        cooldown_checker=_no_cooldown,
    )

    incident = _make_incident()
    outcome = await repair_service.execute_auto_repair(incident, playbook)

    # The main path reports success regardless of the verifier.
    assert outcome.success is True

    # Yield so the background verification has a chance to run (and fail).
    await asyncio.sleep(0.05)

    # The verifier was invoked once (and raised).
    assert len(verifier.calls) == 1
    # The verifier's exception cut the chain short, so nothing was learned.
    assert len(learning.verification_calls) == 0, "verifier 拋例外後 learning 不應被呼叫"