"""Tests for building repair candidates from MCP evidence and approved playbooks.

Each async test wires ``RepairCandidateService`` with in-memory fakes, calls
``build_from_incident`` and asserts on the returned candidate, blockers and
metadata.  The final test covers ``approval_request_to_record_data``.
"""

from datetime import datetime, timezone
from uuid import uuid4

import pytest

from src.models.approval import ApprovalRequestCreate, BlastRadius, DataImpact, RiskLevel
from src.models.incident import Incident, Severity, Signal
from src.models.playbook import (
    ActionType,
    Playbook,
    PlaybookSource,
    PlaybookStatus,
    RepairStep,
)
from src.models.playbook import RiskLevel as PlaybookRiskLevel
from src.services.approval_db import approval_request_to_record_data
from src.services.evidence_snapshot import EvidenceSnapshot
from src.services.repair_candidate_service import RepairCandidateService

# Literals shared by several tests; kept in one place so they cannot drift apart.
_RESTART_COMMAND = "kubectl rollout restart deployment/awoooi-api -n awoooi-prod"
_FALLBACK_ACTION = "NO_ACTION - REPAIR_CANDIDATE_MISSING"


class FakeInvestigator:
    """Investigator stub that returns a pre-built evidence snapshot."""

    def __init__(self, evidence: EvidenceSnapshot | None) -> None:
        self.evidence = evidence

    async def investigate(self, incident: Incident) -> EvidenceSnapshot | None:
        """Return the canned evidence; ``incident`` is ignored."""
        return self.evidence


class FakePlaybookRepository:
    """Repository stub holding at most one playbook."""

    def __init__(self, playbook: Playbook | None) -> None:
        self.playbook = playbook

    async def get_by_id(self, playbook_id: str) -> Playbook | None:
        """Return the stored playbook when its id matches, else ``None``."""
        if self.playbook and self.playbook.playbook_id == playbook_id:
            return self.playbook
        return None


class FakeIncidentService:
    """Incident service stub whose working-memory lookup always misses."""

    async def get_from_working_memory(self, incident_id: str) -> None:
        """Return ``None`` for every incident id."""
        return None


class FakeAutoRepairService:
    """Auto-repair stub with a configurable SSH/MCP write-route answer."""

    def __init__(self, *, route_available: bool = True) -> None:
        # Defaults to True so ``FakeAutoRepairService()`` behaves as before.
        self.route_available = route_available

    def preview_write_ssh_mcp_route(self, incident: Incident, command: str) -> bool:
        """Return the configured answer; both arguments are ignored."""
        return self.route_available


def _incident() -> Incident:
    """Build a P2 incident carrying a single ``NodeExporterDown`` signal."""
    return Incident(
        incident_id="INC-TEST-REPAIR",
        severity=Severity.P2,
        signals=[
            Signal(
                alert_name="NodeExporterDown",
                severity=Severity.P2,
                source="alertmanager",
                fired_at=datetime.now(timezone.utc),
                labels={"namespace": "awoooi-prod", "deployment": "awoooi-api"},
                annotations={"summary": "node exporter down"},
            )
        ],
        affected_services=["awoooi-api"],
    )


def _evidence(incident_id: str, *, sensors_succeeded: int = 2) -> EvidenceSnapshot:
    """Build an evidence snapshot with 3 attempted sensors.

    ``mcp_health`` marks ``k8s`` healthy when at least one sensor succeeded and
    ``prometheus`` healthy when at least two did.
    """
    return EvidenceSnapshot(
        incident_id=incident_id,
        sensors_attempted=3,
        sensors_succeeded=sensors_succeeded,
        mcp_health={"k8s": sensors_succeeded > 0, "prometheus": sensors_succeeded > 1},
        evidence_summary="k8s events and metrics collected",
    )


def _playbook(
    command: str, *, risk_level: PlaybookRiskLevel = PlaybookRiskLevel.LOW
) -> Playbook:
    """Build an approved, manually-sourced playbook with one KUBECTL step."""
    return Playbook(
        playbook_id="PB-REPAIR-001",
        name="重啟 API deployment",
        description="以 approved PlayBook 修復 API 工作負載",
        status=PlaybookStatus.APPROVED,
        source=PlaybookSource.MANUAL,
        trust_score=0.72,
        estimated_duration_minutes=4,
        repair_steps=[
            RepairStep(
                step_number=1,
                action_type=ActionType.KUBECTL,
                command=command,
                expected_result="deployment restarted and rollout verified",
                risk_level=risk_level,
                requires_approval=True,
            )
        ],
    )


def _generic_fallback_playbook() -> Playbook:
    """Build a templated catch-all playbook whose alert pattern is ``*``."""
    playbook = _playbook(
        "kubectl rollout restart deployment/{target} -n {namespace}",
        risk_level=PlaybookRiskLevel.MEDIUM,
    )
    playbook.playbook_id = "PB-GENERIC-FALLBACK"
    playbook.name = "通用兜底規則"
    playbook.symptom_pattern.alert_names = ["*"]
    return playbook


def _service(
    evidence: EvidenceSnapshot | None, playbook: Playbook | None
) -> RepairCandidateService:
    """Wire a ``RepairCandidateService`` with the fakes defined in this module."""
    return RepairCandidateService(
        incident_service=FakeIncidentService(),
        investigator=FakeInvestigator(evidence),
        playbook_repository=FakePlaybookRepository(playbook),
        auto_repair_service=FakeAutoRepairService(),
    )


async def _build_candidate(
    service: RepairCandidateService,
    incident: Incident,
    *,
    alertname: str = "NodeExporterDown",
    target_resource: str = "awoooi-api",
    message: str = "node exporter is down",
    matched_playbook_id: str = "PB-REPAIR-001",
):
    """Call ``build_from_incident`` with the arguments shared by every test.

    Namespace, fallback action and severity are fixed; the keyword parameters
    cover the values that differ between tests.  Returns whatever
    ``build_from_incident`` returns.
    """
    return await service.build_from_incident(
        incident=incident,
        alertname=alertname,
        target_resource=target_resource,
        namespace="awoooi-prod",
        message=message,
        fallback_action=_FALLBACK_ACTION,
        matched_playbook_id=matched_playbook_id,
        severity="medium",
    )


@pytest.mark.asyncio
async def test_build_candidate_from_mcp_evidence_and_approved_playbook() -> None:
    """Evidence plus an approved restart playbook yields an approval request."""
    incident = _incident()
    service = _service(_evidence(incident.incident_id), _playbook(_RESTART_COMMAND))

    result = await _build_candidate(service, incident)

    assert result.candidate_found is True
    assert result.approval_request is not None
    assert result.approval_request.action == _RESTART_COMMAND
    assert result.approval_request.risk_level == RiskLevel.MEDIUM
    assert result.approval_request.matched_playbook_id == "PB-REPAIR-001"
    assert result.metadata["repair_candidate_status"] == "candidate_ready_for_approval"
    assert result.metadata["mcp_evidence"]["sensors_succeeded"] == 2
    assert result.metadata["playbook_trust"]["trust_score"] == 0.72
    assert result.metadata["verifier_plan"]


@pytest.mark.asyncio
async def test_candidate_blocked_when_mcp_evidence_missing() -> None:
    """Zero successful sensors blocks the candidate with ``mcp_evidence_missing``."""
    incident = _incident()
    service = _service(
        _evidence(incident.incident_id, sensors_succeeded=0),
        _playbook(_RESTART_COMMAND),
    )

    result = await _build_candidate(service, incident)

    assert result.candidate_found is False
    assert "mcp_evidence_missing" in result.blockers
    assert result.metadata["repair_candidate_status"] == "blocked"


@pytest.mark.asyncio
async def test_candidate_blocked_when_playbook_is_observe_only() -> None:
    """A playbook whose only step is ``kubectl get`` is blocked as observe-only."""
    incident = _incident()
    service = _service(
        _evidence(incident.incident_id),
        _playbook("kubectl get pods -n awoooi-prod"),
    )

    result = await _build_candidate(service, incident)

    assert result.candidate_found is False
    assert "playbook_observe_only" in result.blockers


@pytest.mark.asyncio
async def test_candidate_blocked_when_playbook_is_generic_fallback() -> None:
    """The catch-all playbook is blocked and a draft package is attached."""
    incident = _incident()
    service = _service(_evidence(incident.incident_id), _generic_fallback_playbook())

    result = await _build_candidate(
        service,
        incident,
        alertname="UnknownAlert",
        message="unknown alert",
        matched_playbook_id="PB-GENERIC-FALLBACK",
    )

    assert result.candidate_found is False
    assert "playbook_generic_fallback_not_repair" in result.blockers
    assert "通用兜底" in result.metadata["repair_candidate_blocker_summary"]
    assert result.metadata["playbook_draft_required"] is True

    draft_package = result.metadata["repair_candidate_draft_package"]
    assert draft_package["schema_version"] == "repair_candidate_draft_package_v1"
    assert draft_package["lane"] == "create_service_specific_repair_playbook"
    assert "建立專屬 PlayBook 草案" in result.metadata["repair_candidate_next_step"]
    assert "repair_command" in draft_package["required_fields"]

    coverage_gap = draft_package["coverage_gap"]
    assert coverage_gap["schema_version"] == "repair_candidate_coverage_gap_v1"
    assert coverage_gap["coverage_key"] == "unknownalert:awoooi-api"
    assert coverage_gap["blocking_stage"] == "service_playbook_coverage"
    assert coverage_gap["next_owner_lane"] == "create_service_specific_repair_playbook"
    assert coverage_gap["mcp_evidence_ready"] is True
    assert coverage_gap["runtime_execution_authorized"] is False
    assert "recurrence_fingerprint" in coverage_gap["required_mcp_evidence_refs"]
    assert "repair_steps.command_or_ansible_ref" in coverage_gap["playbook_template_fields"]

    work_item = draft_package["awooop_work_item"]
    assert work_item["schema_version"] == "awooop_repair_candidate_draft_work_item_v1"
    assert work_item["work_item_id"].startswith(
        "repair-candidate-draft:awoooi:INC-TEST-REPAIR:"
    )
    assert work_item["status"] == "open"
    assert work_item["needs_human"] is True
    assert work_item["decision_effect"] == "none"
    assert work_item["writes_runtime_state"] is False
    assert work_item["coverage_gap"]["coverage_key"] == "unknownalert:awoooi-api"
    assert "/awooop/work-items?" in work_item["work_item_href"]
    assert "https://awoooi.wooo.work/zh-TW/awooop/work-items?" in work_item["work_item_url"]


@pytest.mark.asyncio
async def test_candidate_blocked_observe_only_prompts_repair_playbook_draft() -> None:
    """A diagnostic SSH playbook with no write route is routed to the promote lane."""
    incident = _incident()
    playbook = _playbook(
        "ssh 192.168.0.188 'uptime; ps aux --sort=-%cpu | head -20; docker stats --no-stream'",
        risk_level=PlaybookRiskLevel.LOW,
    )
    playbook.repair_steps[0].action_type = ActionType.SSH_COMMAND
    service = _service(_evidence(incident.incident_id), playbook)
    # Replaces the private collaborator after construction, exactly as this test
    # always has.  NOTE(review): presumably the constructor stores
    # ``auto_repair_service`` in ``_auto_repair``; confirm before passing the
    # no-route fake through the constructor instead.
    service._auto_repair = FakeAutoRepairService(route_available=False)

    result = await _build_candidate(
        service,
        incident,
        target_resource="node-exporter-188",
    )

    assert result.candidate_found is False
    assert "playbook_observe_only" in result.blockers

    draft_package = result.metadata["repair_candidate_draft_package"]
    assert draft_package["lane"] == "promote_diagnostic_to_repair_playbook"

    coverage_gap = draft_package["coverage_gap"]
    assert coverage_gap["coverage_key"] == "nodeexporterdown:node-exporter-188"
    assert coverage_gap["target_kind"] == "host_service"
    assert coverage_gap["blocking_stage"] == "service_playbook_coverage"
    assert coverage_gap["matched_playbook_id"] == "PB-REPAIR-001"
    assert "systemd_or_container_status" in coverage_gap["required_mcp_evidence_refs"]
    assert "診斷命令保留為 MCP evidence collector" in result.metadata["repair_candidate_next_step"]

    work_item = draft_package["awooop_work_item"]
    assert work_item["target_resource"] == "node-exporter-188"
    assert work_item["lane"] == "promote_diagnostic_to_repair_playbook"
    assert work_item["safety_level"] == "read_only_work_item_projection"
    assert work_item["coverage_gap"]["next_owner_lane"] == "promote_diagnostic_to_repair_playbook"


@pytest.mark.asyncio
async def test_missing_mcp_evidence_records_collectable_coverage_gap() -> None:
    """Missing evidence is recorded as an ``mcp_evidence`` stage coverage gap."""
    incident = _incident()
    service = _service(
        _evidence(incident.incident_id, sensors_succeeded=0),
        _playbook(_RESTART_COMMAND),
    )

    result = await _build_candidate(service, incident)

    coverage_gap = result.metadata["repair_candidate_draft_package"]["coverage_gap"]
    assert coverage_gap["coverage_key"] == "nodeexporterdown:awoooi-api"
    assert coverage_gap["blocking_stage"] == "mcp_evidence"
    assert coverage_gap["mcp_evidence_ready"] is False
    assert coverage_gap["target_kind"] == "k8s_workload"
    assert "mcp_health_snapshot" in coverage_gap["required_mcp_evidence_refs"]
    assert coverage_gap["runtime_execution_authorized"] is False


def test_approval_record_data_uses_preallocated_id_without_leaking_metadata() -> None:
    """The preallocated id becomes the record id and is dropped from extra metadata."""
    approval_id = str(uuid4())
    request = ApprovalRequestCreate(
        action=_RESTART_COMMAND,
        description="candidate",
        risk_level=RiskLevel.MEDIUM,
        requested_by="repair-candidate-test",
        blast_radius=BlastRadius(data_impact=DataImpact.WRITE),
        metadata={"preallocated_approval_id": approval_id, "source": "test"},
    )

    data = approval_request_to_record_data(request, RiskLevel.MEDIUM, required_sigs=1)

    assert data["id"] == approval_id
    assert data["extra_metadata"] == {"source": "test"}