diff --git a/apps/api/src/api/v1/webhooks.py b/apps/api/src/api/v1/webhooks.py index a5ac8bdb..0cd08f8b 100644 --- a/apps/api/src/api/v1/webhooks.py +++ b/apps/api/src/api/v1/webhooks.py @@ -2312,7 +2312,10 @@ async def _process_new_alert_background( primary_responsibility = "OPENCLAW_PLAYBOOK" else: blockers = repair_candidate_result.blockers or ["repair_candidate_missing"] - blocker_text = ", ".join(blockers) + blocker_text = str( + repair_candidate_result.metadata.get("repair_candidate_blocker_summary") + or ", ".join(blockers) + ) fallback_create = ApprovalRequestCreate( action=f"NO_ACTION - REPAIR_CANDIDATE_MISSING: {blocker_text}", description=f"[LLM Failed] {message}\n修復候選阻擋:{blocker_text}", diff --git a/apps/api/src/services/repair_candidate_service.py b/apps/api/src/services/repair_candidate_service.py index 8e70851d..32ddb11e 100644 --- a/apps/api/src/services/repair_candidate_service.py +++ b/apps/api/src/services/repair_candidate_service.py @@ -160,6 +160,8 @@ class RepairCandidateService: blockers.append("playbook_not_approved") if float(playbook.trust_score) < MIN_REPAIR_CANDIDATE_TRUST: blockers.append("playbook_trust_below_gate") + if self._is_generic_fallback_playbook(playbook): + blockers.append("playbook_generic_fallback_not_repair") step, step_blockers = self._select_executable_step(incident, playbook) blockers.extend(step_blockers) @@ -275,6 +277,9 @@ class RepairCandidateService: if step.action_type == ActionType.SSH_COMMAND or command.startswith("ssh "): if self._auto_repair.preview_write_ssh_mcp_route(incident, command): return step, [] + if self._looks_like_diagnostic_command(command): + rejected_readonly = True + continue rejected_unsafe = True if rejected_unsafe: @@ -314,6 +319,9 @@ class RepairCandidateService: ) -> RepairCandidateResult: metadata["repair_candidate_status"] = "blocked" metadata["repair_candidate_blockers"] = list(dict.fromkeys(blockers)) + metadata["repair_candidate_blocker_summary"] = self._humanize_blockers( + metadata["repair_candidate_blockers"] + ) metadata["fallback_action"] = fallback_action return RepairCandidateResult( evidence=evidence, @@ -322,6 +330,43 @@ class RepairCandidateService: metadata=metadata, ) + def _is_generic_fallback_playbook(self, playbook: Playbook) -> bool: + alert_names = playbook.symptom_pattern.alert_names or [] + if "*" in alert_names: + return True + return "通用兜底" in playbook.name or "generic fallback" in playbook.name.lower() + + def _looks_like_diagnostic_command(self, command: str) -> bool: + command = command.lower() + diagnostic_markers = ( + "uptime", + "ps aux", + "docker stats", + "journalctl", + "systemctl status", + "kubectl get", + "kubectl describe", + "tail ", + "grep ", + ) + return any(marker in command for marker in diagnostic_markers) + + def _humanize_blockers(self, blockers: list[str]) -> str: + labels = { + "incident_not_found": "找不到 incident,無法綁定真相鏈", + "mcp_evidence_missing": "MCP 證據未完成", + "playbook_not_matched": "沒有命中專屬 PlayBook", + "playbook_not_found": "命中的 PlayBook 不存在", + "playbook_not_approved": "PlayBook 尚未批准", + "playbook_trust_below_gate": "PlayBook trust score 低於候選門檻", + "playbook_generic_fallback_not_repair": "只命中通用兜底 PlayBook,禁止當成修復命令", + "playbook_has_no_repair_steps": "PlayBook 沒有修復步驟", + "playbook_command_not_safely_routable": "PlayBook 命令未通過安全路由", + "playbook_observe_only": "PlayBook 只有觀察或診斷步驟", + "playbook_has_no_executable_step": "PlayBook 沒有可執行修復步驟", + } + return ";".join(labels.get(blocker, blocker) for blocker in blockers) + def _build_description( self, *, diff --git a/apps/api/tests/test_repair_candidate_service.py b/apps/api/tests/test_repair_candidate_service.py index 4b42aee8..f160e3e8 100644 --- a/apps/api/tests/test_repair_candidate_service.py +++ b/apps/api/tests/test_repair_candidate_service.py @@ -96,6 +96,17 @@ def _playbook(command: str, *, risk_level: PlaybookRiskLevel = PlaybookRiskLevel ) +def _generic_fallback_playbook() -> Playbook: + playbook = _playbook( + "kubectl rollout restart deployment/{target} -n {namespace}", + risk_level=PlaybookRiskLevel.MEDIUM, + ) + playbook.playbook_id = "PB-GENERIC-FALLBACK" + playbook.name = "通用兜底規則" + playbook.symptom_pattern.alert_names = ["*"] + return playbook + + @pytest.mark.asyncio async def test_build_candidate_from_mcp_evidence_and_approved_playbook() -> None: incident = _incident() @@ -185,6 +196,32 @@ async def test_candidate_blocked_when_playbook_is_observe_only() -> None: assert "playbook_observe_only" in result.blockers +@pytest.mark.asyncio +async def test_candidate_blocked_when_playbook_is_generic_fallback() -> None: + incident = _incident() + service = RepairCandidateService( + incident_service=FakeIncidentService(), + investigator=FakeInvestigator(_evidence(incident.incident_id)), + playbook_repository=FakePlaybookRepository(_generic_fallback_playbook()), + auto_repair_service=FakeAutoRepairService(), + ) + + result = await service.build_from_incident( + incident=incident, + alertname="UnknownAlert", + target_resource="awoooi-api", + namespace="awoooi-prod", + message="unknown alert", + fallback_action="NO_ACTION - REPAIR_CANDIDATE_MISSING", + matched_playbook_id="PB-GENERIC-FALLBACK", + severity="medium", + ) + + assert result.candidate_found is False + assert "playbook_generic_fallback_not_repair" in result.blockers + assert "通用兜底" in result.metadata["repair_candidate_blocker_summary"] + + def test_approval_record_data_uses_preallocated_id_without_leaking_metadata() -> None: approval_id = str(uuid4()) request = ApprovalRequestCreate(