fix(api): 說明修復候選阻擋原因

2026-06-11 16:00:45 +08:00
parent e5b11761ff
commit 47d677ac4a
3 changed files with 86 additions and 1 deletion
--- a/apps/api/src/api/v1/webhooks.py
+++ b/apps/api/src/api/v1/webhooks.py
@@ -2312,7 +2312,10 @@ async def _process_new_alert_background(
                primary_responsibility = "OPENCLAW_PLAYBOOK"
            else:
                blockers = repair_candidate_result.blockers or ["repair_candidate_missing"]
-                blocker_text = ", ".join(blockers)
+                blocker_text = str(
+                    repair_candidate_result.metadata.get("repair_candidate_blocker_summary")
+                    or ", ".join(blockers)
+                )
                fallback_create = ApprovalRequestCreate(
                    action=f"NO_ACTION - REPAIR_CANDIDATE_MISSING: {blocker_text}",
                    description=f"[LLM Failed] {message}\n修復候選阻擋：{blocker_text}",
--- a/apps/api/src/services/repair_candidate_service.py
+++ b/apps/api/src/services/repair_candidate_service.py
@@ -160,6 +160,8 @@ class RepairCandidateService:
            blockers.append("playbook_not_approved")
        if float(playbook.trust_score) < MIN_REPAIR_CANDIDATE_TRUST:
            blockers.append("playbook_trust_below_gate")
+        if self._is_generic_fallback_playbook(playbook):
+            blockers.append("playbook_generic_fallback_not_repair")

        step, step_blockers = self._select_executable_step(incident, playbook)
        blockers.extend(step_blockers)
@@ -275,6 +277,9 @@ class RepairCandidateService:
            if step.action_type == ActionType.SSH_COMMAND or command.startswith("ssh "):
                if self._auto_repair.preview_write_ssh_mcp_route(incident, command):
                    return step, []
+                if self._looks_like_diagnostic_command(command):
+                    rejected_readonly = True
+                    continue
                rejected_unsafe = True

        if rejected_unsafe:
@@ -314,6 +319,9 @@ class RepairCandidateService:
    ) -> RepairCandidateResult:
        metadata["repair_candidate_status"] = "blocked"
        metadata["repair_candidate_blockers"] = list(dict.fromkeys(blockers))
+        metadata["repair_candidate_blocker_summary"] = self._humanize_blockers(
+            metadata["repair_candidate_blockers"]
+        )
        metadata["fallback_action"] = fallback_action
        return RepairCandidateResult(
            evidence=evidence,
@@ -322,6 +330,43 @@ class RepairCandidateService:
            metadata=metadata,
        )

+    def _is_generic_fallback_playbook(self, playbook: Playbook) -> bool:
+        alert_names = playbook.symptom_pattern.alert_names or []
+        if "*" in alert_names:
+            return True
+        return "通用兜底" in playbook.name or "generic fallback" in playbook.name.lower()
+
+    def _looks_like_diagnostic_command(self, command: str) -> bool:
+        command = command.lower()
+        diagnostic_markers = (
+            "uptime",
+            "ps aux",
+            "docker stats",
+            "journalctl",
+            "systemctl status",
+            "kubectl get",
+            "kubectl describe",
+            "tail ",
+            "grep ",
+        )
+        return any(marker in command for marker in diagnostic_markers)
+
+    def _humanize_blockers(self, blockers: list[str]) -> str:
+        labels = {
+            "incident_not_found": "找不到 incident，無法綁定真相鏈",
+            "mcp_evidence_missing": "MCP 證據未完成",
+            "playbook_not_matched": "沒有命中專屬 PlayBook",
+            "playbook_not_found": "命中的 PlayBook 不存在",
+            "playbook_not_approved": "PlayBook 尚未批准",
+            "playbook_trust_below_gate": "PlayBook trust score 低於候選門檻",
+            "playbook_generic_fallback_not_repair": "只命中通用兜底 PlayBook，禁止當成修復命令",
+            "playbook_has_no_repair_steps": "PlayBook 沒有修復步驟",
+            "playbook_command_not_safely_routable": "PlayBook 命令未通過安全路由",
+            "playbook_observe_only": "PlayBook 只有觀察或診斷步驟",
+            "playbook_has_no_executable_step": "PlayBook 沒有可執行修復步驟",
+        }
+        return "；".join(labels.get(blocker, blocker) for blocker in blockers)
+
    def _build_description(
        self,
        *,
--- a/apps/api/tests/test_repair_candidate_service.py
+++ b/apps/api/tests/test_repair_candidate_service.py
@@ -96,6 +96,17 @@ def _playbook(command: str, *, risk_level: PlaybookRiskLevel = PlaybookRiskLevel
    )


+def _generic_fallback_playbook() -> Playbook:
+    playbook = _playbook(
+        "kubectl rollout restart deployment/{target} -n {namespace}",
+        risk_level=PlaybookRiskLevel.MEDIUM,
+    )
+    playbook.playbook_id = "PB-GENERIC-FALLBACK"
+    playbook.name = "通用兜底規則"
+    playbook.symptom_pattern.alert_names = ["*"]
+    return playbook
+
+
@pytest.mark.asyncio
 async def test_build_candidate_from_mcp_evidence_and_approved_playbook() -> None:
    incident = _incident()
@@ -185,6 +196,32 @@ async def test_candidate_blocked_when_playbook_is_observe_only() -> None:
    assert "playbook_observe_only" in result.blockers


+@pytest.mark.asyncio
+async def test_candidate_blocked_when_playbook_is_generic_fallback() -> None:
+    incident = _incident()
+    service = RepairCandidateService(
+        incident_service=FakeIncidentService(),
+        investigator=FakeInvestigator(_evidence(incident.incident_id)),
+        playbook_repository=FakePlaybookRepository(_generic_fallback_playbook()),
+        auto_repair_service=FakeAutoRepairService(),
+    )
+
+    result = await service.build_from_incident(
+        incident=incident,
+        alertname="UnknownAlert",
+        target_resource="awoooi-api",
+        namespace="awoooi-prod",
+        message="unknown alert",
+        fallback_action="NO_ACTION - REPAIR_CANDIDATE_MISSING",
+        matched_playbook_id="PB-GENERIC-FALLBACK",
+        severity="medium",
+    )
+
+    assert result.candidate_found is False
+    assert "playbook_generic_fallback_not_repair" in result.blockers
+    assert "通用兜底" in result.metadata["repair_candidate_blocker_summary"]
+
+
 def test_approval_record_data_uses_preallocated_id_without_leaking_metadata() -> None:
    approval_id = str(uuid4())
    request = ApprovalRequestCreate(