fix(api): 說明修復候選阻擋原因
All checks were successful
CD Pipeline / tests (push) Successful in 1m29s
Code Review / ai-code-review (push) Successful in 13s
CD Pipeline / build-and-deploy (push) Successful in 4m23s
CD Pipeline / post-deploy-checks (push) Successful in 1m39s

This commit is contained in:
Your Name
2026-06-11 16:00:45 +08:00
parent e5b11761ff
commit 47d677ac4a
3 changed files with 86 additions and 1 deletions

View File

@@ -2312,7 +2312,10 @@ async def _process_new_alert_background(
primary_responsibility = "OPENCLAW_PLAYBOOK"
else:
blockers = repair_candidate_result.blockers or ["repair_candidate_missing"]
blocker_text = ", ".join(blockers)
blocker_text = str(
repair_candidate_result.metadata.get("repair_candidate_blocker_summary")
or ", ".join(blockers)
)
fallback_create = ApprovalRequestCreate(
action=f"NO_ACTION - REPAIR_CANDIDATE_MISSING: {blocker_text}",
description=f"[LLM Failed] {message}\n修復候選阻擋:{blocker_text}",

View File

@@ -160,6 +160,8 @@ class RepairCandidateService:
blockers.append("playbook_not_approved")
if float(playbook.trust_score) < MIN_REPAIR_CANDIDATE_TRUST:
blockers.append("playbook_trust_below_gate")
if self._is_generic_fallback_playbook(playbook):
blockers.append("playbook_generic_fallback_not_repair")
step, step_blockers = self._select_executable_step(incident, playbook)
blockers.extend(step_blockers)
@@ -275,6 +277,9 @@ class RepairCandidateService:
if step.action_type == ActionType.SSH_COMMAND or command.startswith("ssh "):
if self._auto_repair.preview_write_ssh_mcp_route(incident, command):
return step, []
if self._looks_like_diagnostic_command(command):
rejected_readonly = True
continue
rejected_unsafe = True
if rejected_unsafe:
@@ -314,6 +319,9 @@ class RepairCandidateService:
) -> RepairCandidateResult:
metadata["repair_candidate_status"] = "blocked"
metadata["repair_candidate_blockers"] = list(dict.fromkeys(blockers))
metadata["repair_candidate_blocker_summary"] = self._humanize_blockers(
metadata["repair_candidate_blockers"]
)
metadata["fallback_action"] = fallback_action
return RepairCandidateResult(
evidence=evidence,
@@ -322,6 +330,43 @@ class RepairCandidateService:
metadata=metadata,
)
def _is_generic_fallback_playbook(self, playbook: Playbook) -> bool:
alert_names = playbook.symptom_pattern.alert_names or []
if "*" in alert_names:
return True
return "通用兜底" in playbook.name or "generic fallback" in playbook.name.lower()
def _looks_like_diagnostic_command(self, command: str) -> bool:
command = command.lower()
diagnostic_markers = (
"uptime",
"ps aux",
"docker stats",
"journalctl",
"systemctl status",
"kubectl get",
"kubectl describe",
"tail ",
"grep ",
)
return any(marker in command for marker in diagnostic_markers)
def _humanize_blockers(self, blockers: list[str]) -> str:
labels = {
"incident_not_found": "找不到 incident無法綁定真相鏈",
"mcp_evidence_missing": "MCP 證據未完成",
"playbook_not_matched": "沒有命中專屬 PlayBook",
"playbook_not_found": "命中的 PlayBook 不存在",
"playbook_not_approved": "PlayBook 尚未批准",
"playbook_trust_below_gate": "PlayBook trust score 低於候選門檻",
"playbook_generic_fallback_not_repair": "只命中通用兜底 PlayBook禁止當成修復命令",
"playbook_has_no_repair_steps": "PlayBook 沒有修復步驟",
"playbook_command_not_safely_routable": "PlayBook 命令未通過安全路由",
"playbook_observe_only": "PlayBook 只有觀察或診斷步驟",
"playbook_has_no_executable_step": "PlayBook 沒有可執行修復步驟",
}
return "".join(labels.get(blocker, blocker) for blocker in blockers)
def _build_description(
self,
*,

View File

@@ -96,6 +96,17 @@ def _playbook(command: str, *, risk_level: PlaybookRiskLevel = PlaybookRiskLevel
)
def _generic_fallback_playbook() -> Playbook:
    """Build a medium-risk playbook fixture shaped like the generic fallback rule.

    The fixture starts from the shared ``_playbook`` helper with a
    rollout-restart command, then overrides its id, name and alert names
    with the fallback id, the fallback name and the ``"*"`` wildcard.
    """
    fallback = _playbook(
        "kubectl rollout restart deployment/{target} -n {namespace}",
        risk_level=PlaybookRiskLevel.MEDIUM,
    )
    # The three overrides below are independent of one another.
    fallback.symptom_pattern.alert_names = ["*"]
    fallback.name = "通用兜底規則"
    fallback.playbook_id = "PB-GENERIC-FALLBACK"
    return fallback
@pytest.mark.asyncio
async def test_build_candidate_from_mcp_evidence_and_approved_playbook() -> None:
incident = _incident()
@@ -185,6 +196,32 @@ async def test_candidate_blocked_when_playbook_is_observe_only() -> None:
assert "playbook_observe_only" in result.blockers
@pytest.mark.asyncio
async def test_candidate_blocked_when_playbook_is_generic_fallback() -> None:
    """A matched generic-fallback playbook must be blocked, with a readable reason."""
    target_incident = _incident()
    candidate_service = RepairCandidateService(
        incident_service=FakeIncidentService(),
        investigator=FakeInvestigator(_evidence(target_incident.incident_id)),
        playbook_repository=FakePlaybookRepository(_generic_fallback_playbook()),
        auto_repair_service=FakeAutoRepairService(),
    )

    # Request arguments gathered in one place; the matched playbook id
    # points at the generic fallback fixture.
    request = {
        "incident": target_incident,
        "alertname": "UnknownAlert",
        "target_resource": "awoooi-api",
        "namespace": "awoooi-prod",
        "message": "unknown alert",
        "fallback_action": "NO_ACTION - REPAIR_CANDIDATE_MISSING",
        "matched_playbook_id": "PB-GENERIC-FALLBACK",
        "severity": "medium",
    }
    outcome = await candidate_service.build_from_incident(**request)

    assert outcome.candidate_found is False
    assert "playbook_generic_fallback_not_repair" in outcome.blockers
    summary = outcome.metadata["repair_candidate_blocker_summary"]
    assert "通用兜底" in summary
def test_approval_record_data_uses_preallocated_id_without_leaking_metadata() -> None:
approval_id = str(uuid4())
request = ApprovalRequestCreate(