feat(adr100): request gate5 replay approval
Some checks failed
CD Pipeline / tests (push) Successful in 1m32s
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled

This commit is contained in:
Your Name
2026-06-02 10:43:09 +08:00
parent 98c01cdaff
commit f519c8e1ab
4 changed files with 261 additions and 19 deletions

View File

@@ -45,6 +45,8 @@ RemediationMode = Literal["auto", "reverify", "replay", "ticket", "approval"]
_READY_STATUSES = {"ready_for_replay", "ready_for_reverify"}
_TICKET_STATUSES = {"needs_playbook_ticket"}
_TICKET_ACTIONS = {"create_playbook_ticket", "promote_diagnostic_to_repair_playbook"}
_RUNTIME_REPLAY_STATUSES = {"ready_for_replay"}
_RUNTIME_REPLAY_ACTIONS = {"replay_with_supported_executor"}
class RemediationNotFoundError(LookupError):
@@ -134,13 +136,13 @@ class Adr100RemediationService:
work_item_id: str,
mode: RemediationMode = "approval",
) -> dict[str, Any]:
"""Create a record-only approval for PlayBook authoring remediation."""
"""Create a record-only approval for PlayBook authoring or runtime replay."""
item = await self._find_work_item(work_item_id)
selected_mode = _select_mode(item, mode)
checks = _base_checks(item)
checks.append({
"name": "playbook_authoring_ticket_required",
"name": "approval_request_supported",
"passed": selected_mode in {"ticket", "approval"},
"detail": str(item.get("remediation_status") or "unknown"),
})
@@ -157,14 +159,43 @@ class Adr100RemediationService:
payload["history"] = await self._record_dry_run_history(item, payload)
return payload
approval_request = _approval_request_for_item(item, incident, checks)
replay_gate: dict[str, Any] | None = None
if selected_mode == "approval":
replay_gate = await self._build_replay_gate(item, incident)
checks.append({
"name": "runtime_replay_gate_ready",
"passed": replay_gate.get("status") == "runtime_replay_ready",
"detail": str(replay_gate.get("status") or "unknown"),
})
if replay_gate.get("status") != "runtime_replay_ready":
payload = _approval_blocked_payload(
item,
selected_mode,
checks,
extra={
"replay_gate": replay_gate,
"verification_result_preview": "runtime_replay_gate_blocked",
},
)
payload["history"] = await self._record_dry_run_history(item, payload)
return payload
approval_request = _runtime_replay_approval_request_for_item(
item,
incident,
checks,
replay_gate,
)
else:
approval_request = _approval_request_for_item(item, incident, checks)
approval_svc = self._approval_service
if approval_svc is None:
from src.services.approval_db import get_approval_service
approval_svc = get_approval_service()
fingerprint = _approval_fingerprint(item)
approval_kind = str((approval_request.metadata or {}).get("approval_kind") or "")
fingerprint = _approval_fingerprint(item, approval_kind=approval_kind)
approval = None
if hasattr(approval_svc, "find_by_fingerprint"):
try:
@@ -597,6 +628,8 @@ class Adr100RemediationService:
"timeline_event_id": None,
}
context = _approval_history_context(item, payload)
approval_kind = str(payload.get("approval_kind") or "")
is_runtime_replay = approval_kind == "adr100_runtime_replay_gate5"
try:
repo = self._alert_operation_log_repository
@@ -612,7 +645,11 @@ class Adr100RemediationService:
approval_id=approval_id or None,
auto_repair_id=str(item.get("auto_repair_id") or "") or None,
actor="adr100_remediation_service",
action_detail="adr100_playbook_authoring_approval_requested",
action_detail=(
"adr100_runtime_replay_gate5_approval_requested"
if is_runtime_replay
else "adr100_playbook_authoring_approval_requested"
),
success=True,
context=context,
)
@@ -635,7 +672,11 @@ class Adr100RemediationService:
event = await timeline.add_event(
event_type="human",
status="warning",
title="ADR-100 PlayBook authoring approval requested",
title=(
"ADR-100 runtime replay Gate 5 approval requested"
if is_runtime_replay
else "ADR-100 PlayBook authoring approval requested"
),
description=_approval_history_description(context),
actor="adr100_remediation_service",
actor_role="approval",
@@ -658,9 +699,21 @@ class Adr100RemediationService:
return history
def _select_mode(item: dict[str, Any], requested: RemediationMode) -> Literal["reverify", "replay", "ticket"]:
def _select_mode(
item: dict[str, Any],
requested: RemediationMode,
) -> Literal["reverify", "replay", "ticket", "approval"]:
if requested == "approval":
return "ticket"
if item.get("remediation_status") in _TICKET_STATUSES:
return "ticket"
if item.get("remediation_action") in _TICKET_ACTIONS:
return "ticket"
if (
item.get("remediation_status") in _RUNTIME_REPLAY_STATUSES
or item.get("remediation_action") in _RUNTIME_REPLAY_ACTIONS
):
return "approval"
return "replay"
if requested in ("reverify", "replay"):
return requested
if requested == "ticket":
@@ -719,6 +772,17 @@ def _plan_for_item(item: dict[str, Any], mode: str) -> dict[str, Any]:
"target_action": item.get("remediation_action"),
}
if mode == "approval":
if (
item.get("remediation_status") in _RUNTIME_REPLAY_STATUSES
or item.get("remediation_action") in _RUNTIME_REPLAY_ACTIONS
):
return {
"step": "request_runtime_replay_gate5_approval",
"agent_id": "auto_repair_executor",
"required_scope": "record_only_until_approved",
"writes": ["approval_records", "alert_operation_log", "timeline"],
"target_action": item.get("remediation_action"),
}
return {
"step": "request_playbook_authoring_approval",
"agent_id": "openclaw_playbook_planner",
@@ -763,6 +827,7 @@ def _approval_blocked_payload(
item: dict[str, Any],
mode: str,
checks: list[dict[str, Any]],
extra: dict[str, Any] | None = None,
) -> dict[str, Any]:
return {
"schema_version": "adr100_remediation_approval_v1",
@@ -784,6 +849,7 @@ def _approval_blocked_payload(
"approval": None,
"approval_id": None,
"plan": _plan_for_item(item, "approval"),
**(extra or {}),
}
@@ -882,12 +948,99 @@ def _approval_request_for_item(
)
def _approval_fingerprint(item: dict[str, Any]) -> str:
def _runtime_replay_approval_request_for_item(
    item: dict[str, Any],
    incident: Incident,
    checks: list[dict[str, Any]],
    replay_gate: dict[str, Any],
) -> ApprovalRequestCreate:
    """Build the Gate 5 approval request for a runtime replay of a work item.

    The request is record-only: its metadata marks execution as not yet
    authorized and no repair as attempted or executed.

    Args:
        item: Remediation work item; read for ``playbook_id``,
            ``work_item_id``, ``incident_id``, ``alertname``,
            ``auto_repair_id`` and ``remediation_action``.
        incident: Incident supplying ``affected_services`` and the fallback
            ``incident_id`` when the work item carries none.
        checks: Dry-run check dicts (``name`` / ``passed`` / ``detail``) that
            are converted into ``DryRunCheck`` entries.
        replay_gate: Replay gate result; read for ``status``, ``steps`` and
            ``supported_write_route_count`` and embedded whole in metadata.

    Returns:
        An ``ApprovalRequestCreate`` that expires two hours after creation.
    """
    # Fall back to the alert name so the blast radius always names something.
    related_services = list(filter(None, incident.affected_services or [])) or [
        str(item.get("alertname") or "unknown_alert")
    ]
    playbook_id = str(item.get("playbook_id") or "unknown_playbook")
    work_item_id = str(item.get("work_item_id") or "")

    # Collect every truthy write route from dict-shaped steps; only routes
    # that are themselves dicts contribute a display name.
    write_routes = []
    route_names = []
    for step in replay_gate.get("steps") or []:
        if not isinstance(step, dict):
            continue
        route = step.get("write_route")
        if not route:
            continue
        write_routes.append(route)
        if isinstance(route, dict):
            route_names.append(str(route.get("tool_name") or "unknown_write_route"))
    joined_routes = ", ".join(route_names)

    action = (
        "RUNTIME_REPLAY_GATE5: "
        f"ADR-100 replay {playbook_id} via {joined_routes or 'mcp_write'}"
    )

    incident_ref = item.get("incident_id") or incident.incident_id
    summary_lines = [
        f"Incident: {incident_ref}",
        f"Work item: {work_item_id or '-'}",
        f"PlayBook: {playbook_id}",
        f"Replay gate: {replay_gate.get('status')}",
        f"Write routes: {joined_routes or '-'}",
    ]
    scope_note = (
        "Approval scope: Gate 5 authorization for a controlled runtime replay. "
        "Creating this approval does not execute repair, does not restart a "
        "container, does not update incident state, and does not write an "
        "auto_repair_executions result. Execution must happen only after the "
        "approval status reaches approved and the executor re-validates the gate."
    )
    description = "\n".join(summary_lines) + "\n\n" + scope_note

    metadata = {
        "schema_version": "adr100_runtime_replay_gate5_approval_v1",
        "approval_kind": "adr100_runtime_replay_gate5",
        "execution_kind": "runtime_replay_gate5_pending",
        "execution_authorized": False,
        "repair_attempted": False,
        "repair_executed": False,
        "work_item_id": work_item_id,
        "auto_repair_id": item.get("auto_repair_id"),
        "source": "adr100.verification_coverage.remediation_queue",
        "target_action": item.get("remediation_action"),
        "required_scope": "write_after_approval",
        "next_step": "approve_then_dispatch_auto_repair_executor",
        "playbook_id": playbook_id,
        "flywheel_node": "approval",
        "agent_id": "auto_repair_executor",
        "mcp_gate": "gate5_required",
        "replay_gate": replay_gate,
        "write_routes": write_routes,
    }

    # At least one pod is always reported, even when the gate gives no count.
    route_count = int(replay_gate.get("supported_write_route_count") or 1)
    return ApprovalRequestCreate(
        action=action,
        description=description[:4000],
        risk_level=RiskLevel.MEDIUM,
        blast_radius=BlastRadius(
            affected_pods=max(1, route_count),
            estimated_downtime="<1m",
            related_services=related_services[:6],
            data_impact=DataImpact.WRITE,
        ),
        dry_run_checks=[
            DryRunCheck(
                name=str(entry.get("name") or "check"),
                passed=bool(entry.get("passed")),
                message=str(entry.get("detail") or ""),
            )
            for entry in checks
        ],
        requested_by="adr100_remediation_service",
        expires_at=datetime.now(timezone.utc) + timedelta(hours=2),
        metadata=metadata,
        incident_id=str(incident_ref),
        matched_playbook_id=None if playbook_id == "unknown_playbook" else playbook_id,
    )
def _approval_fingerprint(
    item: dict[str, Any],
    *,
    approval_kind: str = "adr100_playbook_authoring",
) -> str:
    """Return the SHA-256 hex digest that identifies an approval for dedup.

    The digest covers ``"<kind>:<basis>"``. Including the kind keeps a
    PlayBook-authoring approval and a runtime-replay approval for the same
    work item from sharing one fingerprint.

    Args:
        item: Remediation work item. ``work_item_id`` is the preferred basis;
            when it is missing or empty the basis falls back to
            ``"<incident_id>:<playbook_id>:<remediation_action>"``.
        approval_kind: Namespace prefix for the digest. An empty value falls
            back to ``"adr100_playbook_authoring"``.

    Returns:
        A 64-character lowercase hexadecimal SHA-256 digest.
    """
    work_item_id = str(item.get("work_item_id") or "")
    playbook_id = str(item.get("playbook_id") or "")
    incident_id = str(item.get("incident_id") or "")
    basis = work_item_id or f"{incident_id}:{playbook_id}:{item.get('remediation_action') or ''}"
    # A single return: an earlier hard-coded "adr100_playbook_authoring:"
    # return would ignore approval_kind and leave this digest unreachable.
    kind = approval_kind or "adr100_playbook_authoring"
    return hashlib.sha256(f"{kind}:{basis}".encode("utf-8")).hexdigest()
def _approval_result_payload(
@@ -900,10 +1053,12 @@ def _approval_result_payload(
approval_created: bool,
fingerprint: str,
) -> dict[str, Any]:
ticket_preview = (request.metadata or {}).get("ticket_preview") or _ticket_preview_for_item(
item,
incident,
)
metadata = request.metadata or {}
approval_kind = str(metadata.get("approval_kind") or "adr100_playbook_authoring")
ticket_preview = metadata.get("ticket_preview")
if ticket_preview is None and approval_kind == "adr100_playbook_authoring":
ticket_preview = _ticket_preview_for_item(item, incident)
replay_gate = metadata.get("replay_gate")
approval_id = str(getattr(approval, "id", "") or "")
approval_status = getattr(getattr(approval, "status", None), "value", None) or getattr(
approval,
@@ -931,8 +1086,13 @@ def _approval_result_payload(
"creates_external_ticket": False,
"deduplicated": not approval_created,
"fingerprint": fingerprint,
"approval_kind": approval_kind,
"checks": checks,
"verification_result_preview": "approval_requested",
"verification_result_preview": (
"runtime_replay_approval_requested"
if approval_kind == "adr100_runtime_replay_gate5"
else "approval_requested"
),
"approval_id": approval_id or None,
"approval": {
"id": approval_id or None,
@@ -945,6 +1105,7 @@ def _approval_result_payload(
"matched_playbook_id": getattr(approval, "matched_playbook_id", None),
},
"ticket_preview": ticket_preview,
"replay_gate": replay_gate,
"plan": _plan_for_item(item, "approval"),
}
@@ -1043,7 +1204,9 @@ def _approval_history_context(item: dict[str, Any], payload: dict[str, Any]) ->
"creates_external_ticket": payload.get("creates_external_ticket"),
"deduplicated": payload.get("deduplicated"),
"fingerprint": payload.get("fingerprint"),
"approval_kind": payload.get("approval_kind"),
"ticket_preview": payload.get("ticket_preview"),
"replay_gate": payload.get("replay_gate"),
"approval": payload.get("approval"),
"approval_id": payload.get("approval_id"),
"plan": payload.get("plan"),
@@ -1077,6 +1240,7 @@ def _history_description(context: dict[str, Any]) -> str:
def _approval_history_description(context: dict[str, Any]) -> str:
approval = context.get("approval") or {}
return (
f"kind={context.get('approval_kind') or 'unknown'} "
f"approval={approval.get('id') or context.get('approval_id') or 'unknown'} "
f"status={approval.get('status') or 'unknown'} "
f"preview={context.get('verification_result_preview')} "
@@ -1125,6 +1289,7 @@ def _history_item(record: Any, context: dict[str, Any]) -> dict[str, Any]:
"writes_ticket": context.get("writes_ticket"),
"writes_approval_record": context.get("writes_approval_record"),
"creates_external_ticket": context.get("creates_external_ticket"),
"approval_kind": context.get("approval_kind"),
"approval_id": context.get("approval_id") or approval.get("id"),
"approval_status": approval.get("status"),
"approval_risk_level": approval.get("risk_level"),