From 16775bb4fa7938b6811a2e8ceae9c79c4cbec9d2 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 1 Jun 2026 20:48:24 +0800 Subject: [PATCH] feat(adr100): bridge playbook authoring approvals --- apps/api/src/api/v1/ai_slo.py | 22 + .../services/adr100_remediation_service.py | 380 +++++++++++++++++- apps/api/src/services/approval_execution.py | 73 ++++ .../tests/test_adr100_remediation_service.py | 136 +++++++ .../test_approval_execution_no_action.py | 71 ++++ apps/web/messages/en.json | 6 +- apps/web/messages/zh-TW.json | 6 +- .../app/[locale]/awooop/work-items/page.tsx | 55 ++- docs/LOGBOOK.md | 42 ++ 9 files changed, 784 insertions(+), 7 deletions(-) diff --git a/apps/api/src/api/v1/ai_slo.py b/apps/api/src/api/v1/ai_slo.py index 7e8bc59b..6241a44e 100644 --- a/apps/api/src/api/v1/ai_slo.py +++ b/apps/api/src/api/v1/ai_slo.py @@ -48,6 +48,13 @@ class RemediationDryRunRequest(BaseModel): mode: RemediationMode = "auto" +class RemediationApprovalRequest(BaseModel): + """ADR-100 record-only PlayBook authoring approval request.""" + + work_item_id: str = Field(min_length=1) + mode: RemediationMode = "approval" + + @router.get("/ai/slo") async def get_ai_slo( force_refresh: bool = Query(False, description="忽略快取,強制重算"), @@ -120,6 +127,21 @@ async def dry_run_ai_slo_remediation(request: RemediationDryRunRequest) -> dict: raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc +@router.post("/ai/slo/remediation/approval-request") +async def create_ai_slo_remediation_approval_request( + request: RemediationApprovalRequest, +) -> dict: + """Create a record-only approval request for ADR-100 PlayBook authoring.""" + + try: + return await get_adr100_remediation_service().create_approval_request( + request.work_item_id, + request.mode, + ) + except RemediationNotFoundError as exc: + raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc + + @router.get("/ai/slo/remediation/history") async def list_ai_slo_remediation_history( limit: int = Query(50, ge=1, le=200), diff --git a/apps/api/src/services/adr100_remediation_service.py b/apps/api/src/services/adr100_remediation_service.py index f9ff5149..39c0a213 100644 --- a/apps/api/src/services/adr100_remediation_service.py +++ b/apps/api/src/services/adr100_remediation_service.py @@ -11,10 +11,18 @@ T25: remediation queue items are now actionable without mutating incident state: from __future__ import annotations import asyncio +from datetime import datetime, timedelta, timezone from typing import Any, Literal, Protocol import structlog +from src.models.approval import ( + ApprovalRequestCreate, + BlastRadius, + DataImpact, + DryRunCheck, + RiskLevel, +) from src.models.incident import Incident from src.repositories.incident_repository import IncidentDBRepository from src.services.adr100_slo_status_service import ( @@ -31,7 +39,7 @@ from src.services.post_execution_verifier import ( logger = structlog.get_logger(__name__) -RemediationMode = Literal["auto", "reverify", "replay", "ticket"] +RemediationMode = Literal["auto", "reverify", "replay", "ticket", "approval"] _READY_STATUSES = {"ready_for_replay", "ready_for_reverify"} _TICKET_STATUSES = {"needs_playbook_ticket"} @@ -57,6 +65,7 @@ class Adr100RemediationService: incident_repository: _IncidentRepository | None = None, auto_repair_service: AutoRepairService | None = None, verifier: PostExecutionVerifier | None = None, + approval_service: Any | None = None, timeline_service: Any | None = None, alert_operation_log_repository: Any | None = None, record_history: bool = True, @@ -65,6 +74,7 @@ class Adr100RemediationService: self._incident_repository = incident_repository or IncidentDBRepository() self._auto_repair_service = auto_repair_service or AutoRepairService() self._verifier = verifier or get_post_execution_verifier() + self._approval_service = approval_service self._timeline_service = timeline_service self._alert_operation_log_repository = alert_operation_log_repository self._record_history_enabled = record_history @@ -116,6 +126,74 @@ class Adr100RemediationService: return await self._dry_run_replay(item, incident, checks) return await self._dry_run_reverify(item, incident, checks) + async def create_approval_request( + self, + work_item_id: str, + mode: RemediationMode = "approval", + ) -> dict[str, Any]: + """Create a record-only approval for PlayBook authoring remediation.""" + + item = await self._find_work_item(work_item_id) + selected_mode = _select_mode(item, mode) + checks = _base_checks(item) + checks.append({ + "name": "playbook_authoring_ticket_required", + "passed": selected_mode in {"ticket", "approval"}, + "detail": str(item.get("remediation_status") or "unknown"), + }) + + incident = await self._load_incident(item) + checks.append({ + "name": "incident_loaded", + "passed": incident is not None, + "detail": item.get("incident_id") or "missing incident_id", + }) + + if incident is None or not all(check["passed"] for check in checks): + payload = _approval_blocked_payload(item, selected_mode, checks) + payload["history"] = await self._record_dry_run_history(item, payload) + return payload + + approval_request = _approval_request_for_item(item, incident, checks) + approval_svc = self._approval_service + if approval_svc is None: + from src.services.approval_db import get_approval_service + + approval_svc = get_approval_service() + + fingerprint = _approval_fingerprint(item) + approval = None + if hasattr(approval_svc, "find_by_fingerprint"): + try: + approval = await approval_svc.find_by_fingerprint(fingerprint) + except Exception as exc: + logger.warning( + "adr100_remediation_approval_dedupe_lookup_failed", + fingerprint=fingerprint, + error=str(exc), + ) + + approval_created = approval is None + if approval is None and hasattr(approval_svc, "create_approval_with_fingerprint"): + approval = await approval_svc.create_approval_with_fingerprint( + approval_request, + fingerprint=fingerprint, + ) + elif approval is None: + approval = await approval_svc.create_approval(approval_request) + + payload = _approval_result_payload( + item=item, + incident=incident, + checks=checks, + approval=approval, + request=approval_request, + approval_created=approval_created, + fingerprint=fingerprint, + ) + payload["history"] = await self._record_approval_history(item, payload) + return payload + async def history( self, *, @@ -136,7 +214,7 @@ class Adr100RemediationService: repo = get_alert_operation_log_repository() - for event_type in ("PRE_FLIGHT_PASSED", "PRE_FLIGHT_FAILED"): + for event_type in ("PRE_FLIGHT_PASSED", "PRE_FLIGHT_FAILED", "APPROVAL_ESCALATED"): try: batch, _total = await repo.list_recent( limit=fetch_limit, @@ -157,7 +235,10 @@ class Adr100RemediationService: items: list[dict[str, Any]] = [] for row in rows: context = getattr(row, "context", None) or {} - if context.get("schema_version") != "adr100_remediation_dry_run_history_v1": + if context.get("schema_version") not in { + "adr100_remediation_dry_run_history_v1", + "adr100_remediation_approval_history_v1", + }: continue if work_item_id and context.get("work_item_id") != work_item_id: continue @@ -383,8 +464,86 @@ class Adr100RemediationService: ) return history + async def _record_approval_history( + self, + item: dict[str, Any], + payload: dict[str, Any], + ) -> dict[str, Any]: + if not self._record_history_enabled: + return {"recorded": False, "reason": "disabled"} + + incident_id = str(item.get("incident_id") or "") + approval_id = str(payload.get("approval_id") or "") + history: dict[str, Any] = { + "recorded": False, + "alert_operation_id": None, + "timeline_event_id": None, + } + context = _approval_history_context(item, payload) + + try: + repo = self._alert_operation_log_repository + if repo is None: + from src.repositories.alert_operation_log_repository import ( + get_alert_operation_log_repository, + ) + + repo = get_alert_operation_log_repository() + record = await repo.append( + "APPROVAL_ESCALATED", + incident_id=incident_id or None, + approval_id=approval_id or None, + auto_repair_id=str(item.get("auto_repair_id") or "") or None, + actor="adr100_remediation_service", + action_detail="adr100_playbook_authoring_approval_requested", + success=True, + context=context, + ) + if record is not None: + history["alert_operation_id"] = getattr(record, "id", None) + except Exception as exc: + logger.warning( + "adr100_remediation_approval_history_failed", + incident_id=incident_id, + approval_id=approval_id, + error=str(exc), + ) + + try: + timeline = self._timeline_service + if timeline is None: + from src.services.approval_db import get_timeline_service + + timeline = get_timeline_service() + event = await timeline.add_event( + event_type="human", + status="warning", + title="ADR-100 PlayBook authoring approval requested", + description=_approval_history_description(context), + actor="adr100_remediation_service", + actor_role="approval", + approval_id=approval_id or None, + incident_id=incident_id or None, + ) + if event: + history["timeline_event_id"] = event.get("id") + except Exception as exc: + logger.warning( + "adr100_remediation_approval_timeline_failed", + incident_id=incident_id, + approval_id=approval_id, + error=str(exc), + ) + + history["recorded"] = bool( + history.get("alert_operation_id") or history.get("timeline_event_id") + ) + return history + def _select_mode(item: dict[str, Any], requested: RemediationMode) -> Literal["reverify", "replay", "ticket"]: + if requested == "approval": + return "ticket" if requested in ("reverify", "replay"): return requested if requested == "ticket": @@ -442,6 +601,14 @@ def _plan_for_item(item: dict[str, Any], mode: str) -> dict[str, Any]: "writes": ["alert_operation_log", "timeline"], "target_action": item.get("remediation_action"), } + if mode == "approval": + return { + "step": "request_playbook_authoring_approval", + "agent_id": "openclaw_playbook_planner", + "required_scope": "record_only", + "writes": ["approval_records", "alert_operation_log", "timeline"], + "target_action": item.get("remediation_action"), + } return { "step": "validate_supported_executor_route_then_collect_current_state", "agent_id": "auto_repair_executor", @@ -475,6 +642,34 @@ def _dry_run_blocked_payload( } +def _approval_blocked_payload( + item: dict[str, Any], + mode: str, + checks: list[dict[str, Any]], +) -> dict[str, Any]: + return { + "schema_version": "adr100_remediation_approval_v1", + "work_item_id": item.get("work_item_id"), + "incident_id": item.get("incident_id"), + "auto_repair_id": item.get("auto_repair_id"), + "mode": "approval", + "requested_mode": mode, + "allowed": False, + "executed": False, + "safety_level": "approval_record_only", + "writes_incident_state": False, + "writes_auto_repair_result": False, + "writes_ticket": False, + "writes_approval_record": False, + "creates_external_ticket": False, + "checks": checks, + "verification_result_preview": "blocked", + "approval": None, + "approval_id": None, + "plan": _plan_for_item(item, "approval"), + } + + def _dry_run_result_payload( *, item: dict[str, Any], @@ -504,6 +699,139 @@ def _dry_run_result_payload( } +def _approval_request_for_item( + item: dict[str, Any], + incident: Incident, + checks: list[dict[str, Any]], +) -> ApprovalRequestCreate: + ticket_preview = _ticket_preview_for_item(item, incident) + services = [svc for svc in (incident.affected_services or []) if svc] + if not services: + services = [str(item.get("alertname") or "unknown_alert")] + playbook_id = str(item.get("playbook_id") or "unknown_playbook") + work_item_id = str(item.get("work_item_id") or "") + action = ( + "PLAYBOOK_AUTHORING_RECORD_ONLY: " + f"ADR-100 promote diagnostic PlayBook {playbook_id}" + ) + description = ( + f"{ticket_preview.get('title')}\n\n" + f"{ticket_preview.get('body_preview')}\n\n" + "Approval scope: record-only PlayBook authoring. Signing this request " + "does not execute a runtime repair, does not resolve the incident, and " + "does not mark the old diagnostic run as verified_success." + ) + return ApprovalRequestCreate( + action=action, + description=description[:4000], + risk_level=RiskLevel.MEDIUM, + blast_radius=BlastRadius( + affected_pods=0, + estimated_downtime="0", + related_services=services[:6], + data_impact=DataImpact.READ_ONLY, + ), + dry_run_checks=[ + DryRunCheck( + name=str(check.get("name") or "check"), + passed=bool(check.get("passed")), + message=str(check.get("detail") or ""), + ) + for check in checks + ], + requested_by="adr100_remediation_service", + expires_at=datetime.now(timezone.utc) + timedelta(hours=48), + metadata={ + "schema_version": "adr100_playbook_authoring_approval_v1", + "approval_kind": "adr100_playbook_authoring", + "execution_kind": "playbook_authoring_record_only", + "execution_authorized": False, + "repair_attempted": False, + "repair_executed": False, + "work_item_id": work_item_id, + "auto_repair_id": item.get("auto_repair_id"), + "source": "adr100.verification_coverage.remediation_queue", + "ticket_preview": ticket_preview, + "target_action": item.get("remediation_action"), + "required_scope": "record_only", + "next_step": "author_mutating_repair_step", + "playbook_id": playbook_id, + "flywheel_node": "approval", + "agent_id": "openclaw_playbook_planner", + "mcp_gate": "not_required_record_only", + }, + incident_id=str(item.get("incident_id") or incident.incident_id), + matched_playbook_id=playbook_id if playbook_id != "unknown_playbook" else None, + ) + + +def _approval_fingerprint(item: dict[str, Any]) -> str: + work_item_id = str(item.get("work_item_id") or "") + playbook_id = str(item.get("playbook_id") or "") + incident_id = str(item.get("incident_id") or "") + basis = work_item_id or f"{incident_id}:{playbook_id}:{item.get('remediation_action') or ''}" + return f"adr100_playbook_authoring:{basis}"[:240] + + +def _approval_result_payload( + *, + item: dict[str, Any], + incident: Incident, + checks: list[dict[str, Any]], + approval: Any, + request: ApprovalRequestCreate, + approval_created: bool, + fingerprint: str, +) -> dict[str, Any]: + ticket_preview = (request.metadata or {}).get("ticket_preview") or _ticket_preview_for_item( + item, + incident, + ) + approval_id = str(getattr(approval, "id", "") or "") + approval_status = getattr(getattr(approval, "status", None), "value", None) or getattr( + approval, + "status", + None, + ) + risk_level = getattr(getattr(approval, "risk_level", None), "value", None) or getattr( + approval, + "risk_level", + None, + ) + return { + "schema_version": "adr100_remediation_approval_v1", + "work_item_id": item.get("work_item_id"), + "incident_id": item.get("incident_id") or incident.incident_id, + "auto_repair_id": item.get("auto_repair_id"), + "mode": "approval", + "allowed": True, + "executed": False, + "safety_level": "approval_record_only", + "writes_incident_state": False, + "writes_auto_repair_result": False, + "writes_ticket": False, + "writes_approval_record": approval_created, + "creates_external_ticket": False, + "deduplicated": not approval_created, + "fingerprint": fingerprint, + "checks": checks, + "verification_result_preview": "approval_requested", + "approval_id": approval_id or None, + "approval": { + "id": approval_id or None, + "status": str(approval_status or ""), + "risk_level": str(risk_level or ""), + "required_signatures": getattr(approval, "required_signatures", None), + "current_signatures": getattr(approval, "current_signatures", None), + "requested_by": getattr(approval, "requested_by", None), + "incident_id": getattr(approval, "incident_id", None), + "matched_playbook_id": getattr(approval, "matched_playbook_id", None), + }, + "ticket_preview": ticket_preview, + "plan": _plan_for_item(item, "approval"), + } + + def _summarize_post_state(post_state: dict[str, Any]) -> dict[str, Any]: keys = sorted(post_state.keys()) return { @@ -537,6 +865,33 @@ def _history_context(item: dict[str, Any], payload: dict[str, Any]) -> dict[str, } +def _approval_history_context(item: dict[str, Any], payload: dict[str, Any]) -> dict[str, Any]: + return { + "schema_version": "adr100_remediation_approval_history_v1", + "work_item_id": item.get("work_item_id"), + "auto_repair_id": item.get("auto_repair_id"), + "playbook_id": item.get("playbook_id"), + "alertname": item.get("alertname"), + "mode": payload.get("mode"), + "allowed": payload.get("allowed"), + "executed": payload.get("executed"), + "safety_level": payload.get("safety_level"), + "writes_incident_state": payload.get("writes_incident_state"), + "writes_auto_repair_result": payload.get("writes_auto_repair_result"), + "writes_ticket": payload.get("writes_ticket"), + "writes_approval_record": payload.get("writes_approval_record"), + "creates_external_ticket": payload.get("creates_external_ticket"), + "deduplicated": payload.get("deduplicated"), + "fingerprint": payload.get("fingerprint"), + "ticket_preview": payload.get("ticket_preview"), + "approval": payload.get("approval"), + "approval_id": payload.get("approval_id"), + "plan": payload.get("plan"), + "verification_result_preview": payload.get("verification_result_preview"), + "checks": payload.get("checks"), + } + + def _timeline_status(payload: dict[str, Any]) -> str: if not payload.get("allowed"): return "warning" @@ -559,6 +914,18 @@ def _history_description(context: dict[str, Any]) -> str: )[:500] +def _approval_history_description(context: dict[str, Any]) -> str: + approval = context.get("approval") or {} + return ( + f"approval={approval.get('id') or context.get('approval_id') or 'unknown'} " + f"status={approval.get('status') or 'unknown'} " + f"preview={context.get('verification_result_preview')} " + f"writes_approval={context.get('writes_approval_record')} " + f"writes_incident={context.get('writes_incident_state')} " + f"writes_auto_repair={context.get('writes_auto_repair_result')}" + )[:500] + + def _record_created_at(record: Any) -> str: value = getattr(record, "created_at", None) if hasattr(value, "isoformat"): @@ -569,6 +936,7 @@ def _record_created_at(record: Any) -> str: def _history_item(record: Any, context: dict[str, Any]) -> dict[str, Any]: route = context.get("mcp_route") or {} post_state = context.get("post_state_summary") or {} + approval = context.get("approval") or {} return { "id": str(getattr(record, "id", "")), "incident_id": getattr(record, "incident_id", None), @@ -594,7 +962,13 @@ def _history_item(record: Any, context: dict[str, Any]) -> dict[str, Any]: "writes_incident_state": context.get("writes_incident_state"), "writes_auto_repair_result": context.get("writes_auto_repair_result"), "writes_ticket": context.get("writes_ticket"), + "writes_approval_record": context.get("writes_approval_record"), "creates_external_ticket": context.get("creates_external_ticket"), + "approval_id": context.get("approval_id") or approval.get("id"), + "approval_status": approval.get("status"), + "approval_risk_level": approval.get("risk_level"), + "deduplicated": context.get("deduplicated"), + "fingerprint": context.get("fingerprint"), "ticket_preview": context.get("ticket_preview"), "plan": context.get("plan"), "checks": context.get("checks") or [], diff --git a/apps/api/src/services/approval_execution.py b/apps/api/src/services/approval_execution.py index ee03ed81..238cfc1a 100644 --- a/apps/api/src/services/approval_execution.py +++ b/apps/api/src/services/approval_execution.py @@ -72,6 +72,15 @@ _SSH_GATEWAY_TOOL_SCOPES: dict[str, str] = { } +def _is_playbook_authoring_record_only_approval(approval: ApprovalRequest) -> bool: + metadata = dict(getattr(approval, "metadata", None) or {}) + return ( + metadata.get("approval_kind") == "adr100_playbook_authoring" + and metadata.get("execution_kind") == "playbook_authoring_record_only" + and metadata.get("execution_authorized") is False + ) + + class ApprovalExecutionService: """ 授權執行服務 - 編排整個執行流程 @@ -254,6 +263,70 @@ class ApprovalExecutionService: pass if operation_type is None or resource_name is None: + if _is_playbook_authoring_record_only_approval(approval): + metadata = dict(getattr(approval, "metadata", None) or {}) + logger.info( + "background_execution_playbook_authoring_record_only", + approval_id=str(approval.id), + action=approval.action, + incident_id=getattr(approval, "incident_id", None), + work_item_id=metadata.get("work_item_id"), + playbook_id=metadata.get("playbook_id"), + ) + await service.update_execution_status( + approval.id, + success=True, + execution_kind="playbook_authoring_record_only", + repair_executed=False, + repair_attempted=False, + ) + await timeline.add_event( + event_type="exec", + status="success", + title="PlayBook authoring approval recorded (no runtime repair)", + description=( + "Approval scope is record-only. No incident state change, " + "no runtime repair, no verified_success promotion." + ), + actor="leWOOOgo", + actor_role="executor", + approval_id=str(approval.id), + incident_id=approval.incident_id, + ) + await self._log_aol_completed( + op_id=_aol_op_id, + status="success", + duration_ms=int((time.time() - _aol_started_ms) * 1000), + output={ + "reason": "PLAYBOOK_AUTHORING_RECORD_ONLY", + "execution_kind": "playbook_authoring_record_only", + "repair_executed": False, + "repair_attempted": False, + "work_item_id": metadata.get("work_item_id"), + "playbook_id": metadata.get("playbook_id"), + }, + ) + await self._log_alert_execution_completed( + approval, + success=True, + execution_kind="playbook_authoring_record_only", + duration_ms=int((time.time() - _aol_started_ms) * 1000), + output={ + "reason": "PLAYBOOK_AUTHORING_RECORD_ONLY", + "repair_executed": False, + "repair_attempted": False, + }, + ) + await self._push_execution_result_to_alert( + approval, + success=True, + error=None, + execution_kind="playbook_authoring_record_only", + repair_executed=False, + repair_attempted=False, + ) + return True + # 2026-04-19 ogt + Claude Opus 4.7: 區分 NO_ACTION vs 真解析失敗 # NO_ACTION 是 AI 刻意選的「純調查不破壞」,不該誤標 EXECUTION_FAILED # 污染 auto_execute 成功率 KPI (MASTER §7.1 #11) diff --git a/apps/api/tests/test_adr100_remediation_service.py b/apps/api/tests/test_adr100_remediation_service.py index 35f30637..9bbdae1f 100644 --- a/apps/api/tests/test_adr100_remediation_service.py +++ b/apps/api/tests/test_adr100_remediation_service.py @@ -2,12 +2,14 @@ from __future__ import annotations from datetime import datetime, timezone from typing import Any +from uuid import UUID import pytest from fastapi import FastAPI from fastapi.testclient import TestClient from src.api.v1.ai_slo import router +from src.models.approval import ApprovalRequest, ApprovalStatus, RiskLevel from src.models.incident import Incident, IncidentStatus, Severity, Signal from src.models.playbook import Playbook from src.services.adr100_remediation_service import ( @@ -101,6 +103,44 @@ class _FakeTimelineService: return {"id": "timeline-1"} +class _FakeApprovalService: + def __init__(self) -> None: + self.requests: list[Any] = [] + self.fingerprints: list[str] = [] + self.existing: ApprovalRequest | None = None + + async def create_approval(self, request: Any) -> ApprovalRequest: + return await self.create_approval_with_fingerprint(request, "") + + async def create_approval_with_fingerprint( + self, + request: Any, + fingerprint: str, + ) -> ApprovalRequest: + self.requests.append(request) + self.fingerprints.append(fingerprint) + return ApprovalRequest( + id=UUID("00000000-0000-0000-0000-00000000a100"), + action=request.action, + description=request.description, + status=ApprovalStatus.PENDING, + risk_level=request.risk_level, + blast_radius=request.blast_radius, + dry_run_checks=request.dry_run_checks, + required_signatures=1, + signatures=[], + requested_by=request.requested_by, + expires_at=request.expires_at, + metadata=request.metadata, + incident_id=request.incident_id, + matched_playbook_id=request.matched_playbook_id, + ) + + async def find_by_fingerprint(self, fingerprint: str) -> ApprovalRequest | None: + self.fingerprints.append(f"lookup:{fingerprint}") + return self.existing + + class _NoopPlaybookService: async def get_recommendations(self, *_args, **_kwargs): # noqa: ANN002, ANN003 return [] @@ -161,6 +201,7 @@ def _service( item: dict[str, Any], incident: Incident | None = None, state: dict[str, Any] | None = None, + approval_service: Any | None = None, timeline_service: Any | None = None, alert_operation_log_repository: Any | None = None, record_history: bool = False, @@ -173,6 +214,7 @@ def _service( cooldown_checker=_no_cooldown, ), verifier=_FakeVerifier(state or {"k8s_get_pod_status": {"phase": "Running"}}), + approval_service=approval_service, timeline_service=timeline_service, alert_operation_log_repository=alert_operation_log_repository, record_history=record_history, @@ -252,6 +294,82 @@ async def test_dry_run_ticket_proposal_records_internal_history_only(): assert timeline.calls[0]["actor_role"] == "ticket" +@pytest.mark.asyncio +async def test_create_approval_request_is_record_only_and_does_not_authorize_repair(): + alert_repo = _FakeAlertOperationLogRepository() + timeline = _FakeTimelineService() + approval_service = _FakeApprovalService() + item = _queue_item( + remediation_status="needs_playbook_ticket", + remediation_action="promote_diagnostic_to_repair_playbook", + remediation_owner="solver_or_operator", + failure_class="observe_only_playbook", + ) + svc = _service( + item=item, + approval_service=approval_service, + timeline_service=timeline, + alert_operation_log_repository=alert_repo, + record_history=True, + ) + + result = await svc.create_approval_request("verification:INC-20260514-TEST01:are-1") + + assert result["schema_version"] == "adr100_remediation_approval_v1" + assert result["allowed"] is True + assert result["mode"] == "approval" + assert result["writes_approval_record"] is True + assert result["deduplicated"] is False + assert result["fingerprint"].startswith("adr100_playbook_authoring:") + assert result["writes_incident_state"] is False + assert result["writes_auto_repair_result"] is False + assert result["approval_id"] == "00000000-0000-0000-0000-00000000a100" + assert result["approval"]["status"] == "pending" + assert result["approval"]["risk_level"] == "medium" + assert result["plan"]["required_scope"] == "record_only" + assert result["history"]["recorded"] is True + assert approval_service.requests[0].risk_level == RiskLevel.MEDIUM + assert approval_service.requests[0].metadata["approval_kind"] == "adr100_playbook_authoring" + assert approval_service.requests[0].metadata["execution_authorized"] is False + assert approval_service.requests[0].metadata["repair_executed"] is False + assert approval_service.fingerprints[0].startswith("lookup:adr100_playbook_authoring:") + assert approval_service.fingerprints[1].startswith("adr100_playbook_authoring:") + assert alert_repo.calls[0]["event_type"] == "APPROVAL_ESCALATED" + assert alert_repo.calls[0]["approval_id"] == "00000000-0000-0000-0000-00000000a100" + assert alert_repo.calls[0]["context"]["writes_approval_record"] is True + assert timeline.calls[0]["actor_role"] == "approval" + + +@pytest.mark.asyncio +async def test_create_approval_request_deduplicates_existing_pending_approval(): + approval_service = _FakeApprovalService() + approval_service.existing = ApprovalRequest( + id=UUID("00000000-0000-0000-0000-00000000a101"), + action="PLAYBOOK_AUTHORING_RECORD_ONLY: existing", + description="existing", + status=ApprovalStatus.PENDING, + risk_level=RiskLevel.MEDIUM, + required_signatures=1, + requested_by="adr100_remediation_service", + metadata={"approval_kind": "adr100_playbook_authoring"}, + incident_id="INC-20260514-TEST01", + matched_playbook_id="PB-1", + ) + item = _queue_item( + remediation_status="needs_playbook_ticket", + remediation_action="promote_diagnostic_to_repair_playbook", + failure_class="observe_only_playbook", + ) + svc = _service(item=item, approval_service=approval_service) + + result = await svc.create_approval_request("verification:INC-20260514-TEST01:are-1") + + assert result["approval_id"] == "00000000-0000-0000-0000-00000000a101" + assert result["writes_approval_record"] is False + assert result["deduplicated"] is True + assert approval_service.requests == [] + + @pytest.mark.asyncio async def test_dry_run_reverify_collects_state_without_writes(): item = _queue_item( @@ -375,6 +493,18 @@ def test_ai_slo_remediation_endpoints(monkeypatch): async def dry_run(self, work_item_id: str, mode: str = "auto") -> dict[str, Any]: return {"work_item_id": work_item_id, "mode": mode, "executed": True} + async def create_approval_request( + self, + work_item_id: str, + mode: str = "approval", + ) -> dict[str, Any]: + return { + "schema_version": "adr100_remediation_approval_v1", + "work_item_id": work_item_id, + "mode": mode, + "approval_id": "approval-1", + } + async def history( self, *, @@ -411,11 +541,17 @@ def test_ai_slo_remediation_endpoints(monkeypatch): "/api/v1/ai/slo/remediation/history", params={"limit": 10, "work_item_id": "verification:INC:are-1"}, ) + approval = client.post( + "/api/v1/ai/slo/remediation/approval-request", + json={"work_item_id": "verification:INC:are-1", "mode": "approval"}, + ) assert preview.status_code == 200 assert preview.json()["mode"] == "reverify" assert dry_run.status_code == 200 assert dry_run.json()["executed"] is True + assert approval.status_code == 200 + assert approval.json()["approval_id"] == "approval-1" assert history.status_code == 200 assert history.json()["schema_version"] == "adr100_remediation_history_v1" assert history.json()["filters"]["work_item_id"] == "verification:INC:are-1" diff --git a/apps/api/tests/test_approval_execution_no_action.py b/apps/api/tests/test_approval_execution_no_action.py index fc11e1ab..8fc12d4c 100644 --- a/apps/api/tests/test_approval_execution_no_action.py +++ b/apps/api/tests/test_approval_execution_no_action.py @@ -6,6 +6,77 @@ import pytest from src.services.approval_execution import ApprovalExecutionService +@pytest.mark.asyncio +async def test_playbook_authoring_record_only_approval_does_not_resolve_incident(monkeypatch): + approval = SimpleNamespace( + id="approval-playbook-authoring-1", + action="PLAYBOOK_AUTHORING_RECORD_ONLY: promote diagnostic playbook", + incident_id="INC-TEST-PLAYBOOK", + metadata={ + "approval_kind": "adr100_playbook_authoring", + "execution_kind": "playbook_authoring_record_only", + "execution_authorized": False, + "work_item_id": "verification:INC-TEST-PLAYBOOK:are-1", + "playbook_id": "PB-1", + }, + ) + incident_service = SimpleNamespace(resolve_incident=AsyncMock()) + update_execution_status = AsyncMock() + timeline_add_event = AsyncMock() + alert_completed = AsyncMock(return_value=None) + + monkeypatch.setattr( + "src.services.approval_execution.get_approval_service", + lambda: SimpleNamespace(update_execution_status=update_execution_status), + ) + monkeypatch.setattr( + "src.services.approval_execution.get_timeline_service", + lambda: SimpleNamespace(add_event=timeline_add_event), + ) + monkeypatch.setattr( + "src.services.approval_execution.parse_operation_from_action", + lambda _: SimpleNamespace( + operation_type=None, resource_name=None, namespace=None + ), + ) + monkeypatch.setattr( + "src.services.incident_service.get_incident_service", + lambda: incident_service, + ) + monkeypatch.setattr( + "src.services.approval_execution.ApprovalExecutionService._push_execution_result_to_alert", + AsyncMock(return_value=None), + ) + monkeypatch.setattr( + "src.services.approval_execution.ApprovalExecutionService._log_aol_completed", + AsyncMock(return_value=None), + ) + monkeypatch.setattr( + "src.services.approval_execution.ApprovalExecutionService._log_alert_execution_started", + AsyncMock(return_value=None), + ) + monkeypatch.setattr( + "src.services.approval_execution.ApprovalExecutionService._log_alert_execution_completed", + alert_completed, + ) + + result = await ApprovalExecutionService().execute_approved_action(approval) + + assert result is True + update_execution_status.assert_awaited_once_with( + approval.id, + success=True, + execution_kind="playbook_authoring_record_only", + repair_executed=False, + repair_attempted=False, + ) + assert "no runtime repair" in timeline_add_event.await_args.kwargs["title"] + assert alert_completed.await_args.kwargs["execution_kind"] == ( + "playbook_authoring_record_only" + ) + incident_service.resolve_incident.assert_not_awaited() + + @pytest.mark.asyncio async def test_no_action_execution_resolves_incident_once(monkeypatch): # Arrange diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json index b7ad64cb..d8c0e262 100644 --- a/apps/web/messages/en.json +++ b/apps/web/messages/en.json @@ -2982,13 +2982,17 @@ "actions": { "preview": "預覽", "dryRun": "預檢 / 草稿", + "approval": "送審批", "loading": "處理中", "failed": "補救工作操作失敗" }, "result": { "mode": "模式={value}", "allowed": "允許={value}", - "writes": "寫入 incident={incident} / autoRepair={autoRepair}" + "writes": "寫入 incident={incident} / autoRepair={autoRepair}", + "approvalWrite": "已建立審批紀錄", + "deduplicated": "已收斂既有審批", + "approval": "審批 {id} / {status} / {risk}" } }, "callbackTraceRecoveryActions": { diff --git a/apps/web/messages/zh-TW.json b/apps/web/messages/zh-TW.json index b7ad64cb..d8c0e262 100644 --- a/apps/web/messages/zh-TW.json +++ b/apps/web/messages/zh-TW.json @@ -2982,13 +2982,17 @@ "actions": { "preview": "預覽", "dryRun": "預檢 / 草稿", + "approval": "送審批", "loading": "處理中", "failed": "補救工作操作失敗" }, "result": { "mode": "模式={value}", "allowed": "允許={value}", - "writes": "寫入 incident={incident} / autoRepair={autoRepair}" + "writes": "寫入 incident={incident} / autoRepair={autoRepair}", + "approvalWrite": "已建立審批紀錄", + "deduplicated": "已收斂既有審批", + "approval": "審批 {id} / {status} / {risk}" } }, "callbackTraceRecoveryActions": { diff --git a/apps/web/src/app/[locale]/awooop/work-items/page.tsx b/apps/web/src/app/[locale]/awooop/work-items/page.tsx index ec34d006..bf6c9304 100644 --- a/apps/web/src/app/[locale]/awooop/work-items/page.tsx +++ b/apps/web/src/app/[locale]/awooop/work-items/page.tsx @@ -196,7 +196,10 @@ type RecurrenceWorkItemActionResult = { writes_source_event?: boolean | null; writes_auto_repair_result?: boolean | null; writes_ticket?: boolean | null; + writes_approval_record?: boolean | null; creates_external_ticket?: boolean | null; + deduplicated?: boolean | null; + fingerprint?: string | null; verification_result_preview?: string | null; next_step?: string | null; checks?: Array<{ name?: string | null; passed?: boolean | null; detail?: string | null }>; @@ -214,12 +217,24 @@ type RecurrenceWorkItemActionResult = { labels?: string[] | null; body_preview?: string | null; } | null; + approval_id?: string | null; + approval?: { + id?: string | null; + status?: string | null; + risk_level?: string | null; + required_signatures?: number | null; + current_signatures?: number | null; + requested_by?: string | null; + incident_id?: string | null; + matched_playbook_id?: string | null; + } | null; plan?: { step?: string | null; flywheel_node?: string | null; agent_id?: string | null; required_scope?: string | null; target_action?: string | null; + writes?: string[] | null; } | null; read_model_route?: { agent_id?: string | null; @@ -239,6 +254,7 @@ type RecurrenceWorkItemActionState = { loading?: | "preview" | "dryRun" + | "approval" | "handoff" | "acceptSource" | "rejectSource" @@ -2661,7 +2677,7 @@ function Adr100RemediationQueuePanel({ const runAction = useCallback(async ( item: RemediationQueueItem, - action: "preview" | "dryRun" + action: "preview" | "dryRun" | "approval" ) => { const workItemId = item.work_item_id ?? ""; if (!workItemId) return; @@ -2677,7 +2693,13 @@ function Adr100RemediationQueuePanel({ `${API_BASE}/api/v1/ai/slo/remediation/preview?work_item_id=${encodeURIComponent(workItemId)}&mode=${encodeURIComponent(mode)}`, 12000 ) - : await postJson( + : action === "approval" + ? await postJson( + `${API_BASE}/api/v1/ai/slo/remediation/approval-request`, + { work_item_id: workItemId, mode: "approval" }, + 15000 + ) + : await postJson( `${API_BASE}/api/v1/ai/slo/remediation/dry-run`, { work_item_id: workItemId, mode }, 15000 @@ -2742,6 +2764,9 @@ function Adr100RemediationQueuePanel({ const state = workItemId ? actionState[workItemId] : undefined; const result = state?.result ?? null; const ticketPreview = result?.ticket_preview ?? null; + const approval = result?.approval ?? null; + const canCreateApproval = item.remediation_status === "needs_playbook_ticket" + || item.remediation_action === "promote_diagnostic_to_repair_playbook"; return (
@@ -2799,6 +2824,17 @@ function Adr100RemediationQueuePanel({
{state?.error ? ( @@ -2816,7 +2852,22 @@ function Adr100RemediationQueuePanel({ incident: String(result.writes_incident_state ?? false), autoRepair: String(result.writes_auto_repair_result ?? false), })} + {result.writes_approval_record ? ( + {t("result.approvalWrite")} + ) : null} + {result.deduplicated ? ( + {t("result.deduplicated")} + ) : null} + {approval ? ( +

+ {t("result.approval", { + id: approval.id ?? result.approval_id ?? "--", + status: approval.status ?? "--", + risk: approval.risk_level ?? "--", + })} +

+ ) : null} {ticketPreview ? (

{ticketPreview.title ?? t("ticketFallback")}

diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 85988131..4615dc72 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -47,6 +47,48 @@ - Runtime ingestion / GitHub primary / AwoooP production landing:約 `40-45%`;`request_sent_count=0`、`owner_response_received_count=0`、`owner_response_accepted_count=0`、`active_runtime_gate_count=0`。 - Kali `192.168.0.112` 與開發主機 `192.168.0.111` / `192.168.0.168` 仍維持已納入框架、未啟動掃描 / 修復 / 更新 / 重啟的邊界。 +## 2026-06-01|ADR-100 PlayBook authoring 審批橋接 + +**背景**: + +- ADR-100 remediation queue 已能把 `observe_only_playbook` 轉成 `needs_playbook_ticket` 與 record-only ticket 草稿,但 operator 還缺少從 Work Items 直接送入審批面的橋接。 +- 不能直接把 ticket 草稿做成一般 `NO_ACTION` approval,因為現行 `NO_ACTION` 簽核完成後會 resolve incident;這會讓 PlayBook authoring 被誤當成事故修復完成。 + +**本次調整**: + +- `apps/api/src/services/adr100_remediation_service.py`: + - 新增 `create_approval_request()`,可從 remediation work item 建立 `adr100_playbook_authoring` approval。 + - approval scope 明確為 `playbook_authoring_record_only`:`execution_authorized=false`、`repair_attempted=false`、`repair_executed=false`。 + - 以 `adr100_playbook_authoring:{work_item}` fingerprint 收斂重複送審,避免同一個告警項目一直產生重複 approval。 + - 寫入 `approval_records`、`alert_operation_log(APPROVAL_ESCALATED)` 與 timeline;不寫 incident state、不寫 auto-repair result、不建立外部 ticket。 +- `apps/api/src/services/approval_execution.py`: + - 新增 `playbook_authoring_record_only` 執行分支。 + - 人工批准後只把 approval 本身結成 terminal success,留下 `repair_executed=false` 證據;不 resolve incident、不執行 runtime repair、不升級為 `verified_success`。 +- `apps/api/src/api/v1/ai_slo.py`: + - 新增 `POST /api/v1/ai/slo/remediation/approval-request`。 +- `apps/web/src/app/[locale]/awooop/work-items/page.tsx`: + - ADR-100 補救工作佇列新增「送審批」按鈕,顯示 approval id / status / risk。 + - 顯示 `已建立審批紀錄` 或 `已收斂既有審批`,避免 operator 誤判重複送出。 +- `apps/web/messages/zh-TW.json` / `en.json`: + - 新增 Work Items 審批橋接文案;英文語系維持繁中文案鏡像。 + +**驗證**: + +- `python3 -m py_compile apps/api/src/services/adr100_remediation_service.py apps/api/src/api/v1/ai_slo.py apps/api/src/services/approval_execution.py apps/api/tests/test_adr100_remediation_service.py apps/api/tests/test_approval_execution_no_action.py` +- `DATABASE_URL=postgresql://test:test@localhost:5432/test PYTHONPATH=apps/api /Users/ogt/.pyenv/shims/pytest apps/api/tests/test_adr100_remediation_service.py apps/api/tests/test_approval_execution_no_action.py -q` → `15 passed` +- `python3 -m json.tool apps/web/messages/zh-TW.json` / `apps/web/messages/en.json` +- `pnpm --dir apps/web exec tsc --noEmit --tsBuildInfoFile /tmp/awoooi-work-items-approval-request.tsbuildinfo` +- `NEXT_PUBLIC_API_URL=https://awoooi.wooo.work NEXT_PRIVATE_BUILD_WORKER_COUNT=1 pnpm --dir apps/web run build` +- `git diff --check` +- `python3 scripts/security/security-mirror-progress-guard.py --root .` → `SECURITY_MIRROR_PROGRESS_GUARD_OK` + +**目前整體進度(本階段完成後)**: + +- ADR-100 非成功驗證補救工作項:約 `93%`;已從草稿/歷史進一步接到可審批但 record-only 的 approval bridge。 +- Approval / execution 誠實度:約 `92%`;已避免 PlayBook authoring approval 誤關 incident 或偽裝成 runtime repair。 +- 真正 verified auto-repair 成功樣本:仍約 `3-4%`;本階段只是打通審批橋,不把歷史 observe-only 修成成功。 +- 完整 AI 自動化飛輪總進度:維持 `61%`;下一個上調條件仍是 production 出現可追溯 `verified_success` 與 24h 穩定資料。 + ## 2026-06-01|ADR-100 observe-only PlayBook 補救工作項落地 **背景**: