diff --git a/apps/api/src/services/reboot_auto_recovery_drill_preflight.py b/apps/api/src/services/reboot_auto_recovery_drill_preflight.py index a6cc06f1..79c28e4d 100644 --- a/apps/api/src/services/reboot_auto_recovery_drill_preflight.py +++ b/apps/api/src/services/reboot_auto_recovery_drill_preflight.py @@ -60,12 +60,91 @@ def _build_payload(scorecard: dict[str, Any]) -> dict[str, Any]: "blocked_only_by_fresh_reboot_window": active_blockers == [_FRESH_BOOT_BLOCKER], } preflight_ready = all(preconditions.values()) - blocker_count = sum(1 for value in preconditions.values() if not value) + preflight_blockers = [ + f"{name}_not_ready" for name, value in preconditions.items() if not value + ] + blocker_count = len(preflight_blockers) status = ( "ready_for_break_glass_reboot_drill_authorization" if preflight_ready else "blocked_reboot_drill_preflight_not_ready" ) + safe_next_step = ( + "collect_separate_reboot_drill_authorization_or_wait_for_next_" + "real_all_host_reboot_event_then_rerun_verify_only" + ) + target_selector = { + "scope": "awoooi_p0_reboot_slo_hosts", + "required_host_aliases": required_hosts, + "required_host_count": len(required_hosts), + "observed_host_count": _int(scorecard.get("observed_host_count")), + "missing_host_count": _int(scorecard.get("missing_host_count")), + "unreachable_host_count": _int(scorecard.get("unreachable_host_count")), + "stale_host_count": _int(scorecard.get("stale_host_count")), + "selector_source": "P0-006 committed reboot auto-recovery scorecard", + } + current_readback = { + "scorecard_status": str(scorecard.get("status") or ""), + "readiness_percent": _int(scorecard.get("readiness_percent")), + "active_blocker_count": _int(scorecard.get("active_blocker_count")), + "active_blockers": active_blockers, + "service_green": scorecard.get("service_green") is True, + "product_data_green": scorecard.get("product_data_green") is True, + "backup_core_green": scorecard.get("backup_core_green") is True, + "post_start_blocked": _int(rollups.get("post_start_blocked")), + "latest_verify_only_metric_ready": _int( + scorecard.get("latest_verify_only_metric_ready") + ), + "latest_verify_only_metric_blocker_count": _int( + scorecard.get("latest_verify_only_metric_blocker_count") + ), + "latest_verify_only_metric_max_host_uptime_seconds": _int( + scorecard.get("latest_verify_only_metric_max_host_uptime_seconds") + ), + "stockplatform_freshness_status": str( + scorecard.get("stockplatform_freshness_status") or "" + ), + "stockplatform_ingestion_status": str( + scorecard.get("stockplatform_ingestion_status") or "" + ), + } + check_mode = { + "verify_only_available": True, + "verify_only_source": ( + "scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py" + ), + "post_apply_verifier_endpoint": ( + "/api/v1/agents/reboot-auto-recovery-slo-scorecard" + ), + "expected_after_real_fresh_boot_or_approved_drill": { + "status": "slo_ready", + "active_blocker_count": 0, + "latest_verify_only_metric_ready": 1, + "latest_verify_only_metric_blocker_count": 0, + "max_host_uptime_seconds_lte": target_seconds, + }, + } + rollback_plan = { + "preflight_is_read_only": True, + "rollback_required_for_this_endpoint": False, + "if_separately_approved_drill_fails": [ + "stop further reboot waves", + "run post-reboot readiness summary", + "keep startup recovery units as the recovery source of truth", + "rerun SLO verify-only and expose blockers without manual DB writes", + ], + } + boundaries = { + **operation_boundaries, + "host_reboot_authorized_by_this_endpoint": False, + "host_reboot_performed": False, + "service_restart_performed": False, + "database_write_or_restore_performed": False, + "workflow_trigger_performed": False, + "secret_value_collection_allowed": False, + "github_api_used": False, + "runtime_write_allowed": False, + } return { "schema_version": _API_SCHEMA_VERSION, "generated_at": str(scorecard.get("generated_at") or ""), @@ -76,83 +155,53 @@ def _build_payload(scorecard: dict[str, Any]) -> dict[str, Any]: "preflight_blocker_count": blocker_count, "break_glass_authorization_required": True, "execution_authorized_by_this_endpoint": False, - "safe_next_step": ( - "collect_separate_reboot_drill_authorization_or_wait_for_next_" - "real_all_host_reboot_event_then_rerun_verify_only" - ), - "target_selector": { - "scope": "awoooi_p0_reboot_slo_hosts", - "required_host_aliases": required_hosts, - "required_host_count": len(required_hosts), - "observed_host_count": _int(scorecard.get("observed_host_count")), - "missing_host_count": _int(scorecard.get("missing_host_count")), - "unreachable_host_count": _int(scorecard.get("unreachable_host_count")), - "stale_host_count": _int(scorecard.get("stale_host_count")), - "selector_source": "P0-006 committed reboot auto-recovery scorecard", - }, + "safe_next_step": safe_next_step, + "target_selector": target_selector, "preconditions": preconditions, - "current_readback": { - "scorecard_status": str(scorecard.get("status") or ""), - "readiness_percent": _int(scorecard.get("readiness_percent")), - "active_blocker_count": _int(scorecard.get("active_blocker_count")), - "active_blockers": active_blockers, - "service_green": scorecard.get("service_green") is True, - "product_data_green": scorecard.get("product_data_green") is True, - "backup_core_green": scorecard.get("backup_core_green") is True, - "post_start_blocked": _int(rollups.get("post_start_blocked")), - "latest_verify_only_metric_ready": _int( - scorecard.get("latest_verify_only_metric_ready") - ), - "latest_verify_only_metric_blocker_count": _int( - scorecard.get("latest_verify_only_metric_blocker_count") - ), - "latest_verify_only_metric_max_host_uptime_seconds": _int( - scorecard.get("latest_verify_only_metric_max_host_uptime_seconds") - ), - "stockplatform_freshness_status": str( - scorecard.get("stockplatform_freshness_status") or "" - ), - "stockplatform_ingestion_status": str( - scorecard.get("stockplatform_ingestion_status") or "" - ), + "current_readback": current_readback, + "check_mode": check_mode, + "rollback_plan": rollback_plan, + "readback": { + "workplan_id": "P0-006", + "workplan_title": "reboot auto-recovery 10-minute SLO", + "source_scorecard_status": str(scorecard.get("status") or ""), + "preflight_ready": preflight_ready, + "break_glass_authorization_required": True, + "execution_authorized_by_this_endpoint": False, + "target_selector": target_selector, + "current_readback": current_readback, + "check_mode": check_mode, + "rollback_plan": rollback_plan, + "safe_next_step": safe_next_step, }, - "check_mode": { - "verify_only_available": True, - "verify_only_source": ( - "scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py" - ), - "post_apply_verifier_endpoint": ( - "/api/v1/agents/reboot-auto-recovery-slo-scorecard" - ), - "expected_after_real_fresh_boot_or_approved_drill": { - "status": "slo_ready", - "active_blocker_count": 0, - "latest_verify_only_metric_ready": 1, - "latest_verify_only_metric_blocker_count": 0, - "max_host_uptime_seconds_lte": target_seconds, - }, - }, - "rollback_plan": { - "preflight_is_read_only": True, - "rollback_required_for_this_endpoint": False, - "if_separately_approved_drill_fails": [ - "stop further reboot waves", - "run post-reboot readiness summary", - "keep startup recovery units as the recovery source of truth", - "rerun SLO verify-only and expose blockers without manual DB writes", + "rollups": { + "preflight_ready": preflight_ready, + "preflight_ready_count": int(preflight_ready), + "preflight_blocker_count": blocker_count, + "target_required_host_count": len(required_hosts), + "target_observed_host_count": target_selector["observed_host_count"], + "target_missing_host_count": target_selector["missing_host_count"], + "target_unreachable_host_count": target_selector[ + "unreachable_host_count" ], - }, - "operation_boundaries": { - **operation_boundaries, + "target_stale_host_count": target_selector["stale_host_count"], + "service_green": preconditions["service_green"], + "product_data_green": preconditions["product_data_green"], + "backup_core_green": preconditions["backup_core_green"], + "stockplatform_freshness_ok": preconditions["stockplatform_freshness_ok"], + "stockplatform_ingestion_ok": preconditions["stockplatform_ingestion_ok"], + "blocked_only_by_fresh_reboot_window": preconditions[ + "blocked_only_by_fresh_reboot_window" + ], + "break_glass_authorization_required": True, + "execution_authorized_by_this_endpoint": False, "host_reboot_authorized_by_this_endpoint": False, - "host_reboot_performed": False, - "service_restart_performed": False, - "database_write_or_restore_performed": False, "workflow_trigger_performed": False, "secret_value_collection_allowed": False, - "github_api_used": False, "runtime_write_allowed": False, }, + "active_blockers": preflight_blockers, + "operation_boundaries": boundaries, "forbidden_without_separate_break_glass": [ "host_reboot", "node_drain", diff --git a/apps/api/tests/test_reboot_auto_recovery_slo_scorecard_api.py b/apps/api/tests/test_reboot_auto_recovery_slo_scorecard_api.py index f1034f70..9bde1c08 100644 --- a/apps/api/tests/test_reboot_auto_recovery_slo_scorecard_api.py +++ b/apps/api/tests/test_reboot_auto_recovery_slo_scorecard_api.py @@ -145,6 +145,7 @@ def _assert_drill_preflight_payload(payload: dict): assert payload["status"] == "ready_for_break_glass_reboot_drill_authorization" assert payload["preflight_ready"] is True assert payload["preflight_blocker_count"] == 0 + assert payload["active_blockers"] == [] assert payload["break_glass_authorization_required"] is True assert payload["execution_authorized_by_this_endpoint"] is False assert payload["safe_next_step"] == ( @@ -198,6 +199,20 @@ def _assert_drill_preflight_payload(payload: dict): rollback = payload["rollback_plan"] assert rollback["preflight_is_read_only"] is True assert rollback["rollback_required_for_this_endpoint"] is False + readback = payload["readback"] + assert readback["workplan_id"] == "P0-006" + assert readback["preflight_ready"] is True + assert readback["target_selector"]["required_host_count"] == 4 + assert readback["current_readback"]["active_blocker_count"] == 1 + assert readback["safe_next_step"] == payload["safe_next_step"] + rollups = payload["rollups"] + assert rollups["preflight_ready"] is True + assert rollups["preflight_ready_count"] == 1 + assert rollups["preflight_blocker_count"] == 0 + assert rollups["target_required_host_count"] == 4 + assert rollups["target_observed_host_count"] == 4 + assert rollups["blocked_only_by_fresh_reboot_window"] is True + assert rollups["host_reboot_authorized_by_this_endpoint"] is False boundaries = payload["operation_boundaries"] assert boundaries["host_reboot_authorized_by_this_endpoint"] is False assert boundaries["host_reboot_performed"] is False diff --git a/apps/web/src/lib/api-client.ts b/apps/web/src/lib/api-client.ts index dab836e3..408ca406 100644 --- a/apps/web/src/lib/api-client.ts +++ b/apps/web/src/lib/api-client.ts @@ -2595,6 +2595,42 @@ export interface RebootAutoRecoveryDrillPreflightSnapshot { break_glass_authorization_required: boolean execution_authorized_by_this_endpoint: boolean safe_next_step: string + readback: { + workplan_id: 'P0-006' + workplan_title: string + source_scorecard_status: string + preflight_ready: boolean + break_glass_authorization_required: boolean + execution_authorized_by_this_endpoint: boolean + target_selector: RebootAutoRecoveryDrillPreflightSnapshot['target_selector'] + current_readback: RebootAutoRecoveryDrillPreflightSnapshot['current_readback'] + check_mode: RebootAutoRecoveryDrillPreflightSnapshot['check_mode'] + rollback_plan: RebootAutoRecoveryDrillPreflightSnapshot['rollback_plan'] + safe_next_step: string + } + rollups: { + preflight_ready: boolean + preflight_ready_count: number + preflight_blocker_count: number + target_required_host_count: number + target_observed_host_count: number + target_missing_host_count: number + target_unreachable_host_count: number + target_stale_host_count: number + service_green: boolean + product_data_green: boolean + backup_core_green: boolean + stockplatform_freshness_ok: boolean + stockplatform_ingestion_ok: boolean + blocked_only_by_fresh_reboot_window: boolean + break_glass_authorization_required: boolean + execution_authorized_by_this_endpoint: boolean + host_reboot_authorized_by_this_endpoint: boolean + workflow_trigger_performed: boolean + secret_value_collection_allowed: boolean + runtime_write_allowed: boolean + } + active_blockers: string[] target_selector: { scope: string required_host_aliases: string[] diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 2c3f2162..162fed81 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,17 @@ +## 2026-06-30 — 01:02 P0-006 drill preflight machine-readback shape + +**照優先順序完成的實作**: +- P0-005 credential escrow 與 P0-003 Gitea private inventory production readback 均已 closed,主線回到 P0-006。 +- Production `/api/v1/agents/reboot-auto-recovery-drill-preflight` 已有 target selector / preconditions / verifier / rollback / boundary top-level 欄位,但通用 readback 查詢 `readback`、`rollups`、`active_blockers` 會得到空值;本輪補成機器可讀相容 shape,不改既有 top-level contract。 +- 新增 `readback.workplan_id=P0-006`、`rollups.preflight_ready_count=1`、`rollups.host_reboot_authorized_by_this_endpoint=false` 與 `active_blockers=[]`,讓 Delivery / automation / handoff reader 可不靠特殊欄位解析 P0-006 drill preflight。 + +**驗證**: +- Focused pytest:P0-006 / Delivery Workbench / P0-004 runtime enablement / CD profile `40 passed`。 +- `py_compile`、Web typecheck、Gitea runner pressure guard、Gitea step env secret guard:通過。 +- 本地 loader 讀回 `status=ready_for_break_glass_reboot_drill_authorization`、`readback_workplan_id=P0-006`、`rollups_preflight_ready_count=1`、`active_blockers=[]`。 + +**邊界**:未重啟主機,未 restart service,未 workflow_dispatch,未操作 host / Docker / K8s / DB / firewall,未使用 GitHub / `gh` / GitHub API,未讀 secret / token / raw sessions / SQLite / `.env`。 + ## 2026-06-30 — 00:41 P0-004 template copy receipt runtime-image readback 修正 **照優先順序完成的實作**: