fix(api): expose reboot drill preflight readback rollups
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 22s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 22s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
This commit is contained in:
@@ -60,12 +60,91 @@ def _build_payload(scorecard: dict[str, Any]) -> dict[str, Any]:
|
||||
"blocked_only_by_fresh_reboot_window": active_blockers == [_FRESH_BOOT_BLOCKER],
|
||||
}
|
||||
preflight_ready = all(preconditions.values())
|
||||
blocker_count = sum(1 for value in preconditions.values() if not value)
|
||||
preflight_blockers = [
|
||||
f"{name}_not_ready" for name, value in preconditions.items() if not value
|
||||
]
|
||||
blocker_count = len(preflight_blockers)
|
||||
status = (
|
||||
"ready_for_break_glass_reboot_drill_authorization"
|
||||
if preflight_ready
|
||||
else "blocked_reboot_drill_preflight_not_ready"
|
||||
)
|
||||
safe_next_step = (
|
||||
"collect_separate_reboot_drill_authorization_or_wait_for_next_"
|
||||
"real_all_host_reboot_event_then_rerun_verify_only"
|
||||
)
|
||||
target_selector = {
|
||||
"scope": "awoooi_p0_reboot_slo_hosts",
|
||||
"required_host_aliases": required_hosts,
|
||||
"required_host_count": len(required_hosts),
|
||||
"observed_host_count": _int(scorecard.get("observed_host_count")),
|
||||
"missing_host_count": _int(scorecard.get("missing_host_count")),
|
||||
"unreachable_host_count": _int(scorecard.get("unreachable_host_count")),
|
||||
"stale_host_count": _int(scorecard.get("stale_host_count")),
|
||||
"selector_source": "P0-006 committed reboot auto-recovery scorecard",
|
||||
}
|
||||
current_readback = {
|
||||
"scorecard_status": str(scorecard.get("status") or ""),
|
||||
"readiness_percent": _int(scorecard.get("readiness_percent")),
|
||||
"active_blocker_count": _int(scorecard.get("active_blocker_count")),
|
||||
"active_blockers": active_blockers,
|
||||
"service_green": scorecard.get("service_green") is True,
|
||||
"product_data_green": scorecard.get("product_data_green") is True,
|
||||
"backup_core_green": scorecard.get("backup_core_green") is True,
|
||||
"post_start_blocked": _int(rollups.get("post_start_blocked")),
|
||||
"latest_verify_only_metric_ready": _int(
|
||||
scorecard.get("latest_verify_only_metric_ready")
|
||||
),
|
||||
"latest_verify_only_metric_blocker_count": _int(
|
||||
scorecard.get("latest_verify_only_metric_blocker_count")
|
||||
),
|
||||
"latest_verify_only_metric_max_host_uptime_seconds": _int(
|
||||
scorecard.get("latest_verify_only_metric_max_host_uptime_seconds")
|
||||
),
|
||||
"stockplatform_freshness_status": str(
|
||||
scorecard.get("stockplatform_freshness_status") or ""
|
||||
),
|
||||
"stockplatform_ingestion_status": str(
|
||||
scorecard.get("stockplatform_ingestion_status") or ""
|
||||
),
|
||||
}
|
||||
check_mode = {
|
||||
"verify_only_available": True,
|
||||
"verify_only_source": (
|
||||
"scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py"
|
||||
),
|
||||
"post_apply_verifier_endpoint": (
|
||||
"/api/v1/agents/reboot-auto-recovery-slo-scorecard"
|
||||
),
|
||||
"expected_after_real_fresh_boot_or_approved_drill": {
|
||||
"status": "slo_ready",
|
||||
"active_blocker_count": 0,
|
||||
"latest_verify_only_metric_ready": 1,
|
||||
"latest_verify_only_metric_blocker_count": 0,
|
||||
"max_host_uptime_seconds_lte": target_seconds,
|
||||
},
|
||||
}
|
||||
rollback_plan = {
|
||||
"preflight_is_read_only": True,
|
||||
"rollback_required_for_this_endpoint": False,
|
||||
"if_separately_approved_drill_fails": [
|
||||
"stop further reboot waves",
|
||||
"run post-reboot readiness summary",
|
||||
"keep startup recovery units as the recovery source of truth",
|
||||
"rerun SLO verify-only and expose blockers without manual DB writes",
|
||||
],
|
||||
}
|
||||
boundaries = {
|
||||
**operation_boundaries,
|
||||
"host_reboot_authorized_by_this_endpoint": False,
|
||||
"host_reboot_performed": False,
|
||||
"service_restart_performed": False,
|
||||
"database_write_or_restore_performed": False,
|
||||
"workflow_trigger_performed": False,
|
||||
"secret_value_collection_allowed": False,
|
||||
"github_api_used": False,
|
||||
"runtime_write_allowed": False,
|
||||
}
|
||||
return {
|
||||
"schema_version": _API_SCHEMA_VERSION,
|
||||
"generated_at": str(scorecard.get("generated_at") or ""),
|
||||
@@ -76,83 +155,53 @@ def _build_payload(scorecard: dict[str, Any]) -> dict[str, Any]:
|
||||
"preflight_blocker_count": blocker_count,
|
||||
"break_glass_authorization_required": True,
|
||||
"execution_authorized_by_this_endpoint": False,
|
||||
"safe_next_step": (
|
||||
"collect_separate_reboot_drill_authorization_or_wait_for_next_"
|
||||
"real_all_host_reboot_event_then_rerun_verify_only"
|
||||
),
|
||||
"target_selector": {
|
||||
"scope": "awoooi_p0_reboot_slo_hosts",
|
||||
"required_host_aliases": required_hosts,
|
||||
"required_host_count": len(required_hosts),
|
||||
"observed_host_count": _int(scorecard.get("observed_host_count")),
|
||||
"missing_host_count": _int(scorecard.get("missing_host_count")),
|
||||
"unreachable_host_count": _int(scorecard.get("unreachable_host_count")),
|
||||
"stale_host_count": _int(scorecard.get("stale_host_count")),
|
||||
"selector_source": "P0-006 committed reboot auto-recovery scorecard",
|
||||
},
|
||||
"safe_next_step": safe_next_step,
|
||||
"target_selector": target_selector,
|
||||
"preconditions": preconditions,
|
||||
"current_readback": {
|
||||
"scorecard_status": str(scorecard.get("status") or ""),
|
||||
"readiness_percent": _int(scorecard.get("readiness_percent")),
|
||||
"active_blocker_count": _int(scorecard.get("active_blocker_count")),
|
||||
"active_blockers": active_blockers,
|
||||
"service_green": scorecard.get("service_green") is True,
|
||||
"product_data_green": scorecard.get("product_data_green") is True,
|
||||
"backup_core_green": scorecard.get("backup_core_green") is True,
|
||||
"post_start_blocked": _int(rollups.get("post_start_blocked")),
|
||||
"latest_verify_only_metric_ready": _int(
|
||||
scorecard.get("latest_verify_only_metric_ready")
|
||||
),
|
||||
"latest_verify_only_metric_blocker_count": _int(
|
||||
scorecard.get("latest_verify_only_metric_blocker_count")
|
||||
),
|
||||
"latest_verify_only_metric_max_host_uptime_seconds": _int(
|
||||
scorecard.get("latest_verify_only_metric_max_host_uptime_seconds")
|
||||
),
|
||||
"stockplatform_freshness_status": str(
|
||||
scorecard.get("stockplatform_freshness_status") or ""
|
||||
),
|
||||
"stockplatform_ingestion_status": str(
|
||||
scorecard.get("stockplatform_ingestion_status") or ""
|
||||
),
|
||||
"current_readback": current_readback,
|
||||
"check_mode": check_mode,
|
||||
"rollback_plan": rollback_plan,
|
||||
"readback": {
|
||||
"workplan_id": "P0-006",
|
||||
"workplan_title": "reboot auto-recovery 10-minute SLO",
|
||||
"source_scorecard_status": str(scorecard.get("status") or ""),
|
||||
"preflight_ready": preflight_ready,
|
||||
"break_glass_authorization_required": True,
|
||||
"execution_authorized_by_this_endpoint": False,
|
||||
"target_selector": target_selector,
|
||||
"current_readback": current_readback,
|
||||
"check_mode": check_mode,
|
||||
"rollback_plan": rollback_plan,
|
||||
"safe_next_step": safe_next_step,
|
||||
},
|
||||
"check_mode": {
|
||||
"verify_only_available": True,
|
||||
"verify_only_source": (
|
||||
"scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py"
|
||||
),
|
||||
"post_apply_verifier_endpoint": (
|
||||
"/api/v1/agents/reboot-auto-recovery-slo-scorecard"
|
||||
),
|
||||
"expected_after_real_fresh_boot_or_approved_drill": {
|
||||
"status": "slo_ready",
|
||||
"active_blocker_count": 0,
|
||||
"latest_verify_only_metric_ready": 1,
|
||||
"latest_verify_only_metric_blocker_count": 0,
|
||||
"max_host_uptime_seconds_lte": target_seconds,
|
||||
},
|
||||
},
|
||||
"rollback_plan": {
|
||||
"preflight_is_read_only": True,
|
||||
"rollback_required_for_this_endpoint": False,
|
||||
"if_separately_approved_drill_fails": [
|
||||
"stop further reboot waves",
|
||||
"run post-reboot readiness summary",
|
||||
"keep startup recovery units as the recovery source of truth",
|
||||
"rerun SLO verify-only and expose blockers without manual DB writes",
|
||||
"rollups": {
|
||||
"preflight_ready": preflight_ready,
|
||||
"preflight_ready_count": int(preflight_ready),
|
||||
"preflight_blocker_count": blocker_count,
|
||||
"target_required_host_count": len(required_hosts),
|
||||
"target_observed_host_count": target_selector["observed_host_count"],
|
||||
"target_missing_host_count": target_selector["missing_host_count"],
|
||||
"target_unreachable_host_count": target_selector[
|
||||
"unreachable_host_count"
|
||||
],
|
||||
},
|
||||
"operation_boundaries": {
|
||||
**operation_boundaries,
|
||||
"target_stale_host_count": target_selector["stale_host_count"],
|
||||
"service_green": preconditions["service_green"],
|
||||
"product_data_green": preconditions["product_data_green"],
|
||||
"backup_core_green": preconditions["backup_core_green"],
|
||||
"stockplatform_freshness_ok": preconditions["stockplatform_freshness_ok"],
|
||||
"stockplatform_ingestion_ok": preconditions["stockplatform_ingestion_ok"],
|
||||
"blocked_only_by_fresh_reboot_window": preconditions[
|
||||
"blocked_only_by_fresh_reboot_window"
|
||||
],
|
||||
"break_glass_authorization_required": True,
|
||||
"execution_authorized_by_this_endpoint": False,
|
||||
"host_reboot_authorized_by_this_endpoint": False,
|
||||
"host_reboot_performed": False,
|
||||
"service_restart_performed": False,
|
||||
"database_write_or_restore_performed": False,
|
||||
"workflow_trigger_performed": False,
|
||||
"secret_value_collection_allowed": False,
|
||||
"github_api_used": False,
|
||||
"runtime_write_allowed": False,
|
||||
},
|
||||
"active_blockers": preflight_blockers,
|
||||
"operation_boundaries": boundaries,
|
||||
"forbidden_without_separate_break_glass": [
|
||||
"host_reboot",
|
||||
"node_drain",
|
||||
|
||||
@@ -145,6 +145,7 @@ def _assert_drill_preflight_payload(payload: dict):
|
||||
assert payload["status"] == "ready_for_break_glass_reboot_drill_authorization"
|
||||
assert payload["preflight_ready"] is True
|
||||
assert payload["preflight_blocker_count"] == 0
|
||||
assert payload["active_blockers"] == []
|
||||
assert payload["break_glass_authorization_required"] is True
|
||||
assert payload["execution_authorized_by_this_endpoint"] is False
|
||||
assert payload["safe_next_step"] == (
|
||||
@@ -198,6 +199,20 @@ def _assert_drill_preflight_payload(payload: dict):
|
||||
rollback = payload["rollback_plan"]
|
||||
assert rollback["preflight_is_read_only"] is True
|
||||
assert rollback["rollback_required_for_this_endpoint"] is False
|
||||
readback = payload["readback"]
|
||||
assert readback["workplan_id"] == "P0-006"
|
||||
assert readback["preflight_ready"] is True
|
||||
assert readback["target_selector"]["required_host_count"] == 4
|
||||
assert readback["current_readback"]["active_blocker_count"] == 1
|
||||
assert readback["safe_next_step"] == payload["safe_next_step"]
|
||||
rollups = payload["rollups"]
|
||||
assert rollups["preflight_ready"] is True
|
||||
assert rollups["preflight_ready_count"] == 1
|
||||
assert rollups["preflight_blocker_count"] == 0
|
||||
assert rollups["target_required_host_count"] == 4
|
||||
assert rollups["target_observed_host_count"] == 4
|
||||
assert rollups["blocked_only_by_fresh_reboot_window"] is True
|
||||
assert rollups["host_reboot_authorized_by_this_endpoint"] is False
|
||||
boundaries = payload["operation_boundaries"]
|
||||
assert boundaries["host_reboot_authorized_by_this_endpoint"] is False
|
||||
assert boundaries["host_reboot_performed"] is False
|
||||
|
||||
@@ -2595,6 +2595,42 @@ export interface RebootAutoRecoveryDrillPreflightSnapshot {
|
||||
break_glass_authorization_required: boolean
|
||||
execution_authorized_by_this_endpoint: boolean
|
||||
safe_next_step: string
|
||||
readback: {
|
||||
workplan_id: 'P0-006'
|
||||
workplan_title: string
|
||||
source_scorecard_status: string
|
||||
preflight_ready: boolean
|
||||
break_glass_authorization_required: boolean
|
||||
execution_authorized_by_this_endpoint: boolean
|
||||
target_selector: RebootAutoRecoveryDrillPreflightSnapshot['target_selector']
|
||||
current_readback: RebootAutoRecoveryDrillPreflightSnapshot['current_readback']
|
||||
check_mode: RebootAutoRecoveryDrillPreflightSnapshot['check_mode']
|
||||
rollback_plan: RebootAutoRecoveryDrillPreflightSnapshot['rollback_plan']
|
||||
safe_next_step: string
|
||||
}
|
||||
rollups: {
|
||||
preflight_ready: boolean
|
||||
preflight_ready_count: number
|
||||
preflight_blocker_count: number
|
||||
target_required_host_count: number
|
||||
target_observed_host_count: number
|
||||
target_missing_host_count: number
|
||||
target_unreachable_host_count: number
|
||||
target_stale_host_count: number
|
||||
service_green: boolean
|
||||
product_data_green: boolean
|
||||
backup_core_green: boolean
|
||||
stockplatform_freshness_ok: boolean
|
||||
stockplatform_ingestion_ok: boolean
|
||||
blocked_only_by_fresh_reboot_window: boolean
|
||||
break_glass_authorization_required: boolean
|
||||
execution_authorized_by_this_endpoint: boolean
|
||||
host_reboot_authorized_by_this_endpoint: boolean
|
||||
workflow_trigger_performed: boolean
|
||||
secret_value_collection_allowed: boolean
|
||||
runtime_write_allowed: boolean
|
||||
}
|
||||
active_blockers: string[]
|
||||
target_selector: {
|
||||
scope: string
|
||||
required_host_aliases: string[]
|
||||
|
||||
@@ -1,3 +1,17 @@
|
||||
## 2026-06-30 — 01:02 P0-006 drill preflight machine-readback shape
|
||||
|
||||
**照優先順序完成的實作**:
|
||||
- P0-005 credential escrow 與 P0-003 Gitea private inventory production readback 均已 closed,主線回到 P0-006。
|
||||
- Production `/api/v1/agents/reboot-auto-recovery-drill-preflight` 已有 target selector / preconditions / verifier / rollback / boundary top-level 欄位,但通用 readback 查詢 `readback`、`rollups`、`active_blockers` 會得到空值;本輪補成機器可讀相容 shape,不改既有 top-level contract。
|
||||
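- 新增 `readback.workplan_id=P0-006`、`rollups.preflight_ready_count=1`、`rollups.host_reboot_authorized_by_this_endpoint=false` 與 `active_blockers=[]`,讓 Delivery / automation / handoff reader 可不靠特殊欄位解析 P0-006 drill preflight。注意:top-level `active_blockers` 列的是未通過的 preflight precondition(格式為 `<precondition>_not_ready`),與 `current_readback.active_blockers`(scorecard 本身的 active blockers)不同;preflight ready 時後者仍會含 fresh reboot window 這一筆 blocker,因此 `current_readback.active_blocker_count=1` 與 `active_blockers=[]` 同時成立並不矛盾。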
|
||||
|
||||
**驗證**:
|
||||
- Focused pytest:P0-006 / Delivery Workbench / P0-004 runtime enablement / CD profile `40 passed`。
|
||||
- `py_compile`、Web typecheck、Gitea runner pressure guard、Gitea step env secret guard:通過。
|
||||
- 本地 loader 讀回 `status=ready_for_break_glass_reboot_drill_authorization`、`readback_workplan_id=P0-006`、`rollups_preflight_ready_count=1`、`active_blockers=[]`。
|
||||
|
||||
**邊界**:未重啟主機,未 restart service,未 workflow_dispatch,未操作 host / Docker / K8s / DB / firewall,未使用 GitHub / `gh` / GitHub API,未讀 secret / token / raw sessions / SQLite / `.env`。
|
||||
|
||||
## 2026-06-30 — 00:41 P0-004 template copy receipt runtime-image readback 修正
|
||||
|
||||
**照優先順序完成的實作**:
|
||||
|
||||
Reference in New Issue
Block a user