fix(api): expose reboot drill preflight readback rollups
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 22s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled

This commit is contained in:
Your Name
2026-06-30 01:04:07 +08:00
parent f1ad1f000c
commit 88145ea07c
4 changed files with 184 additions and 70 deletions

View File

@@ -60,12 +60,91 @@ def _build_payload(scorecard: dict[str, Any]) -> dict[str, Any]:
"blocked_only_by_fresh_reboot_window": active_blockers == [_FRESH_BOOT_BLOCKER],
}
preflight_ready = all(preconditions.values())
blocker_count = sum(1 for value in preconditions.values() if not value)
preflight_blockers = [
f"{name}_not_ready" for name, value in preconditions.items() if not value
]
blocker_count = len(preflight_blockers)
status = (
"ready_for_break_glass_reboot_drill_authorization"
if preflight_ready
else "blocked_reboot_drill_preflight_not_ready"
)
safe_next_step = (
"collect_separate_reboot_drill_authorization_or_wait_for_next_"
"real_all_host_reboot_event_then_rerun_verify_only"
)
target_selector = {
"scope": "awoooi_p0_reboot_slo_hosts",
"required_host_aliases": required_hosts,
"required_host_count": len(required_hosts),
"observed_host_count": _int(scorecard.get("observed_host_count")),
"missing_host_count": _int(scorecard.get("missing_host_count")),
"unreachable_host_count": _int(scorecard.get("unreachable_host_count")),
"stale_host_count": _int(scorecard.get("stale_host_count")),
"selector_source": "P0-006 committed reboot auto-recovery scorecard",
}
current_readback = {
"scorecard_status": str(scorecard.get("status") or ""),
"readiness_percent": _int(scorecard.get("readiness_percent")),
"active_blocker_count": _int(scorecard.get("active_blocker_count")),
"active_blockers": active_blockers,
"service_green": scorecard.get("service_green") is True,
"product_data_green": scorecard.get("product_data_green") is True,
"backup_core_green": scorecard.get("backup_core_green") is True,
"post_start_blocked": _int(rollups.get("post_start_blocked")),
"latest_verify_only_metric_ready": _int(
scorecard.get("latest_verify_only_metric_ready")
),
"latest_verify_only_metric_blocker_count": _int(
scorecard.get("latest_verify_only_metric_blocker_count")
),
"latest_verify_only_metric_max_host_uptime_seconds": _int(
scorecard.get("latest_verify_only_metric_max_host_uptime_seconds")
),
"stockplatform_freshness_status": str(
scorecard.get("stockplatform_freshness_status") or ""
),
"stockplatform_ingestion_status": str(
scorecard.get("stockplatform_ingestion_status") or ""
),
}
check_mode = {
"verify_only_available": True,
"verify_only_source": (
"scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py"
),
"post_apply_verifier_endpoint": (
"/api/v1/agents/reboot-auto-recovery-slo-scorecard"
),
"expected_after_real_fresh_boot_or_approved_drill": {
"status": "slo_ready",
"active_blocker_count": 0,
"latest_verify_only_metric_ready": 1,
"latest_verify_only_metric_blocker_count": 0,
"max_host_uptime_seconds_lte": target_seconds,
},
}
rollback_plan = {
"preflight_is_read_only": True,
"rollback_required_for_this_endpoint": False,
"if_separately_approved_drill_fails": [
"stop further reboot waves",
"run post-reboot readiness summary",
"keep startup recovery units as the recovery source of truth",
"rerun SLO verify-only and expose blockers without manual DB writes",
],
}
boundaries = {
**operation_boundaries,
"host_reboot_authorized_by_this_endpoint": False,
"host_reboot_performed": False,
"service_restart_performed": False,
"database_write_or_restore_performed": False,
"workflow_trigger_performed": False,
"secret_value_collection_allowed": False,
"github_api_used": False,
"runtime_write_allowed": False,
}
return {
"schema_version": _API_SCHEMA_VERSION,
"generated_at": str(scorecard.get("generated_at") or ""),
@@ -76,83 +155,53 @@ def _build_payload(scorecard: dict[str, Any]) -> dict[str, Any]:
"preflight_blocker_count": blocker_count,
"break_glass_authorization_required": True,
"execution_authorized_by_this_endpoint": False,
"safe_next_step": (
"collect_separate_reboot_drill_authorization_or_wait_for_next_"
"real_all_host_reboot_event_then_rerun_verify_only"
),
"target_selector": {
"scope": "awoooi_p0_reboot_slo_hosts",
"required_host_aliases": required_hosts,
"required_host_count": len(required_hosts),
"observed_host_count": _int(scorecard.get("observed_host_count")),
"missing_host_count": _int(scorecard.get("missing_host_count")),
"unreachable_host_count": _int(scorecard.get("unreachable_host_count")),
"stale_host_count": _int(scorecard.get("stale_host_count")),
"selector_source": "P0-006 committed reboot auto-recovery scorecard",
},
"safe_next_step": safe_next_step,
"target_selector": target_selector,
"preconditions": preconditions,
"current_readback": {
"scorecard_status": str(scorecard.get("status") or ""),
"readiness_percent": _int(scorecard.get("readiness_percent")),
"active_blocker_count": _int(scorecard.get("active_blocker_count")),
"active_blockers": active_blockers,
"service_green": scorecard.get("service_green") is True,
"product_data_green": scorecard.get("product_data_green") is True,
"backup_core_green": scorecard.get("backup_core_green") is True,
"post_start_blocked": _int(rollups.get("post_start_blocked")),
"latest_verify_only_metric_ready": _int(
scorecard.get("latest_verify_only_metric_ready")
),
"latest_verify_only_metric_blocker_count": _int(
scorecard.get("latest_verify_only_metric_blocker_count")
),
"latest_verify_only_metric_max_host_uptime_seconds": _int(
scorecard.get("latest_verify_only_metric_max_host_uptime_seconds")
),
"stockplatform_freshness_status": str(
scorecard.get("stockplatform_freshness_status") or ""
),
"stockplatform_ingestion_status": str(
scorecard.get("stockplatform_ingestion_status") or ""
),
"current_readback": current_readback,
"check_mode": check_mode,
"rollback_plan": rollback_plan,
"readback": {
"workplan_id": "P0-006",
"workplan_title": "reboot auto-recovery 10-minute SLO",
"source_scorecard_status": str(scorecard.get("status") or ""),
"preflight_ready": preflight_ready,
"break_glass_authorization_required": True,
"execution_authorized_by_this_endpoint": False,
"target_selector": target_selector,
"current_readback": current_readback,
"check_mode": check_mode,
"rollback_plan": rollback_plan,
"safe_next_step": safe_next_step,
},
"check_mode": {
"verify_only_available": True,
"verify_only_source": (
"scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py"
),
"post_apply_verifier_endpoint": (
"/api/v1/agents/reboot-auto-recovery-slo-scorecard"
),
"expected_after_real_fresh_boot_or_approved_drill": {
"status": "slo_ready",
"active_blocker_count": 0,
"latest_verify_only_metric_ready": 1,
"latest_verify_only_metric_blocker_count": 0,
"max_host_uptime_seconds_lte": target_seconds,
},
},
"rollback_plan": {
"preflight_is_read_only": True,
"rollback_required_for_this_endpoint": False,
"if_separately_approved_drill_fails": [
"stop further reboot waves",
"run post-reboot readiness summary",
"keep startup recovery units as the recovery source of truth",
"rerun SLO verify-only and expose blockers without manual DB writes",
"rollups": {
"preflight_ready": preflight_ready,
"preflight_ready_count": int(preflight_ready),
"preflight_blocker_count": blocker_count,
"target_required_host_count": len(required_hosts),
"target_observed_host_count": target_selector["observed_host_count"],
"target_missing_host_count": target_selector["missing_host_count"],
"target_unreachable_host_count": target_selector[
"unreachable_host_count"
],
},
"operation_boundaries": {
**operation_boundaries,
"target_stale_host_count": target_selector["stale_host_count"],
"service_green": preconditions["service_green"],
"product_data_green": preconditions["product_data_green"],
"backup_core_green": preconditions["backup_core_green"],
"stockplatform_freshness_ok": preconditions["stockplatform_freshness_ok"],
"stockplatform_ingestion_ok": preconditions["stockplatform_ingestion_ok"],
"blocked_only_by_fresh_reboot_window": preconditions[
"blocked_only_by_fresh_reboot_window"
],
"break_glass_authorization_required": True,
"execution_authorized_by_this_endpoint": False,
"host_reboot_authorized_by_this_endpoint": False,
"host_reboot_performed": False,
"service_restart_performed": False,
"database_write_or_restore_performed": False,
"workflow_trigger_performed": False,
"secret_value_collection_allowed": False,
"github_api_used": False,
"runtime_write_allowed": False,
},
"active_blockers": preflight_blockers,
"operation_boundaries": boundaries,
"forbidden_without_separate_break_glass": [
"host_reboot",
"node_drain",

View File

@@ -145,6 +145,7 @@ def _assert_drill_preflight_payload(payload: dict):
assert payload["status"] == "ready_for_break_glass_reboot_drill_authorization"
assert payload["preflight_ready"] is True
assert payload["preflight_blocker_count"] == 0
assert payload["active_blockers"] == []
assert payload["break_glass_authorization_required"] is True
assert payload["execution_authorized_by_this_endpoint"] is False
assert payload["safe_next_step"] == (
@@ -198,6 +199,20 @@ def _assert_drill_preflight_payload(payload: dict):
rollback = payload["rollback_plan"]
assert rollback["preflight_is_read_only"] is True
assert rollback["rollback_required_for_this_endpoint"] is False
readback = payload["readback"]
assert readback["workplan_id"] == "P0-006"
assert readback["preflight_ready"] is True
assert readback["target_selector"]["required_host_count"] == 4
assert readback["current_readback"]["active_blocker_count"] == 1
assert readback["safe_next_step"] == payload["safe_next_step"]
rollups = payload["rollups"]
assert rollups["preflight_ready"] is True
assert rollups["preflight_ready_count"] == 1
assert rollups["preflight_blocker_count"] == 0
assert rollups["target_required_host_count"] == 4
assert rollups["target_observed_host_count"] == 4
assert rollups["blocked_only_by_fresh_reboot_window"] is True
assert rollups["host_reboot_authorized_by_this_endpoint"] is False
boundaries = payload["operation_boundaries"]
assert boundaries["host_reboot_authorized_by_this_endpoint"] is False
assert boundaries["host_reboot_performed"] is False

View File

@@ -2595,6 +2595,42 @@ export interface RebootAutoRecoveryDrillPreflightSnapshot {
break_glass_authorization_required: boolean
execution_authorized_by_this_endpoint: boolean
safe_next_step: string
readback: {
workplan_id: 'P0-006'
workplan_title: string
source_scorecard_status: string
preflight_ready: boolean
break_glass_authorization_required: boolean
execution_authorized_by_this_endpoint: boolean
target_selector: RebootAutoRecoveryDrillPreflightSnapshot['target_selector']
current_readback: RebootAutoRecoveryDrillPreflightSnapshot['current_readback']
check_mode: RebootAutoRecoveryDrillPreflightSnapshot['check_mode']
rollback_plan: RebootAutoRecoveryDrillPreflightSnapshot['rollback_plan']
safe_next_step: string
}
rollups: {
preflight_ready: boolean
preflight_ready_count: number
preflight_blocker_count: number
target_required_host_count: number
target_observed_host_count: number
target_missing_host_count: number
target_unreachable_host_count: number
target_stale_host_count: number
service_green: boolean
product_data_green: boolean
backup_core_green: boolean
stockplatform_freshness_ok: boolean
stockplatform_ingestion_ok: boolean
blocked_only_by_fresh_reboot_window: boolean
break_glass_authorization_required: boolean
execution_authorized_by_this_endpoint: boolean
host_reboot_authorized_by_this_endpoint: boolean
workflow_trigger_performed: boolean
secret_value_collection_allowed: boolean
runtime_write_allowed: boolean
}
active_blockers: string[]
target_selector: {
scope: string
required_host_aliases: string[]

View File

@@ -1,3 +1,17 @@
## 2026-06-30 — 01:02 P0-006 drill preflight machine-readback shape
**照優先順序完成的實作**
- P0-005 credential escrow 與 P0-003 Gitea private inventory production readback 均已 closed，主線回到 P0-006。
- Production `/api/v1/agents/reboot-auto-recovery-drill-preflight` 已有 target selector / preconditions / verifier / rollback / boundary top-level 欄位，但通用 readback 查詢 `readback`、`rollups`、`active_blockers` 會得到空值；本輪補成機器可讀相容 shape，不改既有 top-level contract。
- 新增 `readback.workplan_id=P0-006`、`rollups.preflight_ready_count=1`、`rollups.host_reboot_authorized_by_this_endpoint=false`、`active_blockers=[]`，讓 Delivery / automation / handoff reader 可不靠特殊欄位解析 P0-006 drill preflight。
**驗證**
- Focused pytest（P0-006 / Delivery Workbench / P0-004 runtime enablement / CD profile）：`40 passed`。
- `py_compile`、Web typecheck、Gitea runner pressure guard、Gitea step env secret guard：通過。
- 本地 loader 讀回 `status=ready_for_break_glass_reboot_drill_authorization`、`readback_workplan_id=P0-006`、`rollups_preflight_ready_count=1`、`active_blockers=[]`。
**邊界**：未重啟主機，未 restart service，未 workflow_dispatch，未操作 host / Docker / K8s / DB / firewall，未使用 GitHub / `gh` / GitHub API，未讀 secret / token / raw sessions / SQLite / `.env`。
## 2026-06-30 — 00:41 P0-004 template copy receipt runtime-image readback 修正
**照優先順序完成的實作**