feat(api): expose reboot SLO readback rollups
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 22s
CD Pipeline / build-and-deploy (push) Successful in 4m10s
CD Pipeline / post-deploy-checks (push) Successful in 1m1s

This commit is contained in:
Your Name
2026-06-29 21:46:34 +08:00
parent 7d9df2bf30
commit f426522b59
3 changed files with 131 additions and 37 deletions

View File

@@ -66,6 +66,68 @@ def _build_payload(scorecard: dict[str, Any], path: Path) -> dict[str, Any]:
can_claim_slo = (
scorecard.get("can_claim_all_services_recovered_within_target") is True
)
latest_verify_metric = _dict(scorecard.get("latest_verify_only_metric"))
active_blocker_count = len(active_blockers)
observed_host_count = len(_strings(host_boot_detection.get("observed_hosts")))
missing_host_count = len(_strings(host_boot_detection.get("missing_hosts")))
unreachable_host_count = len(_strings(host_boot_detection.get("unreachable_hosts")))
stale_host_count = len(_strings(host_boot_detection.get("stale_hosts")))
service_green = post_reboot_readiness.get("service_green") is True
product_data_green = post_reboot_readiness.get("product_data_green") is True
backup_core_green = post_reboot_readiness.get("backup_core_green") is True
host_188_service_green = post_reboot_readiness.get("host_188_service_green") is True
blocked_by_fresh_reboot_window_only = active_blockers == [
"host_boot_observation_older_than_target_window"
]
latest_verify_only_metric_present = bool(latest_verify_metric)
rollups = {
"active_blocker_count": active_blocker_count,
"readiness_percent": readiness_percent,
"completed_check_count": completed_check_count,
"required_check_count": len(required_checks),
"can_claim_all_services_recovered_within_target": can_claim_slo,
"observed_host_count": observed_host_count,
"missing_host_count": missing_host_count,
"unreachable_host_count": unreachable_host_count,
"stale_host_count": stale_host_count,
"post_start_blocked": _int(post_reboot_readiness.get("post_start_blocked")),
"service_green": service_green,
"product_data_green": product_data_green,
"backup_core_green": backup_core_green,
"host_188_service_green": host_188_service_green,
"blocked_by_fresh_reboot_window_only": blocked_by_fresh_reboot_window_only,
"latest_verify_only_metric_present": latest_verify_only_metric_present,
"latest_verify_only_metric_ready": _int(latest_verify_metric.get("ready")),
"latest_verify_only_metric_blocker_count": _int(
latest_verify_metric.get("blocker_count")
),
"latest_verify_only_metric_max_host_uptime_seconds": _int(
latest_verify_metric.get("max_host_uptime_seconds")
),
"latest_verify_only_metric_last_run_timestamp": _int(
latest_verify_metric.get("last_run_timestamp")
),
"stockplatform_freshness_status": str(
stockplatform.get("freshness_status") or "unknown"
),
"stockplatform_ingestion_status": str(
stockplatform.get("ingestion_status") or "unknown"
),
"stockplatform_freshness_blocker_count": len(
_strings(stockplatform.get("freshness_blockers"))
),
"stockplatform_ingestion_blocker_count": len(
_strings(stockplatform.get("ingestion_blockers"))
),
"stockplatform_final_retry_window_passed": _dict(
stockplatform.get("eod_window")
).get("final_retry_window_passed")
is True,
"stockplatform_controlled_recovery_gate_required": recovery_gate.get(
"required"
)
is True,
}
return {
"schema_version": _API_SCHEMA_VERSION,
"generated_at": str(scorecard.get("generated_at") or ""),
@@ -74,55 +136,47 @@ def _build_payload(scorecard: dict[str, Any], path: Path) -> dict[str, Any]:
"status": str(scorecard.get("status") or "unknown"),
"safe_next_step": safe_next_step,
"can_claim_all_services_recovered_within_target": can_claim_slo,
"active_blocker_count": active_blocker_count,
"readiness_percent": readiness_percent,
"service_green": service_green,
"product_data_green": product_data_green,
"backup_core_green": backup_core_green,
"host_188_service_green": host_188_service_green,
"observed_host_count": observed_host_count,
"missing_host_count": missing_host_count,
"unreachable_host_count": unreachable_host_count,
"stale_host_count": stale_host_count,
"blocked_by_fresh_reboot_window_only": blocked_by_fresh_reboot_window_only,
"latest_verify_only_metric_present": latest_verify_only_metric_present,
"latest_verify_only_metric_ready": rollups["latest_verify_only_metric_ready"],
"latest_verify_only_metric_blocker_count": rollups[
"latest_verify_only_metric_blocker_count"
],
"latest_verify_only_metric_max_host_uptime_seconds": rollups[
"latest_verify_only_metric_max_host_uptime_seconds"
],
"latest_verify_only_metric_last_run_timestamp": rollups[
"latest_verify_only_metric_last_run_timestamp"
],
"stockplatform_freshness_status": rollups["stockplatform_freshness_status"],
"stockplatform_ingestion_status": rollups["stockplatform_ingestion_status"],
"readback": {
"workplan_id": "P0-006",
"workplan_title": "主機重啟自動偵測、自動觸發與 10 分鐘恢復 SLO",
"source_scorecard_ref": f"docs/operations/{path.name}",
"target_minutes": _int(scorecard.get("target_minutes")),
"safe_next_step": safe_next_step,
"active_blocker_count": active_blocker_count,
"readiness_percent": readiness_percent,
"blocked_by_fresh_reboot_window_only": blocked_by_fresh_reboot_window_only,
"latest_verify_only_metric_present": latest_verify_only_metric_present,
},
"host_boot_detection": host_boot_detection,
"post_reboot_readiness": post_reboot_readiness,
"stockplatform_data_freshness": stockplatform,
"active_blockers": active_blockers,
"required_checks": required_checks,
"rollups": {
"active_blocker_count": len(active_blockers),
"readiness_percent": readiness_percent,
"completed_check_count": completed_check_count,
"required_check_count": len(required_checks),
"can_claim_all_services_recovered_within_target": can_claim_slo,
"observed_host_count": len(_strings(host_boot_detection.get("observed_hosts"))),
"missing_host_count": len(_strings(host_boot_detection.get("missing_hosts"))),
"unreachable_host_count": len(
_strings(host_boot_detection.get("unreachable_hosts"))
),
"stale_host_count": len(_strings(host_boot_detection.get("stale_hosts"))),
"post_start_blocked": _int(post_reboot_readiness.get("post_start_blocked")),
"service_green": post_reboot_readiness.get("service_green") is True,
"product_data_green": post_reboot_readiness.get("product_data_green") is True,
"backup_core_green": post_reboot_readiness.get("backup_core_green") is True,
"stockplatform_freshness_status": str(
stockplatform.get("freshness_status") or "unknown"
),
"stockplatform_ingestion_status": str(
stockplatform.get("ingestion_status") or "unknown"
),
"stockplatform_freshness_blocker_count": len(
_strings(stockplatform.get("freshness_blockers"))
),
"stockplatform_ingestion_blocker_count": len(
_strings(stockplatform.get("ingestion_blockers"))
),
"stockplatform_final_retry_window_passed": _dict(
stockplatform.get("eod_window")
).get("final_retry_window_passed")
is True,
"stockplatform_controlled_recovery_gate_required": recovery_gate.get(
"required"
)
is True,
},
"rollups": rollups,
"operation_boundaries": {
"read_only_api_allowed": True,
"host_reboot_performed": False,

View File

@@ -35,12 +35,34 @@ def _assert_reboot_slo_payload(payload: dict):
"event_or_approved_reboot_drill_to_prove_10_minute_slo"
)
assert payload["can_claim_all_services_recovered_within_target"] is False
assert payload["active_blocker_count"] == 1
assert payload["readiness_percent"] == 82
assert payload["service_green"] is True
assert payload["product_data_green"] is True
assert payload["backup_core_green"] is True
assert payload["host_188_service_green"] is True
assert payload["observed_host_count"] == 4
assert payload["missing_host_count"] == 0
assert payload["unreachable_host_count"] == 0
assert payload["stale_host_count"] == 4
assert payload["blocked_by_fresh_reboot_window_only"] is True
assert payload["latest_verify_only_metric_present"] is True
assert payload["latest_verify_only_metric_ready"] == 0
assert payload["latest_verify_only_metric_blocker_count"] == 1
assert payload["latest_verify_only_metric_max_host_uptime_seconds"] == 541538
assert payload["latest_verify_only_metric_last_run_timestamp"] > 0
assert payload["stockplatform_freshness_status"] == "ok"
assert payload["stockplatform_ingestion_status"] == "ok"
assert payload["readback"]["workplan_id"] == "P0-006"
assert payload["readback"]["target_minutes"] == 10
assert payload["readback"]["safe_next_step"] == (
"timer_and_service_data_readback_green_wait_for_next_all_host_reboot_"
"event_or_approved_reboot_drill_to_prove_10_minute_slo"
)
assert payload["readback"]["active_blocker_count"] == 1
assert payload["readback"]["readiness_percent"] == 82
assert payload["readback"]["blocked_by_fresh_reboot_window_only"] is True
assert payload["readback"]["latest_verify_only_metric_present"] is True
assert payload["rollups"]["active_blocker_count"] == 1
assert payload["rollups"]["readiness_percent"] == 82
assert payload["rollups"]["observed_host_count"] == 4
@@ -50,6 +72,11 @@ def _assert_reboot_slo_payload(payload: dict):
assert payload["rollups"]["service_green"] is True
assert payload["rollups"]["product_data_green"] is True
assert payload["rollups"]["backup_core_green"] is True
assert payload["rollups"]["host_188_service_green"] is True
assert payload["rollups"]["blocked_by_fresh_reboot_window_only"] is True
assert payload["rollups"]["latest_verify_only_metric_present"] is True
assert payload["rollups"]["latest_verify_only_metric_ready"] == 0
assert payload["rollups"]["latest_verify_only_metric_blocker_count"] == 1
assert payload["rollups"]["stockplatform_freshness_status"] == "ok"
assert payload["rollups"]["stockplatform_ingestion_status"] == "ok"
assert payload["rollups"]["stockplatform_freshness_blocker_count"] == 0

View File

@@ -1,3 +1,16 @@
## 2026-06-29 — 21:43 P0-006 reboot SLO API readback promoted
**照優先順序完成的實作**
- P0-005 credential escrow evidence refs 已由 production API / Delivery Workbench 讀回 closed；最新 Gitea CD #3968 / #3969 皆 success。
- P0-006 仍是 active P0，唯一 blocker 維持 `host_boot_observation_older_than_target_window`；未從此 lane 重啟主機或 restart service。
- 將 P0-006 `/api/v1/agents/reboot-auto-recovery-slo-scorecard` 的主線判斷值提升成 top-level readback：`active_blocker_count`、`readiness_percent`、service/data/backup green、observed/stale hosts、verify-only metric 與 StockPlatform freshness / ingestion 狀態。
**驗證**
- Focused pytest（P0-006 scorecard / Delivery Workbench / CD profile）：`24 passed`。
- `ruff check`、`py_compile`、`git diff --check`、Gitea runner pressure guard、Gitea secret env guard 通過。
**邊界**：未操作 host / Docker / K8s / DB / firewall / Wazuh runtime；未觸發 workflow_dispatch；未使用 GitHub / `gh` / GitHub API；未讀 secret / token / raw sessions / SQLite / `.env`。
## 2026-06-29 — 21:25 CD profile fix for log writeback readbacks
**照優先順序處理**