From 62a4702dd8a42750d48054dff14b29ca6debd7a9 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 30 Jun 2026 21:14:32 +0800 Subject: [PATCH] fix(api): keep priority readback fail closed --- .../awoooi_priority_work_order_readback.py | 226 +++++++++++++----- ...awoooi_priority_work_order_readback_api.py | 36 ++- 2 files changed, 190 insertions(+), 72 deletions(-) diff --git a/apps/api/src/services/awoooi_priority_work_order_readback.py b/apps/api/src/services/awoooi_priority_work_order_readback.py index cb0fb6d5..f1650ee7 100644 --- a/apps/api/src/services/awoooi_priority_work_order_readback.py +++ b/apps/api/src/services/awoooi_priority_work_order_readback.py @@ -907,6 +907,31 @@ def _enrich_from_current_readbacks(payload: dict[str, Any]) -> None: evidence["production_deploy_governance_fields_present"] = closure_percent == 100 evidence["latest_cd_run_status"] = "Success" + stock_freshness_status = str( + workbench_rollups.get("current_p0_stockplatform_freshness_status") or "" + ) + stock_ingestion_status = str( + workbench_rollups.get("current_p0_stockplatform_ingestion_status") or "" + ) + stock_blockers = _stockplatform_status_blockers( + freshness_status=stock_freshness_status, + ingestion_status=stock_ingestion_status, + ) + stockplatform_green = not stock_blockers + service_green = workbench_rollups.get("current_p0_service_green") is True + product_data_green = ( + workbench_rollups.get("current_p0_product_data_green") is True + ) + backup_core_green = workbench_rollups.get("current_p0_backup_core_green") is True + drill_preflight_ready = reboot_preflight_rollups.get("preflight_ready") is True + base_readback_green = ( + service_green + and product_data_green + and backup_core_green + and stockplatform_green + and drill_preflight_ready + ) + for item in _list(payload.get("in_progress_or_blocked_in_priority_order")): workplan = _dict(item) if workplan.get("workplan_id") != "P0-006": @@ -914,28 +939,24 @@ def _enrich_from_current_readbacks(payload: dict[str, Any]) -> None: evidence = _dict(workplan.setdefault("evidence", {})) evidence["active_blockers"] = state["active_p0_live_active_blockers"] - evidence["service_green"] = bool( - workbench_rollups.get("current_p0_service_green") is True - ) - evidence["product_data_green"] = bool( - workbench_rollups.get("current_p0_product_data_green") is True - ) - evidence["backup_core_green"] = bool( - workbench_rollups.get("current_p0_backup_core_green") is True - ) - evidence["stock_freshness_status"] = str( - workbench_rollups.get("current_p0_stockplatform_freshness_status") or "" - ) - evidence["stock_ingestion_status"] = str( - workbench_rollups.get("current_p0_stockplatform_ingestion_status") or "" - ) - evidence["stock_blockers"] = [] - evidence["stock_ingestion_blockers"] = [] + evidence["service_green"] = service_green + evidence["product_data_green"] = product_data_green + evidence["backup_core_green"] = backup_core_green + evidence["stock_freshness_status"] = stock_freshness_status + evidence["stock_ingestion_status"] = stock_ingestion_status + evidence["stock_blockers"] = [ + blocker + for blocker in stock_blockers + if blocker.startswith("stockplatform_freshness_") + ] + evidence["stock_ingestion_blockers"] = [ + blocker + for blocker in stock_blockers + if blocker.startswith("stockplatform_ingestion_") + ] evidence["stockplatform_controlled_recovery_gate_required"] = False evidence["drill_preflight_status"] = str(reboot_preflight.get("status") or "") - evidence["drill_preflight_ready"] = ( - reboot_preflight_rollups.get("preflight_ready") is True - ) + evidence["drill_preflight_ready"] = drill_preflight_ready evidence["drill_preflight_blocker_count"] = _int( reboot_preflight_rollups.get("preflight_blocker_count") ) @@ -964,31 +985,57 @@ def _enrich_from_current_readbacks(payload: dict[str, Any]) -> None: ) professional_fix = _dict(workplan.setdefault("professional_fix", {})) - professional_fix["action"] = ( - "Keep the live boot-triggered SLO timer enabled. StockPlatform " - "freshness/ingestion are ok; the only active blocker is the fresh " - "all-host reboot observation window. Use the production drill " - "preflight readback for target selector, check-mode, rollback, and " - "post-verifier, then wait for the next real all-host reboot event " - "or separate break-glass reboot drill authorization. Do not reboot, " - "restart services, write DB rows, trigger workflows, or read secrets " - "from this lane." - ) professional_fix["owner"] = ( "reboot auto-recovery lane plus drill preflight readback" ) + + if base_readback_green: + professional_fix["action"] = ( + "Keep the live boot-triggered SLO timer enabled. StockPlatform " + "freshness/ingestion are ok; the only active blocker is the fresh " + "all-host reboot observation window. Use the production drill " + "preflight readback for target selector, check-mode, rollback, and " + "post-verifier, then wait for the next real all-host reboot event " + "or separate break-glass reboot drill authorization. Do not reboot, " + "restart services, write DB rows, trigger workflows, or read secrets " + "from this lane." + ) + workplan["reason"] = ( + "Current readback shows service, product data, backup, and " + "StockPlatform freshness/ingestion are green. P0-006 remains event " + "gated only because the 10-minute recovery SLO needs a fresh " + "all-host reboot observation or a separately authorized reboot " + "drill." + ) + workplan["safe_next_step"] = ( + "keep_timer_live_wait_for_next_all_host_reboot_event_or_separately_" + "approved_reboot_drill_to_prove_10_minute_slo" + ) + workplan["status"] = "blocked_waiting_fresh_all_host_reboot_window" + continue + + professional_fix["action"] = ( + "Keep the live boot-triggered SLO timer enabled, but do not claim " + "reboot-window-only closure while service/data/backup/StockPlatform " + "readback is not green. First run the controlled source freshness and " + "Postgres contract readback, then rerun public health, freshness, " + "ingestion, backup, and drill preflight verifiers. Do not reboot, " + "restart services, write DB rows, fake freshness, trigger workflows, " + "or read secrets from this lane." + ) workplan["reason"] = ( - "Current readback shows service, product data, backup, and " - "StockPlatform freshness/ingestion are green. P0-006 remains event " - "gated only because the 10-minute recovery SLO needs a fresh " - "all-host reboot observation or a separately authorized reboot " - "drill." + "Current P0-006 readback is not green: service_green=" + f"{service_green}, product_data_green={product_data_green}, " + f"backup_core_green={backup_core_green}, " + f"stock_freshness_status={stock_freshness_status}, " + f"stock_ingestion_status={stock_ingestion_status}, " + f"drill_preflight_ready={drill_preflight_ready}." ) workplan["safe_next_step"] = ( - "keep_timer_live_wait_for_next_all_host_reboot_event_or_separately_" - "approved_reboot_drill_to_prove_10_minute_slo" + "resolve_stockplatform_source_freshness_and_reboot_slo_readback_" + "blockers_then_rerun_priority_work_order" ) - workplan["status"] = "blocked_waiting_fresh_all_host_reboot_window" + workplan["status"] = "blocked_p0_006_readback_not_green" p0_004_ready = ( state["p0_004_template_copy_apply_gate_runtime_readback_state"] == "ready" @@ -1002,30 +1049,68 @@ def _enrich_from_current_readbacks(payload: dict[str, Any]) -> None: if p0_004_ready and p0_006_event_gated else "mainline_readback_requires_attention" ) - payload["next_execution_order"] = [ - ( - "P0-006: service/data/backup/StockPlatform readback is green, but " - "the 10-minute reboot SLO cannot be claimed until a fresh all-host " - "reboot event or separately approved reboot drill; keep timer live " - "and do not reboot/restart/DB-write from this lane." - ), - ( - "P0-006-REBOOT-DRILL-PREFLIGHT-READBACK: production endpoint is 200 " - "and preflight is ready for separate break-glass reboot drill " - "authorization; do not reboot from this lane without that explicit " - "drill authorization." - ), - ( - "P0-004-TEMPLATE-COPY-APPLY-GATE-READBACK: production apply gate, " - "template-copy receipt, and runtime enablement readbacks are ready; " - "keep closed unless production readback regresses." - ), - ( - "NEXT: keep this priority-order API as the source of truth before " - "opening the next blocker-free mainline item; stale snapshots, old " - "failed CD runs, and retired GitHub lanes must not reorder closed work." - ), - ] + if p0_006_event_gated: + payload["next_execution_order"] = [ + ( + "P0-006: service/data/backup/StockPlatform readback is green, but " + "the 10-minute reboot SLO cannot be claimed until a fresh all-host " + "reboot event or separately approved reboot drill; keep timer live " + "and do not reboot/restart/DB-write from this lane." + ), + ( + "P0-006-REBOOT-DRILL-PREFLIGHT-READBACK: production endpoint is 200 " + "and preflight is ready for separate break-glass reboot drill " + "authorization; do not reboot from this lane without that explicit " + "drill authorization." + ), + ( + "P0-004-TEMPLATE-COPY-APPLY-GATE-READBACK: production apply gate, " + "template-copy receipt, and runtime enablement readbacks are ready; " + "keep closed unless production readback regresses." + ), + ( + "NEXT: keep this priority-order API as the source of truth before " + "opening the next blocker-free mainline item; stale snapshots, old " + "failed CD runs, and retired GitHub lanes must not reorder closed work." + ), + ] + else: + state["active_p0_state"] = "blocked_p0_006_readback_not_green" + state["next_executable_mainline_workplan_id"] = ( + "P0-006-STOCKPLATFORM-DATA-DEPENDENCY-READBACK" + if not stockplatform_green + else "P0-006-REBOOT-SLO-READBACK-RECOVERY" + ) + state["next_executable_mainline_state"] = ( + "controlled_stockplatform_source_freshness_and_postgres_contract_" + "readback_required" + if not stockplatform_green + else "controlled_reboot_slo_readback_recovery_required" + ) + payload["next_execution_order"] = [ + ( + "P0-006: service/data/backup/StockPlatform readback is not green; " + "resolve controlled source freshness, Postgres contract, backup, " + "and reboot SLO readback blockers before claiming reboot-window-only " + "closure. Do not reboot/restart/DB-write/fake freshness from this lane." + ), + ( + "P0-006-STOCKPLATFORM-DATA-DEPENDENCY-READBACK: StockPlatform " + f"freshness={stock_freshness_status} and " + f"ingestion={stock_ingestion_status}; run read-only source " + "freshness/Postgres contract readback and controlled verifier." + ), + ( + "P0-006-REBOOT-DRILL-PREFLIGHT-READBACK: keep preflight as " + "evidence only while base service/data/backup readback is not green; " + "do not reboot without separate break-glass drill authorization." + ), + ( + "NEXT: keep this priority-order API as the source of truth before " + "opening the next blocker-free mainline item; stale snapshots, old " + "failed CD runs, and retired GitHub lanes must not reorder closed work." + ), + ] _set_rollups_and_summary( payload=payload, current_head=current_head, @@ -1249,6 +1334,23 @@ def _unique_strings(values: list[str]) -> list[str]: return unique +def _stockplatform_status_blockers( + *, + freshness_status: str, + ingestion_status: str, +) -> list[str]: + blockers: list[str] = [] + if freshness_status != "ok": + blockers.append( + f"stockplatform_freshness_{freshness_status or 'unknown'}" + ) + if ingestion_status != "ok": + blockers.append( + f"stockplatform_ingestion_{ingestion_status or 'unknown'}" + ) + return blockers + + def _is_sha(value: str) -> bool: return bool(_SHA_RE.fullmatch(value)) diff --git a/apps/api/tests/test_awoooi_priority_work_order_readback_api.py b/apps/api/tests/test_awoooi_priority_work_order_readback_api.py index d377b19e..1868d9c4 100644 --- a/apps/api/tests/test_awoooi_priority_work_order_readback_api.py +++ b/apps/api/tests/test_awoooi_priority_work_order_readback_api.py @@ -43,21 +43,33 @@ def test_awoooi_priority_work_order_readback_loader_returns_mainline_order(): assert payload["next_execution_order"][0].startswith("P0-006:") in_progress = payload["in_progress_or_blocked_in_priority_order"][0] assert in_progress["workplan_id"] == "P0-006" - assert in_progress["evidence"]["stock_freshness_status"] == "ok" - assert in_progress["evidence"]["stock_ingestion_status"] == "ok" - assert in_progress["evidence"]["stock_blockers"] == [] - assert in_progress["evidence"]["drill_preflight_ready"] is True - assert in_progress["evidence"]["drill_preflight_blocker_count"] == 0 + assert payload["mainline_execution_state"]["active_p0_state"] == ( + "blocked_p0_006_readback_not_green" + ) + assert payload["mainline_execution_state"]["next_executable_mainline_workplan_id"] == ( + "P0-006-STOCKPLATFORM-DATA-DEPENDENCY-READBACK" + ) + assert in_progress["status"] == "blocked_p0_006_readback_not_green" + assert in_progress["evidence"]["stock_freshness_status"] == "not_configured" + assert in_progress["evidence"]["stock_ingestion_status"] == "not_configured" + assert in_progress["evidence"]["stock_blockers"] == [ + "stockplatform_freshness_not_configured" + ] + assert in_progress["evidence"]["stock_ingestion_blockers"] == [ + "stockplatform_ingestion_not_configured" + ] + assert in_progress["evidence"]["drill_preflight_ready"] is False + assert in_progress["evidence"]["drill_preflight_blocker_count"] == 9 assert ( in_progress["evidence"][ "drill_preflight_execution_authorized_by_this_endpoint" ] is False ) - assert "StockPlatform freshness/ingestion are ok" in in_progress[ + assert "service/data/backup/StockPlatform readback is not green" in in_progress[ "professional_fix" ]["action"] - assert "current service recovery blocker" not in in_progress[ + assert "Do not reboot" in in_progress[ "professional_fix" ]["action"] assert payload["operation_boundaries"]["github_api_used"] is False @@ -88,13 +100,17 @@ def test_awoooi_priority_work_order_readback_endpoint_returns_snapshot( assert response.status_code == 200 data = response.json() - assert data["status"] == "p0_006_event_gated_all_immediate_apply_gaps_closed" + assert data["status"] == "mainline_readback_requires_attention" assert data["mainline_execution_state"]["active_p0_workplan_id"] == "P0-006" assert data["mainline_execution_state"]["p0_004_template_copy_apply_gate_runtime_readback_state"] == "ready" - assert data["mainline_execution_state"]["reboot_drill_preflight_runtime_readback_state"] == "ready" + assert data["mainline_execution_state"]["active_p0_state"] == ( + "blocked_p0_006_readback_not_green" + ) + assert data["mainline_execution_state"]["reboot_drill_preflight_runtime_readback_state"] == "blocked" assert data["rollups"]["stockplatform_public_api_runtime_ready"] is True + assert data["rollups"]["active_p0_event_gated_by_fresh_reboot_window_only"] is False assert data["next_execution_order"][0].startswith("P0-006:") - assert "do not reboot" in data["next_execution_order"][0] + assert "readback is not green" in data["next_execution_order"][0] def test_awoooi_priority_work_order_readback_overlays_harbor_deploy_blocker(