feat(ops): carry stock readbacks into reboot slo
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 24s
CD Pipeline / build-and-deploy (push) Successful in 4m33s
CD Pipeline / post-deploy-checks (push) Successful in 56s

This commit is contained in:
Your Name
2026-06-30 00:00:28 +08:00
parent 29e32a07a8
commit c32fab9cb4
8 changed files with 203 additions and 17 deletions

View File

@@ -192,7 +192,7 @@ def test_delivery_closure_workbench_exposes_p0_006_reboot_slo_lane():
assert lane["metric"]["stockplatform_ingestion_status"] == "ok"
assert lane["metric"]["stockplatform_freshness_blocker_count"] == 0
assert lane["metric"]["stockplatform_ingestion_blocker_count"] == 0
assert lane["metric"]["stockplatform_final_retry_window_passed"] is False
assert lane["metric"]["stockplatform_final_retry_window_passed"] is True
assert lane["metric"]["stockplatform_controlled_recovery_gate_required"] is False
assert lane["metric"]["host_reboot_performed"] is False
assert lane["metric"]["service_restart_performed"] is False
@@ -245,7 +245,7 @@ def _assert_delivery_workbench_shape(data: dict):
data["summary"][
"reboot_auto_recovery_stockplatform_final_retry_window_passed"
]
is False
is True
)
assert (
data["summary"][

View File

@@ -101,7 +101,7 @@ def _assert_reboot_slo_payload(payload: dict):
assert payload["rollups"]["stockplatform_ingestion_status"] == "ok"
assert payload["rollups"]["stockplatform_freshness_blocker_count"] == 0
assert payload["rollups"]["stockplatform_ingestion_blocker_count"] == 0
assert payload["rollups"]["stockplatform_final_retry_window_passed"] is False
assert payload["rollups"]["stockplatform_final_retry_window_passed"] is True
assert (
payload["rollups"]["stockplatform_controlled_recovery_gate_required"]
is False
@@ -114,12 +114,16 @@ def _assert_reboot_slo_payload(payload: dict):
assert stockplatform["ingestion_blockers"] == []
assert stockplatform["margin_short_recovery"]["status"] == "recovered"
assert stockplatform["margin_short_recovery"]["successful_source_run_ids"] == [
3390,
3389,
3390,
]
assert stockplatform["ai_recommendations_recovery"]["status"] == "recovered"
assert stockplatform["eod_window"]["final_retry_window_passed"] is False
assert stockplatform["eod_window"]["final_retry_window_passed"] is True
assert stockplatform["controlled_recovery_gate"]["required"] is False
assert (
stockplatform["controlled_recovery_gate"]["status"]
== "not_required_freshness_recovered"
)
assert "manual_db_update" in stockplatform["controlled_recovery_gate"][
"forbidden_actions"
]

View File

@@ -1,3 +1,17 @@
## 2026-06-29 — 23:45 P0-006 final retry window readback source closure
**照優先順序完成的實作**
- P0-006 仍是 active P0;StockPlatform final retry window 已過,freshness / ingestion live readback 仍為 `ok`,因此 `stockplatform_final_retry_window_passed=true` 且 controlled data recovery gate 維持 `not_required_freshness_recovered`。
- `scripts/reboot-recovery/reboot-auto-recovery-slo-exporter.sh` 現在會把 public StockPlatform freshness / ingestion JSON 存成 artifact 並傳給 scorecard,不再只靠 post-start summary 推導。
- `scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py` 在 freshness 已恢復時明確輸出 `margin_short_recovery`、`ai_recommendations_recovery`、`controlled_recovery_gate.status=not_required_freshness_recovered`。
- 更新 `docs/operations/awoooi-reboot-auto-recovery-slo-scorecard.snapshot.json`;production API / Delivery Workbench 發佈後會讀到 23:45 final-window truth,唯一剩餘 blocker 仍是 `host_boot_observation_older_than_target_window`。
**驗證**
- Focused pytest:P0-006 scorecard / exporter contract / API readback `14 passed`;Delivery Workbench / P0-006 API / CD profile `24 passed`。
- `bash -n`、`py_compile`、JSON parse、Gitea runner pressure guard、Gitea secret env guard、`git diff --check`:通過。
**邊界**:未重啟主機,未 restart Docker / Nginx / K3s / DB / service;未寫 StockPlatform DB;未 workflow_dispatch;未使用 GitHub / `gh` / GitHub API;未讀 secret / token / raw sessions / SQLite / `.env`。
## 2026-06-29 — 23:44 主線 priority readback 收斂:P0 event-gated,P1 production verified
**照優先順序讀回的事實**

View File

@@ -8,7 +8,7 @@
"free_gib": 4.454,
"min_free_gib": 2.0
},
"generated_at": "2026-06-29T21:15:30+08:00",
"generated_at": "2026-06-29T23:45:54+08:00",
"host_boot_detection": {
"host_rows": [
{
@@ -126,9 +126,9 @@
"target_selector": "stockplatform-v2:system_freshness:core.margin_short_daily,ai.recommendations"
},
"eod_window": {
"classification": "recovered_after_21_05_retry_window",
"classification": "recovered_after_final_retry_window",
"final_retry_window_end_local": "23:35",
"final_retry_window_passed": false,
"final_retry_window_passed": true,
"first_full_window_end_local": "19:15",
"next_action": "rerun_slo_verify_only_after_next_fresh_all_host_reboot_event_or_approved_reboot_drill",
"pending": false
@@ -141,7 +141,23 @@
"ingestion_status": "ok",
"latest_source_runs": [
{
"source_run_id": 3390,
"source_run_id": 3392,
"source_name": "intelligence_security_linker",
"target_date": null,
"status": "succeeded",
"started_at": "2026-06-29T15:00:09.333642Z",
"finished_at": "2026-06-29T15:00:09.333642Z"
},
{
"source_run_id": 3391,
"source_name": "intelligence_reports_import",
"target_date": null,
"status": "succeeded",
"started_at": "2026-06-29T15:00:08.472808Z",
"finished_at": "2026-06-29T15:00:08.472808Z"
},
{
"source_run_id": 3389,
"source_name": "official_margin_short_daily",
"target_date": "2026-06-29",
"status": "succeeded",
@@ -149,7 +165,7 @@
"finished_at": "2026-06-29T13:05:13.341357Z"
},
{
"source_run_id": 3389,
"source_run_id": 3390,
"source_name": "official_margin_short_daily",
"target_date": "2026-06-29",
"status": "succeeded",
@@ -171,6 +187,22 @@
"status": "succeeded",
"started_at": "2026-06-29T13:00:07.822700Z",
"finished_at": "2026-06-29T13:00:07.822700Z"
},
{
"source_run_id": 3385,
"source_name": "official_margin_short_daily",
"target_date": "2026-06-29",
"status": "official_pending",
"started_at": "2026-06-29T12:05:16.716460Z",
"finished_at": "2026-06-29T12:05:16.716460Z"
},
{
"source_run_id": 3386,
"source_name": "official_margin_short_daily",
"target_date": "2026-06-29",
"status": "official_pending",
"started_at": "2026-06-29T12:05:16.716460Z",
"finished_at": "2026-06-29T12:05:16.716460Z"
}
],
"latest_trading_date": "2026-06-29",
@@ -181,14 +213,15 @@
"cleared_blocker": "core_margin_short_daily_missing",
"cleared_ingestion_blocker": "core.margin_short_daily_incomplete",
"successful_source_run_ids": [
3390,
3389
3389,
3390
]
},
"ai_recommendations_recovery": {
"status": "recovered",
"cleared_blocker": "ai_recommendations_stale"
}
},
"final_retry_checked_at": "2026-06-29T23:45:54+08:00"
},
"target_minutes": 10,
"target_seconds": 600,

View File

@@ -13,6 +13,9 @@ OUTPUT_NAME="${OUTPUT_NAME:-reboot_auto_recovery_slo.prom}"
TARGET_MINUTES="${TARGET_MINUTES:-10}"
MIN_FREE_GIB="${MIN_FREE_GIB:-2}"
LOCK_FILE="${LOCK_FILE:-${LOG_DIR}/reboot_auto_recovery_slo.lock}"
STOCK_FRESHNESS_URL="${STOCK_FRESHNESS_URL:-https://stock.wooo.work/api/v1/system/freshness}"
STOCK_INGESTION_URL="${STOCK_INGESTION_URL:-https://stock.wooo.work/api/v1/system/ingestion}"
STOCK_READBACK_TIMEOUT_SECONDS="${STOCK_READBACK_TIMEOUT_SECONDS:-10}"
mkdir -p "$TEXTFILE_DIR" "$LOG_DIR"
@@ -28,18 +31,39 @@ mkdir -p "$artifact_dir"
host_probe="$artifact_dir/host-probe.txt"
summary_file="$artifact_dir/summary.txt"
scorecard_file="$artifact_dir/scorecard.json"
stock_freshness_file="$artifact_dir/stock-freshness.json"
stock_ingestion_file="$artifact_dir/stock-ingestion.json"
bash "$ROOT_DIR/scripts/reboot-recovery/reboot-auto-recovery-host-probe.sh" >"$host_probe" 2>&1 || true
ARTIFACT_DIR="$artifact_dir/post-reboot-readiness" \
bash "$ROOT_DIR/scripts/reboot-recovery/post-reboot-readiness-summary.sh" --no-color >"$summary_file" 2>&1 || true
python3 "$ROOT_DIR/scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py" \
if command -v curl >/dev/null 2>&1; then
curl -fsS --max-time "$STOCK_READBACK_TIMEOUT_SECONDS" \
"$STOCK_FRESHNESS_URL" >"$stock_freshness_file" 2>"$artifact_dir/stock-freshness.err" \
|| rm -f "$stock_freshness_file"
curl -fsS --max-time "$STOCK_READBACK_TIMEOUT_SECONDS" \
"$STOCK_INGESTION_URL" >"$stock_ingestion_file" 2>"$artifact_dir/stock-ingestion.err" \
|| rm -f "$stock_ingestion_file"
fi
scorecard_args=(
"$ROOT_DIR/scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py"
--summary-file "$summary_file" \
--host-probe-file "$host_probe" \
--target-minutes "$TARGET_MINUTES" \
--min-free-gib "$MIN_FREE_GIB" \
--disk-path / \
--output "$scorecard_file" || true
--output "$scorecard_file"
)
if [ -s "$stock_freshness_file" ]; then
scorecard_args+=(--stock-freshness-file "$stock_freshness_file")
fi
if [ -s "$stock_ingestion_file" ]; then
scorecard_args+=(--stock-ingestion-file "$stock_ingestion_file")
fi
python3 "${scorecard_args[@]}" || true
now="$(date +%s)"
ready="$(python3 - "$scorecard_file" <<'PY'

View File

@@ -264,6 +264,15 @@ def build_stockplatform_readback(
or summary.get("STOCK_LATEST_TRADING_DATE")
or ""
)
latest_source_runs = compact_source_runs(ingestion)
margin_short_success_ids = [
row.get("source_run_id")
for row in latest_source_runs
if row.get("source_name") == "official_margin_short_daily"
and row.get("target_date") == latest_trading_date
and row.get("status") == "succeeded"
and row.get("source_run_id") is not None
]
eod_pending = truthy(summary.get("STOCK_EOD_WINDOW_PENDING"))
eod_final_window = str(
summary.get("STOCK_EOD_FINAL_RETRY_WINDOW_END_LOCAL") or "unknown"
@@ -278,6 +287,15 @@ def build_stockplatform_readback(
or final_passed
)
)
recovery_gate_status = (
"ready_to_open"
if recovery_required
else (
"not_required_freshness_recovered"
if freshness_status == "ok" and not freshness_blockers
else "not_required_yet"
)
)
return {
"freshness_endpoint_readback_present": bool(freshness),
"ingestion_endpoint_readback_present": bool(ingestion),
@@ -291,7 +309,7 @@ def build_stockplatform_readback(
for row in compact_stock_sources(freshness)
if row["status"] not in {"ok", "warning"}
],
"latest_source_runs": compact_source_runs(ingestion),
"latest_source_runs": latest_source_runs,
"eod_window": {
"pending": eod_pending,
"classification": str(summary.get("STOCK_EOD_CLASSIFICATION") or "unknown"),
@@ -304,7 +322,7 @@ def build_stockplatform_readback(
},
"controlled_recovery_gate": {
"required": recovery_required,
"status": "ready_to_open" if recovery_required else "not_required_yet",
"status": recovery_gate_status,
"target_selector": "stockplatform-v2:system_freshness:core.margin_short_daily,ai.recommendations",
"allowed_actions": [
"inspect_existing_ingestion_readback",
@@ -320,6 +338,27 @@ def build_stockplatform_readback(
"reboot_or_service_restart_from_reboot_slo_lane",
],
},
"margin_short_recovery": {
"status": (
"recovered"
if freshness_status == "ok"
and "core_margin_short_daily_missing" not in freshness_blockers
and bool(margin_short_success_ids)
else "not_verified"
),
"cleared_blocker": "core_margin_short_daily_missing",
"cleared_ingestion_blocker": "core.margin_short_daily_incomplete",
"successful_source_run_ids": margin_short_success_ids,
},
"ai_recommendations_recovery": {
"status": (
"recovered"
if freshness_status == "ok"
and "ai_recommendations_stale" not in freshness_blockers
else "not_verified"
),
"cleared_blocker": "ai_recommendations_stale",
},
}

View File

@@ -77,3 +77,16 @@ def test_exporter_uses_user_writable_lock_after_creating_log_dir() -> None:
assert 'LOCK_FILE="${LOCK_FILE:-${LOG_DIR}/reboot_auto_recovery_slo.lock}"' in text
assert text.index('mkdir -p "$TEXTFILE_DIR" "$LOG_DIR"') < text.index('exec 9>"$LOCK_FILE"')
def test_exporter_carries_stockplatform_readbacks_into_scorecard() -> None:
    """Check the exporter script text for the StockPlatform readback wiring.

    This is a static text contract: the script is read as UTF-8 and searched
    for substrings; it is never executed. ``EXPORTER`` is defined elsewhere in
    this module (presumably a ``pathlib.Path`` to the exporter shell script --
    confirm against the module header).
    """
    script_text = EXPORTER.read_text(encoding="utf-8")

    # Substrings that must appear: the two endpoint variables, the artifact
    # file assignments, and the conditional scorecard argument appends.
    required_snippets = (
        "STOCK_FRESHNESS_URL",
        "STOCK_INGESTION_URL",
        'stock_freshness_file="$artifact_dir/stock-freshness.json"',
        'stock_ingestion_file="$artifact_dir/stock-ingestion.json"',
        "scorecard_args+=(--stock-freshness-file",
        "scorecard_args+=(--stock-ingestion-file",
    )
    for snippet in required_snippets:
        assert snippet in script_text

    # Substrings that must never appear in the exporter script.
    forbidden_snippets = ("manual_db_update", "systemctl restart")
    for snippet in forbidden_snippets:
        assert snippet not in script_text

View File

@@ -200,6 +200,65 @@ STOCK_EOD_FINAL_RETRY_WINDOW_END_LOCAL=23:35
)
def test_stockplatform_recovered_marks_controlled_gate_not_required(
    tmp_path: Path,
) -> None:
    """Recovered freshness after the final retry window keeps the gate closed.

    The summary declares the final retry window end as 23:35 and the scorecard
    is generated at 23:40 (+08:00). Both readbacks report ``ok`` with no
    blockers, and ingestion lists two succeeded ``official_margin_short_daily``
    runs for the latest trading date. ``GREEN_SUMMARY`` and
    ``run_scorecard_with_stock`` are defined elsewhere in this module.
    """
    recovered_summary = GREEN_SUMMARY + """\
STOCK_FRESHNESS_STATUS=ok
STOCK_LATEST_TRADING_DATE=2026-06-29
STOCK_BLOCKERS=none
STOCK_EOD_WINDOW_PENDING=0
STOCK_EOD_CLASSIFICATION=recovered_after_21_05_retry_window
STOCK_EOD_NEXT_ACTION=rerun_slo_verify_only_after_next_fresh_all_host_reboot_event_or_approved_reboot_drill
STOCK_EOD_FIRST_FULL_WINDOW_END_LOCAL=19:15
STOCK_EOD_FINAL_RETRY_WINDOW_END_LOCAL=23:35
"""
    freshness_readback = {
        "status": "ok",
        "latest_trading_date": "2026-06-29",
        "blockers": [],
    }
    # Two succeeded margin-short runs, listed newest id first; the expected
    # id list below uses this same order.
    margin_short_runs = [
        {
            "source_run_id": run_id,
            "source_name": "official_margin_short_daily",
            "target_date": "2026-06-29",
            "status": "succeeded",
        }
        for run_id in (3390, 3389)
    ]
    ingestion_readback = {
        "status": "ok",
        "latest_trading_date": "2026-06-29",
        "blockers": [],
        "latest_source_runs": margin_short_runs,
    }

    payload = run_scorecard_with_stock(
        tmp_path,
        recovered_summary,
        freshness_readback,
        ingestion_readback,
        generated_at="2026-06-29T23:40:00+08:00",
    )

    readback = payload["stockplatform_data_freshness"]
    assert readback["eod_window"]["final_retry_window_passed"] is True

    gate = readback["controlled_recovery_gate"]
    assert gate["required"] is False
    assert gate["status"] == "not_required_freshness_recovered"

    margin_short = readback["margin_short_recovery"]
    assert margin_short["status"] == "recovered"
    assert margin_short["successful_source_run_ids"] == [3390, 3389]

    assert readback["ai_recommendations_recovery"]["status"] == "recovered"
def test_stockplatform_blocked_after_final_retry_opens_controlled_gate(
tmp_path: Path,
) -> None: