feat(ops): carry stock readbacks into reboot slo
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 24s
CD Pipeline / build-and-deploy (push) Successful in 4m33s
CD Pipeline / post-deploy-checks (push) Successful in 56s
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 24s
CD Pipeline / build-and-deploy (push) Successful in 4m33s
CD Pipeline / post-deploy-checks (push) Successful in 56s
This commit is contained in:
@@ -192,7 +192,7 @@ def test_delivery_closure_workbench_exposes_p0_006_reboot_slo_lane():
|
||||
assert lane["metric"]["stockplatform_ingestion_status"] == "ok"
|
||||
assert lane["metric"]["stockplatform_freshness_blocker_count"] == 0
|
||||
assert lane["metric"]["stockplatform_ingestion_blocker_count"] == 0
|
||||
assert lane["metric"]["stockplatform_final_retry_window_passed"] is False
|
||||
assert lane["metric"]["stockplatform_final_retry_window_passed"] is True
|
||||
assert lane["metric"]["stockplatform_controlled_recovery_gate_required"] is False
|
||||
assert lane["metric"]["host_reboot_performed"] is False
|
||||
assert lane["metric"]["service_restart_performed"] is False
|
||||
@@ -245,7 +245,7 @@ def _assert_delivery_workbench_shape(data: dict):
|
||||
data["summary"][
|
||||
"reboot_auto_recovery_stockplatform_final_retry_window_passed"
|
||||
]
|
||||
is False
|
||||
is True
|
||||
)
|
||||
assert (
|
||||
data["summary"][
|
||||
|
||||
@@ -101,7 +101,7 @@ def _assert_reboot_slo_payload(payload: dict):
|
||||
assert payload["rollups"]["stockplatform_ingestion_status"] == "ok"
|
||||
assert payload["rollups"]["stockplatform_freshness_blocker_count"] == 0
|
||||
assert payload["rollups"]["stockplatform_ingestion_blocker_count"] == 0
|
||||
assert payload["rollups"]["stockplatform_final_retry_window_passed"] is False
|
||||
assert payload["rollups"]["stockplatform_final_retry_window_passed"] is True
|
||||
assert (
|
||||
payload["rollups"]["stockplatform_controlled_recovery_gate_required"]
|
||||
is False
|
||||
@@ -114,12 +114,16 @@ def _assert_reboot_slo_payload(payload: dict):
|
||||
assert stockplatform["ingestion_blockers"] == []
|
||||
assert stockplatform["margin_short_recovery"]["status"] == "recovered"
|
||||
assert stockplatform["margin_short_recovery"]["successful_source_run_ids"] == [
|
||||
3390,
|
||||
3389,
|
||||
3390,
|
||||
]
|
||||
assert stockplatform["ai_recommendations_recovery"]["status"] == "recovered"
|
||||
assert stockplatform["eod_window"]["final_retry_window_passed"] is False
|
||||
assert stockplatform["eod_window"]["final_retry_window_passed"] is True
|
||||
assert stockplatform["controlled_recovery_gate"]["required"] is False
|
||||
assert (
|
||||
stockplatform["controlled_recovery_gate"]["status"]
|
||||
== "not_required_freshness_recovered"
|
||||
)
|
||||
assert "manual_db_update" in stockplatform["controlled_recovery_gate"][
|
||||
"forbidden_actions"
|
||||
]
|
||||
|
||||
@@ -1,3 +1,17 @@
|
||||
## 2026-06-29 — 23:45 P0-006 final retry window readback source closure
|
||||
|
||||
**照優先順序完成的實作**:
|
||||
- P0-006 仍是 active P0;StockPlatform final retry window 已過,freshness / ingestion live readback 仍為 `ok`,因此 `stockplatform_final_retry_window_passed=true` 且 controlled data recovery gate 維持 `not_required_freshness_recovered`。
|
||||
- `scripts/reboot-recovery/reboot-auto-recovery-slo-exporter.sh` 現在會把 public StockPlatform freshness / ingestion JSON 存成 artifact 並傳給 scorecard,不再只靠 post-start summary 推導。
|
||||
- `scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py` 在 freshness 已恢復時明確輸出 `margin_short_recovery`、`ai_recommendations_recovery` 與 `controlled_recovery_gate.status=not_required_freshness_recovered`。
|
||||
- 更新 `docs/operations/awoooi-reboot-auto-recovery-slo-scorecard.snapshot.json`,production API / Delivery Workbench 發佈後會讀到 23:45 final-window truth;唯一剩餘 blocker 仍是 `host_boot_observation_older_than_target_window`。
|
||||
|
||||
**驗證**:
|
||||
- Focused pytest:P0-006 scorecard / exporter contract / API readback `14 passed`;Delivery Workbench / P0-006 API / CD profile `24 passed`。
|
||||
- `bash -n`、`py_compile`、JSON parse、Gitea runner pressure guard、Gitea secret env guard、`git diff --check`:通過。
|
||||
|
||||
**邊界**:未重啟主機,未 restart Docker / Nginx / K3s / DB / service,未寫 StockPlatform DB,未 workflow_dispatch,未使用 GitHub / `gh` / GitHub API,未讀 secret / token / raw sessions / SQLite / `.env`。
|
||||
|
||||
## 2026-06-29 — 23:44 主線 priority readback 收斂:P0 event-gated,P1 production verified
|
||||
|
||||
**照優先順序讀回的事實**:
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"free_gib": 4.454,
|
||||
"min_free_gib": 2.0
|
||||
},
|
||||
"generated_at": "2026-06-29T21:15:30+08:00",
|
||||
"generated_at": "2026-06-29T23:45:54+08:00",
|
||||
"host_boot_detection": {
|
||||
"host_rows": [
|
||||
{
|
||||
@@ -126,9 +126,9 @@
|
||||
"target_selector": "stockplatform-v2:system_freshness:core.margin_short_daily,ai.recommendations"
|
||||
},
|
||||
"eod_window": {
|
||||
"classification": "recovered_after_21_05_retry_window",
|
||||
"classification": "recovered_after_final_retry_window",
|
||||
"final_retry_window_end_local": "23:35",
|
||||
"final_retry_window_passed": false,
|
||||
"final_retry_window_passed": true,
|
||||
"first_full_window_end_local": "19:15",
|
||||
"next_action": "rerun_slo_verify_only_after_next_fresh_all_host_reboot_event_or_approved_reboot_drill",
|
||||
"pending": false
|
||||
@@ -141,7 +141,23 @@
|
||||
"ingestion_status": "ok",
|
||||
"latest_source_runs": [
|
||||
{
|
||||
"source_run_id": 3390,
|
||||
"source_run_id": 3392,
|
||||
"source_name": "intelligence_security_linker",
|
||||
"target_date": null,
|
||||
"status": "succeeded",
|
||||
"started_at": "2026-06-29T15:00:09.333642Z",
|
||||
"finished_at": "2026-06-29T15:00:09.333642Z"
|
||||
},
|
||||
{
|
||||
"source_run_id": 3391,
|
||||
"source_name": "intelligence_reports_import",
|
||||
"target_date": null,
|
||||
"status": "succeeded",
|
||||
"started_at": "2026-06-29T15:00:08.472808Z",
|
||||
"finished_at": "2026-06-29T15:00:08.472808Z"
|
||||
},
|
||||
{
|
||||
"source_run_id": 3389,
|
||||
"source_name": "official_margin_short_daily",
|
||||
"target_date": "2026-06-29",
|
||||
"status": "succeeded",
|
||||
@@ -149,7 +165,7 @@
|
||||
"finished_at": "2026-06-29T13:05:13.341357Z"
|
||||
},
|
||||
{
|
||||
"source_run_id": 3389,
|
||||
"source_run_id": 3390,
|
||||
"source_name": "official_margin_short_daily",
|
||||
"target_date": "2026-06-29",
|
||||
"status": "succeeded",
|
||||
@@ -171,6 +187,22 @@
|
||||
"status": "succeeded",
|
||||
"started_at": "2026-06-29T13:00:07.822700Z",
|
||||
"finished_at": "2026-06-29T13:00:07.822700Z"
|
||||
},
|
||||
{
|
||||
"source_run_id": 3385,
|
||||
"source_name": "official_margin_short_daily",
|
||||
"target_date": "2026-06-29",
|
||||
"status": "official_pending",
|
||||
"started_at": "2026-06-29T12:05:16.716460Z",
|
||||
"finished_at": "2026-06-29T12:05:16.716460Z"
|
||||
},
|
||||
{
|
||||
"source_run_id": 3386,
|
||||
"source_name": "official_margin_short_daily",
|
||||
"target_date": "2026-06-29",
|
||||
"status": "official_pending",
|
||||
"started_at": "2026-06-29T12:05:16.716460Z",
|
||||
"finished_at": "2026-06-29T12:05:16.716460Z"
|
||||
}
|
||||
],
|
||||
"latest_trading_date": "2026-06-29",
|
||||
@@ -181,14 +213,15 @@
|
||||
"cleared_blocker": "core_margin_short_daily_missing",
|
||||
"cleared_ingestion_blocker": "core.margin_short_daily_incomplete",
|
||||
"successful_source_run_ids": [
|
||||
3390,
|
||||
3389
|
||||
3389,
|
||||
3390
|
||||
]
|
||||
},
|
||||
"ai_recommendations_recovery": {
|
||||
"status": "recovered",
|
||||
"cleared_blocker": "ai_recommendations_stale"
|
||||
}
|
||||
},
|
||||
"final_retry_checked_at": "2026-06-29T23:45:54+08:00"
|
||||
},
|
||||
"target_minutes": 10,
|
||||
"target_seconds": 600,
|
||||
|
||||
@@ -13,6 +13,9 @@ OUTPUT_NAME="${OUTPUT_NAME:-reboot_auto_recovery_slo.prom}"
|
||||
TARGET_MINUTES="${TARGET_MINUTES:-10}"
|
||||
MIN_FREE_GIB="${MIN_FREE_GIB:-2}"
|
||||
LOCK_FILE="${LOCK_FILE:-${LOG_DIR}/reboot_auto_recovery_slo.lock}"
|
||||
STOCK_FRESHNESS_URL="${STOCK_FRESHNESS_URL:-https://stock.wooo.work/api/v1/system/freshness}"
|
||||
STOCK_INGESTION_URL="${STOCK_INGESTION_URL:-https://stock.wooo.work/api/v1/system/ingestion}"
|
||||
STOCK_READBACK_TIMEOUT_SECONDS="${STOCK_READBACK_TIMEOUT_SECONDS:-10}"
|
||||
|
||||
mkdir -p "$TEXTFILE_DIR" "$LOG_DIR"
|
||||
|
||||
@@ -28,18 +31,39 @@ mkdir -p "$artifact_dir"
|
||||
host_probe="$artifact_dir/host-probe.txt"
|
||||
summary_file="$artifact_dir/summary.txt"
|
||||
scorecard_file="$artifact_dir/scorecard.json"
|
||||
stock_freshness_file="$artifact_dir/stock-freshness.json"
|
||||
stock_ingestion_file="$artifact_dir/stock-ingestion.json"
|
||||
|
||||
bash "$ROOT_DIR/scripts/reboot-recovery/reboot-auto-recovery-host-probe.sh" >"$host_probe" 2>&1 || true
|
||||
ARTIFACT_DIR="$artifact_dir/post-reboot-readiness" \
|
||||
bash "$ROOT_DIR/scripts/reboot-recovery/post-reboot-readiness-summary.sh" --no-color >"$summary_file" 2>&1 || true
|
||||
|
||||
# Best-effort readback of the StockPlatform freshness / ingestion endpoints.
# Each response body is saved as an artifact; curl's stderr is kept beside it
# for diagnosis. On any curl failure (-f turns HTTP errors into a non-zero
# exit) the partial body is removed so it is never handed to the scorecard.
if command -v curl >/dev/null 2>&1; then
  curl -fsS --max-time "$STOCK_READBACK_TIMEOUT_SECONDS" \
    "$STOCK_FRESHNESS_URL" >"$stock_freshness_file" 2>"$artifact_dir/stock-freshness.err" \
    || rm -f "$stock_freshness_file"
  curl -fsS --max-time "$STOCK_READBACK_TIMEOUT_SECONDS" \
    "$STOCK_INGESTION_URL" >"$stock_ingestion_file" 2>"$artifact_dir/stock-ingestion.err" \
    || rm -f "$stock_ingestion_file"
fi

# Assemble the scorecard command line as an array so the optional readback
# flags can be appended without re-quoting. Newlines separate array elements,
# so no line continuations are needed inside the parentheses.
scorecard_args=(
  "$ROOT_DIR/scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py"
  --summary-file "$summary_file"
  --host-probe-file "$host_probe"
  --target-minutes "$TARGET_MINUTES"
  --min-free-gib "$MIN_FREE_GIB"
  --disk-path /
  --output "$scorecard_file"
)

# Only pass a readback file when it exists and is non-empty (-s); a missing
# curl binary or a failed fetch simply leaves the flag off.
if [ -s "$stock_freshness_file" ]; then
  scorecard_args+=(--stock-freshness-file "$stock_freshness_file")
fi
if [ -s "$stock_ingestion_file" ]; then
  scorecard_args+=(--stock-ingestion-file "$stock_ingestion_file")
fi

# The scorecard run is best-effort: a non-zero exit must not abort the exporter.
python3 "${scorecard_args[@]}" || true
|
||||
|
||||
now="$(date +%s)"
|
||||
ready="$(python3 - "$scorecard_file" <<'PY'
|
||||
|
||||
@@ -264,6 +264,15 @@ def build_stockplatform_readback(
|
||||
or summary.get("STOCK_LATEST_TRADING_DATE")
|
||||
or ""
|
||||
)
|
||||
latest_source_runs = compact_source_runs(ingestion)
|
||||
margin_short_success_ids = [
|
||||
row.get("source_run_id")
|
||||
for row in latest_source_runs
|
||||
if row.get("source_name") == "official_margin_short_daily"
|
||||
and row.get("target_date") == latest_trading_date
|
||||
and row.get("status") == "succeeded"
|
||||
and row.get("source_run_id") is not None
|
||||
]
|
||||
eod_pending = truthy(summary.get("STOCK_EOD_WINDOW_PENDING"))
|
||||
eod_final_window = str(
|
||||
summary.get("STOCK_EOD_FINAL_RETRY_WINDOW_END_LOCAL") or "unknown"
|
||||
@@ -278,6 +287,15 @@ def build_stockplatform_readback(
|
||||
or final_passed
|
||||
)
|
||||
)
|
||||
recovery_gate_status = (
|
||||
"ready_to_open"
|
||||
if recovery_required
|
||||
else (
|
||||
"not_required_freshness_recovered"
|
||||
if freshness_status == "ok" and not freshness_blockers
|
||||
else "not_required_yet"
|
||||
)
|
||||
)
|
||||
return {
|
||||
"freshness_endpoint_readback_present": bool(freshness),
|
||||
"ingestion_endpoint_readback_present": bool(ingestion),
|
||||
@@ -291,7 +309,7 @@ def build_stockplatform_readback(
|
||||
for row in compact_stock_sources(freshness)
|
||||
if row["status"] not in {"ok", "warning"}
|
||||
],
|
||||
"latest_source_runs": compact_source_runs(ingestion),
|
||||
"latest_source_runs": latest_source_runs,
|
||||
"eod_window": {
|
||||
"pending": eod_pending,
|
||||
"classification": str(summary.get("STOCK_EOD_CLASSIFICATION") or "unknown"),
|
||||
@@ -304,7 +322,7 @@ def build_stockplatform_readback(
|
||||
},
|
||||
"controlled_recovery_gate": {
|
||||
"required": recovery_required,
|
||||
"status": "ready_to_open" if recovery_required else "not_required_yet",
|
||||
"status": recovery_gate_status,
|
||||
"target_selector": "stockplatform-v2:system_freshness:core.margin_short_daily,ai.recommendations",
|
||||
"allowed_actions": [
|
||||
"inspect_existing_ingestion_readback",
|
||||
@@ -320,6 +338,27 @@ def build_stockplatform_readback(
|
||||
"reboot_or_service_restart_from_reboot_slo_lane",
|
||||
],
|
||||
},
|
||||
"margin_short_recovery": {
|
||||
"status": (
|
||||
"recovered"
|
||||
if freshness_status == "ok"
|
||||
and "core_margin_short_daily_missing" not in freshness_blockers
|
||||
and bool(margin_short_success_ids)
|
||||
else "not_verified"
|
||||
),
|
||||
"cleared_blocker": "core_margin_short_daily_missing",
|
||||
"cleared_ingestion_blocker": "core.margin_short_daily_incomplete",
|
||||
"successful_source_run_ids": margin_short_success_ids,
|
||||
},
|
||||
"ai_recommendations_recovery": {
|
||||
"status": (
|
||||
"recovered"
|
||||
if freshness_status == "ok"
|
||||
and "ai_recommendations_stale" not in freshness_blockers
|
||||
else "not_verified"
|
||||
),
|
||||
"cleared_blocker": "ai_recommendations_stale",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -77,3 +77,16 @@ def test_exporter_uses_user_writable_lock_after_creating_log_dir() -> None:
|
||||
|
||||
assert 'LOCK_FILE="${LOCK_FILE:-${LOG_DIR}/reboot_auto_recovery_slo.lock}"' in text
|
||||
assert text.index('mkdir -p "$TEXTFILE_DIR" "$LOG_DIR"') < text.index('exec 9>"$LOCK_FILE"')
|
||||
|
||||
|
||||
def test_exporter_carries_stockplatform_readbacks_into_scorecard() -> None:
    """Statically check the exporter text for the StockPlatform readback wiring.

    The text read from ``EXPORTER`` must mention both readback URL variables,
    both artifact file assignments and both scorecard flags, and must not
    contain the forbidden ``manual_db_update`` / ``systemctl restart`` strings.
    """
    script_text = EXPORTER.read_text(encoding="utf-8")

    required_snippets = (
        "STOCK_FRESHNESS_URL",
        "STOCK_INGESTION_URL",
        'stock_freshness_file="$artifact_dir/stock-freshness.json"',
        'stock_ingestion_file="$artifact_dir/stock-ingestion.json"',
        "scorecard_args+=(--stock-freshness-file",
        "scorecard_args+=(--stock-ingestion-file",
    )
    forbidden_snippets = ("manual_db_update", "systemctl restart")

    # Presence checks run first, in the same order as the snippets above.
    for snippet in required_snippets:
        assert snippet in script_text

    # The exporter must stay read-only: none of these may appear in it.
    for snippet in forbidden_snippets:
        assert snippet not in script_text
|
||||
|
||||
@@ -200,6 +200,65 @@ STOCK_EOD_FINAL_RETRY_WINDOW_END_LOCAL=23:35
|
||||
)
|
||||
|
||||
|
||||
def test_stockplatform_recovered_marks_controlled_gate_not_required(
    tmp_path: Path,
) -> None:
    """Recovered freshness must leave the controlled recovery gate closed.

    Feeds the scorecard a green summary plus ``ok`` freshness / ingestion
    readbacks containing two succeeded ``official_margin_short_daily`` runs,
    generated at 23:40 local (the summary declares the final retry window end
    as 23:35), and checks the resulting StockPlatform section.
    """
    summary_text = GREEN_SUMMARY + """\
STOCK_FRESHNESS_STATUS=ok
STOCK_LATEST_TRADING_DATE=2026-06-29
STOCK_BLOCKERS=none
STOCK_EOD_WINDOW_PENDING=0
STOCK_EOD_CLASSIFICATION=recovered_after_21_05_retry_window
STOCK_EOD_NEXT_ACTION=rerun_slo_verify_only_after_next_fresh_all_host_reboot_event_or_approved_reboot_drill
STOCK_EOD_FIRST_FULL_WINDOW_END_LOCAL=19:15
STOCK_EOD_FINAL_RETRY_WINDOW_END_LOCAL=23:35
"""

    freshness_readback = {
        "status": "ok",
        "latest_trading_date": "2026-06-29",
        "blockers": [],
    }
    ingestion_readback = {
        "status": "ok",
        "latest_trading_date": "2026-06-29",
        "blockers": [],
        # Two succeeded margin-short runs for the latest trading date,
        # listed newest first.
        "latest_source_runs": [
            {
                "source_run_id": run_id,
                "source_name": "official_margin_short_daily",
                "target_date": "2026-06-29",
                "status": "succeeded",
            }
            for run_id in (3390, 3389)
        ],
    }

    payload = run_scorecard_with_stock(
        tmp_path,
        summary_text,
        freshness_readback,
        ingestion_readback,
        generated_at="2026-06-29T23:40:00+08:00",
    )

    stock_section = payload["stockplatform_data_freshness"]
    assert stock_section["eod_window"]["final_retry_window_passed"] is True

    gate = stock_section["controlled_recovery_gate"]
    assert gate["required"] is False
    assert gate["status"] == "not_required_freshness_recovered"

    margin_short = stock_section["margin_short_recovery"]
    assert margin_short["status"] == "recovered"
    # Run ids are expected in the same order as the ingestion readback.
    assert margin_short["successful_source_run_ids"] == [3390, 3389]

    assert stock_section["ai_recommendations_recovery"]["status"] == "recovered"
|
||||
|
||||
|
||||
def test_stockplatform_blocked_after_final_retry_opens_controlled_gate(
|
||||
tmp_path: Path,
|
||||
) -> None:
|
||||
|
||||
Reference in New Issue
Block a user