fix(api): ignore stale cd failure after production readback
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 38s
CD Pipeline / build-and-deploy (push) Successful in 6m11s
CD Pipeline / post-deploy-checks (push) Successful in 1m55s
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 38s
CD Pipeline / build-and-deploy (push) Successful in 6m11s
CD Pipeline / post-deploy-checks (push) Successful in 1m55s
This commit is contained in:
@@ -618,6 +618,44 @@ def apply_ai_loop_current_blocker_execution_queue(
|
||||
)
|
||||
|
||||
state = _dict(payload.setdefault("mainline_execution_state", {}))
|
||||
current_head = _dict(payload.get("current_head"))
|
||||
production_readback_verified = bool(
|
||||
state.get("current_main_cd_run_status") == "production_readback_verified"
|
||||
and current_head.get("production_source_truth_available") is True
|
||||
and _is_sha(str(state.get("latest_successful_deployed_source_sha") or ""))
|
||||
)
|
||||
deploy_marker_resolved_by_production_readback = bool(
|
||||
production_readback_verified
|
||||
and (deploy_marker_readback_required or cd_failed_after_registry_ready)
|
||||
)
|
||||
active_deployment_closure_state = (
|
||||
"production_readback_verified"
|
||||
if deploy_marker_resolved_by_production_readback
|
||||
else deployment_closure_state
|
||||
)
|
||||
active_deploy_marker_readback_required = bool(
|
||||
deploy_marker_readback_required
|
||||
and not deploy_marker_resolved_by_production_readback
|
||||
)
|
||||
active_cd_failed_after_registry_ready = bool(
|
||||
cd_failed_after_registry_ready
|
||||
and not deploy_marker_resolved_by_production_readback
|
||||
)
|
||||
active_current_cd_run_id = (
|
||||
str(state.get("current_main_cd_run_id") or "")
|
||||
if deploy_marker_resolved_by_production_readback
|
||||
else current_cd_run_id
|
||||
)
|
||||
active_current_cd_run_status = (
|
||||
"production_readback_verified"
|
||||
if deploy_marker_resolved_by_production_readback
|
||||
else current_cd_run_status
|
||||
)
|
||||
active_current_cd_commit_sha = (
|
||||
str(state.get("latest_successful_deployed_source_sha") or "")
|
||||
if deploy_marker_resolved_by_production_readback
|
||||
else current_cd_commit_sha
|
||||
)
|
||||
state["active_p0_state"] = "blocked_ai_loop_current_blocker_execution_queue"
|
||||
state["next_executable_mainline_workplan_id"] = (
|
||||
"P0-006-AI-LOOP-CURRENT-BLOCKER-EXECUTION-QUEUE"
|
||||
@@ -632,15 +670,38 @@ def apply_ai_loop_current_blocker_execution_queue(
|
||||
registry_v2_status_classifier
|
||||
)
|
||||
state["ai_loop_current_blocker_deployment_closure_state"] = (
|
||||
deployment_closure_state
|
||||
active_deployment_closure_state
|
||||
)
|
||||
state["ai_loop_current_blocker_deploy_marker_readback_required"] = (
|
||||
active_deploy_marker_readback_required
|
||||
)
|
||||
state["ai_loop_current_blocker_current_cd_run_id"] = active_current_cd_run_id
|
||||
state["ai_loop_current_blocker_current_cd_run_status"] = (
|
||||
active_current_cd_run_status
|
||||
)
|
||||
state["ai_loop_current_blocker_current_cd_commit_sha"] = (
|
||||
active_current_cd_commit_sha
|
||||
)
|
||||
state["ai_loop_current_blocker_cd_failed_after_registry_ready"] = (
|
||||
active_cd_failed_after_registry_ready
|
||||
)
|
||||
state["ai_loop_current_blocker_deploy_marker_resolved_by_production_readback"] = (
|
||||
deploy_marker_resolved_by_production_readback
|
||||
)
|
||||
state["ai_loop_current_blocker_historical_deployment_closure_state"] = (
|
||||
deployment_closure_state
|
||||
)
|
||||
state["ai_loop_current_blocker_historical_deploy_marker_readback_required"] = (
|
||||
deploy_marker_readback_required
|
||||
)
|
||||
state["ai_loop_current_blocker_current_cd_run_id"] = current_cd_run_id
|
||||
state["ai_loop_current_blocker_current_cd_run_status"] = current_cd_run_status
|
||||
state["ai_loop_current_blocker_current_cd_commit_sha"] = current_cd_commit_sha
|
||||
state["ai_loop_current_blocker_cd_failed_after_registry_ready"] = (
|
||||
state["ai_loop_current_blocker_historical_current_cd_run_id"] = current_cd_run_id
|
||||
state["ai_loop_current_blocker_historical_current_cd_run_status"] = (
|
||||
current_cd_run_status
|
||||
)
|
||||
state["ai_loop_current_blocker_historical_current_cd_commit_sha"] = (
|
||||
current_cd_commit_sha
|
||||
)
|
||||
state["ai_loop_current_blocker_historical_cd_failed_after_registry_ready"] = (
|
||||
cd_failed_after_registry_ready
|
||||
)
|
||||
state["ai_loop_current_blocker_harbor_110_repair_run_id"] = (
|
||||
@@ -717,12 +778,12 @@ def apply_ai_loop_current_blocker_execution_queue(
|
||||
+ ([pressure_blocker] if pressure_blocker else [])
|
||||
+ (
|
||||
["deploy_marker_readback_required_after_registry_ready"]
|
||||
if deploy_marker_readback_required
|
||||
if active_deploy_marker_readback_required
|
||||
else []
|
||||
)
|
||||
+ (
|
||||
["current_cd_failure_after_registry_ready"]
|
||||
if cd_failed_after_registry_ready
|
||||
if active_cd_failed_after_registry_ready
|
||||
else []
|
||||
)
|
||||
)
|
||||
@@ -749,19 +810,44 @@ def apply_ai_loop_current_blocker_execution_queue(
|
||||
registry_v2_status_classifier
|
||||
)
|
||||
evidence["ai_loop_current_blocker_deployment_closure_state"] = (
|
||||
deployment_closure_state
|
||||
active_deployment_closure_state
|
||||
)
|
||||
evidence["ai_loop_current_blocker_deploy_marker_readback_required"] = (
|
||||
deploy_marker_readback_required
|
||||
active_deploy_marker_readback_required
|
||||
)
|
||||
evidence["ai_loop_current_blocker_current_cd_run_id"] = (
|
||||
active_current_cd_run_id
|
||||
)
|
||||
evidence["ai_loop_current_blocker_current_cd_run_id"] = current_cd_run_id
|
||||
evidence["ai_loop_current_blocker_current_cd_run_status"] = (
|
||||
current_cd_run_status
|
||||
active_current_cd_run_status
|
||||
)
|
||||
evidence["ai_loop_current_blocker_current_cd_commit_sha"] = (
|
||||
current_cd_commit_sha
|
||||
active_current_cd_commit_sha
|
||||
)
|
||||
evidence["ai_loop_current_blocker_cd_failed_after_registry_ready"] = (
|
||||
active_cd_failed_after_registry_ready
|
||||
)
|
||||
evidence[
|
||||
"ai_loop_current_blocker_deploy_marker_resolved_by_production_readback"
|
||||
] = deploy_marker_resolved_by_production_readback
|
||||
evidence["ai_loop_current_blocker_historical_deployment_closure_state"] = (
|
||||
deployment_closure_state
|
||||
)
|
||||
evidence[
|
||||
"ai_loop_current_blocker_historical_deploy_marker_readback_required"
|
||||
] = deploy_marker_readback_required
|
||||
evidence["ai_loop_current_blocker_historical_current_cd_run_id"] = (
|
||||
current_cd_run_id
|
||||
)
|
||||
evidence["ai_loop_current_blocker_historical_current_cd_run_status"] = (
|
||||
current_cd_run_status
|
||||
)
|
||||
evidence["ai_loop_current_blocker_historical_current_cd_commit_sha"] = (
|
||||
current_cd_commit_sha
|
||||
)
|
||||
evidence[
|
||||
"ai_loop_current_blocker_historical_cd_failed_after_registry_ready"
|
||||
] = (
|
||||
cd_failed_after_registry_ready
|
||||
)
|
||||
evidence["ai_loop_current_blocker_harbor_110_repair_run_id"] = (
|
||||
@@ -909,20 +995,21 @@ def apply_ai_loop_current_blocker_execution_queue(
|
||||
"SSH/session control-path readback, ordered local-console phases, "
|
||||
"post-recovery queue readbacks, and metadata-only KM/RAG/MCP/"
|
||||
"PlayBook writeback."
|
||||
),
|
||||
(
|
||||
"P0-006-CD-DEPLOY-MARKER-READBACK: close the latest visible CD "
|
||||
f"{current_cd_run_id or 'unknown'} status "
|
||||
f"{current_cd_run_status or 'unknown'} and verify deploy marker / "
|
||||
"production image / priority API before claiming runtime closure."
|
||||
),
|
||||
(
|
||||
"P0-006-HARBOR-REGISTRY-CONTROLLED-RECOVERY-PREFLIGHT: only "
|
||||
"rerun Harbor watchdog repair if registry /v2/ regresses below "
|
||||
"200/401; otherwise keep focus on 110 SSH control-path and "
|
||||
"deploy-marker closure."
|
||||
),
|
||||
)
|
||||
]
|
||||
if not deploy_marker_resolved_by_production_readback:
|
||||
payload["next_execution_order"].append(
|
||||
"P0-006-CD-DEPLOY-MARKER-READBACK: close the latest visible CD "
|
||||
f"{active_current_cd_run_id or 'unknown'} status "
|
||||
f"{active_current_cd_run_status or 'unknown'} and verify deploy marker / "
|
||||
"production image / priority API before claiming runtime closure."
|
||||
)
|
||||
payload["next_execution_order"].append(
|
||||
"P0-006-HARBOR-REGISTRY-CONTROLLED-RECOVERY-PREFLIGHT: only "
|
||||
"rerun Harbor watchdog repair if registry /v2/ regresses below "
|
||||
"200/401; otherwise keep focus on 110 SSH control-path and "
|
||||
"deploy-marker closure."
|
||||
)
|
||||
else:
|
||||
payload["next_execution_order"] = [
|
||||
(
|
||||
@@ -938,14 +1025,15 @@ def apply_ai_loop_current_blocker_execution_queue(
|
||||
"P0-006-HARBOR-REGISTRY-CONTROLLED-RECOVERY-PREFLIGHT: after the "
|
||||
"AI Loop queue item verifies 110 control path, rerun Harbor "
|
||||
"watchdog check-mode / repair-once and registry /v2/ readback."
|
||||
),
|
||||
(
|
||||
)
|
||||
]
|
||||
if not deploy_marker_resolved_by_production_readback:
|
||||
payload["next_execution_order"].append(
|
||||
"P0-006-CD-DEPLOY-MARKER-READBACK: after registry /v2/ is "
|
||||
"200/401, let the next Gitea CD run build/push/deploy and then "
|
||||
"verify delivery-closure-workbench, priority work order, and "
|
||||
"production marker advance."
|
||||
),
|
||||
]
|
||||
)
|
||||
_refresh_rollups_after_stockplatform_overlay(payload, state)
|
||||
summary = _dict(payload.setdefault("summary", {}))
|
||||
summary["ai_loop_current_blocker_execution_queue_count"] = len(queue)
|
||||
@@ -955,14 +1043,22 @@ def apply_ai_loop_current_blocker_execution_queue(
|
||||
registry_v2_status_classifier
|
||||
)
|
||||
summary["ai_loop_current_blocker_deployment_closure_state"] = (
|
||||
deployment_closure_state
|
||||
active_deployment_closure_state
|
||||
)
|
||||
summary["ai_loop_current_blocker_deploy_marker_readback_required"] = (
|
||||
deploy_marker_readback_required
|
||||
active_deploy_marker_readback_required
|
||||
)
|
||||
summary["ai_loop_current_blocker_current_cd_run_status"] = (
|
||||
active_current_cd_run_status
|
||||
)
|
||||
summary["ai_loop_current_blocker_current_cd_run_status"] = current_cd_run_status
|
||||
summary["ai_loop_current_blocker_cd_failed_after_registry_ready"] = (
|
||||
cd_failed_after_registry_ready
|
||||
active_cd_failed_after_registry_ready
|
||||
)
|
||||
summary["ai_loop_current_blocker_deploy_marker_resolved_by_production_readback"] = (
|
||||
deploy_marker_resolved_by_production_readback
|
||||
)
|
||||
summary["ai_loop_current_blocker_historical_current_cd_run_status"] = (
|
||||
current_cd_run_status
|
||||
)
|
||||
summary["ai_loop_current_blocker_harbor_110_repair_run_status"] = (
|
||||
harbor_110_repair_run_status
|
||||
|
||||
@@ -799,6 +799,81 @@ def test_awoooi_priority_work_order_readback_normalizes_runtime_source_truth(
|
||||
assert "f426522" not in current_truth
|
||||
|
||||
|
||||
def test_awoooi_priority_work_order_readback_does_not_reopen_stale_cd_failure_after_production_readback(
    monkeypatch: pytest.MonkeyPatch,
):
    """Keep a stale CD failure historical once production readback is verified.

    The executor's first current-blocker queue entry is overlaid with a failed
    CD run (#4258) that still demands deploy-marker readback.  After
    ``apply_ai_loop_current_blocker_execution_queue`` runs, the active
    ``ai_loop_current_blocker_*`` fields must report
    ``production_readback_verified``, the failed run must survive only under
    the ``historical_*`` keys, and neither the active blockers nor
    ``next_execution_order`` may mention the deploy-marker readback step.
    """
    deployed_sha = "b4dc407ce05c68a3908b993437a61b869d83810f"
    deployed_short_sha = deployed_sha[:10]
    # Both variables carry the same SHA.
    # NOTE(review): presumably this is what makes the loaded readback report
    # production source truth as verified -- confirm against the loader.
    for env_name in ("AWOOOI_BUILD_COMMIT_SHA", "AWOOOI_DESIRED_API_IMAGE_TAG"):
        monkeypatch.setenv(env_name, deployed_sha)

    payload = load_latest_awoooi_priority_work_order_readback()
    registry_ready = _harbor_registry_ready()
    apply_harbor_registry_controlled_recovery_preflight(payload, registry_ready)

    # JSON round-trip yields an independent copy, so mutating the queue entry
    # below cannot touch the object returned by the loader.
    executor_readback = load_latest_ai_agent_log_controlled_writeback_executor_readback()
    executor = json.loads(json.dumps(executor_readback))

    stale_cd_failure = {
        "deployment_closure_state": (
            "blocked_latest_visible_cd_failure_after_registry_ready"
        ),
        "deploy_marker_readback_required": True,
        "current_cd_run_id": "4258",
        "current_cd_run_status": "Failure",
        "current_cd_commit_sha": "06819ea96c058e7987811e853242390eaced7f91",
        "cd_failed_after_registry_ready": True,
    }
    queue_head = executor["agent_consumption_context"][
        "current_blocker_execution_queue"
    ][0]
    queue_head.update(stale_cd_failure)

    apply_ai_loop_current_blocker_execution_queue(payload, executor)

    state = payload["mainline_execution_state"]
    evidence = payload["in_progress_or_blocked_in_priority_order"][0]["evidence"]
    blockers = state["active_p0_live_active_blockers"]
    verified = "production_readback_verified"

    # Active state: the stale failure is resolved by production readback.
    assert state["current_main_cd_run_status"] == verified
    assert state["ai_loop_current_blocker_deploy_marker_readback_required"] is False
    assert state["ai_loop_current_blocker_cd_failed_after_registry_ready"] is False
    resolved_in_state = state[
        "ai_loop_current_blocker_deploy_marker_resolved_by_production_readback"
    ]
    assert resolved_in_state is True
    assert state["ai_loop_current_blocker_current_cd_run_status"] == verified
    expected_run_id = f"production_readback:{deployed_short_sha}"
    assert state["ai_loop_current_blocker_current_cd_run_id"] == expected_run_id

    # Historical state: the failed run is still recorded for audit.
    assert state["ai_loop_current_blocker_historical_current_cd_run_id"] == "4258"
    historical_status_in_state = state[
        "ai_loop_current_blocker_historical_current_cd_run_status"
    ]
    assert historical_status_in_state == "Failure"

    # Neither stale blocker may be re-added to the active list.
    assert "deploy_marker_readback_required_after_registry_ready" not in blockers
    assert "current_cd_failure_after_registry_ready" not in blockers

    # Evidence mirrors the active/historical split.
    assert evidence["ai_loop_current_blocker_deploy_marker_readback_required"] is False
    assert evidence["ai_loop_current_blocker_cd_failed_after_registry_ready"] is False
    historical_marker_in_evidence = evidence[
        "ai_loop_current_blocker_historical_deploy_marker_readback_required"
    ]
    assert historical_marker_in_evidence is True
    historical_status_in_evidence = evidence[
        "ai_loop_current_blocker_historical_current_cd_run_status"
    ]
    assert historical_status_in_evidence == "Failure"

    # Summary rollup and the execution order agree with the resolved state.
    resolved_in_summary = payload["summary"][
        "ai_loop_current_blocker_deploy_marker_resolved_by_production_readback"
    ]
    assert resolved_in_summary is True
    summary_status = payload["summary"][
        "ai_loop_current_blocker_current_cd_run_status"
    ]
    assert summary_status == verified
    assert not any(
        "P0-006-CD-DEPLOY-MARKER-READBACK" in item
        for item in payload["next_execution_order"]
    )
|
||||
|
||||
|
||||
def test_awoooi_priority_work_order_readback_rejects_reordered_active_p0(tmp_path):
|
||||
operations_dir = tmp_path / "docs" / "operations"
|
||||
operations_dir.mkdir(parents=True)
|
||||
|
||||
@@ -25,6 +25,27 @@
|
||||
|
||||
**下一步**:照 P0 繼續收斂剩餘 WARN:先處理 MOMO source freshness / current-month confirmation 與 110 `backup-all` failed components;另開受控設計把高頻全量 pg_dump 改成專用 backup role + 分層備份,避免下一次重啟或 cron 再製造 DB/IO 壓力。
|
||||
|
||||
## 2026-07-01 — 21:20 P0-006 production readback 不再重開舊 CD failure
|
||||
|
||||
**照主線修正的問題**:
|
||||
- Gitea CD `#4293` 對 `87512d32f5` 已 Success,production priority readback 也讀到 `latest_successful_deployed_source_sha=87512d32f5a73849b2d8090092677a745c85f9a8` 與 `production_readback_verified`;但 AI Loop current blocker queue 仍把舊 `#4258 Failure` / deploy-marker readback requirement 寫回 active blocker 與 `next_execution_order`。
|
||||
- `apply_ai_loop_current_blocker_execution_queue` 現在會在 production source truth 已 verified 時,把舊 CD failure / deploy-marker requirement 保留在 `historical_*` evidence,但 active 欄位改為 `production_readback_verified`,且不再把 `deploy_marker_readback_required_after_registry_ready` / `current_cd_failure_after_registry_ready` 加回 active blockers。
|
||||
- P0-006 仍維持 blocked;剩餘主線 blocker 是 AI Loop current blocker execution queue / 110 control-path readback,不把 deploy marker 成功誤宣稱為 reboot SLO closure。
|
||||
|
||||
**驗證**:
|
||||
- `python3.11 -m py_compile apps/api/src/services/awoooi_priority_work_order_readback.py apps/api/tests/test_awoooi_priority_work_order_readback_api.py`:通過。
|
||||
- `DATABASE_URL=sqlite+aiosqlite:////tmp/awoooi-codex-api-test.db PYTHONPATH=apps/api python3.11 -m pytest apps/api/tests/test_awoooi_priority_work_order_readback_api.py -q`:`12 passed`。
|
||||
- `DATABASE_URL=sqlite+aiosqlite:////tmp/awoooi-codex-api-test.db PYTHONPATH=apps/api python3.11 -m pytest ops/runner/test_cd_controlled_runtime_profile.py apps/api/tests/test_awoooi_priority_work_order_readback_api.py apps/api/tests/test_reboot_auto_recovery_slo_scorecard_api.py apps/api/tests/test_delivery_closure_workbench_api.py -q`:`61 passed`。
|
||||
- `git diff --check`:通過。
|
||||
|
||||
**仍維持**:
|
||||
- 沒有使用 GitHub / `gh` / GitHub API / GitHub Actions。
|
||||
- 沒有讀 secret / token / `.env` / raw sessions / SQLite / auth。
|
||||
- 沒有重啟主機,沒有 Docker / Nginx / K3s / DB / firewall restart,沒有 workflow_dispatch,沒有 runtime write。
|
||||
|
||||
**下一步**:
|
||||
- 正常 push Gitea `main`,等 CD 完成後讀回 `/api/v1/agents/awoooi-priority-work-order-readback`;預期 active P0 仍是 `P0-006`,但 summary/current blocker 的 CD status 應為 `production_readback_verified`,`next_execution_order` 不再包含 `P0-006-CD-DEPLOY-MARKER-READBACK`。
|
||||
|
||||
## 2026-07-01 — 20:34 P0-006 reboot SLO machine-readback source closure
|
||||
|
||||
**照主線修正的問題**:
|
||||
|
||||
Reference in New Issue
Block a user