From b3183582a6876fbccabf598087f2f76ae4410cd4 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 30 Jun 2026 08:25:39 +0800 Subject: [PATCH] fix(api): verify production deploy image readback --- .gitea/workflows/cd.yaml | 92 +++++++++++++++--- ...oooi_production_deploy_readback_blocker.py | 71 ++++++++++---- .../services/delivery_closure_workbench.py | 22 +++++ ...oooi_production_deploy_readback_blocker.py | 94 +++++++++++++------ docs/LOGBOOK.md | 23 +++++ k8s/awoooi-prod/06-deployment-api.yaml | 5 + .../test_cd_controlled_runtime_profile.py | 8 ++ 7 files changed, 254 insertions(+), 61 deletions(-) diff --git a/.gitea/workflows/cd.yaml b/.gitea/workflows/cd.yaml index 2de14bbc..2bcecd05 100644 --- a/.gitea/workflows/cd.yaml +++ b/.gitea/workflows/cd.yaml @@ -1433,20 +1433,24 @@ jobs: path = Path("k8s/awoooi-prod/06-deployment-api.yaml") image_tag = os.environ["IMAGE_TAG"] text = path.read_text(encoding="utf-8") - pattern = ( - r'(\n\s+- name: AWOOOI_BUILD_COMMIT_SHA\n' - r'\s+# [^\n]*\n' - r'\s+# [^\n]*\n' - r'\s+value: ")[^"]*(")' - ) - if not re.search(pattern, text): - raise SystemExit("AWOOOI_BUILD_COMMIT_SHA env block not found") - text = re.sub( - pattern, - lambda match: f"{match.group(1)}{image_tag}{match.group(2)}", - text, - count=1, - ) + + def replace_env_value(source: str, env_name: str) -> str: + pattern = ( + rf'(\n\s+- name: {re.escape(env_name)}\n' + r'(?:\s+# [^\n]*\n)*' + r'\s+value: ")[^"]*(")' + ) + if not re.search(pattern, source): + raise SystemExit(f"{env_name} env block not found") + return re.sub( + pattern, + lambda match: f"{match.group(1)}{image_tag}{match.group(2)}", + source, + count=1, + ) + + text = replace_env_value(text, "AWOOOI_BUILD_COMMIT_SHA") + text = replace_env_value(text, "AWOOOI_DESIRED_API_IMAGE_TAG") path.write_text(text, encoding="utf-8") PY @@ -1676,6 +1680,66 @@ jobs: fi rm -f "$ROLLOUT_LOG" + if [ "$ROLLOUT_EXIT" -eq 0 ]; then + python3 - <<'PY' + import json + import os + import sys + import urllib.request + + expected = os.environ["IMAGE_TAG"].strip().lower() + expected_short = expected[:10] + url = "https://awoooi.wooo.work/api/v1/agents/delivery-closure-workbench" + try: + with urllib.request.urlopen(url, timeout=20) as response: + payload = json.load(response) + except Exception as exc: + print( + "production_workbench_deploy_readback_failed=" + f"{type(exc).__name__}", + file=sys.stderr, + ) + raise SystemExit(1) from exc + + summary = payload.get("summary") if isinstance(payload, dict) else {} + if not isinstance(summary, dict): + summary = {} + runtime_short = str( + summary.get("production_deploy_runtime_build_commit_short_sha") or "" + ) + desired_short = str( + summary.get("production_deploy_desired_main_api_image_tag_short_sha") + or "" + ) + desired_status = str( + summary.get( + "production_deploy_desired_main_api_image_tag_readback_status" + ) + or "" + ) + matches_main = summary.get("production_deploy_image_tag_matches_main") is True + if ( + runtime_short != expected_short + or desired_short != expected_short + or desired_status != "ok" + or not matches_main + ): + print( + "production_deploy_readback_mismatch=" + f"expected={expected_short};runtime={runtime_short};" + f"desired={desired_short};desired_status={desired_status};" + f"matches_main={matches_main}", + file=sys.stderr, + ) + raise SystemExit(1) + + print( + "✅ Production deploy readback matches Gitea main desired image tag " + f"({expected_short})" + ) + PY + fi + if [ "$ROLLOUT_EXIT" -eq 0 ] && [ "$ROLLOUT_RISK" = "1" ]; then ACTOR="${GITHUB_ACTOR:-${{ github.actor }}}" if AWOOI_CICD_STATUS=pending \ diff --git a/apps/api/src/services/awoooi_production_deploy_readback_blocker.py b/apps/api/src/services/awoooi_production_deploy_readback_blocker.py index fcec59a5..7e96e871 100644 --- a/apps/api/src/services/awoooi_production_deploy_readback_blocker.py +++ b/apps/api/src/services/awoooi_production_deploy_readback_blocker.py @@ -118,29 +118,62 @@ def _enrich_runtime_build_readback(payload: dict[str, Any]) -> None: else "runtime_build_diverges_from_committed_deploy_readback" ) - if source_matches_runtime and image_matches_runtime: + desired_tag = os.getenv("AWOOOI_DESIRED_API_IMAGE_TAG", "").strip().lower() + if not _SHA_RE.fullmatch(desired_tag): + readback["desired_main_api_image_tag_readback_status"] = "unavailable" + _mark_runtime_image_tag_blocked( + payload, + blocker="gitea_main_desired_api_image_tag_readback_unavailable", + next_action=( + "redeploy_with_awoooi_desired_api_image_tag_env_matching_gitops" + "_desired_api_tag" + ), + ) return - # The runtime build SHA is evidence about the currently running image, not - # permission to rewrite source-control truth. If it diverges from the - # committed deploy snapshot, fail closed so Workbench cannot report a false - # "image tag matches main" closure while production routes are stale. + readback["desired_main_api_image_tag_source"] = "gitops_deployment_env" + readback["desired_main_api_image_tag_readback_status"] = "ok" + readback["desired_main_api_image_tag_sha"] = desired_tag + readback["desired_main_api_image_tag_short_sha"] = desired_tag[:10] + image_matches_main = build_sha == desired_tag + readback["production_image_tag_matches_main"] = image_matches_main + rollups["source_control_main_ready"] = True + rollups["production_image_tag_matches_main"] = image_matches_main + if image_matches_main: + return + + _mark_runtime_image_tag_blocked( + payload, + blocker="production_runtime_image_tag_does_not_match_gitea_main_desired_tag", + next_action=( + "complete_cd_rollout_until_runtime_build_commit_matches_gitops" + "_desired_api_image_tag_env" + ), + source_control_main_ready=True, + ) + + +def _mark_runtime_image_tag_blocked( + payload: dict[str, Any], + *, + blocker: str, + next_action: str, + source_control_main_ready: bool = False, +) -> None: + payload["status"] = "blocked_production_runtime_image_tag_not_verified" + readback = _dict(payload.get("readback")) readback["production_image_tag_matches_main"] = False - rollups["source_control_main_ready"] = False + blockers = _list(payload.setdefault("blockers", [])) + if blocker not in blockers: + blockers.append(blocker) + next_actions = _list(payload.setdefault("next_actions", [])) + if next_action not in next_actions: + next_actions.append(next_action) + rollups = _dict(payload.get("rollups")) + rollups["hard_blocker_count"] = len(blockers) + rollups["next_action_count"] = len(next_actions) + rollups["source_control_main_ready"] = source_control_main_ready rollups["production_image_tag_matches_main"] = False - payload["status"] = "runtime_build_readback_stale" - _append_unique( - payload, - "blockers", - "runtime_build_commit_sha_does_not_match_committed_production_readback", - ) - _append_unique( - payload, - "next_actions", - "refresh_production_deploy_readback_from_current_main_and_runtime_route_smoke", - ) - rollups["hard_blocker_count"] = len(_list(payload.get("blockers"))) - rollups["next_action_count"] = len(_list(payload.get("next_actions"))) def _require_no_internal_network_literals(value: Any, label: str) -> None: diff --git a/apps/api/src/services/delivery_closure_workbench.py b/apps/api/src/services/delivery_closure_workbench.py index db23d16d..932286a8 100644 --- a/apps/api/src/services/delivery_closure_workbench.py +++ b/apps/api/src/services/delivery_closure_workbench.py @@ -311,6 +311,18 @@ def build_delivery_closure_workbench( ) is True ), + "desired_main_api_image_tag_short_sha": str( + production_deploy_readback.get( + "desired_main_api_image_tag_short_sha" + ) + or "" + ), + "desired_main_api_image_tag_readback_status": str( + production_deploy_readback.get( + "desired_main_api_image_tag_readback_status" + ) + or "" + ), "production_image_tag_matches_main": production_deploy_readback.get( "production_image_tag_matches_main" ) @@ -1673,6 +1685,16 @@ def build_delivery_closure_workbench( ) is True ), + "production_deploy_desired_main_api_image_tag_short_sha": str( + production_deploy_readback.get("desired_main_api_image_tag_short_sha") + or "" + ), + "production_deploy_desired_main_api_image_tag_readback_status": str( + production_deploy_readback.get( + "desired_main_api_image_tag_readback_status" + ) + or "" + ), "production_deploy_governance_fields_present": production_deploy_rollups.get( "production_governance_fields_present" ) diff --git a/apps/api/tests/test_awoooi_production_deploy_readback_blocker.py b/apps/api/tests/test_awoooi_production_deploy_readback_blocker.py index bf83fb59..f7d297bc 100644 --- a/apps/api/tests/test_awoooi_production_deploy_readback_blocker.py +++ b/apps/api/tests/test_awoooi_production_deploy_readback_blocker.py @@ -1,26 +1,25 @@ from __future__ import annotations -from src.services.awoooi_production_deploy_readback_blocker import ( - load_latest_awoooi_production_deploy_readback_blocker, -) +from src.services import awoooi_production_deploy_readback_blocker as service -def test_production_deploy_readback_uses_runtime_build_commit(monkeypatch): +_COMMITTED_SNAPSHOT_SHA = "a70c6756d9e76c33143676eef82bab7a49ac1839" + + +def test_production_deploy_readback_verifies_runtime_build_against_gitops_desired( + monkeypatch, +): build_sha = "0123456789abcdef0123456789abcdef01234567" monkeypatch.setenv("AWOOOI_BUILD_COMMIT_SHA", build_sha) + monkeypatch.setenv("AWOOOI_DESIRED_API_IMAGE_TAG", build_sha) - payload = load_latest_awoooi_production_deploy_readback_blocker() + payload = service.load_latest_awoooi_production_deploy_readback_blocker() readback = payload["readback"] rollups = payload["rollups"] assert readback["runtime_build_commit_sha"] == build_sha assert readback["runtime_build_commit_short_sha"] == build_sha[:10] - assert readback["observed_source_control_main_sha"] == ( - "a70c6756d9e76c33143676eef82bab7a49ac1839" - ) - assert readback["production_image_tag_sha"] == ( - "a70c6756d9e76c33143676eef82bab7a49ac1839" - ) + assert readback["production_image_tag_sha"] == _COMMITTED_SNAPSHOT_SHA assert ( readback["runtime_build_matches_committed_source_control_readback"] is False @@ -32,23 +31,21 @@ def test_production_deploy_readback_uses_runtime_build_commit(monkeypatch): assert readback["runtime_build_readback_status"] == ( "runtime_build_diverges_from_committed_deploy_readback" ) - assert readback["production_image_tag_matches_main"] is False - assert payload["status"] == "runtime_build_readback_stale" - assert rollups["source_control_main_ready"] is False - assert rollups["production_image_tag_matches_main"] is False - assert rollups["hard_blocker_count"] == 1 - assert payload["blockers"] == [ - "runtime_build_commit_sha_does_not_match_committed_production_readback" - ] + assert readback["desired_main_api_image_tag_sha"] == build_sha + assert readback["desired_main_api_image_tag_source"] == "gitops_deployment_env" + assert readback["desired_main_api_image_tag_readback_status"] == "ok" + assert readback["production_image_tag_matches_main"] is True + assert payload["status"] == "closure_verified" + assert rollups["production_image_tag_matches_main"] is True + assert rollups["hard_blocker_count"] == 0 -def test_production_deploy_readback_keeps_closure_when_runtime_matches_snapshot( - monkeypatch, -): - build_sha = "a70c6756d9e76c33143676eef82bab7a49ac1839" +def test_production_deploy_readback_keeps_committed_snapshot_evidence(monkeypatch): + build_sha = _COMMITTED_SNAPSHOT_SHA monkeypatch.setenv("AWOOOI_BUILD_COMMIT_SHA", build_sha) + monkeypatch.setenv("AWOOOI_DESIRED_API_IMAGE_TAG", build_sha) - payload = load_latest_awoooi_production_deploy_readback_blocker() + payload = service.load_latest_awoooi_production_deploy_readback_blocker() readback = payload["readback"] rollups = payload["rollups"] @@ -58,19 +55,60 @@ def test_production_deploy_readback_keeps_closure_when_runtime_matches_snapshot( assert readback["runtime_build_readback_status"] == ( "matches_committed_deploy_readback" ) + assert readback["desired_main_api_image_tag_sha"] == build_sha assert readback["production_image_tag_matches_main"] is True assert payload["status"] == "closure_verified" assert rollups["production_image_tag_matches_main"] is True assert rollups["hard_blocker_count"] == 0 +def test_production_deploy_readback_blocks_runtime_build_mismatch(monkeypatch): + build_sha = "0123456789abcdef0123456789abcdef01234567" + desired_sha = "abcdef0123456789abcdef0123456789abcdef01" + monkeypatch.setenv("AWOOOI_BUILD_COMMIT_SHA", build_sha) + monkeypatch.setenv("AWOOOI_DESIRED_API_IMAGE_TAG", desired_sha) + + payload = service.load_latest_awoooi_production_deploy_readback_blocker() + readback = payload["readback"] + rollups = payload["rollups"] + + assert payload["status"] == "blocked_production_runtime_image_tag_not_verified" + assert readback["runtime_build_commit_sha"] == build_sha + assert readback["runtime_build_readback_status"] == ( + "runtime_build_diverges_from_committed_deploy_readback" + ) + assert readback["desired_main_api_image_tag_sha"] == desired_sha + assert readback["production_image_tag_matches_main"] is False + assert rollups["source_control_main_ready"] is True + assert rollups["production_image_tag_matches_main"] is False + assert rollups["hard_blocker_count"] == 1 + assert "production_runtime_image_tag_does_not_match_gitea_main_desired_tag" in ( + payload["blockers"] + ) + + +def test_production_deploy_readback_blocks_unavailable_gitops_desired(monkeypatch): + build_sha = "0123456789abcdef0123456789abcdef01234567" + monkeypatch.setenv("AWOOOI_BUILD_COMMIT_SHA", build_sha) + monkeypatch.delenv("AWOOOI_DESIRED_API_IMAGE_TAG", raising=False) + + payload = service.load_latest_awoooi_production_deploy_readback_blocker() + readback = payload["readback"] + + assert payload["status"] == "blocked_production_runtime_image_tag_not_verified" + assert readback["desired_main_api_image_tag_readback_status"] == "unavailable" + assert readback["production_image_tag_matches_main"] is False + assert payload["rollups"]["source_control_main_ready"] is False + assert "gitea_main_desired_api_image_tag_readback_unavailable" in payload[ + "blockers" + ] + + def test_production_deploy_readback_ignores_non_sha_runtime_value(monkeypatch): monkeypatch.setenv("AWOOOI_BUILD_COMMIT_SHA", "none") - payload = load_latest_awoooi_production_deploy_readback_blocker() + payload = service.load_latest_awoooi_production_deploy_readback_blocker() readback = payload["readback"] assert "runtime_build_commit_sha" not in readback - assert readback["production_image_tag_sha"] == ( - "a70c6756d9e76c33143676eef82bab7a49ac1839" - ) + assert readback["production_image_tag_sha"] == _COMMITTED_SNAPSHOT_SHA diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 9146f602..7b7f7048 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -50025,6 +50025,29 @@ production browser smoke: - 沒有重啟主機,沒有 restart Docker / Nginx / K3s / DB / firewall。 - 沒有使用 GitHub / gh / GitHub API / GitHub Actions。 +## 2026-06-30 — 08:18 P0 production deploy readback false-positive closeout + +**狀態**: +- Gitea `main` 已有 `49c02e5b3` API / Workbench source 與 `6083cb71a` CD deploy marker,但 production 兩個新 API route 仍回 404,公開 Gitea Actions 顯示 CD #4003 `Failure`。 +- 既有 production deploy readback 只用 runtime `AWOOOI_BUILD_COMMIT_SHA` 覆寫 source/main 欄位,可能讓舊 production image 自稱 `production_image_tag_matches_main=true`。 + +**完成內容**: +- `awoooi_production_deploy_readback_blocker` 改為比對 runtime `AWOOOI_BUILD_COMMIT_SHA` 與 GitOps desired env `AWOOOI_DESIRED_API_IMAGE_TAG`;缺少 desired env 或不一致時 fail-closed 成 blocker。 +- `k8s/awoooi-prod/06-deployment-api.yaml` 新增 `AWOOOI_DESIRED_API_IMAGE_TAG`,CD rewrite 會與 `AWOOOI_BUILD_COMMIT_SHA` 一起更新。 +- CD rollout 後新增 production Workbench readback gate:runtime short SHA、desired short SHA、desired status 與 `matches_main` 必須等於本次 `IMAGE_TAG` 才算 deploy closure。 +- Delivery Workbench summary/lane evidence 新增 runtime tag、desired tag、desired status 欄位,方便 production readback 直接看出 image truth。 + +**本地驗證結果**: +- `DATABASE_URL=postgresql://test:test@localhost:5432/test python3.11 -m pytest apps/api/tests/test_awoooi_production_deploy_readback_blocker.py apps/api/tests/test_delivery_closure_workbench_api.py ops/runner/test_cd_controlled_runtime_profile.py ops/runner/test_read_public_gitea_actions_queue.py ops/runner/test_verify_awoooi_non110_cd_closure.py -q`:`44 passed`。 +- `DATABASE_URL=postgresql://test:test@localhost:5432/test python3.11 -m pytest apps/api/tests/test_gitea_workflow_runner_owner_attestation_request_api.py apps/api/tests/test_ai_agent_log_controlled_writeback_consumer_readback_api.py apps/api/tests/test_gitea_workflow_runner_health.py apps/api/tests/test_delivery_closure_workbench_api.py -q`:`15 passed`。 +- `py_compile`、`.gitea/workflows/cd.yaml` / K8s YAML parse、`ops/runner/guard-gitea-runner-pressure.py --root .`、`scripts/ci/check-gitea-step-env-secrets.js`、`git diff --check`:通過。 +- local live smoke:`runtime=49c02e5b30`、`desired=49c02e5b30`、`desired_status=ok`、`matches=true`、`hard_blockers=0`。 + +**仍維持**: +- 沒有讀 secret / token / `.env` / raw sessions / SQLite / auth。 +- 沒有使用 GitHub / gh / GitHub API / GitHub Actions。 +- 沒有重啟主機,沒有 workflow_dispatch,沒有 host / Docker / K8s / DB / firewall runtime 寫入。 + ## 2026-06-29 — 16:16 P0-003 CD controlled-runtime deploy marker fix **狀態**: diff --git a/k8s/awoooi-prod/06-deployment-api.yaml b/k8s/awoooi-prod/06-deployment-api.yaml index 44163ab7..afabd9c3 100644 --- a/k8s/awoooi-prod/06-deployment-api.yaml +++ b/k8s/awoooi-prod/06-deployment-api.yaml @@ -80,6 +80,11 @@ spec: # 2026-06-29 Codex: CD rewrites this to the deployed image tag so # production deploy readback does not rely on a stale static snapshot. value: "49c02e5b306709f42c2b860a60436b7b4834a1ae" + - name: AWOOOI_DESIRED_API_IMAGE_TAG + # 2026-06-30 Codex: CD rewrites this alongside AWOOOI_BUILD_COMMIT_SHA. + # Production readback compares runtime image truth against this + # GitOps desired tag instead of doing a slow Gitea raw fetch. + value: "49c02e5b306709f42c2b860a60436b7b4834a1ae" - name: USE_AI_ROUTER value: "true" - name: ENABLE_NEMOTRON_COLLABORATION diff --git a/ops/runner/test_cd_controlled_runtime_profile.py b/ops/runner/test_cd_controlled_runtime_profile.py index a941c4b6..af45e236 100644 --- a/ops/runner/test_cd_controlled_runtime_profile.py +++ b/ops/runner/test_cd_controlled_runtime_profile.py @@ -50,6 +50,14 @@ def test_workflow_secret_transport_sources_stay_on_controlled_runtime_profile() assert source in text +def test_cd_requires_production_deploy_readback_after_rollout() -> None: + text = _workflow_text() + assert "production_deploy_readback_mismatch=" in text + assert "production_deploy_runtime_build_commit_short_sha" in text + assert "production_deploy_desired_main_api_image_tag_short_sha" in text + assert "production_deploy_desired_main_api_image_tag_readback_status" in text + + def test_onboarding_warning_step_template_stays_on_controlled_runtime_profile() -> None: text = _workflow_text() assert "onboarding warning-step workflow is" in text