fix(api): verify production deploy image readback
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 39s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped

This commit is contained in:
Your Name
2026-06-30 08:25:39 +08:00
parent ff7febbf62
commit b3183582a6
7 changed files with 254 additions and 61 deletions

View File

@@ -1433,20 +1433,24 @@ jobs:
path = Path("k8s/awoooi-prod/06-deployment-api.yaml")
image_tag = os.environ["IMAGE_TAG"]
text = path.read_text(encoding="utf-8")
pattern = (
r'(\n\s+- name: AWOOOI_BUILD_COMMIT_SHA\n'
r'\s+# [^\n]*\n'
r'\s+# [^\n]*\n'
r'\s+value: ")[^"]*(")'
)
if not re.search(pattern, text):
raise SystemExit("AWOOOI_BUILD_COMMIT_SHA env block not found")
text = re.sub(
pattern,
lambda match: f"{match.group(1)}{image_tag}{match.group(2)}",
text,
count=1,
)
def replace_env_value(source: str, env_name: str) -> str:
    """Return source with one env entry's quoted value set to image_tag.

    Finds the first list item "- name: <env_name>", allows any number of
    "# ..." comment lines after it, and replaces the text between the double
    quotes of the following "value:" line with image_tag, which is read from
    the enclosing script scope.

    Args:
        source: Manifest text to rewrite.
        env_name: Exact env var name; it is regex-escaped before matching.

    Returns:
        The rewritten text. Only the first matching entry is changed.

    Raises:
        SystemExit: If no entry for env_name is found.
    """
    env_block = re.compile(
        rf'(\n\s+- name: {re.escape(env_name)}\n'
        r'(?:\s+# [^\n]*\n)*'
        r'\s+value: ")[^"]*(")'
    )

    def _with_image_tag(found):
        # A callable replacement inserts the tag verbatim, so backslashes or
        # group references inside the tag are never interpreted by re.
        return f"{found.group(1)}{image_tag}{found.group(2)}"

    # One pass: subn reports how many substitutions happened, so a separate
    # search for the "not found" case is unnecessary.
    updated, hits = env_block.subn(_with_image_tag, source, count=1)
    if hits == 0:
        raise SystemExit(f"{env_name} env block not found")
    return updated
text = replace_env_value(text, "AWOOOI_BUILD_COMMIT_SHA")
text = replace_env_value(text, "AWOOOI_DESIRED_API_IMAGE_TAG")
path.write_text(text, encoding="utf-8")
PY
@@ -1676,6 +1680,66 @@ jobs:
fi
rm -f "$ROLLOUT_LOG"
if [ "$ROLLOUT_EXIT" -eq 0 ]; then
python3 - <<'PY'
import json
import os
import sys
import urllib.request

# Post-rollout gate: fetch the delivery-closure-workbench payload and exit 1
# unless its summary reports the image tag that this run deployed.
workbench_url = "https://awoooi.wooo.work/api/v1/agents/delivery-closure-workbench"
# Only the first 10 characters of the normalized tag are compared.
wanted_short = os.environ["IMAGE_TAG"].strip().lower()[:10]

try:
    with urllib.request.urlopen(workbench_url, timeout=20) as reply:
        body = json.load(reply)
except Exception as exc:
    # Any fetch or decode failure blocks the deploy; only the exception type
    # name is printed.
    print(
        "production_workbench_deploy_readback_failed="
        f"{type(exc).__name__}",
        file=sys.stderr,
    )
    raise SystemExit(1) from exc

# Tolerate a non-dict payload or summary by treating it as an empty summary.
summary = body.get("summary") if isinstance(body, dict) else None
if not isinstance(summary, dict):
    summary = {}


def summary_text(field):
    """Return summary[field] as a string; missing or falsy values become ''."""
    return str(summary.get(field) or "")


seen_runtime = summary_text("production_deploy_runtime_build_commit_short_sha")
seen_desired = summary_text(
    "production_deploy_desired_main_api_image_tag_short_sha"
)
seen_status = summary_text(
    "production_deploy_desired_main_api_image_tag_readback_status"
)
# Must be the literal True; truthy stand-ins do not count.
flag_matches = summary.get("production_deploy_image_tag_matches_main") is True

gate_passed = (
    seen_runtime == wanted_short
    and seen_desired == wanted_short
    and seen_status == "ok"
    and flag_matches
)
if not gate_passed:
    print(
        "production_deploy_readback_mismatch="
        f"expected={wanted_short};runtime={seen_runtime};"
        f"desired={seen_desired};desired_status={seen_status};"
        f"matches_main={flag_matches}",
        file=sys.stderr,
    )
    raise SystemExit(1)

print(
    "✅ Production deploy readback matches Gitea main desired image tag "
    f"({wanted_short})"
)
PY
fi
if [ "$ROLLOUT_EXIT" -eq 0 ] && [ "$ROLLOUT_RISK" = "1" ]; then
ACTOR="${GITHUB_ACTOR:-${{ github.actor }}}"
if AWOOI_CICD_STATUS=pending \

View File

@@ -118,29 +118,62 @@ def _enrich_runtime_build_readback(payload: dict[str, Any]) -> None:
else "runtime_build_diverges_from_committed_deploy_readback"
)
if source_matches_runtime and image_matches_runtime:
desired_tag = os.getenv("AWOOOI_DESIRED_API_IMAGE_TAG", "").strip().lower()
if not _SHA_RE.fullmatch(desired_tag):
readback["desired_main_api_image_tag_readback_status"] = "unavailable"
_mark_runtime_image_tag_blocked(
payload,
blocker="gitea_main_desired_api_image_tag_readback_unavailable",
next_action=(
"redeploy_with_awoooi_desired_api_image_tag_env_matching_gitops"
"_desired_api_tag"
),
)
return
# The runtime build SHA is evidence about the currently running image, not
# permission to rewrite source-control truth. If it diverges from the
# committed deploy snapshot, fail closed so Workbench cannot report a false
# "image tag matches main" closure while production routes are stale.
readback["desired_main_api_image_tag_source"] = "gitops_deployment_env"
readback["desired_main_api_image_tag_readback_status"] = "ok"
readback["desired_main_api_image_tag_sha"] = desired_tag
readback["desired_main_api_image_tag_short_sha"] = desired_tag[:10]
image_matches_main = build_sha == desired_tag
readback["production_image_tag_matches_main"] = image_matches_main
rollups["source_control_main_ready"] = True
rollups["production_image_tag_matches_main"] = image_matches_main
if image_matches_main:
return
_mark_runtime_image_tag_blocked(
payload,
blocker="production_runtime_image_tag_does_not_match_gitea_main_desired_tag",
next_action=(
"complete_cd_rollout_until_runtime_build_commit_matches_gitops"
"_desired_api_image_tag_env"
),
source_control_main_ready=True,
)
def _mark_runtime_image_tag_blocked(
    payload: dict[str, Any],
    *,
    blocker: str,
    next_action: str,
    source_control_main_ready: bool = False,
) -> None:
    """Mark payload as blocked because the runtime image tag is not verified.

    Sets payload["status"] to
    "blocked_production_runtime_image_tag_not_verified", forces
    production_image_tag_matches_main to False in both the readback and the
    rollups sections, records the blocker and next action once each, and
    refreshes the two rollup counters.

    Args:
        payload: Readback payload, mutated in place.
        blocker: Blocker identifier appended to payload["blockers"] if absent.
        next_action: Identifier appended to payload["next_actions"] if absent.
        source_control_main_ready: Value stored in
            rollups["source_control_main_ready"]; defaults to False.
    """
    # NOTE(review): assumes _dict/_list hand back the payload's own containers
    # rather than copies, so the writes below land in payload - confirm.
    payload["status"] = "blocked_production_runtime_image_tag_not_verified"
    readback = _dict(payload.get("readback"))
    readback["production_image_tag_matches_main"] = False
    blockers = _list(payload.setdefault("blockers", []))
    if blocker not in blockers:
        blockers.append(blocker)
    next_actions = _list(payload.setdefault("next_actions", []))
    if next_action not in next_actions:
        next_actions.append(next_action)
    # rollups is bound here, after the lists are final, so the counters see
    # any entry appended above. No write to rollups may precede this line.
    rollups = _dict(payload.get("rollups"))
    rollups["hard_blocker_count"] = len(blockers)
    rollups["next_action_count"] = len(next_actions)
    rollups["source_control_main_ready"] = source_control_main_ready
    rollups["production_image_tag_matches_main"] = False
payload["status"] = "runtime_build_readback_stale"
_append_unique(
payload,
"blockers",
"runtime_build_commit_sha_does_not_match_committed_production_readback",
)
_append_unique(
payload,
"next_actions",
"refresh_production_deploy_readback_from_current_main_and_runtime_route_smoke",
)
rollups["hard_blocker_count"] = len(_list(payload.get("blockers")))
rollups["next_action_count"] = len(_list(payload.get("next_actions")))
def _require_no_internal_network_literals(value: Any, label: str) -> None:

View File

@@ -311,6 +311,18 @@ def build_delivery_closure_workbench(
)
is True
),
"desired_main_api_image_tag_short_sha": str(
production_deploy_readback.get(
"desired_main_api_image_tag_short_sha"
)
or ""
),
"desired_main_api_image_tag_readback_status": str(
production_deploy_readback.get(
"desired_main_api_image_tag_readback_status"
)
or ""
),
"production_image_tag_matches_main": production_deploy_readback.get(
"production_image_tag_matches_main"
)
@@ -1673,6 +1685,16 @@ def build_delivery_closure_workbench(
)
is True
),
"production_deploy_desired_main_api_image_tag_short_sha": str(
production_deploy_readback.get("desired_main_api_image_tag_short_sha")
or ""
),
"production_deploy_desired_main_api_image_tag_readback_status": str(
production_deploy_readback.get(
"desired_main_api_image_tag_readback_status"
)
or ""
),
"production_deploy_governance_fields_present": production_deploy_rollups.get(
"production_governance_fields_present"
)

View File

@@ -1,26 +1,25 @@
from __future__ import annotations
from src.services.awoooi_production_deploy_readback_blocker import (
load_latest_awoooi_production_deploy_readback_blocker,
)
from src.services import awoooi_production_deploy_readback_blocker as service
def test_production_deploy_readback_uses_runtime_build_commit(monkeypatch):
_COMMITTED_SNAPSHOT_SHA = "a70c6756d9e76c33143676eef82bab7a49ac1839"
def test_production_deploy_readback_verifies_runtime_build_against_gitops_desired(
monkeypatch,
):
build_sha = "0123456789abcdef0123456789abcdef01234567"
monkeypatch.setenv("AWOOOI_BUILD_COMMIT_SHA", build_sha)
monkeypatch.setenv("AWOOOI_DESIRED_API_IMAGE_TAG", build_sha)
payload = load_latest_awoooi_production_deploy_readback_blocker()
payload = service.load_latest_awoooi_production_deploy_readback_blocker()
readback = payload["readback"]
rollups = payload["rollups"]
assert readback["runtime_build_commit_sha"] == build_sha
assert readback["runtime_build_commit_short_sha"] == build_sha[:10]
assert readback["observed_source_control_main_sha"] == (
"a70c6756d9e76c33143676eef82bab7a49ac1839"
)
assert readback["production_image_tag_sha"] == (
"a70c6756d9e76c33143676eef82bab7a49ac1839"
)
assert readback["production_image_tag_sha"] == _COMMITTED_SNAPSHOT_SHA
assert (
readback["runtime_build_matches_committed_source_control_readback"]
is False
@@ -32,23 +31,21 @@ def test_production_deploy_readback_uses_runtime_build_commit(monkeypatch):
assert readback["runtime_build_readback_status"] == (
"runtime_build_diverges_from_committed_deploy_readback"
)
assert readback["production_image_tag_matches_main"] is False
assert payload["status"] == "runtime_build_readback_stale"
assert rollups["source_control_main_ready"] is False
assert rollups["production_image_tag_matches_main"] is False
assert rollups["hard_blocker_count"] == 1
assert payload["blockers"] == [
"runtime_build_commit_sha_does_not_match_committed_production_readback"
]
assert readback["desired_main_api_image_tag_sha"] == build_sha
assert readback["desired_main_api_image_tag_source"] == "gitops_deployment_env"
assert readback["desired_main_api_image_tag_readback_status"] == "ok"
assert readback["production_image_tag_matches_main"] is True
assert payload["status"] == "closure_verified"
assert rollups["production_image_tag_matches_main"] is True
assert rollups["hard_blocker_count"] == 0
def test_production_deploy_readback_keeps_closure_when_runtime_matches_snapshot(
monkeypatch,
):
build_sha = "a70c6756d9e76c33143676eef82bab7a49ac1839"
def test_production_deploy_readback_keeps_committed_snapshot_evidence(monkeypatch):
build_sha = _COMMITTED_SNAPSHOT_SHA
monkeypatch.setenv("AWOOOI_BUILD_COMMIT_SHA", build_sha)
monkeypatch.setenv("AWOOOI_DESIRED_API_IMAGE_TAG", build_sha)
payload = load_latest_awoooi_production_deploy_readback_blocker()
payload = service.load_latest_awoooi_production_deploy_readback_blocker()
readback = payload["readback"]
rollups = payload["rollups"]
@@ -58,19 +55,60 @@ def test_production_deploy_readback_keeps_closure_when_runtime_matches_snapshot(
assert readback["runtime_build_readback_status"] == (
"matches_committed_deploy_readback"
)
assert readback["desired_main_api_image_tag_sha"] == build_sha
assert readback["production_image_tag_matches_main"] is True
assert payload["status"] == "closure_verified"
assert rollups["production_image_tag_matches_main"] is True
assert rollups["hard_blocker_count"] == 0
def test_production_deploy_readback_blocks_runtime_build_mismatch(monkeypatch):
    """A runtime build SHA that differs from the desired tag must block."""
    runtime_sha = "0123456789abcdef0123456789abcdef01234567"
    gitops_sha = "abcdef0123456789abcdef0123456789abcdef01"
    for env_name, env_value in (
        ("AWOOOI_BUILD_COMMIT_SHA", runtime_sha),
        ("AWOOOI_DESIRED_API_IMAGE_TAG", gitops_sha),
    ):
        monkeypatch.setenv(env_name, env_value)

    result = service.load_latest_awoooi_production_deploy_readback_blocker()
    readback_view = result["readback"]
    rollup_view = result["rollups"]

    # Status and readback evidence.
    assert result["status"] == "blocked_production_runtime_image_tag_not_verified"
    assert readback_view["runtime_build_commit_sha"] == runtime_sha
    expected_build_status = "runtime_build_diverges_from_committed_deploy_readback"
    assert readback_view["runtime_build_readback_status"] == expected_build_status
    assert readback_view["desired_main_api_image_tag_sha"] == gitops_sha
    assert readback_view["production_image_tag_matches_main"] is False

    # Rollups: the desired tag was readable, but the runtime does not match it.
    assert rollup_view["source_control_main_ready"] is True
    assert rollup_view["production_image_tag_matches_main"] is False
    assert rollup_view["hard_blocker_count"] == 1
    expected_blocker = (
        "production_runtime_image_tag_does_not_match_gitea_main_desired_tag"
    )
    assert expected_blocker in result["blockers"]
def test_production_deploy_readback_blocks_unavailable_gitops_desired(monkeypatch):
    """With no desired-tag env var set, the readback must report a blocker."""
    runtime_sha = "0123456789abcdef0123456789abcdef01234567"
    monkeypatch.setenv("AWOOOI_BUILD_COMMIT_SHA", runtime_sha)
    # raising=False: the variable may already be absent from the environment.
    monkeypatch.delenv("AWOOOI_DESIRED_API_IMAGE_TAG", raising=False)

    result = service.load_latest_awoooi_production_deploy_readback_blocker()
    readback_view = result["readback"]

    assert result["status"] == "blocked_production_runtime_image_tag_not_verified"
    assert readback_view["desired_main_api_image_tag_readback_status"] == "unavailable"
    assert readback_view["production_image_tag_matches_main"] is False
    assert result["rollups"]["source_control_main_ready"] is False
    expected_blocker = "gitea_main_desired_api_image_tag_readback_unavailable"
    assert expected_blocker in result["blockers"]
def test_production_deploy_readback_ignores_non_sha_runtime_value(monkeypatch):
    """A non-SHA AWOOOI_BUILD_COMMIT_SHA must not surface as runtime evidence.

    The readback must omit runtime_build_commit_sha and keep reporting the
    committed snapshot SHA as the production image tag.
    """
    monkeypatch.setenv("AWOOOI_BUILD_COMMIT_SHA", "none")

    # Load once through the service module alias, as the sibling tests do.
    payload = service.load_latest_awoooi_production_deploy_readback_blocker()
    readback = payload["readback"]

    assert "runtime_build_commit_sha" not in readback
    assert readback["production_image_tag_sha"] == _COMMITTED_SNAPSHOT_SHA

View File

@@ -50025,6 +50025,29 @@ production browser smoke:
- 沒有重啟主機,沒有 restart Docker / Nginx / K3s / DB / firewall。
- 沒有使用 GitHub / gh / GitHub API / GitHub Actions。
## 2026-06-30 — 08:18 P0 production deploy readback false-positive closeout
**狀態**
- Gitea `main` 已有 `49c02e5b3` API / Workbench source 與 `6083cb71a` CD deploy marker,但 production 兩個新 API route 仍回 404;公開 Gitea Actions 顯示 CD #4003 `Failure`。
- 既有 production deploy readback 只用 runtime `AWOOOI_BUILD_COMMIT_SHA` 覆寫 source/main 欄位,可能讓舊 production image 自稱 `production_image_tag_matches_main=true`。
**完成內容**
- `awoooi_production_deploy_readback_blocker` 改為比對 runtime `AWOOOI_BUILD_COMMIT_SHA` 與 GitOps desired env `AWOOOI_DESIRED_API_IMAGE_TAG`;缺少 desired env 或不一致時 fail-closed 成 blocker。
- `k8s/awoooi-prod/06-deployment-api.yaml` 新增 `AWOOOI_DESIRED_API_IMAGE_TAG`;CD rewrite 會與 `AWOOOI_BUILD_COMMIT_SHA` 一起更新。
- CD rollout 後新增 production Workbench readback gate:runtime short SHA、desired short SHA、desired status 與 `matches_main` 必須等於本次 `IMAGE_TAG` 才算 deploy closure。
- Delivery Workbench summary/lane evidence 新增 runtime tag、desired tag、desired status 欄位,方便 production readback 直接看出 image truth。
**本地驗證結果**
- `DATABASE_URL=postgresql://test:test@localhost:5432/test python3.11 -m pytest apps/api/tests/test_awoooi_production_deploy_readback_blocker.py apps/api/tests/test_delivery_closure_workbench_api.py ops/runner/test_cd_controlled_runtime_profile.py ops/runner/test_read_public_gitea_actions_queue.py ops/runner/test_verify_awoooi_non110_cd_closure.py -q`:`44 passed`。
- `DATABASE_URL=postgresql://test:test@localhost:5432/test python3.11 -m pytest apps/api/tests/test_gitea_workflow_runner_owner_attestation_request_api.py apps/api/tests/test_ai_agent_log_controlled_writeback_consumer_readback_api.py apps/api/tests/test_gitea_workflow_runner_health.py apps/api/tests/test_delivery_closure_workbench_api.py -q`:`15 passed`。
- `py_compile`、`.gitea/workflows/cd.yaml` / K8s YAML parse、`ops/runner/guard-gitea-runner-pressure.py --root .`、`scripts/ci/check-gitea-step-env-secrets.js`、`git diff --check`:通過。
- local live smoke:`runtime=49c02e5b30`、`desired=49c02e5b30`、`desired_status=ok`、`matches=true`、`hard_blockers=0`。
**仍維持**
- 沒有讀 secret / token / `.env` / raw sessions / SQLite / auth。
- 沒有使用 GitHub / gh / GitHub API / GitHub Actions。
- 沒有重啟主機,沒有 workflow_dispatch,沒有 host / Docker / K8s / DB / firewall runtime 寫入。
## 2026-06-29 — 16:16 P0-003 CD controlled-runtime deploy marker fix
**狀態**

View File

@@ -80,6 +80,11 @@ spec:
# 2026-06-29 Codex: CD rewrites this to the deployed image tag so
# production deploy readback does not rely on a stale static snapshot.
value: "49c02e5b306709f42c2b860a60436b7b4834a1ae"
- name: AWOOOI_DESIRED_API_IMAGE_TAG
# 2026-06-30 Codex: CD rewrites this alongside AWOOOI_BUILD_COMMIT_SHA.
# Production readback compares runtime image truth against this
# GitOps desired tag instead of doing a slow Gitea raw fetch.
value: "49c02e5b306709f42c2b860a60436b7b4834a1ae"
- name: USE_AI_ROUTER
value: "true"
- name: ENABLE_NEMOTRON_COLLABORATION

View File

@@ -50,6 +50,14 @@ def test_workflow_secret_transport_sources_stay_on_controlled_runtime_profile()
assert source in text
def test_cd_requires_production_deploy_readback_after_rollout() -> None:
    """The CD workflow must contain the post-rollout readback gate markers."""
    workflow = _workflow_text()
    # The mismatch log prefix plus the three summary fields the gate reads.
    required_markers = (
        "production_deploy_readback_mismatch=",
        "production_deploy_runtime_build_commit_short_sha",
        "production_deploy_desired_main_api_image_tag_short_sha",
        "production_deploy_desired_main_api_image_tag_readback_status",
    )
    for marker in required_markers:
        assert marker in workflow
def test_onboarding_warning_step_template_stays_on_controlled_runtime_profile() -> None:
text = _workflow_text()
assert "onboarding warning-step workflow is" in text