feat(governance): 新增 result capture writer dry-run readback
All checks were successful
Code Review / ai-code-review (push) Successful in 13s
CD Pipeline / tests (push) Successful in 1m29s
CD Pipeline / build-and-deploy (push) Successful in 4m57s
CD Pipeline / post-deploy-checks (push) Successful in 1m45s

This commit is contained in:
Your Name
2026-06-14 00:39:39 +08:00
parent c5cf6d3cc0
commit cdc6fe8737
10 changed files with 1605 additions and 1 deletions

View File

@@ -133,6 +133,9 @@ from src.services.ai_agent_result_capture_writer_implementation_review import (
from src.services.ai_agent_result_capture_writer_dry_run_fixture import (
load_latest_ai_agent_result_capture_writer_dry_run_fixture,
)
from src.services.ai_agent_result_capture_writer_dry_run_readback import (
load_latest_ai_agent_result_capture_writer_dry_run_readback,
)
from src.services.ai_agent_runtime_readback_approval_package import (
load_latest_ai_agent_runtime_readback_approval_package,
)
@@ -1785,6 +1788,36 @@ async def get_agent_result_capture_writer_dry_run_fixture() -> dict[str, Any]:
) from exc
@router.get(
    "/agent-result-capture-writer-dry-run-readback",
    response_model=dict[str, Any],
    summary="取得 AI Agent result capture writer dry-run readback",
    # The adjacent literals are concatenated into one string, so each clause
    # needs its own separator or the rendered API description becomes a run-on.
    description=(
        "讀取最新已提交的 P2-124 result capture writer dry-run readback;"
        "此端點只回傳 dry-run readback、receipt verifier、promotion readiness、"
        "blocked promotion write 與 operator handoff;不套用 writer、不執行 dry-run、"
        "不寫 receipt、不寫 result capture、learning、PlayBook trust、reviewer queue、"
        "Gateway queue;不送 Telegram、不呼叫 Bot API、不讀 secret。"
    ),
)
async def get_agent_result_capture_writer_dry_run_readback() -> dict[str, Any]:
    """Return the latest read-only result capture writer dry-run readback package.

    The snapshot is loaded in a worker thread and passed through
    ``redact_public_lan_topology`` before it is returned.

    Raises:
        HTTPException: 404 when ``FileNotFoundError`` is raised while loading;
            500 when ``json.JSONDecodeError`` or ``ValueError`` is raised
            (the error is logged and a fixed detail message is returned).
    """
    try:
        # The loader globs and reads files synchronously; keep it off the event loop.
        payload = await asyncio.to_thread(load_latest_ai_agent_result_capture_writer_dry_run_readback)
        return redact_public_lan_topology(payload)
    except FileNotFoundError as exc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(exc),
        ) from exc
    except (json.JSONDecodeError, ValueError) as exc:
        logger.error("ai_agent_result_capture_writer_dry_run_readback_invalid", error=str(exc))
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="AI Agent result capture writer dry-run readback 無效",
        ) from exc
@router.get(
"/agent-owner-approved-fixture-dry-run",
response_model=dict[str, Any],

View File

@@ -0,0 +1,390 @@
"""
AI Agent result capture writer dry-run readback snapshot.
Loads the latest committed P2-124 dry-run readback / promotion readiness package.
This module validates committed evidence only; it never applies writers, executes
dry-runs, writes receipts, writes result captures, writes learning records,
updates PlayBook trust, writes reviewer / Gateway queues, sends Telegram
messages, reads canonical runtime targets, reads secrets, or performs
destructive operations.
"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from src.services.snapshot_paths import default_evaluations_dir
# Directory searched for snapshots when the loader is called without an explicit evaluations_dir.
_DEFAULT_EVALUATIONS_DIR = default_evaluations_dir(Path(__file__))
# Glob pattern selecting candidate dry-run readback snapshot files in that directory.
_SNAPSHOT_PATTERN = "ai_agent_result_capture_writer_dry_run_readback_*.json"
# Value a snapshot's top-level "schema_version" must equal to be accepted.
_SCHEMA_VERSION = "ai_agent_result_capture_writer_dry_run_readback_v1"
# Value a snapshot's program_status.runtime_authority must equal to be accepted.
_RUNTIME_AUTHORITY = "result_capture_writer_dry_run_readback_only_no_live_write"
def load_latest_ai_agent_result_capture_writer_dry_run_readback(
    evaluations_dir: Path | None = None,
) -> dict[str, Any]:
    """Load and validate the newest committed dry-run readback snapshot.

    Args:
        evaluations_dir: Directory to search; the module default is used when
            this is omitted.

    Returns:
        The parsed snapshot, unchanged, once every validator has accepted it.

    Raises:
        FileNotFoundError: No file in the directory matches the snapshot pattern.
        ValueError: The JSON root is not an object, or a validator rejects it.
    """
    directory = _DEFAULT_EVALUATIONS_DIR if not evaluations_dir else evaluations_dir

    snapshots = list(directory.glob(_SNAPSHOT_PATTERN))
    if not snapshots:
        raise FileNotFoundError(f"no AI Agent result capture writer dry-run readback snapshots found in {directory}")
    # "Newest" means the path that sorts last.
    snapshots.sort()
    latest = snapshots[-1]

    payload = json.loads(latest.read_text(encoding="utf-8"))
    if not isinstance(payload, dict):
        raise ValueError(f"{latest}: expected JSON object")

    label = str(latest)
    # Validators run in this fixed order; the first failure aborts the load.
    validators = (
        _require_schema,
        _require_prior,
        _require_truth,
        _require_readbacks,
        _require_receipt_verifiers,
        _require_promotion_gates,
        _require_blocked_writes,
        _require_actions,
        _require_display_redaction,
        _require_no_forbidden_display_terms,
        _require_rollup_consistency,
    )
    for validate in validators:
        validate(payload, label)
    return payload
def _require_schema(payload: dict[str, Any], label: str) -> None:
    """Check the schema version and the ``program_status`` header.

    Raises:
        ValueError: ``schema_version`` differs from the expected value, a
            pinned ``program_status`` field differs, or ``status_note`` is
            missing or empty.
    """
    declared_version = payload.get("schema_version")
    if declared_version != _SCHEMA_VERSION:
        raise ValueError(f"{label}: expected schema_version={_SCHEMA_VERSION}")

    program_status = payload.get("program_status") or {}
    drift = _mismatches(
        program_status,
        {
            "current_priority": "P2",
            "current_task_id": "P2-124",
            "next_task_id": "P2-125",
            "read_only_mode": True,
            "runtime_authority": _RUNTIME_AUTHORITY,
            "overall_completion_percent": 100,
        },
    )
    if drift:
        raise ValueError(f"{label}: program_status mismatch: {drift}")

    note = program_status.get("status_note")
    if not note:
        raise ValueError(f"{label}: program_status.status_note is required")
def _require_prior(payload: dict[str, Any], label: str) -> None:
    """Check the embedded summary of the prior dry-run fixture stage.

    The summary must carry the fixture schema version, the pinned item counts,
    zero for every live write/send counter, and a non-empty ``readiness_note``.

    Raises:
        ValueError: Any pinned field differs or ``readiness_note`` is missing.
    """
    section = payload.get("prior_result_capture_writer_dry_run_fixture") or {}

    zeroed_counters = (
        "writer_apply_count",
        "dry_run_execution_count",
        "receipt_write_count",
        "result_capture_write_count",
        "learning_write_count",
        "playbook_trust_write_count",
        "reviewer_queue_write_count",
        "gateway_queue_write_count",
        "telegram_send_count",
        "bot_api_call_count",
        "report_receipt_write_count",
    )
    pinned = {
        "schema_version": "ai_agent_result_capture_writer_dry_run_fixture_v1",
        "writer_dry_run_fixture_count": 5,
        "receipt_preview_count": 5,
        "idempotency_replay_check_count": 5,
        "rollback_rehearsal_count": 5,
        "blocked_runtime_write_count": 6,
        "operator_action_count": 5,
        **dict.fromkeys(zeroed_counters, 0),
    }

    drift = _mismatches(section, pinned)
    if drift:
        raise ValueError(f"{label}: prior_result_capture_writer_dry_run_fixture mismatch: {drift}")
    if not section.get("readiness_note"):
        raise ValueError(f"{label}: prior_result_capture_writer_dry_run_fixture.readiness_note is required")
def _require_truth(payload: dict[str, Any], label: str) -> None:
truth = payload.get("readback_truth") or {}
required_true = {
"p2_123_fixture_loaded",
"dry_run_readback_ready",
"receipt_verifier_ready",
"promotion_readiness_ready",
"fixture_only_mode",
"owner_approval_required",
"dual_approval_required",
"dry_run_hash_required",
"receipt_verifier_required",
"promotion_owner_required",
"rollback_owner_required",
}
missing = sorted(field for field in required_true if truth.get(field) is not True)
if missing:
raise ValueError(f"{label}: readback ready flags must remain true: {missing}")
required_false = {
"writer_apply_enabled",
"dry_run_execution_enabled",
"receipt_write_enabled",
"promotion_apply_enabled",
"canonical_runtime_target_read_enabled",
"live_query_enabled",
"reviewer_queue_write_enabled",
"gateway_queue_write_enabled",
"telegram_send_enabled",
"bot_api_call_enabled",
"report_receipt_write_enabled",
"result_capture_write_enabled",
"learning_write_enabled",
"playbook_trust_write_enabled",
"production_write_enabled",
"secret_read_enabled",
"destructive_operation_enabled",
}
unsafe = sorted(field for field in required_false if truth.get(field) is not False)
if unsafe:
raise ValueError(f"{label}: live read/send/write flags must remain false: {unsafe}")
zero_counts = {
"owner_approval_received_count",
"dual_approval_received_count",
"dry_run_hash_verified_count",
"receipt_verifier_pass_count",
"writer_apply_count",
"dry_run_execution_count",
"receipt_write_count",
"promotion_apply_count",
"canonical_runtime_target_read_count",
"live_query_count",
"reviewer_queue_write_count",
"gateway_queue_write_count",
"telegram_send_count",
"bot_api_call_count",
"report_receipt_write_count",
"result_capture_write_count",
"learning_write_count",
"playbook_trust_write_count",
"production_write_count",
"secret_read_count",
"destructive_operation_count",
}
non_zero = sorted(field for field in zero_counts if truth.get(field) != 0)
if non_zero:
raise ValueError(f"{label}: dry-run readback live counters must remain zero: {non_zero}")
if not truth.get("truth_note"):
raise ValueError(f"{label}: readback_truth.truth_note is required")
def _require_readbacks(payload: dict[str, Any], label: str) -> None:
cards = payload.get("dry_run_readback_cards") or []
required = {
"readback_result_capture_writer",
"readback_learning_writer",
"readback_playbook_trust_writer",
"readback_reviewer_queue_writer",
"readback_gateway_queue_writer",
}
ids = {card.get("readback_id") for card in cards}
if ids != required:
raise ValueError(f"{label}: dry-run readback cards must match {sorted(required)}")
for card in cards:
readback_id = card.get("readback_id")
if card.get("readback_only") is not True:
raise ValueError(f"{label}: readback {readback_id} must remain readback-only")
if card.get("runtime_write_enabled") is not False:
raise ValueError(f"{label}: readback {readback_id} must not enable runtime write")
if card.get("dry_run_execution_enabled") is not False:
raise ValueError(f"{label}: readback {readback_id} must not enable dry-run execution")
if card.get("status") not in {"ready_for_owner_review", "approval_required", "blocked_by_policy"}:
raise ValueError(f"{label}: readback {readback_id} status is invalid")
for field in {"source_fixture", "receipt_preview", "promotion_candidate", "operator_note"}:
if not card.get(field):
raise ValueError(f"{label}: readback {readback_id} missing {field}")
if not _is_redacted_sha256(card.get("evidence_hash")):
raise ValueError(f"{label}: readback {readback_id} must expose redacted evidence_hash")
def _require_receipt_verifiers(payload: dict[str, Any], label: str) -> None:
checks = payload.get("receipt_verifier_checks") or []
required = {
"verify_result_capture_receipt",
"verify_learning_receipt",
"verify_playbook_trust_receipt",
"verify_reviewer_queue_receipt",
"verify_gateway_queue_receipt",
}
ids = {check.get("verifier_id") for check in checks}
if ids != required:
raise ValueError(f"{label}: receipt verifier checks must match {sorted(required)}")
for check in checks:
verifier_id = check.get("verifier_id")
if check.get("verifier_mode") != "no_write_receipt_verifier":
raise ValueError(f"{label}: verifier {verifier_id} must remain no-write")
if check.get("live_receipt_enabled") is not False:
raise ValueError(f"{label}: verifier {verifier_id} must not enable live receipt")
if check.get("required_before_promotion") is not True:
raise ValueError(f"{label}: verifier {verifier_id} must be required before promotion")
if check.get("status") not in {"ready", "approval_required", "blocked_by_policy"}:
raise ValueError(f"{label}: verifier {verifier_id} status is invalid")
if not check.get("failure_if_missing"):
raise ValueError(f"{label}: verifier {verifier_id} failure_if_missing is required")
def _require_promotion_gates(payload: dict[str, Any], label: str) -> None:
gates = payload.get("promotion_readiness_gates") or []
required = {
"gate_writer_apply_owner_approval",
"gate_dry_run_hash",
"gate_receipt_verifier",
"gate_rollback_owner",
"gate_production_write_block",
}
ids = {gate.get("gate_id") for gate in gates}
if ids != required:
raise ValueError(f"{label}: promotion readiness gates must match {sorted(required)}")
for gate in gates:
gate_id = gate.get("gate_id")
if gate.get("promotion_allowed") is not False:
raise ValueError(f"{label}: promotion gate {gate_id} must not allow promotion")
if gate.get("status") not in {"ready_for_owner_review", "approval_required", "blocked_by_policy"}:
raise ValueError(f"{label}: promotion gate {gate_id} status is invalid")
if not gate.get("required_evidence"):
raise ValueError(f"{label}: promotion gate {gate_id} required_evidence is required")
def _require_blocked_writes(payload: dict[str, Any], label: str) -> None:
blockers = payload.get("blocked_promotion_writes") or []
required = {
"blocked_writer_apply",
"blocked_dry_run_execution",
"blocked_receipt_write",
"blocked_result_capture_write",
"blocked_gateway_queue_write",
"blocked_telegram_send",
}
ids = {blocker.get("blocker_id") for blocker in blockers}
if ids != required:
raise ValueError(f"{label}: blocked promotion writes must match {sorted(required)}")
for blocker in blockers:
blocker_id = blocker.get("blocker_id")
if blocker.get("status") not in {"approval_required", "blocked_by_policy"}:
raise ValueError(f"{label}: blocker {blocker_id} status is invalid")
if blocker.get("severity") not in {"high", "critical"}:
raise ValueError(f"{label}: blocker {blocker_id} severity is invalid")
if not blocker.get("blocked_action") or not blocker.get("blocked_until"):
raise ValueError(f"{label}: blocker {blocker_id} must include blocked_action and blocked_until")
if not _is_redacted_sha256(blocker.get("evidence_hash")):
raise ValueError(f"{label}: blocker {blocker_id} must expose redacted evidence_hash")
def _require_actions(payload: dict[str, Any], label: str) -> None:
actions = payload.get("operator_actions") or []
required = {
"review_dry_run_readback",
"verify_receipt_verifier",
"score_promotion_readiness",
"prepare_p2_125_owner_packet",
"hold_runtime_write_gate",
}
ids = {action.get("action_id") for action in actions}
if ids != required:
raise ValueError(f"{label}: operator actions must match {sorted(required)}")
for action in actions:
action_id = action.get("action_id")
if action.get("runtime_write_allowed") is not False:
raise ValueError(f"{label}: action {action_id} must not allow runtime write")
if not action.get("operator_instruction"):
raise ValueError(f"{label}: action {action_id} operator_instruction is required")
def _require_display_redaction(payload: dict[str, Any], label: str) -> None:
    """Check the ``display_redaction_contract`` section.

    Redaction must be required, every ``*_display_allowed`` switch listed
    below must be off, and ``frontend_display_policy`` must be non-empty.

    Raises:
        ValueError: A pinned field differs or the display policy is missing.
    """
    contract = payload.get("display_redaction_contract") or {}

    disallowed_displays = (
        "raw_prompt_display_allowed",
        "private_reasoning_display_allowed",
        "secret_value_display_allowed",
        "raw_runtime_payload_display_allowed",
        "internal_collaboration_content_display_allowed",
    )
    pinned = {"redaction_required": True, **dict.fromkeys(disallowed_displays, False)}

    drift = _mismatches(contract, pinned)
    if drift:
        raise ValueError(f"{label}: display_redaction_contract mismatch: {drift}")

    policy = contract.get("frontend_display_policy")
    if not policy:
        raise ValueError(f"{label}: display_redaction_contract.frontend_display_policy is required")
def _require_rollup_consistency(payload: dict[str, Any], label: str) -> None:
    """Check that ``rollups`` agrees with the item lists in the same snapshot.

    Item counts and per-status / per-severity tallies are recomputed from the
    lists and compared with the stored rollup; every live counter in the
    rollup must be zero.

    Raises:
        ValueError: Any rollup value differs from its recomputed or pinned value.
    """

    def tally(items: Any, field: str, wanted: str) -> int:
        # Number of entries whose ``field`` equals ``wanted``.
        return sum(1 for entry in items if entry.get(field) == wanted)

    rollups = payload.get("rollups") or {}
    cards = payload.get("dry_run_readback_cards") or []
    checks = payload.get("receipt_verifier_checks") or []
    gate_list = payload.get("promotion_readiness_gates") or []
    blocker_list = payload.get("blocked_promotion_writes") or []
    action_list = payload.get("operator_actions") or []

    live_counters = (
        "owner_approval_received_count",
        "dual_approval_received_count",
        "dry_run_hash_verified_count",
        "receipt_verifier_pass_count",
        "writer_apply_count",
        "dry_run_execution_count",
        "receipt_write_count",
        "promotion_apply_count",
        "canonical_runtime_target_read_count",
        "live_query_count",
        "reviewer_queue_write_count",
        "gateway_queue_write_count",
        "telegram_send_count",
        "bot_api_call_count",
        "report_receipt_write_count",
        "result_capture_write_count",
        "learning_write_count",
        "playbook_trust_write_count",
        "production_write_count",
        "secret_read_count",
        "destructive_operation_count",
    )
    recomputed = {
        "dry_run_readback_card_count": len(cards),
        "receipt_verifier_check_count": len(checks),
        "promotion_readiness_gate_count": len(gate_list),
        "blocked_promotion_write_count": len(blocker_list),
        "operator_action_count": len(action_list),
        "approval_required_readback_count": tally(cards, "status", "approval_required"),
        "blocked_readback_count": tally(cards, "status", "blocked_by_policy"),
        "approval_required_verifier_count": tally(checks, "status", "approval_required"),
        "blocked_verifier_count": tally(checks, "status", "blocked_by_policy"),
        "approval_required_gate_count": tally(gate_list, "status", "approval_required"),
        "blocked_gate_count": tally(gate_list, "status", "blocked_by_policy"),
        "critical_blocker_count": tally(blocker_list, "severity", "critical"),
        **dict.fromkeys(live_counters, 0),
    }

    drift = _mismatches(rollups, recomputed)
    if drift:
        raise ValueError(f"{label}: rollup counts mismatch: {drift}")
def _require_no_forbidden_display_terms(payload: dict[str, Any], label: str) -> None:
serialized = json.dumps(payload, ensure_ascii=False)
forbidden = {
"work_window_transcript",
"session_id",
"browser_context",
"authorization_header",
"raw Telegram payload",
"private reasoning",
"raw prompt",
"chain-of-thought",
}
hits = sorted(term for term in forbidden if term in serialized)
if hits:
raise ValueError(f"{label}: forbidden display terms present: {hits}")
def _is_redacted_sha256(value: Any) -> bool:
if not isinstance(value, str) or not value.startswith("sha256:"):
return False
digest = value.removeprefix("sha256:")
return len(digest) == 64 and all(char in "0123456789abcdef" for char in digest)
def _mismatches(payload: dict[str, Any], expected: dict[str, Any]) -> dict[str, dict[str, Any]]:
return {
key: {"expected": value, "actual": payload.get(key)}
for key, value in expected.items()
if payload.get(key) != value
}