feat(governance): 新增 result capture writer dry-run readback
This commit is contained in:
@@ -133,6 +133,9 @@ from src.services.ai_agent_result_capture_writer_implementation_review import (
|
||||
from src.services.ai_agent_result_capture_writer_dry_run_fixture import (
|
||||
load_latest_ai_agent_result_capture_writer_dry_run_fixture,
|
||||
)
|
||||
from src.services.ai_agent_result_capture_writer_dry_run_readback import (
|
||||
load_latest_ai_agent_result_capture_writer_dry_run_readback,
|
||||
)
|
||||
from src.services.ai_agent_runtime_readback_approval_package import (
|
||||
load_latest_ai_agent_runtime_readback_approval_package,
|
||||
)
|
||||
@@ -1785,6 +1788,36 @@ async def get_agent_result_capture_writer_dry_run_fixture() -> dict[str, Any]:
|
||||
) from exc
|
||||
|
||||
|
||||
@router.get(
    "/agent-result-capture-writer-dry-run-readback",
    response_model=dict[str, Any],
    summary="取得 AI Agent result capture writer dry-run readback",
    description=(
        "讀取最新已提交的 P2-124 result capture writer dry-run readback;"
        "此端點只回傳 dry-run readback、receipt verifier、promotion readiness、"
        "blocked promotion write 與 operator handoff,不套用 writer、不執行 dry-run、"
        "不寫 receipt、不寫 result capture、learning、PlayBook trust、reviewer queue、"
        "Gateway queue,不送 Telegram、不呼叫 Bot API、不讀 secret。"
    ),
)
async def get_agent_result_capture_writer_dry_run_readback() -> dict[str, Any]:
    """Serve the newest committed result capture writer dry-run readback snapshot.

    The snapshot is loaded on a worker thread and passed through
    ``redact_public_lan_topology`` before it is returned.

    Raises:
        HTTPException: 404 when the loader reports ``FileNotFoundError``
            (the detail is the loader's message); 500 with a fixed detail
            when the snapshot cannot be decoded or fails validation.
    """
    loader = load_latest_ai_agent_result_capture_writer_dry_run_readback
    try:
        # The loader does blocking file I/O, so keep it off the event loop.
        snapshot = await asyncio.to_thread(loader)
        return redact_public_lan_topology(snapshot)
    except FileNotFoundError as missing:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(missing),
        ) from missing
    except (json.JSONDecodeError, ValueError) as invalid:
        # Log the specific validation failure; the client only gets a generic message.
        logger.error("ai_agent_result_capture_writer_dry_run_readback_invalid", error=str(invalid))
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="AI Agent result capture writer dry-run readback 無效",
        ) from invalid
|
||||
|
||||
|
||||
@router.get(
|
||||
"/agent-owner-approved-fixture-dry-run",
|
||||
response_model=dict[str, Any],
|
||||
|
||||
@@ -0,0 +1,390 @@
|
||||
"""
|
||||
AI Agent result capture writer dry-run readback snapshot.
|
||||
|
||||
Loads the latest committed P2-124 dry-run readback / promotion readiness package.
|
||||
This module validates committed evidence only; it never applies writers, executes
|
||||
dry-runs, writes receipts, writes result captures, writes learning records,
|
||||
updates PlayBook trust, writes reviewer / Gateway queues, sends Telegram
|
||||
messages, reads canonical runtime targets, reads secrets, or performs
|
||||
destructive operations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from src.services.snapshot_paths import default_evaluations_dir
|
||||
|
||||
# Directory searched for snapshots when the loader is called without an explicit one.
_DEFAULT_EVALUATIONS_DIR = default_evaluations_dir(Path(__file__))

# Glob pattern matched against snapshot file names inside the evaluations directory.
_SNAPSHOT_PATTERN = "ai_agent_result_capture_writer_dry_run_readback_*.json"

# Value the snapshot's top-level ``schema_version`` must equal.
_SCHEMA_VERSION = "ai_agent_result_capture_writer_dry_run_readback_v1"

# Value ``program_status.runtime_authority`` must equal.
_RUNTIME_AUTHORITY = "result_capture_writer_dry_run_readback_only_no_live_write"
|
||||
|
||||
|
||||
def load_latest_ai_agent_result_capture_writer_dry_run_readback(
    evaluations_dir: Path | None = None,
) -> dict[str, Any]:
    """Read, validate and return the newest dry-run readback snapshot.

    Snapshot files matching the module's glob pattern are sorted by path and
    the last one is parsed as JSON, then run through every ``_require_*``
    validator in a fixed order.

    Args:
        evaluations_dir: Directory to search; the module default is used when
            this is omitted.

    Returns:
        The parsed snapshot, unchanged.

    Raises:
        FileNotFoundError: No snapshot file matches the pattern.
        ValueError: The file is not a JSON object or a validator rejects it
            (``json.JSONDecodeError`` from parsing is a ``ValueError`` too).
    """
    search_root = evaluations_dir or _DEFAULT_EVALUATIONS_DIR
    snapshots = sorted(search_root.glob(_SNAPSHOT_PATTERN))
    if not snapshots:
        raise FileNotFoundError(f"no AI Agent result capture writer dry-run readback snapshots found in {search_root}")

    newest = snapshots[-1]
    with newest.open(encoding="utf-8") as stream:
        document = json.load(stream)

    if not isinstance(document, dict):
        raise ValueError(f"{newest}: expected JSON object")

    source = str(newest)
    # Order matters: the first failing validator decides which error is raised.
    validators = (
        _require_schema,
        _require_prior,
        _require_truth,
        _require_readbacks,
        _require_receipt_verifiers,
        _require_promotion_gates,
        _require_blocked_writes,
        _require_actions,
        _require_display_redaction,
        _require_no_forbidden_display_terms,
        _require_rollup_consistency,
    )
    for validate in validators:
        validate(document, source)
    return document
|
||||
|
||||
|
||||
def _require_schema(payload: dict[str, Any], label: str) -> None:
    """Check the snapshot's schema version and its ``program_status`` section.

    Args:
        payload: Parsed snapshot.
        label: Prefix used in error messages (the snapshot path).

    Raises:
        ValueError: The schema version differs, a pinned ``program_status``
            value differs, or ``status_note`` is missing or empty.
    """
    if payload.get("schema_version") != _SCHEMA_VERSION:
        raise ValueError(f"{label}: expected schema_version={_SCHEMA_VERSION}")

    program_status = payload.get("program_status") or {}
    pinned = {
        "current_priority": "P2",
        "current_task_id": "P2-124",
        "next_task_id": "P2-125",
        "read_only_mode": True,
        "runtime_authority": _RUNTIME_AUTHORITY,
        "overall_completion_percent": 100,
    }
    diff = _mismatches(program_status, pinned)
    if diff:
        raise ValueError(f"{label}: program_status mismatch: {diff}")

    note = program_status.get("status_note")
    if not note:
        raise ValueError(f"{label}: program_status.status_note is required")
|
||||
|
||||
|
||||
def _require_prior(payload: dict[str, Any], label: str) -> None:
    """Check the summary of the preceding dry-run fixture embedded in the snapshot.

    The ``prior_result_capture_writer_dry_run_fixture`` section must carry the
    expected schema version, the pinned fixture counts, zero for every
    write/send counter, and a non-empty ``readiness_note``.

    Raises:
        ValueError: Any pinned value differs or the note is missing.
    """
    section = payload.get("prior_result_capture_writer_dry_run_fixture") or {}

    pinned_counts = {
        "schema_version": "ai_agent_result_capture_writer_dry_run_fixture_v1",
        "writer_dry_run_fixture_count": 5,
        "receipt_preview_count": 5,
        "idempotency_replay_check_count": 5,
        "rollback_rehearsal_count": 5,
        "blocked_runtime_write_count": 6,
        "operator_action_count": 5,
    }
    must_be_zero = (
        "writer_apply_count",
        "dry_run_execution_count",
        "receipt_write_count",
        "result_capture_write_count",
        "learning_write_count",
        "playbook_trust_write_count",
        "reviewer_queue_write_count",
        "gateway_queue_write_count",
        "telegram_send_count",
        "bot_api_call_count",
        "report_receipt_write_count",
    )
    # Key order is kept so the mismatch report lists fields in the same sequence.
    wanted = {**pinned_counts, **dict.fromkeys(must_be_zero, 0)}

    diff = _mismatches(section, wanted)
    if diff:
        raise ValueError(f"{label}: prior_result_capture_writer_dry_run_fixture mismatch: {diff}")

    note = section.get("readiness_note")
    if not note:
        raise ValueError(f"{label}: prior_result_capture_writer_dry_run_fixture.readiness_note is required")
|
||||
|
||||
|
||||
def _require_truth(payload: dict[str, Any], label: str) -> None:
|
||||
truth = payload.get("readback_truth") or {}
|
||||
required_true = {
|
||||
"p2_123_fixture_loaded",
|
||||
"dry_run_readback_ready",
|
||||
"receipt_verifier_ready",
|
||||
"promotion_readiness_ready",
|
||||
"fixture_only_mode",
|
||||
"owner_approval_required",
|
||||
"dual_approval_required",
|
||||
"dry_run_hash_required",
|
||||
"receipt_verifier_required",
|
||||
"promotion_owner_required",
|
||||
"rollback_owner_required",
|
||||
}
|
||||
missing = sorted(field for field in required_true if truth.get(field) is not True)
|
||||
if missing:
|
||||
raise ValueError(f"{label}: readback ready flags must remain true: {missing}")
|
||||
required_false = {
|
||||
"writer_apply_enabled",
|
||||
"dry_run_execution_enabled",
|
||||
"receipt_write_enabled",
|
||||
"promotion_apply_enabled",
|
||||
"canonical_runtime_target_read_enabled",
|
||||
"live_query_enabled",
|
||||
"reviewer_queue_write_enabled",
|
||||
"gateway_queue_write_enabled",
|
||||
"telegram_send_enabled",
|
||||
"bot_api_call_enabled",
|
||||
"report_receipt_write_enabled",
|
||||
"result_capture_write_enabled",
|
||||
"learning_write_enabled",
|
||||
"playbook_trust_write_enabled",
|
||||
"production_write_enabled",
|
||||
"secret_read_enabled",
|
||||
"destructive_operation_enabled",
|
||||
}
|
||||
unsafe = sorted(field for field in required_false if truth.get(field) is not False)
|
||||
if unsafe:
|
||||
raise ValueError(f"{label}: live read/send/write flags must remain false: {unsafe}")
|
||||
zero_counts = {
|
||||
"owner_approval_received_count",
|
||||
"dual_approval_received_count",
|
||||
"dry_run_hash_verified_count",
|
||||
"receipt_verifier_pass_count",
|
||||
"writer_apply_count",
|
||||
"dry_run_execution_count",
|
||||
"receipt_write_count",
|
||||
"promotion_apply_count",
|
||||
"canonical_runtime_target_read_count",
|
||||
"live_query_count",
|
||||
"reviewer_queue_write_count",
|
||||
"gateway_queue_write_count",
|
||||
"telegram_send_count",
|
||||
"bot_api_call_count",
|
||||
"report_receipt_write_count",
|
||||
"result_capture_write_count",
|
||||
"learning_write_count",
|
||||
"playbook_trust_write_count",
|
||||
"production_write_count",
|
||||
"secret_read_count",
|
||||
"destructive_operation_count",
|
||||
}
|
||||
non_zero = sorted(field for field in zero_counts if truth.get(field) != 0)
|
||||
if non_zero:
|
||||
raise ValueError(f"{label}: dry-run readback live counters must remain zero: {non_zero}")
|
||||
if not truth.get("truth_note"):
|
||||
raise ValueError(f"{label}: readback_truth.truth_note is required")
|
||||
|
||||
|
||||
def _require_readbacks(payload: dict[str, Any], label: str) -> None:
|
||||
cards = payload.get("dry_run_readback_cards") or []
|
||||
required = {
|
||||
"readback_result_capture_writer",
|
||||
"readback_learning_writer",
|
||||
"readback_playbook_trust_writer",
|
||||
"readback_reviewer_queue_writer",
|
||||
"readback_gateway_queue_writer",
|
||||
}
|
||||
ids = {card.get("readback_id") for card in cards}
|
||||
if ids != required:
|
||||
raise ValueError(f"{label}: dry-run readback cards must match {sorted(required)}")
|
||||
for card in cards:
|
||||
readback_id = card.get("readback_id")
|
||||
if card.get("readback_only") is not True:
|
||||
raise ValueError(f"{label}: readback {readback_id} must remain readback-only")
|
||||
if card.get("runtime_write_enabled") is not False:
|
||||
raise ValueError(f"{label}: readback {readback_id} must not enable runtime write")
|
||||
if card.get("dry_run_execution_enabled") is not False:
|
||||
raise ValueError(f"{label}: readback {readback_id} must not enable dry-run execution")
|
||||
if card.get("status") not in {"ready_for_owner_review", "approval_required", "blocked_by_policy"}:
|
||||
raise ValueError(f"{label}: readback {readback_id} status is invalid")
|
||||
for field in {"source_fixture", "receipt_preview", "promotion_candidate", "operator_note"}:
|
||||
if not card.get(field):
|
||||
raise ValueError(f"{label}: readback {readback_id} missing {field}")
|
||||
if not _is_redacted_sha256(card.get("evidence_hash")):
|
||||
raise ValueError(f"{label}: readback {readback_id} must expose redacted evidence_hash")
|
||||
|
||||
|
||||
def _require_receipt_verifiers(payload: dict[str, Any], label: str) -> None:
|
||||
checks = payload.get("receipt_verifier_checks") or []
|
||||
required = {
|
||||
"verify_result_capture_receipt",
|
||||
"verify_learning_receipt",
|
||||
"verify_playbook_trust_receipt",
|
||||
"verify_reviewer_queue_receipt",
|
||||
"verify_gateway_queue_receipt",
|
||||
}
|
||||
ids = {check.get("verifier_id") for check in checks}
|
||||
if ids != required:
|
||||
raise ValueError(f"{label}: receipt verifier checks must match {sorted(required)}")
|
||||
for check in checks:
|
||||
verifier_id = check.get("verifier_id")
|
||||
if check.get("verifier_mode") != "no_write_receipt_verifier":
|
||||
raise ValueError(f"{label}: verifier {verifier_id} must remain no-write")
|
||||
if check.get("live_receipt_enabled") is not False:
|
||||
raise ValueError(f"{label}: verifier {verifier_id} must not enable live receipt")
|
||||
if check.get("required_before_promotion") is not True:
|
||||
raise ValueError(f"{label}: verifier {verifier_id} must be required before promotion")
|
||||
if check.get("status") not in {"ready", "approval_required", "blocked_by_policy"}:
|
||||
raise ValueError(f"{label}: verifier {verifier_id} status is invalid")
|
||||
if not check.get("failure_if_missing"):
|
||||
raise ValueError(f"{label}: verifier {verifier_id} failure_if_missing is required")
|
||||
|
||||
|
||||
def _require_promotion_gates(payload: dict[str, Any], label: str) -> None:
|
||||
gates = payload.get("promotion_readiness_gates") or []
|
||||
required = {
|
||||
"gate_writer_apply_owner_approval",
|
||||
"gate_dry_run_hash",
|
||||
"gate_receipt_verifier",
|
||||
"gate_rollback_owner",
|
||||
"gate_production_write_block",
|
||||
}
|
||||
ids = {gate.get("gate_id") for gate in gates}
|
||||
if ids != required:
|
||||
raise ValueError(f"{label}: promotion readiness gates must match {sorted(required)}")
|
||||
for gate in gates:
|
||||
gate_id = gate.get("gate_id")
|
||||
if gate.get("promotion_allowed") is not False:
|
||||
raise ValueError(f"{label}: promotion gate {gate_id} must not allow promotion")
|
||||
if gate.get("status") not in {"ready_for_owner_review", "approval_required", "blocked_by_policy"}:
|
||||
raise ValueError(f"{label}: promotion gate {gate_id} status is invalid")
|
||||
if not gate.get("required_evidence"):
|
||||
raise ValueError(f"{label}: promotion gate {gate_id} required_evidence is required")
|
||||
|
||||
|
||||
def _require_blocked_writes(payload: dict[str, Any], label: str) -> None:
|
||||
blockers = payload.get("blocked_promotion_writes") or []
|
||||
required = {
|
||||
"blocked_writer_apply",
|
||||
"blocked_dry_run_execution",
|
||||
"blocked_receipt_write",
|
||||
"blocked_result_capture_write",
|
||||
"blocked_gateway_queue_write",
|
||||
"blocked_telegram_send",
|
||||
}
|
||||
ids = {blocker.get("blocker_id") for blocker in blockers}
|
||||
if ids != required:
|
||||
raise ValueError(f"{label}: blocked promotion writes must match {sorted(required)}")
|
||||
for blocker in blockers:
|
||||
blocker_id = blocker.get("blocker_id")
|
||||
if blocker.get("status") not in {"approval_required", "blocked_by_policy"}:
|
||||
raise ValueError(f"{label}: blocker {blocker_id} status is invalid")
|
||||
if blocker.get("severity") not in {"high", "critical"}:
|
||||
raise ValueError(f"{label}: blocker {blocker_id} severity is invalid")
|
||||
if not blocker.get("blocked_action") or not blocker.get("blocked_until"):
|
||||
raise ValueError(f"{label}: blocker {blocker_id} must include blocked_action and blocked_until")
|
||||
if not _is_redacted_sha256(blocker.get("evidence_hash")):
|
||||
raise ValueError(f"{label}: blocker {blocker_id} must expose redacted evidence_hash")
|
||||
|
||||
|
||||
def _require_actions(payload: dict[str, Any], label: str) -> None:
|
||||
actions = payload.get("operator_actions") or []
|
||||
required = {
|
||||
"review_dry_run_readback",
|
||||
"verify_receipt_verifier",
|
||||
"score_promotion_readiness",
|
||||
"prepare_p2_125_owner_packet",
|
||||
"hold_runtime_write_gate",
|
||||
}
|
||||
ids = {action.get("action_id") for action in actions}
|
||||
if ids != required:
|
||||
raise ValueError(f"{label}: operator actions must match {sorted(required)}")
|
||||
for action in actions:
|
||||
action_id = action.get("action_id")
|
||||
if action.get("runtime_write_allowed") is not False:
|
||||
raise ValueError(f"{label}: action {action_id} must not allow runtime write")
|
||||
if not action.get("operator_instruction"):
|
||||
raise ValueError(f"{label}: action {action_id} operator_instruction is required")
|
||||
|
||||
|
||||
def _require_display_redaction(payload: dict[str, Any], label: str) -> None:
    """Check the ``display_redaction_contract`` section of the snapshot.

    Redaction must be required, every ``*_display_allowed`` switch listed
    below must be off, and ``frontend_display_policy`` must be non-empty.

    Raises:
        ValueError: A pinned value differs or the policy text is missing.
    """
    section = payload.get("display_redaction_contract") or {}

    disallowed_displays = (
        "raw_prompt_display_allowed",
        "private_reasoning_display_allowed",
        "secret_value_display_allowed",
        "raw_runtime_payload_display_allowed",
        "internal_collaboration_content_display_allowed",
    )
    # Key order is kept so the mismatch report lists fields in the same sequence.
    wanted = {"redaction_required": True, **dict.fromkeys(disallowed_displays, False)}

    diff = _mismatches(section, wanted)
    if diff:
        raise ValueError(f"{label}: display_redaction_contract mismatch: {diff}")

    policy = section.get("frontend_display_policy")
    if not policy:
        raise ValueError(f"{label}: display_redaction_contract.frontend_display_policy is required")
|
||||
|
||||
|
||||
def _require_rollup_consistency(payload: dict[str, Any], label: str) -> None:
    """Check that ``rollups`` agrees with the sections it summarises.

    Item counts and per-status / per-severity tallies are recomputed from the
    snapshot's own lists, and every approval, write, send and read counter is
    expected to be zero.

    Raises:
        ValueError: Any rollup value differs from the recomputed expectation.
    """

    def tally(items: list[Any], field: str, wanted: str) -> int:
        # Number of entries whose ``field`` equals ``wanted``.
        return sum(1 for entry in items if entry.get(field) == wanted)

    cards = payload.get("dry_run_readback_cards") or []
    checks = payload.get("receipt_verifier_checks") or []
    gate_list = payload.get("promotion_readiness_gates") or []
    blocker_list = payload.get("blocked_promotion_writes") or []
    action_list = payload.get("operator_actions") or []

    derived = {
        "dry_run_readback_card_count": len(cards),
        "receipt_verifier_check_count": len(checks),
        "promotion_readiness_gate_count": len(gate_list),
        "blocked_promotion_write_count": len(blocker_list),
        "operator_action_count": len(action_list),
        "approval_required_readback_count": tally(cards, "status", "approval_required"),
        "blocked_readback_count": tally(cards, "status", "blocked_by_policy"),
        "approval_required_verifier_count": tally(checks, "status", "approval_required"),
        "blocked_verifier_count": tally(checks, "status", "blocked_by_policy"),
        "approval_required_gate_count": tally(gate_list, "status", "approval_required"),
        "blocked_gate_count": tally(gate_list, "status", "blocked_by_policy"),
        "critical_blocker_count": tally(blocker_list, "severity", "critical"),
    }
    must_be_zero = (
        "owner_approval_received_count",
        "dual_approval_received_count",
        "dry_run_hash_verified_count",
        "receipt_verifier_pass_count",
        "writer_apply_count",
        "dry_run_execution_count",
        "receipt_write_count",
        "promotion_apply_count",
        "canonical_runtime_target_read_count",
        "live_query_count",
        "reviewer_queue_write_count",
        "gateway_queue_write_count",
        "telegram_send_count",
        "bot_api_call_count",
        "report_receipt_write_count",
        "result_capture_write_count",
        "learning_write_count",
        "playbook_trust_write_count",
        "production_write_count",
        "secret_read_count",
        "destructive_operation_count",
    )
    # Key order is kept so the mismatch report lists fields in the same sequence.
    wanted = {**derived, **dict.fromkeys(must_be_zero, 0)}

    diff = _mismatches(payload.get("rollups") or {}, wanted)
    if diff:
        raise ValueError(f"{label}: rollup counts mismatch: {diff}")
|
||||
|
||||
|
||||
def _require_no_forbidden_display_terms(payload: dict[str, Any], label: str) -> None:
|
||||
serialized = json.dumps(payload, ensure_ascii=False)
|
||||
forbidden = {
|
||||
"work_window_transcript",
|
||||
"session_id",
|
||||
"browser_context",
|
||||
"authorization_header",
|
||||
"raw Telegram payload",
|
||||
"private reasoning",
|
||||
"raw prompt",
|
||||
"chain-of-thought",
|
||||
}
|
||||
hits = sorted(term for term in forbidden if term in serialized)
|
||||
if hits:
|
||||
raise ValueError(f"{label}: forbidden display terms present: {hits}")
|
||||
|
||||
|
||||
def _is_redacted_sha256(value: Any) -> bool:
|
||||
if not isinstance(value, str) or not value.startswith("sha256:"):
|
||||
return False
|
||||
digest = value.removeprefix("sha256:")
|
||||
return len(digest) == 64 and all(char in "0123456789abcdef" for char in digest)
|
||||
|
||||
|
||||
def _mismatches(payload: dict[str, Any], expected: dict[str, Any]) -> dict[str, dict[str, Any]]:
|
||||
return {
|
||||
key: {"expected": value, "actual": payload.get(key)}
|
||||
for key, value in expected.items()
|
||||
if payload.get(key) != value
|
||||
}
|
||||
Reference in New Issue
Block a user