feat(governance): 新增 result capture writer dry-run readback

2026-06-14 00:39:39 +08:00
parent c5cf6d3cc0
commit cdc6fe8737
10 changed files with 1605 additions and 1 deletions
--- a/apps/api/src/api/v1/agents.py
+++ b/apps/api/src/api/v1/agents.py
@@ -133,6 +133,9 @@ from src.services.ai_agent_result_capture_writer_implementation_review import (
 from src.services.ai_agent_result_capture_writer_dry_run_fixture import (
    load_latest_ai_agent_result_capture_writer_dry_run_fixture,
 )
+from src.services.ai_agent_result_capture_writer_dry_run_readback import (
+    load_latest_ai_agent_result_capture_writer_dry_run_readback,
+)
 from src.services.ai_agent_runtime_readback_approval_package import (
    load_latest_ai_agent_runtime_readback_approval_package,
 )
@@ -1785,6 +1788,36 @@ async def get_agent_result_capture_writer_dry_run_fixture() -> dict[str, Any]:
        ) from exc


+@router.get(
+    "/agent-result-capture-writer-dry-run-readback",
+    response_model=dict[str, Any],
+    summary="取得 AI Agent result capture writer dry-run readback",
+    description=(
+        "讀取最新已提交的 P2-124 result capture writer dry-run readback；"
+        "此端點只回傳 dry-run readback、receipt verifier、promotion readiness、"
+        "blocked promotion write 與 operator handoff，不套用 writer、不執行 dry-run、"
+        "不寫 receipt、不寫 result capture、learning、PlayBook trust、reviewer queue、"
+        "Gateway queue，不送 Telegram、不呼叫 Bot API、不讀 secret。"
+    ),
+)
+async def get_agent_result_capture_writer_dry_run_readback() -> dict[str, Any]:
+    """Return the latest read-only result capture writer dry-run readback package.
+
+    The snapshot is loaded and validated by
+    ``load_latest_ai_agent_result_capture_writer_dry_run_readback`` and the
+    result is passed through ``redact_public_lan_topology`` before being
+    returned.
+
+    Raises:
+        HTTPException: 404 when the loader raises ``FileNotFoundError``;
+            500 when it raises ``json.JSONDecodeError`` or ``ValueError``.
+    """
+    try:
+        # The loader does blocking file I/O, so it is run via asyncio.to_thread
+        # rather than called directly inside this coroutine.
+        payload = await asyncio.to_thread(load_latest_ai_agent_result_capture_writer_dry_run_readback)
+        # The redaction call is inside the try block, so a ValueError raised by
+        # it is reported through the same 500 branch as an invalid snapshot.
+        return redact_public_lan_topology(payload)
+    except FileNotFoundError as exc:
+        # NOTE(review): the detail is the raw exception text; the loader's
+        # message embeds the evaluations directory path - confirm that exposing
+        # it to API clients is intended.
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=str(exc),
+        ) from exc
+    except (json.JSONDecodeError, ValueError) as exc:
+        # The specific validation failure is logged; the client only receives
+        # a fixed message.
+        logger.error("ai_agent_result_capture_writer_dry_run_readback_invalid", error=str(exc))
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="AI Agent result capture writer dry-run readback 無效",
+        ) from exc
+
+
@router.get(
    "/agent-owner-approved-fixture-dry-run",
    response_model=dict[str, Any],
--- a/apps/api/src/services/ai_agent_result_capture_writer_dry_run_readback.py
+++ b/apps/api/src/services/ai_agent_result_capture_writer_dry_run_readback.py
@@ -0,0 +1,390 @@
+"""
+AI Agent result capture writer dry-run readback snapshot.
+
+Loads the latest committed P2-124 dry-run readback / promotion readiness package.
+This module validates committed evidence only; it never applies writers, executes
+dry-runs, writes receipts, writes result captures, writes learning records,
+updates PlayBook trust, writes reviewer / Gateway queues, sends Telegram
+messages, reads canonical runtime targets, reads secrets, or performs
+destructive operations.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+from src.services.snapshot_paths import default_evaluations_dir
+
+# Directory searched for snapshots when the loader is called without an
+# explicit ``evaluations_dir``; computed by default_evaluations_dir from this
+# module's own path.
+_DEFAULT_EVALUATIONS_DIR = default_evaluations_dir(Path(__file__))
+# Glob pattern that selects this package's snapshot files inside that directory.
+_SNAPSHOT_PATTERN = "ai_agent_result_capture_writer_dry_run_readback_*.json"
+# Exact ``schema_version`` value a snapshot must declare to be accepted.
+_SCHEMA_VERSION = "ai_agent_result_capture_writer_dry_run_readback_v1"
+# Exact ``program_status.runtime_authority`` value a snapshot must declare.
+_RUNTIME_AUTHORITY = "result_capture_writer_dry_run_readback_only_no_live_write"
+
+
+def load_latest_ai_agent_result_capture_writer_dry_run_readback(
+    evaluations_dir: Path | None = None,
+) -> dict[str, Any]:
+    """Load and validate the newest committed dry-run readback snapshot.
+
+    Args:
+        evaluations_dir: Directory to search. Falls back to the module default
+            when omitted (or falsy).
+
+    Returns:
+        The parsed snapshot, unchanged, once every validator has accepted it.
+
+    Raises:
+        FileNotFoundError: No file in the directory matches the snapshot pattern.
+        json.JSONDecodeError: The selected file is not valid JSON.
+        ValueError: The snapshot is not a JSON object, a section has the wrong
+            JSON type, or any validator rejects its content.
+    """
+    directory = evaluations_dir or _DEFAULT_EVALUATIONS_DIR
+    # "Latest" is the last path in lexicographic order, so it depends on the
+    # file names sorting chronologically.
+    candidates = sorted(directory.glob(_SNAPSHOT_PATTERN))
+    if not candidates:
+        raise FileNotFoundError(f"no AI Agent result capture writer dry-run readback snapshots found in {directory}")
+
+    latest = candidates[-1]
+    with latest.open(encoding="utf-8") as handle:
+        payload = json.load(handle)
+
+    if not isinstance(payload, dict):
+        raise ValueError(f"{latest}: expected JSON object")
+
+    label = str(latest)
+    # Check container types first so that a malformed section surfaces as the
+    # ValueError callers handle, not as an AttributeError from a validator.
+    _require_section_shapes(payload, label)
+    _require_schema(payload, label)
+    _require_prior(payload, label)
+    _require_truth(payload, label)
+    _require_readbacks(payload, label)
+    _require_receipt_verifiers(payload, label)
+    _require_promotion_gates(payload, label)
+    _require_blocked_writes(payload, label)
+    _require_actions(payload, label)
+    _require_display_redaction(payload, label)
+    _require_no_forbidden_display_terms(payload, label)
+    _require_rollup_consistency(payload, label)
+    return payload
+
+
+def _require_section_shapes(payload: dict[str, Any], label: str) -> None:
+    """Reject sections whose JSON container type the validators cannot read.
+
+    Missing or falsy sections are skipped on purpose: the per-section
+    validators already treat them as empty and report what is missing.
+    """
+    object_sections = (
+        "program_status",
+        "prior_result_capture_writer_dry_run_fixture",
+        "readback_truth",
+        "display_redaction_contract",
+        "rollups",
+    )
+    list_sections = (
+        "dry_run_readback_cards",
+        "receipt_verifier_checks",
+        "promotion_readiness_gates",
+        "blocked_promotion_writes",
+        "operator_actions",
+    )
+    for name in object_sections:
+        section = payload.get(name)
+        if section and not isinstance(section, dict):
+            raise ValueError(f"{label}: {name} must be a JSON object")
+    for name in list_sections:
+        section = payload.get(name)
+        if not section:
+            continue
+        if not isinstance(section, list) or not all(isinstance(item, dict) for item in section):
+            raise ValueError(f"{label}: {name} must be a list of JSON objects")
+
+
+def _require_schema(payload: dict[str, Any], label: str) -> None:
+    """Check the schema version and the pinned ``program_status`` values.
+
+    Raises:
+        ValueError: ``schema_version`` differs from the expected constant, a
+            pinned ``program_status`` field differs, or ``status_note`` is
+            missing or empty.
+    """
+    declared_version = payload.get("schema_version")
+    if declared_version != _SCHEMA_VERSION:
+        raise ValueError(f"{label}: expected schema_version={_SCHEMA_VERSION}")
+
+    program_status = payload.get("program_status") or {}
+    pinned = dict(
+        current_priority="P2",
+        current_task_id="P2-124",
+        next_task_id="P2-125",
+        read_only_mode=True,
+        runtime_authority=_RUNTIME_AUTHORITY,
+        overall_completion_percent=100,
+    )
+    if drift := _mismatches(program_status, pinned):
+        raise ValueError(f"{label}: program_status mismatch: {drift}")
+
+    note = program_status.get("status_note")
+    if not note:
+        raise ValueError(f"{label}: program_status.status_note is required")
+
+
+def _require_prior(payload: dict[str, Any], label: str) -> None:
+    """Check the summary of the prior dry-run fixture embedded in the snapshot.
+
+    Every listed field must equal its pinned value and ``readiness_note``
+    must be non-empty.
+
+    Raises:
+        ValueError: A pinned field differs or ``readiness_note`` is missing.
+    """
+    fixture = payload.get("prior_result_capture_writer_dry_run_fixture") or {}
+    # Key order is kept as-is because it determines the order of the
+    # mismatch report.
+    pinned = {
+        "schema_version": "ai_agent_result_capture_writer_dry_run_fixture_v1",
+        **dict.fromkeys(
+            (
+                "writer_dry_run_fixture_count",
+                "receipt_preview_count",
+                "idempotency_replay_check_count",
+                "rollback_rehearsal_count",
+            ),
+            5,
+        ),
+        "blocked_runtime_write_count": 6,
+        "operator_action_count": 5,
+        **dict.fromkeys(
+            (
+                "writer_apply_count",
+                "dry_run_execution_count",
+                "receipt_write_count",
+                "result_capture_write_count",
+                "learning_write_count",
+                "playbook_trust_write_count",
+                "reviewer_queue_write_count",
+                "gateway_queue_write_count",
+                "telegram_send_count",
+                "bot_api_call_count",
+                "report_receipt_write_count",
+            ),
+            0,
+        ),
+    }
+    if drift := _mismatches(fixture, pinned):
+        raise ValueError(f"{label}: prior_result_capture_writer_dry_run_fixture mismatch: {drift}")
+
+    note = fixture.get("readiness_note")
+    if not note:
+        raise ValueError(f"{label}: prior_result_capture_writer_dry_run_fixture.readiness_note is required")
+
+
+def _require_truth(payload: dict[str, Any], label: str) -> None:
+    """Validate the ``readback_truth`` section of the snapshot.
+
+    Readiness flags must be exactly ``True``, live read/send/write flags must
+    be exactly ``False``, every live counter must be a numeric zero, and
+    ``truth_note`` must be non-empty.
+
+    Raises:
+        ValueError: The section is not a JSON object or any rule above fails.
+    """
+    truth = payload.get("readback_truth") or {}
+    if not isinstance(truth, dict):
+        # Without this a truthy non-object would fail with AttributeError below.
+        raise ValueError(f"{label}: readback_truth must be a JSON object")
+    required_true = {
+        "p2_123_fixture_loaded",
+        "dry_run_readback_ready",
+        "receipt_verifier_ready",
+        "promotion_readiness_ready",
+        "fixture_only_mode",
+        "owner_approval_required",
+        "dual_approval_required",
+        "dry_run_hash_required",
+        "receipt_verifier_required",
+        "promotion_owner_required",
+        "rollback_owner_required",
+    }
+    missing = sorted(field for field in required_true if truth.get(field) is not True)
+    if missing:
+        raise ValueError(f"{label}: readback ready flags must remain true: {missing}")
+    required_false = {
+        "writer_apply_enabled",
+        "dry_run_execution_enabled",
+        "receipt_write_enabled",
+        "promotion_apply_enabled",
+        "canonical_runtime_target_read_enabled",
+        "live_query_enabled",
+        "reviewer_queue_write_enabled",
+        "gateway_queue_write_enabled",
+        "telegram_send_enabled",
+        "bot_api_call_enabled",
+        "report_receipt_write_enabled",
+        "result_capture_write_enabled",
+        "learning_write_enabled",
+        "playbook_trust_write_enabled",
+        "production_write_enabled",
+        "secret_read_enabled",
+        "destructive_operation_enabled",
+    }
+    unsafe = sorted(field for field in required_false if truth.get(field) is not False)
+    if unsafe:
+        raise ValueError(f"{label}: live read/send/write flags must remain false: {unsafe}")
+    zero_counts = {
+        "owner_approval_received_count",
+        "dual_approval_received_count",
+        "dry_run_hash_verified_count",
+        "receipt_verifier_pass_count",
+        "writer_apply_count",
+        "dry_run_execution_count",
+        "receipt_write_count",
+        "promotion_apply_count",
+        "canonical_runtime_target_read_count",
+        "live_query_count",
+        "reviewer_queue_write_count",
+        "gateway_queue_write_count",
+        "telegram_send_count",
+        "bot_api_call_count",
+        "report_receipt_write_count",
+        "result_capture_write_count",
+        "learning_write_count",
+        "playbook_trust_write_count",
+        "production_write_count",
+        "secret_read_count",
+        "destructive_operation_count",
+    }
+    # bool is a subclass of int and ``False == 0``, so a plain ``!= 0`` test
+    # would let a JSON ``false`` pass as a zero counter. Reject bools
+    # explicitly, matching the strict identity checks used for the flags.
+    non_zero = sorted(
+        field
+        for field in zero_counts
+        if isinstance(truth.get(field), bool) or truth.get(field) != 0
+    )
+    if non_zero:
+        raise ValueError(f"{label}: dry-run readback live counters must remain zero: {non_zero}")
+    if not truth.get("truth_note"):
+        raise ValueError(f"{label}: readback_truth.truth_note is required")
+
+
+def _require_readbacks(payload: dict[str, Any], label: str) -> None:
+    """Validate ``dry_run_readback_cards``: exactly one card per required id.
+
+    Each card must be readback-only, must not enable runtime writes or dry-run
+    execution, must carry an allowed status, the required descriptive fields,
+    and an ``evidence_hash`` of the form ``sha256:<64 lowercase hex chars>``.
+
+    Raises:
+        ValueError: The section is not a list of JSON objects, the card ids are
+            not exactly the required ones (duplicates included), or any card
+            breaks a rule above.
+    """
+    cards = payload.get("dry_run_readback_cards") or []
+    if not isinstance(cards, list) or not all(isinstance(card, dict) for card in cards):
+        raise ValueError(f"{label}: dry_run_readback_cards must be a list of JSON objects")
+    required = {
+        "readback_result_capture_writer",
+        "readback_learning_writer",
+        "readback_playbook_trust_writer",
+        "readback_reviewer_queue_writer",
+        "readback_gateway_queue_writer",
+    }
+    ids = [card.get("readback_id") for card in cards]
+    # Comparing a set of ids would silently accept a duplicated card. Requiring
+    # the same length plus presence of every required id forces a one-to-one
+    # match, and avoids hashing ids that may not be strings.
+    if len(ids) != len(required) or any(name not in ids for name in required):
+        raise ValueError(f"{label}: dry-run readback cards must match {sorted(required)}")
+    for card in cards:
+        readback_id = card.get("readback_id")
+        if card.get("readback_only") is not True:
+            raise ValueError(f"{label}: readback {readback_id} must remain readback-only")
+        if card.get("runtime_write_enabled") is not False:
+            raise ValueError(f"{label}: readback {readback_id} must not enable runtime write")
+        if card.get("dry_run_execution_enabled") is not False:
+            raise ValueError(f"{label}: readback {readback_id} must not enable dry-run execution")
+        if card.get("status") not in {"ready_for_owner_review", "approval_required", "blocked_by_policy"}:
+            raise ValueError(f"{label}: readback {readback_id} status is invalid")
+        # A tuple gives a fixed check order, so the first reported missing
+        # field is deterministic.
+        for field in ("source_fixture", "receipt_preview", "promotion_candidate", "operator_note"):
+            if not card.get(field):
+                raise ValueError(f"{label}: readback {readback_id} missing {field}")
+        if not _is_redacted_sha256(card.get("evidence_hash")):
+            raise ValueError(f"{label}: readback {readback_id} must expose redacted evidence_hash")
+
+
+def _require_receipt_verifiers(payload: dict[str, Any], label: str) -> None:
+    """Validate ``receipt_verifier_checks``: exactly one check per required id.
+
+    Each check must use the no-write verifier mode, keep live receipts
+    disabled, be required before promotion, carry an allowed status, and name
+    a ``failure_if_missing`` outcome.
+
+    Raises:
+        ValueError: The section is not a list of JSON objects, the verifier ids
+            are not exactly the required ones (duplicates included), or any
+            check breaks a rule above.
+    """
+    checks = payload.get("receipt_verifier_checks") or []
+    if not isinstance(checks, list) or not all(isinstance(check, dict) for check in checks):
+        raise ValueError(f"{label}: receipt_verifier_checks must be a list of JSON objects")
+    required = {
+        "verify_result_capture_receipt",
+        "verify_learning_receipt",
+        "verify_playbook_trust_receipt",
+        "verify_reviewer_queue_receipt",
+        "verify_gateway_queue_receipt",
+    }
+    ids = [check.get("verifier_id") for check in checks]
+    # A set comparison would accept duplicated checks; same length plus every
+    # required id present forces a one-to-one match.
+    if len(ids) != len(required) or any(name not in ids for name in required):
+        raise ValueError(f"{label}: receipt verifier checks must match {sorted(required)}")
+    for check in checks:
+        verifier_id = check.get("verifier_id")
+        if check.get("verifier_mode") != "no_write_receipt_verifier":
+            raise ValueError(f"{label}: verifier {verifier_id} must remain no-write")
+        if check.get("live_receipt_enabled") is not False:
+            raise ValueError(f"{label}: verifier {verifier_id} must not enable live receipt")
+        if check.get("required_before_promotion") is not True:
+            raise ValueError(f"{label}: verifier {verifier_id} must be required before promotion")
+        if check.get("status") not in {"ready", "approval_required", "blocked_by_policy"}:
+            raise ValueError(f"{label}: verifier {verifier_id} status is invalid")
+        if not check.get("failure_if_missing"):
+            raise ValueError(f"{label}: verifier {verifier_id} failure_if_missing is required")
+
+
+def _require_promotion_gates(payload: dict[str, Any], label: str) -> None:
+    """Validate ``promotion_readiness_gates``: exactly one gate per required id.
+
+    Each gate must keep ``promotion_allowed`` at ``False``, carry an allowed
+    status, and list its ``required_evidence``.
+
+    Raises:
+        ValueError: The section is not a list of JSON objects, the gate ids are
+            not exactly the required ones (duplicates included), or any gate
+            breaks a rule above.
+    """
+    gates = payload.get("promotion_readiness_gates") or []
+    if not isinstance(gates, list) or not all(isinstance(gate, dict) for gate in gates):
+        raise ValueError(f"{label}: promotion_readiness_gates must be a list of JSON objects")
+    required = {
+        "gate_writer_apply_owner_approval",
+        "gate_dry_run_hash",
+        "gate_receipt_verifier",
+        "gate_rollback_owner",
+        "gate_production_write_block",
+    }
+    ids = [gate.get("gate_id") for gate in gates]
+    # A set comparison would accept duplicated gates; same length plus every
+    # required id present forces a one-to-one match.
+    if len(ids) != len(required) or any(name not in ids for name in required):
+        raise ValueError(f"{label}: promotion readiness gates must match {sorted(required)}")
+    for gate in gates:
+        gate_id = gate.get("gate_id")
+        if gate.get("promotion_allowed") is not False:
+            raise ValueError(f"{label}: promotion gate {gate_id} must not allow promotion")
+        if gate.get("status") not in {"ready_for_owner_review", "approval_required", "blocked_by_policy"}:
+            raise ValueError(f"{label}: promotion gate {gate_id} status is invalid")
+        if not gate.get("required_evidence"):
+            raise ValueError(f"{label}: promotion gate {gate_id} required_evidence is required")
+
+
+def _require_blocked_writes(payload: dict[str, Any], label: str) -> None:
+    """Validate ``blocked_promotion_writes``: exactly one blocker per required id.
+
+    Each blocker must carry an allowed status and severity, name both
+    ``blocked_action`` and ``blocked_until``, and expose an ``evidence_hash``
+    of the form ``sha256:<64 lowercase hex chars>``.
+
+    Raises:
+        ValueError: The section is not a list of JSON objects, the blocker ids
+            are not exactly the required ones (duplicates included), or any
+            blocker breaks a rule above.
+    """
+    blockers = payload.get("blocked_promotion_writes") or []
+    if not isinstance(blockers, list) or not all(isinstance(blocker, dict) for blocker in blockers):
+        raise ValueError(f"{label}: blocked_promotion_writes must be a list of JSON objects")
+    required = {
+        "blocked_writer_apply",
+        "blocked_dry_run_execution",
+        "blocked_receipt_write",
+        "blocked_result_capture_write",
+        "blocked_gateway_queue_write",
+        "blocked_telegram_send",
+    }
+    ids = [blocker.get("blocker_id") for blocker in blockers]
+    # A set comparison would accept duplicated blockers; same length plus
+    # every required id present forces a one-to-one match.
+    if len(ids) != len(required) or any(name not in ids for name in required):
+        raise ValueError(f"{label}: blocked promotion writes must match {sorted(required)}")
+    for blocker in blockers:
+        blocker_id = blocker.get("blocker_id")
+        if blocker.get("status") not in {"approval_required", "blocked_by_policy"}:
+            raise ValueError(f"{label}: blocker {blocker_id} status is invalid")
+        if blocker.get("severity") not in {"high", "critical"}:
+            raise ValueError(f"{label}: blocker {blocker_id} severity is invalid")
+        if not blocker.get("blocked_action") or not blocker.get("blocked_until"):
+            raise ValueError(f"{label}: blocker {blocker_id} must include blocked_action and blocked_until")
+        if not _is_redacted_sha256(blocker.get("evidence_hash")):
+            raise ValueError(f"{label}: blocker {blocker_id} must expose redacted evidence_hash")
+
+
+def _require_actions(payload: dict[str, Any], label: str) -> None:
+    """Validate ``operator_actions``: exactly one action per required id.
+
+    Each action must keep ``runtime_write_allowed`` at ``False`` and carry a
+    non-empty ``operator_instruction``.
+
+    Raises:
+        ValueError: The section is not a list of JSON objects, the action ids
+            are not exactly the required ones (duplicates included), or any
+            action breaks a rule above.
+    """
+    actions = payload.get("operator_actions") or []
+    if not isinstance(actions, list) or not all(isinstance(action, dict) for action in actions):
+        raise ValueError(f"{label}: operator_actions must be a list of JSON objects")
+    required = {
+        "review_dry_run_readback",
+        "verify_receipt_verifier",
+        "score_promotion_readiness",
+        "prepare_p2_125_owner_packet",
+        "hold_runtime_write_gate",
+    }
+    ids = [action.get("action_id") for action in actions]
+    # A set comparison would accept duplicated actions; same length plus every
+    # required id present forces a one-to-one match.
+    if len(ids) != len(required) or any(name not in ids for name in required):
+        raise ValueError(f"{label}: operator actions must match {sorted(required)}")
+    for action in actions:
+        action_id = action.get("action_id")
+        if action.get("runtime_write_allowed") is not False:
+            raise ValueError(f"{label}: action {action_id} must not allow runtime write")
+        if not action.get("operator_instruction"):
+            raise ValueError(f"{label}: action {action_id} operator_instruction is required")
+
+
+def _require_display_redaction(payload: dict[str, Any], label: str) -> None:
+    """Check that the display redaction contract keeps every raw view disabled.
+
+    Raises:
+        ValueError: ``redaction_required`` is not ``True``, any
+            ``*_display_allowed`` flag is not ``False``, or
+            ``frontend_display_policy`` is missing or empty.
+    """
+    redaction = payload.get("display_redaction_contract") or {}
+    pinned = {
+        "redaction_required": True,
+        **dict.fromkeys(
+            (
+                "raw_prompt_display_allowed",
+                "private_reasoning_display_allowed",
+                "secret_value_display_allowed",
+                "raw_runtime_payload_display_allowed",
+                "internal_collaboration_content_display_allowed",
+            ),
+            False,
+        ),
+    }
+    if drift := _mismatches(redaction, pinned):
+        raise ValueError(f"{label}: display_redaction_contract mismatch: {drift}")
+
+    policy = redaction.get("frontend_display_policy")
+    if not policy:
+        raise ValueError(f"{label}: display_redaction_contract.frontend_display_policy is required")
+
+
+def _require_rollup_consistency(payload: dict[str, Any], label: str) -> None:
+    """Check that ``rollups`` agrees with the sections it summarises.
+
+    Size and status/severity tallies are recomputed from the five list
+    sections; every live counter is pinned to zero.
+
+    Raises:
+        ValueError: Any rollup value differs from the recomputed or pinned one.
+    """
+
+    def tally(items: list[Any], field: str, wanted: str) -> int:
+        # Number of entries whose ``field`` equals ``wanted``.
+        return len([entry for entry in items if entry.get(field) == wanted])
+
+    summary = payload.get("rollups") or {}
+    cards = payload.get("dry_run_readback_cards") or []
+    checks = payload.get("receipt_verifier_checks") or []
+    gate_list = payload.get("promotion_readiness_gates") or []
+    blocker_list = payload.get("blocked_promotion_writes") or []
+    action_list = payload.get("operator_actions") or []
+
+    # Key order is kept as-is because it determines the order of the
+    # mismatch report.
+    derived = {
+        "dry_run_readback_card_count": len(cards),
+        "receipt_verifier_check_count": len(checks),
+        "promotion_readiness_gate_count": len(gate_list),
+        "blocked_promotion_write_count": len(blocker_list),
+        "operator_action_count": len(action_list),
+        "approval_required_readback_count": tally(cards, "status", "approval_required"),
+        "blocked_readback_count": tally(cards, "status", "blocked_by_policy"),
+        "approval_required_verifier_count": tally(checks, "status", "approval_required"),
+        "blocked_verifier_count": tally(checks, "status", "blocked_by_policy"),
+        "approval_required_gate_count": tally(gate_list, "status", "approval_required"),
+        "blocked_gate_count": tally(gate_list, "status", "blocked_by_policy"),
+        "critical_blocker_count": tally(blocker_list, "severity", "critical"),
+    }
+    pinned_zero = dict.fromkeys(
+        (
+            "owner_approval_received_count",
+            "dual_approval_received_count",
+            "dry_run_hash_verified_count",
+            "receipt_verifier_pass_count",
+            "writer_apply_count",
+            "dry_run_execution_count",
+            "receipt_write_count",
+            "promotion_apply_count",
+            "canonical_runtime_target_read_count",
+            "live_query_count",
+            "reviewer_queue_write_count",
+            "gateway_queue_write_count",
+            "telegram_send_count",
+            "bot_api_call_count",
+            "report_receipt_write_count",
+            "result_capture_write_count",
+            "learning_write_count",
+            "playbook_trust_write_count",
+            "production_write_count",
+            "secret_read_count",
+            "destructive_operation_count",
+        ),
+        0,
+    )
+    if drift := _mismatches(summary, {**derived, **pinned_zero}):
+        raise ValueError(f"{label}: rollup counts mismatch: {drift}")
+
+
+def _require_no_forbidden_display_terms(payload: dict[str, Any], label: str) -> None:
+    """Reject a snapshot whose serialized JSON contains a forbidden term.
+
+    The whole payload is dumped to a single string (non-ASCII kept as-is) and
+    searched with case-sensitive substring matching, so both keys and values
+    are covered.
+
+    Raises:
+        ValueError: One or more forbidden terms occur; the message lists them
+            in sorted order.
+    """
+    blob = json.dumps(payload, ensure_ascii=False)
+    banned_terms = (
+        "work_window_transcript",
+        "session_id",
+        "browser_context",
+        "authorization_header",
+        "raw Telegram payload",
+        "private reasoning",
+        "raw prompt",
+        "chain-of-thought",
+    )
+    found = [term for term in banned_terms if term in blob]
+    if not found:
+        return
+    found.sort()
+    raise ValueError(f"{label}: forbidden display terms present: {found}")
+
+
+def _is_redacted_sha256(value: Any) -> bool:
+    """Return True when ``value`` is ``sha256:`` followed by 64 lowercase hex chars."""
+    if not isinstance(value, str):
+        return False
+    # partition splits at the first "sha256:"; an empty head means the string
+    # starts with the marker, and an empty marker means it is absent.
+    head, marker, digest = value.partition("sha256:")
+    if head or not marker:
+        return False
+    return len(digest) == 64 and set(digest) <= set("0123456789abcdef")
+
+
+def _mismatches(payload: dict[str, Any], expected: dict[str, Any]) -> dict[str, dict[str, Any]]:
+    """Return the fields of ``payload`` that differ from their pinned values.
+
+    Args:
+        payload: Section read from the snapshot.
+        expected: Field name mapped to the value it must have.
+
+    Returns:
+        ``{field: {"expected": ..., "actual": ...}}`` for each differing field,
+        in the order of ``expected``. Empty when everything matches. A missing
+        field is reported with ``actual`` set to ``None``.
+    """
+    found: dict[str, dict[str, Any]] = {}
+    for key, value in expected.items():
+        actual = payload.get(key)
+        # bool is a subclass of int, so ``True == 1`` and ``False == 0``.
+        # Plain equality would accept a flag stored as 1 or a counter stored
+        # as false, so the two must also agree on being a bool.
+        if actual != value or isinstance(actual, bool) != isinstance(value, bool):
+            found[key] = {"expected": value, "actual": actual}
+    return found