feat(agents): expose controlled executor handoff runway

2026-06-27 11:42:21 +08:00
parent fccd8874fc
commit b2b51ecbf2
11 changed files with 2092 additions and 1 deletions
--- a/apps/api/tests/test_ai_agent_controlled_executor_handoff.py
+++ b/apps/api/tests/test_ai_agent_controlled_executor_handoff.py
@@ -0,0 +1,147 @@
+from __future__ import annotations
+
+import copy
+import json
+from pathlib import Path
+
+import pytest
+
+from src.services.ai_agent_controlled_executor_handoff import (
+    load_latest_ai_agent_controlled_executor_handoff,
+)
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]
+_COMMITTED_SNAPSHOT = (
+    _REPO_ROOT
+    / "docs"
+    / "evaluations"
+    / "ai_agent_controlled_executor_handoff_2026-06-27.json"
+)
+
+
+def test_load_latest_ai_agent_controlled_executor_handoff_reads_newest_file(tmp_path):
+    older = _snapshot(generated_at="2026-06-26T23:55:00+08:00")
+    newer = _snapshot(generated_at="2026-06-27T01:20:00+08:00")
+    (tmp_path / "ai_agent_controlled_executor_handoff_2026-06-26.json").write_text(
+        json.dumps(older),
+        encoding="utf-8",
+    )
+    (tmp_path / "ai_agent_controlled_executor_handoff_2026-06-27.json").write_text(
+        json.dumps(newer),
+        encoding="utf-8",
+    )
+
+    loaded = load_latest_ai_agent_controlled_executor_handoff(tmp_path)
+
+    assert loaded["generated_at"] == "2026-06-27T01:20:00+08:00"
+    assert loaded["schema_version"] == "ai_agent_controlled_executor_handoff_v1"
+    assert loaded["program_status"]["current_task_id"] == "P2-415"
+    assert loaded["program_status"]["next_task_id"] == "P2-416"
+    assert loaded["program_status"]["read_only_mode"] is True
+    assert loaded["program_status"]["runtime_authority"] == "controlled_executor_handoff_readback_no_live_apply"
+    assert loaded["handoff_truth"]["high_risk_controlled_executor_handoff_ready"] is True
+    assert loaded["handoff_truth"]["critical_break_glass_required"] is True
+    assert loaded["handoff_truth"]["controlled_executor_dispatch_enabled"] is False
+    assert loaded["rollups"]["source_readback_count"] == 8
+    assert loaded["rollups"]["handoff_packet_count"] == 7
+    assert loaded["rollups"]["ready_for_controlled_executor_count"] == 5
+    assert loaded["rollups"]["critical_break_glass_count"] == 2
+    assert loaded["rollups"]["ansible_check_mode_packet_count"] == 3
+    assert loaded["rollups"]["mcp_tool_route_count"] == 7
+    assert loaded["rollups"]["executor_route_count"] == 5
+    assert loaded["rollups"]["verifier_binding_count"] == 5
+    assert loaded["rollups"]["learning_writeback_contract_count"] == 3
+    assert loaded["rollups"]["owner_response_required_count"] == 2
+    assert loaded["rollups"]["missing_check_mode_count"] == 0
+    assert loaded["rollups"]["missing_verifier_count"] == 0
+    assert loaded["rollups"]["controlled_executor_dispatch_count"] == 0
+    assert loaded["rollups"]["live_apply_count"] == 0
+    assert loaded["rollups"]["gateway_queue_write_count"] == 0
+    assert loaded["rollups"]["telegram_send_count"] == 0
+    assert loaded["rollups"]["km_write_count"] == 0
+    assert loaded["rollups"]["playbook_trust_write_count"] == 0
+    assert loaded["rollups"]["production_write_count"] == 0
+    assert loaded["rollups"]["host_write_count"] == 0
+    assert loaded["rollups"]["kubectl_action_count"] == 0
+
+
+def test_ai_agent_controlled_executor_handoff_rejects_high_packet_without_check_mode(tmp_path):
+    snapshot = _snapshot()
+    high_packet = _first_packet(snapshot, "high")
+    high_packet["check_mode_passed"] = False
+    snapshot["rollups"]["missing_check_mode_count"] = 1
+    _write_snapshot(tmp_path, snapshot)
+
+    with pytest.raises(ValueError, match="controlled executor gates"):
+        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
+
+
+def test_ai_agent_controlled_executor_handoff_keeps_high_packet_off_owner_response(tmp_path):
+    snapshot = _snapshot()
+    high_packet = _first_packet(snapshot, "high")
+    high_packet["owner_response_required"] = True
+    snapshot["rollups"]["owner_response_required_count"] = 3
+    _write_snapshot(tmp_path, snapshot)
+
+    with pytest.raises(ValueError, match="owner response"):
+        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
+
+
+def test_ai_agent_controlled_executor_handoff_keeps_critical_on_break_glass(tmp_path):
+    snapshot = _snapshot()
+    critical_packet = _first_packet(snapshot, "critical")
+    critical_packet["controlled_executor_handoff_allowed"] = True
+    _write_snapshot(tmp_path, snapshot)
+
+    with pytest.raises(ValueError, match="critical packet"):
+        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
+
+
+def test_ai_agent_controlled_executor_handoff_blocks_live_apply_rollup(tmp_path):
+    snapshot = _snapshot()
+    snapshot["rollups"]["live_apply_count"] = 1
+    _write_snapshot(tmp_path, snapshot)
+
+    with pytest.raises(ValueError, match="live/write rollup counts"):
+        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
+
+
+def test_ai_agent_controlled_executor_handoff_requires_rollup_consistency(tmp_path):
+    snapshot = _snapshot()
+    snapshot["rollups"]["handoff_packet_count"] = 99
+    _write_snapshot(tmp_path, snapshot)
+
+    with pytest.raises(ValueError, match="rollup counts"):
+        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
+
+
+def test_ai_agent_controlled_executor_handoff_rejects_private_terms(tmp_path):
+    snapshot = _snapshot()
+    snapshot["executor_handoff_packets"][0]["display_name"] = "請把 In app browser 狀態放進前端"
+    _write_snapshot(tmp_path, snapshot)
+
+    with pytest.raises(ValueError, match="forbidden public terms"):
+        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
+
+
+def test_ai_agent_controlled_executor_handoff_fails_when_missing(tmp_path):
+    with pytest.raises(FileNotFoundError):
+        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
+
+
+def _snapshot(*, generated_at: str = "2026-06-27T01:20:00+08:00") -> dict:
+    payload = json.loads(_COMMITTED_SNAPSHOT.read_text(encoding="utf-8"))
+    cloned = copy.deepcopy(payload)
+    cloned["generated_at"] = generated_at
+    return cloned
+
+
+def _first_packet(snapshot: dict, risk_tier: str) -> dict:
+    return next(packet for packet in snapshot["executor_handoff_packets"] if packet["risk_tier"] == risk_tier)
+
+
+def _write_snapshot(path: Path, snapshot: dict) -> None:
+    (path / "ai_agent_controlled_executor_handoff_2026-06-27.json").write_text(
+        json.dumps(snapshot),
+        encoding="utf-8",
+    )
--- a/apps/api/tests/test_ai_agent_controlled_executor_handoff_api.py
+++ b/apps/api/tests/test_ai_agent_controlled_executor_handoff_api.py
@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from src.api.v1.agents import router
+
+
+def test_ai_agent_controlled_executor_handoff_endpoint_returns_committed_snapshot():
+    app = FastAPI()
+    app.include_router(router, prefix="/api/v1")
+    client = TestClient(app)
+
+    response = client.get("/api/v1/agents/agent-controlled-executor-handoff")
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["schema_version"] == "ai_agent_controlled_executor_handoff_v1"
+    assert data["program_status"]["current_task_id"] == "P2-415"
+    assert data["program_status"]["next_task_id"] == "P2-416"
+    assert data["program_status"]["read_only_mode"] is True
+    assert data["program_status"]["runtime_authority"] == "controlled_executor_handoff_readback_no_live_apply"
+    assert data["handoff_truth"]["high_risk_controlled_executor_handoff_ready"] is True
+    assert data["handoff_truth"]["high_risk_owner_review_required"] is False
+    assert data["handoff_truth"]["critical_break_glass_required"] is True
+    assert data["handoff_truth"]["controlled_executor_dispatch_enabled"] is False
+    assert data["rollups"]["source_readback_count"] == len(data["source_readbacks"]) == 8
+    assert data["rollups"]["handoff_packet_count"] == len(data["executor_handoff_packets"]) == 7
+    assert data["rollups"]["ready_for_controlled_executor_count"] == 5
+    assert data["rollups"]["critical_break_glass_count"] == 2
+    assert data["rollups"]["high_risk_packet_count"] == 5
+    assert data["rollups"]["critical_packet_count"] == 2
+    assert data["rollups"]["executor_route_count"] == len(data["executor_routes"]) == 5
+    assert data["rollups"]["verifier_binding_count"] == len(data["verifier_bindings"]) == 5
+    assert data["rollups"]["learning_writeback_contract_count"] == len(data["learning_writeback_contracts"]) == 3
+    assert data["rollups"]["owner_response_required_count"] == 2
+    assert data["rollups"]["missing_check_mode_count"] == 0
+    assert data["rollups"]["missing_rollback_count"] == 0
+    assert data["rollups"]["missing_verifier_count"] == 0
+    assert data["rollups"]["missing_telegram_evidence_count"] == 0
+    assert data["rollups"]["missing_learning_writeback_count"] == 0
+    assert data["rollups"]["controlled_executor_dispatch_count"] == 0
+    assert data["rollups"]["live_apply_count"] == 0
+    assert data["rollups"]["gateway_queue_write_count"] == 0
+    assert data["rollups"]["telegram_send_count"] == 0
+    assert data["rollups"]["bot_api_call_count"] == 0
+    assert data["rollups"]["km_write_count"] == 0
+    assert data["rollups"]["playbook_trust_write_count"] == 0
+    assert data["rollups"]["production_write_count"] == 0
+    assert data["rollups"]["secret_read_count"] == 0
+    assert data["rollups"]["paid_api_call_count"] == 0
+    assert data["rollups"]["host_write_count"] == 0
+    assert data["rollups"]["kubectl_action_count"] == 0
+    assert data["rollups"]["destructive_operation_count"] == 0
+    assert all(
+        packet["controlled_executor_handoff_allowed"] is True
+        for packet in data["executor_handoff_packets"]
+        if packet["risk_tier"] == "high"
+    )
+    assert all(
+        packet["owner_response_required"] is False
+        for packet in data["executor_handoff_packets"]
+        if packet["risk_tier"] == "high"
+    )
+    assert all(
+        packet["handoff_status"] == "critical_break_glass_only"
+        for packet in data["executor_handoff_packets"]
+        if packet["risk_tier"] == "critical"
+    )
+    assert data["activation_boundaries"]["controlled_executor_handoff_preview_allowed"] is True
+    assert data["activation_boundaries"]["controlled_executor_dispatch_enabled"] is False
+    assert data["activation_boundaries"]["live_apply_enabled"] is False
+    assert data["display_redaction_contract"]["redaction_required"] is True
+    assert data["display_redaction_contract"]["work_window_transcript_display_allowed"] is False