feat(agents): expose controlled executor handoff runway
Some checks failed
Code Review / ai-code-review (push) Successful in 22s
CD Pipeline / tests (push) Successful in 1m47s
CD Pipeline / build-and-deploy (push) Successful in 6m20s
CD Pipeline / post-deploy-checks (push) Successful in 2m18s
Ansible / Reboot Recovery Contract / validate (push) Has been cancelled

This commit is contained in:
Your Name
2026-06-27 11:42:21 +08:00
parent fccd8874fc
commit b2b51ecbf2
11 changed files with 2092 additions and 1 deletions

View File

@@ -0,0 +1,147 @@
from __future__ import annotations
import copy
import json
from pathlib import Path
import pytest
from src.services.ai_agent_controlled_executor_handoff import (
load_latest_ai_agent_controlled_executor_handoff,
)
# Ancestor directory at index 3 of this module's resolved path.
_REPO_ROOT = Path(__file__).resolve().parents[3]
# Location of the committed 2026-06-27 handoff snapshot under docs/evaluations.
_COMMITTED_SNAPSHOT = _REPO_ROOT.joinpath(
    "docs",
    "evaluations",
    "ai_agent_controlled_executor_handoff_2026-06-27.json",
)
def test_load_latest_ai_agent_controlled_executor_handoff_reads_newest_file(tmp_path):
    """Write two dated snapshots and expect the loader to return the 2026-06-27 one."""
    file_prefix = "ai_agent_controlled_executor_handoff_"
    # Insertion order matters: the older snapshot is built and written first.
    dated_snapshots = {
        "2026-06-26": _snapshot(generated_at="2026-06-26T23:55:00+08:00"),
        "2026-06-27": _snapshot(generated_at="2026-06-27T01:20:00+08:00"),
    }
    for date_stamp, body in dated_snapshots.items():
        (tmp_path / f"{file_prefix}{date_stamp}.json").write_text(
            json.dumps(body),
            encoding="utf-8",
        )

    loaded = load_latest_ai_agent_controlled_executor_handoff(tmp_path)

    assert loaded["generated_at"] == "2026-06-27T01:20:00+08:00"
    assert loaded["schema_version"] == "ai_agent_controlled_executor_handoff_v1"

    status = loaded["program_status"]
    assert status["current_task_id"] == "P2-415"
    assert status["next_task_id"] == "P2-416"
    assert status["read_only_mode"] is True
    assert status["runtime_authority"] == "controlled_executor_handoff_readback_no_live_apply"

    truth = loaded["handoff_truth"]
    assert truth["high_risk_controlled_executor_handoff_ready"] is True
    assert truth["critical_break_glass_required"] is True
    assert truth["controlled_executor_dispatch_enabled"] is False

    # Expected rollup counters, checked in the listed order.
    expected_rollups = {
        "source_readback_count": 8,
        "handoff_packet_count": 7,
        "ready_for_controlled_executor_count": 5,
        "critical_break_glass_count": 2,
        "ansible_check_mode_packet_count": 3,
        "mcp_tool_route_count": 7,
        "executor_route_count": 5,
        "verifier_binding_count": 5,
        "learning_writeback_contract_count": 3,
        "owner_response_required_count": 2,
        "missing_check_mode_count": 0,
        "missing_verifier_count": 0,
        "controlled_executor_dispatch_count": 0,
        "live_apply_count": 0,
        "gateway_queue_write_count": 0,
        "telegram_send_count": 0,
        "km_write_count": 0,
        "playbook_trust_write_count": 0,
        "production_write_count": 0,
        "host_write_count": 0,
        "kubectl_action_count": 0,
    }
    rollups = loaded["rollups"]
    for counter, expected in expected_rollups.items():
        assert rollups[counter] == expected, counter
def test_ai_agent_controlled_executor_handoff_rejects_high_packet_without_check_mode(tmp_path):
    """Expect a ValueError matching "controlled executor gates" when a high packet fails check mode."""
    tampered = _snapshot()
    # Flip the first high-risk packet and keep the rollup counter in step with it.
    _first_packet(tampered, "high")["check_mode_passed"] = False
    tampered["rollups"]["missing_check_mode_count"] = 1
    _write_snapshot(tmp_path, tampered)

    with pytest.raises(ValueError, match="controlled executor gates"):
        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
def test_ai_agent_controlled_executor_handoff_keeps_high_packet_off_owner_response(tmp_path):
    """Expect a ValueError matching "owner response" when a high packet requires an owner response."""
    tampered = _snapshot()
    # Mark the first high-risk packet as needing an owner response and bump the rollup to 3.
    _first_packet(tampered, "high")["owner_response_required"] = True
    tampered["rollups"]["owner_response_required_count"] = 3
    _write_snapshot(tmp_path, tampered)

    with pytest.raises(ValueError, match="owner response"):
        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
def test_ai_agent_controlled_executor_handoff_keeps_critical_on_break_glass(tmp_path):
    """Expect a ValueError matching "critical packet" when a critical packet allows handoff."""
    tampered = _snapshot()
    # Only the packet flag is changed; the rollups are left as committed.
    _first_packet(tampered, "critical")["controlled_executor_handoff_allowed"] = True
    _write_snapshot(tmp_path, tampered)

    with pytest.raises(ValueError, match="critical packet"):
        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
def test_ai_agent_controlled_executor_handoff_blocks_live_apply_rollup(tmp_path):
    """Expect a ValueError matching "live/write rollup counts" when live_apply_count is non-zero."""
    tampered = _snapshot()
    rollups = tampered["rollups"]
    rollups["live_apply_count"] = 1
    _write_snapshot(tmp_path, tampered)

    with pytest.raises(ValueError, match="live/write rollup counts"):
        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
def test_ai_agent_controlled_executor_handoff_requires_rollup_consistency(tmp_path):
    """Expect a ValueError matching "rollup counts" when handoff_packet_count is set to 99."""
    tampered = _snapshot()
    rollups = tampered["rollups"]
    rollups["handoff_packet_count"] = 99
    _write_snapshot(tmp_path, tampered)

    with pytest.raises(ValueError, match="rollup counts"):
        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
def test_ai_agent_controlled_executor_handoff_rejects_private_terms(tmp_path):
    """Expect a ValueError matching "forbidden public terms" for the display name injected below."""
    tampered = _snapshot()
    leading_packet = tampered["executor_handoff_packets"][0]
    leading_packet["display_name"] = "請把 In app browser 狀態放進前端"
    _write_snapshot(tmp_path, tampered)

    with pytest.raises(ValueError, match="forbidden public terms"):
        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
def test_ai_agent_controlled_executor_handoff_fails_when_missing(tmp_path):
    """Expect FileNotFoundError when the loader is pointed at a directory with no snapshot written."""
    # Nothing is written into tmp_path before the call.
    empty_directory = tmp_path
    with pytest.raises(FileNotFoundError):
        load_latest_ai_agent_controlled_executor_handoff(empty_directory)
def _snapshot(*, generated_at: str = "2026-06-27T01:20:00+08:00") -> dict:
    """Load the committed snapshot and return a deep copy stamped with ``generated_at``.

    Args:
        generated_at: Value stored under the ``generated_at`` key of the returned dict.

    Returns:
        A freshly parsed and deep-copied snapshot dict.
    """
    raw_text = _COMMITTED_SNAPSHOT.read_text(encoding="utf-8")
    snapshot = copy.deepcopy(json.loads(raw_text))
    snapshot["generated_at"] = generated_at
    return snapshot
def _first_packet(snapshot: dict, risk_tier: str) -> dict:
    """Return the first handoff packet in ``snapshot`` whose ``risk_tier`` matches.

    Args:
        snapshot: Parsed snapshot holding an ``executor_handoff_packets`` list.
        risk_tier: Tier value to look for.

    Returns:
        The matching packet object itself (not a copy), so callers can mutate it in place.

    Raises:
        LookupError: If no packet carries the requested tier.
    """
    for packet in snapshot["executor_handoff_packets"]:
        if packet["risk_tier"] == risk_tier:
            return packet
    # A bare next() would leak StopIteration here, which hides the real cause of the failure.
    raise LookupError(
        f"no executor handoff packet with risk_tier={risk_tier!r} in snapshot"
    )
def _write_snapshot(path: Path, snapshot: dict) -> None:
    """Serialize ``snapshot`` as JSON into the 2026-06-27 handoff file inside ``path``.

    Args:
        path: Directory that receives the file.
        snapshot: JSON-serializable snapshot payload.
    """
    target = path / "ai_agent_controlled_executor_handoff_2026-06-27.json"
    serialized = json.dumps(snapshot)
    target.write_text(serialized, encoding="utf-8")

View File

@@ -0,0 +1,74 @@
from __future__ import annotations
from fastapi import FastAPI
from fastapi.testclient import TestClient
from src.api.v1.agents import router
def test_ai_agent_controlled_executor_handoff_endpoint_returns_committed_snapshot():
    """GET the handoff endpoint on a bare app and check the payload's status, rollups and boundaries."""
    application = FastAPI()
    application.include_router(router, prefix="/api/v1")
    response = TestClient(application).get("/api/v1/agents/agent-controlled-executor-handoff")

    assert response.status_code == 200
    data = response.json()
    assert data["schema_version"] == "ai_agent_controlled_executor_handoff_v1"

    status = data["program_status"]
    assert status["current_task_id"] == "P2-415"
    assert status["next_task_id"] == "P2-416"
    assert status["read_only_mode"] is True
    assert status["runtime_authority"] == "controlled_executor_handoff_readback_no_live_apply"

    truth = data["handoff_truth"]
    assert truth["high_risk_controlled_executor_handoff_ready"] is True
    assert truth["high_risk_owner_review_required"] is False
    assert truth["critical_break_glass_required"] is True
    assert truth["controlled_executor_dispatch_enabled"] is False

    # (rollup key, expected value, top-level collection whose length must agree, or None).
    rollup_expectations = [
        ("source_readback_count", 8, "source_readbacks"),
        ("handoff_packet_count", 7, "executor_handoff_packets"),
        ("ready_for_controlled_executor_count", 5, None),
        ("critical_break_glass_count", 2, None),
        ("high_risk_packet_count", 5, None),
        ("critical_packet_count", 2, None),
        ("executor_route_count", 5, "executor_routes"),
        ("verifier_binding_count", 5, "verifier_bindings"),
        ("learning_writeback_contract_count", 3, "learning_writeback_contracts"),
        ("owner_response_required_count", 2, None),
        ("missing_check_mode_count", 0, None),
        ("missing_rollback_count", 0, None),
        ("missing_verifier_count", 0, None),
        ("missing_telegram_evidence_count", 0, None),
        ("missing_learning_writeback_count", 0, None),
        ("controlled_executor_dispatch_count", 0, None),
        ("live_apply_count", 0, None),
        ("gateway_queue_write_count", 0, None),
        ("telegram_send_count", 0, None),
        ("bot_api_call_count", 0, None),
        ("km_write_count", 0, None),
        ("playbook_trust_write_count", 0, None),
        ("production_write_count", 0, None),
        ("secret_read_count", 0, None),
        ("paid_api_call_count", 0, None),
        ("host_write_count", 0, None),
        ("kubectl_action_count", 0, None),
        ("destructive_operation_count", 0, None),
    ]
    rollups = data["rollups"]
    for rollup_key, expected, collection_key in rollup_expectations:
        if collection_key is None:
            assert rollups[rollup_key] == expected, rollup_key
        else:
            assert rollups[rollup_key] == len(data[collection_key]) == expected, rollup_key

    packets = data["executor_handoff_packets"]
    high_packets = [entry for entry in packets if entry["risk_tier"] == "high"]
    critical_packets = [entry for entry in packets if entry["risk_tier"] == "critical"]
    assert all(entry["controlled_executor_handoff_allowed"] is True for entry in high_packets)
    assert all(entry["owner_response_required"] is False for entry in high_packets)
    assert all(
        entry["handoff_status"] == "critical_break_glass_only"
        for entry in critical_packets
    )

    boundaries = data["activation_boundaries"]
    assert boundaries["controlled_executor_handoff_preview_allowed"] is True
    assert boundaries["controlled_executor_dispatch_enabled"] is False
    assert boundaries["live_apply_enabled"] is False

    redaction = data["display_redaction_contract"]
    assert redaction["redaction_required"] is True
    assert redaction["work_window_transcript_display_allowed"] is False