feat(agents): expose controlled executor handoff runway
Some checks failed
Code Review / ai-code-review (push) Successful in 22s
CD Pipeline / tests (push) Successful in 1m47s
CD Pipeline / build-and-deploy (push) Successful in 6m20s
CD Pipeline / post-deploy-checks (push) Successful in 2m18s
Ansible / Reboot Recovery Contract / validate (push) Has been cancelled
Some checks failed
Code Review / ai-code-review (push) Successful in 22s
CD Pipeline / tests (push) Successful in 1m47s
CD Pipeline / build-and-deploy (push) Successful in 6m20s
CD Pipeline / post-deploy-checks (push) Successful in 2m18s
Ansible / Reboot Recovery Contract / validate (push) Has been cancelled
This commit is contained in:
147
apps/api/tests/test_ai_agent_controlled_executor_handoff.py
Normal file
147
apps/api/tests/test_ai_agent_controlled_executor_handoff.py
Normal file
@@ -0,0 +1,147 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from src.services.ai_agent_controlled_executor_handoff import (
|
||||
load_latest_ai_agent_controlled_executor_handoff,
|
||||
)
|
||||
|
||||
# parents[3] climbs from this test module past three enclosing directories
# to the directory used as the repository root.
_REPO_ROOT = Path(__file__).resolve().parents[3]
# Committed snapshot file that `_snapshot` reads as the baseline payload.
_COMMITTED_SNAPSHOT = _REPO_ROOT.joinpath(
    "docs",
    "evaluations",
    "ai_agent_controlled_executor_handoff_2026-06-27.json",
)
|
||||
|
||||
|
||||
def test_load_latest_ai_agent_controlled_executor_handoff_reads_newest_file(tmp_path):
    """With two dated snapshot files present, the loader returns the later one.

    Both files are clones of the committed snapshot that differ only in
    ``generated_at``, so that field identifies which file was loaded.
    """
    dated_files = (
        ("ai_agent_controlled_executor_handoff_2026-06-26.json", "2026-06-26T23:55:00+08:00"),
        ("ai_agent_controlled_executor_handoff_2026-06-27.json", "2026-06-27T01:20:00+08:00"),
    )
    payloads = [(file_name, _snapshot(generated_at=stamp)) for file_name, stamp in dated_files]
    for file_name, payload in payloads:
        (tmp_path / file_name).write_text(json.dumps(payload), encoding="utf-8")

    loaded = load_latest_ai_agent_controlled_executor_handoff(tmp_path)

    assert loaded["generated_at"] == "2026-06-27T01:20:00+08:00"
    assert loaded["schema_version"] == "ai_agent_controlled_executor_handoff_v1"

    status = loaded["program_status"]
    assert status["current_task_id"] == "P2-415"
    assert status["next_task_id"] == "P2-416"
    assert status["read_only_mode"] is True
    assert status["runtime_authority"] == "controlled_executor_handoff_readback_no_live_apply"

    truth = loaded["handoff_truth"]
    assert truth["high_risk_controlled_executor_handoff_ready"] is True
    assert truth["critical_break_glass_required"] is True
    assert truth["controlled_executor_dispatch_enabled"] is False

    # Expected rollup values, listed in the order they are checked.
    expected_rollups = {
        "source_readback_count": 8,
        "handoff_packet_count": 7,
        "ready_for_controlled_executor_count": 5,
        "critical_break_glass_count": 2,
        "ansible_check_mode_packet_count": 3,
        "mcp_tool_route_count": 7,
        "executor_route_count": 5,
        "verifier_binding_count": 5,
        "learning_writeback_contract_count": 3,
        "owner_response_required_count": 2,
        "missing_check_mode_count": 0,
        "missing_verifier_count": 0,
        "controlled_executor_dispatch_count": 0,
        "live_apply_count": 0,
        "gateway_queue_write_count": 0,
        "telegram_send_count": 0,
        "km_write_count": 0,
        "playbook_trust_write_count": 0,
        "production_write_count": 0,
        "host_write_count": 0,
        "kubectl_action_count": 0,
    }
    rollups = loaded["rollups"]
    for rollup_key, expected in expected_rollups.items():
        # The message names the offending key when a comparison fails.
        assert rollups[rollup_key] == expected, rollup_key
|
||||
|
||||
|
||||
def test_ai_agent_controlled_executor_handoff_rejects_high_packet_without_check_mode(tmp_path):
    """A high-tier packet with ``check_mode_passed`` False must be rejected.

    The rollup is adjusted to agree with the mutated packet; the loader is
    expected to raise ``ValueError`` matching "controlled executor gates".
    """
    payload = _snapshot()
    packet = _first_packet(payload, "high")
    packet["check_mode_passed"] = False
    payload["rollups"]["missing_check_mode_count"] = 1
    _write_snapshot(tmp_path, payload)

    expected_error = "controlled executor gates"
    with pytest.raises(ValueError, match=expected_error):
        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
|
||||
|
||||
|
||||
def test_ai_agent_controlled_executor_handoff_keeps_high_packet_off_owner_response(tmp_path):
    """A high-tier packet flagged ``owner_response_required`` must be rejected.

    The owner-response rollup is set to 3 alongside the packet change; the
    loader is expected to raise ``ValueError`` matching "owner response".
    """
    payload = _snapshot()
    packet = _first_packet(payload, "high")
    packet["owner_response_required"] = True
    payload["rollups"]["owner_response_required_count"] = 3
    _write_snapshot(tmp_path, payload)

    expected_error = "owner response"
    with pytest.raises(ValueError, match=expected_error):
        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
|
||||
|
||||
|
||||
def test_ai_agent_controlled_executor_handoff_keeps_critical_on_break_glass(tmp_path):
    """A critical-tier packet that allows controlled handoff must be rejected.

    The loader is expected to raise ``ValueError`` matching "critical packet".
    """
    payload = _snapshot()
    packet = _first_packet(payload, "critical")
    packet["controlled_executor_handoff_allowed"] = True
    _write_snapshot(tmp_path, payload)

    expected_error = "critical packet"
    with pytest.raises(ValueError, match=expected_error):
        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
|
||||
|
||||
|
||||
def test_ai_agent_controlled_executor_handoff_blocks_live_apply_rollup(tmp_path):
    """A non-zero ``live_apply_count`` rollup must be rejected.

    The loader is expected to raise ``ValueError`` matching
    "live/write rollup counts".
    """
    payload = _snapshot()
    rollups = payload["rollups"]
    rollups["live_apply_count"] = 1
    _write_snapshot(tmp_path, payload)

    expected_error = "live/write rollup counts"
    with pytest.raises(ValueError, match=expected_error):
        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
|
||||
|
||||
|
||||
def test_ai_agent_controlled_executor_handoff_requires_rollup_consistency(tmp_path):
    """A ``handoff_packet_count`` rollup overwritten with 99 must be rejected.

    The loader is expected to raise ``ValueError`` matching "rollup counts".
    """
    payload = _snapshot()
    rollups = payload["rollups"]
    rollups["handoff_packet_count"] = 99
    _write_snapshot(tmp_path, payload)

    expected_error = "rollup counts"
    with pytest.raises(ValueError, match=expected_error):
        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
|
||||
|
||||
|
||||
def test_ai_agent_controlled_executor_handoff_rejects_private_terms(tmp_path):
    """A packet ``display_name`` carrying the injected phrase must be rejected.

    The loader is expected to raise ``ValueError`` matching
    "forbidden public terms".
    """
    payload = _snapshot()
    leading_packet = payload["executor_handoff_packets"][0]
    leading_packet["display_name"] = "請把 In app browser 狀態放進前端"
    _write_snapshot(tmp_path, payload)

    expected_error = "forbidden public terms"
    with pytest.raises(ValueError, match=expected_error):
        load_latest_ai_agent_controlled_executor_handoff(tmp_path)
|
||||
|
||||
|
||||
def test_ai_agent_controlled_executor_handoff_fails_when_missing(tmp_path):
    """Loading from a directory with no snapshot file raises ``FileNotFoundError``."""
    # Nothing is written into tmp_path before the loader is called.
    empty_dir = tmp_path
    with pytest.raises(FileNotFoundError):
        load_latest_ai_agent_controlled_executor_handoff(empty_dir)
|
||||
|
||||
|
||||
def _snapshot(*, generated_at: str = "2026-06-27T01:20:00+08:00") -> dict:
    """Return a fresh, mutable copy of the committed snapshot.

    The committed JSON file is re-read on every call, and the copy's
    ``generated_at`` field is overwritten with the supplied timestamp.
    """
    raw_text = _COMMITTED_SNAPSHOT.read_text(encoding="utf-8")
    snapshot = copy.deepcopy(json.loads(raw_text))
    snapshot["generated_at"] = generated_at
    return snapshot
|
||||
|
||||
|
||||
def _first_packet(snapshot: dict, risk_tier: str) -> dict:
    """Return the first handoff packet in *snapshot* with the given risk tier.

    The packet is returned by reference, not copied, so mutating the result
    changes *snapshot* itself.

    Raises:
        LookupError: if no entry of ``snapshot["executor_handoff_packets"]``
            has ``risk_tier`` equal to *risk_tier*.
    """
    packets = snapshot["executor_handoff_packets"]
    found = next((packet for packet in packets if packet["risk_tier"] == risk_tier), None)
    if found is None:
        # A bare next() would leak a message-less StopIteration here; name the
        # missing tier so a fixture mismatch is easy to diagnose.
        raise LookupError(f"no executor handoff packet with risk_tier={risk_tier!r}")
    return found
|
||||
|
||||
|
||||
def _write_snapshot(
    path: Path,
    snapshot: dict,
    *,
    filename: str = "ai_agent_controlled_executor_handoff_2026-06-27.json",
) -> None:
    """Serialize *snapshot* as JSON into a file inside the directory *path*.

    Args:
        path: Directory that receives the file.
        snapshot: JSON-serializable payload to write.
        filename: Name of the file to create. Defaults to the 2026-06-27
            snapshot name, so existing two-argument calls behave as before.
    """
    (path / filename).write_text(json.dumps(snapshot), encoding="utf-8")
|
||||
@@ -0,0 +1,74 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from src.api.v1.agents import router
|
||||
|
||||
|
||||
def test_ai_agent_controlled_executor_handoff_endpoint_returns_committed_snapshot():
    """GET on the handoff endpoint returns 200 and the expected payload values.

    The agents router is mounted under ``/api/v1`` on a bare FastAPI app, and
    the JSON response is checked section by section.
    """
    application = FastAPI()
    application.include_router(router, prefix="/api/v1")
    http = TestClient(application)

    response = http.get("/api/v1/agents/agent-controlled-executor-handoff")

    assert response.status_code == 200
    data = response.json()
    assert data["schema_version"] == "ai_agent_controlled_executor_handoff_v1"

    status = data["program_status"]
    assert status["current_task_id"] == "P2-415"
    assert status["next_task_id"] == "P2-416"
    assert status["read_only_mode"] is True
    assert status["runtime_authority"] == "controlled_executor_handoff_readback_no_live_apply"

    truth = data["handoff_truth"]
    assert truth["high_risk_controlled_executor_handoff_ready"] is True
    assert truth["high_risk_owner_review_required"] is False
    assert truth["critical_break_glass_required"] is True
    assert truth["controlled_executor_dispatch_enabled"] is False

    rollups = data["rollups"]

    # Rollups that must equal both the length of a payload list and a fixed count.
    sized_rollups = {
        "source_readback_count": ("source_readbacks", 8),
        "handoff_packet_count": ("executor_handoff_packets", 7),
        "executor_route_count": ("executor_routes", 5),
        "verifier_binding_count": ("verifier_bindings", 5),
        "learning_writeback_contract_count": ("learning_writeback_contracts", 3),
    }
    for rollup_key, (collection_key, expected) in sized_rollups.items():
        assert rollups[rollup_key] == len(data[collection_key]) == expected, rollup_key

    # Rollups checked against a fixed value only.
    fixed_rollups = {
        "ready_for_controlled_executor_count": 5,
        "critical_break_glass_count": 2,
        "high_risk_packet_count": 5,
        "critical_packet_count": 2,
        "owner_response_required_count": 2,
        "missing_check_mode_count": 0,
        "missing_rollback_count": 0,
        "missing_verifier_count": 0,
        "missing_telegram_evidence_count": 0,
        "missing_learning_writeback_count": 0,
        "controlled_executor_dispatch_count": 0,
        "live_apply_count": 0,
        "gateway_queue_write_count": 0,
        "telegram_send_count": 0,
        "bot_api_call_count": 0,
        "km_write_count": 0,
        "playbook_trust_write_count": 0,
        "production_write_count": 0,
        "secret_read_count": 0,
        "paid_api_call_count": 0,
        "host_write_count": 0,
        "kubectl_action_count": 0,
        "destructive_operation_count": 0,
    }
    for rollup_key, expected in fixed_rollups.items():
        assert rollups[rollup_key] == expected, rollup_key

    packets = data["executor_handoff_packets"]
    high_packets = [packet for packet in packets if packet["risk_tier"] == "high"]
    critical_packets = [packet for packet in packets if packet["risk_tier"] == "critical"]

    # Each loop passes vacuously when its group is empty.
    for packet in high_packets:
        assert packet["controlled_executor_handoff_allowed"] is True
    for packet in high_packets:
        assert packet["owner_response_required"] is False
    for packet in critical_packets:
        assert packet["handoff_status"] == "critical_break_glass_only"

    boundaries = data["activation_boundaries"]
    assert boundaries["controlled_executor_handoff_preview_allowed"] is True
    assert boundaries["controlled_executor_dispatch_enabled"] is False
    assert boundaries["live_apply_enabled"] is False

    redaction = data["display_redaction_contract"]
    assert redaction["redaction_required"] is True
    assert redaction["work_window_transcript_display_allowed"] is False
|
||||
Reference in New Issue
Block a user