Files
awoooi/apps/api/tests/test_ai_agent_autonomous_runtime_control.py
Your Name fe42ed1b43
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 21s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
feat(awooop): expose ai automation log taxonomy
2026-06-29 15:21:17 +08:00

589 lines
25 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from src.services.ai_agent_autonomous_runtime_control import (
build_ai_agent_autonomous_runtime_control,
build_runtime_receipt_readback_from_rows,
classify_deploy_control_plane_observation,
)
def test_ai_agent_autonomous_runtime_control_uses_current_owner_directive():
    """Check the schema version, program-status markers and policy flags.

    The control payload must report the owner-directive runtime authority,
    the expected deploy readback marker / attempt note, and the boolean
    policy switches for each risk tier and delivery channel.
    """
    payload = build_ai_agent_autonomous_runtime_control()
    assert payload["schema_version"] == "ai_agent_autonomous_runtime_control_v1"

    program_status = payload["program_status"]
    expected_runtime_authority = "current_owner_directive_controlled_ai_automation"
    assert program_status["runtime_authority"] == expected_runtime_authority
    expected_readback_marker = "p2_416_d1n_autonomous_runtime_control_prod_readback_v2"
    assert program_status["deploy_readback_marker"] == expected_readback_marker
    expected_attempt_note = "cd_internal_control_plane_readback_retry_20260628_2"
    assert program_status["deploy_attempt_note"] == expected_attempt_note
    assert program_status["legacy_no_send_no_live_rules_overridden"] is True
    assert program_status["implementation_completion_percent"] == 88

    # Each policy switch must be the exact bool singleton, not merely truthy/falsy.
    policy = payload["current_policy"]
    expected_policy_flags = (
        ("low_risk_controlled_apply_allowed", True),
        ("medium_risk_controlled_apply_allowed", True),
        ("high_risk_controlled_apply_allowed", True),
        ("owner_review_required_for_low_medium_high", False),
        ("telegram_gateway_required", True),
        ("direct_bot_api_allowed", False),
        ("post_apply_verifier_required", True),
        ("km_learning_writeback_required", True),
    )
    for flag_name, expected_value in expected_policy_flags:
        assert policy[flag_name] is expected_value, flag_name
def test_ai_agent_autonomous_runtime_control_exposes_reports_and_executor_receipts():
    """Check report cadences, executor receipt types and rollup counters.

    Asserts that exactly the daily/weekly/monthly cadences are declared,
    each flagged for Telegram gateway delivery and not for the direct bot
    API, that the expected operation receipt types are listed, and that the
    receipt readback embedded in the static payload reports ``not_queried``.
    """
    payload = build_ai_agent_autonomous_runtime_control()

    # Index the cadence entries by name so each one can be checked directly.
    by_cadence = {}
    for entry in payload["report_delivery"]["cadences"]:
        by_cadence[entry["cadence"]] = entry
    assert set(by_cadence) == {"daily", "weekly", "monthly"}

    gateway_flags = set(
        entry["telegram_gateway_delivery_enabled"] for entry in by_cadence.values()
    )
    assert gateway_flags == {True}
    direct_api_flags = set(
        entry["direct_bot_api_allowed"] for entry in by_cadence.values()
    )
    assert direct_api_flags == {False}

    for cadence_name in ("daily", "weekly", "monthly"):
        assert f"run_{cadence_name}_report_loop" in by_cadence[cadence_name]["worker"]

    declared_operation_types = set()
    for receipt in payload["controlled_executor"]["operation_receipts"]:
        declared_operation_types.add(receipt["operation_type"])
    required_operation_types = {
        "ansible_candidate_matched",
        "ansible_check_mode_executed",
        "ansible_apply_executed",
        "incident_evidence.post_execution_state",
        "knowledge_entries",
    }
    assert required_operation_types <= declared_operation_types

    rollups = payload["rollups"]
    assert rollups["automated_risk_tier_count"] == 3
    assert rollups["report_cadence_enabled_count"] == 3
    assert rollups["direct_bot_api_allowed_count"] == 0
    assert rollups["legacy_policy_overridden_count"] >= 4

    assert payload["runtime_receipt_readback"]["db_read_status"] == "not_queried"
def test_ai_agent_autonomous_runtime_control_exposes_internal_control_loop():
    """Check the declared MCP / RAG / KM / playbook / log control-loop contract.

    Verifies the integration schema version and status, the set of
    normalized sensor events, one RAG query and one playbook decision class,
    the KM / log-projection contract flags, and the matching rollup counters.
    """
    payload = build_ai_agent_autonomous_runtime_control()
    loop = payload["control_plane_integration"]

    assert loop["schema_version"] == "ai_agent_autonomous_runtime_internal_loop_v1"
    assert loop["status"] == "mcp_rag_km_playbook_log_control_loop_declared"

    normalized_events = set()
    for sensor in loop["mcp_sensors"]:
        normalized_events.add(sensor["normalized_event"])
    assert normalized_events == {
        "RunObservation",
        "RunnerLaneState",
        "ProductionTruthSnapshot",
        "FrontendTruthSnapshot",
    }

    assert "controlled_cd_lane_capacity_label_guardrails" in loop["rag_context_queries"]
    assert "running_no_container_stale_ui" in loop["playbook_decision_classes"]

    km_contract = loop["km_writeback_contract"]
    assert km_contract["stores_raw_logs"] is False
    assert km_contract["stores_secret_values"] is False
    assert loop["log_projection_contract"]["raw_html_or_long_log_allowed"] is False

    rollups = payload["rollups"]
    assert rollups["mcp_sensor_count"] == 4
    assert rollups["playbook_decision_class_count"] == 7
def test_deploy_control_plane_classifier_separates_stale_spinner_from_real_failure():
    """Classify two deploy observations and check they land in different classes.

    The first observation is a run still reported as ``running`` with no
    active container while the production marker is hit and the latest flow
    is closed; the second is a ``failure`` run with neither. The test pins
    the classification, action and one safety / writeback flag for each.
    """
    stale_observation = {
        "run_status": "running",
        "is_latest_deploy_intent": True,
        "active_task_container_count": 0,
        "production_marker_hit": True,
        "latest_flow_closed": True,
        "runner_capacity_ok": True,
        "runner_forbidden_label_count": 0,
    }
    stale_verdict = classify_deploy_control_plane_observation(**stale_observation)
    assert stale_verdict["classification"] == "running_no_container_stale_ui"
    expected_stale_action = "treat_gitea_spinner_as_stale_and_keep_production_truth"
    assert stale_verdict["action"] == expected_stale_action
    assert stale_verdict["safety_boundary"]["writes_runtime_state"] is False
    assert stale_verdict["internal_writeback"]["km_writeback_required"] is True

    failed_observation = {
        "run_status": "failure",
        "is_latest_deploy_intent": True,
        "active_task_container_count": 0,
        "production_marker_hit": False,
        "latest_flow_closed": False,
        "runner_capacity_ok": True,
        "runner_forbidden_label_count": 0,
    }
    failure_verdict = classify_deploy_control_plane_observation(**failed_observation)
    assert failure_verdict["classification"] == "real_failure_requires_playbook_repair"
    expected_failure_action = "open_cd_repair_playbook_with_target_selector_and_verifier"
    assert failure_verdict["action"] == expected_failure_action
    assert failure_verdict["safety_boundary"]["opens_legacy_runner"] is False
    assert failure_verdict["internal_writeback"]["playbook_route_required"] is True
def test_ai_agent_autonomous_runtime_control_keeps_hard_blockers_and_redaction():
    """Check that the hard blockers are listed and every display flag is off."""
    payload = build_ai_agent_autonomous_runtime_control()

    hard_blockers = payload["hard_blockers"]
    required_blockers = (
        "secret_token_private_key_cookie_session_auth_header_cleartext",
        "drop_truncate_restore_prune_destructive_database_operation",
        "force_push_delete_repo_refs_or_visibility_change",
    )
    for blocker in required_blockers:
        assert blocker in hard_blockers

    # Every visibility switch must be exactly False.
    visibility = payload["visibility_contract"]
    display_flags = (
        "work_window_transcript_display_allowed",
        "prompt_body_display_allowed",
        "internal_reasoning_display_allowed",
        "sensitive_value_display_allowed",
        "telegram_unredacted_payload_display_allowed",
    )
    for flag_name in display_flags:
        assert visibility[flag_name] is False, flag_name
def test_runtime_receipt_readback_summarizes_live_executor_closure_rows():
    """Build a readback from a fully successful apply chain and check every section.

    The fixture rows describe one candidate -> check-mode -> controlled-apply
    chain for a single incident, together with auto-repair, verifier, KM,
    Telegram and log/trust count rows. The test then pins the summary
    counters, the flow-closure record, the execution-loop ledger, the trace
    ledger and the log-integration taxonomy returned by the builder.
    """
    apply_op_id = "73b7a95c-3652-4c0d-bb4c-729e500acedb"
    incident_id = "INC-20260627-64472B"
    catalog_id = "ansible:188-momo-backup-user"
    playbook_path = "infra/ansible/playbooks/188-momo-backup-user.yml"

    def operation_count(operation_type, status):
        # One total / one recent row for the given operation type and status.
        return {
            "operation_type": operation_type,
            "status": status,
            "total": 1,
            "recent": 1,
        }

    candidate_row = {
        "op_id": "candidate-op",
        "parent_op_id": None,
        "operation_type": "ansible_candidate_matched",
        "status": "dry_run",
        "actor": "decision_manager",
        "incident_id": incident_id,
        "catalog_id": catalog_id,
        "playbook_path": playbook_path,
        "execution_mode": "check_mode",
    }
    check_mode_row = {
        "op_id": "check-mode-op",
        "parent_op_id": "candidate-op",
        "operation_type": "ansible_check_mode_executed",
        "status": "success",
        "actor": "ansible_check_mode_worker",
        "incident_id": incident_id,
        "catalog_id": catalog_id,
        "playbook_path": playbook_path,
        "execution_mode": "check_mode",
        "returncode": "0",
    }
    apply_row = {
        "op_id": apply_op_id,
        "parent_op_id": "check-mode-op",
        "operation_type": "ansible_apply_executed",
        "status": "success",
        "actor": "ansible_controlled_apply_worker",
        "incident_id": incident_id,
        "catalog_id": catalog_id,
        "playbook_path": playbook_path,
        "execution_mode": "controlled_apply",
        "source_candidate_op_id": "candidate-op",
        "check_mode_op_id": "check-mode-op",
        "returncode": "0",
        "duration_ms": 7727,
    }
    auto_repair_row = {
        "id": "auto-repair-1",
        "incident_id": incident_id,
        "catalog_id": catalog_id,
        "playbook_name": playbook_path,
        "result_status": "success",
        "executed_steps_text": f'["apply:{apply_op_id}"]',
        "triggered_by": "ansible_controlled_apply",
        "risk_level": "low",
        "execution_time_ms": 7727,
    }
    verifier_row = {
        "id": "evidence-1",
        "incident_id": incident_id,
        "verification_result": "success",
        "apply_op_id": apply_op_id,
        "catalog_id": catalog_id,
        "playbook_path": playbook_path,
        "returncode": "0",
    }
    km_row = {
        "id": "km-1",
        "title": "AI 自動修復沉澱INC-20260627-64472B",
        "related_incident_id": incident_id,
        "related_playbook_id": catalog_id,
        "path_type": "ansible_apply_receipt:73b7a95c",
        "status": "review",
        "created_by": "ai_agent_ansible_worker",
    }
    telegram_row = {
        "message_id": "telegram-row-1",
        "run_id": "telegram-run-1",
        "message_type": "final",
        "send_status": "sent",
        "provider_message_id": "12345",
        "incident_id": incident_id,
        "action": "controlled_apply_result",
    }

    readback = build_runtime_receipt_readback_from_rows(
        project_id="awoooi",
        db_read_status="ok",
        operation_count_rows=[
            operation_count("ansible_candidate_matched", "dry_run"),
            operation_count("ansible_apply_executed", "success"),
            operation_count("ansible_check_mode_executed", "success"),
        ],
        operation_latest_rows=[candidate_row, check_mode_row, apply_row],
        auto_repair_count_rows=[
            {"result_status": "success", "total": 1, "recent": 1},
        ],
        auto_repair_latest_rows=[auto_repair_row],
        verifier_count_rows=[
            {"verification_result": "success", "total": 1, "recent": 1},
        ],
        verifier_latest_rows=[verifier_row],
        km_count_rows=[
            {"status": "review", "total": 1, "recent": 1},
        ],
        km_latest_rows=[km_row],
        telegram_count_rows=[
            {"send_status": "sent", "total": 1, "recent": 1},
        ],
        telegram_latest_rows=[telegram_row],
        mcp_gateway_count_rows=[
            {"status": "success", "total": 3, "recent": 1},
        ],
        legacy_mcp_count_rows=[
            {"status": "success", "total": 2, "recent": 2},
        ],
        service_log_count_rows=[
            {"status": "sanitized_recent_logs", "total": 2, "recent": 2},
        ],
        executor_log_count_rows=[
            {"status": "success", "total": 2, "recent": 1},
        ],
        timeline_count_rows=[
            {"status": "success", "total": 1, "recent": 1},
        ],
        playbook_trust_count_rows=[
            {"status": "learning_active", "total": 4, "recent": 1},
        ],
    )

    # --- Summary counters -------------------------------------------------
    assert readback["db_read_status"] == "ok"
    assert readback["writes_on_read"] is False
    assert readback["ansible_apply_executed"]["total"] == 1
    single_count_sections = (
        ("auto_repair_execution_receipt", "success"),
        ("post_apply_verifier", "success"),
        ("km_writeback", "review"),
        ("telegram_receipt", "sent"),
    )
    for section_name, status_key in single_count_sections:
        assert readback[section_name]["by_status"][status_key] == 1

    # --- Flow closure and failure classification --------------------------
    expected_closure = {
        "apply_op_id": apply_op_id,
        "incident_id": incident_id,
        "has_post_apply_verifier": True,
        "has_km_writeback": True,
        "has_telegram_receipt": True,
        "closed": True,
        "missing": [],
    }
    assert readback["latest_flow_closure"] == expected_closure
    latest_classification = readback["latest_failure_classification"]["classification"]
    assert latest_classification == "latest_controlled_apply_closed_success"
    assert readback["controlled_retry_package"]["status"] == "not_required_for_latest_apply"

    # --- Autonomous execution loop ledger ---------------------------------
    ledger = readback["autonomous_execution_loop_ledger"]
    assert ledger["schema_version"] == "ai_agent_autonomous_execution_loop_ledger_v1"
    assert ledger["operation_id"] == apply_op_id
    assert ledger["root_candidate_op_id"] == "candidate-op"
    assert ledger["check_mode_op_id"] == "check-mode-op"
    assert ledger["apply_op_id"] == apply_op_id
    assert ledger["execution_state"] == "closed_success"
    assert ledger["closed"] is True
    assert ledger["missing_stage_ids"] == []
    assert ledger["next_executor_action"] == "keep_receipt_chain_closed"
    expected_ledger_stage_order = [
        "candidate",
        "check_mode",
        "controlled_apply",
        "auto_repair_execution_receipt",
        "post_apply_verifier",
        "km_playbook_writeback",
        "telegram_receipt",
    ]
    ledger_stage_order = [stage["stage_id"] for stage in ledger["stages"]]
    assert ledger_stage_order == expected_ledger_stage_order
    stage_presence = set(stage["present"] for stage in ledger["stages"])
    assert stage_presence == {True}
    safety_contract = ledger["safety_contract"]
    assert safety_contract["backfill_may_send_telegram"] is False
    assert safety_contract["live_apply_may_send_telegram_gateway_receipt"] is True

    # --- Trace ledger -----------------------------------------------------
    trace = readback["trace_ledger"]
    assert trace["schema_version"] == "ai_agent_autonomous_trace_ledger_v1"
    trace_stage_ids = set()
    for stage in trace["stages"]:
        trace_stage_ids.add(stage["stage_id"])
    expected_trace_stage_ids = {
        "mcp_context",
        "service_log_evidence",
        "candidate",
        "check_mode",
        "executor_log_projection",
        "controlled_apply",
        "auto_repair_execution_receipt",
        "post_apply_verifier",
        "rag_km_learning",
        "playbook_trust",
        "timeline_projection",
        "telegram_receipt",
    }
    assert expected_trace_stage_ids == trace_stage_ids
    assert trace["missing_required_stage_ids"] == []
    assert trace["recorded_stage_count"] == trace["stage_count"]
    for learning_stage in ("rag_km_learning", "playbook_trust"):
        assert learning_stage in trace["learning_source_stage_ids"]
    expected_trace_safety = {
        "reads_raw_sessions": False,
        "stores_secret_values": False,
        "stores_unredacted_telegram_payload": False,
        "stores_internal_reasoning": False,
    }
    assert trace["public_safety"] == expected_trace_safety

    # --- Context / log / trust totals ---------------------------------------
    # 5 matches the sum of the gateway (3) and legacy (2) MCP totals fed in above.
    assert readback["mcp_context"]["total"] == 5
    assert readback["service_log_evidence"]["total"] == 2
    assert readback["executor_log_projection"]["total"] == 2
    assert readback["playbook_trust"]["by_status"]["learning_active"] == 4

    # --- Log integration taxonomy -----------------------------------------
    taxonomy = readback["log_integration_taxonomy"]
    assert taxonomy["schema_version"] == "ai_agent_log_integration_taxonomy_v1"
    expected_event_flow = [
        "collect_source_log_or_receipt",
        "redact_and_hash_sensitive_fields",
        "assign_labels",
        "correlate_incident_operation_playbook",
        "write_trace_ledger",
        "retrieve_similar_context_via_rag",
        "select_or_repair_playbook",
        "run_check_mode_then_controlled_apply",
        "verify_and_write_learning_back",
    ]
    assert taxonomy["normalized_event_flow"] == expected_event_flow
    required_label_dimensions = {
        "project",
        "product",
        "website",
        "service",
        "package",
        "tool",
        "incident",
        "operation",
        "playbook",
    }
    assert required_label_dimensions <= set(taxonomy["label_dimensions"])
    source_family_ids = set()
    for family in taxonomy["source_families"]:
        source_family_ids.add(family["source_family_id"])
    expected_source_family_ids = {
        "mcp_gateway_tool_calls",
        "legacy_mcp_tool_calls",
        "service_package_logs",
        "executor_operation_logs",
        "auto_repair_receipts",
        "post_apply_verifier",
        "rag_km_entries",
        "playbook_trust_signals",
        "operator_timeline_projection",
        "telegram_delivery_receipts",
    }
    assert expected_source_family_ids == source_family_ids
    taxonomy_rollups = taxonomy["rollups"]
    assert taxonomy_rollups["source_family_count"] == 10
    assert taxonomy_rollups["active_source_family_count"] == 10
    assert taxonomy_rollups["classified_event_total"] > 0
    taxonomy_safety = taxonomy["public_safety"]
    assert taxonomy_safety["raw_secret_collection_allowed"] is False
    assert taxonomy_safety["unredacted_payload_storage_allowed"] is False
def test_runtime_receipt_readback_classifies_closed_failed_apply_as_ai_repair():
    """Build a readback from a failed-but-closed apply and check the repair routing.

    The fixture has a successful check-mode row followed by a failed
    controlled apply (return code ``"2"``), with failed auto-repair and
    verifier rows plus KM and Telegram receipts. The test pins the failure
    classification, the controlled retry package and the ledger state.
    """
    apply_op_id = "94925d5e-6fdc-49c3-90e8-f0a0d57a6a58"
    incident_id = "INC-20260628-A40A9A"
    check_mode_op_id = "8b555f41-e81f-4d8e-956b-fb20d358db63"
    catalog_id = "ansible:188-ai-web"
    apply_playbook_path = "infra/ansible/playbooks/188-ai-web.yml"

    # The check-mode row points at the read-only playbook variant, unlike the apply row.
    check_mode_row = {
        "op_id": check_mode_op_id,
        "parent_op_id": "candidate-op-2",
        "operation_type": "ansible_check_mode_executed",
        "status": "success",
        "actor": "ansible_check_mode_worker",
        "incident_id": incident_id,
        "catalog_id": catalog_id,
        "playbook_path": "infra/ansible/playbooks/188-ai-web-readonly.yml",
        "execution_mode": "check_mode",
        "returncode": "0",
    }
    failed_apply_row = {
        "op_id": apply_op_id,
        "parent_op_id": check_mode_op_id,
        "operation_type": "ansible_apply_executed",
        "status": "failed",
        "actor": "ansible_controlled_apply_worker",
        "incident_id": incident_id,
        "catalog_id": catalog_id,
        "playbook_path": apply_playbook_path,
        "execution_mode": "controlled_apply",
        "source_candidate_op_id": "candidate-op-2",
        "check_mode_op_id": check_mode_op_id,
        "returncode": "2",
        "duration_ms": 4797,
    }
    auto_repair_row = {
        "id": "auto-repair-2",
        "incident_id": incident_id,
        "catalog_id": catalog_id,
        "playbook_name": apply_playbook_path,
        "result_status": "failed",
        "executed_steps_text": f'["apply:{apply_op_id}"]',
        "triggered_by": "ansible_controlled_apply",
        "risk_level": "medium",
        "execution_time_ms": 4797,
    }
    verifier_row = {
        "id": "evidence-1",
        "incident_id": incident_id,
        "verification_result": "failed",
        "apply_op_id": apply_op_id,
        "catalog_id": catalog_id,
        "playbook_path": apply_playbook_path,
        "returncode": "2",
    }
    km_row = {
        "id": "km-1",
        "title": "AI 自動修復沉澱INC-20260628-A40A9A",
        "related_incident_id": incident_id,
        "related_playbook_id": catalog_id,
        "path_type": "ansible_apply_receipt:94925d5e",
        "status": "REVIEW",
        "created_by": "ai_agent_ansible_worker",
    }
    telegram_row = {
        "message_id": "telegram-row-1",
        "run_id": "telegram-run-1",
        "message_type": "final",
        "send_status": "sent",
        "provider_message_id": "32016",
        "incident_id": incident_id,
        "action": "controlled_apply_result",
    }

    readback = build_runtime_receipt_readback_from_rows(
        project_id="awoooi",
        db_read_status="ok",
        operation_count_rows=[
            {
                "operation_type": "ansible_apply_executed",
                "status": "failed",
                "total": 1,
                "recent": 1,
            },
        ],
        operation_latest_rows=[check_mode_row, failed_apply_row],
        auto_repair_count_rows=[
            {"result_status": "failed", "total": 1, "recent": 1},
        ],
        auto_repair_latest_rows=[auto_repair_row],
        verifier_count_rows=[
            {"verification_result": "failed", "total": 1, "recent": 1},
        ],
        verifier_latest_rows=[verifier_row],
        km_count_rows=[
            {"status": "REVIEW", "total": 1, "recent": 1},
        ],
        km_latest_rows=[km_row],
        telegram_count_rows=[
            {"send_status": "sent", "total": 1, "recent": 1},
        ],
        telegram_latest_rows=[telegram_row],
    )

    # --- Failure classification -------------------------------------------
    verdict = readback["latest_failure_classification"]
    assert verdict["classification"] == "closed_failed_apply_requires_ai_repair"
    assert verdict["action"] == "queue_check_mode_replay_and_playbook_repair_candidate"
    expected_target_selector = {
        "incident_id": incident_id,
        "apply_op_id": apply_op_id,
        "parent_op_id": check_mode_op_id,
        "catalog_id": catalog_id,
        "playbook_path": apply_playbook_path,
        "execution_mode": "controlled_apply",
    }
    assert verdict["target_selector"] == expected_target_selector
    evidence = verdict["evidence"]
    assert evidence["returncode"] == "2"
    assert evidence["verification_result"] == "failed"
    assert evidence["latest_flow_closed"] is True
    assert evidence["output_tail_in_readback"] is False
    assert evidence["unredacted_output_required"] is False
    expected_next_steps = [
        "run_no_write_check_mode_replay",
        "extract_sanitized_failed_task_summary",
        "write_km_playbook_repair_candidate",
        "retry_controlled_apply_only_after_check_mode_passes",
    ]
    assert verdict["safe_next_steps"] == expected_next_steps

    # --- Controlled retry package -----------------------------------------
    retry_package = readback["controlled_retry_package"]
    assert retry_package["package_id"] == "ansible_retry:94925d5e"
    assert retry_package["status"] == "ready_for_no_write_check_mode_replay"
    expected_source_of_truth = {
        "catalog_id": catalog_id,
        "playbook_path": apply_playbook_path,
        "source_diff_required_before_retry": True,
        "failed_task_summary_required": True,
    }
    assert retry_package["source_of_truth"] == expected_source_of_truth
    preflight = retry_package["preflight"]
    assert preflight["no_write_check_mode_replay_required"] is True
    assert preflight["reuse_parent_check_mode_op_id"] == check_mode_op_id
    apply_gate = retry_package["apply_gate"]
    assert apply_gate["controlled_apply_retry_allowed_now"] is False
    assert apply_gate["requires_check_mode_success_before_apply"] is True
    assert retry_package["rollback"]["destructive_rollback_allowed"] is False
    expected_post_apply = {
        "post_apply_verifier_required": True,
        "km_playbook_trust_writeback_required": True,
        "telegram_receipt_required": True,
    }
    assert retry_package["post_apply"] == expected_post_apply
    assert retry_package["next_ai_action"] == "run_no_write_check_mode_replay"

    # --- Execution loop ledger --------------------------------------------
    ledger = readback["autonomous_execution_loop_ledger"]
    assert ledger["closed"] is True
    assert ledger["execution_state"] == "closed_failed_apply_repair_ready"
    assert ledger["next_executor_action"] == "run_no_write_check_mode_replay"
    assert ledger["missing_stage_ids"] == []
    # Index 3 is expected to be the auto-repair receipt stage.
    receipt_stage = ledger["stages"][3]
    assert receipt_stage["stage_id"] == "auto_repair_execution_receipt"
    assert receipt_stage["status"] == "failed"
def test_runtime_execution_loop_ledger_does_not_mix_unrelated_check_mode_rows():
    """Check the ledger follows the apply row's parent, not an unrelated check-mode row.

    The only check-mode row supplied belongs to a different incident and
    catalog. The ledger is expected to reference the apply row's
    ``parent_op_id`` as the check-mode op, mark that stage as inferred, and
    report the candidate stage as missing.
    """
    apply_op_id = "db3f12ce-08fc-4289-8c93-338305d5850c"
    expected_check_mode_op_id = "expected-check-mode-op"
    apply_incident_id = "INC-20260629-LEDGER"

    unrelated_check_mode_row = {
        "op_id": "unrelated-check-mode-op",
        "parent_op_id": "unrelated-candidate",
        "operation_type": "ansible_check_mode_executed",
        "status": "success",
        "actor": "ansible_check_mode_worker",
        "incident_id": "INC-OTHER",
        "catalog_id": "ansible:110-devops",
        "playbook_path": "infra/ansible/playbooks/110-devops.yml",
        "execution_mode": "check_mode",
        "returncode": "0",
    }
    apply_row = {
        "op_id": apply_op_id,
        "parent_op_id": expected_check_mode_op_id,
        "operation_type": "ansible_apply_executed",
        "status": "success",
        "actor": "ansible_controlled_apply_worker",
        "incident_id": apply_incident_id,
        "catalog_id": "ansible:188-momo-backup-user",
        "playbook_path": "infra/ansible/playbooks/188-momo-backup-user.yml",
        "execution_mode": "controlled_apply",
        "returncode": "0",
    }

    readback = build_runtime_receipt_readback_from_rows(
        project_id="awoooi",
        db_read_status="ok",
        operation_latest_rows=[unrelated_check_mode_row, apply_row],
    )

    ledger = readback["autonomous_execution_loop_ledger"]
    assert ledger["operation_id"] == apply_op_id
    assert ledger["check_mode_op_id"] == expected_check_mode_op_id
    assert ledger["root_candidate_op_id"] is None
    assert ledger["incident_id"] == apply_incident_id

    # Index 1 is expected to be the check-mode stage.
    check_mode_stage = ledger["stages"][1]
    assert check_mode_stage["stage_id"] == "check_mode"
    assert check_mode_stage["ref_id"] == expected_check_mode_op_id
    assert check_mode_stage["status"] == "inferred_from_apply_parent"

    assert "candidate" in ledger["missing_stage_ids"]