Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
Code Review / ai-code-review (push) Has been cancelled
CD Pipeline / tests (push) Has been cancelled
Ansible / Reboot Recovery Contract / validate (push) Has been cancelled
263 lines
9.8 KiB
Python
263 lines
9.8 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
|
|
import pytest
|
|
|
|
from src.services.host_runaway_aiops_loop_readiness import (
|
|
load_latest_host_runaway_aiops_loop_readiness,
|
|
)
|
|
|
|
|
|
def test_load_latest_host_runaway_aiops_loop_readiness_reads_newest_file(tmp_path):
    """The loader returns the 2026-06-18 snapshot when a 2026-06-17 file is also present."""
    older = _snapshot(generated_at="2026-06-17T00:00:00+08:00")
    newer = _snapshot(generated_at="2026-06-18T15:08:00+08:00")
    # The older snapshot goes into its own dated file so two candidates exist on disk.
    (tmp_path / "host_runaway_aiops_loop_readiness_2026-06-17.json").write_text(
        json.dumps(older),
        encoding="utf-8",
    )
    _write_snapshot(tmp_path, newer)

    loaded = load_latest_host_runaway_aiops_loop_readiness(tmp_path)

    # Only generated_at differs between the two files, so it identifies which one was read.
    assert loaded["generated_at"] == "2026-06-18T15:08:00+08:00"
    assert loaded["program_status"]["current_task_id"] == "P3-009"
    assert loaded["program_status"]["next_task_id"] == "P3-010"
    assert loaded["rollups"]["loop_stage_count"] == 6
    assert loaded["rollups"]["alert_lane_count"] == 2
    assert loaded["rollups"]["asset_writeback_contract_count"] == 5
    assert loaded["live_readback"]["host_label"] == "110"
    assert loaded["live_readback"]["deploy_marker"] == "2d278568"
    assert loaded["activation_boundaries"]["runtime_remediation_enabled"] is False
|
|
|
|
|
|
def test_host_runaway_aiops_loop_readiness_requires_read_only_mode(tmp_path):
    """A snapshot with ``program_status.read_only_mode`` set to False raises ValueError."""
    snapshot = _snapshot()
    snapshot["program_status"]["read_only_mode"] = False
    _write_snapshot(tmp_path, snapshot)

    # ``match`` is a regex searched in the exception message.
    with pytest.raises(ValueError, match="read_only_mode"):
        load_latest_host_runaway_aiops_loop_readiness(tmp_path)
|
|
|
|
|
|
def test_host_runaway_aiops_loop_readiness_blocks_process_termination(tmp_path):
    """Setting ``process_termination_authorized`` to True in activation_boundaries raises."""
    snapshot = _snapshot()
    snapshot["activation_boundaries"]["process_termination_authorized"] = True
    _write_snapshot(tmp_path, snapshot)

    # ``match`` is a regex searched in the exception message.
    with pytest.raises(ValueError, match="false activation boundaries"):
        load_latest_host_runaway_aiops_loop_readiness(tmp_path)
|
|
|
|
|
|
def test_host_runaway_aiops_loop_readiness_blocks_host_write_rollup(tmp_path):
    """A non-zero ``rollups.host_write_count`` makes the loader raise ValueError."""
    snapshot = _snapshot()
    snapshot["rollups"]["host_write_count"] = 1
    _write_snapshot(tmp_path, snapshot)

    # ``match`` is a regex searched in the exception message.
    with pytest.raises(ValueError, match="zero rollup fields"):
        load_latest_host_runaway_aiops_loop_readiness(tmp_path)
|
|
|
|
|
|
def test_host_runaway_aiops_loop_readiness_requires_alert_lane_contract(tmp_path):
    """Setting ``runtime_write_gate`` to 1 on the first alert lane raises ValueError."""
    snapshot = _snapshot()
    snapshot["alert_lanes"][0]["runtime_write_gate"] = 1
    _write_snapshot(tmp_path, snapshot)

    # ``match`` is a regex searched in the exception message.
    with pytest.raises(ValueError, match="runtime_write_gate"):
        load_latest_host_runaway_aiops_loop_readiness(tmp_path)
|
|
|
|
|
|
def test_host_runaway_aiops_loop_readiness_requires_asset_writeback_fields(tmp_path):
    """An empty ``required_fields`` list on the first asset contract raises ValueError."""
    snapshot = _snapshot()
    snapshot["asset_writeback_contract"][0]["required_fields"] = []
    _write_snapshot(tmp_path, snapshot)

    # ``match`` is a regex searched in the exception message.
    with pytest.raises(ValueError, match="required_fields"):
        load_latest_host_runaway_aiops_loop_readiness(tmp_path)
|
|
|
|
|
|
def test_host_runaway_aiops_loop_readiness_rejects_private_terms(tmp_path):
    """A ``next_action`` text that the loader treats as forbidden raises ValueError."""
    snapshot = _snapshot()
    # This literal is the trigger for the check; it must stay byte-identical.
    snapshot["loop_stages"][0]["next_action"] = "不要顯示工作視窗內容"
    _write_snapshot(tmp_path, snapshot)

    # ``match`` is a regex searched in the exception message.
    with pytest.raises(ValueError, match="forbidden public terms"):
        load_latest_host_runaway_aiops_loop_readiness(tmp_path)
|
|
|
|
|
|
def test_host_runaway_aiops_loop_readiness_fails_when_missing(tmp_path):
    """Loading from a directory with no snapshot file raises FileNotFoundError."""
    # Nothing is written into tmp_path, so it is empty here.
    with pytest.raises(FileNotFoundError):
        load_latest_host_runaway_aiops_loop_readiness(tmp_path)
|
|
|
|
|
|
def _write_snapshot(tmp_path, payload: dict, *, date_stamp: str = "2026-06-18") -> None:
    """Write *payload* as JSON to a dated readiness file inside *tmp_path*.

    Args:
        tmp_path: Directory path object supporting ``/`` joining (pytest's
            ``tmp_path`` in the tests of this module).
        payload: JSON-serializable snapshot dictionary.
        date_stamp: Date suffix used in the file name. Defaults to
            ``"2026-06-18"``, the name this helper has always produced.
    """
    target = tmp_path / f"host_runaway_aiops_loop_readiness_{date_stamp}.json"
    target.write_text(json.dumps(payload), encoding="utf-8")
|
|
|
|
|
|
def _snapshot(*, generated_at: str = "2026-06-18T15:08:00+08:00") -> dict:
    """Build the baseline readiness snapshot payload used by the tests in this module.

    A new dictionary (with new nested containers) is created on every call, so
    a test can mutate a single field without affecting other tests.

    Args:
        generated_at: Timestamp string stored under the ``"generated_at"`` key.

    Returns:
        The snapshot dictionary, ready to be serialized with ``json.dumps``.
    """
    return {
        "schema_version": "host_runaway_aiops_loop_readiness_v1",
        "generated_at": generated_at,
        "program_status": {
            "overall_completion_percent": 100,
            "current_priority": "P3",
            "current_task_id": "P3-009",
            "next_task_id": "P3-010",
            "read_only_mode": True,
            "runtime_authority": "host_runaway_aiops_loop_readiness_only_no_host_write",
            "status_note": "只讀 readiness。",
        },
        "source_refs": [
            "scripts/ops/host-runaway-process-exporter.py",
            "scripts/ops/host-runaway-process-remediation.py",
            "ops/monitoring/alerts-unified.yml",
            "apps/api/src/services/telegram_gateway.py",
            "docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md",
            "docs/runbooks/FULL-STACK-COLD-START-SOP.md",
            "docs/LOGBOOK.md",
        ],
        "rollups": {
            "loop_stage_count": 6,
            "alert_lane_count": 2,
            "asset_writeback_contract_count": 5,
            "source_ref_count": 7,
            "live_readback_metric_count": 8,
            "blocked_runtime_action_count": 12,
            "runtime_remediation_authorized_count": 0,
            "telegram_send_count": 0,
            "gateway_queue_write_count": 0,
            "bot_api_call_count": 0,
            "host_write_count": 0,
            "process_termination_count": 0,
            "docker_restart_count": 0,
            "systemd_restart_count": 0,
            "nginx_reload_count": 0,
            "firewall_change_count": 0,
            "kubectl_action_count": 0,
            "production_write_count": 0,
        },
        "loop_stages": [
            _stage("read_only_host_textfile_exporter"),
            _stage("prometheus_alert_rules"),
            _stage("telegram_ai_event_packet"),
            _stage("playbook_contract"),
            _stage("km_verifier_writeback_contract"),
            _stage("gated_remediation_helper"),
        ],
        "alert_lanes": [
            {
                "alertname": "HostOrphanBrowserSmokeHighCpu",
                "lane_id": "orphan_browser_smoke_runaway_process",
                "classification": "host_resource_runaway_process",
                "action_policy": "triage_packet_then_dry_run_then_gated_sigterm",
                "dry_run_allowed": True,
                "apply_allowed_without_owner_gate": False,
                "runtime_write_gate": 0,
                "next_action": "prepare packet",
            },
            {
                "alertname": "HostCiRunnerLoadSaturation",
                "lane_id": "ci_runner_load_saturation",
                "classification": "host_resource_capacity",
                "action_policy": "capacity_triage_no_process_remediation",
                "dry_run_allowed": False,
                "apply_allowed_without_owner_gate": False,
                "runtime_write_gate": 0,
                "next_action": "capacity triage",
            },
        ],
        "asset_writeback_contract": [
            _asset("knowledge_base_incident_summary"),
            _asset("playbook_trust_evidence"),
            _asset("awooop_work_item_truth_chain"),
            _asset("verifier_post_check"),
            _asset("recurrence_guard"),
        ],
        "live_readback": {
            "host_label": "110",
            "monitor_up": 1,
            "orphan_browser_group_count": 0,
            "active_ci_container_count": 2,
            "load5_per_core_upper_observed": 0.81,
            "swap_used_ratio_upper_observed": 1.0,
            "remediation_authorized_count": 0,
            "alerts_firing_count": 0,
            "deploy_marker": "2d278568",
            "runtime_revision": "f358a0f6c3e614e407dedb6eee89bf10b2bc8173",
            "argocd_sync": "Synced",
            "argocd_health": "Healthy",
            "production_route_count": 3,
            "forbidden_public_hit_count": 0,
        },
        "remediation_gate": {
            "dry_run_required": True,
            "owner_approval_required": True,
            "maintenance_window_required": True,
            "evidence_ref_required": True,
            "post_check_required": True,
            "allowed_signal_after_gate": "SIGTERM",
            "process_termination_authorized": False,
            "disallowed_actions": [
                "SIGKILL",
                "docker restart",
                "systemctl restart",
                "nginx reload",
                "firewall change",
                "kubectl action",
                "secret read",
                "production write",
            ],
        },
        "activation_boundaries": {
            "read_only_readback_allowed": True,
            "ai_triage_packet_allowed": True,
            "dry_run_generation_allowed": True,
            "runtime_remediation_enabled": False,
            "process_termination_authorized": False,
            "telegram_send_enabled": False,
            "gateway_queue_write_enabled": False,
            "bot_api_call_enabled": False,
            "host_write_enabled": False,
            "docker_restart_enabled": False,
            "systemd_restart_enabled": False,
            "nginx_reload_enabled": False,
            "firewall_change_enabled": False,
            "kubectl_action_enabled": False,
            "production_write_enabled": False,
            "secret_read_enabled": False,
        },
        "next_steps": [
            {
                "step_id": "real_alert_receipt_fixture",
                "description": "verify packet",
                "runtime_write_allowed": False,
            }
        ],
    }
|
|
|
|
|
|
def _stage(stage_id: str) -> dict:
    """Return a loop-stage entry whose id and display name are both *stage_id*.

    Each call builds a new dictionary with new list values, so callers may
    mutate the result freely.

    Args:
        stage_id: Identifier stored under ``"stage_id"`` and ``"display_name"``.

    Returns:
        The stage dictionary with fixed owner, status, and evidence fields.
    """
    return {
        "stage_id": stage_id,
        "display_name": stage_id,
        "owner_agent": "openclaw",
        "status": "ready",
        "completion_percent": 100,
        "evidence_refs": ["docs/LOGBOOK.md"],
        "next_action": "keep readback visible",
        "blocked_runtime_actions": ["host_write"],
    }
|
|
|
|
|
|
def _asset(asset_id: str) -> dict:
    """Return an asset write-back contract entry keyed by *asset_id*.

    Each call builds a new dictionary with a new ``required_fields`` list, so
    callers may mutate the result freely.

    Args:
        asset_id: Identifier stored under ``"asset_id"`` and ``"display_name"``.

    Returns:
        The asset dictionary with live writes disabled and a single required
        field, ``"incident_id"``.
    """
    return {
        "asset_id": asset_id,
        "display_name": asset_id,
        "required_on_real_incident": True,
        "live_write_enabled": False,
        "required_fields": ["incident_id"],
    }
|