319 lines
11 KiB
Python
319 lines
11 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
import pytest
|
|
|
|
from src.services.adr100_slo_status_service import (
|
|
Adr100SloStatusService,
|
|
_build_verification_coverage_payload,
|
|
)
|
|
|
|
|
|
# Query string the tests in this module use as the lookup key for the canned
# quality-summary latency sample. Built from two adjacent literals so the
# label matcher stays readable; the concatenated text must match the key the
# fake client receives character for character.
QUALITY_SUMMARY_LATENCY_QUERY = (
    'max(awooop_truth_chain_quality_summary_last_duration_seconds{'
    'project_id="awoooi",limit="8",success="true"})'
)
|
|
|
|
|
|
class _FakePrometheusResponse:
    """Minimal response stand-in that exposes only ``json()``."""

    def __init__(self, payload: dict[str, Any]) -> None:
        # Kept by reference: ``json()`` returns this same object, not a copy.
        self._payload = payload

    def json(self) -> dict[str, Any]:
        """Return the payload supplied at construction time."""
        return self._payload
|
|
|
|
|
|
class _FakePrometheusClient:
    """Async-context-manager fake used in place of ``httpx.AsyncClient``.

    ``values`` maps a query string to the sample value that ``get`` should
    report for it. Queries missing from the mapping yield an empty result.
    """

    def __init__(self, values: dict[str, str]) -> None:
        self.values = values

    async def __aenter__(self) -> "_FakePrometheusClient":
        return self

    async def __aexit__(self, exc_type, exc, tb) -> bool:
        # False: never suppress an exception raised inside the ``async with``.
        return False

    async def get(self, *args, **kwargs) -> "_FakePrometheusResponse":  # noqa: ANN002, ANN003
        """Return a canned success response for the query in ``params``.

        Positional arguments (e.g. the URL) are accepted and ignored. The
        query is read from ``kwargs["params"]["query"]``; a missing or
        ``None`` ``params`` is treated as an empty query rather than raising.
        """
        params = kwargs.get("params") or {}
        query = str(params.get("query", ""))
        value = self.values.get(query)
        # Unknown query -> empty result list; known query -> one sample with
        # a fixed timestamp and the configured value.
        result = [] if value is None else [{"value": [1778756604, value]}]
        return _FakePrometheusResponse({
            "status": "success",
            "data": {"result": result},
        })
|
|
|
|
|
|
async def _low_volume_coverage(self) -> dict[str, Any]:  # noqa: ANN001
    """Return a fixed verification-coverage payload with no activity.

    Intended to be patched over
    ``Adr100SloStatusService._fetch_verification_coverage``, which is why it
    accepts (and ignores) ``self``. Every counter is zero, every rate and
    timestamp is ``None``, and the status is ``skipped_low_volume``.
    """
    return {
        "schema_version": "adr100_verification_coverage_v1",
        "source": "postgresql",
        "window": "24h",
        "status": "skipped_low_volume",
        "reason": "no_auto_repair_executions_24h",
        "evaluable": False,
        "total_auto": 0,
        "successful_auto": 0,
        "verified_auto": 0,
        "verified_success": 0,
        "verified_non_success": 0,
        "unverified_auto": 0,
        "coverage_rate": None,
        "verification_success_rate": None,
        "last_auto_at": None,
        "last_verified_auto_at": None,
        "last_verification_evidence_at": None,
        "latest_auto_age_seconds": None,
        "last_verified_auto_age_seconds": None,
        "recent_unverified": [],
        "recent_non_success": [],
        "non_success_breakdown": {
            "by_verification_result": [],
            "by_failure_class": [],
        },
        "remediation_queue": {
            "schema_version": "adr100_remediation_queue_v1",
            "source": "recent_non_success_read_model",
            "total": 0,
            "ready_for_ai": 0,
            "needs_human": 0,
            "items": [],
            "by_status": [],
            "by_action": [],
        },
    }
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_fetch_report_marks_ratio_slos_low_volume(monkeypatch):
    """Zero-rate samples mark ratio SLOs as skipped, not violated.

    With every rate query returning "0", ``decision_accuracy`` and
    ``confidence_calibration`` must report ``skipped_low_volume`` while
    ``km_growth_rate`` and the quality-summary latency SLO stay ``ok``.
    """
    values = {
        "sum(rate(automation_operation_log_total[5m]))": "0",
        'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
        "sum(rate(approval_records_high_confidence_total[1h]))": "0",
        "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "24",
        QUALITY_SUMMARY_LATENCY_QUERY: "1.2",
    }

    # Route every HTTP client construction to the canned-value fake.
    monkeypatch.setattr(
        "httpx.AsyncClient",
        lambda *args, **kwargs: _FakePrometheusClient(values),
    )
    # Replace the coverage lookup with the fixed low-volume payload.
    monkeypatch.setattr(
        Adr100SloStatusService,
        "_fetch_verification_coverage",
        _low_volume_coverage,
    )

    report = await Adr100SloStatusService().fetch_report()

    by_name = {metric["name"]: metric for metric in report["metrics"]}
    assert by_name["decision_accuracy"]["status"] == "skipped_low_volume"
    assert by_name["decision_accuracy"]["evaluable"] is False
    assert by_name["confidence_calibration"]["status"] == "skipped_low_volume"
    assert by_name["km_growth_rate"]["status"] == "ok"
    assert by_name["km_growth_rate"]["value"] == 24
    assert by_name["truth_chain_quality_summary_latency"]["status"] == "ok"
    assert by_name["truth_chain_quality_summary_latency"]["direction"] == "below"
    assert report["overall_status"] == "partial"
    assert report["overall_compliance"] == 1.0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_fetch_report_classifies_hard_red_line_violation(monkeypatch):
    """A low autonomy rate with real traffic is reported as ``violated``.

    Unlike the low-volume case, the total-operations rate here is non-zero,
    so ``autonomy_rate`` is evaluated, and the overall status must be
    ``violated``.
    """
    values = {
        "sum(rate(automation_operation_log_total[5m]))": "0.02",
        "sli:autonomy_rate:5m": "0.5",
        'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
        "sum(rate(approval_records_high_confidence_total[1h]))": "0",
        "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "3",
        QUALITY_SUMMARY_LATENCY_QUERY: "1.2",
    }

    # Route every HTTP client construction to the canned-value fake.
    monkeypatch.setattr(
        "httpx.AsyncClient",
        lambda *args, **kwargs: _FakePrometheusClient(values),
    )
    # Replace the coverage lookup with the fixed low-volume payload.
    monkeypatch.setattr(
        Adr100SloStatusService,
        "_fetch_verification_coverage",
        _low_volume_coverage,
    )

    report = await Adr100SloStatusService().fetch_report()

    by_name = {metric["name"]: metric for metric in report["metrics"]}
    assert by_name["autonomy_rate"]["status"] == "violated"
    # NOTE(review): presumably 0.02/s scaled by the 5m (300 s) window gives
    # the expected 6 samples - confirm against the service implementation.
    assert by_name["autonomy_rate"]["sample_count"] == 6
    assert by_name["km_growth_rate"]["status"] == "violated"
    assert report["overall_status"] == "violated"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_fetch_report_classifies_below_direction_slo(monkeypatch):
    """A high latency sample violates the ``below``-direction latency SLO.

    All other inputs match the low-volume scenario; only the quality-summary
    latency sample is raised (9.5), which must flip that metric and the
    overall status to ``violated``.
    """
    values = {
        "sum(rate(automation_operation_log_total[5m]))": "0",
        'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
        "sum(rate(approval_records_high_confidence_total[1h]))": "0",
        "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "24",
        QUALITY_SUMMARY_LATENCY_QUERY: "9.5",
    }

    # Route every HTTP client construction to the canned-value fake.
    monkeypatch.setattr(
        "httpx.AsyncClient",
        lambda *args, **kwargs: _FakePrometheusClient(values),
    )
    # Replace the coverage lookup with the fixed low-volume payload.
    monkeypatch.setattr(
        Adr100SloStatusService,
        "_fetch_verification_coverage",
        _low_volume_coverage,
    )

    report = await Adr100SloStatusService().fetch_report()

    by_name = {metric["name"]: metric for metric in report["metrics"]}
    latency = by_name["truth_chain_quality_summary_latency"]
    assert latency["status"] == "violated"
    assert latency["direction"] == "below"
    assert latency["value"] == 9.5
    assert report["overall_status"] == "violated"
|
|
|
|
|
|
def test_verification_coverage_payload_flags_backlog():
    """Unverified and non-success rows produce a ``warning`` payload.

    Feeds the builder counters with 2 unverified and 1 verified-non-success
    execution, then checks the derived rates, the failure classification of
    the non-success row, the breakdown tables, and the remediation queue.
    """
    payload = _build_verification_coverage_payload(
        # First argument: aggregate counters (presumably one summary row).
        {
            "total_auto": 7,
            "successful_auto": 6,
            "verified_auto": 5,
            "verified_success": 4,
            "verified_non_success": 1,
            "unverified_auto": 2,
            "last_auto_at": None,
            "last_verified_auto_at": None,
            "last_verification_evidence_at": None,
            "latest_auto_age_seconds": 90,
            "last_verified_auto_age_seconds": 120,
        },
        # Second argument: rows surfaced under ``recent_unverified``.
        [
            {
                "id": "are-1",
                "incident_id": "INC-1",
                "success": True,
                "created_at": None,
            },
        ],
        # Third argument: rows surfaced under ``recent_non_success``.
        [
            {
                "auto_repair_id": "are-2",
                "incident_id": "INC-2",
                "incident_status": "INVESTIGATING",
                "incident_severity": "P2",
                "alert_category": "infrastructure",
                "alertname": "DockerContainerMemoryLimitPressure",
                "auto_success": False,
                "playbook_id": "PB-1",
                "playbook_name": "Docker pressure playbook",
                "triggered_by": "auto_repair",
                "risk_level": "LOW",
                "verification_result": "degraded",
                "auto_error": "FAILED: Unsupported scheme: 'ssh {host}'",
                "post_state_text": '{"k8s_get_pod_logs": {"error": "empty_pod_name"}}',
                "evidence_summary": "result=degraded",
                "auto_created_at": None,
                "verification_collected_at": None,
            },
        ],
    )

    assert payload["status"] == "warning"
    assert payload["reason"] == "verification_backlog_present"
    assert payload["coverage_rate"] == pytest.approx(5 / 7)
    assert payload["verification_success_rate"] == pytest.approx(4 / 5)
    assert payload["recent_unverified"][0]["incident_id"] == "INC-1"
    assert payload["recent_non_success"][0]["failure_class"] == "unsupported_action_scheme"
    assert payload["recent_non_success"][0]["next_step"] == "normalize_playbook_executor"
    assert payload["recent_non_success"][0]["remediation_status"] == "ready_for_replay"
    assert payload["recent_non_success"][0]["remediation_action"] == (
        "replay_with_supported_executor"
    )
    assert payload["non_success_breakdown"]["by_failure_class"] == [
        {"name": "unsupported_action_scheme", "count": 1},
    ]
    assert payload["non_success_breakdown"]["by_verification_result"] == [
        {"name": "degraded", "count": 1},
    ]
    assert payload["non_success_breakdown"]["by_remediation_status"] == [
        {"name": "ready_for_replay", "count": 1},
    ]
    assert payload["remediation_queue"]["total"] == 1
    assert payload["remediation_queue"]["ready_for_ai"] == 1
    assert payload["remediation_queue"]["needs_human"] == 0
    assert payload["remediation_queue"]["items"][0]["work_item_id"] == (
        "verification:INC-2:are-2"
    )
|
|
|
|
|
|
def test_verification_coverage_payload_skips_when_no_auto_repair():
    """All-zero counters and no rows yield a ``skipped_low_volume`` payload."""
    payload = _build_verification_coverage_payload(
        {
            "total_auto": 0,
            "successful_auto": 0,
            "verified_auto": 0,
            "verified_success": 0,
            "verified_non_success": 0,
            "unverified_auto": 0,
        },
        [],
        [],
    )

    assert payload["status"] == "skipped_low_volume"
    assert payload["reason"] == "no_auto_repair_executions_24h"
    assert payload["evaluable"] is False
    assert payload["remediation_queue"]["total"] == 0
|
|
|
|
|
|
def test_verification_coverage_payload_routes_observe_only_playbook_to_ticket():
    """A succeeded-but-degraded diagnostic run is queued for a human.

    The single non-success row reports ``auto_success=True`` with an empty
    error and a ``degraded`` verification result; the builder must classify
    it as ``observe_only_playbook`` and count it under ``needs_human``.
    """
    payload = _build_verification_coverage_payload(
        {
            "total_auto": 1,
            "successful_auto": 1,
            "verified_auto": 1,
            "verified_success": 0,
            "verified_non_success": 1,
            "unverified_auto": 0,
        },
        [],
        [
            {
                "auto_repair_id": "are-3",
                "incident_id": "INC-3",
                "incident_status": "INVESTIGATING",
                "incident_severity": "P2",
                "alert_category": "infrastructure",
                "alertname": "DockerContainerMemoryLimitPressure",
                "auto_success": True,
                "playbook_id": "PB-3",
                "playbook_name": "Docker pressure diagnostic playbook",
                "triggered_by": "auto_repair",
                "risk_level": "LOW",
                "verification_result": "degraded",
                "auto_error": "",
                "post_state_text": '{"ssh_diagnose": {"command": "docker stats --no-stream api"}}',
                "evidence_summary": "SUCCESS: mcp:ssh_diagnose",
                "auto_created_at": None,
                "verification_collected_at": None,
            },
        ],
    )

    item = payload["recent_non_success"][0]
    assert item["failure_class"] == "observe_only_playbook"
    assert item["next_step"] == "author_mutating_repair_step"
    assert item["remediation_status"] == "needs_playbook_ticket"
    assert item["remediation_action"] == "promote_diagnostic_to_repair_playbook"
    assert payload["remediation_queue"]["needs_human"] == 1
|