Files
awoooi/apps/api/tests/test_adr100_slo_status_service.py
Your Name 7f3722c7f7
Some checks failed
CD Pipeline / tests (push) Successful in 1m22s
Code Review / ai-code-review (push) Successful in 13s
CD Pipeline / build-and-deploy (push) Successful in 4m10s
CD Pipeline / post-deploy-checks (push) Has been cancelled
fix(ai): improve docker repair verification signals
2026-06-01 19:27:36 +08:00

319 lines
11 KiB
Python

from __future__ import annotations
from typing import Any
import pytest
from src.services.adr100_slo_status_service import (
Adr100SloStatusService,
_build_verification_coverage_payload,
)
# Query string for the truth-chain quality-summary latency metric; the tests
# in this module use it as a key in their canned Prometheus value maps.
QUALITY_SUMMARY_LATENCY_QUERY = (
    "max("
    "awooop_truth_chain_quality_summary_last_duration_seconds"
    '{project_id="awoooi",limit="8",success="true"}'
    ")"
)
class _FakePrometheusResponse:
def __init__(self, payload: dict[str, Any]) -> None:
self._payload = payload
def json(self) -> dict[str, Any]:
return self._payload
class _FakePrometheusClient:
def __init__(self, values: dict[str, str]) -> None:
self.values = values
async def __aenter__(self):
return self
async def __aexit__(self, exc_type, exc, tb):
return False
async def get(self, *args, **kwargs): # noqa: ANN002, ANN003
query = str(kwargs.get("params", {}).get("query", ""))
value = self.values.get(query)
if value is None:
return _FakePrometheusResponse({
"status": "success",
"data": {"result": []},
})
return _FakePrometheusResponse({
"status": "success",
"data": {"result": [{"value": [1778756604, value]}]},
})
async def _low_volume_coverage(self): # noqa: ANN001
return {
"schema_version": "adr100_verification_coverage_v1",
"source": "postgresql",
"window": "24h",
"status": "skipped_low_volume",
"reason": "no_auto_repair_executions_24h",
"evaluable": False,
"total_auto": 0,
"successful_auto": 0,
"verified_auto": 0,
"verified_success": 0,
"verified_non_success": 0,
"unverified_auto": 0,
"coverage_rate": None,
"verification_success_rate": None,
"last_auto_at": None,
"last_verified_auto_at": None,
"last_verification_evidence_at": None,
"latest_auto_age_seconds": None,
"last_verified_auto_age_seconds": None,
"recent_unverified": [],
"recent_non_success": [],
"non_success_breakdown": {
"by_verification_result": [],
"by_failure_class": [],
},
"remediation_queue": {
"schema_version": "adr100_remediation_queue_v1",
"source": "recent_non_success_read_model",
"total": 0,
"ready_for_ai": 0,
"needs_human": 0,
"items": [],
"by_status": [],
"by_action": [],
},
}
@pytest.mark.asyncio
async def test_fetch_report_marks_ratio_slos_low_volume(monkeypatch):
    """Zero operation rates mark the ratio SLOs as ``skipped_low_volume``.

    ``km_growth_rate`` and the quality-summary latency are still expected to
    be ``ok``, giving an overall status of ``partial`` with compliance 1.0.
    """
    canned_samples = {
        "sum(rate(automation_operation_log_total[5m]))": "0",
        'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
        "sum(rate(approval_records_high_confidence_total[1h]))": "0",
        "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "24",
        QUALITY_SUMMARY_LATENCY_QUERY: "1.2",
    }

    def _make_client(*args, **kwargs):
        # Whatever the service passes to ``httpx.AsyncClient`` is ignored.
        return _FakePrometheusClient(canned_samples)

    monkeypatch.setattr("httpx.AsyncClient", _make_client)
    monkeypatch.setattr(
        Adr100SloStatusService,
        "_fetch_verification_coverage",
        _low_volume_coverage,
    )

    report = await Adr100SloStatusService().fetch_report()
    metrics = {entry["name"]: entry for entry in report["metrics"]}

    decision_accuracy = metrics["decision_accuracy"]
    assert decision_accuracy["status"] == "skipped_low_volume"
    assert decision_accuracy["evaluable"] is False
    assert metrics["confidence_calibration"]["status"] == "skipped_low_volume"

    km_growth = metrics["km_growth_rate"]
    assert km_growth["status"] == "ok"
    assert km_growth["value"] == 24

    latency = metrics["truth_chain_quality_summary_latency"]
    assert latency["status"] == "ok"
    assert latency["direction"] == "below"

    assert report["overall_status"] == "partial"
    assert report["overall_compliance"] == 1.0
@pytest.mark.asyncio
async def test_fetch_report_classifies_hard_red_line_violation(monkeypatch):
    """A non-zero operation rate with low SLI values is reported as violated.

    Supplies an operation rate of 0.02, an autonomy SLI of 0.5 and a KM growth
    value of 3. Expects ``autonomy_rate`` and ``km_growth_rate`` to be
    ``violated`` and the overall status to follow.
    """
    canned_samples = {
        "sum(rate(automation_operation_log_total[5m]))": "0.02",
        "sli:autonomy_rate:5m": "0.5",
        'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
        "sum(rate(approval_records_high_confidence_total[1h]))": "0",
        "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "3",
        QUALITY_SUMMARY_LATENCY_QUERY: "1.2",
    }

    def _make_client(*args, **kwargs):
        # Whatever the service passes to ``httpx.AsyncClient`` is ignored.
        return _FakePrometheusClient(canned_samples)

    monkeypatch.setattr("httpx.AsyncClient", _make_client)
    monkeypatch.setattr(
        Adr100SloStatusService,
        "_fetch_verification_coverage",
        _low_volume_coverage,
    )

    report = await Adr100SloStatusService().fetch_report()
    metrics = {entry["name"]: entry for entry in report["metrics"]}

    autonomy = metrics["autonomy_rate"]
    assert autonomy["status"] == "violated"
    # Presumably 0.02 ops/s over the 5m window (300 s) -- confirm in the service.
    assert autonomy["sample_count"] == 6
    assert metrics["km_growth_rate"]["status"] == "violated"
    assert report["overall_status"] == "violated"
@pytest.mark.asyncio
async def test_fetch_report_classifies_below_direction_slo(monkeypatch):
    """A quality-summary latency of 9.5 violates the ``below``-direction SLO.

    All other canned values match the low-volume scenario, so the latency
    metric is the only input that differs from it.
    """
    canned_samples = {
        "sum(rate(automation_operation_log_total[5m]))": "0",
        'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
        "sum(rate(approval_records_high_confidence_total[1h]))": "0",
        "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "24",
        QUALITY_SUMMARY_LATENCY_QUERY: "9.5",
    }

    def _make_client(*args, **kwargs):
        # Whatever the service passes to ``httpx.AsyncClient`` is ignored.
        return _FakePrometheusClient(canned_samples)

    monkeypatch.setattr("httpx.AsyncClient", _make_client)
    monkeypatch.setattr(
        Adr100SloStatusService,
        "_fetch_verification_coverage",
        _low_volume_coverage,
    )

    report = await Adr100SloStatusService().fetch_report()
    metrics = {entry["name"]: entry for entry in report["metrics"]}

    latency = metrics["truth_chain_quality_summary_latency"]
    assert latency["status"] == "violated"
    assert latency["direction"] == "below"
    assert latency["value"] == 9.5
    assert report["overall_status"] == "violated"
def test_verification_coverage_payload_flags_backlog():
    """Unverified and non-success rows produce a ``warning`` backlog payload.

    The counters describe 7 auto repairs, 5 of them verified and 4 verified
    successfully. One unverified row and one ``degraded`` non-success row are
    supplied. The test checks the rates, the classification of the
    non-success row, the breakdown lists and the remediation queue.
    """
    counters = {
        "total_auto": 7,
        "successful_auto": 6,
        "verified_auto": 5,
        "verified_success": 4,
        "verified_non_success": 1,
        "unverified_auto": 2,
        "last_auto_at": None,
        "last_verified_auto_at": None,
        "last_verification_evidence_at": None,
        "latest_auto_age_seconds": 90,
        "last_verified_auto_age_seconds": 120,
    }
    unverified_rows = [
        {
            "id": "are-1",
            "incident_id": "INC-1",
            "success": True,
            "created_at": None,
        },
    ]
    non_success_rows = [
        {
            "auto_repair_id": "are-2",
            "incident_id": "INC-2",
            "incident_status": "INVESTIGATING",
            "incident_severity": "P2",
            "alert_category": "infrastructure",
            "alertname": "DockerContainerMemoryLimitPressure",
            "auto_success": False,
            "playbook_id": "PB-1",
            "playbook_name": "Docker pressure playbook",
            "triggered_by": "auto_repair",
            "risk_level": "LOW",
            "verification_result": "degraded",
            "auto_error": "FAILED: Unsupported scheme: 'ssh {host}'",
            "post_state_text": '{"k8s_get_pod_logs": {"error": "empty_pod_name"}}',
            "evidence_summary": "result=degraded",
            "auto_created_at": None,
            "verification_collected_at": None,
        },
    ]

    payload = _build_verification_coverage_payload(
        counters,
        unverified_rows,
        non_success_rows,
    )

    # Headline status and rates.
    assert payload["status"] == "warning"
    assert payload["reason"] == "verification_backlog_present"
    assert payload["coverage_rate"] == pytest.approx(5 / 7)
    assert payload["verification_success_rate"] == pytest.approx(4 / 5)
    assert payload["recent_unverified"][0]["incident_id"] == "INC-1"

    # Classification of the single non-success row.
    non_success = payload["recent_non_success"][0]
    assert non_success["failure_class"] == "unsupported_action_scheme"
    assert non_success["next_step"] == "normalize_playbook_executor"
    assert non_success["remediation_status"] == "ready_for_replay"
    assert non_success["remediation_action"] == "replay_with_supported_executor"

    # Aggregated breakdowns.
    breakdown = payload["non_success_breakdown"]
    assert breakdown["by_failure_class"] == [
        {"name": "unsupported_action_scheme", "count": 1},
    ]
    assert breakdown["by_verification_result"] == [
        {"name": "degraded", "count": 1},
    ]
    assert breakdown["by_remediation_status"] == [
        {"name": "ready_for_replay", "count": 1},
    ]

    # Remediation queue derived from the non-success row.
    queue = payload["remediation_queue"]
    assert queue["total"] == 1
    assert queue["ready_for_ai"] == 1
    assert queue["needs_human"] == 0
    assert queue["items"][0]["work_item_id"] == "verification:INC-2:are-2"
def test_verification_coverage_payload_skips_when_no_auto_repair():
    """All-zero counters with no rows yield a ``skipped_low_volume`` payload."""
    zero_counters = {
        "total_auto": 0,
        "successful_auto": 0,
        "verified_auto": 0,
        "verified_success": 0,
        "verified_non_success": 0,
        "unverified_auto": 0,
    }
    no_unverified_rows = []
    no_non_success_rows = []

    payload = _build_verification_coverage_payload(
        zero_counters,
        no_unverified_rows,
        no_non_success_rows,
    )

    assert payload["status"] == "skipped_low_volume"
    assert payload["reason"] == "no_auto_repair_executions_24h"
    assert payload["evaluable"] is False
    assert payload["remediation_queue"]["total"] == 0
def test_verification_coverage_payload_routes_observe_only_playbook_to_ticket():
    """A degraded, diagnostic-only repair is routed to a playbook ticket.

    The single non-success row reports ``auto_success`` as True with a
    ``degraded`` verification result. The test expects it to be classified as
    ``observe_only_playbook`` and counted under ``needs_human`` in the queue.
    """
    counters = {
        "total_auto": 1,
        "successful_auto": 1,
        "verified_auto": 1,
        "verified_success": 0,
        "verified_non_success": 1,
        "unverified_auto": 0,
    }
    diagnostic_row = {
        "auto_repair_id": "are-3",
        "incident_id": "INC-3",
        "incident_status": "INVESTIGATING",
        "incident_severity": "P2",
        "alert_category": "infrastructure",
        "alertname": "DockerContainerMemoryLimitPressure",
        "auto_success": True,
        "playbook_id": "PB-3",
        "playbook_name": "Docker pressure diagnostic playbook",
        "triggered_by": "auto_repair",
        "risk_level": "LOW",
        "verification_result": "degraded",
        "auto_error": "",
        "post_state_text": '{"ssh_diagnose": {"command": "docker stats --no-stream api"}}',
        "evidence_summary": "SUCCESS: mcp:ssh_diagnose",
        "auto_created_at": None,
        "verification_collected_at": None,
    }

    payload = _build_verification_coverage_payload(counters, [], [diagnostic_row])

    classified = payload["recent_non_success"][0]
    assert classified["failure_class"] == "observe_only_playbook"
    assert classified["next_step"] == "author_mutating_repair_step"
    assert classified["remediation_status"] == "needs_playbook_ticket"
    assert classified["remediation_action"] == "promote_diagnostic_to_repair_playbook"
    assert payload["remediation_queue"]["needs_human"] == 1