feat(governance): surface adr100 slo states
This commit is contained in:
@@ -20,6 +20,7 @@ from __future__ import annotations
|
|||||||
import structlog
|
import structlog
|
||||||
from fastapi import APIRouter, Query
|
from fastapi import APIRouter, Query
|
||||||
|
|
||||||
|
from src.services.adr100_slo_status_service import get_adr100_slo_status_service
|
||||||
from src.services.ai_slo_calculator import AiSloCalculator
|
from src.services.ai_slo_calculator import AiSloCalculator
|
||||||
|
|
||||||
logger = structlog.get_logger(__name__)
|
logger = structlog.get_logger(__name__)
|
||||||
@@ -50,9 +51,11 @@ async def get_ai_slo(
|
|||||||
if cached:
|
if cached:
|
||||||
data = cached.to_dict()
|
data = cached.to_dict()
|
||||||
data["cache_hit"] = True
|
data["cache_hit"] = True
|
||||||
|
data["adr100"] = await get_adr100_slo_status_service().fetch_report()
|
||||||
return data
|
return data
|
||||||
|
|
||||||
report = await calc.run()
|
report = await calc.run()
|
||||||
data = report.to_dict()
|
data = report.to_dict()
|
||||||
data["cache_hit"] = False
|
data["cache_hit"] = False
|
||||||
|
data["adr100"] = await get_adr100_slo_status_service().fetch_report()
|
||||||
return data
|
return data
|
||||||
|
|||||||
278
apps/api/src/services/adr100_slo_status_service.py
Normal file
278
apps/api/src/services/adr100_slo_status_service.py
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
"""
|
||||||
|
Read-only ADR-100 SLO status snapshot.
|
||||||
|
|
||||||
|
GovernanceAgent.check_slo_compliance() can emit governance alerts when an SLO is
|
||||||
|
violated. This service is intentionally read-only so dashboards can show the
|
||||||
|
same Prometheus-backed state without producing Telegram/DB side effects.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
from src.core.config import settings
|
||||||
|
|
||||||
|
logger = structlog.get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Adr100SloDefinition:
    """Static description of one ADR-100 SLO and how it is evaluated.

    Raises:
        ValueError: If ``direction`` is neither ``"above"`` nor ``"below"``,
            or if ``denominator_query`` is set without a positive
            ``denominator_window_seconds``.
    """

    # Identifier reported as ``name`` in the status payload.
    name: str
    # PromQL expression queried for the SLI value.
    query: str
    # Objective; a value on the wrong side of it is classified "warning".
    target: float
    # Hard limit; a value on the wrong side of it is classified "violated".
    hard_red_line: float
    # "above" = higher is better, "below" = lower is better.
    direction: str
    # Display unit passed through to the payload ("percent" / "count" here).
    unit: str
    # Human-readable evaluation window label, e.g. "5m".
    window: str
    # Optional PromQL rate query used to estimate how many events back the SLI.
    denominator_query: str | None = None
    # Multiplied by the denominator rate to turn it into an event count.
    denominator_window_seconds: int = 0
    # Below this estimated event count the SLI is reported as low volume.
    minimum_events: float = 1.0

    def __post_init__(self) -> None:
        """Reject definitions that would silently evaluate incorrectly."""
        if self.direction not in ("above", "below"):
            # Any other value would be evaluated as "below" by the classifier.
            raise ValueError(
                f"SLO {self.name!r}: direction must be 'above' or 'below', "
                f"got {self.direction!r}"
            )
        if self.denominator_query and self.denominator_window_seconds <= 0:
            # rate * 0 is always below minimum_events, so the SLO could never
            # become evaluable.
            raise ValueError(
                f"SLO {self.name!r}: denominator_window_seconds must be positive "
                "when denominator_query is set"
            )
|
||||||
|
|
||||||
|
|
||||||
|
# The ADR-100 SLOs evaluated by this module, in the order they are reported.
ADR100_SLO_DEFINITIONS: tuple[Adr100SloDefinition, ...] = (
    # --- Ratio SLIs: each carries a denominator rate query so that low
    # --- traffic can be reported as "skipped_low_volume".
    Adr100SloDefinition(
        name="autonomy_rate",
        unit="percent",
        window="5m",
        direction="above",
        target=0.80,
        hard_red_line=0.70,
        query="sli:autonomy_rate:5m",
        denominator_query="sum(rate(automation_operation_log_total[5m]))",
        denominator_window_seconds=300,
    ),
    Adr100SloDefinition(
        name="decision_accuracy",
        unit="percent",
        window="5m",
        direction="above",
        target=0.90,
        hard_red_line=0.85,
        query="sli:decision_accuracy:5m",
        denominator_query='sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))',
        denominator_window_seconds=300,
    ),
    Adr100SloDefinition(
        name="confidence_calibration",
        unit="percent",
        window="1h",
        direction="above",
        target=0.80,
        hard_red_line=0.70,
        query="sli:confidence_calibration:1h",
        denominator_query="sum(rate(approval_records_high_confidence_total[1h]))",
        denominator_window_seconds=3600,
    ),
    # --- Count SLI: no denominator query is configured.
    Adr100SloDefinition(
        name="km_growth_rate",
        unit="count",
        window="24h",
        direction="above",
        target=20.0,
        hard_red_line=5.0,
        query="max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)",
    ),
)
|
||||||
|
|
||||||
|
|
||||||
|
class Adr100SloStatusService:
    """Fetch ADR-100 SLO status from Prometheus without writing governance events."""

    async def fetch_report(self) -> dict[str, Any]:
        """Evaluate every ADR-100 SLO definition and summarise the results.

        Returns:
            A dict with the schema version, the UTC evaluation timestamp
            (ISO-8601), the aggregated ``overall_status``,
            ``overall_compliance`` (share of evaluable metrics whose status is
            ``"ok"``, or ``None`` when nothing was evaluable), the counts, and
            the per-metric payloads in definition order.
        """
        # Function-scope import keeps this method independent of the module's
        # import block.
        import asyncio

        default_url = "http://prometheus.observability.svc:9090"
        # ``or`` also covers a setting that exists but is None or empty, which
        # would otherwise fail on ``.rstrip``.
        configured_url = getattr(settings, "PROMETHEUS_URL", None)
        prom_url = str(configured_url or default_url).rstrip("/")

        async with httpx.AsyncClient(timeout=5.0) as client:
            # The SLOs are independent, so query them concurrently; gather()
            # returns results in the order the awaitables were passed.
            metrics: list[dict[str, Any]] = list(
                await asyncio.gather(
                    *(
                        self._fetch_metric(client, prom_url, definition)
                        for definition in ADR100_SLO_DEFINITIONS
                    )
                )
            )

        evaluable = [metric for metric in metrics if metric.get("evaluable")]
        ok_count = sum(1 for metric in evaluable if metric.get("status") == "ok")
        overall_compliance = (ok_count / len(evaluable)) if evaluable else None
        overall_status = _overall_status(metrics, evaluable)

        return {
            "schema_version": "adr100_slo_status_v1",
            "source": "prometheus",
            "evaluated_at": datetime.now(UTC).isoformat(),
            "overall_status": overall_status,
            "overall_compliance": overall_compliance,
            "evaluable_count": len(evaluable),
            "metric_count": len(metrics),
            "metrics": metrics,
        }

    @staticmethod
    def _unavailable_status(reason: str | None) -> str:
        """Map the reason of a non-ok query result to a metric status."""
        # NaN/Inf is treated as "not enough traffic yet"; everything else
        # (empty result, failed query) is reported as missing data.
        return "skipped_low_volume" if reason == "prometheus_nan_or_inf" else "no_data"

    async def _fetch_metric(
        self,
        client: httpx.AsyncClient,
        prom_url: str,
        definition: Adr100SloDefinition,
    ) -> dict[str, Any]:
        """Query one SLO and return its status payload.

        Args:
            client: HTTP client used for the Prometheus queries.
            prom_url: Prometheus base URL without a trailing slash.
            definition: The SLO to evaluate.

        Returns:
            The dict built by ``_metric_payload``. ``value`` is ``None``
            whenever the SLO could not be evaluated.
        """
        denominator_value: float | None = None
        sample_count: float | None = None

        if definition.denominator_query:
            denominator_result = await _query_prometheus_value(
                client,
                prom_url,
                definition.denominator_query,
            )
            if denominator_result["status"] != "ok":
                denominator_reason = denominator_result.get("reason")
                return _metric_payload(
                    definition,
                    value=None,
                    # Same reason -> status mapping as for the value query.
                    status=self._unavailable_status(denominator_reason),
                    reason=denominator_reason,
                    denominator_value=None,
                    sample_count=None,
                )

            denominator_value = float(denominator_result["value"])
            # rate (events/second) * window length = estimated events in window.
            sample_count = denominator_value * definition.denominator_window_seconds
            if sample_count < definition.minimum_events:
                return _metric_payload(
                    definition,
                    value=None,
                    status="skipped_low_volume",
                    reason="denominator_below_minimum_events",
                    denominator_value=denominator_value,
                    sample_count=sample_count,
                )

        value_result = await _query_prometheus_value(client, prom_url, definition.query)
        if value_result["status"] != "ok":
            value_reason = value_result.get("reason")
            return _metric_payload(
                definition,
                value=None,
                status=self._unavailable_status(value_reason),
                reason=value_reason,
                denominator_value=denominator_value,
                sample_count=sample_count,
            )

        value = float(value_result["value"])
        return _metric_payload(
            definition,
            value=value,
            status=_classify_status(value, definition),
            reason=None,
            denominator_value=denominator_value,
            # Without a denominator query the value itself is reported as the
            # sample count.
            sample_count=sample_count if sample_count is not None else value,
        )
|
||||||
|
|
||||||
|
|
||||||
|
async def _query_prometheus_value(
|
||||||
|
client: httpx.AsyncClient,
|
||||||
|
prom_url: str,
|
||||||
|
query: str,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
try:
|
||||||
|
response = await client.get(
|
||||||
|
f"{prom_url}/api/v1/query",
|
||||||
|
params={"query": query},
|
||||||
|
)
|
||||||
|
data = response.json()
|
||||||
|
if data.get("status") != "success":
|
||||||
|
return {"status": "error", "reason": "prometheus_query_failed"}
|
||||||
|
|
||||||
|
results = data.get("data", {}).get("result", [])
|
||||||
|
if not results:
|
||||||
|
return {
|
||||||
|
"status": "no_data",
|
||||||
|
"reason": "prometheus_empty_result_metric_not_emitted",
|
||||||
|
}
|
||||||
|
|
||||||
|
raw_value = results[0]["value"][1]
|
||||||
|
value = float(raw_value)
|
||||||
|
if not math.isfinite(value):
|
||||||
|
return {
|
||||||
|
"status": "skipped",
|
||||||
|
"reason": "prometheus_nan_or_inf",
|
||||||
|
"raw_value": raw_value,
|
||||||
|
}
|
||||||
|
return {"status": "ok", "value": value}
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("adr100_slo_prometheus_query_error", query=query, error=str(exc))
|
||||||
|
return {"status": "error", "reason": "prometheus_query_error"}
|
||||||
|
|
||||||
|
|
||||||
|
def _metric_payload(
|
||||||
|
definition: Adr100SloDefinition,
|
||||||
|
*,
|
||||||
|
value: float | None,
|
||||||
|
status: str,
|
||||||
|
reason: str | None,
|
||||||
|
denominator_value: float | None,
|
||||||
|
sample_count: float | None,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"name": definition.name,
|
||||||
|
"query": definition.query,
|
||||||
|
"value": value,
|
||||||
|
"target": definition.target,
|
||||||
|
"hard_red_line": definition.hard_red_line,
|
||||||
|
"direction": definition.direction,
|
||||||
|
"unit": definition.unit,
|
||||||
|
"window": definition.window,
|
||||||
|
"status": status,
|
||||||
|
"evaluable": status in {"ok", "warning", "violated"},
|
||||||
|
"reason": reason,
|
||||||
|
"denominator_query": definition.denominator_query,
|
||||||
|
"denominator_value": denominator_value,
|
||||||
|
"sample_count": sample_count,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_status(value: float, definition: Adr100SloDefinition) -> str:
|
||||||
|
if definition.direction == "above":
|
||||||
|
if value < definition.hard_red_line:
|
||||||
|
return "violated"
|
||||||
|
if value < definition.target:
|
||||||
|
return "warning"
|
||||||
|
return "ok"
|
||||||
|
|
||||||
|
if value > definition.hard_red_line:
|
||||||
|
return "violated"
|
||||||
|
if value > definition.target:
|
||||||
|
return "warning"
|
||||||
|
return "ok"
|
||||||
|
|
||||||
|
|
||||||
|
def _overall_status(metrics: list[dict[str, Any]], evaluable: list[dict[str, Any]]) -> str:
|
||||||
|
if any(metric.get("status") == "violated" for metric in metrics):
|
||||||
|
return "violated"
|
||||||
|
if any(metric.get("status") == "warning" for metric in metrics):
|
||||||
|
return "warning"
|
||||||
|
if evaluable and any(metric.get("status") == "skipped_low_volume" for metric in metrics):
|
||||||
|
return "partial"
|
||||||
|
if evaluable:
|
||||||
|
return "ok"
|
||||||
|
if any(metric.get("status") == "no_data" for metric in metrics):
|
||||||
|
return "no_data"
|
||||||
|
return "skipped_low_volume"
|
||||||
|
|
||||||
|
|
||||||
|
# Process-wide service instance, created on first use by the accessor below.
_adr100_slo_status_service: Adr100SloStatusService | None = None


def get_adr100_slo_status_service() -> Adr100SloStatusService:
    """Return the shared ``Adr100SloStatusService``, creating it on first call."""
    global _adr100_slo_status_service
    service = _adr100_slo_status_service
    if service is None:
        service = _adr100_slo_status_service = Adr100SloStatusService()
    return service
|
||||||
89
apps/api/tests/test_adr100_slo_status_service.py
Normal file
89
apps/api/tests/test_adr100_slo_status_service.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.services.adr100_slo_status_service import Adr100SloStatusService
|
||||||
|
|
||||||
|
|
||||||
|
class _FakePrometheusResponse:
|
||||||
|
def __init__(self, payload: dict[str, Any]) -> None:
|
||||||
|
self._payload = payload
|
||||||
|
|
||||||
|
def json(self) -> dict[str, Any]:
|
||||||
|
return self._payload
|
||||||
|
|
||||||
|
|
||||||
|
class _FakePrometheusClient:
    """Async-context-manager fake that answers PromQL queries from a dict.

    ``values`` maps a query string to the sample value to return; queries not
    present in the mapping get a successful response with an empty result.
    """

    def __init__(self, values: dict[str, str]) -> None:
        self.values = values

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc, tb):
        # Returning False lets exceptions raised inside the block propagate.
        return False

    async def get(self, *args, **kwargs):  # noqa: ANN002, ANN003
        promql = str(kwargs.get("params", {}).get("query", ""))
        sample = self.values.get(promql)
        series = [] if sample is None else [{"value": [1778756604, sample]}]
        return _FakePrometheusResponse({
            "status": "success",
            "data": {"result": series},
        })
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_fetch_report_marks_ratio_slos_low_volume(monkeypatch):
    """Ratio SLOs with a zero denominator are skipped; the count SLO still evaluates."""
    values = {
        "sum(rate(automation_operation_log_total[5m]))": "0",
        'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
        "sum(rate(approval_records_high_confidence_total[1h]))": "0",
        "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "24",
    }

    monkeypatch.setattr(
        "httpx.AsyncClient",
        lambda *args, **kwargs: _FakePrometheusClient(values),
    )

    report = await Adr100SloStatusService().fetch_report()

    by_name = {metric["name"]: metric for metric in report["metrics"]}
    # All three ratio SLOs have a zero denominator, so all three are skipped.
    for ratio_slo in ("autonomy_rate", "decision_accuracy", "confidence_calibration"):
        assert by_name[ratio_slo]["status"] == "skipped_low_volume"
        assert by_name[ratio_slo]["evaluable"] is False
        assert by_name[ratio_slo]["reason"] == "denominator_below_minimum_events"
    assert by_name["km_growth_rate"]["status"] == "ok"
    assert by_name["km_growth_rate"]["value"] == 24
    assert report["metric_count"] == 4
    assert report["evaluable_count"] == 1
    assert report["overall_status"] == "partial"
    assert report["overall_compliance"] == 1.0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_fetch_report_classifies_hard_red_line_violation(monkeypatch):
    """Values beyond the hard red line are reported as violated."""
    values = {
        "sum(rate(automation_operation_log_total[5m]))": "0.02",
        "sli:autonomy_rate:5m": "0.5",
        'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
        "sum(rate(approval_records_high_confidence_total[1h]))": "0",
        "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "3",
    }

    monkeypatch.setattr(
        "httpx.AsyncClient",
        lambda *args, **kwargs: _FakePrometheusClient(values),
    )

    report = await Adr100SloStatusService().fetch_report()

    by_name = {metric["name"]: metric for metric in report["metrics"]}
    assert by_name["autonomy_rate"]["status"] == "violated"
    # 0.02 events/s * 300 s; compare approximately because it is a float product.
    assert by_name["autonomy_rate"]["sample_count"] == pytest.approx(6.0)
    assert by_name["decision_accuracy"]["status"] == "skipped_low_volume"
    assert by_name["confidence_calibration"]["status"] == "skipped_low_volume"
    assert by_name["km_growth_rate"]["status"] == "violated"
    assert report["overall_status"] == "violated"
    # Two metrics were evaluable and neither is ok.
    assert report["evaluable_count"] == 2
    assert report["overall_compliance"] == 0.0
|
||||||
@@ -1354,15 +1354,38 @@
|
|||||||
"comingSoon": "This tab is coming soon",
|
"comingSoon": "This tab is coming soon",
|
||||||
"slo": {
|
"slo": {
|
||||||
"kpi": {
|
"kpi": {
|
||||||
|
"autonomy_rate": "Autonomy Rate",
|
||||||
"decision_accuracy": "Decision Accuracy",
|
"decision_accuracy": "Decision Accuracy",
|
||||||
|
"confidence_calibration": "Confidence Calibration",
|
||||||
"km_growth_rate": "KM Growth Rate",
|
"km_growth_rate": "KM Growth Rate",
|
||||||
"mcp_call_diversity": "MCP Call Diversity",
|
"mcp_call_diversity": "MCP Call Diversity",
|
||||||
|
"auto_execute_success_rate": "Auto Execute Success",
|
||||||
|
"human_override_rate": "Human Override Rate",
|
||||||
|
"verifier_false_neg_rate": "Verifier False Negative",
|
||||||
"current": "Current",
|
"current": "Current",
|
||||||
"target": "Target",
|
"target": "Target",
|
||||||
"sparkline": "7-day trend",
|
"sparkline": "7-day trend",
|
||||||
"loading": "Loading...",
|
"loading": "Loading...",
|
||||||
"error": "Failed to load",
|
"error": "Failed to load",
|
||||||
"noData": "No data"
|
"noData": "No data",
|
||||||
|
"sampleCount": "Samples {count}",
|
||||||
|
"window": "Window {window}",
|
||||||
|
"state": {
|
||||||
|
"ok": "OK",
|
||||||
|
"warning": "Below target",
|
||||||
|
"violated": "Hard red line",
|
||||||
|
"skipped_low_volume": "Waiting for more samples",
|
||||||
|
"no_data": "No data",
|
||||||
|
"error": "Query failed",
|
||||||
|
"partial": "Partially evaluable"
|
||||||
|
},
|
||||||
|
"reason": {
|
||||||
|
"none": "None",
|
||||||
|
"denominator_below_minimum_events": "Denominator events too low",
|
||||||
|
"prometheus_nan_or_inf": "Prometheus has no valid denominator yet",
|
||||||
|
"prometheus_empty_result_metric_not_emitted": "Prometheus has not returned the metric yet",
|
||||||
|
"unknown": "Reason pending"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"chart": {
|
"chart": {
|
||||||
"title": "30-day Violation Timeline",
|
"title": "30-day Violation Timeline",
|
||||||
|
|||||||
@@ -1355,15 +1355,38 @@
|
|||||||
"comingSoon": "本 Tab 即將上線",
|
"comingSoon": "本 Tab 即將上線",
|
||||||
"slo": {
|
"slo": {
|
||||||
"kpi": {
|
"kpi": {
|
||||||
|
"autonomy_rate": "自主化率",
|
||||||
"decision_accuracy": "決策準確率",
|
"decision_accuracy": "決策準確率",
|
||||||
|
"confidence_calibration": "信心校準",
|
||||||
"km_growth_rate": "KM 成長率",
|
"km_growth_rate": "KM 成長率",
|
||||||
"mcp_call_diversity": "MCP 呼叫多樣性",
|
"mcp_call_diversity": "MCP 呼叫多樣性",
|
||||||
|
"auto_execute_success_rate": "自動執行成功率",
|
||||||
|
"human_override_rate": "人工推翻率",
|
||||||
|
"verifier_false_neg_rate": "驗證漏判率",
|
||||||
"current": "當前",
|
"current": "當前",
|
||||||
"target": "目標",
|
"target": "目標",
|
||||||
"sparkline": "7 日趨勢",
|
"sparkline": "7 日趨勢",
|
||||||
"loading": "載入中...",
|
"loading": "載入中...",
|
||||||
"error": "無法載入",
|
"error": "無法載入",
|
||||||
"noData": "暫無資料"
|
"noData": "暫無資料",
|
||||||
|
"sampleCount": "樣本 {count}",
|
||||||
|
"window": "視窗 {window}",
|
||||||
|
"state": {
|
||||||
|
"ok": "正常",
|
||||||
|
"warning": "低於目標",
|
||||||
|
"violated": "硬紅線",
|
||||||
|
"skipped_low_volume": "低樣本等待",
|
||||||
|
"no_data": "沒有資料",
|
||||||
|
"error": "查詢失敗",
|
||||||
|
"partial": "部分可評估"
|
||||||
|
},
|
||||||
|
"reason": {
|
||||||
|
"none": "無",
|
||||||
|
"denominator_below_minimum_events": "分母事件不足",
|
||||||
|
"prometheus_nan_or_inf": "Prometheus 暫無有效分母",
|
||||||
|
"prometheus_empty_result_metric_not_emitted": "Prometheus 尚未回傳指標",
|
||||||
|
"unknown": "原因待查"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"chart": {
|
"chart": {
|
||||||
"title": "30 日違反事件時序",
|
"title": "30 日違反事件時序",
|
||||||
|
|||||||
@@ -31,11 +31,32 @@ const API_BASE = process.env.NEXT_PUBLIC_API_URL ?? ''
|
|||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
|
||||||
interface SloApiResponse {
|
interface SloApiResponse {
|
||||||
metrics?: {
|
metrics?: Array<{
|
||||||
|
name: SloMetric['name']
|
||||||
|
value: number | null
|
||||||
|
threshold: number
|
||||||
|
direction: 'above' | 'below'
|
||||||
|
sample_count: number
|
||||||
|
violated: boolean
|
||||||
|
}> | {
|
||||||
decision_accuracy?: { current: number; target: number; status: string; sparkline?: number[] }
|
decision_accuracy?: { current: number; target: number; status: string; sparkline?: number[] }
|
||||||
km_growth_rate?: { current: number; target: number; status: string; sparkline?: number[] }
|
km_growth_rate?: { current: number; target: number; status: string; sparkline?: number[] }
|
||||||
mcp_call_diversity?: { current: number; target: number; status: string; sparkline?: number[] }
|
mcp_call_diversity?: { current: number; target: number; status: string; sparkline?: number[] }
|
||||||
}
|
}
|
||||||
|
adr100?: {
|
||||||
|
overall_status?: string
|
||||||
|
overall_compliance?: number | null
|
||||||
|
metrics?: Array<{
|
||||||
|
name: SloMetric['name']
|
||||||
|
value: number | null
|
||||||
|
target: number
|
||||||
|
status: 'ok' | 'warning' | 'violated' | 'skipped_low_volume' | 'no_data' | 'error'
|
||||||
|
unit: 'percent' | 'count'
|
||||||
|
sample_count?: number | null
|
||||||
|
window?: string
|
||||||
|
reason?: string | null
|
||||||
|
}>
|
||||||
|
}
|
||||||
overall_compliance?: number
|
overall_compliance?: number
|
||||||
computed_at?: string
|
computed_at?: string
|
||||||
}
|
}
|
||||||
@@ -51,15 +72,55 @@ interface SummaryApiResponse {
|
|||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
|
||||||
function mapStatus(s: string): SloMetric['status'] {
|
function mapStatus(s: string): SloMetric['status'] {
|
||||||
if (s === 'healthy') return 'healthy'
|
if (s === 'healthy' || s === 'ok') return 'healthy'
|
||||||
if (s === 'warning') return 'warning'
|
if (s === 'warning') return 'warning'
|
||||||
|
if (s === 'skipped_low_volume') return 'syncing'
|
||||||
|
if (s === 'no_data') return 'idle'
|
||||||
return 'critical'
|
return 'critical'
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildMetrics(api: SloApiResponse): SloMetric[] {
|
function buildMetrics(api: SloApiResponse): SloMetric[] {
|
||||||
|
const adr100Metrics = api.adr100?.metrics
|
||||||
|
if (adr100Metrics?.length) {
|
||||||
|
const order: SloMetric['name'][] = ['autonomy_rate', 'decision_accuracy', 'confidence_calibration', 'km_growth_rate']
|
||||||
|
const byName = new Map(adr100Metrics.map(metric => [metric.name, metric]))
|
||||||
|
const built: SloMetric[] = []
|
||||||
|
order.forEach(name => {
|
||||||
|
const entry = byName.get(name)
|
||||||
|
if (!entry) return
|
||||||
|
built.push({
|
||||||
|
name,
|
||||||
|
current: entry.value ?? null,
|
||||||
|
target: entry.target,
|
||||||
|
status: mapStatus(entry.status),
|
||||||
|
state: entry.status,
|
||||||
|
unit: entry.unit === 'count' ? 'count' : '%',
|
||||||
|
sparkline: [],
|
||||||
|
sampleCount: entry.sample_count ?? null,
|
||||||
|
window: entry.window,
|
||||||
|
reason: entry.reason ?? null,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
return built
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Array.isArray(api.metrics)) {
|
||||||
|
return api.metrics.map(entry => ({
|
||||||
|
name: entry.name,
|
||||||
|
current: entry.value,
|
||||||
|
target: entry.threshold,
|
||||||
|
status: entry.value == null ? 'syncing' : entry.violated ? 'critical' : 'healthy',
|
||||||
|
state: entry.value == null ? 'skipped_low_volume' : entry.violated ? 'violated' : 'ok',
|
||||||
|
unit: '%',
|
||||||
|
sparkline: [],
|
||||||
|
sampleCount: entry.sample_count,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
const m = api.metrics ?? {}
|
const m = api.metrics ?? {}
|
||||||
const names: Array<SloMetric['name']> = ['decision_accuracy', 'km_growth_rate', 'mcp_call_diversity']
|
if (Array.isArray(m)) return []
|
||||||
return names.map(name => {
|
const names: Array<'decision_accuracy' | 'km_growth_rate' | 'mcp_call_diversity'> = ['decision_accuracy', 'km_growth_rate', 'mcp_call_diversity']
|
||||||
|
return names.map((name): SloMetric => {
|
||||||
const entry = m[name]
|
const entry = m[name]
|
||||||
return {
|
return {
|
||||||
name,
|
name,
|
||||||
@@ -111,7 +172,7 @@ export function SloTab() {
|
|||||||
}, [])
|
}, [])
|
||||||
|
|
||||||
const metrics = sloData ? buildMetrics(sloData) : []
|
const metrics = sloData ? buildMetrics(sloData) : []
|
||||||
const compliance = sloData?.overall_compliance ?? null
|
const compliance = sloData?.adr100?.overall_compliance ?? sloData?.overall_compliance ?? null
|
||||||
|
|
||||||
const chartData: ViolationDataPoint[] = summaryData?.data ?? []
|
const chartData: ViolationDataPoint[] = summaryData?.data ?? []
|
||||||
const eventTypes: string[] = summaryData?.event_types ?? []
|
const eventTypes: string[] = summaryData?.event_types ?? []
|
||||||
@@ -169,7 +230,7 @@ export function SloTab() {
|
|||||||
className="slo-kpi-grid"
|
className="slo-kpi-grid"
|
||||||
>
|
>
|
||||||
{sloLoading
|
{sloLoading
|
||||||
? [0, 1, 2].map(i => <SloKpiCard key={i} metric={{ name: 'decision_accuracy', current: null, target: 0.9, status: 'warning' }} loading />)
|
? [0, 1, 2, 3].map(i => <SloKpiCard key={i} metric={{ name: 'decision_accuracy', current: null, target: 0.9, status: 'warning' }} loading />)
|
||||||
: metrics.map(m => <SloKpiCard key={m.name} metric={m} />)
|
: metrics.map(m => <SloKpiCard key={m.name} metric={m} />)
|
||||||
}
|
}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -24,12 +24,24 @@ import { useTranslations } from 'next-intl'
|
|||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
|
||||||
export interface SloMetric {
|
export interface SloMetric {
|
||||||
name: 'decision_accuracy' | 'km_growth_rate' | 'mcp_call_diversity'
|
name:
|
||||||
|
| 'autonomy_rate'
|
||||||
|
| 'decision_accuracy'
|
||||||
|
| 'confidence_calibration'
|
||||||
|
| 'km_growth_rate'
|
||||||
|
| 'mcp_call_diversity'
|
||||||
|
| 'auto_execute_success_rate'
|
||||||
|
| 'human_override_rate'
|
||||||
|
| 'verifier_false_neg_rate'
|
||||||
current: number | null
|
current: number | null
|
||||||
target: number
|
target: number
|
||||||
status: 'healthy' | 'warning' | 'critical'
|
status: 'healthy' | 'warning' | 'critical' | 'idle' | 'syncing'
|
||||||
unit?: string
|
state?: 'ok' | 'warning' | 'violated' | 'skipped_low_volume' | 'no_data' | 'error' | 'partial'
|
||||||
|
unit?: '%' | 'count'
|
||||||
sparkline?: number[] // 7 points, most recent last
|
sparkline?: number[] // 7 points, most recent last
|
||||||
|
sampleCount?: number | null
|
||||||
|
window?: string
|
||||||
|
reason?: string | null
|
||||||
}
|
}
|
||||||
|
|
||||||
interface SloKpiCardProps {
|
interface SloKpiCardProps {
|
||||||
@@ -45,6 +57,22 @@ const statusColor: Record<SloMetric['status'], string> = {
|
|||||||
healthy: '#22C55E',
|
healthy: '#22C55E',
|
||||||
warning: '#F59E0B',
|
warning: '#F59E0B',
|
||||||
critical: '#FF3300',
|
critical: '#FF3300',
|
||||||
|
idle: '#87867f',
|
||||||
|
syncing: '#3B82F6',
|
||||||
|
}
|
||||||
|
|
||||||
|
function formatCompactNumber(value: number): string {
|
||||||
|
if (value >= 100) return value.toFixed(0)
|
||||||
|
if (value >= 10) return value.toFixed(1)
|
||||||
|
return value.toFixed(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
function reasonKey(reason?: string | null): string {
|
||||||
|
if (!reason) return 'none'
|
||||||
|
if (reason === 'denominator_below_minimum_events') return 'denominator_below_minimum_events'
|
||||||
|
if (reason === 'prometheus_nan_or_inf') return 'prometheus_nan_or_inf'
|
||||||
|
if (reason === 'prometheus_empty_result_metric_not_emitted') return 'prometheus_empty_result_metric_not_emitted'
|
||||||
|
return 'unknown'
|
||||||
}
|
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -73,21 +101,24 @@ export function SloKpiCard({ metric, loading = false }: SloKpiCardProps) {
|
|||||||
if (loading) return <KpiSkeleton />
|
if (loading) return <KpiSkeleton />
|
||||||
|
|
||||||
const color = statusColor[metric.status]
|
const color = statusColor[metric.status]
|
||||||
const orbStatus: StatusType = metric.status === 'healthy' ? 'healthy'
|
const orbStatus: StatusType = metric.status
|
||||||
: metric.status === 'warning' ? 'warning'
|
|
||||||
: 'critical'
|
|
||||||
|
|
||||||
const formattedValue = metric.current == null
|
const formattedValue = metric.current == null
|
||||||
? '--'
|
? '--'
|
||||||
: metric.unit === '%'
|
: metric.unit === '%'
|
||||||
? `${(metric.current * 100).toFixed(1)}%`
|
? `${(metric.current * 100).toFixed(1)}%`
|
||||||
: metric.current.toFixed(2)
|
: metric.current.toFixed(0)
|
||||||
|
|
||||||
const formattedTarget = metric.unit === '%'
|
const formattedTarget = metric.unit === '%'
|
||||||
? `${(metric.target * 100).toFixed(0)}%`
|
? `${(metric.target * 100).toFixed(0)}%`
|
||||||
: metric.target.toFixed(2)
|
: metric.target.toFixed(0)
|
||||||
|
|
||||||
const sparkData = (metric.sparkline ?? Array(7).fill(0)).map((v, i) => ({ i, v }))
|
const sparkData = (metric.sparkline ?? Array(7).fill(0)).map((v, i) => ({ i, v }))
|
||||||
|
const stateLabel = metric.state ? t(`state.${metric.state}`) : ''
|
||||||
|
const reasonLabel = metric.reason ? t(`reason.${reasonKey(metric.reason)}`) : null
|
||||||
|
const sampleLabel = metric.sampleCount == null
|
||||||
|
? null
|
||||||
|
: t('sampleCount', { count: formatCompactNumber(metric.sampleCount) })
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<GlassCard variant="elevated" padding="md" className="min-w-0 flex-1">
|
<GlassCard variant="elevated" padding="md" className="min-w-0 flex-1">
|
||||||
@@ -114,35 +145,46 @@ export function SloKpiCard({ metric, loading = false }: SloKpiCardProps) {
|
|||||||
color,
|
color,
|
||||||
lineHeight: 1,
|
lineHeight: 1,
|
||||||
marginBottom: 4,
|
marginBottom: 4,
|
||||||
letterSpacing: '-0.5px',
|
letterSpacing: 0,
|
||||||
}}>
|
}}>
|
||||||
{formattedValue}
|
{formattedValue}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Target + sparkline row */}
|
<div style={{ display: 'flex', flexDirection: 'column', gap: 8 }}>
|
||||||
<div style={{ display: 'flex', alignItems: 'flex-end', justifyContent: 'space-between' }}>
|
{/* Target + sparkline row */}
|
||||||
<span style={{
|
<div style={{ display: 'flex', alignItems: 'flex-end', justifyContent: 'space-between', gap: 8 }}>
|
||||||
fontFamily: "'DM Mono', monospace",
|
<span style={{
|
||||||
fontSize: 10,
|
fontFamily: "'DM Mono', monospace",
|
||||||
color: '#87867f',
|
fontSize: 10,
|
||||||
}}>
|
color: '#87867f',
|
||||||
{t('target')} {formattedTarget}
|
}}>
|
||||||
</span>
|
{t('target')} {formattedTarget}
|
||||||
|
</span>
|
||||||
|
|
||||||
{/* Sparkline 80×24px */}
|
{/* Sparkline 80×24px */}
|
||||||
<div style={{ width: 80, height: 24 }} aria-label={t('sparkline')}>
|
<div style={{ width: 80, height: 24, flexShrink: 0 }} aria-label={t('sparkline')}>
|
||||||
<ResponsiveContainer width="100%" height="100%">
|
<ResponsiveContainer width="100%" height="100%">
|
||||||
<LineChart data={sparkData} margin={{ top: 2, right: 0, bottom: 2, left: 0 }}>
|
<LineChart data={sparkData} margin={{ top: 2, right: 0, bottom: 2, left: 0 }}>
|
||||||
<Line
|
<Line
|
||||||
type="monotone"
|
type="monotone"
|
||||||
dataKey="v"
|
dataKey="v"
|
||||||
stroke={color}
|
stroke={color}
|
||||||
strokeWidth={1.5}
|
strokeWidth={1.5}
|
||||||
dot={false}
|
dot={false}
|
||||||
isAnimationActive={false}
|
isAnimationActive={false}
|
||||||
/>
|
/>
|
||||||
</LineChart>
|
</LineChart>
|
||||||
</ResponsiveContainer>
|
</ResponsiveContainer>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style={{ display: 'flex', flexDirection: 'column', gap: 3, minHeight: 28 }}>
|
||||||
|
<span style={{ fontFamily: "'DM Mono', monospace", fontSize: 10, color }}>
|
||||||
|
{stateLabel}
|
||||||
|
</span>
|
||||||
|
<span style={{ fontFamily: "'DM Mono', monospace", fontSize: 9, color: '#87867f', lineHeight: 1.35 }}>
|
||||||
|
{reasonLabel ?? sampleLabel ?? (metric.window ? t('window', { window: metric.window }) : '')}
|
||||||
|
</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</GlassCard>
|
</GlassCard>
|
||||||
|
|||||||
Reference in New Issue
Block a user