feat(governance): surface adr100 slo states
This commit is contained in:
@@ -20,6 +20,7 @@ from __future__ import annotations
|
||||
import structlog
|
||||
from fastapi import APIRouter, Query
|
||||
|
||||
from src.services.adr100_slo_status_service import get_adr100_slo_status_service
|
||||
from src.services.ai_slo_calculator import AiSloCalculator
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
@@ -50,9 +51,11 @@ async def get_ai_slo(
|
||||
if cached:
|
||||
data = cached.to_dict()
|
||||
data["cache_hit"] = True
|
||||
data["adr100"] = await get_adr100_slo_status_service().fetch_report()
|
||||
return data
|
||||
|
||||
report = await calc.run()
|
||||
data = report.to_dict()
|
||||
data["cache_hit"] = False
|
||||
data["adr100"] = await get_adr100_slo_status_service().fetch_report()
|
||||
return data
|
||||
|
||||
278
apps/api/src/services/adr100_slo_status_service.py
Normal file
278
apps/api/src/services/adr100_slo_status_service.py
Normal file
@@ -0,0 +1,278 @@
|
||||
"""
|
||||
Read-only ADR-100 SLO status snapshot.
|
||||
|
||||
GovernanceAgent.check_slo_compliance() can emit governance alerts when an SLO is
|
||||
violated. This service is intentionally read-only so dashboards can show the
|
||||
same Prometheus-backed state without producing Telegram/DB side effects.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
import structlog
|
||||
|
||||
from src.core.config import settings
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Adr100SloDefinition:
    """Static description of one ADR-100 SLO and the PromQL used to evaluate it.

    Instances are immutable. Definitions that could never be evaluated
    correctly are rejected at construction time instead of silently producing
    a permanently skipped or mis-classified metric.

    Raises:
        ValueError: If ``direction`` is neither ``"above"`` nor ``"below"``, or
            if ``denominator_query`` is set without a positive
            ``denominator_window_seconds``.
    """

    # Identifier reported as the metric's "name" in the status payload.
    name: str
    # PromQL expression whose first sample is taken as the current value.
    query: str
    # Boundary between "ok" and "warning".
    target: float
    # Boundary between "warning" and "violated".
    hard_red_line: float
    # "above": values below the thresholds are bad; "below": values above are bad.
    direction: str
    # Passed through to the payload unchanged (e.g. "percent", "count").
    unit: str
    # Passed through to the payload unchanged (e.g. "5m", "1h", "24h").
    window: str
    # Optional PromQL rate query used to estimate how many events back the value.
    denominator_query: str | None = None
    # Multiplied by the denominator rate to estimate the event count in the window.
    denominator_window_seconds: int = 0
    # Estimated event counts below this mark the metric "skipped_low_volume".
    minimum_events: float = 1.0

    def __post_init__(self) -> None:
        """Validate fields that would otherwise fail silently at evaluation time."""
        if self.direction not in ("above", "below"):
            raise ValueError(
                f"SLO {self.name!r}: direction must be 'above' or 'below', "
                f"got {self.direction!r}"
            )
        # rate * 0 seconds is always 0 events, which is always below
        # minimum_events, so the SLO would never become evaluable.
        if self.denominator_query and self.denominator_window_seconds <= 0:
            raise ValueError(
                f"SLO {self.name!r}: denominator_query requires a positive "
                "denominator_window_seconds"
            )
|
||||
|
||||
|
||||
# The four ADR-100 SLOs, in the order they are reported. The three ratio SLOs
# carry a denominator rate query so low-traffic windows can be skipped; the
# count-based KM growth SLO has no denominator.
ADR100_SLO_DEFINITIONS: tuple[Adr100SloDefinition, ...] = (
    # Autonomy rate over 5 minutes.
    Adr100SloDefinition(
        name="autonomy_rate",
        window="5m",
        unit="percent",
        direction="above",
        target=0.80,
        hard_red_line=0.70,
        query="sli:autonomy_rate:5m",
        denominator_query="sum(rate(automation_operation_log_total[5m]))",
        denominator_window_seconds=300,
    ),
    # Decision accuracy over 5 minutes, weighted by auto-executed operations.
    Adr100SloDefinition(
        name="decision_accuracy",
        window="5m",
        unit="percent",
        direction="above",
        target=0.90,
        hard_red_line=0.85,
        query="sli:decision_accuracy:5m",
        denominator_query='sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))',
        denominator_window_seconds=300,
    ),
    # Confidence calibration over 1 hour.
    Adr100SloDefinition(
        name="confidence_calibration",
        window="1h",
        unit="percent",
        direction="above",
        target=0.80,
        hard_red_line=0.70,
        query="sli:confidence_calibration:1h",
        denominator_query="sum(rate(approval_records_high_confidence_total[1h]))",
        denominator_window_seconds=3600,
    ),
    # Knowledge entries created over 24 hours (a count, not a ratio).
    Adr100SloDefinition(
        name="km_growth_rate",
        window="24h",
        unit="count",
        direction="above",
        target=20.0,
        hard_red_line=5.0,
        query="max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)",
    ),
)
|
||||
|
||||
|
||||
class Adr100SloStatusService:
    """Fetch ADR-100 SLO status from Prometheus without writing governance events."""

    # Used when settings has no PROMETHEUS_URL, or it is empty/None.
    _DEFAULT_PROMETHEUS_URL = "http://prometheus.observability.svc:9090"

    async def fetch_report(self) -> dict[str, Any]:
        """Evaluate every ADR-100 SLO and return a snapshot dict.

        Returns:
            A dict with ``schema_version``, ``source``, ``evaluated_at`` (UTC,
            ISO-8601), ``overall_status``, ``overall_compliance`` (share of
            evaluable metrics whose status is "ok", or ``None`` when no metric
            is evaluable), ``evaluable_count``, ``metric_count`` and the
            per-metric payloads under ``metrics`` in definition order.
        """
        # Function-scope import so this block does not depend on a change to
        # the module's import section.
        import asyncio

        # `or` also covers a setting that exists but is None/"" — calling
        # .rstrip on None would raise, and "" would yield relative URLs.
        configured_url = getattr(settings, "PROMETHEUS_URL", None)
        prom_url = (configured_url or self._DEFAULT_PROMETHEUS_URL).rstrip("/")

        async with httpx.AsyncClient(timeout=5.0) as client:
            # The SLOs are independent of each other, so evaluate them
            # concurrently instead of paying each timeout in sequence.
            # gather() returns results in the order the awaitables were given.
            metrics: list[dict[str, Any]] = list(
                await asyncio.gather(
                    *(
                        self._fetch_metric(client, prom_url, definition)
                        for definition in ADR100_SLO_DEFINITIONS
                    )
                )
            )

        evaluable = [metric for metric in metrics if metric.get("evaluable")]
        ok_count = sum(1 for metric in evaluable if metric.get("status") == "ok")
        overall_compliance = (ok_count / len(evaluable)) if evaluable else None
        overall_status = _overall_status(metrics, evaluable)

        return {
            "schema_version": "adr100_slo_status_v1",
            "source": "prometheus",
            "evaluated_at": datetime.now(UTC).isoformat(),
            "overall_status": overall_status,
            "overall_compliance": overall_compliance,
            "evaluable_count": len(evaluable),
            "metric_count": len(metrics),
            "metrics": metrics,
        }

    @staticmethod
    def _status_for_failed_query(reason: str) -> str:
        """Map a failed query's reason to the metric status to report.

        A NaN/inf sample is reported as "skipped_low_volume"; every other
        failure is reported as "no_data". Both the denominator query and the
        value query use this mapping so the same reason yields the same status.
        """
        return "skipped_low_volume" if reason == "prometheus_nan_or_inf" else "no_data"

    async def _fetch_metric(
        self,
        client: httpx.AsyncClient,
        prom_url: str,
        definition: Adr100SloDefinition,
    ) -> dict[str, Any]:
        """Evaluate a single SLO definition and return its status payload.

        When the definition has a denominator query it is evaluated first: its
        rate multiplied by ``denominator_window_seconds`` estimates the event
        count, and the SLO is skipped when that estimate is below
        ``minimum_events``. Only then is the value query run and classified.

        Args:
            client: HTTP client used for the Prometheus requests.
            prom_url: Prometheus base URL without a trailing slash.
            definition: The SLO to evaluate.

        Returns:
            The payload built by ``_metric_payload``.
        """
        denominator_value: float | None = None
        sample_count: float | None = None

        if definition.denominator_query:
            denominator_result = await _query_prometheus_value(
                client,
                prom_url,
                definition.denominator_query,
            )
            if denominator_result["status"] != "ok":
                return _metric_payload(
                    definition,
                    value=None,
                    status=self._status_for_failed_query(denominator_result["reason"]),
                    reason=denominator_result["reason"],
                    denominator_value=None,
                    sample_count=None,
                )

            denominator_value = float(denominator_result["value"])
            # rate (events/second) * window length (seconds) ~= events in window.
            sample_count = denominator_value * definition.denominator_window_seconds
            if sample_count < definition.minimum_events:
                return _metric_payload(
                    definition,
                    value=None,
                    status="skipped_low_volume",
                    reason="denominator_below_minimum_events",
                    denominator_value=denominator_value,
                    sample_count=sample_count,
                )

        value_result = await _query_prometheus_value(client, prom_url, definition.query)
        if value_result["status"] != "ok":
            return _metric_payload(
                definition,
                value=None,
                status=self._status_for_failed_query(value_result["reason"]),
                reason=value_result["reason"],
                denominator_value=denominator_value,
                sample_count=sample_count,
            )

        value = float(value_result["value"])
        return _metric_payload(
            definition,
            value=value,
            status=_classify_status(value, definition),
            reason=None,
            denominator_value=denominator_value,
            # Without a denominator query the value itself is reported as the
            # sample count.
            sample_count=sample_count if sample_count is not None else value,
        )
|
||||
|
||||
|
||||
async def _query_prometheus_value(
|
||||
client: httpx.AsyncClient,
|
||||
prom_url: str,
|
||||
query: str,
|
||||
) -> dict[str, Any]:
|
||||
try:
|
||||
response = await client.get(
|
||||
f"{prom_url}/api/v1/query",
|
||||
params={"query": query},
|
||||
)
|
||||
data = response.json()
|
||||
if data.get("status") != "success":
|
||||
return {"status": "error", "reason": "prometheus_query_failed"}
|
||||
|
||||
results = data.get("data", {}).get("result", [])
|
||||
if not results:
|
||||
return {
|
||||
"status": "no_data",
|
||||
"reason": "prometheus_empty_result_metric_not_emitted",
|
||||
}
|
||||
|
||||
raw_value = results[0]["value"][1]
|
||||
value = float(raw_value)
|
||||
if not math.isfinite(value):
|
||||
return {
|
||||
"status": "skipped",
|
||||
"reason": "prometheus_nan_or_inf",
|
||||
"raw_value": raw_value,
|
||||
}
|
||||
return {"status": "ok", "value": value}
|
||||
except Exception as exc:
|
||||
logger.warning("adr100_slo_prometheus_query_error", query=query, error=str(exc))
|
||||
return {"status": "error", "reason": "prometheus_query_error"}
|
||||
|
||||
|
||||
def _metric_payload(
|
||||
definition: Adr100SloDefinition,
|
||||
*,
|
||||
value: float | None,
|
||||
status: str,
|
||||
reason: str | None,
|
||||
denominator_value: float | None,
|
||||
sample_count: float | None,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"name": definition.name,
|
||||
"query": definition.query,
|
||||
"value": value,
|
||||
"target": definition.target,
|
||||
"hard_red_line": definition.hard_red_line,
|
||||
"direction": definition.direction,
|
||||
"unit": definition.unit,
|
||||
"window": definition.window,
|
||||
"status": status,
|
||||
"evaluable": status in {"ok", "warning", "violated"},
|
||||
"reason": reason,
|
||||
"denominator_query": definition.denominator_query,
|
||||
"denominator_value": denominator_value,
|
||||
"sample_count": sample_count,
|
||||
}
|
||||
|
||||
|
||||
def _classify_status(value: float, definition: Adr100SloDefinition) -> str:
|
||||
if definition.direction == "above":
|
||||
if value < definition.hard_red_line:
|
||||
return "violated"
|
||||
if value < definition.target:
|
||||
return "warning"
|
||||
return "ok"
|
||||
|
||||
if value > definition.hard_red_line:
|
||||
return "violated"
|
||||
if value > definition.target:
|
||||
return "warning"
|
||||
return "ok"
|
||||
|
||||
|
||||
def _overall_status(metrics: list[dict[str, Any]], evaluable: list[dict[str, Any]]) -> str:
|
||||
if any(metric.get("status") == "violated" for metric in metrics):
|
||||
return "violated"
|
||||
if any(metric.get("status") == "warning" for metric in metrics):
|
||||
return "warning"
|
||||
if evaluable and any(metric.get("status") == "skipped_low_volume" for metric in metrics):
|
||||
return "partial"
|
||||
if evaluable:
|
||||
return "ok"
|
||||
if any(metric.get("status") == "no_data" for metric in metrics):
|
||||
return "no_data"
|
||||
return "skipped_low_volume"
|
||||
|
||||
|
||||
# Process-wide service instance, created on first use.
_adr100_slo_status_service: Adr100SloStatusService | None = None


def get_adr100_slo_status_service() -> Adr100SloStatusService:
    """Return the shared ``Adr100SloStatusService``, creating it on first call."""
    global _adr100_slo_status_service
    service = _adr100_slo_status_service
    if service is None:
        service = Adr100SloStatusService()
        _adr100_slo_status_service = service
    return service
|
||||
89
apps/api/tests/test_adr100_slo_status_service.py
Normal file
89
apps/api/tests/test_adr100_slo_status_service.py
Normal file
@@ -0,0 +1,89 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from src.services.adr100_slo_status_service import Adr100SloStatusService
|
||||
|
||||
|
||||
class _FakePrometheusResponse:
|
||||
def __init__(self, payload: dict[str, Any]) -> None:
|
||||
self._payload = payload
|
||||
|
||||
def json(self) -> dict[str, Any]:
|
||||
return self._payload
|
||||
|
||||
|
||||
class _FakePrometheusClient:
|
||||
def __init__(self, values: dict[str, str]) -> None:
|
||||
self.values = values
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
async def get(self, *args, **kwargs): # noqa: ANN002, ANN003
|
||||
query = str(kwargs.get("params", {}).get("query", ""))
|
||||
value = self.values.get(query)
|
||||
if value is None:
|
||||
return _FakePrometheusResponse({
|
||||
"status": "success",
|
||||
"data": {"result": []},
|
||||
})
|
||||
return _FakePrometheusResponse({
|
||||
"status": "success",
|
||||
"data": {"result": [{"value": [1778756604, value]}]},
|
||||
})
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_fetch_report_marks_ratio_slos_low_volume(monkeypatch):
    """Ratio SLOs with a zero denominator rate are skipped; the count SLO still evaluates."""
    values = {
        "sum(rate(automation_operation_log_total[5m]))": "0",
        'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
        "sum(rate(approval_records_high_confidence_total[1h]))": "0",
        "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "24",
    }

    monkeypatch.setattr(
        "httpx.AsyncClient",
        lambda *args, **kwargs: _FakePrometheusClient(values),
    )

    report = await Adr100SloStatusService().fetch_report()

    by_name = {metric["name"]: metric for metric in report["metrics"]}

    # Check every ratio SLO, not just a subset, so a regression in any one of
    # them is caught.
    for ratio_slo in ("autonomy_rate", "decision_accuracy", "confidence_calibration"):
        assert by_name[ratio_slo]["status"] == "skipped_low_volume"
        assert by_name[ratio_slo]["evaluable"] is False
        assert by_name[ratio_slo]["reason"] == "denominator_below_minimum_events"
        assert by_name[ratio_slo]["value"] is None

    assert by_name["km_growth_rate"]["status"] == "ok"
    assert by_name["km_growth_rate"]["value"] == 24
    assert report["overall_status"] == "partial"
    assert report["overall_compliance"] == 1.0
    assert report["evaluable_count"] == 1
    assert report["metric_count"] == 4
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_fetch_report_classifies_hard_red_line_violation(monkeypatch):
    """Values below the hard red line are 'violated' and dominate the overall status."""
    values = {
        "sum(rate(automation_operation_log_total[5m]))": "0.02",
        "sli:autonomy_rate:5m": "0.5",
        'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
        "sum(rate(approval_records_high_confidence_total[1h]))": "0",
        "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "3",
    }

    monkeypatch.setattr(
        "httpx.AsyncClient",
        lambda *args, **kwargs: _FakePrometheusClient(values),
    )

    report = await Adr100SloStatusService().fetch_report()

    by_name = {metric["name"]: metric for metric in report["metrics"]}
    assert by_name["autonomy_rate"]["status"] == "violated"
    # sample_count is a float product (rate * window seconds); compare
    # approximately instead of relying on exact binary rounding.
    assert by_name["autonomy_rate"]["sample_count"] == pytest.approx(6)
    assert by_name["km_growth_rate"]["status"] == "violated"
    assert report["overall_status"] == "violated"
    # Two metrics are evaluable (both violated), so none of them is "ok".
    assert report["evaluable_count"] == 2
    assert report["overall_compliance"] == 0.0
|
||||
@@ -1354,15 +1354,38 @@
|
||||
"comingSoon": "This tab is coming soon",
|
||||
"slo": {
|
||||
"kpi": {
|
||||
"autonomy_rate": "Autonomy Rate",
|
||||
"decision_accuracy": "Decision Accuracy",
|
||||
"confidence_calibration": "Confidence Calibration",
|
||||
"km_growth_rate": "KM Growth Rate",
|
||||
"mcp_call_diversity": "MCP Call Diversity",
|
||||
"auto_execute_success_rate": "Auto Execute Success",
|
||||
"human_override_rate": "Human Override Rate",
|
||||
"verifier_false_neg_rate": "Verifier False Negative",
|
||||
"current": "Current",
|
||||
"target": "Target",
|
||||
"sparkline": "7-day trend",
|
||||
"loading": "Loading...",
|
||||
"error": "Failed to load",
|
||||
"noData": "No data"
|
||||
"noData": "No data",
|
||||
"sampleCount": "Samples {count}",
|
||||
"window": "Window {window}",
|
||||
"state": {
|
||||
"ok": "OK",
|
||||
"warning": "Below target",
|
||||
"violated": "Hard red line",
|
||||
"skipped_low_volume": "Low sample wait",
|
||||
"no_data": "No data",
|
||||
"error": "Query failed",
|
||||
"partial": "Partially evaluable"
|
||||
},
|
||||
"reason": {
|
||||
"none": "None",
|
||||
"denominator_below_minimum_events": "Denominator events too low",
|
||||
"prometheus_nan_or_inf": "Prometheus has no valid denominator yet",
|
||||
"prometheus_empty_result_metric_not_emitted": "Prometheus has not returned the metric yet",
|
||||
"unknown": "Reason pending"
|
||||
}
|
||||
},
|
||||
"chart": {
|
||||
"title": "30-day Violation Timeline",
|
||||
|
||||
@@ -1355,15 +1355,38 @@
|
||||
"comingSoon": "本 Tab 即將上線",
|
||||
"slo": {
|
||||
"kpi": {
|
||||
"autonomy_rate": "自主化率",
|
||||
"decision_accuracy": "決策準確率",
|
||||
"confidence_calibration": "信心校準",
|
||||
"km_growth_rate": "KM 成長率",
|
||||
"mcp_call_diversity": "MCP 呼叫多樣性",
|
||||
"auto_execute_success_rate": "自動執行成功率",
|
||||
"human_override_rate": "人工推翻率",
|
||||
"verifier_false_neg_rate": "驗證漏判率",
|
||||
"current": "當前",
|
||||
"target": "目標",
|
||||
"sparkline": "7 日趨勢",
|
||||
"loading": "載入中...",
|
||||
"error": "無法載入",
|
||||
"noData": "暫無資料"
|
||||
"noData": "暫無資料",
|
||||
"sampleCount": "樣本 {count}",
|
||||
"window": "視窗 {window}",
|
||||
"state": {
|
||||
"ok": "正常",
|
||||
"warning": "低於目標",
|
||||
"violated": "硬紅線",
|
||||
"skipped_low_volume": "低樣本等待",
|
||||
"no_data": "沒有資料",
|
||||
"error": "查詢失敗",
|
||||
"partial": "部分可評估"
|
||||
},
|
||||
"reason": {
|
||||
"none": "無",
|
||||
"denominator_below_minimum_events": "分母事件不足",
|
||||
"prometheus_nan_or_inf": "Prometheus 暫無有效分母",
|
||||
"prometheus_empty_result_metric_not_emitted": "Prometheus 尚未回傳指標",
|
||||
"unknown": "原因待查"
|
||||
}
|
||||
},
|
||||
"chart": {
|
||||
"title": "30 日違反事件時序",
|
||||
|
||||
@@ -31,11 +31,32 @@ const API_BASE = process.env.NEXT_PUBLIC_API_URL ?? ''
|
||||
// =============================================================================
|
||||
|
||||
interface SloApiResponse {
|
||||
metrics?: {
|
||||
metrics?: Array<{
|
||||
name: SloMetric['name']
|
||||
value: number | null
|
||||
threshold: number
|
||||
direction: 'above' | 'below'
|
||||
sample_count: number
|
||||
violated: boolean
|
||||
}> | {
|
||||
decision_accuracy?: { current: number; target: number; status: string; sparkline?: number[] }
|
||||
km_growth_rate?: { current: number; target: number; status: string; sparkline?: number[] }
|
||||
mcp_call_diversity?: { current: number; target: number; status: string; sparkline?: number[] }
|
||||
}
|
||||
adr100?: {
|
||||
overall_status?: string
|
||||
overall_compliance?: number | null
|
||||
metrics?: Array<{
|
||||
name: SloMetric['name']
|
||||
value: number | null
|
||||
target: number
|
||||
status: 'ok' | 'warning' | 'violated' | 'skipped_low_volume' | 'no_data' | 'error'
|
||||
unit: 'percent' | 'count'
|
||||
sample_count?: number | null
|
||||
window?: string
|
||||
reason?: string | null
|
||||
}>
|
||||
}
|
||||
overall_compliance?: number
|
||||
computed_at?: string
|
||||
}
|
||||
@@ -51,15 +72,55 @@ interface SummaryApiResponse {
|
||||
// =============================================================================
|
||||
|
||||
function mapStatus(s: string): SloMetric['status'] {
|
||||
if (s === 'healthy') return 'healthy'
|
||||
if (s === 'healthy' || s === 'ok') return 'healthy'
|
||||
if (s === 'warning') return 'warning'
|
||||
if (s === 'skipped_low_volume') return 'syncing'
|
||||
if (s === 'no_data') return 'idle'
|
||||
return 'critical'
|
||||
}
|
||||
|
||||
function buildMetrics(api: SloApiResponse): SloMetric[] {
|
||||
const adr100Metrics = api.adr100?.metrics
|
||||
if (adr100Metrics?.length) {
|
||||
const order: SloMetric['name'][] = ['autonomy_rate', 'decision_accuracy', 'confidence_calibration', 'km_growth_rate']
|
||||
const byName = new Map(adr100Metrics.map(metric => [metric.name, metric]))
|
||||
const built: SloMetric[] = []
|
||||
order.forEach(name => {
|
||||
const entry = byName.get(name)
|
||||
if (!entry) return
|
||||
built.push({
|
||||
name,
|
||||
current: entry.value ?? null,
|
||||
target: entry.target,
|
||||
status: mapStatus(entry.status),
|
||||
state: entry.status,
|
||||
unit: entry.unit === 'count' ? 'count' : '%',
|
||||
sparkline: [],
|
||||
sampleCount: entry.sample_count ?? null,
|
||||
window: entry.window,
|
||||
reason: entry.reason ?? null,
|
||||
})
|
||||
})
|
||||
return built
|
||||
}
|
||||
|
||||
if (Array.isArray(api.metrics)) {
|
||||
return api.metrics.map(entry => ({
|
||||
name: entry.name,
|
||||
current: entry.value,
|
||||
target: entry.threshold,
|
||||
status: entry.value == null ? 'syncing' : entry.violated ? 'critical' : 'healthy',
|
||||
state: entry.value == null ? 'skipped_low_volume' : entry.violated ? 'violated' : 'ok',
|
||||
unit: '%',
|
||||
sparkline: [],
|
||||
sampleCount: entry.sample_count,
|
||||
}))
|
||||
}
|
||||
|
||||
const m = api.metrics ?? {}
|
||||
const names: Array<SloMetric['name']> = ['decision_accuracy', 'km_growth_rate', 'mcp_call_diversity']
|
||||
return names.map(name => {
|
||||
if (Array.isArray(m)) return []
|
||||
const names: Array<'decision_accuracy' | 'km_growth_rate' | 'mcp_call_diversity'> = ['decision_accuracy', 'km_growth_rate', 'mcp_call_diversity']
|
||||
return names.map((name): SloMetric => {
|
||||
const entry = m[name]
|
||||
return {
|
||||
name,
|
||||
@@ -111,7 +172,7 @@ export function SloTab() {
|
||||
}, [])
|
||||
|
||||
const metrics = sloData ? buildMetrics(sloData) : []
|
||||
const compliance = sloData?.overall_compliance ?? null
|
||||
const compliance = sloData?.adr100?.overall_compliance ?? sloData?.overall_compliance ?? null
|
||||
|
||||
const chartData: ViolationDataPoint[] = summaryData?.data ?? []
|
||||
const eventTypes: string[] = summaryData?.event_types ?? []
|
||||
@@ -169,7 +230,7 @@ export function SloTab() {
|
||||
className="slo-kpi-grid"
|
||||
>
|
||||
{sloLoading
|
||||
? [0, 1, 2].map(i => <SloKpiCard key={i} metric={{ name: 'decision_accuracy', current: null, target: 0.9, status: 'warning' }} loading />)
|
||||
? [0, 1, 2, 3].map(i => <SloKpiCard key={i} metric={{ name: 'decision_accuracy', current: null, target: 0.9, status: 'warning' }} loading />)
|
||||
: metrics.map(m => <SloKpiCard key={m.name} metric={m} />)
|
||||
}
|
||||
</div>
|
||||
|
||||
@@ -24,12 +24,24 @@ import { useTranslations } from 'next-intl'
|
||||
// =============================================================================
|
||||
|
||||
export interface SloMetric {
|
||||
name: 'decision_accuracy' | 'km_growth_rate' | 'mcp_call_diversity'
|
||||
name:
|
||||
| 'autonomy_rate'
|
||||
| 'decision_accuracy'
|
||||
| 'confidence_calibration'
|
||||
| 'km_growth_rate'
|
||||
| 'mcp_call_diversity'
|
||||
| 'auto_execute_success_rate'
|
||||
| 'human_override_rate'
|
||||
| 'verifier_false_neg_rate'
|
||||
current: number | null
|
||||
target: number
|
||||
status: 'healthy' | 'warning' | 'critical'
|
||||
unit?: string
|
||||
status: 'healthy' | 'warning' | 'critical' | 'idle' | 'syncing'
|
||||
state?: 'ok' | 'warning' | 'violated' | 'skipped_low_volume' | 'no_data' | 'error' | 'partial'
|
||||
unit?: '%' | 'count'
|
||||
sparkline?: number[] // 7 points, most recent last
|
||||
sampleCount?: number | null
|
||||
window?: string
|
||||
reason?: string | null
|
||||
}
|
||||
|
||||
interface SloKpiCardProps {
|
||||
@@ -45,6 +57,22 @@ const statusColor: Record<SloMetric['status'], string> = {
|
||||
healthy: '#22C55E',
|
||||
warning: '#F59E0B',
|
||||
critical: '#FF3300',
|
||||
idle: '#87867f',
|
||||
syncing: '#3B82F6',
|
||||
}
|
||||
|
||||
function formatCompactNumber(value: number): string {
|
||||
if (value >= 100) return value.toFixed(0)
|
||||
if (value >= 10) return value.toFixed(1)
|
||||
return value.toFixed(2)
|
||||
}
|
||||
|
||||
function reasonKey(reason?: string | null): string {
|
||||
if (!reason) return 'none'
|
||||
if (reason === 'denominator_below_minimum_events') return 'denominator_below_minimum_events'
|
||||
if (reason === 'prometheus_nan_or_inf') return 'prometheus_nan_or_inf'
|
||||
if (reason === 'prometheus_empty_result_metric_not_emitted') return 'prometheus_empty_result_metric_not_emitted'
|
||||
return 'unknown'
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
@@ -73,21 +101,24 @@ export function SloKpiCard({ metric, loading = false }: SloKpiCardProps) {
|
||||
if (loading) return <KpiSkeleton />
|
||||
|
||||
const color = statusColor[metric.status]
|
||||
const orbStatus: StatusType = metric.status === 'healthy' ? 'healthy'
|
||||
: metric.status === 'warning' ? 'warning'
|
||||
: 'critical'
|
||||
const orbStatus: StatusType = metric.status
|
||||
|
||||
const formattedValue = metric.current == null
|
||||
? '--'
|
||||
: metric.unit === '%'
|
||||
? `${(metric.current * 100).toFixed(1)}%`
|
||||
: metric.current.toFixed(2)
|
||||
: metric.current.toFixed(0)
|
||||
|
||||
const formattedTarget = metric.unit === '%'
|
||||
? `${(metric.target * 100).toFixed(0)}%`
|
||||
: metric.target.toFixed(2)
|
||||
: metric.target.toFixed(0)
|
||||
|
||||
const sparkData = (metric.sparkline ?? Array(7).fill(0)).map((v, i) => ({ i, v }))
|
||||
const stateLabel = metric.state ? t(`state.${metric.state}`) : ''
|
||||
const reasonLabel = metric.reason ? t(`reason.${reasonKey(metric.reason)}`) : null
|
||||
const sampleLabel = metric.sampleCount == null
|
||||
? null
|
||||
: t('sampleCount', { count: formatCompactNumber(metric.sampleCount) })
|
||||
|
||||
return (
|
||||
<GlassCard variant="elevated" padding="md" className="min-w-0 flex-1">
|
||||
@@ -114,35 +145,46 @@ export function SloKpiCard({ metric, loading = false }: SloKpiCardProps) {
|
||||
color,
|
||||
lineHeight: 1,
|
||||
marginBottom: 4,
|
||||
letterSpacing: '-0.5px',
|
||||
letterSpacing: 0,
|
||||
}}>
|
||||
{formattedValue}
|
||||
</div>
|
||||
|
||||
{/* Target + sparkline row */}
|
||||
<div style={{ display: 'flex', alignItems: 'flex-end', justifyContent: 'space-between' }}>
|
||||
<span style={{
|
||||
fontFamily: "'DM Mono', monospace",
|
||||
fontSize: 10,
|
||||
color: '#87867f',
|
||||
}}>
|
||||
{t('target')} {formattedTarget}
|
||||
</span>
|
||||
<div style={{ display: 'flex', flexDirection: 'column', gap: 8 }}>
|
||||
{/* Target + sparkline row */}
|
||||
<div style={{ display: 'flex', alignItems: 'flex-end', justifyContent: 'space-between', gap: 8 }}>
|
||||
<span style={{
|
||||
fontFamily: "'DM Mono', monospace",
|
||||
fontSize: 10,
|
||||
color: '#87867f',
|
||||
}}>
|
||||
{t('target')} {formattedTarget}
|
||||
</span>
|
||||
|
||||
{/* Sparkline 80×24px */}
|
||||
<div style={{ width: 80, height: 24 }} aria-label={t('sparkline')}>
|
||||
<ResponsiveContainer width="100%" height="100%">
|
||||
<LineChart data={sparkData} margin={{ top: 2, right: 0, bottom: 2, left: 0 }}>
|
||||
<Line
|
||||
type="monotone"
|
||||
dataKey="v"
|
||||
stroke={color}
|
||||
strokeWidth={1.5}
|
||||
dot={false}
|
||||
isAnimationActive={false}
|
||||
/>
|
||||
</LineChart>
|
||||
</ResponsiveContainer>
|
||||
{/* Sparkline 80×24px */}
|
||||
<div style={{ width: 80, height: 24, flexShrink: 0 }} aria-label={t('sparkline')}>
|
||||
<ResponsiveContainer width="100%" height="100%">
|
||||
<LineChart data={sparkData} margin={{ top: 2, right: 0, bottom: 2, left: 0 }}>
|
||||
<Line
|
||||
type="monotone"
|
||||
dataKey="v"
|
||||
stroke={color}
|
||||
strokeWidth={1.5}
|
||||
dot={false}
|
||||
isAnimationActive={false}
|
||||
/>
|
||||
</LineChart>
|
||||
</ResponsiveContainer>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div style={{ display: 'flex', flexDirection: 'column', gap: 3, minHeight: 28 }}>
|
||||
<span style={{ fontFamily: "'DM Mono', monospace", fontSize: 10, color }}>
|
||||
{stateLabel}
|
||||
</span>
|
||||
<span style={{ fontFamily: "'DM Mono', monospace", fontSize: 9, color: '#87867f', lineHeight: 1.35 }}>
|
||||
{reasonLabel ?? sampleLabel ?? (metric.window ? t('window', { window: metric.window }) : '')}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</GlassCard>
|
||||
|
||||
Reference in New Issue
Block a user