diff --git a/apps/api/src/api/v1/ai_slo.py b/apps/api/src/api/v1/ai_slo.py index 8ce200c8..5ddf0e1b 100644 --- a/apps/api/src/api/v1/ai_slo.py +++ b/apps/api/src/api/v1/ai_slo.py @@ -20,6 +20,7 @@ from __future__ import annotations import structlog from fastapi import APIRouter, Query +from src.services.adr100_slo_status_service import get_adr100_slo_status_service from src.services.ai_slo_calculator import AiSloCalculator logger = structlog.get_logger(__name__) @@ -50,9 +51,11 @@ async def get_ai_slo( if cached: data = cached.to_dict() data["cache_hit"] = True + data["adr100"] = await get_adr100_slo_status_service().fetch_report() return data report = await calc.run() data = report.to_dict() data["cache_hit"] = False + data["adr100"] = await get_adr100_slo_status_service().fetch_report() return data diff --git a/apps/api/src/services/adr100_slo_status_service.py b/apps/api/src/services/adr100_slo_status_service.py new file mode 100644 index 00000000..37bd56ee --- /dev/null +++ b/apps/api/src/services/adr100_slo_status_service.py @@ -0,0 +1,278 @@ +""" +Read-only ADR-100 SLO status snapshot. + +GovernanceAgent.check_slo_compliance() can emit governance alerts when an SLO is +violated. This service is intentionally read-only so dashboards can show the +same Prometheus-backed state without producing Telegram/DB side effects. +""" + +from __future__ import annotations + +import math +from dataclasses import dataclass +from datetime import UTC, datetime +from typing import Any + +import httpx +import structlog + +from src.core.config import settings + +logger = structlog.get_logger(__name__) + + +@dataclass(frozen=True) +class Adr100SloDefinition: + name: str + query: str + target: float + hard_red_line: float + direction: str + unit: str + window: str + denominator_query: str | None = None + denominator_window_seconds: int = 0 + minimum_events: float = 1.0 + + +ADR100_SLO_DEFINITIONS: tuple[Adr100SloDefinition, ...] = ( + Adr100SloDefinition( + name="autonomy_rate", + query="sli:autonomy_rate:5m", + target=0.80, + hard_red_line=0.70, + direction="above", + unit="percent", + window="5m", + denominator_query="sum(rate(automation_operation_log_total[5m]))", + denominator_window_seconds=300, + ), + Adr100SloDefinition( + name="decision_accuracy", + query="sli:decision_accuracy:5m", + target=0.90, + hard_red_line=0.85, + direction="above", + unit="percent", + window="5m", + denominator_query='sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))', + denominator_window_seconds=300, + ), + Adr100SloDefinition( + name="confidence_calibration", + query="sli:confidence_calibration:1h", + target=0.80, + hard_red_line=0.70, + direction="above", + unit="percent", + window="1h", + denominator_query="sum(rate(approval_records_high_confidence_total[1h]))", + denominator_window_seconds=3600, + ), + Adr100SloDefinition( + name="km_growth_rate", + query="max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)", + target=20.0, + hard_red_line=5.0, + direction="above", + unit="count", + window="24h", + ), +) + + +class Adr100SloStatusService: + """Fetch ADR-100 SLO status from Prometheus without writing governance events.""" + + async def fetch_report(self) -> dict[str, Any]: + prom_url = getattr( + settings, + "PROMETHEUS_URL", + "http://prometheus.observability.svc:9090", + ).rstrip("/") + metrics: list[dict[str, Any]] = [] + + async with httpx.AsyncClient(timeout=5.0) as client: + for definition in ADR100_SLO_DEFINITIONS: + metrics.append(await self._fetch_metric(client, prom_url, definition)) + + evaluable = [metric for metric in metrics if metric.get("evaluable")] + ok_count = sum(1 for metric in evaluable if metric.get("status") == "ok") + overall_compliance = (ok_count / len(evaluable)) if evaluable else None + overall_status = _overall_status(metrics, evaluable) + + return { + "schema_version": "adr100_slo_status_v1", + "source": "prometheus", + "evaluated_at": datetime.now(UTC).isoformat(), + "overall_status": overall_status, + "overall_compliance": overall_compliance, + "evaluable_count": len(evaluable), + "metric_count": len(metrics), + "metrics": metrics, + } + + async def _fetch_metric( + self, + client: httpx.AsyncClient, + prom_url: str, + definition: Adr100SloDefinition, + ) -> dict[str, Any]: + denominator_value: float | None = None + sample_count: float | None = None + + if definition.denominator_query: + denominator_result = await _query_prometheus_value( + client, + prom_url, + definition.denominator_query, + ) + if denominator_result["status"] != "ok": + return _metric_payload( + definition, + value=None, + status="no_data", + reason=denominator_result["reason"], + denominator_value=None, + sample_count=None, + ) + + denominator_value = float(denominator_result["value"]) + sample_count = denominator_value * definition.denominator_window_seconds + if sample_count < definition.minimum_events: + return _metric_payload( + definition, + value=None, + status="skipped_low_volume", + reason="denominator_below_minimum_events", + denominator_value=denominator_value, + sample_count=sample_count, + ) + + value_result = await _query_prometheus_value(client, prom_url, definition.query) + if value_result["status"] != "ok": + status = ( + "skipped_low_volume" + if value_result["reason"] == "prometheus_nan_or_inf" + else "no_data" + ) + return _metric_payload( + definition, + value=None, + status=status, + reason=value_result["reason"], + denominator_value=denominator_value, + sample_count=sample_count, + ) + + value = float(value_result["value"]) + status = _classify_status(value, definition) + return _metric_payload( + definition, + value=value, + status=status, + reason=None, + denominator_value=denominator_value, + sample_count=sample_count if sample_count is not None else value, + ) + + +async def _query_prometheus_value( + client: httpx.AsyncClient, + prom_url: str, + query: str, +) -> dict[str, Any]: + try: + response = await client.get( + f"{prom_url}/api/v1/query", + params={"query": query}, + ) + data = response.json() + if data.get("status") != "success": + return {"status": "error", "reason": "prometheus_query_failed"} + + results = data.get("data", {}).get("result", []) + if not results: + return { + "status": "no_data", + "reason": "prometheus_empty_result_metric_not_emitted", + } + + raw_value = results[0]["value"][1] + value = float(raw_value) + if not math.isfinite(value): + return { + "status": "skipped", + "reason": "prometheus_nan_or_inf", + "raw_value": raw_value, + } + return {"status": "ok", "value": value} + except Exception as exc: + logger.warning("adr100_slo_prometheus_query_error", query=query, error=str(exc)) + return {"status": "error", "reason": "prometheus_query_error"} + + +def _metric_payload( + definition: Adr100SloDefinition, + *, + value: float | None, + status: str, + reason: str | None, + denominator_value: float | None, + sample_count: float | None, +) -> dict[str, Any]: + return { + "name": definition.name, + "query": definition.query, + "value": value, + "target": definition.target, + "hard_red_line": definition.hard_red_line, + "direction": definition.direction, + "unit": definition.unit, + "window": definition.window, + "status": status, + "evaluable": status in {"ok", "warning", "violated"}, + "reason": reason, + "denominator_query": definition.denominator_query, + "denominator_value": denominator_value, + "sample_count": sample_count, + } + + +def _classify_status(value: float, definition: Adr100SloDefinition) -> str: + if definition.direction == "above": + if value < definition.hard_red_line: + return "violated" + if value < definition.target: + return "warning" + return "ok" + + if value > definition.hard_red_line: + return "violated" + if value > definition.target: + return "warning" + return "ok" + + +def _overall_status(metrics: list[dict[str, Any]], evaluable: list[dict[str, Any]]) -> str: + if any(metric.get("status") == "violated" for metric in metrics): + return "violated" + if any(metric.get("status") == "warning" for metric in metrics): + return "warning" + if evaluable and any(metric.get("status") == "skipped_low_volume" for metric in metrics): + return "partial" + if evaluable: + return "ok" + if any(metric.get("status") == "no_data" for metric in metrics): + return "no_data" + return "skipped_low_volume" + + +_adr100_slo_status_service: Adr100SloStatusService | None = None + + +def get_adr100_slo_status_service() -> Adr100SloStatusService: + global _adr100_slo_status_service + if _adr100_slo_status_service is None: + _adr100_slo_status_service = Adr100SloStatusService() + return _adr100_slo_status_service diff --git a/apps/api/tests/test_adr100_slo_status_service.py b/apps/api/tests/test_adr100_slo_status_service.py new file mode 100644 index 00000000..3fbd0f3e --- /dev/null +++ b/apps/api/tests/test_adr100_slo_status_service.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +from typing import Any + +import pytest + +from src.services.adr100_slo_status_service import Adr100SloStatusService + + +class _FakePrometheusResponse: + def __init__(self, payload: dict[str, Any]) -> None: + self._payload = payload + + def json(self) -> dict[str, Any]: + return self._payload + + +class _FakePrometheusClient: + def __init__(self, values: dict[str, str]) -> None: + self.values = values + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return False + + async def get(self, *args, **kwargs): # noqa: ANN002, ANN003 + query = str(kwargs.get("params", {}).get("query", "")) + value = self.values.get(query) + if value is None: + return _FakePrometheusResponse({ + "status": "success", + "data": {"result": []}, + }) + return _FakePrometheusResponse({ + "status": "success", + "data": {"result": [{"value": [1778756604, value]}]}, + }) + + +@pytest.mark.asyncio +async def test_fetch_report_marks_ratio_slos_low_volume(monkeypatch): + values = { + "sum(rate(automation_operation_log_total[5m]))": "0", + 'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0", + "sum(rate(approval_records_high_confidence_total[1h]))": "0", + "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "24", + } + + monkeypatch.setattr( + "httpx.AsyncClient", + lambda *args, **kwargs: _FakePrometheusClient(values), + ) + + report = await Adr100SloStatusService().fetch_report() + + by_name = {metric["name"]: metric for metric in report["metrics"]} + assert by_name["decision_accuracy"]["status"] == "skipped_low_volume" + assert by_name["decision_accuracy"]["evaluable"] is False + assert by_name["confidence_calibration"]["status"] == "skipped_low_volume" + assert by_name["km_growth_rate"]["status"] == "ok" + assert by_name["km_growth_rate"]["value"] == 24 + assert report["overall_status"] == "partial" + assert report["overall_compliance"] == 1.0 + + +@pytest.mark.asyncio +async def test_fetch_report_classifies_hard_red_line_violation(monkeypatch): + values = { + "sum(rate(automation_operation_log_total[5m]))": "0.02", + "sli:autonomy_rate:5m": "0.5", + 'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0", + "sum(rate(approval_records_high_confidence_total[1h]))": "0", + "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "3", + } + + monkeypatch.setattr( + "httpx.AsyncClient", + lambda *args, **kwargs: _FakePrometheusClient(values), + ) + + report = await Adr100SloStatusService().fetch_report() + + by_name = {metric["name"]: metric for metric in report["metrics"]} + assert by_name["autonomy_rate"]["status"] == "violated" + assert by_name["autonomy_rate"]["sample_count"] == 6 + assert by_name["km_growth_rate"]["status"] == "violated" + assert report["overall_status"] == "violated" diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json index 94289e64..06e0bdb5 100644 --- a/apps/web/messages/en.json +++ b/apps/web/messages/en.json @@ -1354,15 +1354,38 @@ "comingSoon": "This tab is coming soon", "slo": { "kpi": { + "autonomy_rate": "Autonomy Rate", "decision_accuracy": "Decision Accuracy", + "confidence_calibration": "Confidence Calibration", "km_growth_rate": "KM Growth Rate", "mcp_call_diversity": "MCP Call Diversity", + "auto_execute_success_rate": "Auto Execute Success", + "human_override_rate": "Human Override Rate", + "verifier_false_neg_rate": "Verifier False Negative", "current": "Current", "target": "Target", "sparkline": "7-day trend", "loading": "Loading...", "error": "Failed to load", - "noData": "No data" + "noData": "No data", + "sampleCount": "Samples {count}", + "window": "Window {window}", + "state": { + "ok": "OK", + "warning": "Below target", + "violated": "Hard red line", + "skipped_low_volume": "Low sample wait", + "no_data": "No data", + "error": "Query failed", + "partial": "Partially evaluable" + }, + "reason": { + "none": "None", + "denominator_below_minimum_events": "Denominator events too low", + "prometheus_nan_or_inf": "Prometheus has no valid denominator yet", + "prometheus_empty_result_metric_not_emitted": "Prometheus has not returned the metric yet", + "unknown": "Reason pending" + } }, "chart": { "title": "30-day Violation Timeline", diff --git a/apps/web/messages/zh-TW.json b/apps/web/messages/zh-TW.json index 8b2a0642..42c0663f 100644 --- a/apps/web/messages/zh-TW.json +++ b/apps/web/messages/zh-TW.json @@ -1355,15 +1355,38 @@ "comingSoon": "本 Tab 即將上線", "slo": { "kpi": { + "autonomy_rate": "自主化率", "decision_accuracy": "決策準確率", + "confidence_calibration": "信心校準", "km_growth_rate": "KM 成長率", "mcp_call_diversity": "MCP 呼叫多樣性", + "auto_execute_success_rate": "自動執行成功率", + "human_override_rate": "人工推翻率", + "verifier_false_neg_rate": "驗證漏判率", "current": "當前", "target": "目標", "sparkline": "7 日趨勢", "loading": "載入中...", "error": "無法載入", - "noData": "暫無資料" + "noData": "暫無資料", + "sampleCount": "樣本 {count}", + "window": "視窗 {window}", + "state": { + "ok": "正常", + "warning": "低於目標", + "violated": "硬紅線", + "skipped_low_volume": "低樣本等待", + "no_data": "沒有資料", + "error": "查詢失敗", + "partial": "部分可評估" + }, + "reason": { + "none": "無", + "denominator_below_minimum_events": "分母事件不足", + "prometheus_nan_or_inf": "Prometheus 暫無有效分母", + "prometheus_empty_result_metric_not_emitted": "Prometheus 尚未回傳指標", + "unknown": "原因待查" + } }, "chart": { "title": "30 日違反事件時序", diff --git a/apps/web/src/app/[locale]/governance/tabs/slo-tab.tsx b/apps/web/src/app/[locale]/governance/tabs/slo-tab.tsx index 37b6e6c1..ed6cd089 100644 --- a/apps/web/src/app/[locale]/governance/tabs/slo-tab.tsx +++ b/apps/web/src/app/[locale]/governance/tabs/slo-tab.tsx @@ -31,11 +31,32 @@ const API_BASE = process.env.NEXT_PUBLIC_API_URL ?? '' // ============================================================================= interface SloApiResponse { - metrics?: { + metrics?: Array<{ + name: SloMetric['name'] + value: number | null + threshold: number + direction: 'above' | 'below' + sample_count: number + violated: boolean + }> | { decision_accuracy?: { current: number; target: number; status: string; sparkline?: number[] } km_growth_rate?: { current: number; target: number; status: string; sparkline?: number[] } mcp_call_diversity?: { current: number; target: number; status: string; sparkline?: number[] } } + adr100?: { + overall_status?: string + overall_compliance?: number | null + metrics?: Array<{ + name: SloMetric['name'] + value: number | null + target: number + status: 'ok' | 'warning' | 'violated' | 'skipped_low_volume' | 'no_data' | 'error' + unit: 'percent' | 'count' + sample_count?: number | null + window?: string + reason?: string | null + }> + } overall_compliance?: number computed_at?: string } @@ -51,15 +72,55 @@ interface SummaryApiResponse { // ============================================================================= function mapStatus(s: string): SloMetric['status'] { - if (s === 'healthy') return 'healthy' + if (s === 'healthy' || s === 'ok') return 'healthy' if (s === 'warning') return 'warning' + if (s === 'skipped_low_volume') return 'syncing' + if (s === 'no_data') return 'idle' return 'critical' } function buildMetrics(api: SloApiResponse): SloMetric[] { + const adr100Metrics = api.adr100?.metrics + if (adr100Metrics?.length) { + const order: SloMetric['name'][] = ['autonomy_rate', 'decision_accuracy', 'confidence_calibration', 'km_growth_rate'] + const byName = new Map(adr100Metrics.map(metric => [metric.name, metric])) + const built: SloMetric[] = [] + order.forEach(name => { + const entry = byName.get(name) + if (!entry) return + built.push({ + name, + current: entry.value ?? null, + target: entry.target, + status: mapStatus(entry.status), + state: entry.status, + unit: entry.unit === 'count' ? 'count' : '%', + sparkline: [], + sampleCount: entry.sample_count ?? null, + window: entry.window, + reason: entry.reason ?? null, + }) + }) + return built + } + + if (Array.isArray(api.metrics)) { + return api.metrics.map(entry => ({ + name: entry.name, + current: entry.value, + target: entry.threshold, + status: entry.value == null ? 'syncing' : entry.violated ? 'critical' : 'healthy', + state: entry.value == null ? 'skipped_low_volume' : entry.violated ? 'violated' : 'ok', + unit: '%', + sparkline: [], + sampleCount: entry.sample_count, + })) + } + const m = api.metrics ?? {} - const names: Array = ['decision_accuracy', 'km_growth_rate', 'mcp_call_diversity'] - return names.map(name => { + if (Array.isArray(m)) return [] + const names: Array<'decision_accuracy' | 'km_growth_rate' | 'mcp_call_diversity'> = ['decision_accuracy', 'km_growth_rate', 'mcp_call_diversity'] + return names.map((name): SloMetric => { const entry = m[name] return { name, @@ -111,7 +172,7 @@ export function SloTab() { }, []) const metrics = sloData ? buildMetrics(sloData) : [] - const compliance = sloData?.overall_compliance ?? null + const compliance = sloData?.adr100?.overall_compliance ?? sloData?.overall_compliance ?? null const chartData: ViolationDataPoint[] = summaryData?.data ?? [] const eventTypes: string[] = summaryData?.event_types ?? [] @@ -169,7 +230,7 @@ export function SloTab() { className="slo-kpi-grid" > {sloLoading - ? [0, 1, 2].map(i => ) + ? [0, 1, 2, 3].map(i => ) : metrics.map(m => ) } diff --git a/apps/web/src/components/governance/slo-kpi-card.tsx b/apps/web/src/components/governance/slo-kpi-card.tsx index a410394a..a8560436 100644 --- a/apps/web/src/components/governance/slo-kpi-card.tsx +++ b/apps/web/src/components/governance/slo-kpi-card.tsx @@ -24,12 +24,24 @@ import { useTranslations } from 'next-intl' // ============================================================================= export interface SloMetric { - name: 'decision_accuracy' | 'km_growth_rate' | 'mcp_call_diversity' + name: + | 'autonomy_rate' + | 'decision_accuracy' + | 'confidence_calibration' + | 'km_growth_rate' + | 'mcp_call_diversity' + | 'auto_execute_success_rate' + | 'human_override_rate' + | 'verifier_false_neg_rate' current: number | null target: number - status: 'healthy' | 'warning' | 'critical' - unit?: string + status: 'healthy' | 'warning' | 'critical' | 'idle' | 'syncing' + state?: 'ok' | 'warning' | 'violated' | 'skipped_low_volume' | 'no_data' | 'error' | 'partial' + unit?: '%' | 'count' sparkline?: number[] // 7 points, most recent last + sampleCount?: number | null + window?: string + reason?: string | null } interface SloKpiCardProps { @@ -45,6 +57,22 @@ const statusColor: Record = { healthy: '#22C55E', warning: '#F59E0B', critical: '#FF3300', + idle: '#87867f', + syncing: '#3B82F6', +} + +function formatCompactNumber(value: number): string { + if (value >= 100) return value.toFixed(0) + if (value >= 10) return value.toFixed(1) + return value.toFixed(2) +} + +function reasonKey(reason?: string | null): string { + if (!reason) return 'none' + if (reason === 'denominator_below_minimum_events') return 'denominator_below_minimum_events' + if (reason === 'prometheus_nan_or_inf') return 'prometheus_nan_or_inf' + if (reason === 'prometheus_empty_result_metric_not_emitted') return 'prometheus_empty_result_metric_not_emitted' + return 'unknown' } // ============================================================================= @@ -73,21 +101,24 @@ export function SloKpiCard({ metric, loading = false }: SloKpiCardProps) { if (loading) return const color = statusColor[metric.status] - const orbStatus: StatusType = metric.status === 'healthy' ? 'healthy' - : metric.status === 'warning' ? 'warning' - : 'critical' + const orbStatus: StatusType = metric.status const formattedValue = metric.current == null ? '--' : metric.unit === '%' ? `${(metric.current * 100).toFixed(1)}%` - : metric.current.toFixed(2) + : metric.current.toFixed(0) const formattedTarget = metric.unit === '%' ? `${(metric.target * 100).toFixed(0)}%` - : metric.target.toFixed(2) + : metric.target.toFixed(0) const sparkData = (metric.sparkline ?? Array(7).fill(0)).map((v, i) => ({ i, v })) + const stateLabel = metric.state ? t(`state.${metric.state}`) : '' + const reasonLabel = metric.reason ? t(`reason.${reasonKey(metric.reason)}`) : null + const sampleLabel = metric.sampleCount == null + ? null + : t('sampleCount', { count: formatCompactNumber(metric.sampleCount) }) return ( @@ -114,35 +145,46 @@ export function SloKpiCard({ metric, loading = false }: SloKpiCardProps) { color, lineHeight: 1, marginBottom: 4, - letterSpacing: '-0.5px', + letterSpacing: 0, }}> {formattedValue} - {/* Target + sparkline row */} -
- - {t('target')} {formattedTarget} - +
+ {/* Target + sparkline row */} +
+ + {t('target')} {formattedTarget} + - {/* Sparkline 80×24px */} -
- - - - - + {/* Sparkline 80×24px */} +
+ + + + + +
+
+ +
+ + {stateLabel} + + + {reasonLabel ?? sampleLabel ?? (metric.window ? t('window', { window: metric.window }) : '')} +