diff --git a/apps/api/src/api/v1/platform/truth_chain.py b/apps/api/src/api/v1/platform/truth_chain.py index a6be498e..1dc75585 100644 --- a/apps/api/src/api/v1/platform/truth_chain.py +++ b/apps/api/src/api/v1/platform/truth_chain.py @@ -2,6 +2,7 @@ from __future__ import annotations +from time import perf_counter from typing import Any from fastapi import APIRouter, Depends, Query @@ -13,6 +14,7 @@ from src.core.awooop_operator_auth import ( from src.services.awooop_truth_chain_service import ( fetch_automation_quality_summary, fetch_truth_chain, + record_quality_summary_observation, ) router = APIRouter() @@ -33,12 +35,25 @@ async def get_automation_quality_summary( limit: int = Query(200, ge=1, le=500, description="最多評估 incident 數"), refresh: bool = Query(False, description="略過短 TTL 快取並重新聚合"), ) -> dict[str, Any]: - summary = await fetch_automation_quality_summary( - project_id=project_id, - hours=hours, - limit=limit, - refresh=refresh, - ) + started_at = perf_counter() + try: + summary = await fetch_automation_quality_summary( + project_id=project_id, + hours=hours, + limit=limit, + refresh=refresh, + ) + except Exception as exc: + record_quality_summary_observation( + project_id=project_id, + hours=hours, + limit=limit, + cache_status="error", + success=False, + duration_seconds=perf_counter() - started_at, + error=exc.__class__.__name__, + ) + raise summary["examples"] = [] summary["visibility_note"] = ( "Aggregate only. Use /truth-chain/{source_id} with operator auth for source-level details." 
@dataclass(frozen=True)
class QualitySummaryObservation:
    """Immutable record of one truth-chain quality-summary observation.

    Instances are built from the dict rows returned by
    ``get_quality_summary_observations()`` and rendered as Prometheus gauge
    samples; every field except ``duration_seconds``, ``observed_at`` and
    ``error`` is emitted as a label.
    """

    # Project the summary was requested for; the snapshot builder falls back
    # to "awoooi" when the row carries no value.
    project_id: str
    # Look-back window (hours) and incident limit of the observed request;
    # the snapshot builder coerces missing values to 0.
    hours: int
    limit: int
    # Recorded as "hit" / "miss" by the service and "error" by the endpoint;
    # "unknown" is the fallback for an empty value.
    cache_status: str
    # True when the summary was produced, False when the endpoint raised.
    success: bool
    # Elapsed time of the call in seconds (a perf_counter delta, clamped >= 0
    # by the recorder).
    duration_seconds: float
    # Wall-clock time of the observation as returned by time.time().
    observed_at: float
    # Exception class name for failed observations (truncated to 160 chars by
    # the recorder); None on success.
    error: str | None = None
adr100_slo_emitter_last_success_timestamp Last successful ADR-100 DB metrics emission timestamp", "# TYPE adr100_slo_emitter_last_success_timestamp gauge", f"adr100_slo_emitter_last_success_timestamp {snapshot.emitted_at:.0f}", - "", ]) + lines.extend([ + "# HELP awooop_truth_chain_quality_summary_last_duration_seconds Last observed AwoooP truth-chain quality summary aggregation duration", + "# TYPE awooop_truth_chain_quality_summary_last_duration_seconds gauge", + ]) + if snapshot.quality_summary_observations: + for observation in snapshot.quality_summary_observations: + labels = _quality_summary_labels(observation) + lines.append( + "awooop_truth_chain_quality_summary_last_duration_seconds" + f"{labels} {observation.duration_seconds:.6f}" + ) + else: + lines.append( + 'awooop_truth_chain_quality_summary_last_duration_seconds{project_id="none",hours="0",limit="0",cache_status="none",success="false"} 0' + ) + + lines.extend([ + "# HELP awooop_truth_chain_quality_summary_last_success Last observed AwoooP truth-chain quality summary success flag", + "# TYPE awooop_truth_chain_quality_summary_last_success gauge", + ]) + if snapshot.quality_summary_observations: + for observation in snapshot.quality_summary_observations: + labels = _quality_summary_labels(observation) + lines.append( + "awooop_truth_chain_quality_summary_last_success" + f"{labels} {1 if observation.success else 0}" + ) + else: + lines.append( + 'awooop_truth_chain_quality_summary_last_success{project_id="none",hours="0",limit="0",cache_status="none",success="false"} 0' + ) + + lines.extend([ + "# HELP awooop_truth_chain_quality_summary_observed_timestamp Last observed AwoooP truth-chain quality summary timestamp", + "# TYPE awooop_truth_chain_quality_summary_observed_timestamp gauge", + ]) + if snapshot.quality_summary_observations: + for observation in snapshot.quality_summary_observations: + labels = _quality_summary_labels(observation) + lines.append( + 
"awooop_truth_chain_quality_summary_observed_timestamp" + f"{labels} {observation.observed_at:.0f}" + ) + else: + lines.append( + 'awooop_truth_chain_quality_summary_observed_timestamp{project_id="none",hours="0",limit="0",cache_status="none",success="false"} 0' + ) + + lines.append("") return "\n".join(lines) @@ -217,6 +296,18 @@ def _escape_label(value: str) -> str: return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"') +def _quality_summary_labels(observation: QualitySummaryObservation) -> str: + return ( + "{" + f'project_id="{_escape_label(observation.project_id)}",' + f'hours="{observation.hours}",' + f'limit="{observation.limit}",' + f'cache_status="{_escape_label(observation.cache_status)}",' + f'success="{"true" if observation.success else "false"}"' + "}" + ) + + _AUTOMATION_OPERATION_SQL = """ WITH automation_scope AS ( SELECT diff --git a/apps/api/src/services/adr100_slo_status_service.py b/apps/api/src/services/adr100_slo_status_service.py index e5fce3c8..9e63e1cf 100644 --- a/apps/api/src/services/adr100_slo_status_service.py +++ b/apps/api/src/services/adr100_slo_status_service.py @@ -80,6 +80,16 @@ ADR100_SLO_DEFINITIONS: tuple[Adr100SloDefinition, ...] 
= ( unit="count", window="24h", ), + Adr100SloDefinition( + name="truth_chain_quality_summary_latency", + query='max(awooop_truth_chain_quality_summary_last_duration_seconds{project_id="awoooi",limit="8",success="true"})', + target=2.0, + hard_red_line=8.0, + direction="below", + unit="seconds", + window="last_observation", + minimum_events=0.0, + ), ) diff --git a/apps/api/src/services/awooop_truth_chain_service.py b/apps/api/src/services/awooop_truth_chain_service.py index 188ede64..13b89923 100644 --- a/apps/api/src/services/awooop_truth_chain_service.py +++ b/apps/api/src/services/awooop_truth_chain_service.py @@ -13,6 +13,7 @@ import shutil from datetime import UTC, date, datetime, timedelta from decimal import Decimal from pathlib import Path +from time import perf_counter, time from typing import Any from uuid import UUID @@ -37,6 +38,54 @@ _JSON_TEXT_FIELDS = {"gate_result", "source_envelope"} _QUALITY_SUMMARY_CACHE_TTL_SECONDS = int( os.getenv("AWOOOP_QUALITY_SUMMARY_CACHE_TTL_SECONDS", "30") ) +_QUALITY_SUMMARY_OBSERVATIONS: dict[str, dict[str, Any]] = {} + + +def record_quality_summary_observation( + *, + project_id: str, + hours: int, + limit: int, + cache_status: str, + success: bool, + duration_seconds: float, + error: str | None = None, +) -> None: + normalized_project_id = project_id or "awoooi" + normalized_cache_status = cache_status or "unknown" + key = "|".join([ + normalized_project_id, + str(int(hours)), + str(int(limit)), + normalized_cache_status, + "success" if success else "failed", + ]) + _QUALITY_SUMMARY_OBSERVATIONS[key] = { + "project_id": normalized_project_id, + "hours": int(hours), + "limit": int(limit), + "cache_status": normalized_cache_status, + "success": bool(success), + "duration_seconds": max(0.0, float(duration_seconds)), + "observed_at": time(), + "error": str(error)[:160] if error else None, + } + + +def get_quality_summary_observations() -> list[dict[str, Any]]: + return [ + dict(observation) + for observation in sorted( 
+ _QUALITY_SUMMARY_OBSERVATIONS.values(), + key=lambda item: ( + str(item.get("project_id") or ""), + int(item.get("hours") or 0), + int(item.get("limit") or 0), + str(item.get("cache_status") or ""), + bool(item.get("success")), + ), + ) + ] def _clean(value: Any) -> Any: @@ -2079,6 +2128,7 @@ async def fetch_automation_quality_summary( refresh: bool = False, ) -> dict[str, Any]: """Return a recent incident-level quality summary for the automation flywheel.""" + started_at = perf_counter() bounded_hours = max(1, min(int(hours), 168)) bounded_limit = max(1, min(int(limit), 500)) normalized_project_id = project_id or "awoooi" @@ -2094,13 +2144,26 @@ async def fetch_automation_quality_summary( ttl_seconds=_QUALITY_SUMMARY_CACHE_TTL_SECONDS, ) if cached_summary is not None: + duration_seconds = perf_counter() - started_at + record_quality_summary_observation( + project_id=normalized_project_id, + hours=bounded_hours, + limit=bounded_limit, + cache_status="hit", + success=True, + duration_seconds=duration_seconds, + ) logger.info( "awooop_automation_quality_summary_cache_hit", project_id=normalized_project_id, window_hours=bounded_hours, limit=bounded_limit, ttl_seconds=_QUALITY_SUMMARY_CACHE_TTL_SECONDS, + duration_seconds=round(duration_seconds, 3), ) + cached_summary = dict(cached_summary) + cached_summary["cache_status"] = "hit" + cached_summary["aggregation_duration_seconds"] = round(duration_seconds, 3) return cached_summary cutoff = datetime.now(UTC) - timedelta(hours=bounded_hours) @@ -2525,9 +2588,22 @@ async def fetch_automation_quality_summary( cache_status="miss", cache_ttl_seconds=_QUALITY_SUMMARY_CACHE_TTL_SECONDS, ) - return await store_operator_summary_async( + stored_summary = await store_operator_summary_async( "truth_chain_quality_summary", cache_key, summary, ttl_seconds=_QUALITY_SUMMARY_CACHE_TTL_SECONDS, ) + duration_seconds = perf_counter() - started_at + record_quality_summary_observation( + project_id=normalized_project_id, + 
# Operator guidance attached to SLO violation alerts. Each table maps an SLO
# name to its specific guidance; names without an entry use the matching
# default. Tuples keep the shared tables immutable, and the accessors below
# hand out a fresh list on every call.
_TRUTH_CHAIN_LATENCY_SLO = "truth_chain_quality_summary_latency"

_DEFAULT_SLO_REMEDIATION_ITEMS: tuple[str, ...] = (
    "Pause auto-scaling or risky auto-fix tasks",
    "Review evidence/decision traces and adjust policy thresholds",
)
_SLO_REMEDIATION_ITEMS: dict[str, tuple[str, ...]] = {
    _TRUTH_CHAIN_LATENCY_SLO: (
        "Check truth-chain quality summary cache miss latency and DB query plan",
        "Confirm operator summary cache is warm before treating homepage SLO as degraded",
    ),
}

_DEFAULT_SLO_ACTIONABLE_ITEMS: tuple[str, ...] = (
    "Check verifier lag and post-exec learning health",
    "Run emergency incident audit on failed approvals",
)
_SLO_ACTIONABLE_ITEMS: dict[str, tuple[str, ...]] = {
    _TRUTH_CHAIN_LATENCY_SLO: (
        "Call /api/v1/platform/truth-chain/quality/summary?limit=8&refresh=true and compare duration",
        "Inspect /metrics for awooop_truth_chain_quality_summary_last_duration_seconds",
    ),
}

_DEFAULT_SLO_NEXT_ACTION = "trigger_flywheel_safeguard"
_SLO_NEXT_ACTIONS: dict[str, str] = {
    _TRUTH_CHAIN_LATENCY_SLO: "run_truth_chain_quality_summary_latency_probe",
}


def _slo_remediation_items(name: str) -> list[str]:
    """Return the remediation steps to attach to a violation of SLO *name*."""
    return list(_SLO_REMEDIATION_ITEMS.get(name, _DEFAULT_SLO_REMEDIATION_ITEMS))


def _slo_actionable_items(name: str) -> list[str]:
    """Return the immediate operator checks for a violation of SLO *name*."""
    return list(_SLO_ACTIONABLE_ITEMS.get(name, _DEFAULT_SLO_ACTIONABLE_ITEMS))


def _slo_next_action(name: str) -> str:
    """Return the next-action identifier for a violation of SLO *name*."""
    return _SLO_NEXT_ACTIONS.get(name, _DEFAULT_SLO_NEXT_ACTION)
"truth_chain_quality_summary_latency": "below", } results: dict[str, Any] = {} @@ -511,7 +552,17 @@ class GovernanceAgent: continue threshold = hard_red_lines[name] target = slo_targets[name] - violated = value < threshold + direction = slo_directions.get(name, "above") + violated = value > threshold if direction == "below" else value < threshold + gap = ( + value - threshold + if violated and direction == "below" + else threshold - value + if violated + else target - value + if direction == "below" + else value - target + ) results[name] = { "name": name, @@ -519,7 +570,8 @@ class GovernanceAgent: "value": round(value, 4), "slo_target": target, "hard_red_line": threshold, - "gap": round(threshold - value, 4) if violated else round(value - target, 4), + "direction": direction, + "gap": round(gap, 4), "violated": violated, } @@ -533,20 +585,15 @@ class GovernanceAgent: "value": round(value, 4), "target": target, "threshold": threshold, - "gap": round(threshold - value, 4), + "direction": direction, + "gap": round(gap, 4), }, "remediation": { - "items": [ - "Pause auto-scaling or risky auto-fix tasks", - "Review evidence/decision traces and adjust policy thresholds", - ], - "next_action": "trigger_flywheel_safeguard", + "items": _slo_remediation_items(name), + "next_action": _slo_next_action(name), }, "actionable": { - "items": [ - "Check verifier lag and post-exec learning health", - "Run emergency incident audit on failed approvals", - ], + "items": _slo_actionable_items(name), }, }, ) @@ -716,7 +763,7 @@ class GovernanceAgent: "actionable": { "items": [ "先確認 /metrics 是否已輸出 ADR-100 底層指標", - "檢查 Prometheus rule 是否已載入 sli:autonomy_rate:5m 等 4 項告警規則", + "檢查 Prometheus rule 與 truth-chain quality summary runtime metric 是否可查詢", ], }, }, diff --git a/apps/api/tests/test_adr100_slo_metrics_service.py b/apps/api/tests/test_adr100_slo_metrics_service.py index 8fbb4eb4..d526d332 100644 --- a/apps/api/tests/test_adr100_slo_metrics_service.py +++ 
b/apps/api/tests/test_adr100_slo_metrics_service.py @@ -1,6 +1,7 @@ from src.services.adr100_slo_metrics_service import ( Adr100SloMetricsSnapshot, AutomationOperationSample, + QualitySummaryObservation, VerificationSample, render_adr100_slo_metrics, ) @@ -38,6 +39,17 @@ def test_render_adr100_slo_metrics_outputs_required_series() -> None: knowledge_entries_created_24h=25, high_confidence_total=9, high_confidence_success_total=7, + quality_summary_observations=[ + QualitySummaryObservation( + project_id="awoooi", + hours=24, + limit=8, + cache_status="miss", + success=True, + duration_seconds=1.234567, + observed_at=1_778_756_100, + ), + ], emitted_at=1_778_756_000, ) @@ -58,6 +70,18 @@ def test_render_adr100_slo_metrics_outputs_required_series() -> None: assert "approval_records_high_confidence_total 9" in rendered assert "approval_records_high_confidence_success_total 7" in rendered assert "adr100_slo_emitter_last_success_timestamp 1778756000" in rendered + assert ( + 'awooop_truth_chain_quality_summary_last_duration_seconds{project_id="awoooi",' + 'hours="24",limit="8",cache_status="miss",success="true"} 1.234567' + ) in rendered + assert ( + 'awooop_truth_chain_quality_summary_last_success{project_id="awoooi",' + 'hours="24",limit="8",cache_status="miss",success="true"} 1' + ) in rendered + assert ( + 'awooop_truth_chain_quality_summary_observed_timestamp{project_id="awoooi",' + 'hours="24",limit="8",cache_status="miss",success="true"} 1778756100' + ) in rendered def test_render_adr100_slo_metrics_emits_zero_series_when_empty() -> None: @@ -71,6 +95,10 @@ def test_render_adr100_slo_metrics_emits_zero_series_when_empty() -> None: assert 'post_execution_verification_created_24h{outcome="none"} 0' in rendered assert "knowledge_entries_total 0" in rendered assert "knowledge_entries_created_24h 0" in rendered + assert ( + 'awooop_truth_chain_quality_summary_last_duration_seconds{project_id="none",' + 'hours="0",limit="0",cache_status="none",success="false"} 0' + ) 
in rendered def test_render_adr100_slo_metrics_escapes_labels() -> None: diff --git a/apps/api/tests/test_adr100_slo_status_service.py b/apps/api/tests/test_adr100_slo_status_service.py index 82f783e0..b836163c 100644 --- a/apps/api/tests/test_adr100_slo_status_service.py +++ b/apps/api/tests/test_adr100_slo_status_service.py @@ -10,6 +10,12 @@ from src.services.adr100_slo_status_service import ( ) +QUALITY_SUMMARY_LATENCY_QUERY = ( + 'max(awooop_truth_chain_quality_summary_last_duration_seconds{' + 'project_id="awoooi",limit="8",success="true"})' +) + + class _FakePrometheusResponse: def __init__(self, payload: dict[str, Any]) -> None: self._payload = payload @@ -89,6 +95,7 @@ async def test_fetch_report_marks_ratio_slos_low_volume(monkeypatch): 'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0", "sum(rate(approval_records_high_confidence_total[1h]))": "0", "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "24", + QUALITY_SUMMARY_LATENCY_QUERY: "1.2", } monkeypatch.setattr( @@ -109,6 +116,8 @@ async def test_fetch_report_marks_ratio_slos_low_volume(monkeypatch): assert by_name["confidence_calibration"]["status"] == "skipped_low_volume" assert by_name["km_growth_rate"]["status"] == "ok" assert by_name["km_growth_rate"]["value"] == 24 + assert by_name["truth_chain_quality_summary_latency"]["status"] == "ok" + assert by_name["truth_chain_quality_summary_latency"]["direction"] == "below" assert report["overall_status"] == "partial" assert report["overall_compliance"] == 1.0 @@ -121,6 +130,7 @@ async def test_fetch_report_classifies_hard_red_line_violation(monkeypatch): 'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0", "sum(rate(approval_records_high_confidence_total[1h]))": "0", "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "3", + QUALITY_SUMMARY_LATENCY_QUERY: "1.2", } monkeypatch.setattr( @@ -142,6 +152,36 @@ async def 
@pytest.mark.asyncio
async def test_fetch_report_classifies_below_direction_slo(monkeypatch):
    """A "below"-direction SLO is violated when its value exceeds the red line.

    The fake Prometheus answers the latency query with 9.5 seconds, which is
    above the 8.0 hard red line declared for
    ``truth_chain_quality_summary_latency``; the report must flag that metric
    and the overall status as violated.
    """
    # Canned Prometheus answers keyed by query string. Only the latency value
    # is meant to breach a threshold; the rate queries report zero traffic.
    values = {
        "sum(rate(automation_operation_log_total[5m]))": "0",
        'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
        "sum(rate(approval_records_high_confidence_total[1h]))": "0",
        "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "24",
        QUALITY_SUMMARY_LATENCY_QUERY: "9.5",
    }

    # Replace the HTTP client so no real Prometheus is contacted.
    monkeypatch.setattr(
        "httpx.AsyncClient",
        lambda *args, **kwargs: _FakePrometheusClient(values),
    )
    # Stub the verification-coverage lookup with the module's low-volume helper
    # (presumably to avoid a database call; confirm against the helper).
    monkeypatch.setattr(
        Adr100SloStatusService,
        "_fetch_verification_coverage",
        _low_volume_coverage,
    )

    report = await Adr100SloStatusService().fetch_report()

    by_name = {metric["name"]: metric for metric in report["metrics"]}
    latency = by_name["truth_chain_quality_summary_latency"]
    assert latency["status"] == "violated"
    assert latency["direction"] == "below"
    assert latency["value"] == 9.5
    assert report["overall_status"] == "violated"
value self.queries: list[str] = [] @@ -820,7 +826,10 @@ class _FakePrometheusClient: return False async def get(self, *args, **kwargs): # noqa: ANN002, ANN003 - self.queries.append(str(kwargs.get("params", {}).get("query", ""))) + query = str(kwargs.get("params", {}).get("query", "")) + self.queries.append(query) + if isinstance(self._value, dict): + return _FakePrometheusResponse(self._value.get(query, "NaN")) return _FakePrometheusResponse(self._value) @@ -838,6 +847,7 @@ class TestCheckSloCompliance: "decision_accuracy", "confidence_calibration", "km_growth_rate", + "truth_chain_quality_summary_latency", ): assert result[name]["status"] == "skipped" assert result[name]["reason"] == "prometheus_nan_or_inf" @@ -847,7 +857,13 @@ class TestCheckSloCompliance: async def test_km_growth_prefers_db_derived_24h_gauge(self): """KM SLO 要優先使用 DB 24h gauge,避免新 counter 暖機時誤報 0.""" agent = _make_agent() - client = _FakePrometheusClient("25") + client = _FakePrometheusClient({ + "sli:autonomy_rate:5m": "0.95", + "sli:decision_accuracy:5m": "0.96", + "sli:confidence_calibration:1h": "0.97", + "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "25", + QUALITY_SUMMARY_LATENCY_QUERY: "1.1", + }) with patch("httpx.AsyncClient", return_value=client): result = await agent.check_slo_compliance() @@ -855,3 +871,4 @@ class TestCheckSloCompliance: assert "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)" in client.queries assert result["km_growth_rate"]["status"] == "ok" assert result["km_growth_rate"]["value"] == 25 + assert result["truth_chain_quality_summary_latency"]["status"] == "ok" diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 2dc40bc8..54772c2f 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,29 @@ +## 2026-06-01|truth-chain quality summary 納入 AI 自健診 SLO + +**背景**: + +- 正式環境已把 `/api/v1/platform/truth-chain/quality/summary` 的 N+1 查詢修成批次化,但「首頁/quality summary 是否又變慢」尚未進入 AI 自健診。 +- 先前飛輪核心異常只會看到泛化的 
`auto_execute_success_rate`,無法快速判斷是治理資料、執行資料,還是 operator summary 資料面拖慢。 + +**本次調整**: + +- `apps/api/src/services/awooop_truth_chain_service.py`:記錄 quality summary 的 cache hit / miss 聚合耗時與最後觀測時間。 +- `apps/api/src/api/v1/platform/truth_chain.py`:端點例外時也寫入 failure observation,讓 `/metrics` 能看見摘要面失敗。 +- `apps/api/src/services/adr100_slo_metrics_service.py`:新增 `awooop_truth_chain_quality_summary_last_duration_seconds`、`awooop_truth_chain_quality_summary_last_success`、`awooop_truth_chain_quality_summary_observed_timestamp`。 +- `apps/api/src/services/adr100_slo_status_service.py`:新增第 5 個 ADR-100 SLO:`truth_chain_quality_summary_latency`,目標 `< 2s`、硬紅線 `> 8s`。 +- `apps/api/src/services/governance_agent.py`:SLO 判斷支援 `above` / `below` 方向,避免把 latency 這種「越低越好」的指標誤判。 + +**驗證**: + +- `python3 -m py_compile apps/api/src/services/awooop_truth_chain_service.py apps/api/src/services/adr100_slo_metrics_service.py apps/api/src/services/adr100_slo_status_service.py apps/api/src/services/governance_agent.py apps/api/src/api/v1/platform/truth_chain.py` +- `python3 scripts/security/security-mirror-progress-guard.py --root .` → `SECURITY_MIRROR_PROGRESS_GUARD_OK` +- `DATABASE_URL=postgresql://test:test@localhost:5432/test PYTHONPATH=apps/api /Users/ogt/.pyenv/shims/pytest apps/api/tests/test_adr100_slo_metrics_service.py apps/api/tests/test_adr100_slo_status_service.py apps/api/tests/test_governance_agent.py apps/api/tests/test_awooop_truth_chain_service.py -q` → `85 passed` + +**進度邊界**: + +- 整體 AI 自動化飛輪進度仍維持 `61%`;這輪是自健診可觀測性與 SLO 精準度補強,不代表自動修復成功率已提升。 +- 下一步需推 Gitea main、等待 production deploy,並以正式 `/metrics` / `/api/v1/ai/slo` 驗證新 SLO 是否被 Prometheus 抓到。 + ## 2026-06-01|IwoooS 首層漸進揭露使用體驗收斂 **背景**: