fix(api): add quality summary slo metric
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from time import perf_counter
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
@@ -13,6 +14,7 @@ from src.core.awooop_operator_auth import (
|
||||
from src.services.awooop_truth_chain_service import (
|
||||
fetch_automation_quality_summary,
|
||||
fetch_truth_chain,
|
||||
record_quality_summary_observation,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
@@ -33,12 +35,25 @@ async def get_automation_quality_summary(
|
||||
limit: int = Query(200, ge=1, le=500, description="最多評估 incident 數"),
|
||||
refresh: bool = Query(False, description="略過短 TTL 快取並重新聚合"),
|
||||
) -> dict[str, Any]:
|
||||
summary = await fetch_automation_quality_summary(
|
||||
project_id=project_id,
|
||||
hours=hours,
|
||||
limit=limit,
|
||||
refresh=refresh,
|
||||
)
|
||||
started_at = perf_counter()
|
||||
try:
|
||||
summary = await fetch_automation_quality_summary(
|
||||
project_id=project_id,
|
||||
hours=hours,
|
||||
limit=limit,
|
||||
refresh=refresh,
|
||||
)
|
||||
except Exception as exc:
|
||||
record_quality_summary_observation(
|
||||
project_id=project_id,
|
||||
hours=hours,
|
||||
limit=limit,
|
||||
cache_status="error",
|
||||
success=False,
|
||||
duration_seconds=perf_counter() - started_at,
|
||||
error=exc.__class__.__name__,
|
||||
)
|
||||
raise
|
||||
summary["examples"] = []
|
||||
summary["visibility_note"] = (
|
||||
"Aggregate only. Use /truth-chain/{source_id} with operator auth for source-level details."
|
||||
|
||||
@@ -15,6 +15,7 @@ from time import time
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.db.base import get_db_context
|
||||
from src.services.awooop_truth_chain_service import get_quality_summary_observations
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -30,6 +31,18 @@ class VerificationSample:
|
||||
count: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class QualitySummaryObservation:
|
||||
project_id: str
|
||||
hours: int
|
||||
limit: int
|
||||
cache_status: str
|
||||
success: bool
|
||||
duration_seconds: float
|
||||
observed_at: float
|
||||
error: str | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Adr100SloMetricsSnapshot:
|
||||
automation_operations: list[AutomationOperationSample] = field(default_factory=list)
|
||||
@@ -40,6 +53,7 @@ class Adr100SloMetricsSnapshot:
|
||||
knowledge_entries_created_24h: int = 0
|
||||
high_confidence_total: int = 0
|
||||
high_confidence_success_total: int = 0
|
||||
quality_summary_observations: list[QualitySummaryObservation] = field(default_factory=list)
|
||||
emitted_at: float = field(default_factory=time)
|
||||
|
||||
|
||||
@@ -123,6 +137,23 @@ class Adr100SloMetricsService:
|
||||
high_confidence_success_total=int(
|
||||
confidence_row.high_confidence_success_total or 0
|
||||
),
|
||||
quality_summary_observations=[
|
||||
QualitySummaryObservation(
|
||||
project_id=str(row.get("project_id") or "awoooi"),
|
||||
hours=int(row.get("hours") or 0),
|
||||
limit=int(row.get("limit") or 0),
|
||||
cache_status=str(row.get("cache_status") or "unknown"),
|
||||
success=bool(row.get("success")),
|
||||
duration_seconds=float(row.get("duration_seconds") or 0.0),
|
||||
observed_at=float(row.get("observed_at") or 0.0),
|
||||
error=(
|
||||
str(row.get("error"))
|
||||
if row.get("error") is not None
|
||||
else None
|
||||
),
|
||||
)
|
||||
for row in get_quality_summary_observations()
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@@ -208,8 +239,56 @@ def render_adr100_slo_metrics(snapshot: Adr100SloMetricsSnapshot) -> str:
|
||||
"# HELP adr100_slo_emitter_last_success_timestamp Last successful ADR-100 DB metrics emission timestamp",
|
||||
"# TYPE adr100_slo_emitter_last_success_timestamp gauge",
|
||||
f"adr100_slo_emitter_last_success_timestamp {snapshot.emitted_at:.0f}",
|
||||
"",
|
||||
])
|
||||
lines.extend([
|
||||
"# HELP awooop_truth_chain_quality_summary_last_duration_seconds Last observed AwoooP truth-chain quality summary aggregation duration",
|
||||
"# TYPE awooop_truth_chain_quality_summary_last_duration_seconds gauge",
|
||||
])
|
||||
if snapshot.quality_summary_observations:
|
||||
for observation in snapshot.quality_summary_observations:
|
||||
labels = _quality_summary_labels(observation)
|
||||
lines.append(
|
||||
"awooop_truth_chain_quality_summary_last_duration_seconds"
|
||||
f"{labels} {observation.duration_seconds:.6f}"
|
||||
)
|
||||
else:
|
||||
lines.append(
|
||||
'awooop_truth_chain_quality_summary_last_duration_seconds{project_id="none",hours="0",limit="0",cache_status="none",success="false"} 0'
|
||||
)
|
||||
|
||||
lines.extend([
|
||||
"# HELP awooop_truth_chain_quality_summary_last_success Last observed AwoooP truth-chain quality summary success flag",
|
||||
"# TYPE awooop_truth_chain_quality_summary_last_success gauge",
|
||||
])
|
||||
if snapshot.quality_summary_observations:
|
||||
for observation in snapshot.quality_summary_observations:
|
||||
labels = _quality_summary_labels(observation)
|
||||
lines.append(
|
||||
"awooop_truth_chain_quality_summary_last_success"
|
||||
f"{labels} {1 if observation.success else 0}"
|
||||
)
|
||||
else:
|
||||
lines.append(
|
||||
'awooop_truth_chain_quality_summary_last_success{project_id="none",hours="0",limit="0",cache_status="none",success="false"} 0'
|
||||
)
|
||||
|
||||
lines.extend([
|
||||
"# HELP awooop_truth_chain_quality_summary_observed_timestamp Last observed AwoooP truth-chain quality summary timestamp",
|
||||
"# TYPE awooop_truth_chain_quality_summary_observed_timestamp gauge",
|
||||
])
|
||||
if snapshot.quality_summary_observations:
|
||||
for observation in snapshot.quality_summary_observations:
|
||||
labels = _quality_summary_labels(observation)
|
||||
lines.append(
|
||||
"awooop_truth_chain_quality_summary_observed_timestamp"
|
||||
f"{labels} {observation.observed_at:.0f}"
|
||||
)
|
||||
else:
|
||||
lines.append(
|
||||
'awooop_truth_chain_quality_summary_observed_timestamp{project_id="none",hours="0",limit="0",cache_status="none",success="false"} 0'
|
||||
)
|
||||
|
||||
lines.append("")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
@@ -217,6 +296,18 @@ def _escape_label(value: str) -> str:
|
||||
return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
|
||||
|
||||
|
||||
def _quality_summary_labels(observation: QualitySummaryObservation) -> str:
    """Render the Prometheus label set for one quality-summary observation.

    Args:
        observation: The observation whose identifying fields become labels.

    Returns:
        A ``{key="value",...}`` string with the labels ``project_id``,
        ``hours``, ``limit``, ``cache_status`` and ``success`` in that order.
        Free-text values are passed through ``_escape_label``; ``success`` is
        rendered as ``true`` / ``false``.
    """
    success_text = "true" if observation.success else "false"
    pairs = (
        ("project_id", _escape_label(observation.project_id)),
        ("hours", f"{observation.hours}"),
        ("limit", f"{observation.limit}"),
        ("cache_status", _escape_label(observation.cache_status)),
        ("success", success_text),
    )
    rendered = ",".join(f'{label}="{value}"' for label, value in pairs)
    return "{" + rendered + "}"
|
||||
|
||||
|
||||
_AUTOMATION_OPERATION_SQL = """
|
||||
WITH automation_scope AS (
|
||||
SELECT
|
||||
|
||||
@@ -80,6 +80,16 @@ ADR100_SLO_DEFINITIONS: tuple[Adr100SloDefinition, ...] = (
|
||||
unit="count",
|
||||
window="24h",
|
||||
),
|
||||
Adr100SloDefinition(
|
||||
name="truth_chain_quality_summary_latency",
|
||||
query='max(awooop_truth_chain_quality_summary_last_duration_seconds{project_id="awoooi",limit="8",success="true"})',
|
||||
target=2.0,
|
||||
hard_red_line=8.0,
|
||||
direction="below",
|
||||
unit="seconds",
|
||||
window="last_observation",
|
||||
minimum_events=0.0,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ import shutil
|
||||
from datetime import UTC, date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from time import perf_counter, time
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
@@ -37,6 +38,54 @@ _JSON_TEXT_FIELDS = {"gate_result", "source_envelope"}
|
||||
_QUALITY_SUMMARY_CACHE_TTL_SECONDS = int(
|
||||
os.getenv("AWOOOP_QUALITY_SUMMARY_CACHE_TTL_SECONDS", "30")
|
||||
)
|
||||
_QUALITY_SUMMARY_OBSERVATIONS: dict[str, dict[str, Any]] = {}
|
||||
|
||||
|
||||
def record_quality_summary_observation(
|
||||
*,
|
||||
project_id: str,
|
||||
hours: int,
|
||||
limit: int,
|
||||
cache_status: str,
|
||||
success: bool,
|
||||
duration_seconds: float,
|
||||
error: str | None = None,
|
||||
) -> None:
|
||||
normalized_project_id = project_id or "awoooi"
|
||||
normalized_cache_status = cache_status or "unknown"
|
||||
key = "|".join([
|
||||
normalized_project_id,
|
||||
str(int(hours)),
|
||||
str(int(limit)),
|
||||
normalized_cache_status,
|
||||
"success" if success else "failed",
|
||||
])
|
||||
_QUALITY_SUMMARY_OBSERVATIONS[key] = {
|
||||
"project_id": normalized_project_id,
|
||||
"hours": int(hours),
|
||||
"limit": int(limit),
|
||||
"cache_status": normalized_cache_status,
|
||||
"success": bool(success),
|
||||
"duration_seconds": max(0.0, float(duration_seconds)),
|
||||
"observed_at": time(),
|
||||
"error": str(error)[:160] if error else None,
|
||||
}
|
||||
|
||||
|
||||
def get_quality_summary_observations() -> list[dict[str, Any]]:
    """Return the recorded quality-summary observations in a stable order.

    Returns:
        Shallow copies of every stored observation, sorted by project id,
        hours, limit, cache status and finally success flag, so callers can
        modify the returned dicts without touching the registry.
    """

    def _ordering(entry: dict[str, Any]) -> tuple[str, int, int, str, bool]:
        # Missing or falsy fields sort as empty string / zero / False.
        return (
            str(entry.get("project_id") or ""),
            int(entry.get("hours") or 0),
            int(entry.get("limit") or 0),
            str(entry.get("cache_status") or ""),
            bool(entry.get("success")),
        )

    ordered = sorted(_QUALITY_SUMMARY_OBSERVATIONS.values(), key=_ordering)
    return [dict(entry) for entry in ordered]
|
||||
|
||||
|
||||
def _clean(value: Any) -> Any:
|
||||
@@ -2079,6 +2128,7 @@ async def fetch_automation_quality_summary(
|
||||
refresh: bool = False,
|
||||
) -> dict[str, Any]:
|
||||
"""Return a recent incident-level quality summary for the automation flywheel."""
|
||||
started_at = perf_counter()
|
||||
bounded_hours = max(1, min(int(hours), 168))
|
||||
bounded_limit = max(1, min(int(limit), 500))
|
||||
normalized_project_id = project_id or "awoooi"
|
||||
@@ -2094,13 +2144,26 @@ async def fetch_automation_quality_summary(
|
||||
ttl_seconds=_QUALITY_SUMMARY_CACHE_TTL_SECONDS,
|
||||
)
|
||||
if cached_summary is not None:
|
||||
duration_seconds = perf_counter() - started_at
|
||||
record_quality_summary_observation(
|
||||
project_id=normalized_project_id,
|
||||
hours=bounded_hours,
|
||||
limit=bounded_limit,
|
||||
cache_status="hit",
|
||||
success=True,
|
||||
duration_seconds=duration_seconds,
|
||||
)
|
||||
logger.info(
|
||||
"awooop_automation_quality_summary_cache_hit",
|
||||
project_id=normalized_project_id,
|
||||
window_hours=bounded_hours,
|
||||
limit=bounded_limit,
|
||||
ttl_seconds=_QUALITY_SUMMARY_CACHE_TTL_SECONDS,
|
||||
duration_seconds=round(duration_seconds, 3),
|
||||
)
|
||||
cached_summary = dict(cached_summary)
|
||||
cached_summary["cache_status"] = "hit"
|
||||
cached_summary["aggregation_duration_seconds"] = round(duration_seconds, 3)
|
||||
return cached_summary
|
||||
|
||||
cutoff = datetime.now(UTC) - timedelta(hours=bounded_hours)
|
||||
@@ -2525,9 +2588,22 @@ async def fetch_automation_quality_summary(
|
||||
cache_status="miss",
|
||||
cache_ttl_seconds=_QUALITY_SUMMARY_CACHE_TTL_SECONDS,
|
||||
)
|
||||
return await store_operator_summary_async(
|
||||
stored_summary = await store_operator_summary_async(
|
||||
"truth_chain_quality_summary",
|
||||
cache_key,
|
||||
summary,
|
||||
ttl_seconds=_QUALITY_SUMMARY_CACHE_TTL_SECONDS,
|
||||
)
|
||||
duration_seconds = perf_counter() - started_at
|
||||
record_quality_summary_observation(
|
||||
project_id=normalized_project_id,
|
||||
hours=bounded_hours,
|
||||
limit=bounded_limit,
|
||||
cache_status="miss",
|
||||
success=True,
|
||||
duration_seconds=duration_seconds,
|
||||
)
|
||||
stored_summary = dict(stored_summary)
|
||||
stored_summary["cache_status"] = "miss"
|
||||
stored_summary["aggregation_duration_seconds"] = round(duration_seconds, 3)
|
||||
return stored_summary
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
2. knowledge_degradation — KM 7 天未更新 > 20% 總量 → 告警知識衰退
|
||||
3. llm_hallucination — 近 100 筆 evidence verification_result=failed 比例 > 10%
|
||||
4. execution_blast_radius — 近 100 筆 auto_repair_executions.success=False 比例 > 15%
|
||||
5. slo_compliance — 4 個 SLO 合規性檢查(ADR-100),違反時降級飛輪行為
|
||||
5. slo_compliance — 5 個 SLO 合規性檢查(ADR-100),違反時降級飛輪行為
|
||||
|
||||
所有 check 互相隔離(try/except),任一失敗不阻斷其他項目。
|
||||
|
||||
@@ -57,6 +57,36 @@ RECENT_LIMIT = 100 # 最近幾筆做統計
|
||||
GOVERNANCE_SELF_CHECK_LEASE_KEY = "governance:self_check:cycle_lease"
|
||||
|
||||
|
||||
def _slo_remediation_items(name: str) -> list[str]:
|
||||
if name == "truth_chain_quality_summary_latency":
|
||||
return [
|
||||
"Check truth-chain quality summary cache miss latency and DB query plan",
|
||||
"Confirm operator summary cache is warm before treating homepage SLO as degraded",
|
||||
]
|
||||
return [
|
||||
"Pause auto-scaling or risky auto-fix tasks",
|
||||
"Review evidence/decision traces and adjust policy thresholds",
|
||||
]
|
||||
|
||||
|
||||
def _slo_actionable_items(name: str) -> list[str]:
|
||||
if name == "truth_chain_quality_summary_latency":
|
||||
return [
|
||||
"Call /api/v1/platform/truth-chain/quality/summary?limit=8&refresh=true and compare duration",
|
||||
"Inspect /metrics for awooop_truth_chain_quality_summary_last_duration_seconds",
|
||||
]
|
||||
return [
|
||||
"Check verifier lag and post-exec learning health",
|
||||
"Run emergency incident audit on failed approvals",
|
||||
]
|
||||
|
||||
|
||||
def _slo_next_action(name: str) -> str:
|
||||
if name == "truth_chain_quality_summary_latency":
|
||||
return "run_truth_chain_quality_summary_latency_probe"
|
||||
return "trigger_flywheel_safeguard"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# GovernanceAgent
|
||||
# =============================================================================
|
||||
@@ -421,7 +451,7 @@ class GovernanceAgent:
|
||||
# =========================================================================
|
||||
|
||||
async def check_slo_compliance(self) -> dict[str, Any]:
|
||||
"""SLO 4 項合規性檢查 — 違反時降級飛輪行為
|
||||
"""SLO 5 項合規性檢查 — 違反時降級飛輪行為
|
||||
|
||||
從 Prometheus Recording rules 讀取 SLI 值,
|
||||
與硬紅線閾值比對,違反時呼叫 _alert() 寫 PG + 推 Telegram。
|
||||
@@ -430,6 +460,7 @@ class GovernanceAgent:
|
||||
SLO 2 決策準確率: sli:decision_accuracy:5m 硬紅線 < 0.85
|
||||
SLO 3 信心校準: sli:confidence_calibration:1h 硬紅線 < 0.70
|
||||
SLO 4 KM 增長率: knowledge_entries_created_24h / sli:km_growth_rate:24h 硬紅線 < 5
|
||||
SLO 5 總覽延遲: awooop_truth_chain_quality_summary_last_duration_seconds 硬紅線 > 8s
|
||||
|
||||
2026-04-27 P3.4 by Claude — AI SLO(ADR-100)
|
||||
"""
|
||||
@@ -446,13 +477,15 @@ class GovernanceAgent:
|
||||
"decision_accuracy": "sli:decision_accuracy:5m",
|
||||
"confidence_calibration": "sli:confidence_calibration:1h",
|
||||
"km_growth_rate": "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)",
|
||||
"truth_chain_quality_summary_latency": 'max(awooop_truth_chain_quality_summary_last_duration_seconds{project_id="awoooi",limit="8",success="true"})',
|
||||
}
|
||||
# 硬紅線:低於此值必須告警(非軟性警告)
|
||||
# 硬紅線:above 指標低於此值、below 指標高於此值時必須告警(非軟性警告)
|
||||
hard_red_lines: dict[str, float] = {
|
||||
"autonomy_rate": 0.70,
|
||||
"decision_accuracy": 0.85,
|
||||
"confidence_calibration": 0.70,
|
||||
"km_growth_rate": 5.0,
|
||||
"truth_chain_quality_summary_latency": 8.0,
|
||||
}
|
||||
# SLO 目標值(供日誌記錄)
|
||||
slo_targets: dict[str, float] = {
|
||||
@@ -460,6 +493,14 @@ class GovernanceAgent:
|
||||
"decision_accuracy": 0.90,
|
||||
"confidence_calibration": 0.80,
|
||||
"km_growth_rate": 20.0,
|
||||
"truth_chain_quality_summary_latency": 2.0,
|
||||
}
|
||||
slo_directions: dict[str, str] = {
|
||||
"autonomy_rate": "above",
|
||||
"decision_accuracy": "above",
|
||||
"confidence_calibration": "above",
|
||||
"km_growth_rate": "above",
|
||||
"truth_chain_quality_summary_latency": "below",
|
||||
}
|
||||
|
||||
results: dict[str, Any] = {}
|
||||
@@ -511,7 +552,17 @@ class GovernanceAgent:
|
||||
continue
|
||||
threshold = hard_red_lines[name]
|
||||
target = slo_targets[name]
|
||||
violated = value < threshold
|
||||
direction = slo_directions.get(name, "above")
|
||||
violated = value > threshold if direction == "below" else value < threshold
|
||||
gap = (
|
||||
value - threshold
|
||||
if violated and direction == "below"
|
||||
else threshold - value
|
||||
if violated
|
||||
else target - value
|
||||
if direction == "below"
|
||||
else value - target
|
||||
)
|
||||
|
||||
results[name] = {
|
||||
"name": name,
|
||||
@@ -519,7 +570,8 @@ class GovernanceAgent:
|
||||
"value": round(value, 4),
|
||||
"slo_target": target,
|
||||
"hard_red_line": threshold,
|
||||
"gap": round(threshold - value, 4) if violated else round(value - target, 4),
|
||||
"direction": direction,
|
||||
"gap": round(gap, 4),
|
||||
"violated": violated,
|
||||
}
|
||||
|
||||
@@ -533,20 +585,15 @@ class GovernanceAgent:
|
||||
"value": round(value, 4),
|
||||
"target": target,
|
||||
"threshold": threshold,
|
||||
"gap": round(threshold - value, 4),
|
||||
"direction": direction,
|
||||
"gap": round(gap, 4),
|
||||
},
|
||||
"remediation": {
|
||||
"items": [
|
||||
"Pause auto-scaling or risky auto-fix tasks",
|
||||
"Review evidence/decision traces and adjust policy thresholds",
|
||||
],
|
||||
"next_action": "trigger_flywheel_safeguard",
|
||||
"items": _slo_remediation_items(name),
|
||||
"next_action": _slo_next_action(name),
|
||||
},
|
||||
"actionable": {
|
||||
"items": [
|
||||
"Check verifier lag and post-exec learning health",
|
||||
"Run emergency incident audit on failed approvals",
|
||||
],
|
||||
"items": _slo_actionable_items(name),
|
||||
},
|
||||
},
|
||||
)
|
||||
@@ -716,7 +763,7 @@ class GovernanceAgent:
|
||||
"actionable": {
|
||||
"items": [
|
||||
"先確認 /metrics 是否已輸出 ADR-100 底層指標",
|
||||
"檢查 Prometheus rule 是否已載入 sli:autonomy_rate:5m 等 4 項告警規則",
|
||||
"檢查 Prometheus rule 與 truth-chain quality summary runtime metric 是否可查詢",
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from src.services.adr100_slo_metrics_service import (
|
||||
Adr100SloMetricsSnapshot,
|
||||
AutomationOperationSample,
|
||||
QualitySummaryObservation,
|
||||
VerificationSample,
|
||||
render_adr100_slo_metrics,
|
||||
)
|
||||
@@ -38,6 +39,17 @@ def test_render_adr100_slo_metrics_outputs_required_series() -> None:
|
||||
knowledge_entries_created_24h=25,
|
||||
high_confidence_total=9,
|
||||
high_confidence_success_total=7,
|
||||
quality_summary_observations=[
|
||||
QualitySummaryObservation(
|
||||
project_id="awoooi",
|
||||
hours=24,
|
||||
limit=8,
|
||||
cache_status="miss",
|
||||
success=True,
|
||||
duration_seconds=1.234567,
|
||||
observed_at=1_778_756_100,
|
||||
),
|
||||
],
|
||||
emitted_at=1_778_756_000,
|
||||
)
|
||||
|
||||
@@ -58,6 +70,18 @@ def test_render_adr100_slo_metrics_outputs_required_series() -> None:
|
||||
assert "approval_records_high_confidence_total 9" in rendered
|
||||
assert "approval_records_high_confidence_success_total 7" in rendered
|
||||
assert "adr100_slo_emitter_last_success_timestamp 1778756000" in rendered
|
||||
assert (
|
||||
'awooop_truth_chain_quality_summary_last_duration_seconds{project_id="awoooi",'
|
||||
'hours="24",limit="8",cache_status="miss",success="true"} 1.234567'
|
||||
) in rendered
|
||||
assert (
|
||||
'awooop_truth_chain_quality_summary_last_success{project_id="awoooi",'
|
||||
'hours="24",limit="8",cache_status="miss",success="true"} 1'
|
||||
) in rendered
|
||||
assert (
|
||||
'awooop_truth_chain_quality_summary_observed_timestamp{project_id="awoooi",'
|
||||
'hours="24",limit="8",cache_status="miss",success="true"} 1778756100'
|
||||
) in rendered
|
||||
|
||||
|
||||
def test_render_adr100_slo_metrics_emits_zero_series_when_empty() -> None:
|
||||
@@ -71,6 +95,10 @@ def test_render_adr100_slo_metrics_emits_zero_series_when_empty() -> None:
|
||||
assert 'post_execution_verification_created_24h{outcome="none"} 0' in rendered
|
||||
assert "knowledge_entries_total 0" in rendered
|
||||
assert "knowledge_entries_created_24h 0" in rendered
|
||||
assert (
|
||||
'awooop_truth_chain_quality_summary_last_duration_seconds{project_id="none",'
|
||||
'hours="0",limit="0",cache_status="none",success="false"} 0'
|
||||
) in rendered
|
||||
|
||||
|
||||
def test_render_adr100_slo_metrics_escapes_labels() -> None:
|
||||
|
||||
@@ -10,6 +10,12 @@ from src.services.adr100_slo_status_service import (
|
||||
)
|
||||
|
||||
|
||||
# PromQL expression for the quality-summary latency SLO. The tests use it as a
# key into their canned Prometheus values, so it presumably has to match the
# query text issued by the service under test exactly -- keep them in sync.
QUALITY_SUMMARY_LATENCY_QUERY = (
    "max(awooop_truth_chain_quality_summary_last_duration_seconds"
    '{project_id="awoooi",limit="8",success="true"})'
)
|
||||
|
||||
|
||||
class _FakePrometheusResponse:
|
||||
def __init__(self, payload: dict[str, Any]) -> None:
|
||||
self._payload = payload
|
||||
@@ -89,6 +95,7 @@ async def test_fetch_report_marks_ratio_slos_low_volume(monkeypatch):
|
||||
'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
|
||||
"sum(rate(approval_records_high_confidence_total[1h]))": "0",
|
||||
"max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "24",
|
||||
QUALITY_SUMMARY_LATENCY_QUERY: "1.2",
|
||||
}
|
||||
|
||||
monkeypatch.setattr(
|
||||
@@ -109,6 +116,8 @@ async def test_fetch_report_marks_ratio_slos_low_volume(monkeypatch):
|
||||
assert by_name["confidence_calibration"]["status"] == "skipped_low_volume"
|
||||
assert by_name["km_growth_rate"]["status"] == "ok"
|
||||
assert by_name["km_growth_rate"]["value"] == 24
|
||||
assert by_name["truth_chain_quality_summary_latency"]["status"] == "ok"
|
||||
assert by_name["truth_chain_quality_summary_latency"]["direction"] == "below"
|
||||
assert report["overall_status"] == "partial"
|
||||
assert report["overall_compliance"] == 1.0
|
||||
|
||||
@@ -121,6 +130,7 @@ async def test_fetch_report_classifies_hard_red_line_violation(monkeypatch):
|
||||
'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
|
||||
"sum(rate(approval_records_high_confidence_total[1h]))": "0",
|
||||
"max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "3",
|
||||
QUALITY_SUMMARY_LATENCY_QUERY: "1.2",
|
||||
}
|
||||
|
||||
monkeypatch.setattr(
|
||||
@@ -142,6 +152,36 @@ async def test_fetch_report_classifies_hard_red_line_violation(monkeypatch):
|
||||
assert report["overall_status"] == "violated"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_fetch_report_classifies_below_direction_slo(monkeypatch):
    """A "below"-direction SLO is reported as violated when its value is too high.

    Prometheus is faked so that the quality-summary latency query returns 9.5
    while the KM growth query returns a healthy 24; the latency metric and the
    overall report must both come back as violated.
    """
    prometheus_values = {
        "sum(rate(automation_operation_log_total[5m]))": "0",
        'sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))': "0",
        "sum(rate(approval_records_high_confidence_total[1h]))": "0",
        "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "24",
        QUALITY_SUMMARY_LATENCY_QUERY: "9.5",
    }

    def _fake_client_factory(*args, **kwargs):
        # Every AsyncClient construction yields a fake backed by the canned values.
        return _FakePrometheusClient(prometheus_values)

    monkeypatch.setattr("httpx.AsyncClient", _fake_client_factory)
    monkeypatch.setattr(
        Adr100SloStatusService,
        "_fetch_verification_coverage",
        _low_volume_coverage,
    )

    service = Adr100SloStatusService()
    report = await service.fetch_report()

    metrics_by_name = {entry["name"]: entry for entry in report["metrics"]}
    latency_metric = metrics_by_name["truth_chain_quality_summary_latency"]

    assert latency_metric["status"] == "violated"
    assert latency_metric["direction"] == "below"
    assert latency_metric["value"] == 9.5
    assert report["overall_status"] == "violated"
|
||||
|
||||
|
||||
def test_verification_coverage_payload_flags_backlog():
|
||||
payload = _build_verification_coverage_payload(
|
||||
{
|
||||
|
||||
@@ -797,6 +797,12 @@ class TestRunSelfCheckGlobalFailureAlert:
|
||||
assert "governance_self_failure" not in calls
|
||||
|
||||
|
||||
# PromQL expression for the quality-summary latency SLO. Tests use it as the
# lookup key for canned Prometheus responses, so it must stay textually
# identical to the query the code under test sends.
QUALITY_SUMMARY_LATENCY_QUERY = (
    "max(awooop_truth_chain_quality_summary_last_duration_seconds"
    '{project_id="awoooi",limit="8",success="true"})'
)
|
||||
|
||||
|
||||
class _FakePrometheusResponse:
|
||||
def __init__(self, value: str) -> None:
|
||||
self._value = value
|
||||
@@ -809,7 +815,7 @@ class _FakePrometheusResponse:
|
||||
|
||||
|
||||
class _FakePrometheusClient:
|
||||
def __init__(self, value: str) -> None:
|
||||
def __init__(self, value: str | dict[str, str]) -> None:
|
||||
self._value = value
|
||||
self.queries: list[str] = []
|
||||
|
||||
@@ -820,7 +826,10 @@ class _FakePrometheusClient:
|
||||
return False
|
||||
|
||||
async def get(self, *args, **kwargs): # noqa: ANN002, ANN003
|
||||
self.queries.append(str(kwargs.get("params", {}).get("query", "")))
|
||||
query = str(kwargs.get("params", {}).get("query", ""))
|
||||
self.queries.append(query)
|
||||
if isinstance(self._value, dict):
|
||||
return _FakePrometheusResponse(self._value.get(query, "NaN"))
|
||||
return _FakePrometheusResponse(self._value)
|
||||
|
||||
|
||||
@@ -838,6 +847,7 @@ class TestCheckSloCompliance:
|
||||
"decision_accuracy",
|
||||
"confidence_calibration",
|
||||
"km_growth_rate",
|
||||
"truth_chain_quality_summary_latency",
|
||||
):
|
||||
assert result[name]["status"] == "skipped"
|
||||
assert result[name]["reason"] == "prometheus_nan_or_inf"
|
||||
@@ -847,7 +857,13 @@ class TestCheckSloCompliance:
|
||||
async def test_km_growth_prefers_db_derived_24h_gauge(self):
|
||||
"""KM SLO 要優先使用 DB 24h gauge,避免新 counter 暖機時誤報 0."""
|
||||
agent = _make_agent()
|
||||
client = _FakePrometheusClient("25")
|
||||
client = _FakePrometheusClient({
|
||||
"sli:autonomy_rate:5m": "0.95",
|
||||
"sli:decision_accuracy:5m": "0.96",
|
||||
"sli:confidence_calibration:1h": "0.97",
|
||||
"max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)": "25",
|
||||
QUALITY_SUMMARY_LATENCY_QUERY: "1.1",
|
||||
})
|
||||
|
||||
with patch("httpx.AsyncClient", return_value=client):
|
||||
result = await agent.check_slo_compliance()
|
||||
@@ -855,3 +871,4 @@ class TestCheckSloCompliance:
|
||||
assert "max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)" in client.queries
|
||||
assert result["km_growth_rate"]["status"] == "ok"
|
||||
assert result["km_growth_rate"]["value"] == 25
|
||||
assert result["truth_chain_quality_summary_latency"]["status"] == "ok"
|
||||
|
||||
@@ -1,3 +1,29 @@
|
||||
## 2026-06-01|truth-chain quality summary 納入 AI 自健診 SLO
|
||||
|
||||
**背景**:
|
||||
|
||||
- 正式環境已把 `/api/v1/platform/truth-chain/quality/summary` 的 N+1 查詢修成批次化,但「首頁/quality summary 是否又變慢」尚未進入 AI 自健診。
|
||||
- 先前飛輪核心異常只會看到泛化的 `auto_execute_success_rate`,無法快速判斷是治理資料、執行資料,還是 operator summary 資料面拖慢。
|
||||
|
||||
**本次調整**:
|
||||
|
||||
- `apps/api/src/services/awooop_truth_chain_service.py`:記錄 quality summary 的 cache hit / miss 聚合耗時與最後觀測時間。
|
||||
- `apps/api/src/api/v1/platform/truth_chain.py`:端點例外時也寫入 failure observation,讓 `/metrics` 能看見摘要面失敗。
|
||||
- `apps/api/src/services/adr100_slo_metrics_service.py`:新增 `awooop_truth_chain_quality_summary_last_duration_seconds`、`awooop_truth_chain_quality_summary_last_success`、`awooop_truth_chain_quality_summary_observed_timestamp`。
|
||||
- `apps/api/src/services/adr100_slo_status_service.py`:新增第 5 個 ADR-100 SLO:`truth_chain_quality_summary_latency`,目標 `< 2s`、硬紅線 `> 8s`。
|
||||
- `apps/api/src/services/governance_agent.py`:SLO 判斷支援 `above` / `below` 方向,避免把 latency 這種「越低越好」的指標誤判。
|
||||
|
||||
**驗證**:
|
||||
|
||||
- `python3 -m py_compile apps/api/src/services/awooop_truth_chain_service.py apps/api/src/services/adr100_slo_metrics_service.py apps/api/src/services/adr100_slo_status_service.py apps/api/src/services/governance_agent.py apps/api/src/api/v1/platform/truth_chain.py`
|
||||
- `python3 scripts/security/security-mirror-progress-guard.py --root .` → `SECURITY_MIRROR_PROGRESS_GUARD_OK`
|
||||
- `DATABASE_URL=postgresql://test:test@localhost:5432/test PYTHONPATH=apps/api /Users/ogt/.pyenv/shims/pytest apps/api/tests/test_adr100_slo_metrics_service.py apps/api/tests/test_adr100_slo_status_service.py apps/api/tests/test_governance_agent.py apps/api/tests/test_awooop_truth_chain_service.py -q` → `85 passed`
|
||||
|
||||
**進度邊界**:
|
||||
|
||||
- 整體 AI 自動化飛輪進度仍維持 `61%`;這輪是自健診可觀測性與 SLO 精準度補強,不代表自動修復成功率已提升。
|
||||
- 下一步需推 Gitea main、等待 production deploy,並以正式 `/metrics` / `/api/v1/ai/slo` 驗證新 SLO 是否被 Prometheus 抓到。
|
||||
|
||||
## 2026-06-01|IwoooS 首層漸進揭露使用體驗收斂
|
||||
|
||||
**背景**:
|
||||
|
||||
Reference in New Issue
Block a user