diff --git a/apps/api/src/api/v1/platform/truth_chain.py b/apps/api/src/api/v1/platform/truth_chain.py index 7b0fad01..76bcc500 100644 --- a/apps/api/src/api/v1/platform/truth_chain.py +++ b/apps/api/src/api/v1/platform/truth_chain.py @@ -10,11 +10,38 @@ from src.core.awooop_operator_auth import ( AwoooPOperatorPrincipal, verify_awooop_operator, ) -from src.services.awooop_truth_chain_service import fetch_truth_chain +from src.services.awooop_truth_chain_service import ( + fetch_automation_quality_summary, + fetch_truth_chain, +) router = APIRouter() +@router.get( + "/truth-chain/quality/summary", + summary="查詢 AI 自動化品質總覽", + description=( + "T12c read-only endpoint. 聚合最近 incident 的 automation quality gate," + "讓 Operator 不必逐張 Telegram 卡片判斷是否真正完成 AI 自動修復。" + ), +) +async def get_automation_quality_summary( + project_id: str = Query("awoooi", description="租戶 ID"), + hours: int = Query(24, ge=1, le=168, description="回看小時數"), + limit: int = Query(200, ge=1, le=500, description="最多評估 incident 數"), + operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator), +) -> dict[str, Any]: + # The operator dependency gates this summary because it aggregates incident + # lifecycle state across alert, execution, and notification tables. + _ = operator + return await fetch_automation_quality_summary( + project_id=project_id, + hours=hours, + limit=limit, + ) + + @router.get( "/truth-chain/{source_id}", summary="查詢 Telegram / Incident / Drift 真相鏈", diff --git a/apps/api/src/services/awooop_truth_chain_service.py b/apps/api/src/services/awooop_truth_chain_service.py index 8809c5af..01736c90 100644 --- a/apps/api/src/services/awooop_truth_chain_service.py +++ b/apps/api/src/services/awooop_truth_chain_service.py @@ -8,7 +8,7 @@ Telegram cards can be audited without guessing which subsystem owns the truth. from __future__ import annotations import json -from datetime import date, datetime +from datetime import UTC, date, datetime, timedelta from decimal import Decimal from typing import Any from uuid import UUID @@ -489,6 +489,130 @@ def build_automation_quality( } +def _automation_quality_score_bucket(score: int) -> str: + if score >= 85: + return "green" + if score >= 60: + return "yellow" + return "red" + + +def summarize_automation_quality_records( + *, + project_id: str, + window_hours: int, + records: list[dict[str, Any]], + limit: int, +) -> dict[str, Any]: + """Aggregate per-incident automation quality into an operator summary.""" + verdicts: dict[str, dict[str, Any]] = {} + gate_failures: dict[str, dict[str, Any]] = {} + score_buckets: dict[str, int] = {"green": 0, "yellow": 0, "red": 0} + examples: list[dict[str, Any]] = [] + total_score = 0 + evaluated_total = 0 + verified_total = 0 + + for record in records: + incident = record.get("incident") if isinstance(record.get("incident"), dict) else {} + truth_status = record.get("truth_status") if isinstance(record.get("truth_status"), dict) else {} + quality = record.get("automation_quality") if isinstance(record.get("automation_quality"), dict) else {} + if quality.get("applicable") is not True: + continue + + evaluated_total += 1 + score = int(quality.get("score") or 0) + total_score += score + bucket = _automation_quality_score_bucket(score) + score_buckets[bucket] += 1 + + verdict = str(quality.get("verdict") or "unknown") + if verdict == "auto_repaired_verified": + verified_total += 1 + verdict_row = verdicts.setdefault( + verdict, + { + "verdict": verdict, + "total": 0, + "score_sum": 0, + "min_score": score, + "max_score": score, + "needs_human": False, + }, + ) + verdict_row["total"] += 1 + verdict_row["score_sum"] += score + verdict_row["min_score"] = min(int(verdict_row["min_score"]), score) + verdict_row["max_score"] = max(int(verdict_row["max_score"]), score) + verdict_row["needs_human"] = bool( + verdict_row["needs_human"] or truth_status.get("needs_human") + ) + + for gate in quality.get("gates") or []: + if not isinstance(gate, dict): + continue + gate_status = str(gate.get("status") or "") + if gate_status not in {"failed", "missing"}: + continue + gate_name = str(gate.get("name") or "unknown") + gate_row = gate_failures.setdefault( + gate_name, + {"gate": gate_name, "total": 0, "statuses": {}}, + ) + gate_row["total"] += 1 + gate_row["statuses"][gate_status] = int(gate_row["statuses"].get(gate_status, 0)) + 1 + + examples.append({ + "incident_id": incident.get("incident_id"), + "alertname": incident.get("alertname"), + "severity": incident.get("severity"), + "status": incident.get("status"), + "created_at": incident.get("created_at"), + "truth_stage": truth_status.get("current_stage"), + "truth_stage_status": truth_status.get("stage_status"), + "needs_human": bool(truth_status.get("needs_human")), + "verdict": verdict, + "score": score, + "score_bucket": bucket, + "blockers": list(quality.get("blockers") or [])[:8], + }) + + by_verdict = [] + for row in verdicts.values(): + total = int(row["total"]) + row["avg_score"] = round(float(row.pop("score_sum")) / total, 1) if total else 0.0 + by_verdict.append(row) + by_verdict.sort(key=lambda row: (-int(row["total"]), str(row["verdict"]))) + + failing_gates = sorted( + gate_failures.values(), + key=lambda row: (-int(row["total"]), str(row["gate"])), + ) + + return { + "schema_version": "automation_quality_summary_v1", + "project_id": project_id, + "window_hours": window_hours, + "limit": limit, + "incident_total": len(records), + "evaluated_total": evaluated_total, + "verified_auto_repair_total": verified_total, + "average_score": round(total_score / evaluated_total, 1) if evaluated_total else 0.0, + "score_buckets": score_buckets, + "by_verdict": by_verdict, + "gate_failures": failing_gates, + "examples": examples[:25], + "production_claim": { + "can_claim_full_auto_repair": evaluated_total > 0 and verified_total == evaluated_total, + "reason": ( + "all_evaluated_incidents_auto_repaired_verified" + if evaluated_total > 0 and verified_total == evaluated_total + else "some_incidents_are_not_auto_repaired_verified" + ), + }, + } + + def _summarize_mcp(rows: list[dict[str, Any]]) -> dict[str, Any]: by_tool: dict[str, dict[str, Any]] = {} success_count = 0 @@ -1108,3 +1232,72 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[ current_stage=truth_status["current_stage"], ) return result + + +async def fetch_automation_quality_summary( + *, + project_id: str = "awoooi", + hours: int = 24, + limit: int = 200, +) -> dict[str, Any]: + """Return a recent incident-level quality summary for the automation flywheel.""" + bounded_hours = max(1, min(int(hours), 168)) + bounded_limit = max(1, min(int(limit), 500)) + cutoff = datetime.now(UTC) - timedelta(hours=bounded_hours) + + async with get_db_context(project_id) as db: + incidents = await _fetch_all( + db, + """ + SELECT + incident_id, + project_id, + status::text AS status, + severity::text AS severity, + alertname, + alert_category, + notification_type, + created_at, + updated_at, + resolved_at, + verification_result + FROM incidents + WHERE (project_id = :project_id OR project_id IS NULL) + AND created_at >= :cutoff + ORDER BY created_at DESC + LIMIT :limit + """, + { + "project_id": project_id, + "cutoff": cutoff, + "limit": bounded_limit, + }, + ) + + records: list[dict[str, Any]] = [] + for incident in incidents: + incident_id = str(incident.get("incident_id") or "") + if not incident_id: + continue + truth_chain = await fetch_truth_chain(source_id=incident_id, project_id=project_id) + records.append({ + "incident": truth_chain.get("incident") or incident, + "truth_status": truth_chain.get("truth_status") or {}, + "automation_quality": truth_chain.get("automation_quality") or {}, + }) + + summary = summarize_automation_quality_records( + project_id=project_id, + window_hours=bounded_hours, + records=records, + limit=bounded_limit, + ) + logger.info( + "awooop_automation_quality_summary_fetched", + project_id=project_id, + window_hours=bounded_hours, + incident_total=summary["incident_total"], + evaluated_total=summary["evaluated_total"], + can_claim_full_auto_repair=summary["production_claim"]["can_claim_full_auto_repair"], + ) + return summary diff --git a/apps/api/tests/test_awooop_truth_chain_service.py b/apps/api/tests/test_awooop_truth_chain_service.py index 225ecf4b..777d5580 100644 --- a/apps/api/tests/test_awooop_truth_chain_service.py +++ b/apps/api/tests/test_awooop_truth_chain_service.py @@ -8,11 +8,13 @@ from src.services.awooop_ansible_audit_service import ( build_ansible_truth, ) from src.services.awooop_truth_chain_service import ( + _automation_quality_score_bucket, build_automation_quality, build_incident_reconciliation, _clean_row, _summarize_gateway_mcp, _truth_status, + summarize_automation_quality_records, ) from src.services.drift_repeat_state import ( build_drift_fingerprint, @@ -280,6 +282,89 @@ def test_automation_quality_marks_verified_auto_repair() -> None: assert quality["blockers"] == [] +def test_automation_quality_score_buckets_are_stable() -> None: + assert _automation_quality_score_bucket(100) == "green" + assert _automation_quality_score_bucket(85) == "green" + assert _automation_quality_score_bucket(84) == "yellow" + assert _automation_quality_score_bucket(60) == "yellow" + assert _automation_quality_score_bucket(59) == "red" + + +def test_automation_quality_summary_denies_full_claim_when_unverified() -> None: + summary = summarize_automation_quality_records( + project_id="awoooi", + window_hours=24, + limit=200, + records=[ + { + "incident": { + "incident_id": "INC-OK", + "alertname": "container recovered", + "severity": "P4", + "status": "RESOLVED", + "created_at": "2026-05-13T01:00:00+00:00", + }, + "truth_status": { + "current_stage": "execution_succeeded", + "stage_status": "success", + "needs_human": False, + }, + "automation_quality": { + "applicable": True, + "verdict": "auto_repaired_verified", + "score": 100, + "gates": [ + {"name": "verification_recorded", "status": "passed"}, + ], + "blockers": [], + }, + }, + { + "incident": { + "incident_id": "INC-GAP", + "alertname": "low risk action", + "severity": "P4", + "status": "INVESTIGATING", + "created_at": "2026-05-13T02:00:00+00:00", + }, + "truth_status": { + "current_stage": "execution_succeeded", + "stage_status": "success", + "needs_human": False, + }, + "automation_quality": { + "applicable": True, + "verdict": "execution_unverified", + "score": 65, + "gates": [ + {"name": "verification_recorded", "status": "missing"}, + {"name": "learning_recorded", "status": "missing"}, + ], + "blockers": ["verification_recorded", "learning_recorded"], + }, + }, + ], + ) + + assert summary["schema_version"] == "automation_quality_summary_v1" + assert summary["incident_total"] == 2 + assert summary["evaluated_total"] == 2 + assert summary["verified_auto_repair_total"] == 1 + assert summary["score_buckets"] == {"green": 1, "yellow": 1, "red": 0} + assert summary["production_claim"]["can_claim_full_auto_repair"] is False + assert summary["production_claim"]["reason"] == "some_incidents_are_not_auto_repaired_verified" + assert {row["verdict"]: row["total"] for row in summary["by_verdict"]} == { + "auto_repaired_verified": 1, + "execution_unverified": 1, + } + assert {row["gate"]: row["total"] for row in summary["gate_failures"]} == { + "learning_recorded": 1, + "verification_recorded": 1, + } + assert summary["examples"][1]["incident_id"] == "INC-GAP" + assert summary["examples"][1]["score_bucket"] == "yellow" + + def test_reconciliation_marks_consistent_resolved_execution() -> None: reconciliation = build_incident_reconciliation( incident={"incident_id": "INC-2", "status": "RESOLVED"}, diff --git a/apps/api/tests/test_platform_router_order.py b/apps/api/tests/test_platform_router_order.py index f1a013ca..6f5e8944 100644 --- a/apps/api/tests/test_platform_router_order.py +++ b/apps/api/tests/test_platform_router_order.py @@ -35,3 +35,15 @@ def test_truth_chain_route_is_registered() -> None: ] assert "/truth-chain/{source_id}" in paths + + +def test_truth_chain_quality_summary_route_is_registered_before_dynamic_source_id() -> None: + paths = [ + route.path + for route in router.routes + if "GET" in getattr(route, "methods", set()) + ] + + assert "/truth-chain/quality/summary" in paths + assert "/truth-chain/{source_id}" in paths + assert paths.index("/truth-chain/quality/summary") < paths.index("/truth-chain/{source_id}") diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index df149643..6268be13 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -7410,3 +7410,35 @@ _format_automation_quality_lines(...) returned 6 lines with verdict + score + ex - 目前實況仍顯示多筆 incident 是 `execution_unverified`,不能宣稱完整 AI 自動修復已完成。 - 下一步應把 `execution_unverified` 的 verification gap 收斂到 post-execution verifier / learning writeback,而不是只在 Telegram 補文案。 - 目前整體進度更新:約 72%。 + +### 2026-05-13 — AwoooP truth-chain T12c:全體告警自動化品質總覽(local green) + +**目的**: + +- Operator 不應逐張 Telegram 卡片猜「是否重複發生」、「是否已進 AI 自動修復」、「卡在哪個流程」。 +- T12c 先提供 read-only 聚合 API,把最近 incident 全部套用 T12b automation quality gate,回傳 verdict 分布、分數區間、缺失 gate、代表案例與 production claim。 + +**變更**: + +- 新增 `GET /api/v1/platform/truth-chain/quality/summary`: + - query: `project_id` / `hours` / `limit` + - 回傳 `automation_quality_summary_v1` + - 顯示 `by_verdict`、`score_buckets`、`gate_failures`、`examples` + - `production_claim.can_claim_full_auto_repair` 嚴格要求所有評估 incident 都是 `auto_repaired_verified` +- 新增純函式 `summarize_automation_quality_records(...)`,讓品質總覽可單元測試。 +- 新增 route-order 測試,確保 `/truth-chain/quality/summary` 不會被 `/truth-chain/{source_id}` 誤吃。 + +**local verification**: + +```text +DATABASE_URL=postgresql+asyncpg://u:p@localhost:5432/db pytest tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py -q +19 passed + +ruff check --select F821 src/services/awooop_truth_chain_service.py src/api/v1/platform/truth_chain.py tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py +All checks passed + +python3 -m py_compile src/services/awooop_truth_chain_service.py src/api/v1/platform/truth_chain.py tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py +OK +``` + +**目前整體進度**:約 73%。