feat(awooop): summarize automation quality

2026-05-13 15:56:42 +08:00
parent c00e911b28
commit ae7c7cbd23
5 changed files with 351 additions and 2 deletions
--- a/apps/api/src/api/v1/platform/truth_chain.py
+++ b/apps/api/src/api/v1/platform/truth_chain.py
@@ -10,11 +10,38 @@ from src.core.awooop_operator_auth import (
    AwoooPOperatorPrincipal,
    verify_awooop_operator,
 )
-from src.services.awooop_truth_chain_service import fetch_truth_chain
+from src.services.awooop_truth_chain_service import (
+    fetch_automation_quality_summary,
+    fetch_truth_chain,
+)

 router = APIRouter()


+# Declared ahead of the dynamic "/truth-chain/{source_id}" route so the literal
+# "quality/summary" path is not captured as a source_id.
+@router.get(
+    "/truth-chain/quality/summary",
+    summary="查詢 AI 自動化品質總覽",
+    description=(
+        "T12c read-only endpoint. 聚合最近 incident 的 automation quality gate，"
+        "讓 Operator 不必逐張 Telegram 卡片判斷是否真正完成 AI 自動修復。"
+    ),
+)
+async def get_automation_quality_summary(
+    project_id: str = Query("awoooi", description="租戶 ID"),
+    hours: int = Query(24, ge=1, le=168, description="回看小時數"),
+    limit: int = Query(200, ge=1, le=500, description="最多評估 incident 數"),
+    operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator),
+) -> dict[str, Any]:
+    """Return the automation quality summary for recent incidents.
+
+    Args:
+        project_id: Tenant ID; defaults to ``"awoooi"``.
+        hours: Look-back window in hours, validated to 1..168.
+        limit: Maximum number of incidents to evaluate, validated to 1..500.
+        operator: Resolved by ``verify_awooop_operator``; only used as a gate.
+
+    Returns:
+        The dict produced by ``fetch_automation_quality_summary``.
+    """
+    # The operator dependency gates this summary because it aggregates incident
+    # lifecycle state across alert, execution, and notification tables.
+    _ = operator
+    return await fetch_automation_quality_summary(
+        project_id=project_id,
+        hours=hours,
+        limit=limit,
+    )
+
+
@router.get(
    "/truth-chain/{source_id}",
    summary="查詢 Telegram / Incident / Drift 真相鏈",
--- a/apps/api/src/services/awooop_truth_chain_service.py
+++ b/apps/api/src/services/awooop_truth_chain_service.py
@@ -8,7 +8,7 @@ Telegram cards can be audited without guessing which subsystem owns the truth.
 from __future__ import annotations

 import json
-from datetime import date, datetime
+from datetime import UTC, date, datetime, timedelta
 from decimal import Decimal
 from typing import Any
 from uuid import UUID
@@ -489,6 +489,130 @@ def build_automation_quality(
    }


+def _automation_quality_score_bucket(score: int) -> str:
+    """Map a quality score to a colour bucket.
+
+    Returns ``"green"`` for scores of 85 and above, ``"yellow"`` for 60..84,
+    and ``"red"`` for anything below 60.
+    """
+    if score >= 85:
+        return "green"
+    if score >= 60:
+        return "yellow"
+    return "red"
+
+
+def summarize_automation_quality_records(
+    *,
+    project_id: str,
+    window_hours: int,
+    records: list[dict[str, Any]],
+    limit: int,
+) -> dict[str, Any]:
+    """Aggregate per-incident automation quality into an operator summary.
+
+    Only records whose ``automation_quality["applicable"]`` is exactly ``True``
+    are evaluated; the others still count toward ``incident_total``.
+
+    Args:
+        project_id: Echoed back in the summary as ``project_id``.
+        window_hours: Echoed back in the summary as ``window_hours``.
+        records: Dicts with optional ``incident``, ``truth_status`` and
+            ``automation_quality`` mappings. A section that is missing or is
+            not a dict is treated as empty.
+        limit: Echoed back as ``limit``; it does not truncate ``records`` here.
+
+    Returns:
+        A dict tagged ``automation_quality_summary_v1`` with totals, the
+        average score, per-bucket counts, per-verdict rows and failed/missing
+        gate rows (both sorted by count descending, then by name), up to 25
+        examples in input order, and a ``production_claim`` that is true only
+        when at least one incident was evaluated and every evaluated incident
+        has the verdict ``auto_repaired_verified``.
+    """
+    verdicts: dict[str, dict[str, Any]] = {}
+    gate_failures: dict[str, dict[str, Any]] = {}
+    score_buckets: dict[str, int] = {"green": 0, "yellow": 0, "red": 0}
+    examples: list[dict[str, Any]] = []
+    total_score = 0
+    evaluated_total = 0
+    verified_total = 0
+
+    for record in records:
+        # Tolerate missing or malformed sections by falling back to empty dicts.
+        incident = record.get("incident") if isinstance(record.get("incident"), dict) else {}
+        truth_status = record.get("truth_status") if isinstance(record.get("truth_status"), dict) else {}
+        quality = record.get("automation_quality") if isinstance(record.get("automation_quality"), dict) else {}
+        # Strict identity check: truthy non-bool values do not count as applicable.
+        if quality.get("applicable") is not True:
+            continue
+
+        evaluated_total += 1
+        # A missing, None, or otherwise falsy score is counted as 0.
+        score = int(quality.get("score") or 0)
+        total_score += score
+        bucket = _automation_quality_score_bucket(score)
+        score_buckets[bucket] += 1
+
+        verdict = str(quality.get("verdict") or "unknown")
+        if verdict == "auto_repaired_verified":
+            verified_total += 1
+        # The first incident seen for a verdict seeds min/max with its own score.
+        verdict_row = verdicts.setdefault(
+            verdict,
+            {
+                "verdict": verdict,
+                "total": 0,
+                "score_sum": 0,
+                "min_score": score,
+                "max_score": score,
+                "needs_human": False,
+            },
+        )
+        verdict_row["total"] += 1
+        verdict_row["score_sum"] += score
+        verdict_row["min_score"] = min(int(verdict_row["min_score"]), score)
+        verdict_row["max_score"] = max(int(verdict_row["max_score"]), score)
+        # Sticky flag: once any incident under this verdict needs a human,
+        # the whole verdict row stays flagged.
+        verdict_row["needs_human"] = bool(
+            verdict_row["needs_human"] or truth_status.get("needs_human")
+        )
+
+        for gate in quality.get("gates") or []:
+            if not isinstance(gate, dict):
+                continue
+            gate_status = str(gate.get("status") or "")
+            # Only failed or missing gates are tallied; other statuses are ignored.
+            if gate_status not in {"failed", "missing"}:
+                continue
+            gate_name = str(gate.get("name") or "unknown")
+            gate_row = gate_failures.setdefault(
+                gate_name,
+                {"gate": gate_name, "total": 0, "statuses": {}},
+            )
+            gate_row["total"] += 1
+            gate_row["statuses"][gate_status] = int(gate_row["statuses"].get(gate_status, 0)) + 1
+
+        examples.append({
+            "incident_id": incident.get("incident_id"),
+            "alertname": incident.get("alertname"),
+            "severity": incident.get("severity"),
+            "status": incident.get("status"),
+            "created_at": incident.get("created_at"),
+            "truth_stage": truth_status.get("current_stage"),
+            "truth_stage_status": truth_status.get("stage_status"),
+            "needs_human": bool(truth_status.get("needs_human")),
+            "verdict": verdict,
+            "score": score,
+            "score_bucket": bucket,
+            # Keep at most 8 blockers per example.
+            "blockers": list(quality.get("blockers") or [])[:8],
+        })
+
+    by_verdict = []
+    for row in verdicts.values():
+        total = int(row["total"])
+        # Swap the running sum for the rounded average in the output row.
+        row["avg_score"] = round(float(row.pop("score_sum")) / total, 1) if total else 0.0
+        by_verdict.append(row)
+    # Most frequent verdict first; ties are broken alphabetically.
+    by_verdict.sort(key=lambda row: (-int(row["total"]), str(row["verdict"])))
+
+    failing_gates = sorted(
+        gate_failures.values(),
+        key=lambda row: (-int(row["total"]), str(row["gate"])),
+    )
+
+    return {
+        "schema_version": "automation_quality_summary_v1",
+        "project_id": project_id,
+        "window_hours": window_hours,
+        "limit": limit,
+        # Counts every record passed in, including the non-applicable ones.
+        "incident_total": len(records),
+        "evaluated_total": evaluated_total,
+        "verified_auto_repair_total": verified_total,
+        "average_score": round(total_score / evaluated_total, 1) if evaluated_total else 0.0,
+        "score_buckets": score_buckets,
+        "by_verdict": by_verdict,
+        "gate_failures": failing_gates,
+        # The first 25 evaluated incidents, in input order.
+        "examples": examples[:25],
+        "production_claim": {
+            # An empty evaluation set never qualifies for the full claim.
+            "can_claim_full_auto_repair": evaluated_total > 0 and verified_total == evaluated_total,
+            "reason": (
+                "all_evaluated_incidents_auto_repaired_verified"
+                if evaluated_total > 0 and verified_total == evaluated_total
+                else "some_incidents_are_not_auto_repaired_verified"
+            ),
+        },
+    }
+
+
 def _summarize_mcp(rows: list[dict[str, Any]]) -> dict[str, Any]:
    by_tool: dict[str, dict[str, Any]] = {}
    success_count = 0
@@ -1108,3 +1232,72 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
        current_stage=truth_status["current_stage"],
    )
    return result
+
+
+async def fetch_automation_quality_summary(
+    *,
+    project_id: str = "awoooi",
+    hours: int = 24,
+    limit: int = 200,
+) -> dict[str, Any]:
+    """Return a recent incident-level quality summary for the automation flywheel.
+
+    Args:
+        project_id: Used for the DB context, the incident filter, and each
+            per-incident truth-chain lookup.
+        hours: Look-back window in hours; clamped to 1..168.
+        limit: Maximum number of incidents to load; clamped to 1..500.
+
+    Returns:
+        The dict built by ``summarize_automation_quality_records`` from the
+        truth chain of every loaded incident.
+    """
+    # Clamp to 1..168 hours and 1..500 incidents regardless of what was passed.
+    bounded_hours = max(1, min(int(hours), 168))
+    bounded_limit = max(1, min(int(limit), 500))
+    cutoff = datetime.now(UTC) - timedelta(hours=bounded_hours)
+
+    # Newest incidents first. Rows whose project_id is NULL are included
+    # alongside the rows that belong to this project.
+    async with get_db_context(project_id) as db:
+        incidents = await _fetch_all(
+            db,
+            """
+            SELECT
+                incident_id,
+                project_id,
+                status::text AS status,
+                severity::text AS severity,
+                alertname,
+                alert_category,
+                notification_type,
+                created_at,
+                updated_at,
+                resolved_at,
+                verification_result
+            FROM incidents
+            WHERE (project_id = :project_id OR project_id IS NULL)
+              AND created_at >= :cutoff
+            ORDER BY created_at DESC
+            LIMIT :limit
+            """,
+            {
+                "project_id": project_id,
+                "cutoff": cutoff,
+                "limit": bounded_limit,
+            },
+        )
+
+    # NOTE(review): fetch_truth_chain is awaited once per incident, one after
+    # another, after the DB context above has been exited. That is up to
+    # bounded_limit (max 500) calls per request; consider batching if slow.
+    records: list[dict[str, Any]] = []
+    for incident in incidents:
+        incident_id = str(incident.get("incident_id") or "")
+        if not incident_id:
+            continue
+        truth_chain = await fetch_truth_chain(source_id=incident_id, project_id=project_id)
+        records.append({
+            # Prefer the truth chain's incident; fall back to the raw row when
+            # it is absent or empty.
+            "incident": truth_chain.get("incident") or incident,
+            "truth_status": truth_chain.get("truth_status") or {},
+            "automation_quality": truth_chain.get("automation_quality") or {},
+        })
+
+    summary = summarize_automation_quality_records(
+        project_id=project_id,
+        window_hours=bounded_hours,
+        records=records,
+        limit=bounded_limit,
+    )
+    logger.info(
+        "awooop_automation_quality_summary_fetched",
+        project_id=project_id,
+        window_hours=bounded_hours,
+        incident_total=summary["incident_total"],
+        evaluated_total=summary["evaluated_total"],
+        can_claim_full_auto_repair=summary["production_claim"]["can_claim_full_auto_repair"],
+    )
+    return summary
--- a/apps/api/tests/test_awooop_truth_chain_service.py
+++ b/apps/api/tests/test_awooop_truth_chain_service.py
@@ -8,11 +8,13 @@ from src.services.awooop_ansible_audit_service import (
    build_ansible_truth,
 )
 from src.services.awooop_truth_chain_service import (
+    _automation_quality_score_bucket,
    build_automation_quality,
    build_incident_reconciliation,
    _clean_row,
    _summarize_gateway_mcp,
    _truth_status,
+    summarize_automation_quality_records,
 )
 from src.services.drift_repeat_state import (
    build_drift_fingerprint,
@@ -280,6 +282,89 @@ def test_automation_quality_marks_verified_auto_repair() -> None:
    assert quality["blockers"] == []


+def test_automation_quality_score_buckets_are_stable() -> None:
+    """Pin the bucket boundaries: 85 and up is green, 60..84 yellow, below 60 red."""
+    assert _automation_quality_score_bucket(100) == "green"
+    assert _automation_quality_score_bucket(85) == "green"
+    assert _automation_quality_score_bucket(84) == "yellow"
+    assert _automation_quality_score_bucket(60) == "yellow"
+    assert _automation_quality_score_bucket(59) == "red"
+
+
+def test_automation_quality_summary_denies_full_claim_when_unverified() -> None:
+    """One verified plus one unverified incident must not allow the full claim."""
+    summary = summarize_automation_quality_records(
+        project_id="awoooi",
+        window_hours=24,
+        limit=200,
+        records=[
+            # Fully verified auto-repair: score 100, no failing gates.
+            {
+                "incident": {
+                    "incident_id": "INC-OK",
+                    "alertname": "container recovered",
+                    "severity": "P4",
+                    "status": "RESOLVED",
+                    "created_at": "2026-05-13T01:00:00+00:00",
+                },
+                "truth_status": {
+                    "current_stage": "execution_succeeded",
+                    "stage_status": "success",
+                    "needs_human": False,
+                },
+                "automation_quality": {
+                    "applicable": True,
+                    "verdict": "auto_repaired_verified",
+                    "score": 100,
+                    "gates": [
+                        {"name": "verification_recorded", "status": "passed"},
+                    ],
+                    "blockers": [],
+                },
+            },
+            # Execution succeeded but two gates are missing: score 65 (yellow).
+            {
+                "incident": {
+                    "incident_id": "INC-GAP",
+                    "alertname": "low risk action",
+                    "severity": "P4",
+                    "status": "INVESTIGATING",
+                    "created_at": "2026-05-13T02:00:00+00:00",
+                },
+                "truth_status": {
+                    "current_stage": "execution_succeeded",
+                    "stage_status": "success",
+                    "needs_human": False,
+                },
+                "automation_quality": {
+                    "applicable": True,
+                    "verdict": "execution_unverified",
+                    "score": 65,
+                    "gates": [
+                        {"name": "verification_recorded", "status": "missing"},
+                        {"name": "learning_recorded", "status": "missing"},
+                    ],
+                    "blockers": ["verification_recorded", "learning_recorded"],
+                },
+            },
+        ],
+    )
+
+    assert summary["schema_version"] == "automation_quality_summary_v1"
+    assert summary["incident_total"] == 2
+    assert summary["evaluated_total"] == 2
+    assert summary["verified_auto_repair_total"] == 1
+    assert summary["score_buckets"] == {"green": 1, "yellow": 1, "red": 0}
+    assert summary["production_claim"]["can_claim_full_auto_repair"] is False
+    assert summary["production_claim"]["reason"] == "some_incidents_are_not_auto_repaired_verified"
+    assert {row["verdict"]: row["total"] for row in summary["by_verdict"]} == {
+        "auto_repaired_verified": 1,
+        "execution_unverified": 1,
+    }
+    assert {row["gate"]: row["total"] for row in summary["gate_failures"]} == {
+        "learning_recorded": 1,
+        "verification_recorded": 1,
+    }
+    # Examples keep the input order, so the unverified incident is second.
+    assert summary["examples"][1]["incident_id"] == "INC-GAP"
+    assert summary["examples"][1]["score_bucket"] == "yellow"
+
+
 def test_reconciliation_marks_consistent_resolved_execution() -> None:
    reconciliation = build_incident_reconciliation(
        incident={"incident_id": "INC-2", "status": "RESOLVED"},
--- a/apps/api/tests/test_platform_router_order.py
+++ b/apps/api/tests/test_platform_router_order.py
@@ -35,3 +35,15 @@ def test_truth_chain_route_is_registered() -> None:
    ]

    assert "/truth-chain/{source_id}" in paths
+
+
+def test_truth_chain_quality_summary_route_is_registered_before_dynamic_source_id() -> None:
+    """The literal summary path must come before the dynamic ``{source_id}`` route."""
+    # Collect GET route paths in registration order.
+    paths = [
+        route.path
+        for route in router.routes
+        if "GET" in getattr(route, "methods", set())
+    ]
+
+    assert "/truth-chain/quality/summary" in paths
+    assert "/truth-chain/{source_id}" in paths
+    assert paths.index("/truth-chain/quality/summary") < paths.index("/truth-chain/{source_id}")
--- a/docs/LOGBOOK.md
+++ b/docs/LOGBOOK.md
@@ -7410,3 +7410,35 @@ _format_automation_quality_lines(...) returned 6 lines with verdict + score + ex
 - 目前實況仍顯示多筆 incident 是 `execution_unverified`，不能宣稱完整 AI 自動修復已完成。
 - 下一步應把 `execution_unverified` 的 verification gap 收斂到 post-execution verifier / learning writeback，而不是只在 Telegram 補文案。
 - 目前整體進度更新：約 72%。
+
+### 2026-05-13 — AwoooP truth-chain T12c：全體告警自動化品質總覽（local green）
+
+**目的**：
+
+- Operator 不應逐張 Telegram 卡片猜「是否重複發生」、「是否已進 AI 自動修復」、「卡在哪個流程」。
+- T12c 先提供 read-only 聚合 API，把最近 incident 全部套用 T12b automation quality gate，回傳 verdict 分布、分數區間、缺失 gate、代表案例與 production claim。
+
+**變更**：
+
+- 新增 `GET /api/v1/platform/truth-chain/quality/summary`：
+  - query: `project_id` / `hours` / `limit`
+  - 回傳 `automation_quality_summary_v1`
+  - 顯示 `by_verdict`、`score_buckets`、`gate_failures`、`examples`
+  - `production_claim.can_claim_full_auto_repair` 嚴格要求所有評估 incident 都是 `auto_repaired_verified`
+- 新增純函式 `summarize_automation_quality_records(...)`，讓品質總覽可單元測試。
+- 新增 route-order 測試，確保 `/truth-chain/quality/summary` 不會被 `/truth-chain/{source_id}` 誤吃。
+
+**local verification**：
+
+```text
+DATABASE_URL=postgresql+asyncpg://u:p@localhost:5432/db pytest tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py -q
+19 passed
+
+ruff check --select F821 src/services/awooop_truth_chain_service.py src/api/v1/platform/truth_chain.py tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py
+All checks passed
+
+python3 -m py_compile src/services/awooop_truth_chain_service.py src/api/v1/platform/truth_chain.py tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py
+OK
+```
+
+**目前整體進度**：約 73%。