feat(awooop): summarize automation quality

2026-05-13 15:56:42 +08:00
parent c00e911b28
commit ae7c7cbd23
5 changed files with 351 additions and 2 deletions
--- a/apps/api/src/api/v1/platform/truth_chain.py
+++ b/apps/api/src/api/v1/platform/truth_chain.py
@@ -10,11 +10,38 @@ from src.core.awooop_operator_auth import (
    AwoooPOperatorPrincipal,
    verify_awooop_operator,
 )
-from src.services.awooop_truth_chain_service import fetch_truth_chain
+from src.services.awooop_truth_chain_service import (
    fetch_automation_quality_summary,
    fetch_truth_chain,
 )
 router = APIRouter()
@router.get(
    "/truth-chain/quality/summary",
    summary="查詢 AI 自動化品質總覽",
    description=(
        "T12c read-only endpoint. 聚合最近 incident 的 automation quality gate，"
        "讓 Operator 不必逐張 Telegram 卡片判斷是否真正完成 AI 自動修復。"
    ),
 )
 async def get_automation_quality_summary(
    project_id: str = Query("awoooi", description="租戶 ID"),
    hours: int = Query(24, ge=1, le=168, description="回看小時數"),
    limit: int = Query(200, ge=1, le=500, description="最多評估 incident 數"),
    operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator),
 ) -> dict[str, Any]:
    """Return the aggregated automation quality summary for recent incidents.

    Args:
        project_id: Tenant identifier forwarded to the service (default "awoooi").
        hours: Look-back window in hours; validated to the range 1-168.
        limit: Maximum number of incidents to evaluate; validated to 1-500.
        operator: Principal resolved by ``verify_awooop_operator``. It is only
            required so the dependency runs; its value is not used.

    Returns:
        The dict produced by ``fetch_automation_quality_summary``.
    """
    # The operator dependency gates this summary because it aggregates incident
    # lifecycle state across alert, execution, and notification tables.
    _ = operator
    # This static route is declared before "/truth-chain/{source_id}" in this
    # module; the router-order test asserts that ordering.
    return await fetch_automation_quality_summary(
        project_id=project_id,
        hours=hours,
        limit=limit,
    )
@router.get(
    "/truth-chain/{source_id}",
    summary="查詢 Telegram / Incident / Drift 真相鏈",
--- a/apps/api/src/services/awooop_truth_chain_service.py
+++ b/apps/api/src/services/awooop_truth_chain_service.py
@@ -8,7 +8,7 @@ Telegram cards can be audited without guessing which subsystem owns the truth.
 from __future__ import annotations
 import json
-from datetime import date, datetime
+from datetime import UTC, date, datetime, timedelta
 from decimal import Decimal
 from typing import Any
 from uuid import UUID
@@ -489,6 +489,130 @@ def build_automation_quality(
    }
 def _automation_quality_score_bucket(score: int) -> str:
    if score >= 85:
        return "green"
    if score >= 60:
        return "yellow"
    return "red"
 def summarize_automation_quality_records(
    *,
    project_id: str,
    window_hours: int,
    records: list[dict[str, Any]],
    limit: int,
 ) -> dict[str, Any]:
    """Roll per-incident automation quality results up into one operator summary.

    Each record may carry ``incident``, ``truth_status`` and
    ``automation_quality`` mappings; any of them that is missing or not a dict
    is treated as empty. Only records whose quality has ``applicable`` set to
    exactly ``True`` are evaluated. Skipped records still count toward
    ``incident_total``.

    Args:
        project_id: Echoed back in the summary.
        window_hours: Echoed back in the summary.
        records: Per-incident records to aggregate.
        limit: Echoed back in the summary.

    Returns:
        A dict with schema version ``automation_quality_summary_v1`` holding
        totals, the average score, score bucket counts, per-verdict statistics,
        failed/missing gate counts, up to 25 example incidents and the
        ``production_claim`` verdict.
    """

    def _as_dict(value: Any) -> dict[str, Any]:
        # Anything that is not a dict (missing key, None, list, ...) reads as empty.
        return value if isinstance(value, dict) else {}

    per_verdict: dict[str, dict[str, Any]] = {}
    per_gate: dict[str, dict[str, Any]] = {}
    bucket_counts: dict[str, int] = {"green": 0, "yellow": 0, "red": 0}
    samples: list[dict[str, Any]] = []
    score_total = 0
    evaluated = 0
    verified = 0

    for entry in records:
        incident = _as_dict(entry.get("incident"))
        truth = _as_dict(entry.get("truth_status"))
        quality = _as_dict(entry.get("automation_quality"))
        if quality.get("applicable") is not True:
            continue

        evaluated += 1
        score = int(quality.get("score") or 0)
        score_total += score
        bucket = _automation_quality_score_bucket(score)
        bucket_counts[bucket] += 1

        verdict = str(quality.get("verdict") or "unknown")
        if verdict == "auto_repaired_verified":
            verified += 1
        needs_human = bool(truth.get("needs_human"))

        # Per-verdict statistics: the first score seen seeds both extremes.
        if verdict not in per_verdict:
            per_verdict[verdict] = {
                "verdict": verdict,
                "total": 0,
                "score_sum": 0,
                "min_score": score,
                "max_score": score,
                "needs_human": False,
            }
        stats = per_verdict[verdict]
        stats["total"] += 1
        stats["score_sum"] += score
        if score < stats["min_score"]:
            stats["min_score"] = score
        if score > stats["max_score"]:
            stats["max_score"] = score
        if needs_human:
            # Sticky flag: one incident needing a human marks the whole verdict.
            stats["needs_human"] = True

        # Only gates that failed or are missing count as failures.
        for gate in quality.get("gates") or []:
            if not isinstance(gate, dict):
                continue
            status = str(gate.get("status") or "")
            if status != "failed" and status != "missing":
                continue
            name = str(gate.get("name") or "unknown")
            if name not in per_gate:
                per_gate[name] = {"gate": name, "total": 0, "statuses": {}}
            failure = per_gate[name]
            failure["total"] += 1
            status_counts = failure["statuses"]
            status_counts[status] = int(status_counts.get(status, 0)) + 1

        samples.append(
            {
                "incident_id": incident.get("incident_id"),
                "alertname": incident.get("alertname"),
                "severity": incident.get("severity"),
                "status": incident.get("status"),
                "created_at": incident.get("created_at"),
                "truth_stage": truth.get("current_stage"),
                "truth_stage_status": truth.get("stage_status"),
                "needs_human": needs_human,
                "verdict": verdict,
                "score": score,
                "score_bucket": bucket,
                "blockers": list(quality.get("blockers") or [])[:8],
            }
        )

    # Every verdict row has total >= 1 because it is incremented on creation,
    # so the average never divides by zero.
    by_verdict = sorted(
        (
            {
                "verdict": stats["verdict"],
                "total": stats["total"],
                "min_score": stats["min_score"],
                "max_score": stats["max_score"],
                "needs_human": stats["needs_human"],
                "avg_score": round(float(stats["score_sum"]) / stats["total"], 1),
            }
            for stats in per_verdict.values()
        ),
        key=lambda row: (-int(row["total"]), str(row["verdict"])),
    )
    failing_gates = sorted(
        per_gate.values(),
        key=lambda row: (-int(row["total"]), str(row["gate"])),
    )

    # The full auto-repair claim needs at least one evaluated incident and
    # every evaluated incident verified.
    full_claim = evaluated > 0 and verified == evaluated
    if full_claim:
        claim_reason = "all_evaluated_incidents_auto_repaired_verified"
    else:
        claim_reason = "some_incidents_are_not_auto_repaired_verified"

    return {
        "schema_version": "automation_quality_summary_v1",
        "project_id": project_id,
        "window_hours": window_hours,
        "limit": limit,
        "incident_total": len(records),
        "evaluated_total": evaluated,
        "verified_auto_repair_total": verified,
        "average_score": round(score_total / evaluated, 1) if evaluated else 0.0,
        "score_buckets": bucket_counts,
        "by_verdict": by_verdict,
        "gate_failures": failing_gates,
        "examples": samples[:25],
        "production_claim": {
            "can_claim_full_auto_repair": full_claim,
            "reason": claim_reason,
        },
    }
 def _summarize_mcp(rows: list[dict[str, Any]]) -> dict[str, Any]:
    by_tool: dict[str, dict[str, Any]] = {}
    success_count = 0
@@ -1108,3 +1232,72 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
        current_stage=truth_status["current_stage"],
    )
    return result
 async def fetch_automation_quality_summary(
    *,
    project_id: str = "awoooi",
    hours: int = 24,
    limit: int = 200,
 ) -> dict[str, Any]:
    """Build the automation quality summary for a project's recent incidents.

    Clamps ``hours`` to 1-168 and ``limit`` to 1-500, selects the newest
    incidents created inside that window (rows whose ``project_id`` matches or
    is NULL), resolves the truth chain of each one, and aggregates the results
    with ``summarize_automation_quality_records``.

    Args:
        project_id: Tenant whose incidents are summarized.
        hours: Look-back window in hours.
        limit: Maximum number of incidents to load.

    Returns:
        The summary dict from ``summarize_automation_quality_records``.
    """
    window_hours = min(max(int(hours), 1), 168)
    max_incidents = min(max(int(limit), 1), 500)
    query_params = {
        "project_id": project_id,
        "cutoff": datetime.now(UTC) - timedelta(hours=window_hours),
        "limit": max_incidents,
    }

    async with get_db_context(project_id) as db:
        incidents = await _fetch_all(
            db,
            """
            SELECT
                incident_id,
                project_id,
                status::text AS status,
                severity::text AS severity,
                alertname,
                alert_category,
                notification_type,
                created_at,
                updated_at,
                resolved_at,
                verification_result
            FROM incidents
            WHERE (project_id = :project_id OR project_id IS NULL)
              AND created_at >= :cutoff
            ORDER BY created_at DESC
            LIMIT :limit
            """,
            query_params,
        )

    # Truth chains are resolved one incident at a time, after the listing
    # connection has been released.
    records: list[dict[str, Any]] = []
    for row in incidents:
        incident_id = str(row.get("incident_id") or "")
        if incident_id:
            chain = await fetch_truth_chain(source_id=incident_id, project_id=project_id)
            records.append(
                {
                    # Fall back to the raw listing row when the chain has no incident.
                    "incident": chain.get("incident") or row,
                    "truth_status": chain.get("truth_status") or {},
                    "automation_quality": chain.get("automation_quality") or {},
                }
            )

    summary = summarize_automation_quality_records(
        project_id=project_id,
        window_hours=window_hours,
        records=records,
        limit=max_incidents,
    )
    claim = summary["production_claim"]
    logger.info(
        "awooop_automation_quality_summary_fetched",
        project_id=project_id,
        window_hours=window_hours,
        incident_total=summary["incident_total"],
        evaluated_total=summary["evaluated_total"],
        can_claim_full_auto_repair=claim["can_claim_full_auto_repair"],
    )
    return summary
--- a/apps/api/tests/test_awooop_truth_chain_service.py
+++ b/apps/api/tests/test_awooop_truth_chain_service.py
@@ -8,11 +8,13 @@ from src.services.awooop_ansible_audit_service import (
    build_ansible_truth,
 )
 from src.services.awooop_truth_chain_service import (
    _automation_quality_score_bucket,
    build_automation_quality,
    build_incident_reconciliation,
    _clean_row,
    _summarize_gateway_mcp,
    _truth_status,
    summarize_automation_quality_records,
 )
 from src.services.drift_repeat_state import (
    build_drift_fingerprint,
@@ -280,6 +282,89 @@ def test_automation_quality_marks_verified_auto_repair() -> None:
    assert quality["blockers"] == []
 def test_automation_quality_score_buckets_are_stable() -> None:
    """Pin the bucket boundaries: 85 starts green, 60 starts yellow."""
    expected_buckets = (
        (100, "green"),
        (85, "green"),
        (84, "yellow"),
        (60, "yellow"),
        (59, "red"),
    )
    for score, expected in expected_buckets:
        assert _automation_quality_score_bucket(score) == expected
 def test_automation_quality_summary_denies_full_claim_when_unverified() -> None:
    """One verified plus one unverified incident must block the full-repair claim."""
    verified_record = {
        "incident": {
            "incident_id": "INC-OK",
            "alertname": "container recovered",
            "severity": "P4",
            "status": "RESOLVED",
            "created_at": "2026-05-13T01:00:00+00:00",
        },
        "truth_status": {
            "current_stage": "execution_succeeded",
            "stage_status": "success",
            "needs_human": False,
        },
        "automation_quality": {
            "applicable": True,
            "verdict": "auto_repaired_verified",
            "score": 100,
            "gates": [
                {"name": "verification_recorded", "status": "passed"},
            ],
            "blockers": [],
        },
    }
    unverified_record = {
        "incident": {
            "incident_id": "INC-GAP",
            "alertname": "low risk action",
            "severity": "P4",
            "status": "INVESTIGATING",
            "created_at": "2026-05-13T02:00:00+00:00",
        },
        "truth_status": {
            "current_stage": "execution_succeeded",
            "stage_status": "success",
            "needs_human": False,
        },
        "automation_quality": {
            "applicable": True,
            "verdict": "execution_unverified",
            "score": 65,
            "gates": [
                {"name": "verification_recorded", "status": "missing"},
                {"name": "learning_recorded", "status": "missing"},
            ],
            "blockers": ["verification_recorded", "learning_recorded"],
        },
    }

    summary = summarize_automation_quality_records(
        project_id="awoooi",
        window_hours=24,
        limit=200,
        records=[verified_record, unverified_record],
    )

    # Top-level counters.
    assert summary["schema_version"] == "automation_quality_summary_v1"
    assert summary["incident_total"] == 2
    assert summary["evaluated_total"] == 2
    assert summary["verified_auto_repair_total"] == 1
    assert summary["score_buckets"] == {"green": 1, "yellow": 1, "red": 0}

    # A single unverified incident is enough to deny the claim.
    claim = summary["production_claim"]
    assert claim["can_claim_full_auto_repair"] is False
    assert claim["reason"] == "some_incidents_are_not_auto_repaired_verified"

    verdict_totals = {row["verdict"]: row["total"] for row in summary["by_verdict"]}
    assert verdict_totals == {
        "auto_repaired_verified": 1,
        "execution_unverified": 1,
    }
    gate_totals = {row["gate"]: row["total"] for row in summary["gate_failures"]}
    assert gate_totals == {
        "learning_recorded": 1,
        "verification_recorded": 1,
    }

    # Examples keep the input order, so the unverified incident is second.
    second_example = summary["examples"][1]
    assert second_example["incident_id"] == "INC-GAP"
    assert second_example["score_bucket"] == "yellow"
 def test_reconciliation_marks_consistent_resolved_execution() -> None:
    reconciliation = build_incident_reconciliation(
        incident={"incident_id": "INC-2", "status": "RESOLVED"},
--- a/apps/api/tests/test_platform_router_order.py
+++ b/apps/api/tests/test_platform_router_order.py
@@ -35,3 +35,15 @@ def test_truth_chain_route_is_registered() -> None:
    ]
    assert "/truth-chain/{source_id}" in paths
 def test_truth_chain_quality_summary_route_is_registered_before_dynamic_source_id() -> None:
    """The static summary route must precede the dynamic ``{source_id}`` route."""
    summary_path = "/truth-chain/quality/summary"
    dynamic_path = "/truth-chain/{source_id}"

    # Collect GET route paths in registration order.
    get_paths = []
    for route in router.routes:
        if "GET" in getattr(route, "methods", set()):
            get_paths.append(route.path)

    assert summary_path in get_paths
    assert dynamic_path in get_paths
    assert get_paths.index(summary_path) < get_paths.index(dynamic_path)
--- a/docs/LOGBOOK.md
+++ b/docs/LOGBOOK.md
@@ -7410,3 +7410,35 @@ _format_automation_quality_lines(...) returned 6 lines with verdict + score + ex
 - 目前實況仍顯示多筆 incident 是 `execution_unverified`，不能宣稱完整 AI 自動修復已完成。
 - 下一步應把 `execution_unverified` 的 verification gap 收斂到 post-execution verifier / learning writeback，而不是只在 Telegram 補文案。
 - 目前整體進度更新：約 72%。
 ### 2026-05-13 — AwoooP truth-chain T12c：全體告警自動化品質總覽（local green）
 **目的**：
 - Operator 不應逐張 Telegram 卡片猜「是否重複發生」、「是否已進 AI 自動修復」、「卡在哪個流程」。
 - T12c 先提供 read-only 聚合 API，把最近 incident 全部套用 T12b automation quality gate，回傳 verdict 分布、分數區間、缺失 gate、代表案例與 production claim。
 **變更**：
 - 新增 `GET /api/v1/platform/truth-chain/quality/summary`：
  - query: `project_id` / `hours` / `limit`
  - 回傳 `automation_quality_summary_v1`
  - 顯示 `by_verdict`、`score_buckets`、`gate_failures`、`examples`
  - `production_claim.can_claim_full_auto_repair` 嚴格要求所有評估 incident 都是 `auto_repaired_verified`
 - 新增純函式 `summarize_automation_quality_records(...)`，讓品質總覽可單元測試。
 - 新增 route-order 測試，確保 `/truth-chain/quality/summary` 不會被 `/truth-chain/{source_id}` 誤吃。
 **local verification**：
 ```text
 DATABASE_URL=postgresql+asyncpg://u:p@localhost:5432/db pytest tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py -q
 19 passed
 ruff check --select F821 src/services/awooop_truth_chain_service.py src/api/v1/platform/truth_chain.py tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py
 All checks passed
 python3 -m py_compile src/services/awooop_truth_chain_service.py src/api/v1/platform/truth_chain.py tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py
 OK
 ```
 **目前整體進度**：約 73%。