feat(awooop): summarize automation quality
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m14s
CD Pipeline / build-and-deploy (push) Successful in 3m43s
CD Pipeline / post-deploy-checks (push) Successful in 1m29s

This commit is contained in:
Your Name
2026-05-13 15:56:42 +08:00
parent c00e911b28
commit ae7c7cbd23
5 changed files with 351 additions and 2 deletions

View File

@@ -10,11 +10,38 @@ from src.core.awooop_operator_auth import (
AwoooPOperatorPrincipal, AwoooPOperatorPrincipal,
verify_awooop_operator, verify_awooop_operator,
) )
from src.services.awooop_truth_chain_service import fetch_truth_chain from src.services.awooop_truth_chain_service import (
fetch_automation_quality_summary,
fetch_truth_chain,
)
router = APIRouter() router = APIRouter()
@router.get(
    "/truth-chain/quality/summary",
    summary="查詢 AI 自動化品質總覽",
    description=(
        "T12c read-only endpoint. 聚合最近 incident 的 automation quality gate"
        "讓 Operator 不必逐張 Telegram 卡片判斷是否真正完成 AI 自動修復。"
    ),
)
async def get_automation_quality_summary(
    project_id: str = Query("awoooi", description="租戶 ID"),
    hours: int = Query(24, ge=1, le=168, description="回看小時數"),
    limit: int = Query(200, ge=1, le=500, description="最多評估 incident 數"),
    operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator),
) -> dict[str, Any]:
    """Serve the aggregated automation-quality summary for recent incidents.

    The query parameters are validated by the ``Query`` bounds above and then
    forwarded unchanged to ``fetch_automation_quality_summary``.
    """
    # The operator principal is not read here; declaring the dependency is what
    # gates access, since the summary aggregates incident lifecycle state
    # across alert, execution, and notification tables.
    _ = operator
    summary_payload = await fetch_automation_quality_summary(
        project_id=project_id,
        hours=hours,
        limit=limit,
    )
    return summary_payload
@router.get( @router.get(
"/truth-chain/{source_id}", "/truth-chain/{source_id}",
summary="查詢 Telegram / Incident / Drift 真相鏈", summary="查詢 Telegram / Incident / Drift 真相鏈",

View File

@@ -8,7 +8,7 @@ Telegram cards can be audited without guessing which subsystem owns the truth.
from __future__ import annotations from __future__ import annotations
import json import json
from datetime import date, datetime from datetime import UTC, date, datetime, timedelta
from decimal import Decimal from decimal import Decimal
from typing import Any from typing import Any
from uuid import UUID from uuid import UUID
@@ -489,6 +489,130 @@ def build_automation_quality(
} }
def _automation_quality_score_bucket(score: int) -> str:
    """Map a quality score to a traffic-light bucket.

    Scores of 85 or more are ``"green"``, 60 through 84 are ``"yellow"``,
    and anything lower is ``"red"``.
    """
    if score >= 85:
        return "green"
    if score >= 60:
        return "yellow"
    return "red"


def _coerce_quality_score(raw: Any) -> int:
    """Convert a raw ``score`` value to ``int``; unusable values count as 0.

    A missing, non-numeric, NaN or infinite score must not abort the whole
    summary, so it is scored conservatively as 0 (which lands in ``"red"``).
    """
    try:
        return int(raw or 0)
    except (TypeError, ValueError, OverflowError):
        return 0


def _dict_or_empty(value: Any) -> dict[str, Any]:
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def summarize_automation_quality_records(
    *,
    project_id: str,
    window_hours: int,
    records: list[dict[str, Any]],
    limit: int,
) -> dict[str, Any]:
    """Aggregate per-incident automation quality into an operator summary.

    Args:
        project_id: Echoed back in the summary.
        window_hours: Echoed back as ``window_hours``.
        records: One entry per incident with optional ``incident``,
            ``truth_status`` and ``automation_quality`` dicts. Only records
            whose ``automation_quality["applicable"]`` is exactly ``True`` are
            evaluated; malformed (non-dict) records are counted in
            ``incident_total`` but otherwise skipped.
        limit: Echoed back as ``limit``.

    Returns:
        A dict with schema ``automation_quality_summary_v1`` containing totals,
        the average score, score buckets, per-verdict rows, failing/missing
        gate counts, up to 25 example incidents, and a ``production_claim``
        that is only positive when every evaluated incident has the verdict
        ``auto_repaired_verified``.
    """
    example_cap = 25
    blocker_cap = 8

    verdict_rows: dict[str, dict[str, Any]] = {}
    gate_rows: dict[str, dict[str, Any]] = {}
    score_buckets: dict[str, int] = {"green": 0, "yellow": 0, "red": 0}
    examples: list[dict[str, Any]] = []
    score_total = 0
    evaluated_total = 0
    verified_total = 0

    for record in records:
        # A malformed record should not take down the whole summary.
        if not isinstance(record, dict):
            continue
        incident = _dict_or_empty(record.get("incident"))
        truth_status = _dict_or_empty(record.get("truth_status"))
        quality = _dict_or_empty(record.get("automation_quality"))
        if quality.get("applicable") is not True:
            continue

        evaluated_total += 1
        score = _coerce_quality_score(quality.get("score"))
        score_total += score
        bucket = _automation_quality_score_bucket(score)
        score_buckets[bucket] += 1

        verdict = str(quality.get("verdict") or "unknown")
        if verdict == "auto_repaired_verified":
            verified_total += 1
        needs_human = bool(truth_status.get("needs_human"))

        verdict_row = verdict_rows.setdefault(
            verdict,
            {
                "verdict": verdict,
                "total": 0,
                "score_sum": 0,
                "min_score": score,
                "max_score": score,
                "needs_human": False,
            },
        )
        verdict_row["total"] += 1
        verdict_row["score_sum"] += score
        verdict_row["min_score"] = min(int(verdict_row["min_score"]), score)
        verdict_row["max_score"] = max(int(verdict_row["max_score"]), score)
        # Sticky flag: one incident needing a human marks the whole verdict row.
        verdict_row["needs_human"] = bool(verdict_row["needs_human"] or needs_human)

        for gate in quality.get("gates") or []:
            if not isinstance(gate, dict):
                continue
            gate_status = str(gate.get("status") or "")
            if gate_status not in {"failed", "missing"}:
                continue
            gate_name = str(gate.get("name") or "unknown")
            gate_row = gate_rows.setdefault(
                gate_name,
                {"gate": gate_name, "total": 0, "statuses": {}},
            )
            gate_row["total"] += 1
            status_counts = gate_row["statuses"]
            status_counts[gate_status] = int(status_counts.get(gate_status, 0)) + 1

        # Only the first `example_cap` evaluated incidents are reported, so
        # there is no need to build entries beyond that.
        if len(examples) < example_cap:
            blockers = quality.get("blockers") or []
            if isinstance(blockers, str):
                # list("abc") would split a lone blocker name into characters.
                blockers = [blockers]
            examples.append({
                "incident_id": incident.get("incident_id"),
                "alertname": incident.get("alertname"),
                "severity": incident.get("severity"),
                "status": incident.get("status"),
                "created_at": incident.get("created_at"),
                "truth_stage": truth_status.get("current_stage"),
                "truth_stage_status": truth_status.get("stage_status"),
                "needs_human": needs_human,
                "verdict": verdict,
                "score": score,
                "score_bucket": bucket,
                "blockers": list(blockers)[:blocker_cap],
            })

    by_verdict = []
    for row in verdict_rows.values():
        # Every row was incremented at least once, so the divisor is >= 1.
        row["avg_score"] = round(row.pop("score_sum") / int(row["total"]), 1)
        by_verdict.append(row)
    by_verdict.sort(key=lambda row: (-int(row["total"]), str(row["verdict"])))

    failing_gates = sorted(
        gate_rows.values(),
        key=lambda row: (-int(row["total"]), str(row["gate"])),
    )

    # An empty evaluation set never justifies a "full auto repair" claim.
    can_claim = evaluated_total > 0 and verified_total == evaluated_total

    return {
        "schema_version": "automation_quality_summary_v1",
        "project_id": project_id,
        "window_hours": window_hours,
        "limit": limit,
        "incident_total": len(records),
        "evaluated_total": evaluated_total,
        "verified_auto_repair_total": verified_total,
        "average_score": round(score_total / evaluated_total, 1) if evaluated_total else 0.0,
        "score_buckets": score_buckets,
        "by_verdict": by_verdict,
        "gate_failures": failing_gates,
        "examples": examples,
        "production_claim": {
            "can_claim_full_auto_repair": can_claim,
            "reason": (
                "all_evaluated_incidents_auto_repaired_verified"
                if can_claim
                else "some_incidents_are_not_auto_repaired_verified"
            ),
        },
    }
def _summarize_mcp(rows: list[dict[str, Any]]) -> dict[str, Any]: def _summarize_mcp(rows: list[dict[str, Any]]) -> dict[str, Any]:
by_tool: dict[str, dict[str, Any]] = {} by_tool: dict[str, dict[str, Any]] = {}
success_count = 0 success_count = 0
@@ -1108,3 +1232,72 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
current_stage=truth_status["current_stage"], current_stage=truth_status["current_stage"],
) )
return result return result
async def fetch_automation_quality_summary(
*,
project_id: str = "awoooi",
hours: int = 24,
limit: int = 200,
) -> dict[str, Any]:
"""Return a recent incident-level quality summary for the automation flywheel.

Loads recent incidents for ``project_id``, resolves the truth chain of each
one via ``fetch_truth_chain``, and aggregates the results with
``summarize_automation_quality_records``.

Args:
    project_id: Project whose incidents are summarized; rows with a NULL
        ``project_id`` are included as well (see the WHERE clause).
    hours: Look-back window in hours, clamped to 1..168.
    limit: Maximum number of incidents to load, clamped to 1..500.

Returns:
    The summary dict produced by ``summarize_automation_quality_records``.
"""
# Re-clamp the inputs here as well, so the bounds do not rely on the caller
# having validated them.
bounded_hours = max(1, min(int(hours), 168))
bounded_limit = max(1, min(int(limit), 500))
# Timezone-aware UTC cutoff for the created_at filter.
cutoff = datetime.now(UTC) - timedelta(hours=bounded_hours)
async with get_db_context(project_id) as db:
# Newest incidents first, capped at the bounded limit.
incidents = await _fetch_all(
db,
"""
SELECT
incident_id,
project_id,
status::text AS status,
severity::text AS severity,
alertname,
alert_category,
notification_type,
created_at,
updated_at,
resolved_at,
verification_result
FROM incidents
WHERE (project_id = :project_id OR project_id IS NULL)
AND created_at >= :cutoff
ORDER BY created_at DESC
LIMIT :limit
""",
{
"project_id": project_id,
"cutoff": cutoff,
"limit": bounded_limit,
},
)
records: list[dict[str, Any]] = []
for incident in incidents:
incident_id = str(incident.get("incident_id") or "")
# Rows without a usable incident_id cannot be resolved to a truth chain.
if not incident_id:
continue
# NOTE(review): one fetch_truth_chain call is awaited per incident, one after
# another (up to 500 per request) — confirm this latency is acceptable.
truth_chain = await fetch_truth_chain(source_id=incident_id, project_id=project_id)
records.append({
# Fall back to the raw incident row when the truth chain has no incident.
"incident": truth_chain.get("incident") or incident,
"truth_status": truth_chain.get("truth_status") or {},
"automation_quality": truth_chain.get("automation_quality") or {},
})
summary = summarize_automation_quality_records(
project_id=project_id,
window_hours=bounded_hours,
records=records,
limit=bounded_limit,
)
# Log the headline numbers of the summary that is about to be returned.
logger.info(
"awooop_automation_quality_summary_fetched",
project_id=project_id,
window_hours=bounded_hours,
incident_total=summary["incident_total"],
evaluated_total=summary["evaluated_total"],
can_claim_full_auto_repair=summary["production_claim"]["can_claim_full_auto_repair"],
)
return summary

View File

@@ -8,11 +8,13 @@ from src.services.awooop_ansible_audit_service import (
build_ansible_truth, build_ansible_truth,
) )
from src.services.awooop_truth_chain_service import ( from src.services.awooop_truth_chain_service import (
_automation_quality_score_bucket,
build_automation_quality, build_automation_quality,
build_incident_reconciliation, build_incident_reconciliation,
_clean_row, _clean_row,
_summarize_gateway_mcp, _summarize_gateway_mcp,
_truth_status, _truth_status,
summarize_automation_quality_records,
) )
from src.services.drift_repeat_state import ( from src.services.drift_repeat_state import (
build_drift_fingerprint, build_drift_fingerprint,
@@ -280,6 +282,89 @@ def test_automation_quality_marks_verified_auto_repair() -> None:
assert quality["blockers"] == [] assert quality["blockers"] == []
def test_automation_quality_score_buckets_are_stable() -> None:
    """Pin the bucket boundaries: 85 and up is green, 60 and up is yellow."""
    expected_bucket_by_score = (
        (100, "green"),
        (85, "green"),
        (84, "yellow"),
        (60, "yellow"),
        (59, "red"),
    )
    for score, expected_bucket in expected_bucket_by_score:
        assert _automation_quality_score_bucket(score) == expected_bucket
def test_automation_quality_summary_denies_full_claim_when_unverified() -> None:
    """One verified and one unverified incident must block the full-repair claim."""
    # Both incidents share the same truth-chain stage; only the quality differs.
    succeeded_stage = {
        "current_stage": "execution_succeeded",
        "stage_status": "success",
        "needs_human": False,
    }
    verified_record = {
        "incident": {
            "incident_id": "INC-OK",
            "alertname": "container recovered",
            "severity": "P4",
            "status": "RESOLVED",
            "created_at": "2026-05-13T01:00:00+00:00",
        },
        "truth_status": dict(succeeded_stage),
        "automation_quality": {
            "applicable": True,
            "verdict": "auto_repaired_verified",
            "score": 100,
            "gates": [
                {"name": "verification_recorded", "status": "passed"},
            ],
            "blockers": [],
        },
    }
    unverified_record = {
        "incident": {
            "incident_id": "INC-GAP",
            "alertname": "low risk action",
            "severity": "P4",
            "status": "INVESTIGATING",
            "created_at": "2026-05-13T02:00:00+00:00",
        },
        "truth_status": dict(succeeded_stage),
        "automation_quality": {
            "applicable": True,
            "verdict": "execution_unverified",
            "score": 65,
            "gates": [
                {"name": "verification_recorded", "status": "missing"},
                {"name": "learning_recorded", "status": "missing"},
            ],
            "blockers": ["verification_recorded", "learning_recorded"],
        },
    }

    summary = summarize_automation_quality_records(
        project_id="awoooi",
        window_hours=24,
        limit=200,
        records=[verified_record, unverified_record],
    )

    # Headline totals.
    assert summary["schema_version"] == "automation_quality_summary_v1"
    assert summary["incident_total"] == 2
    assert summary["evaluated_total"] == 2
    assert summary["verified_auto_repair_total"] == 1
    assert summary["score_buckets"] == {"green": 1, "yellow": 1, "red": 0}

    # The claim is denied because not every incident is verified.
    claim = summary["production_claim"]
    assert claim["can_claim_full_auto_repair"] is False
    assert claim["reason"] == "some_incidents_are_not_auto_repaired_verified"

    # Per-verdict and per-gate breakdowns.
    totals_by_verdict = {row["verdict"]: row["total"] for row in summary["by_verdict"]}
    assert totals_by_verdict == {
        "auto_repaired_verified": 1,
        "execution_unverified": 1,
    }
    totals_by_gate = {row["gate"]: row["total"] for row in summary["gate_failures"]}
    assert totals_by_gate == {
        "learning_recorded": 1,
        "verification_recorded": 1,
    }

    # Examples keep the input order, so the unverified incident is second.
    second_example = summary["examples"][1]
    assert second_example["incident_id"] == "INC-GAP"
    assert second_example["score_bucket"] == "yellow"
def test_reconciliation_marks_consistent_resolved_execution() -> None: def test_reconciliation_marks_consistent_resolved_execution() -> None:
reconciliation = build_incident_reconciliation( reconciliation = build_incident_reconciliation(
incident={"incident_id": "INC-2", "status": "RESOLVED"}, incident={"incident_id": "INC-2", "status": "RESOLVED"},

View File

@@ -35,3 +35,15 @@ def test_truth_chain_route_is_registered() -> None:
] ]
assert "/truth-chain/{source_id}" in paths assert "/truth-chain/{source_id}" in paths
def test_truth_chain_quality_summary_route_is_registered_before_dynamic_source_id() -> None:
    """The static summary route must be registered ahead of the dynamic one.

    Otherwise ``/truth-chain/{source_id}`` could capture ``quality/summary``
    requests, which is the ordering this test guards.
    """
    summary_path = "/truth-chain/quality/summary"
    dynamic_path = "/truth-chain/{source_id}"

    get_paths = []
    for route in router.routes:
        if "GET" in getattr(route, "methods", set()):
            get_paths.append(route.path)

    assert summary_path in get_paths
    assert dynamic_path in get_paths
    assert get_paths.index(summary_path) < get_paths.index(dynamic_path)

View File

@@ -7410,3 +7410,35 @@ _format_automation_quality_lines(...) returned 6 lines with verdict + score + ex
- 目前實況仍顯示多筆 incident 是 `execution_unverified`,不能宣稱完整 AI 自動修復已完成。 - 目前實況仍顯示多筆 incident 是 `execution_unverified`,不能宣稱完整 AI 自動修復已完成。
- 下一步應把 `execution_unverified` 的 verification gap 收斂到 post-execution verifier / learning writeback而不是只在 Telegram 補文案。 - 下一步應把 `execution_unverified` 的 verification gap 收斂到 post-execution verifier / learning writeback而不是只在 Telegram 補文案。
- 目前整體進度更新:約 72%。 - 目前整體進度更新:約 72%。
### 2026-05-13 — AwoooP truth-chain T12c：全體告警自動化品質總覽（local green）
**目的**
- Operator 不應逐張 Telegram 卡片猜「是否重複發生」、「是否已進 AI 自動修復」、「卡在哪個流程」。
- T12c 先提供 read-only 聚合 API，把最近 incident 全部套用 T12b automation quality gate，回傳 verdict 分布、分數區間、缺失 gate、代表案例與 production claim。
**變更**
- 新增 `GET /api/v1/platform/truth-chain/quality/summary`
- query: `project_id` / `hours` / `limit`
- 回傳 `automation_quality_summary_v1`
- 顯示 `by_verdict`、`score_buckets`、`gate_failures`、`examples`
- `production_claim.can_claim_full_auto_repair` 嚴格要求所有評估 incident 都是 `auto_repaired_verified`
- 新增純函式 `summarize_automation_quality_records(...)`,讓品質總覽可單元測試。
- 新增 route-order 測試,確保 `/truth-chain/quality/summary` 不會被 `/truth-chain/{source_id}` 誤吃。
**local verification**
```text
DATABASE_URL=postgresql+asyncpg://u:p@localhost:5432/db pytest tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py -q
19 passed
ruff check --select F821 src/services/awooop_truth_chain_service.py src/api/v1/platform/truth_chain.py tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py
All checks passed
python3 -m py_compile src/services/awooop_truth_chain_service.py src/api/v1/platform/truth_chain.py tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py
OK
```
**目前整體進度**:約 73%。