feat(awooop): summarize automation quality
This commit is contained in:
@@ -10,11 +10,38 @@ from src.core.awooop_operator_auth import (
|
||||
AwoooPOperatorPrincipal,
|
||||
verify_awooop_operator,
|
||||
)
|
||||
from src.services.awooop_truth_chain_service import fetch_truth_chain
|
||||
from src.services.awooop_truth_chain_service import (
|
||||
fetch_automation_quality_summary,
|
||||
fetch_truth_chain,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get(
    "/truth-chain/quality/summary",
    summary="查詢 AI 自動化品質總覽",
    description=(
        "T12c read-only endpoint. 聚合最近 incident 的 automation quality gate,"
        "讓 Operator 不必逐張 Telegram 卡片判斷是否真正完成 AI 自動修復。"
    ),
)
async def get_automation_quality_summary(
    project_id: str = Query("awoooi", description="租戶 ID"),
    hours: int = Query(24, ge=1, le=168, description="回看小時數"),
    limit: int = Query(200, ge=1, le=500, description="最多評估 incident 數"),
    operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator),
) -> dict[str, Any]:
    # Read-only endpoint: returns the aggregated automation-quality summary
    # for one project over the requested look-back window.
    #
    # The operator principal is resolved purely for its access-control side
    # effect; the summary aggregates incident lifecycle state across alert,
    # execution, and notification tables, so it must not be served to
    # unauthenticated callers. The value itself is intentionally unused.
    _ = operator

    summary_payload = await fetch_automation_quality_summary(
        project_id=project_id,
        hours=hours,
        limit=limit,
    )
    return summary_payload
|
||||
|
||||
|
||||
@router.get(
|
||||
"/truth-chain/{source_id}",
|
||||
summary="查詢 Telegram / Incident / Drift 真相鏈",
|
||||
|
||||
@@ -8,7 +8,7 @@ Telegram cards can be audited without guessing which subsystem owns the truth.
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import date, datetime
|
||||
from datetime import UTC, date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
@@ -489,6 +489,130 @@ def build_automation_quality(
|
||||
}
|
||||
|
||||
|
||||
def _automation_quality_score_bucket(score: int) -> str:
    """Map a quality score to its traffic-light bucket.

    Scores of 85 and above are ``"green"``, 60 through 84 are ``"yellow"``,
    and anything lower is ``"red"``.
    """
    if score >= 85:
        return "green"
    if score >= 60:
        return "yellow"
    return "red"


def _quality_summary_dict(value: Any) -> dict[str, Any]:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _quality_summary_items(value: Any) -> list[Any]:
    """Return *value* as a list when it is a list or tuple, otherwise ``[]``."""
    return list(value) if isinstance(value, (list, tuple)) else []


def _quality_summary_score(raw: Any) -> int:
    """Return *raw* as an int, treating missing or unparseable values as 0."""
    if not raw:
        return 0
    try:
        return int(raw)
    except (TypeError, ValueError, OverflowError):
        # A malformed score on one record must not abort the whole summary;
        # it is counted as 0, which lands in the "red" bucket.
        return 0


def summarize_automation_quality_records(
    *,
    project_id: str,
    window_hours: int,
    records: list[dict[str, Any]],
    limit: int,
) -> dict[str, Any]:
    """Aggregate per-incident automation quality into an operator summary.

    Args:
        project_id: Echoed back in the summary.
        window_hours: Echoed back in the summary.
        records: One entry per incident, each carrying ``incident``,
            ``truth_status`` and ``automation_quality`` mappings. Entries that
            are not dicts, or whose quality is not ``applicable is True``, are
            counted in ``incident_total`` but otherwise ignored.
        limit: Echoed back in the summary.

    Returns:
        A dict tagged ``automation_quality_summary_v1`` with the verdict
        distribution (``by_verdict``), ``score_buckets``, failed or missing
        gates (``gate_failures``), up to 25 ``examples`` in input order, and a
        ``production_claim`` that is only positive when at least one incident
        was evaluated and every evaluated incident is
        ``auto_repaired_verified``.

    Malformed fields (non-numeric score, non-list gates/blockers, non-dict
    sub-mappings) are tolerated and treated as empty/zero rather than raising.
    """
    verdicts: dict[str, dict[str, Any]] = {}
    gate_failures: dict[str, dict[str, Any]] = {}
    score_buckets: dict[str, int] = {"green": 0, "yellow": 0, "red": 0}
    examples: list[dict[str, Any]] = []
    total_score = 0
    evaluated_total = 0
    verified_total = 0

    for record in records:
        if not isinstance(record, dict):
            continue
        quality = _quality_summary_dict(record.get("automation_quality"))
        if quality.get("applicable") is not True:
            continue
        incident = _quality_summary_dict(record.get("incident"))
        truth_status = _quality_summary_dict(record.get("truth_status"))
        needs_human = bool(truth_status.get("needs_human"))

        evaluated_total += 1
        score = _quality_summary_score(quality.get("score"))
        total_score += score
        bucket = _automation_quality_score_bucket(score)
        score_buckets[bucket] += 1

        verdict = str(quality.get("verdict") or "unknown")
        if verdict == "auto_repaired_verified":
            verified_total += 1

        verdict_row = verdicts.setdefault(
            verdict,
            {
                "verdict": verdict,
                "total": 0,
                "score_sum": 0,
                "min_score": score,
                "max_score": score,
                "needs_human": False,
            },
        )
        verdict_row["total"] += 1
        verdict_row["score_sum"] += score
        verdict_row["min_score"] = min(verdict_row["min_score"], score)
        verdict_row["max_score"] = max(verdict_row["max_score"], score)
        # Sticky flag: one incident needing a human marks the whole verdict.
        verdict_row["needs_human"] = verdict_row["needs_human"] or needs_human

        for gate in _quality_summary_items(quality.get("gates")):
            if not isinstance(gate, dict):
                continue
            gate_status = str(gate.get("status") or "")
            if gate_status not in {"failed", "missing"}:
                continue
            gate_name = str(gate.get("name") or "unknown")
            gate_row = gate_failures.setdefault(
                gate_name,
                {"gate": gate_name, "total": 0, "statuses": {}},
            )
            gate_row["total"] += 1
            statuses = gate_row["statuses"]
            statuses[gate_status] = statuses.get(gate_status, 0) + 1

        # Only the first 25 evaluated incidents are surfaced as examples.
        if len(examples) < 25:
            examples.append({
                "incident_id": incident.get("incident_id"),
                "alertname": incident.get("alertname"),
                "severity": incident.get("severity"),
                "status": incident.get("status"),
                "created_at": incident.get("created_at"),
                "truth_stage": truth_status.get("current_stage"),
                "truth_stage_status": truth_status.get("stage_status"),
                "needs_human": needs_human,
                "verdict": verdict,
                "score": score,
                "score_bucket": bucket,
                "blockers": _quality_summary_items(quality.get("blockers"))[:8],
            })

    # Every verdict row has total >= 1 because it is incremented on creation.
    by_verdict = [
        {
            "verdict": row["verdict"],
            "total": row["total"],
            "min_score": row["min_score"],
            "max_score": row["max_score"],
            "needs_human": row["needs_human"],
            "avg_score": round(row["score_sum"] / row["total"], 1),
        }
        for row in verdicts.values()
    ]
    # Most frequent first; ties broken alphabetically for stable output.
    by_verdict.sort(key=lambda row: (-row["total"], row["verdict"]))
    failing_gates = sorted(
        gate_failures.values(),
        key=lambda row: (-row["total"], row["gate"]),
    )

    # An empty evaluation set never supports the claim.
    can_claim = evaluated_total > 0 and verified_total == evaluated_total

    return {
        "schema_version": "automation_quality_summary_v1",
        "project_id": project_id,
        "window_hours": window_hours,
        "limit": limit,
        "incident_total": len(records),
        "evaluated_total": evaluated_total,
        "verified_auto_repair_total": verified_total,
        "average_score": round(total_score / evaluated_total, 1) if evaluated_total else 0.0,
        "score_buckets": score_buckets,
        "by_verdict": by_verdict,
        "gate_failures": failing_gates,
        "examples": examples,
        "production_claim": {
            "can_claim_full_auto_repair": can_claim,
            "reason": (
                "all_evaluated_incidents_auto_repaired_verified"
                if can_claim
                else "some_incidents_are_not_auto_repaired_verified"
            ),
        },
    }
|
||||
|
||||
|
||||
def _summarize_mcp(rows: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
by_tool: dict[str, dict[str, Any]] = {}
|
||||
success_count = 0
|
||||
@@ -1108,3 +1232,72 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
|
||||
current_stage=truth_status["current_stage"],
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
async def fetch_automation_quality_summary(
    *,
    project_id: str = "awoooi",
    hours: int = 24,
    limit: int = 200,
) -> dict[str, Any]:
    """Build the automation-quality summary for a project's recent incidents.

    Loads the newest incidents created inside the look-back window, resolves
    the truth chain for each one, and aggregates the per-incident quality via
    ``summarize_automation_quality_records``.

    Args:
        project_id: Tenant whose incidents are summarized. Incidents with a
            NULL ``project_id`` are included as well.
        hours: Look-back window in hours, clamped to 1..168.
        limit: Maximum number of incidents to evaluate, clamped to 1..500.

    Returns:
        The summary dict produced by ``summarize_automation_quality_records``.
    """
    window_hours = min(max(int(hours), 1), 168)
    max_incidents = min(max(int(limit), 1), 500)
    window_start = datetime.now(UTC) - timedelta(hours=window_hours)
    query_params = {
        "project_id": project_id,
        "cutoff": window_start,
        "limit": max_incidents,
    }

    async with get_db_context(project_id) as db:
        incident_rows = await _fetch_all(
            db,
            """
            SELECT
                incident_id,
                project_id,
                status::text AS status,
                severity::text AS severity,
                alertname,
                alert_category,
                notification_type,
                created_at,
                updated_at,
                resolved_at,
                verification_result
            FROM incidents
            WHERE (project_id = :project_id OR project_id IS NULL)
              AND created_at >= :cutoff
            ORDER BY created_at DESC
            LIMIT :limit
            """,
            query_params,
        )

    # NOTE(review): one truth-chain lookup is awaited per incident, in order,
    # so latency grows linearly with `limit` (up to 500 lookups).
    records: list[dict[str, Any]] = []
    for row in incident_rows:
        incident_id = str(row.get("incident_id") or "")
        if incident_id:
            chain = await fetch_truth_chain(source_id=incident_id, project_id=project_id)
            records.append({
                # Prefer the truth chain's view of the incident; fall back to
                # the raw row when the chain does not carry one.
                "incident": chain.get("incident") or row,
                "truth_status": chain.get("truth_status") or {},
                "automation_quality": chain.get("automation_quality") or {},
            })

    summary = summarize_automation_quality_records(
        project_id=project_id,
        window_hours=window_hours,
        records=records,
        limit=max_incidents,
    )
    claim = summary["production_claim"]
    logger.info(
        "awooop_automation_quality_summary_fetched",
        project_id=project_id,
        window_hours=window_hours,
        incident_total=summary["incident_total"],
        evaluated_total=summary["evaluated_total"],
        can_claim_full_auto_repair=claim["can_claim_full_auto_repair"],
    )
    return summary
|
||||
|
||||
@@ -8,11 +8,13 @@ from src.services.awooop_ansible_audit_service import (
|
||||
build_ansible_truth,
|
||||
)
|
||||
from src.services.awooop_truth_chain_service import (
|
||||
_automation_quality_score_bucket,
|
||||
build_automation_quality,
|
||||
build_incident_reconciliation,
|
||||
_clean_row,
|
||||
_summarize_gateway_mcp,
|
||||
_truth_status,
|
||||
summarize_automation_quality_records,
|
||||
)
|
||||
from src.services.drift_repeat_state import (
|
||||
build_drift_fingerprint,
|
||||
@@ -280,6 +282,89 @@ def test_automation_quality_marks_verified_auto_repair() -> None:
|
||||
assert quality["blockers"] == []
|
||||
|
||||
|
||||
def test_automation_quality_score_buckets_are_stable() -> None:
    """Pin the bucket thresholds on both sides of each boundary (85 and 60)."""
    expected_by_score = {
        100: "green",
        85: "green",
        84: "yellow",
        60: "yellow",
        59: "red",
    }
    for score, expected_bucket in expected_by_score.items():
        assert _automation_quality_score_bucket(score) == expected_bucket
|
||||
|
||||
|
||||
def test_automation_quality_summary_denies_full_claim_when_unverified() -> None:
    """A single unverified incident must block the full-auto-repair claim."""
    succeeded_stage = {
        "current_stage": "execution_succeeded",
        "stage_status": "success",
        "needs_human": False,
    }
    verified_record = {
        "incident": {
            "incident_id": "INC-OK",
            "alertname": "container recovered",
            "severity": "P4",
            "status": "RESOLVED",
            "created_at": "2026-05-13T01:00:00+00:00",
        },
        "truth_status": dict(succeeded_stage),
        "automation_quality": {
            "applicable": True,
            "verdict": "auto_repaired_verified",
            "score": 100,
            "gates": [
                {"name": "verification_recorded", "status": "passed"},
            ],
            "blockers": [],
        },
    }
    # Execution succeeded, but neither verification nor learning was recorded.
    unverified_record = {
        "incident": {
            "incident_id": "INC-GAP",
            "alertname": "low risk action",
            "severity": "P4",
            "status": "INVESTIGATING",
            "created_at": "2026-05-13T02:00:00+00:00",
        },
        "truth_status": dict(succeeded_stage),
        "automation_quality": {
            "applicable": True,
            "verdict": "execution_unverified",
            "score": 65,
            "gates": [
                {"name": "verification_recorded", "status": "missing"},
                {"name": "learning_recorded", "status": "missing"},
            ],
            "blockers": ["verification_recorded", "learning_recorded"],
        },
    }

    summary = summarize_automation_quality_records(
        project_id="awoooi",
        window_hours=24,
        limit=200,
        records=[verified_record, unverified_record],
    )

    assert summary["schema_version"] == "automation_quality_summary_v1"
    assert summary["incident_total"] == 2
    assert summary["evaluated_total"] == 2
    assert summary["verified_auto_repair_total"] == 1
    assert summary["score_buckets"] == {"green": 1, "yellow": 1, "red": 0}

    claim = summary["production_claim"]
    assert claim["can_claim_full_auto_repair"] is False
    assert claim["reason"] == "some_incidents_are_not_auto_repaired_verified"

    verdict_totals = {row["verdict"]: row["total"] for row in summary["by_verdict"]}
    assert verdict_totals == {
        "auto_repaired_verified": 1,
        "execution_unverified": 1,
    }
    gate_totals = {row["gate"]: row["total"] for row in summary["gate_failures"]}
    assert gate_totals == {
        "learning_recorded": 1,
        "verification_recorded": 1,
    }

    # Examples keep input order, so the unverified incident is second.
    gap_example = summary["examples"][1]
    assert gap_example["incident_id"] == "INC-GAP"
    assert gap_example["score_bucket"] == "yellow"
|
||||
|
||||
|
||||
def test_reconciliation_marks_consistent_resolved_execution() -> None:
|
||||
reconciliation = build_incident_reconciliation(
|
||||
incident={"incident_id": "INC-2", "status": "RESOLVED"},
|
||||
|
||||
@@ -35,3 +35,15 @@ def test_truth_chain_route_is_registered() -> None:
|
||||
]
|
||||
|
||||
assert "/truth-chain/{source_id}" in paths
|
||||
|
||||
|
||||
def test_truth_chain_quality_summary_route_is_registered_before_dynamic_source_id() -> None:
    """The static summary route must precede the ``{source_id}`` catch-all.

    If the dynamic route were registered first, a request for
    ``/truth-chain/quality/summary`` could be matched by it instead.
    """
    summary_path = "/truth-chain/quality/summary"
    dynamic_path = "/truth-chain/{source_id}"

    get_paths = []
    for route in router.routes:
        if "GET" in getattr(route, "methods", set()):
            get_paths.append(route.path)

    assert summary_path in get_paths
    assert dynamic_path in get_paths
    assert get_paths.index(summary_path) < get_paths.index(dynamic_path)
|
||||
|
||||
@@ -7410,3 +7410,35 @@ _format_automation_quality_lines(...) returned 6 lines with verdict + score + ex
|
||||
- 目前實況仍顯示多筆 incident 是 `execution_unverified`,不能宣稱完整 AI 自動修復已完成。
|
||||
- 下一步應把 `execution_unverified` 的 verification gap 收斂到 post-execution verifier / learning writeback,而不是只在 Telegram 補文案。
|
||||
- 目前整體進度更新:約 72%。
|
||||
|
||||
### 2026-05-13 — AwoooP truth-chain T12c:全體告警自動化品質總覽(local green)
|
||||
|
||||
**目的**:
|
||||
|
||||
- Operator 不應逐張 Telegram 卡片猜「是否重複發生」、「是否已進 AI 自動修復」、「卡在哪個流程」。
|
||||
- T12c 先提供 read-only 聚合 API,把最近 incident 全部套用 T12b automation quality gate,回傳 verdict 分布、分數區間、缺失 gate、代表案例與 production claim。
|
||||
|
||||
**變更**:
|
||||
|
||||
- 新增 `GET /api/v1/platform/truth-chain/quality/summary`:
|
||||
- query: `project_id` / `hours` / `limit`
|
||||
- 回傳 `automation_quality_summary_v1`
|
||||
- 顯示 `by_verdict`、`score_buckets`、`gate_failures`、`examples`
|
||||
- `production_claim.can_claim_full_auto_repair` 嚴格要求所有評估 incident 都是 `auto_repaired_verified`
|
||||
- 新增純函式 `summarize_automation_quality_records(...)`,讓品質總覽可單元測試。
|
||||
- 新增 route-order 測試,確保 `/truth-chain/quality/summary` 註冊在 `/truth-chain/{source_id}` 之前,不會被動態路由攔截匹配。
|
||||
|
||||
**local verification**:
|
||||
|
||||
```text
|
||||
DATABASE_URL=postgresql+asyncpg://u:p@localhost:5432/db pytest tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py -q
|
||||
19 passed
|
||||
|
||||
ruff check --select F821 src/services/awooop_truth_chain_service.py src/api/v1/platform/truth_chain.py tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py
|
||||
All checks passed
|
||||
|
||||
python3 -m py_compile src/services/awooop_truth_chain_service.py src/api/v1/platform/truth_chain.py tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py
|
||||
OK
|
||||
```
|
||||
|
||||
**目前整體進度**:約 73%。
|
||||
|
||||
Reference in New Issue
Block a user