feat(awooop): link recurring alerts to repair work
This commit is contained in:
@@ -7,6 +7,7 @@ automation state.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
@@ -18,6 +19,8 @@ from src.db.base import get_db_context
|
||||
_MAX_DOSSIER_EVENTS = 50
|
||||
_MAX_COVERAGE_EVENTS = 200
|
||||
_MAX_RECURRENCE_EVENTS = 300
|
||||
# Upper bound on how many incident IDs are bound into one repair-summary query.
_MAX_REPAIR_INCIDENTS = 200
# Incident identifiers embedded in free text: "INC-", eight digits, "-", then at
# least four upper-case letters or digits (for example "INC-20260513-ABCD").
_INCIDENT_ID_RE = re.compile(r"\bINC-\d{8}-[A-Z0-9]{4,}\b")
|
||||
|
||||
|
||||
def _as_dict(value: Any) -> dict[str, Any]:
|
||||
@@ -41,6 +44,39 @@ def _ref_count(source_refs: dict[str, Any], key: str) -> int:
|
||||
return 1 if value else 0
|
||||
|
||||
|
||||
def _append_unique(values: list[str], candidate: Any) -> None:
|
||||
text_value = str(candidate or "").strip()
|
||||
if text_value and text_value not in values:
|
||||
values.append(text_value)
|
||||
|
||||
|
||||
def _append_incident_ids_from_text(values: list[str], text_value: Any) -> None:
    """Scan *text_value* for incident IDs and add each new one to *values*.

    Falsy input is skipped. Anything else is converted with ``str()`` and every
    ``_INCIDENT_ID_RE`` match is appended in order of appearance, without
    duplicates.
    """
    if text_value:
        for match in _INCIDENT_ID_RE.finditer(str(text_value)):
            _append_unique(values, match.group(0))
|
||||
|
||||
|
||||
def _append_incident_ids_from_refs(
    values: list[str], source_refs: dict[str, Any]
) -> None:
    """Add the incident IDs declared under ``source_refs["incident_ids"]``.

    A list contributes each of its elements; any other value (including a
    missing key) is offered as a single candidate. Blank and duplicate
    candidates are dropped by ``_append_unique``.
    """
    declared = source_refs.get("incident_ids")
    candidates = declared if isinstance(declared, list) else [declared]
    for candidate in candidates:
        _append_unique(values, candidate)
|
||||
|
||||
|
||||
def _event_incident_ids(event: dict[str, Any]) -> list[str]:
    """Return the distinct incident IDs referenced by a dossier *event*.

    Explicit ``source_refs`` entries come first, followed by IDs found in the
    ``content_preview``, ``content_redacted`` and ``provider_event_id`` text
    fields, in that order.
    """
    found: list[str] = []
    _append_incident_ids_from_refs(found, _as_dict(event.get("source_refs")))
    for field_name in ("content_preview", "content_redacted", "provider_event_id"):
        _append_incident_ids_from_text(found, event.get(field_name))
    return found
|
||||
|
||||
|
||||
def _recurrence_key(event: dict[str, Any]) -> str:
|
||||
fingerprint = str(event.get("fingerprint") or "").strip()
|
||||
if fingerprint:
|
||||
@@ -61,15 +97,18 @@ def build_dossier_recurrence(
|
||||
*,
|
||||
project_id: str,
|
||||
limit: int,
|
||||
repair_summaries_by_incident: dict[str, dict[str, Any]] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Group recent source events into recurrence buckets with linked run state."""
|
||||
groups: dict[str, dict[str, Any]] = {}
|
||||
repair_summaries = repair_summaries_by_incident or {}
|
||||
|
||||
for row in rows:
|
||||
event = build_dossier_event(row)
|
||||
key = _recurrence_key(event)
|
||||
source_ref_count = int(event.get("source_ref_count") or 0)
|
||||
source_refs = _as_dict(event.get("source_refs"))
|
||||
incident_ids = _event_incident_ids(event)
|
||||
run_id = row.get("run_id")
|
||||
run_state = row.get("run_state")
|
||||
received_at = event.get("received_at")
|
||||
@@ -90,6 +129,8 @@ def build_dossier_recurrence(
|
||||
"latest_run_id": run_id,
|
||||
"latest_run_state": run_state,
|
||||
"latest_agent_id": row.get("run_agent_id"),
|
||||
"latest_incident_id": incident_ids[0] if incident_ids else None,
|
||||
"incident_ids": [],
|
||||
"occurrence_total": 0,
|
||||
"duplicate_total": 0,
|
||||
"linked_run_total": 0,
|
||||
@@ -112,6 +153,9 @@ def build_dossier_recurrence(
|
||||
if event.get("is_duplicate"):
|
||||
group["duplicate_total"] += 1
|
||||
|
||||
for incident_id in incident_ids:
|
||||
_append_unique(group["incident_ids"], incident_id)
|
||||
|
||||
group["sentry_ref_total"] += _ref_count(source_refs, "sentry_issue_ids")
|
||||
group["signoz_ref_total"] += _ref_count(source_refs, "signoz_alerts")
|
||||
group["alert_ref_total"] += _ref_count(source_refs, "alert_ids")
|
||||
@@ -136,19 +180,27 @@ def build_dossier_recurrence(
|
||||
or str(received_at) > str(group.get("latest_received_at"))
|
||||
):
|
||||
group["latest_received_at"] = received_at
|
||||
group["latest_incident_id"] = (
|
||||
incident_ids[0] if incident_ids else group.get("latest_incident_id")
|
||||
)
|
||||
|
||||
items = []
|
||||
linked_run_total = 0
|
||||
for group in groups.values():
|
||||
run_ids = group.pop("_run_ids")
|
||||
group["linked_run_total"] = len(run_ids)
|
||||
_attach_work_item_summary(group, repair_summaries)
|
||||
linked_run_total += len(run_ids)
|
||||
items.append(group)
|
||||
|
||||
items.sort(key=lambda item: str(item.get("latest_received_at") or ""), reverse=True)
|
||||
items.sort(key=lambda item: int(item.get("occurrence_total") or 0), reverse=True)
|
||||
latest_received_at = max(
|
||||
(item.get("latest_received_at") for item in items if item.get("latest_received_at")),
|
||||
(
|
||||
item.get("latest_received_at")
|
||||
for item in items
|
||||
if item.get("latest_received_at")
|
||||
),
|
||||
default=None,
|
||||
)
|
||||
|
||||
@@ -161,15 +213,133 @@ def build_dossier_recurrence(
|
||||
"recurrent_group_total": sum(
|
||||
1 for item in items if int(item.get("occurrence_total") or 0) > 1
|
||||
),
|
||||
"duplicate_event_total": sum(int(item.get("duplicate_total") or 0) for item in items),
|
||||
"duplicate_event_total": sum(
|
||||
int(item.get("duplicate_total") or 0) for item in items
|
||||
),
|
||||
"linked_run_total": linked_run_total,
|
||||
"unlinked_event_total": sum(1 for row in rows if not row.get("run_id")),
|
||||
"auto_repair_linked_total": sum(
|
||||
1
|
||||
for item in items
|
||||
if _as_dict(item.get("repair_summary")).get("latest_auto_repair_id")
|
||||
),
|
||||
"verified_repair_group_total": sum(
|
||||
1
|
||||
for item in items
|
||||
if _as_dict(item.get("repair_summary")).get("status")
|
||||
== "auto_repair_verified"
|
||||
),
|
||||
"open_work_item_group_total": sum(
|
||||
1
|
||||
for item in items
|
||||
if _as_dict(item.get("work_item")).get("status") == "open"
|
||||
),
|
||||
"manual_gate_group_total": sum(
|
||||
1
|
||||
for item in items
|
||||
if _as_dict(item.get("repair_summary")).get("status") == "manual_gate"
|
||||
),
|
||||
"latest_received_at": latest_received_at,
|
||||
},
|
||||
"items": items,
|
||||
}
|
||||
|
||||
|
||||
def _repair_status(
|
||||
*,
|
||||
incident_id: str | None,
|
||||
latest_run_state: str | None,
|
||||
repair_summary: dict[str, Any] | None,
|
||||
) -> str:
|
||||
if not incident_id:
|
||||
return "no_incident_link"
|
||||
if repair_summary:
|
||||
latest_success = repair_summary.get("latest_success")
|
||||
verification = str(
|
||||
repair_summary.get("latest_verification_result") or ""
|
||||
).lower()
|
||||
if latest_success is True and verification == "success":
|
||||
return "auto_repair_verified"
|
||||
if latest_success is True:
|
||||
return "auto_repair_succeeded_unverified"
|
||||
if latest_success is False:
|
||||
return "auto_repair_failed"
|
||||
return "auto_repair_recorded"
|
||||
if latest_run_state == "waiting_approval":
|
||||
return "manual_gate"
|
||||
if latest_run_state in {"pending", "running", "waiting_tool"}:
|
||||
return "investigating"
|
||||
if latest_run_state == "completed":
|
||||
return "run_completed_no_repair"
|
||||
return "no_repair_record"
|
||||
|
||||
|
||||
def _work_item_status(repair_status: str) -> str:
|
||||
if repair_status in {"no_incident_link", "run_completed_no_repair"}:
|
||||
return "none"
|
||||
if repair_status == "auto_repair_verified":
|
||||
return "closed"
|
||||
return "open"
|
||||
|
||||
|
||||
def _attach_work_item_summary(
    group: dict[str, Any],
    repair_summaries_by_incident: dict[str, dict[str, Any]],
) -> None:
    """Attach ``repair_summary`` and ``work_item`` payloads to *group* in place.

    The incident used is the group's ``latest_incident_id`` or, failing that,
    the first entry of its ``incident_ids``. The repair summary for that
    incident (if any) is copied and stamped with the computed status;
    otherwise a zeroed placeholder summary is produced. ``latest_incident_id``
    on the group is overwritten with the resolved value.
    """
    known_ids = [str(value) for value in group.get("incident_ids", []) if value]
    incident_id = str(group.get("latest_incident_id") or "")
    if not incident_id:
        incident_id = known_ids[0] if known_ids else None

    summary = None
    if incident_id:
        summary = repair_summaries_by_incident.get(incident_id)

    status_value = _repair_status(
        incident_id=incident_id,
        latest_run_state=group.get("latest_run_state"),
        repair_summary=summary,
    )

    if summary:
        # Copy so the caller's mapping is never mutated.
        repair_payload = {**summary, "status": status_value}
    else:
        repair_payload = {
            "schema_version": "awooop_recurrence_repair_summary_v1",
            "status": status_value,
            "incident_id": incident_id,
            "latest_auto_repair_id": None,
            "latest_verification_result": None,
            "auto_repair_total": 0,
            "success_total": 0,
            "failed_total": 0,
        }

    work_status = _work_item_status(status_value)
    auto_repair_id = repair_payload.get("latest_auto_repair_id")

    # A work item ID exists only for a linked incident with an open/closed item.
    work_item_id = None
    if incident_id and work_status != "none":
        if auto_repair_id:
            work_item_id = f"verification:{incident_id}:{auto_repair_id}"
        else:
            work_item_id = f"incident:{incident_id}"

    group["latest_incident_id"] = incident_id
    group["repair_summary"] = repair_payload
    group["work_item"] = {
        "schema_version": "awooop_recurrence_work_item_link_v1",
        "work_item_id": work_item_id,
        "incident_id": incident_id,
        "auto_repair_id": auto_repair_id,
        "status": work_status,
        "kind": "verification" if auto_repair_id else "incident_followup",
        "needs_human": work_status == "open",
    }
|
||||
|
||||
|
||||
def build_dossier_coverage(
|
||||
rows: list[dict[str, Any]],
|
||||
*,
|
||||
@@ -229,9 +399,9 @@ def build_dossier_coverage(
|
||||
provider_item["sentry_ref_total"] += event_sentry_refs
|
||||
provider_item["signoz_ref_total"] += event_signoz_refs
|
||||
provider_item["alert_ref_total"] += event_alert_refs
|
||||
provider_item["latest_received_at"] = (
|
||||
provider_item["latest_received_at"] or event.get("received_at")
|
||||
)
|
||||
provider_item["latest_received_at"] = provider_item[
|
||||
"latest_received_at"
|
||||
] or event.get("received_at")
|
||||
|
||||
duplicate_total = sum(1 for event in events if event.get("is_duplicate"))
|
||||
redacted_total = sum(1 for event in events if event.get("has_redacted_content"))
|
||||
@@ -258,7 +428,10 @@ def build_dossier_coverage(
|
||||
},
|
||||
"providers": sorted(
|
||||
provider_map.values(),
|
||||
key=lambda item: (-int(item.get("total") or 0), str(item.get("provider") or "")),
|
||||
key=lambda item: (
|
||||
-int(item.get("total") or 0),
|
||||
str(item.get("provider") or ""),
|
||||
),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -299,6 +472,104 @@ def build_dossier_event(row: dict[str, Any]) -> dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
def _collect_incident_ids_from_rows(rows: list[dict[str, Any]]) -> list[str]:
    """Return every distinct incident ID referenced by *rows*, in first-seen order.

    Each row is converted with ``build_dossier_event`` before its incident IDs
    are extracted.
    """
    collected: list[str] = []
    for row in rows:
        for incident_id in _event_incident_ids(build_dossier_event(row)):
            _append_unique(collected, incident_id)
    return collected
|
||||
|
||||
|
||||
async def _fetch_auto_repair_summaries_by_incident(
    db: Any,
    incident_ids: list[str],
) -> dict[str, dict[str, Any]]:
    """Fetch latest auto-repair and verifier evidence for recurrence groups.

    Args:
        db: Session/connection object exposing an awaitable ``execute``.
        incident_ids: Incident IDs to look up. Only the first
            ``_MAX_REPAIR_INCIDENTS`` entries are queried.

    Returns:
        A mapping of incident ID (as ``str``) to a summary dict holding the
        newest ``auto_repair_executions`` row for that incident, the newest
        non-NULL ``incident_evidence.verification_result`` for the incident,
        per-incident totals, and a ``schema_version`` marker. Incidents with no
        auto-repair execution are absent from the mapping.

    NOTE(review): the verification evidence is the newest one for the
    *incident*, not necessarily the one produced for the latest repair.
    Confirm that is the intended pairing.
    """
    visible_incident_ids = incident_ids[:_MAX_REPAIR_INCIDENTS]
    if not visible_incident_ids:
        return {}

    # Only generated placeholder names are interpolated into the SQL text; the
    # incident IDs themselves always travel as bound parameters.
    params: dict[str, Any] = {
        f"incident_id_{index}": incident_id
        for index, incident_id in enumerate(visible_incident_ids)
    }
    in_clause = ", ".join(f":{key}" for key in params)

    # "Latest" must be deterministic: NULLS LAST keeps rows without a timestamp
    # from outranking real ones (PostgreSQL sorts NULLs first under DESC), and
    # ``are.id DESC`` breaks ties between executions sharing a created_at.
    result = await db.execute(
        text(
            f"""
            WITH ranked AS (
                SELECT
                    are.id AS latest_auto_repair_id,
                    are.incident_id,
                    are.playbook_id AS latest_playbook_id,
                    are.playbook_name AS latest_playbook_name,
                    are.success AS latest_success,
                    left(coalesce(are.error_message, ''), 240) AS latest_error_message_preview,
                    are.triggered_by AS latest_triggered_by,
                    are.risk_level AS latest_risk_level,
                    are.execution_time_ms AS latest_execution_time_ms,
                    are.created_at AS latest_auto_repair_at,
                    latest_evidence.verification_result AS latest_verification_result,
                    latest_evidence.collected_at AS latest_verification_at,
                    COUNT(*) OVER (PARTITION BY are.incident_id) AS auto_repair_total,
                    COUNT(*) FILTER (WHERE are.success IS TRUE)
                        OVER (PARTITION BY are.incident_id) AS success_total,
                    COUNT(*) FILTER (WHERE are.success IS FALSE)
                        OVER (PARTITION BY are.incident_id) AS failed_total,
                    ROW_NUMBER() OVER (
                        PARTITION BY are.incident_id
                        ORDER BY are.created_at DESC NULLS LAST, are.id DESC
                    ) AS rn
                FROM auto_repair_executions are
                LEFT JOIN LATERAL (
                    SELECT
                        ev.verification_result,
                        ev.collected_at
                    FROM incident_evidence ev
                    WHERE ev.incident_id = are.incident_id
                      AND ev.verification_result IS NOT NULL
                    ORDER BY ev.collected_at DESC NULLS LAST
                    LIMIT 1
                ) latest_evidence ON TRUE
                WHERE are.incident_id IN ({in_clause})
            )
            SELECT
                latest_auto_repair_id,
                incident_id,
                latest_playbook_id,
                latest_playbook_name,
                latest_success,
                latest_error_message_preview,
                latest_triggered_by,
                latest_risk_level,
                latest_execution_time_ms,
                latest_auto_repair_at,
                latest_verification_result,
                latest_verification_at,
                auto_repair_total,
                success_total,
                failed_total
            FROM ranked
            WHERE rn = 1
            """
        ),
        params,
    )

    summaries: dict[str, dict[str, Any]] = {}
    for row in result.mappings().all():
        item = dict(row)
        incident_id = str(item.get("incident_id") or "")
        if not incident_id:
            # Rows without an incident ID cannot be matched to any group.
            continue
        item["schema_version"] = "awooop_recurrence_repair_summary_v1"
        summaries[incident_id] = item
    return summaries
|
||||
|
||||
|
||||
async def fetch_channel_event_dossier(
|
||||
*,
|
||||
project_id: str | None,
|
||||
@@ -329,7 +600,8 @@ async def fetch_channel_event_dossier(
|
||||
|
||||
async with get_db_context(effective_project_id) as db:
|
||||
result = await db.execute(
|
||||
text(f"""
|
||||
text(
|
||||
f"""
|
||||
SELECT
|
||||
event_id,
|
||||
project_id,
|
||||
@@ -347,7 +619,8 @@ async def fetch_channel_event_dossier(
|
||||
WHERE {" AND ".join(where_clauses)}
|
||||
ORDER BY received_at ASC
|
||||
LIMIT :limit
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
rows = [dict(row) for row in result.mappings().all()]
|
||||
@@ -364,7 +637,9 @@ async def fetch_channel_event_dossier(
|
||||
"source_count": len(events),
|
||||
"duplicate_total": duplicate_total,
|
||||
"redacted_total": redacted_total,
|
||||
"source_ref_total": sum(int(event.get("source_ref_count") or 0) for event in events),
|
||||
"source_ref_total": sum(
|
||||
int(event.get("source_ref_count") or 0) for event in events
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
@@ -392,7 +667,8 @@ async def fetch_channel_event_dossier_coverage(
|
||||
|
||||
async with get_db_context(effective_project_id) as db:
|
||||
result = await db.execute(
|
||||
text(f"""
|
||||
text(
|
||||
f"""
|
||||
SELECT
|
||||
event_id,
|
||||
project_id,
|
||||
@@ -410,7 +686,8 @@ async def fetch_channel_event_dossier_coverage(
|
||||
WHERE {" AND ".join(where_clauses)}
|
||||
ORDER BY received_at DESC
|
||||
LIMIT :limit
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
rows = [dict(row) for row in result.mappings().all()]
|
||||
@@ -445,7 +722,8 @@ async def fetch_channel_event_dossier_recurrence(
|
||||
|
||||
async with get_db_context(effective_project_id) as db:
|
||||
result = await db.execute(
|
||||
text(f"""
|
||||
text(
|
||||
f"""
|
||||
SELECT
|
||||
e.event_id,
|
||||
e.project_id,
|
||||
@@ -469,13 +747,19 @@ async def fetch_channel_event_dossier_recurrence(
|
||||
WHERE {" AND ".join(where_clauses)}
|
||||
ORDER BY e.received_at DESC
|
||||
LIMIT :limit
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
rows = [dict(row) for row in result.mappings().all()]
|
||||
repair_summaries = await _fetch_auto_repair_summaries_by_incident(
|
||||
db,
|
||||
_collect_incident_ids_from_rows(rows),
|
||||
)
|
||||
|
||||
return build_dossier_recurrence(
|
||||
rows,
|
||||
project_id=effective_project_id,
|
||||
limit=safe_limit,
|
||||
repair_summaries_by_incident=repair_summaries,
|
||||
)
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import UUID
|
||||
|
||||
import pytest
|
||||
from fastapi import HTTPException
|
||||
from uuid import UUID
|
||||
|
||||
from src.services import channel_event_dossier_service
|
||||
from src.services.channel_event_dossier_service import (
|
||||
build_dossier_event,
|
||||
build_dossier_coverage,
|
||||
build_dossier_event,
|
||||
build_dossier_recurrence,
|
||||
fetch_channel_event_dossier,
|
||||
fetch_channel_event_dossier_coverage,
|
||||
@@ -16,38 +17,40 @@ from src.services.channel_event_dossier_service import (
|
||||
|
||||
|
||||
def test_build_dossier_event_summarizes_source_envelope() -> None:
|
||||
event = build_dossier_event({
|
||||
"event_id": "event-1",
|
||||
"project_id": "awoooi",
|
||||
"channel_type": "internal",
|
||||
"provider_event_id": "sentry:received:issue-1",
|
||||
"content_hash": "h" * 64,
|
||||
"content_preview": "Sentry issue",
|
||||
"content_redacted": "Sentry issue redacted",
|
||||
"redaction_version": "audit_sink_v1",
|
||||
"source_envelope": {
|
||||
"provider": "sentry",
|
||||
"stage": "received",
|
||||
"source_url": "https://sentry.example.invalid/issues/issue-1",
|
||||
"content_sha256": "a" * 64,
|
||||
"content_length": 42,
|
||||
"source_refs": {
|
||||
"event_ids": ["issue-1"],
|
||||
"sentry_issue_ids": ["issue-1", "sentry:received:issue-1"],
|
||||
"fingerprints": ["sentry-issue-1"],
|
||||
event = build_dossier_event(
|
||||
{
|
||||
"event_id": "event-1",
|
||||
"project_id": "awoooi",
|
||||
"channel_type": "internal",
|
||||
"provider_event_id": "sentry:received:issue-1",
|
||||
"content_hash": "h" * 64,
|
||||
"content_preview": "Sentry issue",
|
||||
"content_redacted": "Sentry issue redacted",
|
||||
"redaction_version": "audit_sink_v1",
|
||||
"source_envelope": {
|
||||
"provider": "sentry",
|
||||
"stage": "received",
|
||||
"source_url": "https://sentry.example.invalid/issues/issue-1",
|
||||
"content_sha256": "a" * 64,
|
||||
"content_length": 42,
|
||||
"source_refs": {
|
||||
"event_ids": ["issue-1"],
|
||||
"sentry_issue_ids": ["issue-1", "sentry:received:issue-1"],
|
||||
"fingerprints": ["sentry-issue-1"],
|
||||
},
|
||||
"log_correlation": {
|
||||
"alertname": "Sentry Issue",
|
||||
"severity": "error",
|
||||
"namespace": "sentry",
|
||||
"target_resource": "frontend",
|
||||
"fingerprint": "sentry-issue-1",
|
||||
},
|
||||
},
|
||||
"log_correlation": {
|
||||
"alertname": "Sentry Issue",
|
||||
"severity": "error",
|
||||
"namespace": "sentry",
|
||||
"target_resource": "frontend",
|
||||
"fingerprint": "sentry-issue-1",
|
||||
},
|
||||
},
|
||||
"is_duplicate": False,
|
||||
"provider_ts": None,
|
||||
"received_at": "2026-05-13T13:46:00",
|
||||
})
|
||||
"is_duplicate": False,
|
||||
"provider_ts": None,
|
||||
"received_at": "2026-05-13T13:46:00",
|
||||
}
|
||||
)
|
||||
|
||||
assert event["provider"] == "sentry"
|
||||
assert event["stage"] == "received"
|
||||
@@ -153,6 +156,7 @@ def test_build_dossier_recurrence_groups_events_and_run_state() -> None:
|
||||
"provider": "alertmanager",
|
||||
"source_refs": {
|
||||
"alert_ids": ["alert-2"],
|
||||
"incident_ids": ["INC-20260513-ABCD"],
|
||||
"fingerprints": ["fp-host-disk"],
|
||||
},
|
||||
"log_correlation": {
|
||||
@@ -183,6 +187,7 @@ def test_build_dossier_recurrence_groups_events_and_run_state() -> None:
|
||||
"provider": "alertmanager",
|
||||
"source_refs": {
|
||||
"alert_ids": ["alert-1"],
|
||||
"incident_ids": ["INC-20260513-ABCD"],
|
||||
"fingerprints": ["fp-host-disk"],
|
||||
},
|
||||
"log_correlation": {
|
||||
@@ -224,6 +229,26 @@ def test_build_dossier_recurrence_groups_events_and_run_state() -> None:
|
||||
],
|
||||
project_id="awoooi",
|
||||
limit=100,
|
||||
repair_summaries_by_incident={
|
||||
"INC-20260513-ABCD": {
|
||||
"schema_version": "awooop_recurrence_repair_summary_v1",
|
||||
"incident_id": "INC-20260513-ABCD",
|
||||
"latest_auto_repair_id": "repair-1",
|
||||
"latest_playbook_id": "playbook-1",
|
||||
"latest_playbook_name": "Restart workload",
|
||||
"latest_success": False,
|
||||
"latest_error_message_preview": "verifier failed",
|
||||
"latest_triggered_by": "auto_repair",
|
||||
"latest_risk_level": "low",
|
||||
"latest_execution_time_ms": 1200,
|
||||
"latest_auto_repair_at": "2026-05-13T13:48:00",
|
||||
"latest_verification_result": "failed",
|
||||
"latest_verification_at": "2026-05-13T13:49:00",
|
||||
"auto_repair_total": 1,
|
||||
"success_total": 0,
|
||||
"failed_total": 1,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert recurrence["summary"]["source_event_total"] == 3
|
||||
@@ -232,6 +257,9 @@ def test_build_dossier_recurrence_groups_events_and_run_state() -> None:
|
||||
assert recurrence["summary"]["duplicate_event_total"] == 1
|
||||
assert recurrence["summary"]["linked_run_total"] == 2
|
||||
assert recurrence["summary"]["unlinked_event_total"] == 1
|
||||
assert recurrence["summary"]["auto_repair_linked_total"] == 1
|
||||
assert recurrence["summary"]["open_work_item_group_total"] == 1
|
||||
assert recurrence["summary"]["verified_repair_group_total"] == 0
|
||||
|
||||
host_group = recurrence["items"][0]
|
||||
assert host_group["recurrence_key"] == "fingerprint:fp-host-disk"
|
||||
@@ -239,8 +267,21 @@ def test_build_dossier_recurrence_groups_events_and_run_state() -> None:
|
||||
assert host_group["duplicate_total"] == 1
|
||||
assert host_group["linked_run_total"] == 2
|
||||
assert host_group["latest_run_state"] == "waiting_approval"
|
||||
assert host_group["latest_incident_id"] == "INC-20260513-ABCD"
|
||||
assert host_group["incident_ids"] == ["INC-20260513-ABCD"]
|
||||
assert host_group["run_state_counts"] == {"waiting_approval": 1, "completed": 1}
|
||||
assert host_group["alert_ref_total"] == 2
|
||||
assert host_group["repair_summary"]["status"] == "auto_repair_failed"
|
||||
assert host_group["repair_summary"]["latest_auto_repair_id"] == "repair-1"
|
||||
assert host_group["work_item"] == {
|
||||
"schema_version": "awooop_recurrence_work_item_link_v1",
|
||||
"work_item_id": "verification:INC-20260513-ABCD:repair-1",
|
||||
"incident_id": "INC-20260513-ABCD",
|
||||
"auto_repair_id": "repair-1",
|
||||
"status": "open",
|
||||
"kind": "verification",
|
||||
"needs_human": True,
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -306,7 +347,9 @@ async def test_fetch_channel_event_dossier_uses_typed_run_filter(monkeypatch) ->
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_fetch_channel_event_dossier_coverage_uses_typed_provider_filter(monkeypatch) -> None:
|
||||
async def test_fetch_channel_event_dossier_coverage_uses_typed_provider_filter(
|
||||
monkeypatch,
|
||||
) -> None:
|
||||
captured: dict[str, object] = {}
|
||||
|
||||
class FakeMappings:
|
||||
@@ -353,7 +396,9 @@ async def test_fetch_channel_event_dossier_coverage_uses_typed_provider_filter(m
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_fetch_channel_event_dossier_recurrence_uses_joined_typed_filter(monkeypatch) -> None:
|
||||
async def test_fetch_channel_event_dossier_recurrence_uses_joined_typed_filter(
|
||||
monkeypatch,
|
||||
) -> None:
|
||||
captured: dict[str, object] = {}
|
||||
|
||||
class FakeMappings:
|
||||
|
||||
@@ -1911,13 +1911,17 @@
|
||||
"groups": "Link groups",
|
||||
"recurrent": "Recurring groups",
|
||||
"duplicates": "Duplicate events",
|
||||
"linkedRuns": "Linked Runs"
|
||||
"linkedRuns": "Linked Runs",
|
||||
"autoRepair": "Auto repair",
|
||||
"openWorkItems": "Open work items"
|
||||
},
|
||||
"details": {
|
||||
"sourceEvents": "{count} source events",
|
||||
"latest": "Latest {time}",
|
||||
"unlinked": "{count} items not linked to a Run",
|
||||
"limit": "Latest {count} item window"
|
||||
"limit": "Latest {count} item window",
|
||||
"verifiedRepair": "{count} verified repair groups",
|
||||
"manualGates": "{count} manual gates"
|
||||
},
|
||||
"states": {
|
||||
"pending": "Pending",
|
||||
@@ -1930,13 +1934,28 @@
|
||||
"timeout": "Timed out",
|
||||
"unlinked": "Not linked"
|
||||
},
|
||||
"repairStatuses": {
|
||||
"no_incident_link": "No Incident link",
|
||||
"auto_repair_verified": "Verified repair",
|
||||
"auto_repair_succeeded_unverified": "Repair needs verification",
|
||||
"auto_repair_failed": "Repair failed",
|
||||
"auto_repair_recorded": "Repair recorded",
|
||||
"manual_gate": "Manual gate needed",
|
||||
"investigating": "Investigating",
|
||||
"run_completed_no_repair": "Run completed without repair",
|
||||
"no_repair_record": "No repair record"
|
||||
},
|
||||
"item": {
|
||||
"latest": "Latest {time}",
|
||||
"duplicates": "Duplicates {count}",
|
||||
"refs": "Refs {count}",
|
||||
"linkedRuns": "Runs {count}",
|
||||
"incident": "Incident {incidentId}",
|
||||
"repair": "Repair {status}",
|
||||
"openRun": "Open Run",
|
||||
"noRun": "No Run yet"
|
||||
"noRun": "No Run yet",
|
||||
"openWorkItem": "Open work item",
|
||||
"noWorkItem": "No open work item"
|
||||
}
|
||||
},
|
||||
"callbackReply": {
|
||||
|
||||
@@ -1912,13 +1912,17 @@
|
||||
"groups": "關聯群組",
|
||||
"recurrent": "重複群組",
|
||||
"duplicates": "重複事件",
|
||||
"linkedRuns": "已連 Run"
|
||||
"linkedRuns": "已連 Run",
|
||||
"autoRepair": "自動修復",
|
||||
"openWorkItems": "待處理項"
|
||||
},
|
||||
"details": {
|
||||
"sourceEvents": "{count} 筆來源事件",
|
||||
"latest": "最新 {time}",
|
||||
"unlinked": "{count} 筆尚未連 Run",
|
||||
"limit": "最近 {count} 筆視窗"
|
||||
"limit": "最近 {count} 筆視窗",
|
||||
"verifiedRepair": "{count} 組已驗證修復",
|
||||
"manualGates": "{count} 組人工閘門"
|
||||
},
|
||||
"states": {
|
||||
"pending": "待執行",
|
||||
@@ -1931,13 +1935,28 @@
|
||||
"timeout": "已超時",
|
||||
"unlinked": "尚未連 Run"
|
||||
},
|
||||
"repairStatuses": {
|
||||
"no_incident_link": "尚未連 Incident",
|
||||
"auto_repair_verified": "已驗證修復",
|
||||
"auto_repair_succeeded_unverified": "修復待驗證",
|
||||
"auto_repair_failed": "修復失敗",
|
||||
"auto_repair_recorded": "修復已記錄",
|
||||
"manual_gate": "需人工閘門",
|
||||
"investigating": "調查中",
|
||||
"run_completed_no_repair": "Run 完成無修復",
|
||||
"no_repair_record": "無修復記錄"
|
||||
},
|
||||
"item": {
|
||||
"latest": "最新 {time}",
|
||||
"duplicates": "重複 {count}",
|
||||
"refs": "Refs {count}",
|
||||
"linkedRuns": "Run {count}",
|
||||
"incident": "Incident {incidentId}",
|
||||
"repair": "修復 {status}",
|
||||
"openRun": "開啟 Run",
|
||||
"noRun": "尚無 Run"
|
||||
"noRun": "尚無 Run",
|
||||
"openWorkItem": "開啟工作項",
|
||||
"noWorkItem": "無待處理項"
|
||||
}
|
||||
},
|
||||
"callbackReply": {
|
||||
|
||||
@@ -55,6 +55,16 @@ type RemediationStatus =
|
||||
| "write_observed"
|
||||
| "blocked"
|
||||
| "observed";
|
||||
// Repair status strings a recurrence group can carry in `repair_summary.status`.
// Each member has a matching `repairStatuses.*` translation key.
type RecurrenceRepairStatus =
  | "no_incident_link"
  | "auto_repair_verified"
  | "auto_repair_succeeded_unverified"
  | "auto_repair_failed"
  | "auto_repair_recorded"
  | "manual_gate"
  | "investigating"
  | "run_completed_no_repair"
  | "no_repair_record";
|
||||
|
||||
interface RemediationSummary {
|
||||
schema_version?: string;
|
||||
@@ -183,9 +193,43 @@ interface EventRecurrenceSummary {
|
||||
duplicate_event_total: number;
|
||||
linked_run_total: number;
|
||||
unlinked_event_total: number;
|
||||
auto_repair_linked_total?: number;
|
||||
verified_repair_group_total?: number;
|
||||
open_work_item_group_total?: number;
|
||||
manual_gate_group_total?: number;
|
||||
latest_received_at?: string | null;
|
||||
}
|
||||
|
||||
// Auto-repair evidence attached to a recurrence group for its latest incident.
// All fields are optional because the backend may send a reduced placeholder
// when no auto-repair execution exists for the incident.
interface EventRecurrenceRepairSummary {
  schema_version?: string;
  // Known values are listed in RecurrenceRepairStatus; other strings are tolerated.
  status?: RecurrenceRepairStatus | string;
  incident_id?: string | null;
  latest_auto_repair_id?: string | null;
  latest_playbook_id?: string | null;
  latest_playbook_name?: string | null;
  latest_success?: boolean | null;
  // Truncated preview of the execution's error message (backend caps it at 240 characters).
  latest_error_message_preview?: string | null;
  latest_triggered_by?: string | null;
  latest_risk_level?: string | null;
  latest_execution_time_ms?: number | null;
  latest_auto_repair_at?: string | null;
  latest_verification_result?: string | null;
  latest_verification_at?: string | null;
  // Per-incident counts of auto-repair executions: all, successful, failed.
  auto_repair_total?: number;
  success_total?: number;
  failed_total?: number;
}
|
||||
|
||||
// Link from a recurrence group to the follow-up work item for its latest incident.
interface EventRecurrenceWorkItem {
  schema_version?: string;
  // Backend format: "verification:<incident>:<auto repair>" when a repair exists,
  // otherwise "incident:<incident>"; null when there is no incident or status is "none".
  work_item_id?: string | null;
  incident_id?: string | null;
  auto_repair_id?: string | null;
  status?: "open" | "closed" | "none" | string;
  // Backend sends "verification" when an auto repair ID is present, else "incident_followup".
  kind?: string | null;
  // Backend sets this to true exactly when status is "open".
  needs_human?: boolean;
}
|
||||
|
||||
interface EventRecurrenceItem {
|
||||
recurrence_key: string;
|
||||
provider?: string | null;
|
||||
@@ -200,6 +244,10 @@ interface EventRecurrenceItem {
|
||||
latest_run_id?: string | null;
|
||||
latest_run_state?: RunState | string | null;
|
||||
latest_agent_id?: string | null;
|
||||
latest_incident_id?: string | null;
|
||||
incident_ids?: string[];
|
||||
repair_summary?: EventRecurrenceRepairSummary | null;
|
||||
work_item?: EventRecurrenceWorkItem | null;
|
||||
occurrence_total: number;
|
||||
duplicate_total: number;
|
||||
linked_run_total: number;
|
||||
@@ -944,6 +992,23 @@ function recurrenceStateLabelKey(state?: string | null) {
|
||||
return "states.unlinked";
|
||||
}
|
||||
|
||||
// Resolve the translation key for a repair status. Known statuses map 1:1 onto
// `repairStatuses.<status>`; anything else (including null/undefined) falls
// back to the "no repair record" label.
function recurrenceRepairStatusLabelKey(status?: string | null) {
  switch (status) {
    case "no_incident_link":
    case "auto_repair_verified":
    case "auto_repair_succeeded_unverified":
    case "auto_repair_failed":
    case "auto_repair_recorded":
    case "manual_gate":
    case "investigating":
    case "run_completed_no_repair":
    case "no_repair_record":
      return `repairStatuses.${status}`;
    default:
      return "repairStatuses.no_repair_record";
  }
}
|
||||
|
||||
function EventRecurrencePanel({
|
||||
recurrence,
|
||||
error,
|
||||
@@ -987,6 +1052,20 @@ function EventRecurrencePanel({
|
||||
detail: t("details.limit", { count: recurrence?.limit ?? 0 }),
|
||||
className: "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]",
|
||||
},
|
||||
{
|
||||
label: t("metrics.autoRepair"),
|
||||
value: summary?.auto_repair_linked_total ?? 0,
|
||||
detail: t("details.verifiedRepair", { count: summary?.verified_repair_group_total ?? 0 }),
|
||||
className: "border-[#b9a6d9] bg-[#f5f0ff] text-[#51358f]",
|
||||
},
|
||||
{
|
||||
label: t("metrics.openWorkItems"),
|
||||
value: summary?.open_work_item_group_total ?? 0,
|
||||
detail: t("details.manualGates", { count: summary?.manual_gate_group_total ?? 0 }),
|
||||
className: (summary?.open_work_item_group_total ?? 0) > 0
|
||||
? "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]"
|
||||
: "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]",
|
||||
},
|
||||
];
|
||||
|
||||
return (
|
||||
@@ -1010,7 +1089,7 @@ function EventRecurrencePanel({
|
||||
</div>
|
||||
) : (
|
||||
<>
|
||||
<div className="grid gap-px bg-[#e0ddd4] md:grid-cols-2 xl:grid-cols-4">
|
||||
<div className="grid gap-px bg-[#e0ddd4] md:grid-cols-2 xl:grid-cols-6">
|
||||
{metrics.map((item) => (
|
||||
<div key={item.label} className="bg-white px-4 py-3">
|
||||
<div className="flex items-start justify-between gap-3">
|
||||
@@ -1049,9 +1128,16 @@ function EventRecurrencePanel({
|
||||
: "--";
|
||||
const stateKey = recurrenceStateLabelKey(item.latest_run_state);
|
||||
const stateLabel = t(stateKey as never);
|
||||
const repair = item.repair_summary;
|
||||
const workItem = item.work_item;
|
||||
const repairKey = recurrenceRepairStatusLabelKey(repair?.status);
|
||||
const repairLabel = t(repairKey as never);
|
||||
const runHref = item.latest_run_id
|
||||
? `/awooop/runs/${item.latest_run_id}?project_id=${encodeURIComponent(recurrence?.project_id ?? "awoooi")}`
|
||||
: null;
|
||||
const workHref = workItem?.work_item_id
|
||||
? `/awooop/work-items?project_id=${encodeURIComponent(recurrence?.project_id ?? "awoooi")}&work_item_id=${encodeURIComponent(workItem.work_item_id)}${workItem.incident_id ? `&incident_id=${encodeURIComponent(workItem.incident_id)}` : ""}`
|
||||
: null;
|
||||
|
||||
return (
|
||||
<article key={item.recurrence_key} className="bg-white px-4 py-3">
|
||||
@@ -1073,11 +1159,25 @@ function EventRecurrencePanel({
|
||||
<p>{t("item.duplicates", { count: item.duplicate_total })}</p>
|
||||
<p>{t("item.refs", { count: item.source_ref_total })}</p>
|
||||
<p>{t("item.linkedRuns", { count: item.linked_run_total })}</p>
|
||||
<p>{t("item.incident", { incidentId: item.latest_incident_id || "--" })}</p>
|
||||
<p>{t("item.repair", { status: repairLabel })}</p>
|
||||
</div>
|
||||
<div className="mt-3 flex flex-wrap items-center gap-2">
|
||||
<span className="border border-[#d8d3c7] bg-[#faf9f3] px-2 py-0.5 text-xs font-semibold text-[#5f5b52]">
|
||||
{stateLabel}
|
||||
</span>
|
||||
<span
|
||||
className={cn(
|
||||
"border px-2 py-0.5 text-xs font-semibold",
|
||||
workItem?.status === "open"
|
||||
? "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]"
|
||||
: repair?.status === "auto_repair_verified"
|
||||
? "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]"
|
||||
: "border-[#d8d3c7] bg-[#faf9f3] text-[#5f5b52]"
|
||||
)}
|
||||
>
|
||||
{repairLabel}
|
||||
</span>
|
||||
{runHref ? (
|
||||
<Link
|
||||
href={runHref as never}
|
||||
@@ -1089,6 +1189,17 @@ function EventRecurrencePanel({
|
||||
) : (
|
||||
<span className="text-xs text-[#77736a]">{t("item.noRun")}</span>
|
||||
)}
|
||||
{workHref ? (
|
||||
<Link
|
||||
href={workHref as never}
|
||||
className="inline-flex items-center gap-1.5 border border-[#d8d3c7] bg-white px-2 py-1 text-xs font-semibold text-[#2e2b26] hover:border-[#1f6feb] hover:bg-[#edf4ff] hover:text-[#0f4fa8]"
|
||||
>
|
||||
<ListChecks className="h-3.5 w-3.5" aria-hidden="true" />
|
||||
{t("item.openWorkItem")}
|
||||
</Link>
|
||||
) : (
|
||||
<span className="text-xs text-[#77736a]">{t("item.noWorkItem")}</span>
|
||||
)}
|
||||
</div>
|
||||
</article>
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user