feat(awooop): surface source evidence review work items
This commit is contained in:
@@ -172,6 +172,7 @@ class ChannelEventRecurrenceSummary(BaseModel):
|
||||
manual_gate_group_total: int = 0
|
||||
automation_gap_group_total: int = 0
|
||||
failed_repair_group_total: int = 0
|
||||
source_correlation_review_group_total: int = 0
|
||||
latest_received_at: datetime | None
|
||||
|
||||
|
||||
@@ -183,6 +184,7 @@ class ChannelEventRecurrenceItem(BaseModel):
|
||||
namespace: str | None
|
||||
target_resource: str | None
|
||||
fingerprint: str | None
|
||||
latest_stage: str | None = None
|
||||
latest_event_id: UUID | None
|
||||
latest_provider_event_id: str | None
|
||||
latest_content_preview: str | None
|
||||
@@ -201,6 +203,7 @@ class ChannelEventRecurrenceItem(BaseModel):
|
||||
sentry_ref_total: int
|
||||
signoz_ref_total: int
|
||||
alert_ref_total: int
|
||||
stage_counts: dict[str, int] = Field(default_factory=dict)
|
||||
run_state_counts: dict[str, int]
|
||||
first_received_at: datetime | None
|
||||
latest_received_at: datetime | None
|
||||
|
||||
@@ -23,6 +23,9 @@ _MAX_DOSSIER_EVENTS = 50
|
||||
_MAX_COVERAGE_EVENTS = 200
|
||||
_MAX_RECURRENCE_EVENTS = 300
|
||||
_MAX_REPAIR_INCIDENTS = 200
|
||||
_SOURCE_CORRELATION_REVIEW_PROVIDERS = {"sentry", "signoz"}
|
||||
_SOURCE_CORRELATION_REVIEW_EXCLUDED_STAGES = {"heartbeat"}
|
||||
_SOURCE_CORRELATION_WORK_ITEM_ID_MAX = 180
|
||||
_INCIDENT_ID_RE = re.compile(r"\bINC-\d{8}-[A-Z0-9]{4,}\b")
|
||||
RecurrenceWorkItemMode = Literal["auto", "ticket", "reverify", "approval_review", "observe"]
|
||||
RecurrenceWorkItemHandoffKind = Literal["ticket_proposal", "manual_review"]
|
||||
@@ -53,6 +56,34 @@ def _ref_count(source_refs: dict[str, Any], key: str) -> int:
|
||||
return 1 if value else 0
|
||||
|
||||
|
||||
def _source_correlation_ref_total(group: dict[str, Any]) -> int:
|
||||
return int(group.get("sentry_ref_total") or 0) + int(
|
||||
group.get("signoz_ref_total") or 0
|
||||
)
|
||||
|
||||
|
||||
def _needs_source_correlation_review(
|
||||
group: dict[str, Any],
|
||||
latest_incident_id: str | None,
|
||||
) -> bool:
|
||||
if latest_incident_id:
|
||||
return False
|
||||
provider = str(group.get("provider") or "").lower()
|
||||
stage = str(group.get("latest_stage") or "").lower()
|
||||
if provider not in _SOURCE_CORRELATION_REVIEW_PROVIDERS:
|
||||
return False
|
||||
if stage in _SOURCE_CORRELATION_REVIEW_EXCLUDED_STAGES:
|
||||
return False
|
||||
return _source_correlation_ref_total(group) > 0
|
||||
|
||||
|
||||
def _source_correlation_work_item_id(group: dict[str, Any]) -> str:
    """Build the work-item id for a source-evidence review of *group*.

    The id has the form ``source-evidence:<source_id>`` and is truncated to
    ``_SOURCE_CORRELATION_WORK_ITEM_ID_MAX`` characters. ``source_id`` is the
    first non-blank value among ``latest_provider_event_id`` and
    ``recurrence_key``; when both are missing or blank it is ``"unknown"``.
    """
    source_id = "unknown"
    for key in ("latest_provider_event_id", "recurrence_key"):
        # Strip BEFORE testing truthiness: a whitespace-only value must fall
        # through to the next candidate instead of yielding the empty id
        # "source-evidence:".
        candidate = str(group.get(key) or "").strip()
        if candidate:
            source_id = candidate
            break
    return f"source-evidence:{source_id}"[:_SOURCE_CORRELATION_WORK_ITEM_ID_MAX]
|
||||
|
||||
|
||||
def _append_unique(values: list[str], candidate: Any) -> None:
|
||||
text_value = str(candidate or "").strip()
|
||||
if text_value and text_value not in values:
|
||||
@@ -132,6 +163,7 @@ def build_dossier_recurrence(
|
||||
"namespace": event.get("namespace"),
|
||||
"target_resource": event.get("target_resource"),
|
||||
"fingerprint": event.get("fingerprint"),
|
||||
"latest_stage": event.get("stage"),
|
||||
"latest_event_id": event.get("event_id"),
|
||||
"latest_provider_event_id": event.get("provider_event_id"),
|
||||
"latest_content_preview": event.get("content_preview"),
|
||||
@@ -148,6 +180,7 @@ def build_dossier_recurrence(
|
||||
"sentry_ref_total": 0,
|
||||
"signoz_ref_total": 0,
|
||||
"alert_ref_total": 0,
|
||||
"stage_counts": {},
|
||||
"run_state_counts": {},
|
||||
"first_received_at": received_at,
|
||||
"latest_received_at": received_at,
|
||||
@@ -162,6 +195,10 @@ def build_dossier_recurrence(
|
||||
if event.get("is_duplicate"):
|
||||
group["duplicate_total"] += 1
|
||||
|
||||
stage = str(event.get("stage") or "received")
|
||||
stage_counts = group["stage_counts"]
|
||||
stage_counts[stage] = int(stage_counts.get(stage, 0)) + 1
|
||||
|
||||
for incident_id in incident_ids:
|
||||
_append_unique(group["incident_ids"], incident_id)
|
||||
|
||||
@@ -189,6 +226,10 @@ def build_dossier_recurrence(
|
||||
or str(received_at) > str(group.get("latest_received_at"))
|
||||
):
|
||||
group["latest_received_at"] = received_at
|
||||
group["latest_event_id"] = event.get("event_id")
|
||||
group["latest_provider_event_id"] = event.get("provider_event_id")
|
||||
group["latest_content_preview"] = event.get("content_preview")
|
||||
group["latest_stage"] = event.get("stage")
|
||||
group["latest_incident_id"] = (
|
||||
incident_ids[0] if incident_ids else group.get("latest_incident_id")
|
||||
)
|
||||
@@ -260,6 +301,12 @@ def build_dossier_recurrence(
|
||||
if _as_dict(item.get("repair_summary")).get("status")
|
||||
== "auto_repair_failed"
|
||||
),
|
||||
"source_correlation_review_group_total": sum(
|
||||
1
|
||||
for item in items
|
||||
if _as_dict(item.get("repair_summary")).get("status")
|
||||
== "source_correlation_review"
|
||||
),
|
||||
"latest_received_at": latest_received_at,
|
||||
},
|
||||
"items": items,
|
||||
@@ -304,6 +351,8 @@ def _work_item_status(repair_status: str) -> str:
|
||||
|
||||
|
||||
def _work_item_kind(repair_status: str, auto_repair_id: Any) -> str:
|
||||
if repair_status == "source_correlation_review":
|
||||
return "source_correlation_review"
|
||||
if auto_repair_id:
|
||||
return "verification"
|
||||
if repair_status == "run_completed_no_repair":
|
||||
@@ -317,6 +366,7 @@ def _work_item_kind(repair_status: str, auto_repair_id: Any) -> str:
|
||||
|
||||
def _work_item_next_step(repair_status: str) -> str:
|
||||
return {
|
||||
"source_correlation_review": "review_provider_source_match",
|
||||
"auto_repair_succeeded_unverified": "run_post_verification",
|
||||
"auto_repair_failed": "triage_failed_repair",
|
||||
"auto_repair_recorded": "review_repair_record",
|
||||
@@ -329,6 +379,7 @@ def _work_item_next_step(repair_status: str) -> str:
|
||||
|
||||
def _work_item_reason(repair_status: str) -> str:
|
||||
return {
|
||||
"source_correlation_review": "provider_native_evidence_unlinked",
|
||||
"auto_repair_succeeded_unverified": "auto_repair_missing_verification",
|
||||
"auto_repair_failed": "auto_repair_failed",
|
||||
"auto_repair_recorded": "auto_repair_record_needs_review",
|
||||
@@ -359,6 +410,9 @@ def _attach_work_item_summary(
|
||||
latest_run_state=group.get("latest_run_state"),
|
||||
repair_summary=repair_summary,
|
||||
)
|
||||
if _needs_source_correlation_review(group, latest_incident_id):
|
||||
status_value = "source_correlation_review"
|
||||
|
||||
if repair_summary:
|
||||
repair_payload = dict(repair_summary)
|
||||
repair_payload["status"] = status_value
|
||||
@@ -383,6 +437,8 @@ def _attach_work_item_summary(
|
||||
if auto_repair_id
|
||||
else f"incident:{latest_incident_id}"
|
||||
)
|
||||
elif status_value == "source_correlation_review" and work_status != "none":
|
||||
work_item_id = _source_correlation_work_item_id(group)
|
||||
|
||||
group["latest_incident_id"] = latest_incident_id
|
||||
group["repair_summary"] = repair_payload
|
||||
@@ -408,6 +464,7 @@ def _recurrence_work_item_target(item: dict[str, Any]) -> dict[str, Any]:
|
||||
"namespace": item.get("namespace"),
|
||||
"target_resource": item.get("target_resource"),
|
||||
"fingerprint": item.get("fingerprint"),
|
||||
"latest_stage": item.get("latest_stage"),
|
||||
"latest_event_id": item.get("latest_event_id"),
|
||||
"latest_provider_event_id": item.get("latest_provider_event_id"),
|
||||
"latest_run_id": item.get("latest_run_id"),
|
||||
@@ -445,6 +502,11 @@ def _recurrence_work_item_checks(
|
||||
) -> list[dict[str, Any]]:
|
||||
repair_summary = _as_dict(item.get("repair_summary"))
|
||||
source_ref_total = int(item.get("source_ref_total") or 0)
|
||||
is_source_review = work_item.get("kind") == "source_correlation_review"
|
||||
evidence_linked = bool(item.get("latest_provider_event_id")) and source_ref_total > 0
|
||||
incident_or_source_linked = bool(work_item.get("incident_id")) or (
|
||||
is_source_review and evidence_linked
|
||||
)
|
||||
return [
|
||||
{
|
||||
"name": "work_item_open",
|
||||
@@ -452,9 +514,13 @@ def _recurrence_work_item_checks(
|
||||
"detail": str(work_item.get("status") or "unknown"),
|
||||
},
|
||||
{
|
||||
"name": "incident_linked",
|
||||
"passed": bool(work_item.get("incident_id")),
|
||||
"detail": str(work_item.get("incident_id") or "missing incident_id"),
|
||||
"name": "incident_or_source_evidence_linked",
|
||||
"passed": incident_or_source_linked,
|
||||
"detail": str(
|
||||
work_item.get("incident_id")
|
||||
or item.get("latest_provider_event_id")
|
||||
or "missing incident_id/source evidence"
|
||||
),
|
||||
},
|
||||
{
|
||||
"name": "known_next_step",
|
||||
@@ -521,19 +587,35 @@ def _ticket_preview(item: dict[str, Any], work_item: dict[str, Any]) -> dict[str
|
||||
alertname = str(item.get("alertname") or item.get("provider") or "recurrence")
|
||||
incident_id = str(work_item.get("incident_id") or item.get("latest_incident_id") or "")
|
||||
kind = str(work_item.get("kind") or "recurrence")
|
||||
title = f"[AwoooP] {alertname} recurrence work item: {incident_id or 'unlinked'}"
|
||||
labels = ["awooop", "recurrence", kind]
|
||||
body_lines = [
|
||||
f"Incident: {incident_id or '--'}",
|
||||
f"Alert: {alertname}",
|
||||
f"Namespace/Target: {item.get('namespace') or '--'} / {item.get('target_resource') or '--'}",
|
||||
f"Occurrences: {item.get('occurrence_total') or 0}",
|
||||
f"Duplicates: {item.get('duplicate_total') or 0}",
|
||||
f"Latest run: {item.get('latest_run_id') or '--'} ({item.get('latest_run_state') or '--'})",
|
||||
f"Repair status: {_as_dict(item.get('repair_summary')).get('status') or '--'}",
|
||||
f"Next step: {work_item.get('next_step') or '--'}",
|
||||
"Writes: none in preview/dry-run; ticket creation requires a later explicit apply path.",
|
||||
]
|
||||
if kind == "source_correlation_review":
|
||||
title = f"[AwoooP] Source evidence review: {alertname}"
|
||||
labels = ["awooop", "source-correlation", "review", str(item.get("provider") or "source")]
|
||||
body_lines = [
|
||||
f"Provider event: {item.get('latest_provider_event_id') or '--'}",
|
||||
f"Stage: {item.get('latest_stage') or '--'}",
|
||||
f"Provider: {item.get('provider') or '--'}",
|
||||
f"Alert: {alertname}",
|
||||
f"Namespace/Target: {item.get('namespace') or '--'} / {item.get('target_resource') or '--'}",
|
||||
f"Source refs: {item.get('source_ref_total') or 0}",
|
||||
f"Sentry refs: {item.get('sentry_ref_total') or 0}",
|
||||
f"SignOz refs: {item.get('signoz_ref_total') or 0}",
|
||||
f"Next step: {work_item.get('next_step') or '--'}",
|
||||
"Writes: none in preview/dry-run; source matching requires a later explicit review/apply path.",
|
||||
]
|
||||
else:
|
||||
title = f"[AwoooP] {alertname} recurrence work item: {incident_id or 'unlinked'}"
|
||||
labels = ["awooop", "recurrence", kind]
|
||||
body_lines = [
|
||||
f"Incident: {incident_id or '--'}",
|
||||
f"Alert: {alertname}",
|
||||
f"Namespace/Target: {item.get('namespace') or '--'} / {item.get('target_resource') or '--'}",
|
||||
f"Occurrences: {item.get('occurrence_total') or 0}",
|
||||
f"Duplicates: {item.get('duplicate_total') or 0}",
|
||||
f"Latest run: {item.get('latest_run_id') or '--'} ({item.get('latest_run_state') or '--'})",
|
||||
f"Repair status: {_as_dict(item.get('repair_summary')).get('status') or '--'}",
|
||||
f"Next step: {work_item.get('next_step') or '--'}",
|
||||
"Writes: none in preview/dry-run; ticket creation requires a later explicit apply path.",
|
||||
]
|
||||
return {
|
||||
"would_create": False,
|
||||
"title": title[:180],
|
||||
@@ -556,6 +638,9 @@ def _recurrence_current_state_summary(
|
||||
"duplicate_total": int(item.get("duplicate_total") or 0),
|
||||
"linked_run_total": int(item.get("linked_run_total") or 0),
|
||||
"run_state_counts": item.get("run_state_counts") or {},
|
||||
"stage_counts": item.get("stage_counts") or {},
|
||||
"latest_stage": item.get("latest_stage"),
|
||||
"latest_provider_event_id": item.get("latest_provider_event_id"),
|
||||
"latest_run_state": item.get("latest_run_state"),
|
||||
"latest_run_id": item.get("latest_run_id"),
|
||||
"repair_status": repair_summary.get("status"),
|
||||
|
||||
Reference in New Issue
Block a user