feat(awooop): surface source evidence review work items
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 4m6s
CD Pipeline / build-and-deploy (push) Successful in 4m8s
CD Pipeline / post-deploy-checks (push) Successful in 1m57s

This commit is contained in:
Your Name
2026-05-21 08:54:45 +08:00
parent a2cbf9e328
commit cf8bb364a3
8 changed files with 324 additions and 20 deletions

View File

@@ -172,6 +172,7 @@ class ChannelEventRecurrenceSummary(BaseModel):
manual_gate_group_total: int = 0
automation_gap_group_total: int = 0
failed_repair_group_total: int = 0
source_correlation_review_group_total: int = 0
latest_received_at: datetime | None
@@ -183,6 +184,7 @@ class ChannelEventRecurrenceItem(BaseModel):
namespace: str | None
target_resource: str | None
fingerprint: str | None
latest_stage: str | None = None
latest_event_id: UUID | None
latest_provider_event_id: str | None
latest_content_preview: str | None
@@ -201,6 +203,7 @@ class ChannelEventRecurrenceItem(BaseModel):
sentry_ref_total: int
signoz_ref_total: int
alert_ref_total: int
stage_counts: dict[str, int] = Field(default_factory=dict)
run_state_counts: dict[str, int]
first_received_at: datetime | None
latest_received_at: datetime | None

View File

@@ -23,6 +23,9 @@ _MAX_DOSSIER_EVENTS = 50
_MAX_COVERAGE_EVENTS = 200
_MAX_RECURRENCE_EVENTS = 300
_MAX_REPAIR_INCIDENTS = 200
_SOURCE_CORRELATION_REVIEW_PROVIDERS = {"sentry", "signoz"}
_SOURCE_CORRELATION_REVIEW_EXCLUDED_STAGES = {"heartbeat"}
_SOURCE_CORRELATION_WORK_ITEM_ID_MAX = 180
_INCIDENT_ID_RE = re.compile(r"\bINC-\d{8}-[A-Z0-9]{4,}\b")
RecurrenceWorkItemMode = Literal["auto", "ticket", "reverify", "approval_review", "observe"]
RecurrenceWorkItemHandoffKind = Literal["ticket_proposal", "manual_review"]
@@ -53,6 +56,34 @@ def _ref_count(source_refs: dict[str, Any], key: str) -> int:
return 1 if value else 0
def _source_correlation_ref_total(group: dict[str, Any]) -> int:
return int(group.get("sentry_ref_total") or 0) + int(
group.get("signoz_ref_total") or 0
)
def _needs_source_correlation_review(
    group: dict[str, Any],
    latest_incident_id: str | None,
) -> bool:
    """Decide whether *group* should be flagged for source correlation review.

    A group qualifies only when all of the following hold:

    * no incident id is linked (``latest_incident_id`` is falsy),
    * its lower-cased ``provider`` is in
      ``_SOURCE_CORRELATION_REVIEW_PROVIDERS``,
    * its lower-cased ``latest_stage`` is not in
      ``_SOURCE_CORRELATION_REVIEW_EXCLUDED_STAGES``,
    * it carries at least one Sentry or SignOz reference.
    """
    if latest_incident_id:
        # An incident is already linked, so no source review is requested.
        return False

    provider_name = str(group.get("provider") or "").lower()
    latest_stage = str(group.get("latest_stage") or "").lower()
    reviewable = (
        provider_name in _SOURCE_CORRELATION_REVIEW_PROVIDERS
        and latest_stage not in _SOURCE_CORRELATION_REVIEW_EXCLUDED_STAGES
    )
    # The reference total is only computed once provider and stage qualify.
    return reviewable and _source_correlation_ref_total(group) > 0
def _source_correlation_work_item_id(group: dict[str, Any]) -> str:
    """Build the work item id for a source correlation review group.

    The id has the form ``source-evidence:<source_id>`` and is truncated to
    ``_SOURCE_CORRELATION_WORK_ITEM_ID_MAX`` characters. ``source_id`` is the
    first of ``latest_provider_event_id`` and ``recurrence_key`` that is
    non-empty after surrounding whitespace is stripped, or ``"unknown"``
    when neither qualifies.
    """
    candidates = (
        group.get("latest_provider_event_id"),
        group.get("recurrence_key"),
    )
    # Strip each candidate *before* testing it: a whitespace-only provider
    # event id must fall through to the next candidate rather than yield the
    # bare prefix "source-evidence:", which unrelated groups would share.
    stripped = (str(value or "").strip() for value in candidates)
    source_id = next((text for text in stripped if text), "unknown")
    return f"source-evidence:{source_id}"[:_SOURCE_CORRELATION_WORK_ITEM_ID_MAX]
def _append_unique(values: list[str], candidate: Any) -> None:
text_value = str(candidate or "").strip()
if text_value and text_value not in values:
@@ -132,6 +163,7 @@ def build_dossier_recurrence(
"namespace": event.get("namespace"),
"target_resource": event.get("target_resource"),
"fingerprint": event.get("fingerprint"),
"latest_stage": event.get("stage"),
"latest_event_id": event.get("event_id"),
"latest_provider_event_id": event.get("provider_event_id"),
"latest_content_preview": event.get("content_preview"),
@@ -148,6 +180,7 @@ def build_dossier_recurrence(
"sentry_ref_total": 0,
"signoz_ref_total": 0,
"alert_ref_total": 0,
"stage_counts": {},
"run_state_counts": {},
"first_received_at": received_at,
"latest_received_at": received_at,
@@ -162,6 +195,10 @@ def build_dossier_recurrence(
if event.get("is_duplicate"):
group["duplicate_total"] += 1
stage = str(event.get("stage") or "received")
stage_counts = group["stage_counts"]
stage_counts[stage] = int(stage_counts.get(stage, 0)) + 1
for incident_id in incident_ids:
_append_unique(group["incident_ids"], incident_id)
@@ -189,6 +226,10 @@ def build_dossier_recurrence(
or str(received_at) > str(group.get("latest_received_at"))
):
group["latest_received_at"] = received_at
group["latest_event_id"] = event.get("event_id")
group["latest_provider_event_id"] = event.get("provider_event_id")
group["latest_content_preview"] = event.get("content_preview")
group["latest_stage"] = event.get("stage")
group["latest_incident_id"] = (
incident_ids[0] if incident_ids else group.get("latest_incident_id")
)
@@ -260,6 +301,12 @@ def build_dossier_recurrence(
if _as_dict(item.get("repair_summary")).get("status")
== "auto_repair_failed"
),
"source_correlation_review_group_total": sum(
1
for item in items
if _as_dict(item.get("repair_summary")).get("status")
== "source_correlation_review"
),
"latest_received_at": latest_received_at,
},
"items": items,
@@ -304,6 +351,8 @@ def _work_item_status(repair_status: str) -> str:
def _work_item_kind(repair_status: str, auto_repair_id: Any) -> str:
if repair_status == "source_correlation_review":
return "source_correlation_review"
if auto_repair_id:
return "verification"
if repair_status == "run_completed_no_repair":
@@ -317,6 +366,7 @@ def _work_item_kind(repair_status: str, auto_repair_id: Any) -> str:
def _work_item_next_step(repair_status: str) -> str:
return {
"source_correlation_review": "review_provider_source_match",
"auto_repair_succeeded_unverified": "run_post_verification",
"auto_repair_failed": "triage_failed_repair",
"auto_repair_recorded": "review_repair_record",
@@ -329,6 +379,7 @@ def _work_item_next_step(repair_status: str) -> str:
def _work_item_reason(repair_status: str) -> str:
return {
"source_correlation_review": "provider_native_evidence_unlinked",
"auto_repair_succeeded_unverified": "auto_repair_missing_verification",
"auto_repair_failed": "auto_repair_failed",
"auto_repair_recorded": "auto_repair_record_needs_review",
@@ -359,6 +410,9 @@ def _attach_work_item_summary(
latest_run_state=group.get("latest_run_state"),
repair_summary=repair_summary,
)
if _needs_source_correlation_review(group, latest_incident_id):
status_value = "source_correlation_review"
if repair_summary:
repair_payload = dict(repair_summary)
repair_payload["status"] = status_value
@@ -383,6 +437,8 @@ def _attach_work_item_summary(
if auto_repair_id
else f"incident:{latest_incident_id}"
)
elif status_value == "source_correlation_review" and work_status != "none":
work_item_id = _source_correlation_work_item_id(group)
group["latest_incident_id"] = latest_incident_id
group["repair_summary"] = repair_payload
@@ -408,6 +464,7 @@ def _recurrence_work_item_target(item: dict[str, Any]) -> dict[str, Any]:
"namespace": item.get("namespace"),
"target_resource": item.get("target_resource"),
"fingerprint": item.get("fingerprint"),
"latest_stage": item.get("latest_stage"),
"latest_event_id": item.get("latest_event_id"),
"latest_provider_event_id": item.get("latest_provider_event_id"),
"latest_run_id": item.get("latest_run_id"),
@@ -445,6 +502,11 @@ def _recurrence_work_item_checks(
) -> list[dict[str, Any]]:
repair_summary = _as_dict(item.get("repair_summary"))
source_ref_total = int(item.get("source_ref_total") or 0)
is_source_review = work_item.get("kind") == "source_correlation_review"
evidence_linked = bool(item.get("latest_provider_event_id")) and source_ref_total > 0
incident_or_source_linked = bool(work_item.get("incident_id")) or (
is_source_review and evidence_linked
)
return [
{
"name": "work_item_open",
@@ -452,9 +514,13 @@ def _recurrence_work_item_checks(
"detail": str(work_item.get("status") or "unknown"),
},
{
"name": "incident_linked",
"passed": bool(work_item.get("incident_id")),
"detail": str(work_item.get("incident_id") or "missing incident_id"),
"name": "incident_or_source_evidence_linked",
"passed": incident_or_source_linked,
"detail": str(
work_item.get("incident_id")
or item.get("latest_provider_event_id")
or "missing incident_id/source evidence"
),
},
{
"name": "known_next_step",
@@ -521,19 +587,35 @@ def _ticket_preview(item: dict[str, Any], work_item: dict[str, Any]) -> dict[str
alertname = str(item.get("alertname") or item.get("provider") or "recurrence")
incident_id = str(work_item.get("incident_id") or item.get("latest_incident_id") or "")
kind = str(work_item.get("kind") or "recurrence")
title = f"[AwoooP] {alertname} recurrence work item: {incident_id or 'unlinked'}"
labels = ["awooop", "recurrence", kind]
body_lines = [
f"Incident: {incident_id or '--'}",
f"Alert: {alertname}",
f"Namespace/Target: {item.get('namespace') or '--'} / {item.get('target_resource') or '--'}",
f"Occurrences: {item.get('occurrence_total') or 0}",
f"Duplicates: {item.get('duplicate_total') or 0}",
f"Latest run: {item.get('latest_run_id') or '--'} ({item.get('latest_run_state') or '--'})",
f"Repair status: {_as_dict(item.get('repair_summary')).get('status') or '--'}",
f"Next step: {work_item.get('next_step') or '--'}",
"Writes: none in preview/dry-run; ticket creation requires a later explicit apply path.",
]
if kind == "source_correlation_review":
title = f"[AwoooP] Source evidence review: {alertname}"
labels = ["awooop", "source-correlation", "review", str(item.get("provider") or "source")]
body_lines = [
f"Provider event: {item.get('latest_provider_event_id') or '--'}",
f"Stage: {item.get('latest_stage') or '--'}",
f"Provider: {item.get('provider') or '--'}",
f"Alert: {alertname}",
f"Namespace/Target: {item.get('namespace') or '--'} / {item.get('target_resource') or '--'}",
f"Source refs: {item.get('source_ref_total') or 0}",
f"Sentry refs: {item.get('sentry_ref_total') or 0}",
f"SignOz refs: {item.get('signoz_ref_total') or 0}",
f"Next step: {work_item.get('next_step') or '--'}",
"Writes: none in preview/dry-run; source matching requires a later explicit review/apply path.",
]
else:
title = f"[AwoooP] {alertname} recurrence work item: {incident_id or 'unlinked'}"
labels = ["awooop", "recurrence", kind]
body_lines = [
f"Incident: {incident_id or '--'}",
f"Alert: {alertname}",
f"Namespace/Target: {item.get('namespace') or '--'} / {item.get('target_resource') or '--'}",
f"Occurrences: {item.get('occurrence_total') or 0}",
f"Duplicates: {item.get('duplicate_total') or 0}",
f"Latest run: {item.get('latest_run_id') or '--'} ({item.get('latest_run_state') or '--'})",
f"Repair status: {_as_dict(item.get('repair_summary')).get('status') or '--'}",
f"Next step: {work_item.get('next_step') or '--'}",
"Writes: none in preview/dry-run; ticket creation requires a later explicit apply path.",
]
return {
"would_create": False,
"title": title[:180],
@@ -556,6 +638,9 @@ def _recurrence_current_state_summary(
"duplicate_total": int(item.get("duplicate_total") or 0),
"linked_run_total": int(item.get("linked_run_total") or 0),
"run_state_counts": item.get("run_state_counts") or {},
"stage_counts": item.get("stage_counts") or {},
"latest_stage": item.get("latest_stage"),
"latest_provider_event_id": item.get("latest_provider_event_id"),
"latest_run_state": item.get("latest_run_state"),
"latest_run_id": item.get("latest_run_id"),
"repair_status": repair_summary.get("status"),