feat(awooop): surface source evidence review work items
This commit is contained in:
@@ -172,6 +172,7 @@ class ChannelEventRecurrenceSummary(BaseModel):
|
||||
manual_gate_group_total: int = 0
|
||||
automation_gap_group_total: int = 0
|
||||
failed_repair_group_total: int = 0
|
||||
source_correlation_review_group_total: int = 0
|
||||
latest_received_at: datetime | None
|
||||
|
||||
|
||||
@@ -183,6 +184,7 @@ class ChannelEventRecurrenceItem(BaseModel):
|
||||
namespace: str | None
|
||||
target_resource: str | None
|
||||
fingerprint: str | None
|
||||
latest_stage: str | None = None
|
||||
latest_event_id: UUID | None
|
||||
latest_provider_event_id: str | None
|
||||
latest_content_preview: str | None
|
||||
@@ -201,6 +203,7 @@ class ChannelEventRecurrenceItem(BaseModel):
|
||||
sentry_ref_total: int
|
||||
signoz_ref_total: int
|
||||
alert_ref_total: int
|
||||
stage_counts: dict[str, int] = Field(default_factory=dict)
|
||||
run_state_counts: dict[str, int]
|
||||
first_received_at: datetime | None
|
||||
latest_received_at: datetime | None
|
||||
|
||||
@@ -23,6 +23,9 @@ _MAX_DOSSIER_EVENTS = 50
|
||||
_MAX_COVERAGE_EVENTS = 200
|
||||
_MAX_RECURRENCE_EVENTS = 300
|
||||
_MAX_REPAIR_INCIDENTS = 200
|
||||
# Lower-cased provider names eligible for a source correlation review;
# _needs_source_correlation_review() rejects any group whose provider is not listed.
_SOURCE_CORRELATION_REVIEW_PROVIDERS = {"sentry", "signoz"}
|
||||
# Lower-cased latest_stage values that never open a source correlation review.
_SOURCE_CORRELATION_REVIEW_EXCLUDED_STAGES = {"heartbeat"}
|
||||
# Maximum length of a "source-evidence:..." work item id; longer ids are sliced to this size.
_SOURCE_CORRELATION_WORK_ITEM_ID_MAX = 180
|
||||
_INCIDENT_ID_RE = re.compile(r"\bINC-\d{8}-[A-Z0-9]{4,}\b")
|
||||
RecurrenceWorkItemMode = Literal["auto", "ticket", "reverify", "approval_review", "observe"]
|
||||
RecurrenceWorkItemHandoffKind = Literal["ticket_proposal", "manual_review"]
|
||||
@@ -53,6 +56,34 @@ def _ref_count(source_refs: dict[str, Any], key: str) -> int:
|
||||
return 1 if value else 0
|
||||
|
||||
|
||||
def _source_correlation_ref_total(group: dict[str, Any]) -> int:
|
||||
return int(group.get("sentry_ref_total") or 0) + int(
|
||||
group.get("signoz_ref_total") or 0
|
||||
)
|
||||
|
||||
|
||||
def _needs_source_correlation_review(
    group: dict[str, Any],
    latest_incident_id: str | None,
) -> bool:
    """Tell whether ``group`` should be surfaced as a source correlation review.

    A group qualifies only when all of the following hold:

    * ``latest_incident_id`` is falsy (nothing is linked yet),
    * the lower-cased ``provider`` is in ``_SOURCE_CORRELATION_REVIEW_PROVIDERS``,
    * the lower-cased ``latest_stage`` is not in
      ``_SOURCE_CORRELATION_REVIEW_EXCLUDED_STAGES``,
    * the group carries at least one Sentry/SigNoz reference.
    """
    if latest_incident_id:
        return False

    provider_name = str(group.get("provider") or "").lower()
    stage_name = str(group.get("latest_stage") or "").lower()
    eligible = (
        provider_name in _SOURCE_CORRELATION_REVIEW_PROVIDERS
        and stage_name not in _SOURCE_CORRELATION_REVIEW_EXCLUDED_STAGES
    )
    # The reference count is evaluated last, and only for eligible groups.
    return eligible and _source_correlation_ref_total(group) > 0
|
||||
|
||||
|
||||
def _source_correlation_work_item_id(group: dict[str, Any]) -> str:
    """Build the work item id for a source correlation review group.

    The id is ``"source-evidence:"`` followed by the first non-blank value
    among ``latest_provider_event_id`` and ``recurrence_key`` (falling back to
    the literal ``"unknown"``), sliced to
    ``_SOURCE_CORRELATION_WORK_ITEM_ID_MAX`` characters.
    """
    source_id = "unknown"
    for key in ("latest_provider_event_id", "recurrence_key"):
        # Strip *before* testing for emptiness: a whitespace-only provider
        # event id must fall through to the next candidate rather than
        # producing the bare, collision-prone id "source-evidence:".
        candidate = str(group.get(key) or "").strip()
        if candidate:
            source_id = candidate
            break
    return f"source-evidence:{source_id}"[:_SOURCE_CORRELATION_WORK_ITEM_ID_MAX]
|
||||
|
||||
|
||||
def _append_unique(values: list[str], candidate: Any) -> None:
|
||||
text_value = str(candidate or "").strip()
|
||||
if text_value and text_value not in values:
|
||||
@@ -132,6 +163,7 @@ def build_dossier_recurrence(
|
||||
"namespace": event.get("namespace"),
|
||||
"target_resource": event.get("target_resource"),
|
||||
"fingerprint": event.get("fingerprint"),
|
||||
"latest_stage": event.get("stage"),
|
||||
"latest_event_id": event.get("event_id"),
|
||||
"latest_provider_event_id": event.get("provider_event_id"),
|
||||
"latest_content_preview": event.get("content_preview"),
|
||||
@@ -148,6 +180,7 @@ def build_dossier_recurrence(
|
||||
"sentry_ref_total": 0,
|
||||
"signoz_ref_total": 0,
|
||||
"alert_ref_total": 0,
|
||||
"stage_counts": {},
|
||||
"run_state_counts": {},
|
||||
"first_received_at": received_at,
|
||||
"latest_received_at": received_at,
|
||||
@@ -162,6 +195,10 @@ def build_dossier_recurrence(
|
||||
if event.get("is_duplicate"):
|
||||
group["duplicate_total"] += 1
|
||||
|
||||
stage = str(event.get("stage") or "received")
|
||||
stage_counts = group["stage_counts"]
|
||||
stage_counts[stage] = int(stage_counts.get(stage, 0)) + 1
|
||||
|
||||
for incident_id in incident_ids:
|
||||
_append_unique(group["incident_ids"], incident_id)
|
||||
|
||||
@@ -189,6 +226,10 @@ def build_dossier_recurrence(
|
||||
or str(received_at) > str(group.get("latest_received_at"))
|
||||
):
|
||||
group["latest_received_at"] = received_at
|
||||
group["latest_event_id"] = event.get("event_id")
|
||||
group["latest_provider_event_id"] = event.get("provider_event_id")
|
||||
group["latest_content_preview"] = event.get("content_preview")
|
||||
group["latest_stage"] = event.get("stage")
|
||||
group["latest_incident_id"] = (
|
||||
incident_ids[0] if incident_ids else group.get("latest_incident_id")
|
||||
)
|
||||
@@ -260,6 +301,12 @@ def build_dossier_recurrence(
|
||||
if _as_dict(item.get("repair_summary")).get("status")
|
||||
== "auto_repair_failed"
|
||||
),
|
||||
"source_correlation_review_group_total": sum(
|
||||
1
|
||||
for item in items
|
||||
if _as_dict(item.get("repair_summary")).get("status")
|
||||
== "source_correlation_review"
|
||||
),
|
||||
"latest_received_at": latest_received_at,
|
||||
},
|
||||
"items": items,
|
||||
@@ -304,6 +351,8 @@ def _work_item_status(repair_status: str) -> str:
|
||||
|
||||
|
||||
def _work_item_kind(repair_status: str, auto_repair_id: Any) -> str:
|
||||
if repair_status == "source_correlation_review":
|
||||
return "source_correlation_review"
|
||||
if auto_repair_id:
|
||||
return "verification"
|
||||
if repair_status == "run_completed_no_repair":
|
||||
@@ -317,6 +366,7 @@ def _work_item_kind(repair_status: str, auto_repair_id: Any) -> str:
|
||||
|
||||
def _work_item_next_step(repair_status: str) -> str:
|
||||
return {
|
||||
"source_correlation_review": "review_provider_source_match",
|
||||
"auto_repair_succeeded_unverified": "run_post_verification",
|
||||
"auto_repair_failed": "triage_failed_repair",
|
||||
"auto_repair_recorded": "review_repair_record",
|
||||
@@ -329,6 +379,7 @@ def _work_item_next_step(repair_status: str) -> str:
|
||||
|
||||
def _work_item_reason(repair_status: str) -> str:
|
||||
return {
|
||||
"source_correlation_review": "provider_native_evidence_unlinked",
|
||||
"auto_repair_succeeded_unverified": "auto_repair_missing_verification",
|
||||
"auto_repair_failed": "auto_repair_failed",
|
||||
"auto_repair_recorded": "auto_repair_record_needs_review",
|
||||
@@ -359,6 +410,9 @@ def _attach_work_item_summary(
|
||||
latest_run_state=group.get("latest_run_state"),
|
||||
repair_summary=repair_summary,
|
||||
)
|
||||
if _needs_source_correlation_review(group, latest_incident_id):
|
||||
status_value = "source_correlation_review"
|
||||
|
||||
if repair_summary:
|
||||
repair_payload = dict(repair_summary)
|
||||
repair_payload["status"] = status_value
|
||||
@@ -383,6 +437,8 @@ def _attach_work_item_summary(
|
||||
if auto_repair_id
|
||||
else f"incident:{latest_incident_id}"
|
||||
)
|
||||
elif status_value == "source_correlation_review" and work_status != "none":
|
||||
work_item_id = _source_correlation_work_item_id(group)
|
||||
|
||||
group["latest_incident_id"] = latest_incident_id
|
||||
group["repair_summary"] = repair_payload
|
||||
@@ -408,6 +464,7 @@ def _recurrence_work_item_target(item: dict[str, Any]) -> dict[str, Any]:
|
||||
"namespace": item.get("namespace"),
|
||||
"target_resource": item.get("target_resource"),
|
||||
"fingerprint": item.get("fingerprint"),
|
||||
"latest_stage": item.get("latest_stage"),
|
||||
"latest_event_id": item.get("latest_event_id"),
|
||||
"latest_provider_event_id": item.get("latest_provider_event_id"),
|
||||
"latest_run_id": item.get("latest_run_id"),
|
||||
@@ -445,6 +502,11 @@ def _recurrence_work_item_checks(
|
||||
) -> list[dict[str, Any]]:
|
||||
repair_summary = _as_dict(item.get("repair_summary"))
|
||||
source_ref_total = int(item.get("source_ref_total") or 0)
|
||||
is_source_review = work_item.get("kind") == "source_correlation_review"
|
||||
evidence_linked = bool(item.get("latest_provider_event_id")) and source_ref_total > 0
|
||||
incident_or_source_linked = bool(work_item.get("incident_id")) or (
|
||||
is_source_review and evidence_linked
|
||||
)
|
||||
return [
|
||||
{
|
||||
"name": "work_item_open",
|
||||
@@ -452,9 +514,13 @@ def _recurrence_work_item_checks(
|
||||
"detail": str(work_item.get("status") or "unknown"),
|
||||
},
|
||||
{
|
||||
"name": "incident_linked",
|
||||
"passed": bool(work_item.get("incident_id")),
|
||||
"detail": str(work_item.get("incident_id") or "missing incident_id"),
|
||||
"name": "incident_or_source_evidence_linked",
|
||||
"passed": incident_or_source_linked,
|
||||
"detail": str(
|
||||
work_item.get("incident_id")
|
||||
or item.get("latest_provider_event_id")
|
||||
or "missing incident_id/source evidence"
|
||||
),
|
||||
},
|
||||
{
|
||||
"name": "known_next_step",
|
||||
@@ -521,19 +587,35 @@ def _ticket_preview(item: dict[str, Any], work_item: dict[str, Any]) -> dict[str
|
||||
alertname = str(item.get("alertname") or item.get("provider") or "recurrence")
|
||||
incident_id = str(work_item.get("incident_id") or item.get("latest_incident_id") or "")
|
||||
kind = str(work_item.get("kind") or "recurrence")
|
||||
title = f"[AwoooP] {alertname} recurrence work item: {incident_id or 'unlinked'}"
|
||||
labels = ["awooop", "recurrence", kind]
|
||||
body_lines = [
|
||||
f"Incident: {incident_id or '--'}",
|
||||
f"Alert: {alertname}",
|
||||
f"Namespace/Target: {item.get('namespace') or '--'} / {item.get('target_resource') or '--'}",
|
||||
f"Occurrences: {item.get('occurrence_total') or 0}",
|
||||
f"Duplicates: {item.get('duplicate_total') or 0}",
|
||||
f"Latest run: {item.get('latest_run_id') or '--'} ({item.get('latest_run_state') or '--'})",
|
||||
f"Repair status: {_as_dict(item.get('repair_summary')).get('status') or '--'}",
|
||||
f"Next step: {work_item.get('next_step') or '--'}",
|
||||
"Writes: none in preview/dry-run; ticket creation requires a later explicit apply path.",
|
||||
]
|
||||
if kind == "source_correlation_review":
|
||||
title = f"[AwoooP] Source evidence review: {alertname}"
|
||||
labels = ["awooop", "source-correlation", "review", str(item.get("provider") or "source")]
|
||||
body_lines = [
|
||||
f"Provider event: {item.get('latest_provider_event_id') or '--'}",
|
||||
f"Stage: {item.get('latest_stage') or '--'}",
|
||||
f"Provider: {item.get('provider') or '--'}",
|
||||
f"Alert: {alertname}",
|
||||
f"Namespace/Target: {item.get('namespace') or '--'} / {item.get('target_resource') or '--'}",
|
||||
f"Source refs: {item.get('source_ref_total') or 0}",
|
||||
f"Sentry refs: {item.get('sentry_ref_total') or 0}",
|
||||
f"SignOz refs: {item.get('signoz_ref_total') or 0}",
|
||||
f"Next step: {work_item.get('next_step') or '--'}",
|
||||
"Writes: none in preview/dry-run; source matching requires a later explicit review/apply path.",
|
||||
]
|
||||
else:
|
||||
title = f"[AwoooP] {alertname} recurrence work item: {incident_id or 'unlinked'}"
|
||||
labels = ["awooop", "recurrence", kind]
|
||||
body_lines = [
|
||||
f"Incident: {incident_id or '--'}",
|
||||
f"Alert: {alertname}",
|
||||
f"Namespace/Target: {item.get('namespace') or '--'} / {item.get('target_resource') or '--'}",
|
||||
f"Occurrences: {item.get('occurrence_total') or 0}",
|
||||
f"Duplicates: {item.get('duplicate_total') or 0}",
|
||||
f"Latest run: {item.get('latest_run_id') or '--'} ({item.get('latest_run_state') or '--'})",
|
||||
f"Repair status: {_as_dict(item.get('repair_summary')).get('status') or '--'}",
|
||||
f"Next step: {work_item.get('next_step') or '--'}",
|
||||
"Writes: none in preview/dry-run; ticket creation requires a later explicit apply path.",
|
||||
]
|
||||
return {
|
||||
"would_create": False,
|
||||
"title": title[:180],
|
||||
@@ -556,6 +638,9 @@ def _recurrence_current_state_summary(
|
||||
"duplicate_total": int(item.get("duplicate_total") or 0),
|
||||
"linked_run_total": int(item.get("linked_run_total") or 0),
|
||||
"run_state_counts": item.get("run_state_counts") or {},
|
||||
"stage_counts": item.get("stage_counts") or {},
|
||||
"latest_stage": item.get("latest_stage"),
|
||||
"latest_provider_event_id": item.get("latest_provider_event_id"),
|
||||
"latest_run_state": item.get("latest_run_state"),
|
||||
"latest_run_id": item.get("latest_run_id"),
|
||||
"repair_status": repair_summary.get("status"),
|
||||
|
||||
@@ -263,10 +263,11 @@ def test_build_dossier_recurrence_groups_events_and_run_state() -> None:
|
||||
assert recurrence["summary"]["linked_run_total"] == 2
|
||||
assert recurrence["summary"]["unlinked_event_total"] == 1
|
||||
assert recurrence["summary"]["auto_repair_linked_total"] == 1
|
||||
assert recurrence["summary"]["open_work_item_group_total"] == 1
|
||||
assert recurrence["summary"]["open_work_item_group_total"] == 2
|
||||
assert recurrence["summary"]["verified_repair_group_total"] == 0
|
||||
assert recurrence["summary"]["automation_gap_group_total"] == 0
|
||||
assert recurrence["summary"]["failed_repair_group_total"] == 1
|
||||
assert recurrence["summary"]["source_correlation_review_group_total"] == 1
|
||||
|
||||
host_group = recurrence["items"][0]
|
||||
assert host_group["recurrence_key"] == "fingerprint:fp-host-disk"
|
||||
@@ -276,6 +277,8 @@ def test_build_dossier_recurrence_groups_events_and_run_state() -> None:
|
||||
assert host_group["latest_run_state"] == "waiting_approval"
|
||||
assert host_group["latest_incident_id"] == "INC-20260513-ABCD"
|
||||
assert host_group["incident_ids"] == ["INC-20260513-ABCD"]
|
||||
assert host_group["latest_stage"] == "received"
|
||||
assert host_group["stage_counts"] == {"received": 2}
|
||||
assert host_group["run_state_counts"] == {"waiting_approval": 1, "completed": 1}
|
||||
assert host_group["alert_ref_total"] == 2
|
||||
assert host_group["repair_summary"]["status"] == "auto_repair_failed"
|
||||
@@ -292,6 +295,96 @@ def test_build_dossier_recurrence_groups_events_and_run_state() -> None:
|
||||
"needs_human": True,
|
||||
}
|
||||
|
||||
source_group = recurrence["items"][1]
|
||||
assert source_group["provider"] == "sentry"
|
||||
assert source_group["latest_stage"] == "received"
|
||||
assert source_group["stage_counts"] == {"received": 1}
|
||||
assert source_group["repair_summary"]["status"] == "source_correlation_review"
|
||||
assert source_group["work_item"] == {
|
||||
"schema_version": "awooop_recurrence_work_item_link_v1",
|
||||
"work_item_id": "source-evidence:sentry:received:issue-1",
|
||||
"incident_id": None,
|
||||
"auto_repair_id": None,
|
||||
"status": "open",
|
||||
"kind": "source_correlation_review",
|
||||
"next_step": "review_provider_source_match",
|
||||
"reason": "provider_native_evidence_unlinked",
|
||||
"needs_human": True,
|
||||
}
|
||||
|
||||
|
||||
def test_build_recurrence_work_item_preview_allows_source_correlation_review() -> None:
    """An unlinked SigNoz canary event yields an observe-only source review work item.

    Covers the recurrence summary, the per-group work item link, and the
    preview / dry-run payloads built for that work item id.
    """
    canary_event = {
        "event_id": "event-1",
        "project_id": "awoooi",
        "channel_type": "internal",
        "provider_event_id": "signoz:upstream_canary:canary-1",
        "content_hash": "a" * 64,
        "content_preview": "SignOz upstream canary",
        "content_redacted": "SignOz upstream canary",
        "redaction_version": "audit_sink_v1",
        "source_envelope": {
            "provider": "signoz",
            "stage": "upstream_canary",
            "source_refs": {
                "signoz_alerts": ["alert-1"],
                "alert_ids": ["signoz:upstream_canary:canary-1"],
            },
            "log_correlation": {
                "alertname": "Source Provider Upstream Canary",
                "severity": "info",
                "namespace": "observability",
                "target_resource": "signoz",
                "fingerprint": "fp-signoz-canary",
            },
        },
        "is_duplicate": False,
        "provider_ts": None,
        "received_at": "2026-05-20T13:01:00",
        "run_id": None,
        "run_state": None,
        "run_agent_id": None,
    }
    recurrence = build_dossier_recurrence(
        [canary_event],
        project_id="awoooi",
        limit=20,
    )

    # Recurrence view: one open group flagged for source correlation review.
    summary = recurrence["summary"]
    item = recurrence["items"][0]
    work_item_id = "source-evidence:signoz:upstream_canary:canary-1"
    assert summary["source_correlation_review_group_total"] == 1
    assert summary["open_work_item_group_total"] == 1
    assert item["latest_stage"] == "upstream_canary"
    assert item["repair_summary"]["status"] == "source_correlation_review"
    link = item["work_item"]
    assert link["work_item_id"] == work_item_id
    assert link["kind"] == "source_correlation_review"
    assert link["next_step"] == "review_provider_source_match"

    # Preview: allowed, but observe-only.
    preview = build_recurrence_work_item_preview(
        recurrence,
        work_item_id=work_item_id,
    )
    assert preview["mode"] == "observe"
    assert preview["allowed"] is True
    plan = preview["plan"]
    assert plan["target_action"] == "review_provider_source_match"
    assert plan["target"]["latest_stage"] == "upstream_canary"

    # Dry run: no ticket is created and the source evidence is echoed back.
    dry_run = build_recurrence_work_item_dry_run(
        recurrence,
        work_item_id=work_item_id,
    )
    assert dry_run["verification_result_preview"] == "observe_only"
    ticket = dry_run["ticket_preview"]
    assert ticket["would_create"] is False
    assert "Source evidence review" in ticket["title"]
    state = dry_run["current_state_summary"]
    assert state["latest_stage"] == "upstream_canary"
    assert state["latest_provider_event_id"] == (
        "signoz:upstream_canary:canary-1"
    )
|
||||
|
||||
|
||||
def test_build_dossier_recurrence_opens_work_item_for_completed_run_without_repair() -> None:
|
||||
recurrence = build_dossier_recurrence(
|
||||
@@ -556,6 +649,7 @@ def test_recurrence_response_model_preserves_repair_work_item_fields() -> None:
|
||||
"manual_gate_group_total": 0,
|
||||
"automation_gap_group_total": 0,
|
||||
"failed_repair_group_total": 0,
|
||||
"source_correlation_review_group_total": 0,
|
||||
"latest_received_at": "2026-05-13T13:47:00",
|
||||
},
|
||||
"items": [
|
||||
@@ -567,6 +661,7 @@ def test_recurrence_response_model_preserves_repair_work_item_fields() -> None:
|
||||
"namespace": "node",
|
||||
"target_resource": "host-110",
|
||||
"fingerprint": "fp-host-disk",
|
||||
"latest_stage": "incident_linked",
|
||||
"latest_event_id": "11111111-1111-4111-8111-111111111111",
|
||||
"latest_provider_event_id": "alertmanager:received:1",
|
||||
"latest_content_preview": "Host disk pressure",
|
||||
@@ -591,6 +686,7 @@ def test_recurrence_response_model_preserves_repair_work_item_fields() -> None:
|
||||
"sentry_ref_total": 0,
|
||||
"signoz_ref_total": 0,
|
||||
"alert_ref_total": 1,
|
||||
"stage_counts": {"incident_linked": 1},
|
||||
"run_state_counts": {"completed": 1},
|
||||
"first_received_at": "2026-05-13T13:47:00",
|
||||
"latest_received_at": "2026-05-13T13:47:00",
|
||||
@@ -601,7 +697,10 @@ def test_recurrence_response_model_preserves_repair_work_item_fields() -> None:
|
||||
|
||||
payload = response.model_dump()
|
||||
assert payload["summary"]["auto_repair_linked_total"] == 1
|
||||
assert payload["summary"]["source_correlation_review_group_total"] == 0
|
||||
assert payload["items"][0]["latest_incident_id"] == "INC-20260513-ABCD"
|
||||
assert payload["items"][0]["latest_stage"] == "incident_linked"
|
||||
assert payload["items"][0]["stage_counts"] == {"incident_linked": 1}
|
||||
assert payload["items"][0]["repair_summary"]["status"] == "auto_repair_verified"
|
||||
assert payload["items"][0]["work_item"]["status"] == "closed"
|
||||
|
||||
|
||||
@@ -1909,7 +1909,7 @@
|
||||
"evidence": {
|
||||
"channelEvents": "Recent Alertmanager channel events: {count}",
|
||||
"autoRepair": "Verified auto-repairs: {verified}/{evaluated}",
|
||||
"recurrenceWorkItems": "Recurring alert work: {open}; no repair: {gap}; failed repair: {failed}; manual gates: {manual}",
|
||||
"recurrenceWorkItems": "Recurring alert work: {open}; no repair: {gap}; failed repair: {failed}; manual gates: {manual}; source review: {source}",
|
||||
"recurrenceLatest": "Latest: {alert} / {incident}",
|
||||
"recurrenceReason": "Reason: {reason}",
|
||||
"recurrenceEmpty": "No open recurring-alert work item in the recent window",
|
||||
@@ -2155,10 +2155,14 @@
|
||||
"open": "Open {count}",
|
||||
"automationGap": "No repair {count}",
|
||||
"failed": "Failed {count}",
|
||||
"sourceReview": "Source review {count}",
|
||||
"unavailable": "The recurrence API has not responded, so work item state cannot be claimed.",
|
||||
"empty": "No open recurring-alert work items in the recent window.",
|
||||
"occurrences": "{count}x",
|
||||
"incident": "Incident: {incident}",
|
||||
"stage": "Stage: {stage}",
|
||||
"sourceEvent": "Source event: {event}",
|
||||
"sourceRefs": "Source refs: {refs} (Sentry {sentry} / SigNoz {signoz})",
|
||||
"workItem": "Work item: {id}",
|
||||
"repair": "Repair status: {status}",
|
||||
"reason": "Reason: {reason}",
|
||||
@@ -2219,6 +2223,7 @@
|
||||
"manual_gate": "Manual gate needed",
|
||||
"investigating": "Investigating",
|
||||
"run_completed_no_repair": "Run completed without repair",
|
||||
"source_correlation_review": "Source evidence needs matching",
|
||||
"no_repair_record": "No repair record",
|
||||
"unknown": "Unknown"
|
||||
},
|
||||
@@ -2229,6 +2234,7 @@
|
||||
"approval_required": "Approval required",
|
||||
"run_still_investigating": "Run is still investigating",
|
||||
"completed_run_without_auto_repair": "Run completed without an auto-repair record",
|
||||
"provider_native_evidence_unlinked": "Provider-native source evidence is stored but not matched to an Incident",
|
||||
"incident_without_repair_record": "Incident has no repair record",
|
||||
"none": "None",
|
||||
"unknown": "Unknown"
|
||||
@@ -2240,6 +2246,7 @@
|
||||
"review_approval": "Review approval",
|
||||
"wait_for_run_completion": "Wait for Run completion",
|
||||
"create_repair_ticket": "Create repair ticket",
|
||||
"review_provider_source_match": "Review source-to-Incident match",
|
||||
"triage_missing_repair_record": "Fill missing repair record",
|
||||
"none": "None"
|
||||
}
|
||||
@@ -2340,6 +2347,7 @@
|
||||
"duplicates": "Duplicate events",
|
||||
"linkedRuns": "Linked Runs",
|
||||
"autoRepair": "Auto repair",
|
||||
"sourceReview": "Source review",
|
||||
"openWorkItems": "Open work items"
|
||||
},
|
||||
"details": {
|
||||
@@ -2348,6 +2356,7 @@
|
||||
"unlinked": "{count} items not linked to a Run",
|
||||
"limit": "Latest {count} item window",
|
||||
"verifiedRepair": "{count} verified repair groups",
|
||||
"sourceReview": "{count} Sentry / SigNoz source groups need matching review",
|
||||
"manualGates": "{count} manual gates"
|
||||
},
|
||||
"states": {
|
||||
@@ -2370,6 +2379,7 @@
|
||||
"manual_gate": "Manual gate needed",
|
||||
"investigating": "Investigating",
|
||||
"run_completed_no_repair": "Run completed without repair",
|
||||
"source_correlation_review": "Source evidence needs matching",
|
||||
"no_repair_record": "No repair record"
|
||||
},
|
||||
"item": {
|
||||
@@ -2377,6 +2387,7 @@
|
||||
"duplicates": "Duplicates {count}",
|
||||
"refs": "Refs {count}",
|
||||
"linkedRuns": "Runs {count}",
|
||||
"stage": "Stage {stage}",
|
||||
"incident": "Incident {incidentId}",
|
||||
"repair": "Repair {status}",
|
||||
"openRun": "Open Run",
|
||||
|
||||
@@ -1910,7 +1910,7 @@
|
||||
"evidence": {
|
||||
"channelEvents": "最近 Alertmanager channel events:{count}",
|
||||
"autoRepair": "已驗證自動修復:{verified}/{evaluated}",
|
||||
"recurrenceWorkItems": "重複告警待處理:{open};無修復:{gap};修復失敗:{failed};人工閘門:{manual}",
|
||||
"recurrenceWorkItems": "重複告警待處理:{open};無修復:{gap};修復失敗:{failed};人工閘門:{manual};來源待審:{source}",
|
||||
"recurrenceLatest": "最新:{alert} / {incident}",
|
||||
"recurrenceReason": "原因:{reason}",
|
||||
"recurrenceEmpty": "近期重複告警尚無待處理工作項",
|
||||
@@ -2156,10 +2156,14 @@
|
||||
"open": "待處理 {count}",
|
||||
"automationGap": "無修復 {count}",
|
||||
"failed": "修復失敗 {count}",
|
||||
"sourceReview": "來源待審 {count}",
|
||||
"unavailable": "recurrence API 尚未回應,不能判定工作項狀態。",
|
||||
"empty": "近期重複告警沒有待處理工作項。",
|
||||
"occurrences": "{count} 次",
|
||||
"incident": "Incident:{incident}",
|
||||
"stage": "階段:{stage}",
|
||||
"sourceEvent": "來源事件:{event}",
|
||||
"sourceRefs": "來源 refs:{refs}(Sentry {sentry} / SigNoz {signoz})",
|
||||
"workItem": "Work item:{id}",
|
||||
"repair": "修復狀態:{status}",
|
||||
"reason": "原因:{reason}",
|
||||
@@ -2220,6 +2224,7 @@
|
||||
"manual_gate": "需人工閘門",
|
||||
"investigating": "調查中",
|
||||
"run_completed_no_repair": "Run 完成無修復",
|
||||
"source_correlation_review": "來源證據待配對",
|
||||
"no_repair_record": "無修復記錄",
|
||||
"unknown": "未知"
|
||||
},
|
||||
@@ -2230,6 +2235,7 @@
|
||||
"approval_required": "需要審批",
|
||||
"run_still_investigating": "Run 尚在調查",
|
||||
"completed_run_without_auto_repair": "Run 已完成但沒有自動修復紀錄",
|
||||
"provider_native_evidence_unlinked": "Provider 原生來源已入庫,尚未配對 Incident",
|
||||
"incident_without_repair_record": "Incident 沒有修復紀錄",
|
||||
"none": "無",
|
||||
"unknown": "未知"
|
||||
@@ -2241,6 +2247,7 @@
|
||||
"review_approval": "處理審批",
|
||||
"wait_for_run_completion": "等待 Run 完成",
|
||||
"create_repair_ticket": "建立修復 Ticket",
|
||||
"review_provider_source_match": "審核來源與 Incident 配對",
|
||||
"triage_missing_repair_record": "補齊修復紀錄",
|
||||
"none": "無"
|
||||
}
|
||||
@@ -2341,6 +2348,7 @@
|
||||
"duplicates": "重複事件",
|
||||
"linkedRuns": "已連 Run",
|
||||
"autoRepair": "自動修復",
|
||||
"sourceReview": "來源待審",
|
||||
"openWorkItems": "待處理項"
|
||||
},
|
||||
"details": {
|
||||
@@ -2349,6 +2357,7 @@
|
||||
"unlinked": "{count} 筆尚未連 Run",
|
||||
"limit": "最近 {count} 筆視窗",
|
||||
"verifiedRepair": "{count} 組已驗證修復",
|
||||
"sourceReview": "{count} 組 Sentry / SigNoz 來源需人工配對",
|
||||
"manualGates": "{count} 組人工閘門"
|
||||
},
|
||||
"states": {
|
||||
@@ -2371,6 +2380,7 @@
|
||||
"manual_gate": "需人工閘門",
|
||||
"investigating": "調查中",
|
||||
"run_completed_no_repair": "Run 完成無修復",
|
||||
"source_correlation_review": "來源證據待配對",
|
||||
"no_repair_record": "無修復記錄"
|
||||
},
|
||||
"item": {
|
||||
@@ -2378,6 +2388,7 @@
|
||||
"duplicates": "重複 {count}",
|
||||
"refs": "Refs {count}",
|
||||
"linkedRuns": "Run {count}",
|
||||
"stage": "階段 {stage}",
|
||||
"incident": "Incident {incidentId}",
|
||||
"repair": "修復 {status}",
|
||||
"openRun": "開啟 Run",
|
||||
|
||||
@@ -68,6 +68,7 @@ type RecurrenceRepairStatus =
|
||||
| "manual_gate"
|
||||
| "investigating"
|
||||
| "run_completed_no_repair"
|
||||
| "source_correlation_review"
|
||||
| "no_repair_record";
|
||||
|
||||
interface RemediationSummary {
|
||||
@@ -203,6 +204,7 @@ interface EventRecurrenceSummary {
|
||||
manual_gate_group_total?: number;
|
||||
automation_gap_group_total?: number;
|
||||
failed_repair_group_total?: number;
|
||||
source_correlation_review_group_total?: number;
|
||||
latest_received_at?: string | null;
|
||||
}
|
||||
|
||||
@@ -246,6 +248,7 @@ interface EventRecurrenceItem {
|
||||
namespace?: string | null;
|
||||
target_resource?: string | null;
|
||||
fingerprint?: string | null;
|
||||
latest_stage?: string | null;
|
||||
latest_event_id?: string | null;
|
||||
latest_provider_event_id?: string | null;
|
||||
latest_content_preview?: string | null;
|
||||
@@ -264,6 +267,7 @@ interface EventRecurrenceItem {
|
||||
sentry_ref_total: number;
|
||||
signoz_ref_total: number;
|
||||
alert_ref_total: number;
|
||||
stage_counts: Record<string, number>;
|
||||
run_state_counts: Record<string, number>;
|
||||
first_received_at?: string | null;
|
||||
latest_received_at?: string | null;
|
||||
@@ -1047,6 +1051,7 @@ function recurrenceRepairStatusLabelKey(status?: string | null) {
|
||||
status === "manual_gate" ||
|
||||
status === "investigating" ||
|
||||
status === "run_completed_no_repair" ||
|
||||
status === "source_correlation_review" ||
|
||||
status === "no_repair_record"
|
||||
) {
|
||||
return `repairStatuses.${status}`;
|
||||
@@ -1103,6 +1108,16 @@ function EventRecurrencePanel({
|
||||
detail: t("details.verifiedRepair", { count: summary?.verified_repair_group_total ?? 0 }),
|
||||
className: "border-[#b9a6d9] bg-[#f5f0ff] text-[#51358f]",
|
||||
},
|
||||
{
|
||||
label: t("metrics.sourceReview"),
|
||||
value: summary?.source_correlation_review_group_total ?? 0,
|
||||
detail: t("details.sourceReview", {
|
||||
count: summary?.source_correlation_review_group_total ?? 0,
|
||||
}),
|
||||
className: (summary?.source_correlation_review_group_total ?? 0) > 0
|
||||
? "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]"
|
||||
: "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]",
|
||||
},
|
||||
{
|
||||
label: t("metrics.openWorkItems"),
|
||||
value: summary?.open_work_item_group_total ?? 0,
|
||||
@@ -1134,7 +1149,7 @@ function EventRecurrencePanel({
|
||||
</div>
|
||||
) : (
|
||||
<>
|
||||
<div className="grid gap-px bg-[#e0ddd4] md:grid-cols-2 xl:grid-cols-6">
|
||||
<div className="grid gap-px bg-[#e0ddd4] md:grid-cols-2 xl:grid-cols-7">
|
||||
{metrics.map((item) => (
|
||||
<div key={item.label} className="bg-white px-4 py-3">
|
||||
<div className="flex items-start justify-between gap-3">
|
||||
@@ -1204,6 +1219,7 @@ function EventRecurrencePanel({
|
||||
<p>{t("item.duplicates", { count: item.duplicate_total })}</p>
|
||||
<p>{t("item.refs", { count: item.source_ref_total })}</p>
|
||||
<p>{t("item.linkedRuns", { count: item.linked_run_total })}</p>
|
||||
<p>{t("item.stage", { stage: item.latest_stage || "--" })}</p>
|
||||
<p>{t("item.incident", { incidentId: item.latest_incident_id || "--" })}</p>
|
||||
<p>{t("item.repair", { status: repairLabel })}</p>
|
||||
</div>
|
||||
|
||||
@@ -114,11 +114,16 @@ type RecurrenceItem = {
|
||||
severity?: string | null;
|
||||
namespace?: string | null;
|
||||
target_resource?: string | null;
|
||||
latest_stage?: string | null;
|
||||
latest_provider_event_id?: string | null;
|
||||
latest_run_id?: string | null;
|
||||
latest_run_state?: string | null;
|
||||
latest_incident_id?: string | null;
|
||||
occurrence_total: number;
|
||||
duplicate_total: number;
|
||||
source_ref_total?: number;
|
||||
sentry_ref_total?: number;
|
||||
signoz_ref_total?: number;
|
||||
repair_summary?: {
|
||||
status?: string | null;
|
||||
latest_auto_repair_id?: string | null;
|
||||
@@ -142,6 +147,7 @@ type RecurrenceResponse = {
|
||||
manual_gate_group_total?: number;
|
||||
automation_gap_group_total?: number;
|
||||
failed_repair_group_total?: number;
|
||||
source_correlation_review_group_total?: number;
|
||||
};
|
||||
items: RecurrenceItem[];
|
||||
};
|
||||
@@ -170,6 +176,8 @@ type RecurrenceWorkItemActionResult = {
|
||||
occurrence_total?: number | null;
|
||||
duplicate_total?: number | null;
|
||||
linked_run_total?: number | null;
|
||||
latest_stage?: string | null;
|
||||
latest_provider_event_id?: string | null;
|
||||
} | null;
|
||||
ticket_preview?: {
|
||||
would_create?: boolean | null;
|
||||
@@ -543,6 +551,7 @@ function recurrenceRepairStatusKey(status?: string | null) {
|
||||
status === "manual_gate" ||
|
||||
status === "investigating" ||
|
||||
status === "run_completed_no_repair" ||
|
||||
status === "source_correlation_review" ||
|
||||
status === "no_repair_record"
|
||||
) {
|
||||
return status;
|
||||
@@ -892,6 +901,8 @@ function buildWorkItems(
|
||||
const recurrenceAutomationGap = recurrenceSummary?.automation_gap_group_total ?? 0;
|
||||
const recurrenceFailedRepair = recurrenceSummary?.failed_repair_group_total ?? 0;
|
||||
const recurrenceManualGate = recurrenceSummary?.manual_gate_group_total ?? 0;
|
||||
const recurrenceSourceReview =
|
||||
recurrenceSummary?.source_correlation_review_group_total ?? 0;
|
||||
const latestRecurrenceOpenItem = recurrenceOpenItems(telemetry.eventRecurrence)[0] ?? null;
|
||||
const driftState = telemetry.driftFingerprintState;
|
||||
const driftFsmKey = driftFsmStateKey(driftState?.fsm_state);
|
||||
@@ -951,6 +962,7 @@ function buildWorkItems(
|
||||
gap: recurrenceAutomationGap,
|
||||
failed: recurrenceFailedRepair,
|
||||
manual: recurrenceManualGate,
|
||||
source: recurrenceSourceReview,
|
||||
}),
|
||||
evidenceDetails: latestRecurrenceOpenItem
|
||||
? [
|
||||
@@ -1342,6 +1354,11 @@ function RecurrenceWorkQueuePanel({
|
||||
<span className="border border-[#d8d3c7] bg-white px-2 py-0.5">
|
||||
{t("failed", { count: summary?.failed_repair_group_total ?? 0 })}
|
||||
</span>
|
||||
<span className="border border-[#d9b36f] bg-[#fff7e8] px-2 py-0.5">
|
||||
{t("sourceReview", {
|
||||
count: summary?.source_correlation_review_group_total ?? 0,
|
||||
})}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -1396,6 +1413,19 @@ function RecurrenceWorkQueuePanel({
|
||||
</div>
|
||||
<div className="mt-3 grid gap-1 text-xs leading-5 text-[#5f5b52]">
|
||||
<p>{t("incident", { incident: item.latest_incident_id ?? "--" })}</p>
|
||||
<p>{t("stage", { stage: item.latest_stage ?? "--" })}</p>
|
||||
<p className="truncate">
|
||||
{t("sourceEvent", {
|
||||
event: item.latest_provider_event_id ?? "--",
|
||||
})}
|
||||
</p>
|
||||
<p>
|
||||
{t("sourceRefs", {
|
||||
refs: item.source_ref_total ?? 0,
|
||||
sentry: item.sentry_ref_total ?? 0,
|
||||
signoz: item.signoz_ref_total ?? 0,
|
||||
})}
|
||||
</p>
|
||||
<p>{t("workItem", { id: workItem?.work_item_id ?? "--" })}</p>
|
||||
<p>
|
||||
{t("repair", {
|
||||
|
||||
@@ -1,3 +1,52 @@
|
||||
## 2026-05-21|T116 Provider source evidence review work items
|
||||
|
||||
**觸發**:
|
||||
|
||||
- T115 已證明 Sentry / SigNoz provider-native upstream canary 會寫入 AwoooP source dossier。
|
||||
- 但未連到 Incident 的 provider 原生事件仍只停在 source evidence,Operator 在前端看不到「已進來源鏈路,但需要審核是否配對到 Incident」的狀態。
|
||||
|
||||
**修正**:
|
||||
|
||||
- `channel_event_dossier_service.build_dossier_recurrence()` 新增 `latest_stage` / `stage_counts`,讓 recurrence group 顯示事件跑到 heartbeat、upstream_canary、received 或 incident_linked 哪個階段。
|
||||
- Sentry / SigNoz 事件若有 provider refs、不是 heartbeat、且尚未連 Incident,會形成 read-only `source_correlation_review` work item:
|
||||
- `kind=source_correlation_review`
|
||||
- `next_step=review_provider_source_match`
|
||||
- `reason=provider_native_evidence_unlinked`
|
||||
- 不寫入 Incident / AutoRepair / Ticket,只提供 preview / dry-run / handoff read model。
|
||||
- `/api/v1/platform/events/dossier/recurrence` summary 新增 `source_correlation_review_group_total`。
|
||||
- AwoooP Runs 前端「重複告警關聯」新增「來源待審」指標,卡片顯示事件 stage,讓 Operator 可看見 provider-native evidence 已進 AwoooP 但仍需配對審核。
|
||||
- AwoooP Work Items 同步顯示 source review count、stage、provider event id、Sentry / SigNoz refs,避免從 Runs 點進工作項後掉成 unknown。
|
||||
|
||||
**Verification**:
|
||||
|
||||
```text
|
||||
python -m py_compile apps/api/src/services/channel_event_dossier_service.py apps/api/src/api/v1/platform/events.py
|
||||
-> pass
|
||||
DATABASE_URL=postgresql+asyncpg://test:test@localhost/test pytest -q tests/test_channel_event_dossier_service.py
|
||||
-> 14 passed
|
||||
pnpm --dir apps/web exec tsc --noEmit
|
||||
-> pass
|
||||
NEXT_PUBLIC_API_URL=https://awoooi.wooo.work pnpm --dir apps/web run build
|
||||
-> compiled successfully, 90/90 static pages
|
||||
python -m json.tool apps/web/messages/zh-TW.json
|
||||
python -m json.tool apps/web/messages/en.json
|
||||
-> pass
|
||||
git diff --check
|
||||
-> pass
|
||||
python -m ruff check src/services/channel_event_dossier_service.py src/api/v1/platform/events.py tests/test_channel_event_dossier_service.py
|
||||
-> pre-existing FastAPI Query B008 in events.py; no new logic failures observed
|
||||
```
|
||||
|
||||
**目前整體進度**:
|
||||
|
||||
- Provider-native upstream ingestion 可驗證性:99.5% → 99.6%。
|
||||
- Source refs / Sentry / SigNoz 可見性:99.9% → 99.93%。
|
||||
- Incident-level source correlation 可見性:86% → 88%。
|
||||
- AwoooP 告警可觀測鏈:99.985% → 99.988%。
|
||||
- 前端 AI 自動化管理介面同步:99.99%(Runs / Work Items recurrence panel 已同步來源待審)。
|
||||
- 完整 AI 自動化管理產品化:99.65% → 99.68%。
|
||||
- 剩餘:推 Gitea main、等待 CI/CD、production API / frontend 驗證。
|
||||
|
||||
## 2026-05-20|T115 Provider-native upstream canary 接入
|
||||
|
||||
**觸發**:
|
||||
|
||||
Reference in New Issue
Block a user