feat(awooop): surface source evidence review work items
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 4m6s
CD Pipeline / build-and-deploy (push) Successful in 4m8s
CD Pipeline / post-deploy-checks (push) Successful in 1m57s

This commit is contained in:
Your Name
2026-05-21 08:54:45 +08:00
parent a2cbf9e328
commit cf8bb364a3
8 changed files with 324 additions and 20 deletions

View File

@@ -172,6 +172,7 @@ class ChannelEventRecurrenceSummary(BaseModel):
manual_gate_group_total: int = 0
automation_gap_group_total: int = 0
failed_repair_group_total: int = 0
source_correlation_review_group_total: int = 0
latest_received_at: datetime | None
@@ -183,6 +184,7 @@ class ChannelEventRecurrenceItem(BaseModel):
namespace: str | None
target_resource: str | None
fingerprint: str | None
latest_stage: str | None = None
latest_event_id: UUID | None
latest_provider_event_id: str | None
latest_content_preview: str | None
@@ -201,6 +203,7 @@ class ChannelEventRecurrenceItem(BaseModel):
sentry_ref_total: int
signoz_ref_total: int
alert_ref_total: int
stage_counts: dict[str, int] = Field(default_factory=dict)
run_state_counts: dict[str, int]
first_received_at: datetime | None
latest_received_at: datetime | None

View File

@@ -23,6 +23,9 @@ _MAX_DOSSIER_EVENTS = 50
_MAX_COVERAGE_EVENTS = 200
_MAX_RECURRENCE_EVENTS = 300
_MAX_REPAIR_INCIDENTS = 200
_SOURCE_CORRELATION_REVIEW_PROVIDERS = {"sentry", "signoz"}
_SOURCE_CORRELATION_REVIEW_EXCLUDED_STAGES = {"heartbeat"}
_SOURCE_CORRELATION_WORK_ITEM_ID_MAX = 180
_INCIDENT_ID_RE = re.compile(r"\bINC-\d{8}-[A-Z0-9]{4,}\b")
RecurrenceWorkItemMode = Literal["auto", "ticket", "reverify", "approval_review", "observe"]
RecurrenceWorkItemHandoffKind = Literal["ticket_proposal", "manual_review"]
@@ -53,6 +56,34 @@ def _ref_count(source_refs: dict[str, Any], key: str) -> int:
return 1 if value else 0
def _source_correlation_ref_total(group: dict[str, Any]) -> int:
return int(group.get("sentry_ref_total") or 0) + int(
group.get("signoz_ref_total") or 0
)
def _needs_source_correlation_review(
group: dict[str, Any],
latest_incident_id: str | None,
) -> bool:
if latest_incident_id:
return False
provider = str(group.get("provider") or "").lower()
stage = str(group.get("latest_stage") or "").lower()
if provider not in _SOURCE_CORRELATION_REVIEW_PROVIDERS:
return False
if stage in _SOURCE_CORRELATION_REVIEW_EXCLUDED_STAGES:
return False
return _source_correlation_ref_total(group) > 0
def _source_correlation_work_item_id(group: dict[str, Any]) -> str:
source_id = str(
group.get("latest_provider_event_id") or group.get("recurrence_key") or "unknown"
).strip()
return f"source-evidence:{source_id}"[:_SOURCE_CORRELATION_WORK_ITEM_ID_MAX]
def _append_unique(values: list[str], candidate: Any) -> None:
text_value = str(candidate or "").strip()
if text_value and text_value not in values:
@@ -132,6 +163,7 @@ def build_dossier_recurrence(
"namespace": event.get("namespace"),
"target_resource": event.get("target_resource"),
"fingerprint": event.get("fingerprint"),
"latest_stage": event.get("stage"),
"latest_event_id": event.get("event_id"),
"latest_provider_event_id": event.get("provider_event_id"),
"latest_content_preview": event.get("content_preview"),
@@ -148,6 +180,7 @@ def build_dossier_recurrence(
"sentry_ref_total": 0,
"signoz_ref_total": 0,
"alert_ref_total": 0,
"stage_counts": {},
"run_state_counts": {},
"first_received_at": received_at,
"latest_received_at": received_at,
@@ -162,6 +195,10 @@ def build_dossier_recurrence(
if event.get("is_duplicate"):
group["duplicate_total"] += 1
stage = str(event.get("stage") or "received")
stage_counts = group["stage_counts"]
stage_counts[stage] = int(stage_counts.get(stage, 0)) + 1
for incident_id in incident_ids:
_append_unique(group["incident_ids"], incident_id)
@@ -189,6 +226,10 @@ def build_dossier_recurrence(
or str(received_at) > str(group.get("latest_received_at"))
):
group["latest_received_at"] = received_at
group["latest_event_id"] = event.get("event_id")
group["latest_provider_event_id"] = event.get("provider_event_id")
group["latest_content_preview"] = event.get("content_preview")
group["latest_stage"] = event.get("stage")
group["latest_incident_id"] = (
incident_ids[0] if incident_ids else group.get("latest_incident_id")
)
@@ -260,6 +301,12 @@ def build_dossier_recurrence(
if _as_dict(item.get("repair_summary")).get("status")
== "auto_repair_failed"
),
"source_correlation_review_group_total": sum(
1
for item in items
if _as_dict(item.get("repair_summary")).get("status")
== "source_correlation_review"
),
"latest_received_at": latest_received_at,
},
"items": items,
@@ -304,6 +351,8 @@ def _work_item_status(repair_status: str) -> str:
def _work_item_kind(repair_status: str, auto_repair_id: Any) -> str:
if repair_status == "source_correlation_review":
return "source_correlation_review"
if auto_repair_id:
return "verification"
if repair_status == "run_completed_no_repair":
@@ -317,6 +366,7 @@ def _work_item_kind(repair_status: str, auto_repair_id: Any) -> str:
def _work_item_next_step(repair_status: str) -> str:
return {
"source_correlation_review": "review_provider_source_match",
"auto_repair_succeeded_unverified": "run_post_verification",
"auto_repair_failed": "triage_failed_repair",
"auto_repair_recorded": "review_repair_record",
@@ -329,6 +379,7 @@ def _work_item_next_step(repair_status: str) -> str:
def _work_item_reason(repair_status: str) -> str:
return {
"source_correlation_review": "provider_native_evidence_unlinked",
"auto_repair_succeeded_unverified": "auto_repair_missing_verification",
"auto_repair_failed": "auto_repair_failed",
"auto_repair_recorded": "auto_repair_record_needs_review",
@@ -359,6 +410,9 @@ def _attach_work_item_summary(
latest_run_state=group.get("latest_run_state"),
repair_summary=repair_summary,
)
if _needs_source_correlation_review(group, latest_incident_id):
status_value = "source_correlation_review"
if repair_summary:
repair_payload = dict(repair_summary)
repair_payload["status"] = status_value
@@ -383,6 +437,8 @@ def _attach_work_item_summary(
if auto_repair_id
else f"incident:{latest_incident_id}"
)
elif status_value == "source_correlation_review" and work_status != "none":
work_item_id = _source_correlation_work_item_id(group)
group["latest_incident_id"] = latest_incident_id
group["repair_summary"] = repair_payload
@@ -408,6 +464,7 @@ def _recurrence_work_item_target(item: dict[str, Any]) -> dict[str, Any]:
"namespace": item.get("namespace"),
"target_resource": item.get("target_resource"),
"fingerprint": item.get("fingerprint"),
"latest_stage": item.get("latest_stage"),
"latest_event_id": item.get("latest_event_id"),
"latest_provider_event_id": item.get("latest_provider_event_id"),
"latest_run_id": item.get("latest_run_id"),
@@ -445,6 +502,11 @@ def _recurrence_work_item_checks(
) -> list[dict[str, Any]]:
repair_summary = _as_dict(item.get("repair_summary"))
source_ref_total = int(item.get("source_ref_total") or 0)
is_source_review = work_item.get("kind") == "source_correlation_review"
evidence_linked = bool(item.get("latest_provider_event_id")) and source_ref_total > 0
incident_or_source_linked = bool(work_item.get("incident_id")) or (
is_source_review and evidence_linked
)
return [
{
"name": "work_item_open",
@@ -452,9 +514,13 @@ def _recurrence_work_item_checks(
"detail": str(work_item.get("status") or "unknown"),
},
{
"name": "incident_linked",
"passed": bool(work_item.get("incident_id")),
"detail": str(work_item.get("incident_id") or "missing incident_id"),
"name": "incident_or_source_evidence_linked",
"passed": incident_or_source_linked,
"detail": str(
work_item.get("incident_id")
or item.get("latest_provider_event_id")
or "missing incident_id/source evidence"
),
},
{
"name": "known_next_step",
@@ -521,19 +587,35 @@ def _ticket_preview(item: dict[str, Any], work_item: dict[str, Any]) -> dict[str
alertname = str(item.get("alertname") or item.get("provider") or "recurrence")
incident_id = str(work_item.get("incident_id") or item.get("latest_incident_id") or "")
kind = str(work_item.get("kind") or "recurrence")
title = f"[AwoooP] {alertname} recurrence work item: {incident_id or 'unlinked'}"
labels = ["awooop", "recurrence", kind]
body_lines = [
f"Incident: {incident_id or '--'}",
f"Alert: {alertname}",
f"Namespace/Target: {item.get('namespace') or '--'} / {item.get('target_resource') or '--'}",
f"Occurrences: {item.get('occurrence_total') or 0}",
f"Duplicates: {item.get('duplicate_total') or 0}",
f"Latest run: {item.get('latest_run_id') or '--'} ({item.get('latest_run_state') or '--'})",
f"Repair status: {_as_dict(item.get('repair_summary')).get('status') or '--'}",
f"Next step: {work_item.get('next_step') or '--'}",
"Writes: none in preview/dry-run; ticket creation requires a later explicit apply path.",
]
if kind == "source_correlation_review":
title = f"[AwoooP] Source evidence review: {alertname}"
labels = ["awooop", "source-correlation", "review", str(item.get("provider") or "source")]
body_lines = [
f"Provider event: {item.get('latest_provider_event_id') or '--'}",
f"Stage: {item.get('latest_stage') or '--'}",
f"Provider: {item.get('provider') or '--'}",
f"Alert: {alertname}",
f"Namespace/Target: {item.get('namespace') or '--'} / {item.get('target_resource') or '--'}",
f"Source refs: {item.get('source_ref_total') or 0}",
f"Sentry refs: {item.get('sentry_ref_total') or 0}",
f"SignOz refs: {item.get('signoz_ref_total') or 0}",
f"Next step: {work_item.get('next_step') or '--'}",
"Writes: none in preview/dry-run; source matching requires a later explicit review/apply path.",
]
else:
title = f"[AwoooP] {alertname} recurrence work item: {incident_id or 'unlinked'}"
labels = ["awooop", "recurrence", kind]
body_lines = [
f"Incident: {incident_id or '--'}",
f"Alert: {alertname}",
f"Namespace/Target: {item.get('namespace') or '--'} / {item.get('target_resource') or '--'}",
f"Occurrences: {item.get('occurrence_total') or 0}",
f"Duplicates: {item.get('duplicate_total') or 0}",
f"Latest run: {item.get('latest_run_id') or '--'} ({item.get('latest_run_state') or '--'})",
f"Repair status: {_as_dict(item.get('repair_summary')).get('status') or '--'}",
f"Next step: {work_item.get('next_step') or '--'}",
"Writes: none in preview/dry-run; ticket creation requires a later explicit apply path.",
]
return {
"would_create": False,
"title": title[:180],
@@ -556,6 +638,9 @@ def _recurrence_current_state_summary(
"duplicate_total": int(item.get("duplicate_total") or 0),
"linked_run_total": int(item.get("linked_run_total") or 0),
"run_state_counts": item.get("run_state_counts") or {},
"stage_counts": item.get("stage_counts") or {},
"latest_stage": item.get("latest_stage"),
"latest_provider_event_id": item.get("latest_provider_event_id"),
"latest_run_state": item.get("latest_run_state"),
"latest_run_id": item.get("latest_run_id"),
"repair_status": repair_summary.get("status"),

View File

@@ -263,10 +263,11 @@ def test_build_dossier_recurrence_groups_events_and_run_state() -> None:
assert recurrence["summary"]["linked_run_total"] == 2
assert recurrence["summary"]["unlinked_event_total"] == 1
assert recurrence["summary"]["auto_repair_linked_total"] == 1
assert recurrence["summary"]["open_work_item_group_total"] == 1
assert recurrence["summary"]["open_work_item_group_total"] == 2
assert recurrence["summary"]["verified_repair_group_total"] == 0
assert recurrence["summary"]["automation_gap_group_total"] == 0
assert recurrence["summary"]["failed_repair_group_total"] == 1
assert recurrence["summary"]["source_correlation_review_group_total"] == 1
host_group = recurrence["items"][0]
assert host_group["recurrence_key"] == "fingerprint:fp-host-disk"
@@ -276,6 +277,8 @@ def test_build_dossier_recurrence_groups_events_and_run_state() -> None:
assert host_group["latest_run_state"] == "waiting_approval"
assert host_group["latest_incident_id"] == "INC-20260513-ABCD"
assert host_group["incident_ids"] == ["INC-20260513-ABCD"]
assert host_group["latest_stage"] == "received"
assert host_group["stage_counts"] == {"received": 2}
assert host_group["run_state_counts"] == {"waiting_approval": 1, "completed": 1}
assert host_group["alert_ref_total"] == 2
assert host_group["repair_summary"]["status"] == "auto_repair_failed"
@@ -292,6 +295,96 @@ def test_build_dossier_recurrence_groups_events_and_run_state() -> None:
"needs_human": True,
}
source_group = recurrence["items"][1]
assert source_group["provider"] == "sentry"
assert source_group["latest_stage"] == "received"
assert source_group["stage_counts"] == {"received": 1}
assert source_group["repair_summary"]["status"] == "source_correlation_review"
assert source_group["work_item"] == {
"schema_version": "awooop_recurrence_work_item_link_v1",
"work_item_id": "source-evidence:sentry:received:issue-1",
"incident_id": None,
"auto_repair_id": None,
"status": "open",
"kind": "source_correlation_review",
"next_step": "review_provider_source_match",
"reason": "provider_native_evidence_unlinked",
"needs_human": True,
}
def test_build_recurrence_work_item_preview_allows_source_correlation_review() -> None:
recurrence = build_dossier_recurrence(
[
{
"event_id": "event-1",
"project_id": "awoooi",
"channel_type": "internal",
"provider_event_id": "signoz:upstream_canary:canary-1",
"content_hash": "a" * 64,
"content_preview": "SignOz upstream canary",
"content_redacted": "SignOz upstream canary",
"redaction_version": "audit_sink_v1",
"source_envelope": {
"provider": "signoz",
"stage": "upstream_canary",
"source_refs": {
"signoz_alerts": ["alert-1"],
"alert_ids": ["signoz:upstream_canary:canary-1"],
},
"log_correlation": {
"alertname": "Source Provider Upstream Canary",
"severity": "info",
"namespace": "observability",
"target_resource": "signoz",
"fingerprint": "fp-signoz-canary",
},
},
"is_duplicate": False,
"provider_ts": None,
"received_at": "2026-05-20T13:01:00",
"run_id": None,
"run_state": None,
"run_agent_id": None,
}
],
project_id="awoooi",
limit=20,
)
item = recurrence["items"][0]
work_item_id = "source-evidence:signoz:upstream_canary:canary-1"
assert recurrence["summary"]["source_correlation_review_group_total"] == 1
assert recurrence["summary"]["open_work_item_group_total"] == 1
assert item["latest_stage"] == "upstream_canary"
assert item["repair_summary"]["status"] == "source_correlation_review"
assert item["work_item"]["work_item_id"] == work_item_id
assert item["work_item"]["kind"] == "source_correlation_review"
assert item["work_item"]["next_step"] == "review_provider_source_match"
preview = build_recurrence_work_item_preview(
recurrence,
work_item_id=work_item_id,
)
assert preview["mode"] == "observe"
assert preview["allowed"] is True
assert preview["plan"]["target_action"] == "review_provider_source_match"
assert preview["plan"]["target"]["latest_stage"] == "upstream_canary"
dry_run = build_recurrence_work_item_dry_run(
recurrence,
work_item_id=work_item_id,
)
assert dry_run["verification_result_preview"] == "observe_only"
assert dry_run["ticket_preview"]["would_create"] is False
assert "Source evidence review" in dry_run["ticket_preview"]["title"]
assert dry_run["current_state_summary"]["latest_stage"] == "upstream_canary"
assert dry_run["current_state_summary"]["latest_provider_event_id"] == (
"signoz:upstream_canary:canary-1"
)
def test_build_dossier_recurrence_opens_work_item_for_completed_run_without_repair() -> None:
recurrence = build_dossier_recurrence(
@@ -556,6 +649,7 @@ def test_recurrence_response_model_preserves_repair_work_item_fields() -> None:
"manual_gate_group_total": 0,
"automation_gap_group_total": 0,
"failed_repair_group_total": 0,
"source_correlation_review_group_total": 0,
"latest_received_at": "2026-05-13T13:47:00",
},
"items": [
@@ -567,6 +661,7 @@ def test_recurrence_response_model_preserves_repair_work_item_fields() -> None:
"namespace": "node",
"target_resource": "host-110",
"fingerprint": "fp-host-disk",
"latest_stage": "incident_linked",
"latest_event_id": "11111111-1111-4111-8111-111111111111",
"latest_provider_event_id": "alertmanager:received:1",
"latest_content_preview": "Host disk pressure",
@@ -591,6 +686,7 @@ def test_recurrence_response_model_preserves_repair_work_item_fields() -> None:
"sentry_ref_total": 0,
"signoz_ref_total": 0,
"alert_ref_total": 1,
"stage_counts": {"incident_linked": 1},
"run_state_counts": {"completed": 1},
"first_received_at": "2026-05-13T13:47:00",
"latest_received_at": "2026-05-13T13:47:00",
@@ -601,7 +697,10 @@ def test_recurrence_response_model_preserves_repair_work_item_fields() -> None:
payload = response.model_dump()
assert payload["summary"]["auto_repair_linked_total"] == 1
assert payload["summary"]["source_correlation_review_group_total"] == 0
assert payload["items"][0]["latest_incident_id"] == "INC-20260513-ABCD"
assert payload["items"][0]["latest_stage"] == "incident_linked"
assert payload["items"][0]["stage_counts"] == {"incident_linked": 1}
assert payload["items"][0]["repair_summary"]["status"] == "auto_repair_verified"
assert payload["items"][0]["work_item"]["status"] == "closed"

View File

@@ -1909,7 +1909,7 @@
"evidence": {
"channelEvents": "Recent Alertmanager channel events: {count}",
"autoRepair": "Verified auto-repairs: {verified}/{evaluated}",
"recurrenceWorkItems": "Recurring alert work: {open}; no repair: {gap}; failed repair: {failed}; manual gates: {manual}",
"recurrenceWorkItems": "Recurring alert work: {open}; no repair: {gap}; failed repair: {failed}; manual gates: {manual}; source review: {source}",
"recurrenceLatest": "Latest: {alert} / {incident}",
"recurrenceReason": "Reason: {reason}",
"recurrenceEmpty": "No open recurring-alert work item in the recent window",
@@ -2155,10 +2155,14 @@
"open": "Open {count}",
"automationGap": "No repair {count}",
"failed": "Failed {count}",
"sourceReview": "Source review {count}",
"unavailable": "The recurrence API has not responded, so work item state cannot be claimed.",
"empty": "No open recurring-alert work items in the recent window.",
"occurrences": "{count}x",
"incident": "Incident: {incident}",
"stage": "Stage: {stage}",
"sourceEvent": "Source event: {event}",
"sourceRefs": "Source refs: {refs} (Sentry {sentry} / SignOz {signoz})",
"workItem": "Work item: {id}",
"repair": "Repair status: {status}",
"reason": "Reason: {reason}",
@@ -2219,6 +2223,7 @@
"manual_gate": "Manual gate needed",
"investigating": "Investigating",
"run_completed_no_repair": "Run completed without repair",
"source_correlation_review": "Source evidence needs matching",
"no_repair_record": "No repair record",
"unknown": "Unknown"
},
@@ -2229,6 +2234,7 @@
"approval_required": "Approval required",
"run_still_investigating": "Run is still investigating",
"completed_run_without_auto_repair": "Run completed without an auto-repair record",
"provider_native_evidence_unlinked": "Provider-native source evidence is stored but not matched to an Incident",
"incident_without_repair_record": "Incident has no repair record",
"none": "None",
"unknown": "Unknown"
@@ -2240,6 +2246,7 @@
"review_approval": "Review approval",
"wait_for_run_completion": "Wait for Run completion",
"create_repair_ticket": "Create repair ticket",
"review_provider_source_match": "Review source-to-Incident match",
"triage_missing_repair_record": "Fill missing repair record",
"none": "None"
}
@@ -2340,6 +2347,7 @@
"duplicates": "Duplicate events",
"linkedRuns": "Linked Runs",
"autoRepair": "Auto repair",
"sourceReview": "Source review",
"openWorkItems": "Open work items"
},
"details": {
@@ -2348,6 +2356,7 @@
"unlinked": "{count} items not linked to a Run",
"limit": "Latest {count} item window",
"verifiedRepair": "{count} verified repair groups",
"sourceReview": "{count} Sentry / SignOz source groups need matching review",
"manualGates": "{count} manual gates"
},
"states": {
@@ -2370,6 +2379,7 @@
"manual_gate": "Manual gate needed",
"investigating": "Investigating",
"run_completed_no_repair": "Run completed without repair",
"source_correlation_review": "Source evidence needs matching",
"no_repair_record": "No repair record"
},
"item": {
@@ -2377,6 +2387,7 @@
"duplicates": "Duplicates {count}",
"refs": "Refs {count}",
"linkedRuns": "Runs {count}",
"stage": "Stage {stage}",
"incident": "Incident {incidentId}",
"repair": "Repair {status}",
"openRun": "Open Run",

View File

@@ -1910,7 +1910,7 @@
"evidence": {
"channelEvents": "最近 Alertmanager channel events{count}",
"autoRepair": "已驗證自動修復:{verified}/{evaluated}",
"recurrenceWorkItems": "重複告警待處理:{open};無修復:{gap};修復失敗:{failed};人工閘門:{manual}",
"recurrenceWorkItems": "重複告警待處理:{open};無修復:{gap};修復失敗:{failed};人工閘門:{manual};來源待審:{source}",
"recurrenceLatest": "最新:{alert} / {incident}",
"recurrenceReason": "原因:{reason}",
"recurrenceEmpty": "近期重複告警尚無待處理工作項",
@@ -2156,10 +2156,14 @@
"open": "待處理 {count}",
"automationGap": "無修復 {count}",
"failed": "修復失敗 {count}",
"sourceReview": "來源待審 {count}",
"unavailable": "recurrence API 尚未回應,不能判定工作項狀態。",
"empty": "近期重複告警沒有待處理工作項。",
"occurrences": "{count} 次",
"incident": "Incident{incident}",
"stage": "階段:{stage}",
"sourceEvent": "來源事件:{event}",
"sourceRefs": "來源 refs{refs}Sentry {sentry} / SignOz {signoz}",
"workItem": "Work item{id}",
"repair": "修復狀態:{status}",
"reason": "原因:{reason}",
@@ -2220,6 +2224,7 @@
"manual_gate": "需人工閘門",
"investigating": "調查中",
"run_completed_no_repair": "Run 完成無修復",
"source_correlation_review": "來源證據待配對",
"no_repair_record": "無修復記錄",
"unknown": "未知"
},
@@ -2230,6 +2235,7 @@
"approval_required": "需要審批",
"run_still_investigating": "Run 尚在調查",
"completed_run_without_auto_repair": "Run 已完成但沒有自動修復紀錄",
"provider_native_evidence_unlinked": "Provider 原生來源已入庫,尚未配對 Incident",
"incident_without_repair_record": "Incident 沒有修復紀錄",
"none": "無",
"unknown": "未知"
@@ -2241,6 +2247,7 @@
"review_approval": "處理審批",
"wait_for_run_completion": "等待 Run 完成",
"create_repair_ticket": "建立修復 Ticket",
"review_provider_source_match": "審核來源與 Incident 配對",
"triage_missing_repair_record": "補齊修復紀錄",
"none": "無"
}
@@ -2341,6 +2348,7 @@
"duplicates": "重複事件",
"linkedRuns": "已連 Run",
"autoRepair": "自動修復",
"sourceReview": "來源待審",
"openWorkItems": "待處理項"
},
"details": {
@@ -2349,6 +2357,7 @@
"unlinked": "{count} 筆尚未連 Run",
"limit": "最近 {count} 筆視窗",
"verifiedRepair": "{count} 組已驗證修復",
"sourceReview": "{count} 組 Sentry / SignOz 來源需人工配對",
"manualGates": "{count} 組人工閘門"
},
"states": {
@@ -2371,6 +2380,7 @@
"manual_gate": "需人工閘門",
"investigating": "調查中",
"run_completed_no_repair": "Run 完成無修復",
"source_correlation_review": "來源證據待配對",
"no_repair_record": "無修復記錄"
},
"item": {
@@ -2378,6 +2388,7 @@
"duplicates": "重複 {count}",
"refs": "Refs {count}",
"linkedRuns": "Run {count}",
"stage": "階段 {stage}",
"incident": "Incident {incidentId}",
"repair": "修復 {status}",
"openRun": "開啟 Run",

View File

@@ -68,6 +68,7 @@ type RecurrenceRepairStatus =
| "manual_gate"
| "investigating"
| "run_completed_no_repair"
| "source_correlation_review"
| "no_repair_record";
interface RemediationSummary {
@@ -203,6 +204,7 @@ interface EventRecurrenceSummary {
manual_gate_group_total?: number;
automation_gap_group_total?: number;
failed_repair_group_total?: number;
source_correlation_review_group_total?: number;
latest_received_at?: string | null;
}
@@ -246,6 +248,7 @@ interface EventRecurrenceItem {
namespace?: string | null;
target_resource?: string | null;
fingerprint?: string | null;
latest_stage?: string | null;
latest_event_id?: string | null;
latest_provider_event_id?: string | null;
latest_content_preview?: string | null;
@@ -264,6 +267,7 @@ interface EventRecurrenceItem {
sentry_ref_total: number;
signoz_ref_total: number;
alert_ref_total: number;
stage_counts: Record<string, number>;
run_state_counts: Record<string, number>;
first_received_at?: string | null;
latest_received_at?: string | null;
@@ -1047,6 +1051,7 @@ function recurrenceRepairStatusLabelKey(status?: string | null) {
status === "manual_gate" ||
status === "investigating" ||
status === "run_completed_no_repair" ||
status === "source_correlation_review" ||
status === "no_repair_record"
) {
return `repairStatuses.${status}`;
@@ -1103,6 +1108,16 @@ function EventRecurrencePanel({
detail: t("details.verifiedRepair", { count: summary?.verified_repair_group_total ?? 0 }),
className: "border-[#b9a6d9] bg-[#f5f0ff] text-[#51358f]",
},
{
label: t("metrics.sourceReview"),
value: summary?.source_correlation_review_group_total ?? 0,
detail: t("details.sourceReview", {
count: summary?.source_correlation_review_group_total ?? 0,
}),
className: (summary?.source_correlation_review_group_total ?? 0) > 0
? "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]"
: "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]",
},
{
label: t("metrics.openWorkItems"),
value: summary?.open_work_item_group_total ?? 0,
@@ -1134,7 +1149,7 @@ function EventRecurrencePanel({
</div>
) : (
<>
<div className="grid gap-px bg-[#e0ddd4] md:grid-cols-2 xl:grid-cols-6">
<div className="grid gap-px bg-[#e0ddd4] md:grid-cols-2 xl:grid-cols-7">
{metrics.map((item) => (
<div key={item.label} className="bg-white px-4 py-3">
<div className="flex items-start justify-between gap-3">
@@ -1204,6 +1219,7 @@ function EventRecurrencePanel({
<p>{t("item.duplicates", { count: item.duplicate_total })}</p>
<p>{t("item.refs", { count: item.source_ref_total })}</p>
<p>{t("item.linkedRuns", { count: item.linked_run_total })}</p>
<p>{t("item.stage", { stage: item.latest_stage || "--" })}</p>
<p>{t("item.incident", { incidentId: item.latest_incident_id || "--" })}</p>
<p>{t("item.repair", { status: repairLabel })}</p>
</div>

View File

@@ -114,11 +114,16 @@ type RecurrenceItem = {
severity?: string | null;
namespace?: string | null;
target_resource?: string | null;
latest_stage?: string | null;
latest_provider_event_id?: string | null;
latest_run_id?: string | null;
latest_run_state?: string | null;
latest_incident_id?: string | null;
occurrence_total: number;
duplicate_total: number;
source_ref_total?: number;
sentry_ref_total?: number;
signoz_ref_total?: number;
repair_summary?: {
status?: string | null;
latest_auto_repair_id?: string | null;
@@ -142,6 +147,7 @@ type RecurrenceResponse = {
manual_gate_group_total?: number;
automation_gap_group_total?: number;
failed_repair_group_total?: number;
source_correlation_review_group_total?: number;
};
items: RecurrenceItem[];
};
@@ -170,6 +176,8 @@ type RecurrenceWorkItemActionResult = {
occurrence_total?: number | null;
duplicate_total?: number | null;
linked_run_total?: number | null;
latest_stage?: string | null;
latest_provider_event_id?: string | null;
} | null;
ticket_preview?: {
would_create?: boolean | null;
@@ -543,6 +551,7 @@ function recurrenceRepairStatusKey(status?: string | null) {
status === "manual_gate" ||
status === "investigating" ||
status === "run_completed_no_repair" ||
status === "source_correlation_review" ||
status === "no_repair_record"
) {
return status;
@@ -892,6 +901,8 @@ function buildWorkItems(
const recurrenceAutomationGap = recurrenceSummary?.automation_gap_group_total ?? 0;
const recurrenceFailedRepair = recurrenceSummary?.failed_repair_group_total ?? 0;
const recurrenceManualGate = recurrenceSummary?.manual_gate_group_total ?? 0;
const recurrenceSourceReview =
recurrenceSummary?.source_correlation_review_group_total ?? 0;
const latestRecurrenceOpenItem = recurrenceOpenItems(telemetry.eventRecurrence)[0] ?? null;
const driftState = telemetry.driftFingerprintState;
const driftFsmKey = driftFsmStateKey(driftState?.fsm_state);
@@ -951,6 +962,7 @@ function buildWorkItems(
gap: recurrenceAutomationGap,
failed: recurrenceFailedRepair,
manual: recurrenceManualGate,
source: recurrenceSourceReview,
}),
evidenceDetails: latestRecurrenceOpenItem
? [
@@ -1342,6 +1354,11 @@ function RecurrenceWorkQueuePanel({
<span className="border border-[#d8d3c7] bg-white px-2 py-0.5">
{t("failed", { count: summary?.failed_repair_group_total ?? 0 })}
</span>
<span className="border border-[#d9b36f] bg-[#fff7e8] px-2 py-0.5">
{t("sourceReview", {
count: summary?.source_correlation_review_group_total ?? 0,
})}
</span>
</div>
</div>
@@ -1396,6 +1413,19 @@ function RecurrenceWorkQueuePanel({
</div>
<div className="mt-3 grid gap-1 text-xs leading-5 text-[#5f5b52]">
<p>{t("incident", { incident: item.latest_incident_id ?? "--" })}</p>
<p>{t("stage", { stage: item.latest_stage ?? "--" })}</p>
<p className="truncate">
{t("sourceEvent", {
event: item.latest_provider_event_id ?? "--",
})}
</p>
<p>
{t("sourceRefs", {
refs: item.source_ref_total ?? 0,
sentry: item.sentry_ref_total ?? 0,
signoz: item.signoz_ref_total ?? 0,
})}
</p>
<p>{t("workItem", { id: workItem?.work_item_id ?? "--" })}</p>
<p>
{t("repair", {

View File

@@ -1,3 +1,52 @@
## 2026-05-21T116 Provider source evidence review work items
**觸發**
- T115 已證明 Sentry / SigNoz provider-native upstream canary 會寫入 AwoooP source dossier。
- 但未連到 Incident 的 provider 原生事件仍只停在 source evidenceOperator 在前端看不到「已進來源鏈路,但需要審核是否配對到 Incident」。
**修正**
- `channel_event_dossier_service.build_dossier_recurrence()` 新增 `latest_stage` / `stage_counts`,讓 recurrence group 顯示事件跑到 heartbeat、upstream_canary、received 或 incident_linked 哪個階段。
- Sentry / SigNoz 事件若有 provider refs、不是 heartbeat、且尚未連 Incident會形成 read-only `source_correlation_review` work item
- `kind=source_correlation_review`
- `next_step=review_provider_source_match`
- `reason=provider_native_evidence_unlinked`
- 不寫入 Incident / AutoRepair / Ticket只提供 preview / dry-run / handoff read model。
- `/api/v1/platform/events/dossier/recurrence` summary 新增 `source_correlation_review_group_total`
- AwoooP Runs 前端「重複告警關聯」新增「來源待審」指標,卡片顯示事件 stage讓 operator 可看見 provider-native evidence 已進 AwoooP 但仍需配對審核。
- AwoooP Work Items 同步顯示 source review count、stage、provider event id、Sentry / SignOz refs避免從 Runs 點進工作項後掉成 unknown。
**Verification**
```text
python -m py_compile apps/api/src/services/channel_event_dossier_service.py apps/api/src/api/v1/platform/events.py
-> pass
DATABASE_URL=postgresql+asyncpg://test:test@localhost/test pytest -q tests/test_channel_event_dossier_service.py
-> 14 passed
pnpm --dir apps/web exec tsc --noEmit
-> pass
NEXT_PUBLIC_API_URL=https://awoooi.wooo.work pnpm --dir apps/web run build
-> compiled successfully, 90/90 static pages
python -m json.tool apps/web/messages/zh-TW.json
python -m json.tool apps/web/messages/en.json
-> pass
git diff --check
-> pass
python -m ruff check src/services/channel_event_dossier_service.py src/api/v1/platform/events.py tests/test_channel_event_dossier_service.py
-> pre-existing FastAPI Query B008 in events.py; no new logic failures observed
```
**目前整體進度**
- Provider-native upstream ingestion 可驗證性99.5% → 99.6%。
- Source refs / Sentry / SigNoz 可見性99.9% → 99.93%。
- Incident-level source correlation 可見性86% → 88%。
- AwoooP 告警可觀測鏈99.985% → 99.988%。
- 前端 AI 自動化管理介面同步99.99%Runs / Work Items recurrence panel 已同步來源待審)。
- 完整 AI 自動化管理產品化99.65% → 99.68%。
- 剩餘:推 Gitea main、等待 CI/CD、production API / frontend 驗證。
## 2026-05-20T115 Provider-native upstream canary 接入
**觸發**