From ff448ad282b3b2c0b9cdb606a4bd5b92161ea206 Mon Sep 17 00:00:00 2001
From: OG T
Date: Wed, 15 Apr 2026 20:37:59 +0800
Subject: [PATCH] =?UTF-8?q?fix(incidents):=20=E4=BF=AE=E5=BE=A9=E5=85=A9?=
 =?UTF-8?q?=E5=80=8B=20DB=20=E5=AE=8C=E6=95=B4=E6=80=A7=E5=95=8F=E9=A1=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. alertname IS NULL(4 筆歷史修復 + code fallback)
- incident_repository.py: alertname 補 labels["alertname"] fallback
- SQL UPDATE: 用 signals->0->>'alert_name' 修補存量 4 筆 NULL 記錄

2. TYPE-1 incidents 永遠卡 INVESTIGATING(18 筆修復 + code fix)
- webhooks.py: TYPE-1 短路後立即加 resolve_incident background task
- SQL UPDATE: 批次將存量 TYPE-1 INVESTIGATING → RESOLVED

根因: ADR-073 TYPE-1 短路設計只發通知,未關閉 incident 狀態 backup/heartbeat 告警每小時觸發 → 無限累積 INVESTIGATING 記錄

2026-04-15 ogt + Claude Sonnet 4.6(亞太)

Co-Authored-By: Claude Sonnet 4.6
---
 apps/api/src/api/v1/webhooks.py | 8 ++++++++
 apps/api/src/repositories/incident_repository.py | 9 ++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/apps/api/src/api/v1/webhooks.py b/apps/api/src/api/v1/webhooks.py
index 3c76e501..fad40c41 100644
--- a/apps/api/src/api/v1/webhooks.py
+++ b/apps/api/src/api/v1/webhooks.py
@@ -1350,6 +1350,14 @@ async def alertmanager_webhook(
 notification_type="TYPE-1",
 alert_category=alert_category,
 )
+ # 2026-04-15 ogt: TYPE-1 純資訊告警建立後立即關閉
+ # 設計原則: backup/heartbeat/info 告警無需追蹤狀態,通知即完成
+ # 防止 incidents 表無限累積 INVESTIGATING 記錄(ADR-073 漏洞修補)
+ background_tasks.add_task(
+ get_incident_service().resolve_incident,
+ _info_incident_id,
+ "info_notification",
+ )
 background_tasks.add_task(
 get_telegram_gateway().send_info_notification,
 incident_id=_info_incident_id,
diff --git a/apps/api/src/repositories/incident_repository.py b/apps/api/src/repositories/incident_repository.py
index 3e3558cd..f84d4748 100644
--- a/apps/api/src/repositories/incident_repository.py
+++ b/apps/api/src/repositories/incident_repository.py
@@ -79,7 +79,14 @@ def _incident_to_record_data(incident: Incident) -> dict[str, Any]:
 "resolved_at": incident.resolved_at,
 "closed_at": incident.closed_at,
 "frequency_snapshot": frequency_snapshot,
- "alertname": incident.signals[0].alert_name if incident.signals else None, # ADR-073 Phase 2-1
+ # ADR-073 Phase 2-1: alertname 欄位 — 優先從 signals[0].alert_name,
+ # fallback 到 signals[0].labels["alertname"](防止 signal 建立時 alert_name 為空)
+ # 2026-04-15 ogt: 補 labels fallback(4 筆歷史 NULL 根因修復)
+ "alertname": (
+ (incident.signals[0].alert_name or
+ (incident.signals[0].labels or {}).get("alertname"))
+ if incident.signals else None
+ ),
 "notification_type": incident.notification_type, # ADR-073 Phase 2-2
 "alert_category": incident.alert_category, # ADR-073 Phase 2-2
 }