From 65a727a23c09e7ee1d887745e4857c90e040a09b Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 11 Jun 2026 12:37:54 +0800 Subject: [PATCH] fix(api): notify repeated alerts during AI analysis --- apps/api/src/api/v1/webhooks.py | 17 +++++++++++- .../converged_alert_recurrence_notifier.py | 27 ++++++++++++++++--- .../tests/test_alert_converged_recurrence.py | 23 ++++++++++++++++ 3 files changed, 63 insertions(+), 4 deletions(-) diff --git a/apps/api/src/api/v1/webhooks.py b/apps/api/src/api/v1/webhooks.py index 87fd1a16..4f880968 100644 --- a/apps/api/src/api/v1/webhooks.py +++ b/apps/api/src/api/v1/webhooks.py @@ -2845,9 +2845,24 @@ async def alertmanager_webhook( labels=dict(alert.labels) if alert.labels else {}, annotations=dict(alert.annotations) if alert.annotations else {}, ) + background_tasks.add_task( + notify_converged_alert_recurrence, + source="alertmanager", + fingerprint=fingerprint, + alertname=alertname, + severity=severity, + namespace=namespace, + target_resource=target_resource, + hit_count=2, + incident_id=None, + approval_id=None, + alert_category=alert_category, + notification_type=notification_type, + recurrence_stage="llm_inflight", + ) return AlertResponse( success=True, - message="🛡️ 告警已由同指紋背景 AI 分析處理中,跳過重複 LLM 呼叫", + message="🛡️ 告警已由同指紋背景 AI 分析處理中,已排程節流再通知", alert_id=alert_id, approval_created=False, converged=True, diff --git a/apps/api/src/services/converged_alert_recurrence_notifier.py b/apps/api/src/services/converged_alert_recurrence_notifier.py index b2b06d28..a8df6f90 100644 --- a/apps/api/src/services/converged_alert_recurrence_notifier.py +++ b/apps/api/src/services/converged_alert_recurrence_notifier.py @@ -80,13 +80,27 @@ def format_converged_alert_recurrence_message( approval_id: str | None, alert_category: str = "", notification_type: str = "", + recurrence_stage: str = "converged", ) -> str: """Build a concise recurrence notice for an already-open alert fingerprint.""" + is_llm_inflight = recurrence_stage == "llm_inflight" + title = "告警仍在發生:AI 分析中" if is_llm_inflight else "告警仍在發生" + summary = ( + "同一指紋已有背景 AI 分析鎖,重複告警已節流提醒,不再完全靜音。" + if is_llm_inflight + else "同一指紋已收斂,系統保留去重,但不再完全靜音。" + ) + next_step = ( + "下一步:等待背景分析完成;若持續超過分析鎖 TTL,請查看 AwoooP 事件時間線與 API logs。" + if is_llm_inflight + else "下一步:請查看 AwoooP 事件時間線。" + ) + return "\n".join( [ - "告警仍在發生", - "同一指紋已收斂,系統保留去重,但不再完全靜音。", + f"{title}", + summary, "", f"來源:{shorten_alert_text(source, limit=40)}", f"告警:{shorten_alert_text(alertname, limit=80)}", @@ -99,7 +113,7 @@ def format_converged_alert_recurrence_message( f"分類:{shorten_alert_text(alert_category or '-', limit=48)}", f"通知型別:{shorten_alert_text(notification_type or '-', limit=48)}", "", - "下一步:請查看 AwoooP 事件時間線;這不是新的自動修復授權。", + f"{next_step} 這不是新的自動修復授權。", ] ) @@ -117,6 +131,7 @@ async def notify_converged_alert_recurrence( approval_id: str | None, alert_category: str = "", notification_type: str = "", + recurrence_stage: str = "converged", ) -> None: """Send a throttled recurrence notice for an already-open alert fingerprint.""" @@ -129,6 +144,7 @@ async def notify_converged_alert_recurrence( source=source, hit_count=hit_count, approval_id=approval_id, + recurrence_stage=recurrence_stage, ) return @@ -143,6 +159,7 @@ async def notify_converged_alert_recurrence( approval_id=approval_id, alert_category=alert_category, notification_type=notification_type, + recurrence_stage=recurrence_stage, ) gateway = get_telegram_gateway() @@ -158,6 +175,7 @@ async def notify_converged_alert_recurrence( "converged_alert_recurrence_primary_failed", source=source, approval_id=approval_id, + recurrence_stage=recurrence_stage, error=str(exc), ) @@ -172,6 +190,7 @@ async def notify_converged_alert_recurrence( "converged_alert_recurrence_private_mirror_failed", source=source, approval_id=approval_id, + recurrence_stage=recurrence_stage, error=str(exc), ) @@ -181,6 +200,7 @@ async def notify_converged_alert_recurrence( source=source, hit_count=hit_count, approval_id=approval_id, + recurrence_stage=recurrence_stage, mirrored_to_private=bool(private_chat_id and private_chat_id != gateway.alert_chat_id), sent_count=sent_count, ) @@ -190,5 +210,6 @@ async def notify_converged_alert_recurrence( source=source, hit_count=hit_count, approval_id=approval_id, + recurrence_stage=recurrence_stage, failures=failures, ) diff --git a/apps/api/tests/test_alert_converged_recurrence.py b/apps/api/tests/test_alert_converged_recurrence.py index 317586b7..a2409fba 100644 --- a/apps/api/tests/test_alert_converged_recurrence.py +++ b/apps/api/tests/test_alert_converged_recurrence.py @@ -51,6 +51,29 @@ def test_converged_recurrence_message_escapes_html(): assert "這不是新的自動修復授權" in text +def test_converged_recurrence_message_supports_llm_inflight_state(): + text = notifier.format_converged_alert_recurrence_message( + source="alertmanager", + alertname="PodCrashLoop", + severity="warning", + namespace="awoooi-prod", + target_resource="api", + hit_count=2, + incident_id=None, + approval_id=None, + alert_category="kubernetes", + notification_type="TYPE-3", + recurrence_stage="llm_inflight", + ) + + assert "告警仍在發生:AI 分析中" in text + assert "背景 AI 分析鎖" in text + assert "PodCrashLoop<api>" in text + assert "事件:-" in text + assert "簽核:-" in text + assert "這不是新的自動修復授權" in text + + @pytest.mark.asyncio async def test_converged_recurrence_uses_redis_throttle(monkeypatch): redis = _FakeRedis(True)