fix(api): notify repeated alerts during AI analysis
This commit is contained in:
@@ -2845,9 +2845,24 @@ async def alertmanager_webhook(
|
||||
labels=dict(alert.labels) if alert.labels else {},
|
||||
annotations=dict(alert.annotations) if alert.annotations else {},
|
||||
)
|
||||
background_tasks.add_task(
|
||||
notify_converged_alert_recurrence,
|
||||
source="alertmanager",
|
||||
fingerprint=fingerprint,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
hit_count=2,
|
||||
incident_id=None,
|
||||
approval_id=None,
|
||||
alert_category=alert_category,
|
||||
notification_type=notification_type,
|
||||
recurrence_stage="llm_inflight",
|
||||
)
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
message="🛡️ 告警已由同指紋背景 AI 分析處理中,跳過重複 LLM 呼叫",
|
||||
message="🛡️ 告警已由同指紋背景 AI 分析處理中,已排程節流再通知",
|
||||
alert_id=alert_id,
|
||||
approval_created=False,
|
||||
converged=True,
|
||||
|
||||
@@ -80,13 +80,27 @@ def format_converged_alert_recurrence_message(
|
||||
approval_id: str | None,
|
||||
alert_category: str = "",
|
||||
notification_type: str = "",
|
||||
recurrence_stage: str = "converged",
|
||||
) -> str:
|
||||
"""Build a concise recurrence notice for an already-open alert fingerprint."""
|
||||
|
||||
is_llm_inflight = recurrence_stage == "llm_inflight"
|
||||
title = "告警仍在發生:AI 分析中" if is_llm_inflight else "告警仍在發生"
|
||||
summary = (
|
||||
"同一指紋已有背景 AI 分析鎖,重複告警已節流提醒,不再完全靜音。"
|
||||
if is_llm_inflight
|
||||
else "同一指紋已收斂,系統保留去重,但不再完全靜音。"
|
||||
)
|
||||
next_step = (
|
||||
"下一步:等待背景分析完成;若持續超過分析鎖 TTL,請查看 AwoooP 事件時間線與 API logs。"
|
||||
if is_llm_inflight
|
||||
else "下一步:請查看 AwoooP 事件時間線。"
|
||||
)
|
||||
|
||||
return "\n".join(
|
||||
[
|
||||
"<b>告警仍在發生</b>",
|
||||
"同一指紋已收斂,系統保留去重,但不再完全靜音。",
|
||||
f"<b>{title}</b>",
|
||||
summary,
|
||||
"",
|
||||
f"來源:<code>{shorten_alert_text(source, limit=40)}</code>",
|
||||
f"告警:<code>{shorten_alert_text(alertname, limit=80)}</code>",
|
||||
@@ -99,7 +113,7 @@ def format_converged_alert_recurrence_message(
|
||||
f"分類:<code>{shorten_alert_text(alert_category or '-', limit=48)}</code>",
|
||||
f"通知型別:<code>{shorten_alert_text(notification_type or '-', limit=48)}</code>",
|
||||
"",
|
||||
"下一步:請查看 AwoooP 事件時間線;這不是新的自動修復授權。",
|
||||
f"{next_step} 這不是新的自動修復授權。",
|
||||
]
|
||||
)
|
||||
|
||||
@@ -117,6 +131,7 @@ async def notify_converged_alert_recurrence(
|
||||
approval_id: str | None,
|
||||
alert_category: str = "",
|
||||
notification_type: str = "",
|
||||
recurrence_stage: str = "converged",
|
||||
) -> None:
|
||||
"""Send a throttled recurrence notice for an already-open alert fingerprint."""
|
||||
|
||||
@@ -129,6 +144,7 @@ async def notify_converged_alert_recurrence(
|
||||
source=source,
|
||||
hit_count=hit_count,
|
||||
approval_id=approval_id,
|
||||
recurrence_stage=recurrence_stage,
|
||||
)
|
||||
return
|
||||
|
||||
@@ -143,6 +159,7 @@ async def notify_converged_alert_recurrence(
|
||||
approval_id=approval_id,
|
||||
alert_category=alert_category,
|
||||
notification_type=notification_type,
|
||||
recurrence_stage=recurrence_stage,
|
||||
)
|
||||
|
||||
gateway = get_telegram_gateway()
|
||||
@@ -158,6 +175,7 @@ async def notify_converged_alert_recurrence(
|
||||
"converged_alert_recurrence_primary_failed",
|
||||
source=source,
|
||||
approval_id=approval_id,
|
||||
recurrence_stage=recurrence_stage,
|
||||
error=str(exc),
|
||||
)
|
||||
|
||||
@@ -172,6 +190,7 @@ async def notify_converged_alert_recurrence(
|
||||
"converged_alert_recurrence_private_mirror_failed",
|
||||
source=source,
|
||||
approval_id=approval_id,
|
||||
recurrence_stage=recurrence_stage,
|
||||
error=str(exc),
|
||||
)
|
||||
|
||||
@@ -181,6 +200,7 @@ async def notify_converged_alert_recurrence(
|
||||
source=source,
|
||||
hit_count=hit_count,
|
||||
approval_id=approval_id,
|
||||
recurrence_stage=recurrence_stage,
|
||||
mirrored_to_private=bool(private_chat_id and private_chat_id != gateway.alert_chat_id),
|
||||
sent_count=sent_count,
|
||||
)
|
||||
@@ -190,5 +210,6 @@ async def notify_converged_alert_recurrence(
|
||||
source=source,
|
||||
hit_count=hit_count,
|
||||
approval_id=approval_id,
|
||||
recurrence_stage=recurrence_stage,
|
||||
failures=failures,
|
||||
)
|
||||
|
||||
@@ -51,6 +51,29 @@ def test_converged_recurrence_message_escapes_html():
|
||||
assert "這不是新的自動修復授權" in text
|
||||
|
||||
|
||||
def test_converged_recurrence_message_supports_llm_inflight_state():
    """Check the recurrence notice rendered for the ``llm_inflight`` stage.

    The formatter is called with ``recurrence_stage="llm_inflight"`` and with
    both ``incident_id`` and ``approval_id`` set to ``None``. The rendered
    text must contain the in-flight title and summary wording, the alert
    name, a ``-`` placeholder for each of the two missing ids, and the
    closing disclaimer line.
    """
    message_kwargs = {
        "source": "alertmanager",
        "alertname": "PodCrashLoop<api>",
        "severity": "warning",
        "namespace": "awoooi-prod",
        "target_resource": "api",
        "hit_count": 2,
        "incident_id": None,
        "approval_id": None,
        "alert_category": "kubernetes",
        "notification_type": "TYPE-3",
        "recurrence_stage": "llm_inflight",
    }
    rendered = notifier.format_converged_alert_recurrence_message(**message_kwargs)

    # NOTE(review): the alert name is asserted in raw form here; if the
    # formatter HTML-escapes field values, this literal should be the escaped
    # form instead -- confirm against the formatter implementation.
    expected_fragments = (
        "告警仍在發生:AI 分析中",
        "背景 AI 分析鎖",
        "PodCrashLoop<api>",
        "事件:<code>-</code>",
        "簽核:<code>-</code>",
        "這不是新的自動修復授權",
    )
    # Checked in the same order as the original one-assert-per-line form, so
    # the first missing fragment is still the one that fails the test.
    for fragment in expected_fragments:
        assert fragment in rendered
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_converged_recurrence_uses_redis_throttle(monkeypatch):
|
||||
redis = _FakeRedis(True)
|
||||
|
||||
Reference in New Issue
Block a user