fix(api): notify repeated alerts during AI analysis
All checks were successful
CD Pipeline / tests (push) Successful in 1m27s
Code Review / ai-code-review (push) Successful in 17s
CD Pipeline / build-and-deploy (push) Successful in 4m45s
CD Pipeline / post-deploy-checks (push) Successful in 1m26s

This commit is contained in:
Your Name
2026-06-11 12:37:54 +08:00
parent aae47ed107
commit 65a727a23c
3 changed files with 63 additions and 4 deletions

View File

@@ -2845,9 +2845,24 @@ async def alertmanager_webhook(
labels=dict(alert.labels) if alert.labels else {},
annotations=dict(alert.annotations) if alert.annotations else {},
)
background_tasks.add_task(
notify_converged_alert_recurrence,
source="alertmanager",
fingerprint=fingerprint,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
hit_count=2,
incident_id=None,
approval_id=None,
alert_category=alert_category,
notification_type=notification_type,
recurrence_stage="llm_inflight",
)
return AlertResponse(
success=True,
message="🛡️ 告警已由同指紋背景 AI 分析處理中,跳過重複 LLM 呼叫",
message="🛡️ 告警已由同指紋背景 AI 分析處理中,已排程節流再通知",
alert_id=alert_id,
approval_created=False,
converged=True,

View File

@@ -80,13 +80,27 @@ def format_converged_alert_recurrence_message(
approval_id: str | None,
alert_category: str = "",
notification_type: str = "",
recurrence_stage: str = "converged",
) -> str:
"""Build a concise recurrence notice for an already-open alert fingerprint."""
is_llm_inflight = recurrence_stage == "llm_inflight"
title = "告警仍在發生AI 分析中" if is_llm_inflight else "告警仍在發生"
summary = (
"同一指紋已有背景 AI 分析鎖,重複告警已節流提醒,不再完全靜音。"
if is_llm_inflight
else "同一指紋已收斂,系統保留去重,但不再完全靜音。"
)
next_step = (
"下一步:等待背景分析完成;若持續超過分析鎖 TTL請查看 AwoooP 事件時間線與 API logs。"
if is_llm_inflight
else "下一步:請查看 AwoooP 事件時間線。"
)
return "\n".join(
[
"<b>告警仍在發生</b>",
"同一指紋已收斂,系統保留去重,但不再完全靜音。",
f"<b>{title}</b>",
summary,
"",
f"來源:<code>{shorten_alert_text(source, limit=40)}</code>",
f"告警:<code>{shorten_alert_text(alertname, limit=80)}</code>",
@@ -99,7 +113,7 @@ def format_converged_alert_recurrence_message(
f"分類:<code>{shorten_alert_text(alert_category or '-', limit=48)}</code>",
f"通知型別:<code>{shorten_alert_text(notification_type or '-', limit=48)}</code>",
"",
"下一步:請查看 AwoooP 事件時間線;這不是新的自動修復授權。",
f"{next_step} 這不是新的自動修復授權。",
]
)
@@ -117,6 +131,7 @@ async def notify_converged_alert_recurrence(
approval_id: str | None,
alert_category: str = "",
notification_type: str = "",
recurrence_stage: str = "converged",
) -> None:
"""Send a throttled recurrence notice for an already-open alert fingerprint."""
@@ -129,6 +144,7 @@ async def notify_converged_alert_recurrence(
source=source,
hit_count=hit_count,
approval_id=approval_id,
recurrence_stage=recurrence_stage,
)
return
@@ -143,6 +159,7 @@ async def notify_converged_alert_recurrence(
approval_id=approval_id,
alert_category=alert_category,
notification_type=notification_type,
recurrence_stage=recurrence_stage,
)
gateway = get_telegram_gateway()
@@ -158,6 +175,7 @@ async def notify_converged_alert_recurrence(
"converged_alert_recurrence_primary_failed",
source=source,
approval_id=approval_id,
recurrence_stage=recurrence_stage,
error=str(exc),
)
@@ -172,6 +190,7 @@ async def notify_converged_alert_recurrence(
"converged_alert_recurrence_private_mirror_failed",
source=source,
approval_id=approval_id,
recurrence_stage=recurrence_stage,
error=str(exc),
)
@@ -181,6 +200,7 @@ async def notify_converged_alert_recurrence(
source=source,
hit_count=hit_count,
approval_id=approval_id,
recurrence_stage=recurrence_stage,
mirrored_to_private=bool(private_chat_id and private_chat_id != gateway.alert_chat_id),
sent_count=sent_count,
)
@@ -190,5 +210,6 @@ async def notify_converged_alert_recurrence(
source=source,
hit_count=hit_count,
approval_id=approval_id,
recurrence_stage=recurrence_stage,
failures=failures,
)

View File

@@ -51,6 +51,29 @@ def test_converged_recurrence_message_escapes_html():
assert "這不是新的自動修復授權" in text
def test_converged_recurrence_message_supports_llm_inflight_state():
    """Check the recurrence message rendered for ``recurrence_stage="llm_inflight"``.

    With no incident or approval id supplied, the rendered text must carry the
    in-flight title and summary wording, the HTML-escaped alert name, ``-``
    placeholders in the incident and approval fields, and the disclaimer that
    the notice is not a new auto-remediation authorization.
    """
    message_kwargs = {
        "source": "alertmanager",
        "alertname": "PodCrashLoop<api>",
        "severity": "warning",
        "namespace": "awoooi-prod",
        "target_resource": "api",
        "hit_count": 2,
        "incident_id": None,
        "approval_id": None,
        "alert_category": "kubernetes",
        "notification_type": "TYPE-3",
        "recurrence_stage": "llm_inflight",
    }
    rendered = notifier.format_converged_alert_recurrence_message(**message_kwargs)

    # Checked in the same order as the individual assertions they replace.
    expected_fragments = (
        "告警仍在發生AI 分析中",
        "背景 AI 分析鎖",
        "PodCrashLoop&lt;api&gt;",
        "事件:<code>-</code>",
        "簽核:<code>-</code>",
        "這不是新的自動修復授權",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
@pytest.mark.asyncio
async def test_converged_recurrence_uses_redis_throttle(monkeypatch):
redis = _FakeRedis(True)