fix(adr075): 修補斷點E — _push_to_telegram_background 補 TYPE-8M routing
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled

斷點 E:alertmanager webhook 走 _push_to_telegram_background,
但該函式未含 TYPE-8M branch,導致 meta alert 從未送出。

- webhooks.py: 新增 alert_category 參數 + TYPE-8M branch
- incident_service.py: 還原 rule 5,僅攔 watchdog/heartbeat;
  移除誤加的 backup startswith 與 "backup" 子字串比對規則(VeleroBackup 由 K8s rule 接管)

Tests: 52/52 passed

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-04-12 20:01:51 +08:00
parent 1f7975170a
commit f25d82a88a
2 changed files with 28 additions and 8 deletions

View File

@@ -307,6 +307,8 @@ async def _push_to_telegram_background(
# ADR-073 Fix: 傳入 notification_type 以正確路由 TYPE-4D Config Drift 卡片
notification_type: str = "",
diff_summary: str = "",
# 2026-04-12 ogt: ADR-075 斷點 E 修復 — alert_category 傳入以啟用 TYPE-8M 路由
alert_category: str = "",
) -> None:
"""
背景任務: 推送待簽核卡片到 Telegram (v7.0 含 SignOz 整合)
@@ -344,6 +346,26 @@ async def _push_to_telegram_background(
)
return
# 2026-04-12 ogt: ADR-075 斷點 E 修復 — TYPE-8M Meta-System 使用專屬卡片
# alertchain_health / flywheel_health → ⚙️ META SYSTEM 卡片,不發群組
if notification_type == "TYPE-8M" or alert_category in ("alertchain_health", "flywheel_health"):
await gateway.send_meta_alert(
incident_id=incident_id,
approval_id=approval_id,
alertname=resource_name,
alert_category=alert_category,
diagnosis=root_cause[:100],
severity_level=risk_level,
system_impact=root_cause[:150],
)
logger.info(
"telegram_push_success_type8m",
approval_id=approval_id,
incident_id=incident_id,
alert_category=alert_category,
)
return
# 如果是收斂告警,在訊息中加入聚合次數
root_cause_with_count = root_cause
if hit_count > 1:
@@ -1463,6 +1485,8 @@ async def alertmanager_webhook(
incident_id=incident_id,
# ADR-073: 路由 TYPE-4D → send_drift_card
notification_type=notification_type,
# ADR-075 斷點 E 修復: 路由 TYPE-8M → send_meta_alert
alert_category=alert_category,
)
record_alert_chain_success("alertmanager")
@@ -1530,6 +1554,8 @@ async def alertmanager_webhook(
incident_id=fallback_incident_id,
# ADR-073: 路由 TYPE-4D → send_drift_card
notification_type=notification_type,
# ADR-075 斷點 E 修復: 路由 TYPE-8M → send_meta_alert
alert_category=alert_category,
)
return AlertResponse(

View File

@@ -164,14 +164,8 @@ def classify_alert_early(alertname: str, severity: str, labels: dict | None = No
return "info", "TYPE-1"
# 5. Backup / Heartbeat — 純資訊,不進 LLM
# HostBackupFailed 必須在 Host prefix 前攔截,否則被歸 host_resource/TYPE-3
# NOTE(review): 本次還原後此規則僅攔 watchdog/heartbeat,HostBackupFailed 不再於此被攔截 — 請確認是否符合預期
# 2026-04-12 ogt: 補充 HostBackup/Backup/VeleroBackup 前綴
if (
"watchdog" in alertname_lower
or alertname == "Heartbeat"
or alertname.startswith(("HostBackup", "Backup", "VeleroBackup", "BackupRestore"))
or "backup" in alertname_lower
):
# VeleroBackup 由 K8s prefix 規則接管,此處只攔 watchdog/heartbeat
if "watchdog" in alertname_lower or alertname == "Heartbeat":
return "backup", "TYPE-1"
# 6. 主機資源(從 infrastructure 分離,ADR-075 統帥決議)