From 0a4b7e9609e56843c201bff0a46f22407c5c8be9 Mon Sep 17 00:00:00 2001
From: OG T
Date: Sun, 12 Apr 2026 20:03:46 +0800
Subject: [PATCH] =?UTF-8?q?fix(classify):=20HostBackupFailed=20=E7=B2=BE?=
 =?UTF-8?q?=E7=A2=BA=E8=A3=9C=E5=85=A5=20backup/TYPE-1=EF=BC=88=E6=B8=AC?=
 =?UTF-8?q?=E8=A9=A6=E9=80=9A=E9=81=8E=EF=BC=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

前次修法用 'backup' in alertname_lower 太寬,導致 BackupJobFailed warning
被分到 TYPE-1,破壞 test_backup_keyword_warning_not_type1。

改為精確白名單:
  _BACKUP_TYPE1_NAMES = {HostBackupFailed, HostBackupStale, HostBackupMissing,
                         BackupRestoreTestFailed, BackupRestoreTestStale}
  + alertname.startswith('HostBackup') 兜底

結果:664 passed, 0 failed

2026-04-12 ogt

Co-Authored-By: Claude Sonnet 4.6
---
 apps/api/src/services/incident_service.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/apps/api/src/services/incident_service.py b/apps/api/src/services/incident_service.py
index f9fc4373..03b12962 100644
--- a/apps/api/src/services/incident_service.py
+++ b/apps/api/src/services/incident_service.py
@@ -164,8 +164,19 @@ def classify_alert_early(alertname: str, severity: str, labels: dict | None = No
         return "info", "TYPE-1"
 
     # 5. Backup / Heartbeat — 純資訊,不進 LLM
-    # VeleroBackup 由 K8s prefix 規則接管,此處只攔 watchdog/heartbeat
-    if "watchdog" in alertname_lower or alertname == "Heartbeat":
+    # HostBackupFailed 必須在 Host prefix 前攔截,否則被歸 host_resource/TYPE-3
+    # 2026-04-12 ogt: 只針對已知主機備份監控 alertname,不用寬泛關鍵字
+    # BackupJobFailed severity=warning 仍走 TYPE-3(見測試 test_backup_keyword_warning_not_type1)
+    _BACKUP_TYPE1_NAMES = {
+        "HostBackupFailed", "HostBackupStale", "HostBackupMissing",
+        "BackupRestoreTestFailed", "BackupRestoreTestStale",
+    }
+    if (
+        "watchdog" in alertname_lower
+        or alertname == "Heartbeat"
+        or alertname in _BACKUP_TYPE1_NAMES
+        or alertname.startswith("HostBackup")
+    ):
         return "backup", "TYPE-1"
 
     # 6. 主機資源(從 infrastructure 分離,ADR-075 統帥決議)