Files
awoooi/apps/api/tests/test_classify_alert_early.py
Your Name 0e14935351
Some checks failed
CD Pipeline / tests (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
Code Review / ai-code-review (push) Has been cancelled
fix(ops): classify systemd runner alerts as host resources
2026-05-05 14:28:18 +08:00

315 lines
11 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
classify_alert_early() 分類函數單元測試 — ADR-073 Phase 2-2 + ADR-075
覆蓋 13 條分類規則、10 種告警分類:
config_drift (TYPE-4D)
alertchain_health, flywheel_health (TYPE-8M) — beats severity
info, backup (TYPE-1)
host_resource, infrastructure, kubernetes, database,
storage, devops_tool, external_site, ssl_cert, general (TYPE-3)
2026-04-12 ogt (ADR-073 P2-2 測試補強 + ADR-075 七類新增)
"""
import pytest
from src.services.incident_service import classify_alert_early
# --------------------------------------------------------------------------- #
# TYPE-4D: Config Drift
# --------------------------------------------------------------------------- #
class TestConfigDrift:
    """Config-drift alert names must classify as config_drift / TYPE-4D."""

    def test_configuration_drift(self):
        """ConfigurationDrift at critical severity is config drift."""
        category, notification_type = classify_alert_early(
            "ConfigurationDrift", "critical", {}
        )
        assert (notification_type, category) == ("TYPE-4D", "config_drift")

    def test_kube_config_drift(self):
        """KubeConfigDrift at warning severity is config drift."""
        category, notification_type = classify_alert_early(
            "KubeConfigDrift", "warning", {}
        )
        assert (notification_type, category) == ("TYPE-4D", "config_drift")

    def test_config_drift_case_sensitive(self):
        """A lower-cased variant must not be treated as config drift."""
        # Variants that are not on the allow-list must not hit config_drift.
        category, _ = classify_alert_early("configurationdrift", "critical", {})
        assert category != "config_drift"
# --------------------------------------------------------------------------- #
# TYPE-1: Info / Heartbeat
# --------------------------------------------------------------------------- #
class TestInfoAlerts:
    """TYPE-1 cases: informational severities, heartbeats and backup names."""

    def test_severity_info(self):
        """severity=info yields the info category with TYPE-1."""
        category, notification_type = classify_alert_early("SomeAlert", "info", {})
        assert (notification_type, category) == ("TYPE-1", "info")

    def test_severity_none(self):
        """severity=none yields the info category with TYPE-1."""
        category, notification_type = classify_alert_early("SomeAlert", "none", {})
        assert (notification_type, category) == ("TYPE-1", "info")

    def test_backup_keyword_info_only(self):
        """A backup-named alert at severity=info is TYPE-1."""
        # severity=info -> the severity rule matches first, giving TYPE-1.
        _, notification_type = classify_alert_early("BackupJobComplete", "info", {})
        assert notification_type == "TYPE-1"

    def test_backup_keyword_warning_not_type1(self):
        """A backup-named alert at severity=warning is TYPE-3, not TYPE-1."""
        # BackupJobFailed with severity=warning falls through to the prefix
        # rules, so it must not end up as TYPE-1.
        _, notification_type = classify_alert_early("BackupJobFailed", "warning", {})
        assert notification_type == "TYPE-3"

    def test_watchdog_heartbeat(self):
        """Watchdog at severity=none is TYPE-1."""
        # Watchdog (the Alertmanager heartbeat) with severity=none -> the
        # severity rule matches first, giving TYPE-1.
        _, notification_type = classify_alert_early("Watchdog", "none", {})
        assert notification_type == "TYPE-1"

    def test_deadmansswitch_heartbeat(self):
        """DeadMansSwitch at severity=warning is backup / TYPE-1."""
        # DeadMansSwitch heartbeat -> TYPE-1 (added 2026-04-12 ogt).
        category, notification_type = classify_alert_early(
            "DeadMansSwitch", "warning", {}
        )
        assert (category, notification_type) == ("backup", "TYPE-1")

    def test_backup_critical_not_type1(self):
        """BACKUP_MISSING at severity=critical is TYPE-3."""
        # Critical backup alerts should follow their own prefix rule; they are
        # not purely informational.
        _, notification_type = classify_alert_early("BACKUP_MISSING", "critical", {})
        assert notification_type == "TYPE-3"
# --------------------------------------------------------------------------- #
# TYPE-3: Infrastructure (Docker / Host)
# --------------------------------------------------------------------------- #
class TestInfrastructure:
    """TYPE-3 cases for Docker (infrastructure) and host-level alerts."""

    def test_docker_prefix(self):
        """Docker-prefixed alerts are infrastructure / TYPE-3."""
        category, notification_type = classify_alert_early(
            "DockerContainerOOM", "critical", {}
        )
        # Docker alerts stay in the infrastructure category.
        assert (notification_type, category) == ("TYPE-3", "infrastructure")

    def test_docker_restart(self):
        """DockerContainerRestarting is categorised as infrastructure."""
        category, _ = classify_alert_early("DockerContainerRestarting", "warning", {})
        assert category == "infrastructure"

    # ADR-075: Host* alerts were split out of infrastructure into host_resource.
    def test_host_prefix_is_host_resource(self):
        """Host-prefixed alerts are host_resource / TYPE-3."""
        category, notification_type = classify_alert_early(
            "HostHighCpuLoad", "warning", {}
        )
        assert (notification_type, category) == ("TYPE-3", "host_resource")

    def test_host_down(self):
        """HostDown is categorised as host_resource."""
        category, _ = classify_alert_early("HostDown", "critical", {})
        assert category == "host_resource"

    def test_host_memory(self):
        """HostOutOfMemory is categorised as host_resource."""
        category, _ = classify_alert_early("HostOutOfMemory", "warning", {})
        assert category == "host_resource"

    def test_host_disk(self):
        """HostOutOfDiskSpace is categorised as host_resource."""
        category, _ = classify_alert_early("HostOutOfDiskSpace", "warning", {})
        assert category == "host_resource"

    @pytest.mark.parametrize(
        "alertname",
        [
            "SystemdRunnerRestartSpike",
            "SystemdRunnerWatchdogEnabled",
            "SystemdRunnerMissingResourceQuota",
        ],
    )
    def test_systemd_runner_is_host_resource_not_heartbeat(self, alertname):
        """SystemdRunner* alerts at warning are host_resource / TYPE-3."""
        category, notification_type = classify_alert_early(alertname, "warning", {})
        assert (notification_type, category) == ("TYPE-3", "host_resource")
# --------------------------------------------------------------------------- #
# ADR-075: alertchain_health (TYPE-8M)
# --------------------------------------------------------------------------- #
class TestAlertchainHealth:
    """Alert-chain health alerts must classify as alertchain_health / TYPE-8M."""

    @pytest.mark.parametrize(
        "alertname",
        [
            "AlertChainBroken_Alertmanager",
            "AlertChainBroken_Sentry",
            "NoAlertsReceived2Hours",
            "AlertChainUnhealthy",
            "NoAlertsReceived",
            "PrometheusNotConnectedToAlertmanager",
        ],
    )
    def test_alertchain_alerts(self, alertname):
        """Each listed alert name at critical is alertchain_health / TYPE-8M."""
        category, notification_type = classify_alert_early(alertname, "critical", {})
        assert (category, notification_type) == ("alertchain_health", "TYPE-8M")

    def test_alertchain_beats_severity_info(self):
        """The alert-chain rule wins over severity=info."""
        # Even with severity=info, AlertChainBroken must be alertchain_health.
        category, notification_type = classify_alert_early(
            "AlertChainBroken_Alertmanager", "info", {}
        )
        assert (category, notification_type) == ("alertchain_health", "TYPE-8M")
# --------------------------------------------------------------------------- #
# ADR-075: flywheel_health (TYPE-8M)
# --------------------------------------------------------------------------- #
class TestFlywheelHealth:
    """Flywheel health alerts must classify as flywheel_health / TYPE-8M."""

    def test_auto_repair_low_success(self):
        """AutoRepairLowSuccessRate at warning is flywheel_health / TYPE-8M."""
        category, notification_type = classify_alert_early(
            "AutoRepairLowSuccessRate", "warning", {}
        )
        assert (category, notification_type) == ("flywheel_health", "TYPE-8M")

    def test_permanent_fix_required(self):
        """PermanentFixRequired at warning is flywheel_health / TYPE-8M."""
        category, notification_type = classify_alert_early(
            "PermanentFixRequired", "warning", {}
        )
        assert (category, notification_type) == ("flywheel_health", "TYPE-8M")

    def test_flywheel_prefix(self):
        """A Flywheel-prefixed alert at critical is flywheel_health / TYPE-8M."""
        category, notification_type = classify_alert_early(
            "FlywheelPlaybookZero", "critical", {}
        )
        assert (category, notification_type) == ("flywheel_health", "TYPE-8M")

    def test_flywheel_beats_severity_info(self):
        """The flywheel rule wins over severity=info."""
        category, notification_type = classify_alert_early(
            "AutoRepairLowSuccessRate", "info", {}
        )
        assert (category, notification_type) == ("flywheel_health", "TYPE-8M")
# --------------------------------------------------------------------------- #
# ADR-075: storage (TYPE-3)
# --------------------------------------------------------------------------- #
class TestStorage:
    """Storage alerts must classify as storage / TYPE-3."""

    def test_minio_down(self):
        """MinIODown at critical is storage / TYPE-3."""
        category, notification_type = classify_alert_early("MinIODown", "critical", {})
        assert (category, notification_type) == ("storage", "TYPE-3")
# --------------------------------------------------------------------------- #
# ADR-075: devops_tool (TYPE-3)
# --------------------------------------------------------------------------- #
class TestDevopsTool:
    """DevOps tooling alerts must classify as devops_tool / TYPE-3."""

    @pytest.mark.parametrize(
        "alertname",
        [
            "OpenClawDown",
            "SignOzDown",
            "GiteaDown",
            "HarborDown",
            "SentryDown",
            "AlertmanagerDown",
            "KaliScannerDown",
            "GiteaCIPipelineFailed",
        ],
    )
    def test_devops_tools(self, alertname):
        """Each listed alert name at critical is devops_tool / TYPE-3."""
        category, notification_type = classify_alert_early(alertname, "critical", {})
        assert (category, notification_type) == ("devops_tool", "TYPE-3")
# --------------------------------------------------------------------------- #
# ADR-075: external_site (TYPE-3)
# --------------------------------------------------------------------------- #
class TestExternalSite:
    """External site availability alerts must classify as external_site / TYPE-3."""

    @pytest.mark.parametrize(
        "alertname",
        [
            "MoWoooWorkDown",
            "TsenyangWebsiteDown",
            "StockWoooWorkDown",
            "BitanWoooWorkDown",
        ],
    )
    def test_external_sites(self, alertname):
        """Each listed alert name at critical is external_site / TYPE-3."""
        category, notification_type = classify_alert_early(alertname, "critical", {})
        assert (category, notification_type) == ("external_site", "TYPE-3")
# --------------------------------------------------------------------------- #
# ADR-075: ssl_cert (TYPE-3)
# --------------------------------------------------------------------------- #
class TestSslCert:
    """Certificate expiry alerts must classify as ssl_cert / TYPE-3."""

    def test_external_site_ssl(self):
        """ExternalSiteSSLExpiringSoon at warning is ssl_cert / TYPE-3."""
        category, notification_type = classify_alert_early(
            "ExternalSiteSSLExpiringSoon", "warning", {}
        )
        assert (category, notification_type) == ("ssl_cert", "TYPE-3")

    def test_tls_cert(self):
        """TLSCertExpiryCritical at critical is ssl_cert / TYPE-3."""
        category, notification_type = classify_alert_early(
            "TLSCertExpiryCritical", "critical", {}
        )
        assert (category, notification_type) == ("ssl_cert", "TYPE-3")
# --------------------------------------------------------------------------- #
# TYPE-3: Kubernetes
# --------------------------------------------------------------------------- #
class TestKubernetes:
    """Kubernetes-related alerts must classify as kubernetes / TYPE-3."""

    @pytest.mark.parametrize(
        "alertname",
        [
            "KubePodCrashLooping",
            "PodHighMemory",
            "DeploymentReplicasMismatch",
            "NodeNotReady",
            "ArgoCDSyncFailed",
        ],
    )
    def test_k8s_prefixes(self, alertname):
        """Each listed alert name at critical is kubernetes / TYPE-3."""
        category, notification_type = classify_alert_early(alertname, "critical", {})
        assert (notification_type, category) == ("TYPE-3", "kubernetes")

    def test_velero_backup_failed_is_kubernetes(self):
        """VeleroBackupFailed at critical is kubernetes / TYPE-3."""
        # With severity=critical the backup rule does not match, so the alert
        # falls to the Velero prefix -> kubernetes, TYPE-3.
        category, notification_type = classify_alert_early(
            "VeleroBackupFailed", "critical", {}
        )
        assert (notification_type, category) == ("TYPE-3", "kubernetes")

    def test_velero_backup_success_info_is_type1(self):
        """VeleroBackupSuccess at severity=info is TYPE-1."""
        # VeleroBackupSuccess with severity=info -> TYPE-1.
        _, notification_type = classify_alert_early("VeleroBackupSuccess", "info", {})
        assert notification_type == "TYPE-1"
# --------------------------------------------------------------------------- #
# TYPE-3: Database
# --------------------------------------------------------------------------- #
class TestDatabase:
    """Database alerts must classify as database / TYPE-3."""

    def test_postgres(self):
        """PostgresDown at critical is database / TYPE-3."""
        category, notification_type = classify_alert_early(
            "PostgresDown", "critical", {}
        )
        assert (notification_type, category) == ("TYPE-3", "database")

    def test_redis(self):
        """RedisMemoryHigh at warning is database / TYPE-3."""
        category, notification_type = classify_alert_early(
            "RedisMemoryHigh", "warning", {}
        )
        assert (notification_type, category) == ("TYPE-3", "database")
# --------------------------------------------------------------------------- #
# TYPE-3: General (fallback)
# --------------------------------------------------------------------------- #
class TestGeneral:
    """Fallback cases: unmatched alert names classify as general / TYPE-3."""

    def test_unknown_alert(self):
        """An unrecognised alert name is general / TYPE-3."""
        category, notification_type = classify_alert_early(
            "SomeUnknownAlert", "warning", {}
        )
        assert (notification_type, category) == ("TYPE-3", "general")

    def test_empty_alertname(self):
        """An empty alert name is general / TYPE-3."""
        category, notification_type = classify_alert_early("", "warning", {})
        assert (notification_type, category) == ("TYPE-3", "general")
# --------------------------------------------------------------------------- #
# Priority checks — the config_drift and info rules should take precedence over prefix rules
# --------------------------------------------------------------------------- #
class TestPriority:
    """Ordering checks between the name, severity and prefix rules."""

    def test_config_drift_beats_severity(self):
        """ConfigurationDrift is config_drift at severity=warning too."""
        # ConfigurationDrift must hit config_drift even with severity=warning.
        category, _ = classify_alert_early("ConfigurationDrift", "warning", {})
        assert category == "config_drift"

    def test_info_severity_beats_docker_prefix(self):
        """severity=info wins over the Docker prefix."""
        # Docker prefix + severity=info -> the info rule matches first.
        category, _ = classify_alert_early("DockerContainerOOM", "info", {})
        assert category == "info"