diff --git a/apps/api/src/api/v1/webhooks.py b/apps/api/src/api/v1/webhooks.py index f5109649..68dcd7f9 100644 --- a/apps/api/src/api/v1/webhooks.py +++ b/apps/api/src/api/v1/webhooks.py @@ -1099,17 +1099,67 @@ async def alertmanager_webhook( approval_created=False, ) - # 映射 alertname → alert_type + # BUG-008 fix (2026-04-11): grew this alertname to alert_type table from 9 entries to cover the alertnames in alerts-unified.yml (original note says 42; TODO confirm the count against the rules file) + # Entries are grouped by rule layer/component; any alertname not listed here falls back to "custom" alertname_to_type = { - "KubePodCrashLooping": "k8s_pod_crash", - "KubePodNotReady": "k8s_pod_crash", - "KubeNodeNotReady": "k8s_node_failure", - "KubeNodeUnreachable": "k8s_node_failure", - "HighCPUUsage": "high_cpu", - "HighMemoryUsage": "high_memory", - "DiskSpaceLow": "disk_full", - "SSLCertExpiringSoon": "ssl_expiry", - "TargetDown": "service_404", + # --- Host layer (host_alerts) --- + "HostDown": "host_down", + "HostHighCpuLoad": "host_cpu", + "HostOutOfMemory": "host_memory", + "HostOutOfDiskSpace": "disk_full", + "HostBackupFailed": "backup_failure", + # --- Kubernetes layer (kubernetes_alerts) --- + "K3sNodeNotReady": "k8s_node_failure", + "KubePodCrashLooping": "k8s_pod_crash", + "KubePodNotReady": "k8s_pod_crash", + "KubeNodeNotReady": "k8s_node_failure", + "KubeNodeUnreachable": "k8s_node_failure", + "KubeDeploymentReplicasMismatch": "k8s_deployment_mismatch", + "VeleroBackupFailed": "backup_failure", + "VeleroBackupNotRun": "backup_failure", + # --- Databases (database_alerts / database_detail_alerts) --- + "PostgreSQLDown": "database_down", + "RedisDown": "database_down", + "PostgreSQLHighConnections": "database_performance", + "RedisMemoryHigh": "high_memory", + "PostgreSQLSlowQueries": "database_performance", + "PostgreSQLDeadlocks": "database_performance", + "PostgreSQLTooManyConnections": "database_performance", + "RedisKeyEviction": "database_performance", + "RedisConnectionsHigh": "database_performance", + "RedisCommandLatencyHigh": "database_performance", + # --- Service availability (service_alerts) --- + "OpenClawDown": "service_down", + "SignOzDown": "service_down", + 
"SentryDown": "service_down", + "HarborDown": "service_down", + "GiteaDown": "service_down", + "AlertmanagerDown": "service_down", + "MinIODown": "service_down", + "KaliScannerDown": "service_down", + # --- External websites (external_website_alerts) --- + "MoWoooWorkDown": "service_404", + "TsenyangWebsiteDown": "service_404", + "StockWoooWorkDown": "service_404", + "BitanWoooWorkDown": "service_404", + "ExternalSiteSSLExpiringSoon": "ssl_expiry", + # --- Alerting pipeline (alert_chain) --- + "AlertChainBroken_Alertmanager": "alert_chain_broken", + "AlertChainBroken_Sentry": "alert_chain_broken", + "NoAlertsReceived2Hours": "alert_chain_broken", + "AlertChainUnhealthy": "alert_chain_broken", + # --- Docker containers (docker_health_alerts) --- + "DockerContainerUnhealthy": "docker_container_unhealthy", + "DockerContainerExited": "docker_container_unhealthy", + # --- Auto-repair monitoring (auto_repair) --- + "AutoRepairLowSuccessRate": "auto_repair_degraded", + "PermanentFixRequired": "auto_repair_degraded", + # --- Legacy alertnames kept for backward compatibility --- + "HighCPUUsage": "high_cpu", + "HighMemoryUsage": "high_memory", + "DiskSpaceLow": "disk_full", + "SSLCertExpiringSoon": "ssl_expiry", + "TargetDown": "service_404", } alert_type = alertname_to_type.get(alertname, "custom")