refactor(M3): alertname_to_type 抽至 src/constants/alert_types.py
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled

- 新增 src/constants/__init__.py + alert_types.py
- ALERTNAME_TO_TYPE 常數(49 筆)從 webhooks.py 內聯 dict 遷移至模組
- webhooks.py 改用 ALERTNAME_TO_TYPE.get(alertname, "custom")
- TODO I1: 下 Sprint 整合 ADR-064 Rule Engine 動態推斷(此為中間狀態)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-04-11 21:19:46 +08:00
parent 37dfbaf26c
commit 1ede9f933f
3 changed files with 78 additions and 63 deletions

View File

@@ -30,6 +30,7 @@ from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request,
from pydantic import BaseModel, Field
from src.core.config import settings
from src.constants.alert_types import ALERTNAME_TO_TYPE
from src.core.constants import is_cicd_alertname, is_heartbeat_alertname
from src.core.logging import get_logger
from src.core.metrics import record_alert_chain_success
@@ -1099,69 +1100,9 @@ async def alertmanager_webhook(
approval_created=False,
)
# BUG-008 修復 2026-04-11: 從 9 筆擴充為完整涵蓋 alerts-unified.yml 全部 42 個 alertname
# 新規則由 layer/component 語意推導,取代靜態小表
alertname_to_type = {
# --- 主機層 (host_alerts) ---
"HostDown": "host_down",
"HostHighCpuLoad": "host_cpu",
"HostOutOfMemory": "host_memory",
"HostOutOfDiskSpace": "disk_full",
"HostBackupFailed": "backup_failure",
# --- K8s 層 (kubernetes_alerts) ---
"K3sNodeNotReady": "k8s_node_failure",
"KubePodCrashLooping": "k8s_pod_crash",
"KubePodNotReady": "k8s_pod_crash",
"KubeNodeNotReady": "k8s_node_failure",
"KubeNodeUnreachable": "k8s_node_failure",
"KubeDeploymentReplicasMismatch": "k8s_deployment_mismatch",
"VeleroBackupFailed": "backup_failure",
"VeleroBackupNotRun": "backup_failure",
# --- 資料庫 (database_alerts / database_detail_alerts) ---
"PostgreSQLDown": "database_down",
"RedisDown": "database_down",
"PostgreSQLHighConnections": "database_performance",
"RedisMemoryHigh": "high_memory",
"PostgreSQLSlowQueries": "database_performance",
"PostgreSQLDeadlocks": "database_performance",
"PostgreSQLTooManyConnections": "database_performance",
"RedisKeyEviction": "database_performance",
"RedisConnectionsHigh": "database_performance",
"RedisCommandLatencyHigh": "database_performance",
# --- 服務可用性 (service_alerts) ---
"OpenClawDown": "service_down",
"SignOzDown": "service_down",
"SentryDown": "service_down",
"HarborDown": "service_down",
"GiteaDown": "service_down",
"AlertmanagerDown": "service_down",
"MinIODown": "service_down",
"KaliScannerDown": "service_down",
# --- 外部網站 (external_website_alerts) ---
"MoWoooWorkDown": "service_404",
"TsenyangWebsiteDown": "service_404",
"StockWoooWorkDown": "service_404",
"BitanWoooWorkDown": "service_404",
"ExternalSiteSSLExpiringSoon": "ssl_expiry",
# --- 告警鏈路 (alert_chain) ---
"AlertChainBroken_Alertmanager": "alert_chain_broken",
"AlertChainBroken_Sentry": "alert_chain_broken",
"NoAlertsReceived2Hours": "alert_chain_broken",
"AlertChainUnhealthy": "alert_chain_broken",
# --- Docker 容器 (docker_health_alerts) ---
"DockerContainerUnhealthy": "docker_container_unhealthy",
"DockerContainerExited": "docker_container_unhealthy",
# --- 自動修復監控 (auto_repair) ---
"AutoRepairLowSuccessRate": "auto_repair_degraded",
"PermanentFixRequired": "auto_repair_degraded",
# --- 舊版相容 ---
"HighCPUUsage": "high_cpu",
"HighMemoryUsage": "high_memory",
"DiskSpaceLow": "disk_full",
"SSLCertExpiringSoon": "ssl_expiry",
"TargetDown": "service_404",
}
alert_type = alertname_to_type.get(alertname, "custom")
# M3 重構 2026-04-11: alertname_to_type 抽至 src/constants/alert_types.py
# TODO I1: 下 Sprint 整合 ADR-064 Rule Engine 動態推斷
alert_type = ALERTNAME_TO_TYPE.get(alertname, "custom")
severity_map = {"critical": "critical", "warning": "warning", "info": "info"}
severity = severity_map.get(

View File

@@ -0,0 +1 @@
# AWOOOI Constants Package

View File

@@ -0,0 +1,73 @@
"""
Alert Type Mapping Constants
============================

Static lookup table from an Alertmanager ``alertname`` to an incident type.

Origin:    BUG-008 fix (2026-04-11), which grew the table from 9 entries to
           the 49 defined below; it is intended to cover every alertname in
           alerts-unified.yml plus a handful of legacy aliases.
Migration: M3 (2026-04-11) -- moved here from an inline dict in webhooks.py.

TODO (I1): next sprint, integrate the ADR-064 Rule Engine and infer the
           incident type dynamically from YAML rules. Until then this static
           dict is an accepted intermediate state.
"""

# alertname -> incident_type (49 entries). Names missing from this table are
# expected to be handled by the caller, e.g. ``.get(alertname, "custom")``.
ALERTNAME_TO_TYPE: dict[str, str] = {
    # --- Host layer (host_alerts) ---
    "HostDown": "host_down",
    "HostHighCpuLoad": "host_cpu",
    "HostOutOfMemory": "host_memory",
    "HostOutOfDiskSpace": "disk_full",
    "HostBackupFailed": "backup_failure",
    # --- Kubernetes layer (kubernetes_alerts) ---
    "K3sNodeNotReady": "k8s_node_failure",
    "KubePodCrashLooping": "k8s_pod_crash",
    "KubePodNotReady": "k8s_pod_crash",
    "KubeNodeNotReady": "k8s_node_failure",
    "KubeNodeUnreachable": "k8s_node_failure",
    "KubeDeploymentReplicasMismatch": "k8s_deployment_mismatch",
    "VeleroBackupFailed": "backup_failure",
    "VeleroBackupNotRun": "backup_failure",
    # --- Databases (database_alerts / database_detail_alerts) ---
    "PostgreSQLDown": "database_down",
    "RedisDown": "database_down",
    "PostgreSQLHighConnections": "database_performance",
    "RedisMemoryHigh": "high_memory",
    "PostgreSQLSlowQueries": "database_performance",
    "PostgreSQLDeadlocks": "database_performance",
    "PostgreSQLTooManyConnections": "database_performance",
    "RedisKeyEviction": "database_performance",
    "RedisConnectionsHigh": "database_performance",
    "RedisCommandLatencyHigh": "database_performance",
    # --- Service availability (service_alerts) ---
    "OpenClawDown": "service_down",
    "SignOzDown": "service_down",
    "SentryDown": "service_down",
    "HarborDown": "service_down",
    "GiteaDown": "service_down",
    "AlertmanagerDown": "service_down",
    "MinIODown": "service_down",
    "KaliScannerDown": "service_down",
    # --- External websites (external_website_alerts) ---
    "MoWoooWorkDown": "service_404",
    "TsenyangWebsiteDown": "service_404",
    "StockWoooWorkDown": "service_404",
    "BitanWoooWorkDown": "service_404",
    "ExternalSiteSSLExpiringSoon": "ssl_expiry",
    # --- Alerting pipeline health (alert_chain) ---
    "AlertChainBroken_Alertmanager": "alert_chain_broken",
    "AlertChainBroken_Sentry": "alert_chain_broken",
    "NoAlertsReceived2Hours": "alert_chain_broken",
    "AlertChainUnhealthy": "alert_chain_broken",
    # --- Docker containers (docker_health_alerts) ---
    "DockerContainerUnhealthy": "docker_container_unhealthy",
    "DockerContainerExited": "docker_container_unhealthy",
    # --- Auto-repair monitoring (auto_repair) ---
    "AutoRepairLowSuccessRate": "auto_repair_degraded",
    "PermanentFixRequired": "auto_repair_degraded",
    # --- Legacy names kept for backward compatibility ---
    "HighCPUUsage": "high_cpu",
    "HighMemoryUsage": "high_memory",
    "DiskSpaceLow": "disk_full",
    "SSLCertExpiringSoon": "ssl_expiry",
    "TargetDown": "service_404",
}