fix(api): Incident-Approval 同步 (活躍事件修復)
問題: Alertmanager webhook 只創建 Approval,沒有創建對應 Incident
導致「活躍事件」顯示 0 筆,但右側有待簽核卡片
修復:
- 新增 create_incident_for_approval() 函數
- Approval 創建後同步創建 Incident
- 存入 Redis (incident:INC-*) 7 天 TTL
- 支援 LLM 成功路徑和 fallback 路徑
遵循 feedback_incident_approval_sync.md 鐵律
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -42,6 +42,7 @@ from src.models.approval import (
|
|||||||
DryRunCheck,
|
DryRunCheck,
|
||||||
RiskLevel,
|
RiskLevel,
|
||||||
)
|
)
|
||||||
|
from src.models.incident import Incident, IncidentStatus, Severity, Signal
|
||||||
from src.services.approval_db import get_approval_service
|
from src.services.approval_db import get_approval_service
|
||||||
|
|
||||||
# Phase 5: OpenClaw AI Engine
|
# Phase 5: OpenClaw AI Engine
|
||||||
@@ -54,6 +55,87 @@ router = APIRouter(prefix="/webhooks", tags=["Webhooks"])
|
|||||||
logger = get_logger("awoooi.webhooks")
|
logger = get_logger("awoooi.webhooks")
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Incident-Approval 同步 (feedback_incident_approval_sync.md 鐵律)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Lookup table: approval risk level (lower-case name) -> incident severity.
# Callers are expected to lower-case the risk level before the lookup and to
# choose their own fallback for names that are not listed here.
RISK_TO_SEVERITY = {
    "critical": Severity.P0,  # highest risk maps to the top severity
    "high": Severity.P1,
    "medium": Severity.P2,
    "low": Severity.P3,
}
|
||||||
|
|
||||||
|
# Lifetime of an incident record, in seconds: 7 days.
# Factors read as seconds/minute * minutes/hour * hours/day * days.
INCIDENT_TTL_SECONDS = 60 * 60 * 24 * 7
|
||||||
|
|
||||||
|
|
||||||
|
async def create_incident_for_approval(
    approval_id: str,
    risk_level: str,
    target_resource: str,
    namespace: str,
    alert_type: str,
    message: str,
    source: str = "alertmanager",
) -> str:
    """Create and store the Incident that mirrors a newly created Approval.

    An Incident wrapping a single Signal is built from the alert details and
    written to Redis under ``incident:<incident_id>`` with an expiry of
    ``INCIDENT_TTL_SECONDS``, so that every Approval has a matching Incident.

    Args:
        approval_id: Approval identifier; parsed with ``UUID`` and recorded
            in the incident's ``proposal_ids``.
        risk_level: Risk level name, looked up case-insensitively in
            ``RISK_TO_SEVERITY``; unknown names map to ``Severity.P2``.
        target_resource: Affected resource; stored as the only entry of
            ``affected_services`` and as the signal's ``resource`` label.
        namespace: Stored as the signal's ``namespace`` label.
        alert_type: Used as the signal's ``alert_name``.
        message: Stored as the signal's ``message`` annotation.
        source: Origin of the signal.

    Returns:
        The ``incident_id`` of the stored Incident.

    Raises:
        ValueError: If ``approval_id`` is not a valid UUID string.
    """
    from uuid import UUID

    store = get_redis()

    # Translate the risk level; anything unrecognised is treated as P2.
    risk_key = risk_level.lower()
    mapped_severity = RISK_TO_SEVERITY.get(risk_key, Severity.P2)

    # Wrap the raw alert as the incident's single signal.
    signal_labels = {"namespace": namespace, "resource": target_resource}
    signal_annotations = {"message": message}
    alert_signal = Signal(
        alert_name=alert_type,
        severity=mapped_severity,
        source=source,
        fired_at=datetime.now(UTC),
        labels=signal_labels,
        annotations=signal_annotations,
    )

    # Link the incident back to the approval through proposal_ids.
    linked_proposals = [UUID(approval_id)]
    new_incident = Incident(
        status=IncidentStatus.INVESTIGATING,
        severity=mapped_severity,
        signals=[alert_signal],
        affected_services=[target_resource],
        proposal_ids=linked_proposals,
    )
    incident_id = new_incident.incident_id

    # Persist the serialized incident in Redis with the configured expiry.
    redis_key = f"incident:{incident_id}"
    payload = new_incident.model_dump_json()
    await store.set(redis_key, payload, ex=INCIDENT_TTL_SECONDS)

    logger.info(
        "incident_created_for_approval",
        incident_id=incident_id,
        approval_id=approval_id,
        severity=mapped_severity.value,
        target=target_resource,
    )

    return incident_id
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Phase 5: Telegram 背景推送任務 (非阻塞)
|
# Phase 5: Telegram 背景推送任務 (非阻塞)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -1236,6 +1318,19 @@ async def alertmanager_webhook(
|
|||||||
fingerprint=fingerprint,
|
fingerprint=fingerprint,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ================================================================
|
||||||
|
# Incident-Approval 同步 (鐵律: 必須同時創建)
|
||||||
|
# ================================================================
|
||||||
|
await create_incident_for_approval(
|
||||||
|
approval_id=str(approval.id),
|
||||||
|
risk_level=risk_level.value,
|
||||||
|
target_resource=target_resource,
|
||||||
|
namespace=namespace,
|
||||||
|
alert_type=alert_type,
|
||||||
|
message=message,
|
||||||
|
source="alertmanager",
|
||||||
|
)
|
||||||
|
|
||||||
root_cause = analysis_result.description or message
|
root_cause = analysis_result.description or message
|
||||||
estimated_downtime = blast.estimated_downtime if blast else "~30s"
|
estimated_downtime = blast.estimated_downtime if blast else "~30s"
|
||||||
primary_responsibility = analysis_result.primary_responsibility or "COLLAB"
|
primary_responsibility = analysis_result.primary_responsibility or "COLLAB"
|
||||||
@@ -1294,6 +1389,19 @@ async def alertmanager_webhook(
|
|||||||
fingerprint=fingerprint,
|
fingerprint=fingerprint,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ================================================================
|
||||||
|
# Incident-Approval 同步 (鐵律: 即使 LLM 失敗也必須創建)
|
||||||
|
# ================================================================
|
||||||
|
await create_incident_for_approval(
|
||||||
|
approval_id=str(approval.id),
|
||||||
|
risk_level="medium",
|
||||||
|
target_resource=target_resource,
|
||||||
|
namespace=namespace,
|
||||||
|
alert_type=alert_type,
|
||||||
|
message=message,
|
||||||
|
source="alertmanager",
|
||||||
|
)
|
||||||
|
|
||||||
background_tasks.add_task(
|
background_tasks.add_task(
|
||||||
_push_to_telegram_background,
|
_push_to_telegram_background,
|
||||||
approval_id=str(approval.id),
|
approval_id=str(approval.id),
|
||||||
|
|||||||
Reference in New Issue
Block a user