# =============================================================================
# Incident-Approval synchronisation
# (hard rule from feedback_incident_approval_sync.md)
# =============================================================================

# Maps an approval risk level (lower-case string) to an incident severity.
RISK_TO_SEVERITY = {
    "critical": Severity.P0,
    "high": Severity.P1,
    "medium": Severity.P2,
    "low": Severity.P3,
}

# How long an incident record lives in Redis: 7 days, expressed in seconds.
INCIDENT_TTL_SECONDS = 7 * 86_400


async def create_incident_for_approval(
    approval_id: str,
    risk_level: str,
    target_resource: str,
    namespace: str,
    alert_type: str,
    message: str,
    source: str = "alertmanager",
) -> str:
    """
    Create the Incident that accompanies a freshly created Approval.

    The incident is assembled from the alert details, linked to the approval
    through ``proposal_ids``, serialised to JSON and written to Redis under
    ``incident:<incident_id>`` with an expiry of ``INCIDENT_TTL_SECONDS``.

    Args:
        approval_id: Approval identifier; parsed with ``uuid.UUID``.
        risk_level: Risk level name, matched case-insensitively against
            ``RISK_TO_SEVERITY``. Unknown values map to ``Severity.P2``.
        target_resource: Affected resource; recorded as the only entry of
            ``affected_services`` and as the ``resource`` signal label.
        namespace: Recorded as the ``namespace`` signal label.
        alert_type: Used as the signal's alert name.
        message: Stored in the signal annotations under ``message``.
        source: Origin of the signal; defaults to ``"alertmanager"``.

    Returns:
        str: The ID of the newly created incident.
    """
    from uuid import UUID

    store = get_redis()

    # Unknown risk levels fall back to P2.
    normalized_risk = risk_level.lower()
    mapped_severity = RISK_TO_SEVERITY.get(normalized_risk, Severity.P2)

    # The signal captures the raw alert that triggered the approval.
    signal_labels = {"namespace": namespace, "resource": target_resource}
    signal_annotations = {"message": message}
    origin_signal = Signal(
        alert_name=alert_type,
        severity=mapped_severity,
        source=source,
        fired_at=datetime.now(tz=UTC),
        labels=signal_labels,
        annotations=signal_annotations,
    )

    # The incident starts in INVESTIGATING and references the approval.
    new_incident = Incident(
        status=IncidentStatus.INVESTIGATING,
        severity=mapped_severity,
        signals=[origin_signal],
        affected_services=[target_resource],
        proposal_ids=[UUID(approval_id)],
    )

    # Persist to Redis with the TTL so the record expires automatically.
    redis_key = f"incident:{new_incident.incident_id}"
    serialized = new_incident.model_dump_json()
    await store.set(redis_key, serialized, ex=INCIDENT_TTL_SECONDS)

    log_fields = {
        "incident_id": new_incident.incident_id,
        "approval_id": approval_id,
        "severity": mapped_severity.value,
        "target": target_resource,
    }
    logger.info("incident_created_for_approval", **log_fields)

    return new_incident.incident_id
alertmanager_webhook( fingerprint=fingerprint, ) + # ================================================================ + # Incident-Approval 同步 (鐵律: 即使 LLM 失敗也必須創建) + # ================================================================ + await create_incident_for_approval( + approval_id=str(approval.id), + risk_level="medium", + target_resource=target_resource, + namespace=namespace, + alert_type=alert_type, + message=message, + source="alertmanager", + ) + background_tasks.add_task( _push_to_telegram_background, approval_id=str(approval.id),