fix(api): Incident-Approval 同步 (活躍事件修復)

問題: Alertmanager webhook 只創建 Approval,沒有創建對應 Incident
      導致「活躍事件」顯示 0 筆,但右側有待簽核卡片

修復:
- 新增 create_incident_for_approval() 函數
- Approval 創建後同步創建 Incident
- 存入 Redis (incident:INC-*) 7 天 TTL
- 支援 LLM 成功路徑和 fallback 路徑

遵循 feedback_incident_approval_sync.md 鐵律

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-03-24 23:43:19 +08:00
parent 2bb76433f1
commit b6459819e2

View File

@@ -42,6 +42,7 @@ from src.models.approval import (
DryRunCheck,
RiskLevel,
)
from src.models.incident import Incident, IncidentStatus, Severity, Signal
from src.services.approval_db import get_approval_service
# Phase 5: OpenClaw AI Engine
@@ -54,6 +55,87 @@ router = APIRouter(prefix="/webhooks", tags=["Webhooks"])
logger = get_logger("awoooi.webhooks")
# =============================================================================
# Incident-Approval 同步 (feedback_incident_approval_sync.md 鐵律)
# =============================================================================
# Risk level -> incident severity mapping.
# Keys are lowercase risk-level names; callers lowercase the incoming value
# before the lookup and fall back to Severity.P2 when the key is missing.
RISK_TO_SEVERITY = {
"critical": Severity.P0,
"high": Severity.P1,
"medium": Severity.P2,
"low": Severity.P3,
}
# Incident TTL: 7 days, expressed in seconds (sec * min * hour * day)
INCIDENT_TTL_SECONDS = 60 * 60 * 24 * 7
async def create_incident_for_approval(
    approval_id: str,
    risk_level: str,
    target_resource: str,
    namespace: str,
    alert_type: str,
    message: str,
    source: str = "alertmanager",
) -> str:
    """
    Create the Incident that accompanies an Approval (active-incident sync).

    Design rules:
    - An Approval and its Incident must exist together.
    - The Incident is stored in Redis under ``incident:<incident_id>``.
    - The Redis entry expires after INCIDENT_TTL_SECONDS (7 days).

    Args:
        approval_id: Approval identifier; parsed with ``uuid.UUID`` and
            recorded in the incident's ``proposal_ids``.
        risk_level: Risk level name; lowercased and looked up in
            RISK_TO_SEVERITY, defaulting to ``Severity.P2`` when unknown.
        target_resource: Affected resource; used as the single entry of
            ``affected_services`` and as the signal's ``resource`` label.
        namespace: Stored as the signal's ``namespace`` label.
        alert_type: Stored as the signal's ``alert_name``.
        message: Stored as the signal's ``message`` annotation.
        source: Origin of the alert, stored on the signal.

    Returns:
        str: Incident ID

    Raises:
        ValueError: Raised by ``uuid.UUID`` if ``approval_id`` is not a
            valid UUID string.
    """
    from uuid import UUID

    store = get_redis()

    # Translate the risk level into a severity; unknown levels become P2.
    mapped_severity = RISK_TO_SEVERITY.get(risk_level.lower(), Severity.P2)

    # The Signal captures the raw alert that triggered this incident.
    signal_labels = {"namespace": namespace, "resource": target_resource}
    signal_annotations = {"message": message}
    origin_signal = Signal(
        alert_name=alert_type,
        severity=mapped_severity,
        source=source,
        fired_at=datetime.now(UTC),
        labels=signal_labels,
        annotations=signal_annotations,
    )

    # The Incident is linked to the approval through proposal_ids.
    incident = Incident(
        status=IncidentStatus.INVESTIGATING,
        severity=mapped_severity,
        signals=[origin_signal],
        affected_services=[target_resource],
        proposal_ids=[UUID(approval_id)],
    )

    # Persist the serialized incident in Redis with the 7-day expiry.
    redis_key = f"incident:{incident.incident_id}"
    serialized = incident.model_dump_json()
    await store.set(redis_key, serialized, ex=INCIDENT_TTL_SECONDS)

    logger.info(
        "incident_created_for_approval",
        incident_id=incident.incident_id,
        approval_id=approval_id,
        severity=mapped_severity.value,
        target=target_resource,
    )

    return incident.incident_id
# =============================================================================
# Phase 5: Telegram 背景推送任務 (非阻塞)
# =============================================================================
@@ -1236,6 +1318,19 @@ async def alertmanager_webhook(
fingerprint=fingerprint,
)
# ================================================================
# Incident-Approval 同步 (鐵律: 必須同時創建)
# ================================================================
await create_incident_for_approval(
approval_id=str(approval.id),
risk_level=risk_level.value,
target_resource=target_resource,
namespace=namespace,
alert_type=alert_type,
message=message,
source="alertmanager",
)
root_cause = analysis_result.description or message
estimated_downtime = blast.estimated_downtime if blast else "~30s"
primary_responsibility = analysis_result.primary_responsibility or "COLLAB"
@@ -1294,6 +1389,19 @@ async def alertmanager_webhook(
fingerprint=fingerprint,
)
# ================================================================
# Incident-Approval 同步 (鐵律: 即使 LLM 失敗也必須創建)
# ================================================================
await create_incident_for_approval(
approval_id=str(approval.id),
risk_level="medium",
target_resource=target_resource,
namespace=namespace,
alert_type=alert_type,
message=message,
source="alertmanager",
)
background_tasks.add_task(
_push_to_telegram_background,
approval_id=str(approval.id),