From 251554c0440f0b6c0f2668dcee7780495c873c57 Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 7 May 2026 01:35:09 +0800 Subject: [PATCH] fix(awooop): record grouped alert events --- apps/api/src/api/v1/platform/__init__.py | 2 + apps/api/src/api/v1/platform/events.py | 58 +++++++++ apps/api/src/api/v1/webhooks.py | 14 +++ apps/api/src/services/channel_hub.py | 110 ++++++++++++++++++ .../src/services/platform_operator_service.py | 50 ++++++++ .../test_channel_hub_grouped_alert_events.py | 36 ++++++ apps/api/tests/test_platform_router_order.py | 10 ++ .../web/src/app/[locale]/awooop/runs/page.tsx | 93 +++++++++++++++ 8 files changed, 373 insertions(+) create mode 100644 apps/api/src/api/v1/platform/events.py create mode 100644 apps/api/tests/test_channel_hub_grouped_alert_events.py diff --git a/apps/api/src/api/v1/platform/__init__.py b/apps/api/src/api/v1/platform/__init__.py index a9ff3e19..e3ca2802 100644 --- a/apps/api/src/api/v1/platform/__init__.py +++ b/apps/api/src/api/v1/platform/__init__.py @@ -9,11 +9,13 @@ ADR-106/ADR-107/ADR-114/ADR-115/ADR-116 from fastapi import APIRouter from src.api.v1.platform.contracts import router as contracts_router +from src.api.v1.platform.events import router as events_router from src.api.v1.platform.operator_runs import router as operator_runs_router from src.api.v1.platform.runs import router as runs_router from src.api.v1.platform.tenants import router as tenants_router router = APIRouter() +router.include_router(events_router) # 2026-05-06 Codex: FastAPI 依註冊順序比對路由。Operator Console 的 # `/runs/list` 必須排在 `/runs/{run_id}` 前面,否則 `list` 會被當成 # run_id,造成前端 Run 監控頁 HTTP 422。 diff --git a/apps/api/src/api/v1/platform/events.py b/apps/api/src/api/v1/platform/events.py new file mode 100644 index 00000000..e4fd7f60 --- /dev/null +++ b/apps/api/src/api/v1/platform/events.py @@ -0,0 +1,58 @@ +""" +AwoooP Operator Console — Channel Events API +============================================ +提供 Operator Console 讀取 Communication Hub / legacy mirror 的事件摘要。 +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Any +from uuid import UUID + +from fastapi import APIRouter, Query +from pydantic import BaseModel + +from src.services.platform_operator_service import list_recent_channel_events + +router = APIRouter() + + +class ChannelEventItem(BaseModel): + event_id: UUID + project_id: str + channel_type: str + provider_event_id: str + channel_chat_id: str | None + content_preview: str | None + is_duplicate: bool + received_at: datetime + + +class RecentEventsResponse(BaseModel): + events: list[ChannelEventItem] + total: int + limit: int + + +@router.get( + "/events/recent", + response_model=RecentEventsResponse, + summary="列出最近 Channel Events", + description=( + "返回 awooop_conversation_event 最近事件。" + "可用 channel_type / provider_prefix 過濾,例如 alert-group 收斂事件。" + ), +) +async def list_recent_events( + project_id: str | None = Query(None, description="租戶 ID(可選)"), + channel_type: str | None = Query(None, description="通道類型(可選)"), + provider_prefix: str | None = Query(None, description="provider_event_id 前綴(可選)"), + limit: int = Query(20, ge=1, le=100, description="最多返回筆數"), +) -> dict[str, Any]: + return await list_recent_channel_events( + project_id=project_id, + channel_type=channel_type, + provider_prefix=provider_prefix, + limit=limit, + ) diff --git a/apps/api/src/api/v1/webhooks.py b/apps/api/src/api/v1/webhooks.py index d7c68ec1..b38bbc2b 100644 --- a/apps/api/src/api/v1/webhooks.py +++ b/apps/api/src/api/v1/webhooks.py @@ -55,6 +55,7 @@ from src.services.alertmanager_llm_guard import ( from src.services.approval_db import get_approval_service from src.services.auto_approve import get_auto_approve_policy from src.services.auto_repair_service import AutoRepairService +from src.services.channel_hub import record_grouped_alert_event # Phase 15.2: Trace Context (moved to SignalProducerService) # get_trace_context 已移至 Service 層 @@ -2322,6 +2323,19 @@ async def alertmanager_webhook( parent_fingerprint=grouping_result.parent_fingerprint, reason="Alert storm suppressed — child alert within 5-min window", ) + background_tasks.add_task( + record_grouped_alert_event, + project_id="awoooi", + alert_id=alert_id, + alertname=alertname, + severity=severity, + namespace=namespace, + target_resource=target_resource, + group_key=grouping_result.group_key, + count=grouping_result.count, + parent_fingerprint=grouping_result.parent_fingerprint, + fingerprint=fingerprint, + ) return AlertResponse( success=True, message=( diff --git a/apps/api/src/services/channel_hub.py b/apps/api/src/services/channel_hub.py index b3c49bfe..07f12122 100644 --- a/apps/api/src/services/channel_hub.py +++ b/apps/api/src/services/channel_hub.py @@ -128,6 +128,116 @@ async def mirror_inbound_event( return event_id +def build_grouped_alert_provider_event_id(alert_id: str, fingerprint: str) -> str: + """建立 grouped child alert 的冪等 provider_event_id。""" + safe_alert_id = str(alert_id).strip() or "unknown" + safe_fingerprint = str(fingerprint).strip()[:32] or "no-fingerprint" + return f"alert-group:{safe_alert_id}:{safe_fingerprint}" + + +def format_grouped_alert_event_content( + *, + alert_id: str, + alertname: str, + severity: str, + namespace: str, + target_resource: str, + group_key: str, + count: int, + parent_fingerprint: str | None, + fingerprint: str, +) -> str: + """格式化只落 AwoooP、不發 Telegram 的告警收斂事件摘要。""" + parent = parent_fingerprint or "-" + target = target_resource or "-" + ns = namespace or "default" + return "\n".join( + [ + "告警已收斂,不發 Telegram", + f"Alert ID: {alert_id}", + f"Alert: {alertname}", + f"Severity: {severity}", + f"Namespace: {ns}", + f"Target: {target}", + f"Group: {group_key}", + f"Group Count: {count}", + f"Parent Fingerprint: {parent}", + f"Child Fingerprint: {fingerprint}", + ] + ) + + +async def record_grouped_alert_event( + *, + project_id: str, + alert_id: str, + alertname: str, + severity: str, + namespace: str, + target_resource: str, + group_key: str, + count: int, + parent_fingerprint: str | None, + fingerprint: str, +) -> UUID | None: + """ + 將被 AlertGroupingService 收斂的子告警落到 AwoooP conversation_event。 + + 這條路徑刻意不發 Telegram,只保留 operator-facing 脈絡: + - 群組不洗版 + - Console 仍能看到同組告警正在持續發生 + - DB 失敗 fail-open,不影響 Alertmanager webhook ACK + """ + try: + from src.db.base import get_db_context + + provider_event_id = build_grouped_alert_provider_event_id(alert_id, fingerprint) + content = format_grouped_alert_event_content( + alert_id=alert_id, + alertname=alertname, + severity=severity, + namespace=namespace, + target_resource=target_resource, + group_key=group_key, + count=count, + parent_fingerprint=parent_fingerprint, + fingerprint=fingerprint, + ) + + async with get_db_context(project_id) as db: + event_id = await mirror_inbound_event( + db, + project_id=project_id, + channel_type="internal", + provider_event_id=provider_event_id, + platform_subject_id="alertmanager", + channel_user_id="alertmanager", + channel_chat_id=f"alert-group:{group_key}", + content_type="text", + raw_content=content, + provider_ts=datetime.now(timezone.utc), + ) + + logger.info( + "grouped_alert_event_recorded", + project_id=project_id, + alert_id=alert_id, + event_id=str(event_id), + group_key=group_key, + count=count, + ) + return event_id + except Exception as exc: + logger.warning( + "grouped_alert_event_record_failed", + project_id=project_id, + alert_id=alert_id, + group_key=group_key, + error=str(exc), + ) + return None + + # ───────────────────────────────────────────────────────────────────────────── # 出站訊息記錄 # ───────────────────────────────────────────────────────────────────────────── diff --git a/apps/api/src/services/platform_operator_service.py b/apps/api/src/services/platform_operator_service.py index b72a2d2f..704d9a9e 100644 --- a/apps/api/src/services/platform_operator_service.py +++ b/apps/api/src/services/platform_operator_service.py @@ -18,6 +18,7 @@ from sqlalchemy import func, select from src.db.awooop_models import ( AwoooPContractRevision, + AwoooPConversationEvent, AwoooPProject, AwoooPRunState, ) @@ -31,6 +32,7 @@ logger = structlog.get_logger(__name__) _MAX_CONTRACTS = 200 _DEFAULT_PER_PAGE = 50 _MAX_PER_PAGE = 200 +_MAX_EVENTS = 100 # ============================================================================= # Tenants @@ -147,6 +149,54 @@ async def list_runs( return {"runs": runs, "total": total, "page": page, "per_page": per_page} +# ============================================================================= +# Channel Events +# ============================================================================= + +async def list_recent_channel_events( + *, + project_id: str | None, + channel_type: str | None, + provider_prefix: str | None, + limit: int, +) -> dict[str, Any]: + """列出最近 channel events,供 Operator Console 顯示收斂/鏡像脈絡。""" + safe_limit = max(1, min(limit, _MAX_EVENTS)) + + async with get_db_context("awoooi") as db: + stmt = select(AwoooPConversationEvent).order_by( + AwoooPConversationEvent.received_at.desc() + ) + if project_id is not None: + stmt = stmt.where(AwoooPConversationEvent.project_id == project_id) + if channel_type is not None: + stmt = stmt.where(AwoooPConversationEvent.channel_type == channel_type) + if provider_prefix is not None: + stmt = stmt.where( + AwoooPConversationEvent.provider_event_id.like( + f"{provider_prefix}%" + ) + ) + + result = await db.execute(stmt.limit(safe_limit)) + rows = list(result.scalars().all()) + + events = [ + { + "event_id": r.event_id, + "project_id": r.project_id, + "channel_type": r.channel_type, + "provider_event_id": r.provider_event_id, + "channel_chat_id": r.channel_chat_id, + "content_preview": r.content_preview, + "is_duplicate": r.is_duplicate, + "received_at": r.received_at, + } + for r in rows + ] + return {"events": events, "total": len(events), "limit": safe_limit} + + # ============================================================================= # Approvals # ============================================================================= diff --git a/apps/api/tests/test_channel_hub_grouped_alert_events.py b/apps/api/tests/test_channel_hub_grouped_alert_events.py new file mode 100644 index 00000000..c16020d6 --- /dev/null +++ b/apps/api/tests/test_channel_hub_grouped_alert_events.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from src.services.channel_hub import ( + build_grouped_alert_provider_event_id, + format_grouped_alert_event_content, +) + + +def test_build_grouped_alert_provider_event_id_is_deterministic() -> None: + event_id = build_grouped_alert_provider_event_id( + "INC-20260507-ABCD12", + "1234567890abcdef" * 4, + ) + + assert event_id == "alert-group:INC-20260507-ABCD12:1234567890abcdef1234567890abcdef" + assert len(event_id) < 256 + + +def test_format_grouped_alert_event_content_keeps_operator_context() -> None: + content = format_grouped_alert_event_content( + alert_id="INC-20260507-ABCD12", + alertname="DockerContainerRestartSpike", + severity="critical", + namespace="default", + target_resource="sentry-self-hosted-events-consumer-1", + group_key="DockerContainerRestartSpike:default", + count=4, + parent_fingerprint="parent-fp", + fingerprint="child-fp", + ) + + assert "告警已收斂,不發 Telegram" in content + assert "Alert: DockerContainerRestartSpike" in content + assert "Target: sentry-self-hosted-events-consumer-1" in content + assert "Group Count: 4" in content + assert "Parent Fingerprint: parent-fp" in content diff --git a/apps/api/tests/test_platform_router_order.py b/apps/api/tests/test_platform_router_order.py index 815fbf5e..c9c5eeee 100644 --- a/apps/api/tests/test_platform_router_order.py +++ b/apps/api/tests/test_platform_router_order.py @@ -13,3 +13,13 @@ def test_runs_list_route_is_registered_before_dynamic_run_id() -> None: assert "/runs/list" in paths assert "/runs/{run_id}" in paths assert paths.index("/runs/list") < paths.index("/runs/{run_id}") + + +def test_recent_events_route_is_registered() -> None: + paths = [ + route.path + for route in router.routes + if "GET" in getattr(route, "methods", set()) + ] + + assert "/events/recent" in paths diff --git a/apps/web/src/app/[locale]/awooop/runs/page.tsx b/apps/web/src/app/[locale]/awooop/runs/page.tsx index 3683ac2a..949bce55 100644 --- a/apps/web/src/app/[locale]/awooop/runs/page.tsx +++ b/apps/web/src/app/[locale]/awooop/runs/page.tsx @@ -8,6 +8,7 @@ import { useState, useEffect, useCallback, useMemo, useRef } from "react"; import { Activity, + BellOff, RefreshCw, AlertCircle, Filter, @@ -62,6 +63,23 @@ interface RunsResponse { per_page: number; } +interface PlatformEvent { + event_id: string; + project_id: string; + channel_type: string; + provider_event_id: string; + channel_chat_id?: string | null; + content_preview?: string | null; + is_duplicate: boolean; + received_at: string; +} + +interface RecentEventsResponse { + events?: PlatformEvent[]; + total: number; + limit: number; +} + // ============================================================================= // 常數 // ============================================================================= @@ -293,12 +311,70 @@ function RunRow({ run }: { run: Run }) { ); } +function GroupedAlertEventsPanel({ events }: { events: PlatformEvent[] }) { + return ( +
+
+
+
+ + {events.length} 筆 + +
+ {events.length === 0 ? ( +
+ 目前沒有近期收斂事件。若同組告警再次觸發,第二筆起會出現在這裡。 +
+ ) : ( +
+ {events.slice(0, 6).map((event) => { + const receivedAt = event.received_at + ? new Date(event.received_at).toLocaleTimeString("zh-TW", { + hour: "2-digit", + minute: "2-digit", + }) + : "--"; + return ( +
+
+
+

+ {event.provider_event_id.replace("alert-group:", "")} +

+

+ {event.project_id} · {receivedAt} +

+
+ + internal + +
+

+ {event.content_preview || "無摘要"} +

+
+ ); + })} +
+ )} +
+ ); +} + // ============================================================================= // Main Component // ============================================================================= export default function RunsPage() { const [runs, setRuns] = useState([]); + const [groupedEvents, setGroupedEvents] = useState([]); const [tenants, setTenants] = useState([]); const [total, setTotal] = useState(0); const [loading, setLoading] = useState(true); @@ -337,6 +413,21 @@ export default function RunsPage() { const rows = Array.isArray(data.runs) ? data.runs : data.items; setRuns(Array.isArray(rows) ? rows : []); setTotal(data.total ?? 0); + + const eventParams = new URLSearchParams(); + eventParams.set("channel_type", "internal"); + eventParams.set("provider_prefix", "alert-group"); + eventParams.set("limit", "6"); + if (projectFilter) eventParams.set("project_id", projectFilter); + + const eventsRes = await fetch( + `${API_BASE}/api/v1/platform/events/recent?${eventParams.toString()}` + ); + if (eventsRes.ok) { + const eventsData: RecentEventsResponse = await eventsRes.json(); + setGroupedEvents(Array.isArray(eventsData.events) ? eventsData.events : []); + } + setLastRefresh(new Date()); } catch (err) { setError(err instanceof Error ? err.message : "載入失敗"); @@ -427,6 +518,8 @@ export default function RunsPage() { })} + + {/* Filters */}