fix(awooop): record grouped alert events
This commit is contained in:
@@ -9,11 +9,13 @@ ADR-106/ADR-107/ADR-114/ADR-115/ADR-116
|
||||
from fastapi import APIRouter
|
||||
|
||||
from src.api.v1.platform.contracts import router as contracts_router
|
||||
from src.api.v1.platform.events import router as events_router
|
||||
from src.api.v1.platform.operator_runs import router as operator_runs_router
|
||||
from src.api.v1.platform.runs import router as runs_router
|
||||
from src.api.v1.platform.tenants import router as tenants_router
|
||||
|
||||
router = APIRouter()
|
||||
router.include_router(events_router)
|
||||
# 2026-05-06 Codex: FastAPI 依註冊順序比對路由。Operator Console 的
|
||||
# `/runs/list` 必須排在 `/runs/{run_id}` 前面,否則 `list` 會被當成
|
||||
# run_id,造成前端 Run 監控頁 HTTP 422。
|
||||
|
||||
58
apps/api/src/api/v1/platform/events.py
Normal file
58
apps/api/src/api/v1/platform/events.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""
|
||||
AwoooP Operator Console — Channel Events API
|
||||
============================================
|
||||
提供 Operator Console 讀取 Communication Hub / legacy mirror 的事件摘要。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Query
|
||||
from pydantic import BaseModel
|
||||
|
||||
from src.services.platform_operator_service import list_recent_channel_events
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class ChannelEventItem(BaseModel):
    # One channel event row as exposed by the `/events/recent` endpoint.
    # `#` comments are used instead of a docstring so the generated schema
    # description stays unchanged.

    # Unique identifier of the event.
    event_id: UUID
    # Project (tenant) the event belongs to.
    project_id: str
    # Channel type label; grouped-alert events are recorded as "internal".
    channel_type: str
    # Provider-side event id; grouped-alert events use the "alert-group:" prefix.
    provider_event_id: str
    # Chat/conversation identifier on the channel; may be absent.
    channel_chat_id: str | None
    # Short preview of the event content; may be absent.
    content_preview: str | None
    # Duplicate flag copied from the stored event row.
    is_duplicate: bool
    # Timestamp the event was received; used for newest-first ordering.
    received_at: datetime
|
||||
|
||||
|
||||
class RecentEventsResponse(BaseModel):
    # Response envelope of the `/events/recent` endpoint.

    # Events returned for this request.
    events: list[ChannelEventItem]
    # Count reported by the service layer alongside the events.
    total: int
    # Row limit that was applied to the query.
    limit: int
|
||||
|
||||
|
||||
@router.get(
    "/events/recent",
    response_model=RecentEventsResponse,
    summary="列出最近 Channel Events",
    description=(
        "返回 awooop_conversation_event 最近事件。"
        "可用 channel_type / provider_prefix 過濾,例如 alert-group 收斂事件。"
    ),
)
async def list_recent_events(
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    channel_type: str | None = Query(None, description="通道類型(可選)"),
    provider_prefix: str | None = Query(None, description="provider_event_id 前綴(可選)"),
    limit: int = Query(20, ge=1, le=100, description="最多返回筆數"),
) -> dict[str, Any]:
    """Return the most recent channel events for the Operator Console.

    All filters are optional and are forwarded unchanged to the service
    layer; ``limit`` is validated by FastAPI to the range 1..100.
    """
    # Collect the optional filters once so the service call stays compact.
    filters = {
        "project_id": project_id,
        "channel_type": channel_type,
        "provider_prefix": provider_prefix,
    }
    payload = await list_recent_channel_events(**filters, limit=limit)
    return payload
|
||||
@@ -55,6 +55,7 @@ from src.services.alertmanager_llm_guard import (
|
||||
from src.services.approval_db import get_approval_service
|
||||
from src.services.auto_approve import get_auto_approve_policy
|
||||
from src.services.auto_repair_service import AutoRepairService
|
||||
from src.services.channel_hub import record_grouped_alert_event
|
||||
|
||||
# Phase 15.2: Trace Context (moved to SignalProducerService)
|
||||
# get_trace_context 已移至 Service 層
|
||||
@@ -2322,6 +2323,19 @@ async def alertmanager_webhook(
|
||||
parent_fingerprint=grouping_result.parent_fingerprint,
|
||||
reason="Alert storm suppressed — child alert within 5-min window",
|
||||
)
|
||||
background_tasks.add_task(
|
||||
record_grouped_alert_event,
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
group_key=grouping_result.group_key,
|
||||
count=grouping_result.count,
|
||||
parent_fingerprint=grouping_result.parent_fingerprint,
|
||||
fingerprint=fingerprint,
|
||||
)
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
message=(
|
||||
|
||||
@@ -128,6 +128,116 @@ async def mirror_inbound_event(
|
||||
return event_id
|
||||
|
||||
|
||||
def build_grouped_alert_provider_event_id(alert_id: str, fingerprint: str) -> str:
    """Build the idempotent ``provider_event_id`` for a grouped child alert.

    The id has the shape ``alert-group:<alert_id>:<fingerprint>``, so the
    same alert/fingerprint pair always maps to the same id.

    Args:
        alert_id: Alert identifier. Blank or ``None`` becomes ``"unknown"``.
        fingerprint: Child alert fingerprint. Only the first 32 characters
            (after stripping whitespace) are kept; blank or ``None`` becomes
            ``"no-fingerprint"``.

    Returns:
        The deterministic provider event id string.
    """
    # str(None) would produce the literal "None" and silently bypass the
    # fallbacks below, so treat None as a missing value first.
    raw_alert_id = "" if alert_id is None else str(alert_id)
    raw_fingerprint = "" if fingerprint is None else str(fingerprint)

    safe_alert_id = raw_alert_id.strip() or "unknown"
    safe_fingerprint = raw_fingerprint.strip()[:32] or "no-fingerprint"
    return f"alert-group:{safe_alert_id}:{safe_fingerprint}"
|
||||
|
||||
|
||||
def format_grouped_alert_event_content(
|
||||
*,
|
||||
alert_id: str,
|
||||
alertname: str,
|
||||
severity: str,
|
||||
namespace: str,
|
||||
target_resource: str,
|
||||
group_key: str,
|
||||
count: int,
|
||||
parent_fingerprint: str | None,
|
||||
fingerprint: str,
|
||||
) -> str:
|
||||
"""格式化只落 AwoooP、不發 Telegram 的告警收斂事件摘要。"""
|
||||
parent = parent_fingerprint or "-"
|
||||
target = target_resource or "-"
|
||||
ns = namespace or "default"
|
||||
return "\n".join(
|
||||
[
|
||||
"告警已收斂,不發 Telegram",
|
||||
f"Alert ID: {alert_id}",
|
||||
f"Alert: {alertname}",
|
||||
f"Severity: {severity}",
|
||||
f"Namespace: {ns}",
|
||||
f"Target: {target}",
|
||||
f"Group: {group_key}",
|
||||
f"Group Count: {count}",
|
||||
f"Parent Fingerprint: {parent}",
|
||||
f"Child Fingerprint: {fingerprint}",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
async def record_grouped_alert_event(
    *,
    project_id: str,
    alert_id: str,
    alertname: str,
    severity: str,
    namespace: str,
    target_resource: str,
    group_key: str,
    count: int,
    parent_fingerprint: str | None,
    fingerprint: str,
) -> UUID | None:
    """Record a grouped (suppressed) child alert as an AwoooP conversation event.

    This path deliberately sends nothing to Telegram and only keeps
    operator-facing context:

    - the chat group is not flooded;
    - the console can still see that alerts of the same group keep firing;
    - any failure is fail-open, so it never affects the Alertmanager
      webhook ACK.

    Returns:
        The value returned by ``mirror_inbound_event``, or ``None`` when
        recording failed for any reason.
    """
    try:
        # Local import kept inside the try block, so a failure here is
        # handled like any other error.
        from src.db.base import get_db_context

        dedup_key = build_grouped_alert_provider_event_id(alert_id, fingerprint)
        summary = format_grouped_alert_event_content(
            alert_id=alert_id,
            alertname=alertname,
            severity=severity,
            namespace=namespace,
            target_resource=target_resource,
            group_key=group_key,
            count=count,
            parent_fingerprint=parent_fingerprint,
            fingerprint=fingerprint,
        )
        chat_id = f"alert-group:{group_key}"

        async with get_db_context(project_id) as session:
            recorded_id = await mirror_inbound_event(
                session,
                project_id=project_id,
                channel_type="internal",
                provider_event_id=dedup_key,
                platform_subject_id="alertmanager",
                channel_user_id="alertmanager",
                channel_chat_id=chat_id,
                content_type="text",
                raw_content=summary,
                provider_ts=datetime.now(timezone.utc),
            )

        logger.info(
            "grouped_alert_event_recorded",
            project_id=project_id,
            alert_id=alert_id,
            event_id=str(recorded_id),
            group_key=group_key,
            count=count,
        )
        return recorded_id
    except Exception as exc:
        # Fail-open by design: log the failure and report "nothing recorded".
        logger.warning(
            "grouped_alert_event_record_failed",
            project_id=project_id,
            alert_id=alert_id,
            group_key=group_key,
            error=str(exc),
        )
        return None
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 出站訊息記錄
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -18,6 +18,7 @@ from sqlalchemy import func, select
|
||||
|
||||
from src.db.awooop_models import (
|
||||
AwoooPContractRevision,
|
||||
AwoooPConversationEvent,
|
||||
AwoooPProject,
|
||||
AwoooPRunState,
|
||||
)
|
||||
@@ -31,6 +32,7 @@ logger = structlog.get_logger(__name__)
|
||||
_MAX_CONTRACTS = 200
|
||||
_DEFAULT_PER_PAGE = 50
|
||||
_MAX_PER_PAGE = 200
|
||||
_MAX_EVENTS = 100
|
||||
|
||||
# =============================================================================
|
||||
# Tenants
|
||||
@@ -147,6 +149,54 @@ async def list_runs(
|
||||
return {"runs": runs, "total": total, "page": page, "per_page": per_page}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Channel Events
|
||||
# =============================================================================
|
||||
|
||||
async def list_recent_channel_events(
    *,
    project_id: str | None,
    channel_type: str | None,
    provider_prefix: str | None,
    limit: int,
) -> dict[str, Any]:
    """List recent channel events for the Operator Console.

    Events are ordered newest-first by ``received_at``. Every filter is
    optional and only applied when it is not ``None``.

    Args:
        project_id: Restrict to one project when given.
        channel_type: Restrict to one channel type when given.
        provider_prefix: Literal prefix that ``provider_event_id`` must start
            with. ``%`` and ``_`` are matched literally, not as wildcards.
        limit: Maximum number of rows; clamped to ``1.._MAX_EVENTS``.

    Returns:
        A dict with ``events`` (list of plain dicts), ``total`` (the number
        of events returned, not the number of matching rows in the table)
        and ``limit`` (the clamped limit actually applied).
    """
    safe_limit = max(1, min(limit, _MAX_EVENTS))

    async with get_db_context("awoooi") as db:
        stmt = select(AwoooPConversationEvent).order_by(
            AwoooPConversationEvent.received_at.desc()
        )
        if project_id is not None:
            stmt = stmt.where(AwoooPConversationEvent.project_id == project_id)
        if channel_type is not None:
            stmt = stmt.where(AwoooPConversationEvent.channel_type == channel_type)
        if provider_prefix is not None:
            # The prefix comes from a query parameter. autoescape=True escapes
            # LIKE metacharacters so a caller cannot turn the prefix match
            # into an arbitrary wildcard scan (e.g. "%foo").
            stmt = stmt.where(
                AwoooPConversationEvent.provider_event_id.startswith(
                    provider_prefix, autoescape=True
                )
            )

        result = await db.execute(stmt.limit(safe_limit))
        rows = list(result.scalars().all())

        # Copy column values into plain dicts while the session is still open.
        events = [
            {
                "event_id": r.event_id,
                "project_id": r.project_id,
                "channel_type": r.channel_type,
                "provider_event_id": r.provider_event_id,
                "channel_chat_id": r.channel_chat_id,
                "content_preview": r.content_preview,
                "is_duplicate": r.is_duplicate,
                "received_at": r.received_at,
            }
            for r in rows
        ]

    return {"events": events, "total": len(events), "limit": safe_limit}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Approvals
|
||||
# =============================================================================
|
||||
|
||||
36
apps/api/tests/test_channel_hub_grouped_alert_events.py
Normal file
36
apps/api/tests/test_channel_hub_grouped_alert_events.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from src.services.channel_hub import (
|
||||
build_grouped_alert_provider_event_id,
|
||||
format_grouped_alert_event_content,
|
||||
)
|
||||
|
||||
|
||||
def test_build_grouped_alert_provider_event_id_is_deterministic() -> None:
    """The provider event id is stable and keeps only 32 fingerprint characters."""
    long_fingerprint = "1234567890abcdef" * 4
    provider_event_id = build_grouped_alert_provider_event_id(
        "INC-20260507-ABCD12",
        long_fingerprint,
    )

    expected = "alert-group:INC-20260507-ABCD12:1234567890abcdef1234567890abcdef"
    assert provider_event_id == expected
    assert len(provider_event_id) < 256
|
||||
|
||||
|
||||
def test_format_grouped_alert_event_content_keeps_operator_context() -> None:
    """The formatted summary keeps the fields an operator needs for triage."""
    alert_fields = {
        "alert_id": "INC-20260507-ABCD12",
        "alertname": "DockerContainerRestartSpike",
        "severity": "critical",
        "namespace": "default",
        "target_resource": "sentry-self-hosted-events-consumer-1",
        "group_key": "DockerContainerRestartSpike:default",
        "count": 4,
        "parent_fingerprint": "parent-fp",
        "fingerprint": "child-fp",
    }
    summary = format_grouped_alert_event_content(**alert_fields)

    expected_fragments = (
        "告警已收斂,不發 Telegram",
        "Alert: DockerContainerRestartSpike",
        "Target: sentry-self-hosted-events-consumer-1",
        "Group Count: 4",
        "Parent Fingerprint: parent-fp",
    )
    for fragment in expected_fragments:
        assert fragment in summary
|
||||
@@ -13,3 +13,13 @@ def test_runs_list_route_is_registered_before_dynamic_run_id() -> None:
|
||||
assert "/runs/list" in paths
|
||||
assert "/runs/{run_id}" in paths
|
||||
assert paths.index("/runs/list") < paths.index("/runs/{run_id}")
|
||||
|
||||
|
||||
def test_recent_events_route_is_registered() -> None:
    """The platform router exposes GET /events/recent."""
    get_paths = []
    for route in router.routes:
        # Routes without a `methods` attribute are treated as having none.
        if "GET" in getattr(route, "methods", set()):
            get_paths.append(route.path)

    assert "/events/recent" in get_paths
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
import { useState, useEffect, useCallback, useMemo, useRef } from "react";
|
||||
import {
|
||||
Activity,
|
||||
BellOff,
|
||||
RefreshCw,
|
||||
AlertCircle,
|
||||
Filter,
|
||||
@@ -62,6 +63,23 @@ interface RunsResponse {
|
||||
per_page: number;
|
||||
}
|
||||
|
||||
/** One channel event as returned by `/api/v1/platform/events/recent`. */
interface PlatformEvent {
  event_id: string;
  project_id: string;
  channel_type: string;
  // Grouped-alert events carry the "alert-group:" prefix here.
  provider_event_id: string;
  channel_chat_id?: string | null;
  content_preview?: string | null;
  is_duplicate: boolean;
  // Parsed with `new Date(...)` for display; presumably an ISO 8601 string — confirm against the API.
  received_at: string;
}
|
||||
|
||||
/** Response envelope of `/api/v1/platform/events/recent`. */
interface RecentEventsResponse {
  // Optional on the client side; callers fall back to an empty list.
  events?: PlatformEvent[];
  total: number;
  limit: number;
}
|
||||
|
||||
// =============================================================================
|
||||
// 常數
|
||||
// =============================================================================
|
||||
@@ -293,12 +311,70 @@ function RunRow({ run }: { run: Run }) {
|
||||
);
|
||||
}
|
||||
|
||||
/** Format a received_at value as a zh-TW time with 2-digit hour and minute, or "--" when empty. */
function formatGroupedEventTime(receivedAt: string): string {
  if (!receivedAt) {
    return "--";
  }
  return new Date(receivedAt).toLocaleTimeString("zh-TW", {
    hour: "2-digit",
    minute: "2-digit",
  });
}

/**
 * Panel listing recently grouped (suppressed) alert events.
 * Shows a count badge, an empty-state hint, and at most six event cards.
 */
function GroupedAlertEventsPanel({ events }: { events: PlatformEvent[] }) {
  const hasEvents = events.length > 0;
  // Only the first six events are rendered as cards.
  const visibleEvents = events.slice(0, 6);

  return (
    <section className="border border-[#e0ddd4] bg-white">
      <div className="flex flex-wrap items-center justify-between gap-3 border-b border-[#e0ddd4] bg-[#faf9f3] px-4 py-3">
        <div className="flex items-center gap-2">
          <BellOff className="h-4 w-4 text-[#8a5a08]" aria-hidden="true" />
          <div>
            <h3 className="text-sm font-semibold text-[#141413]">最近告警收斂</h3>
            <p className="text-xs text-[#77736a]">
              子告警不再洗 Telegram,改保留在 AwoooP 事件流
            </p>
          </div>
        </div>
        <span className="border border-[#d9b36f] bg-[#fff7e8] px-2 py-0.5 text-xs font-semibold text-[#8a5a08]">
          {events.length} 筆
        </span>
      </div>
      {hasEvents ? (
        <div className="grid gap-px bg-[#eee9dd] md:grid-cols-2 xl:grid-cols-3">
          {visibleEvents.map((item) => (
            <article key={item.event_id} className="bg-white px-4 py-3">
              <div className="flex items-start justify-between gap-3">
                <div className="min-w-0">
                  <p className="truncate font-mono text-xs font-semibold text-[#141413]">
                    {item.provider_event_id.replace("alert-group:", "")}
                  </p>
                  <p className="mt-1 text-xs text-[#77736a]">
                    {item.project_id} · {formatGroupedEventTime(item.received_at)}
                  </p>
                </div>
                <span className="shrink-0 border border-[#d8d3c7] bg-[#faf9f3] px-2 py-0.5 text-xs text-[#5f5b52]">
                  internal
                </span>
              </div>
              <p className="mt-3 line-clamp-3 whitespace-pre-line text-xs leading-5 text-[#5f5b52]">
                {item.content_preview || "無摘要"}
              </p>
            </article>
          ))}
        </div>
      ) : (
        <div className="px-4 py-4 text-sm text-[#5f5b52]">
          目前沒有近期收斂事件。若同組告警再次觸發,第二筆起會出現在這裡。
        </div>
      )}
    </section>
  );
}
|
||||
|
||||
// =============================================================================
|
||||
// Main Component
|
||||
// =============================================================================
|
||||
|
||||
export default function RunsPage() {
|
||||
const [runs, setRuns] = useState<Run[]>([]);
|
||||
const [groupedEvents, setGroupedEvents] = useState<PlatformEvent[]>([]);
|
||||
const [tenants, setTenants] = useState<Tenant[]>([]);
|
||||
const [total, setTotal] = useState(0);
|
||||
const [loading, setLoading] = useState(true);
|
||||
@@ -337,6 +413,21 @@ export default function RunsPage() {
|
||||
const rows = Array.isArray(data.runs) ? data.runs : data.items;
|
||||
setRuns(Array.isArray(rows) ? rows : []);
|
||||
setTotal(data.total ?? 0);
|
||||
|
||||
const eventParams = new URLSearchParams();
|
||||
eventParams.set("channel_type", "internal");
|
||||
eventParams.set("provider_prefix", "alert-group");
|
||||
eventParams.set("limit", "6");
|
||||
if (projectFilter) eventParams.set("project_id", projectFilter);
|
||||
|
||||
const eventsRes = await fetch(
|
||||
`${API_BASE}/api/v1/platform/events/recent?${eventParams.toString()}`
|
||||
);
|
||||
if (eventsRes.ok) {
|
||||
const eventsData: RecentEventsResponse = await eventsRes.json();
|
||||
setGroupedEvents(Array.isArray(eventsData.events) ? eventsData.events : []);
|
||||
}
|
||||
|
||||
setLastRefresh(new Date());
|
||||
} catch (err) {
|
||||
setError(err instanceof Error ? err.message : "載入失敗");
|
||||
@@ -427,6 +518,8 @@ export default function RunsPage() {
|
||||
})}
|
||||
</section>
|
||||
|
||||
<GroupedAlertEventsPanel events={groupedEvents} />
|
||||
|
||||
{/* Filters */}
|
||||
<div className="flex flex-wrap items-center gap-3 border border-[#e0ddd4] bg-white p-4">
|
||||
<Filter className="w-4 h-4 text-muted-foreground flex-shrink-0" aria-hidden="true" />
|
||||
|
||||
Reference in New Issue
Block a user