fix(awooop): record grouped alert events
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m6s
CD Pipeline / build-and-deploy (push) Successful in 3m48s
CD Pipeline / post-deploy-checks (push) Successful in 1m25s

This commit is contained in:
Your Name
2026-05-07 01:35:09 +08:00
parent 1a1dea00eb
commit 251554c044
8 changed files with 373 additions and 0 deletions

View File

@@ -9,11 +9,13 @@ ADR-106/ADR-107/ADR-114/ADR-115/ADR-116
from fastapi import APIRouter
from src.api.v1.platform.contracts import router as contracts_router
from src.api.v1.platform.events import router as events_router
from src.api.v1.platform.operator_runs import router as operator_runs_router
from src.api.v1.platform.runs import router as runs_router
from src.api.v1.platform.tenants import router as tenants_router
router = APIRouter()
router.include_router(events_router)
# 2026-05-06 Codex: FastAPI 依註冊順序比對路由。Operator Console 的
# `/runs/list` 必須排在 `/runs/{run_id}` 前面,否則 `list` 會被當成
# run_id,造成前端 Run 監控頁 HTTP 422。

View File

@@ -0,0 +1,58 @@
"""
AwoooP Operator Console — Channel Events API
============================================
提供 Operator Console 讀取 Communication Hub / legacy mirror 的事件摘要。
"""
from __future__ import annotations
from datetime import datetime
from typing import Any
from uuid import UUID
from fastapi import APIRouter, Query
from pydantic import BaseModel
from src.services.platform_operator_service import list_recent_channel_events
router = APIRouter()
class ChannelEventItem(BaseModel):
    """Summary of a single channel event as exposed by GET /events/recent."""

    # Identifier of the event record.
    event_id: UUID
    # Project (tenant) the event is recorded under.
    project_id: str
    # Channel category; /events/recent can filter on it.
    channel_type: str
    # Provider-side event identifier; /events/recent can filter on its prefix.
    provider_event_id: str
    # Chat identifier on the channel; may be absent.
    channel_chat_id: str | None
    # Preview of the event content; may be absent.
    content_preview: str | None
    # Duplicate flag carried over from the stored event.
    is_duplicate: bool
    # Time the event was received.
    received_at: datetime
class RecentEventsResponse(BaseModel):
    """Response envelope for GET /events/recent."""

    # Event summaries, in the order supplied by the service layer.
    events: list[ChannelEventItem]
    # Event count reported by the service layer.
    total: int
    # Row limit reported by the service layer.
    limit: int
@router.get(
    "/events/recent",
    response_model=RecentEventsResponse,
    summary="列出最近 Channel Events",
    description=(
        "返回 awooop_conversation_event 最近事件。"
        "可用 channel_type / provider_prefix 過濾,例如 alert-group 收斂事件。"
    ),
)
async def list_recent_events(
    project_id: str | None = Query(None, description="租戶 ID可選"),
    channel_type: str | None = Query(None, description="通道類型(可選)"),
    provider_prefix: str | None = Query(None, description="provider_event_id 前綴(可選)"),
    limit: int = Query(20, ge=1, le=100, description="最多返回筆數"),
) -> dict[str, Any]:
    """Return recent channel events for the Operator Console.

    All filters are optional query parameters and are forwarded unchanged to
    ``list_recent_channel_events``; ``limit`` is validated to 1..100 by the
    ``Query`` declaration. The service result is returned as-is and validated
    against ``RecentEventsResponse`` by the route's ``response_model``.
    """
    recent_events = await list_recent_channel_events(
        limit=limit,
        provider_prefix=provider_prefix,
        channel_type=channel_type,
        project_id=project_id,
    )
    return recent_events

View File

@@ -55,6 +55,7 @@ from src.services.alertmanager_llm_guard import (
from src.services.approval_db import get_approval_service
from src.services.auto_approve import get_auto_approve_policy
from src.services.auto_repair_service import AutoRepairService
from src.services.channel_hub import record_grouped_alert_event
# Phase 15.2: Trace Context (moved to SignalProducerService)
# get_trace_context 已移至 Service 層
@@ -2322,6 +2323,19 @@ async def alertmanager_webhook(
parent_fingerprint=grouping_result.parent_fingerprint,
reason="Alert storm suppressed — child alert within 5-min window",
)
background_tasks.add_task(
record_grouped_alert_event,
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
group_key=grouping_result.group_key,
count=grouping_result.count,
parent_fingerprint=grouping_result.parent_fingerprint,
fingerprint=fingerprint,
)
return AlertResponse(
success=True,
message=(

View File

@@ -128,6 +128,116 @@ async def mirror_inbound_event(
return event_id
def build_grouped_alert_provider_event_id(alert_id: str, fingerprint: str) -> str:
    """Build the idempotent ``provider_event_id`` for a grouped child alert.

    The result has the form ``alert-group:<alert_id>:<fingerprint>``.

    Args:
        alert_id: Alert identifier. Surrounding whitespace is stripped; a
            missing (``None``) or blank value becomes ``"unknown"``.
        fingerprint: Alert fingerprint. Surrounding whitespace is stripped and
            the value is cut to its first 32 characters; a missing (``None``)
            or blank value becomes ``"no-fingerprint"``.

    Returns:
        The same string for the same inputs, so repeated deliveries of one
        alert map to one provider event id.
    """
    # str(None) is the literal "None", which would bypass the fallbacks below,
    # so treat a missing value as empty before normalising.
    raw_alert_id = "" if alert_id is None else str(alert_id)
    raw_fingerprint = "" if fingerprint is None else str(fingerprint)

    safe_alert_id = raw_alert_id.strip() or "unknown"
    safe_fingerprint = raw_fingerprint.strip()[:32] or "no-fingerprint"
    return f"alert-group:{safe_alert_id}:{safe_fingerprint}"
def format_grouped_alert_event_content(
    *,
    alert_id: str,
    alertname: str,
    severity: str,
    namespace: str,
    target_resource: str,
    group_key: str,
    count: int,
    parent_fingerprint: str | None,
    fingerprint: str,
) -> str:
    """Render the text summary stored for a grouped (suppressed) alert.

    The summary is a fixed header line followed by one ``Label: value`` line
    per field, joined with newlines. Falsy values fall back to placeholders:
    ``namespace`` to ``"default"``, ``target_resource`` and
    ``parent_fingerprint`` to ``"-"``.
    """
    summary_lines = (
        "告警已收斂,不發 Telegram",
        f"Alert ID: {alert_id}",
        f"Alert: {alertname}",
        f"Severity: {severity}",
        f"Namespace: {namespace or 'default'}",
        f"Target: {target_resource or '-'}",
        f"Group: {group_key}",
        f"Group Count: {count}",
        f"Parent Fingerprint: {parent_fingerprint or '-'}",
        f"Child Fingerprint: {fingerprint}",
    )
    return "\n".join(summary_lines)
async def record_grouped_alert_event(
    *,
    project_id: str,
    alert_id: str,
    alertname: str,
    severity: str,
    namespace: str,
    target_resource: str,
    group_key: str,
    count: int,
    parent_fingerprint: str | None,
    fingerprint: str,
) -> UUID | None:
    """
    Record a child alert that AlertGroupingService grouped away as an AwoooP
    conversation event.

    This path intentionally sends nothing to Telegram and only keeps
    operator-facing context:
    - the chat group is not flooded
    - the Console can still see that alerts in the same group keep firing
    - a DB failure fails open and does not affect the Alertmanager webhook ACK

    Returns:
        The value returned by ``mirror_inbound_event`` on success, or ``None``
        when any step raised (the error is logged as a warning and swallowed).
    """
    try:
        # Imported inside the try block, so an import failure is also handled
        # by the fail-open branch below.
        from src.db.base import get_db_context

        # Derived from alert_id + fingerprint, so the same child alert always
        # yields the same provider_event_id.
        provider_event_id = build_grouped_alert_provider_event_id(alert_id, fingerprint)
        content = format_grouped_alert_event_content(
            alert_id=alert_id,
            alertname=alertname,
            severity=severity,
            namespace=namespace,
            target_resource=target_resource,
            group_key=group_key,
            count=count,
            parent_fingerprint=parent_fingerprint,
            fingerprint=fingerprint,
        )
        async with get_db_context(project_id) as db:
            # Stored as an "internal" channel event attributed to
            # "alertmanager"; the chat id carries the group key so events of
            # one group share a channel_chat_id.
            event_id = await mirror_inbound_event(
                db,
                project_id=project_id,
                channel_type="internal",
                provider_event_id=provider_event_id,
                platform_subject_id="alertmanager",
                channel_user_id="alertmanager",
                channel_chat_id=f"alert-group:{group_key}",
                content_type="text",
                raw_content=content,
                provider_ts=datetime.now(timezone.utc),
            )
        logger.info(
            "grouped_alert_event_recorded",
            project_id=project_id,
            alert_id=alert_id,
            event_id=str(event_id),
            group_key=group_key,
            count=count,
        )
        return event_id
    except Exception as exc:
        # Deliberate best-effort: log and return None instead of propagating.
        logger.warning(
            "grouped_alert_event_record_failed",
            project_id=project_id,
            alert_id=alert_id,
            group_key=group_key,
            error=str(exc),
        )
        return None
# ─────────────────────────────────────────────────────────────────────────────
# 出站訊息記錄
# ─────────────────────────────────────────────────────────────────────────────

View File

@@ -18,6 +18,7 @@ from sqlalchemy import func, select
from src.db.awooop_models import (
AwoooPContractRevision,
AwoooPConversationEvent,
AwoooPProject,
AwoooPRunState,
)
@@ -31,6 +32,7 @@ logger = structlog.get_logger(__name__)
_MAX_CONTRACTS = 200
_DEFAULT_PER_PAGE = 50
_MAX_PER_PAGE = 200
_MAX_EVENTS = 100
# =============================================================================
# Tenants
@@ -147,6 +149,54 @@ async def list_runs(
return {"runs": runs, "total": total, "page": page, "per_page": per_page}
# =============================================================================
# Channel Events
# =============================================================================
async def list_recent_channel_events(
    *,
    project_id: str | None,
    channel_type: str | None,
    provider_prefix: str | None,
    limit: int,
) -> dict[str, Any]:
    """List the most recent channel events for the Operator Console.

    Args:
        project_id: When not ``None``, only events of this project.
        channel_type: When not ``None``, only events of this channel type.
        provider_prefix: When not ``None``, only events whose
            ``provider_event_id`` starts with this literal text. ``%`` and
            ``_`` in the value are matched literally, not as LIKE wildcards.
        limit: Requested row count; clamped to ``1.._MAX_EVENTS``.

    Returns:
        ``{"events": [...], "total": n, "limit": safe_limit}`` where events
        are ordered by ``received_at`` descending and ``total`` is the number
        of events in this response (not the number of matching rows overall).
    """
    safe_limit = max(1, min(limit, _MAX_EVENTS))

    async with get_db_context("awoooi") as db:
        stmt = select(AwoooPConversationEvent).order_by(
            AwoooPConversationEvent.received_at.desc()
        )
        if project_id is not None:
            stmt = stmt.where(AwoooPConversationEvent.project_id == project_id)
        if channel_type is not None:
            stmt = stmt.where(AwoooPConversationEvent.channel_type == channel_type)
        if provider_prefix is not None:
            # The prefix comes from a query parameter. autoescape keeps "%"
            # and "_" from acting as wildcards, so this is a true prefix match.
            stmt = stmt.where(
                AwoooPConversationEvent.provider_event_id.startswith(
                    provider_prefix, autoescape=True
                )
            )

        result = await db.execute(stmt.limit(safe_limit))
        # Serialise while the session is still open so no attribute access
        # happens on ORM objects after the context has exited.
        events = [
            {
                "event_id": r.event_id,
                "project_id": r.project_id,
                "channel_type": r.channel_type,
                "provider_event_id": r.provider_event_id,
                "channel_chat_id": r.channel_chat_id,
                "content_preview": r.content_preview,
                "is_duplicate": r.is_duplicate,
                "received_at": r.received_at,
            }
            for r in result.scalars().all()
        ]

    return {"events": events, "total": len(events), "limit": safe_limit}
# =============================================================================
# Approvals
# =============================================================================

View File

@@ -0,0 +1,36 @@
from __future__ import annotations
from src.services.channel_hub import (
build_grouped_alert_provider_event_id,
format_grouped_alert_event_content,
)
def test_build_grouped_alert_provider_event_id_is_deterministic() -> None:
    """A 64-char fingerprint is cut to 32 chars and the id stays short."""
    long_fingerprint = "1234567890abcdef" * 4
    provider_event_id = build_grouped_alert_provider_event_id(
        "INC-20260507-ABCD12",
        long_fingerprint,
    )

    assert len(provider_event_id) < 256
    assert (
        provider_event_id
        == "alert-group:INC-20260507-ABCD12:1234567890abcdef1234567890abcdef"
    )
def test_format_grouped_alert_event_content_keeps_operator_context() -> None:
    """The rendered summary contains the header and the key alert fields."""
    alert_fields = {
        "alert_id": "INC-20260507-ABCD12",
        "alertname": "DockerContainerRestartSpike",
        "severity": "critical",
        "namespace": "default",
        "target_resource": "sentry-self-hosted-events-consumer-1",
        "group_key": "DockerContainerRestartSpike:default",
        "count": 4,
        "parent_fingerprint": "parent-fp",
        "fingerprint": "child-fp",
    }
    rendered = format_grouped_alert_event_content(**alert_fields)

    expected_fragments = (
        "告警已收斂,不發 Telegram",
        "Alert: DockerContainerRestartSpike",
        "Target: sentry-self-hosted-events-consumer-1",
        "Group Count: 4",
        "Parent Fingerprint: parent-fp",
    )
    for fragment in expected_fragments:
        assert fragment in rendered

View File

@@ -13,3 +13,13 @@ def test_runs_list_route_is_registered_before_dynamic_run_id() -> None:
assert "/runs/list" in paths
assert "/runs/{run_id}" in paths
assert paths.index("/runs/list") < paths.index("/runs/{run_id}")
def test_recent_events_route_is_registered() -> None:
    """The platform router exposes GET /events/recent."""
    get_paths = {
        registered.path
        for registered in router.routes
        if "GET" in getattr(registered, "methods", set())
    }
    assert "/events/recent" in get_paths

View File

@@ -8,6 +8,7 @@
import { useState, useEffect, useCallback, useMemo, useRef } from "react";
import {
Activity,
BellOff,
RefreshCw,
AlertCircle,
Filter,
@@ -62,6 +63,23 @@ interface RunsResponse {
per_page: number;
}
/** One channel event entry from the `/api/v1/platform/events/recent` response. */
interface PlatformEvent {
  event_id: string;
  project_id: string;
  channel_type: string;
  provider_event_id: string;
  // May be omitted or null in the payload.
  channel_chat_id?: string | null;
  // May be omitted or null; the panel falls back to a placeholder text.
  content_preview?: string | null;
  is_duplicate: boolean;
  // Timestamp string; parsed with `new Date(...)` for display.
  received_at: string;
}
/** JSON body of `GET /api/v1/platform/events/recent`. */
interface RecentEventsResponse {
  // Optional on the client side; callers check Array.isArray before use.
  events?: PlatformEvent[];
  total: number;
  limit: number;
}
// =============================================================================
// 常數
// =============================================================================
@@ -293,12 +311,70 @@ function RunRow({ run }: { run: Run }) {
);
}
/**
 * Panel that shows recent grouped-alert events.
 *
 * - The header badge shows `events.length` (the full array length).
 * - With no events an empty-state container is rendered.
 * - Otherwise at most the first six events are rendered as cards showing the
 *   provider event id (without the "alert-group:" prefix), the project id,
 *   the receive time and the content preview.
 */
function GroupedAlertEventsPanel({ events }: { events: PlatformEvent[] }) {
  return (
    <section className="border border-[#e0ddd4] bg-white">
      <div className="flex flex-wrap items-center justify-between gap-3 border-b border-[#e0ddd4] bg-[#faf9f3] px-4 py-3">
        <div className="flex items-center gap-2">
          <BellOff className="h-4 w-4 text-[#8a5a08]" aria-hidden="true" />
          <div>
            <h3 className="text-sm font-semibold text-[#141413]"></h3>
            <p className="text-xs text-[#77736a]">
              Telegram AwoooP
            </p>
          </div>
        </div>
        <span className="border border-[#d9b36f] bg-[#fff7e8] px-2 py-0.5 text-xs font-semibold text-[#8a5a08]">
          {events.length}
        </span>
      </div>
      {events.length === 0 ? (
        <div className="px-4 py-4 text-sm text-[#5f5b52]">
        </div>
      ) : (
        <div className="grid gap-px bg-[#eee9dd] md:grid-cols-2 xl:grid-cols-3">
          {events.slice(0, 6).map((event) => {
            // Show hour and minute in the zh-TW locale; "--" when the event
            // carries no receive time.
            const receivedAt = event.received_at
              ? new Date(event.received_at).toLocaleTimeString("zh-TW", {
                  hour: "2-digit",
                  minute: "2-digit",
                })
              : "--";
            return (
              <article key={event.event_id} className="bg-white px-4 py-3">
                <div className="flex items-start justify-between gap-3">
                  <div className="min-w-0">
                    <p className="truncate font-mono text-xs font-semibold text-[#141413]">
                      {event.provider_event_id.replace("alert-group:", "")}
                    </p>
                    <p className="mt-1 text-xs text-[#77736a]">
                      {event.project_id} · {receivedAt}
                    </p>
                  </div>
                  <span className="shrink-0 border border-[#d8d3c7] bg-[#faf9f3] px-2 py-0.5 text-xs text-[#5f5b52]">
                    internal
                  </span>
                </div>
                <p className="mt-3 line-clamp-3 whitespace-pre-line text-xs leading-5 text-[#5f5b52]">
                  {event.content_preview || "無摘要"}
                </p>
              </article>
            );
          })}
        </div>
      )}
    </section>
  );
}
// =============================================================================
// Main Component
// =============================================================================
export default function RunsPage() {
const [runs, setRuns] = useState<Run[]>([]);
const [groupedEvents, setGroupedEvents] = useState<PlatformEvent[]>([]);
const [tenants, setTenants] = useState<Tenant[]>([]);
const [total, setTotal] = useState(0);
const [loading, setLoading] = useState(true);
@@ -337,6 +413,21 @@ export default function RunsPage() {
const rows = Array.isArray(data.runs) ? data.runs : data.items;
setRuns(Array.isArray(rows) ? rows : []);
setTotal(data.total ?? 0);
const eventParams = new URLSearchParams();
eventParams.set("channel_type", "internal");
eventParams.set("provider_prefix", "alert-group");
eventParams.set("limit", "6");
if (projectFilter) eventParams.set("project_id", projectFilter);
const eventsRes = await fetch(
`${API_BASE}/api/v1/platform/events/recent?${eventParams.toString()}`
);
if (eventsRes.ok) {
const eventsData: RecentEventsResponse = await eventsRes.json();
setGroupedEvents(Array.isArray(eventsData.events) ? eventsData.events : []);
}
setLastRefresh(new Date());
} catch (err) {
setError(err instanceof Error ? err.message : "載入失敗");
@@ -427,6 +518,8 @@ export default function RunsPage() {
})}
</section>
<GroupedAlertEventsPanel events={groupedEvents} />
{/* Filters */}
<div className="flex flex-wrap items-center gap-3 border border-[#e0ddd4] bg-white p-4">
<Filter className="w-4 h-4 text-muted-foreground flex-shrink-0" aria-hidden="true" />