feat(awooop): mirror alertmanager events into truth chain
All checks were successful
Code Review / ai-code-review (push) Successful in 19s
CD Pipeline / tests (push) Successful in 2m10s
CD Pipeline / build-and-deploy (push) Successful in 3m22s
CD Pipeline / post-deploy-checks (push) Successful in 1m17s

This commit is contained in:
Your Name
2026-05-13 20:16:42 +08:00
parent 21042ad0e7
commit c2d01eb6f1
5 changed files with 423 additions and 7 deletions

View File

@@ -55,7 +55,10 @@ from src.services.alertmanager_llm_guard import (
from src.services.approval_db import get_approval_service
from src.services.auto_approve import get_auto_approve_policy
from src.services.auto_repair_service import AutoRepairService
from src.services.channel_hub import record_grouped_alert_event
from src.services.channel_hub import (
record_alertmanager_event,
record_grouped_alert_event,
)
# Phase 15.2: Trace Context (moved to SignalProducerService)
# get_trace_context 已移至 Service 層
@@ -1509,6 +1512,11 @@ async def _process_new_alert_background(
try:
service = get_approval_service()
openclaw = get_openclaw()
traced_alert_labels = {
**(alert_labels or {}),
"fingerprint": fingerprint,
"alert_id": alert_id,
}
rule_response = match_rule(alert_context)
should_bypass_llm = _should_use_alertmanager_rule_first(rule_response, alert_category)
@@ -1703,7 +1711,7 @@ async def _process_new_alert_background(
message=message,
source="alertmanager",
alertname=alertname,
alert_labels=alert_labels,
alert_labels=traced_alert_labels,
notification_type=notification_type,
alert_category=alert_category,
)
@@ -1719,6 +1727,22 @@ async def _process_new_alert_background(
error=str(_meta_err),
)
await record_alertmanager_event(
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="incident_linked",
notification_type=notification_type,
alert_category=alert_category,
incident_id=incident_id,
approval_id=str(approval.id),
repeat_count=1,
)
if _cs2_auto_approval is not None and _cs2_exec_success is not None:
try:
_cs2_auto_approval.incident_id = incident_id
@@ -1963,7 +1987,7 @@ async def _process_new_alert_background(
message=message,
source="alertmanager",
alertname=alertname,
alert_labels=alert_labels,
alert_labels=traced_alert_labels,
notification_type=notification_type,
alert_category=alert_category,
)
@@ -1979,6 +2003,22 @@ async def _process_new_alert_background(
error=str(_meta_err),
)
await record_alertmanager_event(
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="incident_linked",
notification_type=notification_type,
alert_category=alert_category,
incident_id=incident_id,
approval_id=str(approval.id),
repeat_count=1,
)
if _cs3_auto_approval is not None and _cs3_exec_success is not None:
try:
_cs3_auto_approval.incident_id = incident_id
@@ -2127,7 +2167,7 @@ async def _process_new_alert_background(
message=message,
source="alertmanager",
alertname=alertname,
alert_labels=alert_labels,
alert_labels=traced_alert_labels,
notification_type=notification_type,
alert_category=alert_category,
)
@@ -2143,6 +2183,22 @@ async def _process_new_alert_background(
error=str(_meta_err),
)
await record_alertmanager_event(
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="incident_linked",
notification_type=notification_type,
alert_category=alert_category,
incident_id=fallback_incident_id,
approval_id=str(approval.id),
repeat_count=1,
)
await _push_to_telegram_background(
approval_id=str(approval.id),
risk_level="medium",
@@ -2389,6 +2445,19 @@ async def alertmanager_webhook(
target=target_resource,
fingerprint=fingerprint,
)
background_tasks.add_task(
record_alertmanager_event,
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="received",
notification_type=notification_type,
alert_category=alert_category,
)
# ==========================================================================
# ADR-076: 告警聚合引擎 — 5 分鐘滑動視窗,防止告警風暴
@@ -2471,6 +2540,23 @@ async def alertmanager_webhook(
hit_count=updated_approval.hit_count,
reason="Converged alert - Telegram already sent for this fingerprint",
)
background_tasks.add_task(
record_alertmanager_event,
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="converged",
notification_type=notification_type,
alert_category=alert_category,
incident_id=getattr(updated_approval, "incident_id", None),
approval_id=str(updated_approval.id),
repeat_count=updated_approval.hit_count,
is_duplicate=True,
)
return AlertResponse(
success=True,
@@ -2498,10 +2584,24 @@ async def alertmanager_webhook(
message=message,
source="alertmanager",
alertname=alertname,
alert_labels=alert.labels,
alert_labels={**alert.labels, "fingerprint": fingerprint, "alert_id": alert_id},
notification_type="TYPE-1",
alert_category=alert_category,
)
background_tasks.add_task(
record_alertmanager_event,
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="incident_linked",
notification_type="TYPE-1",
alert_category=alert_category,
incident_id=_info_incident_id,
)
# 2026-04-15 ogt: TYPE-1 純資訊告警建立後立即關閉
# 設計原則: backup/heartbeat/info 告警無需追蹤狀態,通知即完成
# 防止 incidents 表無限累積 INVESTIGATING 記錄(ADR-073 漏洞修補)
@@ -2533,6 +2633,20 @@ async def alertmanager_webhook(
fingerprint=fingerprint,
ttl_seconds=ALERTMANAGER_LLM_INFLIGHT_LOCK_TTL_SECONDS,
)
background_tasks.add_task(
record_alertmanager_event,
project_id="awoooi",
alert_id=alert_id,
alertname=alertname,
severity=severity,
namespace=namespace,
target_resource=target_resource,
fingerprint=fingerprint,
stage="llm_inflight_suppressed",
notification_type=notification_type,
alert_category=alert_category,
is_duplicate=True,
)
return AlertResponse(
success=True,
message="🛡️ 告警已由同指紋背景 AI 分析處理中,跳過重複 LLM 呼叫",