fix(flywheel): 修補剩餘 P0/P1 缺陷
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled

- CRITICAL-1: TYPE-1 path approval_id=str(alert_id) → str(uuid.uuid4()),
  避免 UUID(approval_id) 拋 ValueError 導致所有 Heartbeat/Info 告警崩潰
- CRITICAL-2: asyncio.create_task() 結果存入 _exec_task 並加 done_callback,
  防止 GC 在執行中途回收任務
- FORMAT: _push_to_telegram_background 新增 notification_type + diff_summary 參數,
  TYPE-4D → send_drift_card(),其他 → send_approval_card()(修正 ConfigDrift 顯示錯誤卡片)
- 傳遞 notification_type 至 Alertmanager 兩個呼叫點

ADR-073 四斷點修補最終收尾

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-04-12 17:14:46 +08:00
parent cce55d560d
commit 99b489ca63
2 changed files with 32 additions and 4 deletions

View File

@@ -24,6 +24,7 @@ Endpoints:
import hashlib
import hmac
import uuid
from typing import Literal
from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request, status
@@ -303,12 +304,16 @@ async def _push_to_telegram_background(
ai_provider: str = "",
# 2026-04-08 ogt: 補傳 incident_id 以啟用詳情/重診/歷史按鈕
incident_id: str = "",
# ADR-073 Fix: 傳入 notification_type 以正確路由 TYPE-4D Config Drift 卡片
notification_type: str = "",
diff_summary: str = "",
) -> None:
"""
背景任務: 推送待簽核卡片到 Telegram (v7.0 含 SignOz 整合)
使用 BackgroundTasks 執行,絕不阻塞 Webhook 回應。
任何 Telegram API 錯誤都會被捕捉並記錄,不影響主流程。
TYPE-4D Config Drift → send_drift_card(); 其他 → send_approval_card()
"""
try:
gateway = get_telegram_gateway()
@@ -322,13 +327,28 @@ async def _push_to_telegram_background(
)
return
# TYPE-4D: Config Drift 使用專屬卡片 (send_drift_card)
# ADR-071-F: [查看Diff][採納變更][回滾][忽略] 四鍵格式
if notification_type == "TYPE-4D":
await gateway.send_drift_card(
incident_id=incident_id,
approval_id=approval_id,
resource_name=resource_name[:50],
diff_summary=diff_summary or root_cause,
detected_at="",
)
logger.info(
"telegram_push_success_type4d",
approval_id=approval_id,
incident_id=incident_id,
)
return
# 如果是收斂告警,在訊息中加入聚合次數
root_cause_with_count = root_cause
if hit_count > 1:
root_cause_with_count = f"[x{hit_count}] {root_cause}"
# TODO(2026-04-05): Alertmanager 路徑透過 Approval 建立,尚無 incident_id
# 待 Approval→Incident 關聯建立後,補傳 incident_id 以啟用 detail/reanalyze/history 按鈕
await gateway.send_approval_card(
approval_id=approval_id,
risk_level=risk_level,
@@ -930,6 +950,7 @@ async def receive_alert(
ai_provider=ai_provider,
# 2026-04-08 ogt: 補傳 incident_id 以啟用詳情/重診/歷史按鈕
incident_id="", # /alerts 路徑尚無 incident,detail/reanalyze/history 按鈕不顯示
# /alerts 路徑沒有 notification_type(非 Alertmanager 路徑),不需 TYPE-4D routing
)
return AlertResponse(
@@ -1242,7 +1263,7 @@ async def alertmanager_webhook(
# ==========================================================================
if notification_type == "TYPE-1":
_info_incident_id = await create_incident_for_approval(
approval_id=str(alert_id), # 純資訊無 approval,用 alert_id 佔位
approval_id=str(uuid.uuid4()), # 純資訊無 approval,UUID 佔位避免 ValueError
risk_level="low",
target_resource=target_resource,
namespace=namespace,
@@ -1433,6 +1454,8 @@ async def alertmanager_webhook(
ai_provider=ai_provider,
# 2026-04-08 ogt: 補傳 incident_id 以啟用詳情/重診/歷史按鈕
incident_id=incident_id,
# ADR-073: 路由 TYPE-4D → send_drift_card
notification_type=notification_type,
)
record_alert_chain_success("alertmanager")
@@ -1498,6 +1521,8 @@ async def alertmanager_webhook(
confidence=0.0,
namespace=namespace,
incident_id=fallback_incident_id,
# ADR-073: 路由 TYPE-4D → send_drift_card
notification_type=notification_type,
)
return AlertResponse(

View File

@@ -4582,7 +4582,10 @@ class TelegramGateway:
if execution_triggered:
import asyncio
from src.services.approval_execution import get_execution_service
asyncio.create_task(get_execution_service().execute_approved_action(approval))
_exec_task = asyncio.create_task(
get_execution_service().execute_approved_action(approval)
)
_exec_task.add_done_callback(lambda t: t.exception() if not t.cancelled() else None)
logger.info(
"telegram_approval_execution_triggered",
approval_id=approval_id,