fix(flywheel): 修補剩餘 P0/P1 缺陷
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
- CRITICAL-1: TYPE-1 path approval_id=str(alert_id) → str(uuid.uuid4()), 避免 UUID(approval_id) 拋 ValueError 導致所有 Heartbeat/Info 告警崩潰
- CRITICAL-2: asyncio.create_task() 結果存入 _exec_task 並加 done_callback, 防止 GC 在執行中途回收任務
- FORMAT: _push_to_telegram_background 新增 notification_type + diff_summary 參數, TYPE-4D → send_drift_card(), 其他 → send_approval_card()(修正 ConfigDrift 顯示錯誤卡片)
- 傳遞 notification_type 至 Alertmanager 兩個呼叫點

ADR-073 四斷點修補最終收尾

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -24,6 +24,7 @@ Endpoints:
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import uuid
|
||||
from typing import Literal
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request, status
|
||||
@@ -303,12 +304,16 @@ async def _push_to_telegram_background(
|
||||
ai_provider: str = "",
|
||||
# 2026-04-08 ogt: 補傳 incident_id 以啟用詳情/重診/歷史按鈕
|
||||
incident_id: str = "",
|
||||
# ADR-073 Fix: 傳入 notification_type 以正確路由 TYPE-4D Config Drift 卡片
|
||||
notification_type: str = "",
|
||||
diff_summary: str = "",
|
||||
) -> None:
|
||||
"""
|
||||
背景任務: 推送待簽核卡片到 Telegram (v7.0 含 SignOz 整合)
|
||||
|
||||
使用 BackgroundTasks 執行,絕不阻塞 Webhook 回應。
|
||||
任何 Telegram API 錯誤都會被捕捉並記錄,不影響主流程。
|
||||
TYPE-4D Config Drift → send_drift_card(); 其他 → send_approval_card()
|
||||
"""
|
||||
try:
|
||||
gateway = get_telegram_gateway()
|
||||
@@ -322,13 +327,28 @@ async def _push_to_telegram_background(
|
||||
)
|
||||
return
|
||||
|
||||
# TYPE-4D: Config Drift 使用專屬卡片 (send_drift_card)
|
||||
# ADR-071-F: [查看Diff][採納變更][回滾][忽略] 四鍵格式
|
||||
if notification_type == "TYPE-4D":
|
||||
await gateway.send_drift_card(
|
||||
incident_id=incident_id,
|
||||
approval_id=approval_id,
|
||||
resource_name=resource_name[:50],
|
||||
diff_summary=diff_summary or root_cause,
|
||||
detected_at="",
|
||||
)
|
||||
logger.info(
|
||||
"telegram_push_success_type4d",
|
||||
approval_id=approval_id,
|
||||
incident_id=incident_id,
|
||||
)
|
||||
return
|
||||
|
||||
# 如果是收斂告警,在訊息中加入聚合次數
|
||||
root_cause_with_count = root_cause
|
||||
if hit_count > 1:
|
||||
root_cause_with_count = f"[x{hit_count}] {root_cause}"
|
||||
|
||||
# TODO(2026-04-05): Alertmanager 路徑透過 Approval 建立,尚無 incident_id
|
||||
# 待 Approval→Incident 關聯建立後,補傳 incident_id 以啟用 detail/reanalyze/history 按鈕
|
||||
await gateway.send_approval_card(
|
||||
approval_id=approval_id,
|
||||
risk_level=risk_level,
|
||||
@@ -930,6 +950,7 @@ async def receive_alert(
|
||||
ai_provider=ai_provider,
|
||||
# 2026-04-08 ogt: 補傳 incident_id 以啟用詳情/重診/歷史按鈕
|
||||
incident_id="", # /alerts 路徑尚無 incident,detail/reanalyze/history 按鈕不顯示
|
||||
# /alerts 路徑沒有 notification_type(非 Alertmanager 路徑),不需 TYPE-4D routing
|
||||
)
|
||||
|
||||
return AlertResponse(
|
||||
@@ -1242,7 +1263,7 @@ async def alertmanager_webhook(
|
||||
# ==========================================================================
|
||||
if notification_type == "TYPE-1":
|
||||
_info_incident_id = await create_incident_for_approval(
|
||||
-approval_id=str(alert_id), # 純資訊無 approval,用 alert_id 佔位
|
||||
+approval_id=str(uuid.uuid4()), # 純資訊無 approval,UUID 佔位避免 ValueError
|
||||
risk_level="low",
|
||||
target_resource=target_resource,
|
||||
namespace=namespace,
|
||||
@@ -1433,6 +1454,8 @@ async def alertmanager_webhook(
|
||||
ai_provider=ai_provider,
|
||||
# 2026-04-08 ogt: 補傳 incident_id 以啟用詳情/重診/歷史按鈕
|
||||
incident_id=incident_id,
|
||||
# ADR-073: 路由 TYPE-4D → send_drift_card
|
||||
notification_type=notification_type,
|
||||
)
|
||||
|
||||
record_alert_chain_success("alertmanager")
|
||||
@@ -1498,6 +1521,8 @@ async def alertmanager_webhook(
|
||||
confidence=0.0,
|
||||
namespace=namespace,
|
||||
incident_id=fallback_incident_id,
|
||||
# ADR-073: 路由 TYPE-4D → send_drift_card
|
||||
notification_type=notification_type,
|
||||
)
|
||||
|
||||
return AlertResponse(
|
||||
|
||||
@@ -4582,7 +4582,10 @@ class TelegramGateway:
|
||||
if execution_triggered:
|
||||
import asyncio
|
||||
from src.services.approval_execution import get_execution_service
|
||||
-asyncio.create_task(get_execution_service().execute_approved_action(approval))
|
||||
+_exec_task = asyncio.create_task(
|
||||
+get_execution_service().execute_approved_action(approval)
|
||||
+)
|
||||
+_exec_task.add_done_callback(lambda t: t.exception() if not t.cancelled() else None)
|
||||
logger.info(
|
||||
"telegram_approval_execution_triggered",
|
||||
approval_id=approval_id,
|
||||
|
||||
Reference in New Issue
Block a user