feat(adr075): Step1-3 classify補丁+新按鈕+TYPE-5S/6B/7E格式函數
Step-1 incident_service.py classify_alert_early():
- 新增 secops (TYPE-5S): UnauthorizedSSH/KubeAudit/CVE/WAFAttack/PodAbnormal
- 新增 business (TYPE-6B): AITokenCost/GeminiAPIError/SLOBurn/MomoScraper
- 新增 flywheel_health MCPProvider/OllamaDown/NemotronDown 前綴
- ssl_cert: 依 days_remaining 決定 TYPE-1(≥14d) vs TYPE-3(<14d)

Step-2 telegram_gateway.py _build_inline_keyboard():
- 新增 secops: [隔離] [封鎖IP] [驅逐] [確認授權]
- 新增 business: [暫停1h] [查SignOz] [忽略]
- 新增 flywheel_health: [觸發診斷] [飛輪面板] [靜默]

Step-3 telegram_gateway.py 新增格式化函數 (Tier 2):
- send_secops_card() — TYPE-5S 防禦按鈕+nonce
- send_business_alert() — TYPE-6B 業務損失速率
- send_escalation_card() — TYPE-7E P0/P1 升級,發 DM+群組

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -138,11 +138,28 @@ def classify_alert_early(alertname: str, severity: str, labels: dict | None = No
|
||||
):
|
||||
return "alertchain_health", "TYPE-8M"
|
||||
|
||||
# 3. 飛輪/AI 系統健康(優先於 severity 判斷)
|
||||
if alertname in ("AutoRepairLowSuccessRate", "PermanentFixRequired") or alertname.startswith("Flywheel"):
|
||||
# 3. 資安告警(高優先,防止被 severity/prefix 規則覆蓋)
|
||||
# ADR-075 TYPE-5S (2026-04-12 ogt)
|
||||
if any(alertname.startswith(p) for p in (
|
||||
"UnauthorizedSSH", "KubeAudit", "CVECritical", "WAFAttack",
|
||||
"PodAbnormal", "SecurityBreach",
|
||||
)):
|
||||
return "secops", "TYPE-5S"
|
||||
|
||||
# 4. 飛輪/AI 系統健康(優先於 severity 判斷)
|
||||
if alertname in ("AutoRepairLowSuccessRate", "PermanentFixRequired") or any(
|
||||
alertname.startswith(p) for p in ("Flywheel", "MCPProvider", "OllamaDown", "NemotronDown")
|
||||
):
|
||||
return "flywheel_health", "TYPE-8M"
|
||||
|
||||
# 4. 純資訊
|
||||
# 4a. 業務/FinOps 告警(ADR-075 TYPE-6B)
|
||||
if any(alertname.startswith(p) for p in (
|
||||
"AITokenCost", "GeminiAPIError", "SLOBurn", "APIErrorBudget",
|
||||
"MomoScraper", "ScraperSuccess",
|
||||
)):
|
||||
return "business", "TYPE-6B"
|
||||
|
||||
# 5. 純資訊
|
||||
if severity in ("info", "none"):
|
||||
return "info", "TYPE-1"
|
||||
|
||||
@@ -185,8 +202,10 @@ def classify_alert_early(alertname: str, severity: str, labels: dict | None = No
|
||||
return "external_site", "TYPE-3"
|
||||
|
||||
# 13. SSL 憑證(ADR-075 修正:從 general 分離)
|
||||
# ≥14 天→TYPE-1(提醒,無需審核);<14 天→TYPE-3(緊急審核)
|
||||
if alertname.startswith(("ExternalSiteSSL", "TLSCert")):
|
||||
return "ssl_cert", "TYPE-3"
|
||||
days = int((labels or {}).get("days_remaining", 0)) if labels else 0
|
||||
return "ssl_cert", ("TYPE-1" if days >= 14 else "TYPE-3")
|
||||
|
||||
return "general", "TYPE-3"
|
||||
|
||||
|
||||
@@ -1391,7 +1391,24 @@ class TelegramGateway:
|
||||
("🔍 查健康狀態", f"action:check_health:{incident_id}"),
|
||||
("📋 查 Log", f"action:check_log:{incident_id}"),
|
||||
],
|
||||
# alertchain_health / flywheel_health → TYPE-8M → send_meta_alert,不走此字典
|
||||
# ADR-075 新增分類按鈕 (2026-04-12 ogt)
|
||||
"secops": [
|
||||
("🚫 隔離資源", f"secops_isolate:{incident_id}"),
|
||||
("⛔ 封鎖來源 IP", f"secops_block_ip:{incident_id}"),
|
||||
("🔄 強制驅逐", f"secops_evict:{incident_id}"),
|
||||
("✅ 確認授權", f"secops_authorize:{incident_id}"),
|
||||
],
|
||||
"business": [
|
||||
("⏸️ 暫停 1h", f"action:pause_1h:{incident_id}"),
|
||||
("🔍 查 SignOz", f"action:open_signoz:{incident_id}"),
|
||||
("❌ 忽略", f"action:ignore:{incident_id}"),
|
||||
],
|
||||
"flywheel_health": [
|
||||
("🔄 觸發診斷", f"flywheel_diagnose:{incident_id}"),
|
||||
("📊 飛輪面板", f"action:open_flywheel:{incident_id}"),
|
||||
("🔕 靜默", f"action:silence:{incident_id}"),
|
||||
],
|
||||
# alertchain_health → TYPE-8M → send_meta_alert,不走此字典
|
||||
"ai_system": [
|
||||
("🔀 切換 Provider", f"action:switch_provider:{incident_id}"),
|
||||
],
|
||||
@@ -1924,6 +1941,192 @@ class TelegramGateway:
|
||||
},
|
||||
)
|
||||
|
||||
async def send_secops_card(
    self,
    incident_id: str,
    approval_id: str,
    alertname: str,
    threat_level: str,
    source: str = "",
    threat_behavior: str = "",
    defense_action: str = "",
    resource: str = "",
) -> dict:
    """
    Send the TYPE-5S SecOps threat card (ADR-075, 2026-04-12 ogt).

    Builds an HTML-formatted Telegram message describing the threat and
    attaches a 2x2 inline keyboard:
    [isolate resource] [block source IP] / [evict pod] [confirm authorized].

    The message is posted only to ``settings.OPENCLAW_TG_CHAT_ID``; the
    design intent is a personal DM because the actions are sensitive.

    Args:
        incident_id: Incident identifier; shown in the card and embedded in
            the block-IP / evict callback data.
        approval_id: Identifier passed to the nonce generator for the
            isolate / authorize buttons.
        alertname: Alert name, shown as the threat type.
        threat_level: Severity label; "critical" and "warning"
            (case-insensitive) get dedicated icons, anything else a
            generic warning icon.
        source: Optional origin of the threat; line omitted when empty.
        threat_behavior: Optional description of the abnormal behaviour,
            truncated to 200 characters before escaping.
        defense_action: Optional suggested defence, truncated to 200
            characters before escaping.
        resource: Optional affected resource; line omitted when empty.

    Returns:
        Whatever ``self._make_request("sendMessage", ...)`` returns.
    """
    severity_badges = {"critical": "🔴", "warning": "🟠"}
    badge = severity_badges.get(threat_level.lower(), "⚠️")

    # Collect the card piece by piece; every dynamic value is HTML-escaped
    # because the message is sent with parse_mode=HTML.
    segments = [
        f"🥷 SECOPS | {badge} 資安威脅\n",
        "━━━━━━━━━━━━━━━━━━━\n",
        f"📋 <code>{html.escape(incident_id)}</code>\n",
        f"🚨 威脅類型:<b>{html.escape(alertname)}</b>\n",
    ]
    if resource:
        segments.append(f"🎯 受害資源:<code>{html.escape(resource)}</code>\n")

    segments.append("\n🧠 <b>AI 威脅分析</b>\n")
    if source:
        segments.append(f"├─ 來源:{html.escape(source)}\n")
    if threat_behavior:
        segments.append(f"├─ 異常行為:{html.escape(threat_behavior[:200])}\n")
    segments.append(f"└─ 風險評估:<b>{html.escape(threat_level)}</b>\n")

    if defense_action:
        segments.append("\n🛡️ <b>建議防禦動作</b>\n")
        segments.append(f"<code>{html.escape(defense_action[:200])}</code>\n")

    message = "".join(segments)

    # Only the isolate and authorize buttons carry a generated nonce as
    # callback data; block-IP and evict use plain "<action>:<incident_id>".
    # NOTE(review): confirm the callback handler really intends the two
    # plain buttons to work without a nonce.
    make_nonce = self._security.generate_callback_nonce
    isolate_callback = make_nonce(approval_id, "secops_isolate")
    authorize_callback = make_nonce(approval_id, "secops_authorize")

    top_row = [
        {"text": "🚫 隔離資源", "callback_data": isolate_callback},
        {"text": "⛔ 封鎖來源 IP", "callback_data": f"secops_block_ip:{incident_id}"},
    ]
    bottom_row = [
        {"text": "🔄 強制驅逐 Pod", "callback_data": f"secops_evict:{incident_id}"},
        {"text": "✅ 確認授權操作", "callback_data": authorize_callback},
    ]

    payload = {
        "chat_id": settings.OPENCLAW_TG_CHAT_ID,
        "text": message,
        "parse_mode": "HTML",
        "reply_markup": {"inline_keyboard": [top_row, bottom_row]},
    }
    return await self._make_request("sendMessage", payload)
|
||||
|
||||
async def send_business_alert(
|
||||
self,
|
||||
incident_id: str,
|
||||
alertname: str,
|
||||
business_domain: str,
|
||||
metric_name: str,
|
||||
current_value: str,
|
||||
threshold: str,
|
||||
loss_rate: str = "",
|
||||
group_chat_id: str | None = None,
|
||||
) -> dict:
|
||||
"""
|
||||
TYPE-6B 業務/FinOps 告警。
|
||||
|
||||
ADR-075 (2026-04-12 ogt)
|
||||
路由: TYPE-1 發群組,此為 TYPE-6B 也發群組(業務趨勢數字)
|
||||
按鈕: [暫停] [查 SignOz] [忽略]
|
||||
"""
|
||||
text = (
|
||||
f"📉 SLO ALERT | 業務指標異常\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━\n"
|
||||
f"📋 <code>{html.escape(incident_id)}</code>\n"
|
||||
f"🚨 告警:<b>{html.escape(alertname)}</b>\n"
|
||||
f"🎯 影響業務:{html.escape(business_domain)}\n"
|
||||
f"📊 異常指標:<code>{html.escape(metric_name)}</code>\n"
|
||||
f"\n🧠 <b>業務衝擊分析</b>\n"
|
||||
f"├─ 當前狀態:{html.escape(current_value)} (閾值: {html.escape(threshold)})\n"
|
||||
)
|
||||
if loss_rate:
|
||||
text += f"└─ 損失速率:{html.escape(loss_rate)}\n"
|
||||
|
||||
keyboard = {
|
||||
"inline_keyboard": [
|
||||
[
|
||||
{"text": "⏸️ 暫停 1h", "callback_data": f"action:pause_1h:{incident_id}"},
|
||||
{"text": "❌ 忽略", "callback_data": f"action:ignore:{incident_id}"},
|
||||
],
|
||||
]
|
||||
}
|
||||
|
||||
target_chat = group_chat_id or settings.OPENCLAW_TG_CHAT_ID
|
||||
return await self._make_request(
|
||||
"sendMessage",
|
||||
{
|
||||
"chat_id": target_chat,
|
||||
"text": text,
|
||||
"parse_mode": "HTML",
|
||||
"reply_markup": keyboard,
|
||||
},
|
||||
)
|
||||
|
||||
async def send_escalation_card(
    self,
    incident_id: str,
    original_alertname: str,
    duration_min: int,
    priority: int = 0,
    attempted_actions: str = "",
    failure_reason: str = "",
    current_impact: str = "",
    group_chat_id: str | None = None,
) -> dict:
    """
    Send the TYPE-7E major-incident escalation notice (ADR-075, 2026-04-12 ogt).

    Design trigger: SLA timeout (P0: 15 minutes; P1: 45 minutes).

    Routing: the card is always posted to ``settings.OPENCLAW_TG_CHAT_ID``
    with an inline keyboard; when ``group_chat_id`` is given, the same text
    plus a call-for-help line is also posted there, without buttons.

    Buttons built here: [generate postmortem draft] /
    [acknowledge takeover] [DR manual].
    NOTE(review): no "create war room" button is built here, although it
    appears in the original design notes — confirm whether it is planned.

    Args:
        incident_id: Incident identifier; shown in the card and embedded
            in every button's callback data.
        original_alertname: Name of the alert that is being escalated.
        duration_min: How long the incident has lasted, in minutes;
            rendered as minutes below 60, otherwise as hours and minutes.
        priority: Priority number rendered as ``P<priority>``.
        attempted_actions: Optional summary of what was tried (first 100
            characters are shown).
        failure_reason: Optional reason the attempts failed (first 100
            characters are shown).
        current_impact: Optional current impact (first 100 characters are
            shown).
        group_chat_id: Optional group chat that should also be notified.

    Returns:
        The result of the first (default-chat) ``sendMessage`` request; the
        group request's result is not returned.
    """
    if duration_min < 60:
        elapsed = f"{duration_min} 分鐘"
    else:
        elapsed = f"{duration_min//60} 小時 {duration_min%60} 分"

    # Header section; dynamic values are HTML-escaped (parse_mode=HTML).
    lines = [
        f"🚨 ESCALATION | P{priority} 事故升級\n",
        "━━━━━━━━━━━━━━━━━━━\n",
        f"📋 <code>{html.escape(incident_id)}</code> | 已持續 <b>{elapsed}</b>\n",
        "⚠️ <b>超出自動修復能力範圍</b>\n",
        f"🎯 核心問題:<code>{html.escape(original_alertname)}</code>\n",
    ]

    # Optional AI summary: the heading appears only if at least one of the
    # three detail fields is non-empty.
    summary = []
    if attempted_actions:
        summary.append(f"├─ 嘗試動作:{html.escape(attempted_actions[:100])}\n")
    if failure_reason:
        summary.append(f"├─ 失敗原因:{html.escape(failure_reason[:100])}\n")
    if current_impact:
        summary.append(f"└─ 目前影響:{html.escape(current_impact[:100])}\n")
    if summary:
        lines.append("\n🧠 <b>AI 戰局總結</b>\n")
        lines.extend(summary)

    card_text = "".join(lines)

    buttons = [
        [
            {"text": "📄 產生 Postmortem 草稿", "callback_data": f"action:postmortem:{incident_id}"},
        ],
        [
            {"text": "✅ 確認已接手處理", "callback_data": f"action:escalation_ack:{incident_id}"},
            {"text": "📖 DR 手冊", "callback_data": f"action:dr_manual:{incident_id}"},
        ],
    ]

    # First: the default chat, with the action buttons attached.
    dm_result = await self._make_request(
        "sendMessage",
        {
            "chat_id": settings.OPENCLAW_TG_CHAT_ID,
            "text": card_text,
            "parse_mode": "HTML",
            "reply_markup": {"inline_keyboard": buttons},
        },
    )

    # Then, if configured: the group chat, text only plus a call for help.
    if group_chat_id:
        await self._make_request(
            "sendMessage",
            {
                "chat_id": group_chat_id,
                "text": card_text + "\n📣 @所有人 事故升級,請協助!",
                "parse_mode": "HTML",
            },
        )

    return dm_result
|
||||
|
||||
# =========================================================================
|
||||
# 新訊息發送方法 (2026-03-29 ogt: ADR-038)
|
||||
# =========================================================================
|
||||
|
||||
Reference in New Issue
Block a user