fix(telegram): clarify auto repair handoff cards
This commit is contained in:
@@ -21,6 +21,7 @@ Decision Manager - Phase 6.5 非同步決策狀態機
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import html
|
||||
import json
|
||||
from datetime import UTC, datetime
|
||||
from enum import Enum
|
||||
@@ -1007,6 +1008,58 @@ def _format_metrics_delta(before: dict, after: dict) -> str:
|
||||
return " | ".join(parts)
|
||||
|
||||
|
||||
def _clip_telegram_field(value: str | None, limit: int) -> str:
|
||||
"""Normalize a short Telegram field without leaking multiline command noise."""
|
||||
text = " ".join(str(value or "").split())
|
||||
if len(text) <= limit:
|
||||
return text
|
||||
return f"{text[: max(0, limit - 3)]}..."
|
||||
|
||||
|
||||
def _format_auto_repair_status_line(
    *,
    incident_id: str,
    target: str,
    action: str,
    success: bool,
    error: str = "",
    metrics_delta_text: str = "",
) -> str:
    """Render an auto-repair result as a scannable Telegram operation card.

    Every caller-supplied field is first normalized and clipped by
    ``_clip_telegram_field`` and then passed through ``html.escape`` before
    being placed inside ``<code>`` tags, so raw commands or exception text
    cannot inject markup into the message.

    Args:
        incident_id: Incident identifier shown on the card (clipped to 40).
        target: Affected object (clipped to 80); "unknown" when empty.
        action: Command or action that was run (clipped to 160); a default
            label is used when empty.
        success: ``True`` renders the "AUTO RESOLVED" card, ``False`` renders
            the "HANDOFF REQUIRED" card.
        error: Failure reason (clipped to 180); only used when *success* is
            false. A default label is used when empty.
        metrics_delta_text: Optional before/after metrics summary (clipped to
            120); only used when *success* is true.

    Returns:
        The multi-line card text containing ``<b>`` / ``<code>`` markup.
    """
    safe_incident = html.escape(_clip_telegram_field(incident_id, 40))
    safe_target = html.escape(_clip_telegram_field(target, 80) or "unknown")
    safe_action = html.escape(_clip_telegram_field(action, 160) or "已執行")

    if success:
        # Decide on the normalized text, not the raw argument: a
        # whitespace-only delta would otherwise render an empty metrics row.
        clipped_delta = _clip_telegram_field(metrics_delta_text, 120)
        delta_line = (
            f"\n├ 指標:<code>{html.escape(clipped_delta)}</code>"
            if clipped_delta
            else ""
        )
        return (
            "✅ <b>AUTO RESOLVED|AI 自動修復完成</b>\n"
            "──────────────────────\n"
            f"├ 事件:<code>{safe_incident}</code>\n"
            f"├ 對象:<code>{safe_target}</code>\n"
            f"├ 執行:<code>{safe_action}</code>\n"
            "├ 狀態:自動化已完成,等待後驗證觀察\n"
            "├ Actor:leWOOOgo autonomous"
            f"{delta_line}"
        )

    # The error text is only needed on the handoff card.
    safe_error = html.escape(_clip_telegram_field(error, 180) or "未回傳錯誤")
    return (
        "🧑🔧 <b>HANDOFF REQUIRED|AI 自動修復失敗,已轉人工</b>\n"
        "──────────────────────\n"
        f"├ 事件:<code>{safe_incident}</code>\n"
        f"├ 對象:<code>{safe_target}</code>\n"
        f"├ 嘗試:<code>{safe_action}</code>\n"
        f"├ 原因:<code>{safe_error}</code>\n"
        "├ 狀態:自動化已停止,不再重試\n"
        "└ 下一步:請 SRE 依 AwoooP Run / 原告警卡處理"
    )
|
||||
|
||||
|
||||
async def _push_auto_repair_result(
|
||||
incident: Incident,
|
||||
action: str,
|
||||
@@ -1082,24 +1135,16 @@ async def _push_auto_repair_result(
|
||||
except Exception as _k8s_err:
|
||||
logger.debug("k8s_state_after_failed", incident_id=inc_id, error=str(_k8s_err))
|
||||
|
||||
# 2026-05-02 ogt + Claude Sonnet 4.6: 強制標記 [AUTO],避免事後抵賴
|
||||
# 統帥要求「就算是自動化處理,也要發告警訊息出來」—— 所有自治動作必須留痕,
|
||||
# 且 Telegram 上能明顯與人工點擊區隔。
|
||||
if success:
|
||||
delta_line = f"\n├ 指標: <code>{metrics_delta_text}</code>" if metrics_delta_text else ""
|
||||
status_line = (
|
||||
f"🤖 <b>[AUTO] AI 自動修復完成</b>\n"
|
||||
f"├ 動作: <code>{action[:100] if action else '已執行'}</code>\n"
|
||||
f"├ Actor: leWOOOgo (autonomous)"
|
||||
f"{delta_line}"
|
||||
)
|
||||
else:
|
||||
status_line = (
|
||||
f"🤖❌ <b>[AUTO] AI 自動修復失敗,已升級人工介入</b>\n"
|
||||
f"├ 動作: <code>{action[:80] if action else '未知'}</code>\n"
|
||||
f"├ Actor: leWOOOgo (autonomous)\n"
|
||||
f"└ 錯誤: {error[:100] if error else '未知錯誤'}"
|
||||
)
|
||||
# 2026-05-07 Codex: 自動化結果必須讓 SRE 一眼分辨「已自動解決」或
|
||||
# 「已停止並轉人工」,不能再用 raw command / exception 片段洗版。
|
||||
status_line = _format_auto_repair_status_line(
|
||||
incident_id=inc_id,
|
||||
target=target,
|
||||
action=action,
|
||||
success=success,
|
||||
error=error,
|
||||
metrics_delta_text=metrics_delta_text,
|
||||
)
|
||||
|
||||
# BUG-006 修復 2026-04-11: outcome + verification_result 全為 null
|
||||
# 原因:_push_auto_repair_result 只送 Telegram,沒寫 DB
|
||||
|
||||
@@ -8,6 +8,7 @@ test_telegram_message_templates.py - Telegram 訊息模板測試
|
||||
import pytest
|
||||
|
||||
import src.services.telegram_gateway as telegram_gateway_module
|
||||
from src.services.decision_manager import _format_auto_repair_status_line
|
||||
from src.services.telegram_gateway import (
|
||||
DailySummaryMessage,
|
||||
DeploySuccessMessage,
|
||||
@@ -20,6 +21,38 @@ from src.services.telegram_gateway import (
|
||||
)
|
||||
|
||||
|
||||
def test_auto_repair_status_line_distinguishes_handoff_required() -> None:
    """A failed auto-repair reply must be clearly marked as a human handoff.

    The raw error must also appear HTML-escaped rather than as literal markup.
    """
    result = _format_auto_repair_status_line(
        incident_id="INC-20260507-AAAAAA",
        target="node-exporter-110",
        action='ssh 192.168.0.110 "ps aux --sort=-%cpu | head -15"',
        success=False,
        error="Unsupported <scheme> & %d format: a real number is required, not str",
    )

    assert "HANDOFF REQUIRED|AI 自動修復失敗,已轉人工" in result
    assert "自動化已停止,不再重試" in result
    assert "請 SRE 依 AwoooP Run / 原告警卡處理" in result
    # The formatter runs html.escape on the error, so the escaped entities are
    # what must be present; the raw tag must not survive.
    assert "&lt;scheme&gt; &amp; %d format" in result
    assert "<scheme>" not in result
|
||||
|
||||
|
||||
def test_auto_repair_status_line_distinguishes_auto_resolved() -> None:
    """A successful auto-repair reply must be clearly marked as auto-resolved."""
    result = _format_auto_repair_status_line(
        incident_id="INC-20260507-BBBBBB",
        target="awoooi-api",
        action="kubectl rollout restart deployment/awoooi-api",
        success=True,
        metrics_delta_text="CPU 92%->30%",
    )

    assert "AUTO RESOLVED|AI 自動修復完成" in result
    assert "自動化已完成,等待後驗證觀察" in result
    # The metrics text is HTML-escaped by the formatter, so ">" is emitted as
    # "&gt;" in the rendered card.
    assert "CPU 92%-&gt;30%" in result
|
||||
|
||||
|
||||
class TestTelegramMessageFormat:
|
||||
"""測試現有 TelegramMessage 格式化"""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user