fix(telegram): clarify auto repair handoff cards
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m17s
CD Pipeline / build-and-deploy (push) Successful in 3m47s
CD Pipeline / post-deploy-checks (push) Successful in 1m57s

This commit is contained in:
Your Name
2026-05-07 02:07:43 +08:00
parent 57df3582dd
commit 3f69e03fcb
2 changed files with 96 additions and 18 deletions

View File

@@ -21,6 +21,7 @@ Decision Manager - Phase 6.5 非同步決策狀態機
"""
import asyncio
import html
import json
from datetime import UTC, datetime
from enum import Enum
@@ -1007,6 +1008,58 @@ def _format_metrics_delta(before: dict, after: dict) -> str:
return " | ".join(parts)
def _clip_telegram_field(value: str | None, limit: int) -> str:
"""Normalize a short Telegram field without leaking multiline command noise."""
text = " ".join(str(value or "").split())
if len(text) <= limit:
return text
return f"{text[: max(0, limit - 3)]}..."
def _format_auto_repair_status_line(
    *,
    incident_id: str,
    target: str,
    action: str,
    success: bool,
    error: str = "",
    metrics_delta_text: str = "",
) -> str:
    """Render an auto-repair result as a scannable Telegram HTML card.

    Every dynamic field is first normalized and clipped with
    ``_clip_telegram_field`` and then passed through ``html.escape``, so raw
    commands or exception text cannot inject markup into the message.

    Args:
        incident_id: Incident identifier, clipped to 40 characters.
        target: Affected target, clipped to 80 characters; falls back to
            ``"unknown"`` when empty.
        action: Executed / attempted action, clipped to 160 characters; falls
            back to a default label when empty.
        success: ``True`` renders the "auto resolved" card, ``False`` renders
            the "handoff required" card.
        error: Failure reason, clipped to 180 characters; only used on the
            failure card, with a default label when empty.
        metrics_delta_text: Optional metrics summary, clipped to 120
            characters; only used on the success card.

    Returns:
        The multi-line HTML string for the card.
    """
    safe_incident = html.escape(_clip_telegram_field(incident_id, 40))
    safe_target = html.escape(_clip_telegram_field(target, 80) or "unknown")
    safe_action = html.escape(_clip_telegram_field(action, 160) or "已執行")

    if success:
        # Decide on the *normalized* text: a whitespace-only delta collapses
        # to "" and must not produce an empty "<code></code>" metrics row.
        clipped_delta = _clip_telegram_field(metrics_delta_text, 120)
        delta_line = (
            f"\n├ 指標:<code>{html.escape(clipped_delta)}</code>"
            if clipped_delta
            else ""
        )
        return (
            "✅ <b>AUTO RESOLVEDAI 自動修復完成</b>\n"
            "──────────────────────\n"
            f"├ 事件:<code>{safe_incident}</code>\n"
            f"├ 對象:<code>{safe_target}</code>\n"
            f"├ 執行:<code>{safe_action}</code>\n"
            "├ 狀態:自動化已完成,等待後驗證觀察\n"
            "├ ActorleWOOOgo autonomous"
            f"{delta_line}"
        )

    # The error text is only shown on the handoff card, so build it here.
    safe_error = html.escape(_clip_telegram_field(error, 180) or "未回傳錯誤")
    return (
        "🧑‍🔧 <b>HANDOFF REQUIREDAI 自動修復失敗,已轉人工</b>\n"
        "──────────────────────\n"
        f"├ 事件:<code>{safe_incident}</code>\n"
        f"├ 對象:<code>{safe_target}</code>\n"
        f"├ 嘗試:<code>{safe_action}</code>\n"
        f"├ 原因:<code>{safe_error}</code>\n"
        "├ 狀態:自動化已停止,不再重試\n"
        "└ 下一步:請 SRE 依 AwoooP Run / 原告警卡處理"
    )
async def _push_auto_repair_result(
incident: Incident,
action: str,
@@ -1082,24 +1135,16 @@ async def _push_auto_repair_result(
except Exception as _k8s_err:
logger.debug("k8s_state_after_failed", incident_id=inc_id, error=str(_k8s_err))
# 2026-05-02 ogt + Claude Sonnet 4.6: 強制標記 [AUTO],避免事後抵賴
# 統帥要求「就算是自動化處理,也要發告警訊息出來」—— 所有自治動作必須留痕,
# 且 Telegram 上能明顯與人工點擊區隔。
if success:
delta_line = f"\n├ 指標: <code>{metrics_delta_text}</code>" if metrics_delta_text else ""
status_line = (
f"🤖 <b>[AUTO] AI 自動修復完成</b>\n"
f"├ 動作: <code>{action[:100] if action else '已執行'}</code>\n"
f"├ Actor: leWOOOgo (autonomous)"
f"{delta_line}"
)
else:
status_line = (
f"🤖❌ <b>[AUTO] AI 自動修復失敗,已升級人工介入</b>\n"
f"├ 動作: <code>{action[:80] if action else '未知'}</code>\n"
f"├ Actor: leWOOOgo (autonomous)\n"
f"└ 錯誤: {error[:100] if error else '未知錯誤'}"
)
# 2026-05-07 Codex: 自動化結果必須讓 SRE 一眼分辨「已自動解決」或
# 「已停止並轉人工」,不能再用 raw command / exception 片段洗版。
status_line = _format_auto_repair_status_line(
incident_id=inc_id,
target=target,
action=action,
success=success,
error=error,
metrics_delta_text=metrics_delta_text,
)
# BUG-006 fix 2026-04-11: outcome + verification_result were all null.
# Cause: _push_auto_repair_result only sent the Telegram message and never wrote to the DB.

View File

@@ -8,6 +8,7 @@ test_telegram_message_templates.py - Telegram 訊息模板測試
import pytest
import src.services.telegram_gateway as telegram_gateway_module
from src.services.decision_manager import _format_auto_repair_status_line
from src.services.telegram_gateway import (
DailySummaryMessage,
DeploySuccessMessage,
@@ -20,6 +21,38 @@ from src.services.telegram_gateway import (
)
def test_auto_repair_status_line_distinguishes_handoff_required() -> None:
    """A failed auto-repair card must flag human handoff and escape the raw error."""
    card_kwargs = {
        "incident_id": "INC-20260507-AAAAAA",
        "target": "node-exporter-110",
        "action": 'ssh 192.168.0.110 "ps aux --sort=-%cpu | head -15"',
        "success": False,
        "error": "Unsupported <scheme> & %d format: a real number is required, not str",
    }
    card = _format_auto_repair_status_line(**card_kwargs)

    # Fragments that must appear verbatim on the handoff card.
    required_fragments = (
        "HANDOFF REQUIREDAI 自動修復失敗,已轉人工",
        "自動化已停止,不再重試",
        "請 SRE 依 AwoooP Run / 原告警卡處理",
        "&lt;scheme&gt; &amp; %d format",
    )
    for fragment in required_fragments:
        assert fragment in card

    # The unescaped tag from the error text must not leak into the card.
    assert "<scheme>" not in card
def test_auto_repair_status_line_distinguishes_auto_resolved() -> None:
    """A successful auto-repair card must clearly state it was resolved automatically."""
    card_kwargs = {
        "incident_id": "INC-20260507-BBBBBB",
        "target": "awoooi-api",
        "action": "kubectl rollout restart deployment/awoooi-api",
        "success": True,
        "metrics_delta_text": "CPU 92%->30%",
    }
    card = _format_auto_repair_status_line(**card_kwargs)

    # Headline, status row, and the HTML-escaped metrics delta must all be present.
    required_fragments = (
        "AUTO RESOLVEDAI 自動修復完成",
        "自動化已完成,等待後驗證觀察",
        "CPU 92%-&gt;30%",
    )
    for fragment in required_fragments:
        assert fragment in card
class TestTelegramMessageFormat:
"""測試現有 TelegramMessage 格式化"""