From 3f69e03fcb915514aabf25263b5004b7de5912dc Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 7 May 2026 02:07:43 +0800 Subject: [PATCH] fix(telegram): clarify auto repair handoff cards --- apps/api/src/services/decision_manager.py | 81 ++++++++++++++----- .../tests/test_telegram_message_templates.py | 33 ++++++++ 2 files changed, 96 insertions(+), 18 deletions(-) diff --git a/apps/api/src/services/decision_manager.py b/apps/api/src/services/decision_manager.py index e98d5df2..acc0d60d 100644 --- a/apps/api/src/services/decision_manager.py +++ b/apps/api/src/services/decision_manager.py @@ -21,6 +21,7 @@ Decision Manager - Phase 6.5 非同步決策狀態機 """ import asyncio +import html import json from datetime import UTC, datetime from enum import Enum @@ -1007,6 +1008,58 @@ def _format_metrics_delta(before: dict, after: dict) -> str: return " | ".join(parts) +def _clip_telegram_field(value: str | None, limit: int) -> str: + """Normalize a short Telegram field without leaking multiline command noise.""" + text = " ".join(str(value or "").split()) + if len(text) <= limit: + return text + return f"{text[: max(0, limit - 3)]}..." + + +def _format_auto_repair_status_line( + *, + incident_id: str, + target: str, + action: str, + success: bool, + error: str = "", + metrics_delta_text: str = "", +) -> str: + """Render auto-repair result as a scannable operation card.""" + safe_incident = html.escape(_clip_telegram_field(incident_id, 40)) + safe_target = html.escape(_clip_telegram_field(target, 80) or "unknown") + safe_action = html.escape(_clip_telegram_field(action, 160) or "已執行") + safe_error = html.escape(_clip_telegram_field(error, 180) or "未回傳錯誤") + + if success: + delta_line = ( + f"\n├ 指標:{html.escape(_clip_telegram_field(metrics_delta_text, 120))}" + if metrics_delta_text + else "" + ) + return ( + "✅ AUTO RESOLVED|AI 自動修復完成\n" + "──────────────────────\n" + f"├ 事件:{safe_incident}\n" + f"├ 對象:{safe_target}\n" + f"├ 執行:{safe_action}\n" + "├ 狀態:自動化已完成,等待後驗證觀察\n" + "├ Actor:leWOOOgo autonomous" + f"{delta_line}" + ) + + return ( + "🧑‍🔧 HANDOFF REQUIRED|AI 自動修復失敗,已轉人工\n" + "──────────────────────\n" + f"├ 事件:{safe_incident}\n" + f"├ 對象:{safe_target}\n" + f"├ 嘗試:{safe_action}\n" + f"├ 原因:{safe_error}\n" + "├ 狀態:自動化已停止,不再重試\n" + "└ 下一步:請 SRE 依 AwoooP Run / 原告警卡處理" + ) + + async def _push_auto_repair_result( incident: Incident, action: str, @@ -1082,24 +1135,16 @@ async def _push_auto_repair_result( except Exception as _k8s_err: logger.debug("k8s_state_after_failed", incident_id=inc_id, error=str(_k8s_err)) - # 2026-05-02 ogt + Claude Sonnet 4.6: 強制標記 [AUTO],避免事後抵賴 - # 統帥要求「就算是自動化處理,也要發告警訊息出來」—— 所有自治動作必須留痕, - # 且 Telegram 上能明顯與人工點擊區隔。 - if success: - delta_line = f"\n├ 指標: {metrics_delta_text}" if metrics_delta_text else "" - status_line = ( - f"🤖 [AUTO] AI 自動修復完成\n" - f"├ 動作: {action[:100] if action else '已執行'}\n" - f"├ Actor: leWOOOgo (autonomous)" - f"{delta_line}" - ) - else: - status_line = ( - f"🤖❌ [AUTO] AI 自動修復失敗,已升級人工介入\n" - f"├ 動作: {action[:80] if action else '未知'}\n" - f"├ Actor: leWOOOgo (autonomous)\n" - f"└ 錯誤: {error[:100] if error else '未知錯誤'}" - ) + # 2026-05-07 Codex: 自動化結果必須讓 SRE 一眼分辨「已自動解決」或 + # 「已停止並轉人工」,不能再用 raw command / exception 片段洗版。 + status_line = _format_auto_repair_status_line( + incident_id=inc_id, + target=target, + action=action, + success=success, + error=error, + metrics_delta_text=metrics_delta_text, + ) # BUG-006 修復 2026-04-11: outcome + verification_result 全為 null # 原因:_push_auto_repair_result 只送 Telegram,沒寫 DB diff --git a/apps/api/tests/test_telegram_message_templates.py b/apps/api/tests/test_telegram_message_templates.py index 12eb8f25..710daa3d 100644 --- a/apps/api/tests/test_telegram_message_templates.py +++ b/apps/api/tests/test_telegram_message_templates.py @@ -8,6 +8,7 @@ test_telegram_message_templates.py - Telegram 訊息模板測試 import pytest import src.services.telegram_gateway as telegram_gateway_module +from src.services.decision_manager import _format_auto_repair_status_line from src.services.telegram_gateway import ( DailySummaryMessage, DeploySuccessMessage, @@ -20,6 +21,38 @@ from src.services.telegram_gateway import ( ) +def test_auto_repair_status_line_distinguishes_handoff_required() -> None: + """自動化失敗 reply 必須明確標示轉人工,且不把 raw error 當純文字噴出。""" + result = _format_auto_repair_status_line( + incident_id="INC-20260507-AAAAAA", + target="node-exporter-110", + action='ssh 192.168.0.110 "ps aux --sort=-%cpu | head -15"', + success=False, + error="Unsupported & %d format: a real number is required, not str", + ) + + assert "HANDOFF REQUIRED|AI 自動修復失敗,已轉人工" in result + assert "自動化已停止,不再重試" in result + assert "請 SRE 依 AwoooP Run / 原告警卡處理" in result + assert "<scheme> & %d format" in result + assert "" not in result + + +def test_auto_repair_status_line_distinguishes_auto_resolved() -> None: + """自動化成功 reply 必須明確標示已自動解決。""" + result = _format_auto_repair_status_line( + incident_id="INC-20260507-BBBBBB", + target="awoooi-api", + action="kubectl rollout restart deployment/awoooi-api", + success=True, + metrics_delta_text="CPU 92%->30%", + ) + + assert "AUTO RESOLVED|AI 自動修復完成" in result + assert "自動化已完成,等待後驗證觀察" in result + assert "CPU 92%->30%" in result + + class TestTelegramMessageFormat: """測試現有 TelegramMessage 格式化"""