修復: 1. telegram_gateway.py - HTML 轉義 (html.escape) 防止 "Can't parse entities" 2. openclaw-state-machine.tsx - 簽核後顯示結果 2 秒再導航 問題根因: - URL 和用戶輸入內容可能包含 <, >, & 破壞 HTML - 簽核後立即刷新列表,已簽核項目消失 Memory: feedback_approval_preserve_content.md Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1240 lines
42 KiB
Python
1240 lines
42 KiB
Python
"""
|
||
Telegram Gateway - OpenClaw 行動戰情室 + SignOz 整合
|
||
====================================================
|
||
Phase 5.4.3 & 5.4.4: Telegram 推送與簽核接收
|
||
統帥校正: SignOz 為唯一全能視力中心
|
||
|
||
Features:
|
||
- 推送待簽核卡片到 Telegram (含 SignOz 指標)
|
||
- 動態 SignOz Trace URL (告警前後 5 分鐘)
|
||
- 自動調優按鈕 (Shadow Mode: 僅日誌輸出)
|
||
- 接收統帥簽核回調
|
||
- SOUL.md 訊息壓縮原則 100% 遵守
|
||
|
||
SOUL.md 鐵律 (4.1 Telegram 訊息壓縮原則):
|
||
- 狀態標籤: 20 字元
|
||
- 資源名稱: 50 字元
|
||
- 根因摘要: 100 字元
|
||
- 建議行動: 50 字元
|
||
- 總長度: 800 字元 (v7.0 擴展以容納 SignOz 區塊)
|
||
|
||
修復紀錄:
|
||
- 2026-03-26 Claude Code: 修復 HTML 解析錯誤 (Can't parse entities)
|
||
"""
|
||
|
||
import asyncio
|
||
import html
|
||
from dataclasses import dataclass
|
||
from datetime import UTC, datetime
|
||
|
||
import httpx
|
||
import structlog
|
||
|
||
from src.core.config import settings
|
||
from src.services.security_interceptor import (
|
||
NonceReplayError,
|
||
UserNotWhitelistedError,
|
||
get_security_interceptor,
|
||
)
|
||
|
||
logger = structlog.get_logger(__name__)
|
||
|
||
|
||
# =============================================================================
|
||
# Long Polling 配置 (Phase 5 內網修復)
|
||
# =============================================================================
|
||
LONG_POLLING_TIMEOUT = 30 # getUpdates timeout (秒)
|
||
LONG_POLLING_RETRY_DELAY = 5 # 錯誤後重試延遲 (秒)
|
||
|
||
|
||
# =============================================================================
|
||
# SignOz Metrics Block (v7.0)
|
||
# =============================================================================
|
||
|
||
@dataclass
|
||
class SignOzMetricsBlock:
|
||
"""
|
||
SignOz 指標區塊 (嵌入 Telegram 卡片)
|
||
|
||
格式:
|
||
📊 SignOz 指標
|
||
├ RPS: 150.2 📈
|
||
├ Error: 🟢 0.5%
|
||
└ P99: 245ms ➡️
|
||
"""
|
||
rps: float = 0.0
|
||
rps_trend: str = "stable" # up, down, stable
|
||
error_rate: float = 0.0
|
||
p99_latency_ms: float = 0.0
|
||
latency_trend: str = "stable"
|
||
trace_url: str = ""
|
||
|
||
def format(self) -> str:
|
||
"""格式化為 Telegram HTML"""
|
||
trend_emoji = {"up": "📈", "down": "📉", "stable": "➡️"}
|
||
error_emoji = "🟢" if self.error_rate < 1 else ("🟡" if self.error_rate < 5 else "🔴")
|
||
|
||
return (
|
||
f"📊 <b>SignOz 指標</b>\n"
|
||
f"├ RPS: <code>{self.rps:.1f}</code> {trend_emoji.get(self.rps_trend, '➡️')}\n"
|
||
f"├ Error: {error_emoji} <code>{self.error_rate:.2f}%</code>\n"
|
||
f"└ P99: <code>{self.p99_latency_ms:.0f}ms</code> {trend_emoji.get(self.latency_trend, '➡️')}"
|
||
)
|
||
|
||
|
||
# =============================================================================
|
||
# SOUL.md 訊息格式定義 (v7.0 + SignOz)
|
||
# =============================================================================
|
||
|
||
@dataclass
|
||
class TelegramMessage:
|
||
"""
|
||
Telegram 訊息結構 (SOUL.md 4.1 + v7.0 SignOz 整合)
|
||
|
||
格式:
|
||
═══════════════════════════
|
||
🚨 CRITICAL | harbor-core
|
||
═══════════════════════════
|
||
📋 INC-20260321-0001
|
||
🎯 資源: harbor-core-7d4b8c9f5
|
||
━━━━━━━━━━━━━━━━━━━
|
||
🤖 AI 仲裁判定
|
||
👥 責任: BE (後端)
|
||
📊 信心: 🟢 88%
|
||
💡 原因: JVM Heap 配置不當
|
||
━━━━━━━━━━━━━━━━━━━
|
||
📊 SignOz 指標
|
||
├ RPS: 150.2 📈
|
||
├ Error: 🟢 0.5%
|
||
└ P99: 245ms ➡️
|
||
━━━━━━━━━━━━━━━━━━━
|
||
🔧 建議: 刪除 Pod
|
||
⏱️ 停機: ~30s
|
||
🔍 SignOz Trace (±5min)
|
||
|
||
[✅ 簽核] [❌ 拒絕] [⚡ 自動調優]
|
||
"""
|
||
status_emoji: str # 🚨, ⚠️, ℹ️
|
||
risk_level: str # CRITICAL, MEDIUM, LOW
|
||
resource_name: str # Pod/Deployment 名稱 (max 50)
|
||
root_cause: str # 根因摘要 (max 100)
|
||
suggested_action: str # 建議操作 (max 50)
|
||
estimated_downtime: str # 預計停機時間
|
||
approval_id: str # 簽核單 ID
|
||
# v6.0 AI 仲裁欄位
|
||
incident_id: str = "" # 事件編號 INC-YYYYMMDD-XXXX
|
||
primary_responsibility: str = "COLLAB" # FE/BE/INFRA/DB/COLLAB
|
||
confidence: float = 0.0 # 信心度 0.0-1.0
|
||
namespace: str = "default" # K8s namespace
|
||
# v7.0 SignOz 整合
|
||
signoz_metrics: SignOzMetricsBlock | None = None
|
||
signoz_trace_url: str = "" # 動態時間參數 URL
|
||
auto_tuning_command: str = "" # kubectl 調優指令
|
||
|
||
def format(self) -> str:
|
||
"""
|
||
格式化為 SOUL.md 規範的訊息 (含 AI 仲裁 + SignOz)
|
||
|
||
Returns:
|
||
str: 格式化的 Telegram 訊息 (max 900 字元)
|
||
"""
|
||
# 責任映射
|
||
resp_map = {
|
||
"FE": "👨💻 FE (前端)",
|
||
"BE": "⚙️ BE (後端)",
|
||
"INFRA": "🏗️ INFRA (基礎設施)",
|
||
"DB": "🗄️ DB (資料庫)",
|
||
"COLLAB": "🤝 COLLAB (協同處理)",
|
||
}
|
||
resp_display = resp_map.get(self.primary_responsibility, "❓ 未知")
|
||
|
||
# 信心度顯示
|
||
confidence_pct = int(self.confidence * 100)
|
||
if confidence_pct >= 80:
|
||
conf_emoji = "🟢"
|
||
elif confidence_pct >= 70:
|
||
conf_emoji = "🟡"
|
||
else:
|
||
conf_emoji = "🔴"
|
||
|
||
# 自動生成事件編號
|
||
incident_id = self.incident_id or f"INC-{self.approval_id[:8].upper()}"
|
||
|
||
# SignOz URL (優先使用動態 URL) - 必須 HTML 轉義防止解析錯誤
|
||
service_name = self.resource_name.split("-")[0] if "-" in self.resource_name else self.resource_name
|
||
raw_url = self.signoz_trace_url or f"http://192.168.0.188:3301/traces?service={service_name}"
|
||
signoz_url = html.escape(raw_url, quote=True)
|
||
|
||
# SignOz 指標區塊
|
||
signoz_block = ""
|
||
if self.signoz_metrics:
|
||
signoz_block = f"━━━━━━━━━━━━━━━━━━━\n{self.signoz_metrics.format()}\n"
|
||
|
||
# HTML 轉義用戶輸入內容,防止 "Can't parse entities" 錯誤
|
||
safe_resource = html.escape(self.resource_name[:35])
|
||
safe_root_cause = html.escape(self.root_cause[:50])
|
||
safe_action = html.escape(self.suggested_action[:35])
|
||
safe_downtime = html.escape(self.estimated_downtime)
|
||
|
||
# 組裝訊息
|
||
message = (
|
||
f"═══════════════════════════\n"
|
||
f"{self.status_emoji} <b>{html.escape(self.risk_level)}</b> | {html.escape(self.resource_name[:25])}\n"
|
||
f"═══════════════════════════\n"
|
||
f"📋 <code>{html.escape(incident_id)}</code>\n"
|
||
f"🎯 資源: <code>{safe_resource}</code>\n"
|
||
f"━━━━━━━━━━━━━━━━━━━\n"
|
||
f"🤖 <b>AI 仲裁判定</b>\n"
|
||
f"👥 責任: {resp_display}\n"
|
||
f"📊 信心: {conf_emoji} {confidence_pct}%\n"
|
||
f"💡 原因: {safe_root_cause}\n"
|
||
f"{signoz_block}"
|
||
f"━━━━━━━━━━━━━━━━━━━\n"
|
||
f"🔧 建議: {safe_action}\n"
|
||
f"⏱️ 停機: {safe_downtime}\n"
|
||
f"🔍 <a href='{signoz_url}'>查看 SignOz Trace (±5min)</a>"
|
||
)
|
||
|
||
return message[:900]
|
||
|
||
|
||
# =============================================================================
|
||
# Risk Level Emoji Mapping
|
||
# =============================================================================
|
||
|
||
RISK_EMOJI_MAP = {
|
||
"critical": "🚨",
|
||
"high": "🔴",
|
||
"medium": "⚠️",
|
||
"low": "ℹ️",
|
||
}
|
||
|
||
|
||
# =============================================================================
|
||
# Telegram Gateway
|
||
# =============================================================================
|
||
|
||
class TelegramGatewayError(Exception):
|
||
"""Telegram Gateway 錯誤"""
|
||
pass
|
||
|
||
|
||
class TelegramGateway:
|
||
"""
|
||
Telegram Gateway - 行動戰情室 + SignOz 整合
|
||
|
||
職責:
|
||
1. 推送待簽核卡片到 Telegram (含 SignOz 指標)
|
||
2. 接收並驗證簽核/調優回調
|
||
3. Shadow Mode 調優執行 (僅日誌)
|
||
4. 遵守 SOUL.md 訊息壓縮原則
|
||
"""
|
||
|
||
TELEGRAM_API_BASE = "https://api.telegram.org"
|
||
|
||
def __init__(self):
|
||
self._http_client: httpx.AsyncClient | None = None
|
||
self._security = get_security_interceptor()
|
||
self._initialized = False
|
||
# Long Polling 狀態 (Phase 5 內網修復)
|
||
self._polling_active = False
|
||
self._polling_task: asyncio.Task | None = None
|
||
self._last_update_id = 0
|
||
# Phase 6.5: 心跳監控 (防止沉默盲點)
|
||
self._last_message_time: datetime | None = None
|
||
self._heartbeat_task: asyncio.Task | None = None
|
||
self._heartbeat_active = False
|
||
|
||
async def initialize(self) -> bool:
|
||
"""初始化 Gateway"""
|
||
if not settings.OPENCLAW_TG_BOT_TOKEN:
|
||
logger.warning("telegram_gateway_disabled", reason="Bot token not configured")
|
||
return False
|
||
|
||
if not settings.OPENCLAW_TG_CHAT_ID:
|
||
logger.warning("telegram_gateway_disabled", reason="Chat ID not configured")
|
||
return False
|
||
|
||
self._http_client = httpx.AsyncClient(
|
||
timeout=30.0,
|
||
headers={"Content-Type": "application/json"},
|
||
)
|
||
|
||
await self._security.initialize()
|
||
self._initialized = True
|
||
|
||
logger.info("telegram_gateway_initialized")
|
||
return True
|
||
|
||
@property
|
||
def bot_token(self) -> str:
|
||
"""取得 Bot Token"""
|
||
return settings.OPENCLAW_TG_BOT_TOKEN
|
||
|
||
@property
|
||
def chat_id(self) -> str:
|
||
"""取得 Chat ID"""
|
||
return settings.OPENCLAW_TG_CHAT_ID
|
||
|
||
@property
|
||
def api_url(self) -> str:
|
||
"""取得 Telegram API URL"""
|
||
return f"{self.TELEGRAM_API_BASE}/bot{self.bot_token}"
|
||
|
||
async def _send_request(
|
||
self,
|
||
method: str,
|
||
payload: dict,
|
||
) -> dict:
|
||
"""
|
||
發送 Telegram API 請求
|
||
|
||
Args:
|
||
method: API 方法 (sendMessage, editMessageText, etc.)
|
||
payload: 請求 Payload
|
||
|
||
Returns:
|
||
dict: API 回應
|
||
"""
|
||
if not self._initialized:
|
||
await self.initialize()
|
||
|
||
if not self._http_client:
|
||
raise TelegramGatewayError("HTTP client not initialized")
|
||
|
||
url = f"{self.api_url}/{method}"
|
||
|
||
try:
|
||
response = await self._http_client.post(url, json=payload)
|
||
response.raise_for_status()
|
||
result = response.json()
|
||
|
||
if not result.get("ok"):
|
||
raise TelegramGatewayError(
|
||
f"Telegram API error: {result.get('description', 'Unknown error')}"
|
||
)
|
||
|
||
return result
|
||
|
||
except httpx.HTTPStatusError as e:
|
||
logger.error("telegram_api_error", method=method, status=e.response.status_code)
|
||
raise TelegramGatewayError(f"HTTP error: {e.response.status_code}") from e
|
||
|
||
except Exception as e:
|
||
logger.error("telegram_request_failed", method=method, error=str(e))
|
||
raise TelegramGatewayError(str(e)) from e
|
||
|
||
def _build_inline_keyboard(
|
||
self,
|
||
approval_id: str,
|
||
include_auto_tuning: bool = True,
|
||
auto_tuning_command: str = "",
|
||
) -> dict:
|
||
"""
|
||
建立 Inline Keyboard (簽核按鈕 + 自動調優)
|
||
|
||
SOUL.md 規範 + v7.0:
|
||
[✅ 簽核] [❌ 拒絕]
|
||
[⚡ 執行自動調優]
|
||
|
||
Args:
|
||
approval_id: 簽核單 ID
|
||
include_auto_tuning: 是否包含自動調優按鈕
|
||
auto_tuning_command: kubectl 調優指令
|
||
|
||
Returns:
|
||
dict: Telegram InlineKeyboardMarkup
|
||
"""
|
||
# 產生 Nonce (防重放)
|
||
approve_nonce = self._security.generate_callback_nonce(approval_id, "approve")
|
||
reject_nonce = self._security.generate_callback_nonce(approval_id, "reject")
|
||
|
||
# 基本按鈕行
|
||
buttons = [
|
||
[
|
||
{
|
||
"text": "✅ 簽核",
|
||
"callback_data": approve_nonce,
|
||
},
|
||
{
|
||
"text": "❌ 拒絕",
|
||
"callback_data": reject_nonce,
|
||
},
|
||
]
|
||
]
|
||
|
||
# 自動調優按鈕 (v7.0)
|
||
if include_auto_tuning and auto_tuning_command:
|
||
tuning_nonce = self._security.generate_callback_nonce(approval_id, "tune")
|
||
buttons.append([
|
||
{
|
||
"text": "⚡ 執行自動調優",
|
||
"callback_data": tuning_nonce,
|
||
}
|
||
])
|
||
|
||
return {"inline_keyboard": buttons}
|
||
|
||
async def send_approval_card(
|
||
self,
|
||
approval_id: str,
|
||
risk_level: str,
|
||
resource_name: str,
|
||
root_cause: str,
|
||
suggested_action: str,
|
||
estimated_downtime: str = "~30s",
|
||
# v6.0 AI 仲裁欄位
|
||
primary_responsibility: str = "COLLAB",
|
||
confidence: float = 0.0,
|
||
namespace: str = "default",
|
||
# v7.0 SignOz 整合
|
||
signoz_rps: float = 0.0,
|
||
signoz_rps_trend: str = "stable",
|
||
signoz_error_rate: float = 0.0,
|
||
signoz_p99_latency: float = 0.0,
|
||
signoz_latency_trend: str = "stable",
|
||
signoz_trace_url: str = "",
|
||
auto_tuning_command: str = "",
|
||
) -> dict:
|
||
"""
|
||
推送待簽核卡片到 Telegram (v7.0 含 SignOz 整合)
|
||
|
||
SOUL.md 4.1 + AI 仲裁 + SignOz 訊息格式
|
||
|
||
Args:
|
||
approval_id: 簽核單 ID
|
||
risk_level: 風險等級 (critical/medium/low)
|
||
resource_name: 資源名稱
|
||
root_cause: 根因摘要
|
||
suggested_action: 建議操作
|
||
estimated_downtime: 預計停機時間
|
||
primary_responsibility: 責任團隊 (FE/BE/INFRA/DB/COLLAB)
|
||
confidence: AI 信心度 (0.0-1.0)
|
||
namespace: K8s namespace
|
||
signoz_*: SignOz Gold Metrics
|
||
signoz_trace_url: 動態時間參數的 Trace URL
|
||
auto_tuning_command: kubectl 調優指令
|
||
|
||
Returns:
|
||
dict: Telegram API 回應
|
||
"""
|
||
# 取得狀態 Emoji
|
||
emoji = RISK_EMOJI_MAP.get(risk_level.lower(), "⚠️")
|
||
|
||
# 建立 SignOz 指標區塊
|
||
signoz_metrics = None
|
||
if signoz_rps > 0 or signoz_error_rate > 0 or signoz_p99_latency > 0:
|
||
signoz_metrics = SignOzMetricsBlock(
|
||
rps=signoz_rps,
|
||
rps_trend=signoz_rps_trend,
|
||
error_rate=signoz_error_rate,
|
||
p99_latency_ms=signoz_p99_latency,
|
||
latency_trend=signoz_latency_trend,
|
||
trace_url=signoz_trace_url,
|
||
)
|
||
|
||
# 建立訊息結構 (含 AI 仲裁 + SignOz)
|
||
message = TelegramMessage(
|
||
status_emoji=emoji,
|
||
risk_level=risk_level.upper(),
|
||
resource_name=resource_name,
|
||
root_cause=root_cause,
|
||
suggested_action=suggested_action,
|
||
estimated_downtime=estimated_downtime,
|
||
approval_id=approval_id,
|
||
primary_responsibility=primary_responsibility,
|
||
confidence=confidence,
|
||
namespace=namespace,
|
||
signoz_metrics=signoz_metrics,
|
||
signoz_trace_url=signoz_trace_url,
|
||
auto_tuning_command=auto_tuning_command,
|
||
)
|
||
|
||
# 格式化訊息
|
||
text = message.format()
|
||
|
||
# 建立按鈕 (含自動調優)
|
||
keyboard = self._build_inline_keyboard(
|
||
approval_id=approval_id,
|
||
include_auto_tuning=bool(auto_tuning_command),
|
||
auto_tuning_command=auto_tuning_command,
|
||
)
|
||
|
||
# 發送訊息
|
||
payload = {
|
||
"chat_id": self.chat_id,
|
||
"text": text,
|
||
"parse_mode": "HTML",
|
||
"reply_markup": keyboard,
|
||
"disable_web_page_preview": True, # 避免 SignOz URL 預覽
|
||
}
|
||
|
||
logger.info(
|
||
"telegram_approval_card_sending",
|
||
approval_id=approval_id,
|
||
risk_level=risk_level,
|
||
resource=resource_name,
|
||
signoz_integrated=signoz_metrics is not None,
|
||
auto_tuning_available=bool(auto_tuning_command),
|
||
)
|
||
|
||
result = await self._send_request("sendMessage", payload)
|
||
|
||
logger.info(
|
||
"telegram_approval_card_sent",
|
||
approval_id=approval_id,
|
||
message_id=result.get("result", {}).get("message_id"),
|
||
)
|
||
|
||
return result
|
||
|
||
async def handle_callback(
|
||
self,
|
||
callback_query_id: str,
|
||
callback_data: str,
|
||
user_id: int,
|
||
message_id: int,
|
||
original_text: str = "",
|
||
username: str = "",
|
||
) -> dict:
|
||
"""
|
||
處理簽核/調優回調
|
||
|
||
Args:
|
||
callback_query_id: Telegram Callback Query ID
|
||
callback_data: Callback Data (包含 nonce)
|
||
user_id: Telegram User ID
|
||
message_id: 原始訊息 ID
|
||
original_text: 原始卡片內容 (用於保留上下文)
|
||
username: 簽核者使用者名稱
|
||
|
||
Returns:
|
||
dict: 處理結果 {action, approval_id, user, auto_tuning_result?}
|
||
"""
|
||
try:
|
||
# ===================================================================
|
||
# Step 1: 安全驗證 (白名單 + Nonce)
|
||
# ===================================================================
|
||
parsed = self._security.parse_callback_data(callback_data)
|
||
action = parsed["action"]
|
||
approval_id = parsed["approval_id"]
|
||
nonce = parsed["nonce"]
|
||
|
||
# 驗證使用者 + Nonce
|
||
user = await self._security.verify_callback(
|
||
user_id=user_id,
|
||
callback_id=callback_query_id,
|
||
nonce=nonce,
|
||
)
|
||
|
||
# ===================================================================
|
||
# Step 2: 處理自動調優 (Shadow Mode)
|
||
# ===================================================================
|
||
auto_tuning_result = None
|
||
if action == "tune":
|
||
auto_tuning_result = await self._handle_auto_tuning(
|
||
approval_id=approval_id,
|
||
user_id=user_id,
|
||
username=username,
|
||
)
|
||
# 回應 Callback Query
|
||
await self._answer_callback(
|
||
callback_query_id,
|
||
"tune",
|
||
text="⚡ 調優指令已記錄 (Shadow Mode)",
|
||
)
|
||
# 更新訊息
|
||
await self._update_message_after_action(
|
||
message_id=message_id,
|
||
action="tune",
|
||
username=username,
|
||
original_text=original_text,
|
||
extra_info=auto_tuning_result.get("command", ""),
|
||
)
|
||
|
||
return {
|
||
"action": action,
|
||
"approval_id": approval_id,
|
||
"user": user,
|
||
"success": True,
|
||
"auto_tuning_result": auto_tuning_result,
|
||
}
|
||
|
||
# ===================================================================
|
||
# Step 3: 回應 Callback Query (簽核/拒絕)
|
||
# ===================================================================
|
||
await self._answer_callback(callback_query_id, action)
|
||
|
||
# ===================================================================
|
||
# Step 4: 更新訊息 (保留原始內容 + 簽核鋼印)
|
||
# ===================================================================
|
||
await self._update_message_after_action(
|
||
message_id=message_id,
|
||
action=action,
|
||
username=username,
|
||
original_text=original_text,
|
||
)
|
||
|
||
logger.info(
|
||
"telegram_callback_processed",
|
||
action=action,
|
||
approval_id=approval_id,
|
||
user_id=user_id,
|
||
)
|
||
|
||
return {
|
||
"action": action,
|
||
"approval_id": approval_id,
|
||
"user": user,
|
||
"success": True,
|
||
}
|
||
|
||
except UserNotWhitelistedError as e:
|
||
logger.warning("telegram_callback_denied", error=str(e), user_id=user_id)
|
||
await self._answer_callback(
|
||
callback_query_id,
|
||
"denied",
|
||
text="⛔ 您沒有簽核權限",
|
||
)
|
||
return {"success": False, "error": str(e)}
|
||
|
||
except NonceReplayError as e:
|
||
logger.warning("telegram_callback_replay", error=str(e))
|
||
await self._answer_callback(
|
||
callback_query_id,
|
||
"replay",
|
||
text="⚠️ 此操作已處理過",
|
||
)
|
||
return {"success": False, "error": str(e)}
|
||
|
||
except Exception as e:
|
||
logger.error("telegram_callback_error", error=str(e))
|
||
await self._answer_callback(
|
||
callback_query_id,
|
||
"error",
|
||
text="❌ 處理失敗",
|
||
)
|
||
return {"success": False, "error": str(e)}
|
||
|
||
async def _handle_auto_tuning(
|
||
self,
|
||
approval_id: str,
|
||
user_id: int,
|
||
username: str,
|
||
) -> dict:
|
||
"""
|
||
處理自動調優請求 (Shadow Mode)
|
||
|
||
統帥鐵律: Shadow Mode 下嚴禁實際執行 K8s 命令
|
||
|
||
Args:
|
||
approval_id: 簽核單 ID
|
||
user_id: 執行者 Telegram ID
|
||
username: 執行者名稱
|
||
|
||
Returns:
|
||
dict: 調優結果
|
||
"""
|
||
try:
|
||
# Shadow Mode: 僅記錄調優請求
|
||
# 實際生產環境需從 ApprovalRecord 取得完整調優指令
|
||
# Shadow Mode: 僅記錄調優請求
|
||
# 實際生產環境需從 ApprovalRecord 取得完整調優指令
|
||
log_message = f"[SHADOW MODE] 自動調優請求 - 簽核單: {approval_id}"
|
||
|
||
if settings.SHADOW_MODE_ENABLED:
|
||
logger.info(
|
||
"shadow_mode_auto_tuning_triggered",
|
||
approval_id=approval_id,
|
||
user_id=user_id,
|
||
username=username,
|
||
shadow_mode=True,
|
||
)
|
||
print(f"\n{'='*60}")
|
||
print("[SHADOW MODE] AI 生成的調優指令請求")
|
||
print(f"簽核單: {approval_id}")
|
||
print(f"執行者: @{username} (ID: {user_id})")
|
||
print(f"時間: {datetime.now(UTC).isoformat()}")
|
||
print("狀態: 僅記錄,未實際執行")
|
||
print(f"{'='*60}\n")
|
||
|
||
return {
|
||
"executed": False,
|
||
"shadow_mode": True,
|
||
"approval_id": approval_id,
|
||
"triggered_by": username,
|
||
"command": "kubectl command logged (see server logs)",
|
||
"log": log_message,
|
||
}
|
||
else:
|
||
logger.warning(
|
||
"auto_tuning_blocked_not_shadow_mode",
|
||
approval_id=approval_id,
|
||
message="Production execution requires multi-sig approval",
|
||
)
|
||
return {
|
||
"executed": False,
|
||
"shadow_mode": False,
|
||
"approval_id": approval_id,
|
||
"error": "Production execution requires multi-sig approval",
|
||
}
|
||
|
||
except Exception as e:
|
||
logger.error("auto_tuning_error", error=str(e), approval_id=approval_id)
|
||
return {
|
||
"executed": False,
|
||
"error": str(e),
|
||
}
|
||
|
||
async def _answer_callback(
|
||
self,
|
||
callback_query_id: str,
|
||
action: str,
|
||
text: str | None = None,
|
||
) -> None:
|
||
"""回應 Callback Query"""
|
||
if text is None:
|
||
if action == "approve":
|
||
text = "✅ 已簽核"
|
||
elif action == "reject":
|
||
text = "❌ 已拒絕"
|
||
elif action == "tune":
|
||
text = "⚡ 調優中..."
|
||
else:
|
||
text = "✓ 已處理"
|
||
|
||
await self._send_request("answerCallbackQuery", {
|
||
"callback_query_id": callback_query_id,
|
||
"text": text,
|
||
"show_alert": False,
|
||
})
|
||
|
||
async def _update_message_after_action(
|
||
self,
|
||
message_id: int,
|
||
action: str,
|
||
username: str,
|
||
original_text: str,
|
||
extra_info: str = "",
|
||
) -> None:
|
||
"""
|
||
更新訊息: 保留原始卡片內容 + 簽核/調優鋼印
|
||
|
||
UX 要求:
|
||
- 嚴禁覆蓋原始內容
|
||
- 必須在底部加上分隔線與簽核狀態
|
||
- 移除所有按鈕
|
||
"""
|
||
# 構建鋼印
|
||
if action == "approve":
|
||
stamp = f"✅ 已由 @{username} 授權執行"
|
||
elif action == "reject":
|
||
stamp = f"❌ 已由 @{username} 拒絕執行"
|
||
elif action == "tune":
|
||
stamp = f"⚡ 已由 @{username} 觸發自動調優 (Shadow Mode)"
|
||
if extra_info:
|
||
stamp += "\n📝 指令已記錄"
|
||
else:
|
||
stamp = f"✓ 已由 @{username} 處理"
|
||
|
||
# 組合: 原始內容 + 分隔線 + 鋼印
|
||
separator = "──────────────"
|
||
updated_text = f"{original_text}\n{separator}\n{stamp}"
|
||
|
||
# 使用 editMessageText 同時更新內容並移除按鈕
|
||
await self._send_request("editMessageText", {
|
||
"chat_id": self.chat_id,
|
||
"message_id": message_id,
|
||
"text": updated_text,
|
||
"parse_mode": "HTML",
|
||
"reply_markup": {"inline_keyboard": []},
|
||
"disable_web_page_preview": True,
|
||
})
|
||
|
||
async def send_notification(
|
||
self,
|
||
text: str,
|
||
parse_mode: str = "HTML",
|
||
) -> dict:
|
||
"""
|
||
發送純文字通知
|
||
|
||
Args:
|
||
text: 訊息內容
|
||
parse_mode: 解析模式
|
||
|
||
Returns:
|
||
dict: API 回應
|
||
"""
|
||
payload = {
|
||
"chat_id": self.chat_id,
|
||
"text": text[:500], # SOUL.md 字數限制
|
||
"parse_mode": parse_mode,
|
||
}
|
||
|
||
return await self._send_request("sendMessage", payload)
|
||
|
||
async def close(self) -> None:
|
||
"""關閉 Gateway"""
|
||
# 停止 Long Polling
|
||
self._polling_active = False
|
||
if self._polling_task and not self._polling_task.done():
|
||
self._polling_task.cancel()
|
||
try:
|
||
await self._polling_task
|
||
except asyncio.CancelledError:
|
||
pass
|
||
self._polling_task = None
|
||
|
||
if self._http_client:
|
||
await self._http_client.aclose()
|
||
self._http_client = None
|
||
self._initialized = False
|
||
logger.info("telegram_gateway_closed")
|
||
|
||
# =========================================================================
|
||
# Long Polling 實作 (Phase 5 內網修復)
|
||
# =========================================================================
|
||
|
||
async def start_long_polling(self) -> None:
|
||
"""
|
||
啟動 Long Polling 背景任務
|
||
|
||
取代 Webhook 模式,適用於內網環境
|
||
統帥鐵律: 內網無法接收外部 Webhook,必須主動輪詢
|
||
"""
|
||
if not self._initialized:
|
||
success = await self.initialize()
|
||
if not success:
|
||
logger.error("telegram_long_polling_failed", reason="Gateway not initialized")
|
||
return
|
||
|
||
if self._polling_active:
|
||
logger.warning("telegram_long_polling_already_running")
|
||
return
|
||
|
||
# 先刪除 Webhook,確保可以使用 getUpdates
|
||
await self._delete_webhook()
|
||
|
||
self._polling_active = True
|
||
self._last_update_id = 0
|
||
self._polling_task = asyncio.create_task(self._polling_loop())
|
||
|
||
logger.info(
|
||
"telegram_long_polling_started",
|
||
timeout=LONG_POLLING_TIMEOUT,
|
||
chat_id=self.chat_id[:10] + "..." if self.chat_id else "N/A",
|
||
)
|
||
|
||
async def _delete_webhook(self) -> None:
|
||
"""
|
||
刪除現有 Webhook (切換至 Long Polling 模式)
|
||
|
||
統帥鐵律: Webhook 和 Long Polling 不能共存
|
||
必須先刪除 Webhook 才能使用 getUpdates
|
||
"""
|
||
if not self._http_client:
|
||
return
|
||
|
||
try:
|
||
# Step 1: 刪除 Webhook
|
||
url = f"{self.api_url}/deleteWebhook"
|
||
response = await self._http_client.post(url, json={"drop_pending_updates": True})
|
||
result = response.json()
|
||
|
||
if result.get("ok"):
|
||
logger.info(
|
||
"telegram_webhook_deleted",
|
||
description=result.get("description", "Webhook deleted"),
|
||
)
|
||
else:
|
||
logger.warning(
|
||
"telegram_webhook_delete_failed",
|
||
error=result.get("description"),
|
||
)
|
||
|
||
# Step 2: 等待 Telegram 伺服器同步 (避免 409 Conflict)
|
||
await asyncio.sleep(1)
|
||
|
||
# Step 3: 驗證 Webhook 狀態
|
||
info_url = f"{self.api_url}/getWebhookInfo"
|
||
info_response = await self._http_client.get(info_url)
|
||
info_result = info_response.json()
|
||
|
||
webhook_url = info_result.get("result", {}).get("url", "")
|
||
if webhook_url:
|
||
logger.warning(
|
||
"telegram_webhook_still_active",
|
||
url=webhook_url[:50],
|
||
)
|
||
else:
|
||
logger.info("telegram_webhook_confirmed_deleted")
|
||
|
||
except Exception as e:
|
||
logger.error("telegram_webhook_delete_error", error=str(e))
|
||
|
||
async def _polling_loop(self) -> None:
|
||
"""
|
||
Long Polling 主循環
|
||
|
||
使用 getUpdates API 持續監聽 Telegram 更新
|
||
"""
|
||
logger.info("[Telegram] Long polling started - 神經已接通,等待統帥指令...")
|
||
|
||
while self._polling_active:
|
||
try:
|
||
updates = await self._get_updates()
|
||
|
||
for update in updates:
|
||
await self._process_update(update)
|
||
|
||
except asyncio.CancelledError:
|
||
logger.info("telegram_long_polling_cancelled")
|
||
break
|
||
|
||
except httpx.TimeoutException:
|
||
# Long polling timeout 是正常的,繼續下一輪
|
||
continue
|
||
|
||
except httpx.HTTPStatusError as e:
|
||
if e.response.status_code == 409:
|
||
# 409 Conflict: 另一個 getUpdates session 殘留
|
||
# 可能是舊 pod/host 的連線未清除,等待後重試
|
||
logger.warning(
|
||
"telegram_polling_conflict",
|
||
status=409,
|
||
message="409 衝突,等待殘留 session 過期...",
|
||
)
|
||
# 等待 LONG_POLLING_TIMEOUT 讓舊 session 自然過期
|
||
await asyncio.sleep(LONG_POLLING_TIMEOUT + 5)
|
||
else:
|
||
logger.error("telegram_polling_http_error", status=e.response.status_code)
|
||
await asyncio.sleep(LONG_POLLING_RETRY_DELAY)
|
||
|
||
except Exception as e:
|
||
logger.error("telegram_polling_error", error=str(e))
|
||
# 錯誤後等待再重試
|
||
await asyncio.sleep(LONG_POLLING_RETRY_DELAY)
|
||
|
||
logger.info("telegram_long_polling_stopped")
|
||
|
||
async def _get_updates(self) -> list[dict]:
|
||
"""
|
||
呼叫 Telegram getUpdates API
|
||
|
||
Returns:
|
||
list[dict]: 更新列表
|
||
"""
|
||
if not self._http_client:
|
||
return []
|
||
|
||
url = f"{self.api_url}/getUpdates"
|
||
payload = {
|
||
"offset": self._last_update_id + 1,
|
||
"timeout": LONG_POLLING_TIMEOUT,
|
||
"allowed_updates": ["callback_query"], # 僅監聽按鈕點擊
|
||
}
|
||
|
||
response = await self._http_client.post(
|
||
url,
|
||
json=payload,
|
||
timeout=LONG_POLLING_TIMEOUT + 10, # 比 API timeout 多一點
|
||
)
|
||
response.raise_for_status()
|
||
result = response.json()
|
||
|
||
if not result.get("ok"):
|
||
raise TelegramGatewayError(f"getUpdates failed: {result.get('description')}")
|
||
|
||
updates = result.get("result", [])
|
||
|
||
# 更新 offset
|
||
if updates:
|
||
self._last_update_id = updates[-1]["update_id"]
|
||
|
||
return updates
|
||
|
||
async def _process_update(self, update: dict) -> None:
|
||
"""
|
||
處理單個 Telegram Update
|
||
|
||
Args:
|
||
update: Telegram Update 物件
|
||
"""
|
||
update_id = update.get("update_id")
|
||
callback_query = update.get("callback_query")
|
||
|
||
if not callback_query:
|
||
logger.debug("telegram_update_ignored", update_id=update_id, reason="not callback_query")
|
||
return
|
||
|
||
# 解析 callback_query
|
||
callback_query_id = callback_query.get("id")
|
||
callback_data = callback_query.get("data")
|
||
user = callback_query.get("from", {})
|
||
user_id = user.get("id")
|
||
username = user.get("username") or user.get("first_name") or str(user_id)
|
||
message = callback_query.get("message", {})
|
||
message_id = message.get("message_id")
|
||
original_text = message.get("text", "")
|
||
|
||
if not all([callback_query_id, callback_data, user_id]):
|
||
logger.warning("telegram_callback_invalid", update_id=update_id)
|
||
return
|
||
|
||
logger.info(
|
||
"telegram_callback_received",
|
||
update_id=update_id,
|
||
user_id=user_id,
|
||
username=username,
|
||
)
|
||
|
||
# 呼叫現有的 handle_callback 邏輯
|
||
result = await self.handle_callback(
|
||
callback_query_id=callback_query_id,
|
||
callback_data=callback_data,
|
||
user_id=user_id,
|
||
message_id=message_id,
|
||
original_text=original_text,
|
||
username=username,
|
||
)
|
||
|
||
if result.get("success"):
|
||
# 執行資料庫更新 (簽核/拒絕)
|
||
await self._execute_approval_action(
|
||
action=result["action"],
|
||
approval_id=result["approval_id"],
|
||
user_id=user_id,
|
||
username=username,
|
||
message_id=message_id,
|
||
)
|
||
|
||
async def _execute_approval_action(
|
||
self,
|
||
action: str,
|
||
approval_id: str,
|
||
user_id: int,
|
||
username: str,
|
||
message_id: int,
|
||
) -> None:
|
||
"""
|
||
執行簽核動作 (更新資料庫)
|
||
|
||
Args:
|
||
action: approve/reject/tune
|
||
approval_id: 簽核單 ID
|
||
user_id: Telegram User ID
|
||
username: 使用者名稱
|
||
message_id: 訊息 ID
|
||
"""
|
||
from uuid import UUID
|
||
|
||
from src.models.approval import Signature, SignatureSource
|
||
from src.services.approval_db import get_approval_service
|
||
|
||
try:
|
||
service = get_approval_service()
|
||
|
||
if action == "approve":
|
||
signature = Signature(
|
||
signer_id=f"tg_{user_id}",
|
||
signer_name=username,
|
||
comment="Telegram 簽核 (Long Polling)",
|
||
source=SignatureSource.TELEGRAM,
|
||
telegram_user_id=user_id,
|
||
telegram_message_id=message_id,
|
||
)
|
||
|
||
approval = await service.add_signature(UUID(approval_id), signature)
|
||
|
||
if approval:
|
||
logger.info(
|
||
"telegram_approval_signed_via_polling",
|
||
approval_id=approval_id,
|
||
user_id=user_id,
|
||
status=approval.status.value,
|
||
)
|
||
print(f"\n{'='*60}")
|
||
print("✅ 統帥已授權執行!")
|
||
print(f"簽核單: {approval_id}")
|
||
print(f"簽核者: @{username} (ID: {user_id})")
|
||
print(f"狀態: {approval.status.value}")
|
||
print(f"時間: {datetime.now(UTC).isoformat()}")
|
||
print(f"{'='*60}\n")
|
||
|
||
elif action == "reject":
|
||
approval = await service.reject(
|
||
UUID(approval_id),
|
||
rejector_id=f"tg_{user_id}",
|
||
rejector_name=username,
|
||
reason="Telegram 拒絕 (Long Polling)",
|
||
)
|
||
|
||
if approval:
|
||
logger.info(
|
||
"telegram_approval_rejected_via_polling",
|
||
approval_id=approval_id,
|
||
user_id=user_id,
|
||
)
|
||
print(f"\n{'='*60}")
|
||
print("❌ 統帥已拒絕執行!")
|
||
print(f"簽核單: {approval_id}")
|
||
print(f"拒絕者: @{username}")
|
||
print(f"{'='*60}\n")
|
||
|
||
elif action == "tune":
|
||
# 自動調優已在 handle_callback 中處理
|
||
logger.info(
|
||
"telegram_auto_tuning_via_polling",
|
||
approval_id=approval_id,
|
||
user_id=user_id,
|
||
)
|
||
|
||
except Exception as e:
|
||
logger.error(
|
||
"telegram_approval_action_failed",
|
||
action=action,
|
||
approval_id=approval_id,
|
||
error=str(e),
|
||
)
|
||
|
||
|
||
# =============================================================================
|
||
# Phase 6.5: 心跳監控方法
|
||
# =============================================================================
|
||
|
||
async def send_heartbeat(self) -> bool:
|
||
"""
|
||
發送心跳訊息 (系統狀態摘要)
|
||
|
||
每 30 分鐘執行一次,證明告警鏈路正常運作
|
||
"""
|
||
try:
|
||
if not self._initialized:
|
||
await self.initialize()
|
||
|
||
now = datetime.now(UTC)
|
||
|
||
# 計算上次訊息時間
|
||
last_msg_ago = "N/A"
|
||
if self._last_message_time:
|
||
delta = now - self._last_message_time
|
||
minutes = int(delta.total_seconds() // 60)
|
||
if minutes < 60:
|
||
last_msg_ago = f"{minutes} 分鐘前"
|
||
else:
|
||
hours = minutes // 60
|
||
last_msg_ago = f"{hours} 小時前"
|
||
|
||
# 心跳訊息
|
||
text = f"""💓 <b>AWOOOI 心跳</b>
|
||
━━━━━━━━━━━━━━━━
|
||
⏰ {now.strftime('%Y-%m-%d %H:%M:%S')} UTC
|
||
📡 告警鏈路: ✅ 正常
|
||
📨 上次訊息: {last_msg_ago}
|
||
━━━━━━━━━━━━━━━━
|
||
<i>每 30 分鐘自動發送</i>"""
|
||
|
||
await self.send_notification(text)
|
||
self._last_message_time = now
|
||
|
||
logger.info("telegram_heartbeat_sent")
|
||
return True
|
||
|
||
except Exception as e:
|
||
logger.error("telegram_heartbeat_failed", error=str(e))
|
||
return False
|
||
|
||
async def start_heartbeat_monitor(
|
||
self,
|
||
heartbeat_interval_minutes: int = 30,
|
||
silence_threshold_hours: int = 2,
|
||
) -> None:
|
||
"""
|
||
啟動心跳監控背景任務
|
||
|
||
Args:
|
||
heartbeat_interval_minutes: 心跳間隔 (預設 30 分鐘)
|
||
silence_threshold_hours: 沉默告警閾值 (預設 2 小時)
|
||
"""
|
||
if self._heartbeat_active:
|
||
logger.warning("telegram_heartbeat_already_running")
|
||
return
|
||
|
||
self._heartbeat_active = True
|
||
self._heartbeat_task = asyncio.create_task(
|
||
self._heartbeat_loop(heartbeat_interval_minutes, silence_threshold_hours)
|
||
)
|
||
|
||
logger.info(
|
||
"telegram_heartbeat_monitor_started",
|
||
interval_minutes=heartbeat_interval_minutes,
|
||
silence_threshold_hours=silence_threshold_hours,
|
||
)
|
||
|
||
async def _heartbeat_loop(
|
||
self,
|
||
interval_minutes: int,
|
||
silence_hours: int,
|
||
) -> None:
|
||
"""心跳監控循環"""
|
||
interval_seconds = interval_minutes * 60
|
||
silence_seconds = silence_hours * 3600
|
||
|
||
while self._heartbeat_active:
|
||
try:
|
||
# 1. 檢查沉默告警
|
||
if self._last_message_time:
|
||
silence_duration = (datetime.now(UTC) - self._last_message_time).total_seconds()
|
||
if silence_duration > silence_seconds:
|
||
# 發送沉默告警
|
||
hours = int(silence_duration // 3600)
|
||
await self.send_notification(
|
||
f"⚠️ <b>沉默告警</b>\n\n"
|
||
f"Telegram 已 <b>{hours} 小時</b>沒有收到任何訊息!\n"
|
||
f"請檢查告警鏈路是否正常運作。"
|
||
)
|
||
logger.warning(
|
||
"telegram_silence_alert",
|
||
silence_hours=hours,
|
||
)
|
||
|
||
# 2. 發送心跳
|
||
await self.send_heartbeat()
|
||
|
||
# 3. 等待下一次
|
||
await asyncio.sleep(interval_seconds)
|
||
|
||
except asyncio.CancelledError:
|
||
break
|
||
except Exception as e:
|
||
logger.error("telegram_heartbeat_loop_error", error=str(e))
|
||
await asyncio.sleep(60) # 錯誤後等待 1 分鐘重試
|
||
|
||
async def stop_heartbeat_monitor(self) -> None:
|
||
"""停止心跳監控"""
|
||
self._heartbeat_active = False
|
||
if self._heartbeat_task and not self._heartbeat_task.done():
|
||
self._heartbeat_task.cancel()
|
||
try:
|
||
await self._heartbeat_task
|
||
except asyncio.CancelledError:
|
||
pass
|
||
self._heartbeat_task = None
|
||
logger.info("telegram_heartbeat_monitor_stopped")
|
||
|
||
|
||
# =============================================================================
|
||
# Singleton
|
||
# =============================================================================
|
||
|
||
_gateway: TelegramGateway | None = None
|
||
|
||
|
||
def get_telegram_gateway() -> TelegramGateway:
|
||
"""取得全域 TelegramGateway 實例"""
|
||
global _gateway
|
||
if _gateway is None:
|
||
_gateway = TelegramGateway()
|
||
return _gateway
|