awoooi/apps/api/src/services/telegram_gateway.py

"""
Telegram Gateway - OpenClaw 行動戰情室 + SignOz 整合
====================================================
Phase 5.4.3 & 5.4.4: Telegram 推送與簽核接收
統帥校正: SignOz 為唯一全能視力中心

Features:
- 推送待簽核卡片到 Telegram (含 SignOz 指標)
- 動態 SignOz Trace URL (告警前後 5 分鐘)
- 自動調優按鈕 (Shadow Mode: 僅日誌輸出)
- 接收統帥簽核回調
- SOUL.md 訊息壓縮原則 100% 遵守

SOUL.md 鐵律 (4.1 Telegram 訊息壓縮原則):
- 狀態標籤: 20 字元
- 資源名稱: 50 字元
- 根因摘要: 100 字元
- 建議行動: 50 字元
- 總長度: 800 字元 (v7.0 擴展以容納 SignOz 區塊)

修復紀錄:
- 2026-03-26 Claude Code: 修復 HTML 解析錯誤 (Can't parse entities)
"""

import asyncio
import hashlib
import html
import json
import os
import re
from dataclasses import dataclass
from datetime import UTC, datetime
from urllib.parse import quote
from uuid import NAMESPACE_URL, UUID, uuid5

import httpx
import structlog
from opentelemetry import trace

from src.core.config import settings
from src.core.redis_client import get_redis
from src.services.security_interceptor import (
    NonceReplayError,
    UserNotWhitelistedError,
    get_security_interceptor,
)
from src.services.chat_manager import get_chat_manager

# =============================================================================
# Snooze/Silence Redis keys (2026-03-27 P1 optimisation)
# =============================================================================
SNOOZE_KEY_PREFIX = "telegram_snooze:"  # {approval_id} -> remind later
SILENCE_KEY_PREFIX = "telegram_silence:"  # {resource_name} -> silenced
SNOOZE_TTL_SECONDS = 30 * 60  # 30 minutes
SILENCE_TTL_SECONDS = 60 * 60  # 1 hour
INCIDENT_UPDATE_DEDUP_PREFIX = "awoooi:tg_update_dedup:"  # {incident_id}:{status_hash}
INCIDENT_UPDATE_DEDUP_TTL_SECONDS = 5 * 60  # do not repeat the same status within 5 minutes
INCIDENT_UPDATE_GLOBAL_FAILURE_DEDUP_PREFIX = "awoooi:tg_update_global_failure_dedup:"
INCIDENT_UPDATE_GLOBAL_FAILURE_DEDUP_TTL_SECONDS = 10 * 60  # same failure summary across incidents: push once per 10 minutes
GROUPED_ALERT_DIGEST_DEDUP_PREFIX = "awoooi:tg_group_digest:"  # {group_key}
GROUPED_ALERT_DIGEST_DEDUP_TTL_SECONDS = 5 * 60  # one digest per alert group every 5 minutes

# 2026-04-01 Claude Code: distributed leader election for long polling.
# Prevents several Pods from calling getUpdates at once and fighting over
# 409 Conflict responses.
POLLING_LEADER_KEY = "telegram:polling:leader"
POLLING_LEADER_TTL = 45   # seconds - leadership moves automatically 45s after a Pod dies
POLLING_LEADER_RENEW = 20  # seconds - the lease is renewed every 20s
POLLING_LEADER_WATCH = 30  # seconds - non-leader Pods try to take over every 30s

logger = structlog.get_logger(__name__)
# Captures the "api.telegram.org/bot" prefix; the rest of the match (up to the
# next "/" or whitespace) is the part that gets redacted.
_TELEGRAM_BOT_URL_RE = re.compile(r"(api\.telegram\.org/bot)[^/\s]+")
# Incident ids of the form INC-<8 digits>-<4+ uppercase letters/digits>.
_INCIDENT_ID_RE = re.compile(r"\bINC-\d{8}-[A-Z0-9]{4,}\b")
# 7-12 hex characters wrapped in a <code> tag (case-insensitive).
_CODE_REF_RE = re.compile(r"<code>([0-9a-f]{7,12})</code>", re.IGNORECASE)
# Default per-chunk character budget used when splitting HTML messages.
_TELEGRAM_HTML_CHUNK_LIMIT = 3600
_AWOOOP_WEB_BASE_URL = "https://awoooi.wooo.work"


def _top_gateway_bucket(
    buckets: list[dict[str, object]],
    field: str,
) -> str | None:
    if not buckets:
        return None
    top = max(buckets, key=lambda row: int(row.get("total") or 0))
    value = top.get(field)
    if value is None:
        return None
    return f"{value} ({top.get('total', 0)})"


def _format_gateway_summary_lines(summary: dict[str, object] | None) -> list[str]:
    if not summary or int(summary.get("total") or 0) <= 0:
        return []

    by_agent = summary.get("by_agent") if isinstance(summary.get("by_agent"), list) else []
    by_tool = summary.get("by_tool") if isinstance(summary.get("by_tool"), list) else []
    by_scope = summary.get("by_scope") if isinstance(summary.get("by_scope"), list) else []
    blockers = summary.get("blockers") if isinstance(summary.get("blockers"), list) else []

    lines = [
        "",
        "🛡️ <b>MCP Gateway</b>",
        (
            "階段: "
            f"<code>{html.escape(str(summary.get('stage') or 'unknown'))}</code>"
            " / "
            f"<code>{html.escape(str(summary.get('stage_status') or 'unknown'))}</code>"
        ),
        (
            "治理: "
            f"first-class <code>{int(summary.get('first_class_total') or 0)}</code> / "
            f"policy <code>{int(summary.get('policy_enforced_total') or 0)}</code> / "
            f"legacy <code>{int(summary.get('legacy_bridge_total') or 0)}</code>"
        ),
    ]

    agent = _top_gateway_bucket(by_agent, "agent_id")
    tool = _top_gateway_bucket(by_tool, "tool_name")
    scope = _top_gateway_bucket(by_scope, "required_scope")
    if agent:
        lines.append(f"Agent: <code>{html.escape(agent)}</code>")
    if tool:
        lines.append(f"Tool: <code>{html.escape(tool)}</code>")
    if scope:
        lines.append(f"Scope: <code>{html.escape(scope)}</code>")
    if blockers:
        lines.append(
            "卡點: "
            + html.escape(", ".join(str(item) for item in blockers[:3]))
        )
    return lines


def _format_automation_quality_lines(quality: dict[str, object] | None) -> list[str]:
    if not quality or quality.get("applicable") is False:
        return []

    facts = quality.get("facts") if isinstance(quality.get("facts"), dict) else {}
    blockers = quality.get("blockers") if isinstance(quality.get("blockers"), list) else []
    verdict = html.escape(str(quality.get("verdict") or "unknown"))
    score = int(quality.get("score") or 0)

    lines = [
        "",
        "🧪 <b>自動化品質</b>",
        f"判定: <code>{verdict}</code> / <code>{score}</code>",
        (
            "執行: "
            f"auto-repair <code>{int(facts.get('auto_repair_execution_records') or 0)}</code> / "
            f"ops <code>{int(facts.get('automation_operation_records') or 0)}</code> / "
            f"verify <code>{html.escape(str(facts.get('verification_result') or 'missing'))}</code>"
        ),
        (
            "證據: "
            f"sensors <code>{int(facts.get('sensors_succeeded') or 0)}/"
            f"{int(facts.get('sensors_attempted') or 0)}</code> / "
            f"gateway <code>{int(facts.get('mcp_gateway_total') or 0)}</code> / "
            f"KM <code>{int(facts.get('knowledge_entries') or 0)}</code>"
        ),
    ]

    if blockers:
        lines.append("缺口: " + html.escape(", ".join(str(item) for item in blockers[:4])))
    return lines


def _format_remediation_history_lines(history: dict[str, object] | None) -> list[str]:
    if not history or int(history.get("total") or 0) <= 0:
        return []

    items = history.get("items") if isinstance(history.get("items"), list) else []
    latest = items[0] if items and isinstance(items[0], dict) else {}
    agent = latest.get("agent_id") or "unknown_agent"
    tool = latest.get("tool_name") or "current_state"
    scope = latest.get("required_scope") or "unknown"
    writes_incident = latest.get("writes_incident_state")
    writes_auto_repair = latest.get("writes_auto_repair_result")

    return [
        "",
        "🧪 <b>ADR-100 補救試跑</b>",
        f"歷史: <code>{int(history.get('total') or 0)}</code> 次",
        (
            "上次: "
            f"<code>{html.escape(str(latest.get('mode') or 'unknown'))}</code> / "
            f"<code>{html.escape(str(latest.get('verification_result_preview') or 'unknown'))}</code>"
        ),
        (
            "MCP: "
            f"<code>{html.escape(str(agent))}/{html.escape(str(tool))}</code> / "
            f"<code>{html.escape(str(scope))}</code>"
        ),
        (
            "寫入: "
            f"incident <code>{html.escape(str(writes_incident))}</code> / "
            f"auto-repair <code>{html.escape(str(writes_auto_repair))}</code>"
        ),
    ]


def _awooop_runs_url_for_incident(incident_id: str) -> str:
    """Return the AwoooP "runs" page URL filtered to ``incident_id``.

    The id is fully percent-encoded (no characters are treated as safe); a
    falsy id yields an empty ``incident_id`` query value.
    """
    encoded_id = quote(str(incident_id or ""), safe="")
    page = f"{_AWOOOP_WEB_BASE_URL}/zh-TW/awooop/runs"
    query = f"?project_id=awoooi&incident_id={encoded_id}"
    return page + query


def _awooop_runs_button_row(incident_id: str) -> list[dict[str, str]]:
    if not incident_id:
        return []
    return [{
        "text": "🧭 AwoooP",
        "url": _awooop_runs_url_for_incident(incident_id),
    }]


def _awooop_runs_reply_markup(incident_id: str) -> dict | None:
    """Return an inline-keyboard ``reply_markup`` holding the AwoooP button.

    ``None`` is returned when no button row can be built for ``incident_id``.
    """
    buttons = _awooop_runs_button_row(incident_id)
    return {"inline_keyboard": [buttons]} if buttons else None


def _latest_remediation_history_item(history: dict[str, object] | None) -> dict[str, object]:
    if not history:
        return {}
    items = history.get("items") if isinstance(history.get("items"), list) else []
    latest = items[0] if items and isinstance(items[0], dict) else {}
    return latest


def _remediation_evidence_state(history: dict[str, object] | None) -> str:
    """Classify ADR-100 dry-run evidence for first-screen Telegram status."""
    if not history:
        return ""

    total = int(history.get("total") or 0)
    if total <= 0:
        if history.get("status") == "fetch_failed":
            return "fetch_failed"
        return "missing"

    latest = _latest_remediation_history_item(history)
    if latest.get("writes_incident_state") or latest.get("writes_auto_repair_result"):
        return "write_observed"
    if latest.get("allowed") is False or latest.get("success") is False:
        return "blocked"

    safety_level = str(latest.get("safety_level") or "").lower()
    required_scope = str(latest.get("required_scope") or "").lower()
    if safety_level == "read_only" or required_scope == "read":
        return "read_only"
    return "observed"


def _format_remediation_evidence_block(history: dict[str, object] | None) -> str:
    """Compact ADR-100 dry-run evidence for the root ACTION REQUIRED card."""
    if not history:
        return ""

    state = _remediation_evidence_state(history)
    total = int(history.get("total") or 0)
    if total <= 0:
        label = (
            "補救試跑查詢失敗"
            if state == "fetch_failed"
            else "尚無補救試跑紀錄"
        )
        return f"🧪 AI 證據：<code>{html.escape(label)}</code>\n"

    latest = _latest_remediation_history_item(history)
    agent = latest.get("agent_id") or "unknown_agent"
    tool = latest.get("tool_name") or "current_state"
    scope = latest.get("required_scope") or "unknown"
    writes_incident = str(bool(latest.get("writes_incident_state"))).lower()
    writes_auto_repair = str(bool(latest.get("writes_auto_repair_result"))).lower()
    route = f"{agent}/{tool}/{scope}"
    preview = latest.get("verification_result_preview") or "unknown"

    state_label = {
        "read_only": "只讀試跑",
        "write_observed": "有寫入旗標",
        "blocked": "試跑受阻",
        "observed": "已試跑",
    }.get(state, "已試跑")

    return (
        f"🧪 AI 證據：<b>{state_label} {total} 次</b> | "
        f"<code>{html.escape(str(route))}</code>\n"
        f"├ preview：<code>{html.escape(str(preview))}</code>\n"
        f"└ 寫入：incident <code>{writes_incident}</code> / "
        f"auto-repair <code>{writes_auto_repair}</code>\n"
    )


async def _fetch_remediation_summary_for_card(
    *,
    approval_id: str,
    incident_id: str,
) -> dict[str, object] | None:
    if not incident_id:
        return None
    try:
        from src.services.adr100_remediation_service import (
            get_adr100_remediation_service,
        )

        history = await asyncio.wait_for(
            get_adr100_remediation_service().history(
                limit=5,
                incident_id=incident_id,
            ),
            timeout=2.5,
        )
        return history if isinstance(history, dict) else None
    except Exception as remediation_exc:
        logger.debug(
            "telegram_approval_card_remediation_history_fetch_failed",
            approval_id=approval_id,
            incident_id=incident_id,
            error=str(remediation_exc),
        )
        return {
            "schema_version": "adr100_remediation_history_v1",
            "total": 0,
            "items": [],
            "status": "fetch_failed",
        }


def _telegram_html_chunks(lines: list[str], limit: int = _TELEGRAM_HTML_CHUNK_LIMIT) -> list[str]:
    """Group whole lines into chunks of at most ``limit`` characters.

    Lines are never split, so an HTML tag contained in one line is never cut
    in two. A single line that exceeds ``limit`` on its own becomes a separate
    chunk rendered through ``_html_safe_plain_chunk``.
    """
    finished: list[str] = []
    pending: list[str] = []
    pending_size = 0

    for entry in lines:
        rendered = str(entry)
        cost = len(rendered) + 1  # +1 accounts for the joining newline

        # Flush the open chunk before it would overflow.
        if pending and pending_size + cost > limit:
            finished.append("\n".join(pending))
            pending, pending_size = [], 0

        if cost > limit:
            finished.append(_html_safe_plain_chunk(rendered, limit=limit))
        else:
            pending.append(rendered)
            pending_size += cost

    if pending:
        finished.append("\n".join(pending))
    return finished


def _plain_text_from_html(text: str, limit: int = 3900) -> str:
    """Fallback renderer for Telegram HTML parse failures."""
    plain = re.sub(r"</?[^>]+>", "", text)
    return html.unescape(plain)[:limit]


def _html_safe_plain_chunk(text: str, limit: int) -> str:
    """Render one overlong HTML line as parse-safe text for HTML-mode chunks.

    The line is flattened to plain text, re-escaped, and trimmed so the result
    is at most ``limit`` characters and never ends in the middle of an entity.

    Args:
        text: A single HTML line that does not fit into a chunk.
        limit: Maximum number of characters of the returned string.

    Returns:
        Escaped text without tags, at most ``limit`` characters long.
    """
    plain = _plain_text_from_html(text, limit=limit)
    escaped = html.escape(plain)
    if len(escaped) <= limit:
        return escaped

    # Escaping expands &, <, > and quotes, so trim once more after escaping.
    clipped = escaped[:limit]

    # Every "&" produced by html.escape() opens an entity that ends with ";".
    # If the last "&" has no ";" after it, the cut landed inside an entity:
    # drop the fragment so the chunk does not end with debris such as "&am".
    last_amp = clipped.rfind("&")
    if last_amp != -1 and ";" not in clipped[last_amp:]:
        clipped = clipped[:last_amp]
    return clipped


def _sanitize_telegram_error(text: str) -> str:
    """Mask the bot token inside Telegram Bot API URLs.

    Keeps exception strings from leaking the token into logs and traces.
    """
    def _mask(hit: re.Match) -> str:
        # Keep the captured URL prefix and replace the token that follows it.
        return f"{hit.group(1)}<redacted>"

    return _TELEGRAM_BOT_URL_RE.sub(_mask, text)


def _is_noisy_failure_update(status_line: str) -> bool:
    """判斷是否屬於容易跨 incident 洗版的失敗摘要。"""
    return (
        "AI 自動修復失敗" in status_line
        or "AI 診斷工具失敗" in status_line
    )


def _extract_incident_id_from_text(text: str) -> str | None:
    """Return the first incident id found in outbound Telegram text, if any."""
    found = _INCIDENT_ID_RE.search(text or "")
    if found is None:
        return None
    return found.group(0)


def _has_reply_context(payload: dict) -> bool:
    return "reply_to_message_id" in payload or "reply_parameters" in payload


def _is_root_action_required_card(text: str) -> bool:
    """主告警卡片本身不自動 reply，避免把新主卡接到舊訊息下。"""
    return "ACTION REQUIRED" in text and "AI 自動化鏈路" in text


def _legacy_outbound_run_id(chat_id: str, provider_message_id: str) -> UUID:
    """Legacy Telegram 發送尚未有 run_id 時，產生穩定 soft run_id 供 Channel Hub 串接。"""
    return uuid5(NAMESPACE_URL, f"awoooi:legacy-telegram:{chat_id}:{provider_message_id}")


def _infer_outbound_message_type(text: str, payload: dict) -> str:
    """將既有 Telegram 訊息映射成 AwoooP outbound_message 的有限分類。"""
    if "RUNBOOK REVIEW" in text or "待審核" in text:
        return "approval_request"
    if _has_reply_context(payload):
        if "失敗" in text or "錯誤" in text or "FAILED" in text:
            return "error"
        return "final"
    if payload.get("reply_markup"):
        return "approval_request"
    if "ACTION REQUIRED" in text or "待審" in text or "審批" in text:
        return "approval_request"
    if "失敗" in text or "錯誤" in text or "FAILED" in text:
        return "error"
    return "final"


def _outbound_payload_hash(payload: dict) -> str:
    """Stable hash for Telegram payload replay without storing raw payload."""
    canonical = json.dumps(payload, ensure_ascii=False, sort_keys=True, default=str)
    return hashlib.sha256(canonical.encode()).hexdigest()


def _reply_markup_summary(payload: dict) -> dict[str, object]:
    """Summarize Telegram buttons without turning callback payloads into policy."""
    reply_markup = payload.get("reply_markup")
    if not isinstance(reply_markup, dict):
        return {"present": False, "button_count": 0}

    buttons: list[dict[str, object]] = []
    for row in reply_markup.get("inline_keyboard") or []:
        if not isinstance(row, list):
            continue
        for button in row:
            if not isinstance(button, dict):
                continue
            callback_data = str(button.get("callback_data") or "")
            buttons.append({
                "text": str(button.get("text") or ""),
                "callback_prefix": callback_data.split(":", 1)[0] if callback_data else "",
                "has_url": bool(button.get("url")),
            })

    return {
        "present": True,
        "button_count": len(buttons),
        "buttons": buttons[:12],
        "truncated": len(buttons) > 12,
    }


def _outbound_source_envelope(method: str, payload: dict) -> dict[str, object]:
    """Build a redaction-friendly source envelope for Channel Hub replay.

    The envelope stores a hash of the payload, its key names, a few delivery
    flags, a button summary and up to 20 incident ids / code references found
    in the message text (or caption); the raw payload itself is not included.
    """
    body = str(payload.get("text") or payload.get("caption") or "")
    incident_ids = sorted({*_INCIDENT_ID_RE.findall(body)})
    code_refs = sorted({hit.group(1) for hit in _CODE_REF_RE.finditer(body)})
    source_refs = {
        "incident_ids": incident_ids[:20],
        "code_refs": code_refs[:20],
    }
    return {
        "adapter": "legacy_telegram_gateway",
        "method": method,
        "payload_sha256": _outbound_payload_hash(payload),
        "payload_keys": sorted(map(str, payload)),
        "parse_mode": payload.get("parse_mode"),
        "disable_web_page_preview": payload.get("disable_web_page_preview"),
        "has_reply_context": _has_reply_context(payload),
        "reply_markup": _reply_markup_summary(payload),
        "source_refs": source_refs,
    }

# 2026-04-27 Claude Sonnet 4.6: B3 — feature flag for LLM-driven dynamic Telegram buttons
# true  → prefer buttons generated dynamically from ActionPlan.recommended_actions
# false → keep the existing callback_action_spec.yaml path (default, backward compatible)
# The flag is read from the environment once, at import time.
USE_LLM_DYNAMIC_BUTTONS = os.environ.get("USE_LLM_DYNAMIC_BUTTONS", "false").lower() == "true"

# =============================================================================
# OTEL Tracer (Phase C P1 observability)
# 2026-03-30 Claude Code: added Telegram Gateway tracing
# =============================================================================
_tracer = trace.get_tracer("awoooi.telegram_gateway", "1.0.0")


# =============================================================================
# 智能截斷 (2026-04-17 ogt + Claude Sonnet 4.6 — ADR-075 修復)
# 根因：粗暴 [:N] 在括號/中文字中間切斷 → 幽靈截斷「質疑：無（通」
# 規則：在完整句子邊界截斷；若無邊界則補 …[截斷] 標記
# =============================================================================

def _smart_truncate(text: str, limit: int, suffix: str = "…[截斷]") -> str:
    """
    在句子邊界截斷文字，防止破壞括號閉合或切斷中文字。

    優先序：。！？ > ； > ，、, > 空白
    若在合理位置（>50% limit）找到邊界 → 在邊界後截斷
    否則 → 在 limit 處截斷並加 suffix
    """
    if len(text) <= limit:
        return text
    # 依優先序嘗試各邊界字元
    for boundary in ("。", "！", "？", "；", "，", "、", ",", " "):
        pos = text.rfind(boundary, 0, limit)
        if pos >= limit // 2:           # 至少在一半後才算有效邊界
            return text[:pos + len(boundary)] + suffix
    # 無邊界：硬截 + 標記
    return text[:limit] + suffix


def _format_resolved_guard_stamp(resolved_at: datetime | None) -> str:
    """格式化 ADR-071-D 已解決狀態守衛文案。"""
    if resolved_at is None:
        return "✅ 此事件已解決"
    return f"✅ 此事件已於 {resolved_at.strftime('%Y-%m-%d %H:%M')} 解決"


# =============================================================================
# Long polling configuration (Phase 5 intranet fix)
# =============================================================================
LONG_POLLING_TIMEOUT = 30  # getUpdates timeout (seconds)
LONG_POLLING_RETRY_DELAY = 5  # delay before retrying after an error (seconds)


# =============================================================================
# SignOz Metrics Block (v7.0)
# =============================================================================

@dataclass
class SignOzMetricsBlock:
    """
    SignOz metrics block embedded in a Telegram card.

    Rendered layout:
    📊 SignOz 指標
    ├ RPS: 150.2 📈
    ├ Error: 🟢 0.5%
    └ P99: 245ms ➡️
    """
    rps: float = 0.0
    rps_trend: str = "stable"  # up, down, stable
    error_rate: float = 0.0
    p99_latency_ms: float = 0.0
    latency_trend: str = "stable"
    trace_url: str = ""

    def format(self) -> str:
        """Render the metrics as four lines of Telegram HTML (no trailing newline)."""
        arrows = {"up": "📈", "down": "📉", "stable": "➡️"}

        # Error-rate traffic light: below 1 is green, below 5 yellow, else red.
        if self.error_rate < 1:
            health = "🟢"
        elif self.error_rate < 5:
            health = "🟡"
        else:
            health = "🔴"

        # Unknown trend names fall back to the "stable" arrow.
        rps_arrow = arrows.get(self.rps_trend, '➡️')
        latency_arrow = arrows.get(self.latency_trend, '➡️')

        rows = (
            "📊 <b>SignOz 指標</b>",
            f"├ RPS: <code>{self.rps:.1f}</code> {rps_arrow}",
            f"├ Error: {health} <code>{self.error_rate:.2f}%</code>",
            f"└ P99: <code>{self.p99_latency_ms:.0f}ms</code> {latency_arrow}",
        )
        return "\n".join(rows)


# =============================================================================
# SOUL.md 訊息格式定義 (v7.0 + SignOz)
# =============================================================================

@dataclass
class TelegramMessage:
    """
    Telegram 訊息結構 (SOUL.md 4.1 + v7.0 SignOz 整合)

    格式:
    ═══════════════════════════
    🚨 CRITICAL | harbor-core
    ═══════════════════════════
    📋 INC-20260321-0001
    🎯 資源: harbor-core-7d4b8c9f5
    ━━━━━━━━━━━━━━━━━━━
    🤖 AI 仲裁判定
    👥 責任: BE (後端)
    📊 信心: 🟢 88%
    💡 原因: JVM Heap 配置不當
    ━━━━━━━━━━━━━━━━━━━
    📊 SignOz 指標
    ├ RPS: 150.2 📈
    ├ Error: 🟢 0.5%
    └ P99: 245ms ➡️
    ━━━━━━━━━━━━━━━━━━━
    🔧 建議: 刪除 Pod
    ⏱️ 停機: ~30s
    🔍 SignOz Trace (±5min)

    [✅ 簽核] [❌ 拒絕] [⚡ 自動調優]
    """
    status_emoji: str           # 🚨, ⚠️, ℹ️
    risk_level: str             # CRITICAL, MEDIUM, LOW
    resource_name: str          # Pod/Deployment 名稱 (max 50)
    root_cause: str             # 根因摘要 (max 100)
    suggested_action: str       # 建議操作 (max 50)
    estimated_downtime: str     # 預計停機時間
    approval_id: str            # 簽核單 ID
    # v6.0 AI 仲裁欄位
    incident_id: str = ""       # 事件編號 INC-YYYYMMDD-XXXX
    primary_responsibility: str = "COLLAB"  # FE/BE/INFRA/DB/COLLAB
    confidence: float = 0.0     # 信心度 0.0-1.0
    namespace: str = "default"  # K8s namespace
    # v7.0 SignOz 整合
    signoz_metrics: SignOzMetricsBlock | None = None
    signoz_trace_url: str = ""  # 動態時間參數 URL
    auto_tuning_command: str = ""  # kubectl 調優指令
    # 2026-03-29 ogt: AI Token/Cost 追蹤
    ai_tokens: int = 0          # LLM Token 使用量
    ai_cost: float = 0.0        # LLM 成本 (USD)
    # 2026-03-29 ogt: ADR-037 異常頻率統計
    anomaly_frequency: dict | None = None  # AnomalyCounter 統計
    # 2026-03-29 ogt: AI Provider 來源顯示
    ai_provider: str = ""  # ollama/gemini/claude/expert_system/mock
    # 2026-04-04 ogt: 底層模型名稱 (e.g. qwen2.5:7b-instruct, nemotron-70b)
    ai_model: str = ""
    # 2026-04-16 ogt + Claude Sonnet 4.6: 告警分類與修復鏈路顯示 (ADR-076)
    alert_category: str = ""   # host/k8s/database/service/external_site/secops 等
    playbook_name: str = ""    # 匹配到的 Playbook 名稱（空字串=規則匹配）
    automation_state: str = ""  # diagnosis_collected_manual_required / diagnosis_failed_manual_required
    automation_quality: dict | None = None  # truth-chain automation_quality 摘要
    remediation_summary: dict | None = None  # ADR-100 read-only dry-run history 摘要

    # ==========================================================================
    # Phase 22: Nemotron 協作欄位 (ADR-044)
    # 2026-03-31 Claude Code: OpenClaw + Nemotron 雙軌顯示
    # ==========================================================================
    nemotron_enabled: bool = False  # 是否啟用 Nemotron 協作
    nemotron_tool_model: str = ""    # Tool Calling 模型 (e.g. "llama3.1:8b")
    nemotron_tool_backend: str = ""  # Tool Calling 後端 (e.g. "Ollama 本機" / "NVIDIA 雲端")
    nemotron_tools: list[dict] | None = None  # Tool Calling 結果 [{"tool": str, "args": dict, "valid": bool}]
    nemotron_validation: str = ""  # "✅ 驗證通過" / "❌ 驗證失敗" / "⏳ 驗證中"
    nemotron_latency_ms: float = 0.0  # Nemotron 呼叫延遲 (ms)

    def _provider_display(self) -> tuple[str, str]:
        """Return display provider and optional model suffix.
        2026-05-04 ogt: 加入具體 Ollama 伺服器顯示（GCP-A/B/Local）
        """
        provider_names = {
            "ollama": "Ollama",
            # 2026-05-04 ogt: ADR-110 三層容災具體伺服器識別
            "ollama_gcp_a": "Ollama GCP-A (34.143.170.20)",
            "ollama_gcp_b": "Ollama GCP-B (34.21.145.224)",
            "ollama_local": "Ollama Local (111)",
            "gemini": "Gemini",
            "claude": "Claude",
            "nvidia": "Nemotron",
            "openclaw_nemo": "OpenClaw Nemo",
            "openclaw_nvidia_nim": "OpenClaw Nemo",
            "openclaw_qwen": "OpenClaw Nemo",
        }
        provider = (self.ai_provider or "").strip().lower()
        if provider:
            provider_display = provider_names.get(provider, self.ai_provider.upper())
        elif self.confidence > 0:
            provider_display = "AI Router"
        else:
            provider_display = "rule_fallback"
        model_suffix = f" ({html.escape(self.ai_model)})" if self.ai_model else ""
        return provider_display, model_suffix

    def _automation_mode(self) -> str:
        text = f"{self.root_cause} {self.suggested_action}".lower()
        if "超時" in text or "timeout" in text:
            return "llm_timeout_manual_gate"
        if self.confidence > 0 and self.suggested_action and self.suggested_action != "待分析":
            return "ai_proposal_ready"
        if self.suggested_action in {"待分析", "", "NO_ACTION"}:
            return "analysis_degraded"
        return "safe_gate_pending"

    def _automation_status_summary(self) -> str:
        """Human-readable handling status for the first screen of the Telegram card.

        This line is the on-call entry point: it tells the reader up front
        whether the card means "AI has a proposal awaiting approval", "AI could
        not repair, manual work needed" or "observation only"; the details
        follow in the later chain blocks.

        The checks form a priority cascade and the first match wins:
        recorded execution facts, then remediation dry-run evidence, then the
        quality verdict, then the raw automation state / mode / action.

        Returns:
            A single status sentence prefixed with a status emoji.
        """
        mode = self._automation_mode()
        action = (self.suggested_action or "").upper()
        text = f"{self.root_cause} {self.suggested_action}".lower()
        state = (self.automation_state or "").lower()
        quality = self.automation_quality or {}
        # Non-dict "facts" are treated as "no facts".
        facts = quality.get("facts") if isinstance(quality.get("facts"), dict) else {}
        verdict = str(quality.get("verdict") or "")
        auto_repair_records = int(facts.get("auto_repair_execution_records") or 0)
        operation_records = int(facts.get("automation_operation_records") or 0)
        verification = str(facts.get("verification_result") or "missing")
        remediation_state = _remediation_evidence_state(self.remediation_summary)

        # 1) Execution evidence from the quality facts takes precedence.
        if verdict == "auto_repaired_verified":
            return "✅ 已驗證自動修復完成"
        if auto_repair_records > 0 or operation_records > 0:
            if verification == "missing":
                return "🔄 已自動執行，等待驗證證據"
            return f"🔄 已自動執行，驗證結果：{verification}"
        # 2) Remediation dry-run evidence ("missing" and "observed" fall through).
        if remediation_state == "read_only":
            return "🔎 AI 已完成只讀補救試跑，等待人工審批"
        if remediation_state == "write_observed":
            return "⚠️ AI 補救試跑出現寫入旗標，需人工確認"
        if remediation_state == "blocked":
            return "🔴 AI 補救試跑受阻，需人工處理"
        if remediation_state == "fetch_failed":
            return "🟠 AI 補救試跑證據查詢失敗，需人工判斷"
        # 3) Remaining quality verdicts.
        if verdict == "approval_required":
            return "🟡 需要審批後才會執行"
        if verdict.startswith("manual_required"):
            return "🟠 未自動修復，需人工判斷"

        # 4) Fallbacks derived from the card's own state, mode and action.
        if state == "diagnosis_collected_manual_required":
            return "🔎 AI 已完成只讀診斷，需人工判斷"
        if state == "diagnosis_failed_manual_required":
            return "🔴 AI 診斷工具失敗，需人工排查"
        if mode == "llm_timeout_manual_gate":
            return "🔴 AI 分析超時，需人工排查"
        if action in {"NO_ACTION", "待分析", ""} or "invalid_target" in text:
            return "🟠 AI 無可安全執行動作，需人工判斷"
        if self.confidence <= 0:
            return "🟡 規則建議待審批"
        # NOTE(review): this branch looks unreachable — _automation_mode() only
        # returns "analysis_degraded" for actions already caught by the
        # NO_ACTION / 待分析 / empty check above. Confirm before removing.
        if mode == "analysis_degraded":
            return "🟠 AI 降級分析，需人工判斷"
        if mode == "ai_proposal_ready":
            return "🟡 AI 已提出修復建議，等待人工批准"
        return "🟡 安全閘門待審批"

    def _format_automation_block(self) -> str:
        """Visible AI automation chain for every ACTION REQUIRED card.
        2026-05-04 ogt: 加入 Token 用量 + 具體 Ollama 伺服器顯示
        """
        provider_display, model_suffix = self._provider_display()
        mode = self._automation_mode()
        openclaw_state = provider_display if provider_display != "rule_fallback" else "degraded"
        nemotron_state = "tool_ready" if self.nemotron_enabled else "standby"
        hermes_state = self.playbook_name or "rule_catalog"
        elephant_state = "timeline_km_pending"
        flow = "webhook&gt;investigator&gt;router&gt;llm/rule&gt;safe&gt;approval"

        # 2026-05-04 ogt: Token 用量顯示（有資料才顯示）
        token_line = ""
        if self.ai_tokens > 0:
            cost_str = f" / ${self.ai_cost:.4f}" if self.ai_cost > 0 else ""
            token_line = f"├ Tokens：<code>{self.ai_tokens:,}{cost_str}</code>\n"

        return (
            f"🤖 <b>AI 自動化鏈路</b>\n"
            f"├ Router：<code>{html.escape(provider_display)}{model_suffix}</code>\n"
            f"├ Mode：<code>{html.escape(mode)}</code>\n"
            f"├ OpenClaw：<code>{html.escape(openclaw_state)}</code> | "
            f"NemoTron：<code>{html.escape(nemotron_state)}</code>\n"
            f"├ Hermes：<code>{html.escape(hermes_state)}</code> | "
            f"ElephantAlpha：<code>{html.escape(elephant_state)}</code>\n"
            f"{token_line}"
            f"└ Flow：<code>{flow}</code>\n"
        )

    def _format_flow_progress_block(self) -> str:
        """Operator-facing state of where the alert is in the automation loop."""
        quality = self.automation_quality or {}
        facts = quality.get("facts") if isinstance(quality.get("facts"), dict) else {}
        verdict = str(quality.get("verdict") or self._automation_mode())

        action_upper = (self.suggested_action or "").upper()
        is_noop = (
            "NO_ACTION" in action_upper
            or action_upper.startswith("OBSERVE")
            or action_upper.startswith("INVESTIGATE")
            or not action_upper.strip()
            or action_upper == "待分析"
        )
        auto_repair_records = int(facts.get("auto_repair_execution_records") or 0)
        operation_records = int(facts.get("automation_operation_records") or 0)
        verification = str(facts.get("verification_result") or "missing")
        gateway_total = int(facts.get("mcp_gateway_total") or 0)
        km_entries = int(facts.get("knowledge_entries") or 0)

        if self.confidence > 0:
            diagnose_state = "ai_ready"
        elif self.automation_state == "diagnosis_failed_manual_required":
            diagnose_state = "failed"
        else:
            diagnose_state = "rule_or_degraded"

        match_state = self.playbook_name or "rule_catalog"
        if auto_repair_records > 0:
            execute_state = f"auto_repair_recorded:{auto_repair_records}"
        elif operation_records > 0:
            execute_state = f"operation_recorded:{operation_records}"
        elif is_noop:
            execute_state = "no_action_or_observe"
        elif "approval" in verdict or self._automation_mode() == "ai_proposal_ready":
            execute_state = "awaiting_approval"
        else:
            execute_state = "not_started"

        if verification != "missing":
            verify_state = verification
        elif auto_repair_records > 0 or operation_records > 0:
            verify_state = "pending_or_missing"
        else:
            verify_state = "not_started"

        if verdict == "auto_repaired_verified":
            conclusion = "已驗證自動修復"
        elif auto_repair_records > 0 or operation_records > 0:
            conclusion = "已記錄執行，等待或缺少驗證"
        elif is_noop:
            conclusion = "未自動修復，需人工判斷"
        elif "approval" in verdict:
            conclusion = "等待審批後才會執行"
        elif "manual" in verdict:
            conclusion = "轉人工處理"
        else:
            conclusion = "尚未形成可宣稱自動修復的證據鏈"

        return (
            "🧭 <b>流程進度</b>\n"
            f"├ 收件：<code>received</code> | 診斷：<code>{html.escape(diagnose_state)}</code>\n"
            f"├ 匹配：<code>{html.escape(str(match_state)[:60])}</code> | "
            f"執行：<code>{html.escape(execute_state)}</code>\n"
            f"├ 驗證：<code>{html.escape(verify_state)}</code> | "
            f"KM：<code>{km_entries}</code> | MCP：<code>{gateway_total}</code>\n"
            f"└ 判定：<code>{html.escape(verdict)}</code> — {html.escape(conclusion)}\n"
        )

    def _format_remediation_evidence_block(self) -> str:
        """Render this card's remediation summary via the module-level formatter."""
        summary = self.remediation_summary
        return _format_remediation_evidence_block(summary)

    def format(self) -> str:
        """
        Render the approval card as Telegram HTML (AI arbitration + SignOz).

        Section order: header with risk level, incident ID, resource,
        optional category, automation status, remediation evidence, flow
        progress, automation block, AI diagnosis, suggested action, then the
        optional AI-cost, SignOz and frequency blocks, and finally the
        estimated downtime.

        Free-text values are HTML-escaped before interpolation to avoid
        Telegram "Can't parse entities" errors.

        Returns:
            str: The formatted message, cut to Telegram's 4096-character
            hard limit. NOTE(review): the cut is a plain slice, so an
            over-long message can still end inside a tag or entity.
        """
        # Responsibility code -> display label.
        resp_map = {
            "FE": "👨‍💻 FE (前端)",
            "BE": "⚙️ BE (後端)",
            "INFRA": "🏗️ INFRA (基礎設施)",
            "DB": "🗄️ DB (資料庫)",
            "COLLAB": "🤝 COLLAB (協同處理)",
        }
        resp_display = resp_map.get(self.primary_responsibility, "❓ 未知")

        # Confidence badge: >= 80 green, >= 70 yellow, otherwise red.
        confidence_pct = int(self.confidence * 100)
        if confidence_pct >= 80:
            conf_emoji = "🟢"
        elif confidence_pct >= 70:
            conf_emoji = "🟡"
        else:
            conf_emoji = "🔴"

        # Incident number: reuse an ID that already carries the INC- prefix
        # so the card never shows a duplicated "INC-INC-" prefix.
        if self.incident_id:
            incident_id = self.incident_id
        elif self.approval_id.startswith("INC-"):
            incident_id = self.approval_id
        else:
            incident_id = f"INC-{self.approval_id[:8].upper()}"

        # SignOz URL: the dynamic trace URL wins; otherwise fall back to a
        # per-service trace search. The service name is URL-encoded so that
        # characters such as "&", "#" or spaces cannot corrupt the query
        # string, and the whole URL is HTML-escaped for the href attribute.
        # NOTE(review): the fallback host is hard-coded — consider moving it
        # to configuration.
        service_name = self.resource_name.split("-", 1)[0]
        raw_url = self.signoz_trace_url or (
            f"http://192.168.0.188:3301/traces?service={quote(service_name, safe='')}"
        )
        signoz_url = html.escape(raw_url, quote=True)

        # SignOz metrics block (with trace link); only when metrics exist.
        signoz_block = ""
        if self.signoz_metrics:
            signoz_block = (
                f"━━━━━━━━━━━━━━━━━━━\n"
                f"{self.signoz_metrics.format()}\n"
                f"🔍 <a href='{signoz_url}'>SignOz Trace</a>\n"
            )

        # Escape user-supplied content to prevent "Can't parse entities".
        safe_resource = html.escape(self.resource_name)
        safe_root_cause = html.escape(self.root_cause)
        safe_action = html.escape(self.suggested_action)
        safe_downtime = html.escape(self.estimated_downtime)
        safe_automation_summary = html.escape(self._automation_status_summary())

        # AI token / cost line (2026-03-29 ogt).
        ai_cost_display = ""
        if self.ai_tokens > 0 or self.ai_cost > 0:
            ai_cost_display = f"💰 Tokens: {self.ai_tokens:,} / ${self.ai_cost:.4f}\n"

        # ADR-037 anomaly frequency block (2026-03-29 ogt); shown once the
        # 24h count reaches at least 1.
        frequency_block = ""
        if self.anomaly_frequency and self.anomaly_frequency.get("count_24h", 0) >= 1:
            freq = self.anomaly_frequency
            escalation_level = freq.get("escalation_level")
            escalation_emoji = {
                None: "",
                "REPEAT": "⚠️",
                "ESCALATE": "🔴",
                "PERMANENT_FIX": "🚨",
            }.get(escalation_level, "")

            # Sprint 4 D1 (2026-04-07 Claude Code): disposition statistics.
            auto_r = freq.get("auto_repair_count", 0)
            human_a = freq.get("human_approved_count", 0)
            manual_r = freq.get("manual_resolved_count", 0)
            cold_s = freq.get("cold_start_trust_count", 0)
            total_res = freq.get("total_resolution_count", 0)

            # Disposition breakdown, only when resolutions were recorded.
            disposition_line = ""
            if total_res > 0:
                auto_total = auto_r + cold_s
                auto_rate = int(auto_total / total_res * 100)
                disposition_line = (
                    f"├ 🤖 自動: <code>{auto_total}</code>"
                    f" | 👤 審核: <code>{human_a}</code>"
                    f" | 🔧 手動: <code>{manual_r}</code>\n"
                    f"├ 自動化率: <b>{auto_rate}%</b>\n"
                )

            frequency_block = (
                f"━━━━━━━━━━━━━━━━━━━\n"
                f"📊 <b>頻率統計</b> {escalation_emoji}\n"
                f"├ 1h: <code>{freq.get('count_1h', 0)}</code> 次"
                f" | 24h: <code>{freq.get('count_24h', 0)}</code> 次\n"
                f"{disposition_line}"
                f"└ 累計修復: <code>{auto_r}</code> 次\n"
            )
            if escalation_level:
                # Escaped like every other dynamic value placed inside a tag.
                frequency_block += f"🔺 升級: <b>{html.escape(str(escalation_level))}</b>\n"

        # ADR-075 TYPE-3 format (2026-04-12 ogt).
        # AI source label: with confidence == 0 no "0%" is shown; the card
        # reports a rule/degraded analysis instead.
        if self.confidence > 0 and self.ai_provider:
            provider_display, model_suffix = self._provider_display()
            ai_source = f"🤖 <b>{provider_display}{model_suffix}</b>  {conf_emoji} {confidence_pct}%"
        elif self.confidence > 0:
            ai_source = f"🤖 <b>AI 仲裁</b>  {conf_emoji} {confidence_pct}%"
        else:
            ai_source = "⚙️ <b>規則/降級分析</b>"

        # Risk level label; unknown levels fall back to the raw value.
        risk_zh = {
            "CRITICAL": "嚴重",
            "HIGH": "高風險",
            "MEDIUM": "中風險",
            "LOW": "低風險",
        }.get(self.risk_level.upper(), self.risk_level)

        # ADR-076 category label (2026-04-16 ogt + Claude Sonnet 4.6);
        # unknown categories fall back to the raw key.
        _category_map = {
            "host": "🖥️ 主機", "host_resource": "🖥️ 主機",
            "k8s": "☸️ K8s", "kubernetes": "☸️ K8s",
            "database": "🗄️ 資料庫",
            "service": "⚙️ 服務",
            "external_site": "🌐 外部網站",
            "secops": "🔐 安全",
            "auto_repair": "🔧 自動修復",
            "alertchain_health": "📡 告警鏈路",
            "flywheel_health": "🔄 飛輪健康",
            "docker": "🐳 Docker",
            "infrastructure": "🏗️ 基礎設施",
        }
        category_line = ""
        if self.alert_category:
            cat_display = html.escape(_category_map.get(self.alert_category, self.alert_category))
            category_line = f"🏷️ 分類：<b>{cat_display}</b>\n"

        # Playbook line.
        playbook_line = ""
        if self.playbook_name:
            playbook_line = f"📖 Playbook：<code>{html.escape(self.playbook_name)}</code>\n"
        remediation_evidence_block = self._format_remediation_evidence_block()
        flow_progress_block = self._format_flow_progress_block()
        automation_block = self._format_automation_block()

        # ADR-075 TYPE-3 assembly.
        message = (
            f"{self.status_emoji} ACTION REQUIRED | <b>{html.escape(risk_zh)}</b>\n"
            f"──────────────────────\n"
            f"📋 <code>{html.escape(incident_id)}</code>\n"
            f"🎯 資源：<code>{safe_resource}</code>\n"
            f"{category_line}"
            f"🧭 處置狀態：<b>{safe_automation_summary}</b>\n"
            f"{remediation_evidence_block}\n"
            f"{flow_progress_block}\n"
            f"{automation_block}"
            f"\n"
            f"🧠 <b>AI 深度診斷</b>\n"
            f"├─ 分析：{safe_root_cause}\n"
            f"├─ 責任：{resp_display}\n"
            f"└─ {ai_source}\n"
            f"\n"
            f"⚡ <b>建議修復動作</b>\n"
            f"{playbook_line}"
            f"<code>{safe_action}</code>\n"
        )
        if ai_cost_display:
            message += f"{ai_cost_display}"
        if signoz_block:
            message += f"\n{signoz_block}"
        if frequency_block:
            message += f"\n{frequency_block}"
        message += f"\n⏱️ 停機: {safe_downtime}"

        return message[:4096]  # Telegram hard limit

    def format_with_nemotron(self) -> str:
        """
        Render the approval card including Nemotron tool-calling results
        (Phase 22 ADR-044).

        Section order: risk level + incident ID, resource, optional
        category, remediation evidence, flow progress, automation block,
        AI source / confidence, responsibility, root cause, the optional
        Tool Calling block (at most three tools, validation result,
        latency), then the suggested action and the estimated downtime.

        Free-text values are HTML-escaped before interpolation.

        Returns:
            str: The formatted message, cut to Telegram's 4096-character
            hard limit. NOTE(review): the cut is a plain slice, so an
            over-long message can still end inside a tag or entity.
        """
        # Responsibility code -> display label.
        resp_map = {
            "FE": "👨‍💻 FE (前端)",
            "BE": "⚙️ BE (後端)",
            "INFRA": "🏗️ INFRA (基礎設施)",
            "DB": "🗄️ DB (資料庫)",
            "COLLAB": "🤝 COLLAB (協同處理)",
        }
        resp_display = resp_map.get(self.primary_responsibility, "❓ 未知")

        # Confidence badge: >= 80 green, >= 70 yellow, otherwise red.
        confidence_pct = int(self.confidence * 100)
        if confidence_pct >= 80:
            conf_emoji = "🟢"
        elif confidence_pct >= 70:
            conf_emoji = "🟡"
        else:
            conf_emoji = "🔴"

        # Incident number: reuse an ID that already carries the INC- prefix.
        if self.incident_id:
            incident_id = self.incident_id
        elif self.approval_id.startswith("INC-"):
            incident_id = self.approval_id
        else:
            incident_id = f"INC-{self.approval_id[:8].upper()}"

        # HTML escaping of free-text fields.
        safe_resource = html.escape(self.resource_name)
        safe_root_cause = html.escape(self.root_cause)
        safe_action = html.escape(self.suggested_action)
        safe_downtime = html.escape(self.estimated_downtime)

        # AI provider line.
        # 2026-04-04 ogt: include ai_model so the underlying model is shown.
        # 2026-04-12 ogt: a rule match shows a check mark instead of a red 0%.
        if self.confidence > 0 and self.ai_provider:
            provider_display, model_suffix = self._provider_display()
            conf_line = f"🤖 <b>{provider_display} 仲裁</b>{model_suffix}  {conf_emoji} {confidence_pct}%"
        elif self.confidence > 0:
            conf_line = f"🤖 <b>OpenClaw 仲裁</b>  {conf_emoji} {confidence_pct}%"
        else:
            conf_line = "⚙️ <b>規則匹配</b>  ✅"

        # Tool Calling block.
        # 2026-04-09 Claude Sonnet 4.6: show which model/backend did the
        # tool calling.
        nemotron_block = ""
        if self.nemotron_enabled and self.nemotron_tools:
            tools_lines = []
            for t in self.nemotron_tools[:3]:  # show at most 3 tools
                valid_emoji = "✅" if t.get("valid", False) else "❌"
                tool_name = html.escape(str(t.get("tool", "unknown"))[:20])
                args = t.get("args", {})
                if isinstance(args, dict) and args:
                    # Only the first two arguments are listed.
                    args_str = ", ".join(f"{k}={v}" for k, v in list(args.items())[:2])
                else:
                    args_str = str(args)[:30]
                # Truncate before escaping so no entity is cut in half.
                safe_args = html.escape(args_str[:40])
                tools_lines.append(f"  {valid_emoji} {tool_name}: {safe_args}")

            tools_str = "\n".join(tools_lines)
            validation_display = html.escape(self.nemotron_validation or "⏳ 驗證中")

            # Tool-calling model / backend label.
            if self.nemotron_tool_model and self.nemotron_tool_backend:
                tool_model_label = f"<code>{html.escape(self.nemotron_tool_model)}</code> ({html.escape(self.nemotron_tool_backend)})"
            elif self.nemotron_tool_model:
                tool_model_label = f"<code>{html.escape(self.nemotron_tool_model)}</code>"
            else:
                tool_model_label = "Nemotron"

            latency_line = f"└ 延遲: {self.nemotron_latency_ms:.0f}ms\n" if self.nemotron_latency_ms > 0 else ""

            nemotron_block = (
                f"━━━━━━━━━━━━━━━━━━━\n"
                f"🔧 <b>Tool Calling</b>: {tool_model_label}\n"
                f"{tools_str}\n"
                f"└ 驗證: {validation_display}\n"
                f"{latency_line}"
            )

        # 2026-04-05 Claude Code: redesigned layout for readability.
        # 2026-04-16 ogt + Claude Sonnet 4.6: show alert_category + playbook_name.
        # The map carries the same keys as the one in format() so a category
        # is labelled identically on both card variants; unknown categories
        # fall back to the raw key.
        _category_map = {
            "host": "🖥️ 主機", "host_resource": "🖥️ 主機",
            "k8s": "☸️ K8s", "kubernetes": "☸️ K8s",
            "database": "🗄️ 資料庫", "service": "⚙️ 服務",
            "external_site": "🌐 外部網站", "secops": "🔐 安全",
            "auto_repair": "🔧 自動修復", "docker": "🐳 Docker",
            "alertchain_health": "📡 告警鏈路",
            "flywheel_health": "🔄 飛輪健康",
            "infrastructure": "🏗️ 基礎設施",
        }
        category_line = ""
        if self.alert_category:
            cat_display = html.escape(_category_map.get(self.alert_category, self.alert_category))
            category_line = f"🏷️ {cat_display}\n"
        playbook_line = ""
        if self.playbook_name:
            playbook_line = f"📖 <code>{html.escape(self.playbook_name)}</code>\n"
        remediation_evidence_block = self._format_remediation_evidence_block()
        flow_progress_block = self._format_flow_progress_block()

        # Assemble the message.
        message = (
            f"{self.status_emoji} <b>{html.escape(self.risk_level)}</b>  <code>{html.escape(incident_id)}</code>\n"
            f"<b>{safe_resource}</b>\n"
            f"{category_line}"
            f"\n"
            f"{remediation_evidence_block}"
            f"{flow_progress_block}\n"
            f"{self._format_automation_block()}\n"
            f"{conf_line}\n"
            f"👥 {resp_display}\n"
            f"💡 {safe_root_cause}\n"
        )
        if nemotron_block:
            message += f"\n{nemotron_block}"
        message += (
            f"\n🔧 <b>建議:</b> {playbook_line}{safe_action}\n"
            f"⏱️ 停機: {safe_downtime}"
        )

        return message[:4096]  # Telegram hard limit


# =============================================================================
# 新訊息模板 (2026-03-29 ogt: ADR-038 Telegram 訊息規範)
# =============================================================================

@dataclass
class SentryErrorMessage:
    """
    Sentry 錯誤訊息 (SENTRY_ERROR)

    2026-03-29 ogt: 新增，用於 Sentry 錯誤通知
    按鈕: [🔍 查看詳情] [🔕 靜默 1h]
    """
    error_id: str               # Sentry Issue ID
    error_type: str             # TypeError, ValueError, etc.
    error_message: str          # 錯誤訊息 (max 100)
    service_name: str           # awoooi-api, awoooi-web, etc.
    file_location: str          # src/api/v1/incidents.py:123
    occurrence_count: int = 1   # 發生次數
    affected_users: int = 0     # 影響用戶數
    first_seen: str = ""        # 首次發生時間
    stack_trace: list[str] | None = None  # Stack trace (前 3 行)
    sentry_url: str = ""        # Sentry 連結

    def format(self) -> str:
        """格式化為 Telegram HTML"""
        safe_error = html.escape(self.error_message[:80])
        safe_type = html.escape(self.error_type[:30])
        safe_service = html.escape(self.service_name[:25])
        safe_file = html.escape(self.file_location[:50])

        # Stack trace 區塊
        trace_block = ""
        if self.stack_trace:
            trace_lines = "\n".join(f"  → {html.escape(line[:50])}" for line in self.stack_trace[:3])
            trace_block = f"🔗 Stack Trace (前 3 行):\n{trace_lines}\n"

        # Sentry URL
        sentry_link = ""
        if self.sentry_url:
            safe_url = html.escape(self.sentry_url, quote=True)
            sentry_link = f"\n🔍 <a href='{safe_url}'>查看 Sentry</a>"

        message = (
            f"═══════════════════════════\n"
            f"🐛 <b>SENTRY ERROR</b> | {safe_service}\n"
            f"═══════════════════════════\n"
            f"📋 <code>{html.escape(self.error_id)}</code>\n"
            f"🎯 錯誤: <code>{safe_type}</code>\n"
            f"━━━━━━━━━━━━━━━━━━━\n"
            f"💬 {safe_error}\n"
            f"━━━━━━━━━━━━━━━━━━━\n"
            f"📊 <b>統計</b>\n"
            f"├ 發生次數: <code>{self.occurrence_count}</code>\n"
            f"├ 影響用戶: <code>{self.affected_users}</code>\n"
            f"└ 首次發生: {html.escape(self.first_seen) if self.first_seen else 'N/A'}\n"
            f"━━━━━━━━━━━━━━━━━━━\n"
            f"📍 位置: <code>{safe_file}</code>\n"
            f"{trace_block}"
            f"{sentry_link}"
        )

        return message[:900]


@dataclass
class ResourceWarnMessage:
    """
    Resource warning notification (RESOURCE_WARN).

    Added 2026-03-29 (ogt) for resource-exhaustion warnings.
    Buttons: [⚡ auto-scale] [🔕 silence 1h]
    """
    resource_id: str            # RES-YYYYMMDD-XXXX
    pod_name: str               # Pod name (first 35 chars rendered)
    namespace: str = "default"  # K8s namespace (first 20 chars rendered)
    cpu_percent: float = 0.0    # CPU usage in percent
    cpu_limit: str = ""         # CPU limit (e.g., 500m); omitted when empty
    memory_percent: float = 0.0 # Memory usage in percent
    memory_limit: str = ""      # Memory limit (e.g., 512Mi); omitted when empty
    disk_percent: float = 0.0   # Disk usage in percent
    trend_info: str = ""        # Trend information (first 50 chars rendered)
    suggestion: str = ""        # Suggested operation

    def format(self) -> str:
        """Render the warning as Telegram HTML, cut to 900 characters.

        All string fields, including the CPU and memory limits, are
        HTML-escaped before they are interpolated.

        Returns:
            str: The formatted message.
        """
        safe_pod = html.escape(self.pod_name[:35])
        safe_ns = html.escape(self.namespace[:20])

        # Usage badge: >= 90 red, >= 70 yellow, otherwise green.
        def get_status_emoji(percent: float) -> str:
            if percent >= 90:
                return "🔴"
            elif percent >= 70:
                return "🟡"
            return "🟢"

        cpu_emoji = get_status_emoji(self.cpu_percent)
        mem_emoji = get_status_emoji(self.memory_percent)
        disk_emoji = get_status_emoji(self.disk_percent)

        # Limits are free-form strings, so they are escaped like every other
        # text field; an unescaped "<" or "&" would break HTML parsing.
        cpu_limit_note = f" (limit: {html.escape(self.cpu_limit)})" if self.cpu_limit else ""
        mem_limit_note = f" (limit: {html.escape(self.memory_limit)})" if self.memory_limit else ""

        # Optional trend and suggestion lines.
        trend_block = ""
        if self.trend_info:
            trend_block = f"📈 趨勢: {html.escape(self.trend_info[:50])}\n"

        suggestion_block = ""
        if self.suggestion:
            suggestion_block = f"💡 建議: {html.escape(self.suggestion)}\n"

        message = (
            f"═══════════════════════════\n"
            f"⚠️ <b>資源告警</b> | {safe_ns}\n"
            f"═══════════════════════════\n"
            f"📋 <code>{html.escape(self.resource_id)}</code>\n"
            f"🎯 Pod: <code>{safe_pod}</code>\n"
            f"━━━━━━━━━━━━━━━━━━━\n"
            f"📊 <b>資源使用率</b>\n"
            f"├ CPU: {cpu_emoji} <code>{self.cpu_percent:.1f}%</code>"
            f"{cpu_limit_note}\n"
            f"├ Memory: {mem_emoji} <code>{self.memory_percent:.1f}%</code>"
            f"{mem_limit_note}\n"
            f"└ Disk: {disk_emoji} <code>{self.disk_percent:.1f}%</code>\n"
            f"━━━━━━━━━━━━━━━━━━━\n"
            f"{trend_block}"
            f"{suggestion_block}"
        )

        return message[:900]


@dataclass
class RepairReportMessage:
    """
    自動修復報告訊息 (REPAIR_REPORT)

    2026-03-29 ogt: 新增，用於每日自動修復彙總
    按鈕: 無
    """
    report_date: str            # 報告日期 (YYYY-MM-DD)
    total_repairs: int = 0      # 總修復次數
    success_count: int = 0      # 成功次數
    failure_count: int = 0      # 失敗次數
    saved_minutes: int = 0      # 節省人工時間 (分鐘)
    top_issues: list[tuple[str, int]] | None = None  # Top 問題 [(name, count)]
    ai_cost_gemini: float = 0.0  # Gemini 成本
    ai_cost_nvidia: float = 0.0  # NVIDIA 成本 (免費)
    ai_tokens_total: int = 0     # 總 Token 數

    def format(self) -> str:
        """格式化為 Telegram HTML"""
        # 成功率
        success_rate = (self.success_count / self.total_repairs * 100) if self.total_repairs > 0 else 0

        # Top 問題區塊
        issues_block = ""
        if self.top_issues:
            issues_lines = "\n".join(
                f"  {i+1}. {html.escape(name[:30])} ({count} 次)"
                for i, (name, count) in enumerate(self.top_issues[:3])
            )
            issues_block = f"━━━━━━━━━━━━━━━━━━━\n🔝 <b>Top 3 問題</b>:\n{issues_lines}\n"

        # AI 成本
        total_cost = self.ai_cost_gemini + self.ai_cost_nvidia

        message = (
            f"═══════════════════════════\n"
            f"🔧 <b>自動修復報告</b> | 每日彙總\n"
            f"═══════════════════════════\n"
            f"📅 {html.escape(self.report_date)}\n"
            f"━━━━━━━━━━━━━━━━━━━\n"
            f"📊 <b>統計</b>\n"
            f"├ 總修復次數: <code>{self.total_repairs}</code>\n"
            f"├ 成功: ✅ <code>{self.success_count}</code> ({success_rate:.0f}%)\n"
            f"├ 失敗: ❌ <code>{self.failure_count}</code>\n"
            f"└ 節省人工: ~<code>{self.saved_minutes}</code> 分鐘\n"
            f"{issues_block}"
            f"━━━━━━━━━━━━━━━━━━━\n"
            f"💰 <b>AI 成本</b>\n"
            f"├ Gemini: ${self.ai_cost_gemini:.4f} ({self.ai_tokens_total:,} tokens)\n"
            f"├ NVIDIA: ${self.ai_cost_nvidia:.4f} (免費)\n"
            f"└ 總計: ${total_cost:.4f}"
        )

        return message[:900]


@dataclass
class DailySummaryMessage:
    """
    Daily summary notification (DAILY_SUMMARY).

    Added 2026-03-29 (ogt) for the daily system status digest.
    Buttons: none
    """
    summary_date: str           # Summary date (YYYY-MM-DD)
    # Alert statistics
    alert_total: int = 0
    alert_critical: int = 0
    alert_medium: int = 0
    alert_low: int = 0
    # Handling statistics
    auto_repair_count: int = 0
    manual_approval_count: int = 0
    ignored_count: int = 0
    avg_response_minutes: float = 0.0
    # Availability
    api_availability: float = 99.9
    web_availability: float = 99.9
    worker_availability: float = 99.9
    # Cost
    ai_cost: float = 0.0
    cloud_cost: float = 0.0
    budget_remaining: float = 0.0

    def format(self) -> str:
        """Render the daily digest as Telegram HTML (cut to 900 chars)."""
        handled = self.auto_repair_count + self.manual_approval_count + self.ignored_count

        def share(part: int) -> float:
            # Percentage of all handled alerts; 0 when nothing was handled.
            return (part / handled * 100) if handled > 0 else 0

        rows = [
            "═══════════════════════════",
            "📊 <b>每日摘要</b> | AWOOOI",
            "═══════════════════════════",
            f"📅 {html.escape(self.summary_date)}",
            "━━━━━━━━━━━━━━━━━━━",
            "🚨 <b>告警統計</b>",
            f"├ 總數: <code>{self.alert_total}</code>",
            f"├ Critical: <code>{self.alert_critical}</code>",
            f"├ Medium: <code>{self.alert_medium}</code>",
            f"└ Low: <code>{self.alert_low}</code>",
            "━━━━━━━━━━━━━━━━━━━",
            "✅ <b>處理統計</b>",
            f"├ 自動修復: <code>{self.auto_repair_count}</code> ({share(self.auto_repair_count):.0f}%)",
            f"├ 人工簽核: <code>{self.manual_approval_count}</code> ({share(self.manual_approval_count):.0f}%)",
            f"├ 忽略/靜默: <code>{self.ignored_count}</code> ({share(self.ignored_count):.0f}%)",
            f"└ 平均回應: <code>{self.avg_response_minutes:.1f}</code> 分鐘",
            "━━━━━━━━━━━━━━━━━━━",
            "📈 <b>可用性</b>",
            f"├ API: <code>{self.api_availability:.2f}%</code>",
            f"├ Web: <code>{self.web_availability:.2f}%</code>",
            f"└ Worker: <code>{self.worker_availability:.2f}%</code>",
            "━━━━━━━━━━━━━━━━━━━",
            "💰 <b>成本</b>",
            f"├ AI: ${self.ai_cost:.2f}",
            f"├ 雲端: ${self.cloud_cost:.2f}",
            f"└ 預算剩餘: ${self.budget_remaining:.2f}",
        ]

        return "\n".join(rows)[:900]


@dataclass
class CICDProgressMessage:
    """
    CI/CD progress notification (CICD_PROGRESS).

    Added 2026-03-30 (ogt) for CI/CD pipeline progress updates.
    Traits: concise, bypasses AI arbitration, no buttons.
    """
    job_name: str               # Job name (e.g., Build, Test, Deploy)
    status: str                 # running, success, failed
    stage: str = ""             # CI/CD stage (e.g., build, test, deploy)
    commit_sha: str = ""        # Git commit SHA
    triggered_by: str = ""      # Who triggered the run
    duration_seconds: int = 0   # Run time in seconds
    message: str = ""           # Extra message
    workflow_url: str = ""      # Workflow link

    def format(self) -> str:
        """Render a compact progress line as Telegram HTML (cut to 900 chars)."""
        # Status icon, matched case-insensitively; unknown states get a box.
        icons = {
            "running": "🔄",
            "success": "✅",
            "failed": "❌",
            "pending": "⏳",
        }
        icon = icons.get(self.status.lower(), "📦")

        # Header line, with the stage appended when one is set.
        header = f"{icon} <b>[AWOOOI CI/CD]</b>"
        if self.stage:
            stage_text = html.escape(self.stage[:20])
            if stage_text:
                header += f" | {stage_text}"

        # Job line, with the elapsed time when it is positive.
        job_row = f"📦 {html.escape(self.job_name[:40])}"
        if self.duration_seconds > 0:
            mins = self.duration_seconds // 60
            secs = self.duration_seconds % 60
            if mins > 0:
                job_row += f" ({mins}m {secs}s)"
            else:
                job_row += f" ({secs}s)"

        rows = [header, job_row]

        # Optional rows, in display order: commit, detail message, link.
        if self.commit_sha:
            rows.append(f"📋 <code>{html.escape(self.commit_sha[:8])}</code>")
        if self.message:
            rows.append(f"📝 {html.escape(self.message[:240])}")
        if self.workflow_url:
            href = html.escape(self.workflow_url, quote=True)
            rows.append(f"🔗 <a href='{href}'>Workflow</a>")

        return "\n".join(rows)[:900]


@dataclass
class DeploySuccessMessage:
    """
    Deployment success notification (DEPLOY_SUCCESS).

    Added 2026-03-29 (ogt) for CD deployment success notices.
    Buttons: none
    """
    commit_sha: str             # Git commit SHA (first 8 chars rendered)
    triggered_by: str           # Who triggered the deployment
    environment: str = "Production"  # Target environment
    # Version information
    api_version: str = ""
    web_version: str = ""
    worker_version: str = ""
    # Deployment duration
    duration_seconds: int = 0
    # Test results
    e2e_passed: int = 0
    e2e_total: int = 0
    health_check_passed: bool = True
    # Links
    workflow_url: str = ""

    def format(self) -> str:
        """Render the deployment summary as Telegram HTML (cut to 900 chars)."""
        def version_label(version: str) -> str:
            # Empty versions are shown as N/A.
            return html.escape(version) if version else "N/A"

        # Elapsed time; the minutes part is omitted below one minute.
        mins = self.duration_seconds // 60
        secs = self.duration_seconds % 60
        elapsed = f"{secs}s" if mins <= 0 else f"{mins}m {secs}s"

        # E2E badge is green only when every test passed.
        if self.e2e_passed == self.e2e_total:
            e2e_mark = "✅"
        else:
            e2e_mark = "⚠️"

        if self.health_check_passed:
            health_text = "✅ 全部通過"
        else:
            health_text = "❌ 部分失敗"

        rows = [
            f"✅ <b>部署成功</b> | {html.escape(self.environment[:15])}",
            "",
            f"📋 Commit: <code>{html.escape(self.commit_sha[:8])}</code>",
            f"👤 觸發者: @{html.escape(self.triggered_by[:20])}",
            "━━━━━━━━━━━━━━━━━━━",
            "📊 <b>部署詳情</b>",
            f"├ API: {version_label(self.api_version)} ✅",
            f"├ Web: {version_label(self.web_version)} ✅",
            f"├ Worker: {version_label(self.worker_version)} ✅",
            f"└ 耗時: {elapsed}",
            "━━━━━━━━━━━━━━━━━━━",
            f"🧪 E2E 測試: {e2e_mark} {self.e2e_passed}/{self.e2e_total} PASSED",
            f"📊 健康檢查: {health_text}",
        ]

        # Optional workflow link as the last row.
        if self.workflow_url:
            href = html.escape(self.workflow_url, quote=True)
            rows.append(f"🔗 <a href='{href}'>查看 Workflow</a>")

        return "\n".join(rows)[:900]


@dataclass
class RateLimitMessage:
    """
    API 限額警告訊息 (RATE_LIMIT)

    2026-03-29 ogt: 新增，用於 AI API 限額警告
    按鈕: 無
    """
    provider: str               # gemini, openai, etc.
    # 用量統計
    daily_usage: int = 0
    daily_limit: int = 0
    token_usage: int = 0
    token_limit: int = 0
    cost_usd: float = 0.0
    # 建議
    suggestions: list[str] | None = None
    # 重置時間
    reset_time: str = ""

    def format(self) -> str:
        """格式化為 Telegram HTML"""
        safe_provider = html.escape(self.provider.upper()[:15])

        # 使用率百分比
        usage_pct = (self.daily_usage / self.daily_limit * 100) if self.daily_limit > 0 else 0
        token_pct = (self.token_usage / self.token_limit * 100) if self.token_limit > 0 else 0

        # 建議區塊
        suggestion_block = ""
        if self.suggestions:
            suggestion_lines = "\n".join(f"  - {html.escape(s[:50])}" for s in self.suggestions[:3])
            suggestion_block = f"━━━━━━━━━━━━━━━━━━━\n💡 <b>建議</b>:\n{suggestion_lines}\n"

        # 重置時間
        reset_block = ""
        if self.reset_time:
            reset_block = f"\n🔄 將於 {html.escape(self.reset_time)} 重置"

        message = (
            f"⚠️ <b>API 限額警告</b>\n\n"
            f"━━━━━━━━━━━━━━━━━━━\n"
            f"📊 <b>{safe_provider} API</b>\n"
            f"├ 今日用量: <code>{self.daily_usage}/{self.daily_limit}</code> ({usage_pct:.0f}%)\n"
            f"├ Token: <code>{self.token_usage:,}/{self.token_limit:,}</code> ({token_pct:.0f}%)\n"
            f"└ 成本: ${self.cost_usd:.4f}\n"
            f"{suggestion_block}"
            f"{reset_block}"
        )

        return message[:900]


@dataclass
class K3sStatusMessage:
    """
    K3s cluster status report notification (K3S_STATUS).

    Added 2026-03-31 (Claude Code), Phase 21.2 periodic reports.
    Used for the daily K3s health push.
    Buttons: none
    """
    report_date: str            # Report date (YYYY-MM-DD HH:MM)
    # Node status
    node_total: int = 0
    node_ready: int = 0
    # Pod status
    pod_total: int = 0
    pod_running: int = 0
    pod_pending: int = 0
    pod_failed: int = 0
    # HPA status
    hpa_api_replicas: str = "2/6"
    hpa_web_replicas: str = "2/6"
    hpa_worker_replicas: str = "1/3"
    # Backup status
    etcd_backup_last: str = ""
    velero_backup_last: str = ""
    # Stability indicators
    alert_count_48h: int = 0
    pod_restart_48h: int = 0
    # Version information
    k3s_version: str = ""

    def format(self) -> str:
        """Render the cluster status report as Telegram HTML (cut to 900 chars)."""
        def badge(healthy: bool) -> str:
            return "✅" if healthy else "⚠️"

        def backup_label(stamp: str) -> str:
            # First 20 chars of the last-backup text, or N/A when empty.
            return html.escape(stamp[:20]) if stamp else "N/A"

        # Nodes are healthy when all are ready; pods when none are failed
        # or pending; the 48h window when it had no alerts and no restarts.
        nodes_badge = badge(self.node_ready == self.node_total)
        pods_badge = badge(self.pod_failed == 0 and self.pod_pending == 0)
        calm_badge = badge(self.alert_count_48h == 0 and self.pod_restart_48h == 0)

        rows = [
            "═══════════════════════════",
            "🎛️ <b>K3s 叢集狀態</b> | Daily",
            "═══════════════════════════",
            f"📅 {html.escape(self.report_date)}",
            "━━━━━━━━━━━━━━━━━━━",
            f"{nodes_badge} <b>節點</b>: {self.node_ready}/{self.node_total} Ready",
            "━━━━━━━━━━━━━━━━━━━",
            f"{pods_badge} <b>Pod 狀態</b>",
            f"├ Running: <code>{self.pod_running}</code>",
            f"├ Pending: <code>{self.pod_pending}</code>",
            f"└ Failed: <code>{self.pod_failed}</code>",
            "━━━━━━━━━━━━━━━━━━━",
            "📊 <b>HPA 副本數</b>",
            f"├ API: <code>{html.escape(self.hpa_api_replicas)}</code>",
            f"├ Web: <code>{html.escape(self.hpa_web_replicas)}</code>",
            f"└ Worker: <code>{html.escape(self.hpa_worker_replicas)}</code>",
            "━━━━━━━━━━━━━━━━━━━",
            "💾 <b>備份</b>",
            f"├ etcd: {backup_label(self.etcd_backup_last)}",
            f"└ Velero: {backup_label(self.velero_backup_last)}",
            "━━━━━━━━━━━━━━━━━━━",
            f"{calm_badge} <b>48h 穩定度</b>",
            f"├ 告警: <code>{self.alert_count_48h}</code>",
            f"└ Pod 重啟: <code>{self.pod_restart_48h}</code>",
        ]

        return "\n".join(rows)[:900]


@dataclass
class WeeklyReportMessage:
    """
    Weekly report notification (WEEKLY_REPORT).

    Added 2026-03-31 (Claude Code), Phase 21.3 periodic reports.
    Sent every Friday at 18:00 Taipei time.
    Buttons: none
    """
    week_range: str             # Week label (e.g., "2026-W14")
    report_date: str            # Report date and time
    # Alert statistics
    alert_total: int = 0
    alert_critical: int = 0
    alert_resolved: int = 0
    resolved_rate: float = 0.0
    # AI performance
    ai_proposal_count: int = 0
    ai_executed_count: int = 0
    ai_success_rate: float = 0.0
    avg_response_minutes: float = 0.0
    # K3s health
    k3s_uptime_pct: float = 99.9
    pod_restart_total: int = 0
    hpa_scale_events: int = 0
    # Git activity
    commits_count: int = 0
    deploy_count: int = 0
    # Cost
    ai_cost_week: float = 0.0
    ai_tokens_week: int = 0
    # Sprint 4 F1 (2026-04-07 Claude Code): disposition breakdown
    disposition_auto: int = 0
    disposition_human: int = 0
    disposition_manual: int = 0
    disposition_cold_start: int = 0
    disposition_total: int = 0

    def format(self) -> str:
        """Render the weekly report as Telegram HTML (cut to 1200 chars)."""
        def badge(healthy: bool) -> str:
            return "✅" if healthy else "⚠️"

        # Thresholds: resolve rate >= 80, AI success >= 70, uptime >= 99.
        alerts_badge = badge(self.resolved_rate >= 80)
        ai_badge = badge(self.ai_success_rate >= 70)
        cluster_badge = badge(self.k3s_uptime_pct >= 99)

        rows = [
            "═══════════════════════════",
            "📊 <b>AWOOOI 週報</b>",
            "═══════════════════════════",
            f"📅 {html.escape(self.week_range)} | {html.escape(self.report_date)}",
            "━━━━━━━━━━━━━━━━━━━",
            f"{alerts_badge} <b>告警統計</b>",
            f"├ 總數: <code>{self.alert_total}</code>",
            f"├ Critical: <code>{self.alert_critical}</code>",
            f"├ 已解決: <code>{self.alert_resolved}</code>",
            f"└ 解決率: <code>{self.resolved_rate:.1f}%</code>",
            "━━━━━━━━━━━━━━━━━━━",
            f"{ai_badge} <b>AI 效能</b>",
            f"├ 提案數: <code>{self.ai_proposal_count}</code>",
            f"├ 執行數: <code>{self.ai_executed_count}</code>",
            f"├ 成功率: <code>{self.ai_success_rate:.1f}%</code>",
            f"└ 平均回應: <code>{self.avg_response_minutes:.1f}</code> 分鐘",
            "━━━━━━━━━━━━━━━━━━━",
            f"{cluster_badge} <b>K3s 健康</b>",
            f"├ Uptime: <code>{self.k3s_uptime_pct:.2f}%</code>",
            f"├ Pod 重啟: <code>{self.pod_restart_total}</code>",
            f"└ HPA 擴縮: <code>{self.hpa_scale_events}</code> 次",
            "━━━━━━━━━━━━━━━━━━━",
            "📦 <b>開發活動</b>",
            f"├ Commits: <code>{self.commits_count}</code>",
            f"└ 部署: <code>{self.deploy_count}</code> 次",
            "━━━━━━━━━━━━━━━━━━━",
            "💰 <b>AI 成本</b>",
            f"├ 費用: $<code>{self.ai_cost_week:.2f}</code>",
            f"└ Tokens: <code>{self.ai_tokens_week:,}</code>",
        ]
        text = "\n".join(rows) + "\n"

        # Sprint 4 F1: disposition breakdown, appended only when there is
        # data. Cold-start trust counts towards the automation rate.
        if self.disposition_total > 0:
            automated = self.disposition_auto + self.disposition_cold_start
            automation_pct = int(automated / self.disposition_total * 100)
            text += "\n".join([
                "━━━━━━━━━━━━━━━━━━━",
                "📋 <b>處置分佈</b>",
                f"├ 🤖 自動修復: <code>{self.disposition_auto}</code>",
                f"├ ❄️ 冷啟動信任: <code>{self.disposition_cold_start}</code>",
                f"├ 👤 人工審核: <code>{self.disposition_human}</code>",
                f"├ 🔧 手動處理: <code>{self.disposition_manual}</code>",
                f"└ 自動化率: <b>{automation_pct}%</b>",
            ])

        return text[:1200]


@dataclass
class InfraAlertMessage:
    """
    Infrastructure anomaly alert message (INFRA_ALERT).

    2026-04-03 ogt: added to give infrastructure anomalies such as
    Nemotron/NIM a standard alert format.
    Purpose: notifications about system components that are not incidents
    (AI provider, DB, external API, ...).
    Buttons: none (informational alert).
    """
    component: str          # Component name (e.g., "Nemotron NIM")
    status: str             # Status description (e.g., a timeout marker)
    impact: str             # Impact description
    auto_fixed: bool = False        # Whether the problem was already auto-remediated
    fix_action: str = ""            # Remediation taken (auto_fixed=True) or suggested (auto_fixed=False)
    note: str = ""                  # Extra note; when set, replaces the remediation section

    @staticmethod
    def _escape_within(text: str, budget: int) -> str:
        """HTML-escape ``text`` so the result has at most ``budget`` characters.

        Over-long input is cut on a character boundary of the *source* text and
        suffixed with an ellipsis, so an entity such as ``&amp;`` is never split.
        """
        escaped = html.escape(text)
        if len(escaped) <= budget:
            return escaped

        pieces: list[str] = []
        used = 0
        for char in text:
            piece = html.escape(char)
            # Keep one character of the budget for the trailing ellipsis.
            if used + len(piece) > budget - 1:
                break
            pieces.append(piece)
            used += len(piece)
        return "".join(pieces) + "…"

    def _render(self, component: str, status: str, impact: str, detail: str) -> str:
        """Assemble the card from pieces that are already HTML-escaped."""
        divider = "━━━━━━━━━━━━━━━━━━━\n"
        if self.note:
            # A note marks an informational hint: no remediation section.
            footer = f"{divider}💡 {detail}\n"
        elif self.auto_fixed:
            footer = f"{divider}✅ <b>已自動修復</b>\n└ {detail}\n"
        else:
            footer = f"{divider}⚠️ <b>需要關注</b>\n└ {detail}\n"

        return (
            f"🚨 <b>基礎設施異常</b>\n"
            f"{divider}"
            f"⚙️ <b>{component}</b>: {status}\n"
            f"📛 影響: {impact}\n"
            f"{footer}"
        )

    def format(self) -> str:
        """Format the alert as Telegram HTML of at most 900 characters.

        When everything fits, the output is the plain escaped rendering.
        Otherwise each field is clipped individually *before* assembly, instead
        of slicing the finished HTML: a blind slice could stop inside a tag or
        entity, or drop the footer, leaving markup Telegram cannot parse.
        """
        max_length = 900

        if self.note:
            detail = self.note
        elif self.auto_fixed:
            detail = self.fix_action
        else:
            detail = self.fix_action or '請確認元件狀態'

        full = self._render(
            html.escape(self.component),
            html.escape(self.status),
            html.escape(self.impact),
            html.escape(detail),
        )
        if len(full) <= max_length:
            return full

        # Field budgets add up to 800; the fixed template text needs fewer than
        # 100 characters, so the clipped card always stays within max_length.
        return self._render(
            self._escape_within(self.component, 100),
            self._escape_within(self.status, 150),
            self._escape_within(self.impact, 350),
            self._escape_within(detail, 200),
        )


# =============================================================================
# Risk Level Emoji Mapping
# =============================================================================

# Emoji shown for each (lowercase) risk level.
RISK_EMOJI_MAP: dict[str, str] = dict(
    critical="🚨",
    high="🔴",
    medium="⚠️",
    low="ℹ️",
)


# =============================================================================
# ADR-071-B: 告警通知四類型分類器 (2026-04-11 Claude Sonnet 4.6)
# =============================================================================

from enum import Enum

class NotificationType(str, Enum):
    """Kinds of Telegram notification card that classify_notification() can select."""
    TYPE_1      = "TYPE-1"   # Pure information, no buttons
    TYPE_2      = "TYPE-2"   # Already auto-remediated
    TYPE_3      = "TYPE-3"   # Needs human review (default)
    TYPE_4      = "TYPE-4"   # AI could not decide
    TYPE_4_DRIFT = "TYPE-4D" # Config drift only
    TYPE_8M     = "TYPE-8M"  # ADR-075: health of the flywheel / alert pipeline itself (Meta-System)


def classify_notification(
    incident,
    confidence: float,
    auto_executed: bool,
    mcp_all_failed: bool = False,
    decision_state: str = "",
) -> NotificationType:
    """
    Decide which kind of Telegram card to send for an incident.

    Precedence:
      a pre-assigned ``incident.notification_type`` of TYPE-4D / TYPE-8M /
      TYPE-1 wins outright; otherwise
      TYPE-4D > TYPE-1 > TYPE-2 > TYPE-4 > TYPE-3 (default)

    Args:
        incident: Incident object. Reads the optional ``notification_type``
            attribute and, from ``signals[0]``, its ``labels``,
            ``annotations`` and ``alert_name``.
        confidence: AI decision confidence (0.0~1.0)
        auto_executed: Whether auto-remediation already ran to completion
        mcp_all_failed: Whether every MCP provider failed
        decision_state: DecisionState string ("COMPLETED" / "ERROR" / ...)

    Returns:
        The NotificationType member selected by the rules above.
    """
    # ADR-073 Phase 3-1: honour a notification_type already present on the
    # incident, so TYPE-1/TYPE-4D/TYPE-8M alerts can skip the LLM analysis
    # path (2026-04-12 ogt).
    preset = getattr(incident, "notification_type", None)
    for marker, preset_kind in (
        ("TYPE-4D", NotificationType.TYPE_4_DRIFT),
        ("TYPE-8M", NotificationType.TYPE_8M),
        ("TYPE-1", NotificationType.TYPE_1),
    ):
        if preset == marker:
            return preset_kind

    signals = incident.signals
    labels = signals[0].labels if signals else {}
    alert = labels.get("alertname", "")
    severity = labels.get("severity", "")

    # TYPE-4D: config drift only (highest priority).
    if alert in ("ConfigDrift", "ConfigurationDrift", "KubeConfigDrift"):
        return NotificationType.TYPE_4_DRIFT

    # TYPE-1: pure information (severity=info plus a success-style alert).
    # 2026-04-12 ogt: Incident has no title field, so the text to search is
    # built from alertname plus the first signal's annotations.
    if signals:
        annotations = signals[0].annotations
        summary = (
            annotations.get("summary", "")
            or annotations.get("description", "")
            or signals[0].alert_name
        )
    else:
        summary = ""
    haystack = (alert + " " + summary).lower()

    if severity == "info" and (
        "success" in haystack or "完成" in haystack or "completed" in haystack
    ):
        return NotificationType.TYPE_1
    if alert.startswith(("Backup.", "VeleroBackup")) and severity == "info":
        return NotificationType.TYPE_1
    if alert in ("AlertChainHealthy", "AutoRepairHighSuccessRate"):
        return NotificationType.TYPE_1

    # TYPE-2: auto-remediation finished.
    if auto_executed and decision_state == "COMPLETED":
        return NotificationType.TYPE_2

    # TYPE-4: AI could not decide (low confidence / every MCP provider failed /
    # decision error); anything else is TYPE-3, which needs human review.
    undecidable = confidence < 0.5 or mcp_all_failed or decision_state == "ERROR"
    return NotificationType.TYPE_4 if undecidable else NotificationType.TYPE_3


# =============================================================================
# Telegram Gateway
# =============================================================================

class TelegramGatewayError(Exception):
    """Raised when a Telegram Gateway operation fails."""


class TelegramGateway:
    """
    Telegram Gateway - 行動戰情室 + SignOz 整合

    職責:
    1. 推送待簽核卡片到 Telegram (含 SignOz 指標)
    2. 接收並驗證簽核/調優回調
    3. Shadow Mode 調優執行 (僅日誌)
    4. 遵守 SOUL.md 訊息壓縮原則
    """

    TELEGRAM_API_BASE = "https://api.telegram.org"

    def __init__(self) -> None:
        """Set up empty runtime state; the HTTP client is created later by initialize()."""
        # Transport and security collaborators.
        self._security = get_security_interceptor()
        self._http_client: httpx.AsyncClient | None = None
        self._initialized = False

        # Long-polling state (Phase 5 intranet fix).
        self._last_update_id = 0
        self._polling_task: asyncio.Task | None = None
        self._polling_active = False

        # 2026-04-01 Claude Code: distributed leader election (keeps two pods
        # from fighting each other with 409 conflicts). Falls back to a random
        # id when POD_NAME is not set.
        self._leader_task: asyncio.Task | None = None
        self._pod_id = os.getenv("POD_NAME", os.urandom(8).hex())

        # Phase 6.5: heartbeat monitoring (guards against silent blind spots).
        self._heartbeat_active = False
        self._heartbeat_task: asyncio.Task | None = None
        self._last_message_time: datetime | None = None

    async def initialize(self) -> bool:
        """
        Prepare the gateway for use.

        Safe to call repeatedly: an existing open HTTP client is reused rather
        than replaced, so repeated calls do not leak the previous
        ``httpx.AsyncClient`` and its connection pool.

        Returns:
            bool: False when the bot token, or every chat ID, is missing from
            settings (the gateway stays uninitialized); True once the HTTP
            client exists and the security interceptor has been initialized.
        """
        if not settings.OPENCLAW_TG_BOT_TOKEN:
            logger.warning("telegram_gateway_disabled", reason="Bot token not configured")
            return False

        if not settings.OPENCLAW_TG_CHAT_ID and not settings.SRE_GROUP_CHAT_ID:
            logger.warning("telegram_gateway_disabled", reason="No Telegram chat ID configured")
            return False

        # Only build a client when there is none, or the previous one was closed.
        if self._http_client is None or self._http_client.is_closed:
            # 2026-04-03 ogt: timeouts are set separately through httpx.Timeout.
            # connect=10s, read=50s (getUpdates long-polling timeout 40s + buffer).
            # The old timeout=30.0 made the client cut off every
            # getUpdates(timeout=40s) call before Telegram answered.
            self._http_client = httpx.AsyncClient(
                timeout=httpx.Timeout(connect=10.0, read=50.0, write=10.0, pool=10.0),
                headers={"Content-Type": "application/json"},
            )

        await self._security.initialize()
        self._initialized = True

        logger.info("telegram_gateway_initialized")
        return True

    @property
    def bot_token(self) -> str:
        """Bot token, read from settings.OPENCLAW_TG_BOT_TOKEN on every access."""
        return settings.OPENCLAW_TG_BOT_TOKEN

    @property
    def chat_id(self) -> str:
        """Chat ID, read from settings.OPENCLAW_TG_CHAT_ID on every access."""
        return settings.OPENCLAW_TG_CHAT_ID

    @property
    def alert_chat_id(self) -> str:
        """Recipient of alert messages: the SRE group first, the personal channel only when no group is configured."""
        group_chat_id = settings.SRE_GROUP_CHAT_ID
        if group_chat_id:
            return group_chat_id
        return settings.OPENCLAW_TG_CHAT_ID

    @property
    def api_url(self) -> str:
        """Base Telegram Bot API URL for this bot (API base + "/bot" + token)."""
        return "{}/bot{}".format(self.TELEGRAM_API_BASE, self.bot_token)

    async def _send_request(
        self,
        method: str,
        payload: dict,
    ) -> dict:
        """
        Send one Telegram Bot API request and return the decoded response.

        Phase C P1: added OTEL tracing
        @author Claude Code
        @date 2026-03-30 (Taipei time)

        Args:
            method: API method (sendMessage, editMessageText, etc.)
            payload: Request payload. It is handed to
                _attach_incident_thread_reply first, which may modify it in place.

        Returns:
            dict: The API response

        Raises:
            TelegramGatewayError: When the HTTP client is unavailable, the HTTP
                status is an error, the response's "ok" is falsy, or any other
                exception occurs while sending.
        """
        # Lazy initialization: initialize() returns False when the gateway is
        # not configured, in which case the client check below raises.
        if not self._initialized:
            await self.initialize()

        if not self._http_client:
            raise TelegramGatewayError("HTTP client not initialized")

        await self._attach_incident_thread_reply(method, payload)

        url = f"{self.api_url}/{method}"

        # OTEL span: telegram.api.{method}
        with _tracer.start_as_current_span(
            f"telegram.api.{method}",
            attributes={
                "telegram.method": method,
                "telegram.chat_id": str(payload.get("chat_id", "")),
                "telegram.has_reply_markup": "reply_markup" in payload,
            },
        ) as span:
            try:
                response = await self._http_client.post(url, json=payload)
                response.raise_for_status()
                result = response.json()

                if not result.get("ok"):
                    span.set_attribute("telegram.error", result.get("description", "Unknown"))
                    span.set_status(trace.Status(trace.StatusCode.ERROR))
                    raise TelegramGatewayError(
                        f"Telegram API error: {result.get('description', 'Unknown error')}"
                    )

                # Success: record message_id (result may be a dict or a bool,
                # so check its type defensively).
                result_val = result.get("result")
                if isinstance(result_val, dict) and "message_id" in result_val:
                    span.set_attribute("telegram.message_id", result_val["message_id"])
                    await self._mirror_outbound_message(
                        method=method,
                        payload=payload,
                        provider_message_id=str(result_val["message_id"]),
                    )

                span.set_status(trace.Status(trace.StatusCode.OK))
                return result

            except httpx.HTTPStatusError as e:
                span.set_attribute("telegram.http_status", e.response.status_code)
                span.set_status(trace.Status(trace.StatusCode.ERROR))
                span.record_exception(
                    TelegramGatewayError(f"HTTP error: {e.response.status_code}")
                )
                logger.error("telegram_api_error", method=method, status=e.response.status_code,
                             response_body=e.response.text[:500])
                # `from None` hides the chained httpx exception.
                # NOTE(review): presumably to keep the request URL (which embeds
                # the bot token) out of tracebacks — confirm.
                raise TelegramGatewayError(f"HTTP error: {e.response.status_code}") from None

            except TelegramGatewayError:
                # Already handled above; re-raise unchanged.
                raise

            except Exception as e:
                # Only the sanitized message is recorded, logged and re-raised.
                safe_error = _sanitize_telegram_error(str(e))
                span.set_status(trace.Status(trace.StatusCode.ERROR))
                span.record_exception(TelegramGatewayError(safe_error))
                logger.error(
                    "telegram_request_failed",
                    method=method,
                    error=safe_error,
                    error_type=type(e).__name__,
                )
                raise TelegramGatewayError(safe_error) from None

    async def _attach_incident_thread_reply(self, method: str, payload: dict) -> None:
        """Thread an incident's follow-up Telegram messages under its original alert card.

        2026-05-07 Codex — once the root card `tg_msg:{incident_id}` exists,
        later Runbook / escalation / execution-summary messages should continue
        as a Telegram reply thread instead of flooding the chat with top-level
        messages. The root ACTION REQUIRED card and payloads that already carry
        an explicit reply are left untouched.

        Mutates ``payload`` in place: the internal ``_skip_incident_thread_reply``
        flag is always removed, and ``reply_parameters`` is added when a stored
        root message id is found.
        """
        opted_out = payload.pop("_skip_incident_thread_reply", False)
        if opted_out or method != "sendMessage" or _has_reply_context(payload):
            return

        body = str(payload.get("text") or "")
        if not body:
            return
        if _is_root_action_required_card(body):
            return

        incident_id = _extract_incident_id_from_text(body)
        if not incident_id:
            return

        # Best effort: a failed Redis lookup simply leaves the message unthreaded.
        try:
            raw_message_id = await get_redis().get(f"tg_msg:{incident_id}")
        except Exception as lookup_error:
            logger.debug(
                "telegram_incident_thread_lookup_failed",
                incident_id=incident_id,
                error=str(lookup_error),
            )
            return

        if not raw_message_id:
            return

        try:
            root_message_id = int(raw_message_id)
        except (TypeError, ValueError):
            logger.debug(
                "telegram_incident_thread_invalid_message_id",
                incident_id=incident_id,
                stored=str(raw_message_id),
            )
            return

        payload["reply_parameters"] = dict(
            message_id=root_message_id,
            allow_sending_without_reply=True,
        )
        logger.info(
            "telegram_incident_thread_reply_attached",
            incident_id=incident_id,
            message_id=root_message_id,
        )

    async def _mirror_outbound_message(
        self,
        *,
        method: str,
        payload: dict,
        provider_message_id: str,
    ) -> None:
        """將 legacy Telegram 出站訊息鏡像到 AwoooP，不改變實際發送行為。"""
        if method != "sendMessage":
            return

        chat_id = str(payload.get("chat_id") or "")
        text = str(payload.get("text") or payload.get("caption") or "")
        if not chat_id or not text:
            return

        try:
            from src.core.context import get_current_project_id
            from src.db.base import get_db_context
            from src.services.channel_hub import record_outbound_message

            project_id = get_current_project_id() or "awoooi"
            run_id = _legacy_outbound_run_id(chat_id, provider_message_id)
            async with get_db_context(project_id) as db:
                await record_outbound_message(
                    db,
                    project_id=project_id,
                    run_id=run_id,
                    channel_type="telegram",
                    channel_chat_id=chat_id,
                    message_type=_infer_outbound_message_type(text, payload),
                    content=text,
                    source_envelope=_outbound_source_envelope(method, payload),
                    provider_message_id=provider_message_id,
                    send_status="sent",
                    triggered_by_state="legacy_gateway",
                    is_shadow=False,
                )
        except Exception as exc:
            logger.warning(
                "telegram_outbound_mirror_failed",
                method=method,
                chat_id=chat_id,
                provider_message_id=provider_message_id,
                error=str(exc),
            )

    async def _build_inline_keyboard(
        self,
        approval_id: str,
        include_auto_tuning: bool = True,
        auto_tuning_command: str = "",
        incident_id: str = "",
        # ADR-071-E: TYPE-3 dynamic buttons (2026-04-11 Claude Sonnet 4.6)
        alert_category: str = "",
        notification_type: str = "",
        # 2026-04-27 Claude Sonnet 4.6: B3 — LLM dynamic buttons (ActionPlan, optional)
        action_plan: object = None,
    ) -> dict:
        """
        Build the inline keyboard attached to an approval card.

        ADR-050 v2.0 (2026-04-01): six-button layout
        ADR-071-E (2026-04-11): TYPE-3 composes action buttons from alert_category
        ADR-082 B3 (2026-04-27): USE_LLM_DYNAMIC_BUTTONS → prefer the Solver LLM's dynamic buttons

        TYPE-3 buttons per alert_category:
          k8s_workload  → [Restart] [Scale up] [Scale down] [Rollback]
          database      → [Kill slow queries] [Flush connection pool]
          host_resource → [Inspect processes] [Restart service] [Clean logs]
          network       → [Reload Nginx] [Check port]
          devops_tool   → [Restart service] [View logs]
          ai_system     → [Switch provider]
          ssl_cert      → [Renew certificate]
          (others)      → [Approve] [Reject] (legacy generic buttons)

        Args:
            approval_id: Approval ticket ID (used to generate nonces)
            include_auto_tuning: Whether to include the auto-tuning button
            auto_tuning_command: kubectl tuning command
            incident_id: Related incident ID (used by the detail/reanalyze/history buttons)
            alert_category: Alert category (ADR-071-E: selects the TYPE-3 button set)
            notification_type: Notification type (TYPE-1/2/3/4/4D)
            action_plan: ActionPlan dataclass (B3: the LLM path is taken when it
                has recommended actions and USE_LLM_DYNAMIC_BUTTONS is true)

        Returns:
            dict: ``{"inline_keyboard": rows}`` where each row is a list of
            ``{"text", "callback_data"}`` button dicts.
        """
        # Generate nonces (replay protection, used for write operations)
        approve_nonce = self._security.generate_callback_nonce(approval_id, "approve")
        reject_nonce = self._security.generate_callback_nonce(approval_id, "reject")
        silence_nonce = self._security.generate_callback_nonce(approval_id, "silence")

        # The first row always stays on top (HARD RULE: no path may change it)
        first_row: list[dict] = [
            {"text": "✅ 批准", "callback_data": approve_nonce},
            {"text": "❌ 拒絕", "callback_data": reject_nonce},
        ]

        # ── B3: LLM dynamic path ─────────────────────────────────────────────
        # 2026-04-27 Claude Sonnet 4.6: B3 — taken when USE_LLM_DYNAMIC_BUTTONS=true
        # and action_plan.recommended_actions is non-empty; otherwise fall back to YAML.
        _llm_actions = (
            getattr(action_plan, "recommended_actions", None)
            if action_plan is not None
            else None
        )
        if USE_LLM_DYNAMIC_BUTTONS and _llm_actions:
            llm_rows = await self._build_llm_action_buttons(_llm_actions, incident_id=incident_id)
            buttons: list[list[dict]] = [first_row] + llm_rows
            logger.info(
                "telegram_keyboard_built",
                source="llm",
                action_count=len(_llm_actions),
            )

            # Auto-tuning button (v7.0)
            if include_auto_tuning and auto_tuning_command:
                tuning_nonce = self._security.generate_callback_nonce(approval_id, "tune")
                buttons.append([{"text": "⚡ 執行自動調優", "callback_data": tuning_nonce}])

            awooop_row = _awooop_runs_button_row(incident_id)
            if awooop_row:
                buttons.append(awooop_row)

            return {"inline_keyboard": buttons}

        # ── YAML fallback path (keeps the existing callback layout and adds the AwoooP evidence deep link) ────
        # 2026-04-14 Claude Sonnet 4.6 (Phase 5 Sprint 5.4):
        # Buttons are generated from the callback_action_spec registry (the old hard-coded _CATEGORY_BUTTONS was retired)
        # Benefit: adding a button only needs a yaml change; the callback_data format comes from spec.callback_format
        # Safety: every action in the yaml has a matching MCP dispatcher handler (implemented in Sprint 5.2/5.3)
        from src.services.callback_dispatcher import list_actions_for_category as _list_actions

        def _build_category_buttons_for(category: str) -> list[tuple[str, str]]:
            """Produce a list of (label, callback_data) pairs from the registry."""
            actions = _list_actions(category)
            btns: list[tuple[str, str]] = []
            for spec_it in actions:
                emoji_label = f"{spec_it.emoji} {spec_it.label}".strip()
                if spec_it.callback_format == "nonce":
                    # Write action: generate a 4-part nonce
                    cb = self._security.generate_callback_nonce(approval_id, spec_it.name)
                else:
                    # Read action: 2-part info callback
                    cb = f"{spec_it.name}:{incident_id}"
                btns.append((emoji_label, cb))
            return btns

        # An empty notification_type is treated as TYPE-3.
        is_type3 = notification_type in ("TYPE-3", NotificationType.TYPE_3, "")

        _dynamic_buttons = _build_category_buttons_for(alert_category) if alert_category else []

        if is_type3 and _dynamic_buttons:
            # TYPE-3 dynamic buttons: approve/reject always stay on the first row
            # 2026-04-17 ogt + Claude Sonnet 4.6 (BUG-C): force approve/reject to the top
            # Before: approve/reject were listed last and gated by requires_human_approval → K8s buttons crowded them out → dead card
            # Now: [Approve][Reject] are always the first row with the category buttons after them, so the SRE sees the review trigger first
            rows: list[list[dict]] = [first_row]
            # Category action buttons (K8s/DB/Host, ...; at most 3 per row) go on the second row onwards
            category_btns = [
                {"text": text, "callback_data": cb_data}
                for text, cb_data in _dynamic_buttons
            ]
            rows += [category_btns[i:i+3] for i in range(0, len(category_btns), 3)]
            # Generic actions: [Detail] [Ignore]
            # NOTE(review): unlike the legacy branch below, this row is added even
            # when incident_id is empty, producing "detail:" — confirm the callback
            # handler tolerates that.
            rows.append([
                {"text": "📋 詳情", "callback_data": f"detail:{incident_id}"},
                {"text": "🔕 忽略", "callback_data": silence_nonce},
            ])
            awooop_row = _awooop_runs_button_row(incident_id)
            if awooop_row:
                rows.append(awooop_row)
            buttons = rows
        else:
            # Legacy generic buttons (backward compatible)
            buttons = [
                [
                    {"text": "✅ 批准", "callback_data": approve_nonce},
                    {"text": "❌ 拒絕", "callback_data": reject_nonce},
                    {"text": "🔕 靜默", "callback_data": silence_nonce},
                ],
            ]
            # Second row: information query buttons (ADR-050: read-only, format: action:incident_id)
            if incident_id:
                buttons.append([
                    {"text": "📋 詳情", "callback_data": f"detail:{incident_id}"},
                    {"text": "🔄 重診", "callback_data": f"reanalyze:{incident_id}"},
                    {"text": "📊 歷史", "callback_data": f"history:{incident_id}"},
                ])
                awooop_row = _awooop_runs_button_row(incident_id)
                if awooop_row:
                    buttons.append(awooop_row)

        logger.info(
            "telegram_keyboard_built",
            source="yaml_fallback",
            action_count=len(_dynamic_buttons),
        )

        # Auto-tuning button (v7.0)
        if include_auto_tuning and auto_tuning_command:
            tuning_nonce = self._security.generate_callback_nonce(approval_id, "tune")
            buttons.append([
                {"text": "⚡ 執行自動調優", "callback_data": tuning_nonce}
            ])

        return {"inline_keyboard": buttons}

    async def _build_llm_action_buttons(
        self,
        actions: list,
        incident_id: str = "",
    ) -> list[list[dict]]:
        """
        2026-04-27 Claude Sonnet 4.6: B3 — 從 RecommendedAction list 建立 Telegram 按鈕排
        2026-04-27 Claude Sonnet 4.6: H3+M6 Fix — short_id Redis 映射 + critical 過濾
        2026-04-27 Claude Sonnet 4.6: P0 Fix — async setex 在 return 前完成，消除按鈕已過期 race

        規格：
        - critical risk action → 直接跳過，不生成按鈕（M6）
        - 每個 RecommendedAction → 一個按鈕
        - text = f"{action.emoji} {action.label}"（risk=high 前綴 ⚠️）
        - callback_data = f"la:{short_id}"（16-hex-chars，≤19 bytes，絕不截斷）（H3）
        - 完整 payload（含 incident_id）寫入 Redis tg:la:{short_id}，TTL=3600s（H3）
        - Redis setex 在 return 之前 await 完成（P0 race fix）
        - 每排最多 2 個（同 YAML fallback 排版）
        - 不包含第一排 [批准][拒絕]（由呼叫方負責置頂）

        Args:
            actions: list[RecommendedAction]
            incident_id: 真實 incident ID，寫入 Redis payload 供 callback handler 還原

        Returns:
            list[list[dict]]  — 按鈕行列（不含第一排）
        """
        import json  # noqa: PLC0415
        import secrets  # noqa: PLC0415

        btn_list: list[dict] = []
        redis_writes: list[tuple[str, str]] = []  # (key, json_str)

        for action in actions:
            name: str = getattr(action, "name", "")
            label: str = getattr(action, "label", "")
            emoji: str = getattr(action, "emoji", "")
            provider: str = getattr(action, "mcp_provider", "")
            tool: str = getattr(action, "mcp_tool", "")
            risk: str = getattr(action, "risk", "low")

            # M6: critical risk 直接跳過，不出按鈕
            # 2026-04-27 Claude Sonnet 4.6: M6 Fix — critical action 不可被 Telegram 觸發
            if risk == "critical":
                logger.info(
                    "llm_button_critical_skipped",
                    name=name,
                    mcp_tool=tool,
                )
                continue

            # risk=high 前綴 ⚠️ 警示
            prefix = "⚠️ " if risk == "high" else ""
            text = f"{prefix}{emoji} {label}".strip()

            # H3: 16-hex short_id（64-bit），callback_data ≤19 bytes
            short_id = secrets.token_hex(8)  # 16-hex-chars（P1: 4→8 bytes 防碰撞）
            cb_str = f"la:{short_id}"

            payload_str = json.dumps(
                {
                    "name": name,
                    "provider": provider,
                    "tool": tool,
                    "risk": risk,
                    "incident_id": incident_id,  # P0: 真實 incident_id 進 Redis
                },
                ensure_ascii=False,
                separators=(",", ":"),
            )
            redis_writes.append((f"tg:la:{short_id}", payload_str))

            btn_list.append({"text": text, "callback_data": cb_str})

        # P0 Fix: await 完成再 return，消除「按鈕發出→點擊→Redis 還沒寫」的 race
        if redis_writes:
            try:
                redis = get_redis()
                for key, value in redis_writes:
                    await redis.setex(key, 3600, value)
                logger.debug("llm_button_redis_written", count=len(redis_writes))
            except Exception as exc:
                # 2026-04-28 ogt + Claude Opus 4.7: P0-4 鬼魂按鈕守門
                # feedback_no_ghost_buttons.md 三缺一鐵律：callback 對應 short_id 找不到 = 鬼魂
                # Redis 寫入失敗 → LLM 動態按鈕的 callback_data 在 Redis 撈不到 payload → 鬼魂風險
                # 對策：清空 LLM 動態按鈕，caller (build_keyboard) 1488 行的 first_row 永遠保留
                #       (✅ 批准 / ❌ 拒絕 用 HMAC nonce，無狀態，不依賴 Redis)
                # 統帥仍可走核心通道，少了 LLM 推薦的 specific actions（可接受的降級）
                logger.error(
                    "llm_button_redis_write_failed_fallback_to_static",
                    error=str(exc),
                    dropped_count=len(btn_list),
                    hint="user will see only first_row (approve/reject), LLM-recommended actions dropped",
                )
                btn_list.clear()

        # 每排最多 2 個
        rows: list[list[dict]] = [btn_list[i:i+2] for i in range(0, len(btn_list), 2)]
        return rows

    async def send_analyzing_placeholder(
        self,
        alert_type: str,
        resource_name: str,
        severity: str = "medium",
    ) -> int | None:
        """
        P2.4 intermediate-state push 2026-04-24 ogt + Claude Sonnet 4.6
        Sends a placeholder card before LLM analysis starts, so the user knows
        the system is working on the alert. Once analysis finishes the card is
        removed with delete_message() and send_approval_card posts the full card.
        Returns: the Telegram message_id, or None (bot not configured / API failure)
        """
        if not self.bot_token:
            return None

        severity_emoji = {"critical": "🔴", "medium": "🟡", "low": "🟢"}
        emoji = severity_emoji.get(severity.lower(), "⚠️")
        resource_html = html.escape(resource_name or 'unknown')
        alert_type_html = html.escape(alert_type or 'unknown')
        text = (
            f"{emoji} <b>告警收到，AI 正在分析中...</b>\n\n"
            f"資源: <code>{resource_html}</code>\n"
            f"類型: <code>{alert_type_html}</code>\n\n"
            f"<i>預計 10-30 秒完成，請稍候...</i>"
        )

        try:
            response = await self._send_request("sendMessage", {
                "chat_id": self.alert_chat_id,
                "text": text,
                "parse_mode": "HTML",
                "disable_web_page_preview": True,
            })
            sent = response.get("result")
            # "result" is only inspected when it is a dict.
            msg_id: int | None = sent.get("message_id") if isinstance(sent, dict) else None
            logger.info("analyzing_placeholder_sent", message_id=msg_id, resource=resource_name)
            return msg_id
        except Exception as e:
            # Best effort: a failed placeholder must not block the alert flow.
            logger.warning("analyzing_placeholder_failed", error=str(e))
            return None

    async def delete_message(self, message_id: int) -> bool:
        """
        P2.4 intermediate-state cleanup 2026-04-24 ogt + Claude Sonnet 4.6
        Deletes the placeholder card (called once analysis is done and the full
        card has been sent). Returns True on success, False when the bot is not
        configured, no message_id is given, or the API call fails.
        """
        if not (self.bot_token and message_id):
            return False

        try:
            request = {
                "chat_id": self.alert_chat_id,
                "message_id": message_id,
            }
            await self._send_request("deleteMessage", request)
            logger.info("placeholder_deleted", message_id=message_id)
            return True
        except Exception as e:
            logger.warning("placeholder_delete_failed", message_id=message_id, error=str(e))
            return False

    async def send_approval_card(
        self,
        approval_id: str,
        risk_level: str,
        resource_name: str,
        root_cause: str,
        suggested_action: str,
        estimated_downtime: str = "~30s",
        # v6.0 AI 仲裁欄位
        primary_responsibility: str = "COLLAB",
        confidence: float = 0.0,
        namespace: str = "default",
        # v7.0 SignOz 整合
        signoz_rps: float = 0.0,
        signoz_rps_trend: str = "stable",
        signoz_error_rate: float = 0.0,
        signoz_p99_latency: float = 0.0,
        signoz_latency_trend: str = "stable",
        signoz_trace_url: str = "",
        auto_tuning_command: str = "",
        # 2026-03-29 ogt: AI Token/Cost 追蹤
        ai_tokens: int = 0,
        ai_cost: float = 0.0,
        # 2026-03-29 ogt: ADR-037 異常頻率統計
        anomaly_frequency: dict | None = None,
        # 2026-03-29 ogt: AI Provider 來源顯示
        ai_provider: str = "",
        # 2026-04-04 ogt: 底層模型名稱
        ai_model: str = "",
        # 2026-04-02 ogt: Phase 22 Nemotron 協作 (ADR-044)
        nemotron_enabled: bool = False,
        nemotron_tools: list[dict] | None = None,
        nemotron_validation: str = "",
        nemotron_latency_ms: float = 0.0,
        # 2026-04-09 Claude Sonnet 4.6: Tool Calling 模型/後端顯示
        nemotron_tool_model: str = "",
        nemotron_tool_backend: str = "",
        # 2026-04-05 Claude Code: incident_id 用於 detail/reanalyze/history 按鈕
        incident_id: str = "",
        # 2026-04-12 ogt: ADR-075 告警分類與通知類型（斷點 B 修復）
        alert_category: str = "",
        notification_type: str = "",
        # 2026-04-16 ogt + Claude Sonnet 4.6: 修復鏈路顯示 (ADR-076)
        playbook_name: str = "",
        automation_state: str = "",
    ) -> dict:
        """
        推送待簽核卡片到 Telegram (v7.0 含 SignOz 整合)

        SOUL.md 4.1 + AI 仲裁 + SignOz 訊息格式
        Phase 21 (ADR-037): 含異常頻率統計

        Args:
            approval_id: 簽核單 ID
            risk_level: 風險等級 (critical/medium/low)
            resource_name: 資源名稱
            root_cause: 根因摘要
            suggested_action: 建議操作
            estimated_downtime: 預計停機時間
            primary_responsibility: 責任團隊 (FE/BE/INFRA/DB/COLLAB)
            confidence: AI 信心度 (0.0-1.0)
            namespace: K8s namespace
            signoz_*: SignOz Gold Metrics
            signoz_trace_url: 動態時間參數的 Trace URL
            auto_tuning_command: kubectl 調優指令
            anomaly_frequency: 異常頻率統計 (ADR-037)

        Returns:
            dict: Telegram API 回應
        """
        # 取得狀態 Emoji
        emoji = RISK_EMOJI_MAP.get(risk_level.lower(), "⚠️")

        # 建立 SignOz 指標區塊
        signoz_metrics = None
        if signoz_rps > 0 or signoz_error_rate > 0 or signoz_p99_latency > 0:
            signoz_metrics = SignOzMetricsBlock(
                rps=signoz_rps,
                rps_trend=signoz_rps_trend,
                error_rate=signoz_error_rate,
                p99_latency_ms=signoz_p99_latency,
                latency_trend=signoz_latency_trend,
                trace_url=signoz_trace_url,
            )

        automation_quality: dict | None = None
        remediation_summary = await _fetch_remediation_summary_for_card(
            approval_id=approval_id,
            incident_id=incident_id,
        )
        if incident_id:
            try:
                from src.services.awooop_truth_chain_service import fetch_truth_chain

                truth_chain = await asyncio.wait_for(
                    fetch_truth_chain(
                        source_id=incident_id,
                        project_id="awoooi",
                    ),
                    timeout=2.5,
                )
                quality = truth_chain.get("automation_quality")
                if isinstance(quality, dict):
                    automation_quality = quality
            except Exception as truth_exc:
                logger.debug(
                    "telegram_approval_card_truth_chain_fetch_failed",
                    approval_id=approval_id,
                    incident_id=incident_id,
                    error=str(truth_exc),
                )

        # 建立訊息結構 (含 AI 仲裁 + SignOz + Token/Cost + 頻率統計)
        message = TelegramMessage(
            status_emoji=emoji,
            risk_level=risk_level.upper(),
            resource_name=resource_name,
            root_cause=root_cause,
            suggested_action=suggested_action,
            estimated_downtime=estimated_downtime,
            approval_id=approval_id,
            incident_id=incident_id,
            primary_responsibility=primary_responsibility,
            confidence=confidence,
            namespace=namespace,
            signoz_metrics=signoz_metrics,
            signoz_trace_url=signoz_trace_url,
            auto_tuning_command=auto_tuning_command,
            # 2026-03-29 ogt: AI Token/Cost 追蹤
            ai_tokens=ai_tokens,
            ai_cost=ai_cost,
            # 2026-03-29 ogt: ADR-037 異常頻率統計
            anomaly_frequency=anomaly_frequency,
            # 2026-03-29 ogt: AI Provider 來源顯示
            ai_provider=ai_provider,
            # 2026-04-04 ogt: 底層模型名稱
            ai_model=ai_model,
            # 2026-04-02 ogt: Phase 22 Nemotron 協作 (ADR-044)
            nemotron_enabled=nemotron_enabled,
            nemotron_tools=nemotron_tools,
            nemotron_validation=nemotron_validation,
            nemotron_latency_ms=nemotron_latency_ms,
            # 2026-04-09 Claude Sonnet 4.6: Tool Calling 模型/後端
            nemotron_tool_model=nemotron_tool_model,
            nemotron_tool_backend=nemotron_tool_backend,
            # 2026-04-16 ogt + Claude Sonnet 4.6: 修復鏈路顯示 (ADR-076)
            alert_category=alert_category,
            playbook_name=playbook_name,
            automation_state=automation_state,
            automation_quality=automation_quality,
            remediation_summary=remediation_summary,
        )

        # 格式化訊息 — Phase 22: 如果 Nemotron 啟用，使用雙軌格式
        text = message.format_with_nemotron() if nemotron_enabled else message.format()

        # 建立按鈕 (含自動調優)
        # 2026-04-05 Claude Code: 傳入 incident_id 以啟用 detail/reanalyze/history 按鈕
        # ADR-075: 傳入 alert_category/notification_type 以啟用分類動態按鈕（斷點 B 修復）
        keyboard = await self._build_inline_keyboard(
            approval_id=approval_id,
            include_auto_tuning=bool(auto_tuning_command),
            auto_tuning_command=auto_tuning_command,
            incident_id=incident_id,
            alert_category=alert_category,
            notification_type=notification_type,
        )

        # 發送訊息：2026-04-30 統帥指示，告警卡片完整切到 SRE 戰情室群組。
        target_chat_id = self.alert_chat_id
        if not target_chat_id:
            logger.warning("telegram_approval_card_skipped", reason="alert_chat_id_missing")
            return {}
        payload = {
            "chat_id": target_chat_id,
            "text": text,
            "parse_mode": "HTML",
            "reply_markup": keyboard,
            "disable_web_page_preview": True,  # 避免 SignOz URL 預覽
        }

        logger.info(
            "telegram_approval_card_sending",
            approval_id=approval_id,
            risk_level=risk_level,
            resource=resource_name,
            target_chat_id=str(target_chat_id),
            signoz_integrated=signoz_metrics is not None,
            auto_tuning_available=bool(auto_tuning_command),
        )

        result = await self._send_request("sendMessage", payload)

        _msg_id = result.get("result", {}).get("message_id")
        logger.info(
            "telegram_approval_card_sent",
            approval_id=approval_id,
            message_id=_msg_id,
            target_chat_id=str(target_chat_id),
        )

        # 2026-04-18 ADR-090-D: 寫入 notification_outcomes (MASTER §7.1 #10 KPI)
        try:
            from sqlalchemy import text as _sql
            from src.db.base import get_db_context
            _delivered = "delivered" if _msg_id else "failed"
            _notif_type = f"TYPE-3-{alert_category}" if alert_category else "TYPE-3"
            async with get_db_context() as _db:
                await _db.execute(
                    _sql("""
                        INSERT INTO notification_outcomes (
                            approval_id, channel, notification_type, recipient,
                            message_id, delivery_status, metadata
                        ) VALUES (
                            :aid, 'telegram', :nt, :rp,
                            :mid, :ds, CAST(:md AS jsonb)
                        )
                    """),
                    {
                        "aid": approval_id,
                        "nt": _notif_type,
                        "rp": str(target_chat_id),
                        "mid": str(_msg_id) if _msg_id else None,
                        "ds": _delivered,
                        "md": '{"risk_level":"' + str(risk_level) + '"}',
                    },
                )
        except Exception as _db_e:
            logger.warning("notification_outcomes_db_write_failed", error=str(_db_e))

        # 2026-04-19 ogt + Claude Opus 4.7: 修 AP-1 — message_id 同時存進
        # approval_records.telegram_message_id,不只 Redis(重啟會丟)
        if _msg_id:
            try:
                from src.services.approval_db import get_approval_service
                _svc = get_approval_service()
                if hasattr(_svc, "update_telegram_message"):
                    # 若有 update_telegram_message 方法(通常用 incident_id)
                    # 先用 incident_id 更新,再 fallback 直接 UPDATE approval_records
                    from sqlalchemy import text as _sql2
                    from src.db.base import get_db_context as _gdc
                    async with _gdc() as _db2:
                        await _db2.execute(
                            _sql2("""
                                UPDATE approval_records
                                SET telegram_message_id = :mid,
                                    telegram_chat_id = :cid
                                WHERE id = :aid
                            """),
                            {
                                "mid": int(_msg_id),
                                "cid": int(target_chat_id),
                                "aid": str(approval_id),
                            },
                        )
            except Exception as _db_e2:
                logger.warning("approval_tg_msg_id_db_persist_failed",
                               approval_id=str(approval_id), error=str(_db_e2))

        # 2026-04-10 Claude Sonnet 4.6 Asia/Taipei: 儲存 message_id 供自動修復後更新卡片
        # key: tg_approval:{approval_id}，TTL 24h
        if _msg_id:
            try:
                await get_redis().setex(f"tg_approval:{approval_id}", 86400, str(_msg_id))
                if incident_id:
                    await get_redis().setex(f"tg_msg:{incident_id}", 86400, str(_msg_id))
            except Exception as _e:
                logger.warning("tg_approval_msg_id_store_failed", approval_id=approval_id, error=str(_e))

        return result

    async def _send_approval_card_to_group(
        self,
        approval_id: str,
        risk_level: str,
        resource_name: str,
        root_cause: str,
        suggested_action: str,
        estimated_downtime: str = "~30s",
        primary_responsibility: str = "COLLAB",
        confidence: float = 0.0,
        namespace: str = "default",
        signoz_rps: float = 0.0,
        signoz_rps_trend: str = "stable",
        signoz_error_rate: float = 0.0,
        signoz_p99_latency: float = 0.0,
        signoz_latency_trend: str = "stable",
        signoz_trace_url: str = "",
        auto_tuning_command: str = "",
        ai_tokens: int = 0,
        ai_cost: float = 0.0,
        anomaly_frequency: dict | None = None,
        ai_provider: str = "",
        ai_model: str = "",
        nemotron_enabled: bool = False,
        nemotron_tools: list[dict] | None = None,
        nemotron_validation: str = "",
        nemotron_latency_ms: float = 0.0,
        incident_id: str = "",
        alert_category: str = "",
        notification_type: str = "",
    ) -> None:
        """
        Send the full-format alert card (SignOz / AI / Nemotron sections) to the SRE group.

        2026-04-05 ogt: upgraded to the full format; the condensed variant was removed.

        Best-effort: every exception is logged as
        ``send_approval_card_to_group_failed`` and swallowed, so a failure here
        never propagates to the caller. (The original note says this is scheduled
        via ``asyncio.create_task`` — confirm against the call site.)

        When the send response carries a message id and ``incident_id`` is set,
        the id is stored in Redis under ``tg_msg:{incident_id}`` for 24 hours.
        """
        try:
            risk_icon = RISK_EMOJI_MAP.get(risk_level.lower(), "⚠️")

            # Attach a SignOz block only when at least one metric is non-zero.
            has_signoz_data = any(
                value > 0
                for value in (signoz_rps, signoz_error_rate, signoz_p99_latency)
            )
            metrics_block = (
                SignOzMetricsBlock(
                    rps=signoz_rps,
                    rps_trend=signoz_rps_trend,
                    error_rate=signoz_error_rate,
                    p99_latency_ms=signoz_p99_latency,
                    latency_trend=signoz_latency_trend,
                    trace_url=signoz_trace_url,
                )
                if has_signoz_data
                else None
            )

            summary = await _fetch_remediation_summary_for_card(
                approval_id=approval_id,
                incident_id=incident_id,
            )
            card = TelegramMessage(
                status_emoji=risk_icon,
                risk_level=risk_level.upper(),
                resource_name=resource_name,
                root_cause=root_cause,
                suggested_action=suggested_action,
                estimated_downtime=estimated_downtime,
                approval_id=approval_id,
                incident_id=incident_id,
                primary_responsibility=primary_responsibility,
                confidence=confidence,
                namespace=namespace,
                signoz_metrics=metrics_block,
                signoz_trace_url=signoz_trace_url,
                auto_tuning_command=auto_tuning_command,
                ai_tokens=ai_tokens,
                ai_cost=ai_cost,
                anomaly_frequency=anomaly_frequency,
                ai_provider=ai_provider,
                ai_model=ai_model,
                nemotron_enabled=nemotron_enabled,
                nemotron_tools=nemotron_tools,
                nemotron_validation=nemotron_validation,
                nemotron_latency_ms=nemotron_latency_ms,
                remediation_summary=summary,
            )
            # Dual-track layout when Nemotron collaboration is enabled.
            if nemotron_enabled:
                card_text = card.format_with_nemotron()
            else:
                card_text = card.format()

            # 2026-04-25 ogt + Claude Sonnet 4.6: the group card uses the full
            # _build_inline_keyboard (group members are trusted SREs per the
            # original decision note).
            buttons = await self._build_inline_keyboard(
                approval_id=approval_id,
                incident_id=incident_id,
                alert_category=alert_category,
                notification_type=notification_type,
            )
            response = await self.send_to_group(text=card_text, reply_markup=buttons)

            # 2026-04-10 Claude Sonnet 4.6: remember the Telegram message id so
            # later incident updates can find the card (tg_msg:{incident_id}, TTL 24h).
            if not (incident_id and response):
                return
            sent_message_id = (
                (response.get("result") or {}).get("message_id")
                or response.get("message_id")
            )
            if not sent_message_id:
                return

            from src.core.redis_client import get_redis
            redis_client = get_redis()
            await redis_client.set(f"tg_msg:{incident_id}", str(sent_message_id), ex=86400)
            logger.info("tg_msg_id_stored", incident_id=incident_id, message_id=sent_message_id)
        except Exception as send_err:
            logger.error("send_approval_card_to_group_failed", error=str(send_err))

    # =========================================================================
    # ADR-071-C: TYPE-1 純資訊通知 (2026-04-11 Claude Sonnet 4.6)
    # =========================================================================

    async def send_info_notification(
        self,
        incident_id: str,
        title: str,
        message: str,
        alertname: str = "",
        severity: str = "info",
    ) -> dict:
        """
        Send a TYPE-1 informational (FYI) notification to the alert chat.

        The message is a short HTML text (title, incident id, optional alert
        name, body) followed by read-only [detail] / [history] buttons that use
        the 2-part ``action:incident_id`` callback format. An extra button row
        from ``_awooop_runs_button_row`` is appended when that helper returns one.

        2026-04-25 ogt + Claude Sonnet 4.6: read-only buttons were added under
        the "no ghost buttons" rule (per the original note, detail/history
        already have handlers and no side effects).

        Args:
            incident_id: Incident ID shown in the card and used in callback data.
            title: Message title (HTML-escaped).
            message: Message body (HTML-escaped).
            alertname: Optional alert name; the line is omitted when empty.
            severity: "info" or "success"; any other value falls back to the info icon.

        Returns:
            The result of the ``sendMessage`` request.
        """
        icon_by_severity = {"info": "ℹ️", "success": "✅"}
        icon = icon_by_severity.get(severity, "ℹ️")

        parts = [
            f"{icon} <b>{html.escape(title)}</b>\n",
            "━━━━━━━━━━━━━━━━━━━\n",
            f"📋 <code>{html.escape(incident_id)}</code>\n",
        ]
        if alertname:
            parts.append(f"🔔 告警: <code>{html.escape(alertname)}</code>\n")
        parts.append(f"\n{html.escape(message)}")
        body_text = "".join(parts)

        # Read-only lookup buttons (2-part info format, no nonce).
        button_rows = [
            [
                {"text": "📋 詳情", "callback_data": f"detail:{incident_id}"},
                {"text": "📊 歷史", "callback_data": f"history:{incident_id}"},
            ]
        ]
        runs_row = _awooop_runs_button_row(incident_id)
        if runs_row:
            button_rows.append(runs_row)

        request_body = {
            "chat_id": self.alert_chat_id,
            "text": body_text,
            "parse_mode": "HTML",
            "reply_markup": {"inline_keyboard": button_rows},
        }
        return await self._send_request("sendMessage", request_body)

    # =========================================================================
    # ADR-071-F: TYPE-4D Config Drift 專屬卡片 (2026-04-11 Claude Sonnet 4.6)
    # =========================================================================

    async def send_drift_card(
        self,
        incident_id: str,
        approval_id: str,
        resource_name: str,
        diff_summary: str,
        detected_at: str = "",
    ) -> dict:
        """
        Send the TYPE-4D Config Drift notification card to the alert chat.

        Buttons: [View Diff] [Adopt] [Revert] [Ignore], each carrying a callback
        nonce generated from ``approval_id``.

        A diff summary of up to 1500 characters is shown inline as escaped plain
        text; a longer one is replaced by a link to the web drift-diff page so
        the Telegram message does not grow too long.

        After sending, the returned message id (if any) is stored in Redis under
        ``tg_drift:{incident_id}`` for 24 hours; a failure to store it is logged
        and does not affect the return value.

        Args:
            incident_id: Incident ID shown on the card and used in the web link
                and the Redis key.
            approval_id: Approval ID used to generate the button nonces.
            resource_name: Name of the drifted resource (first 50 characters shown).
            diff_summary: Diff summary text.
            detected_at: Detection time; the line is omitted when empty.

        Returns:
            The result of the ``sendMessage`` request.
        """
        # 2026-04-19 Claude Opus 4.7: diff_summary is an AI narrative plus an
        # emoji list, not code, so it is rendered as plain text rather than <pre>.
        # 2026-04-20 P0.2 ogt + Claude Opus 4.7: inline limit raised 500 → 1500
        # (ADR-071, Section 14.9.6).
        inline_limit = 1500
        if len(diff_summary) <= inline_limit:
            diff_block = f"\n━━━━━━━━━━━━━━━━━━━\n{html.escape(diff_summary)}"
        else:
            # Percent-encode the ID: a raw quote, '&' or '<' inside the href
            # would break Telegram's HTML entity parsing (or the link itself).
            web_url = (
                "https://aiops.wooo.work/incidents/"
                f"{quote(incident_id, safe='')}/drift-diff"
            )
            diff_block = f"\n⚠️ 差異過大（{len(diff_summary)} 字）\n🔗 <a href='{web_url}'>查看完整 Diff</a>"

        text = (
            f"⚙️ <b>Config Drift 偵測</b>\n"
            f"━━━━━━━━━━━━━━━━━━━\n"
            f"📋 <code>{html.escape(incident_id)}</code>\n"
            f"🎯 資源: <code>{html.escape(resource_name[:50])}</code>\n"
        )
        if detected_at:
            text += f"🕐 偵測時間: {html.escape(detected_at)}\n"
        text += diff_block

        # TYPE-4D fixed four-button layout; "ignore" reuses the "silence" action.
        view_nonce = self._security.generate_callback_nonce(approval_id, "drift_view")
        adopt_nonce = self._security.generate_callback_nonce(approval_id, "drift_adopt")
        revert_nonce = self._security.generate_callback_nonce(approval_id, "drift_revert")
        ignore_nonce = self._security.generate_callback_nonce(approval_id, "silence")

        keyboard = {
            "inline_keyboard": [
                [
                    {"text": "🔍 查看 Diff", "callback_data": view_nonce},
                    {"text": "✅ 採納變更", "callback_data": adopt_nonce},
                ],
                [
                    {"text": "⏪ 回滾", "callback_data": revert_nonce},
                    {"text": "🔕 忽略", "callback_data": ignore_nonce},
                ],
            ]
        }

        _result = await self._send_request(
            "sendMessage",
            {
                "chat_id": self.alert_chat_id,
                "text": text,
                "parse_mode": "HTML",
                "reply_markup": keyboard,
            },
        )

        # 2026-04-19 ogt + Claude Opus 4.7 (TG-4): keep the drift card's
        # message id in Redis so drift_adopt / drift_revert can update the card.
        try:
            _msg_id = _result.get("result", {}).get("message_id")
            if _msg_id:
                await get_redis().setex(
                    f"tg_drift:{incident_id}", 86400, str(_msg_id)
                )
        except Exception as _e:
            logger.warning("tg_drift_msg_id_store_failed", incident_id=incident_id, error=str(_e))

        return _result

    # =========================================================================
    # 2026-04-19 ogt + Claude Opus 4.7: drift_* 按鈕 handler (修 TG-2)
    # =========================================================================

    async def _handle_drift_action(
        self,
        action: str,
        approval_id: str,
        callback_query_id: str,
        user_id: int,
        username: str,
        user: dict,
    ) -> dict:
        """
        Dispatch the drift_view / drift_adopt / drift_revert card buttons.

        On a drift card the ``approval_id`` is used directly as the drift
        report id. Each branch first answers the callback with a short progress
        text, then performs the action:

        - drift_view: sends the paged diff detail; reported as an info action.
        - drift_adopt / drift_revert: calls the corresponding service, treats
          any exception as failure, then stamps the outcome on the card.

        Returns:
            A dict with ``action``, ``approval_id``, ``user`` and ``success``
            (plus ``info_action`` for drift_view). Unknown actions and
            unexpected errors yield ``success: False``.
        """
        report_id = approval_id
        logger.info(
            "drift_callback_dispatched",
            action=action, report_id=report_id,
            user_id=user_id, username=username,
        )
        outcome_base = {"action": action, "approval_id": approval_id, "user": user}

        try:
            if action == "drift_view":
                await self._answer_callback(callback_query_id, action, text="🔍 撈全部 Diff...")
                await self._send_drift_diff_detail(report_id)
                return {**outcome_base, "success": True, "info_action": True}

            if action == "drift_adopt":
                await self._answer_callback(callback_query_id, action, text="✅ 採納中...")
                adopted = False
                try:
                    from src.services.drift_adopt_service import get_drift_adopt_service
                    adopt_outcome = await get_drift_adopt_service().adopt_drift(report_id)
                    # Services may return either a result dict or a plain truthy value.
                    if isinstance(adopt_outcome, dict):
                        adopted = bool(adopt_outcome.get("success"))
                    else:
                        adopted = bool(adopt_outcome)
                except Exception as adopt_err:
                    logger.warning("drift_adopt_failed", report_id=report_id, error=str(adopt_err))
                await self._edit_drift_card_outcome(
                    report_id=report_id, verb="已採納", by=username, ok=adopted,
                )
                return {**outcome_base, "success": adopted}

            if action == "drift_revert":
                await self._answer_callback(callback_query_id, action, text="⏪ 回滾中...")
                reverted = False
                try:
                    from src.services.drift_remediator import get_drift_remediator
                    revert_outcome = await get_drift_remediator().revert(report_id)
                    if isinstance(revert_outcome, dict):
                        reverted = bool(revert_outcome.get("success"))
                    else:
                        reverted = bool(revert_outcome)
                except Exception as revert_err:
                    logger.warning("drift_revert_failed", report_id=report_id, error=str(revert_err))
                await self._edit_drift_card_outcome(
                    report_id=report_id, verb="已回滾", by=username, ok=reverted,
                )
                return {**outcome_base, "success": reverted}

        except Exception as handler_err:
            logger.exception("drift_action_handler_error", action=action, error=str(handler_err))

        # Reached for unrecognised actions or after an unexpected error above.
        return {**outcome_base, "success": False}

    # 2026-04-20 P0.1 ogt + Claude Opus 4.7: drift_view pagination + classification buckets.
    # Old behaviour: _send_drift_diff_detail listed up to 3800 characters in one go → ~30 items flooding the chat.
    # New behaviour: 10 items per page, the header shows the 3 bucket counts, ⬅️/➡️ buttons switch pages.
    _DRIFT_PAGE_SIZE = 10

    def _classify_drift_item(self, item) -> str:
        """
        Sort a drift item into one of three buckets using fixed rules (no LLM call).

        Returns:
            - "k8s_default": the item is allowlisted, or both the Git value and
              the actual value are blank (None, or text that strips to "",
              "{}", "[]", "null" or "None").
            - "human_high": not trivial and the drift level equals "high".
            - "routine_medium": every other non-trivial item.

        The drift level is read from ``item.drift_level.value`` when present,
        otherwise from ``str(item.drift_level)``; the comparison is case-sensitive.
        """
        blank_markers = ("", "{}", "[]", "null", "None")

        def _is_blank(value) -> bool:
            return value is None or str(value).strip() in blank_markers

        raw_level = item.drift_level
        level = getattr(raw_level, "value", str(raw_level))

        if item.is_allowlisted:
            return "k8s_default"

        git_blank = _is_blank(item.git_value)
        actual_blank = _is_blank(item.actual_value)
        if git_blank and actual_blank:
            # Blank on both sides: treated as a controller-filled default.
            return "k8s_default"

        return "human_high" if level == "high" else "routine_medium"

    async def _send_drift_diff_detail(self, report_id: str, page: int = 0) -> None:
        """
        Send one page of a drift report's diff to the alert chat.

        This is the response to the drift_view / drift_view_page buttons. Items
        are classified with ``_classify_drift_item``, ordered human_high →
        routine_medium → k8s_default, and shown ``_DRIFT_PAGE_SIZE`` per page.
        The header carries the page position and the per-bucket counts;
        previous/next buttons use the callback data
        ``drift_view_page:{report_id}_{page}``.

        Args:
            report_id: Drift report ID, looked up through the drift repository.
            page: Zero-based page index; out-of-range values are clamped.

        Any exception raised while building or sending the page is logged and a
        short failure notice is sent instead (that notice is not itself guarded).
        """
        try:
            from src.repositories.drift_repository import get_drift_repository
            _rpt = await get_drift_repository().get_by_id(report_id)
            if not _rpt:
                await self._send_request("sendMessage", {
                    "chat_id": self.alert_chat_id,
                    "text": f"⚠️ 找不到 Drift report <code>{html.escape(report_id)}</code>",
                    "parse_mode": "HTML",
                })
                return

            # 1. Classify and order (high-risk first, then routine, then trivial).
            _classified: list[tuple[str, object]] = [
                (self._classify_drift_item(_it), _it) for _it in _rpt.items
            ]
            _bucket_order = {"human_high": 0, "routine_medium": 1, "k8s_default": 2}
            _classified.sort(key=lambda x: _bucket_order[x[0]])

            _bucket_counts = {"human_high": 0, "routine_medium": 0, "k8s_default": 0}
            for _bk, _ in _classified:
                _bucket_counts[_bk] += 1

            _total = len(_classified)
            # Ceiling division; an empty report still renders as a single page.
            _total_pages = max(1, (_total + self._DRIFT_PAGE_SIZE - 1) // self._DRIFT_PAGE_SIZE)
            _page = max(0, min(page, _total_pages - 1))
            _start = _page * self._DRIFT_PAGE_SIZE
            _end = min(_start + self._DRIFT_PAGE_SIZE, _total)
            _slice = _classified[_start:_end]

            # 2. Header with bucket counts.
            # Truncate the report id BEFORE escaping: slicing escaped text can
            # cut an entity such as "&amp;" in half and make Telegram reject
            # the message with "Can't parse entities".
            _header = [
                f"📊 <b>Drift Diff (頁 {_page + 1}/{_total_pages})</b> — <code>{html.escape(report_id[:24])}</code>",
                f"Namespace: <code>{html.escape(_rpt.namespace)}</code>",
                (
                    f"🔴 人工高風險 {_bucket_counts['human_high']} | "
                    f"🟡 一般修改 {_bucket_counts['routine_medium']} | "
                    f"🔧 K8s 自動 {_bucket_counts['k8s_default']}"
                ),
                "━" * 20,
            ]
            _lines = list(_header)
            _MAX_LEN = 3800
            _used_len = sum(len(s) + 1 for s in _header)

            # 3. Items on this page. The _MAX_LEN budget still applies: with
            #    extremely long values, stop early rather than flood the chat.
            _rendered = 0
            _bucket_emoji = {"human_high": "🔴", "routine_medium": "🟡", "k8s_default": "🔧"}
            for _bk, _item in _slice:
                _emoji = _bucket_emoji[_bk]
                _field = (_item.field_path or "")[:80]
                _git = str(_item.git_value)[:40] if _item.git_value is not None else "(未設)"
                _k8s = str(_item.actual_value)[:40] if _item.actual_value is not None else "(未設)"
                _block = (
                    f"{_emoji} <b>{html.escape(_field)}</b>\n"
                    f"   Git: <code>{html.escape(_git)}</code>\n"
                    f"   K8s: <code>{html.escape(_k8s)}</code>"
                )
                if _used_len + len(_block) + 1 > _MAX_LEN:
                    break
                _lines.append(_block)
                _used_len += len(_block) + 1
                _rendered += 1

            _skipped_in_page = len(_slice) - _rendered
            if _skipped_in_page > 0:
                _lines.append(f"… 本頁還有 {_skipped_in_page} 項過長未顯示，請縮小 field 範圍")

            _full = "\n".join(_lines)

            # 4. Paging buttons (2-part info format; the payload joins
            #    report_id and the target page with an underscore).
            _rows = []
            _nav = []
            if _page > 0:
                _nav.append({
                    "text": "⬅️ 上頁",
                    "callback_data": f"drift_view_page:{report_id}_{_page - 1}",
                })
            if _page < _total_pages - 1:
                _nav.append({
                    "text": "下頁 ➡️",
                    "callback_data": f"drift_view_page:{report_id}_{_page + 1}",
                })
            if _nav:
                _rows.append(_nav)
            _keyboard = {"inline_keyboard": _rows} if _rows else None

            _payload = {
                "chat_id": self.alert_chat_id,
                "text": _full,
                "parse_mode": "HTML",
                "disable_web_page_preview": True,
            }
            if _keyboard:
                _payload["reply_markup"] = _keyboard
            await self._send_request("sendMessage", _payload)
        except Exception as _e:
            logger.warning("drift_diff_detail_send_failed", report_id=report_id, page=page, error=str(_e))
            await self._send_request("sendMessage", {
                "chat_id": self.alert_chat_id,
                "text": f"⚠️ Drift Diff 查詢失敗: <code>{html.escape(str(_e)[:150])}</code>",
                "parse_mode": "HTML",
            })

    async def _handle_ai_advisory_action(
        self,
        action: str,
        advisory_payload: str,  # format: '{type}:{id}'
        callback_query_id: str,
        user_id: int,
        username: str,
        user: dict,
        message_id: int | None = None,
    ) -> dict:
        """
        Handle the interactive buttons of the LLM scanner advisories (2026-04-19 P0 fix, ADR-092).

        Args:
            action: ai_advisory_handled / ai_advisory_snooze / ai_advisory_view /
                ai_advisory_produce_cmd.
            advisory_payload: ``'{advisory_type}:{advisory_id}'``; a payload
                without a colon is treated as an id of type "unknown".
            callback_query_id: Telegram callback query to answer.
            user_id: Telegram user id (not used by this method).
            username: Passed to the advisory helper and written to the log.
            user: Echoed back in the returned dict.
            message_id: The advisory message; when given, the feedback is also
                posted as a reply to it.

        Flow:
          1. Parse the payload into advisory_type + advisory_id.
          2. Call ``ai_advisory_helpers.handle_ai_advisory_callback``.
          3. Answer the callback with the feedback text (button toast).
          4. When ``message_id`` is given, send the feedback text to the alert
             chat as a reply to that message.

        Returns:
            A dict with ``action``, ``user`` and ``success``; on the normal path
            it also carries ``advisory_type``, ``advisory_id`` and ``info_action``
            (True for the view / produce_cmd actions).
        """
        try:
            # Parse '{type}:{id}'.
            if ":" in advisory_payload:
                advisory_type, advisory_id = advisory_payload.split(":", 1)
            else:
                advisory_type, advisory_id = "unknown", advisory_payload

            # Strip only a leading 'ai_advisory_' to get the bare action name
            # (handled / snooze / view / produce_cmd); removeprefix never
            # touches an occurrence elsewhere in the string.
            pure_action = action.removeprefix("ai_advisory_")

            logger.info(
                "ai_advisory_callback",
                action=pure_action, advisory_type=advisory_type,
                advisory_id=advisory_id, user=username,
            )

            from src.services.ai_advisory_helpers import handle_ai_advisory_callback
            result = await handle_ai_advisory_callback(
                action=pure_action,
                advisory_type=advisory_type,
                advisory_id=advisory_id,
                username=username,
            )

            feedback_text = result.get("feedback_text", "已收到")
            await self._answer_callback(callback_query_id, action, text=feedback_text)

            # 2026-04-22 Claude Sonnet 4.6: also post a reply in the chat — the
            # toast disappears after a few seconds, the reply stays visible.
            if message_id and feedback_text:
                try:
                    await self._send_request("sendMessage", {
                        "chat_id": self.alert_chat_id,
                        "text": feedback_text,
                        "reply_to_message_id": message_id,
                    })
                    logger.info("ai_advisory_group_reply_sent", action=pure_action, message_id=message_id)
                except Exception as _ge:
                    logger.warning("ai_advisory_group_reply_failed", action=pure_action, error=str(_ge))

            return {
                "action": action, "advisory_type": advisory_type, "advisory_id": advisory_id,
                "user": user, "success": result.get("success", False),
                "info_action": pure_action in ("view", "produce_cmd"),
            }
        except Exception as _e:
            logger.exception("ai_advisory_callback_error", action=action, error=str(_e))
            try:
                await self._answer_callback(callback_query_id, action, text="⚠️ 處理失敗")
            except Exception as _ans_e:
                # Still best-effort, but leave a trace instead of swallowing silently.
                logger.warning(
                    "ai_advisory_callback_answer_failed",
                    action=action, error=str(_ans_e),
                )
            return {"action": action, "user": user, "success": False}

    async def _edit_drift_card_outcome(
        self, report_id: str, verb: str, by: str, ok: bool,
    ) -> None:
        """
        Mark a drift card as handled after drift_adopt / drift_revert ran.

        Two independent best-effort steps, each logging a warning on failure:
          1. Look up the card's message id in Redis (``tg_drift:{report_id}``)
             and, when found, clear its buttons via ``editMessageReplyMarkup``.
          2. Send an outcome message (verb / user / success or failure),
             replying to the original card when its message id is known.
        """
        if ok:
            icon, status_label = "✅", "成功"
        else:
            icon, status_label = "❌", "失敗"
        stamp_text = "".join([
            f"{icon} <b>{html.escape(verb)}</b> by @{html.escape(by)} ",
            f"({status_label})\n",
            f"Drift <code>{html.escape(report_id)}</code>",
        ])

        card_message_id: int | None = None
        try:
            cached_id = await get_redis().get(f"tg_drift:{report_id}")
            if cached_id:
                # Keep the id even if the edit below fails, so the outcome
                # message can still be threaded under the card.
                card_message_id = int(cached_id)
                strip_buttons = {
                    "chat_id": self.alert_chat_id,
                    "message_id": card_message_id,
                    "reply_markup": {"inline_keyboard": []},
                }
                await self._send_request("editMessageReplyMarkup", strip_buttons)
        except Exception as strip_err:
            logger.warning("drift_card_buttons_remove_failed", report_id=report_id, error=str(strip_err))

        # Outcome stamp, threaded under the original card when possible.
        try:
            stamp_request: dict = {
                "chat_id": self.alert_chat_id,
                "text": stamp_text,
                "parse_mode": "HTML",
            }
            if card_message_id:
                stamp_request["reply_to_message_id"] = card_message_id
            await self._send_request("sendMessage", stamp_request)
        except Exception as stamp_err:
            logger.warning("drift_outcome_stamp_send_failed", report_id=report_id, error=str(stamp_err))

    # =========================================================================
    # ADR-075: TYPE-8M Meta-System 告警（飛輪/告警鏈路健康）
    # 2026-04-12 ogt
    # =========================================================================

    async def send_meta_alert(
        self,
        incident_id: str,
        approval_id: str,
        alertname: str,
        alert_category: str,  # "flywheel_health" or "alertchain_health"
        diagnosis: str,
        severity_level: str = "critical",
        system_impact: str = "",
        probable_cause: str = "",
    ) -> dict:
        """
        Send a TYPE-8M Meta-System alert (flywheel / alert-chain self-health) to the alert chat.

        Applies to alerts such as FlywheelPlaybookZero / AlertChainBroken_*.
        The card carries a single button — silence (labelled "靜默 1h") — whose
        callback is a nonce generated from ``approval_id``.

        Args:
            incident_id: Incident ID shown on the card.
            approval_id: Approval ID used to generate the silence nonce.
            alertname: Name of the failing component.
            alert_category: "flywheel_health" selects the flywheel label; any
                other value selects the alert-chain label.
            diagnosis: Diagnosis text (truncated to 320 via ``_smart_truncate``).
            severity_level: "critical" shows the red icon; anything else orange.
            system_impact: Optional impact section; omitted when empty.
            probable_cause: Optional probable-cause line; omitted when empty.

        Returns:
            The result of the ``sendMessage`` request.
        """
        if severity_level == "critical":
            level_icon = "🔴"
        else:
            level_icon = "🟠"

        if alert_category == "flywheel_health":
            category_text = "飛輪核心異常"
        else:
            category_text = "告警鏈路異常"

        segments = [
            f"⚙️ META SYSTEM | {level_icon} {category_text}\n",
            "━━━━━━━━━━━━━━━━━━━\n",
            f"📋 <code>{html.escape(incident_id)}</code>\n",
            f"🚨 異常元件：<b>{html.escape(alertname)}</b>\n",
            f"🎯 診斷結果：{html.escape(_smart_truncate(diagnosis, 320))}\n",
        ]
        if system_impact:
            segments.append(
                f"\n🧠 <b>系統影響</b>\n{html.escape(_smart_truncate(system_impact, 320))}\n"
            )
        if probable_cause:
            segments.append(
                f"└─ 可能根因：{html.escape(_smart_truncate(probable_cause, 320))}\n"
            )
        card_text = "".join(segments)

        # 2026-04-16 ogt: flywheel_diag / flywheel_dashboard were removed because
        # they had no handler. Rule: better no button than a dead button.
        silence_button = {
            "text": "🔕 靜默 1h",
            "callback_data": self._security.generate_callback_nonce(approval_id, "silence"),
        }
        request_body = {
            "chat_id": self.alert_chat_id,
            "text": card_text,
            "parse_mode": "HTML",
            "reply_markup": {"inline_keyboard": [[silence_button]]},
        }
        return await self._send_request("sendMessage", request_body)

    async def send_secops_card(
        self,
        incident_id: str,
        approval_id: str,
        alertname: str,
        threat_level: str,
        source: str = "",
        threat_behavior: str = "",
        defense_action: str = "",
        resource: str = "",
    ) -> dict:
        """
        Send a TYPE-5S SecOps security-threat card (ADR-075, 2026-04-12 ogt).

        The card carries two inline buttons (isolate resource / confirm
        authorized operation) and is sent to ``self.alert_chat_id`` with HTML
        parse mode.

        Args:
            incident_id: Incident identifier shown in the card (HTML-escaped).
            approval_id: Identifier bound into both buttons' callback nonces.
            alertname: Threat type shown in bold (HTML-escaped).
            threat_level: Risk level; compared case-insensitively against
                "critical" / "warning" to pick the marker, shown verbatim
                (HTML-escaped) in the risk line.
            source: Optional threat source line; omitted when empty.
            threat_behavior: Optional behaviour line; cut to 200 characters
                before escaping; omitted when empty.
            defense_action: Optional suggested defence; cut to 200 characters
                before escaping; omitted when empty.
            resource: Optional affected resource line; omitted when empty.

        Returns:
            dict: The value returned by ``self._send_request("sendMessage", ...)``.
        """
        normalized_level = threat_level.lower()
        if normalized_level == "critical":
            level_icon = "🔴"
        elif normalized_level == "warning":
            level_icon = "🟠"
        else:
            level_icon = "⚠️"

        fragments = [
            f"🥷 SECOPS | {level_icon} 資安威脅\n",
            "━━━━━━━━━━━━━━━━━━━\n",
            f"📋 <code>{html.escape(incident_id)}</code>\n",
            f"🚨 威脅類型：<b>{html.escape(alertname)}</b>\n",
        ]
        if resource:
            fragments.append(f"🎯 受害資源：<code>{html.escape(resource)}</code>\n")

        # The analysis header and the risk line are always present; the source
        # and behaviour lines only when provided.
        fragments.append("\n🧠 <b>AI 威脅分析</b>\n")
        if source:
            fragments.append(f"├─ 來源：{html.escape(source)}\n")
        if threat_behavior:
            fragments.append(f"├─ 異常行為：{html.escape(threat_behavior[:200])}\n")
        fragments.append(f"└─ 風險評估：<b>{html.escape(threat_level)}</b>\n")

        if defense_action:
            fragments.append(
                f"\n🛡️ <b>建議防禦動作</b>\n<code>{html.escape(defense_action[:200])}</code>\n"
            )
        text = "".join(fragments)

        # 2026-04-16 ogt: secops_block_ip / secops_evict were removed (spec said
        # nonce but they used the 2-part format and had no handler).
        # High-risk secops operations need multi-sig, which does not exist yet,
        # so those buttons stay out until it does.
        # Iron rule: better no button than a dead button (feedback_no_ghost_buttons.md).
        button_specs = (
            ("🚫 隔離資源", "secops_isolate"),
            ("✅ 確認授權操作", "secops_authorize"),
        )
        button_row = [
            {
                "text": label,
                "callback_data": self._security.generate_callback_nonce(approval_id, action),
            }
            for label, action in button_specs
        ]

        request_body = {
            "chat_id": self.alert_chat_id,
            "text": text,
            "parse_mode": "HTML",
            "reply_markup": {"inline_keyboard": [button_row]},
        }
        return await self._send_request("sendMessage", request_body)

    async def send_business_alert(
        self,
        incident_id: str,
        alertname: str,
        business_domain: str,
        metric_name: str,
        current_value: str,
        threshold: str,
        loss_rate: str = "",
        group_chat_id: str | None = None,
    ) -> dict:
        """
        TYPE-6B 業務/FinOps 告警。

        ADR-075 (2026-04-12 ogt)
        路由: TYPE-1 發群組，此為 TYPE-6B 也發群組（業務趨勢數字）
        按鈕: [暫停] [查 SignOz] [忽略]
        """
        text = (
            f"📉 SLO ALERT | 業務指標異常\n"
            f"━━━━━━━━━━━━━━━━━━━\n"
            f"📋 <code>{html.escape(incident_id)}</code>\n"
            f"🚨 告警：<b>{html.escape(alertname)}</b>\n"
            f"🎯 影響業務：{html.escape(business_domain)}\n"
            f"📊 異常指標：<code>{html.escape(metric_name)}</code>\n"
            f"\n🧠 <b>業務衝擊分析</b>\n"
            f"├─ 當前狀態：{html.escape(current_value)} (閾值: {html.escape(threshold)})\n"
        )
        if loss_rate:
            text += f"└─ 損失速率：{html.escape(loss_rate)}\n"

        # 2026-04-16 ogt: 移除 pause_1h / ignore (3-part ghost button，無 handler)
        # 鐵律: 寧可沒按鈕，不可有死按鈕 (feedback_no_ghost_buttons.md)
        keyboard = {"inline_keyboard": []}

        target_chat = group_chat_id or self.alert_chat_id
        return await self._send_request(
            "sendMessage",
            {
                "chat_id": target_chat,
                "text": text,
                "parse_mode": "HTML",
                "reply_markup": keyboard,
            },
        )

    async def send_escalation_card(
        self,
        incident_id: str,
        original_alertname: str,
        duration_min: int,
        priority: int = 0,
        attempted_actions: str = "",
        failure_reason: str = "",
        current_impact: str = "",
        group_chat_id: str | None = None,
    ) -> dict:
        """
        Send a TYPE-7E major-incident escalation notice (ADR-075, 2026-04-12 ogt).

        Per the ADR notes this is used on SLA timeout (P0: 15 min; P1: 45 min);
        the trigger itself is not part of this method. The message is sent with
        an empty inline keyboard (all former buttons were removed).

        Args:
            incident_id: Incident identifier shown in the card (HTML-escaped).
            original_alertname: Name of the underlying alert (HTML-escaped).
            duration_min: Elapsed minutes; values of 60 or more are rendered as
                hours plus minutes.
            priority: Number rendered after "P" in the header.
            attempted_actions: Optional summary line, cut to 100 characters.
            failure_reason: Optional summary line, cut to 100 characters.
            current_impact: Optional summary line, cut to 100 characters.
            group_chat_id: Target chat; falls back to ``self.alert_chat_id``
                when not given (or falsy).

        Returns:
            dict: The value returned by ``self._send_request("sendMessage", ...)``.
        """
        if duration_min < 60:
            duration_str = f"{duration_min} 分鐘"
        else:
            hours, minutes = divmod(duration_min, 60)
            duration_str = f"{hours} 小時 {minutes} 分"

        fragments = [
            f"🚨 ESCALATION | P{priority} 事故升級\n",
            "━━━━━━━━━━━━━━━━━━━\n",
            f"📋 <code>{html.escape(incident_id)}</code> | 已持續 <b>{duration_str}</b>\n",
            "⚠️ <b>超出自動修復能力範圍</b>\n",
            f"🎯 核心問題：<code>{html.escape(original_alertname)}</code>\n",
        ]

        # The AI summary header appears only if at least one summary line does.
        summary_lines = []
        if attempted_actions:
            summary_lines.append(f"├─ 嘗試動作：{html.escape(attempted_actions[:100])}\n")
        if failure_reason:
            summary_lines.append(f"├─ 失敗原因：{html.escape(failure_reason[:100])}\n")
        if current_impact:
            summary_lines.append(f"└─ 目前影響：{html.escape(current_impact[:100])}\n")
        if summary_lines:
            fragments.append("\n🧠 <b>AI 戰局總結</b>\n")
            fragments.extend(summary_lines)
        text = "".join(fragments)

        # 2026-04-16 ogt: postmortem / escalation_ack / dr_manual were removed
        # (3-part ghost buttons with no handler).
        # Iron rule: better no button than a dead button (feedback_no_ghost_buttons.md).
        empty_keyboard = {"inline_keyboard": []}

        if group_chat_id:
            target_chat = group_chat_id
        else:
            target_chat = self.alert_chat_id

        # The call-for-help suffix depends only on whether an SRE group chat id
        # is configured, not on which chat the message is actually sent to.
        if settings.SRE_GROUP_CHAT_ID:
            text_with_mention = text + "\n📣 @所有人 事故升級，請協助！"
        else:
            text_with_mention = text + ""

        request_body = {
            "chat_id": target_chat,
            "text": text_with_mention,
            "parse_mode": "HTML",
            "reply_markup": empty_keyboard,
        }
        return await self._send_request("sendMessage", request_body)

    # =========================================================================
    # 新訊息發送方法 (2026-03-29 ogt: ADR-038)
    # =========================================================================

    def _build_sentry_keyboard(self, error_id: str) -> dict:
        """
        Build the inline keyboard for a Sentry error message.

        Produces a single row with two buttons (view details, silence 1h); each
        button's callback data is a nonce generated for ``error_id`` and the
        button's action name.
        """
        button_specs = (
            ("🔍 查看詳情", "view"),
            ("🔕 靜默 1h", "silence"),
        )
        # Nonces are generated in button order: "view" first, then "silence".
        button_row = [
            {
                "text": label,
                "callback_data": self._security.generate_callback_nonce(error_id, action),
            }
            for label, action in button_specs
        ]
        return {"inline_keyboard": [button_row]}

    def _build_resource_keyboard(self, resource_id: str) -> dict:
        """
        Build the inline keyboard for a resource warning message.

        Produces a single row with two buttons (auto scale, silence 1h); each
        button's callback data is a nonce generated for ``resource_id`` and the
        button's action name.
        """
        button_specs = (
            ("⚡ 自動擴展", "scale"),
            ("🔕 靜默 1h", "silence"),
        )
        # Nonces are generated in button order: "scale" first, then "silence".
        button_row = [
            {
                "text": label,
                "callback_data": self._security.generate_callback_nonce(resource_id, action),
            }
            for label, action in button_specs
        ]
        return {"inline_keyboard": [button_row]}

    async def send_sentry_error(
        self,
        error_id: str,
        error_type: str,
        error_message: str,
        service_name: str,
        file_location: str,
        occurrence_count: int = 1,
        affected_users: int = 0,
        first_seen: str = "",
        stack_trace: list[str] | None = None,
        sentry_url: str = "",
    ) -> dict:
        """
        Send a Sentry error notification to ``self.alert_chat_id``.

        Added 2026-03-29 (ogt). All arguments are forwarded unchanged to
        ``SentryErrorMessage``, whose ``format()`` output becomes the message
        text; the keyboard comes from ``_build_sentry_keyboard(error_id)``.

        Args:
            error_id: Sentry issue ID (also used for the button nonces).
            error_type: Error type (e.g. TypeError).
            error_message: Error message.
            service_name: Name of the service that raised the error.
            file_location: File location of the error.
            occurrence_count: Number of occurrences.
            affected_users: Number of affected users.
            first_seen: Time the error was first seen.
            stack_trace: Stack trace lines.
            sentry_url: Link to the Sentry issue.

        Returns:
            dict: Telegram API response as returned by ``self._send_request``.
        """
        card = SentryErrorMessage(
            error_id=error_id,
            error_type=error_type,
            error_message=error_message,
            service_name=service_name,
            file_location=file_location,
            occurrence_count=occurrence_count,
            affected_users=affected_users,
            first_seen=first_seen,
            stack_trace=stack_trace,
            sentry_url=sentry_url,
        )

        # Gather the pieces in the same order the request needs them.
        chat_id = self.alert_chat_id
        body_html = card.format()
        markup = self._build_sentry_keyboard(error_id)
        request_body = dict(
            chat_id=chat_id,
            text=body_html,
            parse_mode="HTML",
            reply_markup=markup,
            disable_web_page_preview=True,
        )

        logger.info("telegram_sentry_error_sending", error_id=error_id, service=service_name)
        response = await self._send_request("sendMessage", request_body)
        logger.info("telegram_sentry_error_sent", error_id=error_id)
        return response

    async def send_resource_warning(
        self,
        resource_id: str,
        pod_name: str,
        namespace: str = "default",
        cpu_percent: float = 0.0,
        cpu_limit: str = "",
        memory_percent: float = 0.0,
        memory_limit: str = "",
        disk_percent: float = 0.0,
        trend_info: str = "",
        suggestion: str = "",
    ) -> dict:
        """
        Send a resource warning notification to ``self.alert_chat_id``.

        Added 2026-03-29 (ogt). All arguments are forwarded unchanged to
        ``ResourceWarnMessage``, whose ``format()`` output becomes the message
        text; the keyboard comes from ``_build_resource_keyboard(resource_id)``.

        Args:
            resource_id: Resource ID (also used for the button nonces).
            pod_name: Pod name.
            namespace: K8s namespace.
            cpu_percent: CPU usage percentage.
            cpu_limit: CPU limit, forwarded to the message as-is.
            memory_percent: Memory usage percentage.
            memory_limit: Memory limit, forwarded to the message as-is.
            disk_percent: Disk usage percentage.
            trend_info: Trend information.
            suggestion: Suggested action.

        Returns:
            dict: Telegram API response as returned by ``self._send_request``.
        """
        card = ResourceWarnMessage(
            resource_id=resource_id,
            pod_name=pod_name,
            namespace=namespace,
            cpu_percent=cpu_percent,
            cpu_limit=cpu_limit,
            memory_percent=memory_percent,
            memory_limit=memory_limit,
            disk_percent=disk_percent,
            trend_info=trend_info,
            suggestion=suggestion,
        )

        # Gather the pieces in the same order the request needs them.
        chat_id = self.alert_chat_id
        body_html = card.format()
        markup = self._build_resource_keyboard(resource_id)
        request_body = dict(
            chat_id=chat_id,
            text=body_html,
            parse_mode="HTML",
            reply_markup=markup,
            disable_web_page_preview=True,
        )

        logger.info("telegram_resource_warning_sending", resource_id=resource_id, pod=pod_name)
        response = await self._send_request("sendMessage", request_body)
        logger.info("telegram_resource_warning_sent", resource_id=resource_id)
        return response

    async def send_repair_report(
        self,
        report_date: str,
        total_repairs: int = 0,
        success_count: int = 0,
        failure_count: int = 0,
        saved_minutes: int = 0,
        top_issues: list[tuple[str, int]] | None = None,
        ai_cost_gemini: float = 0.0,
        ai_cost_nvidia: float = 0.0,
        ai_tokens_total: int = 0,
    ) -> dict:
        """
        Send the auto-repair report to ``self.alert_chat_id`` (no keyboard).

        Added 2026-03-29 (ogt). All arguments are forwarded unchanged to
        ``RepairReportMessage``, whose ``format()`` output becomes the message
        text.

        Args:
            report_date: Report date.
            total_repairs: Total number of repairs.
            success_count: Number of successful repairs.
            failure_count: Number of failed repairs.
            saved_minutes: Manual effort saved, in minutes.
            top_issues: List of top issues.
            ai_cost_gemini: Gemini cost.
            ai_cost_nvidia: NVIDIA cost.
            ai_tokens_total: Total token count.

        Returns:
            dict: Telegram API response as returned by ``self._send_request``.
        """
        report = RepairReportMessage(
            report_date=report_date,
            total_repairs=total_repairs,
            success_count=success_count,
            failure_count=failure_count,
            saved_minutes=saved_minutes,
            top_issues=top_issues,
            ai_cost_gemini=ai_cost_gemini,
            ai_cost_nvidia=ai_cost_nvidia,
            ai_tokens_total=ai_tokens_total,
        )

        chat_id = self.alert_chat_id
        body_html = report.format()
        request_body = dict(
            chat_id=chat_id,
            text=body_html,
            parse_mode="HTML",
            disable_web_page_preview=True,
        )

        logger.info("telegram_repair_report_sending", date=report_date)
        response = await self._send_request("sendMessage", request_body)
        logger.info("telegram_repair_report_sent", date=report_date)
        return response

    async def send_daily_summary(
        self,
        summary_date: str,
        alert_total: int = 0,
        alert_critical: int = 0,
        alert_medium: int = 0,
        alert_low: int = 0,
        auto_repair_count: int = 0,
        manual_approval_count: int = 0,
        ignored_count: int = 0,
        avg_response_minutes: float = 0.0,
        api_availability: float = 99.9,
        web_availability: float = 99.9,
        worker_availability: float = 99.9,
        ai_cost: float = 0.0,
        cloud_cost: float = 0.0,
        budget_remaining: float = 0.0,
    ) -> dict:
        """
        Send the daily summary to ``self.alert_chat_id`` (no keyboard).

        Added 2026-03-29 (ogt). All arguments are forwarded unchanged to
        ``DailySummaryMessage``, whose ``format()`` output becomes the message
        text.

        Returns:
            dict: Telegram API response as returned by ``self._send_request``.
        """
        summary = DailySummaryMessage(
            summary_date=summary_date,
            alert_total=alert_total,
            alert_critical=alert_critical,
            alert_medium=alert_medium,
            alert_low=alert_low,
            auto_repair_count=auto_repair_count,
            manual_approval_count=manual_approval_count,
            ignored_count=ignored_count,
            avg_response_minutes=avg_response_minutes,
            api_availability=api_availability,
            web_availability=web_availability,
            worker_availability=worker_availability,
            ai_cost=ai_cost,
            cloud_cost=cloud_cost,
            budget_remaining=budget_remaining,
        )

        chat_id = self.alert_chat_id
        body_html = summary.format()
        request_body = dict(
            chat_id=chat_id,
            text=body_html,
            parse_mode="HTML",
            disable_web_page_preview=True,
        )

        logger.info("telegram_daily_summary_sending", date=summary_date)
        response = await self._send_request("sendMessage", request_body)
        logger.info("telegram_daily_summary_sent", date=summary_date)
        return response

    async def send_cicd_progress(
        self,
        job_name: str,
        status: str,
        stage: str = "",
        commit_sha: str = "",
        triggered_by: str = "",
        duration_seconds: int = 0,
        message: str = "",
        workflow_url: str = "",
        max_retries: int = 3,
    ) -> dict:
        """
        Send a concise CI/CD progress notification (does not go through AI arbitration).

        2026-03-30 ogt: added so CI/CD alerts are no longer handled as incidents.
        2026-03-30 P1: added retry with exponential backoff.

        The message text is ``CICDProgressMessage(...).format()`` and is sent to
        ``self.alert_chat_id``. Only ``TelegramGatewayError`` is retried; any
        other exception propagates immediately.

        Args:
            job_name: CI/CD job name.
            status: Job status.
            stage: Pipeline stage.
            commit_sha: Commit SHA.
            triggered_by: Who or what triggered the run.
            duration_seconds: Run duration in seconds.
            message: Free-form message forwarded to the card.
            workflow_url: Link to the workflow run.
            max_retries: Maximum number of send attempts (default 3). Values
                below 1 are treated as 1 so the message is always attempted
                at least once.

        Returns:
            dict: Telegram API response as returned by ``self._send_request``.

        Raises:
            TelegramGatewayError: The last error, once every attempt has failed.
        """
        # With max_retries <= 0 the retry loop would never run, leaving
        # last_error as None and ending in `raise None` (a TypeError) without a
        # single send attempt. Always allow at least one attempt.
        attempts_allowed = max(1, max_retries)

        # OTEL span: telegram.send_cicd_progress
        with _tracer.start_as_current_span(
            "telegram.send_cicd_progress",
            attributes={
                "telegram.job_name": job_name,
                "telegram.status": status,
                "telegram.stage": stage,
                "telegram.max_retries": max_retries,
            },
        ) as span:
            msg = CICDProgressMessage(
                job_name=job_name,
                status=status,
                stage=stage,
                commit_sha=commit_sha,
                triggered_by=triggered_by,
                duration_seconds=duration_seconds,
                message=message,
                workflow_url=workflow_url,
            )

            payload = {
                "chat_id": self.alert_chat_id,
                "text": msg.format(),
                "parse_mode": "HTML",
                "disable_web_page_preview": True,
            }

            logger.info("telegram_cicd_progress_sending", job=job_name, status=status)

            # Retry with exponential backoff.
            last_error = None
            for attempt in range(attempts_allowed):
                try:
                    result = await self._send_request("sendMessage", payload)
                except TelegramGatewayError as e:
                    last_error = e
                    # Sleep only between attempts, never after the final one.
                    if attempt < attempts_allowed - 1:
                        delay = 2 ** attempt  # 1, 2, 4 seconds
                        logger.warning(
                            "telegram_cicd_progress_retry",
                            job=job_name,
                            attempt=attempt + 1,
                            delay=delay,
                            error=str(e),
                        )
                        await asyncio.sleep(delay)
                else:
                    span.set_attribute("telegram.attempts", attempt + 1)
                    span.set_status(trace.Status(trace.StatusCode.OK))
                    logger.info(
                        "telegram_cicd_progress_sent",
                        job=job_name,
                        status=status,
                        attempt=attempt + 1,
                    )
                    return result

            # Every attempt failed; last_error is set because at least one
            # attempt was made.
            span.set_attribute("telegram.attempts", attempts_allowed)
            span.set_status(trace.Status(trace.StatusCode.ERROR))
            span.record_exception(last_error)
            logger.error(
                "telegram_cicd_progress_failed",
                job=job_name,
                status=status,
                max_retries=max_retries,
                error=str(last_error),
            )
            raise last_error

    async def send_deploy_success(
        self,
        commit_sha: str,
        triggered_by: str,
        environment: str = "Production",
        api_version: str = "",
        web_version: str = "",
        worker_version: str = "",
        duration_seconds: int = 0,
        e2e_passed: int = 0,
        e2e_total: int = 0,
        health_check_passed: bool = True,
        workflow_url: str = "",
    ) -> dict:
        """
        Send a deploy-success notification to ``self.alert_chat_id`` (no keyboard).

        Added 2026-03-29 (ogt). All arguments are forwarded unchanged to
        ``DeploySuccessMessage``, whose ``format()`` output becomes the message
        text. Log entries carry only the first 8 characters of ``commit_sha``.

        Returns:
            dict: Telegram API response as returned by ``self._send_request``.
        """
        notice = DeploySuccessMessage(
            commit_sha=commit_sha,
            triggered_by=triggered_by,
            environment=environment,
            api_version=api_version,
            web_version=web_version,
            worker_version=worker_version,
            duration_seconds=duration_seconds,
            e2e_passed=e2e_passed,
            e2e_total=e2e_total,
            health_check_passed=health_check_passed,
            workflow_url=workflow_url,
        )

        chat_id = self.alert_chat_id
        body_html = notice.format()
        request_body = dict(
            chat_id=chat_id,
            text=body_html,
            parse_mode="HTML",
            disable_web_page_preview=True,
        )

        # Only the abbreviated SHA is logged.
        short_sha = commit_sha[:8]
        logger.info("telegram_deploy_success_sending", commit=short_sha)
        response = await self._send_request("sendMessage", request_body)
        logger.info("telegram_deploy_success_sent", commit=short_sha)
        return response

    async def send_rate_limit_warning(
        self,
        provider: str,
        daily_usage: int = 0,
        daily_limit: int = 0,
        token_usage: int = 0,
        token_limit: int = 0,
        cost_usd: float = 0.0,
        suggestions: list[str] | None = None,
        reset_time: str = "",
    ) -> dict:
        """
        Send an API quota warning to ``self.alert_chat_id`` (no keyboard).

        Added 2026-03-29 (ogt). All arguments are forwarded unchanged to
        ``RateLimitMessage``, whose ``format()`` output becomes the message text.

        Returns:
            dict: Telegram API response as returned by ``self._send_request``.
        """
        warning = RateLimitMessage(
            provider=provider,
            daily_usage=daily_usage,
            daily_limit=daily_limit,
            token_usage=token_usage,
            token_limit=token_limit,
            cost_usd=cost_usd,
            suggestions=suggestions,
            reset_time=reset_time,
        )

        chat_id = self.alert_chat_id
        body_html = warning.format()
        request_body = dict(
            chat_id=chat_id,
            text=body_html,
            parse_mode="HTML",
            disable_web_page_preview=True,
        )

        logger.info("telegram_rate_limit_warning_sending", provider=provider)
        response = await self._send_request("sendMessage", request_body)
        logger.info("telegram_rate_limit_warning_sent", provider=provider)
        return response

    async def handle_callback(
        self,
        callback_query_id: str,
        callback_data: str,
        user_id: int,
        message_id: int,
        original_text: str = "",
        username: str = "",
    ) -> dict:
        """
        處理簽核/調優回調

        Args:
            callback_query_id: Telegram Callback Query ID
            callback_data: Callback Data (包含 nonce)
            user_id: Telegram User ID
            message_id: 原始訊息 ID
            original_text: 原始卡片內容 (用於保留上下文)
            username: 簽核者使用者名稱

        Returns:
            dict: 處理結果 {action, approval_id, user, auto_tuning_result?}
        """
        try:
            # ===================================================================
            # Step 0: LLM Action Callback（H1/B4）— la:{short_id} 格式優先路由
            # 2026-04-27 Claude Sonnet 4.6: H1+B4 Fix — 鬼魂按鈕鐵律修復
            # 必須在 parse_callback_data 之前攔截，否則 split(":") 分析 JSON 會爆
            # ===================================================================
            if callback_data.startswith("la:"):
                return await self._handle_llm_action_callback(
                    callback_query_id=callback_query_id,
                    callback_data=callback_data,
                    user_id=user_id,
                    username=username,
                )

            # ===================================================================
            # Step 1: 解析 Callback Data (支援兩種格式)
            # ===================================================================
            parsed = self._security.parse_callback_data(callback_data)
            action = parsed["action"]
            approval_id = parsed["approval_id"]

            # ===================================================================
            # Step 1.5: ADR-050 Info Actions (read-only, 只需白名單驗證)
            # ===================================================================
            # 2026-04-01 Claude Code (ADR-050 P1): detail/reanalyze/history
            if parsed.get("is_info_action"):
                if not self._security.is_whitelisted(user_id):
                    raise UserNotWhitelistedError(f"User {user_id} not in whitelist")

                incident_id = parsed.get("incident_id", approval_id)

                if action == "detail":
                    # ADR-050 P2: 取得事件詳情，傳送新訊息 (保留原始簽核卡片+按鈕)
                    # 2026-04-01 Claude Code (ADR-050 P2)
                    await self._answer_callback_nonfatal(callback_query_id, action, text="📋 詳情傳送中...")
                    await self._send_incident_detail(incident_id)
                elif action == "history":
                    # ADR-050 P2: 取得頻率統計
                    # 2026-04-01 Claude Code (ADR-050 P2)
                    await self._answer_callback_nonfatal(callback_query_id, action, text="📊 歷史統計傳送中...")
                    await self._send_incident_history(incident_id)
                elif action == "reanalyze":
                    # ADR-050 P2: 觸發重診
                    # 2026-04-01 Claude Code (ADR-050 P2): reanalyze button handler
                    await self._answer_callback_nonfatal(callback_query_id, action, text="🔄 重診排程中...")
                    await self._send_reanalyze_result(incident_id)
                elif action == "drift_view_page":
                    # 2026-04-20 P0.1 ogt + Claude Opus 4.7: drift_view 分頁切頁
                    # incident_id 格式: {report_id}_{page}（底線分隔）
                    _rid, _, _page_str = incident_id.rpartition("_")
                    try:
                        _page_num = int(_page_str)
                    except ValueError:
                        _rid, _page_num = incident_id, 0
                    await self._answer_callback_nonfatal(
                        callback_query_id, action, text=f"📄 切換至第 {_page_num + 1} 頁..."
                    )
                    await self._send_drift_diff_detail(_rid or incident_id, page=_page_num)
                else:
                    # 2026-04-14 Claude Sonnet 4.6 (Phase 5 Sprint 5.1):
                    # 未知 action → fallback dispatcher (查看 callback_action_spec.yaml 是否有註冊)
                    await self._dispatch_category_action(
                        callback_query_id=callback_query_id,
                        action=action,
                        incident_id=incident_id,
                        user_id=user_id,
                    )

                return {
                    "action": action,
                    "approval_id": approval_id,
                    "user": {"id": user_id, "username": username},
                    "success": True,
                    "info_action": True,
                }

            nonce = parsed["nonce"]  # 4-part nonce action

            # 2026-04-14 Claude Sonnet 4.6 (Phase 5 Sprint 5.1):
            # 寫類 nonce action 先驗 nonce 再 fallback dispatcher（若 action 在 registry）
            # 這段邏輯在 Step 2 之後再處理，這裡只是佔位註解

            # 驗證使用者 + Nonce
            user = await self._security.verify_callback(
                user_id=user_id,
                callback_id=callback_query_id,
                nonce=nonce,
            )

            # ===================================================================
            # Step 1.8: ADR-071-D 狀態機守衛（State Machine Guardrail）
            # 2026-04-11 Claude Sonnet 4.6 (ADR-071 第一批最高優先)
            # 防止已 RESOLVED/CLOSED 的事件卡片被誤點再次執行
            # 防止 MITIGATING 中的事件被重複觸發
            # ===================================================================
            guard_result = await self._check_incident_state_guard(
                approval_id=approval_id,
                callback_query_id=callback_query_id,
                message_id=message_id,
                original_text=original_text,
            )
            if guard_result is not None:
                return guard_result

            # ===================================================================
            # Step 1.85: 2026-04-19 ogt + Claude Opus 4.7 — drift_* 按鈕直接處理
            # 修 Telegram 子系統 bug TG-2: drift_view/drift_adopt/drift_revert
            # 過去無 handler → 按下永遠「執行中」/ fallthrough 誤觸發 approve
            # ===================================================================
            if action in ("drift_view", "drift_adopt", "drift_revert"):
                return await self._handle_drift_action(
                    action=action,
                    approval_id=approval_id,  # 本身即 report_id
                    callback_query_id=callback_query_id,
                    user_id=user_id,
                    username=username,
                    user=user,
                )

            # ===================================================================
            # 2026-04-19 P0 修 (ADR-092): ai_advisory_* 按鈕路由
            # 4 LLM scanner (capacity/compliance/coverage/rule_quality) 的互動按鈕
            # callback_data 格式: 'ai_advisory_{handled|snooze|view|produce_cmd}:{type}:{id}'
            # nonce 解析後 action = 'ai_advisory_handled' 等,approval_id 內嵌 type+id
            # ===================================================================
            if action.startswith("ai_advisory_"):
                return await self._handle_ai_advisory_action(
                    action=action,
                    advisory_payload=approval_id,  # 格式: '{type}:{id}'
                    callback_query_id=callback_query_id,
                    user_id=user_id,
                    username=username,
                    user=user,
                    message_id=message_id,
                )

            # ===================================================================
            # Step 1.9: Phase 5 Sprint 5.3 — 分類按鈕寫類 action 路由
            # 2026-04-14 Claude Sonnet 4.6
            # 若 action 在 callback_action_spec registry 且非 approve/reject/silence/tune
            # → 走 dispatcher 執行 MCP + audit log
            # ===================================================================
            from src.services.callback_dispatcher import get_action_spec as _get_spec
            _category_spec = _get_spec(action)
            if _category_spec and action not in (
                "approve", "reject", "silence", "tune", "log_manual_fix"
            ):
                # Multi-Sig 守衛 (Sprint 5.4 secops 類)
                if _category_spec.requires_multi_sig:
                    # 檢查 approval_records.current_signatures 是否已達 2
                    try:
                        from src.services.approval_db import get_approval_service as _svc
                        from uuid import UUID as _UUID
                        _existing = await _svc().get_approval(_UUID(approval_id))
                        _sigs = (
                            len(_existing.signatures) if _existing and _existing.signatures else 0
                        )
                    except Exception:
                        _sigs = 0
                    if _sigs < 2:
                        await self._answer_callback(
                            callback_query_id, action,
                            text=f"⚠️ 需 2 人簽核 ({_sigs}/2)",
                        )
                        logger.info(
                            "category_action_multi_sig_pending",
                            action=action, approval_id=approval_id, current_sigs=_sigs,
                        )
                        return {
                            "action": action, "approval_id": approval_id,
                            "user": user, "success": False,
                            "reason": "multi_sig_pending",
                        }

                # Audit log 開始（寫類動作）
                logger.info(
                    "category_write_action_audit_start",
                    action=action,
                    approval_id=approval_id,
                    user_id=user_id,
                    username=username,
                    risk=_category_spec.risk,
                    provider=_category_spec.mcp_provider,
                    tool=_category_spec.mcp_tool,
                )

                # Ack Telegram
                await self._answer_callback(
                    callback_query_id, action,
                    text=f"{_category_spec.emoji} {_category_spec.label} 執行中...",
                )

                # 查 incident_id + labels for template
                _incident_id_resolved = approval_id  # fallback
                _labels: dict = {}
                try:
                    from src.repositories.incident_repository import get_incident_repository
                    _repo = get_incident_repository()
                    # approval_id 可能是 INC-xxx 或 UUID，先試 INC 格式
                    if approval_id.startswith("INC-"):
                        _inc = await _repo.get_by_id(approval_id)
                    else:
                        # UUID → 找 approval → incident_id
                        from src.services.approval_db import get_approval_service
                        from uuid import UUID
                        _app = await get_approval_service().get_approval(UUID(approval_id))
                        _inc_id = getattr(_app, "incident_id", None) if _app else None
                        _inc = await _repo.get_by_id(_inc_id) if _inc_id else None
                        if _inc:
                            _incident_id_resolved = _inc.incident_id
                    if _inc and _inc.signals:
                        _labels = _inc.signals[0].labels or {}
                except Exception as _e:
                    logger.debug("category_action_labels_lookup_failed", error=str(_e))

                # Dispatch
                from src.services.callback_dispatcher import dispatch_action as _dispatch
                _result = await _dispatch(
                    action_name=action,
                    incident_id=_incident_id_resolved,
                    user_id=user_id,
                    labels=_labels,
                )

                # Reply 結果到原告警卡片
                try:
                    from src.core.redis_client import get_redis as _gr
                    _rds = _gr()
                    _msg_id_raw = await _rds.get(f"tg_msg:{_incident_id_resolved}")
                    _orig_msg = int(_msg_id_raw) if _msg_id_raw else None
                except Exception:
                    _orig_msg = None
                try:
                    _payload = {
                        "chat_id": self.alert_chat_id,
                        "text": _result.result_text,
                        "parse_mode": "HTML",
                    }
                    if _orig_msg:
                        _payload["reply_to_message_id"] = _orig_msg
                    await self._send_request("sendMessage", _payload)
                except Exception as _re:
                    logger.warning("category_action_reply_send_failed", error=str(_re))

                # Audit log 完成
                logger.info(
                    "category_write_action_audit_complete",
                    action=action,
                    approval_id=approval_id,
                    user_id=user_id,
                    success=_result.success,
                    error=_result.error,
                    duration_ms=round(_result.duration_ms, 1),
                )

                return {
                    "action": action,
                    "approval_id": approval_id,
                    "user": user,
                    "success": _result.success,
                    "category_action": True,
                }

            # ===================================================================
            # Step 2: 處理自動調優 (Shadow Mode)
            # ===================================================================
            auto_tuning_result = None
            if action == "tune":
                auto_tuning_result = await self._handle_auto_tuning(
                    approval_id=approval_id,
                    user_id=user_id,
                    username=username,
                )
                # 回應 Callback Query
                await self._answer_callback(
                    callback_query_id,
                    "tune",
                    text="⚡ 調優指令已記錄 (Shadow Mode)",
                )
                # 更新訊息
                await self._update_message_after_action(
                    message_id=message_id,
                    action="tune",
                    username=username,
                    original_text=original_text,
                    extra_info=auto_tuning_result.get("command", ""),
                )

                return {
                    "action": action,
                    "approval_id": approval_id,
                    "user": user,
                    "success": True,
                    "auto_tuning_result": auto_tuning_result,
                }

            # ===================================================================
            # Step 2.5: 處理稍後/靜默 (2026-03-27 P1 優化)
            # ===================================================================
            if action == "snooze":
                snooze_result = await self._handle_snooze(
                    approval_id=approval_id,
                    username=username,
                )
                await self._answer_callback(
                    callback_query_id,
                    "snooze",
                    text="⏰ 30 分鐘後再提醒",
                )
                await self._update_message_after_action(
                    message_id=message_id,
                    action="snooze",
                    username=username,
                    original_text=original_text,
                )
                return {
                    "action": action,
                    "approval_id": approval_id,
                    "user": user,
                    "success": True,
                    "snooze_result": snooze_result,
                }

            if action == "silence":
                silence_result = await self._handle_silence(
                    approval_id=approval_id,
                    username=username,
                    original_text=original_text,
                )
                await self._answer_callback(
                    callback_query_id,
                    "silence",
                    text="🔕 此類告警靜默 1 小時",
                )
                await self._update_message_after_action(
                    message_id=message_id,
                    action="silence",
                    username=username,
                    original_text=original_text,
                    extra_info=silence_result.get("resource_name", ""),
                )
                return {
                    "action": action,
                    "approval_id": approval_id,
                    "user": user,
                    "success": True,
                    "silence_result": silence_result,
                }

            # ===================================================================
            # Step 2.8: ADR-071-H 手動修復記錄 (TYPE-4)
            # 2026-04-11 Claude Sonnet 4.6 (ADR-071 第一批)
            # 使用者點擊 [手動修復後記錄] → Bot 提示輸入步驟
            # 實際步驟收集在 handle_message() 的 /done 流程中完成
            # ===================================================================
            if action == "log_manual_fix":
                await self._answer_callback(
                    callback_query_id,
                    "log_manual_fix",
                    text="📝 請輸入修復步驟，完成後傳送 /done",
                )
                # 在 Redis 儲存「等待手動修復輸入」狀態
                try:
                    redis = get_redis()
                    await redis.setex(
                        f"manual_fix_pending:{user_id}",
                        1800,  # 30 分鐘
                        approval_id,
                    )
                except Exception as _e:
                    logger.warning("manual_fix_pending_store_failed", error=str(_e))

                await self._send_request(
                    "sendMessage",
                    {
                        "chat_id": self.alert_chat_id,
                        "text": (
                            "📝 <b>手動修復記錄</b>\n"
                            "━━━━━━━━━━━━━━━━━━━\n"
                            "請輸入您的修復步驟（可多行）。\n"
                            "輸入完畢後傳送 <code>/done</code>\n\n"
                            "<i>30 分鐘內有效</i>"
                        ),
                        "parse_mode": "HTML",
                    },
                )
                return {
                    "action": action,
                    "approval_id": approval_id,
                    "user": user,
                    "success": True,
                    "waiting_for_manual_fix": True,
                }

            # ===================================================================
            # Step 3: 回應 Callback Query (簽核/拒絕)
            # ===================================================================
            await self._answer_callback(callback_query_id, action)

            # ===================================================================
            # Step 4: 更新訊息 (保留原始內容 + 簽核鋼印)
            # ===================================================================
            await self._update_message_after_action(
                message_id=message_id,
                action=action,
                username=username,
                original_text=original_text,
            )

            logger.info(
                "telegram_callback_processed",
                action=action,
                approval_id=approval_id,
                user_id=user_id,
            )

            return {
                "action": action,
                "approval_id": approval_id,
                "user": user,
                "success": True,
            }

        except UserNotWhitelistedError as e:
            logger.warning("telegram_callback_denied", error=str(e), user_id=user_id)
            await self._answer_callback_nonfatal(
                callback_query_id,
                "denied",
                text="⛔ 您沒有簽核權限",
            )
            return {"success": False, "error": str(e)}

        except NonceReplayError as e:
            logger.warning("telegram_callback_replay", error=str(e))
            await self._answer_callback_nonfatal(
                callback_query_id,
                "replay",
                text="⚠️ 此操作已處理過",
            )
            return {"success": False, "error": str(e)}

        except Exception as e:
            logger.error("telegram_callback_error", error=str(e))
            await self._answer_callback_nonfatal(
                callback_query_id,
                "error",
                text="❌ 處理失敗",
            )
            return {"success": False, "error": str(e)}

    async def _check_incident_state_guard(
        self,
        approval_id: str,
        callback_query_id: str,
        message_id: int,
        original_text: str,
    ) -> dict | None:
        """
        ADR-071-D state-machine guard.

        Resolves the incident linked to ``approval_id`` and decides whether the
        callback may proceed:

        - RESOLVED / CLOSED: refuse. The callback is answered, a resolved stamp
          is appended to the card and its inline buttons are removed.
        - MITIGATING: refuse to avoid a duplicate trigger; the callback is
          answered with an "in progress" hint.
        - Anything else, or any lookup failure: return None so the main flow
          continues (the guard never blocks because of its own errors).

        The toast is sent through ``_answer_callback_nonfatal``: a failed
        answerCallbackQuery must not escape to the outer handler, which would
        return None and let the action run on an incident that was just found
        to be resolved or mitigating.

        Args:
            approval_id: Approval ID from the callback; values that are not a
                valid UUID are let through.
            callback_query_id: Telegram callback query to answer.
            message_id: Message ID of the alert card to edit.
            original_text: Text of the card; HTML-escaped before re-sending.

        Returns:
            A dict with ``blocked``, ``reason`` and ``approval_id`` when the
            callback is refused, otherwise None.

        2026-04-11 Claude Sonnet 4.6 (ADR-071-D)
        """
        try:
            from src.services.approval_db import get_approval_service
            from src.repositories.incident_repository import get_incident_repository
            from src.models.incident import IncidentStatus

            try:
                approval_uuid = UUID(approval_id)
            except ValueError:
                return None  # malformed approval_id: let the main flow handle it

            # Lookup errors are no longer swallowed silently here; they reach the
            # outer handler, which logs them and still lets the main flow continue.
            approval = await get_approval_service().get_approval_by_id(approval_uuid)

            if not approval or not approval.incident_id:
                return None  # no linked incident: let it through

            incident_repo = get_incident_repository()
            incident = await incident_repo.get_by_id(approval.incident_id)
            if not incident:
                return None

            if incident.status in (IncidentStatus.RESOLVED, IncidentStatus.CLOSED):
                await self._answer_callback_nonfatal(
                    callback_query_id,
                    "blocked",
                    text="✅ 此事件已解決",
                )
                try:
                    separator = "──────────────"
                    safe_original = html.escape(original_text) if original_text else ""
                    # NOTE(review): the stamp is sent with parse_mode=HTML; presumably the
                    # helper returns HTML-safe text — confirm against its definition.
                    stamp = _format_resolved_guard_stamp(incident.resolved_at)
                    await self._send_request("editMessageText", {
                        "chat_id": self.alert_chat_id,
                        "message_id": message_id,
                        "text": f"{safe_original}\n{separator}\n{stamp}" if safe_original else stamp,
                        "parse_mode": "HTML",
                        "reply_markup": {"inline_keyboard": []},
                        "disable_web_page_preview": True,
                    })
                except Exception as edit_err:
                    logger.warning(
                        "state_guard_edit_text_failed",
                        message_id=message_id,
                        error=str(edit_err),
                    )
                    # Fallback: at least strip the buttons from the card.
                    try:
                        await self._send_request("editMessageReplyMarkup", {
                            "chat_id": self.alert_chat_id,
                            "message_id": message_id,
                            "reply_markup": {"inline_keyboard": []},
                        })
                    except Exception as markup_err:
                        # Still best-effort, but leave a trace instead of passing silently.
                        logger.warning(
                            "state_guard_remove_buttons_failed",
                            message_id=message_id,
                            error=str(markup_err),
                        )
                logger.info(
                    "state_guard_blocked_resolved",
                    approval_id=approval_id,
                    incident_id=approval.incident_id,
                    incident_status=incident.status.value,
                )
                return {"blocked": True, "reason": "already_resolved", "approval_id": approval_id}

            if incident.status == IncidentStatus.MITIGATING:
                await self._answer_callback_nonfatal(
                    callback_query_id,
                    "blocked",
                    text="⏳ 正在修復中，請稍候...",
                )
                logger.info(
                    "state_guard_blocked_mitigating",
                    approval_id=approval_id,
                    incident_id=approval.incident_id,
                )
                return {"blocked": True, "reason": "already_executing", "approval_id": approval_id}

        except Exception as e:
            # A failing guard must not block the main flow.
            logger.warning("state_guard_error", approval_id=approval_id, error=str(e))

        return None

    async def handle_manual_fix_done(
        self,
        user_id: int,
        username: str,
        fix_steps: str,
    ) -> dict:
        """
        ADR-071-H: record the manual fix steps a user submits with /done.

        Flow:
          1. Read the pending approval_id from Redis and clear the pending key
          2. Look up the approval record to get its incident_id
          3. Update incidents.manual_fix_steps + manual_fix_by
          4. Append a MANUAL_FIX_RECORDED entry to the alert operation log
          5. Run the KM conversion service on the re-read incident
          6. Reply with a confirmation message on Telegram

        Args:
            user_id: Telegram user ID
            username: Telegram username (falls back to the user ID when empty)
            fix_steps: Fix steps entered by the user

        Returns:
            ``{"success": True, "incident_id": ...}`` on success; otherwise
            ``{"success": False, ...}`` carrying either a ``reason``
            (``no_pending_task`` / ``approval_not_found`` / ``incident_not_found``)
            or the ``error`` string of an unexpected exception.
        """
        try:
            from src.core.redis_client import get_redis as _get_redis
            redis = _get_redis()

            pending_key = f"manual_fix_pending:{user_id}"
            approval_id_bytes = await redis.get(pending_key)
            if not approval_id_bytes:
                await self._send_request("sendMessage", {
                    "chat_id": self.alert_chat_id,
                    "text": "⚠️ 找不到待記錄的修復任務，或已逾時。",
                    "parse_mode": "HTML",
                })
                return {"success": False, "reason": "no_pending_task"}

            # The Redis client may hand back bytes or str depending on its configuration.
            approval_id = approval_id_bytes.decode() if isinstance(approval_id_bytes, bytes) else str(approval_id_bytes)
            await redis.delete(pending_key)

            # Name recorded as the author of the fix and shown in the confirmation.
            actor = username or str(user_id)

            # Approval record -> incident
            from src.repositories.incident_repository import IncidentDBRepository
            from src.repositories.approval_repository import ApprovalDBRepository

            approval_repo = ApprovalDBRepository()
            approval = await approval_repo.get_by_approval_id(approval_id)
            if not approval:
                await self._send_request("sendMessage", {
                    "chat_id": self.alert_chat_id,
                    "text": f"⚠️ 找不到簽核單 <code>{html.escape(approval_id)}</code>",
                    "parse_mode": "HTML",
                })
                return {"success": False, "reason": "approval_not_found"}

            incident_repo = IncidentDBRepository()
            incident = await incident_repo.get_by_id(approval.incident_id)
            if not incident:
                # The pending key is already gone, so tell the user instead of
                # dropping their /done silently. The notification is best-effort
                # so the returned reason stays the same if Telegram is failing.
                logger.warning(
                    "manual_fix_incident_not_found",
                    approval_id=approval_id,
                    incident_id=approval.incident_id,
                )
                try:
                    await self._send_request("sendMessage", {
                        "chat_id": self.alert_chat_id,
                        "text": "⚠️ 找不到關聯事件，無法記錄修復步驟。",
                        "parse_mode": "HTML",
                    })
                except Exception as _notify_err:
                    logger.warning(
                        "manual_fix_incident_not_found_notify_failed",
                        error=str(_notify_err),
                    )
                return {"success": False, "reason": "incident_not_found"}

            # Update incidents.manual_fix_steps + manual_fix_by
            from src.db.base import get_db_context
            from src.db.models import Incident as IncidentORM
            from sqlalchemy import update

            async with get_db_context() as db:
                await db.execute(
                    update(IncidentORM)
                    .where(IncidentORM.incident_id == approval.incident_id)
                    .values(
                        manual_fix_steps=fix_steps,
                        manual_fix_by=actor,
                    )
                )
                await db.commit()

            # Operation log entry (detail truncated to 500 characters)
            from src.repositories.alert_operation_log_repository import get_alert_operation_log_repository
            op_log_repo = get_alert_operation_log_repository()
            await op_log_repo.append(
                event_type="MANUAL_FIX_RECORDED",
                incident_id=approval.incident_id,
                approval_id=approval_id,
                actor=actor,
                action_detail=fix_steps[:500],
                success=True,
            )

            # Trigger the KM conversion with a direct await (avoids the create_task()
            # race after the DB session closes). The incident is re-read so the
            # freshly written manual_fix_steps are included.
            incident_updated = await incident_repo.get_by_id(approval.incident_id)
            if incident_updated:
                from src.services.km_conversion_service import get_km_conversion_service
                km_svc = get_km_conversion_service()
                try:
                    await km_svc.convert(incident_updated)
                except Exception as _km_err:
                    # A failed conversion does not fail the recording itself.
                    logger.warning(
                        "km_conversion_failed",
                        incident_id=approval.incident_id,
                        error=str(_km_err),
                    )

            # Confirmation reply
            await self._send_request("sendMessage", {
                "chat_id": self.alert_chat_id,
                "text": (
                    "✅ <b>手動修復步驟已記錄</b>\n"
                    "━━━━━━━━━━━━━━━━━━━\n"
                    f"📋 事件: <code>{html.escape(approval.incident_id)}</code>\n"
                    f"👤 記錄者: @{html.escape(actor)}\n\n"
                    "<i>正在建立草稿 Playbook，請至 AWOOOI 審核後生效。</i>"
                ),
                "parse_mode": "HTML",
            })

            logger.info(
                "manual_fix_recorded",
                incident_id=approval.incident_id,
                user=username,
            )
            return {"success": True, "incident_id": approval.incident_id}

        except Exception as e:
            logger.error("handle_manual_fix_done_failed", error=str(e))
            return {"success": False, "error": str(e)}

    async def _handle_auto_tuning(
        self,
        approval_id: str,
        user_id: int,
        username: str,
    ) -> dict:
        """
        Handle an auto-tuning request (Shadow Mode).

        Iron rule: nothing is executed here. With Shadow Mode enabled the
        request is logged and echoed to stdout; with it disabled the request is
        refused. Both outcomes report ``executed: False``.

        Args:
            approval_id: Approval ID
            user_id: Telegram ID of the requester
            username: Name of the requester

        Returns:
            dict: Tuning result
        """
        try:
            if not settings.SHADOW_MODE_ENABLED:
                logger.warning(
                    "auto_tuning_blocked_not_shadow_mode",
                    approval_id=approval_id,
                    message="Production execution requires multi-sig approval",
                )
                return {
                    "executed": False,
                    "shadow_mode": False,
                    "approval_id": approval_id,
                    "error": "Production execution requires multi-sig approval",
                }

            # Shadow Mode: only record the request. A production path would need
            # the full tuning command from the approval record.
            logger.info(
                "shadow_mode_auto_tuning_triggered",
                approval_id=approval_id,
                user_id=user_id,
                username=username,
                shadow_mode=True,
            )

            rule = "=" * 60
            banner = (
                f"\n{rule}",
                "[SHADOW MODE] AI 生成的調優指令請求",
                f"簽核單: {approval_id}",
                f"執行者: @{username} (ID: {user_id})",
                f"時間: {datetime.now(UTC).isoformat()}",
                "狀態: 僅記錄，未實際執行",
                f"{rule}\n",
            )
            for banner_line in banner:
                print(banner_line)

            return {
                "executed": False,
                "shadow_mode": True,
                "approval_id": approval_id,
                "triggered_by": username,
                "command": "kubectl command logged (see server logs)",
                "log": f"[SHADOW MODE] 自動調優請求 - 簽核單: {approval_id}",
            }

        except Exception as exc:
            logger.error("auto_tuning_error", error=str(exc), approval_id=approval_id)
            return {
                "executed": False,
                "error": str(exc),
            }

    async def _handle_snooze(
        self,
        approval_id: str,
        username: str,
    ) -> dict:
        """
        Handle "remind me later" (2026-03-27 P1 optimisation).

        Stores a snooze marker for the approval in Redis with a TTL of
        ``SNOOZE_TTL_SECONDS``. The marker value is ``{username}:{ISO timestamp}``.

        Args:
            approval_id: Approval ID
            username: Name of the user who snoozed

        Returns:
            dict: On success ``snoozed`` is True and ``snooze_until`` is the ISO
            timestamp at which the marker expires (set time + TTL). On failure
            ``snoozed`` is False and ``error`` holds the exception text.
        """
        # Local import: timedelta is not among the module-level datetime imports.
        from datetime import timedelta

        try:
            redis = get_redis()
            snooze_key = f"{SNOOZE_KEY_PREFIX}{approval_id}"

            # One timestamp for both the stored marker and the reported expiry.
            now = datetime.now(UTC)

            await redis.setex(
                snooze_key,
                SNOOZE_TTL_SECONDS,
                f"{username}:{now.isoformat()}",
            )

            logger.info(
                "telegram_snooze_set",
                approval_id=approval_id,
                username=username,
                ttl_minutes=SNOOZE_TTL_SECONDS // 60,
            )

            return {
                "snoozed": True,
                "approval_id": approval_id,
                # Previously this reported the current time, not the expiry.
                "snooze_until": (now + timedelta(seconds=SNOOZE_TTL_SECONDS)).isoformat(),
                "ttl_minutes": SNOOZE_TTL_SECONDS // 60,
            }

        except Exception as e:
            logger.error("snooze_error", error=str(e), approval_id=approval_id)
            return {
                "snoozed": False,
                "error": str(e),
            }

    async def _handle_silence(
        self,
        approval_id: str,
        username: str,
        original_text: str,
    ) -> dict:
        """
        Handle "silence for one hour" (2026-03-27 P1 optimisation).

        Parses the resource name from the card text and stores a silence marker
        for it in Redis with a TTL of ``SILENCE_TTL_SECONDS``. The marker value
        is ``{username}:{ISO timestamp}:{approval_id}``.

        Args:
            approval_id: Approval ID
            username: Name of the user who silenced
            original_text: Original message text (used to parse the resource name)

        Returns:
            dict: On success ``silenced`` is True, ``resource_name`` is the parsed
            name ("unknown" when no resource line is present) and
            ``silence_until`` is the ISO timestamp at which the marker expires.
            On failure ``silenced`` is False and ``error`` holds the exception text.
        """
        # Local import: timedelta is not among the module-level datetime imports.
        from datetime import timedelta

        try:
            redis = get_redis()

            # The resource line looks like "🎯 資源: xxx". Take everything after the
            # label so names that contain ":" themselves (e.g. "svc:8080") stay
            # intact; splitting on the last colon used to truncate them.
            marker = "🎯 資源:"
            resource_name = "unknown"
            for line in original_text.split("\n"):
                if marker in line:
                    resource_name = line.partition(marker)[2].strip()
                    break

            silence_key = f"{SILENCE_KEY_PREFIX}{resource_name}"

            # One timestamp for both the stored marker and the reported expiry.
            now = datetime.now(UTC)

            await redis.setex(
                silence_key,
                SILENCE_TTL_SECONDS,
                f"{username}:{now.isoformat()}:{approval_id}",
            )

            logger.info(
                "telegram_silence_set",
                approval_id=approval_id,
                resource_name=resource_name,
                username=username,
                ttl_hours=SILENCE_TTL_SECONDS // 3600,
            )

            return {
                "silenced": True,
                "approval_id": approval_id,
                "resource_name": resource_name,
                # Previously this reported the current time, not the expiry.
                "silence_until": (now + timedelta(seconds=SILENCE_TTL_SECONDS)).isoformat(),
                "ttl_hours": SILENCE_TTL_SECONDS // 3600,
            }

        except Exception as e:
            logger.error("silence_error", error=str(e), approval_id=approval_id)
            return {
                "silenced": False,
                "error": str(e),
            }

    async def _handle_llm_action_callback(
        self,
        callback_query_id: str,
        callback_data: str,
        user_id: int,
        username: str = "",
    ) -> dict:
        """
        B4: handle a callback from an LLM-generated dynamic button (``la:{short_id}``).

        2026-04-27 Claude Sonnet 4.6: H1+B4 fix for the "ghost button" rule
        (a button is never sent unless callback format, handler and MCP
        capability all exist). This method is the handler and works with the
        H3 Redis short_id mapping.

        Flow:
        1. Whitelist check
        2. Redis GET tg:la:{short_id} to restore the payload (missing -> expired)
        3. Call dispatch_llm_action to get the execution decision
        4. High risk, unconfirmed -> confirmation prompt
           (TODO: implement the second-confirmation flow)
        5. Any other rejection -> report the reason
        6. Accepted -> answer the callback and post a notice to the alert chat
        """
        import html as _html  # noqa: PLC0415
        import json as _json  # noqa: PLC0415

        from src.services.callback_dispatcher import dispatch_llm_action  # noqa: PLC0415

        async def _toast(text: str, show_alert: bool = True) -> None:
            # Single place that answers this callback query.
            await self._send_request("answerCallbackQuery", {
                "callback_query_id": callback_query_id,
                "text": text,
                "show_alert": show_alert,
            })

        def _rejected(reason) -> dict:
            return {"action": "llm_action", "ok": False, "reason": reason}

        # ── 1. Whitelist check ───────────────────────────────────────────────
        if not self._security.is_whitelisted(user_id):
            await _toast("❌ 您沒有執行此操作的權限")
            return _rejected("not_whitelisted")

        # ── 2. Redis GET -> restore payload ──────────────────────────────────
        short_id = callback_data[3:]  # drop the "la:" prefix
        payload: dict | None = None
        try:
            raw = await get_redis().get(f"tg:la:{short_id}")
            if raw:
                payload = _json.loads(raw)
        except Exception as exc:
            # P1: a Redis failure is reported separately from an expired button
            logger.error("llm_action_redis_get_failed", short_id=short_id, error=str(exc))
            await _toast("⚠️ 系統暫時不可用，請稍後重試")
            return _rejected("redis_unavailable")

        if payload is None:
            await _toast("⏰ 此按鈕已過期，請重新觸發告警流程")
            logger.info("llm_action_button_expired", short_id=short_id)
            return _rejected("button_expired")

        name: str = payload.get("name", "")
        provider: str = payload.get("provider", "")
        tool: str = payload.get("tool", "")
        risk: str = payload.get("risk", "low")

        # ── 3. Build a stub action and call dispatch_llm_action ──────────────
        class _StubAction:
            pass

        stub = _StubAction()
        for attr, value in (
            ("name", name),
            ("mcp_provider", provider),
            ("mcp_tool", tool),
            ("risk", risk),
            ("params", {}),
        ):
            setattr(stub, attr, value)

        # P0 fix: use the real incident_id from the payload, falling back to short_id
        real_incident_id: str = payload.get("incident_id", "") or short_id
        result = dispatch_llm_action(stub, {"incident_id": real_incident_id, "confirmed": False})

        if not result.get("ok"):
            # ── 4. High risk -> second-confirmation prompt ───────────────────
            if result.get("reason") == "high_risk_requires_confirmation":
                await _toast(f"⚠️ 高風險操作：{name}，請傳送指令確認後再執行")
                logger.info(
                    "llm_action_high_risk_pending",
                    name=name,
                    mcp_tool=tool,
                    user_id=user_id,
                )
                return _rejected("high_risk_requires_confirmation")

            # ── 5. Any other dispatch rejection ──────────────────────────────
            reason = result.get("reason", "unknown")
            await _toast(f"❌ 無法執行：{reason}")
            logger.warning(
                "llm_action_dispatch_rejected",
                name=name,
                mcp_tool=tool,
                reason=reason,
            )
            return _rejected(reason)

        # ── 6. Accepted -> answer the callback and report ────────────────────
        await _toast(f"▶️ 執行中：{name}", show_alert=False)

        logger.info(
            "llm_action_executing",
            name=name,
            mcp_tool=tool,
            mcp_provider=provider,
            risk=risk,
            user_id=user_id,
            username=username,
        )

        # Only a notice is posted here; no MCP call is made in this method.
        result_text = "\n".join((
            "✅ <b>LLM 動作已觸發</b>",
            f"動作：<code>{_html.escape(name)}</code>",
            f"工具：<code>{_html.escape(provider)}/{_html.escape(tool)}</code>",
            f"風險：<code>{_html.escape(risk)}</code>",
            f"操作者：@{_html.escape(str(username or user_id))}",
        ))
        try:
            await self._send_request("sendMessage", {
                "chat_id": self.alert_chat_id,
                "text": result_text,
                "parse_mode": "HTML",
            })
        except Exception as exc:
            logger.warning("llm_action_result_notify_failed", error=str(exc))

        return {
            "action": "llm_action",
            "ok": True,
            "name": name,
            "mcp_tool": tool,
            "mcp_provider": provider,
            "risk": risk,
            "user": {"id": user_id, "username": username},
        }

    async def _answer_callback(
        self,
        callback_query_id: str,
        action: str,
        text: str | None = None,
    ) -> None:
        """回應 Callback Query"""
        if text is None:
            if action == "approve":
                text = "✅ 已簽核"
            elif action == "reject":
                text = "❌ 已拒絕"
            elif action == "tune":
                text = "⚡ 調優中..."
            elif action == "snooze":
                text = "⏰ 30 分鐘後再提醒"
            elif action == "silence":
                text = "🔕 此類告警靜默 1 小時"
            else:
                text = "✓ 已處理"

        await self._send_request("answerCallbackQuery", {
            "callback_query_id": callback_query_id,
            "text": text,
            "show_alert": False,
        })

    async def _answer_callback_nonfatal(
        self,
        callback_query_id: str,
        action: str,
        text: str | None = None,
    ) -> None:
        """Best-effort callback toast; never block the actual DB-backed reply."""
        try:
            await self._answer_callback(callback_query_id, action, text=text)
        except Exception as exc:
            logger.warning(
                "telegram_answer_callback_nonfatal_failed",
                action=action,
                error=str(exc),
            )

    async def _update_message_after_action(
        self,
        message_id: int,
        action: str,
        username: str,
        original_text: str,
        extra_info: str = "",
    ) -> None:
        """
        Update the card: keep the original content and append an action stamp.

        UX requirements:
        - Never overwrite the original content
        - Append a separator and the action status at the bottom
        - Remove all buttons

        The message is sent with ``parse_mode=HTML``, so every dynamic part
        (original text, username, extra info) is HTML-escaped. An unescaped
        ``<`` or ``&`` in a display name or resource name would otherwise make
        editMessageText fail to parse.

        Args:
            message_id: Message ID of the card to edit.
            action: Action that was taken; selects the stamp wording.
            username: Name of the user who acted.
            original_text: Text of the card before the action.
            extra_info: Optional detail; shown in the stamp for "silence", and
                for "tune" its presence adds a "command recorded" line.
        """
        safe_user = html.escape(str(username))
        safe_extra = html.escape(str(extra_info)) if extra_info else ""

        # Build the stamp (2026-03-27 ogt: snooze/silence added)
        if action == "approve":
            stamp = f"✅ 已由 @{safe_user} 授權執行"
        elif action == "reject":
            stamp = f"❌ 已由 @{safe_user} 拒絕執行"
        elif action == "tune":
            stamp = f"⚡ 已由 @{safe_user} 觸發自動調優 (Shadow Mode)"
            if extra_info:
                stamp += "\n📝 指令已記錄"
        elif action == "snooze":
            stamp = f"⏰ @{safe_user} 已設定 30 分鐘後再提醒"
        elif action == "silence":
            resource_info = f" ({safe_extra})" if safe_extra else ""
            stamp = f"🔕 @{safe_user} 已靜默此類告警 1 小時{resource_info}"
        else:
            stamp = f"✓ 已由 @{safe_user} 處理"

        # Step 1: remove the buttons first, so they disappear even if the text
        # update below fails.
        # 2026-04-05 Claude Code: editMessageText can fail on HTML special
        # characters, so editMessageReplyMarkup goes first.
        try:
            await self._send_request("editMessageReplyMarkup", {
                "chat_id": self.alert_chat_id,
                "message_id": message_id,
                "reply_markup": {"inline_keyboard": []},
            })
        except TelegramGatewayError as e:
            logger.warning("telegram_remove_buttons_failed", message_id=message_id, error=str(e))

        # Step 2: try to update the text (original text and stamp are both escaped)
        separator = "──────────────"
        safe_original = html.escape(original_text)
        safe_updated_text = f"{safe_original}\n{separator}\n{stamp}"
        try:
            await self._send_request("editMessageText", {
                "chat_id": self.alert_chat_id,
                "message_id": message_id,
                "text": safe_updated_text,
                "parse_mode": "HTML",
                "reply_markup": {"inline_keyboard": []},
                "disable_web_page_preview": True,
            })
        except TelegramGatewayError as e:
            # A failed text update does not affect the flow; the buttons are already gone.
            logger.warning("telegram_update_text_failed", message_id=message_id, error=str(e))

    async def mark_auto_repaired(
        self,
        approval_id: str,
        playbook_name: str,
        execution_time_ms: int,
        success: bool = True,
    ) -> bool:
        """
        Close out a Telegram approval card once auto-repair has finished.

        Steps:
        1. Strip the approve/reject/silence buttons from the card.
        2. Reply to the card with the repair outcome.

        Args:
            approval_id: Approval id; the card's message id is read from the
                Redis key ``tg_approval:{approval_id}``.
            playbook_name: Playbook that ran (HTML-escaped in the reply).
            execution_time_ms: Execution time shown in the reply.
            success: Selects the "repaired" or "repair failed" headline.

        Returns:
            True when the outcome reply was sent; False when no message id is
            stored for the approval or when anything raised (logged, never
            propagated).

        2026-04-10 Claude Sonnet 4.6 Asia/Taipei (ADR-068 closed loop)
        """
        try:
            cached_id = await get_redis().get(f"tg_approval:{approval_id}")
            if not cached_id:
                logger.warning("mark_auto_repaired_no_msg_id", approval_id=approval_id)
                return False

            message_id = int(cached_id)

            # Button removal is best-effort: a gateway error here must not
            # prevent the outcome reply below.
            strip_buttons = {
                "chat_id": self.alert_chat_id,
                "message_id": message_id,
                "reply_markup": {"inline_keyboard": []},
            }
            try:
                await self._send_request("editMessageReplyMarkup", strip_buttons)
            except TelegramGatewayError as strip_err:
                logger.warning(
                    "mark_auto_repaired_remove_buttons_failed",
                    message_id=message_id,
                    error=str(strip_err),
                )

            # Reply to the original card with the result.
            if success:
                headline = "✅ 已自動修復"
            else:
                headline = "❌ 自動修復失敗"
            outcome_text = "\n".join([
                headline,
                f"Playbook: <code>{html.escape(playbook_name)}</code>",
                f"耗時: {execution_time_ms}ms",
            ])
            await self._send_request("sendMessage", {
                "chat_id": self.alert_chat_id,
                "text": outcome_text,
                "parse_mode": "HTML",
                "reply_parameters": {"message_id": message_id},
            })
        except Exception as err:
            logger.warning("mark_auto_repaired_failed", approval_id=approval_id, error=str(err))
            return False
        return True

    async def append_incident_update(
        self,
        incident_id: str,
        status_line: str,
        keep_info_buttons: bool = True,
    ) -> bool:
        """
        Continue an incident's status on its original alert card.

        The Bot API offers no way to read a message back, so the original text
        is left untouched: the card's buttons are swapped and the status is
        posted as a reply to the card.

        Flow:
          1. Read the card's message id from Redis key ``tg_msg:{incident_id}``.
          2. Per-incident dedup: an identical ``status_line`` for the same
             incident is suppressed for INCIDENT_UPDATE_DEDUP_TTL_SECONDS.
          3. Cross-incident dedup: when ``_is_noisy_failure_update`` flags the
             line, only the first incident posts the reply within
             INCIDENT_UPDATE_GLOBAL_FAILURE_DEDUP_TTL_SECONDS; later incidents
             still get their buttons swapped.
          4. editMessageReplyMarkup: replace the keyboard (drop the
             approve/reject/silence row).
          5. sendMessage: reply to the card with ``status_line``.

        Args:
            incident_id: Incident id used for the Redis lookup and the button
                callback data.
            status_line: Status text to post; sent with parse_mode=HTML, so
                the caller is responsible for escaping.
            keep_info_buttons: Keep the detail/reanalyze/history row (plus the
                optional row from ``_awooop_runs_button_row``) when True;
                remove every button when False.

        Returns:
            bool: False when no usable message id is stored (missing or not an
            integer), so the caller must fall back to another channel. True
            otherwise, including when the update was suppressed as a duplicate
            or when a Telegram call failed (failures are only logged).

        2026-04-09 Claude Sonnet 4.6 Asia/Taipei (status changes continue on
        the original message)
        """
        redis = get_redis()
        redis_key = f"tg_msg:{incident_id}"
        stored = await redis.get(redis_key)
        if not stored:
            logger.warning(
                "append_incident_update_no_message_id",
                incident_id=incident_id,
                reason="message_id not in Redis",
            )
            return False

        try:
            message_id = int(stored)
        except (ValueError, TypeError):
            logger.warning("append_incident_update_invalid_message_id", stored=stored)
            return False

        # Telegram should only carry the decision summary: the same status for
        # the same incident is not repeated within 5 minutes. Detailed
        # execution records belong in the timeline / AwoooP Run Monitor so
        # auto-failure updates do not flood the group.
        status_hash = hashlib.sha1(status_line.encode("utf-8")).hexdigest()[:16]
        dedup_key = f"{INCIDENT_UPDATE_DEDUP_PREFIX}{incident_id}:{status_hash}"
        try:
            was_set = await redis.set(
                dedup_key,
                "1",
                ex=INCIDENT_UPDATE_DEDUP_TTL_SECONDS,
                nx=True,
            )
            if not was_set:
                logger.info(
                    "append_incident_update_dedup_suppressed",
                    incident_id=incident_id,
                    dedup_key=dedup_key,
                )
                return True
        except Exception as exc:
            # Dedup is best-effort: on a Redis error the update still goes out.
            logger.warning(
                "append_incident_update_dedup_failed",
                incident_id=incident_id,
                error=str(exc),
            )

        suppress_reply = False
        if _is_noisy_failure_update(status_line):
            # Different incidents stuck on the same auto-repair/diagnosis
            # failure summary only push the first reply; every incident still
            # has its risky buttons removed, full details live in the
            # timeline / AwoooP. The key reuses status_hash because it hashes
            # the same status_line.
            global_dedup_key = f"{INCIDENT_UPDATE_GLOBAL_FAILURE_DEDUP_PREFIX}{status_hash}"
            try:
                was_global_set = await redis.set(
                    global_dedup_key,
                    incident_id,
                    ex=INCIDENT_UPDATE_GLOBAL_FAILURE_DEDUP_TTL_SECONDS,
                    nx=True,
                )
                suppress_reply = not bool(was_global_set)
                if suppress_reply:
                    logger.info(
                        "append_incident_update_global_failure_dedup_suppressed",
                        incident_id=incident_id,
                        dedup_key=global_dedup_key,
                    )
            except Exception as exc:
                logger.warning(
                    "append_incident_update_global_failure_dedup_failed",
                    incident_id=incident_id,
                    error=str(exc),
                )

        # Step 1: swap the keyboard (drop the approve/reject/silence row and
        # optionally keep the informational row).
        if keep_info_buttons:
            inline_keyboard = [
                [
                    {"text": "📋 詳情", "callback_data": f"detail:{incident_id}"},
                    {"text": "🔄 重診", "callback_data": f"reanalyze:{incident_id}"},
                    {"text": "📊 歷史", "callback_data": f"history:{incident_id}"},
                ],
            ]
            awooop_row = _awooop_runs_button_row(incident_id)
            if awooop_row:
                inline_keyboard.append(awooop_row)
            new_keyboard = {"inline_keyboard": inline_keyboard}
        else:
            new_keyboard = {"inline_keyboard": []}

        try:
            await self._send_request("editMessageReplyMarkup", {
                "chat_id": self.alert_chat_id,
                "message_id": message_id,
                "reply_markup": new_keyboard,
            })
        except TelegramGatewayError as e:
            logger.warning("append_incident_update_edit_buttons_failed", message_id=message_id, error=str(e))

        if suppress_reply:
            return True

        # Step 2: reply to the original card with the status (the original
        # text stays untouched; the reply keeps the thread together).
        try:
            await self._send_request("sendMessage", {
                "chat_id": self.alert_chat_id,
                "text": status_line,
                "parse_mode": "HTML",
                "reply_to_message_id": message_id,
                "disable_web_page_preview": True,
            })
        except TelegramGatewayError as e:
            logger.warning("append_incident_update_reply_failed", message_id=message_id, error=str(e))

        logger.info(
            "append_incident_update_done",
            incident_id=incident_id,
            message_id=message_id,
        )
        return True

    async def append_grouped_alert_digest(
        self,
        *,
        incident_id: str,
        group_key: str,
        digest_text: str,
    ) -> bool:
        """
        Reply to the parent alert card with a grouped-alert digest.

        Unlike ``append_incident_update`` a digest is an observation, not an
        execution-state change, so the card's approve/reject/silence buttons
        are left untouched.

        Args:
            incident_id: Parent incident; its card's message id is read from
                the Redis key ``tg_msg:{incident_id}``.
            group_key: Alert group key; at most one digest per group is sent
                within GROUPED_ALERT_DIGEST_DEDUP_TTL_SECONDS.
            digest_text: HTML digest body. Digests of up to 1400 characters
                are sent as HTML; longer ones are sent as plain text.

        Returns:
            bool: True when the digest was sent or suppressed as a duplicate;
            False when no usable parent message id exists or the Telegram
            call failed.
        """
        redis = get_redis()
        stored = await redis.get(f"tg_msg:{incident_id}")
        if not stored:
            logger.info(
                "grouped_alert_digest_no_parent_message",
                incident_id=incident_id,
                group_key=group_key,
            )
            return False

        try:
            message_id = int(stored)
        except (ValueError, TypeError):
            logger.warning(
                "grouped_alert_digest_invalid_parent_message",
                incident_id=incident_id,
                stored=stored,
            )
            return False

        dedup_key = f"{GROUPED_ALERT_DIGEST_DEDUP_PREFIX}{group_key}"
        try:
            was_set = await redis.set(
                dedup_key,
                incident_id,
                ex=GROUPED_ALERT_DIGEST_DEDUP_TTL_SECONDS,
                nx=True,
            )
            if not was_set:
                logger.info(
                    "grouped_alert_digest_dedup_suppressed",
                    incident_id=incident_id,
                    group_key=group_key,
                )
                return True
        except Exception as exc:
            # Dedup is best-effort: on a Redis error the digest still goes out.
            logger.warning(
                "grouped_alert_digest_dedup_failed",
                incident_id=incident_id,
                group_key=group_key,
                error=str(exc),
            )

        max_digest_chars = 1400
        payload: dict = {
            "chat_id": self.alert_chat_id,
            "text": digest_text,
            "parse_mode": "HTML",
            "reply_parameters": {
                "message_id": message_id,
                "allow_sending_without_reply": True,
            },
            "disable_web_page_preview": True,
        }
        if len(digest_text) > max_digest_chars:
            # Slicing HTML at a fixed offset can cut a tag or entity in half,
            # which Telegram rejects. Over-long digests are sent as plain text
            # instead, the same rule send_notification applies.
            # NOTE(review): assumes _plain_text_from_html strips markup and
            # honours `limit`, as its use in send_notification suggests.
            payload["text"] = _plain_text_from_html(digest_text, limit=max_digest_chars)
            del payload["parse_mode"]

        try:
            await self._send_request("sendMessage", payload)
        except TelegramGatewayError as exc:
            logger.warning(
                "grouped_alert_digest_reply_failed",
                incident_id=incident_id,
                group_key=group_key,
                message_id=message_id,
                error=str(exc),
            )
            return False

        logger.info(
            "grouped_alert_digest_reply_sent",
            incident_id=incident_id,
            group_key=group_key,
            message_id=message_id,
        )
        return True

    async def _dispatch_category_action(
        self,
        callback_query_id: str,
        action: str,
        incident_id: str,
        user_id: int,
    ) -> None:
        """
        Phase 5 Sprint 5.1 (2026-04-14 Claude Sonnet 4.6):
        Fallback dispatcher that resolves an unknown info action through the
        action registry (callback_action_spec.yaml).

        Flow:
        1. Look the action up in the registry.
        2. Not registered: answer the callback with the "unknown operation"
           text and stop.
        3. Registered: acknowledge the callback, load the incident's labels,
           run dispatch_action and post the result, replying to the original
           card when its message id is known.

        Note: this method only handles info (read-only) actions. Nonce
        (write) actions take a different path.
        """
        from src.services.callback_dispatcher import dispatch_action, get_action_spec

        spec = get_action_spec(action)
        if not spec:
            await self._answer_callback(callback_query_id, action, text="⚠️ 未知操作")
            return

        # Acknowledge right away so the Telegram client does not time out.
        progress_text = f"{spec.emoji} 執行中..."
        await self._answer_callback(callback_query_id, action, text=progress_text)

        # Labels of the first signal feed template substitution; the lookup is
        # best-effort and falls back to an empty mapping.
        labels: dict = {}
        try:
            from src.repositories.incident_repository import get_incident_repository
            incident = await get_incident_repository().get_by_id(incident_id)
            if incident and incident.signals:
                labels = incident.signals[0].labels or {}
        except Exception as lookup_err:
            logger.debug("dispatch_labels_lookup_failed", incident_id=incident_id, error=str(lookup_err))

        result = await dispatch_action(
            action_name=action,
            incident_id=incident_id,
            user_id=user_id,
            labels=labels,
        )

        # Find the original card (Redis tg_msg) so the result can be threaded
        # under it; any failure simply means a non-threaded message.
        orig_msg_id = None
        try:
            from src.core.redis_client import get_redis
            cached_raw = await get_redis().get(f"tg_msg:{incident_id}")
            if cached_raw:
                orig_msg_id = int(cached_raw)
        except Exception:
            orig_msg_id = None

        try:
            reply: dict = {
                "chat_id": self.alert_chat_id,
                "text": result.result_text,
                "parse_mode": "HTML",
            }
            if orig_msg_id:
                reply["reply_to_message_id"] = orig_msg_id
            await self._send_request("sendMessage", reply)
            logger.info(
                "category_action_reply_sent",
                action=action,
                incident_id=incident_id,
                success=result.success,
                duration_ms=round(result.duration_ms, 1),
            )
        except Exception as send_err:
            logger.warning("category_action_reply_failed", action=action, error=str(send_err))

    async def _send_incident_detail(self, incident_id: str) -> None:
        """
        ADR-050 P2: send an incident-detail message (the original approval
        card is not modified).

        The message is assembled as a list of HTML lines: id/status/severity,
        affected services, AI decision chain, creation time, frequency stats,
        timeline, remediation history and truth-chain summaries. Any failure
        is logged and reported to the chat as a short notification instead of
        being raised.

        Args:
            incident_id: Id of the incident to describe.

        2026-04-01 Claude Code (ADR-050 P2): detail button handler
        """
        # Lazy imports to avoid circular dependencies (same pattern as approval_service)
        from src.repositories.incident_repository import get_incident_repository
        from src.services.incident_timeline_service import fetch_incident_timeline

        try:
            repo = get_incident_repository()
            incident = await repo.get_by_id(incident_id)

            if not incident:
                await self.send_notification(f"⚠️ 找不到事件 <code>{html.escape(incident_id)}</code>")
                return

            dc = incident.decision_chain
            # 10-cell bar: filled cells = int(confidence * 10); a missing decision chain counts as 0
            confidence_bar = "█" * int((dc.confidence if dc else 0) * 10) + "░" * (10 - int((dc.confidence if dc else 0) * 10))

            lines = [
                "📋 <b>事件詳情</b>",
                "",
                f"🔖 <b>ID:</b> <code>{html.escape(incident.incident_id)}</code>",
                f"📊 <b>狀態:</b> {incident.status.value}",
                f"⚡ <b>嚴重度:</b> {incident.severity.value}",
            ]

            # Only the first three affected services are listed
            if incident.affected_services:
                lines.append(f"🎯 <b>受影響服務:</b> {', '.join(html.escape(s) for s in incident.affected_services[:3])}")

            if dc:
                lines += [
                    "",
                    f"🤖 <b>AI 分析</b> ({html.escape(dc.model_used)})",
                    f"💡 {html.escape(dc.hypothesis)}",
                    f"📈 信心: [{confidence_bar}] {dc.confidence:.0%}",
                ]
                # First root cause only, capped at 100 characters
                if dc.probable_root_causes:
                    lines.append(f"🔍 根因: {html.escape(dc.probable_root_causes[0][:100])}")

            # 2026-04-02 Claude Code: timezone fix — must convert to Asia/Taipei (feedback_timezone_taipei.md)
            from zoneinfo import ZoneInfo
            created_taipei = incident.created_at.astimezone(ZoneInfo("Asia/Taipei")) if incident.created_at else incident.created_at
            lines += [
                "",
                f"🕐 <b>建立:</b> {created_taipei.strftime('%m/%d %H:%M') if created_taipei else 'N/A'}",
            ]

            if incident.frequency_stats:
                fs = incident.frequency_stats
                lines.append(f"📉 <b>頻率:</b> 1h={fs.count_1h} 24h={fs.count_24h} 7d={fs.count_7d}")

            # Timeline section; the reconciliation block is shown only when the
            # consistency status is "blocked" or "degraded"
            timeline = await fetch_incident_timeline(incident_id)
            if timeline and timeline.get("ascii_timeline"):
                lines += [
                    "",
                    "🧭 <b>處理歷程</b>",
                    f"<code>{html.escape(timeline['ascii_timeline'])}</code>",
                ]
                reconciliation = timeline.get("reconciliation") or {}
                if reconciliation.get("consistency_status") in {"blocked", "degraded"}:
                    mismatch_codes = [
                        str(row.get("code"))
                        for row in reconciliation.get("mismatches", [])
                        if row.get("code")
                    ]
                    lines += [
                        "",
                        "🚦 <b>真相鏈狀態</b>",
                        f"狀態: <code>{html.escape(str(reconciliation.get('consistency_status')))}</code>",
                        f"下一步: <code>{html.escape(str(reconciliation.get('operator_next_state')))}</code>",
                    ]
                    # At most four mismatch codes are shown
                    if mismatch_codes:
                        lines.append(
                            "矛盾: "
                            + html.escape(", ".join(mismatch_codes[:4]))
                        )

            # Remediation history is best-effort: a failure is logged and the
            # detail message is still sent without this section
            try:
                from src.services.adr100_remediation_service import (
                    get_adr100_remediation_service,
                )

                remediation_history = await get_adr100_remediation_service().history(
                    limit=5,
                    incident_id=incident_id,
                )
                lines += _format_remediation_history_lines(remediation_history)
            except Exception as remediation_exc:
                logger.warning(
                    "incident_detail_remediation_history_summary_failed",
                    incident_id=incident_id,
                    error=str(remediation_exc),
                )

            # Truth-chain summary (gateway + automation quality), also best-effort
            try:
                from src.services.awooop_truth_chain_service import fetch_truth_chain

                truth_chain = await fetch_truth_chain(
                    source_id=incident_id,
                    project_id=getattr(incident, "project_id", None) or "awoooi",
                )
                gateway_summary = (
                    (truth_chain.get("mcp") or {})
                    .get("awooop_gateway")
                )
                lines += _format_gateway_summary_lines(gateway_summary)
                lines += _format_automation_quality_lines(
                    truth_chain.get("automation_quality")
                )
            except Exception as truth_exc:
                logger.warning(
                    "incident_detail_truth_chain_summary_failed",
                    incident_id=incident_id,
                    error=str(truth_exc),
                )

            await self._send_html_line_message(
                lines,
                failure_context="incident_detail",
                reply_markup=_awooop_runs_reply_markup(incident_id),
            )

        except Exception as e:
            # Top-level boundary: log and tell the chat (error text capped at 100 characters)
            logger.warning("send_incident_detail_failed", incident_id=incident_id, error=str(e))
            await self.send_notification(f"⚠️ 無法取得事件詳情: {html.escape(str(e)[:100])}")

    async def _send_incident_history(self, incident_id: str) -> None:
        """
        ADR-050 P2: send the incident frequency statistics together with a DB
        truth-chain summary.

        Phase 27 layered strategy (2026-04-10 ogt):
        - Layer 1: DB frequency_snapshot — snapshot taken at creation, kept permanently
        - Layer 2: Redis AnomalyCounter — statistics accumulated across incidents (35d TTL)
        - Layer 3: AwoooP truth-chain — fills in the auto-repair / evidence / KM / MCP stages

        Any failure is logged and reported to the chat as a short notification
        instead of being raised.

        Args:
            incident_id: Id of the incident whose history is requested.
        """
        from src.repositories.incident_repository import get_incident_repository
        from src.services.anomaly_counter import get_anomaly_counter

        try:
            repo = get_incident_repository()
            incident = await repo.get_by_id(incident_id)

            if not incident:
                await self.send_notification(f"⚠️ 找不到事件 <code>{html.escape(incident_id)}</code>")
                return

            lines = [
                "📊 <b>事件歷史統計</b>",
                "",
                f"🔖 <code>{html.escape(incident_id)}</code>",
            ]

            # === Layer 1: DB snapshot (taken at creation, permanent) ===
            fs = incident.frequency_stats
            if fs:
                lines += [
                    "",
                    "📌 <b>建立時刻快照</b>",
                    f"  1小時: {fs.count_1h} 次",
                    f"  24小時: {fs.count_24h} 次",
                    f"  7天: {fs.count_7d} 次",
                    f"  30天: {fs.count_30d} 次",
                ]
                if fs.auto_repair_count > 0:
                    lines.append(f"  自動修復: {fs.auto_repair_count} 次")
                    if fs.last_repair_action:
                        lines.append(f"  最後動作: {html.escape(fs.last_repair_action)}")
                if fs.escalation_level:
                    lines.append(f"  升級等級: {html.escape(fs.escalation_level)}")
                if fs.anomaly_key:
                    lines.append(f"🔑 告警鍵: <code>{html.escape(fs.anomaly_key)}</code>")
                anomaly_key = fs.anomaly_key
            else:
                lines += ["", "⚠️ 無建立時快照（舊 incident 或 Redis 已超期）"]
                # Try to derive anomaly_key from the first signal: the non-empty
                # parts (alert name, first affected service, namespace label,
                # error_type label) joined with ":"
                anomaly_key = None
                if incident.signals:
                    sig = incident.signals[0]
                    parts = [
                        sig.alert_name or "",
                        incident.affected_services[0] if incident.affected_services else "",
                        (sig.labels or {}).get("namespace", ""),
                        (sig.labels or {}).get("error_type", ""),
                    ]
                    candidate = ":".join(p for p in parts if p)
                    if candidate:
                        anomaly_key = candidate

            # === Layer 2: Redis accumulated statistics (35d TTL) ===
            if anomaly_key:
                try:
                    counter = get_anomaly_counter()
                    disposition = await counter.get_disposition_stats(anomaly_key)
                    auto_r = disposition.get("auto_repair_count", 0)
                    cold_s = disposition.get("cold_start_trust_count", 0)
                    human_a = disposition.get("human_approved_count", 0)
                    manual_r = disposition.get("manual_resolved_count", 0)
                    total_res = auto_r + cold_s + human_a + manual_r
                    if total_res > 0:
                        # Automation rate counts auto-repair plus cold-start trust
                        auto_rate = int((auto_r + cold_s) / total_res * 100)
                        lines += [
                            "",
                            f"📋 <b>累積處置分佈</b> (共 {total_res} 次，35天內)",
                            f"  🤖 自動修復: {auto_r}",
                            f"  ❄️ 冷啟動信任: {cold_s}",
                            f"  👤 人工審核: {human_a}",
                            f"  🔧 手動處理: {manual_r}",
                            f"  📈 自動化率: <b>{auto_rate}%</b>",
                        ]
                    else:
                        lines += ["", "📋 <b>累積處置</b>: 尚無記錄 (Redis TTL 35天)"]
                except Exception as redis_err:
                    # Best-effort: the message is still sent with a placeholder line
                    logger.warning("incident_history_redis_error", error=str(redis_err))
                    lines += ["", "⚠️ Redis 統計暫時無法取得"]

            # === Layer 3: DB truth-chain (avoids misjudgement caused by Redis TTL / frequency_snapshot gaps) ===
            try:
                from src.services.awooop_truth_chain_service import fetch_truth_chain

                truth_chain = await fetch_truth_chain(
                    source_id=incident_id,
                    project_id=getattr(incident, "project_id", None) or "awoooi",
                )
                truth_status = truth_chain.get("truth_status") or {}
                if truth_status:
                    lines += [
                        "",
                        "🧭 <b>DB Truth-chain</b>",
                        (
                            "階段: "
                            f"<code>{html.escape(str(truth_status.get('current_stage') or 'unknown'))}</code>"
                            " / "
                            f"<code>{html.escape(str(truth_status.get('stage_status') or 'unknown'))}</code>"
                        ),
                        (
                            "人工介入: "
                            f"<code>{'yes' if truth_status.get('needs_human') else 'no'}</code>"
                        ),
                    ]
                    # At most four blockers are listed
                    blockers = truth_status.get("blockers")
                    if isinstance(blockers, list) and blockers:
                        lines.append(
                            "卡點: "
                            + html.escape(", ".join(str(item) for item in blockers[:4]))
                        )
                lines += _format_automation_quality_lines(
                    truth_chain.get("automation_quality")
                )
            except Exception as truth_exc:
                logger.warning(
                    "incident_history_truth_chain_summary_failed",
                    incident_id=incident_id,
                    error=str(truth_exc),
                )

            await self._send_html_line_message(
                lines,
                failure_context="incident_history",
                reply_markup=_awooop_runs_reply_markup(incident_id),
            )

        except Exception as e:
            # Top-level boundary: log and tell the chat what went wrong
            logger.warning("send_incident_history_failed", incident_id=incident_id, error=str(e))
            await self.send_notification(f"⚠️ 無法取得歷史統計: {html.escape(str(e))}")

    async def _send_reanalyze_result(self, incident_id: str) -> None:
        """
        ADR-050 P2: trigger a re-analysis and report the scheduling result.

        Calls IncidentService.trigger_reanalysis() and reports the outcome in a
        new message. The original approval card is left untouched so the
        authorization flow is not disturbed. Errors are logged and reported to
        the chat (error text capped at 100 characters).

        Args:
            incident_id: Id of the incident to re-analyze.

        2026-04-01 Claude Code (ADR-050 P2): reanalyze button handler
        """
        from src.services.incident_service import get_incident_service

        try:
            outcome = await get_incident_service().trigger_reanalysis(incident_id)

            # Each branch differs only in headline, message prefix and trailer.
            if outcome["already_analyzing"]:
                headline, lead, trailer = "⏳ <b>重診進行中</b>", "", ""
            elif outcome["triggered"]:
                headline = "🔄 <b>重診已排程</b>"
                lead = "✅ "
                trailer = "\nAI 分析結果將自動更新事件狀態。"
            else:
                headline, lead, trailer = "⚠️ <b>重診失敗</b>", "", ""

            msg = (
                f"{headline}\n\n"
                f"🔖 <code>{html.escape(incident_id)}</code>\n\n"
                f"{lead}{html.escape(outcome['message'])}{trailer}"
            )
            await self.send_notification(msg)

        except Exception as err:
            logger.warning("send_reanalyze_result_failed", incident_id=incident_id, error=str(err))
            await self.send_notification(
                f"⚠️ 重診觸發失敗: {html.escape(str(err)[:100])}"
            )

    # =========================================================================
    # Sprint 5.1 T1-T6: Data Safety Guardrail 通知場景
    # (2026-04-08 Claude Sonnet 4.6 Asia/Taipei，ADR-062)
    # =========================================================================

    async def send_guardrail_blocked(
        self,
        service_name: str,
        alertname: str,
        reason: str,
    ) -> None:
        """
        T1: GUARDRAIL_BLOCKED — the service is at BLOCK level, so auto-repair
        is forbidden.

        All three arguments are HTML-escaped. Failures are logged, never raised.
        """
        divider = "━━━━━━━━━━━━━━━━━"
        try:
            card_lines = [
                "🚫 <b>[服務保護] 自動修復已阻擋</b>",
                divider,
                f"服務: <code>{html.escape(service_name)}</code>",
                f"告警: <code>{html.escape(alertname)}</code>",
                f"原因: {html.escape(reason)}",
                divider,
                "⚠️ 請人工評估並手動處理",
            ]
            await self.send_notification("\n".join(card_lines))
        except Exception as err:
            logger.error("t1_guardrail_blocked_notify_failed", service=service_name, error=str(err))

    async def send_preflight_failed(
        self,
        service_name: str,
        backup_age_hours: float,
        max_age_hours: float,
        backup_name: str | None,
    ) -> None:
        """T2: PRE_FLIGHT_FAILED + BACKUP_TRIGGERED — 備份過期，修復暫停"""
        try:
            backup_status = (
                f"緊急備份: 已啟動 <code>{html.escape(backup_name)}</code>"
                if backup_name
                else "緊急備份: <b>啟動失敗</b>，請人工處理"
            )
            text = (
                "⏸ <b>[Pre-flight 阻擋] 備份已過期，修復暫停</b>\n"
                "━━━━━━━━━━━━━━━━━\n"
                f"服務: <code>{html.escape(service_name)}</code>\n"
                f"備份距今: {backup_age_hours:.1f} 小時（上限 {max_age_hours:.0f} 小時）\n"
                f"{backup_status}\n"
                "━━━━━━━━━━━━━━━━━\n"
                "請等待備份完成後，人工重新評估修復方案"
            )
            await self.send_notification(text)
        except Exception as e:
            logger.error("t2_preflight_failed_notify_failed", service=service_name, error=str(e))

    async def send_backup_result(
        self,
        backup_name: str,
        success: bool,
        error_msg: str | None = None,
    ) -> None:
        """T3: BACKUP_COMPLETED / BACKUP_FAILED — 緊急備份結果"""
        try:
            if success:
                text = (
                    "✅ <b>緊急備份完成</b>\n"
                    f"備份: <code>{html.escape(backup_name)}</code>\n"
                    "可繼續手動執行修復"
                )
            else:
                err = html.escape(error_msg or "未知錯誤")
                text = (
                    "❌ <b>緊急備份失敗</b>\n"
                    f"備份: <code>{html.escape(backup_name)}</code>\n"
                    f"錯誤: {err}\n"
                    "請人工介入，備份異常"
                )
            await self.send_notification(text)
        except Exception as e:
            logger.error("t3_backup_result_notify_failed", backup=backup_name, error=str(e))

    async def send_multisig_waiting(
        self,
        action: str,
        service_name: str,
        votes_received: int,
        votes_required: int,
        approval_id: str,
    ) -> None:
        """
        T4: APPROVAL_ESCALATED — the first vote is in, waiting for the second.

        ``action``, ``service_name`` and ``approval_id`` are HTML-escaped; the
        vote counts are rendered as "received/required". Failures are logged,
        never raised.
        """
        divider = "━━━━━━━━━━━━━━━━━"
        try:
            card_lines = [
                "🔐 <b>[MultiSig] 等待第 2 票授權</b>",
                divider,
                f"操作: {html.escape(action)}",
                f"服務: <code>{html.escape(service_name)}</code>",
                "風險: CRITICAL（HITL 雙簽）",
                f"已獲授權: {votes_received}/{votes_required} 票",
                f"審核 ID: <code>{html.escape(approval_id)}</code>",
                divider,
                "請第二位審核者登入確認",
            ]
            await self.send_notification("\n".join(card_lines))
        except Exception as err:
            logger.error("t4_multisig_waiting_notify_failed", approval=approval_id, error=str(err))

    async def send_multisig_approved(
        self,
        action: str,
        service_name: str,
    ) -> None:
        """
        T5: MultiSig complete (2/2) — announce that dual sign-off passed.

        Both arguments are HTML-escaped. Failures are logged, never raised.
        """
        try:
            card_lines = [
                "✅ <b>[MultiSig 完成] 雙簽授權通過</b>",
                f"操作: {html.escape(action)}",
                f"服務: <code>{html.escape(service_name)}</code>",
                "授權: 2/2 票  開始執行...",
            ]
            await self.send_notification("\n".join(card_lines))
        except Exception as err:
            logger.error("t5_multisig_approved_notify_failed", service=service_name, error=str(err))

    async def send_change_applied(
        self,
        operator: str,
        action_description: str,
        timestamp: str,
    ) -> None:
        """
        T6: CHANGE_APPLIED — record a manual change.

        All three arguments are HTML-escaped. Failures are logged, never raised.
        """
        try:
            card_lines = [
                "📝 <b>[變更記錄] 手動操作已記錄</b>",
                "━━━━━━━━━━━━━━━━━",
                f"操作者: {html.escape(operator)}",
                f"動作: {html.escape(action_description)}",
                f"時間: {html.escape(timestamp)}",
            ]
            await self.send_notification("\n".join(card_lines))
        except Exception as err:
            logger.error("t6_change_applied_notify_failed", operator=operator, error=str(err))

    async def send_notification(
        self,
        text: str,
        parse_mode: str = "HTML",
        chat_id: str | int | None = None,
    ) -> dict:
        """
        發送純文字通知

        Args:
            text: 訊息內容
            parse_mode: 解析模式

        Returns:
            dict: API 回應
        """
        payload_text = text[:500]
        payload_parse_mode = parse_mode
        if parse_mode and parse_mode.upper() == "HTML" and len(text) > 500:
            payload_text = _plain_text_from_html(text, limit=500)
            payload_parse_mode = None

        payload = {
            "chat_id": chat_id or self.alert_chat_id,
            "text": payload_text,  # SOUL.md 字數限制
        }
        if payload_parse_mode:
            payload["parse_mode"] = payload_parse_mode

        try:
            return await self._send_request("sendMessage", payload)
        except TelegramGatewayError as exc:
            if payload_parse_mode and payload_parse_mode.upper() == "HTML" and "HTTP error: 400" in str(exc):
                fallback_payload = {
                    "chat_id": chat_id or self.alert_chat_id,
                    "text": _plain_text_from_html(text, limit=500),
                }
                return await self._send_request("sendMessage", fallback_payload)
            raise

    async def _send_html_line_message(
        self,
        lines: list[str],
        *,
        chat_id: str | int | None = None,
        failure_context: str,
        reply_markup: dict | None = None,
    ) -> None:
        """Send a multi-line HTML message without cutting Telegram tags in half.

        ``lines`` is split by ``_telegram_html_chunks`` and every chunk is
        delivered with up to three attempts:

        1. HTML send (``parse_mode="HTML"``).
        2. Plain-text send of ``_plain_text_from_html(chunk)``.
        3. Rescue send of ``_plain_text_from_html(chunk, limit=3500)`` with
           ``_skip_incident_thread_reply`` set and no reply markup
           (the flag is interpreted by ``_send_request``; presumably it
           disables thread-reply handling; confirm there).

        ``reply_markup`` is attached only to the first chunk. Send failures
        are logged, never raised.

        Args:
            lines: HTML lines making up the message.
            chat_id: Destination chat; falls back to ``self.alert_chat_id``.
            failure_context: Label added to every failure log entry.
            reply_markup: Optional keyboard for the first chunk.
        """
        chunks = _telegram_html_chunks(lines)
        for index, chunk in enumerate(chunks):
            keyboard = reply_markup if index == 0 and reply_markup else None

            # Attempt 1: HTML.
            try:
                html_payload: dict = {
                    "chat_id": chat_id or self.alert_chat_id,
                    "text": chunk,
                    "parse_mode": "HTML",
                }
                if keyboard:
                    html_payload["reply_markup"] = keyboard
                await self._send_request("sendMessage", html_payload)
            except Exception as exc:
                logger.warning(
                    "telegram_html_line_message_failed",
                    failure_context=failure_context,
                    chunk_index=index,
                    chunk_count=len(chunks),
                    error=str(exc),
                )
            else:
                continue

            # Attempt 2: same chunk as plain text.
            plain_payload: dict = {
                "chat_id": chat_id or self.alert_chat_id,
                "text": _plain_text_from_html(chunk),
            }
            if keyboard:
                plain_payload["reply_markup"] = keyboard
            try:
                await self._send_request("sendMessage", plain_payload)
            except Exception as fallback_exc:
                logger.warning(
                    "telegram_html_line_message_plain_fallback_failed",
                    failure_context=failure_context,
                    chunk_index=index,
                    chunk_count=len(chunks),
                    error=str(fallback_exc),
                )
            else:
                continue

            # Attempt 3: length-limited plain text, no keyboard.
            rescue_payload: dict = {
                "chat_id": chat_id or self.alert_chat_id,
                "text": _plain_text_from_html(chunk, limit=3500),
                "_skip_incident_thread_reply": True,
            }
            try:
                await self._send_request("sendMessage", rescue_payload)
            except Exception as rescue_exc:
                logger.error(
                    "telegram_html_line_message_rescue_failed",
                    failure_context=failure_context,
                    chunk_index=index,
                    chunk_count=len(chunks),
                    error=str(rescue_exc),
                )

    async def send_alert_notification(
        self,
        text: str,
        parse_mode: str = "HTML",
        reply_markup: dict | None = None,
    ) -> dict:
        """發送告警型純文字通知到 SRE 戰情室群組。"""
        payload: dict = {
            "chat_id": self.alert_chat_id,
            "text": text[:4096],
            "parse_mode": parse_mode,
        }
        if reply_markup:
            payload["reply_markup"] = reply_markup
        return await self._send_request("sendMessage", payload)

    # =========================================================================
    # 2026-05-04 Claude Sonnet 4.6: send_text 公開 wrapper（修復 drift_adopt_telegram_failed）
    # =========================================================================

    async def send_text(
        self,
        text: str,
        chat_id: int | str | None = None,
        parse_mode: str = "HTML",
        disable_web_page_preview: bool = True,
    ) -> dict:
        """
        公開 send_text wrapper — 委派至 _send_request('sendMessage', ...)

        給 drift_adopt_service / drift_remediator / runbook_generator /
        signoz_webhook 等服務使用的通用純文字送出方法。
        預設送往 alert_chat_id（SRE 群組）。

        Args:
            text: 訊息內容（最多 4096 字元）
            chat_id: 目標 chat ID，None 時使用 alert_chat_id
            parse_mode: 解析模式（預設 HTML）
            disable_web_page_preview: 是否關閉網頁預覽

        Returns:
            dict: Telegram API 回應
        """
        payload: dict = {
            "chat_id": chat_id or self.alert_chat_id,
            "text": text[:4096],
            "parse_mode": parse_mode,
            "disable_web_page_preview": disable_web_page_preview,
        }
        return await self._send_request("sendMessage", payload)

    # =========================================================================
    # 2026-04-24 Claude Sonnet 4.6 (ADR-095 WS4): Hermes NL 回覆
    # =========================================================================

    async def send_hermes_reply(
        self,
        text: str,
        chat_id: str | int,
        reply_to_message_id: int | None = None,
    ) -> dict:
        """
        傳送 Hermes NL 回覆（長文，最多 4096 字元，純文字模式）。

        Args:
            text: 回覆內容（由 nl_gateway 已截斷至 4000 字以內）
            chat_id: 目標 chat ID
            reply_to_message_id: 回覆哪則訊息（可選）
        """
        payload: dict = {
            "chat_id": chat_id,
            "text": text[:4096],
        }
        if reply_to_message_id:
            payload["reply_to_message_id"] = reply_to_message_id
        return await self._send_request("sendMessage", payload)

    # =========================================================================
    # 2026-04-03 ogt: SRE 戰情室群組三頭政治 (Triumvirate) — ADR-053
    # @tsenyangbot 發告警卡片到群組，OpenClaw/NemoClaw Bot 各自回覆分析
    # =========================================================================

    async def send_to_group(
        self,
        text: str,
        parse_mode: str = "HTML",
        reply_markup: dict | None = None,
    ) -> dict:
        """
        Send a message to the SRE group (``settings.SRE_GROUP_CHAT_ID``)
        through this gateway's own ``_send_request``.

        Args:
            text: Message content; truncated to 4096 characters.
            parse_mode: Telegram parse mode.
            reply_markup: Optional keyboard; included only when truthy.

        Returns:
            dict: Whatever ``_send_request`` returns, or ``{}`` when
            no SRE group chat ID is configured (a warning is logged).
        """
        group_chat_id = settings.SRE_GROUP_CHAT_ID
        if not group_chat_id:
            logger.warning("send_to_group_skipped", reason="SRE_GROUP_CHAT_ID not configured")
            return {}

        optional_fields = {"reply_markup": reply_markup} if reply_markup else {}
        request: dict = {
            "chat_id": group_chat_id,
            "text": text[:4096],
            "parse_mode": parse_mode,
            **optional_fields,
        }
        return await self._send_request("sendMessage", request)

    async def _send_as_bot(
        self,
        token: str,
        chat_id: str,
        text: str,
        reply_to_message_id: int | None = None,
        parse_mode: str = "HTML",
    ) -> dict:
        """
        用指定 Bot Token 發訊息。

        Args:
            token: Bot Token
            chat_id: 群組 Chat ID
            text: 訊息內容
            reply_to_message_id: 回覆哪則訊息的 message_id
            parse_mode: 解析模式

        Returns:
            dict: Telegram API 回應
        """
        if not self._http_client:
            raise TelegramGatewayError("HTTP client not initialized")

        url = f"{self.TELEGRAM_API_BASE}/bot{token}/sendMessage"
        payload: dict = {
            "chat_id": chat_id,
            "text": text[:4096],
            "parse_mode": parse_mode,
        }
        # 2026-04-03 ogt: supergroup 跨 Bot reply 需用 reply_parameters (Bot API v6.7+)
        # 舊的 reply_to_message_id 在 supergroup 會 400，改用新格式 + allow_sending_without_reply
        if reply_to_message_id:
            payload["reply_parameters"] = {
                "message_id": reply_to_message_id,
                "allow_sending_without_reply": True,
            }

        response = await self._http_client.post(url, json=payload)
        response.raise_for_status()
        result = response.json()
        result_val = result.get("result") if isinstance(result, dict) else None
        if isinstance(result_val, dict) and "message_id" in result_val:
            await self._mirror_outbound_message(
                method="sendMessage",
                payload=payload,
                provider_message_id=str(result_val["message_id"]),
            )
        return result

    async def send_as_openclaw(
        self,
        text: str,
        reply_to_message_id: int | None = None,
    ) -> dict:
        """
        Post to the SRE group using ``settings.OPENCLAW_BOT_TOKEN``.

        Args:
            text: Message content.
            reply_to_message_id: Message to reply to, if any.

        Returns:
            dict: The ``_send_as_bot`` result, or ``{}`` when the token or
            the SRE group chat ID is not configured (a warning is logged).
        """
        if settings.OPENCLAW_BOT_TOKEN and settings.SRE_GROUP_CHAT_ID:
            return await self._send_as_bot(
                token=settings.OPENCLAW_BOT_TOKEN,
                chat_id=settings.SRE_GROUP_CHAT_ID,
                text=text,
                reply_to_message_id=reply_to_message_id,
            )

        logger.warning("send_as_openclaw_skipped", reason="OPENCLAW_BOT_TOKEN or SRE_GROUP_CHAT_ID not configured")
        return {}

    async def send_as_nemotron(
        self,
        text: str,
        reply_to_message_id: int | None = None,
    ) -> dict:
        """
        Post to the SRE group using ``settings.NEMOTRON_BOT_TOKEN``.

        Args:
            text: Message content.
            reply_to_message_id: Message to reply to, if any.

        Returns:
            dict: The ``_send_as_bot`` result, or ``{}`` when the token or
            the SRE group chat ID is not configured (a warning is logged).
        """
        if settings.NEMOTRON_BOT_TOKEN and settings.SRE_GROUP_CHAT_ID:
            return await self._send_as_bot(
                token=settings.NEMOTRON_BOT_TOKEN,
                chat_id=settings.SRE_GROUP_CHAT_ID,
                text=text,
                reply_to_message_id=reply_to_message_id,
            )

        logger.warning("send_as_nemotron_skipped", reason="NEMOTRON_BOT_TOKEN or SRE_GROUP_CHAT_ID not configured")
        return {}

    async def trigger_group_ai_discussion(
        self,
        alert_message_id: int,
        alert_summary: str,
    ) -> None:
        """
        Ask OpenClaw for a root-cause analysis of an alert and post it as a
        reply to the alert message in the SRE group.

        Only OpenClaw analyses alerts here; this method makes no NemoClaw call.

        The method is best-effort: import errors and any exception from the
        AI call or the Telegram send are logged and never raised.

        The analysis text is HTML-escaped before it is embedded in the
        message, because ``send_as_openclaw`` sends with HTML parse mode and
        Telegram rejects messages containing unescaped ``<`` / ``&`` that do
        not form valid markup.

        Args:
            alert_message_id: ``message_id`` of the alert message to reply to.
            alert_summary: Alert summary text handed to the AI for analysis.
        """
        try:
            from src.services.chat_manager import ChatManager  # noqa: PLC0415
        except ImportError:
            logger.error("trigger_group_ai_discussion_failed", reason="Cannot import ChatManager")
            return

        try:
            chat_mgr = ChatManager()

            openclaw_prompt = (
                f"你是 OpenClaw，AWOOOI SRE 戰情室首席 AI，精通 K8s、Prometheus、告警分析。\n"
                f"以下是一則基礎設施告警，請進行 RCA 根因分析並給出 3 點具體建議行動。\n"
                f"繁體中文回應，不超過 300 字：\n\n"
                f"{alert_summary}"
            )

            openclaw_analysis = await chat_mgr._call_openclaw(
                system_prompt="你是 OpenClaw，AWOOOI SRE 戰情室首席 AI。稱呼用戶為「老闆」。",
                user_message=openclaw_prompt,
            )

            if openclaw_analysis and not isinstance(openclaw_analysis, Exception):
                # Model output is free text, not trusted markup: escape it so a
                # stray "<" or "&" cannot break Telegram's HTML entity parsing.
                safe_analysis = html.escape(str(openclaw_analysis))
                await self.send_as_openclaw(
                    text=f"🦞 <b>OpenClaw 分析</b>\n\n{safe_analysis}",
                    reply_to_message_id=alert_message_id,
                )
                logger.info("group_ai_discussion_openclaw_sent")
            else:
                logger.warning("trigger_group_ai_discussion_openclaw_empty")

            logger.info("group_ai_discussion_completed", alert_message_id=alert_message_id)

        except Exception as e:
            # A failed group AI discussion must not affect the main alert flow.
            logger.error("trigger_group_ai_discussion_failed", error=str(e))

    async def close(self) -> None:
        """Shut the gateway down.

        Clears the polling flag, cancels and awaits the polling and leader
        tasks that are still running, closes the HTTP client, and marks the
        gateway as uninitialized.
        """
        self._polling_active = False

        background_tasks = (self._polling_task, self._leader_task)
        for pending in background_tasks:
            if not pending or pending.done():
                continue
            pending.cancel()
            try:
                await pending
            except asyncio.CancelledError:
                # Expected outcome of the cancel() above.
                pass
        self._polling_task = self._leader_task = None

        client = self._http_client
        if client:
            await client.aclose()
            self._http_client = None

        self._initialized = False
        logger.info("telegram_gateway_closed")

    # =========================================================================
    # Long Polling 實作 (Phase 5 內網修復)
    # =========================================================================

    async def start_long_polling(self) -> None:
        """
        Start long polling as a background task, or enter watcher mode.

        The gateway is initialized first if needed. The method then tries to
        take the Redis leader key (``SET NX`` with a TTL):

        - On success the webhook is deleted, the polling loop and the
          lock-renewal task are started, and ``_last_update_id`` is reset.
        - Otherwise only ``_leader_watcher`` is started, which keeps trying
          to take over the lock.

        NOTE(review): the ``_polling_active`` guard only covers the leader
        case; calling this again while in watcher mode starts another watcher
        task and overwrites ``_leader_task``.
        """
        if not self._initialized and not await self.initialize():
            logger.error("telegram_long_polling_failed", reason="Gateway not initialized")
            return

        if self._polling_active:
            logger.warning("telegram_long_polling_already_running")
            return

        # NX: the key is only set when nobody currently holds it.
        redis = await get_redis()
        is_leader = await redis.set(POLLING_LEADER_KEY, self._pod_id, nx=True, ex=POLLING_LEADER_TTL)

        if is_leader:
            # Leader: drop any webhook, then start polling and lock renewal.
            await self._delete_webhook()

            self._polling_active = True
            self._last_update_id = 0
            self._polling_task = asyncio.create_task(self._polling_loop())
            self._leader_task = asyncio.create_task(self._leader_renewer())

            logger.info(
                "telegram_long_polling_started",
                pod_id=self._pod_id,
                timeout=LONG_POLLING_TIMEOUT,
                chat_id=self.chat_id[:10] + "..." if self.chat_id else "N/A",
            )
            return

        # Not the leader: log who is, then periodically try to take over.
        lock_holder = await redis.get(POLLING_LEADER_KEY)
        logger.info(
            "telegram_polling_not_leader",
            pod_id=self._pod_id,
            current_leader=lock_holder,
            action="watcher_mode",
        )
        self._leader_task = asyncio.create_task(self._leader_watcher())

    async def _delete_webhook(self) -> None:
        """
        Delete the current webhook so ``getUpdates`` can be used.

        Calls ``deleteWebhook`` with ``drop_pending_updates=True``, waits one
        second, then reads ``getWebhookInfo`` and logs whether a webhook URL
        is still registered. Does nothing without an HTTP client; every
        exception is logged as ``telegram_webhook_delete_error`` and swallowed.
        """
        if not self._http_client:
            return

        try:
            # Step 1: delete the webhook.
            delete_response = await self._http_client.post(
                f"{self.api_url}/deleteWebhook",
                json={"drop_pending_updates": True},
            )
            outcome = delete_response.json()
            if not outcome.get("ok"):
                logger.warning(
                    "telegram_webhook_delete_failed",
                    error=outcome.get("description"),
                )
            else:
                logger.info(
                    "telegram_webhook_deleted",
                    description=outcome.get("description", "Webhook deleted"),
                )

            # Step 2: give Telegram a moment to sync (avoids 409 Conflict).
            await asyncio.sleep(1)

            # Step 3: verify the webhook state.
            info_response = await self._http_client.get(f"{self.api_url}/getWebhookInfo")
            remaining_url = info_response.json().get("result", {}).get("url", "")
            if not remaining_url:
                logger.info("telegram_webhook_confirmed_deleted")
            else:
                logger.warning(
                    "telegram_webhook_still_active",
                    url=remaining_url[:50],
                )

        except Exception as exc:
            logger.error("telegram_webhook_delete_error", error=str(exc))

    async def _polling_loop(self) -> None:
        """
        Main long-polling loop.

        While ``_polling_active`` is set, fetches updates with
        ``_get_updates`` and dispatches each one to ``_process_update``.

        Every update is processed in isolation. ``_get_updates`` advances the
        offset past the whole batch before processing starts, so an exception
        escaping one update would otherwise drop the rest of the batch for
        good. It would also be misread by the polling-level handlers below,
        e.g. an ``httpx`` timeout raised while sending a reply would look like
        an ordinary long-poll timeout. Processing errors are logged as
        ``telegram_update_processing_failed`` and the loop moves on.

        Polling-level errors:
        - ``CancelledError``: stop the loop.
        - ``httpx.TimeoutException``: normal long-poll timeout, poll again.
        - HTTP 409: another poller exists; release the leader lock if this
          pod holds it, stop polling and start a watcher.
        - anything else: log and retry after ``LONG_POLLING_RETRY_DELAY``.
        """
        logger.info("[Telegram] Long polling started - 神經已接通，等待統帥指令...")

        while self._polling_active:
            try:
                updates = await self._get_updates()

                for update in updates:
                    try:
                        await self._process_update(update)
                    except Exception as update_err:
                        # CancelledError is a BaseException and still reaches
                        # the outer handler; only ordinary failures stop here.
                        logger.error(
                            "telegram_update_processing_failed",
                            update_id=update.get("update_id") if isinstance(update, dict) else None,
                            error=str(update_err),
                        )

            except asyncio.CancelledError:
                logger.info("telegram_long_polling_cancelled")
                break

            except httpx.TimeoutException:
                # A long-poll timeout is normal; start the next round.
                continue

            except httpx.HTTPStatusError as e:
                if e.response.status_code == 409:
                    # 409 Conflict: another poller is active. Release the
                    # leader lock and let the watchers compete for it.
                    logger.warning(
                        "telegram_polling_conflict",
                        status=409,
                        pod_id=self._pod_id,
                        action="releasing_leader_lock",
                    )
                    redis = await get_redis()
                    current = await redis.get(POLLING_LEADER_KEY)
                    if current == self._pod_id:
                        await redis.delete(POLLING_LEADER_KEY)
                    self._polling_active = False
                    # The watcher re-enters the election after POLLING_LEADER_WATCH seconds.
                    self._leader_task = asyncio.create_task(self._leader_watcher())
                    break
                else:
                    logger.error("telegram_polling_http_error", status=e.response.status_code)
                    await asyncio.sleep(LONG_POLLING_RETRY_DELAY)

            except Exception as e:
                logger.error("telegram_polling_error", error=str(e))
                # Back off before retrying.
                await asyncio.sleep(LONG_POLLING_RETRY_DELAY)

        logger.info("telegram_long_polling_stopped")

    async def _leader_renewer(self) -> None:
        """
        Background task that keeps the leader lock alive.

        Every ``POLLING_LEADER_RENEW`` seconds it reads the leader key; while
        the key still holds this pod's ID the TTL is reset to
        ``POLLING_LEADER_TTL``. If another value is found the polling flag is
        cleared and the task ends. Redis errors are logged and the loop keeps
        going.

        NOTE(review): unlike the 409 path in ``_polling_loop``, losing the
        lock here does not start a watcher, so this pod does not rejoin the
        election by itself. Confirm that is intended.
        """
        while self._polling_active:
            await asyncio.sleep(POLLING_LEADER_RENEW)
            if not self._polling_active:
                return
            try:
                redis = await get_redis()
                lock_holder = await redis.get(POLLING_LEADER_KEY)
                if lock_holder == self._pod_id:
                    await redis.expire(POLLING_LEADER_KEY, POLLING_LEADER_TTL)
                    continue

                logger.warning(
                    "telegram_leader_lock_lost",
                    pod_id=self._pod_id,
                    current_leader=lock_holder,
                )
                self._polling_active = False
                return
            except Exception as exc:
                logger.error("telegram_leader_renew_error", error=str(exc))

    async def _leader_watcher(self) -> None:
        """
        Takeover monitor for a pod that is not the leader.

        Every ``POLLING_LEADER_WATCH`` seconds it tries to set the leader key
        (``SET NX`` with a TTL). Once that succeeds, e.g. after the previous
        leader's key expired, this pod deletes the webhook, starts the
        polling loop and the lock-renewal task, and the watcher ends.
        Errors other than cancellation are logged and the watcher retries.
        """
        while not self._polling_active:
            await asyncio.sleep(POLLING_LEADER_WATCH)
            try:
                redis = await get_redis()
                won_election = await redis.set(
                    POLLING_LEADER_KEY, self._pod_id, nx=True, ex=POLLING_LEADER_TTL
                )
                if not won_election:
                    continue

                logger.info(
                    "telegram_leader_acquired",
                    pod_id=self._pod_id,
                    action="starting_polling",
                )
                await self._delete_webhook()
                self._polling_active = True
                self._last_update_id = 0
                self._polling_task = asyncio.create_task(self._polling_loop())
                self._leader_task = asyncio.create_task(self._leader_renewer())
                return
            except asyncio.CancelledError:
                return
            except Exception as exc:
                logger.error("telegram_leader_watch_error", error=str(exc))

    async def _get_updates(self) -> list[dict]:
        """
        呼叫 Telegram getUpdates API

        Returns:
            list[dict]: 更新列表
        """
        if not self._http_client:
            return []

        url = f"{self.api_url}/getUpdates"
        payload = {
            "offset": self._last_update_id + 1,
            "timeout": LONG_POLLING_TIMEOUT,
            "allowed_updates": ["callback_query", "message"],  # 監聽按鈕與文字訊息
        }

        response = await self._http_client.post(
            url,
            json=payload,
            timeout=LONG_POLLING_TIMEOUT + 10,  # 比 API timeout 多一點
        )
        response.raise_for_status()
        result = response.json()

        if not result.get("ok"):
            raise TelegramGatewayError(f"getUpdates failed: {result.get('description')}")

        updates = result.get("result", [])

        # 更新 offset
        if updates:
            self._last_update_id = updates[-1]["update_id"]

        return updates

    async def _process_update(self, update: dict) -> None:
        """
        處理單個 Telegram Update

        Args:
            update: Telegram Update 物件
        """
        update_id = update.get("update_id")
        callback_query = update.get("callback_query")
        message = update.get("message")

        if not callback_query and not message:
            logger.debug("telegram_update_ignored", update_id=update_id, reason="unsupported update type")
            return

        if callback_query:
            await self._handle_callback_query(update_id, callback_query)
        elif message:
            await self._handle_chat_message(update_id, message)

    async def _handle_callback_query(self, update_id: int, callback_query: dict) -> None:
        """Handle an inline-button click.

        Extracts the callback ID, data and sender, passes them to
        ``handle_callback`` and, when that reports success, runs the
        resulting approval action through ``_execute_approval_action``.
        Callbacks missing the ID, the data or the user ID are logged as
        ``telegram_callback_invalid`` and dropped.

        Args:
            update_id: ID of the enclosing update (used for logging).
            callback_query: Telegram CallbackQuery object.
        """
        callback_query_id = callback_query.get("id")
        callback_data = callback_query.get("data")
        sender = callback_query.get("from", {})
        user_id = sender.get("id")

        if not (callback_query_id and callback_data and user_id):
            logger.warning("telegram_callback_invalid", update_id=update_id)
            return

        username = sender.get("username") or sender.get("first_name") or str(user_id)
        source_message = callback_query.get("message", {})
        original_text = source_message.get("text", "")
        message_id = source_message.get("message_id")

        logger.info(
            "telegram_callback_received",
            update_id=update_id,
            user_id=user_id,
            username=username,
        )

        # Reuse the existing handle_callback logic.
        result = await self.handle_callback(
            callback_query_id=callback_query_id,
            callback_data=callback_data,
            user_id=user_id,
            message_id=message_id,
            original_text=original_text,
            username=username,
        )
        if not result.get("success"):
            return

        # Apply the approve/reject decision to the database.
        await self._execute_approval_action(
            action=result["action"],
            approval_id=result["approval_id"],
            user_id=user_id,
            username=username,
            message_id=message_id,
        )

    async def _handle_chat_message(self, update_id: int, message: dict) -> None:
        """Route an incoming Telegram message (private chat or SRE group).

        Routing order:

        1. Messages without a sender ID or sent by a bot are ignored
           (prevents bots from triggering each other in a loop).
        2. Photo messages are sent to the image analysis service, but only
           for authorized senders: the SRE group, or a sender accepted by the
           security interceptor. Photos pass the same gate as text messages,
           so an arbitrary Telegram user cannot trigger image analysis.
        3. Text from the SRE group goes to ``_handle_group_message``.
        4. Any other text must pass the security interceptor; then ``/ai``
           commands (additionally restricted to the user whitelist) are
           handled, and everything else is answered by the chat manager.

        Args:
            update_id: ID of the enclosing update (used for logging).
            message: Telegram Message object.
        """
        text = message.get("text")
        user = message.get("from", {})
        user_id = user.get("id")
        chat_id = message.get("chat", {}).get("id")
        chat_type = message.get("chat", {}).get("type", "private")
        message_id = message.get("message_id")
        username = user.get("username") or user.get("first_name") or str(user_id)

        # No identifiable sender, or a bot: nothing to do.
        if not user_id or user.get("is_bot"):
            return

        # The SRE group is a closed environment managed by its Telegram
        # admins, so it is exempt from the per-user whitelist.
        is_group = chat_type in ("group", "supergroup")
        is_sre_group = str(chat_id) == str(settings.SRE_GROUP_CHAT_ID)
        from_sre_group = is_group and is_sre_group

        async def _sender_authorized() -> bool:
            """Run the security interceptor check (ADR-012); fail closed."""
            try:
                interceptor = get_security_interceptor()
                await interceptor.intercept_telegram(user_id)
            except Exception as e:
                logger.warning("telegram_chat_unauthorized", user_id=user_id, error=str(e))
                return False
            return True

        # Photo routing (Phase 34, ADR-067), gated by the same authorization
        # as text messages.
        photos = message.get("photo")
        if photos:
            if not from_sre_group and not await _sender_authorized():
                return
            best = max(photos, key=lambda p: p.get("file_size", 0))
            file_id = best.get("file_id", "")
            caption = message.get("caption", "請用繁體中文描述這張圖片")
            if file_id:
                try:
                    from src.services.image_analysis_service import get_image_analysis_service
                    svc = get_image_analysis_service()
                    await svc.download_and_analyze(
                        chat_id=str(chat_id),
                        file_id=file_id,
                        question=caption,
                    )
                except Exception as _img_err:
                    logger.warning("image_analysis_polling_failed", error=str(_img_err))
            return

        if not text:
            return

        logger.info(
            "telegram_chat_received",
            update_id=update_id,
            user_id=user_id,
            username=username,
            chat_type=chat_type,
            text=text[:50],
        )

        # 1. SRE group messages are routed first.
        if from_sre_group:
            reply_to_message = message.get("reply_to_message")
            await self._handle_group_message(text, user_id, username, chat_id, message_id, reply_to_message)
            return

        # 2. Security check for every other chat (ADR-012).
        if not await _sender_authorized():
            return

        # 3. /ai command (Phase 24 C): whitelist users only.
        if text.strip().lower().startswith("/ai"):
            whitelist = settings.get_tg_user_whitelist()
            if not whitelist or user_id not in whitelist:
                logger.warning("telegram_ai_command_unauthorized", user_id=user_id, whitelist_empty=not whitelist)
                await self.send_notification("⛔ 未授權：/ai 指令僅限白名單用戶", parse_mode="HTML", chat_id=chat_id)
                return
            from src.services.ai_control import handle_ai_command
            response = await handle_ai_command(text.strip())
            await self.send_notification(response, parse_mode="HTML", chat_id=chat_id)
            logger.info("telegram_ai_command_handled", user_id=user_id, text=text[:50])
            return

        # 4. Private chat: show the typing indicator.
        await self._send_chat_action(chat_id, "typing")

        # 5. Let the chat manager generate the reply.
        chat_manager = get_chat_manager()
        response = await chat_manager.generate_response(
            user_id=user_id,
            username=username,
            message_text=text,
        )
        await self.send_notification(response, parse_mode="HTML", chat_id=chat_id)

    async def _handle_group_message(
        self,
        text: str,
        user_id: int,
        username: str,
        chat_id: int,  # noqa: ARG002
        message_id: int | None,
        reply_to_message: dict | None = None,
    ) -> None:
        """
        處理 SRE 群組訊息 (2026-04-03 ogt: Phase 22.6 Triumvirate)

        路由規則:
          Reply OpenClaw 訊息         → 只有 OpenClaw 回應
          Reply NemoClaw 訊息         → 只有 NemoClaw 回應
          @OpenClawAwoooI_Bot <msg>   → 只有 OpenClaw 回應
          @NemoTronAwoooI_Bot <msg>   → 只有 NemoClaw 回應
          其他訊息                    → 兩個 AI 並行回應
        """
        # ── 指令路由 (2026-04-03 ogt: 方案B slash commands) ──────────────────
        cmd = text.strip().split()[0].lower().split("@")[0] if text.strip() else ""
        if cmd.startswith("/"):
            await self._handle_group_command(cmd, chat_id, message_id, full_text=text.strip())
            return

        # ── Hermes NL: @tsenyangbot @mention → Claude Agent SDK 12-Agent (ADR-094/095) ──
        # 2026-04-25 Claude Sonnet 4.6: 接入 polling 路徑，HERMES_NL_ENABLED 控制
        if settings.HERMES_NL_ENABLED:
            _bot_un = getattr(settings, "TELEGRAM_BOT_USERNAME", "tsenyangbot")
            import unicodedata as _uc
            if f"@{_bot_un}".lower() in _uc.normalize("NFKC", text).lower():
                _clean = text.replace(f"@{_bot_un}", "").strip()
                if _clean:
                    from src.hermes.nl_gateway import process_nl_message as _nl
                    try:
                        _reply = await _nl(
                            _clean,
                            chat_id=str(chat_id),
                            user_id=user_id,
                            username=username,
                        )
                        await self.send_hermes_reply(
                            text=_reply,
                            chat_id=str(chat_id),
                            reply_to_message_id=message_id,
                        )
                    except Exception as _hermes_err:
                        logger.error("hermes_nl_polling_failed", error=str(_hermes_err))
                    return

        from src.services.chat_manager import get_chat_manager as _get_cm
        chat_mgr = _get_cm()

        # 全形/半形統一化後比較
        import unicodedata
        text_normalized = unicodedata.normalize("NFKC", text).lower()

        # Reply 路由: 若 Reply 的是 Bot 訊息，直接認定目標 AI (2026-04-03 ogt)
        if reply_to_message:
            replied_from = reply_to_message.get("from", {})
            if replied_from.get("is_bot"):
                replied_username = (replied_from.get("username") or "").lower()
                if "openclawawoooi" in replied_username:
                    mention_openclaw, mention_nemo = True, False
                elif "nemotronawoooi" in replied_username:
                    mention_openclaw, mention_nemo = False, True
                else:
                    mention_openclaw = "@openclawawoooi_bot" in text_normalized or "小o" in text_normalized
                    mention_nemo = "@nemotronawoooi_bot" in text_normalized or "小賀" in text_normalized or "小贺" in text_normalized
            else:
                mention_openclaw = "@openclawawoooi_bot" in text_normalized or "小o" in text_normalized
                mention_nemo = "@nemotronawoooi_bot" in text_normalized or "小賀" in text_normalized or "小贺" in text_normalized
        else:
            # 別名: 小O / 小o (含全形Ｏ) → OpenClaw; 小賀 / 小贺 → NemoClaw
            mention_openclaw = "@openclawawoooi_bot" in text_normalized or "小o" in text_normalized
            mention_nemo = "@nemotronawoooi_bot" in text_normalized or "小賀" in text_normalized or "小贺" in text_normalized

        # 去掉 @ mention 與別名，取出純訊息
        clean_text = unicodedata.normalize("NFKC", text)
        for token in ["@openclawawoooi_bot", "@OpenClawAwoooI_Bot", "@nemotronawoooi_bot", "@NemoTronAwoooI_Bot",
                      "小O", "小o", "小Ｏ", "小ｏ", "小賀", "小贺"]:
            clean_text = clean_text.replace(token, "").strip()
        if not clean_text:
            clean_text = text

        context = await chat_mgr.get_system_context()

        def _clean_ai_reply(text: str, max_chars: int = 600) -> str:
            """清理 AI 回覆：移除 Markdown 語法，截斷超長內容"""
            import re
            # 移除 Markdown bold/italic (**text**, *text*, __text__, _text_)
            text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
            text = re.sub(r'\*(.+?)\*', r'\1', text)
            text = re.sub(r'__(.+?)__', r'\1', text)
            text = re.sub(r'_(.+?)_', r'\1', text)
            # 移除 Markdown header (#, ##, ###)
            text = re.sub(r'^#{1,3}\s+', '', text, flags=re.MULTILINE)
            # 移除 <think> 標籤（deepseek-r1）
            text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL).strip()
            # 截斷
            if len(text) > max_chars:
                text = text[:max_chars].rsplit('\n', 1)[0] + '…'
            return text.strip()

        if mention_openclaw and not mention_nemo:
            # 只 OpenClaw 回應
            result = await chat_mgr._call_openclaw(
                f"{context}\n用戶 {username} 在 SRE 戰情室問你：",
                clean_text,
            )
            body = _clean_ai_reply(result) if result else '🔴 無響應'
            await self.send_as_openclaw(
                text=f"🦞 <b>OpenClaw</b>\n\n{body}",
                reply_to_message_id=message_id,
            )

        elif mention_nemo and not mention_openclaw:
            # 只 NemoClaw 回應
            result = await chat_mgr._call_nemotron(
                f"{context}\n用戶 {username} 在 SRE 戰情室問你：",
                clean_text,
            )
            body = (_clean_ai_reply(result) if result else '') or '🔴 無響應 (deepseek-r1 超時或思考截斷)'
            await self.send_as_nemotron(
                text=f"🤖 <b>NemoClaw</b>\n\n{body}",
                reply_to_message_id=message_id,
            )

        else:
            # 兩個 AI 並行回應，完成後互相評論
            oc_task = asyncio.create_task(
                chat_mgr._call_openclaw(f"{context}\n用戶 {username} 在 SRE 戰情室：", clean_text)
            )
            nemo_task = asyncio.create_task(
                chat_mgr._call_nemotron(f"{context}\n用戶 {username} 在 SRE 戰情室：", clean_text)
            )
            oc_result, nemo_result = await asyncio.gather(oc_task, nemo_task, return_exceptions=True)

            if oc_result and not isinstance(oc_result, Exception):
                await self.send_as_openclaw(
                    text=f"🦞 <b>OpenClaw</b>\n\n{_clean_ai_reply(oc_result)}",
                    reply_to_message_id=message_id,
                )

            if nemo_result and not isinstance(nemo_result, Exception):
                nemo_body = _clean_ai_reply(nemo_result) or "🔴 回覆清理後為空 (deepseek-r1 思考超時)"
                await self.send_as_nemotron(
                    text=f"🤖 <b>NemoClaw</b>\n\n{nemo_body}",
                    reply_to_message_id=message_id,
                )

        logger.info("group_message_handled", user_id=user_id, text=text[:50])

    async def _handle_group_command(self, cmd: str, _chat_id: int, message_id: int | None, full_text: str = "") -> None:
        """
        SRE 群組 Slash Commands (2026-04-03 ogt: 方案B)

        /status    → K8s Cluster 健康狀態
        /incidents → 活躍告警列表
        /cost      → 本月 AI 費用統計
        /pods      → 異常 Pod 列表
        /rag       → RAG 知識庫查詢 (ADR-067 Phase 33)
        /help      → 指令說明
        """
        from src.repositories.k8s_repository import get_k8s_repository
        from src.repositories.incident_repository import get_incident_repository
        from src.core.redis_client import get_redis
        from src.utils.timezone import now_taipei

        if cmd == "/status":
            try:
                k8s = get_k8s_repository()
                s = await k8s.get_pod_status_summary(namespace="awoooi-prod")
                running, total = s.get("running", 0), s.get("total", 0)
                problems = s.get("problem_pods", [])
                lines = ["<b>🖥 Cluster 狀態</b>", f"• Pods: {running}/{total} Running"]
                if problems:
                    lines.append(f"• 異常: {len(problems)} 個")
                    for p in problems[:5]:
                        lines.append(f"  ⚠️ {p}")
                else:
                    lines.append("• 全部正常 ✅")
                msg = "\n".join(lines)
            except Exception as e:
                msg = f"<b>🖥 Cluster 狀態</b>\n⚠️ 無法取得: {e}"
            await self.send_as_openclaw(text=msg, reply_to_message_id=message_id)

        elif cmd == "/incidents":
            try:
                repo = get_incident_repository()
                incidents = await repo.get_active()
                if incidents:
                    lines = ["<b>🚨 活躍告警</b>"]
                    for inc in incidents[:10]:
                        lines.append(f"• <code>{inc.incident_id}</code> SEV{inc.severity.value} — {inc.status.value}")
                    msg = "\n".join(lines)
                else:
                    msg = "<b>🚨 活躍告警</b>\n✅ 目前無告警"
            except Exception as e:
                msg = f"<b>🚨 活躍告警</b>\n⚠️ 無法取得: {e}"
            await self.send_as_openclaw(text=msg, reply_to_message_id=message_id)

        elif cmd == "/cost":
            redis = get_redis()
            month = now_taipei().strftime("%Y-%m")
            try:
                gemini_cost = float(await redis.get(f"gemini_cost:{month}") or 0)
                claude_cost = float(await redis.get(f"claude_cost:{month}") or 0)
                total = gemini_cost + claude_cost
                msg = (
                    f"<b>💰 {month} AI 費用統計</b>\n"
                    f"• 🦞 OpenClaw (Gemini Flash-Lite): <b>${gemini_cost:.4f}</b> / $10.00 上限\n"
                    f"• 🤖 NemoClaw (Claude Haiku 4.5): <b>${claude_cost:.4f}</b>\n"
                    f"• 合計: <b>${total:.4f}</b>"
                )
            except Exception as e:
                msg = f"<b>💰 費用統計</b>\n⚠️ 無法取得: {e}"
            await self.send_as_openclaw(text=msg, reply_to_message_id=message_id)

        elif cmd == "/pods":
            try:
                k8s = get_k8s_repository()
                s = await k8s.get_pod_status_summary(namespace="awoooi-prod")
                problems = s.get("problem_pods", [])
                if problems:
                    lines = [f"<b>⚠️ 異常 Pod ({len(problems)} 個)</b>"]
                    for p in problems[:15]:
                        lines.append(f"• <code>{p}</code>")
                    msg = "\n".join(lines)
                else:
                    msg = "<b>⚠️ 異常 Pod</b>\n✅ 全部 Pod 正常"
            except Exception as e:
                msg = f"<b>⚠️ 異常 Pod</b>\n⚠️ 無法取得: {e}"
            await self.send_as_openclaw(text=msg, reply_to_message_id=message_id)

        elif cmd == "/rag":
            # /rag <查詢內容> — RAG 知識庫語義查詢 (ADR-067 Phase 33)
            # 2026-04-10 Claude Sonnet 4.6 Asia/Taipei
            parts = full_text.split(None, 1)
            if len(parts) < 2 or not parts[1].strip():
                await self.send_as_openclaw(
                    text="<b>📚 RAG 知識庫查詢</b>\n用法: <code>/rag 你的問題</code>\n例如: <code>/rag 什麼是 ADR-067？</code>",
                    reply_to_message_id=message_id,
                )
                return
            question = parts[1].strip()
            await self.send_as_openclaw(
                text=f"<b>📚 查詢知識庫中...</b>\n<code>{question[:80]}</code>",
                reply_to_message_id=message_id,
            )
            try:
                from src.services.knowledge_rag_service import get_knowledge_rag_service
                svc = get_knowledge_rag_service()
                answer = await svc.query(question, top_k=5)
                msg = f"<b>📚 RAG 知識庫</b>\n<i>Q: {question[:80]}</i>\n\n{answer}"
            except Exception as e:
                logger.warning("rag_telegram_query_failed", error=str(e))
                msg = f"<b>📚 RAG 查詢失敗</b>\n{e}"
            await self.send_as_openclaw(text=msg, reply_to_message_id=message_id)

        elif cmd == "/help":
            msg = (
                "<b>🤖 SRE 戰情室指令</b>\n\n"
                "/status — 查詢 K8s Cluster 狀態\n"
                "/incidents — 列出活躍告警\n"
                "/cost — 查詢本月 AI 費用\n"
                "/pods — 列出異常 Pod\n"
                "/rag &lt;問題&gt; — 查詢 RAG 知識庫\n"
                "/help — 顯示此說明\n\n"
                "<b>對話方式:</b>\n"
                "• 直接輸入 → 小O + 小賀 同時回應\n"
                "• 小O 或 @OpenClawAwoooI_Bot → 只有 OpenClaw\n"
                "• 小賀 或 @NemoTronAwoooI_Bot → 只有 NemoClaw\n"
                "• Reply 某個 Bot 的訊息 → 只有那個 Bot 回應"
            )
            await self.send_as_openclaw(text=msg, reply_to_message_id=message_id)

        else:
            logger.debug("group_unknown_command", cmd=cmd)

    async def _send_chat_action(self, chat_id: int, action: str) -> None:
        """發送聊天狀態 (e.g., typing)"""
        if not self._http_client: return
        try:
            url = f"{self.api_url}/sendChatAction"
            await self._http_client.post(url, json={"chat_id": chat_id, "action": action})
        except: pass

    async def _notify_approval_result(
        self,
        message_id: int | None,
        incident_id: str,
        action: str,
        username: str,
        execution_triggered: bool,
    ) -> None:
        """
        2026-04-09 Claude Sonnet 4.6: 批准/拒絕後立即更新 Telegram 訊息狀態。

        策略：
        1. editMessageReplyMarkup — 移除批准/拒絕按鈕，保留資訊按鈕
        2. sendMessage reply_to → 在原訊息下方附加狀態行
        3. 如果 message_id 找不到，fallback 到 send_notification
        """
        import html as _html

        chat_id = self.alert_chat_id
        if not chat_id:
            return

        # 找到原始告警訊息 ID（優先 Redis，fallback DB）
        orig_msg_id = message_id
        if not orig_msg_id:
            try:
                redis = await get_redis()
                _val = await redis.get(f"tg_msg:{incident_id}")
                if _val:
                    orig_msg_id = int(_val)
                else:
                    # DB fallback
                    from src.services.approval_db import get_approval_service as _svc
                    _approvals = await _svc().get_all_approvals(incident_id=incident_id)
                    if _approvals and _approvals[0].telegram_message_id:
                        orig_msg_id = _approvals[0].telegram_message_id
            except Exception:
                pass

        if action == "approve":
            status_emoji = "✅"
            status_text = f"<b>已批准</b> by {_html.escape(username)}"
            # 2026-04-14 Claude Sonnet 4.6: 原「等待執行」誤導（實際沒有 gate 會卡住路徑）
            # 批准後一律顯示「執行中」，真實結果由 _push_execution_result_to_alert reply 補上
            suffix = "⚡ 執行中..."
        else:
            status_emoji = "❌"
            status_text = f"<b>已拒絕</b> by {_html.escape(username)}"
            suffix = ""

        status_line = f"{status_emoji} {status_text}　{suffix}".strip()

        if orig_msg_id:
            try:
                # 1. 移除批准/拒絕按鈕（只保留資訊按鈕列）
                info_buttons = [[
                    {"text": "📋 詳情", "callback_data": f"detail:{incident_id}"},
                    {"text": "📊 歷史", "callback_data": f"history:{incident_id}"},
                ]]
                awooop_row = _awooop_runs_button_row(incident_id)
                if awooop_row:
                    info_buttons.append(awooop_row)
                await self._send_request(
                    "editMessageReplyMarkup",
                    {
                        "chat_id": chat_id,
                        "message_id": orig_msg_id,
                        "reply_markup": {"inline_keyboard": info_buttons},
                    },
                )
            except Exception as _e:
                # 2026-04-09 Claude Sonnet 4.6: I3 架構Review修復 — 加 warning 防止靜默失敗
                logger.warning("notify_approval_edit_keyboard_failed", incident_id=incident_id, error=str(_e))

            try:
                # 2. 在原訊息下回覆狀態
                await self._send_request(
                    "sendMessage",
                    {
                        "chat_id": chat_id,
                        "text": status_line,
                        "parse_mode": "HTML",
                        "reply_to_message_id": orig_msg_id,
                    },
                )
                return
            except Exception as _e:
                logger.warning("notify_approval_reply_failed", incident_id=incident_id, error=str(_e))

        # fallback: 發新通知
        try:
            await self.send_alert_notification(status_line, parse_mode="HTML")
        except Exception as _e:
            logger.warning("notify_approval_fallback_failed", incident_id=incident_id, error=str(_e))

    async def _execute_approval_action(
        self,
        action: str,
        approval_id: str,
        user_id: int,
        username: str,
        message_id: int,
    ) -> None:
        """
        Apply an approval decision received from Telegram (persists via the
        approval service and reports back to the chat).

        Errors are logged as ``telegram_approval_action_failed`` and never raised.

        Args:
            action: ``approve``, ``reject`` or ``tune`` (``tune`` is log-only).
            approval_id: Approval UUID, or an incident ID (e.g. ``INC-xxx``) that
                is resolved to the first approval returned for that incident.
            user_id: Telegram user ID of the signer.
            username: Display name of the signer.
            message_id: Telegram message the decision refers to; used to reply.
        """
        # The approval service exposes sign_approval / reject_approval.
        from uuid import UUID

        from src.services.approval_db import get_approval_service

        try:
            service = get_approval_service()

            # approval_id may be an incident ID instead of a UUID (decision_manager
            # passes incident.incident_id); resolve it to the real approval UUID.
            approval_uuid: UUID | None = None
            try:
                approval_uuid = UUID(approval_id)
            except ValueError:
                # Not a UUID: look the approval up by incident ID.
                pending_list = await service.get_all_approvals(incident_id=approval_id)
                if pending_list:
                    approval_uuid = UUID(pending_list[0].id) if isinstance(pending_list[0].id, str) else pending_list[0].id
                else:
                    logger.warning(
                        "telegram_approval_not_found_by_incident",
                        approval_id=approval_id,
                    )
                    return

            if action == "approve":
                approval, message, execution_triggered = await service.sign_approval(
                    approval_id=approval_uuid,
                    signer_id=f"tg_{user_id}",
                    signer_name=username,
                    comment="Telegram 簽核 (Long Polling)",
                )

                if approval:
                    from src.models.approval import ApprovalStatus
                    status_val = approval.status.value if hasattr(approval.status, "value") else str(approval.status)
                    logger.info(
                        "telegram_approval_signed_via_polling",
                        approval_id=approval_id,
                        user_id=user_id,
                        status=status_val,
                        execution_triggered=execution_triggered,
                    )
                    # Only a record that is actually APPROVED may proceed. For any
                    # other status (per the original notes: sign_approval returned
                    # an already-resolved record) tell the user instead of
                    # claiming that execution started.
                    is_approved = approval.status == ApprovalStatus.APPROVED
                    if not is_approved:
                        try:
                            await self._send_request("sendMessage", {
                                "chat_id": self.alert_chat_id,
                                "text": f"ℹ️ 此告警已處理（狀態：{status_val}），無法重複批准 by @{username}",
                                "reply_to_message_id": message_id,
                            })
                        except Exception as _ne:
                            logger.warning("telegram_approval_already_resolved_notify_failed", error=str(_ne))
                        return

                    # Update the Telegram message (remove buttons + status reply).
                    await self._notify_approval_result(
                        message_id=message_id,
                        incident_id=approval_id,
                        action="approve",
                        username=username,
                        execution_triggered=execution_triggered,
                    )

                    # ADR-073: execution is gated on status == APPROVED rather than
                    # on execution_triggered (which, per the original notes, is
                    # unreliable under a signing race). A Redis lock
                    # exec:{approval.id} prevents a second path (REST / automatic)
                    # from executing the same approval concurrently.
                    import asyncio

                    from src.core.redis_client import get_redis
                    from src.services.approval_execution import get_execution_service

                    _redis = get_redis()
                    _lock_key = f"exec:{approval.id}"
                    # SET NX EX 60 — one execution per approval within 60s.
                    _acquired = await _redis.set(_lock_key, "1", nx=True, ex=60)
                    if _acquired:
                        def _report_exec_outcome(task: "asyncio.Task") -> None:
                            """Retrieve the task result so a failure is logged, not lost."""
                            if task.cancelled():
                                return
                            exc = task.exception()
                            if exc is not None:
                                logger.error(
                                    "telegram_approval_execution_task_failed",
                                    approval_id=approval_id,
                                    error=str(exc),
                                )

                        _exec_task = asyncio.create_task(
                            get_execution_service().execute_approved_action(approval)
                        )
                        _exec_task.add_done_callback(_report_exec_outcome)
                        logger.info(
                            "telegram_approval_execution_triggered",
                            approval_id=approval_id,
                            action=approval.action,
                            gate="status=APPROVED",
                        )
                    else:
                        logger.info(
                            "telegram_approval_execution_skipped_lock_held",
                            approval_id=approval_id,
                            reason="另一路徑 (REST/自動) 已取得 exec lock",
                        )
                else:
                    # Previously dropped without a trace; keep the service's
                    # explanation in the log.
                    logger.warning(
                        "telegram_approval_sign_no_result",
                        approval_id=approval_id,
                        user_id=user_id,
                        detail=message,
                    )

            elif action == "reject":
                approval, message = await service.reject_approval(
                    approval_id=approval_uuid,
                    rejector_id=f"tg_{user_id}",
                    rejector_name=username,
                    reason="Telegram 拒絕 (Long Polling)",
                )

                if approval:
                    logger.info(
                        "telegram_approval_rejected_via_polling",
                        approval_id=approval_id,
                        user_id=user_id,
                    )
                    # Update the Telegram message with the rejection status.
                    await self._notify_approval_result(
                        message_id=message_id,
                        incident_id=approval_id,
                        action="reject",
                        username=username,
                        execution_triggered=False,
                    )
                    # Propagate the rejection to the incident; best-effort.
                    try:
                        from src.services.incident_approval_service import (
                            get_incident_approval_service,
                        )

                        await get_incident_approval_service().on_approval_status_change(
                            approval_id=str(approval_uuid),
                            new_status="rejected",
                        )
                        logger.info(
                            "telegram_rejection_incident_synced_via_polling",
                            approval_id=str(approval_uuid),
                            incident_id=getattr(approval, "incident_id", None),
                        )
                    except Exception as _sync_e:
                        logger.warning(
                            "telegram_rejection_incident_sync_failed_via_polling",
                            approval_id=str(approval_uuid),
                            incident_id=getattr(approval, "incident_id", None),
                            error=str(_sync_e),
                        )
                else:
                    logger.warning(
                        "telegram_approval_reject_no_result",
                        approval_id=approval_id,
                        user_id=user_id,
                        detail=message,
                    )

            elif action == "tune":
                # Log-only action.
                logger.info(
                    "telegram_auto_tuning_via_polling",
                    approval_id=approval_id,
                    user_id=user_id,
                )

        except Exception as e:
            logger.error(
                "telegram_approval_action_failed",
                action=action,
                approval_id=approval_id,
                error=str(e),
            )


# =============================================================================
# Phase 6.5: 心跳監控方法
# =============================================================================

    async def _check_nemotron_health(self) -> tuple[bool, str]:
        """
        Probe whether the Nemotron (NVIDIA NIM) API is reachable.

        Issues an authenticated GET against the ``/v1/models`` endpoint with a
        25-second timeout.

        Returns:
            ``(is_healthy, status_text)`` — healthy only on HTTP 200; otherwise
            the text describes the missing key, the HTTP status, the timeout, or
            the first 40 characters of the error.
        """
        import httpx
        from src.core.config import get_settings

        nvidia_key = get_settings().NVIDIA_API_KEY
        if not nvidia_key:
            return False, "❌ NVIDIA_API_KEY 未設定"

        # /v1/models is used as a lightweight probe (per the original note: avoids
        # billable inference). The 25s timeout leaves room for slow cold starts.
        probe_url = "https://integrate.api.nvidia.com/v1/models"
        auth_headers = {"Authorization": f"Bearer {nvidia_key}"}
        try:
            async with httpx.AsyncClient(timeout=25.0) as http:
                response = await http.get(probe_url, headers=auth_headers)
            code = response.status_code
            return (True, "✅ 正常") if code == 200 else (False, f"❌ HTTP {code}")
        except httpx.TimeoutException:
            return False, "⚠️ 超時 (>25s)"
        except Exception as err:
            return False, f"❌ {str(err)[:40]}"

    async def send_heartbeat(self) -> bool:
        """
        Send the aggregated heartbeat report to the SRE group.

        Flow:
        1. Slot de-duplication: ``SET NX EX`` on ``heartbeat:slot:{slot_id}``
           (30-minute slots). The key is never released and simply expires, so
           only the first caller in a slot continues (multi-replica safe).
        2. Collect the report via ``HeartbeatReportService``.
        3. Noise reduction:
           - no warnings  -> at most one "alive" report per 6 hours;
           - same warnings as the last sent report (hash match) -> skip;
           - different warnings -> send.
        4. Send to the SRE group, or to the personal channel with a warning
           banner when ``SRE_GROUP_CHAT_ID`` is not configured.
        5. Only after the send call returned, persist the de-dup markers. If the
           send raises, nothing is recorded and the next slot retries instead of
           staying silent for hours.

        Returns:
            True when the report was sent or deliberately skipped; False when an
            exception occurred (it is logged, not raised).
        """
        try:
            if not self._initialized:
                await self.initialize()

            from src.core.redis_client import get_redis
            from src.services.heartbeat_report_service import (
                HeartbeatReportService,
                report_to_telegram_html,
            )

            # Slot-based de-duplication: one sender per 30-minute slot.
            interval_seconds = 30 * 60
            slot_id = int(datetime.now(UTC).timestamp() / interval_seconds)
            slot_key = f"heartbeat:slot:{slot_id}"

            redis_client = get_redis()
            acquired = await redis_client.set(slot_key, "1", nx=True, ex=interval_seconds)
            if not acquired:
                logger.debug("heartbeat_skipped_slot_taken", slot_id=slot_id)
                return True

            report = await HeartbeatReportService().collect()

            # Heartbeat noise reduction (P0 #4): identical reports every 30 minutes
            # are effectively duplicate alerts, so repeated content is suppressed
            # while new warnings are still pushed immediately.
            import hashlib
            SILENT_REPORT_INTERVAL_HOURS = 6
            WARNINGS_HASH_TTL = 24 * 3600
            silent_key = "heartbeat:silent_last_sent"
            warnings_hash_key = "heartbeat:warnings_hash"

            warnings_str = "|".join(sorted(report.warnings))
            # MD5 is only a change-detection fingerprint here, not a security use.
            warnings_hash = hashlib.md5(
                warnings_str.encode(), usedforsecurity=False
            ).hexdigest()[:12]
            is_healthy = not report.warnings

            if is_healthy:
                # Healthy: an "alive" report was already sent within the window.
                if await redis_client.exists(silent_key):
                    logger.debug(
                        "telegram_heartbeat_skipped_silent_recent",
                        slot_id=slot_id,
                    )
                    return True
            else:
                # Warnings present: skip when identical to the last sent set.
                last_hash_raw = await redis_client.get(warnings_hash_key)
                last_hash = (
                    last_hash_raw.decode() if isinstance(last_hash_raw, bytes)
                    else last_hash_raw
                )
                if last_hash == warnings_hash:
                    logger.debug(
                        "telegram_heartbeat_skipped_warnings_unchanged",
                        slot_id=slot_id,
                        warnings_hash=warnings_hash,
                    )
                    return True

            text = report_to_telegram_html(report)

            # Send to the SRE group only.
            if settings.SRE_GROUP_CHAT_ID:
                await self.send_to_group(text=text)
            else:
                # SRE_GROUP_CHAT_ID not injected: fall back to the personal
                # channel and prepend a warning.
                fallback = (
                    "⚠️ <b>SRE_GROUP_CHAT_ID 未設定</b>，心跳報告暫發到個人頻道\n\n"
                    + text
                )
                await self.send_notification(fallback)

            # Persist de-dup state only now that the send call has returned, so a
            # failed send cannot suppress the following reports.
            if is_healthy:
                await redis_client.setex(
                    silent_key, SILENT_REPORT_INTERVAL_HOURS * 3600, "1",
                )
                # Forget the old warnings hash (warnings -> healthy), so the next
                # warning set is pushed immediately.
                await redis_client.delete(warnings_hash_key)
            else:
                await redis_client.setex(
                    warnings_hash_key, WARNINGS_HASH_TTL, warnings_hash,
                )
                # Drop the silent marker (healthy -> warnings).
                await redis_client.delete(silent_key)

            self._last_message_time = datetime.now(UTC)
            logger.info(
                "telegram_heartbeat_sent",
                warnings=len(report.warnings),
                warnings_hash=warnings_hash,
                has_sre_group=bool(settings.SRE_GROUP_CHAT_ID),
            )

            return True

        except Exception as e:
            logger.error("telegram_heartbeat_failed", error=str(e))
            return False

    async def start_heartbeat_monitor(
        self,
        heartbeat_interval_minutes: int = 30,
        silence_threshold_hours: int = 2,
    ) -> None:
        """
        Launch the heartbeat loop as a background asyncio task.

        A second call while the monitor is already active only logs a warning.

        Args:
            heartbeat_interval_minutes: Minutes between heartbeats (default 30).
            silence_threshold_hours: Silence-alert threshold in hours (default 2);
                forwarded to the loop.
        """
        already_running = self._heartbeat_active
        if already_running:
            logger.warning("telegram_heartbeat_already_running")
            return

        # Raise the flag before scheduling so the loop sees it as active.
        self._heartbeat_active = True
        loop_coro = self._heartbeat_loop(heartbeat_interval_minutes, silence_threshold_hours)
        self._heartbeat_task = asyncio.create_task(loop_coro)

        log_fields = {
            "interval_minutes": heartbeat_interval_minutes,
            "silence_threshold_hours": silence_threshold_hours,
        }
        logger.info("telegram_heartbeat_monitor_started", **log_fields)

    async def _heartbeat_loop(
        self,
        interval_minutes: int,
        _silence_hours: int,  # 保留參數簽名相容性，沉默判斷已整合進 HeartbeatReport.warnings
    ) -> None:
        """
        心跳監控循環

        ADR-073 重構 (2026-04-12 ogt):
        - 移除額外沉默告警多發邏輯（已整合進 HeartbeatReport.warnings）
        - send_heartbeat() 內部有 RedisLock，2 個 replica 各自跑 loop 也只發一條
        """
        interval_seconds = interval_minutes * 60

        # 對齊到下一個整點倍數（例如 interval=30 → 對齊到 :00 或 :30）
        # 避免多 replica 因啟動時間不同而各自發送
        now_ts = datetime.now(UTC).timestamp()
        next_slot = (int(now_ts / interval_seconds) + 1) * interval_seconds
        wait_seconds = next_slot - now_ts
        try:
            await asyncio.sleep(wait_seconds)
        except asyncio.CancelledError:
            return

        while self._heartbeat_active:
            try:
                await self.send_heartbeat()
                await asyncio.sleep(interval_seconds)
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error("telegram_heartbeat_loop_error", error=str(e))
                await asyncio.sleep(60)

    async def stop_heartbeat_monitor(self) -> None:
        """Stop the heartbeat monitor: clear the active flag, cancel the loop
        task if it is still running, and wait for it to finish."""
        self._heartbeat_active = False

        task = self._heartbeat_task
        still_running = bool(task) and not task.done()
        if still_running:
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                # Expected outcome of cancelling the loop task.
                pass

        self._heartbeat_task = None
        logger.info("telegram_heartbeat_monitor_stopped")


# =============================================================================
# Singleton
# =============================================================================

# Module-level cache for the shared gateway; populated on first access.
_gateway: TelegramGateway | None = None


def get_telegram_gateway() -> TelegramGateway:
    """Return the global TelegramGateway instance, creating it on first use."""
    global _gateway
    if _gateway is not None:
        return _gateway
    _gateway = TelegramGateway()
    return _gateway