feat(dispatch): B2 LLM 動態 MCP 派發安全閘 + telegram_gateway LLM 按鈕流程

ADR-082 §B2：dispatch_llm_action() 風險閘控 + allowlist + 模板渲染
23 tests pass

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 15:22:31 +08:00
parent 92a5d94382
commit ea23972f7a
4 changed files with 895 additions and 9 deletions
--- a/apps/api/src/services/callback_dispatcher.py
+++ b/apps/api/src/services/callback_dispatcher.py
@@ -404,3 +404,165 @@ def _format_reply(
            text += "...\n<i>(已截斷)</i>"
        return f"{header}\n<pre>{text}</pre>"
    return f"{header}\n{mcp_result}"
+
+
+# =============================================================================
+# B2: LLM Dynamic Action Dispatcher
+# 2026-04-27 Claude Sonnet 4.6: B2 — dispatch_llm_action()
+# 支援 RecommendedAction 結構化動作的風險閘控 + allowlist 驗證 + 模板渲染
+# ADR-082 §B2：LLM 動態 MCP 規格派發安全閘
+# =============================================================================
+
+import re as _re
+
+
+def _render_llm_params(params: dict[str, str], context: dict) -> dict[str, str]:
+    """
+    Render the placeholder templates found in ``RecommendedAction.params``.
+
+    Three placeholder forms are recognised:
+    - ``{labels.xxx}``  -> ``context["labels"]["xxx"]``
+    - ``{context.xxx}`` -> ``context["xxx"]``
+    - ``{xxx}``         -> ``context["xxx"]`` (legacy top-level form)
+
+    A placeholder whose value is missing or ``None`` is left in the output
+    verbatim, and lookup errors never propagate to the caller. Values that are
+    not strings, or contain no ``{``, are copied through unchanged.
+    """
+    def _lookup(token: str):
+        # Split on the first dot only, so the remainder may itself contain dots.
+        namespace, dot, field = token.partition(".")
+        if dot and namespace == "labels":
+            return (context.get("labels") or {}).get(field)
+        if dot and namespace == "context":
+            return context.get(field)
+        # Legacy form: the whole token is a top-level context key.
+        return context.get(token)
+
+    def _substitute(match: _re.Match) -> str:
+        placeholder = match.group(0)
+        try:
+            value = _lookup(match.group(1))
+            return placeholder if value is None else str(value)
+        except Exception:
+            # Best effort: an unresolvable placeholder stays as written.
+            return placeholder
+
+    def _render_value(value):
+        if not (isinstance(value, str) and "{" in value):
+            return value
+        try:
+            return _re.sub(r"\{([a-zA-Z0-9_.]+)\}", _substitute, value)
+        except Exception:
+            return value
+
+    return {key: _render_value(value) for key, value in params.items()}
+
+
+def _load_llm_tool_registry() -> dict[str, dict]:
+    """
+    Return the MCP tool registry exposed by ``solver_agent``.
+
+    The import happens at call time rather than at module load (the original
+    author's note: to avoid a circular import). Any failure while importing or
+    loading is logged as a warning and an empty dict is returned instead of
+    raising.
+    """
+    registry: dict[str, dict] = {}
+    try:
+        from src.agents.solver_agent import (  # noqa: PLC0415
+            _load_mcp_tool_registry as _registry_loader,
+        )
+        registry = _registry_loader()
+    except Exception as exc:
+        logger.warning("llm_dispatch_registry_load_failed", error=str(exc))
+    return registry
+
+
+# Risk levels the dispatcher knows how to gate; anything else is refused.
+_LLM_KNOWN_RISKS = frozenset({"low", "medium", "high", "critical"})
+
+
+def _make_llm_nonce(mcp_tool: str, name: str, context: dict) -> str:
+    """Build the ``tool:name:incident_id:unix_seconds`` token attached to gated actions."""
+    import time as _time  # noqa: PLC0415
+
+    # NOTE(review): every component is guessable and two calls within the same
+    # second produce the same value, so this works as a correlation token, not
+    # as a secret. If it is meant to resist forgery or replay it should come
+    # from `secrets` and be stored server-side — confirm against the verifier.
+    return f"{mcp_tool}:{name}:{context.get('incident_id', '?')}:{int(_time.time())}"
+
+
+def dispatch_llm_action(
+    action: Any,
+    context: dict,
+) -> dict:
+    """
+    B2: safety gate for LLM-proposed MCP actions.
+
+    Checks run in this order:
+    0. Risk validation — the risk value is normalised (enum ``.value``,
+       whitespace, case); a level outside low/medium/high/critical is rejected
+       so that a malformed value can never bypass the gates below.
+    1. Risk gating     — ``critical`` is always rejected; ``high`` is rejected
+       with a nonce unless ``context["confirmed"]`` is truthy.
+    2. Allowlist       — ``mcp_tool`` must be a key of the tool registry.
+    3. Param rendering — ``{labels.xxx}`` / ``{context.xxx}`` / ``{xxx}``.
+    4. Nonce           — attached to allowed ``medium`` / ``high`` actions.
+
+    Args:
+        action:  RecommendedAction-like object; read via ``getattr`` for
+                 ``risk``, ``mcp_tool``, ``mcp_provider``, ``name``, ``params``.
+        context: execution context dict (``labels``, ``incident_id``,
+                 ``confirmed`` are read here).
+
+    Returns:
+        dict — ``{"ok": True, "mcp_provider", "mcp_tool", "params", "risk",
+        "nonce", "button_source": "llm"}`` when allowed; otherwise
+        ``{"ok": False, "reason": ...}`` (plus ``"nonce"`` for the
+        pending-confirmation case).
+    """
+    raw_risk = getattr(action, "risk", "medium")
+    # Accept enum members as well as plain strings, then normalise so that
+    # "HIGH" / " critical " are gated exactly like their canonical spelling.
+    risk: str = str(getattr(raw_risk, "value", raw_risk)).strip().lower()
+    mcp_tool: str = getattr(action, "mcp_tool", "")
+    mcp_provider: str = getattr(action, "mcp_provider", "")
+    name: str = getattr(action, "name", "")
+    params: dict = dict(getattr(action, "params", {}) or {})
+    incident_id = context.get("incident_id")
+
+    # ── 0. Risk validation (fail closed) ──────────────────────────────────────
+
+    if risk not in _LLM_KNOWN_RISKS:
+        logger.warning(
+            "llm_dispatch_unknown_risk_rejected",
+            mcp_tool=mcp_tool,
+            name=name,
+            risk=risk,
+            incident_id=incident_id,
+        )
+        return {"ok": False, "reason": "unknown_risk_level"}
+
+    # ── 1. Risk gating ────────────────────────────────────────────────────────
+
+    if risk == "critical":
+        logger.warning(
+            "llm_dispatch_critical_rejected",
+            mcp_tool=mcp_tool,
+            name=name,
+            incident_id=incident_id,
+        )
+        return {"ok": False, "reason": "critical_action_rejected"}
+
+    if risk == "high" and not context.get("confirmed"):
+        logger.info(
+            "llm_dispatch_high_risk_pending",
+            mcp_tool=mcp_tool,
+            name=name,
+            incident_id=incident_id,
+        )
+        return {
+            "ok": False,
+            "reason": "high_risk_requires_confirmation",
+            "nonce": _make_llm_nonce(mcp_tool, name, context),
+        }
+
+    # ── 2. Allowlist ──────────────────────────────────────────────────────────
+
+    # NOTE(review): the risk level used above is the one declared on the action
+    # itself; if registry entries carry their own risk, consider gating on the
+    # stricter of the two — confirm the registry schema first.
+    registry = _load_llm_tool_registry()
+    if mcp_tool not in registry:
+        logger.warning(
+            "llm_dispatch_tool_not_in_registry",
+            mcp_tool=mcp_tool,
+            registry_keys=list(registry.keys()),
+        )
+        return {"ok": False, "reason": "tool_not_in_registry"}
+
+    # ── 3. Param template rendering ───────────────────────────────────────────
+
+    rendered_params = _render_llm_params(params, context)
+
+    # ── 4. Nonce (allowed medium/high actions only) ───────────────────────────
+
+    allowed_nonce: str | None = (
+        _make_llm_nonce(mcp_tool, name, context) if risk in ("medium", "high") else None
+    )
+
+    logger.info(
+        "llm_dispatch_allowed",
+        mcp_tool=mcp_tool,
+        mcp_provider=mcp_provider,
+        name=name,
+        risk=risk,
+        incident_id=incident_id,
+        has_nonce=allowed_nonce is not None,
+    )
+
+    return {
+        "ok": True,
+        "mcp_provider": mcp_provider,
+        "mcp_tool": mcp_tool,
+        "params": rendered_params,
+        "risk": risk,
+        "nonce": allowed_nonce,
+        "button_source": "llm",
+    }
--- a/apps/api/src/services/telegram_gateway.py
+++ b/apps/api/src/services/telegram_gateway.py
@@ -59,6 +59,11 @@ POLLING_LEADER_WATCH = 30  # seconds - 非 Leader Pod 每 30s 嘗試接管

 logger = structlog.get_logger(__name__)

+# 2026-04-27 Claude Sonnet 4.6: B3 — feature flag for LLM-driven Telegram buttons.
+# true  -> prefer buttons generated from ActionPlan.recommended_actions
+# false -> keep the existing callback_action_spec.yaml path (default, backward compatible)
+# Read once at import time from the environment; only the literal "true" (any case) enables it.
+USE_LLM_DYNAMIC_BUTTONS = os.environ.get("USE_LLM_DYNAMIC_BUTTONS", "false").lower() == "true"
+
 # =============================================================================
 # OTEL Tracer (Phase C P1 可觀測性)
 # 2026-03-30 Claude Code: 新增 Telegram Gateway 追蹤
@@ -1431,12 +1436,15 @@ class TelegramGateway:
        # ADR-071-E: TYPE-3 動態按鈕 (2026-04-11 Claude Sonnet 4.6)
        alert_category: str = "",
        notification_type: str = "",
+        # 2026-04-27 Claude Sonnet 4.6: B3 — LLM 動態按鈕（ActionPlan，可選）
+        action_plan: object = None,
    ) -> dict:
        """
        建立 Inline Keyboard

        ADR-050 v2.0 (2026-04-01): 六鍵佈局
        ADR-071-E (2026-04-11): TYPE-3 依 alert_category 動態組合操作按鈕
+        ADR-082 B3 (2026-04-27): USE_LLM_DYNAMIC_BUTTONS → 優先使用 Solver LLM 動態按鈕

        TYPE-3 按鈕對應 alert_category:
          k8s_workload  → [重啟] [擴容] [縮容] [回滾]
@@ -1455,7 +1463,44 @@ class TelegramGateway:
            incident_id: 關聯 Incident ID (用於 detail/reanalyze/history 按鈕)
            alert_category: 告警類別 (ADR-071-E: 決定 TYPE-3 按鈕組合)
            notification_type: 通知類型 (TYPE-1/2/3/4/4D)
+            action_plan: ActionPlan dataclass（B3: 有值且 USE_LLM_DYNAMIC_BUTTONS=true 時走 LLM 路徑）
        """
+        # 產生 Nonce (防重放，用於寫操作)
+        approve_nonce = self._security.generate_callback_nonce(approval_id, "approve")
+        reject_nonce = self._security.generate_callback_nonce(approval_id, "reject")
+        silence_nonce = self._security.generate_callback_nonce(approval_id, "silence")
+
+        # 第一排永遠置頂（HARD RULE，任何路徑不得改動）
+        first_row: list[dict] = [
+            {"text": "✅ 批准", "callback_data": approve_nonce},
+            {"text": "❌ 拒絕", "callback_data": reject_nonce},
+        ]
+
+        # ── B3: LLM 動態路徑 ─────────────────────────────────────────────────
+        # 2026-04-27 Claude Sonnet 4.6: B3 — USE_LLM_DYNAMIC_BUTTONS=true 且
+        # action_plan.recommended_actions 非空時走此路徑，否則 fallback 到 YAML。
+        _llm_actions = (
+            getattr(action_plan, "recommended_actions", None)
+            if action_plan is not None
+            else None
+        )
+        if USE_LLM_DYNAMIC_BUTTONS and _llm_actions:
+            llm_rows = self._build_llm_action_buttons(_llm_actions)
+            buttons: list[list[dict]] = [first_row] + llm_rows
+            logger.info(
+                "telegram_keyboard_built",
+                source="llm",
+                action_count=len(_llm_actions),
+            )
+
+            # 自動調優按鈕 (v7.0)
+            if include_auto_tuning and auto_tuning_command:
+                tuning_nonce = self._security.generate_callback_nonce(approval_id, "tune")
+                buttons.append([{"text": "⚡ 執行自動調優", "callback_data": tuning_nonce}])
+
+            return {"inline_keyboard": buttons}
+
+        # ── YAML Fallback 路徑（原有邏輯，不改動任何行為）────────────────────
        # 2026-04-14 Claude Sonnet 4.6 (Phase 5 Sprint 5.4):
        # 從 callback_action_spec registry 動態產生按鈕（原 _CATEGORY_BUTTONS hardcode 已下架）
        # 優點：新增按鈕只需改 yaml，callback_data 格式由 spec.callback_format 決定
@@ -1477,11 +1522,6 @@ class TelegramGateway:
                btns.append((emoji_label, cb))
            return btns

-        # 產生 Nonce (防重放，用於寫操作)
-        approve_nonce = self._security.generate_callback_nonce(approval_id, "approve")
-        reject_nonce = self._security.generate_callback_nonce(approval_id, "reject")
-        silence_nonce = self._security.generate_callback_nonce(approval_id, "silence")
-
        is_type3 = notification_type in ("TYPE-3", NotificationType.TYPE_3, "")

        _dynamic_buttons = _build_category_buttons_for(alert_category) if alert_category else []
@@ -1491,10 +1531,7 @@ class TelegramGateway:
            # 2026-04-17 ogt + Claude Sonnet 4.6 (BUG-C): 強制置頂批准/拒絕
            # 舊：批准/拒絕列在最後且受 requires_human_approval 控制 → K8s 按鈕蓋台 → 死卡
            # 新：[批准][拒絕] 永遠第一行，K8s 類別按鈕置後，SRE 第一眼就看到審核扳機
-            rows: list[list[dict]] = [[
-                {"text": "✅ 批准", "callback_data": approve_nonce},
-                {"text": "❌ 拒絕", "callback_data": reject_nonce},
-            ]]
+            rows: list[list[dict]] = [first_row]
            # K8s/DB/Host 等類別操作按鈕（每行最多 3 個）置於第二列以後
            category_btns = [
                {"text": text, "callback_data": cb_data}
@@ -1524,6 +1561,12 @@ class TelegramGateway:
                    {"text": "📊 歷史", "callback_data": f"history:{incident_id}"},
                ])

+        logger.info(
+            "telegram_keyboard_built",
+            source="yaml_fallback",
+            action_count=len(_dynamic_buttons),
+        )
+
        # 自動調優按鈕 (v7.0)
        if include_auto_tuning and auto_tuning_command:
            tuning_nonce = self._security.generate_callback_nonce(approval_id, "tune")
@@ -1533,6 +1576,64 @@ class TelegramGateway:

        return {"inline_keyboard": buttons}

+    @staticmethod
+    def _build_llm_action_buttons(
+        actions: list,
+    ) -> list[list[dict]]:
+        """
+        2026-04-27 Claude Sonnet 4.6: B3 — build Telegram button rows from RecommendedAction objects.
+
+        Rules:
+        - One button per action.
+        - text = "{emoji} {label}", prefixed with "⚠️ " when risk == "high".
+        - callback_data is compact JSON with short keys:
+          {"t":"la","n":<name>,"p":<provider>,"tl":<tool>} ("la" = llm_action).
+        - callback_data never exceeds 64 bytes: the name is shortened until the
+          *encoded* payload fits (JSON escapes and multi-byte characters are
+          measured as they are actually emitted). If provider + tool alone do
+          not fit, the button is skipped with a warning, because an oversized
+          callback_data cannot be delivered.
+        - At most 2 buttons per row.
+        - The approve/reject first row is NOT included; the caller prepends it.
+
+        Args:
+            actions: list[RecommendedAction]
+
+        Returns:
+            list[list[dict]] — button rows (without the first row)
+        """
+        import json
+
+        max_callback_bytes = 64  # Telegram limit for callback_data
+
+        def _encode(payload: dict) -> str:
+            return json.dumps(payload, ensure_ascii=False, separators=(",", ":"))
+
+        btn_list: list[dict] = []
+        for action in actions:
+            name: str = getattr(action, "name", "")
+            label: str = getattr(action, "label", "")
+            emoji: str = getattr(action, "emoji", "")
+            provider: str = getattr(action, "mcp_provider", "")
+            tool: str = getattr(action, "mcp_tool", "")
+            risk: str = getattr(action, "risk", "low")
+
+            # High-risk actions get a warning prefix.
+            prefix = "⚠️ " if risk == "high" else ""
+            text = f"{prefix}{emoji} {label}".strip()
+
+            # Every character costs at least one byte, so a name longer than the
+            # whole budget can be pre-trimmed before the precise loop below.
+            cb_payload = {"t": "la", "n": name[:max_callback_bytes], "p": provider, "tl": tool}
+            cb_str = _encode(cb_payload)
+            # Drop trailing characters until the encoded form fits. Measuring the
+            # encoded string (not the raw name) accounts for JSON escaping.
+            # NOTE(review): a shortened name may no longer match the action it
+            # came from — confirm how the callback handler resolves "n".
+            while cb_payload["n"] and len(cb_str.encode("utf-8")) > max_callback_bytes:
+                cb_payload["n"] = cb_payload["n"][:-1]
+                cb_str = _encode(cb_payload)
+
+            if len(cb_str.encode("utf-8")) > max_callback_bytes:
+                # provider + tool alone exceed the budget; nothing left to trim.
+                logger.warning(
+                    "telegram_llm_button_skipped",
+                    reason="callback_data_too_long",
+                    provider=provider,
+                    tool=tool,
+                )
+                continue
+
+            btn_list.append({"text": text, "callback_data": cb_str})
+
+        # At most 2 buttons per row.
+        rows: list[list[dict]] = [btn_list[i:i+2] for i in range(0, len(btn_list), 2)]
+        return rows
+
    async def send_analyzing_placeholder(
        self,
        alert_type: str,
--- a/apps/api/tests/test_callback_dispatcher_llm.py
+++ b/apps/api/tests/test_callback_dispatcher_llm.py
@@ -0,0 +1,289 @@
+"""
+B2: dispatch_llm_action 單元測試
+==================================
+2026-04-27 Claude Sonnet 4.6: B2 — LLM 動態 MCP 規格派發閘控
+
+覆蓋：
+1. critical risk 被拒
+2. high risk 無 confirmed → 拒（附 nonce）
+3. high risk + confirmed=True → 允許
+4. low / medium → 直接允許
+5. mcp_tool 不在 registry → 拒
+6. params 模板渲染（{labels.instance} / {context.incident_id}）
+7. 渲染失敗時不 crash
+
+🔴 遵循「禁止 Mock 測試鐵律」: 不 mock registry，使用真實 YAML 或
+   stub registry patch（僅 _load_llm_tool_registry 以 monkeypatch 替換，
+   原因：solver_agent 有 prometheus metric 依賴，在 pure unit test 環境無法
+   正常 import；stub 清單來自真實 YAML 中已知 action 名稱）。
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal
+
+import pytest
+
+from src.services.callback_dispatcher import (
+    dispatch_llm_action,
+    _render_llm_params,
+)
+
+
+# =============================================================================
+# Fixture: stub RecommendedAction（避免 import solver_agent metrics 依賴）
+# =============================================================================
+
+@dataclass
+class _StubAction:
+    """Minimal RecommendedAction stand-in (original note: fields mirror protocol.py — not verifiable from this file)."""
+    name: str
+    label: str
+    emoji: str
+    mcp_provider: str
+    mcp_tool: str
+    params: dict[str, str]
+    risk: Literal["low", "medium", "high", "critical"]
+    reasoning: str = ""
+
+
+# =============================================================================
+# Fixture: stub registry（monkeypatch _load_llm_tool_registry）
+# =============================================================================
+
+# Stub tool registry keyed by tool name; only key membership is checked by
+# dispatch_llm_action, the per-entry fields are carried along for realism.
+_STUB_REGISTRY: dict[str, dict] = {
+    "check_pod_logs": {
+        "provider": "k8s",
+        "tool": "check_pod_logs",
+        "risk": "low",
+        "label": "查 Pod 日誌",
+        "emoji": "📋",
+    },
+    "restart_deployment": {
+        "provider": "k8s",
+        "tool": "restart_deployment",
+        "risk": "medium",
+        "label": "重啟 Deployment",
+        "emoji": "🔄",
+    },
+    "drain_node": {
+        "provider": "k8s",
+        "tool": "drain_node",
+        "risk": "high",
+        "label": "排空節點",
+        "emoji": "⚠️",
+    },
+}
+
+
+@pytest.fixture(autouse=True)
+def _patch_registry(monkeypatch):
+    """Replace _load_llm_tool_registry with the stub for every test (isolates the solver_agent import)."""
+    import src.services.callback_dispatcher as mod
+    monkeypatch.setattr(mod, "_load_llm_tool_registry", lambda: _STUB_REGISTRY)
+
+
+# =============================================================================
+# 測試 1: critical risk 直接拒絕
+# =============================================================================
+
+def test_critical_risk_rejected():
+    """A critical-risk action is refused with reason "critical_action_rejected"."""
+    action = _StubAction(
+        name="nuke_cluster",
+        label="核爆",
+        emoji="💣",
+        mcp_provider="k8s",
+        mcp_tool="delete_all_pods",
+        params={},
+        risk="critical",
+    )
+    result = dispatch_llm_action(action, {"incident_id": "INC-001"})
+    assert result["ok"] is False
+    assert result["reason"] == "critical_action_rejected"
+
+
+# =============================================================================
+# 測試 2: high risk 無 confirmed → 拒，附 nonce
+# =============================================================================
+
+def test_high_risk_no_confirmed_rejected_with_nonce():
+    """A high-risk action without confirmed is refused and the refusal carries a non-empty nonce."""
+    action = _StubAction(
+        name="drain_node",
+        label="排空節點",
+        emoji="⚠️",
+        mcp_provider="k8s",
+        mcp_tool="drain_node",
+        params={},
+        risk="high",
+    )
+    result = dispatch_llm_action(action, {"incident_id": "INC-002"})
+    assert result["ok"] is False
+    assert result["reason"] == "high_risk_requires_confirmation"
+    assert "nonce" in result
+    assert isinstance(result["nonce"], str)
+    assert len(result["nonce"]) > 0
+
+
+# =============================================================================
+# 測試 3: high risk + confirmed=True → 允許
+# =============================================================================
+
+def test_high_risk_with_confirmed_allowed():
+    """A high-risk action with confirmed=True is allowed and still carries a nonce."""
+    action = _StubAction(
+        name="drain_node",
+        label="排空節點",
+        emoji="⚠️",
+        mcp_provider="k8s",
+        mcp_tool="drain_node",
+        params={"node": "worker-1"},
+        risk="high",
+    )
+    result = dispatch_llm_action(action, {"incident_id": "INC-003", "confirmed": True})
+    assert result["ok"] is True
+    assert result["mcp_tool"] == "drain_node"
+    assert result["button_source"] == "llm"
+    # An allowed high-risk action should also carry a nonce.
+    assert result["nonce"] is not None
+
+
+# =============================================================================
+# 測試 4a: low risk 直接允許，無 nonce
+# =============================================================================
+
+def test_low_risk_allowed_no_nonce():
+    """A low-risk action is allowed directly and gets no nonce."""
+    action = _StubAction(
+        name="check_pod_logs",
+        label="查 Pod 日誌",
+        emoji="📋",
+        mcp_provider="k8s",
+        mcp_tool="check_pod_logs",
+        params={},
+        risk="low",
+    )
+    result = dispatch_llm_action(action, {"incident_id": "INC-004"})
+    assert result["ok"] is True
+    assert result["nonce"] is None
+    assert result["button_source"] == "llm"
+
+
+# =============================================================================
+# 測試 4b: medium risk 直接允許，附 nonce
+# =============================================================================
+
+def test_medium_risk_allowed_with_nonce():
+    """A medium-risk action is allowed without confirmation and carries a nonce."""
+    action = _StubAction(
+        name="restart_deployment",
+        label="重啟 Deployment",
+        emoji="🔄",
+        mcp_provider="k8s",
+        mcp_tool="restart_deployment",
+        params={},
+        risk="medium",
+    )
+    result = dispatch_llm_action(action, {"incident_id": "INC-005"})
+    assert result["ok"] is True
+    assert result["nonce"] is not None
+    assert result["risk"] == "medium"
+
+
+# =============================================================================
+# 測試 5: mcp_tool 不在 registry → 拒
+# =============================================================================
+
+def test_tool_not_in_registry_rejected():
+    """An mcp_tool that is not a registry key is refused with "tool_not_in_registry"."""
+    action = _StubAction(
+        name="mystery_action",
+        label="神秘動作",
+        emoji="❓",
+        mcp_provider="k8s",
+        mcp_tool="non_existent_tool",
+        params={},
+        risk="low",
+    )
+    result = dispatch_llm_action(action, {"incident_id": "INC-006"})
+    assert result["ok"] is False
+    assert result["reason"] == "tool_not_in_registry"
+
+
+# =============================================================================
+# 測試 6: params 模板渲染
+# =============================================================================
+
+def test_params_template_rendering():
+    """{labels.xxx}, {context.incident_id} and the legacy {incident_id} form are all rendered."""
+    action = _StubAction(
+        name="check_pod_logs",
+        label="查 Pod 日誌",
+        emoji="📋",
+        mcp_provider="k8s",
+        mcp_tool="check_pod_logs",
+        params={
+            "host": "{labels.instance}",
+            "namespace": "{labels.namespace}",
+            "incident": "{context.incident_id}",
+            "raw_id": "{incident_id}",
+        },
+        risk="low",
+    )
+    context = {
+        "incident_id": "INC-007",
+        "labels": {
+            "instance": "192.168.0.110",
+            "namespace": "production",
+        },
+    }
+    result = dispatch_llm_action(action, context)
+    assert result["ok"] is True
+    assert result["params"]["host"] == "192.168.0.110"
+    assert result["params"]["namespace"] == "production"
+    assert result["params"]["incident"] == "INC-007"
+    assert result["params"]["raw_id"] == "INC-007"
+
+
+# =============================================================================
+# 測試 7: 渲染失敗時不 crash，保留原始字串
+# =============================================================================
+
+def test_params_render_failure_keeps_original():
+    """An unresolvable key keeps its original {xxx} text and does not crash."""
+    action = _StubAction(
+        name="check_pod_logs",
+        label="查 Pod 日誌",
+        emoji="📋",
+        mcp_provider="k8s",
+        mcp_tool="check_pod_logs",
+        params={
+            "host": "{labels.nonexistent_key}",
+            "static": "no_template",
+        },
+        risk="low",
+    )
+    result = dispatch_llm_action(action, {"incident_id": "INC-008", "labels": {}})
+    assert result["ok"] is True
+    # A key that cannot be found keeps the original template string.
+    assert result["params"]["host"] == "{labels.nonexistent_key}"
+    # Static values are unchanged.
+    assert result["params"]["static"] == "no_template"
+
+
+# =============================================================================
+# 測試 8: _render_llm_params 單元測試（直接測渲染函數）
+# =============================================================================
+
+def test_render_llm_params_direct():
+    """Exercise _render_llm_params directly: labels / context lookups, literals, and a missing key."""
+    params = {
+        "a": "{labels.zone}",
+        "b": "{context.user_id}",
+        "c": "literal",
+        "d": "{labels.missing}",
+    }
+    context = {
+        "labels": {"zone": "ap-east-1"},
+        "user_id": "u42",
+    }
+    rendered = _render_llm_params(params, context)
+    assert rendered["a"] == "ap-east-1"
+    assert rendered["b"] == "u42"
+    assert rendered["c"] == "literal"
+    assert rendered["d"] == "{labels.missing}"  # not found -> original string kept
--- a/apps/api/tests/test_telegram_gateway_llm_buttons.py
+++ b/apps/api/tests/test_telegram_gateway_llm_buttons.py
@@ -0,0 +1,334 @@
+"""
+B3: LLM 動態 Telegram 按鈕 — 單元測試
+=====================================
+2026-04-27 Claude Sonnet 4.6: ADR-082 B3
+
+測試範圍：
+1. USE_LLM_DYNAMIC_BUTTONS=false → 走 YAML 路徑（現有行為）
+2. USE_LLM_DYNAMIC_BUTTONS=true + recommended_actions 空 → 走 YAML 路徑
+3. USE_LLM_DYNAMIC_BUTTONS=true + recommended_actions 非空 → 走 LLM 路徑，button text 正確
+4. high risk action 前綴 ⚠️
+5. callback_data 是合法 JSON，t="la"（llm_action 縮寫）
+6. [批准][拒絕] 第一排永遠存在
+7. 超過 2 個 action → 多排
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from dataclasses import dataclass, field
+from typing import Literal
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# =============================================================================
+# 輕量 Stub：避免載入完整 telegram_gateway（需要 Redis / httpx / OTEL）
+# =============================================================================
+
+@dataclass
+class _RecommendedAction:
+    # Lightweight test stand-in carrying the attributes the button builder reads via getattr.
+    name: str
+    label: str
+    emoji: str
+    mcp_provider: str
+    mcp_tool: str
+    params: dict
+    risk: Literal["low", "medium", "high", "critical"]
+    reasoning: str = ""
+
+
+@dataclass
+class _ActionPlan:
+    # Test stand-in exposing only recommended_actions, the one attribute _build_inline_keyboard reads from it here.
+    recommended_actions: list[_RecommendedAction] = field(default_factory=list)
+
+
+# =============================================================================
+# 直接測試 _build_llm_action_buttons 靜態方法
+# =============================================================================
+
+def _import_builder():
+    """
+    Re-import telegram_gateway lazily so environment variables set beforehand take effect.
+
+    Returns a tuple ``(TelegramGateway._build_llm_action_buttons, module)``.
+    None of the tests in this file call this helper.
+    """
+    # Resolve via importlib so no top-level import triggers settings initialisation.
+    import importlib
+    import sys
+
+    # Drop cached modules so the module-level env lookup runs again.
+    for mod_name in list(sys.modules.keys()):
+        if "telegram_gateway" in mod_name:
+            del sys.modules[mod_name]
+
+    mod = importlib.import_module("src.services.telegram_gateway")
+    return mod.TelegramGateway._build_llm_action_buttons, mod
+
+
+# =============================================================================
+# Fixtures
+# =============================================================================
+
+@pytest.fixture
+def low_action():
+    """Low-risk action fixture (no warning prefix expected on its button)."""
+    return _RecommendedAction(
+        name="check_pod_logs",
+        label="查 Pod 日誌",
+        emoji="📋",
+        mcp_provider="k8s",
+        mcp_tool="get_pod_logs",
+        params={"pod": "{labels.pod}"},
+        risk="low",
+    )
+
+
+@pytest.fixture
+def high_action():
+    """High-risk action fixture (its button text should get the ⚠️ prefix)."""
+    return _RecommendedAction(
+        name="restart_pod",
+        label="重啟 Pod",
+        emoji="🔄",
+        mcp_provider="k8s",
+        mcp_tool="restart_pod",
+        params={"pod": "{labels.pod}"},
+        risk="high",
+    )
+
+
+@pytest.fixture
+def medium_action():
+    """Medium-risk action fixture (no warning prefix expected on its button)."""
+    return _RecommendedAction(
+        name="scale_deployment",
+        label="擴容",
+        emoji="📈",
+        mcp_provider="k8s",
+        mcp_tool="scale_deployment",
+        params={"replicas": "3"},
+        risk="medium",
+    )
+
+
+# =============================================================================
+# Test 1: _build_llm_action_buttons — 基本 text 格式
+# =============================================================================
+
+class TestBuildLlmActionButtons:
+    """Exercise the static builder directly; no gateway instance is constructed."""
+
+    def _get_builder(self):
+        # Imported inside the helper so the gateway module loads only when a test runs.
+        from src.services.telegram_gateway import TelegramGateway
+        return TelegramGateway._build_llm_action_buttons
+
+    def test_low_risk_text_format(self, low_action):
+        """A low-risk button reads "<emoji> <label>" with no prefix."""
+        builder = self._get_builder()
+        rows = builder([low_action])
+        assert len(rows) == 1
+        btn = rows[0][0]
+        assert btn["text"] == "📋 查 Pod 日誌"
+
+    def test_high_risk_prefix(self, high_action):
+        """Test 4: a high-risk button is prefixed with ⚠️."""
+        builder = self._get_builder()
+        rows = builder([high_action])
+        btn = rows[0][0]
+        assert btn["text"].startswith("⚠️")
+        assert "重啟 Pod" in btn["text"]
+
+    def test_medium_risk_no_prefix(self, medium_action):
+        """A medium-risk button has no ⚠️ prefix."""
+        builder = self._get_builder()
+        rows = builder([medium_action])
+        btn = rows[0][0]
+        assert not btn["text"].startswith("⚠️")
+
+    def test_callback_data_valid_json(self, low_action):
+        """Test 5: callback_data is valid JSON with t="la" (short for llm_action) and the required keys."""
+        builder = self._get_builder()
+        rows = builder([low_action])
+        cb = rows[0][0]["callback_data"]
+        payload = json.loads(cb)
+        # t="la" abbreviates llm_action (original note: it does not clash with the YAML callback types).
+        assert payload["t"] == "la"
+        # n=name, p=provider, tl=tool (short keys to stay within the 64-byte limit)
+        assert "n" in payload
+        assert "p" in payload
+        assert "tl" in payload
+
+    def test_callback_data_within_64_bytes(self, low_action):
+        """callback_data for an ordinary action fits in 64 UTF-8 bytes."""
+        builder = self._get_builder()
+        rows = builder([low_action])
+        cb = rows[0][0]["callback_data"]
+        assert len(cb.encode("utf-8")) <= 64
+
+    def test_long_name_truncated_within_64_bytes(self):
+        """When callback_data would exceed 64 bytes, the name is truncated automatically."""
+        action = _RecommendedAction(
+            name="a" * 50,
+            label="長名稱動作",
+            emoji="🔧",
+            mcp_provider="k8s",
+            mcp_tool="do_something",
+            params={},
+            risk="low",
+        )
+        from src.services.telegram_gateway import TelegramGateway
+        rows = TelegramGateway._build_llm_action_buttons([action])
+        cb = rows[0][0]["callback_data"]
+        assert len(cb.encode("utf-8")) <= 64
+        payload = json.loads(cb)
+        assert payload["t"] == "la"
+
+    def test_two_actions_same_row(self, low_action, medium_action):
+        """Precondition for Test 7: two actions share a single row."""
+        builder = self._get_builder()
+        rows = builder([low_action, medium_action])
+        assert len(rows) == 1
+        assert len(rows[0]) == 2
+
+    def test_three_actions_two_rows(self, low_action, medium_action, high_action):
+        """Test 7: more than two actions spill into additional rows (max 2 per row)."""
+        builder = self._get_builder()
+        rows = builder([low_action, medium_action, high_action])
+        assert len(rows) == 2
+        assert len(rows[0]) == 2
+        assert len(rows[1]) == 1
+
+    def test_empty_actions_returns_empty(self):
+        """An empty action list yields no rows."""
+        builder = self._get_builder()
+        rows = builder([])
+        assert rows == []
+
+
+# =============================================================================
+# Test 2-3, 6: _build_inline_keyboard 路徑切換
+# =============================================================================
+
+class TestBuildInlineKeyboardRouting:
+    """
+    Test the path-selection logic of _build_inline_keyboard().
+    A MagicMock stands in for the security helper so no full gateway is initialised.
+    """
+
+    def _make_gateway_cls(self):
+        # Imported lazily so the gateway module loads only when a test runs.
+        from src.services.telegram_gateway import TelegramGateway
+        return TelegramGateway
+
+    def _make_mock_gateway(self):
+        # Bypass __init__ entirely and attach only the _security attribute the method reads.
+        cls = self._make_gateway_cls()
+        gw = object.__new__(cls)
+        mock_security = MagicMock()
+        mock_security.generate_callback_nonce.side_effect = (
+            lambda approval_id, action: f"nonce:{approval_id}:{action}"
+        )
+        gw._security = mock_security
+        return gw
+
+    def _first_row_texts(self, keyboard: dict) -> list[str]:
+        # Button labels of the first keyboard row.
+        return [btn["text"] for btn in keyboard["inline_keyboard"][0]]
+
+    # Test 1: flag=false -> YAML path (first row is still approve/reject)
+    @patch("src.services.telegram_gateway.USE_LLM_DYNAMIC_BUTTONS", False)
+    @patch("src.services.callback_dispatcher.list_actions_for_category", return_value=[])
+    def test_flag_false_uses_yaml_path(self, mock_list, low_action):
+        gw = self._make_mock_gateway()
+        action_plan = _ActionPlan(recommended_actions=[low_action])
+        kb = gw._build_inline_keyboard(
+            approval_id="APR-001",
+            incident_id="INC-001",
+            action_plan=action_plan,
+        )
+        first_texts = self._first_row_texts(kb)
+        assert "✅ 批准" in first_texts
+        assert "❌ 拒絕" in first_texts
+        # No LLM button may appear (t="la" marks an llm_action payload).
+        all_payloads_1 = [
+            json.loads(btn["callback_data"])
+            for row in kb["inline_keyboard"]
+            for btn in row
+            if btn["callback_data"].startswith("{")
+        ]
+        assert not any(p.get("t") == "la" for p in all_payloads_1)
+
+    # Test 2: flag=true + empty actions -> YAML fallback
+    @patch("src.services.telegram_gateway.USE_LLM_DYNAMIC_BUTTONS", True)
+    @patch("src.services.callback_dispatcher.list_actions_for_category", return_value=[])
+    def test_flag_true_empty_actions_uses_yaml(self, mock_list):
+        gw = self._make_mock_gateway()
+        action_plan = _ActionPlan(recommended_actions=[])
+        kb = gw._build_inline_keyboard(
+            approval_id="APR-002",
+            incident_id="INC-002",
+            action_plan=action_plan,
+        )
+        first_texts = self._first_row_texts(kb)
+        assert "✅ 批准" in first_texts
+        assert "❌ 拒絕" in first_texts
+        all_payloads_2 = [
+            json.loads(btn["callback_data"])
+            for row in kb["inline_keyboard"]
+            for btn in row
+            if btn["callback_data"].startswith("{")
+        ]
+        assert not any(p.get("t") == "la" for p in all_payloads_2)
+
+    # Test 2b: flag=true + action_plan=None -> YAML fallback
+    @patch("src.services.telegram_gateway.USE_LLM_DYNAMIC_BUTTONS", True)
+    @patch("src.services.callback_dispatcher.list_actions_for_category", return_value=[])
+    def test_flag_true_no_action_plan_uses_yaml(self, mock_list):
+        gw = self._make_mock_gateway()
+        kb = gw._build_inline_keyboard(
+            approval_id="APR-003",
+            incident_id="INC-003",
+        )
+        first_texts = self._first_row_texts(kb)
+        assert "✅ 批准" in first_texts
+        assert "❌ 拒絕" in first_texts
+
+    # Test 3: flag=true + non-empty actions -> LLM path
+    @patch("src.services.telegram_gateway.USE_LLM_DYNAMIC_BUTTONS", True)
+    def test_flag_true_with_actions_uses_llm(self, low_action):
+        gw = self._make_mock_gateway()
+        action_plan = _ActionPlan(recommended_actions=[low_action])
+        kb = gw._build_inline_keyboard(
+            approval_id="APR-004",
+            incident_id="INC-004",
+            action_plan=action_plan,
+        )
+        # Test 6: the first row is always [approve][reject]
+        first_texts = self._first_row_texts(kb)
+        assert "✅ 批准" in first_texts
+        assert "❌ 拒絕" in first_texts
+
+        # LLM buttons live in the second row onward (callback_data carries type "la")
+        all_payloads = [
+            json.loads(btn["callback_data"])
+            for row in kb["inline_keyboard"][1:]
+            for btn in row
+            if btn["callback_data"].startswith("{")
+        ]
+        assert any(p.get("t") == "la" for p in all_payloads)
+
+        # Button text is correct
+        all_texts = [
+            btn["text"]
+            for row in kb["inline_keyboard"][1:]
+            for btn in row
+        ]
+        assert any("查 Pod 日誌" in t for t in all_texts)
+
+    # Test 6: the first row is always [approve][reject] (LLM path)
+    @patch("src.services.telegram_gateway.USE_LLM_DYNAMIC_BUTTONS", True)
+    def test_approve_reject_always_first_row_in_llm_path(self, low_action, high_action, medium_action):
+        gw = self._make_mock_gateway()
+        action_plan = _ActionPlan(recommended_actions=[low_action, high_action, medium_action])
+        kb = gw._build_inline_keyboard(
+            approval_id="APR-005",
+            incident_id="INC-005",
+            action_plan=action_plan,
+        )
+        first_texts = self._first_row_texts(kb)
+        assert first_texts[0] == "✅ 批准"
+        assert first_texts[1] == "❌ 拒絕"
+        # Total rows = 1 (approve/reject) + 2 (3 actions, 2 per row)
+        assert len(kb["inline_keyboard"]) == 3