feat(ai): ADR-036 NVIDIA Nemotron Tool Calling 整合

Phase 20 - 提升 Tool Calling 精準度 50% → 83.3%

新增:
- src/models/nvidia.py: Pydantic Schema
- src/services/nvidia_provider.py: NvidiaProvider 類別
- tests/test_nvidia_provider.py: 15 項單元測試 (全部通過)

修改:
- ai_router.py: AIProvider.NVIDIA + route_tool_calling()
- ai_rate_limiter.py: NVIDIA 限制 (5 RPM, 100/day)
- models.json: NVIDIA 配置
- cd.yaml: Secrets 注入 NVIDIA_API_KEY

路由策略:
- Tool Calling: Nemotron → Gemini → Claude
- 一般對話: Ollama → Gemini → Claude (不變)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-29 00:00:08 +08:00
parent dc7daf5d81
commit b77e151387
11 changed files with 1083 additions and 16 deletions
--- a/.github/workflows/cd.yaml
+++ b/.github/workflows/cd.yaml
@@ -287,15 +287,18 @@ jobs:
              --from-literal=REDIS_URL="${{ secrets.REDIS_URL }}" \
              --from-literal=GEMINI_API_KEY="${{ secrets.GEMINI_API_KEY }}" \
              --from-literal=CLAUDE_API_KEY="${{ secrets.CLAUDE_API_KEY }}" \
+              --from-literal=NVIDIA_API_KEY="${{ secrets.NVIDIA_API_KEY }}" \
              --from-literal=WEBHOOK_HMAC_SECRET="${{ secrets.WEBHOOK_HMAC_SECRET }}" \
              --from-literal=SENTRY_DSN="${{ secrets.SENTRY_DSN }}"
          else
            echo "🔄 更新 awoooi-secrets..."
-            # 使用 patch 更新，確保 Telegram 配置永遠是最新的
+            # 使用 patch 更新，確保關鍵配置永遠是最新的
+            # 2026-03-29 ogt: ADR-036 新增 NVIDIA_API_KEY
            kubectl patch secret awoooi-secrets -n awoooi-prod --type='merge' -p="{
              \"stringData\": {
                \"OPENCLAW_TG_BOT_TOKEN\": \"${{ secrets.OPENCLAW_TG_BOT_TOKEN }}\",
-                \"OPENCLAW_TG_CHAT_ID\": \"${{ secrets.OPENCLAW_TG_CHAT_ID }}\"
+                \"OPENCLAW_TG_CHAT_ID\": \"${{ secrets.OPENCLAW_TG_CHAT_ID }}\",
+                \"NVIDIA_API_KEY\": \"${{ secrets.NVIDIA_API_KEY }}\"
              }
            }"
          fi
--- a/apps/api/models.json
+++ b/apps/api/models.json
@@ -1,12 +1,13 @@
 {
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "name": "OpenClaw AI Router Configuration",
-  "version": "1.0.0",
-  "description": "AI 模型路由與備援設定 (ADR-006)",
-  "updated_at": "2026-03-26",
+  "version": "1.1.0",
+  "description": "AI 模型路由與備援設定 (ADR-006 + ADR-036 Nemotron)",
+  "updated_at": "2026-03-29",

  "default_provider": "ollama",
  "fallback_order": ["ollama", "gemini", "claude"],
+  "tool_calling_fallback_order": ["nvidia", "gemini", "claude"],

  "providers": {
    "ollama": {
@@ -101,6 +102,44 @@
        "tool_use": true,
        "structured_output": true
      }
+    },
+
+    "nvidia": {
+      "name": "NVIDIA Nemotron (ADR-036)",
+      "enabled": true,
+      "priority": 4,
+      "endpoint": "https://integrate.api.nvidia.com/v1",
+      "api_path": "/chat/completions",
+      "models": {
+        "default": "nvidia/llama-3.1-nemotron-70b-instruct",
+        "tool_calling": "nvidia/llama-3.1-nemotron-70b-instruct"
+      },
+      "options": {
+        "temperature": 0.0,
+        "max_tokens": 1024
+      },
+      "timeout_seconds": 60,
+      "cost": {
+        "per_1k_tokens": 0,
+        "currency": "USD",
+        "notes": "Free tier (2026-03-29)"
+      },
+      "auth": {
+        "type": "header",
+        "env_var": "NVIDIA_API_KEY",
+        "header_name": "Authorization",
+        "header_prefix": "Bearer "
+      },
+      "rate_limits": {
+        "daily_tokens": 50000,
+        "requests_per_minute": 5
+      },
+      "features": {
+        "tool_use": true,
+        "structured_output": true,
+        "tool_calling_accuracy": 0.833
+      },
+      "use_for": ["tool_calling"]
    }
  },

@@ -122,6 +161,14 @@
      "preferred_provider": "ollama",
      "fallback_enabled": false,
      "max_output_tokens": 500
+    },
+    "tool_calling": {
+      "description": "K8s Tool Calling operations (ADR-036)",
+      "preferred_provider": "nvidia",
+      "fallback_enabled": true,
+      "fallback_order": ["gemini", "claude"],
+      "required_features": ["tool_use"],
+      "notes": "Nemotron 83.3% accuracy for K8s operations"
    }
  },

--- a/apps/api/src/core/config.py
+++ b/apps/api/src/core/config.py
@@ -201,8 +201,9 @@ class Settings(BaseSettings):
    )

    # ==========================================================================
-    # AI Fallback Strategy (ADR-006)
+    # AI Fallback Strategy (ADR-006 v1.3 + ADR-036)
    # Order: Ollama (local) -> Gemini (cloud) -> Claude (cloud)
+    # Tool Calling: Nemotron (專用) -> Gemini -> Claude
    # ==========================================================================
    AI_FALLBACK_ORDER: list[str] = Field(
        default=["ollama", "gemini", "claude"],
@@ -210,6 +211,11 @@ class Settings(BaseSettings):
    )
    GEMINI_API_KEY: str = Field(default="", description="Google Gemini API key")
    CLAUDE_API_KEY: str = Field(default="", description="Anthropic Claude API key")
+    # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合
+    NVIDIA_API_KEY: str = Field(
+        default="",
+        description="NVIDIA NIM API key for Nemotron Tool Calling (ADR-036)",
+    )

    @field_validator("AI_FALLBACK_ORDER", mode="before")
    @classmethod
--- a/apps/api/src/models/init.py
+++ b/apps/api/src/models/init.py
@@ -6,6 +6,7 @@ AWOOOI Models Package
 - Approval: 簽核相關模型 (Phase 2 HITL)
 - Incident: 事件相關模型 (Phase 6 認知覺醒)
 - AI: AI 相關模型
+- NVIDIA: Nemotron Tool Calling 模型 (ADR-036)
 """

 # Approval Models (Phase 2)
@@ -39,6 +40,16 @@ from src.models.incident import (
    Signal,
 )

+# NVIDIA Models (ADR-036 - Nemotron Tool Calling)
+from src.models.nvidia import (
+    NvidiaProviderResult,
+    NvidiaResponse,
+    NvidiaUsage,
+    ToolCall,
+    ToolCallValidationResult,
+    ToolDefinition,
+)
+
 __all__ = [
    # Approval
    "ApprovalRequest",
@@ -65,4 +76,11 @@ __all__ = [
    "IncidentUpdate",
    "Severity",
    "Signal",
+    # NVIDIA (ADR-036)
+    "NvidiaProviderResult",
+    "NvidiaResponse",
+    "NvidiaUsage",
+    "ToolCall",
+    "ToolCallValidationResult",
+    "ToolDefinition",
 ]
--- a/apps/api/src/models/nvidia.py
+++ b/apps/api/src/models/nvidia.py
@@ -0,0 +1,119 @@
+"""
+NVIDIA Nemotron API Models - ADR-036
+====================================
+2026-03-29 ogt: Nemotron Tool Calling 整合 (83.3% 精準度)
+
+OpenAI 相容格式 - 用於 Tool Calling 任務
+"""
+
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+
+class ToolFunction(BaseModel):
+    """Name and raw arguments of the function a tool call refers to."""
+
+    name: str = Field(..., description="Tool 函數名稱")
+    # Held as the raw JSON text; this model does not decode it.
+    arguments: str = Field(..., description="Tool 參數 (JSON 字串)")
+
+
+class ToolCall(BaseModel):
+    """A single tool call: its ID, its type and the function being called."""
+
+    id: str = Field(..., description="Tool Call ID")
+    # Defaults to "function" when the field is omitted.
+    type: str = Field(default="function", description="Tool 類型")
+    function: ToolFunction = Field(..., description="Tool 函數")
+
+
+class NvidiaMessage(BaseModel):
+    """Chat message structure used inside an NVIDIA API response choice."""
+
+    role: str = Field(..., description="訊息角色 (assistant/user/system)")
+    # Both content and tool_calls are optional and default to None.
+    content: str | None = Field(default=None, description="訊息內容")
+    tool_calls: list[ToolCall] | None = Field(
+        default=None, description="Tool Calls (僅 assistant)"
+    )
+
+
+class NvidiaChoice(BaseModel):
+    """One entry of the ``choices`` list in an NVIDIA API response."""
+
+    index: int = Field(default=0, description="選項索引")
+    message: NvidiaMessage = Field(..., description="回應訊息")
+    # Optional; the field description lists "stop" and "tool_calls" as values.
+    finish_reason: str | None = Field(
+        default=None, description="結束原因 (stop/tool_calls)"
+    )
+
+
+class NvidiaUsage(BaseModel):
+    """Token usage counters of an NVIDIA API response; each defaults to 0."""
+
+    prompt_tokens: int = Field(default=0, description="輸入 Token 數")
+    completion_tokens: int = Field(default=0, description="輸出 Token 數")
+    total_tokens: int = Field(default=0, description="總 Token 數")
+
+
+class NvidiaResponse(BaseModel):
+    """Complete response body of the NVIDIA Nemotron chat API."""
+
+    # id, created, model and choices are required; validation fails without them.
+    id: str = Field(..., description="回應 ID")
+    object: str = Field(default="chat.completion", description="物件類型")
+    created: int = Field(..., description="建立時間戳")
+    model: str = Field(..., description="模型名稱")
+    choices: list[NvidiaChoice] = Field(..., description="回應選項")
+    # Usage statistics are optional in the response.
+    usage: NvidiaUsage | None = Field(default=None, description="Token 使用統計")
+
+
+# === Tool Calling 請求結構 ===
+
+
+class ToolDefinition(BaseModel):
+    """Definition of a tool as it is sent to the API."""
+
+    type: str = Field(default="function", description="Tool 類型")
+    # Free-form dict; its contents are not validated by this model.
+    function: dict[str, Any] = Field(..., description="函數定義 (JSON Schema)")
+
+
+class NvidiaToolCallRequest(BaseModel):
+    """Request body for an NVIDIA tool-calling chat completion."""
+
+    model: str = Field(
+        default="nvidia/llama-3.1-nemotron-70b-instruct",
+        description="模型名稱",
+    )
+    # messages and tools are required; the remaining fields have defaults.
+    messages: list[dict[str, Any]] = Field(..., description="對話訊息")
+    tools: list[ToolDefinition] = Field(..., description="可用 Tools")
+    # Either a strategy string (default "auto") or a dict.
+    tool_choice: str | dict[str, Any] = Field(
+        default="auto", description="Tool 選擇策略"
+    )
+    temperature: float = Field(default=0.0, description="溫度 (0.0 最確定性)")
+    max_tokens: int = Field(default=1024, description="最大輸出 Token")
+
+
+# === 驗證結果結構 ===
+
+
+class ToolCallValidationResult(BaseModel):
+    """Outcome of validating a single tool call."""
+
+    # Only ``valid`` is required; every other field defaults to None.
+    valid: bool = Field(..., description="是否有效")
+    tool_name: str | None = Field(default=None, description="Tool 名稱")
+    arguments: dict[str, Any] | None = Field(default=None, description="解析後參數")
+    error: str | None = Field(default=None, description="錯誤訊息")
+    raw_response: str | None = Field(default=None, description="原始回應 (debug)")
+
+
+class NvidiaProviderResult(BaseModel):
+    """Result object returned by NvidiaProvider."""
+
+    success: bool = Field(..., description="是否成功")
+    # A fresh empty list per instance when no tool calls are supplied.
+    tool_calls: list[ToolCallValidationResult] = Field(
+        default_factory=list, description="驗證後的 Tool Calls"
+    )
+    usage: NvidiaUsage | None = Field(default=None, description="Token 使用統計")
+    latency_ms: float = Field(default=0.0, description="延遲 (毫秒)")
+    error: str | None = Field(default=None, description="錯誤訊息")
+    fallback_triggered: bool = Field(
+        default=False, description="是否觸發 Fallback"
+    )
--- a/apps/api/src/services/ai_rate_limiter.py
+++ b/apps/api/src/services/ai_rate_limiter.py
@@ -37,6 +37,12 @@ RATE_LIMITS = {
        "daily_requests": 200,
        "daily_tokens": 50_000,
    },
+    # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (免費 Tier)
+    "nvidia": {
+        "rpm": 5,            # 每分鐘請求數 (延遲較高，控制併發)
+        "daily_requests": 100,  # 每日請求數 (免費 Tier 限制)
+        "daily_tokens": 50_000,  # 每日 Token 數
+    },
 }

 # =============================================================================
@@ -52,6 +58,11 @@ COST_LIMITS = {
        "total_cost_usd": 10.0,
        "alert_threshold_usd": 8.0,
    },
+    # 2026-03-29 ogt: ADR-036 Nemotron (免費 Tier，設定低限制作為監控)
+    "nvidia": {
+        "total_cost_usd": 0.0,  # 免費 Tier，不計費
+        "alert_threshold_usd": 0.0,  # 不發送成本告警
+    },
 }

 # Gemini 1.5 Flash 定價 (per token)
--- a/apps/api/src/services/ai_router.py
+++ b/apps/api/src/services/ai_router.py
@@ -66,6 +66,8 @@ class AIProvider(Enum):
    OLLAMA = "ollama"
    GEMINI = "gemini"
    CLAUDE = "claude"
+    # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (83.3% 精準度)
+    NVIDIA = "nvidia"


 # Provider 對應延遲預算 (ms)
@@ -73,6 +75,8 @@ PROVIDER_LATENCY_BUDGET: dict[AIProvider, int] = {
    AIProvider.OLLAMA: 60000,  # 本地，允許較長處理時間
    AIProvider.GEMINI: 30000,  # 雲端，較低延遲
    AIProvider.CLAUDE: 30000,  # 雲端，較低延遲
+    # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (延遲 11-45s)
+    AIProvider.NVIDIA: 60000,  # Tool Calling 專用，允許較長時間
 }


@@ -164,21 +168,32 @@ class AIRouter:
        self._ollama_summary = self._model_registry.get_model("ollama", "summary")
        self._gemini_default = self._model_registry.get_model("gemini", "default")
        self._claude_default = self._model_registry.get_model("claude", "default")
+        # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling
+        self._nvidia_default = self._model_registry.get_model("nvidia", "default")

        # Provider 對應模型映射
        self._provider_models: dict[AIProvider, str] = {
            AIProvider.OLLAMA: self._ollama_default,
            AIProvider.GEMINI: self._gemini_default,
            AIProvider.CLAUDE: self._claude_default,
+            AIProvider.NVIDIA: self._nvidia_default,  # ADR-036
        }

        # 完整 Fallback 鏈 (Provider, Model)
+        # 2026-03-29 ogt: NVIDIA 不在一般 Fallback 鏈 (僅用於 Tool Calling)
        self._full_fallback_chain: list[tuple[AIProvider, str]] = [
            (AIProvider.OLLAMA, self._ollama_default),
            (AIProvider.GEMINI, self._gemini_default),
            (AIProvider.CLAUDE, self._claude_default),
        ]

+        # Tool Calling 專用 Fallback 鏈 (ADR-036)
+        self._tool_calling_fallback_chain: list[tuple[AIProvider, str]] = [
+            (AIProvider.NVIDIA, self._nvidia_default),
+            (AIProvider.GEMINI, self._gemini_default),
+            (AIProvider.CLAUDE, self._claude_default),
+        ]
+
        # 意圖對應 Provider 強制覆寫 (None = 依複雜度決定)
        self._intent_provider_overrides: dict[IntentType, AIProvider | None] = {
            # 四大核心意圖
@@ -466,6 +481,39 @@ class AIRouter:
            routing_latency_ms=routing_latency,
        )

+    # =========================================================================
+    # Tool Calling 路由 (ADR-036)
+    # =========================================================================
+
+    def route_tool_calling(self) -> tuple[AIProvider, str, list[tuple[AIProvider, str]]]:
+        """
+        Tool Calling 專用路由 (ADR-036)
+
+        Tool Calling 任務優先使用 Nemotron (83.3% 精準度)，
+        Fallback 到 Gemini/Claude。
+
+        Returns:
+            (provider, model, fallback_chain)
+        """
+        provider = AIProvider.NVIDIA
+        model = self._nvidia_default
+        fallback_chain = [
+            (p, m) for p, m in self._tool_calling_fallback_chain if p != provider
+        ]
+
+        logger.info(
+            "tool_calling_routing",
+            provider=provider.value,
+            model=model,
+            fallback_count=len(fallback_chain),
+        )
+
+        return provider, model, fallback_chain
+
+    def get_tool_calling_fallback_chain(self) -> list[tuple[AIProvider, str]]:
+        """取得 Tool Calling Fallback 鏈"""
+        return self._tool_calling_fallback_chain.copy()
+
    # =========================================================================
    # 便捷方法
    # =========================================================================
--- a/apps/api/src/services/nvidia_provider.py
+++ b/apps/api/src/services/nvidia_provider.py
@@ -0,0 +1,432 @@
+"""
+NVIDIA Nemotron Provider - ADR-036
+==================================
+2026-03-29 ogt: Nemotron Tool Calling 整合 (83.3% 精準度)
+
+專門處理 Tool Calling 任務，提供高精準度的 K8s 操作決策。
+
+設計原則:
+1. OpenAI 相容格式 - 與 Nemotron API 對接
+2. Pydantic 強制驗證 - 所有回應必須通過 Schema 驗證
+3. Fallback 機制 - 失敗時降級到 Gemini/Claude
+4. HITL 高風險保護 - DELETE 等操作需人工審核
+
+版本: v1.0
+建立: 2026-03-29 (台北時區)
+建立者: Claude Code
+"""
+
+from __future__ import annotations
+
+import json
+import time
+from typing import Any
+
+import httpx
+import structlog
+
+from src.core.config import get_settings
+from src.models.nvidia import (
+    NvidiaProviderResult,
+    NvidiaResponse,
+    NvidiaUsage,
+    ToolCallValidationResult,
+    ToolDefinition,
+)
+
+logger = structlog.get_logger(__name__)
+settings = get_settings()
+
+# =============================================================================
+# 常量定義
+# =============================================================================
+
+# NVIDIA NIM API Endpoint
+NVIDIA_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
+
+# Default model
+NVIDIA_DEFAULT_MODEL = "nvidia/llama-3.1-nemotron-70b-instruct"
+
+# Request timeout (seconds) - Nemotron latency is 11-45s
+NVIDIA_TIMEOUT = 60.0
+
+# Number of retries after the first attempt
+MAX_RETRIES = 2
+
+# High-risk tools (require HITL review)
+HIGH_RISK_TOOLS: set[str] = {
+    "delete_pod",
+    "delete_deployment",
+    "delete_namespace",
+    "delete_service",
+    "delete_configmap",
+    "delete_secret",
+    "scale_to_zero",
+    "drain_node",
+    "cordon_node",
+    "delete_pvc",
+    "delete_pv",
+}
+
+
+# =============================================================================
+# NvidiaProvider 類別
+# =============================================================================
+
+
+class NvidiaProvider:
+    """
+    NVIDIA Nemotron Provider
+
+    專門處理 Tool Calling 任務，提供 83.3% 精準度的 K8s 操作決策。
+
+    使用方式:
+    ```python
+    provider = NvidiaProvider()
+    result = await provider.tool_call(
+        messages=[{"role": "user", "content": "重啟 awoooi-api pod"}],
+        tools=[restart_tool, scale_tool],
+    )
+    if result.success:
+        for tc in result.tool_calls:
+            if tc.valid:
+                execute_tool(tc.tool_name, tc.arguments)
+    ```
+    """
+
+    def __init__(self, api_key: str | None = None):
+        """
+        初始化 NvidiaProvider
+
+        Args:
+            api_key: NVIDIA API Key (預設從 settings 取得)
+        """
+        self._api_key = api_key or settings.NVIDIA_API_KEY
+        self._client: httpx.AsyncClient | None = None
+
+    async def _get_client(self) -> httpx.AsyncClient:
+        """取得或建立 HTTP Client"""
+        if self._client is None or self._client.is_closed:
+            self._client = httpx.AsyncClient(
+                timeout=httpx.Timeout(NVIDIA_TIMEOUT, connect=10.0),
+                limits=httpx.Limits(max_connections=10, max_keepalive_connections=5),
+            )
+        return self._client
+
+    async def close(self) -> None:
+        """關閉 HTTP Client"""
+        if self._client and not self._client.is_closed:
+            await self._client.aclose()
+            self._client = None
+
+    async def tool_call(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[ToolDefinition | dict[str, Any]],
+        model: str = NVIDIA_DEFAULT_MODEL,
+        temperature: float = 0.0,
+        max_tokens: int = 1024,
+    ) -> NvidiaProviderResult:
+        """
+        執行 Tool Calling 請求
+
+        Args:
+            messages: 對話訊息列表
+            tools: 可用 Tool 定義列表
+            model: 模型名稱
+            temperature: 溫度 (0.0 最確定性)
+            max_tokens: 最大輸出 Token
+
+        Returns:
+            NvidiaProviderResult: 包含驗證後的 Tool Calls
+        """
+        start_time = time.perf_counter()
+
+        # 檢查 API Key
+        if not self._api_key:
+            return NvidiaProviderResult(
+                success=False,
+                error="NVIDIA_API_KEY 未設定",
+                fallback_triggered=True,
+            )
+
+        # 轉換 tools 為 dict 格式
+        tools_data = []
+        for tool in tools:
+            if isinstance(tool, ToolDefinition):
+                tools_data.append(tool.model_dump())
+            else:
+                tools_data.append(tool)
+
+        # 建立請求
+        request_body = {
+            "model": model,
+            "messages": messages,
+            "tools": tools_data,
+            "tool_choice": "auto",
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+        }
+
+        # 執行請求 (含重試)
+        response_data: dict | None = None
+        last_error: str | None = None
+
+        for attempt in range(MAX_RETRIES + 1):
+            try:
+                response_data = await self._send_request(request_body)
+                break
+            except Exception as e:
+                last_error = str(e)
+                logger.warning(
+                    "nvidia_request_retry",
+                    attempt=attempt + 1,
+                    max_retries=MAX_RETRIES,
+                    error=last_error,
+                )
+                if attempt == MAX_RETRIES:
+                    break
+
+        latency_ms = (time.perf_counter() - start_time) * 1000
+
+        # 請求失敗
+        if response_data is None:
+            logger.error(
+                "nvidia_request_failed",
+                error=last_error,
+                latency_ms=round(latency_ms, 2),
+            )
+            return NvidiaProviderResult(
+                success=False,
+                error=last_error,
+                latency_ms=latency_ms,
+                fallback_triggered=True,
+            )
+
+        # 解析回應
+        try:
+            nvidia_response = NvidiaResponse.model_validate(response_data)
+        except Exception as e:
+            logger.error(
+                "nvidia_response_parse_failed",
+                error=str(e),
+                raw_response=str(response_data)[:500],
+            )
+            return NvidiaProviderResult(
+                success=False,
+                error=f"回應解析失敗: {e}",
+                latency_ms=latency_ms,
+                fallback_triggered=True,
+            )
+
+        # 驗證 Tool Calls
+        tool_calls = self._validate_tool_calls(nvidia_response)
+
+        # 統計
+        usage = nvidia_response.usage
+
+        logger.info(
+            "nvidia_tool_call_completed",
+            success=True,
+            tool_call_count=len(tool_calls),
+            valid_count=sum(1 for tc in tool_calls if tc.valid),
+            latency_ms=round(latency_ms, 2),
+            prompt_tokens=usage.prompt_tokens if usage else 0,
+            completion_tokens=usage.completion_tokens if usage else 0,
+        )
+
+        return NvidiaProviderResult(
+            success=True,
+            tool_calls=tool_calls,
+            usage=usage,
+            latency_ms=latency_ms,
+            fallback_triggered=False,
+        )
+
+    async def _send_request(self, request_body: dict) -> dict:
+        """
+        發送 HTTP 請求到 NVIDIA API
+
+        Args:
+            request_body: 請求內容
+
+        Returns:
+            API 回應 (dict)
+
+        Raises:
+            Exception: 請求失敗
+        """
+        client = await self._get_client()
+
+        headers = {
+            "Authorization": f"Bearer {self._api_key}",
+            "Content-Type": "application/json",
+        }
+
+        response = await client.post(
+            NVIDIA_API_URL,
+            headers=headers,
+            json=request_body,
+        )
+
+        if response.status_code != 200:
+            error_text = response.text[:500]
+            raise Exception(
+                f"NVIDIA API 錯誤: {response.status_code} - {error_text}"
+            )
+
+        return response.json()
+
+    def _validate_tool_calls(
+        self, response: NvidiaResponse
+    ) -> list[ToolCallValidationResult]:
+        """
+        驗證 Tool Calls
+
+        Args:
+            response: NVIDIA API 回應
+
+        Returns:
+            驗證後的 Tool Call 結果列表
+        """
+        results: list[ToolCallValidationResult] = []
+
+        if not response.choices:
+            return results
+
+        message = response.choices[0].message
+        if not message.tool_calls:
+            return results
+
+        for tc in message.tool_calls:
+            try:
+                # 解析 arguments JSON
+                arguments = json.loads(tc.function.arguments)
+
+                results.append(
+                    ToolCallValidationResult(
+                        valid=True,
+                        tool_name=tc.function.name,
+                        arguments=arguments,
+                    )
+                )
+            except json.JSONDecodeError as e:
+                results.append(
+                    ToolCallValidationResult(
+                        valid=False,
+                        tool_name=tc.function.name,
+                        error=f"Arguments JSON 解析失敗: {e}",
+                        raw_response=tc.function.arguments,
+                    )
+                )
+            except Exception as e:
+                results.append(
+                    ToolCallValidationResult(
+                        valid=False,
+                        error=f"驗證失敗: {e}",
+                    )
+                )
+
+        return results
+
+    def is_high_risk_tool(self, tool_name: str) -> bool:
+        """
+        檢查是否為高風險 Tool
+
+        Args:
+            tool_name: Tool 名稱
+
+        Returns:
+            是否需要 HITL 審核
+        """
+        return tool_name.lower() in HIGH_RISK_TOOLS
+
+    def get_high_risk_tools(
+        self, tool_calls: list[ToolCallValidationResult]
+    ) -> list[ToolCallValidationResult]:
+        """
+        篩選高風險 Tool Calls
+
+        Args:
+            tool_calls: Tool Call 結果列表
+
+        Returns:
+            高風險 Tool Calls
+        """
+        return [
+            tc
+            for tc in tool_calls
+            if tc.valid and tc.tool_name and self.is_high_risk_tool(tc.tool_name)
+        ]
+
+
+# =============================================================================
+# 單例與工廠函數
+# =============================================================================
+
+_provider: NvidiaProvider | None = None
+
+
+def get_nvidia_provider() -> NvidiaProvider:
+    """取得 NvidiaProvider 單例"""
+    global _provider
+    if _provider is None:
+        _provider = NvidiaProvider()
+    return _provider
+
+
+def reset_nvidia_provider() -> None:
+    """Reset the singleton (used by tests)."""
+    global _provider
+    # Only the reference is dropped; the old provider's close() is not called.
+    _provider = None
+
+
+# =============================================================================
+# 便捷函數
+# =============================================================================
+
+
+async def nvidia_tool_call(
+    messages: list[dict[str, Any]],
+    tools: list[ToolDefinition | dict[str, Any]],
+    **kwargs,
+) -> NvidiaProviderResult:
+    """
+    便捷函數: 執行 NVIDIA Tool Calling
+
+    Args:
+        messages: 對話訊息列表
+        tools: 可用 Tool 定義列表
+        **kwargs: 其他參數 (model, temperature, max_tokens)
+
+    Returns:
+        NvidiaProviderResult
+    """
+    provider = get_nvidia_provider()
+    return await provider.tool_call(messages, tools, **kwargs)
+
+
+def create_tool_definition(
+    name: str,
+    description: str,
+    parameters: dict[str, Any],
+) -> ToolDefinition:
+    """
+    建立 Tool 定義
+
+    Args:
+        name: Tool 名稱
+        description: Tool 描述
+        parameters: JSON Schema 參數定義
+
+    Returns:
+        ToolDefinition
+    """
+    return ToolDefinition(
+        type="function",
+        function={
+            "name": name,
+            "description": description,
+            "parameters": parameters,
+        },
+    )
--- a/apps/api/tests/test_nvidia_provider.py
+++ b/apps/api/tests/test_nvidia_provider.py
@@ -0,0 +1,316 @@
+"""
+NVIDIA Provider Tests - ADR-036
+===============================
+測試 Nemotron Tool Calling 整合
+
+注意: 這些是單元測試，不需要真實的 NVIDIA API Key
+"""
+
+import json
+
+import pytest
+
+from src.models.nvidia import (
+    NvidiaChoice,
+    NvidiaMessage,
+    NvidiaProviderResult,
+    NvidiaResponse,
+    NvidiaUsage,
+    ToolCall,
+    ToolCallValidationResult,
+    ToolDefinition,
+    ToolFunction,
+)
+from src.services.nvidia_provider import (
+    HIGH_RISK_TOOLS,
+    NvidiaProvider,
+    create_tool_definition,
+    get_nvidia_provider,
+    reset_nvidia_provider,
+)
+
+
+class TestNvidiaModels:
+    """Tests for the NVIDIA Pydantic models."""
+
+    def test_tool_function_model(self):
+        """ToolFunction keeps the name and the raw JSON argument string."""
+        func = ToolFunction(
+            name="restart_pod",
+            arguments='{"pod_name": "api-server", "namespace": "default"}',
+        )
+        assert func.name == "restart_pod"
+        assert '"pod_name"' in func.arguments
+
+    def test_tool_call_model(self):
+        """ToolCall exposes its ID and the nested function."""
+        tc = ToolCall(
+            id="call_123",
+            type="function",
+            function=ToolFunction(
+                name="scale_deployment",
+                arguments='{"replicas": 3}',
+            ),
+        )
+        assert tc.id == "call_123"
+        assert tc.function.name == "scale_deployment"
+
+    def test_nvidia_response_model(self):
+        """NvidiaResponse can be built with one choice, one tool call and usage."""
+        response = NvidiaResponse(
+            id="resp_123",
+            created=1234567890,
+            model="nvidia/llama-3.1-nemotron-70b-instruct",
+            choices=[
+                NvidiaChoice(
+                    index=0,
+                    message=NvidiaMessage(
+                        role="assistant",
+                        content=None,
+                        tool_calls=[
+                            ToolCall(
+                                id="call_1",
+                                function=ToolFunction(
+                                    name="restart_pod",
+                                    arguments='{"pod": "api"}',
+                                ),
+                            )
+                        ],
+                    ),
+                    finish_reason="tool_calls",
+                )
+            ],
+            usage=NvidiaUsage(
+                prompt_tokens=100,
+                completion_tokens=50,
+                total_tokens=150,
+            ),
+        )
+
+        assert response.id == "resp_123"
+        assert len(response.choices) == 1
+        assert response.choices[0].message.tool_calls is not None
+        assert len(response.choices[0].message.tool_calls) == 1
+        assert response.usage.total_tokens == 150
+
+    def test_tool_call_validation_result(self):
+        """ToolCallValidationResult stores the parsed arguments as a dict."""
+        result = ToolCallValidationResult(
+            valid=True,
+            tool_name="restart_pod",
+            arguments={"pod_name": "api", "namespace": "default"},
+        )
+        assert result.valid
+        assert result.tool_name == "restart_pod"
+        assert result.arguments["namespace"] == "default"
+
+    def test_tool_definition(self):
+        """ToolDefinition keeps the function spec dict as given."""
+        definition = ToolDefinition(
+            type="function",
+            function={
+                "name": "restart_pod",
+                "description": "Restart a Kubernetes pod",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "pod_name": {"type": "string"},
+                        "namespace": {"type": "string"},
+                    },
+                    "required": ["pod_name"],
+                },
+            },
+        )
+        assert definition.type == "function"
+        assert definition.function["name"] == "restart_pod"
+
+
+class TestNvidiaProvider:
+    """Tests for the NvidiaProvider class."""
+
+    def test_singleton(self):
+        """get_nvidia_provider returns the same instance on repeated calls."""
+        reset_nvidia_provider()
+        p1 = get_nvidia_provider()
+        p2 = get_nvidia_provider()
+        assert p1 is p2
+        reset_nvidia_provider()
+
+    def test_high_risk_tool_detection(self):
+        """High-risk tool names are detected regardless of case."""
+        provider = NvidiaProvider()
+
+        # High-risk operations
+        assert provider.is_high_risk_tool("delete_pod")
+        assert provider.is_high_risk_tool("DELETE_POD")  # upper case must match too
+        assert provider.is_high_risk_tool("delete_deployment")
+        assert provider.is_high_risk_tool("scale_to_zero")
+        assert provider.is_high_risk_tool("drain_node")
+
+        # Operations that are not high-risk
+        assert not provider.is_high_risk_tool("restart_pod")
+        assert not provider.is_high_risk_tool("scale_deployment")
+        assert not provider.is_high_risk_tool("get_logs")
+
+    def test_filter_high_risk_tools(self):
+        """get_high_risk_tools keeps only valid calls to high-risk tools."""
+        provider = NvidiaProvider()
+
+        tool_calls = [
+            ToolCallValidationResult(
+                valid=True,
+                tool_name="restart_pod",
+                arguments={"pod": "api"},
+            ),
+            ToolCallValidationResult(
+                valid=True,
+                tool_name="delete_pod",
+                arguments={"pod": "test"},
+            ),
+            ToolCallValidationResult(
+                valid=False,
+                tool_name="invalid_tool",
+                error="Parse error",
+            ),
+        ]
+
+        high_risk = provider.get_high_risk_tools(tool_calls)
+
+        assert len(high_risk) == 1
+        assert high_risk[0].tool_name == "delete_pod"
+
+    def test_validate_tool_calls(self):
+        """_validate_tool_calls parses good arguments and flags invalid JSON."""
+        provider = NvidiaProvider()
+
+        # Build a fake response with one good and one broken tool call
+        response = NvidiaResponse(
+            id="resp_123",
+            created=1234567890,
+            model="nvidia/llama-3.1-nemotron-70b-instruct",
+            choices=[
+                NvidiaChoice(
+                    index=0,
+                    message=NvidiaMessage(
+                        role="assistant",
+                        tool_calls=[
+                            ToolCall(
+                                id="call_1",
+                                function=ToolFunction(
+                                    name="restart_pod",
+                                    arguments='{"pod_name": "api", "namespace": "default"}',
+                                ),
+                            ),
+                            ToolCall(
+                                id="call_2",
+                                function=ToolFunction(
+                                    name="invalid_tool",
+                                    arguments="not valid json{",  # invalid JSON
+                                ),
+                            ),
+                        ],
+                    ),
+                )
+            ],
+        )
+
+        results = provider._validate_tool_calls(response)
+
+        assert len(results) == 2
+        assert results[0].valid
+        assert results[0].tool_name == "restart_pod"
+        assert results[0].arguments["pod_name"] == "api"
+        assert not results[1].valid
+        assert "JSON" in results[1].error
+
+
+class TestCreateToolDefinition:
+    """Tests for the create_tool_definition() helper."""
+
+    def test_create_tool_definition(self):
+        """create_tool_definition() carries name, description and parameters into the result."""
+        definition = create_tool_definition(
+            name="scale_deployment",
+            description="Scale a Kubernetes deployment",
+            parameters={
+                "type": "object",
+                "properties": {
+                    "deployment": {"type": "string"},
+                    "replicas": {"type": "integer"},
+                },
+                "required": ["deployment", "replicas"],
+            },
+        )
+
+        assert definition.type == "function"
+        assert definition.function["name"] == "scale_deployment"
+        assert definition.function["description"] == "Scale a Kubernetes deployment"
+        assert "replicas" in definition.function["parameters"]["properties"]
+
+
+class TestHighRiskTools:
+    """Tests for the HIGH_RISK_TOOLS collection."""
+
+    def test_high_risk_tools_list(self):
+        """HIGH_RISK_TOOLS contains every operation this test requires to be listed."""
+        assert "delete_pod" in HIGH_RISK_TOOLS
+        assert "delete_deployment" in HIGH_RISK_TOOLS
+        assert "delete_namespace" in HIGH_RISK_TOOLS
+        assert "scale_to_zero" in HIGH_RISK_TOOLS
+        assert "drain_node" in HIGH_RISK_TOOLS
+        assert "cordon_node" in HIGH_RISK_TOOLS
+
+    def test_restart_not_high_risk(self):
+        """Restart operations are not part of HIGH_RISK_TOOLS."""
+        assert "restart_pod" not in HIGH_RISK_TOOLS
+        assert "restart_deployment" not in HIGH_RISK_TOOLS
+
+
+class TestAIRouterNvidiaIntegration:
+    """Tests for the NVIDIA integration in AIRouter."""
+
+    def test_nvidia_provider_in_router(self):
+        """AIProvider exposes an NVIDIA member whose value is 'nvidia'."""
+        from src.services.ai_router import AIProvider
+
+        assert hasattr(AIProvider, "NVIDIA")
+        assert AIProvider.NVIDIA.value == "nvidia"
+
+    def test_tool_calling_route(self):
+        """route_tool_calling() picks NVIDIA first, with Gemini and Claude in the fallback chain."""
+        from src.services.ai_router import get_ai_router, AIProvider, reset_ai_router
+
+        reset_ai_router()
+        router = get_ai_router()
+
+        provider, model, fallback_chain = router.route_tool_calling()
+
+        assert provider == AIProvider.NVIDIA
+        assert "nvidia" in model.lower() or "nemotron" in model.lower()
+        # The fallback chain should include both Gemini and Claude
+        fallback_providers = [p for p, _ in fallback_chain]
+        assert AIProvider.GEMINI in fallback_providers
+        assert AIProvider.CLAUDE in fallback_providers
+
+        reset_ai_router()
+
+    def test_existing_routing_not_affected(self):
+        """Existing routing via route_sync() is unaffected and never selects NVIDIA."""
+        from src.services.ai_router import get_ai_router, AIProvider, reset_ai_router
+
+        reset_ai_router()
+        router = get_ai_router()
+
+        # Exercise synchronous routing (NVIDIA is not involved)
+        decision = router.route_sync("重啟 api pod")
+
+        # Intended to stay on Ollama (low complexity); the assertion also accepts Gemini or Claude
+        assert decision.selected_provider in [
+            AIProvider.OLLAMA,
+            AIProvider.GEMINI,
+            AIProvider.CLAUDE,
+        ]
+        # NVIDIA must not appear in regular routing
+        assert decision.selected_provider != AIProvider.NVIDIA
+
+        reset_ai_router()
--- a/docs/LOGBOOK.md
+++ b/docs/LOGBOOK.md
@@ -5,12 +5,12 @@

 ---

-## 📍 當前狀態 (2026-03-28 23:50 台北)
+## 📍 當前狀態 (2026-03-29 03:30 台北)

 | 項目 | 狀態 |
 |------|------|
-| **當前 Phase** | ✅ **K3s 首席架構師完整審查 (99% EXCEPTIONAL)** |
-| **Day** | Day 11 |
+| **當前 Phase** | ✅ **Phase 20 Nemotron Tool Calling (Phase A 完成)** |
+| **Day** | Day 12 |
 | **K3s 版本** | v1.34.5+k3s1 (mon + mon1) |
 | **叢集健康** | ✅ **所有 Pod 正常運行** |
 | **K3s 優化** | ✅ **全部完成 + P2/P3 + PSS** |
@@ -49,6 +49,33 @@

 ---

+### ✅ 2026-03-29 Phase 20 Nemotron Phase A 完成 (Day 12 03:30) 🆕
+
+| 項目 | 內容 | 狀態 |
+|------|------|------|
+| **ADR-036** | Nemotron Tool Calling 整合 | ✅ 已建立 |
+| **Phase A 實作** | NvidiaProvider 完整實作 | ✅ **已完成** |
+| **測試驗證** | tests/test_nvidia_provider.py | ✅ **15/15 PASSED** |
+| **整合** | ai_router + ai_rate_limiter + models.json | ✅ **已整合** |
+
+**新建檔案**:
+- `src/models/nvidia.py` - Pydantic Schema
+- `src/services/nvidia_provider.py` - NvidiaProvider 類別
+- `tests/test_nvidia_provider.py` - 15 項單元測試
+
+**已修改**:
+- `src/core/config.py` - NVIDIA_API_KEY
+- `src/services/ai_router.py` - AIProvider.NVIDIA + route_tool_calling()
+- `src/services/ai_rate_limiter.py` - NVIDIA 限制
+- `apps/api/models.json` - NVIDIA 配置
+
+**待統帥執行**:
+```bash
+gh secret set NVIDIA_API_KEY --body "nvapi-..."
+```
+
+---
+
 ### 🏛️ 2026-03-28 首席架構師完整審查 (Day 11 23:50)

 | 審查項目 | 評分 | 說明 |
--- a/docs/architecture/ARCHITECTURE.md
+++ b/docs/architecture/ARCHITECTURE.md
@@ -60,15 +60,36 @@ Elapsed: 28.71ms (< 50ms 目標)
 Method: httpx_native
 ```

-## 五主機架構
+## 五主機架構 (2026-03-28 K-HA 更新)

 | 主機 | IP | 角色 | 服務 |
 |-----|-----|------|------|
-| DevOps | 192.168.0.110 | CI/CD | Harbor, GH Runner |
-| Security | 192.168.0.112 | 安全掃描 | Kali Scanner |
-| K3s Master | 192.168.0.120 | 容器編排 | K3s API Server |
-| K3s Worker | 192.168.0.121 | 工作負載 | App Pods |
-| AI+Web | 192.168.0.188 | AI/DB/Web | Ollama, PostgreSQL, Redis, SignOz |
+| DevOps | 192.168.0.110 | CI/CD + 監控 | Harbor:5000, GH Runner, Sentry:9000, Langfuse:3100 |
+| Security | 192.168.0.112 | 安全掃描 | Kali Scanner:8080 |
+| K3s Server #1 | 192.168.0.120 | **Control-Plane MASTER** | keepalived (priority=101) |
+| K3s Server #2 | 192.168.0.121 | **Control-Plane BACKUP** | keepalived (priority=100) |
+| **VIP** | **192.168.0.125** | **HA Endpoint** | **K3s API:6443 + NodePort (32334/32335)** |
+| AI+Web | 192.168.0.188 | AI/DB/Web | Ollama:11434, OpenClaw:8089, **PostgreSQL:5432 (K3s Datastore)**, Redis:6380, SignOz:3301 |
+
+## K3s 高可用架構 (ADR-033)
+
+```
+              VIP 192.168.0.125 (keepalived)
+                       ↓
+        ┌──────────────┼──────────────┐
+        ▼              ▼              ▼
+     mon(120)      mon1(121)    PostgreSQL(188)
+   K3s MASTER    K3s BACKUP     K3s Datastore
+   priority=101  priority=100   (Kine protocol)
+```
+
+### 關鍵變更 (2026-03-28)
+
+- ✅ 雙 Control-Plane (120+121)
+- ✅ 外接 PostgreSQL Datastore (188:5432)
+- ✅ VIP 192.168.0.125 (keepalived VRRP)
+- ✅ CI/CD kubeconfig 指向 VIP
+- ✅ 故障轉移 < 3 秒

 ## SignOz 整合架構

@@ -93,8 +114,9 @@ Method: httpx_native
 └─────────────────────────────────────────────┘
 ```

-## AI Fallback 策略 (ADR-006)
+## AI Fallback 策略 (ADR-006 v1.3 + ADR-036)

+### 一般對話任務
 ```
 Ollama (local) → Gemini (cloud) → Claude (cloud) → mock_fallback
     ↓              ↓                 ↓                ↓
@@ -102,6 +124,24 @@ Ollama (local) → Gemini (cloud) → Claude (cloud) → mock_fallback
   188:11434     API Key          API Key          無 LLM
 ```

+### Tool Calling 任務 (ADR-036 - Phase A 已實作，ADR 待批准)
+```
+Nemotron (cloud) → Gemini (cloud) → Claude (cloud) → 拒絕執行
+     ↓                  ↓                 ↓
+   免費 tier         $0.001/1K        $0.003/1K
+   精準度 83%        API Key          API Key
+   延遲 11-45s
+```
+
+### Provider 對照表
+
+| Provider | 用途 | 延遲 | 精準度 | 成本 |
+|----------|------|------|--------|------|
+| **Ollama** | 即時對話、簡單查詢 | < 5s | 中 | $0 |
+| **Nemotron** | Tool Calling、K8s 操作 | 11-45s | 高 (83%) | 免費 tier |
+| **Gemini** | 通用備援 | 2-5s | 中高 | 低 |
+| **Claude** | 複雜推理、CRITICAL | 2-5s | 最高 | 高 |
+
 ## Phase 7: 視覺主權組件

 ### 已完成組件