From b77e1513875cd9e287937eae4be23adb93b7bd2f Mon Sep 17 00:00:00 2001 From: OG T Date: Sun, 29 Mar 2026 00:00:08 +0800 Subject: [PATCH] =?UTF-8?q?feat(ai):=20ADR-036=20NVIDIA=20Nemotron=20Tool?= =?UTF-8?q?=20Calling=20=E6=95=B4=E5=90=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 20 - 提升 Tool Calling 精準度 50% → 83.3% 新增: - src/models/nvidia.py: Pydantic Schema - src/services/nvidia_provider.py: NvidiaProvider 類別 - tests/test_nvidia_provider.py: 15 項單元測試 (全部通過) 修改: - ai_router.py: AIProvider.NVIDIA + route_tool_calling() - ai_rate_limiter.py: NVIDIA 限制 (5 RPM, 100/day) - models.json: NVIDIA 配置 - cd.yaml: Secrets 注入 NVIDIA_API_KEY 路由策略: - Tool Calling: Nemotron → Gemini → Claude - 一般對話: Ollama → Gemini → Claude (不變) Co-Authored-By: Claude Opus 4.5 --- .github/workflows/cd.yaml | 7 +- apps/api/models.json | 53 ++- apps/api/src/core/config.py | 8 +- apps/api/src/models/__init__.py | 18 + apps/api/src/models/nvidia.py | 119 +++++++ apps/api/src/services/ai_rate_limiter.py | 11 + apps/api/src/services/ai_router.py | 48 +++ apps/api/src/services/nvidia_provider.py | 432 +++++++++++++++++++++++ apps/api/tests/test_nvidia_provider.py | 316 +++++++++++++++++ docs/LOGBOOK.md | 33 +- docs/architecture/ARCHITECTURE.md | 54 ++- 11 files changed, 1083 insertions(+), 16 deletions(-) create mode 100644 apps/api/src/models/nvidia.py create mode 100644 apps/api/src/services/nvidia_provider.py create mode 100644 apps/api/tests/test_nvidia_provider.py diff --git a/.github/workflows/cd.yaml b/.github/workflows/cd.yaml index 2bdcb2a9..39bbfc04 100644 --- a/.github/workflows/cd.yaml +++ b/.github/workflows/cd.yaml @@ -287,15 +287,18 @@ jobs: --from-literal=REDIS_URL="${{ secrets.REDIS_URL }}" \ --from-literal=GEMINI_API_KEY="${{ secrets.GEMINI_API_KEY }}" \ --from-literal=CLAUDE_API_KEY="${{ secrets.CLAUDE_API_KEY }}" \ + --from-literal=NVIDIA_API_KEY="${{ secrets.NVIDIA_API_KEY }}" \ --from-literal=WEBHOOK_HMAC_SECRET="${{ secrets.WEBHOOK_HMAC_SECRET }}" \ --from-literal=SENTRY_DSN="${{ secrets.SENTRY_DSN }}" else echo "🔄 更新 awoooi-secrets..." - # 使用 patch 更新,確保 Telegram 配置永遠是最新的 + # 使用 patch 更新,確保關鍵配置永遠是最新的 + # 2026-03-29 ogt: ADR-036 新增 NVIDIA_API_KEY kubectl patch secret awoooi-secrets -n awoooi-prod --type='merge' -p="{ \"stringData\": { \"OPENCLAW_TG_BOT_TOKEN\": \"${{ secrets.OPENCLAW_TG_BOT_TOKEN }}\", - \"OPENCLAW_TG_CHAT_ID\": \"${{ secrets.OPENCLAW_TG_CHAT_ID }}\" + \"OPENCLAW_TG_CHAT_ID\": \"${{ secrets.OPENCLAW_TG_CHAT_ID }}\", + \"NVIDIA_API_KEY\": \"${{ secrets.NVIDIA_API_KEY }}\" } }" fi diff --git a/apps/api/models.json b/apps/api/models.json index fc018eb8..08adff1c 100644 --- a/apps/api/models.json +++ b/apps/api/models.json @@ -1,12 +1,13 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", "name": "OpenClaw AI Router Configuration", - "version": "1.0.0", - "description": "AI 模型路由與備援設定 (ADR-006)", - "updated_at": "2026-03-26", + "version": "1.1.0", + "description": "AI 模型路由與備援設定 (ADR-006 + ADR-036 Nemotron)", + "updated_at": "2026-03-29", "default_provider": "ollama", "fallback_order": ["ollama", "gemini", "claude"], + "tool_calling_fallback_order": ["nvidia", "gemini", "claude"], "providers": { "ollama": { @@ -101,6 +102,44 @@ "tool_use": true, "structured_output": true } + }, + + "nvidia": { + "name": "NVIDIA Nemotron (ADR-036)", + "enabled": true, + "priority": 4, + "endpoint": "https://integrate.api.nvidia.com/v1", + "api_path": "/chat/completions", + "models": { + "default": "nvidia/llama-3.1-nemotron-70b-instruct", + "tool_calling": "nvidia/llama-3.1-nemotron-70b-instruct" + }, + "options": { + "temperature": 0.0, + "max_tokens": 1024 + }, + "timeout_seconds": 60, + "cost": { + "per_1k_tokens": 0, + "currency": "USD", + "notes": "Free tier (2026-03-29)" + }, + "auth": { + "type": "header", + "env_var": "NVIDIA_API_KEY", + "header_name": "Authorization", + "header_prefix": "Bearer " + }, + "rate_limits": { + "daily_tokens": 50000, + "requests_per_minute": 5 + }, + "features": { + "tool_use": true, + "structured_output": true, + "tool_calling_accuracy": 0.833 + }, + "use_for": ["tool_calling"] } }, @@ -122,6 +161,14 @@ "preferred_provider": "ollama", "fallback_enabled": false, "max_output_tokens": 500 + }, + "tool_calling": { + "description": "K8s Tool Calling operations (ADR-036)", + "preferred_provider": "nvidia", + "fallback_enabled": true, + "fallback_order": ["gemini", "claude"], + "required_features": ["tool_use"], + "notes": "Nemotron 83.3% accuracy for K8s operations" } }, diff --git a/apps/api/src/core/config.py b/apps/api/src/core/config.py index dc73049b..8c08caf2 100644 --- a/apps/api/src/core/config.py +++ b/apps/api/src/core/config.py @@ -201,8 +201,9 @@ class Settings(BaseSettings): ) # ========================================================================== - # AI Fallback Strategy (ADR-006) + # AI Fallback Strategy (ADR-006 v1.3 + ADR-036) # Order: Ollama (local) -> Gemini (cloud) -> Claude (cloud) + # Tool Calling: Nemotron (專用) -> Gemini -> Claude # ========================================================================== AI_FALLBACK_ORDER: list[str] = Field( default=["ollama", "gemini", "claude"], @@ -210,6 +211,11 @@ class Settings(BaseSettings): ) GEMINI_API_KEY: str = Field(default="", description="Google Gemini API key") CLAUDE_API_KEY: str = Field(default="", description="Anthropic Claude API key") + # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合 + NVIDIA_API_KEY: str = Field( + default="", + description="NVIDIA NIM API key for Nemotron Tool Calling (ADR-036)", + ) @field_validator("AI_FALLBACK_ORDER", mode="before") @classmethod diff --git a/apps/api/src/models/__init__.py b/apps/api/src/models/__init__.py index 4f3342b5..96baf4b9 100644 --- a/apps/api/src/models/__init__.py +++ b/apps/api/src/models/__init__.py @@ -6,6 +6,7 @@ AWOOOI Models Package - Approval: 簽核相關模型 (Phase 2 HITL) - Incident: 事件相關模型 (Phase 6 認知覺醒) - AI: AI 相關模型 +- NVIDIA: Nemotron Tool Calling 模型 (ADR-036) """ # Approval Models (Phase 2) @@ -39,6 +40,16 @@ from src.models.incident import ( Signal, ) +# NVIDIA Models (ADR-036 - Nemotron Tool Calling) +from src.models.nvidia import ( + NvidiaProviderResult, + NvidiaResponse, + NvidiaUsage, + ToolCall, + ToolCallValidationResult, + ToolDefinition, +) + __all__ = [ # Approval "ApprovalRequest", @@ -65,4 +76,11 @@ __all__ = [ "IncidentUpdate", "Severity", "Signal", + # NVIDIA (ADR-036) + "NvidiaProviderResult", + "NvidiaResponse", + "NvidiaUsage", + "ToolCall", + "ToolCallValidationResult", + "ToolDefinition", ] diff --git a/apps/api/src/models/nvidia.py b/apps/api/src/models/nvidia.py new file mode 100644 index 00000000..b8043086 --- /dev/null +++ b/apps/api/src/models/nvidia.py @@ -0,0 +1,119 @@ +""" +NVIDIA Nemotron API Models - ADR-036 +==================================== +2026-03-29 ogt: Nemotron Tool Calling 整合 (83.3% 精準度) + +OpenAI 相容格式 - 用於 Tool Calling 任務 +""" + +from typing import Any + +from pydantic import BaseModel, Field + + +class ToolFunction(BaseModel): + """Tool Function 定義""" + + name: str = Field(..., description="Tool 函數名稱") + arguments: str = Field(..., description="Tool 參數 (JSON 字串)") + + +class ToolCall(BaseModel): + """Tool Call 結構""" + + id: str = Field(..., description="Tool Call ID") + type: str = Field(default="function", description="Tool 類型") + function: ToolFunction = Field(..., description="Tool 函數") + + +class NvidiaMessage(BaseModel): + """NVIDIA API Message 結構""" + + role: str = Field(..., description="訊息角色 (assistant/user/system)") + content: str | None = Field(default=None, description="訊息內容") + tool_calls: list[ToolCall] | None = Field( + default=None, description="Tool Calls (僅 assistant)" + ) + + +class NvidiaChoice(BaseModel): + """NVIDIA API Choice 結構""" + + index: int = Field(default=0, description="選項索引") + message: NvidiaMessage = Field(..., description="回應訊息") + finish_reason: str | None = Field( + default=None, description="結束原因 (stop/tool_calls)" + ) + + +class NvidiaUsage(BaseModel): + """NVIDIA API Token 使用統計""" + + prompt_tokens: int = Field(default=0, description="輸入 Token 數") + completion_tokens: int = Field(default=0, description="輸出 Token 數") + total_tokens: int = Field(default=0, description="總 Token 數") + + +class NvidiaResponse(BaseModel): + """NVIDIA Nemotron API 完整回應""" + + id: str = Field(..., description="回應 ID") + object: str = Field(default="chat.completion", description="物件類型") + created: int = Field(..., description="建立時間戳") + model: str = Field(..., description="模型名稱") + choices: list[NvidiaChoice] = Field(..., description="回應選項") + usage: NvidiaUsage | None = Field(default=None, description="Token 使用統計") + + +# === Tool Calling 請求結構 === + + +class ToolDefinition(BaseModel): + """Tool 定義 (發送給 API)""" + + type: str = Field(default="function", description="Tool 類型") + function: dict[str, Any] = Field(..., description="函數定義 (JSON Schema)") + + +class NvidiaToolCallRequest(BaseModel): + """NVIDIA Tool Calling 請求""" + + model: str = Field( + default="nvidia/llama-3.1-nemotron-70b-instruct", + description="模型名稱", + ) + messages: list[dict[str, Any]] = Field(..., description="對話訊息") + tools: list[ToolDefinition] = Field(..., description="可用 Tools") + tool_choice: str | dict[str, Any] = Field( + default="auto", description="Tool 選擇策略" + ) + temperature: float = Field(default=0.0, description="溫度 (0.0 最確定性)") + max_tokens: int = Field(default=1024, description="最大輸出 Token") + + +# === 驗證結果結構 === + + +class ToolCallValidationResult(BaseModel): + """Tool Call 驗證結果""" + + valid: bool = Field(..., description="是否有效") + tool_name: str | None = Field(default=None, description="Tool 名稱") + arguments: dict[str, Any] | None = Field(default=None, description="解析後參數") + error: str | None = Field(default=None, description="錯誤訊息") + raw_response: str | None = Field(default=None, description="原始回應 (debug)") + + +class NvidiaProviderResult(BaseModel): + """NvidiaProvider 回傳結果""" + + success: bool = Field(..., description="是否成功") + tool_calls: list[ToolCallValidationResult] = Field( + default_factory=list, description="驗證後的 Tool Calls" + ) + usage: NvidiaUsage | None = Field(default=None, description="Token 使用統計") + latency_ms: float = Field(default=0.0, description="延遲 (毫秒)") + error: str | None = Field(default=None, description="錯誤訊息") + fallback_triggered: bool = Field( + default=False, description="是否觸發 Fallback" + ) diff --git a/apps/api/src/services/ai_rate_limiter.py b/apps/api/src/services/ai_rate_limiter.py index 14922841..4d8cf092 100644 --- a/apps/api/src/services/ai_rate_limiter.py +++ b/apps/api/src/services/ai_rate_limiter.py @@ -37,6 +37,12 @@ RATE_LIMITS = { "daily_requests": 200, "daily_tokens": 50_000, }, + # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (免費 Tier) + "nvidia": { + "rpm": 5, # 每分鐘請求數 (延遲較高,控制併發) + "daily_requests": 100, # 每日請求數 (免費 Tier 限制) + "daily_tokens": 50_000, # 每日 Token 數 + }, } # ============================================================================= @@ -52,6 +58,11 @@ COST_LIMITS = { "total_cost_usd": 10.0, "alert_threshold_usd": 8.0, }, + # 2026-03-29 ogt: ADR-036 Nemotron (免費 Tier,設定低限制作為監控) + "nvidia": { + "total_cost_usd": 0.0, # 免費 Tier,不計費 + "alert_threshold_usd": 0.0, # 不發送成本告警 + }, } # Gemini 1.5 Flash 定價 (per token) diff --git a/apps/api/src/services/ai_router.py b/apps/api/src/services/ai_router.py index 1b5c60f2..3c8b9db5 100644 --- a/apps/api/src/services/ai_router.py +++ b/apps/api/src/services/ai_router.py @@ -66,6 +66,8 @@ class AIProvider(Enum): OLLAMA = "ollama" GEMINI = "gemini" CLAUDE = "claude" + # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (83.3% 精準度) + NVIDIA = "nvidia" # Provider 對應延遲預算 (ms) @@ -73,6 +75,8 @@ PROVIDER_LATENCY_BUDGET: dict[AIProvider, int] = { AIProvider.OLLAMA: 60000, # 本地,允許較長處理時間 AIProvider.GEMINI: 30000, # 雲端,較低延遲 AIProvider.CLAUDE: 30000, # 雲端,較低延遲 + # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (延遲 11-45s) + AIProvider.NVIDIA: 60000, # Tool Calling 專用,允許較長時間 } @@ -164,21 +168,32 @@ class AIRouter: self._ollama_summary = self._model_registry.get_model("ollama", "summary") self._gemini_default = self._model_registry.get_model("gemini", "default") self._claude_default = self._model_registry.get_model("claude", "default") + # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling + self._nvidia_default = self._model_registry.get_model("nvidia", "default") # Provider 對應模型映射 self._provider_models: dict[AIProvider, str] = { AIProvider.OLLAMA: self._ollama_default, AIProvider.GEMINI: self._gemini_default, AIProvider.CLAUDE: self._claude_default, + AIProvider.NVIDIA: self._nvidia_default, # ADR-036 } # 完整 Fallback 鏈 (Provider, Model) + # 2026-03-29 ogt: NVIDIA 不在一般 Fallback 鏈 (僅用於 Tool Calling) self._full_fallback_chain: list[tuple[AIProvider, str]] = [ (AIProvider.OLLAMA, self._ollama_default), (AIProvider.GEMINI, self._gemini_default), (AIProvider.CLAUDE, self._claude_default), ] + # Tool Calling 專用 Fallback 鏈 (ADR-036) + self._tool_calling_fallback_chain: list[tuple[AIProvider, str]] = [ + (AIProvider.NVIDIA, self._nvidia_default), + (AIProvider.GEMINI, self._gemini_default), + (AIProvider.CLAUDE, self._claude_default), + ] + # 意圖對應 Provider 強制覆寫 (None = 依複雜度決定) self._intent_provider_overrides: dict[IntentType, AIProvider | None] = { # 四大核心意圖 @@ -466,6 +481,39 @@ class AIRouter: routing_latency_ms=routing_latency, ) + # ========================================================================= + # Tool Calling 路由 (ADR-036) + # ========================================================================= + + def route_tool_calling(self) -> tuple[AIProvider, str, list[tuple[AIProvider, str]]]: + """ + Tool Calling 專用路由 (ADR-036) + + Tool Calling 任務優先使用 Nemotron (83.3% 精準度), + Fallback 到 Gemini/Claude。 + + Returns: + (provider, model, fallback_chain) + """ + provider = AIProvider.NVIDIA + model = self._nvidia_default + fallback_chain = [ + (p, m) for p, m in self._tool_calling_fallback_chain if p != provider + ] + + logger.info( + "tool_calling_routing", + provider=provider.value, + model=model, + fallback_count=len(fallback_chain), + ) + + return provider, model, fallback_chain + + def get_tool_calling_fallback_chain(self) -> list[tuple[AIProvider, str]]: + """取得 Tool Calling Fallback 鏈""" + return self._tool_calling_fallback_chain.copy() + # ========================================================================= # 便捷方法 # ========================================================================= diff --git a/apps/api/src/services/nvidia_provider.py b/apps/api/src/services/nvidia_provider.py new file mode 100644 index 00000000..5004a642 --- /dev/null +++ b/apps/api/src/services/nvidia_provider.py @@ -0,0 +1,432 @@ +""" +NVIDIA Nemotron Provider - ADR-036 +================================== +2026-03-29 ogt: Nemotron Tool Calling 整合 (83.3% 精準度) + +專門處理 Tool Calling 任務,提供高精準度的 K8s 操作決策。 + +設計原則: +1. OpenAI 相容格式 - 與 Nemotron API 對接 +2. Pydantic 強制驗證 - 所有回應必須通過 Schema 驗證 +3. Fallback 機制 - 失敗時降級到 Gemini/Claude +4. HITL 高風險保護 - DELETE 等操作需人工審核 + +版本: v1.0 +建立: 2026-03-29 (台北時區) +建立者: Claude Code +""" + +from __future__ import annotations + +import json +import time +from typing import Any + +import httpx +import structlog + +from src.core.config import get_settings +from src.models.nvidia import ( + NvidiaProviderResult, + NvidiaResponse, + NvidiaUsage, + ToolCallValidationResult, + ToolDefinition, +) + +logger = structlog.get_logger(__name__) +settings = get_settings() + +# ============================================================================= +# 常量定義 +# ============================================================================= + +# NVIDIA NIM API Endpoint +NVIDIA_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions" + +# 預設模型 +NVIDIA_DEFAULT_MODEL = "nvidia/llama-3.1-nemotron-70b-instruct" + +# 請求超時 (秒) - Nemotron 延遲 11-45s +NVIDIA_TIMEOUT = 60.0 + +# 重試次數 +MAX_RETRIES = 2 + +# 高風險 Tool 清單 (需要 HITL 審核) +HIGH_RISK_TOOLS: set[str] = { + "delete_pod", + "delete_deployment", + "delete_namespace", + "delete_service", + "delete_configmap", + "delete_secret", + "scale_to_zero", + "drain_node", + "cordon_node", + "delete_pvc", + "delete_pv", +} + + +# ============================================================================= +# NvidiaProvider 類別 +# ============================================================================= + + +class NvidiaProvider: + """ + NVIDIA Nemotron Provider + + 專門處理 Tool Calling 任務,提供 83.3% 精準度的 K8s 操作決策。 + + 使用方式: + ```python + provider = NvidiaProvider() + result = await provider.tool_call( + messages=[{"role": "user", "content": "重啟 awoooi-api pod"}], + tools=[restart_tool, scale_tool], + ) + if result.success: + for tc in result.tool_calls: + if tc.valid: + execute_tool(tc.tool_name, tc.arguments) + ``` + """ + + def __init__(self, api_key: str | None = None): + """ + 初始化 NvidiaProvider + + Args: + api_key: NVIDIA API Key (預設從 settings 取得) + """ + self._api_key = api_key or settings.NVIDIA_API_KEY + self._client: httpx.AsyncClient | None = None + + async def _get_client(self) -> httpx.AsyncClient: + """取得或建立 HTTP Client""" + if self._client is None or self._client.is_closed: + self._client = httpx.AsyncClient( + timeout=httpx.Timeout(NVIDIA_TIMEOUT, connect=10.0), + limits=httpx.Limits(max_connections=10, max_keepalive_connections=5), + ) + return self._client + + async def close(self) -> None: + """關閉 HTTP Client""" + if self._client and not self._client.is_closed: + await self._client.aclose() + self._client = None + + async def tool_call( + self, + messages: list[dict[str, Any]], + tools: list[ToolDefinition | dict[str, Any]], + model: str = NVIDIA_DEFAULT_MODEL, + temperature: float = 0.0, + max_tokens: int = 1024, + ) -> NvidiaProviderResult: + """ + 執行 Tool Calling 請求 + + Args: + messages: 對話訊息列表 + tools: 可用 Tool 定義列表 + model: 模型名稱 + temperature: 溫度 (0.0 最確定性) + max_tokens: 最大輸出 Token + + Returns: + NvidiaProviderResult: 包含驗證後的 Tool Calls + """ + start_time = time.perf_counter() + + # 檢查 API Key + if not self._api_key: + return NvidiaProviderResult( + success=False, + error="NVIDIA_API_KEY 未設定", + fallback_triggered=True, + ) + + # 轉換 tools 為 dict 格式 + tools_data = [] + for tool in tools: + if isinstance(tool, ToolDefinition): + tools_data.append(tool.model_dump()) + else: + tools_data.append(tool) + + # 建立請求 + request_body = { + "model": model, + "messages": messages, + "tools": tools_data, + "tool_choice": "auto", + "temperature": temperature, + "max_tokens": max_tokens, + } + + # 執行請求 (含重試) + response_data: dict | None = None + last_error: str | None = None + + for attempt in range(MAX_RETRIES + 1): + try: + response_data = await self._send_request(request_body) + break + except Exception as e: + last_error = str(e) + logger.warning( + "nvidia_request_retry", + attempt=attempt + 1, + max_retries=MAX_RETRIES, + error=last_error, + ) + if attempt == MAX_RETRIES: + break + + latency_ms = (time.perf_counter() - start_time) * 1000 + + # 請求失敗 + if response_data is None: + logger.error( + "nvidia_request_failed", + error=last_error, + latency_ms=round(latency_ms, 2), + ) + return NvidiaProviderResult( + success=False, + error=last_error, + latency_ms=latency_ms, + fallback_triggered=True, + ) + + # 解析回應 + try: + nvidia_response = NvidiaResponse.model_validate(response_data) + except Exception as e: + logger.error( + "nvidia_response_parse_failed", + error=str(e), + raw_response=str(response_data)[:500], + ) + return NvidiaProviderResult( + success=False, + error=f"回應解析失敗: {e}", + latency_ms=latency_ms, + fallback_triggered=True, + ) + + # 驗證 Tool Calls + tool_calls = self._validate_tool_calls(nvidia_response) + + # 統計 + usage = nvidia_response.usage + + logger.info( + "nvidia_tool_call_completed", + success=True, + tool_call_count=len(tool_calls), + valid_count=sum(1 for tc in tool_calls if tc.valid), + latency_ms=round(latency_ms, 2), + prompt_tokens=usage.prompt_tokens if usage else 0, + completion_tokens=usage.completion_tokens if usage else 0, + ) + + return NvidiaProviderResult( + success=True, + tool_calls=tool_calls, + usage=usage, + latency_ms=latency_ms, + fallback_triggered=False, + ) + + async def _send_request(self, request_body: dict) -> dict: + """ + 發送 HTTP 請求到 NVIDIA API + + Args: + request_body: 請求內容 + + Returns: + API 回應 (dict) + + Raises: + Exception: 請求失敗 + """ + client = await self._get_client() + + headers = { + "Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", + } + + response = await client.post( + NVIDIA_API_URL, + headers=headers, + json=request_body, + ) + + if response.status_code != 200: + error_text = response.text[:500] + raise Exception( + f"NVIDIA API 錯誤: {response.status_code} - {error_text}" + ) + + return response.json() + + def _validate_tool_calls( + self, response: NvidiaResponse + ) -> list[ToolCallValidationResult]: + """ + 驗證 Tool Calls + + Args: + response: NVIDIA API 回應 + + Returns: + 驗證後的 Tool Call 結果列表 + """ + results: list[ToolCallValidationResult] = [] + + if not response.choices: + return results + + message = response.choices[0].message + if not message.tool_calls: + return results + + for tc in message.tool_calls: + try: + # 解析 arguments JSON + arguments = json.loads(tc.function.arguments) + + results.append( + ToolCallValidationResult( + valid=True, + tool_name=tc.function.name, + arguments=arguments, + ) + ) + except json.JSONDecodeError as e: + results.append( + ToolCallValidationResult( + valid=False, + tool_name=tc.function.name, + error=f"Arguments JSON 解析失敗: {e}", + raw_response=tc.function.arguments, + ) + ) + except Exception as e: + results.append( + ToolCallValidationResult( + valid=False, + error=f"驗證失敗: {e}", + ) + ) + + return results + + def is_high_risk_tool(self, tool_name: str) -> bool: + """ + 檢查是否為高風險 Tool + + Args: + tool_name: Tool 名稱 + + Returns: + 是否需要 HITL 審核 + """ + return tool_name.lower() in HIGH_RISK_TOOLS + + def get_high_risk_tools( + self, tool_calls: list[ToolCallValidationResult] + ) -> list[ToolCallValidationResult]: + """ + 篩選高風險 Tool Calls + + Args: + tool_calls: Tool Call 結果列表 + + Returns: + 高風險 Tool Calls + """ + return [ + tc + for tc in tool_calls + if tc.valid and tc.tool_name and self.is_high_risk_tool(tc.tool_name) + ] + + +# ============================================================================= +# 單例與工廠函數 +# ============================================================================= + +_provider: NvidiaProvider | None = None + + +def get_nvidia_provider() -> NvidiaProvider: + """取得 NvidiaProvider 單例""" + global _provider + if _provider is None: + _provider = NvidiaProvider() + return _provider + + +def reset_nvidia_provider() -> None: + """重置單例 (用於測試)""" + global _provider + _provider = None + + +# ============================================================================= +# 便捷函數 +# ============================================================================= + + +async def nvidia_tool_call( + messages: list[dict[str, Any]], + tools: list[ToolDefinition | dict[str, Any]], + **kwargs, +) -> NvidiaProviderResult: + """ + 便捷函數: 執行 NVIDIA Tool Calling + + Args: + messages: 對話訊息列表 + tools: 可用 Tool 定義列表 + **kwargs: 其他參數 (model, temperature, max_tokens) + + Returns: + NvidiaProviderResult + """ + provider = get_nvidia_provider() + return await provider.tool_call(messages, tools, **kwargs) + + +def create_tool_definition( + name: str, + description: str, + parameters: dict[str, Any], +) -> ToolDefinition: + """ + 建立 Tool 定義 + + Args: + name: Tool 名稱 + description: Tool 描述 + parameters: JSON Schema 參數定義 + + Returns: + ToolDefinition + """ + return ToolDefinition( + type="function", + function={ + "name": name, + "description": description, + "parameters": parameters, + }, + ) diff --git a/apps/api/tests/test_nvidia_provider.py b/apps/api/tests/test_nvidia_provider.py new file mode 100644 index 00000000..eb522d07 --- /dev/null +++ b/apps/api/tests/test_nvidia_provider.py @@ -0,0 +1,316 @@ +""" +NVIDIA Provider Tests - ADR-036 +=============================== +測試 Nemotron Tool Calling 整合 + +注意: 這些是單元測試,不需要真實的 NVIDIA API Key +""" + +import json + +import pytest + +from src.models.nvidia import ( + NvidiaChoice, + NvidiaMessage, + NvidiaProviderResult, + NvidiaResponse, + NvidiaUsage, + ToolCall, + ToolCallValidationResult, + ToolDefinition, + ToolFunction, +) +from src.services.nvidia_provider import ( + HIGH_RISK_TOOLS, + NvidiaProvider, + create_tool_definition, + get_nvidia_provider, + reset_nvidia_provider, +) + + +class TestNvidiaModels: + """測試 NVIDIA Pydantic Models""" + + def test_tool_function_model(self): + """測試 ToolFunction 模型""" + func = ToolFunction( + name="restart_pod", + arguments='{"pod_name": "api-server", "namespace": "default"}', + ) + assert func.name == "restart_pod" + assert '"pod_name"' in func.arguments + + def test_tool_call_model(self): + """測試 ToolCall 模型""" + tc = ToolCall( + id="call_123", + type="function", + function=ToolFunction( + name="scale_deployment", + arguments='{"replicas": 3}', + ), + ) + assert tc.id == "call_123" + assert tc.function.name == "scale_deployment" + + def test_nvidia_response_model(self): + """測試 NvidiaResponse 模型""" + response = NvidiaResponse( + id="resp_123", + created=1234567890, + model="nvidia/llama-3.1-nemotron-70b-instruct", + choices=[ + NvidiaChoice( + index=0, + message=NvidiaMessage( + role="assistant", + content=None, + tool_calls=[ + ToolCall( + id="call_1", + function=ToolFunction( + name="restart_pod", + arguments='{"pod": "api"}', + ), + ) + ], + ), + finish_reason="tool_calls", + ) + ], + usage=NvidiaUsage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150, + ), + ) + + assert response.id == "resp_123" + assert len(response.choices) == 1 + assert response.choices[0].message.tool_calls is not None + assert len(response.choices[0].message.tool_calls) == 1 + assert response.usage.total_tokens == 150 + + def test_tool_call_validation_result(self): + """測試 ToolCallValidationResult 模型""" + result = ToolCallValidationResult( + valid=True, + tool_name="restart_pod", + arguments={"pod_name": "api", "namespace": "default"}, + ) + assert result.valid + assert result.tool_name == "restart_pod" + assert result.arguments["namespace"] == "default" + + def test_tool_definition(self): + """測試 ToolDefinition 模型""" + definition = ToolDefinition( + type="function", + function={ + "name": "restart_pod", + "description": "Restart a Kubernetes pod", + "parameters": { + "type": "object", + "properties": { + "pod_name": {"type": "string"}, + "namespace": {"type": "string"}, + }, + "required": ["pod_name"], + }, + }, + ) + assert definition.type == "function" + assert definition.function["name"] == "restart_pod" + + +class TestNvidiaProvider: + """測試 NvidiaProvider 類別""" + + def test_singleton(self): + """測試單例模式""" + reset_nvidia_provider() + p1 = get_nvidia_provider() + p2 = get_nvidia_provider() + assert p1 is p2 + reset_nvidia_provider() + + def test_high_risk_tool_detection(self): + """測試高風險 Tool 檢測""" + provider = NvidiaProvider() + + # 高風險操作 + assert provider.is_high_risk_tool("delete_pod") + assert provider.is_high_risk_tool("DELETE_POD") # 大寫也應該匹配 + assert provider.is_high_risk_tool("delete_deployment") + assert provider.is_high_risk_tool("scale_to_zero") + assert provider.is_high_risk_tool("drain_node") + + # 非高風險操作 + assert not provider.is_high_risk_tool("restart_pod") + assert not provider.is_high_risk_tool("scale_deployment") + assert not provider.is_high_risk_tool("get_logs") + + def test_filter_high_risk_tools(self): + """測試過濾高風險 Tool Calls""" + provider = NvidiaProvider() + + tool_calls = [ + ToolCallValidationResult( + valid=True, + tool_name="restart_pod", + arguments={"pod": "api"}, + ), + ToolCallValidationResult( + valid=True, + tool_name="delete_pod", + arguments={"pod": "test"}, + ), + ToolCallValidationResult( + valid=False, + tool_name="invalid_tool", + error="Parse error", + ), + ] + + high_risk = provider.get_high_risk_tools(tool_calls) + + assert len(high_risk) == 1 + assert high_risk[0].tool_name == "delete_pod" + + def test_validate_tool_calls(self): + """測試 Tool Call 驗證""" + provider = NvidiaProvider() + + # 建立模擬回應 + response = NvidiaResponse( + id="resp_123", + created=1234567890, + model="nvidia/llama-3.1-nemotron-70b-instruct", + choices=[ + NvidiaChoice( + index=0, + message=NvidiaMessage( + role="assistant", + tool_calls=[ + ToolCall( + id="call_1", + function=ToolFunction( + name="restart_pod", + arguments='{"pod_name": "api", "namespace": "default"}', + ), + ), + ToolCall( + id="call_2", + function=ToolFunction( + name="invalid_tool", + arguments="not valid json{", # 無效 JSON + ), + ), + ], + ), + ) + ], + ) + + results = provider._validate_tool_calls(response) + + assert len(results) == 2 + assert results[0].valid + assert results[0].tool_name == "restart_pod" + assert results[0].arguments["pod_name"] == "api" + assert not results[1].valid + assert "JSON" in results[1].error + + +class TestCreateToolDefinition: + """測試 Tool 定義建立函數""" + + def test_create_tool_definition(self): + """測試建立 Tool 定義""" + definition = create_tool_definition( + name="scale_deployment", + description="Scale a Kubernetes deployment", + parameters={ + "type": "object", + "properties": { + "deployment": {"type": "string"}, + "replicas": {"type": "integer"}, + }, + "required": ["deployment", "replicas"], + }, + ) + + assert definition.type == "function" + assert definition.function["name"] == "scale_deployment" + assert definition.function["description"] == "Scale a Kubernetes deployment" + assert "replicas" in definition.function["parameters"]["properties"] + + +class TestHighRiskTools: + """測試高風險 Tool 清單""" + + def test_high_risk_tools_list(self): + """確認高風險 Tool 清單包含所有必要操作""" + assert "delete_pod" in HIGH_RISK_TOOLS + assert "delete_deployment" in HIGH_RISK_TOOLS + assert "delete_namespace" in HIGH_RISK_TOOLS + assert "scale_to_zero" in HIGH_RISK_TOOLS + assert "drain_node" in HIGH_RISK_TOOLS + assert "cordon_node" in HIGH_RISK_TOOLS + + def test_restart_not_high_risk(self): + """確認 restart 不在高風險清單""" + assert "restart_pod" not in HIGH_RISK_TOOLS + assert "restart_deployment" not in HIGH_RISK_TOOLS + + +class TestAIRouterNvidiaIntegration: + """測試 AIRouter NVIDIA 整合""" + + def test_nvidia_provider_in_router(self): + """測試 AIProvider 包含 NVIDIA""" + from src.services.ai_router import AIProvider + + assert hasattr(AIProvider, "NVIDIA") + assert AIProvider.NVIDIA.value == "nvidia" + + def test_tool_calling_route(self): + """測試 Tool Calling 路由""" + from src.services.ai_router import get_ai_router, AIProvider, reset_ai_router + + reset_ai_router() + router = get_ai_router() + + provider, model, fallback_chain = router.route_tool_calling() + + assert provider == AIProvider.NVIDIA + assert "nvidia" in model.lower() or "nemotron" in model.lower() + # Fallback 應該包含 Gemini 和 Claude + fallback_providers = [p for p, _ in fallback_chain] + assert AIProvider.GEMINI in fallback_providers + assert AIProvider.CLAUDE in fallback_providers + + reset_ai_router() + + def test_existing_routing_not_affected(self): + """測試現有路由規則不受影響""" + from src.services.ai_router import get_ai_router, AIProvider, reset_ai_router + + reset_ai_router() + router = get_ai_router() + + # 測試同步路由 (不涉及 NVIDIA) + decision = router.route_sync("重啟 api pod") + + # 應該還是使用 Ollama (低複雜度) + assert decision.selected_provider in [ + AIProvider.OLLAMA, + AIProvider.GEMINI, + AIProvider.CLAUDE, + ] + # NVIDIA 不應該出現在一般路由中 + assert decision.selected_provider != AIProvider.NVIDIA + + reset_ai_router() diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 45d3ba79..168aa2e2 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -5,12 +5,12 @@ --- -## 📍 當前狀態 (2026-03-28 23:50 台北) +## 📍 當前狀態 (2026-03-29 03:30 台北) | 項目 | 狀態 | |------|------| -| **當前 Phase** | ✅ **K3s 首席架構師完整審查 (99% EXCEPTIONAL)** | -| **Day** | Day 11 | +| **當前 Phase** | ✅ **Phase 20 Nemotron Tool Calling (Phase A 完成)** | +| **Day** | Day 12 | | **K3s 版本** | v1.34.5+k3s1 (mon + mon1) | | **叢集健康** | ✅ **所有 Pod 正常運行** | | **K3s 優化** | ✅ **全部完成 + P2/P3 + PSS** | @@ -49,6 +49,33 @@ --- +### ✅ 2026-03-29 Phase 20 Nemotron Phase A 完成 (Day 12 03:30) 🆕 + +| 項目 | 內容 | 狀態 | +|------|------|------| +| **ADR-036** | Nemotron Tool Calling 整合 | ✅ 已建立 | +| **Phase A 實作** | NvidiaProvider 完整實作 | ✅ **已完成** | +| **測試驗證** | tests/test_nvidia_provider.py | ✅ **15/15 PASSED** | +| **整合** | ai_router + ai_rate_limiter + models.json | ✅ **已整合** | + +**新建檔案**: +- `src/models/nvidia.py` - Pydantic Schema +- `src/services/nvidia_provider.py` - NvidiaProvider 類別 +- `tests/test_nvidia_provider.py` - 15 項單元測試 + +**已修改**: +- `src/core/config.py` - NVIDIA_API_KEY +- `src/services/ai_router.py` - AIProvider.NVIDIA + route_tool_calling() +- `src/services/ai_rate_limiter.py` - NVIDIA 限制 +- `apps/api/models.json` - NVIDIA 配置 + +**待統帥執行**: +```bash +gh secret set NVIDIA_API_KEY --body "nvapi-..." +``` + +--- + ### 🏛️ 2026-03-28 首席架構師完整審查 (Day 11 23:50) | 審查項目 | 評分 | 說明 | diff --git a/docs/architecture/ARCHITECTURE.md b/docs/architecture/ARCHITECTURE.md index 3a2ffa73..67e58b2d 100644 --- a/docs/architecture/ARCHITECTURE.md +++ b/docs/architecture/ARCHITECTURE.md @@ -60,15 +60,36 @@ Elapsed: 28.71ms (< 50ms 目標) Method: httpx_native ``` -## 五主機架構 +## 五主機架構 (2026-03-28 K-HA 更新) | 主機 | IP | 角色 | 服務 | |-----|-----|------|------| -| DevOps | 192.168.0.110 | CI/CD | Harbor, GH Runner | -| Security | 192.168.0.112 | 安全掃描 | Kali Scanner | -| K3s Master | 192.168.0.120 | 容器編排 | K3s API Server | -| K3s Worker | 192.168.0.121 | 工作負載 | App Pods | -| AI+Web | 192.168.0.188 | AI/DB/Web | Ollama, PostgreSQL, Redis, SignOz | +| DevOps | 192.168.0.110 | CI/CD + 監控 | Harbor:5000, GH Runner, Sentry:9000, Langfuse:3100 | +| Security | 192.168.0.112 | 安全掃描 | Kali Scanner:8080 | +| K3s Server #1 | 192.168.0.120 | **Control-Plane MASTER** | keepalived (priority=101) | +| K3s Server #2 | 192.168.0.121 | **Control-Plane BACKUP** | keepalived (priority=100) | +| **VIP** | **192.168.0.125** | **HA Endpoint** | **K3s API:6443 + NodePort (32334/32335)** | +| AI+Web | 192.168.0.188 | AI/DB/Web | Ollama:11434, OpenClaw:8089, **PostgreSQL:5432 (K3s Datastore)**, Redis:6380, SignOz:3301 | + +## K3s 高可用架構 (ADR-033) + +``` + VIP 192.168.0.125 (keepalived) + ↓ + ┌──────────────┼──────────────┐ + ▼ ▼ ▼ + mon(120) mon1(121) PostgreSQL(188) + K3s MASTER K3s BACKUP K3s Datastore + priority=101 priority=100 (Kine protocol) +``` + +### 關鍵變更 (2026-03-28) + +- ✅ 雙 Control-Plane (120+121) +- ✅ 外接 PostgreSQL Datastore (188:5432) +- ✅ VIP 192.168.0.125 (keepalived VRRP) +- ✅ CI/CD kubeconfig 指向 VIP +- ✅ 故障轉移 < 3 秒 ## SignOz 整合架構 @@ -93,8 +114,9 @@ Method: httpx_native └─────────────────────────────────────────────┘ ``` -## AI Fallback 策略 (ADR-006) +## AI Fallback 策略 (ADR-006 v1.3 + ADR-036) +### 一般對話任務 ``` Ollama (local) → Gemini (cloud) → Claude (cloud) → mock_fallback ↓ ↓ ↓ ↓ @@ -102,6 +124,24 @@ Ollama (local) → Gemini (cloud) → Claude (cloud) → mock_fallback 188:11434 API Key API Key 無 LLM ``` +### Tool Calling 任務 (ADR-036 - 待批准) +``` +Nemotron (cloud) → Gemini (cloud) → Claude (cloud) → 拒絕執行 + ↓ ↓ ↓ + 免費 tier $0.001/1K $0.003/1K + 精準度 83% API Key API Key + 延遲 11-45s +``` + +### Provider 對照表 + +| Provider | 用途 | 延遲 | 精準度 | 成本 | +|----------|------|------|--------|------| +| **Ollama** | 即時對話、簡單查詢 | < 5s | 中 | $0 | +| **Nemotron** | Tool Calling、K8s 操作 | 11-45s | 高 (83%) | 免費 tier | +| **Gemini** | 通用備援 | 2-5s | 中高 | 低 | +| **Claude** | 複雜推理、CRITICAL | 2-5s | 最高 | 高 | + ## Phase 7: 視覺主權組件 ### 已完成組件