feat(ai): ADR-036 NVIDIA Nemotron Tool Calling 整合
Phase 20 - 提升 Tool Calling 精準度 50% → 83.3% 新增: - src/models/nvidia.py: Pydantic Schema - src/services/nvidia_provider.py: NvidiaProvider 類別 - tests/test_nvidia_provider.py: 15 項單元測試 (全部通過) 修改: - ai_router.py: AIProvider.NVIDIA + route_tool_calling() - ai_rate_limiter.py: NVIDIA 限制 (5 RPM, 100/day) - models.json: NVIDIA 配置 - cd.yaml: Secrets 注入 NVIDIA_API_KEY 路由策略: - Tool Calling: Nemotron → Gemini → Claude - 一般對話: Ollama → Gemini → Claude (不變) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
7
.github/workflows/cd.yaml
vendored
7
.github/workflows/cd.yaml
vendored
@@ -287,15 +287,18 @@ jobs:
|
||||
--from-literal=REDIS_URL="${{ secrets.REDIS_URL }}" \
|
||||
--from-literal=GEMINI_API_KEY="${{ secrets.GEMINI_API_KEY }}" \
|
||||
--from-literal=CLAUDE_API_KEY="${{ secrets.CLAUDE_API_KEY }}" \
|
||||
--from-literal=NVIDIA_API_KEY="${{ secrets.NVIDIA_API_KEY }}" \
|
||||
--from-literal=WEBHOOK_HMAC_SECRET="${{ secrets.WEBHOOK_HMAC_SECRET }}" \
|
||||
--from-literal=SENTRY_DSN="${{ secrets.SENTRY_DSN }}"
|
||||
else
|
||||
echo "🔄 更新 awoooi-secrets..."
|
||||
# 使用 patch 更新,確保 Telegram 配置永遠是最新的
|
||||
# 使用 patch 更新,確保關鍵配置永遠是最新的
|
||||
# 2026-03-29 ogt: ADR-036 新增 NVIDIA_API_KEY
|
||||
kubectl patch secret awoooi-secrets -n awoooi-prod --type='merge' -p="{
|
||||
\"stringData\": {
|
||||
\"OPENCLAW_TG_BOT_TOKEN\": \"${{ secrets.OPENCLAW_TG_BOT_TOKEN }}\",
|
||||
\"OPENCLAW_TG_CHAT_ID\": \"${{ secrets.OPENCLAW_TG_CHAT_ID }}\"
|
||||
\"OPENCLAW_TG_CHAT_ID\": \"${{ secrets.OPENCLAW_TG_CHAT_ID }}\",
|
||||
\"NVIDIA_API_KEY\": \"${{ secrets.NVIDIA_API_KEY }}\"
|
||||
}
|
||||
}"
|
||||
fi
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"name": "OpenClaw AI Router Configuration",
|
||||
"version": "1.0.0",
|
||||
"description": "AI 模型路由與備援設定 (ADR-006)",
|
||||
"updated_at": "2026-03-26",
|
||||
"version": "1.1.0",
|
||||
"description": "AI 模型路由與備援設定 (ADR-006 + ADR-036 Nemotron)",
|
||||
"updated_at": "2026-03-29",
|
||||
|
||||
"default_provider": "ollama",
|
||||
"fallback_order": ["ollama", "gemini", "claude"],
|
||||
"tool_calling_fallback_order": ["nvidia", "gemini", "claude"],
|
||||
|
||||
"providers": {
|
||||
"ollama": {
|
||||
@@ -101,6 +102,44 @@
|
||||
"tool_use": true,
|
||||
"structured_output": true
|
||||
}
|
||||
},
|
||||
|
||||
"nvidia": {
|
||||
"name": "NVIDIA Nemotron (ADR-036)",
|
||||
"enabled": true,
|
||||
"priority": 4,
|
||||
"endpoint": "https://integrate.api.nvidia.com/v1",
|
||||
"api_path": "/chat/completions",
|
||||
"models": {
|
||||
"default": "nvidia/llama-3.1-nemotron-70b-instruct",
|
||||
"tool_calling": "nvidia/llama-3.1-nemotron-70b-instruct"
|
||||
},
|
||||
"options": {
|
||||
"temperature": 0.0,
|
||||
"max_tokens": 1024
|
||||
},
|
||||
"timeout_seconds": 60,
|
||||
"cost": {
|
||||
"per_1k_tokens": 0,
|
||||
"currency": "USD",
|
||||
"notes": "Free tier (2026-03-29)"
|
||||
},
|
||||
"auth": {
|
||||
"type": "header",
|
||||
"env_var": "NVIDIA_API_KEY",
|
||||
"header_name": "Authorization",
|
||||
"header_prefix": "Bearer "
|
||||
},
|
||||
"rate_limits": {
|
||||
"daily_tokens": 50000,
|
||||
"requests_per_minute": 5
|
||||
},
|
||||
"features": {
|
||||
"tool_use": true,
|
||||
"structured_output": true,
|
||||
"tool_calling_accuracy": 0.833
|
||||
},
|
||||
"use_for": ["tool_calling"]
|
||||
}
|
||||
},
|
||||
|
||||
@@ -122,6 +161,14 @@
|
||||
"preferred_provider": "ollama",
|
||||
"fallback_enabled": false,
|
||||
"max_output_tokens": 500
|
||||
},
|
||||
"tool_calling": {
|
||||
"description": "K8s Tool Calling operations (ADR-036)",
|
||||
"preferred_provider": "nvidia",
|
||||
"fallback_enabled": true,
|
||||
"fallback_order": ["gemini", "claude"],
|
||||
"required_features": ["tool_use"],
|
||||
"notes": "Nemotron 83.3% accuracy for K8s operations"
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
@@ -201,8 +201,9 @@ class Settings(BaseSettings):
|
||||
)
|
||||
|
||||
# ==========================================================================
|
||||
# AI Fallback Strategy (ADR-006)
|
||||
# AI Fallback Strategy (ADR-006 v1.3 + ADR-036)
|
||||
# Order: Ollama (local) -> Gemini (cloud) -> Claude (cloud)
|
||||
# Tool Calling: Nemotron (專用) -> Gemini -> Claude
|
||||
# ==========================================================================
|
||||
AI_FALLBACK_ORDER: list[str] = Field(
|
||||
default=["ollama", "gemini", "claude"],
|
||||
@@ -210,6 +211,11 @@ class Settings(BaseSettings):
|
||||
)
|
||||
GEMINI_API_KEY: str = Field(default="", description="Google Gemini API key")
|
||||
CLAUDE_API_KEY: str = Field(default="", description="Anthropic Claude API key")
|
||||
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合
|
||||
NVIDIA_API_KEY: str = Field(
|
||||
default="",
|
||||
description="NVIDIA NIM API key for Nemotron Tool Calling (ADR-036)",
|
||||
)
|
||||
|
||||
@field_validator("AI_FALLBACK_ORDER", mode="before")
|
||||
@classmethod
|
||||
|
||||
@@ -6,6 +6,7 @@ AWOOOI Models Package
|
||||
- Approval: 簽核相關模型 (Phase 2 HITL)
|
||||
- Incident: 事件相關模型 (Phase 6 認知覺醒)
|
||||
- AI: AI 相關模型
|
||||
- NVIDIA: Nemotron Tool Calling 模型 (ADR-036)
|
||||
"""
|
||||
|
||||
# Approval Models (Phase 2)
|
||||
@@ -39,6 +40,16 @@ from src.models.incident import (
|
||||
Signal,
|
||||
)
|
||||
|
||||
# NVIDIA Models (ADR-036 - Nemotron Tool Calling)
|
||||
from src.models.nvidia import (
|
||||
NvidiaProviderResult,
|
||||
NvidiaResponse,
|
||||
NvidiaUsage,
|
||||
ToolCall,
|
||||
ToolCallValidationResult,
|
||||
ToolDefinition,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Approval
|
||||
"ApprovalRequest",
|
||||
@@ -65,4 +76,11 @@ __all__ = [
|
||||
"IncidentUpdate",
|
||||
"Severity",
|
||||
"Signal",
|
||||
# NVIDIA (ADR-036)
|
||||
"NvidiaProviderResult",
|
||||
"NvidiaResponse",
|
||||
"NvidiaUsage",
|
||||
"ToolCall",
|
||||
"ToolCallValidationResult",
|
||||
"ToolDefinition",
|
||||
]
|
||||
|
||||
119
apps/api/src/models/nvidia.py
Normal file
119
apps/api/src/models/nvidia.py
Normal file
@@ -0,0 +1,119 @@
|
||||
"""
|
||||
NVIDIA Nemotron API Models - ADR-036
|
||||
====================================
|
||||
2026-03-29 ogt: Nemotron Tool Calling 整合 (83.3% 精準度)
|
||||
|
||||
OpenAI 相容格式 - 用於 Tool Calling 任務
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ToolFunction(BaseModel):
    """Function part of a tool call: the tool name and its raw arguments."""

    # Both fields are required; ``arguments`` stays a JSON-encoded string here
    # and is decoded later by the provider.
    name: str = Field(description="Tool 函數名稱")
    arguments: str = Field(description="Tool 參數 (JSON 字串)")
|
||||
|
||||
|
||||
class ToolCall(BaseModel):
    """One tool call entry as it appears in an assistant message."""

    # ``id`` and ``function`` are required; ``type`` falls back to "function".
    id: str = Field(description="Tool Call ID")
    type: str = Field("function", description="Tool 類型")
    function: ToolFunction = Field(description="Tool 函數")
|
||||
|
||||
|
||||
class NvidiaMessage(BaseModel):
    """Message object inside an NVIDIA API choice."""

    # ``role`` is required; ``content`` and ``tool_calls`` are optional and
    # default to None.
    role: str = Field(description="訊息角色 (assistant/user/system)")
    content: str | None = Field(None, description="訊息內容")
    tool_calls: list[ToolCall] | None = Field(None, description="Tool Calls (僅 assistant)")
|
||||
|
||||
|
||||
class NvidiaChoice(BaseModel):
    """One entry of the ``choices`` list in an NVIDIA API response."""

    # Only ``message`` is required; ``index`` defaults to 0 and
    # ``finish_reason`` to None.
    index: int = Field(0, description="選項索引")
    message: NvidiaMessage = Field(description="回應訊息")
    finish_reason: str | None = Field(None, description="結束原因 (stop/tool_calls)")
|
||||
|
||||
|
||||
class NvidiaUsage(BaseModel):
    """Token usage counters reported by the NVIDIA API."""

    # Every counter defaults to 0 when the field is absent.
    prompt_tokens: int = Field(0, description="輸入 Token 數")
    completion_tokens: int = Field(0, description="輸出 Token 數")
    total_tokens: int = Field(0, description="總 Token 數")
|
||||
|
||||
|
||||
class NvidiaResponse(BaseModel):
    """Top-level NVIDIA Nemotron chat-completion response."""

    # Required: id, created, model, choices.
    # Optional: object (defaults to "chat.completion") and usage (None).
    id: str = Field(description="回應 ID")
    object: str = Field("chat.completion", description="物件類型")
    created: int = Field(description="建立時間戳")
    model: str = Field(description="模型名稱")
    choices: list[NvidiaChoice] = Field(description="回應選項")
    usage: NvidiaUsage | None = Field(None, description="Token 使用統計")
|
||||
|
||||
|
||||
# === Tool Calling 請求結構 ===
|
||||
|
||||
|
||||
class ToolDefinition(BaseModel):
    """Tool definition sent to the API as part of a request."""

    # ``function`` is a free-form dict and is required; ``type`` defaults to
    # "function".
    type: str = Field("function", description="Tool 類型")
    function: dict[str, Any] = Field(description="函數定義 (JSON Schema)")
|
||||
|
||||
|
||||
class NvidiaToolCallRequest(BaseModel):
    """Request body for an NVIDIA tool-calling chat completion."""

    # ``messages`` and ``tools`` are required; everything else has a default.
    model: str = Field("nvidia/llama-3.1-nemotron-70b-instruct", description="模型名稱")
    messages: list[dict[str, Any]] = Field(description="對話訊息")
    tools: list[ToolDefinition] = Field(description="可用 Tools")
    tool_choice: str | dict[str, Any] = Field("auto", description="Tool 選擇策略")
    temperature: float = Field(0.0, description="溫度 (0.0 最確定性)")
    max_tokens: int = Field(1024, description="最大輸出 Token")
|
||||
|
||||
|
||||
# === 驗證結果結構 ===
|
||||
|
||||
|
||||
class ToolCallValidationResult(BaseModel):
    """Outcome of validating a single tool call."""

    # Only ``valid`` is required; the remaining fields default to None.
    valid: bool = Field(description="是否有效")
    tool_name: str | None = Field(None, description="Tool 名稱")
    arguments: dict[str, Any] | None = Field(None, description="解析後參數")
    error: str | None = Field(None, description="錯誤訊息")
    raw_response: str | None = Field(None, description="原始回應 (debug)")
|
||||
|
||||
|
||||
class NvidiaProviderResult(BaseModel):
    """Result object returned by NvidiaProvider."""

    # Only ``success`` is required.  ``tool_calls`` uses a factory so every
    # instance gets its own empty list.
    success: bool = Field(description="是否成功")
    tool_calls: list[ToolCallValidationResult] = Field(
        default_factory=list,
        description="驗證後的 Tool Calls",
    )
    usage: NvidiaUsage | None = Field(None, description="Token 使用統計")
    latency_ms: float = Field(0.0, description="延遲 (毫秒)")
    error: str | None = Field(None, description="錯誤訊息")
    fallback_triggered: bool = Field(False, description="是否觸發 Fallback")
|
||||
@@ -37,6 +37,12 @@ RATE_LIMITS = {
|
||||
"daily_requests": 200,
|
||||
"daily_tokens": 50_000,
|
||||
},
|
||||
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (免費 Tier)
|
||||
"nvidia": {
|
||||
"rpm": 5, # 每分鐘請求數 (延遲較高,控制併發)
|
||||
"daily_requests": 100, # 每日請求數 (免費 Tier 限制)
|
||||
"daily_tokens": 50_000, # 每日 Token 數
|
||||
},
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
@@ -52,6 +58,11 @@ COST_LIMITS = {
|
||||
"total_cost_usd": 10.0,
|
||||
"alert_threshold_usd": 8.0,
|
||||
},
|
||||
# 2026-03-29 ogt: ADR-036 Nemotron (免費 Tier,設定低限制作為監控)
|
||||
"nvidia": {
|
||||
"total_cost_usd": 0.0, # 免費 Tier,不計費
|
||||
"alert_threshold_usd": 0.0, # 不發送成本告警
|
||||
},
|
||||
}
|
||||
|
||||
# Gemini 1.5 Flash 定價 (per token)
|
||||
|
||||
@@ -66,6 +66,8 @@ class AIProvider(Enum):
|
||||
OLLAMA = "ollama"
|
||||
GEMINI = "gemini"
|
||||
CLAUDE = "claude"
|
||||
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (83.3% 精準度)
|
||||
NVIDIA = "nvidia"
|
||||
|
||||
|
||||
# Provider 對應延遲預算 (ms)
|
||||
@@ -73,6 +75,8 @@ PROVIDER_LATENCY_BUDGET: dict[AIProvider, int] = {
|
||||
AIProvider.OLLAMA: 60000, # 本地,允許較長處理時間
|
||||
AIProvider.GEMINI: 30000, # 雲端,較低延遲
|
||||
AIProvider.CLAUDE: 30000, # 雲端,較低延遲
|
||||
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (延遲 11-45s)
|
||||
AIProvider.NVIDIA: 60000, # Tool Calling 專用,允許較長時間
|
||||
}
|
||||
|
||||
|
||||
@@ -164,21 +168,32 @@ class AIRouter:
|
||||
self._ollama_summary = self._model_registry.get_model("ollama", "summary")
|
||||
self._gemini_default = self._model_registry.get_model("gemini", "default")
|
||||
self._claude_default = self._model_registry.get_model("claude", "default")
|
||||
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling
|
||||
self._nvidia_default = self._model_registry.get_model("nvidia", "default")
|
||||
|
||||
# Provider 對應模型映射
|
||||
self._provider_models: dict[AIProvider, str] = {
|
||||
AIProvider.OLLAMA: self._ollama_default,
|
||||
AIProvider.GEMINI: self._gemini_default,
|
||||
AIProvider.CLAUDE: self._claude_default,
|
||||
AIProvider.NVIDIA: self._nvidia_default, # ADR-036
|
||||
}
|
||||
|
||||
# 完整 Fallback 鏈 (Provider, Model)
|
||||
# 2026-03-29 ogt: NVIDIA 不在一般 Fallback 鏈 (僅用於 Tool Calling)
|
||||
self._full_fallback_chain: list[tuple[AIProvider, str]] = [
|
||||
(AIProvider.OLLAMA, self._ollama_default),
|
||||
(AIProvider.GEMINI, self._gemini_default),
|
||||
(AIProvider.CLAUDE, self._claude_default),
|
||||
]
|
||||
|
||||
# Tool Calling 專用 Fallback 鏈 (ADR-036)
|
||||
self._tool_calling_fallback_chain: list[tuple[AIProvider, str]] = [
|
||||
(AIProvider.NVIDIA, self._nvidia_default),
|
||||
(AIProvider.GEMINI, self._gemini_default),
|
||||
(AIProvider.CLAUDE, self._claude_default),
|
||||
]
|
||||
|
||||
# 意圖對應 Provider 強制覆寫 (None = 依複雜度決定)
|
||||
self._intent_provider_overrides: dict[IntentType, AIProvider | None] = {
|
||||
# 四大核心意圖
|
||||
@@ -466,6 +481,39 @@ class AIRouter:
|
||||
routing_latency_ms=routing_latency,
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# Tool Calling 路由 (ADR-036)
|
||||
# =========================================================================
|
||||
|
||||
def route_tool_calling(self) -> tuple[AIProvider, str, list[tuple[AIProvider, str]]]:
    """
    Pick the provider for a tool-calling task (ADR-036).

    NVIDIA with its default model is always the primary choice; the
    fallback chain is the tool-calling chain with every NVIDIA entry
    removed.

    Returns:
        (provider, model, fallback_chain)
    """
    primary = AIProvider.NVIDIA
    primary_model = self._nvidia_default

    # Everything in the dedicated chain except the primary provider itself.
    remaining: list[tuple[AIProvider, str]] = []
    for candidate, candidate_model in self._tool_calling_fallback_chain:
        if candidate != primary:
            remaining.append((candidate, candidate_model))

    logger.info(
        "tool_calling_routing",
        provider=primary.value,
        model=primary_model,
        fallback_count=len(remaining),
    )
    return primary, primary_model, remaining
|
||||
|
||||
def get_tool_calling_fallback_chain(self) -> list[tuple[AIProvider, str]]:
    """Return a shallow copy of the tool-calling fallback chain."""
    # A new list is handed out so callers cannot mutate the router's own chain.
    return list(self._tool_calling_fallback_chain)
|
||||
|
||||
# =========================================================================
|
||||
# 便捷方法
|
||||
# =========================================================================
|
||||
|
||||
432
apps/api/src/services/nvidia_provider.py
Normal file
432
apps/api/src/services/nvidia_provider.py
Normal file
@@ -0,0 +1,432 @@
|
||||
"""
|
||||
NVIDIA Nemotron Provider - ADR-036
|
||||
==================================
|
||||
2026-03-29 ogt: Nemotron Tool Calling 整合 (83.3% 精準度)
|
||||
|
||||
專門處理 Tool Calling 任務,提供高精準度的 K8s 操作決策。
|
||||
|
||||
設計原則:
|
||||
1. OpenAI 相容格式 - 與 Nemotron API 對接
|
||||
2. Pydantic 強制驗證 - 所有回應必須通過 Schema 驗證
|
||||
3. Fallback 機制 - 失敗時降級到 Gemini/Claude
|
||||
4. HITL 高風險保護 - DELETE 等操作需人工審核
|
||||
|
||||
版本: v1.0
|
||||
建立: 2026-03-29 (台北時區)
|
||||
建立者: Claude Code
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
import structlog
|
||||
|
||||
from src.core.config import get_settings
|
||||
from src.models.nvidia import (
|
||||
NvidiaProviderResult,
|
||||
NvidiaResponse,
|
||||
NvidiaUsage,
|
||||
ToolCallValidationResult,
|
||||
ToolDefinition,
|
||||
)
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
settings = get_settings()
|
||||
|
||||
# =============================================================================
|
||||
# 常量定義
|
||||
# =============================================================================
|
||||
|
||||
# NVIDIA NIM API Endpoint
|
||||
# Chat-completions endpoint of the NVIDIA NIM API.
NVIDIA_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions"

# Model used when the caller does not pick one.
NVIDIA_DEFAULT_MODEL = "nvidia/llama-3.1-nemotron-70b-instruct"

# Request timeout in seconds (the original note cites 11-45 s Nemotron latency).
NVIDIA_TIMEOUT = 60.0

# Number of retries after the first attempt.
MAX_RETRIES = 2

# Tool names that require human-in-the-loop (HITL) approval.
HIGH_RISK_TOOLS: set[str] = {
    # deletions
    "delete_pod",
    "delete_deployment",
    "delete_namespace",
    "delete_service",
    "delete_configmap",
    "delete_secret",
    "delete_pvc",
    "delete_pv",
    # scaling and node operations
    "scale_to_zero",
    "drain_node",
    "cordon_node",
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# NvidiaProvider 類別
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class NvidiaProvider:
    """
    Client for NVIDIA Nemotron tool calling (ADR-036).

    Sends an OpenAI-compatible chat-completion request that carries tool
    definitions, validates the reply against ``NvidiaResponse`` and returns
    the decoded tool calls.  Failures are reported through
    ``NvidiaProviderResult`` (``success=False``, ``fallback_triggered=True``)
    instead of being raised, so the caller can move on to another provider.

    Example:
        ```python
        provider = NvidiaProvider()
        result = await provider.tool_call(
            messages=[{"role": "user", "content": "restart the awoooi-api pod"}],
            tools=[restart_tool, scale_tool],
        )
        if result.success:
            for tc in result.tool_calls:
                if tc.valid:
                    execute_tool(tc.tool_name, tc.arguments)
        ```
    """

    class _StatusError(Exception):
        """Non-200 reply from the API; keeps the HTTP status for retry decisions."""

        def __init__(self, status_code: int, message: str) -> None:
            super().__init__(message)
            self.status_code = status_code

    def __init__(self, api_key: str | None = None):
        """
        Create a provider.

        Args:
            api_key: NVIDIA API key; falls back to ``settings.NVIDIA_API_KEY``
                when omitted or empty.
        """
        self._api_key = api_key or settings.NVIDIA_API_KEY
        # Created lazily by _get_client() on first use.
        self._client: httpx.AsyncClient | None = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it if missing or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=httpx.Timeout(NVIDIA_TIMEOUT, connect=10.0),
                limits=httpx.Limits(max_connections=10, max_keepalive_connections=5),
            )
        return self._client

    async def close(self) -> None:
        """Close the HTTP client if one is open and forget it."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()
            self._client = None

    @staticmethod
    def _is_retryable(error: Exception) -> bool:
        """
        Decide whether a failed request is worth sending again.

        Errors without an HTTP status (transport failures, timeouts, bodies
        that are not JSON) are retried.  4xx replies other than 408 are not:
        the same request sent again immediately would be rejected again
        (bad key, bad payload, rate limit), so the caller should fall back
        right away instead of spending more of the request budget.
        """
        status = getattr(error, "status_code", None)
        if status is None:
            return True
        return not (400 <= status < 500 and status != 408)

    async def tool_call(
        self,
        messages: list[dict[str, Any]],
        tools: list[ToolDefinition | dict[str, Any]],
        model: str = NVIDIA_DEFAULT_MODEL,
        temperature: float = 0.0,
        max_tokens: int = 1024,
    ) -> NvidiaProviderResult:
        """
        Run one tool-calling request.

        Args:
            messages: Conversation messages.
            tools: Available tools, as ``ToolDefinition`` objects or plain dicts.
            model: Model name.
            temperature: Sampling temperature.
            max_tokens: Maximum number of output tokens.

        Returns:
            NvidiaProviderResult holding the validated tool calls, or
            ``success=False`` with ``fallback_triggered=True`` when the key is
            missing, the request fails, or the reply does not match the schema.
        """
        start_time = time.perf_counter()

        # Without a key there is nothing to send.
        if not self._api_key:
            return NvidiaProviderResult(
                success=False,
                error="NVIDIA_API_KEY 未設定",
                fallback_triggered=True,
            )

        # ToolDefinition objects are dumped to dicts; plain dicts pass through.
        tools_data = [
            tool.model_dump() if isinstance(tool, ToolDefinition) else tool
            for tool in tools
        ]

        request_body = {
            "model": model,
            "messages": messages,
            "tools": tools_data,
            "tool_choice": "auto",
            "temperature": temperature,
            "max_tokens": max_tokens,
        }

        response_data: dict | None = None
        last_error: str | None = None

        # One initial attempt plus up to MAX_RETRIES retries.
        for attempt in range(MAX_RETRIES + 1):
            try:
                response_data = await self._send_request(request_body)
                break
            except Exception as e:  # boundary: every failure becomes a result
                last_error = str(e)
                logger.warning(
                    "nvidia_request_retry",
                    attempt=attempt + 1,
                    max_retries=MAX_RETRIES,
                    error=last_error,
                )
                if not self._is_retryable(e):
                    break

        latency_ms = (time.perf_counter() - start_time) * 1000

        # Every attempt failed.
        if response_data is None:
            logger.error(
                "nvidia_request_failed",
                error=last_error,
                latency_ms=round(latency_ms, 2),
            )
            return NvidiaProviderResult(
                success=False,
                error=last_error,
                latency_ms=latency_ms,
                fallback_triggered=True,
            )

        # Schema validation of the raw reply.
        try:
            nvidia_response = NvidiaResponse.model_validate(response_data)
        except Exception as e:
            logger.error(
                "nvidia_response_parse_failed",
                error=str(e),
                raw_response=str(response_data)[:500],
            )
            return NvidiaProviderResult(
                success=False,
                error=f"回應解析失敗: {e}",
                latency_ms=latency_ms,
                fallback_triggered=True,
            )

        tool_calls = self._validate_tool_calls(nvidia_response)
        usage = nvidia_response.usage

        logger.info(
            "nvidia_tool_call_completed",
            success=True,
            tool_call_count=len(tool_calls),
            valid_count=sum(1 for tc in tool_calls if tc.valid),
            latency_ms=round(latency_ms, 2),
            prompt_tokens=usage.prompt_tokens if usage else 0,
            completion_tokens=usage.completion_tokens if usage else 0,
        )

        return NvidiaProviderResult(
            success=True,
            tool_calls=tool_calls,
            usage=usage,
            latency_ms=latency_ms,
            fallback_triggered=False,
        )

    async def _send_request(self, request_body: dict) -> dict:
        """
        POST the request body to the NVIDIA API.

        Args:
            request_body: JSON-serialisable request payload.

        Returns:
            The decoded JSON reply.

        Raises:
            Exception: On any non-200 status (an ``Exception`` subclass that
                also carries ``status_code``); errors raised by the HTTP
                client or by JSON decoding propagate unchanged.
        """
        client = await self._get_client()

        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
        }

        response = await client.post(
            NVIDIA_API_URL,
            headers=headers,
            json=request_body,
        )

        if response.status_code != 200:
            # Only the first 500 characters of the body go into the message.
            error_text = response.text[:500]
            raise self._StatusError(
                response.status_code,
                f"NVIDIA API 錯誤: {response.status_code} - {error_text}",
            )

        return response.json()

    def _validate_tool_calls(
        self, response: NvidiaResponse
    ) -> list[ToolCallValidationResult]:
        """
        Decode and check the tool calls of the first choice.

        A call is valid only when its arguments decode to a JSON object.
        Invalid entries keep the tool name and the raw argument string so
        the caller can log or inspect them.

        Args:
            response: Parsed NVIDIA API response.

        Returns:
            One result per tool call; empty when there are no choices or the
            first choice carries no tool calls.
        """
        results: list[ToolCallValidationResult] = []

        if not response.choices:
            return results

        message = response.choices[0].message
        if not message.tool_calls:
            return results

        for tc in message.tool_calls:
            name = tc.function.name
            raw_arguments = tc.function.arguments
            try:
                arguments = json.loads(raw_arguments)
            except json.JSONDecodeError as e:
                results.append(
                    ToolCallValidationResult(
                        valid=False,
                        tool_name=name,
                        error=f"Arguments JSON 解析失敗: {e}",
                        raw_response=raw_arguments,
                    )
                )
                continue
            except Exception as e:
                # e.g. RecursionError on pathologically nested input.
                results.append(
                    ToolCallValidationResult(
                        valid=False,
                        tool_name=name,
                        error=f"驗證失敗: {e}",
                        raw_response=raw_arguments,
                    )
                )
                continue

            # Valid JSON that is not an object (null, list, string, number)
            # cannot be used as keyword arguments for a tool.
            if not isinstance(arguments, dict):
                results.append(
                    ToolCallValidationResult(
                        valid=False,
                        tool_name=name,
                        error=(
                            "Arguments 必須是 JSON 物件 "
                            f"(實際為 {type(arguments).__name__})"
                        ),
                        raw_response=raw_arguments,
                    )
                )
                continue

            results.append(
                ToolCallValidationResult(
                    valid=True,
                    tool_name=name,
                    arguments=arguments,
                )
            )

        return results

    def is_high_risk_tool(self, tool_name: str) -> bool:
        """
        Tell whether a tool requires HITL approval.

        The name is compared case-insensitively and with surrounding
        whitespace removed, so a padded name cannot slip past the check.

        Args:
            tool_name: Tool name.

        Returns:
            True when the normalised name is in ``HIGH_RISK_TOOLS``.
        """
        return tool_name.strip().lower() in HIGH_RISK_TOOLS

    def get_high_risk_tools(
        self, tool_calls: list[ToolCallValidationResult]
    ) -> list[ToolCallValidationResult]:
        """
        Select the high-risk entries from a list of tool call results.

        Args:
            tool_calls: Tool call results.

        Returns:
            The valid results whose tool name is high risk.
        """
        return [
            tc
            for tc in tool_calls
            if tc.valid and tc.tool_name and self.is_high_risk_tool(tc.tool_name)
        ]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 單例與工廠函數
|
||||
# =============================================================================
|
||||
|
||||
_provider: NvidiaProvider | None = None
|
||||
|
||||
|
||||
def get_nvidia_provider() -> NvidiaProvider:
    """Return the module-wide NvidiaProvider, creating it on first use."""
    global _provider
    if _provider is not None:
        return _provider
    _provider = NvidiaProvider()
    return _provider
|
||||
|
||||
|
||||
def reset_nvidia_provider() -> None:
    """Drop the cached provider instance (used by tests)."""
    # The old instance is only dereferenced here, not closed.
    global _provider
    _provider = None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 便捷函數
|
||||
# =============================================================================
|
||||
|
||||
|
||||
async def nvidia_tool_call(
    messages: list[dict[str, Any]],
    tools: list[ToolDefinition | dict[str, Any]],
    **kwargs,
) -> NvidiaProviderResult:
    """
    Run a tool-calling request on the shared provider.

    Args:
        messages: Conversation messages.
        tools: Available tool definitions.
        **kwargs: Forwarded to ``NvidiaProvider.tool_call``
            (model, temperature, max_tokens).

    Returns:
        NvidiaProviderResult
    """
    shared_provider = get_nvidia_provider()
    outcome = await shared_provider.tool_call(messages, tools, **kwargs)
    return outcome
|
||||
|
||||
|
||||
def create_tool_definition(
    name: str,
    description: str,
    parameters: dict[str, Any],
) -> ToolDefinition:
    """
    Build a ``ToolDefinition`` of type "function".

    Args:
        name: Tool name.
        description: Tool description.
        parameters: JSON Schema describing the tool's parameters.

    Returns:
        ToolDefinition
    """
    function_spec: dict[str, Any] = {
        "name": name,
        "description": description,
        "parameters": parameters,
    }
    return ToolDefinition(type="function", function=function_spec)
|
||||
316
apps/api/tests/test_nvidia_provider.py
Normal file
316
apps/api/tests/test_nvidia_provider.py
Normal file
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
NVIDIA Provider Tests - ADR-036
|
||||
===============================
|
||||
測試 Nemotron Tool Calling 整合
|
||||
|
||||
注意: 這些是單元測試,不需要真實的 NVIDIA API Key
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from src.models.nvidia import (
|
||||
NvidiaChoice,
|
||||
NvidiaMessage,
|
||||
NvidiaProviderResult,
|
||||
NvidiaResponse,
|
||||
NvidiaUsage,
|
||||
ToolCall,
|
||||
ToolCallValidationResult,
|
||||
ToolDefinition,
|
||||
ToolFunction,
|
||||
)
|
||||
from src.services.nvidia_provider import (
|
||||
HIGH_RISK_TOOLS,
|
||||
NvidiaProvider,
|
||||
create_tool_definition,
|
||||
get_nvidia_provider,
|
||||
reset_nvidia_provider,
|
||||
)
|
||||
|
||||
|
||||
class TestNvidiaModels:
    """Checks for the NVIDIA Pydantic models."""

    def test_tool_function_model(self):
        """ToolFunction keeps the name and the raw argument string."""
        raw_args = '{"pod_name": "api-server", "namespace": "default"}'
        func = ToolFunction(name="restart_pod", arguments=raw_args)

        assert func.name == "restart_pod"
        assert '"pod_name"' in func.arguments

    def test_tool_call_model(self):
        """ToolCall exposes its id and the nested function."""
        inner = ToolFunction(name="scale_deployment", arguments='{"replicas": 3}')
        tc = ToolCall(id="call_123", type="function", function=inner)

        assert tc.id == "call_123"
        assert tc.function.name == "scale_deployment"

    def test_nvidia_response_model(self):
        """NvidiaResponse can be assembled from its nested models."""
        call = ToolCall(
            id="call_1",
            function=ToolFunction(name="restart_pod", arguments='{"pod": "api"}'),
        )
        message = NvidiaMessage(role="assistant", content=None, tool_calls=[call])
        choice = NvidiaChoice(index=0, message=message, finish_reason="tool_calls")
        usage = NvidiaUsage(prompt_tokens=100, completion_tokens=50, total_tokens=150)

        response = NvidiaResponse(
            id="resp_123",
            created=1234567890,
            model="nvidia/llama-3.1-nemotron-70b-instruct",
            choices=[choice],
            usage=usage,
        )

        assert response.id == "resp_123"
        assert len(response.choices) == 1
        assert response.choices[0].message.tool_calls is not None
        assert len(response.choices[0].message.tool_calls) == 1
        assert response.usage.total_tokens == 150

    def test_tool_call_validation_result(self):
        """ToolCallValidationResult stores the decoded arguments."""
        decoded = {"pod_name": "api", "namespace": "default"}
        result = ToolCallValidationResult(
            valid=True,
            tool_name="restart_pod",
            arguments=decoded,
        )

        assert result.valid
        assert result.tool_name == "restart_pod"
        assert result.arguments["namespace"] == "default"

    def test_tool_definition(self):
        """ToolDefinition keeps the function spec as given."""
        schema = {
            "type": "object",
            "properties": {
                "pod_name": {"type": "string"},
                "namespace": {"type": "string"},
            },
            "required": ["pod_name"],
        }
        definition = ToolDefinition(
            type="function",
            function={
                "name": "restart_pod",
                "description": "Restart a Kubernetes pod",
                "parameters": schema,
            },
        )

        assert definition.type == "function"
        assert definition.function["name"] == "restart_pod"
|
||||
|
||||
|
||||
class TestNvidiaProvider:
    """Checks for the NvidiaProvider class."""

    def test_singleton(self):
        """get_nvidia_provider hands out the same instance on repeated calls."""
        reset_nvidia_provider()
        first, second = get_nvidia_provider(), get_nvidia_provider()
        assert first is second
        reset_nvidia_provider()

    def test_high_risk_tool_detection(self):
        """High-risk names are detected; other names are not."""
        provider = NvidiaProvider()

        # "DELETE_POD" confirms that matching ignores case.
        risky_names = (
            "delete_pod",
            "DELETE_POD",
            "delete_deployment",
            "scale_to_zero",
            "drain_node",
        )
        for risky in risky_names:
            assert provider.is_high_risk_tool(risky)

        for harmless in ("restart_pod", "scale_deployment", "get_logs"):
            assert not provider.is_high_risk_tool(harmless)

    def test_filter_high_risk_tools(self):
        """get_high_risk_tools keeps only the valid high-risk entry."""
        provider = NvidiaProvider()

        safe_call = ToolCallValidationResult(
            valid=True,
            tool_name="restart_pod",
            arguments={"pod": "api"},
        )
        risky_call = ToolCallValidationResult(
            valid=True,
            tool_name="delete_pod",
            arguments={"pod": "test"},
        )
        broken_call = ToolCallValidationResult(
            valid=False,
            tool_name="invalid_tool",
            error="Parse error",
        )

        high_risk = provider.get_high_risk_tools([safe_call, risky_call, broken_call])

        assert len(high_risk) == 1
        assert high_risk[0].tool_name == "delete_pod"

    def test_validate_tool_calls(self):
        """_validate_tool_calls decodes good arguments and flags bad JSON."""
        provider = NvidiaProvider()

        good_call = ToolCall(
            id="call_1",
            function=ToolFunction(
                name="restart_pod",
                arguments='{"pod_name": "api", "namespace": "default"}',
            ),
        )
        bad_call = ToolCall(
            id="call_2",
            function=ToolFunction(
                name="invalid_tool",
                arguments="not valid json{",  # deliberately malformed JSON
            ),
        )
        # Simulated API response carrying both calls in one assistant message.
        response = NvidiaResponse(
            id="resp_123",
            created=1234567890,
            model="nvidia/llama-3.1-nemotron-70b-instruct",
            choices=[
                NvidiaChoice(
                    index=0,
                    message=NvidiaMessage(
                        role="assistant",
                        tool_calls=[good_call, bad_call],
                    ),
                )
            ],
        )

        results = provider._validate_tool_calls(response)

        assert len(results) == 2
        assert results[0].valid
        assert results[0].tool_name == "restart_pod"
        assert results[0].arguments["pod_name"] == "api"
        assert not results[1].valid
        assert "JSON" in results[1].error
|
||||
|
||||
|
||||
class TestCreateToolDefinition:
    """Tests for the create_tool_definition helper."""

    def test_create_tool_definition(self):
        """Check that name, description and parameters land in the definition."""
        schema = {
            "type": "object",
            "properties": {
                "deployment": {"type": "string"},
                "replicas": {"type": "integer"},
            },
            "required": ["deployment", "replicas"],
        }

        definition = create_tool_definition(
            name="scale_deployment",
            description="Scale a Kubernetes deployment",
            parameters=schema,
        )

        assert definition.type == "function"

        function_spec = definition.function
        assert function_spec["name"] == "scale_deployment"
        assert function_spec["description"] == "Scale a Kubernetes deployment"
        assert "replicas" in function_spec["parameters"]["properties"]
|
||||
|
||||
|
||||
class TestHighRiskTools:
    """Tests for the contents of the HIGH_RISK_TOOLS collection."""

    def test_high_risk_tools_list(self):
        """Every required destructive operation is listed in HIGH_RISK_TOOLS."""
        required_tools = (
            "delete_pod",
            "delete_deployment",
            "delete_namespace",
            "scale_to_zero",
            "drain_node",
            "cordon_node",
        )
        for tool_name in required_tools:
            assert tool_name in HIGH_RISK_TOOLS

    def test_restart_not_high_risk(self):
        """Restart operations are not listed in HIGH_RISK_TOOLS."""
        for tool_name in ("restart_pod", "restart_deployment"):
            assert tool_name not in HIGH_RISK_TOOLS
|
||||
|
||||
|
||||
class TestAIRouterNvidiaIntegration:
    """Tests for the NVIDIA integration in the AI router."""

    def test_nvidia_provider_in_router(self):
        """AIProvider has an NVIDIA member whose value is "nvidia"."""
        from src.services.ai_router import AIProvider

        assert hasattr(AIProvider, "NVIDIA")
        assert AIProvider.NVIDIA.value == "nvidia"

    def test_tool_calling_route(self):
        """route_tool_calling selects NVIDIA first, with Gemini and Claude as fallbacks."""
        from src.services.ai_router import AIProvider, get_ai_router, reset_ai_router

        # Presumably clears a cached router instance so the test starts clean
        # (reset_ai_router is defined outside this file -- confirm).
        reset_ai_router()
        try:
            router = get_ai_router()

            provider, model, fallback_chain = router.route_tool_calling()

            assert provider == AIProvider.NVIDIA
            assert "nvidia" in model.lower() or "nemotron" in model.lower()

            # The fallback chain should include both Gemini and Claude.
            fallback_providers = [p for p, _ in fallback_chain]
            assert AIProvider.GEMINI in fallback_providers
            assert AIProvider.CLAUDE in fallback_providers
        finally:
            # Always reset, even when an assertion above fails, so router
            # state does not leak into later tests.
            reset_ai_router()

    def test_existing_routing_not_affected(self):
        """General (non-tool-calling) routing never selects NVIDIA."""
        from src.services.ai_router import AIProvider, get_ai_router, reset_ai_router

        reset_ai_router()
        try:
            router = get_ai_router()

            # Synchronous routing path (does not involve NVIDIA).
            decision = router.route_sync("重啟 api pod")

            # Expected to stay on Ollama (low complexity); any of the
            # general-purpose providers is accepted.
            assert decision.selected_provider in [
                AIProvider.OLLAMA,
                AIProvider.GEMINI,
                AIProvider.CLAUDE,
            ]
            # NVIDIA must not show up in general routing.
            assert decision.selected_provider != AIProvider.NVIDIA
        finally:
            # Always reset, even when an assertion above fails, so router
            # state does not leak into later tests.
            reset_ai_router()
|
||||
@@ -5,12 +5,12 @@
|
||||
|
||||
---
|
||||
|
||||
## 📍 當前狀態 (2026-03-28 23:50 台北)
|
||||
## 📍 當前狀態 (2026-03-29 03:30 台北)
|
||||
|
||||
| 項目 | 狀態 |
|
||||
|------|------|
|
||||
| **當前 Phase** | ✅ **K3s 首席架構師完整審查 (99% EXCEPTIONAL)** |
|
||||
| **Day** | Day 11 |
|
||||
| **當前 Phase** | ✅ **Phase 20 Nemotron Tool Calling (Phase A 完成)** |
|
||||
| **Day** | Day 12 |
|
||||
| **K3s 版本** | v1.34.5+k3s1 (mon + mon1) |
|
||||
| **叢集健康** | ✅ **所有 Pod 正常運行** |
|
||||
| **K3s 優化** | ✅ **全部完成 + P2/P3 + PSS** |
|
||||
@@ -49,6 +49,33 @@
|
||||
|
||||
---
|
||||
|
||||
### ✅ 2026-03-29 Phase 20 Nemotron Phase A 完成 (Day 12 03:30) 🆕
|
||||
|
||||
| 項目 | 內容 | 狀態 |
|
||||
|------|------|------|
|
||||
| **ADR-036** | Nemotron Tool Calling 整合 | ✅ 已建立 |
|
||||
| **Phase A 實作** | NvidiaProvider 完整實作 | ✅ **已完成** |
|
||||
| **測試驗證** | tests/test_nvidia_provider.py | ✅ **15/15 PASSED** |
|
||||
| **整合** | ai_router + ai_rate_limiter + models.json | ✅ **已整合** |
|
||||
|
||||
**新建檔案**:
|
||||
- `src/models/nvidia.py` - Pydantic Schema
|
||||
- `src/services/nvidia_provider.py` - NvidiaProvider 類別
|
||||
- `tests/test_nvidia_provider.py` - 15 項單元測試
|
||||
|
||||
**已修改**:
|
||||
- `src/core/config.py` - NVIDIA_API_KEY
|
||||
- `src/services/ai_router.py` - AIProvider.NVIDIA + route_tool_calling()
|
||||
- `src/services/ai_rate_limiter.py` - NVIDIA 限制
|
||||
- `apps/api/models.json` - NVIDIA 配置
|
||||
|
||||
**待統帥執行**:
|
||||
```bash
|
||||
gh secret set NVIDIA_API_KEY --body "nvapi-..."
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 🏛️ 2026-03-28 首席架構師完整審查 (Day 11 23:50)
|
||||
|
||||
| 審查項目 | 評分 | 說明 |
|
||||
|
||||
@@ -60,15 +60,36 @@ Elapsed: 28.71ms (< 50ms 目標)
|
||||
Method: httpx_native
|
||||
```
|
||||
|
||||
## 五主機架構
|
||||
## 五主機架構 (2026-03-28 K-HA 更新)
|
||||
|
||||
| 主機 | IP | 角色 | 服務 |
|
||||
|-----|-----|------|------|
|
||||
| DevOps | 192.168.0.110 | CI/CD | Harbor, GH Runner |
|
||||
| Security | 192.168.0.112 | 安全掃描 | Kali Scanner |
|
||||
| K3s Master | 192.168.0.120 | 容器編排 | K3s API Server |
|
||||
| K3s Worker | 192.168.0.121 | 工作負載 | App Pods |
|
||||
| AI+Web | 192.168.0.188 | AI/DB/Web | Ollama, PostgreSQL, Redis, SignOz |
|
||||
| DevOps | 192.168.0.110 | CI/CD + 監控 | Harbor:5000, GH Runner, Sentry:9000, Langfuse:3100 |
|
||||
| Security | 192.168.0.112 | 安全掃描 | Kali Scanner:8080 |
|
||||
| K3s Server #1 | 192.168.0.120 | **Control-Plane MASTER** | keepalived (priority=101) |
|
||||
| K3s Server #2 | 192.168.0.121 | **Control-Plane BACKUP** | keepalived (priority=100) |
|
||||
| **VIP** | **192.168.0.125** | **HA Endpoint** | **K3s API:6443 + NodePort (32334/32335)** |
|
||||
| AI+Web | 192.168.0.188 | AI/DB/Web | Ollama:11434, OpenClaw:8089, **PostgreSQL:5432 (K3s Datastore)**, Redis:6380, SigNoz:3301 |
|
||||
|
||||
## K3s 高可用架構 (ADR-033)
|
||||
|
||||
```
|
||||
VIP 192.168.0.125 (keepalived)
|
||||
↓
|
||||
┌──────────────┼──────────────┐
|
||||
▼ ▼ ▼
|
||||
mon(120) mon1(121) PostgreSQL(188)
|
||||
K3s MASTER K3s BACKUP K3s Datastore
|
||||
priority=101 priority=100 (Kine protocol)
|
||||
```
|
||||
|
||||
### 關鍵變更 (2026-03-28)
|
||||
|
||||
- ✅ 雙 Control-Plane (120+121)
|
||||
- ✅ 外接 PostgreSQL Datastore (188:5432)
|
||||
- ✅ VIP 192.168.0.125 (keepalived VRRP)
|
||||
- ✅ CI/CD kubeconfig 指向 VIP
|
||||
- ✅ 故障轉移 < 3 秒
|
||||
|
||||
## SigNoz 整合架構
|
||||
|
||||
@@ -93,8 +114,9 @@ Method: httpx_native
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## AI Fallback 策略 (ADR-006)
|
||||
## AI Fallback 策略 (ADR-006 v1.3 + ADR-036)
|
||||
|
||||
### 一般對話任務
|
||||
```
|
||||
Ollama (local) → Gemini (cloud) → Claude (cloud) → mock_fallback
|
||||
↓ ↓ ↓ ↓
|
||||
@@ -102,6 +124,24 @@ Ollama (local) → Gemini (cloud) → Claude (cloud) → mock_fallback
|
||||
188:11434 API Key API Key 無 LLM
|
||||
```
|
||||
|
||||
### Tool Calling 任務 (ADR-036 - 待批准)
|
||||
```
|
||||
Nemotron (cloud) → Gemini (cloud) → Claude (cloud) → 拒絕執行
|
||||
↓ ↓ ↓
|
||||
免費 tier $0.001/1K $0.003/1K
|
||||
精準度 83% API Key API Key
|
||||
延遲 11-45s
|
||||
```
|
||||
|
||||
### Provider 對照表
|
||||
|
||||
| Provider | 用途 | 延遲 | 精準度 | 成本 |
|
||||
|----------|------|------|--------|------|
|
||||
| **Ollama** | 即時對話、簡單查詢 | < 5s | 中 | $0 |
|
||||
| **Nemotron** | Tool Calling、K8s 操作 | 11-45s | 高 (83%) | 免費 tier |
|
||||
| **Gemini** | 通用備援 | 2-5s | 中高 | 低 |
|
||||
| **Claude** | 複雜推理、CRITICAL | 2-5s | 最高 | 高 |
|
||||
|
||||
## Phase 7: 視覺主權組件
|
||||
|
||||
### 已完成組件
|
||||
|
||||
Reference in New Issue
Block a user