修復項目:
- P1-1: 從 ModelRegistry 取得模型 (非 hardcoded)
- P1-2: models.json 新增 nvidia.rca 模型定義
- P0: 新增 test_openclaw_nvidia.py 測試

首席架構師審查 74/120 → 預期 85+

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
198 lines
5.0 KiB
JSON
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "name": "OpenClaw AI Router Configuration",
  "version": "1.1.0",
  "description": "AI 模型路由與備援設定 (ADR-006 + ADR-036 Nemotron)",
  "updated_at": "2026-03-29",

  "default_provider": "ollama",
  "fallback_order": ["ollama", "gemini", "claude"],
  "tool_calling_fallback_order": ["nvidia", "gemini", "claude"],

  "providers": {
    "ollama": {
      "name": "Ollama (Local)",
      "enabled": true,
      "priority": 1,
      "endpoint": "http://192.168.0.188:11434",
      "api_path": "/api/generate",
      "models": {
        "default": "qwen2.5:7b-instruct",
        "rca": "qwen2.5:7b-instruct",
        "summary": "llama3.2:3b"
      },
      "options": {
        "temperature": 0.1,
        "top_p": 0.9,
        "num_predict": 1024,
        "format": "json"
      },
      "timeout_seconds": 90,
      "cost": {
        "per_1k_tokens": 0,
        "currency": "USD"
      },
      "health_check": {
        "endpoint": "/api/tags",
        "interval_seconds": 60
      }
    },

    "gemini": {
      "name": "Google Gemini",
      "enabled": true,
      "priority": 2,
      "endpoint": "https://generativelanguage.googleapis.com/v1beta",
      "api_path": "/models/{model}:generateContent",
      "models": {
        "default": "gemini-1.5-flash",
        "rca": "gemini-1.5-flash",
        "summary": "gemini-1.5-flash"
      },
      "options": {
        "temperature": 0.1,
        "maxOutputTokens": 2048,
        "responseMimeType": "application/json"
      },
      "timeout_seconds": 30,
      "cost": {
        "per_1k_tokens": 0.001,
        "currency": "USD"
      },
      "auth": {
        "type": "api_key",
        "env_var": "GEMINI_API_KEY",
        "query_param": "key"
      },
      "rate_limits": {
        "daily_tokens": 70000,
        "requests_per_minute": 60
      }
    },

    "claude": {
      "name": "Anthropic Claude",
      "enabled": true,
      "priority": 3,
      "endpoint": "https://api.anthropic.com/v1",
      "api_path": "/messages",
      "models": {
        "default": "claude-3-haiku-20240307",
        "rca": "claude-3-haiku-20240307",
        "summary": "claude-3-haiku-20240307"
      },
      "options": {
        "max_tokens": 2048
      },
      "timeout_seconds": 30,
      "cost": {
        "per_1k_tokens": 0.008,
        "currency": "USD"
      },
      "auth": {
        "type": "header",
        "env_var": "CLAUDE_API_KEY",
        "header_name": "x-api-key"
      },
      "rate_limits": {
        "daily_tokens": 35000,
        "requests_per_minute": 50
      },
      "features": {
        "tool_use": true,
        "structured_output": true
      }
    },

    "nvidia": {
      "name": "NVIDIA Nemotron (ADR-036)",
      "enabled": true,
      "priority": 4,
      "endpoint": "https://integrate.api.nvidia.com/v1",
      "api_path": "/chat/completions",
      "models": {
        "default": "nvidia/nemotron-mini-4b-instruct",
        "tool_calling": "nvidia/nemotron-mini-4b-instruct",
        "rca": "nvidia/llama-3.1-nemotron-70b-instruct"
      },
      "options": {
        "temperature": 0.0,
        "max_tokens": 1024
      },
      "timeout_seconds": 60,
      "cost": {
        "per_1k_tokens": 0,
        "currency": "USD",
        "notes": "Free tier (2026-03-29)"
      },
      "auth": {
        "type": "header",
        "env_var": "NVIDIA_API_KEY",
        "header_name": "Authorization",
        "header_prefix": "Bearer "
      },
      "rate_limits": {
        "daily_tokens": 50000,
        "requests_per_minute": 5
      },
      "features": {
        "tool_use": true,
        "structured_output": true,
        "tool_calling_accuracy": 0.833
      },
      "use_for": ["tool_calling"]
    }
  },

  "use_cases": {
    "rca_analysis": {
      "description": "Root Cause Analysis for alerts",
      "preferred_provider": "ollama",
      "fallback_enabled": true,
      "required_features": ["json_output"]
    },
    "log_summary": {
      "description": "Summarize K8s logs for context gathering",
      "preferred_provider": "ollama",
      "fallback_enabled": true,
      "max_input_tokens": 4096
    },
    "telegram_compose": {
      "description": "Compose compressed Telegram messages",
      "preferred_provider": "ollama",
      "fallback_enabled": false,
      "max_output_tokens": 500
    },
    "tool_calling": {
      "description": "K8s Tool Calling operations (ADR-036)",
      "preferred_provider": "nvidia",
      "fallback_enabled": true,
      "fallback_order": ["gemini", "claude"],
      "required_features": ["tool_use"],
      "notes": "Nemotron 83.3% accuracy for K8s operations"
    }
  },

  "monitoring": {
    "enabled": true,
    "metrics": {
      "track_latency": true,
      "track_tokens": true,
      "track_cost": true,
      "track_fallbacks": true
    },
    "alerts": {
      "daily_cost_threshold_usd": 5,
      "monthly_cost_threshold_usd": 10,
      "fallback_rate_threshold_percent": 20
    }
  },

  "circuit_breaker": {
    "enabled": true,
    "failure_threshold": 5,
    "recovery_timeout_seconds": 60,
    "half_open_requests": 3
  }
}