Files
awoooi/apps/api/models.json
OG T 46843c8e19
All checks were successful
E2E Health Check / e2e-health (push) Successful in 17s
fix(nvidia): revert to nemotron-mini, truncate context for 4K limit, enforce precise confidence
2026-03-31 13:57:10 +08:00

198 lines
5.0 KiB
JSON

{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"name": "OpenClaw AI Router Configuration",
"version": "1.1.0",
"description": "AI 模型路由與備援設定 (ADR-006 + ADR-036 Nemotron)",
"updated_at": "2026-03-29",
"default_provider": "ollama",
"fallback_order": ["ollama", "gemini", "claude"],
"tool_calling_fallback_order": ["nvidia", "gemini", "claude"],
"providers": {
"ollama": {
"name": "Ollama (Local)",
"enabled": true,
"priority": 1,
"endpoint": "http://192.168.0.188:11434",
"api_path": "/api/generate",
"models": {
"default": "qwen2.5:7b-instruct",
"rca": "qwen2.5:7b-instruct",
"summary": "llama3.2:3b"
},
"options": {
"temperature": 0.1,
"top_p": 0.9,
"num_predict": 1024,
"format": "json"
},
"timeout_seconds": 90,
"cost": {
"per_1k_tokens": 0,
"currency": "USD"
},
"health_check": {
"endpoint": "/api/tags",
"interval_seconds": 60
}
},
"gemini": {
"name": "Google Gemini",
"enabled": true,
"priority": 2,
"endpoint": "https://generativelanguage.googleapis.com/v1beta",
"api_path": "/models/{model}:generateContent",
"models": {
"default": "gemini-1.5-flash",
"rca": "gemini-1.5-flash",
"summary": "gemini-1.5-flash"
},
"options": {
"temperature": 0.1,
"maxOutputTokens": 2048,
"responseMimeType": "application/json"
},
"timeout_seconds": 30,
"cost": {
"per_1k_tokens": 0.001,
"currency": "USD"
},
"auth": {
"type": "api_key",
"env_var": "GEMINI_API_KEY",
"query_param": "key"
},
"rate_limits": {
"daily_tokens": 70000,
"requests_per_minute": 60
}
},
"claude": {
"name": "Anthropic Claude",
"enabled": true,
"priority": 3,
"endpoint": "https://api.anthropic.com/v1",
"api_path": "/messages",
"models": {
"default": "claude-3-haiku-20240307",
"rca": "claude-3-haiku-20240307",
"summary": "claude-3-haiku-20240307"
},
"options": {
"max_tokens": 2048
},
"timeout_seconds": 30,
"cost": {
"per_1k_tokens": 0.008,
"currency": "USD"
},
"auth": {
"type": "header",
"env_var": "CLAUDE_API_KEY",
"header_name": "x-api-key"
},
"rate_limits": {
"daily_tokens": 35000,
"requests_per_minute": 50
},
"features": {
"tool_use": true,
"structured_output": true
}
},
"nvidia": {
"name": "NVIDIA Nemotron (ADR-036)",
"enabled": true,
"priority": 4,
"endpoint": "https://integrate.api.nvidia.com/v1",
"api_path": "/chat/completions",
"models": {
"default": "nvidia/nemotron-mini-4b-instruct",
"tool_calling": "nvidia/nemotron-mini-4b-instruct",
"rca": "nvidia/nemotron-mini-4b-instruct"
},
"options": {
"temperature": 0.0,
"max_tokens": 1024
},
"timeout_seconds": 60,
"cost": {
"per_1k_tokens": 0,
"currency": "USD",
"notes": "Free tier (2026-03-29)"
},
"auth": {
"type": "header",
"env_var": "NVIDIA_API_KEY",
"header_name": "Authorization",
"header_prefix": "Bearer "
},
"rate_limits": {
"daily_tokens": 50000,
"requests_per_minute": 5
},
"features": {
"tool_use": true,
"structured_output": true,
"tool_calling_accuracy": 0.833
},
"use_for": ["tool_calling"]
}
},
"use_cases": {
"rca_analysis": {
"description": "Root Cause Analysis for alerts",
"preferred_provider": "ollama",
"fallback_enabled": true,
"required_features": ["json_output"]
},
"log_summary": {
"description": "Summarize K8s logs for context gathering",
"preferred_provider": "ollama",
"fallback_enabled": true,
"max_input_tokens": 4096
},
"telegram_compose": {
"description": "Compose compressed Telegram messages",
"preferred_provider": "ollama",
"fallback_enabled": false,
"max_output_tokens": 500
},
"tool_calling": {
"description": "K8s Tool Calling operations (ADR-036)",
"preferred_provider": "nvidia",
"fallback_enabled": true,
"fallback_order": ["gemini", "claude"],
"required_features": ["tool_use"],
"notes": "Nemotron 83.3% accuracy for K8s operations"
}
},
"monitoring": {
"enabled": true,
"metrics": {
"track_latency": true,
"track_tokens": true,
"track_cost": true,
"track_fallbacks": true
},
"alerts": {
"daily_cost_threshold_usd": 5,
"monthly_cost_threshold_usd": 10,
"fallback_rate_threshold_percent": 20
}
},
"circuit_breaker": {
"enabled": true,
"failure_threshold": 5,
"recovery_timeout_seconds": 60,
"half_open_requests": 3
}
}