Files
awoooi/apps/api/models.json
OG T f32b077336
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 1m36s
E2E Health Check / e2e-health (push) Successful in 44s
fix(models): 更新 Ollama 設定 — M1 Pro + deepseek-r1:14b
- endpoint: 188 → 111 (M1 Pro, 40+ tok/s)
- rca/default model: qwen2.5:7b-instruct → deepseek-r1:14b (SRE最強推理)
- summary model: llama3.2:3b → gemma3:4b (快速摘要)
- timeout: 90s → 120s (deepseek-r1:14b 實測最慢 54s)
- version: 1.1.0 → 1.2.0

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 22:59:53 +08:00

198 lines
5.1 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"name": "OpenClaw AI Router Configuration",
"version": "1.2.0",
"description": "AI 模型路由與備援設定 (ADR-006 + ADR-036 Nemotron)",
"updated_at": "2026-04-08",
"default_provider": "ollama",
"fallback_order": ["ollama", "gemini", "claude"],
"tool_calling_fallback_order": ["nvidia", "gemini", "claude"],
"providers": {
"ollama": {
"name": "Ollama (Local M1 Pro)",
"enabled": true,
"priority": 1,
"endpoint": "http://192.168.0.111:11434",
"api_path": "/api/generate",
"models": {
"default": "deepseek-r1:14b",
"rca": "deepseek-r1:14b",
"summary": "gemma3:4b"
},
"options": {
"temperature": 0.1,
"top_p": 0.9,
"num_predict": 1024,
"format": "json"
},
"timeout_seconds": 120,
"cost": {
"per_1k_tokens": 0,
"currency": "USD"
},
"health_check": {
"endpoint": "/api/tags",
"interval_seconds": 60
}
},
"gemini": {
"name": "Google Gemini",
"enabled": true,
"priority": 2,
"endpoint": "https://generativelanguage.googleapis.com/v1beta",
"api_path": "/models/{model}:generateContent",
"models": {
"default": "gemini-2.0-flash",
"rca": "gemini-2.0-flash",
"summary": "gemini-2.0-flash"
},
"options": {
"temperature": 0.1,
"maxOutputTokens": 2048,
"responseMimeType": "application/json"
},
"timeout_seconds": 30,
"cost": {
"per_1k_tokens": 0.001,
"currency": "USD"
},
"auth": {
"type": "api_key",
"env_var": "GEMINI_API_KEY",
"query_param": "key"
},
"rate_limits": {
"daily_tokens": 70000,
"requests_per_minute": 60
}
},
"claude": {
"name": "Anthropic Claude",
"enabled": true,
"priority": 3,
"endpoint": "https://api.anthropic.com/v1",
"api_path": "/messages",
"models": {
"default": "claude-3-haiku-20240307",
"rca": "claude-3-haiku-20240307",
"summary": "claude-3-haiku-20240307"
},
"options": {
"max_tokens": 2048
},
"timeout_seconds": 30,
"cost": {
"per_1k_tokens": 0.008,
"currency": "USD"
},
"auth": {
"type": "header",
"env_var": "CLAUDE_API_KEY",
"header_name": "x-api-key"
},
"rate_limits": {
"daily_tokens": 35000,
"requests_per_minute": 50
},
"features": {
"tool_use": true,
"structured_output": true
}
},
"nvidia": {
"name": "NVIDIA Nemotron (ADR-036)",
"enabled": true,
"priority": 4,
"endpoint": "https://integrate.api.nvidia.com/v1",
"api_path": "/chat/completions",
"models": {
"default": "nvidia/nemotron-mini-4b-instruct",
"tool_calling": "nvidia/nemotron-mini-4b-instruct",
"rca": "nvidia/nemotron-mini-4b-instruct"
},
"options": {
"temperature": 0.0,
"max_tokens": 1024
},
"timeout_seconds": 30,
"cost": {
"per_1k_tokens": 0,
"currency": "USD",
"notes": "Free tier (2026-03-29) - 30s timeout; on failure, falls back to Gemini"
},
"auth": {
"type": "header",
"env_var": "NVIDIA_API_KEY",
"header_name": "Authorization",
"header_prefix": "Bearer "
},
"rate_limits": {
"daily_tokens": 50000,
"requests_per_minute": 5
},
"features": {
"tool_use": true,
"structured_output": true,
"tool_calling_accuracy": 0.833
},
"use_for": ["tool_calling"]
}
},
"use_cases": {
"rca_analysis": {
"description": "Root Cause Analysis for alerts",
"preferred_provider": "ollama",
"fallback_enabled": true,
"required_features": ["json_output"]
},
"log_summary": {
"description": "Summarize K8s logs for context gathering",
"preferred_provider": "ollama",
"fallback_enabled": true,
"max_input_tokens": 4096
},
"telegram_compose": {
"description": "Compose compressed Telegram messages",
"preferred_provider": "ollama",
"fallback_enabled": false,
"max_output_tokens": 500
},
"tool_calling": {
"description": "K8s Tool Calling operations (ADR-036)",
"preferred_provider": "nvidia",
"fallback_enabled": true,
"fallback_order": ["gemini", "claude"],
"required_features": ["tool_use"],
"notes": "Nemotron 83.3% accuracy for K8s operations"
}
},
"monitoring": {
"enabled": true,
"metrics": {
"track_latency": true,
"track_tokens": true,
"track_cost": true,
"track_fallbacks": true
},
"alerts": {
"daily_cost_threshold_usd": 5,
"monthly_cost_threshold_usd": 10,
"fallback_rate_threshold_percent": 20
}
},
"circuit_breaker": {
"enabled": true,
"failure_threshold": 5,
"recovery_timeout_seconds": 60,
"half_open_requests": 3
}
}