Files
awoooi/apps/api/models.json
OG T 419dc2f8e0
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 5m46s
E2E Health Check / e2e-health (push) Successful in 16s
fix(nvidia): timeout 60s→30s,NVIDIA 第一保免費,失敗轉 Gemini
- nvidia_provider.py: NVIDIA_TIMEOUT 60→30s
- models.json: timeout_seconds 60→30s
- configmap: NEMOTRON_TIMEOUT_SECONDS 45→30s, fallback 恢復 nvidia 第一
目標: Nemo 有足夠時間回應(free),失敗快速轉 Gemini(備援),整體機制可運作

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 16:05:19 +08:00

198 lines
5.1 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"name": "OpenClaw AI Router Configuration",
"version": "1.1.0",
"description": "AI 模型路由與備援設定 (ADR-006 + ADR-036 Nemotron)",
"updated_at": "2026-03-29",
"default_provider": "ollama",
"fallback_order": ["ollama", "gemini", "claude"],
"tool_calling_fallback_order": ["nvidia", "gemini", "claude"],
"providers": {
"ollama": {
"name": "Ollama (Local)",
"enabled": true,
"priority": 1,
"endpoint": "http://192.168.0.188:11434",
"api_path": "/api/generate",
"models": {
"default": "qwen2.5:7b-instruct",
"rca": "qwen2.5:7b-instruct",
"summary": "llama3.2:3b"
},
"options": {
"temperature": 0.1,
"top_p": 0.9,
"num_predict": 1024,
"format": "json"
},
"timeout_seconds": 90,
"cost": {
"per_1k_tokens": 0,
"currency": "USD"
},
"health_check": {
"endpoint": "/api/tags",
"interval_seconds": 60
}
},
"gemini": {
"name": "Google Gemini",
"enabled": true,
"priority": 2,
"endpoint": "https://generativelanguage.googleapis.com/v1beta",
"api_path": "/models/{model}:generateContent",
"models": {
"default": "gemini-2.0-flash",
"rca": "gemini-2.0-flash",
"summary": "gemini-2.0-flash"
},
"options": {
"temperature": 0.1,
"maxOutputTokens": 2048,
"responseMimeType": "application/json"
},
"timeout_seconds": 30,
"cost": {
"per_1k_tokens": 0.001,
"currency": "USD"
},
"auth": {
"type": "api_key",
"env_var": "GEMINI_API_KEY",
"query_param": "key"
},
"rate_limits": {
"daily_tokens": 70000,
"requests_per_minute": 60
}
},
"claude": {
"name": "Anthropic Claude",
"enabled": true,
"priority": 3,
"endpoint": "https://api.anthropic.com/v1",
"api_path": "/messages",
"models": {
"default": "claude-3-haiku-20240307",
"rca": "claude-3-haiku-20240307",
"summary": "claude-3-haiku-20240307"
},
"options": {
"max_tokens": 2048
},
"timeout_seconds": 30,
"cost": {
"per_1k_tokens": 0.008,
"currency": "USD"
},
"auth": {
"type": "header",
"env_var": "CLAUDE_API_KEY",
"header_name": "x-api-key"
},
"rate_limits": {
"daily_tokens": 35000,
"requests_per_minute": 50
},
"features": {
"tool_use": true,
"structured_output": true
}
},
"nvidia": {
"name": "NVIDIA Nemotron (ADR-036)",
"enabled": true,
"priority": 4,
"endpoint": "https://integrate.api.nvidia.com/v1",
"api_path": "/chat/completions",
"models": {
"default": "nvidia/nemotron-mini-4b-instruct",
"tool_calling": "nvidia/nemotron-mini-4b-instruct",
"rca": "nvidia/nemotron-mini-4b-instruct"
},
"options": {
"temperature": 0.0,
"max_tokens": 1024
},
"timeout_seconds": 30,
"cost": {
"per_1k_tokens": 0,
"currency": "USD",
"notes": "Free tier (2026-03-29) - timeout 30s失敗後轉 Gemini"
},
"auth": {
"type": "header",
"env_var": "NVIDIA_API_KEY",
"header_name": "Authorization",
"header_prefix": "Bearer "
},
"rate_limits": {
"daily_tokens": 50000,
"requests_per_minute": 5
},
"features": {
"tool_use": true,
"structured_output": true,
"tool_calling_accuracy": 0.833
},
"use_for": ["tool_calling"]
}
},
"use_cases": {
"rca_analysis": {
"description": "Root Cause Analysis for alerts",
"preferred_provider": "ollama",
"fallback_enabled": true,
"required_features": ["json_output"]
},
"log_summary": {
"description": "Summarize K8s logs for context gathering",
"preferred_provider": "ollama",
"fallback_enabled": true,
"max_input_tokens": 4096
},
"telegram_compose": {
"description": "Compose compressed Telegram messages",
"preferred_provider": "ollama",
"fallback_enabled": false,
"max_output_tokens": 500
},
"tool_calling": {
"description": "K8s Tool Calling operations (ADR-036)",
"preferred_provider": "nvidia",
"fallback_enabled": true,
"fallback_order": ["gemini", "claude"],
"required_features": ["tool_use"],
"notes": "Nemotron 83.3% accuracy for K8s operations"
}
},
"monitoring": {
"enabled": true,
"metrics": {
"track_latency": true,
"track_tokens": true,
"track_cost": true,
"track_fallbacks": true
},
"alerts": {
"daily_cost_threshold_usd": 5,
"monthly_cost_threshold_usd": 10,
"fallback_rate_threshold_percent": 20
}
},
"circuit_breaker": {
"enabled": true,
"failure_threshold": 5,
"recovery_timeout_seconds": 60,
"half_open_requests": 3
}
}