新增: - argocd 5個元件 (applicationset/dex/notifications/redis/repo-server) - awoooi-dev/awoooi-api - kube-state-metrics - observability/event-exporter - velero/velero 結果: prometheus 覆蓋率 94%→96%, errors 9→0 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
831 lines
18 KiB
YAML
831 lines
18 KiB
YAML
# AWOOOI 服務註冊表 (Single Source of Truth)
# ===========================================
# 版本: v1.0
# 建立日期: 2026-03-29
# 用途: 所有受監控服務的統一清單
#
# 新增服務時:
# 1. 在此檔案新增 entry
# 2. CI/CD 會自動生成對應的監控配置
# 3. 部署後監控自動生效

# =============================================================================
# K8s 工作負載 (awoooi-prod namespace)
# =============================================================================
services:
# Registry of monitored services. Each entry describes one workload: where it
# runs, how its health is probed, which monitoring backends are enabled, which
# alerts apply, and which auto-repair actions may be taken.
# NOTE(review): nesting is reconstructed from key order because the source
# lost its indentation — confirm against the canonical file.

# --- API backend ---
- name: awoooi-api
  type: k8s-deployment
  namespace: awoooi-prod
  replicas: 2
  port: 8000
  health_endpoint: /api/v1/health
  monitoring:
    prometheus: true
    sentry: true
    otel: true
    langfuse: true
  alerts:
  - pod_crash
  - high_error_rate
  - slow_response
  - memory_high
  auto_repair:
    enabled: true
    actions:
    - restart_pod
    - scale_up
  owner: backend-team
  criticality: P0

# --- Web frontend ---
- name: awoooi-web
  type: k8s-deployment
  namespace: awoooi-prod
  replicas: 2
  port: 3000
  health_endpoint: /
  monitoring:
    prometheus: true
    sentry: true
    otel: true
    langfuse: false
  alerts:
  - pod_crash
  - slow_page_load
  auto_repair:
    enabled: true
    actions:
    - restart_pod
  owner: frontend-team
  criticality: P0

# --- Signal worker ---
# No port: health is read from a file path with health_type exec_mtime.
- name: awoooi-worker
  type: k8s-deployment
  namespace: awoooi-prod
  replicas: 1
  health_endpoint: /tmp/worker-healthy
  health_type: exec_mtime
  monitoring:
    prometheus: true
    sentry: true
    otel: true
    langfuse: true
  alerts:
  - worker_stuck
  - queue_backlog
  auto_repair:
    enabled: true
    actions:
    - restart_pod
  owner: backend-team
  criticality: P1
# --- ArgoCD ---
- name: argocd-server
  type: k8s-deployment
  namespace: argocd
  port: 443
  health_endpoint: /healthz
  monitoring:
    prometheus: true
    sentry: false
    otel: false
  alerts:
  - service_down
  - sync_failed
  owner: devops-team
  criticality: P1

# --- Prometheus ---
- name: prometheus
  type: k8s-deployment
  namespace: monitoring
  port: 9090
  health_endpoint: /-/ready
  monitoring:
    prometheus: false  # self-monitoring would be circular
    sentry: false
  alerts:
  - service_down
  owner: devops-team
  criticality: P0

# --- Alertmanager ---
- name: alertmanager
  type: k8s-deployment
  namespace: monitoring
  port: 9093
  health_endpoint: /-/ready
  monitoring:
    prometheus: true
    sentry: false
  alerts:
  - service_down
  owner: devops-team
  criticality: P0
# --- Remaining ArgoCD components (Phase O-6 2026-04-10) ---
# These entries declare no port or health_endpoint; each carries only the
# service_down alert.
- name: argocd-applicationset-controller
  type: k8s-deployment
  namespace: argocd
  monitoring:
    prometheus: true
    sentry: false
    otel: false
  alerts:
  - service_down
  owner: devops-team
  criticality: P2

- name: argocd-dex-server
  type: k8s-deployment
  namespace: argocd
  monitoring:
    prometheus: true
    sentry: false
    otel: false
  alerts:
  - service_down
  owner: devops-team
  criticality: P2

- name: argocd-notifications-controller
  type: k8s-deployment
  namespace: argocd
  monitoring:
    prometheus: true
    sentry: false
    otel: false
  alerts:
  - service_down
  owner: devops-team
  criticality: P2

- name: argocd-redis
  type: k8s-deployment
  namespace: argocd
  monitoring:
    prometheus: true
    sentry: false
    otel: false
  alerts:
  - service_down
  owner: devops-team
  criticality: P2

- name: argocd-repo-server
  type: k8s-deployment
  namespace: argocd
  monitoring:
    prometheus: true
    sentry: false
    otel: false
  alerts:
  - service_down
  owner: devops-team
  criticality: P2
# --- AWOOOI dev environment ---
# NOTE(review): this entry reuses the name `awoooi-api` that the awoooi-prod
# entry also uses; only `namespace` tells them apart — confirm that consumers
# key on (namespace, name) rather than name alone.
- name: awoooi-api
  type: k8s-deployment
  namespace: awoooi-dev
  monitoring:
    prometheus: true
    sentry: false
    otel: false
  alerts:
  - pod_crash
  owner: backend-team
  criticality: P3

# --- kube-state-metrics ---
- name: kube-state-metrics
  type: k8s-deployment
  namespace: kube-state-metrics
  monitoring:
    prometheus: true
    sentry: false
    otel: false
  alerts:
  - service_down
  owner: devops-team
  criticality: P1

# --- OTEL event exporter ---
- name: event-exporter
  type: k8s-deployment
  namespace: observability
  monitoring:
    prometheus: true
    sentry: false
    otel: false
  alerts:
  - service_down
  owner: devops-team
  criticality: P1

# --- Velero backups ---
- name: velero
  type: k8s-deployment
  namespace: velero
  monitoring:
    prometheus: true
    sentry: false
    otel: false
  alerts:
  - service_down
  - backup_failed
  owner: devops-team
  criticality: P1
# =============================================================================
# Docker containers (192.168.0.188 - AI/Web hub)
# =============================================================================

# --- Ollama LLM ---
- name: ollama
  type: docker
  host: 192.168.0.188
  port: 11434
  health_endpoint: /api/tags
  monitoring:
    prometheus: true
    sentry: false
    otel: false
  alerts:
  - service_down
  - inference_timeout
  - model_load_failed
  auto_repair:
    enabled: true
    actions:
    - restart_container
  owner: ai-team
  criticality: P0

# --- OpenClaw AI decision center ---
- name: openclaw
  type: docker
  host: 192.168.0.188
  port: 8089
  health_endpoint: /health
  monitoring:
    prometheus: true
    sentry: true
    otel: true
    langfuse: true
  alerts:
  - service_down
  - analysis_timeout
  - high_error_rate
  auto_repair:
    enabled: true
    actions:
    - restart_container
  owner: ai-team
  criticality: P0

# --- Redis Stack ---
# health_type exec: health_endpoint holds a command, not an HTTP path.
- name: redis
  type: docker
  host: 192.168.0.188
  port: 6380
  health_endpoint: redis-cli ping
  health_type: exec
  monitoring:
    prometheus: true
    sentry: false
  alerts:
  - service_down
  - memory_high
  - connection_rejected
  auto_repair:
    enabled: false  # data stores are handled with caution
  owner: infra-team
  criticality: P0

# --- PostgreSQL ---
- name: postgres
  type: docker
  host: 192.168.0.188
  port: 5432
  health_endpoint: pg_isready
  health_type: exec
  monitoring:
    prometheus: true
    sentry: false
  alerts:
  - service_down
  - connection_pool_exhausted
  - slow_query
  - replication_lag
  auto_repair:
    enabled: false  # data stores are handled with caution
  owner: infra-team
  criticality: P0

# --- SigNoz OTEL collector ---
- name: signoz-collector
  type: docker
  host: 192.168.0.188
  port: 24317
  health_endpoint: grpc_health
  health_type: grpc
  monitoring:
    prometheus: true
    sentry: false
  alerts:
  - service_down
  - trace_dropped
  owner: devops-team
  criticality: P1

# --- SigNoz UI ---
- name: signoz-ui
  type: docker
  host: 192.168.0.188
  port: 3301
  health_endpoint: /
  monitoring:
    prometheus: true
    sentry: false
  alerts:
  - service_down
  owner: devops-team
  criticality: P2

# --- ClickHouse (SigNoz backend) ---
- name: clickhouse
  type: docker
  host: 192.168.0.188
  port: 8123
  health_endpoint: /ping
  monitoring:
    prometheus: true
    sentry: false
  alerts:
  - service_down
  - disk_space_low
  - query_timeout
  owner: devops-team
  criticality: P1
# =============================================================================
# Docker containers (192.168.0.110 - DevOps hub)
# =============================================================================

# --- Harbor registry ---
- name: harbor
  type: docker
  host: 192.168.0.110
  port: 5000
  health_endpoint: /api/v2.0/health
  monitoring:
    prometheus: true
    sentry: false
  alerts:
  - service_down
  - storage_full
  - push_failed
  owner: devops-team
  criticality: P0

# --- Sentry ---
- name: sentry
  type: docker
  host: 192.168.0.110
  port: 9000
  health_endpoint: /_health/
  monitoring:
    prometheus: true
    sentry: false  # self-monitoring would be circular
  alerts:
  - service_down
  owner: devops-team
  criticality: P1

# --- Langfuse LLMOps ---
- name: langfuse
  type: docker
  host: 192.168.0.110
  port: 3100
  health_endpoint: /api/public/health
  monitoring:
    prometheus: true
    sentry: false
  alerts:
  - service_down
  - trace_lost
  owner: ai-team
  criticality: P2

# --- GitHub Actions runner ---
# A systemd unit rather than a container: identified by service_name, with no
# port or health_endpoint.
- name: github-runner
  type: systemd
  host: 192.168.0.110
  service_name: actions.runner.owenhytsai-awoooi.awoooi-110.service
  monitoring:
    prometheus: true
    sentry: false
  alerts:
  - runner_offline
  - job_stuck
  auto_repair:
    enabled: true
    actions:
    - restart_service
  owner: devops-team
  criticality: P0
# =============================================================================
# Host nodes
# =============================================================================
# One entry per machine: address, role, the alerts that apply, and the owner.
nodes:
- name: mon
  ip: 192.168.0.120
  role: k3s-master
  alerts:
  - node_down
  - cpu_high
  - memory_high
  - disk_space_low
  - etcd_latency_high
  owner: infra-team

- name: mon1
  ip: 192.168.0.121
  role: k3s-worker
  alerts:
  - node_down
  - node_not_ready
  - cpu_high
  - memory_high
  - disk_space_low
  owner: infra-team

- name: harbor
  ip: 192.168.0.110
  role: devops
  alerts:
  - node_down
  - cpu_high
  - memory_high
  - disk_space_low
  owner: devops-team

- name: pg
  ip: 192.168.0.188
  role: ai-web
  alerts:
  - node_down
  - cpu_high
  - memory_high
  - disk_space_low
  - gpu_utilization_high
  owner: ai-team

- name: kali
  ip: 192.168.0.112
  port: 8080
  role: security
  monitoring:
    blackbox_tcp: true
    prometheus_scrape: false  # isolated environment; TCP probe only
  alerts:
  - node_down
  - service_down
  owner: security-team

# Phase O-1.3 2026-04-02: MinIO backup storage (Phase O completion)
# NOTE(review): metrics_path is assumed to sit under `monitoring`; the source
# lost its indentation, so confirm the nesting.
- name: minio
  ip: 192.168.0.188
  port: 9000
  role: storage
  monitoring:
    prometheus_scrape: true
    metrics_path: /minio/v2/metrics/cluster
  alerts:
  - service_down
  - disk_space_low
  criticality: P1
  owner: devops-team
# =============================================================================
# Frontend pages
# =============================================================================
# One entry per route: display name, enabled client-side monitoring, alerts,
# and (where present) SLO thresholds.
pages:
- path: /
  name: Dashboard
  monitoring:
    sentry_session: true
    web_vitals: true
  alerts:
  - slow_page_load
  - js_error
  slo:
    lcp_ms: 2500
    fid_ms: 100
    cls: 0.1

- path: /authorizations
  name: 授權管理
  monitoring:
    sentry_session: true
    web_vitals: true
  alerts:
  - slow_page_load
  - api_error
  slo:
    lcp_ms: 2000

- path: /action-logs
  name: 行動日誌
  monitoring:
    sentry_session: true
    web_vitals: true
  alerts:
  - slow_page_load

- path: /errors
  name: 錯誤追蹤
  monitoring:
    sentry_session: true
    web_vitals: true
  alerts:
  - slow_page_load

- path: /settings
  name: 設定
  monitoring:
    sentry_session: true
  alerts:
  - slow_page_load

- path: /knowledge-base
  name: 知識庫
  monitoring:
    sentry_session: true
  alerts:
  - slow_page_load
# =============================================================================
# API endpoints (critical)
# =============================================================================
# Each entry pairs a route and HTTP method with its SLO: a p95 latency budget
# in milliseconds and an availability figure.
api_endpoints:
- path: /api/v1/health
  method: GET
  critical: true
  slo:
    latency_p95_ms: 100
    availability: 99.99

- path: /api/v1/approvals
  method: GET
  critical: true
  slo:
    latency_p95_ms: 500
    availability: 99.9

- path: /api/v1/approvals/{id}/sign
  method: POST
  critical: true
  slo:
    latency_p95_ms: 1000
    availability: 99.9

- path: /api/v1/incidents
  method: GET
  critical: true
  slo:
    latency_p95_ms: 500
    availability: 99.9

- path: /api/v1/analyze
  method: POST
  critical: true
  slo:
    latency_p95_ms: 30000  # 30s (LLM analysis)
    availability: 95

- path: /api/v1/webhooks/alertmanager
  method: POST
  critical: true
  slo:
    latency_p95_ms: 5000
    availability: 99.9

- path: /api/v1/webhooks/sentry/error
  method: POST
  critical: true
  slo:
    latency_p95_ms: 5000
    availability: 99.9

- path: /api/v1/execute
  method: POST
  critical: true
  slo:
    latency_p95_ms: 10000
    availability: 99
# =============================================================================
# AI services (special monitoring)
# =============================================================================
# LLM providers: rate limits, alerts, the provider to fall back to, and whether
# spend is tracked.
# NOTE(review): `fallback` values (`ollama`, `gemini`) do not match any `name`
# in this list (`ollama-local`, `gemini-api`) — presumably they are provider
# ids resolved elsewhere; confirm.
ai_services:
- name: gemini-api
  type: external
  rate_limit:
    requests_per_minute: 60
    tokens_per_minute: 100000
  alerts:
  - rate_limit_hit
  - budget_exceeded
  fallback: ollama
  cost_tracking: true

- name: claude-api
  type: external
  rate_limit:
    requests_per_minute: 50
    tokens_per_minute: 100000
  alerts:
  - rate_limit_hit
  - budget_exceeded
  fallback: gemini
  cost_tracking: true

- name: ollama-local
  type: local
  models:
  - qwen2.5:7b
  - llama3.2:3b
  alerts:
  - model_load_failed
  - inference_timeout
  cost_tracking: false

# --- NVIDIA Nemotron tool calling (Phase 20) ---
# 2026-03-29 ogt: added per ADR-036
# NOTE(review): `metrics` is assumed to sit under `monitoring`; the source
# lost its indentation, so confirm the nesting.
- name: nvidia-nemotron
  type: external
  endpoint: https://integrate.api.nvidia.com/v1
  model: nvidia/llama-3.1-nemotron-70b-instruct
  rate_limit:
    requests_per_minute: 100
    tokens_per_minute: 200000
  features:
  - tool_calling
  - function_calling
  monitoring:
    prometheus: true
    langfuse: true
    otel: true
    metrics:
    - nvidia_tool_call_requests_total
    - nvidia_tool_call_latency_seconds
    - nvidia_circuit_breaker_state_changes_total
  alerts:
  - circuit_breaker_open
  - tool_calling_timeout
  # Was `high_error_rate`, which names the generic http_requests_total
  # template; the template built on nvidia_tool_call_requests_total is
  # `nvidia_high_error_rate`.
  - nvidia_high_error_rate
  - rate_limit_hit
  fallback: gemini
  cost_tracking: true
  owner: ai-team
  criticality: P0
# =============================================================================
# Alert templates
# =============================================================================
# Maps an alert id (as referenced from the `alerts` lists in this file) to its
# PromQL expression, hold duration, severity, and the auto-repair action to run.
# NOTE(review): the `by (job)` groupings below assume rules are evaluated per
# scrape job — adjust the grouping label if the generator injects its own
# selectors.
alert_templates:
  pod_crash:
    expr: 'kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff"} > 0'
    for: 2m
    severity: critical
    auto_repair: restart_pod

  high_error_rate:
    # Both sides are aggregated before dividing. Without sum(), each 5xx series
    # is matched one-to-one against the identical series on the right-hand
    # side, so the ratio is always 1 and the 1% threshold is meaningless.
    expr: 'sum by (job) (rate(http_requests_total{status=~"5.."}[5m])) / sum by (job) (rate(http_requests_total[5m])) > 0.01'
    for: 5m
    severity: critical
    auto_repair: restart_pod

  slow_response:
    # histogram_quantile needs per-second bucket rates summed by `le`; raw
    # cumulative buckets give the quantile since process start, not the
    # recent p95.
    expr: 'histogram_quantile(0.95, sum by (le, job) (rate(http_request_duration_seconds_bucket[5m]))) > 2'
    for: 5m
    severity: warning
    auto_repair: scale_up

  service_down:
    expr: 'probe_success == 0'
    for: 1m
    severity: critical
    auto_repair: restart_container

  memory_high:
    # The `> 0` filter drops containers that have no memory limit; their limit
    # is reported as 0, which would make the ratio +Inf and fire permanently.
    expr: 'container_memory_usage_bytes / (container_spec_memory_limit_bytes > 0) > 0.9'
    for: 5m
    severity: warning
    auto_repair: analyze_memory_leak

  disk_space_low:
    expr: 'node_filesystem_avail_bytes / node_filesystem_size_bytes < 0.15'
    for: 10m
    severity: warning
    auto_repair: cleanup_logs

  inference_timeout:
    expr: 'ollama_inference_duration_seconds > 60'
    for: 3m
    severity: warning
    auto_repair: switch_model

  runner_offline:
    expr: 'github_runner_status == 0'
    for: 5m
    severity: critical
    auto_repair: restart_service

  # --- NVIDIA Nemotron alerts (Phase 20) ---
  # 2026-03-29 ogt: added per ADR-036
  circuit_breaker_open:
    # increase() over a window lets the alert clear; comparing the raw counter
    # to 0 stays true forever after the first transition to "open".
    expr: 'increase(nvidia_circuit_breaker_state_changes_total{to_state="open"}[5m]) > 0'
    for: 1m
    severity: warning
    auto_repair: fallback_to_gemini
    annotations:
      summary: 'NVIDIA Circuit Breaker 已斷路,切換至備援'
      runbook: 'docs/runbooks/NVIDIA-CIRCUIT-BREAKER.md'

  tool_calling_timeout:
    # Same rate()/sum by (le) correction as slow_response.
    expr: 'histogram_quantile(0.95, sum by (le) (rate(nvidia_tool_call_latency_seconds_bucket[5m]))) > 45'
    for: 5m
    severity: warning
    auto_repair: switch_model
    annotations:
      summary: 'NVIDIA Tool Calling P95 延遲 > 45s'

  nvidia_high_error_rate:
    # Aggregated on both sides for the same reason as high_error_rate.
    expr: 'sum(rate(nvidia_tool_call_requests_total{status="error"}[5m])) / sum(rate(nvidia_tool_call_requests_total[5m])) > 0.1'
    for: 5m
    severity: critical
    auto_repair: fallback_to_gemini
    annotations:
      summary: 'NVIDIA Tool Calling 錯誤率 > 10%'
# =============================================================================
# Auto-repair actions
# =============================================================================
# Maps an action id (as referenced from `auto_repair` fields in this file) to
# the command template to run, a risk label, and a cooldown in minutes.
# `{...}` placeholders are filled in by the executor.
auto_repair_actions:
  restart_pod:
    command: 'kubectl rollout restart deployment/{name} -n {namespace}'
    risk: low
    cooldown_minutes: 10

  scale_up:
    # `kubectl scale --replicas` takes an absolute count, so the previous
    # `--replicas=+1` set the deployment to 1 replica instead of adding one.
    # The current count is read first and incremented. custom-columns is used
    # rather than jsonpath so the query adds no extra `{}` to the template.
    # NOTE(review): relies on the executor running the command through a
    # POSIX shell — confirm.
    command: >-
      kubectl scale deployment/{name} -n {namespace}
      --replicas=$(( $(kubectl get deployment/{name} -n {namespace}
      -o custom-columns=REPLICAS:.spec.replicas --no-headers) + 1 ))
    risk: low
    max_replicas: 5
    cooldown_minutes: 15

  restart_container:
    command: 'ssh {host} docker restart {container}'
    risk: low
    cooldown_minutes: 10

  restart_service:
    command: 'ssh {host} sudo systemctl restart {service_name}'
    risk: low
    cooldown_minutes: 10

  switch_model:
    command: 'internal:switch_to_smaller_model'
    risk: low
    cooldown_minutes: 5

  cleanup_logs:
    # NOTE(review): the double quotes around *.log are consumed by whichever
    # shell parses this string first, so the remote shell may glob-expand the
    # pattern — confirm how the executor invokes the command.
    command: 'ssh {host} find /var/log -name "*.log" -mtime +7 -delete'
    risk: low
    cooldown_minutes: 60

  analyze_memory_leak:
    command: 'internal:trigger_memory_analysis'
    risk: low
    cooldown_minutes: 30

  # --- NVIDIA Nemotron auto-repair (Phase 20) ---
  # 2026-03-29 ogt: added per ADR-036
  fallback_to_gemini:
    command: 'internal:switch_provider_to_gemini'
    risk: low
    cooldown_minutes: 5
    description: 'NVIDIA API 失敗時自動切換至 Gemini'

  fallback_to_ollama:
    command: 'internal:switch_provider_to_ollama'
    risk: low
    cooldown_minutes: 5
    description: 'Cloud API 失敗時自動切換至本地 Ollama'