diff --git a/apps/api/models.json b/apps/api/models.json index 5f6446e2..57333ee0 100644 --- a/apps/api/models.json +++ b/apps/api/models.json @@ -1,9 +1,9 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", "name": "OpenClaw AI Router Configuration", - "version": "1.3.0", - "description": "AI 模型路由與備援設定 (ADR-006 + ADR-036 Nemotron + D1 ADR-067 五大應用 2026-04-11)", - "updated_at": "2026-04-11", + "version": "1.4.0", + "description": "AI 模型路由與備援設定 (ADR-006 + ADR-036 Nemotron + D1 ADR-067 五大應用 2026-04-11 + ADR-110 GCP 三層容災 2026-05-04)", + "updated_at": "2026-05-04", "default_provider": "ollama", "fallback_order": ["ollama", "gemini", "claude"], @@ -11,24 +11,28 @@ "providers": { "ollama": { - "name": "Ollama (Local M1 Pro)", + "name": "Ollama (GCP-A Primary)", "enabled": true, "priority": 1, - "endpoint": "http://192.168.0.111:11434", + "endpoint": "http://34.143.170.20:11434", "api_path": "/api/generate", "models": { "default": "qwen2.5:7b-instruct", - "rca": "qwen2.5:7b-instruct", + "rca": "qwen3:14b", "summary": "gemma3:4b", - "drift_summary": "qwen2.5:7b-instruct", + "drift_summary": "qwen3:14b", "drift_intent": "qwen2.5:7b-instruct", "log_anomaly": "deepseek-r1:14b", "nemoclaw": "deepseek-r1:14b", - "playbook_draft": "qwen2.5:7b-instruct", - "code_review": "qwen2.5-coder:7b", - "embedding": "nomic-embed-text", - "rag_generate": "qwen2.5:7b-instruct", - "image_analysis": "llava:latest" + "playbook_draft": "qwen3:14b", + "code_review": "qwen2.5-coder:32b", + "embedding": "bge-m3:latest", + "rag_generate": "qwen3:14b", + "image_analysis": "minicpm-v:latest", + "trust_scoring": "hermes3:latest", + "alert_triage": "hermes3:latest", + "intent_classify": "qwen2.5:7b-instruct", + "governance": "deepseek-r1:14b" }, "options": { "temperature": 0.1, @@ -154,12 +158,12 @@ }, "adr067_ollama_applications": { - "description": "ADR-067 五大 Ollama 本地 AI 應用 (Phase 30-34),endpoint: http://192.168.0.111:11434", - "endpoint": "http://192.168.0.111:11434", + "description": "ADR-067 五大 Ollama 本地 AI 應用 (Phase 30-34),2026-05-04 ogt + Claude Sonnet 4.6: endpoint 升級至 GCP-A Primary", + "endpoint": "http://34.143.170.20:11434", "applications": { "drift_summary": { "phase": 30, - "model": "qwen2.5:7b-instruct", + "model": "qwen3:14b", "timeout_seconds": 90, "purpose": "Config Drift 報告中文摘要" }, @@ -171,28 +175,28 @@ }, "pr_code_review": { "phase": 32, - "model": "qwen2.5-coder:7b", + "model": "qwen2.5-coder:32b", "timeout_seconds": 120, "purpose": "Gitea PR 自動審查" }, "rag_embed": { "phase": 33, - "model": "nomic-embed-text", - "dimensions": 768, + "model": "bge-m3:latest", + "dimensions": 1024, "timeout_seconds": 30, - "purpose": "RAG 知識庫向量化,pgvector 儲存" + "purpose": "RAG 知識庫向量化,pgvector 儲存(bge-m3 多語言 1024 維)" }, "rag_generate": { "phase": 33, - "model": "qwen2.5:7b-instruct", + "model": "qwen3:14b", "timeout_seconds": 60, "purpose": "RAG 查詢回答生成,top_k=5" }, "image_analysis": { "phase": 34, - "model": "llava:latest", + "model": "minicpm-v:latest", "timeout_seconds": 60, - "purpose": "Telegram 圖片分析" + "purpose": "Telegram 圖片分析(minicpm-v 多模態精度優於 llava)" } } }, diff --git a/apps/api/src/core/config.py b/apps/api/src/core/config.py index 033fabb0..edb5f94e 100644 --- a/apps/api/src/core/config.py +++ b/apps/api/src/core/config.py @@ -376,8 +376,9 @@ class Settings(BaseSettings): description="OllamaHealthMonitor 推理測試使用模型(P1.1)", ) # 2026-04-12 ogt: 心跳必須確認載入的 Ollama 模型清單 + # 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 升級,更新必要模型清單(nomic→bge-m3 + 新增 qwen3:14b + hermes3) OLLAMA_REQUIRED_MODELS: list[str] = Field( - default=["nomic-embed-text", "qwen2.5:7b-instruct", "deepseek-r1:14b"], + default=["bge-m3:latest", "qwen2.5:7b-instruct", "qwen3:14b", "deepseek-r1:14b", "hermes3:latest"], description="HeartbeatReportService 探測必要模型是否載入", ) # 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2 @@ -492,8 +493,9 @@ class Settings(BaseSettings): default=True, description="使用 Ollama 本機做 Tool Calling,取代 NVIDIA NIM 雲端 (44s→5s)", ) + # 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 升級,改 hermes3:latest(工具調用能力優於 llama3.1:8b) OLLAMA_TOOL_MODEL: str = Field( - default="llama3.1:8b", + default="hermes3:latest", description="Ollama Tool Calling 模型 (支援 function calling 格式)", ) @@ -571,8 +573,9 @@ class Settings(BaseSettings): default="http://192.168.0.188:8088", # 🔧 修正: OpenClaw 實際 port 是 8088 description="OpenClaw AI Agent service URL", ) + # 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 升級,改 qwen3:14b(GCP-A SSD 算力,RCA 推理更強) OPENCLAW_DEFAULT_MODEL: str = Field( - default="qwen2.5:7b-instruct", # 2026-04-30: DIAGNOSE/RCA 優先低延遲本地模型 + default="qwen3:14b", description="Default Ollama model for RCA analysis", ) OPENCLAW_TIMEOUT: int = Field(