diff --git a/apps/api/src/hermes/nl_gateway.py b/apps/api/src/hermes/nl_gateway.py index e68d9b53..b665a75a 100644 --- a/apps/api/src/hermes/nl_gateway.py +++ b/apps/api/src/hermes/nl_gateway.py @@ -54,21 +54,22 @@ _RATE_LIMIT_WINDOW_SEC = 60 # Ollama 模型路由(按 agent 專業選最適模型,111 主機) # ───────────────────────────────────────────────────────────────────────────── _MODEL_BY_AGENT: dict[str, str] = { - # 推理型(找根因 / 安全分析)→ deepseek-r1:14b + # 推理型(找根因 / 安全分析)→ deepseek-r1:14b(CoT 推理) "debugger": "deepseek-r1:14b", "vuln-verifier": "deepseek-r1:14b", - # 程式碼專用(review / 實作 / 重構 / DB / 前端 / 工具)→ qwen2.5-coder:7b - "critic": "qwen2.5-coder:7b", - "db-expert": "qwen2.5-coder:7b", - "fullstack-engineer": "qwen2.5-coder:7b", - "refactor-specialist":"qwen2.5-coder:7b", - "migration-engineer": "qwen2.5-coder:7b", - "frontend-designer": "qwen2.5-coder:7b", - "tool-expert": "qwen2.5-coder:7b", - # 通用指令(規劃 / 導覽 / 文件)→ qwen2.5:7b-instruct - "planner": "qwen2.5:7b-instruct", - "onboarder": "qwen2.5:7b-instruct", - "web-researcher": "qwen2.5:7b-instruct", + # 程式碼 + 通用(review / 實作 / 重構 / DB / 前端 / 工具 / 規劃 / 文件)→ qwen3:8b + # 2026-04-25 ogt + Claude Sonnet 4.6: qwen2.5-coder:7b + qwen2.5:7b-instruct → qwen3:8b + # qwen3:8b Hybrid Thinking 同時勝任程式碼與指令;gemma4 尚未在 Ollama 釋出 + "critic": "qwen3:8b", + "db-expert": "qwen3:8b", + "fullstack-engineer": "qwen3:8b", + "refactor-specialist":"qwen3:8b", + "migration-engineer": "qwen3:8b", + "frontend-designer": "qwen3:8b", + "tool-expert": "qwen3:8b", + "planner": "qwen3:8b", + "onboarder": "qwen3:8b", + "web-researcher": "qwen3:8b", } _DEFAULT_MODEL = "deepseek-r1:14b" _OLLAMA_TIMEOUT = 90.0 # deepseek-r1:14b 推理較慢,給 90s