diff --git a/apps/api/src/hermes/nl_gateway.py b/apps/api/src/hermes/nl_gateway.py
index bfdd11b2..e68d9b53 100644
--- a/apps/api/src/hermes/nl_gateway.py
+++ b/apps/api/src/hermes/nl_gateway.py
@@ -1,19 +1,19 @@
"""Hermes 自然語言閘道 — ADR-094
-Layer 1 意圖路由(關鍵字正則)→ Anthropic Python SDK 直呼 → Telegram 格式化輸出。
+Layer 1 意圖路由(關鍵字正則)→ Ollama 本地模型(111)→ Telegram 格式化輸出。
2026-04-24 Claude Sonnet 4.6 (WS4 Hermes NL)
2026-04-24 Claude Sonnet 4.6 (WS4 Hermes NL T1+T2+T3): hermes_dispatch_log DB 寫入 /
Redis per-chat_id 速率限制 / Multi-turn session (Redis Hash TTL=300s)
-2026-04-25 Claude Sonnet 4.6: 改用 anthropic Python SDK 直呼,棄用需要 CLI 的
- claude-agent-sdk(prod pod 無 claude CLI,sdk call 回傳空字串)
+2026-04-25 Claude Sonnet 4.6: 改用 Ollama 本地模型(111),按 agent 類型選模型,零費用
+ debugger/vuln → deepseek-r1:14b(推理); code agents → qwen2.5-coder:7b; 其他 → qwen2.5:7b-instruct
"""
from __future__ import annotations
import asyncio
import re
import time
-import anthropic as _anthropic
+import httpx
import structlog
from sqlalchemy import text
@@ -50,6 +50,38 @@ _ROUTING_RULES: list[tuple[re.Pattern, str]] = [
_RATE_LIMIT_MAX = 20
_RATE_LIMIT_WINDOW_SEC = 60
+# ─────────────────────────────────────────────────────────────────────────────
+# Ollama model routing: pick the best-suited model per agent specialty (host 111)
+# ─────────────────────────────────────────────────────────────────────────────
+_MODEL_BY_AGENT: dict[str, str] = {
+ # Reasoning-heavy agents (root-cause hunting / security analysis) → deepseek-r1:14b
+ "debugger": "deepseek-r1:14b",
+ "vuln-verifier": "deepseek-r1:14b",
+ # Code-focused agents (review / implementation / refactoring / DB / frontend / tooling) → qwen2.5-coder:7b
+ "critic": "qwen2.5-coder:7b",
+ "db-expert": "qwen2.5-coder:7b",
+ "fullstack-engineer": "qwen2.5-coder:7b",
+ "refactor-specialist":"qwen2.5-coder:7b",
+ "migration-engineer": "qwen2.5-coder:7b",
+ "frontend-designer": "qwen2.5-coder:7b",
+ "tool-expert": "qwen2.5-coder:7b",
+ # General instruction-following agents (planning / onboarding / docs) → qwen2.5:7b-instruct
+ "planner": "qwen2.5:7b-instruct",
+ "onboarder": "qwen2.5:7b-instruct",
+ "web-researcher": "qwen2.5:7b-instruct",
+}
+# Fallback model for agent names that are not keys of _MODEL_BY_AGENT.
+# NOTE(review): the module docstring says "others → qwen2.5:7b-instruct", but the
+# fallback here is deepseek-r1:14b — confirm which one is intended.
+_DEFAULT_MODEL = "deepseek-r1:14b"
+_OLLAMA_TIMEOUT = 90.0 # deepseek-r1:14b is slow at reasoning, so allow 90s
+
+
+def _pick_model(agent_name: str) -> str:
+    """Return the Ollama model tag mapped to *agent_name*, or the default model."""
+    try:
+        return _MODEL_BY_AGENT[agent_name]
+    except KeyError:
+        # Agent is not in the routing table: use the module-wide fallback.
+        return _DEFAULT_MODEL
+
+
+def _strip_think_tags(text: str) -> str:
+    """Remove deepseek-r1 ``<think>...</think>`` reasoning blocks, keeping only the final answer.
+
+    Every ``<think>`` block (possibly spanning several lines) is deleted.  An
+    opening ``<think>`` with no closing tag — e.g. output cut off mid-reasoning —
+    is dropped through the end of the text.  Leading and trailing whitespace of
+    the result is stripped.
+
+    Args:
+        text: Raw model output.
+
+    Returns:
+        The text with all reasoning blocks removed; may be an empty string.
+    """
+    # The tags must be spelled out in the pattern: a bare ".*?" only ever matches
+    # the empty string, which would pass the reasoning block through untouched.
+    # DOTALL lets "." cross newlines; "\Z" covers an unterminated block.
+    return re.sub(r"<think>.*?(?:</think>|\Z)", "", text, flags=re.DOTALL).strip()
+
def _route_intent_layer1(msg: str) -> str:
"""Layer 1: 關鍵字正則路由,回傳 agent 名稱"""
@@ -221,19 +253,29 @@ async def process_nl_message(
t0 = time.monotonic()
- # 呼叫 Anthropic Python SDK(直呼 messages.create,不依賴 claude CLI)
+ # 呼叫 Ollama 本地模型(111,零費用,按 agent 選模型)
+ model = _pick_model(agent_name)
success = False
error_type: str | None = None
try:
- _client = _anthropic.AsyncAnthropic(api_key=settings.CLAUDE_API_KEY or None)
- _msg = await _client.messages.create(
- model="claude-haiku-4-5-20251001",
- max_tokens=1500,
- system=system_prompt,
- messages=[{"role": "user", "content": prompt_with_ctx}],
- )
- result_text = _msg.content[0].text if _msg.content else ""
+ ollama_base = getattr(settings, "OLLAMA_URL", "http://192.168.0.111:11434")
+ async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as _hc:
+ resp = await _hc.post(
+ f"{ollama_base}/api/chat",
+ json={
+ "model": model,
+ "messages": [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": prompt_with_ctx},
+ ],
+ "stream": False,
+ "options": {"num_predict": 1500, "temperature": 0.3},
+ },
+ )
+ resp.raise_for_status()
+ result_text = resp.json().get("message", {}).get("content", "")
+ result_text = _strip_think_tags(result_text)
if not result_text:
result_text = "_Agent 回應為空,請稍後再試。_"
success = True
@@ -241,9 +283,10 @@ async def process_nl_message(
except Exception as exc:
error_type = type(exc).__name__
logger.error(
- "hermes_nl_sdk_error",
+ "hermes_nl_ollama_error",
error=str(exc),
agent=agent_name,
+ model=model,
exc_type=error_type,
)
result_text = f"_Hermes 暫時無法連線({error_type}),請稍後再試。_"
@@ -252,6 +295,7 @@ async def process_nl_message(
logger.info(
"hermes_nl_dispatch",
agent=agent_name,
+ model=model,
user_id=user_id,
chat_id=chat_id,
username=username,