diff --git a/apps/api/src/hermes/nl_gateway.py b/apps/api/src/hermes/nl_gateway.py index bfdd11b2..e68d9b53 100644 --- a/apps/api/src/hermes/nl_gateway.py +++ b/apps/api/src/hermes/nl_gateway.py @@ -1,19 +1,19 @@ """Hermes 自然語言閘道 — ADR-094 -Layer 1 意圖路由(關鍵字正則)→ Anthropic Python SDK 直呼 → Telegram 格式化輸出。 +Layer 1 意圖路由(關鍵字正則)→ Ollama 本地模型(111)→ Telegram 格式化輸出。 2026-04-24 Claude Sonnet 4.6 (WS4 Hermes NL) 2026-04-24 Claude Sonnet 4.6 (WS4 Hermes NL T1+T2+T3): hermes_dispatch_log DB 寫入 / Redis per-chat_id 速率限制 / Multi-turn session (Redis Hash TTL=300s) -2026-04-25 Claude Sonnet 4.6: 改用 anthropic Python SDK 直呼,棄用需要 CLI 的 - claude-agent-sdk(prod pod 無 claude CLI,sdk call 回傳空字串) +2026-04-25 Claude Sonnet 4.6: 改用 Ollama 本地模型(111),按 agent 類型選模型,零費用 + debugger/vuln → deepseek-r1:14b(推理); code agents → qwen2.5-coder:7b; 其他 → qwen2.5:7b-instruct """ from __future__ import annotations import asyncio import re import time -import anthropic as _anthropic +import httpx import structlog from sqlalchemy import text @@ -50,6 +50,38 @@ _ROUTING_RULES: list[tuple[re.Pattern, str]] = [ _RATE_LIMIT_MAX = 20 _RATE_LIMIT_WINDOW_SEC = 60 +# ───────────────────────────────────────────────────────────────────────────── +# Ollama 模型路由(按 agent 專業選最適模型,111 主機) +# ───────────────────────────────────────────────────────────────────────────── +_MODEL_BY_AGENT: dict[str, str] = { + # 推理型(找根因 / 安全分析)→ deepseek-r1:14b + "debugger": "deepseek-r1:14b", + "vuln-verifier": "deepseek-r1:14b", + # 程式碼專用(review / 實作 / 重構 / DB / 前端 / 工具)→ qwen2.5-coder:7b + "critic": "qwen2.5-coder:7b", + "db-expert": "qwen2.5-coder:7b", + "fullstack-engineer": "qwen2.5-coder:7b", + "refactor-specialist":"qwen2.5-coder:7b", + "migration-engineer": "qwen2.5-coder:7b", + "frontend-designer": "qwen2.5-coder:7b", + "tool-expert": "qwen2.5-coder:7b", + # 通用指令(規劃 / 導覽 / 文件)→ qwen2.5:7b-instruct + "planner": "qwen2.5:7b-instruct", + "onboarder": "qwen2.5:7b-instruct", + "web-researcher": "qwen2.5:7b-instruct", +} +_DEFAULT_MODEL = 
# A complete <think>...</think> reasoning block. DOTALL lets "." span the
# newlines inside the block; the lazy quantifier keeps separate blocks separate.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", flags=re.DOTALL)

# An opening <think> with no closing tag, running to the end of the text.
# This happens when generation stops (e.g. token limit) in the middle of the
# reasoning phase, so everything after the tag is reasoning, not an answer.
_UNCLOSED_THINK_RE = re.compile(r"<think>.*\Z", flags=re.DOTALL)


def _strip_think_tags(text: str) -> str:
    """Remove deepseek-r1 ``<think>...</think>`` reasoning, keeping the answer.

    Args:
        text: Raw model output. May contain zero or more ``<think>`` blocks,
            possibly with a final block that was never closed.

    Returns:
        The text with every complete ``<think>...</think>`` block removed, any
        unterminated trailing ``<think>`` block removed, and surrounding
        whitespace stripped. Returns ``""`` when ``text`` is empty or ``None``
        or when nothing but reasoning was produced.
    """
    # Guard against a missing/null "content" field in the upstream response.
    if not text:
        return ""
    # Complete blocks first, so the unclosed-tag pattern can only ever match a
    # genuinely unterminated block left at the end.
    visible = _THINK_BLOCK_RE.sub("", text)
    visible = _UNCLOSED_THINK_RE.sub("", visible)
    return visible.strip()
"hermes_nl_dispatch", agent=agent_name, + model=model, user_id=user_id, chat_id=chat_id, username=username,