fix(hermes): 改用 Ollama 本地模型(111),零費用,按 agent 類型選模型
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled

模型路由:
  debugger / vuln-verifier     → deepseek-r1:14b  (強推理,找根因/安全分析)
  critic / db-expert / coder 群 → qwen2.5-coder:7b (程式碼專用)
  planner / onboarder / web     → qwen2.5:7b-instruct (通用指令)
  default                       → deepseek-r1:14b

- _strip_think_tags(): 去除 deepseek-r1 <think> 推理塊,只留最終回答
- timeout=90s (deepseek-r1 推理較慢)
- log 加 model 欄位供 latency 監控

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Your Name
2026-04-25 03:13:59 +08:00
parent d467cac709
commit 250eca99c6

View File

@@ -1,19 +1,19 @@
"""Hermes 自然語言閘道 — ADR-094
Layer 1 意圖路由(關鍵字正則)→ Anthropic Python SDK 直呼 → Telegram 格式化輸出。
Layer 1 意圖路由(關鍵字正則)→ Ollama 本地模型(111)→ Telegram 格式化輸出。
2026-04-24 Claude Sonnet 4.6 (WS4 Hermes NL)
2026-04-24 Claude Sonnet 4.6 (WS4 Hermes NL T1+T2+T3): hermes_dispatch_log DB 寫入 /
Redis per-chat_id 速率限制 / Multi-turn session (Redis Hash TTL=300s)
2026-04-25 Claude Sonnet 4.6: 改用 anthropic Python SDK 直呼,棄用需要 CLI 的
claude-agent-sdk(prod pod 無 claude CLI,sdk call 回傳空字串)
2026-04-25 Claude Sonnet 4.6: 改用 Ollama 本地模型(111),按 agent 類型選模型,零費用
debugger/vuln → deepseek-r1:14b(推理); code agents → qwen2.5-coder:7b; 其他 → qwen2.5:7b-instruct
"""
from __future__ import annotations
import asyncio
import re
import time
import anthropic as _anthropic
import httpx
import structlog
from sqlalchemy import text
@@ -50,6 +50,38 @@ _ROUTING_RULES: list[tuple[re.Pattern, str]] = [
_RATE_LIMIT_MAX = 20
_RATE_LIMIT_WINDOW_SEC = 60
# ─────────────────────────────────────────────────────────────────────────────
# Ollama model routing: choose the best-suited model per agent specialty
# (models are served from the "111" host).
# ─────────────────────────────────────────────────────────────────────────────
_MODEL_BY_AGENT: dict[str, str] = {
    # Reasoning-heavy agents (root-cause hunting / security analysis).
    **dict.fromkeys(
        ("debugger", "vuln-verifier"),
        "deepseek-r1:14b",
    ),
    # Code-focused agents (review / implementation / refactoring / DB /
    # frontend / tooling).
    **dict.fromkeys(
        (
            "critic",
            "db-expert",
            "fullstack-engineer",
            "refactor-specialist",
            "migration-engineer",
            "frontend-designer",
            "tool-expert",
        ),
        "qwen2.5-coder:7b",
    ),
    # General instruction following (planning / onboarding / documentation).
    **dict.fromkeys(
        ("planner", "onboarder", "web-researcher"),
        "qwen2.5:7b-instruct",
    ),
}

# Model tag used when an agent name has no entry in _MODEL_BY_AGENT.
_DEFAULT_MODEL = "deepseek-r1:14b"

# Timeout in seconds; deepseek-r1:14b reasons slowly, so allow 90s.
_OLLAMA_TIMEOUT = 90.0
def _pick_model(agent_name: str) -> str:
    """Return the Ollama model tag configured for ``agent_name``.

    Agent names without an entry in ``_MODEL_BY_AGENT`` resolve to
    ``_DEFAULT_MODEL``.
    """
    try:
        return _MODEL_BY_AGENT[agent_name]
    except KeyError:
        return _DEFAULT_MODEL
def _strip_think_tags(text: str) -> str:
    """Remove deepseek-r1 ``<think>`` reasoning blocks, keeping the final answer.

    Two shapes are handled:

    * Closed ``<think>...</think>`` blocks (possibly several, possibly
      spanning multiple lines) are removed wherever they appear.
    * An *unterminated* ``<think>`` — which happens when generation is cut
      off mid-reasoning, e.g. by a token limit — is removed together with
      everything after it, so raw reasoning never reaches the user.

    Args:
        text: Raw model output.

    Returns:
        The output with reasoning removed and surrounding whitespace
        stripped. May be an empty string if the output was reasoning only.
    """
    # Pass 1: non-greedy so each closed block is removed independently and
    # any answer text between blocks is preserved.
    without_closed = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    # Pass 2: any <think> still present has no closing tag (truncated
    # generation); everything from it to the end is unfinished reasoning.
    without_dangling = re.sub(r"<think>.*", "", without_closed, flags=re.DOTALL)
    return without_dangling.strip()
def _route_intent_layer1(msg: str) -> str:
"""Layer 1: 關鍵字正則路由,回傳 agent 名稱"""
@@ -221,19 +253,29 @@ async def process_nl_message(
t0 = time.monotonic()
# 呼叫 Anthropic Python SDK直呼 messages.create不依賴 claude CLI
# 呼叫 Ollama 本地模型111零費用按 agent 選模型
model = _pick_model(agent_name)
success = False
error_type: str | None = None
try:
_client = _anthropic.AsyncAnthropic(api_key=settings.CLAUDE_API_KEY or None)
_msg = await _client.messages.create(
model="claude-haiku-4-5-20251001",
max_tokens=1500,
system=system_prompt,
messages=[{"role": "user", "content": prompt_with_ctx}],
)
result_text = _msg.content[0].text if _msg.content else ""
ollama_base = getattr(settings, "OLLAMA_URL", "http://192.168.0.111:11434")
async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as _hc:
resp = await _hc.post(
f"{ollama_base}/api/chat",
json={
"model": model,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt_with_ctx},
],
"stream": False,
"options": {"num_predict": 1500, "temperature": 0.3},
},
)
resp.raise_for_status()
result_text = resp.json().get("message", {}).get("content", "")
result_text = _strip_think_tags(result_text)
if not result_text:
result_text = "_Agent 回應為空請稍後再試。_"
success = True
@@ -241,9 +283,10 @@ async def process_nl_message(
except Exception as exc:
error_type = type(exc).__name__
logger.error(
"hermes_nl_sdk_error",
"hermes_nl_ollama_error",
error=str(exc),
agent=agent_name,
model=model,
exc_type=error_type,
)
result_text = f"_Hermes 暫時無法連線({error_type}請稍後再試。_"
@@ -252,6 +295,7 @@ async def process_nl_message(
logger.info(
"hermes_nl_dispatch",
agent=agent_name,
model=model,
user_id=user_id,
chat_id=chat_id,
username=username,