fix(hermes): 改用 Ollama 本地模型(111),零費用,按 agent 類型選模型
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
模型路由: debugger / vuln-verifier → deepseek-r1:14b (強推理,找根因/安全分析) critic / db-expert / coder 群 → qwen2.5-coder:7b (程式碼專用) planner / onboarder / web → qwen2.5:7b-instruct (通用指令) default → deepseek-r1:14b - _strip_think_tags(): 去除 deepseek-r1 <think> 推理塊,只留最終回答 - timeout=90s (deepseek-r1 推理較慢) - log 加 model 欄位供 latency 監控 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,19 +1,19 @@
|
|||||||
"""Hermes 自然語言閘道 — ADR-094
|
"""Hermes 自然語言閘道 — ADR-094
|
||||||
|
|
||||||
Layer 1 意圖路由(關鍵字正則)→ Anthropic Python SDK 直呼 → Telegram 格式化輸出。
|
Layer 1 意圖路由(關鍵字正則)→ Ollama 本地模型(111)→ Telegram 格式化輸出。
|
||||||
|
|
||||||
2026-04-24 Claude Sonnet 4.6 (WS4 Hermes NL)
|
2026-04-24 Claude Sonnet 4.6 (WS4 Hermes NL)
|
||||||
2026-04-24 Claude Sonnet 4.6 (WS4 Hermes NL T1+T2+T3): hermes_dispatch_log DB 寫入 /
|
2026-04-24 Claude Sonnet 4.6 (WS4 Hermes NL T1+T2+T3): hermes_dispatch_log DB 寫入 /
|
||||||
Redis per-chat_id 速率限制 / Multi-turn session (Redis Hash TTL=300s)
|
Redis per-chat_id 速率限制 / Multi-turn session (Redis Hash TTL=300s)
|
||||||
2026-04-25 Claude Sonnet 4.6: 改用 anthropic Python SDK 直呼,棄用需要 CLI 的
|
2026-04-25 Claude Sonnet 4.6: 改用 Ollama 本地模型(111),按 agent 類型選模型,零費用
|
||||||
claude-agent-sdk(prod pod 無 claude CLI,sdk call 回傳空字串)
|
debugger/vuln → deepseek-r1:14b(推理); code agents → qwen2.5-coder:7b; 其他 → qwen2.5:7b-instruct
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import asyncio
|
import asyncio
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import anthropic as _anthropic
|
import httpx
|
||||||
import structlog
|
import structlog
|
||||||
from sqlalchemy import text
|
from sqlalchemy import text
|
||||||
|
|
||||||
@@ -50,6 +50,38 @@ _ROUTING_RULES: list[tuple[re.Pattern, str]] = [
|
|||||||
_RATE_LIMIT_MAX = 20
|
_RATE_LIMIT_MAX = 20
|
||||||
_RATE_LIMIT_WINDOW_SEC = 60
|
_RATE_LIMIT_WINDOW_SEC = 60
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
# Ollama model routing (pick the best-suited model per agent specialty; host 111)
# ─────────────────────────────────────────────────────────────────────────────
# Built from (model, agents) groups so each model tag is written exactly once.
# Group order and agent order are kept so the resulting dict is identical,
# including key order.
_MODEL_BY_AGENT: dict[str, str] = {
    agent: model
    for model, agents in (
        # Reasoning-oriented (root-cause hunting / security analysis)
        ("deepseek-r1:14b", ("debugger", "vuln-verifier")),
        # Code-specialised (review / implementation / refactoring / DB /
        # frontend / tooling)
        (
            "qwen2.5-coder:7b",
            (
                "critic",
                "db-expert",
                "fullstack-engineer",
                "refactor-specialist",
                "migration-engineer",
                "frontend-designer",
                "tool-expert",
            ),
        ),
        # General instruction following (planning / onboarding / documents)
        ("qwen2.5:7b-instruct", ("planner", "onboarder", "web-researcher")),
    )
    for agent in agents
}

# Model used for any agent name that has no entry in the table above.
_DEFAULT_MODEL = "deepseek-r1:14b"

# deepseek-r1:14b is slow at reasoning, so allow 90 s per request.
_OLLAMA_TIMEOUT = 90.0
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_model(agent_name: str) -> str:
    """Return the Ollama model tag to use for *agent_name*.

    Looks the agent up in ``_MODEL_BY_AGENT``; any name without an entry
    falls back to ``_DEFAULT_MODEL``.
    """
    try:
        return _MODEL_BY_AGENT[agent_name]
    except KeyError:
        return _DEFAULT_MODEL
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_think_tags(text: str) -> str:
    """Remove deepseek-r1 ``<think>`` reasoning blocks, keeping only the answer.

    Args:
        text: Raw model output. A falsy value (empty string or ``None``) is
            treated as "no answer".

    Returns:
        The text with every ``<think>...</think>`` block removed and
        surrounding whitespace stripped. If a ``<think>`` block is never
        closed, everything from that opening tag onward is dropped too.
        Returns ``""`` when nothing but reasoning (or nothing at all) remains.
    """
    if not text:
        return ""
    # Drop every complete reasoning block (non-greedy, may span lines).
    without_closed = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    # Generation is token-capped by the caller, so the output can be cut off
    # mid-reasoning and leave an opening tag with no closing tag. Discard that
    # dangling block rather than leaking raw reasoning into the reply.
    without_dangling = re.sub(r"<think>.*", "", without_closed, flags=re.DOTALL)
    return without_dangling.strip()
|
||||||
|
|
||||||
|
|
||||||
def _route_intent_layer1(msg: str) -> str:
|
def _route_intent_layer1(msg: str) -> str:
|
||||||
"""Layer 1: 關鍵字正則路由,回傳 agent 名稱"""
|
"""Layer 1: 關鍵字正則路由,回傳 agent 名稱"""
|
||||||
@@ -221,19 +253,29 @@ async def process_nl_message(
|
|||||||
|
|
||||||
t0 = time.monotonic()
|
t0 = time.monotonic()
|
||||||
|
|
||||||
# 呼叫 Anthropic Python SDK(直呼 messages.create,不依賴 claude CLI)
|
# 呼叫 Ollama 本地模型(111,零費用,按 agent 選模型)
|
||||||
|
model = _pick_model(agent_name)
|
||||||
success = False
|
success = False
|
||||||
error_type: str | None = None
|
error_type: str | None = None
|
||||||
try:
|
try:
|
||||||
_client = _anthropic.AsyncAnthropic(api_key=settings.CLAUDE_API_KEY or None)
|
ollama_base = getattr(settings, "OLLAMA_URL", "http://192.168.0.111:11434")
|
||||||
_msg = await _client.messages.create(
|
async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as _hc:
|
||||||
model="claude-haiku-4-5-20251001",
|
resp = await _hc.post(
|
||||||
max_tokens=1500,
|
f"{ollama_base}/api/chat",
|
||||||
system=system_prompt,
|
json={
|
||||||
messages=[{"role": "user", "content": prompt_with_ctx}],
|
"model": model,
|
||||||
)
|
"messages": [
|
||||||
result_text = _msg.content[0].text if _msg.content else ""
|
{"role": "system", "content": system_prompt},
|
||||||
|
{"role": "user", "content": prompt_with_ctx},
|
||||||
|
],
|
||||||
|
"stream": False,
|
||||||
|
"options": {"num_predict": 1500, "temperature": 0.3},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
result_text = resp.json().get("message", {}).get("content", "")
|
||||||
|
|
||||||
|
result_text = _strip_think_tags(result_text)
|
||||||
if not result_text:
|
if not result_text:
|
||||||
result_text = "_Agent 回應為空,請稍後再試。_"
|
result_text = "_Agent 回應為空,請稍後再試。_"
|
||||||
success = True
|
success = True
|
||||||
@@ -241,9 +283,10 @@ async def process_nl_message(
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
error_type = type(exc).__name__
|
error_type = type(exc).__name__
|
||||||
logger.error(
|
logger.error(
|
||||||
"hermes_nl_sdk_error",
|
"hermes_nl_ollama_error",
|
||||||
error=str(exc),
|
error=str(exc),
|
||||||
agent=agent_name,
|
agent=agent_name,
|
||||||
|
model=model,
|
||||||
exc_type=error_type,
|
exc_type=error_type,
|
||||||
)
|
)
|
||||||
result_text = f"_Hermes 暫時無法連線({error_type}),請稍後再試。_"
|
result_text = f"_Hermes 暫時無法連線({error_type}),請稍後再試。_"
|
||||||
@@ -252,6 +295,7 @@ async def process_nl_message(
|
|||||||
logger.info(
|
logger.info(
|
||||||
"hermes_nl_dispatch",
|
"hermes_nl_dispatch",
|
||||||
agent=agent_name,
|
agent=agent_name,
|
||||||
|
model=model,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
chat_id=chat_id,
|
chat_id=chat_id,
|
||||||
username=username,
|
username=username,
|
||||||
|
|||||||
Reference in New Issue
Block a user