fix: restore Hermes to 111+hermes3 + add NVIDIA NIM auto-fallback for OpenClaw
All checks were successful
CD Pipeline / deploy (push) Successful in 3m0s

Hermes was wrongly redirected to host 192.168.0.188 (CPU-only, 60s+ timeout).
Host 192.168.0.111 has hermes3:latest with GPU acceleration (~10s response).

OpenClaw now auto-detects:
  1. Gemini (primary, when GEMINI_API_KEY set)
  2. NVIDIA NIM nemotron-ultra (auto-fallback, NVIDIA_API_KEY already set)
  3. Friendly error only when both are unavailable

This implements the user-requested auto-failover pattern: always try
primary first, silently fall back, restore automatically when primary recovers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
ogt
2026-04-25 10:31:00 +08:00
parent e9e0ddf54f
commit c299abba5d
4 changed files with 55 additions and 24 deletions

View File

@@ -1637,7 +1637,7 @@ def api_icaim_trigger():
if result.threats:
hermes_stats = {
'model': 'qwen2.5:7b-instruct',
'model': 'hermes3:latest',
'duration_sec': hermes_duration,
'tokens': result.hermes_tokens,
}

View File

@@ -69,7 +69,7 @@ class ElephantAlphaOrchestrator:
self.agents = {
"hermes": AgentCapability(
name="Hermes Analyst",
model="qwen2.5:7b-instruct",
model="hermes3:latest",
strengths=["price_competition_analysis", "threat_detection", "market_intelligence"],
limitations=["context_window", "real_time_data"],
cost_per_token=0.0,
@@ -112,7 +112,7 @@ CURRENT ARCHITECTURE:
- Your role: Autonomous decision-making and agent orchestration
AGENT CAPABILITIES:
1. HERMES (qwen2.5:7b-instruct)
1. HERMES (hermes3:latest)
- Strengths: Price competition analysis, threat detection, market intelligence
- Limitations: Limited context window, no real-time data access
- Best for: Analyzing large datasets, identifying patterns, threat assessment

View File

@@ -25,8 +25,8 @@ from sqlalchemy import text
logger = logging.getLogger(__name__)
HERMES_MODEL = "qwen2.5:7b-instruct"
HERMES_URL = "http://192.168.0.188:11434"
HERMES_MODEL = "hermes3:latest"
HERMES_URL = "http://192.168.0.111:11434"
HERMES_TIMEOUT = 120 # 秒,批量 300 筆最長預估 ~90s
TOP_N = 20 # 輸出前 N 個威脅,控制 NemoTron 每次消耗配額
@@ -154,7 +154,7 @@ class HermesAnalystService:
resp = requests.post(
f"{HERMES_URL}/api/generate",
json=payload,
timeout=30, # 意圖分類,qwen2.5 首次推理可能需 ~20s
timeout=20, # 意圖分類,hermes3 on 111 實測 ~10s
)
resp.raise_for_status()
raw = (resp.json().get("response", "") or "").strip()

View File

@@ -24,6 +24,7 @@ OpenClaw 戰略分析師Gemini 2.5 Flash
import json
import logging
import os
import requests
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
@@ -34,6 +35,9 @@ logger = logging.getLogger(__name__)
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
STRATEGY_MODEL = os.getenv("OPENCLAW_MODEL", "gemini-2.5-flash-preview-05-20")
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "")
NVIDIA_NIM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
NVIDIA_FALLBACK_MODEL = "nvidia/llama-3.1-nemotron-ultra-253b-v1"
TAIPEI_TZ_OFFSET = 8 # UTC+8
__all__ = [
@@ -63,34 +67,32 @@ def generate_strategy_response(query: str, context: Optional[Dict[str, Any]] = N
if not q:
return "請輸入您的問題,例如:本週業績趨勢、競品價差分析、產出週報 PPT。"
if not GEMINI_API_KEY:
return (
"OpenClaw 策略師目前離線(未設定 GEMINI_API_KEY\n"
"您可直接輸入以下指令取得報告:\n"
"• /daily — 每日業績\n"
"• /weekly — 週報\n"
"• /threats — 最新競價威脅\n"
"• /help — 完整功能說明"
)
system_prompt = (
"你是 MOMO Pro 電商情報策略師。以繁體中文(台灣用語)回覆使用者。"
"你是 MOMO Pro 電商情報策略師「OpenClaw」。以繁體中文(台灣用語)回覆使用者。"
"嚴禁簡體字,嚴禁空洞套話。若使用者要求的資料需即時查詢,"
"請告知使用者相關可用指令(例如 /daily、/weekly、/threats"
"回覆長度控制在 500 字內,可用 Markdown 條列。"
)
user_prompt = f"使用者問題:{q}\n上下文:{json.dumps(context or {}, ensure_ascii=False)}"
try:
text_reply = _call_gemini(system_prompt, user_prompt, temperature=0.5)
except Exception as e:
logger.error("[OpenClaw] generate_strategy_response 例外:%s", e)
text_reply = None
# 優先 Gemini;無 key 或失敗時自動備援 NVIDIA NIM
text_reply = None
if GEMINI_API_KEY:
try:
text_reply = _call_gemini(system_prompt, user_prompt, temperature=0.5)
except Exception as e:
logger.warning("[OpenClaw] Gemini 呼叫失敗,備援 NVIDIA NIM%s", e)
if not text_reply and NVIDIA_API_KEY:
try:
text_reply = _call_nvidia_nim(system_prompt, user_prompt)
except Exception as e:
logger.error("[OpenClaw] NVIDIA NIM 備援也失敗:%s", e)
if not text_reply:
return (
"策略師暫時無法回覆(模型呼叫逾時或失敗)。\n"
"您可改用:/daily、/weekly、/threats 取得結構化報告。"
"策略師暫時無法回覆(Gemini 與 NVIDIA NIM 均離線)。\n"
"改用:/daily、/weekly、/threats 取得結構化報告。"
)
return text_reply
@@ -349,6 +351,35 @@ def _call_gemini(system_prompt: str, user_prompt: str, temperature: float = 0.4)
return None
def _call_nvidia_nim(system_prompt: str, user_prompt: str, temperature: float = 0.5) -> Optional[str]:
    """Request a reply from the NVIDIA NIM chat-completions endpoint.

    Used as the fallback text generator when Gemini is unavailable.

    Args:
        system_prompt: Text sent as the ``system`` message.
        user_prompt: Text sent as the ``user`` message.
        temperature: Sampling temperature forwarded in the request body.

    Returns:
        The first choice's message content, or ``None`` when no
        ``NVIDIA_API_KEY`` is configured, when the HTTP call or response
        parsing fails, or when the model returns no usable text.
    """
    if not NVIDIA_API_KEY:
        return None

    headers = {
        "Authorization": f"Bearer {NVIDIA_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": NVIDIA_FALLBACK_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": temperature,
        "max_tokens": 1024,
    }

    try:
        resp = requests.post(NVIDIA_NIM_URL, headers=headers, json=payload, timeout=60)
        resp.raise_for_status()
        content = resp.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # Deliberately broad: this is a best-effort fallback and must never
        # raise into the caller, whatever goes wrong (network, HTTP status,
        # malformed JSON, unexpected response shape).
        logger.error("[OpenClaw] NVIDIA NIM 呼叫失敗: %s", e)
        return None

    # A null, non-string, or whitespace-only reply would otherwise pass the
    # caller's truthiness check and be delivered to the user as a blank
    # message; report it as "no reply" instead.
    if not isinstance(content, str) or not content.strip():
        logger.warning("[OpenClaw] NVIDIA NIM returned empty content")
        return None
    return content
# ═══════════════════════════════════════════════════════════════════════════════
# Telegram 推播
# ═══════════════════════════════════════════════════════════════════════════════