fix: restore Hermes to 111+hermes3 + add NVIDIA NIM auto-fallback for OpenClaw
All checks were successful
CD Pipeline / deploy (push) Successful in 3m0s
All checks were successful
CD Pipeline / deploy (push) Successful in 3m0s
Hermes was wrongly redirected to host 192.168.0.188 (CPU-only, 60s+ timeout).
Host 192.168.0.111 has hermes3:latest with GPU acceleration (~10s response).

OpenClaw now auto-detects:
1. Gemini (primary, when GEMINI_API_KEY is set)
2. NVIDIA NIM nemotron-ultra (auto-fallback, NVIDIA_API_KEY already set)
3. Friendly error only when both are unavailable

This implements the user-requested auto-failover pattern: always try the primary first, silently fall back, and restore automatically when the primary recovers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1637,7 +1637,7 @@ def api_icaim_trigger():
|
||||
|
||||
if result.threats:
|
||||
hermes_stats = {
|
||||
'model': 'qwen2.5:7b-instruct',
|
||||
'model': 'hermes3:latest',
|
||||
'duration_sec': hermes_duration,
|
||||
'tokens': result.hermes_tokens,
|
||||
}
|
||||
|
||||
@@ -69,7 +69,7 @@ class ElephantAlphaOrchestrator:
|
||||
self.agents = {
|
||||
"hermes": AgentCapability(
|
||||
name="Hermes Analyst",
|
||||
model="qwen2.5:7b-instruct",
|
||||
model="hermes3:latest",
|
||||
strengths=["price_competition_analysis", "threat_detection", "market_intelligence"],
|
||||
limitations=["context_window", "real_time_data"],
|
||||
cost_per_token=0.0,
|
||||
@@ -112,7 +112,7 @@ CURRENT ARCHITECTURE:
|
||||
- Your role: Autonomous decision-making and agent orchestration
|
||||
|
||||
AGENT CAPABILITIES:
|
||||
1. HERMES (qwen2.5:7b-instruct)
|
||||
1. HERMES (hermes3:latest)
|
||||
- Strengths: Price competition analysis, threat detection, market intelligence
|
||||
- Limitations: Limited context window, no real-time data access
|
||||
- Best for: Analyzing large datasets, identifying patterns, threat assessment
|
||||
|
||||
@@ -25,8 +25,8 @@ from sqlalchemy import text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
HERMES_MODEL = "qwen2.5:7b-instruct"
|
||||
HERMES_URL = "http://192.168.0.188:11434"
|
||||
HERMES_MODEL = "hermes3:latest"
|
||||
HERMES_URL = "http://192.168.0.111:11434"
|
||||
HERMES_TIMEOUT = 120 # 秒,批量 300 筆最長預估 ~90s
|
||||
TOP_N = 20 # 輸出前 N 個威脅,控制 NemoTron 每次消耗配額
|
||||
|
||||
@@ -154,7 +154,7 @@ class HermesAnalystService:
|
||||
resp = requests.post(
|
||||
f"{HERMES_URL}/api/generate",
|
||||
json=payload,
|
||||
timeout=30, # 意圖分類,qwen2.5 首次推理可能需 ~20s
|
||||
timeout=20, # 意圖分類,hermes3 on 111 實測 ~10s
|
||||
)
|
||||
resp.raise_for_status()
|
||||
raw = (resp.json().get("response", "") or "").strip()
|
||||
|
||||
@@ -24,6 +24,7 @@ OpenClaw 戰略分析師(Gemini 2.5 Flash)
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import requests
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
@@ -34,6 +35,9 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
|
||||
STRATEGY_MODEL = os.getenv("OPENCLAW_MODEL", "gemini-2.5-flash-preview-05-20")
|
||||
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "")
|
||||
NVIDIA_NIM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
|
||||
NVIDIA_FALLBACK_MODEL = "nvidia/llama-3.1-nemotron-ultra-253b-v1"
|
||||
TAIPEI_TZ_OFFSET = 8 # UTC+8
|
||||
|
||||
__all__ = [
|
||||
@@ -63,34 +67,32 @@ def generate_strategy_response(query: str, context: Optional[Dict[str, Any]] = N
|
||||
if not q:
|
||||
return "請輸入您的問題,例如:本週業績趨勢、競品價差分析、產出週報 PPT。"
|
||||
|
||||
if not GEMINI_API_KEY:
|
||||
return (
|
||||
"OpenClaw 策略師目前離線(未設定 GEMINI_API_KEY)。\n"
|
||||
"您可直接輸入以下指令取得報告:\n"
|
||||
"• /daily — 每日業績\n"
|
||||
"• /weekly — 週報\n"
|
||||
"• /threats — 最新競價威脅\n"
|
||||
"• /help — 完整功能說明"
|
||||
)
|
||||
|
||||
system_prompt = (
|
||||
"你是 MOMO Pro 電商情報策略師。以繁體中文(台灣用語)回覆使用者。"
|
||||
"你是 MOMO Pro 電商情報策略師「OpenClaw」。以繁體中文(台灣用語)回覆使用者。"
|
||||
"嚴禁簡體字,嚴禁空洞套話。若使用者要求的資料需即時查詢,"
|
||||
"請告知使用者相關可用指令(例如 /daily、/weekly、/threats)。"
|
||||
"回覆長度控制在 500 字內,可用 Markdown 條列。"
|
||||
)
|
||||
user_prompt = f"使用者問題:{q}\n上下文:{json.dumps(context or {}, ensure_ascii=False)}"
|
||||
|
||||
try:
|
||||
text_reply = _call_gemini(system_prompt, user_prompt, temperature=0.5)
|
||||
except Exception as e:
|
||||
logger.error("[OpenClaw] generate_strategy_response 例外:%s", e)
|
||||
text_reply = None
|
||||
# 優先 Gemini;無 key 或失敗時自動備援 NVIDIA NIM
|
||||
text_reply = None
|
||||
if GEMINI_API_KEY:
|
||||
try:
|
||||
text_reply = _call_gemini(system_prompt, user_prompt, temperature=0.5)
|
||||
except Exception as e:
|
||||
logger.warning("[OpenClaw] Gemini 呼叫失敗,備援 NVIDIA NIM:%s", e)
|
||||
|
||||
if not text_reply and NVIDIA_API_KEY:
|
||||
try:
|
||||
text_reply = _call_nvidia_nim(system_prompt, user_prompt)
|
||||
except Exception as e:
|
||||
logger.error("[OpenClaw] NVIDIA NIM 備援也失敗:%s", e)
|
||||
|
||||
if not text_reply:
|
||||
return (
|
||||
"策略師暫時無法回覆(模型呼叫逾時或失敗)。\n"
|
||||
"您可改用:/daily、/weekly、/threats 取得結構化報告。"
|
||||
"策略師暫時無法回覆(Gemini 與 NVIDIA NIM 均離線)。\n"
|
||||
"請改用:/daily、/weekly、/threats 取得結構化報告。"
|
||||
)
|
||||
return text_reply
|
||||
|
||||
@@ -349,6 +351,35 @@ def _call_gemini(system_prompt: str, user_prompt: str, temperature: float = 0.4)
|
||||
return None
|
||||
|
||||
|
||||
def _call_nvidia_nim(system_prompt: str, user_prompt: str, temperature: float = 0.5) -> Optional[str]:
    """Request a chat completion from NVIDIA NIM as the fallback when Gemini is offline.

    Sends the two prompts to ``NVIDIA_NIM_URL`` using ``NVIDIA_FALLBACK_MODEL``.

    Args:
        system_prompt: Text sent as the ``system`` message.
        user_prompt: Text sent as the ``user`` message.
        temperature: Sampling temperature forwarded in the request body.

    Returns:
        The ``content`` of the first choice's message in the JSON response,
        or ``None`` when ``NVIDIA_API_KEY`` is empty or when the request or
        the response handling raises.
    """
    if not NVIDIA_API_KEY:
        return None

    auth_headers = {
        "Authorization": f"Bearer {NVIDIA_API_KEY}",
        "Content-Type": "application/json",
    }
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    request_body = {
        "model": NVIDIA_FALLBACK_MODEL,
        "messages": chat_messages,
        "temperature": temperature,
        "max_tokens": 1024,
    }

    try:
        response = requests.post(
            NVIDIA_NIM_URL,
            headers=auth_headers,
            json=request_body,
            timeout=60,
        )
        response.raise_for_status()
        first_choice = response.json()["choices"][0]
        return first_choice["message"]["content"]
    except Exception as exc:
        # Best-effort fallback: any failure is logged and reported as None.
        logger.error("[OpenClaw] NVIDIA NIM 呼叫失敗: %s", exc)
        return None
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
# Telegram 推播
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Reference in New Issue
Block a user