fix: restore Hermes to 111+hermes3 + add NVIDIA NIM auto-fallback for OpenClaw

Hermes was wrongly redirected to host 188 (CPU-only, 60s+ timeout). Host 111 has hermes3:latest with GPU acceleration (~10s response), so Hermes points at 111 again.

OpenClaw now selects its backend automatically:
1. Gemini (primary, used when GEMINI_API_KEY is set)
2. NVIDIA NIM nemotron-ultra (automatic fallback; NVIDIA_API_KEY is already set)
3. A friendly error message, only when both are unavailable

This implements the user-requested auto-failover pattern: always try the primary first, silently fall back, and return to the primary automatically once it recovers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 10:31:00 +08:00
parent e9e0ddf54f
commit c299abba5d
4 changed files with 55 additions and 24 deletions
--- a/routes/ai_routes.py
+++ b/routes/ai_routes.py
@@ -1637,7 +1637,7 @@ def api_icaim_trigger():

            if result.threats:
                hermes_stats = {
-                    'model':        'qwen2.5:7b-instruct',
+                    'model':        'hermes3:latest',
                    'duration_sec': hermes_duration,
                    'tokens':       result.hermes_tokens,
                }
--- a/services/elephant_alpha_orchestrator.py
+++ b/services/elephant_alpha_orchestrator.py
@@ -69,7 +69,7 @@ class ElephantAlphaOrchestrator:
        self.agents = {
            "hermes": AgentCapability(
                name="Hermes Analyst",
-                model="qwen2.5:7b-instruct",
+                model="hermes3:latest",
                strengths=["price_competition_analysis", "threat_detection", "market_intelligence"],
                limitations=["context_window", "real_time_data"],
                cost_per_token=0.0,
@@ -112,7 +112,7 @@ CURRENT ARCHITECTURE:
 - Your role: Autonomous decision-making and agent orchestration

 AGENT CAPABILITIES:
-1. HERMES (qwen2.5:7b-instruct)
+1. HERMES (hermes3:latest)
   - Strengths: Price competition analysis, threat detection, market intelligence
   - Limitations: Limited context window, no real-time data access
   - Best for: Analyzing large datasets, identifying patterns, threat assessment
--- a/services/hermes_analyst_service.py
+++ b/services/hermes_analyst_service.py
@@ -25,8 +25,8 @@ from sqlalchemy import text

 logger = logging.getLogger(__name__)

-HERMES_MODEL = "qwen2.5:7b-instruct"
-HERMES_URL = "http://192.168.0.188:11434"
+HERMES_MODEL = "hermes3:latest"
+HERMES_URL = "http://192.168.0.111:11434"
 HERMES_TIMEOUT = 120  # 秒，批量 300 筆最長預估 ~90s
 TOP_N = 20  # 輸出前 N 個威脅，控制 NemoTron 每次消耗配額

@@ -154,7 +154,7 @@ class HermesAnalystService:
            resp = requests.post(
                f"{HERMES_URL}/api/generate",
                json=payload,
-                timeout=30,  # 意圖分類，qwen2.5 首次推理可能需 ~20s
+                timeout=20,  # 意圖分類，hermes3 on 111 實測 ~10s
            )
            resp.raise_for_status()
            raw = (resp.json().get("response", "") or "").strip()
--- a/services/openclaw_strategist_service.py
+++ b/services/openclaw_strategist_service.py
@@ -24,6 +24,7 @@ OpenClaw 戰略分析師（Gemini 2.5 Flash）
 import json
 import logging
 import os
+import requests
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Optional

@@ -34,6 +35,9 @@ logger = logging.getLogger(__name__)

 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
 STRATEGY_MODEL = os.getenv("OPENCLAW_MODEL", "gemini-2.5-flash-preview-05-20")
+NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "")
+NVIDIA_NIM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
+NVIDIA_FALLBACK_MODEL = "nvidia/llama-3.1-nemotron-ultra-253b-v1"
 TAIPEI_TZ_OFFSET = 8  # UTC+8

 __all__ = [
@@ -63,34 +67,32 @@ def generate_strategy_response(query: str, context: Optional[Dict[str, Any]] = N
    if not q:
        return "請輸入您的問題，例如：本週業績趨勢、競品價差分析、產出週報 PPT。"

-    if not GEMINI_API_KEY:
-        return (
-            "OpenClaw 策略師目前離線（未設定 GEMINI_API_KEY）。\n"
-            "您可直接輸入以下指令取得報告：\n"
-            "• /daily — 每日業績\n"
-            "• /weekly — 週報\n"
-            "• /threats — 最新競價威脅\n"
-            "• /help — 完整功能說明"
-        )
-
    system_prompt = (
-        "你是 MOMO Pro 電商情報策略師。以繁體中文（台灣用語）回覆使用者。"
+        "你是 MOMO Pro 電商情報策略師「OpenClaw」。以繁體中文（台灣用語）回覆使用者。"
        "嚴禁簡體字，嚴禁空洞套話。若使用者要求的資料需即時查詢，"
        "請告知使用者相關可用指令（例如 /daily、/weekly、/threats）。"
        "回覆長度控制在 500 字內，可用 Markdown 條列。"
    )
    user_prompt = f"使用者問題：{q}\n上下文：{json.dumps(context or {}, ensure_ascii=False)}"

-    try:
-        text_reply = _call_gemini(system_prompt, user_prompt, temperature=0.5)
-    except Exception as e:
-        logger.error("[OpenClaw] generate_strategy_response 例外：%s", e)
-        text_reply = None
+    # 優先 Gemini；無 key 或失敗時自動備援 NVIDIA NIM
+    text_reply = None
+    if GEMINI_API_KEY:
+        try:
+            text_reply = _call_gemini(system_prompt, user_prompt, temperature=0.5)
+        except Exception as e:
+            logger.warning("[OpenClaw] Gemini 呼叫失敗，備援 NVIDIA NIM：%s", e)
+
+    if not text_reply and NVIDIA_API_KEY:
+        try:
+            text_reply = _call_nvidia_nim(system_prompt, user_prompt)
+        except Exception as e:
+            logger.error("[OpenClaw] NVIDIA NIM 備援也失敗：%s", e)

    if not text_reply:
        return (
-            "策略師暫時無法回覆（模型呼叫逾時或失敗）。\n"
-            "您可改用：/daily、/weekly、/threats 取得結構化報告。"
+            "策略師暫時無法回覆（Gemini 與 NVIDIA NIM 均離線）。\n"
+            "請改用：/daily、/weekly、/threats 取得結構化報告。"
        )
    return text_reply

@@ -349,6 +351,35 @@ def _call_gemini(system_prompt: str, user_prompt: str, temperature: float = 0.4)
        return None


+def _call_nvidia_nim(system_prompt: str, user_prompt: str, temperature: float = 0.5) -> Optional[str]:
+    """Fallback for when Gemini is unavailable: POST both prompts to the NVIDIA NIM chat-completions endpoint and return the reply text, or None if no key is set or the call fails."""
+    if not NVIDIA_API_KEY:  # no key configured: skip the request entirely
+        return None
+    try:
+        resp = requests.post(
+            NVIDIA_NIM_URL,
+            headers={
+                "Authorization": f"Bearer {NVIDIA_API_KEY}",
+                "Content-Type": "application/json",
+            },
+            json={
+                "model": NVIDIA_FALLBACK_MODEL,
+                "messages": [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt},
+                ],
+                "temperature": temperature,
+                "max_tokens": 1024,
+            },
+            timeout=60,  # seconds
+        )
+        resp.raise_for_status()  # HTTP error statuses raise here and are handled by the except below
+        return resp.json()["choices"][0]["message"]["content"]  # text of the first returned choice
+    except Exception as e:  # best-effort fallback: log any failure and report it as None
+        logger.error("[OpenClaw] NVIDIA NIM 呼叫失敗: %s", e)
+        return None
+
+
 # ═══════════════════════════════════════════════════════════════════════════════
 # Telegram 推播
 # ═══════════════════════════════════════════════════════════════════════════════