diff --git a/routes/ai_routes.py b/routes/ai_routes.py
index 4a4cbbe..f5d8018 100644
--- a/routes/ai_routes.py
+++ b/routes/ai_routes.py
@@ -1637,7 +1637,7 @@ def api_icaim_trigger():
 
         if result.threats:
             hermes_stats = {
-                'model': 'qwen2.5:7b-instruct',
+                'model': 'hermes3:latest',
                 'duration_sec': hermes_duration,
                 'tokens': result.hermes_tokens,
             }
diff --git a/services/elephant_alpha_orchestrator.py b/services/elephant_alpha_orchestrator.py
index 81f9c70..ad73abc 100644
--- a/services/elephant_alpha_orchestrator.py
+++ b/services/elephant_alpha_orchestrator.py
@@ -69,7 +69,7 @@ class ElephantAlphaOrchestrator:
         self.agents = {
             "hermes": AgentCapability(
                 name="Hermes Analyst",
-                model="qwen2.5:7b-instruct",
+                model="hermes3:latest",
                 strengths=["price_competition_analysis", "threat_detection", "market_intelligence"],
                 limitations=["context_window", "real_time_data"],
                 cost_per_token=0.0,
@@ -112,7 +112,7 @@ CURRENT ARCHITECTURE:
 - Your role: Autonomous decision-making and agent orchestration
 
 AGENT CAPABILITIES:
-1. HERMES (qwen2.5:7b-instruct)
+1. HERMES (hermes3:latest)
    - Strengths: Price competition analysis, threat detection, market intelligence
    - Limitations: Limited context window, no real-time data access
    - Best for: Analyzing large datasets, identifying patterns, threat assessment
diff --git a/services/hermes_analyst_service.py b/services/hermes_analyst_service.py
index 6b89064..730d70b 100644
--- a/services/hermes_analyst_service.py
+++ b/services/hermes_analyst_service.py
@@ -25,8 +25,8 @@ from sqlalchemy import text
 
 logger = logging.getLogger(__name__)
 
-HERMES_MODEL = "qwen2.5:7b-instruct"
-HERMES_URL = "http://192.168.0.188:11434"
+HERMES_MODEL = "hermes3:latest"
+HERMES_URL = "http://192.168.0.111:11434"
 HERMES_TIMEOUT = 120  # 秒,批量 300 筆最長預估 ~90s
 TOP_N = 20  # 輸出前 N 個威脅,控制 NemoTron 每次消耗配額
 
@@ -154,7 +154,7 @@ class HermesAnalystService:
             resp = requests.post(
                 f"{HERMES_URL}/api/generate",
                 json=payload,
-                timeout=30,  # 意圖分類,qwen2.5 首次推理可能需 ~20s
+                timeout=20,  # intent classification; hermes3 on the .111 host measured at ~10s
             )
             resp.raise_for_status()
             raw = (resp.json().get("response", "") or "").strip()
diff --git a/services/openclaw_strategist_service.py b/services/openclaw_strategist_service.py
index c59d9d5..f8e34d9 100644
--- a/services/openclaw_strategist_service.py
+++ b/services/openclaw_strategist_service.py
@@ -24,6 +24,7 @@ OpenClaw 戰略分析師(Gemini 2.5 Flash)
 import json
 import logging
 import os
+import requests
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Optional
 
@@ -34,6 +35,9 @@ logger = logging.getLogger(__name__)
 
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
 STRATEGY_MODEL = os.getenv("OPENCLAW_MODEL", "gemini-2.5-flash-preview-05-20")
+NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "")
+NVIDIA_NIM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
+NVIDIA_FALLBACK_MODEL = "nvidia/llama-3.1-nemotron-ultra-253b-v1"
 TAIPEI_TZ_OFFSET = 8  # UTC+8
 
 __all__ = [
@@ -63,34 +67,32 @@ def generate_strategy_response(query: str, context: Optional[Dict[str, Any]] = N
     if not q:
         return "請輸入您的問題,例如:本週業績趨勢、競品價差分析、產出週報 PPT。"
 
-    if not GEMINI_API_KEY:
-        return (
-            "OpenClaw 策略師目前離線(未設定 GEMINI_API_KEY)。\n"
-            "您可直接輸入以下指令取得報告:\n"
-            "• /daily — 每日業績\n"
-            "• /weekly — 週報\n"
-            "• /threats — 最新競價威脅\n"
-            "• /help — 完整功能說明"
-        )
-
     system_prompt = (
-        "你是 MOMO Pro 電商情報策略師。以繁體中文(台灣用語)回覆使用者。"
+        "你是 MOMO Pro 電商情報策略師「OpenClaw」。以繁體中文(台灣用語)回覆使用者。"
         "嚴禁簡體字,嚴禁空洞套話。若使用者要求的資料需即時查詢,"
         "請告知使用者相關可用指令(例如 /daily、/weekly、/threats)。"
         "回覆長度控制在 500 字內,可用 Markdown 條列。"
     )
     user_prompt = f"使用者問題:{q}\n上下文:{json.dumps(context or {}, ensure_ascii=False)}"
 
-    try:
-        text_reply = _call_gemini(system_prompt, user_prompt, temperature=0.5)
-    except Exception as e:
-        logger.error("[OpenClaw] generate_strategy_response 例外:%s", e)
-        text_reply = None
+    # Prefer Gemini; fall back to NVIDIA NIM when no Gemini key is set or the call fails
+    text_reply = None
+    if GEMINI_API_KEY:
+        try:
+            text_reply = _call_gemini(system_prompt, user_prompt, temperature=0.5)
+        except Exception as e:
+            logger.warning("[OpenClaw] Gemini 呼叫失敗,備援 NVIDIA NIM:%s", e)
+
+    if not text_reply and NVIDIA_API_KEY:
+        try:
+            text_reply = _call_nvidia_nim(system_prompt, user_prompt)
+        except Exception as e:
+            logger.error("[OpenClaw] NVIDIA NIM 備援也失敗:%s", e)
 
     if not text_reply:
         return (
-            "策略師暫時無法回覆(模型呼叫逾時或失敗)。\n"
-            "您可改用:/daily、/weekly、/threats 取得結構化報告。"
+            "策略師暫時無法回覆(Gemini 與 NVIDIA NIM 均離線)。\n"
+            "請改用:/daily、/weekly、/threats 取得結構化報告。"
         )
     return text_reply
 
@@ -349,6 +351,44 @@ def _call_gemini(system_prompt: str, user_prompt: str, temperature: float = 0.4)
         return None
 
 
+def _call_nvidia_nim(system_prompt: str, user_prompt: str, temperature: float = 0.5) -> Optional[str]:
+    """Call the NVIDIA NIM chat-completions endpoint as a fallback for Gemini.
+
+    Returns the stripped reply text, or None when NVIDIA_API_KEY is unset,
+    the request fails, or the response carries no usable text.
+    """
+    if not NVIDIA_API_KEY:
+        return None
+    try:
+        resp = requests.post(
+            NVIDIA_NIM_URL,
+            headers={
+                "Authorization": f"Bearer {NVIDIA_API_KEY}",
+                "Content-Type": "application/json",
+            },
+            json={
+                "model": NVIDIA_FALLBACK_MODEL,
+                "messages": [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt},
+                ],
+                "temperature": temperature,
+                "max_tokens": 1024,
+            },
+            timeout=60,
+        )
+        resp.raise_for_status()
+        content = resp.json()["choices"][0]["message"]["content"]
+    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
+        logger.error("[OpenClaw] NVIDIA NIM 呼叫失敗: %s", e)
+        return None
+    # A null or whitespace-only reply is truthy-or-None noise; report it as a failure
+    if not isinstance(content, str) or not content.strip():
+        logger.warning("[OpenClaw] NVIDIA NIM 回傳空白內容")
+        return None
+    return content.strip()
+
+
 # ═══════════════════════════════════════════════════════════════════════════════
 # Telegram 推播
 # ═══════════════════════════════════════════════════════════════════════════════