From 2da8da5a258839844d47da31fe02894fdf0fc47f Mon Sep 17 00:00:00 2001 From: OG T Date: Fri, 3 Apr 2026 18:30:31 +0800 Subject: [PATCH] =?UTF-8?q?fix(chat):=20OpenClaw=20=E6=94=B9=E7=94=A8=20Ol?= =?UTF-8?q?lama=20qwen2.5=20=E5=81=9A=E5=B0=8D=E8=A9=B1=20+=20NemoClaw=20?= =?UTF-8?q?=E5=8A=A0=20Ollama=20fallback?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 問題: _call_openclaw 用 analyze/incident API → 回覆是告警格式,不是自然語言 修法: 1. OpenClaw chat → Ollama qwen2.5:7b-instruct (本地,快速,無格式污染) 2. NemoClaw → NIM 優先,超時 fallback 到 Ollama llama3.2:3b Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/services/chat_manager.py | 69 ++++++++++++++++++++------- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/apps/api/src/services/chat_manager.py b/apps/api/src/services/chat_manager.py index 356bbef6..fb73f174 100644 --- a/apps/api/src/services/chat_manager.py +++ b/apps/api/src/services/chat_manager.py @@ -78,44 +78,52 @@ class ChatManager: async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None: """ - 呼叫 OpenClaw (192.168.0.188:8088) + 呼叫 OpenClaw 對話 — 走 Ollama qwen2.5:7b-instruct (192.168.0.188:11434) - OpenClaw 是產品 AI 大腦,對話走 /api/v1/analyze/incident 的通用分析路徑 + 2026-04-03 ogt: OpenClaw 8088 的 analyze/incident 是告警分析 API, + 不適合做自然語言對話(回覆會是告警格式)。 + 改用 Ollama 本地模型做 chat,速度快、無格式污染。 """ import httpx from src.core.config import get_settings settings = get_settings() - openclaw_url = getattr(settings, 'OPENCLAW_URL', 'http://192.168.0.188:8088') - openclaw_timeout = float(getattr(settings, 'OPENCLAW_TIMEOUT', 30.0)) + ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434') + openclaw_timeout = float(getattr(settings, 'OPENCLAW_TIMEOUT', 40.0)) try: - # OpenClaw 沒有通用 chat endpoint,用 analyze/incident 傳入對話內容 async with httpx.AsyncClient(timeout=openclaw_timeout) as client: resp = await client.post( - f"{openclaw_url}/api/v1/analyze/incident", + f"{ollama_url}/api/chat", json={ - "incident_id": "CHAT", - "severity": "P3", - "signals": [{"alert_name": "user_chat", "description": user_message[:800]}], - "affected_services": ["interactive_chat"], - "expert_context": {"system_prompt": system_prompt[:500]}, + "model": "qwen2.5:7b-instruct", + "stream": False, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_message}, + ], + "options": {"num_predict": 300}, }, ) resp.raise_for_status() data = resp.json() - # 從 reasoning 取出自然語言回應 - return data.get("reasoning") or data.get("description") or data.get("action_title") + return data.get("message", {}).get("content", "").strip() or None except Exception as e: logger.warning("openclaw_chat_failed", error=str(e)) return None async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None: """ - 呼叫 NVIDIA NIM nemotron-mini-4b (NemoClaw) + 呼叫 NemoClaw — NIM 優先,超時則 fallback 到 Ollama llama3.2:3b - NIM 免費 tier 延遲 11-45s,此方法可能需要 30-45s 才回應 + 2026-04-03 ogt: NIM 免費 tier 延遲 11-45s 且常超時, + 加 Ollama fallback 確保 NemoClaw 一定有回應。 """ + import httpx + from src.core.config import get_settings as _get_settings from src.services.nvidia_provider import get_nvidia_provider + settings = _get_settings() + + # 優先嘗試 NIM (timeout 20s,快速失敗) nvidia = get_nvidia_provider() try: full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}" @@ -126,10 +134,35 @@ class ChatManager: ) if success and response and "not configured" not in response and "Circuit Breaker" not in response: return response.strip() - return None except Exception as e: - logger.warning("nemotron_chat_failed", error=str(e)) - return None + logger.warning("nemotron_nim_failed_fallback_ollama", error=str(e)) + + # Fallback: Ollama llama3.2:3b (本地,速度快) + ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434') + try: + async with httpx.AsyncClient(timeout=30.0) as client: + resp = await client.post( + f"{ollama_url}/api/chat", + json={ + "model": "llama3.2:3b", + "stream": False, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_message}, + ], + "options": {"num_predict": 250}, + }, + ) + resp.raise_for_status() + data = resp.json() + result = data.get("message", {}).get("content", "").strip() + if result: + logger.info("nemotron_ollama_fallback_used") + return result + except Exception as e: + logger.warning("nemotron_ollama_fallback_failed", error=str(e)) + + return None async def generate_response( self,