fix(chat): OpenClaw 改用 Ollama qwen2.5 做對話 + NemoClaw 加 Ollama fallback

問題: _call_openclaw 用 analyze/incident API → 回覆是告警格式，不是自然語言。

修法:
1. OpenClaw chat → Ollama qwen2.5:7b-instruct (本地，快速，無格式污染)
2. NemoClaw → NIM 優先，超時 fallback 到 Ollama llama3.2:3b

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 18:30:31 +08:00
parent d1436157b7
commit 2da8da5a25
1 changed files with 51 additions and 18 deletions
--- a/apps/api/src/services/chat_manager.py
+++ b/apps/api/src/services/chat_manager.py
@@ -78,44 +78,52 @@ class ChatManager:

    async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None:
        """
-        呼叫 OpenClaw (192.168.0.188:8088)
+        呼叫 OpenClaw 對話 — 走 Ollama qwen2.5:7b-instruct (192.168.0.188:11434)

-        OpenClaw 是產品 AI 大腦，對話走 /api/v1/analyze/incident 的通用分析路徑
+        2026-04-03 ogt: OpenClaw 8088 的 analyze/incident 是告警分析 API，
+        不適合做自然語言對話（回覆會是告警格式）。
+        改用 Ollama 本地模型做 chat，速度快、無格式污染。
        """
        import httpx
        from src.core.config import get_settings
        settings = get_settings()

-        openclaw_url = getattr(settings, 'OPENCLAW_URL', 'http://192.168.0.188:8088')
-        openclaw_timeout = float(getattr(settings, 'OPENCLAW_TIMEOUT', 30.0))
+        ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434')
+        openclaw_timeout = float(getattr(settings, 'OPENCLAW_TIMEOUT', 40.0))
        try:
-            # OpenClaw 沒有通用 chat endpoint，用 analyze/incident 傳入對話內容
            async with httpx.AsyncClient(timeout=openclaw_timeout) as client:
                resp = await client.post(
-                    f"{openclaw_url}/api/v1/analyze/incident",
+                    f"{ollama_url}/api/chat",
                    json={
-                        "incident_id": "CHAT",
-                        "severity": "P3",
-                        "signals": [{"alert_name": "user_chat", "description": user_message[:800]}],
-                        "affected_services": ["interactive_chat"],
-                        "expert_context": {"system_prompt": system_prompt[:500]},
+                        "model": "qwen2.5:7b-instruct",
+                        "stream": False,
+                        "messages": [
+                            {"role": "system", "content": system_prompt},
+                            {"role": "user", "content": user_message},
+                        ],
+                        "options": {"num_predict": 300},
                    },
                )
                resp.raise_for_status()
                data = resp.json()
-                # 從 reasoning 取出自然語言回應
-                return data.get("reasoning") or data.get("description") or data.get("action_title")
+                return data.get("message", {}).get("content", "").strip() or None
        except Exception as e:
            logger.warning("openclaw_chat_failed", error=str(e))
            return None

    async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None:
        """
-        呼叫 NVIDIA NIM nemotron-mini-4b (NemoClaw)
+        呼叫 NemoClaw — NIM 優先，超時則 fallback 到 Ollama llama3.2:3b

-        NIM 免費 tier 延遲 11-45s，此方法可能需要 30-45s 才回應
+        2026-04-03 ogt: NIM 免費 tier 延遲 11-45s 且常超時，
+        加 Ollama fallback 確保 NemoClaw 一定有回應。
        """
+        import httpx
+        from src.core.config import get_settings as _get_settings
        from src.services.nvidia_provider import get_nvidia_provider
+        settings = _get_settings()
+
+        # 優先嘗試 NIM (timeout 20s，快速失敗)
        nvidia = get_nvidia_provider()
        try:
            full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}"
@@ -126,10 +134,35 @@ class ChatManager:
            )
            if success and response and "not configured" not in response and "Circuit Breaker" not in response:
                return response.strip()
-            return None
        except Exception as e:
-            logger.warning("nemotron_chat_failed", error=str(e))
-            return None
+            logger.warning("nemotron_nim_failed_fallback_ollama", error=str(e))
+
+        # Fallback: Ollama llama3.2:3b (本地，速度快)
+        ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434')
+        try:
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                resp = await client.post(
+                    f"{ollama_url}/api/chat",
+                    json={
+                        "model": "llama3.2:3b",
+                        "stream": False,
+                        "messages": [
+                            {"role": "system", "content": system_prompt},
+                            {"role": "user", "content": user_message},
+                        ],
+                        "options": {"num_predict": 250},
+                    },
+                )
+                resp.raise_for_status()
+                data = resp.json()
+                result = data.get("message", {}).get("content", "").strip()
+                if result:
+                    logger.info("nemotron_ollama_fallback_used")
+                    return result
+        except Exception as e:
+            logger.warning("nemotron_ollama_fallback_failed", error=str(e))
+
+        return None

    async def generate_response(
        self,