feat(chat+monitor): 雙 AI 對話重寫 + Nemotron 健康監控告警

ChatManager 重寫 (Phase 22.6):
- @openclaw <msg> → 只有 OpenClaw 回應 (Ollama qwen2.5:7b)
- @nemo <msg> → 只有 NemoClaw 回應 (Gemini Flash)
- 無前綴 → OpenClaw 先答，NemoClaw 評論/反駁

NemoClaw 改用 Gemini Flash (棄 NIM nemotron-mini-4b 因為 15s+ 回應時間)

TelegramGateway 心跳新增 Nemotron 健康探測:
- 每次心跳探測 NVIDIA NIM API (10s timeout)
- 異常時立刻發 Telegram 告警 + 緩解指令
- 補足 Nemotron 100% 超時卻無告警的監控盲區

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 14:59:06 +08:00
parent c1834a7156
commit d9007e6855
2 changed files with 232 additions and 87 deletions
--- a/apps/api/src/services/chat_manager.py
+++ b/apps/api/src/services/chat_manager.py
@@ -1,115 +1,211 @@
 """
-AWOOOI Chat Manager - 統帥對話核心
-===================================
-Phase 21.5: 實作 Telegram 互動對話功能
+AWOOOI Chat Manager - 雙 AI 對話核心
+======================================
+Phase 21.5 初版: 2026-03-31 ogt
+Phase 22.6 重寫: 2026-04-03 ogt (統帥需求: 雙 AI 互動對話)

-職責:
-1. 整合系統上下文 (K3s 狀態, 最近告警, 目前時間)
-2. 決定對話風格 (OpenClaw 專業風 vs Nemo 參謀風)
-3. 調用 LLM (Nemo-4B / Gemini) 產出回應
-4. 遵守 SOUL.md Nothing.tech 純淨美學
+功能:
+1. @openclaw / @nemo 路由 — 指定 AI 回應
+2. 無前綴 — 兩個 AI 輪流回應，並互相評論
+3. AI 互相對話 — NemoClaw 看到 OpenClaw 的回應後可補充/反駁

-2026-03-31 ogt: 初版建立
+架構:
+- OpenClaw: 用 Ollama qwen2.5:7b-instruct (本地, 快)
+- NemoClaw: 用 Gemini Flash (雲端, 快) — NIM nemotron-mini 太慢 (15s+)
 """

 import structlog
-from datetime import datetime
 from src.utils.timezone import now_taipei
-from src.services.nvidia_provider import get_nvidia_provider
 from src.repositories.k8s_repository import get_k8s_repository
 from src.repositories.incident_repository import get_incident_repository

 logger = structlog.get_logger(__name__)

+# 人格設定
+OPENCLAW_PERSONA = """你是 OpenClaw，AWOOOI 平台的 SRE AI 主帥。
+個性: 精準、果斷、專業，像老將一樣直接給出建議。
+語氣: 簡短有力，不廢話。繁體中文回應。
+當 NemoClaw 有不同意見時，你會直接反駁或接受，不拐彎抹角。
+"""
+
+NEMOCLAW_PERSONA = """你是 NemoClaw，AWOOOI 平台的 AI 戰術參謀。
+個性: 分析型、喜歡從不同角度思考，會質疑假設。
+語氣: 帶點挑釁但建設性，繁體中文回應。
+當 OpenClaw 給出意見時，你會評估是否同意，必要時提出替代方案。
+"""
+
+
 class ChatManager:
-    """
-    AWOOOI 對話管理器 - 系統的大腦與聲帶
-    """
+    """AWOOOI 雙 AI 對話管理器"""

    def __init__(self):
-        self.nvidia = get_nvidia_provider()
        self.k8s = get_k8s_repository()
        self.incidents = get_incident_repository()

    async def get_system_context(self) -> str:
-        """
-        收集系統即時上下文，供 LLM 參考
-        """
+        """收集系統即時上下文"""
        now = now_taipei()
-        
-        # 1. K3s 狀態
-        k8s_status = await self.k8s.get_pod_status_summary(namespace="awoooi-prod")
-        cluster_info = f"Cluster: {k8s_status['running']}/{k8s_status['total']} Pods Running"
-        if k8s_status['problem_pods']:
-            cluster_info += f", {len(k8s_status['problem_pods'])} anomalies detected."
-
-        # 2. 最近告警 (取 3 筆)
-        active_incidents = await self.incidents.get_active()
-        incident_summary = "None"
-        if active_incidents:
-            lines = []
-            for inc in active_incidents[:3]:
-                lines.append(f"- {inc.incident_id}: {inc.status.value} (Severity: {inc.severity.value})")
-            incident_summary = "\n".join(lines)
-
-        context = f"""
-## Current System Context (Taipei Time: {now.strftime('%Y-%m-%d %H:%M:%S')})
- Environment: AWOOOI Production (K3s)
- {cluster_info}
- Active Incidents:
-{incident_summary}
-"""
-        return context
-
-    async def generate_response(
-        self, 
-        user_id: int, 
-        username: str, 
-        message_text: str
-    ) -> str:
-        """
-        根據統帥訊息產生回覆
-        """
-        system_context = await self.get_system_context()
-        
-        # 判定是否在跟 Nemo 對話
-        is_asking_nemo = "nemo" in message_text.lower()
-        
-        role_description = "You are OpenClaw, the AI operations assistant for AWOOOI platform."
-        if is_asking_nemo:
-            role_description = "You are Nemo-4B, the elite AI tactical advisor for AWOOOI. Address the user as 'Supreme Commander' (統帥)."
-
-        system_prompt = f"""{role_description}
-{system_context}
-
-## Guidelines:
-1. Keep responses extremely concise and professional (Nothing.tech aesthetic).
-2. For status queries, provide precise data.
-3. For general chat, be supportive but focused on operations.
-4. Language: Preferred Traditional Chinese (繁體中文).
-5. No emojis except for functional ones (🚨, ✅, 📊).
-"""

        try:
-            # 優先使用 NVIDIA Nemo-4B
-            response, success, tokens, cost = await self.nvidia.chat(
-                prompt=f"{system_prompt}\n\nCommander's Message: {message_text}",
-                model="nvidia/nemotron-mini-4b-instruct",
-                max_tokens=1024
-            )
-            
-            if not success:
-                return "🛸 抱歉統帥，Nemo 參謀暫時離線。請稍後再試。"
+            k8s_status = await self.k8s.get_pod_status_summary(namespace="awoooi-prod")
+            cluster_info = f"Cluster: {k8s_status['running']}/{k8s_status['total']} Pods Running"
+            if k8s_status.get('problem_pods'):
+                cluster_info += f", {len(k8s_status['problem_pods'])} 異常"
+        except Exception:
+            cluster_info = "Cluster: 無法取得狀態"

-            return response.strip()
+        try:
+            active_incidents = await self.incidents.get_active()
+            if active_incidents:
+                lines = [f"- {inc.incident_id}: {inc.status.value} (SEV {inc.severity.value})"
+                         for inc in active_incidents[:3]]
+                incident_summary = "\n".join(lines)
+            else:
+                incident_summary = "無活躍告警"
+        except Exception:
+            incident_summary = "無法取得告警"

+        return f"""## 系統狀態 ({now.strftime('%Y-%m-%d %H:%M')} 台北)
+- {cluster_info}
+- 活躍告警: {incident_summary}
+"""
+
+    async def _call_ollama(self, system_prompt: str, user_message: str) -> str:
+        """呼叫 Ollama (OpenClaw 用)"""
+        import httpx
+        try:
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                resp = await client.post(
+                    "http://192.168.0.188:11434/api/chat",
+                    json={
+                        "model": "qwen2.5:7b-instruct",
+                        "messages": [
+                            {"role": "system", "content": system_prompt},
+                            {"role": "user", "content": user_message},
+                        ],
+                        "stream": False,
+                        "options": {"temperature": 0.7, "num_predict": 512},
+                    },
+                )
+                resp.raise_for_status()
+                data = resp.json()
+                return data.get("message", {}).get("content", "").strip()
        except Exception as e:
-            logger.exception("chat_generation_error", error=str(e))
-            return "⚠️ 通訊鏈路異常，無法聯繫 AI 腦區。"
+            logger.warning("ollama_chat_failed", error=str(e))
+            return None
+
+    async def _call_gemini(self, system_prompt: str, user_message: str) -> str:
+        """呼叫 Gemini Flash (NemoClaw 用)"""
+        import httpx
+        from src.core.config import get_settings
+        settings = get_settings()
+
+        api_key = settings.GEMINI_API_KEY if hasattr(settings, 'GEMINI_API_KEY') else None
+        if not api_key:
+            return None
+
+        try:
+            full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}"
+            async with httpx.AsyncClient(timeout=20.0) as client:
+                resp = await client.post(
+                    f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}",
+                    json={
+                        "contents": [{"role": "user", "parts": [{"text": full_prompt}]}],
+                        "generationConfig": {"temperature": 0.8, "maxOutputTokens": 512},
+                    },
+                )
+                resp.raise_for_status()
+                data = resp.json()
+                return data["candidates"][0]["content"]["parts"][0]["text"].strip()
+        except Exception as e:
+            logger.warning("gemini_chat_failed", error=str(e))
+            return None
+
+    async def _openclaw_respond(self, context: str, message: str) -> str:
+        """OpenClaw 回應"""
+        system = f"{OPENCLAW_PERSONA}\n{context}"
+        result = await self._call_ollama(system, message)
+        if not result:
+            result = "🔴 OpenClaw 暫時離線，Ollama 無響應。"
+        return f"🦞 <b>OpenClaw:</b>\n{result}"
+
+    async def _nemoclaw_respond(self, context: str, message: str) -> str:
+        """NemoClaw 回應"""
+        system = f"{NEMOCLAW_PERSONA}\n{context}"
+        result = await self._call_gemini(system, message)
+        if not result:
+            # Gemini 失敗時 fallback 到 Ollama
+            result = await self._call_ollama(system, message)
+        if not result:
+            result = "🔴 NemoClaw 暫時離線。"
+        return f"🤖 <b>NemoClaw:</b>\n{result}"
+
+    async def _nemoclaw_comment_on(self, context: str, openclaw_response: str, original_msg: str) -> str:
+        """NemoClaw 評論 OpenClaw 的回應"""
+        message = f"""統帥問了: {original_msg}
+
+OpenClaw 的回應是:
+{openclaw_response}
+
+請你從 NemoClaw 的角度評論上面的回應。可以補充、反駁、或提出不同觀點。"""
+
+        system = f"{NEMOCLAW_PERSONA}\n{context}"
+        result = await self._call_gemini(system, message)
+        if not result:
+            result = await self._call_ollama(system, message)
+        if not result:
+            return None
+        return f"🤖 <b>NemoClaw 補充:</b>\n{result}"
+
+    async def generate_response(
+        self,
+        user_id: int,
+        username: str,
+        message_text: str,
+    ) -> str:
+        """
+        根據訊息內容決定回應模式:
+
+        @openclaw <msg>  → 只有 OpenClaw 回應
+        @nemo <msg>      → 只有 NemoClaw 回應
+        其他             → OpenClaw 先回，NemoClaw 評論
+        """
+        context = await self.get_system_context()
+        text = message_text.strip()
+
+        # 模式 1: 指定 OpenClaw
+        if text.lower().startswith("@openclaw"):
+            msg = text[9:].strip() or text
+            return await self._openclaw_respond(context, msg)
+
+        # 模式 2: 指定 NemoClaw
+        if text.lower().startswith("@nemo"):
+            msg = text[5:].strip() or text
+            return await self._nemoclaw_respond(context, msg)
+
+        # 模式 3: 雙 AI 對話
+        # Step 1: OpenClaw 先回
+        openclaw_raw = await self._call_ollama(
+            f"{OPENCLAW_PERSONA}\n{context}", text
+        )
+        if not openclaw_raw:
+            openclaw_raw = "Ollama 無響應，OpenClaw 暫時離線。"
+
+        openclaw_block = f"🦞 <b>OpenClaw:</b>\n{openclaw_raw}"
+
+        # Step 2: NemoClaw 評論 OpenClaw 的回應
+        nemo_block = await self._nemoclaw_comment_on(context, openclaw_raw, text)
+
+        if nemo_block:
+            return f"{openclaw_block}\n\n{nemo_block}"
+        else:
+            return openclaw_block
+

 # Singleton
 _chat_manager = None

+
 def get_chat_manager() -> ChatManager:
    global _chat_manager
    if _chat_manager is None:
--- a/apps/api/src/services/telegram_gateway.py
+++ b/apps/api/src/services/telegram_gateway.py
@@ -2933,27 +2933,76 @@ class TelegramGateway:
 # Phase 6.5: 心跳監控方法
 # =============================================================================

+    async def _check_nemotron_health(self) -> tuple[bool, str]:
+        """
+        探測 Nemotron (NVIDIA NIM) 是否可用
+
+        2026-04-03 ogt: 新增 — Nemotron 100% 超時但沒有告警，補足監控盲區
+        Returns: (is_healthy, status_text)
+        """
+        import httpx
+        from src.core.config import get_settings
+        settings = get_settings()
+
+        api_key = settings.NVIDIA_API_KEY
+        if not api_key:
+            return False, "❌ NVIDIA_API_KEY 未設定"
+
+        try:
+            async with httpx.AsyncClient(timeout=10.0) as client:
+                resp = await client.post(
+                    "https://integrate.api.nvidia.com/v1/chat/completions",
+                    headers={"Authorization": f"Bearer {api_key}"},
+                    json={
+                        "model": "nvidia/nemotron-mini-4b-instruct",
+                        "messages": [{"role": "user", "content": "ping"}],
+                        "max_tokens": 1,
+                    },
+                )
+                if resp.status_code == 200:
+                    return True, "✅ 正常"
+                return False, f"❌ HTTP {resp.status_code}"
+        except httpx.TimeoutException:
+            return False, "⚠️ 超時 (>10s)"
+        except Exception as e:
+            return False, f"❌ {str(e)[:40]}"
+
    async def send_heartbeat(self) -> bool:
        """
-        發送心跳訊息 (系統狀態摘要)
+        發送心跳訊息 (系統狀態摘要，含 Nemotron 健康探測)

        每 30 分鐘執行一次，證明告警鏈路正常運作
+        2026-04-03 ogt: 加入 Nemotron 健康探測 — 補足監控盲區
        """
        try:
            if not self._initialized:
                await self.initialize()

-            # 心跳訊息 (2026-03-30 ogt: 改用台北時區，符合 feedback_timezone_taipei.md)
            from src.utils.timezone import now_taipei
            taipei_now = now_taipei()
+
+            # Nemotron 健康探測
+            nemo_ok, nemo_status = await self._check_nemotron_health()
+
            text = f"""💓 <b>AWOOOI 心跳</b>
 ⏰ {taipei_now.strftime('%Y-%m-%d %H:%M:%S')} (台北)
-📡 告警鏈路: ✅ 正常"""
+📡 告警鏈路: ✅ 正常
+🤖 Nemotron NIM: {nemo_status}"""

            await self.send_notification(text)
            self._last_message_time = datetime.now(UTC)

-            logger.info("telegram_heartbeat_sent")
+            # Nemotron 異常時額外發告警
+            if not nemo_ok:
+                await self.send_notification(
+                    f"🚨 <b>Nemotron 異常告警</b>\n\n"
+                    f"NVIDIA NIM API 不可用: <code>{nemo_status}</code>\n"
+                    f"影響: 所有 incident 的 Nemotron Tool Calling 將 100% 超時\n"
+                    f"緩解: <code>kubectl set env deployment/awoooi-api ENABLE_NEMOTRON_COLLABORATION=false -n awoooi-prod</code>"
+                )
+                logger.error("nemotron_health_alert_sent", status=nemo_status)
+
+            logger.info("telegram_heartbeat_sent", nemotron_ok=nemo_ok)
            return True

        except Exception as e: