""" AWOOOI Chat Manager - 雙 AI 對話核心 ====================================== Phase 21.5 初版: 2026-03-31 ogt Phase 22.6 重寫: 2026-04-03 ogt (統帥需求: 雙 AI 互動對話) 架構: - OpenClaw (192.168.0.188:8088): RCA 仲裁者,負責回答 - NemoClaw (NVIDIA NIM nemotron-mini-4b): 戰術參謀,評論/補充 使用模式: @openclaw → 只有 OpenClaw 回應 @nemo → 只有 NemoClaw 回應 其他 → OpenClaw 先答,NemoClaw 評論 注意: NIM 免費 tier 延遲 11-45s,對話採異步模式: 先推 OpenClaw 回應,NemoClaw 完成後再補充 """ import asyncio import structlog from src.utils.timezone import now_taipei from src.repositories.k8s_repository import get_k8s_repository from src.repositories.incident_repository import get_incident_repository logger = structlog.get_logger(__name__) OPENCLAW_PERSONA = """你是 OpenClaw,AWOOOI 平台的 SRE AI 首席顧問。 個性: 精準、果斷、專業,像老將一樣直接給出建議。 語氣: 簡短有力,不廢話。繁體中文回應。不超過 300 字。 稱呼用戶為「老闆」。 """ NEMOCLAW_PERSONA = """你是 NemoClaw,AWOOOI 平台的 AI 戰術參謀,由 NVIDIA Nemotron 驅動。 個性: 分析型、從不同角度思考,會質疑假設。 語氣: 帶點挑釁但建設性。不超過 200 字。 稱呼用戶為「老闆」。評論 OpenClaw 的回應時,直接說「我補充」或「我有不同看法」。 重要:你必須全程使用繁體中文回應,禁止使用英文或其他語言。 """ class ChatManager: """AWOOOI 雙 AI 對話管理器""" def __init__(self): pass # 2026-04-03 ogt: 移除 repo 實例化,leWOOOgo 規範禁止 Service 持有 repository async def get_system_context(self) -> str: """收集系統即時上下文""" now = now_taipei() k8s = get_k8s_repository() incidents = get_incident_repository() try: k8s_status = await k8s.get_pod_status_summary(namespace="awoooi-prod") cluster_info = f"Cluster: {k8s_status['running']}/{k8s_status['total']} Pods Running" if k8s_status.get('problem_pods'): cluster_info += f", {len(k8s_status['problem_pods'])} 異常" except Exception: cluster_info = "Cluster: 無法取得狀態" try: active_incidents = await incidents.get_active() if active_incidents: lines = [f"- {inc.incident_id}: {inc.status.value} (SEV {inc.severity.value})" for inc in active_incidents[:3]] incident_summary = "\n".join(lines) else: incident_summary = "無活躍告警" except Exception: incident_summary = "無法取得告警" return ( f"## 系統狀態 ({now.strftime('%Y-%m-%d %H:%M')} 台北)\n" f"- {cluster_info}\n" f"- 活躍告警: {incident_summary}\n" ) async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None: """ 呼叫 OpenClaw 對話 — 走 Ollama qwen2.5:7b-instruct (192.168.0.188:11434) 2026-04-03 ogt: OpenClaw 8088 的 analyze/incident 是告警分析 API, 不適合做自然語言對話(回覆會是告警格式)。 改用 Ollama 本地模型做 chat,速度快、無格式污染。 """ import httpx from src.core.config import get_settings settings = get_settings() ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434') openclaw_timeout = float(getattr(settings, 'OPENCLAW_TIMEOUT', 40.0)) try: async with httpx.AsyncClient(timeout=openclaw_timeout) as client: resp = await client.post( f"{ollama_url}/api/chat", json={ "model": "qwen2.5:7b-instruct", "stream": False, "messages": [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_message}, ], "options": {"num_predict": 300}, }, ) resp.raise_for_status() data = resp.json() return data.get("message", {}).get("content", "").strip() or None except Exception as e: logger.warning("openclaw_chat_failed", error=str(e)) return None async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None: """ 呼叫 NemoClaw — NIM 優先,超時則 fallback 到 Ollama llama3.2:3b 2026-04-03 ogt: NIM 免費 tier 延遲 11-45s 且常超時, 加 Ollama fallback 確保 NemoClaw 一定有回應。 """ import httpx from src.core.config import get_settings as _get_settings from src.services.nvidia_provider import get_nvidia_provider settings = _get_settings() # 優先嘗試 NIM (timeout 20s,快速失敗) nvidia = get_nvidia_provider() try: full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}" response, success, _, _ = await nvidia.chat( prompt=full_prompt, model="nvidia/nemotron-mini-4b-instruct", max_tokens=300, ) if success and response and "not configured" not in response and "Circuit Breaker" not in response: return response.strip() except Exception as e: logger.warning("nemotron_nim_failed_fallback_ollama", error=str(e)) # Fallback: Ollama llama3.2:3b (本地,速度快) ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434') try: async with httpx.AsyncClient(timeout=30.0) as client: resp = await client.post( f"{ollama_url}/api/chat", json={ "model": "llama3.2:3b", "stream": False, "messages": [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_message}, ], "options": {"num_predict": 250}, }, ) resp.raise_for_status() data = resp.json() result = data.get("message", {}).get("content", "").strip() if result: logger.info("nemotron_ollama_fallback_used") return result except Exception as e: logger.warning("nemotron_ollama_fallback_failed", error=str(e)) return None async def generate_response( self, user_id: int, # noqa: ARG002 username: str, # noqa: ARG002 message_text: str, ) -> str: """ 根據訊息決定回應模式: @openclaw → 只有 OpenClaw 回應 @nemo → 只有 NemoClaw 回應 其他 → OpenClaw 先回,NemoClaw 異步補充 """ context = await self.get_system_context() text = message_text.strip() # 模式 1: 指定 OpenClaw if text.lower().startswith("@openclaw"): msg = text[9:].strip() or text result = await self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", msg) return f"🦞 OpenClaw:\n{result or '🔴 OpenClaw 無響應'}" # 模式 2: 指定 NemoClaw if text.lower().startswith("@nemo"): msg = text[5:].strip() or text result = await self._call_nemotron(f"{NEMOCLAW_PERSONA}\n{context}", msg) return f"🤖 NemoClaw:\n{result or '🔴 NemoClaw 無響應 (NIM 超時)'}" # 模式 3: 雙 AI — OpenClaw 先答,NemoClaw 並行 openclaw_task = asyncio.create_task( self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", text) ) nemo_task = asyncio.create_task( self._call_nemotron( f"{NEMOCLAW_PERSONA}\n{context}", f"統帥問了: {text}\n\n請從 NemoClaw 角度補充或評論。", ) ) # OpenClaw 最多等 40s(含 context 取得時間),NemoClaw 最多等 60s # 2026-04-03 ogt: 移除 asyncio.shield — shield 會在超時後讓 task 繼續跑但無人等待,造成 silent leak try: openclaw_raw = await asyncio.wait_for(openclaw_task, timeout=40.0) except asyncio.TimeoutError: openclaw_raw = None openclaw_block = f"🦞 OpenClaw:\n{openclaw_raw or '🔴 無響應'}" try: nemo_raw = await asyncio.wait_for(nemo_task, timeout=60.0) except asyncio.TimeoutError: nemo_raw = None if nemo_raw: return f"{openclaw_block}\n\n🤖 NemoClaw:\n{nemo_raw}" return openclaw_block # Singleton _chat_manager: ChatManager | None = None def get_chat_manager() -> ChatManager: global _chat_manager if _chat_manager is None: _chat_manager = ChatManager() return _chat_manager