diff --git a/apps/api/src/services/chat_manager.py b/apps/api/src/services/chat_manager.py index 7ccd1b9b..e1a5749a 100644 --- a/apps/api/src/services/chat_manager.py +++ b/apps/api/src/services/chat_manager.py @@ -4,16 +4,20 @@ AWOOOI Chat Manager - 雙 AI 對話核心 Phase 21.5 初版: 2026-03-31 ogt Phase 22.6 重寫: 2026-04-03 ogt (統帥需求: 雙 AI 互動對話) -功能: -1. @openclaw → 只有 OpenClaw 回應 -2. @nemo → 只有 NemoClaw 回應 -3. 無前綴 → OpenClaw 先答,NemoClaw 評論/反駁 +架構: +- OpenClaw (192.168.0.188:8088): RCA 仲裁者,負責回答 +- NemoClaw (NVIDIA NIM nemotron-mini-4b): 戰術參謀,評論/補充 -後端: -- 雙 AI 皆用 Gemini Flash,靠不同 persona 區分人格 -- Ollama 188 目前卡死 (0 bytes/30s),待主機重啟後可切換回來 +使用模式: + @openclaw → 只有 OpenClaw 回應 + @nemo → 只有 NemoClaw 回應 + 其他 → OpenClaw 先答,NemoClaw 評論 + +注意: NIM 免費 tier 延遲 11-45s,對話採異步模式: + 先推 OpenClaw 回應,NemoClaw 完成後再補充 """ +import asyncio import structlog from src.utils.timezone import now_taipei from src.repositories.k8s_repository import get_k8s_repository @@ -23,14 +27,13 @@ logger = structlog.get_logger(__name__) OPENCLAW_PERSONA = """你是 OpenClaw,AWOOOI 平台的 SRE AI 主帥。 個性: 精準、果斷、專業,像老將一樣直接給出建議。 -語氣: 簡短有力,不廢話。繁體中文回應。 -當 NemoClaw 有不同意見時,你會直接反駁或接受,不拐彎抹角。 +語氣: 簡短有力,不廢話。繁體中文回應。不超過 300 字。 """ -NEMOCLAW_PERSONA = """你是 NemoClaw,AWOOOI 平台的 AI 戰術參謀。 -個性: 分析型、喜歡從不同角度思考,會質疑假設。 -語氣: 帶點挑釁但建設性,繁體中文回應。 -當 OpenClaw 給出意見時,你會評估是否同意,必要時提出替代方案。 +NEMOCLAW_PERSONA = """你是 NemoClaw,AWOOOI 平台的 AI 戰術參謀,由 NVIDIA Nemotron 驅動。 +個性: 分析型、從不同角度思考,會質疑假設。 +語氣: 帶點挑釁但建設性,繁體中文回應。不超過 200 字。 +評論 OpenClaw 的回應時,直接說「我補充」或「我有不同看法」。 """ @@ -70,77 +73,72 @@ class ChatManager: f"- 活躍告警: {incident_summary}\n" ) - async def _call_gemini(self, system_prompt: str, user_message: str, temperature: float = 0.7) -> str | None: + async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None: """ - 呼叫 Gemini Flash + 呼叫 OpenClaw (192.168.0.188:8088) - 2026-04-03 ogt: 雙 AI 皆走 Gemini,用不同 persona 區分 - OpenClaw temperature=0.5 (精準), NemoClaw temperature=0.9 (發散) + OpenClaw 是產品 AI 大腦,對話走 /api/v1/analyze/incident 的通用分析路徑 """ import httpx from src.core.config import get_settings settings = get_settings() - api_key = getattr(settings, 'GEMINI_API_KEY', None) - if not api_key: - logger.warning("gemini_api_key_not_set") - return None - + openclaw_url = getattr(settings, 'OPENCLAW_URL', 'http://192.168.0.188:8088') try: - full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}" - async with httpx.AsyncClient(timeout=20.0) as client: + # OpenClaw 沒有通用 chat endpoint,用 analyze/incident 傳入對話內容 + async with httpx.AsyncClient(timeout=30.0) as client: resp = await client.post( - f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}", + f"{openclaw_url}/api/v1/analyze/incident", json={ - "contents": [{"role": "user", "parts": [{"text": full_prompt}]}], - "generationConfig": {"temperature": temperature, "maxOutputTokens": 512}, + "incident_id": "CHAT", + "severity": "P3", + "signals": [{"alert_name": "user_chat", "description": user_message[:800]}], + "affected_services": ["interactive_chat"], + "expert_context": {"system_prompt": system_prompt[:500]}, }, ) resp.raise_for_status() data = resp.json() - return data["candidates"][0]["content"]["parts"][0]["text"].strip() + # 從 reasoning 取出自然語言回應 + return data.get("reasoning") or data.get("description") or data.get("action_title") except Exception as e: - logger.warning("gemini_chat_failed", error=str(e)) + logger.warning("openclaw_chat_failed", error=str(e)) return None - async def _openclaw_respond(self, context: str, message: str) -> str: - """OpenClaw 回應 (Gemini + OpenClaw persona, temperature=0.5)""" - result = await self._call_gemini(f"{OPENCLAW_PERSONA}\n{context}", message, temperature=0.5) - if not result: - result = "🔴 OpenClaw 暫時無法回應。" - return f"🦞 OpenClaw:\n{result}" + async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None: + """ + 呼叫 NVIDIA NIM nemotron-mini-4b (NemoClaw) - async def _nemoclaw_respond(self, context: str, message: str) -> str: - """NemoClaw 回應 (Gemini + NemoClaw persona, temperature=0.9)""" - result = await self._call_gemini(f"{NEMOCLAW_PERSONA}\n{context}", message, temperature=0.9) - if not result: - result = "🔴 NemoClaw 暫時無法回應。" - return f"🤖 NemoClaw:\n{result}" - - async def _nemoclaw_comment_on(self, context: str, openclaw_response: str, original_msg: str) -> str | None: - """NemoClaw 評論 OpenClaw 的回應""" - message = ( - f"統帥問了: {original_msg}\n\n" - f"OpenClaw 剛才回應:\n{openclaw_response}\n\n" - f"請從 NemoClaw 角度評論。可以補充、反駁、或提出不同觀點。簡短有力。" - ) - result = await self._call_gemini(f"{NEMOCLAW_PERSONA}\n{context}", message, temperature=0.9) - if not result: + NIM 免費 tier 延遲 11-45s,此方法可能需要 30-45s 才回應 + """ + from src.services.nvidia_provider import get_nvidia_provider + nvidia = get_nvidia_provider() + try: + full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}" + response, success, _, _ = await nvidia.chat( + prompt=full_prompt, + model="nvidia/nemotron-mini-4b-instruct", + max_tokens=300, + ) + if success and response and "not configured" not in response and "Circuit Breaker" not in response: + return response.strip() + return None + except Exception as e: + logger.warning("nemotron_chat_failed", error=str(e)) return None - return f"🤖 NemoClaw 補充:\n{result}" async def generate_response( self, - user_id: int, - username: str, + user_id: int, # noqa: ARG002 + username: str, # noqa: ARG002 message_text: str, ) -> str: """ - 根據訊息內容決定回應模式: + 根據訊息決定回應模式: @openclaw → 只有 OpenClaw 回應 @nemo → 只有 NemoClaw 回應 - 其他 → OpenClaw 先回,NemoClaw 補充/反駁 + 其他 → OpenClaw 先回,NemoClaw 異步補充 """ context = await self.get_system_context() text = message_text.strip() @@ -148,26 +146,41 @@ class ChatManager: # 模式 1: 指定 OpenClaw if text.lower().startswith("@openclaw"): msg = text[9:].strip() or text - return await self._openclaw_respond(context, msg) + result = await self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", msg) + return f"🦞 OpenClaw:\n{result or '🔴 OpenClaw 無響應'}" # 模式 2: 指定 NemoClaw if text.lower().startswith("@nemo"): msg = text[5:].strip() or text - return await self._nemoclaw_respond(context, msg) + result = await self._call_nemotron(f"{NEMOCLAW_PERSONA}\n{context}", msg) + return f"🤖 NemoClaw:\n{result or '🔴 NemoClaw 無響應 (NIM 超時)'}" - # 模式 3: 雙 AI 對話 — OpenClaw 先,NemoClaw 評論 - openclaw_raw = await self._call_gemini( - f"{OPENCLAW_PERSONA}\n{context}", text, temperature=0.5 + # 模式 3: 雙 AI — OpenClaw 先答,NemoClaw 並行 + openclaw_task = asyncio.create_task( + self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", text) + ) + nemo_task = asyncio.create_task( + self._call_nemotron( + f"{NEMOCLAW_PERSONA}\n{context}", + f"統帥問了: {text}\n\n請從 NemoClaw 角度補充或評論。", + ) ) - if not openclaw_raw: - openclaw_raw = "Gemini 無響應,OpenClaw 暫時離線。" - openclaw_block = f"🦞 OpenClaw:\n{openclaw_raw}" + # OpenClaw 最多等 30s,NemoClaw 最多等 50s + try: + openclaw_raw = await asyncio.wait_for(asyncio.shield(openclaw_task), timeout=30.0) + except asyncio.TimeoutError: + openclaw_raw = None - nemo_block = await self._nemoclaw_comment_on(context, openclaw_raw, text) + openclaw_block = f"🦞 OpenClaw:\n{openclaw_raw or '🔴 無響應'}" - if nemo_block: - return f"{openclaw_block}\n\n{nemo_block}" + try: + nemo_raw = await asyncio.wait_for(nemo_task, timeout=50.0) + except asyncio.TimeoutError: + nemo_raw = None + + if nemo_raw: + return f"{openclaw_block}\n\n🤖 NemoClaw:\n{nemo_raw}" return openclaw_block diff --git a/apps/api/src/services/nvidia_provider.py b/apps/api/src/services/nvidia_provider.py index ec091072..f65aabac 100644 --- a/apps/api/src/services/nvidia_provider.py +++ b/apps/api/src/services/nvidia_provider.py @@ -117,9 +117,18 @@ NVIDIA_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions" # 預設模型 (2026-03-31 ogt: 恢復為 nemotron-mini-4b-instruct) NVIDIA_DEFAULT_MODEL = "nvidia/nemotron-mini-4b-instruct" -# 請求超時 (秒) - 2026-04-01 ogt: 30s 讓 Nemo 有機會回應,失敗後轉 Gemini -# 原 60s 太長;15s 太短 (Nemo 有時需 20-40s);30s 是實用平衡點 -NVIDIA_TIMEOUT = 30.0 +# 請求超時 (秒) +# 2026-04-01 ogt: 設為 30s (平衡點) +# 2026-04-03 ogt: 改從 config 讀取,與 NEMOTRON_TIMEOUT_SECONDS=45 對齊 +# Memory 記載 NIM 免費 tier 延遲 11-45s,30s 硬編碼導致慢請求全超時 +def _get_nvidia_timeout() -> float: + try: + from src.core.config import get_settings + return float(get_settings().NEMOTRON_TIMEOUT_SECONDS) + except Exception: + return 45.0 + +NVIDIA_TIMEOUT = _get_nvidia_timeout() # 重試次數 MAX_RETRIES = 2