fix(chat+nvidia): 還原 OpenClaw+Nemotron 架構 + 修 30s timeout 根因
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
ChatManager 還原:
- OpenClaw (188:8088) 負責 RCA 仲裁,不改用 Gemini (未經批准)
- NemoClaw (NVIDIA NIM nemotron-mini-4b) 負責補充/評論
- 雙 AI 並行執行,OpenClaw 30s / NemoClaw 50s timeout
- 支援 @openclaw / @nemo 指定對象

nvidia_provider.py 修 timeout 根因:
- NVIDIA_TIMEOUT 從硬編碼 30.0 改為讀 NEMOTRON_TIMEOUT_SECONDS (45s)
- Memory 記載 NIM 免費 tier 延遲 11-45s,30s 硬編碼導致慢請求全超時

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,16 +4,20 @@ AWOOOI Chat Manager - 雙 AI 對話核心
|
||||
Phase 21.5 初版: 2026-03-31 ogt
|
||||
Phase 22.6 重寫: 2026-04-03 ogt (統帥需求: 雙 AI 互動對話)
|
||||
|
||||
功能:
|
||||
1. @openclaw <msg> → 只有 OpenClaw 回應
|
||||
2. @nemo <msg> → 只有 NemoClaw 回應
|
||||
3. 無前綴 → OpenClaw 先答,NemoClaw 評論/反駁
|
||||
架構:
|
||||
- OpenClaw (192.168.0.188:8088): RCA 仲裁者,負責回答
|
||||
- NemoClaw (NVIDIA NIM nemotron-mini-4b): 戰術參謀,評論/補充
|
||||
|
||||
後端:
|
||||
- 雙 AI 皆用 Gemini Flash,靠不同 persona 區分人格
|
||||
- Ollama 188 目前卡死 (0 bytes/30s),待主機重啟後可切換回來
|
||||
使用模式:
|
||||
@openclaw <msg> → 只有 OpenClaw 回應
|
||||
@nemo <msg> → 只有 NemoClaw 回應
|
||||
其他 → OpenClaw 先答,NemoClaw 評論
|
||||
|
||||
注意: NIM 免費 tier 延遲 11-45s,對話採異步模式:
|
||||
先推 OpenClaw 回應,NemoClaw 完成後再補充
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import structlog
|
||||
from src.utils.timezone import now_taipei
|
||||
from src.repositories.k8s_repository import get_k8s_repository
|
||||
@@ -23,14 +27,13 @@ logger = structlog.get_logger(__name__)
|
||||
|
||||
OPENCLAW_PERSONA = """你是 OpenClaw,AWOOOI 平台的 SRE AI 主帥。
|
||||
個性: 精準、果斷、專業,像老將一樣直接給出建議。
|
||||
語氣: 簡短有力,不廢話。繁體中文回應。
|
||||
當 NemoClaw 有不同意見時,你會直接反駁或接受,不拐彎抹角。
|
||||
語氣: 簡短有力,不廢話。繁體中文回應。不超過 300 字。
|
||||
"""
|
||||
|
||||
NEMOCLAW_PERSONA = """你是 NemoClaw,AWOOOI 平台的 AI 戰術參謀。
|
||||
個性: 分析型、喜歡從不同角度思考,會質疑假設。
|
||||
語氣: 帶點挑釁但建設性,繁體中文回應。
|
||||
當 OpenClaw 給出意見時,你會評估是否同意,必要時提出替代方案。
|
||||
NEMOCLAW_PERSONA = """你是 NemoClaw,AWOOOI 平台的 AI 戰術參謀,由 NVIDIA Nemotron 驅動。
|
||||
個性: 分析型、從不同角度思考,會質疑假設。
|
||||
語氣: 帶點挑釁但建設性,繁體中文回應。不超過 200 字。
|
||||
評論 OpenClaw 的回應時,直接說「我補充」或「我有不同看法」。
|
||||
"""
|
||||
|
||||
|
||||
@@ -70,77 +73,72 @@ class ChatManager:
|
||||
f"- 活躍告警: {incident_summary}\n"
|
||||
)
|
||||
|
||||
async def _call_gemini(self, system_prompt: str, user_message: str, temperature: float = 0.7) -> str | None:
|
||||
async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None:
|
||||
"""
|
||||
呼叫 Gemini Flash
|
||||
呼叫 OpenClaw (192.168.0.188:8088)
|
||||
|
||||
2026-04-03 ogt: 雙 AI 皆走 Gemini,用不同 persona 區分
|
||||
OpenClaw temperature=0.5 (精準), NemoClaw temperature=0.9 (發散)
|
||||
OpenClaw 是產品 AI 大腦,對話走 /api/v1/analyze/incident 的通用分析路徑
|
||||
"""
|
||||
import httpx
|
||||
from src.core.config import get_settings
|
||||
settings = get_settings()
|
||||
|
||||
api_key = getattr(settings, 'GEMINI_API_KEY', None)
|
||||
if not api_key:
|
||||
logger.warning("gemini_api_key_not_set")
|
||||
return None
|
||||
|
||||
openclaw_url = getattr(settings, 'OPENCLAW_URL', 'http://192.168.0.188:8088')
|
||||
try:
|
||||
full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}"
|
||||
async with httpx.AsyncClient(timeout=20.0) as client:
|
||||
# OpenClaw 沒有通用 chat endpoint,用 analyze/incident 傳入對話內容
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
resp = await client.post(
|
||||
f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}",
|
||||
f"{openclaw_url}/api/v1/analyze/incident",
|
||||
json={
|
||||
"contents": [{"role": "user", "parts": [{"text": full_prompt}]}],
|
||||
"generationConfig": {"temperature": temperature, "maxOutputTokens": 512},
|
||||
"incident_id": "CHAT",
|
||||
"severity": "P3",
|
||||
"signals": [{"alert_name": "user_chat", "description": user_message[:800]}],
|
||||
"affected_services": ["interactive_chat"],
|
||||
"expert_context": {"system_prompt": system_prompt[:500]},
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
return data["candidates"][0]["content"]["parts"][0]["text"].strip()
|
||||
# 從 reasoning 取出自然語言回應
|
||||
return data.get("reasoning") or data.get("description") or data.get("action_title")
|
||||
except Exception as e:
|
||||
logger.warning("gemini_chat_failed", error=str(e))
|
||||
logger.warning("openclaw_chat_failed", error=str(e))
|
||||
return None
|
||||
|
||||
async def _openclaw_respond(self, context: str, message: str) -> str:
|
||||
"""OpenClaw 回應 (Gemini + OpenClaw persona, temperature=0.5)"""
|
||||
result = await self._call_gemini(f"{OPENCLAW_PERSONA}\n{context}", message, temperature=0.5)
|
||||
if not result:
|
||||
result = "🔴 OpenClaw 暫時無法回應。"
|
||||
return f"🦞 <b>OpenClaw:</b>\n{result}"
|
||||
async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None:
|
||||
"""
|
||||
呼叫 NVIDIA NIM nemotron-mini-4b (NemoClaw)
|
||||
|
||||
async def _nemoclaw_respond(self, context: str, message: str) -> str:
|
||||
"""NemoClaw 回應 (Gemini + NemoClaw persona, temperature=0.9)"""
|
||||
result = await self._call_gemini(f"{NEMOCLAW_PERSONA}\n{context}", message, temperature=0.9)
|
||||
if not result:
|
||||
result = "🔴 NemoClaw 暫時無法回應。"
|
||||
return f"🤖 <b>NemoClaw:</b>\n{result}"
|
||||
|
||||
async def _nemoclaw_comment_on(self, context: str, openclaw_response: str, original_msg: str) -> str | None:
|
||||
"""NemoClaw 評論 OpenClaw 的回應"""
|
||||
message = (
|
||||
f"統帥問了: {original_msg}\n\n"
|
||||
f"OpenClaw 剛才回應:\n{openclaw_response}\n\n"
|
||||
f"請從 NemoClaw 角度評論。可以補充、反駁、或提出不同觀點。簡短有力。"
|
||||
)
|
||||
result = await self._call_gemini(f"{NEMOCLAW_PERSONA}\n{context}", message, temperature=0.9)
|
||||
if not result:
|
||||
NIM 免費 tier 延遲 11-45s,此方法可能需要 30-45s 才回應
|
||||
"""
|
||||
from src.services.nvidia_provider import get_nvidia_provider
|
||||
nvidia = get_nvidia_provider()
|
||||
try:
|
||||
full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}"
|
||||
response, success, _, _ = await nvidia.chat(
|
||||
prompt=full_prompt,
|
||||
model="nvidia/nemotron-mini-4b-instruct",
|
||||
max_tokens=300,
|
||||
)
|
||||
if success and response and "not configured" not in response and "Circuit Breaker" not in response:
|
||||
return response.strip()
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning("nemotron_chat_failed", error=str(e))
|
||||
return None
|
||||
return f"🤖 <b>NemoClaw 補充:</b>\n{result}"
|
||||
|
||||
async def generate_response(
|
||||
self,
|
||||
user_id: int,
|
||||
username: str,
|
||||
user_id: int, # noqa: ARG002
|
||||
username: str, # noqa: ARG002
|
||||
message_text: str,
|
||||
) -> str:
|
||||
"""
|
||||
根據訊息內容決定回應模式:
|
||||
根據訊息決定回應模式:
|
||||
|
||||
@openclaw <msg> → 只有 OpenClaw 回應
|
||||
@nemo <msg> → 只有 NemoClaw 回應
|
||||
其他 → OpenClaw 先回,NemoClaw 補充/反駁
|
||||
其他 → OpenClaw 先回,NemoClaw 異步補充
|
||||
"""
|
||||
context = await self.get_system_context()
|
||||
text = message_text.strip()
|
||||
@@ -148,26 +146,41 @@ class ChatManager:
|
||||
# 模式 1: 指定 OpenClaw
|
||||
if text.lower().startswith("@openclaw"):
|
||||
msg = text[9:].strip() or text
|
||||
return await self._openclaw_respond(context, msg)
|
||||
result = await self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", msg)
|
||||
return f"🦞 <b>OpenClaw:</b>\n{result or '🔴 OpenClaw 無響應'}"
|
||||
|
||||
# 模式 2: 指定 NemoClaw
|
||||
if text.lower().startswith("@nemo"):
|
||||
msg = text[5:].strip() or text
|
||||
return await self._nemoclaw_respond(context, msg)
|
||||
result = await self._call_nemotron(f"{NEMOCLAW_PERSONA}\n{context}", msg)
|
||||
return f"🤖 <b>NemoClaw:</b>\n{result or '🔴 NemoClaw 無響應 (NIM 超時)'}"
|
||||
|
||||
# 模式 3: 雙 AI 對話 — OpenClaw 先,NemoClaw 評論
|
||||
openclaw_raw = await self._call_gemini(
|
||||
f"{OPENCLAW_PERSONA}\n{context}", text, temperature=0.5
|
||||
# 模式 3: 雙 AI — OpenClaw 先答,NemoClaw 並行
|
||||
openclaw_task = asyncio.create_task(
|
||||
self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", text)
|
||||
)
|
||||
nemo_task = asyncio.create_task(
|
||||
self._call_nemotron(
|
||||
f"{NEMOCLAW_PERSONA}\n{context}",
|
||||
f"統帥問了: {text}\n\n請從 NemoClaw 角度補充或評論。",
|
||||
)
|
||||
)
|
||||
if not openclaw_raw:
|
||||
openclaw_raw = "Gemini 無響應,OpenClaw 暫時離線。"
|
||||
|
||||
openclaw_block = f"🦞 <b>OpenClaw:</b>\n{openclaw_raw}"
|
||||
# OpenClaw 最多等 30s,NemoClaw 最多等 50s
|
||||
try:
|
||||
openclaw_raw = await asyncio.wait_for(asyncio.shield(openclaw_task), timeout=30.0)
|
||||
except asyncio.TimeoutError:
|
||||
openclaw_raw = None
|
||||
|
||||
nemo_block = await self._nemoclaw_comment_on(context, openclaw_raw, text)
|
||||
openclaw_block = f"🦞 <b>OpenClaw:</b>\n{openclaw_raw or '🔴 無響應'}"
|
||||
|
||||
if nemo_block:
|
||||
return f"{openclaw_block}\n\n{nemo_block}"
|
||||
try:
|
||||
nemo_raw = await asyncio.wait_for(nemo_task, timeout=50.0)
|
||||
except asyncio.TimeoutError:
|
||||
nemo_raw = None
|
||||
|
||||
if nemo_raw:
|
||||
return f"{openclaw_block}\n\n🤖 <b>NemoClaw:</b>\n{nemo_raw}"
|
||||
return openclaw_block
|
||||
|
||||
|
||||
|
||||
@@ -117,9 +117,18 @@ NVIDIA_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
|
||||
# 預設模型 (2026-03-31 ogt: 恢復為 nemotron-mini-4b-instruct)
|
||||
NVIDIA_DEFAULT_MODEL = "nvidia/nemotron-mini-4b-instruct"
|
||||
|
||||
# 請求超時 (秒) - 2026-04-01 ogt: 30s 讓 Nemo 有機會回應,失敗後轉 Gemini
|
||||
# 原 60s 太長;15s 太短 (Nemo 有時需 20-40s);30s 是實用平衡點
|
||||
NVIDIA_TIMEOUT = 30.0
|
||||
# 請求超時 (秒)
|
||||
# 2026-04-01 ogt: 設為 30s (平衡點)
|
||||
# 2026-04-03 ogt: 改從 config 讀取,與 NEMOTRON_TIMEOUT_SECONDS=45 對齊
|
||||
# Memory 記載 NIM 免費 tier 延遲 11-45s,30s 硬編碼導致慢請求全超時
|
||||
def _get_nvidia_timeout() -> float:
    """Return the NVIDIA NIM request timeout in seconds.

    The value is read from ``NEMOTRON_TIMEOUT_SECONDS`` on the project
    settings object. The 45.0 second default is returned instead when:

    * the settings module cannot be imported or loaded,
    * the attribute is missing or cannot be converted to ``float``,
    * the configured value is zero, negative, NaN or infinite.

    Returns:
        A positive, finite timeout in seconds.
    """
    default_timeout = 45.0
    try:
        # Imported lazily so a broken or absent config module cannot stop
        # this module from being imported.
        from src.core.config import get_settings

        timeout = float(get_settings().NEMOTRON_TIMEOUT_SECONDS)
    except Exception:
        # Deliberate best-effort: this runs at import time, so any settings
        # failure degrades to the default rather than crashing module load.
        return default_timeout

    # NaN fails both comparisons, so this single check rejects non-positive,
    # NaN and infinite values, none of which is a usable request timeout.
    if not (0 < timeout < float("inf")):
        return default_timeout
    return timeout
|
||||
|
||||
NVIDIA_TIMEOUT = _get_nvidia_timeout()
|
||||
|
||||
# 重試次數
|
||||
MAX_RETRIES = 2
|
||||
|
||||
Reference in New Issue
Block a user