All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 6m51s
問題: _call_openclaw 用 analyze/incident API → 回覆是告警格式,不是自然語言 修法: 1. OpenClaw chat → Ollama qwen2.5:7b-instruct (本地,快速,無格式污染) 2. NemoClaw → NIM 優先,超時 fallback 到 Ollama llama3.2:3b Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
234 lines
9.2 KiB
Python
234 lines
9.2 KiB
Python
"""
|
||
AWOOOI Chat Manager - 雙 AI 對話核心
|
||
======================================
|
||
Phase 21.5 初版: 2026-03-31 ogt
|
||
Phase 22.6 重寫: 2026-04-03 ogt (統帥需求: 雙 AI 互動對話)
|
||
|
||
架構:
|
||
- OpenClaw: RCA 仲裁者,負責回答。對話走 Ollama qwen2.5:7b-instruct (預設 192.168.0.188:11434);192.168.0.188:8088 的 analyze/incident 是告警分析 API,不用於對話
|
||
- NemoClaw (NVIDIA NIM nemotron-mini-4b,失敗時 fallback 到 Ollama llama3.2:3b): 戰術參謀,評論/補充
|
||
|
||
使用模式:
|
||
@openclaw <msg> → 只有 OpenClaw 回應
|
||
@nemo <msg> → 只有 NemoClaw 回應
|
||
其他 → OpenClaw 先答,NemoClaw 評論
|
||
|
||
注意: NIM 免費 tier 延遲 11-45s,對話採異步模式:
|
||
OpenClaw 與 NemoClaw 並行呼叫;先等 OpenClaw (最多 40s),再等 NemoClaw (最多 60s),最後合併成單一回應回傳
|
||
"""
|
||
|
||
import asyncio
|
||
import structlog
|
||
from src.utils.timezone import now_taipei
|
||
from src.repositories.k8s_repository import get_k8s_repository
|
||
from src.repositories.incident_repository import get_incident_repository
|
||
|
||
# Module-level structlog logger, named after this module.
logger = structlog.get_logger(__name__)
|
||
|
||
# System prompt describing the OpenClaw persona. The text is sent to the model
# verbatim, so it must contain nothing but the prompt itself (no stray
# separator lines).
OPENCLAW_PERSONA = """你是 OpenClaw,AWOOOI 平台的 SRE AI 首席顧問。
個性: 精準、果斷、專業,像老將一樣直接給出建議。
語氣: 簡短有力,不廢話。繁體中文回應。不超過 300 字。
稱呼用戶為「老闆」。
"""
|
||
|
||
# System prompt describing the NemoClaw persona. The text is sent to the model
# verbatim, so it must contain nothing but the prompt itself (no stray
# separator lines).
NEMOCLAW_PERSONA = """你是 NemoClaw,AWOOOI 平台的 AI 戰術參謀,由 NVIDIA Nemotron 驅動。
個性: 分析型、從不同角度思考,會質疑假設。
語氣: 帶點挑釁但建設性。不超過 200 字。
稱呼用戶為「老闆」。評論 OpenClaw 的回應時,直接說「我補充」或「我有不同看法」。
重要:你必須全程使用繁體中文回應,禁止使用英文或其他語言。
"""
|
||
|
||
|
||
class ChatManager:
|
||
"""AWOOOI 雙 AI 對話管理器"""
|
||
|
||
def __init__(self):
|
||
pass # 2026-04-03 ogt: 移除 repo 實例化,leWOOOgo 規範禁止 Service 持有 repository
|
||
|
||
async def get_system_context(self) -> str:
|
||
"""收集系統即時上下文"""
|
||
now = now_taipei()
|
||
k8s = get_k8s_repository()
|
||
incidents = get_incident_repository()
|
||
|
||
try:
|
||
k8s_status = await k8s.get_pod_status_summary(namespace="awoooi-prod")
|
||
cluster_info = f"Cluster: {k8s_status['running']}/{k8s_status['total']} Pods Running"
|
||
if k8s_status.get('problem_pods'):
|
||
cluster_info += f", {len(k8s_status['problem_pods'])} 異常"
|
||
except Exception:
|
||
cluster_info = "Cluster: 無法取得狀態"
|
||
|
||
try:
|
||
active_incidents = await incidents.get_active()
|
||
if active_incidents:
|
||
lines = [f"- {inc.incident_id}: {inc.status.value} (SEV {inc.severity.value})"
|
||
for inc in active_incidents[:3]]
|
||
incident_summary = "\n".join(lines)
|
||
else:
|
||
incident_summary = "無活躍告警"
|
||
except Exception:
|
||
incident_summary = "無法取得告警"
|
||
|
||
return (
|
||
f"## 系統狀態 ({now.strftime('%Y-%m-%d %H:%M')} 台北)\n"
|
||
f"- {cluster_info}\n"
|
||
f"- 活躍告警: {incident_summary}\n"
|
||
)
|
||
|
||
async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None:
|
||
"""
|
||
呼叫 OpenClaw 對話 — 走 Ollama qwen2.5:7b-instruct (192.168.0.188:11434)
|
||
|
||
2026-04-03 ogt: OpenClaw 8088 的 analyze/incident 是告警分析 API,
|
||
不適合做自然語言對話(回覆會是告警格式)。
|
||
改用 Ollama 本地模型做 chat,速度快、無格式污染。
|
||
"""
|
||
import httpx
|
||
from src.core.config import get_settings
|
||
settings = get_settings()
|
||
|
||
ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434')
|
||
openclaw_timeout = float(getattr(settings, 'OPENCLAW_TIMEOUT', 40.0))
|
||
try:
|
||
async with httpx.AsyncClient(timeout=openclaw_timeout) as client:
|
||
resp = await client.post(
|
||
f"{ollama_url}/api/chat",
|
||
json={
|
||
"model": "qwen2.5:7b-instruct",
|
||
"stream": False,
|
||
"messages": [
|
||
{"role": "system", "content": system_prompt},
|
||
{"role": "user", "content": user_message},
|
||
],
|
||
"options": {"num_predict": 300},
|
||
},
|
||
)
|
||
resp.raise_for_status()
|
||
data = resp.json()
|
||
return data.get("message", {}).get("content", "").strip() or None
|
||
except Exception as e:
|
||
logger.warning("openclaw_chat_failed", error=str(e))
|
||
return None
|
||
|
||
async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None:
|
||
"""
|
||
呼叫 NemoClaw — NIM 優先,超時則 fallback 到 Ollama llama3.2:3b
|
||
|
||
2026-04-03 ogt: NIM 免費 tier 延遲 11-45s 且常超時,
|
||
加 Ollama fallback 確保 NemoClaw 一定有回應。
|
||
"""
|
||
import httpx
|
||
from src.core.config import get_settings as _get_settings
|
||
from src.services.nvidia_provider import get_nvidia_provider
|
||
settings = _get_settings()
|
||
|
||
# 優先嘗試 NIM (timeout 20s,快速失敗)
|
||
nvidia = get_nvidia_provider()
|
||
try:
|
||
full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}"
|
||
response, success, _, _ = await nvidia.chat(
|
||
prompt=full_prompt,
|
||
model="nvidia/nemotron-mini-4b-instruct",
|
||
max_tokens=300,
|
||
)
|
||
if success and response and "not configured" not in response and "Circuit Breaker" not in response:
|
||
return response.strip()
|
||
except Exception as e:
|
||
logger.warning("nemotron_nim_failed_fallback_ollama", error=str(e))
|
||
|
||
# Fallback: Ollama llama3.2:3b (本地,速度快)
|
||
ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434')
|
||
try:
|
||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||
resp = await client.post(
|
||
f"{ollama_url}/api/chat",
|
||
json={
|
||
"model": "llama3.2:3b",
|
||
"stream": False,
|
||
"messages": [
|
||
{"role": "system", "content": system_prompt},
|
||
{"role": "user", "content": user_message},
|
||
],
|
||
"options": {"num_predict": 250},
|
||
},
|
||
)
|
||
resp.raise_for_status()
|
||
data = resp.json()
|
||
result = data.get("message", {}).get("content", "").strip()
|
||
if result:
|
||
logger.info("nemotron_ollama_fallback_used")
|
||
return result
|
||
except Exception as e:
|
||
logger.warning("nemotron_ollama_fallback_failed", error=str(e))
|
||
|
||
return None
|
||
|
||
async def generate_response(
|
||
self,
|
||
user_id: int, # noqa: ARG002
|
||
username: str, # noqa: ARG002
|
||
message_text: str,
|
||
) -> str:
|
||
"""
|
||
根據訊息決定回應模式:
|
||
|
||
@openclaw <msg> → 只有 OpenClaw 回應
|
||
@nemo <msg> → 只有 NemoClaw 回應
|
||
其他 → OpenClaw 先回,NemoClaw 異步補充
|
||
"""
|
||
context = await self.get_system_context()
|
||
text = message_text.strip()
|
||
|
||
# 模式 1: 指定 OpenClaw
|
||
if text.lower().startswith("@openclaw"):
|
||
msg = text[9:].strip() or text
|
||
result = await self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", msg)
|
||
return f"🦞 <b>OpenClaw:</b>\n{result or '🔴 OpenClaw 無響應'}"
|
||
|
||
# 模式 2: 指定 NemoClaw
|
||
if text.lower().startswith("@nemo"):
|
||
msg = text[5:].strip() or text
|
||
result = await self._call_nemotron(f"{NEMOCLAW_PERSONA}\n{context}", msg)
|
||
return f"🤖 <b>NemoClaw:</b>\n{result or '🔴 NemoClaw 無響應 (NIM 超時)'}"
|
||
|
||
# 模式 3: 雙 AI — OpenClaw 先答,NemoClaw 並行
|
||
openclaw_task = asyncio.create_task(
|
||
self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", text)
|
||
)
|
||
nemo_task = asyncio.create_task(
|
||
self._call_nemotron(
|
||
f"{NEMOCLAW_PERSONA}\n{context}",
|
||
f"統帥問了: {text}\n\n請從 NemoClaw 角度補充或評論。",
|
||
)
|
||
)
|
||
|
||
# OpenClaw 最多等 40s(含 context 取得時間),NemoClaw 最多等 60s
|
||
# 2026-04-03 ogt: 移除 asyncio.shield — shield 會在超時後讓 task 繼續跑但無人等待,造成 silent leak
|
||
try:
|
||
openclaw_raw = await asyncio.wait_for(openclaw_task, timeout=40.0)
|
||
except asyncio.TimeoutError:
|
||
openclaw_raw = None
|
||
|
||
openclaw_block = f"🦞 <b>OpenClaw:</b>\n{openclaw_raw or '🔴 無響應'}"
|
||
|
||
try:
|
||
nemo_raw = await asyncio.wait_for(nemo_task, timeout=60.0)
|
||
except asyncio.TimeoutError:
|
||
nemo_raw = None
|
||
|
||
if nemo_raw:
|
||
return f"{openclaw_block}\n\n🤖 <b>NemoClaw:</b>\n{nemo_raw}"
|
||
return openclaw_block
|
||
|
||
|
||
# Singleton
|
||
# Cached ChatManager instance; stays None until get_chat_manager() creates it.
_chat_manager: ChatManager | None = None
|
||
|
||
|
||
def get_chat_manager() -> ChatManager:
    """Return the module-wide ChatManager, creating it on the first call."""
    global _chat_manager
    manager = _chat_manager
    if manager is None:
        # First use: build the instance and cache it for later calls.
        manager = ChatManager()
        _chat_manager = manager
    return manager
|