Files
awoooi/apps/api/src/services/chat_manager.py
OG T 2da8da5a25
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 6m51s
fix(chat): OpenClaw 改用 Ollama qwen2.5 做對話 + NemoClaw 加 Ollama fallback
問題: _call_openclaw 用 analyze/incident API → 回覆是告警格式,不是自然語言
修法:
  1. OpenClaw chat → Ollama qwen2.5:7b-instruct (本地,快速,無格式污染)
  2. NemoClaw → NIM 優先,超時 fallback 到 Ollama llama3.2:3b

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 18:30:31 +08:00

234 lines
9.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
AWOOOI Chat Manager - 雙 AI 對話核心
======================================
Phase 21.5 初版: 2026-03-31 ogt
Phase 22.6 重寫: 2026-04-03 ogt (統帥需求: 雙 AI 互動對話)
架構:
- OpenClaw (192.168.0.188:8088): RCA 仲裁者,負責回答
- NemoClaw (NVIDIA NIM nemotron-mini-4b): 戰術參謀,評論/補充
使用模式:
@openclaw <msg> → 只有 OpenClaw 回應
@nemo <msg> → 只有 NemoClaw 回應
其他 → OpenClaw 先答NemoClaw 評論
注意: NIM 免費 tier 延遲 11-45s對話採異步模式:
先推 OpenClaw 回應NemoClaw 完成後再補充
"""
import asyncio
import structlog
from src.utils.timezone import now_taipei
from src.repositories.k8s_repository import get_k8s_repository
from src.repositories.incident_repository import get_incident_repository
# Module-level structlog logger, named after this module.
logger = structlog.get_logger(__name__)
# System-prompt persona for OpenClaw. The text tells the model to act as the
# platform's chief SRE AI advisor: precise and decisive, terse, answering in
# Traditional Chinese in at most 300 characters and addressing the user as
# "老闆" (boss). This string is sent to the model verbatim.
# NOTE(review): "OpenClawAWOOOI" on the first line looks like it lost a
# separator between the two names — confirm against the repository copy.
OPENCLAW_PERSONA = """你是 OpenClawAWOOOI 平台的 SRE AI 首席顧問。
個性: 精準、果斷、專業,像老將一樣直接給出建議。
語氣: 簡短有力,不廢話。繁體中文回應。不超過 300 字。
稱呼用戶為「老闆」。
"""
# System-prompt persona for NemoClaw. The text tells the model to act as the
# platform's analytical "tactical staff officer": it questions assumptions,
# is provocative but constructive, stays within 200 characters, addresses the
# user as "老闆" (boss), opens commentary on OpenClaw with "我補充" or
# "我有不同看法", and must reply only in Traditional Chinese.
# NOTE(review): "NemoClawAWOOOI" on the first line looks like it lost a
# separator between the two names — confirm against the repository copy.
NEMOCLAW_PERSONA = """你是 NemoClawAWOOOI 平台的 AI 戰術參謀,由 NVIDIA Nemotron 驅動。
個性: 分析型、從不同角度思考,會質疑假設。
語氣: 帶點挑釁但建設性。不超過 200 字。
稱呼用戶為「老闆」。評論 OpenClaw 的回應時,直接說「我補充」或「我有不同看法」。
重要:你必須全程使用繁體中文回應,禁止使用英文或其他語言。
"""
class ChatManager:
"""AWOOOI 雙 AI 對話管理器"""
def __init__(self):
pass # 2026-04-03 ogt: 移除 repo 實例化leWOOOgo 規範禁止 Service 持有 repository
async def get_system_context(self) -> str:
"""收集系統即時上下文"""
now = now_taipei()
k8s = get_k8s_repository()
incidents = get_incident_repository()
try:
k8s_status = await k8s.get_pod_status_summary(namespace="awoooi-prod")
cluster_info = f"Cluster: {k8s_status['running']}/{k8s_status['total']} Pods Running"
if k8s_status.get('problem_pods'):
cluster_info += f", {len(k8s_status['problem_pods'])} 異常"
except Exception:
cluster_info = "Cluster: 無法取得狀態"
try:
active_incidents = await incidents.get_active()
if active_incidents:
lines = [f"- {inc.incident_id}: {inc.status.value} (SEV {inc.severity.value})"
for inc in active_incidents[:3]]
incident_summary = "\n".join(lines)
else:
incident_summary = "無活躍告警"
except Exception:
incident_summary = "無法取得告警"
return (
f"## 系統狀態 ({now.strftime('%Y-%m-%d %H:%M')} 台北)\n"
f"- {cluster_info}\n"
f"- 活躍告警: {incident_summary}\n"
)
async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None:
"""
呼叫 OpenClaw 對話 — 走 Ollama qwen2.5:7b-instruct (192.168.0.188:11434)
2026-04-03 ogt: OpenClaw 8088 的 analyze/incident 是告警分析 API
不適合做自然語言對話(回覆會是告警格式)。
改用 Ollama 本地模型做 chat速度快、無格式污染。
"""
import httpx
from src.core.config import get_settings
settings = get_settings()
ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434')
openclaw_timeout = float(getattr(settings, 'OPENCLAW_TIMEOUT', 40.0))
try:
async with httpx.AsyncClient(timeout=openclaw_timeout) as client:
resp = await client.post(
f"{ollama_url}/api/chat",
json={
"model": "qwen2.5:7b-instruct",
"stream": False,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message},
],
"options": {"num_predict": 300},
},
)
resp.raise_for_status()
data = resp.json()
return data.get("message", {}).get("content", "").strip() or None
except Exception as e:
logger.warning("openclaw_chat_failed", error=str(e))
return None
async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None:
"""
呼叫 NemoClaw — NIM 優先,超時則 fallback 到 Ollama llama3.2:3b
2026-04-03 ogt: NIM 免費 tier 延遲 11-45s 且常超時,
加 Ollama fallback 確保 NemoClaw 一定有回應。
"""
import httpx
from src.core.config import get_settings as _get_settings
from src.services.nvidia_provider import get_nvidia_provider
settings = _get_settings()
# 優先嘗試 NIM (timeout 20s快速失敗)
nvidia = get_nvidia_provider()
try:
full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}"
response, success, _, _ = await nvidia.chat(
prompt=full_prompt,
model="nvidia/nemotron-mini-4b-instruct",
max_tokens=300,
)
if success and response and "not configured" not in response and "Circuit Breaker" not in response:
return response.strip()
except Exception as e:
logger.warning("nemotron_nim_failed_fallback_ollama", error=str(e))
# Fallback: Ollama llama3.2:3b (本地,速度快)
ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434')
try:
async with httpx.AsyncClient(timeout=30.0) as client:
resp = await client.post(
f"{ollama_url}/api/chat",
json={
"model": "llama3.2:3b",
"stream": False,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message},
],
"options": {"num_predict": 250},
},
)
resp.raise_for_status()
data = resp.json()
result = data.get("message", {}).get("content", "").strip()
if result:
logger.info("nemotron_ollama_fallback_used")
return result
except Exception as e:
logger.warning("nemotron_ollama_fallback_failed", error=str(e))
return None
async def generate_response(
self,
user_id: int, # noqa: ARG002
username: str, # noqa: ARG002
message_text: str,
) -> str:
"""
根據訊息決定回應模式:
@openclaw <msg> → 只有 OpenClaw 回應
@nemo <msg> → 只有 NemoClaw 回應
其他 → OpenClaw 先回NemoClaw 異步補充
"""
context = await self.get_system_context()
text = message_text.strip()
# 模式 1: 指定 OpenClaw
if text.lower().startswith("@openclaw"):
msg = text[9:].strip() or text
result = await self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", msg)
return f"🦞 <b>OpenClaw:</b>\n{result or '🔴 OpenClaw 無響應'}"
# 模式 2: 指定 NemoClaw
if text.lower().startswith("@nemo"):
msg = text[5:].strip() or text
result = await self._call_nemotron(f"{NEMOCLAW_PERSONA}\n{context}", msg)
return f"🤖 <b>NemoClaw:</b>\n{result or '🔴 NemoClaw 無響應 (NIM 超時)'}"
# 模式 3: 雙 AI — OpenClaw 先答NemoClaw 並行
openclaw_task = asyncio.create_task(
self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", text)
)
nemo_task = asyncio.create_task(
self._call_nemotron(
f"{NEMOCLAW_PERSONA}\n{context}",
f"統帥問了: {text}\n\n請從 NemoClaw 角度補充或評論。",
)
)
# OpenClaw 最多等 40s含 context 取得時間NemoClaw 最多等 60s
# 2026-04-03 ogt: 移除 asyncio.shield — shield 會在超時後讓 task 繼續跑但無人等待,造成 silent leak
try:
openclaw_raw = await asyncio.wait_for(openclaw_task, timeout=40.0)
except asyncio.TimeoutError:
openclaw_raw = None
openclaw_block = f"🦞 <b>OpenClaw:</b>\n{openclaw_raw or '🔴 無響應'}"
try:
nemo_raw = await asyncio.wait_for(nemo_task, timeout=60.0)
except asyncio.TimeoutError:
nemo_raw = None
if nemo_raw:
return f"{openclaw_block}\n\n🤖 <b>NemoClaw:</b>\n{nemo_raw}"
return openclaw_block
# Process-wide ChatManager instance; stays None until first requested.
_chat_manager: ChatManager | None = None


def get_chat_manager() -> ChatManager:
    """Return the shared ChatManager, constructing it on the first call."""
    global _chat_manager
    if _chat_manager is not None:
        return _chat_manager
    _chat_manager = ChatManager()
    return _chat_manager