Files
awoooi/apps/api/src/services/chat_manager.py
OG T fbf122fa1f
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 7m9s
fix(chat): OpenClaw 改用 NIM llama-3.1-8b 對話 + NemoClaw timeout 120s + 老闆稱謂
1. _call_openclaw: 改用 NIM meta/llama-3.1-8b-instruct
   舊的 analyze/incident 是告警 API,回覆是告警格式,不適合對話
2. _call_nemotron: 移除 Ollama fallback,回到純 NIM
3. NEMOTRON_TIMEOUT_SECONDS: 55 → 120 (ConfigMap 已更新)
4. 修正「統帥」→「老闆」

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 18:41:15 +08:00

192 lines
7.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
AWOOOI Chat Manager - 雙 AI 對話核心
======================================
Phase 21.5 初版: 2026-03-31 ogt
Phase 22.6 重寫: 2026-04-03 ogt (統帥需求: 雙 AI 互動對話)
架構:
- OpenClaw (192.168.0.188:8088): RCA 仲裁者,負責回答
  (對話改走 NVIDIA NIM meta/llama-3.1-8b-instruct;8088 的 analyze/incident 是告警分析 API,不適合對話)
- NemoClaw (NVIDIA NIM nemotron-mini-4b): 戰術參謀,評論/補充
使用模式:
    @openclaw <msg> → 只有 OpenClaw 回應
    @nemo <msg> → 只有 NemoClaw 回應
    其他 → OpenClaw 先答,NemoClaw 評論
注意: NIM 免費 tier 延遲 11-45s,對話採異步模式:
    先推 OpenClaw 回應,NemoClaw 完成後再補充
"""
import asyncio
import structlog
from src.utils.timezone import now_taipei
from src.repositories.k8s_repository import get_k8s_repository
from src.repositories.incident_repository import get_incident_repository
logger = structlog.get_logger(__name__)
# System prompt for the OpenClaw persona (the primary answerer).
# The separator between the bot name and the platform name is restored here:
# without it the prompt introduced the assistant as "OpenClawAWOOOI".
OPENCLAW_PERSONA = """你是 OpenClaw,AWOOOI 平台的 SRE AI 首席顧問。
個性: 精準、果斷、專業,像老將一樣直接給出建議。
語氣: 簡短有力,不廢話。繁體中文回應。不超過 300 字。
稱呼用戶為「老闆」。
"""

# System prompt for the NemoClaw persona (the commentator / second opinion).
# Same separator fix as above ("NemoClawAWOOOI" -> "NemoClaw,AWOOOI").
NEMOCLAW_PERSONA = """你是 NemoClaw,AWOOOI 平台的 AI 戰術參謀,由 NVIDIA Nemotron 驅動。
個性: 分析型、從不同角度思考,會質疑假設。
語氣: 帶點挑釁但建設性。不超過 200 字。
稱呼用戶為「老闆」。評論 OpenClaw 的回應時,直接說「我補充」或「我有不同看法」。
重要:你必須全程使用繁體中文回應,禁止使用英文或其他語言。
"""
class ChatManager:
"""AWOOOI 雙 AI 對話管理器"""
def __init__(self):
pass # 2026-04-03 ogt: 移除 repo 實例化leWOOOgo 規範禁止 Service 持有 repository
async def get_system_context(self) -> str:
"""收集系統即時上下文"""
now = now_taipei()
k8s = get_k8s_repository()
incidents = get_incident_repository()
try:
k8s_status = await k8s.get_pod_status_summary(namespace="awoooi-prod")
cluster_info = f"Cluster: {k8s_status['running']}/{k8s_status['total']} Pods Running"
if k8s_status.get('problem_pods'):
cluster_info += f", {len(k8s_status['problem_pods'])} 異常"
except Exception:
cluster_info = "Cluster: 無法取得狀態"
try:
active_incidents = await incidents.get_active()
if active_incidents:
lines = [f"- {inc.incident_id}: {inc.status.value} (SEV {inc.severity.value})"
for inc in active_incidents[:3]]
incident_summary = "\n".join(lines)
else:
incident_summary = "無活躍告警"
except Exception:
incident_summary = "無法取得告警"
return (
f"## 系統狀態 ({now.strftime('%Y-%m-%d %H:%M')} 台北)\n"
f"- {cluster_info}\n"
f"- 活躍告警: {incident_summary}\n"
)
async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None:
"""
呼叫 OpenClaw 對話 — 走 NVIDIA NIM meta/llama-3.1-8b-instruct
2026-04-03 ogt: OpenClaw 8088 的 analyze/incident 是告警分析 API
回覆是告警格式,不適合自然語言對話。
改用 NIM llama-3.1-8b 做 chat與 NemoClaw 同樣走免費 NIM cloud。
"""
from src.services.nvidia_provider import get_nvidia_provider
nvidia = get_nvidia_provider()
try:
full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}"
response, success, _, _ = await nvidia.chat(
prompt=full_prompt,
model="meta/llama-3.1-8b-instruct",
max_tokens=300,
)
if success and response and "not configured" not in response and "Circuit Breaker" not in response:
return response.strip()
return None
except Exception as e:
logger.warning("openclaw_chat_failed", error=str(e))
return None
async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None:
"""
呼叫 NVIDIA NIM nemotron-mini-4b (NemoClaw)
NIM 免費 tier 延遲 11-45s此方法可能需要 30-120s 才回應
"""
from src.services.nvidia_provider import get_nvidia_provider
nvidia = get_nvidia_provider()
try:
full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}"
response, success, _, _ = await nvidia.chat(
prompt=full_prompt,
model="nvidia/nemotron-mini-4b-instruct",
max_tokens=300,
)
if success and response and "not configured" not in response and "Circuit Breaker" not in response:
return response.strip()
return None
except Exception as e:
logger.warning("nemotron_chat_failed", error=str(e))
return None
async def generate_response(
self,
user_id: int, # noqa: ARG002
username: str, # noqa: ARG002
message_text: str,
) -> str:
"""
根據訊息決定回應模式:
@openclaw <msg> → 只有 OpenClaw 回應
@nemo <msg> → 只有 NemoClaw 回應
其他 → OpenClaw 先回NemoClaw 異步補充
"""
context = await self.get_system_context()
text = message_text.strip()
# 模式 1: 指定 OpenClaw
if text.lower().startswith("@openclaw"):
msg = text[9:].strip() or text
result = await self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", msg)
return f"🦞 <b>OpenClaw:</b>\n{result or '🔴 OpenClaw 無響應'}"
# 模式 2: 指定 NemoClaw
if text.lower().startswith("@nemo"):
msg = text[5:].strip() or text
result = await self._call_nemotron(f"{NEMOCLAW_PERSONA}\n{context}", msg)
return f"🤖 <b>NemoClaw:</b>\n{result or '🔴 NemoClaw 無響應 (NIM 超時)'}"
# 模式 3: 雙 AI — OpenClaw 先答NemoClaw 並行
openclaw_task = asyncio.create_task(
self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", text)
)
nemo_task = asyncio.create_task(
self._call_nemotron(
f"{NEMOCLAW_PERSONA}\n{context}",
f"老闆問了: {text}\n\n請從 NemoClaw 角度補充或評論。",
)
)
# OpenClaw 最多等 40s含 context 取得時間NemoClaw 最多等 60s
# 2026-04-03 ogt: 移除 asyncio.shield — shield 會在超時後讓 task 繼續跑但無人等待,造成 silent leak
try:
openclaw_raw = await asyncio.wait_for(openclaw_task, timeout=40.0)
except asyncio.TimeoutError:
openclaw_raw = None
openclaw_block = f"🦞 <b>OpenClaw:</b>\n{openclaw_raw or '🔴 無響應'}"
try:
nemo_raw = await asyncio.wait_for(nemo_task, timeout=60.0)
except asyncio.TimeoutError:
nemo_raw = None
if nemo_raw:
return f"{openclaw_block}\n\n🤖 <b>NemoClaw:</b>\n{nemo_raw}"
return openclaw_block
# Module-wide ChatManager instance, created lazily on first request.
_chat_manager: ChatManager | None = None


def get_chat_manager() -> ChatManager:
    """Return the shared ChatManager, constructing it on the first call."""
    global _chat_manager
    manager = _chat_manager
    if manager is None:
        manager = _chat_manager = ChatManager()
    return manager