"""
AWOOOI Chat Manager - 雙 AI 對話核心
======================================
Phase 21.5 初版: 2026-03-31 ogt
Phase 22.6 重寫: 2026-04-03 ogt (老闆需求: 雙 AI 互動對話)
Phase 22.7 更新: 2026-04-03 ogt (老闆指示: OpenClaw→Gemini, NemoClaw→Ollama llama3.2:3b)
Phase 22.8 更新: 2026-04-09 ogt (老闆指示: NemoClaw→Ollama 111 deepseek-r1:14b,SRE 推理更強)
架構:
- OpenClaw (Gemini API): SRE 首席顧問,精準分析
- NemoClaw (Ollama 192.168.0.111 deepseek-r1:14b): 戰術參謀,深度推理
費用控管:
- Gemini Flash: Input $0.075/1M tokens, Output $0.30/1M tokens
- NemoClaw: 免費 (本地 Ollama)
- 每次回覆顯示 token 用量與費用
- 月上限 $10 USD (由 ai_rate_limiter 控管)
"""
import asyncio
import structlog
from src.utils.timezone import now_taipei
from src.repositories.k8s_repository import get_k8s_repository
from src.repositories.incident_repository import get_incident_repository
# Module-level structlog logger, bound to this module's name.
logger = structlog.get_logger(__name__)

# System-prompt persona for OpenClaw. The prompt text (Traditional Chinese)
# tells the model it is the platform's chief SRE advisor, to answer tersely in
# Traditional Chinese, to stay under 300 characters, and to address the user
# as "老闆" (boss). This is runtime text sent to the LLM — do not edit casually.
OPENCLAW_PERSONA = """你是 OpenClaw,AWOOOI 平台的 SRE AI 首席顧問。
個性: 精準、果斷、專業,像老將一樣直接給出建議。
語氣: 簡短有力,不廢話。繁體中文回應。不超過 300 字。
稱呼用戶為「老闆」。
"""

# System-prompt persona for NemoClaw. The prompt text tells the model it is an
# analytical "tactical advisor" that challenges assumptions, stays under 200
# characters, and comments on OpenClaw's answers. Its hard rules require
# Traditional Chinese only, forbid identifying itself as DeepSeek or revealing
# the underlying model, and restrict the topic to SRE/DevOps/Kubernetes/
# observability. This is runtime text sent to the LLM.
NEMOCLAW_PERSONA = """你是 NemoClaw,AWOOOI 平台的 AI 戰術參謀。
個性: 分析型、從不同角度思考,會質疑假設。
語氣: 帶點挑釁但建設性。不超過 200 字。
稱呼用戶為「老闆」。評論 OpenClaw 的回應時,直接說「我補充」或「我有不同看法」。
強制規則:
1. 全程使用繁體中文,禁止使用簡體中文、英文或其他語言。
2. 禁止自稱 DeepSeek 或透露底層模型資訊。你的名字就是 NemoClaw。
3. 專注於 SRE/DevOps/Kubernetes/可觀測性領域。
"""
class ChatManager:
"""AWOOOI 雙 AI 對話管理器"""
def __init__(self):
pass # 2026-04-03 ogt: 移除 repo 實例化,leWOOOgo 規範禁止 Service 持有 repository
async def get_system_context(self) -> str:
"""收集系統即時上下文"""
now = now_taipei()
k8s = get_k8s_repository()
incidents = get_incident_repository()
try:
k8s_status = await k8s.get_pod_status_summary(namespace="awoooi-prod")
cluster_info = f"Cluster: {k8s_status['running']}/{k8s_status['total']} Pods Running"
if k8s_status.get('problem_pods'):
cluster_info += f", {len(k8s_status['problem_pods'])} 異常"
except Exception:
cluster_info = "Cluster: 無法取得狀態"
try:
active_incidents = await incidents.get_active()
if active_incidents:
lines = [f"- {inc.incident_id}: {inc.status.value} (SEV {inc.severity.value})"
for inc in active_incidents[:3]]
incident_summary = "\n".join(lines)
else:
incident_summary = "無活躍告警"
except Exception:
incident_summary = "無法取得告警"
return (
f"## 系統狀態 ({now.strftime('%Y-%m-%d %H:%M')} 台北)\n"
f"- {cluster_info}\n"
f"- 活躍告警: {incident_summary}\n"
)
async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None:
"""
呼叫 OpenClaw 對話 — Gemini Flash API
2026-04-03 ogt: 老闆指示改用 Gemini,費用控管月上限 $10 USD
每次回覆附帶 token 用量與費用統計
2026-04-10 Claude Code: 強制合併 OPENCLAW_PERSONA,確保字數限制與格式規範
"""
# 強制在 system_prompt 前置 persona,確保 LLM 遵守字數與格式
system_prompt = f"{OPENCLAW_PERSONA}\n{system_prompt}"
import httpx
from src.core.config import get_settings
settings = get_settings()
api_key = settings.GEMINI_API_KEY
if not api_key:
logger.warning("openclaw_chat_failed", error="GEMINI_API_KEY not configured")
return None
# 月費用上限檢查 ($10 USD)
MONTHLY_LIMIT_USD = 10.0
from src.core.redis_client import get_redis
from src.utils.timezone import now_taipei
redis = get_redis()
month_key = f"gemini_cost:{now_taipei().strftime('%Y-%m')}"
try:
current_cost = float(await redis.get(month_key) or 0)
except Exception:
current_cost = 0.0
if current_cost >= MONTHLY_LIMIT_USD:
logger.warning("openclaw_gemini_monthly_limit_reached", current_usd=current_cost, limit_usd=MONTHLY_LIMIT_USD)
return f"🔴 OpenClaw 本月 Gemini 用量已達上限 ${MONTHLY_LIMIT_USD} USD(已用 ${current_cost:.4f})"
# Gemini 2.0 Flash-Lite: 最便宜 (2026-04-03 老闆指示)
model = "gemini-2.0-flash-lite"
try:
async with httpx.AsyncClient(timeout=30.0) as client:
resp = await client.post(
f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}",
json={
"system_instruction": {"parts": [{"text": system_prompt}]},
"contents": [{"parts": [{"text": user_message}]}],
"generationConfig": {"maxOutputTokens": 300, "temperature": 0.7},
},
)
resp.raise_for_status()
data = resp.json()
text = data["candidates"][0]["content"]["parts"][0]["text"].strip()
# Token/費用統計 + 累計到 Redis
usage = data.get("usageMetadata", {})
in_tok = usage.get("promptTokenCount", 0)
out_tok = usage.get("candidatesTokenCount", 0)
# Gemini 2.0 Flash-Lite: Input $0.075/1M, Output $0.30/1M
cost = (in_tok * 0.000000075) + (out_tok * 0.0000003)
new_total = current_cost + cost
try:
await redis.set(month_key, str(round(new_total, 6)), ex=40 * 24 * 3600) # 40天 TTL
except Exception:
pass
logger.info("openclaw_gemini_usage", in_tokens=in_tok, out_tokens=out_tok,
cost_usd=round(cost, 6), monthly_total_usd=round(new_total, 4))
return f"{text}\n\n📊 {in_tok+out_tok} tokens | ${cost:.4f} | 本月累計 ${new_total:.4f}"
except Exception as e:
logger.warning("openclaw_chat_failed", error=str(e))
return None
async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None:
"""
呼叫 NemoClaw 對話 — Ollama 111 deepseek-r1:14b
2026-04-09 ogt: 改接 192.168.0.111 Ollama deepseek-r1:14b,SRE 推理能力最強
deepseek-r1 含 標籤,需過濾後才回傳
2026-04-10 Claude Code: 強制合併 NEMOCLAW_PERSONA,確保字數限制與格式規範
"""
# 強制在 system_prompt 前置 persona
system_prompt = f"{NEMOCLAW_PERSONA}\n{system_prompt}"
import httpx
import re
OLLAMA_URL = "http://192.168.0.111:11434"
MODEL = "deepseek-r1:14b"
try:
async with httpx.AsyncClient(timeout=120.0) as client:
resp = await client.post(
f"{OLLAMA_URL}/api/chat",
json={
"model": MODEL,
"stream": False,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message},
],
"options": {"num_predict": 1200},
},
)
resp.raise_for_status()
data = resp.json()
raw = data.get("message", {}).get("content", "").strip()
# 過濾 deepseek-r1 的 ... 推理區塊
text = re.sub(r".*?", "", raw, flags=re.DOTALL).strip()
if not text:
text = raw # 萬一全是 think block,直接回傳原文
eval_count = data.get("eval_count", 0)
prompt_eval_count = data.get("prompt_eval_count", 0)
total_tokens = eval_count + prompt_eval_count
logger.info("nemotron_ollama_usage", model=MODEL,
prompt_tokens=prompt_eval_count, output_tokens=eval_count)
return f"{text}\n\n🦙 {MODEL} | {total_tokens} tokens | 免費"
except Exception as e:
logger.warning("nemotron_chat_failed", model=MODEL, error=str(e))
return None
async def generate_response(
self,
user_id: int, # noqa: ARG002
username: str, # noqa: ARG002
message_text: str,
) -> str:
"""
根據訊息決定回應模式:
@openclaw → 只有 OpenClaw 回應
@nemo → 只有 NemoClaw 回應
其他 → OpenClaw 先回,NemoClaw 異步補充
"""
context = await self.get_system_context()
text = message_text.strip()
# 模式 1: 指定 OpenClaw
if text.lower().startswith("@openclaw"):
msg = text[9:].strip() or text
result = await self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", msg)
return f"🦞 OpenClaw:\n{result or '🔴 OpenClaw 無響應'}"
# 模式 2: 指定 NemoClaw
if text.lower().startswith("@nemo"):
msg = text[5:].strip() or text
result = await self._call_nemotron(f"{NEMOCLAW_PERSONA}\n{context}", msg)
return f"🤖 NemoClaw:\n{result or '🔴 NemoClaw 無響應 (NIM 超時)'}"
# 模式 3: 雙 AI — OpenClaw 先答,NemoClaw 並行
openclaw_task = asyncio.create_task(
self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", text)
)
nemo_task = asyncio.create_task(
self._call_nemotron(
f"{NEMOCLAW_PERSONA}\n{context}",
f"老闆問了: {text}\n\n請從 NemoClaw 角度補充或評論。",
)
)
# OpenClaw 最多等 40s(含 context 取得時間),NemoClaw 最多等 60s
# 2026-04-03 ogt: 移除 asyncio.shield — shield 會在超時後讓 task 繼續跑但無人等待,造成 silent leak
try:
openclaw_raw = await asyncio.wait_for(openclaw_task, timeout=40.0)
except asyncio.TimeoutError:
openclaw_raw = None
openclaw_block = f"🦞 OpenClaw:\n{openclaw_raw or '🔴 無響應'}"
try:
nemo_raw = await asyncio.wait_for(nemo_task, timeout=60.0)
except asyncio.TimeoutError:
nemo_raw = None
if nemo_raw:
return f"{openclaw_block}\n\n🤖 NemoClaw:\n{nemo_raw}"
return openclaw_block
# Process-wide ChatManager instance, created lazily on first request.
_chat_manager: ChatManager | None = None


def get_chat_manager() -> ChatManager:
    """Return the shared ChatManager, constructing it on the first call."""
    global _chat_manager
    manager = _chat_manager
    if manager is None:
        manager = ChatManager()
        _chat_manager = manager
    return manager