Files
awoooi/apps/api/src/services/chat_manager.py
OG T b62d7d3eb0
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
feat(chat): OpenClaw 改用 Gemini 2.0 Flash-Lite (最便宜)
Input $0.075/1M, Output $0.30/1M (比 Flash 便宜 25%)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 19:35:13 +08:00

274 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
AWOOOI Chat Manager - 雙 AI 對話核心
======================================
Phase 21.5 初版: 2026-03-31 ogt
Phase 22.6 重寫: 2026-04-03 ogt (老闆需求: 雙 AI 互動對話)
Phase 22.7 更新: 2026-04-03 ogt (老闆指示: OpenClaw→Gemini, NemoClaw→Ollama llama3.2:3b)
架構:
- OpenClaw (Gemini API): SRE 首席顧問,精準分析
- NemoClaw (Ollama llama3.2:3b): 戰術參謀,快速補充
費用控管:
- Gemini Flash: Input $0.075/1M tokens, Output $0.30/1M tokens
- 每次回覆顯示 token 用量與費用
- 月上限 $10 USD (由 ai_rate_limiter 控管)
"""
import asyncio
import structlog
from src.utils.timezone import now_taipei
from src.repositories.k8s_repository import get_k8s_repository
from src.repositories.incident_repository import get_incident_repository
logger = structlog.get_logger(__name__)
OPENCLAW_PERSONA = """你是 OpenClawAWOOOI 平台的 SRE AI 首席顧問。
個性: 精準、果斷、專業,像老將一樣直接給出建議。
語氣: 簡短有力,不廢話。繁體中文回應。不超過 300 字。
稱呼用戶為「老闆」。
"""
NEMOCLAW_PERSONA = """你是 NemoClawAWOOOI 平台的 AI 戰術參謀,由 NVIDIA Nemotron 驅動。
個性: 分析型、從不同角度思考,會質疑假設。
語氣: 帶點挑釁但建設性。不超過 200 字。
稱呼用戶為「老闆」。評論 OpenClaw 的回應時,直接說「我補充」或「我有不同看法」。
重要:你必須全程使用繁體中文回應,禁止使用英文或其他語言。
"""
class ChatManager:
"""AWOOOI 雙 AI 對話管理器"""
def __init__(self):
pass # 2026-04-03 ogt: 移除 repo 實例化leWOOOgo 規範禁止 Service 持有 repository
async def get_system_context(self) -> str:
"""收集系統即時上下文"""
now = now_taipei()
k8s = get_k8s_repository()
incidents = get_incident_repository()
try:
k8s_status = await k8s.get_pod_status_summary(namespace="awoooi-prod")
cluster_info = f"Cluster: {k8s_status['running']}/{k8s_status['total']} Pods Running"
if k8s_status.get('problem_pods'):
cluster_info += f", {len(k8s_status['problem_pods'])} 異常"
except Exception:
cluster_info = "Cluster: 無法取得狀態"
try:
active_incidents = await incidents.get_active()
if active_incidents:
lines = [f"- {inc.incident_id}: {inc.status.value} (SEV {inc.severity.value})"
for inc in active_incidents[:3]]
incident_summary = "\n".join(lines)
else:
incident_summary = "無活躍告警"
except Exception:
incident_summary = "無法取得告警"
return (
f"## 系統狀態 ({now.strftime('%Y-%m-%d %H:%M')} 台北)\n"
f"- {cluster_info}\n"
f"- 活躍告警: {incident_summary}\n"
)
async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None:
"""
呼叫 OpenClaw 對話 — Gemini Flash API
2026-04-03 ogt: 老闆指示改用 Gemini費用控管月上限 $10 USD
每次回覆附帶 token 用量與費用統計
"""
import httpx
from src.core.config import get_settings
settings = get_settings()
api_key = settings.GEMINI_API_KEY
if not api_key:
logger.warning("openclaw_chat_failed", error="GEMINI_API_KEY not configured")
return None
# 月費用上限檢查 ($10 USD)
MONTHLY_LIMIT_USD = 10.0
from src.core.redis_client import get_redis
from src.utils.timezone import now_taipei
redis = get_redis()
month_key = f"gemini_cost:{now_taipei().strftime('%Y-%m')}"
try:
current_cost = float(await redis.get(month_key) or 0)
except Exception:
current_cost = 0.0
if current_cost >= MONTHLY_LIMIT_USD:
logger.warning("openclaw_gemini_monthly_limit_reached", current_usd=current_cost, limit_usd=MONTHLY_LIMIT_USD)
return f"🔴 OpenClaw 本月 Gemini 用量已達上限 ${MONTHLY_LIMIT_USD} USD已用 ${current_cost:.4f}"
# Gemini 2.0 Flash-Lite: 最便宜 (2026-04-03 老闆指示)
model = "gemini-2.0-flash-lite"
try:
async with httpx.AsyncClient(timeout=30.0) as client:
resp = await client.post(
f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}",
json={
"system_instruction": {"parts": [{"text": system_prompt}]},
"contents": [{"parts": [{"text": user_message}]}],
"generationConfig": {"maxOutputTokens": 300, "temperature": 0.7},
},
)
resp.raise_for_status()
data = resp.json()
text = data["candidates"][0]["content"]["parts"][0]["text"].strip()
# Token/費用統計 + 累計到 Redis
usage = data.get("usageMetadata", {})
in_tok = usage.get("promptTokenCount", 0)
out_tok = usage.get("candidatesTokenCount", 0)
# Gemini 2.0 Flash-Lite: Input $0.075/1M, Output $0.30/1M
cost = (in_tok * 0.000000075) + (out_tok * 0.0000003)
new_total = current_cost + cost
try:
await redis.set(month_key, str(round(new_total, 6)), ex=40 * 24 * 3600) # 40天 TTL
except Exception:
pass
logger.info("openclaw_gemini_usage", in_tokens=in_tok, out_tokens=out_tok,
cost_usd=round(cost, 6), monthly_total_usd=round(new_total, 4))
return f"{text}\n\n<i>📊 {in_tok+out_tok} tokens | ${cost:.4f} | 本月累計 ${new_total:.4f}</i>"
except Exception as e:
logger.warning("openclaw_chat_failed", error=str(e))
return None
async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None:
"""
呼叫 NemoClaw 對話 — Claude API (claude-haiku-4-5)
2026-04-03 ogt: 老闆指示改接 Claude API快速且中文能力強
用 haiku 降低費用
"""
import httpx
from src.core.config import get_settings
settings = get_settings()
api_key = getattr(settings, 'CLAUDE_API_KEY', None)
if not api_key:
logger.warning("nemotron_chat_failed", error="CLAUDE_API_KEY not configured")
return None
try:
async with httpx.AsyncClient(timeout=30.0) as client:
resp = await client.post(
"https://api.anthropic.com/v1/messages",
headers={
"x-api-key": api_key,
"anthropic-version": "2023-06-01",
"content-type": "application/json",
},
json={
"model": "claude-haiku-4-5-20251001",
"max_tokens": 300,
"system": system_prompt,
"messages": [{"role": "user", "content": user_message}],
},
)
resp.raise_for_status()
data = resp.json()
text = data["content"][0]["text"].strip()
# Token/費用統計 — Claude Haiku 4.5: Input $0.80/1M, Output $4.00/1M
usage = data.get("usage", {})
in_tok = usage.get("input_tokens", 0)
out_tok = usage.get("output_tokens", 0)
cost = (in_tok * 0.0000008) + (out_tok * 0.000004)
# 月累計到 Redis
from src.core.redis_client import get_redis
from src.utils.timezone import now_taipei
redis = get_redis()
month_key = f"claude_cost:{now_taipei().strftime('%Y-%m')}"
try:
current = float(await redis.get(month_key) or 0)
new_total = current + cost
await redis.set(month_key, str(round(new_total, 6)), ex=40 * 24 * 3600)
except Exception:
new_total = cost
logger.info("nemotron_claude_usage", in_tokens=in_tok, out_tokens=out_tok,
cost_usd=round(cost, 6), monthly_total_usd=round(new_total, 4))
return f"{text}\n\n<i>📊 {in_tok+out_tok} tokens | ${cost:.4f} | 本月累計 ${new_total:.4f}</i>"
except Exception as e:
logger.warning("nemotron_chat_failed", error=str(e))
return None
async def generate_response(
self,
user_id: int, # noqa: ARG002
username: str, # noqa: ARG002
message_text: str,
) -> str:
"""
根據訊息決定回應模式:
@openclaw <msg> → 只有 OpenClaw 回應
@nemo <msg> → 只有 NemoClaw 回應
其他 → OpenClaw 先回NemoClaw 異步補充
"""
context = await self.get_system_context()
text = message_text.strip()
# 模式 1: 指定 OpenClaw
if text.lower().startswith("@openclaw"):
msg = text[9:].strip() or text
result = await self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", msg)
return f"🦞 <b>OpenClaw:</b>\n{result or '🔴 OpenClaw 無響應'}"
# 模式 2: 指定 NemoClaw
if text.lower().startswith("@nemo"):
msg = text[5:].strip() or text
result = await self._call_nemotron(f"{NEMOCLAW_PERSONA}\n{context}", msg)
return f"🤖 <b>NemoClaw:</b>\n{result or '🔴 NemoClaw 無響應 (NIM 超時)'}"
# 模式 3: 雙 AI — OpenClaw 先答NemoClaw 並行
openclaw_task = asyncio.create_task(
self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", text)
)
nemo_task = asyncio.create_task(
self._call_nemotron(
f"{NEMOCLAW_PERSONA}\n{context}",
f"老闆問了: {text}\n\n請從 NemoClaw 角度補充或評論。",
)
)
# OpenClaw 最多等 40s含 context 取得時間NemoClaw 最多等 60s
# 2026-04-03 ogt: 移除 asyncio.shield — shield 會在超時後讓 task 繼續跑但無人等待,造成 silent leak
try:
openclaw_raw = await asyncio.wait_for(openclaw_task, timeout=40.0)
except asyncio.TimeoutError:
openclaw_raw = None
openclaw_block = f"🦞 <b>OpenClaw:</b>\n{openclaw_raw or '🔴 無響應'}"
try:
nemo_raw = await asyncio.wait_for(nemo_task, timeout=60.0)
except asyncio.TimeoutError:
nemo_raw = None
if nemo_raw:
return f"{openclaw_block}\n\n🤖 <b>NemoClaw:</b>\n{nemo_raw}"
return openclaw_block
# Process-wide ChatManager instance, created lazily by get_chat_manager().
_chat_manager: ChatManager | None = None


def get_chat_manager() -> ChatManager:
    """Return the shared ChatManager, constructing it on first use."""
    global _chat_manager
    manager = _chat_manager
    if manager is None:
        manager = _chat_manager = ChatManager()
    return manager