Files
awoooi/apps/api/src/services/chat_manager.py
Your Name 3953ef6d57
All checks were successful
CD Pipeline / tests (push) Successful in 1m31s
Code Review / ai-code-review (push) Successful in 26s
CD Pipeline / build-and-deploy (push) Successful in 5m27s
CD Pipeline / post-deploy-checks (push) Successful in 1m40s
fix(ollama): disable thinking for deepseek call sites
2026-05-25 23:19:31 +08:00

280 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
AWOOOI Chat Manager - 雙 AI 對話核心
======================================
Phase 21.5 初版: 2026-03-31 ogt
Phase 22.6 重寫: 2026-04-03 ogt (老闆需求: 雙 AI 互動對話)
Phase 22.7 更新: 2026-04-03 ogt (老闆指示: OpenClaw→Gemini, NemoClaw→Ollama llama3.2:3b)
Phase 22.8 更新: 2026-04-09 ogt (老闆指示: NemoClaw→Ollama 111 deepseek-r1:14b,SRE 推理更強)
Phase 33 更新: 2026-05-05 ogt (ADR-110: OpenClaw chat 改走 GCP-A Ollama interactive lane)
架構:
- OpenClaw (Ollama GCP-A interactive lane): SRE 首席顧問,精準分析
- NemoClaw (Ollama interactive lane deepseek-r1:14b): 戰術參謀,深度推理
費用控管:
- OpenClaw/NemoClaw chat 預設免費 Ollama,Gemini 不再作為 ChatManager 預設路徑
- 每次回覆顯示 token 用量
"""
import asyncio
import re
import httpx
import structlog
from src.core.config import get_settings
from src.repositories.incident_repository import get_incident_repository
from src.repositories.k8s_repository import get_k8s_repository
from src.services.ollama_endpoint_resolver import resolve_ollama_order
from src.utils.timezone import now_taipei
# Module-level structlog logger, named after this module.
logger = structlog.get_logger(__name__)
# System-prompt persona for OpenClaw (SRE chief advisor). The text tells the
# model to be precise and decisive, to answer briefly in Traditional Chinese,
# to stay under 300 characters, and to address the user as "老闆" (boss).
# NOTE(review): "OpenClawAWOOOI" on the first line looks like a separator
# (probably a full-width comma) was lost between the name and "AWOOOI" —
# confirm against the repository copy before editing the prompt text.
OPENCLAW_PERSONA = """你是 OpenClawAWOOOI 平台的 SRE AI 首席顧問。
個性: 精準、果斷、專業,像老將一樣直接給出建議。
語氣: 簡短有力,不廢話。繁體中文回應。不超過 300 字。
稱呼用戶為「老闆」。
"""
# System-prompt persona for NemoClaw (tactical staff officer). The text asks
# for an analytical, assumption-challenging voice, a 200-character limit, and
# the "老闆" (boss) form of address. Its mandatory rules are: Traditional
# Chinese only, never reveal the underlying model or call itself DeepSeek,
# and stay within the SRE/DevOps/Kubernetes/observability domain.
# NOTE(review): "NemoClawAWOOOI" on the first line looks like a separator
# (probably a full-width comma) was lost between the name and "AWOOOI" —
# confirm against the repository copy before editing the prompt text.
NEMOCLAW_PERSONA = """你是 NemoClawAWOOOI 平台的 AI 戰術參謀。
個性: 分析型、從不同角度思考,會質疑假設。
語氣: 帶點挑釁但建設性。不超過 200 字。
稱呼用戶為「老闆」。評論 OpenClaw 的回應時,直接說「我補充」或「我有不同看法」。
強制規則:
1. 全程使用繁體中文,禁止使用簡體中文、英文或其他語言。
2. 禁止自稱 DeepSeek 或透露底層模型資訊。你的名字就是 NemoClaw。
3. 專注於 SRE/DevOps/Kubernetes/可觀測性領域。
"""
class ChatManager:
"""AWOOOI 雙 AI 對話管理器"""
def __init__(self):
pass # 2026-04-03 ogt: 移除 repo 實例化leWOOOgo 規範禁止 Service 持有 repository
async def get_system_context(self) -> str:
"""收集系統即時上下文"""
now = now_taipei()
k8s = get_k8s_repository()
incidents = get_incident_repository()
try:
k8s_status = await k8s.get_pod_status_summary(namespace="awoooi-prod")
cluster_info = f"Cluster: {k8s_status['running']}/{k8s_status['total']} Pods Running"
if k8s_status.get('problem_pods'):
cluster_info += f", {len(k8s_status['problem_pods'])} 異常"
except Exception:
cluster_info = "Cluster: 無法取得狀態"
try:
active_incidents = await incidents.get_active()
if active_incidents:
lines = [f"- {inc.incident_id}: {inc.status.value} (SEV {inc.severity.value})"
for inc in active_incidents[:3]]
incident_summary = "\n".join(lines)
else:
incident_summary = "無活躍告警"
except Exception:
incident_summary = "無法取得告警"
return (
f"## 系統狀態 ({now.strftime('%Y-%m-%d %H:%M')} 台北)\n"
f"- {cluster_info}\n"
f"- 活躍告警: {incident_summary}\n"
)
async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None:
"""
呼叫 OpenClaw 對話 — Ollama interactive lane
2026-04-10 Claude Code: 強制合併 OPENCLAW_PERSONA確保字數限制與格式規範
2026-05-05 Codex: 改走 ADR-110 GCP-A/GCP-B/111 Ollama topology避免個人聊天直打 Gemini
"""
# 強制在 system_prompt 前置 persona確保 LLM 遵守字數與格式
system_prompt = f"{OPENCLAW_PERSONA}\n{system_prompt}"
settings = get_settings()
model = settings.OPENCLAW_DEFAULT_MODEL
async with httpx.AsyncClient(timeout=40.0) as client:
for endpoint in resolve_ollama_order("interactive"):
if not endpoint.url:
continue
try:
resp = await client.post(
f"{endpoint.url}/api/chat",
json={
"model": model,
"stream": False,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message},
],
"options": {"num_predict": 900, "temperature": 0.2},
},
)
resp.raise_for_status()
data = resp.json()
raw = data.get("message", {}).get("content", "").strip()
text = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip() or raw
eval_count = data.get("eval_count", 0)
prompt_eval_count = data.get("prompt_eval_count", 0)
total_tokens = eval_count + prompt_eval_count
logger.info(
"openclaw_ollama_chat_usage",
model=model,
endpoint=endpoint.url,
provider=endpoint.provider_name,
prompt_tokens=prompt_eval_count,
output_tokens=eval_count,
)
return f"{text}\n\n<i>🦙 {model} | {total_tokens} tokens | 免費</i>"
except Exception as e:
logger.warning(
"openclaw_chat_endpoint_failed",
provider=endpoint.provider_name,
endpoint=endpoint.url,
error=str(e),
)
logger.warning("openclaw_chat_failed_all_endpoints", model=model)
return None
async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None:
"""
呼叫 NemoClaw 對話 — Ollama 111 deepseek-r1:14b
2026-04-09 ogt: 改接 192.168.0.111 Ollama deepseek-r1:14bSRE 推理能力最強
deepseek-r1 含 <think> 標籤,需過濾後才回傳
2026-04-10 Claude Code: 強制合併 NEMOCLAW_PERSONA確保字數限制與格式規範
"""
# 強制在 system_prompt 前置 persona
system_prompt = f"{NEMOCLAW_PERSONA}\n{system_prompt}"
# 2026-05-05 Codex: ADR-110 interactive lane由 resolver 管理 GCP-A/GCP-B/111 拓撲
MODEL = "deepseek-r1:14b"
async with httpx.AsyncClient(timeout=120.0) as client:
for endpoint in resolve_ollama_order("interactive"):
if not endpoint.url:
continue
try:
resp = await client.post(
f"{endpoint.url}/api/chat",
json={
"model": MODEL,
"stream": False,
# Ollama 0.24 separates deepseek-r1 thinking from final text.
# Chat callers expect message.content to contain the answer.
"think": False,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message},
],
"options": {"num_predict": 1200},
},
)
resp.raise_for_status()
data = resp.json()
raw = data.get("message", {}).get("content", "").strip()
# 過濾 deepseek-r1 的 <think>...</think> 推理區塊
text = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
if not text:
text = raw # 萬一全是 think block直接回傳原文
eval_count = data.get("eval_count", 0)
prompt_eval_count = data.get("prompt_eval_count", 0)
total_tokens = eval_count + prompt_eval_count
logger.info(
"nemotron_ollama_usage",
model=MODEL,
provider=endpoint.provider_name,
prompt_tokens=prompt_eval_count,
output_tokens=eval_count,
)
return f"{text}\n\n<i>🦙 {MODEL} | {total_tokens} tokens | 免費</i>"
except Exception as e:
logger.warning(
"nemotron_chat_endpoint_failed",
model=MODEL,
provider=endpoint.provider_name,
endpoint=endpoint.url,
error=str(e),
)
logger.warning("nemotron_chat_failed_all_endpoints", model=MODEL)
return None
async def generate_response(
self,
user_id: int, # noqa: ARG002
username: str, # noqa: ARG002
message_text: str,
) -> str:
"""
根據訊息決定回應模式:
@openclaw <msg> → 只有 OpenClaw 回應
@nemo <msg> → 只有 NemoClaw 回應
其他 → OpenClaw 先回NemoClaw 異步補充
"""
context = await self.get_system_context()
text = message_text.strip()
# 模式 1: 指定 OpenClaw
if text.lower().startswith("@openclaw"):
msg = text[9:].strip() or text
result = await self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", msg)
return f"🦞 <b>OpenClaw:</b>\n{result or '🔴 OpenClaw 無響應'}"
# 模式 2: 指定 NemoClaw
if text.lower().startswith("@nemo"):
msg = text[5:].strip() or text
result = await self._call_nemotron(f"{NEMOCLAW_PERSONA}\n{context}", msg)
return f"🤖 <b>NemoClaw:</b>\n{result or '🔴 NemoClaw 無響應 (NIM 超時)'}"
# 模式 3: 雙 AI — OpenClaw 先答NemoClaw 並行
openclaw_task = asyncio.create_task(
self._call_openclaw(f"{OPENCLAW_PERSONA}\n{context}", text)
)
nemo_task = asyncio.create_task(
self._call_nemotron(
f"{NEMOCLAW_PERSONA}\n{context}",
f"老闆問了: {text}\n\n請從 NemoClaw 角度補充或評論。",
)
)
# OpenClaw 最多等 40s含 context 取得時間NemoClaw 最多等 60s
# 2026-04-03 ogt: 移除 asyncio.shield — shield 會在超時後讓 task 繼續跑但無人等待,造成 silent leak
try:
openclaw_raw = await asyncio.wait_for(openclaw_task, timeout=40.0)
except TimeoutError:
openclaw_raw = None
openclaw_block = f"🦞 <b>OpenClaw:</b>\n{openclaw_raw or '🔴 無響應'}"
try:
nemo_raw = await asyncio.wait_for(nemo_task, timeout=60.0)
except TimeoutError:
nemo_raw = None
if nemo_raw:
return f"{openclaw_block}\n\n🤖 <b>NemoClaw:</b>\n{nemo_raw}"
return openclaw_block
# Process-wide ChatManager instance, created on first use.
_chat_manager: ChatManager | None = None


def get_chat_manager() -> ChatManager:
    """Return the shared ChatManager, constructing it on the first call."""
    global _chat_manager
    manager = _chat_manager
    if manager is None:
        manager = ChatManager()
        _chat_manager = manager
    return manager