diff --git a/apps/api/src/services/chat_manager.py b/apps/api/src/services/chat_manager.py index 922313d2..42240609 100644 --- a/apps/api/src/services/chat_manager.py +++ b/apps/api/src/services/chat_manager.py @@ -2,19 +2,17 @@ AWOOOI Chat Manager - 雙 AI 對話核心 ====================================== Phase 21.5 初版: 2026-03-31 ogt -Phase 22.6 重寫: 2026-04-03 ogt (統帥需求: 雙 AI 互動對話) +Phase 22.6 重寫: 2026-04-03 ogt (老闆需求: 雙 AI 互動對話) +Phase 22.7 更新: 2026-04-03 ogt (老闆指示: OpenClaw→Gemini, NemoClaw→Ollama llama3.2:3b) 架構: -- OpenClaw (192.168.0.188:8088): RCA 仲裁者,負責回答 -- NemoClaw (NVIDIA NIM nemotron-mini-4b): 戰術參謀,評論/補充 +- OpenClaw (Gemini API): SRE 首席顧問,精準分析 +- NemoClaw (Ollama llama3.2:3b): 戰術參謀,快速補充 -使用模式: - @openclaw → 只有 OpenClaw 回應 - @nemo → 只有 NemoClaw 回應 - 其他 → OpenClaw 先答,NemoClaw 評論 - -注意: NIM 免費 tier 延遲 11-45s,對話採異步模式: - 先推 OpenClaw 回應,NemoClaw 完成後再補充 +費用控管: +- Gemini Flash: Input $0.075/1M tokens, Output $0.30/1M tokens +- 每次回覆顯示 token 用量與費用 +- 月上限 $10 USD (由 ai_rate_limiter 控管) """ import asyncio @@ -78,46 +76,77 @@ class ChatManager: async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None: """ - 呼叫 OpenClaw 對話 — 走 NVIDIA NIM meta/llama-3.1-8b-instruct + 呼叫 OpenClaw 對話 — Gemini Flash API - 2026-04-03 ogt: OpenClaw 8088 的 analyze/incident 是告警分析 API, - 回覆是告警格式,不適合自然語言對話。 - 改用 NIM llama-3.1-8b 做 chat,與 NemoClaw 同樣走免費 NIM cloud。 + 2026-04-03 ogt: 老闆指示改用 Gemini,費用控管月上限 $10 USD + 每次回覆附帶 token 用量與費用統計 """ - from src.services.nvidia_provider import get_nvidia_provider - nvidia = get_nvidia_provider() - try: - full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}" - response, success, _, _ = await nvidia.chat( - prompt=full_prompt, - model="meta/llama-3.1-8b-instruct", - max_tokens=300, - ) - if success and response and "not configured" not in response and "Circuit Breaker" not in response: - return response.strip() + import httpx + from src.core.config import get_settings + settings = get_settings() + + api_key = settings.GEMINI_API_KEY + if not api_key: + logger.warning("openclaw_chat_failed", error="GEMINI_API_KEY not configured") return None + + # Gemini 1.5 Flash: 快速、便宜 + model = "gemini-1.5-flash" + try: + async with httpx.AsyncClient(timeout=30.0) as client: + resp = await client.post( + f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}", + json={ + "system_instruction": {"parts": [{"text": system_prompt}]}, + "contents": [{"parts": [{"text": user_message}]}], + "generationConfig": {"maxOutputTokens": 300, "temperature": 0.7}, + }, + ) + resp.raise_for_status() + data = resp.json() + text = data["candidates"][0]["content"]["parts"][0]["text"].strip() + + # Token/費用統計 + usage = data.get("usageMetadata", {}) + in_tok = usage.get("promptTokenCount", 0) + out_tok = usage.get("candidatesTokenCount", 0) + cost = (in_tok * 0.000000075) + (out_tok * 0.0000003) + logger.info("openclaw_gemini_usage", in_tokens=in_tok, out_tokens=out_tok, cost_usd=round(cost, 6)) + + return f"{text}\n\n📊 {in_tok+out_tok} tokens | ${cost:.4f}" except Exception as e: logger.warning("openclaw_chat_failed", error=str(e)) return None async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None: """ - 呼叫 NVIDIA NIM nemotron-mini-4b (NemoClaw) + 呼叫 NemoClaw 對話 — Ollama llama3.2:3b (本地,快速) - NIM 免費 tier 延遲 11-45s,此方法可能需要 30-120s 才回應 + 2026-04-03 ogt: 老闆指示改用 Ollama 小模型取代 NIM,加快回應速度 """ - from src.services.nvidia_provider import get_nvidia_provider - nvidia = get_nvidia_provider() + import httpx + from src.core.config import get_settings + settings = get_settings() + + ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434') try: - full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}" - response, success, _, _ = await nvidia.chat( - prompt=full_prompt, - model="nvidia/nemotron-mini-4b-instruct", - max_tokens=300, - ) - if success and response and "not configured" not in response and "Circuit Breaker" not in response: - return response.strip() - return None + async with httpx.AsyncClient(timeout=60.0) as client: + resp = await client.post( + f"{ollama_url}/api/chat", + json={ + "model": "llama3.2:3b", + "stream": False, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_message}, + ], + "options": {"num_predict": 250}, + }, + ) + resp.raise_for_status() + data = resp.json() + text = data.get("message", {}).get("content", "").strip() + return text or None except Exception as e: logger.warning("nemotron_chat_failed", error=str(e)) return None