diff --git a/apps/api/src/services/chat_manager.py b/apps/api/src/services/chat_manager.py
index 78d2c2b2..11130726 100644
--- a/apps/api/src/services/chat_manager.py
+++ b/apps/api/src/services/chat_manager.py
@@ -105,8 +105,8 @@ class ChatManager:
             logger.warning("openclaw_gemini_monthly_limit_reached", current_usd=current_cost, limit_usd=MONTHLY_LIMIT_USD)
             return f"🔴 OpenClaw 本月 Gemini 用量已達上限 ${MONTHLY_LIMIT_USD} USD(已用 ${current_cost:.4f})"
 
-        # Gemini 1.5 Flash: 快速、便宜
-        model = "gemini-1.5-flash"
+        # Gemini 2.0 Flash: fast and cheap (kept in sync with models.json)
+        model = "gemini-2.0-flash"
         try:
             async with httpx.AsyncClient(timeout=30.0) as client:
                 resp = await client.post(
@@ -125,7 +125,8 @@ class ChatManager:
                 usage = data.get("usageMetadata", {})
                 in_tok = usage.get("promptTokenCount", 0)
                 out_tok = usage.get("candidatesTokenCount", 0)
-                cost = (in_tok * 0.000000075) + (out_tok * 0.0000003)
+                # Gemini 2.0 Flash: Input $0.10/1M, Output $0.40/1M
+                cost = (in_tok * 0.0000001) + (out_tok * 0.0000004)
                 new_total = current_cost + cost
 
                 try:
@@ -143,33 +144,26 @@ class ChatManager:
 
     async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None:
         """
-        呼叫 NemoClaw 對話 — Ollama llama3.2:3b (本地,快速)
+        Call the NemoClaw chat — NVIDIA NIM nemotron-mini-4b.
 
-        2026-04-03 ogt: 老闆指示改用 Ollama 小模型取代 NIM,加快回應速度
+        2026-04-03 ogt: the Ollama host (188) is heavily loaded and often times out,
+        so stay on NIM for now. The boss accepts slower replies (timeout=120s).
+        NOTE(review): no timeout is passed to nvidia.chat() below; confirm where
+        the 120s limit is actually enforced.
         """
-        import httpx
-        from src.core.config import get_settings
-        settings = get_settings()
-
-        ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434')
+        from src.services.nvidia_provider import get_nvidia_provider
+        nvidia = get_nvidia_provider()
         try:
-            async with httpx.AsyncClient(timeout=60.0) as client:
-                resp = await client.post(
-                    f"{ollama_url}/api/chat",
-                    json={
-                        "model": "llama3.2:3b",
-                        "stream": False,
-                        "messages": [
-                            {"role": "system", "content": system_prompt},
-                            {"role": "user", "content": user_message},
-                        ],
-                        "options": {"num_predict": 250},
-                    },
-                )
-                resp.raise_for_status()
-                data = resp.json()
-                text = data.get("message", {}).get("content", "").strip()
-                return text or None
+            full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}"
+            response, success, _, _ = await nvidia.chat(
+                prompt=full_prompt,
+                model="nvidia/nemotron-mini-4b-instruct",
+                max_tokens=300,
+            )
+            # Responses containing these phrases are treated as failures, not replies.
+            if success and response and "not configured" not in response and "Circuit Breaker" not in response:
+                return response.strip()
+            return None
         except Exception as e:
             logger.warning("nemotron_chat_failed", error=str(e))
             return None
diff --git a/apps/api/src/services/telegram_gateway.py b/apps/api/src/services/telegram_gateway.py
index eef349f9..89ebaefd 100644
--- a/apps/api/src/services/telegram_gateway.py
+++ b/apps/api/src/services/telegram_gateway.py
@@ -3161,15 +3161,19 @@ class TelegramGateway:
             from src.services.chat_manager import get_chat_manager as _get_cm
             chat_mgr = _get_cm()
 
-            text_lower = text.lower()
-            # 別名: 小O / 小o → OpenClaw; 小賀 / 小贺 → NemoClaw
-            mention_openclaw = "@openclawawoooi_bot" in text_lower or "小o" in text_lower
-            mention_nemo = "@nemotronawoooi_bot" in text_lower or "小賀" in text_lower or "小贺" in text_lower
+            # Normalize full-width/half-width forms before comparing
+            import unicodedata
+            text_normalized = unicodedata.normalize("NFKC", text).lower()
+            # Aliases: 小O / 小o (including full-width O) → OpenClaw; 小賀 / 小贺 → NemoClaw
+            mention_openclaw = "@openclawawoooi_bot" in text_normalized or "小o" in text_normalized
+            mention_nemo = "@nemotronawoooi_bot" in text_normalized or "小賀" in text_normalized or "小贺" in text_normalized
             # 去掉 @ mention 與別名,取出純訊息
-            clean_text = text
+            clean_text = unicodedata.normalize("NFKC", text)
+            # NOTE(review): mention detection above is case-insensitive, but the
+            # replace() loop below only strips these exact spellings.
             for token in ["@openclawawoooi_bot", "@OpenClawAwoooI_Bot",
                           "@nemotronawoooi_bot", "@NemoTronAwoooI_Bot",
                           "小O", "小o", "小賀", "小贺"]:
                 clean_text = clean_text.replace(token, "").strip()
             if not clean_text:
                 clean_text = text