diff --git a/apps/api/src/services/chat_manager.py b/apps/api/src/services/chat_manager.py
index 78d2c2b2..11130726 100644
--- a/apps/api/src/services/chat_manager.py
+++ b/apps/api/src/services/chat_manager.py
@@ -105,8 +105,8 @@ class ChatManager:
             logger.warning("openclaw_gemini_monthly_limit_reached", current_usd=current_cost, limit_usd=MONTHLY_LIMIT_USD)
             return f"🔴 OpenClaw 本月 Gemini 用量已達上限 ${MONTHLY_LIMIT_USD} USD（已用 ${current_cost:.4f}）"
 
-        # Gemini 1.5 Flash: 快速、便宜
-        model = "gemini-1.5-flash"
+        # Gemini 2.0 Flash: fast and cheap (kept aligned with models.json)
+        model = "gemini-2.0-flash"
         try:
             async with httpx.AsyncClient(timeout=30.0) as client:
                 resp = await client.post(
@@ -125,7 +125,8 @@ class ChatManager:
                 usage = data.get("usageMetadata", {})
                 in_tok = usage.get("promptTokenCount", 0)
                 out_tok = usage.get("candidatesTokenCount", 0)
-                cost = (in_tok * 0.000000075) + (out_tok * 0.0000003)
+                # Gemini 2.0 Flash pricing in USD: input $0.10 per 1M tokens, output $0.40 per 1M tokens
+                cost = (in_tok * 0.0000001) + (out_tok * 0.0000004)
                 new_total = current_cost + cost
 
                 try:
@@ -143,33 +144,34 @@ class ChatManager:
 
     async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None:
         """
-        呼叫 NemoClaw 對話 — Ollama llama3.2:3b (本地，快速)
+        Call the NemoClaw chat backend — NVIDIA NIM nemotron-mini-4b.
 
-        2026-04-03 ogt: 老闆指示改用 Ollama 小模型取代 NIM，加快回應速度
+        2026-04-03 ogt: the Ollama host (188) is heavily loaded and often
+        times out, so NIM stays in use for now. Slow replies are acceptable
+        to the boss (timeout=120s).
+        NOTE(review): no timeout is passed in this method — presumably the
+        120s limit is configured inside the NVIDIA provider; confirm.
+
+        Returns the stripped reply text, or None when the call raises, the
+        provider reports failure, or the reply is empty.
         """
-        import httpx
-        from src.core.config import get_settings
-        settings = get_settings()
-
-        ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434')
+        from src.services.nvidia_provider import get_nvidia_provider
+        nvidia = get_nvidia_provider()
         try:
-            async with httpx.AsyncClient(timeout=60.0) as client:
-                resp = await client.post(
-                    f"{ollama_url}/api/chat",
-                    json={
-                        "model": "llama3.2:3b",
-                        "stream": False,
-                        "messages": [
-                            {"role": "system", "content": system_prompt},
-                            {"role": "user", "content": user_message},
-                        ],
-                        "options": {"num_predict": 250},
-                    },
-                )
-                resp.raise_for_status()
-                data = resp.json()
-                text = data.get("message", {}).get("content", "").strip()
-                return text or None
+            full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}"
+            response, success, _, _ = await nvidia.chat(
+                prompt=full_prompt,
+                model="nvidia/nemotron-mini-4b-instruct",
+                max_tokens=300,
+            )
+            if not success or not response:
+                return None
+            # Presumably provider error messages returned in-band as reply
+            # text; replies containing these markers are discarded.
+            if "not configured" in response or "Circuit Breaker" in response:
+                return None
+            # A whitespace-only reply collapses to None instead of "".
+            return response.strip() or None
         except Exception as e:
             logger.warning("nemotron_chat_failed", error=str(e))
             return None
diff --git a/apps/api/src/services/telegram_gateway.py b/apps/api/src/services/telegram_gateway.py
index eef349f9..89ebaefd 100644
--- a/apps/api/src/services/telegram_gateway.py
+++ b/apps/api/src/services/telegram_gateway.py
@@ -3161,15 +3161,20 @@ class TelegramGateway:
         from src.services.chat_manager import get_chat_manager as _get_cm
         chat_mgr = _get_cm()
 
-        text_lower = text.lower()
-        # 別名: 小O / 小o → OpenClaw; 小賀 / 小贺 → NemoClaw
-        mention_openclaw = "@openclawawoooi_bot" in text_lower or "小o" in text_lower
-        mention_nemo = "@nemotronawoooi_bot" in text_lower or "小賀" in text_lower or "小贺" in text_lower
+        # Fold full-width forms to half-width (NFKC) once, then lowercase for matching.
+        import unicodedata
+        text_nfkc = unicodedata.normalize("NFKC", text)
+        text_normalized = text_nfkc.lower()
+        # Aliases: 小O / 小o (full-width Ｏ included) -> OpenClaw; 小賀 / 小贺 -> NemoClaw
+        mention_openclaw = "@openclawawoooi_bot" in text_normalized or "小o" in text_normalized
+        mention_nemo = "@nemotronawoooi_bot" in text_normalized or "小賀" in text_normalized or "小贺" in text_normalized
 
         # 去掉 @ mention 與別名，取出純訊息
-        clean_text = text
+        # Strip from the NFKC text: full-width aliases are already folded to
+        # half-width there, so the half-width tokens below cover them.
+        clean_text = text_nfkc
         for token in ["@openclawawoooi_bot", "@OpenClawAwoooI_Bot", "@nemotronawoooi_bot", "@NemoTronAwoooI_Bot",
                       "小O", "小o", "小賀", "小贺"]:
             clean_text = clean_text.replace(token, "").strip()
         if not clean_text:
             clean_text = text