diff --git a/apps/api/src/services/chat_manager.py b/apps/api/src/services/chat_manager.py
index 38584404..e9bdc0f9 100644
--- a/apps/api/src/services/chat_manager.py
+++ b/apps/api/src/services/chat_manager.py
@@ -5,23 +5,28 @@ Phase 21.5 初版: 2026-03-31 ogt
Phase 22.6 重寫: 2026-04-03 ogt (老闆需求: 雙 AI 互動對話)
Phase 22.7 更新: 2026-04-03 ogt (老闆指示: OpenClaw→Gemini, NemoClaw→Ollama llama3.2:3b)
Phase 22.8 更新: 2026-04-09 ogt (老闆指示: NemoClaw→Ollama 111 deepseek-r1:14b,SRE 推理更強)
+Phase 33 更新: 2026-05-05 ogt (ADR-110: OpenClaw chat 改走 GCP-A Ollama interactive lane)
架構:
-- OpenClaw (Gemini API): SRE 首席顧問,精準分析
-- NemoClaw (Ollama 192.168.0.111 deepseek-r1:14b): 戰術參謀,深度推理
+- OpenClaw (Ollama GCP-A interactive lane): SRE 首席顧問,精準分析
+- NemoClaw (Ollama interactive lane deepseek-r1:14b): 戰術參謀,深度推理
費用控管:
-- Gemini Flash: Input $0.075/1M tokens, Output $0.30/1M tokens
-- NemoClaw: 免費 (本地 Ollama)
-- 每次回覆顯示 token 用量與費用
-- 月上限 $10 USD (由 ai_rate_limiter 控管)
+- OpenClaw/NemoClaw chat 預設免費 Ollama;Gemini 不再作為 ChatManager 預設路徑
+- 每次回覆顯示 token 用量
"""
import asyncio
+import re
+
+import httpx
import structlog
-from src.utils.timezone import now_taipei
-from src.repositories.k8s_repository import get_k8s_repository
+
+from src.core.config import get_settings
from src.repositories.incident_repository import get_incident_repository
+from src.repositories.k8s_repository import get_k8s_repository
+from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
+from src.utils.timezone import now_taipei
logger = structlog.get_logger(__name__)
@@ -81,73 +86,49 @@ class ChatManager:
async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None:
"""
- 呼叫 OpenClaw 對話 — Gemini Flash API
-
- 2026-04-03 ogt: 老闆指示改用 Gemini,費用控管月上限 $10 USD
- 每次回覆附帶 token 用量與費用統計
+ Call the OpenClaw chat via the Ollama interactive lane; returns the reply plus a usage footer, or None on any failure
2026-04-10 Claude Code: 強制合併 OPENCLAW_PERSONA,確保字數限制與格式規範
+ 2026-05-05 Codex: switched to the ADR-110 GCP-A/GCP-B/111 Ollama topology so personal chat no longer calls Gemini directly
"""
# 強制在 system_prompt 前置 persona,確保 LLM 遵守字數與格式
system_prompt = f"{OPENCLAW_PERSONA}\n{system_prompt}"
- import httpx
- from src.core.config import get_settings
settings = get_settings()
- api_key = settings.GEMINI_API_KEY
- if not api_key:
- logger.warning("openclaw_chat_failed", error="GEMINI_API_KEY not configured")
- return None
-
- # 月費用上限檢查 ($10 USD)
- MONTHLY_LIMIT_USD = 10.0
- from src.core.redis_client import get_redis
- from src.utils.timezone import now_taipei
- redis = get_redis()
- month_key = f"gemini_cost:{now_taipei().strftime('%Y-%m')}"
+ model = settings.OPENCLAW_DEFAULT_MODEL
+ ollama_url = resolve_ollama_endpoint("interactive")
try:
- current_cost = float(await redis.get(month_key) or 0)
- except Exception:
- current_cost = 0.0
-
- if current_cost >= MONTHLY_LIMIT_USD:
- logger.warning("openclaw_gemini_monthly_limit_reached", current_usd=current_cost, limit_usd=MONTHLY_LIMIT_USD)
- return f"🔴 OpenClaw 本月 Gemini 用量已達上限 ${MONTHLY_LIMIT_USD} USD(已用 ${current_cost:.4f})"
-
- # Gemini 2.0 Flash-Lite: 最便宜 (2026-04-03 老闆指示)
- model = "gemini-2.0-flash-lite"
- try:
- async with httpx.AsyncClient(timeout=30.0) as client:
+ async with httpx.AsyncClient(timeout=40.0) as client:
resp = await client.post(
- f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent",
- headers={"x-goog-api-key": api_key},
+ f"{ollama_url}/api/chat",
json={
- "system_instruction": {"parts": [{"text": system_prompt}]},
- "contents": [{"parts": [{"text": user_message}]}],
- "generationConfig": {"maxOutputTokens": 300, "temperature": 0.7},
+ "model": model,
+ "stream": False,
+ "messages": [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": user_message},
+ ],
+ "options": {"num_predict": 900, "temperature": 0.2},
},
)
resp.raise_for_status()
data = resp.json()
- text = data["candidates"][0]["content"]["parts"][0]["text"].strip()
+ raw = data.get("message", {}).get("content", "").strip()
+ text = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip() or raw  # drop the model's <think> reasoning; keep raw if nothing else is left
- # Token/費用統計 + 累計到 Redis
- usage = data.get("usageMetadata", {})
- in_tok = usage.get("promptTokenCount", 0)
- out_tok = usage.get("candidatesTokenCount", 0)
- # Gemini 2.0 Flash-Lite: Input $0.075/1M, Output $0.30/1M
- cost = (in_tok * 0.000000075) + (out_tok * 0.0000003)
- new_total = current_cost + cost
+ eval_count = data.get("eval_count", 0)
+ prompt_eval_count = data.get("prompt_eval_count", 0)
+ total_tokens = eval_count + prompt_eval_count
- try:
- await redis.set(month_key, str(round(new_total, 6)), ex=40 * 24 * 3600) # 40天 TTL
- except Exception:
- pass
+ logger.info(
+ "openclaw_ollama_chat_usage",
+ model=model,
+ endpoint=ollama_url,
+ prompt_tokens=prompt_eval_count,
+ output_tokens=eval_count,
+ )
- logger.info("openclaw_gemini_usage", in_tokens=in_tok, out_tokens=out_tok,
- cost_usd=round(cost, 6), monthly_total_usd=round(new_total, 4))
-
- return f"{text}\n\n📊 {in_tok+out_tok} tokens | ${cost:.4f} | 本月累計 ${new_total:.4f}"
+ return f"{text}\n\n🦙 {model} | {total_tokens} tokens | 免費"
except Exception as e:
logger.warning("openclaw_chat_failed", error=str(e))
return None
@@ -164,12 +145,8 @@ class ChatManager:
# 強制在 system_prompt 前置 persona
system_prompt = f"{NEMOCLAW_PERSONA}\n{system_prompt}"
- import httpx
- import re
- from src.core.config import get_settings as _get_settings
-
- # 2026-05-03 ogt: ADR-110 GCP-A Primary — 改從 settings 讀取,不再硬編碼 111
- OLLAMA_URL = _get_settings().OLLAMA_URL
+ # 2026-05-05 Codex: ADR-110 interactive lane,由 resolver 管理 GCP-A/GCP-B/111 拓撲
+ OLLAMA_URL = resolve_ollama_endpoint("interactive")
MODEL = "deepseek-r1:14b"
try:
@@ -250,14 +227,14 @@ class ChatManager:
# 2026-04-03 ogt: 移除 asyncio.shield — shield 會在超時後讓 task 繼續跑但無人等待,造成 silent leak
try:
openclaw_raw = await asyncio.wait_for(openclaw_task, timeout=40.0)
- except asyncio.TimeoutError:
+ except TimeoutError:
openclaw_raw = None
openclaw_block = f"🦞 OpenClaw:\n{openclaw_raw or '🔴 無響應'}"
try:
nemo_raw = await asyncio.wait_for(nemo_task, timeout=60.0)
- except asyncio.TimeoutError:
+ except TimeoutError:
nemo_raw = None
if nemo_raw:
diff --git a/apps/api/tests/test_chat_manager_ollama_routing.py b/apps/api/tests/test_chat_manager_ollama_routing.py
new file mode 100644
index 00000000..d5dc40f6
--- /dev/null
+++ b/apps/api/tests/test_chat_manager_ollama_routing.py
@@ -0,0 +1,122 @@
+"""Routing tests for ChatManager: both chat personas must POST to the resolved Ollama endpoint."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+import pytest
+
+from src.services import chat_manager as chat_module
+from src.services.chat_manager import ChatManager
+
+# Stand-in values handed out by the patched resolver and settings.
+_ENDPOINT = "http://gcp-a:11435"
+_OPENCLAW_MODEL = "qwen3:14b"
+
+
+class _FakeResponse:
+    """Canned HTTP response exposing only the members the tests exercise."""
+
+    def __init__(self, content: str = "老闆,系統目前穩定。") -> None:
+        self._content = content
+
+    def raise_for_status(self) -> None:
+        """Never raises: the fake always represents a successful response."""
+        return None
+
+    def json(self) -> dict[str, Any]:
+        """Return the message content plus fixed token counters (11 prompt, 13 output)."""
+        body: dict[str, Any] = {"message": {"content": self._content}}
+        body["prompt_eval_count"] = 11
+        body["eval_count"] = 13
+        return body
+
+
+class _FakeAsyncClient:
+    """Replacement for `httpx.AsyncClient` that records each POST instead of sending it."""
+
+    # Class-level log: tests read it via `_FakeAsyncClient.posted`; the autouse fixture resets it.
+    posted: list[tuple[str, dict[str, Any]]] = []
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        self.args, self.kwargs = args, kwargs
+
+    async def __aenter__(self) -> _FakeAsyncClient:
+        return self
+
+    async def __aexit__(self, *args: Any) -> None:
+        return None
+
+    async def post(self, url: str, *, json: dict[str, Any]) -> _FakeResponse:
+        """Record the (url, payload) pair and answer with the default fake response."""
+        request = (url, json)
+        self.posted.append(request)
+        return _FakeResponse()
+
+
+def _settings() -> SimpleNamespace:
+    """Minimal settings object carrying only `OPENCLAW_DEFAULT_MODEL`."""
+    return SimpleNamespace(OPENCLAW_DEFAULT_MODEL=_OPENCLAW_MODEL)
+
+
+def _fixed_endpoint(workload_type: str) -> str:
+    """Resolver stub: every workload type maps to the same test endpoint."""
+    return _ENDPOINT
+
+
+@pytest.fixture(autouse=True)
+def _reset_fake_client() -> None:
+    """Give every test an empty request log."""
+    _FakeAsyncClient.posted = []
+
+
+@pytest.mark.asyncio
+async def test_openclaw_chat_uses_ollama_interactive_lane(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """OpenClaw sends exactly one chat request to the resolved endpoint with the configured model."""
+    monkeypatch.setattr(chat_module.httpx, "AsyncClient", _FakeAsyncClient)
+    monkeypatch.setattr(chat_module, "get_settings", _settings)
+    monkeypatch.setattr(chat_module, "resolve_ollama_endpoint", _fixed_endpoint)
+
+    reply = await ChatManager()._call_openclaw("system context", "幫我看狀態")
+
+    # The reply must mention the model name and the 免費 (free) marker.
+    assert reply is not None
+    assert _OPENCLAW_MODEL in reply
+    assert "免費" in reply
+
+    sent = _FakeAsyncClient.posted
+    assert len(sent) == 1
+    endpoint, body = sent[0]
+    assert endpoint == f"{_ENDPOINT}/api/chat"
+    assert body["model"] == _OPENCLAW_MODEL
+    assert body["messages"][0]["role"] == "system"
+    assert body["messages"][1] == {"role": "user", "content": "幫我看狀態"}
+
+
+@pytest.mark.asyncio
+async def test_nemoclaw_chat_uses_resolved_interactive_lane(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """NemoClaw posts to the resolved endpoint and asks for deepseek-r1:14b."""
+    monkeypatch.setattr(chat_module.httpx, "AsyncClient", _FakeAsyncClient)
+    monkeypatch.setattr(chat_module, "resolve_ollama_endpoint", _fixed_endpoint)
+
+    reply = await ChatManager()._call_nemotron("system context", "補充觀點")
+
+    assert reply is not None
+    endpoint, body = _FakeAsyncClient.posted[0]
+    assert endpoint == f"{_ENDPOINT}/api/chat"
+    assert body["model"] == "deepseek-r1:14b"
+
+
+def test_chat_manager_has_no_direct_gemini_generation_path() -> None:
+    """The chat_manager source must not contain the Gemini endpoint host or API-key setting name."""
+    module_file = Path(chat_module.__file__).resolve()
+    source = module_file.read_text(encoding="utf-8")
+
+    for forbidden in ("generativelanguage.googleapis.com", "GEMINI_API_KEY"):
+        assert forbidden not in source