fix(chat): route OpenClaw chat through Ollama lane
Some checks failed
CD Pipeline / tests (push) Successful in 5m26s
Code Review / ai-code-review (push) Successful in 25s
CD Pipeline / build-and-deploy (push) Successful in 8m11s
CD Pipeline / post-deploy-checks (push) Has been cancelled

This commit is contained in:
Your Name
2026-05-05 15:57:07 +08:00
parent 3a17a860a0
commit 6b93c8f454
2 changed files with 148 additions and 66 deletions

View File

@@ -5,23 +5,28 @@ Phase 21.5 初版: 2026-03-31 ogt
Phase 22.6 重寫: 2026-04-03 ogt (老闆需求: 雙 AI 互動對話)
Phase 22.7 更新: 2026-04-03 ogt (老闆指示: OpenClaw→Gemini, NemoClaw→Ollama llama3.2:3b)
Phase 22.8 更新: 2026-04-09 ogt (老闆指示: NemoClaw→Ollama 111 deepseek-r1:14b，SRE 推理更強)
Phase 33 更新: 2026-05-05 ogt (ADR-110: OpenClaw chat 改走 GCP-A Ollama interactive lane)
架構:
- OpenClaw (Gemini API): SRE 首席顧問,精準分析
- NemoClaw (Ollama 192.168.0.111 deepseek-r1:14b): 戰術參謀,深度推理
- OpenClaw (Ollama GCP-A interactive lane): SRE 首席顧問,精準分析
- NemoClaw (Ollama interactive lane deepseek-r1:14b): 戰術參謀,深度推理
費用控管:
- Gemini Flash: Input $0.075/1M tokens, Output $0.30/1M tokens
- NemoClaw: 免費 (本地 Ollama)
- 每次回覆顯示 token 用量與費用
- 月上限 $10 USD (由 ai_rate_limiter 控管)
- OpenClaw/NemoClaw chat 預設免費 Ollama，Gemini 不再作為 ChatManager 預設路徑
- 每次回覆顯示 token 用量
"""
import asyncio
import re
import httpx
import structlog
from src.utils.timezone import now_taipei
from src.repositories.k8s_repository import get_k8s_repository
from src.core.config import get_settings
from src.repositories.incident_repository import get_incident_repository
from src.repositories.k8s_repository import get_k8s_repository
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
from src.utils.timezone import now_taipei
logger = structlog.get_logger(__name__)
@@ -81,73 +86,49 @@ class ChatManager:
async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None:
"""
呼叫 OpenClaw 對話 — Gemini Flash API
2026-04-03 ogt: 老闆指示改用 Gemini費用控管月上限 $10 USD
每次回覆附帶 token 用量與費用統計
呼叫 OpenClaw 對話 — Ollama interactive lane
2026-04-10 Claude Code: 強制合併 OPENCLAW_PERSONA，確保字數限制與格式規範
2026-05-05 Codex: 改走 ADR-110 GCP-A/GCP-B/111 Ollama topology，避免個人聊天直打 Gemini
"""
# 強制在 system_prompt 前置 persona，確保 LLM 遵守字數與格式
system_prompt = f"{OPENCLAW_PERSONA}\n{system_prompt}"
import httpx
from src.core.config import get_settings
settings = get_settings()
api_key = settings.GEMINI_API_KEY
if not api_key:
logger.warning("openclaw_chat_failed", error="GEMINI_API_KEY not configured")
return None
# 月費用上限檢查 ($10 USD)
MONTHLY_LIMIT_USD = 10.0
from src.core.redis_client import get_redis
from src.utils.timezone import now_taipei
redis = get_redis()
month_key = f"gemini_cost:{now_taipei().strftime('%Y-%m')}"
model = settings.OPENCLAW_DEFAULT_MODEL
ollama_url = resolve_ollama_endpoint("interactive")
try:
current_cost = float(await redis.get(month_key) or 0)
except Exception:
current_cost = 0.0
if current_cost >= MONTHLY_LIMIT_USD:
logger.warning("openclaw_gemini_monthly_limit_reached", current_usd=current_cost, limit_usd=MONTHLY_LIMIT_USD)
return f"🔴 OpenClaw 本月 Gemini 用量已達上限 ${MONTHLY_LIMIT_USD} USD已用 ${current_cost:.4f}"
# Gemini 2.0 Flash-Lite: 最便宜 (2026-04-03 老闆指示)
model = "gemini-2.0-flash-lite"
try:
async with httpx.AsyncClient(timeout=30.0) as client:
async with httpx.AsyncClient(timeout=40.0) as client:
resp = await client.post(
f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent",
headers={"x-goog-api-key": api_key},
f"{ollama_url}/api/chat",
json={
"system_instruction": {"parts": [{"text": system_prompt}]},
"contents": [{"parts": [{"text": user_message}]}],
"generationConfig": {"maxOutputTokens": 300, "temperature": 0.7},
"model": model,
"stream": False,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message},
],
"options": {"num_predict": 900, "temperature": 0.2},
},
)
resp.raise_for_status()
data = resp.json()
text = data["candidates"][0]["content"]["parts"][0]["text"].strip()
raw = data.get("message", {}).get("content", "").strip()
text = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip() or raw
# Token/費用統計 + 累計到 Redis
usage = data.get("usageMetadata", {})
in_tok = usage.get("promptTokenCount", 0)
out_tok = usage.get("candidatesTokenCount", 0)
# Gemini 2.0 Flash-Lite: Input $0.075/1M, Output $0.30/1M
cost = (in_tok * 0.000000075) + (out_tok * 0.0000003)
new_total = current_cost + cost
eval_count = data.get("eval_count", 0)
prompt_eval_count = data.get("prompt_eval_count", 0)
total_tokens = eval_count + prompt_eval_count
try:
await redis.set(month_key, str(round(new_total, 6)), ex=40 * 24 * 3600) # 40天 TTL
except Exception:
pass
logger.info(
"openclaw_ollama_chat_usage",
model=model,
endpoint=ollama_url,
prompt_tokens=prompt_eval_count,
output_tokens=eval_count,
)
logger.info("openclaw_gemini_usage", in_tokens=in_tok, out_tokens=out_tok,
cost_usd=round(cost, 6), monthly_total_usd=round(new_total, 4))
return f"{text}\n\n<i>📊 {in_tok+out_tok} tokens | ${cost:.4f} | 本月累計 ${new_total:.4f}</i>"
return f"{text}\n\n<i>🦙 {model} | {total_tokens} tokens | 免費</i>"
except Exception as e:
logger.warning("openclaw_chat_failed", error=str(e))
return None
@@ -164,12 +145,8 @@ class ChatManager:
# 強制在 system_prompt 前置 persona
system_prompt = f"{NEMOCLAW_PERSONA}\n{system_prompt}"
import httpx
import re
from src.core.config import get_settings as _get_settings
# 2026-05-03 ogt: ADR-110 GCP-A Primary — 改從 settings 讀取,不再硬編碼 111
OLLAMA_URL = _get_settings().OLLAMA_URL
# 2026-05-05 Codex: ADR-110 interactive lane，由 resolver 管理 GCP-A/GCP-B/111 拓撲
OLLAMA_URL = resolve_ollama_endpoint("interactive")
MODEL = "deepseek-r1:14b"
try:
@@ -250,14 +227,14 @@ class ChatManager:
# 2026-04-03 ogt: 移除 asyncio.shield — shield 會在超時後讓 task 繼續跑但無人等待,造成 silent leak
try:
openclaw_raw = await asyncio.wait_for(openclaw_task, timeout=40.0)
except asyncio.TimeoutError:
except TimeoutError:
openclaw_raw = None
openclaw_block = f"🦞 <b>OpenClaw:</b>\n{openclaw_raw or '🔴 無響應'}"
try:
nemo_raw = await asyncio.wait_for(nemo_task, timeout=60.0)
except asyncio.TimeoutError:
except TimeoutError:
nemo_raw = None
if nemo_raw:

View File

@@ -0,0 +1,105 @@
from __future__ import annotations
from pathlib import Path
from types import SimpleNamespace
from typing import Any
import pytest
from src.services import chat_manager as chat_module
from src.services.chat_manager import ChatManager
class _FakeResponse:
    """Minimal stand-in for the HTTP response returned by the fake client.

    It exposes only the two members this test module needs:
    ``raise_for_status`` and ``json``.
    """

    def __init__(self, content: str = "老闆,系統目前穩定。") -> None:
        # Text served back as the assistant message of the chat reply.
        self._content = content

    def raise_for_status(self) -> None:
        """Simulate a successful request: never raises, returns ``None``."""

    def json(self) -> dict[str, Any]:
        """Return a chat-reply payload with fixed token counters."""
        payload: dict[str, Any] = {"message": {"content": self._content}}
        payload["prompt_eval_count"] = 11
        payload["eval_count"] = 13
        return payload
class _FakeAsyncClient:
    """Async-context-manager double used in place of ``httpx.AsyncClient``.

    Each ``post`` call is appended to the class-level ``posted`` list, so a
    test can inspect the request after the code under test has dropped its
    client instance.
    """

    # Shared across instances on purpose; the autouse fixture in this module
    # rebinds it to a fresh list before every test.
    posted: list[tuple[str, dict[str, Any]]] = []

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Keep the constructor arguments (e.g. ``timeout``) for inspection.
        self.args, self.kwargs = args, kwargs

    async def __aenter__(self) -> _FakeAsyncClient:
        """Enter the ``async with`` block, yielding this same instance."""
        return self

    async def __aexit__(self, *args: Any) -> None:
        """Leave the ``async with`` block; exceptions are not suppressed."""

    async def post(self, url: str, *, json: dict[str, Any]) -> _FakeResponse:
        """Record ``(url, json)`` and answer with a default fake response."""
        recorded_call = (url, json)
        self.posted.append(recorded_call)
        return _FakeResponse()
def _settings() -> SimpleNamespace:
    """Build a settings stub exposing only ``OPENCLAW_DEFAULT_MODEL``."""
    stub = SimpleNamespace()
    stub.OPENCLAW_DEFAULT_MODEL = "qwen3:14b"
    return stub
@pytest.fixture(autouse=True)
def _reset_fake_client() -> None:
    """Give every test an empty request log on the fake HTTP client."""
    # Rebind rather than clear, so a list captured by an earlier test is
    # left untouched.
    fresh_log: list[tuple[str, dict[str, Any]]] = []
    _FakeAsyncClient.posted = fresh_log
@pytest.mark.asyncio
async def test_openclaw_chat_uses_ollama_interactive_lane(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """OpenClaw chat must POST to the resolver's *interactive* Ollama endpoint.

    The resolver stub is lane-aware: only ``"interactive"`` maps to the
    expected host. The URL assertion therefore fails if the code under test
    asks for any other workload type, which a lane-agnostic stub would hide.
    """
    requested_lanes: list[str] = []

    def _resolve(workload_type: str) -> str:
        requested_lanes.append(workload_type)
        if workload_type == "interactive":
            return "http://gcp-a:11435"
        return "http://unexpected-lane.invalid:11434"

    monkeypatch.setattr(chat_module.httpx, "AsyncClient", _FakeAsyncClient)
    monkeypatch.setattr(chat_module, "get_settings", _settings)
    monkeypatch.setattr(chat_module, "resolve_ollama_endpoint", _resolve)

    result = await ChatManager()._call_openclaw("system context", "幫我看狀態")

    # The reply footer names the model and marks the call as free of charge.
    assert result is not None
    assert "qwen3:14b" in result
    assert "免費" in result

    # The endpoint must have been resolved for the interactive lane.
    assert "interactive" in requested_lanes

    assert len(_FakeAsyncClient.posted) == 1
    url, payload = _FakeAsyncClient.posted[0]
    assert url == "http://gcp-a:11435/api/chat"
    assert payload["model"] == "qwen3:14b"
    # The reply is parsed as one JSON document, so streaming must be off.
    assert payload["stream"] is False
    assert payload["messages"][0]["role"] == "system"
    assert payload["messages"][1] == {"role": "user", "content": "幫我看狀態"}
@pytest.mark.asyncio
async def test_nemoclaw_chat_uses_resolved_interactive_lane(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """NemoClaw chat must POST to the resolver's *interactive* Ollama endpoint.

    The resolver stub is lane-aware: only ``"interactive"`` maps to the
    expected host, so the URL assertion fails if another workload type is
    requested.
    """
    requested_lanes: list[str] = []

    def _resolve(workload_type: str) -> str:
        requested_lanes.append(workload_type)
        if workload_type == "interactive":
            return "http://gcp-a:11435"
        return "http://unexpected-lane.invalid:11434"

    monkeypatch.setattr(chat_module.httpx, "AsyncClient", _FakeAsyncClient)
    monkeypatch.setattr(chat_module, "resolve_ollama_endpoint", _resolve)

    result = await ChatManager()._call_nemotron("system context", "補充觀點")

    assert result is not None
    assert "interactive" in requested_lanes

    # Fail with a readable assertion, not an IndexError, if nothing was sent.
    assert _FakeAsyncClient.posted, "no HTTP request was recorded"
    url, payload = _FakeAsyncClient.posted[0]
    assert url == "http://gcp-a:11435/api/chat"
    assert payload["model"] == "deepseek-r1:14b"
def test_chat_manager_has_no_direct_gemini_generation_path() -> None:
    """Guard against the chat manager source regaining a direct Gemini call.

    Reads the module's own source text and checks that neither the Gemini
    REST host nor the API-key setting name appears anywhere in it.
    """
    module_text = Path(chat_module.__file__).resolve().read_text(encoding="utf-8")
    for forbidden in ("generativelanguage.googleapis.com", "GEMINI_API_KEY"):
        assert forbidden not in module_text