fix(chat): route OpenClaw chat through Ollama lane
This commit is contained in:
@@ -5,23 +5,28 @@ Phase 21.5 初版: 2026-03-31 ogt
|
||||
Phase 22.6 重寫: 2026-04-03 ogt (老闆需求: 雙 AI 互動對話)
|
||||
Phase 22.7 更新: 2026-04-03 ogt (老闆指示: OpenClaw→Gemini, NemoClaw→Ollama llama3.2:3b)
|
||||
Phase 22.8 更新: 2026-04-09 ogt (老闆指示: NemoClaw→Ollama 111 deepseek-r1:14b,SRE 推理更強)
|
||||
Phase 33 更新: 2026-05-05 ogt (ADR-110: OpenClaw chat 改走 GCP-A Ollama interactive lane)
|
||||
|
||||
架構:
|
||||
- OpenClaw (Gemini API): SRE 首席顧問,精準分析
|
||||
- NemoClaw (Ollama 192.168.0.111 deepseek-r1:14b): 戰術參謀,深度推理
|
||||
- OpenClaw (Ollama GCP-A interactive lane): SRE 首席顧問,精準分析
|
||||
- NemoClaw (Ollama interactive lane deepseek-r1:14b): 戰術參謀,深度推理
|
||||
|
||||
費用控管:
|
||||
- Gemini Flash: Input $0.075/1M tokens, Output $0.30/1M tokens
|
||||
- NemoClaw: 免費 (本地 Ollama)
|
||||
- 每次回覆顯示 token 用量與費用
|
||||
- 月上限 $10 USD (由 ai_rate_limiter 控管)
|
||||
- OpenClaw/NemoClaw chat 預設免費 Ollama;Gemini 不再作為 ChatManager 預設路徑
|
||||
- 每次回覆顯示 token 用量
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
import httpx
|
||||
import structlog
|
||||
from src.utils.timezone import now_taipei
|
||||
from src.repositories.k8s_repository import get_k8s_repository
|
||||
|
||||
from src.core.config import get_settings
|
||||
from src.repositories.incident_repository import get_incident_repository
|
||||
from src.repositories.k8s_repository import get_k8s_repository
|
||||
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
@@ -81,73 +86,49 @@ class ChatManager:
|
||||
|
||||
async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None:
    """
    Ask OpenClaw for a chat reply through the Ollama interactive lane.

    History:
        2026-04-10 Claude Code: always prepend OPENCLAW_PERSONA so the length
            limit and formatting rules are enforced.
        2026-05-05 Codex: routed through the ADR-110 GCP-A/GCP-B/111 Ollama
            topology so personal chat no longer calls the hosted Gemini API.

    Args:
        system_prompt: Context for this turn; the persona is prepended to it.
        user_message: The user's message, sent as the single ``user`` turn.

    Returns:
        The reply text (``<think>...</think>`` blocks removed) followed by an
        HTML footer with the model name and total token count, or ``None``
        when the request fails or the model returns no text.
    """
    # Prepend the persona so the LLM honours the length and format rules.
    system_prompt = f"{OPENCLAW_PERSONA}\n{system_prompt}"
    settings = get_settings()
    model = settings.OPENCLAW_DEFAULT_MODEL
    ollama_url = resolve_ollama_endpoint("interactive")

    try:
        # A trailing slash on the resolved endpoint would otherwise produce
        # ".../​/api/chat"; normalise it before building the request URL.
        chat_url = f"{str(ollama_url).rstrip('/')}/api/chat"

        async with httpx.AsyncClient(timeout=40.0) as client:
            resp = await client.post(
                chat_url,
                json={
                    "model": model,
                    "stream": False,
                    "messages": [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_message},
                    ],
                    "options": {"num_predict": 900, "temperature": 0.2},
                },
            )
            resp.raise_for_status()
            data = resp.json()

        # Tolerate a missing or null "message" / "content" in the payload.
        message = data.get("message") or {}
        raw = (message.get("content") or "").strip()
        # Drop reasoning blocks; if nothing else is left, fall back to the raw text.
        text = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip() or raw

        if not text:
            # Returning only the usage footer would look like a real answer;
            # report failure so the caller can show its "no response" fallback.
            logger.warning("openclaw_chat_failed", error="empty response from Ollama")
            return None

        eval_count = data.get("eval_count") or 0
        prompt_eval_count = data.get("prompt_eval_count") or 0
        total_tokens = eval_count + prompt_eval_count

        logger.info(
            "openclaw_ollama_chat_usage",
            model=model,
            endpoint=ollama_url,
            prompt_tokens=prompt_eval_count,
            output_tokens=eval_count,
        )

        return f"{text}\n\n<i>🦙 {model} | {total_tokens} tokens | 免費</i>"
    except Exception as e:
        # Deliberate best-effort boundary: any failure means "no reply".
        # str(e) can be empty for some exceptions, so log the type as well.
        logger.warning(
            "openclaw_chat_failed",
            error=str(e),
            error_type=type(e).__name__,
        )
        return None
|
||||
@@ -164,12 +145,8 @@ class ChatManager:
|
||||
# 強制在 system_prompt 前置 persona
|
||||
system_prompt = f"{NEMOCLAW_PERSONA}\n{system_prompt}"
|
||||
|
||||
import httpx
|
||||
import re
|
||||
from src.core.config import get_settings as _get_settings
|
||||
|
||||
# 2026-05-03 ogt: ADR-110 GCP-A Primary — 改從 settings 讀取,不再硬編碼 111
|
||||
OLLAMA_URL = _get_settings().OLLAMA_URL
|
||||
# 2026-05-05 Codex: ADR-110 interactive lane,由 resolver 管理 GCP-A/GCP-B/111 拓撲
|
||||
OLLAMA_URL = resolve_ollama_endpoint("interactive")
|
||||
MODEL = "deepseek-r1:14b"
|
||||
|
||||
try:
|
||||
@@ -250,14 +227,14 @@ class ChatManager:
|
||||
# 2026-04-03 ogt: 移除 asyncio.shield — shield 會在超時後讓 task 繼續跑但無人等待,造成 silent leak
|
||||
try:
|
||||
openclaw_raw = await asyncio.wait_for(openclaw_task, timeout=40.0)
|
||||
except asyncio.TimeoutError:
|
||||
except TimeoutError:
|
||||
openclaw_raw = None
|
||||
|
||||
openclaw_block = f"🦞 <b>OpenClaw:</b>\n{openclaw_raw or '🔴 無響應'}"
|
||||
|
||||
try:
|
||||
nemo_raw = await asyncio.wait_for(nemo_task, timeout=60.0)
|
||||
except asyncio.TimeoutError:
|
||||
except TimeoutError:
|
||||
nemo_raw = None
|
||||
|
||||
if nemo_raw:
|
||||
|
||||
105
apps/api/tests/test_chat_manager_ollama_routing.py
Normal file
105
apps/api/tests/test_chat_manager_ollama_routing.py
Normal file
@@ -0,0 +1,105 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from src.services import chat_manager as chat_module
|
||||
from src.services.chat_manager import ChatManager
|
||||
|
||||
|
||||
class _FakeResponse:
    """Minimal stand-in for the HTTP response handed back by the fake client.

    Only the two members used by these tests are provided:
    ``raise_for_status()`` and ``json()``.
    """

    def __init__(self, content: str = "老闆,系統目前穩定。") -> None:
        # Text that json() places under message.content.
        self._text = content

    def raise_for_status(self) -> None:
        """Never raise; the fake always represents a successful response."""
        return None

    def json(self) -> dict[str, Any]:
        """Return a fixed chat payload: the reply text plus token counters."""
        body: dict[str, Any] = {"message": {"content": self._text}}
        body["prompt_eval_count"] = 11
        body["eval_count"] = 13
        return body
|
||||
|
||||
|
||||
class _FakeAsyncClient:
    """Async-context-manager fake that records POST calls instead of sending them.

    Constructor arguments are stored untouched so a test can inspect them.
    """

    # Class-level log of (url, json_body) pairs, shared by every instance so a
    # test can read it without holding the instance the code under test made.
    posted: list[tuple[str, dict[str, Any]]] = []

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        self.args, self.kwargs = args, kwargs

    async def __aenter__(self) -> "_FakeAsyncClient":
        """Enter the ``async with`` block, yielding this same instance."""
        return self

    async def __aexit__(self, *args: Any) -> None:
        """Leave the ``async with`` block; nothing to clean up."""
        return None

    async def post(self, url: str, *, json: dict[str, Any]) -> "_FakeResponse":
        """Record the call and answer with a default ``_FakeResponse``."""
        call = (url, json)
        self.posted.append(call)
        return _FakeResponse()
|
||||
|
||||
|
||||
def _settings() -> SimpleNamespace:
    """Build a stub settings object exposing only ``OPENCLAW_DEFAULT_MODEL``."""
    stub = SimpleNamespace()
    stub.OPENCLAW_DEFAULT_MODEL = "qwen3:14b"
    return stub
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_fake_client() -> None:
    """Give every test a fresh, empty call log on ``_FakeAsyncClient``."""
    # Rebind rather than clear in place, so a list captured by an earlier
    # test is left untouched.
    fresh_log: list[tuple[str, dict[str, Any]]] = []
    _FakeAsyncClient.posted = fresh_log
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_openclaw_chat_uses_ollama_interactive_lane(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """OpenClaw chat posts once to the resolved endpoint's ``/api/chat``."""

    def _resolve(workload_type):
        # Stand-in for the endpoint resolver: always the same fake host.
        return "http://gcp-a:11435"

    # Swap the HTTP client, the settings factory, and the endpoint resolver.
    patches = (
        (chat_module.httpx, "AsyncClient", _FakeAsyncClient),
        (chat_module, "get_settings", _settings),
        (chat_module, "resolve_ollama_endpoint", _resolve),
    )
    for target, attribute, replacement in patches:
        monkeypatch.setattr(target, attribute, replacement)

    manager = ChatManager()
    result = await manager._call_openclaw("system context", "幫我看狀態")

    # The reply footer carries the model name and the "free" marker.
    assert result is not None
    assert "qwen3:14b" in result
    assert "免費" in result

    # Exactly one request, addressed to the chat endpoint of the fake host.
    assert len(_FakeAsyncClient.posted) == 1
    url, payload = _FakeAsyncClient.posted[0]
    assert url == "http://gcp-a:11435/api/chat"
    assert payload["model"] == "qwen3:14b"

    messages = payload["messages"]
    assert messages[0]["role"] == "system"
    assert messages[1] == {"role": "user", "content": "幫我看狀態"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_nemoclaw_chat_uses_resolved_interactive_lane(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """NemoClaw chat posts to the resolved endpoint with ``deepseek-r1:14b``."""

    def _resolve(workload_type):
        # Stand-in for the endpoint resolver: always the same fake host.
        return "http://gcp-a:11435"

    monkeypatch.setattr(chat_module.httpx, "AsyncClient", _FakeAsyncClient)
    monkeypatch.setattr(chat_module, "resolve_ollama_endpoint", _resolve)

    manager = ChatManager()
    result = await manager._call_nemotron("system context", "補充觀點")

    assert result is not None

    # Inspect the first recorded request.
    first_call = _FakeAsyncClient.posted[0]
    url, payload = first_call
    assert url == "http://gcp-a:11435/api/chat"
    assert payload["model"] == "deepseek-r1:14b"
|
||||
|
||||
|
||||
def test_chat_manager_has_no_direct_gemini_generation_path() -> None:
    """The chat manager source must not contain either forbidden marker string."""
    module_text = Path(chat_module.__file__).resolve().read_text(encoding="utf-8")

    # Checked in this order: the API host first, then the API-key setting name.
    forbidden_markers = ("generativelanguage.googleapis.com", "GEMINI_API_KEY")
    for marker in forbidden_markers:
        assert marker not in module_text
|
||||
Reference in New Issue
Block a user