fix: route openclaw qa through ollama cascade
This commit is contained in:
@@ -340,9 +340,9 @@ OLLAMA_COPY_TIMEOUT=180
|
||||
OLLAMA_EMBED_TIMEOUT=45
|
||||
|
||||
# [預設 true] OpenClaw Q&A 先走 Ollama,品質不足或失敗時才 fallback Gemini/NIM
|
||||
# 不提供單一 caller 的主機 override;一律走 OLLAMA_HOST_PRIMARY → OLLAMA_HOST_SECONDARY → OLLAMA_HOST_FALLBACK
|
||||
OPENCLAW_QA_OLLAMA_FIRST=true
|
||||
OPENCLAW_QA_OLLAMA_MODEL=qwen3:14b
|
||||
OPENCLAW_QA_OLLAMA_HOST=http://34.143.170.20:11434
|
||||
OPENCLAW_QA_OLLAMA_TIMEOUT=60
|
||||
NEMOTRON_OLLAMA_FIRST=true
|
||||
NEMOTRON_OLLAMA_MODEL=qwen3:14b
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
- 188 `192.168.0.188` 僅是 App / DB / scheduler / Telegram bot 容器宿主與 AutoHeal target,不可作為 Ollama 節點。
|
||||
- 通用 AI 文案、關鍵字、商品洞察與 Telegram Q&A 第一響應不得 Gemini-first。
|
||||
- Code Review pipeline 也必須 Ollama-first:Hermes scan 與 OpenClaw assessment 都走 `OllamaService` 三主機 retry;Gemini telemetry 只能以 `code_review_openclaw_gemini` 出現,表示 Ollama/可選 Claude 備援都失敗後才啟用。
|
||||
- OpenClaw Telegram Q&A 主路徑也不得綁單一 host:`_call_qwen3_qa()` 必須透過 `OllamaService` 跑 GCP-A → GCP-B → 111,並把實際落點寫入 `ai_calls.provider`。
|
||||
|
||||
## 一、四 AI Agent 路由架構
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
- `services/pg_sync_service.py` 是顯式 opt-in legacy CLI,不是生產自動同步路徑;`tests/test_pg_sync_contract.py` 已守住預設 OFF 與 runtime paths 不自動 import。
|
||||
- `qwen3:14b` 不是未使用 Ollama 模型:OpenClaw QA、NemoTron dispatch 與 LLM model router 仍有現役路徑;`tests/test_qwen3_runtime_usage.py` 已守住,不能只因體積大就三主機移除。
|
||||
- Ollama host env 已加白名單護欄:`OLLAMA_HOST*` / `EMBEDDING_HOST` 只接受 GCP-A、GCP-B、111 或 110 proxy,誤設 188/localhost 會回到核准主機。
|
||||
- OpenClaw QA / daily Hermes template / NemoTron qwen3 的 flag 文件與測試已對齊 Ollama-first 預設 ON;顯式 `false` 才是 Gemini/NIM legacy 緊急退路。
|
||||
- OpenClaw QA / daily Hermes template / NemoTron qwen3 的 flag 文件與測試已對齊 Ollama-first 預設 ON;顯式 `false` 才是 Gemini/NIM legacy 緊急退路。OpenClaw QA 已移除單一 `OPENCLAW_QA_OLLAMA_HOST` 主機覆寫,`_call_qwen3_qa()` 改走 `OllamaService` 的 GCP-A → GCP-B → 111 retry 並回寫實際 provider。
|
||||
- Code Review pipeline 已對齊 Ollama-first:`_hermes_scan()` 與 `_openclaw_assess()` 都先走 `OllamaService` 的 GCP-A → GCP-B → 111 retry;Gemini 僅在 Ollama(與可選 Claude)失敗後以 `code_review_openclaw_gemini` caller 記錄備援,不再以 `code_review_openclaw` 直接 Gemini-first。
|
||||
- `.env.example` 已補齊 Python runtime 實際讀取的環境變數,`tests/test_phase3f_cleanup_contracts.py::test_env_example_documents_runtime_os_env_keys` 會掃 `app.py/config.py/scheduler.py/run_scheduler.py/routes/services/utils` 的 `os.getenv()` / `os.environ.get()`;只允許 `PYTEST_CURRENT_TEST` 與 `MOMO_ALLOW_INSECURE_CONFIG_FOR_TESTS` 兩個測試內部 key 不進範例。
|
||||
- `docker-compose*.yml` 使用的 `${VAR}` 也已納入 `.env.example` 契約,包含 MCP compose 的 `TAVILY_API_KEY`、`EXA_API_KEY`、`MCP_POSTGRES_PASSWORD`、`FIRECRAWL_AUTH_KEY`,以及 image tag / Grafana / pgAdmin / Metabase / Grist 變數;`test_env_example_documents_docker_compose_variables` 會守住。
|
||||
|
||||
@@ -51,8 +51,9 @@ TAIPEI_TZ_OFFSET = 8 # UTC+8
|
||||
# Operation Ollama-First v5.0 — Phase 3 feature flag(預設 ON;Gemini 僅 fallback)
|
||||
# - OPENCLAW_QA_OLLAMA_FIRST: true=走 Ollama 主、Gemini fallback;false=緊急退回 legacy Gemini-first
|
||||
# - OPENCLAW_QA_OLLAMA_MODEL: GCP Ollama 上的模型 tag(A2 推薦 qwen3:14b,9.3GB)
|
||||
# - OPENCLAW_QA_OLLAMA_HOST: 允許獨立指定 QA 用主機;未設則 fallback 到通用 OLLAMA_HOST_PRIMARY
|
||||
# - OPENCLAW_QA_OLLAMA_TIMEOUT: 單次 Ollama 呼叫超時(秒),低品質判定後仍會升級 Gemini
|
||||
# OpenClaw Q&A 不提供單 caller host override;主機必須統一走 OllamaService 的
|
||||
# GCP-A → GCP-B → 111 三主機級聯,避免 Telegram Q&A 被固定在單一 GCP 節點。
|
||||
# 任何 deploy 未顯式設定此 flag → 預設 Ollama-first;緊急時才顯式設 false 回 legacy。
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -66,10 +67,6 @@ def _qa_ollama_first_enabled() -> bool:
|
||||
|
||||
|
||||
OPENCLAW_QA_OLLAMA_MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b')
|
||||
OPENCLAW_QA_OLLAMA_HOST = os.getenv(
|
||||
'OPENCLAW_QA_OLLAMA_HOST',
|
||||
os.getenv('OLLAMA_HOST_PRIMARY', 'http://34.143.170.20:11434'),
|
||||
)
|
||||
OPENCLAW_QA_OLLAMA_TIMEOUT = int(os.getenv('OPENCLAW_QA_OLLAMA_TIMEOUT', '60'))
|
||||
|
||||
# 繁體中文強制 system prompt(A2 黃燈警訊「Qwen 繁中短板」緩解策略)
|
||||
@@ -261,18 +258,6 @@ def _call_qwen3_qa(
|
||||
f"使用者問題:{question}\n"
|
||||
f"上下文:{json.dumps(context or {}, ensure_ascii=False)}"
|
||||
)
|
||||
url = f"{OPENCLAW_QA_OLLAMA_HOST.rstrip('/')}/api/generate"
|
||||
payload = {
|
||||
"model": OPENCLAW_QA_OLLAMA_MODEL,
|
||||
"system": QWEN3_TC_SYSTEM_PROMPT,
|
||||
"prompt": user_prompt,
|
||||
"stream": False,
|
||||
"options": {
|
||||
"temperature": 0.5,
|
||||
"num_predict": 1024,
|
||||
},
|
||||
}
|
||||
|
||||
with log_ai_call(
|
||||
caller='openclaw_qa',
|
||||
provider='gcp_ollama',
|
||||
@@ -280,21 +265,40 @@ def _call_qwen3_qa(
|
||||
request_id=request_id,
|
||||
meta={
|
||||
'flag': 'OPENCLAW_QA_OLLAMA_FIRST',
|
||||
'host': OPENCLAW_QA_OLLAMA_HOST,
|
||||
'route': 'ollama_first',
|
||||
'temperature': 0.5,
|
||||
},
|
||||
) as ctx:
|
||||
try:
|
||||
from services.ollama_service import OllamaService, get_host_label, get_provider_tag
|
||||
|
||||
ctx.set_prompt_hash(user_prompt)
|
||||
resp = requests.post(url, json=payload, timeout=OPENCLAW_QA_OLLAMA_TIMEOUT)
|
||||
resp.raise_for_status()
|
||||
body = resp.json() or {}
|
||||
# Ollama /api/generate 回傳格式:{response, prompt_eval_count, eval_count, ...}
|
||||
ctx.set_tokens(
|
||||
input=body.get('prompt_eval_count', 0),
|
||||
output=body.get('eval_count', 0),
|
||||
ollama = OllamaService(model=OPENCLAW_QA_OLLAMA_MODEL)
|
||||
resp = ollama.generate(
|
||||
prompt=user_prompt,
|
||||
model=OPENCLAW_QA_OLLAMA_MODEL,
|
||||
system_prompt=QWEN3_TC_SYSTEM_PROMPT,
|
||||
temperature=0.5,
|
||||
timeout=OPENCLAW_QA_OLLAMA_TIMEOUT,
|
||||
)
|
||||
text_reply = (body.get('response') or '').strip()
|
||||
actual_provider = get_provider_tag(resp.host or '')
|
||||
ctx.set_provider(actual_provider)
|
||||
ctx.set_tokens(
|
||||
input=resp.input_tokens,
|
||||
output=resp.output_tokens,
|
||||
)
|
||||
ctx.add_meta('host', resp.host)
|
||||
ctx.add_meta('host_label', get_host_label(resp.host or ''))
|
||||
if not resp.success:
|
||||
ctx.set_error(resp.error or 'ollama generate failed')
|
||||
ctx.fallback_to_caller('openclaw_qa_gemini_fallback')
|
||||
logger.warning(
|
||||
"[OpenClaw][QA] qwen3 三主機級聯失敗 request_id=%s host=%s: %s",
|
||||
request_id, resp.host, resp.error,
|
||||
)
|
||||
return None
|
||||
|
||||
text_reply = (resp.content or '').strip()
|
||||
if not text_reply:
|
||||
ctx.set_error('empty_response')
|
||||
ctx.fallback_to_caller('openclaw_qa_gemini_fallback')
|
||||
@@ -302,8 +306,8 @@ def _call_qwen3_qa(
|
||||
return text_reply
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"[OpenClaw][QA] qwen3 呼叫失敗 request_id=%s host=%s: %s",
|
||||
request_id, OPENCLAW_QA_OLLAMA_HOST, e,
|
||||
"[OpenClaw][QA] qwen3 級聯呼叫例外 request_id=%s: %s",
|
||||
request_id, e,
|
||||
)
|
||||
ctx.set_error(f"{type(e).__name__}: {str(e)[:200]}")
|
||||
ctx.fallback_to_caller('openclaw_qa_gemini_fallback')
|
||||
|
||||
@@ -38,7 +38,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 啟用條件:須三條件齊備才實跑
|
||||
# 1. RUN_GOLDEN_SET=1
|
||||
# 2. OPENCLAW_QA_OLLAMA_HOST 可達
|
||||
# 2. OllamaService 三主機級聯可解析出可達主機
|
||||
# 3. GEMINI_API_KEY 已設
|
||||
# 否則 SKIP。
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -66,10 +66,14 @@ def _ollama_has_model(host: str, model: str, timeout: float = 3.0) -> bool:
|
||||
|
||||
|
||||
_RUN_GOLDEN = os.getenv('RUN_GOLDEN_SET', '0') == '1'
|
||||
_HOST = os.getenv('OPENCLAW_QA_OLLAMA_HOST', os.getenv('OLLAMA_HOST_PRIMARY', 'http://34.143.170.20:11434'))
|
||||
_MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b')
|
||||
_HAS_GEMINI = bool(os.getenv('GEMINI_API_KEY'))
|
||||
|
||||
|
||||
def _resolved_ollama_host() -> str:
    """Return whatever ``services.ollama_service.resolve_ollama_host()`` yields.

    The import happens at call time rather than at module import, so merely
    collecting this test module does not import ``services.ollama_service``.
    """
    from services.ollama_service import resolve_ollama_host

    resolved = resolve_ollama_host()
    return resolved
|
||||
|
||||
pytestmark = pytest.mark.skipif(
|
||||
not _RUN_GOLDEN,
|
||||
reason="黃金集需要 RUN_GOLDEN_SET=1 + GCP qwen3:14b ready + GEMINI_API_KEY;統帥盲測前才跑",
|
||||
@@ -215,10 +219,11 @@ def _call_gemini_baseline(question: str) -> Optional[str]:
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_environment_ready():
|
||||
"""sanity check:跑黃金集前確認 GCP host + model + Gemini key 都 ready。"""
|
||||
assert _ollama_reachable(_HOST), f"Ollama 主機不可達:{_HOST}"
|
||||
assert _ollama_has_model(_HOST, _MODEL), (
|
||||
f"GCP Ollama 尚未拉 {_MODEL}(請於 Phase 8 由 A1 完成 ollama pull)"
|
||||
"""sanity check:跑黃金集前確認 Ollama 級聯 host + model + Gemini key 都 ready。"""
|
||||
host = _resolved_ollama_host()
|
||||
assert _ollama_reachable(host), f"Ollama 主機不可達:{host}"
|
||||
assert _ollama_has_model(host, _MODEL), (
|
||||
f"Ollama 主機 {host} 尚未拉 {_MODEL}(請先完成 ollama pull)"
|
||||
)
|
||||
assert _HAS_GEMINI, "GEMINI_API_KEY 未設"
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ OpenClaw Q&A 路由 + 品質守門 unit tests
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import pytest
|
||||
@@ -68,6 +69,47 @@ def _wait_async(captured, n=1, timeout=2.0):
|
||||
return False
|
||||
|
||||
|
||||
def _stub_ollama_generate(
    monkeypatch,
    *,
    success: bool = True,
    content: str = '本週 momo 業績成長 12%,建議加碼家電促銷。',
    error: str = 'ConnectionError: connection refused',
    host: str = 'http://34.143.170.20:11434',
    input_tokens: int = 150,
    output_tokens: int = 60,
):
    """Swap ``services.ollama_service.OllamaService`` for a recording fake.

    OpenClaw QA tests use this so the code under test goes through the
    ``OllamaService`` interface instead of posting to a single host.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture used to install the fake.
        success: Value of the canned response's ``success`` field.
        content: Reply text returned when ``success`` is true.
        error: Error text returned when ``success`` is false.
        host: Host URL reported by the canned response.
        input_tokens: Input token count reported on success.
        output_tokens: Output token count reported on success.

    Returns:
        A ``(FakeOllamaService, fake_resp)`` tuple. Every constructed fake is
        appended to ``FakeOllamaService.instances``; each instance records its
        constructor arguments and the keyword arguments of each ``generate()``
        call in ``generate_calls``.
    """
    import services.ollama_service as ollama_mod

    # A failed response carries no text and zero token counts; only the
    # error message is populated.
    if success:
        reply_text, failure = content, None
        tokens_in, tokens_out = input_tokens, output_tokens
    else:
        reply_text, failure = '', error
        tokens_in, tokens_out = 0, 0

    fake_resp = SimpleNamespace(
        success=success,
        content=reply_text,
        model=svc.OPENCLAW_QA_OLLAMA_MODEL,
        error=failure,
        total_duration=0.12,
        host=host,
        input_tokens=tokens_in,
        output_tokens=tokens_out,
    )

    class FakeOllamaService:
        # Defined per helper call, so this list starts empty for each stub.
        instances = []

        def __init__(self, *args, **kwargs):
            self.init_args = args
            self.init_kwargs = kwargs
            self.generate_calls = []
            FakeOllamaService.instances.append(self)

        def generate(self, **kwargs):
            # Record the call, then hand back the same canned response.
            self.generate_calls.append(kwargs)
            return fake_resp

    monkeypatch.setattr(ollama_mod, 'OllamaService', FakeOllamaService)
    return FakeOllamaService, fake_resp
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 1. _is_low_quality_response 純函式規則
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -276,21 +318,15 @@ class TestCallQwen3Telemetry:
|
||||
"""高品質回應 → ai_calls 應記 status=ok, caller=openclaw_qa, provider=gcp_ollama"""
|
||||
captured = reset_state
|
||||
|
||||
class FakeResp:
|
||||
status_code = 200
|
||||
def raise_for_status(self): pass
|
||||
def json(self):
|
||||
return {
|
||||
'response': '本週 momo 業績成長 12%,建議加碼家電促銷。',
|
||||
'prompt_eval_count': 150,
|
||||
'eval_count': 60,
|
||||
}
|
||||
|
||||
monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp())
|
||||
fake_service, _fake_resp = _stub_ollama_generate(monkeypatch)
|
||||
|
||||
result = svc._call_qwen3_qa("本週業績?", None, "qa-test123")
|
||||
assert result is not None
|
||||
assert "業績成長" in result
|
||||
assert fake_service.instances
|
||||
generate_kwargs = fake_service.instances[0].generate_calls[0]
|
||||
assert generate_kwargs['model'] == svc.OPENCLAW_QA_OLLAMA_MODEL
|
||||
assert generate_kwargs['system_prompt'] == svc.QWEN3_TC_SYSTEM_PROMPT
|
||||
|
||||
assert _wait_async(captured, 1)
|
||||
assert len(captured) == 1
|
||||
@@ -301,16 +337,32 @@ class TestCallQwen3Telemetry:
|
||||
assert rec['status'] == 'ok'
|
||||
assert rec['fallback_to'] is None
|
||||
assert rec['meta'].get('flag') == 'OPENCLAW_QA_OLLAMA_FIRST'
|
||||
assert rec['meta'].get('route') == 'ollama_first'
|
||||
assert rec['meta'].get('host') == 'http://34.143.170.20:11434'
|
||||
assert rec['meta'].get('host_label') == 'GCP-SSD'
|
||||
assert rec['request_id'] == "qa-test123"
|
||||
|
||||
def test_qwen3_logs_actual_secondary_provider_after_retry(self, monkeypatch, reset_state):
    """If OllamaService lands on GCP-B, ai_calls.provider must be ollama_secondary."""
    # The stubbed response reports this host as the one that answered.
    _stub_ollama_generate(monkeypatch, host='http://34.21.145.224:11434')

    reply = svc._call_qwen3_qa("本週業績?", None, "qa-secondary")
    assert reply is not None

    # reset_state is the list of captured telemetry records; wait for one
    # to arrive before inspecting it.
    records = reset_state
    assert _wait_async(records, 1)
    first = records[0]
    assert first['provider'] == 'ollama_secondary'
    assert first['meta'].get('host_label') == 'GCP-SSD-2'
|
||||
|
||||
def test_qwen3_logs_fallback_on_exception(self, monkeypatch, reset_state):
|
||||
"""Ollama 連線失敗 → ai_calls 應記 fallback_to=openclaw_qa_gemini_fallback + status=fallback"""
|
||||
captured = reset_state
|
||||
|
||||
def boom(*a, **kw):
|
||||
raise svc.requests.ConnectionError("connection refused")
|
||||
|
||||
monkeypatch.setattr(svc.requests, 'post', boom)
|
||||
_stub_ollama_generate(monkeypatch, success=False)
|
||||
|
||||
result = svc._call_qwen3_qa("test", None, "qa-fail123")
|
||||
assert result is None
|
||||
@@ -326,13 +378,12 @@ class TestCallQwen3Telemetry:
|
||||
"""Ollama 回空 response → 視為 empty_response,標 fallback。"""
|
||||
captured = reset_state
|
||||
|
||||
class FakeResp:
|
||||
status_code = 200
|
||||
def raise_for_status(self): pass
|
||||
def json(self):
|
||||
return {'response': '', 'prompt_eval_count': 100, 'eval_count': 0}
|
||||
|
||||
monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp())
|
||||
_stub_ollama_generate(
|
||||
monkeypatch,
|
||||
content='',
|
||||
input_tokens=100,
|
||||
output_tokens=0,
|
||||
)
|
||||
|
||||
result = svc._call_qwen3_qa("test", None, "qa-empty")
|
||||
assert result is None
|
||||
|
||||
@@ -143,7 +143,6 @@ def test_env_example_documents_runtime_and_ai_automation_variables():
|
||||
"OPENCLAW_OLLAMA_MODEL",
|
||||
"OPENCLAW_PPT_CACHE_TTL_HOURS",
|
||||
"OPENCLAW_QA_OLLAMA_FIRST",
|
||||
"OPENCLAW_QA_OLLAMA_HOST",
|
||||
"OPENCLAW_QA_OLLAMA_MODEL",
|
||||
"OPENCLAW_QA_OLLAMA_TIMEOUT",
|
||||
"PPT_VISION_ENABLED",
|
||||
|
||||
@@ -11,6 +11,8 @@ def test_qwen3_is_active_runtime_model_not_unused_ollama_weight():
|
||||
|
||||
assert "OPENCLAW_QA_OLLAMA_MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b')" in openclaw_source
|
||||
assert "def _call_qwen3_qa(" in openclaw_source
|
||||
assert "OllamaService(model=OPENCLAW_QA_OLLAMA_MODEL)" in openclaw_source
|
||||
assert "OPENCLAW_QA_OLLAMA_HOST" not in openclaw_source
|
||||
assert 'NEMOTRON_OLLAMA_MODEL = os.getenv("NEMOTRON_OLLAMA_MODEL", "qwen3:14b")' in nemotron_source
|
||||
assert "def _call_qwen3_dispatch(" in nemotron_source
|
||||
assert "'qwen3:14b'" in router_source
|
||||
|
||||
Reference in New Issue
Block a user