fix: route openclaw qa through ollama cascade

This commit is contained in:
OoO
2026-05-13 21:17:22 +08:00
parent b644f57084
commit d82a1671b6
8 changed files with 121 additions and 59 deletions

View File

@@ -340,9 +340,9 @@ OLLAMA_COPY_TIMEOUT=180
OLLAMA_EMBED_TIMEOUT=45
# [預設 true] OpenClaw Q&A 先走 Ollama，品質不足或失敗時才 fallback Gemini/NIM
# 主機不提供單 caller override，一律走 OLLAMA_HOST_PRIMARY → OLLAMA_HOST_SECONDARY → OLLAMA_HOST_FALLBACK
OPENCLAW_QA_OLLAMA_FIRST=true
OPENCLAW_QA_OLLAMA_MODEL=qwen3:14b
OPENCLAW_QA_OLLAMA_HOST=http://34.143.170.20:11434
OPENCLAW_QA_OLLAMA_TIMEOUT=60
NEMOTRON_OLLAMA_FIRST=true
NEMOTRON_OLLAMA_MODEL=qwen3:14b

View File

@@ -14,6 +14,7 @@
- 188 `192.168.0.188` 僅是 App / DB / scheduler / Telegram bot 容器宿主與 AutoHeal target，不可作為 Ollama 節點。
- 通用 AI 文案、關鍵字、商品洞察與 Telegram Q&A 第一響應不得 Gemini-first。
- Code Review pipeline 也必須 Ollama-first：Hermes scan 與 OpenClaw assessment 都走 `OllamaService` 三主機 retry；Gemini telemetry 只能以 `code_review_openclaw_gemini` 出現，表示 Ollama/可選 Claude 備援都失敗後才啟用。
- OpenClaw Telegram Q&A 主路徑也不得綁單一 host；`_call_qwen3_qa()` 必須透過 `OllamaService` 跑 GCP-A → GCP-B → 111，並把實際落點寫入 `ai_calls.provider`。
## 一、四 AI Agent 路由架構

View File

@@ -51,7 +51,7 @@
- `services/pg_sync_service.py` 是顯式 opt-in legacy CLI，不是生產自動同步路徑；`tests/test_pg_sync_contract.py` 已守住預設 OFF 與 runtime paths 不自動 import。
- `qwen3:14b` 不是未使用 Ollama 模型：OpenClaw QA、NemoTron dispatch 與 LLM model router 仍有現役路徑；`tests/test_qwen3_runtime_usage.py` 已守住，不能只因體積大就三主機移除。
- Ollama host env 已加白名單護欄：`OLLAMA_HOST*` / `EMBEDDING_HOST` 只接受 GCP-A、GCP-B、111 或 110 proxy；誤設 188/localhost 會回到核准主機。
- OpenClaw QA / daily Hermes template / NemoTron qwen3 的 flag 文件與測試已對齊 Ollama-first 預設 ON，顯式 `false` 才是 Gemini/NIM legacy 緊急退路。
- OpenClaw QA / daily Hermes template / NemoTron qwen3 的 flag 文件與測試已對齊 Ollama-first 預設 ON，顯式 `false` 才是 Gemini/NIM legacy 緊急退路。OpenClaw QA 已移除單一 `OPENCLAW_QA_OLLAMA_HOST` 主機覆寫，`_call_qwen3_qa()` 改走 `OllamaService` 的 GCP-A → GCP-B → 111 retry 並回寫實際 provider。
- Code Review pipeline 已對齊 Ollama-first：`_hermes_scan()`、`_openclaw_assess()` 都先走 `OllamaService` 的 GCP-A → GCP-B → 111 retry；Gemini 僅在 Ollama（與可選 Claude）失敗後以 `code_review_openclaw_gemini` caller 記錄備援，不再以 `code_review_openclaw` 直接 Gemini-first。
- `.env.example` 已補齊 Python runtime 實際讀取的環境變數，`tests/test_phase3f_cleanup_contracts.py::test_env_example_documents_runtime_os_env_keys` 會掃 `app.py/config.py/scheduler.py/run_scheduler.py/routes/services/utils` 的 `os.getenv()` / `os.environ.get()`；只允許 `PYTEST_CURRENT_TEST`、`MOMO_ALLOW_INSECURE_CONFIG_FOR_TESTS` 兩個測試內部 key 不進範例。
- `docker-compose*.yml` 使用的 `${VAR}` 也已納入 `.env.example` 契約，包含 MCP compose 的 `TAVILY_API_KEY`、`EXA_API_KEY`、`MCP_POSTGRES_PASSWORD`、`FIRECRAWL_AUTH_KEY`，以及 image tag / Grafana / pgAdmin / Metabase / Grist 變數；`test_env_example_documents_docker_compose_variables` 會守住。

View File

@@ -51,8 +51,9 @@ TAIPEI_TZ_OFFSET = 8 # UTC+8
# Operation Ollama-First v5.0 — Phase 3 feature flag（預設 ON，Gemini 僅 fallback）
# - OPENCLAW_QA_OLLAMA_FIRST: true=走 Ollama 主、Gemini fallback；false=緊急退回 legacy Gemini-first
# - OPENCLAW_QA_OLLAMA_MODEL: GCP Ollama 上的模型 tag（A2 推薦 qwen3:14b，9.3GB）
# - OPENCLAW_QA_OLLAMA_HOST: 允許獨立指定 QA 用主機;未設則 fallback 到通用 OLLAMA_HOST_PRIMARY
# - OPENCLAW_QA_OLLAMA_TIMEOUT: 單次 Ollama 呼叫超時(秒),低品質判定後仍會升級 Gemini
# OpenClaw Q&A 不提供單 caller host override；主機必須統一走 OllamaService 的
# GCP-A → GCP-B → 111 三主機級聯，避免 Telegram Q&A 被固定在單一 GCP 節點。
# 任何 deploy 不開 flag → Ollama-first；緊急時才顯式設 false 回 legacy。
# ──────────────────────────────────────────────────────────────────────────────
@@ -66,10 +67,6 @@ def _qa_ollama_first_enabled() -> bool:
OPENCLAW_QA_OLLAMA_MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b')
OPENCLAW_QA_OLLAMA_HOST = os.getenv(
'OPENCLAW_QA_OLLAMA_HOST',
os.getenv('OLLAMA_HOST_PRIMARY', 'http://34.143.170.20:11434'),
)
OPENCLAW_QA_OLLAMA_TIMEOUT = int(os.getenv('OPENCLAW_QA_OLLAMA_TIMEOUT', '60'))
# 繁體中文強制 system promptA2 黃燈警訊「Qwen 繁中短板」緩解策略)
@@ -261,18 +258,6 @@ def _call_qwen3_qa(
f"使用者問題:{question}\n"
f"上下文:{json.dumps(context or {}, ensure_ascii=False)}"
)
url = f"{OPENCLAW_QA_OLLAMA_HOST.rstrip('/')}/api/generate"
payload = {
"model": OPENCLAW_QA_OLLAMA_MODEL,
"system": QWEN3_TC_SYSTEM_PROMPT,
"prompt": user_prompt,
"stream": False,
"options": {
"temperature": 0.5,
"num_predict": 1024,
},
}
with log_ai_call(
caller='openclaw_qa',
provider='gcp_ollama',
@@ -280,21 +265,40 @@ def _call_qwen3_qa(
request_id=request_id,
meta={
'flag': 'OPENCLAW_QA_OLLAMA_FIRST',
'host': OPENCLAW_QA_OLLAMA_HOST,
'route': 'ollama_first',
'temperature': 0.5,
},
) as ctx:
try:
from services.ollama_service import OllamaService, get_host_label, get_provider_tag
ctx.set_prompt_hash(user_prompt)
resp = requests.post(url, json=payload, timeout=OPENCLAW_QA_OLLAMA_TIMEOUT)
resp.raise_for_status()
body = resp.json() or {}
# Ollama /api/generate 回傳格式:{response, prompt_eval_count, eval_count, ...}
ctx.set_tokens(
input=body.get('prompt_eval_count', 0),
output=body.get('eval_count', 0),
ollama = OllamaService(model=OPENCLAW_QA_OLLAMA_MODEL)
resp = ollama.generate(
prompt=user_prompt,
model=OPENCLAW_QA_OLLAMA_MODEL,
system_prompt=QWEN3_TC_SYSTEM_PROMPT,
temperature=0.5,
timeout=OPENCLAW_QA_OLLAMA_TIMEOUT,
)
text_reply = (body.get('response') or '').strip()
actual_provider = get_provider_tag(resp.host or '')
ctx.set_provider(actual_provider)
ctx.set_tokens(
input=resp.input_tokens,
output=resp.output_tokens,
)
ctx.add_meta('host', resp.host)
ctx.add_meta('host_label', get_host_label(resp.host or ''))
if not resp.success:
ctx.set_error(resp.error or 'ollama generate failed')
ctx.fallback_to_caller('openclaw_qa_gemini_fallback')
logger.warning(
"[OpenClaw][QA] qwen3 三主機級聯失敗 request_id=%s host=%s: %s",
request_id, resp.host, resp.error,
)
return None
text_reply = (resp.content or '').strip()
if not text_reply:
ctx.set_error('empty_response')
ctx.fallback_to_caller('openclaw_qa_gemini_fallback')
@@ -302,8 +306,8 @@ def _call_qwen3_qa(
return text_reply
except Exception as e:
logger.warning(
"[OpenClaw][QA] qwen3 呼叫失敗 request_id=%s host=%s: %s",
request_id, OPENCLAW_QA_OLLAMA_HOST, e,
"[OpenClaw][QA] qwen3 級聯呼叫例外 request_id=%s: %s",
request_id, e,
)
ctx.set_error(f"{type(e).__name__}: {str(e)[:200]}")
ctx.fallback_to_caller('openclaw_qa_gemini_fallback')

View File

@@ -38,7 +38,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# ─────────────────────────────────────────────────────────────────────────────
# 啟用條件:須三條件齊備才實跑
# 1. RUN_GOLDEN_SET=1
# 2. OPENCLAW_QA_OLLAMA_HOST 可達
# 2. OllamaService 三主機級聯可解析出可達主機
# 3. GEMINI_API_KEY 已設
# 否則 SKIP。
# ─────────────────────────────────────────────────────────────────────────────
@@ -66,10 +66,14 @@ def _ollama_has_model(host: str, model: str, timeout: float = 3.0) -> bool:
_RUN_GOLDEN = os.getenv('RUN_GOLDEN_SET', '0') == '1'
_HOST = os.getenv('OPENCLAW_QA_OLLAMA_HOST', os.getenv('OLLAMA_HOST_PRIMARY', 'http://34.143.170.20:11434'))
_MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b')
_HAS_GEMINI = bool(os.getenv('GEMINI_API_KEY'))
def _resolved_ollama_host() -> str:
    """Return whatever host ``services.ollama_service.resolve_ollama_host`` picks.

    The import happens at call time rather than at module import time.
    """
    from services.ollama_service import resolve_ollama_host

    chosen_host = resolve_ollama_host()
    return chosen_host
pytestmark = pytest.mark.skipif(
not _RUN_GOLDEN,
reason="黃金集需要 RUN_GOLDEN_SET=1 + GCP qwen3:14b ready + GEMINI_API_KEY統帥盲測前才跑",
@@ -215,10 +219,11 @@ def _call_gemini_baseline(question: str) -> Optional[str]:
# ─────────────────────────────────────────────────────────────────────────────
def test_environment_ready():
"""sanity check跑黃金集前確認 GCP host + model + Gemini key 都 ready。"""
assert _ollama_reachable(_HOST), f"Ollama 主機不可達:{_HOST}"
assert _ollama_has_model(_HOST, _MODEL), (
f"GCP Ollama 尚未拉 {_MODEL}(請於 Phase 8 由 A1 完成 ollama pull"
"""sanity check跑黃金集前確認 Ollama 級聯 host + model + Gemini key 都 ready。"""
host = _resolved_ollama_host()
assert _ollama_reachable(host), f"Ollama 主機不可達:{host}"
assert _ollama_has_model(host, _MODEL), (
f"Ollama 主機 {host} 尚未拉 {_MODEL}(請先完成 ollama pull"
)
assert _HAS_GEMINI, "GEMINI_API_KEY 未設"

View File

@@ -19,6 +19,7 @@ OpenClaw Q&A 路由 + 品質守門 unit tests
import os
import sys
import time
from types import SimpleNamespace
from typing import Any, Dict, Optional
import pytest
@@ -68,6 +69,47 @@ def _wait_async(captured, n=1, timeout=2.0):
return False
def _stub_ollama_generate(
    monkeypatch,
    *,
    success: bool = True,
    content: str = '本週 momo 業績成長 12%，建議加碼家電促銷。',
    error: str = 'ConnectionError: connection refused',
    host: str = 'http://34.143.170.20:11434',
    input_tokens: int = 150,
    output_tokens: int = 60,
):
    """Swap ``services.ollama_service.OllamaService`` for a recording fake.

    The fake lets OpenClaw QA tests exercise the OllamaService interface
    instead of posting to a single hard-coded host.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture used to install the fake.
        success: Value of ``success`` on the canned response. When falsy, the
            response carries empty content, zero token counts and ``error``.
        content: Reply text returned on success.
        error: Error message returned on failure.
        host: Host URL reported on the canned response.
        input_tokens: Prompt token count reported on success.
        output_tokens: Completion token count reported on success.

    Returns:
        A ``(FakeOllamaService, fake_resp)`` tuple: the installed fake class
        (its ``instances`` list records every construction) and the response
        object every ``generate()`` call returns.
    """
    import services.ollama_service as ollama_mod

    # Pick the success-shaped or failure-shaped payload up front.
    if success:
        reply_text, failure = content, None
        tokens_in, tokens_out = input_tokens, output_tokens
    else:
        reply_text, failure = '', error
        tokens_in, tokens_out = 0, 0

    fake_resp = SimpleNamespace(
        success=success,
        content=reply_text,
        model=svc.OPENCLAW_QA_OLLAMA_MODEL,
        error=failure,
        total_duration=0.12,
        host=host,
        input_tokens=tokens_in,
        output_tokens=tokens_out,
    )

    class FakeOllamaService:
        # Every constructed instance, in creation order.
        instances = []

        def __init__(self, *ctor_args, **ctor_kwargs):
            self.init_args = ctor_args
            self.init_kwargs = ctor_kwargs
            self.generate_calls = []
            FakeOllamaService.instances.append(self)

        def generate(self, **call_kwargs):
            # Record the keyword arguments so tests can assert on them.
            self.generate_calls.append(call_kwargs)
            return fake_resp

    monkeypatch.setattr(ollama_mod, 'OllamaService', FakeOllamaService)
    return FakeOllamaService, fake_resp
# ─────────────────────────────────────────────────────────────────────────────
# 1. _is_low_quality_response 純函式規則
# ─────────────────────────────────────────────────────────────────────────────
@@ -276,21 +318,15 @@ class TestCallQwen3Telemetry:
"""高品質回應 → ai_calls 應記 status=ok, caller=openclaw_qa, provider=gcp_ollama"""
captured = reset_state
class FakeResp:
status_code = 200
def raise_for_status(self): pass
def json(self):
return {
'response': '本週 momo 業績成長 12%,建議加碼家電促銷。',
'prompt_eval_count': 150,
'eval_count': 60,
}
monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp())
fake_service, _fake_resp = _stub_ollama_generate(monkeypatch)
result = svc._call_qwen3_qa("本週業績?", None, "qa-test123")
assert result is not None
assert "業績成長" in result
assert fake_service.instances
generate_kwargs = fake_service.instances[0].generate_calls[0]
assert generate_kwargs['model'] == svc.OPENCLAW_QA_OLLAMA_MODEL
assert generate_kwargs['system_prompt'] == svc.QWEN3_TC_SYSTEM_PROMPT
assert _wait_async(captured, 1)
assert len(captured) == 1
@@ -301,16 +337,32 @@ class TestCallQwen3Telemetry:
assert rec['status'] == 'ok'
assert rec['fallback_to'] is None
assert rec['meta'].get('flag') == 'OPENCLAW_QA_OLLAMA_FIRST'
assert rec['meta'].get('route') == 'ollama_first'
assert rec['meta'].get('host') == 'http://34.143.170.20:11434'
assert rec['meta'].get('host_label') == 'GCP-SSD'
assert rec['request_id'] == "qa-test123"
def test_qwen3_logs_actual_secondary_provider_after_retry(self, monkeypatch, reset_state):
    """If OllamaService lands on GCP-B, ai_calls.provider must be ollama_secondary."""
    records = reset_state
    secondary_host = 'http://34.21.145.224:11434'
    _stub_ollama_generate(monkeypatch, host=secondary_host)

    answer = svc._call_qwen3_qa("本週業績?", None, "qa-secondary")
    assert answer is not None

    # Wait for one telemetry record before inspecting it.
    assert _wait_async(records, 1)
    logged = records[0]
    assert logged['provider'] == 'ollama_secondary'
    assert logged['meta'].get('host_label') == 'GCP-SSD-2'
def test_qwen3_logs_fallback_on_exception(self, monkeypatch, reset_state):
"""Ollama 連線失敗 → ai_calls 應記 fallback_to=openclaw_qa_gemini_fallback + status=fallback"""
captured = reset_state
def boom(*a, **kw):
raise svc.requests.ConnectionError("connection refused")
monkeypatch.setattr(svc.requests, 'post', boom)
_stub_ollama_generate(monkeypatch, success=False)
result = svc._call_qwen3_qa("test", None, "qa-fail123")
assert result is None
@@ -326,13 +378,12 @@ class TestCallQwen3Telemetry:
"""Ollama 回空 response → 視為 empty_response標 fallback。"""
captured = reset_state
class FakeResp:
status_code = 200
def raise_for_status(self): pass
def json(self):
return {'response': '', 'prompt_eval_count': 100, 'eval_count': 0}
monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp())
_stub_ollama_generate(
monkeypatch,
content='',
input_tokens=100,
output_tokens=0,
)
result = svc._call_qwen3_qa("test", None, "qa-empty")
assert result is None

View File

@@ -143,7 +143,6 @@ def test_env_example_documents_runtime_and_ai_automation_variables():
"OPENCLAW_OLLAMA_MODEL",
"OPENCLAW_PPT_CACHE_TTL_HOURS",
"OPENCLAW_QA_OLLAMA_FIRST",
"OPENCLAW_QA_OLLAMA_HOST",
"OPENCLAW_QA_OLLAMA_MODEL",
"OPENCLAW_QA_OLLAMA_TIMEOUT",
"PPT_VISION_ENABLED",

View File

@@ -11,6 +11,8 @@ def test_qwen3_is_active_runtime_model_not_unused_ollama_weight():
assert "OPENCLAW_QA_OLLAMA_MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b')" in openclaw_source
assert "def _call_qwen3_qa(" in openclaw_source
assert "OllamaService(model=OPENCLAW_QA_OLLAMA_MODEL)" in openclaw_source
assert "OPENCLAW_QA_OLLAMA_HOST" not in openclaw_source
assert 'NEMOTRON_OLLAMA_MODEL = os.getenv("NEMOTRON_OLLAMA_MODEL", "qwen3:14b")' in nemotron_source
assert "def _call_qwen3_dispatch(" in nemotron_source
assert "'qwen3:14b'" in router_source