diff --git a/.env.example b/.env.example index e2b75e9..45dfcb9 100644 --- a/.env.example +++ b/.env.example @@ -340,9 +340,9 @@ OLLAMA_COPY_TIMEOUT=180 OLLAMA_EMBED_TIMEOUT=45 # [預設 true] OpenClaw Q&A 先走 Ollama,品質不足或失敗時才 fallback Gemini/NIM +# 主機不提供單 caller override;一律走 OLLAMA_HOST_PRIMARY → OLLAMA_HOST_SECONDARY → OLLAMA_HOST_FALLBACK OPENCLAW_QA_OLLAMA_FIRST=true OPENCLAW_QA_OLLAMA_MODEL=qwen3:14b -OPENCLAW_QA_OLLAMA_HOST=http://34.143.170.20:11434 OPENCLAW_QA_OLLAMA_TIMEOUT=60 NEMOTRON_OLLAMA_FIRST=true NEMOTRON_OLLAMA_MODEL=qwen3:14b diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index 88d15ae..e9d975b 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -14,6 +14,7 @@ - 188 `192.168.0.188` 僅是 App / DB / scheduler / Telegram bot 容器宿主與 AutoHeal target,不可作為 Ollama 節點。 - 通用 AI 文案、關鍵字、商品洞察與 Telegram Q&A 第一響應不得 Gemini-first。 - Code Review pipeline 也必須 Ollama-first:Hermes scan 與 OpenClaw assessment 都走 `OllamaService` 三主機 retry;Gemini telemetry 只能以 `code_review_openclaw_gemini` 出現,表示 Ollama/可選 Claude 備援都失敗後才啟用。 +- OpenClaw Telegram Q&A 主路徑也不得綁單一 host:`_call_qwen3_qa()` 必須透過 `OllamaService` 跑 GCP-A → GCP-B → 111,並把實際落點寫入 `ai_calls.provider`。 ## 一、四 AI Agent 路由架構 diff --git a/docs/memory/claude_inventory_validation_20260513.md b/docs/memory/claude_inventory_validation_20260513.md index 11c4219..579a6ea 100644 --- a/docs/memory/claude_inventory_validation_20260513.md +++ b/docs/memory/claude_inventory_validation_20260513.md @@ -51,7 +51,7 @@ - `services/pg_sync_service.py` 是顯式 opt-in legacy CLI,不是生產自動同步路徑;`tests/test_pg_sync_contract.py` 已守住預設 OFF 與 runtime paths 不自動 import。 - `qwen3:14b` 不是未使用 Ollama 模型:OpenClaw QA、NemoTron dispatch 與 LLM model router 仍有現役路徑;`tests/test_qwen3_runtime_usage.py` 已守住,不能只因體積大就三主機移除。 - Ollama host env 已加白名單護欄:`OLLAMA_HOST*` / `EMBEDDING_HOST` 只接受 GCP-A、GCP-B、111 或 110 proxy,誤設 188/localhost 會回到核准主機。 -- OpenClaw QA / daily Hermes template / NemoTron qwen3 的 flag 文件與測試已對齊 Ollama-first 預設 ON;顯式 `false` 才是 Gemini/NIM legacy 緊急退路。 +- OpenClaw QA / daily Hermes template / NemoTron qwen3 的 flag 文件與測試已對齊 Ollama-first 預設 ON;顯式 `false` 才是 Gemini/NIM legacy 緊急退路。OpenClaw QA 已移除單一 `OPENCLAW_QA_OLLAMA_HOST` 主機覆寫,`_call_qwen3_qa()` 改走 `OllamaService` 的 GCP-A → GCP-B → 111 retry 並回寫實際 provider。 - Code Review pipeline 已對齊 Ollama-first:`_hermes_scan()` 與 `_openclaw_assess()` 都先走 `OllamaService` 的 GCP-A → GCP-B → 111 retry;Gemini 僅在 Ollama(與可選 Claude)失敗後以 `code_review_openclaw_gemini` caller 記錄備援,不再以 `code_review_openclaw` 直接 Gemini-first。 - `.env.example` 已補齊 Python runtime 實際讀取的環境變數,`tests/test_phase3f_cleanup_contracts.py::test_env_example_documents_runtime_os_env_keys` 會掃 `app.py/config.py/scheduler.py/run_scheduler.py/routes/services/utils` 的 `os.getenv()` / `os.environ.get()`;只允許 `PYTEST_CURRENT_TEST` 與 `MOMO_ALLOW_INSECURE_CONFIG_FOR_TESTS` 兩個測試內部 key 不進範例。 - `docker-compose*.yml` 使用的 `${VAR}` 也已納入 `.env.example` 契約,包含 MCP compose 的 `TAVILY_API_KEY`、`EXA_API_KEY`、`MCP_POSTGRES_PASSWORD`、`FIRECRAWL_AUTH_KEY`,以及 image tag / Grafana / pgAdmin / Metabase / Grist 變數;`test_env_example_documents_docker_compose_variables` 會守住。 diff --git a/services/openclaw_strategist_service.py b/services/openclaw_strategist_service.py index f0d6bb5..cd3b3c7 100644 --- a/services/openclaw_strategist_service.py +++ b/services/openclaw_strategist_service.py @@ -51,8 +51,9 @@ TAIPEI_TZ_OFFSET = 8 # UTC+8 # Operation Ollama-First v5.0 — Phase 3 feature flag(預設 ON;Gemini 僅 fallback) # - OPENCLAW_QA_OLLAMA_FIRST: true=走 Ollama 主、Gemini fallback;false=緊急退回 legacy Gemini-first # - OPENCLAW_QA_OLLAMA_MODEL: GCP Ollama 上的模型 tag(A2 推薦 qwen3:14b,9.3GB) -# - OPENCLAW_QA_OLLAMA_HOST: 允許獨立指定 QA 用主機;未設則 fallback 到通用 OLLAMA_HOST_PRIMARY # - OPENCLAW_QA_OLLAMA_TIMEOUT: 單次 Ollama 呼叫超時(秒),低品質判定後仍會升級 Gemini +# OpenClaw Q&A 不提供單 caller host override;主機必須統一走 OllamaService 的 +# GCP-A → GCP-B → 111 三主機級聯,避免 Telegram Q&A 被固定在單一 GCP 節點。 # 任何 deploy 不開 flag → Ollama-first;緊急時才顯式設 false 回 legacy。 # ────────────────────────────────────────────────────────────────────────────── @@ -66,10 +67,6 @@ def _qa_ollama_first_enabled() -> bool: OPENCLAW_QA_OLLAMA_MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b') -OPENCLAW_QA_OLLAMA_HOST = os.getenv( - 'OPENCLAW_QA_OLLAMA_HOST', - os.getenv('OLLAMA_HOST_PRIMARY', 'http://34.143.170.20:11434'), -) OPENCLAW_QA_OLLAMA_TIMEOUT = int(os.getenv('OPENCLAW_QA_OLLAMA_TIMEOUT', '60')) # 繁體中文強制 system prompt(A2 黃燈警訊「Qwen 繁中短板」緩解策略) @@ -261,18 +258,6 @@ def _call_qwen3_qa( f"使用者問題:{question}\n" f"上下文:{json.dumps(context or {}, ensure_ascii=False)}" ) - url = f"{OPENCLAW_QA_OLLAMA_HOST.rstrip('/')}/api/generate" - payload = { - "model": OPENCLAW_QA_OLLAMA_MODEL, - "system": QWEN3_TC_SYSTEM_PROMPT, - "prompt": user_prompt, - "stream": False, - "options": { - "temperature": 0.5, - "num_predict": 1024, - }, - } - with log_ai_call( caller='openclaw_qa', provider='gcp_ollama', @@ -280,21 +265,40 @@ def _call_qwen3_qa( request_id=request_id, meta={ 'flag': 'OPENCLAW_QA_OLLAMA_FIRST', - 'host': OPENCLAW_QA_OLLAMA_HOST, + 'route': 'ollama_first', 'temperature': 0.5, }, ) as ctx: try: + from services.ollama_service import OllamaService, get_host_label, get_provider_tag + ctx.set_prompt_hash(user_prompt) - resp = requests.post(url, json=payload, timeout=OPENCLAW_QA_OLLAMA_TIMEOUT) - resp.raise_for_status() - body = resp.json() or {} - # Ollama /api/generate 回傳格式:{response, prompt_eval_count, eval_count, ...} - ctx.set_tokens( - input=body.get('prompt_eval_count', 0), - output=body.get('eval_count', 0), + ollama = OllamaService(model=OPENCLAW_QA_OLLAMA_MODEL) + resp = ollama.generate( + prompt=user_prompt, + model=OPENCLAW_QA_OLLAMA_MODEL, + system_prompt=QWEN3_TC_SYSTEM_PROMPT, + temperature=0.5, + timeout=OPENCLAW_QA_OLLAMA_TIMEOUT, ) - text_reply = (body.get('response') or '').strip() + actual_provider = get_provider_tag(resp.host or '') + ctx.set_provider(actual_provider) + ctx.set_tokens( + input=resp.input_tokens, + output=resp.output_tokens, + ) + ctx.add_meta('host', resp.host) + ctx.add_meta('host_label', get_host_label(resp.host or '')) + if not resp.success: + ctx.set_error(resp.error or 'ollama generate failed') + ctx.fallback_to_caller('openclaw_qa_gemini_fallback') + logger.warning( + "[OpenClaw][QA] qwen3 三主機級聯失敗 request_id=%s host=%s: %s", + request_id, resp.host, resp.error, + ) + return None + + text_reply = (resp.content or '').strip() if not text_reply: ctx.set_error('empty_response') ctx.fallback_to_caller('openclaw_qa_gemini_fallback') @@ -302,8 +306,8 @@ def _call_qwen3_qa( return text_reply except Exception as e: logger.warning( - "[OpenClaw][QA] qwen3 呼叫失敗 request_id=%s host=%s: %s", - request_id, OPENCLAW_QA_OLLAMA_HOST, e, + "[OpenClaw][QA] qwen3 級聯呼叫例外 request_id=%s: %s", + request_id, e, ) ctx.set_error(f"{type(e).__name__}: {str(e)[:200]}") ctx.fallback_to_caller('openclaw_qa_gemini_fallback') diff --git a/tests/test_openclaw_qa_golden_set.py b/tests/test_openclaw_qa_golden_set.py index b6e7962..c8ea5dc 100644 --- a/tests/test_openclaw_qa_golden_set.py +++ b/tests/test_openclaw_qa_golden_set.py @@ -38,7 +38,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # ───────────────────────────────────────────────────────────────────────────── # 啟用條件:須三條件齊備才實跑 # 1. RUN_GOLDEN_SET=1 -# 2. OPENCLAW_QA_OLLAMA_HOST 可達 +# 2. OllamaService 三主機級聯可解析出可達主機 # 3. GEMINI_API_KEY 已設 # 否則 SKIP。 # ───────────────────────────────────────────────────────────────────────────── @@ -66,10 +66,14 @@ def _ollama_has_model(host: str, model: str, timeout: float = 3.0) -> bool: _RUN_GOLDEN = os.getenv('RUN_GOLDEN_SET', '0') == '1' -_HOST = os.getenv('OPENCLAW_QA_OLLAMA_HOST', os.getenv('OLLAMA_HOST_PRIMARY', 'http://34.143.170.20:11434')) _MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b') _HAS_GEMINI = bool(os.getenv('GEMINI_API_KEY')) + +def _resolved_ollama_host() -> str: + from services.ollama_service import resolve_ollama_host + return resolve_ollama_host() + pytestmark = pytest.mark.skipif( not _RUN_GOLDEN, reason="黃金集需要 RUN_GOLDEN_SET=1 + GCP qwen3:14b ready + GEMINI_API_KEY;統帥盲測前才跑", @@ -215,10 +219,11 @@ def _call_gemini_baseline(question: str) -> Optional[str]: # ───────────────────────────────────────────────────────────────────────────── def test_environment_ready(): - """sanity check:跑黃金集前確認 GCP host + model + Gemini key 都 ready。""" - assert _ollama_reachable(_HOST), f"Ollama 主機不可達:{_HOST}" - assert _ollama_has_model(_HOST, _MODEL), ( - f"GCP Ollama 尚未拉 {_MODEL}(請於 Phase 8 由 A1 完成 ollama pull)" + """sanity check:跑黃金集前確認 Ollama 級聯 host + model + Gemini key 都 ready。""" + host = _resolved_ollama_host() + assert _ollama_reachable(host), f"Ollama 主機不可達:{host}" + assert _ollama_has_model(host, _MODEL), ( + f"Ollama 主機 {host} 尚未拉 {_MODEL}(請先完成 ollama pull)" ) assert _HAS_GEMINI, "GEMINI_API_KEY 未設" diff --git a/tests/test_openclaw_qa_routing.py b/tests/test_openclaw_qa_routing.py index 93bed4b..c280c74 100644 --- a/tests/test_openclaw_qa_routing.py +++ b/tests/test_openclaw_qa_routing.py @@ -19,6 +19,7 @@ OpenClaw Q&A 路由 + 品質守門 unit tests import os import sys import time +from types import SimpleNamespace from typing import Any, Dict, Optional import pytest @@ -68,6 +69,47 @@ def _wait_async(captured, n=1, timeout=2.0): return False +def _stub_ollama_generate( + monkeypatch, + *, + success: bool = True, + content: str = '本週 momo 業績成長 12%,建議加碼家電促銷。', + error: str = 'ConnectionError: connection refused', + host: str = 'http://34.143.170.20:11434', + input_tokens: int = 150, + output_tokens: int = 60, +): + """讓 OpenClaw QA 測試走 OllamaService 介面,而非直打單一 host。""" + import services.ollama_service as ollama_mod + + fake_resp = SimpleNamespace( + success=success, + content=content if success else '', + model=svc.OPENCLAW_QA_OLLAMA_MODEL, + error=None if success else error, + total_duration=0.12, + host=host, + input_tokens=input_tokens if success else 0, + output_tokens=output_tokens if success else 0, + ) + + class FakeOllamaService: + instances = [] + + def __init__(self, *args, **kwargs): + self.init_args = args + self.init_kwargs = kwargs + self.generate_calls = [] + FakeOllamaService.instances.append(self) + + def generate(self, **kwargs): + self.generate_calls.append(kwargs) + return fake_resp + + monkeypatch.setattr(ollama_mod, 'OllamaService', FakeOllamaService) + return FakeOllamaService, fake_resp + + # ───────────────────────────────────────────────────────────────────────────── # 1. _is_low_quality_response 純函式規則 # ───────────────────────────────────────────────────────────────────────────── @@ -276,21 +318,15 @@ class TestCallQwen3Telemetry: """高品質回應 → ai_calls 應記 status=ok, caller=openclaw_qa, provider=gcp_ollama""" captured = reset_state - class FakeResp: - status_code = 200 - def raise_for_status(self): pass - def json(self): - return { - 'response': '本週 momo 業績成長 12%,建議加碼家電促銷。', - 'prompt_eval_count': 150, - 'eval_count': 60, - } - - monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp()) + fake_service, _fake_resp = _stub_ollama_generate(monkeypatch) result = svc._call_qwen3_qa("本週業績?", None, "qa-test123") assert result is not None assert "業績成長" in result + assert fake_service.instances + generate_kwargs = fake_service.instances[0].generate_calls[0] + assert generate_kwargs['model'] == svc.OPENCLAW_QA_OLLAMA_MODEL + assert generate_kwargs['system_prompt'] == svc.QWEN3_TC_SYSTEM_PROMPT assert _wait_async(captured, 1) assert len(captured) == 1 @@ -301,16 +337,32 @@ class TestCallQwen3Telemetry: assert rec['status'] == 'ok' assert rec['fallback_to'] is None assert rec['meta'].get('flag') == 'OPENCLAW_QA_OLLAMA_FIRST' + assert rec['meta'].get('route') == 'ollama_first' + assert rec['meta'].get('host') == 'http://34.143.170.20:11434' + assert rec['meta'].get('host_label') == 'GCP-SSD' assert rec['request_id'] == "qa-test123" + def test_qwen3_logs_actual_secondary_provider_after_retry(self, monkeypatch, reset_state): + """OllamaService 若落到 GCP-B,ai_calls.provider 必須寫 ollama_secondary。""" + captured = reset_state + _stub_ollama_generate( + monkeypatch, + host='http://34.21.145.224:11434', + ) + + result = svc._call_qwen3_qa("本週業績?", None, "qa-secondary") + assert result is not None + + assert _wait_async(captured, 1) + rec = captured[0] + assert rec['provider'] == 'ollama_secondary' + assert rec['meta'].get('host_label') == 'GCP-SSD-2' + def test_qwen3_logs_fallback_on_exception(self, monkeypatch, reset_state): """Ollama 連線失敗 → ai_calls 應記 fallback_to=openclaw_qa_gemini_fallback + status=fallback""" captured = reset_state - def boom(*a, **kw): - raise svc.requests.ConnectionError("connection refused") - - monkeypatch.setattr(svc.requests, 'post', boom) + _stub_ollama_generate(monkeypatch, success=False) result = svc._call_qwen3_qa("test", None, "qa-fail123") assert result is None @@ -326,13 +378,12 @@ class TestCallQwen3Telemetry: """Ollama 回空 response → 視為 empty_response,標 fallback。""" captured = reset_state - class FakeResp: - status_code = 200 - def raise_for_status(self): pass - def json(self): - return {'response': '', 'prompt_eval_count': 100, 'eval_count': 0} - - monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp()) + _stub_ollama_generate( + monkeypatch, + content='', + input_tokens=100, + output_tokens=0, + ) result = svc._call_qwen3_qa("test", None, "qa-empty") assert result is None diff --git a/tests/test_phase3f_cleanup_contracts.py b/tests/test_phase3f_cleanup_contracts.py index fa219c7..a1909ac 100644 --- a/tests/test_phase3f_cleanup_contracts.py +++ b/tests/test_phase3f_cleanup_contracts.py @@ -143,7 +143,6 @@ def test_env_example_documents_runtime_and_ai_automation_variables(): "OPENCLAW_OLLAMA_MODEL", "OPENCLAW_PPT_CACHE_TTL_HOURS", "OPENCLAW_QA_OLLAMA_FIRST", - "OPENCLAW_QA_OLLAMA_HOST", "OPENCLAW_QA_OLLAMA_MODEL", "OPENCLAW_QA_OLLAMA_TIMEOUT", "PPT_VISION_ENABLED", diff --git a/tests/test_qwen3_runtime_usage.py b/tests/test_qwen3_runtime_usage.py index e51d9f8..f9d622e 100644 --- a/tests/test_qwen3_runtime_usage.py +++ b/tests/test_qwen3_runtime_usage.py @@ -11,6 +11,8 @@ def test_qwen3_is_active_runtime_model_not_unused_ollama_weight(): assert "OPENCLAW_QA_OLLAMA_MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b')" in openclaw_source assert "def _call_qwen3_qa(" in openclaw_source + assert "OllamaService(model=OPENCLAW_QA_OLLAMA_MODEL)" in openclaw_source + assert "OPENCLAW_QA_OLLAMA_HOST" not in openclaw_source assert 'NEMOTRON_OLLAMA_MODEL = os.getenv("NEMOTRON_OLLAMA_MODEL", "qwen3:14b")' in nemotron_source assert "def _call_qwen3_dispatch(" in nemotron_source assert "'qwen3:14b'" in router_source