From 2ff0ef3bb643c2a790567e685214839229fdc9c2 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Tue, 5 May 2026 13:55:52 +0800
Subject: [PATCH] fix(openclaw): route legacy ollama through failover endpoints

The legacy `OpenClawService._call_ollama` safety-net used to POST only to
`settings.OLLAMA_URL`. It now asks the Ollama failover manager for the
ordered endpoint list, keeps the entries whose provider name starts with
"ollama" and that carry a URL, and tries them one after another.

If the route lookup raises or yields no Ollama endpoint, the method falls
back to OLLAMA_URL, OLLAMA_SECONDARY_URL and OLLAMA_FALLBACK_URL (in that
order, empty values and duplicate URLs skipped). Every request uses the
larger of OPENCLAW_TIMEOUT and OLLAMA_DIAGNOSE_TIMEOUT_SECONDS as its
timeout. When every endpoint fails, the last error text is returned
together with a False success flag.

The new tests cover the failover order and the configured-URL fallback.

---
 apps/api/src/services/openclaw.py           | 123 ++++++++++-----
 .../test_openclaw_legacy_ollama_failover.py | 140 ++++++++++++++++++
 2 files changed, 229 insertions(+), 34 deletions(-)
 create mode 100644 apps/api/tests/test_openclaw_legacy_ollama_failover.py

diff --git a/apps/api/src/services/openclaw.py b/apps/api/src/services/openclaw.py
index 1a4cbb84..3678d567 100644
--- a/apps/api/src/services/openclaw.py
+++ b/apps/api/src/services/openclaw.py
@@ -38,6 +38,7 @@ from src.models.ai import (
 )
 from src.services.langfuse_client import langfuse_trace
 from src.services.model_registry import get_model_registry
+from src.services.ollama_failover_manager import get_ollama_failover_manager
 from src.services.signoz_client import GoldMetrics, get_signoz_client
 from src.utils.k8s_naming import normalize_resource_name
 from src.utils.timezone import now_taipei_iso
@@ -438,53 +439,107 @@ class OpenClawService:
 
     async def _call_ollama(self, prompt: str) -> tuple[str, bool]:
         """
-        呼叫本機 Ollama (支援 JSON Mode)
+        Call Ollama (JSON mode supported).
+
+        With USE_AI_ROUTER=true, calls normally go through AIRouterExecutor; this is the legacy
+        safety-net. 2026-05-05 Codex: the safety-net must also follow the ADR-110 three-layer
+        Ollama routing; it must not hit only OLLAMA_URL and then drop straight to Gemini.
         """
         try:
             client = await self._get_client()
 
-            logger.info(
-                "ollama_request_start",
-                url=f"{settings.OLLAMA_URL}/api/generate",
-                prompt_length=len(prompt),
-            )
-
             # 從 ModelRegistry 取得模型配置
             registry = get_model_registry()
             model_name = registry.get_model("ollama", "rca")
             options = registry.get_provider_options("ollama")
-
-            response = await client.post(
-                f"{settings.OLLAMA_URL}/api/generate",
-                json={
-                    "model": model_name,
-                    "prompt": prompt,
-                    "stream": False,
-                    "format": "json",  # 強制 JSON 輸出
-                    "options": {
-                        "num_predict": options.get("num_predict", 1024),
-                        "temperature": options.get("temperature", 0.1),
-                        "top_p": options.get("top_p", 0.9),
-                    },
-                },
-                timeout=httpx.Timeout(float(settings.OPENCLAW_TIMEOUT), connect=10.0),
+            timeout_seconds = max(
+                float(settings.OPENCLAW_TIMEOUT),
+                float(getattr(settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", settings.OPENCLAW_TIMEOUT)),
             )
 
-            logger.info(
-                "ollama_response_received",
-                status_code=response.status_code,
-            )
+            endpoints: list[tuple[str, str]] = []
+            try:
+                route = await get_ollama_failover_manager().select_provider()
+                endpoints = [
+                    (endpoint.provider_name, endpoint.url)
+                    for endpoint in route.all_endpoints_in_order()
+                    if endpoint.provider_name.startswith("ollama") and endpoint.url
+                ]
+            except Exception as route_error:
+                logger.warning(
+                    "legacy_ollama_route_lookup_failed",
+                    error=str(route_error),
+                )
 
-            response.raise_for_status()
-            data = response.json()
-            result = data.get("response", "")
+            if not endpoints:
+                configured_endpoints = [
+                    ("ollama_gcp_a", settings.OLLAMA_URL),
+                    ("ollama_gcp_b", getattr(settings, "OLLAMA_SECONDARY_URL", "")),
+                    ("ollama_local", getattr(settings, "OLLAMA_FALLBACK_URL", "")),
+                ]
+                seen_urls: set[str] = set()
+                endpoints = []
+                for provider_name, endpoint_url in configured_endpoints:
+                    if endpoint_url and endpoint_url not in seen_urls:
+                        endpoints.append((provider_name, endpoint_url))
+                        seen_urls.add(endpoint_url)
 
-            logger.info(
-                "ollama_response_parsed",
-                response_length=len(result),
-            )
+            last_error = ""
+            for provider_name, endpoint_url in endpoints:
+                try:
+                    logger.info(
+                        "ollama_request_start",
+                        provider=provider_name,
+                        url=f"{endpoint_url}/api/generate",
+                        prompt_length=len(prompt),
+                    )
 
-            return result, True
+                    response = await client.post(
+                        f"{endpoint_url}/api/generate",
+                        json={
+                            "model": model_name,
+                            "prompt": prompt,
+                            "stream": False,
+                            "format": "json",  # force JSON output
+                            "options": {
+                                "num_predict": options.get("num_predict", 1024),
+                                "temperature": options.get("temperature", 0.1),
+                                "top_p": options.get("top_p", 0.9),
+                            },
+                        },
+                        timeout=httpx.Timeout(timeout_seconds, connect=10.0),
+                    )
+
+                    logger.info(
+                        "ollama_response_received",
+                        provider=provider_name,
+                        status_code=response.status_code,
+                    )
+
+                    response.raise_for_status()
+                    data = response.json()
+                    result = data.get("response", "")
+
+                    logger.info(
+                        "ollama_response_parsed",
+                        provider=provider_name,
+                        response_length=len(result),
+                    )
+
+                    return result, True
+                except httpx.TimeoutException as e:
+                    last_error = f"{provider_name} timeout: {e}"
+                    logger.warning("ollama_timeout", provider=provider_name, error=str(e))
+                except Exception as e:
+                    last_error = f"{provider_name} failed: {e}"
+                    logger.warning(
+                        "ollama_call_failed",
+                        provider=provider_name,
+                        error=str(e),
+                        error_type=type(e).__name__,
+                    )
+
+            return last_error or "all Ollama endpoints failed", False
 
         except httpx.TimeoutException as e:
             logger.warning("ollama_timeout", error=str(e))
diff --git a/apps/api/tests/test_openclaw_legacy_ollama_failover.py b/apps/api/tests/test_openclaw_legacy_ollama_failover.py
new file mode 100644
index 00000000..f72995d5
--- /dev/null
+++ b/apps/api/tests/test_openclaw_legacy_ollama_failover.py
@@ -0,0 +1,140 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+import httpx
+import pytest
+
+from src.services import openclaw as openclaw_module
+from src.services.openclaw import OpenClawService
+
+
+class _FakeRegistry:
+    def get_model(self, provider: str, use_case: str) -> str:
+        return "qwen2.5:7b-instruct"
+
+    def get_provider_options(self, provider: str) -> dict[str, Any]:
+        return {"num_predict": 32, "temperature": 0.1, "top_p": 0.9}
+
+
+@dataclass
+class _FakeEndpoint:
+    provider_name: str
+    url: str
+
+
+class _FakeRoute:
+    def __init__(self, endpoints: list[_FakeEndpoint]) -> None:
+        self._endpoints = endpoints
+
+    def all_endpoints_in_order(self) -> list[_FakeEndpoint]:
+        return self._endpoints
+
+
+class _FakeManager:
+    def __init__(self, endpoints: list[_FakeEndpoint]) -> None:
+        self._endpoints = endpoints
+
+    async def select_provider(self) -> _FakeRoute:
+        return _FakeRoute(self._endpoints)
+
+
+class _FakeResponse:
+    status_code = 200
+
+    def raise_for_status(self) -> None:
+        return None
+
+    def json(self) -> dict[str, Any]:
+        return {"response": '{"action_title":"ok"}'}
+
+
+class _FakeClient:
+    def __init__(self, fail_urls: set[str]) -> None:
+        self.fail_urls = fail_urls
+        self.posted_urls: list[str] = []
+
+    async def post(self, url: str, **kwargs: Any) -> _FakeResponse:
+        self.posted_urls.append(url)
+        if url in self.fail_urls:
+            raise httpx.ConnectError("offline")
+        return _FakeResponse()
+
+
+@pytest.mark.asyncio
+async def test_legacy_ollama_uses_failover_order_before_gemini(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(openclaw_module, "get_model_registry", lambda: _FakeRegistry())
+    monkeypatch.setattr(openclaw_module.settings, "OPENCLAW_TIMEOUT", 30)
+    monkeypatch.setattr(openclaw_module.settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", 200)
+    monkeypatch.setattr(
+        openclaw_module,
+        "get_ollama_failover_manager",
+        lambda: _FakeManager(
+            [
+                _FakeEndpoint("ollama_gcp_a", "http://gcp-a:11435"),
+                _FakeEndpoint("ollama_gcp_b", "http://gcp-b:11436"),
+                _FakeEndpoint("ollama_local", "http://local-111:11434"),
+                _FakeEndpoint("gemini", ""),
+            ],
+        ),
+    )
+
+    client = _FakeClient(fail_urls={"http://gcp-a:11435/api/generate"})
+    service = object.__new__(OpenClawService)
+
+    async def _get_client() -> _FakeClient:
+        return client
+
+    monkeypatch.setattr(service, "_get_client", _get_client)
+
+    result, ok = await service._call_ollama("diagnose")
+
+    assert ok is True
+    assert result == '{"action_title":"ok"}'
+    assert client.posted_urls == [
+        "http://gcp-a:11435/api/generate",
+        "http://gcp-b:11436/api/generate",
+    ]
+
+
+@pytest.mark.asyncio
+async def test_legacy_ollama_falls_back_to_configured_three_layer_urls(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(openclaw_module, "get_model_registry", lambda: _FakeRegistry())
+    monkeypatch.setattr(openclaw_module.settings, "OPENCLAW_TIMEOUT", 30)
+    monkeypatch.setattr(openclaw_module.settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", 200)
+    monkeypatch.setattr(openclaw_module.settings, "OLLAMA_URL", "http://gcp-a:11435")
+    monkeypatch.setattr(openclaw_module.settings, "OLLAMA_SECONDARY_URL", "http://gcp-b:11436")
+    monkeypatch.setattr(openclaw_module.settings, "OLLAMA_FALLBACK_URL", "http://local-111:11434")
+    monkeypatch.setattr(
+        openclaw_module,
+        "get_ollama_failover_manager",
+        lambda: _FakeManager([_FakeEndpoint("gemini", "")]),
+    )
+
+    client = _FakeClient(
+        fail_urls={
+            "http://gcp-a:11435/api/generate",
+            "http://gcp-b:11436/api/generate",
+        },
+    )
+    service = object.__new__(OpenClawService)
+
+    async def _get_client() -> _FakeClient:
+        return client
+
+    monkeypatch.setattr(service, "_get_client", _get_client)
+
+    result, ok = await service._call_ollama("diagnose")
+
+    assert ok is True
+    assert result == '{"action_title":"ok"}'
+    assert client.posted_urls == [
+        "http://gcp-a:11435/api/generate",
+        "http://gcp-b:11436/api/generate",
+        "http://local-111:11434/api/generate",
+    ]