fix(openclaw): route legacy ollama through failover endpoints
Some checks failed
CD Pipeline / tests (push) Failing after 1m49s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Successful in 24s

This commit is contained in:
Your Name
2026-05-05 13:55:52 +08:00
parent bb1995f349
commit 2ff0ef3bb6
2 changed files with 229 additions and 34 deletions

View File

@@ -38,6 +38,7 @@ from src.models.ai import (
)
from src.services.langfuse_client import langfuse_trace
from src.services.model_registry import get_model_registry
from src.services.ollama_failover_manager import get_ollama_failover_manager
from src.services.signoz_client import GoldMetrics, get_signoz_client
from src.utils.k8s_naming import normalize_resource_name
from src.utils.timezone import now_taipei_iso
@@ -438,53 +439,107 @@ class OpenClawService:
async def _call_ollama(self, prompt: str) -> tuple[str, bool]:
"""
呼叫本機 Ollama (支援 JSON Mode)
呼叫 Ollama (支援 JSON Mode)
USE_AI_ROUTER=true 正常會走 AIRouterExecutor，這裡是 legacy safety-net。
2026-05-05 Codex: safety-net 也必須遵守 ADR-110 三層 Ollama
路由,不能只打 OLLAMA_URL 後直接掉 Gemini。
"""
try:
client = await self._get_client()
logger.info(
"ollama_request_start",
url=f"{settings.OLLAMA_URL}/api/generate",
prompt_length=len(prompt),
)
# 從 ModelRegistry 取得模型配置
registry = get_model_registry()
model_name = registry.get_model("ollama", "rca")
options = registry.get_provider_options("ollama")
response = await client.post(
f"{settings.OLLAMA_URL}/api/generate",
json={
"model": model_name,
"prompt": prompt,
"stream": False,
"format": "json", # 強制 JSON 輸出
"options": {
"num_predict": options.get("num_predict", 1024),
"temperature": options.get("temperature", 0.1),
"top_p": options.get("top_p", 0.9),
},
},
timeout=httpx.Timeout(float(settings.OPENCLAW_TIMEOUT), connect=10.0),
timeout_seconds = max(
float(settings.OPENCLAW_TIMEOUT),
float(getattr(settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", settings.OPENCLAW_TIMEOUT)),
)
logger.info(
"ollama_response_received",
status_code=response.status_code,
)
endpoints: list[tuple[str, str]] = []
try:
route = await get_ollama_failover_manager().select_provider()
endpoints = [
(endpoint.provider_name, endpoint.url)
for endpoint in route.all_endpoints_in_order()
if endpoint.provider_name.startswith("ollama") and endpoint.url
]
except Exception as route_error:
logger.warning(
"legacy_ollama_route_lookup_failed",
error=str(route_error),
)
response.raise_for_status()
data = response.json()
result = data.get("response", "")
if not endpoints:
configured_endpoints = [
("ollama_gcp_a", settings.OLLAMA_URL),
("ollama_gcp_b", getattr(settings, "OLLAMA_SECONDARY_URL", "")),
("ollama_local", getattr(settings, "OLLAMA_FALLBACK_URL", "")),
]
seen_urls: set[str] = set()
endpoints = []
for provider_name, endpoint_url in configured_endpoints:
if endpoint_url and endpoint_url not in seen_urls:
endpoints.append((provider_name, endpoint_url))
seen_urls.add(endpoint_url)
logger.info(
"ollama_response_parsed",
response_length=len(result),
)
last_error = ""
for provider_name, endpoint_url in endpoints:
try:
logger.info(
"ollama_request_start",
provider=provider_name,
url=f"{endpoint_url}/api/generate",
prompt_length=len(prompt),
)
return result, True
response = await client.post(
f"{endpoint_url}/api/generate",
json={
"model": model_name,
"prompt": prompt,
"stream": False,
"format": "json", # 強制 JSON 輸出
"options": {
"num_predict": options.get("num_predict", 1024),
"temperature": options.get("temperature", 0.1),
"top_p": options.get("top_p", 0.9),
},
},
timeout=httpx.Timeout(timeout_seconds, connect=10.0),
)
logger.info(
"ollama_response_received",
provider=provider_name,
status_code=response.status_code,
)
response.raise_for_status()
data = response.json()
result = data.get("response", "")
logger.info(
"ollama_response_parsed",
provider=provider_name,
response_length=len(result),
)
return result, True
except httpx.TimeoutException as e:
last_error = f"{provider_name} timeout: {e}"
logger.warning("ollama_timeout", provider=provider_name, error=str(e))
except Exception as e:
last_error = f"{provider_name} failed: {e}"
logger.warning(
"ollama_call_failed",
provider=provider_name,
error=str(e),
error_type=type(e).__name__,
)
return last_error or "all Ollama endpoints failed", False
except httpx.TimeoutException as e:
logger.warning("ollama_timeout", error=str(e))

View File

@@ -0,0 +1,140 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
import httpx
import pytest
from src.services import openclaw as openclaw_module
from src.services.openclaw import OpenClawService
class _FakeRegistry:
    """Model-registry stand-in that answers every lookup with fixed values."""

    def get_model(self, provider: str, use_case: str) -> str:
        """Return the same model tag regardless of provider or use case."""
        model_tag = "qwen2.5:7b-instruct"
        return model_tag

    def get_provider_options(self, provider: str) -> dict[str, Any]:
        """Return a freshly built generation-options mapping on every call."""
        return dict(num_predict=32, temperature=0.1, top_p=0.9)
@dataclass
class _FakeEndpoint:
    """Endpoint record exposing the two attributes the tests' fake routes carry."""

    # Provider identifier; the tests use values such as "ollama_gcp_a" and "gemini".
    provider_name: str
    # Base URL of the endpoint; the tests use an empty string for the "gemini" entry.
    url: str
class _FakeRoute:
    """Route stand-in that hands back a pre-built endpoint list."""

    def __init__(self, endpoints: list[_FakeEndpoint]) -> None:
        """Remember the endpoint list exactly as given (no copy is made)."""
        self._endpoints = endpoints

    def all_endpoints_in_order(self) -> list[_FakeEndpoint]:
        """Return the stored endpoint list, in the order it was supplied."""
        ordered = self._endpoints
        return ordered
class _FakeManager:
    """Failover-manager stand-in whose provider selection always succeeds."""

    def __init__(self, endpoints: list[_FakeEndpoint]) -> None:
        """Keep the endpoint list that every selected route will expose."""
        self._endpoints = endpoints

    async def select_provider(self) -> _FakeRoute:
        """Build and return a new fake route wrapping the stored endpoints."""
        route = _FakeRoute(self._endpoints)
        return route
class _FakeResponse:
    """Canned successful HTTP response with a fixed JSON body."""

    # Status reported to the caller; always a success code.
    status_code = 200

    def raise_for_status(self) -> None:
        """Do nothing: this fake response never represents an HTTP error."""
        return

    def json(self) -> dict[str, Any]:
        """Return the fixed payload whose "response" field is a JSON string."""
        payload = {"response": '{"action_title":"ok"}'}
        return payload
class _FakeClient:
    """HTTP client stand-in that records requested URLs and fails chosen ones."""

    def __init__(self, fail_urls: set[str]) -> None:
        """Store the URLs that must fail and start with an empty request log."""
        self.fail_urls = fail_urls
        self.posted_urls: list[str] = []

    async def post(self, url: str, **kwargs: Any) -> _FakeResponse:
        """Log *url*, then either answer with a fake response or raise.

        Extra keyword arguments are accepted and ignored. The URL is logged
        before the failure check, so failed attempts also appear in
        ``posted_urls``.

        Raises:
            httpx.ConnectError: when *url* is one of ``fail_urls``.
        """
        self.posted_urls.append(url)
        if url not in self.fail_urls:
            return _FakeResponse()
        raise httpx.ConnectError("offline")
@pytest.mark.asyncio
async def test_legacy_ollama_uses_failover_order_before_gemini(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``_call_ollama`` walks the failover route in order.

    The fake route lists three Ollama endpoints followed by a "gemini" entry
    with an empty URL. The first endpoint raises a connection error, so the
    call is expected to succeed on the second one and send nothing further.
    """
    # --- Arrange: registry, timeouts and failover route -------------------
    monkeypatch.setattr(openclaw_module, "get_model_registry", lambda: _FakeRegistry())
    monkeypatch.setattr(openclaw_module.settings, "OPENCLAW_TIMEOUT", 30)
    # The code under test reads this setting through getattr() with a
    # fallback, i.e. it is optional. raising=False keeps the test from
    # erroring during setup when the settings object does not define it.
    monkeypatch.setattr(
        openclaw_module.settings,
        "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS",
        200,
        raising=False,
    )
    monkeypatch.setattr(
        openclaw_module,
        "get_ollama_failover_manager",
        lambda: _FakeManager(
            [
                _FakeEndpoint("ollama_gcp_a", "http://gcp-a:11435"),
                _FakeEndpoint("ollama_gcp_b", "http://gcp-b:11436"),
                _FakeEndpoint("ollama_local", "http://local-111:11434"),
                _FakeEndpoint("gemini", ""),
            ],
        ),
    )

    # Only the first endpoint is offline.
    client = _FakeClient(fail_urls={"http://gcp-a:11435/api/generate"})
    # Bypass __init__ so no real service dependencies are constructed.
    service = object.__new__(OpenClawService)

    async def _get_client() -> _FakeClient:
        return client

    monkeypatch.setattr(service, "_get_client", _get_client)

    # --- Act ---------------------------------------------------------------
    result, ok = await service._call_ollama("diagnose")

    # --- Assert: second endpoint answered, nothing after it was tried ------
    assert ok is True
    assert result == '{"action_title":"ok"}'
    assert client.posted_urls == [
        "http://gcp-a:11435/api/generate",
        "http://gcp-b:11436/api/generate",
    ]
@pytest.mark.asyncio
async def test_legacy_ollama_falls_back_to_configured_three_layer_urls(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check the settings-based fallback when the route has no Ollama endpoint.

    The fake failover manager returns only a "gemini" entry with an empty URL,
    so ``_call_ollama`` is expected to try the three configured URLs in order.
    The first two raise a connection error and the third one answers.
    """
    # --- Arrange: registry, timeouts and configured URLs -------------------
    monkeypatch.setattr(openclaw_module, "get_model_registry", lambda: _FakeRegistry())
    monkeypatch.setattr(openclaw_module.settings, "OPENCLAW_TIMEOUT", 30)
    monkeypatch.setattr(openclaw_module.settings, "OLLAMA_URL", "http://gcp-a:11435")
    # The code under test reads the next three settings through getattr()
    # with a fallback, i.e. they are optional. raising=False keeps the test
    # from erroring during setup when the settings object does not define them.
    monkeypatch.setattr(
        openclaw_module.settings,
        "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS",
        200,
        raising=False,
    )
    monkeypatch.setattr(
        openclaw_module.settings,
        "OLLAMA_SECONDARY_URL",
        "http://gcp-b:11436",
        raising=False,
    )
    monkeypatch.setattr(
        openclaw_module.settings,
        "OLLAMA_FALLBACK_URL",
        "http://local-111:11434",
        raising=False,
    )
    # The route contains no usable Ollama endpoint.
    monkeypatch.setattr(
        openclaw_module,
        "get_ollama_failover_manager",
        lambda: _FakeManager([_FakeEndpoint("gemini", "")]),
    )

    # The first two configured endpoints are offline.
    client = _FakeClient(
        fail_urls={
            "http://gcp-a:11435/api/generate",
            "http://gcp-b:11436/api/generate",
        },
    )
    # Bypass __init__ so no real service dependencies are constructed.
    service = object.__new__(OpenClawService)

    async def _get_client() -> _FakeClient:
        return client

    monkeypatch.setattr(service, "_get_client", _get_client)

    # --- Act ---------------------------------------------------------------
    result, ok = await service._call_ollama("diagnose")

    # --- Assert: all three configured URLs were tried, in order ------------
    assert ok is True
    assert result == '{"action_title":"ok"}'
    assert client.posted_urls == [
        "http://gcp-a:11435/api/generate",
        "http://gcp-b:11436/api/generate",
        "http://local-111:11434/api/generate",
    ]