fix(openclaw): route legacy ollama through failover endpoints
This commit is contained in:
@@ -38,6 +38,7 @@ from src.models.ai import (
|
||||
)
|
||||
from src.services.langfuse_client import langfuse_trace
|
||||
from src.services.model_registry import get_model_registry
|
||||
from src.services.ollama_failover_manager import get_ollama_failover_manager
|
||||
from src.services.signoz_client import GoldMetrics, get_signoz_client
|
||||
from src.utils.k8s_naming import normalize_resource_name
|
||||
from src.utils.timezone import now_taipei_iso
|
||||
@@ -438,53 +439,107 @@ class OpenClawService:
|
||||
|
||||
async def _call_ollama(self, prompt: str) -> tuple[str, bool]:
|
||||
"""
|
||||
呼叫本機 Ollama (支援 JSON Mode)
|
||||
呼叫 Ollama (支援 JSON Mode)。
|
||||
|
||||
USE_AI_ROUTER=true 正常會走 AIRouterExecutor;這裡是 legacy safety-net。
|
||||
2026-05-05 Codex: safety-net 也必須遵守 ADR-110 三層 Ollama
|
||||
路由,不能只打 OLLAMA_URL 後直接掉 Gemini。
|
||||
"""
|
||||
try:
|
||||
client = await self._get_client()
|
||||
|
||||
logger.info(
|
||||
"ollama_request_start",
|
||||
url=f"{settings.OLLAMA_URL}/api/generate",
|
||||
prompt_length=len(prompt),
|
||||
)
|
||||
|
||||
# 從 ModelRegistry 取得模型配置
|
||||
registry = get_model_registry()
|
||||
model_name = registry.get_model("ollama", "rca")
|
||||
options = registry.get_provider_options("ollama")
|
||||
|
||||
response = await client.post(
|
||||
f"{settings.OLLAMA_URL}/api/generate",
|
||||
json={
|
||||
"model": model_name,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"format": "json", # 強制 JSON 輸出
|
||||
"options": {
|
||||
"num_predict": options.get("num_predict", 1024),
|
||||
"temperature": options.get("temperature", 0.1),
|
||||
"top_p": options.get("top_p", 0.9),
|
||||
},
|
||||
},
|
||||
timeout=httpx.Timeout(float(settings.OPENCLAW_TIMEOUT), connect=10.0),
|
||||
timeout_seconds = max(
|
||||
float(settings.OPENCLAW_TIMEOUT),
|
||||
float(getattr(settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", settings.OPENCLAW_TIMEOUT)),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"ollama_response_received",
|
||||
status_code=response.status_code,
|
||||
)
|
||||
endpoints: list[tuple[str, str]] = []
|
||||
try:
|
||||
route = await get_ollama_failover_manager().select_provider()
|
||||
endpoints = [
|
||||
(endpoint.provider_name, endpoint.url)
|
||||
for endpoint in route.all_endpoints_in_order()
|
||||
if endpoint.provider_name.startswith("ollama") and endpoint.url
|
||||
]
|
||||
except Exception as route_error:
|
||||
logger.warning(
|
||||
"legacy_ollama_route_lookup_failed",
|
||||
error=str(route_error),
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
result = data.get("response", "")
|
||||
if not endpoints:
|
||||
configured_endpoints = [
|
||||
("ollama_gcp_a", settings.OLLAMA_URL),
|
||||
("ollama_gcp_b", getattr(settings, "OLLAMA_SECONDARY_URL", "")),
|
||||
("ollama_local", getattr(settings, "OLLAMA_FALLBACK_URL", "")),
|
||||
]
|
||||
seen_urls: set[str] = set()
|
||||
endpoints = []
|
||||
for provider_name, endpoint_url in configured_endpoints:
|
||||
if endpoint_url and endpoint_url not in seen_urls:
|
||||
endpoints.append((provider_name, endpoint_url))
|
||||
seen_urls.add(endpoint_url)
|
||||
|
||||
logger.info(
|
||||
"ollama_response_parsed",
|
||||
response_length=len(result),
|
||||
)
|
||||
last_error = ""
|
||||
for provider_name, endpoint_url in endpoints:
|
||||
try:
|
||||
logger.info(
|
||||
"ollama_request_start",
|
||||
provider=provider_name,
|
||||
url=f"{endpoint_url}/api/generate",
|
||||
prompt_length=len(prompt),
|
||||
)
|
||||
|
||||
return result, True
|
||||
response = await client.post(
|
||||
f"{endpoint_url}/api/generate",
|
||||
json={
|
||||
"model": model_name,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"format": "json", # 強制 JSON 輸出
|
||||
"options": {
|
||||
"num_predict": options.get("num_predict", 1024),
|
||||
"temperature": options.get("temperature", 0.1),
|
||||
"top_p": options.get("top_p", 0.9),
|
||||
},
|
||||
},
|
||||
timeout=httpx.Timeout(timeout_seconds, connect=10.0),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"ollama_response_received",
|
||||
provider=provider_name,
|
||||
status_code=response.status_code,
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
result = data.get("response", "")
|
||||
|
||||
logger.info(
|
||||
"ollama_response_parsed",
|
||||
provider=provider_name,
|
||||
response_length=len(result),
|
||||
)
|
||||
|
||||
return result, True
|
||||
except httpx.TimeoutException as e:
|
||||
last_error = f"{provider_name} timeout: {e}"
|
||||
logger.warning("ollama_timeout", provider=provider_name, error=str(e))
|
||||
except Exception as e:
|
||||
last_error = f"{provider_name} failed: {e}"
|
||||
logger.warning(
|
||||
"ollama_call_failed",
|
||||
provider=provider_name,
|
||||
error=str(e),
|
||||
error_type=type(e).__name__,
|
||||
)
|
||||
|
||||
return last_error or "all Ollama endpoints failed", False
|
||||
|
||||
except httpx.TimeoutException as e:
|
||||
logger.warning("ollama_timeout", error=str(e))
|
||||
|
||||
140
apps/api/tests/test_openclaw_legacy_ollama_failover.py
Normal file
140
apps/api/tests/test_openclaw_legacy_ollama_failover.py
Normal file
@@ -0,0 +1,140 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from src.services import openclaw as openclaw_module
|
||||
from src.services.openclaw import OpenClawService
|
||||
|
||||
|
||||
class _FakeRegistry:
    """Test double for the model registry used by ``_call_ollama``.

    Both methods ignore their arguments and return fixed values so the
    tests do not depend on real registry configuration.
    """

    def get_model(self, provider: str, use_case: str) -> str:
        """Return a fixed model name regardless of provider or use case."""
        model_name = "qwen2.5:7b-instruct"
        return model_name

    def get_provider_options(self, provider: str) -> dict[str, Any]:
        """Return a fixed set of generation options for any provider."""
        provider_options: dict[str, Any] = {
            "num_predict": 32,
            "temperature": 0.1,
            "top_p": 0.9,
        }
        return provider_options
|
||||
|
||||
|
||||
@dataclass
class _FakeEndpoint:
    """Minimal stand-in for a failover endpoint entry.

    Carries only the two attributes the tests populate.
    """

    # Provider label, e.g. "ollama_gcp_a" or "gemini" in the tests below.
    provider_name: str
    # Base URL of the endpoint; the tests pass "" for the gemini entry.
    url: str
|
||||
|
||||
|
||||
class _FakeRoute:
    """Test double for the route object returned by the failover manager."""

    def __init__(self, endpoints: list[_FakeEndpoint]) -> None:
        """Store the endpoint list that will be handed back verbatim."""
        self._endpoints = endpoints

    def all_endpoints_in_order(self) -> list[_FakeEndpoint]:
        """Return the stored endpoints in the order they were supplied."""
        ordered = self._endpoints
        return ordered
|
||||
|
||||
|
||||
class _FakeManager:
    """Test double for the Ollama failover manager."""

    def __init__(self, endpoints: list[_FakeEndpoint]) -> None:
        """Remember the endpoints that every selected route should expose."""
        self._endpoints = endpoints

    async def select_provider(self) -> _FakeRoute:
        """Return a fresh ``_FakeRoute`` wrapping the configured endpoints."""
        route = _FakeRoute(self._endpoints)
        return route
|
||||
|
||||
|
||||
class _FakeResponse:
    """Canned successful HTTP response returned by ``_FakeClient.post``."""

    status_code = 200

    def raise_for_status(self) -> None:
        """Do nothing: this fake always represents a successful response."""

    def json(self) -> dict[str, Any]:
        """Return a fixed payload whose ``response`` field is a JSON string."""
        payload: dict[str, Any] = {"response": '{"action_title":"ok"}'}
        return payload
|
||||
|
||||
|
||||
class _FakeClient:
    """Test double for the async HTTP client used by ``_call_ollama``.

    Records every URL posted to, and raises a connection error for any
    URL listed in ``fail_urls``.
    """

    def __init__(self, fail_urls: set[str]) -> None:
        """Configure which URLs fail and start with an empty call log."""
        self.fail_urls = fail_urls
        self.posted_urls: list[str] = []

    async def post(self, url: str, **kwargs: Any) -> _FakeResponse:
        """Log ``url``, then either fail or return a canned response.

        Extra keyword arguments (request body, timeout, ...) are accepted
        and ignored.

        Raises:
            httpx.ConnectError: if ``url`` is in ``fail_urls``.
        """
        # Record first so failed attempts also show up in the call log.
        self.posted_urls.append(url)
        if url not in self.fail_urls:
            return _FakeResponse()
        raise httpx.ConnectError("offline")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_legacy_ollama_uses_failover_order_before_gemini(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The legacy Ollama path walks the manager's route in order.

    The fake manager exposes three Ollama endpoints followed by a gemini
    entry with an empty URL. The first Ollama endpoint is made to fail,
    so the call is expected to succeed on the second one and never reach
    the third.
    """
    routed_endpoints = [
        _FakeEndpoint("ollama_gcp_a", "http://gcp-a:11435"),
        _FakeEndpoint("ollama_gcp_b", "http://gcp-b:11436"),
        _FakeEndpoint("ollama_local", "http://local-111:11434"),
        _FakeEndpoint("gemini", ""),
    ]
    patched_settings = openclaw_module.settings

    monkeypatch.setattr(openclaw_module, "get_model_registry", _FakeRegistry)
    monkeypatch.setattr(patched_settings, "OPENCLAW_TIMEOUT", 30)
    monkeypatch.setattr(patched_settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", 200)
    monkeypatch.setattr(
        openclaw_module,
        "get_ollama_failover_manager",
        lambda: _FakeManager(routed_endpoints),
    )

    # Only the primary endpoint is offline.
    fake_http = _FakeClient(fail_urls={"http://gcp-a:11435/api/generate"})
    # Bypass __init__ so no real service dependencies are constructed.
    svc = object.__new__(OpenClawService)

    async def _get_client() -> _FakeClient:
        return fake_http

    monkeypatch.setattr(svc, "_get_client", _get_client)

    outcome = await svc._call_ollama("diagnose")
    result, ok = outcome

    assert ok is True
    assert result == '{"action_title":"ok"}'
    expected_calls = [
        "http://gcp-a:11435/api/generate",
        "http://gcp-b:11436/api/generate",
    ]
    assert fake_http.posted_urls == expected_calls
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_legacy_ollama_falls_back_to_configured_three_layer_urls(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With no Ollama endpoints in the route, the configured URLs are used.

    The fake manager exposes only a gemini entry with an empty URL, so
    the call is expected to try ``OLLAMA_URL``, ``OLLAMA_SECONDARY_URL``
    and ``OLLAMA_FALLBACK_URL`` from settings, in that order. The first
    two are made to fail and the third succeeds.
    """
    patched_settings = openclaw_module.settings
    setting_overrides = {
        "OPENCLAW_TIMEOUT": 30,
        "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS": 200,
        "OLLAMA_URL": "http://gcp-a:11435",
        "OLLAMA_SECONDARY_URL": "http://gcp-b:11436",
        "OLLAMA_FALLBACK_URL": "http://local-111:11434",
    }

    monkeypatch.setattr(openclaw_module, "get_model_registry", _FakeRegistry)
    for setting_name, setting_value in setting_overrides.items():
        monkeypatch.setattr(patched_settings, setting_name, setting_value)
    monkeypatch.setattr(
        openclaw_module,
        "get_ollama_failover_manager",
        lambda: _FakeManager([_FakeEndpoint("gemini", "")]),
    )

    # Both remote endpoints are offline; only the local one answers.
    offline_urls = {
        "http://gcp-a:11435/api/generate",
        "http://gcp-b:11436/api/generate",
    }
    fake_http = _FakeClient(fail_urls=offline_urls)
    # Bypass __init__ so no real service dependencies are constructed.
    svc = object.__new__(OpenClawService)

    async def _get_client() -> _FakeClient:
        return fake_http

    monkeypatch.setattr(svc, "_get_client", _get_client)

    outcome = await svc._call_ollama("diagnose")
    result, ok = outcome

    assert ok is True
    assert result == '{"action_title":"ok"}'
    expected_calls = [
        "http://gcp-a:11435/api/generate",
        "http://gcp-b:11436/api/generate",
        "http://local-111:11434/api/generate",
    ]
    assert fake_http.posted_urls == expected_calls
|
||||
Reference in New Issue
Block a user