fix(openclaw): route legacy ollama through failover endpoints
This commit is contained in:
@@ -38,6 +38,7 @@ from src.models.ai import (
|
||||
)
|
||||
from src.services.langfuse_client import langfuse_trace
|
||||
from src.services.model_registry import get_model_registry
|
||||
from src.services.ollama_failover_manager import get_ollama_failover_manager
|
||||
from src.services.signoz_client import GoldMetrics, get_signoz_client
|
||||
from src.utils.k8s_naming import normalize_resource_name
|
||||
from src.utils.timezone import now_taipei_iso
|
||||
@@ -438,53 +439,107 @@ class OpenClawService:
|
||||
|
||||
async def _call_ollama(self, prompt: str) -> tuple[str, bool]:
|
||||
"""
|
||||
呼叫本機 Ollama (支援 JSON Mode)
|
||||
呼叫 Ollama (支援 JSON Mode)。
|
||||
|
||||
USE_AI_ROUTER=true 正常會走 AIRouterExecutor;這裡是 legacy safety-net。
|
||||
2026-05-05 Codex: safety-net 也必須遵守 ADR-110 三層 Ollama
|
||||
路由,不能只打 OLLAMA_URL 後直接掉 Gemini。
|
||||
"""
|
||||
try:
|
||||
client = await self._get_client()
|
||||
|
||||
logger.info(
|
||||
"ollama_request_start",
|
||||
url=f"{settings.OLLAMA_URL}/api/generate",
|
||||
prompt_length=len(prompt),
|
||||
)
|
||||
|
||||
# 從 ModelRegistry 取得模型配置
|
||||
registry = get_model_registry()
|
||||
model_name = registry.get_model("ollama", "rca")
|
||||
options = registry.get_provider_options("ollama")
|
||||
|
||||
response = await client.post(
|
||||
f"{settings.OLLAMA_URL}/api/generate",
|
||||
json={
|
||||
"model": model_name,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"format": "json", # 強制 JSON 輸出
|
||||
"options": {
|
||||
"num_predict": options.get("num_predict", 1024),
|
||||
"temperature": options.get("temperature", 0.1),
|
||||
"top_p": options.get("top_p", 0.9),
|
||||
},
|
||||
},
|
||||
timeout=httpx.Timeout(float(settings.OPENCLAW_TIMEOUT), connect=10.0),
|
||||
timeout_seconds = max(
|
||||
float(settings.OPENCLAW_TIMEOUT),
|
||||
float(getattr(settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", settings.OPENCLAW_TIMEOUT)),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"ollama_response_received",
|
||||
status_code=response.status_code,
|
||||
)
|
||||
endpoints: list[tuple[str, str]] = []
|
||||
try:
|
||||
route = await get_ollama_failover_manager().select_provider()
|
||||
endpoints = [
|
||||
(endpoint.provider_name, endpoint.url)
|
||||
for endpoint in route.all_endpoints_in_order()
|
||||
if endpoint.provider_name.startswith("ollama") and endpoint.url
|
||||
]
|
||||
except Exception as route_error:
|
||||
logger.warning(
|
||||
"legacy_ollama_route_lookup_failed",
|
||||
error=str(route_error),
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
result = data.get("response", "")
|
||||
if not endpoints:
|
||||
configured_endpoints = [
|
||||
("ollama_gcp_a", settings.OLLAMA_URL),
|
||||
("ollama_gcp_b", getattr(settings, "OLLAMA_SECONDARY_URL", "")),
|
||||
("ollama_local", getattr(settings, "OLLAMA_FALLBACK_URL", "")),
|
||||
]
|
||||
seen_urls: set[str] = set()
|
||||
endpoints = []
|
||||
for provider_name, endpoint_url in configured_endpoints:
|
||||
if endpoint_url and endpoint_url not in seen_urls:
|
||||
endpoints.append((provider_name, endpoint_url))
|
||||
seen_urls.add(endpoint_url)
|
||||
|
||||
logger.info(
|
||||
"ollama_response_parsed",
|
||||
response_length=len(result),
|
||||
)
|
||||
last_error = ""
|
||||
for provider_name, endpoint_url in endpoints:
|
||||
try:
|
||||
logger.info(
|
||||
"ollama_request_start",
|
||||
provider=provider_name,
|
||||
url=f"{endpoint_url}/api/generate",
|
||||
prompt_length=len(prompt),
|
||||
)
|
||||
|
||||
return result, True
|
||||
response = await client.post(
|
||||
f"{endpoint_url}/api/generate",
|
||||
json={
|
||||
"model": model_name,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"format": "json", # 強制 JSON 輸出
|
||||
"options": {
|
||||
"num_predict": options.get("num_predict", 1024),
|
||||
"temperature": options.get("temperature", 0.1),
|
||||
"top_p": options.get("top_p", 0.9),
|
||||
},
|
||||
},
|
||||
timeout=httpx.Timeout(timeout_seconds, connect=10.0),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"ollama_response_received",
|
||||
provider=provider_name,
|
||||
status_code=response.status_code,
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
result = data.get("response", "")
|
||||
|
||||
logger.info(
|
||||
"ollama_response_parsed",
|
||||
provider=provider_name,
|
||||
response_length=len(result),
|
||||
)
|
||||
|
||||
return result, True
|
||||
except httpx.TimeoutException as e:
|
||||
last_error = f"{provider_name} timeout: {e}"
|
||||
logger.warning("ollama_timeout", provider=provider_name, error=str(e))
|
||||
except Exception as e:
|
||||
last_error = f"{provider_name} failed: {e}"
|
||||
logger.warning(
|
||||
"ollama_call_failed",
|
||||
provider=provider_name,
|
||||
error=str(e),
|
||||
error_type=type(e).__name__,
|
||||
)
|
||||
|
||||
return last_error or "all Ollama endpoints failed", False
|
||||
|
||||
except httpx.TimeoutException as e:
|
||||
logger.warning("ollama_timeout", error=str(e))
|
||||
|
||||
Reference in New Issue
Block a user