105 lines
2.7 KiB
Python
105 lines
2.7 KiB
Python
"""
|
|
Ollama endpoint resolver for AWOOOI workload placement.
|
|
|
|
ADR-110 gives AWOOOI three Ollama endpoints. The global order is always
|
|
GCP-A -> GCP-B -> 111 local; Gemini is owned by the caller/AI Router as the
|
|
final non-Ollama fallback.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Literal, Protocol
|
|
|
|
from src.core.config import settings
|
|
|
|
# Closed set of workload labels accepted by the resolver functions in this
# module. The label is carried through onto each returned selection.
OllamaWorkloadType = Literal[
    "interactive", "healthcheck", "alert_fast",
    "batch", "embedding", "rag",
    "code_review", "shadow", "canary",
    "deep_rca", "image_analysis", "hermes",
    "local_required", "privacy_sensitive", "dr",
]
|
|
|
|
class _OllamaSettings(Protocol):
    """Structural type for a settings object exposing the three Ollama URLs.

    Any object with these three string attributes can be passed as the
    ``config`` argument of the resolver functions in this module.
    """

    # Read first; the resolver labels it "ollama_gcp_a".
    OLLAMA_URL: str
    # Read second; the resolver labels it "ollama_gcp_b".
    OLLAMA_SECONDARY_URL: str
    # Read last; the resolver labels it "ollama_local".
    OLLAMA_FALLBACK_URL: str
|
|
|
|
|
|
@dataclass(frozen=True)
class OllamaEndpointSelection:
    """Immutable record describing one candidate Ollama endpoint.

    Attributes:
        url: Base URL of the endpoint.
        provider_name: Identifier of the endpoint slot the URL came from.
        workload_type: Workload label the selection was resolved for.
        reason: Short tag explaining why this endpoint holds its position.
    """

    url: str
    provider_name: str
    workload_type: OllamaWorkloadType
    reason: str
|
|
|
|
|
|
def resolve_ollama_selection(
    workload_type: OllamaWorkloadType = "interactive",
    *,
    config: _OllamaSettings | None = None,
) -> OllamaEndpointSelection:
    """Pick the highest-priority Ollama endpoint for a workload class.

    Args:
        workload_type: Workload label forwarded to ``resolve_ollama_order``.
        config: Optional settings object forwarded to ``resolve_ollama_order``.

    Returns:
        The first entry of the tuple produced by ``resolve_ollama_order``.
    """
    ordered = resolve_ollama_order(workload_type, config=config)
    return ordered[0]
|
|
|
|
|
|
def resolve_ollama_order(
    workload_type: OllamaWorkloadType = "interactive",
    *,
    config: _OllamaSettings | None = None,
) -> tuple[OllamaEndpointSelection, ...]:
    """Return the global Ollama fallback order: GCP-A -> GCP-B -> 111.

    The order does not depend on ``workload_type``; the label is only
    recorded on each returned selection.

    Args:
        workload_type: Workload label stamped onto every selection.
        config: Settings object to read the three URLs from. The module-level
            ``settings`` is used only when this is ``None``.

    Returns:
        A non-empty tuple of selections in priority order. Candidates whose
        URL is empty, or equal to the URL of an earlier candidate, are
        skipped. When nothing usable remains, the tuple holds a single
        placeholder with ``url=""`` and ``provider_name="ollama_unconfigured"``.
    """
    # Compare against None explicitly: a caller-supplied config whose truth
    # value is False must still be honoured instead of being silently
    # replaced by the global settings.
    cfg = settings if config is None else config

    candidates = (
        (cfg.OLLAMA_URL, "ollama_gcp_a", "global_primary_gcp_a"),
        (cfg.OLLAMA_SECONDARY_URL, "ollama_gcp_b", "global_secondary_gcp_b"),
        (cfg.OLLAMA_FALLBACK_URL, "ollama_local", "global_local_111"),
    )

    selections: list[OllamaEndpointSelection] = []
    seen: set[str] = set()
    for url, provider_name, reason in candidates:
        # Skip unset slots, and slots pointing at a URL that an earlier
        # (higher-priority) slot already contributed.
        if not url or url in seen:
            continue
        seen.add(url)
        selections.append(
            OllamaEndpointSelection(
                url=url,
                provider_name=provider_name,
                workload_type=workload_type,
                reason=reason,
            )
        )

    if selections:
        return tuple(selections)

    # Nothing configured: return an explicit placeholder so callers can
    # always index the first element.
    return (
        OllamaEndpointSelection(
            url="",
            provider_name="ollama_unconfigured",
            workload_type=workload_type,
            reason="no_ollama_endpoint_configured",
        ),
    )
|
|
|
|
|
|
def resolve_ollama_endpoint(
    workload_type: OllamaWorkloadType = "interactive",
    *,
    config: _OllamaSettings | None = None,
) -> str:
    """Return the base URL of the preferred Ollama endpoint.

    Args:
        workload_type: Workload label forwarded to ``resolve_ollama_selection``.
        config: Optional settings object forwarded to
            ``resolve_ollama_selection``.

    Returns:
        The ``url`` attribute of the selection returned by
        ``resolve_ollama_selection``.
    """
    selection = resolve_ollama_selection(workload_type, config=config)
    return selection.url
|