48 lines
1.9 KiB
Python
48 lines
1.9 KiB
Python
"""Lightweight Ollama runtime health probes shared by scheduler and UI."""
|
|
|
|
import os
|
|
from typing import Optional, Tuple
|
|
|
|
|
|
def _env_flag(name: str, default: bool = False) -> bool:
    """Interpret the environment variable *name* as a boolean switch.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is not set at all.

    Returns:
        ``default`` if the variable is unset. Otherwise True only when the
        value, after trimming whitespace and lower-casing, is one of
        "1", "true", "yes" or "on"; any other text (including the empty
        string) yields False regardless of ``default``.
    """
    value = os.getenv(name)
    if value is not None:
        normalized = str(value).strip().lower()
        return normalized in {"1", "true", "yes", "on"}
    return default
|
|
|
|
|
|
def host_health_model_probe_enabled(label: str) -> bool:
    """Decide whether the health check for *label* should run a real model call.

    Args:
        label: Display label of the host being checked. Only the presence of
            the case-sensitive substring "Fallback" is inspected.

    Returns:
        False when OLLAMA_HOST_HEALTH_MODEL_PROBE_ENABLED is set to a
        non-truthy value (the switch defaults to on when unset).
        For labels containing "Fallback", the result of the opt-in flag
        OLLAMA_HOST_HEALTH_MODEL_PROBE_INCLUDE_111 (off when unset).
        True for every other label.
    """
    # The global switch is consulted first, before the label is looked at.
    globally_enabled = _env_flag("OLLAMA_HOST_HEALTH_MODEL_PROBE_ENABLED", True)
    if not globally_enabled:
        return False
    if "Fallback" not in label:
        return True
    # Fallback hosts are skipped unless explicitly opted in.
    # NOTE(review): "111" presumably identifies a specific host - confirm.
    return _env_flag("OLLAMA_HOST_HEALTH_MODEL_PROBE_INCLUDE_111", False)
|
|
|
|
|
|
def probe_ollama_embedding_runtime(requests_module, host: str) -> Tuple[bool, Optional[str]]:
    """Verify Ollama can serve a tiny embedding, not just answer /api/tags.

    POSTs the single input "health" to ``{host}/api/embed`` and checks that
    the JSON reply carries a non-empty vector, either as the first entry of
    an ``embeddings`` list or as a top-level ``embedding`` list.

    Settings are read from the environment on every call:
        OLLAMA_HOST_HEALTH_EMBED_MODEL       model name (default "bge-m3:latest")
        OLLAMA_HOST_HEALTH_EMBED_TIMEOUT     value passed as ``timeout`` to
                                             ``post`` (default 30; an
                                             unparsable value falls back to 30)
        OLLAMA_HOST_HEALTH_EMBED_KEEP_ALIVE  ``keep_alive`` field of the
                                             request body (default "1m")

    Args:
        requests_module: Object exposing ``post(url, json=..., timeout=...)``;
            presumably the ``requests`` package, injected by the caller.
        host: Base URL of the Ollama server; trailing slashes are stripped.

    Returns:
        ``(True, None)`` when a non-empty embedding came back, otherwise
        ``(False, reason)`` where ``reason`` is a short "EmbedProbe ..."
        message. Any ``Exception`` raised while sending the request or
        reading the reply is reported this way instead of propagating.
    """
    model = os.getenv("OLLAMA_HOST_HEALTH_EMBED_MODEL", "bge-m3:latest")
    keep_alive = os.getenv("OLLAMA_HOST_HEALTH_EMBED_KEEP_ALIVE", "1m")
    try:
        timeout = float(os.getenv("OLLAMA_HOST_HEALTH_EMBED_TIMEOUT", "30"))
    except ValueError:
        # A malformed or empty setting must not make the probe itself raise;
        # use the documented default instead.
        timeout = 30.0

    try:
        resp = requests_module.post(
            f"{host.rstrip('/')}/api/embed",
            json={"model": model, "input": "health", "keep_alive": keep_alive},
            timeout=timeout,
        )
        if resp.status_code != 200:
            return False, f"EmbedProbe HTTP {resp.status_code}"

        payload = resp.json()

        # Batch-style reply: a list of vectors under "embeddings".
        embeddings = payload.get("embeddings")
        if isinstance(embeddings, list) and embeddings:
            first = embeddings[0]
            if isinstance(first, list) and first:
                return True, None

        # Single-vector reply under "embedding" is accepted as well.
        embedding = payload.get("embedding")
        if isinstance(embedding, list) and embedding:
            return True, None

        return False, "EmbedProbe empty embedding"
    except Exception as exc:
        # Best-effort probe: surface the failure as a reason string, with the
        # message truncated to keep the result compact.
        return False, f"EmbedProbe {type(exc).__name__}: {str(exc)[:160]}"
|