Files
ewoooc/services/ollama_health_probe.py
OoO ba5fe06b13
Some checks failed
CD Pipeline / deploy (push) Has been cancelled
fix: update ollama primary host
2026-06-18 14:24:55 +08:00

48 lines
1.9 KiB
Python

"""Lightweight Ollama runtime health probes shared by scheduler and UI."""
import os
from typing import Optional, Tuple
def _env_flag(name: str, default: bool = False) -> bool:
    """Read environment variable ``name`` and interpret it as an on/off switch.

    Args:
        name: Name of the environment variable to look up.
        default: Value returned when the variable is not set at all.

    Returns:
        ``default`` when the variable is unset; otherwise ``True`` only if the
        value, after trimming whitespace and lower-casing, is one of
        ``1``, ``true``, ``yes`` or ``on``. Any other value that is set
        (including the empty string) yields ``False``.
    """
    truthy = {"1", "true", "yes", "on"}
    value = os.getenv(name)
    if value is not None:
        normalized = str(value).strip().lower()
        return normalized in truthy
    return default
def host_health_model_probe_enabled(label: str) -> bool:
    """Decide whether the host health check should also run a real model probe.

    The probe is governed by ``OLLAMA_HOST_HEALTH_MODEL_PROBE_ENABLED``
    (treated as on when unset). For a host whose ``label`` contains the
    substring ``"Fallback"``, the probe additionally requires
    ``OLLAMA_HOST_HEALTH_MODEL_PROBE_INCLUDE_111`` to be switched on
    (treated as off when unset).

    Args:
        label: Display label of the host being checked.

    Returns:
        ``True`` when the model probe should run for this host.
    """
    probe_on = _env_flag("OLLAMA_HOST_HEALTH_MODEL_PROBE_ENABLED", True)
    # The label is only inspected once the master switch is known to be on.
    if probe_on and "Fallback" in label:
        # NOTE(review): "111" presumably identifies the fallback host - confirm.
        return _env_flag("OLLAMA_HOST_HEALTH_MODEL_PROBE_INCLUDE_111", False)
    return probe_on
def probe_ollama_embedding_runtime(requests_module, host: str) -> Tuple[bool, Optional[str]]:
    """Verify Ollama can serve a tiny embedding, not just answer /api/tags.

    Sends one POST to ``<host>/api/embed`` with the input ``"health"`` and
    checks that the response carries a non-empty embedding vector.

    Environment overrides:
        OLLAMA_HOST_HEALTH_EMBED_MODEL: model name (default ``bge-m3:latest``).
        OLLAMA_HOST_HEALTH_EMBED_TIMEOUT: request timeout as a number
            (default ``30``); a value that cannot be parsed as a float falls
            back to the default instead of raising.
        OLLAMA_HOST_HEALTH_EMBED_KEEP_ALIVE: ``keep_alive`` value sent with
            the request (default ``1m``).

    Args:
        requests_module: Object exposing a ``post(url, json=..., timeout=...)``
            callable whose result has ``status_code`` and ``json()``.
        host: Base URL of the Ollama host; trailing slashes are stripped.

    Returns:
        ``(True, None)`` when a non-empty embedding was returned, otherwise
        ``(False, message)`` where ``message`` starts with ``"EmbedProbe"``.
        The function never raises for request or parsing failures.
    """
    model = os.getenv("OLLAMA_HOST_HEALTH_EMBED_MODEL", "bge-m3:latest")
    keep_alive = os.getenv("OLLAMA_HOST_HEALTH_EMBED_KEEP_ALIVE", "1m")
    # A malformed timeout override must not escape as ValueError: callers
    # expect an (ok, message) tuple, so fall back to the built-in default.
    try:
        timeout = float(os.getenv("OLLAMA_HOST_HEALTH_EMBED_TIMEOUT", "30"))
    except (TypeError, ValueError):
        timeout = 30.0
    try:
        resp = requests_module.post(
            f"{host.rstrip('/')}/api/embed",
            json={"model": model, "input": "health", "keep_alive": keep_alive},
            timeout=timeout,
        )
        if resp.status_code != 200:
            return False, f"EmbedProbe HTTP {resp.status_code}"
        payload = resp.json()
        # Preferred shape: "embeddings" is a list of vectors; the first
        # vector must itself be a non-empty list.
        embeddings = payload.get("embeddings")
        if isinstance(embeddings, list) and embeddings:
            first = embeddings[0]
            if isinstance(first, list) and first:
                return True, None
        # Alternative shape: a single flat vector under "embedding".
        embedding = payload.get("embedding")
        if isinstance(embedding, list) and embedding:
            return True, None
        return False, "EmbedProbe empty embedding"
    except Exception as exc:
        # Best-effort probe: report any failure as an unhealthy result and
        # cap the message length so it stays readable in status output.
        return False, f"EmbedProbe {type(exc).__name__}: {str(exc)[:160]}"