# ewoooc/services/ollama_health_probe.py

"""Lightweight Ollama runtime health probes shared by scheduler and UI."""

import os
from typing import Optional, Tuple


def _env_flag(name: str, default: bool = False) -> bool:
    """Interpret the environment variable *name* as an on/off switch.

    Returns *default* when the variable is not set at all. When it is set,
    the value is trimmed and lowercased and the result is True only for
    "1", "true", "yes" or "on"; every other value, including the empty
    string, yields False regardless of *default*.
    """
    value = os.getenv(name)
    if value is not None:
        normalized = str(value).strip().lower()
        return normalized in ("1", "true", "yes", "on")
    return default


def host_health_model_probe_enabled(label: str) -> bool:
    """Decide whether the host labelled *label* gets a real model probe.

    The probe is controlled by OLLAMA_HOST_HEALTH_MODEL_PROBE_ENABLED
    (on by default). When that switch is on, hosts whose label contains
    "Fallback" are additionally gated by
    OLLAMA_HOST_HEALTH_MODEL_PROBE_INCLUDE_111 (off by default); all other
    hosts are probed.
    """
    globally_enabled = _env_flag("OLLAMA_HOST_HEALTH_MODEL_PROBE_ENABLED", True)
    # The label is only inspected when the global switch is on, matching the
    # original early return.
    if globally_enabled and "Fallback" in label:
        return _env_flag("OLLAMA_HOST_HEALTH_MODEL_PROBE_INCLUDE_111", False)
    return globally_enabled


def probe_ollama_embedding_runtime(requests_module, host: str) -> Tuple[bool, Optional[str]]:
    """Verify Ollama can serve a tiny embedding, not just answer /api/tags.

    Posts a one-word embedding request to ``<host>/api/embed`` and checks
    that the response carries a non-empty vector.

    Configuration is read from the environment on every call:

    * ``OLLAMA_HOST_HEALTH_EMBED_MODEL`` - model name (default ``bge-m3:latest``)
    * ``OLLAMA_HOST_HEALTH_EMBED_TIMEOUT`` - request timeout passed to
      ``post`` (default ``30``; an unparsable value falls back to the default)
    * ``OLLAMA_HOST_HEALTH_EMBED_KEEP_ALIVE`` - ``keep_alive`` value sent in
      the request body (default ``1m``)

    Args:
        requests_module: Object exposing a ``post(url, json=..., timeout=...)``
            callable whose result has ``status_code`` and ``json()``.
        host: Base URL of the Ollama host; trailing slashes are stripped.

    Returns:
        ``(True, None)`` when a non-empty embedding came back, otherwise
        ``(False, reason)`` where *reason* is a short "EmbedProbe ..." string.
        The function does not raise; request and parsing errors are reported
        through *reason*.
    """
    model = os.getenv("OLLAMA_HOST_HEALTH_EMBED_MODEL", "bge-m3:latest")
    keep_alive = os.getenv("OLLAMA_HOST_HEALTH_EMBED_KEEP_ALIVE", "1m")
    try:
        timeout = float(os.getenv("OLLAMA_HOST_HEALTH_EMBED_TIMEOUT", "30"))
    except ValueError:
        # A malformed override must not crash the caller of a health probe;
        # use the documented default instead.
        timeout = 30.0
    try:
        resp = requests_module.post(
            f"{host.rstrip('/')}/api/embed",
            json={"model": model, "input": "health", "keep_alive": keep_alive},
            timeout=timeout,
        )
        if resp.status_code != 200:
            return False, f"EmbedProbe HTTP {resp.status_code}"
        payload = resp.json()
        # Accept a list of vectors under "embeddings" ...
        embeddings = payload.get("embeddings")
        if isinstance(embeddings, list) and embeddings:
            first = embeddings[0]
            if isinstance(first, list) and first:
                return True, None
        # ... or a single vector under "embedding".
        embedding = payload.get("embedding")
        if isinstance(embedding, list) and embedding:
            return True, None
        return False, "EmbedProbe empty embedding"
    except Exception as exc:
        # Deliberate catch-all: any failure is reported as an unhealthy
        # result rather than raised, with the message capped at 160 chars.
        return False, f"EmbedProbe {type(exc).__name__}: {str(exc)[:160]}"