Files
awoooi/scripts/ops/ollama-topology-check.sh
Your Name ed7c6946cb
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
docs(awooop): define private Ollama mesh gateway
2026-05-05 22:56:22 +08:00

89 lines
3.0 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# Run the embedded Python probe inside a pod of the target deployment and
# report the state of every Ollama endpoint that pod is configured with.
#
# Settings, each overridable from the caller's environment:
#   NAMESPACE        Kubernetes namespace          (default: awoooi-prod)
#   DEPLOYMENT       deployment to exec into       (default: awoooi-api)
#   MODEL            model name used by the probe  (default: gemma3:4b)
#   TIMEOUT_SECONDS  timeout of the generate call  (default: 60)
set -euo pipefail

# `:=` assigns the default when the variable is unset or empty.
: "${NAMESPACE:=awoooi-prod}"
: "${DEPLOYMENT:=awoooi-api}"
: "${MODEL:=gemma3:4b}"
: "${TIMEOUT_SECONDS:=60}"

# The quoted heredoc delimiter keeps the shell from expanding anything inside
# the Python program; the two settings it needs travel through `env` instead.
kubectl --namespace "${NAMESPACE}" exec --stdin "deploy/${DEPLOYMENT}" -- \
  env CHECK_MODEL="${MODEL}" CHECK_TIMEOUT_SECONDS="${TIMEOUT_SECONDS}" python - <<'PY'
import json
import os
import time
import urllib.error
import urllib.request
# Probe settings; both variables are required and a missing one raises KeyError.
model = os.environ["CHECK_MODEL"]
timeout = int(os.environ["CHECK_TIMEOUT_SECONDS"])

# (label, base URL) pairs in probe order. An unset variable yields "" so the
# endpoint still appears in the report.
endpoints = [
    (label, os.environ.get(variable, ""))
    for label, variable in (
        ("primary", "OLLAMA_URL"),
        ("secondary", "OLLAMA_SECONDARY_URL"),
        ("fallback", "OLLAMA_FALLBACK_URL"),
    )
]

print(f"model={model} timeout={timeout}s")
def request_json(url: str, path: str, payload=None, timeout_seconds=10):
    """Request ``url`` + ``path`` and return the response body parsed as JSON.

    Args:
        url: Base URL of the endpoint; trailing slashes are ignored.
        path: Resource path such as ``/api/tags``. A non-empty path without a
            leading slash gets one added, so it is never glued onto the host
            or the last path segment of ``url``.
        payload: Optional JSON-serialisable object. When given, it is sent as
            the request body with a JSON content type; when ``None`` the
            request carries no body.
        timeout_seconds: Timeout passed to ``urllib.request.urlopen``.

    Returns:
        Whatever ``json.loads`` produces for the response body.

    Raises:
        urllib.error.URLError: The request failed (``HTTPError`` for HTTP
            error statuses).
        ValueError: The body is not valid UTF-8 or not valid JSON.
    """
    data = None
    headers = {}
    if payload is not None:
        data = json.dumps(payload).encode("utf-8")
        headers["Content-Type"] = "application/json"

    if path and not path.startswith("/"):
        path = "/" + path
    target = url.rstrip("/") + path

    req = urllib.request.Request(target, data=data, headers=headers)
    with urllib.request.urlopen(req, timeout=timeout_seconds) as response:
        return json.loads(response.read().decode("utf-8"))
# Probe every configured endpoint in order. Each endpoint gets up to three
# requests: list installed models, list loaded models, then one short
# non-streaming generation whose latency is reported.
for label, url in endpoints:
    print(f"\n== {label}: {url or '<missing>'} ==")
    if not url:
        print("status=missing")
        continue

    # /api/tags is the gate: when it fails, the remaining probes are skipped
    # for this endpoint.
    try:
        tags = request_json(url, "/api/tags", timeout_seconds=10)
        # `or []` also covers an explicit JSON null for "models", which would
        # otherwise be reported as a failure of a reachable endpoint.
        names = sorted(m.get("name", "") for m in tags.get("models") or [])
        print("tags=ok", ",".join(names[:12]))
    except Exception as exc:
        print("tags=fail", type(exc).__name__, str(exc)[:160])
        continue

    # A failing /api/ps is reported but does not stop the generate probe.
    try:
        ps = request_json(url, "/api/ps", timeout_seconds=10)
        live = ps.get("models") or []
        if not live:
            print("ps=ok live_models=<none>")
        for item in live:
            print(
                "ps=ok",
                f"model={item.get('model')}",
                f"expires={item.get('expires_at')}",
                f"size_vram={item.get('size_vram')}",
                f"context={item.get('context_length')}",
            )
            if item.get("size_vram") == 0:
                print("warning=cpu_only_or_no_vram")
    except Exception as exc:
        print("ps=fail", type(exc).__name__, str(exc)[:160])

    # NOTE(review): "keep_alive" is forwarded to the server unchanged;
    # presumably it keeps the model loaded after the probe - confirm that
    # this side effect is intended for a health check.
    payload = {
        "model": model,
        "prompt": "用繁體中文用一行回答Ollama health check",
        "stream": False,
        "keep_alive": "8h",
        "options": {"num_predict": 32, "temperature": 0.1},
    }

    # perf_counter() is unaffected by wall-clock adjustments, unlike
    # time.time(), so the reported latency cannot jump or go negative.
    start = time.perf_counter()
    try:
        result = request_json(url, "/api/generate", payload, timeout_seconds=timeout)
        latency_ms = round((time.perf_counter() - start) * 1000)
        # Collapse the answer to a single, bounded line of output.
        response = (result.get("response") or "").replace("\n", " ")[:120]
        print(f"generate=ok latency_ms={latency_ms} response={response}")
    except urllib.error.HTTPError as exc:
        # HTTP errors carry a response body; show the start of it.
        body = exc.read().decode(errors="replace")[:200]
        print("generate=fail", "HTTPError", exc.code, body)
    except Exception as exc:
        print("generate=fail", type(exc).__name__, str(exc)[:200])
PY