89 lines
3.0 KiB
Bash
Executable File
89 lines
3.0 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Probe every Ollama endpoint configured on a running deployment.
#
# The embedded Python program is piped to `python -` inside the target pod, so
# it reads the pod's own OLLAMA_URL / OLLAMA_SECONDARY_URL / OLLAMA_FALLBACK_URL
# environment variables and talks to the endpoints from the pod's network.
#
# Overridable from the caller's environment:
#   NAMESPACE        Kubernetes namespace               (default: awoooi-prod)
#   DEPLOYMENT       deployment to exec into            (default: awoooi-api)
#   MODEL            model used for the generate probe  (default: gemma3:4b)
#   TIMEOUT_SECONDS  timeout of the generate probe      (default: 60)
set -euo pipefail

NAMESPACE="${NAMESPACE:-awoooi-prod}"
DEPLOYMENT="${DEPLOYMENT:-awoooi-api}"
MODEL="${MODEL:-gemma3:4b}"
TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-60}"

# `exec -i` keeps stdin attached so the heredoc reaches the interpreter. The
# quoted 'PY' delimiter stops bash from expanding anything inside the program,
# which is why the settings are handed over as CHECK_* environment variables.
kubectl -n "${NAMESPACE}" exec -i "deploy/${DEPLOYMENT}" -- \
  env CHECK_MODEL="${MODEL}" CHECK_TIMEOUT_SECONDS="${TIMEOUT_SECONDS}" python - <<'PY'
import json
import os
import time
import urllib.error
import urllib.request

# Probe settings arrive as CHECK_* environment variables. A missing variable
# raises KeyError and a non-integer timeout raises ValueError, both before any
# endpoint is contacted.
model = os.environ["CHECK_MODEL"]
timeout = int(os.environ["CHECK_TIMEOUT_SECONDS"])

# (label, base URL) pairs in the order they are probed. The URLs come from the
# environment of the process running this program; an unset variable becomes
# "" so the endpoint is still listed.
endpoints = [
    ("primary", os.environ.get("OLLAMA_URL", "")),
    ("secondary", os.environ.get("OLLAMA_SECONDARY_URL", "")),
    ("fallback", os.environ.get("OLLAMA_FALLBACK_URL", "")),
]

print(f"model={model} timeout={timeout}s")

def request_json(url: str, path: str, payload=None, timeout_seconds=10):
    """Send one request to ``url`` + ``path`` and return the decoded JSON body.

    Args:
        url: Base URL of the endpoint; trailing slashes are stripped before
            ``path`` is appended.
        path: Request path appended verbatim, so it should start with ``/``
            (for example ``/api/tags``).
        payload: Optional JSON-serialisable object. When given, it is sent as
            the request body with ``Content-Type: application/json`` (urllib
            then issues a POST); when ``None`` no body is sent (a GET).
        timeout_seconds: Timeout in seconds handed to ``urlopen``.

    Returns:
        The parsed JSON document.

    Raises:
        urllib.error.HTTPError: The server answered with an error status.
        OSError: The connection failed or timed out (``URLError`` is a
            subclass).
        ValueError: The body is not valid UTF-8 or not valid JSON.
    """
    body = None
    headers = {}
    if payload is not None:
        body = json.dumps(payload).encode("utf-8")
        headers["Content-Type"] = "application/json"

    request = urllib.request.Request(url.rstrip("/") + path, data=body, headers=headers)
    # The context manager closes the connection even if decoding fails.
    with urllib.request.urlopen(request, timeout=timeout_seconds) as response:
        return json.loads(response.read().decode("utf-8"))

# Body of the generation probe. It is identical for every endpoint, so it is
# built once. NOTE(review): per the Ollama API, keep_alive asks the server to
# keep the model loaded after the request, so this check also warms the model
# for 8h -- confirm that side effect is intended.
generate_payload = {
    "model": model,
    "prompt": "用繁體中文用一行回答:Ollama health check",
    "stream": False,
    "keep_alive": "8h",
    "options": {"num_predict": 32, "temperature": 0.1},
}

# Probe each configured endpoint in turn: model inventory (/api/tags), loaded
# models (/api/ps), then a short generation (/api/generate). Every failure is
# reported on stdout and never aborts the run, so all endpoints get checked;
# that is why the handlers below deliberately catch Exception.
for label, url in endpoints:
    print(f"\n== {label}: {url or '<missing>'} ==")
    if not url:
        print("status=missing")
        continue

    # Inventory probe. If even this fails, the remaining probes for this
    # endpoint are skipped.
    try:
        tags = request_json(url, "/api/tags", timeout_seconds=10)
        # `or []` / `or ""` also cover a JSON null, which dict.get's default
        # does not (the default only applies when the key is absent).
        names = sorted(m.get("name") or "" for m in tags.get("models") or [])
        print("tags=ok", ",".join(names[:12]))
    except Exception as exc:
        print("tags=fail", type(exc).__name__, str(exc)[:160])
        continue

    # Loaded-model probe. A failure here is reported but does not skip the
    # generation probe.
    try:
        ps = request_json(url, "/api/ps", timeout_seconds=10)
        live = ps.get("models") or []
        if not live:
            print("ps=ok live_models=<none>")
        for item in live:
            print(
                "ps=ok",
                f"model={item.get('model')}",
                f"expires={item.get('expires_at')}",
                f"size_vram={item.get('size_vram')}",
                f"context={item.get('context_length')}",
            )
            if item.get("size_vram") == 0:
                print("warning=cpu_only_or_no_vram")
    except Exception as exc:
        print("ps=fail", type(exc).__name__, str(exc)[:160])

    # Generation probe. The monotonic clock keeps the latency immune to
    # wall-clock adjustments while the request is in flight.
    start = time.monotonic()
    try:
        result = request_json(url, "/api/generate", generate_payload, timeout_seconds=timeout)
        latency_ms = round((time.monotonic() - start) * 1000)
        response = (result.get("response") or "").replace("\n", " ")[:120]
        print(f"generate=ok latency_ms={latency_ms} response={response}")
    except urllib.error.HTTPError as exc:
        # The error body usually carries the server's explanation.
        body = exc.read().decode(errors="replace")[:200]
        print("generate=fail", "HTTPError", exc.code, body)
    except Exception as exc:
        print("generate=fail", type(exc).__name__, str(exc)[:200])

# End of the embedded program: the bare PY line below closes the bash heredoc.
PY