Files
awoooi/scripts/ops/ollama111-fallback-proxy-diagnose.sh

79 lines
2.5 KiB
Bash
Executable File

#!/usr/bin/env bash
set -euo pipefail

# Read-only diagnosis for ADR-110 local fallback:
# API Pod -> 110:11437 Nginx proxy -> 111:11434 Ollama allowlist proxy.
#
# Every setting below may be overridden from the environment; the value
# after ':=' is used when the variable is unset or empty.
#   NAMESPACE / DEPLOYMENT  target of the `kubectl exec` into the API pod
#   PROXY_HOST              ssh destination for the 110 proxy host
#   LOCAL_HOST              ssh destination for the 111 host
#   LOCAL_IP                address of the 111 host as probed from 110
#   PROXY_PORT / LOCAL_PORT ports queried on the proxy and on 111
#   SSH_TIMEOUT             ssh ConnectTimeout
#   CURL_TIMEOUT            curl -m value for the in-pod and remote curls
: "${NAMESPACE:=awoooi-prod}"
: "${DEPLOYMENT:=awoooi-api}"
: "${PROXY_HOST:=wooo@192.168.0.110}"
: "${LOCAL_HOST:=ollama-111-gpu}"
: "${LOCAL_IP:=192.168.0.111}"
: "${PROXY_PORT:=11437}"
: "${LOCAL_PORT:=11434}"
: "${SSH_TIMEOUT:=8}"
: "${CURL_TIMEOUT:=5}"
#######################################
# Print a section banner to separate the output of each diagnostic step.
# Arguments: $1 - section title
# Outputs:   a blank line followed by "== <title> ==" on stdout
#######################################
section() {
  local title=$1
  printf '\n%s\n' "== ${title} =="
}
#######################################
# Run a command without letting its failure abort the script, then report
# its exit status so the operator can see which probe failed.
# Arguments: the command and its arguments ("$@")
# Outputs:   whatever the command writes, followed by a line
#            "exit_code=<status>" on stdout
# Returns:   0 (the status of the final printf), whatever the command did
#######################################
run_local() {
  local rc=0
  # A command on the left of `||` is exempt from errexit, so its status can
  # be captured without toggling `set +e` / `set -e`. Toggling would force
  # errexit ON in the caller's shell even when the caller had it off.
  "$@" || rc=$?
  printf 'exit_code=%s\n' "${rc}"
}
section "Production health provider chain"
# Public health endpoint and its curl time limit. Both can be overridden from
# the environment (e.g. to point at another deployment); the defaults are the
# production URL and the 12 second limit this probe has always used.
HEALTH_URL="${HEALTH_URL:-https://awoooi.wooo.work/api/v1/health}"
HEALTH_TIMEOUT="${HEALTH_TIMEOUT:-12}"
# -sS: no progress meter, but still print errors.
run_local curl -sS -m "${HEALTH_TIMEOUT}" "${HEALTH_URL}"
section "API pod view"
# Address of the 110 proxy as reached from inside the API pod. Overridable
# like the other endpoints; the default is the address this probe has always
# used. NOTE(review): keep it in step with PROXY_HOST if that is overridden.
PROXY_IP="${PROXY_IP:-192.168.0.110}"
# The remote script is double-quoted: \$OLLAMA_* are expanded inside the pod,
# while ${CURL_TIMEOUT}, ${PROXY_IP} and ${PROXY_PORT} are expanded here.
# The pipeline ends in `head`, so the reported exit_code reflects whether the
# exec itself worked rather than curl's status; read the HTTP= line for that.
run_local kubectl -n "${NAMESPACE}" exec -i "deploy/${DEPLOYMENT}" -- sh -lc "
set +e
echo OLLAMA_URL=\$OLLAMA_URL
echo OLLAMA_SECONDARY_URL=\$OLLAMA_SECONDARY_URL
echo OLLAMA_FALLBACK_URL=\$OLLAMA_FALLBACK_URL
curl -sS -m ${CURL_TIMEOUT} -w '\nHTTP=%{http_code}\n' http://${PROXY_IP}:${PROXY_PORT}/api/tags | head -60
"
section "110 proxy upstream reachability"
# Script executed on the proxy host. All ${...} references are expanded
# locally before the text is sent, so the remote shell sees literal values.
# It checks the route, neighbour entry and ping to the 111 address, then
# queries /api/tags both directly on 111 and through the local proxy port,
# and finally shows the tail of the proxy's Nginx error log (best effort).
proxy_probe="
set +e
echo route:
ip route get ${LOCAL_IP}
echo neigh:
ip neigh show ${LOCAL_IP}
echo ping:
ping -c 3 -W 2 ${LOCAL_IP}
echo direct_111:
curl -sS -m ${CURL_TIMEOUT} -w '\nHTTP=%{http_code}\n' http://${LOCAL_IP}:${LOCAL_PORT}/api/tags | head -60
echo proxy_11437:
curl -sS -m ${CURL_TIMEOUT} -w '\nHTTP=%{http_code}\n' http://127.0.0.1:${PROXY_PORT}/api/tags | head -60
echo nginx_recent_errors:
tail -20 /var/log/nginx/ollama-local-error.log 2>/dev/null || true
"
# BatchMode=yes: never prompt for a password; fail instead.
proxy_ssh_opts=(-o BatchMode=yes -o "ConnectTimeout=${SSH_TIMEOUT}")
run_local ssh "${proxy_ssh_opts[@]}" "${PROXY_HOST}" "${proxy_probe}"
section "111 host direct SSH view"
# Script executed on the 111 host itself: identify the machine, query the
# local port, then dump the launchd job and power settings (both best
# effort, errors hidden).
# NOTE(review): gui/501 is hard-coded, which presumably means the LaunchAgent
# runs in the GUI session of UID 501 — confirm against the 111 host.
local_probe="
set +e
hostname
date
curl -sS -m ${CURL_TIMEOUT} -w '\nHTTP=%{http_code}\n' http://127.0.0.1:${LOCAL_PORT}/api/tags | head -60
launchctl print gui/501/com.momo.ollama111-allow-proxy 2>/dev/null | head -60 || true
pmset -g custom 2>/dev/null | sed -n '1,80p' || true
"
# BatchMode=yes: never prompt for a password; fail instead.
local_ssh_opts=(-o BatchMode=yes -o "ConnectTimeout=${SSH_TIMEOUT}")
run_local ssh "${local_ssh_opts[@]}" "${LOCAL_HOST}" "${local_probe}"
#######################################
# Print operator guidance for reading the probe output above.
# Globals:   PROXY_PORT, LOCAL_PORT (read)
# Outputs:   the interpretation text on stdout
#######################################
print_interpretation() {
  # Unquoted heredoc delimiter so the configured ports are interpolated; the
  # guidance then names the ports that were actually probed. With the default
  # ports the text is identical to the previous fixed wording.
  cat <<EOF
Interpretation:
- If 110 shows "ip neigh INCOMPLETE", "Destination Host Unreachable", or "No route to host",
the 111 host or LAN path is down. Do not restart API or change provider order.
- If 110 can reach 111 directly but ${PROXY_PORT} returns 502, inspect/reload the 110 Nginx proxy.
- If 111 direct SSH works but 127.0.0.1:${LOCAL_PORT} fails, recover the 111 Ollama/allowlist LaunchAgent.
EOF
}
print_interpretation