Files
awoooi/scripts/backup/backup-ai-artifacts.sh
Your Name cfb866d055
Some checks failed
Ansible Lint / lint (push) Successful in 35s
CD Pipeline / tests (push) Failing after 13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Failing after 11s
feat(governance): add agent market automation surfaces
2026-06-04 21:50:55 +08:00

130 lines
5.4 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# =============================================================================
# WOOO AIOps - AI 工具與模型 manifest 備份
# 2026-05-06 ogt + Codex: 補齊 188 Ollama / AI tooling metadata backup。
#
# 安全原則:
# - 每日只備份模型清單、manifest、Modelfile 與工具狀態證據。
# - 不預設備份 /home/ollama/.ollama/models/blobs,避免每日拉 10GB+
#   可重新下載模型;自製或不可重下的 blobs 需先人工標記後另做 offsite。
# - 所有輸出只進 encrypted restic repo,不把 Secret 值印到 log。
# =============================================================================
# Strict mode: abort on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Shared helpers live next to this script. The log_*, build_tags,
# cleanup_old_backups and notify_clawbot functions plus BACKUP_BASE and
# RESTIC_PASSWORD_FILE are used below but not defined in this file —
# presumably they come from common.sh (confirm there).
source "$(dirname "$0")/common.sh"

# Service name used for Restic tags and notifications.
SERVICE="ai-artifacts"

# Local Restic repository that receives the snapshot.
LOCAL_REPO="${BACKUP_BASE}/ai-artifacts"

# Per-run staging directory. It is created with mktemp so the name cannot be
# predicted (and pre-created or symlinked) by another user of the shared temp
# directory; mktemp -d creates it accessible to the owner only.
DUMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/ai-artifacts-backup-XXXXXXXX")"

# SSH target holding the Ollama data; override with AI_ARTIFACTS_REMOTE_HOST.
REMOTE_HOST="${AI_ARTIFACTS_REMOTE_HOST:-ollama@192.168.0.188}"

# Non-interactive SSH: never prompt for credentials, give up connecting after 8s.
SSH_OPTS=(-o BatchMode=yes -o ConnectTimeout=8)
# Remove the per-run staging directory and everything inside it.
# main registers this function as the EXIT trap.
# Globals: DUMP_DIR (read)
# Returns: 0 when there is nothing to remove, otherwise the status of rm
cleanup() {
  # An empty or unset staging path means there is nothing to delete; return
  # quietly instead of tripping `set -u` while the EXIT trap is running.
  [[ -n "${DUMP_DIR:-}" ]] || return 0
  # `--` stops option parsing so a path starting with "-" is treated as a path.
  rm -rf -- "${DUMP_DIR}"
}
# Run a command at reduced scheduling priority.
# The command is always wrapped in `nice -n 10`; when ionice is installed it
# is additionally wrapped in `ionice -c2 -n7`.
# Arguments: $@ - command and its arguments
# Returns:   exit status of the wrapped command
low_priority() {
  local -a prefix=(nice -n 10)
  if command -v ionice >/dev/null 2>&1; then
    prefix=(ionice -c2 -n7 "${prefix[@]}")
  fi
  "${prefix[@]}" "$@"
}
# Run one command on the remote host over SSH and store its combined
# stdout/stderr as ${DUMP_DIR}/<label>.txt.
# Globals:   SSH_OPTS, REMOTE_HOST, DUMP_DIR (read)
# Arguments: $1 - label used for the output file name and the log messages
#            $2 - command line executed on the remote host
# Returns:   1 when the ssh invocation exits non-zero; otherwise the status
#            of the success log call
capture_remote_cmd() {
  local label="$1"
  local cmd="$2"
  local outfile="${DUMP_DIR}/${label}.txt"

  # Guard clause: report the failure and let the caller decide whether this
  # item counts as required.
  if ! ssh "${SSH_OPTS[@]}" "${REMOTE_HOST}" "${cmd}" > "${outfile}" 2>&1; then
    log_warn "AI artifacts 盤點失敗: ${label}"
    return 1
  fi
  log_success "AI artifacts 盤點完成: ${label}"
}
# Log a fatal Restic failure and send the matching "failed" notification.
# Globals:   SERVICE (read)
# Arguments: $1 - message to log and to send
#            $2 - epoch seconds at which the run started
_ai_artifacts_report_fatal() {
  local message="$1"
  local started_at="$2"
  local elapsed
  elapsed=$(($(date +%s) - started_at))
  log_error "========== ${message} (${elapsed}s) =========="
  notify_clawbot "failed" "${SERVICE}" "${message}" "${elapsed}"
}

# Collect AI tooling evidence from the remote host, stage it in DUMP_DIR and
# snapshot the staging directory into the local Restic repository.
# Globals:   DUMP_DIR, REMOTE_HOST, SSH_OPTS, LOCAL_REPO, SERVICE,
#            RESTIC_PASSWORD_FILE (read)
# Returns:   the number of required components that failed (0 = success), or
#            Restic's own exit status when repository init or backup fails
main() {
  local start_time
  local timestamp
  local failed=0
  local rc=0
  start_time=$(date +%s)
  timestamp=$(date "+%Y%m%d_%H%M%S")

  # Register cleanup first so the staging directory is removed on every exit path.
  trap cleanup EXIT
  install -d -m 700 "${DUMP_DIR}"
  log_info "========== 開始 AI artifacts 備份 (${timestamp}) =========="

  # Inventory snapshots. Items followed by `|| true` are best-effort; the
  # others are required and bump the `failed` counter when they fail.
  capture_remote_cmd "188-ollama-version" "ollama --version" || true
  capture_remote_cmd "188-ollama-list" "ollama list" || failed=$((failed + 1))
  capture_remote_cmd "188-ollama-ps" "ollama ps" || true
  capture_remote_cmd "188-ollama-manifest-inventory" "find /home/ollama/.ollama/models/manifests -type f -printf '%P\t%s\t%TY-%Tm-%Td %TH:%TM:%TS\n' | sort" || failed=$((failed + 1))
  capture_remote_cmd "188-ollama-manifest-sha256" "cd /home/ollama/.ollama/models/manifests && find . -type f -print0 | sort -z | xargs -0 sha256sum" || failed=$((failed + 1))
  # Only a file count and byte total of the blobs are recorded, never the blobs.
  capture_remote_cmd "188-ollama-blob-summary" "find /home/ollama/.ollama/models/blobs -type f -printf '%s\n' 2>/dev/null | awk 'BEGIN{count=0;bytes=0}{count++;bytes+=\$1}END{printf \"blob_count=%d\\nblob_bytes=%d\\n\", count, bytes}'" || true
  # The remote `|| true` keeps an empty grep result from counting as a failure.
  capture_remote_cmd "188-ai-containers" "docker ps --format 'table {{.Names}}\t{{.Image}}\t{{.Status}}' | grep -Ei 'ollama|open-webui|litellm|openclaw|clawbot|langfuse|n8n' || true" || true

  # Required: stream the manifests directory as a gzip'd tar into the staging dir.
  log_info "匯出 Ollama manifest tree不含 blobs"
  if ssh "${SSH_OPTS[@]}" "${REMOTE_HOST}" "tar czf - -C /home/ollama/.ollama/models manifests 2>/dev/null" > "${DUMP_DIR}/ollama-manifests_${timestamp}.tar.gz"; then
    log_success "Ollama manifests 備份完成 ($(du -h "${DUMP_DIR}/ollama-manifests_${timestamp}.tar.gz" | cut -f1))"
  else
    log_error "Ollama manifests 備份失敗"
    failed=$((failed + 1))
  fi

  # Best-effort: the remote script writes `ollama show --modelfile` output for
  # every listed model into a temp dir and streams that dir back as a tarball.
  # A failure here only logs a warning and is not counted in `failed`.
  # The quoted script is sent verbatim, so its lines stay unindented.
  log_info "匯出 Ollama Modelfile 摘要"
  ssh "${SSH_OPTS[@]}" "${REMOTE_HOST}" 'set -euo pipefail
tmp="$(mktemp -d)"
trap "rm -rf \"$tmp\"" EXIT
ollama list 2>/dev/null | awk "NR>1 {print \$1}" | while read -r model; do
safe="$(printf "%s" "$model" | tr "/:" "__")"
ollama show "$model" --modelfile > "$tmp/${safe}.Modelfile" 2>&1 || true
done
tar czf - -C "$tmp" .
' > "${DUMP_DIR}/ollama-modelfiles_${timestamp}.tar.gz" 2>"${DUMP_DIR}/ollama-modelfiles_${timestamp}.stderr" || log_warn "Ollama Modelfile 匯出部分失敗"

  # Describe the snapshot contents; the here-doc body must stay at column 0.
  cat > "${DUMP_DIR}/backup-manifest.txt" <<EOF
service=ai-artifacts
timestamp=${timestamp}
remote_host=${REMOTE_HOST}
contains=ollama_list,ollama_ps,ollama_manifests,ollama_modelfiles,ai_container_inventory
blob_policy=manifest_only_no_model_blobs_by_default
failed_components=${failed}
EOF

  # A missing data/ directory is taken to mean the repository was never initialised.
  if [ ! -d "${LOCAL_REPO}/data" ]; then
    log_info "初始化 Restic 倉庫: ${LOCAL_REPO}"
    # Capture the status instead of letting `set -e` kill the script, so the
    # failure is logged and notified before returning.
    low_priority restic -r "${LOCAL_REPO}" init --password-file "${RESTIC_PASSWORD_FILE}" 2>&1 || rc=$?
    if [ "${rc}" -ne 0 ]; then
      _ai_artifacts_report_fatal "AI artifacts Restic 倉庫初始化失敗 (exit ${rc})" "${start_time}"
      return "${rc}"
    fi
  fi

  log_info "建立 AI artifacts Restic 備份..."
  local tags
  tags=$(build_tags "${SERVICE}")
  # ${tags} is deliberately unquoted so it splits into separate arguments;
  # presumably build_tags prints whitespace-separated --tag options (confirm
  # against common.sh).
  # shellcheck disable=SC2086
  low_priority restic -r "${LOCAL_REPO}" backup "${DUMP_DIR}" \
    --password-file "${RESTIC_PASSWORD_FILE}" \
    ${tags} \
    --tag "scope:ai-artifacts" \
    --tag "contains:ollama-manifests-no-blobs" 2>&1 || rc=$?
  if [ "${rc}" -ne 0 ]; then
    # Without this branch `set -e` would abort here with no error log and no
    # failure notification for the most important step of the run.
    _ai_artifacts_report_fatal "AI artifacts Restic 備份失敗 (exit ${rc})" "${start_time}"
    return "${rc}"
  fi

  # Look up the short id of the newest snapshot for the log line; any failure
  # in the lookup falls back to "unknown".
  local snapshot_id
  snapshot_id=$(restic -r "${LOCAL_REPO}" snapshots --latest 1 --json \
    --password-file "${RESTIC_PASSWORD_FILE}" 2>/dev/null | \
    python3 -c 'import json,sys; rows=json.load(sys.stdin); print(rows[-1].get("short_id","unknown") if rows else "unknown")' 2>/dev/null || echo "unknown")
  log_success "AI artifacts Restic 備份完成: ${snapshot_id}"

  cleanup_old_backups "${LOCAL_REPO}"

  local duration
  duration=$(($(date +%s) - start_time))
  if [ "${failed}" -eq 0 ]; then
    log_success "========== AI artifacts 備份完成 (${duration}s) =========="
    notify_clawbot "success" "${SERVICE}" "AI artifacts 備份完成" "${duration}"
  else
    log_error "========== AI artifacts 備份有 ${failed} 個必要項目失敗 (${duration}s) =========="
    notify_clawbot "failed" "${SERVICE}" "AI artifacts 備份有 ${failed} 個必要項目失敗" "${duration}"
  fi
  # The failure count doubles as the exit status.
  return "${failed}"
}
# Script entry point: run main with the script's command-line arguments.
main "$@"