#!/bin/bash # ============================================================================= # WOOO AIOps - Offsite full sync verifier # 2026-05-19 ogt + Codex: full sync 後驗證 Google Drive/rclone 遠端仍符合 # latest-only:13 個 repo 都可列出,且 snapshots/ 只保留 1 份。 # # 規則: # - 只讀 Google Drive/rclone remote,不讀、不輸出 token 或 rclone.conf。 # - 預設印出人可讀報告;--write-textfile 會寫 node-exporter 指標。 # - full marker 未 fresh 時可執行,但結果會標示 verify_ok=0。 # ============================================================================= set -euo pipefail source "$(dirname "$0")/common.sh" SERVICE="offsite-full-sync-verify" PROVIDER="${OFFSITE_PROVIDER:-rclone}" RCLONE_REMOTE="${OFFSITE_RCLONE_REMOTE:-gdrive}" OFFSITE_REMOTE_ROOT_VALUE="${OFFSITE_REMOTE_ROOT:-${RCLONE_REMOTE}:awoooi-backups/restic}" OFFSITE_DIR="${BACKUP_BASE}/offsite" TEXTFILE_DIR="${NODE_EXPORTER_TEXTFILE_DIR:-/home/wooo/node_exporter_textfiles}" TEXTFILE_PATH="${TEXTFILE_DIR}/offsite_full_sync_verify.prom" HOST_LABEL="${AIOPS_HOST_LABEL:-110}" EXPECTED_REPOS="${OFFSITE_REPOS:-awoooi configs gitea harbor momo langfuse monitoring signoz open-webui clawbot sentry ai-artifacts public-routes}" MAX_AGE_HOURS="${OFFSITE_FULL_VERIFY_MAX_AGE_HOURS:-48}" WRITE_TEXTFILE=0 NO_COLOR=0 usage() { cat <<'USAGE' Usage: verify-offsite-full-sync.sh [--write-textfile] [--no-color] Checks: - Google Drive/rclone remote exists. - /backup/offsite/rclone-last-success is fresh. - Every expected remote restic repo has exactly one snapshots/ entry. This script never prints OAuth tokens, rclone.conf, restic passwords, or provider secrets. USAGE } while [ "$#" -gt 0 ]; do case "$1" in --write-textfile) WRITE_TEXTFILE=1 shift ;; --no-color) NO_COLOR=1 shift ;; -h|--help) usage exit 0 ;; *) echo "Unknown argument: $1" >&2 usage >&2 exit 2 ;; esac done if [ "${NO_COLOR}" = "1" ]; then green="" yellow="" red="" reset="" else green="$(printf '\033[32m')" yellow="$(printf '\033[33m')" red="$(printf '\033[31m')" reset="$(printf '\033[0m')" fi label_escape() { printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' } remote_root() { printf '%s' "${OFFSITE_REMOTE_ROOT_VALUE}" } remote_repo_path() { local repo="$1" printf '%s/%s' "$(remote_root)" "${repo}" } marker_timestamp() { local path="$1" [ -f "${path}" ] || { echo 0 return } awk -F= '/^timestamp=/ {print int($2); found=1; exit} END {if (!found) print 0}' "${path}" 2>/dev/null || echo 0 } repo_count() { local count=0 for _repo in ${EXPECTED_REPOS}; do count=$((count + 1)) done echo "${count}" } low_priority() { if command -v ionice >/dev/null 2>&1; then ionice -c2 -n7 nice -n 10 "$@" else nice -n 10 "$@" fi } rclone_ready() { command -v rclone >/dev/null 2>&1 || return 1 rclone listremotes 2>/dev/null | grep -Fxq "${RCLONE_REMOTE}:" } count_remote_snapshots() { local repo="$1" local remote_snapshots local output remote_snapshots="$(remote_repo_path "${repo}")/snapshots" if ! output="$(low_priority timeout 60s rclone lsf "${remote_snapshots}" --files-only --max-depth 1 2>/dev/null)"; then echo -1 return 1 fi printf '%s\n' "${output}" | sed '/^[[:space:]]*$/d' | wc -l | tr -d ' ' } write_textfile() { local now="$1" local full_ts="$2" local full_age="$3" local full_fresh="$4" local verify_ok="$5" local failed="$6" local success_ts="$7" local success_age="$8" local success_fresh="$9" shift 9 local rows=("$@") local tmp local host local provider host="$(label_escape "${HOST_LABEL}")" provider="$(label_escape "${PROVIDER}")" install -d -m 755 "${TEXTFILE_DIR}" tmp="$(mktemp "${TEXTFILE_PATH}.tmp.XXXXXX")" { echo "# HELP awoooi_backup_offsite_full_verify_last_run_timestamp Unix timestamp of the last full offsite verification run." echo "# TYPE awoooi_backup_offsite_full_verify_last_run_timestamp gauge" echo "# HELP awoooi_backup_offsite_full_verify_last_success_timestamp Unix timestamp of the last successful full offsite verification run." echo "# TYPE awoooi_backup_offsite_full_verify_last_success_timestamp gauge" echo "# HELP awoooi_backup_offsite_full_verify_age_seconds Age of the last successful full offsite verification run." echo "# TYPE awoooi_backup_offsite_full_verify_age_seconds gauge" echo "# HELP awoooi_backup_offsite_full_verify_fresh Whether the last successful full offsite verification is within max_age_hours." echo "# TYPE awoooi_backup_offsite_full_verify_fresh gauge" echo "# HELP awoooi_backup_offsite_full_verify_last_run_failed Whether the latest full offsite verification run failed." echo "# TYPE awoooi_backup_offsite_full_verify_last_run_failed gauge" echo "# HELP awoooi_backup_offsite_remote_verify_ok Whether full offsite remote state currently matches latest-only expectations." echo "# TYPE awoooi_backup_offsite_remote_verify_ok gauge" echo "# HELP awoooi_backup_offsite_full_marker_fresh Whether the full offsite success marker is fresh." echo "# TYPE awoooi_backup_offsite_full_marker_fresh gauge" echo "# HELP awoooi_backup_offsite_remote_snapshot_count Count of remote restic snapshots for each repo." echo "# TYPE awoooi_backup_offsite_remote_snapshot_count gauge" echo "# HELP awoooi_backup_offsite_remote_snapshot_latest_only Whether the remote repo has exactly one snapshot." echo "# TYPE awoooi_backup_offsite_remote_snapshot_latest_only gauge" echo "awoooi_backup_offsite_full_verify_last_run_timestamp{host=\"${host}\",provider=\"${provider}\"} ${now}" echo "awoooi_backup_offsite_full_verify_last_success_timestamp{host=\"${host}\",provider=\"${provider}\"} ${success_ts}" echo "awoooi_backup_offsite_full_verify_age_seconds{host=\"${host}\",provider=\"${provider}\",max_age_hours=\"${MAX_AGE_HOURS}\"} ${success_age}" echo "awoooi_backup_offsite_full_verify_fresh{host=\"${host}\",provider=\"${provider}\",max_age_hours=\"${MAX_AGE_HOURS}\"} ${success_fresh}" echo "awoooi_backup_offsite_full_verify_last_run_failed{host=\"${host}\",provider=\"${provider}\"} ${failed}" echo "awoooi_backup_offsite_remote_verify_ok{host=\"${host}\",provider=\"${provider}\"} ${verify_ok}" echo "awoooi_backup_offsite_full_marker_fresh{host=\"${host}\",provider=\"${provider}\",max_age_hours=\"${MAX_AGE_HOURS}\"} ${full_fresh}" echo "awoooi_backup_offsite_full_marker_timestamp{host=\"${host}\",provider=\"${provider}\"} ${full_ts}" echo "awoooi_backup_offsite_full_marker_age_seconds{host=\"${host}\",provider=\"${provider}\",max_age_hours=\"${MAX_AGE_HOURS}\"} ${full_age}" for row in "${rows[@]}"; do IFS='|' read -r repo count ok <<<"${row}" repo="$(label_escape "${repo}")" echo "awoooi_backup_offsite_remote_snapshot_count{host=\"${host}\",provider=\"${provider}\",repo=\"${repo}\"} ${count}" echo "awoooi_backup_offsite_remote_snapshot_latest_only{host=\"${host}\",provider=\"${provider}\",repo=\"${repo}\"} ${ok}" done } >"${tmp}" mv "${tmp}" "${TEXTFILE_PATH}" chmod 0644 "${TEXTFILE_PATH}" } main() { local now local full_ts local full_age local full_fresh=0 local failed=0 local repo local count local ok local latest_only_ok=1 local verify_ok=0 local success_marker="${OFFSITE_DIR}/${PROVIDER}-full-verify-last-success" local success_ts local success_age local success_fresh=0 local rows=() now="$(date +%s)" full_ts="$(marker_timestamp "${OFFSITE_DIR}/${PROVIDER}-last-success")" full_age=0 if [ "${full_ts}" -gt 0 ]; then full_age=$((now - full_ts)) if [ "${full_age}" -le $((MAX_AGE_HOURS * 3600)) ]; then full_fresh=1 fi fi echo "AWOOOI offsite full sync verifier" date echo "PROVIDER=${PROVIDER}" echo "REMOTE_ROOT=$(remote_root)" echo "EXPECTED_REPO_COUNT=$(repo_count)" echo "WRITE_TEXTFILE=${WRITE_TEXTFILE}" echo if [ "${PROVIDER}" != "rclone" ]; then printf "%sBLOCKED%s unsupported provider for remote snapshot verification: %s\n" "${red}" "${reset}" "${PROVIDER}" failed=1 elif rclone_ready; then printf "%sOK%s rclone remote configured: %s:\n" "${green}" "${reset}" "${RCLONE_REMOTE}" else printf "%sBLOCKED%s rclone remote unavailable: %s:\n" "${red}" "${reset}" "${RCLONE_REMOTE}" failed=1 fi if [ "${full_fresh}" = "1" ]; then printf "%sOK%s full offsite marker fresh age=%ss\n" "${green}" "${reset}" "${full_age}" else printf "%sWARN%s full offsite marker missing or stale age=%ss\n" "${yellow}" "${reset}" "${full_age}" failed=1 fi echo echo "== remote snapshot counts ==" for repo in ${EXPECTED_REPOS}; do count="$(count_remote_snapshots "${repo}" || true)" ok=0 if [ "${count}" = "1" ]; then ok=1 printf "%sOK%s %s remote snapshots=%s\n" "${green}" "${reset}" "${repo}" "${count}" else latest_only_ok=0 failed=1 printf "%sWARN%s %s remote snapshots=%s expected=1\n" "${yellow}" "${reset}" "${repo}" "${count}" fi rows+=("${repo}|${count}|${ok}") done if [ "${failed}" -eq 0 ] && [ "${latest_only_ok}" -eq 1 ] && [ "${full_fresh}" = "1" ]; then verify_ok=1 install -d -m 750 "${OFFSITE_DIR}" cat >"${success_marker}" <