#!/bin/sh # ============================================================================= # 備份還原驗證 CronJob — ADR-074 M4 # ============================================================================= # 每週日 02:00 台北執行 Velero restore dry-run,驗證 PVC 快照可讀取。 # 失敗時寫入 Prometheus textfile metrics → 觸發 BackupRestoreTestFailed 告警。 # # Textfile 路徑:/var/lib/node_exporter/textfile_collector/backup_restore_test.prom # (由 node-exporter --collector.textfile.directory 掃描) # # 2026-04-12 ogt (ADR-074 M4) # ============================================================================= set -e TEXTFILE="/var/lib/node_exporter/textfile_collector/backup_restore_test.prom" NAMESPACE="${VELERO_NAMESPACE:-velero}" BACKUP_NAME="${VELERO_BACKUP_NAME:-awoooi-daily}" EXIT_CODE=0 echo "=== backup-restore-test: $(date '+%Y-%m-%d %H:%M:%S %Z') ===" echo "Backup: ${BACKUP_NAME} Namespace: ${NAMESPACE}" # --- Velero restore dry-run --- velero restore create \ --from-backup "${BACKUP_NAME}" \ --namespace-mappings "${NAMESPACE}:restore-test-dry" \ --dry-run \ --wait \ 2>&1 || EXIT_CODE=$? # --- 寫入 textfile metric --- # 注意:Prometheus textfile collector 不接受毫秒時間戳(13位),只接受秒(10位) # 直接省略 timestamp,由 node-exporter scrape 時自動補上 TS=$(date +%s) mkdir -p "$(dirname "${TEXTFILE}")" if [ "${EXIT_CODE}" -eq 0 ]; then echo "backup restore dry-run OK" cat > "${TEXTFILE}" < "${TEXTFILE}" <