Files
awoooi/scripts/cron_backup_restore_test.sh
OG T a28625f088
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
fix(cr): 首席架構師 CR P0/P1/P2 全修補
P0-1: incident_service.py — 刪除 classify_alert_early 死碼 L131-132
P0-2: cron_backup_restore_test.sh — date +%s%3N→+%s,修正毫秒時間戳
P1-2: gitea_webhook.py — fingerprint 移除 sha_short,收斂同 branch 失敗
heartbeat: 還原原始空格對齊格式(統帥要求原本怎樣就怎樣)

P1-1(積木化)/P1-3(TYPE-4)/P2-1(timeZone)/P2-2(IP)/P2-3(WS重連) 待後續處理

2026-04-12 ogt
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-12 16:10:46 +08:00

63 lines
2.4 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/sh
# =============================================================================
# Backup restore verification CronJob — ADR-074 M4
# =============================================================================
# Runs a Velero restore dry-run against the configured backup and records the
# outcome as Prometheus textfile metrics. The job is intended to run every
# Sunday at 02:00 Taipei time to verify that PVC snapshots are readable; a
# failure is meant to trigger the BackupRestoreTestFailed alert (the schedule
# and the alert rule are defined outside this script).
#
# Textfile path: /var/lib/node_exporter/textfile_collector/backup_restore_test.prom
#   (scanned by node-exporter via --collector.textfile.directory)
#
# Environment:
#   VELERO_NAMESPACE    source namespace used in --namespace-mappings
#                       (default: velero)
#   VELERO_BACKUP_NAME  backup passed to --from-backup (default: awoooi-daily)
#
# Exit status:
#   0  dry-run succeeded and metrics were written
#   1  dry-run failed (metrics still written, with success = 0)
#   other non-zero: the metrics file could not be written (set -e abort)
#
# 2026-04-12 ogt (ADR-074 M4)
# =============================================================================
set -eu

TEXTFILE="/var/lib/node_exporter/textfile_collector/backup_restore_test.prom"
NAMESPACE="${VELERO_NAMESPACE:-velero}"
BACKUP_NAME="${VELERO_BACKUP_NAME:-awoooi-daily}"
EXIT_CODE=0

# Remove a leftover temp file if the script aborts between the write and the
# rename in write_metrics. After a successful rename this is a harmless no-op.
trap 'rm -f -- "${TEXTFILE}.$$"' EXIT

#######################################
# Write both gauges to TEXTFILE atomically.
# The content goes to a sibling temp file first and is then renamed over the
# target, so a concurrent scrape never sees a truncated or half-written file.
# The temp name does not end in ".prom".
# Globals:   TEXTFILE (read)
# Arguments: $1 - value for awoooi_backup_restore_test_success (1 or 0)
#            $2 - Unix time in seconds of this test run
# Returns:   non-zero if the write or the rename fails
#######################################
write_metrics() {
  tmp_file="${TEXTFILE}.$$"
  printf '%s\n' \
    '# HELP awoooi_backup_restore_test_success 1 = last backup restore dry-run succeeded' \
    '# TYPE awoooi_backup_restore_test_success gauge' \
    "awoooi_backup_restore_test_success $1" \
    '# HELP awoooi_backup_restore_test_timestamp_seconds Unix timestamp of last test run' \
    '# TYPE awoooi_backup_restore_test_timestamp_seconds gauge' \
    "awoooi_backup_restore_test_timestamp_seconds $2" \
    > "${tmp_file}"
  mv -f -- "${tmp_file}" "${TEXTFILE}"
}

echo "=== backup-restore-test: $(date '+%Y-%m-%d %H:%M:%S %Z') ==="
echo "Backup: ${BACKUP_NAME} Namespace: ${NAMESPACE}"

# --- Velero restore dry-run ---
# "|| EXIT_CODE=$?" captures the failure status without tripping set -e, so the
# failure metric below is still written.
velero restore create \
  --from-backup "${BACKUP_NAME}" \
  --namespace-mappings "${NAMESPACE}:restore-test-dry" \
  --dry-run \
  --wait \
  2>&1 || EXIT_CODE=$?

# --- Write textfile metrics ---
# TS is the *value* of the timestamp gauge and must be in whole seconds
# (date +%s, not a 13-digit millisecond value). No per-sample timestamp is
# appended to any metric line.
TS=$(date +%s)
mkdir -p "$(dirname "${TEXTFILE}")"

if [ "${EXIT_CODE}" -eq 0 ]; then
  echo "backup restore dry-run OK"
  write_metrics 1 "${TS}"
  echo "Textfile written: success"
  exit 0
fi

echo "backup restore dry-run FAILED (exit ${EXIT_CODE})"
write_metrics 0 "${TS}"
echo "Textfile written: failure"
exit 1