Files
awoooi/scripts/ops/backup-from-110.sh
Your Name 95110971f3
Some checks failed
CD Pipeline / tests (push) Successful in 1m27s
Code Review / ai-code-review (push) Successful in 29s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
fix(telegram): close remaining DM alert routes
2026-04-30 23:02:17 +08:00

115 lines
5.1 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# =============================================================================
# backup-from-110.sh — 188 Host 層備份腳本 (從 110 備份到 188)
# =============================================================================
# 部署位置: /home/ollama/bin/backup-from-110.sh (188 上)
# 執行者: ollama (188 的主要帳號)
# Cron: 0 1 * * * /home/ollama/bin/backup-from-110.sh
#
# 備份項目:
# 1. Harbor registry data (最高優先)
# 2. Gitea repos
# 3. bitan-pharmacy git bare repo (若存在)
#
# 前提:
# - 188 的 ollama 帳號已加入 110 wooo 帳號的 authorized_keys
# - BACKUP_ROOT 目錄已建立 (預設: mkdir -p /home/ollama/backup/110/{harbor,gitea})
# - 188 磁碟空間足夠(建議 > 50GB 可用)
#
# 成功/失敗狀態:
# - 成功時寫入 Unix 時間戳到 ${BACKUP_ROOT}/last_success, 並輸出 Prometheus 指標到 ${TEXTFILE_DIR}/backup.prom
# - 失敗時傳送 Telegram 告警 (若設定了 TG_BOT_TOKEN)
#
# Sprint C ADR-069 (2026-04-11 Claude Sonnet 4.6 Asia/Taipei)
# =============================================================================
# Strict mode: stop on unhandled errors, on use of unset variables, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Failure counter, incremented by the backup steps further down.
ERRORS=0
# Timestamp taken once at start-up; log() prefixes every line with it.
DATE="$(date '+%Y%m%d-%H%M%S')"

# Backup destination on this host; may be overridden from the environment.
: "${BACKUP_ROOT:=/home/ollama/backup/110}"
LAST_SUCCESS_FILE="${BACKUP_ROOT}/last_success"
LOG="${BACKUP_ROOT}/backup.log"

# Directory that receives the Prometheus textfile metric; also overridable
# from the environment.
: "${TEXTFILE_DIR:=/home/ollama/node_exporter_textfiles}"
TEXTFILE_PROM="${TEXTFILE_DIR}/backup.prom"
#######################################
# Print one log line "[<DATE>] <message>" to stdout and append it to $LOG.
# Globals:   DATE (read), LOG (read)
# Arguments: the message words; they are joined with single spaces
# Outputs:   the formatted line on stdout and appended to $LOG
# Returns:   always 0 — logging is best-effort
#######################################
log() {
  local log_dir
  log_dir=$(dirname -- "$LOG")
  # The first log call happens before any backup directory is created, so
  # make sure the log file's directory exists instead of letting tee fail.
  [[ -d "$log_dir" ]] || mkdir -p -- "$log_dir" || true
  # Under `set -e -o pipefail` a failing tee (missing directory, full disk)
  # would abort the whole script before the failure alert is sent, so a
  # logging failure is deliberately not treated as fatal. tee still prints
  # its own error on stderr.
  printf '[%s] %s\n' "$DATE" "$*" | tee -a "$LOG" || true
}
log "=== Starting backup from 110 ==="

# ── Harbor registry data ──────────────────────────────────────────────────────
# 2026-04-17 ogt: read the volume through the docker socket, because
# /var/lib/docker/volumes/ is mode 710 root:root.
# wooo is a member of the docker group, so it can mount the volume via
# `docker run`, but it cannot read the filesystem path directly.
log "Backing up Harbor registry..."
mkdir -p "${BACKUP_ROOT}/harbor"
# The remote side streams a gzip'd tar of the volume to stdout; the local tar
# unpacks it into the backup directory. The brace group puts BOTH pipeline
# stages under one redirection, so ssh's stderr (connection errors, remote
# docker/tar errors) is recorded in $LOG together with the local tar output.
# With pipefail set, a failure in either stage selects the else branch.
if {
  ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 \
    wooo@192.168.0.110 \
    "docker run --rm -v harbor_harbor-data:/source alpine tar czf - -C /source ." \
    | tar xzf - -C "${BACKUP_ROOT}/harbor/"
} >> "$LOG" 2>&1; then
  log "✅ Harbor backup OK"
else
  log "❌ ERROR: Harbor backup failed"
  ERRORS=$((ERRORS + 1))
fi
# ── Gitea repos ───────────────────────────────────────────────────────────────
log "Backing up Gitea repos..."
mkdir -p "${BACKUP_ROOT}/gitea"
# The remote side streams a gzip'd tar of the gitea_gitea-data volume; the
# local tar unpacks it into the backup directory. The brace group puts BOTH
# pipeline stages under one redirection, so ssh's stderr (connection errors,
# remote docker/tar errors) is recorded in $LOG together with the local tar
# output. With pipefail set, a failure in either stage selects the else branch.
if {
  ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 \
    wooo@192.168.0.110 \
    "docker run --rm -v gitea_gitea-data:/source alpine tar czf - -C /source ." \
    | tar xzf - -C "${BACKUP_ROOT}/gitea/"
} >> "$LOG" 2>&1; then
  log "✅ Gitea backup OK"
else
  log "❌ ERROR: Gitea backup failed"
  ERRORS=$((ERRORS + 1))
fi
# ── bitan-pharmacy git bare repo (optional) ──────────────────────────────────
# Probe for the repository first. ssh returns the exit status of the remote
# `test -d` (0 = directory exists, 1 = it does not) and 255 when ssh itself
# fails, so the status is captured to tell "not there" from "could not check".
# The probe's stderr is discarded on purpose: this step is optional and
# every outcome is reported through log() below.
probe_rc=0
ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 \
  wooo@192.168.0.110 "test -d /home/wooo/bitan-pharmacy.git" 2>/dev/null \
  || probe_rc=$?

case "$probe_rc" in
  0)
    log "Backing up bitan-pharmacy.git..."
    # The destination is quoted so an overridden BACKUP_ROOT containing
    # spaces or glob characters is passed to rsync as a single argument.
    if rsync -avz \
      -e "ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10" \
      wooo@192.168.0.110:/home/wooo/bitan-pharmacy.git/ \
      "${BACKUP_ROOT}/bitan-pharmacy.git/" >> "$LOG" 2>&1; then
      log "✅ bitan-pharmacy.git backup OK"
    else
      # Deliberately does not increment ERRORS: this repo is optional.
      log "⚠️ bitan-pharmacy.git backup failed (non-fatal)"
    fi
    ;;
  1)
    log "⚠️ bitan-pharmacy.git not found on 110, skipping"
    ;;
  *)
    # Any other status means the probe itself failed (e.g. 255 from ssh).
    log "⚠️ could not check bitan-pharmacy.git on 110 (ssh exit ${probe_rc}), skipping"
    ;;
esac
# ── Result handling ──────────────────────────────────────────────────────────
# Success: record the completion time as plain text and as a Prometheus metric,
# then exit 0. Failure: send a best-effort Telegram alert and exit 1.
if (( ERRORS == 0 )); then
  TS=$(date +%s)
  # Plain-text timestamp (legacy format, kept for compatibility).
  echo "$TS" > "$LAST_SUCCESS_FILE"

  # Prometheus textfile format, read by the node_exporter textfile collector.
  # 2026-04-17 ogt: fixes HostBackupFailed, where
  # absent(backup_110_last_success_timestamp) fired permanently.
  # Root cause: only the plain-text file was written and no .prom metric was
  # ever emitted, so node_exporter had nothing to expose and absent() was 1.
  mkdir -p "$TEXTFILE_DIR"
  # Write to a sibling temp file and rename it into place so a scrape never
  # reads a half-written file. The temp name does not end in .prom, which is
  # the suffix the textfile collector looks for. A plain redirect (not mktemp)
  # keeps the umask-derived permissions the previous in-place write produced.
  prom_tmp="${TEXTFILE_PROM}.$$"
  printf '%s\n' \
    '# HELP backup_110_last_success_timestamp Unix timestamp of last successful backup from 110' \
    '# TYPE backup_110_last_success_timestamp gauge' \
    "backup_110_last_success_timestamp $TS" > "$prom_tmp"
  mv -f -- "$prom_tmp" "$TEXTFILE_PROM"

  log "=== Backup completed successfully (ts=$TS) ==="
  exit 0
else
  log "=== Backup FAILED ($ERRORS errors) ==="
  # Telegram alert: the official destination is the SRE war-room group.
  TG_TOKEN="${TG_BOT_TOKEN:-}"
  TG_CHAT="${TELEGRAM_ALERT_CHAT_ID:-${SRE_GROUP_CHAT_ID:--1003711974679}}"
  if [[ -n "$TG_TOKEN" && -n "$TG_CHAT" ]]; then
    # --max-time bounds the whole request so an unreachable API cannot hang
    # the cron job; --data-urlencode form-encodes the spaces and non-ASCII
    # characters in the message. The alert is best-effort, hence `|| true`.
    curl -s --max-time 20 -X POST \
      "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
      --data-urlencode "chat_id=${TG_CHAT}" \
      --data-urlencode "text=🚨 backup-from-110.sh FAILED on 188 — ${ERRORS} error(s) at ${DATE}" \
      > /dev/null || true
  fi
  exit 1
fi