#!/bin/bash
# =============================================================================
# backup-from-110.sh — host-level backup script on 188 (pulls data from 110 to 188:
#                      Docker volumes as tar streams over ssh, the git repo via rsync)
# =============================================================================
# Deployed at: /home/ollama/bin/backup-from-110.sh (on 188)
# Runs as:     ollama (the primary account on 188)
# Cron:        0 1 * * * /home/ollama/bin/backup-from-110.sh
#
# What is backed up:
#   1. Harbor registry data (highest priority)
#   2. Gitea repos
#   3. bitan-pharmacy git bare repo (if present)
#
# Prerequisites:
#   - The ollama account on 188 is listed in the authorized_keys of wooo on 110
#   - Backup directories ${BACKUP_ROOT}/{harbor,gitea} (BACKUP_ROOT defaults to
#     /home/ollama/backup/110; the script creates them with mkdir -p)
#   - 188 has enough free disk space (> 50GB available is recommended)
#
# Success / failure reporting:
#   - On success, writes the Unix timestamp to ${BACKUP_ROOT}/last_success and the
#     backup_110_last_success_timestamp gauge to ${TEXTFILE_DIR}/backup.prom
#   - On failure, sends a Telegram alert (if TG_BOT_TOKEN is set)
#
# Sprint C ADR-069 (2026-04-11 Claude Sonnet 4.6 Asia/Taipei)
# =============================================================================
# Strict mode: abort on unhandled command failure (-e), on expansion of an
# unset variable (-u), and treat a pipeline as failed when any stage fails.
set -euo pipefail

# Destination root on this host for everything pulled from 110.
# Overridable through the environment.
BACKUP_ROOT="${BACKUP_ROOT:-/home/ollama/backup/110}"
# Log file that log() appends to; command output is also redirected here.
LOG="${BACKUP_ROOT}/backup.log"
# Plain-text file receiving the Unix timestamp of the last fully successful run.
LAST_SUCCESS_FILE="${BACKUP_ROOT}/last_success"
# Directory and file for the Prometheus textfile metric; the directory is
# overridable through the environment.
TEXTFILE_DIR="${TEXTFILE_DIR:-/home/ollama/node_exporter_textfiles}"
TEXTFILE_PROM="${TEXTFILE_DIR}/backup.prom"
# Stamp taken once at start-up. Every log line and the failure alert carry this
# same value, so it identifies the run rather than the time of each event.
# Assigned separately from `readonly` so a failing `date` is not masked.
DATE=$(date +%Y%m%d-%H%M%S)
readonly BACKUP_ROOT LOG LAST_SUCCESS_FILE TEXTFILE_DIR TEXTFILE_PROM DATE

# Number of mandatory backup items that failed during this run.
ERRORS=0

#######################################
# Print a message prefixed with the run stamp and append the same line to the
# log file.
# Globals:   DATE (read), LOG (read)
# Arguments: $* - message words, joined by the first character of IFS
# Outputs:   "[DATE] message" on stdout, and the same line appended to $LOG
# Returns:   always 0
#######################################
log() {
  # Logging is best-effort. With `set -e` and pipefail active, a failing tee
  # (log directory not created yet, disk full) would make this function return
  # non-zero and abort the whole script at the call site, including on the
  # failure path before the alert is sent. tee still prints its own error on
  # stderr and still copies the line to stdout, so nothing is hidden.
  printf '[%s] %s\n' "$DATE" "$*" | tee -a "$LOG" || true
  return 0
}

# Create the backup root before the first log line so the log file's directory
# exists even on a fresh host.
mkdir -p "$BACKUP_ROOT"
log "=== Starting backup from 110 ==="

#######################################
# Stream one Docker volume from 110 as a gzip'd tar over ssh and unpack it
# into a sub-directory of BACKUP_ROOT.
#
# 2026-04-17 ogt: volumes are read through the docker socket because
# /var/lib/docker/volumes/ is 710 root:root on 110. wooo is a member of the
# docker group, so it can mount a volume via `docker run` but cannot read the
# filesystem path directly.
#
# NOTE: the archive is unpacked on top of whatever is already in the target
# directory; nothing is removed first, so files deleted on 110 stay in the
# backup.
#
# Globals:   BACKUP_ROOT (read), LOG (read), ERRORS (incremented on failure)
# Arguments: $1 - short name used in the OK / failed log lines
#            $2 - description used in the "Backing up ..." log line
#            $3 - Docker volume name on 110
#            $4 - target sub-directory under BACKUP_ROOT
# Outputs:   progress lines via log; ssh and tar diagnostics appended to $LOG
# Returns:   0 (a failed transfer is recorded in ERRORS, not returned)
#######################################
backup_docker_volume() {
  local name=$1 description=$2 volume=$3 subdir=$4
  local dest="${BACKUP_ROOT}/${subdir}"

  log "Backing up ${description}..."
  mkdir -p "$dest"

  # BatchMode stops ssh from ever waiting on a prompt in an unattended run;
  # the ServerAlive options end a session whose peer has stopped responding.
  # ssh's own stderr goes to the log as well; previously only the local tar's
  # output was captured. pipefail makes a failure on either side count.
  if ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 \
    -o BatchMode=yes -o ServerAliveInterval=30 -o ServerAliveCountMax=4 \
    wooo@192.168.0.110 \
    "docker run --rm -v ${volume}:/source alpine tar czf - -C /source ." \
    2>> "$LOG" \
    | tar xzf - -C "${dest}/" >> "$LOG" 2>&1; then
    log "✅ ${name} backup OK"
  else
    log "❌ ERROR: ${name} backup failed"
    ERRORS=$((ERRORS + 1))
  fi
}

# ── Harbor registry data ──────────────────────────────────────────────────────
backup_docker_volume "Harbor" "Harbor registry" "harbor_harbor-data" "harbor"

# ── Gitea repos ───────────────────────────────────────────────────────────────
backup_docker_volume "Gitea" "Gitea repos" "gitea_gitea-data" "gitea"

# ── bitan-pharmacy git bare repo (optional) ──────────────────────────────────
# This item is optional: no outcome in this section counts toward ERRORS.
# Probe for the repository first and keep the probe's exit status, so that
# "the directory is not there" can be told apart from "ssh itself failed".
probe_rc=0
ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 -o BatchMode=yes \
  wooo@192.168.0.110 "test -d /home/wooo/bitan-pharmacy.git" 2> /dev/null \
  || probe_rc=$?

case "$probe_rc" in
  0)
    log "Backing up bitan-pharmacy.git..."
    # The destination is quoted so a BACKUP_ROOT containing spaces or glob
    # characters is passed to rsync as a single argument.
    if rsync -avz \
      -e "ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes" \
      wooo@192.168.0.110:/home/wooo/bitan-pharmacy.git/ \
      "${BACKUP_ROOT}/bitan-pharmacy.git/" >> "$LOG" 2>&1; then
      log "✅ bitan-pharmacy.git backup OK"
    else
      log "⚠️ bitan-pharmacy.git backup failed (non-fatal)"
    fi
    ;;
  1)
    # The remote `test -d` ran and reported that the directory does not exist.
    log "⚠️ bitan-pharmacy.git not found on 110, skipping"
    ;;
  *)
    # Any other status (ssh exits with 255 on its own errors) means the check
    # could not be carried out, which is not the same as the repo being absent.
    log "⚠️ bitan-pharmacy.git check failed (ssh exit ${probe_rc}), skipping"
    ;;
esac

# ── Result handling ──────────────────────────────────────────────────────────
# All mandatory items succeeded: record the success time and exit 0.
# Otherwise: log the failure, send a best-effort Telegram alert and exit 1.
if ((ERRORS == 0)); then
  TS=$(date +%s)

  # Plain-text timestamp (legacy format, kept for compatibility).
  echo "$TS" > "$LAST_SUCCESS_FILE"

  # Prometheus textfile format, read by the node_exporter textfile collector.
  # 2026-04-17 ogt: fix for HostBackupFailed, where
  # absent(backup_110_last_success_timestamp) fired permanently. Root cause:
  # only the plain-text file was written and no .prom metric was ever emitted,
  # so node_exporter found nothing and Prometheus absent() evaluated to 1.
  mkdir -p "$TEXTFILE_DIR"

  # Write to a temporary name in the same directory and rename it into place,
  # so a scrape never reads a half-written file. The temporary name does not
  # end in .prom; it is created by plain redirection, so it gets the usual
  # umask-derived permissions.
  prom_tmp="${TEXTFILE_PROM}.$$"
  cat > "$prom_tmp" << EOF
# HELP backup_110_last_success_timestamp Unix timestamp of last successful backup from 110
# TYPE backup_110_last_success_timestamp gauge
backup_110_last_success_timestamp $TS
EOF
  mv -f "$prom_tmp" "$TEXTFILE_PROM"

  log "=== Backup completed successfully (ts=$TS) ==="
  exit 0
else
  log "=== Backup FAILED ($ERRORS errors) ==="

  # Telegram alert: the official destination is the SRE war-room group.
  # The chat id comes from TELEGRAM_ALERT_CHAT_ID, then SRE_GROUP_CHAT_ID,
  # then the built-in default. Without TG_BOT_TOKEN no alert is sent.
  TG_TOKEN="${TG_BOT_TOKEN:-}"
  TG_CHAT="${TELEGRAM_ALERT_CHAT_ID:-${SRE_GROUP_CHAT_ID:--1003711974679}}"
  if [[ -n "$TG_TOKEN" && -n "$TG_CHAT" ]]; then
    # --data-urlencode (which implies POST) encodes the emoji, spaces and
    # punctuation in the text; --max-time keeps an unreachable API from
    # hanging the job. The alert is deliberately best-effort: a curl failure
    # must not change the exit status below.
    curl -s --max-time 15 "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
      --data-urlencode "chat_id=${TG_CHAT}" \
      --data-urlencode "text=🚨 backup-from-110.sh FAILED on 188 — ${ERRORS} error(s) at ${DATE}" \
      > /dev/null || true
  fi
  exit 1
fi