146 lines
6.0 KiB
Bash
146 lines
6.0 KiB
Bash
#!/bin/bash
|
||
# =============================================================================
|
||
# backup-from-110.sh — 188 Host 層備份腳本(從 110 rsync 到 188)
|
||
# =============================================================================
|
||
# 部署位置: /home/ollama/bin/backup-from-110.sh (188 上)
|
||
# 執行者: ollama (188 的主要帳號)
|
||
# Cron: 0 1 * * * /home/ollama/bin/backup-from-110.sh
|
||
#
|
||
# 備份項目:
|
||
# 1. Harbor registry data(最高優先)
|
||
# 2. Gitea repos
|
||
# 3. bitan-pharmacy git bare repo(若存在)
|
||
#
|
||
# 前提:
|
||
# - 188 的 ollama 帳號已加入 110 wooo 帳號的 authorized_keys
|
||
# - Backup root ${BACKUP_ROOT} (default /home/ollama/backup/110) exists and is writable by ollama; the harbor/ and gitea/ subdirectories are created by this script
|
||
# - 188 磁碟空間足夠(建議 > 50GB 可用)
|
||
#
|
||
# 成功/失敗狀態:
|
||
# - On success: writes the Unix timestamp to ${BACKUP_ROOT}/last_success and the backup_110_last_success_timestamp metric to ${TEXTFILE_DIR}/backup.prom
|
||
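# - On failure: alerts through notify-awoooi-ops.sh, falling back to a direct Telegram message (only if TG_BOT_TOKEN or TELEGRAM_BOT_TOKEN is set)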
|
||
#
|
||
# Sprint C ADR-069 (2026-04-11 Claude Sonnet 4.6 Asia/Taipei)
|
||
# =============================================================================
|
||
# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Overridable locations (environment wins, otherwise the default is assigned).
: "${BACKUP_ROOT:=/home/ollama/backup/110}"
: "${TEXTFILE_DIR:=/home/ollama/node_exporter_textfiles}"

# Paths derived from the two roots above.
LOG="${BACKUP_ROOT}/backup.log"
LAST_SUCCESS_FILE="${BACKUP_ROOT}/last_success"
TEXTFILE_PROM="${TEXTFILE_DIR}/backup.prom"

# Per-run state: one timestamp taken at start-up (used as the log prefix) and
# a counter of failed backup steps.
DATE="$(date '+%Y%m%d-%H%M%S')"
ERRORS=0

# Absolute directory containing this script.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
|
||
|
||
#######################################
# Print a message prefixed with the run timestamp and append it to the log.
# Globals:   DATE (read) - timestamp prefix; LOG (read) - log file path
# Arguments: $* - message text (arguments are joined with spaces)
# Outputs:   the formatted line on stdout and appended to $LOG
# Returns:   status of the tee pipeline (non-zero if $LOG cannot be written)
#######################################
log() {
  # Create the log directory on demand: this function can be called before any
  # other step has created it, and a failing tee would otherwise abort the
  # whole script under errexit + pipefail.
  mkdir -p -- "$(dirname -- "$LOG")"
  printf '[%s] %s\n' "$DATE" "$*" | tee -a "$LOG"
}
|
||
|
||
# Record the start of this run (printed to stdout and appended to $LOG).
log "=== Starting backup from 110 ==="
|
||
|
||
#######################################
# Hand an alert to the notify-awoooi-ops.sh helper located next to this script.
# The alert fields are passed to the helper as AWOOI_OPS_* environment
# variables; the helper's stdout is discarded.
# Globals:   SCRIPT_DIR (read)
# Arguments: $1 - status word (also embedded in the summary)
#            $2 - detail message
# Returns:   1 if the helper is missing or not executable, otherwise the
#            helper's exit status
#######################################
notify_awoooi_ops() {
  local state="$1"
  local detail="$2"
  local notifier="${SCRIPT_DIR}/notify-awoooi-ops.sh"

  if [[ ! -x "$notifier" ]]; then
    return 1
  fi

  # Prefix assignments are visible to the helper only, not to this shell.
  AWOOI_OPS_SOURCE="backup-from-110" \
  AWOOI_OPS_COMPONENT="host-backup" \
  AWOOI_OPS_ALERTNAME="HostBackupFailed" \
  AWOOI_OPS_SEVERITY="info" \
  AWOOI_OPS_JOB_NAME="188 Host 層備份" \
  AWOOI_OPS_STATUS="$state" \
  AWOOI_OPS_SUMMARY="188 Host 層備份 ${state}" \
  AWOOI_OPS_DETAIL="$detail" \
    "$notifier" >/dev/null
}
|
||
|
||
#######################################
# Best-effort direct Telegram message via the Bot API sendMessage endpoint.
# Does nothing when no bot token is configured; curl failures are ignored on
# purpose so that alerting can never change the script's outcome.
# Globals:   TG_BOT_TOKEN, TELEGRAM_BOT_TOKEN (read; the first non-empty wins)
#            SRE_GROUP_CHAT_ID (read; a built-in chat id is used when unset)
# Arguments: $1 - message text
# Returns:   always 0
#######################################
notify_telegram_fallback() {
  local msg="$1"
  local tg_token="${TG_BOT_TOKEN:-${TELEGRAM_BOT_TOKEN:-}}"
  local tg_chat="${SRE_GROUP_CHAT_ID:--1003711974679}"

  if [[ -n "$tg_token" && -n "$tg_chat" ]]; then
    # Bound the request: this runs unattended from cron, and without a
    # timeout a stalled connection would keep the job hanging indefinitely.
    curl -s --connect-timeout 10 --max-time 30 \
      -X POST "https://api.telegram.org/bot${tg_token}/sendMessage" \
      -d "chat_id=${tg_chat}" \
      --data-urlencode "text=${msg}" \
      > /dev/null || true
  fi
}
|
||
|
||
#######################################
# Send an operations alert, preferring the AWOOI helper over direct Telegram.
# Arguments: $1 - status word, $2 - message text
# Returns:   0 when the helper accepted the alert, otherwise the status of
#            notify_telegram_fallback
#######################################
notify_ops() {
  local state="$1"
  local text="$2"

  # Primary path (per the original author's note): hand the alert to the
  # AWOOI API, which sends it through TelegramGateway and mirrors it to AwoooP.
  if notify_awoooi_ops "$state" "$text"; then
    return 0
  fi

  # Emergency bypass: only when the API is unreachable or the helper is not
  # deployed, message Telegram directly.
  notify_telegram_fallback "$text"
}
|
||
|
||
# ── Harbor registry data ──────────────────────────────────────────────────────
# 2026-04-17 ogt: read the volume through the docker socket
# (/var/lib/docker/volumes/ is 710 root:root). wooo is a member of the docker
# group, so it can mount the volume via `docker run` but cannot read the
# filesystem path directly.
#
# The volume is streamed as a gzip'd tar over SSH and unpacked in place over
# the previous copy in ${BACKUP_ROOT}/harbor/.
log "Backing up Harbor registry..."
# - mkdir is part of the condition so that a failure is counted and alerted
#   instead of aborting the script silently under `set -e`.
# - ssh gets its own `2>> "$LOG"`: the trailing `>> "$LOG" 2>&1` applies only
#   to the local tar, so remote ssh/docker errors would otherwise never reach
#   the log.
# - pipefail makes a failure on either side of the pipe fail the step.
if mkdir -p "${BACKUP_ROOT}/harbor" >> "$LOG" 2>&1 \
  && ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 \
    wooo@192.168.0.110 \
    "docker run --rm -v harbor_harbor-data:/source alpine tar czf - -C /source ." \
    2>> "$LOG" \
  | tar xzf - -C "${BACKUP_ROOT}/harbor/" >> "$LOG" 2>&1; then
  log "✅ Harbor backup OK"
else
  log "❌ ERROR: Harbor backup failed"
  ERRORS=$((ERRORS + 1))
fi
|
||
|
||
# ── Gitea repos ───────────────────────────────────────────────────────────────
# The gitea_gitea-data docker volume on 110 is streamed as a gzip'd tar over
# SSH and unpacked in place over the previous copy in ${BACKUP_ROOT}/gitea/.
log "Backing up Gitea repos..."
# - mkdir is part of the condition so that a failure is counted and alerted
#   instead of aborting the script silently under `set -e`.
# - ssh gets its own `2>> "$LOG"`: the trailing `>> "$LOG" 2>&1` applies only
#   to the local tar, so remote ssh/docker errors would otherwise never reach
#   the log.
# - pipefail makes a failure on either side of the pipe fail the step.
if mkdir -p "${BACKUP_ROOT}/gitea" >> "$LOG" 2>&1 \
  && ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 \
    wooo@192.168.0.110 \
    "docker run --rm -v gitea_gitea-data:/source alpine tar czf - -C /source ." \
    2>> "$LOG" \
  | tar xzf - -C "${BACKUP_ROOT}/gitea/" >> "$LOG" 2>&1; then
  log "✅ Gitea backup OK"
else
  log "❌ ERROR: Gitea backup failed"
  ERRORS=$((ERRORS + 1))
fi
|
||
|
||
# ── bitan-pharmacy git bare repo (optional) ──────────────────────────────────
# Probe for the bare repo on 110 first. ssh's stderr is discarded, so any
# failure of the probe (including a connection error) takes the "not found"
# branch. A failed rsync is logged but deliberately not counted in ERRORS.
if ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 \
  wooo@192.168.0.110 "test -d /home/wooo/bitan-pharmacy.git" 2>/dev/null; then
  log "Backing up bitan-pharmacy.git..."
  # The destination is quoted so that a BACKUP_ROOT containing spaces or glob
  # characters is passed to rsync as a single argument.
  if rsync -avz \
    -e "ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10" \
    wooo@192.168.0.110:/home/wooo/bitan-pharmacy.git/ \
    "${BACKUP_ROOT}/bitan-pharmacy.git/" >> "$LOG" 2>&1; then
    log "✅ bitan-pharmacy.git backup OK"
  else
    log "⚠️ bitan-pharmacy.git backup failed (non-fatal)"
  fi
else
  log "⚠️ bitan-pharmacy.git not found on 110, skipping"
fi
|
||
|
||
# ── Result handling ──────────────────────────────────────────────────────────
# With zero counted errors: record the success timestamp and exit 0.
# Otherwise: log the failure, send an alert, and exit 1.
if [[ "$ERRORS" -eq 0 ]]; then
  TS=$(date +%s)

  # Plain-text timestamp (legacy format, kept for compatibility).
  echo "$TS" > "$LAST_SUCCESS_FILE"

  # Prometheus textfile format, read by the node_exporter textfile collector.
  # 2026-04-17 ogt: fixes HostBackupFailed firing forever on
  # absent(backup_110_last_success_timestamp). Root cause: only the plain-text
  # file was written and no .prom metric was ever emitted, so node_exporter
  # had nothing to expose and absent() evaluated to 1.
  mkdir -p "$TEXTFILE_DIR"

  # Write to a temporary name and rename it into place so the collector never
  # scrapes a half-written file. The temporary name does not end in .prom, so
  # the collector ignores it, and plain redirection (not mktemp) keeps the
  # umask-derived permissions the file had before.
  PROM_TMP="${TEXTFILE_PROM}.$$"
  printf '%s\n' \
    '# HELP backup_110_last_success_timestamp Unix timestamp of last successful backup from 110' \
    '# TYPE backup_110_last_success_timestamp gauge' \
    "backup_110_last_success_timestamp $TS" \
    > "$PROM_TMP"
  mv -f -- "$PROM_TMP" "$TEXTFILE_PROM"

  log "=== Backup completed successfully (ts=$TS) ==="
  exit 0
else
  log "=== Backup FAILED ($ERRORS errors) ==="
  notify_ops "failed" "🚨 backup-from-110.sh FAILED on 188 — ${ERRORS} error(s) at ${DATE}"
  exit 1
fi
|