Files
awoooi/scripts/ops/backup-from-110.sh
Your Name 1a74286dfa
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
fix(awooop): mirror ops notifications through api
2026-05-12 14:43:09 +08:00

146 lines
6.0 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# =============================================================================
# backup-from-110.sh — 188 Host 層備份腳本(從 110 rsync 到 188)
# =============================================================================
# 部署位置: /home/ollama/bin/backup-from-110.sh (188 上)
# 執行者: ollama (188 的主要帳號)
# Cron: 0 1 * * * /home/ollama/bin/backup-from-110.sh
#
# 備份項目:
# 1. Harbor registry data(最高優先)
# 2. Gitea repos
# 3. bitan-pharmacy git bare repo(若存在)
#
# 前提:
# - 188 的 ollama 帳號已加入 110 wooo 帳號的 authorized_keys
# - 備份根目錄 BACKUP_ROOT (預設 /home/ollama/backup/110) 已建立且可寫入;harbor/、gitea/ 子目錄由腳本自行 mkdir -p 建立
# - 188 磁碟空間足夠(建議 > 50GB 可用)
#
# 成功/失敗狀態:
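# - 成功時將 Unix 時間戳寫入 ${BACKUP_ROOT}/last_success,並輸出 ${TEXTFILE_DIR}/backup.prom (backup_110_last_success_timestamp)
# - 失敗時先呼叫 notify-awoooi-ops.sh 通知;helper 不存在或失敗時,改傳送 Telegram 告警(若設定了 TG_BOT_TOKEN 或 TELEGRAM_BOT_TOKEN)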
#
# Sprint C ADR-069 (2026-04-11 Claude Sonnet 4.6 Asia/Taipei)
# =============================================================================
# Strict mode: stop on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Locations. BACKUP_ROOT and TEXTFILE_DIR may be overridden from the
# environment; everything else is derived from them.
: "${BACKUP_ROOT:=/home/ollama/backup/110}"
: "${TEXTFILE_DIR:=/home/ollama/node_exporter_textfiles}"
LOG="${BACKUP_ROOT}/backup.log"
LAST_SUCCESS_FILE="${BACKUP_ROOT}/last_success"
TEXTFILE_PROM="${TEXTFILE_DIR}/backup.prom"

# Run-wide state: a single timestamp taken at start-up and the count of
# failed mandatory backup steps.
DATE="$(date '+%Y%m%d-%H%M%S')"
ERRORS=0

# Absolute directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
#######################################
# Print one log line to stdout and append it to the log file.
# Globals:   DATE (read) - run timestamp used as the line prefix
#            LOG  (read) - path of the log file
# Arguments: $* - message text
# Outputs:   "[DATE] message" on stdout and appended to $LOG
# Returns:   always 0; logging is best-effort
#######################################
log() {
  local line="[$DATE] $*"
  local log_dir
  log_dir=$(dirname -- "$LOG")

  # The log directory may not exist yet on a fresh host. Without this, tee
  # fails and, under `set -e` + `pipefail`, the very first log call would
  # terminate the whole script before any backup step runs.
  [[ -d "$log_dir" ]] || mkdir -p -- "$log_dir" 2>/dev/null || true

  # tee still copies the line to stdout when it cannot open the file, so a
  # failure here only costs the on-disk copy; warn instead of aborting.
  printf '%s\n' "$line" | tee -a "$LOG" \
    || printf 'log: could not append to %s\n' "$LOG" >&2
  return 0
}
# Mark the start of this run; log() prints the line and appends it to $LOG.
log "=== Starting backup from 110 ==="
#######################################
# Send an ops notification through the notify-awoooi-ops.sh helper that
# lives next to this script.
# Globals:   SCRIPT_DIR (read) - directory holding the helper
# Arguments: $1 - status text (exported as AWOOI_OPS_STATUS)
#            $2 - detail message (exported as AWOOI_OPS_DETAIL)
# Outputs:   helper stdout is discarded; helper stderr passes through
# Returns:   1 if the helper is missing or not executable, otherwise the
#            helper's exit status
#######################################
notify_awoooi_ops() {
  local state="$1"
  local detail="$2"
  local notifier="${SCRIPT_DIR}/notify-awoooi-ops.sh"

  if [[ ! -x "$notifier" ]]; then
    return 1
  fi

  # Run the helper in a subshell so the AWOOI_OPS_* variables are visible
  # to it only and never leak into the calling shell.
  (
    export AWOOI_OPS_ALERTNAME="HostBackupFailed"
    export AWOOI_OPS_JOB_NAME="188 Host 層備份"
    export AWOOI_OPS_STATUS="$state"
    export AWOOI_OPS_SEVERITY="info"
    export AWOOI_OPS_SOURCE="backup-from-110"
    export AWOOI_OPS_COMPONENT="host-backup"
    export AWOOI_OPS_SUMMARY="188 Host 層備份 ${state}"
    export AWOOI_OPS_DETAIL="$detail"
    exec "$notifier"
  ) >/dev/null
}
#######################################
# Last-resort alert: post a message straight to the Telegram Bot API.
# Globals:   TG_BOT_TOKEN, TELEGRAM_BOT_TOKEN (read) - bot token, first
#              non-empty one wins; nothing is sent when both are empty
#            TELEGRAM_ALERT_CHAT_ID, SRE_GROUP_CHAT_ID (read) - target
#              chat, falling back to the built-in default chat id
# Arguments: $1 - message text
# Outputs:   none (curl output is discarded)
# Returns:   always 0; delivery is best-effort
#######################################
notify_telegram_fallback() {
  local msg="$1"
  local tg_token="${TG_BOT_TOKEN:-${TELEGRAM_BOT_TOKEN:-}}"
  local tg_chat="${TELEGRAM_ALERT_CHAT_ID:-${SRE_GROUP_CHAT_ID:--1003711974679}}"

  [[ -n "$tg_token" && -n "$tg_chat" ]] || return 0

  # Bound the request: this runs on the failure path of an unattended cron
  # job, and a stalled connection must not keep the script from exiting.
  # Failure is deliberately ignored, since this is already the fallback.
  curl -s --connect-timeout 10 --max-time 30 \
    -X POST "https://api.telegram.org/bot${tg_token}/sendMessage" \
    -d "chat_id=${tg_chat}" \
    --data-urlencode "text=${msg}" \
    > /dev/null || true
}
#######################################
# Deliver an ops notification, preferring the AWOOI API path.
# Arguments: $1 - status text
#            $2 - message text
# Returns:   0 when notify_awoooi_ops succeeds, otherwise the status of
#            notify_telegram_fallback
#######################################
notify_ops() {
  local state="$1"
  local text="$2"

  # Primary path: hand the event to the AWOOI API, which sends it through
  # TelegramGateway and mirrors it to AwoooP.
  if notify_awoooi_ops "$state" "$text"; then
    return 0
  fi

  # Emergency bypass: only when the API is unreachable or the helper is not
  # deployed do we post to Telegram directly.
  notify_telegram_fallback "$text"
}
# ── Harbor registry data ──────────────────────────────────────────────────────
# 2026-04-17 ogt: read the volume through the docker socket instead of the
# filesystem; /var/lib/docker/volumes/ is 710 root:root. wooo is a member of
# the docker group, so it can mount the volume via `docker run` but cannot
# read the FS path directly.
log "Backing up Harbor registry..."
harbor_dest="${BACKUP_ROOT}/harbor"
mkdir -p "$harbor_dest"
harbor_ssh=(ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 wooo@192.168.0.110)
harbor_remote_cmd="docker run --rm -v harbor_harbor-data:/source alpine tar czf - -C /source ."
# The remote side streams a gzip'd tar of the volume; the local tar unpacks
# it into the backup directory. Only the local tar's output goes to $LOG.
if "${harbor_ssh[@]}" "$harbor_remote_cmd" \
  | tar xzf - -C "${harbor_dest}/" >> "$LOG" 2>&1; then
  log "✅ Harbor backup OK"
else
  log "❌ ERROR: Harbor backup failed"
  ERRORS=$((ERRORS + 1))
fi
# ── Gitea repos ───────────────────────────────────────────────────────────────
# Stream the gitea_gitea-data volume from 110 as a gzip'd tar over ssh and
# unpack it locally; a failure counts towards ERRORS.
log "Backing up Gitea repos..."
gitea_dest="${BACKUP_ROOT}/gitea"
mkdir -p "$gitea_dest"
gitea_ssh=(ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 wooo@192.168.0.110)
gitea_remote_cmd="docker run --rm -v gitea_gitea-data:/source alpine tar czf - -C /source ."
if "${gitea_ssh[@]}" "$gitea_remote_cmd" \
  | tar xzf - -C "${gitea_dest}/" >> "$LOG" 2>&1; then
  log "✅ Gitea backup OK"
else
  log "❌ ERROR: Gitea backup failed"
  ERRORS=$((ERRORS + 1))
fi
# ── bitan-pharmacy git bare repo (optional) ──────────────────────────────────
# Probe for the bare repo on 110 and rsync it when present. Failures here are
# logged but do not increment ERRORS.
# NOTE: any failure of the probe (including an ssh connection error, whose
# stderr is discarded) takes the "not found, skipping" branch.
if ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 \
  wooo@192.168.0.110 "test -d /home/wooo/bitan-pharmacy.git" 2>/dev/null; then
  log "Backing up bitan-pharmacy.git..."
  # Destination is quoted so a BACKUP_ROOT containing spaces or glob
  # characters is passed to rsync as a single argument.
  if rsync -avz \
    -e "ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10" \
    wooo@192.168.0.110:/home/wooo/bitan-pharmacy.git/ \
    "${BACKUP_ROOT}/bitan-pharmacy.git/" >> "$LOG" 2>&1; then
    log "✅ bitan-pharmacy.git backup OK"
  else
    log "⚠️ bitan-pharmacy.git backup failed (non-fatal)"
  fi
else
  log "⚠️ bitan-pharmacy.git not found on 110, skipping"
fi
# ── Result handling ──────────────────────────────────────────────────────────
# On success: record the completion time and exit 0.
# On failure: log, send an ops notification and exit 1.
if [ "$ERRORS" -eq 0 ]; then
  TS=$(date +%s)
  # Plain-text timestamp (legacy format, kept for compatibility).
  echo "$TS" > "$LAST_SUCCESS_FILE"
  # Prometheus textfile format, for the node_exporter textfile collector.
  # 2026-04-17 ogt: fixes HostBackupFailed, where
  # absent(backup_110_last_success_timestamp) fired permanently.
  # Root cause: only the plain-text file was written and no .prom metric was
  # ever emitted, so node_exporter had nothing to expose and absent() was 1.
  mkdir -p "$TEXTFILE_DIR"
  # Write to a temporary name and rename into place so a scrape never sees a
  # truncated or half-written file. The temporary name does not end in
  # ".prom", which is the suffix the textfile collector is documented to read.
  # Note: the renamed file gets fresh umask-derived permissions on every run.
  prom_tmp="${TEXTFILE_PROM}.$$"
  cat > "$prom_tmp" <<EOF
# HELP backup_110_last_success_timestamp Unix timestamp of last successful backup from 110
# TYPE backup_110_last_success_timestamp gauge
backup_110_last_success_timestamp $TS
EOF
  mv -f "$prom_tmp" "$TEXTFILE_PROM"
  log "=== Backup completed successfully (ts=$TS) ==="
  exit 0
else
  log "=== Backup FAILED ($ERRORS errors) ==="
  notify_ops "failed" "🚨 backup-from-110.sh FAILED on 188 — ${ERRORS} error(s) at ${DATE}"
  exit 1
fi