#!/bin/bash # ============================================================================= # backup-from-110.sh — 188 Host 層備份腳本(從 110 rsync 到 188) # ============================================================================= # 部署位置: /home/ollama/bin/backup-from-110.sh (188 上) # 執行者: ollama (188 的主要帳號) # Cron: 0 1 * * * /home/ollama/bin/backup-from-110.sh # # 備份項目: # 1. Harbor registry data(最高優先) # 2. Gitea repos # 3. bitan-pharmacy git bare repo(若存在) # # 前提: # - 188 的 ollama 帳號已加入 110 wooo 帳號的 authorized_keys # - /backup/110/{harbor,gitea} 目錄已建立 (mkdir -p /backup/110/{harbor,gitea}) # - 188 磁碟空間足夠(建議 > 50GB 可用) # # 成功/失敗狀態: # - 寫入 BACKUP_LAST_SUCCESS_TS 到 /var/run/backup-110.last_success # - 失敗時傳送 Telegram 告警(若設定了 TG_BOT_TOKEN) # # Sprint C ADR-069 (2026-04-11 Claude Sonnet 4.6 Asia/Taipei) # ============================================================================= set -euo pipefail BACKUP_ROOT="${BACKUP_ROOT:-/home/ollama/backup/110}" LOG="${BACKUP_ROOT}/backup.log" LAST_SUCCESS_FILE="${BACKUP_ROOT}/last_success" TEXTFILE_DIR="${TEXTFILE_DIR:-/home/ollama/node_exporter_textfiles}" TEXTFILE_PROM="${TEXTFILE_DIR}/backup.prom" DATE=$(date +%Y%m%d-%H%M%S) ERRORS=0 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" log() { echo "[$DATE] $*" | tee -a "$LOG" } log "=== Starting backup from 110 ===" notify_awoooi_ops() { local status="$1" local msg="$2" local helper="${SCRIPT_DIR}/notify-awoooi-ops.sh" [[ -x "$helper" ]] || return 1 AWOOI_OPS_ALERTNAME="HostBackupFailed" \ AWOOI_OPS_JOB_NAME="188 Host 層備份" \ AWOOI_OPS_STATUS="$status" \ AWOOI_OPS_SEVERITY="info" \ AWOOI_OPS_SOURCE="backup-from-110" \ AWOOI_OPS_COMPONENT="host-backup" \ AWOOI_OPS_SUMMARY="188 Host 層備份 ${status}" \ AWOOI_OPS_DETAIL="$msg" \ "$helper" >/dev/null } notify_telegram_fallback() { local msg="$1" local tg_token="${TG_BOT_TOKEN:-${TELEGRAM_BOT_TOKEN:-}}" local tg_chat="${SRE_GROUP_CHAT_ID:--1003711974679}" if [ -n "$tg_token" ] && [ -n "$tg_chat" ]; then curl -s -X POST "https://api.telegram.org/bot${tg_token}/sendMessage" \ -d "chat_id=${tg_chat}" \ --data-urlencode "text=${msg}" \ > /dev/null || true fi } notify_ops() { local status="$1" local msg="$2" # 正式路徑:先交給 AWOOI API,由 TelegramGateway 送出並鏡像到 AwoooP。 # 只有 API 不可達或 helper 未部署時,才使用 Telegram 直發救命旁路。 notify_awoooi_ops "$status" "$msg" && return 0 notify_telegram_fallback "$msg" } # ── Harbor registry data ────────────────────────────────────────────────────── # 2026-04-17 ogt: 改用 docker socket 讀取 volumes(/var/lib/docker/volumes/ 是 710 root:root) # wooo 是 docker group 成員,可透過 docker run 掛載 volume,不可直接讀取 FS 路徑 log "Backing up Harbor registry..." mkdir -p "${BACKUP_ROOT}/harbor" if ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 \ wooo@192.168.0.110 \ "docker run --rm -v harbor_harbor-data:/source alpine tar czf - -C /source ." \ | tar xzf - -C "${BACKUP_ROOT}/harbor/" >> "$LOG" 2>&1; then log "✅ Harbor backup OK" else log "❌ ERROR: Harbor backup failed" ERRORS=$((ERRORS + 1)) fi # ── Gitea repos ─────────────────────────────────────────────────────────────── log "Backing up Gitea repos..." mkdir -p "${BACKUP_ROOT}/gitea" if ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 \ wooo@192.168.0.110 \ "docker run --rm -v gitea_gitea-data:/source alpine tar czf - -C /source ." \ | tar xzf - -C "${BACKUP_ROOT}/gitea/" >> "$LOG" 2>&1; then log "✅ Gitea backup OK" else log "❌ ERROR: Gitea backup failed" ERRORS=$((ERRORS + 1)) fi # ── bitan-pharmacy git bare repo (可選) ────────────────────────────────────── if ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 \ wooo@192.168.0.110 "test -d /home/wooo/bitan-pharmacy.git" 2>/dev/null; then log "Backing up bitan-pharmacy.git..." if rsync -avz \ -e "ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10" \ wooo@192.168.0.110:/home/wooo/bitan-pharmacy.git/ \ ${BACKUP_ROOT}/bitan-pharmacy.git/ >> "$LOG" 2>&1; then log "✅ bitan-pharmacy.git backup OK" else log "⚠️ bitan-pharmacy.git backup failed (non-fatal)" fi else log "⚠️ bitan-pharmacy.git not found on 110, skipping" fi # ── 結果處理 ───────────────────────────────────────────────────────────────── if [ "$ERRORS" -eq 0 ]; then TS=$(date +%s) # 寫入純文字時間戳(舊格式,保留相容性) echo "$TS" > "$LAST_SUCCESS_FILE" # 寫入 Prometheus textfile 格式(供 node_exporter textfile collector 讀取) # 2026-04-17 ogt: 修復 HostBackupFailed — absent(backup_110_last_success_timestamp) 永遠觸發 # 根因:只寫純文字檔,從未輸出 .prom 指標 → node_exporter 找不到 → Prometheus absent()=1 mkdir -p "$TEXTFILE_DIR" cat > "$TEXTFILE_PROM" <