#!/bin/bash
# =============================================================================
# WOOO AIOps - Sentry dedicated data-layer backup
# 2026-05-06 ogt + Codex: after a dirty reboot, filled in the missing Sentry
# Postgres / ClickHouse / Kafka / Redis / SeaweedFS / Taskbroker state backup.
#
# Safety principles:
# - Only performs dump / volume snapshot / restic backup; never stops
#   production services and never restores data.
# - pg_dumpall output may contain role password hashes; all output goes only
#   into the encrypted restic repo.
# - Never prints Secret values, DB dump contents, or credentials to the log.
# =============================================================================

set -euo pipefail

# Shared helpers and settings. Names used below but not defined in this file
# (BACKUP_BASE, RESTIC_PASSWORD_FILE, BACKUP_DOCKER_*, log_*, build_tags,
# cleanup_old_backups, notify_clawbot) are presumably provided by common.sh.
source "$(dirname "$0")/common.sh"

SERVICE="sentry"
LOCAL_REPO="${BACKUP_BASE}/sentry"

# Scratch directory for dumps and volume copies before they enter restic.
# It temporarily holds credentials-bearing data, so it is created atomically
# by mktemp under an unpredictable name instead of the guessable
# /tmp/sentry-backup-<pid>, which another local user could pre-create or
# symlink. It stays under /tmp so the docker-based fallback in cleanup()
# keeps mounting the same parent directory.
DUMP_DIR="$(mktemp -d "/tmp/sentry-backup-XXXXXX")"

# Container names; each can be overridden through the environment.
POSTGRES_CONTAINER="${SENTRY_POSTGRES_CONTAINER:-sentry-self-hosted-postgres-1}"
CLICKHOUSE_CONTAINER="${SENTRY_CLICKHOUSE_CONTAINER:-sentry-self-hosted-clickhouse-1}"
KAFKA_CONTAINER="${SENTRY_KAFKA_CONTAINER:-sentry-self-hosted-kafka-1}"
REDIS_CONTAINER="${SENTRY_REDIS_CONTAINER:-sentry-self-hosted-redis-1}"

# Directory whose compose/config files are archived as evidence.
SENTRY_DIR="${SENTRY_DIR:-/opt/sentry}"
|
||
|
||
#######################################
# Remove the scratch directory; used as the EXIT trap handler.
# Globals:   DUMP_DIR (read)
# Arguments: none
# Returns:   always 0, so it can never change the script's exit path
#######################################
cleanup() {
  # First attempt: plain removal as the current user. Errors are ignored.
  rm -rf "${DUMP_DIR}" 2>/dev/null || true

  # Nothing left behind: done.
  [ -d "${DUMP_DIR}" ] || return 0

  # No docker CLI available for the fallback: give up quietly.
  command -v docker >/dev/null 2>&1 || return 0

  # Fallback: the directory survived the plain rm, so retry the removal from
  # inside a container that has the parent directory mounted.
  docker run --rm \
    -v "$(dirname "${DUMP_DIR}"):/hosttmp" \
    alpine rm -rf "/hosttmp/$(basename "${DUMP_DIR}")" >/dev/null 2>&1 || true

  return 0
}
|
||
|
||
#######################################
# Run a command at reduced scheduling priority.
# Arguments: the command and its arguments
# Returns:   the exit status of the wrapped command chain
#######################################
low_priority() {
  # CPU niceness is always applied.
  local -a prefix=(nice -n 10)

  # When ionice is installed, additionally wrap with best-effort I/O class 2
  # at level 7.
  if command -v ionice >/dev/null 2>&1; then
    prefix=(ionice -c2 -n7 "${prefix[@]}")
  fi

  "${prefix[@]}" "$@"
}
|
||
|
||
#######################################
# Test whether a Docker container with the given name or ID exists.
# Arguments: $1 - container name or ID
# Outputs:   nothing (docker's stdout and stderr are discarded)
# Returns:   0 when docker can inspect it as a container, non-zero otherwise
#######################################
container_exists() {
  # `docker container inspect` only looks at containers. A bare
  # `docker inspect` matches any object type, so an image, volume, or network
  # sharing the name would be reported as an existing container.
  docker container inspect "$1" >/dev/null 2>&1
}
|
||
|
||
#######################################
# Test whether a Docker volume with the given name exists.
# Arguments: $1 - volume name
# Outputs:   nothing (docker's stdout and stderr are discarded)
# Returns:   the exit status of `docker volume inspect`
#######################################
volume_exists() {
  local volume="$1"
  docker volume inspect "${volume}" &>/dev/null
}
|
||
|
||
#######################################
# Stream a gzip-compressed tar of a Docker volume into a file on the host.
# Globals:   BACKUP_DOCKER_CPUS, BACKUP_DOCKER_MEMORY,
#            BACKUP_DOCKER_MEMORY_SWAP (read)
# Arguments: $1  - Docker volume name
#            $2  - host path of the archive to write; tar's stderr is written
#                  next to it as "$2.stderr"
#            $3  - human-readable label used in log messages
#            $4  - "required" (default); any other value marks it optional
#            $5+ - extra arguments placed before /data on the tar command line
# Returns:   0 when a non-empty archive was written, or when an optional
#            volume is missing or produced nothing; 1 when a required volume
#            is missing or its archive is empty
#######################################
backup_volume() {
  local volume_name="$1"
  local output_file="$2"
  local label="$3"
  local required="${4:-required}"

  # Drop the named parameters. `shift 4` shifts nothing at all when fewer than
  # four arguments were given, which would leak volume/output/label into
  # tar_args, so shift only what is actually there.
  if [ "$#" -ge 4 ]; then
    shift 4
  else
    shift "$#"
  fi
  local tar_args=("$@")

  if ! volume_exists "${volume_name}"; then
    if [ "${required}" = "required" ]; then
      log_error "Sentry ${label} volume 不存在: ${volume_name}"
      return 1
    fi
    log_warn "Sentry ${label} volume 不存在,略過: ${volume_name}"
    return 0
  fi

  log_info "備份 Sentry volume: ${label} (${volume_name})"

  # The volume is mounted read-only and archived inside a resource-limited
  # container. tar's exit status is captured rather than discarded: the volume
  # may be in live use, so a non-zero status is reported as a warning and the
  # non-empty check below remains the pass/fail criterion.
  # ${arr[@]+...} keeps an empty tar_args from tripping `set -u` on bash < 4.4.
  local tar_status=0
  low_priority docker run --rm \
    --cpus="${BACKUP_DOCKER_CPUS}" \
    --memory="${BACKUP_DOCKER_MEMORY}" \
    --memory-swap="${BACKUP_DOCKER_MEMORY_SWAP}" \
    -v "${volume_name}:/data:ro" \
    alpine \
    tar czf - ${tar_args[@]+"${tar_args[@]}"} /data \
    2>"${output_file}.stderr" >"${output_file}" || tar_status=$?

  if [ "${tar_status}" -ne 0 ]; then
    log_warn "  Sentry ${label} volume tar 結束狀態 ${tar_status},請檢查 ${output_file}.stderr"
  fi

  if [ -s "${output_file}" ]; then
    local size
    size=$(du -h "${output_file}" | cut -f1)
    log_success "  Sentry ${label} volume 備份完成 (${size})"
    return 0
  fi

  if [ "${required}" = "required" ]; then
    log_error "  Sentry ${label} volume 備份失敗或為空"
    return 1
  fi
  log_warn "  Sentry ${label} volume 備份為空,略過"
  return 0
}
|
||
|
||
#######################################
# Copy the contents of a Docker volume into a plain directory tree on the
# host, owned by the invoking user.
# Globals:   BACKUP_DOCKER_CPUS, BACKUP_DOCKER_MEMORY,
#            BACKUP_DOCKER_MEMORY_SWAP (read)
# Arguments: $1  - Docker volume name
#            $2  - host output directory; files land in "$2/data", and the
#                  copy container's output in "$2/copy.stdout" / "$2/copy.stderr"
#            $3  - human-readable label used in log messages
#            $4  - "required" (default); any other value marks it optional
#            $5+ - extra arguments for the archiving tar (e.g. --exclude=...)
# Returns:   0 when at least one entry was copied, or when an optional volume
#            is missing or produced nothing; 1 when a required volume is
#            missing, the copy failed, or nothing was copied
#######################################
backup_volume_tree() {
  local volume_name="$1"
  local output_dir="$2"
  local label="$3"
  local required="${4:-required}"

  # Drop the named parameters. `shift 4` shifts nothing at all when fewer than
  # four arguments were given, which would leak volume/output/label into
  # tar_args, so shift only what is actually there.
  if [ "$#" -ge 4 ]; then
    shift 4
  else
    shift "$#"
  fi
  local tar_args=("$@")

  if ! volume_exists "${volume_name}"; then
    if [ "${required}" = "required" ]; then
      log_error "Sentry ${label} volume 不存在: ${volume_name}"
      return 1
    fi
    log_warn "Sentry ${label} volume 不存在,略過: ${volume_name}"
    return 0
  fi

  # Ownership of the copied tree is handed back to the invoking user inside
  # the container, so the host side can read and later delete it.
  local host_uid
  local host_gid
  host_uid="$(id -u)"
  host_gid="$(id -g)"

  log_info "備份 Sentry volume tree: ${label} (${volume_name})"
  install -d -m 700 "${output_dir}/data"

  # The volume is mounted read-only and copied with a tar pipe; the extra tar
  # arguments reach the inner shell as "$@". Afterwards the tree is chowned to
  # the host user and group/other permissions are stripped.
  # ${arr[@]+...} keeps an empty tar_args from tripping `set -u` on bash < 4.4.
  if low_priority docker run --rm \
    --cpus="${BACKUP_DOCKER_CPUS}" \
    --memory="${BACKUP_DOCKER_MEMORY}" \
    --memory-swap="${BACKUP_DOCKER_MEMORY_SWAP}" \
    -e "HOST_UID=${host_uid}" \
    -e "HOST_GID=${host_gid}" \
    -v "${volume_name}:/data:ro" \
    -v "${output_dir}/data:/out" \
    alpine sh -c 'cd /data && tar cf - "$@" . | tar xf - -C /out && chown -R "${HOST_UID}:${HOST_GID}" /out && chmod -R u+rwX,go-rwx /out' sh ${tar_args[@]+"${tar_args[@]}"} \
    >"${output_dir}/copy.stdout" 2>"${output_dir}/copy.stderr"; then
    # Success requires at least one entry in the copied tree.
    if find "${output_dir}/data" -mindepth 1 -print -quit | grep -q .; then
      local size
      size=$(du -sh "${output_dir}/data" | cut -f1)
      log_success "  Sentry ${label} volume tree 備份完成 (${size})"
      return 0
    fi
  fi

  if [ "${required}" = "required" ]; then
    log_error "  Sentry ${label} volume tree 備份失敗或為空"
    return 1
  fi
  log_warn "  Sentry ${label} volume tree 備份為空,略過"
  return 0
}
|
||
|
||
#######################################
# Run an inventory command and store its combined stdout/stderr in the dump
# directory.
# Globals:   DUMP_DIR (read)
# Arguments: $1  - label; the output is written to "${DUMP_DIR}/<label>.txt"
#            $2+ - the command to run and its arguments
# Returns:   1 when the command fails; otherwise the status of log_success
#######################################
capture_cmd() {
  local label="$1"
  shift

  local report="${DUMP_DIR}/${label}.txt"

  # Failure path first: warn and report it to the caller.
  "$@" >"${report}" 2>&1 || {
    log_warn "Sentry 盤點失敗: ${label}"
    return 1
  }

  log_success "Sentry 盤點完成: ${label}"
}
|
||
|
||
#######################################
# Run one complete Sentry backup: collect inventory, config archive, Postgres
# dump and volume copies into DUMP_DIR, store DUMP_DIR as a restic snapshot,
# prune old snapshots, and send a notification.
# Globals:   DUMP_DIR, SENTRY_DIR, LOCAL_REPO, SERVICE, RESTIC_PASSWORD_FILE,
#            POSTGRES_CONTAINER, CLICKHOUSE_CONTAINER, KAFKA_CONTAINER,
#            REDIS_CONTAINER (read)
# Arguments: none used
# Returns:   the number of required components that failed (0 = all good), or
#            restic's exit status when the restic backup itself fails
#######################################
main() {
  local start_time
  local timestamp
  local failed=0
  local duration
  start_time=$(date +%s)
  timestamp=$(date "+%Y%m%d_%H%M%S")

  # Remove the scratch directory on every exit path, including `set -e` aborts.
  trap cleanup EXIT
  install -d -m 700 "${DUMP_DIR}"

  log_info "========== 開始 Sentry 專屬資料層備份 (${timestamp}) =========="

  # Inventory snapshots are best-effort evidence; failures do not count.
  capture_cmd "docker-containers" docker ps --filter "name=sentry-self-hosted" --format 'table {{.Names}}\t{{.Image}}\t{{.Status}}' || true
  capture_cmd "docker-volumes" docker volume ls --format '{{.Name}}' || true

  # Archive the compose/config directory, leaving out bulky or volatile trees.
  if [ -d "${SENTRY_DIR}" ]; then
    log_info "封存 Sentry compose/config 證據"
    tar \
      --exclude="*/clickhouse/store" \
      --exclude="*/.git" \
      --exclude="*/logs" \
      --exclude="*/data" \
      -czf "${DUMP_DIR}/sentry-config_${timestamp}.tar.gz" \
      -C "$(dirname "${SENTRY_DIR}")" "$(basename "${SENTRY_DIR}")" \
      2>"${DUMP_DIR}/sentry-config_${timestamp}.tar.stderr" || true
    [ -s "${DUMP_DIR}/sentry-config_${timestamp}.tar.gz" ] || log_warn "Sentry config tar 為空或失敗"
  else
    log_warn "找不到 Sentry 目錄: ${SENTRY_DIR}"
  fi

  # Postgres logical dump (required). With pipefail set, a pg_dumpall failure
  # fails the whole pipeline even though gzip itself succeeds.
  if container_exists "${POSTGRES_CONTAINER}"; then
    log_info "匯出 Sentry Postgres logical dump"
    if docker exec "${POSTGRES_CONTAINER}" pg_dumpall -U postgres 2>"${DUMP_DIR}/postgres_${timestamp}.stderr" | low_priority gzip -9 > "${DUMP_DIR}/postgres_${timestamp}.sql.gz"; then
      log_success "Sentry Postgres dump 完成 ($(du -h "${DUMP_DIR}/postgres_${timestamp}.sql.gz" | cut -f1))"
    else
      log_error "Sentry Postgres dump 失敗"
      failed=$((failed + 1))
    fi
  else
    log_error "Sentry Postgres container 不存在: ${POSTGRES_CONTAINER}"
    failed=$((failed + 1))
  fi

  # ClickHouse metadata (database list and per-table row/byte counts) is
  # best-effort; the data itself is captured by the volume copy below.
  if container_exists "${CLICKHOUSE_CONTAINER}"; then
    docker exec "${CLICKHOUSE_CONTAINER}" clickhouse-client -q "SHOW DATABASES" > "${DUMP_DIR}/clickhouse_databases_${timestamp}.txt" 2>&1 || true
    docker exec "${CLICKHOUSE_CONTAINER}" clickhouse-client -q \
      "SELECT database, name, total_rows, total_bytes FROM system.tables WHERE database NOT IN ('system','INFORMATION_SCHEMA','information_schema') ORDER BY database, name FORMAT TSV" \
      > "${DUMP_DIR}/clickhouse_tables_${timestamp}.tsv" 2>&1 || true
  else
    log_warn "Sentry ClickHouse container 不存在,仍嘗試 volume snapshot: ${CLICKHOUSE_CONTAINER}"
  fi

  # Ask Redis to write dump.rdb before its volume is copied.
  if container_exists "${REDIS_CONTAINER}"; then
    log_info "觸發 Sentry Redis SAVE 以刷新 dump.rdb"
    docker exec "${REDIS_CONTAINER}" redis-cli SAVE >/dev/null 2>&1 || log_warn "Redis SAVE 失敗,仍繼續 volume snapshot"
  fi

  # Record a sample (at most 200 entries) of the files under /var/lib/kafka.
  # NOTE(review): the sed expression 's#^#/##' carries a trailing '#'. Whether
  # the container's sed accepts it, and what the added '/' prefix is meant to
  # achieve, is unclear - confirm the intent. Left byte-identical here.
  if container_exists "${KAFKA_CONTAINER}"; then
    docker exec "${KAFKA_CONTAINER}" bash -lc \
      "find /var/lib/kafka -maxdepth 2 -type f | sed 's#^#/##' | head -200" \
      > "${DUMP_DIR}/kafka_file_sample_${timestamp}.txt" 2>&1 || true
  fi

  # Volume copies: the first four are required and counted on failure, the
  # remaining ones are optional.
  backup_volume_tree "sentry-clickhouse" "${DUMP_DIR}/volumes/clickhouse" "ClickHouse" "required" --exclude=./tmp || failed=$((failed + 1))
  backup_volume_tree "sentry-kafka" "${DUMP_DIR}/volumes/kafka" "Kafka queue" "required" || failed=$((failed + 1))
  backup_volume_tree "sentry-redis" "${DUMP_DIR}/volumes/redis" "Redis" "required" || failed=$((failed + 1))
  backup_volume_tree "sentry-seaweedfs" "${DUMP_DIR}/volumes/seaweedfs" "SeaweedFS attachments" "required" || failed=$((failed + 1))
  backup_volume_tree "sentry-self-hosted_sentry-taskbroker" "${DUMP_DIR}/volumes/taskbroker" "Taskbroker SQLite" "optional" || true
  backup_volume_tree "sentry-self-hosted_sentry-vroom" "${DUMP_DIR}/volumes/vroom" "Vroom profiles" "optional" || true
  backup_volume_tree "sentry-self-hosted_sentry-symbolicator" "${DUMP_DIR}/volumes/symbolicator" "Symbolicator" "optional" || true
  backup_volume_tree "sentry-self-hosted_sentry-secrets" "${DUMP_DIR}/volumes/runtime-secrets" "runtime secrets" "optional" || true

  # Manifest stored inside the snapshot, describing this run.
  cat > "${DUMP_DIR}/backup-manifest.txt" <<EOF
service=sentry
timestamp=${timestamp}
postgres_container=${POSTGRES_CONTAINER}
clickhouse_container=${CLICKHOUSE_CONTAINER}
kafka_container=${KAFKA_CONTAINER}
redis_container=${REDIS_CONTAINER}
contains=postgres_dump,clickhouse_volume_tree,kafka_volume_tree,redis_volume_tree,seaweedfs_volume_tree,taskbroker_vroom_symbolicator_optional
secrets_policy=encrypted_restic_only_no_log_output
failed_components=${failed}
EOF

  # Initialise the restic repository on first use (no data/ directory yet).
  if [ ! -d "${LOCAL_REPO}/data" ]; then
    log_info "初始化 Restic 倉庫: ${LOCAL_REPO}"
    low_priority restic -r "${LOCAL_REPO}" init --password-file "${RESTIC_PASSWORD_FILE}" 2>&1
  fi

  log_info "建立 Sentry Restic 備份..."
  local tags
  tags=$(build_tags "${SERVICE}")

  # A restic failure is captured and reported instead of letting `set -e`
  # kill the script with no log line and no notification. restic's own exit
  # status is still what the script returns.
  # ${tags} is deliberately unquoted so it splits into separate arguments
  # (presumably `--tag ...` pairs produced by build_tags - verify in common.sh).
  local restic_status=0
  # shellcheck disable=SC2086
  low_priority restic -r "${LOCAL_REPO}" backup "${DUMP_DIR}" \
    --password-file "${RESTIC_PASSWORD_FILE}" \
    ${tags} \
    --tag "scope:sentry-state" \
    --tag "contains:postgres-clickhouse-kafka-redis-seaweedfs" \
    --tag "contains:runtime-secrets" 2>&1 || restic_status=$?

  if [ "${restic_status}" -ne 0 ]; then
    duration=$(($(date +%s) - start_time))
    log_error "========== Sentry Restic 備份失敗 (exit ${restic_status}, ${duration}s) =========="
    notify_clawbot "failed" "${SERVICE}" "Sentry Restic 備份失敗 (exit ${restic_status})" "${duration}" || true
    # The EXIT trap is still armed, so the scratch directory is removed.
    return "${restic_status}"
  fi

  # Look up the short id of the newest snapshot for the log line; any failure
  # in the lookup degrades to "unknown".
  local snapshot_id
  snapshot_id=$(restic -r "${LOCAL_REPO}" snapshots --latest 1 --json \
    --password-file "${RESTIC_PASSWORD_FILE}" 2>/dev/null | \
    python3 -c 'import json,sys; rows=json.load(sys.stdin); print(rows[-1].get("short_id","unknown") if rows else "unknown")' 2>/dev/null || echo "unknown")
  log_success "Sentry Restic 備份完成: ${snapshot_id}"

  cleanup_old_backups "${LOCAL_REPO}"

  duration=$(($(date +%s) - start_time))
  if [ "${failed}" -eq 0 ]; then
    log_success "========== Sentry 專屬資料層備份完成 (${duration}s) =========="
    notify_clawbot "success" "${SERVICE}" "Sentry 專屬資料層備份完成" "${duration}"
  else
    log_error "========== Sentry 備份有 ${failed} 個必要項目失敗 (${duration}s) =========="
    notify_clawbot "failed" "${SERVICE}" "Sentry 備份有 ${failed} 個必要項目失敗" "${duration}"
  fi

  # Normal completion: disarm the trap and clean up explicitly.
  trap - EXIT
  cleanup
  return "${failed}"
}
|
||
|
||
main "$@"
|