#!/bin/bash
# =============================================================================
# WOOO AIOps - Sentry dedicated data-layer backup
# 2026-05-06 ogt + Codex: added after the dirty reboot to cover the Sentry
# Postgres / ClickHouse / Kafka / Redis / SeaweedFS / Taskbroker state.
#
# Safety principles:
#   - Only dump / volume snapshot / restic backup; never stop production
#     services and never restore data.
#   - pg_dumpall output may contain role hashes; every artifact goes only into
#     the encrypted restic repo.
#   - Never print Secret values, DB dump contents or credentials to the log.
#
# Environment overrides (all optional):
#   SENTRY_POSTGRES_CONTAINER, SENTRY_CLICKHOUSE_CONTAINER,
#   SENTRY_KAFKA_CONTAINER, SENTRY_REDIS_CONTAINER, SENTRY_DIR
#
# Names such as BACKUP_BASE, RESTIC_PASSWORD_FILE, BACKUP_DOCKER_*, log_*,
# build_tags, cleanup_old_backups and notify_clawbot are not defined in this
# file; they are expected to come from common.sh.
# =============================================================================

set -euo pipefail

source "$(dirname "$0")/common.sh"

SERVICE="sentry"
LOCAL_REPO="${BACKUP_BASE}/sentry"
# Staging directory. Created in main() with mktemp so the name is not
# predictable: it holds pg_dumpall output and runtime secrets before they are
# moved into the encrypted repo.
DUMP_DIR=""
POSTGRES_CONTAINER="${SENTRY_POSTGRES_CONTAINER:-sentry-self-hosted-postgres-1}"
CLICKHOUSE_CONTAINER="${SENTRY_CLICKHOUSE_CONTAINER:-sentry-self-hosted-clickhouse-1}"
KAFKA_CONTAINER="${SENTRY_KAFKA_CONTAINER:-sentry-self-hosted-kafka-1}"
REDIS_CONTAINER="${SENTRY_REDIS_CONTAINER:-sentry-self-hosted-redis-1}"
SENTRY_DIR="${SENTRY_DIR:-/opt/sentry}"

#######################################
# Remove the staging directory. Never fails, so it is safe as an EXIT trap.
# Globals:   DUMP_DIR (read)
# Returns:   always 0
#######################################
cleanup() {
    # Nothing to do when the staging directory was never created.
    [ -n "${DUMP_DIR:-}" ] || return 0

    rm -rf -- "${DUMP_DIR}" 2>/dev/null || true

    # Fallback: delete through a container. backup_volume_tree only chowns the
    # copied tree after the copy succeeded, so a failed copy can presumably
    # leave entries the invoking user is not allowed to delete.
    if [ -d "${DUMP_DIR}" ] && command -v docker >/dev/null 2>&1; then
        docker run --rm \
            -v "$(dirname "${DUMP_DIR}"):/hosttmp" \
            alpine rm -rf "/hosttmp/$(basename "${DUMP_DIR}")" >/dev/null 2>&1 || true
    fi
    return 0
}

#######################################
# Run a command with reduced CPU priority, and reduced I/O priority when
# ionice is available.
# Arguments: the command and its arguments
# Returns:   exit status of the command
#######################################
low_priority() {
    if command -v ionice >/dev/null 2>&1; then
        ionice -c2 -n7 nice -n 10 "$@"
    else
        nice -n 10 "$@"
    fi
}

# Succeeds when `docker inspect` can resolve the given name.
container_exists() {
    docker inspect "$1" >/dev/null 2>&1
}

# Succeeds when the named docker volume exists.
volume_exists() {
    docker volume inspect "$1" >/dev/null 2>&1
}

#######################################
# Archive a docker volume into a single gzip-compressed tarball.
# Globals:   BACKUP_DOCKER_CPUS, BACKUP_DOCKER_MEMORY,
#            BACKUP_DOCKER_MEMORY_SWAP (read)
# Arguments: $1 - volume name
#            $2 - output tarball path (tar stderr goes to "$2.stderr")
#            $3 - human-readable label used in log messages
#            $4 - "required" (default) or anything else for optional
#            $5.. - extra arguments passed to tar
# Returns:   1 when a required volume is missing or yields an empty archive,
#            0 otherwise
#######################################
backup_volume() {
    local volume_name="$1"
    local output_file="$2"
    local label="$3"
    local required="${4:-required}"
    # Drop at most four positionals. A plain `shift 4` fails and shifts nothing
    # when fewer were given, which would leak $1..$3 into the tar arguments.
    if [ "$#" -ge 4 ]; then
        shift 4
    else
        shift "$#"
    fi
    local tar_args=("$@")

    if ! volume_exists "${volume_name}"; then
        if [ "${required}" = "required" ]; then
            log_error "Sentry ${label} volume 不存在: ${volume_name}"
            return 1
        fi
        log_warn "Sentry ${label} volume 不存在,略過: ${volume_name}"
        return 0
    fi

    log_info "備份 Sentry volume: ${label} (${volume_name})"
    # The volume is live, so a non-zero tar status is tolerated; it is recorded
    # and reported below instead of being discarded.
    # ${arr[@]+...} keeps `set -u` from tripping on an empty array with bash
    # older than 4.4.
    local rc=0
    low_priority docker run --rm \
        --cpus="${BACKUP_DOCKER_CPUS}" \
        --memory="${BACKUP_DOCKER_MEMORY}" \
        --memory-swap="${BACKUP_DOCKER_MEMORY_SWAP}" \
        -v "${volume_name}:/data:ro" \
        alpine \
        tar czf - ${tar_args[@]+"${tar_args[@]}"} /data \
        2>"${output_file}.stderr" > "${output_file}" || rc=$?

    if [ -s "${output_file}" ]; then
        if [ "${rc}" -ne 0 ]; then
            log_warn "  Sentry ${label} volume tar 回傳非零 (${rc}),封存可能不完整"
        fi
        local size
        size=$(du -h "${output_file}" | cut -f1)
        log_success "  Sentry ${label} volume 備份完成 (${size})"
        return 0
    fi

    if [ "${required}" = "required" ]; then
        log_error "  Sentry ${label} volume 備份失敗或為空"
        return 1
    fi
    log_warn "  Sentry ${label} volume 備份為空,略過"
    return 0
}

#######################################
# Copy a docker volume as a plain file tree into "$2/data". The copy is
# chowned to the invoking user and stripped of group/other permissions.
# Globals:   BACKUP_DOCKER_CPUS, BACKUP_DOCKER_MEMORY,
#            BACKUP_DOCKER_MEMORY_SWAP (read)
# Arguments: $1 - volume name
#            $2 - output directory (receives data/, copy.stdout, copy.stderr)
#            $3 - human-readable label used in log messages
#            $4 - "required" (default) or anything else for optional
#            $5.. - extra arguments passed to the reading tar
# Returns:   1 when a required volume is missing, the copy fails or the copy
#            is empty; 0 otherwise
#######################################
backup_volume_tree() {
    local volume_name="$1"
    local output_dir="$2"
    local label="$3"
    local required="${4:-required}"
    # See backup_volume: never let unshifted positionals become tar arguments.
    if [ "$#" -ge 4 ]; then
        shift 4
    else
        shift "$#"
    fi
    local tar_args=("$@")

    if ! volume_exists "${volume_name}"; then
        if [ "${required}" = "required" ]; then
            log_error "Sentry ${label} volume 不存在: ${volume_name}"
            return 1
        fi
        log_warn "Sentry ${label} volume 不存在,略過: ${volume_name}"
        return 0
    fi

    local host_uid
    local host_gid
    host_uid="$(id -u)"
    host_gid="$(id -g)"

    log_info "備份 Sentry volume tree: ${label} (${volume_name})"
    install -d -m 700 "${output_dir}/data"

    # tar-to-tar copy inside the container, then hand ownership to the host
    # user. Extra tar arguments reach the inner script as "$@".
    if low_priority docker run --rm \
        --cpus="${BACKUP_DOCKER_CPUS}" \
        --memory="${BACKUP_DOCKER_MEMORY}" \
        --memory-swap="${BACKUP_DOCKER_MEMORY_SWAP}" \
        -e "HOST_UID=${host_uid}" \
        -e "HOST_GID=${host_gid}" \
        -v "${volume_name}:/data:ro" \
        -v "${output_dir}/data:/out" \
        alpine sh -c 'cd /data && tar cf - "$@" . | tar xf - -C /out && chown -R "${HOST_UID}:${HOST_GID}" /out && chmod -R u+rwX,go-rwx /out' sh ${tar_args[@]+"${tar_args[@]}"} \
        > "${output_dir}/copy.stdout" 2>"${output_dir}/copy.stderr"; then
        # Success only counts when at least one entry was actually copied.
        if find "${output_dir}/data" -mindepth 1 -print -quit | grep -q .; then
            local size
            size=$(du -sh "${output_dir}/data" | cut -f1)
            log_success "  Sentry ${label} volume tree 備份完成 (${size})"
            return 0
        fi
    fi

    if [ "${required}" = "required" ]; then
        log_error "  Sentry ${label} volume tree 備份失敗或為空"
        return 1
    fi
    log_warn "  Sentry ${label} volume tree 備份為空,略過"
    return 0
}

#######################################
# Run a command and store its combined stdout/stderr as
# "${DUMP_DIR}/LABEL.txt".
# Globals:   DUMP_DIR (read)
# Arguments: $1 - label (file name stem); $2.. - command to run
# Returns:   1 when the command fails, 0 otherwise
#######################################
capture_cmd() {
    local label="$1"
    shift
    if "$@" > "${DUMP_DIR}/${label}.txt" 2>&1; then
        log_success "Sentry 盤點完成: ${label}"
    else
        log_warn "Sentry 盤點失敗: ${label}"
        return 1
    fi
}

#######################################
# Stage all Sentry state under DUMP_DIR, push it into the restic repo, prune
# old snapshots and send a notification.
# Globals:   DUMP_DIR (written), everything declared at the top of the file
# Returns:   number of required items that failed (0 on full success)
#######################################
main() {
    local start_time
    local timestamp
    local failed=0
    start_time=$(date +%s)
    timestamp=$(date "+%Y%m%d_%H%M%S")

    trap cleanup EXIT
    # mktemp -d creates the directory with an unpredictable name, readable by
    # the owner only. Kept under /tmp as before.
    DUMP_DIR="$(mktemp -d /tmp/sentry-backup-XXXXXXXX)"

    log_info "========== 開始 Sentry 專屬資料層備份 (${timestamp}) =========="

    # Inventory only; a failure here must not abort the backup.
    capture_cmd "docker-containers" docker ps --filter "name=sentry-self-hosted" --format 'table {{.Names}}\t{{.Image}}\t{{.Status}}' || true
    capture_cmd "docker-volumes" docker volume ls --format '{{.Name}}' || true

    if [ -d "${SENTRY_DIR}" ]; then
        log_info "封存 Sentry compose/config 證據"
        # Best effort: tar errors are kept in the .stderr file and the result
        # is judged by whether the archive is non-empty.
        tar \
            --exclude="*/clickhouse/store" \
            --exclude="*/.git" \
            --exclude="*/logs" \
            --exclude="*/data" \
            -czf "${DUMP_DIR}/sentry-config_${timestamp}.tar.gz" \
            -C "$(dirname "${SENTRY_DIR}")" "$(basename "${SENTRY_DIR}")" \
            2>"${DUMP_DIR}/sentry-config_${timestamp}.tar.stderr" || true
        [ -s "${DUMP_DIR}/sentry-config_${timestamp}.tar.gz" ] || log_warn "Sentry config tar 為空或失敗"
    else
        log_warn "找不到 Sentry 目錄: ${SENTRY_DIR}"
    fi

    if container_exists "${POSTGRES_CONTAINER}"; then
        log_info "匯出 Sentry Postgres logical dump"
        # pipefail makes a pg_dumpall failure fail the whole pipeline.
        if docker exec "${POSTGRES_CONTAINER}" pg_dumpall -U postgres 2>"${DUMP_DIR}/postgres_${timestamp}.stderr" | low_priority gzip -9 > "${DUMP_DIR}/postgres_${timestamp}.sql.gz"; then
            log_success "Sentry Postgres dump 完成 ($(du -h "${DUMP_DIR}/postgres_${timestamp}.sql.gz" | cut -f1))"
        else
            log_error "Sentry Postgres dump 失敗"
            failed=$((failed + 1))
        fi
    else
        log_error "Sentry Postgres container 不存在: ${POSTGRES_CONTAINER}"
        failed=$((failed + 1))
    fi

    if container_exists "${CLICKHOUSE_CONTAINER}"; then
        # Metadata only (database list and table sizes); the data itself is
        # taken from the volume copy further down.
        docker exec "${CLICKHOUSE_CONTAINER}" clickhouse-client -q "SHOW DATABASES" > "${DUMP_DIR}/clickhouse_databases_${timestamp}.txt" 2>&1 || true
        docker exec "${CLICKHOUSE_CONTAINER}" clickhouse-client -q \
            "SELECT database, name, total_rows, total_bytes FROM system.tables WHERE database NOT IN ('system','INFORMATION_SCHEMA','information_schema') ORDER BY database, name FORMAT TSV" \
            > "${DUMP_DIR}/clickhouse_tables_${timestamp}.tsv" 2>&1 || true
    else
        log_warn "Sentry ClickHouse container 不存在,仍嘗試 volume snapshot: ${CLICKHOUSE_CONTAINER}"
    fi

    if container_exists "${REDIS_CONTAINER}"; then
        log_info "觸發 Sentry Redis SAVE 以刷新 dump.rdb"
        docker exec "${REDIS_CONTAINER}" redis-cli SAVE >/dev/null 2>&1 || log_warn "Redis SAVE 失敗,仍繼續 volume snapshot"
    fi

    if container_exists "${KAFKA_CONTAINER}"; then
        # NOTE(review): the sed expression prefixes "/" to paths that are
        # already absolute and ends in a stray "#"; left untouched, please
        # confirm what was intended.
        docker exec "${KAFKA_CONTAINER}" bash -lc \
            "find /var/lib/kafka -maxdepth 2 -type f | sed 's#^#/##' | head -200" \
            > "${DUMP_DIR}/kafka_file_sample_${timestamp}.txt" 2>&1 || true
    fi

    # Required volumes: each failure is counted.
    backup_volume_tree "sentry-clickhouse" "${DUMP_DIR}/volumes/clickhouse" "ClickHouse" "required" --exclude=./tmp || failed=$((failed + 1))
    backup_volume_tree "sentry-kafka" "${DUMP_DIR}/volumes/kafka" "Kafka queue" "required" || failed=$((failed + 1))
    backup_volume_tree "sentry-redis" "${DUMP_DIR}/volumes/redis" "Redis" "required" || failed=$((failed + 1))
    backup_volume_tree "sentry-seaweedfs" "${DUMP_DIR}/volumes/seaweedfs" "SeaweedFS attachments" "required" || failed=$((failed + 1))
    # Optional volumes: missing or empty is acceptable.
    backup_volume_tree "sentry-self-hosted_sentry-taskbroker" "${DUMP_DIR}/volumes/taskbroker" "Taskbroker SQLite" "optional" || true
    backup_volume_tree "sentry-self-hosted_sentry-vroom" "${DUMP_DIR}/volumes/vroom" "Vroom profiles" "optional" || true
    backup_volume_tree "sentry-self-hosted_sentry-symbolicator" "${DUMP_DIR}/volumes/symbolicator" "Symbolicator" "optional" || true
    backup_volume_tree "sentry-self-hosted_sentry-secrets" "${DUMP_DIR}/volumes/runtime-secrets" "runtime secrets" "optional" || true

    # NOTE(review): the original manifest body was lost from this file (the
    # text between the here-doc operator and a later "2>&1" is missing). The
    # fields below are a reconstruction containing only non-secret metadata;
    # restore the original content from version control if it is available.
    {
        printf 'service=%s\n' "${SERVICE}"
        printf 'timestamp=%s\n' "${timestamp}"
        printf 'host=%s\n' "$(hostname 2>/dev/null || echo unknown)"
        printf 'sentry_dir=%s\n' "${SENTRY_DIR}"
        printf 'postgres_container=%s\n' "${POSTGRES_CONTAINER}"
        printf 'clickhouse_container=%s\n' "${CLICKHOUSE_CONTAINER}"
        printf 'kafka_container=%s\n' "${KAFKA_CONTAINER}"
        printf 'redis_container=%s\n' "${REDIS_CONTAINER}"
        printf 'required_failures=%s\n' "${failed}"
    } > "${DUMP_DIR}/backup-manifest.txt"

    # NOTE(review): reconstructed as well; the surviving "2>&1 fi" fragment
    # suggests a guarded repository initialisation stood here. A local restic
    # repository keeps a "config" file at its root.
    if [ ! -f "${LOCAL_REPO}/config" ]; then
        log_info "初始化 Sentry Restic repository: ${LOCAL_REPO}"
        restic init -r "${LOCAL_REPO}" \
            --password-file "${RESTIC_PASSWORD_FILE}" 2>&1 \
            || log_warn "Sentry Restic repository 初始化失敗,仍嘗試備份"
    fi

    log_info "建立 Sentry Restic 備份..."
    local tags
    tags=$(build_tags "${SERVICE}")
    # ${tags} is left unquoted on purpose: build_tags presumably prints
    # several option words that must be split (confirm against common.sh).
    # A failed backup is counted instead of aborting under `set -e`, so the
    # failure notification below is still sent and retention pruning is
    # skipped when no new snapshot was written.
    # shellcheck disable=SC2086
    if low_priority restic -r "${LOCAL_REPO}" backup "${DUMP_DIR}" \
        --password-file "${RESTIC_PASSWORD_FILE}" \
        ${tags} \
        --tag "scope:sentry-state" \
        --tag "contains:postgres-clickhouse-kafka-redis-seaweedfs" \
        --tag "contains:runtime-secrets" 2>&1; then
        local snapshot_id
        snapshot_id=$(restic -r "${LOCAL_REPO}" snapshots --latest 1 --json \
            --password-file "${RESTIC_PASSWORD_FILE}" 2>/dev/null | \
            python3 -c 'import json,sys; rows=json.load(sys.stdin); print(rows[-1].get("short_id","unknown") if rows else "unknown")' 2>/dev/null || echo "unknown")
        log_success "Sentry Restic 備份完成: ${snapshot_id}"

        cleanup_old_backups "${LOCAL_REPO}"
    else
        log_error "Sentry Restic 備份失敗"
        failed=$((failed + 1))
    fi

    local duration
    duration=$(($(date +%s) - start_time))
    if [ "${failed}" -eq 0 ]; then
        log_success "========== Sentry 專屬資料層備份完成 (${duration}s) =========="
        notify_clawbot "success" "${SERVICE}" "Sentry 專屬資料層備份完成" "${duration}"
    else
        log_error "========== Sentry 備份有 ${failed} 個必要項目失敗 (${duration}s) =========="
        notify_clawbot "failed" "${SERVICE}" "Sentry 備份有 ${failed} 個必要項目失敗" "${duration}"
    fi

    # Clean up explicitly, then drop the trap so cleanup does not run twice.
    trap - EXIT
    cleanup
    return "${failed}"
}

main "$@"