Files
awoooi/scripts/backup/backup-sentry.sh
Your Name cfb866d055
Some checks failed
Ansible Lint / lint (push) Successful in 35s
CD Pipeline / tests (push) Failing after 13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Failing after 11s
feat(governance): add agent market automation surfaces
2026-06-04 21:50:55 +08:00

278 lines
11 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# =============================================================================
# WOOO AIOps - Sentry 專屬資料層備份
# 2026-05-06 ogt + Codex: dirty reboot 後補齊 Sentry Postgres /
# ClickHouse / Kafka / Redis / SeaweedFS / Taskbroker state backup。
#
# 安全原則:
# - 只做 dump / volume snapshot / restic backup,不停止正式服務、不還原資料。
# - pg_dumpall 可能包含 role hash,所有輸出只進 encrypted restic repo。
# - 不把 Secret 值、DB dump 內容或 credentials 印到 log。
# =============================================================================
# Strict mode: exit on unhandled errors, on use of unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail
# Load the shared helper library that sits next to this script.
# NOTE(review): the log_* / build_tags / cleanup_old_backups / notify_clawbot
# helpers and the BACKUP_BASE, RESTIC_PASSWORD_FILE and BACKUP_DOCKER_* variables
# used in this file are not defined here; presumably common.sh provides them — confirm.
source "$(dirname "$0")/common.sh"
# Service name handed to build_tags and notify_clawbot.
SERVICE="sentry"
# Restic repository that receives the Sentry snapshots.
LOCAL_REPO="${BACKUP_BASE}/sentry"
# Staging location for dump artifacts; the name embeds this shell's PID.
DUMP_DIR="/tmp/sentry-backup-$$"
# Container names; each one can be overridden through the matching
# SENTRY_*_CONTAINER environment variable.
POSTGRES_CONTAINER="${SENTRY_POSTGRES_CONTAINER:-sentry-self-hosted-postgres-1}"
CLICKHOUSE_CONTAINER="${SENTRY_CLICKHOUSE_CONTAINER:-sentry-self-hosted-clickhouse-1}"
KAFKA_CONTAINER="${SENTRY_KAFKA_CONTAINER:-sentry-self-hosted-kafka-1}"
REDIS_CONTAINER="${SENTRY_REDIS_CONTAINER:-sentry-self-hosted-redis-1}"
# Directory archived as compose/config evidence; defaults to /opt/sentry.
SENTRY_DIR="${SENTRY_DIR:-/opt/sentry}"
# Remove the staging directory ${DUMP_DIR}. Best effort: always returns 0.
#
# A plain `rm -rf` is tried first. If the directory is still there afterwards
# (e.g. entries the invoking user may not delete) and docker is available, the
# parent directory is mounted into a throw-away alpine container and the
# directory is removed from inside it.
# Globals: DUMP_DIR (read)
cleanup() {
  rm -rf "${DUMP_DIR}" 2>/dev/null || true

  # Nothing left to do when the directory is gone or docker cannot help.
  [ -d "${DUMP_DIR}" ] || return 0
  command -v docker >/dev/null 2>&1 || return 0

  local parent_dir
  local leaf_name
  parent_dir="$(dirname "${DUMP_DIR}")"
  leaf_name="$(basename "${DUMP_DIR}")"
  docker run --rm -v "${parent_dir}:/hosttmp" \
    alpine rm -rf "/hosttmp/${leaf_name}" >/dev/null 2>&1 || true
  return 0
}
# Run the given command at reduced scheduling priority.
#
# The command is always wrapped in `nice -n 10`; when `ionice` is installed it
# is additionally wrapped in `ionice -c2 -n7` (best-effort I/O class, level 7).
# Arguments: the command and its arguments.
# Returns:   the exit status of the wrapped invocation.
low_priority() {
  local -a wrapper=(nice -n 10)
  if command -v ionice >/dev/null 2>&1; then
    wrapper=(ionice -c2 -n7 "${wrapper[@]}")
  fi
  "${wrapper[@]}" "$@"
}
# Report whether a Docker container with the given name or ID exists.
#
# Uses `docker container inspect` rather than the untyped `docker inspect`,
# which also matches images, volumes and networks of the same name and would
# therefore report a container that is not there.
# Arguments: $1 - container name or ID
# Returns:   0 if the container exists, otherwise docker's non-zero status.
# Outputs:   nothing (stdout and stderr of docker are discarded).
container_exists() {
  docker container inspect "$1" >/dev/null 2>&1
}
# Report whether a Docker volume with the given name exists.
# Arguments: $1 - volume name
# Returns:   the exit status of `docker volume inspect` (0 when it succeeds).
# Outputs:   nothing (stdout and stderr of docker are discarded).
volume_exists() {
  local volume_name="$1"
  docker volume inspect "${volume_name}" &>/dev/null
}
# Stream a Docker volume into a gzip-compressed tar archive on the host.
#
# The volume is mounted read-only at /data in a resource-limited alpine
# container and `tar czf -` output is redirected to the output file; tar's
# stderr goes to "<output_file>.stderr".
#
# Arguments:
#   $1  - volume name
#   $2  - output archive path (its directory must already exist)
#   $3  - human-readable label used in log messages
#   $4  - "required" (default) or any other value for an optional volume
#   $5+ - extra options forwarded to tar, placed before /data
# Globals: BACKUP_DOCKER_CPUS, BACKUP_DOCKER_MEMORY, BACKUP_DOCKER_MEMORY_SWAP (read)
# Returns:
#   0 when the archive was written (exit status 0 and non-empty file), or when
#     an optional volume is missing / could not be archived;
#   1 when a required volume is missing or its archive failed or is empty.
backup_volume() {
  local volume_name="$1"
  local output_file="$2"
  local label="$3"
  local required="${4:-required}"
  # Slice instead of `shift 4`: shift fails (and shifts nothing) when fewer
  # than four arguments were given, which used to leak the volume name, output
  # path and label into tar's argument list.
  local tar_args=("${@:5}")
  local rc=0

  if ! volume_exists "${volume_name}"; then
    if [ "${required}" = "required" ]; then
      log_error "Sentry ${label} volume 不存在: ${volume_name}"
      return 1
    fi
    log_warn "Sentry ${label} volume 不存在,略過: ${volume_name}"
    return 0
  fi

  log_info "備份 Sentry volume: ${label} (${volume_name})"
  # ${arr[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
  low_priority docker run --rm \
    --cpus="${BACKUP_DOCKER_CPUS}" \
    --memory="${BACKUP_DOCKER_MEMORY}" \
    --memory-swap="${BACKUP_DOCKER_MEMORY_SWAP}" \
    -v "${volume_name}:/data:ro" \
    alpine \
    tar czf - ${tar_args[@]+"${tar_args[@]}"} /data 2>"${output_file}.stderr" > "${output_file}" || rc=$?

  # A non-empty file alone is not proof of success: an interrupted run leaves
  # a truncated archive behind, so the exit status must be zero as well
  # (same rule backup_volume_tree applies).
  if [ "${rc}" -eq 0 ] && [ -s "${output_file}" ]; then
    local size
    size=$(du -h "${output_file}" | cut -f1)
    log_success "  Sentry ${label} volume 備份完成 (${size})"
    return 0
  fi

  if [ "${rc}" -ne 0 ]; then
    log_warn "  Sentry ${label} volume tar 結束碼非零: ${rc}"
  fi
  if [ "${required}" = "required" ]; then
    log_error "  Sentry ${label} volume 備份失敗或為空"
    return 1
  fi
  log_warn "  Sentry ${label} volume 備份為空,略過"
  return 0
}
# Copy the contents of a Docker volume into a plain directory tree on the host.
#
# The volume is mounted read-only at /data and "<output_dir>/data" at /out in a
# resource-limited alpine container. Inside it the tree is piped through
# `tar cf - ... . | tar xf - -C /out`, then ownership is set to the invoking
# user's uid:gid and permissions are restricted to the owner. Container stdout
# and stderr are saved as "<output_dir>/copy.stdout" and "<output_dir>/copy.stderr".
#
# Arguments:
#   $1  - volume name
#   $2  - output directory ("<output_dir>/data" is created with mode 700)
#   $3  - human-readable label used in log messages
#   $4  - "required" (default) or any other value for an optional volume
#   $5+ - extra options forwarded to the archiving tar (e.g. --exclude=./tmp)
# Globals: BACKUP_DOCKER_CPUS, BACKUP_DOCKER_MEMORY, BACKUP_DOCKER_MEMORY_SWAP (read)
# Returns:
#   0 when the copy succeeded and produced at least one entry, or when an
#     optional volume is missing / failed / is empty;
#   1 when a required volume is missing, the copy failed, or nothing was copied.
backup_volume_tree() {
  local volume_name="$1"
  local output_dir="$2"
  local label="$3"
  local required="${4:-required}"
  # Slice instead of `shift 4`: shift fails (and shifts nothing) when fewer
  # than four arguments were given, which used to pass the volume name, output
  # directory and label to the in-container tar as if they were options.
  local tar_args=("${@:5}")

  if ! volume_exists "${volume_name}"; then
    if [ "${required}" = "required" ]; then
      log_error "Sentry ${label} volume 不存在: ${volume_name}"
      return 1
    fi
    log_warn "Sentry ${label} volume 不存在,略過: ${volume_name}"
    return 0
  fi

  local host_uid
  local host_gid
  host_uid="$(id -u)"
  host_gid="$(id -g)"

  log_info "備份 Sentry volume tree: ${label} (${volume_name})"
  install -d -m 700 "${output_dir}/data"

  # The trailing `sh` is $0 for the inline script; everything after it becomes
  # "$@" there. ${arr[@]+...} keeps an empty array from tripping `set -u` on
  # bash < 4.4.
  if low_priority docker run --rm \
    --cpus="${BACKUP_DOCKER_CPUS}" \
    --memory="${BACKUP_DOCKER_MEMORY}" \
    --memory-swap="${BACKUP_DOCKER_MEMORY_SWAP}" \
    -e "HOST_UID=${host_uid}" \
    -e "HOST_GID=${host_gid}" \
    -v "${volume_name}:/data:ro" \
    -v "${output_dir}/data:/out" \
    alpine sh -c 'cd /data && tar cf - "$@" . | tar xf - -C /out && chown -R "${HOST_UID}:${HOST_GID}" /out && chmod -R u+rwX,go-rwx /out' sh ${tar_args[@]+"${tar_args[@]}"} \
    > "${output_dir}/copy.stdout" 2>"${output_dir}/copy.stderr"; then
    # Success additionally requires that at least one entry was copied.
    if find "${output_dir}/data" -mindepth 1 -print -quit | grep -q .; then
      local size
      size=$(du -sh "${output_dir}/data" | cut -f1)
      log_success "  Sentry ${label} volume tree 備份完成 (${size})"
      return 0
    fi
  fi

  if [ "${required}" = "required" ]; then
    log_error "  Sentry ${label} volume tree 備份失敗或為空"
    return 1
  fi
  log_warn "  Sentry ${label} volume tree 備份為空,略過"
  return 0
}
# Run a command and store its combined stdout/stderr as an inventory file.
#
# Arguments:
#   $1  - label; the output is written to "${DUMP_DIR}/<label>.txt"
#   $2+ - the command to run and its arguments
# Globals: DUMP_DIR (read)
# Returns: 1 (after a warning) when the command fails; otherwise the status of
#          the success log call.
capture_cmd() {
  local label="$1"
  shift
  local report_file="${DUMP_DIR}/${label}.txt"

  "$@" > "${report_file}" 2>&1 || {
    log_warn "Sentry 盤點失敗: ${label}"
    return 1
  }
  log_success "Sentry 盤點完成: ${label}"
}
# Run the complete Sentry data-layer backup.
#
# Steps, in order:
#   1. create a private staging directory and register cleanup on EXIT;
#   2. record container / volume inventories and archive the Sentry
#      compose/config directory (best effort);
#   3. take a Postgres logical dump (pg_dumpall piped through gzip);
#   4. record ClickHouse database/table listings, ask Redis to SAVE, and sample
#      the Kafka data directory (all best effort);
#   5. copy the data volumes (four required, four optional);
#   6. write backup-manifest.txt, store the staging directory in the restic
#      repository, prune old snapshots, and send a notification.
#
# Globals read:    SERVICE, LOCAL_REPO, SENTRY_DIR, RESTIC_PASSWORD_FILE,
#                  POSTGRES_CONTAINER, CLICKHOUSE_CONTAINER, KAFKA_CONTAINER,
#                  REDIS_CONTAINER
# Globals written: DUMP_DIR - replaced by the path of the staging directory
#                  actually created, so cleanup and capture_cmd operate on it.
# Arguments: none are used.
# Returns: the number of required components that failed (0 = full success);
#          1 if the staging directory cannot be created or restic init/backup
#          fails.
main() {
  local start_time
  local timestamp
  local failed=0
  start_time=$(date +%s)
  timestamp=$(date "+%Y%m%d_%H%M%S")

  # The staging directory receives pg_dumpall output and runtime secrets.
  # A fixed, PID-based name can be guessed and pre-created by another local
  # user, and `install -d` would silently adopt such a directory. mktemp -d
  # creates a new, unpredictably named, owner-only directory or fails.
  local staging_dir
  if ! staging_dir="$(mktemp -d "${DUMP_DIR}.XXXXXX")"; then
    log_error "無法建立 Sentry 備份暫存目錄: ${DUMP_DIR}.XXXXXX"
    return 1
  fi
  DUMP_DIR="${staging_dir}"
  trap cleanup EXIT

  log_info "========== 開始 Sentry 專屬資料層備份 (${timestamp}) =========="

  # Inventory snapshots; failures are only warned about inside capture_cmd.
  capture_cmd "docker-containers" docker ps --filter "name=sentry-self-hosted" --format 'table {{.Names}}\t{{.Image}}\t{{.Status}}' || true
  capture_cmd "docker-volumes" docker volume ls --format '{{.Name}}' || true

  if [ -d "${SENTRY_DIR}" ]; then
    log_info "封存 Sentry compose/config 證據"
    # Bulk data and logs are excluded; the archive is evidence of the
    # configuration only. tar errors are tolerated and checked via file size.
    tar \
      --exclude="*/clickhouse/store" \
      --exclude="*/.git" \
      --exclude="*/logs" \
      --exclude="*/data" \
      -czf "${DUMP_DIR}/sentry-config_${timestamp}.tar.gz" \
      -C "$(dirname "${SENTRY_DIR}")" "$(basename "${SENTRY_DIR}")" \
      2>"${DUMP_DIR}/sentry-config_${timestamp}.tar.stderr" || true
    [ -s "${DUMP_DIR}/sentry-config_${timestamp}.tar.gz" ] || log_warn "Sentry config tar 為空或失敗"
  else
    log_warn "找不到 Sentry 目錄: ${SENTRY_DIR}"
  fi

  # Postgres logical dump: a required component.
  if container_exists "${POSTGRES_CONTAINER}"; then
    log_info "匯出 Sentry Postgres logical dump"
    if docker exec "${POSTGRES_CONTAINER}" pg_dumpall -U postgres 2>"${DUMP_DIR}/postgres_${timestamp}.stderr" | low_priority gzip -9 > "${DUMP_DIR}/postgres_${timestamp}.sql.gz"; then
      log_success "Sentry Postgres dump 完成 ($(du -h "${DUMP_DIR}/postgres_${timestamp}.sql.gz" | cut -f1))"
    else
      log_error "Sentry Postgres dump 失敗"
      failed=$((failed + 1))
    fi
  else
    log_error "Sentry Postgres container 不存在: ${POSTGRES_CONTAINER}"
    failed=$((failed + 1))
  fi

  # ClickHouse metadata listings (best effort); the data itself is taken from
  # the volume further down.
  if container_exists "${CLICKHOUSE_CONTAINER}"; then
    docker exec "${CLICKHOUSE_CONTAINER}" clickhouse-client -q "SHOW DATABASES" > "${DUMP_DIR}/clickhouse_databases_${timestamp}.txt" 2>&1 || true
    docker exec "${CLICKHOUSE_CONTAINER}" clickhouse-client -q \
      "SELECT database, name, total_rows, total_bytes FROM system.tables WHERE database NOT IN ('system','INFORMATION_SCHEMA','information_schema') ORDER BY database, name FORMAT TSV" \
      > "${DUMP_DIR}/clickhouse_tables_${timestamp}.tsv" 2>&1 || true
  else
    log_warn "Sentry ClickHouse container 不存在,仍嘗試 volume snapshot: ${CLICKHOUSE_CONTAINER}"
  fi

  # Ask Redis to write dump.rdb before its volume is copied.
  # NOTE(review): SAVE is the synchronous variant — confirm that blocking the
  # Redis server for the duration is acceptable here.
  if container_exists "${REDIS_CONTAINER}"; then
    log_info "觸發 Sentry Redis SAVE 以刷新 dump.rdb"
    docker exec "${REDIS_CONTAINER}" redis-cli SAVE >/dev/null 2>&1 || log_warn "Redis SAVE 失敗,仍繼續 volume snapshot"
  fi

  # Sample of up to 200 files (depth <= 2) under /var/lib/kafka, best effort.
  # NOTE(review): the sed expression appears to prepend an extra "/" to every
  # path — confirm the intent.
  if container_exists "${KAFKA_CONTAINER}"; then
    docker exec "${KAFKA_CONTAINER}" bash -lc \
      "find /var/lib/kafka -maxdepth 2 -type f | sed 's#^#/##' | head -200" \
      > "${DUMP_DIR}/kafka_file_sample_${timestamp}.txt" 2>&1 || true
  fi

  # Required volumes: every failure is counted.
  backup_volume_tree "sentry-clickhouse" "${DUMP_DIR}/volumes/clickhouse" "ClickHouse" "required" --exclude=./tmp || failed=$((failed + 1))
  backup_volume_tree "sentry-kafka" "${DUMP_DIR}/volumes/kafka" "Kafka queue" "required" || failed=$((failed + 1))
  backup_volume_tree "sentry-redis" "${DUMP_DIR}/volumes/redis" "Redis" "required" || failed=$((failed + 1))
  backup_volume_tree "sentry-seaweedfs" "${DUMP_DIR}/volumes/seaweedfs" "SeaweedFS attachments" "required" || failed=$((failed + 1))
  # Optional volumes: never counted as failures.
  backup_volume_tree "sentry-self-hosted_sentry-taskbroker" "${DUMP_DIR}/volumes/taskbroker" "Taskbroker SQLite" "optional" || true
  backup_volume_tree "sentry-self-hosted_sentry-vroom" "${DUMP_DIR}/volumes/vroom" "Vroom profiles" "optional" || true
  backup_volume_tree "sentry-self-hosted_sentry-symbolicator" "${DUMP_DIR}/volumes/symbolicator" "Symbolicator" "optional" || true
  backup_volume_tree "sentry-self-hosted_sentry-secrets" "${DUMP_DIR}/volumes/runtime-secrets" "runtime secrets" "optional" || true

  # Manifest stored inside the snapshot (heredoc body stays at column 0).
  cat > "${DUMP_DIR}/backup-manifest.txt" <<EOF
service=sentry
timestamp=${timestamp}
postgres_container=${POSTGRES_CONTAINER}
clickhouse_container=${CLICKHOUSE_CONTAINER}
kafka_container=${KAFKA_CONTAINER}
redis_container=${REDIS_CONTAINER}
contains=postgres_dump,clickhouse_volume_tree,kafka_volume_tree,redis_volume_tree,seaweedfs_volume_tree,taskbroker_vroom_symbolicator_optional
secrets_policy=encrypted_restic_only_no_log_output
failed_components=${failed}
EOF

  # Initialise the repository when its data directory is absent. A failure is
  # reported explicitly instead of letting `set -e` end the script without a
  # notification.
  if [ ! -d "${LOCAL_REPO}/data" ]; then
    log_info "初始化 Restic 倉庫: ${LOCAL_REPO}"
    if ! low_priority restic -r "${LOCAL_REPO}" init --password-file "${RESTIC_PASSWORD_FILE}" 2>&1; then
      log_error "Sentry Restic 倉庫初始化失敗: ${LOCAL_REPO}"
      notify_clawbot "failed" "${SERVICE}" "Sentry Restic 倉庫初始化失敗" "$(($(date +%s) - start_time))"
      return 1
    fi
  fi

  log_info "建立 Sentry Restic 備份..."
  local tags
  tags=$(build_tags "${SERVICE}")
  # ${tags} is left unquoted on purpose so that it splits into separate
  # arguments (presumably several "--tag value" pairs — confirm in build_tags).
  # shellcheck disable=SC2086
  if ! low_priority restic -r "${LOCAL_REPO}" backup "${DUMP_DIR}" \
    --password-file "${RESTIC_PASSWORD_FILE}" \
    ${tags} \
    --tag "scope:sentry-state" \
    --tag "contains:postgres-clickhouse-kafka-redis-seaweedfs" \
    --tag "contains:runtime-secrets" 2>&1; then
    # Without a snapshot nothing was backed up; report it rather than exiting
    # silently. The EXIT trap still removes the staging directory.
    log_error "Sentry Restic 備份失敗"
    notify_clawbot "failed" "${SERVICE}" "Sentry Restic 備份失敗" "$(($(date +%s) - start_time))"
    return 1
  fi

  # Short id of the newest snapshot, or "unknown" when it cannot be read.
  local snapshot_id
  snapshot_id=$(restic -r "${LOCAL_REPO}" snapshots --latest 1 --json \
    --password-file "${RESTIC_PASSWORD_FILE}" 2>/dev/null | \
    python3 -c 'import json,sys; rows=json.load(sys.stdin); print(rows[-1].get("short_id","unknown") if rows else "unknown")' 2>/dev/null || echo "unknown")
  log_success "Sentry Restic 備份完成: ${snapshot_id}"

  cleanup_old_backups "${LOCAL_REPO}"

  local duration
  duration=$(($(date +%s) - start_time))
  if [ "${failed}" -eq 0 ]; then
    log_success "========== Sentry 專屬資料層備份完成 (${duration}s) =========="
    notify_clawbot "success" "${SERVICE}" "Sentry 專屬資料層備份完成" "${duration}"
  else
    log_error "========== Sentry 備份有 ${failed} 個必要項目失敗 (${duration}s) =========="
    notify_clawbot "failed" "${SERVICE}" "Sentry 備份有 ${failed} 個必要項目失敗" "${duration}"
  fi

  # Clean up now and drop the trap so cleanup does not run a second time.
  trap - EXIT
  cleanup
  return "${failed}"
}
# Script entry point: run main with the script's own command-line arguments.
main "$@"