#!/usr/bin/env bash
# Render and deploy ops/alertmanager/alertmanager.yml to the 110 Docker Alertmanager.
#
# This script keeps the live direct-Telegram emergency route aligned with Git:
#   - inject Telegram bot token and SRE group chat id from K8s secret or env
#   - validate with amtool before touching the live config
#   - back up the live file
#   - keep the bind-mounted live file inode and readable permissions intact
#   - reload Alertmanager with SIGHUP
#
# Usage:
#   bash scripts/ops/deploy-alertmanager-config.sh [--dry-run]
#
# Any argument other than --dry-run is rejected, so a mistyped flag can never
# fall through to a real deploy.
#
# Optional env:
#   TARGET_HOST=192.168.0.110
#   TARGET_USER=wooo
#   TARGET_PATH=/home/wooo/monitoring/alertmanager.yml
#   K8S_HOST=192.168.0.120
#   K8S_USER=wooo
#   K8S_NAMESPACE=awoooi-prod
#   K8S_SECRET=awoooi-secrets
#   TELEGRAM_BOT_TOKEN=...
#   SRE_GROUP_CHAT_ID=...

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
CONFIG_TEMPLATE="${REPO_ROOT}/ops/alertmanager/alertmanager.yml"

TARGET_HOST="${TARGET_HOST:-192.168.0.110}"
TARGET_USER="${TARGET_USER:-wooo}"
TARGET_PATH="${TARGET_PATH:-/home/wooo/monitoring/alertmanager.yml}"
K8S_HOST="${K8S_HOST:-192.168.0.120}"
K8S_USER="${K8S_USER:-wooo}"
K8S_NAMESPACE="${K8S_NAMESPACE:-awoooi-prod}"
K8S_SECRET="${K8S_SECRET:-awoooi-secrets}"
DRY_RUN="${1:-}"

# Shared ssh options: never prompt for a password, fail fast on dead hosts.
SSH_OPTS=(-o BatchMode=yes -o ConnectTimeout=8)

# Print a timestamped progress line to stdout.
log() {
  printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*"
}

# Print an error to stderr and abort the script with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Run a command on the Alertmanager host; stdin is passed through to ssh.
target_ssh() {
  ssh "${SSH_OPTS[@]}" "${TARGET_USER}@${TARGET_HOST}" "$@"
}

# Decode base64 from stdin and print the stripped text.
# Exits non-zero when the input is not valid base64 / not decodable text.
decode_b64() {
  python3 -c 'import base64,sys; print(base64.b64decode(sys.stdin.read()).decode().strip())'
}

# Print the base64-encoded value of key $1 from the K8s secret.
# stderr is discarded on purpose: callers treat "cannot read this key" as
# "try the next source", so ssh/kubectl noise is not useful here.
secret_key_b64() {
  local key="$1"
  ssh "${SSH_OPTS[@]}" "${K8S_USER}@${K8S_HOST}" \
    "sudo -n kubectl -n '${K8S_NAMESPACE}' get secret '${K8S_SECRET}' -o jsonpath='{.data.${key}}'" 2>/dev/null
}

# Print the first non-empty value among: the env value ($1), then each K8s
# secret key named in the remaining arguments, in order.
# Returns 1 when no source yields a non-empty, decodable value.
read_secret_first_available() {
  local env_value="$1"
  shift
  if [[ -n "$env_value" ]]; then
    printf '%s' "$env_value"
    return 0
  fi

  local key raw decoded
  for key in "$@"; do
    raw="$(secret_key_b64 "$key" || true)"
    [[ -n "$raw" ]] || continue
    # This function runs inside `$(...) || die`, where `set -e` is inactive,
    # so the decode result has to be checked explicitly; otherwise a corrupt
    # secret would be reported as success with an empty value.
    if ! decoded="$(printf '%s' "$raw" | decode_b64 2>/dev/null)"; then
      printf 'WARN: secret key %s is not decodable; trying next source\n' "$key" >&2
      continue
    fi
    if [[ -n "$decoded" ]]; then
      printf '%s' "$decoded"
      return 0
    fi
  done
  return 1
}

# Reject anything but "no argument" or "--dry-run" before doing any work.
(( $# <= 1 )) || die "too many arguments; usage: $0 [--dry-run]"
case "$DRY_RUN" in
  '' | --dry-run) ;;
  *) die "unknown argument: ${DRY_RUN}; usage: $0 [--dry-run]" ;;
esac

[[ -f "$CONFIG_TEMPLATE" ]] || die "template not found: ${CONFIG_TEMPLATE}"

TELEGRAM_BOT_TOKEN="$(
  read_secret_first_available \
    "${TELEGRAM_BOT_TOKEN:-}" \
    OPENCLAW_TG_BOT_TOKEN \
    OPENCLAW_BOT_TOKEN \
    TELEGRAM_BOT_TOKEN \
    TG_BOT_TOKEN
)" || die "missing Telegram bot token; set TELEGRAM_BOT_TOKEN or add one of the known keys to ${K8S_SECRET}"

SRE_GROUP_CHAT_ID="$(
  read_secret_first_available \
    "${SRE_GROUP_CHAT_ID:-}" \
    SRE_GROUP_CHAT_ID \
    TELEGRAM_ALERT_CHAT_ID
)" || die "missing SRE_GROUP_CHAT_ID"

[[ "$SRE_GROUP_CHAT_ID" =~ ^-?[0-9]+$ ]] || die "SRE_GROUP_CHAT_ID must be a Telegram numeric chat id"

# The renderer below reads both values from the environment.
export TELEGRAM_BOT_TOKEN SRE_GROUP_CHAT_ID

tmp_rendered="$(mktemp)"
trap 'rm -f "$tmp_rendered"' EXIT
chmod 600 "$tmp_rendered"

# Substitute the two placeholders in the template and write the result to the
# private temp file. Secrets travel via the environment, not argv.
python3 - "$CONFIG_TEMPLATE" "$tmp_rendered" <<'PY'
from pathlib import Path
import os
import sys

template = Path(sys.argv[1])
target = Path(sys.argv[2])
text = template.read_text()
text = text.replace("TELEGRAM_BOT_TOKEN_PLACEHOLDER", os.environ["TELEGRAM_BOT_TOKEN"])
text = text.replace("SRE_GROUP_CHAT_ID_PLACEHOLDER", os.environ["SRE_GROUP_CHAT_ID"])
if "TELEGRAM_BOT_TOKEN_PLACEHOLDER" in text or "SRE_GROUP_CHAT_ID_PLACEHOLDER" in text:
    raise SystemExit("unreplaced secret placeholder remains in rendered config")
target.write_text(text)
PY

log "Validating rendered config with live Alertmanager amtool on ${TARGET_HOST}"
# The rendered file contains the bot token, so it is removed from the
# container again after the check; amtool's exit status is preserved.
target_ssh \
  "docker exec -i alertmanager sh -c 'umask 077; cat >/tmp/alertmanager-rendered.yml && amtool check-config /tmp/alertmanager-rendered.yml; rc=\$?; rm -f /tmp/alertmanager-rendered.yml; exit \$rc'" \
  < "$tmp_rendered"

if [[ "$DRY_RUN" == "--dry-run" ]]; then
  log "DRY RUN: rendered config validated; not deploying"
  exit 0
fi

log "Uploading rendered config to ${TARGET_HOST}:${TARGET_PATH}"
# Stage into an unpredictable, owner-only file instead of a fixed /tmp name.
remote_staged="$(target_ssh 'umask 077 && mktemp /tmp/alertmanager.yml.new.XXXXXX')" \
  || die "could not create staging file on ${TARGET_HOST}"
# Validate the returned path before interpolating it into remote commands.
[[ "$remote_staged" =~ ^/tmp/alertmanager\.yml\.new\.[A-Za-z0-9]+$ ]] \
  || die "unexpected staging path from ${TARGET_HOST}: ${remote_staged}"

if ! target_ssh "cat > '${remote_staged}'" < "$tmp_rendered"; then
  # Do not leave a partial copy of the secret-bearing config behind.
  target_ssh "rm -f '${remote_staged}'" || true
  die "upload to ${TARGET_HOST} failed"
fi

# Install on the target host. The here-doc is quoted, so nothing in it is
# expanded locally; the live path and staging path arrive as $1 and $2.
# NOTE(review): the opening lines of this remote script (backup command and
# backup file naming) were reconstructed from the header's stated behavior;
# confirm they match the previous backup convention on the host.
target_ssh "bash -s -- $(printf '%q' "$TARGET_PATH") $(printf '%q' "$remote_staged")" <<'REMOTE'
set -euo pipefail
target="$1"
staged="$2"
# Remove the staged copy on every exit path, including failures.
trap 'rm -f -- "$staged"' EXIT

backup="${target}.bak.$(date +%Y%m%d-%H%M%S)"
cp -p -- "$target" "$backup"

# Overwrite in place (no mv/rename) so the bind-mounted inode is preserved.
cat -- "$staged" > "$target"
chmod 0644 "$target"

docker exec alertmanager amtool check-config /etc/alertmanager/alertmanager.yml
docker kill -s HUP alertmanager >/dev/null
sleep 2
docker inspect alertmanager --format 'status={{.State.Status}} started={{.State.StartedAt}}'
echo "backup=$backup"
REMOTE

log "Alertmanager config deployed and reloaded"