#!/usr/bin/env bash
# Render and deploy ops/alertmanager/alertmanager.yml to the 110 Docker Alertmanager.
#
# This script keeps the live direct-Telegram emergency route aligned with Git:
#   - inject Telegram bot token and SRE group chat id from K8s secret or env
#   - validate with amtool before touching the live config
#   - back up the live file
#   - keep the bind-mounted live file inode and readable permissions intact
#   - reload Alertmanager with SIGHUP
#
# Usage:
#   bash scripts/ops/deploy-alertmanager-config.sh [--dry-run]
#
# Optional env:
#   TARGET_HOST=192.168.0.110
#   TARGET_USER=wooo
#   TARGET_PATH=/home/wooo/monitoring/alertmanager.yml
#   K8S_HOST=192.168.0.120
#   K8S_USER=wooo
#   K8S_NAMESPACE=awoooi-prod
#   K8S_SECRET=awoooi-secrets
#   TELEGRAM_BOT_TOKEN=...
#   SRE_GROUP_CHAT_ID=...

# Strict mode: abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail

# Locate the repository root relative to this script (two directory levels up)
# so the template path does not depend on the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
CONFIG_TEMPLATE="${REPO_ROOT}/ops/alertmanager/alertmanager.yml"

# Deployment target and secret source; every value can be overridden from the environment.
TARGET_HOST="${TARGET_HOST:-192.168.0.110}"
TARGET_USER="${TARGET_USER:-wooo}"
TARGET_PATH="${TARGET_PATH:-/home/wooo/monitoring/alertmanager.yml}"
K8S_HOST="${K8S_HOST:-192.168.0.120}"
K8S_USER="${K8S_USER:-wooo}"
K8S_NAMESPACE="${K8S_NAMESPACE:-awoooi-prod}"
K8S_SECRET="${K8S_SECRET:-awoooi-secrets}"

#######################################
# Validate the script's command-line arguments.
# Arguments: the script's own "$@" (either nothing or exactly --dry-run)
# Outputs:   the accepted flag (possibly empty) on stdout; an error on stderr
# Returns:   0 when the arguments are acceptable, 2 otherwise
#######################################
parse_dry_run_flag() {
  if (( $# > 1 )); then
    printf 'ERROR: too many arguments (usage: %s [--dry-run])\n' "${0##*/}" >&2
    return 2
  fi
  case "${1:-}" in
    '' | --dry-run)
      printf '%s' "${1:-}"
      ;;
    *)
      printf 'ERROR: unknown argument: %s (usage: %s [--dry-run])\n' "$1" "${0##*/}" >&2
      return 2
      ;;
  esac
}

# Refuse anything but "--dry-run": a mistyped flag must never fall through to a
# real deployment.
DRY_RUN="$(parse_dry_run_flag "$@")" || exit 2

# log MESSAGE...
# Print a progress line to stdout, prefixed with the local wall-clock time as
# [HH:MM:SS]. All arguments are joined with spaces into one message.
log() {
  local stamp
  stamp="$(date '+%H:%M:%S')"
  printf '[%s] %s\n' "$stamp" "$*"
}

# die MESSAGE...
# Print "ERROR: MESSAGE" to stderr and terminate the script with exit status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# decode_b64
# Read base64 text from stdin and print the decoded text to stdout with leading
# and trailing whitespace stripped, followed by a newline.
# Requires python3. Returns non-zero when python3 cannot decode the input.
decode_b64() {
  python3 -c 'import base64,sys; print(base64.b64decode(sys.stdin.read()).decode().strip())'
}

#######################################
# Fetch one key of the Kubernetes secret, still base64-encoded.
# Globals:   K8S_USER, K8S_HOST, K8S_NAMESPACE, K8S_SECRET (read)
# Arguments: $1 - name of the key inside the secret's .data map
# Outputs:   the base64 value on stdout (empty when the key is absent)
# Returns:   1 for a malformed key name, otherwise the ssh/kubectl status
#######################################
secret_key_b64() {
  local key="$1"

  # The key is spliced into a remote shell command and a jsonpath expression.
  # Only allow the characters Kubernetes permits in secret keys, so quotes and
  # shell metacharacters can never reach the remote command line.
  [[ "$key" =~ ^[A-Za-z0-9_.-]+$ ]] || return 1

  # stderr is discarded on purpose: callers treat any failure as "key not
  # available" and move on to the next candidate.
  ssh -o BatchMode=yes -o ConnectTimeout=8 "${K8S_USER}@${K8S_HOST}" \
    "sudo -n kubectl -n '${K8S_NAMESPACE}' get secret '${K8S_SECRET}' -o jsonpath='{.data.${key}}'" 2>/dev/null
}

#######################################
# Resolve a secret value, preferring an explicit environment override.
# Arguments: $1  - value taken from the environment (may be empty)
#            $2+ - candidate key names to try in the Kubernetes secret, in order
# Outputs:   the resolved value on stdout
# Returns:   0 when a non-empty value was found, 1 otherwise
#######################################
read_secret_first_available() {
  local env_value="$1"
  shift

  if [[ -n "$env_value" ]]; then
    printf '%s' "$env_value"
    return 0
  fi

  local key raw decoded
  for key in "$@"; do
    # Lookup failures are deliberately ignored: a missing key simply means the
    # next candidate is tried.
    raw="$(secret_key_b64 "$key" || true)"
    [[ -n "$raw" ]] || continue

    # Only report success when decoding worked AND produced something;
    # otherwise an undecodable value would be returned as an empty secret.
    if decoded="$(printf '%s' "$raw" | decode_b64)" && [[ -n "$decoded" ]]; then
      printf '%s\n' "$decoded"
      return 0
    fi
  done

  return 1
}

# Fail early if the Git-tracked template is missing.
[[ -f "$CONFIG_TEMPLATE" ]] || die "template not found: ${CONFIG_TEMPLATE}"

# Bot token: the environment wins, otherwise the first key present in the
# Kubernetes secret is used.
TELEGRAM_BOT_TOKEN="$(
  read_secret_first_available \
    "${TELEGRAM_BOT_TOKEN:-}" \
    OPENCLAW_TG_BOT_TOKEN \
    OPENCLAW_BOT_TOKEN \
    TELEGRAM_BOT_TOKEN \
    TG_BOT_TOKEN
)" || die "missing Telegram bot token; set TELEGRAM_BOT_TOKEN or add one of the known keys to ${K8S_SECRET}"

# Chat id of the SRE group, resolved the same way.
SRE_GROUP_CHAT_ID="$(
  read_secret_first_available \
    "${SRE_GROUP_CHAT_ID:-}" \
    SRE_GROUP_CHAT_ID \
    TELEGRAM_ALERT_CHAT_ID
)" || die "missing SRE_GROUP_CHAT_ID"

# Both values are substituted verbatim into YAML, so reject anything that is
# empty or could break the document structure before rendering.
[[ "$TELEGRAM_BOT_TOKEN" =~ ^[^[:space:]]+$ ]] || die "TELEGRAM_BOT_TOKEN must be non-empty and contain no whitespace"
[[ "$SRE_GROUP_CHAT_ID" =~ ^-?[0-9]+$ ]] || die "SRE_GROUP_CHAT_ID must be a Telegram numeric chat id"

# Exported so the renderer reads them from the environment, not from argv.
export TELEGRAM_BOT_TOKEN SRE_GROUP_CHAT_ID

#######################################
# Render the Alertmanager template by substituting the secret placeholders.
# Globals:   TELEGRAM_BOT_TOKEN, SRE_GROUP_CHAT_ID (read from the environment
#            by the embedded Python, so they must be exported)
# Arguments: $1 - path of the template file
#            $2 - path the rendered file is written to
# Returns:   non-zero if the template cannot be read, a variable is missing,
#            or a placeholder is still present after substitution
#######################################
render_config() {
  python3 - "$1" "$2" <<'PY'
from pathlib import Path
import os
import sys

template = Path(sys.argv[1])
target = Path(sys.argv[2])

# Explicit UTF-8 keeps the result independent of the caller's locale.
text = template.read_text(encoding="utf-8")
text = text.replace("TELEGRAM_BOT_TOKEN_PLACEHOLDER", os.environ["TELEGRAM_BOT_TOKEN"])
text = text.replace("SRE_GROUP_CHAT_ID_PLACEHOLDER", os.environ["SRE_GROUP_CHAT_ID"])

# Never ship a file that still contains a placeholder string.
if "TELEGRAM_BOT_TOKEN_PLACEHOLDER" in text or "SRE_GROUP_CHAT_ID_PLACEHOLDER" in text:
    raise SystemExit("unreplaced secret placeholder remains in rendered config")

target.write_text(text, encoding="utf-8")
PY
}

# The rendered file contains secrets: keep it private and always remove it on exit.
tmp_rendered="$(mktemp)"
trap 'rm -f -- "$tmp_rendered"' EXIT
chmod 600 "$tmp_rendered"

render_config "$CONFIG_TEMPLATE" "$tmp_rendered"

# Validate the rendered file with the amtool binary inside the running
# container, streaming it over ssh stdin so the secrets never appear in argv.
log "Validating rendered config with live Alertmanager amtool on ${TARGET_HOST}"
# Inside the container: create the scratch copy with private permissions,
# remember the outcome of the check, and always delete the copy so the secrets
# do not linger in the container's /tmp. The escaped dollar signs are expanded
# by the container's sh, not locally.
ssh -o BatchMode=yes -o ConnectTimeout=8 "${TARGET_USER}@${TARGET_HOST}" \
  "docker exec -i alertmanager sh -c 'umask 077; cat >/tmp/alertmanager-rendered.yml && amtool check-config /tmp/alertmanager-rendered.yml; rc=\$?; rm -f /tmp/alertmanager-rendered.yml; exit \$rc'" \
  < "$tmp_rendered"

# In dry-run mode stop after validation; the live file has not been touched.
if [[ "$DRY_RUN" == "--dry-run" ]]; then
  log "DRY RUN: rendered config validated; not deploying"
  exit 0
fi

log "Uploading rendered config to ${TARGET_HOST}:${TARGET_PATH}"

# Stage the new file next to the live one rather than under a predictable name
# in shared /tmp. Both paths are %q-quoted so they pass through the remote
# shell unchanged.
staged_path="${TARGET_PATH}.new"
printf -v target_q '%q' "$TARGET_PATH"
printf -v staged_q '%q' "$staged_path"

# Remove any leftover staging file first so the upload is always created fresh
# with owner-only permissions.
ssh -o BatchMode=yes -o ConnectTimeout=8 "${TARGET_USER}@${TARGET_HOST}" \
  "umask 077 && rm -f -- ${staged_q} && cat > ${staged_q}" < "$tmp_rendered"

# The remote script is a quoted heredoc (no local expansion); the two paths
# arrive as its positional parameters.
ssh -o BatchMode=yes -o ConnectTimeout=8 "${TARGET_USER}@${TARGET_HOST}" \
  "bash -s -- ${target_q} ${staged_q}" <<'REMOTE'
set -euo pipefail
target="$1"
staged="$2"

# The staged copy contains secrets: remove it on every exit path.
trap 'rm -f -- "$staged"' EXIT

backup="${target}.bak.$(date +%Y%m%d%H%M%S)"
cp -- "$target" "$backup"

# Alertmanager bind-mounts a single file. Keep the existing inode instead of mv'ing
# a replacement over it, then restore readable permissions for the container user.
cat -- "$staged" > "$target"
chmod 0644 "$target"

# Re-check through the container's own view of the file; on failure put the
# previous content back so a later container restart does not pick up a broken config.
if ! docker exec alertmanager amtool check-config /etc/alertmanager/alertmanager.yml; then
  echo "ERROR: deployed config failed validation; restoring ${backup}" >&2
  cat -- "$backup" > "$target"
  exit 1
fi

# SIGHUP makes Alertmanager reload its configuration without a restart.
docker kill -s HUP alertmanager >/dev/null
sleep 2
docker inspect alertmanager --format 'status={{.State.Status}} started={{.State.StartedAt}}'
echo "backup=$backup"
REMOTE

log "Alertmanager config deployed and reloaded"