fix(monitoring): verify alert rule deploy content
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
Deploy Alert Rules / Deploy Prometheus Alert Rules (push) Successful in 23s

This commit is contained in:
Your Name
2026-05-20 13:26:24 +08:00
parent 1b525b7c18
commit 4956fbb849

View File

@@ -16,6 +16,23 @@ DRY_RUN="${1:-}"
# Print a message to stdout prefixed with the current wall-clock time
# in the form "[HH:MM:SS] ". All arguments are joined into one message.
log() {
  printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*"
}
#######################################
# Print the SHA-256 hex digest of a local file.
# Uses sha256sum when it is on PATH, otherwise falls back to
# `shasum -a 256`.
# Arguments: $1 - path of the file to hash
# Outputs:   the hex digest followed by a newline on stdout
# Returns:   0 on success; non-zero (with no digest printed) when the
#            file cannot be read or the hashing tool fails
#######################################
file_sha() {
  local path=$1
  local digest
  # The file is fed on stdin instead of being passed as an argument so that
  # a name starting with '-' is never parsed as an option and so that GNU
  # sha256sum does not prefix the digest with '\' for names containing a
  # backslash. Checking the status here also stops a failed hash from being
  # reported as an empty string with exit status 0.
  if command -v sha256sum >/dev/null 2>&1; then
    digest=$(sha256sum <"$path") || return 1
  else
    digest=$(shasum -a 256 <"$path") || return 1
  fi
  # Tool output is "<hex>  -"; keep only the digest field.
  printf '%s\n' "${digest%% *}"
}
#######################################
# Print the SHA-256 hex digest of a file on the target host.
# Runs over ssh as user "wooo" and uses sha256sum on the remote side when
# available, otherwise `shasum -a 256`.
# Globals:   TARGET_HOST (read)
# Arguments: $1 - path of the file on the remote host
# Outputs:   the hex digest followed by a newline on stdout
# Returns:   0 on success; non-zero when ssh fails or the remote file
#            cannot be hashed
#######################################
remote_file_sha() {
  local sq="'"
  local sq_escaped="'\\''"
  local quoted_path remote_script

  # Single-quote the path for the remote shell, rewriting every embedded
  # ' as '\'' so a quote in the path can neither break the command nor
  # inject extra shell syntax.
  quoted_path=${1//"$sq"/"$sq_escaped"}
  quoted_path="'${quoted_path}'"

  # The single-quoted pieces below are expanded by the REMOTE shell, not
  # locally. The file is read on stdin and the exit status is checked so a
  # hashing failure is reported instead of yielding an empty digest.
  remote_script="f=${quoted_path}; "
  remote_script+='if command -v sha256sum >/dev/null 2>&1; then '
  remote_script+='digest=$(sha256sum <"$f") || exit 1; '
  remote_script+='else digest=$(shasum -a 256 <"$f") || exit 1; fi; '
  remote_script+='printf "%s\n" "${digest%% *}"'

  ssh "wooo@${TARGET_HOST}" "$remote_script"
}
# 確認檔案存在
for file in "$ALERT_RULES_FILE" "$SLO_RULES_FILE"; do
if [ ! -f "$file" ]; then
@@ -59,6 +76,20 @@ scp "$ALERT_RULES_FILE" wooo@${TARGET_HOST}:${TARGET_ALERTS_PATH}
# Copy the SLO rules file to its path on the target host. The destination
# is quoted so the host and path are passed to scp as a single word.
scp "$SLO_RULES_FILE" "wooo@${TARGET_HOST}:${TARGET_SLO_PATH}"
log "✅ 規則已複製到 ${TARGET_HOST}"
# Compare the SHA-256 digest of each local rules file with the digest of
# its counterpart on the target host, and abort on any difference.
# A failing hash helper is mapped to an empty string so the explicit error
# below is printed rather than the script stopping without a message.
LOCAL_ALERTS_SHA="$(file_sha "$ALERT_RULES_FILE")" || LOCAL_ALERTS_SHA=""
REMOTE_ALERTS_SHA="$(remote_file_sha "$TARGET_ALERTS_PATH")" || REMOTE_ALERTS_SHA=""
LOCAL_SLO_SHA="$(file_sha "$SLO_RULES_FILE")" || LOCAL_SLO_SHA=""
REMOTE_SLO_SHA="$(remote_file_sha "$TARGET_SLO_PATH")" || REMOTE_SLO_SHA=""

# An empty local digest means hashing itself failed. Without the -z guard,
# two empty digests would compare equal and the check would pass vacuously.
if [[ -z "$LOCAL_ALERTS_SHA" || "$LOCAL_ALERTS_SHA" != "$REMOTE_ALERTS_SHA" ]]; then
  echo "ERROR: 遠端 alerts.yml hash 不一致 local=${LOCAL_ALERTS_SHA} remote=${REMOTE_ALERTS_SHA}"
  exit 1
fi

if [[ -z "$LOCAL_SLO_SHA" || "$LOCAL_SLO_SHA" != "$REMOTE_SLO_SHA" ]]; then
  echo "ERROR: 遠端 slo-rules.yml hash 不一致 local=${LOCAL_SLO_SHA} remote=${REMOTE_SLO_SHA}"
  exit 1
fi

log "✅ 遠端規則 hash 驗證通過"
# Reload Prometheus: run curl on the target host to POST to the /-/reload
# endpoint under PROMETHEUS_URL. The ssh destination is quoted so it is
# always passed as a single word.
ssh "wooo@${TARGET_HOST}" "curl -s -X POST ${PROMETHEUS_URL}/-/reload"
# Fixed 3-second pause — presumably to let the reload take effect before
# the checks that follow; confirm the delay is sufficient.
sleep 3
@@ -72,6 +103,13 @@ if [ "$RULE_COUNT" -lt 25 ]; then
exit 1
fi
# Fetch /api/v1/rules through curl on the target host and extract the
# query of the rule named NoAlertsReceived2Hours. The Python snippet walks
# data.groups[].rules[] and prints the first matching rule's "query"
# field, or an empty string when no rule has that name.
NO_ALERTS_QUERY=$(
  ssh wooo@${TARGET_HOST} "curl -s ${PROMETHEUS_URL}/api/v1/rules" \
    | python3 -c "import sys,json; r=json.load(sys.stdin); print(next((x.get('query','') for g in r['data']['groups'] for x in g['rules'] if x.get('name') == 'NoAlertsReceived2Hours'), ''))"
)

# The query must contain the literal label matcher source="alertmanager";
# anything else (including an empty result) aborts the deploy.
case "$NO_ALERTS_QUERY" in
  *'source="alertmanager"'*)
    log "✅ NoAlertsReceived2Hours query 已限制 alertmanager 主鏈路"
    ;;
  *)
    echo "ERROR: NoAlertsReceived2Hours query 未限制 alertmanager 主鏈路: ${NO_ALERTS_QUERY}"
    exit 1
    ;;
esac
# Verify that the key rules exist.
# KEY_RULES lists the rule names iterated by the loop that follows.
KEY_RULES=("SentryDown" "HarborDown" "GiteaDown" "OpenClawDown" "AlertmanagerDown" "AlertChainUnhealthy")
for rule in "${KEY_RULES[@]}"; do