fix(monitoring): verify alert rule deploy content
This commit is contained in:
@@ -16,6 +16,23 @@ DRY_RUN="${1:-}"
|
||||
|
||||
# Print the given arguments to stdout as one line, prefixed with the current
# wall-clock time in the form [HH:MM:SS].
log() {
  printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*"
}
|
||||
|
||||
#######################################
# Print the SHA-256 digest (hex) of a local file.
# Uses sha256sum when it is installed, otherwise falls back to shasum -a 256.
# Arguments: $1 - path of the file to hash
# Outputs:   the digest on stdout; nothing on failure
# Returns:   0 on success, non-zero if the file cannot be read or hashed
#######################################
file_sha() {
  local path=$1
  local out

  # The file is fed on stdin rather than passed as an argument, so names that
  # start with '-' are not parsed as options and the tool's output does not
  # depend on the file name. Capturing the output (instead of piping it into
  # awk) keeps the hashing command's exit status visible.
  if command -v sha256sum >/dev/null 2>&1; then
    out=$(sha256sum <"$path") || return 1
  else
    out=$(shasum -a 256 <"$path") || return 1
  fi

  # Output has the form "<digest>  -"; keep only the first field.
  printf '%s\n' "${out%%[[:space:]]*}"
}
|
||||
|
||||
#######################################
# Print the SHA-256 digest (hex) of a file on the target host, computed there
# over ssh with sha256sum, or shasum -a 256 when sha256sum is not installed.
# Globals:   TARGET_HOST (read)
# Arguments: $1 - path of the file on the remote host
# Outputs:   the 64-character hex digest on stdout; nothing on failure
# Returns:   0 on success, non-zero if ssh or the remote hashing fails or the
#            reply is not a SHA-256 digest
#######################################
remote_file_sha() {
  local remote_path=$1
  local sq="'" repl="'\\''"
  local hex_re='^[0-9a-fA-F]{64}$'
  local escaped out digest

  # Close-escape-reopen every single quote ( ' -> '\'' ) so the path stays one
  # literal word inside the single-quoted string evaluated by the remote shell.
  escaped=${remote_path//$sq/$repl}

  # The first field is extracted locally, so no '$' has to be escaped for the
  # remote side, and ssh's exit status is not hidden behind a pipeline.
  out=$(ssh "wooo@${TARGET_HOST}" "
    if command -v sha256sum >/dev/null 2>&1; then
      sha256sum <'${escaped}'
    else
      shasum -a 256 <'${escaped}'
    fi") || return 1

  # Output has the form "<digest>  -"; keep only the first field and make sure
  # it really is a digest before reporting it.
  digest=${out%%[[:space:]]*}
  [[ "$digest" =~ $hex_re ]] || return 1
  printf '%s\n' "$digest"
}
|
||||
|
||||
# 確認檔案存在
|
||||
for file in "$ALERT_RULES_FILE" "$SLO_RULES_FILE"; do
|
||||
if [ ! -f "$file" ]; then
|
||||
@@ -59,6 +76,20 @@ scp "$ALERT_RULES_FILE" wooo@${TARGET_HOST}:${TARGET_ALERTS_PATH}
|
||||
# Copy the SLO rules file to its destination path on the target host. The
# destination is quoted so the host and path are passed to scp as one word.
scp "$SLO_RULES_FILE" "wooo@${TARGET_HOST}:${TARGET_SLO_PATH}"
log "✅ 規則已複製到 ${TARGET_HOST}"
|
||||
|
||||
# Hash both rule files locally and on the target host after the copy.
LOCAL_ALERTS_SHA="$(file_sha "$ALERT_RULES_FILE")"
REMOTE_ALERTS_SHA="$(remote_file_sha "$TARGET_ALERTS_PATH")"
LOCAL_SLO_SHA="$(file_sha "$SLO_RULES_FILE")"
REMOTE_SLO_SHA="$(remote_file_sha "$TARGET_SLO_PATH")"

# Abort unless each remote file matches its local source. An empty local
# digest means hashing failed; it is rejected explicitly because two empty
# digests would otherwise compare equal and pass the check.
if [ -z "$LOCAL_ALERTS_SHA" ] || [ "$LOCAL_ALERTS_SHA" != "$REMOTE_ALERTS_SHA" ]; then
  echo "ERROR: 遠端 alerts.yml hash 不一致 local=${LOCAL_ALERTS_SHA} remote=${REMOTE_ALERTS_SHA}"
  exit 1
fi
if [ -z "$LOCAL_SLO_SHA" ] || [ "$LOCAL_SLO_SHA" != "$REMOTE_SLO_SHA" ]; then
  echo "ERROR: 遠端 slo-rules.yml hash 不一致 local=${LOCAL_SLO_SHA} remote=${REMOTE_SLO_SHA}"
  exit 1
fi
log "✅ 遠端規則 hash 驗證通過"
|
||||
|
||||
# Reload Prometheus: send an HTTP POST to its /-/reload endpoint, issued from
# the target host over ssh. The URL is single-quoted for the remote shell so
# characters such as '&' or '?' in it are not interpreted there
# (assumes PROMETHEUS_URL contains no single quote — confirm).
ssh "wooo@${TARGET_HOST}" "curl -s -X POST '${PROMETHEUS_URL}/-/reload'"
# NOTE(review): presumably gives Prometheus time to finish loading before the
# rule checks that follow — confirm the 3 second delay is sufficient.
sleep 3
|
||||
@@ -72,6 +103,13 @@ if [ "$RULE_COUNT" -lt 25 ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Fetch the loaded rules from the Prometheus HTTP API (via the target host)
# and extract the query of the rule named NoAlertsReceived2Hours. The result
# is an empty string when no rule with that name is present.
NO_ALERTS_QUERY=$(
  ssh "wooo@${TARGET_HOST}" "curl -s '${PROMETHEUS_URL}/api/v1/rules'" \
    | python3 -c '
import json
import sys

rules = json.load(sys.stdin)
print(next(
    (rule.get("query", "")
     for group in rules["data"]["groups"]
     for rule in group["rules"]
     if rule.get("name") == "NoAlertsReceived2Hours"),
    "",
))
'
)
# The query must contain the source="alertmanager" label matcher; an empty or
# unrestricted query fails the deploy.
if [[ "$NO_ALERTS_QUERY" != *'source="alertmanager"'* ]]; then
  echo "ERROR: NoAlertsReceived2Hours query 未限制 alertmanager 主鏈路: ${NO_ALERTS_QUERY}"
  exit 1
fi
log "✅ NoAlertsReceived2Hours query 已限制 alertmanager 主鏈路"
|
||||
|
||||
# 驗證關鍵規則存在
|
||||
KEY_RULES=("SentryDown" "HarborDown" "GiteaDown" "OpenClawDown" "AlertmanagerDown" "AlertChainUnhealthy")
|
||||
for rule in "${KEY_RULES[@]}"; do
|
||||
|
||||
Reference in New Issue
Block a user