# Unit tests for SentryClickHouseMemoryPressure
# 2026-04-25 ogt + Claude Opus 4.7
#
# Run with `promtool test rules <this file>`.
#
# Every test feeds three cAdvisor-style series for the ClickHouse container
# (working set, total usage, memory limit) and checks whether the alert is
# firing at a given evaluation time. The test names and expected annotations
# imply the rule compares working_set / limit against 85% with `for: 10m`;
# the rule itself lives in the referenced rule file and is not visible here.
#
# Series notation: 'VxN' expands to N+1 samples of V, one per `interval`.
# `exp_alerts` is only compared against FIRING alerts, so a pending alert
# matches an empty `exp_alerts` list.

rule_files:
  - ../alerts-unified.yml

evaluation_interval: 1m

tests:
  # ---- Negative test 1: page cache high, working_set low (must not fire) ----
  - interval: 1m
    name: "page cache spike must NOT alert (the original false-positive scenario)"
    input_series:
      # working_set: 411 MiB / 8 GiB = 5% (normal)
      - series: 'container_memory_working_set_bytes{name="sentry-self-hosted-clickhouse-1"}'
        values: '430917632x13'
      # usage_bytes: 7.5 GiB / 8 GiB = 93.75% (would false-fire if the rule
      # used this metric instead of the working set)
      - series: 'container_memory_usage_bytes{name="sentry-self-hosted-clickhouse-1"}'
        values: '8053063680x13'
      - series: 'container_spec_memory_limit_bytes{name="sentry-self-hosted-clickhouse-1"}'
        values: '8589934592x13'
    alert_rule_test:
      - eval_time: 12m
        alertname: SentryClickHouseMemoryPressure
        # Expect no alerts at all (exp_alerts left empty).
        exp_alerts: []

  # ---- Negative test 2: working_set elevated but < 85% (must not fire) ----
  - interval: 1m
    name: "working_set 80% must NOT alert (below 85% threshold)"
    input_series:
      # working_set: 6.4 GiB / 8 GiB = 80% (< 85%, must not fire)
      - series: 'container_memory_working_set_bytes{name="sentry-self-hosted-clickhouse-1"}'
        values: '6871947673x13'
      - series: 'container_memory_usage_bytes{name="sentry-self-hosted-clickhouse-1"}'
        values: '6871947673x13'
      - series: 'container_spec_memory_limit_bytes{name="sentry-self-hosted-clickhouse-1"}'
        values: '8589934592x13'
    alert_rule_test:
      - eval_time: 12m
        alertname: SentryClickHouseMemoryPressure
        exp_alerts: []

  # ---- Positive test 1: working_set > 85% sustained for 10 minutes (must fire) ----
  - interval: 1m
    name: "working_set 86.7% sustained 10m MUST alert (real memory pressure)"
    input_series:
      # working_set: 6.94 GiB / 8 GiB = 86.72% (sustained high-water mark)
      - series: 'container_memory_working_set_bytes{name="sentry-self-hosted-clickhouse-1"}'
        values: '7449424589x14'
      - series: 'container_memory_usage_bytes{name="sentry-self-hosted-clickhouse-1"}'
        values: '7449424589x14'
      - series: 'container_spec_memory_limit_bytes{name="sentry-self-hosted-clickhouse-1"}'
        values: '8589934592x14'
    alert_rule_test:
      # Boundary check: after 9 minutes over the threshold the alert should
      # still be pending (assumes the rule uses `for: 10m`, as the test names
      # in this file state), so nothing is firing yet.
      - eval_time: 9m
        alertname: SentryClickHouseMemoryPressure
        exp_alerts: []
      - eval_time: 12m
        alertname: SentryClickHouseMemoryPressure
        exp_alerts:
          - exp_labels:
              alertname: SentryClickHouseMemoryPressure
              alert_category: infrastructure
              auto_repair: "false"
              component: sentry-clickhouse
              name: sentry-self-hosted-clickhouse-1
              notification_type: TYPE-1
              severity: warning
              team: platform
            exp_annotations:
              summary: "Sentry ClickHouse 工作集記憶體 > 85% limit"
              description: "sentry clickhouse working_set / mem_limit = 86.72% (排除 page cache)。"
              runbook: "檢查 Sentry 查詢壓力;確認非 page cache 假象;必要時調整 /opt/sentry/docker-compose.override.yml clickhouse mem_limit"

  # ---- Negative test 3: spike shorter than 10 minutes (must not fire; filtered by for: 10m) ----
  - interval: 1m
    name: "working_set 90% spike for only 5m must NOT alert (for:10m guard)"
    input_series:
      # 90% for the first 5 samples (0m-4m), then back down to 5%
      - series: 'container_memory_working_set_bytes{name="sentry-self-hosted-clickhouse-1"}'
        values: '7730941132x4 430917632x6'
      - series: 'container_memory_usage_bytes{name="sentry-self-hosted-clickhouse-1"}'
        values: '7730941132x4 430917632x6'
      - series: 'container_spec_memory_limit_bytes{name="sentry-self-hosted-clickhouse-1"}'
        values: '8589934592x12'
    alert_rule_test:
      # During the spike the expression is above the threshold, so only the
      # `for:` clause can keep the alert from firing. This is the evaluation
      # that actually exercises the guard.
      - eval_time: 4m
        alertname: SentryClickHouseMemoryPressure
        exp_alerts: []
      # After the spike the expression is false again; nothing may be firing.
      - eval_time: 11m
        alertname: SentryClickHouseMemoryPressure
        exp_alerts: []