Some checks failed
CD Pipeline / workflow-shape (push) Successful in 1s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / tests (push) Has been cancelled
58 lines
2.0 KiB
Python
58 lines
2.0 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
|
|
|
|
# Base directory for the paths below: parents[3] is three levels above the
# directory that contains this module (presumably the repository root —
# confirm against the actual checkout layout).
ROOT = Path(__file__).resolve().parents[3]

# Alert rules file that every test in this module inspects.
ALERTS = ROOT.joinpath("ops", "monitoring", "alerts-unified.yml")
|
|
|
|
|
|
def load_alerts() -> dict[str, dict]:
    """Return the rules from the ``ALERTS`` file, keyed by alert name.

    The file is read as UTF-8 and parsed with ``yaml.safe_load``. Every rule
    in every entry of the top-level ``groups`` list that carries an ``alert``
    key is collected; rules without that key are skipped. When two rules
    share an alert name, the later one replaces the earlier one.

    Returns:
        Mapping of alert name to the rule mapping as parsed from YAML.

    Raises:
        KeyError: If the document has no top-level ``groups`` key.
    """
    payload = yaml.safe_load(ALERTS.read_text(encoding="utf-8"))
    alerts: dict[str, dict] = {}
    for group in payload["groups"]:
        # A group written as ``rules:`` with no entries parses to None, and
        # ``.get("rules", [])`` would hand that None straight to the loop.
        # Treat a missing or empty value as "no rules" instead of crashing.
        for rule in group.get("rules") or []:
            if "alert" in rule:
                alerts[rule["alert"]] = rule
    return alerts
|
|
|
|
|
|
def test_110_moderate_pressure_alert_routes_to_live_controller() -> None:
    """Verify the ``Host110SustainedModeratePressure`` rule.

    Checks that the expression contains the expected thresholds and container
    names, that ``for`` is ``1m``, that the ``auto_repair`` label is ``"true"``,
    and that the auto-repair action and runbook annotations contain the
    expected controller path, flags, and safety wording.
    """
    rule = load_alerts()["Host110SustainedModeratePressure"]
    expr = str(rule["expr"])
    annotations = rule["annotations"]
    action = annotations["auto_repair_action"]

    # Fragments that must appear somewhere in the alert expression.
    expected_in_expr = (
        'awoooi_host_load5_per_core{host="110"} > 0.75',
        'docker_container_cpu_cores{host="110"',
        "> 2.0",
        "gitea",
        "stockplatform-v2-postgres-1",
    )
    for fragment in expected_in_expr:
        assert fragment in expr

    assert rule["for"] == "1m"
    assert rule["labels"]["auto_repair"] == "true"

    # Controller script path and flags that must appear in the repair action.
    expected_in_action = (
        "/home/wooo/scripts/host-sustained-load-controller.py",
        "--load5-per-core-threshold 0.75",
        "--container-cpu-threshold 2.0",
    )
    for fragment in expected_in_action:
        assert fragment in action

    # Safety wording that the runbook annotation must carry verbatim.
    expected_in_runbook = (
        "不讀 secret",
        "禁止 Docker / systemd / Nginx / DB restart",
    )
    for phrase in expected_in_runbook:
        assert phrase in annotations["runbook"]
|
|
|
|
|
|
def test_critical_sustained_load_alert_uses_deployed_controller_path() -> None:
    """Verify the ``HostLoadAverageSustainedHigh`` auto-repair action path.

    The action must reference the controller script under
    ``/home/wooo/scripts`` and must not reference the ``scripts/ops`` path.
    """
    rule = load_alerts()["HostLoadAverageSustainedHigh"]
    action = rule["annotations"]["auto_repair_action"]

    deployed_path = "/home/wooo/scripts/host-sustained-load-controller.py"
    rejected_path = "scripts/ops/host-sustained-load-controller.py"

    assert deployed_path in action
    assert rejected_path not in action
|
|
|
|
|
|
def test_backup_aggregate_alert_excludes_old_wrapper_noise() -> None:
    """Verify the ``BackupAggregateRunFailed`` expression's job selector.

    The expression must select the failed-count metric with
    ``exported_job!="backup_all"`` and must not contain a comparison on
    ``exported_job="backup_all"``.
    """
    expr = str(load_alerts()["BackupAggregateRunFailed"]["expr"])

    required_selector = (
        'awoooi_backup_last_run_failed_count{host="110",exported_job!="backup_all"}'
    )
    forbidden_fragment = 'exported_job="backup_all"} > 0'

    assert required_selector in expr
    assert forbidden_fragment not in expr
|