Files
awoooi/scripts/ops/tests/test_host_runaway_process_exporter.py
Your Name ff18872a23
Some checks failed
Code Review / ai-code-review (push) Successful in 14s
Deploy Alert Rules / Deploy Prometheus Alert Rules (push) Failing after 26s
Ansible / Reboot Recovery Contract / validate (push) Has been cancelled
feat(ops): 新增 host runaway process aiops guard
2026-06-18 14:17:03 +08:00

145 lines
4.8 KiB
Python

from __future__ import annotations
import importlib.util
import subprocess
import sys
from pathlib import Path
# Directory two levels above this test file's own location (parents[1] of the
# resolved path); both scripts under test are looked up directly inside it.
SCRIPT_ROOT = Path(__file__).resolve().parents[1]

# The script filenames contain hyphens, so they are addressed by filesystem
# path rather than by importable module name.
EXPORTER_PATH = SCRIPT_ROOT.joinpath("host-runaway-process-exporter.py")
REMEDIATION_PATH = SCRIPT_ROOT.joinpath("host-runaway-process-remediation.py")
def load_exporter():
    """Load the exporter script from ``EXPORTER_PATH`` and return the module.

    The file is imported by path under the module name
    ``host_runaway_process_exporter``. Every call builds and executes a new
    module object and overwrites the ``sys.modules`` entry of that name.

    Returns:
        The freshly executed exporter module.

    Raises:
        AssertionError: if no import spec (or no loader) could be created
            for the script path.
    """
    module_spec = importlib.util.spec_from_file_location(
        "host_runaway_process_exporter",
        EXPORTER_PATH,
    )
    assert module_spec
    assert module_spec.loader

    exporter_module = importlib.util.module_from_spec(module_spec)
    # The module is registered under its spec name *before* its body runs.
    # NOTE(review): presumably needed so code that resolves the module by
    # name during execution can find it - confirm against the exporter.
    sys.modules[module_spec.name] = exporter_module
    module_spec.loader.exec_module(exporter_module)
    return exporter_module
def test_classifies_orphan_stockplatform_headless_group() -> None:
    """Expect exactly one runaway group for the headless-browser fixture.

    Three ``ps``-style rows are parsed and classified with a minimum age of
    1800 and a minimum CPU of 50. The single resulting group must carry rule
    ``stockplatform_headless_smoke``, pgid 100, orphan reason ``ppid_1``, a
    CPU total of 120.0 (65.0 + 55.0) and two member rows, so the ``pnpm``
    build row is not part of it.
    """
    exporter = load_exporter()

    # Fixture rows stay flush-left so the literal handed to the parser is
    # exactly the text shown here.
    # NOTE(review): columns look like pid, ppid, pgid, sid, elapsed, %cpu,
    # state, comm, args - confirm against parse_ps_rows.
    ps_output = """
100 1 100 100 7200 65.0 S chrome /opt/chrome/chrome --headless --user-data-dir=/tmp/stockplatform-review-bulk-ux-aa
101 100 100 100 7190 55.0 S chromium /opt/chrome/chromium --type=renderer /tmp/stockplatform-review-bulk-ux-aa
200 10 200 200 600 90.0 S node pnpm --filter @awoooi/web build
"""
    parsed_rows = exporter.parse_ps_rows(ps_output)
    groups = exporter.classify_groups(
        parsed_rows,
        min_age_seconds=1800,
        min_cpu_percent=50,
    )

    assert len(groups) == 1
    runaway = groups[0]
    assert runaway.rule_id == "stockplatform_headless_smoke"
    assert runaway.pgid == 100
    assert runaway.orphan_reason == "ppid_1"
    assert runaway.cpu_percent == 120.0
    assert len(runaway.rows) == 2
def test_ignores_non_orphan_or_young_browser_processes() -> None:
    """Expect no runaway groups for the non-orphan / young fixture.

    The fixture holds one browser group whose first row has 99 in the second
    column instead of 1, and a separate row whose elapsed value (60) is below
    the 1800 minimum age. ``classify_groups`` must return an empty list.
    """
    exporter = load_exporter()

    # Fixture rows stay flush-left so the literal handed to the parser is
    # exactly the text shown here.
    ps_output = """
100 99 100 100 7200 65.0 S chrome /opt/chrome/chrome --headless --user-data-dir=/tmp/stockplatform-review-bulk-ux-aa
101 100 100 100 7190 55.0 S chromium /opt/chrome/chromium /tmp/stockplatform-review-bulk-ux-aa
300 1 300 300 60 120.0 S chrome /opt/chrome/chrome --headless --user-data-dir=/tmp/stockplatform-review-bulk-ux-bb
"""
    parsed_rows = exporter.parse_ps_rows(ps_output)
    groups = exporter.classify_groups(
        parsed_rows,
        min_age_seconds=1800,
        min_cpu_percent=50,
    )

    assert groups == []
def test_parses_bsd_elapsed_time_for_local_smoke() -> None:
    """Expect ``[[dd-]hh:]mm:ss`` elapsed values to be parsed into seconds.

    ``01:00:00`` must become 3600 and ``2-00:00:10`` must become 172810
    (two days plus ten seconds) on the ``etimes`` attribute of the rows.
    """
    exporter = load_exporter()

    # Fixture rows stay flush-left so the literal handed to the parser is
    # exactly the text shown here.
    ps_output = """
100 1 100 100 01:00:00 65.0 S chrome /opt/chrome/chrome --headless --user-data-dir=/tmp/stockplatform-review-bulk-ux-aa
101 100 100 100 2-00:00:10 55.0 S chromium /opt/chrome/chromium /tmp/stockplatform-review-bulk-ux-aa
"""
    rows = exporter.parse_ps_rows(ps_output)

    # Row index -> expected elapsed time in seconds.
    expected_elapsed = {0: 3600, 1: 172810}
    for index, seconds in expected_elapsed.items():
        assert rows[index].etimes == seconds
def test_renders_ci_load_and_swap_without_authorizing_repair(tmp_path: Path) -> None:
    """Expect rendered metrics to report state but never authorize repair.

    One classified group is rendered for host ``110`` together with an
    active-container count of 3, a load ratio of 1.25 and a swap ratio of
    1.0. The output must contain the read-only monitor-up line, the container
    count, the swap ratio formatted with six decimals, a remediation
    authorization value of 0, and the rule label of the detected group.

    ``tmp_path`` is requested from pytest but is not used by this test.
    """
    exporter = load_exporter()

    # The same thresholds are passed to classification and to rendering.
    thresholds = {"min_age_seconds": 1800, "min_cpu_percent": 50}

    ps_line = "100 1 100 100 7200 65.0 S chrome /opt/chrome/chrome --headless --user-data-dir=/tmp/stockplatform-review-bulk-ux-aa"
    parsed_rows = exporter.parse_ps_rows(ps_line)
    groups = exporter.classify_groups(parsed_rows, **thresholds)

    metrics = exporter.render_metrics(
        host="110",
        groups=groups,
        active_action_containers=3,
        now=123,
        load_ratio=1.25,
        swap_ratio=1.0,
        **thresholds,
    )

    expected_fragments = (
        'awoooi_host_runaway_process_monitor_up{host="110",mode="read_only"} 1',
        'awoooi_host_gitea_actions_active_container_count{host="110"} 3',
        'awoooi_host_swap_used_ratio{host="110"} 1.000000',
        'awoooi_host_runaway_process_remediation_authorized{host="110"} 0',
        'rule="stockplatform_headless_smoke"',
    )
    for fragment in expected_fragments:
        assert fragment in metrics
def test_remediation_defaults_to_dry_run(tmp_path: Path) -> None:
    """Expect the remediation script to run as a dry run when not told to apply.

    A one-row ``ps`` capture is written to a temporary file and the script is
    invoked as a subprocess with ``--ps-file`` and ``--rule`` only. The call
    must exit successfully (``check=True``) and its stdout must report dry-run
    mode, a runtime gate of 0 and a ``dry_run`` action.
    """
    ps_capture = tmp_path / "ps.txt"
    ps_capture.write_text(
        "100 1 100 100 7200 65.0 S chrome /opt/chrome/chrome --headless --user-data-dir=/tmp/stockplatform-review-bulk-ux-aa\n",
        encoding="utf-8",
    )

    # Run with the interpreter executing the tests; no --apply flag is given.
    command = [
        sys.executable,
        str(REMEDIATION_PATH),
        "--ps-file",
        str(ps_capture),
        "--rule",
        "stockplatform_headless_smoke",
    ]
    completed = subprocess.run(command, check=True, capture_output=True, text=True)

    expected_fragments = (
        '"mode": "dry_run"',
        '"runtime_gate": 0',
        '"action": "dry_run"',
    )
    for fragment in expected_fragments:
        assert fragment in completed.stdout
def test_remediation_refuses_apply_without_gates(tmp_path: Path) -> None:
    """Expect ``--apply`` to be rejected when nothing else is supplied.

    The remediation script is invoked as a subprocess with ``--apply`` added
    to the ``--ps-file`` / ``--rule`` arguments. It must exit with a non-zero
    status and print ``Refusing apply`` on stderr.
    """
    ps_capture = tmp_path / "ps.txt"
    ps_capture.write_text(
        "100 1 100 100 7200 65.0 S chrome /opt/chrome/chrome --headless --user-data-dir=/tmp/stockplatform-review-bulk-ux-aa\n",
        encoding="utf-8",
    )

    command = [
        sys.executable,
        str(REMEDIATION_PATH),
        "--ps-file",
        str(ps_capture),
        "--apply",
        "--rule",
        "stockplatform_headless_smoke",
    ]
    # No check=True here: a failing exit status is the expected outcome.
    completed = subprocess.run(command, capture_output=True, text=True)

    assert completed.returncode != 0
    assert "Refusing apply" in completed.stderr