Files
awoooi/scripts/ops/apply-runner-systemd-guardrails.sh
Your Name 34d1c76be9
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / tests (push) Has been cancelled
Code Review / ai-code-review (push) Has been cancelled
fix(ops): route systemd runner baseline alerts
2026-05-05 14:19:58 +08:00

71 lines
1.9 KiB
Bash
Executable File

#!/usr/bin/env bash
set -euo pipefail
# 2026-05-05 ogt + Codex
# Apply resource guardrails to 110 self-hosted runner systemd services.
#
# This script intentionally requires sudo. Run it from 110 when the operator
# approves host-level systemd changes:
#
#   bash scripts/ops/apply-runner-systemd-guardrails.sh --apply
#
# Without --apply it prints the exact changes and exits.
#
# What --apply does, in order:
#   1. Moves the watchdog drop-in of WATCHDOG_UNIT aside (kept as a backup).
#   2. Writes a resource-guard.conf drop-in for every unit in RUNNER_UNITS.
#   3. Runs `systemctl daemon-reload` and restarts WATCHDOG_UNIT only.
#   4. Prints selected `systemctl show` properties for every unit.
#
# Exit status: 0 on success or dry run, 2 on a usage error, otherwise the
# status of the first failing command (set -e).

# Set to 1 by parse_args when --apply is given.
APPLY=0

readonly SYSTEMD_DIR="/etc/systemd/system"
readonly RUNNER_UNITS=(
  actions.runner.owenhytsai-awoooi.awoooi-110.service
  actions.runner.owenhytsai-awoooi.awoooi-110-3.service
  actions.runner.owenhytsai-wooo-aiops.wooo-110-runner-2.service
  actions.runner.owenhytsai-wooo-aiops.wooo-110-runner-3.service
  actions.runner.owenhytsai-wooo-aiops.wooo-runner-110.service
)
readonly WATCHDOG_UNIT="actions.runner.owenhytsai-awoooi.awoooi-110.service"
readonly WATCHDOG_DROPIN="${SYSTEMD_DIR}/${WATCHDOG_UNIT}.d/watchdog.conf"
# systemd only merges drop-in files ending in ".conf", so renaming the file
# to this suffix disables it while keeping its content on disk.
readonly WATCHDOG_BACKUP="${WATCHDOG_DROPIN}.disabled-20260505"

# Print a message to stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a short usage summary to stdout.
usage() {
  printf 'Usage: %s [--apply]\n' "${0##*/}"
  printf '  (no flag)  print the plan and exit without changing anything\n'
  printf '  --apply    make the changes (requires sudo)\n'
}

# Parse command-line arguments into the global APPLY.
# Arguments: the script's positional parameters.
# Exits 2 on an unrecognised argument so a mistyped flag is reported instead
# of being silently ignored.
parse_args() {
  local arg
  APPLY=0
  for arg in "$@"; do
    case "$arg" in
      --apply) APPLY=1 ;;
      -h | --help)
        usage
        exit 0
        ;;
      # Empty arguments are tolerated so wrappers that pass "${FLAG:-}"
      # keep getting a dry run.
      '') ;;
      *)
        printf 'Unknown argument: %s\n' "$arg" >&2
        usage >&2
        exit 2
        ;;
    esac
  done
}

# Abort with a clear message when a required command is not on PATH.
# Arguments: $1 - command name
require_command() {
  command -v "$1" >/dev/null 2>&1 || die "Required command not found: $1"
}

# Print the resource-guard drop-in path for a unit.
# Arguments: $1 - systemd unit name
resource_guard_path() {
  printf '%s\n' "${SYSTEMD_DIR}/${1}.d/resource-guard.conf"
}

# Print the content written to every resource-guard.conf drop-in.
resource_guard_conf() {
  printf '%s\n' \
    '[Service]' \
    'CPUAccounting=yes' \
    'CPUQuota=200%' \
    'MemoryAccounting=yes' \
    'MemoryMax=2G'
}

# Print the first path among "$1", "$1.1", "$1.2", ... that does not exist.
# Used so a re-run never overwrites a backup taken by an earlier run.
# Arguments: $1 - preferred path
unused_backup_path() {
  local base=$1
  local candidate=$1
  local n=0
  while [[ -e "$candidate" ]]; do
    n=$((n + 1))
    candidate="${base}.${n}"
  done
  printf '%s\n' "$candidate"
}

# Print the human-readable plan, including every drop-in file to be written.
print_plan() {
  local unit
  echo "Runner systemd guardrail plan:"
  echo "- Disable bad watchdog drop-in: ${WATCHDOG_DROPIN}"
  echo "- Set CPUAccounting=yes, CPUQuota=200%, MemoryAccounting=yes, MemoryMax=2G"
  for unit in "${RUNNER_UNITS[@]}"; do
    echo "    $(resource_guard_path "$unit")"
  done
  echo "- Restart only ${WATCHDOG_UNIT}; other runner units pick up quotas on next restart"
}

# Move the watchdog drop-in aside if it is present; no-op otherwise.
disable_watchdog_dropin() {
  local backup
  if [[ ! -f "$WATCHDOG_DROPIN" ]]; then
    return 0
  fi
  backup="$(unused_backup_path "$WATCHDOG_BACKUP")"
  sudo mv -- "$WATCHDOG_DROPIN" "$backup"
}

# Create the .d directory and write resource-guard.conf for every unit.
write_resource_guards() {
  local unit
  for unit in "${RUNNER_UNITS[@]}"; do
    sudo mkdir -p "${SYSTEMD_DIR}/${unit}.d"
    resource_guard_conf | sudo tee "$(resource_guard_path "$unit")" >/dev/null
  done
}

# Print the properties an operator needs to confirm the change took effect.
show_status() {
  local unit
  for unit in "${RUNNER_UNITS[@]}"; do
    echo "=== ${unit} ==="
    systemctl show "$unit" \
      -p WatchdogUSec \
      -p NRestarts \
      -p DropInPaths \
      -p CPUQuotaPerSecUSec \
      -p MemoryMax \
      -p ActiveState \
      -p SubState
  done
}

# Entry point: always print the plan; only touch the host with --apply.
main() {
  parse_args "$@"
  print_plan

  if [[ "$APPLY" != "1" ]]; then
    echo
    echo "Dry run only. Re-run with --apply to make changes."
    return 0
  fi

  # Fail before changing anything if the tooling is missing.
  require_command sudo
  require_command systemctl

  disable_watchdog_dropin
  write_resource_guards
  sudo systemctl daemon-reload
  sudo systemctl restart "$WATCHDOG_UNIT"
  show_status
}

main "$@"