diff --git a/scripts/ci/wait-host-web-build-pressure.sh b/scripts/ci/wait-host-web-build-pressure.sh index 6dd62ff8..5dac0288 100755 --- a/scripts/ci/wait-host-web-build-pressure.sh +++ b/scripts/ci/wait-host-web-build-pressure.sh @@ -7,6 +7,7 @@ set -euo pipefail # production host and a CI host, so CD must not start a new Docker/Next build # while load, BuildKit, Gitea Actions, or headless smoke pressure is already high. # This gate never kills, renices, or rewrites another repo's process tree. +# 2026-06-28 Codex: no-op CD trigger after restoring the AWOOI system runner. ATTEMPTS="${HOST_WEB_BUILD_PRESSURE_ATTEMPTS:-${HOST_WEB_BUILD_PRESSURE_MAX_ATTEMPTS:-60}}" SLEEP_SECONDS="${HOST_WEB_BUILD_PRESSURE_SLEEP_SECONDS:-${HOST_WEB_BUILD_PRESSURE_INTERVAL:-10}}" diff --git a/scripts/reboot-recovery/full-stack-cold-start-check.sh b/scripts/reboot-recovery/full-stack-cold-start-check.sh index 4703aa84..037ecc3c 100755 --- a/scripts/reboot-recovery/full-stack-cold-start-check.sh +++ b/scripts/reboot-recovery/full-stack-cold-start-check.sh @@ -280,6 +280,9 @@ echo "GITEA_CODE $(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://12 echo "PROM_CODE $(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://127.0.0.1:9090/-/ready || true)" echo "AM_CODE $(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://127.0.0.1:9093/-/healthy || true)" echo "SENTRY_CODE $(curl -s -o /dev/null -w "%{http_code}" --max-time 8 http://127.0.0.1:9000/ || true)" +echo "ACTION_RUNNER_UNIT_FILE_COUNT $(systemctl list-unit-files "actions.runner.*" --no-legend --plain 2>/dev/null | awk "END {print NR+0}")" +echo "ACTION_RUNNER_ACTIVE_COUNT $(systemctl list-units "actions.runner.*" --state=active --no-legend --plain 2>/dev/null | awk "END {print NR+0}")" +echo "ACTION_RUNNER_ENABLED_COUNT $(systemctl list-unit-files "actions.runner.*" --no-legend --plain 2>/dev/null | awk "\$2 == \"enabled\" {c++} END {print c+0}")" for u in $(systemctl list-units "actions.runner.*" --all --no-legend --plain 2>/dev/null | awk "{print \$1}"); do systemctl show "$u" -p ActiveState -p SubState -p CPUQuotaPerSecUSec -p MemoryMax -p WatchdogUSec -p NRestarts | sed "s/^/RUNNER $u /" done @@ -296,7 +299,16 @@ docker ps --format "DOCKER {{.Names}}\t{{.Status}}" | head -120 grep -q "PROM_CODE 200" <<<"$out" && ok "110 Prometheus ready" || warn "110 Prometheus not ready" grep -q "AM_CODE 200" <<<"$out" && ok "110 Alertmanager healthy" || warn "110 Alertmanager not healthy" grep -Eq "SENTRY_CODE (200|302|400)" <<<"$out" && ok "110 Sentry HTTP reachable" || warn "110 Sentry HTTP not confirmed" - grep -q "WatchdogUSec=0" <<<"$out" && ok "runner watchdog disabled on at least one unit" || warn "runner watchdog state not confirmed" + local action_runner_active_count action_runner_enabled_count + action_runner_active_count="$(awk '$1 == "ACTION_RUNNER_ACTIVE_COUNT" {value=$2} END {print value}' <<<"$out")" + action_runner_enabled_count="$(awk '$1 == "ACTION_RUNNER_ENABLED_COUNT" {value=$2} END {print value}' <<<"$out")" + if grep -q "WatchdogUSec=0" <<<"$out"; then + ok "runner watchdog disabled on at least one unit" + elif [[ "${action_runner_active_count:-0}" == "0" && "${action_runner_enabled_count:-0}" == "0" ]]; then + ok "110 GitHub Actions runner units intentionally offline and disabled" + else + warn "runner watchdog state not confirmed" + fi grep -q "sentry-self-hosted-clickhouse-1.*Restarting" <<<"$out" && warn "Sentry ClickHouse restarting" || ok "Sentry ClickHouse not visibly restarting" }