#!/usr/bin/env bash
#
# Host pressure gate for CD image builds.
#
# Polls the host until no foreign web build, no excessive load/CI pressure and
# no headless smoke run is detected, then exits 0 so the build may start.
# Exits 1 when pressure is still present after the attempt budget is used up,
# unless HOST_WEB_BUILD_PRESSURE_WARN_ONLY=1, in which case it warns and exits 0.
#
# 2026-05-21 Codex: protect the shared 110 host runner from overlapping
# host-side frontend production builds launched by other repositories.
# 2026-06-27 Codex: make the gate enforce real host pressure too. 110 is both a
# production host and a CI host, so CD must not start a new Docker/Next build
# while load, BuildKit, Gitea Actions, or headless smoke pressure is already high.
# This gate never kills, renices, or rewrites another repo's process tree.
set -euo pipefail

# Tunables. Each one can be overridden from the runner environment; where two
# variable names are accepted, the first is preferred and the second is a
# fallback alias.

# How many times to check for pressure, and how long to pause between checks.
ATTEMPTS="${HOST_WEB_BUILD_PRESSURE_ATTEMPTS:-${HOST_WEB_BUILD_PRESSURE_MAX_ATTEMPTS:-60}}"
SLEEP_SECONDS="${HOST_WEB_BUILD_PRESSURE_SLEEP_SECONDS:-${HOST_WEB_BUILD_PRESSURE_INTERVAL:-10}}"

# "1" turns an exhausted attempt budget into a warning instead of a failure.
WARN_ONLY="${HOST_WEB_BUILD_PRESSURE_WARN_ONLY:-0}"

# Pressure is reported when a measured value is strictly greater than its limit.
MAX_LOAD5_PER_CORE="${HOST_WEB_BUILD_PRESSURE_MAX_LOAD5_PER_CORE:-0.85}"
MAX_CI_CPU_PERCENT="${HOST_WEB_BUILD_PRESSURE_MAX_CI_CPU_PERCENT:-250}"

# One Gitea Actions task container/process group is the current job itself.
# Block only when there is additional CI/BuildKit pressure unless explicitly
# tightened by the runner environment.
MAX_ACTIVE_CI_PROCESS_GROUPS="${HOST_WEB_BUILD_PRESSURE_MAX_ACTIVE_CI_PROCESS_GROUPS:-1}"
MAX_ACTIVE_CI_CONTAINERS="${HOST_WEB_BUILD_PRESSURE_MAX_ACTIVE_CI_CONTAINERS:-1}"
MAX_ORPHAN_BROWSER_GROUPS="${HOST_WEB_BUILD_PRESSURE_MAX_ORPHAN_BROWSER_GROUPS:-0}"

# Prometheus textfile the thresholds are read from, and the exporter that is
# run (when executable) to refresh it before each check.
METRICS_FILE="${HOST_RUNAWAY_PROCESS_METRICS_FILE:-${HOST_WEB_BUILD_PRESSURE_METRICS_FILE:-/home/wooo/node_exporter_textfiles/host_runaway_process.prom}}"
EXPORTER="${HOST_RUNAWAY_PROCESS_EXPORTER:-/home/wooo/scripts/host-runaway-process-exporter.py}"

#######################################
# Print the ps command line used to snapshot the process table.
# The `--sort=-pcpu` form is used when the local ps accepts it; otherwise the
# `ps -axo ... command=` form is printed.
# Outputs: the chosen command line, as one line on stdout
#######################################
default_ps_command() {
  if ps -eo pid=,ppid=,pcpu=,pmem=,args= --sort=-pcpu >/dev/null 2>&1; then
    printf '%s\n' "ps -eo pid=,ppid=,pcpu=,pmem=,args= --sort=-pcpu"
  else
    printf '%s\n' "ps -axo pid=,ppid=,pcpu=,pmem=,command="
  fi
}

# The value is executed later through `bash -c`, so an override supplied via
# HOST_WEB_BUILD_PRESSURE_PS_COMMAND may be any shell command line.
PS_COMMAND="${HOST_WEB_BUILD_PRESSURE_PS_COMMAND:-$(default_ps_command)}"

#######################################
# List running next/turbo/vite build processes that belong to other projects.
# Lines mentioning this project's own checkout paths, /app/apps/web, or this
# script are filtered out.
# Globals:  PS_COMMAND (read; executed through `bash -c`)
# Outputs:  matching process-table lines, unmodified, on stdout
#
# Matching is done on a lowercased copy of each line rather than through
# IGNORECASE, which only gawk honours; other awk implementations ignore it.
# The bracketed first letters ([n], [t], [v]) keep the pattern from matching
# the awk process's own command line in the snapshot, so keep that form and do
# not spell the matched phrases out inside the awk program text.
#######################################
list_foreign_web_builds() {
  bash -c "$PS_COMMAND" | awk '
    { line = tolower($0) }
    line ~ /[n]ext[\/[:alnum:]._-]*[[:space:]]+build|[t]urbo[[:space:]]+build|[v]ite[[:space:]]+build/ {
      if (line ~ /\/workspace\/wooo\/awoooi/) next
      if (line ~ /\/users\/ogt\/awoooi/) next
      if (line ~ /\/private\/tmp\/awoooi/) next
      if (line ~ /\/app\/apps\/web/) next
      if (line ~ /scripts\/ci\/wait-host-web-build-pressure\.sh/) next
      print
    }
  '
}

#######################################
# Run the metrics exporter, if one is installed, before reading metrics.
# Best effort by design: the exporter's output is discarded and a failing
# exporter is ignored, so the function succeeds either way.
# Globals:  EXPORTER (read); the four exporter settings below are passed in
#           the exporter's environment, keeping any value already set.
# Returns:  0 always
#######################################
refresh_metrics() {
  # Nothing to refresh when the exporter is absent or not executable.
  [ -x "$EXPORTER" ] || return 0

  AIOPS_HOST_LABEL="${AIOPS_HOST_LABEL:-110}" \
    NODE_EXPORTER_TEXTFILE_DIR="${NODE_EXPORTER_TEXTFILE_DIR:-/home/wooo/node_exporter_textfiles}" \
    AIOPS_RUNAWAY_PROCESS_MIN_AGE_SECONDS="${AIOPS_RUNAWAY_PROCESS_MIN_AGE_SECONDS:-1800}" \
    AIOPS_RUNAWAY_PROCESS_MIN_CPU_PERCENT="${AIOPS_RUNAWAY_PROCESS_MIN_CPU_PERCENT:-50}" \
    "$EXPORTER" >/dev/null 2>&1 || true
}

#######################################
# Print the value of a metric from the Prometheus textfile.
# A line matches when its first field is the metric name, either bare or
# followed by a `{label...}` set. When several lines match, the value from the
# last one is printed. The value is taken from the last field of the line.
# Globals:    METRICS_FILE (read)
# Arguments:  $1 - metric name
# Outputs:    the value on stdout
# Returns:    non-zero when the file is unreadable or the metric is absent
#######################################
metric_value() {
  local name="$1"
  [ -r "$METRICS_FILE" ] || return 1

  awk -v metric="$name" '
    $1 ~ ("^" metric "(\\{|$)") { value = $NF }
    END {
      if (value == "") exit 1
      print value
    }
  ' "$METRICS_FILE"
}

#######################################
# Print the sum of a metric over all of its series in the Prometheus textfile.
# A line matches when its first field is the metric name, either bare or
# followed by a `{label...}` set; the last field of each match is added up.
# Globals:    METRICS_FILE (read)
# Arguments:  $1 - metric name
# Outputs:    the sum on stdout
# Returns:    non-zero when the file is unreadable or no line matches
#######################################
metric_sum() {
  local name="$1"
  [ -r "$METRICS_FILE" ] || return 1

  awk -v metric="$name" '
    $1 ~ ("^" metric "(\\{|$)") {
      sum += $NF
      found = 1
    }
    END {
      if (!found) exit 1
      print sum
    }
  ' "$METRICS_FILE"
}

#######################################
# Print the 5-minute load average divided by the number of CPU cores.
# The exported metric awoooi_host_load5_per_core is used when available.
# Otherwise the value is computed from /proc: cores are counted from the
# `processor` lines of /proc/cpuinfo (at least 1 is assumed) and the load is
# the second field of /proc/loadavg.
# Outputs:  the ratio on stdout (six decimals when computed from /proc)
# Returns:  non-zero when neither the metric nor /proc/loadavg is available
#######################################
load5_per_core() {
  metric_value "awoooi_host_load5_per_core" 2>/dev/null || awk '
    BEGIN {
      cores = 0
      while ((getline line < "/proc/cpuinfo") > 0) {
        if (line ~ /^processor[[:space:]]*:/) cores += 1
      }
      close("/proc/cpuinfo")
      if (cores < 1) cores = 1

      if ((getline loadline < "/proc/loadavg") <= 0) exit 1
      split(loadline, parts, " ")
      printf "%.6f\n", parts[2] / cores
    }
  '
}

#######################################
# Numeric "left > right" test that also works for decimal values.
# Both operands must look like decimal numbers (optional sign, optional
# fraction, optional exponent). Without that check awk falls back to a
# lexicographic string comparison as soon as one side is not numeric, which
# would make a value such as "NaN" compare greater than any threshold.
# Arguments:  $1 - left operand, $2 - right operand
# Returns:    0 when left > right, 1 when not, 2 when an operand is not a number
#######################################
greater_than() {
  awk -v left="$1" -v right="$2" '
    function is_number(text) {
      return text ~ /^[[:space:]]*[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?[[:space:]]*$/
    }
    BEGIN {
      if (!is_number(left) || !is_number(right)) exit 2
      exit !((left + 0) > (right + 0))
    }
  '
}

#######################################
# List running processes that look like headless browser smoke runs: a chrome
# process using a /tmp/stockplatform path, a stockplatform-*smoke process, or
# anything started with headless=new. Lines mentioning this script are skipped.
# Globals:  PS_COMMAND (read; executed through `bash -c`)
# Outputs:  matching process-table lines, unmodified, on stdout
#
# Matching is done on a lowercased copy of each line rather than through
# IGNORECASE, which only gawk honours; other awk implementations ignore it.
# The bracketed first letters ([c], [s], [h]) keep the pattern from matching
# the awk process's own command line in the snapshot, so keep that form.
#######################################
list_headless_smoke_pressure() {
  bash -c "$PS_COMMAND" | awk '
    { line = tolower($0) }
    line ~ /[c]hrome.*\/tmp\/stockplatform|[s]tockplatform-[[:alnum:]_-]*smoke|[h]eadless=new/ {
      if (line ~ /scripts\/ci\/wait-host-web-build-pressure\.sh/) next
      print
    }
  '
}

#######################################
# Build a human-readable report of every host pressure signal that is over
# its limit. An empty report means no pressure was found.
# Any measurement that cannot be read counts as 0, so a missing metrics file
# never blocks on its own.
# Globals:  MAX_LOAD5_PER_CORE, MAX_CI_CPU_PERCENT,
#           MAX_ACTIVE_CI_PROCESS_GROUPS, MAX_ACTIVE_CI_CONTAINERS,
#           MAX_ORPHAN_BROWSER_GROUPS (all read)
# Outputs:  one line per exceeded limit, then, when headless smoke processes
#           are running, a heading followed by up to six of their lines
#######################################
pressure_report() {
  local findings=""
  local load_ratio active_ci_cpu active_ci_groups active_ci_containers
  local orphan_groups smoke_pressure

  load_ratio="$(load5_per_core 2>/dev/null || echo 0)"
  active_ci_cpu="$(metric_value "awoooi_host_gitea_actions_active_process_cpu_percent" 2>/dev/null || echo 0)"
  active_ci_groups="$(metric_value "awoooi_host_gitea_actions_active_process_group_count" 2>/dev/null || echo 0)"
  active_ci_containers="$(metric_value "awoooi_host_gitea_actions_active_container_count" 2>/dev/null || echo 0)"
  # Two metric names are tried in turn for the orphan browser group count.
  orphan_groups="$(
    metric_sum "awoooi_host_runaway_browser_orphan_group_count" 2>/dev/null \
      || metric_sum "awoooi_host_orphan_browser_group_count" 2>/dev/null \
      || echo 0
  )"

  if greater_than "$load_ratio" "$MAX_LOAD5_PER_CORE"; then
    findings+="host load5/core ${load_ratio} > ${MAX_LOAD5_PER_CORE}"$'\n'
  fi
  if greater_than "$active_ci_cpu" "$MAX_CI_CPU_PERCENT"; then
    findings+="active CI/BuildKit CPU ${active_ci_cpu}% > ${MAX_CI_CPU_PERCENT}%"$'\n'
  fi
  if greater_than "$active_ci_groups" "$MAX_ACTIVE_CI_PROCESS_GROUPS"; then
    findings+="active CI/BuildKit process groups ${active_ci_groups} > ${MAX_ACTIVE_CI_PROCESS_GROUPS}"$'\n'
  fi
  if greater_than "$active_ci_containers" "$MAX_ACTIVE_CI_CONTAINERS"; then
    findings+="active Gitea Actions containers ${active_ci_containers} > ${MAX_ACTIVE_CI_CONTAINERS}"$'\n'
  fi
  if greater_than "$orphan_groups" "$MAX_ORPHAN_BROWSER_GROUPS"; then
    findings+="orphan browser/smoke groups ${orphan_groups} > ${MAX_ORPHAN_BROWSER_GROUPS}"$'\n'
  fi

  # A failing process listing is treated the same as "nothing running".
  smoke_pressure="$(list_headless_smoke_pressure || true)"
  if [ -n "$smoke_pressure" ]; then
    findings+="active headless smoke pressure detected"$'\n'
    findings+="$(printf '%s\n' "$smoke_pressure" | head -n 6)"$'\n'
  fi

  printf '%s' "$findings"
}

# Main gate loop: check up to ATTEMPTS times, pausing SLEEP_SECONDS between
# checks, and leave with 0 as soon as a check finds no pressure.

# A malformed attempt budget must fail here. Otherwise the loop would run zero
# times and WARN_ONLY=1 would let the build through without a single check.
case "$ATTEMPTS" in
  '' | *[!0-9]*)
    echo "❌ HOST_WEB_BUILD_PRESSURE_ATTEMPTS must be a non-negative integer (got '${ATTEMPTS}')" >&2
    exit 1
    ;;
esac
# Force base 10 so a value such as "08" is not rejected as invalid octal.
max_attempts=$((10#$ATTEMPTS))

for ((attempt = 1; attempt <= max_attempts; attempt++)); do
  refresh_metrics
  active_builds="$(list_foreign_web_builds || true)"
  host_pressure="$(pressure_report || true)"
  if [ -z "$active_builds" ] && [ -z "$host_pressure" ]; then
    echo "✅ no host web/build/smoke pressure detected"
    exit 0
  fi

  if ((attempt < max_attempts)); then
    echo "⏳ host web/build/smoke pressure detected (attempt ${attempt}/${ATTEMPTS}); waiting ${SLEEP_SECONDS}s"
  else
    echo "⏳ host web/build/smoke pressure detected (attempt ${attempt}/${ATTEMPTS}); no attempts left"
  fi
  if [ -n "$host_pressure" ]; then
    printf '%s\n' "$host_pressure" | sed -n '1,12p'
  fi
  if [ -n "$active_builds" ]; then
    printf '%s\n' "$active_builds" | head -n 8
  fi

  # No re-check follows the final attempt, so sleeping there only adds delay.
  if ((attempt < max_attempts)); then
    sleep "$SLEEP_SECONDS"
  fi
done

echo "⚠️ host web/build/smoke pressure still active after ${ATTEMPTS} checks"
if [ "$WARN_ONLY" = "1" ]; then
  echo "⚠️ continuing to avoid a stuck deploy; see ops/runner/README.md for the runner isolation plan"
  exit 0
fi

echo "❌ refusing to start AWOOI image build while host web/build/smoke pressure is still active"
exit 1