fix(runner): recover disabled non110 service from keepalive
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 56s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 56s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
This commit is contained in:
@@ -15,6 +15,7 @@ RUNNER_DOCKER_IMAGES="${RUNNER_DOCKER_IMAGES:-gitea/act_runner:latest}"
|
||||
RUNNER_REGISTRATION_PATHS="${RUNNER_REGISTRATION_PATHS:-${RUNNER_HOME}/awoooi-non110-runner/data/.runner ${RUNNER_HOME}/awoooi-non110-runner/.runner ${RUNNER_HOME}/act-runner-awoooi/.runner /home/wooo/act-runner-awoooi/.runner /home/wooo/awoooi-act-runner/.runner /home/wooo/awoooi-non110-runner/.runner /home/wooo/act-runner/.runner}"
|
||||
RUNNER_SERVICE_NAMES="${RUNNER_SERVICE_NAMES:-awoooi-non110-runner.service gitea-act-runner-awoooi.service gitea-act-runner-host.service}"
|
||||
RUNNER_AUTOSTART_PATH_UNIT_NAMES="${RUNNER_AUTOSTART_PATH_UNIT_NAMES:-awoooi-non110-runner-autostart.path}"
|
||||
RUNNER_KEEPALIVE_SERVICE_UNIT_NAMES="${RUNNER_KEEPALIVE_SERVICE_UNIT_NAMES:-awoooi-non110-runner-keepalive.service}"
|
||||
RUNNER_KEEPALIVE_TIMER_UNIT_NAMES="${RUNNER_KEEPALIVE_TIMER_UNIT_NAMES:-awoooi-non110-runner-keepalive.timer}"
|
||||
ALLOWED_RUNNER_CONTAINER_NAMES="${ALLOWED_RUNNER_CONTAINER_NAMES:-awoooi-non110-runner stockplatform-ubuntu-runner}"
|
||||
ALLOWED_LABEL_NAMES="${ALLOWED_LABEL_NAMES:-awoooi-non110-host awoooi-non110-ubuntu awoooi-host awoooi-ubuntu}"
|
||||
@@ -33,6 +34,7 @@ READY_REGISTRATION_COUNT=0
|
||||
READY_SERVICE_COUNT=0
|
||||
READY_ACTIVE_SERVICE_COUNT=0
|
||||
READY_AUTOSTART_PATH_COUNT=0
|
||||
READY_KEEPALIVE_SERVICE_COUNT=0
|
||||
READY_KEEPALIVE_TIMER_COUNT=0
|
||||
|
||||
section() {
|
||||
@@ -85,12 +87,12 @@ systemd_cat() {
|
||||
systemd_show() {
|
||||
local unit="$1"
|
||||
local out
|
||||
if out="$(systemctl show "$unit" -p LoadState -p ActiveState -p UnitFileState -p MainPID --no-pager 2>/dev/null)" \
|
||||
if out="$(systemctl show "$unit" -p LoadState -p ActiveState -p UnitFileState -p MainPID -p Result -p ExecMainStatus --no-pager 2>/dev/null)" \
|
||||
&& ! grep -q '^LoadState=not-found$' <<<"$out"; then
|
||||
printf '%s\n' "$out"
|
||||
return 0
|
||||
fi
|
||||
if out="$(systemctl --user show "$unit" -p LoadState -p ActiveState -p UnitFileState -p MainPID --no-pager 2>/dev/null)" \
|
||||
if out="$(systemctl --user show "$unit" -p LoadState -p ActiveState -p UnitFileState -p MainPID -p Result -p ExecMainStatus --no-pager 2>/dev/null)" \
|
||||
&& ! grep -q '^LoadState=not-found$' <<<"$out"; then
|
||||
printf '%s\n' "$out"
|
||||
return 0
|
||||
@@ -446,6 +448,32 @@ check_autostart_paths() {
|
||||
}
|
||||
|
||||
check_keepalive_timers() {
|
||||
section "runner keepalive service metadata"
|
||||
local service_unit service_text service_state
|
||||
for service_unit in $RUNNER_KEEPALIVE_SERVICE_UNIT_NAMES; do
|
||||
if ! service_text="$(systemd_cat "$service_unit" 2>/dev/null)"; then
|
||||
printf 'RUNNER_KEEPALIVE_SERVICE unit=%s installed=0\n' "$service_unit"
|
||||
continue
|
||||
fi
|
||||
service_state="$(systemd_show "$service_unit" | tr '\n' ' ' || true)"
|
||||
if grep -q 'LoadState=not-found' <<<"$service_state"; then
|
||||
printf 'RUNNER_KEEPALIVE_SERVICE unit=%s installed=0\n' "$service_unit"
|
||||
continue
|
||||
fi
|
||||
printf 'RUNNER_KEEPALIVE_SERVICE unit=%s installed=1 %s\n' "$service_unit" "$service_state"
|
||||
if grep -q 'ActiveState=failed' <<<"$service_state" \
|
||||
|| grep -Eq 'Result=(exit-code|signal|timeout|core-dump)' <<<"$service_state"; then
|
||||
blocker "runner_keepalive_service_failed:${service_unit}"
|
||||
continue
|
||||
fi
|
||||
if grep -Eq '^[[:space:]]*ExecStart=-/usr/bin/systemctl --user reset-failed ' <<<"$service_text" \
|
||||
&& grep -Eq '^[[:space:]]*ExecStart=/usr/bin/systemctl --user daemon-reload' <<<"$service_text"; then
|
||||
READY_KEEPALIVE_SERVICE_COUNT=$((READY_KEEPALIVE_SERVICE_COUNT + 1))
|
||||
else
|
||||
blocker "runner_keepalive_service_recovery_steps_missing:${service_unit}"
|
||||
fi
|
||||
done
|
||||
|
||||
section "runner keepalive metadata"
|
||||
local unit text state active enabled interval
|
||||
for unit in $RUNNER_KEEPALIVE_TIMER_UNIT_NAMES; do
|
||||
@@ -477,6 +505,11 @@ check_keepalive_timers() {
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$READY_REGISTRATION_COUNT" -gt 0 ] \
|
||||
&& [ "$READY_SERVICE_COUNT" -gt 0 ] \
|
||||
&& [ "$READY_KEEPALIVE_SERVICE_COUNT" -eq 0 ]; then
|
||||
blocker "runner_keepalive_service_not_ready"
|
||||
fi
|
||||
if [ "$READY_REGISTRATION_COUNT" -gt 0 ] \
|
||||
&& [ "$READY_SERVICE_COUNT" -gt 0 ] \
|
||||
&& [ "$READY_KEEPALIVE_TIMER_COUNT" -eq 0 ]; then
|
||||
@@ -543,6 +576,7 @@ main() {
|
||||
printf 'READY_SERVICE_COUNT=%s\n' "$READY_SERVICE_COUNT"
|
||||
printf 'READY_ACTIVE_SERVICE_COUNT=%s\n' "$READY_ACTIVE_SERVICE_COUNT"
|
||||
printf 'READY_AUTOSTART_PATH_COUNT=%s\n' "$READY_AUTOSTART_PATH_COUNT"
|
||||
printf 'READY_KEEPALIVE_SERVICE_COUNT=%s\n' "$READY_KEEPALIVE_SERVICE_COUNT"
|
||||
printf 'READY_KEEPALIVE_TIMER_COUNT=%s\n' "$READY_KEEPALIVE_TIMER_COUNT"
|
||||
printf 'WARNING_COUNT=%s\n' "${#WARNINGS[@]}"
|
||||
printf 'BLOCKER_COUNT=%s\n' "${#BLOCKERS[@]}"
|
||||
|
||||
@@ -18,7 +18,7 @@ AUTOSTART_SERVICE_NAME="${AUTOSTART_SERVICE_NAME:-awoooi-non110-runner-autostart
|
||||
AUTOSTART_PATH_NAME="${AUTOSTART_PATH_NAME:-awoooi-non110-runner-autostart.path}"
|
||||
KEEPALIVE_SERVICE_NAME="${KEEPALIVE_SERVICE_NAME:-awoooi-non110-runner-keepalive.service}"
|
||||
KEEPALIVE_TIMER_NAME="${KEEPALIVE_TIMER_NAME:-awoooi-non110-runner-keepalive.timer}"
|
||||
KEEPALIVE_INTERVAL_SECONDS="${KEEPALIVE_INTERVAL_SECONDS:-60}"
|
||||
KEEPALIVE_INTERVAL_SECONDS="${KEEPALIVE_INTERVAL_SECONDS:-15}"
|
||||
USER_SERVICE_DIR="${USER_SERVICE_DIR:-${RUNNER_HOME}/.config/systemd/user}"
|
||||
RUNNER_LABELS="${RUNNER_LABELS:-awoooi-non110-host:host,awoooi-non110-ubuntu:docker://192.168.0.110:5000/awoooi/ci-runner:act-22.04}"
|
||||
WRITE_CONFIG_IF_MISSING="${WRITE_CONFIG_IF_MISSING:-1}"
|
||||
@@ -245,7 +245,8 @@ Type=oneshot
|
||||
ExecStart=/usr/bin/test -x ${RUNNER_BINARY}
|
||||
ExecStart=/usr/bin/test -s ${RUNNER_CONFIG}
|
||||
ExecStart=/usr/bin/test -s ${RUNNER_REGISTRATION}
|
||||
ExecStart=/usr/bin/systemctl --user reset-failed ${SERVICE_NAME}
|
||||
ExecStart=/usr/bin/systemctl --user daemon-reload
|
||||
ExecStart=-/usr/bin/systemctl --user reset-failed ${SERVICE_NAME}
|
||||
ExecStart=/usr/bin/systemctl --user enable ${SERVICE_NAME}
|
||||
ExecStart=/usr/bin/systemctl --user start ${SERVICE_NAME}
|
||||
RemainAfterExit=no
|
||||
|
||||
@@ -93,6 +93,25 @@ WantedBy=timers.target
|
||||
)
|
||||
|
||||
|
||||
def _write_keepalive_service(path: Path) -> None:
|
||||
path.write_text(
|
||||
"""
|
||||
[Unit]
|
||||
Description=Keep AWOOOI non-110 runner active while enable sentinel exists
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/bin/systemctl --user daemon-reload
|
||||
ExecStart=-/usr/bin/systemctl --user reset-failed awoooi-non110-runner.service
|
||||
ExecStart=/usr/bin/systemctl --user enable awoooi-non110-runner.service
|
||||
ExecStart=/usr/bin/systemctl --user start awoooi-non110-runner.service
|
||||
RemainAfterExit=no
|
||||
""".strip()
|
||||
+ "\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
|
||||
def _run_verifier(
|
||||
tmp_path: Path,
|
||||
registration_path: Path,
|
||||
@@ -101,6 +120,7 @@ def _run_verifier(
|
||||
unit_target_matches: bool = True,
|
||||
unmanaged_runner_container: bool = False,
|
||||
keepalive_timer: bool = True,
|
||||
keepalive_service_failed: bool = False,
|
||||
) -> subprocess.CompletedProcess[str]:
|
||||
fake_bin = tmp_path / "bin"
|
||||
unit_dir = tmp_path / "units"
|
||||
@@ -127,6 +147,10 @@ case "$cmd" in
|
||||
printf 'LoadState=loaded\\nActiveState=active\\nUnitFileState=enabled\\nMainPID=0\\n'
|
||||
exit 0
|
||||
fi
|
||||
if [[ "$unit" == *keepalive.service ]]; then
|
||||
printf 'LoadState=loaded\\nActiveState={"failed" if keepalive_service_failed else "inactive"}\\nUnitFileState=static\\nMainPID=0\\nResult={"exit-code" if keepalive_service_failed else "success"}\\nExecMainStatus={"1" if keepalive_service_failed else "0"}\\n'
|
||||
exit 0
|
||||
fi
|
||||
printf 'LoadState=loaded\\nActiveState={"active" if active_service else "inactive"}\\nUnitFileState=disabled\\nMainPID={"1234" if active_service else "0"}\\n'
|
||||
exit 0
|
||||
fi
|
||||
@@ -208,6 +232,7 @@ exit 1
|
||||
registration_path,
|
||||
)
|
||||
if keepalive_timer:
|
||||
_write_keepalive_service(unit_dir / "awoooi-non110-runner-keepalive.service")
|
||||
_write_keepalive_timer(unit_dir / "awoooi-non110-runner-keepalive.timer")
|
||||
|
||||
env = {
|
||||
@@ -219,6 +244,7 @@ exit 1
|
||||
"RUNNER_BINARY_PATHS": str(binary_path),
|
||||
"RUNNER_SERVICE_NAMES": "awoooi-non110-runner.service",
|
||||
"RUNNER_AUTOSTART_PATH_UNIT_NAMES": "awoooi-non110-runner-autostart.path",
|
||||
"RUNNER_KEEPALIVE_SERVICE_UNIT_NAMES": "awoooi-non110-runner-keepalive.service",
|
||||
"RUNNER_KEEPALIVE_TIMER_UNIT_NAMES": "awoooi-non110-runner-keepalive.timer",
|
||||
"ROLLBACK_UNIT_NAMES": "awoooi-non110-runner-rollback.service",
|
||||
"RUNNER_REGISTRATION_PATHS": str(registration_path),
|
||||
@@ -252,6 +278,7 @@ def test_non110_readiness_blocks_without_registration_state(tmp_path: Path) -> N
|
||||
assert "READY_AUTOSTART_PATH_COUNT=1" in result.stdout
|
||||
assert "RUNNER_KEEPALIVE_TIMER unit=awoooi-non110-runner-keepalive.timer installed=1" in result.stdout
|
||||
assert "BLOCKER runner_keepalive_timer_not_ready" not in result.stdout
|
||||
assert "BLOCKER runner_keepalive_service_not_ready" not in result.stdout
|
||||
|
||||
|
||||
def test_non110_readiness_accepts_registration_state_presence_without_reading_it(
|
||||
@@ -265,12 +292,29 @@ def test_non110_readiness_accepts_registration_state_presence_without_reading_it
|
||||
assert "present=1" in result.stdout
|
||||
assert "content_read=false" in result.stdout
|
||||
assert "registration_condition=1" in result.stdout
|
||||
assert "RUNNER_KEEPALIVE_SERVICE unit=awoooi-non110-runner-keepalive.service installed=1" in result.stdout
|
||||
assert "READY_KEEPALIVE_SERVICE_COUNT=1" in result.stdout
|
||||
assert "RUNNER_KEEPALIVE_TIMER unit=awoooi-non110-runner-keepalive.timer installed=1" in result.stdout
|
||||
assert "READY_KEEPALIVE_TIMER_COUNT=1" in result.stdout
|
||||
assert "secret-token-like-content" not in result.stdout
|
||||
assert "AWOOOI_NON110_RUNNER_READY=1" in result.stdout
|
||||
|
||||
|
||||
def test_non110_readiness_blocks_failed_keepalive_service(tmp_path: Path) -> None:
|
||||
registration_path = tmp_path / ".runner"
|
||||
registration_path.write_text("secret-token-like-content-not-printed\n", encoding="utf-8")
|
||||
result = _run_verifier(
|
||||
tmp_path,
|
||||
registration_path,
|
||||
keepalive_service_failed=True,
|
||||
)
|
||||
assert result.returncode == 1
|
||||
assert "BLOCKER runner_keepalive_service_failed:awoooi-non110-runner-keepalive.service" in result.stdout
|
||||
assert "BLOCKER runner_keepalive_service_not_ready" in result.stdout
|
||||
assert "secret-token-like-content" not in result.stdout
|
||||
assert "AWOOOI_NON110_RUNNER_READY=0" in result.stdout
|
||||
|
||||
|
||||
def test_non110_readiness_blocks_registered_runner_without_keepalive_timer(
|
||||
tmp_path: Path,
|
||||
) -> None:
|
||||
|
||||
@@ -96,12 +96,19 @@ def test_apply_with_existing_registration_does_not_start_runner(tmp_path: Path)
|
||||
|
||||
unit_dir = tmp_path / "home/.config/systemd/user"
|
||||
autostart = unit_dir / "awoooi-non110-runner-autostart.service"
|
||||
keepalive_service = unit_dir / "awoooi-non110-runner-keepalive.service"
|
||||
keepalive_timer = unit_dir / "awoooi-non110-runner-keepalive.timer"
|
||||
assert "ConditionPathExists=!" in autostart.read_text(encoding="utf-8")
|
||||
assert "enable --now awoooi-non110-runner-keepalive.timer" in autostart.read_text(
|
||||
encoding="utf-8"
|
||||
)
|
||||
assert "OnUnitInactiveSec=60s" in keepalive_timer.read_text(encoding="utf-8")
|
||||
keepalive_service_text = keepalive_service.read_text(encoding="utf-8")
|
||||
assert "ExecStart=/usr/bin/systemctl --user daemon-reload" in keepalive_service_text
|
||||
assert (
|
||||
"ExecStart=-/usr/bin/systemctl --user reset-failed awoooi-non110-runner.service"
|
||||
in keepalive_service_text
|
||||
)
|
||||
assert "OnUnitInactiveSec=15s" in keepalive_timer.read_text(encoding="utf-8")
|
||||
|
||||
log = (tmp_path / "systemctl.log").read_text(encoding="utf-8")
|
||||
assert "enable --now awoooi-non110-runner-autostart.path" not in log
|
||||
|
||||
Reference in New Issue
Block a user