diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 1df28c05..42a97b58 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -12,13 +12,15 @@ ### 完成 - 新增 `ops/runner/gitea-act-runner-host.service`,讓 110 host-level `act_runner` 由 systemd 管理,不再依賴裸 `nohup` 程序。 -- 新增 `ops/runner/install-gitea-host-runner-service.sh`,會把 `/home/wooo/act-runner/config.yaml` 正規化為 `shutdown_timeout: 1h`、安裝 systemd service、停用 Docker-wrapped `gitea-runner` restart policy,且在 `GITEA-ACTIONS-TASK-*` 正在跑時拒絕重啟。 +- 新增 `ops/runner/gitea-act-runner-host.user.service`,讓沒有 sudo 的維運路徑也能落到 user-level systemd。 +- 新增 `ops/runner/install-gitea-host-runner-service.sh`,會把 `/home/wooo/act-runner/config.yaml` 正規化為 `shutdown_timeout: 1h`、安裝 system/user systemd service、停用 Docker-wrapped `gitea-runner` restart policy,且在 `GITEA-ACTIONS-TASK-*` 正在跑時拒絕重啟。 - `scripts/reboot-recovery/awoooi-startup-110.sh` 改為優先啟動 `gitea-act-runner-host.service`,並在 reboot recovery 時補上 `shutdown_timeout: 1h`。 - `ops/runner/README.md` 補第三層 runner 修復:graceful shutdown service 與 status mismatch 根因。 ### 驗證 - Live root cause:`act_runner generate-config` 顯示預設 `runner.shutdown_timeout: 0s`;110 config 當時未覆寫。 - Live deploy state:ArgoCD `Synced Healthy f72419dd`,`awoooi-api`/`awoooi-worker`/`awoooi-web` 均已使用 `b0da6da1` image。 +- Live hotfix:110 `/home/wooo/act-runner/config.yaml` 已套 `shutdown_timeout: 1h`,host runner 重新宣告 labels 成功。 ## 2026-05-01 | Agent Loop shadow structured metadata guard diff --git a/ops/runner/README.md b/ops/runner/README.md index cd3f3dd9..585d3b31 100644 --- a/ops/runner/README.md +++ b/ops/runner/README.md @@ -176,7 +176,8 @@ RESTART_NOW=1 bash ops/runner/install-gitea-host-runner-service.sh 此 script 會: - 更新 `/home/wooo/act-runner/config.yaml` 的 `shutdown_timeout: 1h` -- 安裝 `/etc/systemd/system/gitea-act-runner-host.service` +- 有 passwordless sudo 時安裝 `/etc/systemd/system/gitea-act-runner-host.service` +- 沒有 sudo 時 fallback 到 `~/.config/systemd/user/gitea-act-runner-host.service` - 停用 Docker-wrapped `gitea-runner` 
container 的 restart policy - 拒絕在 `GITEA-ACTIONS-TASK-*` container 正在跑時重啟 runner diff --git a/ops/runner/gitea-act-runner-host.user.service b/ops/runner/gitea-act-runner-host.user.service new file mode 100644 index 00000000..5bab0e68 --- /dev/null +++ b/ops/runner/gitea-act-runner-host.user.service @@ -0,0 +1,17 @@ +[Unit] +Description=Gitea Actions Host Runner (AWOOOI user service) +After=default.target + +[Service] +Type=simple +WorkingDirectory=/home/wooo/act-runner/data +Environment=HOME=/home/wooo +ExecStart=/home/wooo/act-runner/act_runner daemon --config /home/wooo/act-runner/config.yaml +Restart=always +RestartSec=15 +KillSignal=SIGINT +TimeoutStopSec=3700 +SuccessExitStatus=0 130 143 + +[Install] +WantedBy=default.target diff --git a/ops/runner/install-gitea-host-runner-service.sh b/ops/runner/install-gitea-host-runner-service.sh index 2613bc56..3f23c4f7 100755 --- a/ops/runner/install-gitea-host-runner-service.sh +++ b/ops/runner/install-gitea-host-runner-service.sh @@ -5,7 +5,9 @@ RUNNER_DIR="${RUNNER_DIR:-/home/wooo/act-runner}" RUNNER_USER="${RUNNER_USER:-wooo}" SERVICE_NAME="${SERVICE_NAME:-gitea-act-runner-host.service}" SHUTDOWN_TIMEOUT="${SHUTDOWN_TIMEOUT:-1h}" -SERVICE_SRC="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/gitea-act-runner-host.service" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SYSTEM_SERVICE_SRC="${SCRIPT_DIR}/gitea-act-runner-host.service" +USER_SERVICE_SRC="${SCRIPT_DIR}/gitea-act-runner-host.user.service" CONFIG_FILE="${RUNNER_DIR}/config.yaml" if [ ! 
-x "${RUNNER_DIR}/act_runner" ]; then
@@ -67,27 +69,89 @@ if not found:
 path.write_text("\n".join(updated) + "\n")
 PY
 
-sudo install -o root -g root -m 0644 "${SERVICE_SRC}" "/etc/systemd/system/${SERVICE_NAME}"
-sudo systemctl daemon-reload
-sudo systemctl enable "${SERVICE_NAME}" >/dev/null
-
 if docker ps --format '{{.Names}}' | grep -qx 'gitea-runner'; then
   echo "Disabling legacy docker-wrapped gitea-runner container"
   docker update --restart=no gitea-runner >/dev/null 2>&1 || true
   docker stop gitea-runner >/dev/null 2>&1 || true
 fi
 
+# Run systemctl against the manager the unit was installed into: the system
+# manager through sudo, or the invoking user's manager through --user.
+# Reads SERVICE_SCOPE ("system" or "user"); arguments are passed to systemctl.
+service_ctl() {
+  if [ "${SERVICE_SCOPE}" = "system" ]; then
+    sudo systemctl "$@"
+  else
+    systemctl --user "$@"
+  fi
+}
+
+# Install the unit into /etc/systemd/system via sudo and enable it.
+install_system_service() {
+  sudo install -o root -g root -m 0644 "${SYSTEM_SERVICE_SRC}" "/etc/systemd/system/${SERVICE_NAME}"
+  sudo systemctl daemon-reload
+  sudo systemctl enable "${SERVICE_NAME}" >/dev/null
+}
+
+# Install the unit into the invoking user's systemd instance (no sudo needed).
+# Returns 1 when the invoking user is not RUNNER_USER.
+install_user_service() {
+  local user_service_dir="${HOME}/.config/systemd/user"
+  local current_user
+  current_user="$(id -un)"
+  # A user manager only runs units as its own account, so installing the
+  # unit for anyone but RUNNER_USER would start the runner as the wrong user.
+  if [ "${current_user}" != "${RUNNER_USER}" ]; then
+    echo "User-level install must run as ${RUNNER_USER} (current user: ${current_user})" >&2
+    return 1
+  fi
+  mkdir -p "${user_service_dir}"
+  install -m 0644 "${USER_SERVICE_SRC}" "${user_service_dir}/${SERVICE_NAME}"
+  systemctl --user daemon-reload
+  systemctl --user enable "${SERVICE_NAME}" >/dev/null
+  # Without lingering the user manager starts only at login and is stopped
+  # once the last session closes, which would take the runner down with it.
+  if ! loginctl enable-linger "${current_user}" >/dev/null 2>&1; then
+    echo "WARNING: could not enable lingering for ${current_user}; the runner will not start at boot or survive logout" >&2
+  fi
+}
+
+# Send SIGINT to a running act_runner daemon owned by RUNNER_USER and wait
+# up to 60s for it to exit. Returns 0 when no daemon is left, 1 on timeout.
+stop_existing_runner() {
+  local pattern="${RUNNER_DIR}/act_runner daemon"
+  if pgrep -u "${RUNNER_USER}" -f "${pattern}" >/dev/null; then
+    pkill -INT -u "${RUNNER_USER}" -f "${pattern}" || true
+    for _ in {1..60}; do
+      pgrep -u "${RUNNER_USER}" -f "${pattern}" >/dev/null || return 0
+      sleep 1
+    done
+    echo "Existing act_runner daemon did not stop within 60s" >&2
+    return 1
+  fi
+}
+
+# Prefer the system-wide unit; fall back to the user manager without sudo.
+if sudo -n true >/dev/null 2>&1; then
+  SERVICE_SCOPE="system"
+  install_system_service
+else
+  SERVICE_SCOPE="user"
+  install_user_service
+fi
+
 if [ "${RESTART_NOW:-0}" = "1" ]; then
   if docker ps --format '{{.Names}}' | grep -q '^GITEA-ACTIONS-TASK-'; then
     echo "Refusing to restart: Gitea Actions task containers are running" >&2
     exit 1
   fi
-  sudo systemctl restart "${SERVICE_NAME}"
+  stop_existing_runner
+  service_ctl restart "${SERVICE_NAME}"
 elif pgrep -u "${RUNNER_USER}" -f "${RUNNER_DIR}/act_runner daemon" >/dev/null; then
   echo "Existing act_runner daemon is still running; service will take over after the next safe restart."
 else
-  sudo systemctl start "${SERVICE_NAME}"
+  service_ctl start "${SERVICE_NAME}"
 fi
 
-sudo systemctl --no-pager --full status "${SERVICE_NAME}" | sed -n '1,18p'
+service_ctl --no-pager --full status "${SERVICE_NAME}" | sed -n '1,18p'
 grep -n 'shutdown_timeout' "${CONFIG_FILE}"