fix(ci): allow user service for gitea host runner
Some checks failed
Code Review / ai-code-review (push) Has been cancelled

This commit is contained in:
Your Name
2026-05-01 16:24:45 +08:00
parent cb5ab900c4
commit bc295eaec2
4 changed files with 75 additions and 10 deletions

View File

@@ -12,13 +12,15 @@
### 完成
- 新增 `ops/runner/gitea-act-runner-host.service`,讓 110 host-level `act_runner` 由 systemd 管理,不再依賴裸 `nohup` 程序。
- 新增 `ops/runner/install-gitea-host-runner-service.sh`,會把 `/home/wooo/act-runner/config.yaml` 正規化為 `shutdown_timeout: 1h`、安裝 systemd service、停用 Docker-wrapped `gitea-runner` restart policy且在 `GITEA-ACTIONS-TASK-*` 正在跑時拒絕重啟
- 新增 `ops/runner/gitea-act-runner-host.user.service`,讓沒有 sudo 的維運路徑也能落到 user-level systemd
- 新增 `ops/runner/install-gitea-host-runner-service.sh`,會把 `/home/wooo/act-runner/config.yaml` 正規化為 `shutdown_timeout: 1h`、安裝 system/user systemd service、停用 Docker-wrapped `gitea-runner` restart policy,且在 `GITEA-ACTIONS-TASK-*` 正在跑時拒絕重啟。
- `scripts/reboot-recovery/awoooi-startup-110.sh` 改為優先啟動 `gitea-act-runner-host.service`,並在 reboot recovery 時補上 `shutdown_timeout: 1h`
- `ops/runner/README.md` 補第三層 runner 修復:graceful shutdown service 與 status mismatch 根因。
### 驗證
- Live root cause:`act_runner generate-config` 顯示預設 `runner.shutdown_timeout: 0s`,110 config 當時未覆寫。
- Live deploy state:ArgoCD `Synced Healthy f72419dd`,`awoooi-api`/`awoooi-worker`/`awoooi-web` 均已使用 `b0da6da1` image。
- Live hotfix:110 `/home/wooo/act-runner/config.yaml` 已套 `shutdown_timeout: 1h`,host runner 重新宣告 labels 成功。
## 2026-05-01 | Agent Loop shadow structured metadata guard

View File

@@ -176,7 +176,8 @@ RESTART_NOW=1 bash ops/runner/install-gitea-host-runner-service.sh
此 script 會:
- 更新 `/home/wooo/act-runner/config.yaml`:`shutdown_timeout: 1h`
- 安裝 `/etc/systemd/system/gitea-act-runner-host.service`
- 有 passwordless sudo 時安裝 `/etc/systemd/system/gitea-act-runner-host.service`
- 沒有 sudo 時 fallback 到 `~/.config/systemd/user/gitea-act-runner-host.service`
- 停用 Docker-wrapped `gitea-runner` container 的 restart policy
- 拒絕在 `GITEA-ACTIONS-TASK-*` container 正在跑時重啟 runner

View File

@@ -0,0 +1,17 @@
# User-level systemd unit that runs the Gitea act_runner daemon out of
# /home/wooo/act-runner. The install script copies it to
# ~/.config/systemd/user/ when passwordless sudo is not available.
[Unit]
Description=Gitea Actions Host Runner (AWOOOI user service)
# Order the runner after the user manager's default target.
After=default.target
[Service]
Type=simple
WorkingDirectory=/home/wooo/act-runner/data
Environment=HOME=/home/wooo
ExecStart=/home/wooo/act-runner/act_runner daemon --config /home/wooo/act-runner/config.yaml
# Bring the daemon back whenever it exits, after a 15 second pause.
Restart=always
RestartSec=15
# Stop with SIGINT instead of the default SIGTERM.
# NOTE(review): presumably SIGINT is what triggers act_runner's graceful
# shutdown (bounded by shutdown_timeout in config.yaml) - confirm.
KillSignal=SIGINT
# Allow 3700 s for the stop to complete: 100 s more than the 1h
# shutdown_timeout that the install script writes into config.yaml.
TimeoutStopSec=3700
# Count exits via SIGINT (130 = 128+2) and SIGTERM (143 = 128+15) as clean,
# so an operator-requested stop is not recorded as a failure.
SuccessExitStatus=0 130 143
[Install]
# Started together with the user manager's default target once enabled.
WantedBy=default.target

View File

@@ -5,7 +5,9 @@ RUNNER_DIR="${RUNNER_DIR:-/home/wooo/act-runner}"
RUNNER_USER="${RUNNER_USER:-wooo}"
SERVICE_NAME="${SERVICE_NAME:-gitea-act-runner-host.service}"
SHUTDOWN_TIMEOUT="${SHUTDOWN_TIMEOUT:-1h}"
SERVICE_SRC="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/gitea-act-runner-host.service"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SYSTEM_SERVICE_SRC="${SCRIPT_DIR}/gitea-act-runner-host.service"
USER_SERVICE_SRC="${SCRIPT_DIR}/gitea-act-runner-host.user.service"
CONFIG_FILE="${RUNNER_DIR}/config.yaml"
if [ ! -x "${RUNNER_DIR}/act_runner" ]; then
@@ -67,27 +69,70 @@ if not found:
path.write_text("\n".join(updated) + "\n")
PY
sudo install -o root -g root -m 0644 "${SERVICE_SRC}" "/etc/systemd/system/${SERVICE_NAME}"
sudo systemctl daemon-reload
sudo systemctl enable "${SERVICE_NAME}" >/dev/null
if docker ps --format '{{.Names}}' | grep -qx 'gitea-runner'; then
echo "Disabling legacy docker-wrapped gitea-runner container"
docker update --restart=no gitea-runner >/dev/null 2>&1 || true
docker stop gitea-runner >/dev/null 2>&1 || true
fi
#######################################
# Install and enable the runner as a system-wide systemd unit.
# Globals:
#   SYSTEM_SERVICE_SRC (read) - path of the unit file shipped next to this script
#   SERVICE_NAME       (read) - unit file name to install under /etc/systemd/system
# Outputs:
#   Nothing on stdout; the output of `systemctl enable` is discarded.
# Returns:
#   Exit status of the final `systemctl enable` call.
#######################################
install_system_service() {
  local unit_dest="/etc/systemd/system/${SERVICE_NAME}"

  # Copy the unit into place owned by root and world-readable.
  sudo install -o root -g root -m 0644 "${SYSTEM_SERVICE_SRC}" "${unit_dest}"

  # Let systemd pick up the new/changed unit before enabling it.
  sudo systemctl daemon-reload

  sudo systemctl enable "${SERVICE_NAME}" 1>/dev/null
}
#######################################
# Install and enable the runner as a user-level systemd unit (no sudo needed).
#
# A user unit is started by the user's own systemd manager, which normally
# only exists while that user has a login session. To let the runner come
# back after a reboot without anyone logging in, lingering is requested for
# the current user. That step is best-effort: if it is refused, a warning is
# printed and installation continues.
#
# Globals:
#   HOME             (read) - base of the user unit directory
#   USER             (read) - account to enable lingering for (falls back to id -un)
#   USER_SERVICE_SRC (read) - path of the user unit file shipped next to this script
#   SERVICE_NAME     (read) - unit file name to install and enable
# Outputs:
#   A warning on stderr when lingering could not be enabled.
# Returns:
#   Exit status of the final `systemctl --user enable` call.
#######################################
install_user_service() {
  local user_service_dir="${HOME}/.config/systemd/user"
  local current_user="${USER:-$(id -un)}"

  mkdir -p "${user_service_dir}"
  install -m 0644 "${USER_SERVICE_SRC}" "${user_service_dir}/${SERVICE_NAME}"
  systemctl --user daemon-reload

  # Without lingering the user manager (and therefore this unit) is not
  # started at boot and is torn down when the last session ends.
  if ! loginctl enable-linger "${current_user}" >/dev/null 2>&1; then
    echo "Warning: could not enable lingering for ${current_user}; the user service will not start at boot until 'loginctl enable-linger ${current_user}' is run." >&2
  fi

  # Kept last so the function's exit status is still that of `enable`.
  systemctl --user enable "${SERVICE_NAME}" >/dev/null
}
#######################################
# Ask an already-running act_runner daemon to stop and wait for it to exit.
# Globals:
#   RUNNER_USER (read) - account whose processes are inspected
#   RUNNER_DIR  (read) - directory containing the act_runner binary
# Outputs:
#   An error line on stderr if the daemon is still alive after the wait.
# Returns:
#   0 when no daemon was running or it exited within 60 polls,
#   1 when it was still running after the wait.
#######################################
stop_existing_runner() {
  local daemon_pattern="${RUNNER_DIR}/act_runner daemon"
  local attempt

  # Nothing to stop: succeed immediately.
  if ! pgrep -u "${RUNNER_USER}" -f "${daemon_pattern}" >/dev/null; then
    return 0
  fi

  # SIGINT rather than TERM/KILL; a failed pkill is tolerated because the
  # process may have exited between the check above and this call.
  pkill -INT -u "${RUNNER_USER}" -f "${daemon_pattern}" || true

  # Poll once per second, up to 60 times, for the daemon to disappear.
  for ((attempt = 0; attempt < 60; attempt++)); do
    if ! pgrep -u "${RUNNER_USER}" -f "${daemon_pattern}" >/dev/null; then
      return 0
    fi
    sleep 1
  done

  echo "Existing act_runner daemon did not stop within 60s" >&2
  return 1
}
# Choose where the unit gets installed. `sudo -n` never prompts, so this
# probe succeeds only when sudo works non-interactively; otherwise fall back
# to the per-user systemd instance. SERVICE_SCOPE is read further down to
# pick the matching systemctl invocation for start/restart/status.
if sudo -n true >/dev/null 2>&1; then
SERVICE_SCOPE="system"
install_system_service
else
SERVICE_SCOPE="user"
install_user_service
fi
if [ "${RESTART_NOW:-0}" = "1" ]; then
if docker ps --format '{{.Names}}' | grep -q '^GITEA-ACTIONS-TASK-'; then
echo "Refusing to restart: Gitea Actions task containers are running" >&2
exit 1
fi
sudo systemctl restart "${SERVICE_NAME}"
stop_existing_runner
if [ "${SERVICE_SCOPE}" = "system" ]; then
sudo systemctl restart "${SERVICE_NAME}"
else
systemctl --user restart "${SERVICE_NAME}"
fi
elif pgrep -u "${RUNNER_USER}" -f "${RUNNER_DIR}/act_runner daemon" >/dev/null; then
echo "Existing act_runner daemon is still running; service will take over after the next safe restart."
else
sudo systemctl start "${SERVICE_NAME}"
if [ "${SERVICE_SCOPE}" = "system" ]; then
sudo systemctl start "${SERVICE_NAME}"
else
systemctl --user start "${SERVICE_NAME}"
fi
fi
sudo systemctl --no-pager --full status "${SERVICE_NAME}" | sed -n '1,18p'
if [ "${SERVICE_SCOPE}" = "system" ]; then
sudo systemctl --no-pager --full status "${SERVICE_NAME}" | sed -n '1,18p'
else
systemctl --user --no-pager --full status "${SERVICE_NAME}" | sed -n '1,18p'
fi
grep -n 'shutdown_timeout' "${CONFIG_FILE}"