ci(runner): drain legacy docker act runner safely
All checks were successful
Code Review / ai-code-review (push) Successful in 11s

This commit is contained in:
Your Name
2026-05-21 18:53:45 +08:00
parent 19739339e7
commit 9b465ee140
4 changed files with 77 additions and 7 deletions

View File

@@ -217,6 +217,39 @@ AWOOI 的 Docker lock會和 AWOOI Web image 內的 Next production build 疊
長期方向仍是 runner 隔離或 build offload；此 gate 是在 shared runner 尚未
拆分前,降低重型前端 build 互相踩踏的保守保護層。
### 第五層修復: legacy Docker runner drain
2026-05-21 再次確認 110 同時存在兩個 runner：
- host-level `gitea-act-runner-host.service`
- Docker-wrapped `gitea-runner`
兩者使用同一份 labels/config，會同時接 `awoooi`、`stockplatform-v2`、
`ewoooc` 等 repo 的 job。這會讓 AWOOI CD 的 runner ownership 失真,也會
讓 shared Docker daemon 壓力無法預測。
正確處理不是在 task container 正在跑時直接 `docker stop gitea-runner`。
`ops/runner/install-gitea-host-runner-service.sh` 現在採用 drain 流程:
1. `docker update --restart=no gitea-runner`
2. 若沒有 `GITEA-ACTIONS-TASK-*`,用長 timeout 停止 container
3. 若仍有 `GITEA-ACTIONS-TASK-*`，送 `SIGINT` 給 `gitea-runner`
4. act-runner 依 `shutdown_timeout: 1h` 停止接新 job，等待手上的 job 收尾
現場判讀:
```bash
docker inspect gitea-runner --format 'Restart={{.HostConfig.RestartPolicy.Name}} Status={{.State.Status}}'
docker ps --format '{{.Names}}' | grep '^GITEA-ACTIONS-TASK-' || true
docker logs --since 10m gitea-runner
```
目標狀態:
```text
Restart=no Status=exited
```
---
版本: v2.0 | 更新: 2026-03-29 | 作者: Claude Code
變更: v1.0→v2.0 序列建構取代 Job Concurrency Groups

View File

@@ -5,11 +5,33 @@ RUNNER_DIR="${RUNNER_DIR:-/home/wooo/act-runner}"
# Each setting below keeps a value already present in the environment and
# otherwise falls back to the default after `:-`.

# Account associated with the runner.
# NOTE(review): presumably the user the service runs as; the code that
# consumes this is outside the visible hunk - confirm.
RUNNER_USER="${RUNNER_USER:-wooo}"
# systemd unit name; install_system_service installs the unit file under
# /etc/systemd/system/ with this name.
SERVICE_NAME="${SERVICE_NAME:-gitea-act-runner-host.service}"
# Graceful-shutdown window for act_runner.
# NOTE(review): presumably written into config.yaml as `shutdown_timeout`;
# the code that applies it is not visible here - confirm.
SHUTDOWN_TIMEOUT="${SHUTDOWN_TIMEOUT:-1h}"
# Seconds handed to `docker stop -t` when stopping the legacy `gitea-runner`
# container. The default of 3700 s is 100 s longer than one hour.
# NOTE(review): this value is not derived from SHUTDOWN_TIMEOUT, so overriding
# one does not adjust the other.
LEGACY_RUNNER_STOP_TIMEOUT_SECONDS="${LEGACY_RUNNER_STOP_TIMEOUT_SECONDS:-3700}"
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# System-level unit file shipped next to this script (source for
# install_system_service).
SYSTEM_SERVICE_SRC="${SCRIPT_DIR}/gitea-act-runner-host.service"
# User-level unit file shipped next to this script.
# NOTE(review): its consumer is outside the visible hunk.
USER_SERVICE_SRC="${SCRIPT_DIR}/gitea-act-runner-host.user.service"
# act_runner configuration file inside the runner directory.
CONFIG_FILE="${RUNNER_DIR}/config.yaml"
#######################################
# Report whether any Gitea Actions task container is currently running.
# A task container is any running container whose name starts with
# "GITEA-ACTIONS-TASK-".
# Arguments: none
# Outputs:   nothing on stdout
# Returns:   0 if at least one task container is running; non-zero if none
#            is running or the container list could not be obtained.
#######################################
gitea_task_containers_running() {
  local running_names

  # Capture the listing before matching. Piping `docker ps` straight into
  # `grep -q` lets grep exit on the first hit, which can kill the producer
  # with SIGPIPE; under `set -o pipefail` the pipeline then reports failure
  # even though a task container was found.
  running_names="$(docker ps --format '{{.Names}}')" || return 1

  grep -q '^GITEA-ACTIONS-TASK-' <<<"${running_names}"
}
#######################################
# Retire the legacy Docker-wrapped runner container named `gitea-runner`
# without interrupting jobs that are still executing.
#
# Steps:
#   1. Do nothing if no container (running or stopped) is named exactly
#      `gitea-runner`.
#   2. Set its restart policy to "no".
#   3. If task containers are running, send SIGINT to `gitea-runner` and
#      return immediately.
#   4. Otherwise stop it with a long timeout.
#
# Globals:   LEGACY_RUNNER_STOP_TIMEOUT_SECONDS (read)
# Arguments: none
# Outputs:   progress message on stdout; drain notice and warnings on stderr
# Returns:   always 0 - this is a best-effort step and must not abort the
#            installer.
#######################################
disable_legacy_docker_runner() {
  local all_names

  # List first, match second: `docker ps | grep -q` can fail spuriously under
  # `set -o pipefail` when grep exits before docker has finished writing.
  all_names="$(docker ps -a --format '{{.Names}}')" || return 0
  if ! grep -qx 'gitea-runner' <<<"${all_names}"; then
    return 0
  fi

  echo "Disabling legacy docker-wrapped gitea-runner container"

  # If this fails the old restart policy stays in place and Docker can bring
  # the legacy runner back later, so make the failure visible.
  if ! docker update --restart=no gitea-runner >/dev/null 2>&1; then
    echo "warning: could not set restart policy of gitea-runner to 'no'; Docker may start it again" >&2
  fi

  if gitea_task_containers_running; then
    echo "Active Gitea Actions task containers are running; sending SIGINT to drain gitea-runner" >&2
    # Deliberately quiet: the kill fails when gitea-runner is already stopped,
    # which is harmless here.
    docker kill --signal=SIGINT gitea-runner >/dev/null 2>&1 || true
    return 0
  fi

  # No task containers: stop the runner, allowing it the full timeout.
  if ! docker stop -t "${LEGACY_RUNNER_STOP_TIMEOUT_SECONDS}" gitea-runner >/dev/null 2>&1; then
    echo "warning: docker stop gitea-runner failed; the legacy runner may still be running" >&2
  fi
  return 0
}
if [ ! -x "${RUNNER_DIR}/act_runner" ]; then
echo "act_runner binary not found: ${RUNNER_DIR}/act_runner" >&2
exit 1
@@ -69,11 +91,7 @@ if not found:
path.write_text("\n".join(updated) + "\n")
PY
if docker ps --format '{{.Names}}' | grep -qx 'gitea-runner'; then
echo "Disabling legacy docker-wrapped gitea-runner container"
docker update --restart=no gitea-runner >/dev/null 2>&1 || true
docker stop gitea-runner >/dev/null 2>&1 || true
fi
disable_legacy_docker_runner
install_system_service() {
sudo install -o root -g root -m 0644 "${SYSTEM_SERVICE_SRC}" "/etc/systemd/system/${SERVICE_NAME}"
@@ -110,7 +128,7 @@ else
fi
if [ "${RESTART_NOW:-0}" = "1" ]; then
if docker ps --format '{{.Names}}' | grep -q '^GITEA-ACTIONS-TASK-'; then
if gitea_task_containers_running; then
echo "Refusing to restart: Gitea Actions task containers are running" >&2
exit 1
fi

View File

@@ -37,6 +37,17 @@ cleanup_artifacts() {
rm -rf node_modules apps/web/node_modules 2>/dev/null || true
}
#######################################
# Remove a Docker network, but only when no container is attached to it.
# Silently does nothing when docker is unavailable, when the network cannot
# be inspected, or when the inspected container map is anything other than
# the empty object "{}".
# Arguments: $1 - name of the Docker network
# Outputs:   none (docker output is discarded)
# Returns:   always 0
#######################################
cleanup_empty_docker_network() {
  local net_name="$1"
  local attached

  if ! command -v docker >/dev/null 2>&1; then
    return 0
  fi

  # Inspect errors (e.g. the network does not exist) yield an empty string,
  # which fails the "{}" comparison below and leaves everything untouched.
  attached="$(docker network inspect "$net_name" --format '{{json .Containers}}' 2>/dev/null || true)"

  if [ "$attached" = "{}" ]; then
    # Removal is best effort; a failure here is ignored.
    docker network rm "$net_name" >/dev/null 2>&1 || true
  fi
  return 0
}
has_leftovers() {
local root="$1"
[ -d "$root/.pytest_cache" ] && return 0
@@ -53,6 +64,7 @@ has_leftovers() {
}
cleanup_artifacts "$ROOT"
cleanup_empty_docker_network "b5-test-net"
if has_leftovers "$ROOT" && [ -n "$CLEANUP_IMAGE" ] && command -v docker >/dev/null 2>&1; then
echo "host cleanup left root-owned artifacts; retrying through ${CLEANUP_IMAGE}"

View File

@@ -228,8 +228,15 @@ PY
fi
# 已停用 Docker-wrapped runner，避免它搶走 host label job。
# 若手動執行此 recovery script 時仍有 task container，送 SIGINT
# 讓 act_runner drain，不再接新 job，並等手上的 job 收尾。
docker update --restart=no gitea-runner >/dev/null 2>&1 || true
docker stop gitea-runner >/dev/null 2>&1 || true
if docker ps --format '{{.Names}}' | grep -q '^GITEA-ACTIONS-TASK-'; then
log "⚠️ Gitea Actions task container still running; draining docker-wrapped gitea-runner"
docker kill --signal=SIGINT gitea-runner >/dev/null 2>&1 || true
else
docker stop -t 3700 gitea-runner >/dev/null 2>&1 || true
fi
sleep 15