Compare commits

...

3 Commits

Author SHA1 Message Date
Your Name
981e616994 fix(ci): add stale cd cancellation marker 2026-06-28 11:40:12 +08:00
Your Name
ca0b6cb72f chore(ci): cancel stale deploy queue [skip ci] 2026-06-28 11:38:28 +08:00
Your Name
f109b11478 fix(recovery): seal 110 cd lane restore sources [skip ci] 2026-06-28 11:37:01 +08:00
12 changed files with 138 additions and 283 deletions

View File

@@ -11,26 +11,7 @@ name: CD Pipeline
on:
# 2026-06-28 Codex: 110 host runner/CD lane pressure incident.
# Production CD is reopened for controlled apply through the dedicated
# capacity=1 cd-lane drain verifier; the host pressure gate below remains
# fail-closed before build starts.
push:
branches: [main]
paths:
# 只有實際影響部署的程式碼才觸發 CD
- 'apps/**'
- 'k8s/**'
- '.dockerignore'
# Dockerfile COPY scripts/ into the API image; keep production ops
# seed scripts deploy-coupled instead of repo-only.
- 'scripts/backup/backup-momo-188-pg.sh'
- 'scripts/ci/wait-host-web-build-pressure.sh'
- 'scripts/ops/notify-awoooi-ops.sh'
- 'scripts/ops/awooop-seed-auto-repair-canary-playbook.py'
# Workflow-only changes do not rebuild runtime images. Use workflow_dispatch
# when an operator explicitly wants to test the CD pipeline itself.
# docs/、memory/、ADR 等不觸發
# ops/monitoring/alerts-unified.yml 由 deploy-alerts.yaml 獨立處理 (I3)
# Production CD is manual-only until the runner is moved or hard-rate-limited.
workflow_dispatch:
# 手動觸發永遠可用(用於補跑、緊急部署)
@@ -67,8 +48,9 @@ jobs:
tests:
# 2026-06-28 Codex: Gitea does not consistently short-circuit `[skip ci]`
# on CD-generated deploy commits. Skip jobs explicitly so marker commits
# do not trigger a self-feeding CD loop.
if: ${{ github.event_name != 'push' || !contains(github.event.head_commit.message, '[skip ci]') }}
# do not trigger a self-feeding CD loop; `[cancel-stale-cd]` is a
# controlled no-op trigger used only to cancel stale pre-guard runs.
if: ${{ github.event_name != 'push' || (!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[cancel-stale-cd]')) }}
# 2026-04-30 Codex: run the tests job on the host runner and launch the
# CI image explicitly. The act-managed job container can disappear mid-test
# with Docker RWLayer=nil on the shared 110 daemon.
@@ -326,7 +308,7 @@ jobs:
build-and-deploy:
# 2026-06-28 Codex: keep CD-generated `[skip ci]` deploy commits from
# re-entering build/deploy and writing another deploy marker commit.
if: ${{ github.event_name != 'push' || !contains(github.event.head_commit.message, '[skip ci]') }}
if: ${{ github.event_name != 'push' || (!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[cancel-stale-cd]')) }}
# 2026-04-30 Codex: Docker builds run on the host runner. Long docker build
# steps were killing the transient act job container with RWLayer=nil.
needs: [tests]
@@ -1254,7 +1236,7 @@ jobs:
post-deploy-checks:
# 2026-06-28 Codex: post-deploy checks belong to real deploy runs; skip
# CD-generated marker commits already read back by the prior deploy run.
if: ${{ github.event_name != 'push' || !contains(github.event.head_commit.message, '[skip ci]') }}
if: ${{ github.event_name != 'push' || (!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[cancel-stale-cd]')) }}
needs: [build-and-deploy]
timeout-minutes: 30
# 2026-04-30 Codex: keep post-deploy on the host runner too. Playwright

View File

@@ -1,15 +1,8 @@
name: Code Review
on:
push:
branches: [main]
paths:
- 'apps/**'
- 'k8s/**'
- '!k8s/awoooi-prod/kustomization.yaml'
- 'ops/**'
- 'scripts/**'
- '.gitea/workflows/**'
# 2026-06-28 Codex: 110 host runner/CD lane pressure incident.
# Keep code review manual until the runner is moved or hard-rate-limited.
workflow_dispatch:
concurrency:
@@ -24,8 +17,9 @@ env:
jobs:
ai-code-review:
# 2026-06-28 Codex: deploy marker commits are generated by CD and carry
# `[skip ci]`; skip review at job level to avoid queued runner churn.
if: ${{ github.event_name != 'push' || !contains(github.event.head_commit.message, '[skip ci]') }}
# `[skip ci]`; `[cancel-stale-cd]` is a controlled no-op trigger. Skip both
# at job level to avoid queued runner churn.
if: ${{ github.event_name != 'push' || (!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[cancel-stale-cd]')) }}
runs-on: awoooi-ubuntu
timeout-minutes: 8
steps:

View File

@@ -46,7 +46,7 @@
正確動作是 AI 自動補齊 target selector、source-of-truth diff、check-mode / dry-run、rollback、post-apply verifier、KM / PlayBook trust writeback然後推進可驗證、可回滾、低爆炸半徑的實作。
**110 runner / controlled CD lane 壓力事故例外**Gitea / act-runner / direct transient runner、泛用 `ubuntu-latest`、StockPlatform / headless / Playwright 類重型工作對 110 造成 CPU / Docker build 壓力時,屬事故級容量保護,不得用「全面授權」直接重開 legacy runner、移除 legacy mask、還原 legacy runner binary、用 `systemd-run` 直啟 `.real` binary或把 host pressure gate 改成 warn-only。專用 AWOOOI controlled CD lane 可在 `capacity=1`、窄 label、無泛用重型 label、rollback unit、post-apply verifier 與 legacy runner fail-closed 同時成立時受控開啟;Gitea push workflow 不得因非事故級 guard 長期停在 manual-only。
**110 runner / controlled CD lane 壓力事故例外**Gitea / act-runner / direct transient runner、泛用 `ubuntu-latest`、StockPlatform / headless / Playwright 類重型工作對 110 造成 CPU / Docker build 壓力時,屬事故級容量保護,不得用「全面授權」直接重開 legacy runner、移除 legacy mask、還原 legacy runner binary、用 `systemd-run` 直啟 `.real` binary、保留可被 opener 復活的 root quarantine,或把 host pressure gate 改成 warn-only。允許的 controlled apply 是降壓、防再發、root restore-source 清零、workflow 手動化、runner 搬遷 / 硬限流與 post-apply verifier在搬遷或硬限流驗證成立前`awoooi-cd-lane.service``awoooi-cd-lane-drain.service`Gitea push workflow 必須維持 fail-closed / manual-only。
---

View File

@@ -291,7 +291,7 @@ force push / 刪 repo / 刪 refs / 改 repo visibility / raw runtime secret volu
2026-06-28 事故後110 上的 Gitea / act-runner / direct transient runner、StockPlatform headless smoke、host-side Next build 與 Docker / BuildKit 壓力屬容量事故保護面。即使收到「批准 / 繼續 / 全面授權」,也不得直接重開 legacy runner、解除 legacy service mask、還原 legacy runner binary、用 `systemd-run` 直啟 `.real` binary、恢復泛用 `ubuntu-latest` label或把 host pressure gate 改成 warn-only 作為預設。
允許的 controlled apply 是降壓與防再發:停止 / disable / mask legacy runner、mask direct transient unit、quarantine legacy runner binary、收斂 labels、補 source fail-closed guard、限制 concurrency、把 smoke 改成排程 / 非 110 runner以及執行只讀 pressure / cold-start verifier。專用 `awoooi-cd-lane.service` `awoooi-cd-lane-drain.service` 可在 `capacity=1`、無 `ubuntu-latest` / StockPlatform / headless / Playwright label、可回滾 unit、post-apply verifier 與 legacy runner fail-closed 都成立時受控開啟verifier 必須把它與 legacy runner 分開判讀
允許的 controlled apply 是降壓與防再發:停止 / disable / mask legacy runner、mask direct transient unit、quarantine legacy runner binary、收斂 labels、清零可被 opener 復活的 root restore-source、補 source fail-closed guard、限制 concurrency、把 workflow 改為手動、把 smoke 改成排程 / 非 110 runner以及執行只讀 pressure / cold-start verifier。在 runner 搬遷或硬限流驗證完成前,`awoooi-cd-lane.service` `awoooi-cd-lane-drain.service` 必須一併 fail-closed單純 `capacity=1`、窄 label 或 binary verifier 不足以重新開 lane
恢復 runner 必須同時具備:
@@ -301,7 +301,7 @@ force push / 刪 repo / 刪 refs / 改 repo visibility / raw runtime secret volu
4. rollback能回到 inactive / masked / fail-closed stub。
5. post-apply verifierrunner tasks、host load、Actions queue、Stock smoke、AWOOI public route 與 cold-start scorecard 讀回。
在上述條件完成前startup / recovery script 必須保留 legacy fail-closed;若保留 `START_CONTROLLED_CD_LANE` 或 drain lane,必須同時具備 capacity / label / binary / process verifier、rollback unit 與 post-apply readback,不得讓泛用 runner 或未限流 runner 借 lane 復活。
在上述條件完成前startup / recovery script 必須保留 legacy fail-closed,且不得保留 `START_CONTROLLED_CD_LANE`、sentinel 或 drain lane opener。Verifier 必須同時讀回 regular lane fail-closed、drain lane fail-closed、runner process `0`、active job containers `0`、root restore-source left `0`,不得讓泛用 runner 或未限流 runner 借 lane 復活。
### Source freshness / provider proxy gate

View File

@@ -48468,3 +48468,30 @@ production browser smoke:
**下一個 P0**
- 將 Wazuh runtime gate owner review packet 從 no-persist validation 推進為 committed review readback保留 redacted evidence refs、target selector、source diff、check-mode / dry-run、rollback、post-apply verifier 與 KM writeback仍不得查 live Wazuh 或做 host write。
- 若要進一步打開 runtime gate必須逐 target 以 check-mode / dry-run、rollback owner、maintenance window 與 post-apply verifier 收斂,並在 production readback 中證明沒有 secret/raw payload 外洩。
## 2026-06-28 — 11:17 110 runner / cd-lane root restore-source fail-closed
**時間與來源**
- 2026-06-28 11:02-11:17 Asia/Taipei。
- 來源:110 live systemd / sudo metadata / docker job container readback;未讀 raw sessions、SQLite、auth、`.env`、runner token 或 cd-lane config 內容。
**事故根因補充**
- `241cbe067` 後 main 曾以 `e7db56d4c`、`95c825f24`、`e97b25247`、`022bf0b80` 重新打開 controlled CD lane / drain lane。
- 110 sudo metadata 顯示 11:02:47 有 opener 從 `/root/awoooi-cd-lane-drain-disabled-*` 最新 quarantine 復原 binary / config / unit,並執行 `systemctl enable --now awoooi-cd-lane-drain.service`。
- 11:13 readback:`awoooi-cd-lane-drain.service loaded|active|enabled`、`CD_LANE_PROC_COUNT=1`、`ACTIVE_JOB_CONTAINERS=1`。
**完成內容**
- live 已停止 drain lane、mask unit、停止 active job container、搬走 live unit / binary / config / `.runner`,且把所有 `/root/awoooi-cd-lane-drain-disabled-*` 搬到不可被 opener glob 命中的 final quarantine。
- `scripts/reboot-recovery/awoooi-startup-110.sh` 移除 `START_CONTROLLED_CD_LANE` / sentinel opener;regular 與 drain lane 均納入 fail-closed;新增 root restore-source quarantine,不讀內容,只搬移目錄。
- `p3-controlled-release-gate.sh`、`full-stack-cold-start-check.sh` 要求 regular lane fail-closed、drain lane fail-closed、process `0`、root restore-source left `0`;不再接受單一 controlled-open lane。
- `.gitea/workflows/cd.yaml`、`code-review.yaml` 回到 `workflow_dispatch` only,避免 push 製造 job queue 再觸發 opener。
- `AGENTS.md`、`docs/HARD_RULES.md`、MASTER 與 `ops/runner/README.md` 更新 110 容量事故例外:全面授權不等於直接重開 110 runner / CD lane。
**live 驗證結果**
- 11:17 readback:`DRAIN_SYSTEMD=masked|inactive|masked`、`DRAIN_UNIT_LINK=/dev/null`、`DRAIN_PROC_COUNT=0`、`ACTIVE_JOB_CONTAINERS=0`。
- `DRAIN_ROOT_RESTORE_LEFT=0`、`DRAIN_ROOT_RESTORE_MOVED=4`、final quarantine `/root/awoooi-runner-restore-sources-disabled-final-20260628T111656+0800`。
**仍維持**
- 沒有重啟 Docker / Nginx / firewall / K3s / DB。
- 沒有 force push、沒有讀 secret 明文、沒有讀 raw sessions / SQLite / auth / `.env`。
- 110 runner / cd-lane 自動恢復仍 blocked;下一步是 runner 搬遷或硬限流後再另開 controlled apply。

View File

@@ -153,7 +153,7 @@ AWOOOI / AwoooP / IwoooS 不是單純監控頁、告警轉發器或資安清冊
3. 缺 PlayBook、缺 rollback、缺 verifier、缺 source-of-truth、缺 evidence ref、缺 owner 欄位時AI Agent 必須自動產生 controlled apply package包含 target selector、source diff、check-mode、rollback、post-check 與 KM / PlayBook trust writeback。
4. Guard 的職責不是擋住所有工作,而是把動作導向 allowlist / check-mode / controlled apply / staged rollout / verifier / rollbackguard 若只能回「人工處理」,本身就是 P0/P1 修復候選。
5. 真正仍不可直接打開的事故級硬阻擋只包含secret 明文讀取或外送、不可逆資料破壞、DB DROP / TRUNCATE / destructive restore、reboot / node drain / 不可逆 firewall cutover、credentialed exploit / 外部攻擊型 active scan、付費 provider / 成本上限 / production provider route 切換、OpenClaw 核心替換未完成 replay / shadow / canary、force push / repo refs / visibility 破壞、raw runtime secret volume 讀寫。
6. 110 runner 容量事故屬硬保護例外:不得重開 legacy runner、解除 legacy fail-closed、恢復泛用 label 或把 host pressure gate warn-only專用 AWOOOI controlled CD lane / drain lane `capacity=1`、窄 label、無泛用重型 label、rollback unit 與 post-apply verifier 成立時可 controlled openworkflow 不得因非事故級 guard 長期停在 manual-only
6. 110 runner 容量事故屬硬保護例外:不得重開 legacy runner、解除 legacy fail-closed、恢復泛用 label、保留可被 opener 復活的 root quarantine或把 host pressure gate warn-only在 runner 搬遷或硬限流驗證成立前,專用 AWOOOI controlled CD lane / drain lane 也必須 fail-closedGitea push workflow 維持 manual-only。允許的 controlled apply 是降壓、防再發、root restore-source 清零、smoke 搬遷 / 排程、runner 搬遷 / 硬限流與 post-apply verifier
7. 資料 freshness gate 必須 source-aware若 Drive / provider source preflight 證明沒有比最後乾淨 import 更新的來源,且 DB sync / import job 乾淨stale business data 是 source freshness warningauth/source/failed-folder/DB sync 有異常才是 hard blocker。
8. Provider proxy gate 必須避免成本 / route 誤開:未 provisioned 且 repo 已標 optional retired 的 LiteLLM 等 proxy只能列 warning不得為了過 health gate 自動啟動或切 production provider route。
@@ -5347,4 +5347,4 @@ Trigger commit `f5cd37b7` 與 deploy marker `0ba92357` 已把 governance UI 的
**觸發**110 壓力事故 fail-closed guard 將專用 `awoooi-cd-lane.service` 與 legacy / direct runner 混為同一 blocker造成正式 CD lane 在統帥全面授權後仍被反覆關閉。
**裁決:** legacy `act-runner`、direct transient runner、泛用 `ubuntu-latest` 與 StockPlatform / headless / Playwright 類重型任務屬容量事故保護面;專用 `awoooi-cd-lane.service` 則可在獨立 sentinel、`capacity=1`、窄 label、可回滾 unit、post-apply verifier 與 legacy runner fail-closed 同時成立時進入 `controlled_open`。所有 startup、cold-start、post-start 與 P3 release verifier 必須分開判讀 `legacy runner fail-closed``CD_LANE_CONTROLLED ok=1`不得再用「cd-lane binary 是 ELF」作為單一硬阻擋
**裁決更新** 10:05 曾嘗試將專用 `awoooi-cd-lane.service` 與 legacy runner 分流11:02 live evidence 證明 opener 會從 `/root/awoooi-cd-lane-drain-disabled-*` 復活 drain lane 並拉起 job container。因此在 runner 搬遷或硬限流驗證完成前legacy `act-runner`、direct transient runner、`awoooi-cd-lane.service``awoooi-cd-lane-drain.service`、泛用 `ubuntu-latest` 與 StockPlatform / headless / Playwright 類重型任務屬容量事故保護面startup、cold-start、post-start 與 P3 release verifier 必須讀回 regular lane fail-closed、drain lane fail-closed、process `0`、active job containers `0`、root restore-source left `0`

View File

@@ -406,24 +406,30 @@ Gitea service 名稱。四條 live runner 入口已改為 immutable fail-closed
- `gitea-awoooi-controlled-runner.service`
- `gitea-act-runner-awoooi-open.service`
`awoooi-cd-lane.service` 是專用 controlled lane不屬於 legacy runner mask 清單;
只有在 `/run/awoooi-cd-lane-enabled` `AWOOOI_START_CONTROLLED_CD_LANE=1`
存在、`capacity=1`、label 僅限 `awoooi-ubuntu` / `awoooi-host`沒有
`ubuntu-latest` / StockPlatform / headless / Playwright 類泛用重型 label
post-apply verifier 可讀回 `CD_LANE_CONTROLLED ok=1` 時,才可受控恢復
未滿足條件時 cd-lane 應回到 static `/bin/false` unit 與 shell stub。
`awoooi-cd-lane.service` `awoooi-cd-lane-drain.service` 已納入 110 容量事故
fail-closed 面。僅有 `capacity=1`、label 僅限 `awoooi-ubuntu` / `awoooi-host`
或 binary verifier 不足以恢復 lane不得再靠 `/run/awoooi-cd-lane-enabled`
`AWOOOI_START_CONTROLLED_CD_LANE=1`、root quarantine restore 或 startup opener
重新啟動
目前 verifier 必須讀回:
- regular lane fail-closed。
- drain lane fail-closed。
- runner / cd-lane process count `0`
- active job containers `0`
- `/root/awoooi-cd-lane-disabled-*``/root/awoooi-cd-lane-drain-disabled-*`
restore-source left `0`
未完成 runner 搬遷、限流、smoke 排程前,不得解除 legacy mask、恢復泛用 runner label
或把 host pressure gate 預設改成 warn-only。
2026-06-28 controlled update舊的 manual-only / freeze guard 已改為分流判讀。
legacy runner 仍維持 masked / fail-closed專用 `awoooi-cd-lane.service`
`awoooi-cd-lane-drain.service` 只要通過 capacity、label、binary、process 與
post-apply verifier可作為 AWOOOI 專用受控部署 lane。
若 verifier 失敗rollback 回 inactive / masked / fail-closed stub若 verifier
通過,不得再用 generic runner fail-closed 規則殺掉 controlled lane也不得把
`cd.yaml` / `code-review.yaml` 長期停在 `workflow_dispatch` only。
2026-06-28 controlled updatemain 曾短暫重開 controlled CD lane / drain lane
live evidence 顯示 opener 會從 `/root/awoooi-cd-lane-drain-disabled-*` 復活
drain lane 並拉起 job container。因此目前 source-of-truth 回到 manual-only /
fail-closed。恢復自動 CD 必須另開 runner 搬遷或硬限流變更,包含 target selector、
source diff、check-mode / dry-run、rollback、post-apply verifier 與 root
restore-source left `0` readback。
---
版本: v2.0 | 更新: 2026-03-29 | 作者: Claude Code

View File

@@ -13,6 +13,7 @@ set -euo pipefail
# 2026-06-28 Codex: non-behavior trigger after increasing API test container memory.
# 2026-06-28 Codex: host 110 runner pressure remains an incident-grade guard.
# Controlled apply is open, but this pressure gate stays fail-closed by default.
# 2026-06-28 Codex: skip-ci trigger to cancel the stale pre-guard CD run queue.
ATTEMPTS="${HOST_WEB_BUILD_PRESSURE_ATTEMPTS:-${HOST_WEB_BUILD_PRESSURE_MAX_ATTEMPTS:-60}}"
SLEEP_SECONDS="${HOST_WEB_BUILD_PRESSURE_SLEEP_SECONDS:-${HOST_WEB_BUILD_PRESSURE_INTERVAL:-10}}"

View File

@@ -193,19 +193,12 @@ RUNNER_DIR="/home/wooo/act-runner"
RUNNER_SERVICE="gitea-act-runner-host.service"
RUNNER_ENABLE_SENTINEL="/run/awoooi-runner-host-enabled"
CD_LANE_DIR="/home/wooo/awoooi-cd-lane"
CD_LANE_SERVICE="awoooi-cd-lane.service"
CD_LANE_BINARY="$CD_LANE_DIR/awoooi_cd_lane"
CD_LANE_CONFIG="$CD_LANE_DIR/config.yaml"
CD_LANE_DRAIN_DIR="/home/wooo/awoooi-cd-lane-drain"
CD_LANE_DRAIN_SERVICE="awoooi-cd-lane-drain.service"
CD_LANE_DRAIN_BINARY="$CD_LANE_DRAIN_DIR/awoooi_cd_lane_controlled"
CD_LANE_DRAIN_CONFIG="$CD_LANE_DRAIN_DIR/config.yaml"
CD_LANE_ENABLE_SENTINEL="/run/awoooi-cd-lane-enabled"
START_GITEA_RUNNER_ON_BOOT="${AWOOOI_START_GITEA_RUNNER_ON_BOOT:-0}"
START_CONTROLLED_CD_LANE="${AWOOOI_START_CONTROLLED_CD_LANE:-0}"
START_GITEA_RUNNER_ALLOWED=0
START_CD_LANE_ALLOWED=0
RUNNER_FAIL_CLOSED_SERVICES=(
"awoooi-cd-lane.service"
"awoooi-cd-lane-drain.service"
"awoooi-direct-runner-open.service"
"awoooi-direct-runner.service"
"gitea-act-runner-host.service"
@@ -214,19 +207,18 @@ RUNNER_FAIL_CLOSED_SERVICES=(
"gitea-act-runner-awoooi-open.service"
)
RUNNER_FAIL_CLOSED_BINARY_PATHS=(
"/home/wooo/awoooi-cd-lane/awoooi_cd_lane"
"/home/wooo/awoooi-cd-lane-drain/awoooi_cd_lane_controlled"
"/home/wooo/act-runner/act_runner"
"/home/wooo/act-runner/act_runner.real-20260628-runner-pressure-guard"
"/home/wooo/act-runner-controlled/act_runner"
"/home/wooo/awoooi-controlled-runner/awoooi_controlled_runner"
)
# Legacy host runner still needs both keys. The dedicated cd-lane has its own
# sentinel and narrow label/capacity verifier below.
# Host runner still needs both keys. The direct cd-lane stays fail-closed until
# it is migrated or hard-limited outside this production host pressure lane.
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ] && [ -e "$RUNNER_ENABLE_SENTINEL" ]; then
START_GITEA_RUNNER_ALLOWED=1
fi
if [ -e "$CD_LANE_ENABLE_SENTINEL" ] || [ "$START_CONTROLLED_CD_LANE" = "1" ]; then
START_CD_LANE_ALLOWED=1
fi
mask_runner_unit_file() {
local unit="$1"
@@ -279,143 +271,17 @@ EOF
install_cd_lane_fail_closed_unit() {
local unit_file="/etc/systemd/system/awoooi-cd-lane.service"
local tmp
local quarantine_stamp
quarantine_stamp="$(date +%Y%m%d%H%M%S)"
systemctl mask awoooi-cd-lane.service >/dev/null 2>&1 || true
if [ -e "$unit_file" ] || [ -L "$unit_file" ]; then
chattr -i "$unit_file" >/dev/null 2>&1 || true
if ! grep -q "AWOOOI direct CD lane fail-closed" "$unit_file" 2>/dev/null; then
if ! { [ -L "$unit_file" ] && [ "$(readlink "$unit_file" 2>/dev/null || true)" = "/dev/null" ]; }; then
mv "$unit_file" "${unit_file}.quarantined-runner-incident-${quarantine_stamp}" >/dev/null 2>&1 || true
fi
fi
tmp="$(mktemp)"
cat >"$tmp" <<'EOF'
[Unit]
Description=AWOOOI direct CD lane fail-closed after 2026-06-28 pressure incident
ConditionPathExists=/run/awoooi-cd-lane-enabled
[Service]
Type=oneshot
ExecStart=/bin/false
EOF
install -o root -g root -m 0444 "$tmp" "$unit_file" >/dev/null 2>&1 || true
rm -f "$tmp"
chattr +i "$unit_file" >/dev/null 2>&1 || true
}
# Write the systemd unit file for the dedicated cd-lane runner.
#
# The unit text is rendered into a temp file and then installed root-owned,
# mode 0644, at /etc/systemd/system/$CD_LANE_SERVICE. This function only writes
# the file: it does not run daemon-reload, enable, or start anything.
#
# Globals: CD_LANE_SERVICE, CD_LANE_BINARY, CD_LANE_DIR, CD_LANE_CONFIG (read)
# Arguments: none
install_controlled_cd_lane_unit() {
local unit_file="/etc/systemd/system/$CD_LANE_SERVICE"
local tmp
# Best-effort: clear the immutable attribute on the unit file and the runner
# binary so the install below is not blocked by an earlier `chattr +i`.
chattr -i "$unit_file" "$CD_LANE_BINARY" >/dev/null 2>&1 || true
tmp="$(mktemp)"
# Unquoted heredoc delimiter: the ${CD_LANE_*} references are expanded here,
# so the written unit contains literal paths rather than variable names.
cat >"$tmp" <<EOF
[Unit]
Description=AWOOOI controlled CD lane
After=network-online.target docker.service
Wants=network-online.target
Requires=docker.service
[Service]
Type=simple
User=wooo
WorkingDirectory=${CD_LANE_DIR}/data
Environment=HOME=/home/wooo
Environment=AWOOOI_CONTROLLED_RUNNER_OPEN=1
Environment=HOST_WEB_BUILD_PRESSURE_ATTEMPTS=1
Environment=HOST_WEB_BUILD_PRESSURE_SLEEP_SECONDS=1
ExecStart=${CD_LANE_BINARY} daemon --config ${CD_LANE_CONFIG}
Restart=always
RestartSec=10
KillSignal=SIGINT
TimeoutStopSec=3700
SuccessExitStatus=0 130 143
CPUQuota=250%
MemoryHigh=8G
MemoryMax=12G
TasksMax=512
[Install]
WantedBy=multi-user.target
EOF
# Install errors are suppressed, so callers get no signal if the write fails.
install -o root -g root -m 0644 "$tmp" "$unit_file" >/dev/null 2>&1 || true
rm -f "$tmp"
}
# Write the systemd unit file for the cd-lane drain runner.
#
# Same shape as the regular cd-lane unit, but pointed at the drain directory,
# binary and config. If the unit path is currently a symlink to /dev/null
# (i.e. the unit is masked), that symlink is removed first so a real unit file
# can be installed. This function only writes the file: it does not run
# daemon-reload, enable, or start anything.
#
# Globals: CD_LANE_DRAIN_SERVICE, CD_LANE_DRAIN_BINARY, CD_LANE_DRAIN_DIR,
#          CD_LANE_DRAIN_CONFIG (read)
# Arguments: none
install_controlled_cd_lane_drain_unit() {
local unit_file="/etc/systemd/system/$CD_LANE_DRAIN_SERVICE"
local tmp
# Best-effort: clear the immutable attribute on the unit file and the binary.
chattr -i "$unit_file" "$CD_LANE_DRAIN_BINARY" >/dev/null 2>&1 || true
# Undo a mask: a /dev/null symlink at the unit path is deleted before install.
if [ -L "$unit_file" ] && [ "$(readlink "$unit_file" 2>/dev/null || true)" = "/dev/null" ]; then
rm -f "$unit_file" >/dev/null 2>&1 || true
fi
tmp="$(mktemp)"
# Unquoted heredoc delimiter: the ${CD_LANE_DRAIN_*} references are expanded
# here, so the written unit contains literal paths.
cat >"$tmp" <<EOF
[Unit]
Description=AWOOOI controlled CD lane drain bypass for old queued guards
After=network-online.target docker.service
Wants=network-online.target
Requires=docker.service
[Service]
Type=simple
User=wooo
WorkingDirectory=${CD_LANE_DRAIN_DIR}/data
Environment=HOME=/home/wooo
Environment=AWOOOI_CONTROLLED_RUNNER_OPEN=1
Environment=HOST_WEB_BUILD_PRESSURE_ATTEMPTS=1
Environment=HOST_WEB_BUILD_PRESSURE_SLEEP_SECONDS=1
ExecStart=${CD_LANE_DRAIN_BINARY} daemon --config ${CD_LANE_DRAIN_CONFIG}
Restart=always
RestartSec=10
KillSignal=SIGINT
TimeoutStopSec=3700
SuccessExitStatus=0 130 143
CPUQuota=250%
MemoryHigh=8G
MemoryMax=12G
TasksMax=512
[Install]
WantedBy=multi-user.target
EOF
# Install errors are suppressed, so callers get no signal if the write fails.
install -o root -g root -m 0644 "$tmp" "$unit_file" >/dev/null 2>&1 || true
rm -f "$tmp"
}
# Verify that a cd-lane runner config is restricted to the controlled profile.
#
# A config passes only when all of the following hold:
#   - the file exists;
#   - it has an indented `capacity: 1` line;
#   - it contains both AWOOOI labels (matched as fixed strings);
#   - no list item mentions a generic/heavy label (ubuntu-latest, stockplatform,
#     headless, playwright).
#
# The heavy-label check is case-insensitive and accepts quoted or unquoted list
# items, so `Playwright`, `StockPlatform`, or `- ubuntu-latest:...` cannot slip
# past the verifier just by changing case or quoting style.
#
# Arguments: $1 - path to the runner config file
# Returns:   0 when the config is controlled, 1 otherwise
cd_lane_config_path_is_controlled() {
  local config_path="$1"
  local heavy_label_re='^[[:space:]]*-[[:space:]].*(ubuntu-latest|stockplatform|headless|playwright)'

  [ -f "$config_path" ] || return 1
  grep -Eq '^[[:space:]]+capacity:[[:space:]]*1[[:space:]]*$' "$config_path" || return 1
  # -F: the labels contain '.' and must match literally, not as regex wildcards.
  grep -Fq -- 'awoooi-ubuntu:docker://192.168.0.110:5000/awoooi/ci-runner:act-22.04' "$config_path" || return 1
  grep -Fq -- 'awoooi-host:host' "$config_path" || return 1
  if grep -Eiq -- "$heavy_label_re" "$config_path"; then
    return 1
  fi
  return 0
}
# Check the regular cd-lane config ($CD_LANE_CONFIG) against the controlled
# profile. Returns whatever cd_lane_config_path_is_controlled returns.
cd_lane_config_is_controlled() {
  local regular_config="$CD_LANE_CONFIG"
  cd_lane_config_path_is_controlled "$regular_config"
}
# Check the drain cd-lane config ($CD_LANE_DRAIN_CONFIG) against the controlled
# profile. Returns whatever cd_lane_config_path_is_controlled returns.
cd_lane_drain_config_is_controlled() {
  local drain_config="$CD_LANE_DRAIN_CONFIG"
  cd_lane_config_path_is_controlled "$drain_config"
}
# Report whether the drain lane is currently running in its controlled form.
#
# Succeeds only when systemd reports the drain service as `active`, the drain
# config passes cd_lane_drain_config_is_controlled, and `file` describes the
# drain binary as ELF.
#
# Globals: CD_LANE_DRAIN_SERVICE, CD_LANE_DRAIN_BINARY (read)
# Returns: 0 when all three checks pass, 1 otherwise
cd_lane_drain_is_controlled_open() {
  local unit_state
  unit_state="$(systemctl show "$CD_LANE_DRAIN_SERVICE" -p ActiveState --value 2>/dev/null || true)"
  if [ "$unit_state" != "active" ]; then
    return 1
  fi
  if ! cd_lane_drain_config_is_controlled; then
    return 1
  fi
  if file "$CD_LANE_DRAIN_BINARY" 2>/dev/null | grep -qi "ELF"; then
    return 0
  fi
  return 1
}
cd_lane_drain_is_controlled_available() {
cd_lane_drain_config_is_controlled || return 1
file "$CD_LANE_DRAIN_BINARY" 2>/dev/null | grep -qi "ELF" || return 1
return 0
ln -sfn /dev/null "$unit_file" >/dev/null 2>&1 || true
}
quarantine_cd_lane_registration_fail_closed() {
@@ -448,33 +314,28 @@ quarantine_cd_lane_registration_fail_closed() {
done
}
# Move cd-lane restore-source directories into a final quarantine directory.
#
# Every top-level directory under the scan root named
# `awoooi-cd-lane-disabled-*` or `awoooi-cd-lane-drain-disabled-*` is moved to
#   <root>/awoooi-runner-restore-sources-disabled-final-<timestamp>/cd-lane-restore-sources/
# whose name matches neither of those two patterns. Directory contents are
# never read; directories are only moved.
#
# The quarantine directory is created lazily, on the first match, so repeated
# runs with nothing to move do not leave empty timestamped directories behind.
# All steps are best-effort and silent.
#
# Arguments: $1 - (optional) directory to scan; defaults to /root
# Returns:   always 0
quarantine_cd_lane_root_restore_sources_fail_closed() {
  local search_root="${1:-/root}"
  local final_root
  local path
  local target_dir
  local target_ready=0

  final_root="${search_root}/awoooi-runner-restore-sources-disabled-final-$(date +%Y%m%dT%H%M%S%z)"
  target_dir="$final_root/cd-lane-restore-sources"

  while IFS= read -r -d '' path; do
    [ -d "$path" ] || continue
    if [ "$target_ready" = "0" ]; then
      # Without the target directory every mv below would fail, so stop here.
      mkdir -p "$target_dir" >/dev/null 2>&1 || return 0
      target_ready=1
    fi
    # Clear immutable flags first so the move is not refused.
    chattr -R -i "$path" >/dev/null 2>&1 || true
    mv "$path" "$target_dir/" >/dev/null 2>&1 || true
  done < <(
    find "$search_root" -maxdepth 1 -type d \
      \( -name 'awoooi-cd-lane-disabled-*' -o -name 'awoooi-cd-lane-drain-disabled-*' \) \
      -print0 2>/dev/null || true
  )
  return 0
}
apply_cd_lane_fail_closed_guard() {
local unit
if cd_lane_drain_is_controlled_available; then
if cd_lane_drain_is_controlled_open; then
log "✅ controlled cd-lane drain verifier passed; preserving drain lane and fail-closing regular lane only"
else
log "✅ controlled cd-lane drain assets verified; restoring drain unit and fail-closing regular lane only"
fi
systemctl kill --signal=SIGKILL "$CD_LANE_SERVICE" >/dev/null 2>&1 || true
systemctl stop "$CD_LANE_SERVICE" >/dev/null 2>&1 || true
systemctl disable "$CD_LANE_SERVICE" >/dev/null 2>&1 || true
install_cd_lane_fail_closed_unit
pkill -KILL -f "^${CD_LANE_BINARY} daemon" >/dev/null 2>&1 || true
install_controlled_cd_lane_drain_unit
systemctl daemon-reload >/dev/null 2>&1 || true
systemctl enable --now "$CD_LANE_DRAIN_SERVICE" >/dev/null 2>&1 || true
return 0
fi
if { [ -e "$CD_LANE_ENABLE_SENTINEL" ] || [ -e "/run/awoooi-cd-lane-controlled-open" ] || [ "$START_CONTROLLED_CD_LANE" = "1" ]; } \
&& cd_lane_config_is_controlled \
&& file "$CD_LANE_BINARY" 2>/dev/null | grep -qi "ELF"; then
log "✅ controlled cd-lane verifier passed; keeping dedicated lane open"
install_controlled_cd_lane_unit
systemctl daemon-reload >/dev/null 2>&1 || true
systemctl enable --now "$CD_LANE_SERVICE" >/dev/null 2>&1 || true
return 0
fi
for unit in awoooi-cd-lane.service awoooi-cd-lane-drain.service; do
systemctl kill --signal=SIGKILL "$unit" >/dev/null 2>&1 || true
systemctl stop "$unit" >/dev/null 2>&1 || true
@@ -490,6 +351,7 @@ apply_cd_lane_fail_closed_guard() {
pkill -KILL -f "^${CD_LANE_DIR}/awoooi_cd_lane daemon" >/dev/null 2>&1 || true
pkill -KILL -f "^${CD_LANE_DRAIN_DIR}/awoooi_cd_lane_controlled daemon" >/dev/null 2>&1 || true
quarantine_cd_lane_registration_fail_closed
quarantine_cd_lane_root_restore_sources_fail_closed
guard_runner_binary_fail_closed "$CD_LANE_DIR/awoooi_cd_lane"
guard_runner_binary_fail_closed "$CD_LANE_DRAIN_DIR/awoooi_cd_lane_controlled"
systemctl daemon-reload >/dev/null 2>&1 || true
@@ -499,22 +361,6 @@ ensure_cd_lane_fail_closed() {
apply_cd_lane_fail_closed_guard
}
# Open the dedicated cd-lane only if its config and binary pass verification.
#
# When the config check fails, or `file` does not describe $CD_LANE_BINARY as
# ELF, the reason is logged and the lane is forced back to fail-closed.
# Otherwise the controlled unit is written, systemd is reloaded, and the
# service is enabled and started (both systemctl calls are best-effort).
#
# Globals: CD_LANE_BINARY, CD_LANE_SERVICE (read)
# Returns: 0
ensure_controlled_cd_lane_open() {
  local refusal=""

  if cd_lane_config_is_controlled; then
    if file "$CD_LANE_BINARY" 2>/dev/null | grep -qi "ELF"; then
      :
    else
      refusal="⛔ controlled cd-lane binary 不是可執行 ELF維持 fail-closed"
    fi
  else
    refusal="⛔ controlled cd-lane config 未通過 capacity/label 檢查,維持 fail-closed"
  fi

  if [ -n "$refusal" ]; then
    log "$refusal"
    ensure_cd_lane_fail_closed
    return 0
  fi

  install_controlled_cd_lane_unit
  systemctl daemon-reload >/dev/null 2>&1 || true
  systemctl enable --now "$CD_LANE_SERVICE" >/dev/null 2>&1 || true
}
ensure_host_runner_fail_closed() {
local unit
local binary
@@ -544,6 +390,10 @@ ensure_host_runner_fail_closed() {
fi
pkill -KILL -f "^${RUNNER_DIR}/act_runner(\\.real-[^ ]*)? daemon" >/dev/null 2>&1 || true
pkill -KILL -f "^${CD_LANE_DIR}/awoooi_cd_lane daemon" >/dev/null 2>&1 || true
pkill -KILL -f "^${CD_LANE_DRAIN_DIR}/awoooi_cd_lane_controlled daemon" >/dev/null 2>&1 || true
quarantine_cd_lane_registration_fail_closed
quarantine_cd_lane_root_restore_sources_fail_closed
for binary in "${RUNNER_FAIL_CLOSED_BINARY_PATHS[@]}"; do
guard_runner_binary_fail_closed "$binary"
done
@@ -649,13 +499,8 @@ else
log "⚠️ 找不到 act-runner binary/config: $RUNNER_DIR"
fi
if [ "$START_CD_LANE_ALLOWED" = "1" ]; then
log "✅ controlled cd-lane 具備 sentinel/env 授權,執行 capacity/label/binary verifier 後受控開啟"
ensure_controlled_cd_lane_open
else
log "⏸️ controlled cd-lane 未要求啟動;保留合格 drain laneregular lane 維持 fail-closed"
ensure_cd_lane_fail_closed
fi
log "⏸️ direct cd-lane / drain lane 維持 fail-closed需完成搬遷或硬限流後才可用獨立變更恢復"
ensure_cd_lane_fail_closed
# ──────────────────────────────────────────────
# STEP 7: SentryError Tracking

View File

@@ -327,9 +327,6 @@ if [ "$cd_lane_active" = "inactive" ] \
&& { { [ "$cd_lane_load" = "masked" ] && [ "$cd_lane_unitfile" = "masked" ]; } || echo "$cd_lane_execstart" | grep -q "/bin/false"; }; then
cd_lane_ok=1
cd_lane_mode=failclosed
elif [ "$cd_lane_sentinel" = "present" ] && [ "$cd_lane_active" = "active" ] && [ "$cd_lane_capacity_ok" = "1" ] && [ "$cd_lane_labels_ok" = "1" ] && [ "$cd_lane_binary_elf" = "1" ]; then
cd_lane_ok=1
cd_lane_mode=controlled_open
fi
echo "CD_LANE_CONTROLLED mode=$cd_lane_mode load=$cd_lane_load unitfile=$cd_lane_unitfile active=$cd_lane_active mainpid=$cd_lane_mainpid sentinel=$cd_lane_sentinel capacity=$cd_lane_capacity_ok labels=$cd_lane_labels_ok binary_elf=$cd_lane_binary_elf process_count=$cd_lane_process_count ok=$cd_lane_ok"
cd_lane_drain_load=$(systemctl show awoooi-cd-lane-drain.service -p LoadState --value 2>/dev/null || true)
@@ -358,22 +355,21 @@ if [ "$cd_lane_drain_active" != "active" ] \
&& { [ "$cd_lane_drain_load" = "not-found" ] || { [ "$cd_lane_drain_load" = "masked" ] && [ "$cd_lane_drain_unitfile" = "masked" ]; }; }; then
cd_lane_drain_ok=1
cd_lane_drain_mode=failclosed
elif [ "$cd_lane_drain_active" = "active" ] \
&& [ "$cd_lane_drain_capacity_ok" = "1" ] \
&& [ "$cd_lane_drain_labels_ok" = "1" ] \
&& [ "$cd_lane_drain_binary_elf" = "1" ]; then
cd_lane_drain_ok=1
cd_lane_drain_mode=controlled_open
fi
echo "CD_LANE_DRAIN_CONTROLLED mode=$cd_lane_drain_mode load=$cd_lane_drain_load unitfile=$cd_lane_drain_unitfile active=$cd_lane_drain_active mainpid=$cd_lane_drain_mainpid capacity=$cd_lane_drain_capacity_ok labels=$cd_lane_drain_labels_ok binary_elf=$cd_lane_drain_binary_elf process_count=$cd_lane_drain_process_count ok=$cd_lane_drain_ok"
cd_lane_root_restore_left=unknown
if sudo -n true >/dev/null 2>&1; then
cd_lane_root_restore_left=$(sudo -n find /root -maxdepth 1 -type d \( -name "awoooi-cd-lane-disabled-*" -o -name "awoooi-cd-lane-drain-disabled-*" \) -print 2>/dev/null | wc -l | tr -d " ")
fi
echo "CD_LANE_ROOT_RESTORE_SOURCES left=$cd_lane_root_restore_left"
cd_lane_guard_ok=0
if [ "$cd_lane_ok" = "1" ] || [ "$cd_lane_drain_ok" = "1" ]; then
if [ "$cd_lane_ok" = "1" ] && [ "$cd_lane_drain_ok" = "1" ] && [ "$cd_lane_root_restore_left" = "0" ]; then
cd_lane_guard_ok=1
fi
echo "CD_LANE_GUARDRAILS_OK $cd_lane_guard_ok"
direct_runner_count=$(pgrep -f "^/home/wooo/act-runner/act_runner|^/home/wooo/act-runner-controlled/act_runner|^/home/wooo/awoooi-controlled-runner/awoooi_controlled_runner" 2>/dev/null | wc -l | tr -d " ")
echo "RUNNER_DIRECT_PROCESS_COUNT $direct_runner_count"
for p in /home/wooo/act-runner/act_runner /home/wooo/act-runner/act_runner.real-20260628-runner-pressure-guard /home/wooo/act-runner-controlled/act_runner /home/wooo/awoooi-controlled-runner/awoooi_controlled_runner; do
for p in /home/wooo/awoooi-cd-lane/awoooi_cd_lane /home/wooo/awoooi-cd-lane-drain/awoooi_cd_lane_controlled /home/wooo/act-runner/act_runner /home/wooo/act-runner/act_runner.real-20260628-runner-pressure-guard /home/wooo/act-runner-controlled/act_runner /home/wooo/awoooi-controlled-runner/awoooi_controlled_runner; do
kind=$(file -b "$p" 2>/dev/null || echo missing)
echo "RUNNER_FAILCLOSED_BINARY $p kind=$kind"
echo "$kind" | grep -qi "ELF" && echo "RUNNER_FAILCLOSED_BINARY_ELF $p"
@@ -406,7 +402,7 @@ docker ps --format "DOCKER {{.Names}}\t{{.Status}}" | head -120
else
fail "110 legacy direct/Gitea runner units are not fail-closed"
fi
grep -q "CD_LANE_GUARDRAILS_OK 1" <<<"$out" && ok "110 controlled cd-lane is safe, drained, or fail-closed" || fail "110 controlled cd-lane is neither safe-open/drained nor fail-closed"
grep -q "CD_LANE_GUARDRAILS_OK 1" <<<"$out" && ok "110 runner/CD lane fail-closed guardrails complete" || fail "110 runner/CD lane fail-closed guardrails incomplete"
grep -q "RUNNER_DIRECT_PROCESS_COUNT 0" <<<"$out" && ok "110 legacy direct runner process count is zero" || fail "110 legacy direct runner process detected"
grep -q "RUNNER_FAILCLOSED_BINARY_ELF" <<<"$out" && fail "110 runner fail-closed binary path restored to ELF" || ok "110 runner binary paths are fail-closed stubs or missing"
grep -q "sentry-self-hosted-clickhouse-1.*Restarting" <<<"$out" && warn "Sentry ClickHouse restarting" || ok "Sentry ClickHouse not visibly restarting"

View File

@@ -346,9 +346,6 @@ if [ "$cd_lane_active" = "inactive" ] \
&& { { [ "$cd_lane_load" = "masked" ] && [ "$cd_lane_unitfile" = "masked" ]; } || echo "$cd_lane_execstart" | grep -q "/bin/false"; }; then
cd_lane_ok=1
cd_lane_mode=failclosed
elif [ "$cd_lane_sentinel" = "present" ] && [ "$cd_lane_active" = "active" ] && [ "$cd_lane_capacity_ok" = "1" ] && [ "$cd_lane_labels_ok" = "1" ] && [ "$cd_lane_binary_elf" = "1" ]; then
cd_lane_ok=1
cd_lane_mode=controlled_open
fi
echo "CD_LANE_CONTROLLED mode=$cd_lane_mode load=$cd_lane_load unitfile=$cd_lane_unitfile active=$cd_lane_active sentinel=$cd_lane_sentinel capacity=$cd_lane_capacity_ok labels=$cd_lane_labels_ok binary_elf=$cd_lane_binary_elf process_count=$cd_lane_process_count ok=$cd_lane_ok"
cd_lane_drain_load=$(systemctl show awoooi-cd-lane-drain.service -p LoadState --value 2>/dev/null || true)
@@ -376,16 +373,15 @@ if [ "$cd_lane_drain_active" != "active" ] \
&& { [ "$cd_lane_drain_load" = "not-found" ] || { [ "$cd_lane_drain_load" = "masked" ] && [ "$cd_lane_drain_unitfile" = "masked" ]; }; }; then
cd_lane_drain_ok=1
cd_lane_drain_mode=failclosed
elif [ "$cd_lane_drain_active" = "active" ] \
&& [ "$cd_lane_drain_capacity_ok" = "1" ] \
&& [ "$cd_lane_drain_labels_ok" = "1" ] \
&& [ "$cd_lane_drain_binary_elf" = "1" ]; then
cd_lane_drain_ok=1
cd_lane_drain_mode=controlled_open
fi
echo "CD_LANE_DRAIN_CONTROLLED mode=$cd_lane_drain_mode load=$cd_lane_drain_load unitfile=$cd_lane_drain_unitfile active=$cd_lane_drain_active capacity=$cd_lane_drain_capacity_ok labels=$cd_lane_drain_labels_ok binary_elf=$cd_lane_drain_binary_elf process_count=$cd_lane_drain_process_count ok=$cd_lane_drain_ok"
cd_lane_root_restore_left=unknown
if sudo -n true >/dev/null 2>&1; then
cd_lane_root_restore_left=$(sudo -n find /root -maxdepth 1 -type d \( -name "awoooi-cd-lane-disabled-*" -o -name "awoooi-cd-lane-drain-disabled-*" \) -print 2>/dev/null | wc -l | tr -d " ")
fi
echo "CD_LANE_ROOT_RESTORE_SOURCES left=$cd_lane_root_restore_left"
cd_lane_guard_ok=0
if [ "$cd_lane_ok" = "1" ] || [ "$cd_lane_drain_ok" = "1" ]; then
if [ "$cd_lane_ok" = "1" ] && [ "$cd_lane_drain_ok" = "1" ] && [ "$cd_lane_root_restore_left" = "0" ]; then
cd_lane_guard_ok=1
fi
echo "CD_LANE_GUARDRAILS_OK $cd_lane_guard_ok"
@@ -393,7 +389,7 @@ echo "CD_LANE_GUARDRAILS_OK $cd_lane_guard_ok"
direct_runner_count=$(pgrep -f "^/home/wooo/act-runner/act_runner|^/home/wooo/act-runner-controlled/act_runner|^/home/wooo/awoooi-controlled-runner/awoooi_controlled_runner" 2>/dev/null | wc -l | tr -d " ")
echo "RUNNER_DIRECT_PROCESS_COUNT $direct_runner_count"
[ "$direct_runner_count" = "0" ] || bad=1
for p in /home/wooo/act-runner/act_runner /home/wooo/act-runner/act_runner.real-20260628-runner-pressure-guard /home/wooo/act-runner-controlled/act_runner /home/wooo/awoooi-controlled-runner/awoooi_controlled_runner; do
for p in /home/wooo/awoooi-cd-lane/awoooi_cd_lane /home/wooo/awoooi-cd-lane-drain/awoooi_cd_lane_controlled /home/wooo/act-runner/act_runner /home/wooo/act-runner/act_runner.real-20260628-runner-pressure-guard /home/wooo/act-runner-controlled/act_runner /home/wooo/awoooi-controlled-runner/awoooi_controlled_runner; do
kind=$(file -b "$p" 2>/dev/null || echo missing)
echo "RUNNER_FAILCLOSED_BINARY $p kind=$kind"
echo "$kind" | grep -qi "ELF" && bad=1

View File

@@ -569,16 +569,18 @@ fi
cd_lane_binary_kind=$(file -b /home/wooo/awoooi-cd-lane/awoooi_cd_lane 2>/dev/null || echo missing)
cd_lane_binary_elf=0
echo "$cd_lane_binary_kind" | grep -qi "ELF" && cd_lane_binary_elf=1
cd_lane_process_count=$(pgrep -f "^/home/wooo/awoooi-cd-lane/awoooi_cd_lane" 2>/dev/null | wc -l | tr -d " ")
cd_lane_ok=0
cd_lane_mode=blocked
if [ "$cd_lane_active" = "inactive" ] && echo "$cd_lane_execstart" | grep -q "/bin/false" && [ "$cd_lane_binary_elf" = "0" ]; then
if [ "$cd_lane_active" = "inactive" ] \
&& [ "$cd_lane_sentinel" = "missing" ] \
&& [ "$cd_lane_binary_elf" = "0" ] \
&& [ "$cd_lane_process_count" = "0" ] \
&& { { [ "$cd_lane_load" = "masked" ] && [ "$cd_lane_unitfile" = "masked" ]; } || echo "$cd_lane_execstart" | grep -q "/bin/false"; }; then
cd_lane_ok=1
cd_lane_mode=failclosed
elif [ "$cd_lane_sentinel" = "present" ] && [ "$cd_lane_active" = "active" ] && [ "$cd_lane_capacity_ok" = "1" ] && [ "$cd_lane_labels_ok" = "1" ] && [ "$cd_lane_binary_elf" = "1" ]; then
cd_lane_ok=1
cd_lane_mode=controlled_open
fi
echo "CD_LANE_CONTROLLED mode=$cd_lane_mode load=$cd_lane_load unitfile=$cd_lane_unitfile active=$cd_lane_active mainpid=$cd_lane_mainpid sentinel=$cd_lane_sentinel capacity=$cd_lane_capacity_ok labels=$cd_lane_labels_ok binary_elf=$cd_lane_binary_elf ok=$cd_lane_ok"
echo "CD_LANE_CONTROLLED mode=$cd_lane_mode load=$cd_lane_load unitfile=$cd_lane_unitfile active=$cd_lane_active mainpid=$cd_lane_mainpid sentinel=$cd_lane_sentinel capacity=$cd_lane_capacity_ok labels=$cd_lane_labels_ok binary_elf=$cd_lane_binary_elf process_count=$cd_lane_process_count ok=$cd_lane_ok"
cd_lane_drain_load=$(systemctl show awoooi-cd-lane-drain.service -p LoadState --value 2>/dev/null || true)
cd_lane_drain_unitfile=$(systemctl show awoooi-cd-lane-drain.service -p UnitFileState --value 2>/dev/null || true)
cd_lane_drain_active=$(systemctl show awoooi-cd-lane-drain.service -p ActiveState --value 2>/dev/null || true)
@@ -596,24 +598,30 @@ fi
cd_lane_drain_binary_kind=$(file -b /home/wooo/awoooi-cd-lane-drain/awoooi_cd_lane_controlled 2>/dev/null || echo missing)
cd_lane_drain_binary_elf=0
echo "$cd_lane_drain_binary_kind" | grep -qi "ELF" && cd_lane_drain_binary_elf=1
cd_lane_drain_process_count=$(pgrep -f "^/home/wooo/awoooi-cd-lane-drain/awoooi_cd_lane_controlled" 2>/dev/null | wc -l | tr -d " ")
cd_lane_drain_ok=0
cd_lane_drain_mode=absent
if [ "$cd_lane_drain_load" = "loaded" ] || [ "$cd_lane_drain_unitfile" = "enabled" ] || [ "$cd_lane_drain_active" = "active" ]; then
cd_lane_drain_mode=blocked
fi
if [ "$cd_lane_drain_active" = "active" ] && [ "$cd_lane_drain_capacity_ok" = "1" ] && [ "$cd_lane_drain_labels_ok" = "1" ] && [ "$cd_lane_drain_binary_elf" = "1" ]; then
cd_lane_drain_mode=blocked
if [ "$cd_lane_drain_active" != "active" ] \
&& [ "$cd_lane_drain_binary_elf" = "0" ] \
&& [ "$cd_lane_drain_process_count" = "0" ] \
&& { [ "$cd_lane_drain_load" = "not-found" ] || { [ "$cd_lane_drain_load" = "masked" ] && [ "$cd_lane_drain_unitfile" = "masked" ]; }; }; then
cd_lane_drain_ok=1
cd_lane_drain_mode=controlled_open
cd_lane_drain_mode=failclosed
fi
echo "CD_LANE_DRAIN_CONTROLLED mode=$cd_lane_drain_mode load=$cd_lane_drain_load unitfile=$cd_lane_drain_unitfile active=$cd_lane_drain_active mainpid=$cd_lane_drain_mainpid capacity=$cd_lane_drain_capacity_ok labels=$cd_lane_drain_labels_ok binary_elf=$cd_lane_drain_binary_elf ok=$cd_lane_drain_ok"
echo "CD_LANE_DRAIN_CONTROLLED mode=$cd_lane_drain_mode load=$cd_lane_drain_load unitfile=$cd_lane_drain_unitfile active=$cd_lane_drain_active mainpid=$cd_lane_drain_mainpid capacity=$cd_lane_drain_capacity_ok labels=$cd_lane_drain_labels_ok binary_elf=$cd_lane_drain_binary_elf process_count=$cd_lane_drain_process_count ok=$cd_lane_drain_ok"
cd_lane_root_restore_left=unknown
if sudo -n true >/dev/null 2>&1; then
cd_lane_root_restore_left=$(sudo -n find /root -maxdepth 1 -type d \( -name "awoooi-cd-lane-disabled-*" -o -name "awoooi-cd-lane-drain-disabled-*" \) -print 2>/dev/null | wc -l | tr -d " ")
fi
echo "CD_LANE_ROOT_RESTORE_SOURCES left=$cd_lane_root_restore_left"
cd_lane_guard_ok=0
if [ "$cd_lane_ok" = "1" ] || [ "$cd_lane_drain_ok" = "1" ]; then
if [ "$cd_lane_ok" = "1" ] && [ "$cd_lane_drain_ok" = "1" ] && [ "$cd_lane_root_restore_left" = "0" ]; then
cd_lane_guard_ok=1
fi
echo "CD_LANE_GUARDRAILS_OK $cd_lane_guard_ok"
direct_runner_count=$(pgrep -f "^/home/wooo/act-runner/act_runner|^/home/wooo/act-runner-controlled/act_runner|^/home/wooo/awoooi-controlled-runner/awoooi_controlled_runner" 2>/dev/null | wc -l | tr -d " ")
echo "RUNNER_DIRECT_PROCESS_COUNT $direct_runner_count"
for p in /home/wooo/act-runner/act_runner /home/wooo/act-runner/act_runner.real-20260628-runner-pressure-guard /home/wooo/act-runner-controlled/act_runner /home/wooo/awoooi-controlled-runner/awoooi_controlled_runner; do
for p in /home/wooo/awoooi-cd-lane/awoooi_cd_lane /home/wooo/awoooi-cd-lane-drain/awoooi_cd_lane_controlled /home/wooo/act-runner/act_runner /home/wooo/act-runner/act_runner.real-20260628-runner-pressure-guard /home/wooo/act-runner-controlled/act_runner /home/wooo/awoooi-controlled-runner/awoooi_controlled_runner; do
kind=$(file -b "$p" 2>/dev/null || echo missing)
echo "RUNNER_FAILCLOSED_BINARY $p kind=$kind"
echo "$kind" | grep -qi "ELF" && echo "RUNNER_FAILCLOSED_BINARY_ELF $p"
@@ -631,7 +639,7 @@ if awk '$1 == "RUNNER_FAILCLOSED_UNIT" && $NF != "ok=1" {bad=1} END {exit bad}'
else
blocked "110 legacy direct/Gitea runner units are not fail-closed"
fi
grep -q "CD_LANE_GUARDRAILS_OK 1" "$runner_tmp" && ok "110 controlled cd-lane is safe, drained, or fail-closed" || blocked "110 controlled cd-lane is neither safe-open/drained nor fail-closed"
grep -q "CD_LANE_GUARDRAILS_OK 1" "$runner_tmp" && ok "110 runner/CD lane fail-closed guardrails complete" || blocked "110 runner/CD lane fail-closed guardrails incomplete"
grep -q "RUNNER_DIRECT_PROCESS_COUNT 0" "$runner_tmp" && ok "110 legacy direct runner process count is zero" || blocked "110 legacy direct runner process detected"
grep -q "RUNNER_FAILCLOSED_BINARY_ELF" "$runner_tmp" && blocked "110 runner fail-closed binary path restored to ELF" || ok "110 runner binary paths are fail-closed stubs or missing"
grep -q "RUNNER_PRESSURE_GATE_RC 0" "$runner_tmp" && ok "110 host pressure gate returned 0" || blocked "110 host pressure gate is blocking"