fix(recovery): open controlled drain lane after guardrails
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 3m13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
AWOOOI Harbor 110 Local Repair / workflow-shape (push) Successful in 1s
AWOOOI Harbor 110 Local Repair / harbor-110-local-repair (push) Has been cancelled

This commit is contained in:
Your Name
2026-06-30 21:26:47 +08:00
parent f376ff5de3
commit 3397f5a9aa
2 changed files with 48 additions and 1 deletions

View File

@@ -509,6 +509,13 @@ cd_lane_drain_is_controlled_available() {
return 0
}
# Print how many directories directly under /root have a name matching
# 'awoooi-cd-lane-disabled-*' or 'awoooi-cd-lane-drain-disabled-*'.
# Output is the bare count on stdout with spaces stripped; errors from
# find are discarded.
cd_lane_root_restore_sources_left() {
  local -a name_filter=(
    -name 'awoooi-cd-lane-disabled-*'
    -o
    -name 'awoooi-cd-lane-drain-disabled-*'
  )
  # The pipeline stays the last command so the function's exit status is
  # still whatever the pipeline reports.
  find /root -maxdepth 1 -type d \( "${name_filter[@]}" \) -print 2>/dev/null \
    | wc -l \
    | tr -d " "
}
quarantine_cd_lane_registration_fail_closed() {
local quarantine_dir
local lane_dir
@@ -625,10 +632,25 @@ ensure_host_runner_fail_closed() {
pkill -KILL -f "^${RUNNER_DIR}/act_runner(\\.real-[^ ]*)? daemon" >/dev/null 2>&1 || true
quarantine_cd_lane_root_restore_sources_fail_closed
for binary in "${RUNNER_FAIL_CLOSED_BINARY_PATHS[@]}"; do
if [ "$START_CD_LANE_ALLOWED" = "1" ] && [ "$binary" = "$CD_LANE_DRAIN_BINARY" ]; then
continue
fi
guard_runner_binary_fail_closed "$binary"
done
}
# Controlled cd-lane drain preflight.
# START_CD_LANE_ALLOWED is set to 1 only when all three hold:
#   1. START_CONTROLLED_CD_LANE is "1",
#   2. cd_lane_drain_is_controlled_available succeeds,
#   3. cd_lane_root_restore_sources_left printed "0".
# Every other outcome leaves START_CD_LANE_ALLOWED untouched and logs why.
CD_LANE_ROOT_RESTORE_LEFT="$(cd_lane_root_restore_sources_left)"
if [ "$START_CONTROLLED_CD_LANE" = "1" ]; then
  if cd_lane_drain_is_controlled_available; then
    if [ "$CD_LANE_ROOT_RESTORE_LEFT" = "0" ]; then
      START_CD_LANE_ALLOWED=1
      log "✅ controlled cd-lane drain preflight passed; legacy runner 仍維持 fail-closed"
    else
      # Matching directories are still present under /root.
      log "⛔ AWOOOI_START_CONTROLLED_CD_LANE=1 但 root restore-source left=${CD_LANE_ROOT_RESTORE_LEFT};維持 fail-closed"
    fi
  else
    # The availability check failed, so the directory count is not consulted.
    log "⛔ AWOOOI_START_CONTROLLED_CD_LANE=1 但 controlled drain lane config/binary 未通過;維持 fail-closed"
  fi
fi
if [ -x "$RUNNER_DIR/act_runner" ] && [ -f "$RUNNER_DIR/config.yaml" ]; then
# 若舊的 .runner 配置指向過期 hostname,只有在明確允許啟動 runner
# 時才清除重新註冊;預設降壓模式不得碰 registration 狀態。
@@ -729,7 +751,15 @@ else
log "⚠️ 找不到 act-runner binary/config: $RUNNER_DIR"
fi
# Open the controlled cd-lane drain only when START_CD_LANE_ALLOWED is "1";
# otherwise report that the lane stays closed. The state is logged per
# branch, so nothing here claims the lane is open when it is not.
if [ "$START_CD_LANE_ALLOWED" = "1" ]; then
  install_controlled_cd_lane_drain_unit
  # The systemctl steps are best-effort: output is discarded and a failure
  # does not abort the script.
  systemctl daemon-reload >/dev/null 2>&1 || true
  systemctl unmask "$CD_LANE_DRAIN_SERVICE" >/dev/null 2>&1 || true
  systemctl enable --now "$CD_LANE_DRAIN_SERVICE" >/dev/null 2>&1 || true
  # NOTE(review): presumably this verifies the lane actually came up,
  # since the systemctl results above are ignored; confirm in its definition.
  ensure_controlled_cd_lane_open
else
  log "✅ controlled cd-lane remains closed unless AWOOOI_START_CONTROLLED_CD_LANE=1 passes guardrails"
fi
# ──────────────────────────────────────────────
# STEP 7: Sentry Error Tracking

View File

@@ -76,6 +76,23 @@ def test_startup_110_quarantines_corrupt_docker_container_metadata() -> None:
assert 'run_bounded "$DOCKER_START_TIMEOUT_SECONDS" systemctl start docker.socket docker.service' in text
def test_startup_110_opens_only_controlled_cd_lane_after_guardrails() -> None:
    """Check the startup script text for the controlled cd-lane guardrail wiring.

    This is a purely textual check: every snippet listed below must appear
    verbatim in the script read from STARTUP_110.
    """
    script = STARTUP_110.read_text(encoding="utf-8")
    required_snippets = (
        # Opt-in flag and the directory-count guardrail.
        'START_CONTROLLED_CD_LANE="${AWOOOI_START_CONTROLLED_CD_LANE:-0}"',
        "cd_lane_root_restore_sources_left()",
        'CD_LANE_ROOT_RESTORE_LEFT="$(cd_lane_root_restore_sources_left)"',
        'START_CD_LANE_ALLOWED=1',
        # Steps that open the controlled drain lane.
        'install_controlled_cd_lane_drain_unit',
        'systemctl unmask "$CD_LANE_DRAIN_SERVICE"',
        'systemctl enable --now "$CD_LANE_DRAIN_SERVICE"',
        'ensure_controlled_cd_lane_open',
        # Per-binary exemption, runner service line, and the two log messages.
        'if [ "$START_CD_LANE_ALLOWED" = "1" ] && [ "$binary" = "$CD_LANE_DRAIN_BINARY" ]; then',
        'systemctl enable --now "$RUNNER_SERVICE"',
        "legacy runner 仍維持 fail-closed",
        "controlled cd-lane remains closed unless AWOOOI_START_CONTROLLED_CD_LANE=1 passes guardrails",
    )
    for snippet in required_snippets:
        assert snippet in script
def test_cold_start_deploy_parity_verifier_bounds_ssh_readback() -> None:
text = VERIFY_DEPLOY.read_text(encoding="utf-8")