fix(recovery): open controlled drain lane after guardrails
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 3m13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
AWOOOI Harbor 110 Local Repair / workflow-shape (push) Successful in 1s
AWOOOI Harbor 110 Local Repair / harbor-110-local-repair (push) Has been cancelled
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 3m13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
AWOOOI Harbor 110 Local Repair / workflow-shape (push) Successful in 1s
AWOOOI Harbor 110 Local Repair / harbor-110-local-repair (push) Has been cancelled
This commit is contained in:
@@ -509,6 +509,13 @@ cd_lane_drain_is_controlled_available() {
|
||||
return 0
|
||||
}
|
||||
|
||||
# Print how many leftover cd-lane restore-source directories exist.
#
# Counts directories located directly inside the search root (depth 1 only)
# whose names match 'awoooi-cd-lane-disabled-*' or
# 'awoooi-cd-lane-drain-disabled-*'. Regular files and deeper directories with
# matching names are not counted.
#
# Arguments:
#   $1 - directory to inspect; optional, defaults to /root so existing
#        zero-argument callers keep their behavior.
# Outputs:
#   The count on stdout as a bare integer with no surrounding spaces.
#
# NOTE(review): find's stderr is discarded, so a missing or unreadable
# directory is reported as 0 - confirm that is acceptable for callers that
# treat 0 as "nothing left".
cd_lane_root_restore_sources_left() {
    local search_root="${1:-/root}"

    # -mindepth 1 keeps the search root itself out of the count, which matters
    # once the root is caller-supplied and could itself carry a matching name.
    # tr strips the padding some wc implementations put around the number.
    find "$search_root" -mindepth 1 -maxdepth 1 -type d \( \
        -name 'awoooi-cd-lane-disabled-*' -o \
        -name 'awoooi-cd-lane-drain-disabled-*' \
    \) -print 2>/dev/null | wc -l | tr -d " "
}
|
||||
|
||||
quarantine_cd_lane_registration_fail_closed() {
|
||||
local quarantine_dir
|
||||
local lane_dir
|
||||
@@ -625,10 +632,25 @@ ensure_host_runner_fail_closed() {
|
||||
pkill -KILL -f "^${RUNNER_DIR}/act_runner(\\.real-[^ ]*)? daemon" >/dev/null 2>&1 || true
|
||||
quarantine_cd_lane_root_restore_sources_fail_closed
|
||||
for binary in "${RUNNER_FAIL_CLOSED_BINARY_PATHS[@]}"; do
|
||||
if [ "$START_CD_LANE_ALLOWED" = "1" ] && [ "$binary" = "$CD_LANE_DRAIN_BINARY" ]; then
|
||||
continue
|
||||
fi
|
||||
guard_runner_binary_fail_closed "$binary"
|
||||
done
|
||||
}
|
||||
|
||||
# Preflight for the controlled cd-lane drain.
# The leftover restore-source count is always captured, whether or not the
# controlled lane was requested.
CD_LANE_ROOT_RESTORE_LEFT="$(cd_lane_root_restore_sources_left)"

# START_CD_LANE_ALLOWED is set to 1 only when the lane was requested, the
# drain config/binary check passes, and no restore sources are left behind.
# Every other outcome leaves it untouched and logs the reason where one applies.
case "$START_CONTROLLED_CD_LANE" in
    1)
        if ! cd_lane_drain_is_controlled_available; then
            log "⛔ AWOOOI_START_CONTROLLED_CD_LANE=1 但 controlled drain lane config/binary 未通過;維持 fail-closed"
        elif [ "$CD_LANE_ROOT_RESTORE_LEFT" = "0" ]; then
            START_CD_LANE_ALLOWED=1
            log "✅ controlled cd-lane drain preflight passed; legacy runner 仍維持 fail-closed"
        else
            log "⛔ AWOOOI_START_CONTROLLED_CD_LANE=1 但 root restore-source left=${CD_LANE_ROOT_RESTORE_LEFT};維持 fail-closed"
        fi
        ;;
esac
|
||||
|
||||
if [ -x "$RUNNER_DIR/act_runner" ] && [ -f "$RUNNER_DIR/config.yaml" ]; then
|
||||
# 若舊的 .runner 配置指向過期 hostname,只有在明確允許啟動 runner
|
||||
# 時才清除重新註冊;預設降壓模式不得碰 registration 狀態。
|
||||
@@ -729,7 +751,15 @@ else
|
||||
log "⚠️ 找不到 act-runner binary/config: $RUNNER_DIR"
|
||||
fi
|
||||
|
||||
# Open the controlled cd-lane drain service only when START_CD_LANE_ALLOWED
# is 1; otherwise record that the lane stays closed.
#
# The outcome is logged per branch. No unconditional "override active"
# message is emitted here, because it would contradict the closed branch.
if [ "$START_CD_LANE_ALLOWED" = "1" ]; then
    install_controlled_cd_lane_drain_unit
    # systemctl output is discarded and failures are tolerated (|| true), so
    # these three steps never abort the script by themselves.
    systemctl daemon-reload >/dev/null 2>&1 || true
    systemctl unmask "$CD_LANE_DRAIN_SERVICE" >/dev/null 2>&1 || true
    systemctl enable --now "$CD_LANE_DRAIN_SERVICE" >/dev/null 2>&1 || true
    # NOTE(review): presumably this helper verifies the lane actually came up,
    # since the systemctl results above are ignored - confirm.
    ensure_controlled_cd_lane_open
else
    log "✅ controlled cd-lane remains closed unless AWOOOI_START_CONTROLLED_CD_LANE=1 passes guardrails"
fi
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# STEP 7: Sentry(Error Tracking)
|
||||
|
||||
@@ -76,6 +76,23 @@ def test_startup_110_quarantines_corrupt_docker_container_metadata() -> None:
|
||||
assert 'run_bounded "$DOCKER_START_TIMEOUT_SECONDS" systemctl start docker.socket docker.service' in text
|
||||
|
||||
|
||||
def test_startup_110_opens_only_controlled_cd_lane_after_guardrails() -> None:
    """Check the startup script text contains the controlled cd-lane wiring.

    This is a pure text check: the script is read as UTF-8 and every snippet
    below must appear verbatim somewhere in it. Nothing is executed.
    """
    script_text = STARTUP_110.read_text(encoding="utf-8")

    # Checked in order; the first missing snippet fails the test.
    required_snippets = (
        'START_CONTROLLED_CD_LANE="${AWOOOI_START_CONTROLLED_CD_LANE:-0}"',
        "cd_lane_root_restore_sources_left()",
        'CD_LANE_ROOT_RESTORE_LEFT="$(cd_lane_root_restore_sources_left)"',
        'START_CD_LANE_ALLOWED=1',
        'install_controlled_cd_lane_drain_unit',
        'systemctl unmask "$CD_LANE_DRAIN_SERVICE"',
        'systemctl enable --now "$CD_LANE_DRAIN_SERVICE"',
        'ensure_controlled_cd_lane_open',
        'if [ "$START_CD_LANE_ALLOWED" = "1" ] && [ "$binary" = "$CD_LANE_DRAIN_BINARY" ]; then',
        'systemctl enable --now "$RUNNER_SERVICE"',
        "legacy runner 仍維持 fail-closed",
        "controlled cd-lane remains closed unless AWOOOI_START_CONTROLLED_CD_LANE=1 passes guardrails",
    )
    for snippet in required_snippets:
        assert snippet in script_text
|
||||
|
||||
|
||||
def test_cold_start_deploy_parity_verifier_bounds_ssh_readback() -> None:
|
||||
text = VERIFY_DEPLOY.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user