fix(recovery): orchestrate 110 harbor local repair
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 1m33s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 1m33s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
This commit is contained in:
@@ -986,6 +986,7 @@ jobs:
|
||||
echo "BLOCKER harbor_registry_public_route_unavailable registry_v2_status=${registry_status}"
|
||||
echo "NEXT_ACTION run_on_110_local_console_or_restored_ssh: sudo /usr/local/bin/harbor-watchdog.sh --check"
|
||||
echo "NEXT_ACTION if_check_confirms_unhealthy_on_110: sudo /usr/local/bin/harbor-watchdog.sh --repair-once"
|
||||
echo "NEXT_ACTION combined_110_control_path_then_harbor: sudo /usr/local/bin/recover-110-control-path-and-harbor-local.sh --apply-all"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
@@ -82,6 +82,10 @@ def test_harbor_login_has_public_route_retry_and_safe_secret_transport() -> None
|
||||
assert "BLOCKER harbor_registry_public_route_unavailable" in block
|
||||
assert "sudo /usr/local/bin/harbor-watchdog.sh --check" in block
|
||||
assert "sudo /usr/local/bin/harbor-watchdog.sh --repair-once" in block
|
||||
assert (
|
||||
"sudo /usr/local/bin/recover-110-control-path-and-harbor-local.sh --apply-all"
|
||||
in block
|
||||
)
|
||||
assert "sleep \"${LOGIN_SLEEP_SECONDS}\"" in block
|
||||
assert "${HARBOR_PASSWORD}" in block
|
||||
assert "--password " not in block
|
||||
@@ -117,6 +121,17 @@ def test_harbor_watchdog_exposes_controlled_check_and_one_shot_repair() -> None:
|
||||
assert "while true" in text
|
||||
|
||||
|
||||
def test_deploy_to_110_syncs_local_control_path_recovery_helpers() -> None:
    """Check that deploy-to-110.sh references both local recovery helpers.

    The script text must contain each helper's file name as well as its
    /usr/local/bin path.
    """
    deploy_script = ROOT / "scripts/reboot-recovery/deploy-to-110.sh"
    text = deploy_script.read_text(encoding="utf-8")

    expected_fragments = (
        "repair-110-ssh-publickey-auth-local.sh",
        "recover-110-control-path-and-harbor-local.sh",
        "/usr/local/bin/repair-110-ssh-publickey-auth-local.sh",
        "/usr/local/bin/recover-110-control-path-and-harbor-local.sh",
    )
    for fragment in expected_fragments:
        assert fragment in text
|
||||
|
||||
|
||||
def test_onboarding_warning_step_template_stays_on_controlled_runtime_profile() -> None:
|
||||
text = _workflow_text()
|
||||
assert "onboarding warning-step workflow is" in text
|
||||
|
||||
@@ -16,6 +16,8 @@ echo "=== 部署 awoooi-startup-110 + harbor-watchdog 到 192.168.0.110 ==="
|
||||
echo "[1/5] 上傳啟動腳本..."
|
||||
scp "$SCRIPT_DIR/awoooi-startup-110.sh" "$HOST:/tmp/awoooi-startup-110.sh"
|
||||
scp "$SCRIPT_DIR/awoooi-startup-110.service" "$HOST:/tmp/awoooi-startup-110.service"
|
||||
scp "$SCRIPT_DIR/repair-110-ssh-publickey-auth-local.sh" "$HOST:/tmp/repair-110-ssh-publickey-auth-local.sh"
|
||||
scp "$SCRIPT_DIR/recover-110-control-path-and-harbor-local.sh" "$HOST:/tmp/recover-110-control-path-and-harbor-local.sh"
|
||||
|
||||
# 2. 上傳 watchdog
|
||||
echo "[2/5] 上傳 harbor-watchdog..."
|
||||
@@ -26,6 +28,10 @@ scp "$SCRIPT_DIR/harbor-watchdog.service" "$HOST:/tmp/harbor-watchdog.service"
|
||||
echo "[3/5] 安裝 startup service..."
|
||||
ssh "$HOST" "sudo cp /tmp/awoooi-startup-110.sh /usr/local/bin/awoooi-startup-110.sh && \
|
||||
sudo chmod +x /usr/local/bin/awoooi-startup-110.sh && \
|
||||
sudo cp /tmp/repair-110-ssh-publickey-auth-local.sh /usr/local/bin/repair-110-ssh-publickey-auth-local.sh && \
|
||||
sudo chmod +x /usr/local/bin/repair-110-ssh-publickey-auth-local.sh && \
|
||||
sudo cp /tmp/recover-110-control-path-and-harbor-local.sh /usr/local/bin/recover-110-control-path-and-harbor-local.sh && \
|
||||
sudo chmod +x /usr/local/bin/recover-110-control-path-and-harbor-local.sh && \
|
||||
sudo cp /tmp/awoooi-startup-110.service /etc/systemd/system/awoooi-startup-110.service && \
|
||||
sudo systemctl daemon-reload && \
|
||||
sudo systemctl enable awoooi-startup-110.service && \
|
||||
|
||||
@@ -0,0 +1,187 @@
|
||||
#!/usr/bin/env bash
#
# Local-only orchestrator for the current P0 110 control path + Harbor blocker.
#
# Run on host 110 from a trusted local console or an already working root
# shell. Default mode is read-only. Apply modes do not read key material, do
# not create keys, do not restart the Docker daemon, and do not reboot the host.

# Abort on command failure, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -euo pipefail

# Mode of operation; the command-line flags parsed below may replace it.
MODE="check"

# Caller-overridable settings: keep a non-empty inherited value, otherwise
# fall back to the default.
: "${TARGET_USER:=wooo}"
: "${RELOAD_SSH:=0}"

# Address the host guard expects to find on this machine before applying.
EXPECTED_HOST_IP="${AWOOOI_110_EXPECTED_HOST_IP:-192.168.0.110}"

# Directory containing this script; used to locate sibling helper scripts.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Optional explicit helper paths; empty means "use the built-in search order".
SSH_REPAIR_SCRIPT="${AWOOOI_110_SSH_REPAIR_SCRIPT:-}"
HARBOR_WATCHDOG_SCRIPT="${AWOOOI_HARBOR_WATCHDOG_SCRIPT:-}"
|
||||
|
||||
# Print the command-line help text to stdout.
#
# The heredoc delimiter is quoted, so the text is emitted verbatim with no
# variable expansion. The Environment section lists every variable this
# script reads, including the helper-path and expected-IP overrides.
usage() {
  cat <<'USAGE'
Usage: recover-110-control-path-and-harbor-local.sh [--check|--apply-ssh-metadata|--repair-harbor-once|--apply-all]

Modes:
  --check               Read-only checks for SSH metadata and Harbor readiness.
  --apply-ssh-metadata  Fix TARGET_USER home/.ssh/authorized_keys metadata only.
  --repair-harbor-once  Run one bounded Harbor watchdog repair cycle only.
  --apply-all           Apply SSH metadata repair, then one Harbor repair cycle.

Environment:
  TARGET_USER=wooo
  RELOAD_SSH=0
  ALLOW_NON_110=0
  AWOOOI_110_EXPECTED_HOST_IP=192.168.0.110
  AWOOOI_110_SSH_REPAIR_SCRIPT=    (optional path to the SSH repair helper)
  AWOOOI_HARBOR_WATCHDOG_SCRIPT=   (optional path to the Harbor watchdog)

Safety:
  This script refuses to apply outside 192.168.0.110 unless ALLOW_NON_110=1.
USAGE
}
|
||||
|
||||
# Consume every command-line argument. Each mode flag overwrites MODE, so the
# last one given wins; help exits 0 and an unrecognised argument exits 64.
while (( $# > 0 )); do
  case "$1" in
    --check)              MODE="check" ;;
    --apply-ssh-metadata) MODE="apply_ssh_metadata" ;;
    --repair-harbor-once) MODE="repair_harbor_once" ;;
    --apply-all)          MODE="apply_all" ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage >&2
      exit 64
      ;;
  esac
  shift
done
|
||||
|
||||
# Write one timestamped, "[recover-110]"-tagged line to stdout.
# All arguments are joined into a single message.
log() {
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '[%s] [recover-110] %s\n' "$stamp" "$*"
}
|
||||
|
||||
# Succeed (return 0) when EXPECTED_HOST_IP is one of this machine's addresses.
#
# Tries `hostname -I` first, then `ip -o -4 addr show`; returns 1 when neither
# tool is available or neither reports the address.
#
# The address is always compared as a literal string. Both probes capture the
# tool output instead of piping into `grep -q`: under `set -o pipefail` an
# early-exiting grep can make the producer die of SIGPIPE, which would turn a
# genuine match into a failed pipeline.
host_has_expected_ip() {
  local addr ip_output

  if command -v hostname >/dev/null 2>&1; then
    # Word-splitting yields one address per iteration; a failing
    # `hostname -I` simply produces an empty list.
    for addr in $(hostname -I 2>/dev/null); do
      [ "$addr" = "$EXPECTED_HOST_IP" ] && return 0
    done
  fi

  if command -v ip >/dev/null 2>&1; then
    ip_output="$(ip -o -4 addr show 2>/dev/null || true)"
    # The quoted expansion inside the pattern is matched literally; the
    # leading space and trailing slash anchor it to a whole "addr/prefix".
    case "$ip_output" in
      *" ${EXPECTED_HOST_IP}/"*) return 0 ;;
    esac
  fi

  return 1
}
|
||||
|
||||
# Guard for the mutating modes: return 0 when applying is allowed here,
# otherwise print a BLOCKED line to stderr and exit the script with 65.
#
# ALLOW_NON_110=1 bypasses the host check (and logs that it did so).
require_expected_host_for_apply() {
  if [ "${ALLOW_NON_110:-0}" = "1" ]; then
    log "ALLOW_NON_110=1 set; expected host guard bypassed"
    return 0
  fi

  host_has_expected_ip && return 0

  echo "BLOCKED not running on ${EXPECTED_HOST_IP}; use 110 local console/root shell" >&2
  exit 65
}
|
||||
|
||||
# Print the path of the first executable SSH repair helper and return 0;
# return 1 (printing nothing) when none is found.
#
# Search order: the SSH_REPAIR_SCRIPT override, the copy next to this script,
# then the copy under /usr/local/bin.
resolve_ssh_repair_script() {
  local candidate
  for candidate in \
    "$SSH_REPAIR_SCRIPT" \
    "$SCRIPT_DIR/repair-110-ssh-publickey-auth-local.sh" \
    "/usr/local/bin/repair-110-ssh-publickey-auth-local.sh"; do
    # An empty override is skipped rather than tested.
    if [ -n "$candidate" ] && [ -x "$candidate" ]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}
|
||||
|
||||
# Print the path of the first executable Harbor watchdog and return 0;
# return 1 (printing nothing) when none is found.
#
# Search order: the HARBOR_WATCHDOG_SCRIPT override, the copy under
# /usr/local/bin, then the copy next to this script.
resolve_harbor_watchdog_script() {
  local candidate
  for candidate in \
    "$HARBOR_WATCHDOG_SCRIPT" \
    "/usr/local/bin/harbor-watchdog.sh" \
    "$SCRIPT_DIR/harbor-watchdog.sh"; do
    # An empty override is skipped rather than tested.
    if [ -n "$candidate" ] && [ -x "$candidate" ]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}
|
||||
|
||||
# Run the SSH repair helper in --check mode for TARGET_USER.
#
# RELOAD_SSH is forced to 0 for the helper regardless of the caller's value.
# Prints SSH_REPAIR_SCRIPT_STATUS=missing and returns 1 when no helper can be
# resolved; otherwise returns the helper's exit status.
run_ssh_check() {
  local helper
  helper="$(resolve_ssh_repair_script)" || {
    echo "SSH_REPAIR_SCRIPT_STATUS=missing"
    return 1
  }
  TARGET_USER="$TARGET_USER" RELOAD_SSH=0 "$helper" --check
}
|
||||
|
||||
# Run the SSH repair helper in --apply mode for TARGET_USER, passing the
# caller's RELOAD_SSH through.
#
# The host guard runs first and may exit the script. Prints
# SSH_REPAIR_SCRIPT_STATUS=missing and returns 1 when no helper can be
# resolved; otherwise returns the helper's exit status.
run_ssh_apply() {
  local helper
  require_expected_host_for_apply
  helper="$(resolve_ssh_repair_script)" || {
    echo "SSH_REPAIR_SCRIPT_STATUS=missing"
    return 1
  }
  TARGET_USER="$TARGET_USER" RELOAD_SSH="$RELOAD_SSH" "$helper" --apply
}
|
||||
|
||||
# Run the Harbor watchdog in --check mode.
#
# Prints HARBOR_WATCHDOG_SCRIPT_STATUS=missing and returns 1 when no watchdog
# can be resolved; otherwise returns the watchdog's exit status.
run_harbor_check() {
  local watchdog
  watchdog="$(resolve_harbor_watchdog_script)" || {
    echo "HARBOR_WATCHDOG_SCRIPT_STATUS=missing"
    return 1
  }
  "$watchdog" --check
}
|
||||
|
||||
# Run the Harbor watchdog once in --repair-once mode.
#
# The host guard runs first and may exit the script. Prints
# HARBOR_WATCHDOG_SCRIPT_STATUS=missing and returns 1 when no watchdog can be
# resolved; otherwise returns the watchdog's exit status.
run_harbor_repair_once() {
  local watchdog
  require_expected_host_for_apply
  watchdog="$(resolve_harbor_watchdog_script)" || {
    echo "HARBOR_WATCHDOG_SCRIPT_STATUS=missing"
    return 1
  }
  "$watchdog" --repair-once
}
|
||||
|
||||
# Banner: announce the selected mode, the target user, the expected host IP,
# and the fixed operation-boundary markers, one per line on stdout.
printf '%s\n' \
  "AWOOOI_110_CONTROL_PATH_AND_HARBOR_LOCAL_RECOVERY mode=${MODE} target_user=${TARGET_USER}" \
  "expected_host_ip=${EXPECTED_HOST_IP}" \
  "operation_boundary_secret_value_read=false" \
  "operation_boundary_host_reboot_performed=false" \
  "operation_boundary_docker_daemon_restart_performed=false" \
  "operation_boundary_node_drain_performed=false"

# Dispatch on the selected mode.
case "$MODE" in
  check)
    # Best-effort: a failing check neither stops the other check nor changes
    # the script's exit status.
    run_ssh_check || true
    run_harbor_check || true
    ;;
  apply_ssh_metadata) run_ssh_apply ;;
  repair_harbor_once) run_harbor_repair_once ;;
  apply_all)
    # With errexit active, a failed SSH apply stops before the Harbor repair.
    run_ssh_apply
    run_harbor_repair_once
    ;;
  *)
    echo "Unknown internal mode: $MODE" >&2
    exit 64
    ;;
esac
|
||||
@@ -0,0 +1,80 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import stat
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# parents[3] of this file's resolved path; presumably the repository root --
# confirm against where this test module lives.
ROOT = Path(__file__).resolve().parents[3]

# The recovery orchestrator script exercised by the tests in this module.
RECOVERY = (
    ROOT
    / "scripts"
    / "reboot-recovery"
    / "recover-110-control-path-and-harbor-local.sh"
)
|
||||
|
||||
|
||||
def test_recover_110_orchestrator_contracts() -> None:
    """Statically check the recovery script text for its safety contract.

    The script must mention every supported mode flag, the operation-boundary
    markers, and both helper script names. It must not print the
    authorized_keys file, and must not contain Docker daemon restarts,
    destructive Docker clean-up commands, or a reboot/shutdown command.
    """
    import re  # local import so this block does not depend on file-level imports

    text = RECOVERY.read_text(encoding="utf-8")

    required_fragments = (
        "--check",
        "--apply-ssh-metadata",
        "--repair-harbor-once",
        "--apply-all",
        "operation_boundary_secret_value_read=false",
        "operation_boundary_host_reboot_performed=false",
        "operation_boundary_docker_daemon_restart_performed=false",
        "repair-110-ssh-publickey-auth-local.sh",
        "harbor-watchdog.sh",
    )
    for fragment in required_fragments:
        assert fragment in text, f"missing required fragment: {fragment!r}"

    assert "cat \"$home_dir/.ssh/authorized_keys\"" not in text

    forbidden_fragments = (
        "systemctl restart docker",
        "service docker restart",
        "docker system prune",
        "docker volume rm",
    )
    for fragment in forbidden_fragments:
        assert fragment not in text, f"forbidden fragment present: {fragment!r}"

    # A plain "\nreboot" substring only matches a command at column 0, so an
    # indented command inside a function body would go unnoticed. Anchor at
    # the start of any line, allow leading whitespace and an optional sudo.
    # No trailing word boundary, so everything the old patterns matched
    # still matches.
    power_command = re.compile(
        r"^[ \t]*(?:sudo[ \t]+)?(?:reboot|shutdown)",
        re.MULTILINE,
    )
    offending = power_command.search(text)
    assert offending is None, (
        f"forbidden power command present: {offending.group(0)!r}"
        if offending
        else ""
    )
|
||||
|
||||
|
||||
def test_recover_110_check_uses_fake_helpers_without_writes(tmp_path: Path) -> None:
    """Run the recovery script in --check mode against fake helper scripts.

    Two stub helpers are written into ``tmp_path`` and injected through the
    AWOOOI_110_SSH_REPAIR_SCRIPT / AWOOOI_HARBOR_WATCHDOG_SCRIPT overrides.
    Each stub echoes the mode flag it received, so the assertions can confirm
    that check mode invokes both helpers with ``--check``.

    The subprocess call is bounded by a timeout so that a hanging script
    fails this test instead of stalling the whole run.
    """
    ssh_helper = tmp_path / "ssh-helper.sh"
    harbor_helper = tmp_path / "harbor-helper.sh"
    ssh_helper.write_text(
        "#!/usr/bin/env bash\n"
        "echo SSH_HELPER_MODE=$1\n"
        "echo SSH_METADATA_WRITE=false\n",
        encoding="utf-8",
    )
    harbor_helper.write_text(
        "#!/usr/bin/env bash\n"
        "echo HARBOR_HELPER_MODE=$1\n"
        "echo HARBOR_RUNTIME_WRITE=false\n",
        encoding="utf-8",
    )
    # The script only accepts helpers that pass its executable check.
    for helper in (ssh_helper, harbor_helper):
        helper.chmod(helper.stat().st_mode | stat.S_IXUSR)

    env = {
        **os.environ,
        "ALLOW_NON_110": "1",
        "AWOOOI_110_SSH_REPAIR_SCRIPT": str(ssh_helper),
        "AWOOOI_HARBOR_WATCHDOG_SCRIPT": str(harbor_helper),
    }
    result = subprocess.run(
        ["bash", str(RECOVERY), "--check"],
        check=False,
        env=env,
        capture_output=True,
        text=True,
        timeout=30,
    )

    assert result.returncode == 0, result.stdout + result.stderr

    expected_output = (
        "AWOOOI_110_CONTROL_PATH_AND_HARBOR_LOCAL_RECOVERY mode=check",
        "SSH_HELPER_MODE=--check",
        "HARBOR_HELPER_MODE=--check",
        "SSH_METADATA_WRITE=false",
        "HARBOR_RUNTIME_WRITE=false",
        "operation_boundary_secret_value_read=false",
    )
    for fragment in expected_output:
        assert fragment in result.stdout, f"missing from stdout: {fragment!r}"
|
||||
Reference in New Issue
Block a user