fix(recovery): orchestrate 110 harbor local repair
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 1m33s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped

This commit is contained in:
Your Name
2026-06-30 20:15:52 +08:00
parent aac2d95847
commit 1d97b3ffea
5 changed files with 289 additions and 0 deletions

View File

@@ -986,6 +986,7 @@ jobs:
echo "BLOCKER harbor_registry_public_route_unavailable registry_v2_status=${registry_status}"
echo "NEXT_ACTION run_on_110_local_console_or_restored_ssh: sudo /usr/local/bin/harbor-watchdog.sh --check"
echo "NEXT_ACTION if_check_confirms_unhealthy_on_110: sudo /usr/local/bin/harbor-watchdog.sh --repair-once"
echo "NEXT_ACTION combined_110_control_path_then_harbor: sudo /usr/local/bin/recover-110-control-path-and-harbor-local.sh --apply-all"
exit 1
fi

View File

@@ -82,6 +82,10 @@ def test_harbor_login_has_public_route_retry_and_safe_secret_transport() -> None
assert "BLOCKER harbor_registry_public_route_unavailable" in block
assert "sudo /usr/local/bin/harbor-watchdog.sh --check" in block
assert "sudo /usr/local/bin/harbor-watchdog.sh --repair-once" in block
assert (
"sudo /usr/local/bin/recover-110-control-path-and-harbor-local.sh --apply-all"
in block
)
assert "sleep \"${LOGIN_SLEEP_SECONDS}\"" in block
assert "${HARBOR_PASSWORD}" in block
assert "--password " not in block
@@ -117,6 +121,17 @@ def test_harbor_watchdog_exposes_controlled_check_and_one_shot_repair() -> None:
assert "while true" in text
def test_deploy_to_110_syncs_local_control_path_recovery_helpers() -> None:
    """Check that deploy-to-110.sh mentions both local recovery helpers.

    Each helper must appear by bare file name and by its
    ``/usr/local/bin`` install path in the deploy script text.
    """
    deploy_script = ROOT / "scripts/reboot-recovery/deploy-to-110.sh"
    text = deploy_script.read_text(encoding="utf-8")
    helper_names = (
        "repair-110-ssh-publickey-auth-local.sh",
        "recover-110-control-path-and-harbor-local.sh",
    )
    # Bare names first, then install paths, in the same order as before.
    expected_fragments = [
        *helper_names,
        *(f"/usr/local/bin/{name}" for name in helper_names),
    ]
    for fragment in expected_fragments:
        assert fragment in text
def test_onboarding_warning_step_template_stays_on_controlled_runtime_profile() -> None:
text = _workflow_text()
assert "onboarding warning-step workflow is" in text

View File

@@ -16,6 +16,8 @@ echo "=== 部署 awoooi-startup-110 + harbor-watchdog 到 192.168.0.110 ==="
echo "[1/5] 上傳啟動腳本..."
scp "$SCRIPT_DIR/awoooi-startup-110.sh" "$HOST:/tmp/awoooi-startup-110.sh"
scp "$SCRIPT_DIR/awoooi-startup-110.service" "$HOST:/tmp/awoooi-startup-110.service"
scp "$SCRIPT_DIR/repair-110-ssh-publickey-auth-local.sh" "$HOST:/tmp/repair-110-ssh-publickey-auth-local.sh"
scp "$SCRIPT_DIR/recover-110-control-path-and-harbor-local.sh" "$HOST:/tmp/recover-110-control-path-and-harbor-local.sh"
# 2. 上傳 watchdog
echo "[2/5] 上傳 harbor-watchdog..."
@@ -26,6 +28,10 @@ scp "$SCRIPT_DIR/harbor-watchdog.service" "$HOST:/tmp/harbor-watchdog.service"
echo "[3/5] 安裝 startup service..."
ssh "$HOST" "sudo cp /tmp/awoooi-startup-110.sh /usr/local/bin/awoooi-startup-110.sh && \
sudo chmod +x /usr/local/bin/awoooi-startup-110.sh && \
sudo cp /tmp/repair-110-ssh-publickey-auth-local.sh /usr/local/bin/repair-110-ssh-publickey-auth-local.sh && \
sudo chmod +x /usr/local/bin/repair-110-ssh-publickey-auth-local.sh && \
sudo cp /tmp/recover-110-control-path-and-harbor-local.sh /usr/local/bin/recover-110-control-path-and-harbor-local.sh && \
sudo chmod +x /usr/local/bin/recover-110-control-path-and-harbor-local.sh && \
sudo cp /tmp/awoooi-startup-110.service /etc/systemd/system/awoooi-startup-110.service && \
sudo systemctl daemon-reload && \
sudo systemctl enable awoooi-startup-110.service && \

View File

@@ -0,0 +1,187 @@
#!/usr/bin/env bash
# Local-only orchestrator for the current P0 110 control path + Harbor blocker.
#
# Run on host 110 from a trusted local console or an already working root shell.
# Default mode is read-only. Apply modes do not read key material, do not create
# keys, do not restart the Docker daemon, and do not reboot the host.
# Abort on command errors, on unset variables, and on a failure anywhere in a
# pipeline.
set -euo pipefail

# Directory that holds this script; one of the places helper scripts are
# looked up.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Operating mode. Stays read-only ("check") unless a mode flag is parsed below.
MODE="check"

# Caller-tunable settings; each falls back to its default when the variable is
# unset or empty.
TARGET_USER="${TARGET_USER:-wooo}"
RELOAD_SSH="${RELOAD_SSH:-0}"
EXPECTED_HOST_IP="${AWOOOI_110_EXPECTED_HOST_IP:-192.168.0.110}"

# Optional explicit helper locations. Empty means "search the known locations".
SSH_REPAIR_SCRIPT="${AWOOOI_110_SSH_REPAIR_SCRIPT:-}"
HARBOR_WATCHDOG_SCRIPT="${AWOOOI_HARBOR_WATCHDOG_SCRIPT:-}"
# Print the command-line help text to stdout.
#
# The heredoc delimiter is quoted, so the text is emitted verbatim with no
# variable expansion. Callers that report a usage error redirect the output to
# stderr themselves.
#
# The Environment section lists every variable this script reads, including
# the expected-host and helper-path overrides.
usage() {
    cat <<'USAGE'
Usage: recover-110-control-path-and-harbor-local.sh [--check|--apply-ssh-metadata|--repair-harbor-once|--apply-all]
Modes:
--check Read-only checks for SSH metadata and Harbor readiness.
--apply-ssh-metadata Fix TARGET_USER home/.ssh/authorized_keys metadata only.
--repair-harbor-once Run one bounded Harbor watchdog repair cycle only.
--apply-all Apply SSH metadata repair, then one Harbor repair cycle.
Environment:
TARGET_USER=wooo
RELOAD_SSH=0
ALLOW_NON_110=0
AWOOOI_110_EXPECTED_HOST_IP=192.168.0.110
AWOOOI_110_SSH_REPAIR_SCRIPT=<path to the SSH repair helper; searched for when unset>
AWOOOI_HARBOR_WATCHDOG_SCRIPT=<path to harbor-watchdog.sh; searched for when unset>
Safety:
This script refuses to apply outside 192.168.0.110 unless ALLOW_NON_110=1.
USAGE
}
# Parse command-line flags one at a time. Each mode flag overwrites MODE, so
# the last one given wins. Help exits 0; an unrecognised argument prints the
# usage text to stderr and exits 64.
while (( $# > 0 )); do
    case "$1" in
        --check)              MODE="check" ;;
        --apply-ssh-metadata) MODE="apply_ssh_metadata" ;;
        --repair-harbor-once) MODE="repair_harbor_once" ;;
        --apply-all)          MODE="apply_all" ;;
        -h|--help)
            usage
            exit 0
            ;;
        *)
            echo "Unknown argument: $1" >&2
            usage >&2
            exit 64
            ;;
    esac
    shift
done
# Write one timestamped log line to stdout.
#
# Arguments: the message words; they are joined with spaces into one message.
# Output format: [YYYY-MM-DD HH:MM:SS] [recover-110] <message>
log() {
    local message="$*"
    printf '[%s] [recover-110] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$message"
}
# Report whether this host currently owns EXPECTED_HOST_IP.
#
# Tries `hostname -I` first, then `ip -o -4 addr show`; a probe that is not
# installed or that exits non-zero is skipped.
#
# Returns 0 when the address is found, 1 otherwise.
#
# Each probe's output is captured before matching, and the match is a literal
# shell pattern rather than `producer | grep -q`, for two reasons:
#   * the address is compared as a fixed string, so its dots are not treated
#     as regex wildcards;
#   * under `set -o pipefail`, a grep -q that exits on the first match can
#     leave the producer writing to a closed pipe, which makes the whole
#     pipeline report failure even though the address was found.
host_has_expected_ip() {
    local addresses

    if command -v hostname >/dev/null 2>&1; then
        if addresses="$(hostname -I 2>/dev/null)"; then
            # Pad with spaces so only a whole address token can match.
            case " ${addresses//$'\n'/ } " in
                *" ${EXPECTED_HOST_IP} "*) return 0 ;;
            esac
        fi
    fi

    if command -v ip >/dev/null 2>&1; then
        if addresses="$(ip -o -4 addr show 2>/dev/null)"; then
            # Anchor on the preceding space and the "/" that starts the
            # prefix length, so a longer address sharing the same leading
            # or trailing digits cannot match.
            case "$addresses" in
                *" ${EXPECTED_HOST_IP}/"*) return 0 ;;
            esac
        fi
    fi

    return 1
}
# Guard for modifying modes: only continue on the expected host.
#
# Returns 0 when ALLOW_NON_110 is exactly "1" (the bypass is logged) or when
# host_has_expected_ip succeeds. Otherwise prints a BLOCKED message to stderr
# and terminates the script with exit status 65.
require_expected_host_for_apply() {
    case "${ALLOW_NON_110:-0}" in
        1)
            log "ALLOW_NON_110=1 set; expected host guard bypassed"
            return 0
            ;;
    esac

    host_has_expected_ip && return 0

    echo "BLOCKED not running on ${EXPECTED_HOST_IP}; use 110 local console/root shell" >&2
    exit 65
}
# Print the path of the first executable SSH repair helper and return 0.
#
# Candidates, in priority order:
#   1. SSH_REPAIR_SCRIPT, the explicit override (skipped when empty)
#   2. the copy next to this script (SCRIPT_DIR)
#   3. the installed copy under /usr/local/bin
# Returns 1 and prints nothing when no candidate is executable.
resolve_ssh_repair_script() {
    local candidate
    for candidate in \
        "$SSH_REPAIR_SCRIPT" \
        "$SCRIPT_DIR/repair-110-ssh-publickey-auth-local.sh" \
        "/usr/local/bin/repair-110-ssh-publickey-auth-local.sh"; do
        if [ -n "$candidate" ] && [ -x "$candidate" ]; then
            printf '%s\n' "$candidate"
            return 0
        fi
    done
    return 1
}
# Print the path of the first executable Harbor watchdog script and return 0.
#
# Candidates, in priority order:
#   1. HARBOR_WATCHDOG_SCRIPT, the explicit override (skipped when empty)
#   2. the installed copy under /usr/local/bin
#   3. the copy next to this script (SCRIPT_DIR)
# Returns 1 and prints nothing when no candidate is executable.
resolve_harbor_watchdog_script() {
    local candidate
    for candidate in \
        "$HARBOR_WATCHDOG_SCRIPT" \
        "/usr/local/bin/harbor-watchdog.sh" \
        "$SCRIPT_DIR/harbor-watchdog.sh"; do
        if [ -n "$candidate" ] && [ -x "$candidate" ]; then
            printf '%s\n' "$candidate"
            return 0
        fi
    done
    return 1
}
# Run the SSH repair helper in --check mode.
#
# Passes TARGET_USER through and forces RELOAD_SSH=0 for the helper.
# Prints SSH_REPAIR_SCRIPT_STATUS=missing and returns 1 when no helper can be
# resolved; otherwise returns the helper's exit status.
run_ssh_check() {
    local helper
    helper="$(resolve_ssh_repair_script)" || {
        echo "SSH_REPAIR_SCRIPT_STATUS=missing"
        return 1
    }
    TARGET_USER="$TARGET_USER" RELOAD_SSH=0 "$helper" --check
}
# Run the SSH repair helper in --apply mode.
#
# The expected-host guard runs first and may terminate the script. TARGET_USER
# and the caller's RELOAD_SSH value are passed to the helper.
# Prints SSH_REPAIR_SCRIPT_STATUS=missing and returns 1 when no helper can be
# resolved; otherwise returns the helper's exit status.
run_ssh_apply() {
    local helper
    require_expected_host_for_apply
    helper="$(resolve_ssh_repair_script)" || {
        echo "SSH_REPAIR_SCRIPT_STATUS=missing"
        return 1
    }
    TARGET_USER="$TARGET_USER" RELOAD_SSH="$RELOAD_SSH" "$helper" --apply
}
# Run the Harbor watchdog script in --check mode.
#
# Prints HARBOR_WATCHDOG_SCRIPT_STATUS=missing and returns 1 when no watchdog
# script can be resolved; otherwise returns the watchdog's exit status.
run_harbor_check() {
    local watchdog
    watchdog="$(resolve_harbor_watchdog_script)" || {
        echo "HARBOR_WATCHDOG_SCRIPT_STATUS=missing"
        return 1
    }
    "$watchdog" --check
}
# Run the Harbor watchdog script with --repair-once.
#
# The expected-host guard runs first and may terminate the script.
# Prints HARBOR_WATCHDOG_SCRIPT_STATUS=missing and returns 1 when no watchdog
# script can be resolved; otherwise returns the watchdog's exit status.
run_harbor_repair_once() {
    local watchdog
    require_expected_host_for_apply
    watchdog="$(resolve_harbor_watchdog_script)" || {
        echo "HARBOR_WATCHDOG_SCRIPT_STATUS=missing"
        return 1
    }
    "$watchdog" --repair-once
}
# Banner: record the selected mode, target user, expected host, and the
# operation-boundary markers before any helper runs.
printf '%s\n' \
    "AWOOOI_110_CONTROL_PATH_AND_HARBOR_LOCAL_RECOVERY mode=${MODE} target_user=${TARGET_USER}" \
    "expected_host_ip=${EXPECTED_HOST_IP}" \
    "operation_boundary_secret_value_read=false" \
    "operation_boundary_host_reboot_performed=false" \
    "operation_boundary_docker_daemon_restart_performed=false" \
    "operation_boundary_node_drain_performed=false"

# Dispatch on the parsed mode.
case "$MODE" in
    check)
        # Read-only mode: a failing check is tolerated so the other check
        # still runs and the script exits 0.
        run_ssh_check || true
        run_harbor_check || true
        ;;
    apply_ssh_metadata)
        run_ssh_apply
        ;;
    repair_harbor_once)
        run_harbor_repair_once
        ;;
    apply_all)
        # Under `set -e`, a failed SSH apply stops the script before the
        # Harbor repair step.
        run_ssh_apply
        run_harbor_repair_once
        ;;
    *)
        echo "Unknown internal mode: $MODE" >&2
        exit 64
        ;;
esac

View File

@@ -0,0 +1,80 @@
from __future__ import annotations
import os
import stat
import subprocess
from pathlib import Path
ROOT = Path(__file__).resolve().parents[3]
RECOVERY = ROOT / "scripts/reboot-recovery/recover-110-control-path-and-harbor-local.sh"
def test_recover_110_orchestrator_contracts() -> None:
    """Pin the static contract of the 110 recovery orchestrator script.

    The script text must expose every mode flag, print the operation-boundary
    markers, reference both helper scripts, and contain none of the forbidden
    destructive commands.
    """
    import re

    text = RECOVERY.read_text(encoding="utf-8")

    required_fragments = (
        "--check",
        "--apply-ssh-metadata",
        "--repair-harbor-once",
        "--apply-all",
        "operation_boundary_secret_value_read=false",
        "operation_boundary_host_reboot_performed=false",
        "operation_boundary_docker_daemon_restart_performed=false",
        "repair-110-ssh-publickey-auth-local.sh",
        "harbor-watchdog.sh",
    )
    for fragment in required_fragments:
        assert fragment in text, f"missing contract fragment: {fragment}"

    assert "cat \"$home_dir/.ssh/authorized_keys\"" not in text

    forbidden = [
        "systemctl restart docker",
        "service docker restart",
        "docker system prune",
        "docker volume rm",
    ]
    for pattern in forbidden:
        assert pattern not in text, f"forbidden command present: {pattern}"

    # A plain "\nreboot" substring only catches commands at column 0, so an
    # indented `reboot` / `sudo shutdown` inside a function body would slip
    # through. Match at any line start, allow leading whitespace and an
    # optional `sudo`; this still matches everything the old substrings did.
    power_command = re.compile(
        r"^[ \t]*(?:sudo[ \t]+)?(?:reboot|shutdown)",
        re.MULTILINE,
    )
    match = power_command.search(text)
    assert match is None, f"forbidden power command present: {match.group(0)!r}"
def test_recover_110_check_uses_fake_helpers_without_writes(tmp_path: Path) -> None:
    """Run the orchestrator in ``--check`` mode against stub helper scripts.

    Two executable stubs stand in for the SSH repair helper and the Harbor
    watchdog. The orchestrator is pointed at them through its environment
    overrides, and the test checks that each stub was invoked with
    ``--check`` and that the run exits 0.
    """
    helper_bodies = {
        "ssh-helper.sh": (
            "#!/usr/bin/env bash\n"
            "echo SSH_HELPER_MODE=$1\n"
            "echo SSH_METADATA_WRITE=false\n"
        ),
        "harbor-helper.sh": (
            "#!/usr/bin/env bash\n"
            "echo HARBOR_HELPER_MODE=$1\n"
            "echo HARBOR_RUNTIME_WRITE=false\n"
        ),
    }
    helper_paths = {}
    for file_name, body in helper_bodies.items():
        helper = tmp_path / file_name
        helper.write_text(body, encoding="utf-8")
        # Add the owner-execute bit on top of the existing mode.
        helper.chmod(helper.stat().st_mode | stat.S_IXUSR)
        helper_paths[file_name] = helper

    env = dict(os.environ)
    env.update(
        ALLOW_NON_110="1",
        AWOOOI_110_SSH_REPAIR_SCRIPT=str(helper_paths["ssh-helper.sh"]),
        AWOOOI_HARBOR_WATCHDOG_SCRIPT=str(helper_paths["harbor-helper.sh"]),
    )

    completed = subprocess.run(
        ["bash", str(RECOVERY), "--check"],
        check=False,
        env=env,
        capture_output=True,
        text=True,
    )

    assert completed.returncode == 0, completed.stdout + completed.stderr
    expected_markers = (
        "AWOOOI_110_CONTROL_PATH_AND_HARBOR_LOCAL_RECOVERY mode=check",
        "SSH_HELPER_MODE=--check",
        "HARBOR_HELPER_MODE=--check",
        "SSH_METADATA_WRITE=false",
        "HARBOR_RUNTIME_WRITE=false",
        "operation_boundary_secret_value_read=false",
    )
    for marker in expected_markers:
        assert marker in completed.stdout