fix(ops): gate reboot recovery on product freshness [skip ci]

This commit is contained in:
ogt
2026-06-25 19:39:42 +08:00
parent bfc78d3fee
commit 5e4887d15c
7 changed files with 170 additions and 15 deletions

View File

@@ -9,6 +9,7 @@ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Run-time defaults.
# SSH_CONNECT_TIMEOUT keeps a non-empty value supplied through the environment
# and otherwise falls back to 6.
SSH_CONNECT_TIMEOUT="${SSH_CONNECT_TIMEOUT:-6}"
# Per-section switches: 1 means the section runs. The option parser sets the
# matching flag to 0 for --skip-momo, --skip-stock and --skip-backup; the other
# flags presumably pair with the remaining --skip-* options listed in usage
# (NOTE(review): confirm against the full parser).
RUN_COLD_START=1
RUN_MOMO=1
RUN_STOCK=1
RUN_BACKUP=1
RUN_ROUTES=1
RUN_CPU=1
@@ -29,10 +30,29 @@ HOSTS=(
)
# HTTPS URLs, one array element per URL (site roots plus a few health-style
# paths). Presumably these are the "public route smoke targets" that usage
# says --skip-routes stops curl-ing -- NOTE(review): confirm where ROUTES is
# consumed.
ROUTES=(
"https://awoooi.wooo.work/"
"https://awoooi.wooo.work/api/v1/health"
"https://awoooi.wooo.work/zh-TW/iwooos"
"https://vibework.wooo.work/"
"https://awooogo.wooo.work/"
"https://2026fifa.wooo.work/"
"https://agent.wooo.work/"
"https://mo.wooo.work/"
"https://mo.wooo.work/health"
"https://stock.wooo.work/"
"https://stock.wooo.work/healthz"
"https://stock.wooo.work/api/healthz"
"https://bitan.wooo.work/"
"https://tsenyang.com/"
"https://www.tsenyang.com/"
"https://vtuber.wooo.work/"
"https://gitea.wooo.work/"
"https://harbor.wooo.work/"
"https://registry.wooo.work/"
"https://sentry.wooo.work/"
"https://signoz.wooo.work/"
"https://langfuse.wooo.work/"
"https://aiops.wooo.work/"
)
usage() {
@@ -44,6 +64,7 @@ Read-only post-reboot quick check for 110 / 120 / 121 / 188.
Options:
--skip-cold-start Do not run full-stack-cold-start-check.sh.
--skip-momo Do not run momo-drive-token-source-recovery-preflight.sh.
--skip-stock Do not query StockPlatform data freshness.
--skip-backup Do not run /backup/scripts/backup-status.sh on 110.
--skip-routes Do not curl public route smoke targets.
--skip-cpu Do not read 110 CPU / process summary.
@@ -67,6 +88,9 @@ while [[ $# -gt 0 ]]; do
--skip-momo)
RUN_MOMO=0
;;
--skip-stock)
RUN_STOCK=0
;;
--skip-backup)
RUN_BACKUP=0
;;
@@ -244,6 +268,57 @@ if [[ "$RUN_MOMO" -eq 1 ]]; then
rm -f "$momo_tmp"
fi
# StockPlatform data-freshness gate; skipped when RUN_STOCK is 0 (--skip-stock).
# Downloads the freshness endpoint into a temp file, prints the STOCK_* report
# lines, then calls ok/blocked depending on the payload's "status" field.
# Relies on the section/ok/blocked helpers defined elsewhere in this script.
if [[ "$RUN_STOCK" -eq 1 ]]; then
  section "StockPlatform freshness"
  stock_tmp="$(mktemp -t post-start-stock.XXXXXX)"
  # -w prints the HTTP status code; `|| true` keeps a transport failure from
  # stopping the run so the status test below can report it instead.
  stock_code="$(curl -k -sS -o "$stock_tmp" -w '%{http_code}' --max-time 12 "https://stock.wooo.work/api/v1/system/freshness" 2>/dev/null || true)"
  if [[ "$stock_code" != 2* ]]; then
    blocked "StockPlatform freshness endpoint returned ${stock_code:-curl_failed}"
    cat "$stock_tmp" || true
  else
    # Parse the payload exactly once. Python prints the human-readable report
    # followed by one final machine line "<status><TAB><blockers>", and exits
    # non-zero when the body is not a JSON object (for example an HTML error
    # page served with a 2xx code), so that case is reported, not crashed on.
    stock_rc=0
    stock_report="$(python3 - "$stock_tmp" <<'PY'
import json
import sys


def one_line(value):
    """Flatten tabs and newlines so the verdict stays on a single line."""
    return str(value).replace("\t", " ").replace("\r", " ").replace("\n", " ")


try:
    with open(sys.argv[1], "r", encoding="utf-8") as fh:
        payload = json.load(fh)
except (OSError, ValueError) as exc:
    # ValueError covers both JSON syntax errors and invalid UTF-8.
    sys.exit(f"unparsable freshness payload: {exc}")
if not isinstance(payload, dict):
    sys.exit("freshness payload is not a JSON object")

status = payload.get("status")

raw_blockers = payload.get("blockers") or []
if not isinstance(raw_blockers, list):
    raw_blockers = [raw_blockers]
# str() keeps the join from raising on non-string entries.
blockers = ",".join(str(item) for item in raw_blockers)

sources = payload.get("sources") or []
if not isinstance(sources, list):
    sources = []

print(f"STOCK_FRESHNESS_STATUS {status}")
latest = payload.get("latest_trading_date")
print(f"STOCK_LATEST_TRADING_DATE {latest}")
print("STOCK_BLOCKERS " + blockers)
for source in sources:
    if not isinstance(source, dict):
        print(f"ignoring non-object source entry: {source!r}", file=sys.stderr)
        continue
    fields = [
        source.get(key)
        for key in ("source", "status", "latest_date", "row_count")
    ]
    print("STOCK_SOURCE " + "|".join(str(value) for value in fields))

# Final line: consumed by the shell, not shown to the operator.
print(one_line(status or "") + "\t" + one_line(blockers))
PY
    )" || stock_rc=$?
    if [[ "$stock_rc" -ne 0 ]]; then
      blocked "StockPlatform freshness payload is not a usable JSON object (HTTP ${stock_code})"
      cat "$stock_tmp" || true
    else
      # Split the captured output: everything before the last newline is the
      # report, the last line is the status/blockers verdict.
      stock_verdict="${stock_report##*$'\n'}"
      printf '%s\n' "${stock_report%$'\n'*}"
      stock_status="${stock_verdict%%$'\t'*}"
      stock_blockers="${stock_verdict#*$'\t'}"
      if [[ "$stock_status" == "ok" ]]; then
        ok "StockPlatform freshness is ok"
      else
        blocked "StockPlatform freshness is ${stock_status:-unknown}: ${stock_blockers:-no_blocker_list}"
      fi
    fi
  fi
  rm -f "$stock_tmp"
fi
if [[ "$RUN_BACKUP" -eq 1 ]]; then
section "Backup / offsite / escrow"
backup_tmp="$(mktemp -t post-start-backup.XXXXXX)"