docs(ops): refresh momo preflight recovery evidence [skip ci]

This commit is contained in:
ogt
2026-06-25 11:51:34 +08:00
parent 6cfe1c1067
commit fc51a8f295
4 changed files with 89 additions and 17 deletions

View File

@@ -33,9 +33,11 @@ Usage: momo-drive-token-source-recovery-preflight.sh [--host user@host] [--fresh
Read-only checks:
- MOMO public health and local health endpoint on 188
- MOMO version, container StartedAt, health, recent replace/restart evidence
- momo-scheduler running / health / UID
- Google token metadata only, never token content
- scheduler fail-closed log evidence and notification evidence
- exact local daily-sales source candidate presence
- daily_sales_snapshot / realtime_sales_monthly bounds
- latest daily_sales import job
@@ -84,12 +86,24 @@ emit() {
}
# --- Probe phase: collect raw facts and hand each one to emit under a fixed key.
# Every probe is best-effort: stderr is discarded and failures collapse to an
# empty value (via `|| true`) so that one missing tool or container does not
# stop the remaining probes.

# Health endpoints: the local one is addressed via loopback, the public one via
# its external hostname.
local_health_url='http://127.0.0.1:5003/health'
public_health_url='https://mo.wooo.work/health'

# docker inspect templates shared by the per-container probes below.
started_at_fmt='{{.State.StartedAt}}'
# Prints the literal "none" when the container has no health section.
health_status_fmt='{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}'

emit HOST "$(hostname 2>/dev/null || true)"

# Response bodies are fetched first; they are only used for the version readback.
momo_health_json="$(curl -s --max-time 5 "$local_health_url" 2>/dev/null || true)"
momo_public_health_json="$(curl -s --max-time 8 "$public_health_url" 2>/dev/null || true)"

# Status codes come from separate requests that throw the body away.
emit MOMO_HEALTH_CODE "$(curl -s -o /dev/null -w '%{http_code}' --max-time 5 "$local_health_url" 2>/dev/null || true)"
emit MOMO_PUBLIC_HEALTH_CODE "$(curl -s -o /dev/null -w '%{http_code}' --max-time 8 "$public_health_url" 2>/dev/null || true)"

# First "version" string found, looking at the local body before the public one.
emit MOMO_HEALTH_VERSION "$(printf '%s\n%s\n' "$momo_health_json" "$momo_public_health_json" | sed -n 's/.*"version"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' | head -n 1)"

# momo-pro-system container.
emit MOMO_APP_STARTED_AT "$(docker inspect -f "$started_at_fmt" momo-pro-system 2>/dev/null || true)"
emit MOMO_APP_HEALTH "$(docker inspect -f "$health_status_fmt" momo-pro-system 2>/dev/null || true)"

# momo-scheduler container.
emit SCHEDULER_RUNNING "$(docker inspect -f '{{.State.Running}}' momo-scheduler 2>/dev/null || true)"
emit SCHEDULER_HEALTH "$(docker inspect -f "$health_status_fmt" momo-scheduler 2>/dev/null || true)"
emit SCHEDULER_STARTED_AT "$(docker inspect -f "$started_at_fmt" momo-scheduler 2>/dev/null || true)"
emit SCHEDULER_RESTART_COUNT "$(docker inspect -f '{{.RestartCount}}' momo-scheduler 2>/dev/null || true)"
# Third column (uid) of the first process row; row 1 of `docker top` is the header.
emit SCHEDULER_UID "$(docker top momo-scheduler -eo pid,user,uid 2>/dev/null | awk 'NR==2 {print $3}' || true)"

# momo-telegram-bot container.
emit TELEGRAM_BOT_STARTED_AT "$(docker inspect -f "$started_at_fmt" momo-telegram-bot 2>/dev/null || true)"
emit TELEGRAM_BOT_HEALTH "$(docker inspect -f "$health_status_fmt" momo-telegram-bot 2>/dev/null || true)"

# Lifecycle events of the three containers over the last 45 minutes. An explicit
# --until bound is passed together with --since; the short `date -Iseconds`
# form is the fallback when the long option is rejected.
event_until="$(date --iso-8601=seconds 2>/dev/null || date -Iseconds)"
recent_events="$(docker events --since 45m --until "$event_until" --filter container=momo-pro-system --filter container=momo-scheduler --filter container=momo-telegram-bot 2>/dev/null || true)"
emit MOMO_CONTAINER_REPLACE_EVENTS_45M "$(printf '%s\n' "$recent_events" | grep -Ec 'container (restart|start|kill|die|stop)' || true)"

# Token file metadata only (uid:gid:mode); the file content is never read here.
token_stat="$(stat -c '%u:%g:%a' /home/ollama/momo-pro/config/google_token.json 2>/dev/null || true)"
emit TOKEN_STAT "${token_stat:-missing}"
@@ -104,6 +118,11 @@ emit LOG_FAILURE_NOTIFY_SUCCESS_COUNT "$(printf '%s\n' "$logs" | grep -Ec '匯
# Count matching lines in the scheduler log text held in $logs (assigned
# outside this section). grep -c exits non-zero on zero matches, so `|| true`
# keeps the printed "0" without failing the substitution.
empty_source_hits="$(printf '%s\n' "$logs" | grep -Ec '找到 0 個 Excel|沒有找到待匯入' || true)"
emit LOG_EMPTY_SOURCE_COUNT "$empty_source_hits"
success_import_hits="$(printf '%s\n' "$logs" | grep -Ec '自動匯入完成|匯入成功|成功匯入' || true)"
emit LOG_SUCCESS_IMPORT_COUNT "$success_import_hits"

# Look for files carrying the exact daily-sales file name under the app tree
# and /backup. Only metadata is reported: how many exist and, for the most
# recently modified one, its mtime, size and base name.
daily_source_name='即時業績_當日.xlsx'
daily_source_fields='%T@|%TY-%Tm-%TdT%TH:%TM:%TS|%s|%f\n'

source_count="$(find /home/ollama/momo-pro /backup -type f -name "$daily_source_name" 2>/dev/null | wc -l | awk '{print $1}' || true)"
emit LOCAL_EXACT_DAILY_SOURCE_COUNT "${source_count:-0}"

# The leading epoch field exists only for sorting; cut drops it afterwards.
latest_source="$(find /home/ollama/momo-pro /backup -type f -name "$daily_source_name" -printf "$daily_source_fields" 2>/dev/null | sort -n | tail -n 1 | cut -d'|' -f2- || true)"
emit LOCAL_EXACT_DAILY_SOURCE_LATEST "${latest_source:-none}"
# psql_query SQL
# Runs one SQL statement with psql inside the momo-db container against the
# momo_analytics database as user momo, printing unaligned, tuples-only rows
# on stdout. Best-effort: stderr is discarded and the function always returns
# 0, so a failed query simply yields empty output.
psql_query() {
  docker exec momo-db \
    psql -h 127.0.0.1 -U momo -d momo_analytics -Atc "$1" 2>/dev/null \
    || return 0
}
@@ -137,12 +156,32 @@ public_health_code="$(value_for MOMO_PUBLIC_HEALTH_CODE)"
# --- Evaluation phase: turn the collected values into ok / warn / blocked
# verdicts. Each check is a real if/else rather than `test && ok || warn`, so
# the failure branch can never fire merely because the success reporter
# returned a non-zero status.

# Public endpoint failure blocks; local endpoint failure only warns.
if [[ "$public_health_code" == "200" ]]; then
  ok "MOMO public health endpoint returns 200"
else
  blocked "MOMO public health endpoint is not 200: ${public_health_code:-missing}"
fi
if [[ "$health_code" == "200" ]]; then
  ok "MOMO local health endpoint returns 200"
else
  warn "MOMO local health endpoint is not 200: ${health_code:-missing}"
fi

momo_version="$(value_for MOMO_HEALTH_VERSION)"
if [[ -n "$momo_version" ]]; then
  ok "MOMO health version readback is available: $momo_version"
else
  warn "MOMO health version readback unavailable"
fi

scheduler_running="$(value_for SCHEDULER_RUNNING)"
scheduler_health="$(value_for SCHEDULER_HEALTH)"
scheduler_started_at="$(value_for SCHEDULER_STARTED_AT)"
scheduler_restart_count="$(value_for SCHEDULER_RESTART_COUNT)"
momo_app_health="$(value_for MOMO_APP_HEALTH)"
momo_app_started_at="$(value_for MOMO_APP_STARTED_AT)"
telegram_bot_health="$(value_for TELEGRAM_BOT_HEALTH)"
telegram_bot_started_at="$(value_for TELEGRAM_BOT_STARTED_AT)"

# A scheduler that is not running is the only blocking container condition;
# the remaining container findings are advisory.
if [[ "$scheduler_running" == "true" ]]; then
  ok "momo-scheduler container is running"
else
  blocked "momo-scheduler container is not running"
fi
if [[ "$scheduler_health" == "healthy" ]]; then
  ok "momo-scheduler container health is healthy"
else
  warn "momo-scheduler health is not healthy: ${scheduler_health:-missing}"
fi
if [[ -n "$scheduler_started_at" ]]; then
  ok "momo-scheduler started_at metadata is available: $scheduler_started_at"
else
  warn "momo-scheduler started_at metadata unavailable"
fi
if [[ "$scheduler_restart_count" == "0" ]]; then
  ok "momo-scheduler restart count is 0"
else
  warn "momo-scheduler restart count is not 0: ${scheduler_restart_count:-missing}"
fi

if [[ "$momo_app_health" == "healthy" ]]; then
  ok "momo-pro-system health is healthy"
else
  warn "momo-pro-system health is not healthy: ${momo_app_health:-missing}"
fi
if [[ -n "$momo_app_started_at" ]]; then
  ok "momo-pro-system started_at metadata is available: $momo_app_started_at"
else
  warn "momo-pro-system started_at metadata unavailable"
fi

if [[ "$telegram_bot_health" == "healthy" ]]; then
  ok "momo-telegram-bot health is healthy"
else
  warn "momo-telegram-bot health is not healthy: ${telegram_bot_health:-missing}"
fi
if [[ -n "$telegram_bot_started_at" ]]; then
  ok "momo-telegram-bot started_at metadata is available: $telegram_bot_started_at"
else
  warn "momo-telegram-bot started_at metadata unavailable"
fi

# Any start/stop/kill/die/restart event counted for the last 45 minutes is
# surfaced as a warning.
replace_events="$(num_for MOMO_CONTAINER_REPLACE_EVENTS_45M)"
if [[ "$replace_events" -gt 0 ]]; then
  warn "recent MOMO container replace/restart events observed in the last 45m: $replace_events"
else
  ok "no MOMO container replace/restart events observed in the last 45m"
fi

scheduler_uid="$(value_for SCHEDULER_UID)"
token_stat="$(value_for TOKEN_STAT)"
@@ -182,6 +221,14 @@ else
warn "scheduler failure notification success evidence not observed in the last 8h"
fi
# Files with the exact daily-sales name found on local / backup paths are
# reported as a warning, since they sit outside the Drive intake.
local_source_count="$(num_for LOCAL_EXACT_DAILY_SOURCE_COUNT)"
local_source_latest="$(value_for LOCAL_EXACT_DAILY_SOURCE_LATEST)"
if [[ "$local_source_count" -gt 0 ]]; then
  warn "exact local daily-sales source candidates exist outside Drive intake: count=$local_source_count latest=${local_source_latest:-unknown}"
else
  ok "no exact local daily-sales source candidate found on 188 / backup paths"
fi

# The import config must contain the expected "<label>|<file stem>" pair.
# Written as if/else (not `test && ok || blocked`) so that blocked can only
# run when the match itself fails, never because ok returned non-zero.
import_config="$(value_for IMPORT_CONFIG)"
if [[ "$import_config" == *"當日業績匯入|即時業績_當日"* ]]; then
  ok "Drive import config points to expected daily-sales intake"
else
  blocked "Drive import config is unavailable or drifted: ${import_config:-missing}"
fi