docs(ops): refresh momo preflight recovery evidence [skip ci]
This commit is contained in:
@@ -33,9 +33,11 @@ Usage: momo-drive-token-source-recovery-preflight.sh [--host user@host] [--fresh
|
||||
|
||||
Read-only checks:
|
||||
- MOMO public health and local health endpoint on 188
|
||||
- MOMO version, container StartedAt, health, recent replace/restart evidence
|
||||
- momo-scheduler running / health / UID
|
||||
- Google token metadata only, never token content
|
||||
- scheduler fail-closed log evidence and notification evidence
|
||||
- exact local daily-sales source candidate presence
|
||||
- daily_sales_snapshot / realtime_sales_monthly bounds
|
||||
- latest daily_sales import job
|
||||
|
||||
@@ -84,12 +86,24 @@ emit() {
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# Read-only runtime evidence: host, MOMO health, container metadata, recent
# container lifecycle events and Google token file metadata.
# Every probe is best-effort (`2>/dev/null || true`) so that a missing tool or
# container yields an empty value instead of aborting the collection.
# ---------------------------------------------------------------------------
emit HOST "$(hostname 2>/dev/null || true)"

# Fetch each health endpoint exactly once. curl appends the HTTP status code on
# its own trailing line (-w '\n%{http_code}'), so the status code and the JSON
# body used for the version readback always describe the same response, and the
# worst-case wait is one timeout per endpoint instead of two.
momo_health_response="$(curl -s -w '\n%{http_code}' --max-time 5 http://127.0.0.1:5003/health 2>/dev/null || true)"
momo_public_health_response="$(curl -s -w '\n%{http_code}' --max-time 8 https://mo.wooo.work/health 2>/dev/null || true)"

# Body = everything except the last line; status code = the last line.
# When curl produced no output at all both values are empty, as before.
momo_health_json="$(printf '%s\n' "$momo_health_response" | sed '$d')"
momo_public_health_json="$(printf '%s\n' "$momo_public_health_response" | sed '$d')"
emit MOMO_HEALTH_CODE "$(printf '%s\n' "$momo_health_response" | tail -n 1)"
emit MOMO_PUBLIC_HEALTH_CODE "$(printf '%s\n' "$momo_public_health_response" | tail -n 1)"

# First "version" string found, preferring the local response over the public one.
emit MOMO_HEALTH_VERSION "$(printf '%s\n%s\n' "$momo_health_json" "$momo_public_health_json" | sed -n 's/.*"version"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' | head -n 1)"

# Application container: start time and Docker health status ("none" when the
# container defines no health check).
emit MOMO_APP_STARTED_AT "$(docker inspect -f '{{.State.StartedAt}}' momo-pro-system 2>/dev/null || true)"
emit MOMO_APP_HEALTH "$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' momo-pro-system 2>/dev/null || true)"

# Scheduler container: running flag, health, start time, restart count and the
# UID column of the first process row reported by `docker top`.
emit SCHEDULER_RUNNING "$(docker inspect -f '{{.State.Running}}' momo-scheduler 2>/dev/null || true)"
emit SCHEDULER_HEALTH "$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' momo-scheduler 2>/dev/null || true)"
emit SCHEDULER_STARTED_AT "$(docker inspect -f '{{.State.StartedAt}}' momo-scheduler 2>/dev/null || true)"
emit SCHEDULER_RESTART_COUNT "$(docker inspect -f '{{.RestartCount}}' momo-scheduler 2>/dev/null || true)"
emit SCHEDULER_UID "$(docker top momo-scheduler -eo pid,user,uid 2>/dev/null | awk 'NR==2 {print $3}' || true)"

# Telegram bot container: start time and health status.
emit TELEGRAM_BOT_STARTED_AT "$(docker inspect -f '{{.State.StartedAt}}' momo-telegram-bot 2>/dev/null || true)"
emit TELEGRAM_BOT_HEALTH "$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' momo-telegram-bot 2>/dev/null || true)"

# Count lifecycle events for the three containers over the last 45 minutes.
# An explicit --until bounds the query to "now"; the second `date` form is a
# fallback for date implementations that reject the long option.
event_until="$(date --iso-8601=seconds 2>/dev/null || date -Iseconds)"
recent_events="$(docker events --since 45m --until "$event_until" --filter container=momo-pro-system --filter container=momo-scheduler --filter container=momo-telegram-bot 2>/dev/null || true)"
# grep -c prints 0 but exits non-zero when nothing matches, hence `|| true`.
emit MOMO_CONTAINER_REPLACE_EVENTS_45M "$(printf '%s\n' "$recent_events" | grep -Ec 'container (restart|start|kill|die|stop)' || true)"

# Token file metadata only (owner uid:gid:mode); the token content is never read.
token_stat="$(stat -c '%u:%g:%a' /home/ollama/momo-pro/config/google_token.json 2>/dev/null || true)"
emit TOKEN_STAT "${token_stat:-missing}"
|
||||
@@ -104,6 +118,11 @@ emit LOG_FAILURE_NOTIFY_SUCCESS_COUNT "$(printf '%s\n' "$logs" | grep -Ec '匯
|
||||
# Scheduler log evidence. The patterns are matched against "$logs" verbatim:
#   - empty intake:   "found 0 Excel" / "nothing found pending import"
#   - import success: "auto import finished" / "import succeeded"
# grep -c prints the count but exits non-zero on zero matches, so each pipeline
# ends in `|| true`.
empty_source_hits="$(printf '%s\n' "$logs" | grep -Ec '找到 0 個 Excel|沒有找到待匯入' || true)"
success_import_hits="$(printf '%s\n' "$logs" | grep -Ec '自動匯入完成|匯入成功|成功匯入' || true)"
emit LOG_EMPTY_SOURCE_COUNT "$empty_source_hits"
emit LOG_SUCCESS_IMPORT_COUNT "$success_import_hits"

# Exact-name daily-sales workbooks lying around on the host or backup paths.
# First pass: how many regular files carry the exact file name.
source_count="$(
  find /home/ollama/momo-pro /backup -type f -name '即時業績_當日.xlsx' 2>/dev/null \
    | wc -l \
    | awk '{print $1}' \
    || true
)"

# Second pass: newest candidate. Each record is "epoch|timestamp|size|name";
# sorting numerically on the leading epoch and keeping the last record selects
# the most recently modified file, then the epoch field is dropped.
latest_source="$(
  find /home/ollama/momo-pro /backup -type f -name '即時業績_當日.xlsx' -printf '%T@|%TY-%Tm-%TdT%TH:%TM:%TS|%s|%f\n' 2>/dev/null \
    | sort -n \
    | tail -n 1 \
    | cut -d'|' -f2- \
    || true
)"

# Fall back to explicit placeholders when nothing was found.
emit LOCAL_EXACT_DAILY_SOURCE_COUNT "${source_count:-0}"
emit LOCAL_EXACT_DAILY_SOURCE_LATEST "${latest_source:-none}"
|
||||
|
||||
# psql_query SQL
# Runs one SQL statement through psql inside the momo-db container and prints
# the result unaligned and tuples-only (-At). Errors are silenced and the
# function always returns 0, so a failed query simply yields empty output.
psql_query() {
  docker exec momo-db psql \
    -h 127.0.0.1 \
    -U momo \
    -d momo_analytics \
    -Atc "$1" 2>/dev/null \
    || return 0
}
|
||||
@@ -137,12 +156,32 @@ public_health_code="$(value_for MOMO_PUBLIC_HEALTH_CODE)"
|
||||
# ---------------------------------------------------------------------------
# Evaluate the collected health and container evidence.
# Each check is an explicit if/else rather than `cond && ok ... || warn ...`:
# with the chained form the failure reporter also runs whenever the success
# reporter itself returns non-zero, which would record a passing check twice
# with contradictory results.
# ---------------------------------------------------------------------------

# Public endpoint failure is blocking; local endpoint failure only warns.
if [[ "$public_health_code" == "200" ]]; then
  ok "MOMO public health endpoint returns 200"
else
  blocked "MOMO public health endpoint is not 200: ${public_health_code:-missing}"
fi
if [[ "$health_code" == "200" ]]; then
  ok "MOMO local health endpoint returns 200"
else
  warn "MOMO local health endpoint is not 200: ${health_code:-missing}"
fi

momo_version="$(value_for MOMO_HEALTH_VERSION)"
if [[ -n "$momo_version" ]]; then
  ok "MOMO health version readback is available: $momo_version"
else
  warn "MOMO health version readback unavailable"
fi

scheduler_running="$(value_for SCHEDULER_RUNNING)"
scheduler_health="$(value_for SCHEDULER_HEALTH)"
scheduler_started_at="$(value_for SCHEDULER_STARTED_AT)"
scheduler_restart_count="$(value_for SCHEDULER_RESTART_COUNT)"
momo_app_health="$(value_for MOMO_APP_HEALTH)"
momo_app_started_at="$(value_for MOMO_APP_STARTED_AT)"
telegram_bot_health="$(value_for TELEGRAM_BOT_HEALTH)"
telegram_bot_started_at="$(value_for TELEGRAM_BOT_STARTED_AT)"

# A stopped scheduler is the only blocking container condition; everything
# else below is reported as a warning.
if [[ "$scheduler_running" == "true" ]]; then
  ok "momo-scheduler container is running"
else
  blocked "momo-scheduler container is not running"
fi
if [[ "$scheduler_health" == "healthy" ]]; then
  ok "momo-scheduler container health is healthy"
else
  warn "momo-scheduler health is not healthy: ${scheduler_health:-missing}"
fi
if [[ -n "$scheduler_started_at" ]]; then
  ok "momo-scheduler started_at metadata is available: $scheduler_started_at"
else
  warn "momo-scheduler started_at metadata unavailable"
fi
if [[ "$scheduler_restart_count" == "0" ]]; then
  ok "momo-scheduler restart count is 0"
else
  warn "momo-scheduler restart count is not 0: ${scheduler_restart_count:-missing}"
fi
if [[ "$momo_app_health" == "healthy" ]]; then
  ok "momo-pro-system health is healthy"
else
  warn "momo-pro-system health is not healthy: ${momo_app_health:-missing}"
fi
if [[ -n "$momo_app_started_at" ]]; then
  ok "momo-pro-system started_at metadata is available: $momo_app_started_at"
else
  warn "momo-pro-system started_at metadata unavailable"
fi
if [[ "$telegram_bot_health" == "healthy" ]]; then
  ok "momo-telegram-bot health is healthy"
else
  warn "momo-telegram-bot health is not healthy: ${telegram_bot_health:-missing}"
fi
if [[ -n "$telegram_bot_started_at" ]]; then
  ok "momo-telegram-bot started_at metadata is available: $telegram_bot_started_at"
else
  warn "momo-telegram-bot started_at metadata unavailable"
fi

# Any start/stop/kill/die/restart event in the window is surfaced as a warning.
replace_events="$(num_for MOMO_CONTAINER_REPLACE_EVENTS_45M)"
if [[ "$replace_events" -gt 0 ]]; then
  warn "recent MOMO container replace/restart events observed in the last 45m: $replace_events"
else
  ok "no MOMO container replace/restart events observed in the last 45m"
fi

scheduler_uid="$(value_for SCHEDULER_UID)"
token_stat="$(value_for TOKEN_STAT)"
|
||||
@@ -182,6 +221,14 @@ else
|
||||
warn "scheduler failure notification success evidence not observed in the last 8h"
|
||||
fi
|
||||
|
||||
# Exact-name daily-sales files found on the host or backup paths are reported
# as a warning together with the newest candidate's metadata; finding none is
# the passing outcome.
local_source_count="$(num_for LOCAL_EXACT_DAILY_SOURCE_COUNT)"
local_source_latest="$(value_for LOCAL_EXACT_DAILY_SOURCE_LATEST)"
if ! [[ "$local_source_count" -gt 0 ]]; then
  ok "no exact local daily-sales source candidate found on 188 / backup paths"
else
  warn "exact local daily-sales source candidates exist outside Drive intake: count=$local_source_count latest=${local_source_latest:-unknown}"
fi
|
||||
|
||||
# The Drive import configuration must contain the expected
# "<folder>|<file stem>" pair for the daily-sales intake; anything else
# (including a missing value) blocks the recovery.
# Written as if/else so that `blocked` can only run when the match fails,
# never as a side effect of `ok` returning non-zero.
import_config="$(value_for IMPORT_CONFIG)"
if [[ "$import_config" == *"當日業績匯入|即時業績_當日"* ]]; then
  ok "Drive import config points to expected daily-sales intake"
else
  blocked "Drive import config is unavailable or drifted: ${import_config:-missing}"
fi
|
||||
|
||||
|
||||
Reference in New Issue
Block a user