Files
ewoooc/scripts/tools/system_startup_complete.sh
OoO db21e7e8e8
Some checks failed
CD Pipeline / deploy (push) Has been cancelled
fix(devops): 移除 startup 腳本危險 compose 操作
2026-04-30 14:05:41 +08:00

284 lines
10 KiB
Bash

#!/bin/bash
# =============================================================================
# WOOO TECH - MOMO Pro System full startup script
# Purpose: bring every service back up automatically after a system reboot
# Version: 2.0
# Date:    2026-02-06
#
# Optional environment variables:
#   TELEGRAM_BOT_TOKEN  Telegram bot token used for the final notification
#   TELEGRAM_CHAT_ID    Telegram chat that receives the notification
# =============================================================================
# Abort on any unhandled command failure. Note that errexit is NOT suppressed
# inside the brace group that follows a final `||`, so commands there must be
# guarded explicitly.
set -e

# Configuration
LOG_FILE="/var/log/momo_startup.log"

# SECURITY: credentials must not live in version control. The values from the
# environment take precedence; the literals below are kept only as a
# backward-compatible fallback so existing deployments keep notifying.
# TODO(security): rotate this bot token (it has been committed) and then
# remove the fallback literals in favour of an environment/secret file.
TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-8075645931:AAH-EGKMo8ZC4QJs-Nc1_0s92xHrGdQvdpg}"
TELEGRAM_CHAT_ID="${TELEGRAM_CHAT_ID:-5619078117}"

# ANSI colour escapes; stored unexpanded and interpreted when a line is printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
#######################################
# Write one timestamped log line to stdout and append it to the log file.
# Globals:   LOG_FILE (read) - file the line is appended to
# Arguments: $1   - level tag printed in brackets (e.g. INFO, WARN)
#            $2.. - message words; joined with single spaces
# Outputs:   "<YYYY-mm-dd HH:MM:SS> [<level>] <message>" on stdout and in
#            LOG_FILE; backslash escapes in the message (colour codes) are
#            interpreted.
# Returns:   0, even when LOG_FILE cannot be written.
#######################################
log() {
  local level=$1
  shift
  # "$*" joins all remaining arguments into one string explicitly.
  local message="$*"
  # Declare and assign separately so `local` does not mask the exit status.
  local timestamp
  timestamp=$(date '+%Y-%m-%d %H:%M:%S')
  # Logging is best effort: tee still prints to stdout when the file is not
  # writable, and its non-zero status must not abort the script under set -e.
  printf '%b\n' "${timestamp} [${level}] ${message}" | tee -a "$LOG_FILE" || true
}
# Level-specific wrappers around log(). The coloured variants join all of
# their arguments with "$*" so the colour prefix and the reset suffix are
# always part of one single message word (mixing "$@" into a string splits
# the prefix and suffix across different words).
log_info() { log "INFO" "$@"; }
log_warn() { log "WARN" "${YELLOW}$*${NC}"; }
log_error() { log "ERROR" "${RED}$*${NC}"; }
log_success() { log "SUCCESS" "${GREEN}$*${NC}"; }
#######################################
# Send a Telegram notification (best effort).
# Globals:   TELEGRAM_BOT_TOKEN (read), TELEGRAM_CHAT_ID (read)
# Arguments: $1 - message text; sent with parse_mode=HTML
# Outputs:   nothing (curl output is discarded)
# Returns:   0 always; a failed or skipped notification never fails the caller
#######################################
send_telegram() {
  local message=$1

  # Without credentials there is nothing useful to send.
  if [[ -z "${TELEGRAM_BOT_TOKEN:-}" || -z "${TELEGRAM_CHAT_ID:-}" ]]; then
    return 0
  fi

  # --data-urlencode keeps characters such as '&', '+' and '%' inside the
  # message intact (plain -d would send them unencoded and corrupt the form).
  # --max-time bounds the call so a dead network cannot hang the script.
  # NOTE(review): the token is part of the URL and therefore visible in the
  # process list while curl runs.
  curl -s --max-time 15 -X POST \
    "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
    --data-urlencode "chat_id=${TELEGRAM_CHAT_ID}" \
    --data-urlencode "parse_mode=HTML" \
    --data-urlencode "text=${message}" > /dev/null 2>&1 || true
}
#######################################
# Poll a readiness command until it succeeds or the attempts run out.
# Arguments: $1 - service name used in log messages
#            $2 - shell command string; evaluated with eval in the current
#                 shell, output discarded; exit status 0 means "ready"
#            $3 - maximum number of attempts (default 30)
# Outputs:   progress via log_info / log_success / log_error
# Returns:   0 once the command succeeds, 1 after the last failed attempt
#######################################
wait_for_service() {
  local name=$1
  local check_cmd=$2
  local max_attempts=${3:-30}
  local attempt

  log_info "等待 ${name} 就緒..."
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # check_cmd is a trusted, script-internal string (it may contain pipes),
    # which is why it is run through eval.
    if eval "$check_cmd" > /dev/null 2>&1; then
      log_success "${name} 已就緒 (嘗試 ${attempt}/${max_attempts})"
      return 0
    fi
    # Pause between attempts only; sleeping after the final failure would
    # just delay the error report.
    if ((attempt < max_attempts)); then
      sleep 2
    fi
  done

  log_error "${name} 啟動失敗 (超過 ${max_attempts} 次嘗試)"
  return 1
}
# =============================================================================
# 服務啟動順序和依賴關係
# =============================================================================
#
# 啟動順序圖:
#
# ┌──────────────┐
# │ 1. Docker │ ← 所有容器服務的基礎
# └──────┬───────┘
# │
# ┌──────▼───────┐ ┌──────────────────┐
# │ 2. Harbor │ │ 3. K8s (K3s) │
# │ Registry │ │ 自動啟動 │
# └──────┬───────┘ └────────┬─────────┘
# │ │
# ┌──────▼───────┐ ┌────────▼─────────┐
# │ 4. GitLab │ │ 5. K8s Services │
# │ CI/CD │ │ - PostgreSQL │
# └──────────────┘ │ - momo-app │
# │ - scheduler │
# └────────┬─────────┘
# │
# ┌────────────────────────────▼───────────────────────────┐
# │ 6. 監控服務 (Docker) │
# │ - Prometheus │
# │ - Grafana │
# │ - Node Exporter │
# │ - Alertmanager │
# └────────────────────────────┬───────────────────────────┘
# │
# ┌────────▼─────────┐
# │ 7. 健康檢查 │
# │ 發送通知 │
# └──────────────────┘
#
# =============================================================================
#######################################
# Start every service in dependency order, run a final health check and send
# a Telegram summary.
#
# The script runs under `set -e`, so every step whose failure should be
# REPORTED rather than abort the run is guarded explicitly. In particular the
# error counter is incremented with `errors=$((errors + 1))`: the arithmetic
# command `((errors++))` returns status 1 when the counter is 0, which under
# errexit terminated the script on the very first recorded error.
#
# Globals:   none written; uses the log_* helpers, wait_for_service and
#            send_telegram
# Arguments: none used
# Outputs:   log lines and one Telegram notification
# Returns:   the number of errors counted (0 on full success)
#######################################
main() {
  local start_time
  start_time=$(date +%s)
  local errors=0

  log_info "=========================================="
  log_info "MOMO Pro System 啟動程序開始"
  log_info "=========================================="

  # ===========================================================================
  # 1. Make sure the Docker daemon is running
  # ===========================================================================
  log_info "[1/7] 確認 Docker 服務..."
  if ! systemctl is-active --quiet docker; then
    log_warn "Docker 未運行,正在啟動..."
    # A failed start is detected and reported by the readiness wait below.
    systemctl start docker || true
    wait_for_service "Docker" "docker info" 30 || {
      errors=$((errors + 1))
      log_error "Docker 啟動失敗"
    }
  else
    log_success "Docker 已運行"
  fi

  # ===========================================================================
  # 2. Start the Harbor registry
  # ===========================================================================
  log_info "[2/7] 啟動 Harbor Registry..."
  # Compose commands act on the current directory, so they are only run when
  # the cd succeeded; running them elsewhere could touch another project.
  if cd /home/wooo/devops/harbor/harbor; then
    # ADR-011: on a host shared by several projects, avoid compose operations
    # that remove orphan resources.
    docker compose stop 2>/dev/null || true
    sleep 5
    docker compose up -d --force-recreate \
      || log_warn "Harbor compose 啟動指令失敗"
  else
    log_warn "無法進入 Harbor 目錄,略過 compose 操作"
  fi
  wait_for_service "Harbor" "curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:5050/api/v2.0/ping | grep -q 200" 60 || {
    errors=$((errors + 1))
    log_error "Harbor 啟動失敗"
  }

  # ===========================================================================
  # 3. Make sure K3s is running
  # ===========================================================================
  log_info "[3/7] 確認 K3s 服務..."
  if ! systemctl is-active --quiet k3s; then
    log_warn "K3s 未運行,正在啟動..."
    # A failed start is detected and reported by the readiness wait below.
    systemctl start k3s || true
    wait_for_service "K3s" "kubectl get nodes" 60 || {
      errors=$((errors + 1))
      log_error "K3s 啟動失敗"
    }
  else
    log_success "K3s 已運行"
  fi

  # ===========================================================================
  # 4. Start GitLab
  # ===========================================================================
  log_info "[4/7] 啟動 GitLab..."
  # NOTE(review): this pattern also matches a running "gitlab-runner"
  # container, in which case the start below is skipped - confirm intent.
  if ! docker ps | grep -q gitlab; then
    docker start gitlab gitlab-runner 2>/dev/null || {
      log_warn "GitLab 容器不存在,嘗試從 compose 啟動..."
      cd /home/wooo/devops/gitlab && docker compose up -d 2>/dev/null || true
    }
  fi
  wait_for_service "GitLab" "curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:8929 | grep -q -E '200|302'" 120 || {
    errors=$((errors + 1))
    log_warn "GitLab 啟動緩慢,可能需要更多時間"
  }

  # ===========================================================================
  # 5. Check the workloads running inside K8s
  # ===========================================================================
  log_info "[5/7] 確認 K8s 服務..."

  # PostgreSQL: restart the statefulset when it does not become ready.
  log_info "等待 PostgreSQL..."
  wait_for_service "PostgreSQL" "kubectl exec momo-postgres-0 -n momo -- pg_isready -U momo -d momo_analytics" 60 || {
    log_warn "PostgreSQL 未就緒,重啟中..."
    if kubectl rollout restart statefulset/momo-postgres -n momo; then
      sleep 30
    else
      errors=$((errors + 1))
      log_error "PostgreSQL 重啟指令失敗"
    fi
  }

  # momo-app: restart the deployment when its health endpoint is not healthy.
  log_info "等待 momo-app..."
  wait_for_service "momo-app" "kubectl exec -n momo deploy/momo-app -- curl -s http://localhost:80/health | grep -q healthy" 90 || {
    log_warn "momo-app 未就緒,重啟中..."
    if kubectl rollout restart deployment/momo-app -n momo; then
      sleep 30
    else
      errors=$((errors + 1))
      log_error "momo-app 重啟指令失敗"
    fi
  }

  # scheduler: restart when no pod is reported as Running.
  log_info "等待 scheduler..."
  kubectl get pods -n momo -l app=momo-scheduler | grep -q Running || {
    log_warn "scheduler 未運行,重啟中..."
    kubectl rollout restart deployment/momo-scheduler -n momo || {
      errors=$((errors + 1))
      log_error "scheduler 重啟指令失敗"
    }
  }

  # ===========================================================================
  # 6. Start the monitoring stack
  # ===========================================================================
  log_info "[6/7] 啟動監控服務..."
  if cd /home/wooo/monitoring; then
    docker compose up -d 2>/dev/null || log_warn "監控服務啟動失敗(可能已存在)"
  else
    log_warn "無法進入監控目錄,略過 compose 操作"
  fi
  # Grafana readiness
  wait_for_service "Grafana" "curl -s -o /dev/null http://127.0.0.1:3000/login" 30 || {
    errors=$((errors + 1))
    log_warn "Grafana 未就緒"
  }

  # ===========================================================================
  # 7. Final health check (each probe is bounded by --max-time)
  # ===========================================================================
  log_info "[7/7] 執行最終健康檢查..."
  local health_status=""

  # Public endpoint
  if curl -s --max-time 10 -o /dev/null -w '%{http_code}' https://mo.wooo.work/health 2>/dev/null | grep -q 200; then
    health_status+="✅ mo.wooo.work: 正常\n"
  else
    health_status+="❌ mo.wooo.work: 異常\n"
    errors=$((errors + 1))
  fi

  # Harbor
  if curl -s --max-time 10 -o /dev/null -w '%{http_code}' http://127.0.0.1:5050/api/v2.0/ping 2>/dev/null | grep -q 200; then
    health_status+="✅ Harbor: 正常\n"
  else
    health_status+="❌ Harbor: 異常\n"
    errors=$((errors + 1))
  fi

  # GitLab (a slow start is reported as a warning, not counted as an error)
  if curl -s --max-time 10 -o /dev/null -w '%{http_code}' http://127.0.0.1:8929 2>/dev/null | grep -q -E '200|302'; then
    health_status+="✅ GitLab: 正常\n"
  else
    health_status+="⚠️ GitLab: 啟動中\n"
  fi

  # K8s pods: any READY column other than 1/1 (or no pod list at all) is
  # reported as partially abnormal.
  local pod_status
  pod_status=$(kubectl get pods -n momo --no-headers 2>/dev/null | awk '{print $2}') || true
  if echo "$pod_status" | grep -v "1/1" > /dev/null; then
    health_status+="⚠️ K8s Pods: 部分異常\n"
  else
    health_status+="✅ K8s Pods: 全部正常\n"
  fi

  # ===========================================================================
  # Done
  # ===========================================================================
  local end_time
  end_time=$(date +%s)
  local duration=$((end_time - start_time))
  log_info "=========================================="
  log_info "啟動程序完成"
  log_info "耗時: ${duration}"
  log_info "錯誤數: ${errors}"
  log_info "=========================================="

  # Telegram summary
  local emoji="🟢"
  local status_text="成功"
  if ((errors > 0)); then
    emoji="🟡"
    status_text="部分失敗 (${errors} 個錯誤)"
  fi
  # The here-document body must stay at column 0: leading whitespace would
  # become part of the message.
  send_telegram "$(cat <<EOF
${emoji} <b>MOMO Pro System 啟動通知</b>
📋 <b>狀態:</b> ${status_text}
⏱️ <b>耗時:</b> ${duration} 秒
🕐 <b>時間:</b> $(date '+%Y-%m-%d %H:%M:%S')
<b>服務健康檢查:</b>
$(echo -e "$health_status")
🏷️ <i>UAT Server (192.168.0.110)</i>
EOF
  )"
  return "$errors"
}
# Run the main routine, forwarding the script's command-line arguments.
# As the last command, its return value becomes the script's exit status.
main "$@"