Phase 19.6 測試文檔收尾:
- E2E 測試擴充至 18 項 (Terminal/GenUI 驗證)
- 新增 PHASE19-VERIFICATION-CHECKLIST.md (完整驗證清單)
P1 驗證:
- ArgoCD Metrics NodePort 監控 (30883/30884)
- TLS 證書監控 (Blackbox Exporter 9115)
P2 改進:
- waitForTimeout → waitForLoadState('networkidle')
- 跨平台快捷鍵 (Meta+J / Control+J)
- SKIP_MULTISIG_TESTS 環境變數控制
- Prometheus GitOps 部署腳本
P3 改進:
- HPA maxReplicas 4 → 6 (API/Web)
首席架構師審查: 47/50 OUTSTANDING (94%)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
119 lines
4.0 KiB
Bash
Executable File
119 lines
4.0 KiB
Bash
Executable File
#!/bin/bash
# =============================================================================
# Prometheus config GitOps deployment script
# =============================================================================
# Purpose: sync the Git-managed Prometheus configuration to 192.168.0.188
# Author:  Claude Code (chief architect)
# Date:    2026-03-29 (Taipei time)
#
# Usage:
#   ./deploy-prometheus-config.sh [--dry-run]
#
# Requirements:
#   - Passwordless SSH login to ollama@192.168.0.188
#   - Prometheus installed on the remote host
# =============================================================================

set -euo pipefail

#######################################
# Validate the optional first command-line argument.
# Only an absent/empty argument or the literal `--dry-run` is accepted, so a
# mistyped flag can never fall through to a live deployment.
# Arguments: $1 - optional flag
# Outputs:   echoes the accepted flag (possibly empty) to stdout;
#            writes a usage message to stderr on rejection
# Returns:   0 when accepted, 2 when the argument is not recognised
#######################################
parse_dry_run_flag() {
  case "${1-}" in
    '' | --dry-run)
      printf '%s' "${1-}"
      ;;
    *)
      printf '未知參數: %s\n使用方式: %s [--dry-run]\n' "$1" "$0" >&2
      return 2
      ;;
  esac
}

# Configuration
readonly REMOTE_HOST="ollama@192.168.0.188"
readonly REMOTE_CONFIG="/home/ollama/momo-pro/monitoring/prometheus.yml"
# Assigned separately from `readonly` so a failing dirname is not masked.
LOCAL_ADDITIONS="$(dirname -- "$0")/prometheus-config-additions.yaml"
readonly LOCAL_ADDITIONS
DRY_RUN="$(parse_dry_run_flag "$@")" || exit 2
readonly DRY_RUN

# Colour escape sequences; stored as literal backslash text and expanded by
# the logging helpers at print time.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

#######################################
# Colour-tagged logging helpers.
# Globals:   GREEN, YELLOW, RED, NC (read) - colour escape sequences
# Arguments: $1 - message text, printed verbatim
# Outputs:   log_info writes to stdout; log_warn and log_error write to stderr
#
# Only the colour variables go through %b (escape expansion); the message is
# printed with %s so backslashes in paths or tool output are never rewritten.
#######################################
log_info() { printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$1"; }
log_warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1" >&2; }
log_error() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2; }

# Abort early when the local scrape-config fragment is missing.
[[ -f "$LOCAL_ADDITIONS" ]] || {
  log_error "找不到配置檔案: $LOCAL_ADDITIONS"
  exit 1
}

# Announce the deployment target before touching anything remote.
for banner_line in \
  "Prometheus GitOps 部署開始" \
  "目標主機: $REMOTE_HOST" \
  "遠端配置: $REMOTE_CONFIG"; do
  log_info "$banner_line"
done

# Timestamped backup path next to the remote config.
BACKUP_FILE="${REMOTE_CONFIG}.bak.$(date +%Y%m%d_%H%M%S)"
log_info "備份遠端配置至: $BACKUP_FILE"

# In dry-run mode, print the planned steps and stop before any remote change.
case "$DRY_RUN" in
  --dry-run)
    log_warn "DRY RUN 模式 - 不會實際執行"
    cat <<PLAN

將執行以下操作:
 1. ssh $REMOTE_HOST "cp $REMOTE_CONFIG $BACKUP_FILE"
 2. 檢查配置中是否已包含 argocd job
 3. 如需更新,附加新配置
 4. ssh $REMOTE_HOST "docker exec prometheus kill -SIGHUP 1"
PLAN
    exit 0
    ;;
esac

# Snapshot the live config on the remote host so a bad update can be undone.
backup_cmd="cp $REMOTE_CONFIG $BACKUP_FILE"
ssh "$REMOTE_HOST" "$backup_cmd"
log_info "備份完成"

# Count ArgoCD scrape jobs already present in the remote config.
# `|| true` runs remotely: grep -c exits 1 on zero matches (while still
# printing the count), and that must not abort the script under `set -e`.
ARGOCD_EXISTS=$(ssh "$REMOTE_HOST" "grep -c 'job_name.*argocd' $REMOTE_CONFIG || true")

if [[ "${ARGOCD_EXISTS:-0}" -gt 0 ]]; then
  log_info "ArgoCD job 已存在於配置中,跳過新增"
else
  log_info "新增 ArgoCD 和 Blackbox scrape configs..."

  # Extract the fragment body (skipping the commented header): up to
  # max_fragment_lines lines starting at the first "- job_name: argocd" line.
  # A single awk replaces `grep | head` so that "no match" yields empty output
  # instead of a silent pipefail abort, and no SIGPIPE can occur.
  max_fragment_lines=35
  CONFIG_CONTENT=$(awk -v limit="$max_fragment_lines" \
    '/^- job_name: argocd/ { found = 1 } found && ++n <= limit' \
    "$LOCAL_ADDITIONS")

  if [[ -z "$CONFIG_CONTENT" ]]; then
    log_error "在 $LOCAL_ADDITIONS 中找不到 '- job_name: argocd' 區段"
    exit 1
  fi

  # Stream the fragment over stdin rather than embedding it in the remote
  # command line, so its content can never be interpreted by the remote shell
  # or terminate a heredoc early.
  # NOTE(review): assumes the fragment is valid YAML when appended at the end
  # of the remote file (i.e. scrape_configs is the last section) — confirm.
  {
    printf '\n# === GitOps 新增 (%s) ===\n' "$(date +%Y-%m-%d)"
    printf '%s\n' "$CONFIG_CONTENT"
  } | ssh "$REMOTE_HOST" "cat >> $REMOTE_CONFIG"

  log_info "配置已更新"
fi

# Validate the configuration inside the running container and roll back to
# the backup on failure.
# The decision is based on promtool's exit status (propagated through
# `docker exec` and ssh) rather than on finding the word "SUCCESS" in its
# output, because promtool can print SUCCESS for one check and still fail a
# later one.
log_info "驗證 Prometheus 配置語法..."
if SYNTAX_CHECK=$(ssh "$REMOTE_HOST" "docker exec prometheus promtool check config /etc/prometheus/prometheus.yml 2>&1"); then
  log_info "配置語法正確"
else
  log_error "配置語法錯誤!"
  printf '%s\n' "$SYNTAX_CHECK"
  log_warn "正在還原備份..."
  ssh "$REMOTE_HOST" "cp $BACKUP_FILE $REMOTE_CONFIG"
  exit 1
fi

# Ask Prometheus to re-read its configuration: SIGHUP to PID 1 inside the
# container first, falling back to a systemd reload on the remote host.
log_info "重載 Prometheus 配置..."
reload_cmd="docker exec prometheus kill -SIGHUP 1 || sudo systemctl reload prometheus"
ssh "$REMOTE_HOST" "$reload_cmd"

# Give Prometheus a moment to apply the reload, then query its targets API.
sleep 3
log_info "驗證 targets..."

# Derive the API host from REMOTE_HOST (strip the "user@" prefix) so the
# address is configured in one place only.
prom_host="${REMOTE_HOST#*@}"
targets_url="http://${prom_host}:9090/api/v1/targets"

# --fail turns HTTP errors into a non-zero exit and --max-time bounds the wait.
# With pipefail a curl or jq failure makes the whole pipeline fail, which is
# tracked explicitly so it is not later mistaken for "no matching targets".
if TARGETS=$(curl -fsS --max-time 10 "$targets_url" \
  | jq -r '.data.activeTargets[] | "\(.labels.job): \(.health)"'); then
  targets_ok=1
else
  targets_ok=0
fi

rule="═══════════════════════════════════════════════════════"

echo ""
echo "$rule"
echo " Prometheus Targets 狀態"
echo "$rule"
if ((targets_ok)); then
  grep -E "argocd|blackbox" <<<"$TARGETS" || echo " (無 argocd/blackbox targets)"
else
  echo " 驗證失敗"
fi
echo "$rule"
echo ""

# The config was already applied; a failed verification is reported as a
# warning and does not change the exit status.
if ((!targets_ok)); then
  log_warn "無法從 $targets_url 取得 targets 狀態 (驗證失敗)"
fi

log_info "部署完成!"