Files
awoooi/k8s/monitoring/deploy-prometheus-config.sh
OG T ee2bceefff feat(monitoring): Phase 19.6 測試文檔 + P1-P3 改進 + 首席架構師審查
Phase 19.6 測試文檔收尾:
- E2E 測試擴充至 18 項 (Terminal/GenUI 驗證)
- 新增 PHASE19-VERIFICATION-CHECKLIST.md (完整驗證清單)

P1 驗證:
- ArgoCD Metrics NodePort 監控 (30883/30884)
- TLS 證書監控 (Blackbox Exporter 9115)

P2 改進:
- waitForTimeout → waitForLoadState('networkidle')
- 跨平台快捷鍵 (Meta+J / Control+J)
- SKIP_MULTISIG_TESTS 環境變數控制
- Prometheus GitOps 部署腳本

P3 改進:
- HPA maxReplicas 4 → 6 (API/Web)

首席架構師審查: 47/50 OUTSTANDING (94%)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-29 01:19:26 +08:00

119 lines
4.0 KiB
Bash
Executable File

#!/bin/bash
# =============================================================================
# Prometheus Config GitOps 部署腳本
# =============================================================================
# 用途: 將 Git 管理的 Prometheus 配置同步到 192.168.0.188
# 建立者: Claude Code (首席架構師)
# 日期: 2026-03-29 (台北時間)
#
# 使用方式:
# ./deploy-prometheus-config.sh [--dry-run]
#
# 依賴:
# - SSH 免密碼登入 ollama@192.168.0.188
# - 遠端主機已安裝 Prometheus
# =============================================================================
# Strict mode: stop on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# --- Deployment settings ------------------------------------------------------
# SSH login of the target host and the Prometheus config path on that host.
REMOTE_HOST='ollama@192.168.0.188'
REMOTE_CONFIG='/home/ollama/momo-pro/monitoring/prometheus.yml'
# Scrape-config additions file, expected to sit in the same directory as this
# script.
LOCAL_ADDITIONS="$(dirname "$0")"/prometheus-config-additions.yaml
# First command-line argument; empty when the script is called without one.
DRY_RUN="${1-}"

# --- Colour output ------------------------------------------------------------
# ANSI escapes kept as literal backslash sequences; they are expanded only when
# printed by the logging helpers.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
NC="\033[0m" # No Color
# -----------------------------------------------------------------------------
# Logging helpers: print one line made of a colour-coded level tag followed by
# the message.
# Globals:   GREEN / YELLOW / RED / NC (read) - ANSI escape sequences
# Arguments: $1 - message text (optional; printed verbatim)
# Outputs:   log_info writes to stdout; log_warn and log_error write to stderr
#            so diagnostics stay separate from regular output.
# -----------------------------------------------------------------------------
# %b expands the backslash escapes stored in the colour variables; %s prints the
# message as-is, so backslashes inside a message (e.g. in a path) are kept.
# ${1-} keeps the helpers usable under `set -u` when called without a message.
log_info() { printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "${1-}"; }
log_warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "${1-}" >&2; }
log_error() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "${1-}" >&2; }
# Pre-flight check: stop with exit status 1 unless the local scrape-config
# additions file exists as a regular file.
[[ -f "$LOCAL_ADDITIONS" ]] || {
  log_error "找不到配置檔案: $LOCAL_ADDITIONS"
  exit 1
}
# Announce the run, compute the remote backup path, and handle the optional
# --dry-run preview.
# Globals: REMOTE_HOST, REMOTE_CONFIG, DRY_RUN (read); BACKUP_FILE (written)

# Accept only "--dry-run" (or no argument at all). Without this guard a
# mistyped flag such as "--dryrun" would silently perform a real deployment.
if [[ -n "$DRY_RUN" && "$DRY_RUN" != "--dry-run" ]]; then
  log_error "未知參數: $DRY_RUN (用法: $0 [--dry-run])"
  exit 2
fi

log_info "Prometheus GitOps 部署開始"
log_info "目標主機: $REMOTE_HOST"
log_info "遠端配置: $REMOTE_CONFIG"

# Backup path on the remote host: the config path plus a timestamp suffix
# (the timestamp comes from the local clock).
BACKUP_FILE="${REMOTE_CONFIG}.bak.$(date +%Y%m%d_%H%M%S)"
log_info "備份遠端配置至: $BACKUP_FILE"

# Preview mode: list the planned steps and exit without touching the remote.
if [[ "$DRY_RUN" == "--dry-run" ]]; then
  log_warn "DRY RUN 模式 - 不會實際執行"
  echo ""
  echo "將執行以下操作:"
  echo " 1. ssh $REMOTE_HOST \"cp $REMOTE_CONFIG $BACKUP_FILE\""
  echo " 2. 檢查配置中是否已包含 argocd job"
  echo " 3. 如需更新,附加新配置"
  echo " 4. ssh $REMOTE_HOST \"docker exec prometheus kill -SIGHUP 1\""
  exit 0
fi
# Back up the remote config, then append the ArgoCD / Blackbox scrape configs
# unless an argocd job is already present in the remote file.
# Globals: REMOTE_HOST, REMOTE_CONFIG, BACKUP_FILE, LOCAL_ADDITIONS (read);
#          ARGOCD_EXISTS, CONFIG_CONTENT (written)

# Paths are single-quoted inside the remote command so the remote shell neither
# word-splits nor globs them.
ssh "$REMOTE_HOST" "cp '$REMOTE_CONFIG' '$BACKUP_FILE'"
log_info "備份完成"

# Probe for an existing argocd job using grep's exit status instead of parsing
# a count from stdout: 0 = found, 1 = not found. Any other status (grep error,
# or 255 for an ssh failure) means the state is unknown, so stop rather than
# append blindly.
argocd_rc=0
ssh "$REMOTE_HOST" "grep -q 'job_name.*argocd' '$REMOTE_CONFIG'" || argocd_rc=$?

if (( argocd_rc == 0 )); then
  ARGOCD_EXISTS=1
  log_info "ArgoCD job 已存在於配置中,跳過新增"
elif (( argocd_rc != 1 )); then
  log_error "無法確認遠端配置是否已包含 argocd job (結束代碼: $argocd_rc)"
  exit 1
else
  ARGOCD_EXISTS=0
  log_info "新增 ArgoCD 和 Blackbox scrape configs..."

  # Extract the block to append: the first line matching "- job_name: argocd"
  # and the lines after it, 35 lines in total at most. A single awk replaces
  # `grep -A 100 | head -35`, which under `pipefail` aborted the script
  # silently when the pattern was missing and could also die from SIGPIPE.
  # NOTE(review): the 35-line cap is inherited from the original and presumably
  # matches the current length of the additions file - confirm when that file
  # changes.
  CONFIG_CONTENT=$(awk '
    /^- job_name: argocd/ { found = 1 }
    found { print; if (++n >= 35) exit }
  ' "$LOCAL_ADDITIONS")

  if [[ -z "$CONFIG_CONTENT" ]]; then
    log_error "無法從 $LOCAL_ADDITIONS 提取 argocd scrape 配置"
    exit 1
  fi

  # Stream the payload over stdin rather than embedding a here-doc in the
  # remote command string, so its content can never collide with a here-doc
  # delimiter or be re-parsed by the remote shell. The leading empty line
  # keeps the marker comment on its own line even if the remote file lacks a
  # trailing newline.
  printf '%s\n' "" "# === GitOps 新增 ($(date +%Y-%m-%d)) ===" "$CONFIG_CONTENT" \
    | ssh "$REMOTE_HOST" "cat >> '$REMOTE_CONFIG'"
  log_info "配置已更新"
fi
# Validate the Prometheus configuration with promtool inside the "prometheus"
# container; on failure, restore the backup and exit with status 1.
# Globals: REMOTE_HOST, REMOTE_CONFIG, BACKUP_FILE (read); SYNTAX_CHECK (written)
# NOTE(review): promtool checks /etc/prometheus/prometheus.yml inside the
# container; this presumably is REMOTE_CONFIG mounted from the host - confirm.
log_info "驗證 Prometheus 配置語法..."

# Decide on promtool's exit status rather than searching the output for
# "SUCCESS": promtool prints a SUCCESS line per checked file, so the text can
# appear even when a later check fails. Running the command as the `if`
# condition also guarantees the rollback branch runs when ssh itself fails,
# instead of `set -e` aborting before the restore.
if SYNTAX_CHECK=$(ssh "$REMOTE_HOST" "docker exec prometheus promtool check config /etc/prometheus/prometheus.yml 2>&1"); then
  log_info "配置語法正確"
else
  log_error "配置語法錯誤!"
  echo "$SYNTAX_CHECK"
  log_warn "正在還原備份..."
  ssh "$REMOTE_HOST" "cp '$BACKUP_FILE' '$REMOTE_CONFIG'"
  exit 1
fi
# Reload Prometheus, then query its targets API and print the health of the
# argocd / blackbox jobs.
# Globals: REMOTE_HOST (read); TARGETS (written)
log_info "重載 Prometheus 配置..."
# Send SIGHUP to PID 1 inside the container; fall back to a systemd reload if
# that command fails.
ssh "$REMOTE_HOST" "docker exec prometheus kill -SIGHUP 1 || sudo systemctl reload prometheus"

# Give Prometheus a moment to apply the new configuration before querying it.
sleep 3
log_info "驗證 targets..."

# Take the API host from REMOTE_HOST (the part after "user@") so the address is
# configured in one place only.
prom_host="${REMOTE_HOST#*@}"

# Track query failure separately from the target list. Previously the failure
# text was stored in TARGETS and then removed by the grep filter below, so a
# failed query was reported as "no targets". --fail turns HTTP errors into a
# non-zero exit; --max-time stops an unreachable host from stalling the script.
TARGETS=""
targets_ok=0
if targets_json=$(curl -s --fail --max-time 10 \
  "http://${prom_host}:9090/api/v1/targets" 2>/dev/null) \
  && TARGETS=$(jq -r '.data.activeTargets[] | "\(.labels.job): \(.health)"' \
    <<<"$targets_json" 2>/dev/null); then
  targets_ok=1
fi

echo ""
echo "═══════════════════════════════════════════════════════"
echo " Prometheus Targets 狀態"
echo "═══════════════════════════════════════════════════════"
if (( targets_ok )); then
  grep -E "argocd|blackbox" <<<"$TARGETS" || echo " (無 argocd/blackbox targets)"
else
  echo " 驗證失敗"
fi
echo "═══════════════════════════════════════════════════════"
echo ""
# Verification is best-effort: the deployment is reported as complete either
# way, as before.
log_info "部署完成!"