#!/bin/bash
# Drift Scanner Pods Cleanup Script
# Cleans up failed drift-scanner pods and provides analysis.
#
# Usage:
#   <script> [analyze|cleanup-failed|cleanup-succeeded|cleanup-all] [max-age-hours]
#
# Optional environment overrides:
#   NAMESPACE, LOG_FILE, TELEGRAM_BOT_TOKEN, TELEGRAM_CHAT_ID

set -euo pipefail

# Configuration (each value may be overridden from the environment)
NAMESPACE="${NAMESPACE:-momo}"
LOG_FILE="${LOG_FILE:-/var/log/drift_scanner_cleanup.log}"
TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}"
TELEGRAM_CHAT_ID="${TELEGRAM_CHAT_ID:-5619078117}"

# Colors for output. ANSI-C quoting stores the real ESC byte, so the colors
# render without relying on `echo -e`.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly NC=$'\033[0m' # No Color

# Number of pods deleted by the most recent cleanup_* call. A global is used
# instead of `return N` because exit statuses are truncated to 0-255 and any
# non-zero status aborts the script under `set -e`.
CLEANED_COUNT=0

#######################################
# Print a timestamped message to stdout and append it to LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
# Returns:   always 0; an unwritable log file must never abort the run
#######################################
log() {
  local line
  line="[$(date '+%Y-%m-%d %H:%M:%S')] ${1:-}"
  printf '%s\n' "$line"
  # Best-effort file logging: ignore a missing or unwritable log file.
  { printf '%s\n' "$line" >> "$LOG_FILE"; } 2> /dev/null || true
}

#######################################
# Send a Telegram notification (best-effort).
# Globals:   TELEGRAM_BOT_TOKEN, TELEGRAM_CHAT_ID (read)
# Arguments: $1 - message text
# Returns:   always 0; delivery problems are logged, not fatal
#######################################
send_telegram() {
  local message="$1"

  if [[ -z "$TELEGRAM_BOT_TOKEN" ]]; then
    log "${YELLOW}TELEGRAM_BOT_TOKEN is not set; skipping notification${NC}"
    return 0
  fi

  # --data-urlencode keeps spaces and special characters in the text intact;
  # -f makes HTTP errors visible as a non-zero exit status.
  if ! curl -s -f --max-time 10 -X POST \
    "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
    --data-urlencode "chat_id=${TELEGRAM_CHAT_ID}" \
    --data-urlencode "text=${message}" \
    -d parse_mode="HTML" > /dev/null 2>&1; then
    log "${YELLOW}WARNING: failed to send Telegram notification${NC}"
  fi
  return 0
}

#######################################
# Verify that kubectl exists and can list pods in NAMESPACE.
# Globals:   NAMESPACE (read)
# Returns:   0 on success; exits the script with status 1 otherwise
#######################################
check_kubectl() {
  if ! command -v kubectl > /dev/null 2>&1; then
    log "${RED}ERROR: kubectl not found in PATH${NC}"
    exit 1
  fi
  if ! kubectl get pods -n "$NAMESPACE" > /dev/null 2>&1; then
    log "${RED}ERROR: kubectl access denied or namespace not found${NC}"
    exit 1
  fi
}

#######################################
# List drift-scanner pods, optionally restricted to one pod phase.
# Globals:   NAMESPACE (read)
# Arguments: $1 - optional pod phase (Failed, Succeeded, Running, ...)
# Outputs:   one "name ready status restarts age" line per pod on stdout
# Returns:   0 on success, 1 if kubectl fails (its output goes to stderr)
#######################################
get_drift_scanner_pods() {
  local phase="${1:-}"
  local -a kubectl_args=(get pods -n "$NAMESPACE" --no-headers)
  local output

  # Filter on the real pod phase. The STATUS column of the table usually
  # shows a container reason (e.g. "Completed", "Error") rather than the
  # phase, so matching "Failed"/"Succeeded" against it is unreliable.
  if [[ -n "$phase" ]]; then
    kubectl_args+=(--field-selector "status.phase=${phase}")
  fi

  # stderr is merged in so the informational "No resources found" line does
  # not clutter the terminal; non-pod lines are dropped by the awk filter.
  output=$(kubectl "${kubectl_args[@]}" 2>&1) || {
    printf '%s\n' "$output" >&2
    return 1
  }

  # AGE is taken from the last column because RESTARTS may be printed as
  # "3 (5m ago)", which would otherwise shift the fields.
  awk '$1 ~ /drift-scanner/ { print $1, $2, $3, $4, $NF }' <<< "$output"
}

#######################################
# Convert a kubectl AGE string (e.g. 45s, 5m30s, 7h12m, 36h, 3d4h, 90d, 1y20d)
# to whole hours. Minutes and seconds are ignored; a year counts as 365 days.
# Arguments: $1 - age string
# Outputs:   hours on stdout (0 for unrecognized input, so nothing is deleted)
#######################################
age_to_hours() {
  local rest="${1:-}"
  local -r token_re='^([0-9]+)([ydhms])(.*)$'
  local hours=0 value unit

  while [[ "$rest" =~ $token_re ]]; do
    value=$((10#${BASH_REMATCH[1]})) # force base 10 for safety
    unit=${BASH_REMATCH[2]}
    rest=${BASH_REMATCH[3]}
    case "$unit" in
      y) hours=$((hours + value * 8760)) ;;
      d) hours=$((hours + value * 24)) ;;
      h) hours=$((hours + value)) ;;
      *) ;; # minutes and seconds do not add whole hours
    esac
  done

  printf '%s\n' "$hours"
}

#######################################
# Force-delete a single pod and log the outcome.
# Globals:   NAMESPACE (read)
# Arguments: $1 - pod name
# Returns:   0 if kubectl deleted the pod, 1 otherwise
#######################################
delete_pod() {
  local pod_name="$1"

  # stdin is detached so kubectl can never consume the caller's loop input.
  if kubectl delete pod "$pod_name" -n "$NAMESPACE" \
    --force --grace-period=0 < /dev/null; then
    log "${GREEN}Successfully deleted: $pod_name${NC}"
    return 0
  fi
  log "${RED}Failed to delete: $pod_name${NC}"
  return 1
}

#######################################
# Log every drift-scanner pod grouped by phase, followed by a summary line.
# Returns:   0 on success, 1 if the pod list could not be retrieved
#######################################
analyze_pods() {
  log "${YELLOW}=== Drift Scanner Pod Analysis ===${NC}"

  local total=0 failed=0 succeeded=0 running=0
  local phase rows pod_name ready status restarts age

  for phase in Failed Succeeded Running Pending Unknown; do
    rows=$(get_drift_scanner_pods "$phase") || {
      log "${RED}ERROR: could not list ${phase} pods${NC}"
      return 1
    }

    while read -r pod_name ready status restarts age; do
      [[ -z "$pod_name" ]] && continue
      # Plain assignment: ((total++)) returns status 1 when total is 0,
      # which would abort the script under `set -e`.
      total=$((total + 1))
      case "$phase" in
        Failed)
          failed=$((failed + 1))
          log "${RED}FAILED: $pod_name (Ready: $ready, Restarts: $restarts, Age: $age)${NC}"
          ;;
        Succeeded)
          succeeded=$((succeeded + 1))
          log "${GREEN}SUCCEEDED: $pod_name (Ready: $ready, Restarts: $restarts, Age: $age)${NC}"
          ;;
        Running)
          running=$((running + 1))
          log "${YELLOW}RUNNING: $pod_name (Ready: $ready, Restarts: $restarts, Age: $age)${NC}"
          ;;
        *)
          log "${YELLOW}UNKNOWN: $pod_name (Status: $status, Ready: $ready, Restarts: $restarts, Age: $age)${NC}"
          ;;
      esac
    done <<< "$rows"
  done

  if ((total == 0)); then
    log "${GREEN}No drift-scanner pods found${NC}"
    return 0
  fi

  log "${YELLOW}Summary: Total=$total, Failed=$failed, Succeeded=$succeeded, Running=$running${NC}"
}

#######################################
# Delete every drift-scanner pod in the Failed phase.
# Globals:   CLEANED_COUNT (written) - number of pods deleted
# Returns:   0 on success, 1 if the pod list could not be retrieved
#######################################
cleanup_failed_pods() {
  CLEANED_COUNT=0
  log "${YELLOW}=== Cleaning Up Failed Pods ===${NC}"

  local rows
  rows=$(get_drift_scanner_pods Failed) || {
    log "${RED}ERROR: could not list failed pods${NC}"
    return 1
  }

  if [[ -z "$rows" ]]; then
    log "${GREEN}No failed drift-scanner pods to clean${NC}"
    return 0
  fi

  local pod_name _
  while read -r pod_name _; do
    [[ -z "$pod_name" ]] && continue
    log "Deleting failed pod: $pod_name"
    if delete_pod "$pod_name"; then
      CLEANED_COUNT=$((CLEANED_COUNT + 1))
    fi
  done <<< "$rows"

  log "${GREEN}Cleaned up $CLEANED_COUNT failed pods${NC}"
}

#######################################
# Delete drift-scanner pods in the Succeeded phase that are older than a limit.
# Globals:   CLEANED_COUNT (written) - number of pods deleted
# Arguments: $1 - maximum age in hours (default 24); strictly older pods go
# Returns:   0 on success, 1 on invalid argument or kubectl list failure
#######################################
cleanup_succeeded_pods() {
  local max_age_hours="${1:-24}" # Default 24 hours
  CLEANED_COUNT=0

  if ! [[ "$max_age_hours" =~ ^[0-9]+$ ]]; then
    log "${RED}ERROR: max-age-hours must be a non-negative integer${NC}"
    return 1
  fi
  max_age_hours=$((10#$max_age_hours)) # avoid octal reading of e.g. "024"

  log "${YELLOW}=== Cleaning Up Succeeded Pods (older than ${max_age_hours}h) ===${NC}"

  local rows
  rows=$(get_drift_scanner_pods Succeeded) || {
    log "${RED}ERROR: could not list succeeded pods${NC}"
    return 1
  }

  if [[ -z "$rows" ]]; then
    log "${GREEN}No succeeded drift-scanner pods to clean${NC}"
    return 0
  fi

  local pod_name age age_hours _
  while read -r pod_name _ _ _ age; do
    [[ -z "$pod_name" ]] && continue
    age_hours=$(age_to_hours "$age")
    if ((age_hours > max_age_hours)); then
      log "Deleting old succeeded pod: $pod_name (Age: $age)"
      if delete_pod "$pod_name"; then
        CLEANED_COUNT=$((CLEANED_COUNT + 1))
      fi
    fi
  done <<< "$rows"

  log "${GREEN}Cleaned up $CLEANED_COUNT old succeeded pods${NC}"
}

#######################################
# Print command-line help to stderr.
#######################################
usage() {
  {
    echo "Usage: $0 [analyze|cleanup-failed|cleanup-succeeded|cleanup-all] [max-age-hours]"
    echo " analyze - Show current pod status (default)"
    echo " cleanup-failed - Delete all failed pods"
    echo " cleanup-succeeded - Delete succeeded pods older than max-age-hours (default: 24h)"
    echo " cleanup-all - Delete failed pods and old succeeded pods"
  } >&2
}

#######################################
# Entry point: validate arguments, analyze, then run the requested cleanup.
# Arguments: $1 - action (default: analyze)
#            $2 - max age in hours for succeeded pods (default: 24)
#######################################
main() {
  local action="${1:-analyze}"
  local max_age="${2:-24}"

  # Reject bad input before touching the cluster.
  case "$action" in
    analyze | cleanup-failed | cleanup-succeeded | cleanup-all) ;;
    *)
      usage
      exit 1
      ;;
  esac
  if ! [[ "$max_age" =~ ^[0-9]+$ ]]; then
    echo "ERROR: max-age-hours must be a non-negative integer" >&2
    usage
    exit 1
  fi

  log "${YELLOW}=== Drift Scanner Pod Cleanup Script ===${NC}"
  log "Action: $action"
  log "Namespace: $NAMESPACE"

  # Check kubectl access
  check_kubectl

  # Analyze current state
  analyze_pods

  case "$action" in
    analyze)
      log "${GREEN}Analysis complete. No cleanup performed.${NC}"
      ;;
    cleanup-failed)
      cleanup_failed_pods
      if ((CLEANED_COUNT > 0)); then
        send_telegram "Cleaned up $CLEANED_COUNT failed drift-scanner pods"
      fi
      ;;
    cleanup-succeeded)
      cleanup_succeeded_pods "$max_age"
      if ((CLEANED_COUNT > 0)); then
        send_telegram "Cleaned up $CLEANED_COUNT old succeeded drift-scanner pods"
      fi
      ;;
    cleanup-all)
      cleanup_failed_pods
      cleanup_succeeded_pods "$max_age"
      ;;
  esac

  log "${GREEN}=== Script completed ===${NC}"
}

# Run main function with all arguments
main "$@"