#!/bin/bash
#
# Drift Scanner Pods Cleanup Script
#
# Cleans up failed drift-scanner pods and provides analysis.
#
# Usage: see the usage text printed by main() for the supported actions.

set -e

# Configuration.
# Every setting can be overridden from the environment; the values after
# ":-" are the defaults used when the variable is unset or empty. Supplying
# TELEGRAM_BOT_TOKEN via the environment keeps the secret out of the file.
NAMESPACE="${NAMESPACE:-momo}"
LOG_FILE="${LOG_FILE:-/var/log/drift_scanner_cleanup.log}"
TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-<TELEGRAM_BOT_TOKEN>}"
TELEGRAM_CHAT_ID="${TELEGRAM_CHAT_ID:-5619078117}"

# Colors for output.
# ANSI-C quoting ($'...') stores the real ESC byte, so the sequences take
# effect when printed with a plain `echo`/`printf '%s'` (no -e needed).
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
NC=$'\033[0m' # No Color

# Logging function
#
# Prints a timestamped message to stdout and appends the same line to
# $LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
# Outputs:   "[YYYY-mm-dd HH:MM:SS] <message>" on stdout
# Returns:   0, even when the log file cannot be written
log() {
  local line
  line="[$(date '+%Y-%m-%d %H:%M:%S')] ${1-}"

  printf '%s\n' "$line"

  # Appending to the log file is deliberately best-effort: an unwritable
  # LOG_FILE (e.g. running without root) must not make log() fail, because
  # under `set -e` that would abort the whole script at the first message.
  { printf '%s\n' "$line" >> "$LOG_FILE"; } 2> /dev/null || true
}

# Telegram notification
#
# Posts a message to the configured Telegram chat via the Bot API
# sendMessage endpoint.
# Globals:   TELEGRAM_BOT_TOKEN (read), TELEGRAM_CHAT_ID (read)
# Arguments: $1 - message text (sent with parse_mode=HTML)
# Outputs:   a warning on stderr when curl fails; curl's own output is
#            discarded
# Returns:   0 always - notifications are best-effort and must not abort
#            the script under `set -e`
send_telegram() {
  local message="$1"

  # --data-urlencode keeps characters such as '&', '+', '%' and newlines
  # intact; plain -d would send them unencoded and corrupt the form body.
  if ! curl -s -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
    --data-urlencode "chat_id=${TELEGRAM_CHAT_ID}" \
    --data-urlencode "text=${message}" \
    --data-urlencode "parse_mode=HTML" > /dev/null 2>&1; then
    printf 'WARNING: failed to send Telegram notification\n' >&2
  fi

  return 0
}

# Check kubectl access
#
# Verifies that `kubectl get pods` works for the configured namespace.
# Globals:   NAMESPACE (read), RED/NC (read)
# Outputs:   an error message via log() when the probe fails
# Returns:   0 when the probe succeeds; otherwise terminates the script
#            with exit status 1
check_kubectl() {
  if kubectl get pods -n "$NAMESPACE" > /dev/null 2>&1; then
    return 0
  fi

  log "${RED}ERROR: kubectl access denied or namespace not found${NC}"
  exit 1
}

# Get drift-scanner pods status
#
# Lists the pods of the configured namespace and keeps only the lines that
# mention "drift-scanner".
# Globals:   NAMESPACE (read)
# Outputs:   matching `kubectl get pods --no-headers` lines on stdout, or a
#            single empty line when nothing matches
# Returns:   0 (a no-match from grep is turned into empty output)
get_drift_scanner_pods() {
  kubectl get pods -n "$NAMESPACE" --no-headers | grep drift-scanner || echo ""
}

# Analyze pod status
#
# Classifies every drift-scanner pod returned by get_drift_scanner_pods and
# logs one line per pod plus a summary.
# Globals:   RED/GREEN/YELLOW/NC (read)
# Outputs:   stdout - exactly one line "total|failed|succeeded|running"
#                     ("0|0|0|0" when there are no pods), so callers can
#                     capture it with $(analyze_pods)
#            stderr - everything written through log(), kept off stdout so
#                     it does not mix with the counts line
# Returns:   0
analyze_pods() {
  local pods=""
  local total=0
  local failed=0
  local succeeded=0
  local running=0
  local pod_name ready status restarts age

  {
    log "${YELLOW}=== Drift Scanner Pod Analysis ===${NC}"

    # A failure of the helper is treated like "no pods", as before.
    pods=$(get_drift_scanner_pods) || true

    if [[ -z "$pods" ]]; then
      log "${GREEN}No drift-scanner pods found${NC}"
    else
      while read -r pod_name ready status restarts age; do
        if [[ -z "$pod_name" ]]; then
          continue
        fi

        # RESTARTS may be rendered as "3 (5m ago)"; the real age is then the
        # last whitespace-separated word of what read put into $age.
        age=${age##* }

        # Plain assignment instead of ((total++)): the latter returns
        # status 1 when the old value is 0, which trips `set -e`.
        total=$((total + 1))

        case "$status" in
          # The STATUS column normally shows the container reason ("Error",
          # "Completed") rather than the pod phase, so accept both spellings.
          Failed | Error)
            failed=$((failed + 1))
            log "${RED}FAILED: $pod_name (Ready: $ready, Restarts: $restarts, Age: $age)${NC}"
            ;;
          Succeeded | Completed)
            succeeded=$((succeeded + 1))
            log "${GREEN}SUCCEEDED: $pod_name (Ready: $ready, Restarts: $restarts, Age: $age)${NC}"
            ;;
          Running)
            running=$((running + 1))
            log "${YELLOW}RUNNING: $pod_name (Ready: $ready, Restarts: $restarts, Age: $age)${NC}"
            ;;
          *)
            log "${YELLOW}UNKNOWN: $pod_name (Status: $status, Ready: $ready, Restarts: $restarts, Age: $age)${NC}"
            ;;
        esac
      done <<< "$pods"

      log "${YELLOW}Summary: Total=$total, Failed=$failed, Succeeded=$succeeded, Running=$running${NC}"
    fi
  } >&2

  # Return counts for use in other functions
  printf '%s|%s|%s|%s\n' "$total" "$failed" "$succeeded" "$running"
}

# Clean up failed pods
#
# Force-deletes every drift-scanner pod whose phase is Failed.
# Globals:   NAMESPACE (read), RED/GREEN/YELLOW/NC (read)
# Outputs:   progress via log(); kubectl's own output is passed through
# Returns:   the number of pods deleted, as the exit status (capped at 255
#            because a larger value would wrap around, 256 becoming 0)
cleanup_failed_pods() {
  log "${YELLOW}=== Cleaning Up Failed Pods ===${NC}"

  # Select on the pod phase instead of grepping the STATUS column: the
  # column usually shows a reason such as "Error" or "Evicted", so a text
  # match on "Failed" misses most failed pods.
  local failed_pods
  failed_pods=$(kubectl get pods -n "$NAMESPACE" --no-headers \
    --field-selector=status.phase=Failed \
    | awk '$1 ~ /drift-scanner/ {print $1}') || true

  if [[ -z "$failed_pods" ]]; then
    log "${GREEN}No failed drift-scanner pods to clean${NC}"
    return 0
  fi

  local cleaned_count=0
  local pod_name
  while read -r pod_name; do
    if [[ -z "$pod_name" ]]; then
      continue
    fi

    log "Deleting failed pod: $pod_name"
    # stdin is detached so kubectl can never consume the loop's input.
    if kubectl delete pod "$pod_name" -n "$NAMESPACE" --force --grace-period=0 < /dev/null; then
      # Not ((cleaned_count++)): that returns status 1 when the old value
      # is 0 and would abort the script under `set -e` after one delete.
      cleaned_count=$((cleaned_count + 1))
      log "${GREEN}Successfully deleted: $pod_name${NC}"
    else
      log "${RED}Failed to delete: $pod_name${NC}"
    fi
  done <<< "$failed_pods"

  log "${GREEN}Cleaned up $cleaned_count failed pods${NC}"
  return "$((cleaned_count > 255 ? 255 : cleaned_count))"
}

# Convert a kubectl AGE value to whole hours.
# Accepts any sequence of <number><unit> components with units y, d, h, m, s
# (e.g. "30h", "9d", "3d5h", "2h15m", "1y30d"); minutes and seconds are
# dropped. Unrecognised input yields 0.
_drift_scanner_age_to_hours() {
  local rest="$1"
  local hours=0
  local amount unit

  while [[ "$rest" =~ ^([0-9]+)([ydhms])(.*)$ ]]; do
    amount=${BASH_REMATCH[1]}
    unit=${BASH_REMATCH[2]}
    rest=${BASH_REMATCH[3]}
    case "$unit" in
      y) hours=$((hours + 10#$amount * 8760)) ;;
      d) hours=$((hours + 10#$amount * 24)) ;;
      h) hours=$((hours + 10#$amount)) ;;
      *) ;; # m and s are below the one-hour resolution
    esac
  done

  printf '%s\n' "$hours"
}

# Clean up succeeded pods (optional, based on age)
#
# Force-deletes drift-scanner pods in phase Succeeded whose age exceeds the
# given number of hours.
# Globals:   NAMESPACE (read), RED/GREEN/YELLOW/NC (read)
# Arguments: $1 - maximum age in whole hours (default 24); must be a
#                 non-negative integer
# Outputs:   progress via log(); kubectl's own output is passed through
# Returns:   the number of pods deleted, as the exit status (capped at 255
#            to avoid wrap-around); 0 when the argument is invalid
cleanup_succeeded_pods() {
  local max_age_hours="${1:-24}" # Default 24 hours

  if [[ ! "$max_age_hours" =~ ^[0-9]+$ ]]; then
    log "${RED}ERROR: max age must be a non-negative integer (got: $max_age_hours)${NC}"
    return 0
  fi

  log "${YELLOW}=== Cleaning Up Succeeded Pods (older than ${max_age_hours}h) ===${NC}"

  # Select on the pod phase: the STATUS column normally reads "Completed",
  # so a text match on "Succeeded" would find nothing.
  local succeeded_pods
  succeeded_pods=$(kubectl get pods -n "$NAMESPACE" --no-headers \
    --field-selector=status.phase=Succeeded \
    | awk '$1 ~ /drift-scanner/') || true

  if [[ -z "$succeeded_pods" ]]; then
    log "${GREEN}No succeeded drift-scanner pods to clean${NC}"
    return 0
  fi

  local cleaned_count=0
  local pod_name ready status restarts age age_hours
  while read -r pod_name ready status restarts age; do
    if [[ -z "$pod_name" ]]; then
      continue
    fi

    # RESTARTS may be rendered as "2 (3h ago)"; the real age is then the
    # last whitespace-separated word of what read put into $age.
    age=${age##* }
    age_hours=$(_drift_scanner_age_to_hours "$age")

    if ((age_hours > 10#$max_age_hours)); then
      log "Deleting old succeeded pod: $pod_name (Age: $age)"
      # stdin is detached so kubectl can never consume the loop's input.
      if kubectl delete pod "$pod_name" -n "$NAMESPACE" --force --grace-period=0 < /dev/null; then
        # Not ((cleaned_count++)): that returns status 1 when the old value
        # is 0 and would abort the script under `set -e`.
        cleaned_count=$((cleaned_count + 1))
        log "${GREEN}Successfully deleted: $pod_name${NC}"
      else
        log "${RED}Failed to delete: $pod_name${NC}"
      fi
    fi
  done <<< "$succeeded_pods"

  log "${GREEN}Cleaned up $cleaned_count old succeeded pods${NC}"
  return "$((cleaned_count > 255 ? 255 : cleaned_count))"
}

# Main function
#
# Entry point: validates the action, checks cluster access, prints the pod
# analysis and then runs the requested cleanup.
# Globals:   NAMESPACE (read), GREEN/YELLOW/NC (read)
# Arguments: $1 - action: analyze (default), cleanup-failed,
#                 cleanup-succeeded or cleanup-all
#            $2 - maximum age in hours for succeeded pods (default 24)
# Outputs:   progress via log(); usage text on stderr for an unknown action
# Returns:   0 on completion; exits the script with status 1 for an
#            unknown action
main() {
  local action="${1:-analyze}"
  local max_age="${2:-24}"

  # Reject an unknown action before doing any work against the cluster.
  case "$action" in
    analyze | cleanup-failed | cleanup-succeeded | cleanup-all) ;;
    *)
      {
        echo "Usage: $0 [analyze|cleanup-failed|cleanup-succeeded|cleanup-all] [max-age-hours]"
        echo " analyze - Show current pod status (default)"
        echo " cleanup-failed - Delete all failed pods"
        echo " cleanup-succeeded - Delete succeeded pods older than max-age-hours (default: 24h)"
        echo " cleanup-all - Delete failed pods and old succeeded pods"
      } >&2
      exit 1
      ;;
  esac

  log "${YELLOW}=== Drift Scanner Pod Cleanup Script ===${NC}"
  log "Action: $action"
  log "Namespace: $NAMESPACE"

  # Check kubectl access
  check_kubectl

  # Analyze current state.
  # analyze_pods ends its stdout with a "total|failed|succeeded|running"
  # line; anything it printed before that is human-readable report text
  # which the capture would otherwise swallow, so it is shown here.
  local analysis
  local counts_re='^[0-9]+[|][0-9]+[|][0-9]+[|][0-9]+$'
  analysis=$(analyze_pods) || true
  if [[ "${analysis##*$'\n'}" =~ $counts_re ]]; then
    if [[ "$analysis" == *$'\n'* ]]; then
      printf '%s\n' "${analysis%$'\n'*}"
    fi
  elif [[ -n "$analysis" ]]; then
    printf '%s\n' "$analysis"
  fi

  # The cleanup functions report the number of deleted pods as their exit
  # status. Under `set -e` a bare call returning non-zero would terminate
  # the script, so the status is captured with `|| var=$?`.
  local cleaned_failed=0
  local cleaned_succeeded=0

  case "$action" in
    analyze)
      log "${GREEN}Analysis complete. No cleanup performed.${NC}"
      ;;
    cleanup-failed)
      cleanup_failed_pods || cleaned_failed=$?
      if [[ $cleaned_failed -gt 0 ]]; then
        send_telegram "Cleaned up $cleaned_failed failed drift-scanner pods"
      fi
      ;;
    cleanup-succeeded)
      cleanup_succeeded_pods "$max_age" || cleaned_succeeded=$?
      if [[ $cleaned_succeeded -gt 0 ]]; then
        send_telegram "Cleaned up $cleaned_succeeded old succeeded drift-scanner pods"
      fi
      ;;
    cleanup-all)
      cleanup_failed_pods || cleaned_failed=$?
      cleanup_succeeded_pods "$max_age" || cleaned_succeeded=$?
      ;;
  esac

  log "${GREEN}=== Script completed ===${NC}"
}

# Run main function with all arguments
main "$@"