Files
ewoooc/scripts/cleanup_drift_scanner_pods.sh
ogt 03c345d46d
Some checks failed
CD Pipeline / deploy (push) Failing after 50s
fix: drift-scanner pods cleanup script and guide
- add cleanup script for failed drift-scanner pods
- add comprehensive fix guide with prevention strategies
- resolve pod resource issues in K8s cluster
2026-04-22 11:14:48 +08:00

216 lines
6.8 KiB
Bash
Executable File

#!/bin/bash
# Drift Scanner Pods Cleanup Script
# Cleans up failed drift-scanner pods and provides analysis.
#
# Optional environment variables:
#   TELEGRAM_BOT_TOKEN  Telegram bot token used for notifications
#   TELEGRAM_CHAT_ID    Telegram chat that receives notifications
set -e
# Configuration
NAMESPACE="momo"
LOG_FILE="/var/log/drift_scanner_cleanup.log"
# SECURITY: the bot token is a credential and must not be stored in the
# script. It is taken from the environment instead. A token was previously
# hardcoded on this line; treat it as compromised and revoke it.
# With the variable unset, Telegram requests carry an empty token.
TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}"
TELEGRAM_CHAT_ID="${TELEGRAM_CHAT_ID:-5619078117}"
# Colors for output. These are stored as literal backslash sequences, so they
# only become terminal colors when printed by something that interprets
# backslash escapes.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Logging function
# Prints a timestamped message on stdout and appends the same line to
# $LOG_FILE.
# Arguments: $1 - message; backslash escapes in it (such as the \033[...m
#                 color codes kept in RED/GREEN/YELLOW/NC) are interpreted.
# Returns:   0 always. Logging is best-effort: a missing or unwritable
#            $LOG_FILE must not abort the script under `set -e`.
log() {
  local entry
  # %b expands the backslash escapes; a plain echo would print "\033[0;31m"
  # literally instead of switching the terminal color.
  printf -v entry '[%s] %b' "$(date '+%Y-%m-%d %H:%M:%S')" "${1-}"
  printf '%s\n' "$entry"
  # The braces make 2>/dev/null cover the failed-redirection message too.
  { printf '%s\n' "$entry" >> "$LOG_FILE"; } 2>/dev/null || true
}
# Telegram notification
# Posts a message to TELEGRAM_CHAT_ID through the Telegram Bot API
# sendMessage endpoint, using TELEGRAM_BOT_TOKEN.
# Arguments: $1 - message text (sent with parse_mode=HTML)
# Outputs:   a one-line warning on stderr when the message is skipped or
#            the request fails; curl's own output is discarded
# Returns:   0 always. Notifications are best-effort and must not abort the
#            cleanup run under `set -e`.
send_telegram() {
  local message="$1"

  if [[ -z "${TELEGRAM_BOT_TOKEN:-}" || -z "${TELEGRAM_CHAT_ID:-}" ]]; then
    printf 'send_telegram: TELEGRAM_BOT_TOKEN or TELEGRAM_CHAT_ID is not set; notification skipped\n' >&2
    return 0
  fi

  # --data-urlencode keeps characters such as '&', '+' and '%' in the message
  # from corrupting the form body. -f turns HTTP errors into a non-zero exit
  # status, and --max-time bounds the time spent on an unreachable API.
  if ! curl -sf --max-time 10 -X POST \
    "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
    --data-urlencode "chat_id=${TELEGRAM_CHAT_ID}" \
    --data-urlencode "text=${message}" \
    -d parse_mode="HTML" > /dev/null 2>&1; then
    printf 'send_telegram: request to the Telegram API failed; continuing\n' >&2
  fi
  return 0
}
# Check kubectl access
# Probes the cluster by listing pods in $NAMESPACE (all kubectl output is
# discarded). Returns 0 when the probe succeeds; otherwise logs an error and
# terminates the script with exit status 1.
check_kubectl() {
  if kubectl get pods -n "$NAMESPACE" > /dev/null 2>&1; then
    return 0
  fi

  log "${RED}ERROR: kubectl access denied or namespace not found${NC}"
  exit 1
}
# Get drift-scanner pods status
# Writes every line of the header-less pod listing for $NAMESPACE that
# contains "drift-scanner" to stdout. When nothing matches (or the listing
# itself fails) a single empty line is written instead, and the function
# still returns 0.
get_drift_scanner_pods() {
  local matching_rows
  # "|| true" keeps a no-match grep from being treated as a failure.
  matching_rows=$(kubectl get pods -n "$NAMESPACE" --no-headers | grep drift-scanner) || true
  printf '%s\n' "$matching_rows"
}
# Analyze pod status
# Logs one line per drift-scanner pod plus a summary, and counts the pods by
# outcome.
# Globals:   RED, GREEN, YELLOW, NC (read)
# Outputs:   the report goes through log, redirected to stderr; stdout
#            carries exactly one line "total|failed|succeeded|running"
#            ("0|0|0|0" when there are no pods) so callers can capture it
#            with $(analyze_pods)
# Returns:   0
analyze_pods() {
  local pods pod_name ready status restarts age
  local total=0 failed=0 succeeded=0 running=0
  local -a cols

  # Redirect everything logged in this group to stderr. Without this the log
  # lines share stdout with the counts line and the caller cannot parse it.
  {
    log "${YELLOW}=== Drift Scanner Pod Analysis ===${NC}"
    pods=$(get_drift_scanner_pods) || pods=""

    if [[ -z "$pods" ]]; then
      log "${GREEN}No drift-scanner pods found${NC}"
    else
      while read -r -a cols; do
        [[ ${#cols[@]} -gt 0 ]] || continue
        pod_name="${cols[0]}"
        ready="${cols[1]:-}"
        status="${cols[2]:-}"
        restarts="${cols[3]:-}"
        # AGE is always the last column. RESTARTS can expand to several
        # words ("3 (5m ago)"), so a fixed field position would be wrong.
        age="${cols[${#cols[@]} - 1]}"

        # Counters use var=$((var + 1)): ((var++)) returns status 1 when the
        # old value is 0, which aborts the script under `set -e`.
        total=$((total + 1))
        case "$status" in
          # The STATUS column commonly shows the container state reason
          # ("Error", "Completed") rather than the pod phase, so both
          # spellings are accepted.
          "Failed" | "Error")
            failed=$((failed + 1))
            log "${RED}FAILED: $pod_name (Ready: $ready, Restarts: $restarts, Age: $age)${NC}"
            ;;
          "Succeeded" | "Completed")
            succeeded=$((succeeded + 1))
            log "${GREEN}SUCCEEDED: $pod_name (Ready: $ready, Restarts: $restarts, Age: $age)${NC}"
            ;;
          "Running")
            running=$((running + 1))
            log "${YELLOW}RUNNING: $pod_name (Ready: $ready, Restarts: $restarts, Age: $age)${NC}"
            ;;
          *)
            log "${YELLOW}UNKNOWN: $pod_name (Status: $status, Ready: $ready, Restarts: $restarts, Age: $age)${NC}"
            ;;
        esac
      done <<< "$pods"

      log "${YELLOW}Summary: Total=$total, Failed=$failed, Succeeded=$succeeded, Running=$running${NC}"
    fi
  } >&2

  # Return counts for use in other functions
  echo "$total|$failed|$succeeded|$running"
}
# Clean up failed pods
# Force-deletes every drift-scanner pod in $NAMESPACE whose phase is Failed.
# Globals:   NAMESPACE, RED, GREEN, YELLOW, NC (read)
# Outputs:   progress messages via log; kubectl's own output is not hidden
# Returns:   the number of pods deleted, capped at 255 because an exit
#            status cannot carry more. NOTE: a non-zero status here is a
#            count, not an error, so under `set -e` call it as
#            `cleanup_failed_pods || n=$?`.
cleanup_failed_pods() {
  log "${YELLOW}=== Cleaning Up Failed Pods ===${NC}"

  # Select on the pod phase rather than grepping the STATUS column for
  # "Failed": that column usually shows a reason such as Error, OOMKilled or
  # Evicted, so a text match misses most failed pods.
  local failed_pods
  failed_pods=$(kubectl get pods -n "$NAMESPACE" --no-headers \
    --field-selector=status.phase=Failed \
    | awk '$1 ~ /drift-scanner/ {print $1}') || failed_pods=""

  if [[ -z "$failed_pods" ]]; then
    log "${GREEN}No failed drift-scanner pods to clean${NC}"
    return 0
  fi

  local cleaned_count=0
  local pod_name
  while read -r pod_name; do
    [[ -z "$pod_name" ]] && continue
    log "Deleting failed pod: $pod_name"
    if kubectl delete pod "$pod_name" -n "$NAMESPACE" --force --grace-period=0; then
      # Not ((cleaned_count++)): that returns status 1 when the old value is
      # 0 and would abort the script under `set -e` after the first delete.
      cleaned_count=$((cleaned_count + 1))
      log "${GREEN}Successfully deleted: $pod_name${NC}"
    else
      log "${RED}Failed to delete: $pod_name${NC}"
    fi
  done <<< "$failed_pods"

  log "${GREEN}Cleaned up $cleaned_count failed pods${NC}"

  # Exit statuses wrap modulo 256; cap so that 256 deletions do not read as 0.
  if [[ $cleaned_count -gt 255 ]]; then
    cleaned_count=255
  fi
  return "$cleaned_count"
}
# Convert a kubectl AGE string to whole hours, rounded down.
# Accepts any concatenation of <number><unit> with units y, d, h, m, s
# (e.g. 45s, 150m, 5h30m, 2d5h, 1y20d); a year counts as 365 days.
# Prints 0 for anything it cannot parse, so such pods are never treated as old.
_drift_scanner_age_to_hours() {
  local rest="${1-}"
  local unit_re='^([0-9]+)([ydhms])(.*)$'
  local minutes=0 value

  while [[ "$rest" =~ $unit_re ]]; do
    value=$((10#${BASH_REMATCH[1]}))
    case "${BASH_REMATCH[2]}" in
      y) minutes=$((minutes + value * 525600)) ;;
      d) minutes=$((minutes + value * 1440)) ;;
      h) minutes=$((minutes + value * 60)) ;;
      m) minutes=$((minutes + value)) ;;
      s) ;; # seconds never add up to an hour in kubectl's AGE formats
    esac
    rest="${BASH_REMATCH[3]}"
  done

  # Leftover text means the string was not a pure age expression.
  if [[ -n "$rest" ]]; then
    minutes=0
  fi
  echo $((minutes / 60))
}

# Clean up succeeded pods (optional, based on age)
# Force-deletes drift-scanner pods in $NAMESPACE whose phase is Succeeded and
# whose age is strictly greater than the given number of hours.
# Globals:   NAMESPACE, RED, GREEN, YELLOW, NC (read)
# Arguments: $1 - maximum age in whole hours (default 24)
# Outputs:   progress messages via log; kubectl's own output is not hidden
# Returns:   the number of pods deleted, capped at 255 because an exit
#            status cannot carry more. NOTE: a non-zero status here is a
#            count, not an error, so under `set -e` call it as
#            `cleanup_succeeded_pods 24 || n=$?`.
cleanup_succeeded_pods() {
  local max_age_hours="${1:-24}" # Default 24 hours
  log "${YELLOW}=== Cleaning Up Succeeded Pods (older than ${max_age_hours}h) ===${NC}"

  if ! [[ "$max_age_hours" =~ ^[0-9]+$ ]]; then
    log "${RED}Invalid max age '${max_age_hours}': expected a whole number of hours${NC}"
    return 0
  fi
  # Force base 10 so a value like "08" is not rejected as invalid octal.
  max_age_hours=$((10#$max_age_hours))

  # Select on the pod phase rather than grepping the STATUS column for
  # "Succeeded": kubectl normally prints "Completed" there.
  local succeeded_pods
  succeeded_pods=$(kubectl get pods -n "$NAMESPACE" --no-headers \
    --field-selector=status.phase=Succeeded \
    | awk '$1 ~ /drift-scanner/') || succeeded_pods=""

  if [[ -z "$succeeded_pods" ]]; then
    log "${GREEN}No succeeded drift-scanner pods to clean${NC}"
    return 0
  fi

  local cleaned_count=0
  local pod_name age age_hours
  local -a cols
  while read -r -a cols; do
    [[ ${#cols[@]} -gt 0 ]] || continue
    pod_name="${cols[0]}"
    # AGE is always the last column. RESTARTS can expand to several words
    # ("2 (3d ago)"), so a fixed field position would be wrong.
    age="${cols[${#cols[@]} - 1]}"
    age_hours=$(_drift_scanner_age_to_hours "$age")

    if [[ $age_hours -gt $max_age_hours ]]; then
      log "Deleting old succeeded pod: $pod_name (Age: $age)"
      if kubectl delete pod "$pod_name" -n "$NAMESPACE" --force --grace-period=0; then
        # Not ((cleaned_count++)): that returns status 1 when the old value
        # is 0 and would abort the script under `set -e`.
        cleaned_count=$((cleaned_count + 1))
        log "${GREEN}Successfully deleted: $pod_name${NC}"
      else
        log "${RED}Failed to delete: $pod_name${NC}"
      fi
    fi
  done <<< "$succeeded_pods"

  log "${GREEN}Cleaned up $cleaned_count old succeeded pods${NC}"

  # Exit statuses wrap modulo 256; cap so that 256 deletions do not read as 0.
  if [[ $cleaned_count -gt 255 ]]; then
    cleaned_count=255
  fi
  return "$cleaned_count"
}
# Main function
# Arguments: $1 - action: analyze (default), cleanup-failed,
#                 cleanup-succeeded or cleanup-all
#            $2 - maximum age in hours for succeeded pods (default 24)
# Flow: log a banner, verify kubectl access, show the pod analysis, then run
# the requested action. An unknown action prints usage and exits 1.
main() {
  local action="${1:-analyze}"
  local max_age="${2:-24}"
  log "${YELLOW}=== Drift Scanner Pod Cleanup Script ===${NC}"
  log "Action: $action"
  log "Namespace: $NAMESPACE"

  # Check kubectl access
  check_kubectl

  # Analyze current state.
  # analyze_pods finishes its stdout with a machine-readable
  # "total|failed|succeeded|running" line. Whatever precedes that line is
  # report text, which is shown to the user instead of being discarded with
  # the captured output.
  local analysis report
  local counts_re='^[0-9]+(\|[0-9]+){3}$'
  analysis=$(analyze_pods) || true
  report="$analysis"
  if [[ "${analysis##*$'\n'}" =~ $counts_re ]]; then
    if [[ "$analysis" == *$'\n'* ]]; then
      report="${analysis%$'\n'*}"
    else
      report=""
    fi
  fi
  if [[ -n "$report" ]]; then
    printf '%s\n' "$report"
  fi

  # The cleanup functions report the number of deleted pods through their
  # exit status. They are called on the left of `||` so that a non-zero count
  # is captured instead of tripping `set -e`.
  local cleaned_failed=0
  local cleaned_succeeded=0
  case "$action" in
    "analyze")
      log "${GREEN}Analysis complete. No cleanup performed.${NC}"
      ;;
    "cleanup-failed")
      cleanup_failed_pods || cleaned_failed=$?
      if [[ $cleaned_failed -gt 0 ]]; then
        # A failed notification must not fail an otherwise successful run.
        send_telegram "Cleaned up $cleaned_failed failed drift-scanner pods" \
          || log "${YELLOW}Telegram notification could not be sent${NC}"
      fi
      ;;
    "cleanup-succeeded")
      cleanup_succeeded_pods "$max_age" || cleaned_succeeded=$?
      if [[ $cleaned_succeeded -gt 0 ]]; then
        send_telegram "Cleaned up $cleaned_succeeded old succeeded drift-scanner pods" \
          || log "${YELLOW}Telegram notification could not be sent${NC}"
      fi
      ;;
    "cleanup-all")
      # Both steps always run; the first one's count must not end the script.
      cleanup_failed_pods || cleaned_failed=$?
      cleanup_succeeded_pods "$max_age" || cleaned_succeeded=$?
      ;;
    *)
      echo "Usage: $0 [analyze|cleanup-failed|cleanup-succeeded|cleanup-all] [max-age-hours]"
      echo " analyze - Show current pod status (default)"
      echo " cleanup-failed - Delete all failed pods"
      echo " cleanup-succeeded - Delete succeeded pods older than max-age-hours (default: 24h)"
      echo " cleanup-all - Delete failed pods and old succeeded pods"
      exit 1
      ;;
  esac
  log "${GREEN}=== Script completed ===${NC}"
}
# Run main function with all arguments
# Script entry point: "$@" forwards the command-line arguments unchanged, so
# $1 becomes main's action and $2 its max-age-hours value.
main "$@"