diff --git a/k8s/descheduler/descheduler.yaml b/k8s/descheduler/descheduler.yaml index e2962c67..9278bc62 100644 --- a/k8s/descheduler/descheduler.yaml +++ b/k8s/descheduler/descheduler.yaml @@ -12,6 +12,10 @@ metadata: name: descheduler labels: app.kubernetes.io/name: descheduler + # R1 Review: explicitly declare the PSA restricted level + pod-security.kubernetes.io/enforce: restricted + pod-security.kubernetes.io/audit: restricted + pod-security.kubernetes.io/warn: restricted --- apiVersion: v1 kind: ServiceAccount @@ -111,7 +115,7 @@ spec: schedule: "0 */2 * * *" # Every 2 hours concurrencyPolicy: Forbid successfulJobsHistoryLimit: 3 - failedJobsHistoryLimit: 3 + failedJobsHistoryLimit: 1 jobTemplate: spec: template: diff --git a/k8s/monitoring/minio-kali-alerts.yaml b/k8s/monitoring/minio-kali-alerts.yaml index 73df2a11..c32ab5a4 100644 --- a/k8s/monitoring/minio-kali-alerts.yaml +++ b/k8s/monitoring/minio-kali-alerts.yaml @@ -31,6 +31,17 @@ groups: summary: "MinIO 磁碟剩餘空間不足 20%" description: "MinIO 可用空間: {{ $value | humanizePercentage }}" + # R1 Review: add a 5% critical disk alert as the last line of defense + - alert: MinioDiskUsageCritical + expr: minio_cluster_capacity_usable_free_bytes / minio_cluster_capacity_usable_total_bytes < 0.05 + for: 5m + labels: + severity: critical + service: minio + annotations: + summary: "MinIO 磁碟剩餘空間不足 5% (緊急)" + description: "MinIO 可用空間: {{ $value | humanizePercentage }},備份即將失敗" + # MinIO 離線磁碟 - alert: MinioOfflineDisk expr: minio_cluster_disk_offline_total > 0 diff --git a/k8s/monitoring/prometheus-config-phase-o.yaml b/k8s/monitoring/prometheus-config-phase-o.yaml index 029aea4f..096dfbba 100644 --- a/k8s/monitoring/prometheus-config-phase-o.yaml +++ b/k8s/monitoring/prometheus-config-phase-o.yaml @@ -57,8 +57,10 @@ target_label: __param_target - source_labels: [__param_target] target_label: instance + # R1 Review Critical Fix: Prometheus runs on the host and cannot resolve K8s DNS + # Blackbox Exporter is deployed on the .188 host - target_label: __address__ - replacement: blackbox-exporter:9115 + replacement: 
192.168.0.188:9115 # ============================================================================= # 驗證指令