diff --git a/cmd/main.go b/cmd/main.go
index 7f80758..855801e 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -55,6 +55,7 @@ func main() {
 	var probeAddr string
 	var rejectEmptyNodeDisruption bool
 	var retryInterval time.Duration
+	var rejectOverlappingDisruption bool
 	flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
 	flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
 	flag.BoolVar(&enableLeaderElection, "leader-elect", false,
@@ -62,6 +63,7 @@ func main() {
 			"Enabling this will ensure there is only one active controller manager.")
 	flag.BoolVar(&rejectEmptyNodeDisruption, "reject-empty-node-disruption", false, "Reject NodeDisruption matching no actual node.")
 	flag.DurationVar(&retryInterval, "retry-interval", controller.DefaultRetryInterval, "How long to wait between each retry (Default 60s)")
+	flag.BoolVar(&rejectOverlappingDisruption, "reject-overlapping-disruption", false, "Automatically reject any overlapping NodeDisruption (based on node selector), preserving the oldest one")
 
 	opts := zap.Options{
 		Development: true,
@@ -99,8 +101,9 @@ func main() {
 		Client: mgr.GetClient(),
 		Scheme: mgr.GetScheme(),
 		Config: controller.NodeDisruptionReconcilerConfig{
-			RejectEmptyNodeDisruption: rejectEmptyNodeDisruption,
-			RetryInterval:             retryInterval,
+			RejectEmptyNodeDisruption:   rejectEmptyNodeDisruption,
+			RetryInterval:               retryInterval,
+			RejectOverlappingDisruption: rejectOverlappingDisruption,
 		},
 	}).SetupWithManager(mgr); err != nil {
 		setupLog.Error(err, "unable to create controller", "controller", "NodeDisruption")
diff --git a/internal/controller/nodedisruption_controller.go b/internal/controller/nodedisruption_controller.go
index 06e0dfa..50e330d 100644
--- a/internal/controller/nodedisruption_controller.go
+++ b/internal/controller/nodedisruption_controller.go
@@ -42,6 +42,8 @@ type NodeDisruptionReconcilerConfig struct {
 	RejectEmptyNodeDisruption bool
 	// How long to retry between each validation attempt
 	RetryInterval time.Duration
+	// Reject NodeDisruption if its node selector overlaps an older NodeDisruption's selector
+	RejectOverlappingDisruption bool
 }
 
 // NodeDisruptionReconciler reconciles NodeDisruptions
@@ -211,7 +213,7 @@ func (ndr *SingleNodeDisruptionReconciler) UpdateStatus(ctx context.Context) err
 
 // ValidateInternalConstraints check that the Node Disruption is valid against internal constraints
 // such as deadline or constraints on number of impacted nodes
-func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(_ context.Context) (anyFailed bool, statuses []nodedisruptionv1alpha1.DisruptedBudgetStatus) {
+func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(ctx context.Context) (anyFailed bool, statuses []nodedisruptionv1alpha1.DisruptedBudgetStatus) {
 	disruptedNodes := resolver.NewNodeSetFromStringList(ndr.NodeDisruption.Status.DisruptedNodes)
 
 	if ndr.Config.RejectEmptyNodeDisruption && disruptedNodes.Len() == 0 {
@@ -228,6 +230,48 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(_ con
 		}
 	}
 
+	if ndr.Config.RejectOverlappingDisruption {
+		allDisruptions := &nodedisruptionv1alpha1.NodeDisruptionList{}
+		if err := ndr.Client.List(ctx, allDisruptions); err != nil {
+			// Fail closed: without the list, an overlap cannot be ruled out.
+			return true, []nodedisruptionv1alpha1.DisruptedBudgetStatus{
+				{
+					Reference: nodedisruptionv1alpha1.NamespacedName{
+						Namespace: ndr.NodeDisruption.Namespace,
+						Name:      ndr.NodeDisruption.Name,
+						Kind:      ndr.NodeDisruption.Kind,
+					},
+					Reason: fmt.Sprintf("Unable to list NodeDisruptions to check for overlap: %v", err),
+					Ok:     false,
+				},
+			}
+		}
+		// NOTE(review): any other Pending or Granted disruption counts here, whatever
+		// its age, while the flag help promises to preserve the oldest one; confirm
+		// whether creation timestamps should be compared.
+		for _, otherDisruption := range allDisruptions.Items {
+			if otherDisruption.Name == ndr.NodeDisruption.Name {
+				continue
+			}
+			if otherDisruption.Status.State == nodedisruptionv1alpha1.Pending || otherDisruption.Status.State == nodedisruptionv1alpha1.Granted {
+				otherDisruptedNodes := resolver.NewNodeSetFromStringList(otherDisruption.Status.DisruptedNodes)
+				if otherDisruptedNodes.Intersection(disruptedNodes).Len() > 0 {
+					return true, []nodedisruptionv1alpha1.DisruptedBudgetStatus{
+						{
+							Reference: nodedisruptionv1alpha1.NamespacedName{
+								Namespace: ndr.NodeDisruption.Namespace,
+								Name:      ndr.NodeDisruption.Name,
+								Kind:      ndr.NodeDisruption.Kind,
+							},
+							Reason: fmt.Sprintf(`Selected node(s) overlap with another disruption: "%s"`, otherDisruption.Name),
+							Ok:     false,
+						},
+					}
+				}
+			}
+		}
+	}
+
 	if ndr.NodeDisruption.Spec.Retry.IsAfterDeadline() {
 		return true, []nodedisruptionv1alpha1.DisruptedBudgetStatus{
 			{