Skip to content

Commit

Permalink
Reject overlapping disruptions
Browse files Browse the repository at this point in the history
  • Loading branch information
komuta committed Dec 5, 2023
1 parent 7b330a0 commit 5f570f6
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 3 deletions.
7 changes: 5 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,15 @@ func main() {
var probeAddr string
var rejectEmptyNodeDisruption bool
var retryInterval time.Duration
var rejectOverlappingDisruption bool
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
flag.BoolVar(&rejectEmptyNodeDisruption, "reject-empty-node-disruption", false, "Reject NodeDisruption matching no actual node.")
flag.DurationVar(&retryInterval, "retry-interval", controller.DefaultRetryInterval, "How long to wait between each retry (Default 60s)")
flag.BoolVar(&rejectOverlappingDisruption, "reject-overlapping-disruption", false, "Automatically reject any overlapping NodeDisruption (based on node selector), preserving the oldest one")

opts := zap.Options{
Development: true,
Expand Down Expand Up @@ -99,8 +101,9 @@ func main() {
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Config: controller.NodeDisruptionReconcilerConfig{
RejectEmptyNodeDisruption: rejectEmptyNodeDisruption,
RetryInterval: retryInterval,
RejectEmptyNodeDisruption: rejectEmptyNodeDisruption,
RetryInterval: retryInterval,
RejectOverlappingDisruption: rejectOverlappingDisruption,
},
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "NodeDisruption")
Expand Down
28 changes: 27 additions & 1 deletion internal/controller/nodedisruption_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ type NodeDisruptionReconcilerConfig struct {
RejectEmptyNodeDisruption bool
// How long to wait between each validation attempt
RetryInterval time.Duration
// Reject NodeDisruption if its node selector overlaps an older NodeDisruption's selector
RejectOverlappingDisruption bool
}

// NodeDisruptionReconciler reconciles NodeDisruptions
Expand Down Expand Up @@ -211,7 +213,7 @@ func (ndr *SingleNodeDisruptionReconciler) UpdateStatus(ctx context.Context) err

// ValidateWithInternalConstraints checks that the Node Disruption is valid against internal constraints
// such as deadline or constraints on number of impacted nodes
func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(_ context.Context) (anyFailed bool, statuses []nodedisruptionv1alpha1.DisruptedBudgetStatus) {
func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(ctx context.Context) (anyFailed bool, statuses []nodedisruptionv1alpha1.DisruptedBudgetStatus) {
disruptedNodes := resolver.NewNodeSetFromStringList(ndr.NodeDisruption.Status.DisruptedNodes)

if ndr.Config.RejectEmptyNodeDisruption && disruptedNodes.Len() == 0 {
Expand All @@ -228,6 +230,30 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(_ con
}
}

allDisruptions := &nodedisruptionv1alpha1.NodeDisruptionList{}
ndr.Client.List(ctx, allDisruptions) // TODO handle error here

Check failure on line 234 in internal/controller/nodedisruption_controller.go

View workflow job for this annotation

GitHub Actions / Build

Error return value of `ndr.Client.List` is not checked (errcheck)

Check failure on line 234 in internal/controller/nodedisruption_controller.go

View workflow job for this annotation

GitHub Actions / Build

Error return value of `ndr.Client.List` is not checked (errcheck)
for _, otherDisruption := range allDisruptions.Items {
if otherDisruption.Name == ndr.NodeDisruption.Name {
continue
}
if otherDisruption.Status.State == nodedisruptionv1alpha1.Pending || otherDisruption.Status.State == nodedisruptionv1alpha1.Granted {
otherDisruptedNodes := resolver.NewNodeSetFromStringList(otherDisruption.Status.DisruptedNodes)
if otherDisruptedNodes.Intersection(disruptedNodes).Len() > 0 {
return true, []nodedisruptionv1alpha1.DisruptedBudgetStatus{
{
Reference: nodedisruptionv1alpha1.NamespacedName{
Namespace: ndr.NodeDisruption.Namespace,
Name: ndr.NodeDisruption.Name,
Kind: ndr.NodeDisruption.Kind,
},
Reason: fmt.Sprintf(`Selected node(s) overlap with another disruption: ”%s"`, otherDisruption.Name),
Ok: false,
},
}
}
}
}

if ndr.NodeDisruption.Spec.Retry.IsAfterDeadline() {
return true, []nodedisruptionv1alpha1.DisruptedBudgetStatus{
{
Expand Down

0 comments on commit 5f570f6

Please sign in to comment.