Skip to content

Commit

Permalink
Improve deadline exceed message (#65)
Browse files Browse the repository at this point in the history
* Improve deadline exceed message

Deadline exceeded is what the status of a node disruption will
show once a Node Disruption is fully rejected. Most of the time,
we reach the deadline because some budgets are preventing
from reaching the granted state. In the previous version, we lost
the context and had to look at logs. Now we keep the previous
status and only append a reason. This way we know why the last
attempt was rejected.

* Refactor internal disruption status to avoid shallow functions

* Refactor to remove duplication of code

Make the reference calculation a method from ndr
Remove redondant error handling for RejectOverlappingDisruption
  • Loading branch information
geobeau authored Jun 18, 2024
1 parent aaf4722 commit 7184636
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 28 deletions.
62 changes: 34 additions & 28 deletions internal/controller/nodedisruption_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,26 +267,12 @@ func (ndr *SingleNodeDisruptionReconciler) UpdateStatus(ctx context.Context) err
return ndr.Client.Status().Update(ctx, &ndr.NodeDisruption, []client.SubResourceUpdateOption{}...)
}

func (ndr *SingleNodeDisruptionReconciler) generateRejectedStatus(reason string) []nodedisruptionv1alpha1.DisruptedBudgetStatus {
return []nodedisruptionv1alpha1.DisruptedBudgetStatus{
{
Reference: nodedisruptionv1alpha1.NamespacedName{
Namespace: ndr.NodeDisruption.Namespace,
Name: ndr.NodeDisruption.Name,
Kind: ndr.NodeDisruption.Kind,
},
Reason: reason,
Ok: false,
},
}
}

// ValidateOverlappingDisruption checks that the current disruption doesn't overlap and existing one
func (ndr *SingleNodeDisruptionReconciler) ValidateOverlappingDisruption(ctx context.Context, disruptedNodes resolver.NodeSet) (anyFailed bool, statuses []nodedisruptionv1alpha1.DisruptedBudgetStatus, err error) {
func (ndr *SingleNodeDisruptionReconciler) ValidateOverlappingDisruption(ctx context.Context, disruptedNodes resolver.NodeSet) (anyFailed bool, status nodedisruptionv1alpha1.DisruptedBudgetStatus, err error) {
allDisruptions := &nodedisruptionv1alpha1.NodeDisruptionList{}
err = ndr.Client.List(ctx, allDisruptions)
if err != nil {
return false, nil, err
return true, status, err
}
for _, otherDisruption := range allDisruptions.Items {
if otherDisruption.Name == ndr.NodeDisruption.Name {
Expand All @@ -295,34 +281,54 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateOverlappingDisruption(ctx con
if otherDisruption.Status.State == nodedisruptionv1alpha1.Pending || otherDisruption.Status.State == nodedisruptionv1alpha1.Granted {
otherDisruptedNodes := resolver.NewNodeSetFromStringList(otherDisruption.Status.DisruptedNodes)
if otherDisruptedNodes.Intersection(disruptedNodes).Len() > 0 {
return true, ndr.generateRejectedStatus(fmt.Sprintf(`Selected node(s) overlap with another disruption: ”%s"`, otherDisruption.Name)), nil
status := nodedisruptionv1alpha1.DisruptedBudgetStatus{
Reference: ndr.getNodeDisruptionReference(),
Reason: fmt.Sprintf(`Selected node(s) overlap with another disruption: ”%s"`, otherDisruption.Name),
Ok: false,
}
return true, status, nil
}
}
}

return false, nil, nil
return false, status, nil
}

func (ndr *SingleNodeDisruptionReconciler) getNodeDisruptionReference() nodedisruptionv1alpha1.NamespacedName {
return nodedisruptionv1alpha1.NamespacedName{
Namespace: ndr.NodeDisruption.Namespace,
Name: ndr.NodeDisruption.Name,
Kind: ndr.NodeDisruption.Kind,
}
}

// ValidateInternalConstraints check that the Node Disruption is valid against internal constraints
// such as deadline or constraints on number of impacted nodes
func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(ctx context.Context) (anyFailed bool, statuses []nodedisruptionv1alpha1.DisruptedBudgetStatus, err error) {
disruptedNodes := resolver.NewNodeSetFromStringList(ndr.NodeDisruption.Status.DisruptedNodes)

if ndr.Config.RejectEmptyNodeDisruption && disruptedNodes.Len() == 0 {
return true, ndr.generateRejectedStatus("No Node matching selector"), nil
if ndr.NodeDisruption.Spec.Retry.IsAfterDeadline() {
status := nodedisruptionv1alpha1.DisruptedBudgetStatus{
Reference: ndr.getNodeDisruptionReference(),
Reason: "Deadline exceeded",
Ok: false,
}
// Conserve the statuses of the previous iteration to conserve the reason of rejection
return true, append(ndr.NodeDisruption.Status.DisruptedDisruptionBudgets, status), nil
}

if ndr.Config.RejectOverlappingDisruption {
anyFailed, statuses, err = ndr.ValidateOverlappingDisruption(ctx, disruptedNodes)
if err != nil {
return false, nil, err
} else if anyFailed {
return true, statuses, nil
if ndr.Config.RejectEmptyNodeDisruption && disruptedNodes.Len() == 0 {
status := nodedisruptionv1alpha1.DisruptedBudgetStatus{
Reference: ndr.getNodeDisruptionReference(),
Reason: "No node matching label selector",
Ok: false,
}
return true, []nodedisruptionv1alpha1.DisruptedBudgetStatus{status}, nil
}

if ndr.NodeDisruption.Spec.Retry.IsAfterDeadline() {
return true, ndr.generateRejectedStatus("Deadline exceeded"), nil
if ndr.Config.RejectOverlappingDisruption {
anyFailed, status, err := ndr.ValidateOverlappingDisruption(ctx, disruptedNodes)
return anyFailed, []nodedisruptionv1alpha1.DisruptedBudgetStatus{status}, err
}

return false, statuses, nil
Expand Down
96 changes: 96 additions & 0 deletions internal/controller/nodedisruption_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,102 @@ var _ = Describe("NodeDisruption controller", func() {
}, timeout, interval).Should(Equal(nodedisruptionv1alpha1.Rejected))
})
})

When("a node disruption's deadline is elapsed", func() {
It("It keeps the previous status", func() {
By("creating a budget that rejects everything")
ndb := &nodedisruptionv1alpha1.NodeDisruptionBudget{
TypeMeta: metav1.TypeMeta{
APIVersion: "nodedisruption.criteo.com/v1alpha1",
Kind: "NodeDisruptionBudget",
},
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
Spec: nodedisruptionv1alpha1.NodeDisruptionBudgetSpec{
NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabels1},
MaxDisruptedNodes: 0,
},
}
Expect(k8sClient.Create(ctx, ndb)).Should(Succeed())

By("creating a new NodeDisruption with retry enabled and an far deadline")
disruption := &nodedisruptionv1alpha1.NodeDisruption{
TypeMeta: metav1.TypeMeta{
APIVersion: "nodedisruption.criteo.com/v1alpha1",
Kind: "NodeDisruption",
},
ObjectMeta: metav1.ObjectMeta{
Name: NDName,
Namespace: NDNamespace,
},
Spec: nodedisruptionv1alpha1.NodeDisruptionSpec{
NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabels1},
Retry: nodedisruptionv1alpha1.RetrySpec{
Enabled: true,
// Deadline is 1m in future
Deadline: metav1.Time{Time: time.Now().Add(time.Minute)},
},
},
}
Expect(k8sClient.Create(ctx, disruption.DeepCopy())).Should(Succeed())

By("trying at least one and filling the status field")
Eventually(func() bool {
err := k8sClient.Get(ctx, NDLookupKey, disruption)
if err != nil {
panic("should be able to get")
}
return !disruption.Status.NextRetryDate.IsZero()
}, timeout, interval).Should(BeTrue())

By("properly filling the status")
expectedStatus := []nodedisruptionv1alpha1.DisruptedBudgetStatus{{
Reference: nodedisruptionv1alpha1.NamespacedName{
Namespace: "",
Name: "test",
Kind: "NodeDisruptionBudget",
},
Reason: "No more disruption allowed",
Ok: false,
}}
Expect(disruption.Status.DisruptedDisruptionBudgets).Should(Equal(expectedStatus))

By("forcing deadline exceeding")
disruption.Spec.Retry.Deadline = metav1.Time{Time: time.Now().Add(-time.Minute)}
Expect(k8sClient.Update(ctx, disruption.DeepCopy())).Should(Succeed())

By("waiting for rejection")
Eventually(func() nodedisruptionv1alpha1.NodeDisruptionState {
err := k8sClient.Get(ctx, NDLookupKey, disruption)
if err != nil {
panic("should be able to get")
}
return disruption.Status.State
}, timeout, interval).Should(Equal(nodedisruptionv1alpha1.Rejected))

By("keeping previous status")
expectedStatus = []nodedisruptionv1alpha1.DisruptedBudgetStatus{{
Reference: nodedisruptionv1alpha1.NamespacedName{
Namespace: "",
Name: "test",
Kind: "NodeDisruptionBudget",
},
Reason: "No more disruption allowed",
Ok: false,
}, {
Reference: nodedisruptionv1alpha1.NamespacedName{
Namespace: "",
Name: "test-nodedisruption",
Kind: "NodeDisruption",
},
Reason: "Deadline exceeded",
Ok: false,
}}
Expect(disruption.Status.DisruptedDisruptionBudgets).Should(Equal(expectedStatus))
})
})

})

Context("If a NodeDisruption's nodeSelector does not match any node", func() {
Expand Down

0 comments on commit 7184636

Please sign in to comment.