Skip to content

Commit

Permalink
Improve logging and reasons
Browse files Browse the repository at this point in the history
Several changes:
- deadline exceeded: Feedback was that it was too general (looked like http error)
- disruption type message is clearer (display allowed types as well)
- log all rejections: before we logged only the "budget" rejections, now all rejections are logged.
  • Loading branch information
geobeau committed Oct 3, 2024
1 parent cf87246 commit 1f723ec
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 33 deletions.
3 changes: 1 addition & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ import (

nodedisruptionv1alpha1 "github.com/criteo/node-disruption-controller/api/v1alpha1"
"github.com/criteo/node-disruption-controller/internal/controller"
"github.com/criteo/node-disruption-controller/pkg/utils"
//+kubebuilder:scaffold:imports
)

Expand Down Expand Up @@ -101,7 +100,7 @@ func main() {
os.Exit(1)
}

nodeDisruptionTypes := utils.FilterEmptyString(strings.Split(nodeDisruptionTypesRaw, ","))
nodeDisruptionTypes := strings.FieldsFunc(nodeDisruptionTypesRaw, func(c rune) bool { return c == ',' })

if err = (&controller.NodeDisruptionReconciler{
Client: mgr.GetClient(),
Expand Down
13 changes: 8 additions & 5 deletions internal/controller/nodedisruption_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ func (ndr *SingleNodeDisruptionReconciler) getRetryDate() metav1.Time {
// tryTransitionToGranted attempt to transition to the granted state but can result in either pending or rejected
func (ndr *SingleNodeDisruptionReconciler) tryTransitionToGranted(ctx context.Context) (err error) {
nextRetryDate := ndr.getRetryDate()
logger := log.FromContext(ctx)

var state nodedisruptionv1alpha1.NodeDisruptionState

Expand All @@ -245,6 +246,11 @@ func (ndr *SingleNodeDisruptionReconciler) tryTransitionToGranted(ctx context.Co
if !nextRetryDate.IsZero() {
state = nodedisruptionv1alpha1.Pending
} else {
for _, status := range statuses {
if !status.Ok {
logger.Info("Disruption rejected", "status", status)
}
}
state = nodedisruptionv1alpha1.Rejected
}
}
Expand Down Expand Up @@ -314,7 +320,7 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(ctx c
if ndr.NodeDisruption.Spec.Retry.IsAfterDeadline() {
status := nodedisruptionv1alpha1.DisruptedBudgetStatus{
Reference: ndr.getNodeDisruptionReference(),
Reason: "Deadline exceeded",
Reason: fmt.Sprintf("Failed to grant maintenance before deadline (deadline: %s)", ndr.NodeDisruption.Spec.Retry.Deadline),
Ok: false,
}
// Conserve the statuses of the previous iteration to conserve the reason of rejection
Expand All @@ -338,7 +344,7 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(ctx c
if len(ndr.Config.NodeDisruptionTypes) != 0 && !slices.Contains(ndr.Config.NodeDisruptionTypes, ndr.NodeDisruption.Spec.Type) {
status := nodedisruptionv1alpha1.DisruptedBudgetStatus{
Reference: ndr.getNodeDisruptionReference(),
Reason: "Type provided of node disruption is not managed",
Reason: fmt.Sprintf("Type of node disruption provided is not allowed (supported: %s) (see --node-disruption-types on the controller)", ndr.Config.NodeDisruptionTypes),
Ok: false,
}
return true, []nodedisruptionv1alpha1.DisruptedBudgetStatus{status}, nil
Expand All @@ -350,7 +356,6 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(ctx c
// ValidateBudgetConstraints check that the Node Disruption is valid against the budgets defined in the cluster
func (ndr *SingleNodeDisruptionReconciler) ValidateWithBudgetConstraints(ctx context.Context, budgets []Budget) (anyFailed bool, statuses []nodedisruptionv1alpha1.DisruptedBudgetStatus) {
disruptedNodes := resolver.NewNodeSetFromStringList(ndr.NodeDisruption.Status.DisruptedNodes)
logger := log.FromContext(ctx)
anyFailed = false

impactedBudgets := []Budget{}
Expand All @@ -368,7 +373,6 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithBudgetConstraints(ctx con
Ok: false,
}
statuses = append(statuses, status)
logger.Info("Disruption rejected because: ", "status", status)
DisruptionBudgetRejectedTotal.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Inc()
break
}
Expand All @@ -390,7 +394,6 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithBudgetConstraints(ctx con
Ok: false,
}
statuses = append(statuses, status)
logger.Info("Disruption rejected because: ", "status", status)
DisruptionBudgetRejectedTotal.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Inc()
break
}
Expand Down
2 changes: 1 addition & 1 deletion internal/controller/nodedisruption_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ var _ = Describe("NodeDisruption controller", func() {
Name: "test-nodedisruption",
Kind: "NodeDisruption",
},
Reason: "Deadline exceeded",
Reason: fmt.Sprintf("Failed to grant maintenance before deadline (deadline: %s)", disruption.Spec.Retry.Deadline),
Ok: false,
}}
Expect(disruption.Status.DisruptedDisruptionBudgets).Should(Equal(expectedStatus))
Expand Down
10 changes: 0 additions & 10 deletions pkg/utils/utils.go

This file was deleted.

15 changes: 0 additions & 15 deletions pkg/utils/utils_test.go

This file was deleted.

0 comments on commit 1f723ec

Please sign in to comment.