From 57bbc9ee2bb8aec5effc32c6d79794849873664f Mon Sep 17 00:00:00 2001 From: Geoffrey Beausire Date: Fri, 27 Oct 2023 11:44:54 +0200 Subject: [PATCH] Add information about current disruption in budgets This helps owners of budget understand what disruptions are impacting/might impact their budgets --- .../applicationdisruptionbudget_types.go | 11 ++++ api/v1alpha1/zz_generated.deepcopy.go | 20 ++++++ .../applicationdisruptionbudget-crd.yaml | 30 +++++++-- chart/templates/nodedisruptionbudget-crd.yaml | 18 ++++++ ...iteo.com_applicationdisruptionbudgets.yaml | 18 ++++++ ...tion.criteo.com_nodedisruptionbudgets.yaml | 18 ++++++ .../applicationdisruptionbudget_controller.go | 33 ++++++---- ...icationdisruptionbudget_controller_test.go | 62 ++++++++++++++++++- .../nodedisruptionbudget_controller.go | 34 ++++++---- .../nodedisruptionbudget_controller_test.go | 59 +++++++++++++++++- 10 files changed, 269 insertions(+), 34 deletions(-) diff --git a/api/v1alpha1/applicationdisruptionbudget_types.go b/api/v1alpha1/applicationdisruptionbudget_types.go index 4b6f199..9a81670 100644 --- a/api/v1alpha1/applicationdisruptionbudget_types.go +++ b/api/v1alpha1/applicationdisruptionbudget_types.go @@ -74,6 +74,17 @@ type DisruptionBudgetStatus struct { // Number of disruption currently seen on the cluster // +kubebuilder:default=0 CurrentDisruptions int `json:"currentDisruptions"` + + // Disruptions contains a list of disruptions that are related to the budget + Disruptions []Disruption `json:"disruptions"` +} + +// Basic information about disruptions +type Disruption struct { + // Name of the disruption + Name string `json:"name"` + // State of the disruption + State string `json:"state"` } //+kubebuilder:object:root=true diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 3e874cf..f35b9c1 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -123,6 +123,21 @@ func (in *DisruptedBudgetStatus) DeepCopy() *DisruptedBudgetStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Disruption) DeepCopyInto(out *Disruption) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Disruption. +func (in *Disruption) DeepCopy() *Disruption { + if in == nil { + return nil + } + out := new(Disruption) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DisruptionBudgetStatus) DeepCopyInto(out *DisruptionBudgetStatus) { *out = *in @@ -131,6 +146,11 @@ func (in *DisruptionBudgetStatus) DeepCopyInto(out *DisruptionBudgetStatus) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.Disruptions != nil { + in, out := &in.Disruptions, &out.Disruptions + *out = make([]Disruption, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DisruptionBudgetStatus. diff --git a/chart/templates/applicationdisruptionbudget-crd.yaml b/chart/templates/applicationdisruptionbudget-crd.yaml index bb386b6..0e06c09 100644 --- a/chart/templates/applicationdisruptionbudget-crd.yaml +++ b/chart/templates/applicationdisruptionbudget-crd.yaml @@ -49,12 +49,6 @@ spec: description: ApplicationDisruptionBudgetSpec defines the desired state of ApplicationDisruptionBudget properties: - healthURL: - description: Health URL is deprecated and will be removed in next version, - please use healthHook instead. Health URL is an optional URL to - call to validate the state of the application. Maintenance will proceed - only if the endpoint responds 2XX. - type: string healthHook: description: Define a optional hook to call when validating a NodeDisruption. It perform a POST http request containing the NodeDisruption that @@ -71,6 +65,12 @@ spec: form (`scheme://host:port/path`). type: string type: object + healthURL: + description: Health URL is deprecated and will be removed in next version, + please use healthHook instead. Health URL is an optional URL to call + to validate the state of the application. Maintenance will proceed + only if the endpoint responds 2XX. + type: string maxDisruptions: description: A NodeDisruption is allowed if at most "maxDisruptions" nodes selected by selectors are unavailable after the disruption. @@ -175,6 +175,23 @@ spec: default: 0 description: Number of disruption currently seen on the cluster type: integer + disruptions: + description: Disruptions contains a list of disruptions that are related + to the budget + items: + description: Basic information about disruptions + properties: + name: + description: Name of the disruption + type: string + state: + description: State of the disruption + type: string + required: + - name + - state + type: object + type: array disruptionsAllowed: default: 0 description: Number of disruption allowed on the nodes of this @@ -188,6 +205,7 @@ spec: type: array required: - currentDisruptions + - disruptions - disruptionsAllowed type: object type: object diff --git a/chart/templates/nodedisruptionbudget-crd.yaml b/chart/templates/nodedisruptionbudget-crd.yaml index d510fd8..19a1b5f 100644 --- a/chart/templates/nodedisruptionbudget-crd.yaml +++ b/chart/templates/nodedisruptionbudget-crd.yaml @@ -115,6 +115,23 @@ spec: default: 0 description: Number of disruption currently seen on the cluster type: integer + disruptions: + description: Disruptions contains a list of disruptions that are related + to the budget + items: + description: Basic information about disruptions + properties: + name: + description: Name of the disruption + type: string + state: + description: State of the disruption + type: string + required: + - name + - state + type: object + type: array disruptionsAllowed: default: 0 description: Number of disruption allowed on the nodes of this @@ -128,6 +145,7 @@ spec: type: array required: - currentDisruptions + - disruptions - disruptionsAllowed type: object type: object diff --git a/config/crd/bases/nodedisruption.criteo.com_applicationdisruptionbudgets.yaml b/config/crd/bases/nodedisruption.criteo.com_applicationdisruptionbudgets.yaml index a5fa77e..eca7174 100644 --- a/config/crd/bases/nodedisruption.criteo.com_applicationdisruptionbudgets.yaml +++ b/config/crd/bases/nodedisruption.criteo.com_applicationdisruptionbudgets.yaml @@ -176,6 +176,23 @@ spec: default: 0 description: Number of disruption currently seen on the cluster type: integer + disruptions: + description: Disruptions contains a list of disruptions that are related + to the budget + items: + description: Basic information about disruptions + properties: + name: + description: Name of the disruption + type: string + state: + description: State of the disruption + type: string + required: + - name + - state + type: object + type: array disruptionsAllowed: default: 0 description: Number of disruption allowed on the nodes of this @@ -189,6 +206,7 @@ spec: type: array required: - currentDisruptions + - disruptions - disruptionsAllowed type: object type: object diff --git a/config/crd/bases/nodedisruption.criteo.com_nodedisruptionbudgets.yaml b/config/crd/bases/nodedisruption.criteo.com_nodedisruptionbudgets.yaml index b5f2ab8..f37ecef 100644 --- a/config/crd/bases/nodedisruption.criteo.com_nodedisruptionbudgets.yaml +++ b/config/crd/bases/nodedisruption.criteo.com_nodedisruptionbudgets.yaml @@ -115,6 +115,23 @@ spec: default: 0 description: Number of disruption currently seen on the cluster type: integer + disruptions: + description: Disruptions contains a list of disruptions that are related + to the budget + items: + description: Basic information about disruptions + properties: + name: + description: Name of the disruption + type: string + state: + description: State of the disruption + type: string + required: + - name + - state + type: object + type: array disruptionsAllowed: default: 0 description: Number of disruption allowed on the nodes of this @@ -128,6 +145,7 @@ spec: type: array required: - currentDisruptions + - disruptions - disruptionsAllowed type: object type: object diff --git a/internal/controller/applicationdisruptionbudget_controller.go b/internal/controller/applicationdisruptionbudget_controller.go index cbce1f1..a79ff4f 100644 --- a/internal/controller/applicationdisruptionbudget_controller.go +++ b/internal/controller/applicationdisruptionbudget_controller.go @@ -137,6 +137,10 @@ func (r *ApplicationDisruptionBudgetReconciler) SetupWithManager(mgr ctrl.Manage &corev1.PersistentVolumeClaim{}, handler.EnqueueRequestsFromMapFunc(r.MapFuncBuilder()), builder.WithPredicates(predicate.ResourceVersionChangedPredicate{})). + Watches( + &nodedisruptionv1alpha1.NodeDisruption{}, + handler.EnqueueRequestsFromMapFunc(r.MapFuncBuilder()), + builder.WithPredicates(predicate.ResourceVersionChangedPredicate{})). Complete(r) } @@ -155,7 +159,7 @@ func (r *ApplicationDisruptionBudgetResolver) Sync(ctx context.Context) error { nodes := resolver.NodeSetToStringList(nodeNames) - disruptionCount, err := r.ResolveDisruption(ctx) + disruptionCount, disruptions, err := r.ResolveDisruption(ctx) if err != nil { return err } @@ -163,6 +167,7 @@ func (r *ApplicationDisruptionBudgetResolver) Sync(ctx context.Context) error { r.ApplicationDisruptionBudget.Status.WatchedNodes = nodes r.ApplicationDisruptionBudget.Status.CurrentDisruptions = disruptionCount r.ApplicationDisruptionBudget.Status.DisruptionsAllowed = r.ApplicationDisruptionBudget.Spec.MaxDisruptions - disruptionCount + r.ApplicationDisruptionBudget.Status.Disruptions = disruptions return nil } @@ -259,34 +264,38 @@ func (r *ApplicationDisruptionBudgetResolver) GetSelectedNodes(ctx context.Conte return nodesFromPods.Union(nodesFromPVCs), nil } -func (r *ApplicationDisruptionBudgetResolver) ResolveDisruption(ctx context.Context) (int, error) { +func (r *ApplicationDisruptionBudgetResolver) ResolveDisruption(ctx context.Context) (int, []nodedisruptionv1alpha1.Disruption, error) { + disruptions := []nodedisruptionv1alpha1.Disruption{} selectedNodes, err := r.GetSelectedNodes(ctx) if err != nil { - return 0, err + return 0, disruptions, err } - disruptions := 0 + disruptionCount := 0 opts := []client.ListOption{} nodeDisruptions := &nodedisruptionv1alpha1.NodeDisruptionList{} err = r.Client.List(ctx, nodeDisruptions, opts...) if err != nil { - return 0, err + return 0, disruptions, err } for _, nd := range nodeDisruptions.Items { - if nd.Status.State != nodedisruptionv1alpha1.Granted { - continue - } - impactedNodes, err := r.Resolver.GetNodeFromNodeSelector(ctx, nd.Spec.NodeSelector) if err != nil { - return 0, err + return 0, disruptions, err } + if selectedNodes.Intersection(impactedNodes).Len() > 0 { - disruptions++ + if nd.Status.State == nodedisruptionv1alpha1.Granted { + disruptionCount++ + } + disruptions = append(disruptions, nodedisruptionv1alpha1.Disruption{ + Name: nd.Name, + State: string(nd.Status.State), + }) } } - return disruptions, nil + return disruptionCount, disruptions, nil } diff --git a/internal/controller/applicationdisruptionbudget_controller_test.go b/internal/controller/applicationdisruptionbudget_controller_test.go index 2c7ca56..ce0ee9e 100644 --- a/internal/controller/applicationdisruptionbudget_controller_test.go +++ b/internal/controller/applicationdisruptionbudget_controller_test.go @@ -100,8 +100,8 @@ var _ = Describe("ApplicationDisruptionBudget controller", func() { clearAllNodeDisruptionRessources() }) - When("there are no budgets in the cluster", func() { - It("grants the node disruption", func() { + When("Pods or PVC changes", func() { + It("updates the budget status", func() { By("creating a budget that accepts one disruption") ndb := &nodedisruptionv1alpha1.ApplicationDisruptionBudget{ TypeMeta: metav1.TypeMeta{ @@ -184,6 +184,64 @@ var _ = Describe("ApplicationDisruptionBudget controller", func() { }) }) + When("Node disruption is created", func() { + It("updates the budget status", func() { + By("creating a budget that accepts one disruption") + ndb := &nodedisruptionv1alpha1.ApplicationDisruptionBudget{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "nodedisruption.criteo.com/v1alpha1", + Kind: "ApplicationDisruptionBudget", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: ADBname, + Namespace: ADBNamespace, + }, + Spec: nodedisruptionv1alpha1.ApplicationDisruptionBudgetSpec{ + PodSelector: metav1.LabelSelector{MatchLabels: podLabels}, + PVCSelector: metav1.LabelSelector{MatchLabels: podLabels}, + MaxDisruptions: 1, + }, + } + Expect(k8sClient.Create(ctx, ndb)).Should(Succeed()) + + By("checking the ApplicationDisruptionBudget in synchronized") + ADBLookupKey := types.NamespacedName{Name: ADBname, Namespace: ADBNamespace} + createdADB := &nodedisruptionv1alpha1.ApplicationDisruptionBudget{} + Eventually(func() []string { + err := k8sClient.Get(ctx, ADBLookupKey, createdADB) + Expect(err).Should(Succeed()) + return createdADB.Status.WatchedNodes + }, timeout, interval).Should(Equal([]string{"node1", "node2", "node3"})) + + By("Adding Node Disruption") + disruption := &nodedisruptionv1alpha1.NodeDisruption{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "nodedisruption.criteo.com/v1alpha1", + Kind: "NodeDisruption", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-adb-1", + Namespace: "", + }, + Spec: nodedisruptionv1alpha1.NodeDisruptionSpec{ + NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabels1}, + }, + } + Expect(k8sClient.Create(ctx, disruption.DeepCopy())).Should(Succeed()) + + By("checking the ApplicationDisruptionBudget updated the status") + Eventually(func() []nodedisruptionv1alpha1.Disruption { + err := k8sClient.Get(ctx, ADBLookupKey, createdADB) + Expect(err).Should(Succeed()) + return createdADB.Status.Disruptions + }, timeout, interval).Should(Equal([]nodedisruptionv1alpha1.Disruption{{ + Name: "test-adb-1", + State: "granted", + }})) + + }) + }) + }) }) diff --git a/internal/controller/nodedisruptionbudget_controller.go b/internal/controller/nodedisruptionbudget_controller.go index 7358325..af3ca44 100644 --- a/internal/controller/nodedisruptionbudget_controller.go +++ b/internal/controller/nodedisruptionbudget_controller.go @@ -122,6 +122,10 @@ func (r *NodeDisruptionBudgetReconciler) SetupWithManager(mgr ctrl.Manager) erro &corev1.Node{}, handler.EnqueueRequestsFromMapFunc(r.MapFuncBuilder()), builder.WithPredicates(predicate.ResourceVersionChangedPredicate{})). + Watches( + &nodedisruptionv1alpha1.NodeDisruption{}, + handler.EnqueueRequestsFromMapFunc(r.MapFuncBuilder()), + builder.WithPredicates(predicate.ResourceVersionChangedPredicate{})). Complete(r) } @@ -140,7 +144,7 @@ func (r *NodeDisruptionBudgetResolver) Sync(ctx context.Context) error { nodes := resolver.NodeSetToStringList(nodeNames) - disruptionCount, err := r.ResolveDisruption(ctx) + disruptionCount, disruptions, err := r.ResolveDisruption(ctx) if err != nil { return err } @@ -150,7 +154,7 @@ func (r *NodeDisruptionBudgetResolver) Sync(ctx context.Context) error { disruptionsForMax := r.NodeDisruptionBudget.Spec.MaxDisruptedNodes - disruptionCount disruptionsForMin := (len(nodes) - disruptionCount) - r.NodeDisruptionBudget.Spec.MinUndisruptedNodes r.NodeDisruptionBudget.Status.DisruptionsAllowed = int(math.Min(float64(disruptionsForMax), float64(disruptionsForMin))) - disruptionCount - + r.NodeDisruptionBudget.Status.Disruptions = disruptions return nil } @@ -198,34 +202,38 @@ func (r *NodeDisruptionBudgetResolver) GetSelectedNodes(ctx context.Context) (re return nodesFromPods, nil } -func (r *NodeDisruptionBudgetResolver) ResolveDisruption(ctx context.Context) (int, error) { +func (r *NodeDisruptionBudgetResolver) ResolveDisruption(ctx context.Context) (int, []nodedisruptionv1alpha1.Disruption, error) { + disruptions := []nodedisruptionv1alpha1.Disruption{} selectedNodes, err := r.GetSelectedNodes(ctx) if err != nil { - return 0, err + return 0, disruptions, err } - disruptions := 0 + disruptionCount := 0 opts := []client.ListOption{} nodeDisruptions := &nodedisruptionv1alpha1.NodeDisruptionList{} err = r.Client.List(ctx, nodeDisruptions, opts...) if err != nil { - return 0, err + return 0, disruptions, err } for _, nd := range nodeDisruptions.Items { - if nd.Status.State != nodedisruptionv1alpha1.Granted { - continue - } - impactedNodes, err := r.Resolver.GetNodeFromNodeSelector(ctx, nd.Spec.NodeSelector) if err != nil { - return 0, err + return 0, disruptions, err } + if selectedNodes.Intersection(impactedNodes).Len() > 0 { - disruptions++ + if nd.Status.State == nodedisruptionv1alpha1.Granted { + disruptionCount++ + } + disruptions = append(disruptions, nodedisruptionv1alpha1.Disruption{ + Name: nd.Name, + State: string(nd.Status.State), + }) } } - return disruptions, nil + return disruptionCount, disruptions, err } diff --git a/internal/controller/nodedisruptionbudget_controller_test.go b/internal/controller/nodedisruptionbudget_controller_test.go index 825a09d..210a039 100644 --- a/internal/controller/nodedisruptionbudget_controller_test.go +++ b/internal/controller/nodedisruptionbudget_controller_test.go @@ -64,7 +64,7 @@ var _ = Describe("NodeDisruptionBudget controller", func() { clearAllNodeDisruptionRessources() }) - When("there are no budgets in the cluster", func() { + When("Nodes changes in the cluster", func() { It("it updates the NDB", func() { By("creating a budget that accepts one disruption") ndb := &nodedisruptionv1alpha1.NodeDisruptionBudget{ @@ -124,6 +124,63 @@ var _ = Describe("NodeDisruptionBudget controller", func() { }) }) + When("ND are created", func() { + It("it updates the NDB", func() { + By("creating a budget that accepts one disruption") + ndb := &nodedisruptionv1alpha1.NodeDisruptionBudget{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "nodedisruption.criteo.com/v1alpha1", + Kind: "NodeDisruptionBudget", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: NDBname, + Namespace: NDBNamespace, + }, + Spec: nodedisruptionv1alpha1.NodeDisruptionBudgetSpec{ + NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabels1}, + MaxDisruptedNodes: 10, + }, + } + Expect(k8sClient.Create(ctx, ndb)).Should(Succeed()) + + By("checking the NodeDisruptionBudget in synchronized") + NDBLookupKey := types.NamespacedName{Name: NDBname, Namespace: NDBNamespace} + createdNDB := &nodedisruptionv1alpha1.NodeDisruptionBudget{} + Eventually(func() []string { + err := k8sClient.Get(ctx, NDBLookupKey, createdNDB) + Expect(err).Should(Succeed()) + return createdNDB.Status.WatchedNodes + }, timeout, interval).Should(Equal([]string{"node1", "node2"})) + + By("Adding Node Disruption") + disruption := &nodedisruptionv1alpha1.NodeDisruption{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "nodedisruption.criteo.com/v1alpha1", + Kind: "NodeDisruption", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-ndb-1", + Namespace: "", + }, + Spec: nodedisruptionv1alpha1.NodeDisruptionSpec{ + NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabels1}, + }, + } + Expect(k8sClient.Create(ctx, disruption.DeepCopy())).Should(Succeed()) + + By("checking the NodeDisruptionBudget updated the status") + Eventually(func() []nodedisruptionv1alpha1.Disruption { + err := k8sClient.Get(ctx, NDBLookupKey, createdNDB) + Expect(err).Should(Succeed()) + return createdNDB.Status.Disruptions + }, timeout, interval).Should(Equal([]nodedisruptionv1alpha1.Disruption{{ + Name: "test-ndb-1", + State: "granted", + }})) + + }) + }) + }) })