From 8c463b604fcef9ad5f033cca371adf4b2feb7f7a Mon Sep 17 00:00:00 2001 From: Mustafa Elbehery Date: Sat, 28 Dec 2024 15:51:27 +0100 Subject: [PATCH] ETCD-695: Add job parallelism to recurrent backups --- pkg/cmd/request-backup/requestbackup.go | 7 +- .../backupcontroller/backupcontroller.go | 24 ++++- .../periodicbackupcontroller.go | 87 +++++++++++++++++-- 3 files changed, 107 insertions(+), 11 deletions(-) diff --git a/pkg/cmd/request-backup/requestbackup.go b/pkg/cmd/request-backup/requestbackup.go index 01da6fe55b..d4bf778fc6 100644 --- a/pkg/cmd/request-backup/requestbackup.go +++ b/pkg/cmd/request-backup/requestbackup.go @@ -2,19 +2,20 @@ package requestbackup import ( "context" - goflag "flag" "fmt" "os" "os/signal" "syscall" - "github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient" + goflag "flag" operatorv1alpha1 "github.com/openshift/api/operator/v1alpha1" operatorversionedclientv1alpha1 "github.com/openshift/client-go/operator/clientset/versioned/typed/operator/v1alpha1" + "github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient" "github.com/spf13/cobra" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + utilrand "k8s.io/apimachinery/pkg/util/rand" "k8s.io/client-go/tools/clientcmd" "k8s.io/klog/v2" ) @@ -143,7 +144,7 @@ func (r *requestBackupOpts) Run(ctx context.Context) error { // like we usually do for other manifests? etcdBackup := &operatorv1alpha1.EtcdBackup{ ObjectMeta: metav1.ObjectMeta{ - Name: r.etcdBackupName, + Name: fmt.Sprintf("%s-%s", r.etcdBackupName, utilrand.String(8)), Namespace: operatorclient.TargetNamespace, // Due to a limitation of the kube-controller, we can't rely on the api to garbage collect non-namespaced // etcdbackups from their corresponding namespaced jobs. diff --git a/pkg/operator/backupcontroller/backupcontroller.go b/pkg/operator/backupcontroller/backupcontroller.go index ec60b3dd37..9e005f2ae5 100644 --- a/pkg/operator/backupcontroller/backupcontroller.go +++ b/pkg/operator/backupcontroller/backupcontroller.go @@ -3,6 +3,7 @@ package backupcontroller import ( "context" "fmt" + "k8s.io/utils/ptr" "sort" "strings" "time" @@ -438,7 +439,11 @@ func createBackupJob(ctx context.Context, } if !injected { - return fmt.Errorf("could not inject PVC into Job template, please check the included cluster-backup-job.yaml") + if backup.Spec.PVCName == "no-config" { + useHostPathVol(job) + } else { + return fmt.Errorf("could not inject PVC into Job template, please check the included cluster-backup-job.yaml") + } } klog.Infof("BackupController starts with backup [%s] as job [%s], writing to filename [%s]", backup.Name, job.Name, backupFileName) @@ -470,3 +475,20 @@ func createBackupJob(ctx context.Context, return nil } + +func useHostPathVol(job *batchv1.Job) *batchv1.Job { + + job.Spec.Template.Spec.Volumes = []corev1.Volume{ + { + Name: "etc-kubernetes-cluster-backup", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/etc/kubernetes/cluster-backup", + Type: ptr.To(corev1.HostPathDirectoryOrCreate), + }, + }, + }, + } + + return job +} diff --git a/pkg/operator/periodicbackupcontroller/periodicbackupcontroller.go b/pkg/operator/periodicbackupcontroller/periodicbackupcontroller.go index c9c2a7cbe7..d72717304d 100644 --- a/pkg/operator/periodicbackupcontroller/periodicbackupcontroller.go +++ b/pkg/operator/periodicbackupcontroller/periodicbackupcontroller.go @@ -3,6 +3,7 @@ package periodicbackupcontroller import ( "context" "fmt" + "k8s.io/utils/ptr" "time" clientv1 "k8s.io/client-go/listers/core/v1" @@ -31,9 +32,8 @@ import ( ) const ( - backupJobLabel = "backup-name" - defaultBackupCRName = "default" - etcdBackupServerContainerName = "etcd-backup-server" + backupJobLabel = "backup-name" + defaultBackupCRName = "default" ) type PeriodicBackupController struct { @@ -160,7 +160,14 @@ func reconcileCronJob(ctx context.Context, } if !injected { - return fmt.Errorf("could not inject PVC into CronJob template, please check the included cluster-backup-cronjob.yaml") + if backup.Name == defaultBackupCRName { + cronJob, err = applyAutomatedNoConfigBackup(cronJob) + if err != nil { + return err + } + } else { + return fmt.Errorf("could not inject PVC into CronJob template, please check the included cluster-backup-cronjob.yaml") + } } cronJob.Spec.Schedule = backup.Spec.EtcdBackupSpec.Schedule @@ -182,9 +189,16 @@ func reconcileCronJob(ctx context.Context, // The name of the CR will need to be unique for each scheduled run of the CronJob, so the name is // set at runtime as the pod via the MY_POD_NAME populated via the downward API. // See the CronJob template manifest for reference. - cronJob.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Args = []string{ - "request-backup", - "--pvc-name=" + backup.Spec.EtcdBackupSpec.PVCName, + if injected { + cronJob.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Args = []string{ + "request-backup", + "--pvc-name=" + backup.Spec.EtcdBackupSpec.PVCName, + } + } else { + cronJob.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Args = []string{ + "request-backup", + "--pvc-name=" + "no-config", + } } if create { @@ -272,3 +286,62 @@ func newCronJob() (*batchv1.CronJob, error) { return obj.(*batchv1.CronJob), nil } + +func applyAutomatedNoConfigBackup(cronJob *batchv1.CronJob) (*batchv1.CronJob, error) { + if cronJob == nil { + return nil, fmt.Errorf("cronJob can not be nil") + } + + // add job parallelism + cronJob.Spec.JobTemplate.Spec.Parallelism = ptr.To(int32(3)) + cronJob.Spec.JobTemplate.Spec.Completions = ptr.To(int32(3)) + + cronJob.Spec.JobTemplate.Spec.Template.Spec.Affinity = &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ + NodeSelectorTerms: []corev1.NodeSelectorTerm{ + { + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "node-role.kubernetes.io/master", + Operator: corev1.NodeSelectorOpExists, + }, + }, + }, + }, + }, + }, + + PodAntiAffinity: &corev1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &v1.LabelSelector{ + MatchExpressions: []v1.LabelSelectorRequirement{ + { + Key: "app", + Operator: v1.LabelSelectorOpIn, + Values: []string{"cluster-backup-cronjob"}, + }, + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + } + + // add hostPath per job + cronJob.Spec.JobTemplate.Spec.Template.Spec.Volumes = []corev1.Volume{ + { + Name: "etc-kubernetes-cluster-backup", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/etc/kubernetes/cluster-backup", + Type: ptr.To(corev1.HostPathDirectoryOrCreate), + }, + }, + }, + } + + return cronJob, nil +}