From 8ce215e13e21e4ba41c176d55ad601e94ecc9d4c Mon Sep 17 00:00:00 2001 From: Vimal Kumar Date: Sat, 2 Mar 2024 01:05:27 +0530 Subject: [PATCH] POWERMON-235: Add support for Redfish - Extended Kepler and KeplerInternal API to add `Exporter.Redfish` - Redfish spec has fields for adding reference to a secret which should contain Redfish credentials. - The user supplied secret is mounted in the kepler container. - Added Unit tests and e2e test Signed-off-by: Vimal Kumar --- ...kepler-operator.clusterserviceversion.yaml | 4 +- ...tainable.computing.io_keplerinternals.yaml | 19 +++ ...stem.sustainable.computing.io_keplers.yaml | 19 +++ ...tainable.computing.io_keplerinternals.yaml | 19 +++ ...stem.sustainable.computing.io_keplers.yaml | 19 +++ config/rbac/role.yaml | 2 + docs/api.md | 100 ++++++++++++ pkg/api/v1alpha1/kepler_internal_types.go | 2 + pkg/api/v1alpha1/kepler_types.go | 17 ++ pkg/api/v1alpha1/zz_generated.deepcopy.go | 26 ++++ pkg/components/exporter/exporter.go | 20 ++- pkg/components/exporter/exporter_test.go | 147 +++++++++++------- pkg/controllers/kepler.go | 1 + pkg/controllers/kepler_internal.go | 85 +++++++--- pkg/reconciler/reconciler.go | 57 +++++++ pkg/utils/k8s/k8s.go | 26 +++- pkg/utils/test/framework.go | 6 + pkg/utils/test/kepler_internal_builder.go | 8 + tests/e2e/kepler_internal_test.go | 90 +++++++++++ tests/run-e2e.sh | 4 +- 20 files changed, 588 insertions(+), 83 deletions(-) diff --git a/bundle/manifests/kepler-operator.clusterserviceversion.yaml b/bundle/manifests/kepler-operator.clusterserviceversion.yaml index 20437377..3e763a11 100644 --- a/bundle/manifests/kepler-operator.clusterserviceversion.yaml +++ b/bundle/manifests/kepler-operator.clusterserviceversion.yaml @@ -27,7 +27,7 @@ metadata: capabilities: Basic Install categories: Monitoring containerImage: quay.io/sustainable_computing_io/kepler-operator:0.10.0 - createdAt: "2024-02-15T04:47:16Z" + createdAt: "2024-03-01T19:29:40Z" description: 'Deploys and Manages Kepler on 
Kubernetes ' operators.operatorframework.io/builder: operator-sdk-v1.27.0 operators.operatorframework.io/internal-objects: |- @@ -117,6 +117,7 @@ spec: resources: - daemonsets - deployments + - secrets verbs: - create - delete @@ -155,6 +156,7 @@ spec: - nodes/metrics - nodes/proxy - nodes/stats + - secrets verbs: - get - list diff --git a/bundle/manifests/kepler.system.sustainable.computing.io_keplerinternals.yaml b/bundle/manifests/kepler.system.sustainable.computing.io_keplerinternals.yaml index 3829f4ad..5ed8c66e 100644 --- a/bundle/manifests/kepler.system.sustainable.computing.io_keplerinternals.yaml +++ b/bundle/manifests/kepler.system.sustainable.computing.io_keplerinternals.yaml @@ -221,6 +221,25 @@ spec: - image - namespace type: object + redfish: + description: RedfishSpec for connecting to Redfish API + properties: + probeInterval: + description: ProbeInterval controls how frequently power info + is queried from Redfish + type: string + secretRef: + description: SecretRef refers to the name of secret which + contains credentials to initialize RedfishClient + type: string + skipSSLVerify: + default: false + description: SkipSSLVerify controls if RedfishClient will + skip verifying server + type: boolean + required: + - secretRef + type: object required: - deployment type: object diff --git a/bundle/manifests/kepler.system.sustainable.computing.io_keplers.yaml b/bundle/manifests/kepler.system.sustainable.computing.io_keplers.yaml index 4204d599..7dc887e9 100644 --- a/bundle/manifests/kepler.system.sustainable.computing.io_keplers.yaml +++ b/bundle/manifests/kepler.system.sustainable.computing.io_keplers.yaml @@ -139,6 +139,25 @@ spec: type: object type: array type: object + redfish: + description: RedfishSpec for connecting to Redfish API + properties: + probeInterval: + description: ProbeInterval controls how frequently power info + is queried from Redfish + type: string + secretRef: + description: SecretRef refers to the name of secret which + contains 
credentials to initialize RedfishClient + type: string + skipSSLVerify: + default: false + description: SkipSSLVerify controls if RedfishClient will + skip verifying server + type: boolean + required: + - secretRef + type: object type: object type: object status: diff --git a/config/crd/bases/kepler.system.sustainable.computing.io_keplerinternals.yaml b/config/crd/bases/kepler.system.sustainable.computing.io_keplerinternals.yaml index dcdaccc7..151582f0 100644 --- a/config/crd/bases/kepler.system.sustainable.computing.io_keplerinternals.yaml +++ b/config/crd/bases/kepler.system.sustainable.computing.io_keplerinternals.yaml @@ -221,6 +221,25 @@ spec: - image - namespace type: object + redfish: + description: RedfishSpec for connecting to Redfish API + properties: + probeInterval: + description: ProbeInterval controls how frequently power info + is queried from Redfish + type: string + secretRef: + description: SecretRef refers to the name of secret which + contains credentials to initialize RedfishClient + type: string + skipSSLVerify: + default: false + description: SkipSSLVerify controls if RedfishClient will + skip verifying server + type: boolean + required: + - secretRef + type: object required: - deployment type: object diff --git a/config/crd/bases/kepler.system.sustainable.computing.io_keplers.yaml b/config/crd/bases/kepler.system.sustainable.computing.io_keplers.yaml index 7816020e..0822f2dd 100644 --- a/config/crd/bases/kepler.system.sustainable.computing.io_keplers.yaml +++ b/config/crd/bases/kepler.system.sustainable.computing.io_keplers.yaml @@ -129,6 +129,25 @@ spec: type: object type: array type: object + redfish: + description: RedfishSpec for connecting to Redfish API + properties: + probeInterval: + description: ProbeInterval controls how frequently power info + is queried from Redfish + type: string + secretRef: + description: SecretRef refers to the name of secret which + contains credentials to initialize RedfishClient + type: string + 
skipSSLVerify: + default: false + description: SkipSSLVerify controls if RedfishClient will + skip verifying server + type: boolean + required: + - secretRef + type: object type: object type: object status: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 667509dc..6187064a 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -9,6 +9,7 @@ rules: resources: - daemonsets - deployments + - secrets verbs: - create - delete @@ -47,6 +48,7 @@ rules: - nodes/metrics - nodes/proxy - nodes/stats + - secrets verbs: - get - list diff --git a/docs/api.md b/docs/api.md index 23544960..11519698 100644 --- a/docs/api.md +++ b/docs/api.md @@ -140,6 +140,13 @@ KeplerInternalSpec defines the desired state of KeplerInternal
true + + redfish + object + + RedfishSpec for connecting to Redfish API
+ + false @@ -265,6 +272,49 @@ The pod this Toleration is attached to tolerates any taint that matches the trip +### KeplerInternal.spec.exporter.redfish +[↩ Parent](#keplerinternalspecexporter) + + + +RedfishSpec for connecting to Redfish API + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
secretRefstring + SecretRef refers to the name of secret which contains credentials to initialize RedfishClient
+
true
probeIntervalstring + ProbeInterval controls how frequently power info is queried from Redfish
+
false
skipSSLVerifyboolean + SkipSSLVerify controls if RedfishClient will skip verifying server
+
+ Default: false
+
false
+ + ### KeplerInternal.spec.estimator [↩ Parent](#keplerinternalspec) @@ -1518,6 +1568,13 @@ KeplerSpec defines the desired state of Kepler
false + + redfish + object + + RedfishSpec for connecting to Redfish API
+ + false @@ -1629,6 +1686,49 @@ The pod this Toleration is attached to tolerates any taint that matches the trip +### Kepler.spec.exporter.redfish +[↩ Parent](#keplerspecexporter) + + + +RedfishSpec for connecting to Redfish API + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
secretRefstring + SecretRef refers to the name of secret which contains credentials to initialize RedfishClient
+
true
probeIntervalstring + ProbeInterval controls how frequently power info is queried from Redfish
+
false
skipSSLVerifyboolean + SkipSSLVerify controls if RedfishClient will skip verifying server
+
+ Default: false
+
false
+ + ### Kepler.status [↩ Parent](#kepler) diff --git a/pkg/api/v1alpha1/kepler_internal_types.go b/pkg/api/v1alpha1/kepler_internal_types.go index 94074533..771f598b 100644 --- a/pkg/api/v1alpha1/kepler_internal_types.go +++ b/pkg/api/v1alpha1/kepler_internal_types.go @@ -39,6 +39,8 @@ type InternalExporterDeploymentSpec struct { type InternalExporterSpec struct { // +kubebuilder:validation:Required Deployment InternalExporterDeploymentSpec `json:"deployment"` + + Redfish *RedfishSpec `json:"redfish,omitempty"` } type DashboardSpec struct { diff --git a/pkg/api/v1alpha1/kepler_types.go b/pkg/api/v1alpha1/kepler_types.go index 41ec8e3a..a23facba 100644 --- a/pkg/api/v1alpha1/kepler_types.go +++ b/pkg/api/v1alpha1/kepler_types.go @@ -38,8 +38,25 @@ type ExporterDeploymentSpec struct { Tolerations []corev1.Toleration `json:"tolerations,omitempty"` } +// RedfishSpec for connecting to Redfish API +type RedfishSpec struct { + // SecretRef refers to the name of secret which contains credentials to initialize RedfishClient + SecretRef string `json:"secretRef"` + + // ProbeInterval controls how frequently power info is queried from Redfish + // +optional + ProbeInterval metav1.Duration `json:"probeInterval,omitempty"` + + // SkipSSLVerify controls if RedfishClient will skip verifying server + // +optional + // +kubebuilder:validation:Required + // +kubebuilder:default:=false + SkipSSLVerify bool `json:"skipSSLVerify,omitempty"` +} + type ExporterSpec struct { Deployment ExporterDeploymentSpec `json:"deployment,omitempty"` + Redfish *RedfishSpec `json:"redfish,omitempty"` } // KeplerSpec defines the desired state of Kepler diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go index 821d301f..621fce09 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -150,6 +150,11 @@ func (in *ExporterDeploymentSpec) DeepCopy() *ExporterDeploymentSpec { func (in *ExporterSpec) 
DeepCopyInto(out *ExporterSpec) { *out = *in in.Deployment.DeepCopyInto(&out.Deployment) + if in.Redfish != nil { + in, out := &in.Redfish, &out.Redfish + *out = new(RedfishSpec) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExporterSpec. @@ -221,6 +226,11 @@ func (in *InternalExporterDeploymentSpec) DeepCopy() *InternalExporterDeployment func (in *InternalExporterSpec) DeepCopyInto(out *InternalExporterSpec) { *out = *in in.Deployment.DeepCopyInto(&out.Deployment) + if in.Redfish != nil { + in, out := &in.Redfish, &out.Redfish + *out = new(RedfishSpec) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InternalExporterSpec. @@ -509,3 +519,19 @@ func (in *OpenShiftSpec) DeepCopy() *OpenShiftSpec { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RedfishSpec) DeepCopyInto(out *RedfishSpec) { + *out = *in + out.ProbeInterval = in.ProbeInterval +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RedfishSpec. 
+func (in *RedfishSpec) DeepCopy() *RedfishSpec { + if in == nil { + return nil + } + out := new(RedfishSpec) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/components/exporter/exporter.go b/pkg/components/exporter/exporter.go index b72e3fb3..89fa4525 100644 --- a/pkg/components/exporter/exporter.go +++ b/pkg/components/exporter/exporter.go @@ -45,6 +45,11 @@ const ( overviewDashboardName = "power-monitoring-overview" nsInfoDashboardName = "power-monitoring-by-ns" DashboardNs = "openshift-config-managed" + REDFISH_ARGS = "-redfish-cred-file-path=/etc/redfish/redfish.csv" + REDFISH_CSV = "redfish.csv" + REDFISH_ANNOTATION = "kepler.system.sustainable.computing.io/redfish-secret-ref" + + IdxKeplerContainer = 0 ) var ( @@ -132,6 +137,19 @@ func NewDaemonSet(detail components.Detail, k *v1alpha1.KeplerInternal) *appsv1. } } +func MountRedfishSecretToDaemonSet(ds *appsv1.DaemonSet, secret *corev1.Secret) { + spec := ds.Spec.Template.Spec + spec.Containers[IdxKeplerContainer].Command = append(spec.Containers[IdxKeplerContainer].Command, REDFISH_ARGS) + spec.Containers[IdxKeplerContainer].VolumeMounts = append(spec.Containers[IdxKeplerContainer].VolumeMounts, + corev1.VolumeMount{Name: "redfish-cred", MountPath: "/etc/redfish", ReadOnly: true}) + spec.Volumes = append(spec.Volumes, + k8s.VolumeFromSecret("redfish-cred", secret.ObjectMeta.Name)) + ds.Spec.Template.Spec = spec + ds.Spec.Template.Annotations = map[string]string{ + REDFISH_ANNOTATION: secret.ResourceVersion, + } +} + func openshiftDashboardObjectMeta(name string) metav1.ObjectMeta { return metav1.ObjectMeta{ Name: name, @@ -377,6 +395,7 @@ func NewSCC(d components.Detail, ki *v1alpha1.KeplerInternal) *secv1.SecurityCon Users: []string{ki.FQServiceAccountName()}, Volumes: []secv1.FSType{ secv1.FSType("configMap"), + secv1.FSType("secret"), secv1.FSType("projected"), secv1.FSType("emptyDir"), secv1.FSType("hostPath")}, @@ -596,7 +615,6 @@ func newExporterContainer(kiName, dsName string, deployment 
v1alpha1.InternalExp "-enable-gpu=$(ENABLE_GPU)", "-v=$(KEPLER_LOG_LEVEL)", "-kernel-source-dir=/usr/share/kepler/kernel_sources", - "-redfish-cred-file-path=/etc/redfish/redfish.csv", }, Ports: []corev1.ContainerPort{{ ContainerPort: int32(deployment.Port), diff --git a/pkg/components/exporter/exporter_test.go b/pkg/components/exporter/exporter_test.go index f9425683..b0295357 100644 --- a/pkg/components/exporter/exporter_test.go +++ b/pkg/components/exporter/exporter_test.go @@ -93,41 +93,36 @@ func TestTolerations(t *testing.T) { } } -func TestHostPID(t *testing.T) { +func TestDaemonSet(t *testing.T) { tt := []struct { - spec v1alpha1.InternalExporterSpec - hostPID bool - scenario string + spec v1alpha1.InternalExporterSpec + hostPID bool + exporterCommand []string + volumeMounts []corev1.VolumeMount + volumes []corev1.Volume + scenario string + addRedfish bool + redfishSecret *corev1.Secret + annotation map[string]string }{ { - spec: v1alpha1.InternalExporterSpec{}, - hostPID: true, - scenario: "default case", - }, - } - - for _, tc := range tt { - tc := tc - t.Run(tc.scenario, func(t *testing.T) { - t.Parallel() - k := v1alpha1.KeplerInternal{ - Spec: v1alpha1.KeplerInternalSpec{ - Exporter: tc.spec, + spec: v1alpha1.InternalExporterSpec{ + Deployment: v1alpha1.InternalExporterDeploymentSpec{ + ExporterDeploymentSpec: v1alpha1.ExporterDeploymentSpec{ + Port: 9103, + }, }, - } - actual := k8s.HostPIDFromDS(NewDaemonSet(components.Full, &k)) - assert.Equal(t, actual, tc.hostPID) - }) - } -} -func TestVolumeMounts(t *testing.T) { - tt := []struct { - spec v1alpha1.InternalExporterSpec - volumeMounts []corev1.VolumeMount - scenario string - }{ - { - spec: v1alpha1.InternalExporterSpec{}, + }, + hostPID: true, + exporterCommand: []string{ + "/usr/bin/kepler", + "-address", + "0.0.0.0:9103", + "-enable-cgroup-id=true", + "-enable-gpu=$(ENABLE_GPU)", + "-v=$(KEPLER_LOG_LEVEL)", + "-kernel-source-dir=/usr/share/kepler/kernel_sources", + }, volumeMounts: 
[]corev1.VolumeMount{ {Name: "lib-modules", MountPath: "/lib/modules", ReadOnly: true}, {Name: "tracing", MountPath: "/sys", ReadOnly: true}, @@ -135,40 +130,61 @@ func TestVolumeMounts(t *testing.T) { {Name: "proc", MountPath: "/proc"}, {Name: "cfm", MountPath: "/etc/kepler/kepler.config"}, }, + volumes: []corev1.Volume{ + k8s.VolumeFromHost("lib-modules", "/lib/modules"), + k8s.VolumeFromHost("tracing", "/sys"), + k8s.VolumeFromHost("proc", "/proc"), + k8s.VolumeFromHost("kernel-src", "/usr/src/kernels"), + k8s.VolumeFromConfigMap("cfm", "kepler-internal"), + }, scenario: "default case", }, - } - - for _, tc := range tt { - tc := tc - t.Run(tc.scenario, func(t *testing.T) { - t.Parallel() - k := v1alpha1.KeplerInternal{ - Spec: v1alpha1.KeplerInternalSpec{ - Exporter: tc.spec, - }, - } - actual := k8s.VolumeMountsFromDS(NewDaemonSet(components.Full, &k)) - assert.Equal(t, actual, tc.volumeMounts) - }) - } -} -func TestVolumes(t *testing.T) { - tt := []struct { - spec v1alpha1.InternalExporterSpec - volumes []corev1.Volume - scenario string - }{ { - spec: v1alpha1.InternalExporterSpec{}, + spec: v1alpha1.InternalExporterSpec{ + Deployment: v1alpha1.InternalExporterDeploymentSpec{ + ExporterDeploymentSpec: v1alpha1.ExporterDeploymentSpec{ + Port: 9103, + }, + }, + }, + hostPID: true, + exporterCommand: []string{ + "/usr/bin/kepler", + "-address", + "0.0.0.0:9103", + "-enable-cgroup-id=true", + "-enable-gpu=$(ENABLE_GPU)", + "-v=$(KEPLER_LOG_LEVEL)", + "-kernel-source-dir=/usr/share/kepler/kernel_sources", + "-redfish-cred-file-path=/etc/redfish/redfish.csv", + }, + volumeMounts: []corev1.VolumeMount{ + {Name: "lib-modules", MountPath: "/lib/modules", ReadOnly: true}, + {Name: "tracing", MountPath: "/sys", ReadOnly: true}, + {Name: "kernel-src", MountPath: "/usr/src/kernels", ReadOnly: true}, + {Name: "proc", MountPath: "/proc"}, + {Name: "cfm", MountPath: "/etc/kepler/kepler.config"}, + {Name: "redfish-cred", MountPath: "/etc/redfish", ReadOnly: true}, + }, 
volumes: []corev1.Volume{ k8s.VolumeFromHost("lib-modules", "/lib/modules"), k8s.VolumeFromHost("tracing", "/sys"), k8s.VolumeFromHost("proc", "/proc"), k8s.VolumeFromHost("kernel-src", "/usr/src/kernels"), k8s.VolumeFromConfigMap("cfm", "kepler-internal"), + k8s.VolumeFromSecret("redfish-cred", "my-redfish-secret"), }, - scenario: "default case", + addRedfish: true, + redfishSecret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-redfish-secret", + ResourceVersion: "123", + }, + }, + annotation: map[string]string{ + "kepler.system.sustainable.computing.io/redfish-secret-ref": "123", + }, + scenario: "redfish case", }, } @@ -184,8 +200,25 @@ func TestVolumes(t *testing.T) { Exporter: tc.spec, }, } - actual := k8s.VolumesFromDS(NewDaemonSet(components.Full, &k)) - assert.Equal(t, actual, tc.volumes) + ds := NewDaemonSet(components.Full, &k) + if tc.addRedfish { + MountRedfishSecretToDaemonSet(ds, tc.redfishSecret) + } + + actual_hostPID := k8s.HostPIDFromDS(ds) + assert.Equal(t, actual_hostPID, tc.hostPID) + + actual_exporterCommand := k8s.CommandFromDS(ds, IdxKeplerContainer) + assert.Equal(t, actual_exporterCommand, tc.exporterCommand) + + actual_volumeMounts := k8s.VolumeMountsFromDS(ds, IdxKeplerContainer) + assert.Equal(t, actual_volumeMounts, tc.volumeMounts) + + actual_Volumes := k8s.VolumesFromDS(ds) + assert.Equal(t, actual_Volumes, tc.volumes) + + actual_Annotation := k8s.AnnotationFromDS(ds) + assert.Equal(t, actual_Annotation, tc.annotation) }) } } diff --git a/pkg/controllers/kepler.go b/pkg/controllers/kepler.go index da9f4b86..649e567c 100644 --- a/pkg/controllers/kepler.go +++ b/pkg/controllers/kepler.go @@ -269,6 +269,7 @@ func newKeplerInternal(d components.Detail, k *v1alpha1.Kepler) *v1alpha1.Kepler Image: Config.Image, Namespace: KeplerDeploymentNS, }, + Redfish: k.Spec.Exporter.Redfish, }, OpenShift: v1alpha1.OpenShiftSpec{ Enabled: isOpenShift, diff --git a/pkg/controllers/kepler_internal.go b/pkg/controllers/kepler_internal.go 
index 137b04c0..b8371c3d 100644 --- a/pkg/controllers/kepler_internal.go +++ b/pkg/controllers/kepler_internal.go @@ -8,8 +8,10 @@ import ( "github.com/go-logr/logr" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/sustainable.computing.io/kepler-operator/pkg/api/v1alpha1" "github.com/sustainable.computing.io/kepler-operator/pkg/components" @@ -46,12 +48,12 @@ type KeplerInternalReconciler struct { //+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=*,verbs=* // RBAC for running Kepler exporter -//+kubebuilder:rbac:groups=apps,resources=daemonsets;deployments,verbs=list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=apps,resources=daemonsets;deployments;secrets,verbs=list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,verbs=list;watch;create;update;patch;delete;use //+kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors;prometheusrules,verbs=list;watch;create;update;patch;delete // RBAC required by Kepler exporter -//+kubebuilder:rbac:groups=core,resources=nodes/metrics;nodes/proxy;nodes/stats,verbs=get;list;watch +//+kubebuilder:rbac:groups=core,resources=nodes/metrics;nodes/proxy;nodes/stats;secrets,verbs=get;list;watch // SetupWithManager sets up the controller with the Manager. func (r *KeplerInternalReconciler) SetupWithManager(mgr ctrl.Manager) error { @@ -74,12 +76,40 @@ func (r *KeplerInternalReconciler) SetupWithManager(mgr ctrl.Manager) error { Owns(&rbacv1.ClusterRoleBinding{}, genChanged). 
Owns(&rbacv1.ClusterRole{}, genChanged) + c = c.Watches(&corev1.Secret{}, + handler.EnqueueRequestsFromMapFunc(r.mapSecretToRequests), + builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}), + ) + if Config.Cluster == k8s.OpenShift { c = c.Owns(&secv1.SecurityContextConstraints{}, genChanged) } return c.Complete(r) } +// mapSecretToRequests returns the reconcile requests for kepler-internal objects for which an associated redfish secret has been changed. KeplerInternals without a Redfish spec are skipped, since Spec.Exporter.Redfish is an optional pointer and may be nil. +func (r *KeplerInternalReconciler) mapSecretToRequests(ctx context.Context, object client.Object) []reconcile.Request { + + secret, _ := object.(*corev1.Secret) + + ks := v1alpha1.KeplerInternalList{} + if err := r.List(ctx, &ks); err != nil { + return nil + } + + requests := []reconcile.Request{} + for _, ki := range ks.Items { + ex := ki.Spec.Exporter + if ex.Redfish != nil && ex.Redfish.SecretRef == secret.GetName() && + ex.Deployment.Namespace == secret.GetNamespace() { + requests = append(requests, reconcile.Request{ + NamespacedName: types.NamespacedName{Name: ki.ObjectMeta.Name, Namespace: ki.ObjectMeta.Namespace}, + }) + } + } + return requests +} + // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. 
// TODO(user): Modify the Reconcile function to compare the state specified by @@ -93,7 +123,7 @@ func (r *KeplerInternalReconciler) Reconcile(ctx context.Context, req ctrl.Reque logger := log.FromContext(ctx) r.logger = logger - logger.Info("Start of reconcile") + logger.Info("Start of reconcile") defer logger.Info("End of reconcile") ki, err := r.getInternal(ctx, req) @@ -123,7 +153,7 @@ func (r *KeplerInternalReconciler) Reconcile(ctx context.Context, req ctrl.Reque func (r KeplerInternalReconciler) runReconcilers(ctx context.Context, ki *v1alpha1.KeplerInternal) (ctrl.Result, error) { reconcilers := r.reconcilersForInternal(ki) - r.logger.V(6).Info("renconcilers ...", "count", len(reconcilers)) + r.logger.V(6).Info("reconcilers ...", "count", len(reconcilers)) return reconciler.Runner{ Reconcilers: reconcilers, @@ -414,33 +444,33 @@ func availableCondition(dset *appsv1.DaemonSet) v1alpha1.Condition { return c } -func (r KeplerInternalReconciler) reconcilersForInternal(k *v1alpha1.KeplerInternal) []reconciler.Reconciler { +func (r KeplerInternalReconciler) reconcilersForInternal(ki *v1alpha1.KeplerInternal) []reconciler.Reconciler { rs := []reconciler.Reconciler{} - cleanup := !k.DeletionTimestamp.IsZero() + cleanup := !ki.DeletionTimestamp.IsZero() if !cleanup { // NOTE: create namespace first and for deletion, reverse the order rs = append(rs, reconciler.Updater{ - Owner: k, - Resource: components.NewNamespace(k.Namespace()), + Owner: ki, + Resource: components.NewNamespace(ki.Namespace()), OnError: reconciler.Requeue, Logger: r.logger, }) } - if k.Spec.Estimator != nil { - if k.Spec.Estimator.Image == "" { - k.Spec.Estimator.Image = InternalConfig.EstimatorImage + if ki.Spec.Estimator != nil { + if ki.Spec.Estimator.Image == "" { + ki.Spec.Estimator.Image = InternalConfig.EstimatorImage } } - rs = append(rs, exporterReconcilers(k, Config.Cluster)...) + rs = append(rs, exporterReconcilers(ki, Config.Cluster)...) 
- if k.Spec.ModelServer != nil && k.Spec.ModelServer.Enabled { - if k.Spec.ModelServer.Image == "" { - k.Spec.ModelServer.Image = InternalConfig.ModelServerImage + if ki.Spec.ModelServer != nil && ki.Spec.ModelServer.Enabled { + if ki.Spec.ModelServer.Image == "" { + ki.Spec.ModelServer.Image = InternalConfig.ModelServerImage } - reconcilers, err := modelServerInternalReconcilers(k) + reconcilers, err := modelServerInternalReconcilers(ki) if err != nil { r.logger.Info(fmt.Sprintf("cannot init model server reconciler from config: %v", err)) } else { @@ -451,7 +481,7 @@ func (r KeplerInternalReconciler) reconcilersForInternal(k *v1alpha1.KeplerInter if cleanup { rs = append(rs, reconciler.Deleter{ OnError: reconciler.Requeue, - Resource: components.NewNamespace(k.Namespace()), + Resource: components.NewNamespace(ki.Namespace()), WaitTimeout: 2 * time.Minute, }) } @@ -459,7 +489,7 @@ func (r KeplerInternalReconciler) reconcilersForInternal(k *v1alpha1.KeplerInter // WARN: only run finalizer if theren't any errors // this bug 🐛 must be FIXED rs = append(rs, reconciler.Finalizer{ - Resource: k, + Resource: ki, Finalizer: Finalizer, Logger: r.logger, }) @@ -490,12 +520,27 @@ func exporterReconcilers(ki *v1alpha1.KeplerInternal, cluster k8s.Cluster) []rec // namespace scoped rs = append(rs, resourceReconcilers(updateResource, exporter.NewServiceAccount(ki), - exporter.NewConfigMap(components.Full, ki), - exporter.NewDaemonSet(components.Full, ki), exporter.NewService(ki), exporter.NewServiceMonitor(ki), exporter.NewPrometheusRule(ki), )...) + if ki.Spec.Exporter.Redfish == nil { + rs = append(rs, resourceReconcilers(updateResource, + exporter.NewDaemonSet(components.Full, ki), + exporter.NewConfigMap(components.Full, ki), + )...) 
+ } else { + rs = append(rs, + reconciler.KeplerDaemonSetReconciler{ + Ki: *ki, + Ds: exporter.NewDaemonSet(components.Full, ki), + }, + reconciler.KeplerConfigMapReconciler{ + Ki: *ki, + Cfm: exporter.NewConfigMap(components.Full, ki), + }, + ) + } rs = append(rs, resourceReconcilers(updateResource, openshiftNamespacedResources(ki, cluster)...)...) return rs } diff --git a/pkg/reconciler/reconciler.go b/pkg/reconciler/reconciler.go index 81de3d00..8622bd21 100644 --- a/pkg/reconciler/reconciler.go +++ b/pkg/reconciler/reconciler.go @@ -18,8 +18,16 @@ package reconciler import ( "context" + "fmt" + "strconv" + "github.com/sustainable.computing.io/kepler-operator/pkg/api/v1alpha1" + "github.com/sustainable.computing.io/kepler-operator/pkg/components/exporter" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -44,3 +52,52 @@ type Result struct { type Reconciler interface { Reconcile(context.Context, client.Client, *runtime.Scheme) Result } + +type KeplerDaemonSetReconciler struct { + Ki v1alpha1.KeplerInternal + Ds *appsv1.DaemonSet +} + +func (r KeplerDaemonSetReconciler) Reconcile(ctx context.Context, cli client.Client, s *runtime.Scheme) Result { + + secretRef := r.Ki.Spec.Exporter.Redfish.SecretRef + secret, err := r.getRedfishSecret(ctx, cli, secretRef) + + if err != nil { + return Result{Action: Stop, Error: fmt.Errorf("Error occured while getting secret %w", err)} + } + if secret == nil { + return Result{Action: Stop, Error: fmt.Errorf("Redfish secret configured, but secret %q not found", secretRef)} + } + if _, ok := secret.Data[exporter.REDFISH_CSV]; !ok { + return Result{Action: Stop, Error: fmt.Errorf("Redfish secret does not contain \"redfish.csv\"")} + } + + exporter.MountRedfishSecretToDaemonSet(r.Ds, secret) + + return Updater{Owner: &r.Ki, Resource: r.Ds}.Reconcile(ctx, cli, s) 
+} + +func (r KeplerDaemonSetReconciler) getRedfishSecret(ctx context.Context, cli client.Client, secretName string) (*corev1.Secret, error) { + ns := r.Ki.Spec.Exporter.Deployment.Namespace + redfishSecret := corev1.Secret{} + if err := cli.Get(ctx, types.NamespacedName{Namespace: ns, Name: secretName}, &redfishSecret); err != nil { + return nil, client.IgnoreNotFound(err) + } + return &redfishSecret, nil +} + +type KeplerConfigMapReconciler struct { + Ki v1alpha1.KeplerInternal + Cfm *corev1.ConfigMap +} + +func (r KeplerConfigMapReconciler) Reconcile(ctx context.Context, cli client.Client, s *runtime.Scheme) Result { + rf := r.Ki.Spec.Exporter.Redfish + zero := metav1.Duration{} + if rf.ProbeInterval != zero { + r.Cfm.Data["REDFISH_PROBE_INTERVAL_IN_SECONDS"] = fmt.Sprintf("%f", rf.ProbeInterval.Duration.Seconds()) + } + r.Cfm.Data["REDFISH_SKIP_SSL_VERIFY"] = strconv.FormatBool(rf.SkipSSLVerify) + return Updater{Owner: &r.Ki, Resource: r.Cfm}.Reconcile(ctx, cli, s) +} diff --git a/pkg/utils/k8s/k8s.go b/pkg/utils/k8s/k8s.go index 56f68771..5160f6c8 100644 --- a/pkg/utils/k8s/k8s.go +++ b/pkg/utils/k8s/k8s.go @@ -23,6 +23,7 @@ import ( "github.com/sustainable.computing.io/kepler-operator/pkg/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/utils/pointer" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -157,14 +158,35 @@ func HostPIDFromDS(ds *appsv1.DaemonSet) bool { return ds.Spec.Template.Spec.HostPID } -func VolumeMountsFromDS(ds *appsv1.DaemonSet) []corev1.VolumeMount { - return ds.Spec.Template.Spec.Containers[0].VolumeMounts +func CommandFromDS(ds *appsv1.DaemonSet, index int) []string { + return ds.Spec.Template.Spec.Containers[index].Command +} + +func AnnotationFromDS(ds *appsv1.DaemonSet) map[string]string { + return ds.Spec.Template.Annotations +} + +func VolumeMountsFromDS(ds *appsv1.DaemonSet, index int) []corev1.VolumeMount { + return ds.Spec.Template.Spec.Containers[index].VolumeMounts } func VolumesFromDS(ds 
*appsv1.DaemonSet) []corev1.Volume { return ds.Spec.Template.Spec.Volumes } +func VolumeFromSecret(name, secretName string) corev1.Volume { + return corev1.Volume{ + Name: name, + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: secretName, + Optional: pointer.Bool(true), + DefaultMode: pointer.Int32(420), + }, + }, + } +} + func AllowsFromSCC(SCC *secv1.SecurityContextConstraints) SCCAllows { return SCCAllows{ AllowPrivilegedContainer: SCC.AllowPrivilegedContainer, diff --git a/pkg/utils/test/framework.go b/pkg/utils/test/framework.go index 3a5f8195..6c5aa3cf 100644 --- a/pkg/utils/test/framework.go +++ b/pkg/utils/test/framework.go @@ -193,6 +193,12 @@ func (f Framework) DeleteKepler(name string) { }) } +func (f Framework) GetKeplerInternal(name string) *v1alpha1.KeplerInternal { + kepler := v1alpha1.KeplerInternal{} + f.AssertResourceExists(name, "", &kepler) + return &kepler +} + func (f Framework) CreateInternal(name string, fns ...internalFn) *v1alpha1.KeplerInternal { ki := v1alpha1.KeplerInternal{ TypeMeta: metav1.TypeMeta{ diff --git a/pkg/utils/test/kepler_internal_builder.go b/pkg/utils/test/kepler_internal_builder.go index 6c208293..c1883568 100644 --- a/pkg/utils/test/kepler_internal_builder.go +++ b/pkg/utils/test/kepler_internal_builder.go @@ -72,3 +72,11 @@ func (InternalBuilder) WithCluster(c k8s.Cluster) func(k *v1alpha1.KeplerInterna } } } + +func (InternalBuilder) WithRedfish(c k8s.Cluster, secretName string) func(k *v1alpha1.KeplerInternal) { + return func(k *v1alpha1.KeplerInternal) { + k.Spec.Exporter.Redfish = &v1alpha1.RedfishSpec{ + SecretRef: secretName, + } + } +} diff --git a/tests/e2e/kepler_internal_test.go b/tests/e2e/kepler_internal_test.go index d3739a4e..63f6c08e 100644 --- a/tests/e2e/kepler_internal_test.go +++ b/tests/e2e/kepler_internal_test.go @@ -16,15 +16,22 @@ limitations under the License. 
package e2e import ( + "context" + "fmt" "testing" "time" "github.com/stretchr/testify/assert" "github.com/sustainable.computing.io/kepler-operator/pkg/api/v1alpha1" + "github.com/sustainable.computing.io/kepler-operator/pkg/components/exporter" "github.com/sustainable.computing.io/kepler-operator/pkg/controllers" + "github.com/sustainable.computing.io/kepler-operator/pkg/utils/k8s" "github.com/sustainable.computing.io/kepler-operator/pkg/utils/test" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" ) const ( @@ -63,6 +70,89 @@ func TestKeplerInternal_Reconciliation(t *testing.T) { f.AssertInternalStatus(ki.Name, test.Timeout(5*time.Minute)) } +func TestKeplerInternal_ReconciliationWithRedfish(t *testing.T) { + f := test.NewFramework(t) + name := "e2e-ki-redfish" + secretName := "my-redfish-secret" + // test namespace must be the deployment namespace for controller + // to watch the deployments / daemonsets etc + testNs := controllers.KeplerDeploymentNS + + // pre-condition + f.AssertNoResourceExists(name, "", &v1alpha1.KeplerInternal{}, test.NoWait()) + + // when + b := test.InternalBuilder{} + ki := f.CreateInternal(name, + b.WithNamespace(testNs), + b.WithExporterImage(keplerImage), + b.WithExporterPort(9108), + b.WithCluster(Cluster), + b.WithRedfish(Cluster, secretName), + ) + + // then the following resources will be created + f.AssertResourceExists(testNs, "", &corev1.Namespace{}) + + ds := appsv1.DaemonSet{} + f.AssertNoResourceExists(ki.Name, testNs, &ds) + + cond := f.GetKeplerInternal(name).Status.Exporter.Conditions + assert.True(t, len(cond) > 1) + assert.Equal(t, fmt.Sprintf("Redfish secret configured, but secret %q not found", secretName), cond[0].Message) + + // create redfish secret + redfishSecret := corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: controllers.KeplerDeploymentNS, 
+ }, + Data: map[string][]byte{ + "redfish.csv": []byte("dummy"), + }, + } + err := f.Client().Create(context.TODO(), &redfishSecret) + assert.NoError(t, err) + + // wait for DaemonSet to be created + f.AssertResourceExists(ki.Name, testNs, &ds) + cond = f.GetKeplerInternal(name).Status.Exporter.Conditions + assert.True(t, len(cond) > 1) + + containers := ds.Spec.Template.Spec.Containers + assert.Equal(t, 1, len(containers)) + exp := containers[exporter.IdxKeplerContainer] + assert.Contains(t, exp.Command, exporter.REDFISH_ARGS) + assert.Contains(t, exp.VolumeMounts, + corev1.VolumeMount{Name: "redfish-cred", MountPath: "/etc/redfish", ReadOnly: true}) + assert.Contains(t, ds.Spec.Template.Spec.Volumes, + k8s.VolumeFromSecret("redfish-cred", redfishSecret.Name)) + assert.Contains(t, ds.Spec.Template.Annotations, exporter.REDFISH_ANNOTATION) + + og := ds.Status.ObservedGeneration + assert.Equal(t, og, int64(1)) + + redfishSecret.Data["redfish.csv"] = []byte("dummy2") + err = f.Client().Update(context.TODO(), &redfishSecret) + assert.NoError(t, err) + + // wait for DaemonSet to restart + ds = appsv1.DaemonSet{} + f.WaitUntil("Daemonset to restart", func() (bool, error) { + err := f.Client().Get(context.TODO(), + client.ObjectKey{Namespace: controllers.KeplerDeploymentNS, Name: ki.Name}, &ds) + if errors.IsNotFound(err) { + return false, nil + } else if err != nil { + return false, err + } + return ds.Status.ObservedGeneration == og+1, nil + }) + + // test expected status + f.AssertInternalStatus(ki.Name) +} + func TestKeplerInternal_WithEstimator(t *testing.T) { f := test.NewFramework(t) name := "e2e-ki-with-estimator" diff --git a/tests/run-e2e.sh b/tests/run-e2e.sh index 0f52c0af..2338e948 100755 --- a/tests/run-e2e.sh +++ b/tests/run-e2e.sh @@ -11,7 +11,7 @@ source "$PROJECT_ROOT/hack/utils.bash" declare -r LOCAL_BIN="$PROJECT_ROOT/tmp/bin" declare -r OPERATOR="kepler-operator" declare -r OLM_CATALOG="kepler-operator-catalog" -declare -r VERSION="0.0.0-e2e" 
+declare -r VERSION="${VERSION:-0.0.0-e2e}" # an exported VERSION overrides the e2e default
 declare -r OPERATOR_DEPLOY_YAML="config/manager/manager.yaml"
 declare -r OPERATOR_CSV="bundle/manifests/$OPERATOR.clusterserviceversion.yaml"
 declare -r OPERATOR_DEPLOY_NAME="kepler-operator-controller"
@@ -302,7 +302,7 @@ restart_operator() {
 	info "scale down Operator"
 	run kubectl scale -n "$OPERATORS_NS" --replicas=0 "$deployment"
 	run kubectl wait -n "$OPERATORS_NS" --for=delete \
-		pods -l app.kubernetes.io/component=operator --timeout=60s
+		pods -l app.kubernetes.io/component=manager --timeout=60s
 
 	update_crds