Skip to content

Commit

Permalink
feat: Provisioning for fw binaries
Browse files Browse the repository at this point in the history
Introduced two new CRDs: NicFirmwareSource and NicFirmwareTemplate
New controller reconciling NicFirmwareSources
firmware package with logic for provisioning firmware files on a shared storage
unit tests

Signed-off-by: Alexander Maslennikov <amaslennikov@nvidia.com>
  • Loading branch information
almaslennikov committed Jan 18, 2025
1 parent 26b9aa9 commit 3ff1374
Show file tree
Hide file tree
Showing 12 changed files with 1,934 additions and 3 deletions.
7 changes: 4 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ COPY ./ ./
#RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/maintenance-manager/main.go
RUN --mount=type=cache,target=/go/pkg/mod/ GO_GCFLAGS=${GCFLAGS} make build-manager

# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
FROM gcr.io/distroless/static:nonroot
FROM quay.io/centos/centos:stream9

RUN yum -y install mstflint && yum clean all

WORKDIR /
COPY --from=builder /workspace/build/manager .
COPY bindata /bindata
Expand Down
10 changes: 10 additions & 0 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (

configurationnetv1alpha1 "github.com/Mellanox/nic-configuration-operator/api/v1alpha1"
"github.com/Mellanox/nic-configuration-operator/internal/controller"
"github.com/Mellanox/nic-configuration-operator/pkg/firmware"
"github.com/Mellanox/nic-configuration-operator/pkg/ncolog"
"github.com/Mellanox/nic-configuration-operator/pkg/version"
//+kubebuilder:scaffold:imports
Expand Down Expand Up @@ -145,6 +146,15 @@ func main() {
setupLog.Error(err, "unable to create controller", "controller", "NicConfigurationTemplate")
os.Exit(1)
}

if err = (&controller.NicFirmwareSourceReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
FirmwareProvisioner: firmware.NewFirmwareProvisioner(),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "NicFirmwareSource")
os.Exit(1)
}
//+kubebuilder:scaffold:builder

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
Expand Down
139 changes: 139 additions & 0 deletions internal/controller/nicfirmwaresource_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
2025 NVIDIA CORPORATION & AFFILIATES
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
"context"

"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

"github.com/Mellanox/nic-configuration-operator/api/v1alpha1"
"github.com/Mellanox/nic-configuration-operator/pkg/consts"
"github.com/Mellanox/nic-configuration-operator/pkg/firmware"
)

// NicFirmwareSourceReconciler reconciles a NicFirmwareSource object.
// For each NicFirmwareSource it drives the FirmwareProvisioner through cache
// verification, download, processing and validation, and records the outcome
// in the object's status.
type NicFirmwareSourceReconciler struct {
// Embedded client used to read NicFirmwareSource objects and update their status.
client.Client
// Scheme is populated by the caller that constructs the reconciler; it is not
// referenced by the methods in this file.
Scheme *runtime.Scheme

// FirmwareProvisioner performs the firmware cache operations
// (verify, download/unzip, add-by-metadata, validate) invoked during reconciliation.
FirmwareProvisioner firmware.FirmwareProvisioner
}

// Reconcile reconciles the NicFirmwareSource object identified by req.
//
// Flow:
//  1. Fetch the object; a not-found error ends reconciliation without requeue.
//  2. Ask the provisioner which of the spec's BinUrlSource URLs still need
//     processing for the cache named after the object.
//  3. If none are left, set the Processing status and go straight to ValidateCache.
//  4. Otherwise set the Downloading status, download and unzip the archives,
//     set the Processing status, add the binaries to the cache by metadata,
//     and finish with ValidateCache.
//
// On any provisioner failure the matching "failed" status and the error text
// are written to the object's status, and the provisioner error is returned so
// that the request is retried. If writing the status itself fails, that error
// is returned instead.
func (r *NicFirmwareSourceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	// Fetch the NicFirmwareSource instance
	instance := &v1alpha1.NicFirmwareSource{}
	err := r.Get(ctx, req.NamespacedName, instance)

	// TODO use finalizers to clean up cache storage after CR deletion

	if err != nil {
		if errors.IsNotFound(err) {
			// Request object not found, could have been deleted after reconcile request.
			// Return and don't requeue
			return reconcile.Result{}, nil
		}
		// Error reading the object - requeue the request.
		return reconcile.Result{}, err
	}

	cacheName := instance.Name

	urlsToProcess, err := r.FirmwareProvisioner.VerifyCachedBinaries(cacheName, instance.Spec.BinUrlSource)
	if err != nil {
		// Keep the status-update result in its own variable: reusing err here
		// would overwrite the verification failure with nil on a successful
		// status update and silently drop the error.
		if updateErr := r.updateStatus(ctx, instance, consts.FirmwareSourceCacheVerificationFailedStatus, err, nil); updateErr != nil {
			return reconcile.Result{}, updateErr
		}
		return reconcile.Result{}, err
	}
	if len(urlsToProcess) == 0 {
		// Nothing left to download; only the cache validation remains.
		if err = r.updateStatus(ctx, instance, consts.FirmwareSourceProcessingStatus, nil, nil); err != nil {
			return reconcile.Result{}, err
		}

		return r.ValidateCache(ctx, instance)
	}

	if err = r.updateStatus(ctx, instance, consts.FirmwareSourceDownloadingStatus, nil, nil); err != nil {
		return reconcile.Result{}, err
	}

	err = r.FirmwareProvisioner.DownloadAndUnzipFirmwareArchives(cacheName, urlsToProcess, true)
	if err != nil {
		// Report the download failure in the status but still return it.
		if updateErr := r.updateStatus(ctx, instance, consts.FirmwareSourceDownloadFailedStatus, err, nil); updateErr != nil {
			return reconcile.Result{}, updateErr
		}
		return reconcile.Result{}, err
	}

	if err = r.updateStatus(ctx, instance, consts.FirmwareSourceProcessingStatus, nil, nil); err != nil {
		return reconcile.Result{}, err
	}

	err = r.FirmwareProvisioner.AddFirmwareBinariesToCacheByMetadata(cacheName)
	if err != nil {
		// Report the processing failure in the status but still return it.
		if updateErr := r.updateStatus(ctx, instance, consts.FirmwareSourceProcessingFailedStatus, err, nil); updateErr != nil {
			return reconcile.Result{}, updateErr
		}
		return reconcile.Result{}, err
	}

	return r.ValidateCache(ctx, instance)
}

// ValidateCache asks the provisioner to validate the firmware cache named
// after instance and records the outcome in the object's status.
//
// On success the Success status is written together with the versions map
// returned by the provisioner. On failure the ProcessingFailed status and the
// error text are written, and the validation error is returned so that the
// request is retried. If writing the status itself fails, that error is
// returned instead.
func (r *NicFirmwareSourceReconciler) ValidateCache(ctx context.Context, instance *v1alpha1.NicFirmwareSource) (reconcile.Result, error) {
	versions, err := r.FirmwareProvisioner.ValidateCache(instance.Name)
	if err != nil {
		// Keep the status-update result in its own variable: reusing err here
		// would overwrite the validation failure with nil on a successful
		// status update and silently drop the error.
		if updateErr := r.updateStatus(ctx, instance, consts.FirmwareSourceProcessingFailedStatus, err, nil); updateErr != nil {
			return reconcile.Result{}, updateErr
		}
		return reconcile.Result{}, err
	}

	if err = r.updateStatus(ctx, instance, consts.FirmwareSourceSuccessStatus, nil, versions); err != nil {
		return reconcile.Result{}, err
	}

	return ctrl.Result{}, nil
}

// updateStatus overwrites the status of obj and persists it through the
// status client. State is set to status, Reason to err's message (or the
// empty string when err is nil), and Versions to versions. It returns the
// result of the status update call.
func (r *NicFirmwareSourceReconciler) updateStatus(ctx context.Context, obj *v1alpha1.NicFirmwareSource, status string, err error, versions map[string][]string) error {
	obj.Status.State = status

	// An absent error clears any previously recorded reason.
	reason := ""
	if err != nil {
		reason = err.Error()
	}
	obj.Status.Reason = reason
	obj.Status.Versions = versions

	statusWriter := r.Status()
	return statusWriter.Update(ctx, obj)
}

// SetupWithManager registers this reconciler with mgr as a controller named
// "nicFirmwareSourceReconciler" for NicFirmwareSource objects.
func (r *NicFirmwareSourceReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&v1alpha1.NicFirmwareSource{}).
		Named("nicFirmwareSourceReconciler").
		Complete(r)
}
Loading

0 comments on commit 3ff1374

Please sign in to comment.