diff --git a/ods_ci/tasks/Resources/Provisioning/GPU/AMD/amd_operator.sh b/ods_ci/tasks/Resources/Provisioning/GPU/AMD/amd_operator.sh index 1f5c2e95c..fd38ff64c 100755 --- a/ods_ci/tasks/Resources/Provisioning/GPU/AMD/amd_operator.sh +++ b/ods_ci/tasks/Resources/Provisioning/GPU/AMD/amd_operator.sh @@ -164,6 +164,32 @@ EOF fi } +function applyWorkaroundForOlderOCPVersions () { + # workaround for OCP versions less than 4.16 + # AMD certified operator is published starting from OCP v4.16 + ocpVersion=$(oc version --output json | jq '.openshiftVersion' | tr -d '"') + IFS='.' read -ra ocpVersionSplit <<< "$ocpVersion" + if [ "${ocpVersionSplit[1]}" -lt 16 ]; then + echo "OCP Version: $ocpVersion" + echo "AMD Operator is not available for versions < 4.16, hence creating custom catalog source as workaround" + oc apply -f - </$imageUrl/g" $NFD_INSTANCE +oc apply -f "$NFD_INSTANCE" diff --git a/ods_ci/tasks/Resources/Provisioning/GPU/nfd_deploy.yaml b/ods_ci/tasks/Resources/Provisioning/GPU/NFD/nfd_deploy.yaml similarity index 85% rename from ods_ci/tasks/Resources/Provisioning/GPU/nfd_deploy.yaml rename to ods_ci/tasks/Resources/Provisioning/GPU/NFD/nfd_deploy.yaml index 113980150..ffa3dd1f9 100644 --- a/ods_ci/tasks/Resources/Provisioning/GPU/nfd_deploy.yaml +++ b/ods_ci/tasks/Resources/Provisioning/GPU/NFD/nfd_deploy.yaml @@ -7,8 +7,8 @@ spec: instance: "" # instance is empty by default topologyupdater: false # False by default operand: - # Image digest for registry.redhat.io/openshift4/ose-node-feature-discovery:v4.11 - image: registry.redhat.io/openshift4/ose-node-feature-discovery@sha256:d6242132d2ddec00c46d22b63015a33af821eace0150ba47d185cd992fee317d + # Image URL example: registry.redhat.io/openshift4/ose-node-feature-discovery:v4.11 + image: imagePullPolicy: Always workerConfig: configData: | diff --git a/ods_ci/tasks/Resources/Provisioning/GPU/nfd_operator.yaml b/ods_ci/tasks/Resources/Provisioning/GPU/NFD/nfd_operator.yaml similarity index 100% rename from ods_ci/tasks/Resources/Provisioning/GPU/nfd_operator.yaml rename to ods_ci/tasks/Resources/Provisioning/GPU/NFD/nfd_operator.yaml diff --git a/ods_ci/tasks/Resources/Provisioning/GPU/NVIDIA/gpu_deploy.sh b/ods_ci/tasks/Resources/Provisioning/GPU/NVIDIA/gpu_deploy.sh index 56934125a..3a53f327e 100755 --- a/ods_ci/tasks/Resources/Provisioning/GPU/NVIDIA/gpu_deploy.sh +++ b/ods_ci/tasks/Resources/Provisioning/GPU/NVIDIA/gpu_deploy.sh @@ -12,10 +12,9 @@ CSVNAME="$(oc get packagemanifests/gpu-operator-certified -n openshift-marketpla sed -i'' -e "0,/v1.11/s//$CHANNEL/g" "$GPU_INSTALL_DIR/gpu_install.yaml" oc apply -f "$GPU_INSTALL_DIR/gpu_install.yaml" -oc apply -f "$GPU_INSTALL_DIR/../nfd_operator.yaml" -echo "Wait for Nvidia GPU Operator Subscription, InstallPlan and Deployment to complete" +/bin/bash tasks/Resources/Provisioning/GPU/NFD/install_nfd.sh -oc wait --timeout=3m --for jsonpath='{.status.state}'=AtLatestKnown -n openshift-nfd sub nfd +echo "Wait for Nvidia GPU Operator Subscription, InstallPlan and Deployment to complete" oc wait --timeout=3m --for jsonpath='{.status.state}'=AtLatestKnown -n nvidia-gpu-operator sub gpu-operator-certified @@ -88,7 +87,6 @@ function rerun_accelerator_migration() { } wait_until_pod_ready_status "gpu-operator" -oc apply -f "$GPU_INSTALL_DIR/../nfd_deploy.yaml" oc get csv -n nvidia-gpu-operator "$CSVNAME" -o jsonpath='{.metadata.annotations.alm-examples}' | jq .[0] > clusterpolicy.json oc apply -f clusterpolicy.json wait_until_pod_ready_status "nvidia-device-plugin-daemonset" 600