diff --git a/ods_ci/tasks/Resources/Provisioning/GPU/AMD/amd_operator.sh b/ods_ci/tasks/Resources/Provisioning/GPU/AMD/amd_operator.sh index 4ff4b4d88..aa27ef97b 100755 --- a/ods_ci/tasks/Resources/Provisioning/GPU/AMD/amd_operator.sh +++ b/ods_ci/tasks/Resources/Provisioning/GPU/AMD/amd_operator.sh @@ -157,6 +157,32 @@ EOF fi } +function applyWorkaroundForOlderOCPVersions () { + # workaround for OCP versions less than 4.16 + # AMD certified operator is published starting from OCP v4.16 + ocpVersion=$(oc version --output json | jq '.openshiftVersion' | tr -d '"') + IFS='.' read -ra ocpVersionSplit <<< "$ocpVersion" + if [ "${ocpVersionSplit[1]}" -lt 16 ]; then + echo "OCP Version: $ocpVersion" + echo "AMD Operator is not available for versions < 4.16, hence creating custom catalog source as workaround" + oc apply -f - </$imageUrl/g" $NFD_INSTANCE +oc apply -f "$NFD_INSTANCE" diff --git a/ods_ci/tasks/Resources/Provisioning/GPU/nfd_deploy.yaml b/ods_ci/tasks/Resources/Provisioning/GPU/NFD/nfd_deploy.yaml similarity index 92% rename from ods_ci/tasks/Resources/Provisioning/GPU/nfd_deploy.yaml rename to ods_ci/tasks/Resources/Provisioning/GPU/NFD/nfd_deploy.yaml index 4cb56c3af..ffa3dd1f9 100644 --- a/ods_ci/tasks/Resources/Provisioning/GPU/nfd_deploy.yaml +++ b/ods_ci/tasks/Resources/Provisioning/GPU/NFD/nfd_deploy.yaml @@ -7,7 +7,8 @@ spec: instance: "" # instance is empty by default topologyupdater: false # False by default operand: - image: registry.redhat.io/openshift4/ose-node-feature-discovery:v4.11 + # Image URL example: registry.redhat.io/openshift4/ose-node-feature-discovery:v4.11 + image: imagePullPolicy: Always workerConfig: configData: | diff --git a/ods_ci/tasks/Resources/Provisioning/GPU/nfd_operator.yaml b/ods_ci/tasks/Resources/Provisioning/GPU/NFD/nfd_operator.yaml similarity index 100% rename from ods_ci/tasks/Resources/Provisioning/GPU/nfd_operator.yaml rename to ods_ci/tasks/Resources/Provisioning/GPU/NFD/nfd_operator.yaml diff --git a/ods_ci/tasks/Resources/Provisioning/GPU/NVIDIA/gpu_deploy.sh b/ods_ci/tasks/Resources/Provisioning/GPU/NVIDIA/gpu_deploy.sh index b45728d97..45c4731bf 100755 --- a/ods_ci/tasks/Resources/Provisioning/GPU/NVIDIA/gpu_deploy.sh +++ b/ods_ci/tasks/Resources/Provisioning/GPU/NVIDIA/gpu_deploy.sh @@ -12,10 +12,9 @@ CSVNAME="$(oc get packagemanifests/gpu-operator-certified -n openshift-marketpla sed -i'' -e "0,/v1.11/s//$CHANNEL/g" -e "s/gpu-operator-certified.v1.11.0/$CSVNAME/g" "$GPU_INSTALL_DIR/gpu_install.yaml" oc apply -f "$GPU_INSTALL_DIR/gpu_install.yaml" -oc apply -f "$GPU_INSTALL_DIR/../nfd_operator.yaml" -echo "Wait for Nvidia GPU Operator Subscription, InstallPlan and Deployment to complete" +/bin/bash tasks/Resources/Provisioning/GPU/NFD/install_nfd.sh -oc wait --timeout=3m --for jsonpath='{.status.state}'=AtLatestKnown -n openshift-nfd sub nfd +echo "Wait for Nvidia GPU Operator Subscription, InstallPlan and Deployment to complete" oc wait --timeout=3m --for jsonpath='{.status.state}'=AtLatestKnown -n nvidia-gpu-operator sub gpu-operator-certified @@ -80,7 +79,6 @@ function rerun_accelerator_migration() { } wait_until_pod_ready_status "gpu-operator" -oc apply -f "$GPU_INSTALL_DIR/../nfd_deploy.yaml" oc get csv -n nvidia-gpu-operator "$CSVNAME" -o jsonpath='{.metadata.annotations.alm-examples}' | jq .[0] > clusterpolicy.json oc apply -f clusterpolicy.json wait_until_pod_ready_status "nvidia-device-plugin-daemonset"