Run the reusable CI workflow once per (architecture, CUDA version) pair.

_ci.yaml gains a CUDA_VERSION input that selects the nvidia/cuda base image
and is appended to the artifact and test names passed to the build and test
workflows. ci.yaml replaces the separate amd64/arm64 jobs with one matrix job
over ARCHITECTURE and CUDA_VERSION.

CUDA versions are written as quoted strings so they are never subject to
implicit YAML typing. The blob ids on the "index" lines are carried over
unchanged from the patch as first generated, so the post-image ids do not
account for the added quotes.

diff --git a/.github/workflows/_ci.yaml b/.github/workflows/_ci.yaml
index 38ee71ded..77d921b54 100644
--- a/.github/workflows/_ci.yaml
+++ b/.github/workflows/_ci.yaml
@@ -11,6 +11,11 @@ on:
         description: Build date in YYYY-MM-DD format
         required: false
         default: NOT SPECIFIED
+      CUDA_VERSION:
+        type: string
+        description: 'CUDA version to build the image for, e.g. 12.3.0, 12.4.0'
+        required: false
+        default: '12.4.0'
     outputs:
       DOCKER_TAGS:
         description: JSON object containing tags of all docker images built
@@ -27,6 +32,8 @@ jobs:
     uses: ./.github/workflows/_build_base.yaml
     with:
       ARCHITECTURE: ${{ inputs.ARCHITECTURE }}
+      ARTIFACT_NAME: artifact-base-build-cuda${{ inputs.CUDA_VERSION }}
+      BASE_IMAGE: nvidia/cuda:${{ inputs.CUDA_VERSION }}-devel-ubuntu22.04
       BUILD_DATE: ${{ inputs.BUILD_DATE }}
     secrets: inherit
 
@@ -35,7 +42,7 @@ jobs:
     uses: ./.github/workflows/_build.yaml
     with:
       ARCHITECTURE: ${{ inputs.ARCHITECTURE }}
-      ARTIFACT_NAME: artifact-jax-build
+      ARTIFACT_NAME: artifact-jax-build-cuda${{ inputs.CUDA_VERSION }}
       BADGE_FILENAME: badge-jax-build
       BUILD_DATE: ${{ inputs.BUILD_DATE }}
       BASE_IMAGE: ${{ needs.build-base.outputs.DOCKER_TAG }}
@@ -49,7 +56,7 @@ jobs:
     uses: ./.github/workflows/_build.yaml
     with:
       ARCHITECTURE: ${{ inputs.ARCHITECTURE }}
-      ARTIFACT_NAME: artifact-pax-build
+      ARTIFACT_NAME: artifact-pax-build-cuda${{ inputs.CUDA_VERSION }}
       BADGE_FILENAME: badge-pax-build
       BUILD_DATE: ${{ inputs.BUILD_DATE }}
       BASE_IMAGE: ${{ needs.build-jax.outputs.DOCKER_TAG_MEALKIT }}
@@ -62,6 +69,7 @@ jobs:
     uses: ./.github/workflows/_build_rosetta.yaml
     with:
       ARCHITECTURE: ${{ inputs.ARCHITECTURE }}
+      ARTIFACT_NAME: artifact-rosetta-pax-build-cuda${{ inputs.CUDA_VERSION }}
       BUILD_DATE: ${{ inputs.BUILD_DATE }}
       BASE_IMAGE: ${{ needs.build-upstream-pax.outputs.DOCKER_TAG_MEALKIT }}
       BASE_LIBRARY: pax
@@ -124,7 +132,7 @@ jobs:
     if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a
     uses: ./.github/workflows/_test_unit.yaml
     with:
-      TEST_NAME: jax
+      TEST_NAME: jax-cuda${{ inputs.CUDA_VERSION }}
       EXECUTE: |
         docker run -i --shm-size=1g --gpus all \
         ${{ needs.build-jax.outputs.DOCKER_TAG_FINAL }} \
@@ -155,6 +163,7 @@ jobs:
     if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a
     uses: ./.github/workflows/_test_te.yaml
     with:
+      ARTIFACT_PREFIX: te-${{ inputs.CUDA_VERSION }}
       TE_IMAGE: ${{ needs.build-upstream-pax.outputs.DOCKER_TAG_FINAL }}
     secrets: inherit
 
@@ -163,7 +172,7 @@ jobs:
     if: inputs.ARCHITECTURE == 'amd64' # triton doesn't support arm64(?)
     uses: ./.github/workflows/_test_unit.yaml
     with:
-      TEST_NAME: pallas
+      TEST_NAME: pallas-cuda${{ inputs.CUDA_VERSION }}
       EXECUTE: |
         docker run -i --shm-size=1g --gpus all --volume $PWD:/output \
         ${{ needs.build-jax.outputs.DOCKER_TAG_FINAL }} \
@@ -189,7 +198,7 @@ jobs:
     if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a
     uses: ./.github/workflows/_test_unit.yaml
     with:
-      TEST_NAME: te
+      TEST_NAME: te-cuda${{ inputs.CUDA_VERSION }}
       EXECUTE: |
         docker run -i --gpus all --shm-size=1g -v $PWD:/log \
         ${{ needs.build-upstream-pax.outputs.DOCKER_TAG_FINAL }} \
@@ -217,6 +226,7 @@ jobs:
     if: inputs.ARCHITECTURE == 'amd64' # no images for arm64
     uses: ./.github/workflows/_test_upstream_pax.yaml
     with:
+      ARTIFACT_NAME: artifact-upstream-pax-mgmn-test-cuda${{ inputs.CUDA_VERSION }}
       PAX_IMAGE: ${{ needs.build-upstream-pax.outputs.DOCKER_TAG_FINAL }}
     secrets: inherit
 
@@ -225,5 +235,6 @@ jobs:
     if: inputs.ARCHITECTURE == 'amd64' # no images for arm64
     uses: ./.github/workflows/_test_pax_rosetta.yaml
     with:
+      ARTIFACT_NAME: artifact-rosetta-pax-mgmn-test-cuda${{ inputs.CUDA_VERSION }}
       PAX_IMAGE: ${{ needs.build-rosetta-pax.outputs.DOCKER_TAG_FINAL }}
     secrets: inherit
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index bea97353a..f35bdd5fe 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -27,24 +27,23 @@ jobs:
           BUILD_DATE=$(TZ='US/Los_Angeles' date '+%Y-%m-%d')
           echo "BUILD_DATE=${BUILD_DATE}" >> $GITHUB_OUTPUT
 
-  amd64:
+  _ci:
+    strategy:
+      fail-fast: false
+      matrix:
+        CUDA_VERSION: ['12.3.0', '12.4.0']
+        ARCHITECTURE: [amd64, arm64]
+    name: ${{ matrix.ARCHITECTURE }}-${{ matrix.CUDA_VERSION }}
     needs: [metadata]
     uses: ./.github/workflows/_ci.yaml
     with:
-      ARCHITECTURE: amd64
-      BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
-    secrets: inherit
-
-  arm64:
-    needs: [metadata]
-    uses: ./.github/workflows/_ci.yaml
-    with:
-      ARCHITECTURE: arm64
+      ARCHITECTURE: ${{ matrix.ARCHITECTURE }}
       BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
+      CUDA_VERSION: ${{ matrix.CUDA_VERSION }}
     secrets: inherit
 
   finalize:
-    needs: [metadata, amd64, arm64]
+    needs: [metadata, _ci]
     if: "!cancelled()"
     uses: ./.github/workflows/_finalize.yaml
     with: