Skip to content

Commit

Permalink
Add matrix test of CUDA 12.3/12.4 to 24.04 CI (#726)
Browse files (browse the repository at this point in the history)
  • Loading branch information
yhtang authored Apr 16, 2024
1 parent f0c6772 commit 62f0aa5
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 16 deletions.
21 changes: 16 additions & 5 deletions .github/workflows/_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ on:
description: Build date in YYYY-MM-DD format
required: false
default: NOT SPECIFIED
CUDA_VERSION:
type: string
description: 'CUDA version to build the image for, e.g. 12.3.0, 12.4.0'
required: false
default: 12.4.0
outputs:
DOCKER_TAGS:
description: JSON object containing tags of all docker images built
Expand All @@ -27,6 +32,8 @@ jobs:
uses: ./.github/workflows/_build_base.yaml
with:
ARCHITECTURE: ${{ inputs.ARCHITECTURE }}
ARTIFACT_NAME: artifact-base-build-cuda${{ inputs.CUDA_VERSION }}
BASE_IMAGE: nvidia/cuda:${{ inputs.CUDA_VERSION }}-devel-ubuntu22.04
BUILD_DATE: ${{ inputs.BUILD_DATE }}
secrets: inherit

Expand All @@ -35,7 +42,7 @@ jobs:
uses: ./.github/workflows/_build.yaml
with:
ARCHITECTURE: ${{ inputs.ARCHITECTURE }}
ARTIFACT_NAME: artifact-jax-build
ARTIFACT_NAME: artifact-jax-build-cuda${{ inputs.CUDA_VERSION }}
BADGE_FILENAME: badge-jax-build
BUILD_DATE: ${{ inputs.BUILD_DATE }}
BASE_IMAGE: ${{ needs.build-base.outputs.DOCKER_TAG }}
Expand All @@ -49,7 +56,7 @@ jobs:
uses: ./.github/workflows/_build.yaml
with:
ARCHITECTURE: ${{ inputs.ARCHITECTURE }}
ARTIFACT_NAME: artifact-pax-build
ARTIFACT_NAME: artifact-pax-build-cuda${{ inputs.CUDA_VERSION }}
BADGE_FILENAME: badge-pax-build
BUILD_DATE: ${{ inputs.BUILD_DATE }}
BASE_IMAGE: ${{ needs.build-jax.outputs.DOCKER_TAG_MEALKIT }}
Expand All @@ -62,6 +69,7 @@ jobs:
uses: ./.github/workflows/_build_rosetta.yaml
with:
ARCHITECTURE: ${{ inputs.ARCHITECTURE }}
ARTIFACT_NAME: artifact-rosetta-pax-build-cuda${{ inputs.CUDA_VERSION }}
BUILD_DATE: ${{ inputs.BUILD_DATE }}
BASE_IMAGE: ${{ needs.build-upstream-pax.outputs.DOCKER_TAG_MEALKIT }}
BASE_LIBRARY: pax
Expand Down Expand Up @@ -124,7 +132,7 @@ jobs:
if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a
uses: ./.github/workflows/_test_unit.yaml
with:
TEST_NAME: jax
TEST_NAME: jax-cuda${{ inputs.CUDA_VERSION }}
EXECUTE: |
docker run -i --shm-size=1g --gpus all \
${{ needs.build-jax.outputs.DOCKER_TAG_FINAL }} \
Expand Down Expand Up @@ -155,6 +163,7 @@ jobs:
if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a
uses: ./.github/workflows/_test_te.yaml
with:
ARTIFACT_PREFIX: te-${{ inputs.CUDA_VERSION }}
TE_IMAGE: ${{ needs.build-upstream-pax.outputs.DOCKER_TAG_FINAL }}
secrets: inherit

Expand All @@ -163,7 +172,7 @@ jobs:
if: inputs.ARCHITECTURE == 'amd64' # triton doesn't support arm64(?)
uses: ./.github/workflows/_test_unit.yaml
with:
TEST_NAME: pallas
TEST_NAME: pallas-cuda${{ inputs.CUDA_VERSION }}
EXECUTE: |
docker run -i --shm-size=1g --gpus all --volume $PWD:/output \
${{ needs.build-jax.outputs.DOCKER_TAG_FINAL }} \
Expand All @@ -189,7 +198,7 @@ jobs:
if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a
uses: ./.github/workflows/_test_unit.yaml
with:
TEST_NAME: te
TEST_NAME: te-cuda${{ inputs.CUDA_VERSION }}
EXECUTE: |
docker run -i --gpus all --shm-size=1g -v $PWD:/log \
${{ needs.build-upstream-pax.outputs.DOCKER_TAG_FINAL }} \
Expand Down Expand Up @@ -217,6 +226,7 @@ jobs:
if: inputs.ARCHITECTURE == 'amd64' # no images for arm64
uses: ./.github/workflows/_test_upstream_pax.yaml
with:
ARTIFACT_NAME: artifact-upstream-pax-mgmn-test-cuda${{ inputs.CUDA_VERSION }}
PAX_IMAGE: ${{ needs.build-upstream-pax.outputs.DOCKER_TAG_FINAL }}
secrets: inherit

Expand All @@ -225,5 +235,6 @@ jobs:
if: inputs.ARCHITECTURE == 'amd64' # no images for arm64
uses: ./.github/workflows/_test_pax_rosetta.yaml
with:
ARTIFACT_NAME: artifact-rosetta-pax-mgmn-test-cuda${{ inputs.CUDA_VERSION }}
PAX_IMAGE: ${{ needs.build-rosetta-pax.outputs.DOCKER_TAG_FINAL }}
secrets: inherit
21 changes: 10 additions & 11 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,23 @@ jobs:
BUILD_DATE=$(TZ='US/Los_Angeles' date '+%Y-%m-%d')
echo "BUILD_DATE=${BUILD_DATE}" >> $GITHUB_OUTPUT
amd64:
_ci:
strategy:
fail-fast: false
matrix:
CUDA_VERSION: [12.3.0, 12.4.0]
ARCHITECTURE: [amd64, arm64]
name: ${{ matrix.ARCHITECTURE }}-${{ matrix.CUDA_VERSION }}
needs: [metadata]
uses: ./.github/workflows/_ci.yaml
with:
ARCHITECTURE: amd64
BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
secrets: inherit

arm64:
needs: [metadata]
uses: ./.github/workflows/_ci.yaml
with:
ARCHITECTURE: arm64
ARCHITECTURE: ${{ matrix.ARCHITECTURE }}
BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
CUDA_VERSION: ${{ matrix.CUDA_VERSION }}
secrets: inherit

finalize:
needs: [metadata, amd64, arm64]
needs: [metadata, _ci]
if: "!cancelled()"
uses: ./.github/workflows/_finalize.yaml
with:
Expand Down

0 comments on commit 62f0aa5

Please sign in to comment.