# Captured from the GitHub Actions run page for
# "CI: run MaxText tests on AWS with NGC release candidate images" (#108).
# Workflow file for this run:

# Runs the NCCL tests by delegating to the reusable workflow
# ./.github/workflows/_test_nccl.yaml. Triggered nightly, on pull requests,
# and manually (optionally with a caller-chosen container image).
name: NCCL on Kubernetes
on:
  schedule:
    # Daily at 08:30 (GitHub evaluates cron schedules in UTC).
    - cron: '30 8 * * *'
  pull_request:
    types:
      - opened
      - reopened
      - ready_for_review
      - synchronize
    # Pull requests that only touch Markdown files do not trigger a run.
    paths-ignore:
      - '**.md'
  workflow_dispatch:
    inputs:
      # Note that cuda-dl-base installs the NCCL tests, while the vanilla nvidia/cuda
      # images do not; when JAX-Toolbox moves to using cuda-dl-base this workflow ought
      # to be modified to test one of the JAX-Toolbox containers.
      CONTAINER:
        type: string
        description: Container to test, this is assumed to already contain the NCCL tests e.g. cuda-dl-base or derived
        # An empty default lets the job below fall back to its pinned image.
        default: ''
        required: false
concurrency:
  # One group per workflow and PR head branch; when github.head_ref is empty
  # the run ID is used instead, which gives every such run its own group.
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  # Runs whose ref is refs/heads/main are never cancelled by a newer run.
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
permissions:
  actions: write  # to cancel previous workflows
  contents: read  # to fetch code
  packages: write  # to upload container
jobs:
  nccl-tests:
    uses: ./.github/workflows/_test_nccl.yaml
    with:
      # Use the manually supplied image when one is given; otherwise (empty or
      # absent input) test the pinned cuda-dl-base image.
      CONTAINER: ${{ inputs.CONTAINER || 'nvcr.io/nvidia/cuda-dl-base:24.12-cuda12.6-devel-ubuntu24.04' }}
    # Pass all of the calling workflow's secrets to the reusable workflow.
    secrets: inherit