Skip to content

Commit

Permalink
Re-enable a100 tests and benchmarks. (iree-org#17567)
Browse files Browse the repository at this point in the history
The a100 postsubmit runner is back online now, so we should be ready to
re-enable these jobs.

Follow-up to iree-org#17527 and
iree-org#17549

ci-extra: test_nvidia_a100
  • Loading branch information
ScottTodd authored Jun 4, 2024
1 parent 3803de5 commit 0467f48
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 67 deletions.
129 changes: 64 additions & 65 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -326,70 +326,69 @@ jobs:
./build_tools/scripts/check_vulkan.sh
./build_tools/cmake/ctest_all.sh ${BUILD_DIR}"
# TODO: re-enable when a100 runners are available again
# test_nvidia_a100:
# needs: [setup, build_all]
# if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_nvidia_a100')
# env:
# BUILD_DIR: build-tests
# INSTALL_DIR: ${{ needs.build_all.outputs.install-dir }}
# INSTALL_DIR_ARCHIVE: ${{ needs.build_all.outputs.install-dir-archive }}
# INSTALL_DIR_GCS_URL: ${{ needs.build_all.outputs.install-dir-gcs-url }}
# IREE_CPU_DISABLE: 1
# IREE_VULKAN_DISABLE: 0
# IREE_CUDA_DISABLE: 0
# IREE_HIP_DISABLE: 1
# runs-on:
# - self-hosted # must come first
# - runner-group=${{ needs.setup.outputs.runner-group }}
# - environment=${{ needs.setup.outputs.runner-env }}
# - a100
# - os-family=Linux
# steps:
# - name: "Checking out repository"
# uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
# - name: "Checking out runtime submodules"
# run: ./build_tools/scripts/git/update_runtime_submodules.sh
# - name: Querying GPU information
# run: |
# ./build_tools/scripts/check_cuda.sh
# ./build_tools/scripts/check_vulkan.sh
# - name: "Downloading install dir archive"
# run: wget "${INSTALL_DIR_GCS_URL}" -O "${INSTALL_DIR_ARCHIVE}"
# - name: "Extracting install directory"
# run: tar -xf "${INSTALL_DIR_ARCHIVE}"
# - name: "Building tests"
# run: |
# ./build_tools/github_actions/docker_run.sh \
# --env IREE_CPU_DISABLE \
# --env IREE_VULKAN_DISABLE \
# --env IREE_CUDA_DISABLE \
# --env IREE_HIP_DISABLE \
# gcr.io/iree-oss/nvidia@sha256:82fa00b5cdda1b35634796cd0f88cb5d6d22d80328b94bfb51e5f2820598ba23 \
# ./build_tools/pkgci/build_tests_using_package.sh ${INSTALL_DIR}
# - name: "Running GPU tests"
# env:
# IREE_CTEST_LABEL_REGEX: ^requires-gpu-sm80|^requires-gpu|^driver=vulkan$|^driver=cuda$
# IREE_NVIDIA_SM80_TESTS_DISABLE: 0
# IREE_MULTI_DEVICE_TESTS_DISABLE: 1
# run: |
# ./build_tools/github_actions/docker_run.sh \
# --env IREE_VULKAN_DISABLE \
# --env IREE_CUDA_DISABLE \
# --env IREE_HIP_DISABLE \
# --env IREE_CTEST_LABEL_REGEX \
# --env IREE_NVIDIA_SM80_TESTS_DISABLE \
# --env IREE_MULTI_DEVICE_TESTS_DISABLE \
# --env IREE_VULKAN_F16_DISABLE=0 \
# --env IREE_NVIDIA_GPU_TESTS_DISABLE=0 \
# --env CTEST_PARALLEL_LEVEL=4 \
# --env NVIDIA_DRIVER_CAPABILITIES=all \
# --gpus all \
# gcr.io/iree-oss/nvidia@sha256:82fa00b5cdda1b35634796cd0f88cb5d6d22d80328b94bfb51e5f2820598ba23 \
# bash -euo pipefail -c \
# "./build_tools/scripts/check_cuda.sh
# ./build_tools/scripts/check_vulkan.sh
# ./build_tools/cmake/ctest_all.sh ${BUILD_DIR}"
# Builds the test suite from the install directory published by `build_all`
# and runs the GPU-labelled CTest tests on a self-hosted runner carrying the
# `a100` label. The job only runs when `setup` lists `test_nvidia_a100` in its
# `enabled-jobs` output.
test_nvidia_a100:
needs: [setup, build_all]
if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_nvidia_a100')
# Job-wide environment: where tests are built, where the prebuilt install
# directory comes from (all three INSTALL_DIR* values are outputs of
# `build_all`), and the per-backend IREE_*_DISABLE switches.
# NOTE(review): presumably 1 disables a backend and 0 enables it, i.e. only
# Vulkan and CUDA are exercised here - confirm against the build scripts.
env:
BUILD_DIR: build-tests
INSTALL_DIR: ${{ needs.build_all.outputs.install-dir }}
INSTALL_DIR_ARCHIVE: ${{ needs.build_all.outputs.install-dir-archive }}
INSTALL_DIR_GCS_URL: ${{ needs.build_all.outputs.install-dir-gcs-url }}
IREE_CPU_DISABLE: 1
IREE_VULKAN_DISABLE: 0
IREE_CUDA_DISABLE: 0
IREE_HIP_DISABLE: 1
# Runner selection labels; the runner group and environment are decided by the
# `setup` job, and `a100` restricts the job to the a100 runner pool.
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
- environment=${{ needs.setup.outputs.runner-env }}
- a100
- os-family=Linux
steps:
# The checkout action is pinned to a full commit SHA (tagged v3.3.0).
- name: "Checking out repository"
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
- name: "Checking out runtime submodules"
run: ./build_tools/scripts/git/update_runtime_submodules.sh
# Runs the CUDA and Vulkan check scripts directly on the runner host, before
# any container is started.
- name: Querying GPU information
run: |
./build_tools/scripts/check_cuda.sh
./build_tools/scripts/check_vulkan.sh
- name: "Downloading install dir archive"
# Fetches the archive from INSTALL_DIR_GCS_URL and saves it as
# INSTALL_DIR_ARCHIVE.
run: wget "${INSTALL_DIR_GCS_URL}" -O "${INSTALL_DIR_ARCHIVE}"
- name: "Extracting install directory"
run: tar -xf "${INSTALL_DIR_ARCHIVE}"
- name: "Building tests"
# Builds the tests against INSTALL_DIR inside the digest-pinned
# gcr.io/iree-oss/nvidia image, forwarding the backend switches from the job
# environment. No `--gpus` flag is passed for this step.
run: |
./build_tools/github_actions/docker_run.sh \
--env IREE_CPU_DISABLE \
--env IREE_VULKAN_DISABLE \
--env IREE_CUDA_DISABLE \
--env IREE_HIP_DISABLE \
gcr.io/iree-oss/nvidia@sha256:82fa00b5cdda1b35634796cd0f88cb5d6d22d80328b94bfb51e5f2820598ba23 \
./build_tools/pkgci/build_tests_using_package.sh ${INSTALL_DIR}
- name: "Running GPU tests"
# Step-level settings: a CTest label regex matching the sm80/GPU labels and
# the Vulkan/CUDA driver labels, plus switches for the sm80 and multi-device
# test groups.
# NOTE(review): presumably 0 enables the sm80 tests and 1 disables the
# multi-device tests - confirm against ctest_all.sh.
env:
IREE_CTEST_LABEL_REGEX: ^requires-gpu-sm80|^requires-gpu|^driver=vulkan$|^driver=cuda$
IREE_NVIDIA_SM80_TESTS_DISABLE: 0
IREE_MULTI_DEVICE_TESTS_DISABLE: 1
# Runs in the same image digest as the build step, this time with `--gpus all`.
# Inside the container, `bash -euo pipefail` re-runs the CUDA/Vulkan checks
# and then ctest_all.sh on BUILD_DIR, stopping at the first failing command.
run: |
./build_tools/github_actions/docker_run.sh \
--env IREE_VULKAN_DISABLE \
--env IREE_CUDA_DISABLE \
--env IREE_HIP_DISABLE \
--env IREE_CTEST_LABEL_REGEX \
--env IREE_NVIDIA_SM80_TESTS_DISABLE \
--env IREE_MULTI_DEVICE_TESTS_DISABLE \
--env IREE_VULKAN_F16_DISABLE=0 \
--env IREE_NVIDIA_GPU_TESTS_DISABLE=0 \
--env CTEST_PARALLEL_LEVEL=4 \
--env NVIDIA_DRIVER_CAPABILITIES=all \
--gpus all \
gcr.io/iree-oss/nvidia@sha256:82fa00b5cdda1b35634796cd0f88cb5d6d22d80328b94bfb51e5f2820598ba23 \
bash -euo pipefail -c \
"./build_tools/scripts/check_cuda.sh
./build_tools/scripts/check_vulkan.sh
./build_tools/cmake/ctest_all.sh ${BUILD_DIR}"
test_amd_mi250:
needs: [setup, build_all]
Expand Down Expand Up @@ -920,7 +919,7 @@ jobs:

# Accelerators
- test_nvidia_gpu
# - test_nvidia_a100
- test_nvidia_a100
- test_amd_mi250
# - test_amd_w7900

Expand Down
2 changes: 0 additions & 2 deletions build_tools/github_actions/configure_ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,6 @@ def contains(cls, val):
for preset in benchmark_presets.DEFAULT_PRESETS
# RISC-V benchmarks are not supported in the CI workflow.
if preset not in [benchmark_presets.RISCV]
# CUDA benchmarks on CI depend on unreliable a100 runners.
and preset not in [benchmark_presets.CUDA]
] + ["comp-stats"]
DEFAULT_BENCHMARK_PRESET = "default"
LARGE_BENCHMARK_PRESET_GROUP = benchmark_presets.LARGE_PRESETS
Expand Down

0 comments on commit 0467f48

Please sign in to comment.