diff --git a/.github/workflows/pkgci.yml b/.github/workflows/pkgci.yml index 369d6b3c9e58..a2fad6584480 100644 --- a/.github/workflows/pkgci.yml +++ b/.github/workflows/pkgci.yml @@ -37,35 +37,11 @@ jobs: with: package_version: 0.dev1 - regression_test_cpu: - name: Regression Test CPU + regression_test: + name: Regression Test needs: [setup, build_packages] - if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'regression_test_cpu') - uses: ./.github/workflows/pkgci_regression_test_cpu.yml - - regression_test_amdgpu_vulkan: - name: Regression Test AMDGPU-Vulkan - needs: [setup, build_packages] - if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'regression_test_amdgpu_vulkan') - uses: ./.github/workflows/pkgci_regression_test_amdgpu_vulkan.yml - - regression_test_amdgpu_rocm: - name: Regression Test AMDGPU-ROCm - needs: [setup, build_packages] - if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'regression_test_amdgpu_rocm') - uses: ./.github/workflows/pkgci_regression_test_amdgpu_rocm.yml - - regression_test_nvidiagpu_vulkan: - name: Regression Test NVIDIAGPU-Vulkan - needs: [setup, build_packages] - if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'regression_test_nvidiagpu_vulkan') - uses: ./.github/workflows/pkgci_regression_test_nvidiagpu_vulkan.yml - - regression_test_nvidiagpu_cuda: - name: Regression Test NVIDIAGPU-CUDA - needs: [setup, build_packages] - if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'regression_test_nvidiagpu_cuda') - uses: ./.github/workflows/pkgci_regression_test_nvidiagpu_cuda.yml + if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'regression_test') + uses: ./.github/workflows/pkgci_regression_test.yml test_tensorflow_cpu: name: Test TensorFlow CPU diff --git a/.github/workflows/pkgci_regression_test_amdgpu_rocm.yml b/.github/workflows/pkgci_regression_test.yml similarity index 53% rename from .github/workflows/pkgci_regression_test_amdgpu_rocm.yml rename to 
.github/workflows/pkgci_regression_test.yml index df9d36c67eb9..925209c54f59 100644 --- a/.github/workflows/pkgci_regression_test_amdgpu_rocm.yml +++ b/.github/workflows/pkgci_regression_test.yml @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -name: PkgCI Regression Test (AMDGPU ROCm) +name: PkgCI Regression Test on: workflow_call: inputs: @@ -18,12 +18,54 @@ on: default: "" jobs: - linux_x86_64: - name: Linux (x86_64) - runs-on: nodai-amdgpu-w7900-x86-64 + test_onnx: + name: "test_onnx :: ${{ matrix.name }}" + runs-on: ${{ matrix.runs-on }} + strategy: + fail-fast: false + matrix: + include: + # CPU + - name: cpu_llvm_sync + config-file: onnx_cpu_llvm_sync.json + numprocesses: auto + runs-on: ubuntu-20.04 + + # AMD GPU + - name: amdgpu_rocm_rdna3 + numprocesses: 1 + config-file: onnx_gpu_rocm_rdna3.json + runs-on: nodai-amdgpu-w7900-x86-64 + - name: amdgpu_vulkan + numprocesses: 4 + config-file: onnx_gpu_vulkan.json + runs-on: nodai-amdgpu-w7900-x86-64 + + # NVIDIA GPU + - name: nvidiagpu_cuda + config-file: onnx_gpu_cuda.json + numprocesses: 4 + runs-on: + - self-hosted # must come first + - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }} + - environment=prod + - gpu # TODO(scotttodd): qualify further with vendor/model + - os-family=Linux + - name: nvidiagpu_vulkan + config-file: onnx_gpu_vulkan.json + numprocesses: 4 + runs-on: + - self-hosted # must come first + - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }} + - environment=prod + - gpu # TODO(scotttodd): qualify further with vendor/model + - os-family=Linux env: PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts + CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.config-file }} + NUMPROCESSES: ${{ matrix.numprocesses }} + LOG_FILE_PATH: /tmp/iree_tests_onnx_${{ 
matrix.name }}_logs.json VENV_DIR: ${{ github.workspace }}/venv steps: - name: Checking out IREE repository @@ -44,63 +86,88 @@ jobs: --artifact-path=${PACKAGE_DOWNLOAD_DIR} \ --fetch-gh-workflow=${{ inputs.artifact_run_id }} - # TODO(#17344): regenerate .mlirbc files - # # In-tree tests - # - name: Run experimental/regression_suite tests - # run: | - # source ${VENV_DIR}/bin/activate - # export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/hip/lib - # pytest \ - # -rA -s -m "plat_rdna3_rocm and presubmit" \ - # experimental/regression_suite - - # Out of tree tests - # TODO(scotttodd): Increase parallelism when supported by the HIP HAL - # driver and/or test runner machine. - - name: Checking out external TestSuite repository + - name: Check out external TestSuite repository uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 with: repository: nod-ai/SHARK-TestSuite ref: c9b3337e1f754c83d178568be1339aaef5f08045 path: SHARK-TestSuite submodules: false - - name: Installing external TestSuite Python requirements + lfs: false + - name: Install external TestSuite Python requirements run: | source ${VENV_DIR}/bin/activate python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt + - name: Run external tests - ONNX test suite run: | source ${VENV_DIR}/bin/activate - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/hip/lib pytest SHARK-TestSuite/iree_tests/onnx/ \ - -rpfE --timeout=30 --durations=20 \ - --config-files=build_tools/pkgci/external_test_suite/onnx_gpu_rocm_rdna3.json \ + -rpfE \ + --numprocesses ${NUMPROCESSES} \ + --timeout=30 \ + --durations=20 \ --no-skip-tests-missing-files \ - --report-log=/tmp/iree_tests_onnx_gpu_rocm_rdna3_logs.json + --config-files=${CONFIG_FILE_PATH} \ + --report-log=${LOG_FILE_PATH} - name: "Updating config file with latest XFAIL lists" if: failure() run: | source ${VENV_DIR}/bin/activate python SHARK-TestSuite/iree_tests/update_config_xfails.py \ - 
--log-file=/tmp/iree_tests_onnx_gpu_rocm_rdna3_logs.json \ - --config-file=build_tools/pkgci/external_test_suite/onnx_gpu_rocm_rdna3.json - cat build_tools/pkgci/external_test_suite/onnx_gpu_rocm_rdna3.json + --log-file=${LOG_FILE_PATH} \ + --config-file=${CONFIG_FILE_PATH} + cat ${CONFIG_FILE_PATH} - name: "Uploading new config file" if: failure() uses: actions/upload-artifact@v4 with: - name: "onnx_gpu_rocm_rdna3.json" - path: "build_tools/pkgci/external_test_suite/onnx_gpu_rocm_rdna3.json" + name: "${{ matrix.name }}_${{ matrix.config-file }}" # unique per matrix entry; upload-artifact@v4 rejects duplicate artifact names and two entries share onnx_gpu_vulkan.json + path: "${{ env.CONFIG_FILE_PATH }}" # 'with:' inputs are not shell-expanded, so read the job-level env var through the env context - linux_x86_64_rocm_models: - name: MI250 - Models - runs-on: nodai-amdgpu-mi250-x86-64 + test_models: + name: "test_models :: ${{ matrix.name }}" + runs-on: ${{ matrix.runs-on }} + strategy: + fail-fast: false + + # Note: these jobs should use persistent runners with local caches. + # Downloading test files (50GB+) without a cache can take 20+ minutes. + matrix: + include: + # CPU + - name: cpu_llvm_task + models-config-file: pytorch_models_cpu_llvm_task.json + sdxl-config-file: sdxl_scheduled_unet_cpu_llvm_task.json + runs-on: nodai-amdgpu-w7900-x86-64 + + # AMD GPU + - name: amdgpu_rocm_gfx90a + models-config-file: pytorch_models_gpu_rocm_gfx90a.json + models-extra-flags-config-file: pytorch_models_gpu_rocm_gfx90a_additional_flags.json + sdxl-config-file: sdxl_scheduled_unet_gpu_rocm_gfx90a.json + runs-on: nodai-amdgpu-mi250-x86-64 + - name: amdgpu_vulkan + models-config-file: pytorch_models_gpu_vulkan.json + runs-on: nodai-amdgpu-w7900-x86-64 + + # NVIDIA GPU + # None at the moment.
Could maybe use the persistent a100 runners: + # - self-hosted # must come first + # - runner-group=${{ needs.setup.outputs.runner-group }} + # - environment=${{ needs.setup.outputs.runner-env }} + # - a100 + # - os-family=Linux + # (note: would need to plumb the presubmit/postsubmit runner-group through to here too) env: PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts - VENV_DIR: ${{ github.workspace }}/venv IREE_TEST_FILES: ~/iree_tests_cache IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite + MODELS_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.models-config-file }} + MODELS_EXTRA_FLAGS_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.models-extra-flags-config-file }} + SDXL_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.sdxl-config-file }} + VENV_DIR: ${{ github.workspace }}/venv steps: - name: Checking out IREE repository uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 @@ -120,6 +187,15 @@ jobs: --artifact-path=${PACKAGE_DOWNLOAD_DIR} \ --fetch-gh-workflow=${{ inputs.artifact_run_id }} + # TODO(#17344): regenerate .mlirbc files, test plat_rdna3_rocm on rocm + # # In-tree tests + # - name: Run experimental/regression_suite tests + # run: | + # source ${VENV_DIR}/bin/activate + # pytest \ + # -rA -s -m "plat_host_cpu and presubmit" \ + # experimental/regression_suite + # Out of tree tests - name: Check out external TestSuite repository uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 @@ -137,21 +213,23 @@ jobs: run: | source ${VENV_DIR}/bin/activate python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir pytorch/models - - name: Run external tests - Models with real weights + + - name: Run external tests - models with real weights + if: "matrix.models-config-file != '' && !cancelled()" run: | source ${VENV_DIR}/bin/activate pytest 
SHARK-TestSuite/iree_tests/pytorch/models \ -rpfE \ -k real_weights \ --no-skip-tests-missing-files \ - --log-cli-level=info \ --capture=no \ + --log-cli-level=info \ --timeout=1200 \ - --retries 2 \ - --retry-delay 5 \ --durations=0 \ - --config-files=build_tools/pkgci/external_test_suite/gpu_rocm_models_gfx90a.json - - name: Run external tests - Models with real weights and additional flags + --config-files=${MODELS_CONFIG_FILE_PATH} + + - name: Run external tests - models with real weights and additional flags + if: "matrix.models-extra-flags-config-file != '' && !cancelled()" run: | source ${VENV_DIR}/bin/activate pytest SHARK-TestSuite/iree_tests/pytorch/models \ @@ -161,25 +239,25 @@ jobs: --capture=no \ --log-cli-level=info \ --timeout=1200 \ - --retries 2 \ - --retry-delay 5 \ --durations=0 \ - --config-files=build_tools/pkgci/external_test_suite/gpu_rocm_models_additional_flags_gfx90a.json - - name: "Running real weight model tests scheduled unet" + --config-files=${MODELS_EXTRA_FLAGS_CONFIG_FILE_PATH} + + - name: "Run external tests - SDXL scheduled unet" + if: "matrix.sdxl-config-file != '' && !cancelled()" run: | source ${VENV_DIR}/bin/activate pytest SHARK-TestSuite/iree_tests/pytorch/models/sdxl-scheduled-unet-3-tank \ -rpfE \ -k real_weights \ --no-skip-tests-missing-files \ - --log-cli-level=info \ --capture=no \ + --log-cli-level=info \ --timeout=1200 \ - --retries 2 \ - --retry-delay 5 \ --durations=0 \ - --config-files=build_tools/pkgci/external_test_suite/sdxl_scheduled_unet_gpu_rocm_gfx90a.json + --config-files=${SDXL_CONFIG_FILE_PATH} + - name: "Running SDXL rocm pipeline benchmark" + if: contains(matrix.name, 'rocm') run: | source ${VENV_DIR}/bin/activate bash SHARK-TestSuite/iree_tests/benchmarks/benchmark_sdxl_rocm.sh diff --git a/.github/workflows/pkgci_regression_test_amdgpu_vulkan.yml b/.github/workflows/pkgci_regression_test_amdgpu_vulkan.yml deleted file mode 100644 index 010c2e1f2a8e..000000000000 --- 
a/.github/workflows/pkgci_regression_test_amdgpu_vulkan.yml +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright 2023 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -name: PkgCI Regression Test (AMDGPU Vulkan) -on: - workflow_call: - inputs: - artifact_run_id: - type: string - default: "" - workflow_dispatch: - inputs: - artifact_run_id: - type: string - default: "" - -jobs: - linux_x86_64: - name: Linux (x86_64) - runs-on: nodai-amdgpu-w7900-x86-64 - env: - PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages - IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts - VENV_DIR: ${{ github.workspace }}/venv - steps: - - name: Checking out IREE repository - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - with: - submodules: false - - uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0 - with: - # Must match the subset of versions built in pkgci_build_packages. 
- python-version: '3.11' - - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # v3.0.2 - with: - name: linux_x86_64_release_packages - path: ${{ env.PACKAGE_DOWNLOAD_DIR }} - - name: Setup venv - run: | - ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \ - --artifact-path=${PACKAGE_DOWNLOAD_DIR} \ - --fetch-gh-workflow=${{ inputs.artifact_run_id }} - - # TODO(#17344): regenerate .mlirbc files - # # In-tree tests - # - name: Run experimental/regression_suite tests - # run: | - # source ${VENV_DIR}/bin/activate - # pytest \ - # -rA -s -m "plat_rdna3_vulkan and presubmit" \ - # experimental/regression_suite - - # Out of tree tests - - name: Checking out external TestSuite repository - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - with: - repository: nod-ai/SHARK-TestSuite - ref: c9b3337e1f754c83d178568be1339aaef5f08045 - path: SHARK-TestSuite - submodules: false - - name: Installing external TestSuite Python requirements - run: | - source ${VENV_DIR}/bin/activate - python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt - - name: Run external tests - ONNX test suite - run: | - source ${VENV_DIR}/bin/activate - pytest SHARK-TestSuite/iree_tests/onnx/ \ - -n 4 -rpfE --timeout=30 --durations=20 \ - --config-files=build_tools/pkgci/external_test_suite/onnx_gpu_vulkan.json \ - --no-skip-tests-missing-files - - # Note: this is a persistent runner with a local cache. Downloading all input - # files (50GB+) without a cache can take 20+ minutes. 
- linux_x86_64_models: - name: Linux (x86_64) Model Testing - runs-on: nodai-amdgpu-w7900-x86-64 - env: - PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages - IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts - VENV_DIR: ${{ github.workspace }}/venv - IREE_TEST_FILES: ~/iree_tests_cache - steps: - - name: Checking out IREE repository - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - with: - submodules: false - - uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0 - with: - # Must match the subset of versions built in pkgci_build_packages. - python-version: '3.11' - - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # v3.0.2 - with: - name: linux_x86_64_release_packages - path: ${{ env.PACKAGE_DOWNLOAD_DIR }} - - name: Setup venv - run: | - ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \ - --artifact-path=${PACKAGE_DOWNLOAD_DIR} \ - --fetch-gh-workflow=${{ inputs.artifact_run_id }} - - # Out of tree tests - - name: Check out external TestSuite repository - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - with: - repository: nod-ai/SHARK-TestSuite - ref: c9b3337e1f754c83d178568be1339aaef5f08045 - path: SHARK-TestSuite - submodules: false - lfs: true - - name: Install external TestSuite Python requirements - run: | - source ${VENV_DIR}/bin/activate - python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt - - name: Download remote files for real weight model tests - run: | - source ${VENV_DIR}/bin/activate - python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir pytorch/models - - name: Run external tests - Models with real weights - run: | - source ${VENV_DIR}/bin/activate - pytest SHARK-TestSuite/iree_tests/pytorch/models -k real_weights \ - -rpfxE --timeout=600 --durations=0 \ - --log-cli-level=info \ - --config-files=build_tools/pkgci/external_test_suite/pytorch_models_gpu_vulkan.json \ - --no-skip-tests-missing-files \ - 
--report-log=/tmp/iree_tests_pytorch_models_gpu_vulkan_logs.json - - name: "Updating config file with latest XFAIL lists" - if: failure() - run: | - source ${VENV_DIR}/bin/activate - python SHARK-TestSuite/iree_tests/update_config_xfails.py \ - --log-file=/tmp/iree_tests_pytorch_models_gpu_vulkan_logs.json \ - --config-file=build_tools/pkgci/external_test_suite/pytorch_models_gpu_vulkan.json - cat build_tools/pkgci/external_test_suite/pytorch_models_gpu_vulkan.json - - name: "Uploading new config file" - if: failure() - uses: actions/upload-artifact@v4 - with: - name: "pytorch_models_gpu_vulkan.json" - path: "build_tools/pkgci/external_test_suite/pytorch_models_gpu_vulkan.json" diff --git a/.github/workflows/pkgci_regression_test_cpu.yml b/.github/workflows/pkgci_regression_test_cpu.yml deleted file mode 100644 index c8c029dc89c3..000000000000 --- a/.github/workflows/pkgci_regression_test_cpu.yml +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright 2023 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -name: PkgCI Regression Test (CPU) -on: - workflow_call: - inputs: - artifact_run_id: - type: string - default: "" - workflow_dispatch: - inputs: - artifact_run_id: - type: string - default: "" - -jobs: - linux_x86_64: - name: Linux (x86_64) - runs-on: ubuntu-20.04 - env: - PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages - IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts - VENV_DIR: ${{ github.workspace }}/venv - steps: - - name: Checking out IREE repository - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - with: - submodules: false - - uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0 - with: - # Must match the subset of versions built in pkgci_build_packages. 
- python-version: '3.11' - - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # v3.0.2 - with: - name: linux_x86_64_release_packages - path: ${{ env.PACKAGE_DOWNLOAD_DIR }} - - name: Setup venv - run: | - ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \ - --artifact-path=${PACKAGE_DOWNLOAD_DIR} \ - --fetch-gh-workflow=${{ inputs.artifact_run_id }} - - name: Check out external TestSuite repository - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - with: - repository: nod-ai/SHARK-TestSuite - ref: c9b3337e1f754c83d178568be1339aaef5f08045 - path: SHARK-TestSuite - submodules: false - lfs: true - - name: Install external TestSuite Python requirements - run: | - source ${VENV_DIR}/bin/activate - python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt - - name: Run external tests - ONNX test suite - run: | - source ${VENV_DIR}/bin/activate - pytest SHARK-TestSuite/iree_tests/onnx/ \ - -n auto -rpfE --timeout=30 --durations=20 \ - --config-files=build_tools/pkgci/external_test_suite/onnx_cpu_llvm_sync.json \ - --no-skip-tests-missing-files \ - --report-log=/tmp/iree_tests_onnx_cpu_llvm_sync_logs.json - - name: "Updating config file with latest XFAIL lists" - if: failure() - run: | - source ${VENV_DIR}/bin/activate - python SHARK-TestSuite/iree_tests/update_config_xfails.py \ - --log-file=/tmp/iree_tests_onnx_cpu_llvm_sync_logs.json \ - --config-file=build_tools/pkgci/external_test_suite/onnx_cpu_llvm_sync.json - cat build_tools/pkgci/external_test_suite/onnx_cpu_llvm_sync.json - - name: "Uploading new config file" - if: failure() - uses: actions/upload-artifact@v4 - with: - name: "onnx_cpu_llvm_sync.json" - path: "build_tools/pkgci/external_test_suite/onnx_cpu_llvm_sync.json" - - # Note: this is a persistent runner with a local cache. Downloading all input - # files (50GB+) without a cache can take 20+ minutes. 
- linux_x86_64_models: - name: Linux (x86_64) Model Testing - runs-on: nodai-amdgpu-w7900-x86-64 - env: - PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages - IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts - VENV_DIR: ${{ github.workspace }}/venv - IREE_TEST_FILES: ~/iree_tests_cache - steps: - - name: Checking out IREE repository - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - with: - submodules: false - - uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0 - with: - # Must match the subset of versions built in pkgci_build_packages. - python-version: '3.11' - - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # v3.0.2 - with: - name: linux_x86_64_release_packages - path: ${{ env.PACKAGE_DOWNLOAD_DIR }} - - name: Setup venv - run: | - ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \ - --artifact-path=${PACKAGE_DOWNLOAD_DIR} \ - --fetch-gh-workflow=${{ inputs.artifact_run_id }} - - # TODO(#17344): regenerate .mlirbc files - # # In-tree tests - # - name: Run experimental/regression_suite tests - # run: | - # source ${VENV_DIR}/bin/activate - # pytest \ - # -rA -s -m "plat_host_cpu and presubmit" \ - # experimental/regression_suite - - # Out of tree tests - - name: Check out external TestSuite repository - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - with: - repository: nod-ai/SHARK-TestSuite - ref: c9b3337e1f754c83d178568be1339aaef5f08045 - path: SHARK-TestSuite - submodules: false - lfs: true - - name: Install external TestSuite Python requirements - run: | - source ${VENV_DIR}/bin/activate - python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt - - name: Download remote files for real weight model tests - run: | - source ${VENV_DIR}/bin/activate - python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir pytorch/models - - name: Run external tests - Models with real weights - run: | - source ${VENV_DIR}/bin/activate - 
pytest SHARK-TestSuite/iree_tests/pytorch/models \ - -rpfE \ - -k real_weights \ - --no-skip-tests-missing-files \ - --capture=no \ - --log-cli-level=info \ - --timeout=1200 \ - --retries 2 \ - --retry-delay 5 \ - --durations=0 \ - --config-files=build_tools/pkgci/external_test_suite/pytorch_models_cpu_llvm_task.json - - name: "Running real weight model tests scheduled unet cpu" - run: | - source ${VENV_DIR}/bin/activate - pytest SHARK-TestSuite/iree_tests/pytorch/models/sdxl-scheduled-unet-3-tank \ - -rpfE \ - -k real_weights \ - --no-skip-tests-missing-files \ - --capture=no \ - --log-cli-level=info \ - --timeout=1200 \ - --retries 2 \ - --retry-delay 5 \ - --durations=0 \ - --config-files=build_tools/pkgci/external_test_suite/sdxl_scheduled_unet_cpu_llvm_task.json diff --git a/.github/workflows/pkgci_regression_test_nvidiagpu_cuda.yml b/.github/workflows/pkgci_regression_test_nvidiagpu_cuda.yml deleted file mode 100644 index cd82195e9c32..000000000000 --- a/.github/workflows/pkgci_regression_test_nvidiagpu_cuda.yml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2024 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. 
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -name: PkgCI Regression Test (NVIDIA GPU CUDA) -on: - workflow_call: - inputs: - artifact_run_id: - type: string - default: "" - workflow_dispatch: - inputs: - artifact_run_id: - type: string - default: "" - -jobs: - linux_x86_64: - name: Linux (x86_64) - runs-on: - - self-hosted # must come first - - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }} - - environment=prod - - gpu # TODO(scotttodd): qualify further with vendor/model - - os-family=Linux - env: - PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages - IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts - VENV_DIR: ${{ github.workspace }}/venv - steps: - - name: Checking out IREE repository - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - with: - submodules: false - - uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0 - with: - # Must match the subset of versions built in pkgci_build_packages. - python-version: '3.11' - - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # v3.0.2 - with: - name: linux_x86_64_release_packages - path: ${{ env.PACKAGE_DOWNLOAD_DIR }} - - name: Setup venv - run: | - ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \ - --artifact-path=${PACKAGE_DOWNLOAD_DIR} \ - --fetch-gh-workflow=${{ inputs.artifact_run_id }} - - # Note: in-tree tests are omitted from this file. 
We could run tests - # marked for CUDA and 'plat_nvidia_[t4,a100]' (matching the runner used) - - # Out of tree tests - - name: Checking out external TestSuite repository - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - with: - repository: nod-ai/SHARK-TestSuite - ref: c9b3337e1f754c83d178568be1339aaef5f08045 - path: SHARK-TestSuite - submodules: false - - name: Installing external TestSuite Python requirements - run: | - source ${VENV_DIR}/bin/activate - python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt - - name: Run external tests - ONNX test suite - run: | - source ${VENV_DIR}/bin/activate - pytest SHARK-TestSuite/iree_tests/onnx/ \ - -n 4 -rpfE --timeout=30 --durations=20 \ - --config-files=build_tools/pkgci/external_test_suite/onnx_gpu_cuda.json \ - --no-skip-tests-missing-files \ - --report-log=/tmp/iree_tests_onnx_gpu_cuda_logs.json - - name: "Updating config file with latest XFAIL lists" - if: failure() - run: | - source ${VENV_DIR}/bin/activate - python SHARK-TestSuite/iree_tests/update_config_xfails.py \ - --log-file=/tmp/iree_tests_onnx_gpu_cuda_logs.json \ - --config-file=build_tools/pkgci/external_test_suite/onnx_gpu_cuda.json - cat build_tools/pkgci/external_test_suite/onnx_gpu_cuda.json - - name: "Uploading new config file" - if: failure() - uses: actions/upload-artifact@v4 - with: - name: "onnx_gpu_cuda.json" - path: "build_tools/pkgci/external_test_suite/onnx_gpu_cuda.json" diff --git a/.github/workflows/pkgci_regression_test_nvidiagpu_vulkan.yml b/.github/workflows/pkgci_regression_test_nvidiagpu_vulkan.yml deleted file mode 100644 index 5a15315f922e..000000000000 --- a/.github/workflows/pkgci_regression_test_nvidiagpu_vulkan.yml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2024 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. 
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -name: PkgCI Regression Test (NVIDIA GPU Vulkan) -on: - workflow_call: - inputs: - artifact_run_id: - type: string - default: "" - workflow_dispatch: - inputs: - artifact_run_id: - type: string - default: "" - -jobs: - linux_x86_64: - name: Linux (x86_64) - runs-on: - - self-hosted # must come first - - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }} - - environment=prod - - gpu # TODO(scotttodd): qualify further with vendor/model - - os-family=Linux - env: - PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages - IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts - VENV_DIR: ${{ github.workspace }}/venv - steps: - - name: Checking out IREE repository - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - with: - submodules: false - - uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0 - with: - # Must match the subset of versions built in pkgci_build_packages. - python-version: '3.11' - - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # v3.0.2 - with: - name: linux_x86_64_release_packages - path: ${{ env.PACKAGE_DOWNLOAD_DIR }} - - name: Setup venv - run: | - ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \ - --artifact-path=${PACKAGE_DOWNLOAD_DIR} \ - --fetch-gh-workflow=${{ inputs.artifact_run_id }} - - # Note: in-tree tests are omitted from this file. 
We could run tests - # marked for Vulkan and 'plat_nvidia_[t4,a100]' (matching the runner used) - - # Out of tree tests - - name: Checking out external TestSuite repository - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - with: - repository: nod-ai/SHARK-TestSuite - ref: c9b3337e1f754c83d178568be1339aaef5f08045 - path: SHARK-TestSuite - submodules: false - - name: Installing external TestSuite Python requirements - run: | - source ${VENV_DIR}/bin/activate - python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt - - name: Run external tests - ONNX test suite - run: | - source ${VENV_DIR}/bin/activate - pytest SHARK-TestSuite/iree_tests/onnx/ \ - -n 4 -rpfE --timeout=30 --durations=20 \ - --config-files=build_tools/pkgci/external_test_suite/onnx_gpu_vulkan.json \ - --no-skip-tests-missing-files \ - --report-log=/tmp/iree_tests_onnx_gpu_vulkan_logs.json - - name: "Updating config file with latest XFAIL lists" - if: failure() - run: | - source ${VENV_DIR}/bin/activate - python SHARK-TestSuite/iree_tests/update_config_xfails.py \ - --log-file=/tmp/iree_tests_onnx_gpu_vulkan_logs.json \ - --config-file=build_tools/pkgci/external_test_suite/onnx_gpu_vulkan.json - cat build_tools/pkgci/external_test_suite/onnx_gpu_vulkan.json - - name: "Uploading new config file" - if: failure() - uses: actions/upload-artifact@v4 - with: - name: "onnx_gpu_vulkan.json" - path: "build_tools/pkgci/external_test_suite/onnx_gpu_vulkan.json" diff --git a/build_tools/pkgci/external_test_suite/gpu_rocm_models_gfx90a.json b/build_tools/pkgci/external_test_suite/pytorch_models_gpu_rocm_gfx90a.json similarity index 100% rename from build_tools/pkgci/external_test_suite/gpu_rocm_models_gfx90a.json rename to build_tools/pkgci/external_test_suite/pytorch_models_gpu_rocm_gfx90a.json diff --git a/build_tools/pkgci/external_test_suite/gpu_rocm_models_additional_flags_gfx90a.json 
b/build_tools/pkgci/external_test_suite/pytorch_models_gpu_rocm_gfx90a_additional_flags.json similarity index 100% rename from build_tools/pkgci/external_test_suite/gpu_rocm_models_additional_flags_gfx90a.json rename to build_tools/pkgci/external_test_suite/pytorch_models_gpu_rocm_gfx90a_additional_flags.json