From 6c45befbf9efa47e52471e612fedded32f05f0ee Mon Sep 17 00:00:00 2001 From: Nithin Meganathan <18070964+nithinsubbiah@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:37:30 -0700 Subject: [PATCH] [runtime][HIP] Retire ROCm HAL backend (#17029) This patch retires the ROCm HAL backend given the new HIP HAL backend is functionally complete (https://github.com/openxla/iree/issues/15790). The new backend has been tested and proved to give on par or better performance compared to the ROCm backend. The documentation still preserves the `ROCm` term to denote the AMD compute software while using `HIP` specifically for the runtime. --- CMakeLists.txt | 16 - build_tools/cmake/test_riscv.sh | 1 - build_tools/pkgci/build_linux_packages.sh | 2 +- .../ROCM/builtins/ukernel/test/CMakeLists.txt | 2 +- .../deployment-configurations/gpu-rocm.md | 8 +- .../tests/pregenerated/test_llama2.py | 4 +- .../tests/pregenerated/test_ukernel.py | 16 +- experimental/rocm/CMakeLists.txt | 116 ---- experimental/rocm/api.h | 47 -- experimental/rocm/context_wrapper.h | 24 - experimental/rocm/cts/CMakeLists.txt | 35 -- experimental/rocm/direct_command_buffer.c | 496 ------------------ experimental/rocm/direct_command_buffer.h | 54 -- experimental/rocm/dynamic_symbol_tables.h | 86 --- experimental/rocm/dynamic_symbols.c | 74 --- experimental/rocm/dynamic_symbols.h | 53 -- experimental/rocm/dynamic_symbols_test.cc | 47 -- experimental/rocm/event_semaphore.c | 108 ---- experimental/rocm/event_semaphore.h | 30 -- experimental/rocm/native_executable.c | 317 ----------- experimental/rocm/native_executable.h | 59 --- experimental/rocm/nop_executable_cache.c | 89 ---- experimental/rocm/nop_executable_cache.h | 29 - experimental/rocm/pipeline_layout.c | 201 ------- experimental/rocm/pipeline_layout.h | 62 --- experimental/rocm/registration/CMakeLists.txt | 21 - .../rocm/registration/driver_module.c | 55 -- .../rocm/registration/driver_module.h | 24 - experimental/rocm/rocm_allocator.c | 375 ------------- experimental/rocm/rocm_allocator.h | 28 - experimental/rocm/rocm_buffer.c | 137 ----- experimental/rocm/rocm_buffer.h | 38 -- experimental/rocm/rocm_device.c | 466 ---------------- experimental/rocm/rocm_device.h | 31 -- experimental/rocm/rocm_driver.c | 429 --------------- experimental/rocm/rocm_event.c | 60 --- experimental/rocm/rocm_event.h | 31 -- experimental/rocm/rocm_headers.h | 31 -- experimental/rocm/status_util.c | 32 -- experimental/rocm/status_util.h | 54 -- experimental/rocm/tracing.c | 293 ----------- experimental/rocm/tracing.h | 119 ----- .../hip/kernels/CMakeLists.txt | 2 +- .../custom_dispatch/hip/kernels/example.mlir | 4 +- 44 files changed, 19 insertions(+), 4187 deletions(-) delete mode 100644 experimental/rocm/CMakeLists.txt delete mode 100644 experimental/rocm/api.h delete mode 100644 experimental/rocm/context_wrapper.h delete mode 100644 experimental/rocm/cts/CMakeLists.txt delete mode 100644 experimental/rocm/direct_command_buffer.c delete mode 100644 experimental/rocm/direct_command_buffer.h delete mode 100644 experimental/rocm/dynamic_symbol_tables.h delete mode 100644 experimental/rocm/dynamic_symbols.c delete mode 100644 experimental/rocm/dynamic_symbols.h delete mode 100644 experimental/rocm/dynamic_symbols_test.cc delete mode 100644 experimental/rocm/event_semaphore.c delete mode 100644 experimental/rocm/event_semaphore.h delete mode 100644 experimental/rocm/native_executable.c delete mode 100644 experimental/rocm/native_executable.h delete mode 100644 experimental/rocm/nop_executable_cache.c delete mode 100644 experimental/rocm/nop_executable_cache.h delete mode 100644 experimental/rocm/pipeline_layout.c delete mode 100644 experimental/rocm/pipeline_layout.h delete mode 100644 experimental/rocm/registration/CMakeLists.txt delete mode 100644 experimental/rocm/registration/driver_module.c delete mode 100644 experimental/rocm/registration/driver_module.h delete mode 100644 experimental/rocm/rocm_allocator.c delete mode 100644 experimental/rocm/rocm_allocator.h delete mode 100644 experimental/rocm/rocm_buffer.c delete mode 100644 experimental/rocm/rocm_buffer.h delete mode 100644 experimental/rocm/rocm_device.c delete mode 100644 experimental/rocm/rocm_device.h delete mode 100644 experimental/rocm/rocm_driver.c delete mode 100644 experimental/rocm/rocm_event.c delete mode 100644 experimental/rocm/rocm_event.h delete mode 100644 experimental/rocm/rocm_headers.h delete mode 100644 experimental/rocm/status_util.c delete mode 100644 experimental/rocm/status_util.h delete mode 100644 experimental/rocm/tracing.c delete mode 100644 experimental/rocm/tracing.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e391da801403..6c36350e402b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -580,22 +580,6 @@ include(iree_plugin_register) set(IREE_PACKAGE_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}") set(IREE_PACKAGE_ROOT_PREFIX "iree") -#------------------------------------------------------------------------------- -# Experimental ROCM HAL driver -# Enable with: -DIREE_EXTERNAL_HAL_DRIVERS=rocm -#------------------------------------------------------------------------------- - -iree_register_external_hal_driver( - NAME - rocm - SOURCE_DIR - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/rocm" - DRIVER_TARGET - iree::experimental::rocm::registration - REGISTER_FN - iree_hal_rocm_driver_module_register -) - #------------------------------------------------------------------------------- # Experimental WebGPU HAL driver # Enable with: -DIREE_EXTERNAL_HAL_DRIVERS=webgpu diff --git a/build_tools/cmake/test_riscv.sh b/build_tools/cmake/test_riscv.sh index e14855b7a9e4..da20b8719c15 100755 --- a/build_tools/cmake/test_riscv.sh +++ b/build_tools/cmake/test_riscv.sh @@ -36,7 +36,6 @@ declare -a label_exclude_args=( "^driver=vulkan$" "^driver=metal$" "^driver=cuda$" - "^driver=rocm$" "^driver=hip$" "^vulkan_uses_vk_khr_shader_float16_int8$" "^requires-filesystem$" diff --git a/build_tools/pkgci/build_linux_packages.sh b/build_tools/pkgci/build_linux_packages.sh index f97af7e557fd..ca7afcbc9e51 100755 --- a/build_tools/pkgci/build_linux_packages.sh +++ b/build_tools/pkgci/build_linux_packages.sh @@ -188,7 +188,7 @@ function build_wheel() { function build_iree_runtime() { # We install the needed build deps below for the tools. IREE_RUNTIME_BUILD_TRACY=ON IREE_RUNTIME_BUILD_TRACY_TOOLS=ON \ - IREE_EXTERNAL_HAL_DRIVERS="rocm" \ + IREE_HAL_DRIVER_HIP=ON \ build_wheel runtime/ } diff --git a/compiler/plugins/target/ROCM/builtins/ukernel/test/CMakeLists.txt b/compiler/plugins/target/ROCM/builtins/ukernel/test/CMakeLists.txt index e18c17e4bad7..64347801ef31 100644 --- a/compiler/plugins/target/ROCM/builtins/ukernel/test/CMakeLists.txt +++ b/compiler/plugins/target/ROCM/builtins/ukernel/test/CMakeLists.txt @@ -16,6 +16,6 @@ iree_lit_test_suite( FileCheck iree-compile LABELS - "driver=rocm" + "driver=hip" "hostonly" ) diff --git a/docs/website/docs/guides/deployment-configurations/gpu-rocm.md b/docs/website/docs/guides/deployment-configurations/gpu-rocm.md index 11c92897256c..9f7d34ca1491 100644 --- a/docs/website/docs/guides/deployment-configurations/gpu-rocm.md +++ b/docs/website/docs/guides/deployment-configurations/gpu-rocm.md @@ -45,14 +45,14 @@ the IREE compiler, then enable the ROCm compiler target with the ### Get the IREE runtime -Next you will need to get an IREE runtime that includes the ROCm HAL driver. +Next you will need to get an IREE runtime that includes the HIP HAL driver. #### :material-hammer-wrench: Build the runtime from source Please make sure you have followed the [Getting started](../../building-from-source/getting-started.md) page to build -IREE from source, then enable the experimental ROCm HAL driver with the -`IREE_EXTERNAL_HAL_DRIVERS=rocm` option. +IREE from source, then enable the HIP HAL driver with the `IREE_HAL_DRIVER_HIP` +option. ## Compile and run a program model @@ -108,7 +108,7 @@ Run the following command: ``` shell hl_lines="2" iree-run-module \ - --device=rocm \ + --device=hip \ --module=mobilenet_rocm.vmfb \ --function=predict \ --input="1x224x224x3xf32=0" diff --git a/experimental/regression_suite/tests/pregenerated/test_llama2.py b/experimental/regression_suite/tests/pregenerated/test_llama2.py index 0db2abf9e6a4..74d5a31bb48c 100644 --- a/experimental/regression_suite/tests/pregenerated/test_llama2.py +++ b/experimental/regression_suite/tests/pregenerated/test_llama2.py @@ -223,7 +223,7 @@ def test_step_a100_vulkan_stripped(llama2_7b_f16qi4_a100_vulkan_vmfb): def test_step_rdna3_rocm_stripped(llama2_7b_f16qi4_stripped_rdna3_rocm_vmfb): iree_benchmark_module( llama2_7b_f16qi4_stripped_rdna3_rocm_vmfb, - device="rocm", + device="hip", function="first_vicuna_forward", args=[ "--input=1x1xi64", @@ -231,7 +231,7 @@ def test_step_rdna3_rocm_stripped(llama2_7b_f16qi4_stripped_rdna3_rocm_vmfb): ) iree_benchmark_module( llama2_7b_f16qi4_stripped_rdna3_rocm_vmfb, - device="rocm", + device="hip", function="second_vicuna_forward", args=[ "--input=1x1xi64", diff --git a/experimental/regression_suite/tests/pregenerated/test_ukernel.py b/experimental/regression_suite/tests/pregenerated/test_ukernel.py index f9c92c66de22..11806acd250c 100644 --- a/experimental/regression_suite/tests/pregenerated/test_ukernel.py +++ b/experimental/regression_suite/tests/pregenerated/test_ukernel.py @@ -156,7 +156,7 @@ def test_correctness_gfx90a_rocm( ): iree_run_module( argmax_ukernel_gfx90a_rocm_vmfb, - device="rocm", + device="hip", function="argmax_3d_dyn_f16i32", args=[ f"--input=@{argmax_input_f16.path}", @@ -165,7 +165,7 @@ def test_correctness_gfx90a_rocm( ) iree_run_module( argmax_ukernel_gfx90a_rocm_vmfb, - device="rocm", + device="hip", function="argmax_3d_dyn_f16i64", args=[ f"--input=@{argmax_input_f16.path}", @@ -175,7 +175,7 @@ def test_correctness_gfx90a_rocm( iree_run_module( argmax_ukernel_gfx90a_rocm_vmfb, - device="rocm", + device="hip", function="argmax_3d_dyn_f32i32", args=[ f"--input=@{argmax_input_f32.path}", @@ -184,7 +184,7 @@ def test_correctness_gfx90a_rocm( ) iree_run_module( argmax_ukernel_gfx90a_rocm_vmfb, - device="rocm", + device="hip", function="argmax_3d_dyn_f32i64", args=[ f"--input=@{argmax_input_f32.path}", @@ -205,7 +205,7 @@ def test_correctness_gfx940_rocm( ): iree_run_module( argmax_ukernel_gfx940_rocm_vmfb, - device="rocm", + device="hip", function="argmax_3d_dyn_f16i32", args=[ f"--input=@{argmax_input_f16.path}", @@ -214,7 +214,7 @@ def test_correctness_gfx940_rocm( ) iree_run_module( argmax_ukernel_gfx940_rocm_vmfb, - device="rocm", + device="hip", function="argmax_3d_dyn_f16i64", args=[ f"--input=@{argmax_input_f16.path}", @@ -224,7 +224,7 @@ def test_correctness_gfx940_rocm( iree_run_module( argmax_ukernel_gfx940_rocm_vmfb, - device="rocm", + device="hip", function="argmax_3d_dyn_f32i32", args=[ f"--input=@{argmax_input_f32.path}", @@ -233,7 +233,7 @@ def test_correctness_gfx940_rocm( ) iree_run_module( argmax_ukernel_gfx940_rocm_vmfb, - device="rocm", + device="hip", function="argmax_3d_dyn_f32i64", args=[ f"--input=@{argmax_input_f32.path}", diff --git a/experimental/rocm/CMakeLists.txt b/experimental/rocm/CMakeLists.txt deleted file mode 100644 index d2b6908ddc5e..000000000000 --- a/experimental/rocm/CMakeLists.txt +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright 2021 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -set(IREE_PACKAGE_ROOT_DIR ${CMAKE_CURRENT_LIST_DIR}/../..) -# Canonicalize path. -cmake_path(ABSOLUTE_PATH IREE_PACKAGE_ROOT_DIR - BASE_DIRECTORY ${IREE_PACKAGE_ROOT_DIR} - NORMALIZE - OUTPUT_VARIABLE IREE_PACKAGE_ROOT_DIR) -set(IREE_PACKAGE_ROOT_PREFIX iree) - -set(IREE_ROCM_TARGET_CHIP_DEFAULT "gfx908") -set(IREE_ROCM_TARGET_CHIP "${IREE_ROCM_TARGET_CHIP_DEFAULT}" CACHE STRING - "Target chip for ROCm. This influences conformance tests that need to compile device code. Defaults to \"${IREE_ROCM_TARGET_CHIP_DEFAULT}\".") - -iree_add_all_subdirs() - -if(NOT DEFINED ROCM_HEADERS_API_ROOT) - set(ROCM_HEADERS_API_ROOT "${IREE_SOURCE_DIR}/third_party/hip-build-deps/include") -endif() - -if(NOT EXISTS "${ROCM_HEADERS_API_ROOT}/hip/hip_version.h") - message(SEND_ERROR "Could not find HIP headers at: ${ROCM_HEADERS_API_ROOT}") -endif() - -iree_cc_library( - NAME - rocm - HDRS - "api.h" - SRCS - "api.h" - "context_wrapper.h" - "rocm_allocator.c" - "rocm_allocator.h" - "rocm_buffer.c" - "rocm_buffer.h" - "rocm_device.c" - "rocm_device.h" - "rocm_driver.c" - "rocm_event.c" - "rocm_event.h" - "event_semaphore.c" - "event_semaphore.h" - "direct_command_buffer.c" - "direct_command_buffer.h" - "native_executable.c" - "native_executable.h" - "nop_executable_cache.c" - "nop_executable_cache.h" - "pipeline_layout.c" - "pipeline_layout.h" - "status_util.c" - "status_util.h" - "tracing.c" - "tracing.h" - INCLUDES - "${CMAKE_CURRENT_LIST_DIR}/../.." - "${PROJECT_BINARY_DIR}" - "${ROCM_HEADERS_API_ROOT}" - DEPS - ::dynamic_symbols - iree::base - iree::base::internal - iree::base::internal::arena - iree::base::internal::flatcc::parsing - iree::base::internal::synchronization - iree::hal - iree::hal::utils::file_transfer - iree::hal::utils::memory_file - iree::hal::utils::semaphore_base - iree::schemas::rocm_executable_def_c_fbs - COPTS - "-D__HIP_PLATFORM_HCC__=1" - PUBLIC -) - -iree_cc_library( - NAME - dynamic_symbols - HDRS - "dynamic_symbols.h" - TEXTUAL_HDRS - "dynamic_symbol_tables.h" - SRCS - "rocm_headers.h" - "dynamic_symbols.c" - INCLUDES - "${ROCM_HEADERS_API_ROOT}" - "${CMAKE_CURRENT_LIST_DIR}/../.." - COPTS - "-D__HIP_PLATFORM_HCC__=1" - DEPS - iree::base - iree::base::internal::dynamic_library - PUBLIC -) - -iree_cc_test( - NAME - dynamic_symbols_test - SRCS - "dynamic_symbols_test.cc" - DEPS - ::dynamic_symbols - iree::base - iree::testing::gtest - iree::testing::gtest_main - LABELS - "driver=rocm" - COPTS - "-D__HIP_PLATFORM_HCC__=1" -) diff --git a/experimental/rocm/api.h b/experimental/rocm/api.h deleted file mode 100644 index 68fa1913bf2f..000000000000 --- a/experimental/rocm/api.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -// See iree/base/api.h for documentation on the API conventions used. - -#ifndef IREE_HAL_ROCM_API_H_ -#define IREE_HAL_ROCM_API_H_ - -#include "iree/base/api.h" -#include "iree/hal/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -//===----------------------------------------------------------------------===// -// iree_hal_rocm_driver_t -//===----------------------------------------------------------------------===// - -// ROCM driver creation options. -typedef struct iree_hal_rocm_driver_options_t { - // Index of the default ROCM device to use within the list of available - // devices. - int default_device_index; -} iree_hal_rocm_driver_options_t; - -IREE_API_EXPORT void iree_hal_rocm_driver_options_initialize( - iree_hal_rocm_driver_options_t *out_options); - -// Creates a ROCM HAL driver that manage its own hipcontext. -// -// |out_driver| must be released by the caller (see |iree_hal_driver_release|). -IREE_API_EXPORT iree_status_t iree_hal_rocm_driver_create( - iree_string_view_t identifier, - const iree_hal_rocm_driver_options_t *options, - iree_allocator_t host_allocator, iree_hal_driver_t **out_driver); - -// TODO(thomasraoux): Support importing a CUcontext from app. - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_API_H_ diff --git a/experimental/rocm/context_wrapper.h b/experimental/rocm/context_wrapper.h deleted file mode 100644 index 66451cbab8b2..000000000000 --- a/experimental/rocm/context_wrapper.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_CONTEXT_WRAPPER_H_ -#define IREE_HAL_ROCM_CONTEXT_WRAPPER_H_ - -#include "experimental/rocm/dynamic_symbols.h" -#include "experimental/rocm/rocm_headers.h" -#include "iree/hal/api.h" - -// Structure to wrap all objects constant within a context. This makes it -// simpler to pass it to the different objects and saves memory. -typedef struct iree_hal_rocm_context_wrapper_t { - hipCtx_t rocm_context; - hipDevice_t rocm_device; - hipStream_t rocm_stream; - iree_allocator_t host_allocator; - iree_hal_rocm_dynamic_symbols_t *syms; -} iree_hal_rocm_context_wrapper_t; - -#endif // IREE_HAL_ROCM_CONTEXT_WRAPPER_H_ diff --git a/experimental/rocm/cts/CMakeLists.txt b/experimental/rocm/cts/CMakeLists.txt deleted file mode 100644 index 398062b537d4..000000000000 --- a/experimental/rocm/cts/CMakeLists.txt +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2021 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -unset(COMPILER_FLAGS) -list(APPEND COMPILER_FLAGS - "--iree-rocm-target-chip=${IREE_ROCM_TARGET_CHIP}") - -iree_hal_cts_test_suite( - DRIVER_NAME - rocm - DRIVER_REGISTRATION_HDR - "experimental/rocm/registration/driver_module.h" - DRIVER_REGISTRATION_FN - "iree_hal_rocm_driver_module_register" - COMPILER_TARGET_BACKEND - "rocm" - EXECUTABLE_FORMAT - "\"PTXE\"" - COMPILER_FLAGS - ${COMPILER_FLAGS} - DEPS - iree::experimental::rocm::registration - EXCLUDED_TESTS - # This test depends on iree_hal_rocm_direct_command_buffer_update_buffer - # via iree_hal_buffer_view_allocate_buffer_copy, which is not implemented yet. - "command_buffer_dispatch" - # Semaphores are not implemented in the ROCm backend yet. - "semaphore_submission" - "semaphore" - LABELS - driver=rocm -) diff --git a/experimental/rocm/direct_command_buffer.c b/experimental/rocm/direct_command_buffer.c deleted file mode 100644 index 42b476baa815..000000000000 --- a/experimental/rocm/direct_command_buffer.c +++ /dev/null @@ -1,496 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/direct_command_buffer.h" - -#include -#include -#include - -#include "experimental/rocm/dynamic_symbols.h" -#include "experimental/rocm/native_executable.h" -#include "experimental/rocm/pipeline_layout.h" -#include "experimental/rocm/rocm_buffer.h" -#include "experimental/rocm/status_util.h" -#include "iree/base/api.h" - -// Command buffer implementation that directly maps to rocm direct. -// This records the commands on the calling thread without additional threading -// indirection. - -typedef struct { - iree_hal_command_buffer_t base; - iree_hal_rocm_context_wrapper_t* context; - iree_arena_block_pool_t* block_pool; - iree_hal_rocm_tracing_context_t* tracing_context; - - // Staging arena used for host->device transfers. - // Used for when we need HIP to be able to reference memory as it performs - // asynchronous operations. - iree_arena_allocator_t arena; - - // Keep track of the current set of kernel arguments. - int32_t push_constant[IREE_HAL_ROCM_MAX_PUSH_CONSTANT_COUNT]; - void* current_descriptor[]; -} iree_hal_rocm_direct_command_buffer_t; - -#define IREE_HAL_ROCM_MAX_BINDING_COUNT 64 -// Kernel arguments contains binding and push constants. -#define IREE_HAL_ROCM_MAX_KERNEL_ARG 128 - -static const iree_hal_command_buffer_vtable_t - iree_hal_rocm_direct_command_buffer_vtable; - -static iree_hal_rocm_direct_command_buffer_t* -iree_hal_rocm_direct_command_buffer_cast( - iree_hal_command_buffer_t* base_value) { - IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_rocm_direct_command_buffer_vtable); - return (iree_hal_rocm_direct_command_buffer_t*)base_value; -} - -iree_status_t iree_hal_rocm_direct_command_buffer_create( - iree_hal_allocator_t* device_allocator, - iree_hal_rocm_context_wrapper_t* context, - iree_hal_rocm_tracing_context_t* tracing_context, - iree_hal_command_buffer_mode_t mode, - iree_hal_command_category_t command_categories, - iree_hal_queue_affinity_t queue_affinity, iree_host_size_t binding_capacity, - iree_arena_block_pool_t* block_pool, - iree_hal_command_buffer_t** out_command_buffer) { - IREE_ASSERT_ARGUMENT(context); - IREE_ASSERT_ARGUMENT(block_pool); - IREE_ASSERT_ARGUMENT(out_command_buffer); - *out_command_buffer = NULL; - - if (binding_capacity > 0) { - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "indirect command buffers not yet implemented"); - } - - IREE_TRACE_ZONE_BEGIN(z0); - - iree_hal_rocm_direct_command_buffer_t* command_buffer = NULL; - size_t total_size = sizeof(*command_buffer) + - IREE_HAL_ROCM_MAX_KERNEL_ARG * sizeof(void*) + - IREE_HAL_ROCM_MAX_KERNEL_ARG * sizeof(hipDeviceptr_t); - iree_status_t status = iree_allocator_malloc( - context->host_allocator, - total_size + - iree_hal_command_buffer_validation_state_size(mode, binding_capacity), - (void**)&command_buffer); - if (iree_status_is_ok(status)) { - iree_hal_command_buffer_initialize( - device_allocator, mode, command_categories, queue_affinity, - binding_capacity, (uint8_t*)command_buffer + total_size, - &iree_hal_rocm_direct_command_buffer_vtable, &command_buffer->base); - command_buffer->context = context; - command_buffer->tracing_context = tracing_context; - command_buffer->block_pool = block_pool; - iree_arena_initialize(block_pool, &command_buffer->arena); - hipDeviceptr_t* device_ptrs = - (hipDeviceptr_t*)(command_buffer->current_descriptor + - IREE_HAL_ROCM_MAX_KERNEL_ARG); - for (size_t i = 0; i < IREE_HAL_ROCM_MAX_KERNEL_ARG; i++) { - command_buffer->current_descriptor[i] = &device_ptrs[i]; - } - - *out_command_buffer = &command_buffer->base; - } - - IREE_TRACE_ZONE_END(z0); - return status; -} - -static void iree_hal_rocm_direct_command_buffer_destroy( - iree_hal_command_buffer_t* base_command_buffer) { - iree_hal_rocm_direct_command_buffer_t* command_buffer = - iree_hal_rocm_direct_command_buffer_cast(base_command_buffer); - IREE_TRACE_ZONE_BEGIN(z0); - - iree_arena_deinitialize(&command_buffer->arena); - iree_allocator_free(command_buffer->context->host_allocator, command_buffer); - - IREE_TRACE_ZONE_END(z0); -} - -bool iree_hal_rocm_direct_command_buffer_isa( - iree_hal_command_buffer_t* command_buffer) { - return iree_hal_resource_is(&command_buffer->resource, - &iree_hal_rocm_direct_command_buffer_vtable); -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_begin( - iree_hal_command_buffer_t* base_command_buffer) { - iree_hal_rocm_direct_command_buffer_t* command_buffer = - iree_hal_rocm_direct_command_buffer_cast(base_command_buffer); - (void)command_buffer; - - IREE_ROCM_TRACE_ZONE_BEGIN_EXTERNAL( - command_buffer->tracing_context, 0, - /*file_name=*/NULL, 0, /*line=*/0, "iree_hal_rocm_direct_command_buffer", - strlen("iree_hal_rocm_direct_command_buffer"), - /*name=*/NULL, 0); - - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_end( - iree_hal_command_buffer_t* base_command_buffer) { - iree_hal_rocm_direct_command_buffer_t* command_buffer = - iree_hal_rocm_direct_command_buffer_cast(base_command_buffer); - (void)command_buffer; - - IREE_ROCM_TRACE_ZONE_END(command_buffer->tracing_context, 0); - - return iree_ok_status(); -} - -static void iree_hal_rocm_direct_command_buffer_begin_debug_group( - iree_hal_command_buffer_t* base_command_buffer, iree_string_view_t label, - iree_hal_label_color_t label_color, - const iree_hal_label_location_t* location) { - iree_hal_rocm_direct_command_buffer_t* command_buffer = - iree_hal_rocm_direct_command_buffer_cast(base_command_buffer); - (void)command_buffer; - - IREE_ROCM_TRACE_ZONE_BEGIN_EXTERNAL( - command_buffer->tracing_context, 0, location ? location->file.data : NULL, - location ? location->file.size : 0, location ? location->line : 0, - /*func_name=*/NULL, 0, label.data, label.size); -} - -static void iree_hal_rocm_direct_command_buffer_end_debug_group( - iree_hal_command_buffer_t* base_command_buffer) { - iree_hal_rocm_direct_command_buffer_t* command_buffer = - iree_hal_rocm_direct_command_buffer_cast(base_command_buffer); - (void)command_buffer; - - IREE_ROCM_TRACE_ZONE_END(command_buffer->tracing_context, 0); -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_execution_barrier( - iree_hal_command_buffer_t* base_command_buffer, - iree_hal_execution_stage_t source_stage_mask, - iree_hal_execution_stage_t target_stage_mask, - iree_hal_execution_barrier_flags_t flags, - iree_host_size_t memory_barrier_count, - const iree_hal_memory_barrier_t* memory_barriers, - iree_host_size_t buffer_barrier_count, - const iree_hal_buffer_barrier_t* buffer_barriers) { - // TODO: Implement barrier - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_signal_event( - iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event, - iree_hal_execution_stage_t source_stage_mask) { - // TODO: Implement barrier - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_reset_event( - iree_hal_command_buffer_t* base_command_buffer, iree_hal_event_t* event, - iree_hal_execution_stage_t source_stage_mask) { - // TODO: Implement barrier - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_wait_events( - iree_hal_command_buffer_t* base_command_buffer, - iree_host_size_t event_count, const iree_hal_event_t** events, - iree_hal_execution_stage_t source_stage_mask, - iree_hal_execution_stage_t target_stage_mask, - iree_host_size_t memory_barrier_count, - const iree_hal_memory_barrier_t* memory_barriers, - iree_host_size_t buffer_barrier_count, - const iree_hal_buffer_barrier_t* buffer_barriers) { - // TODO: Implement barrier - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_discard_buffer( - iree_hal_command_buffer_t* base_command_buffer, - iree_hal_buffer_ref_t buffer_ref) { - // nothing to do. - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_fill_buffer( - iree_hal_command_buffer_t* base_command_buffer, - iree_hal_buffer_ref_t target_ref, const void* pattern, - iree_host_size_t pattern_length) { - iree_hal_rocm_direct_command_buffer_t* command_buffer = - iree_hal_rocm_direct_command_buffer_cast(base_command_buffer); - - IREE_ROCM_TRACE_ZONE_BEGIN(command_buffer->tracing_context, 0); - - hipDeviceptr_t target_device_buffer = iree_hal_rocm_buffer_device_pointer( - iree_hal_buffer_allocated_buffer(target_ref.buffer)); - hipDeviceptr_t dst = - (hipDeviceptr_t)((uintptr_t)target_device_buffer + - iree_hal_buffer_byte_offset(target_ref.buffer) + - target_ref.offset); - size_t num_elements = target_ref.length / pattern_length; - // TODO(raikonenfnu): Currently using NULL stream, need to figure out way to - // access proper stream from command buffer - iree_status_t status = iree_ok_status(); - switch (pattern_length) { - case 4: { - status = ROCM_RESULT_TO_STATUS( - command_buffer->context->syms, - hipMemsetD32Async(dst, *(const uint32_t*)(pattern), num_elements, 0), - "hipMemsetD32Async"); - break; - } - case 2: { - status = ROCM_RESULT_TO_STATUS( - command_buffer->context->syms, - hipMemsetD16Async(dst, *(const uint16_t*)(pattern), num_elements, 0), - "hipMemsetD16Async"); - break; - } - case 1: { - status = ROCM_RESULT_TO_STATUS( - command_buffer->context->syms, - hipMemsetD8Async(dst, *(const uint8_t*)(pattern), num_elements, 0), - "hipMemsetD8Async"); - break; - } - default: { - status = iree_make_status(IREE_STATUS_INTERNAL, - "unsupported fill pattern length"); - break; - } - } - - IREE_ROCM_TRACE_ZONE_END(command_buffer->tracing_context, 0); - return status; -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_update_buffer( - iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer, - iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) { - iree_hal_rocm_direct_command_buffer_t* command_buffer = - iree_hal_rocm_direct_command_buffer_cast(base_command_buffer); - - // Allocate scratch space in the arena for the data and copy it in. - // The update buffer API requires that the command buffer capture the host - // memory at the time the method is called in case the caller wants to reuse - // the memory. Because HIP memcpys are async if we didn't copy it's possible - // for the reused memory to change before the stream reaches the copy - // operation and get the wrong data. - const uint8_t* src = (const uint8_t*)source_buffer + source_offset; - uint8_t* storage = NULL; - IREE_RETURN_IF_ERROR(iree_arena_allocate( - &command_buffer->arena, target_ref.length, (void**)&storage)); - memcpy(storage, src, target_ref.length); - src = storage; - - // Issue the copy using the scratch memory as the source. - hipDeviceptr_t target_device_buffer = iree_hal_rocm_buffer_device_pointer( - iree_hal_buffer_allocated_buffer(target_ref.buffer)); - hipDeviceptr_t dst = (uint8_t*)target_device_buffer + - iree_hal_buffer_byte_offset(target_ref.buffer) + - target_ref.offset; - ROCM_RETURN_IF_ERROR(command_buffer->context->syms, - hipMemcpyHtoDAsync(dst, (void*)src, target_ref.length, - command_buffer->context->rocm_stream), - "hipMemcpyHtoDAsync"); - - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_copy_buffer( - iree_hal_command_buffer_t* base_command_buffer, - iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) { - iree_hal_rocm_direct_command_buffer_t* command_buffer = - iree_hal_rocm_direct_command_buffer_cast(base_command_buffer); - - IREE_ROCM_TRACE_ZONE_BEGIN(command_buffer->tracing_context, 0); - - hipDeviceptr_t target_device_buffer = iree_hal_rocm_buffer_device_pointer( - iree_hal_buffer_allocated_buffer(target_ref.buffer)); - iree_device_size_t target_offset = - iree_hal_buffer_byte_offset(target_ref.buffer) + target_ref.offset; - hipDeviceptr_t source_device_buffer = iree_hal_rocm_buffer_device_pointer( - iree_hal_buffer_allocated_buffer(source_ref.buffer)); - iree_device_size_t source_offset = - iree_hal_buffer_byte_offset(source_ref.buffer) + source_ref.offset; - hipDeviceptr_t dst = - (hipDeviceptr_t)((uintptr_t)target_device_buffer + target_offset); - hipDeviceptr_t src = - (hipDeviceptr_t)((uintptr_t)source_device_buffer + source_offset); - // TODO(raikonenfnu): Currently using NULL stream, need to figure out way to - // access proper stream from command buffer - iree_status_t status = ROCM_RESULT_TO_STATUS( - command_buffer->context->syms, - hipMemcpyAsync(dst, src, target_ref.length, hipMemcpyDeviceToDevice, 0), - "hipMemcpyAsync"); - - IREE_ROCM_TRACE_ZONE_END(command_buffer->tracing_context, 0); - return status; -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_collective( - iree_hal_command_buffer_t* base_command_buffer, iree_hal_channel_t* channel, - iree_hal_collective_op_t op, uint32_t param, iree_hal_buffer_ref_t send_ref, - iree_hal_buffer_ref_t recv_ref, iree_device_size_t element_count) { - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "need rocm implementation"); -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_push_constants( - iree_hal_command_buffer_t* base_command_buffer, - iree_hal_pipeline_layout_t* pipeline_layout, iree_host_size_t offset, - const void* values, iree_host_size_t values_length) { - iree_hal_rocm_direct_command_buffer_t* command_buffer = - iree_hal_rocm_direct_command_buffer_cast(base_command_buffer); - iree_host_size_t constant_base_index = offset / sizeof(int32_t); - for (iree_host_size_t i = 0; i < values_length / sizeof(int32_t); i++) { - command_buffer->push_constant[i + constant_base_index] = - ((uint32_t*)values)[i]; - } - return iree_ok_status(); -} - -// Tie together the binding index and its index in |bindings| array. -typedef struct { - uint32_t index; - uint32_t binding; -} iree_hal_rocm_binding_mapping_t; - -// Helper to sort the binding based on their binding index. -static int compare_binding_index(const void* a, const void* b) { - const iree_hal_rocm_binding_mapping_t buffer_a = - *(const iree_hal_rocm_binding_mapping_t*)a; - const iree_hal_rocm_binding_mapping_t buffer_b = - *(const iree_hal_rocm_binding_mapping_t*)b; - return buffer_a.binding < buffer_b.binding ? -1 : 1; -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_push_descriptor_set( - iree_hal_command_buffer_t* base_command_buffer, - iree_hal_pipeline_layout_t* pipeline_layout, uint32_t set, - iree_host_size_t binding_count, const iree_hal_buffer_ref_t* bindings) { - iree_hal_rocm_direct_command_buffer_t* command_buffer = - iree_hal_rocm_direct_command_buffer_cast(base_command_buffer); - iree_host_size_t base_binding = - iree_hal_rocm_base_binding_index(pipeline_layout, set); - // Convention with the compiler side. We map bindings to kernel argument. - // We compact the bindings to get a dense set of arguments and keep them order - // based on the binding index. - // Sort the binding based on the binding index and map the array index to the - // argument index. - iree_hal_rocm_binding_mapping_t binding_used[IREE_HAL_ROCM_MAX_BINDING_COUNT]; - for (iree_host_size_t i = 0; i < binding_count; i++) { - iree_hal_rocm_binding_mapping_t buffer = {i, bindings[i].ordinal}; - binding_used[i] = buffer; - } - qsort(binding_used, binding_count, sizeof(iree_hal_rocm_binding_mapping_t), - compare_binding_index); - assert(binding_count < IREE_HAL_ROCM_MAX_BINDING_COUNT && - "binding count larger than the max expected."); - for (iree_host_size_t i = 0; i < binding_count; i++) { - iree_hal_buffer_ref_t binding = bindings[binding_used[i].index]; - hipDeviceptr_t device_ptr = - binding.buffer - ? (hipDeviceptr_t)((uintptr_t)iree_hal_rocm_buffer_device_pointer( - iree_hal_buffer_allocated_buffer( - binding.buffer)) + - iree_hal_buffer_byte_offset(binding.buffer) + - binding.offset) - : 0; - *((hipDeviceptr_t*)command_buffer->current_descriptor[i + base_binding]) = - device_ptr; - } - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_dispatch( - iree_hal_command_buffer_t* base_command_buffer, - iree_hal_executable_t* executable, int32_t entry_point, - uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z, - iree_hal_dispatch_flags_t flags) { - iree_hal_rocm_direct_command_buffer_t* command_buffer = - iree_hal_rocm_direct_command_buffer_cast(base_command_buffer); - // Lookup kernel parameters used for side-channeling additional launch - // information from the compiler. - iree_hal_rocm_kernel_params_t kernel_params; - IREE_RETURN_IF_ERROR( - iree_hal_rocm_native_executable_entry_point_kernel_params( - executable, entry_point, &kernel_params)); - - IREE_TRACE({ - iree_hal_rocm_source_location_t source_location; - iree_hal_rocm_native_executable_entry_point_source_location( - executable, entry_point, &source_location); - IREE_ROCM_TRACE_ZONE_BEGIN_EXTERNAL( - command_buffer->tracing_context, /*stream=*/0, - source_location.file_name.data, source_location.file_name.size, - source_location.line, source_location.func_name.data, - source_location.func_name.size, - /*name=*/NULL, 0); - }); - - // Patch the push constants in the kernel arguments. - iree_host_size_t num_constants = - iree_hal_rocm_pipeline_layout_num_constants(kernel_params.layout); - iree_host_size_t constant_base_index = - iree_hal_rocm_push_constant_index(kernel_params.layout); - // Patch the push constants in the kernel arguments. - for (iree_host_size_t i = 0; i < num_constants; i++) { - *((uint32_t*)command_buffer->current_descriptor[i + constant_base_index]) = - command_buffer->push_constant[i]; - } - - // TODO(raikonenfnu): Currently using NULL stream, need to figure out way to - // access proper stream from command buffer - ROCM_RETURN_IF_ERROR( - command_buffer->context->syms, - hipModuleLaunchKernel( - kernel_params.function, workgroup_x, workgroup_y, workgroup_z, - kernel_params.block_size[0], kernel_params.block_size[1], - kernel_params.block_size[2], kernel_params.shared_memory_size, 0, - command_buffer->current_descriptor, NULL), - "hipModuleLaunchKernel"); - - IREE_ROCM_TRACE_ZONE_END(command_buffer->tracing_context, 0); - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_direct_command_buffer_dispatch_indirect( - iree_hal_command_buffer_t* base_command_buffer, - iree_hal_executable_t* executable, int32_t entry_point, - iree_hal_buffer_ref_t workgroups_ref, iree_hal_dispatch_flags_t flags) { - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "need rocm implementation"); -} - -static const iree_hal_command_buffer_vtable_t - iree_hal_rocm_direct_command_buffer_vtable = { - .destroy = iree_hal_rocm_direct_command_buffer_destroy, - .begin = iree_hal_rocm_direct_command_buffer_begin, - .end = iree_hal_rocm_direct_command_buffer_end, - .begin_debug_group = - iree_hal_rocm_direct_command_buffer_begin_debug_group, - .end_debug_group = iree_hal_rocm_direct_command_buffer_end_debug_group, - .execution_barrier = - iree_hal_rocm_direct_command_buffer_execution_barrier, - .signal_event = iree_hal_rocm_direct_command_buffer_signal_event, - .reset_event = iree_hal_rocm_direct_command_buffer_reset_event, - .wait_events = iree_hal_rocm_direct_command_buffer_wait_events, - .discard_buffer = iree_hal_rocm_direct_command_buffer_discard_buffer, - .fill_buffer = iree_hal_rocm_direct_command_buffer_fill_buffer, - .update_buffer = iree_hal_rocm_direct_command_buffer_update_buffer, - .copy_buffer = iree_hal_rocm_direct_command_buffer_copy_buffer, - .collective = iree_hal_rocm_direct_command_buffer_collective, - .push_constants = iree_hal_rocm_direct_command_buffer_push_constants, - .push_descriptor_set = - iree_hal_rocm_direct_command_buffer_push_descriptor_set, - .dispatch = iree_hal_rocm_direct_command_buffer_dispatch, - .dispatch_indirect = - iree_hal_rocm_direct_command_buffer_dispatch_indirect, -}; diff --git a/experimental/rocm/direct_command_buffer.h b/experimental/rocm/direct_command_buffer.h deleted file mode 100644 index 9d6f5ae4916b..000000000000 --- a/experimental/rocm/direct_command_buffer.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_DIRECT_COMMAND_BUFFER_H_ -#define IREE_HAL_ROCM_DIRECT_COMMAND_BUFFER_H_ - -#include "experimental/rocm/context_wrapper.h" -#include "experimental/rocm/dynamic_symbols.h" -#include "experimental/rocm/rocm_headers.h" -#include "experimental/rocm/tracing.h" -#include "iree/base/api.h" -#include "iree/hal/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -typedef struct iree_arena_block_pool_t iree_arena_block_pool_t; - -// ROCM Kernel Information Structure -typedef struct { - hipFunction_t func; - unsigned int gridDimX; - unsigned int gridDimY; - unsigned int gridDimZ; - unsigned int blockDimX; - unsigned int blockDimY; - unsigned int blockDimZ; - void** kernelParams; -} hip_launch_params; - -// Creates a rocm direct command buffer. -iree_status_t iree_hal_rocm_direct_command_buffer_create( - iree_hal_allocator_t* device_allocator, - iree_hal_rocm_context_wrapper_t* context, - iree_hal_rocm_tracing_context_t* tracing_context, - iree_hal_command_buffer_mode_t mode, - iree_hal_command_category_t command_categories, - iree_hal_queue_affinity_t queue_affinity, iree_host_size_t binding_capacity, - iree_arena_block_pool_t* block_pool, - iree_hal_command_buffer_t** out_command_buffer); - -// Returns true if |command_buffer| is a ROCM command buffer. -bool iree_hal_rocm_direct_command_buffer_isa( - iree_hal_command_buffer_t* command_buffer); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_DIRECT_COMMAND_BUFFER_H_ diff --git a/experimental/rocm/dynamic_symbol_tables.h b/experimental/rocm/dynamic_symbol_tables.h deleted file mode 100644 index d42054d19b45..000000000000 --- a/experimental/rocm/dynamic_symbol_tables.h +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipCtxCreate, hipCtx_t *, unsigned int, - hipDevice_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipCtxDestroy, hipCtx_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipDeviceGet, hipDevice_t *, - int) // No direct, need to modify -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipGetDeviceCount, int *) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipGetDeviceProperties, hipDeviceProp_tR0000 *, - int) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipDeviceGetName, char *, int, - hipDevice_t) // No direct, need to modify -IREE_HAL_ROCM_REQUIRED_PFN_STR_DECL( - hipGetErrorName, - hipError_t) // Unlike other functions hipGetErrorName(hipError_t) return - // const char* instead of hipError_t so it uses a different - // macro -IREE_HAL_ROCM_REQUIRED_PFN_STR_DECL( - hipGetErrorString, - hipError_t) // Unlike other functions hipGetErrorName(hipError_t) return - // const char* instead of hipError_t so it uses a different - // macro -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipInit, unsigned int) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipModuleLaunchKernel, hipFunction_t, - unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, - unsigned int, hipStream_t, void **, void **) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipMemset, void *, int, size_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipMemsetAsync, void *, int, size_t, - hipStream_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipMemsetD32Async, void *, int, size_t, - hipStream_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipMemsetD16Async, void *, short, size_t, - hipStream_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipMemsetD8Async, void *, char, size_t, - hipStream_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipMemcpy, void *, const void *, size_t, - hipMemcpyKind) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipMemcpyAsync, void *, const void *, size_t, - hipMemcpyKind, hipStream_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipMalloc, void **, size_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipMallocManaged, hipDeviceptr_t *, size_t, - unsigned int) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipFree, void *) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipHostFree, void *) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipMemAllocHost, void **, size_t, unsigned int) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipHostGetDevicePointer, void **, void *, - unsigned int) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipModuleGetFunction, hipFunction_t *, - hipModule_t, const char *) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipModuleLoadDataEx, hipModule_t *, - const void *, unsigned int, hipJitOption *, - void **) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipModuleLoadData, hipModule_t *, const void *) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipModuleUnload, hipModule_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipStreamCreateWithFlags, hipStream_t *, - unsigned int) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipStreamDestroy, hipStream_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipStreamSynchronize, hipStream_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipStreamWaitEvent, hipStream_t, hipEvent_t, - unsigned int) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipEventCreate, hipEvent_t *) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipEventDestroy, hipEvent_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipEventElapsedTime, float *, hipEvent_t, - hipEvent_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipEventQuery, hipEvent_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipEventRecord, hipEvent_t, hipStream_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipEventSynchronize, hipEvent_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipDeviceGetAttribute, int *, - hipDeviceAttribute_t, int) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipFuncSetAttribute, const void *, - hipFuncAttribute, int) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipDeviceGetUuid, hipUUID *, hipDevice_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipDevicePrimaryCtxRetain, hipCtx_t *, - hipDevice_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipCtxGetDevice, hipDevice_t *) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipCtxSetCurrent, hipCtx_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipDevicePrimaryCtxRelease, hipDevice_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipMemPrefetchAsync, const void *, size_t, int, - hipStream_t) -IREE_HAL_ROCM_REQUIRED_PFN_DECL(hipMemcpyHtoDAsync, hipDeviceptr_t, void *, - size_t, hipStream_t) diff --git a/experimental/rocm/dynamic_symbols.c b/experimental/rocm/dynamic_symbols.c deleted file mode 100644 index 7b14f711b082..000000000000 --- a/experimental/rocm/dynamic_symbols.c +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/dynamic_symbols.h" - -#include - -#include "iree/base/api.h" -#include "iree/base/internal/dynamic_library.h" - -static const char* kROCMLoaderSearchNames[] = { -#if defined(IREE_PLATFORM_WINDOWS) - "amdhip64.dll", -#else - "libamdhip64.so", -#endif -}; - -static iree_status_t iree_hal_rocm_dynamic_symbols_resolve_all( - iree_hal_rocm_dynamic_symbols_t* syms) { -#define IREE_HAL_ROCM_REQUIRED_PFN_DECL(rocmSymbolName, ...) \ - { \ - static const char* kName = #rocmSymbolName; \ - IREE_RETURN_IF_ERROR(iree_dynamic_library_lookup_symbol( \ - syms->loader_library, kName, (void**)&syms->rocmSymbolName)); \ - } -#define IREE_HAL_ROCM_REQUIRED_PFN_STR_DECL(rocmSymbolName, ...) \ - IREE_HAL_ROCM_REQUIRED_PFN_DECL(rocmSymbolName, ...) -#define IREE_HAL_ROCM_OPTIONAL_PFN_DECL(rocmSymbolName, ...) \ - { \ - static const char* kName = #rocmSymbolName; \ - IREE_IGNORE_ERROR(iree_dynamic_library_lookup_symbol( \ - syms->loader_library, kName, (void**)&syms->rocmSymbolName)); \ - } -#include "experimental/rocm/dynamic_symbol_tables.h" // IWYU pragma: keep -#undef IREE_HAL_ROCM_REQUIRED_PFN_DECL -#undef IREE_HAL_ROCM_REQUIRED_PFN_STR_DECL -#undef IREE_HAL_ROCM_OPTIONAL_PFN_DECL - return iree_ok_status(); -} - -iree_status_t iree_hal_rocm_dynamic_symbols_initialize( - iree_allocator_t allocator, iree_hal_rocm_dynamic_symbols_t* out_syms) { - IREE_TRACE_ZONE_BEGIN(z0); - memset(out_syms, 0, sizeof(*out_syms)); - iree_status_t status = iree_dynamic_library_load_from_files( - IREE_ARRAYSIZE(kROCMLoaderSearchNames), kROCMLoaderSearchNames, - IREE_DYNAMIC_LIBRARY_FLAG_NONE, allocator, &out_syms->loader_library); - if (iree_status_is_not_found(status)) { - iree_status_ignore(status); - return iree_make_status( - IREE_STATUS_UNAVAILABLE, - "ROCM/HIP runtime library not available; ensure installed and on path"); - } - if (iree_status_is_ok(status)) { - status = iree_hal_rocm_dynamic_symbols_resolve_all(out_syms); - } - if (!iree_status_is_ok(status)) { - iree_hal_rocm_dynamic_symbols_deinitialize(out_syms); - } - IREE_TRACE_ZONE_END(z0); - return status; -} - -void iree_hal_rocm_dynamic_symbols_deinitialize( - iree_hal_rocm_dynamic_symbols_t* syms) { - IREE_TRACE_ZONE_BEGIN(z0); - iree_dynamic_library_release(syms->loader_library); - memset(syms, 0, sizeof(*syms)); - IREE_TRACE_ZONE_END(z0); -} diff --git a/experimental/rocm/dynamic_symbols.h b/experimental/rocm/dynamic_symbols.h deleted file mode 100644 index 85705324acce..000000000000 --- a/experimental/rocm/dynamic_symbols.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_DYNAMIC_SYMBOLS_H_ -#define IREE_HAL_ROCM_DYNAMIC_SYMBOLS_H_ - -#include "experimental/rocm/rocm_headers.h" -#include "iree/base/api.h" -#include "iree/base/internal/dynamic_library.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// DynamicSymbols allow loading dynamically a subset of ROCM driver API. It -// loads all the function declared in `dynamic_symbol_tables.def` and fail if -// any of the symbol is not available. The functions signatures are matching -// the declarations in `hipruntime.h`. -typedef struct iree_hal_rocm_dynamic_symbols_t { - iree_dynamic_library_t* loader_library; - -#define IREE_HAL_ROCM_REQUIRED_PFN_DECL(rocmSymbolName, ...) \ - hipError_t (*rocmSymbolName)(__VA_ARGS__); -#define IREE_HAL_ROCM_REQUIRED_PFN_STR_DECL(rocmSymbolName, ...) \ - const char* (*rocmSymbolName)(__VA_ARGS__); -#define IREE_HAL_ROCM_OPTIONAL_PFN_DECL(rocmSymbolName, ...) \ - hipError_t (*rocmSymbolName)(__VA_ARGS__); -#include "experimental/rocm/dynamic_symbol_tables.h" // IWYU pragma: export -#undef IREE_HAL_ROCM_REQUIRED_PFN_DECL -#undef IREE_HAL_ROCM_REQUIRED_PFN_STR_DECL -#undef IREE_HAL_ROCM_OPTIONAL_PFN_DECL -} iree_hal_rocm_dynamic_symbols_t; - -// Initializes |out_syms| in-place with dynamically loaded ROCM symbols. -// iree_hal_rocm_dynamic_symbols_deinitialize must be used to release the -// library resources. -iree_status_t iree_hal_rocm_dynamic_symbols_initialize( - iree_allocator_t allocator, iree_hal_rocm_dynamic_symbols_t* out_syms); - -// Deinitializes |syms| by unloading the backing library. All function pointers -// will be invalidated. They _may_ still work if there are other reasons the -// library remains loaded so be careful. -void iree_hal_rocm_dynamic_symbols_deinitialize( - iree_hal_rocm_dynamic_symbols_t* syms); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_DYNAMIC_SYMBOLS_H_ diff --git a/experimental/rocm/dynamic_symbols_test.cc b/experimental/rocm/dynamic_symbols_test.cc deleted file mode 100644 index 57eb936e3011..000000000000 --- a/experimental/rocm/dynamic_symbols_test.cc +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/dynamic_symbols.h" - -#include - -#include "iree/base/api.h" -#include "iree/testing/gtest.h" - -namespace iree { -namespace hal { -namespace rocm { -namespace { - -#define ROCM_CHECK_ERRORS(expr) \ - { \ - hipError_t status = expr; \ - ASSERT_EQ(hipSuccess, status); \ - } - -TEST(DynamicSymbolsTest, CreateFromSystemLoader) { - iree_hal_rocm_dynamic_symbols_t symbols; - iree_status_t status = iree_hal_rocm_dynamic_symbols_initialize( - iree_allocator_system(), &symbols); - if (!iree_status_is_ok(status)) { - GTEST_SKIP() << "Symbols cannot be loaded, skipping test."; - } - - int device_count = 0; - ROCM_CHECK_ERRORS(symbols.hipInit(0)); - ROCM_CHECK_ERRORS(symbols.hipGetDeviceCount(&device_count)); - if (device_count > 0) { - hipDevice_t device; - ROCM_CHECK_ERRORS(symbols.hipDeviceGet(&device, /*ordinal=*/0)); - } - - iree_hal_rocm_dynamic_symbols_deinitialize(&symbols); -} - -} // namespace -} // namespace rocm -} // namespace hal -} // namespace iree diff --git a/experimental/rocm/event_semaphore.c b/experimental/rocm/event_semaphore.c deleted file mode 100644 index 5893c8ce979e..000000000000 --- a/experimental/rocm/event_semaphore.c +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/event_semaphore.h" - -#include - -#include "iree/base/api.h" -#include "iree/hal/utils/semaphore_base.h" - -typedef struct iree_hal_rocm_semaphore_t { - iree_hal_semaphore_t base; - iree_hal_rocm_context_wrapper_t* context; - iree_atomic_int64_t value; -} iree_hal_rocm_semaphore_t; - -static const iree_hal_semaphore_vtable_t iree_hal_rocm_semaphore_vtable; - -static iree_hal_rocm_semaphore_t* iree_hal_rocm_semaphore_cast( - iree_hal_semaphore_t* base_value) { - IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_rocm_semaphore_vtable); - return (iree_hal_rocm_semaphore_t*)base_value; -} - -iree_status_t iree_hal_rocm_semaphore_create( - iree_hal_rocm_context_wrapper_t* context, uint64_t initial_value, - iree_hal_semaphore_t** out_semaphore) { - IREE_ASSERT_ARGUMENT(context); - IREE_ASSERT_ARGUMENT(out_semaphore); - IREE_TRACE_ZONE_BEGIN(z0); - - iree_hal_rocm_semaphore_t* semaphore = NULL; - iree_status_t status = iree_allocator_malloc( - context->host_allocator, sizeof(*semaphore), (void**)&semaphore); - if (iree_status_is_ok(status)) { - iree_hal_semaphore_initialize(&iree_hal_rocm_semaphore_vtable, - &semaphore->base); - semaphore->context = context; - *out_semaphore = &semaphore->base; - } - - IREE_TRACE_ZONE_END(z0); - return status; -} - -static void iree_hal_rocm_semaphore_destroy( - iree_hal_semaphore_t* base_semaphore) { - iree_hal_rocm_semaphore_t* semaphore = - iree_hal_rocm_semaphore_cast(base_semaphore); - iree_allocator_t host_allocator = semaphore->context->host_allocator; - IREE_TRACE_ZONE_BEGIN(z0); - - iree_hal_semaphore_deinitialize(&semaphore->base); - iree_allocator_free(host_allocator, semaphore); - - IREE_TRACE_ZONE_END(z0); -} - -static iree_status_t iree_hal_rocm_semaphore_query( - iree_hal_semaphore_t* base_semaphore, uint64_t* out_value) { - iree_hal_rocm_semaphore_t* semaphore = - iree_hal_rocm_semaphore_cast(base_semaphore); - // TODO: Support semaphores completely. - *out_value = - iree_atomic_load_int64(&semaphore->value, iree_memory_order_acquire); - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_semaphore_signal( - iree_hal_semaphore_t* base_semaphore, uint64_t new_value) { - iree_hal_rocm_semaphore_t* semaphore = - iree_hal_rocm_semaphore_cast(base_semaphore); - // TODO: Support semaphores completely. Return OK currently as everything is - // synchronized for each submit to allow things to run. - iree_hal_semaphore_poll(&semaphore->base); - return iree_ok_status(); -} - -static void iree_hal_rocm_semaphore_fail(iree_hal_semaphore_t* base_semaphore, - iree_status_t status) { - iree_hal_rocm_semaphore_t* semaphore = - iree_hal_rocm_semaphore_cast(base_semaphore); - // TODO: save status and mark timepoint as failed. - iree_status_ignore(status); - iree_hal_semaphore_poll(&semaphore->base); -} - -static iree_status_t iree_hal_rocm_semaphore_wait( - iree_hal_semaphore_t* base_semaphore, uint64_t value, - iree_timeout_t timeout) { - iree_hal_rocm_semaphore_t* semaphore = - iree_hal_rocm_semaphore_cast(base_semaphore); - // TODO: Support semaphores completely. Return OK currently as everything is - // synchronized for each submit to allow things to run. - iree_hal_semaphore_poll(&semaphore->base); - return iree_ok_status(); -} - -static const iree_hal_semaphore_vtable_t iree_hal_rocm_semaphore_vtable = { - .destroy = iree_hal_rocm_semaphore_destroy, - .query = iree_hal_rocm_semaphore_query, - .signal = iree_hal_rocm_semaphore_signal, - .fail = iree_hal_rocm_semaphore_fail, - .wait = iree_hal_rocm_semaphore_wait, -}; diff --git a/experimental/rocm/event_semaphore.h b/experimental/rocm/event_semaphore.h deleted file mode 100644 index 9e79aa854d4e..000000000000 --- a/experimental/rocm/event_semaphore.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_SEMAPHORE_H_ -#define IREE_HAL_ROCM_SEMAPHORE_H_ - -#include - -#include "experimental/rocm/context_wrapper.h" -#include "experimental/rocm/status_util.h" -#include "iree/base/api.h" -#include "iree/hal/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// Create a rocm allocator. -iree_status_t iree_hal_rocm_semaphore_create( - iree_hal_rocm_context_wrapper_t* context, uint64_t initial_value, - iree_hal_semaphore_t** out_semaphore); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_SEMAPHORE_H_ diff --git a/experimental/rocm/native_executable.c b/experimental/rocm/native_executable.c deleted file mode 100644 index 35d33f5b6d80..000000000000 --- a/experimental/rocm/native_executable.c +++ /dev/null @@ -1,317 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/native_executable.h" - -#include - -#include "experimental/rocm/dynamic_symbols.h" -#include "experimental/rocm/pipeline_layout.h" -#include "experimental/rocm/status_util.h" -#include "iree/base/api.h" - -// flatcc schemas: -#include "iree/base/internal/flatcc/parsing.h" -#include "iree/schemas/rocm_executable_def_reader.h" -#include "iree/schemas/rocm_executable_def_verifier.h" - -typedef struct iree_hal_rocm_entry_point_t { - iree_hal_rocm_kernel_params_t kernel_params; - iree_string_view_t name; - IREE_TRACE(iree_hal_rocm_FileLineLocDef_table_t source_location;) - IREE_TRACE(iree_hal_rocm_StageLocationDef_vec_t stage_locations;) -} iree_hal_rocm_entry_point_t; - -typedef struct iree_hal_rocm_native_executable_t { - iree_hal_resource_t resource; - iree_hal_rocm_context_wrapper_t* context; - iree_hal_pipeline_layout_t** pipeline_layouts; - iree_host_size_t entry_count; - hipModule_t module; - iree_host_size_t entry_point_count; - iree_hal_rocm_entry_point_t entry_points[]; -} iree_hal_rocm_native_executable_t; - -static const iree_hal_executable_vtable_t - iree_hal_rocm_native_executable_vtable; - -static iree_hal_rocm_native_executable_t* iree_hal_rocm_native_executable_cast( - iree_hal_executable_t* base_value) { - IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_rocm_native_executable_vtable); - return (iree_hal_rocm_native_executable_t*)base_value; -} - -iree_status_t iree_hal_rocm_native_executable_create( - iree_hal_rocm_context_wrapper_t* context, - const iree_hal_executable_params_t* executable_params, - iree_hal_executable_t** out_executable) { - IREE_ASSERT_ARGUMENT(context); - IREE_ASSERT_ARGUMENT(executable_params); - IREE_ASSERT_ARGUMENT(out_executable); - *out_executable = NULL; - IREE_TRACE_ZONE_BEGIN(z0); - - iree_hal_rocm_native_executable_t* executable = NULL; - - // TODO: Verify the flat buffer. - iree_hal_rocm_ExecutableDef_table_t executable_def = - iree_hal_rocm_ExecutableDef_as_root( - executable_params->executable_data.data); - - // Create the kernel module. - flatbuffers_string_t hsaco_image = - iree_hal_rocm_ExecutableDef_hsaco_image_get(executable_def); - flatbuffers_string_vec_t entry_points_vec = - iree_hal_rocm_ExecutableDef_entry_points_get(executable_def); - iree_hal_rocm_BlockSizeDef_vec_t block_sizes_vec = - iree_hal_rocm_ExecutableDef_block_sizes_get(executable_def); - flatbuffers_uint32_vec_t shared_memory_sizes = - iree_hal_rocm_ExecutableDef_shared_memory_sizes_get(executable_def); - iree_host_size_t entry_count = flatbuffers_string_vec_len(entry_points_vec); - - iree_host_size_t total_size = - sizeof(*executable) + entry_count * sizeof(executable->entry_points[0]); - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, iree_allocator_malloc(context->host_allocator, total_size, - (void**)&executable)); - - iree_hal_resource_initialize(&iree_hal_rocm_native_executable_vtable, - &executable->resource); - - executable->context = context; - executable->entry_point_count = entry_count; - iree_status_t status = ROCM_RESULT_TO_STATUS( - context->syms, - hipModuleLoadDataEx(&executable->module, hsaco_image, 0, NULL, NULL), - "hipModuleLoadDataEx"); - if (!iree_status_is_ok(status)) { - status = iree_status_annotate( - status, - IREE_SV("mismatched target chip? missing/wrong bitcode directory?")); - } - - // Query allowed max shared memory. - int32_t max_shared_mem = 0; - if (iree_status_is_ok(status)) { - status = ROCM_RESULT_TO_STATUS( - context->syms, - hipDeviceGetAttribute(&max_shared_mem, - hipDeviceAttributeMaxSharedMemoryPerBlock, - context->rocm_device), - "hipDeviceGetAttribute"); - } - - if (iree_status_is_ok(status)) { - executable->entry_count = entry_count; - for (iree_host_size_t i = 0; i < entry_count; i++) { - if (iree_status_is_ok(status)) { - hipFunction_t function = NULL; - flatbuffers_string_t entry_name = - flatbuffers_string_vec_at(entry_points_vec, i); - status = ROCM_RESULT_TO_STATUS( - context->syms, - hipModuleGetFunction(&function, executable->module, entry_name), - "hipModuleGetFunction"); - if (!iree_status_is_ok(status)) break; - if (!function) { - status = iree_make_status(IREE_STATUS_NOT_FOUND, - "exported module function %s not found", - entry_name); - break; - } - if (shared_memory_sizes[i] > max_shared_mem) { - status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "function '%s' requested shared memory " - "size of %d larger than allowed size of %d", - entry_name, shared_memory_sizes[i], - max_shared_mem); - } else if (shared_memory_sizes[i] != 0) { - status = ROCM_RESULT_TO_STATUS( - context->syms, - hipFuncSetAttribute( - function, - (hipFuncAttribute) - HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, - shared_memory_sizes[i]), - "hipFuncSetAttribute"); - } - // Package required parameters for kernel launches for each entry point. - iree_hal_rocm_entry_point_t* entry_point_info = - &executable->entry_points[i]; - iree_hal_rocm_kernel_params_t* params = - &entry_point_info->kernel_params; - params->layout = executable_params->pipeline_layouts[i]; - iree_hal_pipeline_layout_retain(params->layout); - params->function = function; - params->block_size[0] = block_sizes_vec[i].x; - params->block_size[1] = block_sizes_vec[i].y; - params->block_size[2] = block_sizes_vec[i].z; - params->shared_memory_size = shared_memory_sizes[i]; - entry_point_info->name = iree_make_string_view( - entry_name, flatbuffers_string_len(entry_name)); - } - } - -#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION - if (iree_status_is_ok(status)) { - if (iree_hal_rocm_ExecutableDef_source_locations_is_present( - executable_def)) { - iree_hal_rocm_FileLineLocDef_vec_t source_locations_vec = - iree_hal_rocm_ExecutableDef_source_locations_get(executable_def); - for (iree_host_size_t i = 0; i < entry_count; ++i) { - executable->entry_points[i].source_location = - iree_hal_rocm_FileLineLocDef_vec_at(source_locations_vec, i); - } - } - if (iree_hal_rocm_ExecutableDef_stage_locations_is_present( - executable_def)) { - iree_hal_rocm_StageLocationsDef_vec_t stage_locations_vec = - iree_hal_rocm_ExecutableDef_stage_locations_get(executable_def); - for (iree_host_size_t i = 0; i < entry_count; ++i) { - iree_hal_rocm_StageLocationsDef_table_t stage_locations = - iree_hal_rocm_StageLocationsDef_vec_at(stage_locations_vec, i); - executable->entry_points[i].stage_locations = - iree_hal_rocm_StageLocationsDef_locations_get(stage_locations); - } - } - - // Publish any embedded source files to the tracing infrastructure. - if (iree_hal_rocm_ExecutableDef_source_files_is_present(executable_def)) { - iree_hal_rocm_SourceFileDef_vec_t source_files_vec = - iree_hal_rocm_ExecutableDef_source_files_get(executable_def); - for (iree_host_size_t i = 0; - i < iree_hal_rocm_SourceFileDef_vec_len(source_files_vec); ++i) { - iree_hal_rocm_SourceFileDef_table_t source_file = - iree_hal_rocm_SourceFileDef_vec_at(source_files_vec, i); - flatbuffers_string_t path = - iree_hal_rocm_SourceFileDef_path_get(source_file); - flatbuffers_uint8_vec_t content = - iree_hal_rocm_SourceFileDef_content_get(source_file); - IREE_TRACE_PUBLISH_SOURCE_FILE(path, flatbuffers_string_len(path), - content, - flatbuffers_uint8_vec_len(content)); - } - } - } -#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION - } - - if (iree_status_is_ok(status)) { - *out_executable = (iree_hal_executable_t*)executable; - } else { - if (executable) { - iree_hal_executable_destroy((iree_hal_executable_t*)executable); - } - } - - IREE_TRACE_ZONE_END(z0); - return status; -} - -hipFunction_t iree_hal_rocm_native_executable_for_entry_point( - iree_hal_executable_t* base_executable, int32_t entry_point) { - iree_hal_rocm_native_executable_t* executable = - iree_hal_rocm_native_executable_cast(base_executable); - return executable->entry_points[entry_point].kernel_params.function; -} - -static void iree_hal_rocm_native_executable_destroy( - iree_hal_executable_t* base_executable) { - iree_hal_rocm_native_executable_t* executable = - iree_hal_rocm_native_executable_cast(base_executable); - iree_allocator_t host_allocator = executable->context->host_allocator; - IREE_TRACE_ZONE_BEGIN(z0); - - if (executable->module) { - iree_status_t status = ROCM_RESULT_TO_STATUS( - executable->context->syms, hipModuleUnload(executable->module), - "hipModuleUnload"); - if (!iree_status_is_ok(status)) { - fprintf(stderr, "Failed unloading ROCm module: "); - iree_status_fprint(stderr, status); - iree_status_free(status); - } - } - - if (executable->pipeline_layouts) { - for (iree_host_size_t i = 0; i < executable->entry_count; ++i) { - if (executable->pipeline_layouts[i]) { - iree_hal_pipeline_layout_release(executable->pipeline_layouts[i]); - } - } - } - - iree_allocator_free(host_allocator, executable); - - IREE_TRACE_ZONE_END(z0); -} - -iree_status_t iree_hal_rocm_native_executable_entry_point_kernel_params( - iree_hal_executable_t* base_executable, int32_t entry_point, - iree_hal_rocm_kernel_params_t* out_params) { - iree_hal_rocm_native_executable_t* executable = - iree_hal_rocm_native_executable_cast(base_executable); - if (entry_point >= executable->entry_count) { - return iree_make_status(IREE_STATUS_OUT_OF_RANGE, - "invalid entry point ordinal %d", entry_point); - } - memcpy(out_params, &executable->entry_points[entry_point], - sizeof(*out_params)); - return iree_ok_status(); -} - -void iree_hal_rocm_native_executable_entry_point_source_location( - iree_hal_executable_t* base_executable, iree_host_size_t entry_ordinal, - iree_hal_rocm_source_location_t* out_source_location) { - iree_hal_rocm_native_executable_t* executable = - iree_hal_rocm_native_executable_cast(base_executable); - memset(out_source_location, 0, sizeof(*out_source_location)); - if (entry_ordinal >= executable->entry_point_count) { - return; - } - const iree_hal_rocm_entry_point_t* entry_point = - &executable->entry_points[entry_ordinal]; - - out_source_location->func_name = entry_point->name; - -#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION - iree_hal_rocm_FileLineLocDef_table_t source_location = - entry_point->source_location; - if (entry_point->stage_locations) { - for (size_t i = 0; i < iree_hal_rocm_StageLocationDef_vec_len( - entry_point->stage_locations); - ++i) { - iree_hal_rocm_StageLocationDef_table_t stage_location = - iree_hal_rocm_StageLocationDef_vec_at(entry_point->stage_locations, - i); - // TODO(benvanik): a way to select what location is chosen. For now we - // just pick the first one. - source_location = - iree_hal_rocm_StageLocationDef_location_get(stage_location); - break; - } - } - if (source_location) { - flatbuffers_string_t filename = - iree_hal_rocm_FileLineLocDef_filename_get(source_location); - out_source_location->file_name = - iree_make_string_view(filename, flatbuffers_string_len(filename)); - out_source_location->line = - iree_hal_rocm_FileLineLocDef_line_get(source_location); - } else { - out_source_location->file_name = out_source_location->func_name; - out_source_location->line = 0; - } -#else - out_source_location->file_name = out_source_location->func_name; - out_source_location->line = 0; -#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION -} - -static const iree_hal_executable_vtable_t - iree_hal_rocm_native_executable_vtable = { - .destroy = iree_hal_rocm_native_executable_destroy, -}; diff --git a/experimental/rocm/native_executable.h b/experimental/rocm/native_executable.h deleted file mode 100644 index 997d38560e6f..000000000000 --- a/experimental/rocm/native_executable.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_NATIVE_EXECUTABLE_H_ -#define IREE_HAL_ROCM_NATIVE_EXECUTABLE_H_ - -#include - -#include "experimental/rocm/context_wrapper.h" -#include "experimental/rocm/rocm_headers.h" -#include "iree/base/api.h" -#include "iree/hal/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -typedef struct iree_hal_rocm_source_location_t { - iree_string_view_t file_name; - int line; - iree_string_view_t func_name; -} iree_hal_rocm_source_location_t; - -typedef struct iree_hal_rocm_kernel_params_t { - iree_hal_pipeline_layout_t* layout; - hipFunction_t function; - uint32_t block_size[3]; - uint32_t shared_memory_size; -} iree_hal_rocm_kernel_params_t; - -// Creates an executable from a HSACO module. The module may contain several -// kernels that can be extracted along with the associated block size. -iree_status_t iree_hal_rocm_native_executable_create( - iree_hal_rocm_context_wrapper_t* context, - const iree_hal_executable_params_t* executable_params, - iree_hal_executable_t** out_executable); - -// Returns the kernel launch parameters for the given |entry_point|. -iree_status_t iree_hal_rocm_native_executable_entry_point_kernel_params( - iree_hal_executable_t* executable, int32_t entry_point, - iree_hal_rocm_kernel_params_t* out_params); - -hipFunction_t iree_hal_rocm_native_executable_for_entry_point( - iree_hal_executable_t* executable, int32_t entry_point); - -// Returns the source location for the given entry point. May be empty if not -// available. -void iree_hal_rocm_native_executable_entry_point_source_location( - iree_hal_executable_t* base_executable, iree_host_size_t entry_ordinal, - iree_hal_rocm_source_location_t* out_source_location); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_NATIVE_EXECUTABLE_H_ diff --git a/experimental/rocm/nop_executable_cache.c b/experimental/rocm/nop_executable_cache.c deleted file mode 100644 index 011a4d797e00..000000000000 --- a/experimental/rocm/nop_executable_cache.c +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/nop_executable_cache.h" - -#include -#include - -#include "experimental/rocm/native_executable.h" -#include "iree/base/api.h" - -typedef struct iree_hal_rocm_nop_executable_cache_t { - iree_hal_resource_t resource; - iree_hal_rocm_context_wrapper_t* context; -} iree_hal_rocm_nop_executable_cache_t; - -static const iree_hal_executable_cache_vtable_t - iree_hal_rocm_nop_executable_cache_vtable; - -static iree_hal_rocm_nop_executable_cache_t* -iree_hal_rocm_nop_executable_cache_cast( - iree_hal_executable_cache_t* base_value) { - IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_rocm_nop_executable_cache_vtable); - return (iree_hal_rocm_nop_executable_cache_t*)base_value; -} - -iree_status_t iree_hal_rocm_nop_executable_cache_create( - iree_hal_rocm_context_wrapper_t* context, iree_string_view_t identifier, - iree_hal_executable_cache_t** out_executable_cache) { - IREE_ASSERT_ARGUMENT(out_executable_cache); - *out_executable_cache = NULL; - IREE_TRACE_ZONE_BEGIN(z0); - - iree_hal_rocm_nop_executable_cache_t* executable_cache = NULL; - iree_status_t status = - iree_allocator_malloc(context->host_allocator, sizeof(*executable_cache), - (void**)&executable_cache); - if (iree_status_is_ok(status)) { - iree_hal_resource_initialize(&iree_hal_rocm_nop_executable_cache_vtable, - &executable_cache->resource); - executable_cache->context = context; - - *out_executable_cache = (iree_hal_executable_cache_t*)executable_cache; - } - IREE_TRACE_ZONE_END(z0); - return status; -} - -static void iree_hal_rocm_nop_executable_cache_destroy( - iree_hal_executable_cache_t* base_executable_cache) { - iree_hal_rocm_nop_executable_cache_t* executable_cache = - iree_hal_rocm_nop_executable_cache_cast(base_executable_cache); - iree_allocator_t host_allocator = executable_cache->context->host_allocator; - IREE_TRACE_ZONE_BEGIN(z0); - - iree_allocator_free(host_allocator, executable_cache); - - IREE_TRACE_ZONE_END(z0); -} - -static bool iree_hal_rocm_nop_executable_cache_can_prepare_format( - iree_hal_executable_cache_t* base_executable_cache, - iree_hal_executable_caching_mode_t caching_mode, - iree_string_view_t executable_format) { - return iree_string_view_equal(executable_format, - iree_make_cstring_view("PTXE")); -} - -static iree_status_t iree_hal_rocm_nop_executable_cache_prepare_executable( - iree_hal_executable_cache_t* base_executable_cache, - const iree_hal_executable_params_t* executable_params, - iree_hal_executable_t** out_executable) { - iree_hal_rocm_nop_executable_cache_t* executable_cache = - iree_hal_rocm_nop_executable_cache_cast(base_executable_cache); - return iree_hal_rocm_native_executable_create( - executable_cache->context, executable_params, out_executable); -} - -static const iree_hal_executable_cache_vtable_t - iree_hal_rocm_nop_executable_cache_vtable = { - .destroy = iree_hal_rocm_nop_executable_cache_destroy, - .can_prepare_format = - iree_hal_rocm_nop_executable_cache_can_prepare_format, - .prepare_executable = - iree_hal_rocm_nop_executable_cache_prepare_executable, -}; diff --git a/experimental/rocm/nop_executable_cache.h b/experimental/rocm/nop_executable_cache.h deleted file mode 100644 index a057466712d4..000000000000 --- a/experimental/rocm/nop_executable_cache.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_NOP_EXECUTABLE_CACHE_H_ -#define IREE_HAL_ROCM_NOP_EXECUTABLE_CACHE_H_ - -#include "experimental/rocm/context_wrapper.h" -#include "iree/base/api.h" -#include "iree/hal/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// Creates a no-op executable cache that does not cache at all. -// This is useful to isolate pipeline caching behavior and verify compilation -// behavior. -iree_status_t iree_hal_rocm_nop_executable_cache_create( - iree_hal_rocm_context_wrapper_t* context, iree_string_view_t identifier, - iree_hal_executable_cache_t** out_executable_cache); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_NOP_EXECUTABLE_CACHE_H_ diff --git a/experimental/rocm/pipeline_layout.c b/experimental/rocm/pipeline_layout.c deleted file mode 100644 index 0a27c5b71835..000000000000 --- a/experimental/rocm/pipeline_layout.c +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/pipeline_layout.h" - -#include - -#include "iree/base/api.h" - -//===----------------------------------------------------------------------===// -// iree_hal_rocm_descriptor_set_layout_t -//===----------------------------------------------------------------------===// - -typedef struct iree_hal_rocm_descriptor_set_layout_t { - iree_hal_resource_t resource; - iree_hal_rocm_context_wrapper_t* context; - iree_host_size_t binding_count; -} iree_hal_rocm_descriptor_set_layout_t; - -static const iree_hal_descriptor_set_layout_vtable_t - iree_hal_rocm_descriptor_set_layout_vtable; - -static iree_hal_rocm_descriptor_set_layout_t* -iree_hal_rocm_descriptor_set_layout_cast( - iree_hal_descriptor_set_layout_t* base_value) { - IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_rocm_descriptor_set_layout_vtable); - return (iree_hal_rocm_descriptor_set_layout_t*)base_value; -} - -iree_status_t iree_hal_rocm_descriptor_set_layout_create( - iree_hal_rocm_context_wrapper_t* context, - iree_hal_descriptor_set_layout_flags_t flags, - iree_host_size_t binding_count, - const iree_hal_descriptor_set_layout_binding_t* bindings, - iree_hal_descriptor_set_layout_t** out_descriptor_set_layout) { - IREE_ASSERT_ARGUMENT(context); - IREE_ASSERT_ARGUMENT(!binding_count || bindings); - IREE_ASSERT_ARGUMENT(out_descriptor_set_layout); - *out_descriptor_set_layout = NULL; - IREE_TRACE_ZONE_BEGIN(z0); - - iree_hal_rocm_descriptor_set_layout_t* descriptor_set_layout = NULL; - iree_status_t status = iree_allocator_malloc(context->host_allocator, - sizeof(*descriptor_set_layout), - (void**)&descriptor_set_layout); - if (iree_status_is_ok(status)) { - iree_hal_resource_initialize(&iree_hal_rocm_descriptor_set_layout_vtable, - &descriptor_set_layout->resource); - descriptor_set_layout->context = context; - descriptor_set_layout->binding_count = binding_count; - *out_descriptor_set_layout = - (iree_hal_descriptor_set_layout_t*)descriptor_set_layout; - } - IREE_TRACE_ZONE_END(z0); - return status; -} - -iree_host_size_t iree_hal_rocm_descriptor_set_layout_binding_count( - iree_hal_descriptor_set_layout_t* base_descriptor_set_layout) { - iree_hal_rocm_descriptor_set_layout_t* descriptor_set_layout = - iree_hal_rocm_descriptor_set_layout_cast(base_descriptor_set_layout); - return descriptor_set_layout->binding_count; -} - -static void iree_hal_rocm_descriptor_set_layout_destroy( - iree_hal_descriptor_set_layout_t* base_descriptor_set_layout) { - iree_hal_rocm_descriptor_set_layout_t* descriptor_set_layout = - iree_hal_rocm_descriptor_set_layout_cast(base_descriptor_set_layout); - iree_allocator_t host_allocator = - descriptor_set_layout->context->host_allocator; - IREE_TRACE_ZONE_BEGIN(z0); - - iree_allocator_free(host_allocator, descriptor_set_layout); - - IREE_TRACE_ZONE_END(z0); -} - -static const iree_hal_descriptor_set_layout_vtable_t - iree_hal_rocm_descriptor_set_layout_vtable = { - .destroy = iree_hal_rocm_descriptor_set_layout_destroy, -}; - -//===----------------------------------------------------------------------===// -// iree_hal_rocm_pipeline_layout_t -//===----------------------------------------------------------------------===// - -typedef struct iree_hal_rocm_pipeline_layout_t { - iree_hal_resource_t resource; - iree_hal_rocm_context_wrapper_t* context; - iree_host_size_t push_constant_base_index; - iree_host_size_t push_constant_count; - iree_host_size_t set_layout_count; - iree_hal_descriptor_set_layout_t* set_layouts[]; -} iree_hal_rocm_pipeline_layout_t; - -static const iree_hal_pipeline_layout_vtable_t - iree_hal_rocm_pipeline_layout_vtable; - -static iree_hal_rocm_pipeline_layout_t* iree_hal_rocm_pipeline_layout_cast( - iree_hal_pipeline_layout_t* base_value) { - IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_rocm_pipeline_layout_vtable); - return (iree_hal_rocm_pipeline_layout_t*)base_value; -} - -static void iree_hal_rocm_pipeline_layout_destroy( - iree_hal_pipeline_layout_t* base_pipeline_layout) { - iree_hal_rocm_pipeline_layout_t* pipeline_layout = - iree_hal_rocm_pipeline_layout_cast(base_pipeline_layout); - iree_allocator_t host_allocator = pipeline_layout->context->host_allocator; - IREE_TRACE_ZONE_BEGIN(z0); - - for (iree_host_size_t i = 0; i < pipeline_layout->set_layout_count; ++i) { - iree_hal_descriptor_set_layout_release(pipeline_layout->set_layouts[i]); - } - iree_allocator_free(host_allocator, pipeline_layout); - - IREE_TRACE_ZONE_END(z0); -} - -iree_status_t iree_hal_rocm_pipeline_layout_create( - iree_hal_rocm_context_wrapper_t* context, iree_host_size_t set_layout_count, - iree_hal_descriptor_set_layout_t* const* set_layouts, - iree_host_size_t push_constant_count, - iree_hal_pipeline_layout_t** out_pipeline_layout) { - IREE_ASSERT_ARGUMENT(context); - IREE_ASSERT_ARGUMENT(!set_layout_count || set_layouts); - IREE_ASSERT_ARGUMENT(out_pipeline_layout); - *out_pipeline_layout = NULL; - IREE_TRACE_ZONE_BEGIN(z0); - - if (push_constant_count > IREE_HAL_ROCM_MAX_PUSH_CONSTANT_COUNT) { - return iree_make_status( - IREE_STATUS_INVALID_ARGUMENT, - "push constant count %" PRIhsz " over the limit of %d", - push_constant_count, IREE_HAL_ROCM_MAX_PUSH_CONSTANT_COUNT); - } - - // Currently the pipeline layout doesn't do anything. - // TODO: Handle creating the argument layout at that time hadling both push - // constant and buffers. - iree_hal_rocm_pipeline_layout_t* pipeline_layout = NULL; - iree_host_size_t total_size = - sizeof(*pipeline_layout) + - set_layout_count * sizeof(*pipeline_layout->set_layouts); - iree_status_t status = iree_allocator_malloc( - context->host_allocator, total_size, (void**)&pipeline_layout); - if (iree_status_is_ok(status)) { - iree_hal_resource_initialize(&iree_hal_rocm_pipeline_layout_vtable, - &pipeline_layout->resource); - pipeline_layout->context = context; - pipeline_layout->set_layout_count = set_layout_count; - iree_host_size_t binding_number = 0; - for (iree_host_size_t i = 0; i < set_layout_count; ++i) { - pipeline_layout->set_layouts[i] = set_layouts[i]; - iree_hal_descriptor_set_layout_retain(set_layouts[i]); - binding_number += - iree_hal_rocm_descriptor_set_layout_binding_count(set_layouts[i]); - } - pipeline_layout->push_constant_base_index = binding_number; - pipeline_layout->push_constant_count = push_constant_count; - *out_pipeline_layout = (iree_hal_pipeline_layout_t*)pipeline_layout; - } - IREE_TRACE_ZONE_END(z0); - return status; -} - -iree_host_size_t iree_hal_rocm_base_binding_index( - iree_hal_pipeline_layout_t* base_pipeline_layout, uint32_t set) { - iree_hal_rocm_pipeline_layout_t* pipeline_layout = - iree_hal_rocm_pipeline_layout_cast(base_pipeline_layout); - iree_host_size_t base_binding = 0; - for (iree_host_size_t i = 0; i < set; ++i) { - iree_host_size_t binding_count = - iree_hal_rocm_descriptor_set_layout_binding_count( - pipeline_layout->set_layouts[i]); - base_binding += binding_count; - } - return base_binding; -} - -iree_host_size_t iree_hal_rocm_push_constant_index( - iree_hal_pipeline_layout_t* base_pipeline_layout) { - iree_hal_rocm_pipeline_layout_t* pipeline_layout = - iree_hal_rocm_pipeline_layout_cast(base_pipeline_layout); - return pipeline_layout->push_constant_base_index; -} - -iree_host_size_t iree_hal_rocm_pipeline_layout_num_constants( - iree_hal_pipeline_layout_t* base_pipeline_layout) { - iree_hal_rocm_pipeline_layout_t* pipeline_layout = - iree_hal_rocm_pipeline_layout_cast(base_pipeline_layout); - return pipeline_layout->push_constant_count; -} - -static const iree_hal_pipeline_layout_vtable_t - iree_hal_rocm_pipeline_layout_vtable = { - .destroy = iree_hal_rocm_pipeline_layout_destroy, -}; diff --git a/experimental/rocm/pipeline_layout.h b/experimental/rocm/pipeline_layout.h deleted file mode 100644 index 1a81ce380d80..000000000000 --- a/experimental/rocm/pipeline_layout.h +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_PIPELINE_LAYOUT_H_ -#define IREE_HAL_ROCM_PIPELINE_LAYOUT_H_ - -#include "experimental/rocm/context_wrapper.h" -#include "iree/base/api.h" -#include "iree/hal/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -#define IREE_HAL_ROCM_MAX_PUSH_CONSTANT_COUNT 64 - -//===----------------------------------------------------------------------===// -// iree_hal_rocm_descriptor_set_layout_t -//===----------------------------------------------------------------------===// - -iree_status_t iree_hal_rocm_descriptor_set_layout_create( - iree_hal_rocm_context_wrapper_t* context, - iree_hal_descriptor_set_layout_flags_t flags, - iree_host_size_t binding_count, - const iree_hal_descriptor_set_layout_binding_t* bindings, - iree_hal_descriptor_set_layout_t** out_descriptor_set_layout); - -// Return the binding count for the given descriptor set layout. -iree_host_size_t iree_hal_rocm_descriptor_set_layout_binding_count( - iree_hal_descriptor_set_layout_t* descriptor_set_layout); - -//===----------------------------------------------------------------------===// -// iree_hal_rocm_pipeline_layout_t -//===----------------------------------------------------------------------===// - -// Creates the kernel arguments. -iree_status_t iree_hal_rocm_pipeline_layout_create( - iree_hal_rocm_context_wrapper_t* context, iree_host_size_t set_layout_count, - iree_hal_descriptor_set_layout_t* const* set_layouts, - iree_host_size_t push_constant_count, - iree_hal_pipeline_layout_t** out_pipeline_layout); - -// Return the base binding index for the given set. -iree_host_size_t iree_hal_rocm_base_binding_index( - iree_hal_pipeline_layout_t* pipeline_layout, uint32_t set); - -// Return the base index for push constant data. -iree_host_size_t iree_hal_rocm_push_constant_index( - iree_hal_pipeline_layout_t* base_pipeline_layout); - -// Return the number of constants in the pipeline layout. -iree_host_size_t iree_hal_rocm_pipeline_layout_num_constants( - iree_hal_pipeline_layout_t* base_pipeline_layout); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_PIPELINE_LAYOUT_H_ diff --git a/experimental/rocm/registration/CMakeLists.txt b/experimental/rocm/registration/CMakeLists.txt deleted file mode 100644 index 7db7b789ec22..000000000000 --- a/experimental/rocm/registration/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2021 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -iree_cc_library( - NAME - registration - HDRS - "driver_module.h" - SRCS - "driver_module.c" - DEPS - iree::base - iree::experimental::rocm - iree::hal - DEFINES - "IREE_HAVE_HAL_EXPERIMENTAL_ROCM_DRIVER_MODULE=1" - PUBLIC -) diff --git a/experimental/rocm/registration/driver_module.c b/experimental/rocm/registration/driver_module.c deleted file mode 100644 index fcdadfe3c112..000000000000 --- a/experimental/rocm/registration/driver_module.c +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/registration/driver_module.h" - -#include -#include - -#include "experimental/rocm/api.h" -#include "iree/base/api.h" - -static iree_status_t iree_hal_rocm_driver_factory_enumerate( - void *self, iree_host_size_t *out_driver_info_count, - const iree_hal_driver_info_t **out_driver_infos) { - // NOTE: we could query supported ROCM versions or featuresets here. - static const iree_hal_driver_info_t driver_infos[1] = {{ - .driver_name = iree_string_view_literal("rocm"), - .full_name = iree_string_view_literal("ROCM (dynamic)"), - }}; - *out_driver_info_count = IREE_ARRAYSIZE(driver_infos); - *out_driver_infos = driver_infos; - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_driver_factory_try_create( - void *self, iree_string_view_t driver_name, iree_allocator_t host_allocator, - iree_hal_driver_t **out_driver) { - IREE_ASSERT_ARGUMENT(out_driver); - *out_driver = NULL; - if (!iree_string_view_equal(driver_name, IREE_SV("rocm"))) { - return iree_make_status(IREE_STATUS_UNAVAILABLE, - "no driver '%.*s' is provided by this factory", - (int)driver_name.size, driver_name.data); - } - IREE_TRACE_ZONE_BEGIN(z0); - iree_hal_rocm_driver_options_t driver_options; - iree_hal_rocm_driver_options_initialize(&driver_options); - iree_status_t status = iree_hal_rocm_driver_create( - driver_name, &driver_options, host_allocator, out_driver); - IREE_TRACE_ZONE_END(z0); - return status; -} - -IREE_API_EXPORT iree_status_t -iree_hal_rocm_driver_module_register(iree_hal_driver_registry_t *registry) { - static const iree_hal_driver_factory_t factory = { - .self = NULL, - .enumerate = iree_hal_rocm_driver_factory_enumerate, - .try_create = iree_hal_rocm_driver_factory_try_create, - }; - return iree_hal_driver_registry_register_factory(registry, &factory); -} diff --git a/experimental/rocm/registration/driver_module.h b/experimental/rocm/registration/driver_module.h deleted file mode 100644 index efc1a705d3b2..000000000000 --- a/experimental/rocm/registration/driver_module.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_REGISTRATION_DRIVER_MODULE_H_ -#define IREE_HAL_ROCM_REGISTRATION_DRIVER_MODULE_H_ - -#include "iree/base/api.h" -#include "iree/hal/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -IREE_API_EXPORT iree_status_t -iree_hal_rocm_driver_module_register(iree_hal_driver_registry_t *registry); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_REGISTRATION_DRIVER_MODULE_H_ diff --git a/experimental/rocm/rocm_allocator.c b/experimental/rocm/rocm_allocator.c deleted file mode 100644 index 37648320989f..000000000000 --- a/experimental/rocm/rocm_allocator.c +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/rocm_allocator.h" - -#include - -#include "experimental/rocm/dynamic_symbols.h" -#include "experimental/rocm/rocm_buffer.h" -#include "experimental/rocm/status_util.h" -#include "iree/base/api.h" - -#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_ALLOCATION_TRACKING -static const char* IREE_HAL_ROCM_ALLOCATOR_ID = "ROCm"; -#endif // IREE_TRACING_FEATURE_ALLOCATION_TRACKING - -typedef struct iree_hal_rocm_allocator_t { - iree_hal_resource_t resource; - iree_hal_device_t* base_device; - iree_hal_rocm_context_wrapper_t* context; - - bool supports_concurrent_managed_access; - - IREE_STATISTICS(iree_hal_allocator_statistics_t statistics;) -} iree_hal_rocm_allocator_t; - -static const iree_hal_allocator_vtable_t iree_hal_rocm_allocator_vtable; - -static iree_hal_rocm_allocator_t* iree_hal_rocm_allocator_cast( - iree_hal_allocator_t* base_value) { - IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_rocm_allocator_vtable); - return (iree_hal_rocm_allocator_t*)base_value; -} - -iree_status_t iree_hal_rocm_allocator_create( - iree_hal_rocm_context_wrapper_t* context, - iree_hal_allocator_t** out_allocator) { - IREE_ASSERT_ARGUMENT(context); - IREE_TRACE_ZONE_BEGIN(z0); - - // To support device-local + host-visible memory we need concurrent managed - // access indicating that the host and devices can concurrently access the - // device memory. If we don't have this feature then we fall back to forcing - // all device-local + host-visible memory into host-local + device-visible - // page-locked memory. The compiler tries to avoid this for high-traffic - // buffers except for readback staging buffers. - int supports_concurrent_managed_access = 0; - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, ROCM_RESULT_TO_STATUS( - context->syms, - hipDeviceGetAttribute(&supports_concurrent_managed_access, - hipDeviceAttributeConcurrentManagedAccess, - context->rocm_device), - "hipDeviceGetAttribute")); - - IREE_TRACE_ZONE_APPEND_TEXT( - z0, supports_concurrent_managed_access - ? "has CONCURRENT_MANAGED_ACCESS" - : "no CONCURRENT_MANAGED_ACCESS (expect slow accesses on " - "device-local + host-visible memory)"); - - iree_hal_rocm_allocator_t* allocator = NULL; - iree_status_t status = iree_allocator_malloc( - context->host_allocator, sizeof(*allocator), (void**)&allocator); - if (iree_status_is_ok(status)) { - iree_hal_resource_initialize(&iree_hal_rocm_allocator_vtable, - &allocator->resource); - allocator->context = context; - allocator->supports_concurrent_managed_access = - supports_concurrent_managed_access != 0; - *out_allocator = (iree_hal_allocator_t*)allocator; - } - - IREE_TRACE_ZONE_END(z0); - return status; -} - -static void iree_hal_rocm_allocator_destroy( - iree_hal_allocator_t* IREE_RESTRICT base_allocator) { - iree_hal_rocm_allocator_t* allocator = - iree_hal_rocm_allocator_cast(base_allocator); - iree_allocator_t host_allocator = allocator->context->host_allocator; - IREE_TRACE_ZONE_BEGIN(z0); - - iree_allocator_free(host_allocator, allocator); - - IREE_TRACE_ZONE_END(z0); -} - -static iree_allocator_t iree_hal_rocm_allocator_host_allocator( - const iree_hal_allocator_t* IREE_RESTRICT base_allocator) { - iree_hal_rocm_allocator_t* allocator = - (iree_hal_rocm_allocator_t*)base_allocator; - return allocator->context->host_allocator; -} - -static iree_status_t iree_hal_rocm_allocator_trim( - iree_hal_allocator_t* IREE_RESTRICT base_allocator) { - return iree_ok_status(); -} - -static void iree_hal_rocm_allocator_query_statistics( - iree_hal_allocator_t* IREE_RESTRICT base_allocator, - iree_hal_allocator_statistics_t* IREE_RESTRICT out_statistics) { - IREE_STATISTICS({ - iree_hal_rocm_allocator_t* allocator = - iree_hal_rocm_allocator_cast(base_allocator); - memcpy(out_statistics, &allocator->statistics, sizeof(*out_statistics)); - }); -} - -static iree_status_t iree_hal_rocm_allocator_query_memory_heaps( - iree_hal_allocator_t* IREE_RESTRICT base_allocator, - iree_host_size_t capacity, - iree_hal_allocator_memory_heap_t* IREE_RESTRICT heaps, - iree_host_size_t* IREE_RESTRICT out_count) { - const iree_host_size_t count = 3; - if (out_count) *out_count = count; - if (capacity < count) { - // NOTE: lightweight as this is hit in normal pre-sizing usage. - return iree_status_from_code(IREE_STATUS_OUT_OF_RANGE); - } - - // NOTE: this is all a guess - someone who is familiar with rocm will want - // to refine this further. - - // Don't think there's a query for these. - // Max allocation size may be much smaller in certain memory types such as - // page-locked memory and it'd be good to enforce that. - const iree_device_size_t max_allocation_size = ~(iree_device_size_t)0; - const iree_device_size_t min_alignment = 64; - - // Device-local memory (dispatch resources): - heaps[0] = (iree_hal_allocator_memory_heap_t){ - .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL, - .allowed_usage = - IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_DISPATCH, - .max_allocation_size = max_allocation_size, - .min_alignment = min_alignment, - }; - - // Write-combined page-locked host-local memory (upload): - heaps[1] = (iree_hal_allocator_memory_heap_t){ - .type = - IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_COHERENT, - .allowed_usage = - IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_MAPPING, - .max_allocation_size = max_allocation_size, - .min_alignment = min_alignment, - }; - - // Cached page-locked host-local memory (download): - heaps[2] = (iree_hal_allocator_memory_heap_t){ - .type = IREE_HAL_MEMORY_TYPE_HOST_LOCAL | - IREE_HAL_MEMORY_TYPE_HOST_COHERENT | - IREE_HAL_MEMORY_TYPE_HOST_CACHED, - .allowed_usage = - IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_MAPPING, - .max_allocation_size = max_allocation_size, - .min_alignment = min_alignment, - }; - - return iree_ok_status(); -} - -static iree_hal_buffer_compatibility_t -iree_hal_rocm_allocator_query_buffer_compatibility( - iree_hal_allocator_t* IREE_RESTRICT base_allocator, - iree_hal_buffer_params_t* IREE_RESTRICT params, - iree_device_size_t* IREE_RESTRICT allocation_size) { - iree_hal_rocm_allocator_t* allocator = - iree_hal_rocm_allocator_cast(base_allocator); - - // All buffers can be allocated on the heap. - iree_hal_buffer_compatibility_t compatibility = - IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE; - - if (iree_any_bit_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER)) { - compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER; - } - - // Buffers can only be used on the queue if they are device visible. - if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) { - if (iree_any_bit_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER)) { - compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER; - } - if (iree_any_bit_set(params->usage, - IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE)) { - compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH; - } - } - - if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | - IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) { - compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_LOW_PERFORMANCE; - // If concurrent managed access is not supported then make device-local + - // host-visible allocations fall back to host-local + device-visible - // page-locked memory. This will be significantly slower for the device to - // access but the compiler only uses this type for readback staging buffers - // and it's better to function than function fast. - if (!allocator->supports_concurrent_managed_access) { - params->type &= ~(IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | - IREE_HAL_MEMORY_TYPE_HOST_VISIBLE); - params->type |= - IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE; - } - } - - // We are now optimal. - params->type &= ~IREE_HAL_MEMORY_TYPE_OPTIMAL; - - // Guard against the corner case where the requested buffer size is 0. The - // application is unlikely to do anything when requesting a 0-byte buffer; but - // it can happen in real world use cases. So we should at least not crash. - if (*allocation_size == 0) *allocation_size = 4; - - return compatibility; -} - -static void iree_hal_rocm_buffer_free(iree_hal_rocm_context_wrapper_t* context, - iree_hal_memory_type_t memory_type, - hipDeviceptr_t device_ptr, - void* host_ptr) { - if (iree_all_bits_set(memory_type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)) { - // Device local. - ROCM_IGNORE_ERROR(context->syms, hipFree(device_ptr)); - } else { - // Host local. - ROCM_IGNORE_ERROR(context->syms, hipHostFree(host_ptr)); - } -} - -static iree_status_t iree_hal_rocm_allocator_allocate_buffer( - iree_hal_allocator_t* IREE_RESTRICT base_allocator, - const iree_hal_buffer_params_t* IREE_RESTRICT params, - iree_device_size_t allocation_size, - iree_hal_buffer_t** IREE_RESTRICT out_buffer) { - iree_hal_rocm_allocator_t* allocator = - iree_hal_rocm_allocator_cast(base_allocator); - // Coerce options into those required by the current device. - iree_hal_buffer_params_t compat_params = *params; - if (!iree_all_bits_set(iree_hal_rocm_allocator_query_buffer_compatibility( - base_allocator, &compat_params, &allocation_size), - IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE)) { - return iree_make_status( - IREE_STATUS_INVALID_ARGUMENT, - "allocator cannot allocate a buffer with the given parameters"); - } - - iree_status_t status = iree_ok_status(); - void* host_ptr = NULL; - hipDeviceptr_t device_ptr = 0; - if (iree_all_bits_set(compat_params.type, - IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)) { - // Device local case. - if (iree_all_bits_set(compat_params.type, - IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) { - status = ROCM_RESULT_TO_STATUS( - allocator->context->syms, - hipMallocManaged(&device_ptr, allocation_size, hipMemAttachGlobal)); - if (iree_status_is_ok(status) && - allocator->supports_concurrent_managed_access) { - // Prefetch the buffer on the GPU device. - status = ROCM_RESULT_TO_STATUS( - allocator->context->syms, - hipMemPrefetchAsync(device_ptr, allocation_size, - allocator->context->rocm_device, - allocator->context->rocm_stream)); - } - host_ptr = (void*)device_ptr; - } else { - // Device only. - status = ROCM_RESULT_TO_STATUS(allocator->context->syms, - hipMalloc(&device_ptr, allocation_size)); - } - } else { - unsigned int flags = hipHostMallocMapped; - if (!iree_all_bits_set(compat_params.type, - IREE_HAL_MEMORY_TYPE_HOST_CACHED)) { - flags |= hipHostMallocWriteCombined; - } - status = ROCM_RESULT_TO_STATUS( - allocator->context->syms, - hipMemAllocHost(&host_ptr, allocation_size, flags)); - if (iree_status_is_ok(status)) { - status = ROCM_RESULT_TO_STATUS( - allocator->context->syms, - hipHostGetDevicePointer(&device_ptr, host_ptr, /*flags=*/0)); - } - } - - iree_hal_buffer_t* buffer = NULL; - if (iree_status_is_ok(status)) { - status = iree_hal_rocm_buffer_wrap( - (iree_hal_allocator_t*)allocator, compat_params.type, - compat_params.access, compat_params.usage, allocation_size, - /*byte_offset=*/0, - /*byte_length=*/allocation_size, device_ptr, host_ptr, &buffer); - } - - if (iree_status_is_ok(status)) { - IREE_TRACE_ALLOC_NAMED(IREE_HAL_ROCM_ALLOCATOR_ID, - (void*)iree_hal_rocm_buffer_device_pointer(buffer), - allocation_size); - IREE_STATISTICS(iree_hal_allocator_statistics_record_alloc( - &allocator->statistics, compat_params.type, allocation_size)); - *out_buffer = buffer; - } else { - if (!buffer) { - iree_hal_rocm_buffer_free(allocator->context, compat_params.type, - device_ptr, host_ptr); - } else { - iree_hal_buffer_release(buffer); - } - } - return status; -} - -static void iree_hal_rocm_allocator_deallocate_buffer( - iree_hal_allocator_t* IREE_RESTRICT base_allocator, - iree_hal_buffer_t* IREE_RESTRICT base_buffer) { - iree_hal_rocm_allocator_t* allocator = - iree_hal_rocm_allocator_cast(base_allocator); - - iree_hal_memory_type_t memory_type = iree_hal_buffer_memory_type(base_buffer); - iree_hal_rocm_buffer_free(allocator->context, memory_type, - iree_hal_rocm_buffer_device_pointer(base_buffer), - iree_hal_rocm_buffer_host_pointer(base_buffer)); - - IREE_TRACE_FREE_NAMED( - IREE_HAL_ROCM_ALLOCATOR_ID, - (void*)iree_hal_rocm_buffer_device_pointer(base_buffer)); - IREE_STATISTICS(iree_hal_allocator_statistics_record_free( - &allocator->statistics, memory_type, - iree_hal_buffer_allocation_size(base_buffer))); - - iree_hal_buffer_destroy(base_buffer); -} - -static iree_status_t iree_hal_rocm_allocator_import_buffer( - iree_hal_allocator_t* IREE_RESTRICT base_allocator, - const iree_hal_buffer_params_t* IREE_RESTRICT params, - iree_hal_external_buffer_t* IREE_RESTRICT external_buffer, - iree_hal_buffer_release_callback_t release_callback, - iree_hal_buffer_t** IREE_RESTRICT out_buffer) { - return iree_make_status(IREE_STATUS_UNAVAILABLE, - "importing from external buffers not supported"); -} - -static iree_status_t iree_hal_rocm_allocator_export_buffer( - iree_hal_allocator_t* IREE_RESTRICT base_allocator, - iree_hal_buffer_t* IREE_RESTRICT buffer, - iree_hal_external_buffer_type_t requested_type, - iree_hal_external_buffer_flags_t requested_flags, - iree_hal_external_buffer_t* IREE_RESTRICT out_external_buffer) { - return iree_make_status(IREE_STATUS_UNAVAILABLE, - "exporting to external buffers not supported"); -} - -static const iree_hal_allocator_vtable_t iree_hal_rocm_allocator_vtable = { - .destroy = iree_hal_rocm_allocator_destroy, - .host_allocator = iree_hal_rocm_allocator_host_allocator, - .trim = iree_hal_rocm_allocator_trim, - .query_statistics = iree_hal_rocm_allocator_query_statistics, - .query_memory_heaps = iree_hal_rocm_allocator_query_memory_heaps, - .query_buffer_compatibility = - iree_hal_rocm_allocator_query_buffer_compatibility, - .allocate_buffer = iree_hal_rocm_allocator_allocate_buffer, - .deallocate_buffer = iree_hal_rocm_allocator_deallocate_buffer, - .import_buffer = iree_hal_rocm_allocator_import_buffer, - .export_buffer = iree_hal_rocm_allocator_export_buffer, -}; diff --git a/experimental/rocm/rocm_allocator.h b/experimental/rocm/rocm_allocator.h deleted file mode 100644 index a2a89eab2cdd..000000000000 --- a/experimental/rocm/rocm_allocator.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_ALLOCATOR_H_ -#define IREE_HAL_ROCM_ALLOCATOR_H_ - -#include "experimental/rocm/context_wrapper.h" -#include "experimental/rocm/status_util.h" -#include "iree/base/api.h" -#include "iree/hal/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// Create a ROCM allocator. -iree_status_t iree_hal_rocm_allocator_create( - iree_hal_rocm_context_wrapper_t* context, - iree_hal_allocator_t** out_allocator); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_ALLOCATOR_H_ diff --git a/experimental/rocm/rocm_buffer.c b/experimental/rocm/rocm_buffer.c deleted file mode 100644 index 7001977ced1b..000000000000 --- a/experimental/rocm/rocm_buffer.c +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/rocm_buffer.h" - -#include -#include -#include - -#include "iree/base/api.h" - -typedef struct iree_hal_rocm_buffer_t { - iree_hal_buffer_t base; - void* host_ptr; - hipDeviceptr_t device_ptr; -} iree_hal_rocm_buffer_t; - -static const iree_hal_buffer_vtable_t iree_hal_rocm_buffer_vtable; - -static iree_hal_rocm_buffer_t* iree_hal_rocm_buffer_cast( - iree_hal_buffer_t* base_value) { - IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_rocm_buffer_vtable); - return (iree_hal_rocm_buffer_t*)base_value; -} - -iree_status_t iree_hal_rocm_buffer_wrap( - iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type, - iree_hal_memory_access_t allowed_access, - iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size, - iree_device_size_t byte_offset, iree_device_size_t byte_length, - hipDeviceptr_t device_ptr, void* host_ptr, iree_hal_buffer_t** out_buffer) { - IREE_ASSERT_ARGUMENT(allocator); - IREE_ASSERT_ARGUMENT(out_buffer); - IREE_TRACE_ZONE_BEGIN(z0); - - iree_allocator_t host_allocator = - iree_hal_allocator_host_allocator(allocator); - iree_hal_rocm_buffer_t* buffer = NULL; - iree_status_t status = - iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer); - if (iree_status_is_ok(status)) { - iree_hal_buffer_initialize(host_allocator, allocator, &buffer->base, - allocation_size, byte_offset, byte_length, - memory_type, allowed_access, allowed_usage, - &iree_hal_rocm_buffer_vtable, &buffer->base); - buffer->host_ptr = host_ptr; - buffer->device_ptr = device_ptr; - *out_buffer = &buffer->base; - } - - IREE_TRACE_ZONE_END(z0); - return status; -} - -static void iree_hal_rocm_buffer_destroy(iree_hal_buffer_t* base_buffer) { - iree_hal_rocm_buffer_t* buffer = iree_hal_rocm_buffer_cast(base_buffer); - iree_allocator_t host_allocator = base_buffer->host_allocator; - IREE_TRACE_ZONE_BEGIN(z0); - iree_allocator_free(host_allocator, buffer); - IREE_TRACE_ZONE_END(z0); -} - -static iree_status_t iree_hal_rocm_buffer_map_range( - iree_hal_buffer_t* base_buffer, iree_hal_mapping_mode_t mapping_mode, - iree_hal_memory_access_t memory_access, - iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length, - iree_hal_buffer_mapping_t* mapping) { - iree_hal_rocm_buffer_t* buffer = iree_hal_rocm_buffer_cast(base_buffer); - - // TODO(benvanik): add upload/download for unmapped buffers. - IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_memory_type( - iree_hal_buffer_memory_type(base_buffer), - IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)); - IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage( - iree_hal_buffer_allowed_usage(base_buffer), - mapping_mode == IREE_HAL_MAPPING_MODE_PERSISTENT - ? IREE_HAL_BUFFER_USAGE_MAPPING_PERSISTENT - : IREE_HAL_BUFFER_USAGE_MAPPING_SCOPED)); - - uint8_t* data_ptr = (uint8_t*)(buffer->host_ptr) + local_byte_offset; - // If we mapped for discard scribble over the bytes. This is not a mandated - // behavior but it will make debugging issues easier. Alternatively for - // heap buffers we could reallocate them such that ASAN yells, but that - // would only work if the entire buffer was discarded. -#ifndef NDEBUG - if (iree_any_bit_set(memory_access, IREE_HAL_MEMORY_ACCESS_DISCARD)) { - memset(data_ptr, 0xCD, local_byte_length); - } -#endif // !NDEBUG - - mapping->contents = iree_make_byte_span(data_ptr, local_byte_length); - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_buffer_unmap_range( - iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset, - iree_device_size_t local_byte_length, iree_hal_buffer_mapping_t* mapping) { - // Nothing to do (today). - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_buffer_invalidate_range( - iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset, - iree_device_size_t local_byte_length) { - // Nothing to do. - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_buffer_flush_range( - iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset, - iree_device_size_t local_byte_length) { - // Nothing to do. - return iree_ok_status(); -} - -hipDeviceptr_t iree_hal_rocm_buffer_device_pointer( - iree_hal_buffer_t* base_buffer) { - iree_hal_rocm_buffer_t* buffer = iree_hal_rocm_buffer_cast(base_buffer); - return buffer->device_ptr; -} - -void* iree_hal_rocm_buffer_host_pointer(iree_hal_buffer_t* base_buffer) { - iree_hal_rocm_buffer_t* buffer = iree_hal_rocm_buffer_cast(base_buffer); - return buffer->host_ptr; -} - -static const iree_hal_buffer_vtable_t iree_hal_rocm_buffer_vtable = { - .recycle = iree_hal_buffer_recycle, - .destroy = iree_hal_rocm_buffer_destroy, - .map_range = iree_hal_rocm_buffer_map_range, - .unmap_range = iree_hal_rocm_buffer_unmap_range, - .invalidate_range = iree_hal_rocm_buffer_invalidate_range, - .flush_range = iree_hal_rocm_buffer_flush_range, -}; diff --git a/experimental/rocm/rocm_buffer.h b/experimental/rocm/rocm_buffer.h deleted file mode 100644 index c87be8004c34..000000000000 --- a/experimental/rocm/rocm_buffer.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_BUFFER_H_ -#define IREE_HAL_ROCM_BUFFER_H_ - -#include "experimental/rocm/rocm_headers.h" -#include "iree/base/api.h" -#include "iree/hal/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// Wraps a ROCm allocation in an iree_hal_buffer_t. -iree_status_t iree_hal_rocm_buffer_wrap( - iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type, - iree_hal_memory_access_t allowed_access, - iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size, - iree_device_size_t byte_offset, iree_device_size_t byte_length, - hipDeviceptr_t device_ptr, void* host_ptr, iree_hal_buffer_t** out_buffer); - -// Returns the ROCm base pointer for the given |buffer|. -// This is the entire allocated_buffer and must be offset by the buffer -// byte_offset and byte_length when used. -hipDeviceptr_t iree_hal_rocm_buffer_device_pointer(iree_hal_buffer_t* buffer); - -// Returns the ROCm host pointer for the given |buffer|, if available. -void* iree_hal_rocm_buffer_host_pointer(iree_hal_buffer_t* buffer); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_BUFFER_H_ diff --git a/experimental/rocm/rocm_device.c b/experimental/rocm/rocm_device.c deleted file mode 100644 index 697ab1fca473..000000000000 --- a/experimental/rocm/rocm_device.c +++ /dev/null @@ -1,466 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/rocm_device.h" - -#include -#include -#include - -#include "experimental/rocm/context_wrapper.h" -#include "experimental/rocm/direct_command_buffer.h" -#include "experimental/rocm/dynamic_symbols.h" -#include "experimental/rocm/event_semaphore.h" -#include "experimental/rocm/nop_executable_cache.h" -#include "experimental/rocm/pipeline_layout.h" -#include "experimental/rocm/rocm_allocator.h" -#include "experimental/rocm/rocm_event.h" -#include "experimental/rocm/status_util.h" -#include "experimental/rocm/tracing.h" -#include "iree/base/internal/arena.h" -#include "iree/hal/utils/file_transfer.h" -#include "iree/hal/utils/memory_file.h" - -//===----------------------------------------------------------------------===// -// iree_hal_rocm_device_t -//===----------------------------------------------------------------------===// - -typedef struct iree_hal_rocm_device_t { - iree_hal_resource_t resource; - iree_string_view_t identifier; - - // Block pool used for command buffers with a larger block size (as command - // buffers can contain inlined data uploads). - iree_arena_block_pool_t block_pool; - - // Optional driver that owns the ROCM symbols. We retain it for our lifetime - // to ensure the symbols remains valid. - iree_hal_driver_t* driver; - - hipDevice_t device; - - // TODO: support multiple streams. - hipStream_t stream; - iree_hal_rocm_tracing_context_t* tracing_context; - iree_hal_rocm_context_wrapper_t context_wrapper; - iree_hal_allocator_t* device_allocator; - - // Optional provider used for creating/configuring collective channels. - iree_hal_channel_provider_t* channel_provider; -} iree_hal_rocm_device_t; - -static const iree_hal_device_vtable_t iree_hal_rocm_device_vtable; - -static iree_hal_rocm_device_t* iree_hal_rocm_device_cast( - iree_hal_device_t* base_value) { - IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_rocm_device_vtable); - return (iree_hal_rocm_device_t*)base_value; -} - -static void iree_hal_rocm_device_destroy(iree_hal_device_t* base_device) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - iree_allocator_t host_allocator = iree_hal_device_host_allocator(base_device); - IREE_TRACE_ZONE_BEGIN(z0); - - // There should be no more buffers live that use the allocator. - iree_hal_allocator_release(device->device_allocator); - - // Buffers may have been retaining collective resources. - iree_hal_channel_provider_release(device->channel_provider); - - iree_hal_rocm_tracing_context_free(device->tracing_context); - ROCM_IGNORE_ERROR(device->context_wrapper.syms, - hipStreamDestroy(device->stream)); - - // Finally, destroy the device. - iree_hal_driver_release(device->driver); - - iree_allocator_free(host_allocator, device); - - IREE_TRACE_ZONE_END(z0); -} - -static iree_status_t iree_hal_rocm_device_create_internal( - iree_hal_driver_t* driver, iree_string_view_t identifier, - hipDevice_t rocm_device, hipStream_t stream, hipCtx_t context, - iree_hal_rocm_dynamic_symbols_t* syms, iree_allocator_t host_allocator, - iree_hal_device_t** out_device) { - iree_hal_rocm_device_t* device = NULL; - iree_host_size_t total_size = sizeof(*device) + identifier.size; - IREE_RETURN_IF_ERROR( - iree_allocator_malloc(host_allocator, total_size, (void**)&device)); - memset(device, 0, total_size); - iree_hal_resource_initialize(&iree_hal_rocm_device_vtable, &device->resource); - device->driver = driver; - iree_hal_driver_retain(device->driver); - uint8_t* buffer_ptr = (uint8_t*)device + sizeof(*device); - buffer_ptr += iree_string_view_append_to_buffer( - identifier, &device->identifier, (char*)buffer_ptr); - iree_arena_block_pool_initialize(/*arena_block_size=*/32 * 1024, - host_allocator, &device->block_pool); - device->device = rocm_device; - device->stream = stream; - device->context_wrapper.rocm_context = context; - device->context_wrapper.rocm_device = rocm_device; - device->context_wrapper.rocm_stream = stream; - device->context_wrapper.host_allocator = host_allocator; - device->context_wrapper.syms = syms; - // Enable tracing for the (currently only) stream - no-op if disabled. - iree_status_t status = iree_hal_rocm_tracing_context_allocate( - &device->context_wrapper, device->identifier, stream, &device->block_pool, - host_allocator, &device->tracing_context); - if (iree_status_is_ok(status)) { - status = iree_hal_rocm_allocator_create(&device->context_wrapper, - &device->device_allocator); - } - if (iree_status_is_ok(status)) { - *out_device = (iree_hal_device_t*)device; - } else { - iree_hal_device_release((iree_hal_device_t*)device); - } - return status; -} - -iree_status_t iree_hal_rocm_device_create(iree_hal_driver_t* driver, - iree_string_view_t identifier, - iree_hal_rocm_dynamic_symbols_t* syms, - hipDevice_t device, - iree_allocator_t host_allocator, - iree_hal_device_t** out_device) { - IREE_TRACE_ZONE_BEGIN(z0); - hipCtx_t context; - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, - ROCM_RESULT_TO_STATUS(syms, hipDevicePrimaryCtxRetain(&context, device))); - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, ROCM_RESULT_TO_STATUS(syms, hipCtxSetCurrent(context))); - - hipStream_t stream; - iree_status_t status = ROCM_RESULT_TO_STATUS( - syms, hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); - - if (iree_status_is_ok(status)) { - status = iree_hal_rocm_device_create_internal(driver, identifier, device, - stream, context, syms, - host_allocator, out_device); - } - if (!iree_status_is_ok(status)) { - if (stream) { - syms->hipStreamDestroy(stream); - } - syms->hipDevicePrimaryCtxRelease(device); - } - IREE_TRACE_ZONE_END(z0); - return status; -} - -static iree_string_view_t iree_hal_rocm_device_id( - iree_hal_device_t* base_device) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - return device->identifier; -} - -static iree_allocator_t iree_hal_rocm_device_host_allocator( - iree_hal_device_t* base_device) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - return device->context_wrapper.host_allocator; -} - -static iree_hal_allocator_t* iree_hal_rocm_device_allocator( - iree_hal_device_t* base_device) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - return device->device_allocator; -} - -static void iree_hal_rocm_replace_device_allocator( - iree_hal_device_t* base_device, iree_hal_allocator_t* new_allocator) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - iree_hal_allocator_retain(new_allocator); - iree_hal_allocator_release(device->device_allocator); - device->device_allocator = new_allocator; -} - -static void iree_hal_rocm_replace_channel_provider( - iree_hal_device_t* base_device, iree_hal_channel_provider_t* new_provider) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - iree_hal_channel_provider_retain(new_provider); - iree_hal_channel_provider_release(device->channel_provider); - device->channel_provider = new_provider; -} - -static iree_status_t iree_hal_rocm_device_query_i64( - iree_hal_device_t* base_device, iree_string_view_t category, - iree_string_view_t key, int64_t* out_value) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - *out_value = 0; - - if (iree_string_view_equal(category, IREE_SV("hal.device.id"))) { - *out_value = - iree_string_view_match_pattern(device->identifier, key) ? 1 : 0; - return iree_ok_status(); - } - - if (iree_string_view_equal(category, - iree_make_cstring_view("hal.executable.format"))) { - *out_value = - iree_string_view_equal(key, iree_make_cstring_view("rocm-hsaco-fb")) - ? 1 - : 0; - return iree_ok_status(); - } - - return iree_make_status( - IREE_STATUS_NOT_FOUND, - "unknown device configuration key value '%.*s :: %.*s'", - (int)category.size, category.data, (int)key.size, key.data); -} - -static iree_status_t iree_hal_rocm_device_trim(iree_hal_device_t* base_device) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - iree_arena_block_pool_trim(&device->block_pool); - return iree_hal_allocator_trim(device->device_allocator); -} - -static iree_status_t iree_hal_rocm_device_create_channel( - iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity, - iree_hal_channel_params_t params, iree_hal_channel_t** out_channel) { - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "collectives not implemented"); -} - -static iree_status_t iree_hal_rocm_device_create_command_buffer( - iree_hal_device_t* base_device, iree_hal_command_buffer_mode_t mode, - iree_hal_command_category_t command_categories, - iree_hal_queue_affinity_t queue_affinity, iree_host_size_t binding_capacity, - iree_hal_command_buffer_t** out_command_buffer) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - return iree_hal_rocm_direct_command_buffer_create( - iree_hal_device_allocator(base_device), &device->context_wrapper, - device->tracing_context, mode, command_categories, queue_affinity, - binding_capacity, &device->block_pool, out_command_buffer); -} - -static iree_status_t iree_hal_rocm_device_create_descriptor_set_layout( - iree_hal_device_t* base_device, - iree_hal_descriptor_set_layout_flags_t flags, - iree_host_size_t binding_count, - const iree_hal_descriptor_set_layout_binding_t* bindings, - iree_hal_descriptor_set_layout_t** out_descriptor_set_layout) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - return iree_hal_rocm_descriptor_set_layout_create( - &device->context_wrapper, flags, binding_count, bindings, - out_descriptor_set_layout); -} - -static iree_status_t iree_hal_rocm_device_create_event( - iree_hal_device_t* base_device, iree_hal_event_t** out_event) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - return iree_hal_rocm_event_create(&device->context_wrapper, out_event); -} - -static iree_status_t iree_hal_rocm_device_create_executable_cache( - iree_hal_device_t* base_device, iree_string_view_t identifier, - iree_loop_t loop, iree_hal_executable_cache_t** out_executable_cache) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - return iree_hal_rocm_nop_executable_cache_create( - &device->context_wrapper, identifier, out_executable_cache); -} - -static iree_status_t iree_hal_rocm_device_import_file( - iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity, - iree_hal_memory_access_t access, iree_io_file_handle_t* handle, - iree_hal_external_file_flags_t flags, iree_hal_file_t** out_file) { - if (iree_io_file_handle_type(handle) != - IREE_IO_FILE_HANDLE_TYPE_HOST_ALLOCATION) { - return iree_make_status( - IREE_STATUS_UNAVAILABLE, - "implementation does not support the external file type"); - } - return iree_hal_memory_file_wrap( - queue_affinity, access, handle, iree_hal_device_allocator(base_device), - iree_hal_device_host_allocator(base_device), out_file); -} - -static iree_status_t iree_hal_rocm_device_create_pipeline_layout( - iree_hal_device_t* base_device, iree_host_size_t push_constants, - iree_host_size_t set_layout_count, - iree_hal_descriptor_set_layout_t* const* set_layouts, - iree_hal_pipeline_layout_t** out_pipeline_layout) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - return iree_hal_rocm_pipeline_layout_create( - &device->context_wrapper, set_layout_count, set_layouts, push_constants, - out_pipeline_layout); -} - -static iree_status_t iree_hal_rocm_device_create_semaphore( - iree_hal_device_t* base_device, uint64_t initial_value, - iree_hal_semaphore_t** out_semaphore) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - return iree_hal_rocm_semaphore_create(&device->context_wrapper, initial_value, - out_semaphore); -} - -static iree_hal_semaphore_compatibility_t -iree_hal_rocm_device_query_semaphore_compatibility( - iree_hal_device_t* base_device, iree_hal_semaphore_t* semaphore) { - // TODO: implement ROCM semaphores. - return IREE_HAL_SEMAPHORE_COMPATIBILITY_HOST_ONLY; -} - -static iree_status_t iree_hal_rocm_device_queue_alloca( - iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity, - const iree_hal_semaphore_list_t wait_semaphore_list, - const iree_hal_semaphore_list_t signal_semaphore_list, - iree_hal_allocator_pool_t pool, iree_hal_buffer_params_t params, - iree_device_size_t allocation_size, - iree_hal_buffer_t** IREE_RESTRICT out_buffer) { - // TODO: queue-ordered allocations. - IREE_RETURN_IF_ERROR(iree_hal_semaphore_list_wait(wait_semaphore_list, - iree_infinite_timeout())); - IREE_RETURN_IF_ERROR( - iree_hal_allocator_allocate_buffer(iree_hal_device_allocator(base_device), - params, allocation_size, out_buffer)); - IREE_RETURN_IF_ERROR(iree_hal_semaphore_list_signal(signal_semaphore_list)); - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_device_queue_dealloca( - iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity, - const iree_hal_semaphore_list_t wait_semaphore_list, - const iree_hal_semaphore_list_t signal_semaphore_list, - iree_hal_buffer_t* buffer) { - // TODO: queue-ordered allocations. - IREE_RETURN_IF_ERROR(iree_hal_device_queue_barrier( - base_device, queue_affinity, wait_semaphore_list, signal_semaphore_list)); - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_device_queue_read( - iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity, - const iree_hal_semaphore_list_t wait_semaphore_list, - const iree_hal_semaphore_list_t signal_semaphore_list, - iree_hal_file_t* source_file, uint64_t source_offset, - iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset, - iree_device_size_t length, uint32_t flags) { - // TODO: expose streaming chunk count/size options. - iree_status_t loop_status = iree_ok_status(); - iree_hal_file_transfer_options_t options = { - .loop = iree_loop_inline(&loop_status), - .chunk_count = IREE_HAL_FILE_TRANSFER_CHUNK_COUNT_DEFAULT, - .chunk_size = IREE_HAL_FILE_TRANSFER_CHUNK_SIZE_DEFAULT, - }; - IREE_RETURN_IF_ERROR(iree_hal_device_queue_read_streaming( - base_device, queue_affinity, wait_semaphore_list, signal_semaphore_list, - source_file, source_offset, target_buffer, target_offset, length, flags, - options)); - return loop_status; -} - -static iree_status_t iree_hal_rocm_device_queue_write( - iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity, - const iree_hal_semaphore_list_t wait_semaphore_list, - const iree_hal_semaphore_list_t signal_semaphore_list, - iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset, - iree_hal_file_t* target_file, uint64_t target_offset, - iree_device_size_t length, uint32_t flags) { - // TODO: expose streaming chunk count/size options. - iree_status_t loop_status = iree_ok_status(); - iree_hal_file_transfer_options_t options = { - .loop = iree_loop_inline(&loop_status), - .chunk_count = IREE_HAL_FILE_TRANSFER_CHUNK_COUNT_DEFAULT, - .chunk_size = IREE_HAL_FILE_TRANSFER_CHUNK_SIZE_DEFAULT, - }; - IREE_RETURN_IF_ERROR(iree_hal_device_queue_write_streaming( - base_device, queue_affinity, wait_semaphore_list, signal_semaphore_list, - source_buffer, source_offset, target_file, target_offset, length, flags, - options)); - return loop_status; -} - -static iree_status_t iree_hal_rocm_device_queue_execute( - iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity, - const iree_hal_semaphore_list_t wait_semaphore_list, - const iree_hal_semaphore_list_t signal_semaphore_list, - iree_host_size_t command_buffer_count, - iree_hal_command_buffer_t* const* command_buffers, - iree_hal_buffer_binding_table_t const* binding_tables) { - iree_hal_rocm_device_t* device = iree_hal_rocm_device_cast(base_device); - // TODO(raikonenfnu): Once semaphore is implemented wait for semaphores - // TODO(thomasraoux): implement semaphores - for now this conservatively - // synchronizes after every submit. - // TODO(raikonenfnu): currently run on default/null stream, when cmd buffer - // stream work with device->stream, we'll change - IREE_TRACE_ZONE_BEGIN_NAMED(z0, "hipStreamSynchronize"); - ROCM_RETURN_IF_ERROR(device->context_wrapper.syms, hipStreamSynchronize(0), - "hipStreamSynchronize"); - iree_hal_rocm_tracing_context_collect(device->tracing_context); - IREE_TRACE_ZONE_END(z0); - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_device_queue_flush( - iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity) { - // Currently unused; we flush as submissions are made. - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_device_wait_semaphores( - iree_hal_device_t* base_device, iree_hal_wait_mode_t wait_mode, - const iree_hal_semaphore_list_t semaphore_list, iree_timeout_t timeout) { - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "semaphore not implemented"); -} - -static iree_status_t iree_hal_rocm_device_profiling_begin( - iree_hal_device_t* base_device, - const iree_hal_device_profiling_options_t* options) { - // Unimplemented (and that's ok). - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_device_profiling_flush( - iree_hal_device_t* base_device) { - // Unimplemented (and that's ok). - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_device_profiling_end( - iree_hal_device_t* base_device) { - // Unimplemented (and that's ok). - return iree_ok_status(); -} - -static const iree_hal_device_vtable_t iree_hal_rocm_device_vtable = { - .destroy = iree_hal_rocm_device_destroy, - .id = iree_hal_rocm_device_id, - .host_allocator = iree_hal_rocm_device_host_allocator, - .device_allocator = iree_hal_rocm_device_allocator, - .replace_device_allocator = iree_hal_rocm_replace_device_allocator, - .replace_channel_provider = iree_hal_rocm_replace_channel_provider, - .trim = iree_hal_rocm_device_trim, - .query_i64 = iree_hal_rocm_device_query_i64, - .create_channel = iree_hal_rocm_device_create_channel, - .create_command_buffer = iree_hal_rocm_device_create_command_buffer, - .create_descriptor_set_layout = - iree_hal_rocm_device_create_descriptor_set_layout, - .create_event = iree_hal_rocm_device_create_event, - .create_executable_cache = iree_hal_rocm_device_create_executable_cache, - .import_file = iree_hal_rocm_device_import_file, - .create_pipeline_layout = iree_hal_rocm_device_create_pipeline_layout, - .create_semaphore = iree_hal_rocm_device_create_semaphore, - .query_semaphore_compatibility = - iree_hal_rocm_device_query_semaphore_compatibility, - .queue_alloca = iree_hal_rocm_device_queue_alloca, - .queue_dealloca = iree_hal_rocm_device_queue_dealloca, - .queue_read = iree_hal_rocm_device_queue_read, - .queue_write = iree_hal_rocm_device_queue_write, - .queue_execute = iree_hal_rocm_device_queue_execute, - .queue_flush = iree_hal_rocm_device_queue_flush, - .wait_semaphores = iree_hal_rocm_device_wait_semaphores, - .profiling_begin = iree_hal_rocm_device_profiling_begin, - .profiling_flush = iree_hal_rocm_device_profiling_flush, - .profiling_end = iree_hal_rocm_device_profiling_end, -}; diff --git a/experimental/rocm/rocm_device.h b/experimental/rocm/rocm_device.h deleted file mode 100644 index 083f4c7cddb6..000000000000 --- a/experimental/rocm/rocm_device.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_ROCM_DEVICE_H_ -#define IREE_HAL_ROCM_ROCM_DEVICE_H_ - -#include "experimental/rocm/api.h" -#include "experimental/rocm/dynamic_symbols.h" -#include "iree/base/api.h" -#include "iree/hal/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// Creates a device that owns and manages its own hipContext. -iree_status_t iree_hal_rocm_device_create(iree_hal_driver_t* driver, - iree_string_view_t identifier, - iree_hal_rocm_dynamic_symbols_t* syms, - hipDevice_t device, - iree_allocator_t host_allocator, - iree_hal_device_t** out_device); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_ROCM_DEVICE_H_ diff --git a/experimental/rocm/rocm_driver.c b/experimental/rocm/rocm_driver.c deleted file mode 100644 index 2e33daac80f6..000000000000 --- a/experimental/rocm/rocm_driver.c +++ /dev/null @@ -1,429 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include -#include -#include - -#include "experimental/rocm/api.h" -#include "experimental/rocm/dynamic_symbols.h" -#include "experimental/rocm/rocm_device.h" -#include "experimental/rocm/status_util.h" -#include "iree/base/api.h" -#include "iree/hal/api.h" - -typedef struct iree_hal_rocm_driver_t { - iree_hal_resource_t resource; - iree_allocator_t host_allocator; - // Identifier used for the driver in the IREE driver registry. - // We allow overriding so that multiple ROCM versions can be exposed in the - // same process. - iree_string_view_t identifier; - int default_device_index; - // ROCM symbols. - iree_hal_rocm_dynamic_symbols_t syms; -} iree_hal_rocm_driver_t; - -// Maximum device name length supported by the ROCM HAL driver. -#define IREE_MAX_ROCM_DEVICE_NAME_LENGTH 128 - -// Utility macros to convert between HIPDevice and iree_hal_device_id_t. -#define IREE_HIPDEVICE_TO_DEVICE_ID(device) (iree_hal_device_id_t)((device) + 1) -#define IREE_DEVICE_ID_TO_HIPDEVICE(device_id) (hipDevice_t)((device_id) - 1) - -static const iree_hal_driver_vtable_t iree_hal_rocm_driver_vtable; - -static iree_hal_rocm_driver_t* iree_hal_rocm_driver_cast( - iree_hal_driver_t* base_value) { - IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_rocm_driver_vtable); - return (iree_hal_rocm_driver_t*)base_value; -} - -IREE_API_EXPORT void iree_hal_rocm_driver_options_initialize( - iree_hal_rocm_driver_options_t* out_options) { - memset(out_options, 0, sizeof(*out_options)); - out_options->default_device_index = 0; -} - -static iree_status_t iree_hal_rocm_driver_create_internal( - iree_string_view_t identifier, - const iree_hal_rocm_driver_options_t* options, - iree_allocator_t host_allocator, iree_hal_driver_t** out_driver) { -#if defined(IREE_PLATFORM_LINUX) - // Hack to force device kernel arguments to be preloaded, when available, and - // improve kernel latency. There doesn't seem to be any API to enable this. - // This option will become the default in ROCm 6.1. - // TODO: Remove this after upgrading to ROCm 6.1. - setenv("HIP_FORCE_DEV_KERNARG", "1", /*replace=*/0); -#endif - - iree_hal_rocm_driver_t* driver = NULL; - iree_host_size_t total_size = sizeof(*driver) + identifier.size; - IREE_RETURN_IF_ERROR( - iree_allocator_malloc(host_allocator, total_size, (void**)&driver)); - iree_hal_resource_initialize(&iree_hal_rocm_driver_vtable, &driver->resource); - driver->host_allocator = host_allocator; - iree_string_view_append_to_buffer( - identifier, &driver->identifier, - (char*)driver + total_size - identifier.size); - driver->default_device_index = options->default_device_index; - iree_status_t status = - iree_hal_rocm_dynamic_symbols_initialize(host_allocator, &driver->syms); - if (iree_status_is_ok(status)) { - *out_driver = (iree_hal_driver_t*)driver; - } else { - iree_hal_driver_release((iree_hal_driver_t*)driver); - } - return status; -} - -static void iree_hal_rocm_driver_destroy(iree_hal_driver_t* base_driver) { - iree_hal_rocm_driver_t* driver = iree_hal_rocm_driver_cast(base_driver); - iree_allocator_t host_allocator = driver->host_allocator; - IREE_TRACE_ZONE_BEGIN(z0); - - iree_hal_rocm_dynamic_symbols_deinitialize(&driver->syms); - iree_allocator_free(host_allocator, driver); - - IREE_TRACE_ZONE_END(z0); -} - -IREE_API_EXPORT iree_status_t iree_hal_rocm_driver_create( - iree_string_view_t identifier, - const iree_hal_rocm_driver_options_t* options, - iree_allocator_t host_allocator, iree_hal_driver_t** out_driver) { - IREE_ASSERT_ARGUMENT(options); - IREE_ASSERT_ARGUMENT(out_driver); - IREE_TRACE_ZONE_BEGIN(z0); - - iree_status_t status = iree_hal_rocm_driver_create_internal( - identifier, options, host_allocator, out_driver); - - IREE_TRACE_ZONE_END(z0); - return status; -} - -// Populates device information from the given ROCM physical device handle. -// |out_device_info| must point to valid memory and additional data will be -// appended to |buffer_ptr| and the new pointer is returned. -static uint8_t* iree_hal_rocm_populate_device_info( - hipDevice_t device, iree_hal_rocm_dynamic_symbols_t* syms, - uint8_t* buffer_ptr, iree_hal_device_info_t* out_device_info) { - char device_name[IREE_MAX_ROCM_DEVICE_NAME_LENGTH]; - hipUUID device_uuid; - ROCM_IGNORE_ERROR(syms, - hipDeviceGetName(device_name, sizeof(device_name), device)); - ROCM_IGNORE_ERROR(syms, hipDeviceGetUuid(&device_uuid, device)); - memset(out_device_info, 0, sizeof(*out_device_info)); - out_device_info->device_id = IREE_HIPDEVICE_TO_DEVICE_ID(device); - - char device_path_str[4 + 36 + 1] = {0}; - snprintf(device_path_str, sizeof(device_path_str), - "GPU-" - "%02x%02x%02x%02x-" - "%02x%02x-" - "%02x%02x-" - "%02x%02x-" - "%02x%02x%02x%02x%02x%02x", - (uint8_t)device_uuid.bytes[0], (uint8_t)device_uuid.bytes[1], - (uint8_t)device_uuid.bytes[2], (uint8_t)device_uuid.bytes[3], - (uint8_t)device_uuid.bytes[4], (uint8_t)device_uuid.bytes[5], - (uint8_t)device_uuid.bytes[6], (uint8_t)device_uuid.bytes[7], - (uint8_t)device_uuid.bytes[8], (uint8_t)device_uuid.bytes[9], - (uint8_t)device_uuid.bytes[10], (uint8_t)device_uuid.bytes[11], - (uint8_t)device_uuid.bytes[12], (uint8_t)device_uuid.bytes[13], - (uint8_t)device_uuid.bytes[14], (uint8_t)device_uuid.bytes[15]); - buffer_ptr += iree_string_view_append_to_buffer( - iree_make_string_view(device_path_str, - IREE_ARRAYSIZE(device_path_str) - 1), - &out_device_info->path, (char*)buffer_ptr); - - iree_string_view_t device_name_string = - iree_make_string_view(device_name, strlen(device_name)); - buffer_ptr += iree_string_view_append_to_buffer( - device_name_string, &out_device_info->name, (char*)buffer_ptr); - return buffer_ptr; -} - -static iree_status_t iree_hal_rocm_driver_query_available_devices( - iree_hal_driver_t* base_driver, iree_allocator_t host_allocator, - iree_host_size_t* out_device_info_count, - iree_hal_device_info_t** out_device_infos) { - iree_hal_rocm_driver_t* driver = iree_hal_rocm_driver_cast(base_driver); - // Query the number of available ROCM devices. - int device_count = 0; - ROCM_RETURN_IF_ERROR(&driver->syms, hipGetDeviceCount(&device_count), - "hipGetDeviceCount"); - - // Allocate the return infos and populate with the devices. - iree_hal_device_info_t* device_infos = NULL; - iree_host_size_t total_size = device_count * sizeof(iree_hal_device_info_t); - for (iree_host_size_t i = 0; i < device_count; ++i) { - total_size += IREE_MAX_ROCM_DEVICE_NAME_LENGTH * sizeof(char); - } - iree_status_t status = - iree_allocator_malloc(host_allocator, total_size, (void**)&device_infos); - if (iree_status_is_ok(status)) { - uint8_t* buffer_ptr = - (uint8_t*)device_infos + device_count * sizeof(iree_hal_device_info_t); - for (iree_host_size_t i = 0; i < device_count; ++i) { - hipDevice_t device = 0; - status = ROCM_RESULT_TO_STATUS(&driver->syms, hipDeviceGet(&device, i), - "hipDeviceGet"); - if (!iree_status_is_ok(status)) break; - buffer_ptr = iree_hal_rocm_populate_device_info( - device, &driver->syms, buffer_ptr, &device_infos[i]); - } - } - if (iree_status_is_ok(status)) { - *out_device_info_count = device_count; - *out_device_infos = device_infos; - } else { - iree_allocator_free(host_allocator, device_infos); - } - return status; -} - -static iree_status_t iree_hal_rocm_driver_dump_device_info( - iree_hal_driver_t* base_driver, iree_hal_device_id_t device_id, - iree_string_builder_t* builder) { - iree_hal_rocm_driver_t* driver = iree_hal_rocm_driver_cast(base_driver); - hipDevice_t device = IREE_DEVICE_ID_TO_HIPDEVICE(device_id); - - hipDeviceProp_tR0000 prop; - ROCM_RETURN_IF_ERROR(&driver->syms, hipGetDeviceProperties(&prop, device), - "hipGetDeviceProperties"); - - // GPU capabilities and architecture. - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- gpu-compute-capability: %d.%d", prop.major, prop.minor)); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- gpu-arch-name: %s", prop.gcnArchName)); - - // Launch configuration limits. - IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "\n")); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- launch-max-block-dims: (%d, %d, %d)", prop.maxThreadsDim[0], - prop.maxThreadsDim[1], prop.maxThreadsDim[2])); - - int shared_memory_kb = prop.sharedMemPerBlock / 1024; - IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "\n")); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- block-max-thread-count: %d", prop.maxThreadsPerBlock)); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- block-max-32-bit-register-count: %d", prop.regsPerBlock)); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- block-max-shared-memory: %d KB", shared_memory_kb)); - - // Memory hierarchy related information. - int const_memory_mb = prop.totalConstMem / 1024 / 1024; - int global_memory_mb = prop.totalGlobalMem / 1024 / 1024; - IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "\n")); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- memory-is-integrated-memory: %d", prop.integrated)); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- memory-supports-managed-memory: %d", prop.managedMemory)); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- memory-total-const-memory-size: %d MB", const_memory_mb)); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- memory-total-global-memory-size: %d MB", global_memory_mb)); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- memory-l2-cache-size: %d bytes", prop.l2CacheSize)); - - // GPU related information. - int compute_clock_mhz = prop.clockRate / 1000; - int memory_clock_mhz = prop.memoryClockRate / 1000; - IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "\n")); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- gpu-compute-unit-count: %d", prop.multiProcessorCount)); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- gpu-compute-max-clock-rate: %d mHz", compute_clock_mhz)); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- gpu-memory-max-clock-rate: %d mHz", memory_clock_mhz)); - IREE_RETURN_IF_ERROR(iree_string_builder_append_format( - builder, "\n- gpu-warp-size: %d", prop.warpSize)); - - IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "\n")); - return iree_ok_status(); -} - -static iree_status_t iree_hal_rocm_driver_select_default_device( - iree_hal_rocm_dynamic_symbols_t* syms, int default_device_index, - iree_allocator_t host_allocator, hipDevice_t* out_device) { - int device_count = 0; - ROCM_RETURN_IF_ERROR(syms, hipGetDeviceCount(&device_count), - "hipGetDeviceCount"); - iree_status_t status = iree_ok_status(); - if (device_count == 0 || default_device_index >= device_count) { - status = iree_make_status(IREE_STATUS_NOT_FOUND, - "default device %d not found (of %d enumerated)", - default_device_index, device_count); - } else { - hipDevice_t device; - ROCM_RETURN_IF_ERROR(syms, hipDeviceGet(&device, default_device_index), - "hipDeviceGet"); - *out_device = device; - } - return status; -} - -static iree_status_t iree_hal_rocm_driver_create_device_by_id( - iree_hal_driver_t* base_driver, iree_hal_device_id_t device_id, - iree_host_size_t param_count, const iree_string_pair_t* params, - iree_allocator_t host_allocator, iree_hal_device_t** out_device) { - iree_hal_rocm_driver_t* driver = iree_hal_rocm_driver_cast(base_driver); - IREE_TRACE_ZONE_BEGIN(z0); - - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, ROCM_RESULT_TO_STATUS(&driver->syms, hipInit(0), "hipInit")); - // Use either the specified device (enumerated earlier) or whatever default - // one was specified when the driver was created. - hipDevice_t device = 0; - if (device_id == IREE_HAL_DEVICE_ID_DEFAULT) { - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, iree_hal_rocm_driver_select_default_device( - &driver->syms, driver->default_device_index, host_allocator, - &device)); - } else { - device = IREE_DEVICE_ID_TO_HIPDEVICE(device_id); - } - - iree_string_view_t device_name = iree_make_cstring_view("rocm"); - - // Attempt to create the device. - iree_status_t status = - iree_hal_rocm_device_create(base_driver, device_name, &driver->syms, - device, host_allocator, out_device); - - IREE_TRACE_ZONE_END(z0); - return status; -} - -static iree_status_t iree_hal_rocm_driver_create_device_by_uuid( - iree_hal_driver_t* base_driver, iree_string_view_t driver_name, - const hipUUID* device_uuid, iree_host_size_t param_count, - const iree_string_pair_t* params, iree_allocator_t host_allocator, - iree_hal_device_t** out_device) { - iree_hal_rocm_driver_t* driver = iree_hal_rocm_driver_cast(base_driver); - IREE_TRACE_ZONE_BEGIN(z0); - - // Query all rocm devices for instance. - int device_count = 0; - ROCM_RETURN_IF_ERROR(&driver->syms, hipGetDeviceCount(&device_count), - "hipGetDeviceCount"); - hipDevice_t device = 0; - bool found_device = false; - for (int i = 0; i < device_count; i++) { - ROCM_RETURN_IF_ERROR(&driver->syms, hipDeviceGet(&device, i), - "hipDeviceGet"); - hipUUID query_uuid; - ROCM_RETURN_IF_ERROR(&driver->syms, hipDeviceGetUuid(&query_uuid, device), - "hipDeviceGetUuid"); - if (memcmp(&device_uuid->bytes[0], &query_uuid.bytes[0], - sizeof(device_uuid)) == 0) { - found_device = true; - break; - } - } - if (!found_device) { - return iree_make_status( - IREE_STATUS_NOT_FOUND, - "ROCM device with UUID GPU-" - "%02x%02x%02x%02x-" - "%02x%02x-" - "%02x%02x-" - "%02x%02x-" - "%02x%02x%02x%02x%02x%02x" - " not found", - (uint8_t)device_uuid->bytes[0], (uint8_t)device_uuid->bytes[1], - (uint8_t)device_uuid->bytes[2], (uint8_t)device_uuid->bytes[3], - (uint8_t)device_uuid->bytes[4], (uint8_t)device_uuid->bytes[5], - (uint8_t)device_uuid->bytes[6], (uint8_t)device_uuid->bytes[7], - (uint8_t)device_uuid->bytes[8], (uint8_t)device_uuid->bytes[9], - (uint8_t)device_uuid->bytes[10], (uint8_t)device_uuid->bytes[11], - (uint8_t)device_uuid->bytes[12], (uint8_t)device_uuid->bytes[13], - (uint8_t)device_uuid->bytes[14], (uint8_t)device_uuid->bytes[15]); - } - - iree_status_t status = iree_hal_rocm_driver_create_device_by_id( - base_driver, IREE_HIPDEVICE_TO_DEVICE_ID(device), param_count, params, - host_allocator, out_device); - - IREE_TRACE_ZONE_END(z0); - return status; -} - -static iree_status_t iree_hal_rocm_driver_create_device_by_index( - iree_hal_driver_t* base_driver, iree_string_view_t driver_name, - int device_index, iree_host_size_t param_count, - const iree_string_pair_t* params, iree_allocator_t host_allocator, - iree_hal_device_t** out_device) { - iree_hal_rocm_driver_t* driver = iree_hal_rocm_driver_cast(base_driver); - - // Query the number of available HIP devices. - int device_count = 0; - ROCM_RETURN_IF_ERROR(&driver->syms, hipGetDeviceCount(&device_count), - "hipGetDeviceCount"); - if (device_index >= device_count) { - return iree_make_status(IREE_STATUS_NOT_FOUND, - "device %d not found (of %d enumerated)", - device_index, device_count); - } - - hipDevice_t device = 0; - ROCM_RETURN_IF_ERROR(&driver->syms, hipDeviceGet(&device, device_index), - "hipDeviceGet"); - - iree_status_t status = iree_hal_rocm_driver_create_device_by_id( - base_driver, IREE_HIPDEVICE_TO_DEVICE_ID(device), param_count, params, - host_allocator, out_device); - - return status; -} - -static iree_status_t iree_hal_rocm_driver_create_device_by_path( - iree_hal_driver_t* base_driver, iree_string_view_t driver_name, - iree_string_view_t device_path, iree_host_size_t param_count, - const iree_string_pair_t* params, iree_allocator_t host_allocator, - iree_hal_device_t** out_device) { - if (iree_string_view_is_empty(device_path)) { - return iree_hal_rocm_driver_create_device_by_id( - base_driver, IREE_HAL_DEVICE_ID_DEFAULT, param_count, params, - host_allocator, out_device); - } - - if (iree_string_view_consume_prefix(&device_path, IREE_SV("GPU-"))) { - // UUID as returned by hipDeviceGetUuid. - hipUUID device_uuid; - if (!iree_string_view_parse_hex_bytes(device_path, - IREE_ARRAYSIZE(device_uuid.bytes), - (uint8_t*)device_uuid.bytes)) { - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "invalid GPU UUID: '%.*s'", (int)device_path.size, - device_path.data); - } - return iree_hal_rocm_driver_create_device_by_uuid( - base_driver, driver_name, &device_uuid, param_count, params, - host_allocator, out_device); - } - - // Try to parse as a device index. - int device_index = 0; - if (iree_string_view_atoi_int32(device_path, &device_index)) { - return iree_hal_rocm_driver_create_device_by_index( - base_driver, driver_name, device_index, param_count, params, - host_allocator, out_device); - } - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, "unsupported device path"); -} - -static const iree_hal_driver_vtable_t iree_hal_rocm_driver_vtable = { - .destroy = iree_hal_rocm_driver_destroy, - .query_available_devices = iree_hal_rocm_driver_query_available_devices, - .dump_device_info = iree_hal_rocm_driver_dump_device_info, - .create_device_by_id = iree_hal_rocm_driver_create_device_by_id, - .create_device_by_path = iree_hal_rocm_driver_create_device_by_path, -}; diff --git a/experimental/rocm/rocm_event.c b/experimental/rocm/rocm_event.c deleted file mode 100644 index 05b0e734358a..000000000000 --- a/experimental/rocm/rocm_event.c +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/rocm_event.h" - -#include - -#include "iree/base/api.h" - -// Dummy events for now, don't do anything. -typedef struct iree_hal_rocm_event_t { - iree_hal_resource_t resource; - iree_hal_rocm_context_wrapper_t* context_wrapper; -} iree_hal_rocm_event_t; - -static const iree_hal_event_vtable_t iree_hal_rocm_event_vtable; - -static iree_hal_rocm_event_t* iree_hal_rocm_event_cast( - iree_hal_event_t* base_value) { - IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_rocm_event_vtable); - return (iree_hal_rocm_event_t*)base_value; -} - -iree_status_t iree_hal_rocm_event_create( - iree_hal_rocm_context_wrapper_t* context_wrapper, - iree_hal_event_t** out_event) { - IREE_ASSERT_ARGUMENT(context_wrapper); - IREE_ASSERT_ARGUMENT(out_event); - *out_event = NULL; - IREE_TRACE_ZONE_BEGIN(z0); - - iree_hal_rocm_event_t* event = NULL; - iree_status_t status = iree_allocator_malloc(context_wrapper->host_allocator, - sizeof(*event), (void**)&event); - if (iree_status_is_ok(status)) { - iree_hal_resource_initialize(&iree_hal_rocm_event_vtable, &event->resource); - event->context_wrapper = context_wrapper; - *out_event = (iree_hal_event_t*)event; - } - - IREE_TRACE_ZONE_END(z0); - return status; -} - -static void iree_hal_rocm_event_destroy(iree_hal_event_t* base_event) { - iree_hal_rocm_event_t* event = iree_hal_rocm_event_cast(base_event); - iree_allocator_t host_allocator = event->context_wrapper->host_allocator; - IREE_TRACE_ZONE_BEGIN(z0); - - iree_allocator_free(host_allocator, event); - - IREE_TRACE_ZONE_END(z0); -} - -static const iree_hal_event_vtable_t iree_hal_rocm_event_vtable = { - .destroy = iree_hal_rocm_event_destroy, -}; diff --git a/experimental/rocm/rocm_event.h b/experimental/rocm/rocm_event.h deleted file mode 100644 index 0bac1a2feaa8..000000000000 --- a/experimental/rocm/rocm_event.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_EVENT_H_ -#define IREE_HAL_ROCM_EVENT_H_ - -#include "experimental/rocm/context_wrapper.h" -#include "experimental/rocm/rocm_headers.h" -#include "iree/base/api.h" -#include "iree/hal/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// Creates a dummy event object. Object will be represented by rocm Graph edges -// so nothing is created at creation time. When an event is signaled in the -// command buffer we will add the appropriate edges to enforce the right -// synchronization. -iree_status_t iree_hal_rocm_event_create( - iree_hal_rocm_context_wrapper_t* context_wrapper, - iree_hal_event_t** out_event); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_EVENT_H_ diff --git a/experimental/rocm/rocm_headers.h b/experimental/rocm/rocm_headers.h deleted file mode 100644 index aa0a5b0453ed..000000000000 --- a/experimental/rocm/rocm_headers.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_ROCM_HEADERS_H_ -#define IREE_HAL_ROCM_ROCM_HEADERS_H_ - -#if defined(IREE_PTR_SIZE_32) -#error 32-bit not supported on ROCm -#endif // defined(IREE_PTR_SIZE_32) - -#define __HIP_PLATFORM_AMD__ - -// Order matters here--hip_deprecated.h depends on hip_runtime_api.h. So turn -// off clang-format. -// -// We need to pull in this hip_deprecated.h for the old hipDeviceProp_t struct -// definition, hipDeviceProp_tR0000. HIP 6.0 release changes the struct in the -// middle. The hipDeviceProp_t struct would need to use the matching -// hipGetDevicePropertiesR0600() API to query it. We want to also support HIP -// 5.x versions so use the old hipGetDeviceProperties() API with its matching -// struct. - -// clang-format off -#include "hip/hip_runtime.h" // IWYU pragma: export -#include "hip/hip_deprecated.h" // IWYU pragma: export -// clang-format on - -#endif // IREE_HAL_ROCM_ROCM_HEADERS_H_ diff --git a/experimental/rocm/status_util.c b/experimental/rocm/status_util.c deleted file mode 100644 index a084c3f983a7..000000000000 --- a/experimental/rocm/status_util.c +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/status_util.h" - -#include - -#include "experimental/rocm/dynamic_symbols.h" - -iree_status_t iree_hal_rocm_result_to_status( - iree_hal_rocm_dynamic_symbols_t *syms, hipError_t result, const char *file, - uint32_t line) { - if (IREE_LIKELY(result == hipSuccess)) { - return iree_ok_status(); - } - - const char *error_name = syms->hipGetErrorName(result); - if (result == hipErrorUnknown) { - error_name = "UNKNOWN"; - } - - const char *error_string = syms->hipGetErrorString(result); - if (result == hipErrorUnknown) { - error_string = "Unknown error."; - } - return iree_make_status(IREE_STATUS_INTERNAL, - "rocm driver error '%s' (%d): %s", error_name, result, - error_string); -} diff --git a/experimental/rocm/status_util.h b/experimental/rocm/status_util.h deleted file mode 100644 index 0f6fcc56196c..000000000000 --- a/experimental/rocm/status_util.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_ROCM_STATUS_UTIL_H_ -#define IREE_HAL_ROCM_STATUS_UTIL_H_ - -#include - -#include "experimental/rocm/dynamic_symbols.h" -#include "iree/base/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// Converts a hipError_t to an iree_status_t. -// -// Usage: -// iree_status_t status = ROCM_RESULT_TO_STATUS(rocmDoThing(...)); -#define ROCM_RESULT_TO_STATUS(syms, expr, ...) \ - iree_hal_rocm_result_to_status((syms), ((syms)->expr), __FILE__, __LINE__) - -// IREE_RETURN_IF_ERROR but implicitly converts the hipError_t return value to -// a Status. -// -// Usage: -// ROCM_RETURN_IF_ERROR(rocmDoThing(...), "message"); -#define ROCM_RETURN_IF_ERROR(syms, expr, ...) \ - IREE_RETURN_IF_ERROR(iree_hal_rocm_result_to_status((syms), ((syms)->expr), \ - __FILE__, __LINE__), \ - __VA_ARGS__) - -// IREE_IGNORE_ERROR but implicitly converts the hipError_t return value to a -// Status. -// -// Usage: -// ROCM_IGNORE_ERROR(rocmDoThing(...)); -#define ROCM_IGNORE_ERROR(syms, expr) \ - IREE_IGNORE_ERROR(iree_hal_rocm_result_to_status((syms), ((syms)->expr), \ - __FILE__, __LINE__)) - -// Converts a hipError_t to a Status object. -iree_status_t iree_hal_rocm_result_to_status( - iree_hal_rocm_dynamic_symbols_t* syms, hipError_t result, const char* file, - uint32_t line); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_ROCM_STATUS_UTIL_H_ diff --git a/experimental/rocm/tracing.c b/experimental/rocm/tracing.c deleted file mode 100644 index fbfb32f9e144..000000000000 --- a/experimental/rocm/tracing.c +++ /dev/null @@ -1,293 +0,0 @@ -// Copyright 2023 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "experimental/rocm/tracing.h" - -#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE - -#include "experimental/rocm/status_util.h" - -// Total number of events per tracing context. This translates to the maximum -// number of outstanding timestamp queries before collection is required. -// To prevent spilling pages we leave some room for the context structure. -#define IREE_HAL_ROCM_TRACING_DEFAULT_QUERY_CAPACITY (16 * 1024 - 256) - -struct iree_hal_rocm_tracing_context_t { - iree_hal_rocm_context_wrapper_t* rocm_context; - hipStream_t stream; - iree_arena_block_pool_t* block_pool; - iree_allocator_t host_allocator; - - // A unique GPU zone ID allocated from Tracy. - // There is a global limit of 255 GPU zones (ID 255 is special). - uint8_t id; - - // Base event used for computing relative times for all recorded events. - // This is required as ROCM (without CUPTI) only allows for relative timing - // between events and we need a stable base event. - hipEvent_t base_event; - - // Indices into |event_pool| defining a ringbuffer. - uint32_t query_head; - uint32_t query_tail; - uint32_t query_capacity; - - // Event pool reused to capture tracing timestamps. - hipEvent_t event_pool[IREE_HAL_ROCM_TRACING_DEFAULT_QUERY_CAPACITY]; -}; - -static iree_status_t iree_hal_rocm_tracing_context_initial_calibration( - iree_hal_rocm_context_wrapper_t* rocm_context, hipStream_t stream, - hipEvent_t base_event, int64_t* out_cpu_timestamp, - int64_t* out_gpu_timestamp, float* out_timestamp_period) { - IREE_TRACE_ZONE_BEGIN(z0); - *out_cpu_timestamp = 0; - *out_gpu_timestamp = 0; - *out_timestamp_period = 1.0f; - - // Record event to the stream; in the absence of a synchronize this may not - // flush immediately. - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, ROCM_RESULT_TO_STATUS(rocm_context->syms, - hipEventRecord(base_event, stream))); - - // Force flush the event and wait for it to complete. - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, ROCM_RESULT_TO_STATUS(rocm_context->syms, - hipEventSynchronize(base_event))); - - // Track when we know the event has completed and has a reasonable timestamp. - // This may drift from the actual time differential between host/device but is - // (maybe?) the best we can do without CUPTI. - *out_cpu_timestamp = iree_tracing_time(); - - IREE_TRACE_ZONE_END(z0); - return iree_ok_status(); -} - -iree_status_t iree_hal_rocm_tracing_context_allocate( - iree_hal_rocm_context_wrapper_t* rocm_context, - iree_string_view_t queue_name, hipStream_t stream, - iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator, - iree_hal_rocm_tracing_context_t** out_context) { - IREE_TRACE_ZONE_BEGIN(z0); - IREE_ASSERT_ARGUMENT(rocm_context); - IREE_ASSERT_ARGUMENT(stream); - IREE_ASSERT_ARGUMENT(block_pool); - IREE_ASSERT_ARGUMENT(out_context); - *out_context = NULL; - - iree_hal_rocm_tracing_context_t* context = NULL; - iree_status_t status = - iree_allocator_malloc(host_allocator, sizeof(*context), (void**)&context); - if (iree_status_is_ok(status)) { - context->rocm_context = rocm_context; - context->stream = stream; - context->block_pool = block_pool; - context->host_allocator = host_allocator; - context->query_capacity = IREE_ARRAYSIZE(context->event_pool); - } - - // Pre-allocate all events in the event pool. - if (iree_status_is_ok(status)) { - IREE_TRACE_ZONE_BEGIN_NAMED( - z_event_pool, "iree_hal_rocm_tracing_context_allocate_event_pool"); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z_event_pool, - (int64_t)context->query_capacity); - for (iree_host_size_t i = 0; i < context->query_capacity; ++i) { - status = ROCM_RESULT_TO_STATUS(rocm_context->syms, - hipEventCreate(&context->event_pool[i])); - if (!iree_status_is_ok(status)) break; - } - IREE_TRACE_ZONE_END(z_event_pool); - } - - // Create the initial GPU event and insert it into the stream. - // All events we record are relative to this event. - int64_t cpu_timestamp = 0; - int64_t gpu_timestamp = 0; - float timestamp_period = 0.0f; - if (iree_status_is_ok(status)) { - status = ROCM_RESULT_TO_STATUS(rocm_context->syms, - hipEventCreate(&context->base_event)); - } - if (iree_status_is_ok(status)) { - status = iree_hal_rocm_tracing_context_initial_calibration( - rocm_context, stream, context->base_event, &cpu_timestamp, - &gpu_timestamp, ×tamp_period); - } - - // Allocate the GPU context and pass initial calibration data. - if (iree_status_is_ok(status)) { - context->id = iree_tracing_gpu_context_allocate( - IREE_TRACING_GPU_CONTEXT_TYPE_VULKAN, queue_name.data, queue_name.size, - /*is_calibrated=*/false, cpu_timestamp, gpu_timestamp, - timestamp_period); - } - - if (iree_status_is_ok(status)) { - *out_context = context; - } else { - iree_hal_rocm_tracing_context_free(context); - } - IREE_TRACE_ZONE_END(z0); - return status; -} - -void iree_hal_rocm_tracing_context_free( - iree_hal_rocm_tracing_context_t* context) { - if (!context) return; - IREE_TRACE_ZONE_BEGIN(z0); - - // Always perform a collection on shutdown. - iree_hal_rocm_tracing_context_collect(context); - - // Release all events; since collection completed they should all be unused. - IREE_TRACE_ZONE_BEGIN_NAMED(z_event_pool, - "iree_hal_rocm_tracing_context_free_event_pool"); - for (iree_host_size_t i = 0; i < context->query_capacity; ++i) { - if (context->event_pool[i]) { - ROCM_IGNORE_ERROR(context->rocm_context->syms, - hipEventDestroy(context->event_pool[i])); - } - } - IREE_TRACE_ZONE_END(z_event_pool); - if (context->base_event) { - ROCM_IGNORE_ERROR(context->rocm_context->syms, - hipEventDestroy(context->base_event)); - } - - iree_allocator_t host_allocator = context->host_allocator; - iree_allocator_free(host_allocator, context); - - IREE_TRACE_ZONE_END(z0); -} - -void iree_hal_rocm_tracing_context_collect( - iree_hal_rocm_tracing_context_t* context) { - if (!context) return; - if (context->query_tail == context->query_head) { - // No outstanding queries. - return; - } - IREE_TRACE_ZONE_BEGIN(z0); - iree_hal_rocm_dynamic_symbols_t* syms = context->rocm_context->syms; - - while (context->query_tail != context->query_head) { - // Compute the contiguous range of queries ready to be read. - // If the ringbuffer wraps around we'll handle that in the next loop. - uint32_t try_query_count = - context->query_head < context->query_tail - ? context->query_capacity - context->query_tail - : context->query_head - context->query_tail; - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)try_query_count); - - // Scan and feed the times to tracy, stopping when we hit the first - // unavailable query. - uint32_t query_base = context->query_tail; - uint32_t read_query_count = 0; - for (uint32_t i = 0; i < try_query_count; ++i) { - // Ensure the event has completed; will return hipErrorNotReady if - // recorded but not retired or any other deferred error. - uint16_t query_id = (uint16_t)(query_base + i); - hipEvent_t query_event = context->event_pool[query_id]; - hipError_t result = syms->hipEventQuery(query_event); - if (result != hipSuccess) break; - - // Calculate context-relative time and notify tracy. - float relative_millis = 0.0f; - ROCM_IGNORE_ERROR( - syms, hipEventElapsedTime(&relative_millis, context->base_event, - query_event)); - int64_t gpu_timestamp = (int64_t)((double)relative_millis * 1000000.0); - iree_tracing_gpu_zone_notify(context->id, query_id, gpu_timestamp); - - read_query_count = i + 1; - } - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)read_query_count); - - context->query_tail += read_query_count; - if (context->query_tail >= context->query_capacity) { - context->query_tail = 0; - } - } - - IREE_TRACE_ZONE_END(z0); -} - -static uint16_t iree_hal_rocm_tracing_context_insert_query( - iree_hal_rocm_tracing_context_t* context, hipStream_t stream) { - // Allocate an event from the pool for use by the query. - uint32_t query_id = context->query_head; - context->query_head = (context->query_head + 1) % context->query_capacity; - - // TODO: check to see if the read and write heads of the ringbuffer have - // overlapped. If they have we could try to collect but it's not guaranteed - // that collection will complete (e.g. we may be reserving events for use in - // graphs that haven't yet been launched). - // - // For now we just allow the overlap and tracing results will be inconsistent. - IREE_ASSERT_NE(context->query_head, context->query_tail); - - hipEvent_t event = context->event_pool[query_id]; - ROCM_IGNORE_ERROR(context->rocm_context->syms, hipEventRecord(event, stream)); - - return query_id; -} - -// TODO: optimize this implementation to reduce the number of events required: -// today we insert 2 events per zone (one for begin and one for end) but in -// many cases we could reduce this by inserting events only between zones and -// using the differences between them. - -void iree_hal_rocm_tracing_zone_begin_impl( - iree_hal_rocm_tracing_context_t* context, hipStream_t stream, - const iree_tracing_location_t* src_loc) { - if (!context) return; - uint16_t query_id = - iree_hal_rocm_tracing_context_insert_query(context, stream); - iree_tracing_gpu_zone_begin(context->id, query_id, src_loc); -} - -void iree_hal_rocm_tracing_zone_begin_external_impl( - iree_hal_rocm_tracing_context_t* context, hipStream_t stream, - const char* file_name, size_t file_name_length, uint32_t line, - const char* function_name, size_t function_name_length, const char* name, - size_t name_length) { - if (!context) return; - uint16_t query_id = - iree_hal_rocm_tracing_context_insert_query(context, stream); - iree_tracing_gpu_zone_begin_external(context->id, query_id, file_name, - file_name_length, line, function_name, - function_name_length, name, name_length); -} - -void iree_hal_rocm_tracing_zone_end_impl( - iree_hal_rocm_tracing_context_t* context, hipStream_t stream) { - if (!context) return; - uint16_t query_id = - iree_hal_rocm_tracing_context_insert_query(context, stream); - iree_tracing_gpu_zone_end(context->id, query_id); -} - -#else - -iree_status_t iree_hal_rocm_tracing_context_allocate( - iree_hal_rocm_context_wrapper_t* rocm_context, - iree_string_view_t queue_name, hipStream_t stream, - iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator, - iree_hal_rocm_tracing_context_t** out_context) { - *out_context = NULL; - return iree_ok_status(); -} - -void iree_hal_rocm_tracing_context_free( - iree_hal_rocm_tracing_context_t* context) {} - -void iree_hal_rocm_tracing_context_collect( - iree_hal_rocm_tracing_context_t* context) {} - -#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE diff --git a/experimental/rocm/tracing.h b/experimental/rocm/tracing.h deleted file mode 100644 index 7bd6ae54355d..000000000000 --- a/experimental/rocm/tracing.h +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright 2023 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_HAL_DRIVERS_ROCM_TRACING_H_ -#define IREE_HAL_DRIVERS_ROCM_TRACING_H_ - -#include "experimental/rocm/context_wrapper.h" -#include "iree/base/api.h" -#include "iree/base/internal/arena.h" -#include "iree/hal/api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// Per-stream ROCM tracing context. -// No-op if IREE tracing is not enabled. -// -// Use the IREE_ROCM_TRACE_* macros to trace a contiguous set of stream -// operations. Unlike the normal tracy macros there are no zone IDs and instead -// each stream gets an ID allocated once and passed to all tracing macros. -// -// Usage: -// IREE_ROCM_TRACE_ZONE_BEGIN(queue->tracing_context, stream); -// hipLaunchKernel(..., stream); -// IREE_ROCM_TRACE_ZONE_END(queue->tracing_context, stream); -// ... -// iree_hal_rocm_tracing_context_collect(queue->tracing_context); -// -// NOTE: timestamps can have non-trivial side-effecting behavior and may -// introduce serialization in graph execution. -// -// TODO(benvanik): expose hipEvent reservation separate from recording. For -// graphs we will need to insert the events but in order to reuse the graphs -// we'll need to reserve and patch new events each graph launch. For now we -// don't instrument graphs. -// -// Thread-compatible: external synchronization is required if using from -// multiple threads (same as with hipStream_t itself). -typedef struct iree_hal_rocm_tracing_context_t iree_hal_rocm_tracing_context_t; - -// Allocates a tracing context for the given ROCM stream. -// Each context must only be used with the stream it was created for. -iree_status_t iree_hal_rocm_tracing_context_allocate( - iree_hal_rocm_context_wrapper_t* rocm_context, - iree_string_view_t queue_name, hipStream_t stream, - iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator, - iree_hal_rocm_tracing_context_t** out_context); - -// Frees a tracing context and all associated ROCM resources. -// All submissions using the resources must be completed prior to calling. -void iree_hal_rocm_tracing_context_free( - iree_hal_rocm_tracing_context_t* context); - -// Collects in-flight timestamp queries from the stream and feeds them to tracy. -// Must be called frequently (every submission, etc) to drain the backlog; -// tracing may start failing if the internal ringbuffer is exceeded. -void iree_hal_rocm_tracing_context_collect( - iree_hal_rocm_tracing_context_t* context); - -#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE - -// Begins a normal zone derived on the calling |src_loc|. -// Must be perfectly nested and paired with a corresponding zone end. -void iree_hal_rocm_tracing_zone_begin_impl( - iree_hal_rocm_tracing_context_t* context, hipStream_t stream, - const iree_tracing_location_t* src_loc); - -// Begins an external zone using the given source information. -// The provided strings will be copied into the tracy buffer. -void iree_hal_rocm_tracing_zone_begin_external_impl( - iree_hal_rocm_tracing_context_t* context, hipStream_t stream, - const char* file_name, size_t file_name_length, uint32_t line, - const char* function_name, size_t function_name_length, const char* name, - size_t name_length); - -void iree_hal_rocm_tracing_zone_end_impl( - iree_hal_rocm_tracing_context_t* context, hipStream_t stream); - -// Begins a new zone with the parent function name. -#define IREE_ROCM_TRACE_ZONE_BEGIN(context, stream) \ - static const iree_tracing_location_t TracyConcat( \ - __tracy_source_location, __LINE__) = {NULL, __FUNCTION__, __FILE__, \ - (uint32_t)__LINE__, 0}; \ - iree_hal_rocm_tracing_zone_begin_impl( \ - context, stream, &TracyConcat(__tracy_source_location, __LINE__)); - -// Begins an externally defined zone with a dynamic source location. -// The |file_name|, |function_name|, and optional |name| strings will be copied -// into the trace buffer and do not need to persist. -#define IREE_ROCM_TRACE_ZONE_BEGIN_EXTERNAL( \ - context, stream, file_name, file_name_length, line, function_name, \ - function_name_length, name, name_length) \ - iree_hal_rocm_tracing_zone_begin_external_impl( \ - context, stream, file_name, file_name_length, line, function_name, \ - function_name_length, name, name_length) - -// Ends the current zone. Must be passed the |zone_id| from the _BEGIN. -#define IREE_ROCM_TRACE_ZONE_END(context, stream) \ - iree_hal_rocm_tracing_zone_end_impl(context, stream) - -#else - -#define IREE_ROCM_TRACE_ZONE_BEGIN(context, stream) -#define IREE_ROCM_TRACE_ZONE_BEGIN_EXTERNAL( \ - context, stream, file_name, file_name_length, line, function_name, \ - function_name_length, name, name_length) -#define IREE_ROCM_TRACE_ZONE_END(context, stream) - -#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // IREE_HAL_DRIVERS_ROCM_TRACING_H_ diff --git a/samples/custom_dispatch/hip/kernels/CMakeLists.txt b/samples/custom_dispatch/hip/kernels/CMakeLists.txt index feced7bf6bbd..e23968b67e08 100644 --- a/samples/custom_dispatch/hip/kernels/CMakeLists.txt +++ b/samples/custom_dispatch/hip/kernels/CMakeLists.txt @@ -63,6 +63,6 @@ iree_lit_test_suite( iree-compile iree-run-module LABELS - "driver=rocm" + "driver=hip" "hostonly" ) diff --git a/samples/custom_dispatch/hip/kernels/example.mlir b/samples/custom_dispatch/hip/kernels/example.mlir index 3ca1bad2430f..6a2148c0d2e9 100644 --- a/samples/custom_dispatch/hip/kernels/example.mlir +++ b/samples/custom_dispatch/hip/kernels/example.mlir @@ -1,7 +1,7 @@ // RUN: iree-compile %s \ // RUN: --iree-hal-executable-object-search-path=$IREE_BINARY_DIR | \ // RUN: iree-run-module \ -// RUN: --device=rocm \ +// RUN: --device=hip \ // RUN: --module=- \ // RUN: --function=mixed_invocation \ // RUN: --input=8xf32=2 \ @@ -119,7 +119,7 @@ module @example attributes {hal.device.targets = [#rocm_target]} { // Function demonstrating a few hand-authored dispatches mixed with codegen. // Invoke with: - // --device=rocm + // --device=hip // --function=mixed_invocation // --input=8xf32=2 // --input=8xf32=4