Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify more CUDA and HIP files into common #1516

Merged
merged 17 commits into from
Jul 5, 2024
2 changes: 2 additions & 0 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
add_subdirectory(unified)
add_subdirectory(cuda_hip)
set(GKO_UNIFIED_COMMON_SOURCES ${GKO_UNIFIED_COMMON_SOURCES} PARENT_SCOPE)
set(GKO_CUDA_HIP_COMMON_SOURCES ${GKO_CUDA_HIP_COMMON_SOURCES} PARENT_SCOPE)
32 changes: 32 additions & 0 deletions common/cuda_hip/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake)
set(CUDA_HIP_SOURCES
base/device_matrix_data_kernels.cpp
components/prefix_sum_kernels.cpp
distributed/index_map_kernels.cpp
distributed/matrix_kernels.cpp
distributed/partition_helpers_kernels.cpp
distributed/partition_kernels.cpp
distributed/vector_kernels.cpp
factorization/cholesky_kernels.cpp
factorization/factorization_kernels.cpp
factorization/lu_kernels.cpp
factorization/par_ic_kernels.cpp
factorization/par_ilu_kernels.cpp
matrix/coo_kernels.cpp
matrix/dense_kernels.cpp
matrix/diagonal_kernels.cpp
matrix/ell_kernels.cpp
matrix/sellp_kernels.cpp
matrix/sparsity_csr_kernels.cpp
multigrid/pgm_kernels.cpp
preconditioner/isai_kernels.cpp
preconditioner/jacobi_kernels.cpp
reorder/rcm_kernels.cpp
solver/cb_gmres_kernels.cpp
solver/idr_kernels.cpp
solver/multigrid_kernels.cpp
stop/criterion_kernels.cpp
stop/residual_norm_kernels.cpp
)
list(TRANSFORM CUDA_HIP_SOURCES PREPEND ${CMAKE_CURRENT_SOURCE_DIR}/)
set(GKO_CUDA_HIP_COMMON_SOURCES ${CUDA_HIP_SOURCES} PARENT_SCOPE)
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,26 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#include "core/base/device_matrix_data_kernels.hpp"

#include <thrust/copy.h>
#include <thrust/count.h>
#include <thrust/device_ptr.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/sort.h>
#include <thrust/tuple.h>

#include "common/cuda_hip/base/thrust.hpp"
#include "common/cuda_hip/base/types.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {
namespace components {


template <typename ValueType, typename IndexType>
void remove_zeros(std::shared_ptr<const DefaultExecutor> exec,
array<ValueType>& values, array<IndexType>& row_idxs,
Expand Down Expand Up @@ -99,3 +119,9 @@ void sort_row_major(std::shared_ptr<const DefaultExecutor> exec,

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_DEVICE_MATRIX_DATA_SORT_ROW_MAJOR_KERNEL);


} // namespace components
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,52 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_HPP_
#error \
"This file can only be used from inside common/unified/base/kernel_launch.hpp"
#endif


#include <thrust/tuple.h>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when it becomes the header, it needs the header guard

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The file is not self-sufficient without the unified/kernel_launch.hpp header, just like before, so it does not need one.


#include "accessor/cuda_hip_helper.hpp"
#include "common/cuda_hip/base/runtime.hpp"
#include "common/cuda_hip/base/types.hpp"
#include "common/cuda_hip/components/thread_ids.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {


template <typename AccessorType>
struct to_device_type_impl<gko::acc::range<AccessorType>&> {
using type = std::decay_t<decltype(gko::acc::as_device_range(
std::declval<gko::acc::range<AccessorType>>()))>;
static type map_to_device(gko::acc::range<AccessorType>& range)
{
return gko::acc::as_device_range(range);
}
};

template <typename AccessorType>
struct to_device_type_impl<const gko::acc::range<AccessorType>&> {
using type = std::decay_t<decltype(gko::acc::as_device_range(
std::declval<gko::acc::range<AccessorType>>()))>;
static type map_to_device(const gko::acc::range<AccessorType>& range)
{
return gko::acc::as_device_range(range);
}
};


namespace device_std = thrust;


constexpr int default_block_size = 512;


template <typename KernelFunction, typename... KernelArgs>
__global__ __launch_bounds__(default_block_size) void generic_kernel_1d(
int64 size, KernelFunction fn, KernelArgs... args)
Expand Down Expand Up @@ -52,3 +98,8 @@ void run_kernel(std::shared_ptr<const DefaultExecutor> exec, KernelFunction fn,
map_to_device(args)...);
}
}


} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,24 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_REDUCTION_HPP_
#error \
"This file can only be used from inside common/unified/base/kernel_launch_reduction.hpp"
#endif


#include "common/cuda_hip/base/types.hpp"
#include "common/cuda_hip/components/cooperative_groups.hpp"
#include "common/cuda_hip/components/reduction.hpp"
#include "common/cuda_hip/components/thread_ids.hpp"
#include "core/synthesizer/implementation_selection.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {


template <typename ValueType, typename KernelFunction, typename ReductionOp,
typename FinalizeOp, typename... KernelArgs>
__global__ __launch_bounds__(
Expand Down Expand Up @@ -505,3 +523,8 @@ void run_kernel_col_reduction_cached(
}
}
}


} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_SOLVER_HPP_
#error \
"This file can only be used from inside common/unified/base/kernel_launch_solver.hpp"
#endif


#include "common/cuda_hip/base/runtime.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {


template <typename KernelFunction, typename... KernelArgs>
__global__ __launch_bounds__(default_block_size) void generic_kernel_2d_solver(
int64 rows, int64 cols, int64 default_stride, KernelFunction fn,
Expand Down Expand Up @@ -32,3 +46,8 @@ void run_kernel_solver(std::shared_ptr<const DefaultExecutor> exec,
static_cast<int64>(default_stride), fn, map_to_device(args)...);
}
}


} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_BASE_MATH_HPP_
#define GKO_COMMON_CUDA_HIP_BASE_MATH_HPP_


#include <thrust/complex.h>

#include <ginkgo/core/base/math.hpp>


namespace gko {


// We need this struct, because otherwise we would call a __host__ function in a
// __device__ function (even though it is constexpr)
template <typename T>
Expand Down Expand Up @@ -37,3 +49,7 @@ struct truncate_type_impl<thrust::complex<T>> {


} // namespace detail
} // namespace gko


#endif // GKO_COMMON_CUDA_HIP_BASE_MATH_HPP_
18 changes: 18 additions & 0 deletions common/cuda_hip/base/sparselib_block_bindings.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_BASE_SPARSELIB_BLOCK_BINDINGS_HPP_
#define GKO_COMMON_CUDA_HIP_BASE_SPARSELIB_BLOCK_BINDINGS_HPP_


#if defined(GKO_COMPILING_CUDA)
#include "cuda/base/cusparse_block_bindings.hpp"
#elif defined(GKO_COMPILING_HIP)
#include "hip/base/hipsparse_block_bindings.hip.hpp"
#else
#error "Executor definition missing"
#endif


#endif // GKO_COMMON_CUDA_HIP_BASE_SPARSELIB_BLOCK_BINDINGS_HPP_
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_COMPONENTS_ATOMIC_HPP_
#define GKO_COMMON_CUDA_HIP_COMPONENTS_ATOMIC_HPP_


#include <type_traits>

#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/types.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {
namespace detail {


Expand Down Expand Up @@ -228,3 +241,11 @@ __forceinline__ __device__ thrust::complex<double> atomic_add(
auto imag = atomic_add(addr + 1, val.imag());
return {real, imag};
}


} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko


#endif // GKO_COMMON_CUDA_HIP_COMPONENTS_ATOMIC_HPP_
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,23 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_COMPONENTS_DIAGONAL_BLOCK_MANIPULATION_HPP_
#define GKO_COMMON_CUDA_HIP_COMPONENTS_DIAGONAL_BLOCK_MANIPULATION_HPP_


#include <type_traits>

#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/types.hpp"
#include "common/cuda_hip/components/cooperative_groups.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {
namespace csr {


/**
* @internal
*
Expand Down Expand Up @@ -63,3 +80,12 @@ __device__ __forceinline__ void extract_transposed_diag_blocks(
}
}
}


} // namespace csr
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko


#endif // GKO_COMMON_CUDA_HIP_COMPONENTS_DIAGONAL_BLOCK_MANIPULATION_HPP_
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_COMPONENTS_INTRINSICS_HPP_
#define GKO_COMMON_CUDA_HIP_COMPONENTS_INTRINSICS_HPP_


#include <ginkgo/core/base/types.hpp>


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {


/**
* @internal
* Returns the number of set bits in the given mask.
Expand Down Expand Up @@ -36,3 +48,11 @@ __forceinline__ __device__ int clz(uint32 mask) { return __clz(mask); }

/** @copydoc clz */
__forceinline__ __device__ int clz(uint64 mask) { return __clzll(mask); }


} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko


#endif // GKO_COMMON_CUDA_HIP_COMPONENTS_INTRINSICS_HPP_
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,21 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_COMPONENTS_MERGING_HPP_
#define GKO_COMMON_CUDA_HIP_COMPONENTS_MERGING_HPP_


#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/components/intrinsics.hpp"
#include "common/cuda_hip/components/searching.hpp"
#include "core/base/utils.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {


namespace detail {


Expand Down Expand Up @@ -280,3 +295,11 @@ __forceinline__ __device__ void sequential_match(const ValueType* a,
return a_idx < a_size && b_idx < b_size;
});
}


} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko


#endif // GKO_COMMON_CUDA_HIP_COMPONENTS_MERGING_HPP_
Loading