Skip to content

Commit

Permalink
Merge pull request #43 from LLNL/fix-build-issues-gcc8
Browse files (browse the repository at this point in the history)
Fix some build issues with GCC8/CUDA11
  • Loading branch information
bvanessen authored Sep 24, 2020
2 parents 809af52 + 9901e97 commit 9bca9d9
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 39 deletions.
44 changes: 14 additions & 30 deletions legacy/include/distconv/tensor/algorithms/transform_cuda.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -51,8 +51,8 @@ __global__ void transform_kernel(
}
}

template <typename DataType, int BLOCK_SIZE, int INNER_DIM,
typename TransformFunc>
template <int BLOCK_SIZE, int INNER_DIM,
typename DataType, typename TransformFunc>
void transform(Shape shape, IndexVector strides, DataType *data,
TransformFunc op, int thread_work_size, int num_inner_blocks,
const dim3 &grid_dims, const dim3 &block_dims,
Expand Down Expand Up @@ -123,8 +123,8 @@ __global__ void transform_kernel(Array<ND> shape, Array<ND> strides,
}
}

template <typename DataType1, typename DataType2, int BLOCK_SIZE, int INNER_DIM,
typename TransformFunc>
template <int BLOCK_SIZE, int INNER_DIM,
typename DataType1, typename DataType2, typename TransformFunc>
void transform(Shape shape, IndexVector strides,
DataType1 *data1, DataType2 *data2,
TransformFunc op,
Expand Down Expand Up @@ -201,8 +201,9 @@ __global__ void transform_kernel(Array<ND> shape, Array<ND> strides,
}
}

template <typename DataType1, typename DataType2, typename DataType3,
int BLOCK_SIZE, int INNER_DIM, typename TransformFunc>
template <int BLOCK_SIZE, int INNER_DIM,
typename DataType1, typename DataType2, typename DataType3,
typename TransformFunc>
void transform(Shape shape, IndexVector strides,
DataType1 *data1, DataType2 *data2,
DataType3 *data3,
Expand Down Expand Up @@ -283,8 +284,9 @@ __global__ void transform_kernel(Array<ND> shape, Array<ND> strides,
}
}

template <typename DataType1, typename DataType2, typename DataType3,
typename DataType4, int BLOCK_SIZE, int INNER_DIM,
template <int BLOCK_SIZE, int INNER_DIM,
typename DataType1, typename DataType2,
typename DataType3, typename DataType4,
typename TransformFunc>
void transform(Shape shape, IndexVector strides,
DataType1 *data1, DataType2 *data2,
Expand Down Expand Up @@ -358,10 +360,9 @@ Transform(Tensor &tensor, TransformFunc op,
"Tensors with 6 or larger number of dimensions not supported.";
throw std::exception();
}
using DataType = typename Tensor::data_type;

#define CALL_TRANFORM(INNER_DIM) \
algo::transform<DataType, block_size, INNER_DIM, TransformFunc>( \
algo::transform<block_size, INNER_DIM>( \
shape, strides, tensor.get_base_ptr(), op, \
thread_work_size, num_inner_blocks, grid_dims, block_dims, stream)

Expand Down Expand Up @@ -437,17 +438,9 @@ Transform(Tensor1 &tensor1, Tensor2 &tensor2,
"Tensors with 6 or larger number of dimensions not supported.";
throw std::exception();
}
using DataType1 = typename std::conditional<
std::is_const<Tensor1>::value,
typename std::add_const<typename std::remove_const<Tensor1>::type::data_type>::type,
typename Tensor1::data_type>::type;
using DataType2 = typename std::conditional<
std::is_const<Tensor2>::value,
typename std::add_const<typename std::remove_const<Tensor2>::type::data_type>::type,
typename Tensor2::data_type>::type;

#define CALL_TRANFORM(INNER_DIM) \
algo::transform<DataType1, DataType2, block_size, INNER_DIM, TransformFunc>( \
algo::transform<block_size, INNER_DIM>( \
shape, strides, tensor1.get_base_ptr(), tensor2.get_base_ptr(), \
op, thread_work_size, num_inner_blocks, grid_dims, block_dims, stream);
switch (inner_dim) {
Expand Down Expand Up @@ -519,9 +512,6 @@ Transform(Tensor1 &tensor1, Tensor2 &tensor2, Tensor3 &tensor3,
<< ", inner dim: " << inner_dim
<< ", num_inner_blocks: " << num_inner_blocks;

using DataType1 = typename Tensor1::data_type;
using DataType2 = typename Tensor2::data_type;
using DataType3 = typename Tensor3::data_type;
if (tensor1.get_num_dims() > 5) {
// The below switch block assumes ND <= 5. Otherwise, inner_dim
// can be >= 5, and the default case would hit. Simply repeating
Expand All @@ -531,8 +521,7 @@ Transform(Tensor1 &tensor1, Tensor2 &tensor2, Tensor3 &tensor3,
throw std::exception();
}
#define CALL_TRANFORM(INNER_DIM) \
algo::transform<DataType1, DataType2, DataType3, block_size, \
INNER_DIM, TransformFunc>( \
algo::transform<block_size, INNER_DIM>( \
shape, strides, tensor1.get_base_ptr(), tensor2.get_base_ptr(), \
tensor3.get_base_ptr(), op, thread_work_size, num_inner_blocks, \
grid_dims, block_dims, stream)
Expand Down Expand Up @@ -620,13 +609,8 @@ Transform(Tensor1 &tensor1, Tensor2 &tensor2, Tensor3 &tensor3,
"Tensors with 6 or larger number of dimensions not supported.";
throw std::exception();
}
using DataType1 = typename Tensor1::data_type;
using DataType2 = typename Tensor2::data_type;
using DataType3 = typename Tensor3::data_type;
using DataType4 = typename Tensor4::data_type;
#define CALL_TRANFORM(INNER_DIM) \
algo::transform<DataType1, DataType2, DataType3, DataType4, \
block_size, INNER_DIM, TransformFunc>( \
algo::transform<block_size, INNER_DIM>( \
shape, strides, tensor1.get_base_ptr(), tensor2.get_base_ptr(), \
tensor3.get_base_ptr(), tensor4.get_base_ptr(), \
op, thread_work_size, num_inner_blocks, grid_dims, block_dims, stream);
Expand Down
9 changes: 3 additions & 6 deletions legacy/src/tensor/tensor_mpi_cuda.cu
Original file line number | Diff line number | Diff line change
Expand Up @@ -140,8 +140,8 @@ __device__ __forceinline__ void assign(DataType1 &t1, const DataType2 &t2) {
t1 = t2;
}

template <int ND, int INNER_DIM, typename DataType1, typename DataType2,
bool is_concat>
template <int ND, int INNER_DIM, bool is_concat,
typename DataType1, typename DataType2>
__global__ void concat_or_slice_kernel(
DataType1 *dst, Array<ND> dst_shape, Array<ND> dst_strides,
DataType2 *src1, Array<ND> src1_shape, Array<ND> src1_strides,
Expand Down Expand Up @@ -242,16 +242,13 @@ int ConcatenateOrSlice(
// TODO: only works for U-Net. Concat on channel dim
assert_always(concat_dim == nd - 2);

using DataType1 = typename AddConstIf<!IS_CONCAT, DataType>::type;
using DataType2 = typename AddConstIf<IS_CONCAT, DataType>::type;

#define CALL_KERNEL(ND, INNER_DIM) do { \
assert_always(concat_dim > INNER_DIM); \
int grid_dim = 1; \
for (int i = INNER_DIM + 1; i < ND; ++i) { \
grid_dim *= t_dest.get_local_shape()[i]; \
} \
concat_or_slice_kernel<ND, INNER_DIM, DataType1, DataType2, IS_CONCAT> \
concat_or_slice_kernel<ND, INNER_DIM, IS_CONCAT> \
<<<grid_dim, block_dim, 0, s>>>( \
t_dest.get_base_ptr(), Array<ND>(t_dest.get_local_shape()), \
Array<ND>(t_dest.get_strides()), \
Expand Down
6 changes: 3 additions & 3 deletions legacy/src/util/util_cuda.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,7 @@ int get_number_of_gpus() {
std::cout << "Number of GPUs set by TENSOR_NUM_GPUS\n";
num_gpus = atoi(env);
} else {
DISTCONV_CHECK_CUDA(cudaGetDeviceCount(&num_gpus));
DISTCONV_CHECK_CUDA(cudaGetDeviceCount(&num_gpus));
}
return num_gpus;
}
Expand All @@ -31,7 +31,7 @@ int get_local_rank() {
int get_local_size() {
char *env = getenv("MV2_COMM_WORLD_LOCAL_SIZE");
if (!env) env = getenv("OMPI_COMM_WORLD_LOCAL_SIZE");
if (!env) env = getenv("SLURM_TASKS_PER_NODE");
if (!env) env = getenv("SLURM_TASKS_PER_NODE");
if (!env) {
std::cerr << "Can't determine local size\n";
abort();
Expand Down Expand Up @@ -76,7 +76,7 @@ cudaError_t cuda_malloc(void **ptr, size_t size,
int threshold = 0;
try {
threshold = std::stoi(std::string(log_env));
} catch (std::invalid_argument) {
} catch (std::invalid_argument const&) {
}
int size_in_mb = size / (1024 * 1024);
if (size_in_mb >= threshold) {
Expand Down

0 comments on commit 9bca9d9

Please sign in to comment.