Skip to content

Commit

Permalink
Merge pull request #2905 from stan-dev/feature/threadsafe-matrixcl
Browse files Browse the repository at this point in the history
Make matrix_cl thread safe
  • Loading branch information
SteveBronder authored Nov 7, 2023
2 parents 8062e06 + baffed6 commit 35ac188
Show file tree
Hide file tree
Showing 24 changed files with 228 additions and 186 deletions.
25 changes: 16 additions & 9 deletions stan/math/opencl/copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,12 @@ inline auto from_matrix_cl(const T& src) {
} else {
try {
cl::Event copy_event;
const cl::CommandQueue queue = opencl_context.queue();
const cl::CommandQueue& queue = opencl_context.queue();
std::vector<cl::Event> copy_write_events(src.write_events().begin(),
src.write_events().end());
queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
sizeof(T_val) * dst.size(), dst.data(),
&src.write_events(), &copy_event);
&copy_write_events, &copy_event);
copy_event.wait();
src.clear_write_events();
} catch (const cl::Error& e) {
Expand Down Expand Up @@ -150,9 +152,11 @@ inline T_dst from_matrix_cl(const matrix_cl<T>& src) {
"dst.cols()", 1);
try {
cl::Event copy_event;
const cl::CommandQueue queue = opencl_context.queue();
const cl::CommandQueue& queue = opencl_context.queue();
std::vector<cl::Event> copy_write_events(src.write_events().begin(),
src.write_events().end());
queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
sizeof(T), &dst, &src.write_events(), &copy_event);
sizeof(T), &dst, &copy_write_events, &copy_event);
copy_event.wait();
src.clear_write_events();
} catch (const cl::Error& e) {
Expand Down Expand Up @@ -182,10 +186,12 @@ inline T_dst from_matrix_cl(const matrix_cl<T>& src) {
}
try {
cl::Event copy_event;
const cl::CommandQueue queue = opencl_context.queue();
const cl::CommandQueue& queue = opencl_context.queue();
std::vector<cl::Event> copy_write_events(src.write_events().begin(),
src.write_events().end());
queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
sizeof(T) * src.rows(), dst.data(),
&src.write_events(), &copy_event);
&copy_write_events, &copy_event);
copy_event.wait();
src.clear_write_events();
} catch (const cl::Error& e) {
Expand Down Expand Up @@ -251,13 +257,14 @@ inline auto packed_copy(const T& src) {
return dst;
}
try {
const cl::CommandQueue queue = opencl_context.queue();
const cl::CommandQueue& queue = opencl_context.queue();
matrix_cl<T_val> packed(packed_size, 1);
stan::math::opencl_kernels::pack(cl::NDRange(src.rows(), src.rows()),
packed, src, src.rows(), src.rows(),
src.view());
const std::vector<cl::Event> mat_events
= vec_concat(packed.read_write_events(), src.write_events());
= vec_concat(std::vector<cl::Event>{}, packed.read_write_events(),
src.write_events());
cl::Event copy_event;
queue.enqueueReadBuffer(packed.buffer(), opencl_context.in_order(), 0,
sizeof(T_val) * packed_size, dst.data(),
Expand Down Expand Up @@ -303,7 +310,7 @@ inline matrix_cl<Vec_scalar> packed_copy(Vec&& src, int rows) {
try {
matrix_cl<Vec_scalar> packed(packed_size, 1);
cl::Event packed_event;
const cl::CommandQueue queue = opencl_context.queue();
const cl::CommandQueue& queue = opencl_context.queue();
queue.enqueueWriteBuffer(
packed.buffer(),
opencl_context.in_order() || std::is_rvalue_reference<Vec&&>::value, 0,
Expand Down
24 changes: 13 additions & 11 deletions stan/math/opencl/kernel_cl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,17 +109,17 @@ inline void assign_events(const cl::Event& new_event, CallArg& m,
* @return A vector of OpenCL events.
*/
template <typename T, require_not_matrix_cl_t<T>* = nullptr>
inline std::vector<cl::Event> select_events(const T& m) {
return {};
inline tbb::concurrent_vector<cl::Event> select_events(const T& m) {
return tbb::concurrent_vector<cl::Event>{};
}
template <typename T, typename K, require_matrix_cl_t<K>* = nullptr,
require_same_t<T, in_buffer>* = nullptr>
inline const std::vector<cl::Event>& select_events(const K& m) {
inline const tbb::concurrent_vector<cl::Event>& select_events(const K& m) {
return m.write_events();
}
template <typename T, typename K, require_matrix_cl_t<K>* = nullptr,
require_any_same_t<T, out_buffer, in_out_buffer>* = nullptr>
inline std::vector<cl::Event> select_events(K& m) {
inline tbb::concurrent_vector<cl::Event> select_events(K& m) {
static_assert(!std::is_const<K>::value, "Can not write to const matrix_cl!");
return m.read_write_events();
}
Expand All @@ -133,9 +133,9 @@ inline std::vector<cl::Event> select_events(K& m) {
* @param sources A std::vector of strings containing the code for the kernel.
* @param options The values of macros to be passed at compile time.
*/
inline auto compile_kernel(const char* name,
const std::vector<std::string>& sources,
const std::map<std::string, int>& options) {
inline auto compile_kernel(
const char* name, const std::vector<std::string>& sources,
const std::unordered_map<std::string, int>& options) {
auto base_opts = opencl_context.base_opts();
for (auto& it : options) {
if (base_opts[it.first] > it.second) {
Expand Down Expand Up @@ -175,7 +175,7 @@ struct kernel_cl {
private:
const char* name_;
std::vector<std::string> sources_;
std::map<std::string, int> opts_;
std::unordered_map<std::string, int> opts_;
mutable cl::Kernel kernel_;

public:
Expand All @@ -187,7 +187,7 @@ struct kernel_cl {
* @param options The values of macros to be passed at compile time.
*/
kernel_cl(const char* name, std::vector<std::string> sources,
std::map<std::string, int> options = {})
std::unordered_map<std::string, int> options = {})
: name_(name), sources_(std::move(sources)), opts_(std::move(options)) {}

/** \ingroup kernel_executor_opencl
Expand All @@ -205,7 +205,8 @@ struct kernel_cl {
opencl_context.register_kernel_cache(&kernel_);
}
cl::EnqueueArgs eargs(opencl_context.queue(),
vec_concat(internal::select_events<Args>(args)...),
vec_concat(std::vector<cl::Event>{},
internal::select_events<Args>(args)...),
global_thread_size);
cl::KernelFunctor<internal::to_const_buffer_t<Args>&...> kernel_functor(
kernel_);
Expand All @@ -232,7 +233,8 @@ struct kernel_cl {
opencl_context.register_kernel_cache(&kernel_);
}
cl::EnqueueArgs eargs(opencl_context.queue(),
vec_concat(internal::select_events<Args>(args)...),
vec_concat(std::vector<cl::Event>{},
internal::select_events<Args>(args)...),
global_thread_size, thread_block_size);
cl::KernelFunctor<internal::to_const_buffer_t<Args>&...> kernel_functor(
kernel_);
Expand Down
30 changes: 16 additions & 14 deletions stan/math/opencl/kernel_generator/append.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ class append_row_ : public operation_cl<append_row_<T_a, T_b>,
* @return part of kernel with code for this and nested expressions
*/
inline kernel_parts get_kernel_parts(
std::map<const void*, const char*>& generated,
std::map<const void*, const char*>& generated_all,
std::unordered_map<const void*, const char*>& generated,
std::unordered_map<const void*, const char*>& generated_all,
name_generator& name_gen, const std::string& row_index_name,
const std::string& col_index_name, bool view_handled) const {
kernel_parts res{};
Expand All @@ -101,7 +101,7 @@ class append_row_ : public operation_cl<append_row_<T_a, T_b>,
true);
std::string row_index_name_b
= "(" + row_index_name + " - " + var_name_ + "_first_rows)";
std::map<const void*, const char*> generated_b;
std::unordered_map<const void*, const char*> generated_b;
kernel_parts parts_b = this->template get_arg<1>().get_kernel_parts(
generated_b, generated_all, name_gen, row_index_name_b,
col_index_name, true);
Expand Down Expand Up @@ -129,14 +129,15 @@ class append_row_ : public operation_cl<append_row_<T_a, T_b>,
* @param[in,out] arg_num consecutive number of the first argument to set.
* This is incremented for each argument set by this function.
*/
inline void set_args(std::map<const void*, const char*>& generated,
std::map<const void*, const char*>& generated_all,
cl::Kernel& kernel, int& arg_num) const {
inline void set_args(
std::unordered_map<const void*, const char*>& generated,
std::unordered_map<const void*, const char*>& generated_all,
cl::Kernel& kernel, int& arg_num) const {
if (generated.count(this) == 0) {
generated[this] = "";
this->template get_arg<0>().set_args(generated, generated_all, kernel,
arg_num);
std::map<const void*, const char*> generated_b;
std::unordered_map<const void*, const char*> generated_b;
this->template get_arg<1>().set_args(generated_b, generated_all, kernel,
arg_num);
kernel.setArg(arg_num++, this->template get_arg<0>().rows());
Expand Down Expand Up @@ -250,8 +251,8 @@ class append_col_ : public operation_cl<append_col_<T_a, T_b>,
* @return part of kernel with code for this and nested expressions
*/
inline kernel_parts get_kernel_parts(
std::map<const void*, const char*>& generated,
std::map<const void*, const char*>& generated_all,
std::unordered_map<const void*, const char*>& generated,
std::unordered_map<const void*, const char*>& generated_all,
name_generator& name_gen, const std::string& row_index_name,
const std::string& col_index_name, bool view_handled) const {
kernel_parts res{};
Expand All @@ -263,7 +264,7 @@ class append_col_ : public operation_cl<append_col_<T_a, T_b>,
true);
std::string col_index_name_b
= "(" + col_index_name + " - " + var_name_ + "_first_cols)";
std::map<const void*, const char*> generated_b;
std::unordered_map<const void*, const char*> generated_b;
kernel_parts parts_b = this->template get_arg<1>().get_kernel_parts(
generated_b, generated_all, name_gen, row_index_name,
col_index_name_b, true);
Expand Down Expand Up @@ -291,14 +292,15 @@ class append_col_ : public operation_cl<append_col_<T_a, T_b>,
* @param[in,out] arg_num consecutive number of the first argument to set.
* This is incremented for each argument set by this function.
*/
inline void set_args(std::map<const void*, const char*>& generated,
std::map<const void*, const char*>& generated_all,
cl::Kernel& kernel, int& arg_num) const {
inline void set_args(
std::unordered_map<const void*, const char*>& generated,
std::unordered_map<const void*, const char*>& generated_all,
cl::Kernel& kernel, int& arg_num) const {
if (generated.count(this) == 0) {
generated[this] = "";
this->template get_arg<0>().set_args(generated, generated_all, kernel,
arg_num);
std::map<const void*, const char*> generated_b;
std::unordered_map<const void*, const char*> generated_b;
this->template get_arg<1>().set_args(generated_b, generated_all, kernel,
arg_num);
kernel.setArg(arg_num++, this->template get_arg<0>().cols());
Expand Down
21 changes: 11 additions & 10 deletions stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ class as_column_vector_or_scalar_
* @return part of kernel with code for this and nested expressions
*/
inline kernel_parts get_kernel_parts(
std::map<const void*, const char*>& generated,
std::map<const void*, const char*>& generated_all,
std::unordered_map<const void*, const char*>& generated,
std::unordered_map<const void*, const char*>& generated_all,
name_generator& name_gen, const std::string& row_index_name,
const std::string& col_index_name, bool view_handled) const {
kernel_parts res{};
Expand All @@ -82,7 +82,7 @@ class as_column_vector_or_scalar_
std::string row_index_name_arg = row_index_name;
std::string col_index_name_arg = col_index_name;
modify_argument_indices(row_index_name_arg, col_index_name_arg);
std::map<const void*, const char*> generated2;
std::unordered_map<const void*, const char*> generated2;
res = this->template get_arg<0>().get_kernel_parts(
generated2, generated_all, name_gen, row_index_name_arg,
col_index_name_arg, view_handled);
Expand Down Expand Up @@ -134,8 +134,8 @@ class as_column_vector_or_scalar_
* @return part of kernel with code for this expression
*/
inline kernel_parts get_kernel_parts_lhs(
std::map<const void*, const char*>& generated,
std::map<const void*, const char*>& generated_all,
std::unordered_map<const void*, const char*>& generated,
std::unordered_map<const void*, const char*>& generated_all,
name_generator& name_gen, const std::string& row_index_name,
const std::string& col_index_name) const {
if (generated.count(this) == 0) {
Expand All @@ -145,7 +145,7 @@ class as_column_vector_or_scalar_
std::string row_index_name_arg = row_index_name;
std::string col_index_name_arg = col_index_name;
modify_argument_indices(row_index_name_arg, col_index_name_arg);
std::map<const void*, const char*> generated2;
std::unordered_map<const void*, const char*> generated2;
kernel_parts res = this->template get_arg<0>().get_kernel_parts_lhs(
generated2, generated_all, name_gen, row_index_name_arg,
col_index_name_arg);
Expand Down Expand Up @@ -185,12 +185,13 @@ class as_column_vector_or_scalar_
* @param[in,out] arg_num consecutive number of the first argument to set.
* This is incremented for each argument set by this function.
*/
inline void set_args(std::map<const void*, const char*>& generated,
std::map<const void*, const char*>& generated_all,
cl::Kernel& kernel, int& arg_num) const {
inline void set_args(
std::unordered_map<const void*, const char*>& generated,
std::unordered_map<const void*, const char*>& generated_all,
cl::Kernel& kernel, int& arg_num) const {
if (generated.count(this) == 0) {
generated[this] = "";
std::map<const void*, const char*> generated2;
std::unordered_map<const void*, const char*> generated2;
this->template get_arg<0>().set_args(generated2, generated_all, kernel,
arg_num);
if (generated_all.count(this) == 0) {
Expand Down
21 changes: 11 additions & 10 deletions stan/math/opencl/kernel_generator/block_zero_based.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ class block_
* @return part of kernel with code for this and nested expressions
*/
inline kernel_parts get_kernel_parts(
std::map<const void*, const char*>& generated,
std::map<const void*, const char*>& generated_all,
std::unordered_map<const void*, const char*>& generated,
std::unordered_map<const void*, const char*>& generated_all,
name_generator& name_gen, const std::string& row_index_name,
const std::string& col_index_name, bool view_handled) const {
kernel_parts res{};
Expand All @@ -112,7 +112,7 @@ class block_
std::string row_index_name_arg = row_index_name;
std::string col_index_name_arg = col_index_name;
modify_argument_indices(row_index_name_arg, col_index_name_arg);
std::map<const void*, const char*> generated2;
std::unordered_map<const void*, const char*> generated2;
res = this->template get_arg<0>().get_kernel_parts(
generated2, generated_all, name_gen, row_index_name_arg,
col_index_name_arg, view_handled);
Expand Down Expand Up @@ -175,8 +175,8 @@ class block_
* @return part of kernel with code for this expression
*/
inline kernel_parts get_kernel_parts_lhs(
std::map<const void*, const char*>& generated,
std::map<const void*, const char*>& generated_all,
std::unordered_map<const void*, const char*>& generated,
std::unordered_map<const void*, const char*>& generated_all,
name_generator& name_gen, const std::string& row_index_name,
const std::string& col_index_name) const {
if (generated.count(this) == 0) {
Expand All @@ -186,7 +186,7 @@ class block_
std::string row_index_name_arg = row_index_name;
std::string col_index_name_arg = col_index_name;
modify_argument_indices(row_index_name_arg, col_index_name_arg);
std::map<const void*, const char*> generated2;
std::unordered_map<const void*, const char*> generated2;
kernel_parts res = this->template get_arg<0>().get_kernel_parts_lhs(
generated2, generated_all, name_gen, row_index_name_arg,
col_index_name_arg);
Expand Down Expand Up @@ -226,12 +226,13 @@ class block_
* @param[in,out] arg_num consecutive number of the first argument to set.
* This is incremented for each argument set by this function.
*/
inline void set_args(std::map<const void*, const char*>& generated,
std::map<const void*, const char*>& generated_all,
cl::Kernel& kernel, int& arg_num) const {
inline void set_args(
std::unordered_map<const void*, const char*>& generated,
std::unordered_map<const void*, const char*>& generated_all,
cl::Kernel& kernel, int& arg_num) const {
if (generated.count(this) == 0) {
generated[this] = "";
std::map<const void*, const char*> generated2;
std::unordered_map<const void*, const char*> generated2;
this->template get_arg<0>().set_args(generated2, generated_all, kernel,
arg_num);
if (generated_all.count(this) == 0) {
Expand Down
15 changes: 8 additions & 7 deletions stan/math/opencl/kernel_generator/calc_if.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ class calc_if_
*/
template <typename T_result>
kernel_parts get_whole_kernel_parts(
std::map<const void*, const char*>& generated,
std::map<const void*, const char*>& generated_all, name_generator& ng,
const std::string& row_index_name, const std::string& col_index_name,
const T_result& result) const {
std::unordered_map<const void*, const char*>& generated,
std::unordered_map<const void*, const char*>& generated_all,
name_generator& ng, const std::string& row_index_name,
const std::string& col_index_name, const T_result& result) const {
if (Do_Calculate) {
return this->template get_arg<0>().get_whole_kernel_parts(
generated, generated_all, ng, row_index_name, col_index_name, result);
Expand All @@ -88,9 +88,10 @@ class calc_if_
* @param[in,out] arg_num consecutive number of the first argument to set.
* This is incremented for each argument set by this function.
*/
inline void set_args(std::map<const void*, const char*>& generated,
std::map<const void*, const char*>& generated_all,
cl::Kernel& kernel, int& arg_num) const {
inline void set_args(
std::unordered_map<const void*, const char*>& generated,
std::unordered_map<const void*, const char*>& generated_all,
cl::Kernel& kernel, int& arg_num) const {
if (Do_Calculate) {
this->template get_arg<0>().set_args(generated, generated_all, kernel,
arg_num);
Expand Down
Loading

0 comments on commit 35ac188

Please sign in to comment.