Skip to content

Commit

Permalink
Reoptimization of OpenMP and other improvements on the configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
Augusto authored and Augusto committed May 14, 2016
1 parent 2020314 commit b0cd98b
Show file tree
Hide file tree
Showing 23 changed files with 104 additions and 59 deletions.
10 changes: 5 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ if(BUILD_CUDA_TARGETS)

cuda_add_executable(MCBooster_Example_OpenMP_CompareWithRoot
${CMAKE_CURRENT_SOURCE_DIR}/src/CompareWithTGenPhaseSpace.cu
OPTIONS -Xcompiler -fopenmp -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP -lgomp
OPTIONS -Xcompiler -fopenmp -DMCBOOSTER_BACKEND=OMP -lgomp
)

target_link_libraries(MCBooster_Example_OpenMP_CompareWithRoot
Expand Down Expand Up @@ -150,7 +150,7 @@ if(BUILD_CUDA_TARGETS)

cuda_add_executable(MCBooster_Example_OpenMP_NVCC_B2KPiJpsi
${CMAKE_CURRENT_SOURCE_DIR}/src/Generate.cu
OPTIONS -Xcompiler -fopenmp -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP -lgomp
OPTIONS -Xcompiler -fopenmp -DMCBOOSTER_BACKEND=OMP -lgomp
)

target_link_libraries(MCBooster_Example_OpenMP_NVCC_B2KPiJpsi
Expand All @@ -169,7 +169,7 @@ if(BUILD_CUDA_TARGETS)

cuda_add_executable(MCBooster_Example_OpenMP_NVCC_PerformanceTest
${CMAKE_CURRENT_SOURCE_DIR}/src/PerformanceTest.cu
OPTIONS -Xcompiler -fopenmp -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP -lgomp
OPTIONS -Xcompiler -fopenmp -DMCBOOSTER_BACKEND=OMP -lgomp
)

target_link_libraries(MCBooster_Example_OpenMP_NVCC_PerformanceTest
Expand All @@ -185,7 +185,7 @@ add_executable(MCBooster_Example_OpenMP_GCC_B2KPiJpsi
${CMAKE_CURRENT_SOURCE_DIR}/src/Generate.cpp
)

set_target_properties(MCBooster_Example_OpenMP_GCC_B2KPiJpsi PROPERTIES COMPILE_FLAGS "-std=c++11 -fopenmp -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP -lgomp" )
set_target_properties(MCBooster_Example_OpenMP_GCC_B2KPiJpsi PROPERTIES COMPILE_FLAGS "-std=c++11 -fopenmp -DMCBOOSTER_BACKEND=OMP -lgomp" )

target_link_libraries(MCBooster_Example_OpenMP_GCC_B2KPiJpsi
${ROOT_LIBRARIES}
Expand All @@ -198,7 +198,7 @@ add_executable(MCBooster_Example_OpenMP_GCC_PerformanceTest
${CMAKE_CURRENT_SOURCE_DIR}/src/PerformanceTest.cpp
)

set_target_properties(MCBooster_Example_OpenMP_GCC_PerformanceTest PROPERTIES COMPILE_FLAGS "-std=c++11 -fopenmp -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP -lgomp" )
set_target_properties(MCBooster_Example_OpenMP_GCC_PerformanceTest PROPERTIES COMPILE_FLAGS "-std=c++11 -fopenmp -DMCBOOSTER_BACKEND=OMP -lgomp" )

target_link_libraries(MCBooster_Example_OpenMP_GCC_PerformanceTest
${ROOT_LIBRARIES}
Expand Down
46 changes: 39 additions & 7 deletions mcbooster/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,51 @@
#ifndef CONFIG_H_
#define CONFIG_H_

#define CUDA_API_PER_THREAD_DEFAULT_STREAM
#include <mcbooster/MCBooster.h>
#include <iostream>

#include <thrust/detail/config/host_device.h>
#define CUDA 1
#define OMP 2
#define TBB 3

#if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB)
/* MCBooster only supports Linux: stop the build with a readable diagnostic
 * on any platform where the compiler does not predefine __linux__. */
#ifndef __linux__
#error "MCBooster: Platform not supported. MCBooster is designed to run only on Linux systems."
#endif

#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
/* Refuse to build when the compiler reports a language standard older than C++11. */
#if (__cplusplus < 201103L)
#error "This library needs a C++11 compliant compiler"
#endif


/* Fall back to the CUDA backend when the build did not choose one
 * (for example through -DMCBOOSTER_BACKEND=OMP on the compiler command line). */
#ifndef MCBOOSTER_BACKEND
#define MCBOOSTER_BACKEND CUDA
#endif


/* Only the three backend ids defined earlier in this header (CUDA, OMP, TBB)
 * are accepted; any other value of MCBOOSTER_BACKEND stops the build here. */
#if MCBOOSTER_BACKEND!=CUDA && MCBOOSTER_BACKEND!=OMP && MCBOOSTER_BACKEND!=TBB

#error "MCBooster: Backend not supported. MCBOOSTER_BACKEND = CUDA, OMP or TBB "

#endif



/* Translate the selected MCBooster backend into Thrust's device/host system
 * macros. THRUST_DEVICE_SYSTEM and THRUST_HOST_SYSTEM are set to the numeric
 * backend ids defined earlier in this header (CUDA=1, OMP=2, TBB=3).
 * NOTE(review): this relies on those ids matching Thrust's own
 * THRUST_DEVICE_SYSTEM_* / THRUST_HOST_SYSTEM_* constants - confirm against
 * the Thrust version in use. */
#if MCBOOSTER_BACKEND==CUDA
/* Defined ahead of the CUDA includes below.
 * NOTE(review): confirm that no header included earlier already pulls in the
 * CUDA headers, otherwise this define would come too late to take effect. */
#define CUDA_API_PER_THREAD_DEFAULT_STREAM
#define THRUST_DEVICE_SYSTEM CUDA // intended to equal THRUST_DEVICE_SYSTEM_CUDA
/* Host-side Thrust algorithms use the OpenMP system in CUDA builds. */
#define THRUST_HOST_SYSTEM OMP
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#elif MCBOOSTER_BACKEND==OMP
/* OpenMP build: both the "device" and the host system are OpenMP; no CUDA headers are included. */
#define THRUST_DEVICE_SYSTEM OMP
#define THRUST_HOST_SYSTEM OMP
#elif MCBOOSTER_BACKEND==TBB
/* TBB build: both the "device" and the host system are TBB; no CUDA headers are included. */
#define THRUST_DEVICE_SYSTEM TBB
#define THRUST_HOST_SYSTEM TBB
#endif


#include <thrust/detail/config/host_device.h>

#endif /* CUDA_H_ */
#endif /* CONFIG_H_ */
2 changes: 1 addition & 1 deletion mcbooster/Evaluate.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#include <mcbooster/GTypes.h>
#include <mcbooster/functors/Calculate.h>

namespace MCBooster
namespace mcbooster
{
/** Template functor to evaluate an arbitrary function object.
* Template functor to evaluate an arbitrary function object over a set of particles stored
Expand Down
15 changes: 8 additions & 7 deletions mcbooster/EvaluateArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#include <mcbooster/GTypes.h>
#include <mcbooster/functors/Calculate.h>

namespace MCBooster
namespace mcbooster
{

/** Template functor to calculate an array of variables over a given set of particles.
Expand Down Expand Up @@ -280,7 +280,7 @@ void EvaluateArray(const CUSTOMFUNC funcObj, ParticlesSet_d &pset,

}

#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB
#if MCBOOSTER_BACKEND!=CUDA

#pragma omp parallel num_threads( arrayWidth )
{
Expand Down Expand Up @@ -319,7 +319,7 @@ void EvaluateArray(const CUSTOMFUNC funcObj, ParticlesSet_d &pset,
return;
}

#if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB)
#if MCBOOSTER_BACKEND==CUDA

/** Template functor to calculate an array of variables over a given set of particles.
* Template functor to evaluate an arbitrary function object over a set of particles stored
Expand Down Expand Up @@ -560,8 +560,8 @@ void EvaluateArray(const CUSTOMFUNC funcObj, ParticlesSet_d &pset,
Calculate3<CUSTOMFUNC>(funcObj));

}

#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB
/*
//#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB
#pragma omp parallel num_threads( arrayWidth )
{
Expand All @@ -571,7 +571,8 @@ void EvaluateArray(const CUSTOMFUNC funcObj, ParticlesSet_d &pset,
thrust::copy(it_array.begin(),it_array.end(),
varset[omp_get_thread_num()]->begin());
}
#else
#else*/
cudaStream_t s[arrayWidth];

for (GInt_t d = 0; d < arrayWidth; d++)
Expand All @@ -596,7 +597,7 @@ void EvaluateArray(const CUSTOMFUNC funcObj, ParticlesSet_d &pset,
for (GInt_t d = 0; d < arrayWidth; d++)
delete it[d];

#endif
//#endif*/
return;
}
#endif
Expand Down
25 changes: 20 additions & 5 deletions mcbooster/GContainers.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,22 @@
#include <thrust/host_vector.h>
#include <thrust/complex.h>

#if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB)
#if MCBOOSTER_BACKEND==CUDA
#include <thrust/system/cuda/experimental/pinned_allocator.h>
#endif

using namespace std;

namespace MCBooster
namespace mcbooster
{

#if (THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB)
#if (MCBOOSTER_BACKEND==OMP)
/*!
* Generic template typedef for thrust::host_vector. Use it instead of Thrust implementation
* in order to avoid problems to compile OpenMP based applications using gcc and without a cuda runtime installation.
*/
template <typename T>
using mc_device_vector = thrust::device_vector<T>;
using mc_device_vector = thrust::host_vector<T>;
/*!
* Generic template typedef for thrust::host_vector. Use it instead of Thrust implementation
* in order to avoid problems to compile OpenMP based applications using gcc and without a cuda runtime installation.
Expand All @@ -67,7 +67,7 @@ namespace MCBooster
template <typename T>
using mc_host_vector = thrust::host_vector<T>;

#else
#elif(MCBOOSTER_BACKEND==CUDA)
/*!
* Generic template typedef for thrust::host_vector. Use it instead of Thrust implementation
* in order to avoid problems to compile OpenMP based applications using gcc and without a cuda runtime installation.
Expand All @@ -84,6 +84,21 @@ namespace MCBooster
using mc_host_vector = thrust::host_vector<T,
thrust::cuda::experimental::pinned_allocator<T>>;

#elif(MCBOOSTER_BACKEND==TBB)
/*!
* TBB backend: generic alias template for thrust::device_vector<T>.
* Use mc_device_vector instead of naming the Thrust container directly, so that
* the container type follows the backend selected through MCBOOSTER_BACKEND.
*/
template <typename T>
using mc_device_vector = thrust::device_vector<T>;
/*!
* TBB backend: generic alias template for thrust::host_vector<T> with the default allocator.
* Use mc_host_vector instead of naming the Thrust container directly, so that
* the container type follows the backend selected through MCBOOSTER_BACKEND.
* Unlike the CUDA branch, which uses the pinned (page-locked) allocator to speed up
* memory transfers to the device, no pinned allocator is used here.
*/
template <typename T>
using mc_host_vector = thrust::host_vector<T>;
#endif


Expand Down
2 changes: 1 addition & 1 deletion mcbooster/GFunctional.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#include <mcbooster/Vector3R.h>
#include <mcbooster/Vector4R.h>

namespace MCBooster
namespace mcbooster
{
/** \struct IFunction
* IFunction is the base class for arbitrary functions returning any type supported by the framework.
Expand Down
2 changes: 1 addition & 1 deletion mcbooster/GTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#ifndef GTYPES_H_
#define GTYPES_H_

namespace MCBooster
namespace mcbooster
{
//---- types -------------------------------------------------------------------

Expand Down
19 changes: 10 additions & 9 deletions mcbooster/Generate.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,13 @@
#include <thrust/sort.h>
#include <thrust/iterator/counting_iterator.h>

#if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB)
#if MCBOOSTER_BACKEND==CUDA
#include <thrust/system/cuda/execution_policy.h>
#endif

#elif MCBOOSTER_BACKEND==OPENMP
#include <thrust/system/omp/execution_policy.h>
#elif MCBOOSTER_BACKEND==TBB
#include <thrust/system/tbb/execution_policy.h>
#endif

#define TIMER CLOCK_REALTIME

Expand All @@ -85,7 +87,7 @@

using namespace std;

namespace MCBooster {
namespace mcbooster {
/*!
* Function to calculate time intervals in seconds.
*/
Expand Down Expand Up @@ -404,8 +406,7 @@ void PhaseSpace::ExportUnweighted(Events *_Events) {

_Events->fMaxWeight = fMaxWeight;

#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB

#if MCBOOSTER_BACKEND!=CUDA
#pragma omp parallel num_threads( fNDaughters + 1 )
{

Expand Down Expand Up @@ -477,7 +478,7 @@ void PhaseSpace::Export(Events *_Events) {
*/
_Events->fMaxWeight = fMaxWeight;

#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB
#if MCBOOSTER_BACKEND!=CUDA

#pragma omp parallel num_threads( fNDaughters + 1 )
{
Expand Down Expand Up @@ -544,7 +545,7 @@ void PhaseSpace::Generate(const Vector4R fMother) {
* in any system of reference. The daughters will be generated in this system.
*/

#if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB)
#if MCBOOSTER_BACKEND==CUDA
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
#endif
/* random number generation */
Expand Down Expand Up @@ -671,7 +672,7 @@ void PhaseSpace::Generate(Particles_d fMothers) {
* Run the generator and calculate the maximum weight. It takes as input the device vector with the four-vectors of the mother particle
* in any system of reference. The daughters will be generated in this system.
*/
#if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB)
#if MCBOOSTER_BACKEND==CUDA
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
#endif

Expand Down
2 changes: 1 addition & 1 deletion mcbooster/Vector3R.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#include <math.h>

using std::ostream;
namespace MCBooster
namespace mcbooster
{
class Vector3R
{
Expand Down
12 changes: 2 additions & 10 deletions mcbooster/Vector4R.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,12 @@
#include <iostream>
#include <cmath>
#include "mcbooster/Vector3R.h"
/*
#ifndef __host__
#define __host__
#endif // __host__
#ifndef __device__
#define __device__
#endif // __device_
*/

using std::ostream;



namespace MCBooster
namespace mcbooster
{

class Vector3R;
Expand Down
2 changes: 1 addition & 1 deletion mcbooster/functors/Calculate.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include <mcbooster/GContainers.h>
#include <mcbooster/GTypes.h>

namespace MCBooster
namespace mcbooster
{

template<typename FUNCTION, typename RESULT>
Expand Down
2 changes: 1 addition & 1 deletion mcbooster/functors/DecayMother.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

using namespace std;

namespace MCBooster
namespace mcbooster
{

struct DecayMother
Expand Down
2 changes: 1 addition & 1 deletion mcbooster/functors/DecayMothers.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

using namespace std;

namespace MCBooster
namespace mcbooster
{

struct DecayMothers
Expand Down
2 changes: 1 addition & 1 deletion mcbooster/functors/FlagAcceptReject.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#include <thrust/random.h>
#include <mcbooster/GTypes.h>

namespace MCBooster
namespace mcbooster
{
/**\struct FlagAcceptReject
* Flags generated events as accepted (1) or rejected (0).
Expand Down
2 changes: 1 addition & 1 deletion mcbooster/functors/IsAccepted.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#include <mcbooster/Config.h>
#include <mcbooster/GTypes.h>

namespace MCBooster
namespace mcbooster
{

struct isAccepted
Expand Down
2 changes: 1 addition & 1 deletion mcbooster/functors/RandGen.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#include <mcbooster/GContainers.h>


namespace MCBooster
namespace mcbooster
{
/**\struct RandGen
* Fill a given vector with random numbers between 0 and 1.
Expand Down
Loading

0 comments on commit b0cd98b

Please sign in to comment.