From b0cd98b81bb4e37802dc98c1e9ef659cd28b31db Mon Sep 17 00:00:00 2001 From: Augusto Date: Sat, 14 May 2016 09:47:49 +0200 Subject: [PATCH] Reoptimization of OpenMP and other improvements on the configuration --- CMakeLists.txt | 10 +++--- mcbooster/Config.h | 46 +++++++++++++++++++++++---- mcbooster/Evaluate.h | 2 +- mcbooster/EvaluateArray.h | 15 +++++---- mcbooster/GContainers.h | 25 ++++++++++++--- mcbooster/GFunctional.h | 2 +- mcbooster/GTypes.h | 2 +- mcbooster/Generate.h | 19 +++++------ mcbooster/Vector3R.h | 2 +- mcbooster/Vector4R.h | 12 ++----- mcbooster/functors/Calculate.h | 2 +- mcbooster/functors/DecayMother.h | 2 +- mcbooster/functors/DecayMothers.h | 2 +- mcbooster/functors/FlagAcceptReject.h | 2 +- mcbooster/functors/IsAccepted.h | 2 +- mcbooster/functors/RandGen.h | 2 +- mcbooster/strided_iterator.h | 4 +++ src/CompareWithTGenPhaseSpace.cu | 2 +- src/Generate.cpp | 2 +- src/Generate.cu | 2 +- src/GenerateSample.cu | 2 +- src/PerformanceTest.cpp | 2 +- src/PerformanceTest.cu | 2 +- 23 files changed, 104 insertions(+), 59 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index df06d21..82d5deb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,7 +121,7 @@ if(BUILD_CUDA_TARGETS) cuda_add_executable(MCBooster_Example_OpenMP_CompareWithRoot ${CMAKE_CURRENT_SOURCE_DIR}/src/CompareWithTGenPhaseSpace.cu - OPTIONS -Xcompiler -fopenmp -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP -lgomp + OPTIONS -Xcompiler -fopenmp -DMCBOOSTER_BACKEND=OMP -lgomp ) target_link_libraries(MCBooster_Example_OpenMP_CompareWithRoot @@ -150,7 +150,7 @@ if(BUILD_CUDA_TARGETS) cuda_add_executable(MCBooster_Example_OpenMP_NVCC_B2KPiJpsi ${CMAKE_CURRENT_SOURCE_DIR}/src/Generate.cu - OPTIONS -Xcompiler -fopenmp -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP -lgomp + OPTIONS -Xcompiler -fopenmp -DMCBOOSTER_BACKEND=OMP -lgomp ) target_link_libraries(MCBooster_Example_OpenMP_NVCC_B2KPiJpsi @@ -169,7 +169,7 @@ if(BUILD_CUDA_TARGETS) 
cuda_add_executable(MCBooster_Example_OpenMP_NVCC_PerformanceTest ${CMAKE_CURRENT_SOURCE_DIR}/src/PerformanceTest.cu - OPTIONS -Xcompiler -fopenmp -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP -lgomp + OPTIONS -Xcompiler -fopenmp -DMCBOOSTER_BACKEND=OMP -lgomp ) target_link_libraries(MCBooster_Example_OpenMP_NVCC_PerformanceTest @@ -185,7 +185,7 @@ add_executable(MCBooster_Example_OpenMP_GCC_B2KPiJpsi ${CMAKE_CURRENT_SOURCE_DIR}/src/Generate.cpp ) -set_target_properties(MCBooster_Example_OpenMP_GCC_B2KPiJpsi PROPERTIES COMPILE_FLAGS "-std=c++11 -fopenmp -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP -lgomp" ) +set_target_properties(MCBooster_Example_OpenMP_GCC_B2KPiJpsi PROPERTIES COMPILE_FLAGS "-std=c++11 -fopenmp -DMCBOOSTER_BACKEND=OMP -lgomp" ) target_link_libraries(MCBooster_Example_OpenMP_GCC_B2KPiJpsi ${ROOT_LIBRARIES} @@ -198,7 +198,7 @@ add_executable(MCBooster_Example_OpenMP_GCC_PerformanceTest ${CMAKE_CURRENT_SOURCE_DIR}/src/PerformanceTest.cpp ) -set_target_properties(MCBooster_Example_OpenMP_GCC_PerformanceTest PROPERTIES COMPILE_FLAGS "-std=c++11 -fopenmp -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP -lgomp" ) +set_target_properties(MCBooster_Example_OpenMP_GCC_PerformanceTest PROPERTIES COMPILE_FLAGS "-std=c++11 -fopenmp -DMCBOOSTER_BACKEND=OMP -lgomp" ) target_link_libraries(MCBooster_Example_OpenMP_GCC_PerformanceTest ${ROOT_LIBRARIES} diff --git a/mcbooster/Config.h b/mcbooster/Config.h index f1d3a21..b638647 100644 --- a/mcbooster/Config.h +++ b/mcbooster/Config.h @@ -25,19 +25,51 @@ #ifndef CONFIG_H_ #define CONFIG_H_ -#define CUDA_API_PER_THREAD_DEFAULT_STREAM +#include +#include -#include +#define CUDA 1 +#define OMP 2 +#define TBB 3 -#if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB) +#ifndef __linux__ +#error "MCBooster: Platform not supported. MCBooster is designed to run only on LINUX systems." 
+#endif - #include - #include - #include +#if (__cplusplus < 201103L) +#error "This library needs a C++11 compliant compiler" +#endif +#ifndef MCBOOSTER_BACKEND + #define MCBOOSTER_BACKEND CUDA #endif +#if MCBOOSTER_BACKEND!=CUDA && MCBOOSTER_BACKEND!=OMP && MCBOOSTER_BACKEND!=TBB + +#error "MCBooster: Backend not supported. MCBOOSTER_BACKEND = CUDA, OMP or TBB " + +#endif + + + +#if MCBOOSTER_BACKEND==CUDA + #define CUDA_API_PER_THREAD_DEFAULT_STREAM + #define THRUST_DEVICE_SYSTEM CUDA//THRUST_DEVICE_SYSTEM_CUDA + #define THRUST_HOST_SYSTEM OMP + #include + #include + #include +#elif MCBOOSTER_BACKEND==OMP + #define THRUST_DEVICE_SYSTEM OMP + #define THRUST_HOST_SYSTEM OMP +#elif MCBOOSTER_BACKEND==TBB + #define THRUST_DEVICE_SYSTEM TBB + #define THRUST_HOST_SYSTEM TBB +#endif + + +#include -#endif /* CUDA_H_ */ +#endif /* CONFIG_H_ */ diff --git a/mcbooster/Evaluate.h b/mcbooster/Evaluate.h index 49f7033..62308f4 100644 --- a/mcbooster/Evaluate.h +++ b/mcbooster/Evaluate.h @@ -39,7 +39,7 @@ #include #include -namespace MCBooster +namespace mcbooster { /** Template functor for evaluate an arbitrary function object. * Template functor for evaluate an arbitrary function object over the a set of particles stored diff --git a/mcbooster/EvaluateArray.h b/mcbooster/EvaluateArray.h index d1ffe39..ce99f9c 100644 --- a/mcbooster/EvaluateArray.h +++ b/mcbooster/EvaluateArray.h @@ -38,7 +38,7 @@ #include #include -namespace MCBooster +namespace mcbooster { /** Template functor for calculate an array of variables over a given set of particles. 
@@ -280,7 +280,7 @@ void EvaluateArray(const CUSTOMFUNC funcObj, ParticlesSet_d &pset, } -#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB +#if MCBOOSTER_BACKEND!=CUDA #pragma omp parallel num_threads( arrayWidth ) { @@ -319,7 +319,7 @@ void EvaluateArray(const CUSTOMFUNC funcObj, ParticlesSet_d &pset, return; } -#if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB) +#if MCBOOSTER_BACKEND==CUDA /** Template functor for calculate an array of variables over a given set of particles. * Template functor for evaluate an arbitrary function object over the a set of particles stored @@ -560,8 +560,8 @@ void EvaluateArray(const CUSTOMFUNC funcObj, ParticlesSet_d &pset, Calculate3(funcObj)); } - -#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB +/* +//#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB #pragma omp parallel num_threads( arrayWidth ) { @@ -571,7 +571,8 @@ void EvaluateArray(const CUSTOMFUNC funcObj, ParticlesSet_d &pset, thrust::copy(it_array.begin(),it_array.end(), varset[omp_get_thread_num()]->begin()); } -#else + +#else*/ cudaStream_t s[arrayWidth]; for (GInt_t d = 0; d < arrayWidth; d++) @@ -596,7 +597,7 @@ void EvaluateArray(const CUSTOMFUNC funcObj, ParticlesSet_d &pset, for (GInt_t d = 0; d < arrayWidth; d++) delete it[d]; -#endif +//#endif*/ return; } #endif diff --git a/mcbooster/GContainers.h b/mcbooster/GContainers.h index 75496c7..b2847f1 100644 --- a/mcbooster/GContainers.h +++ b/mcbooster/GContainers.h @@ -42,22 +42,22 @@ #include #include -#if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB) +#if MCBOOSTER_BACKEND==CUDA #include #endif using namespace std; -namespace MCBooster +namespace mcbooster { -#if (THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || 
THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB) +#if (MCBOOSTER_BACKEND==OMP) /*! * Generic template typedef for thrust::host_vector. Use it instead of Thrust implementation * in order to avoid problems to compile OpenMP based applications using gcc and without a cuda runtime installation. */ template - using mc_device_vector = thrust::device_vector; + using mc_device_vector = thrust::host_vector; /*! * Generic template typedef for thrust::host_vector. Use it instead of Thrust implementation * in order to avoid problems to compile OpenMP based applications using gcc and without a cuda runtime installation. @@ -67,7 +67,7 @@ namespace MCBooster template using mc_host_vector = thrust::host_vector; -#else +#elif(MCBOOSTER_BACKEND==CUDA) /*! * Generic template typedef for thrust::host_vector. Use it instead of Thrust implementation * in order to avoid problems to compile OpenMP based applications using gcc and without a cuda runtime installation. @@ -84,6 +84,21 @@ namespace MCBooster using mc_host_vector = thrust::host_vector>; +#elif(MCBOOSTER_BACKEND==TBB) + /*! + * Generic template typedef for thrust::device_vector. Use it instead of Thrust implementation + * in order to avoid problems to compile OpenMP based applications using gcc and without a cuda runtime installation. + */ + template + using mc_device_vector = thrust::device_vector; + /*! + * Generic template typedef for thrust::host_vector. Use it instead of Thrust implementation + * in order to avoid problems to compile OpenMP based applications using gcc and without a cuda runtime installation. + * mc_host_vector will always allocate page locked memory on CUDA SYSTEMs in order to maximize speed in memory transfers + * to the device. 
+ */ + template + using mc_host_vector = thrust::host_vector; #endif diff --git a/mcbooster/GFunctional.h b/mcbooster/GFunctional.h index ba6abf4..5a81d42 100644 --- a/mcbooster/GFunctional.h +++ b/mcbooster/GFunctional.h @@ -34,7 +34,7 @@ #include #include -namespace MCBooster +namespace mcbooster { /** \struct IFunction * IFunction is the base class for arbitrary functions return any type suported by the framwork. diff --git a/mcbooster/GTypes.h b/mcbooster/GTypes.h index 44aec55..36f63a3 100644 --- a/mcbooster/GTypes.h +++ b/mcbooster/GTypes.h @@ -26,7 +26,7 @@ #ifndef GTYPES_H_ #define GTYPES_H_ -namespace MCBooster +namespace mcbooster { //---- types ------------------------------------------------------------------- diff --git a/mcbooster/Generate.h b/mcbooster/Generate.h index 5d4d60a..51041b7 100644 --- a/mcbooster/Generate.h +++ b/mcbooster/Generate.h @@ -67,11 +67,13 @@ #include #include -#if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB) +#if MCBOOSTER_BACKEND==CUDA #include -#endif - +#elif MCBOOSTER_BACKEND==OMP #include +#elif MCBOOSTER_BACKEND==TBB +#include +#endif #define TIMER CLOCK_REALTIME @@ -85,7 +87,7 @@ using namespace std; -namespace MCBooster { +namespace mcbooster { /*! * Function to calculate time intervals in seconds. 
*/ @@ -404,8 +406,7 @@ void PhaseSpace::ExportUnweighted(Events *_Events) { _Events->fMaxWeight = fMaxWeight; -#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB - +#if MCBOOSTER_BACKEND!=CUDA #pragma omp parallel num_threads( fNDaughters + 1 ) { @@ -477,7 +478,7 @@ void PhaseSpace::Export(Events *_Events) { */ _Events->fMaxWeight = fMaxWeight; -#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB +#if MCBOOSTER_BACKEND!=CUDA #pragma omp parallel num_threads( fNDaughters + 1 ) { @@ -544,7 +545,7 @@ void PhaseSpace::Generate(const Vector4R fMother) { * in any system of reference. The daughters will be generated in this system. */ -#if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB) +#if MCBOOSTER_BACKEND==CUDA cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); #endif /* random number generation */ @@ -671,7 +672,7 @@ void PhaseSpace::Generate(Particles_d fMothers) { * Run the generator and calculate the maximum weight. It takes as input the device vector with the four-vectors of the mother particle * in any system of reference. The daughters will be generated in this system. 
*/ -#if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_TBB) +#if MCBOOSTER_BACKEND==CUDA cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); #endif diff --git a/mcbooster/Vector3R.h b/mcbooster/Vector3R.h index 9070e22..6acf23a 100644 --- a/mcbooster/Vector3R.h +++ b/mcbooster/Vector3R.h @@ -36,7 +36,7 @@ #include using std::ostream; -namespace MCBooster +namespace mcbooster { class Vector3R { diff --git a/mcbooster/Vector4R.h b/mcbooster/Vector4R.h index 5969ef1..c73e991 100644 --- a/mcbooster/Vector4R.h +++ b/mcbooster/Vector4R.h @@ -33,20 +33,12 @@ #include #include #include "mcbooster/Vector3R.h" -/* -#ifndef __host__ -#define __host__ -#endif // __host__ - -#ifndef __device__ -#define __device__ -#endif // __device_ -*/ + using std::ostream; -namespace MCBooster +namespace mcbooster { class Vector3R; diff --git a/mcbooster/functors/Calculate.h b/mcbooster/functors/Calculate.h index 4ac58b6..f2a0048 100644 --- a/mcbooster/functors/Calculate.h +++ b/mcbooster/functors/Calculate.h @@ -33,7 +33,7 @@ #include #include -namespace MCBooster +namespace mcbooster { template diff --git a/mcbooster/functors/DecayMother.h b/mcbooster/functors/DecayMother.h index 44e71a0..4acda35 100644 --- a/mcbooster/functors/DecayMother.h +++ b/mcbooster/functors/DecayMother.h @@ -38,7 +38,7 @@ using namespace std; -namespace MCBooster +namespace mcbooster { struct DecayMother diff --git a/mcbooster/functors/DecayMothers.h b/mcbooster/functors/DecayMothers.h index e3acc7d..65309d5 100644 --- a/mcbooster/functors/DecayMothers.h +++ b/mcbooster/functors/DecayMothers.h @@ -38,7 +38,7 @@ using namespace std; -namespace MCBooster +namespace mcbooster { struct DecayMothers diff --git a/mcbooster/functors/FlagAcceptReject.h b/mcbooster/functors/FlagAcceptReject.h index 3aca4cf..db93cb9 100644 --- a/mcbooster/functors/FlagAcceptReject.h +++ b/mcbooster/functors/FlagAcceptReject.h @@ -36,7 +36,7 @@ #include #include -namespace MCBooster +namespace 
mcbooster { /**\struct FlagAcceptReject * Flags generated events as accepted (1) or rejected (0). diff --git a/mcbooster/functors/IsAccepted.h b/mcbooster/functors/IsAccepted.h index 9e7cc6f..b63e917 100644 --- a/mcbooster/functors/IsAccepted.h +++ b/mcbooster/functors/IsAccepted.h @@ -36,7 +36,7 @@ #include #include -namespace MCBooster +namespace mcbooster { struct isAccepted diff --git a/mcbooster/functors/RandGen.h b/mcbooster/functors/RandGen.h index c2cfa06..c7118ba 100644 --- a/mcbooster/functors/RandGen.h +++ b/mcbooster/functors/RandGen.h @@ -32,7 +32,7 @@ #include -namespace MCBooster +namespace mcbooster { /**\struct RandGen * Fill a given vector with random numbers between 0 and 1. diff --git a/mcbooster/strided_iterator.h b/mcbooster/strided_iterator.h index ad7c81e..0436d4a 100644 --- a/mcbooster/strided_iterator.h +++ b/mcbooster/strided_iterator.h @@ -33,6 +33,9 @@ #include #include + +namespace mcbooster +{ /** \class strided_range * Strided range iterator original code: https://github.com/thrust/thrust/blob/master/examples/strided_range.cu */ @@ -89,5 +92,6 @@ class strided_range Iterator last; difference_type stride; }; +} #endif /* STRIDED_ITERATOR_H_ */ diff --git a/src/CompareWithTGenPhaseSpace.cu b/src/CompareWithTGenPhaseSpace.cu index 9a4ac85..7f20a16 100644 --- a/src/CompareWithTGenPhaseSpace.cu +++ b/src/CompareWithTGenPhaseSpace.cu @@ -59,7 +59,7 @@ using namespace std; -using namespace MCBooster; +using namespace mcbooster; GInt_t factorial(GInt_t n) { diff --git a/src/Generate.cpp b/src/Generate.cpp index a92e0a8..a1b9764 100644 --- a/src/Generate.cpp +++ b/src/Generate.cpp @@ -53,7 +53,7 @@ using namespace std; -using namespace MCBooster; +using namespace mcbooster; diff --git a/src/Generate.cu b/src/Generate.cu index e7950cb..1d661a1 100644 --- a/src/Generate.cu +++ b/src/Generate.cu @@ -53,7 +53,7 @@ using namespace std; -using namespace MCBooster; +using namespace mcbooster; diff --git a/src/GenerateSample.cu 
b/src/GenerateSample.cu index 0c0d41c..1f44aa7 100644 --- a/src/GenerateSample.cu +++ b/src/GenerateSample.cu @@ -51,7 +51,7 @@ using namespace std; -using namespace MCBooster; +using namespace mcbooster; void splitString(const std::string &s, const char delim, std::vector &elems) diff --git a/src/PerformanceTest.cpp b/src/PerformanceTest.cpp index 5790c0a..a1072ea 100644 --- a/src/PerformanceTest.cpp +++ b/src/PerformanceTest.cpp @@ -59,7 +59,7 @@ using namespace std; -using namespace MCBooster; +using namespace mcbooster; void RunMCGen (GInt_t nfinal, GInt_t nevents, Double_t *time ) { diff --git a/src/PerformanceTest.cu b/src/PerformanceTest.cu index e54a8d2..eb81172 100644 --- a/src/PerformanceTest.cu +++ b/src/PerformanceTest.cu @@ -59,7 +59,7 @@ using namespace std; -using namespace MCBooster; +using namespace mcbooster; void RunMCGen (GInt_t nfinal, GInt_t nevents, Double_t *time ) {