From 2a5479e94b41a7c56725537b57eb890eacd07008 Mon Sep 17 00:00:00 2001 From: Antonio Augusto Alves Junior Date: Mon, 23 May 2016 14:00:11 +0200 Subject: [PATCH] performance improvements --- mcbooster/Generate.h | 10 +++--- mcbooster/Vector3R.h | 8 ++--- mcbooster/functors/Calculate.h | 52 +++++++++++++-------------- mcbooster/functors/DecayMother.h | 26 +++++++------- mcbooster/functors/DecayMothers.h | 26 +++++++------- mcbooster/functors/FlagAcceptReject.h | 4 +-- mcbooster/functors/IsAccepted.h | 2 +- mcbooster/functors/RandGen.h | 6 ++-- src/Generate.cpp | 6 ++-- src/Generate.cu | 6 ++-- 10 files changed, 73 insertions(+), 73 deletions(-) diff --git a/mcbooster/Generate.h b/mcbooster/Generate.h index 51041b7..e352acb 100644 --- a/mcbooster/Generate.h +++ b/mcbooster/Generate.h @@ -235,8 +235,8 @@ class PhaseSpace { fAccRejFlags.shrink_to_fit(); } - void Generate(Particles_d fMothers); - void Generate(const Vector4R fMother); + inline void Generate(Particles_d fMothers); + inline void Generate(const Vector4R fMother); /** * Get the daughter with index 'i' in the mass array. It return a device vector of particles by reference. @@ -311,11 +311,11 @@ class PhaseSpace { * Export the events and all related information to host. */ void Export(Events *_Events); - void ExportUnweighted(Events *_Events); + inline void ExportUnweighted(Events *_Events); /** * Flag the accepted and rejected events */ - GULong_t Unweight(); + inline GULong_t Unweight(); // public: @@ -336,7 +336,7 @@ class PhaseSpace { /** * PDK function */ - GReal_t PDK(const GReal_t a, const GReal_t b, const GReal_t c) const { + inline GReal_t PDK(const GReal_t a, const GReal_t b, const GReal_t c) const { //the PDK function GReal_t x = (a - b - c) * (a + b + c) * (a - b + c) * (a + b - c); x = sqrt(x) / (2 * a); diff --git a/mcbooster/Vector3R.h b/mcbooster/Vector3R.h index 6acf23a..b0db013 100644 --- a/mcbooster/Vector3R.h +++ b/mcbooster/Vector3R.h @@ -41,12 +41,12 @@ namespace mcbooster class Vector3R { - __host__ __device__ friend Vector3R rotateEuler(const Vector3R& v, + __host__ __device__ inline friend Vector3R rotateEuler(const Vector3R& v, GReal_t phi, GReal_t theta, GReal_t ksi); - __host__ __device__ inline friend Vector3R operator*(GReal_t c, + __host__ __device__ inline friend Vector3R operator*(GReal_t c, const Vector3R& v2); - __host__ __device__ inline friend GReal_t operator*(const Vector3R& v1, + __host__ __device__ inline friend GReal_t operator*(const Vector3R& v1, const Vector3R& v2); __host__ __device__ inline friend Vector3R operator+(const Vector3R& v1, const Vector3R& v2); @@ -56,7 +56,7 @@ class Vector3R GReal_t c); __host__ __device__ inline friend Vector3R operator/(const Vector3R& v1, GReal_t c); - __host__ __device__ friend Vector3R cross(const Vector3R& v1, + __host__ __device__ inline friend Vector3R cross(const Vector3R& v1, const Vector3R& v2); public: diff --git a/mcbooster/functors/Calculate.h b/mcbooster/functors/Calculate.h index f2a0048..d2b79e9 100644 --- a/mcbooster/functors/Calculate.h +++ b/mcbooster/functors/Calculate.h @@ -57,7 +57,7 @@ struct Calculate } - __host__ __device__ RESULT operator()(GT2 &particles) + __host__ __device__ inline RESULT operator()(GT2 &particles) { Vector4R* _Particles[2]; @@ -68,7 +68,7 @@ struct Calculate return Function(2, _Particles); } - __host__ __device__ RESULT operator()(GT3 &particles) + __host__ __device__ inline RESULT operator()(GT3 &particles) { Vector4R* _Particles[3]; @@ -80,7 +80,7 @@ struct Calculate return Function(3, _Particles); } - __host__ __device__ RESULT operator()(GT4 &particles) + __host__ __device__ inline RESULT operator()(GT4 &particles) { Vector4R* _Particles[4]; @@ -94,7 +94,7 @@ struct Calculate } - __host__ __device__ RESULT operator()(GT5 &particles) + __host__ __device__ inline RESULT operator()(GT5 &particles) { Vector4R* _Particles[5]; @@ -108,7 +108,7 @@ struct Calculate return Function(5, _Particles); } - __host__ __device__ RESULT operator()(GT6 &particles) + __host__ __device__ inline RESULT operator()(GT6 &particles) { Vector4R* _Particles[6]; @@ -123,7 +123,7 @@ struct Calculate return Function(6, _Particles); } - __host__ __device__ RESULT operator()(GT7 &particles) + __host__ __device__ inline RESULT operator()(GT7 &particles) { Vector4R* _Particles[7]; @@ -139,7 +139,7 @@ struct Calculate return Function(7, _Particles); } - __host__ __device__ RESULT operator()(GT8 &particles) + __host__ __device__ inline RESULT operator()(GT8 &particles) { Vector4R* _Particles[8]; @@ -157,7 +157,7 @@ struct Calculate } - __host__ __device__ RESULT operator()(GT9 &particles) + __host__ __device__ inline RESULT operator()(GT9 &particles) { Vector4R* _Particles[9]; @@ -175,7 +175,7 @@ struct Calculate return Function(9, _Particles); } - __host__ __device__ RESULT operator()(GT10 &particles) + __host__ __device__ inline RESULT operator()(GT10 &particles) { Vector4R* _Particles[10]; @@ -216,7 +216,7 @@ struct Calculate2 } - __host__ __device__ void operator()(GT2 particles) + __host__ __device__ inline void operator()(GT2 particles) { Vector4R* _Particles[2]; @@ -227,7 +227,7 @@ struct Calculate2 Function(2, _Particles); } - __host__ __device__ void operator()(GT3 particles) + __host__ __device__ inline void operator()(GT3 particles) { Vector4R* _Particles[3]; @@ -239,7 +239,7 @@ struct Calculate2 Function(3, _Particles); } - __host__ __device__ void operator()(GT4 particles) + __host__ __device__ inline void operator()(GT4 particles) { Vector4R* _Particles[4]; @@ -253,7 +253,7 @@ struct Calculate2 } - __host__ __device__ void operator()(GT5 particles) + __host__ __device__ inline void operator()(GT5 particles) { Vector4R* _Particles[5]; @@ -267,7 +267,7 @@ struct Calculate2 Function(5, _Particles); } - __host__ __device__ void operator()(GT6 particles) + __host__ __device__ inline void operator()(GT6 particles) { Vector4R* _Particles[6]; @@ -282,7 +282,7 @@ struct Calculate2 Function(6, _Particles); } - __host__ __device__ void operator()(GT7 particles) + __host__ __device__ inline void operator()(GT7 particles) { Vector4R* _Particles[7]; @@ -298,7 +298,7 @@ struct Calculate2 Function(7, _Particles); } - __host__ __device__ void operator()(GT8 particles) + __host__ __device__ inline void operator()(GT8 particles) { Vector4R* _Particles[8]; @@ -316,7 +316,7 @@ struct Calculate2 } - __host__ __device__ void operator()(GT9 particles) + __host__ __device__ inline void operator()(GT9 particles) { Vector4R* _Particles[9]; @@ -334,7 +334,7 @@ struct Calculate2 Function(9, _Particles); } - __host__ __device__ void operator()(GT10 particles) + __host__ __device__ inline void operator()(GT10 particles) { Vector4R* _Particles[10]; @@ -375,7 +375,7 @@ struct Calculate3 } - __host__ __device__ void operator()(GTR3 tuples) + __host__ __device__ inline void operator()(GTR3 tuples) { GReal_t* _real; @@ -388,7 +388,7 @@ struct Calculate3 Function(2, _Particles, _real); } - __host__ __device__ void operator()(GTR4 tuples) + __host__ __device__ inline void operator()(GTR4 tuples) { GReal_t* _real; @@ -404,7 +404,7 @@ struct Calculate3 } - __host__ __device__ void operator()(GTR5 tuples) + __host__ __device__ inline void operator()(GTR5 tuples) { GReal_t* _real; @@ -420,7 +420,7 @@ struct Calculate3 Function(4, _Particles, _real); } - __host__ __device__ void operator()(GTR6 tuples) + __host__ __device__ inline void operator()(GTR6 tuples) { GReal_t* _real; @@ -437,7 +437,7 @@ struct Calculate3 Function(5, _Particles, _real); } - __host__ __device__ void operator()(GTR7 tuples) + __host__ __device__ inline void operator()(GTR7 tuples) { GReal_t* _real; @@ -455,7 +455,7 @@ struct Calculate3 Function(6, _Particles, _real); } - __host__ __device__ void operator()(GTR8 tuples) + __host__ __device__ inline void operator()(GTR8 tuples) { GReal_t* _real; @@ -475,7 +475,7 @@ struct Calculate3 } - __host__ __device__ void operator()(GTR9 tuples) + __host__ __device__ inline void operator()(GTR9 tuples) { GReal_t* _real; @@ -495,7 +495,7 @@ struct Calculate3 Function(8, _Particles, _real); } - __host__ __device__ void operator()(GTR10 tuples) + __host__ __device__ inline void operator()(GTR10 tuples) { GReal_t* _real; diff --git a/mcbooster/functors/DecayMother.h b/mcbooster/functors/DecayMother.h index 4acda35..9a15213 100644 --- a/mcbooster/functors/DecayMother.h +++ b/mcbooster/functors/DecayMother.h @@ -100,7 +100,7 @@ struct DecayMother } - __host__ __device__ GReal_t pdk(const GReal_t a, const GReal_t b, + __host__ __device__ inline GReal_t pdk(const GReal_t a, const GReal_t b, const GReal_t c) const { //the PDK function @@ -109,7 +109,7 @@ struct DecayMother return x; } - __host__ __device__ void bbsort( GReal_t *array, GInt_t n) + __host__ __device__ inline void bbsort( GReal_t *array, GInt_t n) { // Improved bubble sort @@ -133,7 +133,7 @@ struct DecayMother } - __host__ __device__ GUInt_t hash(GUInt_t a) + __host__ __device__ inline GUInt_t hash(GUInt_t a) { a = (a + 0x7ed55d16) + (a << 12); a = (a ^ 0xc761c23c) ^ (a >> 19); @@ -144,7 +144,7 @@ struct DecayMother return a; } - __host__ __device__ GReal_t process(const GInt_t evt, Vector4R** daugters) + __host__ __device__ inline GReal_t process(const GInt_t evt, Vector4R** daugters) { thrust::random::default_random_engine randEng( hash(evt)*fSeed); @@ -257,7 +257,7 @@ struct DecayMother } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT2 &particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT2 &particles) { Vector4R* _Particles[2]; @@ -268,7 +268,7 @@ struct DecayMother } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT3 &particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT3 &particles) { Vector4R* _Particles[3]; @@ -280,7 +280,7 @@ struct DecayMother } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT4 &particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT4 &particles) { Vector4R* _Particles[4]; @@ -294,7 +294,7 @@ struct DecayMother } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT5 &particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT5 &particles) { Vector4R* _Particles[5]; @@ -308,7 +308,7 @@ struct DecayMother } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT6 &particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT6 &particles) { Vector4R* _Particles[6]; @@ -323,7 +323,7 @@ struct DecayMother } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT7 &particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT7 &particles) { Vector4R* _Particles[7]; @@ -339,7 +339,7 @@ struct DecayMother } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT8 &particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT8 &particles) { Vector4R* _Particles[8]; @@ -356,7 +356,7 @@ struct DecayMother } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT9 &particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT9 &particles) { Vector4R* _Particles[9]; @@ -374,7 +374,7 @@ struct DecayMother } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT10 &particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT10 &particles) { Vector4R* _Particles[10]; diff --git a/mcbooster/functors/DecayMothers.h b/mcbooster/functors/DecayMothers.h index 65309d5..d04e641 100644 --- a/mcbooster/functors/DecayMothers.h +++ b/mcbooster/functors/DecayMothers.h @@ -56,7 +56,7 @@ struct DecayMothers { } - __host__ __device__ GReal_t pdk(const GReal_t a, const GReal_t b, + __host__ __device__ inline GReal_t pdk(const GReal_t a, const GReal_t b, const GReal_t c) const { //the PDK function @@ -65,7 +65,7 @@ struct DecayMothers return x; } - __host__ __device__ void bbsort(GReal_t *array, GInt_t n) + __host__ __device__ inline void bbsort(GReal_t *array, GInt_t n) { // Improved bubble sort for (GInt_t c = 0; c < n; c++) @@ -88,7 +88,7 @@ struct DecayMothers } - __host__ __device__ GUInt_t hash(GUInt_t a) + __host__ __device__ inline GUInt_t hash(GUInt_t a) { a = (a + 0x7ed55d16) + (a << 12); a = (a ^ 0xc761c23c) ^ (a >> 19); @@ -99,7 +99,7 @@ struct DecayMothers return a; } - __host__ __device__ GReal_t process(const GInt_t evt, Vector4R** particles) + __host__ __device__ inline GReal_t process(const GInt_t evt, Vector4R** particles) { @@ -247,14 +247,14 @@ struct DecayMothers } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT2 &particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT2 &particles) { //do nothing, will never be called return 0.0; } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT3& particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT3& particles) { Vector4R* _Particles[3]; @@ -266,7 +266,7 @@ struct DecayMothers } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT4& particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT4& particles) { Vector4R* _Particles[4]; @@ -280,7 +280,7 @@ struct DecayMothers } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT5& particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT5& particles) { Vector4R* _Particles[5]; @@ -294,7 +294,7 @@ struct DecayMothers } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT6& particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT6& particles) { Vector4R* _Particles[6]; @@ -309,7 +309,7 @@ struct DecayMothers } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT7& particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT7& particles) { Vector4R* _Particles[7]; @@ -325,7 +325,7 @@ struct DecayMothers } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT8& particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT8& particles) { Vector4R* _Particles[8]; @@ -342,7 +342,7 @@ struct DecayMothers } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT9& particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT9& particles) { Vector4R* _Particles[9]; @@ -360,7 +360,7 @@ struct DecayMothers } - __host__ __device__ GReal_t operator()(const GInt_t evt, GT10& particles) + __host__ __device__ inline GReal_t operator()(const GInt_t evt, GT10& particles) { Vector4R* _Particles[10]; diff --git a/mcbooster/functors/FlagAcceptReject.h b/mcbooster/functors/FlagAcceptReject.h index db93cb9..6a625bb 100644 --- a/mcbooster/functors/FlagAcceptReject.h +++ b/mcbooster/functors/FlagAcceptReject.h @@ -56,7 +56,7 @@ struct FlagAcceptReject /** * hash function. Generate hashs to be used in random number generation initialization */ - __host__ __device__ GUInt_t hash(GUInt_t a) + __host__ __device__ inline GUInt_t hash(GUInt_t a) { a = (a + 0x7ed55d16) + (a << 12); a = (a ^ 0xc761c23c) ^ (a >> 19); @@ -70,7 +70,7 @@ struct FlagAcceptReject * operator(). Takes the events index and weight and so flag it as accepted and rejected * */ - __host__ __device__ GBool_t operator ()(GLong_t idx, GReal_t weight) + __host__ __device__ inline GBool_t operator ()(GLong_t idx, GReal_t weight) { GUInt_t seed = hash(idx+68464654684); thrust::default_random_engine randEng(seed); diff --git a/mcbooster/functors/IsAccepted.h b/mcbooster/functors/IsAccepted.h index b63e917..514d083 100644 --- a/mcbooster/functors/IsAccepted.h +++ b/mcbooster/functors/IsAccepted.h @@ -42,7 +42,7 @@ namespace mcbooster struct isAccepted { __host__ __device__ - bool operator()(const int x) + inline bool operator()(const int x) { return (x == 1 ) ; } diff --git a/mcbooster/functors/RandGen.h b/mcbooster/functors/RandGen.h index c7118ba..f1572db 100644 --- a/mcbooster/functors/RandGen.h +++ b/mcbooster/functors/RandGen.h @@ -53,7 +53,7 @@ struct RandGen /** * hash function. Generate hashs to be used in random number generation initialization */ - __host__ __device__ GUInt_t hash(GUInt_t a) + __host__ __device__ inline GUInt_t hash(GUInt_t a) { a = (a + 0x7ed55d16) + (a << 12); a = (a ^ 0xc761c23c) ^ (a >> 19); @@ -67,7 +67,7 @@ struct RandGen /** * operator(). Calculate and set random numbers. It takes the index of the event. */ - __host__ __device__ void operator ()(GLong_t idx) + __host__ __device__ inline void operator ()(GLong_t idx) { GUInt_t seed = hash(idx); thrust::random::default_random_engine randEng(seed); @@ -91,7 +91,7 @@ struct RandGen2 /** * operator(). Calculate and set random numbers. It takes the index of the event. */ - __host__ __device__ GReal_t operator ()(GInt_t idx) + __host__ __device__ inline GReal_t operator ()(GInt_t idx) { thrust::random::default_random_engine randEng; diff --git a/src/Generate.cpp b/src/Generate.cpp index a1b9764..235b8b6 100644 --- a/src/Generate.cpp +++ b/src/Generate.cpp @@ -64,7 +64,7 @@ struct Dataset: public IFunctionArray dim = 4; } - __host__ __device__ GReal_t cosHELANG(const Vector4R p, const Vector4R q, + __host__ __device__ inline GReal_t cosHELANG(const Vector4R p, const Vector4R q, const Vector4R d) { GReal_t pd = p * d; @@ -79,7 +79,7 @@ struct Dataset: public IFunctionArray } - __host__ __device__ GReal_t deltaAngle(const Vector4R& p4_p, + __host__ __device__ inline GReal_t deltaAngle(const Vector4R& p4_p, const Vector4R& p4_d1, const Vector4R& p4_d2, const Vector4R& p4_h1, const Vector4R& p4_h2) { @@ -116,7 +116,7 @@ struct Dataset: public IFunctionArray } __host__ __device__ - void operator()(const GInt_t n, Vector4R** particles, GReal_t* variables) + inline void operator()(const GInt_t n, Vector4R** particles, GReal_t* variables) { Vector4R pJpsi = *particles[0]; Vector4R pK = *particles[1]; diff --git a/src/Generate.cu b/src/Generate.cu index 1d661a1..5f334bf 100644 --- a/src/Generate.cu +++ b/src/Generate.cu @@ -64,7 +64,7 @@ struct Dataset: public IFunctionArray dim = 4; } - __host__ __device__ GReal_t cosHELANG(const Vector4R p, const Vector4R q, + __host__ __device__ inline GReal_t cosHELANG(const Vector4R p, const Vector4R q, const Vector4R d) { GReal_t pd = p * d; @@ -79,7 +79,7 @@ struct Dataset: public IFunctionArray } - __host__ __device__ GReal_t deltaAngle(const Vector4R& p4_p, + __host__ __device__ inline GReal_t deltaAngle(const Vector4R& p4_p, const Vector4R& p4_d1, const Vector4R& p4_d2, const Vector4R& p4_h1, const Vector4R& p4_h2) { @@ -116,7 +116,7 @@ struct Dataset: public IFunctionArray } __host__ __device__ - void operator()(const GInt_t n, Vector4R** particles, GReal_t* variables) + inline void operator()(const GInt_t n, Vector4R** particles, GReal_t* variables) { Vector4R pJpsi = *particles[0]; Vector4R pK = *particles[1];