diff --git a/Makefile b/Makefile index 2406f9352..7f5c4d68a 100644 --- a/Makefile +++ b/Makefile @@ -232,7 +232,7 @@ else ifeq ($(COMPILER_NAME),NVHPC) CXXFLAGS += -fast else - CXXFLAGS += -ffast-math -funsafe-math-optimizations -fno-finite-math-only -fopenmp-simd + CXXFLAGS += -ffast-math -funsafe-math-optimizations -fno-finite-math-only endif endif endif diff --git a/atomic.h b/atomic.h index 4c0c58124..a4a86d613 100644 --- a/atomic.h +++ b/atomic.h @@ -58,7 +58,7 @@ inline auto get_nlevels(const int element, const int ion) -> int { } // Return the energy of (element,ion,level). -#pragma omp declare simd + [[nodiscard]] inline auto epsilon(const int element, const int ion, const int level) -> double { assert_testmodeonly(element < get_nelements()); assert_testmodeonly(ion < get_nions(element)); @@ -120,7 +120,7 @@ inline auto get_nphixstargets(const int element, const int ion, const int level) } // Return the statistical weight of (element,ion,level). -#pragma omp declare simd + [[nodiscard]] inline auto stat_weight(const int element, const int ion, const int level) -> double { assert_testmodeonly(element < get_nelements()); assert_testmodeonly(ion < get_nions(element)); diff --git a/ltepop.h b/ltepop.h index 9d7d1c8e6..08dbe26ae 100644 --- a/ltepop.h +++ b/ltepop.h @@ -4,11 +4,11 @@ #include [[nodiscard]] auto get_groundlevelpop(int nonemptymgi, int element, int ion) -> double; -#pragma omp declare simd + [[nodiscard]] auto calculate_levelpop(int nonemptymgi, int element, int ion, int level) -> double; -#pragma omp declare simd + [[nodiscard]] auto calculate_levelpop_lte(int nonemptymgi, int element, int ion, int level) -> double; -#pragma omp declare simd + [[nodiscard]] auto get_levelpop(int nonemptymgi, int element, int ion, int level) -> double; [[nodiscard]] auto calculate_sahafact(int element, int ion, int level, int upperionlevel, double T, double E_threshold) -> double; diff --git a/macroatom.cc b/macroatom.cc index 2bdc75ee4..c30766e5c 100644 --- a/macroatom.cc +++ b/macroatom.cc @@ -646,7 +646,7 @@ void macroatom_close_file() { // radiative deexcitation rate: paperII 3.5.2 // multiply by upper level population to get a rate per second -#pragma omp declare simd + auto rad_deexcitation_ratecoeff(const int nonemptymgi, const int element, const int ion, const int lower, const double epsilon_trans, const float A_ul, const double upperstatweight, const double nnlevelupper, const double t_current) -> double { @@ -690,7 +690,7 @@ auto rad_deexcitation_ratecoeff(const int nonemptymgi, const int element, const // radiative excitation rate: paperII 3.5.2 // multiply by lower level population to get a rate per second -#pragma omp declare simd + auto rad_excitation_ratecoeff(const int nonemptymgi, const int element, const int ion, const int lower, const int uptransindex, const double epsilon_trans, const double nnlevel_lower, const int lineindex, const double t_current) -> double { @@ -732,7 +732,7 @@ auto rad_excitation_ratecoeff(const int nonemptymgi, const int element, const in // radiative recombination rate: paperII 3.5.2 // multiply by upper level population to get a rate per second -#pragma omp declare simd + auto rad_recombination_ratecoeff(const float T_e, const float nne, const int element, const int upperion, const int upperionlevel, const int lowerionlevel, const int nonemptymgi) -> double { // it's probably faster to only check this condition outside this function @@ -777,7 +777,7 @@ auto stim_recombination_ratecoeff(const float nne, const int element, const int } // multiply by upper level population to get a rate per second -#pragma omp declare simd + auto col_recombination_ratecoeff(const float T_e, const float nne, const int element, const int upperion, const int upper, const int lower, const double epsilon_trans) -> double { // it's probably faster to only check this condition outside this function @@ -818,7 +818,7 @@ auto col_recombination_ratecoeff(const float T_e, const float nne, const int ele // collisional ionization rate: paperII 3.5.1 // multiply by lower level population to get a rate per second -#pragma omp declare simd + auto col_ionization_ratecoeff(const float T_e, const float nne, const int element, const int ion, const int lower, const int phixstargetindex, const double epsilon_trans) -> double { assert_testmodeonly(phixstargetindex >= 0); @@ -850,7 +850,7 @@ auto col_ionization_ratecoeff(const float T_e, const float nne, const int elemen } // multiply by upper level population to get a rate per second -#pragma omp declare simd + auto col_deexcitation_ratecoeff(const float T_e, const float nne, const double epsilon_trans, const int element, const int ion, const int upper, const LevelTransition &downtransition) -> double { const int lower = downtransition.targetlevelindex; @@ -901,7 +901,7 @@ auto col_deexcitation_ratecoeff(const float T_e, const float nne, const double e } // multiply by lower level population to get a rate per second -#pragma omp declare simd + auto col_excitation_ratecoeff(const float T_e, const float nne, const int element, const int ion, const int lower, const int uptransindex, const double epsilon_trans, const double lowerstatweight) -> double { diff --git a/macroatom.h b/macroatom.h index bee0939cc..69e13f6da 100644 --- a/macroatom.h +++ b/macroatom.h @@ -9,33 +9,29 @@ void macroatom_close_file(); void do_macroatom(Packet &pkt, const MacroAtomState &pktmastate); -#pragma omp declare simd [[nodiscard]] auto rad_deexcitation_ratecoeff(int nonemptymgi, int element, int ion, int lower, double epsilon_trans, float A_ul, double upperstatweight, double nnlevelupper, double t_current) -> double; -#pragma omp declare simd [[nodiscard]] auto rad_excitation_ratecoeff(int nonemptymgi, int element, int ion, int lower, int uptransindex, double epsilon_trans, double nnlevel_lower, int lineindex, double t_current) -> double; -#pragma omp declare simd [[nodiscard]] auto rad_recombination_ratecoeff(float T_e, float nne, int element, int upperion, int upperionlevel, int lowerionlevel, int nonemptymgi) -> double; -#pragma omp declare simd + [[nodiscard]] auto stim_recombination_ratecoeff(float nne, int element, int upperion, int upper, int lower, int nonemptymgi) -> double; -#pragma omp declare simd [[nodiscard]] auto col_recombination_ratecoeff(float T_e, float nne, int element, int upperion, int upper, int lower, double epsilon_trans) -> double; -#pragma omp declare simd + [[nodiscard]] auto col_ionization_ratecoeff(float T_e, float nne, int element, int ion, int lower, int phixstargetindex, double epsilon_trans) -> double; -#pragma omp declare simd + [[nodiscard]] auto col_deexcitation_ratecoeff(float T_e, float nne, double epsilon_trans, int element, int ion, int upper, const LevelTransition &downtransition) -> double; -#pragma omp declare simd + [[nodiscard]] auto col_excitation_ratecoeff(float T_e, float nne, int element, int ion, int lower, int uptransindex, double epsilon_trans, double lowerstatweight) -> double; diff --git a/nonthermal.h b/nonthermal.h index cb5455996..5218feb98 100644 --- a/nonthermal.h +++ b/nonthermal.h @@ -18,7 +18,7 @@ void solve_spencerfano(int nonemptymgi, int timestep, int iteration); void calculate_deposition_rate_density(int nonemptymgi, int timestep, HeatingCoolingRates &heatingcoolingrates); [[nodiscard]] auto get_deposition_rate_density(int nonemptymgi) -> double; [[nodiscard]] auto get_nt_frac_heating(int modelgridindex) -> float; -#pragma omp declare simd + [[nodiscard]] auto nt_excitation_ratecoeff(int nonemptymgi, int element, int ion, int lowerlevel, int uptransindex, int lineindex) -> double; void do_ntalpha_deposit(Packet &pkt); diff --git a/radfield.cc b/radfield.cc index 3b114c4d8..d834c3302 100644 --- a/radfield.cc +++ b/radfield.cc @@ -112,7 +112,7 @@ constexpr auto get_bin_nu_lower(const int binindex) -> double { } // find the left-closed bin [nu_lower, nu_upper) that nu belongs to -#pragma omp declare simd + constexpr auto select_bin(const double nu) -> int { if (nu < nu_lower_first_initial) { return -2; // out of range, nu lower than lowest bin's lower boundary @@ -224,7 +224,7 @@ void update_bfestimators(const int nonemptymgi, const double distance_e_cmf, con globals::bfestim_nu_edge.data(); const auto bfestimcount = globals::bfestimcount; -#pragma omp simd + for (auto bfestimindex = bfestimbegin; bfestimindex < bfestimend; bfestimindex++) { atomicadd(bfrate_raw[(nonemptymgi * bfestimcount) + bfestimindex], phixslist.gamma_contr[bfestimindex] * distance_e_cmf_over_nu);