Skip to content

Commit

Permalink
Simple linear-solver optimize (#283)
Browse files Browse the repository at this point in the history
* low-hanging fruit optimization using just solver/precon options

* removed partial assembly if's where partial assembly is not supported.  However, setting partial assembly to true is slightly slower

* lin-solve options pushed to hpp

* adding more AMG optimizations

* enforce style

* changing default tolerances back to previous values

* additional tolerance controls for cal-perf and updated ref soln to heatedBox

* enforce style

* fixed issure with SOR weight on helmholtz solve

* enforce style

* fix issue with smoother weight on mass inverse

* fixes to options according to hypre guidance (HYPRE_BoomerAMGSetMaxIter as a precon) and apparent smoother relax weight not functioning as intended

* max iters for amg precon set to 1, updates to input files for two tests and reduced rel tol

* further reduing tolerance on lomach-flow tests

* further reduction in test tols

* raised rel. tol to 1e-10

* switch rel tol check to delta check in lequere test, all atol to 1e-12

* collected default solver tols to single defaults

* further solver setting tweaks, reduced test tols

* removed unintentional underscore in tomboulides tol setting labels

* switch tomboulides underscore tol labels to dashes for consitency

* upped default tolerances and switched default NI to off

* switched tolerances of test checks to be consitent with default lin-sol tol and replacing ref soln.  numerical-integ defaults to false now

* enforce style

* commented copy lines for ref solns as this was causing issues on the gpu

* updated sponge ref soln, reduced tol of axi-symm arans test, and removed a leftover ref sol copy line

* update pipe arans soln and reduced sponge test tol

---------

Co-authored-by: Sigfried Haering <shaering@oden.utexas.edu>
  • Loading branch information
shaering and Sigfried Haering authored Jun 22, 2024
1 parent b799fa7 commit c1a8af5
Show file tree
Hide file tree
Showing 32 changed files with 301 additions and 197 deletions.
16 changes: 8 additions & 8 deletions src/M2ulPhyS2Boltzmann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,12 @@ void M2ulPhyS::fetch(TPS::Tps2Boltzmann &interface) {
mfem::ParFiniteElementSpace *reaction_rates_fes(&(interface.NativeFes(TPS::Tps2Boltzmann::Index::ReactionRates)));
externalReactionRates.reset(new mfem::ParGridFunction(reaction_rates_fes));
interface.interpolateToNativeFES(*externalReactionRates, TPS::Tps2Boltzmann::Index::ReactionRates);
#if defined(_CUDA_) || defined(_HIP_)
const double * data(externalReactionRates->Read() );
int size(externalReactionRates->FESpace()->GetNDofs() );
assert(externalReactionRates->FESpace()->GetOrdering() == mfem::Ordering::byNODES);
gpu::deviceSetChemistryReactionData<<<1, 1>>>(data, size, chemistry_);
#else
chemistry_->setGridFunctionRates(*externalReactionRates);
#endif
#if defined(_CUDA_) || defined(_HIP_)
const double *data(externalReactionRates->Read());
int size(externalReactionRates->FESpace()->GetNDofs());
assert(externalReactionRates->FESpace()->GetOrdering() == mfem::Ordering::byNODES);
gpu::deviceSetChemistryReactionData<<<1, 1>>>(data, size, chemistry_);
#else
chemistry_->setGridFunctionRates(*externalReactionRates);
#endif
}
91 changes: 45 additions & 46 deletions src/calorically_perfect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,23 @@ CaloricallyPerfectThermoChem::CaloricallyPerfectThermoChem(mfem::ParMesh *pmesh,

tpsP_->getInput("loMach/openSystem", domain_is_open_, false);

tpsP_->getInput("loMach/calperfect/linear-solver-rtol", rtol_, 1e-12);
tpsP_->getInput("loMach/calperfect/linear-solver-max-iter", max_iter_, 1000);
tpsP_->getInput("loMach/calperfect/linear-solver-verbosity", pl_solve_, 0);
// NOTE: this should default to FALSE (but would break all related test cases...)
tpsP_->getInput("loMach/calperfect/numerical-integ", numerical_integ_, true);

tpsP_->getInput("loMach/calperfect/linear-solver-rtol", rtol_, default_rtol_);
tpsP_->getInput("loMach/calperfect/linear-solver-max-iter", max_iter_, default_max_iter_);
tpsP_->getInput("loMach/calperfect/linear-solver-verbosity", pl_solve_, 0);

// not deleting above block to maintain backwards-compatability
tpsP_->getInput("loMach/calperfect/hsolve-rtol", hsolve_rtol_, rtol_);
tpsP_->getInput("loMach/calperfect/hsolve-atol", hsolve_atol_, default_atol_);
tpsP_->getInput("loMach/calperfect/hsolve-max-iter", hsolve_max_iter_, max_iter_);
tpsP_->getInput("loMach/calperfect/hsolve-verbosity", hsolve_pl_, pl_solve_);

tpsP_->getInput("loMach/calperfect/msolve-rtol", mass_inverse_rtol_, rtol_);
tpsP_->getInput("loMach/calperfect/msolve-atol", mass_inverse_atol_, default_atol_);
tpsP_->getInput("loMach/calperfect/msolve-max-iter", mass_inverse_max_iter_, max_iter_);
tpsP_->getInput("loMach/calperfect/msolve-verbosity", mass_inverse_pl_, pl_solve_);
}

CaloricallyPerfectThermoChem::~CaloricallyPerfectThermoChem() {
Expand Down Expand Up @@ -400,6 +413,9 @@ void CaloricallyPerfectThermoChem::initializeSelf() {
void CaloricallyPerfectThermoChem::initializeOperators() {
dt_ = time_coeff_.dt;

// polynomial order for smoother of precons
smoother_poly_order_ = order_ + 1;

Array<int> empty;

// GLL integration rule (Numerical Integration)
Expand Down Expand Up @@ -438,9 +454,6 @@ void CaloricallyPerfectThermoChem::initializeOperators() {
at_blfi->SetIntRule(&ir_nli);
}
At_form_->AddDomainIntegrator(at_blfi);
if (partial_assembly_) {
At_form_->SetAssemblyLevel(AssemblyLevel::PARTIAL);
}
At_form_->Assemble();
At_form_->FormSystemMatrix(empty, At_);
if (rank0_) std::cout << "CaloricallyPerfectThermoChem At operator set" << endl;
Expand All @@ -466,9 +479,6 @@ void CaloricallyPerfectThermoChem::initializeOperators() {
// msrho_blfi->SetIntRule(&ir_di);
}
MsRho_form_->AddDomainIntegrator(msrho_blfi);
if (partial_assembly_) {
MsRho_form_->SetAssemblyLevel(AssemblyLevel::PARTIAL);
}
MsRho_form_->Assemble();
MsRho_form_->FormSystemMatrix(empty, MsRho_);
if (rank0_) std::cout << "CaloricallyPerfectThermoChem MsRho operator set" << endl;
Expand All @@ -483,9 +493,6 @@ void CaloricallyPerfectThermoChem::initializeOperators() {
}
Ht_form_->AddDomainIntegrator(hmt_blfi);
Ht_form_->AddDomainIntegrator(hdt_blfi);
if (partial_assembly_) {
Ht_form_->SetAssemblyLevel(AssemblyLevel::PARTIAL);
}
Ht_form_->Assemble();
Ht_form_->FormSystemMatrix(temp_ess_tdof_, Ht_);
if (rank0_) std::cout << "CaloricallyPerfectThermoChem Ht operator set" << endl;
Expand All @@ -496,33 +503,34 @@ void CaloricallyPerfectThermoChem::initializeOperators() {
MsInvPC_ = new OperatorJacobiSmoother(diag_pa, empty);
} else {
MsInvPC_ = new HypreSmoother(*Ms_.As<HypreParMatrix>());
dynamic_cast<HypreSmoother *>(MsInvPC_)->SetType(HypreSmoother::Jacobi, 1);
dynamic_cast<HypreSmoother *>(MsInvPC_)->SetType(HypreSmoother::Jacobi, smoother_passes_);
dynamic_cast<HypreSmoother *>(MsInvPC_)->SetSOROptions(smoother_relax_weight_, smoother_relax_omega_);
dynamic_cast<HypreSmoother *>(MsInvPC_)->SetPolyOptions(smoother_poly_order_, smoother_poly_fraction_,
smoother_eig_est_);
}
MsInv_ = new CGSolver(sfes_->GetComm());
MsInv_->iterative_mode = false;
MsInv_->SetOperator(*Ms_);
MsInv_->SetPreconditioner(*MsInvPC_);
MsInv_->SetPrintLevel(pl_solve_);
MsInv_->SetRelTol(rtol_);
MsInv_->SetMaxIter(max_iter_);
MsInv_->SetPrintLevel(mass_inverse_pl_);
MsInv_->SetRelTol(mass_inverse_rtol_);
MsInv_->SetAbsTol(mass_inverse_atol_);
MsInv_->SetMaxIter(mass_inverse_max_iter_);

if (partial_assembly_) {
Vector diag_pa(sfes_->GetTrueVSize());
Ht_form_->AssembleDiagonal(diag_pa);
HtInvPC_ = new OperatorJacobiSmoother(diag_pa, temp_ess_tdof_);
} else {
HtInvPC_ = new HypreSmoother(*Ht_.As<HypreParMatrix>());
dynamic_cast<HypreSmoother *>(HtInvPC_)->SetType(HypreSmoother::Jacobi, 1);
}
HtInvPC_ = new HypreSmoother(*Ht_.As<HypreParMatrix>());
dynamic_cast<HypreSmoother *>(HtInvPC_)->SetType(HypreSmoother::Jacobi, smoother_passes_);
dynamic_cast<HypreSmoother *>(HtInvPC_)->SetSOROptions(hsmoother_relax_weight_, hsmoother_relax_omega_);
dynamic_cast<HypreSmoother *>(HtInvPC_)->SetPolyOptions(smoother_poly_order_, smoother_poly_fraction_,
smoother_eig_est_);

HtInv_ = new CGSolver(sfes_->GetComm());

HtInv_->iterative_mode = true;
HtInv_->SetOperator(*Ht_);
HtInv_->SetPreconditioner(*HtInvPC_);
HtInv_->SetPrintLevel(pl_solve_);
HtInv_->SetRelTol(rtol_);
HtInv_->SetMaxIter(max_iter_);
HtInv_->SetPrintLevel(hsolve_pl_);
HtInv_->SetRelTol(hsolve_rtol_);
HtInv_->SetAbsTol(hsolve_atol_);
HtInv_->SetMaxIter(hsolve_max_iter_);
if (rank0_) std::cout << "Temperature operators set" << endl;

// Qt .....................................
Expand All @@ -546,15 +554,19 @@ void CaloricallyPerfectThermoChem::initializeOperators() {
MqInvPC_ = new OperatorJacobiSmoother(diag_pa, empty);
} else {
MqInvPC_ = new HypreSmoother(*Mq_.As<HypreParMatrix>());
dynamic_cast<HypreSmoother *>(MqInvPC_)->SetType(HypreSmoother::Jacobi, 1);
dynamic_cast<HypreSmoother *>(MqInvPC_)->SetType(HypreSmoother::Jacobi, smoother_passes_);
dynamic_cast<HypreSmoother *>(MqInvPC_)->SetSOROptions(smoother_relax_weight_, smoother_relax_omega_);
dynamic_cast<HypreSmoother *>(MqInvPC_)->SetPolyOptions(smoother_poly_order_, smoother_poly_fraction_,
smoother_eig_est_);
}
MqInv_ = new CGSolver(sfes_->GetComm());
MqInv_->iterative_mode = false;
MqInv_->SetOperator(*Mq_);
MqInv_->SetPreconditioner(*MqInvPC_);
MqInv_->SetPrintLevel(pl_solve_);
MqInv_->SetRelTol(rtol_);
MqInv_->SetMaxIter(max_iter_);
MqInv_->SetPrintLevel(mass_inverse_pl_);
MqInv_->SetRelTol(mass_inverse_rtol_);
MqInv_->SetAbsTol(mass_inverse_atol_);
MqInv_->SetMaxIter(mass_inverse_max_iter_);

LQ_form_ = new ParBilinearForm(sfes_);
auto *lqd_blfi = new DiffusionIntegrator(*thermal_diff_total_coeff_);
Expand Down Expand Up @@ -664,15 +676,7 @@ void CaloricallyPerfectThermoChem::step() {
Ht_form_->Update();
Ht_form_->Assemble();
Ht_form_->FormSystemMatrix(temp_ess_tdof_, Ht_);

HtInv_->SetOperator(*Ht_);
if (partial_assembly_) {
delete HtInvPC_;
Vector diag_pa(sfes_->GetTrueVSize());
Ht_form_->AssembleDiagonal(diag_pa);
HtInvPC_ = new OperatorJacobiSmoother(diag_pa, temp_ess_tdof_);
HtInv_->SetPreconditioner(*HtInvPC_);
}

// Prepare for the solve
for (auto &temp_dbc : temp_dbcs_) {
Expand All @@ -681,12 +685,7 @@ void CaloricallyPerfectThermoChem::step() {
sfes_->GetRestrictionMatrix()->MultTranspose(resT_, resT_gf_);

Vector Xt2, Bt2;
if (partial_assembly_) {
auto *HC = Ht_.As<ConstrainedOperator>();
EliminateRHS(*Ht_form_, *HC, temp_ess_tdof_, Tn_next_gf_, resT_gf_, Xt2, Bt2, 1);
} else {
Ht_form_->FormLinearSystem(temp_ess_tdof_, Tn_next_gf_, resT_gf_, Ht_, Xt2, Bt2, 1);
}
Ht_form_->FormLinearSystem(temp_ess_tdof_, Tn_next_gf_, resT_gf_, Ht_, Xt2, Bt2, 1);

// solve helmholtz eq for temp
HtInv_->Mult(Bt2, Xt2);
Expand Down
32 changes: 28 additions & 4 deletions src/calorically_perfect.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,39 @@ class CaloricallyPerfectThermoChem : public ThermoChemModelBase {
// Flags
bool rank0_; /**< true if this is rank 0 */
bool partial_assembly_ = false; /**< Enable/disable partial assembly of forms. */
bool numerical_integ_ = true; /**< Enable/disable numerical integration rules of forms. */
bool numerical_integ_ = false; /**< Enable/disable numerical integration rules of forms. */
bool constant_viscosity_ = false; /**< Enable/disable constant viscosity */
bool constant_density_ = false; /**< Enable/disable constant density */
bool domain_is_open_ = false; /**< true if domain is open */

// Linear-solver-related options
int pl_solve_ = 0; /**< Verbosity level passed to mfem solvers */
int max_iter_; /**< Maximum number of linear solver iterations */
double rtol_ = 1e-12; /**< Linear solver relative tolerance */
int smoother_poly_order_;
double smoother_poly_fraction_ = 0.75;
int smoother_eig_est_ = 10;
int smoother_passes_ = 1;
double smoother_relax_weight_ = 0.4;
double smoother_relax_omega_ = 1.0;
double hsmoother_relax_weight_ = 0.8;
double hsmoother_relax_omega_ = 0.1;

// solver tolerance options
int pl_solve_; /**< Verbosity level passed to mfem solvers */
int max_iter_; /**< Maximum number of linear solver iterations */
double rtol_; /**< Linear solver relative tolerance */

int default_max_iter_ = 1000;
double default_rtol_ = 1.0e-10;
double default_atol_ = 1.0e-12;

int mass_inverse_pl_ = 0;
int mass_inverse_max_iter_;
double mass_inverse_rtol_;
double mass_inverse_atol_;

int hsolve_pl_ = 0;
int hsolve_max_iter_;
double hsolve_rtol_;
double hsolve_atol_;

// Boundary condition info
Array<int> temp_ess_attr_; /**< List of patches with Dirichlet BC on temperature */
Expand Down
2 changes: 1 addition & 1 deletion src/chemistry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ MFEM_HOST_DEVICE Chemistry::~Chemistry() {
}
}

MFEM_HOST_DEVICE void Chemistry::setRates(const double * data, int size) {
MFEM_HOST_DEVICE void Chemistry::setRates(const double *data, int size) {
for (int r = 0; r < numReactions_; r++) {
if (reactions_[r]->reactionModel == GRIDFUNCTION_RXN) {
GridFunctionReaction *rx = (GridFunctionReaction *)(reactions_[r]);
Expand Down
2 changes: 1 addition & 1 deletion src/chemistry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class Chemistry {

// Set the grid function rates for GRIDFUNCTION_RXN reaction types
void setGridFunctionRates(mfem::GridFunction &f);
MFEM_HOST_DEVICE void setRates(const double * data, int size);
MFEM_HOST_DEVICE void setRates(const double *data, int size);

// return Vector of reaction rate coefficients, with the size of numReaction_.
// WARNING(marc) I have removed "virtual" qualifier here assuming these functions will not
Expand Down
4 changes: 2 additions & 2 deletions src/gpu_constructor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,11 @@ __global__ void freeDeviceRadiation(Radiation *radiation) {
//---------------------------------------------------
// And finally device setters
//---------------------------------------------------
__global__ void deviceSetGridFunctionReactionData(const double * data, int size, GridFunctionReaction * reaction) {
__global__ void deviceSetGridFunctionReactionData(const double *data, int size, GridFunctionReaction *reaction) {
reaction->setData(data, size);
}

__global__ void deviceSetChemistryReactionData(const double * data, int size, Chemistry * chem) {
__global__ void deviceSetChemistryReactionData(const double *data, int size, Chemistry *chem) {
chem->setRates(data, size);
}

Expand Down
4 changes: 2 additions & 2 deletions src/gpu_constructor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ __global__ void freeDeviceChemistry(Chemistry *chem);
__global__ void freeDeviceRadiation(Radiation *radiation);

//! Set the data to a GridFunctionReaction
__global__ void deviceSetGridFunctionReactionData(const double * data, int size, GridFunctionReaction * reaction);
__global__ void deviceSetChemistryReactionData(const double * data, int size, Chemistry * chem);
__global__ void deviceSetGridFunctionReactionData(const double *data, int size, GridFunctionReaction *reaction);
__global__ void deviceSetChemistryReactionData(const double *data, int size, Chemistry *chem);

#endif // cuda or hip
} // namespace gpu
Expand Down
4 changes: 2 additions & 2 deletions src/reaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,12 @@ MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(int comp)

MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() {}

MFEM_HOST_DEVICE void GridFunctionReaction::setData(const double * data, int size) {
MFEM_HOST_DEVICE void GridFunctionReaction::setData(const double *data, int size) {
data_ = data + comp_ * size_;
size_ = size;
}

void GridFunctionReaction::setGridFunction(const mfem::GridFunction & f) {
void GridFunctionReaction::setGridFunction(const mfem::GridFunction &f) {
size_ = f.FESpace()->GetNDofs();
assert(comp_ < f.FESpace()->GetVDim());
assert(f.FESpace()->GetOrdering() == mfem::Ordering::byNODES);
Expand Down
4 changes: 2 additions & 2 deletions src/reaction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ class GridFunctionReaction : public Reaction {

MFEM_HOST_DEVICE virtual ~GridFunctionReaction();

void setGridFunction(const mfem::GridFunction & f);
void setGridFunction(const mfem::GridFunction &f);

MFEM_HOST_DEVICE void setData(const double * data, int size);
MFEM_HOST_DEVICE void setData(const double *data, int size);

MFEM_HOST_DEVICE virtual double computeRateCoefficient([[maybe_unused]] const double &T_h,
[[maybe_unused]] const double &T_e, const int &dofindex,
Expand Down
Loading

0 comments on commit c1a8af5

Please sign in to comment.