Simple linear-solver optimize (#283)

* low-hanging fruit optimization using just solver/precon options * removed partial assembly if's where partial assembly is not supported. However, setting partial assembly to true is slightly slower * lin-solve options pushed to hpp * adding more AMG optimizations * enforce style * changing default tolerances back to previous values * additional tolerance controls for cal-perf and updated ref soln to heatedBox * enforce style * fixed issure with SOR weight on helmholtz solve * enforce style * fix issue with smoother weight on mass inverse * fixes to options according to hypre guidance (HYPRE_BoomerAMGSetMaxIter as a precon) and apparent smoother relax weight not functioning as intended * max iters for amg precon set to 1, updates to input files for two tests and reduced rel tol * further reduing tolerance on lomach-flow tests * further reduction in test tols * raised rel. tol to 1e-10 * switch rel tol check to delta check in lequere test, all atol to 1e-12 * collected default solver tols to single defaults * further solver setting tweaks, reduced test tols * removed unintentional underscore in tomboulides tol setting labels * switch tomboulides underscore tol labels to dashes for consitency * upped default tolerances and switched default NI to off * switched tolerances of test checks to be consitent with default lin-sol tol and replacing ref soln. numerical-integ defaults to false now * enforce style * commented copy lines for ref solns as this was causing issues on the gpu * updated sponge ref soln, reduced tol of axi-symm arans test, and removed a leftover ref sol copy line * update pipe arans soln and reduced sponge test tol --------- Co-authored-by: Sigfried Haering <shaering@oden.utexas.edu>
pecos · Jun 22, 2024 · c1a8af5 · c1a8af5
1 parent b799fa7
commit c1a8af5
Show file tree

Hide file tree

Showing 32 changed files with 301 additions and 197 deletions.
diff --git a/src/M2ulPhyS2Boltzmann.cpp b/src/M2ulPhyS2Boltzmann.cpp
@@ -90,12 +90,12 @@ void M2ulPhyS::fetch(TPS::Tps2Boltzmann &interface) {
   mfem::ParFiniteElementSpace *reaction_rates_fes(&(interface.NativeFes(TPS::Tps2Boltzmann::Index::ReactionRates)));
   externalReactionRates.reset(new mfem::ParGridFunction(reaction_rates_fes));
   interface.interpolateToNativeFES(*externalReactionRates, TPS::Tps2Boltzmann::Index::ReactionRates);
-  #if defined(_CUDA_) || defined(_HIP_)
-    const double * data(externalReactionRates->Read() );
-    int size(externalReactionRates->FESpace()->GetNDofs() );
-    assert(externalReactionRates->FESpace()->GetOrdering() == mfem::Ordering::byNODES);
-    gpu::deviceSetChemistryReactionData<<<1, 1>>>(data, size, chemistry_);
-  #else
-    chemistry_->setGridFunctionRates(*externalReactionRates);
-  #endif
+#if defined(_CUDA_) || defined(_HIP_)
+  const double *data(externalReactionRates->Read());
+  int size(externalReactionRates->FESpace()->GetNDofs());
+  assert(externalReactionRates->FESpace()->GetOrdering() == mfem::Ordering::byNODES);
+  gpu::deviceSetChemistryReactionData<<<1, 1>>>(data, size, chemistry_);
+#else
+  chemistry_->setGridFunctionRates(*externalReactionRates);
+#endif
 }
diff --git a/src/calorically_perfect.cpp b/src/calorically_perfect.cpp
@@ -115,10 +115,23 @@ CaloricallyPerfectThermoChem::CaloricallyPerfectThermoChem(mfem::ParMesh *pmesh,
 
   tpsP_->getInput("loMach/openSystem", domain_is_open_, false);
 
-  tpsP_->getInput("loMach/calperfect/linear-solver-rtol", rtol_, 1e-12);
-  tpsP_->getInput("loMach/calperfect/linear-solver-max-iter", max_iter_, 1000);
-  tpsP_->getInput("loMach/calperfect/linear-solver-verbosity", pl_solve_, 0);
+  // NOTE: this should default to FALSE (but would break all related test cases...)
   tpsP_->getInput("loMach/calperfect/numerical-integ", numerical_integ_, true);
+
+  tpsP_->getInput("loMach/calperfect/linear-solver-rtol", rtol_, default_rtol_);
+  tpsP_->getInput("loMach/calperfect/linear-solver-max-iter", max_iter_, default_max_iter_);
+  tpsP_->getInput("loMach/calperfect/linear-solver-verbosity", pl_solve_, 0);
+
+  // not deleting above block to maintain backwards-compatability
+  tpsP_->getInput("loMach/calperfect/hsolve-rtol", hsolve_rtol_, rtol_);
+  tpsP_->getInput("loMach/calperfect/hsolve-atol", hsolve_atol_, default_atol_);
+  tpsP_->getInput("loMach/calperfect/hsolve-max-iter", hsolve_max_iter_, max_iter_);
+  tpsP_->getInput("loMach/calperfect/hsolve-verbosity", hsolve_pl_, pl_solve_);
+
+  tpsP_->getInput("loMach/calperfect/msolve-rtol", mass_inverse_rtol_, rtol_);
+  tpsP_->getInput("loMach/calperfect/msolve-atol", mass_inverse_atol_, default_atol_);
+  tpsP_->getInput("loMach/calperfect/msolve-max-iter", mass_inverse_max_iter_, max_iter_);
+  tpsP_->getInput("loMach/calperfect/msolve-verbosity", mass_inverse_pl_, pl_solve_);
 }
 
 CaloricallyPerfectThermoChem::~CaloricallyPerfectThermoChem() {
@@ -400,6 +413,9 @@ void CaloricallyPerfectThermoChem::initializeSelf() {
 void CaloricallyPerfectThermoChem::initializeOperators() {
   dt_ = time_coeff_.dt;
 
+  // polynomial order for smoother of precons
+  smoother_poly_order_ = order_ + 1;
+
   Array<int> empty;
 
   // GLL integration rule (Numerical Integration)
@@ -438,9 +454,6 @@ void CaloricallyPerfectThermoChem::initializeOperators() {
     at_blfi->SetIntRule(&ir_nli);
   }
   At_form_->AddDomainIntegrator(at_blfi);
-  if (partial_assembly_) {
-    At_form_->SetAssemblyLevel(AssemblyLevel::PARTIAL);
-  }
   At_form_->Assemble();
   At_form_->FormSystemMatrix(empty, At_);
   if (rank0_) std::cout << "CaloricallyPerfectThermoChem At operator set" << endl;
@@ -466,9 +479,6 @@ void CaloricallyPerfectThermoChem::initializeOperators() {
     // msrho_blfi->SetIntRule(&ir_di);
   }
   MsRho_form_->AddDomainIntegrator(msrho_blfi);
-  if (partial_assembly_) {
-    MsRho_form_->SetAssemblyLevel(AssemblyLevel::PARTIAL);
-  }
   MsRho_form_->Assemble();
   MsRho_form_->FormSystemMatrix(empty, MsRho_);
   if (rank0_) std::cout << "CaloricallyPerfectThermoChem MsRho operator set" << endl;
@@ -483,9 +493,6 @@ void CaloricallyPerfectThermoChem::initializeOperators() {
   }
   Ht_form_->AddDomainIntegrator(hmt_blfi);
   Ht_form_->AddDomainIntegrator(hdt_blfi);
-  if (partial_assembly_) {
-    Ht_form_->SetAssemblyLevel(AssemblyLevel::PARTIAL);
-  }
   Ht_form_->Assemble();
   Ht_form_->FormSystemMatrix(temp_ess_tdof_, Ht_);
   if (rank0_) std::cout << "CaloricallyPerfectThermoChem Ht operator set" << endl;
@@ -496,33 +503,34 @@ void CaloricallyPerfectThermoChem::initializeOperators() {
     MsInvPC_ = new OperatorJacobiSmoother(diag_pa, empty);
   } else {
     MsInvPC_ = new HypreSmoother(*Ms_.As<HypreParMatrix>());
-    dynamic_cast<HypreSmoother *>(MsInvPC_)->SetType(HypreSmoother::Jacobi, 1);
+    dynamic_cast<HypreSmoother *>(MsInvPC_)->SetType(HypreSmoother::Jacobi, smoother_passes_);
+    dynamic_cast<HypreSmoother *>(MsInvPC_)->SetSOROptions(smoother_relax_weight_, smoother_relax_omega_);
+    dynamic_cast<HypreSmoother *>(MsInvPC_)->SetPolyOptions(smoother_poly_order_, smoother_poly_fraction_,
+                                                            smoother_eig_est_);
   }
   MsInv_ = new CGSolver(sfes_->GetComm());
   MsInv_->iterative_mode = false;
   MsInv_->SetOperator(*Ms_);
   MsInv_->SetPreconditioner(*MsInvPC_);
-  MsInv_->SetPrintLevel(pl_solve_);
-  MsInv_->SetRelTol(rtol_);
-  MsInv_->SetMaxIter(max_iter_);
+  MsInv_->SetPrintLevel(mass_inverse_pl_);
+  MsInv_->SetRelTol(mass_inverse_rtol_);
+  MsInv_->SetAbsTol(mass_inverse_atol_);
+  MsInv_->SetMaxIter(mass_inverse_max_iter_);
 
-  if (partial_assembly_) {
-    Vector diag_pa(sfes_->GetTrueVSize());
-    Ht_form_->AssembleDiagonal(diag_pa);
-    HtInvPC_ = new OperatorJacobiSmoother(diag_pa, temp_ess_tdof_);
-  } else {
-    HtInvPC_ = new HypreSmoother(*Ht_.As<HypreParMatrix>());
-    dynamic_cast<HypreSmoother *>(HtInvPC_)->SetType(HypreSmoother::Jacobi, 1);
-  }
+  HtInvPC_ = new HypreSmoother(*Ht_.As<HypreParMatrix>());
+  dynamic_cast<HypreSmoother *>(HtInvPC_)->SetType(HypreSmoother::Jacobi, smoother_passes_);
+  dynamic_cast<HypreSmoother *>(HtInvPC_)->SetSOROptions(hsmoother_relax_weight_, hsmoother_relax_omega_);
+  dynamic_cast<HypreSmoother *>(HtInvPC_)->SetPolyOptions(smoother_poly_order_, smoother_poly_fraction_,
+                                                          smoother_eig_est_);
 
   HtInv_ = new CGSolver(sfes_->GetComm());
-
   HtInv_->iterative_mode = true;
   HtInv_->SetOperator(*Ht_);
   HtInv_->SetPreconditioner(*HtInvPC_);
-  HtInv_->SetPrintLevel(pl_solve_);
-  HtInv_->SetRelTol(rtol_);
-  HtInv_->SetMaxIter(max_iter_);
+  HtInv_->SetPrintLevel(hsolve_pl_);
+  HtInv_->SetRelTol(hsolve_rtol_);
+  HtInv_->SetAbsTol(hsolve_atol_);
+  HtInv_->SetMaxIter(hsolve_max_iter_);
   if (rank0_) std::cout << "Temperature operators set" << endl;
 
   // Qt .....................................
@@ -546,15 +554,19 @@ void CaloricallyPerfectThermoChem::initializeOperators() {
     MqInvPC_ = new OperatorJacobiSmoother(diag_pa, empty);
   } else {
     MqInvPC_ = new HypreSmoother(*Mq_.As<HypreParMatrix>());
-    dynamic_cast<HypreSmoother *>(MqInvPC_)->SetType(HypreSmoother::Jacobi, 1);
+    dynamic_cast<HypreSmoother *>(MqInvPC_)->SetType(HypreSmoother::Jacobi, smoother_passes_);
+    dynamic_cast<HypreSmoother *>(MqInvPC_)->SetSOROptions(smoother_relax_weight_, smoother_relax_omega_);
+    dynamic_cast<HypreSmoother *>(MqInvPC_)->SetPolyOptions(smoother_poly_order_, smoother_poly_fraction_,
+                                                            smoother_eig_est_);
   }
   MqInv_ = new CGSolver(sfes_->GetComm());
   MqInv_->iterative_mode = false;
   MqInv_->SetOperator(*Mq_);
   MqInv_->SetPreconditioner(*MqInvPC_);
-  MqInv_->SetPrintLevel(pl_solve_);
-  MqInv_->SetRelTol(rtol_);
-  MqInv_->SetMaxIter(max_iter_);
+  MqInv_->SetPrintLevel(mass_inverse_pl_);
+  MqInv_->SetRelTol(mass_inverse_rtol_);
+  MqInv_->SetAbsTol(mass_inverse_atol_);
+  MqInv_->SetMaxIter(mass_inverse_max_iter_);
 
   LQ_form_ = new ParBilinearForm(sfes_);
   auto *lqd_blfi = new DiffusionIntegrator(*thermal_diff_total_coeff_);
@@ -664,15 +676,7 @@ void CaloricallyPerfectThermoChem::step() {
   Ht_form_->Update();
   Ht_form_->Assemble();
   Ht_form_->FormSystemMatrix(temp_ess_tdof_, Ht_);
-
   HtInv_->SetOperator(*Ht_);
-  if (partial_assembly_) {
-    delete HtInvPC_;
-    Vector diag_pa(sfes_->GetTrueVSize());
-    Ht_form_->AssembleDiagonal(diag_pa);
-    HtInvPC_ = new OperatorJacobiSmoother(diag_pa, temp_ess_tdof_);
-    HtInv_->SetPreconditioner(*HtInvPC_);
-  }
 
   // Prepare for the solve
   for (auto &temp_dbc : temp_dbcs_) {
@@ -681,12 +685,7 @@ void CaloricallyPerfectThermoChem::step() {
   sfes_->GetRestrictionMatrix()->MultTranspose(resT_, resT_gf_);
 
   Vector Xt2, Bt2;
-  if (partial_assembly_) {
-    auto *HC = Ht_.As<ConstrainedOperator>();
-    EliminateRHS(*Ht_form_, *HC, temp_ess_tdof_, Tn_next_gf_, resT_gf_, Xt2, Bt2, 1);
-  } else {
-    Ht_form_->FormLinearSystem(temp_ess_tdof_, Tn_next_gf_, resT_gf_, Ht_, Xt2, Bt2, 1);
-  }
+  Ht_form_->FormLinearSystem(temp_ess_tdof_, Tn_next_gf_, resT_gf_, Ht_, Xt2, Bt2, 1);
 
   // solve helmholtz eq for temp
   HtInv_->Mult(Bt2, Xt2);

diff --git a/src/calorically_perfect.hpp b/src/calorically_perfect.hpp
@@ -76,15 +76,39 @@ class CaloricallyPerfectThermoChem : public ThermoChemModelBase {
   // Flags
   bool rank0_;                      /**< true if this is rank 0 */
   bool partial_assembly_ = false;   /**< Enable/disable partial assembly of forms. */
-  bool numerical_integ_ = true;     /**< Enable/disable numerical integration rules of forms. */
+  bool numerical_integ_ = false;    /**< Enable/disable numerical integration rules of forms. */
   bool constant_viscosity_ = false; /**< Enable/disable constant viscosity */
   bool constant_density_ = false;   /**< Enable/disable constant density */
   bool domain_is_open_ = false;     /**< true if domain is open */
 
   // Linear-solver-related options
-  int pl_solve_ = 0;    /**< Verbosity level passed to mfem solvers */
-  int max_iter_;        /**< Maximum number of linear solver iterations */
-  double rtol_ = 1e-12; /**< Linear solver relative tolerance */
+  int smoother_poly_order_;
+  double smoother_poly_fraction_ = 0.75;
+  int smoother_eig_est_ = 10;
+  int smoother_passes_ = 1;
+  double smoother_relax_weight_ = 0.4;
+  double smoother_relax_omega_ = 1.0;
+  double hsmoother_relax_weight_ = 0.8;
+  double hsmoother_relax_omega_ = 0.1;
+
+  // solver tolerance options
+  int pl_solve_; /**< Verbosity level passed to mfem solvers */
+  int max_iter_; /**< Maximum number of linear solver iterations */
+  double rtol_;  /**< Linear solver relative tolerance */
+
+  int default_max_iter_ = 1000;
+  double default_rtol_ = 1.0e-10;
+  double default_atol_ = 1.0e-12;
+
+  int mass_inverse_pl_ = 0;
+  int mass_inverse_max_iter_;
+  double mass_inverse_rtol_;
+  double mass_inverse_atol_;
+
+  int hsolve_pl_ = 0;
+  int hsolve_max_iter_;
+  double hsolve_rtol_;
+  double hsolve_atol_;
 
   // Boundary condition info
   Array<int> temp_ess_attr_; /**< List of patches with Dirichlet BC on temperature */

diff --git a/src/chemistry.cpp b/src/chemistry.cpp
@@ -109,7 +109,7 @@ MFEM_HOST_DEVICE Chemistry::~Chemistry() {
   }
 }
 
-MFEM_HOST_DEVICE void Chemistry::setRates(const double * data, int size) {
+MFEM_HOST_DEVICE void Chemistry::setRates(const double *data, int size) {
   for (int r = 0; r < numReactions_; r++) {
     if (reactions_[r]->reactionModel == GRIDFUNCTION_RXN) {
       GridFunctionReaction *rx = (GridFunctionReaction *)(reactions_[r]);

diff --git a/src/chemistry.hpp b/src/chemistry.hpp
@@ -97,7 +97,7 @@ class Chemistry {
 
   // Set the grid function rates for GRIDFUNCTION_RXN reaction types
   void setGridFunctionRates(mfem::GridFunction &f);
-  MFEM_HOST_DEVICE void setRates(const double * data, int size);
+  MFEM_HOST_DEVICE void setRates(const double *data, int size);
 
   // return Vector of reaction rate coefficients, with the size of numReaction_.
   // WARNING(marc) I have removed "virtual" qualifier here assuming these functions will not

diff --git a/src/gpu_constructor.cpp b/src/gpu_constructor.cpp
@@ -124,11 +124,11 @@ __global__ void freeDeviceRadiation(Radiation *radiation) {
 //---------------------------------------------------
 // And finally device setters
 //---------------------------------------------------
-__global__ void deviceSetGridFunctionReactionData(const double * data, int size, GridFunctionReaction * reaction) {
+__global__ void deviceSetGridFunctionReactionData(const double *data, int size, GridFunctionReaction *reaction) {
   reaction->setData(data, size);
 }
 
-__global__ void deviceSetChemistryReactionData(const double * data, int size, Chemistry * chem) {
+__global__ void deviceSetChemistryReactionData(const double *data, int size, Chemistry *chem) {
   chem->setRates(data, size);
 }
 

diff --git a/src/gpu_constructor.hpp b/src/gpu_constructor.hpp
@@ -161,8 +161,8 @@ __global__ void freeDeviceChemistry(Chemistry *chem);
 __global__ void freeDeviceRadiation(Radiation *radiation);
 
 //! Set the data to a GridFunctionReaction
-__global__ void deviceSetGridFunctionReactionData(const double * data, int size, GridFunctionReaction * reaction);
-__global__ void deviceSetChemistryReactionData(const double * data, int size, Chemistry * chem);
+__global__ void deviceSetGridFunctionReactionData(const double *data, int size, GridFunctionReaction *reaction);
+__global__ void deviceSetChemistryReactionData(const double *data, int size, Chemistry *chem);
 
 #endif  // cuda or hip
 }  // namespace gpu

diff --git a/src/reaction.cpp b/src/reaction.cpp
@@ -84,12 +84,12 @@ MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(int comp)
 
 MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() {}
 
-MFEM_HOST_DEVICE void GridFunctionReaction::setData(const double * data, int size) {
+MFEM_HOST_DEVICE void GridFunctionReaction::setData(const double *data, int size) {
   data_ = data + comp_ * size_;
   size_ = size;
 }
 
-void GridFunctionReaction::setGridFunction(const mfem::GridFunction & f) {
+void GridFunctionReaction::setGridFunction(const mfem::GridFunction &f) {
   size_ = f.FESpace()->GetNDofs();
   assert(comp_ < f.FESpace()->GetVDim());
   assert(f.FESpace()->GetOrdering() == mfem::Ordering::byNODES);

diff --git a/src/reaction.hpp b/src/reaction.hpp
@@ -129,9 +129,9 @@ class GridFunctionReaction : public Reaction {
 
   MFEM_HOST_DEVICE virtual ~GridFunctionReaction();
 
-  void setGridFunction(const mfem::GridFunction & f);
+  void setGridFunction(const mfem::GridFunction &f);
 
-  MFEM_HOST_DEVICE void setData(const double * data, int size);
+  MFEM_HOST_DEVICE void setData(const double *data, int size);
 
   MFEM_HOST_DEVICE virtual double computeRateCoefficient([[maybe_unused]] const double &T_h,
                                                          [[maybe_unused]] const double &T_e, const int &dofindex,