From 5c677226cd04abd1d571aee943392fe51960a7dc Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Tue, 10 May 2022 20:31:59 -0600 Subject: [PATCH 001/172] examples/fluids: factor Newtonian with better helpers --- examples/fluids/problems/newtonian.c | 4 +- examples/fluids/qfunctions/newtonian.h | 585 +++++++++++-------------- 2 files changed, 253 insertions(+), 336 deletions(-) diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index 11f9cc8487..04b52e144f 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -39,8 +39,8 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { problem->ics.qfunction_loc = ICsNewtonianIG_loc; problem->setup_sur.qfunction = SetupBoundary; problem->setup_sur.qfunction_loc = SetupBoundary_loc; - problem->apply_vol_rhs.qfunction = Newtonian; - problem->apply_vol_rhs.qfunction_loc = Newtonian_loc; + problem->apply_vol_rhs.qfunction = RHSFunction_Newtonian; + problem->apply_vol_rhs.qfunction_loc = RHSFunction_Newtonian_loc; problem->apply_vol_ifunction.qfunction = IFunction_Newtonian; problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc; problem->bc = NULL; diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 008a5c8e46..04ffde09ee 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -20,6 +20,138 @@ #define M_PI 3.14159265358979323846 #endif +typedef struct { + CeedScalar pressure; + CeedScalar velocity[3]; + CeedScalar temperature; +} StatePrimitive; + +typedef struct { + CeedScalar density; + CeedScalar momentum[3]; + CeedScalar E_total; +} StateConservative; + +typedef struct { + StateConservative U; + StatePrimitive Y; +} State; + +CEED_QFUNCTION_HELPER CeedScalar Dot3(const CeedScalar u[3], + const CeedScalar v[3]) { + return u[0]*v[0] + u[1]*v[1] + u[2]*v[2]; +} + +CEED_QFUNCTION_HELPER StatePrimitive StatePrimitiveFromConservative( 
+ NewtonianIdealGasContext gas, StateConservative U, const CeedScalar x[3]) { + StatePrimitive Y; + for (int i=0; i<3; i++) Y.velocity[i] = U.momentum[i] / U.density; + CeedScalar e_kinetic = .5 * Dot3(Y.velocity, Y.velocity); + CeedScalar e_potential = -Dot3(gas->g, x); + CeedScalar e_total = U.E_total / U.density; + CeedScalar e_internal = e_total - e_kinetic - e_potential; + Y.temperature = e_internal / gas->cv; + Y.pressure = (gas->cp / gas->cv - 1) * U.density * e_internal; + return Y; +} + +CEED_QFUNCTION_HELPER StatePrimitive StatePrimitiveFromConservative_fwd( + NewtonianIdealGasContext gas, State s, StateConservative dU, + const CeedScalar x[3], const CeedScalar dx[3]) { + StatePrimitive dY; + for (int i=0; i<3; i++) { + dY.velocity[i] = (dU.momentum[i] - s.Y.velocity[i] * dU.density) / s.U.density; + } + CeedScalar e_kinetic = .5 * Dot3(s.Y.velocity, s.Y.velocity); + CeedScalar de_kinetic = Dot3(dY.velocity, s.Y.velocity); + CeedScalar e_potential = -Dot3(gas->g, x); + CeedScalar de_potential = -Dot3(gas->g, dx); + CeedScalar e_total = s.U.E_total / s.U.density; + CeedScalar de_total = (dU.E_total - e_total * dU.density) / s.U.density; + CeedScalar e_internal = e_total - e_kinetic - e_potential; + CeedScalar de_internal = de_total - de_kinetic - de_potential; + dY.temperature = de_internal / gas->cv; + dY.pressure = (gas->cp / gas->cv - 1) + * (dU.density * e_internal + s.U.density * de_internal); + return dY; +} + +CEED_QFUNCTION_HELPER State StateFromU(NewtonianIdealGasContext gas, + const CeedScalar U[5], const CeedScalar x[3]) { + State s; + s.U.density = U[0]; + s.U.momentum[0] = U[1]; + s.U.momentum[1] = U[2]; + s.U.momentum[2] = U[3]; + s.U.E_total = U[4]; + s.Y = StatePrimitiveFromConservative(gas, s.U, x); + return s; +} + +CEED_QFUNCTION_HELPER void FluxInviscid(NewtonianIdealGasContext gas, State s, + StateConservative Flux[3]) { + for (int i=0; i<3; i++) { + Flux[i].density = s.U.momentum[i]; + for (int j=0; j<3; j++) + Flux[i].momentum[j] = 
s.U.momentum[i] * s.Y.velocity[j] + + s.Y.pressure * (i == j); + Flux[i].E_total = (s.U.E_total + s.Y.pressure) * s.Y.velocity[i]; + } +} + +CEED_QFUNCTION_HELPER void FluxInviscid_fwd(NewtonianIdealGasContext gas, + State s, State ds, StateConservative dFlux[3]) { + for (int i=0; i<3; i++) { + dFlux[i].density = ds.U.momentum[i]; + for (int j=0; j<3; j++) + dFlux[i].momentum[j] = ds.U.momentum[i] * s.Y.velocity[j] + + s.U.momentum[i] * ds.Y.velocity[j] + ds.Y.pressure * (i == j); + dFlux[i].E_total = (ds.U.E_total + ds.Y.pressure) * s.Y.velocity[i] + + (s.U.E_total + s.Y.pressure) * ds.Y.velocity[i]; + } +} + +// Kelvin-Mandel notation +CEED_QFUNCTION_HELPER void KMStrainRate(const State grad_s[3], + CeedScalar strain_rate[6]) { + const CeedScalar weight = 1 / sqrt(2.); + strain_rate[0] = grad_s[0].Y.velocity[0]; + strain_rate[1] = grad_s[1].Y.velocity[1]; + strain_rate[2] = grad_s[2].Y.velocity[2]; + strain_rate[3] = weight * (grad_s[2].Y.velocity[1] + grad_s[1].Y.velocity[2]); + strain_rate[4] = weight * (grad_s[2].Y.velocity[0] + grad_s[0].Y.velocity[2]); + strain_rate[5] = weight * (grad_s[1].Y.velocity[0] + grad_s[0].Y.velocity[1]); +} + +CEED_QFUNCTION_HELPER void KMUnpack(const CeedScalar v[6], CeedScalar A[3][3]) { + const CeedScalar weight = 1 / sqrt(2.); + A[0][0] = v[0]; + A[1][1] = v[1]; + A[2][2] = v[2]; + A[2][1] = A[1][2] = weight * v[3]; + A[2][0] = A[0][2] = weight * v[4]; + A[1][0] = A[0][1] = weight * v[5]; +} + +CEED_QFUNCTION_HELPER void NewtonianStress(NewtonianIdealGasContext gas, + const CeedScalar strain_rate[6], CeedScalar stress[6]) { + CeedScalar div_u = strain_rate[0] + strain_rate[1] + strain_rate[2]; + for (int i=0; i<6; i++) { + stress[i] = gas->mu * (2 * strain_rate[i] + gas->lambda * div_u * (i < 3)); + } +} + +CEED_QFUNCTION_HELPER void ViscousEnergyFlux(NewtonianIdealGasContext gas, + StatePrimitive Y, const State grad_s[3], const CeedScalar stress[3][3], + CeedScalar Fe[3]) { + for (int i=0; i<3; i++) { + Fe[i] = - 
Y.velocity[0] * stress[0][i] + - Y.velocity[1] * stress[1][i] + - Y.velocity[2] * stress[2][i] + - gas->k * grad_s[i].Y.temperature; + } +} + // ***************************************************************************** // Helper function for computing flux Jacobian // ***************************************************************************** @@ -178,41 +310,6 @@ CEED_QFUNCTION_HELPER void Tau_diagPrim(CeedScalar Tau_d[3], // Ctau_v * mu * mu IF AND ONLY IF we don't add viscosity law =f(T) } -// ***************************************************************************** -// Helper function for computing Tau elements (stabilization constant) -// Model from: -// Stabilized Methods for Compressible Flows, Hughes et al 2010 -// -// Spatial criterion #2 - Tau is a 3x3 diagonal matrix -// Tau[i] = c_tau h[i] Xi(Pe) / rho(A[i]) (no sum) -// -// Where -// c_tau = stabilization constant (0.5 is reported as "optimal") -// h[i] = 2 length(dxdX[i]) -// Pe = Peclet number ( Pe = sqrt(u u) / dot(dXdx,u) diffusivity ) -// Xi(Pe) = coth Pe - 1. / Pe (1. 
at large local Peclet number ) -// rho(A[i]) = spectral radius of the convective flux Jacobian i, -// wave speed in direction i -// ***************************************************************************** -CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3], - const CeedScalar dXdx[3][3], const CeedScalar u[3], - /* const CeedScalar sound_speed, const CeedScalar c_tau) { */ - const CeedScalar sound_speed, const CeedScalar c_tau, - const CeedScalar viscosity) { - const CeedScalar mag_u_visc = sqrt(u[0]*u[0] +u[1]*u[1] +u[2]*u[2]) / - (2*viscosity); - for (int i=0; i<3; i++) { - // length of element in direction i - CeedScalar h = 2 / sqrt(dXdx[0][i]*dXdx[0][i] + dXdx[1][i]*dXdx[1][i] + - dXdx[2][i]*dXdx[2][i]); - CeedScalar Pe = mag_u_visc*h; - CeedScalar Xi = 1/tanh(Pe) - 1/Pe; - // fastest wave in direction i - CeedScalar fastest_wave = fabs(u[i]) + sound_speed; - Tau_x[i] = c_tau * h * Xi / fastest_wave; - } -} - // ***************************************************************************** // This QFunction sets a "still" initial condition for generic Newtonian IG problems // ***************************************************************************** @@ -308,8 +405,8 @@ CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q, // int( gradv gradu ) // // ***************************************************************************** -CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) { +CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], @@ -323,9 +420,7 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q, // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; - const CeedScalar lambda = context->lambda; const CeedScalar mu = context->mu; - const CeedScalar k = context->k; const CeedScalar cv = context->cv; const 
CeedScalar cp = context->cp; const CeedScalar *g = context->g; @@ -336,34 +431,11 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q, CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; ic_tau, mu); + strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j]; // -- Stabilization method: none, SU, or SUPG CeedScalar stab[5][3] = {{0.}}; @@ -515,13 +523,14 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q, case STAB_NONE: // Galerkin break; case STAB_SU: // SU - Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho); + Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density); tau_strong_conv[0] = Tau_d[0] * strong_conv[0]; tau_strong_conv[1] = Tau_d[1] * strong_conv[1]; tau_strong_conv[2] = Tau_d[1] * strong_conv[2]; tau_strong_conv[3] = Tau_d[1] * strong_conv[3]; tau_strong_conv[4] = Tau_d[2] * strong_conv[4]; - PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv, + PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, + tau_strong_conv, tau_strong_conv_conservative); for (int j=0; j<3; j++) for (int k=0; k<5; k++) @@ -569,9 +578,7 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, // *INDENT-ON* // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; - const CeedScalar lambda = context->lambda; const CeedScalar mu = context->mu; - const CeedScalar k = context->k; const CeedScalar cv = context->cv; const CeedScalar cp = context->cp; const CeedScalar *g = context->g; @@ -582,35 +589,11 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; i Date: Wed, 11 May 2022 07:32:11 -0600 Subject: [PATCH 002/172] examples/fluids: small consolidation --- examples/fluids/qfunctions/newtonian.h | 41 +++++++++++++++----------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 04ffde09ee..d6508672b6 100644 --- 
a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -88,6 +88,19 @@ CEED_QFUNCTION_HELPER State StateFromU(NewtonianIdealGasContext gas, return s; } +CEED_QFUNCTION_HELPER State StateFromU_fwd(NewtonianIdealGasContext gas, + State s, const CeedScalar dU[5], + const CeedScalar x[3], const CeedScalar dx[3]) { + State ds; + ds.U.density = dU[0]; + ds.U.momentum[0] = dU[1]; + ds.U.momentum[1] = dU[2]; + ds.U.momentum[2] = dU[3]; + ds.U.E_total = dU[4]; + ds.Y = StatePrimitiveFromConservative_fwd(gas, s, ds.U, x, dx); + return ds; +} + CEED_QFUNCTION_HELPER void FluxInviscid(NewtonianIdealGasContext gas, State s, StateConservative Flux[3]) { for (int i=0; i<3; i++) { @@ -455,16 +468,12 @@ CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, State grad_s[3]; for (int j=0; j<3; j++) { - CeedScalar dx_i[3] = {0}; - grad_s[j].U.density = dq[0][0][i] * dXdx[0][j] - + dq[1][0][i] * dXdx[1][j] + dq[2][0][i] * dXdx[2][j]; - for (int k=0; k<3; k++) grad_s[j].U.momentum[k] = dq[0][k+1][i] * dXdx[0][j] - + dq[1][k+1][i] * dXdx[1][j] + dq[2][k+1][i] * dXdx[2][j]; - grad_s[j].U.E_total = dq[0][4][i] * dXdx[0][j] + dq[1][4][i] * dXdx[1][j] + - dq[2][4][i] * dXdx[2][j]; + CeedScalar dx_i[3] = {0}, dU[5]; + for (int k=0; k<5; k++) dU[k] = dq[0][k][i] * dXdx[0][j] + + dq[1][k][i] * dXdx[1][j] + + dq[2][k][i] * dXdx[2][j]; dx_i[j] = 1.; - grad_s[j].Y = StatePrimitiveFromConservative_fwd(context, s, grad_s[j].U, - x_i, dx_i); + grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i); } CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3]; @@ -612,16 +621,12 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, // *INDENT-ON* State grad_s[3]; for (int j=0; j<3; j++) { - CeedScalar dx_i[3]; - grad_s[j].U.density = dq[0][0][i] * dXdx[0][j] - + dq[1][0][i] * dXdx[1][j] + dq[2][0][i] * dXdx[2][j]; - for (int k=0; k<3; k++) grad_s[j].U.momentum[k] = dq[0][k+1][i] * dXdx[0][j] - + dq[1][k+1][i] * dXdx[1][j] + dq[2][k+1][i] * dXdx[2][j]; - 
grad_s[j].U.E_total = dq[0][4][i] * dXdx[0][j] + dq[1][4][i] * dXdx[1][j] + - dq[2][4][i] * dXdx[2][j]; + CeedScalar dx_i[3] = {0}, dU[5]; + for (int k=0; k<5; k++) dU[k] = dq[0][k][i] * dXdx[0][j] + + dq[1][k][i] * dXdx[1][j] + + dq[2][k][i] * dXdx[2][j]; dx_i[j] = 1.; - grad_s[j].Y = StatePrimitiveFromConservative_fwd(context, s, grad_s[j].U, - x_i, dx_i); + grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i); } CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3]; From 2534dcc855657bb9b9aefc63eb9d8efda9fb61da Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Thu, 12 May 2022 11:20:59 -0600 Subject: [PATCH 003/172] examples/fluids: GetRestrictionForDomain accept NULL arguments --- examples/fluids/src/setuplibceed.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c index 84972e3456..1eac97e7ea 100644 --- a/examples/fluids/src/setuplibceed.c +++ b/examples/fluids/src/setuplibceed.c @@ -44,25 +44,32 @@ PetscErrorCode GetRestrictionForDomain(Ceed ceed, DM dm, CeedInt height, DM dm_coord; CeedInt dim, loc_num_elem; CeedInt Q_dim; + CeedElemRestriction elem_restr_tmp; PetscErrorCode ierr; PetscFunctionBeginUser; ierr = DMGetDimension(dm, &dim); CHKERRQ(ierr); dim -= height; Q_dim = CeedIntPow(Q, dim); - ierr = DMGetCoordinateDM(dm, &dm_coord); CHKERRQ(ierr); - ierr = DMPlexSetClosurePermutationTensor(dm_coord, PETSC_DETERMINE, NULL); - CHKERRQ(ierr); ierr = CreateRestrictionFromPlex(ceed, dm, height, domain_label, value, - elem_restr_q); - CHKERRQ(ierr); - ierr = CreateRestrictionFromPlex(ceed, dm_coord, height, domain_label, value, - elem_restr_x); + &elem_restr_tmp); CHKERRQ(ierr); - CeedElemRestrictionGetNumElements(*elem_restr_q, &loc_num_elem); - CeedElemRestrictionCreateStrided(ceed, loc_num_elem, Q_dim, - q_data_size, q_data_size*loc_num_elem*Q_dim, - CEED_STRIDES_BACKEND, elem_restr_qd_i); + if (elem_restr_q) *elem_restr_q = 
elem_restr_tmp; + if (elem_restr_x) { + ierr = DMGetCoordinateDM(dm, &dm_coord); CHKERRQ(ierr); + ierr = DMPlexSetClosurePermutationTensor(dm_coord, PETSC_DETERMINE, NULL); + CHKERRQ(ierr); + ierr = CreateRestrictionFromPlex(ceed, dm_coord, height, domain_label, value, + elem_restr_x); + CHKERRQ(ierr); + } + if (elem_restr_qd_i) { + CeedElemRestrictionGetNumElements(elem_restr_tmp, &loc_num_elem); + CeedElemRestrictionCreateStrided(ceed, loc_num_elem, Q_dim, + q_data_size, q_data_size*loc_num_elem*Q_dim, + CEED_STRIDES_BACKEND, elem_restr_qd_i); + } + if (!elem_restr_q) CeedElemRestrictionDestroy(&elem_restr_tmp); PetscFunctionReturn(0); } From a3ae0734a04ace7b99bbe186ac606878fa3ae395 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Thu, 12 May 2022 15:23:12 -0600 Subject: [PATCH 004/172] examples/fluids: record state and tau in preparation for Jacobian Change notation from dq to Grad_q so that dq can be used consistently as variations. --- examples/fluids/qfunctions/newtonian.h | 53 ++++++++++++++------------ examples/fluids/src/setuplibceed.c | 34 ++++++++++++----- 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index d6508672b6..6d56867651 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -423,12 +423,12 @@ CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, // *INDENT-OFF* // Inputs const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], + (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; // Outputs CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; + (*Grad_v)[5][CEED_Q_VLA] = 
(CeedScalar(*)[5][CEED_Q_VLA])out[1]; // *INDENT-ON* // Context @@ -469,9 +469,9 @@ CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, State grad_s[3]; for (int j=0; j<3; j++) { CeedScalar dx_i[3] = {0}, dU[5]; - for (int k=0; k<5; k++) dU[k] = dq[0][k][i] * dXdx[0][j] - + dq[1][k][i] * dXdx[1][j] - + dq[2][k][i] * dXdx[2][j]; + for (int k=0; k<5; k++) dU[k] = Grad_q[0][k][i] * dXdx[0][j] + + Grad_q[1][k][i] * dXdx[1][j] + + Grad_q[2][k][i] * dXdx[2][j]; dx_i[j] = 1.; grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i); } @@ -496,9 +496,9 @@ CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, for (int j=0; j<3; j++) { for (int k=0; k<5; k++) { - dv[j][k][i] = wdetJ * (dXdx[j][0] * Flux[k][0] + - dXdx[j][1] * Flux[k][1] + - dXdx[j][2] * Flux[k][2]); + Grad_v[j][k][i] = wdetJ * (dXdx[j][0] * Flux[k][0] + + dXdx[j][1] * Flux[k][1] + + dXdx[j][2] * Flux[k][2]); } } @@ -548,9 +548,9 @@ CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, for (int j=0; j<5; j++) for (int k=0; k<3; k++) - dv[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] + - stab[j][1] * dXdx[k][1] + - stab[j][2] * dXdx[k][2]); + Grad_v[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] + + stab[j][1] * dXdx[k][1] + + stab[j][2] * dXdx[k][2]); break; case STAB_SUPG: // SUPG is not implemented for explicit scheme break; @@ -577,13 +577,14 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, // *INDENT-OFF* // Inputs const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], + (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; // Outputs CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; + (*Grad_v)[5][CEED_Q_VLA] = 
(CeedScalar(*)[5][CEED_Q_VLA])out[1], + (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2]; // *INDENT-ON* // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; @@ -622,9 +623,9 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, State grad_s[3]; for (int j=0; j<3; j++) { CeedScalar dx_i[3] = {0}, dU[5]; - for (int k=0; k<5; k++) dU[k] = dq[0][k][i] * dXdx[0][j] - + dq[1][k][i] * dXdx[1][j] - + dq[2][k][i] * dXdx[2][j]; + for (int k=0; k<5; k++) dU[k] = Grad_q[0][k][i] * dXdx[0][j] + + Grad_q[1][k][i] * dXdx[1][j] + + Grad_q[2][k][i] * dXdx[2][j]; dx_i[j] = 1.; grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i); } @@ -650,9 +651,9 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, for (int j=0; j<3; j++) { for (int k=0; k<5; k++) { - dv[j][k][i] = -wdetJ * (dXdx[j][0] * Flux[k][0] + - dXdx[j][1] * Flux[k][1] + - dXdx[j][2] * Flux[k][2]); + Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * Flux[k][0] + + dXdx[j][1] * Flux[k][1] + + dXdx[j][2] * Flux[k][2]); } } @@ -707,9 +708,9 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, for (int j=0; j<5; j++) for (int k=0; k<3; k++) - dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] + - stab[j][1] * dXdx[k][1] + - stab[j][2] * dXdx[k][2]); + Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] + + stab[j][1] * dXdx[k][1] + + stab[j][2] * dXdx[k][2]); break; case STAB_SUPG: // SUPG Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density); @@ -734,11 +735,13 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, for (int j=0; j<5; j++) for (int k=0; k<3; k++) - dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] + - stab[j][1] * dXdx[k][1] + - stab[j][2] * dXdx[k][2]); + Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] + + stab[j][1] * dXdx[k][1] + + stab[j][2] * dXdx[k][2]); break; } + for (int j=0; j<5; j++) jac_data[j][i] = U[j]; + for (int j=0; j<3; j++) jac_data[5+j][i] = Tau_d[j]; } // End Quadrature Point Loop diff --git 
a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c index 1eac97e7ea..ec2a15b787 100644 --- a/examples/fluids/src/setuplibceed.c +++ b/examples/fluids/src/setuplibceed.c @@ -234,8 +234,11 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, const CeedInt dim = problem->dim, num_comp_x = problem->dim, q_data_size_vol = problem->q_data_size_vol, + jac_data_size_vol = num_comp_q + 3, P = app_ctx->degree + 1, Q = P + app_ctx->q_extra; + CeedElemRestriction elem_restr_jd_i; + CeedVector jac_data; // ----------------------------------------------------------------------------- // CEED Bases @@ -254,7 +257,11 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, ierr = GetRestrictionForDomain(ceed, dm, 0, 0, 0, Q, q_data_size_vol, &ceed_data->elem_restr_q, &ceed_data->elem_restr_x, &ceed_data->elem_restr_qd_i); CHKERRQ(ierr); - // -- Create E vectors + + ierr = GetRestrictionForDomain(ceed, dm, 0, 0, 0, Q, jac_data_size_vol, + NULL, NULL, + &elem_restr_jd_i); CHKERRQ(ierr); +// -- Create E vectors CeedElemRestrictionCreateVector(ceed_data->elem_restr_q, &user->q_ceed, NULL); CeedElemRestrictionCreateVector(ceed_data->elem_restr_q, &user->q_dot_ceed, NULL); @@ -295,14 +302,14 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, problem->apply_vol_rhs.qfunction_context); CeedQFunctionContextDestroy(&problem->apply_vol_rhs.qfunction_context); CeedQFunctionAddInput(ceed_data->qf_rhs_vol, "q", num_comp_q, CEED_EVAL_INTERP); - CeedQFunctionAddInput(ceed_data->qf_rhs_vol, "dq", num_comp_q*dim, + CeedQFunctionAddInput(ceed_data->qf_rhs_vol, "Grad_q", num_comp_q*dim, CEED_EVAL_GRAD); CeedQFunctionAddInput(ceed_data->qf_rhs_vol, "qdata", q_data_size_vol, CEED_EVAL_NONE); CeedQFunctionAddInput(ceed_data->qf_rhs_vol, "x", num_comp_x, CEED_EVAL_INTERP); CeedQFunctionAddOutput(ceed_data->qf_rhs_vol, "v", num_comp_q, CEED_EVAL_INTERP); - CeedQFunctionAddOutput(ceed_data->qf_rhs_vol, 
"dv", num_comp_q*dim, + CeedQFunctionAddOutput(ceed_data->qf_rhs_vol, "Grad_v", num_comp_q*dim, CEED_EVAL_GRAD); } @@ -315,7 +322,7 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CeedQFunctionContextDestroy(&problem->apply_vol_ifunction.qfunction_context); CeedQFunctionAddInput(ceed_data->qf_ifunction_vol, "q", num_comp_q, CEED_EVAL_INTERP); - CeedQFunctionAddInput(ceed_data->qf_ifunction_vol, "dq", num_comp_q*dim, + CeedQFunctionAddInput(ceed_data->qf_ifunction_vol, "Grad_q", num_comp_q*dim, CEED_EVAL_GRAD); CeedQFunctionAddInput(ceed_data->qf_ifunction_vol, "q dot", num_comp_q, CEED_EVAL_INTERP); @@ -325,8 +332,10 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CEED_EVAL_INTERP); CeedQFunctionAddOutput(ceed_data->qf_ifunction_vol, "v", num_comp_q, CEED_EVAL_INTERP); - CeedQFunctionAddOutput(ceed_data->qf_ifunction_vol, "dv", num_comp_q*dim, + CeedQFunctionAddOutput(ceed_data->qf_ifunction_vol, "Grad_v", num_comp_q*dim, CEED_EVAL_GRAD); + CeedQFunctionAddOutput(ceed_data->qf_ifunction_vol, "jac_data", + jac_data_size_vol, CEED_EVAL_NONE); } // --------------------------------------------------------------------------- @@ -357,6 +366,7 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CeedVectorCreate(ceed, q_data_size_vol*loc_num_elem_vol*num_qpts_vol, &ceed_data->q_data); + CeedElemRestrictionCreateVector(elem_restr_jd_i, &jac_data, NULL); // ----------------------------------------------------------------------------- // CEED Operators // ----------------------------------------------------------------------------- @@ -385,7 +395,7 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CeedOperatorCreate(ceed, ceed_data->qf_rhs_vol, NULL, NULL, &op); CeedOperatorSetField(op, "q", ceed_data->elem_restr_q, ceed_data->basis_q, CEED_VECTOR_ACTIVE); - CeedOperatorSetField(op, "dq", ceed_data->elem_restr_q, ceed_data->basis_q, + 
CeedOperatorSetField(op, "Grad_q", ceed_data->elem_restr_q, ceed_data->basis_q, CEED_VECTOR_ACTIVE); CeedOperatorSetField(op, "qdata", ceed_data->elem_restr_qd_i, CEED_BASIS_COLLOCATED, ceed_data->q_data); @@ -393,7 +403,7 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, ceed_data->x_coord); CeedOperatorSetField(op, "v", ceed_data->elem_restr_q, ceed_data->basis_q, CEED_VECTOR_ACTIVE); - CeedOperatorSetField(op, "dv", ceed_data->elem_restr_q, ceed_data->basis_q, + CeedOperatorSetField(op, "Grad_v", ceed_data->elem_restr_q, ceed_data->basis_q, CEED_VECTOR_ACTIVE); user->op_rhs_vol = op; } @@ -404,7 +414,7 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CeedOperatorCreate(ceed, ceed_data->qf_ifunction_vol, NULL, NULL, &op); CeedOperatorSetField(op, "q", ceed_data->elem_restr_q, ceed_data->basis_q, CEED_VECTOR_ACTIVE); - CeedOperatorSetField(op, "dq", ceed_data->elem_restr_q, ceed_data->basis_q, + CeedOperatorSetField(op, "Grad_q", ceed_data->elem_restr_q, ceed_data->basis_q, CEED_VECTOR_ACTIVE); CeedOperatorSetField(op, "q dot", ceed_data->elem_restr_q, ceed_data->basis_q, user->q_dot_ceed); @@ -414,8 +424,11 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, ceed_data->x_coord); CeedOperatorSetField(op, "v", ceed_data->elem_restr_q, ceed_data->basis_q, CEED_VECTOR_ACTIVE); - CeedOperatorSetField(op, "dv", ceed_data->elem_restr_q, ceed_data->basis_q, + CeedOperatorSetField(op, "Grad_v", ceed_data->elem_restr_q, ceed_data->basis_q, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op, "jac_data", elem_restr_jd_i, + CEED_BASIS_COLLOCATED, jac_data); + user->op_ifunction_vol = op; } @@ -505,6 +518,7 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, user->op_ifunction_vol, height, P_sur, Q_sur, q_data_size_sur, &user->op_ifunction); CHKERRQ(ierr); } - + CeedElemRestrictionDestroy(&elem_restr_jd_i); + CeedVectorDestroy(&jac_data); PetscFunctionReturn(0); } 
From e71202f6ed1f7658a951fb96c00ca70137d558f9 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Fri, 13 May 2022 17:45:11 -0600 Subject: [PATCH 005/172] examples/fluids: fix sign and magnitude of channel flow body force Add -body_force_scale, which can be -1 to test that a flow reversal builds to the same steady profile with opposite sign. I also changed the domain size (reducing Reynolds number from 4500 to 45) so flow reversal can be observed reasonably quickly when desired. --- examples/fluids/channel.yaml | 4 ++-- examples/fluids/navierstokes.c | 2 +- examples/fluids/problems/channel.c | 5 ++++- .../fluids-navierstokes-channel.bin | Bin 6408 -> 6408 bytes 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/fluids/channel.yaml b/examples/fluids/channel.yaml index 0c7e89d7f5..4daf949a76 100644 --- a/examples/fluids/channel.yaml +++ b/examples/fluids/channel.yaml @@ -6,10 +6,10 @@ implicit: true ts: type: 'beuler' adapt_type: 'none' - dt: 5e-8 + dt: 5e-6 dm_plex_box_lower: 0,0,0 -dm_plex_box_upper: 1,1,.1 +dm_plex_box_upper: .01,.01,.001 dm_plex_dim: 3 degree: 1 dm_plex_box_faces: 10,10,1 diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index d3a05d798b..14b368f6b4 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -22,7 +22,7 @@ // ./navierstokes -ceed /cpu/self -problem density_current -degree 1 // ./navierstokes -ceed /gpu/cuda -problem advection -degree 1 // -//TESTARGS(name="channel") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel.bin +//TESTARGS(name="channel") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml -compare_final_state_atol 2e-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel.bin //TESTARGS(name="dc_explicit") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 
1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -ts_dt 1e-3 -units_meter 1e-2 -units_second 1e-2 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-explicit.bin //TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -units_meter 1e-2 -units_second 1e-2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin //TESTARGS(name="adv_rotation_explicit_strong") -ceed {ceed_resource} -test -problem advection -strong_form 1 -degree 3 -dm_plex_box_faces 2,2,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. 
-ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-explicit-strong.bin diff --git a/examples/fluids/problems/channel.c b/examples/fluids/problems/channel.c index eb1fb7bed7..f591eadc91 100644 --- a/examples/fluids/problems/channel.c +++ b/examples/fluids/problems/channel.c @@ -40,6 +40,7 @@ PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, CeedScalar umax = 10.; // m/s CeedScalar theta0 = 300.; // K CeedScalar P0 = 1.e5; // Pa + PetscReal body_force_scale = 1.; PetscOptionsBegin(comm, NULL, "Options for CHANNEL problem", NULL); ierr = PetscOptionsScalar("-umax", "Centerline velocity of the Channel", NULL, umax, &umax, NULL); CHKERRQ(ierr); @@ -47,6 +48,8 @@ PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, NULL, theta0, &theta0, NULL); CHKERRQ(ierr); ierr = PetscOptionsScalar("-P0", "Pressure at outflow", NULL, P0, &P0, NULL); CHKERRQ(ierr); + ierr = PetscOptionsReal("-body_force_scale", "Multiplier for body force", + NULL, body_force_scale=1, &body_force_scale, NULL); CHKERRQ(ierr); PetscOptionsEnd(); PetscScalar meter = user->units->meter; @@ -79,7 +82,7 @@ PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, channel_ctx->P0 = P0; channel_ctx->umax = umax; channel_ctx->implicit = user->phys->implicit; - channel_ctx->B = -2*umax*newtonian_ig_ctx->mu/H; + channel_ctx->B = body_force_scale * 2 * umax*newtonian_ig_ctx->mu / (H*H); { // Calculate Body force diff --git a/examples/fluids/tests-output/fluids-navierstokes-channel.bin b/examples/fluids/tests-output/fluids-navierstokes-channel.bin index 23d6f0e271adff05b32e7d9a62b07e0a8d89d012..cba9b7f0a322e2aa48cd54c22862359b8f9750ac 100644 GIT binary patch literal 6408 zcmXw;d0b8V`^7_2;eEs>sq&rm1~98n93^-HfA!(hTN+qKL%lL zZl~s#iTRz4(ZWlibx+qNE`?)cl6BcdugiW5t#P&yS$f)1Bju^{9a9>G?agg{kCaI8 z_kCZUYcj$8UQB)d<&D(Pt$mp4pGYCF;f3nF{`NA-&ZW_6NBp68Jd>ELCpGHw%cm&Z zURb|j<7~ajDbPpUtP1lMS`(CQuMLq&4c#?*Un?Cg_`4|8Pf+iG@9*_9JiZ+I<%d2h 
z4TS!=!pxzsgn$FVYj5U9Uq zU8*PiiNpPmI#V#zM%b_@De^#}1?Dkj&sX^ft$VlqvNc^OHT1ZAW!1v#LP(E@58B4( za2|_iYc3L?cj#j>s!?jBB9Xir94c%*Zy@R(uY-Q$*dk8>BuFaHR+t{ ze0y!FX@K}NzmK&LG)MMzvf^f$WN*dueR}JlpHbMk;WhLNJonERg>`NjlVzX0pr3Cv zx-Xt@LTkj$R%Pg2Qlor+3hSGHhOg+TE|Wy%Z49qeg?`TR#);3RrtJ;xA2k;WUKM4F zdg{)F-ZNhIY6Rw!yxdEQp&vNL+oDd`B0RD$h}?zeSD3mrF%I*;_4RWXL4Wmq)r#f9 z>Nu_8zWt{`zoGhI7S1O&c)@{+Ea)?aiH}l*u;H#J9Za_4`CWQBE%Gk(Hm7Yi^uql| z-{_=PDr}7n?>u7UE$CM{kIxB&A1|I#x_Mt}x_0NyPF7_?1QnSo37p0IEVP)8C&r|I0KRx*Va}&+)pLBZ$ z{p0~9rJBO}7?W48Z@c327pyvaa2)36-)+w8484=8d3vZ4e&;1oUHB{K4aw&7>b-d62SSYU!9s+;p=L~dAlFnLvv23QVl@NB? z-P%dnQ6@8w%T-P#+T2!0>5f7t6p$|UJE2`X`}&`+*ztnLSW zbMC~5CxVY8c!c7>8_@6ZGVI`s^J^I2?apZEqawy!Xc9aV4OS~Q*T^Jkr=MjOOn~0( z{07rK&`->@IN%~IR#)3=drTGj>=M_{_b}J7i*h9H7QCMxgV)%5LmyI`b1GC=9bV(# z>ua=35`Ql8p~4pEXV~cuDuVvak)v@^VYT$(f%vc2q0cI=H#v;+JC@tu`x@?F>6Wsa zq7Y?8aegdCdwk$v%6>5Z&=Ozw#UX zLN%loT4SFqR(xlgeCYcu{|3wtcc1^pqzZa{OVN+I`b9aqywv_b2U$VU` zaelw&yCy!CT4<(+cT4@R^k_(6@ys&J544ZjnKu`D<9p6qjiK*qe&(G*>Cx!jw>_7V zJ~d6<;RfbYng{MLkXrP;8@=nhP%2wFY3#T5&+z}JvE6UhV?J?V*41pO#gZJC{pbHR zjq`nyq&k=Mv6F3{I^g^QhG}@7gg*LrV&)-JFVkllHx-b-iO=-^-ToAMU7h!Xb)o-n z^Nr zbN_jD6yZNBtH>@mfAXI-_n*fs?!U*;BKfb2$t$`4==sBcRjS;7VKE~7H|nX2AN1ru z>^t&bl?wOY*%c!E7vO!eyWD?0pojma@8teVX%OMR6b0M9(3AgaF^B&acj5kX?k>WA zcIB5}%Kc}EIsDghoBQwKcoF{7xAKjF9{#gd;r{#EQ0~70BK)@|W8oFK|H7ds|1IJE ziy11C{{p-hLr?xImHY1!_urEBBKglJ3;ToluP5Gr%0E4xf1{3zlz%f{Zo~N^|3V6Q z{%IBS{M*+{ME-^6EIY&V&mZ|l`Io27^Uo?(ME;GBsn`HL^6%^qo_}9+c>dLaSpFd&DgOqS@cjEVRHXd-edB>V|AL`M{xuDe=ift-@^6aAUg#z zzvNfu%lB^?^t6Ak^ZmQ-lZgFm{GjMK^w__B{^0r2{yoe0@Ak(c_HRUQ!(#dVrToYK zoiv>9-ytVN?BB(KIq(PV-VKzje&~NXF}(l%oGMcP^SJRpdH;J0J^Ekg$GrcU z^%K$m*3HcKgdY7b{ubuc{~CDz3z;WU|JxyIVNU%I=fnD66YqbM_KVd2(y!9rr~an| z|4{!k=l##$K&1ZXtkFx}|8V})|MZsd{x|W7Nc~Tt82gj@pQeSp|JCvSXTMma{%7+7 z{-pk=3V%|+AAml_`aSvr_4`4C@cpRYPsJSle&le>soz`Td7-)}{}vVQ*+ zbM*V=*Ktk#9{qs&{UP*A>h~|e3#{K8%lrKR-tWKf!5sa5^-9dq@1^K_)bGz=f3kl6 z2y^0(o0x+?f?o6c8=S`PF9rET_ov&8Iqt7~How2U*spYd;iE7If6()!`&%{`*SJ5W 
zp}5BVWqIP7?ynoJ!5?18E8-8kiTwWlpkFZlKpxZmnIbRf{?39Q=>9H$#rea3qdIZ_ zrOjack*c#Ae;@v9_2d5g{FM8z+fv3K)BVw3h(Gpsf*$_cH=p}&mnGv5!+mKCR;lIwi^BIM|8;5L{u`;o_~WofEA;SRvO4DQU(iYJKOberAKMDY zVGjS*7hw+mX#XVVk37a7yC->L4*#t|zbF4q$>9ETe!=*|ARqmN_`@E2NB+}S=l&ab zpYcckr|rV;#@GKCX;E6!)!x9{HF3g6H4u z9z6f%?_m7V$88VJ7yMDb6?*L79!`A!PQJkSWBVoWH0|H8V|@SiT_)#`YQ`TXZOA9$ z5AQC1Y=Y5p%hxuDMe^_es{^uFU```0Ej6cF^b9w(uih>^f z?{){?|F#7af9N_!`?CHwwtYR$kNRI%-v2}c;t#*K?%jF+E7}e{`rowQy#F0AVEnP! z0PjEbzt-EBqyIUBe~3Rk-ZK8U&<*)V{Bij)=IDP-xIf~Lhn2)1`p2!nchvtZ>Gucl z$3h?8|IWHG{*a7X!24h4ZqTFu4eT!Gj~R?Vw0`6Lr~YS*^CAB5eariwrxWpqL}B@Q z-v1ow{=pv&=-y95JJ@{i4{yyc4pTQrgDV#sr)JwOOD=Yszi~djiQ9=C|{Bff%=Z`XV)16@kaWPv+ z5B_*&i}M42xKHH#aoD>wywe@!XEZ1N2o8fD{Gp@8`9rBkX=-4RmRKo91HT5j|DIz``~kip|JAxO{;;k={xSc7zsY~| z%DDduf*F6jiRmx*AALUjH+ZC+Kgt<@9Lga*{5M6J`){Q^_n+$w#vj)Gi{$>>h4&x% zr}mlW-&h;QA5%-f@05SrCi48#u;lrdw43qA>we$l`Bw}*Aj?!x!)jM0oglAj}A==Z-dZO~)?*0=Ee8yLd)<6h!S=&^q_uz!d@J}>0^H+m)G zkI~awq%l7Xf_?!CQBi{dN(;0un`MY3_{`U#L?+|}{KF#~zqQ4k_ zjCKDh?|;Z2>VK)k+1WBd_Y3I3w~7h#C=L;o98%lqH29L66B(de`E`*Rff6ZJo3 zN8bNtnKS+vc5gfM=zl55XV(A1c>minl<`Mmsx9W!|M32@{@2X=pYeOfA0?V*n4|ys zpN409??*hw9R2>H8lD&UBc>RiPyOBqbNc@R z@eA=s7hF@nx5qX0`+8hczlUF_-+RNa)bDMpF literal 6408 zcmdVedtA+F8UXM^BheJ05lKYR2W5;kqnl3Xq6=dWa*Jwd!Z1uTMMa&KTvmgF(N^O& zG}4Gds#A@O7;_3WT`ZAO&U8dtql{#q=XsCS^uFz9|J?oa_xYaZ{r&o#=RB7r{la}D z5}jdFn)kVQSEoIe8g4S2es!gV_WF_b+C_W)Tzfr9d#&#D`DX3)`<>1k*Xepfr|VIj zt|x1+-`w8>uB9dAA%4!|vtmD)IirWu^GSZscC`(c8p>}b<+(p5FAI2x{z`3-1|BGz zWm84o^a#(BmQ?7bqL-DPI1lb`KX2O&^7>FWcD|hzdbOh77rk=HEH85Px*^PChoM&; z3wA(1e)-*O<|YT2hb%Gti=WwHC(?aHdDX05~WNFBojcmvxEX~Qo zuN$LF6eGSxpPivQgFK}BCi2Mce&}XTG-tr2E!(%pknidjfG#hx{?vJD%+TREy$YS~ zui3b@_w%VzL&XkD1OFG~D(_hI#kaE#g3C3d%$Ub|(DTcS4t7KLzBsBHJlb@w&n$`$ zwqUOMJsy2_zod)kNl&(UlYbWH!S4T22>PtLsqdhN8LT}^zB+BL_W72gf9z4P2R%k^ z{S*0`g;$vmp2p(e@kd`@{Vbft2k&L>JC5$(e@Q*M!alN$eC=|2KXO$@I=XlMl#}S= zUauZa9@O23;){EXL0^2&qX1luyQ>#7vM{$L z?wSSUKb&9uPpXfRp^012e>T>Ape%{AHC`S#hFq2KRQr7C)#Ue_4UsbES|4_ 
z=nB8}`^owGmx69%l?~r7)PF(!1DEHj_4CQK^{+$!J&_0fH&e$8>VvTV&H|Svjn?re z7xZ6nmz_V1jUpHHUv#?rKz$K-tfO+^2*`=p8Fg8mCGZHdo>-zS`}?Y|x7Kb<_B|K=YZMe_|m|D}Tm$^v43 zA?N2mbbtG(?fT^Pp;D|5CFO7DKlDl$>tgb^^Pf4J-+~>`kJrC?!knM~&@=QM*D&Yi zzdh*RCHvTt3+6xc?37+jjqdNM5A_e`U%~tb9#D1U++lJy|NS@f{hRs!%{~D2 zm-d0q^STARua4*8{P~}*JAZzn_VXR*`;PH%KL5sDw4eW9**|ohF#iWrF+LFETtr-qrrGyzV^t+x+|E{&6Kx!ko`PJU>J4D%3~F4>gy6Z2vfB z8%-|AKa9`b@YX2C0;7|1XwiWLmNxEy7k_+me z6V#8LV~)do4fR1#|IqD+4Tb#D(i&(smrU-~YO)o+Us3+$!TWz8 z@mxUfU$Fn8H)U*w`ByApgkwnr|c*~{>h{f~u591ZHS6hMwl%W&4MD{6KO+|INexo3qf2T+n~f z?Z!k6CKvQybXjOE`LH|YfOq&AVFM1dIhvLXBa=!nv{UgqJ2sz(>+5X{mALf5J zU(kQy{?eAMdp{-bYX6v91NA}p|7W&;EKY{~7@VK~*!~fbSwP;^{^4i|`ybA4|Nj5; zANP-1-xTKj{FjIGU(fe9Fz4n!wtpO1%l0FF{ Date: Thu, 12 May 2022 21:44:12 -0600 Subject: [PATCH 006/172] examples/fluids: add IJacobian for SUPG I think this is exact for Galerkin on channel.yaml and blasius.yaml. Specifically, SNES converges almost perfectly in this test than when -snes_mf_operator is added. -options_file examples/fluids/channel.yaml -snes_monitor -ksp_converged_reason -stab none -ts_max_steps 3 -newtonian_unit_tests -ksp_rtol 1e-10 -order 2 -ts_dt 3e-6 -pc_type lu I say "think" because something is fishy with the viscous energy term velocity*stress. If I disable that term, then convergence is clearly better with the analytic Jacobian (makes sense because it's accuracy is machine epsilon) than with -snes_mf_operator. This is unexpected, but the difference is near the limit of numerical stability so I'm not confident it's wrong. Also, the term itself is so simple. The Jacobian is inexact for SUPG in that Tau_d is frozen. 
WIP: examples/fluids: support MatShell and Blasius_Inflow_Jacobian --- examples/fluids/Makefile | 2 +- examples/fluids/navierstokes.c | 2 + examples/fluids/navierstokes.h | 24 ++- examples/fluids/problems/blasius.c | 8 + examples/fluids/problems/newtonian.c | 70 +++++++- examples/fluids/qfunctions/blasius.h | 149 ++++++++++++++-- examples/fluids/qfunctions/newtonian.h | 156 ++++++++++++++++- examples/fluids/qfunctions/newtonian_types.h | 5 + examples/fluids/src/setuplibceed.c | 173 +++++++++++++++++-- examples/fluids/src/setupts.c | 126 ++++++++++++++ 10 files changed, 673 insertions(+), 42 deletions(-) diff --git a/examples/fluids/Makefile b/examples/fluids/Makefile index c53eaaed22..359aaeb87c 100644 --- a/examples/fluids/Makefile +++ b/examples/fluids/Makefile @@ -25,7 +25,7 @@ CC = $(call pkgconf, --variable=ccompiler $(PETSc.pc) $(ceed.pc)) CFLAGS = -std=c99 \ $(call pkgconf, --variable=cflags_extra $(PETSc.pc)) \ $(call pkgconf, --cflags-only-other $(PETSc.pc)) \ - $(OPT) + $(OPT) $(OPT_EXAMPLES) CPPFLAGS = $(call pkgconf, --cflags-only-I $(PETSc.pc) $(ceed.pc)) \ $(call pkgconf, --variable=cflags_dep $(PETSc.pc)) LDFLAGS = $(call pkgconf, --libs-only-L --libs-only-other $(PETSc.pc) $(ceed.pc)) diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index 14b368f6b4..c55b42c735 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -288,6 +288,7 @@ int main(int argc, char **argv) { CeedVectorDestroy(&user->q_ceed); CeedVectorDestroy(&user->q_dot_ceed); CeedVectorDestroy(&user->g_ceed); + CeedVectorDestroy(&user->coo_values); // -- QFunctions CeedQFunctionDestroy(&ceed_data->qf_setup_vol); @@ -317,6 +318,7 @@ int main(int argc, char **argv) { CeedOperatorDestroy(&user->op_ifunction_vol); CeedOperatorDestroy(&user->op_rhs); CeedOperatorDestroy(&user->op_ifunction); + CeedOperatorDestroy(&user->op_ijacobian); // -- Ceed CeedDestroy(&ceed); diff --git a/examples/fluids/navierstokes.h 
b/examples/fluids/navierstokes.h index 9275ea4f38..546513b4bd 100644 --- a/examples/fluids/navierstokes.h +++ b/examples/fluids/navierstokes.h @@ -128,7 +128,9 @@ struct AppCtx_private { struct CeedData_private { CeedVector x_coord, q_data; CeedQFunction qf_setup_vol, qf_ics, qf_rhs_vol, qf_ifunction_vol, - qf_setup_sur, qf_apply_inflow, qf_apply_outflow; + qf_setup_sur, + qf_apply_inflow, qf_apply_inflow_jacobian, + qf_apply_outflow, qf_apply_outflow_jacobian; CeedBasis basis_x, basis_xc, basis_q, basis_x_sur, basis_q_sur; CeedElemRestriction elem_restr_x, elem_restr_q, elem_restr_qd_i; CeedOperator op_setup_vol, op_ics; @@ -145,8 +147,9 @@ struct User_private { Vec M; Physics phys; AppCtx app_ctx; - CeedVector q_ceed, q_dot_ceed, g_ceed; - CeedOperator op_rhs_vol, op_rhs, op_ifunction_vol, op_ifunction; + CeedVector q_ceed, q_dot_ceed, g_ceed, coo_values; + CeedOperator op_rhs_vol, op_rhs, op_ifunction_vol, op_ifunction, op_ijacobian; + bool matrices_set_up; }; // Units @@ -189,6 +192,7 @@ struct Physics_private { CeedContextFieldLabel solution_time_label; CeedContextFieldLabel timestep_size_label; CeedContextFieldLabel ics_time_label; + CeedContextFieldLabel ijacobian_time_shift_label; }; typedef struct { @@ -201,10 +205,11 @@ typedef struct { // *INDENT-OFF* typedef struct ProblemData_private ProblemData; struct ProblemData_private { - CeedInt dim, q_data_size_vol, q_data_size_sur; + CeedInt dim, q_data_size_vol, q_data_size_sur, jac_data_size_sur; CeedScalar dm_scale; ProblemQFunctionSpec setup_vol, setup_sur, ics, apply_vol_rhs, apply_vol_ifunction, - apply_inflow, apply_outflow; + apply_vol_ijacobian, apply_inflow, apply_outflow, + apply_inflow_jacobian, apply_outflow_jacobian; bool non_zero_time; PetscErrorCode (*bc)(PetscInt, PetscReal, const PetscReal[], PetscInt, PetscScalar[], void *); @@ -274,9 +279,12 @@ PetscErrorCode GetRestrictionForDomain(Ceed ceed, DM dm, CeedInt height, // Utility function to create CEED Composite Operator for the entire domain 
PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, CeedData ceed_data, Physics phys, - CeedOperator op_apply_vol, CeedInt height, - CeedInt P_sur, CeedInt Q_sur, CeedInt q_data_size_sur, - CeedOperator *op_apply); + CeedOperator op_apply_vol, + CeedOperator op_apply_ijacobian_vol, + CeedInt height, + CeedInt P_sur, CeedInt Q_sur, + CeedInt q_data_size_sur, CeedInt jac_data_size_sur, + CeedOperator *op_apply, CeedOperator *op_apply_ijacobian); PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, AppCtx app_ctx, ProblemData *problem, SimpleBC bc); diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c index 4d5d20b0b4..54fa01e6ae 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -94,8 +94,12 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { problem->ics.qfunction_loc = ICsBlasius_loc; problem->apply_inflow.qfunction = Blasius_Inflow; problem->apply_inflow.qfunction_loc = Blasius_Inflow_loc; + problem->apply_inflow_jacobian.qfunction = Blasius_Inflow_Jacobian; + problem->apply_inflow_jacobian.qfunction_loc = Blasius_Inflow_Jacobian_loc; problem->apply_outflow.qfunction = Blasius_Outflow; problem->apply_outflow.qfunction_loc = Blasius_Outflow_loc; + problem->apply_outflow_jacobian.qfunction = Blasius_Outflow_Jacobian; + problem->apply_outflow_jacobian.qfunction_loc = Blasius_Outflow_Jacobian_loc; // CeedScalar mu = .04; // Pa s, dynamic viscosity CeedScalar Uinf = 40; // m/s @@ -169,7 +173,11 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { problem->ics.qfunction_context = blasius_context; CeedQFunctionContextReferenceCopy(blasius_context, &problem->apply_inflow.qfunction_context); + CeedQFunctionContextReferenceCopy(blasius_context, + &problem->apply_inflow_jacobian.qfunction_context); CeedQFunctionContextReferenceCopy(blasius_context, &problem->apply_outflow.qfunction_context); + 
CeedQFunctionContextReferenceCopy(blasius_context, + &problem->apply_outflow_jacobian.qfunction_context); PetscFunctionReturn(0); } diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index 04b52e144f..e8ce66fd36 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -12,13 +12,66 @@ #include "../qfunctions/setupgeo.h" #include "../qfunctions/newtonian.h" +// Compute relative error |a - b|/|s| +static PetscErrorCode CheckPrimitiveWithTolerance(StatePrimitive sY, + StatePrimitive aY, StatePrimitive bY, const char *name, PetscReal rtol_pressure, + PetscReal rtol_velocity, PetscReal rtol_temperature) { + PetscFunctionBeginUser; + StatePrimitive eY; // relative error + eY.pressure = (aY.pressure - bY.pressure) / sY.pressure; + PetscScalar u = sqrt(Square(sY.velocity[0]) + Square(sY.velocity[1]) + Square( + sY.velocity[2])); + for (int j=0; j<3; j++) eY.velocity[j] = (aY.velocity[j] - bY.velocity[j]) / u; + eY.temperature = (aY.temperature - bY.temperature) / sY.temperature; + if (fabs(eY.pressure) > rtol_pressure) printf("%s: pressure error %g\n", name, + eY.pressure); + for (int j=0; j<3; + j++) if (fabs(eY.velocity[j]) > rtol_velocity) + printf("%s: velocity[%d] error %g\n", name, j, eY.velocity[j]); + if (fabs(eY.temperature) > rtol_temperature) + printf("%s: temperature error %g\n", name, eY.temperature); + PetscFunctionReturn(0); +} + +static PetscErrorCode UnitTests_Newtonian(User user, + NewtonianIdealGasContext gas) { + Units units = user->units; + const CeedScalar eps = 1e-6; + const CeedScalar kg = units->kilogram, m = units->meter, sec = units->second, + Pascal = units->Pascal; + + PetscFunctionBeginUser; + const CeedScalar rho = 1.2 * kg / (m*m*m), u = 40 * m/sec; + CeedScalar U[5] = {rho, rho*u, rho *u*1.1, rho *u*1.2, 250e3*Pascal + .5*rho *u*u}; + const CeedScalar x[3] = {.1, .2, .3}; + State s = StateFromU(gas, U, x); + for (int i=0; i<8; i++) { + CeedScalar dU[5] = {0}, dx[3] 
= {0}; + if (i < 5) dU[i] = U[i]; + else dx[i-5] = x[i-5]; + State ds = StateFromU_fwd(gas, s, dU, x, dx); + for (int j=0; j<5; j++) dU[j] = (1 + eps * (i == j)) * U[j]; + for (int j=0; j<3; j++) dx[j] = (1 + eps * (i == 5 + j)) * x[j]; + State t = StateFromU(gas, dU, dx); + StatePrimitive dY; + dY.pressure = (t.Y.pressure - s.Y.pressure) / eps; + for (int j=0; j<3; j++) + dY.velocity[j] = (t.Y.velocity[j] - s.Y.velocity[j]) / eps; + dY.temperature = (t.Y.temperature - s.Y.temperature) / eps; + char buf[128]; + snprintf(buf, sizeof buf, "StateFromU_fwd i=%d", i); + PetscCall(CheckPrimitiveWithTolerance(dY, ds.Y, dY, buf, 5e-6, 1e-6, 1e-6)); + } + PetscFunctionReturn(0); +} + PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { SetupContext setup_context; User user = *(User *)ctx; StabilizationType stab; MPI_Comm comm = PETSC_COMM_WORLD; PetscBool implicit; - PetscBool has_curr_time = PETSC_FALSE; + PetscBool has_curr_time = PETSC_FALSE, unit_tests; PetscInt ierr; NewtonianIdealGasContext newtonian_ig_ctx; CeedQFunctionContext newtonian_ig_context; @@ -33,6 +86,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { problem->dim = 3; problem->q_data_size_vol = 10; problem->q_data_size_sur = 4; + problem->jac_data_size_sur = 5; problem->setup_vol.qfunction = Setup; problem->setup_vol.qfunction_loc = Setup_loc; problem->ics.qfunction = ICsNewtonianIG; @@ -43,6 +97,8 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { problem->apply_vol_rhs.qfunction_loc = RHSFunction_Newtonian_loc; problem->apply_vol_ifunction.qfunction = IFunction_Newtonian; problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc; + problem->apply_vol_ijacobian.qfunction = IJacobian_Newtonian; + problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_loc; problem->bc = NULL; problem->bc_ctx = setup_context; problem->non_zero_time = PETSC_FALSE; @@ -116,6 +172,9 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData 
*problem, DM dm, void *ctx) { ierr = PetscOptionsBool("-implicit", "Use implicit (IFunction) formulation", NULL, implicit=PETSC_FALSE, &implicit, NULL); CHKERRQ(ierr); + ierr = PetscOptionsBool("-newtonian_unit_tests", "Run Newtonian unit tests", + NULL, unit_tests=PETSC_FALSE, &unit_tests, NULL); + CHKERRQ(ierr); // -- Units ierr = PetscOptionsScalar("-units_meter", "1 meter in scaled length units", @@ -214,9 +273,18 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { FreeContextPetsc); CeedQFunctionContextRegisterDouble(newtonian_ig_context, "timestep size", offsetof(struct NewtonianIdealGasContext_, dt), 1, "Size of timestep, delta t"); + CeedQFunctionContextRegisterDouble(newtonian_ig_context, "ijacobian time shift", + offsetof(struct NewtonianIdealGasContext_, ijacobian_time_shift), 1, + "Shift for mass matrix in IJacobian"); problem->apply_vol_rhs.qfunction_context = newtonian_ig_context; CeedQFunctionContextReferenceCopy(newtonian_ig_context, &problem->apply_vol_ifunction.qfunction_context); + CeedQFunctionContextReferenceCopy(newtonian_ig_context, + &problem->apply_vol_ijacobian.qfunction_context); + + if (unit_tests) { + PetscCall(UnitTests_Newtonian(user, newtonian_ig_ctx)); + } PetscFunctionReturn(0); } diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h index 99279ae68d..7c86e20ccc 100644 --- a/examples/fluids/qfunctions/blasius.h +++ b/examples/fluids/qfunctions/blasius.h @@ -216,16 +216,12 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q, // Temperature is being set weakly (theta0) and for constant cv this sets E_internal E_internal = rho * cv * theta0; // Find pressure using - P=rho*Rd*theta0; // interior rho with exterior T - E_kinetic = .5 * rho * (velocity[0]*velocity[0] + - velocity[1]*velocity[1] + - velocity[2]*velocity[2]); + P = rho*Rd*theta0; // interior rho with exterior T + E_kinetic = .5 * rho * Dot3(velocity, velocity); } else { // Fixing rho weakly on the inflow to a 
value consistent with theta0 and P0 rho = rho_0; - E_kinetic = .5 * rho * (velocity[0]*velocity[0] + - velocity[1]*velocity[1] + - velocity[2]*velocity[2]); + E_kinetic = .5 * rho * Dot3(velocity, velocity); E_internal = q[4][i] - E_kinetic; // uses set rho and u but E from solution P = E_internal * (gamma - 1.); } @@ -240,9 +236,7 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q, // Zero v so all future terms can safely sum into it for (int j=0; j<5; j++) v[j][i] = 0.; - const CeedScalar u_normal = norm[0]*velocity[0] + - norm[1]*velocity[1] + - norm[2]*velocity[2]; + const CeedScalar u_normal = Dot3(norm, velocity); // The Physics // -- Density @@ -262,6 +256,80 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q, return 0; } +CEED_QFUNCTION(Blasius_Inflow_Jacobian)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { + // *INDENT-OFF* + // Inputs + const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1], + (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + + // Outputs + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + // *INDENT-ON* + const BlasiusContext context = (BlasiusContext)ctx; + const bool implicit = context->implicit; + const CeedScalar mu = context->newtonian_ctx.mu; + const CeedScalar cv = context->newtonian_ctx.cv; + const CeedScalar cp = context->newtonian_ctx.cp; + const CeedScalar Rd = cp - cv; + const CeedScalar gamma = cp/cv; + + const CeedScalar theta0 = context->theta0; + const CeedScalar P0 = context->P0; + const CeedScalar delta0 = context->delta0; + const CeedScalar Uinf = context->Uinf; + const bool weakT = context->weakT; + const CeedScalar rho_0 = P0 / (Rd * theta0); + const CeedScalar x0 = Uinf*rho_0 / (mu*25/ (delta0*delta0) ); + + CeedPragmaSIMD + // Quadrature Point Loop + for (CeedInt i=0; inewtonian_ctx); + + // enabling user to choose between weak T and weak rho inflow + 
CeedScalar drho, dE, dP; + if (weakT) { + // rho should be from the current solution + drho = dq[0][i]; + CeedScalar dE_internal = drho * cv * theta0; + CeedScalar dE_kinetic = .5 * drho * Dot3(velocity, velocity); + dE = dE_internal + dE_kinetic; + dP = drho * Rd * theta0; // interior rho with exterior T + } else { // rho specified, E_internal from solution + drho = 0; + dE = dq[4][i]; + dP = dE * (gamma - 1.); + } + const CeedScalar norm[3] = {q_data_sur[1][i], + q_data_sur[2][i], + q_data_sur[3][i] + }; + + const CeedScalar u_normal = Dot3(norm, velocity); + + v[0][i] = - wdetJb * drho * u_normal; + for (int j=0; j<3; j++) + v[j+1][i] = -wdetJb * (drho * u_normal * velocity[j] + norm[j] * dP); + v[4][i] = - wdetJb * u_normal * (dE + dP); + } // End Quadrature Point Loop + return 0; +} + // ***************************************************************************** CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q, const CeedScalar *const *in, @@ -272,7 +340,8 @@ CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q, (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1], (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], + (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[1]; // *INDENT-ON* const BlasiusContext context = (BlasiusContext)ctx; @@ -319,8 +388,7 @@ CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q, // Implementing outflow condition const CeedScalar P = P0; // pressure - const CeedScalar u_normal = norm[0]*u[0] + norm[1]*u[1] + - norm[2]*u[2]; // Normal velocity + const CeedScalar u_normal = Dot3(norm, u); // Normal velocity // Calculate prescribed outflow traction values const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]}; @@ -341,7 +409,62 @@ CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q, v[4][i] -= wdetJb * u_normal * (E + P); v[4][i] += wdetJb * t12 * 
velocity[1]; + // Save values for Jacobian + jac_data_sur[0][i] = rho; + jac_data_sur[1][i] = u[0]; + jac_data_sur[2][i] = u[1]; + jac_data_sur[3][i] = u[2]; + jac_data_sur[4][i] = E; } // End Quadrature Point Loop return 0; } + +CEED_QFUNCTION(Blasius_Outflow_Jacobian)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { + // *INDENT-OFF* + // Inputs + const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1], + (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + // Outputs + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + // *INDENT-ON* + + const BlasiusContext context = (BlasiusContext)ctx; + const bool implicit = context->implicit; + + CeedPragmaSIMD + // Quadrature Point Loop + for (CeedInt i=0; iP0; + const CeedScalar dP = 0; + + v[0][i] = -wdetJb * dmomentum_normal; + for (int j=0; j<3; j++) + v[j+1][i] = -wdetJb * (dmomentum_normal * u[j] + rho * u_normal * du[j]); + v[4][i] = -wdetJb * (du_normal * (E + P) + u_normal * (dE + dP)); + } // End Quadrature Point Loop + return 0; +} + #endif // blasius_h diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 6d56867651..5d66a4c85b 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -37,11 +37,6 @@ typedef struct { StatePrimitive Y; } State; -CEED_QFUNCTION_HELPER CeedScalar Dot3(const CeedScalar u[3], - const CeedScalar v[3]) { - return u[0]*v[0] + u[1]*v[1] + u[2]*v[2]; -} - CEED_QFUNCTION_HELPER StatePrimitive StatePrimitiveFromConservative( NewtonianIdealGasContext gas, StateConservative U, const CeedScalar x[3]) { StatePrimitive Y; @@ -165,6 +160,18 @@ CEED_QFUNCTION_HELPER void ViscousEnergyFlux(NewtonianIdealGasContext gas, } } +CEED_QFUNCTION_HELPER void ViscousEnergyFlux_fwd(NewtonianIdealGasContext gas, + StatePrimitive Y, StatePrimitive dY, const State 
grad_ds[3], + const CeedScalar stress[3][3], + const CeedScalar dstress[3][3], + CeedScalar dFe[3]) { + for (int i=0; i<3; i++) { + dFe[i] = - Y.velocity[0] * dstress[0][i] - dY.velocity[0] * stress[0][i] + - Y.velocity[1] * dstress[1][i] - dY.velocity[1] * stress[1][i] + - Y.velocity[2] * dstress[2][i] - dY.velocity[2] * stress[2][i] + - gas->k * grad_ds[i].Y.temperature; + } +} // ***************************************************************************** // Helper function for computing flux Jacobian // ***************************************************************************** @@ -741,12 +748,149 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, break; } for (int j=0; j<5; j++) jac_data[j][i] = U[j]; - for (int j=0; j<3; j++) jac_data[5+j][i] = Tau_d[j]; + for (int j=0; j<6; j++) jac_data[5+j][i] = kmstress[j]; + for (int j=0; j<3; j++) jac_data[5+6+j][i] = Tau_d[j]; } // End Quadrature Point Loop // Return return 0; } + +CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { + // *INDENT-OFF* + // Inputs + const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], + (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], + (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + // Outputs + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], + (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; + // *INDENT-ON* + // Context + NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; + const CeedScalar *g = context->g; + const CeedScalar cp = context->cp; + const CeedScalar cv = context->cv; + const CeedScalar Rd = cp - cv; + const CeedScalar gamma = cp / cv; + + CeedPragmaSIMD + // Quadrature Point Loop + for (CeedInt i=0; iijacobian_time_shift * dU[j] - dbody_force[j]); + + if (1) 
{ + CeedScalar jacob_F_conv[3][5][5] = {0}; + computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total, + gamma, g, x_i); + CeedScalar grad_dU[5][3]; + for (int j=0; j<3; j++) { + grad_dU[0][j] = grad_ds[j].U.density; + for (int k=0; k<3; k++) grad_dU[k+1][j] = grad_ds[j].U.momentum[k]; + grad_dU[4][j] = grad_ds[j].U.E_total; + } + CeedScalar dstrong_conv[5] = {0}; + for (int j=0; j<3; j++) + for (int k=0; k<5; k++) + for (int l=0; l<5; l++) + dstrong_conv[k] += jacob_F_conv[j][k][l] * grad_dU[l][j]; + CeedScalar dstrong_res[5]; + for (int j=0; j<5; j++) + dstrong_res[j] = context->ijacobian_time_shift * dU[j] + dstrong_conv[j] - + dbody_force[j]; + CeedScalar dtau_strong_res[5] = {0.}, dtau_strong_res_conservative[5] = {0}; + dtau_strong_res[0] = Tau_d[0] * dstrong_res[0]; + dtau_strong_res[1] = Tau_d[1] * dstrong_res[1]; + dtau_strong_res[2] = Tau_d[1] * dstrong_res[2]; + dtau_strong_res[3] = Tau_d[1] * dstrong_res[3]; + dtau_strong_res[4] = Tau_d[2] * dstrong_res[4]; + PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, + dtau_strong_res, dtau_strong_res_conservative); + CeedScalar dstab[5][3] = {0}; + for (int j=0; j<3; j++) + for (int k=0; k<5; k++) + for (int l=0; l<5; l++) + dstab[k][j] += jacob_F_conv[j][k][l] * dtau_strong_res_conservative[l]; + for (int j=0; j<5; j++) + for (int k=0; k<3; k++) + Grad_v[k][j][i] += wdetJ*(dstab[j][0] * dXdx[k][0] + + dstab[j][1] * dXdx[k][1] + + dstab[j][2] * dXdx[k][2]); + + } + } // End Quadrature Point Loop + return 0; +} // ***************************************************************************** #endif // newtonian_h diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h index 31c198f38d..a548d5bd96 100644 --- a/examples/fluids/qfunctions/newtonian_types.h +++ b/examples/fluids/qfunctions/newtonian_types.h @@ -40,9 +40,14 @@ struct NewtonianIdealGasContext_ { CeedScalar Ctau_M; CeedScalar Ctau_E; CeedScalar dt; + 
CeedScalar ijacobian_time_shift; StabilizationType stabilization; }; CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; } +CEED_QFUNCTION_HELPER CeedScalar Dot3(const CeedScalar u[3], + const CeedScalar v[3]) { + return u[0]*v[0] + u[1]*v[1] + u[2]*v[2]; +} #endif // newtonian_types_h diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c index ec2a15b787..7031149bbc 100644 --- a/examples/fluids/src/setuplibceed.c +++ b/examples/fluids/src/setuplibceed.c @@ -76,9 +76,12 @@ PetscErrorCode GetRestrictionForDomain(Ceed ceed, DM dm, CeedInt height, // Utility function to create CEED Composite Operator for the entire domain PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, CeedData ceed_data, Physics phys, - CeedOperator op_apply_vol, CeedInt height, - CeedInt P_sur, CeedInt Q_sur, CeedInt q_data_size_sur, - CeedOperator *op_apply) { + CeedOperator op_apply_vol, + CeedOperator op_apply_ijacobian_vol, + CeedInt height, + CeedInt P_sur, CeedInt Q_sur, + CeedInt q_data_size_sur, CeedInt jac_data_size_sur, + CeedOperator *op_apply, CeedOperator *op_apply_ijacobian) { //CeedInt dim; DMLabel domain_label; PetscErrorCode ierr; @@ -86,9 +89,13 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, // Create Composite Operaters CeedCompositeOperatorCreate(ceed, op_apply); + if (op_apply_ijacobian) + CeedCompositeOperatorCreate(ceed, op_apply_ijacobian); // --Apply Sub-Operator for the volume CeedCompositeOperatorAddSub(*op_apply, op_apply_vol); + if (op_apply_ijacobian) + CeedCompositeOperatorAddSub(*op_apply_ijacobian, op_apply_ijacobian_vol); // -- Create Sub-Operator for in/outflow BCs if (phys->has_neumann || 1) { @@ -103,7 +110,8 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, // --- Create Sub-Operator for inflow boundaries for (CeedInt i=0; i < bc->num_inflow; i++) { CeedVector q_data_sur; - CeedOperator op_setup_sur, op_apply_inflow; + CeedOperator op_setup_sur, 
op_apply_inflow, + op_apply_inflow_jacobian = NULL; CeedElemRestriction elem_restr_x_sur, elem_restr_q_sur, elem_restr_qd_i_sur; // ---- CEED Restriction @@ -140,12 +148,28 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, CeedOperatorSetField(op_apply_inflow, "v", elem_restr_q_sur, ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); + if (ceed_data->qf_apply_inflow_jacobian) { + CeedOperatorCreate(ceed, ceed_data->qf_apply_inflow_jacobian, NULL, NULL, + &op_apply_inflow_jacobian); + CeedOperatorSetField(op_apply_inflow_jacobian, "dq", elem_restr_q_sur, + ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_apply_inflow_jacobian, "surface qdata", + elem_restr_qd_i_sur, + CEED_BASIS_COLLOCATED, q_data_sur); + CeedOperatorSetField(op_apply_inflow_jacobian, "x", elem_restr_x_sur, + ceed_data->basis_x_sur, ceed_data->x_coord); + CeedOperatorSetField(op_apply_inflow_jacobian, "v", elem_restr_q_sur, + ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); + } + // ----- Apply CEED operator for Setup CeedOperatorApply(op_setup_sur, ceed_data->x_coord, q_data_sur, CEED_REQUEST_IMMEDIATE); // ----- Apply Sub-Operator for Physics CeedCompositeOperatorAddSub(*op_apply, op_apply_inflow); + if (op_apply_ijacobian) + CeedCompositeOperatorAddSub(*op_apply_ijacobian, op_apply_inflow_jacobian); // ----- Cleanup CeedVectorDestroy(&q_data_sur); @@ -154,19 +178,26 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, CeedElemRestrictionDestroy(&elem_restr_qd_i_sur); CeedOperatorDestroy(&op_setup_sur); CeedOperatorDestroy(&op_apply_inflow); + CeedOperatorDestroy(&op_apply_inflow_jacobian); } // --- Create Sub-Operator for outflow boundaries for (CeedInt i=0; i < bc->num_outflow; i++) { - CeedVector q_data_sur; - CeedOperator op_setup_sur, op_apply_outflow; - CeedElemRestriction elem_restr_x_sur, elem_restr_q_sur, elem_restr_qd_i_sur; + CeedVector q_data_sur, jac_data_sur; + CeedOperator op_setup_sur, op_apply_outflow, + 
op_apply_outflow_jacobian = NULL; + CeedElemRestriction elem_restr_x_sur, elem_restr_q_sur, elem_restr_qd_i_sur, + elem_restr_jd_i_sur; // ---- CEED Restriction ierr = GetRestrictionForDomain(ceed, dm, height, domain_label, bc->outflows[i], Q_sur, q_data_size_sur, &elem_restr_q_sur, &elem_restr_x_sur, &elem_restr_qd_i_sur); CHKERRQ(ierr); + ierr = GetRestrictionForDomain(ceed, dm, height, domain_label, bc->outflows[i], + Q_sur, jac_data_size_sur, NULL, NULL, + &elem_restr_jd_i_sur); + CHKERRQ(ierr); // ---- CEED Vector PetscInt loc_num_elem_sur; @@ -174,6 +205,8 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, CeedVectorCreate(ceed, q_data_size_sur*loc_num_elem_sur*num_qpts_sur, &q_data_sur); + CeedElemRestrictionCreateVector(elem_restr_jd_i_sur, &jac_data_sur, NULL); + // ---- CEED Operator // ----- CEED Operator for Setup (geometric factors) CeedOperatorCreate(ceed, ceed_data->qf_setup_sur, NULL, NULL, &op_setup_sur); @@ -195,6 +228,24 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, ceed_data->basis_x_sur, ceed_data->x_coord); CeedOperatorSetField(op_apply_outflow, "v", elem_restr_q_sur, ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_apply_outflow, "surface jacobian data", + elem_restr_jd_i_sur, + CEED_BASIS_COLLOCATED, jac_data_sur); + + if (ceed_data->qf_apply_outflow_jacobian) { + CeedOperatorCreate(ceed, ceed_data->qf_apply_outflow_jacobian, NULL, NULL, + &op_apply_outflow_jacobian); + CeedOperatorSetField(op_apply_outflow_jacobian, "dq", elem_restr_q_sur, + ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_apply_outflow_jacobian, "surface qdata", + elem_restr_qd_i_sur, + CEED_BASIS_COLLOCATED, q_data_sur); + CeedOperatorSetField(op_apply_outflow_jacobian, "surface jacobian data", + elem_restr_jd_i_sur, + CEED_BASIS_COLLOCATED, jac_data_sur); + CeedOperatorSetField(op_apply_outflow_jacobian, "v", elem_restr_q_sur, + ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); + } // 
----- Apply CEED operator for Setup CeedOperatorApply(op_setup_sur, ceed_data->x_coord, q_data_sur, @@ -202,14 +253,19 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, // ----- Apply Sub-Operator for Physics CeedCompositeOperatorAddSub(*op_apply, op_apply_outflow); + if (op_apply_ijacobian) + CeedCompositeOperatorAddSub(*op_apply_ijacobian, op_apply_outflow_jacobian); // ----- Cleanup CeedVectorDestroy(&q_data_sur); + CeedVectorDestroy(&jac_data_sur); CeedElemRestrictionDestroy(&elem_restr_q_sur); CeedElemRestrictionDestroy(&elem_restr_x_sur); CeedElemRestrictionDestroy(&elem_restr_qd_i_sur); + CeedElemRestrictionDestroy(&elem_restr_jd_i_sur); CeedOperatorDestroy(&op_setup_sur); CeedOperatorDestroy(&op_apply_outflow); + CeedOperatorDestroy(&op_apply_outflow_jacobian); } } @@ -234,7 +290,7 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, const CeedInt dim = problem->dim, num_comp_x = problem->dim, q_data_size_vol = problem->q_data_size_vol, - jac_data_size_vol = num_comp_q + 3, + jac_data_size_vol = num_comp_q + 6 + 3, P = app_ctx->degree + 1, Q = P + app_ctx->q_extra; CeedElemRestriction elem_restr_jd_i; @@ -338,6 +394,29 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, jac_data_size_vol, CEED_EVAL_NONE); } + CeedQFunction qf_ijacobian_vol = NULL; + if (problem->apply_vol_ijacobian.qfunction) { + CeedQFunctionCreateInterior(ceed, 1, problem->apply_vol_ijacobian.qfunction, + problem->apply_vol_ijacobian.qfunction_loc, &qf_ijacobian_vol); + CeedQFunctionSetContext(qf_ijacobian_vol, + problem->apply_vol_ijacobian.qfunction_context); + CeedQFunctionContextDestroy(&problem->apply_vol_ijacobian.qfunction_context); + CeedQFunctionAddInput(qf_ijacobian_vol, "dq", num_comp_q, + CEED_EVAL_INTERP); + CeedQFunctionAddInput(qf_ijacobian_vol, "Grad_dq", num_comp_q*dim, + CEED_EVAL_GRAD); + CeedQFunctionAddInput(qf_ijacobian_vol, "qdata", q_data_size_vol, + CEED_EVAL_NONE); + 
CeedQFunctionAddInput(qf_ijacobian_vol, "x", num_comp_x, + CEED_EVAL_INTERP); + CeedQFunctionAddInput(qf_ijacobian_vol, "jac_data", + jac_data_size_vol, CEED_EVAL_NONE); + CeedQFunctionAddOutput(qf_ijacobian_vol, "v", num_comp_q, + CEED_EVAL_INTERP); + CeedQFunctionAddOutput(qf_ijacobian_vol, "Grad_v", num_comp_q*dim, + CEED_EVAL_GRAD); + } + // --------------------------------------------------------------------------- // Element coordinates // --------------------------------------------------------------------------- @@ -432,6 +511,28 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, user->op_ifunction_vol = op; } + CeedOperator op_ijacobian_vol = NULL; + if (qf_ijacobian_vol) { + CeedOperator op; + CeedOperatorCreate(ceed, qf_ijacobian_vol, NULL, NULL, &op); + CeedOperatorSetField(op, "dq", ceed_data->elem_restr_q, ceed_data->basis_q, + CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op, "Grad_dq", ceed_data->elem_restr_q, ceed_data->basis_q, + CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op, "qdata", ceed_data->elem_restr_qd_i, + CEED_BASIS_COLLOCATED, ceed_data->q_data); + CeedOperatorSetField(op, "x", ceed_data->elem_restr_x, ceed_data->basis_x, + ceed_data->x_coord); + CeedOperatorSetField(op, "jac_data", elem_restr_jd_i, + CEED_BASIS_COLLOCATED, jac_data); + CeedOperatorSetField(op, "v", ceed_data->elem_restr_q, ceed_data->basis_q, + CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op, "Grad_v", ceed_data->elem_restr_q, ceed_data->basis_q, + CEED_VECTOR_ACTIVE); + op_ijacobian_vol = op; + CeedQFunctionDestroy(&qf_ijacobian_vol); + } + // ***************************************************************************** // Set up CEED objects for the exterior domain (surface) // ***************************************************************************** @@ -439,7 +540,8 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, dim_sur = dim - height, P_sur = app_ctx->degree + 1, Q_sur = P_sur + app_ctx->q_extra; - 
const CeedInt q_data_size_sur = problem->q_data_size_sur; + const CeedInt q_data_size_sur = problem->q_data_size_sur, + jac_data_size_sur = problem->jac_data_size_sur; // ----------------------------------------------------------------------------- // CEED Bases @@ -483,6 +585,22 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CeedQFunctionAddOutput(ceed_data->qf_apply_inflow, "v", num_comp_q, CEED_EVAL_INTERP); } + if (problem->apply_inflow_jacobian.qfunction) { + CeedQFunctionCreateInterior(ceed, 1, problem->apply_inflow_jacobian.qfunction, + problem->apply_inflow_jacobian.qfunction_loc, + &ceed_data->qf_apply_inflow_jacobian); + CeedQFunctionSetContext(ceed_data->qf_apply_inflow_jacobian, + problem->apply_inflow_jacobian.qfunction_context); + CeedQFunctionContextDestroy(&problem->apply_inflow_jacobian.qfunction_context); + CeedQFunctionAddInput(ceed_data->qf_apply_inflow_jacobian, "dq", num_comp_q, + CEED_EVAL_INTERP); + CeedQFunctionAddInput(ceed_data->qf_apply_inflow_jacobian, "surface qdata", + q_data_size_sur, CEED_EVAL_NONE); + CeedQFunctionAddInput(ceed_data->qf_apply_inflow_jacobian, "x", num_comp_x, + CEED_EVAL_INTERP); + CeedQFunctionAddOutput(ceed_data->qf_apply_inflow_jacobian, "v", num_comp_q, + CEED_EVAL_INTERP); + } // -- Creat QFunction for outflow boundaries if (problem->apply_outflow.qfunction) { @@ -499,6 +617,26 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CEED_EVAL_INTERP); CeedQFunctionAddOutput(ceed_data->qf_apply_outflow, "v", num_comp_q, CEED_EVAL_INTERP); + CeedQFunctionAddOutput(ceed_data->qf_apply_outflow, "surface jacobian data", + jac_data_size_sur, + CEED_EVAL_NONE); + } + if (problem->apply_outflow_jacobian.qfunction) { + CeedQFunctionCreateInterior(ceed, 1, problem->apply_outflow_jacobian.qfunction, + problem->apply_outflow_jacobian.qfunction_loc, + &ceed_data->qf_apply_outflow_jacobian); + CeedQFunctionSetContext(ceed_data->qf_apply_outflow_jacobian, + 
problem->apply_outflow_jacobian.qfunction_context); + CeedQFunctionContextDestroy(&problem->apply_outflow_jacobian.qfunction_context); + CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, "dq", num_comp_q, + CEED_EVAL_INTERP); + CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, "surface qdata", + q_data_size_sur, CEED_EVAL_NONE); + CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, + "surface jacobian data", + jac_data_size_sur, CEED_EVAL_NONE); + CeedQFunctionAddOutput(ceed_data->qf_apply_outflow_jacobian, "v", num_comp_q, + CEED_EVAL_INTERP); } // ***************************************************************************** @@ -511,12 +649,21 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, // -- Create and apply CEED Composite Operator for the entire domain if (!user->phys->implicit) { // RHS ierr = CreateOperatorForDomain(ceed, dm, bc, ceed_data, user->phys, - user->op_rhs_vol, height, P_sur, Q_sur, - q_data_size_sur, &user->op_rhs); CHKERRQ(ierr); + user->op_rhs_vol, NULL, height, P_sur, Q_sur, + q_data_size_sur, 0, + &user->op_rhs, NULL); CHKERRQ(ierr); } else { // IFunction ierr = CreateOperatorForDomain(ceed, dm, bc, ceed_data, user->phys, - user->op_ifunction_vol, height, P_sur, Q_sur, - q_data_size_sur, &user->op_ifunction); CHKERRQ(ierr); + user->op_ifunction_vol, op_ijacobian_vol, + height, P_sur, Q_sur, + q_data_size_sur, jac_data_size_sur, + &user->op_ifunction, + op_ijacobian_vol ? 
&user->op_ijacobian : NULL); CHKERRQ(ierr); + if (user->op_ijacobian) { + CeedOperatorContextGetFieldLabel(user->op_ijacobian, "ijacobian time shift", + &user->phys->ijacobian_time_shift_label); + } + } CeedElemRestrictionDestroy(&elem_restr_jd_i); CeedVectorDestroy(&jac_data); diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c index 8bc6dedb1e..28ce4a409b 100644 --- a/examples/fluids/src/setupts.c +++ b/examples/fluids/src/setupts.c @@ -214,6 +214,129 @@ PetscErrorCode IFunction_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, Vec G, PetscFunctionReturn(0); } +static PetscErrorCode MatMult_NS_IJacobian(Mat J, Vec Q, Vec G) { + User user; + const PetscScalar *q; + PetscScalar *g; + Vec Q_loc, G_loc; + PetscMemType q_mem_type, g_mem_type; + PetscErrorCode ierr; + PetscFunctionBeginUser; + MatShellGetContext(J, &user); + // Get local vectors + ierr = DMGetLocalVector(user->dm, &Q_loc); CHKERRQ(ierr); + ierr = DMGetLocalVector(user->dm, &G_loc); CHKERRQ(ierr); + + // Global-to-local + ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); + ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Q_loc); CHKERRQ(ierr); + ierr = VecZeroEntries(G_loc); CHKERRQ(ierr); + + // Place PETSc vectors in CEED vectors + ierr = VecGetArrayReadAndMemType(Q_loc, &q, &q_mem_type); CHKERRQ(ierr); + ierr = VecGetArrayAndMemType(G_loc, &g, &g_mem_type); CHKERRQ(ierr); + CeedVectorSetArray(user->q_ceed, MemTypeP2C(q_mem_type), CEED_USE_POINTER, + (PetscScalar *)q); + CeedVectorSetArray(user->g_ceed, MemTypeP2C(g_mem_type), CEED_USE_POINTER, g); + + // Apply CEED operator + CeedOperatorApply(user->op_ijacobian, user->q_ceed, user->g_ceed, + CEED_REQUEST_IMMEDIATE); + + // Restore vectors + CeedVectorTakeArray(user->q_ceed, MemTypeP2C(q_mem_type), NULL); + CeedVectorTakeArray(user->g_ceed, MemTypeP2C(g_mem_type), NULL); + ierr = VecRestoreArrayReadAndMemType(Q_loc, &q); CHKERRQ(ierr); + ierr = VecRestoreArrayAndMemType(G_loc, &g); CHKERRQ(ierr); + + // Local-to-Global + ierr = 
VecZeroEntries(G); CHKERRQ(ierr); + ierr = DMLocalToGlobal(user->dm, G_loc, ADD_VALUES, G); CHKERRQ(ierr); + + // Restore vectors + ierr = DMRestoreLocalVector(user->dm, &Q_loc); CHKERRQ(ierr); + ierr = DMRestoreLocalVector(user->dm, &G_loc); CHKERRQ(ierr); + PetscFunctionReturn(0); +} + +PetscErrorCode MatGetDiagonal_NS_IJacobian(Mat A, Vec D) { + User user; + Vec D_loc; + PetscScalar *d; + PetscMemType mem_type; + + PetscFunctionBeginUser; + MatShellGetContext(A, &user); + PetscCall(DMGetLocalVector(user->dm, &D_loc)); + PetscCall(VecGetArrayAndMemType(D_loc, &d, &mem_type)); + CeedVectorSetArray(user->g_ceed, MemTypeP2C(mem_type), CEED_USE_POINTER, d); + CeedOperatorLinearAssembleDiagonal(user->op_ijacobian, user->g_ceed, + CEED_REQUEST_IMMEDIATE); + CeedVectorTakeArray(user->g_ceed, MemTypeP2C(mem_type), NULL); + PetscCall(VecRestoreArrayAndMemType(D_loc, &d)); + PetscCall(VecZeroEntries(D)); + PetscCall(DMLocalToGlobal(user->dm, D_loc, ADD_VALUES, D)); + PetscCall(DMRestoreLocalVector(user->dm, &D_loc)); + VecViewFromOptions(D, NULL, "-diag_vec_view"); + PetscFunctionReturn(0); +} + +PetscErrorCode FormIJacobian_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, + PetscReal shift, Mat J, Mat J_pre, + void *user_data) { + User user = *(User *)user_data; + PetscBool J_is_shell, J_pre_is_shell; + PetscFunctionBeginUser; + if (user->phys->ijacobian_time_shift_label) + CeedOperatorContextSetDouble(user->op_ijacobian, + user->phys->ijacobian_time_shift_label, &shift); + PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); + PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); + Vec coo_vec = NULL; + PetscCall(PetscObjectTypeCompare((PetscObject)J, MATSHELL, &J_is_shell)); + PetscCall(PetscObjectTypeCompare((PetscObject)J_pre, MATSHELL, + &J_pre_is_shell)); + if (!user->matrices_set_up) { + if (J_is_shell) { + PetscCall(MatShellSetContext(J, user)); + PetscCall(MatShellSetOperation(J, MATOP_MULT, + (void (*)(void))MatMult_NS_IJacobian)); + PetscCall(MatShellSetOperation(J, 
MATOP_GET_DIAGONAL, + (void (*)(void))MatGetDiagonal_NS_IJacobian)); + PetscCall(MatSetUp(J)); + } + if (!J_pre_is_shell) { + PetscCount ncoo; + PetscInt *rows, *cols; + PetscCall(CeedOperatorLinearAssembleSymbolic(user->op_ijacobian, &ncoo, &rows, + &cols)); + PetscCall(MatSetPreallocationCOOLocal(J_pre, ncoo, rows, cols)); + free(rows); + free(cols); + CeedVectorCreate(user->ceed, ncoo, &user->coo_values); + user->matrices_set_up = true; + VecCreateSeq(PETSC_COMM_WORLD, ncoo, &coo_vec); + } + } + if (!J_pre_is_shell) { + CeedMemType mem_type = CEED_MEM_HOST; + const PetscScalar *values; + MatType mat_type; + PetscCall(MatGetType(J_pre, &mat_type)); + //if (strstr(mat_type, "kokkos") || strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE; + CeedOperatorLinearAssemble(user->op_ijacobian, user->coo_values); + CeedVectorGetArrayRead(user->coo_values, mem_type, &values); + if (coo_vec) { + VecPlaceArray(coo_vec, values); + VecViewFromOptions(coo_vec, NULL, "-coo_vec_view"); + VecDestroy(&coo_vec); + } + PetscCall(MatSetValuesCOO(J_pre, values, INSERT_VALUES)); + CeedVectorRestoreArrayRead(user->coo_values, &values); + } + PetscFunctionReturn(0); +} + // User provided TS Monitor PetscErrorCode TSMonitor_NS(TS ts, PetscInt step_no, PetscReal time, Vec Q, void *ctx) { @@ -313,6 +436,9 @@ PetscErrorCode TSSolve_NS(DM dm, User user, AppCtx app_ctx, Physics phys, } else { // Implicit integrators can fall back to using an RHSFunction ierr = TSSetRHSFunction(*ts, NULL, RHS_NS, &user); CHKERRQ(ierr); } + if (user->op_ijacobian) { + ierr = DMTSSetIJacobian(dm, FormIJacobian_NS, &user); CHKERRQ(ierr); + } } else { if (!user->op_rhs) SETERRQ(comm, PETSC_ERR_ARG_NULL, "Problem does not provide RHSFunction"); From 30ccfdea7218f4be0dababc313bf9a1dcb5de23e Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Mon, 16 May 2022 14:42:44 -0600 Subject: [PATCH 007/172] examples/fluids: update Blasius inflow/outflow viscous for arbitrary normal --- examples/fluids/qfunctions/blasius.h | 27 
++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h index 7c86e20ccc..0187c91000 100644 --- a/examples/fluids/qfunctions/blasius.h +++ b/examples/fluids/qfunctions/blasius.h @@ -237,6 +237,7 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q, for (int j=0; j<5; j++) v[j][i] = 0.; const CeedScalar u_normal = Dot3(norm, velocity); + const CeedScalar viscous_flux[3] = {-t12 *norm[1], -t12 *norm[0], 0}; // The Physics // -- Density @@ -244,13 +245,12 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q, // -- Momentum for (int j=0; j<3; j++) - v[j+1][i] -= wdetJb * (rho * u_normal * velocity[j] + // interior rho - norm[j] * P); // mixed P - v[2][i] -= wdetJb * t12 ; + v[j+1][i] -= wdetJb * (rho * u_normal * velocity[j] // interior rho + + norm[j] * P // mixed P + + viscous_flux[j]); // -- Total Energy Density - v[4][i] -= wdetJb * u_normal * (E + P); - v[4][i] -= wdetJb * t12 * velocity[1]; + v[4][i] -= wdetJb * (u_normal * (E + P) + Dot3(viscous_flux, velocity)); } // End Quadrature Point Loop return 0; @@ -382,10 +382,6 @@ CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q, q_data_sur[3][i] }; - // The Physics - // Zero v so all future terms can safely sum into it - for (int j=0; j<5; j++) v[j][i] = 0.; - // Implementing outflow condition const CeedScalar P = P0; // pressure const CeedScalar u_normal = Dot3(norm, u); // Normal velocity @@ -396,18 +392,19 @@ CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q, CeedScalar t12; BlasiusSolution(x[1], Uinf, x0, x[0], rho_0, &velocity[0], &velocity[1], &t12, &context->newtonian_ctx); - // The Physics + const CeedScalar viscous_flux[3] = {-t12 *norm[1], -t12 *norm[0], 0}; + // -- Density - v[0][i] -= wdetJb * rho * u_normal; + v[0][i] = -wdetJb * rho * u_normal; // -- Momentum for (int j=0; j<3; j++) - v[j+1][i] -= wdetJb *(rho * u_normal * u[j] + norm[j] * P); - v[2][i] += wdetJb * t12 ; + v[j+1][i] = 
-wdetJb * (rho * u_normal * u[j] + + norm[j] * P + viscous_flux[j]); // -- Total Energy Density - v[4][i] -= wdetJb * u_normal * (E + P); - v[4][i] += wdetJb * t12 * velocity[1]; + v[4][i] = -wdetJb * (u_normal * (E + P) + + Dot3(viscous_flux, velocity)); // Save values for Jacobian jac_data_sur[0][i] = rho; From 4ea65e7b1ec39b273ebafce761ab65591b5c174d Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Sun, 15 May 2022 17:53:48 -0600 Subject: [PATCH 008/172] examples/fluids: select GPU matrix and vector types Mark viscous implicit examples to use -snes_fd_color because GPU assembly is corrupt for unknown reasons. It's correct when running on device with shell matrices. --- examples/fluids/navierstokes.c | 22 ++++++++++++++++------ examples/fluids/navierstokes.h | 3 ++- examples/fluids/src/setupdm.c | 7 ++++++- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index c55b42c735..ad1c670494 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -22,9 +22,9 @@ // ./navierstokes -ceed /cpu/self -problem density_current -degree 1 // ./navierstokes -ceed /gpu/cuda -problem advection -degree 1 // -//TESTARGS(name="channel") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml -compare_final_state_atol 2e-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel.bin +//TESTARGS(name="channel") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml -compare_final_state_atol 2e-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel.bin -snes_fd_color //TESTARGS(name="dc_explicit") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. 
-mu 75 -ts_dt 1e-3 -units_meter 1e-2 -units_second 1e-2 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-explicit.bin -//TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -units_meter 1e-2 -units_second 1e-2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin +//TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -units_meter 1e-2 -units_second 1e-2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin -snes_fd_color //TESTARGS(name="adv_rotation_explicit_strong") -ceed {ceed_resource} -test -problem advection -strong_form 1 -degree 3 -dm_plex_box_faces 2,2,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. 
-ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-explicit-strong.bin //TESTARGS(name="adv_rotation_implicit_sharp_cylinder") -ceed {ceed_resource} -test -problem advection -bubble_type cylinder -bubble_continuity back_sharp -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_Slip_z 1,2 -bc_wall 3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-sharp-cylinder.bin //TESTARGS(name="adv_rotation_implicit_stab_supg") -ceed {ceed_resource} -test -problem advection -CtauS .3 -stab supg -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. 
-ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-stab-supg.bin @@ -107,19 +107,22 @@ int main(int argc, char **argv) { // --------------------------------------------------------------------------- // -- Create DM DM dm; - ierr = CreateDM(comm, problem, &dm); CHKERRQ(ierr); VecType vec_type = NULL; + MatType mat_type = NULL; switch (mem_type_backend) { case CEED_MEM_HOST: vec_type = VECSTANDARD; break; case CEED_MEM_DEVICE: { const char *resolved; CeedGetResource(ceed, &resolved); if (strstr(resolved, "/gpu/cuda")) vec_type = VECCUDA; - else if (strstr(resolved, "/gpu/hip")) vec_type = VECHIP; + else if (strstr(resolved, "/gpu/hip")) vec_type = VECKOKKOS; else vec_type = VECSTANDARD; } } - ierr = DMSetVecType(dm, vec_type); CHKERRQ(ierr); + if (strstr(vec_type, VECCUDA)) mat_type = MATAIJCUSPARSE; + else if (strstr(vec_type, VECKOKKOS)) mat_type = MATAIJKOKKOS; + else mat_type = MATAIJ; + ierr = CreateDM(comm, problem, mat_type, vec_type, &dm); CHKERRQ(ierr); user->dm = dm; // --------------------------------------------------------------------------- @@ -232,11 +235,18 @@ int main(int argc, char **argv) { if (problem->dim == 2) box_faces_str[3] = '\0'; ierr = PetscOptionsGetString(NULL, NULL, "-dm_plex_box_faces", box_faces_str, sizeof(box_faces_str), NULL); CHKERRQ(ierr); + MatType mat_type; + VecType vec_type; + ierr = DMGetMatType(dm, &mat_type); CHKERRQ(ierr); + ierr = DMGetVecType(dm, &vec_type); CHKERRQ(ierr); ierr = PetscPrintf(comm, " PETSc:\n" " Box Faces : %s\n" + " DM MatType : %s\n" + " DM VecType : %s\n" " Time Stepping Scheme : %s\n", - box_faces_str, phys_ctx->implicit ? "implicit" : "explicit"); CHKERRQ(ierr); + box_faces_str, mat_type, vec_type, + phys_ctx->implicit ? 
"implicit" : "explicit"); CHKERRQ(ierr); // Mesh const PetscInt num_comp_q = 5; CeedInt glob_dofs, owned_dofs; diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h index 546513b4bd..1bdecfbc78 100644 --- a/examples/fluids/navierstokes.h +++ b/examples/fluids/navierstokes.h @@ -315,7 +315,8 @@ PetscErrorCode TSSolve_NS(DM dm, User user, AppCtx app_ctx, Physics phys, // Setup DM // ----------------------------------------------------------------------------- // Create mesh -PetscErrorCode CreateDM(MPI_Comm comm, ProblemData *problem, DM *dm); +PetscErrorCode CreateDM(MPI_Comm comm, ProblemData *problem, + MatType, VecType, DM *dm); // Set up DM PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree, diff --git a/examples/fluids/src/setupdm.c b/examples/fluids/src/setupdm.c index cefea0b012..c5309e87c7 100644 --- a/examples/fluids/src/setupdm.c +++ b/examples/fluids/src/setupdm.c @@ -11,12 +11,17 @@ #include "../navierstokes.h" // Create mesh -PetscErrorCode CreateDM(MPI_Comm comm, ProblemData *problem, DM *dm) { +PetscErrorCode CreateDM(MPI_Comm comm, ProblemData *problem, + MatType mat_type, VecType vec_type, + DM *dm) { PetscErrorCode ierr; PetscFunctionBeginUser; // Create DMPLEX ierr = DMCreate(comm, dm); CHKERRQ(ierr); ierr = DMSetType(*dm, DMPLEX); CHKERRQ(ierr); + ierr = DMSetMatType(*dm, mat_type); CHKERRQ(ierr); + ierr = DMSetVecType(*dm, vec_type); CHKERRQ(ierr); + // Set Tensor elements ierr = PetscOptionsSetValue(NULL, "-dm_plex_simplex", "0"); CHKERRQ(ierr); // Set CL options From 39c69132bfa1e54ee170692e72f9f4b944109a99 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Mon, 23 May 2022 21:34:22 -0600 Subject: [PATCH 009/172] examples/fluids: fix zero-size Jacobian data and STG Use -snes_fd_color for now because STG inflow Jacobian isn't implemented. 
--- examples/fluids/navierstokes.c | 4 ++-- examples/fluids/qfunctions/newtonian.h | 14 +++++++----- examples/fluids/src/setuplibceed.c | 31 ++++++++++++++++---------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index d4a4c80d46..7f250efafb 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -35,8 +35,8 @@ //TESTARGS(name="euler_implicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-implicit.bin //TESTARGS(name="euler_explicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ts_dt 1e-7 -ts_rk_type 5bs -ts_rtol 1e-10 -ts_atol 1e-10 -compare_final_state_atol 1E-7 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-explicit.bin //TESTARGS(name="shocktube_explicit_su_yzb") -ceed {ceed_resource} -test -problem shocktube -degree 1 -dm_plex_box_faces 50,1,1 -units_meter 1e-2 units_second 1e-2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,20,20 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -yzb -stab su -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin 
-//TESTARGS(name="blasius_STG") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin -//TESTARGS(name="blasius_STG_weakT") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin -weakT +//TESTARGS(name="blasius_STG") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin -snes_fd_color +//TESTARGS(name="blasius_STG_weakT") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin -weakT -snes_fd_color /// @file /// Navier-Stokes example using PETSc diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 6901603440..7a5dcb63fc 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -476,9 +476,10 @@ CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, State grad_s[3]; for (CeedInt j=0; j<3; j++) { CeedScalar dx_i[3] = {0}, dU[5]; - for (CeedInt k=0; k<5; k++) dU[k] = Grad_q[0][k][i] * dXdx[0][j] - + Grad_q[1][k][i] * dXdx[1][j] - + Grad_q[2][k][i] * dXdx[2][j]; + for (CeedInt k=0; k<5; k++) + dU[k] = Grad_q[0][k][i] * dXdx[0][j] + + Grad_q[1][k][i] * dXdx[1][j] + + Grad_q[2][k][i] * dXdx[2][j]; dx_i[j] = 1.; grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i); } @@ -630,9 +631,10 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, State grad_s[3]; for (CeedInt j=0; j<3; j++) { CeedScalar dx_i[3] = {0}, dU[5]; 
- for (CeedInt k=0; k<5; k++) dU[k] = Grad_q[0][k][i] * dXdx[0][j] - + Grad_q[1][k][i] * dXdx[1][j] - + Grad_q[2][k][i] * dXdx[2][j]; + for (CeedInt k=0; k<5; k++) + dU[k] = Grad_q[0][k][i] * dXdx[0][j] + + Grad_q[1][k][i] * dXdx[1][j] + + Grad_q[2][k][i] * dXdx[2][j]; dx_i[j] = 1.; grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i); } diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c index 7031149bbc..fe14286b57 100644 --- a/examples/fluids/src/setuplibceed.c +++ b/examples/fluids/src/setuplibceed.c @@ -194,10 +194,17 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, Q_sur, q_data_size_sur, &elem_restr_q_sur, &elem_restr_x_sur, &elem_restr_qd_i_sur); CHKERRQ(ierr); - ierr = GetRestrictionForDomain(ceed, dm, height, domain_label, bc->outflows[i], - Q_sur, jac_data_size_sur, NULL, NULL, - &elem_restr_jd_i_sur); - CHKERRQ(ierr); + if (jac_data_size_sur > 0) { + // State-dependent data will be passed from residual to Jacobian. This will be collocated. 
+ ierr = GetRestrictionForDomain(ceed, dm, height, domain_label, bc->outflows[i], + Q_sur, jac_data_size_sur, NULL, NULL, + &elem_restr_jd_i_sur); + CHKERRQ(ierr); + CeedElemRestrictionCreateVector(elem_restr_jd_i_sur, &jac_data_sur, NULL); + } else { + elem_restr_jd_i_sur = NULL; + jac_data_sur = NULL; + } // ---- CEED Vector PetscInt loc_num_elem_sur; @@ -205,8 +212,6 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, CeedVectorCreate(ceed, q_data_size_sur*loc_num_elem_sur*num_qpts_sur, &q_data_sur); - CeedElemRestrictionCreateVector(elem_restr_jd_i_sur, &jac_data_sur, NULL); - // ---- CEED Operator // ----- CEED Operator for Setup (geometric factors) CeedOperatorCreate(ceed, ceed_data->qf_setup_sur, NULL, NULL, &op_setup_sur); @@ -228,9 +233,10 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, ceed_data->basis_x_sur, ceed_data->x_coord); CeedOperatorSetField(op_apply_outflow, "v", elem_restr_q_sur, ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); - CeedOperatorSetField(op_apply_outflow, "surface jacobian data", - elem_restr_jd_i_sur, - CEED_BASIS_COLLOCATED, jac_data_sur); + if (elem_restr_jd_i_sur) + CeedOperatorSetField(op_apply_outflow, "surface jacobian data", + elem_restr_jd_i_sur, + CEED_BASIS_COLLOCATED, jac_data_sur); if (ceed_data->qf_apply_outflow_jacobian) { CeedOperatorCreate(ceed, ceed_data->qf_apply_outflow_jacobian, NULL, NULL, @@ -617,9 +623,10 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CEED_EVAL_INTERP); CeedQFunctionAddOutput(ceed_data->qf_apply_outflow, "v", num_comp_q, CEED_EVAL_INTERP); - CeedQFunctionAddOutput(ceed_data->qf_apply_outflow, "surface jacobian data", - jac_data_size_sur, - CEED_EVAL_NONE); + if (jac_data_size_sur) + CeedQFunctionAddOutput(ceed_data->qf_apply_outflow, "surface jacobian data", + jac_data_size_sur, + CEED_EVAL_NONE); } if (problem->apply_outflow_jacobian.qfunction) { CeedQFunctionCreateInterior(ceed, 1, 
problem->apply_outflow_jacobian.qfunction, From 6838cee0516edf957e9d0dd182a0b8fd9e34b746 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 26 May 2022 13:11:31 -0600 Subject: [PATCH 010/172] restr - relax comp_stride check for num_comp == 1 --- interface/ceed-elemrestriction.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/interface/ceed-elemrestriction.c b/interface/ceed-elemrestriction.c index 21d785e472..d20930de5d 100644 --- a/interface/ceed-elemrestriction.c +++ b/interface/ceed-elemrestriction.c @@ -386,7 +386,7 @@ int CeedElemRestrictionCreate(Ceed ceed, CeedInt num_elem, CeedInt elem_size, "ElemRestriction must have at least 1 component"); // LCOV_EXCL_STOP - if (comp_stride < 1) + if (num_comp > 1 && comp_stride < 1) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_DIMENSION, "ElemRestriction component stride must be at least 1"); @@ -476,7 +476,7 @@ int CeedElemRestrictionCreateOriented(Ceed ceed, CeedInt num_elem, "ElemRestriction must have at least 1 component"); // LCOV_EXCL_STOP - if (comp_stride < 1) + if (num_comp > 1 && comp_stride < 1) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_DIMENSION, "ElemRestriction component stride must be at least 1"); @@ -657,7 +657,7 @@ int CeedElemRestrictionCreateBlocked(Ceed ceed, CeedInt num_elem, "ElemRestriction must have at least 1 component"); // LCOV_EXCL_STOP - if (comp_stride < 1) + if (num_comp > 1 && comp_stride < 1) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_DIMENSION, "ElemRestriction component stride must be at least 1"); From 9e77b9c83fab354bd2f86ccd01d54da2efd469b5 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 26 May 2022 14:04:23 -0600 Subject: [PATCH 011/172] pc - fix qf fallback cloning --- include/ceed-impl.h | 1 - interface/ceed-preconditioning.c | 76 +++++++++++++++++++++++++++++--- interface/ceed-qfunction.c | 4 +- 3 files changed, 74 insertions(+), 7 deletions(-) diff --git a/include/ceed-impl.h b/include/ceed-impl.h index 
ea49f0e998..9328feadb7 100644 --- a/include/ceed-impl.h +++ b/include/ceed-impl.h @@ -341,7 +341,6 @@ struct CeedQFunctionAssemblyData_private { struct CeedOperator_private { Ceed ceed; CeedOperator op_fallback; - CeedQFunction qf_fallback; int ref_count; int (*LinearAssembleQFunction)(CeedOperator, CeedVector *, CeedElemRestriction *, CeedRequest *); diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index 6789a90aee..de2a07a7f1 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -22,6 +22,61 @@ /// @addtogroup CeedOperatorDeveloper /// @{ +/** + @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced + CeedOperator functionality + + @param[in] qf CeedQFunction to create fallback for + @param[out] fallback_qf fallback CeedQFunction + + @return An error code: 0 - success, otherwise - failure + + @ref Developer +**/ +static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, + CeedQFunction *qf_fallback) { + int ierr; + + // Check if NULL qf passed in + if (!qf) return CEED_ERROR_SUCCESS; + + char *source_path_with_name = ""; + if (qf->source_path) { + size_t path_len = strlen(qf->source_path), + name_len = strlen(qf->kernel_name); + ierr = CeedCalloc(path_len + name_len + 2, &source_path_with_name); + CeedChk(ierr); + memcpy(source_path_with_name, qf->source_path, path_len); + memcpy(&source_path_with_name[path_len], ":", 1); + memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); + } else { + ierr = CeedCalloc(1, &source_path_with_name); CeedChk(ierr); + } + + ierr = CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, + qf->function, source_path_with_name, + qf_fallback); CeedChk(ierr); + { + CeedQFunctionContext ctx; + + ierr = CeedQFunctionGetContext(qf, &ctx); CeedChk(ierr); + ierr = CeedQFunctionSetContext(*qf_fallback, ctx); CeedChk(ierr); + } + for (CeedInt i = 0; i < qf->num_input_fields; i++) { + ierr = 
CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, + qf->input_fields[i]->size, + qf->input_fields[i]->eval_mode); CeedChk(ierr); + } + for (CeedInt i = 0; i < qf->num_output_fields; i++) { + ierr = CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, + qf->output_fields[i]->size, + qf->output_fields[i]->eval_mode); CeedChk(ierr); + } + ierr = CeedFree(&source_path_with_name); CeedChk(ierr); + + return CEED_ERROR_SUCCESS; +} + /** @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality @@ -34,26 +89,33 @@ **/ int CeedOperatorCreateFallback(CeedOperator op) { int ierr; - Ceed fallback_ceed; + Ceed ceed_fallback; // Check not already created if (op->op_fallback) return CEED_ERROR_SUCCESS; // Fallback Ceed - ierr = CeedGetOperatorFallbackCeed(op->ceed, &fallback_ceed); CeedChk(ierr); + ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr); // Clone Op CeedOperator op_fallback; if (op->is_composite) { - ierr = CeedCompositeOperatorCreate(fallback_ceed, &op_fallback); + ierr = CeedCompositeOperatorCreate(ceed_fallback, &op_fallback); CeedChk(ierr); for (CeedInt i = 0; i < op->num_suboperators; i++) { ierr = CeedCompositeOperatorAddSub(op_fallback, op->sub_operators[i]); CeedChk(ierr); } } else { - ierr = CeedOperatorCreate(fallback_ceed, op->qf, op->dqf, op->dqfT, - &op_fallback); CeedChk(ierr); + CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; + ierr = CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback); + CeedChk(ierr); + ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback); + CeedChk(ierr); + ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback); + CeedChk(ierr); + ierr = CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, + dqfT_fallback, &op_fallback); CeedChk(ierr); for (CeedInt i = 0; i < op->qf->num_input_fields; i++) { ierr = CeedOperatorSetField(op_fallback, 
op->input_fields[i]->field_name, op->input_fields[i]->elem_restr, @@ -72,6 +134,10 @@ int CeedOperatorCreateFallback(CeedOperator op) { ierr = CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts); CeedChk(ierr); } + // Cleanup + ierr = CeedQFunctionDestroy(&qf_fallback); CeedChk(ierr); + ierr = CeedQFunctionDestroy(&dqf_fallback); CeedChk(ierr); + ierr = CeedQFunctionDestroy(&dqfT_fallback); CeedChk(ierr); } ierr = CeedOperatorSetName(op_fallback, op->name); CeedChk(ierr); ierr = CeedOperatorCheckReady(op_fallback); CeedChk(ierr); diff --git a/interface/ceed-qfunction.c b/interface/ceed-qfunction.c index d94f76af46..5208e0357b 100644 --- a/interface/ceed-qfunction.c +++ b/interface/ceed-qfunction.c @@ -907,7 +907,9 @@ int CeedQFunctionSetContext(CeedQFunction qf, CeedQFunctionContext ctx) { int ierr; ierr = CeedQFunctionContextDestroy(&qf->ctx); CeedChk(ierr); qf->ctx = ctx; - ierr = CeedQFunctionContextReference(ctx); CeedChk(ierr); + if (ctx) { + ierr = CeedQFunctionContextReference(ctx); CeedChk(ierr); + } return CEED_ERROR_SUCCESS; } From 7173c8dc81de0c321ecbfa65814b28460552c1f4 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 1 Jun 2022 19:17:02 -0600 Subject: [PATCH 012/172] tests - more complex source include testing --- tests/t406-qfunction-helper.h | 6 ++++-- tests/t406-qfunction-scales.h | 15 +++++++++++++++ tests/t406-qfunction.h | 4 +++- 3 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 tests/t406-qfunction-scales.h diff --git a/tests/t406-qfunction-helper.h b/tests/t406-qfunction-helper.h index 55efe0a977..f7a3d46828 100644 --- a/tests/t406-qfunction-helper.h +++ b/tests/t406-qfunction-helper.h @@ -8,12 +8,14 @@ #ifndef _helper_h #define _helper_h +#include "t406-qfunction-scales.h" + CEED_QFUNCTION_HELPER CeedScalar times_two(CeedScalar x) { - return 2 * x; + return SCALE_TWO * x; } CEED_QFUNCTION_HELPER CeedScalar times_three(CeedScalar x) { - return 3 * x; + return SCALE_THREE * x; } #endif diff --git 
a/tests/t406-qfunction-scales.h b/tests/t406-qfunction-scales.h new file mode 100644 index 0000000000..2b02659b08 --- /dev/null +++ b/tests/t406-qfunction-scales.h @@ -0,0 +1,15 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +#ifndef _scales_h +#define _scales_h + +#define SCALE_TWO 2 + +#define SCALE_THREE 3 + +#endif diff --git a/tests/t406-qfunction.h b/tests/t406-qfunction.h index 7c0682b418..80ffb79078 100644 --- a/tests/t406-qfunction.h +++ b/tests/t406-qfunction.h @@ -7,6 +7,7 @@ // Note: intentionally testing strange spacing in '#include's #include +#include "./t406-qfunction-scales.h" # include "t406-qfunction-helper.h" CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, @@ -24,7 +25,8 @@ CEED_QFUNCTION(mass)(void *ctx, const CeedInt Q, const CeedScalar *const *in, const CeedScalar *q_data = in[0], *u = in[1]; CeedScalar *v = out[0]; for (CeedInt i=0; i Date: Thu, 2 Jun 2022 16:43:47 -0400 Subject: [PATCH 013/172] GPU assembly: fix potential basis loading offset error --- backends/cuda-ref/ceed-cuda-ref-operator.c | 2 +- backends/hip-ref/ceed-hip-ref-operator.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/cuda-ref/ceed-cuda-ref-operator.c b/backends/cuda-ref/ceed-cuda-ref-operator.c index b0691a3662..f7de5144a5 100644 --- a/backends/cuda-ref/ceed-cuda-ref-operator.c +++ b/backends/cuda-ref/ceed-cuda-ref-operator.c @@ -1290,7 +1290,7 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op) { cudaMemcpyHostToDevice); CeedChk_Cu(ceed, ierr); mat_start += esize * nqpts; } else if (eval_mode == CEED_EVAL_GRAD) { - ierr = cudaMemcpy(asmb->d_B_in, grad_in, + ierr = cudaMemcpy(&asmb->d_B_in[mat_start], grad_in, dim * esize * nqpts * sizeof(CeedScalar), 
cudaMemcpyHostToDevice); CeedChk_Cu(ceed, ierr); mat_start += dim * esize * nqpts; diff --git a/backends/hip-ref/ceed-hip-ref-operator.c b/backends/hip-ref/ceed-hip-ref-operator.c index 9cecbec4f4..2e323d9088 100644 --- a/backends/hip-ref/ceed-hip-ref-operator.c +++ b/backends/hip-ref/ceed-hip-ref-operator.c @@ -1285,7 +1285,7 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op) { hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); mat_start += esize * nqpts; } else if (eval_mode == CEED_EVAL_GRAD) { - ierr = hipMemcpy(asmb->d_B_in, grad_in, + ierr = hipMemcpy(&asmb->d_B_in[mat_start], grad_in, dim * esize * nqpts * sizeof(CeedScalar), hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); mat_start += dim * esize * nqpts; From d95c92db255d41d2f03e8526bc83db0d5b614abe Mon Sep 17 00:00:00 2001 From: James Wright Date: Mon, 23 May 2022 10:50:20 -0600 Subject: [PATCH 014/172] examples/fluids: Fix memory leak in STG --- examples/fluids/problems/stg_shur14.c | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c index 1be568a104..6700b22837 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -307,6 +307,7 @@ PetscErrorCode GetSTGContextData(const MPI_Comm comm, const DM dm, } } //end calculate kappa + ierr = PetscFree(*pstg_ctx); CHKERRQ(ierr); *pstg_ctx = stg_ctx; PetscFunctionReturn(0); } From 4e1392660fd02e74547113eb97925d90fbec8e1f Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 24 May 2022 10:38:43 -0600 Subject: [PATCH 015/172] examples/fluids: Add Strong STG functions --- examples/fluids/problems/blasius.c | 11 +- examples/fluids/problems/stg_shur14.c | 112 +++++++++++++++++-- examples/fluids/problems/stg_shur14.h | 6 +- examples/fluids/qfunctions/stg_shur14.h | 62 ++++++++++ examples/fluids/qfunctions/stg_shur14_type.h | 6 +- 5 files changed, 180 insertions(+), 17 deletions(-) diff --git a/examples/fluids/problems/blasius.c 
b/examples/fluids/problems/blasius.c index 730e9e533b..d3298f7945 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -138,9 +138,9 @@ static PetscErrorCode ModifyMesh(MPI_Comm comm, DM dm, PetscInt dim, PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { PetscInt ierr; - User user = *(User *)ctx; - MPI_Comm comm = PETSC_COMM_WORLD; - PetscBool use_stg = PETSC_FALSE; + User user = *(User *)ctx; + MPI_Comm comm = PETSC_COMM_WORLD; + PetscBool use_stg = PETSC_FALSE; BlasiusContext blasius_ctx; NewtonianIdealGasContext newtonian_ig_ctx; CeedQFunctionContext blasius_context; @@ -222,7 +222,6 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { ierr = ModifyMesh(comm, dm, problem->dim, mesh_growth, mesh_Ndelta, mesh_refine_height, mesh_top_angle, mesh_ynodes, mesh_nynodes); CHKERRQ(ierr); - ierr = PetscFree(mesh_ynodes); CHKERRQ(ierr); // Some properties depend on parameters from NewtonianIdealGas CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context, @@ -252,7 +251,9 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { CeedQFunctionContextReferenceCopy(blasius_context, &problem->apply_outflow.qfunction_context); if (use_stg) { - ierr = SetupSTG(comm, dm, problem, user, weakT, theta0, P0); CHKERRQ(ierr); + ierr = SetupSTG(comm, dm, problem, user, weakT, theta0, P0, mesh_ynodes, + mesh_nynodes); CHKERRQ(ierr); } + ierr = PetscFree(mesh_ynodes); CHKERRQ(ierr); PetscFunctionReturn(0); } diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c index 6700b22837..8b7e01ff45 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -248,7 +248,8 @@ static PetscErrorCode ReadSTGRand(const MPI_Comm comm, PetscErrorCode GetSTGContextData(const MPI_Comm comm, const DM dm, char stg_inflow_path[PETSC_MAX_PATH_LEN], char stg_rand_path[PETSC_MAX_PATH_LEN], - STGShur14Context *pstg_ctx) { + STGShur14Context 
*pstg_ctx, + const CeedScalar ynodes[]) { PetscErrorCode ierr; PetscInt nmodes, nprofs; STGShur14Context stg_ctx; @@ -277,7 +278,8 @@ PetscErrorCode GetSTGContextData(const MPI_Comm comm, const DM dm, s->offsets.cij = s->offsets.ubar + nprofs*3; s->offsets.eps = s->offsets.cij + nprofs*6; s->offsets.lt = s->offsets.eps + nprofs; - PetscInt total_num_scalars = s->offsets.lt + nprofs; + s->offsets.ynodes = s->offsets.lt + nprofs; + PetscInt total_num_scalars = s->offsets.ynodes + s->nynodes; s->total_bytes = sizeof(*stg_ctx) + total_num_scalars*sizeof(stg_ctx->data[0]); ierr = PetscMalloc(s->total_bytes, &stg_ctx); CHKERRQ(ierr); *stg_ctx = *s; @@ -287,6 +289,11 @@ PetscErrorCode GetSTGContextData(const MPI_Comm comm, const DM dm, ierr = ReadSTGInflow(comm, stg_inflow_path, stg_ctx); CHKERRQ(ierr); ierr = ReadSTGRand(comm, stg_rand_path, stg_ctx); CHKERRQ(ierr); + if (stg_ctx->nynodes > 0) { + CeedScalar *ynodes_ctx = &stg_ctx->data[stg_ctx->offsets.ynodes]; + for (PetscInt i=0; i<stg_ctx->nynodes; i++) ynodes_ctx[i] = ynodes[i]; + } + // -- Calculate kappa { CeedScalar *kappa = &stg_ctx->data[stg_ctx->offsets.kappa]; @@ -314,12 +321,15 @@ PetscErrorCode GetSTGContextData(const MPI_Comm comm, const DM dm, PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, User user, const bool prescribe_T, - const CeedScalar theta0, const CeedScalar P0) { + const CeedScalar theta0, const CeedScalar P0, + const CeedScalar ynodes[], const CeedInt nynodes) { PetscErrorCode ierr; char stg_inflow_path[PETSC_MAX_PATH_LEN] = "./STGInflow.dat"; - char stg_rand_path[PETSC_MAX_PATH_LEN] = "./STGRand.dat"; - PetscBool mean_only = PETSC_FALSE; - CeedScalar u0=0.0, alpha=1.01; + char stg_rand_path[PETSC_MAX_PATH_LEN] = "./STGRand.dat"; + PetscBool mean_only = PETSC_FALSE, + use_stgstrong = PETSC_FALSE; + CeedScalar u0 = 0.0, + alpha = 1.01; STGShur14Context stg_ctx; CeedQFunctionContext stg_context; NewtonianIdealGasContext newtonian_ig_ctx; @@ -339,6 +349,8 @@ PetscErrorCode
SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, NULL, u0, &u0, NULL); CHKERRQ(ierr); ierr = PetscOptionsBool("-stg_mean_only", "Only apply mean profile", NULL, mean_only, &mean_only, NULL); CHKERRQ(ierr); + ierr = PetscOptionsBool("-stg_strong", "Enforce STG inflow strongly", + NULL, use_stgstrong, &use_stgstrong, NULL); CHKERRQ(ierr); PetscOptionsEnd(); ierr = PetscCalloc1(1, &stg_ctx); CHKERRQ(ierr); @@ -349,6 +361,7 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, stg_ctx->mean_only = mean_only; stg_ctx->theta0 = theta0; stg_ctx->P0 = P0; + stg_ctx->nynodes = nynodes; { // Calculate dx assuming constant spacing @@ -360,6 +373,7 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, ierr = PetscOptionsGetIntArray(NULL, NULL, "-dm_plex_box_faces", faces, &nmax, NULL); CHKERRQ(ierr); stg_ctx->dx = domain_size[0]/faces[0]; + stg_ctx->dz = domain_size[2]/faces[2]; } CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context, @@ -368,8 +382,8 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context, &newtonian_ig_ctx); - ierr = GetSTGContextData(comm, dm, stg_inflow_path, stg_rand_path, &stg_ctx); - CHKERRQ(ierr); + ierr = GetSTGContextData(comm, dm, stg_inflow_path, stg_rand_path, &stg_ctx, + ynodes); CHKERRQ(ierr); CeedQFunctionContextDestroy(&problem->apply_inflow.qfunction_context); CeedQFunctionContextCreate(user->ceed, &stg_context); @@ -381,9 +395,87 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, offsetof(struct STGShur14Context_, time), 1, "Phyiscal time of the solution"); - problem->apply_inflow.qfunction = STGShur14_Inflow; - problem->apply_inflow.qfunction_loc = STGShur14_Inflow_loc; + if (use_stgstrong) { + problem->apply_inflow.qfunction = STGShur14_Inflow_Strong; + problem->apply_inflow.qfunction_loc = STGShur14_Inflow_Strong_loc; + } 
else { + problem->apply_inflow.qfunction = STGShur14_Inflow; + problem->apply_inflow.qfunction_loc = STGShur14_Inflow_loc; + } problem->apply_inflow.qfunction_context = stg_context; PetscFunctionReturn(0); } + +static inline PetscScalar FindDy(const PetscScalar ynodes[], + const PetscInt nynodes, const PetscScalar y) { + const PetscScalar half_mindy = 0.5 * (ynodes[1] - ynodes[0]); + // ^^assuming min(dy) is first element off the wall + PetscInt idx = -1; // Index + + for (PetscInt i=0; inmodes], u[3], ubar[3], cij[6], eps, lt; + const bool mean_only = stg_ctx->mean_only; + const PetscScalar dx = stg_ctx->dx; + const PetscScalar dz = stg_ctx->dz; + const PetscScalar mu = stg_ctx->newtonian_ctx.mu; + const PetscScalar theta0 = stg_ctx->theta0; + const PetscScalar P0 = stg_ctx->P0; + const PetscScalar cv = stg_ctx->newtonian_ctx.cv; + const PetscScalar cp = stg_ctx->newtonian_ctx.cp; + const PetscScalar Rd = cp - cv; + + const CeedScalar rho = P0 / (Rd * theta0); + InterpolateProfile(x[1], ubar, cij, &eps, &lt, stg_ctx); + if (!mean_only) { + const PetscInt nynodes = stg_ctx->nynodes; + const PetscScalar *ynodes = &stg_ctx->data[stg_ctx->offsets.ynodes]; + const PetscScalar h[3] = {dx, FindDy(ynodes, nynodes, x[1]), dz}; + CalcSpectrum(x[1], eps, lt, h, mu/rho, qn, stg_ctx); + STGShur14_Calc(x, time, ubar, cij, qn, u, stg_ctx); + } else { + for (CeedInt j=0; j<3; j++) u[j] = ubar[j]; + } + + bcval[0] = rho; + bcval[1] = rho * u[0]; + bcval[2] = rho * u[1]; + bcval[3] = rho * u[2]; + PetscFunctionReturn(0); +} + +PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem, + STGShur14Context stg_ctx) { + + PetscErrorCode ierr; + DMLabel label; + const PetscInt comps[] = {0, 1, 2, 3}; + const PetscInt num_comps = 4; + PetscFunctionBeginUser; + + ierr = DMGetLabel(dm, "Face Sets", &label); CHKERRQ(ierr); + // Set wall BCs + if (bc->num_inflow > 0) { + ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "STG", label, + bc->num_inflow, bc->inflows, 0, num_comps, + comps,
(void(*)(void))StrongSTGbcFunc, + NULL, stg_ctx, NULL); CHKERRQ(ierr); + } + + PetscFunctionReturn(0); +} diff --git a/examples/fluids/problems/stg_shur14.h b/examples/fluids/problems/stg_shur14.h index 7d5dc90bfd..45663350e3 100644 --- a/examples/fluids/problems/stg_shur14.h +++ b/examples/fluids/problems/stg_shur14.h @@ -13,4 +13,8 @@ extern PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, User user, const bool prescribe_T, const CeedScalar theta0, - const CeedScalar P0); + const CeedScalar P0, + const CeedScalar ynodes[], const CeedInt nynodes); + +extern PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem, + STGShur14Context stg_ctx); diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index e8909d1f2e..e7c4457e3d 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -277,5 +277,67 @@ CEED_QFUNCTION(STGShur14_Inflow)(void *ctx, CeedInt Q, return 0; } +/* Compute boundary integral for strong STG enforcement + * + * This assumes that density is set strongly and temperature is allowed to + * float + */ +CEED_QFUNCTION(STGShur14_Inflow_Strong)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { + + //*INDENT-OFF* + const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[0], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[1]; + + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0]; + + //*INDENT-ON* + + const STGShur14Context stg_ctx = (STGShur14Context) ctx; + const bool is_implicit = stg_ctx->is_implicit; + const CeedScalar cv = stg_ctx->newtonian_ctx.cv; + const CeedScalar cp = stg_ctx->newtonian_ctx.cp; + const CeedScalar gamma = cp/cv; + + CeedPragmaSIMD + for(CeedInt i=0; inmodes] = (CeedScalar (*)[ctx->nmodes])&ctx->data[ctx->offsets.sigma]; */ + * CeedScalar (*sigma)[ctx->nmodes] = (CeedScalar (*)[ctx->nmodes])&ctx->data[ctx->offsets.sigma]; + 
* CeedScalar *eps = &ctx->data[ctx->offsets.eps]; */ typedef struct STGShur14Context_ *STGShur14Context; struct STGShur14Context_ { CeedInt nmodes; // !< Number of wavemodes CeedInt nprofs; // !< Number of profile points in STGInflow.dat + CeedInt nynodes; // !< Number of mesh nodes in the y direction CeedScalar alpha; // !< Geometric growth rate of kappa CeedScalar u0; // !< Convective velocity CeedScalar time; // !< Solution time @@ -25,6 +27,7 @@ struct STGShur14Context_ { bool is_implicit; // !< Whether using implicit time integration bool mean_only; // !< Only apply the mean profile CeedScalar dx; // !< dx used for h calculation + CeedScalar dz; // !< dz used for h calculation bool prescribe_T; // !< Prescribe temperature weakly struct NewtonianIdealGasContext_ newtonian_ctx; @@ -36,6 +39,7 @@ struct STGShur14Context_ { size_t cij; // !< Cholesky decomposition [nprof, 6] size_t eps; // !< Turbulent Disspation [nprof, 6] size_t lt; // !< Tubulent Length Scale [nprof, 6] + size_t ynodes; // !< Locations of nodes in y direction [nynodes] } offsets; // !< Holds offsets for each array in data size_t total_bytes; // !< Total size of struct plus array CeedScalar data[1]; // !< Holds concatenated scalar array data From 961c9c98f3e5cd82c216ed6fd9346702e8a78efd Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 24 May 2022 10:06:14 -0600 Subject: [PATCH 016/172] examples/fluids: Minor stg improvements/style --- examples/fluids/qfunctions/stg_shur14.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index e7c4457e3d..0f4b4e9e3a 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -75,8 +75,8 @@ CEED_QFUNCTION_HELPER void InterpolateProfile(const CeedScalar dw, cij[3] = prof_cij[3*nprofs+idx-1] + coeff*( prof_cij[3*nprofs+idx] - prof_cij[3*nprofs+idx-1] ); cij[4] = prof_cij[4*nprofs+idx-1] + coeff*( 
prof_cij[4*nprofs+idx] - prof_cij[4*nprofs+idx-1] ); cij[5] = prof_cij[5*nprofs+idx-1] + coeff*( prof_cij[5*nprofs+idx] - prof_cij[5*nprofs+idx-1] ); - *eps = prof_eps[idx-1] + coeff*( prof_eps[idx] - prof_eps[idx-1] ); - *lt = prof_lt[idx-1] + coeff*( prof_lt[idx] - prof_lt[idx-1] ); + *eps = prof_eps[idx-1] + coeff*( prof_eps[idx] - prof_eps[idx-1] ); + *lt = prof_lt[idx-1] + coeff*( prof_lt[idx] - prof_lt[idx-1] ); //*INDENT-ON* } else { // y outside bounds of prof_dw ubar[0] = prof_ubar[1*nprofs-1]; @@ -129,6 +129,7 @@ void CEED_QFUNCTION_HELPER(CalcSpectrum)(const CeedScalar dw, Ektot += qn[n]; } + if (Ektot == 0) return; for(CeedInt n=0; ndata[stg_ctx->offsets.sigma]; const CeedScalar *d = &stg_ctx->data[stg_ctx->offsets.d]; //*INDENT-ON* - const CeedScalar tworoot1p5 = 2*sqrt(1.5); CeedScalar xdotd, vp[3] = {0.}; CeedScalar xhat[] = {0., X[1], X[2]}; @@ -165,10 +165,11 @@ void CEED_QFUNCTION_HELPER(STGShur14_Calc)(const CeedScalar X[3], xdotd = 0.; for(CeedInt i=0; i<3; i++) xdotd += d[i*nmodes+n]*xhat[i]; const CeedScalar cos_kxdp = cos(kappa[n]*xdotd + phi[n]); - vp[0] += tworoot1p5*sqrt(qn[n])*sigma[0*nmodes+n] * cos_kxdp; - vp[1] += tworoot1p5*sqrt(qn[n])*sigma[1*nmodes+n] * cos_kxdp; - vp[2] += tworoot1p5*sqrt(qn[n])*sigma[2*nmodes+n] * cos_kxdp; + vp[0] += sqrt(qn[n])*sigma[0*nmodes+n] * cos_kxdp; + vp[1] += sqrt(qn[n])*sigma[1*nmodes+n] * cos_kxdp; + vp[2] += sqrt(qn[n])*sigma[2*nmodes+n] * cos_kxdp; } + for(CeedInt i=0; i<3; i++) vp[i] *= 2*sqrt(1.5); u[0] = ubar[0] + cij[0]*vp[0]; u[1] = ubar[1] + cij[3]*vp[0] + cij[1]*vp[1]; From 363b60e10ca170e7545568eddd43e64f5967d744 Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 24 May 2022 10:08:59 -0600 Subject: [PATCH 017/172] examples/fluids: Implement Strong STG BC --- examples/fluids/navierstokes.c | 2 +- examples/fluids/src/setupdm.c | 16 ++++++++++++++++ examples/fluids/src/setupts.c | 4 ++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/examples/fluids/navierstokes.c 
b/examples/fluids/navierstokes.c index 05d4a75bc8..961dc8fb49 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -185,7 +185,7 @@ int main(int argc, char **argv) { // DMPlexInsertBoundaryValues() is very slow. If we disable this, we should // still get the same results due to the problem->bc function, but with // potentially much slower execution. - if (1) {ierr = SetBCsFromICs_NS(dm, Q, Q_loc); CHKERRQ(ierr);} + if (0) {ierr = SetBCsFromICs_NS(dm, Q, Q_loc); CHKERRQ(ierr);} // --------------------------------------------------------------------------- // Create output directory diff --git a/examples/fluids/src/setupdm.c b/examples/fluids/src/setupdm.c index cefea0b012..495ee38da3 100644 --- a/examples/fluids/src/setupdm.c +++ b/examples/fluids/src/setupdm.c @@ -9,6 +9,7 @@ /// Setup DM for Navier-Stokes example using PETSc #include "../navierstokes.h" +#include "../problems/stg_shur14.h" // Create mesh PetscErrorCode CreateDM(MPI_Comm comm, ProblemData *problem, DM *dm) { @@ -70,6 +71,21 @@ PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree, bc->num_slip[2], bc->slips[2], 0, 1, comps, (void(*)(void))NULL, NULL, problem->bc_ctx, NULL); CHKERRQ(ierr); } + { + PetscBool use_strongstg = PETSC_FALSE; + ierr = PetscOptionsGetBool(NULL, NULL, "-stg_strong", &use_strongstg, NULL); + CHKERRQ(ierr); + STGShur14Context stg_ctx; + + if (use_strongstg) { + CeedQFunctionContextGetData(problem->apply_inflow.qfunction_context, + CEED_MEM_HOST, &stg_ctx); + ierr = SetupStrongSTG(dm, bc, problem, stg_ctx); CHKERRQ(ierr); + CeedQFunctionContextRestoreData(problem->apply_inflow.qfunction_context, + &stg_ctx); + } + } + ierr = DMPlexSetClosurePermutationTensor(dm, PETSC_DETERMINE, NULL); CHKERRQ(ierr); ierr = PetscFEDestroy(&fe); CHKERRQ(ierr); diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c index 8bc6dedb1e..9131637c38 100644 --- a/examples/fluids/src/setupts.c +++ b/examples/fluids/src/setupts.c @@ -106,7 
+106,7 @@ PetscErrorCode RHS_NS(TS ts, PetscReal t, Vec Q, Vec G, void *user_data) { // Global-to-local ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Q_loc); CHKERRQ(ierr); - ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Q_loc, 0.0, + ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Q_loc, t, NULL, NULL, NULL); CHKERRQ(ierr); ierr = VecZeroEntries(G_loc); CHKERRQ(ierr); @@ -172,7 +172,7 @@ PetscErrorCode IFunction_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, Vec G, // Global-to-local ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Q_loc); CHKERRQ(ierr); - ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Q_loc, 0.0, + ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Q_loc, t, NULL, NULL, NULL); CHKERRQ(ierr); ierr = VecZeroEntries(Q_dot_loc); CHKERRQ(ierr); ierr = DMGlobalToLocal(user->dm, Q_dot, INSERT_VALUES, Q_dot_loc); From f1122ed0eade392eae0bd7536dac1401f6009b05 Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 2 Jun 2022 17:30:45 -0600 Subject: [PATCH 018/172] examples/fluids: Correct for non-origin domains - Add correction for if domain is not set at the origin --- examples/fluids/problems/blasius.c | 6 +++++ examples/fluids/qfunctions/blasius.h | 40 +++++++++++++++------------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c index d3298f7945..c1bbcf4e17 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -235,6 +235,12 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { blasius_ctx->implicit = user->phys->implicit; blasius_ctx->newtonian_ctx = *newtonian_ig_ctx; + { + PetscReal domain_min[3]; + ierr = DMGetBoundingBox(dm, domain_min, NULL); CHKERRQ(ierr); + blasius_ctx->x_inflow = domain_min[0]; + } + CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context, 
&newtonian_ig_ctx); diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h index 06b13aef11..acb37fc271 100644 --- a/examples/fluids/qfunctions/blasius.h +++ b/examples/fluids/qfunctions/blasius.h @@ -24,6 +24,7 @@ struct BlasiusContext_ { CeedScalar Uinf; // !< Velocity at boundary layer edge CeedScalar P0; // !< Pressure at outflow CeedScalar theta0; // !< Temperature at inflow + CeedScalar x_inflow; // !< Location of inflow in x struct NewtonianIdealGasContext_ newtonian_ctx; }; @@ -139,6 +140,7 @@ CEED_QFUNCTION(ICsBlasius)(void *ctx, CeedInt Q, const CeedScalar P0 = context->P0; const CeedScalar delta0 = context->delta0; const CeedScalar Uinf = context->Uinf; + const CeedScalar x_inflow = context->x_inflow; const CeedScalar e_internal = cv * theta0; const CeedScalar rho = P0 / ((gamma - 1) * e_internal); @@ -150,7 +152,7 @@ CEED_QFUNCTION(ICsBlasius)(void *ctx, CeedInt Q, for (CeedInt i=0; inewtonian_ctx); q0[0][i] = rho; @@ -183,13 +185,14 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q, const CeedScalar Rd = cp - cv; const CeedScalar gamma = cp/cv; - const CeedScalar theta0 = context->theta0; - const CeedScalar P0 = context->P0; - const CeedScalar delta0 = context->delta0; - const CeedScalar Uinf = context->Uinf; - const bool weakT = context->weakT; - const CeedScalar rho_0 = P0 / (Rd * theta0); - const CeedScalar x0 = Uinf*rho_0 / (mu*25/ (delta0*delta0) ); + const CeedScalar theta0 = context->theta0; + const CeedScalar P0 = context->P0; + const CeedScalar delta0 = context->delta0; + const CeedScalar Uinf = context->Uinf; + const CeedScalar x_inflow = context->x_inflow; + const bool weakT = context->weakT; + const CeedScalar rho_0 = P0 / (Rd * theta0); + const CeedScalar x0 = Uinf*rho_0 / (mu*25/ (delta0*delta0) ); CeedPragmaSIMD // Quadrature Point Loop @@ -205,8 +208,8 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q, const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]}; CeedScalar velocity[3] = {0.}; 
CeedScalar t12; - BlasiusSolution(x[1], Uinf, x0, x[0], rho_0, &velocity[0], &velocity[1], - &t12, &context->newtonian_ctx); + BlasiusSolution(x[1], Uinf, x0, x[0] - x_inflow, rho_0, &velocity[0], + &velocity[1], &t12, &context->newtonian_ctx); // enabling user to choose between weak T and weak rho inflow CeedScalar rho,E_internal, P, E_kinetic; @@ -282,12 +285,13 @@ CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q, const CeedScalar cp = context->newtonian_ctx.cp; const CeedScalar Rd = cp - cv; - const CeedScalar theta0 = context->theta0; - const CeedScalar P0 = context->P0; - const CeedScalar rho_0 = P0 / (Rd*theta0); - const CeedScalar delta0 = context->delta0; - const CeedScalar Uinf = context->Uinf; - const CeedScalar x0 = Uinf*rho_0 / (mu*25/ (delta0*delta0) ); + const CeedScalar theta0 = context->theta0; + const CeedScalar P0 = context->P0; + const CeedScalar rho_0 = P0 / (Rd*theta0); + const CeedScalar delta0 = context->delta0; + const CeedScalar Uinf = context->Uinf; + const CeedScalar x0 = Uinf*rho_0 / (mu*25/ (delta0*delta0) ); + const CeedScalar x_inflow = context->x_inflow; CeedPragmaSIMD // Quadrature Point Loop @@ -326,8 +330,8 @@ CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q, const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]}; CeedScalar velocity[3] = {0.}; CeedScalar t12; - BlasiusSolution(x[1], Uinf, x0, x[0], rho_0, &velocity[0], &velocity[1], - &t12, &context->newtonian_ctx); + BlasiusSolution(x[1], Uinf, x0, x[0] - x_inflow, rho_0, &velocity[0], + &velocity[1], &t12, &context->newtonian_ctx); // The Physics // -- Density v[0][i] -= wdetJb * rho * u_normal; From 36b31e276ef6aee5a91c969ce1e558ff92ee537f Mon Sep 17 00:00:00 2001 From: James Wright Date: Fri, 3 Jun 2022 10:51:51 -0600 Subject: [PATCH 019/172] examples/fluids: Add option for BC from ICs - Instead of hard coded if statement, add option on whether to set the boundary conditions based on their initial condition values - Default set to True --- examples/fluids/navierstokes.c 
| 5 ++++- examples/fluids/navierstokes.h | 1 + examples/fluids/problems/stg_shur14.c | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index 961dc8fb49..92eaf587b2 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -80,6 +80,7 @@ int main(int argc, char **argv) { user->app_ctx = app_ctx; user->units = units; user->phys = phys_ctx; + problem->bc_from_ics = PETSC_TRUE; // --------------------------------------------------------------------------- // Process command line options @@ -185,7 +186,9 @@ int main(int argc, char **argv) { // DMPlexInsertBoundaryValues() is very slow. If we disable this, we should // still get the same results due to the problem->bc function, but with // potentially much slower execution. - if (0) {ierr = SetBCsFromICs_NS(dm, Q, Q_loc); CHKERRQ(ierr);} + if (problem->bc_from_ics) { + ierr = SetBCsFromICs_NS(dm, Q, Q_loc); CHKERRQ(ierr); + } // --------------------------------------------------------------------------- // Create output directory diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h index 9275ea4f38..2c69c041e0 100644 --- a/examples/fluids/navierstokes.h +++ b/examples/fluids/navierstokes.h @@ -209,6 +209,7 @@ struct ProblemData_private { PetscErrorCode (*bc)(PetscInt, PetscReal, const PetscReal[], PetscInt, PetscScalar[], void *); void *bc_ctx; + PetscBool bc_from_ics; PetscErrorCode (*print_info)(ProblemData*, AppCtx); }; // *INDENT-ON* diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c index 8b7e01ff45..30ad5eac35 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -398,9 +398,11 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, if (use_stgstrong) { problem->apply_inflow.qfunction = STGShur14_Inflow_Strong; problem->apply_inflow.qfunction_loc = STGShur14_Inflow_Strong_loc; 
+ problem->bc_from_ics = PETSC_FALSE; } else { problem->apply_inflow.qfunction = STGShur14_Inflow; problem->apply_inflow.qfunction_loc = STGShur14_Inflow_loc; + problem->bc_from_ics = PETSC_TRUE; } problem->apply_inflow.qfunction_context = stg_context; From 30af3636e5012a6b4963a5b63630db884472f640 Mon Sep 17 00:00:00 2001 From: James Wright Date: Fri, 3 Jun 2022 10:28:16 -0600 Subject: [PATCH 020/172] examples/fluids: Add strong STG to documentation --- examples/fluids/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/fluids/README.md b/examples/fluids/README.md index 602803fc1e..e8d650cf9f 100644 --- a/examples/fluids/README.md +++ b/examples/fluids/README.md @@ -735,6 +735,11 @@ options: - `false` - +* - `-stg_strong` + - Strongly enforce the STG inflow boundary condition + - `false` + - + ::: This problem can be run with the `blasius.yaml` file via: From c79c9d8accea9b1549c782c9810a2665141d102f Mon Sep 17 00:00:00 2001 From: James Wright Date: Fri, 3 Jun 2022 11:13:46 -0600 Subject: [PATCH 021/172] examples/fluids: Test for strong STG - Note that the "compare_final_state_atol" had to be reduced to 1e-10 in order to have all the backends pass (the non-optimized backends were not able to pass) --- examples/fluids/navierstokes.c | 1 + .../fluids-navierstokes-blasius_STG_strongBC.bin | Bin 0 -> 6360 bytes 2 files changed, 1 insertion(+) create mode 100644 examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index 92eaf587b2..51e8216f7f 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -37,6 +37,7 @@ //TESTARGS(name="shocktube_explicit_su_yzb") -ceed {ceed_resource} -test -problem shocktube -degree 1 -dm_plex_box_faces 50,1,1 -units_meter 1e-2 units_second 1e-2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,20,20 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -yzb -stab su -compare_final_state_atol 
1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin //TESTARGS(name="blasius_STG") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin //TESTARGS(name="blasius_STG_weakT") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin -weakT +//TESTARGS(name="blasius_STG_strongBC") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-10 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin -stg_strong true /// @file /// Navier-Stokes example using PETSc diff --git a/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin new file mode 100644 index 0000000000000000000000000000000000000000..5327e148b8854c5239395b75bdb97d36c7a71bf1 GIT binary patch literal 6360 zcmW-lcOcc@|HrL{;(cY5J(7`4Jiu=24Q!5xg3{HUj>qaPH9l7gW1)UV&0q zIaM?ottt`IPSW;=6zO){g6~L2`NpYWwz-uT9`-L$((G@-?6Mz86u1hpGX4K|uce-k zM_rXqYF{`yQ1`Y3AM#x<1)QCmTUPT)qGXF--c6zZosY4%YnU4T(;gkNuaY`+>T{XK z3tp0@)}&$0J_sC@%@@_I+_q@X|cq>#$*MhDQ>YF zZCnJ7yW-nJ5bruWkDE#zz~K@u-dscd^7VXotaAsdkyW00#B12;;vZq@z~8@scWW4no{Zp@*^8?{Qv4+X2siib!2|j zT%}@%_HsvGG9!uNecOwtU4Wy|(b;zx6Q2GLPG+(px>vs8NX9wfa89LFN0P)Bp&@P; zsF%v|VVf6OE7n~7FN1oL!Xy+% z-G}_{8vZa3^_MBK`ZBKs^SaZ6BYNsk{mRI}9L2pP@o&Y|*1Vh~TD(-MO|^mZ^+*0c zF0^7WI_-WV618s>v7EH2CuzF{zX@N3^WN#YC@ijmciVOLM{nn)iodK*_dawaiBhSD zk{X4;VQyfxm&1Y?wa1N=DD<`PZ(Y}90NjVTyS+q%B-*wF1zlMKoZKstZv!@!5F?}Q zp-QJ;c_i^to8z9?C~(rEsA+X9#db&a zxm*g) zKGot5EEgq-9)oD<9ZiT)k$mG@>^zoUuf%HkTM&(WQZgEnT?27b-krK*5;5+&-Rdt$ 
z!D{#8UQWErryBdTqHk9YlSG9~%=x@mh><>V*!ar+(f5WAHS+*R#DL%YFy0x^TEk`Ug2r}O8Fn;`LmjY<#yx`b`icHmZ6bj8Tgjv@ zzT>5uvTT#a|89pqmkHdj9RnOcR*eV)tW_ zWy%l4cqR36=Rz)4oNS_7n53X_(NOjb0|lt-6Msspb^(X6K3O#dYo?8+KTe9En)l1t zu|8}fiK=Yj4}K32G z1Bd5_eai&a{I*U)NL~R=?>vTK}CZ%@qUB|kH>X!9gHdOP*+tTLAwIoqL=laPs3o+9E=AKdO z#ALHKzk;tKVz_XudKn{<#-5a3Dg@`5wh_@wfx0d&{g1`c4QugUS(oE<1Wl%>SX-}~ zA!(ZD)-&9d5aZqh?`O$BvA%`Jzwb3G)K`DIz+?HNvl85?1;KUals;|a6IHEv< z#|O=9I=!Y=Lye>nq6#KtCrQlU!Nbw-;XXsGdgIry&enq$9j@o08BLJ~N-TLKO>2Rk zpcMw3mL-3E8*I3LB;uk{6ZPq4bxHXW=-W5_%&L}r;85;%Q5fsp1jBd{{r0)TpM?K zWeqmUY{9iXcd5@YrdH-&Atdqr*Cos4=fJsWW3k$Y4Xi{Z9;7*-nGMtDk6QAPw9PZZ z8tspOBhqaBMhfa}UlPlqF|6k}^YHH0el$DkaICu8j-(m%x?7DM0*-m)W$TaFI7sfx zESDhlw)k?n!xQ-bkAsXXGtjrR1bVCnH#SWA#=>zR6)|GGc!&@0Nt(ci3yW@dAZ|=6 znYJ(&N{q7a%GY8&x7Xz>&*A%mdhH_s>8#LuE8o{>SQSNH|Fo?2FBgaDzG+v9BD5I(V5< zH#pdg^_rZUuDo7{{`S-?07mjYv?%#+e-?yoUZu0=nGJgebB#EiVB12Cy;6C-`#TrMkUi12k z-F+r#VrOIZ3GPjBAD8tlt)PE_LYf1!0oa7UFy`8&UsO5k)b;E_b>Q?oRy}tbF;btY zjWwxbqf^PH-5+ewA2#OVPKQy5TW^*0Hpt@?TbXd;C&VbT7g+1M8|yXLjh9O(pnow7 zJgJ{v1IMz~P2)Fk&PYyJZN?@FmQ!Qp>ga@YfK#2zJaAUdnRI~eq&i4GyidnQ2YX*g z$=9QilVghUJT4@Sf6-fXV;@OOhN!gik0C~})9h#8B&_#UQiJ)#T{Kmo7!x7%0P5{* zz@vy9;2hR>J~Dz$1iOO2s2)cJofg*R0X{Il^osl9#}OmtMXtM%4K`BisT(NrLIVjx zdnTUFlC&+-hu7@T1y00qCmzU8akE+I>El?hWlvnxxEy94@)LNxDUPJ+n8r;!wFQpf z(z<;ra38bDtf{l;E$5HF%r{`3cHoTx4Df#`TbpkA|H4K}&HqjG9Yw=MD~2l921y!U zq3@3i(IhdEdq9je8Zq*3rfL86!g`}~rR>(fG4sa2XjCZxzE&hi|G5}&nrSlKcG&pO z?gUQFRP_C+%oe8$prakb$~zZC5hJN4@?)$rHsZAPseLnuMs94M`&73;(l#wR=0Erh z`u1$uUQiY>o;Pdlyt*6fZ_q4o8&t%+#>K&>Lqfn;rDPnN9YTztV~QI^#;|ek?1t6E z$EY{K)ct%I__B_Ug4O(a#CX8L>oXIE4X4(qOtwHjO|Wo^|6n0$oQ0p#d~!%)WO%uy zbqnNC^2XmmcC3H&S7n!A2Nr*N_(V9D7SyE?`|XcE5hM7J zW{?op&#k_((&~s+-e>2j?~;c;{)J@~%@89B+c~htW5X*7w%((mh%sr5jFxCHzbx}t zgZ3jv*iv$8P31IUZZ*O5w7k_i8}_iBDKV%VIE=Hn3|3Eu(^ z32zp}NW7lK=L2=p$KTj9yowkeOAmry9VUqdoxb_Ji~3BymTsB<#pr_|7$9B3hmrnCIXB@HP*6P}P}jesl=F>x%<2O#b)X zkZT8%QOL!=bwy8?sNi-{hheDGcRiDbN9NTctepYrhhUd(J#O0(f8xPStir$cA?sF zk*xyeVpK%j+L+uM?ZAPv(g{rf 
zPOh_^74%&}&#Xz`HsH8_X;bLIESH|m%sCBF9Y!u1@62A4g!}KrRtDrXQH3t4JxFKe zI`$@S;0f{%w~v`TdkHwY2g9$y9_Y#Ed{P6U<$UD|A+c%d`RV7qY=8TgGsj3fe+CR1O{`ojX=L(Q7yAJ*{+kMDNeIxklm8*rD4glxV;ls9xSYoxc zf73I4YFKq`+LdQ7fRikCt#%J^lAiMCF(~Rxm1di|3cA0xOh}(f0ZwR@P(0{}r{j*F zUq$eia<5BVbpzD9pUw_)L$E(*CwI$ngpfp~T=x2+jfi1OV{6a9jb)y&=EokAq<&i( z&2LSBJ?`d6rE|9$aI}AB_}xPb%V#3`^R!W{*!ntBtpYgDkC!~F15RdcHAfy6Q|CA( zV6aRz)wS8)NQHXN?$>U5qd^jH^cAMotp$#!)Vx41mT!;=aE+{?#)hIEM_%%U{KQ%G z=yM`Q{5NvuY%wNYbF&`vgFX!yt$nv%7C1v2PX5o=dk)?|cy2S697ypjzTr%Ldpl}; zE~^Oo=R{!imQy4_d2UsC2Y%4@-+yNQFR_wV;b3D+6*Vc6YRYDPl_YMx5|e)lb2eUA z?EbPO7G(D$b%Xg)=7qIk#@$cA*PG9OWruq6+B=k8&4Fd>LL1n{1F1H#l3zW^{v`2q zZC=WITkzG47eibyA1{9TWieibRVTeha}s%}DY`Aw#pK7(r-#SRT!Olf%h(cTYK|rG zo}J(FPovBuJhRi!^GRZ8&GsETZUQH6x;x#*;J$qee_}E{S`aV zZTo~PM@PS7%^eqIYQz;OMuJ4Sc*}XvQPZG`4G@>u{c+6A4Omfgb5q%!Ae6P0&+Ewj zr=at6zUpf5pWcVFCMD~!>ie6o&OW_K^`<6CJgWp9$W;hD{a%YCKB?P#rUB;?am_EQ z36rPh&&6BFQPYPN8Lzibk%ZsK)7~!-ml)@q6@jZ*^;vJu=(gvm;LauACiV@mr>x^? zECRpmozbKrJBroQ)o=3{@>6~0G%gNH$X|Y38T;csB=J?{$?6>jVq7+5?uwtm+Tj)( zjQtm>nI~E`+cjNCBJj1H|GQ@}*OpxyCFpqHfj+b4=wS3Hs=>NY1NvvIyBYJ5z-gkL z_+x=JO4_UMR&`VTf|F4PcE`YbPNGY#~8=S8<6KI?Q)h-Rp87YkveaO@yNGu zflPa<@0eev@I81>D`hvmEC;&&^VU!KkMKJOR;R%L3#@+vpNu@OO3hxUasCVios5WO z&gx_Xj*`2j^eQIJ8~;qH44@R-hMLh4i0e$a%m!i5bMF~lCRt&u^*JH)@O4kBPozSb zTKwN0|D^851=urs3^mg~7z5|4oSc~yHi&Z;^IKh|rf8A@20ph)BD!)@g)R8csNZ@8 zIcKp}^Y?%U3%V$8T-2?ew}B-7Uf9xP2>bJGnN~wC2ds03=rD71r@9~2Q^sB32VSq8 z&aU4Fb2mHtWFq*n%O9keISa6%c0Zy|oTKInXFKsR(A!v{@OuRK@+c~>QuPql&Uq-& zd*~w-@hIK&nI7oh0tS+*gHA3~T?}6+ z1J3O}`%}VLPbj78#(_?%olW~=P$>dVnRKT-%>Tl3?)M5c zF_zAeo;6Mq2`U@y9HxQe!&!NO3G3`xskr;(3{~a3GyVDK2yi@QLmbr+!-w<3FDr!g zlO83MjQaGnVfW&oI&h9kiU!>Vj!xb^@vm4<%{Ys%^#b+% zVyW!Q%TV_wU@Ep>F!Up8`-FiDyN3}g9h;a@#Brz6zIy} zim>wm*hj8;`6r3L!G?)nU9T3CbT-zOL@VK!;O}DeN82oc!=`xZy*Jh=tZS_h2&IM@ z@5tk~zXONkWbZL+cyEiZQ?Yg(v@)#4p4A7OrdELgp-%mV&=n4i}T^!Co9x_WN{ z8|v=0Hn)C2-)f;GJ=-Tu67P0?I@i;O7!f>{`rGee-GMJ54_hBmO95@S>RQ2f{S}=% 
z5CeUDZ%^`=WCqqZDwQACu%Z4PST@Wq76kp=b&P8l)YGcg2EOkwKY|N+=YBI`!_E68 zd$i8dRhM)=;B1&rg9lyBBbyN8{yn`wtpKd=Uf3VU6hvpT`ge%FxEapr8)|C@@1-Fn g7GZ8(*g*D&J?)GXeVsvW)|jaW%)v~zfZw(M2M%`pegFUf literal 0 HcmV?d00001 From f5ebfb9052f649edec461f0221cd70cd59d5eda0 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Fri, 3 Jun 2022 15:18:19 -0600 Subject: [PATCH 022/172] op - more data in view --- interface/ceed-operator.c | 6 ++++++ julia/LibCEED.jl/test/rundevtests.jl | 4 ++++ python/tests/output/test_504.out | 12 ++++++++++++ python/tests/output/test_523.out | 24 ++++++++++++++++++++++++ tests/output/t504-operator-f.out | 12 ++++++++++++ tests/output/t504-operator.out | 12 ++++++++++++ tests/output/t523-operator-f.out | 24 ++++++++++++++++++++++++ tests/output/t523-operator.out | 24 ++++++++++++++++++++++++ 8 files changed, 118 insertions(+) diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c index 28f3ca5632..10d0d49dee 100644 --- a/interface/ceed-operator.c +++ b/interface/ceed-operator.c @@ -147,6 +147,11 @@ static int CeedOperatorFieldView(CeedOperatorField field, "%s Name: \"%s\"\n", pre, in_out, field_number, pre, qf_field->field_name); + fprintf(stream, "%s Size: %d\n", pre, qf_field->size); + + fprintf(stream, "%s EvalMode: %s\n", pre, + CeedEvalModes[qf_field->eval_mode]); + if (field->basis == CEED_BASIS_COLLOCATED) fprintf(stream, "%s Collocated basis\n", pre); @@ -154,6 +159,7 @@ static int CeedOperatorFieldView(CeedOperatorField field, fprintf(stream, "%s Active vector\n", pre); else if (field->vec == CEED_VECTOR_NONE) fprintf(stream, "%s No vector\n", pre); + return CEED_ERROR_SUCCESS; } diff --git a/julia/LibCEED.jl/test/rundevtests.jl b/julia/LibCEED.jl/test/rundevtests.jl index e80f9ba9dc..b9a458f51d 100644 --- a/julia/LibCEED.jl/test/rundevtests.jl +++ b/julia/LibCEED.jl/test/rundevtests.jl @@ -50,10 +50,14 @@ using Test, LibCEED, LinearAlgebra, StaticArrays 1 input field: Input field 0: Name: "input" + Size: 1 + EvalMode: interpolation Active 
vector 1 output field: Output field 0: Name: "output" + Size: 1 + EvalMode: interpolation Active vector""" end end diff --git a/python/tests/output/test_504.out b/python/tests/output/test_504.out index e956387c9b..2014938716 100644 --- a/python/tests/output/test_504.out +++ b/python/tests/output/test_504.out @@ -4,13 +4,19 @@ CeedOperator 2 input fields: Input field 0: Name: "weights" + Size: 1 + EvalMode: quadrature weights No vector Input field 1: Name: "dx" + Size: 1 + EvalMode: gradient Active vector 1 output field: Output field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Active vector @@ -20,12 +26,18 @@ CeedOperator 2 input fields: Input field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Input field 1: Name: "u" + Size: 1 + EvalMode: interpolation Active vector 1 output field: Output field 0: Name: "v" + Size: 1 + EvalMode: interpolation Active vector diff --git a/python/tests/output/test_523.out b/python/tests/output/test_523.out index 8721114d9f..07eded0c87 100644 --- a/python/tests/output/test_523.out +++ b/python/tests/output/test_523.out @@ -5,13 +5,19 @@ Composite CeedOperator - setup 2 input fields: Input field 0: Name: "weights" + Size: 1 + EvalMode: quadrature weights No vector Input field 1: Name: "dx" + Size: 4 + EvalMode: gradient Active vector 1 output field: Output field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis SubOperator 1 - quadralateral elements: 6 elements with 16 quadrature points each @@ -19,13 +25,19 @@ Composite CeedOperator - setup 2 input fields: Input field 0: Name: "weights" + Size: 1 + EvalMode: quadrature weights No vector Input field 1: Name: "dx" + Size: 4 + EvalMode: gradient Active vector 1 output field: Output field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Composite CeedOperator - mass @@ -35,13 +47,19 @@ Composite CeedOperator - mass 2 input fields: Input field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Input field 1: Name: "u" + Size: 1 + 
EvalMode: interpolation Active vector 1 output field: Output field 0: Name: "v" + Size: 1 + EvalMode: interpolation Active vector SubOperator 1 - quadralateral elements: 6 elements with 16 quadrature points each @@ -49,12 +67,18 @@ Composite CeedOperator - mass 2 input fields: Input field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Input field 1: Name: "u" + Size: 1 + EvalMode: interpolation Active vector 1 output field: Output field 0: Name: "v" + Size: 1 + EvalMode: interpolation Active vector diff --git a/tests/output/t504-operator-f.out b/tests/output/t504-operator-f.out index 91d47589d4..ea7cc99fa3 100644 --- a/tests/output/t504-operator-f.out +++ b/tests/output/t504-operator-f.out @@ -4,13 +4,19 @@ CeedOperator 2 input fields: Input field 0: Name: "weight" + Size: 1 + EvalMode: quadrature weights No vector Input field 1: Name: "dx" + Size: 1 + EvalMode: gradient Active vector 1 output field: Output field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Active vector CeedOperator @@ -19,11 +25,17 @@ CeedOperator 2 input fields: Input field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Input field 1: Name: "u" + Size: 2 + EvalMode: interpolation Active vector 1 output field: Output field 0: Name: "v" + Size: 2 + EvalMode: interpolation Active vector diff --git a/tests/output/t504-operator.out b/tests/output/t504-operator.out index 91d47589d4..ea7cc99fa3 100644 --- a/tests/output/t504-operator.out +++ b/tests/output/t504-operator.out @@ -4,13 +4,19 @@ CeedOperator 2 input fields: Input field 0: Name: "weight" + Size: 1 + EvalMode: quadrature weights No vector Input field 1: Name: "dx" + Size: 1 + EvalMode: gradient Active vector 1 output field: Output field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Active vector CeedOperator @@ -19,11 +25,17 @@ CeedOperator 2 input fields: Input field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Input field 1: Name: "u" + Size: 2 + EvalMode: interpolation 
Active vector 1 output field: Output field 0: Name: "v" + Size: 2 + EvalMode: interpolation Active vector diff --git a/tests/output/t523-operator-f.out b/tests/output/t523-operator-f.out index 79b6ac9151..0acb0ca062 100644 --- a/tests/output/t523-operator-f.out +++ b/tests/output/t523-operator-f.out @@ -5,13 +5,19 @@ Composite CeedOperator - setup 2 input fields: Input field 0: Name: "weight" + Size: 1 + EvalMode: quadrature weights No vector Input field 1: Name: "dx" + Size: 4 + EvalMode: gradient Active vector 1 output field: Output field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis SubOperator 1 - quadralateral elements: 6 elements with 16 quadrature points each @@ -19,13 +25,19 @@ Composite CeedOperator - setup 2 input fields: Input field 0: Name: "weight" + Size: 1 + EvalMode: quadrature weights No vector Input field 1: Name: "dx" + Size: 4 + EvalMode: gradient Active vector 1 output field: Output field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Composite CeedOperator - mass SubOperator 0 - triangle elements: @@ -34,13 +46,19 @@ Composite CeedOperator - mass 2 input fields: Input field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Input field 1: Name: "u" + Size: 1 + EvalMode: interpolation Active vector 1 output field: Output field 0: Name: "v" + Size: 1 + EvalMode: interpolation Active vector SubOperator 1 - quadralateral elements: 6 elements with 16 quadrature points each @@ -48,11 +66,17 @@ Composite CeedOperator - mass 2 input fields: Input field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Input field 1: Name: "u" + Size: 1 + EvalMode: interpolation Active vector 1 output field: Output field 0: Name: "v" + Size: 1 + EvalMode: interpolation Active vector diff --git a/tests/output/t523-operator.out b/tests/output/t523-operator.out index 79b6ac9151..0acb0ca062 100644 --- a/tests/output/t523-operator.out +++ b/tests/output/t523-operator.out @@ -5,13 +5,19 @@ Composite CeedOperator - setup 2 input 
fields: Input field 0: Name: "weight" + Size: 1 + EvalMode: quadrature weights No vector Input field 1: Name: "dx" + Size: 4 + EvalMode: gradient Active vector 1 output field: Output field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis SubOperator 1 - quadralateral elements: 6 elements with 16 quadrature points each @@ -19,13 +25,19 @@ Composite CeedOperator - setup 2 input fields: Input field 0: Name: "weight" + Size: 1 + EvalMode: quadrature weights No vector Input field 1: Name: "dx" + Size: 4 + EvalMode: gradient Active vector 1 output field: Output field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Composite CeedOperator - mass SubOperator 0 - triangle elements: @@ -34,13 +46,19 @@ Composite CeedOperator - mass 2 input fields: Input field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Input field 1: Name: "u" + Size: 1 + EvalMode: interpolation Active vector 1 output field: Output field 0: Name: "v" + Size: 1 + EvalMode: interpolation Active vector SubOperator 1 - quadralateral elements: 6 elements with 16 quadrature points each @@ -48,11 +66,17 @@ Composite CeedOperator - mass 2 input fields: Input field 0: Name: "rho" + Size: 1 + EvalMode: none Collocated basis Input field 1: Name: "u" + Size: 1 + EvalMode: interpolation Active vector 1 output field: Output field 0: Name: "v" + Size: 1 + EvalMode: interpolation Active vector From a1df05f8b168553250579357e67fc0811b6585b1 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Sun, 5 Jun 2022 15:25:01 -0600 Subject: [PATCH 023/172] examples/fluids: document developer options --- examples/fluids/README.md | 11 +++++++++++ examples/fluids/index.md | 2 +- examples/fluids/problems/newtonian.c | 8 ++++---- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/examples/fluids/README.md b/examples/fluids/README.md index e8d650cf9f..5d06388c51 100644 --- a/examples/fluids/README.md +++ b/examples/fluids/README.md @@ -528,8 +528,14 @@ For the Density Current, Channel, and Blasius 
problems, the following common com - Thermal conductivity - `0.02638` - `W/(m K)` + +* - `-newtonian_unit_tests` + - Developer option to test properties + - `false` + - boolean ::: + #### Density current For the Density Current problem, the following command-line options are available in @@ -612,6 +618,11 @@ addition to the Newtonian Ideal Gas options: - Atmospheric pressure - `1E5` - `Pa` + +* - `-body_force_scale` + - Multiplier for body force (`-1` for flow reversal) + - 1 + - ::: This problem can be run with the `channel.yaml` file via: diff --git a/examples/fluids/index.md b/examples/fluids/index.md index d4ccbac231..e8973fd6f5 100644 --- a/examples/fluids/index.md +++ b/examples/fluids/index.md @@ -437,7 +437,7 @@ $$p = p_0 - \frac{2\rho_0 u_{\max}^2 x_1}{Re_H H}$$ where $H$ is the channel half-height, $u_{\max}$ is the center velocity, $T_w$ is the temperature at the wall, $Pr=\frac{c_p \mu}{\kappa}$ is the Prandtl number, $\hat E_c = \frac{u_{\max}^2}{c_p T_w}$ is the modified Eckert number, and $Re_H = \frac{u_{\max}H}{\nu}$ is the Reynolds number. Boundary conditions are periodic in the streamwise direction, and no-slip and non-penetration boundary conditions at the walls. -The flow is driven by a body force. +The flow is driven by a body force determined analytically from the fluid properties and setup parameters $H$ and $u_{\max}$. 
## Flat Plate Boundary Layer diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index 12df2520f4..2e2ce85ad2 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -23,10 +23,10 @@ static PetscErrorCode CheckPrimitiveWithTolerance(StatePrimitive sY, sY.velocity[2])); for (int j=0; j<3; j++) eY.velocity[j] = (aY.velocity[j] - bY.velocity[j]) / u; eY.temperature = (aY.temperature - bY.temperature) / sY.temperature; - if (fabs(eY.pressure) > rtol_pressure) printf("%s: pressure error %g\n", name, - eY.pressure); - for (int j=0; j<3; - j++) if (fabs(eY.velocity[j]) > rtol_velocity) + if (fabs(eY.pressure) > rtol_pressure) + printf("%s: pressure error %g\n", name, eY.pressure); + for (int j=0; j<3; j++) + if (fabs(eY.velocity[j]) > rtol_velocity) printf("%s: velocity[%d] error %g\n", name, j, eY.velocity[j]); if (fabs(eY.temperature) > rtol_temperature) printf("%s: temperature error %g\n", name, eY.temperature); From 01ea9c811e8dd59782d9f1e63b4c26beef50f890 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Sun, 5 Jun 2022 15:25:32 -0600 Subject: [PATCH 024/172] doc: support Sphinx-5 and fix man page --- doc/sphinx/source/conf.py | 2 +- interface/ceed-preconditioning.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py index 7992bb690e..ebe6857d3d 100755 --- a/doc/sphinx/source/conf.py +++ b/doc/sphinx/source/conf.py @@ -95,7 +95,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index de2a07a7f1..eec3fedfd9 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -26,8 +26,9 @@ @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality + @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction @param[in] qf CeedQFunction to create fallback for - @param[out] fallback_qf fallback CeedQFunction + @param[out] qf_fallback fallback CeedQFunction @return An error code: 0 - success, otherwise - failure From 17b9cea23060f0b8d6d7c7fa17ecce05aa38bdce Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Sun, 5 Jun 2022 15:42:13 -0600 Subject: [PATCH 025/172] examples/fluids: slightly relax tolerance for /gpu/hip --- examples/fluids/navierstokes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index 77444c17f1..3e3ba3bc3c 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -35,7 +35,7 @@ //TESTARGS(name="euler_implicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-implicit.bin //TESTARGS(name="euler_explicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 
-bc_slip_z 1,2 -vortex_strength 2 -ts_dt 1e-7 -ts_rk_type 5bs -ts_rtol 1e-10 -ts_atol 1e-10 -compare_final_state_atol 1E-7 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-explicit.bin //TESTARGS(name="shocktube_explicit_su_yzb") -ceed {ceed_resource} -test -problem shocktube -degree 1 -dm_plex_box_faces 50,1,1 -units_meter 1e-2 units_second 1e-2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,20,20 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -yzb -stab su -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin -//TESTARGS(name="blasius_STG") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin -snes_fd_color +//TESTARGS(name="blasius_STG") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 2E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin -snes_fd_color //TESTARGS(name="blasius_STG_weakT") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin -weakT -snes_fd_color //TESTARGS(name="blasius_STG_strongBC") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-10 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin -stg_strong true From d04bbc78fb1f4c986969af2fa56bdcf94e57fde9 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Fri, 3 Jun 2022 13:07:42 -0600 Subject: [PATCH 026/172] op - add debugging output to fallback creation --- include/ceed-impl.h | 4 + 
interface/ceed-preconditioning.c | 266 +++++++++++++++++-------------- interface/ceed.c | 34 ++-- 3 files changed, 166 insertions(+), 138 deletions(-) diff --git a/include/ceed-impl.h b/include/ceed-impl.h index 9328feadb7..9e85b2897a 100644 --- a/include/ceed-impl.h +++ b/include/ceed-impl.h @@ -124,6 +124,7 @@ struct Ceed_private { int ref_count; void *data; bool is_debug; + bool has_valid_op_fallback_resource; bool is_deterministic; char err_msg[CEED_MAX_RESOURCE_LEN]; FOffset *f_offsets; @@ -387,4 +388,7 @@ struct CeedOperator_private { CeedContextFieldLabel *context_labels; }; +CEED_INTERN int CeedOperatorGetFallback(CeedOperator op, + CeedOperator *op_fallback); + #endif diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index de2a07a7f1..83bb1414d7 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -40,6 +40,9 @@ static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, // Check if NULL qf passed in if (!qf) return CEED_ERROR_SUCCESS; + CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); + CeedDebug256(qf->ceed, 255, "Creating fallback CeedQFunction\n"); + char *source_path_with_name = ""; if (qf->source_path) { size_t path_len = strlen(qf->source_path), @@ -87,7 +90,7 @@ static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, @ref Developer **/ -int CeedOperatorCreateFallback(CeedOperator op) { +static int CeedOperatorCreateFallback(CeedOperator op) { int ierr; Ceed ceed_fallback; @@ -96,6 +99,10 @@ int CeedOperatorCreateFallback(CeedOperator op) { // Fallback Ceed ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr); + if (!ceed_fallback) return CEED_ERROR_SUCCESS; + + CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); + CeedDebug256(op->ceed, 255, "Creating fallback CeedOperator\n"); // Clone Op CeedOperator op_fallback; @@ -103,8 +110,11 @@ int 
CeedOperatorCreateFallback(CeedOperator op) { ierr = CeedCompositeOperatorCreate(ceed_fallback, &op_fallback); CeedChk(ierr); for (CeedInt i = 0; i < op->num_suboperators; i++) { - ierr = CeedCompositeOperatorAddSub(op_fallback, op->sub_operators[i]); + CeedOperator op_sub_fallback; + + ierr = CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback); CeedChk(ierr); + ierr = CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback); CeedChk(ierr); } } else { CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL; @@ -146,6 +156,46 @@ int CeedOperatorCreateFallback(CeedOperator op) { return CEED_ERROR_SUCCESS; } +/** + @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality + + @param[in] op CeedOperator to retrieve fallback for + @param[out] op_fallback Fallback CeedOperator + + @return An error code: 0 - success, otherwise - failure + + @ref Developer +**/ +int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { + int ierr; + + // Create if needed + if (!op->op_fallback) { + ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); + } + if (op->op_fallback) { + bool is_debug; + + ierr = CeedIsDebug(op->ceed, &is_debug); CeedChk(ierr); + if (is_debug) { + Ceed ceed_fallback; + const char *resource, *resource_fallback; + + ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr); + ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); + ierr = CeedGetResource(ceed_fallback, &resource_fallback); CeedChk(ierr); + + CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); + CeedDebug256(op->ceed, 255, + "Falling back from %s operator at address %ld to %s operator at address %ld\n", + resource, op, resource_fallback, op->op_fallback); + } + } + *op_fallback = op->op_fallback; + + return CEED_ERROR_SUCCESS; +} + /** @brief Select correct basis matrix pointer based on CeedEvalMode @@ -1339,18 +1389,24 @@ int 
CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, int ierr; ierr = CeedOperatorCheckReady(op); CeedChk(ierr); - // Backend version if (op->LinearAssembleQFunction) { + // Backend version ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); CeedChk(ierr); } else { - // Fallback to reference Ceed - if (!op->op_fallback) { - ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); + // Operator fallback + CeedOperator op_fallback; + + ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); + if (op_fallback) { + ierr = CeedOperatorLinearAssembleQFunction(op_fallback, assembled, + rstr, request); CeedChk(ierr); + } else { + // LCOV_EXCL_START + return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, + "Backend does not support CeedOperatorLinearAssembleQFunction"); + // LCOV_EXCL_STOP } - // Assemble - ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled, - rstr, request); CeedChk(ierr); } return CEED_ERROR_SUCCESS; } @@ -1378,8 +1434,8 @@ int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, int ierr; ierr = CeedOperatorCheckReady(op); CeedChk(ierr); - // Backend version if (op->LinearAssembleQFunctionUpdate) { + // Backend version bool qf_assembled_is_setup; CeedVector assembled_vec = NULL; CeedElemRestriction assembled_rstr = NULL; @@ -1387,10 +1443,10 @@ int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup); CeedChk(ierr); if (qf_assembled_is_setup) { + bool update_needed; + ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr); CeedChk(ierr); - - bool update_needed; ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed); CeedChk(ierr); if (update_needed) { @@ -1406,7 +1462,7 @@ int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false); 
CeedChk(ierr); - // Copy reference to internally held copy + // Copy reference from internally held copy *assembled = NULL; *rstr = NULL; ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr); @@ -1414,13 +1470,19 @@ int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr); ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr); } else { - // Fallback to reference Ceed - if (!op->op_fallback) { - ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); + // Operator fallback + CeedOperator op_fallback; + + ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); + if (op_fallback) { + ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, + rstr, request); CeedChk(ierr); + } else { + // LCOV_EXCL_START + return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, + "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate"); + // LCOV_EXCL_STOP } - // Assemble - ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback, - assembled, rstr, request); CeedChk(ierr); } return CEED_ERROR_SUCCESS; @@ -1459,36 +1521,31 @@ int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); // LCOV_EXCL_STOP - // Use backend version, if available if (op->LinearAssembleDiagonal) { + // Backend version ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr); return CEED_ERROR_SUCCESS; } else if (op->LinearAssembleAddDiagonal) { + // Backend version with zeroing first ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); return CEED_ERROR_SUCCESS; } else { - // Check for valid fallback resource - const char *resource, *fallback_resource; - ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); - ierr = 
CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); - CeedChk(ierr); - if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { - // Fallback to reference Ceed - if (!op->op_fallback) { - ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); - } - // Assemble - ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request); + // Operator fallback + CeedOperator op_fallback; + + ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); + if (op_fallback) { + ierr = CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request); CeedChk(ierr); return CEED_ERROR_SUCCESS; } } - // Default interface implementation ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); + return CEED_ERROR_SUCCESS; } @@ -1525,40 +1582,33 @@ int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); // LCOV_EXCL_STOP - // Use backend version, if available if (op->LinearAssembleAddDiagonal) { + // Backend version ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr); return CEED_ERROR_SUCCESS; } else { - // Check for valid fallback resource - const char *resource, *fallback_resource; - ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); - ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); - CeedChk(ierr); - if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { - // Fallback to reference Ceed - if (!op->op_fallback) { - ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); - } - // Assemble - ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled, - request); CeedChk(ierr); + // Operator fallback + CeedOperator op_fallback; + + ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); + if (op_fallback) { + ierr = 
CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request); + CeedChk(ierr); return CEED_ERROR_SUCCESS; } } - // Default interface implementation bool is_composite; ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); if (is_composite) { ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled); CeedChk(ierr); - return CEED_ERROR_SUCCESS; } else { ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled); CeedChk(ierr); - return CEED_ERROR_SUCCESS; } + + return CEED_ERROR_SUCCESS; } /** @@ -1600,38 +1650,33 @@ int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); // LCOV_EXCL_STOP - // Use backend version, if available if (op->LinearAssemblePointBlockDiagonal) { + // Backend version ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request); CeedChk(ierr); return CEED_ERROR_SUCCESS; } else if (op->LinearAssembleAddPointBlockDiagonal) { + // Backend version with zeroing first ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request); CeedChk(ierr); return CEED_ERROR_SUCCESS; } else { - // Check for valid fallback resource - const char *resource, *fallback_resource; - ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); - ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); - CeedChk(ierr); - if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { - // Fallback to reference Ceed - if (!op->op_fallback) { - ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); - } - // Assemble - ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback, - assembled, request); CeedChk(ierr); + // Operator fallback + CeedOperator op_fallback; + + ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); + if (op_fallback) { + ierr = 
CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, + request); CeedChk(ierr); return CEED_ERROR_SUCCESS; } } - // Default interface implementation ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr); ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request); CeedChk(ierr); + return CEED_ERROR_SUCCESS; } @@ -1674,41 +1719,34 @@ int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square"); // LCOV_EXCL_STOP - // Use backend version, if available if (op->LinearAssembleAddPointBlockDiagonal) { + // Backend version ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request); CeedChk(ierr); return CEED_ERROR_SUCCESS; } else { - // Check for valid fallback resource - const char *resource, *fallback_resource; - ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); - ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); - CeedChk(ierr); - if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { - // Fallback to reference Ceed - if (!op->op_fallback) { - ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); - } - // Assemble - ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback, - assembled, request); CeedChk(ierr); + // Operator fallback + CeedOperator op_fallback; + + ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); + if (op_fallback) { + ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, + request); CeedChk(ierr); return CEED_ERROR_SUCCESS; } } - // Default interface implemenation bool is_composite; ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr); if (is_composite) { ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled); CeedChk(ierr); - return CEED_ERROR_SUCCESS; } else { ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); CeedChk(ierr); - return 
CEED_ERROR_SUCCESS; } + + return CEED_ERROR_SUCCESS; } /** @@ -1743,24 +1781,18 @@ int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, bool is_composite; ierr = CeedOperatorCheckReady(op); CeedChk(ierr); - // Use backend version, if available if (op->LinearAssembleSymbolic) { + // Backend version ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr); return CEED_ERROR_SUCCESS; } else { - // Check for valid fallback resource - const char *resource, *fallback_resource; - ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); - ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); - CeedChk(ierr); - if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { - // Fallback to reference Ceed - if (!op->op_fallback) { - ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); - } - // Assemble - ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows, - cols); CeedChk(ierr); + // Operator fallback + CeedOperator op_fallback; + + ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); + if (op_fallback) { + ierr = CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols); + CeedChk(ierr); return CEED_ERROR_SUCCESS; } } @@ -1834,23 +1866,17 @@ int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { CeedOperator *sub_operators; ierr = CeedOperatorCheckReady(op); CeedChk(ierr); - // Use backend version, if available if (op->LinearAssemble) { + // Backend version ierr = op->LinearAssemble(op, values); CeedChk(ierr); return CEED_ERROR_SUCCESS; } else { - // Check for valid fallback resource - const char *resource, *fallback_resource; - ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); - ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); - CeedChk(ierr); - if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { - // Fallback to reference Ceed - if (!op->op_fallback) { - ierr = 
CeedOperatorCreateFallback(op); CeedChk(ierr); - } - // Assemble - ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr); + // Operator fallback + CeedOperator op_fallback; + + ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); + if (op_fallback) { + ierr = CeedOperatorLinearAssemble(op_fallback, values); CeedChk(ierr); return CEED_ERROR_SUCCESS; } } @@ -2168,29 +2194,23 @@ int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, int ierr; ierr = CeedOperatorCheckReady(op); CeedChk(ierr); - // Use backend version, if available if (op->CreateFDMElementInverse) { + // Backend version ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr); return CEED_ERROR_SUCCESS; } else { - // Check for valid fallback resource - const char *resource, *fallback_resource; - ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr); - ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource); - CeedChk(ierr); - if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) { - // Fallback to reference Ceed - if (!op->op_fallback) { - ierr = CeedOperatorCreateFallback(op); CeedChk(ierr); - } - // Assemble - ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request); + // Operator fallback + CeedOperator op_fallback; + + ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); + if (op_fallback) { + ierr = CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request); CeedChk(ierr); return CEED_ERROR_SUCCESS; } } - // Interface implementation + // Default interface implementation Ceed ceed, ceed_parent; ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr); diff --git a/interface/ceed.c b/interface/ceed.c index da5f922e42..5a6e7a19fb 100644 --- a/interface/ceed.c +++ b/interface/ceed.c @@ -505,21 +505,20 @@ int CeedGetOperatorFallbackResource(Ceed ceed, const char **resource) { int 
CeedGetOperatorFallbackCeed(Ceed ceed, Ceed *fallback_ceed) { int ierr; + if (ceed->has_valid_op_fallback_resource) { + CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); + CeedDebug256(ceed, 255, "Getting fallback from %s to %s\n", ceed->resource, + ceed->op_fallback_resource); + } + // Create fallback Ceed if uninitalized - if (!ceed->op_fallback_ceed) { - // Check resource - const char *resource, *fallback_resource; - ierr = CeedGetResource(ceed, &resource); CeedChk(ierr); - ierr = CeedGetOperatorFallbackResource(ceed, &fallback_resource); CeedChk(ierr); - if (!strcmp(resource, fallback_resource)) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_UNSUPPORTED, - "Backend %s cannot create an operator" - "fallback to resource %s", resource, fallback_resource); - // LCOV_EXCL_STOP + if (!ceed->op_fallback_ceed && ceed->has_valid_op_fallback_resource) { + CeedDebug256(ceed, 255, "Creating fallback Ceed"); - // Create fallback Ceed fallback_ceed; + const char *fallback_resource; + + ierr = CeedGetOperatorFallbackResource(ceed, &fallback_resource); CeedChk(ierr); ierr = CeedInit(fallback_resource, &fallback_ceed); CeedChk(ierr); fallback_ceed->op_fallback_parent = ceed; fallback_ceed->Error = ceed->Error; @@ -552,6 +551,11 @@ int CeedSetOperatorFallbackResource(Ceed ceed, const char *resource) { // Set new ierr = CeedStringAllocCopy(resource, (char **)&ceed->op_fallback_resource); CeedChk(ierr); + + // Check validity + ceed->has_valid_op_fallback_resource = strlen(ceed->op_fallback_resource) > 0 && + strcmp(ceed->op_fallback_resource, ceed->resource); + return CEED_ERROR_SUCCESS; } @@ -945,9 +949,6 @@ int CeedInit(const char *resource, Ceed *ceed) { (*ceed)->is_debug = !!getenv("CEED_DEBUG") || !!getenv("DEBUG") || !!getenv("DBG"); - // Backend specific setup - ierr = backends[match_index].init(&resource[match_help], *ceed); CeedChk(ierr); - // Copy resource prefix, if backend setup successful ierr = 
CeedStringAllocCopy(backends[match_index].prefix, (char **)&(*ceed)->resource); @@ -959,6 +960,9 @@ int CeedInit(const char *resource, Ceed *ceed) { ierr = CeedAddJitSourceRoot(*ceed, (char *)CeedJitSourceRootDefault); CeedChk(ierr); + // Backend specific setup + ierr = backends[match_index].init(&resource[match_help], *ceed); CeedChk(ierr); + return CEED_ERROR_SUCCESS; } From 6aa95790dc5b28a9e235efa2aa8ff94fc141b832 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Mon, 6 Jun 2022 14:19:39 -0600 Subject: [PATCH 027/172] pc - fix fallback for composite assembly --- backends/cuda-gen/ceed-cuda-gen-operator.c | 1 + backends/cuda-gen/ceed-cuda-gen.h | 2 - backends/cuda-ref/ceed-cuda-ref-operator.c | 77 ++--------------- backends/cuda-ref/ceed-cuda-ref.c | 2 - backends/cuda-ref/ceed-cuda-ref.h | 1 - backends/hip-gen/ceed-hip-gen.h | 2 - backends/hip-ref/ceed-hip-ref-operator.c | 96 ++-------------------- backends/hip-ref/ceed-hip-ref.c | 2 - backends/hip-ref/ceed-hip-ref.h | 1 - interface/ceed-preconditioning.c | 9 +- 10 files changed, 24 insertions(+), 169 deletions(-) diff --git a/backends/cuda-gen/ceed-cuda-gen-operator.c b/backends/cuda-gen/ceed-cuda-gen-operator.c index 255cbbfc00..de44881a67 100644 --- a/backends/cuda-gen/ceed-cuda-gen-operator.c +++ b/backends/cuda-gen/ceed-cuda-gen-operator.c @@ -265,4 +265,5 @@ int CeedOperatorCreate_Cuda_gen(CeedOperator op) { CeedOperatorDestroy_Cuda_gen); CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } + //------------------------------------------------------------------------------ diff --git a/backends/cuda-gen/ceed-cuda-gen.h b/backends/cuda-gen/ceed-cuda-gen.h index 1cfef466f9..586be9800e 100644 --- a/backends/cuda-gen/ceed-cuda-gen.h +++ b/backends/cuda-gen/ceed-cuda-gen.h @@ -39,6 +39,4 @@ CEED_INTERN int CeedQFunctionCreate_Cuda_gen(CeedQFunction qf); CEED_INTERN int CeedOperatorCreate_Cuda_gen(CeedOperator op); -CEED_INTERN int CeedCompositeOperatorCreate_Cuda_gen(CeedOperator op); - #endif // 
_ceed_cuda_gen_h diff --git a/backends/cuda-ref/ceed-cuda-ref-operator.c b/backends/cuda-ref/ceed-cuda-ref-operator.c index f7de5144a5..c14c7b2c4f 100644 --- a/backends/cuda-ref/ceed-cuda-ref-operator.c +++ b/backends/cuda-ref/ceed-cuda-ref-operator.c @@ -1066,38 +1066,14 @@ static inline int CeedOperatorAssembleDiagonalCore_Cuda(CeedOperator op, return CEED_ERROR_SUCCESS; } -//------------------------------------------------------------------------------ -// Assemble composite diagonal common code -//------------------------------------------------------------------------------ -static inline int CeedOperatorLinearAssembleAddDiagonalCompositeCore_Cuda( - CeedOperator op, CeedVector assembled, CeedRequest *request, - const bool pointBlock) { - int ierr; - CeedInt numSub; - CeedOperator *subOperators; - ierr = CeedOperatorGetNumSub(op, &numSub); CeedChkBackend(ierr); - ierr = CeedOperatorGetSubList(op, &subOperators); CeedChkBackend(ierr); - for (CeedInt i = 0; i < numSub; i++) { - ierr = CeedOperatorAssembleDiagonalCore_Cuda(subOperators[i], assembled, - request, pointBlock); CeedChkBackend(ierr); - } - return CEED_ERROR_SUCCESS; -} - //------------------------------------------------------------------------------ // Assemble Linear Diagonal //------------------------------------------------------------------------------ static int CeedOperatorLinearAssembleAddDiagonal_Cuda(CeedOperator op, CeedVector assembled, CeedRequest *request) { - int ierr; - bool isComposite; - ierr = CeedOperatorIsComposite(op, &isComposite); CeedChkBackend(ierr); - if (isComposite) { - return CeedOperatorLinearAssembleAddDiagonalCompositeCore_Cuda(op, assembled, - request, false); - } else { - return CeedOperatorAssembleDiagonalCore_Cuda(op, assembled, request, false); - } + int ierr = CeedOperatorAssembleDiagonalCore_Cuda(op, assembled, request, false); + CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ 
@@ -1105,15 +1081,9 @@ static int CeedOperatorLinearAssembleAddDiagonal_Cuda(CeedOperator op, //------------------------------------------------------------------------------ static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Cuda(CeedOperator op, CeedVector assembled, CeedRequest *request) { - int ierr; - bool isComposite; - ierr = CeedOperatorIsComposite(op, &isComposite); CeedChkBackend(ierr); - if (isComposite) { - return CeedOperatorLinearAssembleAddDiagonalCompositeCore_Cuda(op, assembled, - request, true); - } else { - return CeedOperatorAssembleDiagonalCore_Cuda(op, assembled, request, true); - } + int ierr = CeedOperatorAssembleDiagonalCore_Cuda(op, assembled, request, true); + CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ @@ -1397,39 +1367,8 @@ static int CeedSingleOperatorAssemble_Cuda(CeedOperator op, CeedInt offset, // TODO: allow multiple active input restrictions/basis objects //------------------------------------------------------------------------------ int CeedOperatorLinearAssemble_Cuda(CeedOperator op, CeedVector values) { - - // As done in the default implementation, loop through suboperators - // for composite operators, or call single operator assembly otherwise - bool is_composite; - CeedInt ierr; - ierr = CeedOperatorIsComposite(op, &is_composite); CeedChkBackend(ierr); - - CeedElemRestriction rstr; - CeedInt num_elem, elem_size, num_comp; - - CeedInt offset = 0; - if (is_composite) { - CeedInt num_suboperators; - ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChkBackend(ierr); - CeedOperator *sub_operators; - ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChkBackend(ierr); - for (int k = 0; k < num_suboperators; ++k) { - ierr = CeedSingleOperatorAssemble_Cuda(sub_operators[k], offset, values); - CeedChkBackend(ierr); - ierr = CeedOperatorGetActiveElemRestriction(sub_operators[k], &rstr); - CeedChkBackend(ierr); - ierr = 
CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChkBackend(ierr); - ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); - CeedChkBackend(ierr); - ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); - CeedChkBackend(ierr); - offset += elem_size*num_comp * elem_size*num_comp * num_elem; - } - } else { - ierr = CeedSingleOperatorAssemble_Cuda(op, offset, values); - CeedChkBackend(ierr); - } - + int ierr = CeedSingleOperatorAssemble_Cuda(op, 0, values); + CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ diff --git a/backends/cuda-ref/ceed-cuda-ref.c b/backends/cuda-ref/ceed-cuda-ref.c index ace2d8b91d..ff08393d22 100644 --- a/backends/cuda-ref/ceed-cuda-ref.c +++ b/backends/cuda-ref/ceed-cuda-ref.c @@ -74,8 +74,6 @@ static int CeedInit_Cuda(const char *resource, Ceed ceed) { CeedQFunctionContextCreate_Cuda); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "OperatorCreate", CeedOperatorCreate_Cuda); CeedChkBackend(ierr); - ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "CompositeOperatorCreate", - CeedCompositeOperatorCreate_Cuda); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "Destroy", CeedDestroy_Cuda); CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; diff --git a/backends/cuda-ref/ceed-cuda-ref.h b/backends/cuda-ref/ceed-cuda-ref.h index 5dbafe2b01..0d37511a40 100644 --- a/backends/cuda-ref/ceed-cuda-ref.h +++ b/backends/cuda-ref/ceed-cuda-ref.h @@ -150,5 +150,4 @@ CEED_INTERN int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx); CEED_INTERN int CeedOperatorCreate_Cuda(CeedOperator op); -CEED_INTERN int CeedCompositeOperatorCreate_Cuda(CeedOperator op); #endif diff --git a/backends/hip-gen/ceed-hip-gen.h b/backends/hip-gen/ceed-hip-gen.h index c6c70e885e..432aef3a2a 100644 --- a/backends/hip-gen/ceed-hip-gen.h +++ b/backends/hip-gen/ceed-hip-gen.h @@ -39,6 +39,4 @@ CEED_INTERN int 
CeedQFunctionCreate_Hip_gen(CeedQFunction qf); CEED_INTERN int CeedOperatorCreate_Hip_gen(CeedOperator op); -CEED_INTERN int CeedCompositeOperatorCreate_Hip_gen(CeedOperator op); - #endif // _ceed_hip_gen_h diff --git a/backends/hip-ref/ceed-hip-ref-operator.c b/backends/hip-ref/ceed-hip-ref-operator.c index 2e323d9088..5bbcf6a206 100644 --- a/backends/hip-ref/ceed-hip-ref-operator.c +++ b/backends/hip-ref/ceed-hip-ref-operator.c @@ -1064,38 +1064,14 @@ static inline int CeedOperatorAssembleDiagonalCore_Hip(CeedOperator op, return CEED_ERROR_SUCCESS; } -//------------------------------------------------------------------------------ -// Assemble composite diagonal common code -//------------------------------------------------------------------------------ -static inline int CeedOperatorLinearAssembleAddDiagonalCompositeCore_Hip( - CeedOperator op, CeedVector assembled, CeedRequest *request, - const bool pointBlock) { - int ierr; - CeedInt numSub; - CeedOperator *subOperators; - ierr = CeedOperatorGetNumSub(op, &numSub); CeedChkBackend(ierr); - ierr = CeedOperatorGetSubList(op, &subOperators); CeedChkBackend(ierr); - for (CeedInt i = 0; i < numSub; i++) { - ierr = CeedOperatorAssembleDiagonalCore_Hip(subOperators[i], assembled, - request, pointBlock); CeedChkBackend(ierr); - } - return CEED_ERROR_SUCCESS; -} - //------------------------------------------------------------------------------ // Assemble Linear Diagonal //------------------------------------------------------------------------------ static int CeedOperatorLinearAssembleAddDiagonal_Hip(CeedOperator op, CeedVector assembled, CeedRequest *request) { - int ierr; - bool isComposite; - ierr = CeedOperatorIsComposite(op, &isComposite); CeedChkBackend(ierr); - if (isComposite) { - return CeedOperatorLinearAssembleAddDiagonalCompositeCore_Hip(op, assembled, - request, false); - } else { - return CeedOperatorAssembleDiagonalCore_Hip(op, assembled, request, false); - } + int ierr = 
CeedOperatorAssembleDiagonalCore_Hip(op, assembled, request, false); + CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ @@ -1103,15 +1079,9 @@ static int CeedOperatorLinearAssembleAddDiagonal_Hip(CeedOperator op, //------------------------------------------------------------------------------ static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Hip(CeedOperator op, CeedVector assembled, CeedRequest *request) { - int ierr; - bool isComposite; - ierr = CeedOperatorIsComposite(op, &isComposite); CeedChkBackend(ierr); - if (isComposite) { - return CeedOperatorLinearAssembleAddDiagonalCompositeCore_Hip(op, assembled, - request, true); - } else { - return CeedOperatorAssembleDiagonalCore_Hip(op, assembled, request, true); - } + int ierr = CeedOperatorAssembleDiagonalCore_Hip(op, assembled, request, true); + CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ @@ -1392,38 +1362,8 @@ static int CeedSingleOperatorAssemble_Hip(CeedOperator op, CeedInt offset, // TODO: allow multiple active input restrictions/basis objects //------------------------------------------------------------------------------ int CeedOperatorLinearAssemble_Hip(CeedOperator op, CeedVector values) { - - // As done in the default implementation, loop through suboperators - // for composite operators, or call single operator assembly otherwise - bool is_composite; - CeedInt ierr; - ierr = CeedOperatorIsComposite(op, &is_composite); CeedChkBackend(ierr); - - CeedElemRestriction rstr; - CeedInt num_elem, elem_size, num_comp; - - CeedInt offset = 0; - if (is_composite) { - CeedInt num_suboperators; - ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChkBackend(ierr); - CeedOperator *sub_operators; - ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChkBackend(ierr); - for (int k = 0; k < num_suboperators; ++k) { - ierr = 
CeedSingleOperatorAssemble_Hip(sub_operators[k], offset, values); - CeedChkBackend(ierr); - ierr = CeedOperatorGetActiveElemRestriction(sub_operators[k], &rstr); - CeedChkBackend(ierr); - ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChkBackend(ierr); - ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); - CeedChkBackend(ierr); - ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); - CeedChkBackend(ierr); - offset += elem_size*num_comp * elem_size*num_comp * num_elem; - } - } else { - ierr = CeedSingleOperatorAssemble_Hip(op, offset, values); CeedChkBackend(ierr); - } - + int ierr = CeedSingleOperatorAssemble_Hip(op, 0, values); + CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } @@ -1464,23 +1404,3 @@ int CeedOperatorCreate_Hip(CeedOperator op) { } //------------------------------------------------------------------------------ -// Composite Operator Create -//------------------------------------------------------------------------------ -int CeedCompositeOperatorCreate_Hip(CeedOperator op) { - int ierr; - Ceed ceed; - ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); - - ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal", - CeedOperatorLinearAssembleAddDiagonal_Hip); - CeedChkBackend(ierr); - ierr = CeedSetBackendFunction(ceed, "Operator", op, - "LinearAssembleAddPointBlockDiagonal", - CeedOperatorLinearAssembleAddPointBlockDiagonal_Hip); - CeedChkBackend(ierr); - ierr = CeedSetBackendFunction(ceed, "Operator", op, - "LinearAssemble", CeedOperatorLinearAssemble_Hip); - CeedChkBackend(ierr); - return CEED_ERROR_SUCCESS; -} -//------------------------------------------------------------------------------ diff --git a/backends/hip-ref/ceed-hip-ref.c b/backends/hip-ref/ceed-hip-ref.c index d32d556b9a..2b7fe7c874 100644 --- a/backends/hip-ref/ceed-hip-ref.c +++ b/backends/hip-ref/ceed-hip-ref.c @@ -72,8 +72,6 @@ static int CeedInit_Hip(const char *resource, Ceed ceed) { 
CeedQFunctionContextCreate_Hip); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "OperatorCreate", CeedOperatorCreate_Hip); CeedChkBackend(ierr); - ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "CompositeOperatorCreate", - CeedCompositeOperatorCreate_Hip); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "Destroy", CeedDestroy_Hip); CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; diff --git a/backends/hip-ref/ceed-hip-ref.h b/backends/hip-ref/ceed-hip-ref.h index eb54806aee..a925acde42 100644 --- a/backends/hip-ref/ceed-hip-ref.h +++ b/backends/hip-ref/ceed-hip-ref.h @@ -152,5 +152,4 @@ CEED_INTERN int CeedQFunctionContextCreate_Hip(CeedQFunctionContext ctx); CEED_INTERN int CeedOperatorCreate_Hip(CeedOperator op); -CEED_INTERN int CeedCompositeOperatorCreate_Hip(CeedOperator op); #endif diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index 83bb1414d7..43763a9ba4 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -534,8 +534,13 @@ static inline int CeedCompositeOperatorLinearAssembleAddDiagonal( ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr); ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr); for (CeedInt i = 0; i < num_sub; i++) { - ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request, - is_pointblock, assembled); CeedChk(ierr); + if (is_pointblock) { + ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], + assembled, request); CeedChk(ierr); + } else { + ierr = CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, + request); CeedChk(ierr); + } } return CEED_ERROR_SUCCESS; } From cefa2673c9b7c6fb97d32b63fbbfb5d237e3c796 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Mon, 6 Jun 2022 15:53:01 -0600 Subject: [PATCH 028/172] pc - fix GPU single operator fallback --- backends/cuda-ref/ceed-cuda-ref-operator.c | 44 ++++------------------ 
backends/hip-ref/ceed-hip-ref-operator.c | 25 ++++-------- include/ceed-impl.h | 1 + interface/ceed-preconditioning.c | 18 ++++++++- interface/ceed.c | 1 + 5 files changed, 35 insertions(+), 54 deletions(-) diff --git a/backends/cuda-ref/ceed-cuda-ref-operator.c b/backends/cuda-ref/ceed-cuda-ref-operator.c index c14c7b2c4f..811d754052 100644 --- a/backends/cuda-ref/ceed-cuda-ref-operator.c +++ b/backends/cuda-ref/ceed-cuda-ref-operator.c @@ -1300,7 +1300,13 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op) { } //------------------------------------------------------------------------------ -// Single operator assembly +// Assemble matrix data for COO matrix of assembled operator. +// The sparsity pattern is set by CeedOperatorLinearAssembleSymbolic. +// +// Note that this (and other assembly routines) currently assume only one +// active input restriction/basis per operator (could have multiple basis eval +// modes). +// TODO: allow multiple active input restrictions/basis objects //------------------------------------------------------------------------------ static int CeedSingleOperatorAssemble_Cuda(CeedOperator op, CeedInt offset, CeedVector values) { @@ -1357,20 +1363,6 @@ static int CeedSingleOperatorAssemble_Cuda(CeedOperator op, CeedInt offset, return CEED_ERROR_SUCCESS; } -//------------------------------------------------------------------------------ -// Assemble matrix data for COO matrix of assembled operator. -// The sparsity pattern is set by CeedOperatorLinearAssembleSymbolic. -// -// Note that this (and other assembly routines) currently assume only one -// active input restriction/basis per operator (could have multiple basis eval -// modes). 
-// TODO: allow multiple active input restrictions/basis objects -//------------------------------------------------------------------------------ -int CeedOperatorLinearAssemble_Cuda(CeedOperator op, CeedVector values) { - int ierr = CeedSingleOperatorAssemble_Cuda(op, 0, values); - CeedChkBackend(ierr); - return CEED_ERROR_SUCCESS; -} //------------------------------------------------------------------------------ // Create operator //------------------------------------------------------------------------------ @@ -1398,7 +1390,7 @@ int CeedOperatorCreate_Cuda(CeedOperator op) { CeedOperatorLinearAssembleAddPointBlockDiagonal_Cuda); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Operator", op, - "LinearAssemble", CeedOperatorLinearAssemble_Cuda); + "LinearAssembleSingle", CeedSingleOperatorAssemble_Cuda); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", CeedOperatorApplyAdd_Cuda); CeedChkBackend(ierr); @@ -1408,23 +1400,3 @@ int CeedOperatorCreate_Cuda(CeedOperator op) { } //------------------------------------------------------------------------------ -// Composite Operator Create -//------------------------------------------------------------------------------ -int CeedCompositeOperatorCreate_Cuda(CeedOperator op) { - int ierr; - Ceed ceed; - ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); - - ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal", - CeedOperatorLinearAssembleAddDiagonal_Cuda); - CeedChkBackend(ierr); - ierr = CeedSetBackendFunction(ceed, "Operator", op, - "LinearAssembleAddPointBlockDiagonal", - CeedOperatorLinearAssembleAddPointBlockDiagonal_Cuda); - CeedChkBackend(ierr); - ierr = CeedSetBackendFunction(ceed, "Operator", op, - "LinearAssemble", CeedOperatorLinearAssemble_Cuda); - CeedChkBackend(ierr); - return CEED_ERROR_SUCCESS; -} -//------------------------------------------------------------------------------ diff --git 
a/backends/hip-ref/ceed-hip-ref-operator.c b/backends/hip-ref/ceed-hip-ref-operator.c index 5bbcf6a206..52b9df64b5 100644 --- a/backends/hip-ref/ceed-hip-ref-operator.c +++ b/backends/hip-ref/ceed-hip-ref-operator.c @@ -1295,7 +1295,13 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op) { } //------------------------------------------------------------------------------ -// Single operator assembly +// Assemble matrix data for COO matrix of assembled operator. +// The sparsity pattern is set by CeedOperatorLinearAssembleSymbolic. +// +// Note that this (and other assembly routines) currently assume only one +// active input restriction/basis per operator (could have multiple basis eval +// modes). +// TODO: allow multiple active input restrictions/basis objects //------------------------------------------------------------------------------ static int CeedSingleOperatorAssemble_Hip(CeedOperator op, CeedInt offset, CeedVector values) { @@ -1352,21 +1358,6 @@ static int CeedSingleOperatorAssemble_Hip(CeedOperator op, CeedInt offset, return CEED_ERROR_SUCCESS; } -//------------------------------------------------------------------------------ -// Assemble matrix data for COO matrix of assembled operator. -// The sparsity pattern is set by CeedOperatorLinearAssembleSymbolic. -// -// Note that this (and other assembly routines) currently assume only one -// active input restriction/basis per operator (could have multiple basis eval -// modes). 
-// TODO: allow multiple active input restrictions/basis objects -//------------------------------------------------------------------------------ -int CeedOperatorLinearAssemble_Hip(CeedOperator op, CeedVector values) { - int ierr = CeedSingleOperatorAssemble_Hip(op, 0, values); - CeedChkBackend(ierr); - return CEED_ERROR_SUCCESS; -} - //------------------------------------------------------------------------------ // Create operator //------------------------------------------------------------------------------ @@ -1394,7 +1385,7 @@ int CeedOperatorCreate_Hip(CeedOperator op) { CeedOperatorLinearAssembleAddPointBlockDiagonal_Hip); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Operator", op, - "LinearAssemble", CeedOperatorLinearAssemble_Hip); + "LinearAssembleSingle", CeedSingleOperatorAssemble_Hip); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", CeedOperatorApplyAdd_Hip); CeedChkBackend(ierr); diff --git a/include/ceed-impl.h b/include/ceed-impl.h index 9e85b2897a..b7383c3c6b 100644 --- a/include/ceed-impl.h +++ b/include/ceed-impl.h @@ -356,6 +356,7 @@ struct CeedOperator_private { int (*LinearAssembleSymbolic)(CeedOperator, CeedSize *, CeedInt **, CeedInt **); int (*LinearAssemble)(CeedOperator, CeedVector); + int (*LinearAssembleSingle)(CeedOperator, CeedInt, CeedVector); int (*CreateFDMElementInverse)(CeedOperator, CeedOperator *, CeedRequest *); int (*Apply)(CeedOperator, CeedVector, CeedVector, CeedRequest *); int (*ApplyComposite)(CeedOperator, CeedVector, CeedVector, CeedRequest *); diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index 2a396c4ee5..d1a1b9f7c4 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -659,6 +659,22 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, "Composite operator not supported"); // LCOV_EXCL_STOP + if (op->LinearAssembleSingle) { + // Backend version + ierr = 
op->LinearAssembleSingle(op, offset, values); CeedChk(ierr); + return CEED_ERROR_SUCCESS; + } else { + // Operator fallback + CeedOperator op_fallback; + + ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr); + if (op_fallback) { + ierr = CeedSingleOperatorAssemble(op_fallback, offset, values); + CeedChk(ierr); + return CEED_ERROR_SUCCESS; + } + } + // Assemble QFunction CeedQFunction qf; ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); @@ -1895,7 +1911,7 @@ int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) { if (is_composite) { ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr); ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr); - for (CeedInt k = 0; k < num_suboperators; ++k) { + for (CeedInt k = 0; k < num_suboperators; k++) { ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values); CeedChk(ierr); ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k], diff --git a/interface/ceed.c b/interface/ceed.c index 5a6e7a19fb..a25e8fedd4 100644 --- a/interface/ceed.c +++ b/interface/ceed.c @@ -927,6 +927,7 @@ int CeedInit(const char *resource, Ceed *ceed) { CEED_FTABLE_ENTRY(CeedOperator, LinearAssembleAddPointBlockDiagonal), CEED_FTABLE_ENTRY(CeedOperator, LinearAssembleSymbolic), CEED_FTABLE_ENTRY(CeedOperator, LinearAssemble), + CEED_FTABLE_ENTRY(CeedOperator, LinearAssembleSingle), CEED_FTABLE_ENTRY(CeedOperator, CreateFDMElementInverse), CEED_FTABLE_ENTRY(CeedOperator, Apply), CEED_FTABLE_ENTRY(CeedOperator, ApplyComposite), From a350620e611024d5f7e34e48d6b5f6c3a62819a4 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Mon, 6 Jun 2022 16:28:09 -0600 Subject: [PATCH 029/172] tidy - satisfy tidy that resource names are non-null --- interface/ceed.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/interface/ceed.c b/interface/ceed.c index a25e8fedd4..52c3f22c57 100644 --- a/interface/ceed.c +++ b/interface/ceed.c @@ -553,7 +553,8 @@ int 
CeedSetOperatorFallbackResource(Ceed ceed, const char *resource) { CeedChk(ierr); // Check validity - ceed->has_valid_op_fallback_resource = strlen(ceed->op_fallback_resource) > 0 && + ceed->has_valid_op_fallback_resource = ceed->op_fallback_resource && + ceed->resource && strcmp(ceed->op_fallback_resource, ceed->resource); return CEED_ERROR_SUCCESS; From f5ddeb832ac20fee55dbb10f3d727f92b2340a50 Mon Sep 17 00:00:00 2001 From: Alex Pedersen Date: Mon, 6 Jun 2022 16:53:22 -0600 Subject: [PATCH 030/172] fixed include memcpy segv error --- interface/ceed-jit-tools.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c index e994aee245..3e5af47206 100644 --- a/interface/ceed-jit-tools.c +++ b/interface/ceed-jit-tools.c @@ -122,7 +122,7 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed, // -- Check for 'include' keyword const char *next_e = strchr(first_hash, 'e'); char keyword[8] = ""; - if (next_e) + if (next_e && next_e - first_hash >= 7) memcpy(keyword, &next_e[-6], 7); bool is_hash_include = !strcmp(keyword, "include"); // ---- Spaces allowed in '# include ' From 9a3a46e1d4300a3ad2fa9b1d35445254cd604d16 Mon Sep 17 00:00:00 2001 From: Jeremy Luke Thompson Date: Tue, 7 Jun 2022 10:09:37 -0600 Subject: [PATCH 031/172] test - modify t406 to test for # bug --- tests/t406-qfunction-scales.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/t406-qfunction-scales.h b/tests/t406-qfunction-scales.h index 2b02659b08..7dc6f4cef2 100644 --- a/tests/t406-qfunction-scales.h +++ b/tests/t406-qfunction-scales.h @@ -1,3 +1,7 @@ +#ifndef _scales_h +#define _scales_h +// Testing # on first line + // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
// @@ -5,9 +9,6 @@ // // This file is part of CEED: http://github.com/ceed -#ifndef _scales_h -#define _scales_h - #define SCALE_TWO 2 #define SCALE_THREE 3 From f8c2a97dab8c6b101885ddac201767a4ff082b2a Mon Sep 17 00:00:00 2001 From: rezgarshakeri Date: Wed, 8 Jun 2022 05:54:26 -0600 Subject: [PATCH 032/172] malloc_dump is fine, need to fix valgrind --- examples/solids/elasticity.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/solids/elasticity.c b/examples/solids/elasticity.c index 71a6cb0313..f3bf31b6ec 100644 --- a/examples/solids/elasticity.c +++ b/examples/solids/elasticity.c @@ -975,6 +975,8 @@ int main(int argc, char **argv) { // -- Function list ierr = PetscFunctionListDestroy(&problem_functions->setupPhysics); CHKERRQ(ierr); + ierr = PetscFunctionListDestroy(&problem_functions->setupSmootherPhysics); + CHKERRQ(ierr); ierr = PetscFunctionListDestroy(&problem_functions->setupLibceedFineLevel); CHKERRQ(ierr); ierr = PetscFunctionListDestroy(&problem_functions->setupLibceedLevel); From 3129f025463db8742ca74aaf3f49dea3f6c730bb Mon Sep 17 00:00:00 2001 From: rezgarshakeri Date: Wed, 8 Jun 2022 06:57:48 -0600 Subject: [PATCH 033/172] ceed-precoditioning.c: Added a ternary for fine operator name --- interface/ceed-preconditioning.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index d1a1b9f7c4..ada03bf690 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -1105,7 +1105,7 @@ static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, char *prolongation_name; ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr); sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", - op_fine->name); + has_name ? 
op_fine->name : ""); ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr); ierr = CeedFree(&prolongation_name); CeedChk(ierr); } @@ -1113,7 +1113,7 @@ static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, char *restriction_name; ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr); sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", - op_fine->name); + has_name ? op_fine->name : ""); ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr); ierr = CeedFree(&restriction_name); CeedChk(ierr); } From 22d320f555d55efd74dd14085a60597eeab04ba4 Mon Sep 17 00:00:00 2001 From: Leila Ghaffari Date: Wed, 8 Jun 2022 14:07:13 -0600 Subject: [PATCH 034/172] examples/fluids: destroy ceed object --- examples/fluids/src/setuplibceed.c | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c index fe14286b57..48e4f026f7 100644 --- a/examples/fluids/src/setuplibceed.c +++ b/examples/fluids/src/setuplibceed.c @@ -673,6 +673,7 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, } CeedElemRestrictionDestroy(&elem_restr_jd_i); + CeedOperatorDestroy(&op_ijacobian_vol); CeedVectorDestroy(&jac_data); PetscFunctionReturn(0); } From 6ef2784e04ef7b80774ebaeb780c915f92349709 Mon Sep 17 00:00:00 2001 From: Leila Ghaffari Date: Wed, 8 Jun 2022 14:33:33 -0600 Subject: [PATCH 035/172] examples/fluids: minor - style --- examples/fluids/problems/advection.c | 3 +-- examples/fluids/problems/advection2d.c | 1 + examples/fluids/problems/blasius.c | 1 - examples/fluids/problems/channel.c | 3 +-- examples/fluids/problems/eulervortex.c | 1 + examples/fluids/problems/newtonian.c | 3 +++ examples/fluids/problems/shocktube.c | 3 ++- examples/fluids/problems/stg_shur14.c | 4 +--- 8 files changed, 10 insertions(+), 9 deletions(-) diff --git a/examples/fluids/problems/advection.c b/examples/fluids/problems/advection.c index 
830b41fad9..58bed71b58 100644 --- a/examples/fluids/problems/advection.c +++ b/examples/fluids/problems/advection.c @@ -12,8 +12,7 @@ #include "../qfunctions/setupgeo.h" #include "../qfunctions/advection.h" -PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, - void *ctx) { +PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *ctx) { WindType wind_type; BubbleType bubble_type; BubbleContinuityType bubble_continuity_type; diff --git a/examples/fluids/problems/advection2d.c b/examples/fluids/problems/advection2d.c index 44f6e3547e..a7a6a417c2 100644 --- a/examples/fluids/problems/advection2d.c +++ b/examples/fluids/problems/advection2d.c @@ -13,6 +13,7 @@ #include "../qfunctions/advection2d.h" PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *ctx) { + WindType wind_type; StabilizationType stab; SetupContext setup_context; diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c index 247e560e64..dcb6587509 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -63,7 +63,6 @@ static PetscErrorCode ModifyMesh(MPI_Comm comm, DM dm, PetscInt dim, PetscReal growth, PetscInt N, PetscReal refine_height, PetscReal top_angle, PetscReal node_locs[], PetscInt num_node_locs) { - PetscInt ierr, narr, ncoords; PetscReal domain_min[3], domain_max[3], domain_size[3]; PetscScalar *arr_coords; diff --git a/examples/fluids/problems/channel.c b/examples/fluids/problems/channel.c index 4441a85c57..53350370b2 100644 --- a/examples/fluids/problems/channel.c +++ b/examples/fluids/problems/channel.c @@ -11,8 +11,7 @@ #include "../navierstokes.h" #include "../qfunctions/channel.h" -PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, - void *ctx) { +PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, void *ctx) { PetscInt ierr; User user = *(User *)ctx; diff --git a/examples/fluids/problems/eulervortex.c b/examples/fluids/problems/eulervortex.c index b851afb6ee..7c9748ca57 100644 --- 
a/examples/fluids/problems/eulervortex.c +++ b/examples/fluids/problems/eulervortex.c @@ -13,6 +13,7 @@ #include "../qfunctions/eulervortex.h" PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *ctx) { + EulerTestType euler_test; User user = *(User *)ctx; StabilizationType stab; diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index 2e2ce85ad2..9679111398 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -16,6 +16,7 @@ static PetscErrorCode CheckPrimitiveWithTolerance(StatePrimitive sY, StatePrimitive aY, StatePrimitive bY, const char *name, PetscReal rtol_pressure, PetscReal rtol_velocity, PetscReal rtol_temperature) { + PetscFunctionBeginUser; StatePrimitive eY; // relative error eY.pressure = (aY.pressure - bY.pressure) / sY.pressure; @@ -35,6 +36,7 @@ static PetscErrorCode CheckPrimitiveWithTolerance(StatePrimitive sY, static PetscErrorCode UnitTests_Newtonian(User user, NewtonianIdealGasContext gas) { + Units units = user->units; const CeedScalar eps = 1e-6; const CeedScalar kg = units->kilogram, m = units->meter, sec = units->second, @@ -66,6 +68,7 @@ static PetscErrorCode UnitTests_Newtonian(User user, } PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { + SetupContext setup_context; User user = *(User *)ctx; StabilizationType stab; diff --git a/examples/fluids/problems/shocktube.c b/examples/fluids/problems/shocktube.c index 0e234556a5..8c52a69483 100644 --- a/examples/fluids/problems/shocktube.c +++ b/examples/fluids/problems/shocktube.c @@ -22,6 +22,7 @@ #include "../qfunctions/shocktube.h" PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *ctx) { + SetupContext setup_context; User user = *(User *)ctx; MPI_Comm comm = PETSC_COMM_WORLD; @@ -33,7 +34,6 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *ctx) { ShockTubeContext shocktube_ctx; CeedQFunctionContext shocktube_context; - PetscFunctionBeginUser; ierr 
= PetscCalloc1(1, &setup_context); CHKERRQ(ierr); ierr = PetscCalloc1(1, &shocktube_ctx); CHKERRQ(ierr); @@ -173,6 +173,7 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *ctx) { } PetscErrorCode PRINT_SHOCKTUBE(ProblemData *problem, AppCtx app_ctx) { + MPI_Comm comm = PETSC_COMM_WORLD; PetscErrorCode ierr; PetscFunctionBeginUser; diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c index 30ad5eac35..a1a399819f 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -33,7 +33,6 @@ */ PetscErrorCode CalcCholeskyDecomp(MPI_Comm comm, PetscInt nprofs, const CeedScalar Rij[6][nprofs], CeedScalar Cij[6][nprofs]) { - PetscFunctionBeginUser; for (PetscInt i=0; i Date: Tue, 17 May 2022 15:40:41 -0600 Subject: [PATCH 036/172] test - add test for non-symmetric assembly --- tests/t564-operator.c | 13 ++- tests/t566-operator.c | 184 ++++++++++++++++++++++++++++++++++++++++++ tests/t566-operator.h | 38 +++++++++ 3 files changed, 228 insertions(+), 7 deletions(-) create mode 100644 tests/t566-operator.c create mode 100644 tests/t566-operator.h diff --git a/tests/t564-operator.c b/tests/t564-operator.c index 20f37709f8..6728712b8e 100644 --- a/tests/t564-operator.c +++ b/tests/t564-operator.c @@ -95,7 +95,7 @@ int main(int argc, char **argv) { CeedOperatorApply(op_setup, X, q_data, CEED_REQUEST_IMMEDIATE); // Fuly assemble operator - for (int k=0; k 100.*CEED_EPSILON) @@ -163,7 +163,6 @@ int main(int argc, char **argv) { CeedBasisDestroy(&basis_u); CeedBasisDestroy(&basis_x); CeedVectorDestroy(&X); - // CeedVectorDestroy(&A); CeedVectorDestroy(&q_data); CeedVectorDestroy(&U); CeedVectorDestroy(&V); diff --git a/tests/t566-operator.c b/tests/t566-operator.c new file mode 100644 index 0000000000..8bb566ab45 --- /dev/null +++ b/tests/t566-operator.c @@ -0,0 +1,184 @@ +/// @file +/// Test assembly of non-symmetric mass matrix operator (multi-component) see t537 +/// \test Test assembly of 
non-symmetric mass matrix operator (multi-component) +#include +#include +#include +#include "t566-operator.h" + +int main(int argc, char **argv) { + Ceed ceed; + CeedElemRestriction elem_restr_x, elem_restr_u, + elem_restr_qd_i; + CeedBasis basis_x, basis_u; + CeedQFunction qf_setup, qf_mass; + CeedOperator op_setup, op_mass; + CeedVector q_data, X, U, V; + CeedInt P = 3, Q = 3, dim = 2, num_comp = 2; + CeedInt n_x = 1, n_y = 1; + CeedInt num_elem = n_x * n_y; + CeedInt num_dofs = (n_x*2+1)*(n_y*2+1), num_qpts = num_elem*Q*Q; + CeedInt ind_x[num_elem*P*P]; + CeedScalar assembled[num_comp*num_comp*num_dofs*num_dofs]; + CeedScalar x[dim*num_dofs], assembled_true[num_comp*num_comp*num_dofs*num_dofs]; + CeedScalar *u; + const CeedScalar *v; + + CeedInit(argv[1], &ceed); + + // DoF Coordinates + for (CeedInt i=0; i 0) + u[indOld] = 0.0; + indOld = ind; + CeedVectorRestoreArray(U, &u); + + // Compute effect of DoF j + CeedOperatorApply(op_mass, U, V, CEED_REQUEST_IMMEDIATE); + + CeedVectorGetArrayRead(V, CEED_MEM_HOST, &v); + for (CeedInt k=0; k + 100.*CEED_EPSILON) + // LCOV_EXCL_START + printf("[(%d, %d), (%d, %d)] Error in assembly: %f != %f\n", + node_in, comp_in, node_out, comp_out, + assembled_value, assembled_true_value); + // LCOV_EXCL_STOP + } + } + } + } + + // Cleanup + free(rows); + free(cols); + CeedVectorDestroy(&values); + CeedQFunctionDestroy(&qf_setup); + CeedQFunctionDestroy(&qf_mass); + CeedOperatorDestroy(&op_setup); + CeedOperatorDestroy(&op_mass); + CeedElemRestrictionDestroy(&elem_restr_u); + CeedElemRestrictionDestroy(&elem_restr_x); + CeedElemRestrictionDestroy(&elem_restr_qd_i); + CeedBasisDestroy(&basis_u); + CeedBasisDestroy(&basis_x); + CeedVectorDestroy(&X); + CeedVectorDestroy(&q_data); + CeedVectorDestroy(&U); + CeedVectorDestroy(&V); + CeedDestroy(&ceed); + return 0; +} diff --git a/tests/t566-operator.h b/tests/t566-operator.h new file mode 100644 index 0000000000..01ec35232a --- /dev/null +++ b/tests/t566-operator.h @@ -0,0 +1,38 @@ 
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { + const CeedScalar *weight = in[0], *J = in[1]; + CeedScalar *rho = out[0]; + for (CeedInt i=0; i Date: Wed, 18 May 2022 10:44:20 -0600 Subject: [PATCH 037/172] Update tests/t566-operator.c Co-authored-by: Jed Brown --- tests/t566-operator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/t566-operator.c b/tests/t566-operator.c index 8bb566ab45..13b0ff2aba 100644 --- a/tests/t566-operator.c +++ b/tests/t566-operator.c @@ -17,7 +17,7 @@ int main(int argc, char **argv) { CeedInt P = 3, Q = 3, dim = 2, num_comp = 2; CeedInt n_x = 1, n_y = 1; CeedInt num_elem = n_x * n_y; - CeedInt num_dofs = (n_x*2+1)*(n_y*2+1), num_qpts = num_elem*Q*Q; + CeedInt num_dofs = (n_x*(P-1)+1)*(n_y*(P-1)+1), num_qpts = num_elem*Q*Q; CeedInt ind_x[num_elem*P*P]; CeedScalar assembled[num_comp*num_comp*num_dofs*num_dofs]; CeedScalar x[dim*num_dofs], assembled_true[num_comp*num_comp*num_dofs*num_dofs]; From cbe927a405cb9c9193183554146a997f988efdb8 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Wed, 18 May 2022 14:02:42 -0600 Subject: [PATCH 038/172] t566-operator: fix some indexing --- tests/t566-operator.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/t566-operator.c b/tests/t566-operator.c index 13b0ff2aba..897d29baed 100644 --- a/tests/t566-operator.c +++ b/tests/t566-operator.c @@ -27,10 +27,10 @@ int main(int argc, char **argv) { CeedInit(argv[1], &ceed); // DoF Coordinates - for (CeedInt i=0; i Date: Wed, 18 May 2022 14:11:03 -0600 Subject: [PATCH 039/172] t566-operator: Use row-major indexing for both matrices --- 
tests/t566-operator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/t566-operator.c b/tests/t566-operator.c index 897d29baed..e21d07e953 100644 --- a/tests/t566-operator.c +++ b/tests/t566-operator.c @@ -135,7 +135,7 @@ int main(int argc, char **argv) { CeedVectorGetArrayRead(V, CEED_MEM_HOST, &v); for (CeedInt k=0; k 100.*CEED_EPSILON) // LCOV_EXCL_START printf("[(%d, %d), (%d, %d)] Error in assembly: %f != %f\n", - node_in, comp_in, node_out, comp_out, + node_out, comp_out, node_in, comp_in, assembled_value, assembled_true_value); // LCOV_EXCL_STOP } From 093bb6dbb14840a3899225a6dc829b8572f27660 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 18 May 2022 14:35:16 -0600 Subject: [PATCH 040/172] style - add NOPAD for t566 --- tests/t566-operator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/t566-operator.c b/tests/t566-operator.c index e21d07e953..a30c5d7795 100644 --- a/tests/t566-operator.c +++ b/tests/t566-operator.c @@ -55,7 +55,7 @@ int main(int argc, char **argv) { CeedElemRestrictionCreate(ceed, num_elem, P*P, num_comp, num_dofs, num_comp*num_dofs, CEED_MEM_HOST, CEED_USE_POINTER, ind_x, &elem_restr_u); - CeedInt strides_qd[3] = {1, Q*Q*num_elem, Q*Q}; + CeedInt strides_qd[3] = {1, Q*Q*num_elem, Q*Q}; /* *NOPAD* */ CeedElemRestrictionCreateStrided(ceed, num_elem, Q*Q, 1, num_qpts, strides_qd, &elem_restr_qd_i); From 96461910ed4bdb415adf6fb966585f810826484d Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 18 May 2022 15:00:44 -0600 Subject: [PATCH 041/172] test - add t567, assembly of non-symmetric multi-component Poisson --- tests/t567-operator.c | 184 ++++++++++++++++++++++++++++++++++++++++++ tests/t567-operator.h | 73 +++++++++++++++++ 2 files changed, 257 insertions(+) create mode 100644 tests/t567-operator.c create mode 100644 tests/t567-operator.h diff --git a/tests/t567-operator.c b/tests/t567-operator.c new file mode 100644 index 0000000000..7f8ea402fc --- /dev/null 
+++ b/tests/t567-operator.c @@ -0,0 +1,184 @@ +/// @file +/// Test assembly of non-symmetric Poisson operator (multi-component) +/// \test Test assembly of non-symmetric Poisson operator (multi-component) +#include +#include +#include +#include "t567-operator.h" + +int main(int argc, char **argv) { + Ceed ceed; + CeedElemRestriction elem_restr_x, elem_restr_u, + elem_restr_qd_i; + CeedBasis basis_x, basis_u; + CeedQFunction qf_setup, qf_diff; + CeedOperator op_setup, op_diff; + CeedVector q_data, X, U, V; + CeedInt P = 3, Q = 3, dim = 2, num_comp = 2; + CeedInt n_x = 1, n_y = 1; + CeedInt num_elem = n_x * n_y; + CeedInt num_dofs = (n_x*(P-1)+1)*(n_y*(P-1)+1), num_qpts = num_elem*Q*Q; + CeedInt ind_x[num_elem*P*P]; + CeedScalar assembled[num_comp*num_comp*num_dofs*num_dofs]; + CeedScalar x[dim*num_dofs], assembled_true[num_comp*num_comp*num_dofs*num_dofs]; + CeedScalar *u; + const CeedScalar *v; + + CeedInit(argv[1], &ceed); + + // DoF Coordinates + for (CeedInt i=0; i 0) + u[indOld] = 0.0; + indOld = ind; + CeedVectorRestoreArray(U, &u); + + // Compute effect of DoF j + CeedOperatorApply(op_diff, U, V, CEED_REQUEST_IMMEDIATE); + + CeedVectorGetArrayRead(V, CEED_MEM_HOST, &v); + for (CeedInt k=0; k + 100.*CEED_EPSILON) + // LCOV_EXCL_START + printf("[(%d, %d), (%d, %d)] Error in assembly: %f != %f\n", + node_out, comp_out, node_in, comp_in, + assembled_value, assembled_true_value); + // LCOV_EXCL_STOP + } + } + } + } + + // Cleanup + free(rows); + free(cols); + CeedVectorDestroy(&values); + CeedQFunctionDestroy(&qf_setup); + CeedQFunctionDestroy(&qf_diff); + CeedOperatorDestroy(&op_setup); + CeedOperatorDestroy(&op_diff); + CeedElemRestrictionDestroy(&elem_restr_u); + CeedElemRestrictionDestroy(&elem_restr_x); + CeedElemRestrictionDestroy(&elem_restr_qd_i); + CeedBasisDestroy(&basis_u); + CeedBasisDestroy(&basis_x); + CeedVectorDestroy(&X); + CeedVectorDestroy(&q_data); + CeedVectorDestroy(&U); + CeedVectorDestroy(&V); + CeedDestroy(&ceed); + return 0; +} diff --git 
a/tests/t567-operator.h b/tests/t567-operator.h new file mode 100644 index 0000000000..523841efa6 --- /dev/null +++ b/tests/t567-operator.h @@ -0,0 +1,73 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { + // *INDENT-OFF* + const CeedScalar *w = in[0], + (*J)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1]; + CeedScalar (*q_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + // *INDENT-ON* + + // Quadrature point loop + CeedPragmaSIMD + for (CeedInt i=0; i Date: Wed, 1 Jun 2022 10:53:39 -0600 Subject: [PATCH 042/172] test - update t566/567 indexing --- tests/t566-operator.h | 16 ++++++++++------ tests/t567-operator.h | 16 ++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/tests/t566-operator.h b/tests/t566-operator.h index 01ec35232a..d8d07a9d53 100644 --- a/tests/t566-operator.h +++ b/tests/t566-operator.h @@ -18,19 +18,23 @@ CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, CEED_QFUNCTION(mass)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { - const CeedScalar *rho = in[0], *u = in[1]; - CeedScalar *v = out[0]; + // *INDENT-OFF* + const CeedScalar (*q_data) = (const CeedScalar(*))in[0], + (*u)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1]; + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + // *INDENT-ON* + const CeedScalar num_comp = 2; const CeedScalar scale[2][2] = { {1.0, 2.0}, {3.0, 4.0}, }; for (CeedInt i = 0; i < Q; i++) { - for (CeedInt c_out = 0; c_out < 2; c_out++) { - v[i+Q*c_out] = 0.0; - for (CeedInt c_in = 0; c_in < 2; c_in++) { - v[i+Q*c_out] += rho[i] * u[i+Q*c_in] * scale[c_in][c_out]; + for (CeedInt 
c_out = 0; c_out < num_comp; c_out++) { + v[c_out][i] = 0.0; + for (CeedInt c_in = 0; c_in < num_comp; c_in++) { + v[c_out][i] += q_data[i] * u[c_in][i] * scale[c_in][c_out]; } } } diff --git a/tests/t567-operator.h b/tests/t567-operator.h index 523841efa6..a83f391405 100644 --- a/tests/t567-operator.h +++ b/tests/t567-operator.h @@ -36,12 +36,13 @@ CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, CEED_QFUNCTION(diff)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* - const CeedScalar (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*ug)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1]; - CeedScalar (*vg)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + const CeedScalar (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + (*ug)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1]; + CeedScalar (*vg)[2][CEED_Q_VLA] = (CeedScalar(*)[2][CEED_Q_VLA])out[0]; // *INDENT-ON* const CeedInt dim = 2; + const CeedScalar num_comp = 2; const CeedScalar scale[2][2] = { {1.0, 2.0}, {3.0, 4.0}, @@ -64,9 +65,12 @@ CEED_QFUNCTION(diff)(void *ctx, const CeedInt Q, const CeedScalar *const *in, // Apply Poisson operator // j = direction of vg - for (CeedInt j=0; j Date: Fri, 10 Jun 2022 17:10:53 -0600 Subject: [PATCH 043/172] ref - handle num_elem=0 case for qf assembly --- backends/ref/ceed-ref-operator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/ref/ceed-ref-operator.c b/backends/ref/ceed-ref-operator.c index 8086020716..3525ce7d75 100644 --- a/backends/ref/ceed-ref-operator.c +++ b/backends/ref/ceed-ref-operator.c @@ -671,7 +671,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Ref(CeedOperator op, ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); CeedChkBackend(ierr); // Check if active output - if (vec == CEED_VECTOR_ACTIVE) { + if (vec == CEED_VECTOR_ACTIVE && num_elem > 0) { CeedVectorTakeArray(impl->q_vecs_out[out], 
CEED_MEM_HOST, NULL); CeedChkBackend(ierr); } From 52b3e6a738faffda5b45f08b3021a8d767a770af Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Fri, 10 Jun 2022 23:38:33 -0600 Subject: [PATCH 044/172] Fix assembly for operators with size zero This occurs in parallel when some processes have zero elements in a subdomain or part of the boundary. --- backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c | 2 +- interface/ceed-elemrestriction.c | 4 +++- interface/ceed-preconditioning.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c b/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c index 0bb14e0089..271666ebca 100644 --- a/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c +++ b/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c @@ -119,7 +119,7 @@ static inline int CeedQFunctionContextHasValidData_Cuda( CeedQFunctionContext_Cuda *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); - *has_valid_data = !!impl->h_data || !!impl->d_data; + *has_valid_data = impl && (!!impl->h_data || !!impl->d_data); return CEED_ERROR_SUCCESS; } diff --git a/interface/ceed-elemrestriction.c b/interface/ceed-elemrestriction.c index d20930de5d..3e2c3b0e78 100644 --- a/interface/ceed-elemrestriction.c +++ b/interface/ceed-elemrestriction.c @@ -863,7 +863,9 @@ int CeedElemRestrictionApply(CeedElemRestriction rstr, CeedTransposeMode t_mode, "Output vector size %d not compatible with " "element restriction (%d, %d)", ru->length, m, n); // LCOV_EXCL_STOP - ierr = rstr->Apply(rstr, t_mode, u, ru, request); CeedChk(ierr); + if (rstr->num_elem > 0) { + ierr = rstr->Apply(rstr, t_mode, u, ru, request); CeedChk(ierr); + } return CEED_ERROR_SUCCESS; } diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index ada03bf690..aec35492c7 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -642,7 +642,7 @@ static int 
CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, Users should generally use CeedOperatorLinearAssemble() @param[in] op CeedOperator to assemble - @param[out] offset Offest for number of entries + @param[in] offset Offest for number of entries @param[out] values Values to assemble into matrix @return An error code: 0 - success, otherwise - failure @@ -658,6 +658,7 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported"); // LCOV_EXCL_STOP + if (op->num_elem == 0) return CEED_ERROR_SUCCESS; if (op->LinearAssembleSingle) { // Backend version From 1f401ad2c3fc99c5f846a3f1e8046086243879e6 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 16 Jun 2022 09:43:44 -0600 Subject: [PATCH 045/172] ci - update Python testing to avoid distutils --- python/tests/setup-qfunctions.py | 2 +- python/tests/test-4-qfunction.py | 2 +- python/tests/test-5-operator.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tests/setup-qfunctions.py b/python/tests/setup-qfunctions.py index eb60a12c03..ba51062e31 100644 --- a/python/tests/setup-qfunctions.py +++ b/python/tests/setup-qfunctions.py @@ -15,7 +15,7 @@ # testbed platforms, in support of the nation's exascale computing imperative. 
import os -from distutils.core import setup, Extension +from setuptools import setup, Extension import libceed CEED_DIR = os.path.dirname(libceed.__file__) diff --git a/python/tests/test-4-qfunction.py b/python/tests/test-4-qfunction.py index 0d041cf710..ab9e02d596 100644 --- a/python/tests/test-4-qfunction.py +++ b/python/tests/test-4-qfunction.py @@ -21,7 +21,7 @@ def load_qfs_so(): - from distutils.sysconfig import get_config_var + from sysconfig import get_config_var import ctypes file_dir = os.path.dirname(os.path.abspath(__file__)) diff --git a/python/tests/test-5-operator.py b/python/tests/test-5-operator.py index 78b455779d..57e52e5246 100644 --- a/python/tests/test-5-operator.py +++ b/python/tests/test-5-operator.py @@ -22,7 +22,7 @@ def load_qfs_so(): - from distutils.sysconfig import get_config_var + from sysconfig import get_config_var import ctypes file_dir = os.path.dirname(os.path.abspath(__file__)) From a3cbccd6a794f16dfcfd76060323bf6b3295566d Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 16 Jun 2022 16:07:55 -0600 Subject: [PATCH 046/172] fluids - trim some extra zeroing --- examples/fluids/src/setupts.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c index a639873745..d4b8d492c8 100644 --- a/examples/fluids/src/setupts.c +++ b/examples/fluids/src/setupts.c @@ -108,7 +108,6 @@ PetscErrorCode RHS_NS(TS ts, PetscReal t, Vec Q, Vec G, void *user_data) { ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Q_loc); CHKERRQ(ierr); ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Q_loc, t, NULL, NULL, NULL); CHKERRQ(ierr); - ierr = VecZeroEntries(G_loc); CHKERRQ(ierr); // Place PETSc vectors in CEED vectors ierr = VecGetArrayReadAndMemType(Q_loc, (const PetscScalar **)&q, &q_mem_type); @@ -177,7 +176,6 @@ PetscErrorCode IFunction_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, Vec G, ierr = VecZeroEntries(Q_dot_loc); CHKERRQ(ierr); ierr = DMGlobalToLocal(user->dm, Q_dot, 
INSERT_VALUES, Q_dot_loc); CHKERRQ(ierr); - ierr = VecZeroEntries(G_loc); CHKERRQ(ierr); // Place PETSc vectors in CEED vectors ierr = VecGetArrayReadAndMemType(Q_loc, &q, &q_mem_type); CHKERRQ(ierr); @@ -230,7 +228,6 @@ static PetscErrorCode MatMult_NS_IJacobian(Mat J, Vec Q, Vec G) { // Global-to-local ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Q_loc); CHKERRQ(ierr); - ierr = VecZeroEntries(G_loc); CHKERRQ(ierr); // Place PETSc vectors in CEED vectors ierr = VecGetArrayReadAndMemType(Q_loc, &q, &q_mem_type); CHKERRQ(ierr); From c9a77dd1d1716ee9efa9576a12592b10ba4dba50 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 9 Jun 2022 09:02:17 -0600 Subject: [PATCH 047/172] test - add test for assembly w/non-square D --- tests/t568-operator.c | 187 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 tests/t568-operator.c diff --git a/tests/t568-operator.c b/tests/t568-operator.c new file mode 100644 index 0000000000..d1d832c2cc --- /dev/null +++ b/tests/t568-operator.c @@ -0,0 +1,187 @@ +/// @file +/// Test assembly of Poisson operator with extra input field (non-square D) +/// \test Test assembly of Poisson operator with extra input field (non-square D) +#include +#include +#include +#include "t534-operator.h" + +int main(int argc, char **argv) { + Ceed ceed; + CeedElemRestriction elem_restr_x, elem_restr_u, + elem_restr_qd_i; + CeedBasis basis_x, basis_u; + CeedQFunction qf_setup, qf_diff; + CeedOperator op_setup, op_diff; + CeedVector q_data, X, U, V; + CeedInt P = 3, Q = 3, dim = 2, num_comp = 2; + CeedInt n_x = 1, n_y = 1; + CeedInt num_elem = n_x * n_y; + CeedInt num_dofs = (n_x*(P-1)+1)*(n_y*(P-1)+1), num_qpts = num_elem*Q*Q; + CeedInt ind_x[num_elem*P*P]; + CeedScalar assembled[num_comp*num_comp*num_dofs*num_dofs]; + CeedScalar x[dim*num_dofs], assembled_true[num_comp*num_comp*num_dofs*num_dofs]; + CeedScalar *u; + const CeedScalar *v; + + CeedInit(argv[1], 
&ceed); + + // DoF Coordinates + for (CeedInt i=0; i 0) + u[indOld] = 0.0; + indOld = ind; + CeedVectorRestoreArray(U, &u); + + // Compute effect of DoF j + CeedOperatorApply(op_diff, U, V, CEED_REQUEST_IMMEDIATE); + + CeedVectorGetArrayRead(V, CEED_MEM_HOST, &v); + for (CeedInt k=0; k + 100.*CEED_EPSILON) + // LCOV_EXCL_START + printf("[(%d, %d), (%d, %d)] Error in assembly: %f != %f\n", + node_out, comp_out, node_in, comp_in, + assembled_value, assembled_true_value); + // LCOV_EXCL_STOP + } + } + } + } + + // Cleanup + free(rows); + free(cols); + CeedVectorDestroy(&values); + CeedQFunctionDestroy(&qf_setup); + CeedQFunctionDestroy(&qf_diff); + CeedOperatorDestroy(&op_setup); + CeedOperatorDestroy(&op_diff); + CeedElemRestrictionDestroy(&elem_restr_u); + CeedElemRestrictionDestroy(&elem_restr_x); + CeedElemRestrictionDestroy(&elem_restr_qd_i); + CeedBasisDestroy(&basis_u); + CeedBasisDestroy(&basis_x); + CeedVectorDestroy(&X); + CeedVectorDestroy(&q_data); + CeedVectorDestroy(&U); + CeedVectorDestroy(&V); + CeedDestroy(&ceed); + return 0; +} From ed9e99e6e9123568081837e7b4e9e23f10209a71 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 9 Jun 2022 11:48:23 -0600 Subject: [PATCH 048/172] pc - clean up LinearAssemble implementation --- include/ceed-impl.h | 9 + include/ceed/backend.h | 18 +- interface/ceed-basis.c | 18 +- interface/ceed-operator.c | 3 +- interface/ceed-preconditioning.c | 661 ++++++++++++++++++------------- 5 files changed, 417 insertions(+), 292 deletions(-) diff --git a/include/ceed-impl.h b/include/ceed-impl.h index b7383c3c6b..2773f68641 100644 --- a/include/ceed-impl.h +++ b/include/ceed-impl.h @@ -339,6 +339,14 @@ struct CeedQFunctionAssemblyData_private { CeedElemRestriction rstr; }; +struct CeedOperatorAssemblyData_private { + Ceed ceed; + CeedInt num_eval_mode_in, num_eval_mode_out; + CeedEvalMode *eval_mode_in, *eval_mode_out; + CeedScalar *B_in, *B_out; + CeedBasis basis_in, basis_out; +}; + struct CeedOperator_private { Ceed 
ceed; CeedOperator op_fallback; @@ -381,6 +389,7 @@ struct CeedOperator_private { bool is_composite; bool has_restriction; CeedQFunctionAssemblyData qf_assembled; + CeedOperatorAssemblyData op_assembled; CeedOperator *sub_operators; CeedInt num_suboperators; void *data; diff --git a/include/ceed/backend.h b/include/ceed/backend.h index a9b248bb12..596b71aebf 100644 --- a/include/ceed/backend.h +++ b/include/ceed/backend.h @@ -82,6 +82,10 @@ typedef struct CeedTensorContract_private *CeedTensorContract; /// @ingroup CeedOperator typedef struct CeedQFunctionAssemblyData_private *CeedQFunctionAssemblyData; +/// Handle for object handling assembled Operator data +/// @ingroup CeedOperator +typedef struct CeedOperatorAssemblyData_private *CeedOperatorAssemblyData; + /* In the next 3 functions, p has to be the address of a pointer type, i.e. p has to be a pointer to a pointer. */ CEED_INTERN int CeedMallocArray(size_t n, size_t unit, void *p); @@ -114,9 +118,6 @@ CEED_EXTERN int CeedGetObjectDelegate(Ceed ceed, Ceed *delegate, const char *obj_name); CEED_EXTERN int CeedSetObjectDelegate(Ceed ceed, Ceed delegate, const char *obj_name); -CEED_EXTERN int CeedOperatorGetActiveBasis(CeedOperator op, - CeedBasis *active_basis); -CEED_EXTERN int CeedOperatorGetActiveElemRestriction(CeedOperator op, CeedElemRestriction *active_rstr); CEED_EXTERN int CeedGetOperatorFallbackResource(Ceed ceed, const char **resource); CEED_EXTERN int CeedGetOperatorFallbackCeed(Ceed ceed, Ceed *fallback_ceed); @@ -273,6 +274,15 @@ CEED_EXTERN int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData da CEED_EXTERN int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr); CEED_EXTERN int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data); +CEED_EXTERN int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data); +CEED_EXTERN int 
CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in, CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out); +CEED_EXTERN int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out, const CeedScalar **B_out); +CEED_EXTERN int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data); + +CEED_EXTERN int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data); +CEED_EXTERN int CeedOperatorGetActiveBasis(CeedOperator op, + CeedBasis *active_basis); +CEED_EXTERN int CeedOperatorGetActiveElemRestriction(CeedOperator op, CeedElemRestriction *active_rstr); CEED_EXTERN int CeedOperatorGetNumArgs(CeedOperator op, CeedInt *num_args); CEED_EXTERN int CeedOperatorIsSetupDone(CeedOperator op, bool *is_setup_done); CEED_EXTERN int CeedOperatorGetQFunction(CeedOperator op, CeedQFunction *qf); @@ -285,7 +295,7 @@ CEED_EXTERN int CeedOperatorSetData(CeedOperator op, void *data); CEED_EXTERN int CeedOperatorReference(CeedOperator op); CEED_EXTERN int CeedOperatorSetSetupDone(CeedOperator op); -CEED_INTERN int CeedMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, +CEED_INTERN int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m, CeedInt n, CeedInt kk); diff --git a/interface/ceed-basis.c b/interface/ceed-basis.c index 8c8d09879a..33d353912a 100644 --- a/interface/ceed-basis.c +++ b/interface/ceed-basis.c @@ -418,9 +418,9 @@ int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) { @ref Utility **/ -int CeedMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, - const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m, - CeedInt n, CeedInt kk) { +int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, + const CeedScalar *mat_B, CeedScalar *mat_C, + CeedInt m, CeedInt n, CeedInt kk) { for (CeedInt i=0; 
iop_fallback); CeedChk(ierr); - // Destroy QF assembly cache + // Destroy assembly data ierr = CeedQFunctionAssemblyDataDestroy(&(*op)->qf_assembled); CeedChk(ierr); + ierr = CeedOperatorAssemblyDataDestroy(&(*op)->op_assembled); CeedChk(ierr); ierr = CeedFree(&(*op)->input_fields); CeedChk(ierr); ierr = CeedFree(&(*op)->output_fields); CeedChk(ierr); diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index aec35492c7..35eb79609b 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -226,6 +227,7 @@ static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, case CEED_EVAL_CURL: break; // Caught by QF Assembly } + assert(*basis_ptr != NULL); } /** @@ -314,101 +316,28 @@ static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, CeedScalar max_norm = 0; ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); - // Determine active input basis - CeedOperatorField *op_fields; - CeedQFunctionField *qf_fields; - ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); - ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr); - CeedInt num_eval_mode_in = 0, num_comp, dim = 1; - CeedEvalMode *eval_mode_in = NULL; - CeedBasis basis_in = NULL; - CeedElemRestriction rstr_in = NULL; - for (CeedInt i=0; i qf_value_bound) { + BTD_mat[btd_index] += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; + } } } } } - - ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size, - elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); + // form element matrix itself (for each block component) + ierr = CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, + elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr); // put element matrix in coordinate data structure - for (CeedInt i = 0; i < elem_size; ++i) { - for (CeedInt j = 0; j 
< elem_size; ++j) { + for (CeedInt i = 0; i < elem_size; i++) { + for (CeedInt j = 0; j < elem_size; j++) { vals[offset + count] = elem_mat[i*elem_size + j]; count++; } @@ -910,8 +721,6 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); CeedChk(ierr); ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr); - ierr = CeedFree(&eval_mode_in); CeedChk(ierr); - ierr = CeedFree(&eval_mode_out); CeedChk(ierr); return CEED_ERROR_SUCCESS; } @@ -1368,6 +1177,302 @@ int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) { return CEED_ERROR_SUCCESS; } +/** + @brief Get CeedOperatorAssemblyData + + @param[in] op CeedOperator to assemble + @param[out] data CeedOperatorAssemblyData + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedOperatorGetOperatorAssemblyData(CeedOperator op, + CeedOperatorAssemblyData *data) { + int ierr; + + if (!op->op_assembled) { + CeedOperatorAssemblyData data; + + ierr = CeedOperatorAssemblyDataCreate(op->ceed, op, &data); CeedChk(ierr); + op->op_assembled = data; + } + *data = op->op_assembled; + + return CEED_ERROR_SUCCESS; +} + +/** + @brief Create object holding CeedOperator assembly data + + @param[in] ceed A Ceed object where the CeedOperatorAssemblyData will be created + @param[in] op CeedOperator to be assembled + @param[out] data Address of the variable where the newly created + CeedOperatorAssemblyData will be stored + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, + CeedOperatorAssemblyData *data) { + int ierr; + + ierr = CeedCalloc(1, data); CeedChk(ierr); + (*data)->ceed = ceed; + ierr = CeedReference(ceed); CeedChk(ierr); + + // Build OperatorAssembly data + CeedQFunction qf; + CeedQFunctionField *qf_fields; + CeedOperatorField *op_fields; + CeedInt num_input_fields; + ierr = 
CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); + ierr = CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL); + CeedChk(ierr); + ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr); + + // Determine active input basis + CeedInt num_eval_mode_in = 0, dim = 1; + CeedEvalMode *eval_mode_in = NULL; + CeedBasis basis_in = NULL; + for (CeedInt i=0; inum_eval_mode_in = num_eval_mode_in; + (*data)->eval_mode_in = eval_mode_in; + ierr = CeedBasisReferenceCopy(basis_in, &(*data)->basis_in); CeedChk(ierr); + + // Determine active output basis + CeedInt num_output_fields; + ierr = CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields); + CeedChk(ierr); + ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr); + CeedInt num_eval_mode_out = 0; + CeedEvalMode *eval_mode_out = NULL; + CeedBasis basis_out = NULL; + for (CeedInt i=0; inum_eval_mode_out = num_eval_mode_out; + (*data)->eval_mode_out = eval_mode_out; + ierr = CeedBasisReferenceCopy(basis_out, &(*data)->basis_out); CeedChk(ierr); + + return CEED_ERROR_SUCCESS; +} + +/** + @brief Get CeedOperator CeedEvalModes for assembly + + @param[in] data CeedOperatorAssemblyData + @param[out] num_eval_mode_in Pointer to hold number of input CeedEvalModes, or NULL + @param[out] eval_mode_in Pointer to hold input CeedEvalModes, or NULL + @param[out] num_eval_mode_out Pointer to hold number of output CeedEvalModes, or NULL + @param[out] eval_mode_out Pointer to hold output CeedEvalModes, or NULL + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, + CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in, + CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) { + if (num_eval_mode_in) *num_eval_mode_in = data->num_eval_mode_in; + if (eval_mode_in) *eval_mode_in = data->eval_mode_in; + if (num_eval_mode_out) *num_eval_mode_out = 
data->num_eval_mode_out; + if (eval_mode_out) *eval_mode_out = data->eval_mode_out; + + return CEED_ERROR_SUCCESS; +} + +/** + @brief Get CeedOperator CeedBasis data for assembly + + @param[in] data CeedOperatorAssemblyData + @param[out] basis_in Pointer to hold active input CeedBasis, or NULL + @param[out] B_in Pointer to hold assembled active input B, or NULL + @param[out] basis_out Pointer to hold active output CeedBasis, or NULL + @param[out] B_out Pointer to hold assembled active output B, or NULL + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, + CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out, + const CeedScalar **B_out) { + int ierr; + + // Assemble B_in, B_out if needed + if (B_in && !data->B_in) { + CeedInt num_qpts, elem_size; + CeedScalar *B_in, *identity = NULL; + const CeedScalar *interp_in, *grad_in; + bool has_eval_none = false; + + ierr = CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts); + CeedChk(ierr); + ierr = CeedBasisGetNumNodes(data->basis_in, &elem_size); CeedChk(ierr); + ierr = CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in); + CeedChk(ierr); + + for (CeedInt i = 0; i < data->num_eval_mode_in; i++) { + has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE); + } + if (has_eval_none) { + ierr = CeedCalloc(num_qpts * elem_size, &identity); CeedChk(ierr); + for (CeedInt i = 0; i < (elem_size < num_qpts ? 
elem_size : num_qpts); i++) { + identity[i * elem_size + i] = 1.0; + } + } + ierr = CeedBasisGetInterp(data->basis_in, &interp_in); CeedChk(ierr); + ierr = CeedBasisGetGrad(data->basis_in, &grad_in); CeedChk(ierr); + + for (CeedInt q = 0; q < num_qpts; q++) { + for (CeedInt n = 0; n < elem_size; n++) { + CeedInt d_in = -1; + for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) { + const CeedInt qq = data->num_eval_mode_in * q; + const CeedScalar *b = NULL; + + if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++; + CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity, + interp_in, &grad_in[d_in * num_qpts * elem_size], &b); CeedChk(ierr); + B_in[(qq + e_in)*elem_size + n] = b[q * elem_size + n]; + } + } + } + data->B_in = B_in; + } + + if (B_out && !data->B_out) { + CeedInt num_qpts, elem_size; + CeedScalar *B_out, *identity = NULL; + const CeedScalar *interp_out, *grad_out; + bool has_eval_none = false; + + ierr = CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts); + CeedChk(ierr); + ierr = CeedBasisGetNumNodes(data->basis_out, &elem_size); CeedChk(ierr); + ierr = CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out); + CeedChk(ierr); + + for (CeedInt i = 0; i < data->num_eval_mode_out; i++) { + has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE); + } + if (has_eval_none) { + ierr = CeedCalloc(num_qpts * elem_size, &identity); CeedChk(ierr); + for (CeedInt i = 0; i < (elem_size < num_qpts ? 
elem_size : num_qpts); i++) { + identity[i * elem_size + i] = 1.0; + } + } + ierr = CeedBasisGetInterp(data->basis_out, &interp_out); CeedChk(ierr); + ierr = CeedBasisGetGrad(data->basis_out, &grad_out); CeedChk(ierr); + + for (CeedInt q = 0; q < num_qpts; q++) { + for (CeedInt n = 0; n < elem_size; n++) { + CeedInt d_out = -1; + for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) { + const CeedInt qq = data->num_eval_mode_out * q; + const CeedScalar *b = NULL; + + if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++; + CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity, + interp_out, &grad_out[d_out * num_qpts * elem_size], &b); CeedChk(ierr); + B_out[(qq + e_out)*elem_size + n] = b[q * elem_size + n]; + } + } + } + data->B_out = B_out; + } + + if (basis_in) *basis_in = data->basis_in; + if (B_in) *B_in = data->B_in; + if (basis_out) *basis_out = data->basis_out; + if (B_out) *B_out = data->B_out; + + return CEED_ERROR_SUCCESS; +} + +/** + @brief Destroy CeedOperatorAssemblyData + + @param[out] data CeedOperatorAssemblyData to destroy + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { + int ierr; + + if (!*data) return CEED_ERROR_SUCCESS; + + ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr); + ierr = CeedBasisDestroy(&(*data)->basis_in); CeedChk(ierr); + ierr = CeedBasisDestroy(&(*data)->basis_out); CeedChk(ierr); + ierr = CeedFree(&(*data)->B_in); CeedChk(ierr); + ierr = CeedFree(&(*data)->B_out); CeedChk(ierr); + + ierr = CeedFree(data); CeedChk(ierr); + return CEED_ERROR_SUCCESS; +} + /// @} /// ---------------------------------------------------------------------------- From b451f703624e585d740c6c134347271fcdf18161 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 9 Jun 2022 12:29:57 -0600 Subject: [PATCH 049/172] minor - fix indentation --- .../cuda/cuda-ref-operator-assemble.h | 28 +++++++++---------- 
.../hip/hip-ref-operator-assemble.h | 28 +++++++++---------- 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h index cb75ddc7ea..2cbc185ecd 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h +++ b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h @@ -44,17 +44,16 @@ extern "C" __launch_bounds__(BLOCK_SIZE) for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) { CeedInt b_in_index = emode_in * NQPTS * NNODES; for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) { - CeedInt b_out_index = emode_out * NQPTS * NNODES; - CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in; - // Perform the B^T D B operation for this 'chunk' of D (the qf_array) + CeedInt b_out_index = emode_out * NQPTS * NNODES; + CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in; + // Perform the B^T D B operation for this 'chunk' of D (the qf_array) for (CeedInt j = 0; j < NQPTS; j++) { result += B_out[b_out_index + j * NNODES + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l]; - } - - }// end of emode_out + } + } // end of emode_out } // end of emode_in CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l; - values_array[val_index] = result; + values_array[val_index] = result; } // end of out component } // end of in component } // end of element loop @@ -96,15 +95,14 @@ extern "C" __launch_bounds__(BLOCK_SIZE) CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) { CeedInt b_in_index = emode_in * NQPTS * NNODES; - for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) { - CeedInt b_out_index = emode_out * NQPTS * NNODES; - CeedInt qf_index = qf_index_comp + 
qemode_out_stride * emode_out + qemode_in_stride * emode_in; - // Perform the B^T D B operation for this 'chunk' of D (the qf_array) + for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) { + CeedInt b_out_index = emode_out * NQPTS * NNODES; + CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in; + // Perform the B^T D B operation for this 'chunk' of D (the qf_array) for (CeedInt j = 0; j < NQPTS; j++) { - result += B_out[b_out_index + j * NNODES + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l]; - } - - }// end of emode_out + result += B_out[b_out_index + j * NNODES + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l]; + } + } // end of emode_out } // end of emode_in CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l; values_array[val_index] = result; diff --git a/include/ceed/jit-source/hip/hip-ref-operator-assemble.h b/include/ceed/jit-source/hip/hip-ref-operator-assemble.h index cb75ddc7ea..2cbc185ecd 100644 --- a/include/ceed/jit-source/hip/hip-ref-operator-assemble.h +++ b/include/ceed/jit-source/hip/hip-ref-operator-assemble.h @@ -44,17 +44,16 @@ extern "C" __launch_bounds__(BLOCK_SIZE) for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) { CeedInt b_in_index = emode_in * NQPTS * NNODES; for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) { - CeedInt b_out_index = emode_out * NQPTS * NNODES; - CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in; - // Perform the B^T D B operation for this 'chunk' of D (the qf_array) + CeedInt b_out_index = emode_out * NQPTS * NNODES; + CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in; + // Perform the B^T D B operation for this 'chunk' of D (the qf_array) for (CeedInt j = 0; j < NQPTS; j++) { result += B_out[b_out_index + j * NNODES + i] * qf_array[qf_index + j] * 
B_in[b_in_index + j * NNODES + l]; - } - - }// end of emode_out + } + } // end of emode_out } // end of emode_in CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l; - values_array[val_index] = result; + values_array[val_index] = result; } // end of out component } // end of in component } // end of element loop @@ -96,15 +95,14 @@ extern "C" __launch_bounds__(BLOCK_SIZE) CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) { CeedInt b_in_index = emode_in * NQPTS * NNODES; - for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) { - CeedInt b_out_index = emode_out * NQPTS * NNODES; - CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in; - // Perform the B^T D B operation for this 'chunk' of D (the qf_array) + for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) { + CeedInt b_out_index = emode_out * NQPTS * NNODES; + CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in; + // Perform the B^T D B operation for this 'chunk' of D (the qf_array) for (CeedInt j = 0; j < NQPTS; j++) { - result += B_out[b_out_index + j * NNODES + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l]; - } - - }// end of emode_out + result += B_out[b_out_index + j * NNODES + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l]; + } + } // end of emode_out } // end of emode_in CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l; values_array[val_index] = result; From 067fd99f3dc9013a4cd6c57af3402ece8c168174 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 9 Jun 2022 13:33:41 -0600 Subject: [PATCH 050/172] pc - simplify BT*D computation in assembly --- interface/ceed-preconditioning.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) 
diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index 35eb79609b..2b88527168 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -678,23 +678,23 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) { for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) { // Compute B^T*D - const CeedInt mat_size = elem_size * num_qpts*num_eval_mode_in; - for (CeedInt i = 0; i < mat_size; i++) BTD_mat[i] = 0.0; for (CeedInt n = 0; n < elem_size; n++) { for (CeedInt q = 0; q < num_qpts; q++) { for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) { + const CeedInt btd_index = n*(num_qpts*num_eval_mode_in) + + (num_eval_mode_in*q + e_in); + CeedScalar sum = 0.0; for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) { - const CeedInt btd_index = n*(num_qpts*num_eval_mode_in) + - (num_eval_mode_in*q + e_in); const CeedInt b_out_index = (num_eval_mode_out*q + e_out)*elem_size + n; const CeedInt eval_mode_index = ((e_in*num_comp+comp_in)*num_eval_mode_out +e_out)*num_comp + comp_out; const CeedInt qf_index = q*layout_qf[0] + eval_mode_index*layout_qf[1] + e*layout_qf[2]; if (fabs(assembled_qf_array[qf_index]) > qf_value_bound) { - BTD_mat[btd_index] += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; + sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; } } + BTD_mat[btd_index] = sum; } } } From 69bf922daa36270062bb51e5ab4f299e9d7ad1af Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 9 Jun 2022 14:09:17 -0600 Subject: [PATCH 051/172] pc - drop qf value thresholding in assembly --- backends/cuda-ref/ceed-cuda-ref-operator.c | 5 +--- backends/hip-ref/ceed-hip-ref-operator.c | 5 +--- .../cuda-ref-operator-assemble-diagonal.h | 19 +++++++-------- .../hip/hip-ref-operator-assemble-diagonal.h | 19 +++++++-------- interface/ceed-preconditioning.c | 24 ++++++------------- 5 files changed, 25 insertions(+), 47 
deletions(-) diff --git a/backends/cuda-ref/ceed-cuda-ref-operator.c b/backends/cuda-ref/ceed-cuda-ref-operator.c index 811d754052..95537c82e4 100644 --- a/backends/cuda-ref/ceed-cuda-ref-operator.c +++ b/backends/cuda-ref/ceed-cuda-ref-operator.c @@ -990,9 +990,6 @@ static inline int CeedOperatorAssembleDiagonalCore_Cuda(CeedOperator op, ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembledqf, &rstr, request); CeedChkBackend(ierr); ierr = CeedElemRestrictionDestroy(&rstr); CeedChkBackend(ierr); - CeedScalar maxnorm = 0; - ierr = CeedVectorNorm(assembledqf, CEED_NORM_MAX, &maxnorm); - CeedChkBackend(ierr); // Setup if (!impl->diag) { @@ -1036,7 +1033,7 @@ static inline int CeedOperatorAssembleDiagonalCore_Cuda(CeedOperator op, // Compute the diagonal of B^T D B int elemsPerBlock = 1; int grid = nelem/elemsPerBlock+((nelem/elemsPerBlock*elemsPerBlockd_identity, + void *args[] = {(void *) &nelem, &diag->d_identity, &diag->d_interpin, &diag->d_gradin, &diag->d_interpout, &diag->d_gradout, &diag->d_emodein, &diag->d_emodeout, &assembledqfarray, &elemdiagarray diff --git a/backends/hip-ref/ceed-hip-ref-operator.c b/backends/hip-ref/ceed-hip-ref-operator.c index 52b9df64b5..2afbed4d1e 100644 --- a/backends/hip-ref/ceed-hip-ref-operator.c +++ b/backends/hip-ref/ceed-hip-ref-operator.c @@ -987,9 +987,6 @@ static inline int CeedOperatorAssembleDiagonalCore_Hip(CeedOperator op, ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembledqf, &rstr, request); CeedChkBackend(ierr); ierr = CeedElemRestrictionDestroy(&rstr); CeedChkBackend(ierr); - CeedScalar maxnorm = 0; - ierr = CeedVectorNorm(assembledqf, CEED_NORM_MAX, &maxnorm); - CeedChkBackend(ierr); // Setup if (!impl->diag) { @@ -1034,7 +1031,7 @@ static inline int CeedOperatorAssembleDiagonalCore_Hip(CeedOperator op, // Compute the diagonal of B^T D B int elemsPerBlock = 1; int grid = nelem/elemsPerBlock+((nelem/elemsPerBlock*elemsPerBlockd_identity, + void *args[] = {(void *) &nelem, 
&diag->d_identity, &diag->d_interpin, &diag->d_gradin, &diag->d_interpout, &diag->d_gradout, &diag->d_emodein, &diag->d_emodeout, &assembledqfarray, &elemdiagarray diff --git a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h index ecaca9d444..6955d74efb 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h +++ b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h @@ -54,15 +54,14 @@ extern "C" __device__ void CeedOperatorGetBasisPointer_Cuda(const CeedScalar **b // Core code for diagonal assembly //------------------------------------------------------------------------------ __device__ void diagonalCore(const CeedInt nelem, - const CeedScalar maxnorm, const bool pointBlock, - const CeedScalar *identity, + const bool pointBlock, const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein, const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) { const int tid = threadIdx.x; // running with P threads, tid is evec node - const CeedScalar qfvaluebound = maxnorm*1e-12; + if (tid >= NNODES) return; // Compute the diagonal of B^T D B // Each element @@ -94,8 +93,7 @@ __device__ void diagonalCore(const CeedInt nelem, const CeedScalar qfvalue = assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)* NCOMP+compOut)*nelem+e)*NQPTS+q]; - if (abs(qfvalue) > qfvaluebound) - evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; + evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; } elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue; } @@ -106,8 +104,7 @@ __device__ void diagonalCore(const CeedInt nelem, const CeedScalar qfvalue = assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)* NCOMP+compOut)*nelem+e)*NQPTS+q]; - if (abs(qfvalue) > qfvaluebound) - 
evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; + evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; } elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue; } @@ -121,13 +118,13 @@ __device__ void diagonalCore(const CeedInt nelem, // Linear diagonal //------------------------------------------------------------------------------ extern "C" __global__ void linearDiagonal(const CeedInt nelem, - const CeedScalar maxnorm, const CeedScalar *identity, + const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein, const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) { - diagonalCore(nelem, maxnorm, false, identity, interpin, gradin, interpout, + diagonalCore(nelem, false, identity, interpin, gradin, interpout, gradout, emodein, emodeout, assembledqfarray, elemdiagarray); } @@ -135,13 +132,13 @@ extern "C" __global__ void linearDiagonal(const CeedInt nelem, // Linear point block diagonal //------------------------------------------------------------------------------ extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, - const CeedScalar maxnorm, const CeedScalar *identity, + const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein, const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) { - diagonalCore(nelem, maxnorm, true, identity, interpin, gradin, interpout, + diagonalCore(nelem, true, identity, interpin, gradin, interpout, gradout, emodein, emodeout, assembledqfarray, elemdiagarray); } diff --git a/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h b/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h index f5fd171c2d..622a159bad 100644 --- 
a/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h +++ b/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h @@ -53,15 +53,14 @@ extern "C" __device__ void CeedOperatorGetBasisPointer_Hip(const CeedScalar **ba // Core code for diagonal assembly //------------------------------------------------------------------------------ __device__ void diagonalCore(const CeedInt nelem, - const CeedScalar maxnorm, const bool pointBlock, - const CeedScalar *identity, + const bool pointBlock, const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein, const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) { const int tid = threadIdx.x; // running with P threads, tid is evec node - const CeedScalar qfvaluebound = maxnorm*1e-12; + if (tid >= NNODES) return; // Compute the diagonal of B^T D B // Each element @@ -93,8 +92,7 @@ __device__ void diagonalCore(const CeedInt nelem, const CeedScalar qfvalue = assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)* NCOMP+compOut)*nelem+e)*NQPTS+q]; - if (abs(qfvalue) > qfvaluebound) - evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; + evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; } elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue; } @@ -105,8 +103,7 @@ __device__ void diagonalCore(const CeedInt nelem, const CeedScalar qfvalue = assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)* NCOMP+compOut)*nelem+e)*NQPTS+q]; - if (abs(qfvalue) > qfvaluebound) - evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; + evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; } elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue; } @@ -120,13 +117,13 @@ __device__ void diagonalCore(const CeedInt nelem, // Linear diagonal //------------------------------------------------------------------------------ extern "C" 
__global__ void linearDiagonal(const CeedInt nelem, - const CeedScalar maxnorm, const CeedScalar *identity, + const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein, const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) { - diagonalCore(nelem, maxnorm, false, identity, interpin, gradin, interpout, + diagonalCore(nelem, false, identity, interpin, gradin, interpout, gradout, emodein, emodeout, assembledqfarray, elemdiagarray); } @@ -134,13 +131,13 @@ extern "C" __global__ void linearDiagonal(const CeedInt nelem, // Linear point block diagonal //------------------------------------------------------------------------------ extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, - const CeedScalar maxnorm, const CeedScalar *identity, + const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin, const CeedScalar *interpout, const CeedScalar *gradout, const CeedEvalMode *emodein, const CeedEvalMode *emodeout, const CeedScalar *__restrict__ assembledqfarray, CeedScalar *__restrict__ elemdiagarray) { - diagonalCore(nelem, maxnorm, true, identity, interpin, gradin, interpout, + diagonalCore(nelem, true, identity, interpin, gradin, interpout, gradout, emodein, emodeout, assembledqfarray, elemdiagarray); } diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index 2b88527168..2c14b358f8 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -313,8 +313,6 @@ static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, CeedInt layout[3]; ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr); ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr); - CeedScalar max_norm = 0; - ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr); // Get assembly data 
CeedOperatorAssemblyData data; @@ -379,7 +377,6 @@ static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr); // Compute the diagonal of B^T D B // Each element - const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON; for (CeedInt e=0; e qf_value_bound) - for (CeedInt n=0; n qf_value_bound) - for (CeedInt n=0; n qf_value_bound) { - sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; - } + sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index]; } BTD_mat[btd_index] = sum; } From 0e455453ae0cd7ca9f40a43c0df197c0a20f0d65 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Fri, 17 Jun 2022 09:56:31 -0600 Subject: [PATCH 052/172] pc - clearer internal function name for stack traces --- interface/ceed-preconditioning.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index 2c14b358f8..1df89cff29 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -294,7 +294,7 @@ static int CeedOperatorCreateActivePointBlockRestriction( @ref Developer **/ -static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, +static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) { int ierr; Ceed ceed; @@ -1722,8 +1722,8 @@ int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled); CeedChk(ierr); } else { - ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled); - CeedChk(ierr); + ierr = CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, + assembled); CeedChk(ierr); } return CEED_ERROR_SUCCESS; @@ -1860,7 +1860,7 @@ int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, ierr = 
CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled); CeedChk(ierr); } else { - ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled); + ierr = CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled); CeedChk(ierr); } From 5e82a6e1725e39817d93729ca2d6f7dcbf054835 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 16 Jun 2022 16:44:14 -0600 Subject: [PATCH 053/172] fluids - cache boundary conditions in separate vecs --- examples/fluids/navierstokes.c | 20 ++++---- examples/fluids/navierstokes.h | 3 +- examples/fluids/src/setupts.c | 87 ++++++++++++++++++---------------- 3 files changed, 61 insertions(+), 49 deletions(-) diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index 3e3ba3bc3c..37ad39d423 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -166,12 +166,14 @@ int main(int argc, char **argv) { ierr = DMCreateGlobalVector(dm, &Q); CHKERRQ(ierr); ierr = VecZeroEntries(Q); CHKERRQ(ierr); - // -- Set up local state vector Q_loc - Vec Q_loc; - ierr = DMGetLocalVector(dm, &Q_loc); CHKERRQ(ierr); + // -- Set up local state vectors Q_loc, Q_dot_loc + ierr = DMCreateLocalVector(dm, &user->Q_loc); CHKERRQ(ierr); + ierr = DMCreateLocalVector(dm, &user->Q_dot_loc); CHKERRQ(ierr); + ierr = VecZeroEntries(user->Q_dot_loc); CHKERRQ(ierr); // -- Fix multiplicity for ICs - ierr = ICs_FixMultiplicity(dm, ceed_data, user, Q_loc, Q, 0.0); CHKERRQ(ierr); + ierr = ICs_FixMultiplicity(dm, ceed_data, user, user->Q_loc, Q, 0.0); + CHKERRQ(ierr); // --------------------------------------------------------------------------- // Set up lumped mass matrix @@ -191,7 +193,7 @@ int main(int argc, char **argv) { // still get the same results due to the problem->bc function, but with // potentially much slower execution. 
if (problem->bc_from_ics) { - ierr = SetBCsFromICs_NS(dm, Q, Q_loc); CHKERRQ(ierr); + ierr = SetBCsFromICs_NS(dm, Q, user->Q_loc); CHKERRQ(ierr); } // --------------------------------------------------------------------------- @@ -264,7 +266,7 @@ int main(int argc, char **argv) { ierr = VecGetLocalSize(Q, &owned_dofs); CHKERRQ(ierr); glob_nodes = glob_dofs/num_comp_q; // -- Get local size - ierr = VecGetSize(Q_loc, &owned_nodes); CHKERRQ(ierr); + ierr = VecGetSize(user->Q_loc, &owned_nodes); CHKERRQ(ierr); owned_nodes /= num_comp_q; ierr = PetscPrintf(comm, " Mesh:\n" @@ -278,8 +280,8 @@ int main(int argc, char **argv) { num_P, num_Q, glob_dofs, owned_dofs, num_comp_q, glob_nodes, owned_nodes); CHKERRQ(ierr); } - // -- Restore Q_loc - ierr = DMRestoreLocalVector(dm, &Q_loc); CHKERRQ(ierr); + // -- Zero Q_loc + ierr = VecZeroEntries(user->Q_loc); CHKERRQ(ierr); // --------------------------------------------------------------------------- // TS: Create, setup, and solve @@ -345,6 +347,8 @@ int main(int argc, char **argv) { // -- Vectors ierr = VecDestroy(&Q); CHKERRQ(ierr); ierr = VecDestroy(&user->M); CHKERRQ(ierr); + ierr = VecDestroy(&user->Q_loc); CHKERRQ(ierr); + ierr = VecDestroy(&user->Q_dot_loc); CHKERRQ(ierr); // -- Matrices ierr = MatDestroy(&user->interp_viz); CHKERRQ(ierr); diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h index 730411a19c..cd6bafa179 100644 --- a/examples/fluids/navierstokes.h +++ b/examples/fluids/navierstokes.h @@ -144,12 +144,13 @@ struct User_private { Mat interp_viz; Ceed ceed; Units units; - Vec M; + Vec M, Q_loc, Q_dot_loc; Physics phys; AppCtx app_ctx; CeedVector q_ceed, q_dot_ceed, g_ceed, coo_values; CeedOperator op_rhs_vol, op_rhs, op_ifunction_vol, op_ifunction, op_ijacobian; bool matrices_set_up; + CeedScalar time, dt; }; // Units diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c index d4b8d492c8..1e46a1f704 100644 --- a/examples/fluids/src/setupts.c +++ 
b/examples/fluids/src/setupts.c @@ -81,33 +81,38 @@ PetscErrorCode ComputeLumpedMassMatrix(Ceed ceed, DM dm, CeedData ceed_data, // This is the RHS of the ODE, given as u_t = G(t,u) // This function takes in a state vector Q and writes into G PetscErrorCode RHS_NS(TS ts, PetscReal t, Vec Q, Vec G, void *user_data) { - User user = *(User *)user_data; PetscScalar *q, *g; - Vec Q_loc, G_loc; + Vec Q_loc = user->Q_loc, G_loc; PetscMemType q_mem_type, g_mem_type; PetscErrorCode ierr; PetscFunctionBeginUser; - // Update context field labels - if (user->phys->solution_time_label) - CeedOperatorContextSetDouble(user->op_rhs, user->phys->solution_time_label, &t); + // Get local vector + ierr = DMGetLocalVector(user->dm, &G_loc); CHKERRQ(ierr); + + // Update time dependent data + if (user->time != t) { + ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); + ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Q_loc, t, + NULL, NULL, NULL); CHKERRQ(ierr); + if (user->phys->solution_time_label) { + CeedOperatorContextSetDouble(user->op_rhs, user->phys->solution_time_label, &t); + } + user->time = t; + } if (user->phys->timestep_size_label) { PetscScalar dt; - ierr = TSGetTimeStep(ts,&dt); CHKERRQ(ierr); - CeedOperatorContextSetDouble(user->op_rhs, user->phys->timestep_size_label, - &dt); + ierr = TSGetTimeStep(ts, &dt); CHKERRQ(ierr); + if (user->dt != dt) { + CeedOperatorContextSetDouble(user->op_rhs, user->phys->timestep_size_label, + &dt); + user->dt = dt; + } } - // Get local vectors - ierr = DMGetLocalVector(user->dm, &Q_loc); CHKERRQ(ierr); - ierr = DMGetLocalVector(user->dm, &G_loc); CHKERRQ(ierr); - // Global-to-local - ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Q_loc); CHKERRQ(ierr); - ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Q_loc, t, - NULL, NULL, NULL); CHKERRQ(ierr); // Place PETSc vectors in CEED vectors ierr = VecGetArrayReadAndMemType(Q_loc, (const PetscScalar **)&q, &q_mem_type); @@ -135,7 +140,6 
@@ PetscErrorCode RHS_NS(TS ts, PetscReal t, Vec Q, Vec G, void *user_data) { ierr = VecPointwiseMult(G, G, user->M); CHKERRQ(ierr); // Restore vectors - ierr = DMRestoreLocalVector(user->dm, &Q_loc); CHKERRQ(ierr); ierr = DMRestoreLocalVector(user->dm, &G_loc); CHKERRQ(ierr); PetscFunctionReturn(0); @@ -147,33 +151,37 @@ PetscErrorCode IFunction_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, Vec G, User user = *(User *)user_data; const PetscScalar *q, *q_dot; PetscScalar *g; - Vec Q_loc, Q_dot_loc, G_loc; + Vec Q_loc = user->Q_loc, Q_dot_loc = user->Q_dot_loc, G_loc; PetscMemType q_mem_type, q_dot_mem_type, g_mem_type; PetscErrorCode ierr; PetscFunctionBeginUser; - // Update context field labels - if (user->phys->solution_time_label) - CeedOperatorContextSetDouble(user->op_ifunction, - user->phys->solution_time_label, &t); + // Get local vectors + ierr = DMGetLocalVector(user->dm, &G_loc); CHKERRQ(ierr); + + // Update time dependent data + if (user->time != t) { + ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); + ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Q_loc, t, + NULL, NULL, NULL); CHKERRQ(ierr); + if (user->phys->solution_time_label) { + CeedOperatorContextSetDouble(user->op_ifunction, + user->phys->solution_time_label, &t); + } + user->time = t; + } if (user->phys->timestep_size_label) { PetscScalar dt; - ierr = TSGetTimeStep(ts,&dt); CHKERRQ(ierr); - CeedOperatorContextSetDouble(user->op_ifunction, - user->phys->timestep_size_label, &dt); + ierr = TSGetTimeStep(ts, &dt); CHKERRQ(ierr); + if (user->dt != dt) { + CeedOperatorContextSetDouble(user->op_ifunction, + user->phys->timestep_size_label, &dt); + user->dt = dt; + } } - // Get local vectors - ierr = DMGetLocalVector(user->dm, &Q_loc); CHKERRQ(ierr); - ierr = DMGetLocalVector(user->dm, &Q_dot_loc); CHKERRQ(ierr); - ierr = DMGetLocalVector(user->dm, &G_loc); CHKERRQ(ierr); - // Global-to-local - ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Q_loc); 
CHKERRQ(ierr); - ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Q_loc, t, - NULL, NULL, NULL); CHKERRQ(ierr); - ierr = VecZeroEntries(Q_dot_loc); CHKERRQ(ierr); ierr = DMGlobalToLocal(user->dm, Q_dot, INSERT_VALUES, Q_dot_loc); CHKERRQ(ierr); @@ -205,8 +213,6 @@ PetscErrorCode IFunction_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, Vec G, ierr = DMLocalToGlobal(user->dm, G_loc, ADD_VALUES, G); CHKERRQ(ierr); // Restore vectors - ierr = DMRestoreLocalVector(user->dm, &Q_loc); CHKERRQ(ierr); - ierr = DMRestoreLocalVector(user->dm, &Q_dot_loc); CHKERRQ(ierr); ierr = DMRestoreLocalVector(user->dm, &G_loc); CHKERRQ(ierr); PetscFunctionReturn(0); @@ -216,17 +222,17 @@ static PetscErrorCode MatMult_NS_IJacobian(Mat J, Vec Q, Vec G) { User user; const PetscScalar *q; PetscScalar *g; - Vec Q_loc, G_loc; PetscMemType q_mem_type, g_mem_type; PetscErrorCode ierr; PetscFunctionBeginUser; - MatShellGetContext(J, &user); + ierr = MatShellGetContext(J, &user); CHKERRQ(ierr); + Vec Q_loc = user->Q_dot_loc, // Note - Q_dot_loc has zero BCs + G_loc; + // Get local vectors - ierr = DMGetLocalVector(user->dm, &Q_loc); CHKERRQ(ierr); ierr = DMGetLocalVector(user->dm, &G_loc); CHKERRQ(ierr); // Global-to-local - ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Q_loc); CHKERRQ(ierr); // Place PETSc vectors in CEED vectors @@ -251,7 +257,6 @@ static PetscErrorCode MatMult_NS_IJacobian(Mat J, Vec Q, Vec G) { ierr = DMLocalToGlobal(user->dm, G_loc, ADD_VALUES, G); CHKERRQ(ierr); // Restore vectors - ierr = DMRestoreLocalVector(user->dm, &Q_loc); CHKERRQ(ierr); ierr = DMRestoreLocalVector(user->dm, &G_loc); CHKERRQ(ierr); PetscFunctionReturn(0); } @@ -452,6 +457,8 @@ PetscErrorCode TSSolve_NS(DM dm, User user, AppCtx app_ctx, Physics phys, 1.e-12 * user->units->second, 1.e2 * user->units->second); CHKERRQ(ierr); ierr = TSSetFromOptions(*ts); CHKERRQ(ierr); + user->time = -1.0; // require all BCs and ctx to be updated + user->dt = -1.0; if 
(!app_ctx->cont_steps) { // print initial condition if (!app_ctx->test_mode) { ierr = TSMonitor_NS(*ts, 0, 0., *Q, user); CHKERRQ(ierr); From 9a9f72eb87ad552844b2e52318dcf815a3ce4563 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 16 Jun 2022 17:08:39 -0600 Subject: [PATCH 054/172] fluids - restore commented code --- examples/fluids/src/setupts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c index 1e46a1f704..b4a8ae34cb 100644 --- a/examples/fluids/src/setupts.c +++ b/examples/fluids/src/setupts.c @@ -325,7 +325,8 @@ PetscErrorCode FormIJacobian_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, const PetscScalar *values; MatType mat_type; PetscCall(MatGetType(J_pre, &mat_type)); - //if (strstr(mat_type, "kokkos") || strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE; + if (strstr(mat_type, "kokkos") + || strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE; CeedOperatorLinearAssemble(user->op_ijacobian, user->coo_values); CeedVectorGetArrayRead(user->coo_values, mem_type, &values); if (coo_vec) { From b80a4419c8f00e2f8e9a7afd897117055f7f7a73 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Fri, 17 Jun 2022 10:15:19 -0600 Subject: [PATCH 055/172] fluids - only VecZero in the BCs if using SetBCsFromICs --- examples/fluids/src/misc.c | 3 +++ examples/fluids/src/setupts.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/fluids/src/misc.c b/examples/fluids/src/misc.c index d7b63e1581..7edfceb9e2 100644 --- a/examples/fluids/src/misc.c +++ b/examples/fluids/src/misc.c @@ -98,6 +98,8 @@ PetscErrorCode ICs_FixMultiplicity(DM dm, CeedData ceed_data, User user, PetscFunctionReturn(0); } + +// Note: The BCs must be inserted *before* the other values are inserted into Q_loc PetscErrorCode DMPlexInsertBoundaryValues_NS(DM dm, PetscBool insert_essential, Vec Q_loc, PetscReal time, Vec face_geom_FVM, Vec cell_geom_FVM, Vec grad_FVM) { @@ -107,6 
+109,7 @@ PetscErrorCode DMPlexInsertBoundaryValues_NS(DM dm, PetscFunctionBegin; ierr = DMGetNamedLocalVector(dm, "Qbc", &Qbc); CHKERRQ(ierr); + ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); ierr = VecAXPY(Q_loc, 1., Qbc); CHKERRQ(ierr); ierr = DMRestoreNamedLocalVector(dm, "Qbc", &Qbc); CHKERRQ(ierr); diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c index b4a8ae34cb..2feb10c45b 100644 --- a/examples/fluids/src/setupts.c +++ b/examples/fluids/src/setupts.c @@ -93,7 +93,6 @@ PetscErrorCode RHS_NS(TS ts, PetscReal t, Vec Q, Vec G, void *user_data) { // Update time dependent data if (user->time != t) { - ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Q_loc, t, NULL, NULL, NULL); CHKERRQ(ierr); if (user->phys->solution_time_label) { @@ -161,7 +160,6 @@ PetscErrorCode IFunction_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, Vec G, // Update time dependent data if (user->time != t) { - ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Q_loc, t, NULL, NULL, NULL); CHKERRQ(ierr); if (user->phys->solution_time_label) { From 0f58c348267158b87d81b5a8b1a981eb0caf8b99 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 15 Jun 2022 09:46:55 -0600 Subject: [PATCH 056/172] memcheck - add QFContext impl --- backends/memcheck/ceed-memcheck-blocked.c | 2 + .../memcheck/ceed-memcheck-qfunctioncontext.c | 228 ++++++++++++++++++ backends/memcheck/ceed-memcheck-serial.c | 2 + backends/memcheck/ceed-memcheck.h | 10 + doc/sphinx/source/releasenotes.md | 4 + tests/t404-qfunction.c | 21 ++ tests/t408-qfunction.c | 4 +- 7 files changed, 269 insertions(+), 2 deletions(-) create mode 100644 backends/memcheck/ceed-memcheck-qfunctioncontext.c diff --git a/backends/memcheck/ceed-memcheck-blocked.c b/backends/memcheck/ceed-memcheck-blocked.c index 17f1d30264..f1b3efef80 100644 --- a/backends/memcheck/ceed-memcheck-blocked.c +++ 
b/backends/memcheck/ceed-memcheck-blocked.c @@ -30,6 +30,8 @@ static int CeedInit_Memcheck(const char *resource, Ceed ceed) { ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionCreate", CeedQFunctionCreate_Memcheck); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionContextCreate", + CeedQFunctionContextCreate_Memcheck); CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } diff --git a/backends/memcheck/ceed-memcheck-qfunctioncontext.c b/backends/memcheck/ceed-memcheck-qfunctioncontext.c new file mode 100644 index 0000000000..7bb310c2a7 --- /dev/null +++ b/backends/memcheck/ceed-memcheck-qfunctioncontext.c @@ -0,0 +1,228 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +#include +#include +#include +#include +#include "ceed-memcheck.h" + +//------------------------------------------------------------------------------ +// QFunctionContext has valid data +//------------------------------------------------------------------------------ +static int CeedQFunctionContextHasValidData_Memcheck(CeedQFunctionContext ctx, + bool *has_valid_data) { + int ierr; + CeedQFunctionContext_Memcheck *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, (void *)&impl); + CeedChkBackend(ierr); + + *has_valid_data = !!impl->data; + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// QFunctionContext has borrowed data +//------------------------------------------------------------------------------ +static int CeedQFunctionContextHasBorrowedDataOfType_Memcheck( + CeedQFunctionContext ctx, CeedMemType mem_type, + bool *has_borrowed_data_of_type) { + int ierr; + CeedQFunctionContext_Memcheck *impl; + ierr = 
CeedQFunctionContextGetBackendData(ctx, (void *)&impl); + CeedChkBackend(ierr); + Ceed ceed; + ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + + switch (mem_type) { + case CEED_MEM_HOST: + *has_borrowed_data_of_type = !!impl->data_borrowed; + break; + default: + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "Can only set HOST memory for this backend"); + // LCOV_EXCL_STOP + break; + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// QFunctionContext Set Data +//------------------------------------------------------------------------------ +static int CeedQFunctionContextSetData_Memcheck(CeedQFunctionContext ctx, + CeedMemType mem_type, CeedCopyMode copy_mode, void *data) { + int ierr; + CeedQFunctionContext_Memcheck *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, (void *)&impl); + CeedChkBackend(ierr); + size_t ctx_size; + ierr = CeedQFunctionContextGetContextSize(ctx, &ctx_size); CeedChkBackend(ierr); + Ceed ceed; + ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + + if (mem_type != CEED_MEM_HOST) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "Can only set HOST memory for this backend"); + // LCOV_EXCL_STOP + + ierr = CeedFree(&impl->data_allocated); CeedChkBackend(ierr); + ierr = CeedFree(&impl->data_owned); CeedChkBackend(ierr); + switch (copy_mode) { + case CEED_COPY_VALUES: + ierr = CeedMallocArray(1, ctx_size, &impl->data_owned); CeedChkBackend(ierr); + impl->data_borrowed = NULL; + impl->data = impl->data_owned; + memcpy(impl->data, data, ctx_size); + break; + case CEED_OWN_POINTER: + impl->data_owned = data; + impl->data_borrowed = NULL; + impl->data = data; + break; + case CEED_USE_POINTER: + impl->data_borrowed = data; + impl->data = data; + } + // Copy data to check ctx_size bounds + ierr = CeedMallocArray(1, ctx_size, &impl->data_allocated); + CeedChkBackend(ierr); + 
memcpy(impl->data_allocated, impl->data, ctx_size); + impl->data = impl->data_allocated; + VALGRIND_DISCARD(impl->mem_block_id); + impl->mem_block_id = VALGRIND_CREATE_BLOCK(impl->data, ctx_size, + "'QFunction backend context data copy'"); + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// QFunctionContext Take Data +//------------------------------------------------------------------------------ +static int CeedQFunctionContextTakeData_Memcheck(CeedQFunctionContext ctx, + CeedMemType mem_type, void *data) { + int ierr; + CeedQFunctionContext_Memcheck *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, (void *)&impl); + CeedChkBackend(ierr); + Ceed ceed; + ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + + if (mem_type != CEED_MEM_HOST) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "Can only provide HOST memory for this backend"); + // LCOV_EXCL_STOP + + *(void **)data = impl->data_borrowed; + impl->data_borrowed = NULL; + impl->data = NULL; + VALGRIND_DISCARD(impl->mem_block_id); + ierr = CeedFree(&impl->data_allocated); CeedChkBackend(ierr); + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// QFunctionContext Get Data +//------------------------------------------------------------------------------ +static int CeedQFunctionContextGetData_Memcheck(CeedQFunctionContext ctx, + CeedMemType mem_type, void *data) { + int ierr; + CeedQFunctionContext_Memcheck *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, (void *)&impl); + CeedChkBackend(ierr); + Ceed ceed; + ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + + if (mem_type != CEED_MEM_HOST) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "Can only provide HOST memory for this backend"); + // LCOV_EXCL_STOP + + *(void **)data = impl->data; + + return CEED_ERROR_SUCCESS; +} + 
+//------------------------------------------------------------------------------ +// QFunctionContext Restore Data +//------------------------------------------------------------------------------ +static int CeedQFunctionContextRestoreData_Memcheck(CeedQFunctionContext ctx) { + int ierr; + size_t ctx_size; + ierr = CeedQFunctionContextGetContextSize(ctx, &ctx_size); CeedChkBackend(ierr); + CeedQFunctionContext_Memcheck *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, (void *)&impl); + CeedChkBackend(ierr); + + if (impl->data_borrowed) { + memcpy(impl->data_borrowed, impl->data, ctx_size); + } + if (impl->data_owned) { + memcpy(impl->data_owned, impl->data, ctx_size); + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// QFunctionContext Destroy +//------------------------------------------------------------------------------ +static int CeedQFunctionContextDestroy_Memcheck(CeedQFunctionContext ctx) { + int ierr; + CeedQFunctionContext_Memcheck *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + + ierr = CeedFree(&impl->data_allocated); CeedChkBackend(ierr); + ierr = CeedFree(&impl->data_owned); CeedChkBackend(ierr); + ierr = CeedFree(&impl); CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// QFunctionContext Create +//------------------------------------------------------------------------------ +int CeedQFunctionContextCreate_Memcheck(CeedQFunctionContext ctx) { + int ierr; + CeedQFunctionContext_Memcheck *impl; + Ceed ceed; + ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "HasValidData", + CeedQFunctionContextHasValidData_Memcheck); + CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, + "HasBorrowedDataOfType", + 
CeedQFunctionContextHasBorrowedDataOfType_Memcheck); + CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "SetData", + CeedQFunctionContextSetData_Memcheck); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "TakeData", + CeedQFunctionContextTakeData_Memcheck); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetData", + CeedQFunctionContextGetData_Memcheck); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetDataRead", + CeedQFunctionContextGetData_Memcheck); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "RestoreData", + CeedQFunctionContextRestoreData_Memcheck); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "RestoreDataRead", + CeedQFunctionContextRestoreData_Memcheck); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "Destroy", + CeedQFunctionContextDestroy_Memcheck); CeedChkBackend(ierr); + + ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); + ierr = CeedQFunctionContextSetBackendData(ctx, impl); CeedChkBackend(ierr); + + return CEED_ERROR_SUCCESS; +} +//------------------------------------------------------------------------------ diff --git a/backends/memcheck/ceed-memcheck-serial.c b/backends/memcheck/ceed-memcheck-serial.c index 36f12d7b4d..07c9a464ef 100644 --- a/backends/memcheck/ceed-memcheck-serial.c +++ b/backends/memcheck/ceed-memcheck-serial.c @@ -31,6 +31,8 @@ static int CeedInit_Memcheck(const char *resource, Ceed ceed) { ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionCreate", CeedQFunctionCreate_Memcheck); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionContextCreate", + CeedQFunctionContextCreate_Memcheck); CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } diff --git a/backends/memcheck/ceed-memcheck.h b/backends/memcheck/ceed-memcheck.h index 
b213be3254..ca97f16012 100644 --- a/backends/memcheck/ceed-memcheck.h +++ b/backends/memcheck/ceed-memcheck.h @@ -17,6 +17,16 @@ typedef struct { bool setup_done; } CeedQFunction_Memcheck; +typedef struct { + int mem_block_id; + void *data; + void *data_allocated; + void *data_borrowed; + void *data_owned; +} CeedQFunctionContext_Memcheck; + CEED_INTERN int CeedQFunctionCreate_Memcheck(CeedQFunction qf); +CEED_INTERN int CeedQFunctionContextCreate_Memcheck(CeedQFunctionContext ctx); + #endif // _ceed_memcheck_h diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index 1c0b7a93f0..2d4170f8fc 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -10,6 +10,10 @@ On this page we provide a summary of the main API changes, new features and exam - Added {c:func}`CeedOperatorSetName` for more readable {c:func}`CeedOperatorView` output. +### New features + +- Update `/cpu/self/memcheck/*` backends to help verify `CeedQFunctionContext` data sizes provided by user. 
+ (v0-10-1)= ## v0.10.1 (Apr 11, 2022) diff --git a/tests/t404-qfunction.c b/tests/t404-qfunction.c index 050de8d4ce..49978def0f 100644 --- a/tests/t404-qfunction.c +++ b/tests/t404-qfunction.c @@ -11,10 +11,12 @@ int main(int argc, char **argv) { CeedInit(argv[1], &ceed); + // Set borrowed pointer CeedQFunctionContextCreate(ceed, &ctx); CeedQFunctionContextSetData(ctx, CEED_MEM_HOST, CEED_USE_POINTER, sizeof(ctxData), &ctxData); + // Update borrowed pointer CeedQFunctionContextGetData(ctx, CEED_MEM_HOST, &ctxDataCopy); ctxDataCopy[4] = 6; CeedQFunctionContextRestoreData(ctx, &ctxDataCopy); @@ -23,7 +25,26 @@ int main(int argc, char **argv) { printf("error modifying data: %f != 6.0\n", ctxData[4]); // LCOV_EXCL_STOP + // Take back borrowed pointer CeedQFunctionContextTakeData(ctx, CEED_MEM_HOST, &ctxDataCopy); + if (ctxDataCopy[4] != 6) + // LCOV_EXCL_START + printf("error accessing borrowed data: %f != 6.0\n", ctxDataCopy[4]); + // LCOV_EXCL_STOP + + // Set copied data + ctxData[4] = 6; + CeedQFunctionContextSetData(ctx, CEED_MEM_HOST, CEED_COPY_VALUES, + sizeof(ctxData), &ctxData); + + // Check copied data + CeedQFunctionContextGetData(ctx, CEED_MEM_HOST, &ctxDataCopy); + if (ctxDataCopy[4] != 6) + // LCOV_EXCL_START + printf("error accessing copied data: %f != 6.0\n", ctxDataCopy[4]); + // LCOV_EXCL_STOP + CeedQFunctionContextRestoreData(ctx, &ctxDataCopy); + CeedQFunctionContextDestroy(&ctx); CeedDestroy(&ceed); return 0; diff --git a/tests/t408-qfunction.c b/tests/t408-qfunction.c index 188ad60e3f..51d205db29 100644 --- a/tests/t408-qfunction.c +++ b/tests/t408-qfunction.c @@ -17,9 +17,9 @@ int main(int argc, char **argv) { // Get data access CeedQFunctionContextGetDataRead(ctx, CEED_MEM_HOST, &ctxDataCopy); - if (ctxData[4] != 5) + if (ctxDataCopy[4] != 5) // LCOV_EXCL_START - printf("error reading data: %f != 5.0\n", ctxData[4]); + printf("error reading data: %f != 5.0\n", ctxDataCopy[4]); // LCOV_EXCL_STOP CeedQFunctionContextRestoreDataRead(ctx, 
&ctxDataCopy); From edc819a10d47c4f248d00a144b44345fa45bfe5b Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 15 Jun 2022 10:03:53 -0600 Subject: [PATCH 057/172] memcheck - name QFunction output blocks --- backends/memcheck/ceed-memcheck-qfunction.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/backends/memcheck/ceed-memcheck-qfunction.c b/backends/memcheck/ceed-memcheck-qfunction.c index a2ff4600d1..6e90ee1736 100644 --- a/backends/memcheck/ceed-memcheck-qfunction.c +++ b/backends/memcheck/ceed-memcheck-qfunction.c @@ -7,6 +7,7 @@ #include #include +#include #include #include "ceed-memcheck.h" @@ -33,25 +34,33 @@ static int CeedQFunctionApply_Memcheck(CeedQFunction qf, CeedInt Q, CeedInt num_in, num_out; ierr = CeedQFunctionGetNumArgs(qf, &num_in, &num_out); CeedChkBackend(ierr); - for (int i = 0; iinputs[i]); CeedChkBackend(ierr); } - for (int i = 0; ioutputs[i]); CeedChkBackend(ierr); - CeedSize len; + ierr = CeedVectorGetLength(V[i], &len); CeedChkBackend(ierr); VALGRIND_MAKE_MEM_UNDEFINED(impl->outputs[i], len); + + snprintf(name, 30, "'QFunction output %d'", i); + mem_block_ids[i] = VALGRIND_CREATE_BLOCK(impl->outputs[i], len, name); } ierr = f(ctxData, Q, impl->inputs, impl->outputs); CeedChkBackend(ierr); - for (int i = 0; iinputs[i]); CeedChkBackend(ierr); } - for (int i = 0; ioutputs[i]); CeedChkBackend(ierr); + VALGRIND_DISCARD(mem_block_ids[i]); } if (ctx) { ierr = CeedQFunctionContextRestoreData(ctx, &ctxData); CeedChkBackend(ierr); From 2e64a2b98b92a606ea5279621c68cc90295ae2a9 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 15 Jun 2022 10:47:23 -0600 Subject: [PATCH 058/172] ctx - allow backend specific data destroy behavior --- .../memcheck/ceed-memcheck-qfunctioncontext.c | 31 +++++++++ include/ceed-impl.h | 1 + include/ceed/backend.h | 3 + include/ceed/ceed.h | 3 +- interface/ceed-qfunctioncontext.c | 69 +++++++++++++++---- interface/ceed.c | 1 + 6 files changed, 95 insertions(+), 13 
deletions(-) diff --git a/backends/memcheck/ceed-memcheck-qfunctioncontext.c b/backends/memcheck/ceed-memcheck-qfunctioncontext.c index 7bb310c2a7..cc8416596b 100644 --- a/backends/memcheck/ceed-memcheck-qfunctioncontext.c +++ b/backends/memcheck/ceed-memcheck-qfunctioncontext.c @@ -175,6 +175,35 @@ static int CeedQFunctionContextRestoreData_Memcheck(CeedQFunctionContext ctx) { return CEED_ERROR_SUCCESS; } +//------------------------------------------------------------------------------ +// QFunctionContext destroy user data +//------------------------------------------------------------------------------ +static int CeedQFunctionContextDataDestroy_Memcheck(CeedQFunctionContext ctx) { + int ierr; + CeedQFunctionContext_Memcheck *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + CeedQFunctionContextDataDestroyUser data_destroy_function; + CeedMemType data_destroy_mem_type; + ierr = CeedQFunctionContextGetDataDestroy(ctx, &data_destroy_mem_type, + &data_destroy_function); CeedChk(ierr); + Ceed ceed; + ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + + if (data_destroy_mem_type != CEED_MEM_HOST) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "Can only destroy HOST memory for this backend"); + // LCOV_EXCL_STOP + + if (data_destroy_function) { + ierr = data_destroy_function(impl->data_borrowed ? 
impl->data_borrowed : + impl->data_owned); CeedChk(ierr); + } + ierr = CeedFree(&impl->data_allocated); CeedChkBackend(ierr); + + return CEED_ERROR_SUCCESS; +} + //------------------------------------------------------------------------------ // QFunctionContext Destroy //------------------------------------------------------------------------------ @@ -217,6 +246,8 @@ int CeedQFunctionContextCreate_Memcheck(CeedQFunctionContext ctx) { CeedQFunctionContextRestoreData_Memcheck); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "RestoreDataRead", CeedQFunctionContextRestoreData_Memcheck); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "DataDestroy", + CeedQFunctionContextDataDestroy_Memcheck); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "Destroy", CeedQFunctionContextDestroy_Memcheck); CeedChkBackend(ierr); diff --git a/include/ceed-impl.h b/include/ceed-impl.h index b7383c3c6b..7e3548c01d 100644 --- a/include/ceed-impl.h +++ b/include/ceed-impl.h @@ -275,6 +275,7 @@ struct CeedQFunctionContext_private { int (*GetDataRead)(CeedQFunctionContext, CeedMemType, void *); int (*RestoreData)(CeedQFunctionContext); int (*RestoreDataRead)(CeedQFunctionContext); + int (*DataDestroy)(CeedQFunctionContext); int (*Destroy)(CeedQFunctionContext); CeedQFunctionContextDataDestroyUser data_destroy_function; CeedMemType data_destroy_mem_type; diff --git a/include/ceed/backend.h b/include/ceed/backend.h index a9b248bb12..d22f5ccf59 100644 --- a/include/ceed/backend.h +++ b/include/ceed/backend.h @@ -260,6 +260,9 @@ CEED_EXTERN int CeedQFunctionContextSetDouble(CeedQFunctionContext ctx, CeedContextFieldLabel field_label, double *values); CEED_EXTERN int CeedQFunctionContextSetInt32(CeedQFunctionContext ctx, CeedContextFieldLabel field_label, int *values); +CEED_EXTERN int CeedQFunctionContextGetDataDestroy(CeedQFunctionContext ctx, + CeedMemType *f_mem_type, 
CeedQFunctionContextDataDestroyUser *f); +CEED_EXTERN int CeedQFunctionContextDestroyData(CeedQFunctionContext ctx); CEED_EXTERN int CeedQFunctionContextReference(CeedQFunctionContext ctx); CEED_EXTERN int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data); diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h index 9f46ee843c..bad828facd 100644 --- a/include/ceed/ceed.h +++ b/include/ceed/ceed.h @@ -696,7 +696,8 @@ CEED_EXTERN int CeedQFunctionContextGetContextSize(CeedQFunctionContext ctx, size_t *ctx_size); CEED_EXTERN int CeedQFunctionContextView(CeedQFunctionContext ctx, FILE *stream); -CEED_EXTERN int CeedQFunctionContextSetDataDestroy(CeedQFunctionContext ctx, CeedMemType f_mem_type, CeedQFunctionContextDataDestroyUser f); +CEED_EXTERN int CeedQFunctionContextSetDataDestroy(CeedQFunctionContext ctx, + CeedMemType f_mem_type, CeedQFunctionContextDataDestroyUser f); CEED_EXTERN int CeedQFunctionContextDestroy(CeedQFunctionContext *ctx); CEED_EXTERN int CeedOperatorCreate(Ceed ceed, CeedQFunction qf, diff --git a/interface/ceed-qfunctioncontext.c b/interface/ceed-qfunctioncontext.c index 8967953a6b..49cfb67e73 100644 --- a/interface/ceed-qfunctioncontext.c +++ b/interface/ceed-qfunctioncontext.c @@ -343,6 +343,58 @@ int CeedQFunctionContextSetInt32(CeedQFunctionContext ctx, return CEED_ERROR_SUCCESS; } +/** + @brief Get additional destroy routine for CeedQFunctionContext user data + + @param[in] ctx CeedQFunctionContext to get user destroy function + @param[out] f_mem_type Memory type to use when passing data into `f` + @param[out] f Additional routine to use to destroy user data + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedQFunctionContextGetDataDestroy(CeedQFunctionContext ctx, + CeedMemType *f_mem_type, CeedQFunctionContextDataDestroyUser *f) { + if (f_mem_type) *f_mem_type = ctx->data_destroy_mem_type; + if (f) *f = ctx->data_destroy_function; + return CEED_ERROR_SUCCESS; +} + +/** + 
@brief Destroy user data held by CeedQFunctionContext, using function set by + CeedQFunctionContextSetDataDestroy, if applicable + + @param[in,out] ctx CeedQFunctionContext to destroy user data + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedQFunctionContextDestroyData(CeedQFunctionContext ctx) { + int ierr; + + if (ctx->DataDestroy) { + ierr = ctx->DataDestroy(ctx); CeedChk(ierr); + } else { + CeedQFunctionContextDataDestroyUser data_destroy_function; + CeedMemType data_destroy_mem_type; + + ierr = CeedQFunctionContextGetDataDestroy(ctx, &data_destroy_mem_type, + &data_destroy_function); CeedChk(ierr); + if (data_destroy_function) { + void *data; + + ierr = CeedQFunctionContextGetData(ctx, data_destroy_mem_type, &data); + CeedChk(ierr); + ierr = data_destroy_function(data); CeedChk(ierr); + ierr = CeedQFunctionContextRestoreData(ctx, &data); CeedChk(ierr); + } + } + + return CEED_ERROR_SUCCESS; +} + /** @brief Increment the reference counter for a CeedQFunctionContext @@ -460,6 +512,7 @@ int CeedQFunctionContextSetData(CeedQFunctionContext ctx, CeedMemType mem_type, "access lock is already in use"); // LCOV_EXCL_STOP + ierr = CeedQFunctionContextDestroyData(ctx); CeedChk(ierr); ctx->ctx_size = size; ierr = ctx->SetData(ctx, mem_type, copy_mode, data); CeedChk(ierr); ctx->state += 2; @@ -807,15 +860,14 @@ int CeedQFunctionContextView(CeedQFunctionContext ctx, FILE *stream) { /** @brief Set additional destroy routine for CeedQFunctionContext user data - @param ctx CeedQFunctionContext to set user destroy function - @param f_mem_type Memory type to use when passing data into `f` - @param f Additional routine to use to destroy user data + @param[in] ctx CeedQFunctionContext to set user destroy function + @param[in] f_mem_type Memory type to use when passing data into `f` + @param[in] f Additional routine to use to destroy user data @return An error code: 0 - success, otherwise - failure @ref User **/ - int 
CeedQFunctionContextSetDataDestroy(CeedQFunctionContext ctx, CeedMemType f_mem_type, CeedQFunctionContextDataDestroyUser f) { if (!f) @@ -850,14 +902,7 @@ int CeedQFunctionContextDestroy(CeedQFunctionContext *ctx) { "lock is in use"); // LCOV_EXCL_STOP - if ((*ctx)->data_destroy_function) { - void *data; - - ierr = CeedQFunctionContextGetData(*ctx, (*ctx)->data_destroy_mem_type, &data); - CeedChk(ierr); - ierr = (*ctx)->data_destroy_function(data); CeedChk(ierr); - ierr = CeedQFunctionContextRestoreData(*ctx, &data); CeedChk(ierr); - } + ierr = CeedQFunctionContextDestroyData(*ctx); CeedChk(ierr); if ((*ctx)->Destroy) { ierr = (*ctx)->Destroy(*ctx); CeedChk(ierr); } diff --git a/interface/ceed.c b/interface/ceed.c index 52c3f22c57..5aecdbca5c 100644 --- a/interface/ceed.c +++ b/interface/ceed.c @@ -919,6 +919,7 @@ int CeedInit(const char *resource, Ceed *ceed) { CEED_FTABLE_ENTRY(CeedQFunctionContext, GetDataRead), CEED_FTABLE_ENTRY(CeedQFunctionContext, RestoreData), CEED_FTABLE_ENTRY(CeedQFunctionContext, RestoreDataRead), + CEED_FTABLE_ENTRY(CeedQFunctionContext, DataDestroy), CEED_FTABLE_ENTRY(CeedQFunctionContext, Destroy), CEED_FTABLE_ENTRY(CeedOperator, LinearAssembleQFunction), CEED_FTABLE_ENTRY(CeedOperator, LinearAssembleQFunctionUpdate), From 17ce10fa51ebb1fdf632a9d19f71ef68acbb0e05 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 15 Jun 2022 11:57:53 -0600 Subject: [PATCH 059/172] fluids - fix setup context data misuse --- examples/fluids/navierstokes.c | 2 -- examples/fluids/problems/advection.c | 3 +++ examples/fluids/problems/advection2d.c | 3 +++ examples/fluids/problems/densitycurrent.c | 7 ++++++- examples/fluids/problems/newtonian.c | 3 +++ examples/fluids/problems/shocktube.c | 3 +++ 6 files changed, 18 insertions(+), 3 deletions(-) diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index 3e3ba3bc3c..f574e18be9 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -359,8 +359,6 
@@ int main(int argc, char **argv) { // -- Function list ierr = PetscFunctionListDestroy(&app_ctx->problems); CHKERRQ(ierr); - ierr = PetscFree(problem->bc_ctx); CHKERRQ(ierr); - // -- Structs ierr = PetscFree(units); CHKERRQ(ierr); ierr = PetscFree(user); CHKERRQ(ierr); diff --git a/examples/fluids/problems/advection.c b/examples/fluids/problems/advection.c index 58bed71b58..8964682aa8 100644 --- a/examples/fluids/problems/advection.c +++ b/examples/fluids/problems/advection.c @@ -204,6 +204,9 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *ctx) { CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context); CeedQFunctionContextSetData(problem->ics.qfunction_context, CEED_MEM_HOST, CEED_USE_POINTER, sizeof(*setup_context), setup_context); + CeedQFunctionContextSetDataDestroy(problem->ics.qfunction_context, + CEED_MEM_HOST, + FreeContextPetsc); CeedQFunctionContextCreate(user->ceed, &advection_context); CeedQFunctionContextSetData(advection_context, CEED_MEM_HOST, diff --git a/examples/fluids/problems/advection2d.c b/examples/fluids/problems/advection2d.c index a7a6a417c2..24529b5517 100644 --- a/examples/fluids/problems/advection2d.c +++ b/examples/fluids/problems/advection2d.c @@ -181,6 +181,9 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *ctx) { CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context); CeedQFunctionContextSetData(problem->ics.qfunction_context, CEED_MEM_HOST, CEED_USE_POINTER, sizeof(*setup_context), setup_context); + CeedQFunctionContextSetDataDestroy(problem->ics.qfunction_context, + CEED_MEM_HOST, + FreeContextPetsc); CeedQFunctionContextCreate(user->ceed, &advection_context); CeedQFunctionContextSetData(advection_context, CEED_MEM_HOST, diff --git a/examples/fluids/problems/densitycurrent.c b/examples/fluids/problems/densitycurrent.c index 92bc14d5c9..27c4c40078 100644 --- a/examples/fluids/problems/densitycurrent.c +++ b/examples/fluids/problems/densitycurrent.c @@ -27,7 +27,8 
@@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *ctx) { problem->ics.qfunction = ICsDC; problem->ics.qfunction_loc = ICsDC_loc; problem->bc = Exact_DC; - setup_context = problem->bc_ctx; + CeedQFunctionContextGetData(problem->ics.qfunction_context, CEED_MEM_HOST, + &setup_context); // ------------------------------------------------------ // Create the libCEED context @@ -108,5 +109,9 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *ctx) { setup_context->dc_axis[1] = dc_axis[1]; setup_context->dc_axis[2] = dc_axis[2]; + problem->bc_ctx = + setup_context; // This is bad, context data should only be accessed via Get/Restore + CeedQFunctionContextRestoreData(problem->ics.qfunction_context, &setup_context); + PetscFunctionReturn(0); } diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index 9679111398..fe393a63af 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -264,6 +264,9 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context); CeedQFunctionContextSetData(problem->ics.qfunction_context, CEED_MEM_HOST, CEED_USE_POINTER, sizeof(*setup_context), setup_context); + CeedQFunctionContextSetDataDestroy(problem->ics.qfunction_context, + CEED_MEM_HOST, + FreeContextPetsc); CeedQFunctionContextRegisterDouble(problem->ics.qfunction_context, "evaluation time", (char *)&setup_context->time - (char *)setup_context, 1, "Time of evaluation"); diff --git a/examples/fluids/problems/shocktube.c b/examples/fluids/problems/shocktube.c index 8c52a69483..672e1b0f3e 100644 --- a/examples/fluids/problems/shocktube.c +++ b/examples/fluids/problems/shocktube.c @@ -161,6 +161,9 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *ctx) { CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context); 
CeedQFunctionContextSetData(problem->ics.qfunction_context, CEED_MEM_HOST, CEED_USE_POINTER, sizeof(*setup_context), setup_context); + CeedQFunctionContextSetDataDestroy(problem->ics.qfunction_context, + CEED_MEM_HOST, + FreeContextPetsc); CeedQFunctionContextCreate(user->ceed, &shocktube_context); CeedQFunctionContextSetData(shocktube_context, CEED_MEM_HOST, From 6cccb8e4b3ec0b5d0d60888533d5b9b04933e739 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Fri, 17 Jun 2022 15:39:17 -0600 Subject: [PATCH 060/172] doc - note bugfix for CeedOperatorLinearAssemble --- doc/sphinx/source/releasenotes.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index 1c0b7a93f0..804c31fb3e 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -10,6 +10,10 @@ On this page we provide a summary of the main API changes, new features and exam - Added {c:func}`CeedOperatorSetName` for more readable {c:func}`CeedOperatorView` output. +### Bugfix + +- Fix bugs in CPU implementation of {c:func}`CeedOperatorLinearAssemble` when there are different numbers of active input modes and active output modes. + (v0-10-1)= ## v0.10.1 (Apr 11, 2022) From e0e3543688abb0ad4f8d8e0577331b09b75ab099 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Fri, 17 Jun 2022 15:40:44 -0600 Subject: [PATCH 061/172] doc - note fluids performance enhancements --- doc/sphinx/source/releasenotes.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index 1c0b7a93f0..bd0483e7ca 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -10,6 +10,10 @@ On this page we provide a summary of the main API changes, new features and exam - Added {c:func}`CeedOperatorSetName` for more readable {c:func}`CeedOperatorView` output.
+### Examples + +- Added various performance enhancements for {ref}`example-petsc-navier-stokes` + (v0-10-1)= ## v0.10.1 (Apr 11, 2022) From 44d7a66c6073d76a4a1f8dc0795479283da37ec1 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Fri, 17 Jun 2022 10:55:30 -0600 Subject: [PATCH 062/172] gpu - fix restriction CEED_COPY_VALUES + CEED_MEM_HOST --- backends/cuda-ref/ceed-cuda-restriction.c | 13 +++++++++++++ backends/hip-ref/ceed-hip-ref-restriction.c | 13 +++++++++++++ doc/sphinx/source/releasenotes.md | 4 ++++ 3 files changed, 30 insertions(+) diff --git a/backends/cuda-ref/ceed-cuda-restriction.c b/backends/cuda-ref/ceed-cuda-restriction.c index 57b92e36c9..9f90613299 100644 --- a/backends/cuda-ref/ceed-cuda-restriction.c +++ b/backends/cuda-ref/ceed-cuda-restriction.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "ceed-cuda-ref.h" #include "../cuda/ceed-cuda-compile.h" @@ -298,6 +299,12 @@ int CeedElemRestrictionCreate_Cuda(CeedMemType m_type, CeedCopyMode copy_mode, impl->h_ind = (CeedInt *)indices; break; case CEED_COPY_VALUES: + if (indices != NULL) { + ierr = CeedMalloc(elem_size * num_elem, &impl->h_ind_allocated); + CeedChkBackend(ierr); + memcpy(impl->h_ind_allocated, indices, elem_size * num_elem * sizeof(CeedInt)); + impl->h_ind = impl->h_ind_allocated; + } break; } if (indices != NULL) { @@ -329,6 +336,12 @@ int CeedElemRestrictionCreate_Cuda(CeedMemType m_type, CeedCopyMode copy_mode, impl->d_ind = (CeedInt *)indices; } if (indices != NULL) { + ierr = CeedMalloc(elem_size * num_elem, &impl->h_ind_allocated); + CeedChkBackend(ierr); + ierr = cudaMemcpy(impl->h_ind_allocated, impl->d_ind, + elem_size * num_elem * sizeof(CeedInt), cudaMemcpyDeviceToHost); + CeedChk_Cu(ceed, ierr); + impl->h_ind = impl->h_ind_allocated; ierr = CeedElemRestrictionOffset_Cuda(r, indices); CeedChkBackend(ierr); } } else { diff --git a/backends/hip-ref/ceed-hip-ref-restriction.c b/backends/hip-ref/ceed-hip-ref-restriction.c index 139af010c6..4ff56a3bbf 
100644 --- a/backends/hip-ref/ceed-hip-ref-restriction.c +++ b/backends/hip-ref/ceed-hip-ref-restriction.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "ceed-hip-ref.h" #include "../hip/ceed-hip-compile.h" @@ -296,6 +297,12 @@ int CeedElemRestrictionCreate_Hip(CeedMemType mtype, CeedCopyMode cmode, impl->h_ind = (CeedInt *)indices; break; case CEED_COPY_VALUES: + if (indices != NULL) { + ierr = CeedMalloc(elem_size * num_elem, &impl->h_ind_allocated); + CeedChkBackend(ierr); + memcpy(impl->h_ind_allocated, indices, elem_size * num_elem * sizeof(CeedInt)); + impl->h_ind = impl->h_ind_allocated; + } break; } if (indices != NULL) { @@ -327,6 +334,12 @@ int CeedElemRestrictionCreate_Hip(CeedMemType mtype, CeedCopyMode cmode, impl->d_ind = (CeedInt *)indices; } if (indices != NULL) { + ierr = CeedMalloc(elem_size * num_elem, &impl->h_ind_allocated); + CeedChkBackend(ierr); + ierr = hipMemcpy(impl->h_ind_allocated, impl->d_ind, + elem_size * num_elem * sizeof(CeedInt), hipMemcpyDeviceToHost); + CeedChk_Hip(ceed, ierr); + impl->h_ind = impl->h_ind_allocated; ierr = CeedElemRestrictionOffset_Hip(r, indices); CeedChkBackend(ierr); } } else { diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index 1c0b7a93f0..952437510c 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -10,6 +10,10 @@ On this page we provide a summary of the main API changes, new features and exam - Added {c:func}`CeedOperatorSetName` for more readable {c:func}`CeedOperatorView` output. +### Bugfix + +- Fix storing of indices for `CeedElemRestriction` on the host with GPU backends. 
+ (v0-10-1)= ## v0.10.1 (Apr 11, 2022) From 7b63f5c6881a9a0bb827bb6972a33367d9223442 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Sun, 19 Jun 2022 15:35:45 -0600 Subject: [PATCH 063/172] CeedOperatorAssemblePointBlockDiagonal: fix restriction sizes for subdomain integrals Formerly would give errors such as interface/ceed-elemrestriction.c:862 in CeedElemRestrictionApply(): Output vector size 125050 not compatible with element restriction (124050, 6000) (noted in https://github.com/CEED/libCEED/pull/994#discussion_r899808040) --- backends/cuda-ref/ceed-cuda-ref-operator.c | 8 ++++---- backends/hip-ref/ceed-hip-ref-operator.c | 8 ++++---- doc/sphinx/source/releasenotes.md | 1 + interface/ceed-preconditioning.c | 9 ++++----- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/backends/cuda-ref/ceed-cuda-ref-operator.c b/backends/cuda-ref/ceed-cuda-ref-operator.c index 811d754052..25adfd4f97 100644 --- a/backends/cuda-ref/ceed-cuda-ref-operator.c +++ b/backends/cuda-ref/ceed-cuda-ref-operator.c @@ -733,25 +733,25 @@ static int CreatePBRestriction(CeedElemRestriction rstr, CeedChkBackend(ierr); // Expand offsets - CeedInt nelem, ncomp, elemsize, compstride, max = 1, *pbOffsets; + CeedInt nelem, ncomp, elemsize, compstride, *pbOffsets; + CeedSize l_size; ierr = CeedElemRestrictionGetNumElements(rstr, &nelem); CeedChkBackend(ierr); ierr = CeedElemRestrictionGetNumComponents(rstr, &ncomp); CeedChkBackend(ierr); ierr = CeedElemRestrictionGetElementSize(rstr, &elemsize); CeedChkBackend(ierr); ierr = CeedElemRestrictionGetCompStride(rstr, &compstride); CeedChkBackend(ierr); + ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChkBackend(ierr); CeedInt shift = ncomp; if (compstride != 1) shift *= ncomp; ierr = CeedCalloc(nelem*elemsize, &pbOffsets); CeedChkBackend(ierr); for (CeedInt i = 0; i < nelem*elemsize; i++) { pbOffsets[i] = offsets[i]*shift; - if (pbOffsets[i] > max) - max = pbOffsets[i]; } // Create new restriction ierr = 
CeedElemRestrictionCreate(ceed, nelem, elemsize, ncomp*ncomp, 1, - max + ncomp*ncomp, CEED_MEM_HOST, + l_size * ncomp, CEED_MEM_HOST, CEED_OWN_POINTER, pbOffsets, pbRstr); CeedChkBackend(ierr); diff --git a/backends/hip-ref/ceed-hip-ref-operator.c b/backends/hip-ref/ceed-hip-ref-operator.c index 52b9df64b5..04af59900a 100644 --- a/backends/hip-ref/ceed-hip-ref-operator.c +++ b/backends/hip-ref/ceed-hip-ref-operator.c @@ -729,25 +729,25 @@ static int CreatePBRestriction(CeedElemRestriction rstr, CeedChkBackend(ierr); // Expand offsets - CeedInt nelem, ncomp, elemsize, compstride, max = 1, *pbOffsets; + CeedInt nelem, ncomp, elemsize, compstride, *pbOffsets; + CeedSize l_size; ierr = CeedElemRestrictionGetNumElements(rstr, &nelem); CeedChkBackend(ierr); ierr = CeedElemRestrictionGetNumComponents(rstr, &ncomp); CeedChkBackend(ierr); ierr = CeedElemRestrictionGetElementSize(rstr, &elemsize); CeedChkBackend(ierr); ierr = CeedElemRestrictionGetCompStride(rstr, &compstride); CeedChkBackend(ierr); + ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChkBackend(ierr); CeedInt shift = ncomp; if (compstride != 1) shift *= ncomp; ierr = CeedCalloc(nelem*elemsize, &pbOffsets); CeedChkBackend(ierr); for (CeedInt i = 0; i < nelem*elemsize; i++) { pbOffsets[i] = offsets[i]*shift; - if (pbOffsets[i] > max) - max = pbOffsets[i]; } // Create new restriction ierr = CeedElemRestrictionCreate(ceed, nelem, elemsize, ncomp*ncomp, 1, - max + ncomp*ncomp, CEED_MEM_HOST, + l_size * ncomp, CEED_MEM_HOST, CEED_OWN_POINTER, pbOffsets, pbRstr); CeedChkBackend(ierr); diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index 6155a85b06..be4f61955e 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -13,6 +13,7 @@ On this page we provide a summary of the main API changes, new features and exam ### Bugfix - Fix storing of indices for `CeedElemRestriction` on the host with GPU backends. 
+- Fix `CeedElemRestriction` sizing for {c:func}`CeedOperatorAssemblePointBlockDiagonal`. ### Examples diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index aec35492c7..91df9e0d71 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -250,12 +250,13 @@ static int CeedOperatorCreateActivePointBlockRestriction( CeedChk(ierr); // Expand offsets - CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1, - *pointblock_offsets; + CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets; + CeedSize l_size; ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr); ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr); ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr); ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr); + ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr); CeedInt shift = num_comp; if (comp_stride != 1) shift *= num_comp; @@ -263,13 +264,11 @@ static int CeedOperatorCreateActivePointBlockRestriction( CeedChk(ierr); for (CeedInt i = 0; i < num_elem*elem_size; i++) { pointblock_offsets[i] = offsets[i]*shift; - if (pointblock_offsets[i] > max) - max = pointblock_offsets[i]; } // Create new restriction ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp, - 1, max + num_comp*num_comp, CEED_MEM_HOST, + 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr); CeedChk(ierr); From f6af633f48f40461e0ac4411cacdc7e4180f53e5 Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Fri, 6 May 2022 15:04:37 -0600 Subject: [PATCH 064/172] Use rtc for MAGMA elem restriction and tensor basis kernels --- backends/magma/ceed-magma-basis.c | 279 +++++++++++-- backends/magma/ceed-magma-restriction.c | 75 +++- backends/magma/ceed-magma.h | 61 ++- backends/magma/kernels/common/grad.h | 378 
------------------ backends/magma/kernels/common/grad_device.h | 325 --------------- backends/magma/kernels/common/interp.h | 191 --------- backends/magma/kernels/common/interp_device.h | 337 ---------------- backends/magma/kernels/common/weight.h | 124 +----- backends/magma/kernels/common/weight_device.h | 135 ------- backends/magma/kernels/cuda/grad_1d.cu | 215 ---------- backends/magma/kernels/cuda/grad_generic.cu | 205 ---------- backends/magma/kernels/cuda/gradn_2d.cu | 218 ---------- backends/magma/kernels/cuda/gradn_3d.cu | 218 ---------- backends/magma/kernels/cuda/gradt_2d.cu | 219 ---------- backends/magma/kernels/cuda/gradt_3d.cu | 218 ---------- backends/magma/kernels/cuda/interp_1d.cu | 216 ---------- backends/magma/kernels/cuda/interp_2d.cu | 216 ---------- backends/magma/kernels/cuda/interp_3d.cu | 216 ---------- backends/magma/kernels/cuda/interp_generic.cu | 248 ------------ backends/magma/kernels/cuda/magma_devptr.cu | 47 --- .../kernels/cuda/magma_drestrictApply.cu | 93 ----- backends/magma/kernels/cuda/weight_1d.cu | 115 ------ backends/magma/kernels/cuda/weight_2d.cu | 114 ------ backends/magma/kernels/cuda/weight_3d.cu | 114 ------ backends/magma/kernels/cuda/weight_generic.cu | 113 ------ backends/magma/kernels/hip/grad_1d.hip.cpp | 208 ---------- .../magma/kernels/hip/grad_generic.hip.cpp | 198 --------- backends/magma/kernels/hip/gradn_2d.hip.cpp | 210 ---------- backends/magma/kernels/hip/gradn_3d.hip.cpp | 210 ---------- backends/magma/kernels/hip/gradt_2d.hip.cpp | 211 ---------- backends/magma/kernels/hip/gradt_3d.hip.cpp | 210 ---------- backends/magma/kernels/hip/interp_1d.hip.cpp | 208 ---------- backends/magma/kernels/hip/interp_2d.hip.cpp | 208 ---------- backends/magma/kernels/hip/interp_3d.hip.cpp | 208 ---------- .../magma/kernels/hip/interp_generic.hip.cpp | 240 ----------- .../magma/kernels/hip/magma_devptr.hip.cpp | 42 -- .../kernels/hip/magma_drestrictApply.hip.cpp | 93 ----- backends/magma/kernels/hip/weight_1d.hip.cpp | 
107 ----- backends/magma/kernels/hip/weight_2d.hip.cpp | 106 ----- backends/magma/kernels/hip/weight_3d.hip.cpp | 106 ----- .../magma/kernels/hip/weight_generic.hip.cpp | 104 ----- backends/magma/magma_grad.c | 64 --- backends/magma/magma_interp.c | 45 --- backends/magma/magma_weight.c | 39 -- .../ceed/jit-source/magma}/elem_restriction.h | 54 +-- include/ceed/jit-source/magma/grad-1d.h | 141 +++++++ include/ceed/jit-source/magma/grad-2d.h | 202 ++++++++++ include/ceed/jit-source/magma/grad-3d.h | 240 +++++++++++ include/ceed/jit-source/magma/interp-1d.h | 141 +++++++ include/ceed/jit-source/magma/interp-2d.h | 157 ++++++++ include/ceed/jit-source/magma/interp-3d.h | 185 +++++++++ .../jit-source/magma}/magma_common_device.h | 204 ++++------ include/ceed/jit-source/magma/weight-1d.h | 54 +++ include/ceed/jit-source/magma/weight-2d.h | 63 +++ include/ceed/jit-source/magma/weight-3d.h | 64 +++ include/ceed/jit-tools.h | 1 + interface/ceed-jit-tools.c | 10 +- 57 files changed, 1690 insertions(+), 7333 deletions(-) delete mode 100644 backends/magma/kernels/common/grad.h delete mode 100644 backends/magma/kernels/common/grad_device.h delete mode 100644 backends/magma/kernels/common/interp.h delete mode 100644 backends/magma/kernels/common/interp_device.h delete mode 100644 backends/magma/kernels/common/weight_device.h delete mode 100644 backends/magma/kernels/cuda/grad_1d.cu delete mode 100644 backends/magma/kernels/cuda/grad_generic.cu delete mode 100644 backends/magma/kernels/cuda/gradn_2d.cu delete mode 100644 backends/magma/kernels/cuda/gradn_3d.cu delete mode 100644 backends/magma/kernels/cuda/gradt_2d.cu delete mode 100644 backends/magma/kernels/cuda/gradt_3d.cu delete mode 100644 backends/magma/kernels/cuda/interp_1d.cu delete mode 100644 backends/magma/kernels/cuda/interp_2d.cu delete mode 100644 backends/magma/kernels/cuda/interp_3d.cu delete mode 100644 backends/magma/kernels/cuda/interp_generic.cu delete mode 100644 backends/magma/kernels/cuda/magma_devptr.cu 
delete mode 100644 backends/magma/kernels/cuda/magma_drestrictApply.cu delete mode 100644 backends/magma/kernels/cuda/weight_1d.cu delete mode 100644 backends/magma/kernels/cuda/weight_2d.cu delete mode 100644 backends/magma/kernels/cuda/weight_3d.cu delete mode 100644 backends/magma/kernels/hip/grad_1d.hip.cpp delete mode 100644 backends/magma/kernels/hip/grad_generic.hip.cpp delete mode 100644 backends/magma/kernels/hip/gradn_2d.hip.cpp delete mode 100644 backends/magma/kernels/hip/gradn_3d.hip.cpp delete mode 100644 backends/magma/kernels/hip/gradt_2d.hip.cpp delete mode 100644 backends/magma/kernels/hip/gradt_3d.hip.cpp delete mode 100644 backends/magma/kernels/hip/interp_1d.hip.cpp delete mode 100644 backends/magma/kernels/hip/interp_2d.hip.cpp delete mode 100644 backends/magma/kernels/hip/interp_3d.hip.cpp delete mode 100644 backends/magma/kernels/hip/interp_generic.hip.cpp delete mode 100644 backends/magma/kernels/hip/magma_devptr.hip.cpp delete mode 100644 backends/magma/kernels/hip/magma_drestrictApply.hip.cpp delete mode 100644 backends/magma/kernels/hip/weight_1d.hip.cpp delete mode 100644 backends/magma/kernels/hip/weight_2d.hip.cpp delete mode 100644 backends/magma/kernels/hip/weight_3d.hip.cpp delete mode 100644 backends/magma/magma_grad.c delete mode 100644 backends/magma/magma_interp.c delete mode 100644 backends/magma/magma_weight.c rename {backends/magma/kernels/common => include/ceed/jit-source/magma}/elem_restriction.h (64%) create mode 100644 include/ceed/jit-source/magma/grad-1d.h create mode 100644 include/ceed/jit-source/magma/grad-2d.h create mode 100644 include/ceed/jit-source/magma/grad-3d.h create mode 100644 include/ceed/jit-source/magma/interp-1d.h create mode 100644 include/ceed/jit-source/magma/interp-2d.h create mode 100644 include/ceed/jit-source/magma/interp-3d.h rename {backends/magma/kernels/common => include/ceed/jit-source/magma}/magma_common_device.h (61%) create mode 100644 include/ceed/jit-source/magma/weight-1d.h create 
mode 100644 include/ceed/jit-source/magma/weight-2d.h create mode 100644 include/ceed/jit-source/magma/weight-3d.h diff --git a/backends/magma/ceed-magma-basis.c b/backends/magma/ceed-magma-basis.c index acd8a30ab1..7c4967435a 100644 --- a/backends/magma/ceed-magma-basis.c +++ b/backends/magma/ceed-magma-basis.c @@ -7,7 +7,16 @@ #include #include +#include +#include #include "ceed-magma.h" +#ifdef HAVE_HIP +#include "../hip/ceed-hip-common.h" +#include "../hip/ceed-hip-compile.h" +#else +#include "../cuda/ceed-cuda-common.h" +#include "../cuda/ceed-cuda-compile.h" +#endif #ifdef __cplusplus CEED_INTERN "C" @@ -60,6 +69,7 @@ int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, } ceed_magma_queue_sync( data->queue ); } + switch (emode) { case CEED_EVAL_INTERP: { CeedInt P = P1d, Q = Q1d; @@ -97,14 +107,48 @@ int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, u_compstride = nelem * elquadsize; } - ierr = magma_interp(P, Q, dim, ncomp, - impl->dinterp1d, tmode, - u, u_elstride, u_compstride, - v, v_elstride, v_compstride, - nelem, data->basis_kernel_mode, - data->queue); - if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND, - "MAGMA: launch failure detected for magma_interp"); + CeedInt nthreads = 1; + CeedInt ntcol = 1; + CeedInt shmem = 0; + CeedInt maxPQ = CeedIntMax(P, Q); + + switch (dim) { + case 1: + nthreads = maxPQ; + ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_1D); + shmem += sizeof(CeedScalar) * ntcol * ( ncomp * (1*P + 1*Q) ); + shmem += sizeof(CeedScalar) * (P*Q); + break; + case 2: + nthreads = maxPQ; + ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_2D); + shmem += P*Q *sizeof(CeedScalar); // for sT + shmem += ntcol * ( P*maxPQ*sizeof( + CeedScalar) ); // for reforming rU we need PxP, and for the intermediate output we need PxQ + break; + case 3: + nthreads = maxPQ*maxPQ; + ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_3D); + shmem += sizeof(CeedScalar)* (P*Q); // for sT + shmem += sizeof(CeedScalar)* ntcol * 
(CeedIntMax(P*P*maxPQ, + P*Q*Q)); // rU needs P^2xP, the intermediate output needs max(P^2xQ,PQ^2) + } + CeedInt grid = (nelem + ntcol-1) / ntcol; + void *args[] = {&impl->dinterp1d, + &u, &u_elstride, &u_compstride, + &v, &v_elstride, &v_compstride, + &nelem + }; + + if (tmode == CEED_TRANSPOSE) { + ierr = MAGMA_RTC_RUN_KERNEL_DIM_SH(ceed, impl->magma_interp_tr, grid, + nthreads, ntcol, 1, shmem, + args); CeedChkBackend(ierr); + } else { + ierr = MAGMA_RTC_RUN_KERNEL_DIM_SH(ceed, impl->magma_interp, grid, + nthreads, ntcol, 1, shmem, + args); CeedChkBackend(ierr); + } } break; case CEED_EVAL_GRAD: { @@ -155,14 +199,49 @@ int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, } - ierr = magma_grad( P, Q, dim, ncomp, - impl->dinterp1d, impl->dgrad1d, tmode, - u, u_elstride, u_compstride, u_dimstride, - v, v_elstride, v_compstride, v_dimstride, - nelem, data->basis_kernel_mode, - data->queue); - if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND, - "MAGMA: launch failure detected for magma_grad"); + CeedInt nthreads = 1; + CeedInt ntcol = 1; + CeedInt shmem = 0; + CeedInt maxPQ = CeedIntMax(P, Q); + + switch (dim) { + case 1: + nthreads = maxPQ; + ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_1D); + shmem += sizeof(CeedScalar) * ntcol * (ncomp * (1*P + 1*Q)); + shmem += sizeof(CeedScalar) * (P*Q); + break; + case 2: + nthreads = maxPQ; + ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_2D); + shmem += sizeof(CeedScalar) * 2*P*Q; // for sTinterp and sTgrad + shmem += sizeof(CeedScalar) * ntcol * + (P*maxPQ); // for reforming rU we need PxP, and for the intermediate output we need PxQ + break; + case 3: + nthreads = maxPQ * maxPQ; + ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_3D); + shmem += sizeof(CeedScalar) * 2*P*Q; // for sTinterp and sTgrad + shmem += sizeof(CeedScalar) * ntcol * CeedIntMax(P*P*P, + (P*P*Q) + + (P*Q*Q)); // rU needs P^2xP, the intermediate outputs need (P^2.Q + P.Q^2) + } + CeedInt grid = (nelem + ntcol-1) / ntcol; + 
void *args[] = {&impl->dinterp1d, &impl->dgrad1d, + &u, &u_elstride, &u_compstride, &u_dimstride, + &v, &v_elstride, &v_compstride, &v_dimstride, + &nelem + }; + + if (tmode == CEED_TRANSPOSE) { + ierr = MAGMA_RTC_RUN_KERNEL_DIM_SH(ceed, impl->magma_grad_tr, grid, + nthreads, ntcol, 1, shmem, + args); CeedChkBackend(ierr); + } else { + ierr = MAGMA_RTC_RUN_KERNEL_DIM_SH(ceed, impl->magma_grad, grid, + nthreads, ntcol, 1, shmem, + args); CeedChkBackend(ierr); + } } break; case CEED_EVAL_WEIGHT: { @@ -172,11 +251,34 @@ int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); // LCOV_EXCL_STOP CeedInt Q = Q1d; - int eldofssize = CeedIntPow(Q, dim); - ierr = magma_weight(Q, dim, impl->dqweight1d, v, eldofssize, nelem, - data->basis_kernel_mode, data->queue); - if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND, - "MAGMA: launch failure detected for magma_weight"); + CeedInt eldofssize = CeedIntPow(Q, dim); + CeedInt nthreads = 1; + CeedInt ntcol = 1; + CeedInt shmem = 0; + + switch (dim) { + case 1: + nthreads = Q; + ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_1D); + shmem += sizeof(CeedScalar) * Q; // for dqweight1d + shmem += sizeof(CeedScalar) * ntcol * Q; // for output + break; + case 2: + nthreads = Q; + ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_2D); + shmem += sizeof(CeedScalar) * Q; // for dqweight1d + break; + case 3: + nthreads = Q * Q; + ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_3D); + shmem += sizeof(CeedScalar) * Q; // for dqweight1d + } + CeedInt grid = (nelem + ntcol-1) / ntcol; + void *args[] = {&impl->dqweight1d, &v, &eldofssize, &nelem}; + + ierr = MAGMA_RTC_RUN_KERNEL_DIM_SH(ceed, impl->magma_weight, grid, + nthreads, ntcol, 1, shmem, + args); CeedChkBackend(ierr); } break; // LCOV_EXCL_START @@ -465,6 +567,13 @@ int CeedBasisDestroy_Magma(CeedBasis basis) { ierr = magma_free(impl->dinterp1d); CeedChkBackend(ierr); ierr = magma_free(impl->dgrad1d); 
CeedChkBackend(ierr); ierr = magma_free(impl->dqweight1d); CeedChkBackend(ierr); + Ceed ceed; + ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); + #ifdef HAVE_HIP + ierr = hipModuleUnload(impl->module); CeedChk_Hip(ceed, ierr); + #else + ierr = cuModuleUnload(impl->module); CeedChk_Cu(ceed, ierr); + #endif ierr = CeedFree(&impl); CeedChkBackend(ierr); @@ -499,39 +608,127 @@ int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, const CeedScalar *qweight1d, CeedBasis basis) { int ierr; CeedBasis_Magma *impl; + ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr); Ceed ceed; ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); // Check for supported parameters CeedInt ncomp = 0; ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); - if (ncomp > 3) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, - "Magma backend does not support tensor bases with more than 3 components"); - // LCOV_EXCL_STOP - if (P1d > 10) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, - "Magma backend does not support tensor bases with more than 10 nodes in each dimension"); - // LCOV_EXCL_STOP - if (Q1d > 10) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, - "Magma backend does not support tensor bases with more than 10 quadrature points in each dimension"); - // LCOV_EXCL_STOP - Ceed_Magma *data; ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); + // Compile kernels + char *magma_common_path; + char *interp_path, *grad_path, *weight_path; + char *basis_kernel_source; + ierr = CeedGetJitAbsolutePath(ceed, + "ceed/jit-source/magma/magma_common_device.h", + &magma_common_path); CeedChkBackend(ierr); + CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n"); + ierr = CeedLoadSourceToBuffer(ceed, magma_common_path, + &basis_kernel_source); + CeedChkBackend(ierr); + char *interp_name_base = "ceed/jit-source/magma/interp"; + CeedInt interp_name_len = strlen(interp_name_base) + 6; + 
char interp_name[interp_name_len]; + snprintf(interp_name, interp_name_len, "%s-%dd.h", interp_name_base, dim); + ierr = CeedGetJitAbsolutePath(ceed, interp_name, &interp_path); + CeedChkBackend(ierr); + ierr = CeedLoadSourceToInitializedBuffer(ceed, interp_path, + &basis_kernel_source); + CeedChkBackend(ierr); + char *grad_name_base = "ceed/jit-source/magma/grad"; + CeedInt grad_name_len = strlen(grad_name_base) + 6; + char grad_name[grad_name_len]; + snprintf(grad_name, grad_name_len, "%s-%dd.h", grad_name_base, dim); + ierr = CeedGetJitAbsolutePath(ceed, grad_name, &grad_path); + CeedChkBackend(ierr); + ierr = CeedLoadSourceToInitializedBuffer(ceed, grad_path, + &basis_kernel_source); + CeedChkBackend(ierr); + char *weight_name_base = "ceed/jit-source/magma/weight"; + CeedInt weight_name_len = strlen(weight_name_base) + 6; + char weight_name[weight_name_len]; + snprintf(weight_name, weight_name_len, "%s-%dd.h", weight_name_base, dim); + ierr = CeedGetJitAbsolutePath(ceed, weight_name, &weight_path); + CeedChkBackend(ierr); + ierr = CeedLoadSourceToInitializedBuffer(ceed, weight_path, + &basis_kernel_source); + CeedChkBackend(ierr); + CeedDebug256(ceed, 2, + "----- Loading Basis Kernel Source Complete! 
-----\n"); + // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip + // data + Ceed delegate; + ierr = CeedGetDelegate(ceed, &delegate); CeedChkBackend(ierr); + ierr = MAGMA_RTC_COMPILE(delegate, basis_kernel_source, &impl->module, 5, + "DIM", dim, + "NCOMP", ncomp, + "P", P1d, + "Q", Q1d, + "MAXPQ", CeedIntMax(P1d, Q1d)); + CeedChkBackend(ierr); + + // Kernel setup + switch (dim) { + case 1: + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_interpn_1d_kernel", + &impl->magma_interp); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_interpt_1d_kernel", + &impl->magma_interp_tr); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_gradn_1d_kernel", + &impl->magma_grad); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_gradt_1d_kernel", + &impl->magma_grad_tr); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_weight_1d_kernel", + &impl->magma_weight); + CeedChkBackend(ierr); + break; + case 2: + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_interpn_2d_kernel", + &impl->magma_interp); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_interpt_2d_kernel", + &impl->magma_interp_tr); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_gradn_2d_kernel", + &impl->magma_grad); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_gradt_2d_kernel", + &impl->magma_grad_tr); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_weight_2d_kernel", + &impl->magma_weight); + CeedChkBackend(ierr); + break; + case 3: + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_interpn_3d_kernel", + &impl->magma_interp); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_interpt_3d_kernel", + &impl->magma_interp_tr); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, 
impl->module, "magma_gradn_3d_kernel", + &impl->magma_grad); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_gradt_3d_kernel", + &impl->magma_grad_tr); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_weight_3d_kernel", + &impl->magma_weight); + CeedChkBackend(ierr); + } + ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Magma); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Magma); CeedChkBackend(ierr); - ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr); - ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr); - // Copy qref1d to the GPU ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0])); CeedChkBackend(ierr); @@ -556,6 +753,8 @@ int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1, data->queue); + ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } diff --git a/backends/magma/ceed-magma-restriction.c b/backends/magma/ceed-magma-restriction.c index c491d89144..ac8a42be8f 100644 --- a/backends/magma/ceed-magma-restriction.c +++ b/backends/magma/ceed-magma-restriction.c @@ -7,8 +7,16 @@ #include #include +#include #include #include "ceed-magma.h" +#ifdef HAVE_HIP +#include "../hip/ceed-hip-common.h" +#include "../hip/ceed-hip-compile.h" +#else +#include "../cuda/ceed-cuda-common.h" +#include "../cuda/ceed-cuda-compile.h" +#endif static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, CeedTransposeMode tmode, CeedVector u, CeedVector v, CeedRequest *request) { @@ -75,13 +83,16 @@ static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, magma_setvector(3, sizeof(CeedInt), strides, 1, dstrides, 1, data->queue); } + void *args[] = {&ncomp, &esize, &nelem, &dstrides, &du, &dv}; + CeedInt grid = nelem; + CeedInt blocksize = 256; // Perform strided 
restriction with dstrides if (tmode == CEED_TRANSPOSE) { - magma_writeDofsStrided(ncomp, esize, nelem, dstrides, du, dv, - data->queue); + ierr = MAGMA_RTC_RUN_KERNEL(ceed, impl->StridedTranspose, + grid, blocksize, args); CeedChkBackend(ierr); } else { - magma_readDofsStrided(ncomp, esize, nelem, dstrides, du, dv, - data->queue); + ierr = MAGMA_RTC_RUN_KERNEL(ceed, impl->StridedNoTranspose, + grid, blocksize, args); CeedChkBackend(ierr); } ierr = magma_free(dstrides); CeedChkBackend(ierr); @@ -90,13 +101,16 @@ static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, CeedInt compstride; ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChkBackend(ierr); + void *args[] = {&ncomp, &compstride, &esize, &nelem, &impl->doffsets, &du, &dv}; + CeedInt grid = nelem; + CeedInt blocksize = 256; if (tmode == CEED_TRANSPOSE) { - magma_writeDofsOffset(ncomp, compstride, esize, nelem, impl->doffsets, - du, dv, data->queue); + ierr = MAGMA_RTC_RUN_KERNEL(ceed, impl->OffsetTranspose, + grid, blocksize, args); CeedChkBackend(ierr); } else { - magma_readDofsOffset(ncomp, compstride, esize, nelem, impl->doffsets, - du, dv, data->queue); + ierr = MAGMA_RTC_RUN_KERNEL(ceed, impl->OffsetNoTranspose, + grid, blocksize, args); CeedChkBackend(ierr); } } @@ -152,6 +166,13 @@ static int CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { } else if (impl->down_) { ierr = magma_free(impl->doffsets); CeedChkBackend(ierr); } + Ceed ceed; + ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); + #ifdef HAVE_HIP + ierr = hipModuleUnload(impl->module); CeedChk_Hip(ceed, ierr); + #else + ierr = cuModuleUnload(impl->module); CeedChk_Cu(ceed, ierr); + #endif ierr = CeedFree(&impl); CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } @@ -247,6 +268,44 @@ int CeedElemRestrictionCreate_Magma(CeedMemType mtype, CeedCopyMode cmode, } else return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST or DEVICE supported"); + // Compile kernels + char 
*magma_common_path; + char *restriction_kernel_path, *restriction_kernel_source; + ierr = CeedGetJitAbsolutePath(ceed, + "ceed/jit-source/magma/magma_common_device.h", + &magma_common_path); CeedChkBackend(ierr); + CeedDebug256(ceed, 2, "----- Loading Restriction Kernel Source -----\n"); + ierr = CeedLoadSourceToBuffer(ceed, magma_common_path, + &restriction_kernel_source); + CeedChkBackend(ierr); + ierr = CeedGetJitAbsolutePath(ceed, + "ceed/jit-source/magma/elem_restriction.h", + &restriction_kernel_path); CeedChkBackend(ierr); + ierr = CeedLoadSourceToInitializedBuffer(ceed, restriction_kernel_path, + &restriction_kernel_source); + CeedChkBackend(ierr); + CeedDebug256(ceed, 2, + "----- Loading Restriction Kernel Source Complete! -----\n"); + // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip + // data + Ceed delegate; + ierr = CeedGetDelegate(ceed, &delegate); CeedChkBackend(ierr); + ierr = MAGMA_RTC_COMPILE(delegate, restriction_kernel_source, &impl->module, 0); + CeedChkBackend(ierr); + + // Kernel setup + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_readDofsStrided_kernel", + &impl->StridedNoTranspose); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_readDofsOffset_kernel", + &impl->OffsetNoTranspose); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_writeDofsStrided_kernel", + &impl->StridedTranspose); + CeedChkBackend(ierr); + ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_writeDofsOffset_kernel", + &impl->OffsetTranspose); + CeedChkBackend(ierr); ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr); CeedInt layout[3] = {1, elemsize*nelem, elemsize}; diff --git a/backends/magma/ceed-magma.h b/backends/magma/ceed-magma.h index d50da8a639..2545eb3b05 100644 --- a/backends/magma/ceed-magma.h +++ b/backends/magma/ceed-magma.h @@ -13,6 +13,34 @@ #include #include +#define MAGMA_MAXTHREADS_1D 128 +#define MAGMA_MAXTHREADS_2D 128 +#define 
MAGMA_MAXTHREADS_3D 64 +// Define macro for determining number of threads in y-direction +// for basis kernels +#define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x))) +// Define macro for computing the total threads in a block +// for use with __launch_bounds__() +#define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt)) + +#ifdef HAVE_HIP +#define MAGMA_RTC_MODULE hipModule_t +#define MAGMA_RTC_FUNCTION hipFunction_t +#define MAGMA_RTC_COMPILE CeedCompileHip +#define MAGMA_RTC_GET_KERNEL CeedGetKernelHip +#define MAGMA_RTC_RUN_KERNEL CeedRunKernelHip +#define MAGMA_RTC_RUN_KERNEL_DIM CeedRunKernelDimHip +#define MAGMA_RTC_RUN_KERNEL_DIM_SH CeedRunKernelDimSharedHip +#else +#define MAGMA_RTC_MODULE CUmodule +#define MAGMA_RTC_FUNCTION CUfunction +#define MAGMA_RTC_COMPILE CeedCompileCuda +#define MAGMA_RTC_GET_KERNEL CeedGetKernelCuda +#define MAGMA_RTC_RUN_KERNEL CeedRunKernelCuda +#define MAGMA_RTC_RUN_KERNEL_DIM CeedRunKernelDimCuda +#define MAGMA_RTC_RUN_KERNEL_DIM_SH CeedRunKernelDimSharedCuda +#endif + typedef enum { MAGMA_KERNEL_DIM_GENERIC=101, MAGMA_KERNEL_DIM_SPECIFIC=102 @@ -25,6 +53,12 @@ typedef struct { } Ceed_Magma; typedef struct { + MAGMA_RTC_MODULE module; + MAGMA_RTC_FUNCTION magma_interp; + MAGMA_RTC_FUNCTION magma_interp_tr; + MAGMA_RTC_FUNCTION magma_grad; + MAGMA_RTC_FUNCTION magma_grad_tr; + MAGMA_RTC_FUNCTION magma_weight; CeedScalar *dqref1d; CeedScalar *dinterp1d; CeedScalar *dgrad1d; @@ -45,6 +79,11 @@ typedef enum { } OwnershipMode; typedef struct { + MAGMA_RTC_MODULE module; + MAGMA_RTC_FUNCTION StridedTranspose; + MAGMA_RTC_FUNCTION StridedNoTranspose; + MAGMA_RTC_FUNCTION OffsetTranspose; + MAGMA_RTC_FUNCTION OffsetNoTranspose; CeedInt *offsets; CeedInt *doffsets; OwnershipMode own_; @@ -184,28 +223,6 @@ CEED_INTERN { magma_int_t Q, CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue); - void magma_readDofsOffset(const magma_int_t NCOMP, - const magma_int_t compstride, - const magma_int_t esize, 
const magma_int_t nelem, - magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv, - magma_queue_t queue); - - void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, - const magma_int_t nelem, magma_int_t *strides, - const CeedScalar *du, CeedScalar *dv, - magma_queue_t queue); - - void magma_writeDofsOffset(const magma_int_t NCOMP, - const magma_int_t compstride, - const magma_int_t esize, const magma_int_t nelem, - magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv, - magma_queue_t queue); - - void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, - const magma_int_t nelem, magma_int_t *strides, - const CeedScalar *du, CeedScalar *dv, - magma_queue_t queue); - int magma_dgemm_nontensor( magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, diff --git a/backends/magma/kernels/common/grad.h b/backends/magma/kernels/common/grad.h deleted file mode 100644 index 1f01625c5f..0000000000 --- a/backends/magma/kernels/common/grad.h +++ /dev/null @@ -1,378 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#ifndef CEED_MAGMA_GRAD_H -#define CEED_MAGMA_GRAD_H - -#include -#include -#include "magma_common_device.h" -#include "grad_device.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_1D)) __global__ void -magma_grad_1d_kernel( - const T *dTgrad, magma_trans_t transT, - const T *dU, const int estrdU, const int cstrdU, - T *dV, const int estrdV, const int cstrdV, const int nelem) -{ - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - - const int tx = threadIdx.x; - const int ty = threadIdx.y; - const int elem_id = (blockIdx.x * blockDim.y) + ty; - - if (elem_id >= nelem) return; - - T* sU[NCOMP]; - T* sV[NCOMP]; - - // shift global memory pointers by elem stride - dU += elem_id * estrdU; - dV += elem_id * estrdV; - - // assign shared memory pointers - T* sT = (T*)(shared_data); - T* sW = sT + P*Q; - sU[0] = sW + ty * NCOMP * (P + Q); - sV[0] = sU[0] + (NCOMP * 1 * P); - for(int icomp = 1; icomp < NCOMP; icomp++) { - sU[icomp] = sU[icomp-1] + (1 * P); - sV[icomp] = sV[icomp-1] + (1 * Q); - } - - // read T - if (ty == 0) { - dread_T_gm2sm(tx, transT, dTgrad, sT); - } - - // read U - read_1d(dU, cstrdU, sU, tx); - - // read V if transT is magmaTrans - if (transT == MagmaTrans) { - read_1d(dV, cstrdV, sV, tx); - } - - __syncthreads(); - magma_grad_1d_device(sT, transT, sU, sV, tx); - __syncthreads(); - - // write V - write_1d(sV, dV, cstrdV, tx); -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_2D)) __global__ void -magma_gradn_2d_kernel( - const T *dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, const int estrdU, const int cstrdU, const int dstrdU, - T *dV, const int estrdV, const int cstrdV, const int 
dstrdV, const int nelem) -{ - - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - - const int tx = threadIdx.x; - const int ty = threadIdx.y; - const int elem_id = (blockIdx.x * blockDim.y) + ty; - - if (elem_id >= nelem) return; - - T rU[1][NCOMP][P] = { make_zero() }; // here DIMU = 1, but might be different for a fused operator - T rV[1][NCOMP][Q] = { make_zero() }; // here DIMV = 1, but might be different for a fused operator - T rTmp = make_zero(); - - // shift global memory pointers by elem stride - dU += elem_id * estrdU; - dV += elem_id * estrdV; - - // assign shared memory pointers - T* sTinterp = (T*)(shared_data); - T* sTgrad = sTinterp + P*Q; - T* sTmp = sTgrad + P*Q; - sTmp += ty * (P * MAXPQ); - - // read T - if (ty == 0) { - dread_T_gm2sm(tx, transT, dinterp1d, sTinterp); - dread_T_gm2sm(tx, transT, dgrad1d, sTgrad); - } - - // No need to read V ( required only in transposed grad ) - const T beta = make_zero(); - - /* read U (idim = 0 for dU, iDIM = 0 for rU) -- - there is a sync at the end of this function */ - readU_2d - (dU + (0*dstrdU), cstrdU, rU, sTmp, tx); - - /* first call (iDIM = 0, iDIMU = 0, iDIMV = 0) -- - output from rV[0][][] into dV (idim = 0) */ - magma_grad_2d_device - (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); - /* there is a sync at the end of magma_grad_2d_device */ - writeV_2d - (dV+(0*dstrdV), cstrdV, rV, tx); - - /* second call (iDIM = 1, iDIMU = 0, iDIMV = 0) -- - output from rV[0][][] into dV (idim = 1) */ - magma_grad_2d_device - (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); - /* there is a sync at the end of magma_grad_2d_device */ - writeV_2d - (dV+(1*dstrdV), cstrdV, rV, tx); -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_2D)) __global__ void -magma_gradt_2d_kernel( - const T *dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, const int estrdU, const int cstrdU, const int 
dstrdU, - T *dV, const int estrdV, const int cstrdV, const int dstrdV, const int nelem) -{ - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - - const int tx = threadIdx.x; - const int ty = threadIdx.y; - const int elem_id = (blockIdx.x * blockDim.y) + ty; - - if (elem_id >= nelem) return; - - T rU[1][NCOMP][P] = { make_zero() }; // here DIMU = 1, but might be different for a fused operator - T rV[1][NCOMP][Q] = { make_zero() }; // here DIMV = 1, but might be different for a fused operator - T rTmp = make_zero(); - - // shift global memory pointers by elem stride - dU += elem_id * estrdU; - dV += elem_id * estrdV; - - // assign shared memory pointers - T* sTinterp = (T*)(shared_data); - T* sTgrad = sTinterp + P*Q; - T* sTmp = sTgrad + P*Q; - sTmp += ty * (P*MAXPQ); - - // read T - if (ty == 0) { - dread_T_gm2sm(tx, transT, dinterp1d, sTinterp); - dread_T_gm2sm(tx, transT, dgrad1d, sTgrad); - } - __syncthreads(); - - /* read V (since this is transposed mode -- - idim = 0 for dV, iDIM = 0 for rV) */ - const T beta = make_one(); - readV_2d - (dV + (0*dstrdV), cstrdV, rV, tx); - - /* read U (idim = 0 for dU, iDIM = 0 for rU) -- - there is a sync at the end of this function */ - readU_2d - (dU + (0 * dstrdU), cstrdU, rU, sTmp, tx); - /* first call (iDIM = 0, iDIMU = 0, iDIMV = 0) */ - magma_grad_2d_device - (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); - /* there is a sync at the end of magma_grad_2d_device */ - - /* read U (idim = 1 for dU, iDIM = 0 for rU) -- - there is a sync at the end of this function */ - readU_2d - (dU + (1*dstrdU), cstrdU, rU, sTmp, tx); - /* second call (iDIM = 1, iDIMU = 0, iDIMV = 0) */ - magma_grad_2d_device - (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); - /* there is a sync at the end of magma_grad_2d_device */ - - // write V - writeV_2d - (dV + (0*dstrdV), cstrdV, rV, tx); -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static 
__launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ*MAXPQ, MAGMA_MAXTHREADS_3D)) __global__ void -magma_gradn_3d_kernel( - const T* dinterp1d, const T* dgrad1d, magma_trans_t transT, - const T *dU, const int estrdU, const int cstrdU, const int dstrdU, - T *dV, const int estrdV, const int cstrdV, const int dstrdV, const int nelem) -{ - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - - const int tx = threadIdx.x; - const int ty = threadIdx.y; - const int elem_id = (blockIdx.x * blockDim.y) + ty; - - if (elem_id >= nelem) return; - - T rU[1][NCOMP][P] = {make_zero()}; // here DIMU = 1, but might be different for a fused operator - T rV[1][NCOMP][Q] = {make_zero()}; // here DIMV = 1, but might be different for a fused operator - T rTmp = make_zero(); - - // shift global memory pointers by elem stride - dU += elem_id * estrdU; - dV += elem_id * estrdV; - - // assign shared memory pointers - T* sTinterp = (T*)(shared_data); - T* sTgrad = sTinterp + P*Q; - T* sTmp = sTgrad + P*Q; - sTmp += ty * (max(P*P*P, (P*P*Q) + (P*Q*Q))); - - // read T - if (ty == 0) { - dread_T_gm2sm(tx, transT, dinterp1d, sTinterp); - dread_T_gm2sm(tx, transT, dgrad1d, sTgrad); - } - __syncthreads(); - - // No need to read V ( required only in transposed grad ) - const T beta = make_zero(); - - /* read U (idim = 0 for dU, iDIM = 0 for rU) -- - there is a sync at the end of this function */ - readU_3d - (dU + (0*dstrdU), cstrdU, rU, sTmp, tx); - - /* first call (iDIM = 0, iDIMU = 0, iDIMV = 0) -- - output from rV[0][][] into dV (idim = 0) */ - magma_grad_3d_device - (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); - /* there is a sync at the end of magma_grad_3d_device */ - writeV_3d - (dV+ (0*dstrdV), cstrdV, rV, tx); - - /* second call (iDIM = 1, iDIMU = 0, iDIMV = 0) -- - output from rV[0][][] into dV (idim = 1) */ - magma_grad_3d_device - (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); - /* there is a sync at the end of magma_grad_3d_device */ - writeV_3d - (dV+ (1*dstrdV), cstrdV, rV, tx); - - /* 
third call (iDIM = 2, iDIMU = 0, iDIMV = 0) -- - output from rV[0][][] into dV (idim = 2) */ - magma_grad_3d_device - (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); - /* there is a sync at the end of magma_grad_3d_device */ - writeV_3d - (dV+ (2*dstrdV), cstrdV, rV, tx); -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ*MAXPQ, MAGMA_MAXTHREADS_3D)) __global__ void -magma_gradt_3d_kernel( - const T *dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, const int estrdU, const int cstrdU, const int dstrdU, - T *dV, const int estrdV, const int cstrdV, const int dstrdV, const int nelem) -{ - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - - const int tx = threadIdx.x; - const int ty = threadIdx.y; - const int elem_id = (blockIdx.x * blockDim.y) + ty; - - if (elem_id >= nelem) return; - - T rU[1][NCOMP][P] = { make_zero() }; // here DIMU = 1, but might be different for a fused operator - T rV[1][NCOMP][Q] = { make_zero() }; // here DIMV = 1, but might be different for a fused operator - T rTmp = make_zero(); - - // shift global memory pointers by elem stride - dU += elem_id * estrdU; - dV += elem_id * estrdV; - - // assign shared memory pointers - T* sTinterp = (T*)(shared_data); - T* sTgrad = sTinterp + P*Q; - T* sTmp = sTgrad + P*Q; - sTmp += ty * (max(P*P*P, (P*P*Q) + (P*Q*Q))); - - // read T - if (ty == 0) { - dread_T_gm2sm(tx, transT, dinterp1d, sTinterp); - dread_T_gm2sm(tx, transT, dgrad1d, sTgrad); - } - __syncthreads(); - - // read V (since this is transposed mode) - const T beta = make_one(); - readV_3d - (dV + (0*dstrdV), cstrdV, rV, tx); - - /* read U (idim = 0 for dU, iDIM = 0 for rU) -- - there is a sync at the end of this function */ - readU_3d - (dU + (0 * dstrdU), cstrdU, rU, sTmp, tx); - /* then first call (iDIM = 0, iDIMU = 0, iDIMV = 0) */ - magma_grad_3d_device - (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); - /* there is a 
sync at the end of magma_grad_3d_device */ - - /* read U (idim = 1 for dU, iDIM = 0 for rU) -- - there is a sync at the end of this function */ - readU_3d - (dU + (1 * dstrdU), cstrdU, rU, sTmp, tx); - /* then second call (iDIM = 1, iDIMU = 0, iDIMV = 0) */ - magma_grad_3d_device - (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); - /* there is a sync at the end of magma_grad_3d_device */ - - /* read U (idim = 2 for dU, iDIM = 0 for rU) -- - there is a sync at the end of this function */ - readU_3d - (dU + (2 * dstrdU), cstrdU, rU, sTmp, tx); - /* then third call (iDIM = 2, iDIMU = 0, iDIMV = 0) */ - magma_grad_3d_device - (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); - /* there is a sync at the end of magma_grad_3d_device */ - - // write V - writeV_3d - (dV + (0 * dstrdV), cstrdV, rV, tx); -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __global__ void -magma_grad_generic_kernel( - const int dim, const int ncomp, - const int pre_org, const int tmp_size, - const T* dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, const int estrdU, const int cstrdU, - T *dV, const int estrdV, const int cstrdV, - const int dim_id ) -{ - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - - const int elem_id = blockIdx.x; - const int comp_id = blockIdx.y; - int tx = threadIdx.x; - int pre, post; - - // advance to the respective element in the batch - dU += (elem_id * estrdU) + (comp_id * cstrdU); - dV += (elem_id * estrdV) + (comp_id * cstrdV); - - T* sTinterp = (T*)shared_data; - T* sTgrad = sTinterp + P * Q; - - // read T in shared memory - dread_T_gm2sm(tx, transT, dinterp1d, sTinterp ); - dread_T_gm2sm(tx, transT, dgrad1d, sTgrad ); - __syncthreads(); - - pre = pre_org; // the value of pre is independent from the loop below - post = 1; - magma_grad_generic_device - ( dim_id, dim, ncomp, pre, post, tmp_size, sTinterp, sTgrad, transT, dU, dV, shared_data + (2*P*Q) ); -} - -#endif // CEED_MAGMA_GRAD_H 
diff --git a/backends/magma/kernels/common/grad_device.h b/backends/magma/kernels/common/grad_device.h deleted file mode 100644 index e354d859a7..0000000000 --- a/backends/magma/kernels/common/grad_device.h +++ /dev/null @@ -1,325 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#ifndef CEED_MAGMA_GRAD_DEVICE_H -#define CEED_MAGMA_GRAD_DEVICE_H - -#define maxpq(p,q) (p > q ? p : q) - -// macros to abstract access of shared memory and reg. file -#define sT(i,j) sT[(j) * P + (i)] -#define sTmp(i,j,ldw) sTmp[(j)*(ldw) + (i)] -#define sTmp2(i,j,ldw) sTmp2[(j)*(ldw) + (i)] -#define rU(idim,icomp,i) rU[(idim)*NCOMP*P + (icomp)*P + (i)] -#define rV(idim,icomp,i) rV[(idim)*NCOMP*Q + (icomp)*Q + (i)] - -////////////////////////////////////////////////////////////////////////////////////////// -// grad basis action (1D) -template -static __device__ __inline__ void -magma_grad_1d_device( - const T *sT, magma_trans_t transT, - T* sU[NCOMP], T* sV[NCOMP], const int tx) -{ - // Assumptions - // 1. 1D threads of size max(P,Q) - // 2. sU[i] is 1xP: in shared memory - // 3. sV[i] is 1xQ: in shared memory - // 4. Product per component is one row (1xP) times T matrix (PxQ) => one row (1xQ) - // 5. Each thread computes one entry in sV[i] - // 6. Must sync before and after call - // 7. Note that the layout for U and V is different from 2D/3D problem - - T rv; - if (tx < Q) { - for(int icomp = 0; icomp < NCOMP; icomp++) { - rv = (transT == MagmaTrans) ? 
sV[icomp][tx] : make_zero(); - for(int i = 0; i < P; i++) { - rv += sU[icomp][i] * sT(i,tx); - } - sV[icomp][tx] = rv; - } - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -// grad basis action (2D) -// This function is called two times at a higher level for 2D -// DIMU -- for the size of rU[DIMU * NCOMP * MAXPQ] -// DIMV -- for the size of rV[DIMV * NCOMP * MAXPQ] -// iDIM -- the index of the outermost loop over dimensions in grad -// iDIMU -- which dim index of rU is accessed (always 0 for notrans, 0 or 1 for trans) -// iDIMV -- which dim index of rV is accessed (0 or 1 for notrans, always 0 for trans) -// the scalar beta is used to specify whether to accumulate to rV, or overwrite it -template -static __device__ __inline__ void -magma_grad_2d_device( - const T *sTinterp, const T *sTgrad, - T rU[DIMU][NCOMP][rUsize] , T rV[DIMV][NCOMP][rVsize], - T beta, const int tx, T rTmp, T* swork) -{ - // Assumptions - // 0. This device routine applies grad for one dim only (iDIM), so it should be called twice for 2D - // 1. 1D threads of size max(P,Q) - // 2. input: rU[DIMU x NCOMP x P] in registers (per thread) - // 3. output: rV[DIMV x NCOMP x Q] in registers (per thread) - // 4. Two products per each (dim,component) pair - // 4.1 Batch P of (1xP) matrices times (PxQ) matrix => Batch P of (1xQ) matrices - // 4.2 Batch 1 of (QxP) matrix times (PxQ) matrix => (QxQ) matrix - // 6. Each thread computes one row of the output of each product - // 7. Sync is recommended before and after the call - - for(int icomp = 0; icomp < NCOMP; icomp++){ - // 1st product -- Batch P of (1xP) matrices [reg] x (PxQ) [shmem] => Batch P of (1xQ) matrices - // the batch output P x (1xQ) is written on the fly to shmem - if (tx < P) { - const int batchid = tx; - const int sld = 1; - const T *sT = (iDIM == 0) ? 
sTgrad : sTinterp; - T* sTmp = swork + batchid * (1 * Q); - for(int j = 0; j < Q; j++){ - rTmp = make_zero(); - for(int i = 0; i < P; i++){ - rTmp += rU[iDIMU][icomp][i] * sT(i,j); - } - sTmp(0,j,sld) = rTmp; - } - } // end of: if (tx < P) - __syncthreads(); - - // 2nd product -- Batch 1 of a (QxP) matrix [shmem] x (PxQ) [shmem] => (QxQ) matrix [reg] - if (tx < Q) { - const int batchid = 0; - const int sld = Q; - const T *sT = (iDIM == 1) ? sTgrad : sTinterp; - T* sTmp = swork + batchid * (Q*P); - for(int j = 0; j < Q; j++){ - rTmp = make_zero(); - for(int i = 0; i < P; i++){ - rTmp += sTmp(tx,i,sld) * sT(i,j); - } - rV[iDIMV][icomp][j] *= beta; - rV[iDIMV][icomp][j] += rTmp; - } - } - __syncthreads(); - } // loop over NCOMP -} - -////////////////////////////////////////////////////////////////////////////////////////// -// grad basis action (3D) -// This function is called three times at a higher level for 3D -// DIMU -- for the size of rU[DIMU * NCOMP * MAXPQ] -// DIMV -- for the size of rV[DIMV * NCOMP * MAXPQ] -// iDIM -- the index of the outermost loop over dimensions in grad -// iDIMU -- which dim index of rU is accessed (always 0 for notrans, 0, 1, or 2 for trans) -// iDIMV -- which dim index of rV is accessed (0, 1, or 2 for notrans, always 0 for trans) -// the scalar beta is used to specify whether to accumulate to rV, or overwrite it -template -static __device__ __inline__ void -magma_grad_3d_device( - const T *sTinterp, const T *sTgrad, - T rU[DIMU][NCOMP][rUsize] , T rV[DIMV][NCOMP][rVsize], - T beta, const int tx, T rTmp, T* swork) -{ - // Assumptions - // 0. This device routine applies grad for one dim only (iDIM), so it should be thrice for 3D - // 1. 1D threads of size max(P,Q)^2 - // 2. input: rU[DIMU x NCOMP x rUsize] in registers (per thread) - // 3. output: rV[DIMV x NCOMP x rVsize] in registers (per thread) - // 4. 
Three products per each (dim,component) pair - // 4.1 Batch P^2 of (1xP) matrices times (PxQ) matrix => Batch P^2 of (1xQ) matrices - // 4.2 Batch P of (QxP) matrices times (PxQ) matrix => Batch P of (QxQ) matrices - // 4.3 Batch 1 of (Q^2xP) matrix times (PxQ) matrix => (Q^2xQ) matrix - // 6. Each thread computes one row of the output of each product - // 7. Sync is recommended before and after the call - - T* sW1 = swork; - T* sW2 = sW1 + P*P*Q; - for(int icomp = 0; icomp < NCOMP; icomp++){ - // Batch P^2 of (1xP) matrices [reg] times (PxQ) matrix [shmem] => Batch P^2 of (1xQ) matrices [shmem] - if (tx < (P*P)) { - const int batchid = tx; - const int sld = 1; - const T *sT = (iDIM == 0) ? sTgrad : sTinterp; - T* sTmp = sW1 + batchid * (1*Q); - for(int j = 0; j < Q; j++){ - rTmp = make_zero(); - for(int i = 0; i < P; i++){ - //rTmp += rU(iDIMU,icomp,i) * sT(i,j); - rTmp += rU[iDIMU][icomp][i] * sT(i,j); - } - sTmp(0,j,sld) = rTmp; - } - } // end of: if (tx < P*P) - __syncthreads(); - - // Batch P of (QxP) matrices [shmem] times (PxQ) matrix [shmem] => Batch P of (QxQ) matrices [reg] - if (tx < (P*Q)) { - const int batchid = tx / Q; - const int tx_ = tx % Q; - const int sld = Q; - const T *sT = (iDIM == 1) ? sTgrad : sTinterp; - T* sTmp = sW1 + batchid * (Q*P); // sTmp is input - T* sTmp2 = sW2 + batchid * (Q*Q); // sTmp2 is output - for(int j = 0; j < Q; j++){ - rTmp = make_zero(); - for(int i = 0; i < P; i++){ - rTmp += sTmp(tx_,i,sld) * sT(i,j); - } - sTmp2(tx_,j,sld) = rTmp; - } - } - __syncthreads(); - - // Batch 1 of (Q^2xP) matrices [shmem] times (PxQ) matrix [shmem] => Batch 1 of (Q^2xQ) matrices [reg] - if (tx < (Q*Q)) { - // No need to declare batchid = (tx / Q^2) = always zero - // No need to declare tx_ = (tx_ % Q^2) = always tx - const int sld = Q*Q; - const T *sT = (iDIM == 2) ? 
sTgrad : sTinterp; - T* sTmp = sW2; // sTmp is input - for(int j = 0; j < Q; j++) { - rTmp = make_zero(); - for(int i = 0; i < P; i++) { - rTmp += sTmp(tx,i,sld) * sT(i,j); - } - //rV(iDIMV,icomp,j) *= beta; - //rV(iDIMV,icomp,j) += rTmp; - rV[iDIMV][icomp][j] *= beta; - rV[iDIMV][icomp][j] += rTmp; - } - } - __syncthreads(); - } // loop over NCOMP -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __device__ __inline__ void -magma_grad_generic_device( - const int p, - const int dim, const int ncomp, const int pre_org, const int post_org, const int tmp_size, - const T *sTinterp, const T *sTgrad, magma_trans_t transT, - const T *dU, T *dV, - T* shared_data ) -{ -#define B (P) -#define J (Q) -#define A (pre) -#define C (post) - - const int nthreads = blockDim.x; - int pre = pre_org; - int post = post_org; - int nslices = nthreads / C; - int tx = threadIdx.x; - int tx_ = tx % C; - int slice_id = tx / C; - int i = 0; - - const magma_int_t add = (transT == MagmaTrans); - - T* sTmp1 = (T*)shared_data; - T* sTmp2 = sTmp1 + tmp_size; - T rU[P] = { MAGMA_D_ZERO }; // each thread has an entire row of U - T rV[Q] = { MAGMA_D_ZERO }; // each thread computes an entire row of V - T *sU, *sV, *sT; - - sU = sTmp1; - sV = sTmp2; - - // read U in sTmp1 (AC x B) - sU += slice_id * C * B; - dU += slice_id * C * B; - for(i = 0; i < A-nslices; i+=nslices) { - #pragma unroll - for(int b = 0; b < B; b++) { - sU[b * C + tx_] = dU[b * C + tx_]; - } - dU += nslices * C * B; - sU += nslices * C * B; - } - - if (slice_id < A-i) { - #pragma unroll - for(int b = 0; b < B; b++) { - //printf("tx = %d, tx_ = %d, accessing b * C + tx_ = %d\n", tx, tx_, b * C + tx_); - sU[b * C + tx_] = dU[b * C + tx_]; - } - } - __syncthreads(); - - int d = 0; - for(d = 0; d < dim-1; d++) { - sT = (p == d) ? (T*)sTgrad : (T*)sTinterp; - sU = (d % 2 == 0) ? sTmp1 : sTmp2; - sV = (d % 2 == 0) ? 
sTmp2 : sTmp1; - - sU += slice_id * C * B; - sV += slice_id * C * J; - for(i = 0; i < A-nslices; i+=nslices) { - dread_U_gsm2reg(C, tx_, sU, rU); // read U - dgemm_slice(MAGMA_D_ONE, sT, rU, MAGMA_D_ZERO, rV); // multiply - dwrite_V_reg2gsm(C, tx_, rV, sV ); // write V back - sU += nslices * C * B; - sV += nslices * C * J; - } - - if (slice_id < A-i){ - dread_U_gsm2reg(C, tx_, sU, rU); // read U - dgemm_slice(MAGMA_D_ONE, sT, rU, MAGMA_D_ZERO, rV); // multiply - dwrite_V_reg2gsm(C, tx_, rV, sV ); // write V back - } - __syncthreads(); - - // adjust dimensions and re-calculate the thread indices - pre /= P; - post *= Q; - nslices = nthreads / C; - tx_ = tx % C; - slice_id = tx / C; - } - - // handle last iteration (d = dim-1) with dV and beta - // no need for sV in the last iteration, just use sU and write directly into dV - sT = (p == d) ? (T*)sTgrad : (T*)sTinterp; - sU = (d % 2 == 0) ? sTmp1 : sTmp2; - //sV = (d % 2 == 0) ? sTmp2 : sTmp1; - T beta = (add == 1) ? MAGMA_D_ONE : MAGMA_D_ZERO; - - sU += slice_id * C * B; - dV += slice_id * C * J; - for(i = 0; i < A-nslices; i+=nslices) { - dread_U_gsm2reg(C, tx_, sU, rU); // read U - if ( add ) { - dread_V_gsm2reg(C, tx_, dV, rV); - } - dgemm_slice(MAGMA_D_ONE, sT, rU, beta, rV); // multiply - dwrite_V_reg2gsm(C, tx_, rV, dV ); // write V back - sU += nslices * C * B; - dV += nslices * C * J; - } - - if (slice_id < A-i){ - dread_U_gsm2reg(C, tx_, sU, rU); // read U - if ( add ) { - dread_V_gsm2reg(C, tx_, dV, rV); - } - dgemm_slice(MAGMA_D_ONE, sT, rU, beta, rV); // multiply - dwrite_V_reg2gsm(C, tx_, rV, dV ); // write V back - } - - pre /= P; - post *= Q; -#undef B -#undef J -#undef A -#undef C -} - -#endif // CEED_MAGMA_GRAD_DEVICE_H diff --git a/backends/magma/kernels/common/interp.h b/backends/magma/kernels/common/interp.h deleted file mode 100644 index b86d10c216..0000000000 --- a/backends/magma/kernels/common/interp.h +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National 
Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#ifndef CEED_MAGMA_INTERP_H -#define CEED_MAGMA_INTERP_H - -#include -#include -#include "magma_common_device.h" -#include "interp_device.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_1D)) __global__ void -magma_interp_1d_kernel( - const T *dT, magma_trans_t transT, - const T *dU, const int estrdU, const int cstrdU, - T *dV, const int estrdV, const int cstrdV, const int nelem) -{ - - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - - const int tx = threadIdx.x; - const int ty = threadIdx.y; - const int elem_id = (blockIdx.x * blockDim.y) + ty; - - if (elem_id >= nelem) return; - - T* sU[NCOMP]; - T* sV[NCOMP]; - - // shift global memory pointers by elem stride - dU += elem_id * estrdU; - dV += elem_id * estrdV; - - // assign shared memory pointers - T* sT = (T*)(shared_data); - T* sW = sT + P*Q; - sU[0] = sW + ty * NCOMP * (P + Q); - sV[0] = sU[0] + (NCOMP * 1 * P); - for(int icomp = 1; icomp < NCOMP; icomp++) { - sU[icomp] = sU[icomp-1] + (1 * P); - sV[icomp] = sV[icomp-1] + (1 * Q); - } - - // read T - if (ty == 0) { - dread_T_gm2sm(tx, transT, dT, sT); - } - - // read U - read_1d(dU, cstrdU, sU, tx); - - // read V if transT is magmaTrans - if (transT == MagmaTrans) { - read_1d(dV, cstrdV, sV, tx); - } - - __syncthreads(); - magma_interp_1d_device(sT, transT, sU, sV, tx); - __syncthreads(); - - // write V - write_1d(sV, dV, cstrdV, tx); -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_2D)) __global__ void -magma_interp_2d_kernel( - const T *dT, magma_trans_t transT, - const T *dU, const 
int estrdU, const int cstrdU, - T *dV, const int estrdV, const int cstrdV, const int nelem) -{ - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - - const int tx = threadIdx.x; - const int ty = threadIdx.y; - const int elem_id = (blockIdx.x * blockDim.y) + ty; - - if (elem_id >= nelem) return; - - T rU[1][NCOMP][P] = { make_zero() }; // for a non fused operator DIM is always 1 - T rV[1][NCOMP][Q] = { make_zero() }; // for a non fused operator DIM is always 1 - T rTmp = make_zero(); - - // shift global memory pointers by elem stride - dU += elem_id * estrdU; - dV += elem_id * estrdV; - - // assign shared memory pointers - T* sT = (T*)(shared_data); - T* sTmp = sT + P*Q; - sTmp += ty * (P * MAXPQ); - - // read T - if (ty == 0) { - dread_T_gm2sm(tx, transT, dT, sT); - } - - // read V if transT is magmaTrans - if (transT == MagmaTrans) { - readV_2d(dV, cstrdV, rV, tx); - } - - // read U -- there is a sync at the end of this function - readU_2d(dU, cstrdU, rU, sTmp, tx); - - // no sync needed here -- readU_2d already syncs at the end - magma_interp_2d_device(sT, transT, rU, rV, tx, rTmp, sTmp); - __syncthreads(); - - // write V - writeV_2d(dV, cstrdV, rV, tx); -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ*MAXPQ, MAGMA_MAXTHREADS_3D)) __global__ void -magma_interp_3d_kernel( - const T *dT, magma_trans_t transT, - const T *dU, const int estrdU, const int cstrdU, - T *dV, const int estrdV, const int cstrdV, const int nelem) -{ - MAGMA_DEVICE_SHARED( CeedScalar, shared_data) - - const int tx = threadIdx.x; - const int ty = threadIdx.y; - const int elem_id = (blockIdx.x * blockDim.y) + ty; - - if (elem_id >= nelem) return; - - T rU[1][NCOMP][P] = { make_zero() }; // for a non fused operator DIM is always 1 - T rV[1][NCOMP][Q] = { make_zero() }; // for a non fused operator DIM is always 1 - T rTmp[Q] = { make_zero() }; - - // shift global memory pointers by elem stride 
- dU += elem_id * estrdU; - dV += elem_id * estrdV; - - // assign shared memory pointers - T* sT = (T*)(shared_data); - T* sTmp = sT + P*Q; - sTmp += ty * (max(P*P*MAXPQ, P*Q*Q)); - - // read T - if (ty == 0) { - dread_T_gm2sm(tx, transT, dT, sT); - } - - // read V if transT is magmaTrans - if (transT == MagmaTrans) { - readV_3d(dV, cstrdV, rV, tx); - } - - // read U (idim = 0 for dU, iDIM = 0 for rU, u_dimstride is always 0) - readU_3d(dU, cstrdU, rU, sTmp, tx); - // there is a sync at the end of this function - - magma_interp_3d_device(sT, transT, rU , rV, tx, rTmp, sTmp); - __syncthreads(); - - // write V - writeV_3d(dV, cstrdV, rV, tx); -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __global__ void -interp_generic_kernel( - const int dim, const int ncomp, const int pre_org, const int post_org, const int tmp_size, - const T *dT, magma_trans_t transT, - const T *dU, const int estrdU, const int cstrdU, - T *dV, const int estrdV, const int cstrdV) -{ - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - - const int elem_id = blockIdx.x; - const int comp_id = blockIdx.y; - magma_interp_generic_device< P, Q > - ( dim, ncomp, pre_org, post_org, tmp_size, dT, transT, - dU + (elem_id * estrdU) + (comp_id * cstrdU), - dV + (elem_id * estrdV) + (comp_id * cstrdV), - shared_data ); -} - -#endif // CEED_MAGMA_INTERP_H diff --git a/backends/magma/kernels/common/interp_device.h b/backends/magma/kernels/common/interp_device.h deleted file mode 100644 index 20273db0cc..0000000000 --- a/backends/magma/kernels/common/interp_device.h +++ /dev/null @@ -1,337 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#ifndef CEED_MAGMA_INTERP_DEVICE_H -#define CEED_MAGMA_INTERP_DEVICE_H - -#define maxpq(p,q) (p > q ? p : q) - -// macros to abstract access of shared memory and reg. file -#define sT(i,j) sT[(j) * P + (i)] -#define sTmp(i,j,ldw) sTmp[(j)*(ldw) + (i)] -#define rU(idim,icomp,i) rU[(idim)*NCOMP*P + (icomp)*P + (i)] -#define rV(idim,icomp,i) rV[(idim)*NCOMP*Q + (icomp)*Q + (i)] - -////////////////////////////////////////////////////////////////////////////////////////// -// interp basis action (1D) -template -static __device__ __inline__ void -magma_interp_1d_device( - const T *sT, magma_trans_t transT, - T* sU[NCOMP], T* sV[NCOMP], const int tx) -{ - // Assumptions - // 1. 1D threads of size max(P,Q) - // 2. sU[i] is 1xP: in shared memory - // 3. sV[i] is 1xQ: in shared memory - // 4. Product per component is one row (1xP) times T matrix (PxQ) => one row (1xQ) - // 5. Each thread computes one entry in sV[i] - // 6. Must sync before and after call - // 7. Note that the layout for U and V is different from 2D/3D problem - - T rv; - if (tx < Q) { - for(int icomp = 0; icomp < NCOMP; icomp++) { - rv = (transT == MagmaTrans) ? sV[icomp][tx] : make_zero(); - for(int i = 0; i < P; i++) { - rv += sU[icomp][i] * sT(i,tx); //sT[tx * P + i]; - } - sV[icomp][tx] = rv; - } - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -// interp basis action (2D) -template -static __device__ __inline__ void -magma_interp_2d_device( - const T *sT, magma_trans_t transT, - T rU[DIMU][NCOMP][rUsize] , T rV[DIMV][NCOMP][rVsize], - const int tx, T rTmp, T* swork) -{ - // Assumptions - // 1. 1D threads of size max(P,Q) - // 2. input: rU[DIMU x NCOMP x rUsize] in registers (per thread) - // 3. output: rV[DIMV x NCOMP x rVsize] in registers (per thread) - // 4. 
Two products per component - // 4.1 Batch P of (1xP) matrices times (PxQ) matrix => Batch P of (1xQ) matrices - // 4.2 Batch 1 of (QxP) matrix times (PxQ) matrix => (QxQ) matrix - // 5. Each thread computes one row of the output of each product - // 6. Sync is recommended before and after the call - - for(int icomp = 0; icomp < NCOMP; icomp++){ - // 1st product -- Batch P of (1xP) matrices [reg] x (PxQ) [shmem] => Batch P of (1xQ) matrices - // the batch output P x (1xQ) is written on the fly to shmem - if (tx < P) { - const int batchid = tx; - const int sld = 1; - T* sTmp = swork + batchid * (1 * Q); - for(int j = 0; j < Q; j++){ - rTmp = make_zero(); - for(int i = 0; i < P; i++){ - rTmp += rU[0][icomp][i] * sT(i,j); - } - sTmp(0,j,sld) = rTmp; - } - } // end of: if (tx < P) - __syncthreads(); - - // 2nd product -- Batch 1 of a (QxP) matrix [shmem] x (PxQ) [shmem] => (QxQ) matrix [reg] - if (tx < Q) { - const int batchid = 0; - const int sld = Q; - T* sTmp = swork + batchid * (Q*P); - for(int j = 0; j < Q; j++){ - rTmp = make_zero(); - for(int i = 0; i < P; i++){ - rTmp += sTmp(tx,i,sld) * sT(i,j); - } - rV[0][icomp][j] += rTmp; - } - } - __syncthreads(); - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -// interp basis action (3D) -template -static __device__ __inline__ void -magma_interp_3d_device( - const T *sT, magma_trans_t transT, - T rU[DIMU][NCOMP][rUsize] , T rV[DIMV][NCOMP][rVsize], - const int tx, T rTmp[Q], T* swork) -{ - // Assumptions - // 1. 1D threads of size max(P,Q)^2 - // 2. input: rU[DIMU x NCOMP x rUsize] in registers (per thread) - // 3. output: rV[DIMV x NCOMP x rVsize] in registers (per thread) - // 4. Three products per component - // 4.1 Batch P^2 of (1xP) matrices times (PxQ) matrix => Batch P^2 of (1xQ) matrices - // 4.2 Batch P of (QxP) matrices times (PxQ) matrix => Batch P of (QxQ) matrices - // 4.3 Batch 1 of (Q^2xP) matrix times (PxQ) matrix => (Q^2xQ) matrix - // 5. 
Each thread computes one row of the output of each product - // 6. Sync is recommended before and after the call - - for(int icomp = 0; icomp < NCOMP; icomp++){ - // Batch P^2 of (1xP) matrices [reg] times (PxQ) matrix [shmem] => Batch P^2 of (1xQ) matrices [shmem] - if (tx < (P*P)) { - const int batchid = tx; - const int sld = 1; - T* sTmp = swork + batchid * (1*Q); - for(int j = 0; j < Q; j++){ - rTmp[0] = make_zero(); - for(int i = 0; i < P; i++){ - rTmp[0] += rU[0][icomp][i] * sT(i,j); - } - sTmp(0,j,sld) = rTmp[0]; - } - } // end of: if (tx < P*P) - __syncthreads(); - - // Batch P of (QxP) matrices [shmem] times (PxQ) matrix [shmem] => Batch P of (QxQ) matrices [reg] - if (tx < (P*Q)) { - const int batchid = tx / Q; - const int tx_ = tx % Q; - const int sld = Q; - T* sTmp = swork + batchid * (Q*P); // sTmp is input - for(int j = 0; j < Q; j++){ - rTmp[j] = make_zero(); - for(int i = 0; i < P; i++){ - rTmp[j] += sTmp(tx_,i,sld) * sT(i,j); - } - } - } - __syncthreads(); - - // write rTmp[] into shmem as batch P of QxQ matrices - if (tx < (P*Q)){ - const int batchid = tx / Q; - const int tx_ = tx % Q; - const int sld = Q; - T* sTmp = swork + batchid * (Q*Q); - for(int j = 0; j < Q; j++){ - sTmp(tx_, j, sld) = rTmp[j]; - } - } - __syncthreads(); - - // Batch 1 of (Q^2xP) matrices [shmem] times (PxQ) matrix [shmem] => Batch 1 of (Q^2xQ) matrices [reg] - if (tx < (Q*Q)) { - // No need to declare batchid = (tx / Q^2) = always zero - // No need to declare tx_ = (tx_ % Q^2) = always tx - const int sld = Q*Q; - T* sTmp = swork; - for(int j = 0; j < Q; j++) { - rTmp[0] = make_zero(); - for(int i = 0; i < P; i++) { - rTmp[0] += sTmp(tx,i,sld) * sT(i,j); - } - rV[0][icomp][j] += rTmp[0]; - } - } - __syncthreads(); - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -// interp basis action -- dim and ncomp are run-time variables -template -static __device__ __inline__ void -magma_interp_generic_device( - const int dim, const 
int ncomp, const int pre_org, const int post_org, const int tmp_size, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, CeedScalar *dV, - CeedScalar* shared_data ) -{ -#define B (P) -#define J (Q) -#define A (pre) -#define C (post) - - const int nthreads = blockDim.x; - int pre = pre_org; - int post = post_org; - int nslices = nthreads / C; - int tx = threadIdx.x; - int tx_ = tx % C; - int slice_id = tx / C; - int i = 0; - - const magma_int_t add = (transT == MagmaTrans); - - CeedScalar* sT = (CeedScalar*)shared_data; - CeedScalar* sTmp1 = sT + B * J; - CeedScalar* sTmp2 = sTmp1 + tmp_size; - CeedScalar rU[P] = { MAGMA_D_ZERO }; // each thread has an entire row of U - CeedScalar rV[Q] = { MAGMA_D_ZERO }; // each thread computes an entire row of V - CeedScalar *sU, *sV; - - // read T in shared memory - dread_T_gm2sm(tx, transT, dT, sT ); - - sU = sTmp1; - sV = sTmp2; - - // read U in sTmp1 (AC x B) - sU += slice_id * C * B; - dU += slice_id * C * B; - for(i = 0; i < A-nslices; i+=nslices) { - for(int b = 0; b < B; b++) { - sU[b * C + tx_] = dU[b * C + tx_]; - } - dU += nslices * C * B; - sU += nslices * C * B; - } - - if (slice_id < A-i) { - for(int b = 0; b < B; b++) { - //printf("tx = %d, tx_ = %d, accessing b * C + tx_ = %d\n", tx, tx_, b * C + tx_); - sU[b * C + tx_] = dU[b * C + tx_]; - } - } - __syncthreads(); - - int d = 0; - for(d = 0; d < dim-1; d++) { - sU = (d % 2 == 0) ? sTmp1 : sTmp2; - sV = (d % 2 == 0) ? 
sTmp2 : sTmp1; - - sU += slice_id * C * B; - sV += slice_id * C * J; - for(i = 0; i < A-nslices; i+=nslices) { - dread_U_gsm2reg(C, tx_, sU, rU); // read U - dgemm_slice(MAGMA_D_ONE, sT, rU, MAGMA_D_ZERO, rV); // multiply - dwrite_V_reg2gsm(C, tx_, rV, sV ); // write V back - sU += nslices * C * B; - sV += nslices * C * J; - } - - if (slice_id < A-i){ - dread_U_gsm2reg(C, tx_, sU, rU); // read U - dgemm_slice(MAGMA_D_ONE, sT, rU, MAGMA_D_ZERO, rV); // multiply - dwrite_V_reg2gsm(C, tx_, rV, sV ); // write V back - } - __syncthreads(); - - - #if 0 - __syncthreads(); - if (tx == 0) { - printf("GPU,dim = %d \n", d); - for(int i = 0; i < pre * post; i++) { - for(int j = 0; j < Q; j++) { - printf("%5.2f ", sV[j * (pre*post) + i]); - } - printf("\n"); - } - } - __syncthreads(); - #endif - - // adjust dimensions and re-calculate the thread indices - pre /= P; - post *= Q; - nslices = nthreads / C; - tx_ = tx % C; - slice_id = tx / C; - } - - // handle last iteration (d = dim-1) with dV and beta - // no need for sV in the last iteration, just use sU and write directly into dV - sU = (d % 2 == 0) ? sTmp1 : sTmp2; - //sV = (d % 2 == 0) ? sTmp2 : sTmp1; - CeedScalar beta = (add == 1) ? 
MAGMA_D_ONE : MAGMA_D_ZERO; - - sU += slice_id * C * B; - dV += slice_id * C * J; - for(i = 0; i < A-nslices; i+=nslices) { - dread_U_gsm2reg(C, tx_, sU, rU); // read U - if ( add ) { - dread_V_gsm2reg(C, tx_, dV, rV); - } - dgemm_slice(MAGMA_D_ONE, sT, rU, beta, rV); // multiply - dwrite_V_reg2gsm(C, tx_, rV, dV ); // write V back - sU += nslices * C * B; - dV += nslices * C * J; - } - - if (slice_id < A-i){ - dread_U_gsm2reg(C, tx_, sU, rU); // read U - if ( add ) { - dread_V_gsm2reg(C, tx_, dV, rV); - } - dgemm_slice(MAGMA_D_ONE, sT, rU, beta, rV); // multiply - dwrite_V_reg2gsm(C, tx_, rV, dV ); // write V back - } - - - #if 0 - __syncthreads(); - if (tx == 0) { - printf("GPU,dim = %d \n", d); - for(int i = 0; i < pre * post; i++) { - for(int j = 0; j < Q; j++) { - printf("%5.2f ", dV[j * (pre*post) + i]); - } - printf("\n"); - } - } - __syncthreads(); - #endif - - - pre /= P; - post *= Q; -#undef B -#undef J -#undef A -#undef C -} - -#endif // CEED_MAGMA_INTERP_DEVICE_H diff --git a/backends/magma/kernels/common/weight.h b/backends/magma/kernels/common/weight.h index 007cdb6603..78aa65c699 100644 --- a/backends/magma/kernels/common/weight.h +++ b/backends/magma/kernels/common/weight.h @@ -9,129 +9,7 @@ #define CEED_MAGMA_WEIGHT_H #include -#include -#include "magma_common_device.h" -#include "weight_device.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __launch_bounds__(MAGMA_BASIS_BOUNDS(Q, MAGMA_MAXTHREADS_1D)) __global__ void -magma_weight_1d_kernel(const T *dqweight1d, T *dV, const int v_stride, const int nelem) -{ - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - - const int tx = threadIdx.x; - const int ty = threadIdx.y; - const int elem_id = (blockIdx.x * blockDim.y) + ty; - - if (elem_id >= nelem) return; - - // global memory pointers - dV += elem_id * v_stride; - - // shared memory pointers - T* sTweight = (T*)shared_data; - T* sV = sTweight + Q; - sV += ty * Q; - - // read dqweight_1d 
- if (ty == 0 && tx < Q) { - sTweight[tx] = dqweight1d[tx]; - } - - __syncthreads(); - magma_weight_1d_device(sTweight, sV, tx); - __syncthreads(); - - // write V - dV[ tx ] = sV[ tx ]; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __launch_bounds__(MAGMA_BASIS_BOUNDS(Q, MAGMA_MAXTHREADS_2D)) __global__ void -magma_weight_2d_kernel(const T *dqweight1d, T *dV, const int v_stride, const int nelem) -{ - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - - const int tx = threadIdx.x; - const int ty = threadIdx.y; - const int elem_id = (blockIdx.x * blockDim.y) + ty; - - if (elem_id >= nelem) return; - - T rV[1][1][Q]; // allocate with DIM=NCOMP=1, but sizes may differ for a fused operator - // global memory pointers - dV += elem_id * v_stride; - - // shared memory pointers - T* sTweight = (T*)shared_data; - - // read dqweight_1d - if (ty == 0 && tx < Q) { - sTweight[tx] = dqweight1d[tx]; - } - - __syncthreads(); - magma_weight_2d_device(sTweight, rV, tx); - - // write V - if (tx < Q) { - for(int j = 0; j < Q; j++) { - dV[ j*Q + tx ] = rV[0][0][j]; - } - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __launch_bounds__(MAGMA_BASIS_BOUNDS(Q*Q, MAGMA_MAXTHREADS_3D)) __global__ void -magma_weight_3d_kernel(const T *dqweight1d, T *dV, const int v_stride, const int nelem) -{ - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - - const int tx = threadIdx.x; - const int ty = threadIdx.y; - const int elem_id = (blockIdx.x * blockDim.y) + ty; - - if (elem_id >= nelem) return; - - T rV[1][1][Q]; // allocate with DIM=NCOMP=1, but sizes may differ for a fused operator - // global memory pointers - dV += elem_id * v_stride; - - // shared memory pointers - T* sTweight = (T*)shared_data; - - // read dqweight_1d - if (tx < Q) { - sTweight[tx] = dqweight1d[tx]; - } - __syncthreads(); - - magma_weight_3d_device(sTweight, rV, tx); - - // write V - if (tx < 
(Q*Q)) { - for(int j = 0; j < Q; j++) { - dV[ j*(Q*Q) + tx ] = rV[0][0][j]; - } - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __global__ void -magma_weight_generic_kernel( - const int dim, const int pre_org, const int post_org, - const T *dqweight1d, - T *dV, const int vstride) -{ - MAGMA_DEVICE_SHARED(CeedScalar, shared_data) - const int batchid = blockIdx.x; - magma_weight_generic_device - ( dim, pre_org, post_org, dqweight1d, dV+(batchid*vstride), shared_data ); -} +#include "magma_v2.h" ////////////////////////////////////////////////////////////////////////////////////////// static __global__ void diff --git a/backends/magma/kernels/common/weight_device.h b/backends/magma/kernels/common/weight_device.h deleted file mode 100644 index f1526a3015..0000000000 --- a/backends/magma/kernels/common/weight_device.h +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#ifndef CEED_MAGMA_WEIGHT_DEVICE_H -#define CEED_MAGMA_WEIGHT_DEVICE_H - -////////////////////////////////////////////////////////////////////////////////////////// -// weight basis action -- 1D -template -__device__ __inline__ void -magma_weight_1d_device(const T* sTweight, T* sV, const int tx) -{ - // Assumptions - // 1. 1D thread configuration of size Q - // 2. The output sV is in shared memory -- size 1xQ - if (tx < Q){ - sV[tx] = sTweight[tx]; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -// weight basis action -- 2D -template -__device__ __inline__ void -magma_weight_2d_device(const T* sTweight, T rV[DIM][NCOMP][Q], const int tx) -{ - // Assumptions - // 1. 1D thread configuration of size Q - // 2. 
rV[][][] matches the storage used in other actions (interp, grad, ... etc) - // 3. iDIM and iCOMP specify which indexes to use in rV, - // since the output per thread is a register array of size Q - // 4. Sync is recommended after the call (to make sure sTweight can be overwritten) - - if (tx < Q) { - // x sTweight[j] for first update - // x sTweight[tx] for second update - for(int j = 0; j < Q; j++) { - rV[iDIM][iCOMP][j] = sTweight[j] * sTweight[tx]; - } - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -// weight basis action -- 2D -template -__device__ __inline__ void -magma_weight_3d_device(const T* sTweight, T rV[DIM][NCOMP][Q], const int tx) -{ - // Assumptions - // 1. 1D thread configuration of size Q^2 - // 2. rV[][][] matches the storage used in other actions (interp, grad, ... etc) - // 3. iDIM and iCOMP specify which indexes to use in rV, - // since the output per thread is a register array of size Q - // 4. Sync is recommended after the call (to make sure sTweight can be overwritten) - - if (tx < (Q*Q)) { - // x sTweight[j] for first update - // x sTweight[tx%Q] for second update - // x sTweight[tx/Q] for third update - for(int j = 0; j < Q; j++) { - rV[iDIM][iCOMP][j] = sTweight[j] * sTweight[tx%Q] * sTweight[tx/Q]; - } - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static __device__ __inline__ void -magma_weight_generic_device( - const int dim, - const int pre_org, const int post_org, - const T *dqweight1d, T *dV, - T* shared_data ) -{ - const int nthreads = blockDim.x; - const int tx = threadIdx.x; - int pre = pre_org; - int post = post_org; - - int tx_ = tx % post; - int slice_id = tx / post; - - // the size of V is Q^dim, which is pre * post - T* sVorg = (T*)shared_data; - T* sqweight1d = sVorg + (pre*post*Q); - T* sV; - - // read qweight1d into shared memory - int i = 0; - for(i = 0; i < Q-nthreads; i += nthreads) { - 
sqweight1d[i+tx] = dqweight1d[i+tx]; - } - - if (tx < Q-i) { - sqweight1d[i+tx] = dqweight1d[i+tx]; - } - __syncthreads(); - - // first iteration -- special case - sV = sVorg + slice_id * post * Q; - #pragma unroll - for(int j = 0; j < Q; j++) { - sV[j * post + tx_] = sqweight1d[j]; - } - __syncthreads(); - - // rest of iterations - for(int d = 1; d < dim; d++) { - // remapping - pre /= Q; - post *= Q; - tx_ = tx % post; - slice_id = tx / post; - sV = sVorg + slice_id * post * Q; - #pragma unroll - for(int j = 0; j < Q; j++) { - sV[j * post + tx_] *= sqweight1d[j]; - } - __syncthreads(); - } - - // write V back, advance dV and - // use the values of pre, post, tx_, and sV - dV += slice_id * post * Q; - #pragma unroll - for(int j = 0; j < Q; j++) { - dV[j * post + tx_] = sV[j * post + tx_]; - } -} - -#endif // CEED_MAGMA_WEIGHT_DEVICE_H diff --git a/backends/magma/kernels/cuda/grad_1d.cu b/backends/magma/kernels/cuda/grad_1d.cu deleted file mode 100644 index 1eebc75ae2..0000000000 --- a/backends/magma/kernels/cuda/grad_1d.cu +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include // for CUDA_VERSION -#include "../common/grad.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_grad_1d_kernel_driver( - const T *dTgrad, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - - const int MAXPQ = maxpq(P,Q); - magma_int_t nthreads = MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_1D); - magma_int_t shmem = 0; - shmem += sizeof(T) * ntcol * (NCOMP * (1*P + 1*Q)); - shmem += sizeof(T) * (P*Q); - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_grad_1d_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - magma_grad_1d_kernel<<>> - (dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem); - return (cudaPeekAtLastError() == cudaSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_grad_1d_ncomp( - magma_int_t ncomp, - const CeedScalar *dTgrad, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_grad_1d_kernel_driver - (dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_grad_1d_kernel_driver - (dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_grad_1d_kernel_driver - (dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_grad_1d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dTgrad, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, 
transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_grad_1d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dTgrad, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_grad_1d_ncomp_q< 1> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_grad_1d_ncomp_q< 2> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_grad_1d_ncomp_q< 3> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_grad_1d_ncomp_q< 4> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_grad_1d_ncomp_q< 5> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, 
queue); - break; - case 6: - launch_failed = magma_grad_1d_ncomp_q< 6> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_grad_1d_ncomp_q< 7> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_grad_1d_ncomp_q< 8> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_grad_1d_ncomp_q< 9> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_grad_1d_ncomp_q<10> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_grad_1d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_grad_1d_ncomp_q_p( - P, Q, ncomp, - dTgrad, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/grad_generic.cu b/backends/magma/kernels/cuda/grad_generic.cu deleted file mode 100644 index 6abd28a3ae..0000000000 --- a/backends/magma/kernels/cuda/grad_generic.cu +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "../common/grad.h" - -#define cu_ipow(a,b) ( (int)(__powf( (float)(a), (float)(b) ) ) ) -#define ipow(a,b) ( (magma_int_t)(std::pow( (float)(a), (float)(b) ) ) ) - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_grad_generic_kernel_driver( - magma_int_t dim, magma_int_t ncomp, - const T* dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, magma_int_t estrdU, const int cstrdU, - T *dV, magma_int_t estrdV, const int cstrdV, - magma_int_t dim_id, magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - - magma_int_t shmem_max, nthreads_max; - // ncomp*Q*CeedIntPow(P>Q?P:Q,dim-1); - // originally the exponent is (dim-1), but we use dim because - // we have to read the original u in shared memory - // the original implementation access u directly - magma_int_t tmp_size = CeedIntPow(max(P,Q), dim); //ncomp * Q * CeedIntPow(max(P,Q), dim); - magma_int_t shmem = 2 * P * Q * sizeof(T); - shmem += 2 * tmp_size * sizeof(T); - - magma_int_t pre = CeedIntPow(P, dim-1); - magma_int_t nthreads = max(P, CeedIntPow(Q, dim-1) ); - nthreads = magma_roundup( nthreads, Q ); // nthreads must be multiple of Q - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_grad_generic_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( nthreads > nthreads_max || shmem > shmem_max ) { - return 1; - } - else { - dim3 threads(nthreads, 1, 1); - dim3 grid(nelem, ncomp, 1); - magma_grad_generic_kernel<<>> - ( 
dim, ncomp, pre, tmp_size, dinterp1d, dgrad1d, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - dim_id ); - return (cudaPeekAtLastError() == cudaSuccess) ? 0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_grad_generic_q( - magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t dim_id, magma_int_t nelem, - magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch(Q){ - case 1: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 2: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 3: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 4: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 5: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 6: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 7: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 8: - launch_failed = magma_grad_generic_kernel_driver - ( 
dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 9: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 10: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_grad_generic_q_p( - magma_int_t P, magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t dim_id, magma_int_t nelem, - magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch(P){ - case 1: - launch_failed = magma_grad_generic_q< 1> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 2: - launch_failed = magma_grad_generic_q< 2> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 3: - launch_failed = magma_grad_generic_q< 3> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 4: - launch_failed = magma_grad_generic_q< 4> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 5: - launch_failed = magma_grad_generic_q< 5> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 6: - launch_failed = magma_grad_generic_q< 6> - (Q, dim, ncomp, dinterp1d, dgrad1d, 
transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 7: - launch_failed = magma_grad_generic_q< 7> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 8: - launch_failed = magma_grad_generic_q< 8> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 9: - launch_failed = magma_grad_generic_q< 9> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 10: - launch_failed = magma_grad_generic_q<10> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_grad_generic( - magma_int_t P, magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t u_dimstride, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t v_dimstride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? 
MagmaNoTrans : MagmaTrans; - // Loop through grad dimensions only, batch call over elements and components - for (CeedInt dim_ctr = 0; dim_ctr < dim; dim_ctr++) { - launch_failed = magma_grad_generic_q_p( - P, Q, dim, ncomp, - dinterp1d, dgrad1d, transT, - dU + dim_ctr * u_dimstride, estrdU, cstrdU, - dV + dim_ctr * v_dimstride, estrdV, cstrdV, - dim_ctr, nelem, queue ); - if (launch_failed != 0) break; - } - - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/gradn_2d.cu b/backends/magma/kernels/cuda/gradn_2d.cu deleted file mode 100644 index 6ee301ff52..0000000000 --- a/backends/magma/kernels/cuda/gradn_2d.cu +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include // for CUDA_VERSION -#include "../common/grad.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradn_2d_kernel_driver( - const T *dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - const int MAXPQ = maxpq(P,Q); - - magma_int_t nthreads = MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_2D); - magma_int_t shmem = 0; - shmem += sizeof(T) * 2*P*Q; // for sTinterp and sTgrad - shmem += sizeof(T) * ntcol * (P*MAXPQ); // for reforming rU we need PxP, and for the intermediate output we need PxQ - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, 
cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_gradn_2d_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - // IMPORTANT: we instantiate with DIM=1 instead of DIM=2 because the kernel handles one dimension at a time - // We should instantiate with DIM >= 1 when we fuse the whole operator, because of the q-function - magma_gradn_2d_kernel<<>> - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem); - return (cudaPeekAtLastError() == cudaSuccess) ? 0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradn_2d_ncomp( - magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_gradn_2d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradn_2d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradn_2d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - 
-////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradn_2d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, 
nelem, queue); - break; - case 10: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_gradn_2d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_gradn_2d_ncomp_q< 1> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradn_2d_ncomp_q< 2> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradn_2d_ncomp_q< 3> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradn_2d_ncomp_q< 4> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradn_2d_ncomp_q< 5> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradn_2d_ncomp_q< 6> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradn_2d_ncomp_q< 7> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, 
queue); - break; - case 8: - launch_failed = magma_gradn_2d_ncomp_q< 8> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradn_2d_ncomp_q< 9> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradn_2d_ncomp_q<10> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_gradn_2d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_gradn_2d_ncomp_q_p( - P, Q, ncomp, - dinterp1d, dgrad1d, transT, - dU, estrdU, cstrdU, dstrdU, - dV, estrdV, cstrdV, dstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/gradn_3d.cu b/backends/magma/kernels/cuda/gradn_3d.cu deleted file mode 100644 index 24ac7a4cdc..0000000000 --- a/backends/magma/kernels/cuda/gradn_3d.cu +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include // for CUDA_VERSION -#include "../common/grad.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradn_3d_kernel_driver( - const T *dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - const int MAXPQ = maxpq(P,Q); - - magma_int_t nthreads = MAXPQ*MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_3D); - magma_int_t shmem = 0; - shmem += sizeof(T) * 2*P*Q; // for sTinterp and sTgrad - shmem += sizeof(T) * ntcol * max(P*P*P, (P*P*Q) + (P*Q*Q)); // rU needs P^2xP, the intermediate outputs need (P^2.Q + P.Q^2) - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_gradn_3d_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - // IMPORTANT: we instantiate with DIM=1 instead of DIM=3 because the kernel handles one dimension at a time - // We should instantiate with DIM >= 1 when we fuse the whole operator, because of the q-function - magma_gradn_3d_kernel<<>> - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, 
estrdV, cstrdV, dstrdV, nelem); - return (cudaPeekAtLastError() == cudaSuccess) ? 0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradn_3d_ncomp( - magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_gradn_3d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradn_3d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradn_3d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradn_3d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, 
nelem, queue); - break; - case 3: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_gradn_3d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = 
magma_gradn_3d_ncomp_q< 1> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradn_3d_ncomp_q< 2> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradn_3d_ncomp_q< 3> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradn_3d_ncomp_q< 4> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradn_3d_ncomp_q< 5> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradn_3d_ncomp_q< 6> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradn_3d_ncomp_q< 7> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradn_3d_ncomp_q< 8> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradn_3d_ncomp_q< 9> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradn_3d_ncomp_q<10> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_gradn_3d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const 
CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_gradn_3d_ncomp_q_p( - P, Q, ncomp, - dinterp1d, dgrad1d, transT, - dU, estrdU, cstrdU, dstrdU, - dV, estrdV, cstrdV, dstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/gradt_2d.cu b/backends/magma/kernels/cuda/gradt_2d.cu deleted file mode 100644 index 4165531e0e..0000000000 --- a/backends/magma/kernels/cuda/gradt_2d.cu +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include // for CUDA_VERSION -#include "../common/grad.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradt_2d_kernel_driver( - const T *dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - const int MAXPQ = maxpq(P,Q); - - magma_int_t nthreads = MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_2D); - magma_int_t shmem = 0; - shmem += sizeof(T) * 2*P*Q; // for sTinterp and sTgrad - shmem += sizeof(T) * ntcol * (P*MAXPQ); // for reforming rU we need PxP, and for the intermediate output we need 
PxQ - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_gradt_2d_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - // IMPORTANT: we instantiate with DIM=1 instead of DIM=2 because interp operators deal with dim=0 only - // We should instantiate with DIM=2 when we fuse interp and grad operators, because the grad operator - // needs to access data from all dimensions - magma_gradt_2d_kernel<<>> - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem); - return (cudaPeekAtLastError() == cudaSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradt_2d_ncomp( - magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_gradt_2d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradt_2d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradt_2d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradt_2d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradt_2d_ncomp - (ncomp, 
dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_gradt_2d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_gradt_2d_ncomp_q< 1> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, 
estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradt_2d_ncomp_q< 2> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradt_2d_ncomp_q< 3> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradt_2d_ncomp_q< 4> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradt_2d_ncomp_q< 5> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradt_2d_ncomp_q< 6> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradt_2d_ncomp_q< 7> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradt_2d_ncomp_q< 8> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradt_2d_ncomp_q< 9> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradt_2d_ncomp_q<10> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_gradt_2d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, - const CeedScalar *dU, 
magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_gradt_2d_ncomp_q_p( - P, Q, ncomp, - dinterp1d, dgrad1d, transT, - dU, estrdU, cstrdU, dstrdU, - dV, estrdV, cstrdV, dstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/gradt_3d.cu b/backends/magma/kernels/cuda/gradt_3d.cu deleted file mode 100644 index c2577106ec..0000000000 --- a/backends/magma/kernels/cuda/gradt_3d.cu +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include // for CUDA_VERSION -#include "../common/grad.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradt_3d_kernel_driver( - const T *dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - const int MAXPQ = maxpq(P,Q); - - magma_int_t nthreads = MAXPQ*MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_3D); - magma_int_t shmem = 0; - shmem += sizeof(T) * 2*P*Q; // for sTinterp and sTgrad - shmem += sizeof(T) * ntcol * max(P*P*P, (P*P*Q) + (P*Q*Q)); // rU needs P^2xP, the intermediate outputs need (P^2.Q + P.Q^2) - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, 
device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_gradt_3d_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - // IMPORTANT: we instantiate with DIM=1 instead of DIM=3 because the kernel handles one dimension at a time - // We should instantiate with DIM >= 1 when we fuse the whole operator, because of the q-function - magma_gradt_3d_kernel<<>> - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem); - return (cudaPeekAtLastError() == cudaSuccess) ? 0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradt_3d_ncomp( - magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_gradt_3d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradt_3d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradt_3d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, 
queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradt_3d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradt_3d_ncomp - (ncomp, 
dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_gradt_3d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_gradt_3d_ncomp_q< 1> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradt_3d_ncomp_q< 2> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradt_3d_ncomp_q< 3> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradt_3d_ncomp_q< 4> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradt_3d_ncomp_q< 5> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradt_3d_ncomp_q< 6> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradt_3d_ncomp_q< 7> - (Q, ncomp, dinterp1d, 
dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradt_3d_ncomp_q< 8> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradt_3d_ncomp_q< 9> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradt_3d_ncomp_q<10> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_gradt_3d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_gradt_3d_ncomp_q_p( - P, Q, ncomp, - dinterp1d, dgrad1d, transT, - dU, estrdU, cstrdU, dstrdU, - dV, estrdV, cstrdV, dstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/interp_1d.cu b/backends/magma/kernels/cuda/interp_1d.cu deleted file mode 100644 index ba153e6793..0000000000 --- a/backends/magma/kernels/cuda/interp_1d.cu +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include // for CUDA_VERSION -#include "../common/interp.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_1d_kernel_driver( - const T *dT, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - - const int MAXPQ = maxpq(P,Q); - magma_int_t nthreads = MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_1D); - magma_int_t shmem = 0; - shmem += sizeof(T) * ntcol * ( NCOMP * (1*P + 1*Q) ); - shmem += sizeof(T) * (P*Q); - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_interp_1d_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks , 1, 1); - magma_interp_1d_kernel<<>> - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem); - return (cudaPeekAtLastError() == cudaSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_1d_ncomp( - magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_interp_1d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_1d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_1d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_1d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, 
cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_interp_1d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_interp_1d_ncomp_q< 1> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_1d_ncomp_q< 2> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_1d_ncomp_q< 3> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_1d_ncomp_q< 4> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_interp_1d_ncomp_q< 5> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed 
= magma_interp_1d_ncomp_q< 6> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_1d_ncomp_q< 7> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_1d_ncomp_q< 8> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_1d_ncomp_q< 9> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_1d_ncomp_q<10> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_interp_1d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_interp_1d_ncomp_q_p( - P, Q, ncomp, - dT, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/interp_2d.cu b/backends/magma/kernels/cuda/interp_2d.cu deleted file mode 100644 index c6a0b87d7c..0000000000 --- a/backends/magma/kernels/cuda/interp_2d.cu +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include // for CUDA_VERSION -#include "../common/interp.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_2d_kernel_driver( - const T *dT, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - const int MAXPQ = maxpq(P,Q); - - magma_int_t nthreads = MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_2D); - magma_int_t shmem = 0; - shmem += P*Q *sizeof(T); // for sT - shmem += ntcol * ( P*MAXPQ*sizeof(T) ); // for reforming rU we need PxP, and for the intermediate output we need PxQ - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_interp_2d_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - magma_interp_2d_kernel<<>> - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem); - return (cudaPeekAtLastError() == cudaSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_2d_ncomp( - magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_interp_2d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_2d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_2d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_1d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, 
cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_interp_2d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_interp_1d_ncomp_q< 1> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_1d_ncomp_q< 2> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_1d_ncomp_q< 3> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_1d_ncomp_q< 4> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_interp_1d_ncomp_q< 5> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed 
= magma_interp_1d_ncomp_q< 6> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_1d_ncomp_q< 7> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_1d_ncomp_q< 8> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_1d_ncomp_q< 9> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_1d_ncomp_q<10> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_interp_2d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_interp_2d_ncomp_q_p( - P, Q, ncomp, - dT, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/interp_3d.cu b/backends/magma/kernels/cuda/interp_3d.cu deleted file mode 100644 index e102c205f7..0000000000 --- a/backends/magma/kernels/cuda/interp_3d.cu +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include // for CUDA_VERSION -#include "../common/interp.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_3d_kernel_driver( - const T *dT, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - const int MAXPQ = maxpq(P,Q); - - magma_int_t nthreads = MAXPQ*MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_3D); - magma_int_t shmem = 0; - shmem += sizeof(T)* (P*Q); // for sT - shmem += sizeof(T)* ntcol * (max(P*P*MAXPQ, P*Q*Q)); // rU needs P^2xP, the intermediate output needs max(P^2xQ,PQ^2) - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_interp_3d_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - magma_interp_3d_kernel<<>> - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem); - return (cudaPeekAtLastError() == cudaSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_3d_ncomp( - magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_interp_3d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_3d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_3d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_1d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, 
cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_interp_3d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_interp_1d_ncomp_q< 1> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_1d_ncomp_q< 2> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_1d_ncomp_q< 3> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_1d_ncomp_q< 4> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_interp_1d_ncomp_q< 5> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed 
= magma_interp_1d_ncomp_q< 6> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_1d_ncomp_q< 7> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_1d_ncomp_q< 8> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_1d_ncomp_q< 9> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_1d_ncomp_q<10> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_interp_3d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_interp_3d_ncomp_q_p( - P, Q, ncomp, - dT, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/interp_generic.cu b/backends/magma/kernels/cuda/interp_generic.cu deleted file mode 100644 index 1b1cc9f228..0000000000 --- a/backends/magma/kernels/cuda/interp_generic.cu +++ /dev/null @@ -1,248 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include // for CUDA_VERSION -#include "../common/interp.h" - -#define ipow(a,b) ( (magma_int_t)(std::pow( (float)(a), (float)(b) ) ) ) - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -interp_generic_kernel_driver( - magma_int_t dim, magma_int_t ncomp, - const T *dT, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - - magma_int_t shmem_max, nthreads_max; - magma_int_t pre = ipow(P, dim-1); //ncomp*CeedIntPow(P, dim-1); - magma_int_t post = 1; - // ncomp*Q*CeedIntPow(P>Q?P:Q,dim-1); - // originally the exponent is (dim-1), but we use dim because - // we have to read the original u in shared memory - // the original implementation access u directly - magma_int_t tmp_size = ipow(max(P,Q), dim); //ncomp * Q * ipow(max(P,Q), dim); - magma_int_t shmem = P * Q * sizeof(T); - shmem += 2 * tmp_size * sizeof(T); - - magma_int_t nthreads = max(P, ipow(Q, dim-1) ); - nthreads = magma_roundup( nthreads, Q ); // nthreads must be multiple of Q - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(interp_generic_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( nthreads > nthreads_max || shmem > shmem_max ) { - return 1; - } - else { - dim3 threads(nthreads, 1, 1); - dim3 grid(nelem, ncomp, 1); - interp_generic_kernel<<>> - ( dim, ncomp, pre, post, tmp_size, dT, transT, - dU, estrdU, 
cstrdU, - dV, estrdV, cstrdV ); - return (cudaPeekAtLastError() == cudaSuccess) ? 0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_generic_q( - magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, 
dV, estrdV, cstrdV, nelem, queue); - break; - case 11: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 12: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 13: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 14: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 15: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 16: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -magma_int_t -static magma_interp_generic_q_p( - magma_int_t P, magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_interp_generic_q< 1> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_generic_q< 2> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_generic_q< 3> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_generic_q< 4> - (Q, dim, ncomp, dT, 
transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_interp_generic_q< 5> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_interp_generic_q< 6> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_generic_q< 7> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_generic_q< 8> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_generic_q< 9> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_generic_q<10> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 11: - launch_failed = magma_interp_generic_q<11> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 12: - launch_failed = magma_interp_generic_q<12> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 13: - launch_failed = magma_interp_generic_q<13> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 14: - launch_failed = magma_interp_generic_q<14> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 15: - launch_failed = magma_interp_generic_q<15> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 16: - launch_failed = magma_interp_generic_q<16> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - 
-////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_interp_generic( - magma_int_t P, magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar *dT, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - - launch_failed = magma_interp_generic_q_p( - P, Q, dim, ncomp, - dT, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/magma_devptr.cu b/backends/magma/kernels/cuda/magma_devptr.cu deleted file mode 100644 index b5b1dc23b4..0000000000 --- a/backends/magma/kernels/cuda/magma_devptr.cu +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include -#include -#include - -/***************************************************************************//** - Determines whether a pointer points to CPU or GPU memory. - - This is very similar to magma_is_devptr, except that it does not check for - unified addressing support. - @param[in] A pointer to test - - @return 1: if A is a device pointer (definitely), - @return 0: if A is a host pointer (definitely or inferred from error), - @return -1: if unknown. - - @ingroup magma_util -*******************************************************************************/ -extern "C" magma_int_t -magma_isdevptr( const void* A ) -{ - cudaError_t err; - cudaPointerAttributes attr; - int dev; // must be int - err = cudaGetDevice( &dev ); - if ( ! err ) { - err = cudaPointerGetAttributes( &attr, A); - if ( ! 
err ) { - // definitely know type - #if CUDA_VERSION >= 10000 - return (attr.type == cudaMemoryTypeDevice); - #else - return (attr.memoryType == cudaMemoryTypeDevice); - #endif - } - else if ( err == cudaErrorInvalidValue ) { - // clear error; see http://icl.cs.utk.edu/magma/forum/viewtopic.php?f=2&t=529 - cudaGetLastError(); - // infer as host pointer - return 0; - } - } - // clear error - cudaGetLastError(); - // unknown, e.g., device doesn't support unified addressing - return -1; -} diff --git a/backends/magma/kernels/cuda/magma_drestrictApply.cu b/backends/magma/kernels/cuda/magma_drestrictApply.cu deleted file mode 100644 index 9ab364dc0a..0000000000 --- a/backends/magma/kernels/cuda/magma_drestrictApply.cu +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include -#include -#include "../common/elem_restriction.h" - -////////////////////////////////////////////////////////////////////////////////////////// -// ReadDofs to device memory -// du is L-vector, size lsize -// dv is E-vector, size nelem * esize * NCOMP -extern "C" void -magma_readDofsOffset(const magma_int_t NCOMP, const magma_int_t compstride, - const magma_int_t esize, const magma_int_t nelem, - magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv, - magma_queue_t queue) -{ - magma_int_t grid = nelem; - magma_int_t threads = MAGMA_ERSTR_THREADS; - - magma_readDofsOffset_kernel<<>>(NCOMP, compstride, - esize, nelem, offsets, du, dv); -} - -// ReadDofs to device memory, strided description for L-vector -// du is L-vector, size lsize -// dv is E-vector, size nelem * esize * NCOMP -extern "C" void -magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, - const magma_int_t nelem, const int *strides, - const 
CeedScalar *du, CeedScalar *dv, - magma_queue_t queue) -{ - magma_int_t grid = nelem; - magma_int_t threads = MAGMA_ERSTR_THREADS; - - magma_readDofsStrided_kernel<<>>(NCOMP, esize, nelem, - strides, du, dv); -} - -// WriteDofs from device memory -// du is E-vector, size nelem * esize * NCOMP -// dv is L-vector, size lsize -extern "C" void -magma_writeDofsOffset(const magma_int_t NCOMP, const magma_int_t compstride, - const magma_int_t esize, const magma_int_t nelem, - magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv, - magma_queue_t queue) -{ - magma_int_t grid = nelem; - magma_int_t threads = MAGMA_ERSTR_THREADS; - - if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { - magma_writeDofsOffset_kernel_s<<>>(NCOMP, compstride, - esize, nelem, offsets, (float*)du, (float*)dv); - } - else { - magma_writeDofsOffset_kernel_d<<>>(NCOMP, compstride, - esize, nelem, offsets, (double*)du, (double*)dv); - } -} - -// WriteDofs from device memory, strided description for L-vector -// du is E-vector, size nelem * esize * NCOMP -// dv is L-vector, size lsize -extern "C" void -magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, - const magma_int_t nelem, const int *strides, - const CeedScalar *du, CeedScalar *dv, - magma_queue_t queue) -{ - magma_int_t grid = nelem; - magma_int_t threads = MAGMA_ERSTR_THREADS; - - if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { - magma_writeDofsStrided_kernel_s<<>>(NCOMP, esize, nelem, - strides, (float*)du, (float*)dv); - } - else { - magma_writeDofsStrided_kernel_d<<>>(NCOMP, esize, nelem, - strides, (double*)du, (double*)dv); - } -} diff --git a/backends/magma/kernels/cuda/weight_1d.cu b/backends/magma/kernels/cuda/weight_1d.cu deleted file mode 100644 index c52580ab4b..0000000000 --- a/backends/magma/kernels/cuda/weight_1d.cu +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. 
See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include // for CUDA_VERSION -#include "../common/weight.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_weight_1d_kernel_driver( - const T *dqweight1d, T *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - - magma_int_t nthreads = Q; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_1D); - magma_int_t shmem = 0; - shmem += sizeof(T) * Q; // for dqweight1d - shmem += sizeof(T) * ntcol * Q; // for output - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_weight_1d_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - magma_weight_1d_kernel<<>> - (dqweight1d, dV, v_stride, nelem); - return (cudaPeekAtLastError() == cudaSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_weight_1d_q( - magma_int_t Q, const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 2: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 3: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 4: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 5: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 6: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 7: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 8: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 9: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 10: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_weight_1d( - magma_int_t Q, const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_weight_1d_q(Q, dqweight1d, dV, v_stride, nelem, queue); - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/weight_2d.cu b/backends/magma/kernels/cuda/weight_2d.cu deleted 
file mode 100644 index 5328a52586..0000000000 --- a/backends/magma/kernels/cuda/weight_2d.cu +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include // for CUDA_VERSION -#include "../common/weight.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_weight_2d_kernel_driver( - const T *dqweight1d, T *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - - magma_int_t nthreads = Q; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_2D); - magma_int_t shmem = 0; - shmem += sizeof(T) * Q; // for dqweight1d - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_weight_2d_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - magma_weight_2d_kernel<<>> - (dqweight1d, dV, v_stride, nelem); - return (cudaPeekAtLastError() == cudaSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_weight_2d_q( - magma_int_t Q, const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 2: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 3: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 4: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 5: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 6: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 7: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 8: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 9: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 10: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_weight_2d( - magma_int_t Q, const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_weight_2d_q(Q, dqweight1d, dV, v_stride, nelem, queue); - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/weight_3d.cu b/backends/magma/kernels/cuda/weight_3d.cu deleted 
file mode 100644 index 314e85f48d..0000000000 --- a/backends/magma/kernels/cuda/weight_3d.cu +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include // for CUDA_VERSION -#include "../common/weight.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_weight_3d_kernel_driver( - const T *dqweight1d, T *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - - magma_int_t nthreads = (Q*Q); - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_3D); - magma_int_t shmem = 0; - shmem += sizeof(T) * Q; // for dqweight1d - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_weight_3d_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - magma_weight_3d_kernel<<>> - (dqweight1d, dV, v_stride, nelem); - return (cudaPeekAtLastError() == cudaSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_weight_3d_q( - magma_int_t Q, const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 2: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 3: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 4: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 5: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 6: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 7: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 8: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 9: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 10: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_weight_3d( - magma_int_t Q, const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_weight_3d_q(Q, dqweight1d, dV, v_stride, nelem, queue); - return launch_failed; -} diff --git a/backends/magma/kernels/cuda/weight_generic.cu b/backends/magma/kernels/cuda/weight_generic.cu 
index 45be88d8c2..567e3c3c02 100644 --- a/backends/magma/kernels/cuda/weight_generic.cu +++ b/backends/magma/kernels/cuda/weight_generic.cu @@ -7,119 +7,6 @@ #include "../common/weight.h" -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_weight_generic_kernel_driver( - magma_int_t dim, - const T *dqweight1d, - T *dV, magma_int_t vstride, - magma_int_t batchCount, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - - magma_int_t shmem_max, nthreads_max; - - magma_int_t pre_org = CeedIntPow(Q, dim-0-1); - magma_int_t post_org = CeedIntPow(Q, 0); - - magma_int_t vsize = CeedIntPow(Q, dim); - magma_int_t shmem = vsize * sizeof(T); // holds dV in shared memory - shmem += (Q * sizeof(T)); // holds qweight1d - - magma_int_t nthreads = CeedIntPow(Q, dim-1); - - cudaDeviceGetAttribute (&nthreads_max, cudaDevAttrMaxThreadsPerBlock, device); - #if CUDA_VERSION >= 9000 - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); - if (shmem <= shmem_max) { - cudaFuncSetAttribute(magma_weight_generic_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shmem); - } - #else - cudaDeviceGetAttribute (&shmem_max, cudaDevAttrMaxSharedMemoryPerBlock, device); - #endif // CUDA_VERSION >= 9000 - - if ( nthreads > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - dim3 threads(nthreads, 1, 1); - dim3 grid(batchCount, 1, 1); - magma_weight_generic_kernel<<>> - ( dim, pre_org, post_org, dqweight1d, dV, vstride ); - return (cudaPeekAtLastError() == cudaSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_weight_generic_q( - magma_int_t Q, magma_int_t dim, - const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t vstride, - magma_int_t batchCount, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 2: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 3: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 4: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 5: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 6: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 7: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 8: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 9: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 10: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_weight_generic( - magma_int_t Q, magma_int_t dim, - const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t vstride, - magma_int_t batchCount, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - 
launch_failed = magma_weight_generic_q(Q, dim, dqweight1d, dV, vstride, batchCount, queue); - return launch_failed; -} - ////////////////////////////////////////////////////////////////////////////////////////// // NonTensor weight function extern "C" void diff --git a/backends/magma/kernels/hip/grad_1d.hip.cpp b/backends/magma/kernels/hip/grad_1d.hip.cpp deleted file mode 100644 index 7319a5bbf2..0000000000 --- a/backends/magma/kernels/hip/grad_1d.hip.cpp +++ /dev/null @@ -1,208 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/grad.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_grad_1d_kernel_driver( - const T *dTgrad, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - - const int MAXPQ = maxpq(P,Q); - magma_int_t nthreads = MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_1D); - magma_int_t shmem = 0; - shmem += sizeof(T) * ntcol * (NCOMP * (1*P + 1*Q)); - shmem += sizeof(T) * (P*Q); - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_grad_1d_kernel), dim3(grid), 
dim3(threads), shmem, magma_queue_get_hip_stream(queue), dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem); - return (hipPeekAtLastError() == hipSuccess) ? 0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_grad_1d_ncomp( - magma_int_t ncomp, - const CeedScalar *dTgrad, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_grad_1d_kernel_driver - (dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_grad_1d_kernel_driver - (dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_grad_1d_kernel_driver - (dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_grad_1d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dTgrad, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = 
magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_grad_1d_ncomp - (ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_grad_1d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dTgrad, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_grad_1d_ncomp_q< 1> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_grad_1d_ncomp_q< 2> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_grad_1d_ncomp_q< 3> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_grad_1d_ncomp_q< 4> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, 
dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_grad_1d_ncomp_q< 5> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_grad_1d_ncomp_q< 6> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_grad_1d_ncomp_q< 7> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_grad_1d_ncomp_q< 8> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_grad_1d_ncomp_q< 9> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_grad_1d_ncomp_q<10> - (Q, ncomp, dTgrad, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_grad_1d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? 
MagmaNoTrans : MagmaTrans; - launch_failed = magma_grad_1d_ncomp_q_p( - P, Q, ncomp, - dTgrad, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/hip/grad_generic.hip.cpp b/backends/magma/kernels/hip/grad_generic.hip.cpp deleted file mode 100644 index 07555ed33b..0000000000 --- a/backends/magma/kernels/hip/grad_generic.hip.cpp +++ /dev/null @@ -1,198 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/grad.h" - -#define hip_ipow(a,b) ( (int)(__powf( (float)(a), (float)(b) ) ) ) -#define ipow(a,b) ( (magma_int_t)(std::pow( (float)(a), (float)(b) ) ) ) - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_grad_generic_kernel_driver( - magma_int_t dim, magma_int_t ncomp, - const T* dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, magma_int_t estrdU, const int cstrdU, - T *dV, magma_int_t estrdV, const int cstrdV, - magma_int_t dim_id, magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - - magma_int_t shmem_max, nthreads_max; - // ncomp*Q*CeedIntPow(P>Q?P:Q,dim-1); - // originally the exponent is (dim-1), but we use dim because - // we have to read the original u in shared memory - // the original implementation access u directly - magma_int_t tmp_size = CeedIntPow(max(P,Q), dim); //ncomp * Q * CeedIntPow(max(P,Q), dim); - magma_int_t shmem = 2 * P * Q * sizeof(T); - shmem += 2 * tmp_size * sizeof(T); - - magma_int_t pre = CeedIntPow(P, dim-1); - magma_int_t nthreads = max(P, CeedIntPow(Q, dim-1) ); - nthreads = magma_roundup( nthreads, Q ); // nthreads must be 
multiple of Q - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( nthreads > nthreads_max || shmem > shmem_max ) { - return 1; - } - else { - dim3 threads(nthreads, 1, 1); - dim3 grid(nelem, ncomp, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_grad_generic_kernel), dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dim, ncomp, pre, tmp_size, dinterp1d, dgrad1d, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - dim_id ); - return (hipPeekAtLastError() == hipSuccess) ? 0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_grad_generic_q( - magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t dim_id, magma_int_t nelem, - magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch(Q){ - case 1: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 2: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 3: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 4: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 5: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, 
estrdV, cstrdV, dim_id, nelem, queue); - break; - case 6: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 7: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 8: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 9: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 10: - launch_failed = magma_grad_generic_kernel_driver - ( dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_grad_generic_q_p( - magma_int_t P, magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t dim_id, magma_int_t nelem, - magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch(P){ - case 1: - launch_failed = magma_grad_generic_q< 1> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 2: - launch_failed = magma_grad_generic_q< 2> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 3: - launch_failed = magma_grad_generic_q< 3> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, 
dim_id, nelem, queue); - break; - case 4: - launch_failed = magma_grad_generic_q< 4> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 5: - launch_failed = magma_grad_generic_q< 5> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 6: - launch_failed = magma_grad_generic_q< 6> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 7: - launch_failed = magma_grad_generic_q< 7> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 8: - launch_failed = magma_grad_generic_q< 8> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 9: - launch_failed = magma_grad_generic_q< 9> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - case 10: - launch_failed = magma_grad_generic_q<10> - (Q, dim, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, dim_id, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_grad_generic( - magma_int_t P, magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t u_dimstride, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t v_dimstride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? 
MagmaNoTrans : MagmaTrans; - // Loop through grad dimensions only, batch call over elements and components - for (CeedInt dim_ctr = 0; dim_ctr < dim; dim_ctr++) { - launch_failed = magma_grad_generic_q_p( - P, Q, dim, ncomp, - dinterp1d, dgrad1d, transT, - dU + dim_ctr * u_dimstride, estrdU, cstrdU, - dV + dim_ctr * v_dimstride, estrdV, cstrdV, - dim_ctr, nelem, queue ); - if (launch_failed != 0) break; - } - - return launch_failed; -} diff --git a/backends/magma/kernels/hip/gradn_2d.hip.cpp b/backends/magma/kernels/hip/gradn_2d.hip.cpp deleted file mode 100644 index 2eb3e6a129..0000000000 --- a/backends/magma/kernels/hip/gradn_2d.hip.cpp +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/grad.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradn_2d_kernel_driver( - const T *dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - const int MAXPQ = maxpq(P,Q); - - magma_int_t nthreads = MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_2D); - magma_int_t shmem = 0; - shmem += sizeof(T) * 2*P*Q; // for sTinterp and sTgrad - shmem += sizeof(T) * ntcol * (P*MAXPQ); // for reforming rU we need PxP, and for the intermediate output we need PxQ - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, 
hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - // IMPORTANT: we instantiate with DIM=1 instead of DIM=2 because the kernel handles one dimension at a time - // We should instantiate with DIM >= 1 when we fuse the whole operator, because of the q-function - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_gradn_2d_kernel), dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem); - return (hipPeekAtLastError() == hipSuccess) ? 0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradn_2d_ncomp( - magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_gradn_2d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradn_2d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradn_2d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradn_2d_ncomp_q( - 
magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradn_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, 
cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_gradn_2d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_gradn_2d_ncomp_q< 1> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradn_2d_ncomp_q< 2> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradn_2d_ncomp_q< 3> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradn_2d_ncomp_q< 4> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradn_2d_ncomp_q< 5> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradn_2d_ncomp_q< 6> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradn_2d_ncomp_q< 7> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradn_2d_ncomp_q< 8> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, 
cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradn_2d_ncomp_q< 9> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradn_2d_ncomp_q<10> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_gradn_2d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_gradn_2d_ncomp_q_p( - P, Q, ncomp, - dinterp1d, dgrad1d, transT, - dU, estrdU, cstrdU, dstrdU, - dV, estrdV, cstrdV, dstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/hip/gradn_3d.hip.cpp b/backends/magma/kernels/hip/gradn_3d.hip.cpp deleted file mode 100644 index dca310d040..0000000000 --- a/backends/magma/kernels/hip/gradn_3d.hip.cpp +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/grad.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradn_3d_kernel_driver( - const T *dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - const int MAXPQ = maxpq(P,Q); - - magma_int_t nthreads = MAXPQ*MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_3D); - magma_int_t shmem = 0; - shmem += sizeof(T) * 2*P*Q; // for sTinterp and sTgrad - shmem += sizeof(T) * ntcol * max(P*P*P, (P*P*Q) + (P*Q*Q)); // rU needs P^2xP, the intermediate outputs need (P^2.Q + P.Q^2) - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - // IMPORTANT: we instantiate with DIM=1 instead of DIM=3 because the kernel handles one dimension at a time - // We should instantiate with DIM >= 1 when we fuse the whole operator, because of the q-function - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_gradn_3d_kernel), dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem); - return (hipPeekAtLastError() == hipSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradn_3d_ncomp( - magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_gradn_3d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradn_3d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradn_3d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradn_3d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradn_3d_ncomp - (ncomp, 
dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradn_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_gradn_3d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_gradn_3d_ncomp_q< 1> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, 
estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradn_3d_ncomp_q< 2> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradn_3d_ncomp_q< 3> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradn_3d_ncomp_q< 4> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradn_3d_ncomp_q< 5> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradn_3d_ncomp_q< 6> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradn_3d_ncomp_q< 7> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradn_3d_ncomp_q< 8> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradn_3d_ncomp_q< 9> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradn_3d_ncomp_q<10> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_gradn_3d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, - const CeedScalar *dU, 
magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_gradn_3d_ncomp_q_p( - P, Q, ncomp, - dinterp1d, dgrad1d, transT, - dU, estrdU, cstrdU, dstrdU, - dV, estrdV, cstrdV, dstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/hip/gradt_2d.hip.cpp b/backends/magma/kernels/hip/gradt_2d.hip.cpp deleted file mode 100644 index eb87cfd583..0000000000 --- a/backends/magma/kernels/hip/gradt_2d.hip.cpp +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/grad.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradt_2d_kernel_driver( - const T *dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - const int MAXPQ = maxpq(P,Q); - - magma_int_t nthreads = MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_2D); - magma_int_t shmem = 0; - shmem += sizeof(T) * 2*P*Q; // for sTinterp and sTgrad - shmem += sizeof(T) * ntcol * (P*MAXPQ); // for reforming rU we need PxP, and for the intermediate output we need PxQ - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, 
device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - // IMPORTANT: we instantiate with DIM=1 instead of DIM=2 because interp operators deal with dim=0 only - // We should instantiate with DIM=2 when we fuse interp and grad operators, because the grad operator - // needs to access data from all dimensions - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_gradt_2d_kernel), dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem); - return (hipPeekAtLastError() == hipSuccess) ? 0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradt_2d_ncomp( - magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_gradt_2d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradt_2d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradt_2d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - 
-////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradt_2d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, 
nelem, queue); - break; - case 10: - launch_failed = magma_gradt_2d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_gradt_2d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_gradt_2d_ncomp_q< 1> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradt_2d_ncomp_q< 2> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradt_2d_ncomp_q< 3> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradt_2d_ncomp_q< 4> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradt_2d_ncomp_q< 5> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradt_2d_ncomp_q< 6> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradt_2d_ncomp_q< 7> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, 
queue); - break; - case 8: - launch_failed = magma_gradt_2d_ncomp_q< 8> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradt_2d_ncomp_q< 9> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradt_2d_ncomp_q<10> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_gradt_2d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_gradt_2d_ncomp_q_p( - P, Q, ncomp, - dinterp1d, dgrad1d, transT, - dU, estrdU, cstrdU, dstrdU, - dV, estrdV, cstrdV, dstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/hip/gradt_3d.hip.cpp b/backends/magma/kernels/hip/gradt_3d.hip.cpp deleted file mode 100644 index cf465e0b86..0000000000 --- a/backends/magma/kernels/hip/gradt_3d.hip.cpp +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/grad.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradt_3d_kernel_driver( - const T *dinterp1d, const T *dgrad1d, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - const int MAXPQ = maxpq(P,Q); - - magma_int_t nthreads = MAXPQ*MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_3D); - magma_int_t shmem = 0; - shmem += sizeof(T) * 2*P*Q; // for sTinterp and sTgrad - shmem += sizeof(T) * ntcol * max(P*P*P, (P*P*Q) + (P*Q*Q)); // rU needs P^2xP, the intermediate outputs need (P^2.Q + P.Q^2) - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - // IMPORTANT: we instantiate with DIM=1 instead of DIM=3 because the kernel handles one dimension at a time - // We should instantiate with DIM >= 1 when we fuse the whole operator, because of the q-function - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_gradt_3d_kernel), dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem); - return (hipPeekAtLastError() == hipSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradt_3d_ncomp( - magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_gradt_3d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradt_3d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradt_3d_kernel_driver - (dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_gradt_3d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradt_3d_ncomp - (ncomp, 
dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradt_3d_ncomp - (ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_gradt_3d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_gradt_3d_ncomp_q< 1> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, 
estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_gradt_3d_ncomp_q< 2> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_gradt_3d_ncomp_q< 3> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_gradt_3d_ncomp_q< 4> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_gradt_3d_ncomp_q< 5> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_gradt_3d_ncomp_q< 6> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_gradt_3d_ncomp_q< 7> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_gradt_3d_ncomp_q< 8> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_gradt_3d_ncomp_q< 9> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_gradt_3d_ncomp_q<10> - (Q, ncomp, dinterp1d, dgrad1d, transT, dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, dstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_gradt_3d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, - const CeedScalar *dU, 
magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_gradt_3d_ncomp_q_p( - P, Q, ncomp, - dinterp1d, dgrad1d, transT, - dU, estrdU, cstrdU, dstrdU, - dV, estrdV, cstrdV, dstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/hip/interp_1d.hip.cpp b/backends/magma/kernels/hip/interp_1d.hip.cpp deleted file mode 100644 index f9b8ad1090..0000000000 --- a/backends/magma/kernels/hip/interp_1d.hip.cpp +++ /dev/null @@ -1,208 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/interp.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_1d_kernel_driver( - const T *dT, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - - const int MAXPQ = maxpq(P,Q); - magma_int_t nthreads = MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_1D); - magma_int_t shmem = 0; - shmem += sizeof(T) * ntcol * ( NCOMP * (1*P + 1*Q) ); - shmem += sizeof(T) * (P*Q); - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( (nthreads*ntcol) > nthreads_max || shmem > 
shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks , 1, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_interp_1d_kernel), dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem); - return (hipPeekAtLastError() == hipSuccess) ? 0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_1d_ncomp( - magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_interp_1d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_1d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_1d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_1d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, 
cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_1d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_interp_1d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_interp_1d_ncomp_q< 1> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_1d_ncomp_q< 2> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_1d_ncomp_q< 
3> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_1d_ncomp_q< 4> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_interp_1d_ncomp_q< 5> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_interp_1d_ncomp_q< 6> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_1d_ncomp_q< 7> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_1d_ncomp_q< 8> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_1d_ncomp_q< 9> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_1d_ncomp_q<10> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_interp_1d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? 
MagmaNoTrans : MagmaTrans; - launch_failed = magma_interp_1d_ncomp_q_p( - P, Q, ncomp, - dT, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/hip/interp_2d.hip.cpp b/backends/magma/kernels/hip/interp_2d.hip.cpp deleted file mode 100644 index e6874687bc..0000000000 --- a/backends/magma/kernels/hip/interp_2d.hip.cpp +++ /dev/null @@ -1,208 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/interp.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_2d_kernel_driver( - const T *dT, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - const int MAXPQ = maxpq(P,Q); - - magma_int_t nthreads = MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_2D); - magma_int_t shmem = 0; - shmem += P*Q *sizeof(T); // for sT - shmem += ntcol * ( P*MAXPQ*sizeof(T) ); // for reforming rU we need PxP, and for the intermediate output we need PxQ - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_interp_2d_kernel), 
dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem); - return (hipPeekAtLastError() == hipSuccess) ? 0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_2d_ncomp( - magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_interp_2d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_2d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_2d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_1d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = 
magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_2d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_interp_2d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_interp_1d_ncomp_q< 1> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_1d_ncomp_q< 2> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_1d_ncomp_q< 3> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_1d_ncomp_q< 4> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, 
nelem, queue); - break; - case 5: - launch_failed = magma_interp_1d_ncomp_q< 5> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_interp_1d_ncomp_q< 6> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_1d_ncomp_q< 7> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_1d_ncomp_q< 8> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_1d_ncomp_q< 9> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_1d_ncomp_q<10> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_interp_2d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_interp_2d_ncomp_q_p( - P, Q, ncomp, - dT, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/hip/interp_3d.hip.cpp b/backends/magma/kernels/hip/interp_3d.hip.cpp deleted file mode 100644 index 8b56d626b1..0000000000 --- a/backends/magma/kernels/hip/interp_3d.hip.cpp +++ /dev/null @@ -1,208 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 
-// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/interp.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_3d_kernel_driver( - const T *dT, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - const int MAXPQ = maxpq(P,Q); - - magma_int_t nthreads = MAXPQ*MAXPQ; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_3D); - magma_int_t shmem = 0; - shmem += sizeof(T)* (P*Q); // for sT - shmem += sizeof(T)* ntcol * (max(P*P*MAXPQ, P*Q*Q)); // rU needs P^2xP, the intermediate output needs max(P^2xQ,PQ^2) - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_interp_3d_kernel), dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem); - return (hipPeekAtLastError() == hipSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_3d_ncomp( - magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (ncomp) { - case 1: - launch_failed = magma_interp_3d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_3d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_3d_kernel_driver - (dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_1d_ncomp_q( - magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, 
cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_3d_ncomp - (ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_interp_3d_ncomp_q_p( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_interp_1d_ncomp_q< 1> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_1d_ncomp_q< 2> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_1d_ncomp_q< 3> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_1d_ncomp_q< 4> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = magma_interp_1d_ncomp_q< 5> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed 
= magma_interp_1d_ncomp_q< 6> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_1d_ncomp_q< 7> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_1d_ncomp_q< 8> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_1d_ncomp_q< 9> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_1d_ncomp_q<10> - (Q, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_interp_3d( - magma_int_t P, magma_int_t Q, magma_int_t ncomp, - const CeedScalar *dT, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - launch_failed = magma_interp_3d_ncomp_q_p( - P, Q, ncomp, - dT, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/hip/interp_generic.hip.cpp b/backends/magma/kernels/hip/interp_generic.hip.cpp deleted file mode 100644 index 30d72a1d4c..0000000000 --- a/backends/magma/kernels/hip/interp_generic.hip.cpp +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/interp.h" - -#define ipow(a,b) ( (magma_int_t)(std::pow( (float)(a), (float)(b) ) ) ) - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -interp_generic_kernel_driver( - magma_int_t dim, magma_int_t ncomp, - const T *dT, magma_trans_t transT, - const T *dU, magma_int_t estrdU, magma_int_t cstrdU, - T *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - - magma_int_t shmem_max, nthreads_max; - magma_int_t pre = ipow(P, dim-1); //ncomp*CeedIntPow(P, dim-1); - magma_int_t post = 1; - // ncomp*Q*CeedIntPow(P>Q?P:Q,dim-1); - // originally the exponent is (dim-1), but we use dim because - // we have to read the original u in shared memory - // the original implementation access u directly - magma_int_t tmp_size = ipow(max(P,Q), dim); //ncomp * Q * ipow(max(P,Q), dim); - magma_int_t shmem = P * Q * sizeof(T); - shmem += 2 * tmp_size * sizeof(T); - - magma_int_t nthreads = max(P, ipow(Q, dim-1) ); - nthreads = magma_roundup( nthreads, Q ); // nthreads must be multiple of Q - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( nthreads > nthreads_max || shmem > shmem_max ) { - return 1; - } - else { - dim3 threads(nthreads, 1, 1); - dim3 grid(nelem, ncomp, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(interp_generic_kernel), dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dim, ncomp, pre, post, tmp_size, dT, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV ); - return (hipPeekAtLastError() == hipSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_interp_generic_q( - magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 5: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 11: - launch_failed = 
interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 12: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 13: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 14: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 15: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 16: - launch_failed = interp_generic_kernel_driver - (dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -magma_int_t -static magma_interp_generic_q_p( - magma_int_t P, magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar *dT, magma_trans_t transT, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (P) { - case 1: - launch_failed = magma_interp_generic_q< 1> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 2: - launch_failed = magma_interp_generic_q< 2> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 3: - launch_failed = magma_interp_generic_q< 3> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 4: - launch_failed = magma_interp_generic_q< 4> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - 
case 5: - launch_failed = magma_interp_generic_q< 5> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 6: - launch_failed = magma_interp_generic_q< 6> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 7: - launch_failed = magma_interp_generic_q< 7> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 8: - launch_failed = magma_interp_generic_q< 8> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 9: - launch_failed = magma_interp_generic_q< 9> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 10: - launch_failed = magma_interp_generic_q<10> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 11: - launch_failed = magma_interp_generic_q<11> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 12: - launch_failed = magma_interp_generic_q<12> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 13: - launch_failed = magma_interp_generic_q<13> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 14: - launch_failed = magma_interp_generic_q<14> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 15: - launch_failed = magma_interp_generic_q<15> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - case 16: - launch_failed = magma_interp_generic_q<16> - (Q, dim, ncomp, dT, transT, dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_interp_generic( - 
magma_int_t P, magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar *dT, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_trans_t transT = (tmode == CEED_NOTRANSPOSE) ? MagmaNoTrans : MagmaTrans; - - launch_failed = magma_interp_generic_q_p( - P, Q, dim, ncomp, - dT, transT, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - nelem, queue); - - return launch_failed; -} diff --git a/backends/magma/kernels/hip/magma_devptr.hip.cpp b/backends/magma/kernels/hip/magma_devptr.hip.cpp deleted file mode 100644 index 3d97692069..0000000000 --- a/backends/magma/kernels/hip/magma_devptr.hip.cpp +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include -#include - -/***************************************************************************//** - Determines whether a pointer points to CPU or GPU memory. - - This is very similar to magma_is_devptr, except that it does not check for - unified addressing support. - @param[in] A pointer to test - - @return 1: if A is a device pointer (definitely), - @return 0: if A is a host pointer (definitely or inferred from error), - @return -1: if unknown. - - @ingroup magma_util -*******************************************************************************/ -extern "C" magma_int_t -magma_isdevptr( const void* A ) -{ - hipError_t err; - hipPointerAttribute_t attr; - int dev; // must be int - err = hipGetDevice( &dev ); - if ( ! err ) { - err = hipPointerGetAttributes( &attr, A); - if ( ! 
err ) { - // definitely know type - return (attr.memoryType == hipMemoryTypeDevice); - } - else if ( err == hipErrorInvalidValue ) { - // clear error; see http://icl.cs.utk.edu/magma/forum/viewtopic.php?f=2&t=529 - hipGetLastError(); - // infer as host pointer - return 0; - } - } - // clear error - hipGetLastError(); - // unknown, e.g., device doesn't support unified addressing - return -1; -} diff --git a/backends/magma/kernels/hip/magma_drestrictApply.hip.cpp b/backends/magma/kernels/hip/magma_drestrictApply.hip.cpp deleted file mode 100644 index ed85665d75..0000000000 --- a/backends/magma/kernels/hip/magma_drestrictApply.hip.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include -#include "hip/hip_runtime.h" -#include -#include "../common/elem_restriction.h" - -////////////////////////////////////////////////////////////////////////////////////////// -// ReadDofs to device memory -// du is L-vector, size lsize -// dv is E-vector, size nelem * esize * NCOMP -extern "C" void -magma_readDofsOffset(const magma_int_t NCOMP, const magma_int_t compstride, - const magma_int_t esize, const magma_int_t nelem, - magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv, - magma_queue_t queue) -{ - magma_int_t grid = nelem; - magma_int_t threads = MAGMA_ERSTR_THREADS; - - hipLaunchKernelGGL(magma_readDofsOffset_kernel, dim3(grid), dim3(threads), 0, magma_queue_get_hip_stream(queue), NCOMP, compstride, - esize, nelem, offsets, du, dv); -} - -// ReadDofs to device memory, strided description for L-vector -// du is L-vector, size lsize -// dv is E-vector, size nelem * esize * NCOMP -extern "C" void -magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, - const magma_int_t nelem, const 
int *strides, - const CeedScalar *du, CeedScalar *dv, - magma_queue_t queue) -{ - magma_int_t grid = nelem; - magma_int_t threads = MAGMA_ERSTR_THREADS; - - hipLaunchKernelGGL(magma_readDofsStrided_kernel, dim3(grid), dim3(threads), 0, magma_queue_get_hip_stream(queue), NCOMP, esize, nelem, - strides, du, dv); -} - -// WriteDofs from device memory -// du is E-vector, size nelem * esize * NCOMP -// dv is L-vector, size lsize -extern "C" void -magma_writeDofsOffset(const magma_int_t NCOMP, const magma_int_t compstride, - const magma_int_t esize, const magma_int_t nelem, - magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv, - magma_queue_t queue) -{ - magma_int_t grid = nelem; - magma_int_t threads = MAGMA_ERSTR_THREADS; - - if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { - hipLaunchKernelGGL(magma_writeDofsOffset_kernel_s, dim3(grid), dim3(threads), - 0, magma_queue_get_hip_stream(queue), NCOMP, compstride, - esize, nelem, offsets, (float*)du, (float*)dv); - } - else { - hipLaunchKernelGGL(magma_writeDofsOffset_kernel_d, dim3(grid), dim3(threads), - 0, magma_queue_get_hip_stream(queue), NCOMP, compstride, - esize, nelem, offsets, (double*)du, (double*)dv); - } -} - -// WriteDofs from device memory, strided description for L-vector -// du is E-vector, size nelem * esize * NCOMP -// dv is L-vector, size lsize -extern "C" void -magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, - const magma_int_t nelem, const int *strides, - const CeedScalar *du, CeedScalar *dv, - magma_queue_t queue) -{ - magma_int_t grid = nelem; - magma_int_t threads = MAGMA_ERSTR_THREADS; - - if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { - hipLaunchKernelGGL(magma_writeDofsStrided_kernel_s, dim3(grid), dim3(threads), - 0, magma_queue_get_hip_stream(queue), NCOMP, esize, nelem, - strides, (float*)du, (float*)dv); - } - else { - hipLaunchKernelGGL(magma_writeDofsStrided_kernel_d, dim3(grid), dim3(threads), - 0, magma_queue_get_hip_stream(queue), NCOMP, esize, nelem, - strides, 
(double *)du, (double*)dv); - } - -} diff --git a/backends/magma/kernels/hip/weight_1d.hip.cpp b/backends/magma/kernels/hip/weight_1d.hip.cpp deleted file mode 100644 index 3a623e5146..0000000000 --- a/backends/magma/kernels/hip/weight_1d.hip.cpp +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/weight.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_weight_1d_kernel_driver( - const T *dqweight1d, T *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - - magma_int_t nthreads = Q; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_1D); - magma_int_t shmem = 0; - shmem += sizeof(T) * Q; // for dqweight1d - shmem += sizeof(T) * ntcol * Q; // for output - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_weight_1d_kernel), dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dqweight1d, dV, v_stride, nelem); - return (hipPeekAtLastError() == hipSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_weight_1d_q( - magma_int_t Q, const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 2: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 3: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 4: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 5: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 6: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 7: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 8: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 9: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 10: - launch_failed = magma_weight_1d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_weight_1d( - magma_int_t Q, const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_weight_1d_q(Q, dqweight1d, dV, v_stride, nelem, queue); - return launch_failed; -} diff --git a/backends/magma/kernels/hip/weight_2d.hip.cpp b/backends/magma/kernels/hip/weight_2d.hip.cpp 
deleted file mode 100644 index 74fd938162..0000000000 --- a/backends/magma/kernels/hip/weight_2d.hip.cpp +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/weight.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_weight_2d_kernel_driver( - const T *dqweight1d, T *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - - magma_int_t nthreads = Q; - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_2D); - magma_int_t shmem = 0; - shmem += sizeof(T) * Q; // for dqweight1d - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_weight_2d_kernel), dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dqweight1d, dV, v_stride, nelem); - return (hipPeekAtLastError() == hipSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_weight_2d_q( - magma_int_t Q, const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 2: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 3: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 4: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 5: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 6: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 7: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 8: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 9: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 10: - launch_failed = magma_weight_2d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_weight_2d( - magma_int_t Q, const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_weight_2d_q(Q, dqweight1d, dV, v_stride, nelem, queue); - return launch_failed; -} diff --git a/backends/magma/kernels/hip/weight_3d.hip.cpp b/backends/magma/kernels/hip/weight_3d.hip.cpp 
deleted file mode 100644 index 7dc634f225..0000000000 --- a/backends/magma/kernels/hip/weight_3d.hip.cpp +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "hip/hip_runtime.h" -#include "../common/weight.h" - -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_weight_3d_kernel_driver( - const T *dqweight1d, T *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - magma_int_t shmem_max, nthreads_max; - - magma_int_t nthreads = (Q*Q); - magma_int_t ntcol = MAGMA_BASIS_NTCOL(nthreads, MAGMA_MAXTHREADS_3D); - magma_int_t shmem = 0; - shmem += sizeof(T) * Q; // for dqweight1d - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( (nthreads*ntcol) > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - magma_int_t nblocks = (nelem + ntcol-1) / ntcol; - dim3 threads(nthreads, ntcol, 1); - dim3 grid(nblocks, 1, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_weight_3d_kernel), dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dqweight1d, dV, v_stride, nelem); - return (hipPeekAtLastError() == hipSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_weight_3d_q( - magma_int_t Q, const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 2: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 3: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 4: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 5: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 6: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 7: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 8: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 9: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - case 10: - launch_failed = magma_weight_3d_kernel_driver - (dqweight1d, dV, v_stride, nelem, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_weight_3d( - magma_int_t Q, const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - magma_weight_3d_q(Q, dqweight1d, dV, v_stride, nelem, queue); - return launch_failed; -} diff --git a/backends/magma/kernels/hip/weight_generic.hip.cpp 
b/backends/magma/kernels/hip/weight_generic.hip.cpp index a7692a8a26..b963b3dbf0 100644 --- a/backends/magma/kernels/hip/weight_generic.hip.cpp +++ b/backends/magma/kernels/hip/weight_generic.hip.cpp @@ -8,110 +8,6 @@ #include "hip/hip_runtime.h" #include "../common/weight.h" -////////////////////////////////////////////////////////////////////////////////////////// -template -static magma_int_t -magma_weight_generic_kernel_driver( - magma_int_t dim, - const T *dqweight1d, - T *dV, magma_int_t vstride, - magma_int_t batchCount, magma_queue_t queue) -{ - magma_device_t device; - magma_getdevice( &device ); - - magma_int_t shmem_max, nthreads_max; - - magma_int_t pre_org = CeedIntPow(Q, dim-0-1); - magma_int_t post_org = CeedIntPow(Q, 0); - - magma_int_t vsize = CeedIntPow(Q, dim); - magma_int_t shmem = vsize * sizeof(T); // holds dV in shared memory - shmem += (Q * sizeof(T)); // holds qweight1d - - magma_int_t nthreads = CeedIntPow(Q, dim-1); - - hipDeviceGetAttribute (&nthreads_max, hipDeviceAttributeMaxThreadsPerBlock, device); - hipDeviceGetAttribute (&shmem_max, hipDeviceAttributeMaxSharedMemoryPerBlock, device); - - if ( nthreads > nthreads_max || shmem > shmem_max ) { - return 1; // launch failed - } - else { - dim3 threads(nthreads, 1, 1); - dim3 grid(batchCount, 1, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(magma_weight_generic_kernel), dim3(grid), dim3(threads), shmem, magma_queue_get_hip_stream(queue), dim, pre_org, post_org, dqweight1d, dV, vstride ); - return (hipPeekAtLastError() == hipSuccess) ? 
0 : 1; - } -} - -////////////////////////////////////////////////////////////////////////////////////////// -static magma_int_t -magma_weight_generic_q( - magma_int_t Q, magma_int_t dim, - const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t vstride, - magma_int_t batchCount, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - switch (Q) { - case 1: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 2: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 3: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 4: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 5: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 6: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 7: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 8: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 9: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - case 10: - launch_failed = magma_weight_generic_kernel_driver - (dim, dqweight1d, dV, vstride, batchCount, queue); - break; - default: launch_failed = 1; - } - return launch_failed; -} - -////////////////////////////////////////////////////////////////////////////////////////// -extern "C" magma_int_t -magma_weight_generic( - magma_int_t Q, magma_int_t dim, - const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t vstride, - magma_int_t batchCount, magma_queue_t queue) -{ - magma_int_t launch_failed = 0; - 
launch_failed = magma_weight_generic_q(Q, dim, dqweight1d, dV, vstride, batchCount, queue); - return launch_failed; -} ////////////////////////////////////////////////////////////////////////////////////////// // NonTensor weight function diff --git a/backends/magma/magma_grad.c b/backends/magma/magma_grad.c deleted file mode 100644 index fc0a58b722..0000000000 --- a/backends/magma/magma_grad.c +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "ceed-magma.h" - -////////////////////////////////////////////////////////////////////////////////////////// -#ifdef __cplusplus -CEED_INTERN "C" -#endif -magma_int_t -magma_grad( - magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp, - const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - magma_int_t dstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t dstrdV, - magma_int_t nelem, magma_kernel_mode_t kernel_mode, - magma_queue_t queue) { - magma_int_t launch_failed = 0; - - if (kernel_mode == MAGMA_KERNEL_DIM_SPECIFIC) { - if (tmode == CEED_TRANSPOSE) { - switch(dim) { - case 1: launch_failed = magma_grad_1d(P, Q, ncomp, dinterp1d, dgrad1d, tmode, - dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, - queue); break; - case 2: launch_failed = magma_gradt_2d(P, Q, ncomp, dinterp1d, dgrad1d, tmode, - dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, - dstrdV, nelem, queue); break; - case 3: launch_failed = magma_gradt_3d(P, Q, ncomp, dinterp1d, dgrad1d, tmode, - dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, - dstrdV, nelem, queue); break; - default: launch_failed = 1; - } - } else { - switch(dim) { - case 1: launch_failed = 
magma_grad_1d(P, Q, ncomp, dinterp1d, dgrad1d, tmode, - dU, estrdU, cstrdU, dV, estrdV, cstrdV, nelem, - queue); break; - case 2: launch_failed = magma_gradn_2d(P, Q, ncomp, dinterp1d, dgrad1d, tmode, - dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, - dstrdV, nelem, queue); break; - case 3: launch_failed = magma_gradn_3d(P, Q, ncomp, dinterp1d, dgrad1d, tmode, - dU, estrdU, cstrdU, dstrdU, dV, estrdV, cstrdV, - dstrdV, nelem, queue); break; - default: launch_failed = 1; - } - } - } else { - launch_failed = magma_grad_generic( - P, Q, dim, ncomp, - dinterp1d, dgrad1d, tmode, - dU, estrdU, cstrdU, dstrdU, - dV, estrdV, cstrdV, dstrdV, - nelem, queue ); - } - - return launch_failed; -} diff --git a/backends/magma/magma_interp.c b/backends/magma/magma_interp.c deleted file mode 100644 index d6f778e780..0000000000 --- a/backends/magma/magma_interp.c +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "ceed-magma.h" - -////////////////////////////////////////////////////////////////////////////////////////// -#ifdef __cplusplus -CEED_INTERN "C" -#endif -magma_int_t -magma_interp( - magma_int_t P, magma_int_t Q, - magma_int_t dim, magma_int_t ncomp, - const CeedScalar *dT, CeedTransposeMode tmode, - const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, - CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, - magma_int_t nelem, magma_kernel_mode_t kernel_mode, - magma_queue_t queue) { - magma_int_t launch_failed = 0; - - if (kernel_mode == MAGMA_KERNEL_DIM_SPECIFIC) { - switch(dim) { - case 1: launch_failed = magma_interp_1d(P, Q, ncomp, dT, tmode, dU, estrdU, - cstrdU, dV, estrdV, cstrdV, nelem, queue); break; - case 2: launch_failed = magma_interp_2d(P, Q, ncomp, dT, tmode, dU, estrdU, - cstrdU, dV, estrdV, cstrdV, nelem, queue); break; - case 3: launch_failed = magma_interp_3d(P, Q, ncomp, dT, tmode, dU, estrdU, - cstrdU, dV, estrdV, cstrdV, nelem, queue); break; - default: launch_failed = 1; - } - } else { - launch_failed = magma_interp_generic( - P, Q, dim, ncomp, - dT, tmode, - dU, estrdU, cstrdU, - dV, estrdV, cstrdV, - nelem, queue); - } - - return launch_failed; -} diff --git a/backends/magma/magma_weight.c b/backends/magma/magma_weight.c deleted file mode 100644 index 1cc30bfea7..0000000000 --- a/backends/magma/magma_weight.c +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
-// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed - -#include "ceed-magma.h" - -////////////////////////////////////////////////////////////////////////////////////////// -#ifdef __cplusplus -CEED_INTERN "C" -#endif -magma_int_t -magma_weight( - magma_int_t Q, magma_int_t dim, - const CeedScalar *dqweight1d, - CeedScalar *dV, magma_int_t v_stride, - magma_int_t nelem, magma_kernel_mode_t kernel_mode, - magma_queue_t queue) { - magma_int_t launch_failed = 0; - - if (kernel_mode == MAGMA_KERNEL_DIM_SPECIFIC) { - switch(dim) { - case 1: launch_failed = magma_weight_1d(Q, dqweight1d, dV, v_stride, nelem, - queue); break; - case 2: launch_failed = magma_weight_2d(Q, dqweight1d, dV, v_stride, nelem, - queue); break; - case 3: launch_failed = magma_weight_3d(Q, dqweight1d, dV, v_stride, nelem, - queue); break; - default: launch_failed = 1; - } - } else { - launch_failed = magma_weight_generic(Q, dim, dqweight1d, dV, v_stride, nelem, - queue); - } - - return launch_failed; -} diff --git a/backends/magma/kernels/common/elem_restriction.h b/include/ceed/jit-source/magma/elem_restriction.h similarity index 64% rename from backends/magma/kernels/common/elem_restriction.h rename to include/ceed/jit-source/magma/elem_restriction.h index 41e2703ca5..344f294fdf 100644 --- a/backends/magma/kernels/common/elem_restriction.h +++ b/include/ceed/jit-source/magma/elem_restriction.h @@ -17,7 +17,7 @@ // Go from L-vector (du) to E-vector (dv): // // dv(i, e, c) = du( offsets(i, e) + compstride * c) -static __launch_bounds__(MAGMA_ERSTR_THREADS) __global__ void +extern "C" __launch_bounds__(MAGMA_ERSTR_THREADS) __global__ void magma_readDofsOffset_kernel(const int NCOMP, const int compstride, const int esize, const int nelem, int *offsets, const CeedScalar *du, CeedScalar *dv) @@ -42,7 +42,7 @@ magma_readDofsOffset_kernel(const int NCOMP, const int compstride, // to describe the L-vector layout // // dv(i, e, c) = du( i * strides[0] + c 
* strides[1] + e * strides[2] ) -static __launch_bounds__(MAGMA_ERSTR_THREADS) __global__ void +extern "C" __launch_bounds__(MAGMA_ERSTR_THREADS) __global__ void magma_readDofsStrided_kernel(const int NCOMP, const int esize, const int nelem, const int *strides, const CeedScalar *du, CeedScalar *dv) { @@ -65,28 +65,10 @@ magma_readDofsStrided_kernel(const int NCOMP, const int esize, const int nelem, // Go from E-vector (du) to L-vector (dv): // // dv(offsets(i, e) + compstride * c) = du(i, e, c) -// Double precision version -static __launch_bounds__(MAGMA_ERSTR_THREADS) __global__ void -magma_writeDofsOffset_kernel_d(const int NCOMP, const int compstride, - const int esize, const int nelem, int *offsets, - const double *du, double *dv) -{ - const int pid = threadIdx.x; - const int elem = blockIdx.x; - - for (CeedInt i = pid; i < esize; i += blockDim.x) { - const CeedInt ind = offsets ? offsets[i + elem * esize] : i + elem * esize; - for (CeedInt comp = 0; comp < NCOMP; ++comp) { - atomicAdd(dv + (ind + compstride * comp), - du[i+elem*esize+comp*esize*nelem]); - } - } -} -// Single precision version -static __global__ void -magma_writeDofsOffset_kernel_s(const int NCOMP, const int compstride, - const int esize, const int nelem, int *offsets, - const float *du, float *dv) +extern "C" __launch_bounds__(MAGMA_ERSTR_THREADS) __global__ void +magma_writeDofsOffset_kernel(const int NCOMP, const int compstride, + const int esize, const int nelem, int *offsets, + const CeedScalar *du, CeedScalar *dv) { const int pid = threadIdx.x; const int elem = blockIdx.x; @@ -108,27 +90,9 @@ magma_writeDofsOffset_kernel_s(const int NCOMP, const int compstride, // to describe the L-vector layout // // dv( i * strides[0] + c * strides[1] + e * strides[2] ) = du(i, e, c) -// Double precision version -static __launch_bounds__(MAGMA_ERSTR_THREADS) __global__ void -magma_writeDofsStrided_kernel_d(const int NCOMP, const int esize, const int nelem, - const int *strides, const double *du, double 
*dv) -{ - const int pid = threadIdx.x; - const int elem = blockIdx.x; - - for (CeedInt i = pid; i < esize; i += blockDim.x) { - for (CeedInt comp = 0; comp < NCOMP; ++comp) { - atomicAdd(dv + (i * strides[0] + comp * strides[1] + - elem * strides[2]), - du[i+elem*esize+comp*esize*nelem]); - } - } -} - -// Single precision version -static __global__ void -magma_writeDofsStrided_kernel_s(const int NCOMP, const int esize, const int nelem, - const int *strides, const float *du, float *dv) +extern "C" __launch_bounds__(MAGMA_ERSTR_THREADS) __global__ void +magma_writeDofsStrided_kernel(const int NCOMP, const int esize, const int nelem, + const int *strides, const CeedScalar *du, CeedScalar *dv) { const int pid = threadIdx.x; const int elem = blockIdx.x; diff --git a/include/ceed/jit-source/magma/grad-1d.h b/include/ceed/jit-source/magma/grad-1d.h new file mode 100644 index 0000000000..bd71b550de --- /dev/null +++ b/include/ceed/jit-source/magma/grad-1d.h @@ -0,0 +1,141 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +// macros to abstract access of shared memory and reg. file +#define sT(i,j) sT[(j) * P_ + (i)] + +////////////////////////////////////////////////////////////////////////////////////////// +// grad basis action (1D) +template +static __device__ __inline__ void +magma_grad_1d_device( + const T *sT, magma_trans_t transT, + T* sU[NCOMP_], T* sV[NCOMP_], const int tx) +{ + // Assumptions + // 1. 1D threads of size max(P_,Q_) + // 2. sU[i] is 1xP_: in shared memory + // 3. sV[i] is 1xQ_: in shared memory + // 4. P_roduct per component is one row (1xP_) times T matrix (P_xQ_) => one row (1xQ_) + // 5. Each thread computes one entry in sV[i] + // 6. Must sync before and after call + // 7. 
Note that the layout for U and V is different from 2D/3D problem + + T rv; + if (tx < Q_) { + for(int icomp = 0; icomp < NCOMP_; icomp++) { + rv = (transT == MagmaTrans) ? sV[icomp][tx] : 0.0; + for(int i = 0; i < P_; i++) { + rv += sU[icomp][i] * sT(i,tx); + } + sV[icomp][tx] = rv; + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_1D)) __global__ void +magma_gradn_1d_kernel( + const CeedScalar *dTinterp, const CeedScalar *dTgrad, + const CeedScalar *dU, const int estrdU, const int cstrdU, const int dstrdU, + CeedScalar *dV, const int estrdV, const int cstrdV, const int dstrdV, + const int nelem) +{ + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + magma_trans_t transT = MagmaNoTrans; + + if (elem_id >= nelem) return; + + CeedScalar* sU[NCOMP]; + CeedScalar* sV[NCOMP]; + + // shift global memory pointers by elem stride + dU += elem_id * estrdU; + dV += elem_id * estrdV; + + // assign shared memory pointers + CeedScalar* sT = (CeedScalar*)(shared_data); + CeedScalar* sW = sT + P*Q; + sU[0] = sW + ty * NCOMP * (P + Q); + sV[0] = sU[0] + (NCOMP * 1 * P); + for(int icomp = 1; icomp < NCOMP; icomp++) { + sU[icomp] = sU[icomp-1] + (1 * P); + sV[icomp] = sV[icomp-1] + (1 * Q); + } + + // read T + if (ty == 0) { + dread_T_gm2sm(tx, transT, dTgrad, sT); + } + + // read U + read_1d(dU, cstrdU, sU, tx); + + __syncthreads(); + magma_grad_1d_device(sT, transT, sU, sV, tx); + __syncthreads(); + + // write V + write_1d(sV, dV, cstrdV, tx); +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_1D)) __global__ void +magma_gradt_1d_kernel( + const CeedScalar *dTinterp, const CeedScalar *dTgrad, + const CeedScalar *dU, const int 
estrdU, const int cstrdU, const int dstrdU, + CeedScalar *dV, const int estrdV, const int cstrdV, const int dstrdV, + const int nelem) +{ + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + magma_trans_t transT = MagmaTrans; + + if (elem_id >= nelem) return; + + CeedScalar* sU[NCOMP]; + CeedScalar* sV[NCOMP]; + + // shift global memory pointers by elem stride + dU += elem_id * estrdU; + dV += elem_id * estrdV; + + // assign shared memory pointers + CeedScalar* sT = (CeedScalar*)(shared_data); + CeedScalar* sW = sT + Q*P; + sU[0] = sW + ty * NCOMP * (Q + P); + sV[0] = sU[0] + (NCOMP * 1 * Q); + for(int icomp = 1; icomp < NCOMP; icomp++) { + sU[icomp] = sU[icomp-1] + (1 * Q); + sV[icomp] = sV[icomp-1] + (1 * P); + } + + // read T + if (ty == 0) { + dread_T_gm2sm(tx, transT, dTgrad, sT); + } + + // read U + read_1d(dU, cstrdU, sU, tx); + + // read V + read_1d(dV, cstrdV, sV, tx); + + __syncthreads(); + magma_grad_1d_device(sT, transT, sU, sV, tx); + __syncthreads(); + + // write V + write_1d(sV, dV, cstrdV, tx); +} diff --git a/include/ceed/jit-source/magma/grad-2d.h b/include/ceed/jit-source/magma/grad-2d.h new file mode 100644 index 0000000000..f47f45dbf7 --- /dev/null +++ b/include/ceed/jit-source/magma/grad-2d.h @@ -0,0 +1,202 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +// macros to abstract access of shared memory and reg. 
file +#define sT(i,j) sT[(j) * P_ + (i)] +#define sTmp(i,j,ldw) sTmp[(j)*(ldw) + (i)] + +////////////////////////////////////////////////////////////////////////////////////////// +// grad basis action (2D) +// This function is called two times at a higher level for 2D +// DIM_U -- for the size of rU[DIM_U * NCOMP_ * MAXP_Q_] +// DIM_V -- for the size of rV[DIM_V * NCOMP_ * MAXP_Q_] +// iDIM_ -- the index of the outermost loop over dimensions in grad +// iDIM_U -- which dim index of rU is accessed (always 0 for notrans, 0 or 1 for trans) +// iDIM_V -- which dim index of rV is accessed (0 or 1 for notrans, always 0 for trans) +// the scalar beta is used to specify whether to accumulate to rV, or overwrite it +template +static __device__ __inline__ void +magma_grad_2d_device( + const T *sTinterp, const T *sTgrad, + T rU[DIM_U][NCOMP_][rUsize] , T rV[DIM_V][NCOMP_][rVsize], + T beta, const int tx, T rTmp, T* swork) +{ + // Assumptions + // 0. This device routine applies grad for one dim only (iDIM_), so it should be called twice for 2D + // 1. 1D threads of size max(P_,Q_) + // 2. input: rU[DIM_U x NCOMP_ x P_] in registers (per thread) + // 3. output: rV[DIM_V x NCOMP_ x Q_] in registers (per thread) + // 4. Two products per each (dim,component) pair + // 4.1 Batch P_ of (1xP_) matrices times (P_xQ_) matrix => Batch P_ of (1xQ_) matrices + // 4.2 Batch 1 of (Q_xP_) matrix times (P_xQ_) matrix => (Q_xQ_) matrix + // 6. Each thread computes one row of the output of each product + // 7. Sync is recommended before and after the call + + for(int icomp = 0; icomp < NCOMP_; icomp++){ + // 1st product -- Batch P_ of (1xP_) matrices [reg] x (P_xQ_) [shmem] => Batch P_ of (1xQ_) matrices + // the batch output P_ x (1xQ_) is written on the fly to shmem + if (tx < P_) { + const int batchid = tx; + const int sld = 1; + const T *sT = (iDIM_ == 0) ? 
sTgrad : sTinterp; + T* sTmp = swork + batchid * (1 * Q_); + for(int j = 0; j < Q_; j++){ + rTmp = 0.0; + for(int i = 0; i < P_; i++){ + rTmp += rU[iDIM_U][icomp][i] * sT(i,j); + } + sTmp(0,j,sld) = rTmp; + } + } // end of: if (tx < P_) + __syncthreads(); + + // 2nd product -- Batch 1 of a (Q_xP_) matrix [shmem] x (P_xQ_) [shmem] => (Q_xQ_) matrix [reg] + if (tx < Q_) { + const int batchid = 0; + const int sld = Q_; + const T *sT = (iDIM_ == 1) ? sTgrad : sTinterp; + T* sTmp = swork + batchid * (Q_*P_); + for(int j = 0; j < Q_; j++){ + rTmp = 0.0; + for(int i = 0; i < P_; i++){ + rTmp += sTmp(tx,i,sld) * sT(i,j); + } + rV[iDIM_V][icomp][j] *= beta; + rV[iDIM_V][icomp][j] += rTmp; + } + } + __syncthreads(); + } // loop over NCOMP_ +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_2D)) __global__ void +magma_gradn_2d_kernel( + const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, + const CeedScalar *dU, const int estrdU, const int cstrdU, const int dstrdU, + CeedScalar *dV, const int estrdV, const int cstrdV, const int dstrdV, const int nelem) +{ + + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + magma_trans_t transT = MagmaNoTrans; + + if (elem_id >= nelem) return; + + CeedScalar rU[1][NCOMP][P] = { 0.0 }; // here DIMU = 1, but might be different for a fused operator + CeedScalar rV[1][NCOMP][Q] = { 0.0 }; // here DIMV = 1, but might be different for a fused operator + CeedScalar rTmp = 0.0; + + // shift global memory pointers by elem stride + dU += elem_id * estrdU; + dV += elem_id * estrdV; + + // assign shared memory pointers + CeedScalar* sTinterp = (CeedScalar*)(shared_data); + CeedScalar* sTgrad = sTinterp + P*Q; + CeedScalar* sTmp = sTgrad + P*Q; + sTmp += ty * (P * MAXPQ); + + // read T + if (ty == 0) { + dread_T_gm2sm(tx, 
transT, dinterp1d, sTinterp); + dread_T_gm2sm(tx, transT, dgrad1d, sTgrad); + } + + // No need to read V ( required only in transposed grad ) + const CeedScalar beta = 0.0; + + /* read U (idim = 0 for dU, iDIM = 0 for rU) -- + there is a sync at the end of this function */ + readU_2d + (dU + (0*dstrdU), cstrdU, rU, sTmp, tx); + + /* first call (iDIM = 0, iDIMU = 0, iDIMV = 0) -- + output from rV[0][][] into dV (idim = 0) */ + magma_grad_2d_device + (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); + /* there is a sync at the end of magma_grad_2d_device */ + writeV_2d + (dV+(0*dstrdV), cstrdV, rV, tx); + + /* second call (iDIM = 1, iDIMU = 0, iDIMV = 0) -- + output from rV[0][][] into dV (idim = 1) */ + magma_grad_2d_device + (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); + /* there is a sync at the end of magma_grad_2d_device */ + writeV_2d + (dV+(1*dstrdV), cstrdV, rV, tx); +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_2D)) __global__ void +magma_gradt_2d_kernel( + const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, + const CeedScalar *dU, const int estrdU, const int cstrdU, const int dstrdU, + CeedScalar *dV, const int estrdV, const int cstrdV, const int dstrdV, const int nelem) +{ + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + magma_trans_t transT = MagmaTrans; + + if (elem_id >= nelem) return; + + CeedScalar rU[1][NCOMP][Q] = { 0.0 }; // here DIMU = 1, but might be different for a fused operator + CeedScalar rV[1][NCOMP][P] = { 0.0 }; // here DIMV = 1, but might be different for a fused operator + CeedScalar rTmp = 0.0; + + // shift global memory pointers by elem stride + dU += elem_id * estrdU; + dV += elem_id * estrdV; + + // assign shared memory pointers + CeedScalar* sTinterp = (CeedScalar*)(shared_data); + 
CeedScalar* sTgrad = sTinterp + Q*P; + CeedScalar* sTmp = sTgrad + Q*P; + sTmp += ty * (Q*MAXPQ); + + // read T + if (ty == 0) { + dread_T_gm2sm(tx, transT, dinterp1d, sTinterp); + dread_T_gm2sm(tx, transT, dgrad1d, sTgrad); + } + __syncthreads(); + + /* read V (since this is transposed mode -- + idim = 0 for dV, iDIM = 0 for rV) */ + const CeedScalar beta = 1.0; + readV_2d + (dV + (0*dstrdV), cstrdV, rV, tx); + + /* read U (idim = 0 for dU, iDIM = 0 for rU) -- + there is a sync at the end of this function */ + readU_2d + (dU + (0 * dstrdU), cstrdU, rU, sTmp, tx); + /* first call (iDIM = 0, iDIMU = 0, iDIMV = 0) */ + magma_grad_2d_device + (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); + /* there is a sync at the end of magma_grad_2d_device */ + + /* read U (idim = 1 for dU, iDIM = 0 for rU) -- + there is a sync at the end of this function */ + readU_2d + (dU + (1*dstrdU), cstrdU, rU, sTmp, tx); + /* second call (iDIM = 1, iDIMU = 0, iDIMV = 0) */ + magma_grad_2d_device + (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); + /* there is a sync at the end of magma_grad_2d_device */ + + // write V + writeV_2d + (dV + (0*dstrdV), cstrdV, rV, tx); +} diff --git a/include/ceed/jit-source/magma/grad-3d.h b/include/ceed/jit-source/magma/grad-3d.h new file mode 100644 index 0000000000..7d3de8cc05 --- /dev/null +++ b/include/ceed/jit-source/magma/grad-3d.h @@ -0,0 +1,240 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +// macros to abstract access of shared memory and reg. 
file +#define sT(i,j) sT[(j) * P_ + (i)] +#define sTmp(i,j,ldw) sTmp[(j)*(ldw) + (i)] +#define sTmp2(i,j,ldw) sTmp2[(j)*(ldw) + (i)] + +////////////////////////////////////////////////////////////////////////////////////////// +// grad basis action (3D) +// This function is called three times at a higher level for 3D +// DIM_U -- for the size of rU[DIM_U * NCOMP_ * MAXP_Q_] +// DIM_V -- for the size of rV[DIM_V * NCOMP_ * MAXP_Q_] +// iDIM_ -- the index of the outermost loop over dimensions in grad +// iDIM_U -- which dim index of rU is accessed (always 0 for notrans, 0, 1, or 2 for trans) +// iDIM_V -- which dim index of rV is accessed (0, 1, or 2 for notrans, always 0 for trans) +// the scalar beta is used to specify whether to accumulate to rV, or overwrite it +template +static __device__ __inline__ void +magma_grad_3d_device( + const T *sTinterp, const T *sTgrad, + T rU[DIM_U][NCOMP_][rUsize] , T rV[DIM_V][NCOMP_][rVsize], + T beta, const int tx, T rTmp, T* swork) +{ + // Assumptions + // 0. This device routine applies grad for one dim only (iDIM_), so it should be called thrice for 3D + // 1. 1D threads of size max(P_,Q_)^2 + // 2. input: rU[DIM_U x NCOMP_ x rUsize] in registers (per thread) + // 3. output: rV[DIM_V x NCOMP_ x rVsize] in registers (per thread) + // 4. Three products per each (dim,component) pair + // 4.1 Batch P_^2 of (1xP_) matrices times (P_xQ_) matrix => Batch P_^2 of (1xQ_) matrices + // 4.2 Batch P_ of (Q_xP_) matrices times (P_xQ_) matrix => Batch P_ of (Q_xQ_) matrices + // 4.3 Batch 1 of (Q_^2xP_) matrix times (P_xQ_) matrix => (Q_^2xQ_) matrix + // 5. Each thread computes one row of the output of each product + // 6. 
Sync is recommended before and after the call + + T* sW1 = swork; + T* sW2 = sW1 + P_*P_*Q_; + for(int icomp = 0; icomp < NCOMP_; icomp++){ + // Batch P_^2 of (1xP_) matrices [reg] times (P_xQ_) matrix [shmem] => Batch P_^2 of (1xQ_) matrices [shmem] + if (tx < (P_*P_)) { + const int batchid = tx; + const int sld = 1; + const T *sT = (iDIM_ == 0) ? sTgrad : sTinterp; + T* sTmp = sW1 + batchid * (1*Q_); + for(int j = 0; j < Q_; j++){ + rTmp = 0.0; + for(int i = 0; i < P_; i++){ + rTmp += rU[iDIM_U][icomp][i] * sT(i,j); + } + sTmp(0,j,sld) = rTmp; + } + } // end of: if (tx < P_*P_) + __syncthreads(); + + // Batch P_ of (Q_xP_) matrices [shmem] times (P_xQ_) matrix [shmem] => Batch P_ of (Q_xQ_) matrices [shmem] + if (tx < (P_*Q_)) { + const int batchid = tx / Q_; + const int tx_ = tx % Q_; + const int sld = Q_; + const T *sT = (iDIM_ == 1) ? sTgrad : sTinterp; + T* sTmp = sW1 + batchid * (Q_*P_); // sTmp is input + T* sTmp2 = sW2 + batchid * (Q_*Q_); // sTmp2 is output + for(int j = 0; j < Q_; j++){ + rTmp = 0.0; + for(int i = 0; i < P_; i++){ + rTmp += sTmp(tx_,i,sld) * sT(i,j); + } + sTmp2(tx_,j,sld) = rTmp; + } + } + __syncthreads(); + + // Batch 1 of (Q_^2xP_) matrices [shmem] times (P_xQ_) matrix [shmem] => Batch 1 of (Q_^2xQ_) matrices [reg] + if (tx < (Q_*Q_)) { + // No need to declare batchid = (tx / Q_^2) = always zero + // No need to declare tx_ = (tx % Q_^2) = always tx + const int sld = Q_*Q_; + const T *sT = (iDIM_ == 2) ? 
sTgrad : sTinterp; + T* sTmp = sW2; // sTmp is input + for(int j = 0; j < Q_; j++) { + rTmp = 0.0; + for(int i = 0; i < P_; i++) { + rTmp += sTmp(tx,i,sld) * sT(i,j); + } + rV[iDIM_V][icomp][j] *= beta; + rV[iDIM_V][icomp][j] += rTmp; + } + } + __syncthreads(); + } // loop over NCOMP_ +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ*MAXPQ, MAGMA_MAXTHREADS_3D)) __global__ void +magma_gradn_3d_kernel( + const CeedScalar* dinterp1d, const CeedScalar* dgrad1d, + const CeedScalar *dU, const int estrdU, const int cstrdU, const int dstrdU, + CeedScalar *dV, const int estrdV, const int cstrdV, const int dstrdV, const int nelem) +{ + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + magma_trans_t transT = MagmaNoTrans; + + if (elem_id >= nelem) return; + + CeedScalar rU[1][NCOMP][P] = { 0.0 }; // here DIMU = 1, but might be different for a fused operator + CeedScalar rV[1][NCOMP][Q] = { 0.0 }; // here DIMV = 1, but might be different for a fused operator + CeedScalar rTmp = 0.0; + + // shift global memory pointers by elem stride + dU += elem_id * estrdU; + dV += elem_id * estrdV; + + // assign shared memory pointers + CeedScalar* sTinterp = (CeedScalar*)(shared_data); + CeedScalar* sTgrad = sTinterp + P*Q; + CeedScalar* sTmp = sTgrad + P*Q; + sTmp += ty * (max(P*P*P, (P*P*Q) + (P*Q*Q))); + + // read T + if (ty == 0) { + dread_T_gm2sm(tx, transT, dinterp1d, sTinterp); + dread_T_gm2sm(tx, transT, dgrad1d, sTgrad); + } + __syncthreads(); + + // No need to read V ( required only in transposed grad ) + const CeedScalar beta = 0.0; + + /* read U (idim = 0 for dU, iDIM = 0 for rU) -- + there is a sync at the end of this function */ + readU_3d + (dU + (0*dstrdU), cstrdU, rU, sTmp, tx); + + /* first call (iDIM = 0, iDIMU = 0, iDIMV = 0) -- + output from rV[0][][] into 
dV (idim = 0) */ + magma_grad_3d_device + (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); + /* there is a sync at the end of magma_grad_3d_device */ + writeV_3d + (dV+ (0*dstrdV), cstrdV, rV, tx); + + /* second call (iDIM = 1, iDIMU = 0, iDIMV = 0) -- + output from rV[0][][] into dV (idim = 1) */ + magma_grad_3d_device + (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); + /* there is a sync at the end of magma_grad_3d_device */ + writeV_3d + (dV+ (1*dstrdV), cstrdV, rV, tx); + + /* third call (iDIM = 2, iDIMU = 0, iDIMV = 0) -- + output from rV[0][][] into dV (idim = 2) */ + magma_grad_3d_device + (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); + /* there is a sync at the end of magma_grad_3d_device */ + writeV_3d + (dV+ (2*dstrdV), cstrdV, rV, tx); +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ*MAXPQ, MAGMA_MAXTHREADS_3D)) __global__ void +magma_gradt_3d_kernel( + const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, + const CeedScalar *dU, const int estrdU, const int cstrdU, const int dstrdU, + CeedScalar *dV, const int estrdV, const int cstrdV, const int dstrdV, const int nelem) +{ + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + magma_trans_t transT = MagmaTrans; + + if (elem_id >= nelem) return; + + CeedScalar rU[1][NCOMP][Q] = { 0.0 }; // here DIMU = 1, but might be different for a fused operator + CeedScalar rV[1][NCOMP][P] = { 0.0 }; // here DIMV = 1, but might be different for a fused operator + CeedScalar rTmp = 0.0; + + // shift global memory pointers by elem stride + dU += elem_id * estrdU; + dV += elem_id * estrdV; + + // assign shared memory pointers + CeedScalar* sTinterp = (CeedScalar*)(shared_data); + CeedScalar* sTgrad = sTinterp + Q*P; + CeedScalar* sTmp = sTgrad + Q*P; + sTmp += ty * (max(Q*Q*Q, (Q*Q*P) + (Q*P*P))); + + // 
read T + if (ty == 0) { + dread_T_gm2sm(tx, transT, dinterp1d, sTinterp); + dread_T_gm2sm(tx, transT, dgrad1d, sTgrad); + } + __syncthreads(); + + // read V (since this is transposed mode) + const CeedScalar beta = 1.0; + readV_3d + (dV + (0*dstrdV), cstrdV, rV, tx); + + /* read U (idim = 0 for dU, iDIM = 0 for rU) -- + there is a sync at the end of this function */ + readU_3d + (dU + (0 * dstrdU), cstrdU, rU, sTmp, tx); + /* then first call (iDIM = 0, iDIMU = 0, iDIMV = 0) */ + magma_grad_3d_device + (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); + /* there is a sync at the end of magma_grad_3d_device */ + + /* read U (idim = 1 for dU, iDIM = 0 for rU) -- + there is a sync at the end of this function */ + readU_3d + (dU + (1 * dstrdU), cstrdU, rU, sTmp, tx); + /* then second call (iDIM = 1, iDIMU = 0, iDIMV = 0) */ + magma_grad_3d_device + (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); + /* there is a sync at the end of magma_grad_3d_device */ + + /* read U (idim = 2 for dU, iDIM = 0 for rU) -- + there is a sync at the end of this function */ + readU_3d + (dU + (2 * dstrdU), cstrdU, rU, sTmp, tx); + /* then third call (iDIM = 2, iDIMU = 0, iDIMV = 0) */ + magma_grad_3d_device + (sTinterp, sTgrad, rU, rV, beta, tx, rTmp, sTmp); + /* there is a sync at the end of magma_grad_3d_device */ + + // write V + writeV_3d + (dV + (0 * dstrdV), cstrdV, rV, tx); +} diff --git a/include/ceed/jit-source/magma/interp-1d.h b/include/ceed/jit-source/magma/interp-1d.h new file mode 100644 index 0000000000..7fd329b704 --- /dev/null +++ b/include/ceed/jit-source/magma/interp-1d.h @@ -0,0 +1,141 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +// macros to abstract access of shared memory and reg. 
file +#define sT(i,j) sT[(j) * P_ + (i)] + +////////////////////////////////////////////////////////////////////////////////////////// +// interp basis action (1D) +template +static __device__ __inline__ void +magma_interp_1d_device( + const T *sT, magma_trans_t transT, + T* sU[NCOMP_], T* sV[NCOMP_], const int tx) +{ + // Assumptions + // 1. 1D threads of size max(P_,Q_) + // 2. sU[i] is 1xP_: in shared memory + // 3. sV[i] is 1xQ_: in shared memory + // 4. Product per component is one row (1xP_) times T matrix (P_xQ_) => one row (1xQ_) + // 5. Each thread computes one entry in sV[i] + // 6. Must sync before and after call + // 7. Note that the layout for U and V is different from 2D/3D problem + + T rv; + if (tx < Q_) { + for(int icomp = 0; icomp < NCOMP_; icomp++) { + rv = (transT == MagmaTrans) ? sV[icomp][tx] : 0.0; + for(int i = 0; i < P_; i++) { + rv += sU[icomp][i] * sT(i,tx); //sT[tx * P_ + i]; + } + sV[icomp][tx] = rv; + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_1D)) __global__ void +magma_interpn_1d_kernel( + const CeedScalar *dT, + const CeedScalar *dU, const int estrdU, const int cstrdU, + CeedScalar *dV, const int estrdV, const int cstrdV, const int nelem) +{ + + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + magma_trans_t transT = MagmaNoTrans; + + if (elem_id >= nelem) return; + + CeedScalar* sU[NCOMP]; + CeedScalar* sV[NCOMP]; + + // shift global memory pointers by elem stride + dU += elem_id * estrdU; + dV += elem_id * estrdV; + + // assign shared memory pointers + CeedScalar* sT = (CeedScalar*)(shared_data); + CeedScalar* sW = sT + P*Q; + sU[0] = sW + ty * NCOMP * (P + Q); + sV[0] = sU[0] + (NCOMP * 1 * P); + for(int icomp = 1; icomp < NCOMP; icomp++) { + sU[icomp] = sU[icomp-1] + (1 * P); + 
sV[icomp] = sV[icomp-1] + (1 * Q); + } + + // read T + if (ty == 0) { + dread_T_gm2sm(tx, transT, dT, sT); + } + + // read U + read_1d(dU, cstrdU, sU, tx); + + __syncthreads(); + magma_interp_1d_device(sT, transT, sU, sV, tx); + __syncthreads(); + + // write V + write_1d(sV, dV, cstrdV, tx); +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_1D)) __global__ void +magma_interpt_1d_kernel( + const CeedScalar *dT, + const CeedScalar *dU, const int estrdU, const int cstrdU, + CeedScalar *dV, const int estrdV, const int cstrdV, const int nelem) +{ + + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + magma_trans_t transT = MagmaTrans; + + if (elem_id >= nelem) return; + + CeedScalar* sU[NCOMP]; + CeedScalar* sV[NCOMP]; + + // shift global memory pointers by elem stride + dU += elem_id * estrdU; + dV += elem_id * estrdV; + + // assign shared memory pointers + CeedScalar* sT = (CeedScalar*)(shared_data); + CeedScalar* sW = sT + Q*P; + sU[0] = sW + ty * NCOMP * (Q + P); + sV[0] = sU[0] + (NCOMP * 1 * Q); + for(int icomp = 1; icomp < NCOMP; icomp++) { + sU[icomp] = sU[icomp-1] + (1 * Q); + sV[icomp] = sV[icomp-1] + (1 * P); + } + + // read T + if (ty == 0) { + dread_T_gm2sm(tx, transT, dT, sT); + } + + // read U + read_1d(dU, cstrdU, sU, tx); + + // read V + read_1d(dV, cstrdV, sV, tx); + + __syncthreads(); + magma_interp_1d_device(sT, transT, sU, sV, tx); + __syncthreads(); + + // write V + write_1d(sV, dV, cstrdV, tx); +} diff --git a/include/ceed/jit-source/magma/interp-2d.h b/include/ceed/jit-source/magma/interp-2d.h new file mode 100644 index 0000000000..35a5f730d2 --- /dev/null +++ b/include/ceed/jit-source/magma/interp-2d.h @@ -0,0 +1,157 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +// macros to abstract access of shared memory and reg. file +#define sT(i,j) sT[(j) * P_ + (i)] +#define sTmp(i,j,ldw) sTmp[(j)*(ldw) + (i)] + + +////////////////////////////////////////////////////////////////////////////////////////// +// interp basis action (2D) +template +static __device__ __inline__ void +magma_interp_2d_device( + const T *sT, magma_trans_t transT, + T rU[DIM_U][NCOMP_][rUsize] , T rV[DIM_V][NCOMP_][rVsize], + const int tx, T rTmp, T* swork) +{ + // Assumptions + // 1. 1D threads of size max(P_,Q_) + // 2. input: rU[DIM_U x NCOMP_ x rUsize] in registers (per thread) + // 3. output: rV[DIM_V x NCOMP_ x rVsize] in registers (per thread) + // 4. Two products per component + // 4.1 Batch P_ of (1xP_) matrices times (P_xQ_) matrix => Batch P_ of (1xQ_) matrices + // 4.2 Batch 1 of (Q_xP_) matrix times (P_xQ_) matrix => (Q_xQ_) matrix + // 5. Each thread computes one row of the output of each product + // 6. 
Sync is recommended before and after the call + + for(int icomp = 0; icomp < NCOMP_; icomp++){ + // 1st product -- Batch P_ of (1xP_) matrices [reg] x (P_xQ_) [shmem] => Batch P_ of (1xQ_) matrices + // the batch output P_ x (1xQ_) is written on the fly to shmem + if (tx < P_) { + const int batchid = tx; + const int sld = 1; + T* sTmp = swork + batchid * (1 * Q_); + for(int j = 0; j < Q_; j++){ + rTmp = 0.0; + for(int i = 0; i < P_; i++){ + rTmp += rU[0][icomp][i] * sT(i,j); + } + sTmp(0,j,sld) = rTmp; + } + } // end of: if (tx < P_) + __syncthreads(); + + // 2nd product -- Batch 1 of a (Q_xP_) matrix [shmem] x (P_xQ_) [shmem] => (Q_xQ_) matrix [reg] + if (tx < Q_) { + const int batchid = 0; + const int sld = Q_; + T* sTmp = swork + batchid * (Q_*P_); + for(int j = 0; j < Q_; j++){ + rTmp = 0.0; + for(int i = 0; i < P_; i++){ + rTmp += sTmp(tx,i,sld) * sT(i,j); + } + rV[0][icomp][j] += rTmp; + } + } + __syncthreads(); + } +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_2D)) __global__ void +magma_interpn_2d_kernel( + const CeedScalar *dT, + const CeedScalar *dU, const int estrdU, const int cstrdU, + CeedScalar *dV, const int estrdV, const int cstrdV, const int nelem) +{ + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + magma_trans_t transT = MagmaNoTrans; + + if (elem_id >= nelem) return; + + CeedScalar rU[1][NCOMP][P] = { 0.0 }; // for a non fused operator DIM is always 1 + CeedScalar rV[1][NCOMP][Q] = { 0.0 }; // for a non fused operator DIM is always 1 + CeedScalar rTmp = 0.0; + + // shift global memory pointers by elem stride + dU += elem_id * estrdU; + dV += elem_id * estrdV; + + // assign shared memory pointers + CeedScalar* sT = (CeedScalar*)(shared_data); + CeedScalar* sTmp = sT + P*Q; + sTmp += ty * (P * MAXPQ); + + // read 
T + if (ty == 0) { + dread_T_gm2sm(tx, transT, dT, sT); + } + + // read U -- there is a sync at the end of this function + readU_2d(dU, cstrdU, rU, sTmp, tx); + + // no sync needed here -- readU_2d already syncs at the end + magma_interp_2d_device(sT, transT, rU, rV, tx, rTmp, sTmp); + __syncthreads(); + + // write V + writeV_2d(dV, cstrdV, rV, tx); +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ, MAGMA_MAXTHREADS_2D)) __global__ void +magma_interpt_2d_kernel( + const CeedScalar *dT, + const CeedScalar *dU, const int estrdU, const int cstrdU, + CeedScalar *dV, const int estrdV, const int cstrdV, const int nelem) +{ + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + magma_trans_t transT = MagmaTrans; + + if (elem_id >= nelem) return; + + CeedScalar rU[1][NCOMP][Q] = { 0.0 }; // for a non fused operator DIM is always 1 + CeedScalar rV[1][NCOMP][P] = { 0.0 }; // for a non fused operator DIM is always 1 + CeedScalar rTmp = 0.0; + + // shift global memory pointers by elem stride + dU += elem_id * estrdU; + dV += elem_id * estrdV; + + // assign shared memory pointers + CeedScalar* sT = (CeedScalar*)(shared_data); + CeedScalar* sTmp = sT + Q*P; + sTmp += ty * (Q * MAXPQ); + + // read T + if (ty == 0) { + dread_T_gm2sm(tx, transT, dT, sT); + } + + // read V + readV_2d(dV, cstrdV, rV, tx); + + // read U -- there is a sync at the end of this function + readU_2d(dU, cstrdU, rU, sTmp, tx); + + // no sync needed here -- readU_2d already syncs at the end + magma_interp_2d_device(sT, transT, rU, rV, tx, rTmp, sTmp); + __syncthreads(); + + // write V + writeV_2d(dV, cstrdV, rV, tx); +} diff --git a/include/ceed/jit-source/magma/interp-3d.h b/include/ceed/jit-source/magma/interp-3d.h new file mode 100644 index 0000000000..5d1302d7c0 --- /dev/null +++ 
b/include/ceed/jit-source/magma/interp-3d.h @@ -0,0 +1,185 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +// macros to abstract access of shared memory and reg. file +#define sT(i,j) sT[(j) * P_ + (i)] +#define sTmp(i,j,ldw) sTmp[(j)*(ldw) + (i)] + + +////////////////////////////////////////////////////////////////////////////////////////// +// interp basis action (3D) +template +static __device__ __inline__ void +magma_interp_3d_device( + const T *sT, magma_trans_t transT, + T rU[DIM_U][NCOMP_][rUsize] , T rV[DIM_V][NCOMP_][rVsize], + const int tx, T rTmp[Q_], T* swork) +{ + // Assumptions + // 1. 1D threads of size max(P_,Q_)^2 + // 2. input: rU[DIM_U x NCOMP_ x rUsize] in registers (per thread) + // 3. output: rV[DIM_V x NCOMP_ x rVsize] in registers (per thread) + // 4. Three products per component + // 4.1 Batch P_^2 of (1xP_) matrices times (P_xQ_) matrix => Batch P_^2 of (1xQ_) matrices + // 4.2 Batch P_ of (Q_xP_) matrices times (P_xQ_) matrix => Batch P_ of (Q_xQ_) matrices + // 4.3 Batch 1 of (Q_^2xP_) matrix times (P_xQ_) matrix => (Q_^2xQ_) matrix + // 5. Each thread computes one row of the output of each product + // 6. 
Sync is recommended before and after the call + + for(int icomp = 0; icomp < NCOMP_; icomp++){ + // Batch P_^2 of (1xP_) matrices [reg] times (P_xQ_) matrix [shmem] => Batch P_^2 of (1xQ_) matrices [shmem] + if (tx < (P_*P_)) { + const int batchid = tx; + const int sld = 1; + T* sTmp = swork + batchid * (1*Q_); + for(int j = 0; j < Q_; j++){ + rTmp[0] = 0.0; + for(int i = 0; i < P_; i++){ + rTmp[0] += rU[0][icomp][i] * sT(i,j); + } + sTmp(0,j,sld) = rTmp[0]; + } + } // end of: if (tx < P_*P_) + __syncthreads(); + + // Batch P_ of (Q_xP_) matrices [shmem] times (P_xQ_) matrix [shmem] => Batch P_ of (Q_xQ_) matrices [reg] + if (tx < (P_*Q_)) { + const int batchid = tx / Q_; + const int tx_ = tx % Q_; + const int sld = Q_; + T* sTmp = swork + batchid * (Q_*P_); // sTmp is input + for(int j = 0; j < Q_; j++){ + rTmp[j] = 0.0; + for(int i = 0; i < P_; i++){ + rTmp[j] += sTmp(tx_,i,sld) * sT(i,j); + } + } + } + __syncthreads(); + + // write rTmp[] into shmem as batch P_ of Q_xQ_ matrices + if (tx < (P_*Q_)){ + const int batchid = tx / Q_; + const int tx_ = tx % Q_; + const int sld = Q_; + T* sTmp = swork + batchid * (Q_*Q_); + for(int j = 0; j < Q_; j++){ + sTmp(tx_, j, sld) = rTmp[j]; + } + } + __syncthreads(); + + // Batch 1 of (Q_^2xP_) matrices [shmem] times (P_xQ_) matrix [shmem] => Batch 1 of (Q_^2xQ_) matrices [reg] + if (tx < (Q_*Q_)) { + // No need to declare batchid = (tx / Q_^2) = always zero + // No need to declare tx_ = (tx % Q_^2) = always tx + const int sld = Q_*Q_; + T* sTmp = swork; + for(int j = 0; j < Q_; j++) { + rTmp[0] = 0.0; + for(int i = 0; i < P_; i++) { + rTmp[0] += sTmp(tx,i,sld) * sT(i,j); + } + rV[0][icomp][j] += rTmp[0]; + } + } + __syncthreads(); + } +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ*MAXPQ, MAGMA_MAXTHREADS_3D)) __global__ void +magma_interpn_3d_kernel( + const CeedScalar *dT, + const CeedScalar *dU, const int estrdU, const 
int cstrdU, + CeedScalar *dV, const int estrdV, const int cstrdV, const int nelem) +{ + MAGMA_DEVICE_SHARED( CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + magma_trans_t transT = MagmaNoTrans; + + if (elem_id >= nelem) return; + + CeedScalar rU[1][NCOMP][P] = { 0.0 }; // for a non fused operator DIM is always 1 + CeedScalar rV[1][NCOMP][Q] = { 0.0 }; // for a non fused operator DIM is always 1 + CeedScalar rTmp[Q] = { 0.0 }; + + // shift global memory pointers by elem stride + dU += elem_id * estrdU; + dV += elem_id * estrdV; + + // assign shared memory pointers + CeedScalar* sT = (CeedScalar*)(shared_data); + CeedScalar* sTmp = sT + P*Q; + sTmp += ty * (max(P*P*MAXPQ, P*Q*Q)); + + // read T + if (ty == 0) { + dread_T_gm2sm(tx, transT, dT, sT); + } + + // read U (idim = 0 for dU, iDIM = 0 for rU, u_dimstride is always 0) + readU_3d(dU, cstrdU, rU, sTmp, tx); + // there is a sync at the end of this function + + magma_interp_3d_device(sT, transT, rU , rV, tx, rTmp, sTmp); + __syncthreads(); + + // write V + writeV_3d(dV, cstrdV, rV, tx); +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(MAXPQ*MAXPQ, MAGMA_MAXTHREADS_3D)) __global__ void +magma_interpt_3d_kernel( + const CeedScalar *dT, + const CeedScalar *dU, const int estrdU, const int cstrdU, + CeedScalar *dV, const int estrdV, const int cstrdV, const int nelem) +{ + MAGMA_DEVICE_SHARED( CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + magma_trans_t transT = MagmaTrans; + + if (elem_id >= nelem) return; + + CeedScalar rU[1][NCOMP][Q] = { 0.0 }; // for a non fused operator DIM is always 1 + CeedScalar rV[1][NCOMP][P] = { 0.0 }; // for a non fused operator DIM is always 1 + CeedScalar rTmp[P] = { 0.0 }; + + // shift global memory 
pointers by elem stride + dU += elem_id * estrdU; + dV += elem_id * estrdV; + + // assign shared memory pointers + CeedScalar* sT = (CeedScalar*)(shared_data); + CeedScalar* sTmp = sT + Q*P; + sTmp += ty * (max(Q*Q*MAXPQ, Q*P*P)); + + // read T + if (ty == 0) { + dread_T_gm2sm(tx, transT, dT, sT); + } + + // read V + readV_3d(dV, cstrdV, rV, tx); + + // read U (idim = 0 for dU, iDIM = 0 for rU, u_dimstride is always 0) + readU_3d(dU, cstrdU, rU, sTmp, tx); + // there is a sync at the end of this function + + magma_interp_3d_device(sT, transT, rU , rV, tx, rTmp, sTmp); + __syncthreads(); + + // write V + writeV_3d(dV, cstrdV, rV, tx); +} diff --git a/backends/magma/kernels/common/magma_common_device.h b/include/ceed/jit-source/magma/magma_common_device.h similarity index 61% rename from backends/magma/kernels/common/magma_common_device.h rename to include/ceed/jit-source/magma/magma_common_device.h index 33c8d0ace6..606acaf647 100644 --- a/backends/magma/kernels/common/magma_common_device.h +++ b/include/ceed/jit-source/magma/magma_common_device.h @@ -14,6 +14,13 @@ #define MAGMA_DEVICE_SHARED(type, name) extern __shared__ type name[]; #endif +typedef enum { + MagmaNoTrans = 111, + MagmaTrans = 112, + MagmaConjTrans = 113, + Magma_ConjTrans = MagmaConjTrans +} magma_trans_t; + #define MAGMA_MAXTHREADS_1D 128 #define MAGMA_MAXTHREADS_2D 128 #define MAGMA_MAXTHREADS_3D 64 @@ -24,66 +31,19 @@ // for use with __launch_bounds__() #define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt)) -////////////////////////////////////////////////////////////////////////////////////////// -// init scalar to zero -template -__device__ __inline__ T -make_zero() -{ - return 0; -} -////////////////////////////////////////////////////////////////////////////////////////// -// init scalar to zero -- specialization -template<> -__device__ __inline__ magmaFloatComplex -make_zero() -{ - return MAGMA_C_ZERO; -} 
-////////////////////////////////////////////////////////////////////////////////////////// -// init scalar to zero -- specialization -template<> -__device__ __inline__ magmaDoubleComplex -make_zero() -{ - return MAGMA_Z_ZERO; -} - -////////////////////////////////////////////////////////////////////////////////////////// -// init scalar to one -template -__device__ __inline__ T -make_one() -{ - return 1; -} -////////////////////////////////////////////////////////////////////////////////////////// -// init scalar to zero -- specialization -template<> -__device__ __inline__ magmaFloatComplex -make_one() -{ - return MAGMA_C_ONE; -} -////////////////////////////////////////////////////////////////////////////////////////// -// init scalar to zero -- specialization -template<> -__device__ __inline__ magmaDoubleComplex -make_one() -{ - return MAGMA_Z_ONE; -} +#define MAGMA_D_ZERO 0.0 +#define MAGMA_D_ONE 1.0 ////////////////////////////////////////////////////////////////////////////////////////// // read U or V of a 1D element into shared memory sU[][] or sV[][] -- for all components // the devptr is assumed to point directly to the element // must sync after call -template +template __device__ __inline__ void -read_1d(const T* devptr, const int compstride, T* sBuffer[NCOMP], const int tx) +read_1d(const T* devptr, const int compstride, T* sBuffer[NCOMP_], const int tx) { if (tx < LENGTH) { - for(int icomp = 0; icomp < NCOMP; icomp++) { + for(int icomp = 0; icomp < NCOMP_; icomp++) { sBuffer[icomp][tx] = devptr[icomp * compstride + tx]; } } @@ -92,12 +52,12 @@ read_1d(const T* devptr, const int compstride, T* sBuffer[NCOMP], const int tx) ////////////////////////////////////////////////////////////////////////////////////////// // write V of a 1D element into global memory from sV[][] -- for all components // the devptr is assumed to point directly to the element -template +template __device__ __inline__ void -write_1d(T* sBuffer[NCOMP], T* devptr, const int 
compstride, const int tx) +write_1d(T* sBuffer[NCOMP_], T* devptr, const int compstride, const int tx) { if (tx < LENGTH) { - for(int icomp = 0; icomp < NCOMP; icomp++) { + for(int icomp = 0; icomp < NCOMP_; icomp++) { devptr[icomp * compstride + tx] = sBuffer[icomp][tx]; } } @@ -106,36 +66,36 @@ write_1d(T* sBuffer[NCOMP], T* devptr, const int compstride, const int tx) ////////////////////////////////////////////////////////////////////////////////////////// // read U of a 2D element into registers rU[][][] -- for all components of a single dim // dU is assumed to be offset by elem-stride and dim-stride -// register is assumed to be rU[DIMU][NCOMP][rUsize] +// register is assumed to be rU[DIMU][NCOMP_][rUsize] // iDIM specifies which dimension is being read into in rU -// rUsize can be different from P (e.g. MAXPQ) -// sTmp is a shared memory workspace of size P^2 -template +// rUsize can be different from P_ (e.g. MAXP_Q) +// sTmp is a shared memory workspace of size P_^2 +template __device__ __inline__ void -readU_2d(const T* dU, const int compstride, T rU[DIMU][NCOMP][rUsize], T* sTmp, const int tx) +readU_2d(const T* dU, const int compstride, T rU[DIMU][NCOMP_][rUsize], T* sTmp, const int tx) { - // read U as a batch P of (1xP) vectors - // vec 0 : [u0, u1, u2, ... u_(P-1)] -- contiguous in memory - // vec 1 : [u0, u1, u2, ... u_(P-1)] -- contiguous in memory + // read U as a batch P_ of (1xP_) vectors + // vec 0 : [u0, u1, u2, ... u_(P_-1)] -- contiguous in memory + // vec 1 : [u0, u1, u2, ... u_(P_-1)] -- contiguous in memory // ... - // vec P-1: [u0, u1, u2, ... u_(P-1)] -- contiguous in memory + // vec P_-1: [u0, u1, u2, ... 
u_(P_-1)] -- contiguous in memory // threads collaboratively read vec0 and then vec1 and so on // but for the kernel, we want // thread 0 to hold all of vec0 in registers, and // thread 1 to hold all of vec1 in registers, and and so on // so we need to transpose - for(int icomp = 0; icomp < NCOMP; icomp++) { + for(int icomp = 0; icomp < NCOMP_; icomp++) { // read from global memory into shared memory - if (tx < P) { - for(int i = 0; i < P; i++) { - sTmp[i*P + tx] = dU[icomp * compstride + i*P + tx]; + if (tx < P_) { + for(int i = 0; i < P_; i++) { + sTmp[i*P_ + tx] = dU[icomp * compstride + i*P_ + tx]; } } __syncthreads(); - if (tx < P) { - for(int i = 0; i < P; i++) { - rU[iDIM][icomp][i] = sTmp[tx*P + i]; + if (tx < P_) { + for(int i = 0; i < P_; i++) { + rU[iDIM][icomp][i] = sTmp[tx*P_ + i]; } } __syncthreads(); @@ -145,17 +105,17 @@ readU_2d(const T* dU, const int compstride, T rU[DIMU][NCOMP][rUsize], T* sTmp, ////////////////////////////////////////////////////////////////////////////////////////// // read V of a 2D element into registers rV[][][] -- for all components of a single dim // dV is assumed to be offset by elem-stride and dim-stride -// register is assumed to be rV[DIMV][NCOMP][rVsize] +// register is assumed to be rV[DIMV][NCOMP_][rVsize] // iDIM specifies which dimension is being read into in rV -// rVsize can be different from P (e.g. MAXPQ) -template +// rVsize can be different from P_ (e.g. 
MAXP_Q) +template __device__ __inline__ void -readV_2d(const T* dV, const int compstride, T rV[DIMV][NCOMP][rVsize], const int tx) +readV_2d(const T* dV, const int compstride, T rV[DIMV][NCOMP_][rVsize], const int tx) { - if (tx < Q) { - for(int icomp = 0; icomp < NCOMP; icomp++) { - for(int j = 0; j < Q; j++) { - rV[iDIM][icomp][j] = dV[icomp * compstride + j*Q + tx]; + if (tx < Q_) { + for(int icomp = 0; icomp < NCOMP_; icomp++) { + for(int j = 0; j < Q_; j++) { + rV[iDIM][icomp][j] = dV[icomp * compstride + j*Q_ + tx]; } } } @@ -164,18 +124,18 @@ readV_2d(const T* dV, const int compstride, T rV[DIMV][NCOMP][rVsize], const int ////////////////////////////////////////////////////////////////////////////////////////// // write V of a 2D element from registers rV[][][] to global memory -- for all components of a single dim // dV is assumed to be offset by elem-stride and dim-stride -// register is assumed to be rV[DIMV][NCOMP][rVsize] +// register is assumed to be rV[DIMV][NCOMP_][rVsize] // iDIM specifies which dimension is being read from in rV // idim specifies which dimension is being written to in dV -// rVsize can be different from P (e.g. MAXPQ) -template +// rVsize can be different from P_ (e.g. 
MAXP_Q) +template __device__ __inline__ void -writeV_2d(T* dV, const int compstride, T rV[DIMV][NCOMP][rVsize], const int tx) +writeV_2d(T* dV, const int compstride, T rV[DIMV][NCOMP_][rVsize], const int tx) { - if (tx < Q) { - for(int icomp = 0; icomp < NCOMP; icomp++) { - for(int j = 0; j < Q; j++) { - dV[icomp * compstride + j*Q + tx] = rV[iDIM][icomp][j]; + if (tx < Q_) { + for(int icomp = 0; icomp < NCOMP_; icomp++) { + for(int j = 0; j < Q_; j++) { + dV[icomp * compstride + j*Q_ + tx] = rV[iDIM][icomp][j]; } } } @@ -184,36 +144,36 @@ writeV_2d(T* dV, const int compstride, T rV[DIMV][NCOMP][rVsize], const int tx) ////////////////////////////////////////////////////////////////////////////////////////// // read U of a 3D element into registers rU[][][] -- for all components of a single dim // dU is assumed to be offset by elem-stride and dim-stride -// register is assumed to be rU[DIMU][NCOMP][rUsize] +// register is assumed to be rU[DIMU][NCOMP_][rUsize] // iDIM specifies which dimension is being read into in rU -// rUsize can be different from P (e.g. MAXPQ) -// sTmp is a shared memory workspace of size P^3 -template +// rUsize can be different from P_ (e.g. MAXP_Q) +// sTmp is a shared memory workspace of size P_^3 +template __device__ __inline__ void -readU_3d(const T* dU, const int compstride, T rU[DIMU][NCOMP][rUsize], T* sTmp, const int tx) +readU_3d(const T* dU, const int compstride, T rU[DIMU][NCOMP_][rUsize], T* sTmp, const int tx) { - // read U as a batch P^2 of (1xP) vectors - // vec 0 : [u0, u1, u2, ... u_(P-1)] -- contiguous in memory - // vec 1 : [u0, u1, u2, ... u_(P-1)] -- contiguous in memory + // read U as a batch P_^2 of (1xP_) vectors + // vec 0 : [u0, u1, u2, ... u_(P_-1)] -- contiguous in memory + // vec 1 : [u0, u1, u2, ... u_(P_-1)] -- contiguous in memory // ... - // vec P^2-1: [u0, u1, u2, ... u_(P-1)] -- contiguous in memory + // vec P_^2-1: [u0, u1, u2, ... 
u_(P_-1)] -- contiguous in memory // threads collaboratively read vec0 and then vec1 and so on // but for the kernel, we want // thread 0 to hold all of vec0 in registers, and // thread 1 to hold all of vec1 in registers, and and so on // so we need to transpose - for(int icomp = 0; icomp < NCOMP; icomp++) { + for(int icomp = 0; icomp < NCOMP_; icomp++) { // read from global memory into shared memory - if (tx < P*P) { - for(int i = 0; i < P; i++) { - sTmp[i*P*P + tx] = dU[icomp * compstride + i*P*P + tx]; + if (tx < P_*P_) { + for(int i = 0; i < P_; i++) { + sTmp[i*P_*P_ + tx] = dU[icomp * compstride + i*P_*P_ + tx]; } } __syncthreads(); - if (tx < P*P) { - for(int i = 0; i < P; i++) { - rU[iDIM][icomp][i] = sTmp[tx*P + i]; + if (tx < P_*P_) { + for(int i = 0; i < P_; i++) { + rU[iDIM][icomp][i] = sTmp[tx*P_ + i]; } } __syncthreads(); @@ -223,17 +183,17 @@ readU_3d(const T* dU, const int compstride, T rU[DIMU][NCOMP][rUsize], T* sTmp, ////////////////////////////////////////////////////////////////////////////////////////// // read V of a 3D element into registers rV[][][] -- for all components of a single dim // dV is assumed to be offset by elem-stride and dim-stride -// register is assumed to be rV[DIMV][NCOMP][rVsize] +// register is assumed to be rV[DIMV][NCOMP_][rVsize] // iDIM specifies which dimension is being read into in rV -// rVsize can be different from P (e.g. MAXPQ) -template +// rVsize can be different from P_ (e.g. 
MAXP_Q) +template __device__ __inline__ void -readV_3d(const T* dV, const int compstride, T rV[DIMV][NCOMP][rVsize], const int tx) +readV_3d(const T* dV, const int compstride, T rV[DIMV][NCOMP_][rVsize], const int tx) { - if (tx < Q*Q) { - for(int icomp = 0; icomp < NCOMP; icomp++) { - for(int j = 0; j < Q; j++) { - rV[iDIM][icomp][j] = dV[icomp * compstride + j*(Q*Q) + tx]; + if (tx < Q_*Q_) { + for(int icomp = 0; icomp < NCOMP_; icomp++) { + for(int j = 0; j < Q_; j++) { + rV[iDIM][icomp][j] = dV[icomp * compstride + j*(Q_*Q_) + tx]; } } } @@ -242,18 +202,18 @@ readV_3d(const T* dV, const int compstride, T rV[DIMV][NCOMP][rVsize], const int ////////////////////////////////////////////////////////////////////////////////////////// // write V of a 3D element from registers rV[][][] to global memory -- for all components of a single dim // dV is assumed to point directly to the element (i.e. already offset by elem-stride) -// register is assumed to be rV[DIMV][NCOMP][rVsize] +// register is assumed to be rV[DIMV][NCOMP_][rVsize] // iDIM specifies which dimension is being read from in rV // idim specifies which dimension is being written to in dV -// rVsize can be different from P (e.g. MAXPQ) -template +// rVsize can be different from P_ (e.g. 
MAXP_Q) +template __device__ __inline__ void -writeV_3d(T* dV, const int compstride, T rV[DIMV][NCOMP][rVsize], const int tx) +writeV_3d(T* dV, const int compstride, T rV[DIMV][NCOMP_][rVsize], const int tx) { - if (tx < (Q*Q)) { - for(int icomp = 0; icomp < NCOMP; icomp++) { - for(int j = 0; j < Q; j++) { - dV[icomp * compstride + j*(Q*Q) + tx] = rV[iDIM][icomp][j]; + if (tx < (Q_*Q_)) { + for(int icomp = 0; icomp < NCOMP_; icomp++) { + for(int j = 0; j < Q_; j++) { + dV[icomp * compstride + j*(Q_*Q_) + tx] = rV[iDIM][icomp][j]; } } } @@ -318,7 +278,7 @@ dread_V_gsm2reg( // writes a slice of V from reg to shared/global memory // the correct pointer V must be precomputed template -__device__ __noinline__ void +__device__ __inline__ void dwrite_V_reg2gsm( const int C, const int tx_, CeedScalar rV[J], CeedScalar* V ) @@ -338,7 +298,7 @@ dgemm_slice( { CeedScalar rTmp; for(int j = 0; j < J; j++) { - rTmp = MAGMA_D_ZERO; + rTmp = 0.0; for(int b = 0; b < B; b++){ rTmp += rU[ b ] * sT[ j * B + b ]; } @@ -363,7 +323,7 @@ dgemm_ceed_device( const int tx, const int A, const int C, magma_trans_t transT, dV += slice_id * C * J; // read V if beta is non-zero - if ( beta != MAGMA_D_ZERO ) { + if ( beta != 0.0 ) { dread_V_gsm2reg(C, tx_, (const CeedScalar*)dV, rV); } diff --git a/include/ceed/jit-source/magma/weight-1d.h b/include/ceed/jit-source/magma/weight-1d.h new file mode 100644 index 0000000000..c70ce3cb5e --- /dev/null +++ b/include/ceed/jit-source/magma/weight-1d.h @@ -0,0 +1,54 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
+// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +////////////////////////////////////////////////////////////////////////////////////////// +// weight basis action -- 1D +template +__device__ __inline__ void +magma_weight_1d_device(const T* sTweight, T* sV, const int tx) +{ + // Assumptions + // 1. 1D thread configuration of size Q_ + // 2. The output sV is in shared memory -- size 1xQ_ + if (tx < Q_){ + sV[tx] = sTweight[tx]; + } +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(Q, MAGMA_MAXTHREADS_1D)) __global__ void +magma_weight_1d_kernel(const CeedScalar *dqweight1d, CeedScalar *dV, const int v_stride, const int nelem) +{ + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + + if (elem_id >= nelem) return; + + // global memory pointers + dV += elem_id * v_stride; + + // shared memory pointers + CeedScalar* sTweight = (CeedScalar*)shared_data; + CeedScalar* sV = sTweight + Q; + sV += ty * Q; + + // read dqweight_1d + if (ty == 0 && tx < Q) { + sTweight[tx] = dqweight1d[tx]; + } + + __syncthreads(); + magma_weight_1d_device(sTweight, sV, tx); + __syncthreads(); + + // write V + dV[ tx ] = sV[ tx ]; +} + diff --git a/include/ceed/jit-source/magma/weight-2d.h b/include/ceed/jit-source/magma/weight-2d.h new file mode 100644 index 0000000000..8ae8530967 --- /dev/null +++ b/include/ceed/jit-source/magma/weight-2d.h @@ -0,0 +1,63 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
+// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +////////////////////////////////////////////////////////////////////////////////////////// +// weight basis action -- 2D +template +__device__ __inline__ void +magma_weight_2d_device(const T* sTweight, T rV[DIM_][NCOMP_][Q_], const int tx) +{ + // Assumptions + // 1. 1D thread configuration of size Q_ + // 2. rV[][][] matches the storage used in other actions (interp, grad, ... etc) + // 3. iDIM and iCOMP specify which indexes to use in rV, + // since the output per thread is a register array of size Q_ + // 4. Sync is recommended after the call (to make sure sTweight can be overwritten) + + if (tx < Q_) { + // x sTweight[j] for first update + // x sTweight[tx] for second update + for(int j = 0; j < Q_; j++) { + rV[iDIM][iCOMP][j] = sTweight[j] * sTweight[tx]; + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(Q, MAGMA_MAXTHREADS_2D)) __global__ void +magma_weight_2d_kernel(const CeedScalar *dqweight1d, CeedScalar *dV, const int v_stride, const int nelem) +{ + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + + if (elem_id >= nelem) return; + + CeedScalar rV[1][1][Q]; // allocate with DIM=NCOMP=1, but sizes may differ for a fused operator + // global memory pointers + dV += elem_id * v_stride; + + // shared memory pointers + CeedScalar* sTweight = (CeedScalar*)shared_data; + + // read dqweight_1d + if (ty == 0 && tx < Q) { + sTweight[tx] = dqweight1d[tx]; + } + + __syncthreads(); + magma_weight_2d_device(sTweight, rV, tx); + + // write V + if (tx < Q) { + for(int j = 0; j < Q; j++) { + dV[ j*Q + tx ] = rV[0][0][j]; + } + } +} diff --git a/include/ceed/jit-source/magma/weight-3d.h b/include/ceed/jit-source/magma/weight-3d.h new file mode 100644 
index 0000000000..de5a253d90 --- /dev/null +++ b/include/ceed/jit-source/magma/weight-3d.h @@ -0,0 +1,64 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +////////////////////////////////////////////////////////////////////////////////////////// +// weight basis action -- 3D +template +__device__ __inline__ void +magma_weight_3d_device(const T* sTweight, T rV[DIM_][NCOMP_][Q_], const int tx) +{ + // Assumptions + // 1. 1D thread configuration of size Q_^2 + // 2. rV[][][] matches the storage used in other actions (interp, grad, ... etc) + // 3. iDIM and iCOMP specify which indexes to use in rV, + // since the output per thread is a register array of size Q_ + // 4. Sync is recommended after the call (to make sure sTweight can be overwritten) + + if (tx < (Q_*Q_)) { + // x sTweight[j] for first update + // x sTweight[tx%Q_] for second update + // x sTweight[tx/Q_] for third update + for(int j = 0; j < Q_; j++) { + rV[iDIM][iCOMP][j] = sTweight[j] * sTweight[tx%Q_] * sTweight[tx/Q_]; + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////// +extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(Q*Q, MAGMA_MAXTHREADS_3D)) __global__ void +magma_weight_3d_kernel(const CeedScalar *dqweight1d, CeedScalar *dV, const int v_stride, const int nelem) +{ + MAGMA_DEVICE_SHARED(CeedScalar, shared_data) + + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int elem_id = (blockIdx.x * blockDim.y) + ty; + + if (elem_id >= nelem) return; + + CeedScalar rV[1][1][Q]; // allocate with DIM=NCOMP=1, but sizes may differ for a fused operator + // global memory pointers + dV += elem_id * v_stride; + + // shared memory pointers + CeedScalar* sTweight = (CeedScalar*)shared_data; + + // read 
dqweight_1d + if (tx < Q) { + sTweight[tx] = dqweight1d[tx]; + } + __syncthreads(); + + magma_weight_3d_device(sTweight, rV, tx); + + // write V + if (tx < (Q*Q)) { + for(int j = 0; j < Q; j++) { + dV[ j*(Q*Q) + tx ] = rV[0][0][j]; + } + } +} diff --git a/include/ceed/jit-tools.h b/include/ceed/jit-tools.h index 90d6a7712a..f951afc440 100644 --- a/include/ceed/jit-tools.h +++ b/include/ceed/jit-tools.h @@ -21,6 +21,7 @@ CEED_EXTERN int CeedCheckFilePath(Ceed ceed, const char *source_file_path, bool *is_valid); CEED_EXTERN int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path, char **buffer); +CEED_EXTERN int CeedLoadSourceToInitializedBuffer(Ceed ceed, const char *source_file_path, char **buffer); CEED_EXTERN int CeedPathConcatenate(Ceed ceed, const char *base_file_path, const char *relative_file_path, char **new_file_path); CEED_EXTERN int CeedGetJitRelativePath(const char *absolute_file_path, diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c index e994aee245..61070a96c0 100644 --- a/interface/ceed-jit-tools.c +++ b/interface/ceed-jit-tools.c @@ -67,7 +67,7 @@ int CeedCheckFilePath(Ceed ceed, const char *source_file_path, bool *is_valid) { } /** - @brief Load source file into initalized string buffer, including full text + @brief Load source file into initialized string buffer, including full text of local files in place of `#include "local.h"` @param ceed A Ceed object for error handling @@ -78,8 +78,8 @@ int CeedCheckFilePath(Ceed ceed, const char *source_file_path, bool *is_valid) { @ref Backend **/ -static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed, - const char *source_file_path, char **buffer) { +int CeedLoadSourceToInitializedBuffer(Ceed ceed, + const char *source_file_path, char **buffer) { int ierr; FILE *source_file; long file_size, file_offset = 0; @@ -154,7 +154,7 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed, include_file_name_len); memcpy(&include_source_path[root_length + 
include_file_name_len + 1], "", 1); // ---- Recursive call to load source to buffer - ierr = CeedLoadSourceToInitalizedBuffer(ceed, include_source_path, buffer); + ierr = CeedLoadSourceToInitializedBuffer(ceed, include_source_path, buffer); CeedDebug256(ceed, 2, "JiT Including: %s\n", include_source_path); CeedChk(ierr); ierr = CeedFree(&include_source_path); CeedChk(ierr); @@ -206,7 +206,7 @@ int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path, ierr = CeedCalloc(1, buffer); CeedChk(ierr); // Load to initalized buffer - ierr = CeedLoadSourceToInitalizedBuffer(ceed, source_file_path, buffer); + ierr = CeedLoadSourceToInitializedBuffer(ceed, source_file_path, buffer); CeedChk(ierr); return CEED_ERROR_SUCCESS; From 12070e41bb7dc43147375364d72777d29947e4b5 Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Thu, 26 May 2022 15:58:34 -0600 Subject: [PATCH 065/172] Remove restrictions on tests for MAGMA backend when using tensor basis kernels --- tests/junit.py | 6 +++--- tests/tap.sh | 21 ++------------------- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/tests/junit.py b/tests/junit.py index 4278d81d35..746781c670 100755 --- a/tests/junit.py +++ b/tests/junit.py @@ -55,12 +55,12 @@ def contains_any(resource, substrings): def skip_rule(test, resource): return any(( - test.startswith('fluids-') and contains_any(resource, ['occa', 'magma']), + test.startswith('fluids-') and contains_any(resource, ['occa']), test.startswith('solids-') and contains_any(resource, ['occa']), test.startswith('nek') and contains_any(resource, ['occa']), test.startswith('t507') and contains_any(resource, ['occa']), - test.startswith('t318') and contains_any(resource, ['magma', '/gpu/cuda/ref']), - test.startswith('t506') and contains_any(resource, ['magma', '/gpu/cuda/shared']), + test.startswith('t318') and contains_any(resource, ['/gpu/cuda/ref']), + test.startswith('t506') and contains_any(resource, ['/gpu/cuda/shared']), )) def 
run(test, backends): diff --git a/tests/tap.sh b/tests/tap.sh index e2ef93b61b..b0c49be14e 100755 --- a/tests/tap.sh +++ b/tests/tap.sh @@ -99,14 +99,6 @@ for ((i=0;i<${#backends[@]};++i)); do continue; fi - # Navier-Stokes test problem has too many components for MAGMA backends - if [[ "$backend" = *magma* && ( "$1" = fluids-* ) ]]; then - printf "ok $i0 # SKIP - backend basis kernel not available $backend\n" - printf "ok $i1 # SKIP - backend basis kernel not available $backend stdout\n" - printf "ok $i2 # SKIP - backend basis kernel not available $backend stderr\n" - continue; - fi - # Run in subshell (build/$1 ${args/\{ceed_resource\}/$backend} || false) > ${output}.out 2> ${output}.err status=$? @@ -174,8 +166,8 @@ for ((i=0;i<${#backends[@]};++i)); do continue fi - # grep to skip t318 for cuda/ref and MAGMA, Q is too large for these backends - if [[ "$backend" = *magma* || "$backend" = *cuda/ref ]] \ + # grep to skip t318 for cuda/ref, Q is too large + if [[ "$backend" = *cuda/ref ]] \ && [[ "$1" = t318* ]] ; then printf "ok $i0 # SKIP - backend basis kernel not available $1 $backend\n" printf "ok $i1 # SKIP - backend basis kernel not available $1 $backend stdout\n" @@ -183,15 +175,6 @@ for ((i=0;i<${#backends[@]};++i)); do continue fi - # grep to skip t506 for MAGMA, range of basis kernels limited for now - if [[ "$backend" = *magma* ]] \ - && [[ "$1" = t506* ]] ; then - printf "ok $i0 # SKIP - backend basis kernel not available $1 $backend\n" - printf "ok $i1 # SKIP - backend basis kernel not available $1 $backend stdout\n" - printf "ok $i2 # SKIP - backend basis kernel not available $1 $backend stderr\n" - continue - fi - # grep to pass test t541 for single precision if grep -F -q -e 'Test not implemented in single precision' ${output}.err \ && [[ "$1" = "t541"* ]] ; then From e5f091eb2082fd2e5a436aed5b3c40dee25ac3c3 Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Wed, 8 Jun 2022 17:23:34 -0600 Subject: [PATCH 066/172] 
MAGMA: Use more specific macro name for HIP mode --- Makefile | 6 +++--- backends/magma/ceed-magma-basis.c | 4 ++-- backends/magma/ceed-magma-det.c | 6 +++--- backends/magma/ceed-magma-restriction.c | 4 ++-- backends/magma/ceed-magma.c | 6 +++--- backends/magma/ceed-magma.h | 2 +- backends/magma/magma_dgemm_nontensor.c | 2 +- backends/magma/magma_sgemm_nontensor.c | 2 +- include/ceed/jit-source/magma/magma_common_device.h | 2 +- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index 54c997698c..4180414352 100644 --- a/Makefile +++ b/Makefile @@ -443,11 +443,11 @@ ifneq ($(wildcard $(MAGMA_DIR)/lib/libmagma.*),) libceed.c += $(magma.c) libceed.hip += $(magma.hip) ifneq ($(CXX), $(HIPCC)) - $(magma.c:%.c=$(OBJDIR)/%.o) $(magma.c:%=%.tidy) : CPPFLAGS += -I$(MAGMA_DIR)/include -I$(HIP_DIR)/include -DHAVE_HIP -DADD_ + $(magma.c:%.c=$(OBJDIR)/%.o) $(magma.c:%=%.tidy) : CPPFLAGS += -I$(MAGMA_DIR)/include -I$(HIP_DIR)/include -DCEED_MAGMA_USE_HIP -DADD_ else - $(magma.c:%.c=$(OBJDIR)/%.o) $(magma.c:%=%.tidy) : HIPCCFLAGS += -I$(MAGMA_DIR)/include -I$(HIP_DIR)/include -DHAVE_HIP -DADD_ + $(magma.c:%.c=$(OBJDIR)/%.o) $(magma.c:%=%.tidy) : HIPCCFLAGS += -I$(MAGMA_DIR)/include -I$(HIP_DIR)/include -DCEED_MAGMA_USE_HIP -DADD_ endif - $(magma.hip:%.hip.cpp=$(OBJDIR)/%.o) : HIPCCFLAGS += -I$(MAGMA_DIR)/include -I$(MAGMA_DIR)/magmablas -I$(HIP_DIR)/include -DHAVE_HIP -DADD_ + $(magma.hip:%.hip.cpp=$(OBJDIR)/%.o) : HIPCCFLAGS += -I$(MAGMA_DIR)/include -I$(MAGMA_DIR)/magmablas -I$(HIP_DIR)/include -DCEED_MAGMA_USE_HIP -DADD_ MAGMA_BACKENDS = /gpu/hip/magma /gpu/hip/magma/det endif endif diff --git a/backends/magma/ceed-magma-basis.c b/backends/magma/ceed-magma-basis.c index 7c4967435a..d277e0caf0 100644 --- a/backends/magma/ceed-magma-basis.c +++ b/backends/magma/ceed-magma-basis.c @@ -10,7 +10,7 @@ #include #include #include "ceed-magma.h" -#ifdef HAVE_HIP +#ifdef CEED_MAGMA_USE_HIP #include "../hip/ceed-hip-common.h" #include 
"../hip/ceed-hip-compile.h" #else @@ -569,7 +569,7 @@ int CeedBasisDestroy_Magma(CeedBasis basis) { ierr = magma_free(impl->dqweight1d); CeedChkBackend(ierr); Ceed ceed; ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); - #ifdef HAVE_HIP + #ifdef CEED_MAGMA_USE_HIP ierr = hipModuleUnload(impl->module); CeedChk_Hip(ceed, ierr); #else ierr = cuModuleUnload(impl->module); CeedChk_Cu(ceed, ierr); diff --git a/backends/magma/ceed-magma-det.c b/backends/magma/ceed-magma-det.c index f0d918c12f..27d2a42e33 100644 --- a/backends/magma/ceed-magma-det.c +++ b/backends/magma/ceed-magma-det.c @@ -42,7 +42,7 @@ CEED_INTERN int CeedInit_Magma_Det(const char *resource, Ceed ceed) { // Create reference CEED that implementation will be dispatched // through unless overridden Ceed ceedref; - #ifdef HAVE_HIP + #ifdef CEED_MAGMA_USE_HIP CeedInit("/gpu/hip/magma", &ceedref); #else CeedInit("/gpu/cuda/magma", &ceedref); @@ -51,7 +51,7 @@ CEED_INTERN int CeedInit_Magma_Det(const char *resource, Ceed ceed) { // Create reference CEED for restriction Ceed restrictionceedref; - #ifdef HAVE_HIP + #ifdef CEED_MAGMA_USE_HIP CeedInit("/gpu/hip/ref", &restrictionceedref); #else CeedInit("/gpu/cuda/ref", &restrictionceedref); @@ -63,7 +63,7 @@ CEED_INTERN int CeedInit_Magma_Det(const char *resource, Ceed ceed) { } CEED_INTERN int CeedRegister_Magma_Det(void) { - #ifdef HAVE_HIP + #ifdef CEED_MAGMA_USE_HIP return CeedRegister("/gpu/hip/magma/det", CeedInit_Magma_Det, 125); #else return CeedRegister("/gpu/cuda/magma/det", CeedInit_Magma_Det, 125); diff --git a/backends/magma/ceed-magma-restriction.c b/backends/magma/ceed-magma-restriction.c index ac8a42be8f..182da59e30 100644 --- a/backends/magma/ceed-magma-restriction.c +++ b/backends/magma/ceed-magma-restriction.c @@ -10,7 +10,7 @@ #include #include #include "ceed-magma.h" -#ifdef HAVE_HIP +#ifdef CEED_MAGMA_USE_HIP #include "../hip/ceed-hip-common.h" #include "../hip/ceed-hip-compile.h" #else @@ -168,7 +168,7 @@ static int 
CeedElemRestrictionDestroy_Magma(CeedElemRestriction r) { } Ceed ceed; ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); - #ifdef HAVE_HIP + #ifdef CEED_MAGMA_USE_HIP ierr = hipModuleUnload(impl->module); CeedChk_Hip(ceed, ierr); #else ierr = cuModuleUnload(impl->module); CeedChk_Cu(ceed, ierr); diff --git a/backends/magma/ceed-magma.c b/backends/magma/ceed-magma.c index e636a0a89c..5625235ad6 100644 --- a/backends/magma/ceed-magma.c +++ b/backends/magma/ceed-magma.c @@ -55,7 +55,7 @@ static int CeedInit_Magma(const char *resource, Ceed ceed) { } // create a queue that uses the null stream data->device = currentDeviceID; - #ifdef HAVE_HIP + #ifdef CEED_MAGMA_USE_HIP magma_queue_create_from_hip(data->device, NULL, NULL, NULL, &(data->queue)); #else magma_queue_create_from_cuda(data->device, NULL, NULL, NULL, &(data->queue)); @@ -64,7 +64,7 @@ static int CeedInit_Magma(const char *resource, Ceed ceed) { // Create reference CEED that implementation will be dispatched // through unless overridden Ceed ceedref; - #ifdef HAVE_HIP + #ifdef CEED_MAGMA_USE_HIP CeedInit("/gpu/hip/ref", &ceedref); #else CeedInit("/gpu/cuda/ref", &ceedref); @@ -86,7 +86,7 @@ static int CeedInit_Magma(const char *resource, Ceed ceed) { } CEED_INTERN int CeedRegister_Magma(void) { - #ifdef HAVE_HIP + #ifdef CEED_MAGMA_USE_HIP return CeedRegister("/gpu/hip/magma", CeedInit_Magma, 120); #else return CeedRegister("/gpu/cuda/magma", CeedInit_Magma, 120); diff --git a/backends/magma/ceed-magma.h b/backends/magma/ceed-magma.h index 2545eb3b05..b76b714879 100644 --- a/backends/magma/ceed-magma.h +++ b/backends/magma/ceed-magma.h @@ -23,7 +23,7 @@ // for use with __launch_bounds__() #define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt)) -#ifdef HAVE_HIP +#ifdef CEED_MAGMA_USE_HIP #define MAGMA_RTC_MODULE hipModule_t #define MAGMA_RTC_FUNCTION hipFunction_t #define MAGMA_RTC_COMPILE CeedCompileHip diff --git a/backends/magma/magma_dgemm_nontensor.c 
b/backends/magma/magma_dgemm_nontensor.c index 361be354b5..636c7774c0 100644 --- a/backends/magma/magma_dgemm_nontensor.c +++ b/backends/magma/magma_dgemm_nontensor.c @@ -7,7 +7,7 @@ #include "ceed-magma.h" -#ifdef HAVE_HIP +#ifdef CEED_MAGMA_USE_HIP // TODO: Tune for HIP int magma_dgemm_nontensor( diff --git a/backends/magma/magma_sgemm_nontensor.c b/backends/magma/magma_sgemm_nontensor.c index cc3316d462..d5954c8e1e 100644 --- a/backends/magma/magma_sgemm_nontensor.c +++ b/backends/magma/magma_sgemm_nontensor.c @@ -7,7 +7,7 @@ #include "ceed-magma.h" -#ifdef HAVE_HIP +#ifdef CEED_MAGMA_USE_HIP // TODO: Tune for HIP int magma_sgemm_nontensor( diff --git a/include/ceed/jit-source/magma/magma_common_device.h b/include/ceed/jit-source/magma/magma_common_device.h index 606acaf647..69ac89a059 100644 --- a/include/ceed/jit-source/magma/magma_common_device.h +++ b/include/ceed/jit-source/magma/magma_common_device.h @@ -8,7 +8,7 @@ #ifndef CEED_MAGMA_COMMON_DEVICE_H #define CEED_MAGMA_COMMON_DEVICE_H -#ifdef HAVE_HIP +#ifdef CEED_MAGMA_USE_HIP #define MAGMA_DEVICE_SHARED(type, name) HIP_DYNAMIC_SHARED(type, name) #else #define MAGMA_DEVICE_SHARED(type, name) extern __shared__ type name[]; From b3271f7369247419c8d73a07979b41a9f10d823f Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Mon, 20 Jun 2022 12:52:50 -0400 Subject: [PATCH 067/172] Add MAGMA RTC usage to release notes --- doc/sphinx/source/releasenotes.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index 1c0b7a93f0..c774e3b080 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -18,6 +18,11 @@ On this page we provide a summary of the main API changes, new features and exam - Added {c:func}`CeedQFunctionSetUserFlopsEstimate` and {c:func}`CeedOperatorGetFlopsEstimate` to facilitate estimating FLOPs in operator application. 
+### New features + +- Switched MAGMA backends to use runtime compilation for tensor basis kernels (and element restriction kernels, in non-deterministic `/gpu/*/magma` backends). +This reduces time to compile the library and increases the range of parameters for which the MAGMA tensor basis kernels will work. + ### Bugfix - Install JiT source files in install directory to fix GPU functionality for installed libCEED. From 3b190ab81810e736b017918967e14bef9d9a95be Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 21 Jun 2022 09:24:11 -0600 Subject: [PATCH 068/172] ctx - shift QFContextDestroyData to interface only --- include/ceed/backend.h | 1 - interface/ceed-qfunctioncontext.c | 68 +++++++++++++++---------------- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/include/ceed/backend.h b/include/ceed/backend.h index d22f5ccf59..7dbfc112ca 100644 --- a/include/ceed/backend.h +++ b/include/ceed/backend.h @@ -262,7 +262,6 @@ CEED_EXTERN int CeedQFunctionContextSetInt32(CeedQFunctionContext ctx, CeedContextFieldLabel field_label, int *values); CEED_EXTERN int CeedQFunctionContextGetDataDestroy(CeedQFunctionContext ctx, CeedMemType *f_mem_type, CeedQFunctionContextDataDestroyUser *f); -CEED_EXTERN int CeedQFunctionContextDestroyData(CeedQFunctionContext ctx); CEED_EXTERN int CeedQFunctionContextReference(CeedQFunctionContext ctx); CEED_EXTERN int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data); diff --git a/interface/ceed-qfunctioncontext.c b/interface/ceed-qfunctioncontext.c index 49cfb67e73..29bf20b957 100644 --- a/interface/ceed-qfunctioncontext.c +++ b/interface/ceed-qfunctioncontext.c @@ -100,6 +100,40 @@ int CeedQFunctionContextRegisterGeneric(CeedQFunctionContext ctx, return CEED_ERROR_SUCCESS; } +/** + @brief Destroy user data held by CeedQFunctionContext, using function set by + CeedQFunctionContextSetDataDestroy, if applicable + + @param[in,out] ctx CeedQFunctionContext to destroy user data + + @return An 
error code: 0 - success, otherwise - failure + + @ref Developer +**/ +static int CeedQFunctionContextDestroyData(CeedQFunctionContext ctx) { + int ierr; + + if (ctx->DataDestroy) { + ierr = ctx->DataDestroy(ctx); CeedChk(ierr); + } else { + CeedQFunctionContextDataDestroyUser data_destroy_function; + CeedMemType data_destroy_mem_type; + + ierr = CeedQFunctionContextGetDataDestroy(ctx, &data_destroy_mem_type, + &data_destroy_function); CeedChk(ierr); + if (data_destroy_function) { + void *data; + + ierr = CeedQFunctionContextGetData(ctx, data_destroy_mem_type, &data); + CeedChk(ierr); + ierr = data_destroy_function(data); CeedChk(ierr); + ierr = CeedQFunctionContextRestoreData(ctx, &data); CeedChk(ierr); + } + } + + return CEED_ERROR_SUCCESS; +} + /// @} /// ---------------------------------------------------------------------------- @@ -361,40 +395,6 @@ int CeedQFunctionContextGetDataDestroy(CeedQFunctionContext ctx, return CEED_ERROR_SUCCESS; } -/** - @brief Destroy user data held by CeedQFunctionContext, using function set by - CeedQFunctionContextSetDataDestroy, if applicable - - @param[in,out] ctx CeedQFunctionContext to destroy user data - - @return An error code: 0 - success, otherwise - failure - - @ref Backend -**/ -int CeedQFunctionContextDestroyData(CeedQFunctionContext ctx) { - int ierr; - - if (ctx->DataDestroy) { - ierr = ctx->DataDestroy(ctx); CeedChk(ierr); - } else { - CeedQFunctionContextDataDestroyUser data_destroy_function; - CeedMemType data_destroy_mem_type; - - ierr = CeedQFunctionContextGetDataDestroy(ctx, &data_destroy_mem_type, - &data_destroy_function); CeedChk(ierr); - if (data_destroy_function) { - void *data; - - ierr = CeedQFunctionContextGetData(ctx, data_destroy_mem_type, &data); - CeedChk(ierr); - ierr = data_destroy_function(data); CeedChk(ierr); - ierr = CeedQFunctionContextRestoreData(ctx, &data); CeedChk(ierr); - } - } - - return CEED_ERROR_SUCCESS; -} - /** @brief Increment the reference counter for a CeedQFunctionContext From 
b11824b355ec5db8d1d0662d2c2bd260606aac4b Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 21 Jun 2022 09:58:40 -0600 Subject: [PATCH 069/172] gpu - fix setting device id --- backends/cuda-gen/ceed-cuda-gen.c | 7 ++++++- backends/cuda-ref/ceed-cuda-ref.c | 8 ++++++-- backends/cuda-shared/ceed-cuda-shared.c | 21 ++++++++++++--------- backends/cuda/ceed-cuda-common.c | 17 +++++++++++++++++ backends/cuda/ceed-cuda-common.h | 3 +++ backends/hip-gen/ceed-hip-gen.c | 6 +++++- backends/hip-ref/ceed-hip-ref.c | 8 ++++++-- backends/hip-shared/ceed-hip-shared.c | 6 +++++- backends/hip/ceed-hip-common.c | 17 +++++++++++++++++ backends/hip/ceed-hip-common.h | 3 +++ doc/sphinx/source/releasenotes.md | 4 ++++ 11 files changed, 84 insertions(+), 16 deletions(-) diff --git a/backends/cuda-gen/ceed-cuda-gen.c b/backends/cuda-gen/ceed-cuda-gen.c index 96ce2d36d4..6038602879 100644 --- a/backends/cuda-gen/ceed-cuda-gen.c +++ b/backends/cuda-gen/ceed-cuda-gen.c @@ -16,11 +16,16 @@ static int CeedInit_Cuda_gen(const char *resource, Ceed ceed) { int ierr; - if (strcmp(resource, "/gpu/cuda") && strcmp(resource, "/gpu/cuda/gen")) + char *resource_root; + ierr = CeedCudaGetResourceRoot(ceed, resource, &resource_root); + CeedChkBackend(ierr); + if (strcmp(resource_root, "/gpu/cuda") + && strcmp(resource_root, "/gpu/cuda/gen")) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_BACKEND, "Cuda backend cannot use resource: %s", resource); // LCOV_EXCL_STOP + ierr = CeedFree(&resource_root); CeedChkBackend(ierr); Ceed_Cuda *data; ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); diff --git a/backends/cuda-ref/ceed-cuda-ref.c b/backends/cuda-ref/ceed-cuda-ref.c index ace2d8b91d..3ba040238d 100644 --- a/backends/cuda-ref/ceed-cuda-ref.c +++ b/backends/cuda-ref/ceed-cuda-ref.c @@ -42,12 +42,16 @@ int CeedCudaGetCublasHandle(Ceed ceed, cublasHandle_t *handle) { static int CeedInit_Cuda(const char *resource, Ceed ceed) { int ierr; - if (strcmp(resource, "/gpu/cuda/ref")) + char 
*resource_root; + ierr = CeedCudaGetResourceRoot(ceed, resource, &resource_root); + CeedChkBackend(ierr); + if (strcmp(resource_root, "/gpu/cuda/ref")) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_BACKEND, "Cuda backend cannot use resource: %s", resource); // LCOV_EXCL_STOP - ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr); + ierr = CeedFree(&resource_root); CeedChkBackend(ierr); + ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr); Ceed_Cuda *data; ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); diff --git a/backends/cuda-shared/ceed-cuda-shared.c b/backends/cuda-shared/ceed-cuda-shared.c index 65db70a3cc..a606f2c495 100644 --- a/backends/cuda-shared/ceed-cuda-shared.c +++ b/backends/cuda-shared/ceed-cuda-shared.c @@ -16,28 +16,31 @@ static int CeedInit_Cuda_shared(const char *resource, Ceed ceed) { int ierr; - if (strcmp(resource, "/gpu/cuda/shared")) + char *resource_root; + ierr = CeedCudaGetResourceRoot(ceed, resource, &resource_root); + CeedChkBackend(ierr); + if (strcmp(resource_root, "/gpu/cuda/shared")) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_BACKEND, "Cuda backend cannot use resource: %s", resource); // LCOV_EXCL_STOP - ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr); + ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr); Ceed_Cuda *data; - ierr = CeedCalloc(1, &data); CeedChk(ierr); - ierr = CeedSetData(ceed, data); CeedChk(ierr); - ierr = CeedCudaInit(ceed, resource); CeedChk(ierr); + ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); + ierr = CeedSetData(ceed, data); CeedChkBackend(ierr); + ierr = CeedCudaInit(ceed, resource); CeedChkBackend(ierr); Ceed ceed_ref; CeedInit("/gpu/cuda/ref", &ceed_ref); - ierr = CeedSetDelegate(ceed, ceed_ref); CeedChk(ierr); + ierr = CeedSetDelegate(ceed, ceed_ref); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateTensorH1", CeedBasisCreateTensorH1_Cuda_shared); - CeedChk(ierr); + CeedChkBackend(ierr); ierr = 
CeedSetBackendFunction(ceed, "Ceed", ceed, "Destroy", - CeedDestroy_Cuda); CeedChk(ierr); - CeedChk(ierr); + CeedDestroy_Cuda); CeedChkBackend(ierr); + CeedChkBackend(ierr); return 0; } diff --git a/backends/cuda/ceed-cuda-common.c b/backends/cuda/ceed-cuda-common.c index e4317b9983..e38aee07f5 100644 --- a/backends/cuda/ceed-cuda-common.c +++ b/backends/cuda/ceed-cuda-common.c @@ -8,6 +8,23 @@ #include #include "ceed-cuda-common.h" +//------------------------------------------------------------------------------ +// Get root resource without device spec +//------------------------------------------------------------------------------ +int CeedCudaGetResourceRoot(Ceed ceed, const char *resource, + char **resource_root) { + int ierr; + + char *device_spec = strstr(resource, ":device_id="); + size_t resource_root_len = device_spec + ? (size_t)(device_spec - resource) + 1 + : strlen(resource) + 1; + ierr = CeedCalloc(resource_root_len, resource_root); CeedChkBackend(ierr); + memcpy(*resource_root, resource, resource_root_len - 1); + + return CEED_ERROR_SUCCESS; +} + //------------------------------------------------------------------------------ // Device information backend init //------------------------------------------------------------------------------ diff --git a/backends/cuda/ceed-cuda-common.h b/backends/cuda/ceed-cuda-common.h index 6758fe626e..98d47b55fc 100644 --- a/backends/cuda/ceed-cuda-common.h +++ b/backends/cuda/ceed-cuda-common.h @@ -58,6 +58,9 @@ typedef struct { struct cudaDeviceProp device_prop; } Ceed_Cuda; +CEED_INTERN int CeedCudaGetResourceRoot(Ceed ceed, const char *resource, + char **resource_root); + CEED_INTERN int CeedCudaInit(Ceed ceed, const char *resource); CEED_INTERN int CeedDestroy_Cuda(Ceed ceed); diff --git a/backends/hip-gen/ceed-hip-gen.c b/backends/hip-gen/ceed-hip-gen.c index b9f6d9e29b..2811dc506b 100644 --- a/backends/hip-gen/ceed-hip-gen.c +++ b/backends/hip-gen/ceed-hip-gen.c @@ -16,11 +16,15 @@ static int 
CeedInit_Hip_gen(const char *resource, Ceed ceed) { int ierr; - if (strcmp(resource, "/gpu/hip") && strcmp(resource, "/gpu/hip/gen")) + char *resource_root; + ierr = CeedHipGetResourceRoot(ceed, resource, &resource_root); + CeedChkBackend(ierr); + if (strcmp(resource_root, "/gpu/hip") && strcmp(resource_root, "/gpu/hip/gen")) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_BACKEND, "Hip backend cannot use resource: %s", resource); // LCOV_EXCL_STOP + ierr = CeedFree(&resource_root); CeedChkBackend(ierr); Ceed_Hip *data; ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); diff --git a/backends/hip-ref/ceed-hip-ref.c b/backends/hip-ref/ceed-hip-ref.c index d32d556b9a..7204ebe942 100644 --- a/backends/hip-ref/ceed-hip-ref.c +++ b/backends/hip-ref/ceed-hip-ref.c @@ -40,12 +40,16 @@ int CeedHipGetHipblasHandle(Ceed ceed, hipblasHandle_t *handle) { static int CeedInit_Hip(const char *resource, Ceed ceed) { int ierr; - if (strcmp(resource, "/gpu/hip/ref")) + char *resource_root; + ierr = CeedHipGetResourceRoot(ceed, resource, &resource_root); + CeedChkBackend(ierr); + if (strcmp(resource_root, "/gpu/hip/ref")) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_BACKEND, "Hip backend cannot use resource: %s", resource); // LCOV_EXCL_STOP - ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr); + ierr = CeedFree(&resource_root); CeedChkBackend(ierr); + ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr); Ceed_Hip *data; ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); diff --git a/backends/hip-shared/ceed-hip-shared.c b/backends/hip-shared/ceed-hip-shared.c index e8af825d0e..2925b1bf5c 100644 --- a/backends/hip-shared/ceed-hip-shared.c +++ b/backends/hip-shared/ceed-hip-shared.c @@ -17,11 +17,15 @@ static int CeedInit_Hip_shared(const char *resource, Ceed ceed) { int ierr; - if (strcmp(resource, "/gpu/hip/shared")) + char *resource_root; + ierr = CeedHipGetResourceRoot(ceed, resource, &resource_root); + CeedChkBackend(ierr); + if (strcmp(resource_root, 
"/gpu/hip/shared")) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_BACKEND, "Hip backend cannot use resource: %s", resource); // LCOV_EXCL_STOP + ierr = CeedFree(&resource_root); CeedChkBackend(ierr); ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr); Ceed_Hip *data; diff --git a/backends/hip/ceed-hip-common.c b/backends/hip/ceed-hip-common.c index b58fda870d..221198b9da 100644 --- a/backends/hip/ceed-hip-common.c +++ b/backends/hip/ceed-hip-common.c @@ -11,6 +11,23 @@ #include #include "ceed-hip-common.h" +//------------------------------------------------------------------------------ +// Get root resource without device spec +//------------------------------------------------------------------------------ +int CeedHipGetResourceRoot(Ceed ceed, const char *resource, + char **resource_root) { + int ierr; + + char *device_spec = strstr(resource, ":device_id="); + size_t resource_root_len = device_spec + ? (size_t)(device_spec - resource) + 1 + : strlen(resource) + 1; + ierr = CeedCalloc(resource_root_len, resource_root); CeedChkBackend(ierr); + memcpy(*resource_root, resource, resource_root_len - 1); + + return CEED_ERROR_SUCCESS; +} + //------------------------------------------------------------------------------ // Device information backend init //------------------------------------------------------------------------------ diff --git a/backends/hip/ceed-hip-common.h b/backends/hip/ceed-hip-common.h index 098e610641..b624ac8bed 100644 --- a/backends/hip/ceed-hip-common.h +++ b/backends/hip/ceed-hip-common.h @@ -56,6 +56,9 @@ typedef struct { hipblasHandle_t hipblas_handle; } Ceed_Hip; +CEED_INTERN int CeedHipGetResourceRoot(Ceed ceed, const char *resource, + char **resource_root); + CEED_INTERN int CeedHipInit(Ceed ceed, const char *resource); CEED_INTERN int CeedDestroy_Hip(Ceed ceed); diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index 58a9a17576..328c725a7f 100644 --- a/doc/sphinx/source/releasenotes.md 
+++ b/doc/sphinx/source/releasenotes.md @@ -7,6 +7,10 @@ for each release of libCEED. ## Current `main` branch +### Bugfix + +- Fix bug in setting device id for GPU backends. + ### Interface changes (v0-10)= From 7b39487de4ab72ae5651458a5c845e3d06436385 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 21 Jun 2022 11:24:17 -0600 Subject: [PATCH 070/172] fluids - add preloading (#997) * fluids - add preloading * fluids - fix accidentally dropped line Co-authored-by: James Wright * fluids - get wallclock from stage log, FRAGILE Co-authored-by: James Wright --- examples/fluids/src/setupts.c | 50 +++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c index 2feb10c45b..88078448ba 100644 --- a/examples/fluids/src/setupts.c +++ b/examples/fluids/src/setupts.c @@ -481,19 +481,47 @@ PetscErrorCode TSSolve_NS(DM dm, User user, AppCtx app_ctx, Physics phys, } // Solve - double start, cpu_time_used; - start = MPI_Wtime(); - ierr = PetscBarrier((PetscObject) *ts); CHKERRQ(ierr); - ierr = TSSolve(*ts, *Q); CHKERRQ(ierr); - cpu_time_used = MPI_Wtime() - start; - ierr = TSGetSolveTime(*ts, &final_time); CHKERRQ(ierr); + PetscScalar start_time; + ierr = TSGetTime(*ts, &start_time); CHKERRQ(ierr); + + PetscPreLoadBegin(PETSC_FALSE, "Fluids Solve"); + PetscCall(TSSetTime(*ts, start_time)); + PetscCall(TSSetStepNumber(*ts, 0)); + if (PetscPreLoadingOn) { + // LCOV_EXCL_START + SNES snes; + Vec Q_preload; + PetscReal rtol; + PetscCall(VecDuplicate(*Q, &Q_preload)); + PetscCall(VecCopy(*Q, Q_preload)); + PetscCall(TSGetSNES(*ts, &snes)); + PetscCall(SNESGetTolerances(snes, NULL, &rtol, NULL, NULL, NULL)); + PetscCall(SNESSetTolerances(snes, PETSC_DEFAULT, .99, PETSC_DEFAULT, + PETSC_DEFAULT, PETSC_DEFAULT)); + PetscCall(TSSetSolution(*ts, *Q)); + PetscCall(TSStep(*ts)); + PetscCall(SNESSetTolerances(snes, PETSC_DEFAULT, rtol, PETSC_DEFAULT, + PETSC_DEFAULT, 
PETSC_DEFAULT)); + PetscCall(VecDestroy(&Q_preload)); + // LCOV_EXCL_STOP + } else { + ierr = PetscBarrier((PetscObject) *ts); CHKERRQ(ierr); + ierr = TSSolve(*ts, *Q); CHKERRQ(ierr); + } + PetscPreLoadEnd(); + + PetscCall(TSGetSolveTime(*ts, &final_time)); *f_time = final_time; - ierr = MPI_Allreduce(MPI_IN_PLACE, &cpu_time_used, 1, MPI_DOUBLE, MPI_MIN, - comm); CHKERRQ(ierr); + if (!app_ctx->test_mode) { - ierr = PetscPrintf(PETSC_COMM_WORLD, - "Time taken for solution (sec): %g\n", - (double)cpu_time_used); CHKERRQ(ierr); + PetscLogEvent stage_id; + PetscStageLog stage_log; + + PetscCall(PetscLogStageGetId("Fluids Solve", &stage_id)); + PetscCall(PetscLogGetStageLog(&stage_log)); + PetscCall(PetscPrintf(PETSC_COMM_WORLD, + "Time taken for solution (sec): %g\n", + stage_log->stageInfo[stage_id].perfInfo.time)); } PetscFunctionReturn(0); } From e8b03feea03d6d78c8ccc26338cef4a425955b6d Mon Sep 17 00:00:00 2001 From: James Wright Date: Fri, 3 Jun 2022 14:18:01 -0600 Subject: [PATCH 071/172] examples/fluids: Pass solution gradient to boundary QFs --- examples/fluids/qfunctions/advection.h | 4 ++-- examples/fluids/qfunctions/advection2d.h | 4 ++-- examples/fluids/qfunctions/blasius.h | 8 ++++---- examples/fluids/qfunctions/channel.h | 7 ++++--- examples/fluids/qfunctions/eulervortex.h | 6 +++--- examples/fluids/qfunctions/stg_shur14.h | 6 +++--- examples/fluids/src/setuplibceed.c | 8 ++++++++ 7 files changed, 26 insertions(+), 17 deletions(-) diff --git a/examples/fluids/qfunctions/advection.h b/examples/fluids/qfunctions/advection.h index 06560167e0..293547c384 100644 --- a/examples/fluids/qfunctions/advection.h +++ b/examples/fluids/qfunctions/advection.h @@ -464,8 +464,8 @@ CEED_QFUNCTION(Advection_InOutFlow)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1]; + const CeedScalar (*q)[CEED_Q_VLA] = 
(const CeedScalar(*)[CEED_Q_VLA])in[0], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; // Outputs CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; // *INDENT-ON* diff --git a/examples/fluids/qfunctions/advection2d.h b/examples/fluids/qfunctions/advection2d.h index 40867fb82b..9e0f9d7259 100644 --- a/examples/fluids/qfunctions/advection2d.h +++ b/examples/fluids/qfunctions/advection2d.h @@ -413,8 +413,8 @@ CEED_QFUNCTION(Advection2d_InOutFlow)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1]; + const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; // Outputs CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; // *INDENT-ON* diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h index 677d9e1e75..b929ee52c5 100644 --- a/examples/fluids/qfunctions/blasius.h +++ b/examples/fluids/qfunctions/blasius.h @@ -171,8 +171,8 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q, // *INDENT-OFF* // Inputs const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1], - (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; // Outputs CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; @@ -340,8 +340,8 @@ CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q, // *INDENT-OFF* // Inputs const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1], - (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + 
(*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; // Outputs CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[1]; diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h index 3d8d60e948..a44e5fd00e 100644 --- a/examples/fluids/qfunctions/channel.h +++ b/examples/fluids/qfunctions/channel.h @@ -94,8 +94,8 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q, // *INDENT-OFF* // Inputs const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1], - (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; // Outputs CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; @@ -180,7 +180,8 @@ CEED_QFUNCTION(Channel_Outflow)(void *ctx, CeedInt Q, // *INDENT-OFF* // Inputs const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1]; + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + // Outputs CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; // *INDENT-ON* diff --git a/examples/fluids/qfunctions/eulervortex.h b/examples/fluids/qfunctions/eulervortex.h index ebe425d05d..dc82e29915 100644 --- a/examples/fluids/qfunctions/eulervortex.h +++ b/examples/fluids/qfunctions/eulervortex.h @@ -654,7 +654,7 @@ CEED_QFUNCTION(TravelingVortex_Inflow)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1]; + const CeedScalar (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; // Outputs CeedScalar (*v)[CEED_Q_VLA] = 
(CeedScalar(*)[CEED_Q_VLA])out[0]; // *INDENT-ON* @@ -738,8 +738,8 @@ CEED_QFUNCTION(Euler_Outflow)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1]; + const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; // Outputs CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; // *INDENT-ON* diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index 0f4b4e9e3a..ad0bbf11e3 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -188,8 +188,8 @@ CEED_QFUNCTION(STGShur14_Inflow)(void *ctx, CeedInt Q, //*INDENT-OFF* const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[1], - (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[2]; + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[2], + (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[3]; CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0]; @@ -289,7 +289,7 @@ CEED_QFUNCTION(STGShur14_Inflow_Strong)(void *ctx, CeedInt Q, //*INDENT-OFF* const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[1]; + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[2]; CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0]; diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c index 48e4f026f7..7e4822047b 100644 --- a/examples/fluids/src/setuplibceed.c +++ b/examples/fluids/src/setuplibceed.c @@ -141,6 +141,8 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, &op_apply_inflow); 
CeedOperatorSetField(op_apply_inflow, "q", elem_restr_q_sur, ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_apply_inflow, "Grad_q", elem_restr_q_sur, + ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); CeedOperatorSetField(op_apply_inflow, "surface qdata", elem_restr_qd_i_sur, CEED_BASIS_COLLOCATED, q_data_sur); CeedOperatorSetField(op_apply_inflow, "x", elem_restr_x_sur, @@ -227,6 +229,8 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, &op_apply_outflow); CeedOperatorSetField(op_apply_outflow, "q", elem_restr_q_sur, ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_apply_outflow, "Grad_q", elem_restr_q_sur, + ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); CeedOperatorSetField(op_apply_outflow, "surface qdata", elem_restr_qd_i_sur, CEED_BASIS_COLLOCATED, q_data_sur); CeedOperatorSetField(op_apply_outflow, "x", elem_restr_x_sur, @@ -584,6 +588,8 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CeedQFunctionContextDestroy(&problem->apply_inflow.qfunction_context); CeedQFunctionAddInput(ceed_data->qf_apply_inflow, "q", num_comp_q, CEED_EVAL_INTERP); + CeedQFunctionAddInput(ceed_data->qf_apply_inflow, "Grad_q", num_comp_q*(dim-1), + CEED_EVAL_GRAD); CeedQFunctionAddInput(ceed_data->qf_apply_inflow, "surface qdata", q_data_size_sur, CEED_EVAL_NONE); CeedQFunctionAddInput(ceed_data->qf_apply_inflow, "x", num_comp_x, @@ -617,6 +623,8 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CeedQFunctionContextDestroy(&problem->apply_outflow.qfunction_context); CeedQFunctionAddInput(ceed_data->qf_apply_outflow, "q", num_comp_q, CEED_EVAL_INTERP); + CeedQFunctionAddInput(ceed_data->qf_apply_outflow, "Grad_q", num_comp_q*(dim-1), + CEED_EVAL_GRAD); CeedQFunctionAddInput(ceed_data->qf_apply_outflow, "surface qdata", q_data_size_sur, CEED_EVAL_NONE); CeedQFunctionAddInput(ceed_data->qf_apply_outflow, "x", num_comp_x, From c6e8c5707530fe7560ff4e372ff2ebcb18e91603 
Mon Sep 17 00:00:00 2001 From: James Wright Date: Sun, 5 Jun 2022 15:00:42 -0600 Subject: [PATCH 072/172] examples/fluids: Abstract State and helper funcs --- examples/fluids/qfunctions/newtonian.h | 153 +---------------- examples/fluids/qfunctions/newtonian_state.h | 172 +++++++++++++++++++ 2 files changed, 173 insertions(+), 152 deletions(-) create mode 100644 examples/fluids/qfunctions/newtonian_state.h diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 7a5dcb63fc..f93450bfa1 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -15,163 +15,12 @@ #include #include #include "newtonian_types.h" +#include "newtonian_state.h" #ifndef M_PI #define M_PI 3.14159265358979323846 #endif -typedef struct { - CeedScalar pressure; - CeedScalar velocity[3]; - CeedScalar temperature; -} StatePrimitive; - -typedef struct { - CeedScalar density; - CeedScalar momentum[3]; - CeedScalar E_total; -} StateConservative; - -typedef struct { - StateConservative U; - StatePrimitive Y; -} State; - -CEED_QFUNCTION_HELPER StatePrimitive StatePrimitiveFromConservative( - NewtonianIdealGasContext gas, StateConservative U, const CeedScalar x[3]) { - StatePrimitive Y; - for (int i=0; i<3; i++) Y.velocity[i] = U.momentum[i] / U.density; - CeedScalar e_kinetic = .5 * Dot3(Y.velocity, Y.velocity); - CeedScalar e_potential = -Dot3(gas->g, x); - CeedScalar e_total = U.E_total / U.density; - CeedScalar e_internal = e_total - e_kinetic - e_potential; - Y.temperature = e_internal / gas->cv; - Y.pressure = (gas->cp / gas->cv - 1) * U.density * e_internal; - return Y; -} - -CEED_QFUNCTION_HELPER StatePrimitive StatePrimitiveFromConservative_fwd( - NewtonianIdealGasContext gas, State s, StateConservative dU, - const CeedScalar x[3], const CeedScalar dx[3]) { - StatePrimitive dY; - for (int i=0; i<3; i++) { - dY.velocity[i] = (dU.momentum[i] - s.Y.velocity[i] * dU.density) / s.U.density; - } - CeedScalar e_kinetic 
= .5 * Dot3(s.Y.velocity, s.Y.velocity); - CeedScalar de_kinetic = Dot3(dY.velocity, s.Y.velocity); - CeedScalar e_potential = -Dot3(gas->g, x); - CeedScalar de_potential = -Dot3(gas->g, dx); - CeedScalar e_total = s.U.E_total / s.U.density; - CeedScalar de_total = (dU.E_total - e_total * dU.density) / s.U.density; - CeedScalar e_internal = e_total - e_kinetic - e_potential; - CeedScalar de_internal = de_total - de_kinetic - de_potential; - dY.temperature = de_internal / gas->cv; - dY.pressure = (gas->cp / gas->cv - 1) - * (dU.density * e_internal + s.U.density * de_internal); - return dY; -} - -CEED_QFUNCTION_HELPER State StateFromU(NewtonianIdealGasContext gas, - const CeedScalar U[5], const CeedScalar x[3]) { - State s; - s.U.density = U[0]; - s.U.momentum[0] = U[1]; - s.U.momentum[1] = U[2]; - s.U.momentum[2] = U[3]; - s.U.E_total = U[4]; - s.Y = StatePrimitiveFromConservative(gas, s.U, x); - return s; -} - -CEED_QFUNCTION_HELPER State StateFromU_fwd(NewtonianIdealGasContext gas, - State s, const CeedScalar dU[5], - const CeedScalar x[3], const CeedScalar dx[3]) { - State ds; - ds.U.density = dU[0]; - ds.U.momentum[0] = dU[1]; - ds.U.momentum[1] = dU[2]; - ds.U.momentum[2] = dU[3]; - ds.U.E_total = dU[4]; - ds.Y = StatePrimitiveFromConservative_fwd(gas, s, ds.U, x, dx); - return ds; -} - -CEED_QFUNCTION_HELPER void FluxInviscid(NewtonianIdealGasContext gas, State s, - StateConservative Flux[3]) { - for (int i=0; i<3; i++) { - Flux[i].density = s.U.momentum[i]; - for (int j=0; j<3; j++) - Flux[i].momentum[j] = s.U.momentum[i] * s.Y.velocity[j] - + s.Y.pressure * (i == j); - Flux[i].E_total = (s.U.E_total + s.Y.pressure) * s.Y.velocity[i]; - } -} - -CEED_QFUNCTION_HELPER void FluxInviscid_fwd(NewtonianIdealGasContext gas, - State s, State ds, StateConservative dFlux[3]) { - for (int i=0; i<3; i++) { - dFlux[i].density = ds.U.momentum[i]; - for (int j=0; j<3; j++) - dFlux[i].momentum[j] = ds.U.momentum[i] * s.Y.velocity[j] + - s.U.momentum[i] * ds.Y.velocity[j] + 
ds.Y.pressure * (i == j); - dFlux[i].E_total = (ds.U.E_total + ds.Y.pressure) * s.Y.velocity[i] + - (s.U.E_total + s.Y.pressure) * ds.Y.velocity[i]; - } -} - -// Kelvin-Mandel notation -CEED_QFUNCTION_HELPER void KMStrainRate(const State grad_s[3], - CeedScalar strain_rate[6]) { - const CeedScalar weight = 1 / sqrt(2.); - strain_rate[0] = grad_s[0].Y.velocity[0]; - strain_rate[1] = grad_s[1].Y.velocity[1]; - strain_rate[2] = grad_s[2].Y.velocity[2]; - strain_rate[3] = weight * (grad_s[2].Y.velocity[1] + grad_s[1].Y.velocity[2]); - strain_rate[4] = weight * (grad_s[2].Y.velocity[0] + grad_s[0].Y.velocity[2]); - strain_rate[5] = weight * (grad_s[1].Y.velocity[0] + grad_s[0].Y.velocity[1]); -} - -CEED_QFUNCTION_HELPER void KMUnpack(const CeedScalar v[6], CeedScalar A[3][3]) { - const CeedScalar weight = 1 / sqrt(2.); - A[0][0] = v[0]; - A[1][1] = v[1]; - A[2][2] = v[2]; - A[2][1] = A[1][2] = weight * v[3]; - A[2][0] = A[0][2] = weight * v[4]; - A[1][0] = A[0][1] = weight * v[5]; -} - -CEED_QFUNCTION_HELPER void NewtonianStress(NewtonianIdealGasContext gas, - const CeedScalar strain_rate[6], CeedScalar stress[6]) { - CeedScalar div_u = strain_rate[0] + strain_rate[1] + strain_rate[2]; - for (int i=0; i<6; i++) { - stress[i] = gas->mu * (2 * strain_rate[i] + gas->lambda * div_u * (i < 3)); - } -} - -CEED_QFUNCTION_HELPER void ViscousEnergyFlux(NewtonianIdealGasContext gas, - StatePrimitive Y, const State grad_s[3], const CeedScalar stress[3][3], - CeedScalar Fe[3]) { - for (int i=0; i<3; i++) { - Fe[i] = - Y.velocity[0] * stress[0][i] - - Y.velocity[1] * stress[1][i] - - Y.velocity[2] * stress[2][i] - - gas->k * grad_s[i].Y.temperature; - } -} - -CEED_QFUNCTION_HELPER void ViscousEnergyFlux_fwd(NewtonianIdealGasContext gas, - StatePrimitive Y, StatePrimitive dY, const State grad_ds[3], - const CeedScalar stress[3][3], - const CeedScalar dstress[3][3], - CeedScalar dFe[3]) { - for (int i=0; i<3; i++) { - dFe[i] = - Y.velocity[0] * dstress[0][i] - dY.velocity[0] * 
stress[0][i] - - Y.velocity[1] * dstress[1][i] - dY.velocity[1] * stress[1][i] - - Y.velocity[2] * dstress[2][i] - dY.velocity[2] * stress[2][i] - - gas->k * grad_ds[i].Y.temperature; - } -} // ***************************************************************************** // Helper function for computing flux Jacobian // ***************************************************************************** diff --git a/examples/fluids/qfunctions/newtonian_state.h b/examples/fluids/qfunctions/newtonian_state.h new file mode 100644 index 0000000000..4113166ed3 --- /dev/null +++ b/examples/fluids/qfunctions/newtonian_state.h @@ -0,0 +1,172 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +/// @file +/// Structs and helper functions regarding the state of a newtonian simulation + + +#ifndef newtonian_state_h +#define newtonian_state_h + +#include +#include +#include "newtonian_types.h" + +typedef struct { + CeedScalar pressure; + CeedScalar velocity[3]; + CeedScalar temperature; +} StatePrimitive; + +typedef struct { + CeedScalar density; + CeedScalar momentum[3]; + CeedScalar E_total; +} StateConservative; + +typedef struct { + StateConservative U; + StatePrimitive Y; +} State; + +CEED_QFUNCTION_HELPER StatePrimitive StatePrimitiveFromConservative( + NewtonianIdealGasContext gas, StateConservative U, const CeedScalar x[3]) { + StatePrimitive Y; + for (CeedInt i=0; i<3; i++) Y.velocity[i] = U.momentum[i] / U.density; + CeedScalar e_kinetic = .5 * Dot3(Y.velocity, Y.velocity); + CeedScalar e_potential = -Dot3(gas->g, x); + CeedScalar e_total = U.E_total / U.density; + CeedScalar e_internal = e_total - e_kinetic - e_potential; + Y.temperature = e_internal / gas->cv; + Y.pressure = (gas->cp / gas->cv - 1) * U.density * e_internal; + return Y; +} 
+ +CEED_QFUNCTION_HELPER StatePrimitive StatePrimitiveFromConservative_fwd( + NewtonianIdealGasContext gas, State s, StateConservative dU, + const CeedScalar x[3], const CeedScalar dx[3]) { + StatePrimitive dY; + for (CeedInt i=0; i<3; i++) { + dY.velocity[i] = (dU.momentum[i] - s.Y.velocity[i] * dU.density) / s.U.density; + } + CeedScalar e_kinetic = .5 * Dot3(s.Y.velocity, s.Y.velocity); + CeedScalar de_kinetic = Dot3(dY.velocity, s.Y.velocity); + CeedScalar e_potential = -Dot3(gas->g, x); + CeedScalar de_potential = -Dot3(gas->g, dx); + CeedScalar e_total = s.U.E_total / s.U.density; + CeedScalar de_total = (dU.E_total - e_total * dU.density) / s.U.density; + CeedScalar e_internal = e_total - e_kinetic - e_potential; + CeedScalar de_internal = de_total - de_kinetic - de_potential; + dY.temperature = de_internal / gas->cv; + dY.pressure = (gas->cp / gas->cv - 1) + * (dU.density * e_internal + s.U.density * de_internal); + return dY; +} + +CEED_QFUNCTION_HELPER State StateFromU(NewtonianIdealGasContext gas, + const CeedScalar U[5], const CeedScalar x[3]) { + State s; + s.U.density = U[0]; + s.U.momentum[0] = U[1]; + s.U.momentum[1] = U[2]; + s.U.momentum[2] = U[3]; + s.U.E_total = U[4]; + s.Y = StatePrimitiveFromConservative(gas, s.U, x); + return s; +} + +CEED_QFUNCTION_HELPER State StateFromU_fwd(NewtonianIdealGasContext gas, + State s, const CeedScalar dU[5], + const CeedScalar x[3], const CeedScalar dx[3]) { + State ds; + ds.U.density = dU[0]; + ds.U.momentum[0] = dU[1]; + ds.U.momentum[1] = dU[2]; + ds.U.momentum[2] = dU[3]; + ds.U.E_total = dU[4]; + ds.Y = StatePrimitiveFromConservative_fwd(gas, s, ds.U, x, dx); + return ds; +} + +CEED_QFUNCTION_HELPER void FluxInviscid(NewtonianIdealGasContext gas, State s, + StateConservative Flux[3]) { + for (CeedInt i=0; i<3; i++) { + Flux[i].density = s.U.momentum[i]; + for (CeedInt j=0; j<3; j++) + Flux[i].momentum[j] = s.U.momentum[i] * s.Y.velocity[j] + + s.Y.pressure * (i == j); + Flux[i].E_total = (s.U.E_total + 
s.Y.pressure) * s.Y.velocity[i]; + } +} + +CEED_QFUNCTION_HELPER void FluxInviscid_fwd(NewtonianIdealGasContext gas, + State s, State ds, StateConservative dFlux[3]) { + for (CeedInt i=0; i<3; i++) { + dFlux[i].density = ds.U.momentum[i]; + for (CeedInt j=0; j<3; j++) + dFlux[i].momentum[j] = ds.U.momentum[i] * s.Y.velocity[j] + + s.U.momentum[i] * ds.Y.velocity[j] + ds.Y.pressure * (i == j); + dFlux[i].E_total = (ds.U.E_total + ds.Y.pressure) * s.Y.velocity[i] + + (s.U.E_total + s.Y.pressure) * ds.Y.velocity[i]; + } +} + +// Kelvin-Mandel notation +CEED_QFUNCTION_HELPER void KMStrainRate(const State grad_s[3], + CeedScalar strain_rate[6]) { + const CeedScalar weight = 1 / sqrt(2.); + strain_rate[0] = grad_s[0].Y.velocity[0]; + strain_rate[1] = grad_s[1].Y.velocity[1]; + strain_rate[2] = grad_s[2].Y.velocity[2]; + strain_rate[3] = weight * (grad_s[2].Y.velocity[1] + grad_s[1].Y.velocity[2]); + strain_rate[4] = weight * (grad_s[2].Y.velocity[0] + grad_s[0].Y.velocity[2]); + strain_rate[5] = weight * (grad_s[1].Y.velocity[0] + grad_s[0].Y.velocity[1]); +} + +CEED_QFUNCTION_HELPER void KMUnpack(const CeedScalar v[6], CeedScalar A[3][3]) { + const CeedScalar weight = 1 / sqrt(2.); + A[0][0] = v[0]; + A[1][1] = v[1]; + A[2][2] = v[2]; + A[2][1] = A[1][2] = weight * v[3]; + A[2][0] = A[0][2] = weight * v[4]; + A[1][0] = A[0][1] = weight * v[5]; +} + +CEED_QFUNCTION_HELPER void NewtonianStress(NewtonianIdealGasContext gas, + const CeedScalar strain_rate[6], CeedScalar stress[6]) { + CeedScalar div_u = strain_rate[0] + strain_rate[1] + strain_rate[2]; + for (CeedInt i=0; i<6; i++) { + stress[i] = gas->mu * (2 * strain_rate[i] + gas->lambda * div_u * (i < 3)); + } +} + +CEED_QFUNCTION_HELPER void ViscousEnergyFlux(NewtonianIdealGasContext gas, + StatePrimitive Y, const State grad_s[3], const CeedScalar stress[3][3], + CeedScalar Fe[3]) { + for (CeedInt i=0; i<3; i++) { + Fe[i] = - Y.velocity[0] * stress[0][i] + - Y.velocity[1] * stress[1][i] + - Y.velocity[2] * stress[2][i] 
+ - gas->k * grad_s[i].Y.temperature; + } +} + +CEED_QFUNCTION_HELPER void ViscousEnergyFlux_fwd(NewtonianIdealGasContext gas, + StatePrimitive Y, StatePrimitive dY, const State grad_ds[3], + const CeedScalar stress[3][3], + const CeedScalar dstress[3][3], + CeedScalar dFe[3]) { + for (CeedInt i=0; i<3; i++) { + dFe[i] = - Y.velocity[0] * dstress[0][i] - dY.velocity[0] * stress[0][i] + - Y.velocity[1] * dstress[1][i] - dY.velocity[1] * stress[1][i] + - Y.velocity[2] * dstress[2][i] - dY.velocity[2] * stress[2][i] + - gas->k * grad_ds[i].Y.temperature; + } +} + +#endif // newtonian_state_h From 8b85812c06baf6831ed3ba6d22d2276f4fca36d7 Mon Sep 17 00:00:00 2001 From: James Wright Date: Sun, 5 Jun 2022 17:02:34 -0600 Subject: [PATCH 073/172] examples/fluids: Cleanup code - Code either commented out, or left over from previous debugging --- examples/fluids/src/setuplibceed.c | 2 -- examples/fluids/src/setupts.c | 11 ++--------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c index 7e4822047b..9caa5ea839 100644 --- a/examples/fluids/src/setuplibceed.c +++ b/examples/fluids/src/setuplibceed.c @@ -82,7 +82,6 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, CeedInt P_sur, CeedInt Q_sur, CeedInt q_data_size_sur, CeedInt jac_data_size_sur, CeedOperator *op_apply, CeedOperator *op_apply_ijacobian) { - //CeedInt dim; DMLabel domain_label; PetscErrorCode ierr; PetscFunctionBeginUser; @@ -101,7 +100,6 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, if (phys->has_neumann || 1) { // --- Setup ierr = DMGetLabel(dm, "Face Sets", &domain_label); CHKERRQ(ierr); - //ierr = DMGetDimension(dm, &dim); CHKERRQ(ierr); // --- Get number of quadrature points for the boundaries CeedInt num_qpts_sur; diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c index 88078448ba..e3206cd188 100644 --- a/examples/fluids/src/setupts.c +++ 
b/examples/fluids/src/setupts.c @@ -292,7 +292,6 @@ PetscErrorCode FormIJacobian_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, user->phys->ijacobian_time_shift_label, &shift); PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); - Vec coo_vec = NULL; PetscCall(PetscObjectTypeCompare((PetscObject)J, MATSHELL, &J_is_shell)); PetscCall(PetscObjectTypeCompare((PetscObject)J_pre, MATSHELL, &J_pre_is_shell)); @@ -315,7 +314,6 @@ PetscErrorCode FormIJacobian_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, free(cols); CeedVectorCreate(user->ceed, ncoo, &user->coo_values); user->matrices_set_up = true; - VecCreateSeq(PETSC_COMM_WORLD, ncoo, &coo_vec); } } if (!J_pre_is_shell) { @@ -323,15 +321,10 @@ PetscErrorCode FormIJacobian_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, const PetscScalar *values; MatType mat_type; PetscCall(MatGetType(J_pre, &mat_type)); - if (strstr(mat_type, "kokkos") - || strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE; + if (strstr(mat_type, "kokkos") || strstr(mat_type, "cusparse")) + mem_type = CEED_MEM_DEVICE; CeedOperatorLinearAssemble(user->op_ijacobian, user->coo_values); CeedVectorGetArrayRead(user->coo_values, mem_type, &values); - if (coo_vec) { - VecPlaceArray(coo_vec, values); - VecViewFromOptions(coo_vec, NULL, "-coo_vec_view"); - VecDestroy(&coo_vec); - } PetscCall(MatSetValuesCOO(J_pre, values, INSERT_VALUES)); CeedVectorRestoreArrayRead(user->coo_values, &values); } From 65dd5cafde15489fff5d2ab607c335242f64f615 Mon Sep 17 00:00:00 2001 From: James Wright Date: Sun, 5 Jun 2022 18:13:41 -0600 Subject: [PATCH 074/172] examples/fluids: Move StrongSTGbcFunc to newtonian.h - Requires significant changes to how the stg_context is passed to the strong bc --- examples/fluids/problems/blasius.c | 21 +++---- examples/fluids/problems/newtonian.c | 5 ++ examples/fluids/problems/stg_shur14.c | 50 ++++++++-------- examples/fluids/problems/stg_shur14.h | 3 +- examples/fluids/qfunctions/newtonian.h | 60
+++++++++++++++++++ examples/fluids/qfunctions/newtonian_types.h | 2 + examples/fluids/qfunctions/stg_shur14.h | 63 -------------------- examples/fluids/src/setupdm.c | 7 +-- 8 files changed, 105 insertions(+), 106 deletions(-) diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c index dcb6587509..967b57d1a7 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -152,15 +152,13 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { // SET UP Blasius // ------------------------------------------------------ CeedQFunctionContextDestroy(&problem->ics.qfunction_context); - problem->ics.qfunction = ICsBlasius; - problem->ics.qfunction_loc = ICsBlasius_loc; - problem->apply_inflow.qfunction = Blasius_Inflow; - problem->apply_inflow.qfunction_loc = Blasius_Inflow_loc; - problem->apply_inflow_jacobian.qfunction = Blasius_Inflow_Jacobian; + problem->ics.qfunction = ICsBlasius; + problem->ics.qfunction_loc = ICsBlasius_loc; + problem->apply_inflow_jacobian.qfunction = Blasius_Inflow_Jacobian; problem->apply_inflow_jacobian.qfunction_loc = Blasius_Inflow_Jacobian_loc; - problem->apply_outflow.qfunction = Blasius_Outflow; - problem->apply_outflow.qfunction_loc = Blasius_Outflow_loc; - problem->apply_outflow_jacobian.qfunction = Blasius_Outflow_Jacobian; + problem->apply_outflow.qfunction = Blasius_Outflow; + problem->apply_outflow.qfunction_loc = Blasius_Outflow_loc; + problem->apply_outflow_jacobian.qfunction = Blasius_Outflow_Jacobian; problem->apply_outflow_jacobian.qfunction_loc = Blasius_Outflow_Jacobian_loc; CeedScalar Uinf = 40; // m/s @@ -255,8 +253,6 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { FreeContextPetsc); problem->ics.qfunction_context = blasius_context; - CeedQFunctionContextReferenceCopy(blasius_context, - &problem->apply_inflow.qfunction_context); CeedQFunctionContextReferenceCopy(blasius_context, &problem->apply_inflow_jacobian.qfunction_context); 
CeedQFunctionContextReferenceCopy(blasius_context, @@ -266,6 +262,11 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { if (use_stg) { ierr = SetupSTG(comm, dm, problem, user, weakT, theta0, P0, mesh_ynodes, mesh_nynodes); CHKERRQ(ierr); + } else { + problem->apply_inflow.qfunction = Blasius_Inflow; + problem->apply_inflow.qfunction_loc = Blasius_Inflow_loc; + CeedQFunctionContextReferenceCopy(blasius_context, + &problem->apply_inflow.qfunction_context); } ierr = PetscFree(mesh_ynodes); CHKERRQ(ierr); PetscFunctionReturn(0); diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index fe393a63af..a40cbee26d 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -102,6 +102,8 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc; problem->apply_vol_ijacobian.qfunction = IJacobian_Newtonian; problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_loc; + problem->apply_inflow.qfunction = BoundaryIntegral; + problem->apply_inflow.qfunction_loc = BoundaryIntegral_loc; problem->bc = NULL; problem->bc_ctx = setup_context; problem->non_zero_time = PETSC_FALSE; @@ -259,6 +261,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { newtonian_ig_ctx->Ctau_M = Ctau_M; newtonian_ig_ctx->Ctau_E = Ctau_E; newtonian_ig_ctx->stabilization = stab; + newtonian_ig_ctx->is_implicit = implicit; ierr = PetscArraycpy(newtonian_ig_ctx->g, g, 3); CHKERRQ(ierr); CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context); @@ -287,6 +290,8 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { &problem->apply_vol_ifunction.qfunction_context); CeedQFunctionContextReferenceCopy(newtonian_ig_context, &problem->apply_vol_ijacobian.qfunction_context); + CeedQFunctionContextReferenceCopy(newtonian_ig_context, + &problem->apply_inflow.qfunction_context); 
if (unit_tests) { PetscCall(UnitTests_Newtonian(user, newtonian_ig_ctx)); diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c index a1a399819f..ae94c914c0 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -20,6 +20,8 @@ #define M_PI 3.14159265358979323846 #endif +STGShur14Context global_stg_ctx; + /* * @brief Perform Cholesky decomposition on array of symmetric 3x3 matrices * @@ -328,7 +330,6 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, use_stgstrong = PETSC_FALSE; CeedScalar u0 = 0.0, alpha = 1.01; - STGShur14Context stg_ctx; CeedQFunctionContext stg_context; NewtonianIdealGasContext newtonian_ig_ctx; PetscFunctionBeginUser; @@ -351,15 +352,15 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, NULL, use_stgstrong, &use_stgstrong, NULL); CHKERRQ(ierr); PetscOptionsEnd(); - ierr = PetscCalloc1(1, &stg_ctx); CHKERRQ(ierr); - stg_ctx->alpha = alpha; - stg_ctx->u0 = u0; - stg_ctx->is_implicit = user->phys->implicit; - stg_ctx->prescribe_T = prescribe_T; - stg_ctx->mean_only = mean_only; - stg_ctx->theta0 = theta0; - stg_ctx->P0 = P0; - stg_ctx->nynodes = nynodes; + ierr = PetscCalloc1(1, &global_stg_ctx); CHKERRQ(ierr); + global_stg_ctx->alpha = alpha; + global_stg_ctx->u0 = u0; + global_stg_ctx->is_implicit = user->phys->implicit; + global_stg_ctx->prescribe_T = prescribe_T; + global_stg_ctx->mean_only = mean_only; + global_stg_ctx->theta0 = theta0; + global_stg_ctx->P0 = P0; + global_stg_ctx->nynodes = nynodes; { // Calculate dx assuming constant spacing @@ -370,23 +371,23 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, PetscInt nmax = 3, faces[3]; ierr = PetscOptionsGetIntArray(NULL, NULL, "-dm_plex_box_faces", faces, &nmax, NULL); CHKERRQ(ierr); - stg_ctx->dx = domain_size[0]/faces[0]; - stg_ctx->dz = domain_size[2]/faces[2]; + global_stg_ctx->dx = domain_size[0]/faces[0]; + 
global_stg_ctx->dz = domain_size[2]/faces[2]; } CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context, CEED_MEM_HOST, &newtonian_ig_ctx); - stg_ctx->newtonian_ctx = *newtonian_ig_ctx; + global_stg_ctx->newtonian_ctx = *newtonian_ig_ctx; CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context, &newtonian_ig_ctx); - ierr = GetSTGContextData(comm, dm, stg_inflow_path, stg_rand_path, &stg_ctx, - ynodes); CHKERRQ(ierr); + ierr = GetSTGContextData(comm, dm, stg_inflow_path, stg_rand_path, + &global_stg_ctx, ynodes); CHKERRQ(ierr); CeedQFunctionContextDestroy(&problem->apply_inflow.qfunction_context); CeedQFunctionContextCreate(user->ceed, &stg_context); CeedQFunctionContextSetData(stg_context, CEED_MEM_HOST, - CEED_USE_POINTER, stg_ctx->total_bytes, stg_ctx); + CEED_USE_POINTER, global_stg_ctx->total_bytes, global_stg_ctx); CeedQFunctionContextSetDataDestroy(stg_context, CEED_MEM_HOST, FreeContextPetsc); CeedQFunctionContextRegisterDouble(stg_context, "solution time", @@ -394,15 +395,15 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, "Phyiscal time of the solution"); if (use_stgstrong) { - problem->apply_inflow.qfunction = STGShur14_Inflow_Strong; - problem->apply_inflow.qfunction_loc = STGShur14_Inflow_Strong_loc; + // Use default boundary integral QF (BoundaryIntegral) in newtonian.h problem->bc_from_ics = PETSC_FALSE; } else { - problem->apply_inflow.qfunction = STGShur14_Inflow; - problem->apply_inflow.qfunction_loc = STGShur14_Inflow_loc; - problem->bc_from_ics = PETSC_TRUE; + problem->apply_inflow.qfunction = STGShur14_Inflow; + problem->apply_inflow.qfunction_loc = STGShur14_Inflow_loc; + problem->apply_inflow.qfunction_context = stg_context; + problem->bc_from_ics = PETSC_TRUE; } - problem->apply_inflow.qfunction_context = stg_context; + // global_stg_ctx = global_stg_ctx; PetscFunctionReturn(0); } @@ -460,8 +461,7 @@ PetscErrorCode StrongSTGbcFunc(PetscInt dim, PetscReal time, 
PetscFunctionReturn(0); } -PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem, - STGShur14Context stg_ctx) { +PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem) { PetscErrorCode ierr; DMLabel label; const PetscInt comps[] = {0, 1, 2, 3}; @@ -474,7 +474,7 @@ PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem, ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "STG", label, bc->num_inflow, bc->inflows, 0, num_comps, comps, (void(*)(void))StrongSTGbcFunc, - NULL, stg_ctx, NULL); CHKERRQ(ierr); + NULL, global_stg_ctx, NULL); CHKERRQ(ierr); } PetscFunctionReturn(0); diff --git a/examples/fluids/problems/stg_shur14.h b/examples/fluids/problems/stg_shur14.h index 45663350e3..09f84031b1 100644 --- a/examples/fluids/problems/stg_shur14.h +++ b/examples/fluids/problems/stg_shur14.h @@ -16,5 +16,4 @@ extern PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, const CeedScalar P0, const CeedScalar ynodes[], const CeedInt nynodes); -extern PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem, - STGShur14Context stg_ctx); +extern PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem); diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index f93450bfa1..14db2f0b17 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -744,5 +744,65 @@ CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q, } // End Quadrature Point Loop return 0; } + +// Compute boundary integral (ie. 
for strongly set inflows) +CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { + + //*INDENT-OFF* + const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[0], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[2]; + + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0]; + + //*INDENT-ON* + + const NewtonianIdealGasContext newt_ctx = (NewtonianIdealGasContext) ctx; + const bool is_implicit = newt_ctx->is_implicit; + const CeedScalar cv = newt_ctx->cv; + const CeedScalar cp = newt_ctx->cp; + const CeedScalar gamma = cp/cv; + + CeedPragmaSIMD + for(CeedInt i=0; iis_implicit; - const CeedScalar cv = stg_ctx->newtonian_ctx.cv; - const CeedScalar cp = stg_ctx->newtonian_ctx.cp; - const CeedScalar gamma = cp/cv; - - CeedPragmaSIMD - for(CeedInt i=0; iapply_inflow.qfunction_context, - CEED_MEM_HOST, &stg_ctx); - ierr = SetupStrongSTG(dm, bc, problem, stg_ctx); CHKERRQ(ierr); - CeedQFunctionContextRestoreData(problem->apply_inflow.qfunction_context, - &stg_ctx); + ierr = SetupStrongSTG(dm, bc, problem); CHKERRQ(ierr); } } From 30e9fa817d8b49e10111560e5f5e44c44a518fda Mon Sep 17 00:00:00 2001 From: James Wright Date: Mon, 6 Jun 2022 08:38:20 -0600 Subject: [PATCH 075/172] examples/fluids: Move blasius outflow to newtonian.h --- examples/fluids/problems/blasius.c | 21 ++-- examples/fluids/problems/newtonian.c | 52 ++++++---- examples/fluids/qfunctions/blasius.h | 135 ------------------------- examples/fluids/qfunctions/newtonian.h | 122 ++++++++++++++++++++++ 4 files changed, 159 insertions(+), 171 deletions(-) diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c index 967b57d1a7..84bc0584ed 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -156,10 +156,6 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { problem->ics.qfunction_loc = ICsBlasius_loc; 
problem->apply_inflow_jacobian.qfunction = Blasius_Inflow_Jacobian; problem->apply_inflow_jacobian.qfunction_loc = Blasius_Inflow_Jacobian_loc; - problem->apply_outflow.qfunction = Blasius_Outflow; - problem->apply_outflow.qfunction_loc = Blasius_Outflow_loc; - problem->apply_outflow_jacobian.qfunction = Blasius_Outflow_Jacobian; - problem->apply_outflow_jacobian.qfunction_loc = Blasius_Outflow_Jacobian_loc; CeedScalar Uinf = 40; // m/s CeedScalar delta0 = 4.2e-4; // m @@ -228,12 +224,13 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context, CEED_MEM_HOST, &newtonian_ig_ctx); - blasius_ctx->weakT = weakT; - blasius_ctx->Uinf = Uinf; - blasius_ctx->delta0 = delta0; - blasius_ctx->theta0 = theta0; - blasius_ctx->P0 = P0; - blasius_ctx->implicit = user->phys->implicit; + blasius_ctx->weakT = weakT; + blasius_ctx->Uinf = Uinf; + blasius_ctx->delta0 = delta0; + blasius_ctx->theta0 = theta0; + blasius_ctx->P0 = P0; + newtonian_ig_ctx->P0 = P0; + blasius_ctx->implicit = user->phys->implicit; blasius_ctx->newtonian_ctx = *newtonian_ig_ctx; { @@ -255,10 +252,6 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { problem->ics.qfunction_context = blasius_context; CeedQFunctionContextReferenceCopy(blasius_context, &problem->apply_inflow_jacobian.qfunction_context); - CeedQFunctionContextReferenceCopy(blasius_context, - &problem->apply_outflow.qfunction_context); - CeedQFunctionContextReferenceCopy(blasius_context, - &problem->apply_outflow_jacobian.qfunction_context); if (use_stg) { ierr = SetupSTG(comm, dm, problem, user, weakT, theta0, P0, mesh_ynodes, mesh_nynodes); CHKERRQ(ierr); diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index a40cbee26d..74e7e2f58a 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -86,28 +86,32 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void 
*ctx) { // ------------------------------------------------------ // Setup Generic Newtonian IG Problem // ------------------------------------------------------ - problem->dim = 3; - problem->q_data_size_vol = 10; - problem->q_data_size_sur = 10; - problem->jac_data_size_sur = 5; - problem->setup_vol.qfunction = Setup; - problem->setup_vol.qfunction_loc = Setup_loc; - problem->ics.qfunction = ICsNewtonianIG; - problem->ics.qfunction_loc = ICsNewtonianIG_loc; - problem->setup_sur.qfunction = SetupBoundary; - problem->setup_sur.qfunction_loc = SetupBoundary_loc; - problem->apply_vol_rhs.qfunction = RHSFunction_Newtonian; - problem->apply_vol_rhs.qfunction_loc = RHSFunction_Newtonian_loc; - problem->apply_vol_ifunction.qfunction = IFunction_Newtonian; - problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc; - problem->apply_vol_ijacobian.qfunction = IJacobian_Newtonian; - problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_loc; - problem->apply_inflow.qfunction = BoundaryIntegral; - problem->apply_inflow.qfunction_loc = BoundaryIntegral_loc; - problem->bc = NULL; - problem->bc_ctx = setup_context; - problem->non_zero_time = PETSC_FALSE; - problem->print_info = PRINT_DENSITY_CURRENT; + problem->dim = 3; + problem->q_data_size_vol = 10; + problem->q_data_size_sur = 10; + problem->jac_data_size_sur = 5; + problem->setup_vol.qfunction = Setup; + problem->setup_vol.qfunction_loc = Setup_loc; + problem->ics.qfunction = ICsNewtonianIG; + problem->ics.qfunction_loc = ICsNewtonianIG_loc; + problem->setup_sur.qfunction = SetupBoundary; + problem->setup_sur.qfunction_loc = SetupBoundary_loc; + problem->apply_vol_rhs.qfunction = RHSFunction_Newtonian; + problem->apply_vol_rhs.qfunction_loc = RHSFunction_Newtonian_loc; + problem->apply_vol_ifunction.qfunction = IFunction_Newtonian; + problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc; + problem->apply_vol_ijacobian.qfunction = IJacobian_Newtonian; + 
problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_loc; + problem->apply_inflow.qfunction = BoundaryIntegral; + problem->apply_inflow.qfunction_loc = BoundaryIntegral_loc; + problem->apply_outflow.qfunction = PressureOutflow; + problem->apply_outflow.qfunction_loc = PressureOutflow_loc; + problem->apply_outflow_jacobian.qfunction = PressureOutflow_Jacobian; + problem->apply_outflow_jacobian.qfunction_loc = PressureOutflow_Jacobian_loc; + problem->bc = NULL; + problem->bc_ctx = setup_context; + problem->non_zero_time = PETSC_FALSE; + problem->print_info = PRINT_DENSITY_CURRENT; // ------------------------------------------------------ // Create the libCEED context @@ -292,6 +296,10 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { &problem->apply_vol_ijacobian.qfunction_context); CeedQFunctionContextReferenceCopy(newtonian_ig_context, &problem->apply_inflow.qfunction_context); + CeedQFunctionContextReferenceCopy(newtonian_ig_context, + &problem->apply_outflow.qfunction_context); + CeedQFunctionContextReferenceCopy(newtonian_ig_context, + &problem->apply_outflow_jacobian.qfunction_context); if (unit_tests) { PetscCall(UnitTests_Newtonian(user, newtonian_ig_ctx)); diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h index b929ee52c5..7fbcb573a4 100644 --- a/examples/fluids/qfunctions/blasius.h +++ b/examples/fluids/qfunctions/blasius.h @@ -333,139 +333,4 @@ CEED_QFUNCTION(Blasius_Inflow_Jacobian)(void *ctx, CeedInt Q, return 0; } -// ***************************************************************************** -CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) { - // *INDENT-OFF* - // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; - // Outputs - CeedScalar 
(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[1]; - // *INDENT-ON* - - const BlasiusContext context = (BlasiusContext)ctx; - const bool implicit = context->implicit; - const CeedScalar mu = context->newtonian_ctx.mu; - const CeedScalar cv = context->newtonian_ctx.cv; - const CeedScalar cp = context->newtonian_ctx.cp; - const CeedScalar Rd = cp - cv; - - const CeedScalar theta0 = context->theta0; - const CeedScalar P0 = context->P0; - const CeedScalar rho_0 = P0 / (Rd*theta0); - const CeedScalar delta0 = context->delta0; - const CeedScalar Uinf = context->Uinf; - const CeedScalar x0 = Uinf*rho_0 / (mu*25/ (delta0*delta0) ); - const CeedScalar x_inflow = context->x_inflow; - - CeedPragmaSIMD - // Quadrature Point Loop - for (CeedInt i=0; inewtonian_ctx); - const CeedScalar viscous_flux[3] = {-t12 *norm[1], -t12 *norm[0], 0}; - - // -- Density - v[0][i] = -wdetJb * rho * u_normal; - - // -- Momentum - for (CeedInt j=0; j<3; j++) - v[j+1][i] = -wdetJb * (rho * u_normal * u[j] - + norm[j] * P + viscous_flux[j]); - - // -- Total Energy Density - v[4][i] = -wdetJb * (u_normal * (E + P) - + Dot3(viscous_flux, velocity)); - - // Save values for Jacobian - jac_data_sur[0][i] = rho; - jac_data_sur[1][i] = u[0]; - jac_data_sur[2][i] = u[1]; - jac_data_sur[3][i] = u[2]; - jac_data_sur[4][i] = E; - } // End Quadrature Point Loop - return 0; -} - -CEED_QFUNCTION(Blasius_Outflow_Jacobian)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) { - // *INDENT-OFF* - // Inputs - const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1], - (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; - // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; - // *INDENT-ON* - - const BlasiusContext context = (BlasiusContext)ctx; - const bool implicit = context->implicit; - - 
CeedPragmaSIMD - // Quadrature Point Loop - for (CeedInt i=0; iP0; - const CeedScalar dP = 0; - - v[0][i] = -wdetJb * dmomentum_normal; - for (int j=0; j<3; j++) - v[j+1][i] = -wdetJb * (dmomentum_normal * u[j] + rho * u_normal * du[j]); - v[4][i] = -wdetJb * (du_normal * (E + P) + u_normal * (dE + dP)); - } // End Quadrature Point Loop - return 0; -} - #endif // blasius_h diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 14db2f0b17..a90caeabfd 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -804,5 +804,127 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, return 0; } +// Outflow boundary condition, weakly setting a constant pressure +CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { + // *INDENT-OFF* + // Inputs + const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + // Outputs + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], + (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[1]; + // *INDENT-ON* + + const NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; + const bool implicit = context->is_implicit; + const CeedScalar P0 = context->P0; + + CeedPragmaSIMD + // Quadrature Point Loop + for (CeedInt i=0; iis_implicit; + + CeedPragmaSIMD + // Quadrature Point Loop + for (CeedInt i=0; iP0; + const CeedScalar dP = 0; + + v[0][i] = -wdetJb * dmomentum_normal; + for (int j=0; j<3; j++) + v[j+1][i] = -wdetJb * (dmomentum_normal * u[j] + rho * u_normal * du[j]); + v[4][i] = -wdetJb * (du_normal * (E + P) + u_normal * (dE + dP)); + } // End Quadrature Point Loop + return 0; +} + // ***************************************************************************** #endif // newtonian_h From ce9b5c206b21eed13d4c5adc123be0873c754f00 Mon Sep 17 00:00:00 2001 From: James Wright 
Date: Mon, 6 Jun 2022 12:26:50 -0600 Subject: [PATCH 076/172] examples/fluids: Add viscous flux to POutflow BC --- examples/fluids/qfunctions/newtonian.h | 70 +++++++++++++++++--------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index a90caeabfd..493a199bf9 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -811,7 +811,9 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, // *INDENT-OFF* // Inputs const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; // Outputs CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[1]; @@ -826,12 +828,10 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, for (CeedInt i=0; i Date: Mon, 6 Jun 2022 16:55:00 -0600 Subject: [PATCH 077/172] examples/fluids: Add viscous flux to StrongInflowBI --- examples/fluids/qfunctions/newtonian.h | 67 ++++++++++++++++---------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 493a199bf9..902f9ecadd 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -751,26 +751,23 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, CeedScalar *const *out) { //*INDENT-OFF* - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[2]; + const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + (*Grad_q)[5][CEED_Q_VLA] = (const 
CeedScalar(*)[5][CEED_Q_VLA])in[1], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0]; //*INDENT-ON* - const NewtonianIdealGasContext newt_ctx = (NewtonianIdealGasContext) ctx; - const bool is_implicit = newt_ctx->is_implicit; - const CeedScalar cv = newt_ctx->cv; - const CeedScalar cp = newt_ctx->cp; - const CeedScalar gamma = cp/cv; + const NewtonianIdealGasContext context = (NewtonianIdealGasContext) ctx; + const bool is_implicit = context->is_implicit; CeedPragmaSIMD for(CeedInt i=0; i Date: Mon, 6 Jun 2022 16:55:20 -0600 Subject: [PATCH 078/172] examples/fluids: Refine STGInflow_blasius.dat --- examples/fluids/STGInflow_blasius.dat | 304 +++++++++++++++++--------- 1 file changed, 202 insertions(+), 102 deletions(-) diff --git a/examples/fluids/STGInflow_blasius.dat b/examples/fluids/STGInflow_blasius.dat index 2b12fb7e88..f90f9c451a 100644 --- a/examples/fluids/STGInflow_blasius.dat +++ b/examples/fluids/STGInflow_blasius.dat @@ -1,102 +1,202 @@ -101 14 -0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 -4.200000000000002986e-06 6.641099321171224368e-01 -2.688275721802099928e-10 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -4.503522332443360197e-06 7.121033120206911038e-01 5.625993976502234810e-06 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -4.828979380670493501e-06 7.635650401647769980e-01 1.250142628597567334e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -5.177956305656680519e-06 8.187457644416862301e-01 2.084248362952813662e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -5.552152823557324425e-06 8.779142463706052224e-01 3.090016074070208580e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -5.953391484292584302e-06 9.413586701190954642e-01 4.296531750310341824e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -6.383626548402324046e-06 1.009388046123934402e+00 5.737481358311177256e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -6.844953505406707854e-06 1.082333716147739100e+00 
7.451857365815305122e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -7.339619280032274129e-06 1.160550967101845909e+00 9.484772346308667436e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -7.870033176013613616e-06 1.244420761495690142e+00 1.188839579386735820e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -8.438778610773199320e-06 1.334350999618601818e+00 1.458894855932344988e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -9.048625698133911575e-06 1.430771551613953863e+00 1.581560748020152847e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -9.702544740349270590e-06 1.534160149046711830e+00 1.747102975094025161e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.040372069516518436e-05 1.645020353203479058e+00 1.963713567554627713e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.115556868837608125e-05 1.763892116346153394e+00 2.240939354861193116e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.196175064743038823e-05 1.891354411586620987e+00 2.589894105302736720e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.282619313710034132e-05 2.028028052815169957e+00 3.023503103593241275e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.375310648408648365e-05 2.174578718417349066e+00 3.556785072386167831e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.474700528370354663e-05 2.331720193506353400e+00 4.207177082015844298e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.581273038852838925e-05 2.500217846462653437e+00 4.994908941792155924e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.695547248610555476e-05 2.680883861457040496e+00 5.889655958320385437e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.818079738054887299e-05 2.874548263010431093e+00 6.602281079523934521e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.949467310117369896e-05 3.082208252866376785e+00 7.503663649287806939e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.090349897019489394e-05 3.304875253311903460e+00 8.627999991332345117e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.241413677106653097e-05 
3.543633779312999721e+00 1.001503600974398660e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.403394416927094298e-05 3.799646720719475290e+00 1.171092923974609594e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.577081054833535852e-05 4.074092395254440113e+00 1.357158033230107053e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.763319543561788559e-05 4.368221208458913374e+00 1.540937148582322697e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.963016970501833084e-05 4.683605893906867657e+00 1.769681736351030443e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -3.177145975729444162e-05 5.021782554436745372e+00 2.051386266834596614e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -3.406749489316689041e-05 5.384299303649559221e+00 2.379111642902919863e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -3.652945810994693139e-05 5.772598926784612061e+00 2.710603356932465152e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -3.916934056909526795e-05 6.188959884991291460e+00 3.121345968641189156e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -4.200000000000003156e-05 6.635410093001123499e+00 3.625347520186018510e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -4.503522332443360367e-05 7.113125338991705071e+00 4.133124687953142543e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -4.828979380670492993e-05 7.625363738722001017e+00 4.761465522541206800e-03 0.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -5.177956305656680519e-05 8.173969710347833484e+00 5.483355699626112773e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -5.552152823557324933e-05 8.761155029286637586e+00 6.288849893152771361e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -5.953391484292584133e-05 9.390306008098571411e+00 7.253785738742655163e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -6.383626548402323707e-05 1.006267945968176925e+01 8.310353563038722774e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -6.844953505406707345e-05 1.078260765264970900e+01 9.566162359885154245e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
-7.339619280032274467e-05 1.155157312880497855e+01 1.097617661229051932e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -7.870033176013614294e-05 1.237288013889156701e+01 1.259644675763084941e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -8.438778610773198642e-05 1.324659660273999329e+01 1.442147547686566111e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -9.048625698133931227e-05 1.411776521727952627e+01 1.550008743586907094e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -9.702544740349271267e-05 1.505189076749435273e+01 1.696394574577553271e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.040372069516519487e-04 1.605352297623211655e+01 1.888691112446131207e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.115556868837609277e-04 1.712754036217140907e+01 2.135507382076490615e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.196175064743037705e-04 1.827917400098332124e+01 2.446866950728195447e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.282619313710035521e-04 1.951403300364581384e+01 2.834428819338968841e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.375310648408647010e-04 2.083813183600515728e+01 3.311742048129684424e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.474700528370356052e-04 2.225791961264622643e+01 3.894539215743021698e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.581273038852837570e-04 2.378031150774935654e+01 4.601074578209482513e-02 0.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.695547248610555476e-04 2.535075036351783240e+01 5.382694402282042073e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.818079738054887231e-04 2.661271076395308555e+01 5.835740987402229796e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.949467310117369964e-04 2.796586953681483223e+01 6.410968369375426301e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.090349897019489259e-04 2.941681733513018671e+01 7.130600537355311075e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.241413677106652961e-04 3.097262110011179459e+01 8.020473439929037829e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 -2.403394416927094298e-04 3.264085848118182298e+01 9.110596407971430222e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.577081054833535445e-04 3.413934342938573963e+01 1.014467216134999905e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.763319543561788762e-04 3.511021163702044134e+01 1.073549573507629751e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.963016970501833151e-04 3.615124179295903417e+01 1.147360075583040984e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -3.177145975729444433e-04 3.726750430629629562e+01 1.238529307048257255e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -3.406749489316688770e-04 3.830092385213693262e+01 1.332273456401334155e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -3.652945810994693275e-04 3.872325220338986185e+01 1.367304294075183113e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -3.916934056909526524e-04 3.917610105136403575e+01 1.410880782566600322e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -4.200000000000002885e-04 3.966167602757187893e+01 1.464521141298004425e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -4.503522332443360638e-04 3.976907916245198749e+01 1.475593379115296422e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -4.828979380670493128e-04 3.988424402352507769e+01 1.489351353721966587e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -5.177956305656680790e-04 3.996514744096749894e+01 1.500061967534003227e-01 
0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -5.552152823557325069e-04 3.998205278113282901e+01 1.502314641381908289e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -5.953391484292584268e-04 3.999712512512702034e+01 1.504595355677841584e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -6.383626548402323436e-04 3.999865492740099882e+01 1.504828737190639643e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -6.844953505406714935e-04 3.999990491833290207e+01 1.505044551060463753e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -7.339619280032281786e-04 3.999998824873846814e+01 1.505059777264049770e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 -7.870033176013621883e-04 3.999999720599615216e+01 1.505061502639564730e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -8.438778610773207045e-04 3.999999992423261119e+01 1.505062073695294655e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -9.048625698133920927e-04 3.999999998123077205e+01 1.505062086204041749e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -9.702544740349281025e-04 3.999999999957353225e+01 1.505062090609098069e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.040372069516519622e-03 3.999999999999438671e+01 1.505062090717198819e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.115556868837609249e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.196175064743037596e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.282619313710035413e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.375310648408646901e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.474700528370356161e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.581273038852837407e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.695547248610553741e-03 4.000000000000000000e+01 
1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.818079738054885226e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -1.949467310117369801e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.090349897019489530e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.241413677106653124e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.403394416927094081e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.577081054833535879e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.763319543561788653e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -2.963016970501833151e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -3.177145975729444433e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -3.406749489316688879e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -3.652945810994693166e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -3.916934056909526958e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 -4.200000000000003210e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +201 14 +0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.200000000000002986e-06 6.641140981505948560e-01 3.630359440264661799e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.318165071485715414e-06 6.827985921104507971e-01 3.837509590908312938e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.439654662999822548e-06 
7.020087597129041068e-01 4.056479791879753598e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.564562308386786961e-06 7.217593898384206952e-01 4.287944492540062035e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.692984173022870607e-06 7.420656873629547512e-01 4.532616625247044189e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.825019127853004529e-06 7.629432848520516508e-01 4.791249800990936422e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.960768825510785322e-06 7.844082545828384712e-01 5.064640630279787997e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.100337778579980117e-06 8.064771209029573118e-01 5.353631176415636492e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.243833440057880147e-06 8.291668729357855039e-01 5.659111548710616638e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.391366286082559086e-06 8.524949776415362557e-01 5.982022643670369046e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.543049900987465374e-06 8.764793932440189117e-01 6.323359042397989872e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.699001064749144347e-06 9.011385830331950508e-01 6.684172073396136930e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.859339842895164177e-06 9.264915295538235229e-01 7.065573050060769726e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.024189678941542233e-06 9.525577491908122640e-01 7.468736692766958887e-05 0.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.193677489430986948e-06 9.793573071621652915e-01 7.894904746115991316e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.367933761644810201e-06 1.006910832930623334e+00 8.345389802462905356e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.547092654064105355e-06 1.035239536045465369e+00 8.821579343477191262e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.731292099657229450e-06 1.064365222426126634e+00 9.324940012194569560e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.920673912073230969e-06 1.094310311099622179e+00 9.857022128614357018e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
+7.115383894823110946e-06 1.125097851404053495e+00 1.041946446271921193e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.315571953532648309e-06 1.156751540670705847e+00 1.101399927983429225e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.521392211353629810e-06 1.189295742397619149e+00 1.164245767343850439e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.733003127621886853e-06 1.222755504927688142e+00 1.230677520194207813e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.950567619853951632e-06 1.257156580644760657e+00 1.300899784673317154e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +8.174253189175879772e-06 1.292525445701377507e+00 1.375128830985449815e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +8.404232049280946896e-06 1.328889320292130583e+00 1.453593267040625546e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +8.640681259015676899e-06 1.366276189486925974e+00 1.536534742044412994e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +8.883782858695878055e-06 1.404714824638605197e+00 1.624208690180607522e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +9.133724010258101911e-06 1.444234805379770448e+00 1.716885116634951560e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +9.390697141354052406e-06 1.484866542223799213e+00 1.814849428385638267e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +9.654900093498998019e-06 1.526641299785354899e+00 1.918403312316203447e-04 0.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +9.926536274388475392e-06 1.569591220635915718e+00 2.027865663301020832e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.020581481450005303e-05 1.613749349809992184e+00 2.143573565185654100e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.049295072810132478e-05 1.659149659978003122e+00 2.265883327563921357e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.078816507878757129e-05 1.705827077301811157e+00 2.395171581531488250e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.109168514967676096e-05 1.753817507989186231e+00 2.531836437787599882e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 +1.140374461839310930e-05 1.803157865563519202e+00 2.676298710585402066e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.172458373697340076e-05 1.853886098865121124e+00 2.829003211353998395e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.205444951683523017e-05 1.906041220800564817e+00 2.990420115781148376e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.239359591894900304e-05 1.959663337856384580e+00 3.161046408565818802e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.274228404936034477e-05 2.014793680393484276e+00 3.341407410223696281e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.310078236021368562e-05 2.071474633738419779e+00 3.532058390670760636e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.346936685643116285e-05 2.129749770087332017e+00 3.733586274333633610e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.384832130820674956e-05 2.189663881238256948e+00 3.946611441936342815e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.423793746947853206e-05 2.251263012166932143e+00 4.171789634444133155e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.463851530254758682e-05 2.314594495460800871e+00 4.409813965032258806e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.505036320901671766e-05 2.379706986625142129e+00 4.661417044870037212e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.547379826722609654e-05 2.446650500274548445e+00 4.927373229170195490e-04 
0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.590914647636952606e-05 2.515476447222162903e+00 5.208500990376896183e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.635674300747830455e-05 2.586237672477534666e+00 5.505665425471517908e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.681693246146689620e-05 2.658988494163021965e+00 5.819780904676895772e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.729006913443827169e-05 2.733784743356989289e+00 6.151813869697590970e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.777651729045346397e-05 2.810683804870031555e+00 6.502785789730438889e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 +1.827665144197570678e-05 2.889744658958470325e+00 6.873776283615871301e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.879085663820443774e-05 2.971027923977121787e+00 7.265926417729158147e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.931952876152138679e-05 3.054595899969943407e+00 7.680442189061353593e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.986307483227748609e-05 3.140512613194513136e+00 8.118598203336292122e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.042191332215418279e-05 3.228843861572436413e+00 8.581741559395575586e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.099647447634169373e-05 3.319657261053023056e+00 9.071295950397772253e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.158720064478093979e-05 3.413022292873750807e+00 9.588765993846895585e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.219454662272545131e-05 3.509010351694664465e+00 1.013574180289187601e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.281898000088438525e-05 3.607694794577965602e+00 1.071390381116330091e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.346098152541665734e-05 3.709150990777474455e+00 1.132502786548317221e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.412104546805380622e-05 3.813456372293564378e+00 1.197099059954489803e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.479968000663539967e-05 3.920690485141924420e+00 
1.265377510424751982e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.549740761635131678e-05 4.030935041272128316e+00 1.337547690912366076e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.621476547199101323e-05 4.144273971063332063e+00 1.413831029169076601e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.695230586150982092e-05 4.260793476308345262e+00 1.494461493057301030e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.771059661123132550e-05 4.380582083586631370e+00 1.579686292048572676e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.849022152301177357e-05 4.503730697906415337e+00 1.669766616615278197e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.929178082370473995e-05 4.630332656482098130e+00 1.764978417464505402e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.011589162727072629e-05 4.760483782488917015e+00 1.865613226430372189e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.096318840988796672e-05 4.894282438616580855e+00 1.971979021091216453e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.183432349843088373e-05 5.031829580217039855e+00 2.084401135063986086e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.272996757269072883e-05 5.173228807811488750e+00 2.203223216078317812e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.365081018172700261e-05 5.318586418692702544e+00 2.328808233957968803e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.459756027474506992e-05 5.468011457318935342e+00 2.461539540595548796e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.557094674691089602e-05 5.621615764158268469e+00 2.601821984132147991e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.657171900052125679e-05 5.779514022596792699e+00 2.750083079485879205e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.760064752196220269e-05 5.941823803472473742e+00 2.906774237356019377e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.865852447490070577e-05 6.108665606742320620e+00 3.072372053845906330e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.974616431016478709e-05 
6.280162899727260495e+00 3.247379662753565894e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.086440439278228180e-05 6.456442151310147537e+00 3.432328152493786377e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.201410564666193184e-05 6.637632861384040517e+00 3.627778049493299555e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.319615321741099214e-05 6.823867584762835392e+00 3.834320869752978842e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.441145715380230688e-05 7.015281948670325285e+00 4.052580740045620342e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.566095310841270931e-05 7.212014662817196609e+00 4.283216089951571645e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.694560305797492762e-05 7.414207520958059128e+00 4.526921415606471703e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.826639604399537742e-05 7.622005392689104930e+00 4.784429115624729174e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.962434893420880893e-05 7.835556204102479327e+00 5.056511399166850260e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.102050720545707365e-05 8.055010905753176687e+00 5.343982265520148996e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.245594574859292292e-05 8.280523426216349847e+00 5.647699553845274278e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.393176969602948969e-05 8.512250609317629113e+00 5.968567060889627685e-03 0.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.544911527257289014e-05 8.750352132902929014e+00 6.307536723460631552e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.700915067019266739e-05 8.994990406776418368e+00 6.665610861264993983e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.861307694740303516e-05 9.246330447173846778e+00 7.043844474328245578e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.026212895394765848e-05 9.504539724851666094e+00 7.443347587583248881e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.195757628150109218e-05 9.769787983558238409e+00 7.865287633324146918e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
+6.370072424111670756e-05 1.004224702531021052e+01 8.310891860027576003e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.549291486817460095e-05 1.032209045852201967e+01 8.781449753502210478e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.733552795560449186e-05 1.060949340463204749e+01 9.278315453418416181e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.922998211617657619e-05 1.090463215842215128e+01 9.802910144863485539e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.117773587467971332e-05 1.120768379675803494e+01 1.035672440074125213e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.318028879082796271e-05 1.151882572996325393e+01 1.094132044644973746e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.523918261375968765e-05 1.183823518948351428e+01 1.155833431316663842e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.735600246901733678e-05 1.216608864492684816e+01 1.220947784041121267e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.953237807892218393e-05 1.250256114293592802e+01 1.289654048211620434e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +8.176998501728516798e-05 1.284782555970061857e+01 1.362139086298290447e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +8.407054599941702450e-05 1.320205175824821708e+01 1.438597802388161262e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +8.643583220843226124e-05 1.356540564094142987e+01 1.519233228558319708e-02 0.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +8.886766465886967829e-05 1.393804808691080765e+01 1.604256565001891538e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +9.136791559867529517e-05 1.432013376344410993e+01 1.693887164665755160e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +9.393850995063215774e-05 1.471180979964544377e+01 1.788352451892555317e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +9.658142679434113611e-05 1.511321431003769256e+01 1.887887763169763047e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +9.929870088989955525e-05 1.552447475518022202e+01 1.992736096553603814e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 +1.020924242444457914e-04 1.594570612586339742e+01 2.103147754678042977e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.049647477227778387e-04 1.637700893708128902e+01 2.219379864474238517e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.079178827032878948e-04 1.681846701779361553e+01 2.341695754818867020e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.109541027804834194e-04 1.727014508253209613e+01 2.470364171317436866e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.140757455154093290e-04 1.773208607127057235e+01 2.605658305342279171e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.172852142353177313e-04 1.820430824472472864e+01 2.747854612311527792e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.205849798839685313e-04 1.868680202348598840e+01 2.897231392069997413e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.239775829239906214e-04 1.917952656123670607e+01 3.054067102181418428e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.274656352927619734e-04 1.968240604485971801e+01 3.218638373048855056e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.310518224133194688e-04 2.019532571772026586e+01 3.391217692168207315e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.347389052618440798e-04 2.071812762687834919e+01 3.572070723612606052e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.385297224933136856e-04 2.125060610072550205e+01 3.761453228239797220e-02 
0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.424271926269610503e-04 2.179250297070407427e+01 3.959607550318628572e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.464343162932169333e-04 2.234350255953273745e+01 4.166758637532948922e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.505541785438716993e-04 2.290322646898586001e+01 4.383109563942969022e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.547899512272312176e-04 2.347122821302876616e+01 4.608836529887104516e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.591448954300968299e-04 2.404698775691503698e+01 4.844083319243606273e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 +1.636223639884502336e-04 2.462990604034230202e+01 5.088955203622393120e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.682258040687733516e-04 2.521929958241464931e+01 5.343512295143462015e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.729587598219944867e-04 2.581439528860159527e+01 5.607762365272995347e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.778248751121008775e-04 2.641432560452457068e+01 5.881653167025466949e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.828278963215197348e-04 2.701812418815089600e+01 6.165064322324182583e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.879716752354284096e-04 2.762472230039809773e+01 6.457798865848096703e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.932601720072101817e-04 2.823294614325882890e+01 6.759574571506070384e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.986974582073468059e-04 2.884151540340694098e+01 7.070015227914633327e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.042877199580851917e-04 2.944904328623144352e+01 7.388642074639054091e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.100352611562995386e-04 3.005403834844389621e+01 7.714865660856315421e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.159445067870272245e-04 3.065490845434188927e+01 8.047978441305408959e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.220200063302262385e-04 3.124996718854791666e+01 
8.387148479028351533e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.282664372633858723e-04 3.183744305315560297e+01 8.731414677770767130e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.346886086626758026e-04 3.241549175587164910e+01 9.079684015363082006e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.412914649054156556e-04 3.298221185393732213e+01 9.430731288309410210e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.480800894767115454e-04 3.353566395223492691e+01 9.783201901454978422e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.550597088831871480e-04 3.407389355950837029e+01 1.013561823841083454e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.622356966768318778e-04 3.459495758115729558e+01 1.048639012115039348e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.696135775920519679e-04 3.509695426930206708e+01 1.083382980314307187e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.771990317991194358e-04 3.557805626155405321e+01 1.117617183257158531e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.849978992772898368e-04 3.603654612297154358e+01 1.151159796465952567e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +2.930161843109514307e-04 3.647085356795587785e+01 1.183826709116155007e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.012600601122788959e-04 3.687959329234971761e+01 1.215434989127842091e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.097358735739357384e-04 3.726160210601791078e+01 1.245806759628910604e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.184501501554964508e-04 3.761597384361081708e+01 1.274773391218071150e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.274095989073444219e-04 3.794209036938991630e+01 1.302179877889652060e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.366211176359145741e-04 3.823964690745211215e+01 1.327889228853030845e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.460917982142596946e-04 3.850866994702172263e+01 1.351786677130907555e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.558289320420235662e-04 
3.874952611653954193e+01 1.373783482676723955e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.658400156590311267e-04 3.896292070587391265e+01 1.393820096948040210e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.761327565168132744e-04 3.914988494792370233e+01 1.411868461288565968e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.867150789125102473e-04 3.931175173960665603e+01 1.427933236177375276e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +3.975951300897247903e-04 3.945012016089036422e+01 1.442051803990326286e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.087812865110165058e-04 3.956680989408089744e+01 1.454292953867874216e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.202821603068730401e-04 3.966380739273417078e+01 1.464754240531451202e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.321066059061189784e-04 3.974320632665484254e+01 1.473558103557497667e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.442637268528677958e-04 3.980714535878647808e+01 1.480846931289471757e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.567628828152677629e-04 3.985774661904319061e+01 1.486777343914639227e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.696136967914315923e-04 3.989705827386666925e+01 1.491514042202272683e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.828260625181056572e-04 3.992700432092595975e+01 1.495223611791614837e-01 0.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +4.964101520877768061e-04 3.994934417410425453e+01 1.498068680086499793e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.103764237800833929e-04 3.996564379231465125e+01 1.500202790293580779e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.247356301135585764e-04 3.997725913097582406e+01 1.501766286786559157e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.394988261239087738e-04 3.998533166799013117e+01 1.502883405223520707e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.546773778751925644e-04 3.999079479908936463e+01 1.503660642052148666e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
+5.702829712104620684e-04 3.999438912447312333e+01 1.504186356679954872e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +5.863276207485980168e-04 3.999668414681229223e+01 1.504531451761796645e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.028236791342660205e-04 3.999810371461150282e+01 1.504750896584230446e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.197838465481190920e-04 3.999895266988930587e+01 1.504885814438431668e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.372211804845597958e-04 3.999944254326970139e+01 1.504965849785514509e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.551491058046003766e-04 3.999971469585698003e+01 1.505011560888340483e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.735814250715537214e-04 3.999985993220520442e+01 1.505036638925645598e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +6.925323291775134157e-04 3.999993420321077053e+01 1.505049822833507134e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.120164082688056929e-04 3.999997050519222341e+01 1.505056447428673172e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.320486629788263604e-04 3.999998741855083040e+01 1.505059620358278061e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.526445159769016256e-04 3.999999490836898275e+01 1.505061064802281912e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.738198238420785670e-04 3.999999805130849495e+01 1.505061687907646439e-01 0.000000000000000000e+00 
1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +7.955908892709746653e-04 3.999999929705505508e+01 1.505061941799195058e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +8.179744736290903324e-04 3.999999976186708750e+01 1.505062039182820111e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +8.409878098552505321e-04 3.999999992454191755e+01 1.505062074218919965e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +8.646486157290985460e-04 3.999999997774141036e+01 1.505062085997323496e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +8.889751075118735007e-04 3.999999999393345007e+01 1.505062089682520088e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 
1.000000000000000000e+00 +9.139860139709599009e-04 3.999999999850054166e+01 1.505062090751022041e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +9.397005907990121617e-04 3.999999999968910913e+01 1.505062091036865335e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +9.661386354387586440e-04 3.999999999997311306e+01 1.505062091107076949e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +9.933205023248856905e-04 4.000000000003511502e+01 1.505062091122836010e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.021267118554754303e-03 4.000000000004735767e+01 1.505062091126031787e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 +1.050000000000000586e-03 4.000000000004958878e+01 1.505062091126629920e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 From cdb4dd67cd8edd07e3207a1c021923acf216e178 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 21 Jun 2022 17:38:23 -0600 Subject: [PATCH 079/172] doc - add CeedErrorType to doc --- doc/sphinx/source/api/Ceed.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/sphinx/source/api/Ceed.rst b/doc/sphinx/source/api/Ceed.rst index b678a0128b..0ec20d464d 100644 --- a/doc/sphinx/source/api/Ceed.rst +++ b/doc/sphinx/source/api/Ceed.rst @@ -31,3 +31,6 @@ Typedefs and Enumerations .. doxygenenum:: CeedMemType :project: libCEED + +.. doxygenenum:: CeedErrorType + :project: libCEED From 2f26800c8fb92ba6709b24c925da555bc17dd990 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 21 Jun 2022 17:43:17 -0600 Subject: [PATCH 080/172] doc - drop unused code --- doc/sphinx/source/api/CeedBasis.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/sphinx/source/api/CeedBasis.rst b/doc/sphinx/source/api/CeedBasis.rst index 35e92281e6..6e4744d190 100644 --- a/doc/sphinx/source/api/CeedBasis.rst +++ b/doc/sphinx/source/api/CeedBasis.rst @@ -14,8 +14,6 @@ Discrete element bases and quadrature :content-only: :members: -.. _CeedBasis-Typedefs and Enumerations: - Typedefs and Enumerations -------------------------------------- From 56b865afbbee4a4b9c734bdc912bb521979930c2 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 21 Jun 2022 18:24:00 -0600 Subject: [PATCH 081/172] doc - exclude jit-source dir from doc --- Doxyfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Doxyfile b/Doxyfile index 8f0be2cdea..6e426b9ebc 100644 --- a/Doxyfile +++ b/Doxyfile @@ -875,10 +875,10 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. 
-INPUT = backends \ +INPUT = include \ + interface \ + backends \ gallery \ - include \ - interface # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -964,6 +964,7 @@ RECURSIVE = YES EXCLUDE = benchmarks \ include/ceed/khash.h \ include/ceedf.h \ + include/ceed/jit-source \ tests/junit-xml \ tests/output \ include/ceed-fortran-name.h \ From 520dae657d7a8d66e0018fcfbab10e2ef89ffd7a Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 7 Jun 2022 09:46:13 -0600 Subject: [PATCH 082/172] doc: Update blasius BC description --- examples/fluids/index.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/fluids/index.md b/examples/fluids/index.md index e8973fd6f5..f73b1adbfb 100644 --- a/examples/fluids/index.md +++ b/examples/fluids/index.md @@ -450,11 +450,10 @@ the velocity is prescribed by the Blasius soution profile, density is set constant, and temperature is allowed to float. Using `weakT: true`, density is allowed to float and temperature is set constant. At the outlet, a user-set pressure is used for pressure in the inviscid flux terms (all other inviscid -flux terms use interior solution values). The viscous traction is also set to -the analytic Blasius profile value at both the inflow and the outflow. The wall -is a no-slip, no-penetration, no-heat flux condition. The top of the domain is -treated as an outflow and is tilted at a downward angle to ensure that flow is -always exiting it. +flux terms use interior solution values). The wall is a no-slip, +no-penetration, no-heat flux condition. The top of the domain is treated as an +outflow and is tilted at a downward angle to ensure that flow is always exiting +it. 
### Turbulent Boundary Layer From b5d317f8ea3d38f9d470e63b66856cea61458625 Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 7 Jun 2022 12:00:56 -0600 Subject: [PATCH 083/172] examples/fluids: Move POutflow_Jac to State variables - Verified to be exactly the same as before, just using the State structs and methods --- examples/fluids/qfunctions/newtonian.h | 39 ++++++++++++++------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 902f9ecadd..9b765a2418 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -933,13 +933,15 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; iP0; + ds.Y.pressure = 0.; - const CeedScalar drho = dq[0][i]; - const CeedScalar dmomentum[3] = {dq[1][i], dq[2][i], dq[3][i]}; - const CeedScalar dE = dq[4][i]; const CeedScalar wdetJb = (implicit ? -1. : 1.) 
* q_data_sur[0][i]; const CeedScalar norm[3] = {q_data_sur[1][i], @@ -947,18 +949,19 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, q_data_sur[3][i] }; - CeedScalar du[3]; - for (int j=0; j<3; j++) du[j] = (dmomentum[j] - u[j] * drho) / rho; - const CeedScalar u_normal = Dot3(norm, u); - const CeedScalar du_normal = Dot3(norm, du); - const CeedScalar dmomentum_normal = drho * u_normal + rho * du_normal; - const CeedScalar P = context->P0; - const CeedScalar dP = 0; - - v[0][i] = -wdetJb * dmomentum_normal; - for (int j=0; j<3; j++) - v[j+1][i] = -wdetJb * (dmomentum_normal * u[j] + rho * u_normal * du[j]); - v[4][i] = -wdetJb * (du_normal * (E + P) + u_normal * (dE + dP)); + StateConservative dF_inviscid[3]; + FluxInviscid_fwd(context, s, ds, dF_inviscid); + + CeedScalar dFlux[5] = {0.}; + for (int j=0; j<3; j++) { + dFlux[0] += dF_inviscid[j].density * norm[j]; + for (int k=0; k<3; k++) + dFlux[k+1] += dF_inviscid[j].momentum[k] * norm[j]; + dFlux[4] += dF_inviscid[j].E_total * norm[j]; + } + + for (int j=0; j<5; j++) + v[j][i] = -wdetJb * dFlux[j]; } // End Quadrature Point Loop return 0; } From 0ec2498e4647a31a3ca4d4cab343044544c0b30c Mon Sep 17 00:00:00 2001 From: James Wright Date: Fri, 10 Jun 2022 09:48:10 -0600 Subject: [PATCH 084/172] examples/fluids: Add viscous flux to POutflow Jacobian --- examples/fluids/problems/newtonian.c | 2 +- examples/fluids/qfunctions/newtonian.h | 53 +++++++++++++++++++------- examples/fluids/src/setuplibceed.c | 8 ++++ 3 files changed, 48 insertions(+), 15 deletions(-) diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index 74e7e2f58a..dbfcb5f925 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -89,7 +89,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { problem->dim = 3; problem->q_data_size_vol = 10; problem->q_data_size_sur = 10; - problem->jac_data_size_sur = 5; + problem->jac_data_size_sur = 
11; problem->setup_vol.qfunction = Setup; problem->setup_vol.qfunction_loc = Setup_loc; problem->ics.qfunction = ICsNewtonianIG; diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 9b765a2418..764f4c1b22 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -910,6 +910,7 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, jac_data_sur[2][i] = s.Y.velocity[1]; jac_data_sur[3][i] = s.Y.velocity[2]; jac_data_sur[4][i] = s.U.E_total; + for (int j=0; j<6; j++) jac_data_sur[5+j][i] = kmstress[j]; } // End Quadrature Point Loop return 0; } @@ -921,8 +922,10 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, // *INDENT-OFF* // Inputs const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1], - (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], + (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; // Outputs CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; // *INDENT-ON* @@ -933,21 +936,43 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; iP0; - ds.Y.pressure = 0.; - - + const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]}; const CeedScalar wdetJb = (implicit ? -1. : 1.) 
* q_data_sur[0][i]; const CeedScalar norm[3] = {q_data_sur[1][i], q_data_sur[2][i], q_data_sur[3][i] }; + const CeedScalar dXdx[2][3] = { + {q_data_sur[4][i], q_data_sur[5][i], q_data_sur[6][i]}, + {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]} + }; + + CeedScalar U[5], kmstress[6], dU[5], dx_i[3] = {0.}; + for (int j=0; j<5; j++) U[j] = jac_data_sur[j][i]; + for (int j=0; j<6; j++) kmstress[j] = jac_data_sur[5+j][i]; + for (int j=0; j<3; j++) U[j+1] *= U[0]; + for (int j=0; j<5; j++) dU[j] = dq[j][i]; + State s = StateFromU(context, U, x_i); + State ds = StateFromU_fwd(context, s, dU, x_i, dx_i); + s.Y.pressure = context->P0; + ds.Y.pressure = 0.; + + State grad_ds[3]; + for (CeedInt j=0; j<3; j++) { + CeedScalar dx_i[3] = {0}, dUj[5]; + for (CeedInt k=0; k<5; k++) + dUj[k] = Grad_dq[0][k][i] * dXdx[0][j] + + Grad_dq[1][k][i] * dXdx[1][j]; + dx_i[j] = 1.; + grad_ds[j] = StateFromU_fwd(context, s, dUj, x_i, dx_i); + } + + CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3]; + KMStrainRate(grad_ds, dstrain_rate); + NewtonianStress(context, dstrain_rate, dkmstress); + KMUnpack(dkmstress, dstress); + KMUnpack(kmstress, stress); + ViscousEnergyFlux_fwd(context, s.Y, ds.Y, grad_ds, stress, dstress, dFe); StateConservative dF_inviscid[3]; FluxInviscid_fwd(context, s, ds, dF_inviscid); @@ -956,8 +981,8 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, for (int j=0; j<3; j++) { dFlux[0] += dF_inviscid[j].density * norm[j]; for (int k=0; k<3; k++) - dFlux[k+1] += dF_inviscid[j].momentum[k] * norm[j]; - dFlux[4] += dF_inviscid[j].E_total * norm[j]; + dFlux[k+1] += (dF_inviscid[j].momentum[k] - dstress[k][j]) * norm[j]; + dFlux[4] += (dF_inviscid[j].E_total + dFe[j]) * norm[j]; } for (int j=0; j<5; j++) diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c index 9caa5ea839..1028c98310 100644 --- a/examples/fluids/src/setuplibceed.c +++ b/examples/fluids/src/setuplibceed.c @@ -245,9 +245,13 @@ 
PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, &op_apply_outflow_jacobian); CeedOperatorSetField(op_apply_outflow_jacobian, "dq", elem_restr_q_sur, ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_apply_outflow_jacobian, "Grad_dq", elem_restr_q_sur, + ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); CeedOperatorSetField(op_apply_outflow_jacobian, "surface qdata", elem_restr_qd_i_sur, CEED_BASIS_COLLOCATED, q_data_sur); + CeedOperatorSetField(op_apply_outflow_jacobian, "x", elem_restr_x_sur, + ceed_data->basis_x_sur, ceed_data->x_coord); CeedOperatorSetField(op_apply_outflow_jacobian, "surface jacobian data", elem_restr_jd_i_sur, CEED_BASIS_COLLOCATED, jac_data_sur); @@ -643,8 +647,12 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CeedQFunctionContextDestroy(&problem->apply_outflow_jacobian.qfunction_context); CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, "dq", num_comp_q, CEED_EVAL_INTERP); + CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, "Grad_dq", num_comp_q*dim_sur, + CEED_EVAL_GRAD); CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, "surface qdata", q_data_size_sur, CEED_EVAL_NONE); + CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, "x", num_comp_x, + CEED_EVAL_INTERP); CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, "surface jacobian data", jac_data_size_sur, CEED_EVAL_NONE); From b55ac660a6883f680001b910762a338cb52482fa Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 7 Jun 2022 16:48:53 -0600 Subject: [PATCH 085/172] examples/fluids: Viscous flux for BoundaryIntegral jacobian --- examples/fluids/problems/newtonian.c | 2 + examples/fluids/qfunctions/blasius.h | 4 +- examples/fluids/qfunctions/newtonian.h | 84 +++++++++++++++++++++++++- examples/fluids/src/setuplibceed.c | 40 ++++++++++-- 4 files changed, 123 insertions(+), 7 deletions(-) diff --git a/examples/fluids/problems/newtonian.c 
b/examples/fluids/problems/newtonian.c index dbfcb5f925..dca39afbb7 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -104,6 +104,8 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_loc; problem->apply_inflow.qfunction = BoundaryIntegral; problem->apply_inflow.qfunction_loc = BoundaryIntegral_loc; + problem->apply_inflow_jacobian.qfunction = BoundaryIntegral_Jacobian; + problem->apply_inflow_jacobian.qfunction_loc = BoundaryIntegral_Jacobian_loc; problem->apply_outflow.qfunction = PressureOutflow; problem->apply_outflow.qfunction_loc = PressureOutflow_loc; problem->apply_outflow_jacobian.qfunction = PressureOutflow_Jacobian; diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h index 7fbcb573a4..b740f39108 100644 --- a/examples/fluids/qfunctions/blasius.h +++ b/examples/fluids/qfunctions/blasius.h @@ -265,8 +265,8 @@ CEED_QFUNCTION(Blasius_Inflow_Jacobian)(void *ctx, CeedInt Q, // *INDENT-OFF* // Inputs const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1], - (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; // Outputs CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 764f4c1b22..19cb7d116b 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -756,7 +756,8 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0]; + 
CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0], + (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[1]; //*INDENT-ON* @@ -817,10 +818,91 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, // -- Total Energy Density v[4][i] = -wdetJb * Flux[4]; + + jac_data_sur[0][i] = s.U.density; + jac_data_sur[1][i] = s.Y.velocity[0]; + jac_data_sur[2][i] = s.Y.velocity[1]; + jac_data_sur[3][i] = s.Y.velocity[2]; + jac_data_sur[4][i] = s.U.E_total; + for (int j=0; j<6; j++) jac_data_sur[5+j][i] = kmstress[j]; } return 0; } +// Jacobian for "set nothing" boundary integral +CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { + // *INDENT-OFF* + // Inputs + const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], + (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + // Outputs + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + // *INDENT-ON* + + const NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; + const bool implicit = context->is_implicit; + + CeedPragmaSIMD + // Quadrature Point Loop + for (CeedInt i=0; inum_inflow; i++) { - CeedVector q_data_sur; + CeedVector q_data_sur, jac_data_sur; CeedOperator op_setup_sur, op_apply_inflow, op_apply_inflow_jacobian = NULL; - CeedElemRestriction elem_restr_x_sur, elem_restr_q_sur, elem_restr_qd_i_sur; + CeedElemRestriction elem_restr_x_sur, elem_restr_q_sur, elem_restr_qd_i_sur, + elem_restr_jd_i_sur; // ---- CEED Restriction ierr = GetRestrictionForDomain(ceed, dm, height, domain_label, bc->inflows[i], Q_sur, q_data_size_sur, &elem_restr_q_sur, &elem_restr_x_sur, &elem_restr_qd_i_sur); CHKERRQ(ierr); + if (jac_data_size_sur > 0) { + // State-dependent data 
will be passed from residual to Jacobian. This will be collocated. + ierr = GetRestrictionForDomain(ceed, dm, height, domain_label, bc->inflows[i], + Q_sur, jac_data_size_sur, NULL, NULL, + &elem_restr_jd_i_sur); + CHKERRQ(ierr); + CeedElemRestrictionCreateVector(elem_restr_jd_i_sur, &jac_data_sur, NULL); + } else { + elem_restr_jd_i_sur = NULL; + jac_data_sur = NULL; + } // ---- CEED Vector PetscInt loc_num_elem_sur; @@ -147,17 +159,26 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, ceed_data->basis_x_sur, ceed_data->x_coord); CeedOperatorSetField(op_apply_inflow, "v", elem_restr_q_sur, ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); + if (elem_restr_jd_i_sur) + CeedOperatorSetField(op_apply_inflow, "surface jacobian data", + elem_restr_jd_i_sur, + CEED_BASIS_COLLOCATED, jac_data_sur); if (ceed_data->qf_apply_inflow_jacobian) { CeedOperatorCreate(ceed, ceed_data->qf_apply_inflow_jacobian, NULL, NULL, &op_apply_inflow_jacobian); CeedOperatorSetField(op_apply_inflow_jacobian, "dq", elem_restr_q_sur, ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_apply_inflow_jacobian, "Grad_dq", elem_restr_q_sur, + ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); CeedOperatorSetField(op_apply_inflow_jacobian, "surface qdata", elem_restr_qd_i_sur, CEED_BASIS_COLLOCATED, q_data_sur); CeedOperatorSetField(op_apply_inflow_jacobian, "x", elem_restr_x_sur, ceed_data->basis_x_sur, ceed_data->x_coord); + CeedOperatorSetField(op_apply_inflow_jacobian, "surface jacobian data", + elem_restr_jd_i_sur, + CEED_BASIS_COLLOCATED, jac_data_sur); CeedOperatorSetField(op_apply_inflow_jacobian, "v", elem_restr_q_sur, ceed_data->basis_q_sur, CEED_VECTOR_ACTIVE); } @@ -173,9 +194,11 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc, // ----- Cleanup CeedVectorDestroy(&q_data_sur); + CeedVectorDestroy(&jac_data_sur); CeedElemRestrictionDestroy(&elem_restr_q_sur); CeedElemRestrictionDestroy(&elem_restr_x_sur); 
CeedElemRestrictionDestroy(&elem_restr_qd_i_sur); + CeedElemRestrictionDestroy(&elem_restr_jd_i_sur); CeedOperatorDestroy(&op_setup_sur); CeedOperatorDestroy(&op_apply_inflow); CeedOperatorDestroy(&op_apply_inflow_jacobian); @@ -598,6 +621,10 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CEED_EVAL_INTERP); CeedQFunctionAddOutput(ceed_data->qf_apply_inflow, "v", num_comp_q, CEED_EVAL_INTERP); + if (jac_data_size_sur) + CeedQFunctionAddOutput(ceed_data->qf_apply_inflow, "surface jacobian data", + jac_data_size_sur, + CEED_EVAL_NONE); } if (problem->apply_inflow_jacobian.qfunction) { CeedQFunctionCreateInterior(ceed, 1, problem->apply_inflow_jacobian.qfunction, @@ -608,10 +635,15 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CeedQFunctionContextDestroy(&problem->apply_inflow_jacobian.qfunction_context); CeedQFunctionAddInput(ceed_data->qf_apply_inflow_jacobian, "dq", num_comp_q, CEED_EVAL_INTERP); + CeedQFunctionAddInput(ceed_data->qf_apply_inflow_jacobian, "Grad_dq", + num_comp_q*dim_sur, CEED_EVAL_GRAD); CeedQFunctionAddInput(ceed_data->qf_apply_inflow_jacobian, "surface qdata", q_data_size_sur, CEED_EVAL_NONE); CeedQFunctionAddInput(ceed_data->qf_apply_inflow_jacobian, "x", num_comp_x, CEED_EVAL_INTERP); + CeedQFunctionAddInput(ceed_data->qf_apply_inflow_jacobian, + "surface jacobian data", + jac_data_size_sur, CEED_EVAL_NONE); CeedQFunctionAddOutput(ceed_data->qf_apply_inflow_jacobian, "v", num_comp_q, CEED_EVAL_INTERP); } @@ -647,8 +679,8 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CeedQFunctionContextDestroy(&problem->apply_outflow_jacobian.qfunction_context); CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, "dq", num_comp_q, CEED_EVAL_INTERP); - CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, "Grad_dq", num_comp_q*dim_sur, - CEED_EVAL_GRAD); + CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, "Grad_dq", + num_comp_q*dim_sur, 
CEED_EVAL_GRAD); CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, "surface qdata", q_data_size_sur, CEED_EVAL_NONE); CeedQFunctionAddInput(ceed_data->qf_apply_outflow_jacobian, "x", num_comp_x, From c2e074770ba9ec94944278eb34fe594fd42c340d Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Fri, 10 Jun 2022 23:37:41 -0600 Subject: [PATCH 086/172] examples/fluids: free qfunctions for in/outflow Jacobian --- examples/fluids/navierstokes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index 1df5119cb2..8591adf18d 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -315,7 +315,9 @@ int main(int argc, char **argv) { CeedQFunctionDestroy(&ceed_data->qf_ifunction_vol); CeedQFunctionDestroy(&ceed_data->qf_setup_sur); CeedQFunctionDestroy(&ceed_data->qf_apply_inflow); + CeedQFunctionDestroy(&ceed_data->qf_apply_inflow_jacobian); CeedQFunctionDestroy(&ceed_data->qf_apply_outflow); + CeedQFunctionDestroy(&ceed_data->qf_apply_outflow_jacobian); // -- Bases CeedBasisDestroy(&ceed_data->basis_q); From 855641ea0d2671e470ad7fa4c18bce9aaf0faaab Mon Sep 17 00:00:00 2001 From: James Wright Date: Sat, 11 Jun 2022 12:47:35 -0600 Subject: [PATCH 087/172] examples/fluids: Fix plate mesh generation --- examples/fluids/problems/blasius.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c index 84bc0584ed..03ca8edc5d 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -100,11 +100,11 @@ static PetscErrorCode ModifyMesh(MPI_Comm comm, DM dm, PetscInt dim, for (PetscInt i=0; i Date: Sat, 11 Jun 2022 12:45:31 -0600 Subject: [PATCH 088/172] examples/fluids: ICs from STG Profile --- examples/fluids/problems/blasius.c | 2 +- examples/fluids/problems/stg_shur14.c | 11 +++++---- examples/fluids/qfunctions/stg_shur14.h | 31 +++++++++++++++++++++++++ 3 files 
changed, 39 insertions(+), 5 deletions(-) diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c index 03ca8edc5d..16fd562483 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -153,7 +153,6 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { // ------------------------------------------------------ // SET UP Blasius // ------------------------------------------------------ - CeedQFunctionContextDestroy(&problem->ics.qfunction_context); problem->ics.qfunction = ICsBlasius; problem->ics.qfunction_loc = ICsBlasius_loc; problem->apply_inflow_jacobian.qfunction = Blasius_Inflow_Jacobian; @@ -251,6 +250,7 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { CeedQFunctionContextSetDataDestroy(blasius_context, CEED_MEM_HOST, FreeContextPetsc); + CeedQFunctionContextDestroy(&problem->ics.qfunction_context); problem->ics.qfunction_context = blasius_context; CeedQFunctionContextReferenceCopy(blasius_context, &problem->apply_inflow_jacobian.qfunction_context); diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c index ae94c914c0..481c0ecf4b 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -384,7 +384,6 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, ierr = GetSTGContextData(comm, dm, stg_inflow_path, stg_rand_path, &global_stg_ctx, ynodes); CHKERRQ(ierr); - CeedQFunctionContextDestroy(&problem->apply_inflow.qfunction_context); CeedQFunctionContextCreate(user->ceed, &stg_context); CeedQFunctionContextSetData(stg_context, CEED_MEM_HOST, CEED_USE_POINTER, global_stg_ctx->total_bytes, global_stg_ctx); @@ -394,16 +393,20 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, offsetof(struct STGShur14Context_, time), 1, "Phyiscal time of the solution"); + CeedQFunctionContextDestroy(&problem->ics.qfunction_context); + 
problem->ics.qfunction = ICsSTG; + problem->ics.qfunction_loc = ICsSTG_loc; + problem->ics.qfunction_context = stg_context; + if (use_stgstrong) { // Use default boundary integral QF (BoundaryIntegral) in newtonian.h - problem->bc_from_ics = PETSC_FALSE; + problem->bc_from_ics = PETSC_FALSE; } else { problem->apply_inflow.qfunction = STGShur14_Inflow; problem->apply_inflow.qfunction_loc = STGShur14_Inflow_loc; - problem->apply_inflow.qfunction_context = stg_context; + CeedQFunctionContextReferenceCopy(stg_context, &problem->apply_inflow.qfunction_context); problem->bc_from_ics = PETSC_TRUE; } - // global_stg_ctx = global_stg_ctx; PetscFunctionReturn(0); } diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index 949c135de4..8f50c1b3c5 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -176,6 +176,37 @@ void CEED_QFUNCTION_HELPER(STGShur14_Calc)(const CeedScalar X[3], u[2] = ubar[2] + cij[4]*vp[0] + cij[5]*vp[1] + cij[2]*vp[2]; } +// Extrude the STGInflow profile through out the domain for an initial condition +CEED_QFUNCTION(ICsSTG)(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) { + // Inputs + const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0]; + + // Outputs + CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + + const STGShur14Context stg_ctx = (STGShur14Context) ctx; + CeedScalar u[3], cij[6], eps, lt; + const CeedScalar theta0 = stg_ctx->theta0; + const CeedScalar P0 = stg_ctx->P0; + const CeedScalar cv = stg_ctx->newtonian_ctx.cv; + const CeedScalar cp = stg_ctx->newtonian_ctx.cp; + const CeedScalar Rd = cp - cv; + const CeedScalar rho = P0 / (Rd * theta0); + + CeedPragmaSIMD + for(CeedInt i=0; i Date: Mon, 13 Jun 2022 12:23:22 -0600 Subject: [PATCH 089/172] FIXUP: Use BI inflow jacobian if not blasius inflow - Previously did not use the BoundaryIntegral_Jacobian for any cases --- 
examples/fluids/problems/blasius.c | 13 +++++++------ examples/fluids/problems/newtonian.c | 2 ++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c index 16fd562483..ff98c2dc74 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -155,8 +155,6 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { // ------------------------------------------------------ problem->ics.qfunction = ICsBlasius; problem->ics.qfunction_loc = ICsBlasius_loc; - problem->apply_inflow_jacobian.qfunction = Blasius_Inflow_Jacobian; - problem->apply_inflow_jacobian.qfunction_loc = Blasius_Inflow_Jacobian_loc; CeedScalar Uinf = 40; // m/s CeedScalar delta0 = 4.2e-4; // m @@ -252,16 +250,19 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { CeedQFunctionContextDestroy(&problem->ics.qfunction_context); problem->ics.qfunction_context = blasius_context; - CeedQFunctionContextReferenceCopy(blasius_context, - &problem->apply_inflow_jacobian.qfunction_context); if (use_stg) { ierr = SetupSTG(comm, dm, problem, user, weakT, theta0, P0, mesh_ynodes, mesh_nynodes); CHKERRQ(ierr); } else { - problem->apply_inflow.qfunction = Blasius_Inflow; - problem->apply_inflow.qfunction_loc = Blasius_Inflow_loc; + CeedQFunctionContextDestroy(&problem->apply_inflow.qfunction_context); + problem->apply_inflow.qfunction = Blasius_Inflow; + problem->apply_inflow.qfunction_loc = Blasius_Inflow_loc; + problem->apply_inflow_jacobian.qfunction = Blasius_Inflow_Jacobian; + problem->apply_inflow_jacobian.qfunction_loc = Blasius_Inflow_Jacobian_loc; CeedQFunctionContextReferenceCopy(blasius_context, &problem->apply_inflow.qfunction_context); + CeedQFunctionContextReferenceCopy(blasius_context, + &problem->apply_inflow_jacobian.qfunction_context); } ierr = PetscFree(mesh_ynodes); CHKERRQ(ierr); PetscFunctionReturn(0); diff --git a/examples/fluids/problems/newtonian.c 
b/examples/fluids/problems/newtonian.c index dca39afbb7..16f2dffa88 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -298,6 +298,8 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { &problem->apply_vol_ijacobian.qfunction_context); CeedQFunctionContextReferenceCopy(newtonian_ig_context, &problem->apply_inflow.qfunction_context); + CeedQFunctionContextReferenceCopy(newtonian_ig_context, + &problem->apply_inflow_jacobian.qfunction_context); CeedQFunctionContextReferenceCopy(newtonian_ig_context, &problem->apply_outflow.qfunction_context); CeedQFunctionContextReferenceCopy(newtonian_ig_context, From 798d42b89352fce907da8e2c546a541691b19556 Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 14 Jun 2022 09:31:28 -0600 Subject: [PATCH 090/172] examples/fluids: Fill node_locs if file not specified - Ensures that strong STG has correct mesh_ynode locations even if the mesh spacing is specified via the algorithmic method --- examples/fluids/problems/blasius.c | 32 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c index ff98c2dc74..470c922de1 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -57,12 +57,13 @@ static PetscErrorCode GetYNodeLocs(const MPI_Comm comm, * outflow. It's angle is controlled by `top_angle` (in units of degrees). * * If `node_locs` is not NULL, then the nodes will be placed at `node_locs` - * locations. + * locations. If it is NULL, then the modified coordinate values will be set in + * the array, along with `num_node_locs`. 
*/ static PetscErrorCode ModifyMesh(MPI_Comm comm, DM dm, PetscInt dim, PetscReal growth, PetscInt N, PetscReal refine_height, PetscReal top_angle, - PetscReal node_locs[], PetscInt num_node_locs) { + PetscReal *node_locs[], PetscInt *num_node_locs) { PetscInt ierr, narr, ncoords; PetscReal domain_min[3], domain_max[3], domain_size[3]; PetscScalar *arr_coords; @@ -90,13 +91,17 @@ static PetscErrorCode ModifyMesh(MPI_Comm comm, DM dm, PetscInt dim, // Get element size of the box mesh, for indexing each node const PetscReal dybox = domain_size[1]/faces[1]; - if (!node_locs) { + if (!*node_locs) { // Calculate the first element height PetscReal dy1 = refine_height*(growth-1)/(pow(growth, N)-1); // Calculate log of sizing outside BL PetscReal logdy = (log(domain_max[1]) - log(refine_height)) / (faces[1] - N); + *num_node_locs = faces[1] + 1; + PetscReal *temp_node_locs; + ierr = PetscMalloc1(*num_node_locs, &temp_node_locs); CHKERRQ(ierr); + for (PetscInt i=0; i faces[1] +1) { + faces[1]+1, *num_node_locs); + if (*num_node_locs > faces[1] +1) { ierr = PetscPrintf(comm, "WARNING: y_node_locs_path has more locations (%d) " "than the mesh has nodes (%d). 
This maybe unintended.", - num_node_locs, faces[1]+1); CHKERRQ(ierr); + *num_node_locs, faces[1]+1); CHKERRQ(ierr); } - PetscScalar max_y = node_locs[faces[1]]; + PetscScalar max_y = (*node_locs)[faces[1]]; for (PetscInt i=0; idim, mesh_growth, mesh_Ndelta, - mesh_refine_height, mesh_top_angle, mesh_ynodes, - mesh_nynodes); CHKERRQ(ierr); + mesh_refine_height, mesh_top_angle, &mesh_ynodes, + &mesh_nynodes); CHKERRQ(ierr); // Some properties depend on parameters from NewtonianIdealGas CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context, From cfcf148165cf334d1ff173bdd31c7d3fb841b8c3 Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 14 Jun 2022 09:45:03 -0600 Subject: [PATCH 091/172] examples/fluids: Protect against divide-by-0 - When running with strongly enforced STG, the spectrum calc can try and divide by 0 (distance to the wall) - This behavior doesn't break anything per se, but this commit makes it much easier to debug floating point exceptions. --- examples/fluids/qfunctions/stg_shur14.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index 8f50c1b3c5..3dc0879a89 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -114,7 +114,7 @@ void CEED_QFUNCTION_HELPER(CalcSpectrum)(const CeedScalar dw, const CeedScalar *kappa = &stg_ctx->data[stg_ctx->offsets.kappa]; const CeedScalar hmax = Max( Max(h[0], h[1]), h[2]); - const CeedScalar ke = 2*M_PI/Min(2*dw, 3*lt); + const CeedScalar ke = dw==0 ? 
1e16 : 2*M_PI/Min(2*dw, 3*lt); const CeedScalar keta = 2*M_PI*pow(pow(nu,3.0)/eps, -0.25); const CeedScalar kcut = M_PI/ Min( Max(Max(h[1], h[2]), 0.3*hmax) + 0.1*dw, hmax ); From 2a1b733929180c41f75342d5831d0a2420d70e9f Mon Sep 17 00:00:00 2001 From: James Wright Date: Mon, 20 Jun 2022 11:27:37 -0600 Subject: [PATCH 092/172] doc: Update release notes for previous additions --- doc/sphinx/source/releasenotes.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index a11cd441a6..31cf3401ba 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -23,6 +23,8 @@ On this page we provide a summary of the main API changes, new features and exam ### Examples - Added various performance enhancements for {ref}`example-petsc-navier-stokes` +- Refactored {ref}`example-petsc-navier-stokes` to improve code reuse +- Added Shock Tube, Channel, and Flat Plate boundary layer problems to {ref}`example-petsc-navier-stokes` (v0-10-1)= From 4dbab5e57fc57a639a22b29ad3904bd484cab3ab Mon Sep 17 00:00:00 2001 From: James Wright Date: Mon, 20 Jun 2022 16:15:07 -0600 Subject: [PATCH 093/172] examples/fluids: Add STGInflow Jacobian QFunction --- examples/fluids/problems/blasius.c | 1 - examples/fluids/problems/stg_shur14.c | 11 +++- examples/fluids/qfunctions/stg_shur14.h | 84 ++++++++++++++++++++++--- 3 files changed, 85 insertions(+), 11 deletions(-) diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c index 470c922de1..d8d7292caa 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -262,7 +262,6 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { ierr = SetupSTG(comm, dm, problem, user, weakT, theta0, P0, mesh_ynodes, mesh_nynodes); CHKERRQ(ierr); } else { - CeedQFunctionContextDestroy(&problem->apply_inflow.qfunction_context); problem->apply_inflow.qfunction = Blasius_Inflow; 
problem->apply_inflow.qfunction_loc = Blasius_Inflow_loc; problem->apply_inflow_jacobian.qfunction = Blasius_Inflow_Jacobian; diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c index 481c0ecf4b..f63f08a8cd 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -402,9 +402,14 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, // Use default boundary integral QF (BoundaryIntegral) in newtonian.h problem->bc_from_ics = PETSC_FALSE; } else { - problem->apply_inflow.qfunction = STGShur14_Inflow; - problem->apply_inflow.qfunction_loc = STGShur14_Inflow_loc; - CeedQFunctionContextReferenceCopy(stg_context, &problem->apply_inflow.qfunction_context); + problem->apply_inflow.qfunction = STGShur14_Inflow; + problem->apply_inflow.qfunction_loc = STGShur14_Inflow_loc; + problem->apply_inflow_jacobian.qfunction = STGShur14_Inflow_Jacobian; + problem->apply_inflow_jacobian.qfunction_loc = STGShur14_Inflow_Jacobian_loc; + CeedQFunctionContextReferenceCopy(stg_context, + &problem->apply_inflow.qfunction_context); + CeedQFunctionContextReferenceCopy(stg_context, + &problem->apply_inflow_jacobian.qfunction_context); problem->bc_from_ics = PETSC_TRUE; } diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index 3dc0879a89..80bcb803ff 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -222,7 +222,8 @@ CEED_QFUNCTION(STGShur14_Inflow)(void *ctx, CeedInt Q, (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[2], (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[3]; - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0]; + CeedScalar(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0], + (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[1]; //*INDENT-ON* @@ -263,9 +264,7 @@ CEED_QFUNCTION(STGShur14_Inflow)(void *ctx, CeedInt Q, for (CeedInt 
j=0; j<3; j++) u[j] = ubar[j]; } - const CeedScalar E_kinetic = .5 * rho * (u[0]*u[0] + - u[1]*u[1] + - u[2]*u[2]); + const CeedScalar E_kinetic = .5 * rho * Dot3(u, u); CeedScalar E_internal, P; if (prescribe_T) { // Temperature is being set weakly (theta0) and for constant cv this sets E_internal @@ -287,9 +286,8 @@ CEED_QFUNCTION(STGShur14_Inflow)(void *ctx, CeedInt Q, const CeedScalar E = E_internal + E_kinetic; // Velocity normal to the boundary - const CeedScalar u_normal = norm[0]*u[0] + - norm[1]*u[1] + - norm[2]*u[2]; + const CeedScalar u_normal = Dot3(norm, u); + // The Physics // Zero v so all future terms can safely sum into it for (CeedInt j=0; j<5; j++) v[j][i] = 0.; @@ -305,8 +303,80 @@ CEED_QFUNCTION(STGShur14_Inflow)(void *ctx, CeedInt Q, // -- Total Energy Density v[4][i] -= wdetJb * u_normal * (E + P); + + jac_data_sur[0][i] = rho; + jac_data_sur[1][i] = u[0]; + jac_data_sur[2][i] = u[1]; + jac_data_sur[3][i] = u[2]; + jac_data_sur[4][i] = E; + for (int j=0; j<6; j++) jac_data_sur[5+j][i] = 0.; } return 0; } +CEED_QFUNCTION(STGShur14_Inflow_Jacobian)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { + // *INDENT-OFF* + // Inputs + const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + // Outputs + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + // *INDENT-ON* + const STGShur14Context stg_ctx = (STGShur14Context)ctx; + const bool implicit = stg_ctx->is_implicit; + const CeedScalar cv = stg_ctx->newtonian_ctx.cv; + const CeedScalar cp = stg_ctx->newtonian_ctx.cp; + const CeedScalar Rd = cp - cv; + const CeedScalar gamma = cp/cv; + + const CeedScalar theta0 = stg_ctx->theta0; + const bool prescribe_T = stg_ctx->prescribe_T; + + CeedPragmaSIMD + // Quadrature Point Loop + for (CeedInt i=0; i Date: Tue, 7 Jun 2022 09:15:54 -0600 Subject: 
[PATCH 094/172] test: Update data for fluids blasius tests --- examples/fluids/navierstokes.c | 6 +++--- .../fluids-navierstokes-blasius_STG.bin | Bin 7816 -> 7816 bytes ...uids-navierstokes-blasius_STG_strongBC.bin | Bin 6360 -> 6360 bytes .../fluids-navierstokes-blasius_STG_weakT.bin | Bin 7816 -> 7816 bytes 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index 8591adf18d..7a67acf59e 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -22,6 +22,9 @@ // ./navierstokes -ceed /cpu/self -problem density_current -degree 1 // ./navierstokes -ceed /gpu/cuda -problem advection -degree 1 // +//TESTARGS(name="blasius_STG") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 2E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin +//TESTARGS(name="blasius_STG_weakT") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin -weakT +//TESTARGS(name="blasius_STG_strongBC") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-10 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin -stg_strong true //TESTARGS(name="channel") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml -compare_final_state_atol 2e-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel.bin -snes_fd_color //TESTARGS(name="dc_explicit") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 
-rc 100. -thetaC -35. -mu 75 -ts_dt 1e-3 -units_meter 1e-2 -units_second 1e-2 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-explicit.bin //TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -units_meter 1e-2 -units_second 1e-2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin -snes_fd_color @@ -35,9 +38,6 @@ //TESTARGS(name="euler_implicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-implicit.bin //TESTARGS(name="euler_explicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ts_dt 1e-7 -ts_rk_type 5bs -ts_rtol 1e-10 -ts_atol 1e-10 -compare_final_state_atol 1E-7 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-explicit.bin 
//TESTARGS(name="shocktube_explicit_su_yzb") -ceed {ceed_resource} -test -problem shocktube -degree 1 -dm_plex_box_faces 50,1,1 -units_meter 1e-2 units_second 1e-2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,20,20 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -yzb -stab su -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin -//TESTARGS(name="blasius_STG") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 2E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin -snes_fd_color -//TESTARGS(name="blasius_STG_weakT") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin -weakT -snes_fd_color -//TESTARGS(name="blasius_STG_strongBC") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-10 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin -stg_strong true /// @file /// Navier-Stokes example using PETSc diff --git a/examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin index e46c092fc4ab7c3d07b22d4c97f81d6eed4d3d3d..dbdc3bfe0e5535846670a0c2e2c5c2d565e206d6 100644 GIT binary patch literal 7816 zcmW-lcRbbKAIFuIb(8Fo?99lX=UnUFQg%Z~Q6!^CloSz3m&i;RDV0%FGD089$`(mB z8D*5v`eyvz_xI<0ydKZ@>zs4Xd7tz8Fz{ToWME+IL`$@tL{6P3nbb_n-D8{%zrFc4 z(9+1HR#Tx0=}5E`t60+@oKGgT-P@*gpd9pfp)!GgLGNcBX#WGh)h>K4HALgw+seXN zH3{eJgSWRaZiRYrJ2ei3gYK337;&SqikpsmnQYOHz0&*fyg9x8L`CUsdi}4XXY*5IkXo7^ewX#eYM(O_Zot`bAYxOoi z5JyWLG#{NUbA*S}lkncV*5H5Z<$vdSK|iI84!EGD0A_a~=dWmkxt=%^M?LV=z~?Fd 
z=>Fp$2JWC`w$2qU@>2pIbt9Racm@6s)sMI+)BVMz7Gu%ky$k*3>8eP>Wtn#V#TDR( z?WHQ5Kqo6!&GBFchrh8C&k%7^e$b1j8OGOqcQe1%5a=FB&NEh+ft&rg+!tAb{CShm zvr?W_s70pj0nM#J_I^|EIN!EMmYnWze=eqP2i$c`Iqo z$P7yi=SZyrKY5QYvl?`pwinIPcoX;VG4gL`!dbh$|6kEE_!st!ee?m{ExrBNCd~SP zgR^<}Gs5xi{#vN_%EnC$Z0Y#@eEh9sFObm#`E8-`F?j z(T!R3+oVJ^Zz8(@=buYQf`DHf%8*e3-ShYCgO8Z|L9Y4+A6;Y-qc)(B7zzGOpM~D) zfqq15LR}1VAB^<6b=#ltK0AmUI2eIH|5dLL);Ct^?_ZGZcEYqXggw=|N*?em% z_6{3;cf;8PjY*KY083Sw75HCbBoq=@)UnAxl&I?FsPDVOSf5eG#jirWm`5`JN6i z=(hF|RtXt=dY`n&q;`gOxe@yQsekk77cb^TAwg2Jb;#3TD89NN82pt!@bF}U{^HJ@ z%p)v%zvsp@xei4Q{&J+EJ7iLeUIXVY`ubVh>D<%BqTJKbyh=}q2+QU(U1a!wTE}WG zZ3I46MZ5HeC>9x$NWEQmgVsonYN z;D3DY;XQ$%Pm~Bgkie2M-2tm1cTr-rU`vpMHu&c_w#+PozT1q~Kp9KCHTQ_S!9vhb zsb4_75Afu~z^2o5|D@gA8CW8Lru*X3Bw;+*nUym69{lg#74cc6`zuW;p1|8<>q->1 zen7!SUSmmzVu3H;>@W2kcv1Jo;W8|nIX_xEvJGWqsuozqc*6X2XU`)9yvbYVq%|xn zHkqyP-iJuMz@C7Do&(>ml>YfE-T$!i%P}m|imq>V)Fvc;Hl*HPgYk7p`$SwDr~4z~ z`3Wq2U+<0MO#)r1H~3f}aRbJue)WNPG4O}#sf%J*u~U0FH0}k;<(L{M8M+Adw<>jS zWdfdTs+8x16%}nxx-j1767ljDcF!3E0hQ{|6piGs(ZQF{;lo> z|EbI8qPc*t*Kzvr1j`@4-^2OM6kU^$3gSAk8TiR~f4vppQWJ0=$A1!!{GFsuJH) zoV<)|7ID5?#x*jj^S4vQ|M!2Rp=I$pR++e+t~mP&#a`-VU+8xSek`V&@*4Qw;1qXD ztYOPvS@*CIy6o!_ohO6;-q?TZ1z5eiTY9r&DUn;b)T(XSO(u02hVOe! 
z?>{o{47@q9x;Is4qQwk3Ds0gYA!m?D@6WIf6BWQW$8je;!@JT|TK>`^(JjgftuhPN zw+r3OTgpdEaa#?){*A^u%4#AkYyGIan>NekdJK5UosSLpfj^?D@iG-_CwDSjJtjot z4>btCN~|H1x~)&u@K%Dq_)r3t!`cGwno=SD$g5*?TcuY!nbhM$Q&!&wd|4 zk=@|kzk|SkFj=@y!Fnpfk-jg6&>f$PPP-=Jq5iFtQ&iYrahHU6zmYKx-XP{eI*97h ztHYm@VSasC%YJI_fY0Eox_JU)exY*KbyK25>14iK?R7HglV_LXxE1*Ge64i5hY>B2 z!-Vq!x}3Ds!sJm+CiUA_DW=TW2Du>=1$|J=V**m%E`T8{4)qP%*Kz`14Uk3&us zty+TMPde=;=8BD(6CdblTA;9u>Kvn9UNUJ|+f)4i_s4koUG=M&e5J8vQgIaBUzlq9 zy)Xv#gBklO{K3EV`?np**z{xS=lau6P^W%QhR%ztWYU;OnzAel_|Jc4p;lp2pC^wi zx~zz*kYK+#=?|bsMX_O6zgQj<`^r3Qx7BM!D2;rN3Hio+IZnqlbhyFb=E7=t_ z3iTOn-ZTbabJ_49)$C!Sruk-w`}HI;X>yMvsrU`(d?6K*m$2E-#F?B_4HSNHv$$Y| zH<>iGyYQutGx*P_DeH`2v!sg~A5DKl1uD(ik(!sF{sV`B7mDD&#wy;>hbgH}I$Hb3 zP;X=n8|Nu|GHE)YGRTC!zPuS9Q&TX-Zo7j}OFr>NUtPr@6Yke99Mg`;hM;dhQbBIS z6obx|-4`5CMEH&~h50c5uQ+q*HoU)Rvm<4FAF=t$hQnn8-%!z}Rbuo-4AdWd5PM<( zE!|{x6wqzJRKet-tVT1`_xs?@{+i!Ve@3m)Wf1FaTS~`LpJS?= zX+?2jJ5l#%W=EBK9Ml)J%}k<#E>R}Bn1ZPq%Dk&bbqNbjlU5D80y1ef`N#!9`2T2& zN_`Q0m@4bAsYpf%J?~sG&=QCHbLQEu2jSAN{{Oz^m60*kWV&lx{8uzEuvOIh(|+(z z>i>6%e!pz8jOeh0`m^WR#J3WS9G?_>wygkfe(p1mE9gSpuQy5npW^eDKZdx$?4Q== z#1H;us#6K{{;@>mUbBb!m7Wn**n&znFGtn%?E-$T;Dv4#egAz@Nu*+`*~PK?fC4nM z`v58R9n9}*CsXxPH~RcL$Dso?PNjwLuqc)*2rKejv|N zQISZrY7aQr_8R=lKTN=Td5QLG|H$%fOf`0G5jf+I%7nTMBDX#Oe&v#+s0R4=S=-Kr zU@CR6&G$ni8qQwg5^{S3I{%e)b~dQL`Dx|8MNGxHGX<}?h?e&4-fr^HpJx*P)20F+ zD{xg+avf7CQe_Snmx=oV;l2+{aBlEkUJF|5dZ24GsKPRNgYT0DsF-OMnqo zzU2}~9A5*SUq)sn0Q?hVw{CaB)WbYBlF_DU#Kp@#djanEnP&Otw@yNR{*8tyBuuq& z57X*sBs$$DAIS^b0nh6%pDa%I54CH4gsJ`9LQX5h5j0+w&C&8<0cMuU^7-?S#{eRXtTx{nI=szmIg#E_U$Tt%%`rNIxKDCwTU^h42ym}XO;#2lc=-;vO170k%nCfV} zJ@j!g@pLXp+RQi|^bcQ;{QX4d4J)tJVCsRI9!urji1>S3@w7Yizgg$I!m&3%KUwl> z(G^qgeD5z56h*@Yd(*6N6`3^0N|hASqVM122rkfrgZ&wEt%$d~$#Z+ApnuKI#BO%> zqWe?J*%79m(B|nhx=mz7TykKShx0SrmHYNb80Z`lSBO+hJ?5qJZS*&)aY#I>6ZaML ztM?;l^!>R;A$ZF$HRaD^pUxmO{#yQrSP<-wIllT2y!8IhAX#v&8&d<>(j+A#iFmzz zT!Eo*f6P|fHyzWV*H3t?bO2Mm6B0>n>O|Vzy1Ya0BS6oIva4bSowf0}|2|m1eonWL 
z^QhWXk#9lS5A=sawNvo^qp8i0OnrpBkrHUxxCeembf4as1MlC!Pp+T6fmd65yk#EV zqeY%BV~wD!^s5zqt3R?iY=FkzOAC7rpCH`YR`)$O7KVDM|84ooNUvx2 z*7pD!Yhd<%U2zC$SuY$4l}!OY=s`-=MyS7w<#iS_8mIXcUih6!s6VNSs}qI&*H#>Q z#*JRTU0~C9Z8WA!-F&$w7l~ilwNbs13HZq^GbQkTr0J|HJ}pN}N&Brt>g$O!pZKo5 zy;lhScTW~yZU+6z^zNN-4~Ly}6~7>eH1(%-{-VC~pKQ9nbWPD?v@9(fqR(-G zF!4%sJxGK6&{jg@3@ih_t(|$j3oRCE>|%c?fOL4dm9{$d0ske0f8SZqP4K3~H_x3># zbI|R+8thZXtV5ILyn`Wxe>#`dlfZr8Un=Xx3imhdqIsKd9cC@spFZJtg|O4k5Kp@L z6!=@_-V$nb|HjTQEqD`C+49It1YwgezQVEt`eR#bMf*Ye`nO({e{lh`nA9fq4W2{$ z7x$k|`V9M}b(JS#fu8TYw}ied#N5#g0p42jgnOg*gIz}$z`tw0!L|qVbMHUclw)q^ zKdo6c1B6HVGC6N-0QiD#h3YKuS3ejXGKIM>s5zXgR3q$)@ZPXgfMXYkixHQve(x@$+T%@x94ujy|SAqV+q zbI#4yuSv1nh7AvqS3{9%axnbAZQ8=7<1wJW?2nM&f<+@8t#3YQLV;n) zvz=CuZ(2;`s{`6V_sAU`UBIFOe;U)xuM=11sg+X=@cpTc_r2qS4c-6sSwRUb^5gFH zbFVfKww`M1r~iZZOS`4F2Tu<0?;I3DCb962x$3ii6DZ(_>#b6`PT+mh%eJS0KCPT^ z{34bV*i2?~XhqSJ>nr!{&wziv=Ld8Z^rKe(YC2e=p{wP1z)>P*Qld4|9P(9Lz^iJ; zQ1I8(ll*WMOQdf}on>4jtfJe4V@hSfzhKZf{yXqZPi3_);O#fN8hu%^QAkK7@j2KH z_%|ny9qb_EH#MtP^6D4#tXaqr>HP+M%q$+@)`?Pr&$uA8$+31pc6P z3~I%SDnIjf4zm)svmG9=d&B+F-r}<7kP`43Yzl|tutL>Xzx3KxLNog1KJy>~{C`=n zs{{f6E_}^i63e^4G!dZbq39VlEIAG5w|#+G(gfb`v3I7QZ7syAqg6G;-&g2i?118{ zPI|uHyHPN5H}DS3f|<)$^@@4>ktlZ}JNs;@U^Oe${~{R3ItIL2G^a@sR#o3dpdxY{NW;Wcu=1$XD;>Pi(W~0N$_7*N6>kr^!B(`K>_Y zzjsYl?S=cV`znp+J=Bkt3*4+lU~Q3QQR2;ZJfxnHE+l~xua7rwz<~SFsB#Xsve5nojmSmaac@X#; z!b3KbSf3E{erPyEJY)IV=D%+l^0Qt+O&okbj@@ospS}<4t6Ue2b2y7U_Arz9&qfP=HZ zNa068gX3(OdC*_`-B)dz=<7FP&1cJijUqKwWiyA+y^j_jGVuh|uW@a2Rt5ipYG%0s zZ2USa-X^aEHGJprknHA#eA~5?Nn;K8UOuyYV{Clj>8^MuEuw;I9Go+?1@d!3afEp- z`17@8dUs%B*10mCs-Nh5@3+YzIdRC(du)Xc{0ID$E&2KkCWl`cn~66?x!eAz^!|YV ze=t(?aN}X{?@|pE@y4bf{6mFy*rV3IwD?~JbC8cE-Ce{Cp#BEq16K}W)6o3CQ-xTbv@eb*BCEboZxr5Vzjf%V{^HM`sw4vL~Y=TlmeS0%>BlIDT}f%Z~W*Xjx#CGd94n@`;bZL)94`hFFHwlYsZw1L-~BNf#_K_i(OF7 zLEy*5f021%{_AN6E7&pc=!2S9)fXzgGo<6@nCU+pe?3gL^Q%oTm zQZ+pr%RLEy=>_DDc`GiH~ss;PnmIT$RBdY zF2{=-pcm=+YGq6|z*o*!tHbx(xaB&pU-ghbL$7D>O`}ge<7(%s;r;Nn;L^$5dr)7P 
zax!ubQ?>c8obMbV-uas-UVE1h{P}`|67>IHJgGG%3HY!*XT{fv1dAuSR$6erW?Nfc!r9Xk$a+;@fbooVK1)={L--2-B)Jp~zgqE1YQG9P-<$BJO#1xU-P2%v4jGRmr)G#Q z$5ZL$=J5Sv)>{3p9{v3*tG8eC5T;tCnudy`5k)}(9{C#3zh*!1=^McOVi{@$793%G zF_f3-TTr#1&GBaEAkbB`%JLxJ#Hss)QT#FW%C{3sEjDQQ8=FT{{&mo!^wVQ{ffs(c zs9lb!?)5U_m!*h~4;%6>RMN+He^FvjJLswjMz)a84k)?)JH4v)rp0 z7topSK2L`HXLG)pb1WVa(LowaGWMWTSI@@6_wCqW@__|KOiiW4w{TdZkwuPycjMil z|1lVGh5ZxD!S{Zx6H^05T>4+A5-k_j-}#-V&;N(+^$CLR|JS!a2vePA#KKkMiGrq* zD79WE(5KS9N=HCv)2*@DS;_y z)c%k@zF`3QRWxN!4gLGiN)k&Ke1Eu9e)*F1GooTj*5w%e{+eSC5J-UgJyuzvVEP)Y z&#N1$3z0-5KQGjNUtC5tcDJ&S<4w@7bE=+%_jxRn znA7RUm>R`1)Oz(B8n@weGF=fK`MqRY zlOptwShgkx73iNn%KrCSHxSn)Rtj7j>E9pjr=Q*Y4D?O=GJ>l?U-1xrTaT*0Cr?VL MmVy3AQ#P3SfB(yc9{>OV literal 7816 zcmWldcRZF~7{(VT5 zqa~B~Q&hza;vCDbi(?&|z`yIy4d*_n&-~TuY&cr%HFdVE%O`}cGAftv{t5cDvIExq zq<_dd-x0K!-d;J0C5Xd&Bp#~nzYP9rn`*QsNdN1E~>KbaOMkD1iOG$K8C2}4)={3N{(;BQ#reA5W(&*W{f zpTi8*Jk-ogJ*3|sB<_u)K(BOD`R6I%zkZ83<>AdI6^#>EDCoTKzQMZ#;Z#Zsvu{YJ z1@M)Mg6eF{8TjO-2;*VGEmxy|F7Y4eo9>FEgW&J2Do(^>N=V7!7bbm#Ud{Tv$g&^! 
zpP`TV&xZ78|IuuS*#pkbParj9*4)G=p~we%T64#e7wCDG#{VA1+uAwbyZy>UL0d|8 z-}1-@e}zL;9Ic@L8Oi&n40BH(7-iPlNL-5I+9$R*6ZG?U18tv>{?XwYE0|mAVb3m) zlZ0B{JBp~+NAM5x&EvfZ`e{#oT^r2RbA_tw&x-7MRl>G#-3Ps66zf^?ylApb&i%qW zX-x-ui1+A@jG?vLlVR}p+hiG00{ZWtnpvVSKkE&h0Lz!ejop%Jb0MH_lPuTp7bN{F zv;9Kx_TM{vOKXRb@>xwDbHj1q6*F%XHAsJ!xvArrPfPz-M%6ELp5>msr?fiw-&-rR zf&Gti9r(7Kfp`B5lpJ*bh|&dDS5~?I7Pl z_bKg}2g;1#KO}DAVh{QUUGG(!u!x8F?_P=>$~p2{;F>boAH3x#>#spSQDt+xAMdjg z+F0GTOx(7$?+SkH3H4d*)(YVLit_SzXxNYUS~W}Wc+iPl-whlY7L^1ZDm|Wdfb?%T zdf*G*LkZ9`|G0jn-muNEqPtkstEZv3AEJQxdBbAJnqWRyvHGhF+-^r-g zeAt7ELJd-CzqvqtKSPBEO!}AZ_F2U;KTf@6m#QNY?aJDoyMKcEUDj1^jKSYc$1vj~ zmOff1YVcAJg;KJ__v&#|DV@8KOrS03=NvgB*|F3G31srP1x1#WaUg%-?S)(wQ_#Po zZ8Hr#eq%*Ru5Z^hde9rouhM07XF$JAYc!JV&!5w8Y_rD-2eYIOq%#sJ$(z3MB*6RK zrE488RSN#jZ@kjhu)G@kON|S1DApuk+oR%Q;2t*d+})s`pK~3L#&SYx_s157P@Eac zaqNwO_4g)zUK;}c#Q)YjXRs=>w%??IE+WzuQ{KCjQYqauI+ot+p#PC5u=pQVp40Rd z=eHq}k{LGgb6uiRdZ<~8Kj*>U!NFTH3oDgKPVQn-M@iatbTS{H|Mn<<(@0hX{c_lo z3VN*Q6}X{cVGN}v`fO*;Era!Wsu?zK1OGNfbB;o+VI#~p5*>qTtBTA$Zat?`KB#C5 z3BQE;l(L-zhwy=olB{vRxrn4%gY`Rc7l40yRDI|taEl>_glepob?=MA8&~u&o#pO6 zl~=&yU%5#pg8#ps%2WodDi?mfJ^cX6oD11T9Givp#m34AgrlVxk=F%ni5NS$^B8Yg zMUCcE9Sa_1Dy8?8Ter3&)ZZB~a!w5+uk1$e0GZqn}Vp6!CJl)*7u;NzOyPiiK#1%cUsH(h)AJSWcs3# zN*QA3@_X|GxKXQvq#ULy;vM>115x46eEf1)74%0hM7Q|^XZoyvS{GwG`mV1Z8PN-~ zTLPtrw!-=w-!6Un1^!Myi+x_>!;RUm6u&5;j?@jqhURcS#uV6%j{AV#-J;|wKRzsY zgLgpa4&h_JR(8#A4!AOBmyk4YgXY^zr|}^tI<4K#?x@&9Kq9c^J(V)*p^`on0Gv&C z###((C6`RJ(%GP5Lchh~K@P0HAw^1Y5d1If_?u^hb zTelwQL)EM|eZ|_}at7^%Cy}XLHTz4Jy;RB<{tXcu3V@q1M1SeV+SYx#F6>IEylb2M z(*6q22UVKyy$75^C%AVBAI^O{XP5F6Rg5q0xL^kJ$=D*Z<8mnYUu^YN+kth%1D;r*YcoUvki^|#eM7WPSrZ=?q3D| z_)Nt0v%pV=ebAD^y5;$qC4H%=+A#ysX@dU@z0^*#OyHZhN;i~aUH=J>lW)GF+M2QB z2iVE=n>-NO*$n=kd8_kQn1%wk74Bn1pVWU2Pffu4H66BPDYFXnWe+FQ|6!V7XU&=N z^Mt~+{+#;Da6ir%J~cW-uJ449XuB$=33S#E*~+8peQtIRy>S1`%r4iRf&LoB#pTs8 zk7=wt%chcosIFM&_4mw&P`_|h-YyvY9ZmhBk6;=tR;^Yz4E6SxU0MnK3;cDz=eix} z3$7j5osMa;F7M{*qllAHvK)}X#$Y}qm=aI^F_rUgt>zmu-7bQV=@&vuxKh4V8j 
zbE$Jm7&!ODzkhz1rV-+H%v&7QvzqE%r;+QIJ@hCE&Tq`gV!GZ0Ow+%npOkHf2E@-k z9`IKL|IseNbr#SU{?hI+#Wdx-{&C4)L~3ngi?lH8|4c;ulni5D&Q zLvK_@ZQ}(U!C!&yl}{6J9!tl{O_-*8rWaW#p}J~jcE(eUpf^m9eQ^%_b*(%27BS7h zcXGsn6%Fw`cJ~p5{yej3ATCZE^iN+YOjg44<-{-YWe`t`AD*DmEdo!xr_z`O+%U9g z>uyZLPfd(o-$g~H=k{?M!TmHV-mqG)4xG1ynUxRrS9vwux&0q zzKfCib1VN0_!Egw9#YV7>b0jjm43h_u9hiTf&R(j){kZId_GN^r+taS6#4PC)4af+ z$mdIv^RI5!kbDHD9TMsOs1$*o9pYjgSAh9j^Ee`oUUJ=P=NPrUqZSQpcDkbZJT7fN0D?$P|(7yP->rPD-7e~St`a(}-Z za8&R`HLA6e`%TqAZ<{P5d9++RFl3XhN zAHeT-(DWvE4N;YC{M6fA7I-1IknbvR<(GmCPEh~ky(zJ0L;$x!qxd%HpR@E=-9K9c z=RG$53Em&0JpPR_eyDm!uzC0g^7nHtC=ZhWe{ba=W+A9w{&KoI3ypY-d9`e32X4Nn zce4=aL%a6AvxoH^?rrIiB^o|7=sZsU3_L|dY*Y-mM7W_k{C>yTmR|5K5;?^xMgF{o zz^8v7Pax;Bt+uC)c`)sS`FRqUu=No>Zb{^ACHgF!8 zOeIRHUiNoH!~8nalBCN){yv;4y50epX4~ueYnT&NSXvd8sIvf%5Nqun11?(Gr3qZO zish~MEi~A%k#1+HK5#RSTfs2@MRBLO{O!jyd&^@l@75BZTvo#k4WxnJxb!<+0XRcy z@}v}|xiQpC?)pwt?{zlG*#!M#Ca3<2$U1PAzq$cxnC4#oFR`-(m9EU^=HDmx@3BOc zZ6VAD6(vW~V4iKRU5IRi`#6euB9fd4m5uk z1};RoX8`keBw^_?_!f-@dR_Xwd62M++w{pJIT7mByN&6lf}SacP39~bmHGR}$n6W- zr(ZI!P@F;5pSaiy`69}*S-8?1jfy-EcbcLS%AfB9@`|{FpIp~q5GMWFrECPyh)n&L z?~AEOVE(6psbntcKb#_U9C+Q?>EMfK>7c_WLWYZQ4t@XN8Xaa9jT>Z_4*8BeC2&6O-&R8Y!V7?&u zcjdnFCi??@EB7-ew6@l-`>4Q_I5g8BT_jl#`uo{c0_6ODfl;k#2U_z;bI_5#NgQcf zX28+#`?YCg-stA*-3X_#N=*OR4SETKU^cklB8P;-3x+WBjf6`Q z5AG4>=FhYZnV`QnkF~j+?Et>c#M5ksnd<79)CJ0klP99scmGfa{WA}1fmzaDn-s@XTbVjyi-M~>*22mbo(|(O}@o`@OPuwKi>lS*@_8v zG0eXayYmdJ6W4RJ9>%l4{m{1Wo8vAP(0j>NA8f@mzgS=Zfm15iH!GFkMrxiK>Ja{5jDT_tUPKq9J zaYNZ^#&1=JP6Kz!?~^nJ{iu~cn#cQ2>@8esQz34P@}?&AK)-3HKOvi*0(#H&9KLnP z&lY(@kz0|AjNwM90ho{4Z`f9L%9H*dO!M;a9*(k`DfhpjE6pB;`x7AlwCOwxN?Qc| zpmGeMjwR#02TdGV(et6K_OV##&uysnNv9|1hice$UV<_+D|j|I0atLOIBXQlHOX_X7Q_&Vrl)7CjfNdP3(T3QnzS zVGS08`U54o8=yZ$3+&Iy;>NPIksM|!SJA5;X7kk48mR9o&bR>capbVD?3HvZGcS8i zq}qo_l(`nZZ3pzPj^;IWPjWtU=?{7A3Hh1Urz^S>g#;(MXlc?z{uOZgtqSXloV)1x z%mzz&x&}%YC!r{T+TcB%W>CLSEycA9{Ovzwb%$X^5w{5mTSrtDOJ!V3fccLmuea%|VL8D8s+Qgj6c?L#kTrCH%+JL|8g<~Ga-NMp3aiq`a%EJ-qsliE 
ziB0p6-@7#!tLeEwzaVHFCXAJ5>HmeDa3YdC7*Zv*G2~D6ra)P8ez*JcZ|VnDDjwfk z?aho5XM9?cx5IqZqrf_uOnyIIPCwYN1o?S)nF!B6lsck#!uJ8(A6=eDMT_67sV)i_>;1^w4ExQpxNs@YqIrN-g;RTPli9h5Rf#Sjy9fviM}~x@1|B z`8nlBSsu)963R~^Lov1;)3C3wNAFrvyl)DEfA6CM(soi%e|u4e9b9Cdih(Mpz7p~I z`9;aTEs#HVjhNo;25xf2ZR9TGXSdZ`oO0;tt`CMUdf@wCZ?f*2d2)Z~O5TKxU=7ph z$Ak4oDA&03BGnW6<41GPn_)BHFGo1MKY_K<78IsGRiLI|ksCHAen9>$QhS!n26}X3 zZz)tfFn2LO#fZ$$0Xg|M!2cqP+{Zzz zWB-N2dvpWp;yDtKVP;CYVeTr z@iC21wo1s)cfV_Yv_|E5)gDVqFn@l&ns8c)>^+ z-0A_~|HJ~Q&|jlWUvIW`h5WpqZZdfXsyZ`0biYai_<+uZ=*_@6%3|f>AU~gED%?gz z)p=#5Mw1uG{2aa^W*Pi_`$FLRDUG{K{c^M*`XFhl%*DuGzIxC-J|{tdH$`6%7bq}eztIR$V*1m$FfFp;^F?9UY*y{B)@NL zRtmFDf&5&rIN_L&YP-BShEDa7`I#0+=BpU*lrU-y%y%*8Uw7#N5@Vd&`Myp^m73)|w-G6WnjpJwG;4bUq0{tQCNy^0UAWT!cEWIW! zMdUToPxLv&fj&mof9VDABb>^IA)jMo_fd&BRK)hVK!_Fk`>Y7VuVM21^5$5642S&8 z`#tD=2CCa>IWcG~1o~rfH?vB?Uq(0$_FqYia%kA8Z)H**=A#**?|Rb_pnrVH zOZz*fX?pB!*(^l7SX$t9cYg#tXk%ftDY-B~Xr z3i&XK&-J8W2-K$?C9+PVTFvUuXW;Mu1lXCc2tp&m7{Ie35m`HGfqMJa)E z#|9%WfInwS=ABW}UwO?v3G!_tt(3tO)u{iOe_FNz`cn;$hJi=F^Voj}&L8ckv`<1m z8gY|0Vt)qtU`F5Ytp@a`sD!15_l`n7uL!l@*-KPAH}m?t%>&Op`igS_xYB#KxB{pz za?yP{hPW2w5jt*e3;uM>L>l@2@=7OgZGik7r}wbd1Xc6b3v+a8g5K#*N6Ibm_xkF5 z{w%y7eL*5=&S=D8v-0=_1l%mKPm~J!kjor8Yp_3uA23HyhPmq(*SOYu0p}D}&NhSm+$zlm_i2S$ zPq9;u5Aa)k&t!{%ixiFd!TR<0+SX{up&{mQx;KHP9$+_!Hv~Z z7SYi~3WbHoNTR*j)J)(fqHhg;x*}su61hi$s(K|z!cJqy-ps9twzhh+r)?ee_rOZ+ zi^GZ3oUBA4-(fD22$12lP5%z(Ptb-)}>wR=BdlhiXqj>VRQ{TqNH#dDUN4~ySE_;U8l0?U`r)E3x$4oRQpVMjvp73l|&RsE!lQRNpy&qkP{# zHv9b8A2U>c=0Ys665sfK&A=mL(O@o;)2pu02L0A%>^>f5cITbeD$x$S=Ujy61c2CQ00B ze6mAw3DIXl8dvDQpqA{)1aG>gQ~f>VdYKZsB;h9Su%sjj9MMgIw=AfB&39Avq2H+t z_D88+JxwIh#kezS<^mj<&z2rj=zl!Gar0&28T8MSzalZ0#B$L&qJz_E~Ku-`>XH*z~V*Ai4w`fRUP(g)!9B@W!)37q7s z+9^C(;J>tF356&sw⪼ZQ8T{>3Qjj`bi?8#V>1A4$H zvtx3obX#TQbma(e-WW&>Ws^iwdz}vdG~}tIsn$zHDkxpH}NkMIAJNK z+=9SSeWau&frYkt3k00JOEJrPB=@>NT)L$`?9aADw3o_;(&Z?;)lm2?Z(Jnxl5@Uo zW3M>)de^cg%}){!2S;4){zmk9pEiZ)CMPc_)EJa_fRvlhH0{252ok&qabVR_L 
z808Cii|u8_e|H_JiU_{k1nL!VDocVgBY=~Tf6{(G-cftnQKov4dd+Py=GxK&oG;fe z$iX*F`PABY6GYow8|rJlh2^Shq=j^u)X#k-#=;Abhi*1$%=!dG-|f;-&XvZJ87_?4 z&n8rjd00~&*Kd;elxF|WGz&QNfk%H|V_DkwQ_Y?^imBYYLHx%s)VB#K+n^wl$Y8xB zU6Fz43+c-t%OY5Dy~uF?$W`j+&{fl=t$mO`ht8~O903m7S<69DEYr^4v%R>EsufPG zJ7oWuBzk6>*%UJo?QMSRi;@PGpSsrmQ591a1(tp5cN>sI-o?TE11u!*)Gon!+h;`E z9$cH&w}@4e+x@>EixGY2l1MOHAeJBE;e8eRk9wc*Dlpx( zmn8a1ew{7?KYAxgw^^UUif=D!v9A`Rs+~R8nT-UHME=DS$f%ekUU;UlYit3|ugZt# zwqay(&+tcqCAGlKzi_)A&hhNK-dTMG(cz)j7bL5(lCoLB<+QU@138~!y;_VUzU&IM zo&-O7f7hH@a=mo6Wcb`@w`qGfy0}&FF;|4u~ z0tB`6UC7g-8T9%ds`F;O030*oO;kKqjhub_IQ~DXrO4&9yeRnS8^?gbh6dnF$n8e) z7;Rvjwm14s)yi=s>@YJTiPsWQpTEtJL`m0RpQ|>a?FCpq^*3P+t{F%F?g!L@bnMEB zzI7xKsJ2yk%PgY9?yQL^(88$Xt?QqGt5j16t?9Q#mL!IBHe1)fK(vp*`kRT;n7n_= za{0_As>;)nX`%^!Eqx|NJ3^ggPG5Jo{ekGqX34iz1hHoNuJhLFRn+gx9Aj)xVxZ0{ zj<~3K1E=Qk*{%OD8D^U#{q+#_v2mHI&G|zTqhc8fS@%HC@L11)J61mvl`v)#NmVyg zDDG)Ag!9+xCFEw1L{;2=&Wb|d=mjQ9(O9eLtGC=~CN=N#>nYzgh;P`6A+{3e%VAP& z5!|8pP$j!IgZ~!Q`ty(QZ2DJ{78$~$I+1vrNc&KvS#ZJV~@T^F0FKb5~$gjiIOL^z++;pGM3I4hbwD#hwn zw4q;gGSwqdoc+awmn0^x3mQ&C-SVD0zFfnHHC=n>)C60pDxXO)d%7NUyr_~A@Fs~` zagS{k)4=JgW~Bea+L=pbM5ZeB^PuNqn;tjxjq4)pJ@0_Sh~a;pfYnnMZ=Kw?k?LIk zzS-#u=$V*MUrdI$`55nKRiLnDePN@-sddy__4VhEw}77NB5oFY$kUpsnWPp9e6X;8 zCczeKSI7@M*d|LY+ip+!JJA4~gpNd$m%tHns&D&;)t@;soU+rXFUNg^Fqau{-k=Hg zZs1@)!rBFE_6=M7_93co$DnIzX(({w2`Ro&;N*943UXl0 zaf9Z4)jBqwI`Z7gFzR+tH!cE#O)E=uf8~@6OjEhXpeMxghR#GGpe_y+)`U!Bd-5!^?VfBBa zZCaZusEJFXRkF{$&_AB*1VRD+LHVAk|kwof&tD4iTz-cgO)^@=f4&Cb`9+*?p zM&u>!pZ9@NR#DgS3eo;Y&UuBt#ahNH1zjl#)K^`u2hIX8r!+DzXl>8{U3pp0$b*P> zU-c{Xt|7!tzHyJnJaa>ChEmgGW9Ykk(j@Vv*P@UQj(w-e9s>`0sXBgt0rom_UIzSfw`WjTIoR#2Zf z%auSfRBN2Q3ZW19=M8*_yo%XXWD`O&KA|Vs^HuuQ`h{xX#8Ve z0rx^P75yPUOEDbgWje9QZ+ijgY|a18=Y(kLx!hHgr)b{l;IiOmF*H1}Vx@;<4arcb zE;zp)^1;86l{F&{uRSKV6k;5T0(LCF`}_S0Nxao=9_CmE&J@RmW+wXU+-<=zYJ^oss@ zwtI*=<4ubNw%=rPP`t{Sv~hwa2})3b(u8-{gDsb^gvBxCDvl2Kf z#iL1fl9*GHY);vNXy(4?uPJXa_nRTMLVt18KNz4{bT61>$lVXry&ecY6bKXJ`iZwn 
zbo*~Q`337ymjQLx@vF5`3;lMH9&%^%$Z;!C{^PXcMhmnUrfVIZ*nNWb9t3YYAKn9D2<3)Em6}C8tHEQzsSsp~8)?9Qt2{+h^M{sEZG*5~mNJ z!D6D5j;_Vm(7S8wuLrtk;eNzlYF(@*8LWk7--f`q)Tz>?pfy-L_~2C|DhGWpkLTJa zb`UtlT??L8hz>OV?|ip1mg`Wg6{0^Z(xbBEc0pp z)Yd<>KtQsM`h7R6Zic1 z-d*4`;rOcDPb7nbN4lz}9XQ*A+TfKfC2;Somv|DIxb{~+sW%!pfoXJDD< z8!H~K9$3LMLFHY(>Dv$s{ieFNz4q-X;M_PX{5cuRs(bM|+O(oiW)JT~spSJ_HnLSR zlw_=o!4aFA5ncLOu4A|w@7eVznsakBnmV{{f%^#!eseBMy8(F^B&Tmzv;ptm(9fAH zFF_Txrmo)R0ClJ~>{THj%;m}KgVh7}Sf2f9P~lu6>ep=u6_!3p67ySTS0WX~lDQ6&SSUaBo8<_}SglFvX=ot& zO^Uh4q!U)GEn$=Csz5UdU+%0*NrZY(u5iWUE~0~nHbgAdW7K1~OU6-@VxCC%R||mo z=Dn*?vC2{4tTyFUDZu+TPp=n$7=Z@QRZoPpLZ4ouZ7$tfNix>t=0u0WJMN9tp_6H! zuuA`wo2wZoqVFG;-*n^=$x!mq{H+CX3wb29JH-)`Qx_U?Hx*LU&mD6CK2Ud>#w6FI zfZoJs)pna_vD&HYIdKtsXej>bg`bzfCx5z!4*ZrU84N#0L}vh^i-%WA4oqTXrqvNH zCy!>cUadV@5A|AQ>YYz5%(#t)`fsT%GB7qf#%iw=JS&hM{he|2IlL;U!_mP=HnkvQRA5AUk352;)9Ax{%`2edu?i`5;B zo}S>bLZh>5uh(5hBx40FqT+s@WC)(gFW}`xbfLH7?iv-W3;W~vVw zixC#ewERjkR@K|D%g7@c>y9Ngy%&SH-Kq7MSHfCa&4k1I-Dp0X=gQn(F_NJ(ns?iO zA8^W=9;mWoy}cpRynLZl_1;7B3HH#(28<6R7sY`diL#zPU##6#&sUaXg~p}B)Wj{1 zkqqL$J=;S5k&N{$&rd${Ky(3buwuX)to`U+9=`_%npM_~h4dpC2HH`5y9|LdIQ+fx zAlAPw9U(;?qZ)tgI~7$4dcNOZ9k{O!^eo>iXxG5Hr;nX88c9QAQaiEX5S-61`7`x6 z7wEBBT^kt-dg8a1E?vMnrv*$i@+Q#2+|KwX`B0aP8cNPqHv;FEzA-7`?=91L0RpW4fiQC`vKe57k8f9^t;+~<=D58poJ(FYzSZFFuPm&BFF$H;? 
zgy{UZHM|m2SSKyz()y3#Xl9>pm;3&6B*XY9Yx%`hz{$^u=rF|wZjpOA(x<2oHL=6d zFJV3zQ9ZHgeHrMPa(kL_9qT1u94a|ZqG5|i4$qR);Qm)8qzYUCUc+rIHf5Bps@@OgNuHC=e43c58@f1rA2XK~JrXnt2gL`9Q3LP#~$?Z77#r6u|cnK(B zsI&JMiX%trvEIwh>A`F7(NLxZ)yii89In&68+MY6wOon5`ATpQQPc;b@CTeol5f1AJz7EM(@Ia zW8>uJPz-un2B#vtu|fKd=e7zaRP1#AEuW3Cz%e*qHv@I+eh97O1o!i>|I_bCcl0e; zX)b$o9CW-pdRRkUh*dXZff3rvCsOM*nakCG<0Uvz*oN)x^o%<3er9?Mjy`Tv-M%@q^kTLB&o4yJB z_FwWvm2At@fC2fbWfe9C{Y=7Y$3w7>%gVp+*pnG>z zO6*^(n|00fuWtqxPrcl*>RS%U5b?(*IVP~LSrdFz@GRC>c`D#FIF0^!2;Vf*j|4qF zEAO^ezAqqP=BWkqC%T_uzEUu|^vVt187f_~M|rWH zjMOpqQU+GqoT#txT?YEDy>Uad1?*LYZr4Jiu=24Q!5xg3{HUj>qaPH9l7gW1)UV&0q zIaM?ottt`IPSW;=6zO){g6~L2`NpYWwz-uT9`-L$((G@-?6Mz86u1hpGX4K|uce-k zM_rXqYF{`yQ1`Y3AM#x<1)QCmTUPT)qGXF--c6zZosY4%YnU4T(;gkNuaY`+>T{XK z3tp0@)}&$0J_sC@%@@_I+_q@X|cq>#$*MhDQ>YF zZCnJ7yW-nJ5bruWkDE#zz~K@u-dscd^7VXotaAsdkyW00#B12;;vZq@z~8@scWW4no{Zp@*^8?{Qv4+X2siib!2|j zT%}@%_HsvGG9!uNecOwtU4Wy|(b;zx6Q2GLPG+(px>vs8NX9wfa89LFN0P)Bp&@P; zsF%v|VVf6OE7n~7FN1oL!Xy+% z-G}_{8vZa3^_MBK`ZBKs^SaZ6BYNsk{mRI}9L2pP@o&Y|*1Vh~TD(-MO|^mZ^+*0c zF0^7WI_-WV618s>v7EH2CuzF{zX@N3^WN#YC@ijmciVOLM{nn)iodK*_dawaiBhSD zk{X4;VQyfxm&1Y?wa1N=DD<`PZ(Y}90NjVTyS+q%B-*wF1zlMKoZKstZv!@!5F?}Q zp-QJ;c_i^to8z9?C~(rEsA+X9#db&a zxm*g) zKGot5EEgq-9)oD<9ZiT)k$mG@>^zoUuf%HkTM&(WQZgEnT?27b-krK*5;5+&-Rdt$ z!D{#8UQWErryBdTqHk9YlSG9~%=x@mh><>V*!ar+(f5WAHS+*R#DL%YFy0x^TEk`Ug2r}O8Fn;`LmjY<#yx`b`icHmZ6bj8Tgjv@ zzT>5uvTT#a|89pqmkHdj9RnOcR*eV)tW_ zWy%l4cqR36=Rz)4oNS_7n53X_(NOjb0|lt-6Msspb^(X6K3O#dYo?8+KTe9En)l1t zu|8}fiK=Yj4}K32G z1Bd5_eai&a{I*U)NL~R=?>vTK}CZ%@qUB|kH>X!9gHdOP*+tTLAwIoqL=laPs3o+9E=AKdO z#ALHKzk;tKVz_XudKn{<#-5a3Dg@`5wh_@wfx0d&{g1`c4QugUS(oE<1Wl%>SX-}~ zA!(ZD)-&9d5aZqh?`O$BvA%`Jzwb3G)K`DIz+?HNvl85?1;KUals;|a6IHEv< z#|O=9I=!Y=Lye>nq6#KtCrQlU!Nbw-;XXsGdgIry&enq$9j@o08BLJ~N-TLKO>2Rk zpcMw3mL-3E8*I3LB;uk{6ZPq4bxHXW=-W5_%&L}r;85;%Q5fsp1jBd{{r0)TpM?K zWeqmUY{9iXcd5@YrdH-&Atdqr*Cos4=fJsWW3k$Y4Xi{Z9;7*-nGMtDk6QAPw9PZZ z8tspOBhqaBMhfa}UlPlqF|6k}^YHH0el$DkaICu8j-(m%x?7DM0*-m)W$TaFI7sfx 
zESDhlw)k?n!xQ-bkAsXXGtjrR1bVCnH#SWA#=>zR6)|GGc!&@0Nt(ci3yW@dAZ|=6 znYJ(&N{q7a%GY8&x7Xz>&*A%mdhH_s>8#LuE8o{>SQSNH|Fo?2FBgaDzG+v9BD5I(V5< zH#pdg^_rZUuDo7{{`S-?07mjYv?%#+e-?yoUZu0=nGJgebB#EiVB12Cy;6C-`#TrMkUi12k z-F+r#VrOIZ3GPjBAD8tlt)PE_LYf1!0oa7UFy`8&UsO5k)b;E_b>Q?oRy}tbF;btY zjWwxbqf^PH-5+ewA2#OVPKQy5TW^*0Hpt@?TbXd;C&VbT7g+1M8|yXLjh9O(pnow7 zJgJ{v1IMz~P2)Fk&PYyJZN?@FmQ!Qp>ga@YfK#2zJaAUdnRI~eq&i4GyidnQ2YX*g z$=9QilVghUJT4@Sf6-fXV;@OOhN!gik0C~})9h#8B&_#UQiJ)#T{Kmo7!x7%0P5{* zz@vy9;2hR>J~Dz$1iOO2s2)cJofg*R0X{Il^osl9#}OmtMXtM%4K`BisT(NrLIVjx zdnTUFlC&+-hu7@T1y00qCmzU8akE+I>El?hWlvnxxEy94@)LNxDUPJ+n8r;!wFQpf z(z<;ra38bDtf{l;E$5HF%r{`3cHoTx4Df#`TbpkA|H4K}&HqjG9Yw=MD~2l921y!U zq3@3i(IhdEdq9je8Zq*3rfL86!g`}~rR>(fG4sa2XjCZxzE&hi|G5}&nrSlKcG&pO z?gUQFRP_C+%oe8$prakb$~zZC5hJN4@?)$rHsZAPseLnuMs94M`&73;(l#wR=0Erh z`u1$uUQiY>o;Pdlyt*6fZ_q4o8&t%+#>K&>Lqfn;rDPnN9YTztV~QI^#;|ek?1t6E z$EY{K)ct%I__B_Ug4O(a#CX8L>oXIE4X4(qOtwHjO|Wo^|6n0$oQ0p#d~!%)WO%uy zbqnNC^2XmmcC3H&S7n!A2Nr*N_(V9D7SyE?`|XcE5hM7J zW{?op&#k_((&~s+-e>2j?~;c;{)J@~%@89B+c~htW5X*7w%((mh%sr5jFxCHzbx}t zgZ3jv*iv$8P31IUZZ*O5w7k_i8}_iBDKV%VIE=Hn3|3Eu(^ z32zp}NW7lK=L2=p$KTj9yowkeOAmry9VUqdoxb_Ji~3BymTsB<#pr_|7$9B3hmrnCIXB@HP*6P}P}jesl=F>x%<2O#b)X zkZT8%QOL!=bwy8?sNi-{hheDGcRiDbN9NTctepYrhhUd(J#O0(f8xPStir$cA?sF zk*xyeVpK%j+L+uM?ZAPv(g{rf zPOh_^74%&}&#Xz`HsH8_X;bLIESH|m%sCBF9Y!u1@62A4g!}KrRtDrXQH3t4JxFKe zI`$@S;0f{%w~v`TdkHwY2g9$y9_Y#Ed{P6U<$UD|A+c%d`RV7qY=8TgGsj3fe+CR1O{`ojX=L(Q7yAJ*{+kMDNeIxklm8*rD4glxV;ls9xSYoxc zf73I4YFKq`+LdQ7fRikCt#%J^lAiMCF(~Rxm1di|3cA0xOh}(f0ZwR@P(0{}r{j*F zUq$eia<5BVbpzD9pUw_)L$E(*CwI$ngpfp~T=x2+jfi1OV{6a9jb)y&=EokAq<&i( z&2LSBJ?`d6rE|9$aI}AB_}xPb%V#3`^R!W{*!ntBtpYgDkC!~F15RdcHAfy6Q|CA( zV6aRz)wS8)NQHXN?$>U5qd^jH^cAMotp$#!)Vx41mT!;=aE+{?#)hIEM_%%U{KQ%G z=yM`Q{5NvuY%wNYbF&`vgFX!yt$nv%7C1v2PX5o=dk)?|cy2S697ypjzTr%Ldpl}; zE~^Oo=R{!imQy4_d2UsC2Y%4@-+yNQFR_wV;b3D+6*Vc6YRYDPl_YMx5|e)lb2eUA z?EbPO7G(D$b%Xg)=7qIk#@$cA*PG9OWruq6+B=k8&4Fd>LL1n{1F1H#l3zW^{v`2q 
zZC=WITkzG47eibyA1{9TWieibRVTeha}s%}DY`Aw#pK7(r-#SRT!Olf%h(cTYK|rG zo}J(FPovBuJhRi!^GRZ8&GsETZUQH6x;x#*;J$qee_}E{S`aV zZTo~PM@PS7%^eqIYQz;OMuJ4Sc*}XvQPZG`4G@>u{c+6A4Omfgb5q%!Ae6P0&+Ewj zr=at6zUpf5pWcVFCMD~!>ie6o&OW_K^`<6CJgWp9$W;hD{a%YCKB?P#rUB;?am_EQ z36rPh&&6BFQPYPN8Lzibk%ZsK)7~!-ml)@q6@jZ*^;vJu=(gvm;LauACiV@mr>x^? zECRpmozbKrJBroQ)o=3{@>6~0G%gNH$X|Y38T;csB=J?{$?6>jVq7+5?uwtm+Tj)( zjQtm>nI~E`+cjNCBJj1H|GQ@}*OpxyCFpqHfj+b4=wS3Hs=>NY1NvvIyBYJ5z-gkL z_+x=JO4_UMR&`VTf|F4PcE`YbPNGY#~8=S8<6KI?Q)h-Rp87YkveaO@yNGu zflPa<@0eev@I81>D`hvmEC;&&^VU!KkMKJOR;R%L3#@+vpNu@OO3hxUasCVios5WO z&gx_Xj*`2j^eQIJ8~;qH44@R-hMLh4i0e$a%m!i5bMF~lCRt&u^*JH)@O4kBPozSb zTKwN0|D^851=urs3^mg~7z5|4oSc~yHi&Z;^IKh|rf8A@20ph)BD!)@g)R8csNZ@8 zIcKp}^Y?%U3%V$8T-2?ew}B-7Uf9xP2>bJGnN~wC2ds03=rD71r@9~2Q^sB32VSq8 z&aU4Fb2mHtWFq*n%O9keISa6%c0Zy|oTKInXFKsR(A!v{@OuRK@+c~>QuPql&Uq-& zd*~w-@hIK&nI7oh0tS+*gHA3~T?}6+ z1J3O}`%}VLPbj78#(_?%olW~=P$>dVnRKT-%>Tl3?)M5c zF_zAeo;6Mq2`U@y9HxQe!&!NO3G3`xskr;(3{~a3GyVDK2yi@QLmbr+!-w<3FDr!g zlO83MjQaGnVfW&oI&h9kiU!>Vj!xb^@vm4<%{Ys%^#b+% zVyW!Q%TV_wU@Ep>F!Up8`-FiDyN3}g9h;a@#Brz6zIy} zim>wm*hj8;`6r3L!G?)nU9T3CbT-zOL@VK!;O}DeN82oc!=`xZy*Jh=tZS_h2&IM@ z@5tk~zXONkWbZL+cyEiZQ?Yg(v@)#4p4A7OrdELgp-%mV&=n4i}T^!Co9x_WN{ z8|v=0Hn)C2-)f;GJ=-Tu67P0?I@i;O7!f>{`rGee-GMJ54_hBmO95@S>RQ2f{S}=% z5CeUDZ%^`=WCqqZDwQACu%Z4PST@Wq76kp=b&P8l)YGcg2EOkwKY|N+=YBI`!_E68 zd$i8dRhM)=;B1&rg9lyBBbyN8{yn`wtpKd=Uf3VU6hvpT`ge%FxEapr8)|C@@1-Fn g7GZ8(*g*D&J?)GXeVsvW)|jaW%)v~zfZw(M2M%`pegFUf diff --git a/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin index 698d50c7bfe706f9a27df7eeb15926e96b4fe3cb..08debb98d5e32d1fd344236ebc7ce782664556d3 100644 GIT binary patch literal 7816 zcmWldcRZC}9LE(YsVg&zvR76{$~x!XjH}W>(Xy32Qe;H3Lny0k*}KR{JVLTZNq$1n z;v$6xlHd3Kd0(H``@GM6p7WgZ{T>Dm-{TAnjQwaaN<#N_qZ66b)e%3%IS9YyxgLG~ z2frNxWqtL~V)Su;J?jZFsh834u7wxmXHx~Q!9dusG3lr#zm{8J3A5Fq? 
z?@1li>es+`MZ}hxLoT=Cw7`xgHXr_=cF_Q7*!?-g==%Y3NBvP@PWt%nLLU{-r02M9 z*^v=KJ<>Anb|?WpCbJuHKj^-?dpWz%gp4Lv{+(VVdn&F!!_A!TU-;?A1G%`g-BUaC zd*Hoo#@uGYJw*n*|XJskW*#%RQF>M# zIdBW>%d>2PJa~hnV<6qXu_x_2-m*<9yTNud;gG;IUbg-O{DMWKdIWsx$;r4_%&b^^ z=~`f{NV%Yx;c>ZJs5Fg!W<1YZwI3@h~VJ044EE3$hoqXMT;SqUC!M( zj@jy)Sx-FAKvz7kea*PU3I0^6B_av@2H*C4al<=qDKb-*Ith=Ay59I_{oq%t{kZuh z@F_>~3nnosxQ?-<*B@OurPU>>Zw=$8|NHUiALQ)3`mXyhuXS^JY_<#$I`=o1=&pcV zuVGNrm+nugEKX@tadCPXiyttjIb$i1*C*vsan! zCc?MG-mEt;hWwtds*yY0&o6mE1@8_BsAMg=jQovNU#pd=L%tzkRFsYGulLGZ$NU@p zcU`cSByK4!uch>cga09q%?SPfC+q!`zl(SAZ+}@EavKFZ+r+lBo&|pS&zZl~Xfbk` z>%LJ07SWb=n*Bl$VYZ6D4BI2fq@E3T=iLtjFQX&EJcosRGBYmLg`mJ@b>Fv4@cg^Y zT~%zqga0Jkwzg6%ls$JoE9)r{=B6fYUu!@n^)bBS>8F3ci6$z2+*mNe`njVb1x2aU zj)kPe!1xB4IP?tor~g%xzha3JNoy6YpG0urz2nN~1tHInF1hOpy!1@*fIJo(KAQK9 z^)3qfLw-5n-2r}@{DS)e_zeyFSgvESG4i34MpZ=A+Na`1?VI3_5ciU&03R=<i|8(L(Q7V>h6<}R`c9@8J)ECiv^%CWkns#Fa%R2UXmMj#UJW{&0i4XYhuglL0z<+j1=xaY#Q29Ch zJmf49|K!Z7JO4g1Y2c!yLCJ66V|*elC9&LuRNjEBBuZO2CLSZM4dZ*()P0c$e}&l4 zejlt{LA(A*tsMCb4Se6*@*DUIX-ghYfEQ7IPpZO7bz1A$?*~xe=IV?^>8tSnjJXa- zPl4a;Ypb9MR=V;}ack8qk!{<>WF#GrE;|(se1qf8SBt>kb=kc{#acmE^zG05LS>^(jJw$$0Z*;i6anuqQb+2a z?Hf$#7#J|~xr(0dY}&0q3F~uwAA{dCeg74HXVhAbDJM}LGAkqE5_FFRNl21OqpsI) zYR7`#^ZTT;38pZ|bWBtm6InqbRV^Rh1D~1A%`gpoveSBr8YTzMEsnSyLXYZfu0Hnq z0{k|z%-46|*Rpfj!-2Km))_aXv!UkFDrZhBINv7SYFICF125)r*qRe-JH|cobbXXb^_0P zNwQ1>>q^)dY5q(=o!!;Z0EFIPuYhocDzS3>8Pk{aN zOQlCjpd9eoCh?W-SSO)?cISLIs#eS1lz+SpcqOx`n_Iwdf6lAG4C{rPPhU9GgZjAA zPCU93N+wP9tD77R0G`{LZ>}Bdor!9wKCVQRgofuIKk7&(eHD&~^m+>ZgjWfR_E=BN zyH-}$610eF71-2-_@KV{YQN1qR~h_~ zZst-wn97uLbbLAxy*hDR;;fGnnM4!07I5+t@Ql4RwnbQPDWvjVNH&GKZ6uA8s7&Z7tCe1Pth7Q5N7oV;8 z*^H@@mty+^xzSSx^SxGm!7#oGchv3A;P2XH`xpFj?X@-{{%B}dpwGTmPsoRZ_9W5y zP1@158!=U#`*88baN_CD-R}!sH-i7vXEho;pXf+Ax$_iEMO{|R`|lB@9jC0@DNrBH zN_Q-p!~CL(JXE}|LY_P9USNiv#xG@Vdr~iZqPT-ScW=N{JXGPzLm`N#Oio2dj)UJt`O1x*;J?EqKwZI9ikRG+ z_$Z=$kC(xQ7f`>;bRqp<%*$uf4| zw#3u}H)1))s?kWK*u2Fp8Sp!O&b${6xu8vuZz`thzV5SLt0h`}?#Fai!1p&DuWGsN 
zH26b!T+Vr6>fxgP2SGQ8$|HH5p$>39%qot&4!S}2i(RBNVycesyLHY}=$TYdK>Ckb z@Ozy}Y{PkYQ5lTBPhN%YEGe7NpO}tb( z{i9>X4gCGpe?E(WKPcSky+6$FvqPH03Q^geespbO4gAV=DYu${ulN(zt&geHfG13{ zF{q)u6uoKN3jPRR&i&1hC$0aq*o3La_}EP@>_el2truokOM&O=8mMN1-28LOmw%Xg zrYz~(@F39@W4|Uc1^o?8dQ{W4ao{FVW2JYS=QY$go2bin)BZTZoK3;afFBS-*K&uJ<1 z)_f*vYd9Vx8A5+H%P*>OR1o-)=LXka!t=XEdG2%{HNk#TkSq&4%S#Cx3_MFt``@%)kRe_5gKdmM~VdhZXbqOsQ;t42FI_Rz*J8%%j2_KP)7UD5WzV( z-)3rGM>0r4&YExil@(K+WQ0}TuM(AV&yt6^;C!9Q`%kxA8gi!97sE1`>bz1mcj7L3 zZWN!}@TwT{tOfBAsGp-G6v-oS9(1))FUZfs?<30XuVH`h%3Bm{wgldAMJ*l9o8Ikw z0k5H6j>-v=Xz_%+)Fh|hGSB%X$qQiS-dZl+dK%W1MBOk_#HvG0<#tKc|t&pztxJhXFF7>L6LVeLolBA`7 zqx;;cEq-W1_4%)SWd@`mx-0B~)Fk)=#%~qUzi;uKB;#Q8J7TG3-zIy)v#_h6Bp2$h zu3oAcFBNkAiEf1rXyL`V@U63>$V4??+V_(p0u zd(er%FW~3UIGQO(_bZ;*Ad43A(+~8-2P5jA#Rr#<>;r$09!b>-a*?)=ZYr2@`w+)h zVO=6{hiKtoi!+Qra$%c!J>8#~cR35KlkT&5di+M#9kW?^D(@hF9P)BI58YpNuD%>I zL|DzK${mL{rr8{L|Ruw|Gb9J*R=Pdv*Wz$<=pi9gWCzEGxYzz zx9y?9Y0T=qNwpQJ5}_$igZKSB597~^Cr%X5{l}sk#V|9iNa@81A>{lx>5L@bG02{mcyKnJ}|j_?>6}c+urEave|2 z>HSwyPtxfVkn`1D+0l(TQgHTU(MBRNgI&TT0p5T24&(6;cOj?J?w`-YY+YIzePbNx zs_Tecsq_W#5BHB6!g`A~jOh5U7Vn7N$8dVYpSXH9$z`#n0Q}mcYsG!QXZ_u4REbH^ zJnlW6R>)&FzsFA}SpVH+jBec_kh424lMOI0Di?N4RUu-=G&b5Y6+wP@o1M;Wy8m)_ zbq?lMnK@e2r-OWS?*<&mOoW`lBqr5>?r(jp#EH4z-SW8*_L1<@coRz@H-O*8Kvba@ z_{@~l9p5mQddEP{U^%*>W~XlXYZ3T?!f)yMkTZY2GLnz?beV*ikMF7}{XH&PU$RH6RqPPP3q(*b_t z?|v+nSg82Ol6m|SBJ4-tdAq&ap+92{SB;^6KdH3rykl4}(l5vq$D?Rr(Kq{=HNb!2 zd0)UP_-AEAF7aZC6zAU1gH6O;byC%4XC}z+YfA;v*PnJpaQ99uHk$pW%{{&@a#Ro8%jP}uk74HnIknT+?~ zK=+Q(E@@m30)AM^BLm)FF zk`D-4cws54c}saFXOvu|wy5R?`+M(gG1t3T3f3PF4^(YI!N#F+_D!%q_ieq%v`HQO<|oW@RWy-?rwg)$rGYy^Kz*h$odm5Vb% zVxD~`d|a6Vn6YszvO;REtp*cBwV zpuFPUOsc1NfhTAI?oi)Eh74F~X<>~#8fAkJ7ra zdU3k`4qQVR^D8@Bwo;(~HJRZy=mEcL8b0&{t1HDMv>bRwWZaB6D^&~ge;c4|Y?lOl z{EwcM46G(++?m%Ph>E&jIIdO+1Fy1vA`6ictc|`|HP0BGat( z=gUTK;Jfx%OVjT!lCPFCCt$7MsvZkZ7F1sRM^>>EzOP|ZXX7La_>BzTDl}nAduyq` zP9duNy`p4&t_u2JG2+=Ay?<0)c`!1DDW@{RB({XX`YPsAsf7A{v`q6(`Y8DQ4s6%N 
zn6hP+OJep0k*zis{yiG*kH^c_*q-G9pMHMsLlq|9N|$%Pq>UcSyl)sMpg$Sq7xiU? z`a9C#`j@J1tUZ)#+SaxSHTp2k$;ZI^n{-Tkv<&@ult$IcU^&)y`iV~1W)c}UykwKg zCZPZQ@5%>Rm|t`lub%W?tSucUslIKV$llK1YyH3j_^}@%0vf3g~}j)erm7&nNl0f9qyg zS2CB9#(9w_im)WlPQ&^ADP3UmMS6X8hh6iV0M_|caQU=NG?CL0=WjL)^}(0sRVyAc z@C62cY+JET;*I`2nio;^3un1gA(g-%j&NOp^EvWb>xTj*tQY#HHaemKb&0ojt~fk~ z{@ZVbwF#3k6 zjAZ9(^;Fbvs`sDjJu1}Kg4azyodaH=(x=e|>-{W>P!qExs{O2SMVWWKVV=YKbjM zd4;flO*9>W#C!qf2dU(*E0REB07zb>nQfG(S zSb9+X=D_rUY3NUBI-)C|==Zlnrh^-yzuB*wRWX}`1~t7W)H&}%K5-^7Iv)7FrT$T- zn5xOTB3!kPXcG3!b#!Y4f6VbQV|x7^J*4!VfA)52U7$i#j!C(GF={>!Fh&AOm{s8^4oS6S?HBmvm zTUPX<6a3`O8Z)iHS5zp7w?IBMvyFWiJsTqDS{TClNDDHsd`zz|kmQBrozQ>VCRH9B zNB{9?ZW~fv2LGMQL4x%4qpo12Q3n0r#VbeM3W#<=iMREQaDOuW*74qBxZjEn6L)!2 z1O4&+qK)|ni7FNrFO_yqxmU2VS|1LdKRHwv+@$oPg>~JTiYZd zzi!W4)DHb!?xBa0@6kxw#U}GfxSyE$>gI0P1o?%(Onr|r)u{CsRe_P{Sl-FW*ahpG z#(yj{a1Z!{qaOd-0sZm6^IK|Q{mfFlXL*K!ul{&%`!c5Li;K|iFrY@( zMpMkVD`4eMhTuL+fB0N)V1mDd%X z-39ujRXHawX3KN`seQpD&3)|DVp-4#Q6{KJXJ@nwSnhrsd@*`KTTL$ zOz$KbKiYEAhzIJY*}IXQhkioNX}QUL2KxIbb&uy?h!P9#(JL+V{=0=Xlwu6I!uO!m z5=_11H=4e9k$CjIq)j*%_NSR2#S079!M}QISFaTG&*2)h0Ugx5q?q}tlpAtq-G{s1 z08f%1QRTzb5XxTln=EKzm!)kcC)C$7^QUKgrXgpx@jYdQsUBs`(p|l%m`D8Jhcobg zXG$A_DheTI-RsW`yi>i&mdHM$0+l2+Jx7oi%(LdKK)#91_r-fmb=jj@|7rj|Z(xkE OorV5&CQDafy6}G>-%f zO&ZAh8o&GV`_t=ko%eapxIg!G=s0|I>F5}`&}`&npvKM$Dy7}dUeKBi{*EvjSpQC? 
zv`MB8j+&#{sKvu=hyGD1oz7_yc0HimbIxy&r&3y1j0=D3k$#&y%N%I(cPzK94j*C0 zd3~&S@)L~bVEjnt0Xd$kUEUxXy`jCgCZPl=Pl~oB4fVqKQs?E%TFLPxyG}5oF^NHu z!nF?s_4@ivx|6lQa|!F@>>zopZaUIvRL*Ugi@yxX@SYwqZ;b{1s>~M6&!A8E95A_w z7Lo(wyCt6z&Rp3|4kvB{-_2EJVvgkRc+~j<&G5|gzdK}t^mt7J|E{sj!W^%&bCi%M(23)b);zNPJzfM27Rt|o=(c@HxGWM%rK=L~z%}X&8M}GIk zc0FWcythi>p)vS1&cszllKfD5Pe06bie*f;&V;a2o1O}zckFq%KIk2j`XV3kP9awQ+WowQ{~x!xWz!qL z|N5!8SexXxuD+_nTz0YX6qP5)dr8{FI|hj@xvK9zN9~VVg8ge7W|a1ve`=kg(goaXlBO2`mj5{ z`Xf!QzXwxa5EgVi%WmbinFwl9NDx{nrBb?dis^16@LNBnZ41P^@A0%X85}{kO0+_b z*i(RCNfRCG1U=JZmr5-by;*sg%P$&PNe#!V1|XQf%A$UqA<3`a-kgQ^@iBTQg`1+t zHluHS)#rhK=VJ5yBfwwuzq-u=@3m5|;}rQrgoa6e*K&>k{_7vwX-eR?GIn1W!or&m z8Ys&AMzQZc$cEgd2fvU%gSaN>iP>-OY{wF*d99fr+)?-i7vHStFW~3?7ZlMCde~Zp z@+mCNQryXeC@4;_;@Gwjaaf;Ig6c1x0Dn1X|LtFRKSN_Ek69cMmazHOxviB{O1EP- z??NE>uU3{#>R?fGYEZ;!Ka_OsvEQ%7Gr%u*xWBZE96x{Xw+fa$=tc_MC2 zq=KI|_ww)x=t1w<1{1M#ZT|Re#EcRvLQ7^X30NOvi}7>h`ma9o2+qP%=a1bfZa6@M z?KB99)D)yrdM=l2h^qj<#fWQB1eUO~kg(oagC4k*cv!Pl!~7%LPV5K*{l;LVIyY7b zH!|n*d4nF?IkY^RI|2T0IdA^dgYIL71evhB=hQuEyuC*;5UokaHWf*>s59;lSEY(v-nRPEuY--D)Qu=t98a%~7_w4WFU?$OmYm1L(7{-^Br}xKDP~}mda(OG5 z--q4(*<5ZN3_ao^Fffm_dbuQ!lra zgiE>kH#l>6RJl!0!GoN1|8OJ4ew^!y)G!5aQWKUWCm7k;*5djs$#`UHzp zv1Z?;Hl}B?sMy0?H2KKOnA`QP$&KyRE{4s`hMpi(Ap@ZNaE z3%bI}ZHfb?(Kp;#8QhCH<37wt^FjaiGxN#k!Whsm=qc9mVy(|!Vlnjpp)!X~kLP+4 zRLb`Q4a?WvfUnehVz(jIs&HMq5_b}n-mzY=W`g@QbwX&BfvkUSocnjMVlAH!!U-(B zsM$SW`G62FmGVt+KIIqLA85a>?RtV~`(yv{9(|9x+p;OWN0q=YRMyzJ4|J<4m&wDJ zCi2zvNWC^HwUqvwO2GN}-r-~~5D$E*hm*Pzn6_)Jadl%DDw#E`I`$-hN|{zRd;Bm9 z{MUslhmK+zt2SfAtq{~gyU1^u>;r!9)xIDb&@Hl$4EkZ3>?4`{A71F)@%2+fci?5^dliy^;0 z)Sv;Lb<4vc0>F=P)H_)Sx+z0=cs=<4^zIxlN0~tqZ5Q9{f&Hbw>(ML@eDOEoiL01) zI4mQ-wjVtURhs-~2>UY~%1I1Qf#1DoB7Zlg(Og;oZoY+@t$k{_ul)o+iw*x9sGpIg zTdQJzW7AEw-<(?mK98HP_#mdC8<9!bZYUyZZ`UskSl?;KGwz%W;P(oh_qzf7c_HTZY}DwT zn`@R?27az+pMs;H|Ng~1vSQj??u{s0GF4s}dB zd0a13`!{Mx9`R73{UYbTv#0h9e81^E51UHD{5Slq_<0t6{OeTDG8Y5<+%6hB)VD}q zPN|c7!N2nEpl>`;w*2?-UTNsxez;y^|2_i#AAaNdrR4rLSN-%SvhVG;=t=wxd_k$a 
zs?Xq$ZI-L6#R-yJSHR zq++ZUJB+onb1(GAB0}t3kPoBgOJ)_Jo^?8$Oba9X zL3=I}7x@3ymD8@N*Ga#J{GO|5a_YyQ12*M^dGFPkiH>y`&)Vqjr(DoO{|N>Qp;5=J z^@*9<=JuX8tY)5zAaSKBF0{!9q>`lPmKQXP8N%F(IG~b|^Y@NdagNuYgt*b7-LOJkV zo=M!l27bG*>QV~mue+trLdj#Kb^J@C%EAEn>BJvOTY>&b_TvHzW?)(Dif=nh_}qP4 zL-k7lzPJ4pBELVOY4vYv(CXmstqx(L$gDPMLh1tCk2aZhd6{97ud}#c6s?7y3^mq@ zAuL$VFLhL}0^gf^!y&Rh+oe@YZbB>eO2VfDqL3law@$|dsPAn|5yh9u{%vTx(Uuv^ zdeupCt1&YXNDDVlZio8P9Ky3MA8nHclFyk^75E?HTMT?6*7?P$e50l1%0 z^BdySlQEO7$Hj5ES;Ahdpkm{}D&U7jJo`s}pY0@?D#9@1i*)0fl6&aN(cpE_<51t) zIOr0ew1ECDch^H9%n=f~JdSNnPO67uZal3r-R34XQqHQP6!6DvXyjCl70U$I-3CPctH!`ef`@V?V= z;OD8rb&~Jr`BonD+&C8L>u>}GZO{4pUvw_`i~ilAmXQ4D3c4xC&s|(GhN48Eg4u!P zk1v3~crW|_*}q+@j@~?%}v-x7D;{WeAw``jkcr*Bu{rQt$0xY2;N2-_L@_rL3RmLv{L!)i{7O;FD`bC=zLvag z6BhOB%)Z|we{78LH)km88<@X>MbImPfmT?5WfS6xbHAAH{;trS}{E?{A11A4udG!!SsqRJlF z4}Kw|vp%Ar$MYC6Jj4>oI$|Pk_GN>SEJO;FsM(_^h*Y78z4W&EuaP` zBKYd*L*`LX-+G_UM#bL)zu8gSHATqJ;mU8XTtylFs-JttI$-`K?S;p8k-F%wRO5A22Hq$7IB-PTd(hoXQjN}Ir4Q271S>j}A;K9_xh)muS7W1)J`enj za>?7nAU_X2y&Ek<_^h2CPbq=-r9RgmLn35-yZk-))@!Wb_}?KSs}AMrGm1ro!uO$D zn>n)}33PS!W5bZ2Q}|lMDj3nTbo@ov8v2L#47z;Ngdf9*{_{~LyeA^&Db91{a z(WB?5ULLSJeT~e|_tz}Qd~Mca-MtlK59PyEmmi>)L6slOzwU(md}C>^vJdE&UZhOa zVq|{lospIZdggR6HmA!2=Jzo9!Dq<-(c5fhZR8+7Up@VcqK!iMCPXaNiXcCa2>8FU z2fxKb?p}R-n-l6izdpkmB zX^@}Y!V1dC`I|B5$Xa6RLORnOj%rl&!(FcXRXpV9U_+@s=zpTO-wdoX!BmCuJt?s- z(A|;zvwaq$ke{PZ$&|=~-`bc~oeueVYs0?K3#ig>=jf60$7Fs!|7lkm=!dH%|FL29 zx4sbu%DYg#;AqchNj1pNlZONxIzhiuFukV}tMh(My0BxwEZ)Hq_sd z2fou3GT+F*6blK(nufMNrhH3K<5?#4a}v)WpE?@qS2lrem~XFi4{J;dzw{m)MlZt$ zii&==Kz@!tGTT=Ud@%{*4fc?qv)}(>sYit(RuzqntdO4@6FwT91-}#H6Ji$fa~h9f z3MXnPUv_iNa)r*nHh$pA1M1yX1Ed7BZ z){?FAl!>cE6>|N??AQ8We!jZW3C+NlkR%#CAwRPeY^v8n#eDmmx11yWLjS#-J_UZ~ z9}^7Pke|mlRP+m@HwoJd&69USe)c&ztQ!Hk#GL-p0@hkxZoFhYfI8~g6K6bopubDW zrk*6mtli6R*9rMgz+Iq05BN&Eu9=lXe&+n5qjwLLD(h}U z7oI}@s(JV1#1rtlHg%ciKz?qZDN4wo=42o0@|m z)1-E+&-Brw-Z{>2V%re#bu52+k^P%NW{lxC$j6~$<4@R80soz*`XOHMPnxX3d&6wB 
zbUC&CKhU{js`KukV%k|j`Z)M~)83!gf*{{Sx_=JT=7W4v;N&1rK`m;@mM_KNedYU% zQ`>FGPmxh&!@^%MO@V8k$6W~Z_oTKpu@?hB#QE33P|(j;UT{AJ`L0B3PG=fDVODR* z2q))1x14!Q3Hajr%fiu+pLvN+wQnfDmoL?`@-*;+=q{SJk$mBIJ_3-hE@sThE}`Zd z;hel)3Fzfq*y|wMYZ?4QgT?)GC zGh62&Ow+J3nG)0{5@pN8r=RzNfA?O!v(~`p5$vrz4*8Qmp>N0@g+1QvXZ9uy_;z)= zhH~Kd9J{Pr1o`=+Rn6oV)EMx6=_Mn)&;H=3OBaRtM^5?9f1LyVbJU$+jYdX#%pUNY z1Hbd!?>SPpC}s`sh50MX6tuSyh0S5P1CtHlS2CO%83I0wfbI@&$bX)KXO;|!NEE7I zNkIQGttV!d_Zs~E-x9JRKb@3O+f(%sHQaswGfM~hn;)XF>yrMUuVtOw2>I^VW*N^r z%jjd{ag77TP~WGr>?Y{bLHE`#Z@mTnTfaB|Z6?ZNYwu+Ixdnc^6K>bw`x*5^fRR}U z=6BFaiJ^hWIhB*8QV#1oz559Im<#?`qa7P+Am284&8Pf8buS6EtB;w%e|C$<@FVck z4Kef&!}%P!A|jKIKFO9jd+&DvzFkGE8u`8#HU4jBCgf*x|BF4n#OvMfPYkY-zdxnF zJ1`3THL?{NF)+T3)aRlyqU4ocSG&bca{V+M8rZ;}bD`kS3gl;-rt`aQB7z(3`)vT{ z|HoZ_jt(+kueC%zamF;YgH6RI_UO}m_nA)x7Eph_@~WH*2c2L)wPFi?RfFz}Jw(UG z8@}hYE5X0+ILQL}e6v2OMF{wMk18g9fWId(zMBU5`7PhB zZ|SIVmqZpbZ63zoa=LW`+>fZuDy*$8a6Sy`pUX(0;Xis=Oy*Gke_m#)Y=Zh4xz=b! z59iZd?VU}IH_;!Wx4)If1^UhQ9bPTqXFluUrv&-Ac(}>Ei+JrFasKTp20ioiFQ$K> gv$*A5(uVvTe{!eO1S-Ap&pSmu4)i<*zQ+Oo17|sz(f|Me From f113e5dc50bd6f995e568a437319ce85b20288ee Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 22 Jun 2022 11:35:38 -0600 Subject: [PATCH 095/172] basis - add CeedBasisCreateProlongation --- doc/sphinx/source/releasenotes.md | 6 +- include/ceed/ceed.h | 2 + interface/ceed-basis.c | 166 ++++++++++++++++++++++++++++++ interface/ceed-preconditioning.c | 89 ++-------------- 4 files changed, 179 insertions(+), 84 deletions(-) diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index cce3f110fa..d1b8d3ed6d 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -6,13 +6,10 @@ On this page we provide a summary of the main API changes, new features and exam ## Current `main` branch -### Bugfix - -- Fix bug in setting device id for GPU backends. - ### Interface changes - Added {c:func}`CeedOperatorSetName` for more readable {c:func}`CeedOperatorView` output. 
+- Added {c:func}`CeedBasisCreateProjection` to facilitate interpolation between nodes for separate `CeedBases`. ### New features @@ -20,6 +17,7 @@ On this page we provide a summary of the main API changes, new features and exam ### Bugfix +- Fix bug in setting device id for GPU backends. - Fix storing of indices for `CeedElemRestriction` on the host with GPU backends. - Fix `CeedElemRestriction` sizing for {c:func}`CeedOperatorAssemblePointBlockDiagonal`. - Fix bugs in CPU implementation of {c:func}`CeedOperatorLinearAssemble` when there are different number of active input modes and active output modes. diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h index bad828facd..af94f0580c 100644 --- a/include/ceed/ceed.h +++ b/include/ceed/ceed.h @@ -552,6 +552,8 @@ CEED_EXTERN int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weights, CeedBasis *basis); +CEED_EXTERN int CeedBasisCreateProjection(CeedBasis basis_to, CeedBasis basis_from, CeedBasis *basis_project); +CEED_EXTERN int CeedBasisCreateProjectionMatrix(CeedBasis basis_to, CeedBasis basis_from, CeedScalar **interp_project); CEED_EXTERN int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy); CEED_EXTERN int CeedBasisView(CeedBasis basis, FILE *stream); CEED_EXTERN int CeedBasisApply(CeedBasis basis, CeedInt num_elem, diff --git a/interface/ceed-basis.c b/interface/ceed-basis.c index 33d353912a..ba3384b5c3 100644 --- a/interface/ceed-basis.c +++ b/interface/ceed-basis.c @@ -829,6 +829,172 @@ int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, return CEED_ERROR_SUCCESS; } +/** + @brief Create a CeedBasis for projection from the nodes of `basis_from` + to the nodes of `basis_to`. Only `CEED_EVAL_INTERP` will be + valid for the new basis, `basis_project`. This projection is + given by `interp_project = interp_to^+ * interp_from`, where + the pesudoinverse `interp_to^+` is given by QR factorization. 
+ Note: `basis_from` and `basis_to` must have compatible quadrature + spaces. + + @param[in] basis_to CeedBasis to prolong to + @param[in] basis_from CeedBasis to prolong from + @param[out] basis_project Address of the variable where the newly created + CeedBasis will be stored. + + @return An error code: 0 - success, otherwise - failure + + @ref User +**/ +int CeedBasisCreateProjection(CeedBasis basis_to, CeedBasis basis_from, + CeedBasis *basis_project) { + int ierr; + Ceed ceed; + ierr = CeedBasisGetCeed(basis_to, &ceed); CeedChk(ierr); + + // Create projectior matrix + CeedScalar *interp_project; + ierr = CeedBasisCreateProjectionMatrix(basis_to, basis_from, + &interp_project); CeedChk(ierr); + + // Build basis + bool is_tensor; + CeedInt dim, num_comp; + CeedScalar *q_ref, *q_weight, *grad; + ierr = CeedBasisIsTensor(basis_to, &is_tensor); CeedChk(ierr); + ierr = CeedBasisGetDimension(basis_to, &dim); CeedChk(ierr); + ierr = CeedBasisGetNumComponents(basis_to, &num_comp); CeedChk(ierr); + if (is_tensor) { + CeedInt P_1d_to, P_1d_from; + ierr = CeedBasisGetNumNodes1D(basis_from, &P_1d_from); CeedChk(ierr); + ierr = CeedBasisGetNumNodes1D(basis_to, &P_1d_to); CeedChk(ierr); + ierr = CeedCalloc(P_1d_to, &q_ref); CeedChk(ierr); + ierr = CeedCalloc(P_1d_to, &q_weight); CeedChk(ierr); + ierr = CeedCalloc(P_1d_to * P_1d_from * dim, &grad); CeedChk(ierr); + ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to, + interp_project, grad, q_ref, q_weight, basis_project); + CeedChk(ierr); + } else { + CeedElemTopology topo; + ierr = CeedBasisGetTopology(basis_to, &topo); CeedChk(ierr); + CeedInt num_nodes_to, num_nodes_from; + ierr = CeedBasisGetNumNodes(basis_from, &num_nodes_from); CeedChk(ierr); + ierr = CeedBasisGetNumNodes(basis_to, &num_nodes_to); CeedChk(ierr); + ierr = CeedCalloc(num_nodes_to * dim, &q_ref); CeedChk(ierr); + ierr = CeedCalloc(num_nodes_to, &q_weight); CeedChk(ierr); + ierr = CeedCalloc(num_nodes_to * num_nodes_from * dim, &grad); 
CeedChk(ierr); + ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to, + interp_project, grad, q_ref, q_weight, basis_project); + CeedChk(ierr); + } + + // Cleanup + ierr = CeedFree(&interp_project); CeedChk(ierr); + ierr = CeedFree(&q_ref); CeedChk(ierr); + ierr = CeedFree(&q_weight); CeedChk(ierr); + ierr = CeedFree(&grad); CeedChk(ierr); + + return CEED_ERROR_SUCCESS; +} + +/** + @brief Create the interpolation matrix for projection from the nodes of + `basis_from` to the nodes of `basis_to`. This projection is + given by `interp_project = interp_to^+ * interp_from`, where + the pesudoinverse `interp_to^+` is given by QR factorization. + Note: `basis_from` and `basis_to` must have compatible quadrature + spaces. + + @param[in] basis_to CeedBasis to project to + @param[in] basis_from CeedBasis to project from + @param[out] interp_project Address of the variable where the newly created + projection matrix will be stored. + + @return An error code: 0 - success, otherwise - failure + + @ref User +**/ +int CeedBasisCreateProjectionMatrix(CeedBasis basis_to, + CeedBasis basis_from, + CeedScalar **interp_project) { + int ierr; + Ceed ceed; + ierr = CeedBasisGetCeed(basis_to, &ceed); CeedChk(ierr); + + // Check for compatible quadrature spaces + CeedInt Q_to, Q_from; + ierr = CeedBasisGetNumQuadraturePoints(basis_to, &Q_to); CeedChk(ierr); + ierr = CeedBasisGetNumQuadraturePoints(basis_from, &Q_from); CeedChk(ierr); + if (Q_to != Q_from) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_DIMENSION, + "Bases must have compatible quadrature spaces"); + // LCOV_EXCL_STOP + + // Coarse to fine basis + CeedInt P_to, P_from, Q = Q_to; + bool is_tensor_to, is_tensor_from; + ierr = CeedBasisIsTensor(basis_to, &is_tensor_to); CeedChk(ierr); + ierr = CeedBasisIsTensor(basis_from, &is_tensor_from); CeedChk(ierr); + CeedScalar *interp_to, *interp_from, *tau; + if (is_tensor_to && is_tensor_from) { + ierr = CeedBasisGetNumNodes1D(basis_to, &P_to); 
CeedChk(ierr); + ierr = CeedBasisGetNumNodes1D(basis_from, &P_from); CeedChk(ierr); + ierr = CeedBasisGetNumQuadraturePoints1D(basis_from, &Q); CeedChk(ierr); + } else if (!is_tensor_to && !is_tensor_from) { + ierr = CeedBasisGetNumNodes(basis_to, &P_to); CeedChk(ierr); + ierr = CeedBasisGetNumNodes(basis_from, &P_from); CeedChk(ierr); + } else { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_MINOR, + "Bases must both be tensor or non-tensor"); + // LCOV_EXCL_STOP + } + + ierr = CeedMalloc(Q * P_from, &interp_from); CeedChk(ierr); + ierr = CeedMalloc(Q * P_to, &interp_to); CeedChk(ierr); + ierr = CeedCalloc(P_to * P_from, interp_project); CeedChk(ierr); + ierr = CeedMalloc(Q, &tau); CeedChk(ierr); + const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL; + if (is_tensor_to) { + ierr = CeedBasisGetInterp1D(basis_to, &interp_to_source); CeedChk(ierr); + ierr = CeedBasisGetInterp1D(basis_from, &interp_from_source); CeedChk(ierr); + } else { + ierr = CeedBasisGetInterp(basis_to, &interp_to_source); CeedChk(ierr); + ierr = CeedBasisGetInterp(basis_from, &interp_from_source); CeedChk(ierr); + } + memcpy(interp_to, interp_to_source, Q * P_to * sizeof(interp_to_source[0])); + memcpy(interp_from, interp_from_source, + Q * P_from * sizeof(interp_from_source[0])); + + // -- QR Factorization, interp_to = Q R + ierr = CeedQRFactorization(ceed, interp_to, tau, Q, P_to); CeedChk(ierr); + + // -- Apply Qtranspose, interp_to = Qtranspose interp_from + ierr = CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, + Q, P_from, P_to, P_from, 1); CeedChk(ierr); + + // -- Apply Rinv, interp_project = Rinv interp_c + for (CeedInt j = 0; j < P_from; j++) { // Column j + (*interp_project)[j + P_from * (P_to - 1)] = interp_from[j + P_from * + (P_to - 1)] / interp_to[P_to * P_to - 1]; + for (CeedInt i = P_to - 2; i >= 0; i--) { // Row i + (*interp_project)[j + P_from * i] = interp_from[j + P_from * i]; + for (CeedInt k = i+1; k < P_to; k++) { + 
(*interp_project)[j + P_from * i] -= interp_to[k + P_to * i]* + (*interp_project)[j + P_from * k]; + } + (*interp_project)[j + P_from * i] /= interp_to[i + P_to * i]; + } + } + ierr = CeedFree(&tau); CeedChk(ierr); + ierr = CeedFree(&interp_to); CeedChk(ierr); + ierr = CeedFree(&interp_from); CeedChk(ierr); + + return CEED_ERROR_SUCCESS; +} + /** @brief Copy the pointer to a CeedBasis. Both pointers should be destroyed with `CeedBasisDestroy()`; diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index 8dee560cc7..aec919e0c8 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -2048,90 +2048,19 @@ int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedOperator *op_restrict) { int ierr; ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr); - Ceed ceed; - ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr); - // Check for compatible quadrature spaces - CeedBasis basis_fine; + // Build prolongation matrix + CeedBasis basis_fine, basis_c_to_f; ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); - CeedInt Q_f, Q_c; - ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr); - ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr); - if (Q_f != Q_c) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_DIMENSION, - "Bases must have compatible quadrature spaces"); - // LCOV_EXCL_STOP - - // Coarse to fine basis - CeedInt P_f, P_c, Q = Q_f; - bool is_tensor_f, is_tensor_c; - ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr); - ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr); - CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau; - if (is_tensor_f && is_tensor_c) { - ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr); - ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr); - ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr); - } else if (!is_tensor_f && 
!is_tensor_c) { - ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr); - ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr); - } else { - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_MINOR, - "Bases must both be tensor or non-tensor"); - // LCOV_EXCL_STOP - } + ierr = CeedBasisCreateProjection(basis_fine, basis_coarse, &basis_c_to_f); + CeedChk(ierr); - ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr); - ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr); - ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr); - ierr = CeedMalloc(Q, &tau); CeedChk(ierr); - const CeedScalar *interp_f_source = NULL, *interp_c_source = NULL; - if (is_tensor_f) { - ierr = CeedBasisGetInterp1D(basis_fine, &interp_f_source); CeedChk(ierr); - ierr = CeedBasisGetInterp1D(basis_coarse, &interp_c_source); CeedChk(ierr); - } else { - ierr = CeedBasisGetInterp(basis_fine, &interp_f_source); CeedChk(ierr); - ierr = CeedBasisGetInterp(basis_coarse, &interp_c_source); CeedChk(ierr); - } - memcpy(interp_f, interp_f_source, Q*P_f*sizeof interp_f_source[0]); - memcpy(interp_c, interp_c_source, Q*P_c*sizeof interp_c_source[0]); - - // -- QR Factorization, interp_f = Q R - ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr); - - // -- Apply Qtranspose, interp_c = Qtranspose interp_c - ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE, - Q, P_c, P_f, P_c, 1); CeedChk(ierr); - - // -- Apply Rinv, interp_c_to_f = Rinv interp_c - for (CeedInt j=0; j=0; i--) { // Row i - interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i]; - for (CeedInt k=i+1; k Date: Wed, 22 Jun 2022 16:57:43 -0600 Subject: [PATCH 096/172] gpu - fix /gpu/*/gen backends for op with all CEED_BASIS_COLLOCATED --- backends/cuda-gen/ceed-cuda-gen-operator-build.cpp | 3 ++- backends/hip-gen/ceed-hip-gen-operator-build.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp 
b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp index 58381e53a8..5a78bd8d15 100644 --- a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp +++ b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp @@ -759,8 +759,9 @@ extern "C" int CeedCudaGenOperatorBuild(CeedOperator op) { ierr = CeedQFunctionGetData(qf, &qf_data); CeedChkBackend(ierr); CeedSize lsize; CeedInt Q, P1d = 0, Q1d = 0, numelements, elemsize, numinputfields, - numoutputfields, ncomp, dim = 0; + numoutputfields, ncomp, dim = 1; ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); + Q1d = Q; ierr = CeedOperatorGetNumElements(op, &numelements); CeedChkBackend(ierr); CeedOperatorField *opinputfields, *opoutputfields; ierr = CeedOperatorGetFields(op, &numinputfields, &opinputfields, &numoutputfields, &opoutputfields); diff --git a/backends/hip-gen/ceed-hip-gen-operator-build.cpp b/backends/hip-gen/ceed-hip-gen-operator-build.cpp index 10364f84f8..9db75a5ae2 100644 --- a/backends/hip-gen/ceed-hip-gen-operator-build.cpp +++ b/backends/hip-gen/ceed-hip-gen-operator-build.cpp @@ -768,8 +768,9 @@ extern "C" int CeedHipGenOperatorBuild(CeedOperator op) { ierr = CeedQFunctionGetData(qf, &qf_data); CeedChkBackend(ierr); CeedSize lsize; CeedInt Q, P1d = 0, Q1d = 0, numelements, elemsize, numinputfields, - numoutputfields, ncomp, dim = 0; + numoutputfields, ncomp, dim = 1; ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); + Q1d = Q; ierr = CeedOperatorGetNumElements(op, &numelements); CeedChkBackend(ierr); CeedOperatorField *opinputfields, *opoutputfields; ierr = CeedOperatorGetFields(op, &numinputfields, &opinputfields, &numoutputfields, &opoutputfields); From 10a41f97c7234e5415ddfb8b236d18d51d3bbdee Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 23 Jun 2022 10:35:40 -0600 Subject: [PATCH 097/172] doc - minor fixes --- README.md | 2 +- doc/sphinx/source/intro.md | 2 +- doc/sphinx/source/releasenotes.md | 6 +++--- 3 files changed, 5 insertions(+), 5 
deletions(-) diff --git a/README.md b/README.md index fee7ed6c0d..b0338ee5fd 100644 --- a/README.md +++ b/README.md @@ -341,7 +341,7 @@ cd .. For the last example shown, sample meshes to be used in place of `[.exo file]` can be found at <https://github.com/jeremylt/ceedSampleMeshes> -The above code assumes a GPU-capable machine with the OCCA backend +The above code assumes a GPU-capable machine with the CUDA backends enabled. Depending on the available backends, other CEED resource specifiers can be provided with the `-ceed` option. Other command line arguments can be found in [examples/petsc](https://github.com/CEED/libCEED/blob/main/examples/petsc/README.md). diff --git a/doc/sphinx/source/intro.md b/doc/sphinx/source/intro.md index 14b70fc66d..fb9fd8b958 100644 --- a/doc/sphinx/source/intro.md +++ b/doc/sphinx/source/intro.md @@ -61,7 +61,7 @@ Programming Interface (API) for user codes so that applications with their own discretization infrastructure (e.g., those in [PETSc](https://www.mcs.anl.gov/petsc/), [MFEM](https://mfem.org/) and [Nek5000](https://nek5000.mcs.anl.gov/)) can evaluate and use the core operations provided by libCEED. GPU implementations are available via -pure [CUDA](https://developer.nvidia.com/about-cuda) as well as the +pure [CUDA](https://developer.nvidia.com/about-cuda) and pure [HIP](https://rocmdocs.amd.com) as well as the [OCCA](http://github.com/libocca/occa) and [MAGMA](https://bitbucket.org/icl/magma) libraries. CPU implementations are available via pure C and AVX intrinsics as well as the [LIBXSMM](http://github.com/hfp/libxsmm) library. 
libCEED provides a unified diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index a695c3fdaa..852b6aa1d2 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -24,9 +24,9 @@ On this page we provide a summary of the main API changes, new features and exam ### Examples -- Added various performance enhancements for {ref}`example-petsc-navier-stokes` -- Refactored {ref}`example-petsc-navier-stokes` to improve code reuse -- Added Shock Tube, Channel, and Flat Plate boundary layer problems to {ref}`example-petsc-navier-stokes` +- Added various performance enhancements for {ref}`example-petsc-navier-stokes`. +- Refactored {ref}`example-petsc-navier-stokes` to improve code reuse. +- Added Shock Tube, Channel, and Flat Plate boundary layer problems to {ref}`example-petsc-navier-stokes`. (v0-10-1)= From 17be3a414c6fae47654f1361bae9c9dbcdd66795 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 23 Jun 2022 10:39:11 -0600 Subject: [PATCH 098/172] doc - update style since I'm touching the file --- CODE_OF_CONDUCT.md | 104 ++--- CONTRIBUTING.md | 122 ++--- README.md | 175 +++---- RELEASING.md | 78 ++-- doc/sphinx/source/api/CeedElemRestriction.rst | 3 +- doc/sphinx/source/api/CeedOperator.rst | 5 +- doc/sphinx/source/api/CeedQFunction.rst | 3 +- doc/sphinx/source/api/CeedVector.rst | 3 +- doc/sphinx/source/ffi.md | 14 +- doc/sphinx/source/intro.md | 86 +--- doc/sphinx/source/libCEEDapi.md | 438 ++++++------------ doc/sphinx/source/libCEEDdev.md | 28 +- doc/sphinx/source/precision.md | 21 +- examples/README.md | 54 +-- examples/bps.md | 28 +- examples/ceed/README.md | 9 +- examples/fluids/README.md | 19 +- examples/nek/README.md | 28 +- examples/notation.md | 19 +- examples/solids/README.md | 16 +- julia/LibCEED.jl/README.md | 34 +- rust/libceed-sys/README.md | 48 +- tests/README.md | 3 +- 23 files changed, 451 insertions(+), 887 deletions(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 
8d68c70b2a..4fce72c826 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -2,127 +2,87 @@ ## Our Pledge -We as members, contributors, and leaders pledge to make participation in our -community a harassment-free experience for everyone, regardless of age, body -size, visible or invisible disability, ethnicity, sex characteristics, gender -identity and expression, level of experience, education, socio-economic status, -nationality, personal appearance, race, religion, or sexual identity -and orientation. +We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. -We pledge to act and interact in ways that contribute to an open, welcoming, -diverse, inclusive, and healthy community. +We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
## Our Standards -Examples of behavior that contributes to a positive environment for our -community include: +Examples of behavior that contributes to a positive environment for our community include: * Demonstrating empathy and kindness toward other people * Being respectful of differing opinions, viewpoints, and experiences * Giving and gracefully accepting constructive feedback -* Accepting responsibility and apologizing to those affected by our mistakes, - and learning from the experience -* Focusing on what is best not just for us as individuals, but for the - overall community +* Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: -* The use of sexualized language or imagery, and sexual attention or - advances of any kind +* The use of sexualized language or imagery, and sexual attention or advances of any kind * Trolling, insulting or derogatory comments, and personal or political attacks * Public or private harassment -* Publishing others' private information, such as a physical or email - address, without their explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting +* Publishing others' private information, such as a physical or email address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities -Community leaders are responsible for clarifying and enforcing our standards of -acceptable behavior and will take appropriate and fair corrective action in -response to any behavior that they deem inappropriate, threatening, offensive, -or harmful. 
+Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. -Community leaders have the right and responsibility to remove, edit, or reject -comments, commits, code, wiki edits, issues, and other contributions that are -not aligned to this Code of Conduct, and will communicate reasons for moderation -decisions when appropriate. +Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. ## Scope -This Code of Conduct applies within all community spaces, and also applies when -an individual is officially representing the community in public spaces. -Examples of representing our community include using an official e-mail address, -posting via an official social media account, or acting as an appointed -representative at an online or offline event. +This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported to the community leaders responsible for enforcement at -jed@jedbrown.org, valeria.barra@colorado.edu, or tzanio@llnl.gov. +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at jed@jedbrown.org, valeria.barra@colorado.edu, or tzanio@llnl.gov. 
All complaints will be reviewed and investigated promptly and fairly. -All community leaders are obligated to respect the privacy and security of the -reporter of any incident. +All community leaders are obligated to respect the privacy and security of the reporter of any incident. ## Enforcement Guidelines -Community leaders will follow these Community Impact Guidelines in determining -the consequences for any action they deem in violation of this Code of Conduct: +Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: ### 1. Correction -**Community Impact**: Use of inappropriate language or other behavior deemed -unprofessional or unwelcome in the community. +**Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. -**Consequence**: A private, written warning from community leaders, providing -clarity around the nature of the violation and an explanation of why the -behavior was inappropriate. A public apology may be requested. +**Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. +A public apology may be requested. ### 2. Warning -**Community Impact**: A violation through a single incident or series -of actions. +**Community Impact**: A violation through a single incident or series of actions. -**Consequence**: A warning with consequences for continued behavior. No -interaction with the people involved, including unsolicited interaction with -those enforcing the Code of Conduct, for a specified period of time. This -includes avoiding interactions in community spaces as well as external channels -like social media. Violating these terms may lead to a temporary or -permanent ban. +**Consequence**: A warning with consequences for continued behavior. 
+No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. +This includes avoiding interactions in community spaces as well as external channels like social media. +Violating these terms may lead to a temporary or permanent ban. ### 3. Temporary Ban -**Community Impact**: A serious violation of community standards, including -sustained inappropriate behavior. +**Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. -**Consequence**: A temporary ban from any sort of interaction or public -communication with the community for a specified period of time. No public or -private interaction with the people involved, including unsolicited interaction -with those enforcing the Code of Conduct, is allowed during this period. +**Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. +No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. ### 4. Permanent Ban -**Community Impact**: Demonstrating a pattern of violation of community -standards, including sustained inappropriate behavior, harassment of an -individual, or aggression toward or disparagement of classes of individuals. +**Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. -**Consequence**: A permanent ban from any sort of public interaction within -the community. +**Consequence**: A permanent ban from any sort of public interaction within the community. 
## Attribution -This Code of Conduct is adapted from the [Contributor Covenant][homepage], -version 2.0, available at -https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. -Community Impact Guidelines were inspired by [Mozilla's code of conduct -enforcement ladder](https://github.com/mozilla/diversity). +Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). [homepage]: https://www.contributor-covenant.org -For answers to common questions about this code of conduct, see the FAQ at -https://www.contributor-covenant.org/faq. Translations are available at -https://www.contributor-covenant.org/translations. +For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. +Translations are available at https://www.contributor-covenant.org/translations. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d696c296c5..d6317b34e9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,55 +2,33 @@ Contributions to libCEED are encouraged. -Please make your commits well-organized and -[atomic](https://en.wikipedia.org/wiki/Atomic_commit#Atomic_commit_convention), -using `git rebase --interactive` as needed. Check that tests -(including "examples") pass using `make prove-all`. If adding a new -feature, please add or extend a test so that your new feature is -tested. - -In typical development, every commit should compile, be covered by the -test suite, and pass all tests. This improves the efficiency of -reviewing and facilitates use of -[`git bisect`](https://git-scm.com/docs/git-bisect). - -Open an issue or RFC (request for comments) pull request to discuss -any significant changes before investing time. 
It is useful to create -a WIP (work in progress) pull request for any long-running development -so that others can be aware of your work and help to avoid creating -merge conflicts. - -Write commit messages for a reviewer of your pull request and for a -future developer (maybe you) that bisects and finds that a bug was -introduced in your commit. The assumptions that are clear in your -mind while committing are likely not in the mind of whomever (possibly -you) needs to understand it in the future. - -Give credit where credit is due using tags such as `Reported-by: -Helpful User <helpful@example.com>` or -[`Co-authored-by: Snippet Mentor <code.by@comment.com>`](https://help.github.com/en/github/committing-changes-to-your-project/creating-a-commit-with-multiple-authors#creating-co-authored-commits-on-the-command-line). -Please use a real name and email for your author information (`git -config user.name` and `user.email`). If your author information or -email becomes inconsistent (look at `git shortlog -se`), please edit -`.mailmap` to obtain your preferred name and email address. - -When contributors make a major contribution and support it, their names -are included in the automatically generated user-manual documentation. - -Please avoid "merging from upstream" (like merging 'main' into your -feature branch) unless there is a specific reason to do so, in which -case you should explain why in the merge commit. +Please make your commits well-organized and [atomic](https://en.wikipedia.org/wiki/Atomic_commit#Atomic_commit_convention), using `git rebase --interactive` as needed. +Check that tests (including "examples") pass using `make prove-all`. +If adding a new feature, please add or extend a test so that your new feature is tested. + +In typical development, every commit should compile, be covered by the test suite, and pass all tests. +This improves the efficiency of reviewing and facilitates use of [`git bisect`](https://git-scm.com/docs/git-bisect). 
+ +Open an issue or RFC (request for comments) pull request to discuss any significant changes before investing time. +It is useful to create a WIP (work in progress) pull request for any long-running development so that others can be aware of your work and help to avoid creating merge conflicts. + +Write commit messages for a reviewer of your pull request and for a future developer (maybe you) that bisects and finds that a bug was introduced in your commit. +The assumptions that are clear in your mind while committing are likely not in the mind of whomever (possibly you) needs to understand it in the future. + +Give credit where credit is due using tags such as `Reported-by: Helpful User <helpful@example.com>` or [`Co-authored-by: Snippet Mentor <code.by@comment.com>`](https://help.github.com/en/github/committing-changes-to-your-project/creating-a-commit-with-multiple-authors#creating-co-authored-commits-on-the-command-line). +Please use a real name and email for your author information (`git config user.name` and `user.email`). +If your author information or email becomes inconsistent (look at `git shortlog -se`), please edit `.mailmap` to obtain your preferred name and email address. + +When contributors make a major contribution and support it, their names are included in the automatically generated user-manual documentation. + +Please avoid "merging from upstream" (like merging 'main' into your feature branch) unless there is a specific reason to do so, in which case you should explain why in the merge commit. [Rationale](https://lwn.net/Articles/328436/) from [Junio](https://gitster.livejournal.com/42247.html) and [Linus](http://yarchive.net/comp/linux/git_merges_from_upstream.html). -You can use `make style` to help conform to coding conventions of the -project, but try to avoid mixing whitespace or formatting changes with -content changes (see atomicity above). 
+You can use `make style` to help conform to coding conventions of the project, but try to avoid mixing whitespace or formatting changes with content changes (see atomicity above). By submitting a pull request, you are affirming the following. @@ -58,54 +36,26 @@ By submitting a pull request, you are affirming the following. By making a contribution to this project, I certify that: -(a) The contribution was created in whole or in part by me and I - have the right to submit it under the open source license - indicated in the file; or +(a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or -(b) The contribution is based upon previous work that, to the best - of my knowledge, is covered under an appropriate open source - license and I have the right under that license to submit that - work with modifications, whether created in whole or in part - by me, under the same open source license (unless I am - permitted to submit under a different license), as indicated - in the file; or +(b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or -(c) The contribution was provided directly to me by some other - person who certified (a), (b) or (c) and I have not modified - it. +(c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. 
-(d) I understand and agree that this project and the contribution - are public and that a record of the contribution (including all - personal information I submit with it, including my sign-off) is - maintained indefinitely and may be redistributed consistent with - this project or the open source license(s) involved. +(d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. ## Authorship -libCEED contains components authored by many individuals. It is -important that contributors receive appropriate recognition through -informal and academically-recognized credit systems such as -publications. Status as a named author on the users manual and -libCEED software publications will be granted for those who +libCEED contains components authored by many individuals. +It is important that contributors receive appropriate recognition through informal and academically-recognized credit systems such as publications. +Status as a named author on the users manual and libCEED software publications will be granted for those who -1. make significant contributions to libCEED (in implementation, - documentation, conceptualization, review, etc.) and +1. make significant contributions to libCEED (in implementation, documentation, conceptualization, review, etc.) and 2. maintain and support those contributions. -Maintainers will do their best to notice when contributions reach this -level and add your name to `AUTHORS`, but please email or create an -issue if you believe your contributions have met these criteria and -haven't yet been acknowledged. - -Authors of publications about libCEED as a whole, including -DOI-bearing archives, shall offer co-authorship to all individuals -listed in the `AUTHORS` file. 
Authors of publications claiming -specific libCEED contributions shall evaluate those listed in -`AUTHORS` and offer co-authorship to those who made significant -intellectual contributions to the work. - -Note that there is no co-authorship expectation for those publishing -about use of libCEED (versus creation of new features in libCEED), but -see the [citing section](https://libceed.org/en/latest/gettingstarted/#how-to-cite) -and use your judgment regarding significance of support/advice you may have -received in developing your use case and interpreting results. +Maintainers will do their best to notice when contributions reach this level and add your name to `AUTHORS`, but please email or create an issue if you believe your contributions have met these criteria and haven't yet been acknowledged. + +Authors of publications about libCEED as a whole, including DOI-bearing archives, shall offer co-authorship to all individuals listed in the `AUTHORS` file. +Authors of publications claiming specific libCEED contributions shall evaluate those listed in `AUTHORS` and offer co-authorship to those who made significant intellectual contributions to the work. + +Note that there is no co-authorship expectation for those publishing about use of libCEED (versus creation of new features in libCEED), but see the [citing section](https://libceed.org/en/latest/gettingstarted/#how-to-cite) and use your judgment regarding significance of support/advice you may have received in developing your use case and interpreting results. diff --git a/README.md b/README.md index b0338ee5fd..75dbff73e7 100644 --- a/README.md +++ b/README.md @@ -10,45 +10,20 @@ ## Summary and Purpose -libCEED provides fast algebra for element-based discretizations, designed for -performance portability, run-time flexibility, and clean embedding in higher -level libraries and applications. It offers a C99 interface as well as bindings -for Fortran, Python, Julia, and Rust. 
-While our focus is on high-order finite elements, the approach is mostly -algebraic and thus applicable to other discretizations in factored form, as -explained in the [user manual](https://libceed.org/en/latest/) and -API implementation portion of the -[documentation](https://libceed.org/en/latest/api/). - -One of the challenges with high-order methods is that a global sparse matrix is -no longer a good representation of a high-order linear operator, both with -respect to the FLOPs needed for its evaluation, as well as the memory transfer -needed for a matvec. Thus, high-order methods require a new "format" that still -represents a linear (or more generally non-linear) operator, but not through a -sparse matrix. - -The goal of libCEED is to propose such a format, as well as supporting -implementations and data structures, that enable efficient operator evaluation -on a variety of computational device types (CPUs, GPUs, etc.). This new operator -description is based on algebraically -[factored form](https://libceed.org/en/latest/libCEEDapi/#finite-element-operator-decomposition), -which is easy to incorporate in a wide variety of applications, without significant -refactoring of their own discretization infrastructure. - -The repository is part of the -[CEED software suite](http://ceed.exascaleproject.org/software/), a collection of -software benchmarks, miniapps, libraries and APIs for efficient exascale -discretizations based on high-order finite element and spectral element methods. +libCEED provides fast algebra for element-based discretizations, designed for performance portability, run-time flexibility, and clean embedding in higher level libraries and applications. +It offers a C99 interface as well as bindings for Fortran, Python, Julia, and Rust. 
+While our focus is on high-order finite elements, the approach is mostly algebraic and thus applicable to other discretizations in factored form, as explained in the [user manual](https://libceed.org/en/latest/) and API implementation portion of the [documentation](https://libceed.org/en/latest/api/). + +One of the challenges with high-order methods is that a global sparse matrix is no longer a good representation of a high-order linear operator, both with respect to the FLOPs needed for its evaluation, as well as the memory transfer needed for a matvec. +Thus, high-order methods require a new "format" that still represents a linear (or more generally non-linear) operator, but not through a sparse matrix. + +The goal of libCEED is to propose such a format, as well as supporting implementations and data structures, that enable efficient operator evaluation on a variety of computational device types (CPUs, GPUs, etc.). +This new operator description is based on algebraically [factored form](https://libceed.org/en/latest/libCEEDapi/#finite-element-operator-decomposition), which is easy to incorporate in a wide variety of applications, without significant refactoring of their own discretization infrastructure. + +The repository is part of the [CEED software suite](http://ceed.exascaleproject.org/software/), a collection of software benchmarks, miniapps, libraries and APIs for efficient exascale discretizations based on high-order finite element and spectral element methods. See <http://github.com/ceed> for more information and source code availability. -The CEED research is supported by the -[Exascale Computing Project](https://exascaleproject.org/exascale-computing-project) -(17-SC-20-SC), a collaborative effort of two U.S. 
Department of Energy -organizations (Office of Science and the National Nuclear Security -Administration) responsible for the planning and preparation of a -[capable exascale ecosystem](https://exascaleproject.org/what-is-exascale), including -software, applications, hardware, advanced system engineering and early testbed -platforms, in support of the nation’s exascale computing imperative. +The CEED research is supported by the [Exascale Computing Project](https://exascaleproject.org/exascale-computing-project) (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy organizations (Office of Science and the National Nuclear Security Administration) responsible for the planning and preparation of a [capable exascale ecosystem](https://exascaleproject.org/what-is-exascale), including software, applications, hardware, advanced system engineering and early testbed platforms, in support of the nation’s exascale computing imperative. For more details on the CEED API see the [user manual](https://libceed.org/en/latest/). @@ -56,8 +31,8 @@ For more details on the CEED API see the [user manual](https://libceed.org/en/la ## Building -The CEED library, `libceed`, is a C99 library with no required dependencies, and -with Fortran, Python, Julia, and Rust interfaces. It can be built using: +The CEED library, `libceed`, is a C99 library with no required dependencies, and with Fortran, Python, Julia, and Rust interfaces. +It can be built using: ``` make @@ -69,11 +44,9 @@ or, with optimization flags: make OPT='-O3 -march=skylake-avx512 -ffp-contract=fast' ``` -These optimization flags are used by all languages (C, C++, Fortran) and this -makefile variable can also be set for testing and examples (below). +These optimization flags are used by all languages (C, C++, Fortran) and this makefile variable can also be set for testing and examples (below). 
-The library attempts to automatically detect support for the AVX -instruction set using gcc-style compiler options for the host. +The library attempts to automatically detect support for the AVX instruction set using gcc-style compiler options for the host. Support may need to be manually specified via: ``` @@ -86,13 +59,11 @@ or: make AVX=0 ``` -if your compiler does not support gcc-style options, if you are cross -compiling, etc. +if your compiler does not support gcc-style options, if you are cross compiling, etc. -To enable CUDA support, add `CUDA_DIR=/opt/cuda` or an appropriate directory -to your `make` invocation. To enable HIP support, add `HIP_DIR=/opt/rocm` or -an appropriate directory. To store these or other arguments as defaults for -future invocations of `make`, use: +To enable CUDA support, add `CUDA_DIR=/opt/cuda` or an appropriate directory to your `make` invocation. +To enable HIP support, add `HIP_DIR=/opt/rocm` or an appropriate directory. +To store these or other arguments as defaults for future invocations of `make`, use: ``` make configure CUDA_DIR=/usr/local/cuda HIP_DIR=/opt/rocm OPT='-O3 -march=znver2' @@ -120,8 +91,7 @@ julia> ] pkg> add LibCEED ``` -See the [LibCEED.jl documentation](http://ceed.exascaleproject.org/libCEED-julia-docs/dev/) -for more information. +See the [LibCEED.jl documentation](http://ceed.exascaleproject.org/libCEED-julia-docs/dev/) for more information. Rust users can include libCEED via `Cargo.toml`: @@ -191,10 +161,8 @@ There are multiple supported backends, which can be selected at runtime in the e | `/gpu/cuda/occa` | OCCA backend with CUDA kernels | Yes | | `/gpu/hip/occa`~ | OCCA backend with HIP kernels | Yes | -The `/cpu/self/*/serial` backends process one element at a time and are intended for meshes -with a smaller number of high order elements. The `/cpu/self/*/blocked` backends process -blocked batches of eight interlaced elements and are intended for meshes with higher numbers -of elements. 
+The `/cpu/self/*/serial` backends process one element at a time and are intended for meshes with a smaller number of high order elements. +The `/cpu/self/*/blocked` backends process blocked batches of eight interlaced elements and are intended for meshes with higher numbers of elements. The `/cpu/self/ref/*` backends are written in pure C and provide basic functionality. @@ -202,42 +170,34 @@ The `/cpu/self/opt/*` backends are written in pure C and use partial e-vectors t The `/cpu/self/avx/*` backends rely upon AVX instructions to provide vectorized CPU performance. -The `/cpu/self/memcheck/*` backends rely upon the [Valgrind](http://valgrind.org/) Memcheck tool -to help verify that user QFunctions have no undefined values. To use, run your code with -Valgrind and the Memcheck backends, e.g. `valgrind ./build/ex1 -ceed /cpu/self/ref/memcheck`. A -'development' or 'debugging' version of Valgrind with headers is required to use this backend. -This backend can be run in serial or blocked mode and defaults to running in the serial mode -if `/cpu/self/memcheck` is selected at runtime. +The `/cpu/self/memcheck/*` backends rely upon the [Valgrind](http://valgrind.org/) Memcheck tool to help verify that user QFunctions have no undefined values. +To use, run your code with Valgrind and the Memcheck backends, e.g. `valgrind ./build/ex1 -ceed /cpu/self/ref/memcheck`. +A 'development' or 'debugging' version of Valgrind with headers is required to use this backend. +This backend can be run in serial or blocked mode and defaults to running in the serial mode if `/cpu/self/memcheck` is selected at runtime. -The `/cpu/self/xsmm/*` backends rely upon the [LIBXSMM](http://github.com/hfp/libxsmm) package -to provide vectorized CPU performance. If linking MKL and LIBXSMM is desired but -the Makefile is not detecting `MKLROOT`, linking libCEED against MKL can be -forced by setting the environment variable `MKL=1`. 
+The `/cpu/self/xsmm/*` backends rely upon the [LIBXSMM](http://github.com/hfp/libxsmm) package to provide vectorized CPU performance. +If linking MKL and LIBXSMM is desired but the Makefile is not detecting `MKLROOT`, linking libCEED against MKL can be forced by setting the environment variable `MKL=1`. The `/gpu/cuda/*` backends provide GPU performance strictly using CUDA. -The `/gpu/hip/*` backends provide GPU performance strictly using HIP. They are based on -the `/gpu/cuda/*` backends. ROCm version 4.2 or newer is required. +The `/gpu/hip/*` backends provide GPU performance strictly using HIP. +They are based on the `/gpu/cuda/*` backends. +ROCm version 4.2 or newer is required. The `/gpu/*/magma/*` backends rely upon the [MAGMA](https://bitbucket.org/icl/magma) package. -To enable the MAGMA backends, the environment variable `MAGMA_DIR` must point to the top-level -MAGMA directory, with the MAGMA library located in `$(MAGMA_DIR)/lib/`. -By default, `MAGMA_DIR` is set to `../magma`; to build the MAGMA backends -with a MAGMA installation located elsewhere, create a link to `magma/` in libCEED's parent -directory, or set `MAGMA_DIR` to the proper location. MAGMA version 2.5.0 or newer is required. -Currently, each MAGMA library installation is only built for either CUDA or HIP. The corresponding -set of libCEED backends (`/gpu/cuda/magma/*` or `/gpu/hip/magma/*`) will automatically be built -for the version of the MAGMA library found in `MAGMA_DIR`. - -Users can specify a device for all CUDA, HIP, and MAGMA backends through adding `:device_id=#` -after the resource name. For example: +To enable the MAGMA backends, the environment variable `MAGMA_DIR` must point to the top-level MAGMA directory, with the MAGMA library located in `$(MAGMA_DIR)/lib/`. 
+By default, `MAGMA_DIR` is set to `../magma`; to build the MAGMA backends with a MAGMA installation located elsewhere, create a link to `magma/` in libCEED's parent directory, or set `MAGMA_DIR` to the proper location. +MAGMA version 2.5.0 or newer is required. +Currently, each MAGMA library installation is only built for either CUDA or HIP. +The corresponding set of libCEED backends (`/gpu/cuda/magma/*` or `/gpu/hip/magma/*`) will automatically be built for the version of the MAGMA library found in `MAGMA_DIR`. + +Users can specify a device for all CUDA, HIP, and MAGMA backends through adding `:device_id=#` after the resource name. +For example: > - `/gpu/cuda/gen:device_id=1` -The `/*/occa` backends rely upon the [OCCA](http://github.com/libocca/occa) package to provide -cross platform performance. To enable the OCCA backend, the environment variable `OCCA_DIR` must point -to the top-level OCCA directory, with the OCCA library located in the `${OCCA_DIR}/lib` (By default, -`OCCA_DIR` is set to `../occa`). +The `/*/occa` backends rely upon the [OCCA](http://github.com/libocca/occa) package to provide cross platform performance. +To enable the OCCA backend, the environment variable `OCCA_DIR` must point to the top-level OCCA directory, with the OCCA library located in the `${OCCA_DIR}/lib` (By default, `OCCA_DIR` is set to `../occa`). Additionally, users can pass specific OCCA device properties after setting the CEED resource. For example: @@ -250,12 +210,10 @@ The backends which are capable of generating reproducible results, with the prop ## Examples -libCEED comes with several examples of its usage, ranging from standalone C -codes in the `/examples/ceed` directory to examples based on external packages, -such as MFEM, PETSc, and Nek5000. Nek5000 v18.0 or greater is required. 
+libCEED comes with several examples of its usage, ranging from standalone C codes in the `/examples/ceed` directory to examples based on external packages, such as MFEM, PETSc, and Nek5000. +Nek5000 v18.0 or greater is required. -To build the examples, set the `MFEM_DIR`, `PETSC_DIR`, and -`NEK5K_DIR` variables and run: +To build the examples, set the `MFEM_DIR`, `PETSC_DIR`, and `NEK5K_DIR` variables and run: ``` cd examples/ @@ -338,13 +296,11 @@ make cd .. ``` -For the last example shown, sample meshes to be used in place of -`[.exo file]` can be found at +For the last example shown, sample meshes to be used in place of `[.exo file]` can be found at -The above code assumes a GPU-capable machine with the CUDA backends -enabled. Depending on the available backends, other CEED resource -specifiers can be provided with the `-ceed` option. Other command line -arguments can be found in [examples/petsc](https://github.com/CEED/libCEED/blob/main/examples/petsc/README.md). +The above code assumes a GPU-capable machine with the CUDA backends enabled. +Depending on the available backends, other CEED resource specifiers can be provided with the `-ceed` option. +Other command line arguments can be found in [examples/petsc](https://github.com/CEED/libCEED/blob/main/examples/petsc/README.md). % benchmarks-marker @@ -356,8 +312,7 @@ A sequence of benchmarks for all enabled backends can be run using: make benchmarks ``` -The results from the benchmarks are stored inside the `benchmarks/` directory -and they can be viewed using the commands (requires python with matplotlib): +The results from the benchmarks are stored inside the `benchmarks/` directory and they can be viewed using the commands (requires python with matplotlib): ``` cd benchmarks @@ -365,8 +320,8 @@ python postprocess-plot.py petsc-bps-bp1-*-output.txt python postprocess-plot.py petsc-bps-bp3-*-output.txt ``` -Using the `benchmarks` target runs a comprehensive set of benchmarks which may -take some time to run. 
Subsets of the benchmarks can be run using the scripts in the `benchmarks` folder. +Using the `benchmarks` target runs a comprehensive set of benchmarks which may take some time to run. +Subsets of the benchmarks can be run using the scripts in the `benchmarks` folder. For more details about the benchmarks, see the `benchmarks/README.md` file. @@ -391,9 +346,8 @@ make for_install=1 prefix=/path/to/install/dir make install prefix=/path/to/install/dir ``` -The usual variables like `CC` and `CFLAGS` are used, and optimization flags -for all languages can be set using the likes of `OPT='-O3 -march=native'`. Use -`STATIC=1` to build static libraries (`libceed.a`). +The usual variables like `CC` and `CFLAGS` are used, and optimization flags for all languages can be set using the likes of `OPT='-O3 -march=native'`. +Use `STATIC=1` to build static libraries (`libceed.a`). To install libCEED for Python, run: @@ -405,23 +359,20 @@ with the desired setuptools options, such as `--user`. ### pkg-config -In addition to library and header, libCEED provides a [pkg-config](https://en.wikipedia.org/wiki/Pkg-config) -file that can be used to easily compile and link. -[For example](https://people.freedesktop.org/~dbn/pkg-config-guide.html#faq), if -`$prefix` is a standard location or you set the environment variable -`PKG_CONFIG_PATH`: +In addition to library and header, libCEED provides a [pkg-config](https://en.wikipedia.org/wiki/Pkg-config) file that can be used to easily compile and link. +[For example](https://people.freedesktop.org/~dbn/pkg-config-guide.html#faq), if `$prefix` is a standard location or you set the environment variable `PKG_CONFIG_PATH`: ``` cc `pkg-config --cflags --libs ceed` -o myapp myapp.c ``` -will build `myapp` with libCEED. This can be used with the source or -installed directories. Most build systems have support for pkg-config. +will build `myapp` with libCEED. +This can be used with the source or installed directories. 
+Most build systems have support for pkg-config. ## Contact -You can reach the libCEED team by emailing [ceed-users@llnl.gov](mailto:ceed-users@llnl.gov) -or by leaving a comment in the [issue tracker](https://github.com/CEED/libCEED/issues). +You can reach the libCEED team by emailing [ceed-users@llnl.gov](mailto:ceed-users@llnl.gov) or by leaving a comment in the [issue tracker](https://github.com/CEED/libCEED/issues). ## How to Cite @@ -478,13 +429,11 @@ For libCEED's Python interface please cite: } ``` -The BiBTeX entries for these references can be found in the -`doc/bib/references.bib` file. +The BiBTeX entries for these references can be found in the `doc/bib/references.bib` file. ## Copyright -The following copyright applies to each file in the CEED software suite, unless -otherwise stated in the file: +The following copyright applies to each file in the CEED software suite, unless otherwise stated in the file: > Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at the > Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights reserved. diff --git a/RELEASING.md b/RELEASING.md index bf6c9b04dc..d7420e15a2 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -2,7 +2,9 @@ *These notes are meant for a maintainer to create official releases.* -In preparing a release, create a branch to hold pre-release commits. We ideally want all release mechanics (for all languages) to be in one commit, which will then be tagged. (This will change if/when we stop synchronizing releases across all language bindings.) +In preparing a release, create a branch to hold pre-release commits. +We ideally want all release mechanics (for all languages) to be in one commit, which will then be tagged. +(This will change if/when we stop synchronizing releases across all language bindings.) 
## Core C library @@ -15,19 +17,26 @@ The version number must be updated in * `Doxyfile` * `CITATION.cff` -Additionally, the release notes in `doc/sphinx/source/releasenotes.rst` should be updated. Use `git log --first-parent v0.7..` to get a sense of the pull requests that have been merged and thus might warrant emphasizing in the release notes. While doing this, gather a couple sentences for key features to highlight on [GitHub releases](https://github.com/CEED/libCEED/releases). The "Current Main" heading needs to be named for the release. +Additionally, the release notes in `doc/sphinx/source/releasenotes.rst` should be updated. +Use `git log --first-parent v0.7..` to get a sense of the pull requests that have been merged and thus might warrant emphasizing in the release notes. +While doing this, gather a couple sentences for key features to highlight on [GitHub releases](https://github.com/CEED/libCEED/releases). +The "Current Main" heading needs to be named for the release. -Use `make doc-latexpdf` to build a PDF users manual and inspect it for missing references or formatting problems (e.g., with images that were converted to PDF). This contains the same content as the website, but will be archived on Zenodo. +Use `make doc-latexpdf` to build a PDF users manual and inspect it for missing references or formatting problems (e.g., with images that were converted to PDF). +This contains the same content as the website, but will be archived on Zenodo. ### Quality control and good citizenry -1. If making a minor release, check for API and ABI changes that could break [semantic versioning](https://semver.org/). The [ABI compliance checker](https://github.com/lvc/abi-compliance-checker) is a useful tool, as is `nm -D libceed.so` and checking for public symbols (capital letters like `T` and `D` that are not namespaced). +1. If making a minor release, check for API and ABI changes that could break [semantic versioning](https://semver.org/). 
+The [ABI compliance checker](https://github.com/lvc/abi-compliance-checker) is a useful tool, as is `nm -D libceed.so` and checking for public symbols (capital letters like `T` and `D` that are not namespaced). -2. Double check testing on any architectures that may not be exercised in continuous integration (e.g., HPC facilities) and with users of libCEED, such as MFEM and PETSc applications. While unsupported changes do not prevent release, it's polite to make a PR to support the new release, and it's good for quality to test before taggin a libCEED release. +2. Double check testing on any architectures that may not be exercised in continuous integration (e.g., HPC facilities) and with users of libCEED, such as MFEM and PETSc applications. +While unsupported changes do not prevent release, it's polite to make a PR to support the new release, and it's good for quality to test before tagging a libCEED release. 3. Update and test all the language bindings (see below) within the branch. -4. Check that `spack install libceed@develop` works prior to tagging. The Spack `libceed/package.py` file should be updated immediately after tagging a release. +4. Check that `spack install libceed@develop` works prior to tagging. +The Spack `libceed/package.py` file should be updated immediately after tagging a release. ### Tagging and releasing on GitHub @@ -44,28 +53,24 @@ More frequently, this is amending the commit message on an in-progress commit, a ### Archive Users Manual on Zenodo -Generate the PDF using `make doc-latexpdf`, click "New version" on the [Zenodo -record](https://zenodo.org/record/4302737) and upload. Update author info if applicable (new -authors, or existing authors changing institutions). Make a new PR to update the version -number and DOI in `README.rst` and `doc/bib/references.bib`. +Generate the PDF using `make doc-latexpdf`, click "New version" on the [Zenodo record](https://zenodo.org/record/4302737) and upload. 
+Update author info if applicable (new authors, or existing authors changing institutions). +Make a new PR to update the version number and DOI in `README.rst` and `doc/bib/references.bib`. ## Julia libCEED's Julia interface (LibCEED.jl) has two components: * LibCEED.jl, the user-facing package that contains the Julia interface. -* libCEED_jll, a binary wrapper package ("jll package") that contains prebuilt binaries of the - libCEED library for various architectures. +* libCEED_jll, a binary wrapper package ("jll package") that contains prebuilt binaries of the libCEED library for various architectures. -When there is a new release of libCEED, both of these components need to be updated. First, -libCEED_jll is updated, and then LibCEED.jl. +When there is a new release of libCEED, both of these components need to be updated. +First, libCEED_jll is updated, and then LibCEED.jl. ### Updating libCEED_jll -The binary wrapper package libCEED_jll is updated by making a pull request against -[Yggdrasil](https://github.com/JuliaPackaging/Yggdrasil), the Julia community build tree. In this -PR, the file `L/libCEED/build_tarballs.jl` should be changed to update version number and change the -hash of the libCEED commit to use to build the binaries, similar to the following diff: +The binary wrapper package libCEED_jll is updated by making a pull request against [Yggdrasil](https://github.com/JuliaPackaging/Yggdrasil), the Julia community build tree. 
+In this PR, the file `L/libCEED/build_tarballs.jl` should be changed to update version number and change the hash of the libCEED commit to use to build the binaries, similar to the following diff: ```diff diff --git a/L/libCEED/build_tarballs.jl b/L/libCEED/build_tarballs.jl --- a/L/libCEED/build_tarballs.jl @@ -83,15 +88,13 @@ diff --git a/L/libCEED/build_tarballs.jl b/L/libCEED/build_tarballs.jl + GitSource("https://github.com/CEED/libCEED.git", "e8f234590eddcce2220edb1d6e979af7a3c35f82") ] ``` -After the PR is merged into Yggdrasil, the new version of libCEED_jll will automatically be -registered, and then we can proceed to update LibCEED.jl. +After the PR is merged into Yggdrasil, the new version of libCEED_jll will automatically be registered, and then we can proceed to update LibCEED.jl. ### Updating LibCEED.jl -After the binary wrapper package libCEED_jll has been updated, we are ready to update the main Julia -interface LibCEED.jl. This requires updating the file `julia/LibCEED.jl/Project.toml` in the libCEED -repository. The version number should be incremented, and the dependency on the updated version of -libCEED_jll should be listed: +After the binary wrapper package libCEED_jll has been updated, we are ready to update the main Julia interface LibCEED.jl. +This requires updating the file `julia/LibCEED.jl/Project.toml` in the libCEED repository. 
+The version number should be incremented, and the dependency on the updated version of libCEED_jll should be listed: ```diff diff --git a/julia/LibCEED.jl/Project.toml b/julia/LibCEED.jl/Project.toml --- a/julia/LibCEED.jl/Project.toml @@ -111,32 +114,28 @@ diff --git a/julia/LibCEED.jl/Project.toml b/julia/LibCEED.jl/Project.toml -libCEED_jll = "0.7" +libCEED_jll = "0.8" ``` -Once this change is merged into libCEED's `main` branch, the updated package version can be -registered using the GitHub registrator bot by commenting on the commit: +Once this change is merged into libCEED's `main` branch, the updated package version can be registered using the GitHub registrator bot by commenting on the commit: > @JuliaRegistrator register branch=main subdir=julia/LibCEED.jl -At this point, the bot should create a PR against the [general Julia -registry](https://github.com/JuliaRegistries/General), which should be merged automatically after a -short delay. +At this point, the bot should create a PR against the [general Julia registry](https://github.com/JuliaRegistries/General), which should be merged automatically after a short delay. ### Moving development tests to release tests -LibCEED.jl has both _development_ and _release_ unit tests. The _release_ tests are run both with -the current build of libCEED, and with the most recent release of libCEED_jll. The _development_ -tests may use features which were not available in the most recent release, and so they are only run -with the current build of libCEED. +LibCEED.jl has both _development_ and _release_ unit tests. +The _release_ tests are run both with the current build of libCEED, and with the most recent release of libCEED_jll. +The _development_ tests may use features which were not available in the most recent release, and so they are only run with the current build of libCEED. 
-Upon release, the development tests may be moved to the release tests, so that these features will -be tested against the most recent release of libCEED_jll. The release tests are found in the file -`julia/LibCEED.jl/test/runtests.jl` and the development tests are found in -`julia/LibCEED.jl/test/rundevtests.jl`. +Upon release, the development tests may be moved to the release tests, so that these features will be tested against the most recent release of libCEED_jll. +The release tests are found in the file `julia/LibCEED.jl/test/runtests.jl` and the development tests are found in `julia/LibCEED.jl/test/rundevtests.jl`. ## Python The Python package gets its version from `ceed.pc.template` so there are no file modifications necessary. -1. `make wheel` builds and tests the wheels using Docker. See the [manylinux repo](https://github.com/pypa/manylinux) for source and usage inforamtion. If this succeeds, the completed wheels are in `wheelhouse/libceed-0.8-cp39-cp39-manylinux2010_x86_64.whl`. +1. `make wheel` builds and tests the wheels using Docker. +See the [manylinux repo](https://github.com/pypa/manylinux) for source and usage information. +If this succeeds, the completed wheels are in `wheelhouse/libceed-0.8-cp39-cp39-manylinux2010_x86_64.whl`. 2. Manually test one or more of the wheels by creating a virtualenv and using `pip install wheelhouse/libceed-0.8-cp39-cp39-manylinux2010_x86_64.whl`, then `python -c 'import libceed'` or otherwise running tests. 3. Create a `~/.pypirc` with entries for `testpypi` (`https://test.pypi.org/legacy/`) and the real `pypi`. 4. Upload to `testpypi` using @@ -160,7 +159,8 @@ The Rust crates for libCEED are split into 1. [`libceed-sys`](https://crates.io/crates/libceed-sys), which handles building/finding the `libceed.so` or `libceed.a` library and providing unsafe Rust bindings (one to one with the C interface, using C FFI datatypes) 2. [`libceed`](https://crates.io/crates/libceed) containing the safe and idiomatic Rust bindings. 
-We currently apply the same version number across both of these crates. There are some tests for version strings matching, but in short, one needs to update the following locations. +We currently apply the same version number across both of these crates. +There are some tests for version strings matching, but in short, one needs to update the following locations. ```console $ git grep '0\.8' -- rust/ diff --git a/doc/sphinx/source/api/CeedElemRestriction.rst b/doc/sphinx/source/api/CeedElemRestriction.rst index bba6091c45..18758dfe85 100644 --- a/doc/sphinx/source/api/CeedElemRestriction.rst +++ b/doc/sphinx/source/api/CeedElemRestriction.rst @@ -3,8 +3,7 @@ CeedElemRestriction ******************************************************************* -A `CeedElemRestriction` decomposes elements and groups the degrees of freedom (dofs) -according to the different elements they belong to. +A `CeedElemRestriction` decomposes elements and groups the degrees of freedom (DoFs) according to the different elements they belong to. Expressing element decomposition and degrees of freedom over a mesh =================================================================== diff --git a/doc/sphinx/source/api/CeedOperator.rst b/doc/sphinx/source/api/CeedOperator.rst index 4cdeefbafa..389e211d28 100644 --- a/doc/sphinx/source/api/CeedOperator.rst +++ b/doc/sphinx/source/api/CeedOperator.rst @@ -3,9 +3,8 @@ CeedOperator ************************************** -A `CeedOperator` defines the finite/spectral element operator associated to a -:ref:`CeedQFunction`. A `CeedOperator` connects objects of the type -:ref:`CeedElemRestriction`, :ref:`CeedBasis`, and :ref:`CeedQFunction`. +A `CeedOperator` defines the finite/spectral element operator associated to a :ref:`CeedQFunction`. +A `CeedOperator` connects objects of the type :ref:`CeedElemRestriction`, :ref:`CeedBasis`, and :ref:`CeedQFunction`. 
Discrete operators on user vectors ====================================== diff --git a/doc/sphinx/source/api/CeedQFunction.rst b/doc/sphinx/source/api/CeedQFunction.rst index c0eb1ca5fb..51dfc99c4f 100644 --- a/doc/sphinx/source/api/CeedQFunction.rst +++ b/doc/sphinx/source/api/CeedQFunction.rst @@ -3,8 +3,7 @@ CeedQFunction *********************************************************************** -A `CeedQFunction` represents the spatial terms of the point-wise functions describing the -physics at the quadrature points. +A `CeedQFunction` represents the spatial terms of the point-wise functions describing the physics at the quadrature points. Resolution/space-independent weak forms and quadrature-based operations ======================================================================= diff --git a/doc/sphinx/source/api/CeedVector.rst b/doc/sphinx/source/api/CeedVector.rst index 093a4c990b..b11fe9a4eb 100644 --- a/doc/sphinx/source/api/CeedVector.rst +++ b/doc/sphinx/source/api/CeedVector.rst @@ -3,8 +3,7 @@ CeedVector ************************************** -A `CeedVector` constitutes the main data structure and serves as input/output -for the :ref:`CeedOperator`\s. +A `CeedVector` constitutes the main data structure and serves as input/output for the :ref:`CeedOperator`\s. Basic vector operations ====================================== diff --git a/doc/sphinx/source/ffi.md b/doc/sphinx/source/ffi.md index a7eeab400c..d3aafb3575 100644 --- a/doc/sphinx/source/ffi.md +++ b/doc/sphinx/source/ffi.md @@ -1,14 +1,10 @@ # Julia, Python, and Rust Interfaces -libCEED provides high-level interfaces using the Julia, Python, and Rust -programming languages. +libCEED provides high-level interfaces using the Julia, Python, and Rust programming languages. -More information about the Julia interface can be found at the [LibCEED.jl -documentation](http://ceed.exascaleproject.org/libCEED-julia-docs/dev/). 
+More information about the Julia interface can be found at the [LibCEED.jl documentation](http://ceed.exascaleproject.org/libCEED-julia-docs/dev/). -Usage of the Python interface is illustrated through a sequence of [Jupyter -Notebook tutorials](https://github.com/CEED/libCEED/tree/main/examples/python). More -information on the Python interface is available in the [SciPy paper](https://doi.org/10.25080/Majora-342d178e-00c). +Usage of the Python interface is illustrated through a sequence of [Jupyter Notebook tutorials](https://github.com/CEED/libCEED/tree/main/examples/python). +More information on the Python interface is available in the [SciPy paper](https://doi.org/10.25080/Majora-342d178e-00c). -More information about the Rust interface can be found at the [Rust interface -documentation](https://docs.rs/libceed). +More information about the Rust interface can be found at the [Rust interface documentation](https://docs.rs/libceed). diff --git a/doc/sphinx/source/intro.md b/doc/sphinx/source/intro.md index fb9fd8b958..474d4d33bf 100644 --- a/doc/sphinx/source/intro.md +++ b/doc/sphinx/source/intro.md @@ -1,78 +1,34 @@ # Introduction -Historically, conventional high-order finite element methods were rarely used for -industrial problems because the Jacobian rapidly loses sparsity as the order is -increased, leading to unaffordable solve times and memory requirements -{cite}`brown2010`. This effect typically limited the order of accuracy to at most -quadratic, especially because quadratic finite element formulations are computationally advantageous in terms of -floating point operations (FLOPS) per degree of freedom (DOF)---see -{numref}`fig-assembledVsmatrix-free`---, despite the fast convergence and favorable -stability properties offered by higher order discretizations. 
Nowadays, high-order -numerical methods, such as the spectral element method (SEM)---a special case of -nodal p-Finite Element Method (FEM) which can reuse the interpolation nodes for -quadrature---are employed, especially with (nearly) affine elements, because -linear constant coefficient problems can be very efficiently solved using the -fast diagonalization method combined with a multilevel coarse solve. In -{numref}`fig-assembledVsmatrix-free` we analyze and compare the theoretical costs, -of different configurations: assembling the sparse matrix representing the action -of the operator (labeled as *assembled*), non assembling the matrix and storing -only the metric terms needed as an operator setup-phase (labeled as *tensor-qstore*) -and non assembling the matrix and computing the metric terms on the fly and storing -a compact representation of the linearization at quadrature points (labeled as -*tensor*). In the right panel, we show the cost in terms of FLOPS/DOF. This metric for -computational efficiency made sense historically, when the performance was mostly -limited by processors' clockspeed. A more relevant performance plot for current -state-of-the-art high-performance machines (for which the bottleneck of performance is -mostly in the memory bandwith) is shown in the left panel of -{numref}`fig-assembledVsmatrix-free`, where the memory bandwith is measured in terms of -bytes/DOF. We can see that high-order methods, implemented properly with only partial -assembly, require optimal amount of memory transfers (with respect to the -polynomial order) and near-optimal FLOPs for operator evaluation. Thus, high-order -methods in matrix-free representation not only possess favorable properties, such as -higher accuracy and faster convergence to solution, but also manifest an efficiency gain -compared to their corresponding assembled representations. 
+Historically, conventional high-order finite element methods were rarely used for industrial problems because the Jacobian rapidly loses sparsity as the order is increased, leading to unaffordable solve times and memory requirements {cite}`brown2010`. +This effect typically limited the order of accuracy to at most quadratic, especially because quadratic finite element formulations are computationally advantageous in terms of floating point operations (FLOPS) per degree of freedom (DOF)---see {numref}`fig-assembledVsmatrix-free`---, despite the fast convergence and favorable stability properties offered by higher order discretizations. +Nowadays, high-order numerical methods, such as the spectral element method (SEM)---a special case of nodal p-Finite Element Method (FEM) which can reuse the interpolation nodes for +quadrature---are employed, especially with (nearly) affine elements, because linear constant coefficient problems can be very efficiently solved using the fast diagonalization method combined with a multilevel coarse solve. +In {numref}`fig-assembledVsmatrix-free` we analyze and compare the theoretical costs of different configurations: assembling the sparse matrix representing the action of the operator (labeled as *assembled*), not assembling the matrix and storing only the metric terms needed as an operator setup-phase (labeled as *tensor-qstore*) and not assembling the matrix and computing the metric terms on the fly and storing a compact representation of the linearization at quadrature points (labeled as *tensor*). In the right panel, we show the cost in terms of FLOPS/DOF. +This metric for computational efficiency made sense historically, when the performance was mostly limited by processors' clock speed. 
+A more relevant performance plot for current state-of-the-art high-performance machines (for which the bottleneck of performance is mostly in the memory bandwidth) is shown in the left panel of {numref}`fig-assembledVsmatrix-free`, where the memory bandwidth is measured in terms of bytes/DOF. +We can see that high-order methods, implemented properly with only partial assembly, require an optimal amount of memory transfers (with respect to the polynomial order) and near-optimal FLOPs for operator evaluation. +Thus, high-order methods in matrix-free representation not only possess favorable properties, such as higher accuracy and faster convergence to solution, but also manifest an efficiency gain compared to their corresponding assembled representations. (fig-assembledvsmatrix-free)= :::{figure} ../../img/TensorVsAssembly.png -Comparison of memory transfer and floating point operations per -degree of freedom for different representations of a linear operator for a PDE in -3D with $b$ components and variable coefficients arising due to Newton -linearization of a material nonlinearity. The representation labeled as *tensor* -computes metric terms on the fly and stores a compact representation of the -linearization at quadrature points. The representation labeled as *tensor-qstore* -pulls the metric terms into the stored representation. The *assembled* representation -uses a (block) CSR format. +Comparison of memory transfer and floating point operations per degree of freedom for different representations of a linear operator for a PDE in 3D with $b$ components and variable coefficients arising due to Newton linearization of a material nonlinearity. +The representation labeled as *tensor* computes metric terms on the fly and stores a compact representation of the linearization at quadrature points. The representation labeled as *tensor-qstore* pulls the metric terms into the stored representation. +The *assembled* representation uses a (block) CSR format. 
::: -Furthermore, software packages that provide high-performance implementations have often -been special-purpose and intrusive. libCEED {cite}`libceed-joss-paper` is a new library that offers a purely -algebraic interface for matrix-free operator representation and supports run-time -selection of implementations tuned for a variety of computational device types, -including CPUs and GPUs. libCEED's purely algebraic interface can unobtrusively be -integrated in new and legacy software to provide performance portable interfaces. -While libCEED's focus is on high-order finite elements, the approach is algebraic -and thus applicable to other discretizations in factored form. libCEED's role, as -a lightweight portable library that allows a wide variety of applications to share -highly optimized discretization kernels, is illustrated in -{numref}`fig-libCEED-backends`, where a non-exhaustive list of specialized -implementations (backends) is provided. libCEED provides a low-level Application -Programming Interface (API) for user codes so that applications with their own -discretization infrastructure (e.g., those in [PETSc](https://www.mcs.anl.gov/petsc/), -[MFEM](https://mfem.org/) and [Nek5000](https://nek5000.mcs.anl.gov/)) can evaluate -and use the core operations provided by libCEED. GPU implementations are available via -pure [CUDA](https://developer.nvidia.com/about-cuda) and pure [HIP](https://rocmdocs.amd.com) as well as the -[OCCA](http://github.com/libocca/occa) and [MAGMA](https://bitbucket.org/icl/magma) -libraries. CPU implementations are available via pure C and AVX intrinsics as well as -the [LIBXSMM](http://github.com/hfp/libxsmm) library. libCEED provides a unified -interface, so that users only need to write a single source code and can select the -desired specialized implementation at run time. Moreover, each process or thread can -instantiate an arbitrary number of backends. 
+Furthermore, software packages that provide high-performance implementations have often been special-purpose and intrusive. libCEED {cite}`libceed-joss-paper` is a new library that offers a purely algebraic interface for matrix-free operator representation and supports run-time selection of implementations tuned for a variety of computational device types, including CPUs and GPUs. +libCEED's purely algebraic interface can unobtrusively be integrated in new and legacy software to provide performance portable interfaces. +While libCEED's focus is on high-order finite elements, the approach is algebraic and thus applicable to other discretizations in factored form. +libCEED's role, as a lightweight portable library that allows a wide variety of applications to share highly optimized discretization kernels, is illustrated in {numref}`fig-libCEED-backends`, where a non-exhaustive list of specialized implementations (backends) is provided. +libCEED provides a low-level Application Programming Interface (API) for user codes so that applications with their own discretization infrastructure (e.g., those in [PETSc](https://www.mcs.anl.gov/petsc/), [MFEM](https://mfem.org/) and [Nek5000](https://nek5000.mcs.anl.gov/)) can evaluate and use the core operations provided by libCEED. GPU implementations are available via pure [CUDA](https://developer.nvidia.com/about-cuda) and pure [HIP](https://rocmdocs.amd.com) as well as the [OCCA](http://github.com/libocca/occa) and [MAGMA](https://bitbucket.org/icl/magma) libraries. +CPU implementations are available via pure C and AVX intrinsics as well as the [LIBXSMM](http://github.com/hfp/libxsmm) library. +libCEED provides a unified interface, so that users only need to write a single source code and can select the desired specialized implementation at run time. Moreover, each process or thread can instantiate an arbitrary number of backends. 
(fig-libceed-backends)= :::{figure} ../../img/libCEEDBackends.png -The role of libCEED as a lightweight, portable library which provides a low-level -API for efficient, specialized implementations. libCEED allows different applications -to share highly optimized discretization kernels. +The role of libCEED as a lightweight, portable library which provides a low-level API for efficient, specialized implementations. +libCEED allows different applications to share highly optimized discretization kernels. ::: diff --git a/doc/sphinx/source/libCEEDapi.md b/doc/sphinx/source/libCEEDapi.md index 5e0ba6b2e3..7597b50cdf 100644 --- a/doc/sphinx/source/libCEEDapi.md +++ b/doc/sphinx/source/libCEEDapi.md @@ -1,25 +1,18 @@ # Interface Concepts -This page provides a brief description of the theoretical foundations and the -practical implementation of the libCEED library. +This page provides a brief description of the theoretical foundations and the practical implementation of the libCEED library. (theoretical-framework)= ## Theoretical Framework -In finite element formulations, the weak form of a Partial Differential Equation -(PDE) is evaluated on a subdomain $\Omega_e$ (element) and the local results -are composed into a larger system of equations that models the entire problem on -the global domain $\Omega$. In particular, when high-order finite elements or -spectral elements are used, the resulting sparse matrix representation of the global -operator is computationally expensive, with respect to both the memory transfer and -floating point operations needed for its evaluation. libCEED provides an interface -for matrix-free operator description that enables efficient evaluation on a variety -of computational device types (selectable at run time). We present here the notation -and the mathematical formulation adopted in libCEED. 
+In finite element formulations, the weak form of a Partial Differential Equation (PDE) is evaluated on a subdomain $\Omega_e$ (element) and the local results are composed into a larger system of equations that models the entire problem on the global domain $\Omega$. +In particular, when high-order finite elements or spectral elements are used, the resulting sparse matrix representation of the global operator is computationally expensive, with respect to both the memory transfer and floating point operations needed for its evaluation. +libCEED provides an interface for matrix-free operator description that enables efficient evaluation on a variety of computational device types (selectable at run time). +We present here the notation and the mathematical formulation adopted in libCEED. -We start by considering the discrete residual $F(u)=0$ formulation -in weak form. We first define the $L^2$ inner product between real-valued functions +We start by considering the discrete residual $F(u)=0$ formulation in weak form. +We first define the $L^2$ inner product between real-valued functions $$ \langle v, u \rangle = \int_\Omega v u d \bm{x}, @@ -27,61 +20,35 @@ $$ where $\bm{x} \in \mathbb{R}^d \supset \Omega$. -We want to find $u$ in a suitable space $V_D$, -such that +We want to find $u$ in a suitable space $V_D$, such that $$ \langle \bm v, \bm f(u) \rangle = \int_\Omega \bm v \cdot \bm f_0 (u, \nabla u) + \nabla \bm v : \bm f_1 (u, \nabla u) = 0 $$ (residual) -for all $\bm v$ in the corresponding homogeneous space $V_0$, where $\bm f_0$ -and $\bm f_1$ contain all possible sources in the problem. We notice here that -$\bm f_0$ represents all terms in {eq}`residual` which multiply the (possibly vector-valued) test -function $\bm v$ and $\bm f_1$ all terms which multiply its gradient $\nabla \bm v$. -For an n-component problems in $d$ dimensions, $\bm f_0 \in \mathbb{R}^n$ and -$\bm f_1 \in \mathbb{R}^{nd}$. 
+for all $\bm v$ in the corresponding homogeneous space $V_0$, where $\bm f_0$ and $\bm f_1$ contain all possible sources in the problem. +We notice here that $\bm f_0$ represents all terms in {eq}`residual` which multiply the (possibly vector-valued) test function $\bm v$ and $\bm f_1$ all terms which multiply its gradient $\nabla \bm v$. +For an $n$-component problem in $d$ dimensions, $\bm f_0 \in \mathbb{R}^n$ and $\bm f_1 \in \mathbb{R}^{nd}$. :::{note} -The notation $\nabla \bm v \!:\! \bm f_1$ represents contraction over both -fields and spatial dimensions while a single dot represents contraction in just one, -which should be clear from context, e.g., $\bm v \cdot \bm f_0$ contracts only over -fields. +The notation $\nabla \bm v \!:\! \bm f_1$ represents contraction over both fields and spatial dimensions while a single dot represents contraction in just one, which should be clear from context, e.g., $\bm v \cdot \bm f_0$ contracts only over fields. ::: :::{note} -In the code, the function that represents the weak form at quadrature -points is called the {ref}`CeedQFunction`. In the {ref}`Examples` provided with the -library (in the {file}`examples/` directory), we store the term $\bm f_0$ directly -into `v`, and the term $\bm f_1$ directly into `dv` (which stands for -$\nabla \bm v$). If equation {eq}`residual` only presents a term of the -type $\bm f_0$, the {ref}`CeedQFunction` will only have one output argument, -namely `v`. If equation {eq}`residual` also presents a term of the type -$\bm f_1$, then the {ref}`CeedQFunction` will have two output arguments, namely, -`v` and `dv`. +In the code, the function that represents the weak form at quadrature points is called the {ref}`CeedQFunction`. +In the {ref}`Examples` provided with the library (in the {file}`examples/` directory), we store the term $\bm f_0$ directly into `v`, and the term $\bm f_1$ directly into `dv` (which stands for $\nabla \bm v$). 
+If equation {eq}`residual` only presents a term of the type $\bm f_0$, the {ref}`CeedQFunction` will only have one output argument, namely `v`. +If equation {eq}`residual` also presents a term of the type $\bm f_1$, then the {ref}`CeedQFunction` will have two output arguments, namely, `v` and `dv`. ::: ## Finite Element Operator Decomposition -Finite element operators are typically defined through weak formulations of -partial differential equations that involve integration over a computational -mesh. The required integrals are computed by splitting them as a sum over the -mesh elements, mapping each element to a simple *reference* element (e.g. the -unit square) and applying a quadrature rule in reference space. - -This sequence of operations highlights an inherent hierarchical structure -present in all finite element operators where the evaluation starts on *global -(trial) degrees of freedom (dofs) or nodes on the whole mesh*, restricts to -*dofs on subdomains* (groups of elements), then moves to independent -*dofs on each element*, transitions to independent *quadrature points* in -reference space, performs the integration, and then goes back in reverse order -to global (test) degrees of freedom on the whole mesh. - -This is illustrated below for the simple case of symmetric linear operator on -third order ($Q_3$) scalar continuous ($H^1$) elements, where we use -the notions **T-vector**, **L-vector**, **E-vector** and **Q-vector** to represent -the sets corresponding to the (true) degrees of freedom on the global mesh, the split -local degrees of freedom on the subdomains, the split degrees of freedom on the -mesh elements, and the values at quadrature points, respectively. +Finite element operators are typically defined through weak formulations of partial differential equations that involve integration over a computational mesh. 
+The required integrals are computed by splitting them as a sum over the mesh elements, mapping each element to a simple *reference* element (e.g. the unit square) and applying a quadrature rule in reference space. + +This sequence of operations highlights an inherent hierarchical structure present in all finite element operators where the evaluation starts on *global (trial) degrees of freedom (dofs) or nodes on the whole mesh*, restricts to *dofs on subdomains* (groups of elements), then moves to independent *dofs on each element*, transitions to independent *quadrature points* in reference space, performs the integration, and then goes back in reverse order to global (test) degrees of freedom on the whole mesh. + +This is illustrated below for the simple case of symmetric linear operator on third order ($Q_3$) scalar continuous ($H^1$) elements, where we use the notions **T-vector**, **L-vector**, **E-vector** and **Q-vector** to represent the sets corresponding to the (true) degrees of freedom on the global mesh, the split local degrees of freedom on the subdomains, the split degrees of freedom on the mesh elements, and the values at quadrature points, respectively. We refer to the operators that connect the different types of vectors as: @@ -90,8 +57,7 @@ We refer to the operators that connect the different types of vectors as: - Basis (Dofs-to-Qpts) evaluator $\bm{B}$ - Operator at quadrature points $\bm{D}$ -More generally, when the test and trial space differ, they get their own -versions of $\bm{P}$, $\bm{\mathcal{E}}$ and $\bm{B}$. +More generally, when the test and trial space differ, they get their own versions of $\bm{P}$, $\bm{\mathcal{E}}$ and $\bm{B}$. (fig-operator-decomp)= @@ -99,12 +65,8 @@ versions of $\bm{P}$, $\bm{\mathcal{E}}$ and $\bm{B}$. 
Operator Decomposition ::: -Note that in the case of adaptive mesh refinement (AMR), the restrictions -$\bm{P}$ and $\bm{\mathcal{E}}$ will involve not just extracting sub-vectors, -but evaluating values at constrained degrees of freedom through the AMR interpolation. -There can also be several levels of subdomains ($\bm P_1$, $\bm P_2$, -etc.), and it may be convenient to split $\bm{D}$ as the product of several -operators ($\bm D_1$, $\bm D_2$, etc.). +Note that in the case of adaptive mesh refinement (AMR), the restrictions $\bm{P}$ and $\bm{\mathcal{E}}$ will involve not just extracting sub-vectors, but evaluating values at constrained degrees of freedom through the AMR interpolation. +There can also be several levels of subdomains ($\bm P_1$, $\bm P_2$, etc.), and it may be convenient to split $\bm{D}$ as the product of several operators ($\bm D_1$, $\bm D_2$, etc.). ### Terminology and Notation @@ -121,23 +83,17 @@ Vector representation/storage categories: - Local (w.r.t. processors) degrees of freedom/unknowns, **L-vector**: - > - each unknown $i$ has exactly one copy on each processor that owns an - > element containing $i$ - > - this is an overlapping vector decomposition with overlaps only across - > different processors---there is no duplication of unknowns on a single - > processor - > - the shared dofs/unknowns are the overlapping dofs, i.e. the ones that have - > more than one copy, on different processors. + > - each unknown $i$ has exactly one copy on each processor that owns an element containing $i$ + > - this is an overlapping vector decomposition with overlaps only across different processors---there is no duplication of unknowns on a single processor + > - the shared dofs/unknowns are the overlapping dofs, i.e. the ones that have more than one copy, on different processors. 
> > ```{image} ../../img/L-vector.svg > ``` - Per element decomposition, **E-vector**: - > - each unknown $i$ has as many copies as the number of elements that contain - > $i$ - > - usually, the copies of the unknowns are grouped by the element they belong - > to. + > - each unknown $i$ has as many copies as the number of elements that contain $i$ + > - usually, the copies of the unknowns are grouped by the element they belong to. > > ```{image} ../../img/E-vector.svg > ``` @@ -145,27 +101,20 @@ Vector representation/storage categories: - In the case of AMR with hanging nodes (giving rise to hanging dofs): > - the **L-vector** is enhanced with the hanging/dependent dofs - > - the additional hanging/dependent dofs are duplicated when they are shared - > by multiple processors - > - this way, an **E-vector** can be derived from an **L-vector** without any - > communications and without additional computations to derive the dependent - > dofs - > - in other words, an entry in an **E-vector** is obtained by copying an entry - > from the corresponding **L-vector**, optionally switching the sign of the - > entry (for $H(\mathrm{div})$---and $H(\mathrm{curl})$-conforming spaces). + > - the additional hanging/dependent dofs are duplicated when they are shared by multiple processors + > - this way, an **E-vector** can be derived from an **L-vector** without any communications and without additional computations to derive the dependent dofs + > - in other words, an entry in an **E-vector** is obtained by copying an entry from the corresponding **L-vector**, optionally switching the sign of the entry (for $H(\mathrm{div})$---and $H(\mathrm{curl})$-conforming spaces). > > ```{image} ../../img/L-vector-AMR.svg > ``` - In the case of variable order spaces: - > - the dependent dofs (usually on the higher-order side of a face/edge) can - > be treated just like the hanging/dependent dofs case. 
+ > - the dependent dofs (usually on the higher-order side of a face/edge) can be treated just like the hanging/dependent dofs case. - Quadrature point vector, **Q-vector**: - > - this is similar to **E-vector** where instead of dofs, the vector represents - > values at quadrature points, grouped by element. + > - this is similar to **E-vector** where instead of dofs, the vector represents values at quadrature points, grouped by element. - In many cases it is useful to distinguish two types of vectors: @@ -173,30 +122,26 @@ Vector representation/storage categories: > - here X can be any of the T, L, E, or Q categories > - for example, the mass matrix operator maps a **T-vector** to a **T'-vector** > - the solutions vector is a **T-vector**, and the RHS vector is a **T'-vector** - > - using the parallel prolongation operator, one can map the solution - > **T-vector** to a solution **L-vector**, etc. + > - using the parallel prolongation operator, one can map the solution **T-vector** to a solution **L-vector**, etc. Operator representation/storage/action categories: - Full true-dof parallel assembly, **TA**, or **A**: > - ParCSR or similar format - > - the T in TA indicates that the data format represents an operator from a - > **T-vector** to a **T'-vector**. + > - the T in TA indicates that the data format represents an operator from a **T-vector** to a **T'-vector**. - Full local assembly, **LA**: > - CSR matrix on each rank - > - the parallel prolongation operator, $\bm{P}$, (and its transpose) should use - > optimized matrix-free action + > - the parallel prolongation operator, $\bm{P}$, (and its transpose) should use optimized matrix-free action > - note that $\bm{P}$ is the operator mapping T-vectors to L-vectors. 
- Element matrix assembly, **EA**: > - each element matrix is stored as a dense matrix > - optimized element and parallel prolongation operators - > - note that the element prolongation operator is the mapping from an - > **L-vector** to an **E-vector**. + > - note that the element prolongation operator is the mapping from an **L-vector** to an **E-vector**. - Quadrature-point/partial assembly, **QA** or **PA**: @@ -206,149 +151,85 @@ Operator representation/storage/action categories: - Unassembled option, **UA** or **U**: > - no assembly step - > - the action uses directly the mesh node coordinates, and assumes specific - > form of the coefficient, e.g. constant, piecewise-constant, or given as a - > **Q-vector** (Q-coefficient). + > - the action uses directly the mesh node coordinates, and assumes specific form of the coefficient, e.g. constant, piecewise-constant, or given as a **Q-vector** (Q-coefficient). ### Partial Assembly -Since the global operator $\bm{A}$ is just a series of variational restrictions -with $\bm{B}$, $\bm{\mathcal{E}}$ and $\bm{P}$, starting from its -point-wise kernel $\bm{D}$, a "matvec" with $\bm{A}$ can be -performed by evaluating and storing some of the innermost variational restriction -matrices, and applying the rest of the operators "on-the-fly". For example, one can -compute and store a global matrix on **T-vector** level. Alternatively, one can compute -and store only the subdomain (**L-vector**) or element (**E-vector**) matrices and -perform the action of $\bm{A}$ using matvecs with $\bm{P}$ or -$\bm{P}$ and $\bm{\mathcal{E}}$. While these options are natural for -low-order discretizations, they are not a good fit for high-order methods due to -the amount of FLOPs needed for their evaluation, as well as the memory transfer -needed for a matvec. 
- -Our focus in libCEED, instead, is on **partial assembly**, where we compute and -store only $\bm{D}$ (or portions of it) and evaluate the actions of -$\bm{P}$, $\bm{\mathcal{E}}$ and $\bm{B}$ on-the-fly. -Critically for performance, we take advantage of the tensor-product structure of the -degrees of freedom and quadrature points on *quad* and *hex* elements to perform the -action of $\bm{B}$ without storing it as a matrix. - -Implemented properly, the partial assembly algorithm requires optimal amount of -memory transfers (with respect to the polynomial order) and near-optimal FLOPs -for operator evaluation. It consists of an operator *setup* phase, that -evaluates and stores $\bm{D}$ and an operator *apply* (evaluation) phase that -computes the action of $\bm{A}$ on an input vector. When desired, the setup -phase may be done as a side-effect of evaluating a different operator, such as a -nonlinear residual. The relative costs of the setup and apply phases are -different depending on the physics being expressed and the representation of -$\bm{D}$. +Since the global operator $\bm{A}$ is just a series of variational restrictions with $\bm{B}$, $\bm{\mathcal{E}}$ and $\bm{P}$, starting from its point-wise kernel $\bm{D}$, a "matvec" with $\bm{A}$ can be performed by evaluating and storing some of the innermost variational restriction matrices, and applying the rest of the operators "on-the-fly". +For example, one can compute and store a global matrix on **T-vector** level. +Alternatively, one can compute and store only the subdomain (**L-vector**) or element (**E-vector**) matrices and perform the action of $\bm{A}$ using matvecs with $\bm{P}$ or $\bm{P}$ and $\bm{\mathcal{E}}$. +While these options are natural for low-order discretizations, they are not a good fit for high-order methods due to the amount of FLOPs needed for their evaluation, as well as the memory transfer needed for a matvec. 
+ +Our focus in libCEED, instead, is on **partial assembly**, where we compute and store only $\bm{D}$ (or portions of it) and evaluate the actions of $\bm{P}$, $\bm{\mathcal{E}}$ and $\bm{B}$ on-the-fly. +Critically for performance, we take advantage of the tensor-product structure of the degrees of freedom and quadrature points on *quad* and *hex* elements to perform the action of $\bm{B}$ without storing it as a matrix. + +Implemented properly, the partial assembly algorithm requires optimal amount of memory transfers (with respect to the polynomial order) and near-optimal FLOPs for operator evaluation. +It consists of an operator *setup* phase, that evaluates and stores $\bm{D}$ and an operator *apply* (evaluation) phase that computes the action of $\bm{A}$ on an input vector. +When desired, the setup phase may be done as a side-effect of evaluating a different operator, such as a nonlinear residual. +The relative costs of the setup and apply phases are different depending on the physics being expressed and the representation of $\bm{D}$. ### Parallel Decomposition -After the application of each of the first three transition operators, -$\bm{P}$, $\bm{\mathcal{E}}$ and $\bm{B}$, the operator evaluation -is decoupled on their ranges, so $\bm{P}$, $\bm{\mathcal{E}}$ and -$\bm{B}$ allow us to "zoom-in" to subdomain, element and quadrature point -level, ignoring the coupling at higher levels. - -Thus, a natural mapping of $\bm{A}$ on a parallel computer is to split the -**T-vector** over MPI ranks (a non-overlapping decomposition, as is typically -used for sparse matrices), and then split the rest of the vector types over -computational devices (CPUs, GPUs, etc.) as indicated by the shaded regions in -the diagram above. 
- -One of the advantages of the decomposition perspective in these settings is that -the operators $\bm{P}$, $\bm{\mathcal{E}}$, $\bm{B}$ and -$\bm{D}$ clearly separate the MPI parallelism -in the operator ($\bm{P}$) from the unstructured mesh topology -($\bm{\mathcal{E}}$), the choice of the finite element space/basis ($\bm{B}$) -and the geometry and point-wise physics $\bm{D}$. These components also -naturally fall in different classes of numerical algorithms -- parallel (multi-device) -linear algebra for $\bm{P}$, sparse (on-device) linear algebra for -$\bm{\mathcal{E}}$, dense/structured linear algebra (tensor contractions) for -$\bm{B}$ and parallel point-wise evaluations for $\bm{D}$. - -Currently in libCEED, it is assumed that the host application manages the global -**T-vectors** and the required communications among devices (which are generally -on different compute nodes) with **P**. Our API is thus focused on the -**L-vector** level, where the logical devices, which in the library are -represented by the {ref}`Ceed` object, are independent. Each MPI rank can use one or -more {ref}`Ceed`s, and each {ref}`Ceed`, in turn, can represent one or more physical -devices, as long as libCEED backends support such configurations. The idea is -that every MPI rank can use any logical device it is assigned at runtime. For -example, on a node with 2 CPU sockets and 4 GPUs, one may decide to use 6 MPI -ranks (each using a single {ref}`Ceed` object): 2 ranks using 1 CPU socket each, and -4 using 1 GPU each. Another choice could be to run 1 MPI rank on the whole node -and use 5 {ref}`Ceed` objects: 1 managing all CPU cores on the 2 sockets and 4 -managing 1 GPU each. The communications among the devices, e.g. required for -applying the action of $\bm{P}$, are currently out of scope of libCEED. 
The -interface is non-blocking for all operations involving more than O(1) data, -allowing operations performed on a coprocessor or worker threads to overlap with -operations on the host. +After the application of each of the first three transition operators, $\bm{P}$, $\bm{\mathcal{E}}$ and $\bm{B}$, the operator evaluation is decoupled on their ranges, so $\bm{P}$, $\bm{\mathcal{E}}$ and $\bm{B}$ allow us to "zoom-in" to subdomain, element and quadrature point level, ignoring the coupling at higher levels. + +Thus, a natural mapping of $\bm{A}$ on a parallel computer is to split the **T-vector** over MPI ranks (a non-overlapping decomposition, as is typically used for sparse matrices), and then split the rest of the vector types over computational devices (CPUs, GPUs, etc.) as indicated by the shaded regions in the diagram above. + +One of the advantages of the decomposition perspective in these settings is that the operators $\bm{P}$, $\bm{\mathcal{E}}$, $\bm{B}$ and $\bm{D}$ clearly separate the MPI parallelism in the operator ($\bm{P}$) from the unstructured mesh topology ($\bm{\mathcal{E}}$), the choice of the finite element space/basis ($\bm{B}$) and the geometry and point-wise physics $\bm{D}$. +These components also naturally fall in different classes of numerical algorithms -- parallel (multi-device) linear algebra for $\bm{P}$, sparse (on-device) linear algebra for $\bm{\mathcal{E}}$, dense/structured linear algebra (tensor contractions) for $\bm{B}$ and parallel point-wise evaluations for $\bm{D}$. + +Currently in libCEED, it is assumed that the host application manages the global **T-vectors** and the required communications among devices (which are generally on different compute nodes) with **P**. +Our API is thus focused on the **L-vector** level, where the logical devices, which in the library are represented by the {ref}`Ceed` object, are independent. 
+Each MPI rank can use one or more {ref}`Ceed`s, and each {ref}`Ceed`, in turn, can represent one or more physical devices, as long as libCEED backends support such configurations. +The idea is that every MPI rank can use any logical device it is assigned at runtime. +For example, on a node with 2 CPU sockets and 4 GPUs, one may decide to use 6 MPI ranks (each using a single {ref}`Ceed` object): 2 ranks using 1 CPU socket each, and 4 using 1 GPU each. +Another choice could be to run 1 MPI rank on the whole node and use 5 {ref}`Ceed` objects: 1 managing all CPU cores on the 2 sockets and 4 managing 1 GPU each. +The communications among the devices, e.g. required for applying the action of $\bm{P}$, are currently out of scope of libCEED. +The interface is non-blocking for all operations involving more than O(1) data, allowing operations performed on a coprocessor or worker threads to overlap with operations on the host. ## API Description -The libCEED API takes an algebraic approach, where the user essentially -describes in the *frontend* the operators $\bm{\bm{\mathcal{E}}}$, $\bm{B}$, and $\bm{D}$ and the library -provides *backend* implementations and coordinates their action to the original -operator on **L-vector** level (i.e. independently on each device / MPI task). -This is visualized in the schematic below; "active" and "passive" inputs/outputs -will be discussed in more detail later. +The libCEED API takes an algebraic approach, where the user essentially describes in the *frontend* the operators $\bm{\bm{\mathcal{E}}}$, $\bm{B}$, and $\bm{D}$ and the library provides *backend* implementations and coordinates their action to the original operator on **L-vector** level (i.e. independently on each device / MPI task). +This is visualized in the schematic below; "active" and "passive" inputs/outputs will be discussed in more detail later. 
(fig-operator-schematic)= -:::{figure} ../../img/libceed_schematic.svg -Flow of data through vector types inside libCEED Operators, through backend implementations -of $\bm{\bm{\mathcal{E}}}$, $\bm{B}$, and $\bm{D}$ +:::{figure} ../../img/libceed_schematic.svg +Flow of data through vector types inside libCEED Operators, through backend implementations of $\bm{\bm{\mathcal{E}}}$, $\bm{B}$, and $\bm{D}$ ::: -One of the advantages of this purely algebraic description is that it already -includes all the finite element information, so the backends can operate on -linear algebra level without explicit finite element code. The frontend -description is general enough to support a wide variety of finite element -algorithms, as well as some other types algorithms such as spectral finite -differences. The separation of the front- and backends enables applications to -easily switch/try different backends. It also enables backend developers to -impact many applications from a single implementation. - -Our long-term vision is to include a variety of backend implementations in -libCEED, ranging from reference kernels to highly optimized kernels targeting -specific devices (e.g. GPUs) or specific polynomial orders. A simple reference -backend implementation is provided in the file +One of the advantages of this purely algebraic description is that it already includes all the finite element information, so the backends can operate on linear algebra level without explicit finite element code. +The frontend description is general enough to support a wide variety of finite element algorithms, as well as some other types of algorithms such as spectral finite differences. +The separation of the front- and backends enables applications to easily switch/try different backends. +It also enables backend developers to impact many applications from a single implementation. 
+ +Our long-term vision is to include a variety of backend implementations in libCEED, ranging from reference kernels to highly optimized kernels targeting specific devices (e.g. GPUs) or specific polynomial orders. +A simple reference backend implementation is provided in the file [ceed-ref.c](https://github.com/CEED/libCEED/blob/main/backends/ref/ceed-ref.c). -On the frontend, the mapping between the decomposition concepts and the code -implementation is as follows: +On the frontend, the mapping between the decomposition concepts and the code implementation is as follows: - **L-**, **E-** and **Q-vector** are represented as variables of type {ref}`CeedVector`. - (A backend may choose to operate incrementally without forming explicit **E-** or - **Q-vectors**.) + (A backend may choose to operate incrementally without forming explicit **E-** or **Q-vectors**.) - $\bm{\mathcal{E}}$ is represented as variable of type {ref}`CeedElemRestriction`. - $\bm{B}$ is represented as variable of type {ref}`CeedBasis`. - the action of $\bm{D}$ is represented as variable of type {ref}`CeedQFunction`. -- the overall operator $\bm{\mathcal{E}}^T \bm{B}^T \bm{D} \bm{B} \bm{\mathcal{E}}$ - is represented as variable of type - {ref}`CeedOperator` and its action is accessible through {c:func}`CeedOperatorApply()`. +- the overall operator $\bm{\mathcal{E}}^T \bm{B}^T \bm{D} \bm{B} \bm{\mathcal{E}}$ is represented as variable of type {ref}`CeedOperator` and its action is accessible through {c:func}`CeedOperatorApply()`. -To clarify these concepts and illustrate how they are combined in the API, -consider the implementation of the action of a simple 1D mass matrix -(cf. [tests/t500-operator.c](https://github.com/CEED/libCEED/blob/main/tests/t500-operator.c)). +To clarify these concepts and illustrate how they are combined in the API, consider the implementation of the action of a simple 1D mass matrix (cf. 
[tests/t500-operator.c](https://github.com/CEED/libCEED/blob/main/tests/t500-operator.c)). ```{literalinclude} ../../../tests/t500-operator.c :language: c :linenos: true ``` -In the following figure, we specialize the schematic used above for general operators so that -it corresponds to the specific setup and mass operators as implemented in the sample code. We show -that the active output of the setup operator, combining the quadrature weights with the Jacobian -information for the mesh transformation, becomes a passive input to the mass operator. Notations -denote the libCEED function used to set the properties of the input and output fields. +In the following figure, we specialize the schematic used above for general operators so that it corresponds to the specific setup and mass operators as implemented in the sample code. +We show that the active output of the setup operator, combining the quadrature weights with the Jacobian information for the mesh transformation, becomes a passive input to the mass operator. +Notations denote the libCEED function used to set the properties of the input and output fields. (fig-operator-schematic-mass)= :::{figure} ../../img/libceed_schematic_op_setup_mass.svg -Specific combination of $\bm{\bm{\mathcal{E}}}$, $\bm{B}$, $\bm{D}$, and input/output vectors -corresponding to the libCEED operators in the t500-operator test +Specific combination of $\bm{\bm{\mathcal{E}}}$, $\bm{B}$, $\bm{D}$, and input/output vectors corresponding to the libCEED operators in the t500-operator test ::: The constructor @@ -359,17 +240,12 @@ The constructor :start-at: CeedInit ``` -creates a logical device `ceed` on the specified *resource*, which could also be -a coprocessor such as `"/nvidia/0"`. There can be any number of such devices, -including multiple logical devices driving the same resource (though performance -may suffer in case of oversubscription). 
The resource is used to locate a -suitable backend which will have discretion over the implementations of all -objects created with this logical device. +creates a logical device `ceed` on the specified *resource*, which could also be a coprocessor such as `"/nvidia/0"`. +There can be any number of such devices, including multiple logical devices driving the same resource (though performance may suffer in case of oversubscription). +The resource is used to locate a suitable backend which will have discretion over the implementations of all objects created with this logical device. -The `setup` routine above computes and stores $\bm{D}$, in this case a -scalar value in each quadrature point, while `mass` uses these saved values to perform -the action of $\bm{D}$. These functions are turned into the {ref}`CeedQFunction` -variables `qf_setup` and `qf_mass` in the {c:func}`CeedQFunctionCreateInterior()` calls: +The `setup` routine above computes and stores $\bm{D}$, in this case a scalar value in each quadrature point, while `mass` uses these saved values to perform the action of $\bm{D}$. +These functions are turned into the {ref}`CeedQFunction` variables `qf_setup` and `qf_mass` in the {c:func}`CeedQFunctionCreateInterior()` calls: ```{literalinclude} ../../../tests/t500-operator.c :end-before: //! [QFunction Create] @@ -377,30 +253,21 @@ variables `qf_setup` and `qf_mass` in the {c:func}`CeedQFunctionCreateInterior() :start-after: //! [QFunction Create] ``` -A {ref}`CeedQFunction` performs independent operations at each quadrature point and -the interface is intended to facilitate vectorization. The second argument is -an expected vector length. If greater than 1, the caller must ensure that the -number of quadrature points `Q` is divisible by the vector length. This is -often satisfied automatically due to the element size or by batching elements -together to facilitate vectorization in other stages, and can always be ensured -by padding. 
- -In addition to the function pointers (`setup` and `mass`), {ref}`CeedQFunction` -constructors take a string representation specifying where the source for the -implementation is found. This is used by backends that support Just-In-Time -(JIT) compilation (i.e., CUDA and OCCA) to compile for coprocessors. +A {ref}`CeedQFunction` performs independent operations at each quadrature point and the interface is intended to facilitate vectorization. +The second argument is an expected vector length. +If greater than 1, the caller must ensure that the number of quadrature points `Q` is divisible by the vector length. +This is often satisfied automatically due to the element size or by batching elements together to facilitate vectorization in other stages, and can always be ensured by padding. + +In addition to the function pointers (`setup` and `mass`), {ref}`CeedQFunction` constructors take a string representation specifying where the source for the implementation is found. +This is used by backends that support Just-In-Time (JIT) compilation (i.e., CUDA and OCCA) to compile for coprocessors. For full support across all backends, these {ref}`CeedQFunction` source files must only contain constructs mutually supported by C99, C++11, and CUDA. For example, explicit type casting of void pointers and explicit use of compatible arguments for {code}`math` library functions is required, and variable-length array (VLA) syntax for array reshaping is only available via libCEED's {code}`CEED_Q_VLA` macro. -Different input and output fields are added individually, specifying the field -name, size of the field, and evaluation mode. +Different input and output fields are added individually, specifying the field name, size of the field, and evaluation mode. -The size of the field is provided by a combination of the number of components -the effect of any basis evaluations. 
+The size of the field is provided by a combination of the number of components and the effect of any basis evaluations. -The evaluation mode (see {ref}`CeedBasis-Typedefs and Enumerations`) `CEED_EVAL_INTERP` -for both input and output fields indicates that the mass operator only contains terms of -the form +The evaluation mode (see {ref}`CeedBasis-Typedefs and Enumerations`) `CEED_EVAL_INTERP` for both input and output fields indicates that the mass operator only contains terms of the form $$ \int_\Omega v \cdot f_0 (u, \nabla u) @@ -415,18 +282,12 @@ $$ can be expressed. -For fields with derivatives, such as with the basis evaluation mode -(see {ref}`CeedBasis-Typedefs and Enumerations`) `CEED_EVAL_GRAD`, the size of the -field needs to reflect both the number of components and the geometric dimension. -A 3-dimensional gradient on four components would therefore mean the field has a size of -12\. +For fields with derivatives, such as with the basis evaluation mode (see {ref}`CeedBasis-Typedefs and Enumerations`) `CEED_EVAL_GRAD`, the size of the field needs to reflect both the number of components and the geometric dimension. +A 3-dimensional gradient on four components would therefore mean the field has a size of 12\. -The $\bm{B}$ operators for the mesh nodes, `basis_x`, and the unknown field, -`basis_u`, are defined in the calls to the function {c:func}`CeedBasisCreateTensorH1Lagrange()`. -In this example, both the mesh and the unknown field use $H^1$ Lagrange finite -elements of order 1 and 4 respectively (the `P` argument represents the number of 1D -degrees of freedom on each element). Both basis operators use the same integration rule, -which is Gauss-Legendre with 8 points (the `Q` argument). +The $\bm{B}$ operators for the mesh nodes, `basis_x`, and the unknown field, `basis_u`, are defined in the calls to the function {c:func}`CeedBasisCreateTensorH1Lagrange()`. 
+In this example, both the mesh and the unknown field use $H^1$ Lagrange finite elements of order 1 and 4 respectively (the `P` argument represents the number of 1D degrees of freedom on each element). +Both basis operators use the same integration rule, which is Gauss-Legendre with 8 points (the `Q` argument). ```{literalinclude} ../../../tests/t500-operator.c :end-before: //! [Basis Create] @@ -434,16 +295,11 @@ which is Gauss-Legendre with 8 points (the `Q` argument). :start-after: //! [Basis Create] ``` -Other elements with this structure can be specified in terms of the `Q×P` -matrices that evaluate values and gradients at quadrature points in one -dimension using {c:func}`CeedBasisCreateTensorH1()`. Elements that do not have tensor -product structure, such as symmetric elements on simplices, will be created -using different constructors. +Other elements with this structure can be specified in terms of the `Q×P` matrices that evaluate values and gradients at quadrature points in one dimension using {c:func}`CeedBasisCreateTensorH1()`. +Elements that do not have tensor product structure, such as symmetric elements on simplices, will be created using different constructors. -The $\bm{\mathcal{E}}$ operators for the mesh nodes, `elem_restr_x`, and the unknown field, -`elem_restr_u`, are specified in the {c:func}`CeedElemRestrictionCreate()`. Both of these -specify directly the dof indices for each element in the `ind_x` and `ind_u` -arrays: +The $\bm{\mathcal{E}}$ operators for the mesh nodes, `elem_restr_x`, and the unknown field, `elem_restr_u`, are specified in the {c:func}`CeedElemRestrictionCreate()`. +Both of these specify directly the dof indices for each element in the `ind_x` and `ind_u` arrays: ```{literalinclude} ../../../tests/t500-operator.c :end-before: //! [ElemRestr Create] @@ -457,32 +313,23 @@ arrays: :start-after: //! [ElemRestrU Create] ``` -If the user has arrays available on a device, they can be provided using -`CEED_MEM_DEVICE`. 
This technique is used to provide no-copy interfaces in all -contexts that involve problem-sized data. +If the user has arrays available on a device, they can be provided using `CEED_MEM_DEVICE`. +This technique is used to provide no-copy interfaces in all contexts that involve problem-sized data. -For discontinuous Galerkin and for applications such as Nek5000 that only -explicitly store **E-vectors** (inter-element continuity has been subsumed by -the parallel restriction $\bm{P}$), the element restriction $\bm{\mathcal{E}}$ -is the identity and {c:func}`CeedElemRestrictionCreateStrided()` is used instead. -We plan to support other structured representations of $\bm{\mathcal{E}}$ which will -be added according to demand. +For discontinuous Galerkin and for applications such as Nek5000 that only explicitly store **E-vectors** (inter-element continuity has been subsumed by the parallel restriction $\bm{P}$), the element restriction $\bm{\mathcal{E}}$ is the identity and {c:func}`CeedElemRestrictionCreateStrided()` is used instead. +We plan to support other structured representations of $\bm{\mathcal{E}}$ which will be added according to demand. There are two common approaches for supporting non-conforming elements: applying the node constraints via $\bm P$ so that the **L-vector** can be processed uniformly and applying the constraints via $\bm{\mathcal{E}}$ so that the **E-vector** is uniform. The former can be done with the existing interface while the latter will require a generalization to element restriction that would define field values at constrained nodes as linear combinations of the values at primary nodes. -These operations, $\bm{\mathcal{E}}$, $\bm{B}$, and $\bm{D}$, -are combined with a {ref}`CeedOperator`. As with {ref}`CeedQFunction`s, operator fields are added -separately with a matching field name, basis ($\bm{B}$), element restriction -($\bm{\mathcal{E}}$), and **L-vector**. 
The flag -`CEED_VECTOR_ACTIVE` indicates that the vector corresponding to that field will -be provided to the operator when {c:func}`CeedOperatorApply()` is called. Otherwise the -input/output will be read from/written to the specified **L-vector**. +These operations, $\bm{\mathcal{E}}$, $\bm{B}$, and $\bm{D}$, are combined with a {ref}`CeedOperator`. +As with {ref}`CeedQFunction`s, operator fields are added +separately with a matching field name, basis ($\bm{B}$), element restriction ($\bm{\mathcal{E}}$), and **L-vector**. +The flag `CEED_VECTOR_ACTIVE` indicates that the vector corresponding to that field will be provided to the operator when {c:func}`CeedOperatorApply()` is called. +Otherwise the input/output will be read from/written to the specified **L-vector**. -With partial assembly, we first perform a setup stage where $\bm{D}$ is evaluated -and stored. This is accomplished by the operator `op_setup` and its application -to `X`, the nodes of the mesh (these are needed to compute Jacobians at -quadrature points). Note that the corresponding {c:func}`CeedOperatorApply()` has no basis -evaluation on the output, as the quadrature data is not needed at the dofs: +With partial assembly, we first perform a setup stage where $\bm{D}$ is evaluated and stored. +This is accomplished by the operator `op_setup` and its application to `X`, the nodes of the mesh (these are needed to compute Jacobians at quadrature points). +Note that the corresponding {c:func}`CeedOperatorApply()` has no basis evaluation on the output, as the quadrature data is not needed at the dofs: ```{literalinclude} ../../../tests/t500-operator.c :end-before: //! [Setup Create] @@ -502,8 +349,7 @@ evaluation on the output, as the quadrature data is not needed at the dofs: :start-after: //! 
[Setup Apply] ``` -The action of the operator is then represented by operator `op_mass` and its -{c:func}`CeedOperatorApply()` to the input **L-vector** `U` with output in `V`: +The action of the operator is then represented by operator `op_mass` and its {c:func}`CeedOperatorApply()` to the input **L-vector** `U` with output in `V`: ```{literalinclude} ../../../tests/t500-operator.c :end-before: //! [Operator Create] @@ -523,24 +369,16 @@ The action of the operator is then represented by operator `op_mass` and its :start-after: //! [Operator Apply] ``` -A number of function calls in the interface, such as {c:func}`CeedOperatorApply()`, are -intended to support asynchronous execution via their last argument, -`CeedRequest*`. The specific (pointer) value used in the above example, -`CEED_REQUEST_IMMEDIATE`, is used to express the request (from the user) for the -operation to complete before returning from the function call, i.e. to make sure -that the result of the operation is available in the output parameters -immediately after the call. For a true asynchronous call, one needs to provide -the address of a user defined variable. Such a variable can be used later to -explicitly wait for the completion of the operation. +A number of function calls in the interface, such as {c:func}`CeedOperatorApply()`, are intended to support asynchronous execution via their last argument, `CeedRequest*`. +The specific (pointer) value used in the above example, `CEED_REQUEST_IMMEDIATE`, is used to express the request (from the user) for the operation to complete before returning from the function call, i.e. to make sure that the result of the operation is available in the output parameters immediately after the call. +For a true asynchronous call, one needs to provide the address of a user defined variable. +Such a variable can be used later to explicitly wait for the completion of the operation. 
## Gallery of QFunctions LibCEED provides a gallery of built-in {ref}`CeedQFunction`s in the {file}`gallery/` directory. -The available QFunctions are the ones associated with the mass, the Laplacian, and -the identity operators. To illustrate how the user can declare a {ref}`CeedQFunction` -via the gallery of available QFunctions, consider the selection of the -{ref}`CeedQFunction` associated with a simple 1D mass matrix -(cf. [tests/t410-qfunction.c](https://github.com/CEED/libCEED/blob/main/tests/t410-qfunction.c)). +The available QFunctions are the ones associated with the mass, the Laplacian, and the identity operators. +To illustrate how the user can declare a {ref}`CeedQFunction` via the gallery of available QFunctions, consider the selection of the {ref}`CeedQFunction` associated with a simple 1D mass matrix (cf. [tests/t410-qfunction.c](https://github.com/CEED/libCEED/blob/main/tests/t410-qfunction.c)). ```{literalinclude} ../../../tests/t410-qfunction.c :language: c @@ -549,9 +387,8 @@ via the gallery of available QFunctions, consider the selection of the ## Interface Principles and Evolution -LibCEED is intended to be extensible via backends that are packaged with the -library and packaged separately (possibly as a binary containing proprietary -code). Backends are registered by calling +LibCEED is intended to be extensible via backends that are packaged with the library and packaged separately (possibly as a binary containing proprietary code). +Backends are registered by calling ```{literalinclude} ../../../backends/ref/ceed-ref.c :end-before: //! [Register] @@ -562,11 +399,8 @@ code). Backends are registered by calling typically in a library initializer or "constructor" that runs automatically. `CeedInit` uses this prefix to find an appropriate backend for the resource. -Source (API) and binary (ABI) stability are important to libCEED. 
Prior to -reaching version 1.0, libCEED does not implement strict [semantic versioning](https://semver.org) across the entire interface. However, user code, -including libraries of {ref}`CeedQFunction`s, should be source and binary -compatible moving from 0.x.y to any later release 0.x.z. We have less experience -with external packaging of backends and do not presently guarantee source or -binary stability, but we intend to define stability guarantees for libCEED 1.0. -We'd love to talk with you if you're interested in packaging backends -externally, and will work with you on a practical stability policy. +Source (API) and binary (ABI) stability are important to libCEED. +Prior to reaching version 1.0, libCEED does not implement strict [semantic versioning](https://semver.org) across the entire interface. +However, user code, including libraries of {ref}`CeedQFunction`s, should be source and binary compatible moving from 0.x.y to any later release 0.x.z. +We have less experience with external packaging of backends and do not presently guarantee source or binary stability, but we intend to define stability guarantees for libCEED 1.0. +We'd love to talk with you if you're interested in packaging backends externally, and will work with you on a practical stability policy. diff --git a/doc/sphinx/source/libCEEDdev.md b/doc/sphinx/source/libCEEDdev.md index cf157a817c..262d51b6dc 100644 --- a/doc/sphinx/source/libCEEDdev.md +++ b/doc/sphinx/source/libCEEDdev.md @@ -22,13 +22,14 @@ Please check your code for common issues by running `make tidy` -which uses the `clang-tidy` utility included in recent releases of Clang. This -tool is much slower than actual compilation (`make -j8` parallelism helps). To -run on a single file, use +which uses the `clang-tidy` utility included in recent releases of Clang. +This tool is much slower than actual compilation (`make -j8` parallelism helps). +To run on a single file, use `make interface/ceed.c.tidy` -for example. 
All issues reported by `make tidy` should be fixed. +for example. +All issues reported by `make tidy` should be fixed. ## Include-What-You-Use @@ -52,11 +53,10 @@ The `ceed-f64.h` and `ceed-f32.h` headers should only be included in `ceed.h`. ## Shape -Backends often manipulate tensors of dimension greater than 2. It is -awkward to pass fully-specified multi-dimensional arrays using C99 and -certain operations will flatten/reshape the tensors for computational -convenience. We frequently use comments to document shapes using a -lexicographic ordering. For example, the comment +Backends often manipulate tensors of dimension greater than 2. +It is awkward to pass fully-specified multi-dimensional arrays using C99 and certain operations will flatten/reshape the tensors for computational convenience. +We frequently use comments to document shapes using a lexicographic ordering. +For example, the comment ```c // u has shape [dim, num_comp, Q, num_elem] @@ -72,8 +72,8 @@ for (d=0; d -e \ -n -b @@ -55,8 +53,8 @@ options: -clean clean the examples directory -m|-make Make the examples ``` -The only mandatory argument is `-b` or `-box` which sets the box geometry to be -used. This geometry should be found in `./boxes` directory. +The only mandatory argument is `-b` or `-box` which sets the box geometry to be used. +This geometry should be found in `./boxes` directory. 
For example, you can run bp1 as follows: ```sh diff --git a/examples/notation.md b/examples/notation.md index 33891da2ee..ceed477cc2 100644 --- a/examples/notation.md +++ b/examples/notation.md @@ -2,20 +2,9 @@ # Common notation -For most of our examples, the spatial discretization -uses high-order finite elements/spectral elements, namely, the high-order Lagrange -polynomials defined over $P$ non-uniformly spaced nodes, the -Gauss-Legendre-Lobatto (GLL) points, and quadrature points $\{q_i\}_{i=1}^Q$, with -corresponding weights $\{w_i\}_{i=1}^Q$ (typically the ones given by Gauss -or Gauss-Lobatto quadratures, that are built in the library). +For most of our examples, the spatial discretization uses high-order finite elements/spectral elements, namely, the high-order Lagrange polynomials defined over $P$ non-uniformly spaced nodes, the Gauss-Legendre-Lobatto (GLL) points, and quadrature points $\{q_i\}_{i=1}^Q$, with corresponding weights $\{w_i\}_{i=1}^Q$ (typically the ones given by Gauss or Gauss-Lobatto quadratures, that are built in the library). -We discretize the domain, $\Omega \subset \mathbb{R}^d$ (with $d=1,2,3$, -typically) by letting $\Omega = \bigcup_{e=1}^{N_e}\Omega_e$, with $N_e$ -disjoint elements. For most examples we use unstructured meshes for which the elements -are hexahedra (although this is not a requirement in libCEED). +We discretize the domain, $\Omega \subset \mathbb{R}^d$ (with $d=1,2,3$, typically) by letting $\Omega = \bigcup_{e=1}^{N_e}\Omega_e$, with $N_e$ disjoint elements. +For most examples we use unstructured meshes for which the elements are hexahedra (although this is not a requirement in libCEED). -The physical coordinates are denoted by -$\bm{x}=(x,y,z) \equiv (x_0,x_1,x_2) \in\Omega_e$, -while the reference coordinates are represented as -$\bm{X}=(X,Y,Z) \equiv (X_0,X_1,X_2) \in \textrm{I}=[-1,1]^3$ -(for $d=3$). 
+The physical coordinates are denoted by $\bm{x}=(x,y,z) \equiv (x_0,x_1,x_2) \in\Omega_e$, while the reference coordinates are represented as $\bm{X}=(X,Y,Z) \equiv (X_0,X_1,X_2) \in \textrm{I}=[-1,1]^3$ (for $d=3$). diff --git a/examples/solids/README.md b/examples/solids/README.md index c204333939..4f0d14ab86 100644 --- a/examples/solids/README.md +++ b/examples/solids/README.md @@ -1,7 +1,6 @@ # libCEED: Solid Mechanics Example -This page provides a description of the solid mechanics example for the -libCEED library, based on PETSc. +This page provides a description of the solid mechanics example for the libCEED library, based on PETSc. PETSc v3.17 or a development version of PETSc at commit 0e95d842 or later is required. This code solves the steady-state static momentum balance equations using unstructured high-order finite/spectral element spatial discretizations. @@ -41,13 +40,11 @@ The elasticity mini-app is controlled via command-line options, the following of * - `-nu [real]` - [Poisson's ratio](https://en.wikipedia.org/wiki/Poisson%27s_ratio), $\nu < 0.5$ * - `-bc_clamp [int list]` - - List of face sets on which to displace by `-bc_clamp_[facenumber]_translate [x,y,z]` - and/or `bc_clamp_[facenumber]_rotate [rx,ry,rz,c_0,c_1]`. Note: The default - for a clamped face is zero displacement. All displacement is with respect to - the initial configuration. + - List of face sets on which to displace by `-bc_clamp_[facenumber]_translate [x,y,z]` and/or `bc_clamp_[facenumber]_rotate [rx,ry,rz,c_0,c_1]`. + Note: The default for a clamped face is zero displacement. + All displacement is with respect to the initial configuration. 
* - `-bc_traction [int list]` - - List of face sets on which to set traction boundary conditions with the - traction vector `-bc_traction_[facenumber] [tx,ty,tz]` + - List of face sets on which to set traction boundary conditions with the traction vector `-bc_traction_[facenumber] [tx,ty,tz]` ::: :::{note} @@ -71,7 +68,8 @@ In this example, we set the left boundary, face set $999$, to zero displacement As an alternative to specifying a mesh with {code}`-mesh`, the user may use a DMPlex box mesh by specifying {code}`-dm_plex_box_faces [int list]`, {code}`-dm_plex_box_upper [real list]`, and {code}`-dm_plex_box_lower [real list]`. -As an alternative example exploiting {code}`-dm_plex_box_faces`, we consider a {code}`4 x 4 x 4` mesh where essential (Drichlet) boundary condition is placed on all sides. Sides 1 through 6 are rotated around $x$-axis: +As an alternative example exploiting {code}`-dm_plex_box_faces`, we consider a {code}`4 x 4 x 4` mesh where essential (Dirichlet) boundary condition is placed on all sides. +Sides 1 through 6 are rotated around $x$-axis: ``` ./elasticity -problem FSInitial-NH1 -E 1 -nu 0.3 -num_steps 40 -snes_linesearch_type cp -dm_plex_box_faces 4,4,4 -bc_clamp 1,2,3,4,5,6 -bc_clamp_1_rotate 0,0,1,0,.3 -bc_clamp_2_rotate 0,0,1,0,.3 -bc_clamp_3_rotate 0,0,1,0,.3 -bc_clamp_4_rotate 0,0,1,0,.3 -bc_clamp_5_rotate 0,0,1,0,.3 -bc_clamp_6_rotate 0,0,1,0,.3 diff --git a/julia/LibCEED.jl/README.md b/julia/LibCEED.jl/README.md index fe36464f31..1ad322a7ca 100644 --- a/julia/LibCEED.jl/README.md +++ b/julia/LibCEED.jl/README.md @@ -1,35 +1,24 @@ # LibCEED.jl: Julia Interface for [libCEED](https://github.com/CEED/libCEED) -Please see the [LibCEED.jl -documentation](http://ceed.exascaleproject.org/libCEED-julia-docs/dev/) for -usage and API documentation. +Please see the [LibCEED.jl documentation](http://ceed.exascaleproject.org/libCEED-julia-docs/dev/) for usage and API documentation. 
## Installation -The LibCEED.jl package can be installed with Julia's package manager by running -`] add LibCEED`. This will automatically install a pre-built binary of the -libCEED library. If you require features of a specific build of libCEED (e.g. -CUDA/GPU support, specific compiler flags, etc.) then you should compile your -own version of the libCEED library, and configure LibCEED.jl to use this binary -as described in the [Configuring LibCEED.jl](#configuring-libceedjl) section. +The LibCEED.jl package can be installed with Julia's package manager by running `] add LibCEED`. +This will automatically install a pre-built binary of the libCEED library. +If you require features of a specific build of libCEED (e.g. CUDA/GPU support, specific compiler flags, etc.) then you should compile your own version of the libCEED library, and configure LibCEED.jl to use this binary as described in the [Configuring LibCEED.jl](#configuring-libceedjl) section. **Warning:** the pre-built libCEED binaries do not support CUDA backends -The pre-built binaries automatically installed by LibCEED.jl (through the -[libCEED_jll](https://juliahub.com/ui/Packages/libCEED_jll/LB2fn) package) are -not built with CUDA support. If you want to run libCEED on the GPU, you will -have to build libCEED from source and configure LibCEED.jl as described in the -[Configuring LibCEED.jl](#configuring-libceedjl) section. +The pre-built binaries automatically installed by LibCEED.jl (through the [libCEED_jll](https://juliahub.com/ui/Packages/libCEED_jll/LB2fn) package) are not built with CUDA support. +If you want to run libCEED on the GPU, you will have to build libCEED from source and configure LibCEED.jl as described in the [Configuring LibCEED.jl](#configuring-libceedjl) section. ### Configuring LibCEED.jl -By default, LibCEED.jl will use the pre-built libCEED binaries provided by the -[libCEED_jll](https://juliahub.com/ui/Packages/libCEED_jll/LB2fn) package. 
If -you wish to use a different libCEED binary (e.g. one built from source), -LibCEED.jl can be configured using Julia's _preferences_ mechanism. Note that -this preference will be set for the currently active Julia environment, and can -be different between different environments. The Julia session must be restarted -for changes to take effect. +By default, LibCEED.jl will use the pre-built libCEED binaries provided by the [libCEED_jll](https://juliahub.com/ui/Packages/libCEED_jll/LB2fn) package. +If you wish to use a different libCEED binary (e.g. one built from source), LibCEED.jl can be configured using Julia's _preferences_ mechanism. +Note that this preference will be set for the currently active Julia environment, and can be different between different environments. +The Julia session must be restarted for changes to take effect. ```julia julia> using LibCEED @@ -38,5 +27,4 @@ julia> set_libceed_path!("/path/to/libceed.so") [ Info: Restart the Julia session for changes to take effect. ``` -See [Preferences.jl](https://github.com/JuliaPackaging/Preferences.jl) for more -information. +See [Preferences.jl](https://github.com/JuliaPackaging/Preferences.jl) for more information. diff --git a/rust/libceed-sys/README.md b/rust/libceed-sys/README.md index 5a01c78c8a..3c4cef6f29 100644 --- a/rust/libceed-sys/README.md +++ b/rust/libceed-sys/README.md @@ -1,19 +1,15 @@ # libceed-sys: unsafe bindings to libCEED -This is the documentation for the low level (unsafe) Rust bindings to the libCEED C -interface. See the [libCEED user manual](https://libceed.org) for usage -information. Note that most Rust users will prefer the higher level (safe) Rust -interface in the [`libceed` crate](https://docs.rs/libceed). +This is the documentation for the low level (unsafe) Rust bindings to the libCEED C interface. +See the [libCEED user manual](https://libceed.org) for usage information. 
+Note that most Rust users will prefer the higher level (safe) Rust interface in the [`libceed` crate](https://docs.rs/libceed). -libCEED is a low-level API for for the efficient high-order discretization methods -developed by the ECP co-design Center for Efficient Exascale Discretizations (CEED). -While our focus is on high-order finite elements, the approach is mostly algebraic -and thus applicable to other discretizations in factored form. +libCEED is a low-level API for the efficient high-order discretization methods developed by the ECP co-design Center for Efficient Exascale Discretizations (CEED). +While our focus is on high-order finite elements, the approach is mostly algebraic and thus applicable to other discretizations in factored form. ## Usage -To use low level libCEED bindings in a Rust package, the following `Cargo.toml` -can be used. +To use low level libCEED bindings in a Rust package, the following `Cargo.toml` can be used. ```toml [dependencies] libceed-sys = "0.10.0" @@ -31,17 +27,13 @@ Supported features: ## Development -To develop libCEED, use `cargo build` in the `rust/libceed-sys` directory to -install a local copy and build the bindings. +To develop libCEED, use `cargo build` in the `rust/libceed-sys` directory to install a local copy and build the bindings. -If you need custom flags for the C project, we recommend using `make -C c-src -configure` to cache arguments in `c-src/config.mk`. If that file exists during -`cargo build` then edits will prompt recompilation of the bindings. +If you need custom flags for the C project, we recommend using `make -C c-src configure` to cache arguments in `c-src/config.mk`. +If that file exists during `cargo build` then edits will prompt recompilation of the bindings. 
### Shared libraries -If one is developing libCEED C source and testing multiple language bindings at -once, a few seconds can be cut out of the edit/compile/test loop by disabling -the `static` feature and using +If one is developing libCEED C source and testing multiple language bindings at once, a few seconds can be cut out of the edit/compile/test loop by disabling the `static` feature and using ```bash export LD_LIBRARY_PATH=$CEED_DIR/lib @@ -49,27 +41,21 @@ export PKG_CONFIG_PATH=$CEED_DIR/lib/pkgconfig ``` #### Without system -If you disable the `static` feature and are not using a system version from a -standard path/somewhere that can be found by pkg-config, then you'll need to set -`LD_LIBRARY_PATH` to the appropriate target directory for doctests to be able to -find it. This might look like +If you disable the `static` feature and are not using a system version from a standard path/somewhere that can be found by pkg-config, then you'll need to set `LD_LIBRARY_PATH` to the appropriate target directory for doctests to be able to find it. +This might look like ```bash export LD_LIBRARY_PATH=$CEED_DIR/target/debug/build/libceed-sys-d1ea22c6e1ad3f23/out/lib ``` -where the precise hash value is printed during `cargo build --verbose` or you -can find it with `find target -name libceed.so`. This mode of development is -more fragile than the default (which uses static libraries). +where the precise hash value is printed during `cargo build --verbose` or you can find it with `find target -name libceed.so`. +This mode of development is more fragile than the default (which uses static libraries). -Note that the `LD_LIBRARY_PATH` workarounds will become unnecessary if [this -issue](https://github.com/rust-lang/cargo/issues/1592) is resolved -- it's -currently closed, but the problem still exists. 
+Note that the `LD_LIBRARY_PATH` workarounds will become unnecessary if [this issue](https://github.com/rust-lang/cargo/issues/1592) is resolved -- it's currently closed, but the problem still exists. ## License: BSD-2-Clause ## Contributing -The `libceed-sys` crate is developed within the [libCEED -repository](https://github.com/CEED/libCEED). See the [contributing -guidelines](https://libceed.org/en/latest/CONTRIBUTING/) for details. +The `libceed-sys` crate is developed within the [libCEED repository](https://github.com/CEED/libCEED). +See the [contributing guidelines](https://libceed.org/en/latest/CONTRIBUTING/) for details. diff --git a/tests/README.md b/tests/README.md index 8d3cb1928c..c000c4f60d 100644 --- a/tests/README.md +++ b/tests/README.md @@ -2,8 +2,7 @@ This page provides a brief description of the tests for the libCEED library. -The tests are organized by API object, and some tests are further organized, -as required. +The tests are organized by API object, and some tests are further organized, as required. 0. Ceed Tests 1. CeedVector Tests From 6704dbc6f6ba4bc959fd1fee4c354a953a708e05 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 23 Jun 2022 11:18:20 -0600 Subject: [PATCH 099/172] doc - fix missing link --- doc/sphinx/source/api/CeedBasis.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx/source/api/CeedBasis.rst b/doc/sphinx/source/api/CeedBasis.rst index 6e4744d190..b1affdf2ea 100644 --- a/doc/sphinx/source/api/CeedBasis.rst +++ b/doc/sphinx/source/api/CeedBasis.rst @@ -14,6 +14,8 @@ Discrete element bases and quadrature :content-only: :members: +.. 
_CeedBasis-typedefs and enumerations: + Typedefs and Enumerations -------------------------------------- From 690ffe02454c2b9d595e7a9fbbf18b7ea1278444 Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 23 Jun 2022 16:20:29 -0600 Subject: [PATCH 100/172] fluids: Add missing license boilerplate --- examples/fluids/qfunctions/newtonian_types.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h index 9366597d97..6ea385e8d9 100644 --- a/examples/fluids/qfunctions/newtonian_types.h +++ b/examples/fluids/qfunctions/newtonian_types.h @@ -1,3 +1,10 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + #ifndef newtonian_types_h #define newtonian_types_h From 13fa47b256d7b8fa7dc04000fe86398448c8602c Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 23 Jun 2022 16:19:56 -0600 Subject: [PATCH 101/172] fluids: Combine misc QF helpers into utils.h --- examples/fluids/problems/stg_shur14.c | 4 -- examples/fluids/qfunctions/advection2d.h | 7 +--- examples/fluids/qfunctions/blasius.h | 7 +--- examples/fluids/qfunctions/channel.h | 20 ++++------ examples/fluids/qfunctions/densitycurrent.h | 5 +-- examples/fluids/qfunctions/eulervortex.h | 5 +-- examples/fluids/qfunctions/newtonian.h | 5 +-- examples/fluids/qfunctions/newtonian_state.h | 11 +----- examples/fluids/qfunctions/newtonian_types.h | 6 --- examples/fluids/qfunctions/shocktube.h | 5 +-- examples/fluids/qfunctions/stg_shur14.h | 12 ++---- examples/fluids/qfunctions/utils.h | 41 ++++++++++++++++++++ 12 files changed, 59 insertions(+), 69 deletions(-) create mode 100644 examples/fluids/qfunctions/utils.h diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c 
index f63f08a8cd..4f2f5ce8a8 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -16,10 +16,6 @@ #include "stg_shur14.h" #include "../qfunctions/stg_shur14.h" -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - STGShur14Context global_stg_ctx; /* diff --git a/examples/fluids/qfunctions/advection2d.h b/examples/fluids/qfunctions/advection2d.h index 9e0f9d7259..6aef50d1fc 100644 --- a/examples/fluids/qfunctions/advection2d.h +++ b/examples/fluids/qfunctions/advection2d.h @@ -13,10 +13,7 @@ #include #include - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif +#include "utils.h" typedef struct SetupContext_ *SetupContext; struct SetupContext_ { @@ -37,8 +34,6 @@ struct AdvectionContext_ { int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG }; -CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; } - // ***************************************************************************** // This QFunction sets the initial conditions and the boundary conditions // for two test cases: ROTATION and TRANSLATION diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h index b740f39108..b4b99bc463 100644 --- a/examples/fluids/qfunctions/blasius.h +++ b/examples/fluids/qfunctions/blasius.h @@ -15,6 +15,7 @@ #include #include #include "newtonian_types.h" +#include "utils.h" typedef struct BlasiusContext_ *BlasiusContext; struct BlasiusContext_ { @@ -28,10 +29,6 @@ struct BlasiusContext_ { struct NewtonianIdealGasContext_ newtonian_ctx; }; -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - void CEED_QFUNCTION_HELPER(BlasiusSolution)(const CeedScalar y, const CeedScalar Uinf, const CeedScalar x0, const CeedScalar x, const CeedScalar rho, CeedScalar *u, CeedScalar *v, CeedScalar *t12, @@ -192,7 +189,7 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q, const CeedScalar x_inflow = context->x_inflow; const bool weakT = context->weakT; const CeedScalar 
rho_0 = P0 / (Rd * theta0); - const CeedScalar x0 = Uinf*rho_0 / (mu*25/ (delta0*delta0) ); + const CeedScalar x0 = Uinf*rho_0 / (mu*25/ Square(delta0) ); CeedPragmaSIMD // Quadrature Point Loop diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h index a44e5fd00e..39b1c6872f 100644 --- a/examples/fluids/qfunctions/channel.h +++ b/examples/fluids/qfunctions/channel.h @@ -15,6 +15,7 @@ #include #include #include "newtonian_types.h" +#include "utils.h" typedef struct ChannelContext_ *ChannelContext; struct ChannelContext_ { @@ -120,9 +121,8 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q, const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]}; CeedScalar q_exact[5] = {0.}; Exact_Channel(3, 0., x, 5, q_exact, ctx); - const CeedScalar E_kinetic_exact = 0.5*(q_exact[1]*q_exact[1] + - q_exact[2]*q_exact[2] + - q_exact[3]*q_exact[3]) / q_exact[0]; + const CeedScalar E_kinetic_exact = 0.5*Dot3(&q_exact[1], &q_exact[1]) + / q_exact[0]; const CeedScalar velocity[3] = {q_exact[1]/q_exact[0], q_exact[2]/q_exact[0], q_exact[3]/q_exact[0] @@ -132,16 +132,13 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q, // Find pressure using state inside the domain const CeedScalar rho = q[0][i]; const CeedScalar u[3] = {q[1][i]/rho, q[2][i]/rho, q[3][i]/rho}; - const CeedScalar E_internal = q[4][i] - .5 * rho * (u[0]*u[0] + u[1]*u[1] + - u[2]*u[2]); + const CeedScalar E_internal = q[4][i] - .5 * rho * Dot3(u,u); const CeedScalar P = E_internal * (gamma - 1.); // Find inflow state using calculated P and prescribed velocity, theta0 const CeedScalar e_internal = cv * theta; const CeedScalar rho_in = P / ((gamma - 1) * e_internal); - const CeedScalar E_kinetic = .5 * rho_in * (velocity[0]*velocity[0] + - velocity[1]*velocity[1] + - velocity[2]*velocity[2]); + const CeedScalar E_kinetic = .5 * rho_in * Dot3(velocity, velocity); const CeedScalar E = rho_in * e_internal + E_kinetic; // ---- Normal vect const CeedScalar norm[3] = {q_data_sur[1][i], @@ 
-153,9 +150,7 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q, // Zero v so all future terms can safely sum into it for (CeedInt j=0; j<5; j++) v[j][i] = 0.; - const CeedScalar u_normal = norm[0]*velocity[0] + - norm[1]*velocity[1] + - norm[2]*velocity[2]; + const CeedScalar u_normal = Dot3(norm, velocity); // The Physics // -- Density @@ -220,8 +215,7 @@ CEED_QFUNCTION(Channel_Outflow)(void *ctx, CeedInt Q, // Implementing outflow condition const CeedScalar P = P0; // pressure - const CeedScalar u_normal = norm[0]*u[0] + norm[1]*u[1] + - norm[2]*u[2]; // Normal velocity + const CeedScalar u_normal = Dot3(norm, u); // Normal velocity // The Physics // -- Density v[0][i] -= wdetJb * rho * u_normal; diff --git a/examples/fluids/qfunctions/densitycurrent.h b/examples/fluids/qfunctions/densitycurrent.h index 2d503d8165..4405bb4975 100644 --- a/examples/fluids/qfunctions/densitycurrent.h +++ b/examples/fluids/qfunctions/densitycurrent.h @@ -18,10 +18,7 @@ #include #include #include "newtonian_types.h" - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif +#include "utils.h" // ***************************************************************************** // This function sets the initial conditions and the boundary conditions diff --git a/examples/fluids/qfunctions/eulervortex.h b/examples/fluids/qfunctions/eulervortex.h index dc82e29915..be4937378b 100644 --- a/examples/fluids/qfunctions/eulervortex.h +++ b/examples/fluids/qfunctions/eulervortex.h @@ -18,10 +18,7 @@ #include #include - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif +#include "utils.h" typedef struct EulerContext_ *EulerContext; struct EulerContext_ { diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 19cb7d116b..fa6c3d769f 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -16,10 +16,7 @@ #include #include "newtonian_types.h" #include "newtonian_state.h" - -#ifndef M_PI -#define 
M_PI 3.14159265358979323846 -#endif +#include "utils.h" // ***************************************************************************** // Helper function for computing flux Jacobian diff --git a/examples/fluids/qfunctions/newtonian_state.h b/examples/fluids/qfunctions/newtonian_state.h index 4113166ed3..47ec180d3e 100644 --- a/examples/fluids/qfunctions/newtonian_state.h +++ b/examples/fluids/qfunctions/newtonian_state.h @@ -15,6 +15,7 @@ #include #include #include "newtonian_types.h" +#include "utils.h" typedef struct { CeedScalar pressure; @@ -127,16 +128,6 @@ CEED_QFUNCTION_HELPER void KMStrainRate(const State grad_s[3], strain_rate[5] = weight * (grad_s[1].Y.velocity[0] + grad_s[0].Y.velocity[1]); } -CEED_QFUNCTION_HELPER void KMUnpack(const CeedScalar v[6], CeedScalar A[3][3]) { - const CeedScalar weight = 1 / sqrt(2.); - A[0][0] = v[0]; - A[1][1] = v[1]; - A[2][2] = v[2]; - A[2][1] = A[1][2] = weight * v[3]; - A[2][0] = A[0][2] = weight * v[4]; - A[1][0] = A[0][1] = weight * v[5]; -} - CEED_QFUNCTION_HELPER void NewtonianStress(NewtonianIdealGasContext gas, const CeedScalar strain_rate[6], CeedScalar stress[6]) { CeedScalar div_u = strain_rate[0] + strain_rate[1] + strain_rate[2]; diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h index 6ea385e8d9..3ea539caa4 100644 --- a/examples/fluids/qfunctions/newtonian_types.h +++ b/examples/fluids/qfunctions/newtonian_types.h @@ -53,10 +53,4 @@ struct NewtonianIdealGasContext_ { StabilizationType stabilization; }; -CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; } -CEED_QFUNCTION_HELPER CeedScalar Dot3(const CeedScalar u[3], - const CeedScalar v[3]) { - return u[0]*v[0] + u[1]*v[1] + u[2]*v[2]; -} - #endif // newtonian_types_h diff --git a/examples/fluids/qfunctions/shocktube.h b/examples/fluids/qfunctions/shocktube.h index 3da66cbba1..e8137f3d94 100644 --- a/examples/fluids/qfunctions/shocktube.h +++ b/examples/fluids/qfunctions/shocktube.h @@ 
-27,10 +27,7 @@ #include #include - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif +#include "utils.h" typedef struct SetupContext_ *SetupContext; struct SetupContext_ { diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index 80bcb803ff..81ec377d5f 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -20,16 +20,10 @@ #include #include #include "stg_shur14_type.h" - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif +#include "utils.h" #define STG_NMODES_MAX 1024 -CEED_QFUNCTION_HELPER CeedScalar Max(CeedScalar a, CeedScalar b) { return a < b ? b : a; } -CEED_QFUNCTION_HELPER CeedScalar Min(CeedScalar a, CeedScalar b) { return a < b ? a : b; } - /* * @brief Interpolate quantities from input profile to given location * @@ -115,7 +109,7 @@ void CEED_QFUNCTION_HELPER(CalcSpectrum)(const CeedScalar dw, const CeedScalar hmax = Max( Max(h[0], h[1]), h[2]); const CeedScalar ke = dw==0 ? 1e16 : 2*M_PI/Min(2*dw, 3*lt); - const CeedScalar keta = 2*M_PI*pow(pow(nu,3.0)/eps, -0.25); + const CeedScalar keta = 2*M_PI*pow(Cube(nu)/eps, -0.25); const CeedScalar kcut = M_PI/ Min( Max(Max(h[1], h[2]), 0.3*hmax) + 0.1*dw, hmax ); CeedScalar fcut, feta, Ektot=0.0; @@ -253,7 +247,7 @@ CEED_QFUNCTION(STGShur14_Inflow)(void *ctx, CeedInt Q, CeedScalar h[3]; for (CeedInt j=0; j<3; j++) - h[j] = 2/sqrt(dXdx[0][j]*dXdx[0][j] + dXdx[1][j]*dXdx[1][j]); + h[j] = 2/sqrt(Square(dXdx[0][j]) + Square(dXdx[1][j])); h[0] = dx; InterpolateProfile(X[1][i], ubar, cij, &eps, <, stg_ctx); diff --git a/examples/fluids/qfunctions/utils.h b/examples/fluids/qfunctions/utils.h new file mode 100644 index 0000000000..25dd2005ed --- /dev/null +++ b/examples/fluids/qfunctions/utils.h @@ -0,0 +1,41 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
+// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +#ifndef utils_h +#define utils_h + +#include +#include + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +CEED_QFUNCTION_HELPER CeedScalar Max(CeedScalar a, CeedScalar b) { return a < b ? b : a; } +CEED_QFUNCTION_HELPER CeedScalar Min(CeedScalar a, CeedScalar b) { return a < b ? a : b; } + +CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; } +CEED_QFUNCTION_HELPER CeedScalar Cube(CeedScalar x) { return x*x*x; } + +// @brief Dot product of 3 element vectors +CEED_QFUNCTION_HELPER CeedScalar Dot3(const CeedScalar u[3], + const CeedScalar v[3]) { + return u[0]*v[0] + u[1]*v[1] + u[2]*v[2]; +} + +// @brief Unpack Kelvin-Mandel notation symmetric tensor into full tensor +CEED_QFUNCTION_HELPER void KMUnpack(const CeedScalar v[6], CeedScalar A[3][3]) { + const CeedScalar weight = 1 / sqrt(2.); + A[0][0] = v[0]; + A[1][1] = v[1]; + A[2][2] = v[2]; + A[2][1] = A[1][2] = weight * v[3]; + A[2][0] = A[0][2] = weight * v[4]; + A[1][0] = A[0][1] = weight * v[5]; +} + +#endif // utils_h From c42f38b112c169aa5035aee9bc56d3ad72b21cee Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Fri, 24 Jun 2022 14:23:19 -0600 Subject: [PATCH 102/172] Change naming style for MAGMA runtime compilation type/function defines --- backends/magma/ceed-magma-basis.c | 82 ++++++++++++------------- backends/magma/ceed-magma-restriction.c | 34 +++++----- backends/magma/ceed-magma.h | 50 +++++++-------- 3 files changed, 83 insertions(+), 83 deletions(-) diff --git a/backends/magma/ceed-magma-basis.c b/backends/magma/ceed-magma-basis.c index d277e0caf0..ca86a2613c 100644 --- a/backends/magma/ceed-magma-basis.c +++ b/backends/magma/ceed-magma-basis.c @@ -141,11 +141,11 @@ int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, }; if (tmode == CEED_TRANSPOSE) { - ierr = MAGMA_RTC_RUN_KERNEL_DIM_SH(ceed, impl->magma_interp_tr, grid, + 
ierr = CeedRunKernelDimSharedMagma(ceed, impl->magma_interp_tr, grid, nthreads, ntcol, 1, shmem, args); CeedChkBackend(ierr); } else { - ierr = MAGMA_RTC_RUN_KERNEL_DIM_SH(ceed, impl->magma_interp, grid, + ierr = CeedRunKernelDimSharedMagma(ceed, impl->magma_interp, grid, nthreads, ntcol, 1, shmem, args); CeedChkBackend(ierr); } @@ -234,11 +234,11 @@ int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, }; if (tmode == CEED_TRANSPOSE) { - ierr = MAGMA_RTC_RUN_KERNEL_DIM_SH(ceed, impl->magma_grad_tr, grid, + ierr = CeedRunKernelDimSharedMagma(ceed, impl->magma_grad_tr, grid, nthreads, ntcol, 1, shmem, args); CeedChkBackend(ierr); } else { - ierr = MAGMA_RTC_RUN_KERNEL_DIM_SH(ceed, impl->magma_grad, grid, + ierr = CeedRunKernelDimSharedMagma(ceed, impl->magma_grad, grid, nthreads, ntcol, 1, shmem, args); CeedChkBackend(ierr); } @@ -276,7 +276,7 @@ int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, CeedInt grid = (nelem + ntcol-1) / ntcol; void *args[] = {&impl->dqweight1d, &v, &eldofssize, &nelem}; - ierr = MAGMA_RTC_RUN_KERNEL_DIM_SH(ceed, impl->magma_weight, grid, + ierr = CeedRunKernelDimSharedMagma(ceed, impl->magma_weight, grid, nthreads, ntcol, 1, shmem, args); CeedChkBackend(ierr); } @@ -662,65 +662,65 @@ int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, // data Ceed delegate; ierr = CeedGetDelegate(ceed, &delegate); CeedChkBackend(ierr); - ierr = MAGMA_RTC_COMPILE(delegate, basis_kernel_source, &impl->module, 5, - "DIM", dim, - "NCOMP", ncomp, - "P", P1d, - "Q", Q1d, - "MAXPQ", CeedIntMax(P1d, Q1d)); + ierr = CeedCompileMagma(delegate, basis_kernel_source, &impl->module, 5, + "DIM", dim, + "NCOMP", ncomp, + "P", P1d, + "Q", Q1d, + "MAXPQ", CeedIntMax(P1d, Q1d)); CeedChkBackend(ierr); // Kernel setup switch (dim) { case 1: - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_interpn_1d_kernel", - &impl->magma_interp); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_interpn_1d_kernel", + &impl->magma_interp); 
CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_interpt_1d_kernel", - &impl->magma_interp_tr); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_interpt_1d_kernel", + &impl->magma_interp_tr); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_gradn_1d_kernel", - &impl->magma_grad); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_gradn_1d_kernel", + &impl->magma_grad); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_gradt_1d_kernel", - &impl->magma_grad_tr); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_gradt_1d_kernel", + &impl->magma_grad_tr); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_weight_1d_kernel", - &impl->magma_weight); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_weight_1d_kernel", + &impl->magma_weight); CeedChkBackend(ierr); break; case 2: - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_interpn_2d_kernel", - &impl->magma_interp); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_interpn_2d_kernel", + &impl->magma_interp); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_interpt_2d_kernel", - &impl->magma_interp_tr); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_interpt_2d_kernel", + &impl->magma_interp_tr); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_gradn_2d_kernel", - &impl->magma_grad); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_gradn_2d_kernel", + &impl->magma_grad); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_gradt_2d_kernel", - &impl->magma_grad_tr); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_gradt_2d_kernel", + &impl->magma_grad_tr); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_weight_2d_kernel", - &impl->magma_weight); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_weight_2d_kernel", + 
&impl->magma_weight); CeedChkBackend(ierr); break; case 3: - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_interpn_3d_kernel", - &impl->magma_interp); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_interpn_3d_kernel", + &impl->magma_interp); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_interpt_3d_kernel", - &impl->magma_interp_tr); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_interpt_3d_kernel", + &impl->magma_interp_tr); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_gradn_3d_kernel", - &impl->magma_grad); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_gradn_3d_kernel", + &impl->magma_grad); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_gradt_3d_kernel", - &impl->magma_grad_tr); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_gradt_3d_kernel", + &impl->magma_grad_tr); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_weight_3d_kernel", - &impl->magma_weight); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_weight_3d_kernel", + &impl->magma_weight); CeedChkBackend(ierr); } diff --git a/backends/magma/ceed-magma-restriction.c b/backends/magma/ceed-magma-restriction.c index 182da59e30..9594136d4f 100644 --- a/backends/magma/ceed-magma-restriction.c +++ b/backends/magma/ceed-magma-restriction.c @@ -88,11 +88,11 @@ static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, CeedInt blocksize = 256; // Perform strided restriction with dstrides if (tmode == CEED_TRANSPOSE) { - ierr = MAGMA_RTC_RUN_KERNEL(ceed, impl->StridedTranspose, - grid, blocksize, args); CeedChkBackend(ierr); + ierr = CeedRunKernelMagma(ceed, impl->StridedTranspose, + grid, blocksize, args); CeedChkBackend(ierr); } else { - ierr = MAGMA_RTC_RUN_KERNEL(ceed, impl->StridedNoTranspose, - grid, blocksize, args); CeedChkBackend(ierr); + ierr = CeedRunKernelMagma(ceed, impl->StridedNoTranspose, + grid, blocksize, args); 
CeedChkBackend(ierr); } ierr = magma_free(dstrides); CeedChkBackend(ierr); @@ -106,11 +106,11 @@ static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, CeedInt blocksize = 256; if (tmode == CEED_TRANSPOSE) { - ierr = MAGMA_RTC_RUN_KERNEL(ceed, impl->OffsetTranspose, - grid, blocksize, args); CeedChkBackend(ierr); + ierr = CeedRunKernelMagma(ceed, impl->OffsetTranspose, + grid, blocksize, args); CeedChkBackend(ierr); } else { - ierr = MAGMA_RTC_RUN_KERNEL(ceed, impl->OffsetNoTranspose, - grid, blocksize, args); CeedChkBackend(ierr); + ierr = CeedRunKernelMagma(ceed, impl->OffsetNoTranspose, + grid, blocksize, args); CeedChkBackend(ierr); } } @@ -290,21 +290,21 @@ int CeedElemRestrictionCreate_Magma(CeedMemType mtype, CeedCopyMode cmode, // data Ceed delegate; ierr = CeedGetDelegate(ceed, &delegate); CeedChkBackend(ierr); - ierr = MAGMA_RTC_COMPILE(delegate, restriction_kernel_source, &impl->module, 0); + ierr = CeedCompileMagma(delegate, restriction_kernel_source, &impl->module, 0); CeedChkBackend(ierr); // Kernel setup - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_readDofsStrided_kernel", - &impl->StridedNoTranspose); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_readDofsStrided_kernel", + &impl->StridedNoTranspose); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_readDofsOffset_kernel", - &impl->OffsetNoTranspose); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_readDofsOffset_kernel", + &impl->OffsetNoTranspose); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_writeDofsStrided_kernel", - &impl->StridedTranspose); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_writeDofsStrided_kernel", + &impl->StridedTranspose); CeedChkBackend(ierr); - ierr = MAGMA_RTC_GET_KERNEL(ceed, impl->module, "magma_writeDofsOffset_kernel", - &impl->OffsetTranspose); + ierr = CeedGetKernelMagma(ceed, impl->module, "magma_writeDofsOffset_kernel", + &impl->OffsetTranspose); 
CeedChkBackend(ierr); ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr); diff --git a/backends/magma/ceed-magma.h b/backends/magma/ceed-magma.h index b76b714879..ce0770e7ba 100644 --- a/backends/magma/ceed-magma.h +++ b/backends/magma/ceed-magma.h @@ -24,21 +24,21 @@ #define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt)) #ifdef CEED_MAGMA_USE_HIP -#define MAGMA_RTC_MODULE hipModule_t -#define MAGMA_RTC_FUNCTION hipFunction_t -#define MAGMA_RTC_COMPILE CeedCompileHip -#define MAGMA_RTC_GET_KERNEL CeedGetKernelHip -#define MAGMA_RTC_RUN_KERNEL CeedRunKernelHip -#define MAGMA_RTC_RUN_KERNEL_DIM CeedRunKernelDimHip -#define MAGMA_RTC_RUN_KERNEL_DIM_SH CeedRunKernelDimSharedHip +typedef hipModule_t CeedMagmaModule; +typedef hipFunction_t CeedMagmaFunction; +#define CeedCompileMagma CeedCompileHip +#define CeedGetKernelMagma CeedGetKernelHip +#define CeedRunKernelMagma CeedRunKernelHip +#define CeedRunKernelDimMagma CeedRunKernelDimHip +#define CeedRunKernelDimSharedMagma CeedRunKernelDimSharedHip #else -#define MAGMA_RTC_MODULE CUmodule -#define MAGMA_RTC_FUNCTION CUfunction -#define MAGMA_RTC_COMPILE CeedCompileCuda -#define MAGMA_RTC_GET_KERNEL CeedGetKernelCuda -#define MAGMA_RTC_RUN_KERNEL CeedRunKernelCuda -#define MAGMA_RTC_RUN_KERNEL_DIM CeedRunKernelDimCuda -#define MAGMA_RTC_RUN_KERNEL_DIM_SH CeedRunKernelDimSharedCuda +typedef CUmodule CeedMagmaModule; +typedef CUfunction CeedMagmaFunction; +#define CeedCompileMagma CeedCompileCuda +#define CeedGetKernelMagma CeedGetKernelCuda +#define CeedRunKernelMagma CeedRunKernelCuda +#define CeedRunKernelDimMagma CeedRunKernelDimCuda +#define CeedRunKernelDimSharedMagma CeedRunKernelDimSharedCuda #endif typedef enum { @@ -53,12 +53,12 @@ typedef struct { } Ceed_Magma; typedef struct { - MAGMA_RTC_MODULE module; - MAGMA_RTC_FUNCTION magma_interp; - MAGMA_RTC_FUNCTION magma_interp_tr; - MAGMA_RTC_FUNCTION magma_grad; - MAGMA_RTC_FUNCTION magma_grad_tr; - MAGMA_RTC_FUNCTION magma_weight; + 
CeedMagmaModule module; + CeedMagmaFunction magma_interp; + CeedMagmaFunction magma_interp_tr; + CeedMagmaFunction magma_grad; + CeedMagmaFunction magma_grad_tr; + CeedMagmaFunction magma_weight; CeedScalar *dqref1d; CeedScalar *dinterp1d; CeedScalar *dgrad1d; @@ -79,11 +79,11 @@ typedef enum { } OwnershipMode; typedef struct { - MAGMA_RTC_MODULE module; - MAGMA_RTC_FUNCTION StridedTranspose; - MAGMA_RTC_FUNCTION StridedNoTranspose; - MAGMA_RTC_FUNCTION OffsetTranspose; - MAGMA_RTC_FUNCTION OffsetNoTranspose; + CeedMagmaModule module; + CeedMagmaFunction StridedTranspose; + CeedMagmaFunction StridedNoTranspose; + CeedMagmaFunction OffsetTranspose; + CeedMagmaFunction OffsetNoTranspose; CeedInt *offsets; CeedInt *doffsets; OwnershipMode own_; From 446e7af4d6e7c58f469d4618e9b5e398a4084523 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Fri, 24 Jun 2022 22:46:16 -0600 Subject: [PATCH 103/172] basis - use ncomp from 'from' basis for projection (#1012) * basis - use ncomp from 'from' basis for projection * doc - document numcomp convention for CeedBasisCreateProjection * basis - reorder BasisCreateProjection arguments --- include/ceed/ceed.h | 4 ++-- interface/ceed-basis.c | 18 +++++++++++------- interface/ceed-preconditioning.c | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h index af94f0580c..e8ad57f5a3 100644 --- a/include/ceed/ceed.h +++ b/include/ceed/ceed.h @@ -552,8 +552,8 @@ CEED_EXTERN int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weights, CeedBasis *basis); -CEED_EXTERN int CeedBasisCreateProjection(CeedBasis basis_to, CeedBasis basis_from, CeedBasis *basis_project); -CEED_EXTERN int CeedBasisCreateProjectionMatrix(CeedBasis basis_to, CeedBasis basis_from, CeedScalar **interp_project); +CEED_EXTERN int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project); +CEED_EXTERN int 
CeedBasisCreateProjectionMatrix(CeedBasis basis_from, CeedBasis basis_to, CeedScalar **interp_project); CEED_EXTERN int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy); CEED_EXTERN int CeedBasisView(CeedBasis basis, FILE *stream); CEED_EXTERN int CeedBasisApply(CeedBasis basis, CeedInt num_elem, diff --git a/interface/ceed-basis.c b/interface/ceed-basis.c index ba3384b5c3..f87ccfde4e 100644 --- a/interface/ceed-basis.c +++ b/interface/ceed-basis.c @@ -837,9 +837,13 @@ int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, the pesudoinverse `interp_to^+` is given by QR factorization. Note: `basis_from` and `basis_to` must have compatible quadrature spaces. + Note: `basis_project` will have the same number of components as + `basis_from`, regardless of the number of components that + `basis_to` has. If `basis_from` has 3 components and `basis_to` + has 5 components, then `basis_project` will have 3 components. - @param[in] basis_to CeedBasis to prolong to @param[in] basis_from CeedBasis to prolong from + @param[in] basis_to CeedBasis to prolong to @param[out] basis_project Address of the variable where the newly created CeedBasis will be stored. 
@@ -847,7 +851,7 @@ int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, @ref User **/ -int CeedBasisCreateProjection(CeedBasis basis_to, CeedBasis basis_from, +int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project) { int ierr; Ceed ceed; @@ -855,7 +859,7 @@ int CeedBasisCreateProjection(CeedBasis basis_to, CeedBasis basis_from, // Create projectior matrix CeedScalar *interp_project; - ierr = CeedBasisCreateProjectionMatrix(basis_to, basis_from, + ierr = CeedBasisCreateProjectionMatrix(basis_from, basis_to, &interp_project); CeedChk(ierr); // Build basis @@ -864,7 +868,7 @@ int CeedBasisCreateProjection(CeedBasis basis_to, CeedBasis basis_from, CeedScalar *q_ref, *q_weight, *grad; ierr = CeedBasisIsTensor(basis_to, &is_tensor); CeedChk(ierr); ierr = CeedBasisGetDimension(basis_to, &dim); CeedChk(ierr); - ierr = CeedBasisGetNumComponents(basis_to, &num_comp); CeedChk(ierr); + ierr = CeedBasisGetNumComponents(basis_from, &num_comp); CeedChk(ierr); if (is_tensor) { CeedInt P_1d_to, P_1d_from; ierr = CeedBasisGetNumNodes1D(basis_from, &P_1d_from); CeedChk(ierr); @@ -906,8 +910,8 @@ int CeedBasisCreateProjection(CeedBasis basis_to, CeedBasis basis_from, Note: `basis_from` and `basis_to` must have compatible quadrature spaces. - @param[in] basis_to CeedBasis to project to @param[in] basis_from CeedBasis to project from + @param[in] basis_to CeedBasis to project to @param[out] interp_project Address of the variable where the newly created projection matrix will be stored. 
@@ -915,8 +919,8 @@ int CeedBasisCreateProjection(CeedBasis basis_to, CeedBasis basis_from, @ref User **/ -int CeedBasisCreateProjectionMatrix(CeedBasis basis_to, - CeedBasis basis_from, +int CeedBasisCreateProjectionMatrix(CeedBasis basis_from, + CeedBasis basis_to, CeedScalar **interp_project) { int ierr; Ceed ceed; diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index aec919e0c8..f9f679ef4b 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -2052,7 +2052,7 @@ int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, // Build prolongation matrix CeedBasis basis_fine, basis_c_to_f; ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr); - ierr = CeedBasisCreateProjection(basis_fine, basis_coarse, &basis_c_to_f); + ierr = CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f); CeedChk(ierr); // Core code From 121d4b7fb417fe929bc878bd436cd4820a8e3053 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Mon, 27 Jun 2022 15:35:20 -0600 Subject: [PATCH 104/172] gallery - fix bad indexing in 3d poission det --- examples/ceed/ex2-surface.h | 4 ++-- examples/rust/ex2-surface/src/main.rs | 4 ++-- examples/rust/ex4-vector-surface/src/main.rs | 4 ++-- include/ceed/jit-source/gallery/ceed-poisson3dbuild.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/ceed/ex2-surface.h b/examples/ceed/ex2-surface.h index 78ba0e8b52..875bb35e6e 100644 --- a/examples/ceed/ex2-surface.h +++ b/examples/ceed/ex2-surface.h @@ -58,8 +58,8 @@ CEED_QFUNCTION(f_build_diff)(void *ctx, const CeedInt Q, J[i+Q*((j+1)%3+3*((k+2)%3))]*J[i+Q*((j+2)%3+3*((k+1)%3))]; // Compute quadrature weight / det(J) - const CeedScalar qw = w[i] / (J[i+Q*0]*A[0][0] + J[i+Q*1]*A[1][1] + - J[i+Q*2]*A[2][2]); + const CeedScalar qw = w[i] / (J[i+Q*0]*A[0][0] + J[i+Q*1]*A[0][1] + + J[i+Q*2]*A[0][2]); // Compute geometric factors // Stored in Voigt convention diff --git 
a/examples/rust/ex2-surface/src/main.rs b/examples/rust/ex2-surface/src/main.rs index 42882590a5..d5e29e9af8 100644 --- a/examples/rust/ex2-surface/src/main.rs +++ b/examples/rust/ex2-surface/src/main.rs @@ -166,8 +166,8 @@ fn example_2(options: opt::Opt) -> libceed::Result<()> { } let qw = weights[i] / (jacobian[i + q * 0] * a[0 * 3 + 0] - + jacobian[i + q * 1] * a[1 * 3 + 1] - + jacobian[i + q * 2] * a[2 * 3 + 2]); + + jacobian[i + q * 1] * a[0 * 3 + 1] + + jacobian[i + q * 2] * a[0 * 3 + 2]); qdata[i + q * 0] = qw * (a[0 * 3 + 0] * a[0 * 3 + 0] + a[0 * 3 + 1] * a[0 * 3 + 1] diff --git a/examples/rust/ex4-vector-surface/src/main.rs b/examples/rust/ex4-vector-surface/src/main.rs index 5a0c1e25dd..cfa76126be 100644 --- a/examples/rust/ex4-vector-surface/src/main.rs +++ b/examples/rust/ex4-vector-surface/src/main.rs @@ -173,8 +173,8 @@ fn example_4(options: opt::Opt) -> libceed::Result<()> { } let qw = weights[i] / (jacobian[i + q * 0] * a[0 * 3 + 0] - + jacobian[i + q * 1] * a[1 * 3 + 1] - + jacobian[i + q * 2] * a[2 * 3 + 2]); + + jacobian[i + q * 1] * a[0 * 3 + 1] + + jacobian[i + q * 2] * a[0 * 3 + 2]); qdata[i + q * 0] = qw * (a[0 * 3 + 0] * a[0 * 3 + 0] + a[0 * 3 + 1] * a[0 * 3 + 1] diff --git a/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h index 7943e5f932..f453f998f6 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h @@ -42,8 +42,8 @@ CEED_QFUNCTION(Poisson3DBuild)(void *ctx, const CeedInt Q, J[(k+2)%dim][(j+1)%dim][i]*J[(k+1)%dim][(j+2)%dim][i]; // Compute quadrature weight / det(J) - const CeedScalar qw = w[i] / (J[0][0][i]*A[0][0] + J[0][1][i]*A[1][1] + - J[0][2][i]*A[2][2]); + const CeedScalar qw = w[i] / (J[0][0][i]*A[0][0] + J[0][1][i]*A[0][1] + + J[0][2][i]*A[0][2]); // Compute geometric factors // Stored in Voigt convention From 990fdeb6bb8fc9af2da4472bdc0d1f57da5da0e5 Mon Sep 17 00:00:00 2001 From: Jeremy L 
Thompson Date: Tue, 21 Jun 2022 16:09:13 -0600 Subject: [PATCH 105/172] fmt - add CeedInt_FMT --- backends/magma/ceed-magma-basis.c | 21 ++--- backends/memcheck/ceed-memcheck-qfunction.c | 2 +- backends/opt/ceed-opt-operator.c | 2 +- backends/ref/ceed-ref-restriction.c | 4 +- doc/sphinx/source/releasenotes.md | 1 + examples/ceed/ex1-volume.c | 88 +++++++++++---------- examples/ceed/ex2-surface.c | 86 ++++++++++---------- examples/fluids/navierstokes.c | 4 +- examples/fluids/problems/advection.c | 2 +- examples/fluids/problems/blasius.c | 3 +- examples/fluids/problems/stg_shur14.c | 15 ++-- examples/petsc/area.c | 4 +- examples/petsc/bps.c | 6 +- examples/petsc/bpsraw.c | 6 +- examples/petsc/bpssphere.c | 7 +- examples/petsc/multigrid.c | 12 +-- examples/solids/elasticity.c | 14 ++-- examples/solids/src/cl-options.c | 7 +- examples/solids/src/setup-dm.c | 3 +- include/ceed/ceed.h | 1 + interface/ceed-basis.c | 10 ++- interface/ceed-elemrestriction.c | 28 ++++--- interface/ceed-operator.c | 41 ++++++---- interface/ceed-qfunction.c | 12 +-- interface/ceed.c | 2 +- 25 files changed, 206 insertions(+), 175 deletions(-) diff --git a/backends/magma/ceed-magma-basis.c b/backends/magma/ceed-magma-basis.c index ca86a2613c..64bba6237c 100644 --- a/backends/magma/ceed-magma-basis.c +++ b/backends/magma/ceed-magma-basis.c @@ -54,8 +54,8 @@ int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChkBackend(ierr); ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChkBackend(ierr); - CeedDebug(ceed, "\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d", - ncomp*CeedIntPow(P1d, dim), ncomp); + CeedDebug(ceed, "\033[01m[CeedBasisApply_Magma] vsize=%" CeedInt_FMT + ", comp = %" CeedInt_FMT, ncomp*CeedIntPow(P1d, dim), ncomp); if (tmode == CEED_TRANSPOSE) { CeedSize length; @@ -335,8 +335,8 @@ int CeedBasisApplyNonTensor_f64_Magma(CeedBasis basis, CeedInt nelem, CeedBasisNonTensor_Magma *impl; ierr = CeedBasisGetData(basis, 
&impl); CeedChkBackend(ierr); - CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d", - ncomp*ndof, ncomp); + CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%" CeedInt_FMT + ", comp = %" CeedInt_FMT, ncomp*ndof, ncomp); if (tmode == CEED_TRANSPOSE) { CeedSize length; @@ -460,8 +460,8 @@ int CeedBasisApplyNonTensor_f32_Magma(CeedBasis basis, CeedInt nelem, CeedBasisNonTensor_Magma *impl; ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); - CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d", - ncomp*ndof, ncomp); + CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%" CeedInt_FMT + ", comp = %" CeedInt_FMT, ncomp*ndof, ncomp); if (tmode == CEED_TRANSPOSE) { CeedSize length; @@ -632,7 +632,8 @@ int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, char *interp_name_base = "ceed/jit-source/magma/interp"; CeedInt interp_name_len = strlen(interp_name_base) + 6; char interp_name[interp_name_len]; - snprintf(interp_name, interp_name_len, "%s-%dd.h", interp_name_base, dim); + snprintf(interp_name, interp_name_len, "%s-%" CeedInt_FMT "d.h", + interp_name_base, dim); ierr = CeedGetJitAbsolutePath(ceed, interp_name, &interp_path); CeedChkBackend(ierr); ierr = CeedLoadSourceToInitializedBuffer(ceed, interp_path, @@ -641,7 +642,8 @@ int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, char *grad_name_base = "ceed/jit-source/magma/grad"; CeedInt grad_name_len = strlen(grad_name_base) + 6; char grad_name[grad_name_len]; - snprintf(grad_name, grad_name_len, "%s-%dd.h", grad_name_base, dim); + snprintf(grad_name, grad_name_len, "%s-%" CeedInt_FMT "d.h", grad_name_base, + dim); ierr = CeedGetJitAbsolutePath(ceed, grad_name, &grad_path); CeedChkBackend(ierr); ierr = CeedLoadSourceToInitializedBuffer(ceed, grad_path, @@ -650,7 +652,8 @@ int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, char *weight_name_base = 
"ceed/jit-source/magma/weight"; CeedInt weight_name_len = strlen(weight_name_base) + 6; char weight_name[weight_name_len]; - snprintf(weight_name, weight_name_len, "%s-%dd.h", weight_name_base, dim); + snprintf(weight_name, weight_name_len, "%s-%" CeedInt_FMT "d.h", + weight_name_base, dim); ierr = CeedGetJitAbsolutePath(ceed, weight_name, &weight_path); CeedChkBackend(ierr); ierr = CeedLoadSourceToInitializedBuffer(ceed, weight_path, diff --git a/backends/memcheck/ceed-memcheck-qfunction.c b/backends/memcheck/ceed-memcheck-qfunction.c index 6e90ee1736..9b41096352 100644 --- a/backends/memcheck/ceed-memcheck-qfunction.c +++ b/backends/memcheck/ceed-memcheck-qfunction.c @@ -49,7 +49,7 @@ static int CeedQFunctionApply_Memcheck(CeedQFunction qf, CeedInt Q, ierr = CeedVectorGetLength(V[i], &len); CeedChkBackend(ierr); VALGRIND_MAKE_MEM_UNDEFINED(impl->outputs[i], len); - snprintf(name, 30, "'QFunction output %d'", i); + snprintf(name, 30, "'QFunction output %" CeedInt_FMT "'", i); mem_block_ids[i] = VALGRIND_CREATE_BLOCK(impl->outputs[i], len, name); } diff --git a/backends/opt/ceed-opt-operator.c b/backends/opt/ceed-opt-operator.c index b2bd548d77..0995394956 100644 --- a/backends/opt/ceed-opt-operator.c +++ b/backends/opt/ceed-opt-operator.c @@ -820,7 +820,7 @@ int CeedOperatorCreate_Opt(CeedOperator op) { if (blk_size != 1 && blk_size != 8) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_BACKEND, - "Opt backend cannot use blocksize: %d", blk_size); + "Opt backend cannot use blocksize: %" CeedInt_FMT, blk_size); // LCOV_EXCL_STOP ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", diff --git a/backends/ref/ceed-ref-restriction.c b/backends/ref/ceed-ref-restriction.c index 5ac3010453..355eb724c2 100644 --- a/backends/ref/ceed-ref-restriction.c +++ b/backends/ref/ceed-ref-restriction.c @@ -362,8 +362,8 @@ int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp 
- 1) * comp_stride) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_BACKEND, - "Restriction offset %d (%d) out of range " - "[0, %d]", i, offsets[i], l_size); + "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range " + "[0, %" CeedInt_FMT "]", i, offsets[i], l_size); // LCOV_EXCL_STOP } diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index 96ed0afc2d..1dc6d48be7 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -14,6 +14,7 @@ On this page we provide a summary of the main API changes, new features and exam ### New features - Update `/cpu/self/memcheck/*` backends to help verify `CeedQFunctionContext` data sizes provided by user. +- Added `CeedInt_FMT` to support potential future use of larger integer sizes. ### Bugfix diff --git a/examples/ceed/ex1-volume.c b/examples/ceed/ex1-volume.c index f019227c5e..c479996337 100644 --- a/examples/ceed/ex1-volume.c +++ b/examples/ceed/ex1-volume.c @@ -47,24 +47,26 @@ #include "ex1-volume.h" // Auxiliary functions. 
-int GetCartesianMeshSize(int dim, int degree, int prob_size, int num_xyz[dim]); -int BuildCartesianRestriction(Ceed ceed, int dim, int num_xyz[dim], int degree, - int num_comp, CeedInt *size, CeedInt num_qpts, - CeedElemRestriction *restr, +int GetCartesianMeshSize(CeedInt dim, CeedInt degree, CeedInt prob_size, + CeedInt num_xyz[dim]); +int BuildCartesianRestriction(Ceed ceed, CeedInt dim, CeedInt num_xyz[dim], + CeedInt degree, CeedInt num_comp, CeedInt *size, + CeedInt num_qpts, CeedElemRestriction *restr, CeedElemRestriction *restr_i); -int SetCartesianMeshCoords(int dim, int num_xyz[dim], int mesh_degree, - CeedVector mesh_coords); -CeedScalar TransformMeshCoords(int dim, int mesh_size, CeedVector mesh_coords); +int SetCartesianMeshCoords(CeedInt dim, CeedInt num_xyz[dim], + CeedInt mesh_degree, CeedVector mesh_coords); +CeedScalar TransformMeshCoords(CeedInt dim, CeedInt mesh_size, + CeedVector mesh_coords); int main(int argc, const char *argv[]) { const char *ceed_spec = "/cpu/self"; - int dim = 3; // dimension of the mesh - int num_comp_x = 3; // number of x components - int mesh_degree = 4; // polynomial degree for the mesh - int sol_degree = 4; // polynomial degree for the solution - int num_qpts = sol_degree + 2; // number of 1D quadrature points - int prob_size = -1; // approximate problem size - int help = 0, test = 0, gallery = 0; + CeedInt dim = 3; // dimension of the mesh + CeedInt num_comp_x = 3; // number of x components + CeedInt mesh_degree = 4; // polynomial degree for the mesh + CeedInt sol_degree = 4; // polynomial degree for the solution + CeedInt num_qpts = sol_degree + 2; // number of 1D quadrature points + CeedInt prob_size = -1; // approximate problem size + CeedInt help = 0, test = 0, gallery = 0; // Process command line arguments. 
for (int ia = 1; ia < argc; ia++) { @@ -103,11 +105,11 @@ int main(int argc, const char *argv[]) { // LCOV_EXCL_START printf("Selected options: [command line option] : \n"); printf(" Ceed specification [-c] : %s\n", ceed_spec); - printf(" Mesh dimension [-d] : %d\n", dim); - printf(" Mesh degree [-m] : %d\n", mesh_degree); - printf(" Solution degree [-p] : %d\n", sol_degree); - printf(" Num. 1D quadr. pts [-q] : %d\n", num_qpts); - printf(" Approx. # unknowns [-s] : %d\n", prob_size); + printf(" Mesh dimension [-d] : %" CeedInt_FMT "\n", dim); + printf(" Mesh degree [-m] : %" CeedInt_FMT "\n", mesh_degree); + printf(" Solution degree [-p] : %" CeedInt_FMT "\n", sol_degree); + printf(" Num. 1D quadr. pts [-q] : %" CeedInt_FMT "\n", num_qpts); + printf(" Approx. # unknowns [-s] : %" CeedInt_FMT "\n", prob_size); printf(" QFunction source [-g] : %s\n", gallery?"gallery":"header"); if (help) { printf("Test/quiet mode is %s\n", (test?"ON":"OFF (use -t to enable)")); @@ -130,13 +132,13 @@ int main(int argc, const char *argv[]) { CEED_GAUSS, &sol_basis); // Determine the mesh size based on the given approximate problem size. 
- int num_xyz[dim]; + CeedInt num_xyz[dim]; GetCartesianMeshSize(dim, sol_degree, prob_size, num_xyz); if (!test) { // LCOV_EXCL_START - printf("Mesh size: nx = %d", num_xyz[0]); - if (dim > 1) { printf(", ny = %d", num_xyz[1]); } - if (dim > 2) { printf(", nz = %d", num_xyz[2]); } + printf("Mesh size: nx = %" CeedInt_FMT, num_xyz[0]); + if (dim > 1) { printf(", ny = %" CeedInt_FMT, num_xyz[1]); } + if (dim > 2) { printf(", nz = %" CeedInt_FMT, num_xyz[2]); } printf("\n"); // LCOV_EXCL_STOP } @@ -151,8 +153,8 @@ int main(int argc, const char *argv[]) { num_qpts, &sol_restr, &sol_restr_i); if (!test) { // LCOV_EXCL_START - printf("Number of mesh nodes : %d\n", mesh_size/dim); - printf("Number of solution nodes : %d\n", sol_size); + printf("Number of mesh nodes : %" CeedInt_FMT "\n", mesh_size/dim); + printf("Number of solution nodes : %" CeedInt_FMT "\n", sol_size); // LCOV_EXCL_STOP } @@ -188,7 +190,7 @@ int main(int argc, const char *argv[]) { case 1: { // This creates the QFunction via the gallery. 
char name[13] = ""; - snprintf(name, sizeof name, "Mass%dDBuild", dim); + snprintf(name, sizeof name, "Mass%" CeedInt_FMT "DBuild", dim); CeedQFunctionCreateInteriorByName(ceed, name, &qf_build); break; } @@ -209,7 +211,7 @@ int main(int argc, const char *argv[]) { CeedVector q_data; CeedInt elem_qpts = CeedIntPow(num_qpts, dim); CeedInt num_elem = 1; - for (int d = 0; d < dim; d++) + for (CeedInt d = 0; d < dim; d++) num_elem *= num_xyz[d]; CeedVectorCreate(ceed, num_elem*elem_qpts, &q_data); CeedOperatorApply(op_build, mesh_coords, q_data, @@ -294,7 +296,8 @@ int main(int argc, const char *argv[]) { return 0; } -int GetCartesianMeshSize(int dim, int degree, int prob_size, int num_xyz[dim]) { +int GetCartesianMeshSize(CeedInt dim, CeedInt degree, CeedInt prob_size, + CeedInt num_xyz[dim]) { // Use the approximate formula: // prob_size ~ num_elem * degree^dim CeedInt num_elem = prob_size / CeedIntPow(degree, dim); @@ -304,23 +307,23 @@ int GetCartesianMeshSize(int dim, int degree, int prob_size, int num_xyz[dim]) { s++; } CeedInt r = s%dim; - for (int d = 0; d < dim; d++) { - int sd = s/dim; + for (CeedInt d = 0; d < dim; d++) { + CeedInt sd = s/dim; if (r > 0) { sd++; r--; } num_xyz[d] = 1 << sd; } return 0; } -int BuildCartesianRestriction(Ceed ceed, int dim, int num_xyz[dim], int degree, - int num_comp, CeedInt *size, CeedInt num_qpts, - CeedElemRestriction *restr, +int BuildCartesianRestriction(Ceed ceed, CeedInt dim, CeedInt num_xyz[dim], + CeedInt degree, CeedInt num_comp, CeedInt *size, + CeedInt num_qpts, CeedElemRestriction *restr, CeedElemRestriction *restr_i) { CeedInt p = degree + 1; CeedInt num_nodes = CeedIntPow(p, dim); // number of scalar nodes per element CeedInt elem_qpts = CeedIntPow(num_qpts, dim); // number of qpts per element CeedInt nd[3], num_elem = 1, scalar_size = 1; - for (int d = 0; d < dim; d++) { + for (CeedInt d = 0; d < dim; d++) { num_elem *= num_xyz[d]; nd[d] = num_xyz[d] * (p - 1) + 1; scalar_size *= nd[d]; @@ -332,11 +335,11 @@ 
int BuildCartesianRestriction(Ceed ceed, int dim, int num_xyz[dim], int degree, CeedInt *elem_nodes = malloc(sizeof(CeedInt)*num_elem*num_nodes); for (CeedInt e = 0; e < num_elem; e++) { CeedInt e_xyz[3] = {1, 1, 1}, re = e; - for (int d = 0; d < dim; d++) { e_xyz[d] = re % num_xyz[d]; re /= num_xyz[d]; } + for (CeedInt d = 0; d < dim; d++) { e_xyz[d] = re % num_xyz[d]; re /= num_xyz[d]; } CeedInt *loc_el_nodes = elem_nodes + e*num_nodes; - for (int l_nodes = 0; l_nodes < num_nodes; l_nodes++) { + for (CeedInt l_nodes = 0; l_nodes < num_nodes; l_nodes++) { CeedInt g_nodes = 0, g_nodes_stride = 1, r_nodes = l_nodes; - for (int d = 0; d < dim; d++) { + for (CeedInt d = 0; d < dim; d++) { g_nodes += (e_xyz[d] * (p - 1) + r_nodes % p) * g_nodes_stride; g_nodes_stride *= nd[d]; r_nodes /= p; @@ -355,11 +358,11 @@ int BuildCartesianRestriction(Ceed ceed, int dim, int num_xyz[dim], int degree, return 0; } -int SetCartesianMeshCoords(int dim, int num_xyz[dim], int mesh_degree, - CeedVector mesh_coords) { +int SetCartesianMeshCoords(CeedInt dim, CeedInt num_xyz[dim], + CeedInt mesh_degree, CeedVector mesh_coords) { CeedInt p = mesh_degree + 1; CeedInt nd[3], num_elem = 1, scalar_size = 1; - for (int d = 0; d < dim; d++) { + for (CeedInt d = 0; d < dim; d++) { num_elem *= num_xyz[d]; nd[d] = num_xyz[d] * (p - 1) + 1; scalar_size *= nd[d]; @@ -372,7 +375,7 @@ int SetCartesianMeshCoords(int dim, int num_xyz[dim], int mesh_degree, for (CeedInt i = 0; i < p; i++) { nodes[i] = 0.5 + 0.5 * nodes[i]; } for (CeedInt gs_nodes = 0; gs_nodes < scalar_size; gs_nodes++) { CeedInt r_nodes = gs_nodes; - for (int d = 0; d < dim; d++) { + for (CeedInt d = 0; d < dim; d++) { CeedInt d_1d = r_nodes % nd[d]; coords[gs_nodes + scalar_size * d] = ((d_1d / (p - 1)) + nodes[d_1d % (p - 1)]) / num_xyz[d]; @@ -389,7 +392,8 @@ int SetCartesianMeshCoords(int dim, int num_xyz[dim], int mesh_degree, #define M_PI_2 1.57079632679489661923 #endif -CeedScalar TransformMeshCoords(int dim, int mesh_size, 
CeedVector mesh_coords) { +CeedScalar TransformMeshCoords(CeedInt dim, CeedInt mesh_size, + CeedVector mesh_coords) { CeedScalar exact_volume; CeedScalar *coords; CeedVectorGetArray(mesh_coords, CEED_MEM_HOST, &coords); diff --git a/examples/ceed/ex2-surface.c b/examples/ceed/ex2-surface.c index 0feef0d67d..415e8f13cc 100644 --- a/examples/ceed/ex2-surface.c +++ b/examples/ceed/ex2-surface.c @@ -48,24 +48,26 @@ #include "ex2-surface.h" // Auxiliary functions. -int GetCartesianMeshSize(int dim, int degree, int prob_size, int num_xyz[3]); -int BuildCartesianRestriction(Ceed ceed, int dim, int num_xyz[3], int degree, - int num_comp, CeedInt *size, CeedInt num_qpts, - CeedElemRestriction *restr, +int GetCartesianMeshSize(CeedInt dim, CeedInt degree, CeedInt prob_size, + CeedInt num_xyz[3]); +int BuildCartesianRestriction(Ceed ceed, CeedInt dim, CeedInt num_xyz[3], + CeedInt degree, CeedInt num_comp, CeedInt *size, + CeedInt num_qpts, CeedElemRestriction *restr, CeedElemRestriction *restr_i); -int SetCartesianMeshCoords(int dim, int num_xyz[3], int mesh_degree, +int SetCartesianMeshCoords(CeedInt dim, CeedInt num_xyz[3], CeedInt mesh_degree, CeedVector mesh_coords); -CeedScalar TransformMeshCoords(int dim, int mesh_size, CeedVector mesh_coords); +CeedScalar TransformMeshCoords(CeedInt dim, CeedInt mesh_size, + CeedVector mesh_coords); int main(int argc, const char *argv[]) { const char *ceed_spec = "/cpu/self"; - int dim = 3; // dimension of the mesh - int num_comp_x = 3; // number of x components - int mesh_degree = 4; // polynomial degree for the mesh - int sol_degree = 4; // polynomial degree for the solution - int num_qpts = sol_degree + 2; // number of 1D quadrature points - int prob_size = -1; // approximate problem size - int help = 0, test = 0, gallery = 0; + CeedInt dim = 3; // dimension of the mesh + CeedInt num_comp_x = 3; // number of x components + CeedInt mesh_degree = 4; // polynomial degree for the mesh + CeedInt sol_degree = 4; // polynomial degree for 
the solution + CeedInt num_qpts = sol_degree + 2; // number of 1D quadrature points + CeedInt prob_size = -1; // approximate problem size + CeedInt help = 0, test = 0, gallery = 0; // Process command line arguments. for (int ia = 1; ia < argc; ia++) { @@ -108,11 +110,11 @@ int main(int argc, const char *argv[]) { // LCOV_EXCL_START printf("Selected options: [command line option] : \n"); printf(" Ceed specification [-c] : %s\n", ceed_spec); - printf(" Mesh dimension [-d] : %d\n", dim); - printf(" Mesh degree [-m] : %d\n", mesh_degree); - printf(" Solution degree [-p] : %d\n", sol_degree); - printf(" Num. 1D quadr. pts [-q] : %d\n", num_qpts); - printf(" Approx. # unknowns [-s] : %d\n", prob_size); + printf(" Mesh dimension [-d] : %" CeedInt_FMT "\n", dim); + printf(" Mesh degree [-m] : %" CeedInt_FMT "\n", mesh_degree); + printf(" Solution degree [-p] : %" CeedInt_FMT "\n", sol_degree); + printf(" Num. 1D quadr. pts [-q] : %" CeedInt_FMT "\n", num_qpts); + printf(" Approx. # unknowns [-s] : %" CeedInt_FMT "\n", prob_size); printf(" QFunction source [-g] : %s\n", gallery?"gallery":"header"); if (help) { printf("Test/quiet mode is %s\n", (test?"ON":"OFF (use -t to enable)")); @@ -135,14 +137,14 @@ int main(int argc, const char *argv[]) { CEED_GAUSS, &sol_basis); // Determine the mesh size based on the given approximate problem size. 
- int num_xyz[3]; + CeedInt num_xyz[3]; GetCartesianMeshSize(dim, sol_degree, prob_size, num_xyz); if (!test) { // LCOV_EXCL_START - printf("Mesh size: nx = %d", num_xyz[0]); - if (dim > 1) { printf(", ny = %d", num_xyz[1]); } - if (dim > 2) { printf(", nz = %d", num_xyz[2]); } + printf("Mesh size: nx = %" CeedInt_FMT, num_xyz[0]); + if (dim > 1) { printf(", ny = %" CeedInt_FMT, num_xyz[1]); } + if (dim > 2) { printf(", nz = %" CeedInt_FMT, num_xyz[2]); } printf("\n"); // LCOV_EXCL_STOP } @@ -159,8 +161,8 @@ int main(int argc, const char *argv[]) { num_qpts, &sol_restr, NULL); if (!test) { // LCOV_EXCL_START - printf("Number of mesh nodes : %d\n", mesh_size/dim); - printf("Number of solution nodes : %d\n", sol_size); + printf("Number of mesh nodes : %" CeedInt_FMT "\n", mesh_size/dim); + printf("Number of solution nodes : %" CeedInt_FMT "\n", sol_size); // LCOV_EXCL_STOP } @@ -196,7 +198,7 @@ int main(int argc, const char *argv[]) { case 1: { // This creates the QFunction via the gallery. char name[16] = ""; - snprintf(name, sizeof name, "Poisson%dDBuild", dim); + snprintf(name, sizeof name, "Poisson%" CeedInt_FMT "DBuild", dim); CeedQFunctionCreateInteriorByName(ceed, name, &qf_build); break; } @@ -218,7 +220,7 @@ int main(int argc, const char *argv[]) { CeedVector q_data; CeedInt elem_qpts = CeedIntPow(num_qpts, dim); CeedInt num_elem = 1; - for (int d = 0; d < dim; d++) + for (CeedInt d = 0; d < dim; d++) num_elem *= num_xyz[d]; CeedVectorCreate(ceed, num_elem*elem_qpts*dim*(dim+1)/2, &q_data); CeedOperatorApply(op_build, mesh_coords, q_data, @@ -239,7 +241,7 @@ int main(int argc, const char *argv[]) { case 1: { // This creates the QFunction via the gallery. 
char name[16] = ""; - snprintf(name, sizeof name, "Poisson%dDApply", dim); + snprintf(name, sizeof name, "Poisson%" CeedInt_FMT "DApply", dim); CeedQFunctionCreateInteriorByName(ceed, name, &qf_apply); break; } @@ -318,7 +320,8 @@ int main(int argc, const char *argv[]) { return 0; } -int GetCartesianMeshSize(int dim, int degree, int prob_size, int num_xyz[3]) { +int GetCartesianMeshSize(CeedInt dim, CeedInt degree, CeedInt prob_size, + CeedInt num_xyz[3]) { // Use the approximate formula: // prob_size ~ num_elem * degree^dim CeedInt num_elem = prob_size / CeedIntPow(degree, dim); @@ -328,23 +331,23 @@ int GetCartesianMeshSize(int dim, int degree, int prob_size, int num_xyz[3]) { s++; } CeedInt r = s%dim; - for (int d = 0; d < dim; d++) { - int sd = s/dim; + for (CeedInt d = 0; d < dim; d++) { + CeedInt sd = s/dim; if (r > 0) { sd++; r--; } num_xyz[d] = 1 << sd; } return 0; } -int BuildCartesianRestriction(Ceed ceed, int dim, int num_xyz[3], int degree, - int num_comp, CeedInt *size, CeedInt num_qpts, - CeedElemRestriction *restr, +int BuildCartesianRestriction(Ceed ceed, CeedInt dim, CeedInt num_xyz[3], + CeedInt degree, CeedInt num_comp, CeedInt *size, + CeedInt num_qpts, CeedElemRestriction *restr, CeedElemRestriction *restr_i) { CeedInt p = degree + 1; CeedInt num_nodes = CeedIntPow(p, dim); // number of scalar nodes per element CeedInt elem_qpts = CeedIntPow(num_qpts, dim); // number of qpts per element CeedInt nd[3], num_elem = 1, scalar_size = 1; - for (int d = 0; d < dim; d++) { + for (CeedInt d = 0; d < dim; d++) { num_elem *= num_xyz[d]; nd[d] = num_xyz[d] * (p - 1) + 1; scalar_size *= nd[d]; @@ -356,11 +359,11 @@ int BuildCartesianRestriction(Ceed ceed, int dim, int num_xyz[3], int degree, CeedInt *el_nodes = malloc(sizeof(CeedInt)*num_elem*num_nodes); for (CeedInt e = 0; e < num_elem; e++) { CeedInt e_xyz[3] = {1, 1, 1}, re = e; - for (int d = 0; d < dim; d++) { e_xyz[d] = re%num_xyz[d]; re /= num_xyz[d]; } + for (CeedInt d = 0; d < dim; d++) { e_xyz[d] 
= re%num_xyz[d]; re /= num_xyz[d]; } CeedInt *loc_el_nodes = el_nodes + e*num_nodes; - for (int l_nodes = 0; l_nodes < num_nodes; l_nodes++) { + for (CeedInt l_nodes = 0; l_nodes < num_nodes; l_nodes++) { CeedInt g_nodes = 0, g_nodes_stride = 1, r_nodes = l_nodes; - for (int d = 0; d < dim; d++) { + for (CeedInt d = 0; d < dim; d++) { g_nodes += (e_xyz[d] * (p - 1) + r_nodes % p) * g_nodes_stride; g_nodes_stride *= nd[d]; r_nodes /= p; @@ -383,11 +386,11 @@ int BuildCartesianRestriction(Ceed ceed, int dim, int num_xyz[3], int degree, return 0; } -int SetCartesianMeshCoords(int dim, int num_xyz[3], int mesh_degree, +int SetCartesianMeshCoords(CeedInt dim, CeedInt num_xyz[3], CeedInt mesh_degree, CeedVector mesh_coords) { CeedInt p = mesh_degree + 1; CeedInt nd[3], num_elem = 1, scalar_size = 1; - for (int d = 0; d < dim; d++) { + for (CeedInt d = 0; d < dim; d++) { num_elem *= num_xyz[d]; nd[d] = num_xyz[d] * (p - 1) + 1; scalar_size *= nd[d]; @@ -400,7 +403,7 @@ int SetCartesianMeshCoords(int dim, int num_xyz[3], int mesh_degree, for (CeedInt i = 0; i < p; i++) { nodes[i] = 0.5 + 0.5 * nodes[i]; } for (CeedInt gs_nodes = 0; gs_nodes < scalar_size; gs_nodes++) { CeedInt r_nodes = gs_nodes; - for (int d = 0; d < dim; d++) { + for (CeedInt d = 0; d < dim; d++) { CeedInt d1d = r_nodes % nd[d]; coords[gs_nodes + scalar_size * d] = ((d1d / (p - 1)) + nodes[d1d % (p - 1)]) / num_xyz[d]; @@ -416,7 +419,8 @@ int SetCartesianMeshCoords(int dim, int num_xyz[3], int mesh_degree, #define M_PI 3.14159265358979323846 #endif -CeedScalar TransformMeshCoords(int dim, int mesh_size, CeedVector mesh_coords) { +CeedScalar TransformMeshCoords(CeedInt dim, CeedInt mesh_size, + CeedVector mesh_coords) { CeedScalar exact_sa = (dim == 1 ? 2 : dim == 2 ? 
4 : 6); CeedScalar *coords; diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index 7a67acf59e..f0a237efb0 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -270,8 +270,8 @@ int main(int argc, char **argv) { owned_nodes /= num_comp_q; ierr = PetscPrintf(comm, " Mesh:\n" - " Number of 1D Basis Nodes (P) : %d\n" - " Number of 1D Quadrature Points (Q) : %d\n" + " Number of 1D Basis Nodes (P) : %" CeedInt_FMT "\n" + " Number of 1D Quadrature Points (Q) : %" CeedInt_FMT "\n" " Global DoFs : %" PetscInt_FMT "\n" " Owned DoFs : %" PetscInt_FMT "\n" " DoFs per node : %" PetscInt_FMT "\n" diff --git a/examples/fluids/problems/advection.c b/examples/fluids/problems/advection.c index 8964682aa8..535707f328 100644 --- a/examples/fluids/problems/advection.c +++ b/examples/fluids/problems/advection.c @@ -237,7 +237,7 @@ PetscErrorCode PRINT_ADVECTION(ProblemData *problem, AppCtx app_ctx) { " Problem:\n" " Problem Name : %s\n" " Stabilization : %s\n" - " Bubble Type : %s (%dD)\n" + " Bubble Type : %s (%" CeedInt_FMT "D)\n" " Bubble Continuity : %s\n" " Wind Type : %s\n", app_ctx->problem_name, StabilizationTypes[advection_ctx->stabilization], diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c index d8d7292caa..56ce3085b6 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -37,7 +37,8 @@ static PetscErrorCode GetYNodeLocs(const MPI_Comm comm, ierr = PetscSynchronizedFGets(comm, fp, char_array_len, line); CHKERRQ(ierr); ierr = PetscStrToArray(line, ' ', &ndims, &array); CHKERRQ(ierr); if (ndims < dims[1]) SETERRQ(comm, -1, - "Line %d of %s does not contain enough columns (%d instead of %d)", i, + "Line %" PetscInt_FMT" of %s does not contain enough columns (%" + PetscInt_FMT" instead of %" PetscInt_FMT ")", i, path, ndims, dims[1]); node_locs[i] = (PetscReal) atof(array[0]); diff --git a/examples/fluids/problems/stg_shur14.c 
b/examples/fluids/problems/stg_shur14.c index 4f2f5ce8a8..5d7f9841fa 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -41,8 +41,8 @@ PetscErrorCode CalcCholeskyDecomp(MPI_Comm comm, PetscInt nprofs, Cij[2][i] = sqrt(Rij[2][i] - pow(Cij[4][i], 2) - pow(Cij[5][i], 2)); if (isnan(Cij[0][i]) || isnan(Cij[1][i]) || isnan(Cij[2][i])) - SETERRQ(comm, -1, "Cholesky decomposition failed at profile point %d. " - "Either STGInflow has non-SPD matrix or contains nan.", i+1); + SETERRQ(comm, -1, "Cholesky decomposition failed at profile point %" + PetscInt_FMT ". Either STGInflow has non-SPD matrix or contains nan.", i+1); } PetscFunctionReturn(0); } @@ -78,7 +78,7 @@ static PetscErrorCode OpenPHASTADatFile(const MPI_Comm comm, ierr = PetscSynchronizedFGets(comm, *fp, char_array_len, line); CHKERRQ(ierr); ierr = PetscStrToArray(line, ' ', &ndims, &array); CHKERRQ(ierr); if (ndims != 2) SETERRQ(comm, -1, - "Found %d dimensions instead of 2 on the first line of %s", + "Found %" PetscInt_FMT" dimensions instead of 2 on the first line of %s", ndims, path); for (PetscInt i=0; i STG_NMODES_MAX) - SETERRQ(comm, 1, "Number of wavemodes in %s (%d) exceeds STG_NMODES_MAX (%d). " + SETERRQ(comm, 1, "Number of wavemodes in %s (%" + PetscInt_FMT") exceeds STG_NMODES_MAX (%" PetscInt_FMT"). 
" "Change size of STG_NMODES_MAX and recompile", stg_rand_path, nmodes, STG_NMODES_MAX); diff --git a/examples/petsc/area.c b/examples/petsc/area.c index 35828cbeb4..17af12e8a7 100644 --- a/examples/petsc/area.c +++ b/examples/petsc/area.c @@ -188,8 +188,8 @@ int main(int argc, char **argv) { " libCEED Backend : %s\n" " libCEED Backend MemType : %s\n" " Mesh:\n" - " Number of 1D Basis Nodes (p) : %d\n" - " Number of 1D Quadrature Points (q) : %d\n" + " Number of 1D Basis Nodes (p) : %" CeedInt_FMT "\n" + " Number of 1D Quadrature Points (q) : %" CeedInt_FMT "\n" " Global nodes : %" PetscInt_FMT "\n" " DoF per node : %" PetscInt_FMT "\n" " Global DoFs : %" PetscInt_FMT "\n", diff --git a/examples/petsc/bps.c b/examples/petsc/bps.c index 3d3c66da91..ad834e9eb8 100644 --- a/examples/petsc/bps.c +++ b/examples/petsc/bps.c @@ -170,7 +170,7 @@ static PetscErrorCode RunWithDM(RunParams rp, DM dm, PetscMPIInt comm_size; ierr = MPI_Comm_size(rp->comm, &comm_size); CHKERRQ(ierr); ierr = PetscPrintf(rp->comm, - "\n-- CEED Benchmark Problem %d -- libCEED + PETSc --\n" + "\n-- CEED Benchmark Problem %" CeedInt_FMT " -- libCEED + PETSc --\n" " MPI:\n" " Hostname : %s\n" " Total ranks : %d\n" @@ -181,8 +181,8 @@ static PetscErrorCode RunWithDM(RunParams rp, DM dm, " libCEED Backend : %s\n" " libCEED Backend MemType : %s\n" " Mesh:\n" - " Number of 1D Basis Nodes (P) : %d\n" - " Number of 1D Quadrature Points (Q) : %d\n" + " Number of 1D Basis Nodes (P) : %" CeedInt_FMT "\n" + " Number of 1D Quadrature Points (Q) : %" CeedInt_FMT "\n" " Global nodes : %" PetscInt_FMT "\n" " Local Elements : %" PetscInt_FMT "\n" " Owned nodes : %" PetscInt_FMT "\n" diff --git a/examples/petsc/bpsraw.c b/examples/petsc/bpsraw.c index 166516f8ba..ae5bb7c5bd 100644 --- a/examples/petsc/bpsraw.c +++ b/examples/petsc/bpsraw.c @@ -546,15 +546,15 @@ int main(int argc, char **argv) { ierr = VecGetType(X, &vec_type); CHKERRQ(ierr); ierr = PetscPrintf(comm, - "\n-- CEED Benchmark Problem %d -- libCEED + 
PETSc --\n" + "\n-- CEED Benchmark Problem %" CeedInt_FMT " -- libCEED + PETSc --\n" " PETSc:\n" " PETSc Vec Type : %s\n" " libCEED:\n" " libCEED Backend : %s\n" " libCEED Backend MemType : %s\n" " Mesh:\n" - " Number of 1D Basis Nodes (P) : %d\n" - " Number of 1D Quadrature Points (Q) : %d\n" + " Number of 1D Basis Nodes (P) : %" CeedInt_FMT "\n" + " Number of 1D Quadrature Points (Q) : %" CeedInt_FMT "\n" " Global nodes : %" PetscInt_FMT "\n" " Process Decomposition : %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n" diff --git a/examples/petsc/bpssphere.c b/examples/petsc/bpssphere.c index 79a4019261..1f4596cf4a 100644 --- a/examples/petsc/bpssphere.c +++ b/examples/petsc/bpssphere.c @@ -193,13 +193,14 @@ int main(int argc, char **argv) { const char *used_resource; CeedGetResource(ceed, &used_resource); ierr = PetscPrintf(comm, - "\n-- CEED Benchmark Problem %d on the Sphere -- libCEED + PETSc --\n" + "\n-- CEED Benchmark Problem %" CeedInt_FMT + " on the Sphere -- libCEED + PETSc --\n" " libCEED:\n" " libCEED Backend : %s\n" " libCEED Backend MemType : %s\n" " Mesh:\n" - " Number of 1D Basis Nodes (p) : %d\n" - " Number of 1D Quadrature Points (q) : %d\n" + " Number of 1D Basis Nodes (p) : %" CeedInt_FMT "\n" + " Number of 1D Quadrature Points (q) : %" CeedInt_FMT "\n" " Global nodes : %" PetscInt_FMT "\n", bp_choice+1, ceed_resource, CeedMemTypes[mem_type_backend], P, Q, g_size/num_comp_u); CHKERRQ(ierr); diff --git a/examples/petsc/multigrid.c b/examples/petsc/multigrid.c index c32fd93524..56ba5de8c3 100644 --- a/examples/petsc/multigrid.c +++ b/examples/petsc/multigrid.c @@ -264,20 +264,20 @@ int main(int argc, char **argv) { ierr = VecGetType(X[0], &vec_type); CHKERRQ(ierr); ierr = PetscPrintf(comm, - "\n-- CEED Benchmark Problem %d -- libCEED + PETSc + PCMG --\n" + "\n-- CEED Benchmark Problem %" CeedInt_FMT " -- libCEED + PETSc + PCMG --\n" " PETSc:\n" " PETSc Vec Type : %s\n" " libCEED:\n" " libCEED Backend : %s\n" " libCEED Backend MemType : 
%s\n" " Mesh:\n" - " Number of 1D Basis Nodes (p) : %d\n" - " Number of 1D Quadrature Points (q) : %d\n" + " Number of 1D Basis Nodes (p) : %" CeedInt_FMT "\n" + " Number of 1D Quadrature Points (q) : %" CeedInt_FMT "\n" " Global Nodes : %" PetscInt_FMT "\n" " Owned Nodes : %" PetscInt_FMT "\n" " DoF per node : %" PetscInt_FMT "\n" " Multigrid:\n" - " Number of Levels : %d\n", + " Number of Levels : %" CeedInt_FMT "\n", bp_choice+1, vec_type, used_resource, CeedMemTypes[mem_type_backend], P, Q, g_size[fine_level]/num_comp_u, l_size[fine_level]/num_comp_u, @@ -293,11 +293,11 @@ int main(int argc, char **argv) { // Set up libCEED operators on each level ierr = PetscMalloc1(num_levels, &ceed_data); CHKERRQ(ierr); - for (int i=0; iname_for_disp, forcing_types_for_disp[app_ctx->forcing_choice], app_ctx->mesh_file[0] ? app_ctx->mesh_file : "Box Mesh", @@ -413,7 +413,7 @@ int main(int argc, char **argv) { CeedInt level = i ? fine_level : 0; ierr = PetscPrintf(comm, " Level %" PetscInt_FMT " (%s):\n" - " Number of 1D Basis Nodes (p) : %d\n" + " Number of 1D Basis Nodes (p) : %" CeedInt_FMT "\n" " Global Nodes : %" PetscInt_FMT "\n" " Owned Nodes : %" PetscInt_FMT "\n", level, i ? 
"fine" : "coarse", @@ -690,7 +690,7 @@ int main(int argc, char **argv) { for (increment = 1; increment <= app_ctx->num_increments; increment++) { // -- Log increment count if (snes_monitor) { - ierr = PetscPrintf(comm, "%d Load Increment\n", increment - 1); + ierr = PetscPrintf(comm, "%" PetscInt_FMT " Load Increment\n", increment - 1); CHKERRQ(ierr); } @@ -761,8 +761,8 @@ int main(int argc, char **argv) { " SNES:\n" " SNES Type : %s\n" " SNES Convergence : %s\n" - " Number of Load Increments : %d\n" - " Completed Load Increments : %d\n" + " Number of Load Increments : %" PetscInt_FMT "\n" + " Completed Load Increments : %" PetscInt_FMT "\n" " Total SNES Iterations : %" PetscInt_FMT "\n" " Final rnorm : %e\n", snes_type, SNESConvergedReasons[reason], diff --git a/examples/solids/src/cl-options.c b/examples/solids/src/cl-options.c index 3e5a0df1ed..c4285e8af8 100644 --- a/examples/solids/src/cl-options.c +++ b/examples/solids/src/cl-options.c @@ -102,7 +102,8 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx) { for (PetscInt j = 0; j < nclamp_params; j++) app_ctx->bc_clamp_max[i][j] = 0.; - snprintf(option_name, sizeof option_name, "-bc_clamp_%d_translate", + snprintf(option_name, sizeof option_name, + "-bc_clamp_%" PetscInt_FMT "_translate", app_ctx->bc_clamp_faces[i]); max_n = 3; ierr = PetscOptionsScalarArray(option_name, @@ -112,7 +113,7 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx) { // Rotation vector max_n = 5; - snprintf(option_name, sizeof option_name, "-bc_clamp_%d_rotate", + snprintf(option_name, sizeof option_name, "-bc_clamp_%" PetscInt_FMT "_rotate", app_ctx->bc_clamp_faces[i]); ierr = PetscOptionsScalarArray(option_name, "Vector with axis of rotation and rotation, in radians", @@ -142,7 +143,7 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx) { for (PetscInt j = 0; j < 3; j++) app_ctx->bc_traction_vector[i][j] = 0.; - snprintf(option_name, sizeof option_name, 
"-bc_traction_%d", + snprintf(option_name, sizeof option_name, "-bc_traction_%" PetscInt_FMT, app_ctx->bc_traction_faces[i]); max_n = 3; PetscBool set = false; diff --git a/examples/solids/src/setup-dm.c b/examples/solids/src/setup-dm.c index 3041045130..8e805c4fb6 100644 --- a/examples/solids/src/setup-dm.c +++ b/examples/solids/src/setup-dm.c @@ -132,7 +132,8 @@ PetscErrorCode SetupDMByDegree(DM dm, AppCtx app_ctx, PetscInt order, // -- Clamp BCs for (PetscInt i = 0; i < app_ctx->bc_clamp_count; i++) { char bcName[25]; - snprintf(bcName, sizeof bcName, "clamp_%d", app_ctx->bc_clamp_faces[i]); + snprintf(bcName, sizeof bcName, "clamp_%" PetscInt_FMT, + app_ctx->bc_clamp_faces[i]); ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, bcName, label, 1, &app_ctx->bc_clamp_faces[i], 0, 0, NULL, (void(*)(void))BCClamp, NULL, diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h index e8ad57f5a3..59e67b457b 100644 --- a/include/ceed/ceed.h +++ b/include/ceed/ceed.h @@ -134,6 +134,7 @@ /// Integer type, used for indexing /// @ingroup Ceed typedef int32_t CeedInt; +#define CeedInt_FMT "d" /// Integer type, used array sizes /// @ingroup Ceed diff --git a/interface/ceed-basis.c b/interface/ceed-basis.c index f87ccfde4e..7fa279d247 100644 --- a/interface/ceed-basis.c +++ b/interface/ceed-basis.c @@ -160,7 +160,7 @@ static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, CeedInt n, const CeedScalar *a, FILE *stream) { for (CeedInt i=0; i 1) - fprintf(stream, "%12s[%d]:", name, i); + fprintf(stream, "%12s[%" CeedInt_FMT "]:", name, i); else fprintf(stream, "%12s:", name); for (CeedInt j=0; jtopo; // Print FE space and element topology of the basis if (basis->tensor_basis) { - fprintf(stream, "CeedBasis (%s on a %s element): dim=%d P=%d Q=%d\n", + fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" + CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[FE_space], CeedElemTopologies[topo], basis->dim, basis->P_1d, basis->Q_1d); } else { - 
fprintf(stream, "CeedBasis (%s on a %s element): dim=%d P=%d Q=%d\n", + fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" + CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[FE_space], CeedElemTopologies[topo], basis->dim, basis->P, basis->Q); } @@ -1594,7 +1596,7 @@ int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, if (Q < 2) // LCOV_EXCL_START return CeedError(NULL, CEED_ERROR_DIMENSION, - "Cannot create Lobatto quadrature with Q=%d < 2 points", Q); + "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q); // LCOV_EXCL_STOP wi = 2.0/((CeedScalar)(Q*(Q-1))); if (q_weight_1d) { diff --git a/interface/ceed-elemrestriction.c b/interface/ceed-elemrestriction.c index 3e2c3b0e78..3fa2643b6f 100644 --- a/interface/ceed-elemrestriction.c +++ b/interface/ceed-elemrestriction.c @@ -854,14 +854,14 @@ int CeedElemRestrictionApply(CeedElemRestriction rstr, CeedTransposeMode t_mode, if (n != u->length) // LCOV_EXCL_START return CeedError(rstr->ceed, CEED_ERROR_DIMENSION, - "Input vector size %d not compatible with " - "element restriction (%d, %d)", u->length, m, n); + "Input vector size %" CeedInt_FMT " not compatible with " + "element restriction (%" CeedInt_FMT ", %" CeedInt_FMT ")", u->length, m, n); // LCOV_EXCL_STOP if (m != ru->length) // LCOV_EXCL_START return CeedError(rstr->ceed, CEED_ERROR_DIMENSION, - "Output vector size %d not compatible with " - "element restriction (%d, %d)", ru->length, m, n); + "Output vector size %" CeedInt_FMT " not compatible with " + "element restriction (%" CeedInt_FMT ", %" CeedInt_FMT ")", ru->length, m, n); // LCOV_EXCL_STOP if (rstr->num_elem > 0) { ierr = rstr->Apply(rstr, t_mode, u, ru, request); CeedChk(ierr); @@ -903,20 +903,20 @@ int CeedElemRestrictionApplyBlock(CeedElemRestriction rstr, CeedInt block, if (n != u->length) // LCOV_EXCL_START return CeedError(rstr->ceed, CEED_ERROR_DIMENSION, - "Input vector size %d not compatible with " - "element restriction (%d, %d)", 
u->length, m, n); + "Input vector size %" CeedInt_FMT " not compatible with " + "element restriction (%" CeedInt_FMT ", %" CeedInt_FMT ")", u->length, m, n); // LCOV_EXCL_STOP if (m != ru->length) // LCOV_EXCL_START return CeedError(rstr->ceed, CEED_ERROR_DIMENSION, - "Output vector size %d not compatible with " - "element restriction (%d, %d)", ru->length, m, n); + "Output vector size %" CeedInt_FMT " not compatible with " + "element restriction (%" CeedInt_FMT ", %" CeedInt_FMT ")", ru->length, m, n); // LCOV_EXCL_STOP if (rstr->blk_size*block > rstr->num_elem) // LCOV_EXCL_START return CeedError(rstr->ceed, CEED_ERROR_DIMENSION, - "Cannot retrieve block %d, element %d > " - "total elements %d", block, rstr->blk_size*block, + "Cannot retrieve block %" CeedInt_FMT ", element %" CeedInt_FMT " > " + "total elements %" CeedInt_FMT "", block, rstr->blk_size*block, rstr->num_elem); // LCOV_EXCL_STOP ierr = rstr->ApplyBlock(rstr, block, t_mode, u, ru, request); @@ -1096,12 +1096,14 @@ int CeedElemRestrictionGetMultiplicity(CeedElemRestriction rstr, int CeedElemRestrictionView(CeedElemRestriction rstr, FILE *stream) { char stridesstr[500]; if (rstr->strides) - sprintf(stridesstr, "[%d, %d, %d]", rstr->strides[0], rstr->strides[1], + sprintf(stridesstr, "[%" CeedInt_FMT ", %" CeedInt_FMT ", %" CeedInt_FMT "]", + rstr->strides[0], rstr->strides[1], rstr->strides[2]); else - sprintf(stridesstr, "%d", rstr->comp_stride); + sprintf(stridesstr, "%" CeedInt_FMT, rstr->comp_stride); - fprintf(stream, "%sCeedElemRestriction from (%td, %d) to %d elements with %d " + fprintf(stream, "%sCeedElemRestriction from (%td, %" CeedInt_FMT ") to %" + CeedInt_FMT " elements with %" CeedInt_FMT " " "nodes each and %s %s\n", rstr->blk_size > 1 ? "Blocked " : "", rstr->l_size, rstr->num_comp, rstr->num_elem, rstr->elem_size, rstr->strides ? 
"strides" : "component stride", stridesstr); diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c index c3fe9000f5..420e7d29cf 100644 --- a/interface/ceed-operator.c +++ b/interface/ceed-operator.c @@ -71,8 +71,8 @@ static int CeedOperatorCheckField(Ceed ceed, CeedQFunctionField qf_field, if (r != CEED_ELEMRESTRICTION_NONE && restr_num_comp != num_comp) { // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_DIMENSION, - "Field '%s' of size %d and EvalMode %s: ElemRestriction " - "has %d components, but Basis has %d components", + "Field '%s' of size %" CeedInt_FMT " and EvalMode %s: ElemRestriction " + "has %" CeedInt_FMT " components, but Basis has %" CeedInt_FMT " components", qf_field->field_name, qf_field->size, CeedEvalModes[qf_field->eval_mode], restr_num_comp, num_comp); @@ -85,7 +85,8 @@ static int CeedOperatorCheckField(Ceed ceed, CeedQFunctionField qf_field, if (size != restr_num_comp) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_DIMENSION, - "Field '%s' of size %d and EvalMode %s: ElemRestriction has %d components", + "Field '%s' of size %" CeedInt_FMT " and EvalMode %s: ElemRestriction has " + CeedInt_FMT " components", qf_field->field_name, qf_field->size, CeedEvalModes[qf_field->eval_mode], restr_num_comp); // LCOV_EXCL_STOP @@ -94,7 +95,9 @@ static int CeedOperatorCheckField(Ceed ceed, CeedQFunctionField qf_field, if (size != num_comp) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_DIMENSION, - "Field '%s' of size %d and EvalMode %s: ElemRestriction/Basis has %d components", + "Field '%s' of size %" CeedInt_FMT + " and EvalMode %s: ElemRestriction/Basis has " + CeedInt_FMT " components", qf_field->field_name, qf_field->size, CeedEvalModes[qf_field->eval_mode], num_comp); // LCOV_EXCL_STOP @@ -103,8 +106,9 @@ static int CeedOperatorCheckField(Ceed ceed, CeedQFunctionField qf_field, if (size != num_comp * dim) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_DIMENSION, - "Field '%s' of size %d and EvalMode %s in %d 
dimensions: " - "ElemRestriction/Basis has %d components", + "Field '%s' of size %" CeedInt_FMT " and EvalMode %s in %" CeedInt_FMT + " dimensions: " + "ElemRestriction/Basis has %" CeedInt_FMT " components", qf_field->field_name, qf_field->size, CeedEvalModes[qf_field->eval_mode], dim, num_comp); // LCOV_EXCL_STOP @@ -143,11 +147,11 @@ static int CeedOperatorFieldView(CeedOperatorField field, const char *pre = sub ? " " : ""; const char *in_out = input ? "Input" : "Output"; - fprintf(stream, "%s %s field %d:\n" + fprintf(stream, "%s %s field %" CeedInt_FMT ":\n" "%s Name: \"%s\"\n", pre, in_out, field_number, pre, qf_field->field_name); - fprintf(stream, "%s Size: %d\n", pre, qf_field->size); + fprintf(stream, "%s Size: %" CeedInt_FMT "\n", pre, qf_field->size); fprintf(stream, "%s EvalMode: %s\n", pre, CeedEvalModes[qf_field->eval_mode]); @@ -184,20 +188,23 @@ int CeedOperatorSingleView(CeedOperator op, bool sub, FILE *stream) { CeedInt total_fields = 0; ierr = CeedOperatorGetNumArgs(op, &total_fields); CeedChk(ierr); - fprintf(stream, "%s %d elements with %d quadrature points each\n", + fprintf(stream, "%s %" CeedInt_FMT " elements with %" CeedInt_FMT + " quadrature points each\n", pre, num_elem, num_qpts); - fprintf(stream, "%s %d field%s\n", pre, total_fields, + fprintf(stream, "%s %" CeedInt_FMT " field%s\n", pre, total_fields, total_fields>1 ? "s" : ""); - fprintf(stream, "%s %d input field%s:\n", pre, op->qf->num_input_fields, + fprintf(stream, "%s %" CeedInt_FMT " input field%s:\n", pre, + op->qf->num_input_fields, op->qf->num_input_fields>1 ? "s" : ""); for (CeedInt i=0; iqf->num_input_fields; i++) { ierr = CeedOperatorFieldView(op->input_fields[i], op->qf->input_fields[i], i, sub, 1, stream); CeedChk(ierr); } - fprintf(stream, "%s %d output field%s:\n", pre, op->qf->num_output_fields, + fprintf(stream, "%s %" CeedInt_FMT " output field%s:\n", pre, + op->qf->num_output_fields, op->qf->num_output_fields>1 ? 
"s" : ""); for (CeedInt i=0; iqf->num_output_fields; i++) { ierr = CeedOperatorFieldView(op->output_fields[i], op->qf->output_fields[i], @@ -720,8 +727,8 @@ int CeedOperatorSetField(CeedOperator op, const char *field_name, op->num_elem != num_elem) // LCOV_EXCL_START return CeedError(op->ceed, CEED_ERROR_DIMENSION, - "ElemRestriction with %d elements incompatible with prior " - "%d elements", num_elem, op->num_elem); + "ElemRestriction with %" CeedInt_FMT " elements incompatible with prior " + CeedInt_FMT " elements", num_elem, op->num_elem); // LCOV_EXCL_STOP CeedInt num_qpts = 0; @@ -730,8 +737,8 @@ int CeedOperatorSetField(CeedOperator op, const char *field_name, if (op->num_qpts && op->num_qpts != num_qpts) // LCOV_EXCL_START return CeedError(op->ceed, CEED_ERROR_DIMENSION, - "Basis with %d quadrature points " - "incompatible with prior %d points", num_qpts, + "Basis with %" CeedInt_FMT " quadrature points " + "incompatible with prior %" CeedInt_FMT " points", num_qpts, op->num_qpts); // LCOV_EXCL_STOP } @@ -1221,7 +1228,7 @@ int CeedOperatorView(CeedOperator op, FILE *stream) { for (CeedInt i=0; inum_suboperators; i++) { has_name = op->sub_operators[i]->name; - fprintf(stream, " SubOperator %d%s%s:\n", i, + fprintf(stream, " SubOperator %" CeedInt_FMT "%s%s:\n", i, has_name ? " - " : "", has_name ? 
op->sub_operators[i]->name : ""); ierr = CeedOperatorSingleView(op->sub_operators[i], 1, stream); diff --git a/interface/ceed-qfunction.c b/interface/ceed-qfunction.c index 5208e0357b..611780250a 100644 --- a/interface/ceed-qfunction.c +++ b/interface/ceed-qfunction.c @@ -143,9 +143,9 @@ static int CeedQFunctionFieldView(CeedQFunctionField field, ierr = CeedQFunctionFieldGetSize(field, &size); CeedChk(ierr); CeedEvalMode eval_mode; ierr = CeedQFunctionFieldGetEvalMode(field, &eval_mode); CeedChk(ierr); - fprintf(stream, " %s field %d:\n" + fprintf(stream, " %s field %" CeedInt_FMT ":\n" " Name: \"%s\"\n" - " Size: %d\n" + " Size: %" CeedInt_FMT "\n" " EvalMode: \"%s\"\n", inout, field_number, field_name, size, CeedEvalModes[eval_mode]); return CEED_ERROR_SUCCESS; @@ -974,14 +974,14 @@ int CeedQFunctionView(CeedQFunction qf, FILE *stream) { qf->is_gallery ? "Gallery " : "User ", qf->is_gallery ? qf->gallery_name : qf->kernel_name); - fprintf(stream, " %d input field%s:\n", qf->num_input_fields, + fprintf(stream, " %" CeedInt_FMT " input field%s:\n", qf->num_input_fields, qf->num_input_fields>1 ? "s" : ""); for (CeedInt i=0; inum_input_fields; i++) { ierr = CeedQFunctionFieldView(qf->input_fields[i], i, 1, stream); CeedChk(ierr); } - fprintf(stream, " %d output field%s:\n", qf->num_output_fields, + fprintf(stream, " %" CeedInt_FMT " output field%s:\n", qf->num_output_fields, qf->num_output_fields>1 ? 
"s" : ""); for (CeedInt i=0; inum_output_fields; i++) { ierr = CeedQFunctionFieldView(qf->output_fields[i], i, 0, stream); @@ -1031,8 +1031,8 @@ int CeedQFunctionApply(CeedQFunction qf, CeedInt Q, if (Q % qf->vec_length) // LCOV_EXCL_START return CeedError(qf->ceed, CEED_ERROR_DIMENSION, - "Number of quadrature points %d must be a " - "multiple of %d", Q, qf->vec_length); + "Number of quadrature points %" CeedInt_FMT " must be a " + "multiple of %" CeedInt_FMT, Q, qf->vec_length); // LCOV_EXCL_STOP qf->is_immutable = true; ierr = qf->Apply(qf, Q, u, v); CeedChk(ierr); diff --git a/interface/ceed.c b/interface/ceed.c index 5aecdbca5c..081d4ad43f 100644 --- a/interface/ceed.c +++ b/interface/ceed.c @@ -1217,7 +1217,7 @@ int CeedErrorStore(Ceed ceed, const char *filename, int line_no, err_code, format, args); // Build message - CeedInt len; + int len; len = snprintf(ceed->err_msg, CEED_MAX_RESOURCE_LEN, "%s:%d in %s(): ", filename, line_no, func); // Using pointer to va_list for better FFI, but clang-tidy can't verify va_list is initalized From 13f886e92f3445bcbdaccd4d0093947b9a882fe5 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 28 Jun 2022 13:41:47 -0600 Subject: [PATCH 106/172] debug - consistent use of CeedDebug vs CeedDebug256 --- backends/magma/ceed-magma-basis.c | 12 ++++++------ interface/ceed-jit-tools.c | 16 ++++++++-------- interface/ceed-preconditioning.c | 10 +++++----- interface/ceed.c | 6 +++--- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/backends/magma/ceed-magma-basis.c b/backends/magma/ceed-magma-basis.c index 64bba6237c..db6b416aed 100644 --- a/backends/magma/ceed-magma-basis.c +++ b/backends/magma/ceed-magma-basis.c @@ -54,8 +54,8 @@ int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChkBackend(ierr); ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChkBackend(ierr); - CeedDebug(ceed, "\033[01m[CeedBasisApply_Magma] vsize=%" CeedInt_FMT - ", comp = %" 
CeedInt_FMT, ncomp*CeedIntPow(P1d, dim), ncomp); + CeedDebug256(ceed, 4, "[CeedBasisApply_Magma] vsize=%" CeedInt_FMT + ", comp = %" CeedInt_FMT, ncomp*CeedIntPow(P1d, dim), ncomp); if (tmode == CEED_TRANSPOSE) { CeedSize length; @@ -335,8 +335,8 @@ int CeedBasisApplyNonTensor_f64_Magma(CeedBasis basis, CeedInt nelem, CeedBasisNonTensor_Magma *impl; ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); - CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%" CeedInt_FMT - ", comp = %" CeedInt_FMT, ncomp*ndof, ncomp); + CeedDebug256(ceed, 4, "[CeedBasisApplyNonTensor_Magma] vsize=%" CeedInt_FMT + ", comp = %" CeedInt_FMT, ncomp*ndof, ncomp); if (tmode == CEED_TRANSPOSE) { CeedSize length; @@ -460,8 +460,8 @@ int CeedBasisApplyNonTensor_f32_Magma(CeedBasis basis, CeedInt nelem, CeedBasisNonTensor_Magma *impl; ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); - CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%" CeedInt_FMT - ", comp = %" CeedInt_FMT, ncomp*ndof, ncomp); + CeedDebug256(ceed, 4, "[CeedBasisApplyNonTensor_Magma] vsize=%" CeedInt_FMT + ", comp = %" CeedInt_FMT, ncomp*ndof, ncomp); if (tmode == CEED_TRANSPOSE) { CeedSize length; diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c index 3b9ec0899d..3de7d75a41 100644 --- a/interface/ceed-jit-tools.c +++ b/interface/ceed-jit-tools.c @@ -43,7 +43,7 @@ int CeedCheckFilePath(Ceed ceed, const char *source_file_path, bool *is_valid) { // Debug CeedDebug256(ceed, 1, "Checking for source file: "); - CeedDebug256(ceed, 255, "%s\n", source_file_path_only); + CeedDebug(ceed, "%s\n", source_file_path_only); // Check for valid file path FILE *source_file; @@ -53,7 +53,7 @@ int CeedCheckFilePath(Ceed ceed, const char *source_file_path, bool *is_valid) { if (*is_valid) { // Debug CeedDebug256(ceed, 1, "Found JiT source file: "); - CeedDebug256(ceed, 255, "%s\n", source_file_path_only); + CeedDebug(ceed, "%s\n", source_file_path_only); fclose(source_file); } @@ 
-88,9 +88,9 @@ int CeedLoadSourceToInitializedBuffer(Ceed ceed, // Debug CeedDebug256(ceed, 1, "---------- Ceed JiT ----------\n"); CeedDebug256(ceed, 1, "Current source file: "); - CeedDebug256(ceed, 255, "%s\n", source_file_path); + CeedDebug(ceed, "%s\n", source_file_path); CeedDebug256(ceed, 1, "Current buffer:\n"); - CeedDebug256(ceed, 255, "%s\n", *buffer); + CeedDebug(ceed, "%s\n", *buffer); // Read file to temporary buffer source_file = fopen(source_file_path, "rb"); @@ -178,9 +178,9 @@ int CeedLoadSourceToInitializedBuffer(Ceed ceed, // Debug CeedDebug256(ceed, 1, "---------- Ceed JiT ----------\n"); CeedDebug256(ceed, 1, "Current source file: "); - CeedDebug256(ceed, 255, "%s\n", source_file_path); + CeedDebug(ceed, "%s\n", source_file_path); CeedDebug256(ceed, 1, "Final buffer:\n"); - CeedDebug256(ceed, 255, "%s\n", *buffer); + CeedDebug(ceed, "%s\n", *buffer); return CEED_ERROR_SUCCESS; } @@ -284,7 +284,7 @@ int CeedGetJitAbsolutePath(Ceed ceed, const char *relative_file_path, // Debug CeedDebug256(ceed, 1, "---------- Ceed JiT ----------\n"); CeedDebug256(ceed, 1, "Relative JiT source file: "); - CeedDebug256(ceed, 255, "%s\n", relative_file_path); + CeedDebug(ceed, "%s\n", relative_file_path); ierr = CeedGetParent(ceed, &ceed_parent); CeedChk(ierr); @@ -293,7 +293,7 @@ int CeedGetJitAbsolutePath(Ceed ceed, const char *relative_file_path, // Debug CeedDebug256(ceed, 1, "Checking JiT root: "); - CeedDebug256(ceed, 255, "%s\n", ceed_parent->jit_source_roots[i]); + CeedDebug(ceed, "%s\n", ceed_parent->jit_source_roots[i]); // Build and check absolute path with current root ierr = CeedPathConcatenate(ceed, ceed_parent->jit_source_roots[i], diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index f9f679ef4b..33143da699 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -43,7 +43,7 @@ static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, if (!qf) return 
CEED_ERROR_SUCCESS; CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n"); - CeedDebug256(qf->ceed, 255, "Creating fallback CeedQFunction\n"); + CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n"); char *source_path_with_name = ""; if (qf->source_path) { @@ -104,7 +104,7 @@ static int CeedOperatorCreateFallback(CeedOperator op) { if (!ceed_fallback) return CEED_ERROR_SUCCESS; CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); - CeedDebug256(op->ceed, 255, "Creating fallback CeedOperator\n"); + CeedDebug(op->ceed, "Creating fallback CeedOperator\n"); // Clone Op CeedOperator op_fallback; @@ -188,9 +188,9 @@ int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) { ierr = CeedGetResource(ceed_fallback, &resource_fallback); CeedChk(ierr); CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n"); - CeedDebug256(op->ceed, 255, - "Falling back from %s operator at address %ld to %s operator at address %ld\n", - resource, op, resource_fallback, op->op_fallback); + CeedDebug(op->ceed, + "Falling back from %s operator at address %ld to %s operator at address %ld\n", + resource, op, resource_fallback, op->op_fallback); } } *op_fallback = op->op_fallback; diff --git a/interface/ceed.c b/interface/ceed.c index 081d4ad43f..97721b96bc 100644 --- a/interface/ceed.c +++ b/interface/ceed.c @@ -507,13 +507,13 @@ int CeedGetOperatorFallbackCeed(Ceed ceed, Ceed *fallback_ceed) { if (ceed->has_valid_op_fallback_resource) { CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n"); - CeedDebug256(ceed, 255, "Getting fallback from %s to %s\n", ceed->resource, - ceed->op_fallback_resource); + CeedDebug(ceed, "Getting fallback from %s to %s\n", ceed->resource, + ceed->op_fallback_resource); } // Create fallback Ceed if uninitalized if (!ceed->op_fallback_ceed && ceed->has_valid_op_fallback_resource) { - CeedDebug256(ceed, 255, "Creating fallback Ceed"); + CeedDebug(ceed, "Creating fallback 
Ceed"); Ceed fallback_ceed; const char *fallback_resource; From 600b7929b98f3d8efad5f619bace308a359a46af Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 28 Jun 2022 14:21:36 -0600 Subject: [PATCH 107/172] test - update tests to use CeedInt_FMT --- tests/t001-ceed.c | 3 ++- tests/t002-ceed.c | 2 +- tests/t005-ceed.c | 2 +- tests/t009-ceed.c | 2 +- tests/t100-vector.c | 3 ++- tests/t101-vector.c | 6 +++--- tests/t103-vector.c | 3 ++- tests/t104-vector.c | 2 +- tests/t105-vector.c | 3 ++- tests/t106-vector.c | 3 ++- tests/t109-vector.c | 8 ++++---- tests/t114-vector.c | 3 ++- tests/t119-vector.c | 3 ++- tests/t120-vector.c | 2 +- tests/t121-vector.c | 4 ++-- tests/t122-vector.c | 12 ++++++++---- tests/t123-vector.c | 4 ++-- tests/t124-vector.c | 2 +- tests/t200-elemrestriction.c | 2 +- tests/t201-elemrestriction.c | 3 ++- tests/t202-elemrestriction.c | 5 +++-- tests/t203-elemrestriction.c | 6 ++++-- tests/t204-elemrestriction.c | 3 ++- tests/t205-elemrestriction.c | 3 ++- tests/t206-elemrestriction.c | 4 ++-- tests/t207-elemrestriction.c | 4 ++-- tests/t208-elemrestriction.c | 5 +++-- tests/t209-elemrestriction.c | 3 ++- tests/t213-elemrestriction.c | 6 ++++-- tests/t216-elemrestriction.c | 2 +- tests/t220-elemrestriction.c | 4 ++-- tests/t300-basis.c | 2 +- tests/t301-basis.c | 4 ++-- tests/t304-basis.c | 3 ++- tests/t305-basis.c | 6 ++++-- tests/t306-basis.c | 4 ++-- tests/t307-basis.c | 2 +- tests/t310-basis.c | 2 +- tests/t311-basis.c | 2 +- tests/t313-basis.c | 2 +- tests/t314-basis.c | 2 +- tests/t315-basis.c | 2 +- tests/t316-basis.c | 2 +- tests/t317-basis.c | 2 +- tests/t318-basis.c | 2 +- tests/t321-basis.c | 2 +- tests/t323-basis.c | 4 ++-- tests/t324-basis.c | 2 +- tests/t325-basis.c | 2 +- tests/t400-qfunction.c | 2 +- tests/t401-qfunction.c | 2 +- tests/t403-qfunction.c | 4 ++-- tests/t405-qfunction.c | 2 +- tests/t406-qfunction.c | 2 +- tests/t407-qfunction.c | 8 +++++--- tests/t409-qfunction.c | 2 +- tests/t410-qfunction.c | 2 +- 
tests/t411-qfunction.c | 2 +- tests/t412-qfunction.c | 2 +- tests/t414-qfunction.c | 6 +++--- tests/t415-qfunction.c | 9 +++++---- tests/t500-operator.c | 2 +- tests/t508-operator.c | 2 +- tests/t509-operator.c | 2 +- tests/t510-operator.c | 2 +- tests/t520-operator.c | 2 +- tests/t525-operator.c | 11 ++++++----- tests/t530-operator.c | 2 +- tests/t531-operator.c | 2 +- tests/t533-operator.c | 3 ++- tests/t534-operator.c | 3 ++- tests/t535-operator.c | 3 ++- tests/t536-operator.c | 3 ++- tests/t537-operator.c | 3 ++- tests/t538-operator.c | 3 ++- tests/t540-operator.c | 3 ++- tests/t541-operator.c | 3 ++- tests/t560-operator.c | 4 ++-- tests/t561-operator.c | 4 ++-- tests/t562-operator.c | 4 ++-- tests/t563-operator.c | 4 ++-- tests/t564-operator.c | 4 ++-- tests/t565-operator.c | 4 ++-- tests/t566-operator.c | 3 ++- tests/t567-operator.c | 3 ++- tests/t568-operator.c | 3 ++- 86 files changed, 164 insertions(+), 125 deletions(-) diff --git a/tests/t001-ceed.c b/tests/t001-ceed.c index bcc2f08954..75f72bdb85 100644 --- a/tests/t001-ceed.c +++ b/tests/t001-ceed.c @@ -12,7 +12,8 @@ int main(int argc, char **argv) { CeedGetPreferredMemType(ceed, (CeedMemType *)&type); if (type == -1) // LCOV_EXCL_START - printf("Error getting preferred memory type. %d \n", type); + printf("Error getting preferred memory type. 
%" CeedInt_FMT + "\n", type); // LCOV_EXCL_STOP CeedDestroy(&ceed); diff --git a/tests/t002-ceed.c b/tests/t002-ceed.c index 8be6c4952e..ef9f5b1239 100644 --- a/tests/t002-ceed.c +++ b/tests/t002-ceed.c @@ -24,7 +24,7 @@ int main(int argc, char **argv) { if (!is_exact_match && !is_match_with_query_arguments) { // LCOV_EXCL_START - return CeedError(ceed, 1, "Incorrect full resource name: %s != %s", + return CeedError(ceed, 1, "Incorrect full resource name: %s != %s\n", resource, backend); // LCOV_EXCL_STOP } diff --git a/tests/t005-ceed.c b/tests/t005-ceed.c index 0bc2fd641d..770492cb58 100644 --- a/tests/t005-ceed.c +++ b/tests/t005-ceed.c @@ -29,7 +29,7 @@ int main(int argc, char **argv) { // Check error message CeedGetErrorMessage(ceed, &err_msg); - if (!err_msg || !strcmp(err_msg, "No error message stored")) + if (!err_msg || !strcmp(err_msg, "No error message stored\n")) // LCOV_EXCL_START printf("Unexpected error message received: \"%s\"\n", err_msg); // LCOV_EXCL_STOP diff --git a/tests/t009-ceed.c b/tests/t009-ceed.c index f406c28b70..6547087a3f 100644 --- a/tests/t009-ceed.c +++ b/tests/t009-ceed.c @@ -12,7 +12,7 @@ int main(int argc, char **argv) { CeedReferenceCopy(ceed, &ceed_2); // This destroys the previous ceed_2 if (ceed != ceed_2) // LCOV_EXCL_START - printf("Error copying Ceed reference."); + printf("Error copying Ceed reference\n"); // LCOV_EXCL_STOP CeedDestroy(&ceed); diff --git a/tests/t100-vector.c b/tests/t100-vector.c index d3e2dbbad1..79d8c66996 100644 --- a/tests/t100-vector.c +++ b/tests/t100-vector.c @@ -22,7 +22,8 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 10.*CEED_EPSILON) // LCOV_EXCL_START - printf("Error taking array c[3] = %f", (CeedScalar)c[3]); + printf("Error taking array c[3] = %f\n", (CeedScalar)c[3]); // LCOV_EXCL_STOP // Getting array should not modify a @@ -34,7 +34,7 @@ int main(int argc, char **argv) { if (fabs(a[5] + 3.14) < 10.*CEED_EPSILON) // LCOV_EXCL_START - printf("Error protecting array a[3] = %f", 
(CeedScalar)a[3]); + printf("Error protecting array a[3] = %f\n", (CeedScalar)a[3]); // LCOV_EXCL_STOP // Note: We do not need to free c because c == a was stack allocated. @@ -44,10 +44,10 @@ int main(int argc, char **argv) { CeedVectorCreate(ceed, 0, &x); CeedVectorSetArray(x, CEED_MEM_HOST, CEED_USE_POINTER, NULL); CeedVectorGetArrayRead(x, CEED_MEM_HOST, &d); - if (b) printf("CeedVectorGetArrayRead returned non-NULL for zero-sized Vector"); + if (b) printf("CeedVectorGetArrayRead returned non-NULL for zero-sized Vector\n"); CeedVectorRestoreArrayRead(x, &d); CeedVectorTakeArray(x, CEED_MEM_HOST, &c); - if (c) printf("CeedVectorTakeArray returned non-NULL for zero-sized Vector"); + if (c) printf("CeedVectorTakeArray returned non-NULL for zero-sized Vector\n"); CeedVectorDestroy(&x); CeedDestroy(&ceed); diff --git a/tests/t114-vector.c b/tests/t114-vector.c index e503b4b98b..42cfee7b5a 100644 --- a/tests/t114-vector.c +++ b/tests/t114-vector.c @@ -21,7 +21,8 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 10.*CEED_EPSILON) // LCOV_EXCL_START - printf("Error reading array b[%d] = %f",i,(CeedScalar)b[i]); + printf("Error reading array b[%" CeedInt_FMT + "] = %f\n",i,(CeedScalar)b[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(x, &b); diff --git a/tests/t120-vector.c b/tests/t120-vector.c index 3a2e75f6e2..4c34cce11e 100644 --- a/tests/t120-vector.c +++ b/tests/t120-vector.c @@ -21,7 +21,7 @@ int main(int argc, char **argv) { CeedVectorGetLength(x_2, &len); // Second reference still valid if (len != n) // LCOV_EXCL_START - printf("Error copying CeedVector reference."); + printf("Error copying CeedVector reference\n"); // LCOV_EXCL_STOP CeedVectorDestroy(&x_2); diff --git a/tests/t121-vector.c b/tests/t121-vector.c index 03d3ac6b63..adc64155e9 100644 --- a/tests/t121-vector.c +++ b/tests/t121-vector.c @@ -33,8 +33,8 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 1e-14) // LCOV_EXCL_START - printf("Error in alpha x + y, computed: %f actual: 
%f\n", b[i], - (10.0 + i)/2); + printf("Error in alpha x + y at index %" CeedInt_FMT + ", computed: %f actual: %f\n", i, b[i], (10.0 + i)/2); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(y, &b); diff --git a/tests/t122-vector.c b/tests/t122-vector.c index 09b1b2283d..168846ee6e 100644 --- a/tests/t122-vector.c +++ b/tests/t122-vector.c @@ -28,7 +28,8 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 1e-14) // LCOV_EXCL_START - printf("Error in w = x .* y, computed: %f actual: %f\n", b[i], 1.0*i*i); + printf("Error in w = x .* y at index %" CeedInt_FMT + ", computed: %f actual: %f\n", i, b[i], 1.0*i*i); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(w, &b); @@ -38,7 +39,8 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 1e-14) // LCOV_EXCL_START - printf("Error in w = w .* y, computed: %f actual: %f\n", b[i], 1.0*i*i*i); + printf("Error in w = w .* y at index %" CeedInt_FMT + ", computed: %f actual: %f\n", i, b[i], 1.0*i*i*i); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(w, &b); @@ -48,7 +50,8 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 1e-14) // LCOV_EXCL_START - printf("Error in w = x .* w, computed: %f actual: %f\n", b[i], 1.0*i*i*i*i); + printf("Error in w = x .* w at index %" CeedInt_FMT + ", computed: %f actual: %f\n", i, b[i], 1.0*i*i*i*i); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(w, &b); @@ -64,7 +67,8 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 1e-14) // LCOV_EXCL_START - printf("Error in y = y .* y, computed: %f actual: %f\n", b[i], 1.0*i*i); + printf("Error in y = y .* y at index %" CeedInt_FMT + ", computed: %f actual: %f\n", i, b[i], 1.0*i*i); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(y, &b); diff --git a/tests/t123-vector.c b/tests/t123-vector.c index 92d7866e27..386c921c61 100644 --- a/tests/t123-vector.c +++ b/tests/t123-vector.c @@ -31,8 +31,8 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 1e-14) // LCOV_EXCL_START - printf("Error in alpha x, computed: %f actual: %f\n", b[i], - -(10.0 + i)/2); 
+ printf("Error in alpha x at index %" CeedInt_FMT + ", computed: %f actual: %f\n", i, b[i], -(10.0 + i)/2); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(x, &b); diff --git a/tests/t124-vector.c b/tests/t124-vector.c index 25fdeb95ec..ad56135d7f 100644 --- a/tests/t124-vector.c +++ b/tests/t124-vector.c @@ -22,7 +22,7 @@ int main(int argc, char **argv) { for (CeedInt i = 0; i < n; i++) if (a[i] != (CeedScalar)(3*i)) // LCOV_EXCL_START - printf("Error writing array a[%d] = %f", i, a[i]); + printf("Error writing array a[%" CeedInt_FMT "] = %f\n", i, a[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(x, (const CeedScalar **)&a); diff --git a/tests/t200-elemrestriction.c b/tests/t200-elemrestriction.c index f527b19a77..9690bd6370 100644 --- a/tests/t200-elemrestriction.c +++ b/tests/t200-elemrestriction.c @@ -32,7 +32,7 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 0 && i < num_elem ? 2.0 : 1.0)) // LCOV_EXCL_START - printf("Error in restricted array x[%d] = %f\n", + printf("Error in restricted array x[%" CeedInt_FMT "] = %f\n", i, (double)xx[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(x, &xx); diff --git a/tests/t203-elemrestriction.c b/tests/t203-elemrestriction.c index de48c5890e..aa4d6153cf 100644 --- a/tests/t203-elemrestriction.c +++ b/tests/t203-elemrestriction.c @@ -50,7 +50,8 @@ int main(int argc, char **argv) { block*layout[2]*blk_size; if (yy[index] != a[ind[k*elem_size + i]+j*(num_elem+1)]) // LCOV_EXCL_START - printf("Error in restricted array y[%d][%d][%d] = %f\n", + printf("Error in restricted array y[%" CeedInt_FMT + "][%" CeedInt_FMT "][%" CeedInt_FMT "] = %f\n", i, j, k, (double)yy[index]); // LCOV_EXCL_STOP } @@ -64,7 +65,8 @@ int main(int argc, char **argv) { for (CeedInt j=0; j 0 && i < num_elem ? 
2.0 : 1.0)) // LCOV_EXCL_START - printf("Error in restricted array x[%d][%d] = %f\n", + printf("Error in restricted array x[%" CeedInt_FMT + "][%" CeedInt_FMT "] = %f\n", j, i, (double)xx[i+j*(num_elem+1)]); // LCOV_EXCL_STOP } diff --git a/tests/t204-elemrestriction.c b/tests/t204-elemrestriction.c index ab37491408..c22eaffde4 100644 --- a/tests/t204-elemrestriction.c +++ b/tests/t204-elemrestriction.c @@ -44,7 +44,8 @@ int main(int argc, char **argv) { for (CeedInt k=0; k0&&i0&&i blk_size && i < num_elem ? 2.0 : 1.0)) // LCOV_EXCL_START - printf("Error in restricted array x[%d] = %f\n", + printf("Error in restricted array x[%" CeedInt_FMT "] = %f\n", i, (double)xx[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(x, &xx); diff --git a/tests/t209-elemrestriction.c b/tests/t209-elemrestriction.c index cf2bfe92ac..98dd980803 100644 --- a/tests/t209-elemrestriction.c +++ b/tests/t209-elemrestriction.c @@ -31,7 +31,8 @@ int main(int argc, char **argv) { for (CeedInt i=0; i<3*num_elem+1; i++) if ((1 + (i > 0 && i < 3*num_elem && (i%3==0) ? 1 : 0)) != mm[i]) // LCOV_EXCL_START - printf("Error in multiplicity vector: mult[%d] = %f\n", i, (CeedScalar)mm[i]); + printf("Error in multiplicity vector: mult[%" CeedInt_FMT + "] = %f\n", i, (CeedScalar)mm[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(mult, &mm); diff --git a/tests/t213-elemrestriction.c b/tests/t213-elemrestriction.c index cae33ae689..f13f903de6 100644 --- a/tests/t213-elemrestriction.c +++ b/tests/t213-elemrestriction.c @@ -54,7 +54,8 @@ int main(int argc, char **argv) { block*layout[2]*blk_size; if (yy[index] != a[ind[k*elem_size + i]+j*(num_elem+1)]) // LCOV_EXCL_START - printf("Error in restricted array y[%d][%d][%d] = %f\n", + printf("Error in restricted array y[%" CeedInt_FMT + "][%" CeedInt_FMT "][%" CeedInt_FMT "] = %f\n", i, j, k, (double)yy[index]); // LCOV_EXCL_STOP } @@ -68,7 +69,8 @@ int main(int argc, char **argv) { for (CeedInt j=0; j 0 && i < num_elem ? 
2.0 : 1.0)) // LCOV_EXCL_START - printf("Error in restricted array x[%d][%d] = %f\n", + printf("Error in restricted array x[%" CeedInt_FMT + "][%" CeedInt_FMT "] = %f\n", j, i, (double)xx[i+j*(num_elem+1)]); // LCOV_EXCL_STOP } diff --git a/tests/t216-elemrestriction.c b/tests/t216-elemrestriction.c index d51d64d42a..4ef124e5b6 100644 --- a/tests/t216-elemrestriction.c +++ b/tests/t216-elemrestriction.c @@ -27,7 +27,7 @@ int main(int argc, char **argv) { CeedElemRestrictionGetCompStride(r_2, &comp_stride_2); if (comp_stride_2 != comp_stride) // LCOV_EXCL_START - printf("Error copying CeedElemRestriction reference."); + printf("Error copying CeedElemRestriction reference\n"); // LCOV_EXCL_STOP CeedElemRestrictionDestroy(&r_2); diff --git a/tests/t220-elemrestriction.c b/tests/t220-elemrestriction.c index 94367d1dc7..6d0b482412 100644 --- a/tests/t220-elemrestriction.c +++ b/tests/t220-elemrestriction.c @@ -39,8 +39,8 @@ int main(int argc, char **argv) { CeedInt k = j + P*i; if (10+(k+1)/2 != yy[k] * CeedIntPow(-1, i%2)) // LCOV_EXCL_START - printf("Error in restricted array y[%d] = %f", - k, (CeedScalar)yy[k]); + printf("Error in restricted array y[%" CeedInt_FMT + "] = %f\n", k, (CeedScalar)yy[k]); // LCOV_EXCL_STOP } } diff --git a/tests/t300-basis.c b/tests/t300-basis.c index 1066470221..58017b2771 100644 --- a/tests/t300-basis.c +++ b/tests/t300-basis.c @@ -12,7 +12,7 @@ int main(int argc, char **argv) { // Test skipped if using single precision if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { return CeedError(ceed, CEED_ERROR_UNSUPPORTED, - "Test not implemented in single precision"); + "Test not implemented in single precision\n"); } CeedBasisCreateTensorH1Lagrange(ceed, 1, 1, 4, 4, CEED_GAUSS_LOBATTO, &b); diff --git a/tests/t301-basis.c b/tests/t301-basis.c index c1c7966f88..a5c00e23f5 100644 --- a/tests/t301-basis.c +++ b/tests/t301-basis.c @@ -23,8 +23,8 @@ int main(int argc, char **argv) { for (CeedInt i=0; i<12; i++) if (fabs(A_qr[i] - A[i]) > 
100.*CEED_EPSILON) // LCOV_EXCL_START - printf("Error in QR factorization A_qr[%d] = %f != A[%d] = %f\n", - i, A_qr[i], i, A[i]); + printf("Error in QR factorization A_qr[%" CeedInt_FMT + "] = %f != A[%" CeedInt_FMT "] = %f\n", i, A_qr[i], i, A[i]); // LCOV_EXCL_STOP CeedDestroy(&ceed); diff --git a/tests/t304-basis.c b/tests/t304-basis.c index 7c873f22a7..7bd437b838 100644 --- a/tests/t304-basis.c +++ b/tests/t304-basis.c @@ -41,7 +41,8 @@ int main(int argc, char **argv) { for (int j=0; j 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("Error in diagonalization [%d, %d]: %f != %f\n", + printf("Error in diagonalization [%" CeedInt_FMT + ", %" CeedInt_FMT "]: %f != %f\n", i, j, M[P*i+j], Q_lambda_Qt[P*i+j]); // LCOV_EXCL_STOP diff --git a/tests/t305-basis.c b/tests/t305-basis.c index 6058c520d1..9c777f3595 100644 --- a/tests/t305-basis.c +++ b/tests/t305-basis.c @@ -51,7 +51,8 @@ int main(int argc, char **argv) { for (int j=0; j 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("Error in diagonalization of M [%d, %d]: %f != %f\n", + printf("Error in diagonalization of M [%" CeedInt_FMT + ", %" CeedInt_FMT "]: %f != %f\n", i, j, M[P*i+j], (i == j ? 1.0 : 0.0)); // LCOV_EXCL_STOP @@ -74,7 +75,8 @@ int main(int argc, char **argv) { for (int j=0; j 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("Error in diagonalization of K [%d, %d]: %f != %f\n", + printf("Error in diagonalization of K [%" CeedInt_FMT + ", %" CeedInt_FMT "]: %f != %f\n", i, j, K[P*i+j], (i == j ? 
lambda[i] : 0.0)); // LCOV_EXCL_STOP diff --git a/tests/t306-basis.c b/tests/t306-basis.c index 9f727d5910..ba970b8a23 100644 --- a/tests/t306-basis.c +++ b/tests/t306-basis.c @@ -17,11 +17,11 @@ int main(int argc, char **argv) { if (P != 64) // LCOV_EXCL_START - printf("%d != 64\n", P); + printf("%" CeedInt_FMT " != 64\n", P); // LCOV_EXCL_STOP if (Q != 125) // LCOV_EXCL_START - printf("%d != 125\n", Q); + printf("%" CeedInt_FMT " != 125\n", Q); // LCOV_EXCL_STOP CeedBasisDestroy(&b); diff --git a/tests/t307-basis.c b/tests/t307-basis.c index 88f864f2af..202e083ca5 100644 --- a/tests/t307-basis.c +++ b/tests/t307-basis.c @@ -21,7 +21,7 @@ int main(int argc, char **argv) { CeedBasisGetNumNodes1D(b_2, &P_1d_2); if (P_1d != P_1d_2) // LCOV_EXCL_START - printf("Error copying CeedBasis reference."); + printf("Error copying CeedBasis reference\n"); // LCOV_EXCL_STOP CeedBasisDestroy(&b_2); diff --git a/tests/t310-basis.c b/tests/t310-basis.c index cff28fb645..1f5bf2edd6 100644 --- a/tests/t310-basis.c +++ b/tests/t310-basis.c @@ -32,7 +32,7 @@ int main(int argc, char **argv) { for (i = 0; i < len; i++) if (fabs(v[i] - 1.) 
> 10.*CEED_EPSILON) // LCOV_EXCL_START - printf("v[%d] = %f != 1.\n", i, v[i]); + printf("v[%" CeedInt_FMT "] = %f != 1.\n", i, v[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(V, &v); diff --git a/tests/t311-basis.c b/tests/t311-basis.c index 26260386ce..0da4acd504 100644 --- a/tests/t311-basis.c +++ b/tests/t311-basis.c @@ -62,7 +62,7 @@ int main(int argc, char **argv) { CeedScalar px = PolyEval(xq[i], ALEN(p), p); if (fabs(uuq[i] - px) > 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("%f != %f=p(%f)\n", uuq[i], px, xq[i]); + printf("%f != %f = p(%f)\n", uuq[i], px, xq[i]); // LCOV_EXCL_STOP } CeedVectorRestoreArrayRead(X_q, &xq); diff --git a/tests/t313-basis.c b/tests/t313-basis.c index 16fc553fbe..b8d6572ef1 100644 --- a/tests/t313-basis.c +++ b/tests/t313-basis.c @@ -73,7 +73,7 @@ int main(int argc, char **argv) { CeedScalar fx = Eval(dim, xx); if (fabs(u[i] - fx) > 1E-4) { // LCOV_EXCL_START - printf("[%d] %f != %f=f(%f", dim, u[i], fx, xx[0]); + printf("[%" CeedInt_FMT "] %f != %f=f(%f", dim, u[i], fx, xx[0]); for (CeedInt d=1; d tol) // LCOV_EXCL_START - printf("[%d] %f != %f\n", dim, sum_1, sum_2); + printf("[%" CeedInt_FMT "] %f != %f\n", dim, sum_1, sum_2); // LCOV_EXCL_STOP CeedVectorDestroy(&X); diff --git a/tests/t315-basis.c b/tests/t315-basis.c index 1286c46bc8..481ed409f3 100644 --- a/tests/t315-basis.c +++ b/tests/t315-basis.c @@ -89,7 +89,7 @@ int main(int argc, char **argv) { CeedScalar tol = GetTolerance(CEED_SCALAR_TYPE, dim); if (fabs(sum_1 - sum_2) > tol) // LCOV_EXCL_START - printf("[%d] %f != %f\n", dim, sum_1, sum_2); + printf("[%" CeedInt_FMT "] %f != %f\n", dim, sum_1, sum_2); // LCOV_EXCL_STOP CeedVectorDestroy(&X); diff --git a/tests/t316-basis.c b/tests/t316-basis.c index e468afef7f..e96f69264a 100644 --- a/tests/t316-basis.c +++ b/tests/t316-basis.c @@ -88,7 +88,7 @@ int main(int argc, char **argv) { CeedScalar tol = GetTolerance(CEED_SCALAR_TYPE, dim); if (fabs(sum_1 - sum_2) > tol) // LCOV_EXCL_START - printf("[%d] %f != 
%f\n", dim, sum_1, sum_2); + printf("[%" CeedInt_FMT "] %f != %f\n", dim, sum_1, sum_2); // LCOV_EXCL_STOP CeedVectorDestroy(&X); diff --git a/tests/t317-basis.c b/tests/t317-basis.c index 921838c712..4b7e8b762a 100644 --- a/tests/t317-basis.c +++ b/tests/t317-basis.c @@ -63,7 +63,7 @@ int main(int argc, char **argv) { CeedScalar px = PolyEval(xq[i], ALEN(dp), dp); if (fabs(uuq[i] - px) > 1000.*CEED_EPSILON) // LCOV_EXCL_START - printf("%f != %f=p(%f)\n", uuq[i], px, xq[i]); + printf("%f != %f = p(%f)\n", uuq[i], px, xq[i]); // LCOV_EXCL_STOP } CeedVectorRestoreArrayRead(X_q, &xq); diff --git a/tests/t318-basis.c b/tests/t318-basis.c index b57c5ed691..5b0239438e 100644 --- a/tests/t318-basis.c +++ b/tests/t318-basis.c @@ -73,7 +73,7 @@ int main(int argc, char **argv) { CeedScalar fx = Eval(dim, xx); if (fabs(u[i] - fx) > 1E-4) { // LCOV_EXCL_START - printf("[%d] %f != %f=f(%f", dim, u[i], fx, xx[0]); + printf("[%" CeedInt_FMT "] %f != %f = f(%f", dim, u[i], fx, xx[0]); for (CeedInt d=1; d 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] %f != %f\n", i, out[i], value); + printf("[%" CeedInt_FMT "] %f != %f\n", i, out[i], value); // LCOV_EXCL_STOP } CeedVectorRestoreArrayRead(Out, &out); diff --git a/tests/t323-basis.c b/tests/t323-basis.c index a882a3590e..11996aca7c 100644 --- a/tests/t323-basis.c +++ b/tests/t323-basis.c @@ -49,12 +49,12 @@ int main(int argc, char **argv) { value = dfeval(xq[0*Q+i], xq[1*Q+i]); if (fabs(out[0*Q+i] - value) > 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] %f != %f\n", i, out[0*Q+i], value); + printf("[%" CeedInt_FMT "] %f != %f\n", i, out[0*Q+i], value); // LCOV_EXCL_STOP value = dfeval(xq[1*Q+i], xq[0*Q+i]); if (fabs(out[1*Q+i] - value) > 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] %f != %f\n", i, out[1*Q+i], value); + printf("[%" CeedInt_FMT "] %f != %f\n", i, out[1*Q+i], value); // LCOV_EXCL_STOP } CeedVectorRestoreArrayRead(Out, &out); diff --git a/tests/t324-basis.c b/tests/t324-basis.c index 
95686e0d9b..857664df60 100644 --- a/tests/t324-basis.c +++ b/tests/t324-basis.c @@ -41,7 +41,7 @@ int main(int argc, char **argv) { for (int i=0; i 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] %f != %f\n", i, out[i], colsum[i]); + printf("[%" CeedInt_FMT "] %f != %f\n", i, out[i], colsum[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(Out, &out); diff --git a/tests/t325-basis.c b/tests/t325-basis.c index 4ab0257183..3045c96893 100644 --- a/tests/t325-basis.c +++ b/tests/t325-basis.c @@ -47,7 +47,7 @@ int main(int argc, char **argv) { for (int n=0; n 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] %f != %f\n", p, out[p+n*P], n*colsum[p]); + printf("[%" CeedInt_FMT "] %f != %f\n", p, out[p+n*P], n*colsum[p]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(Out, &out); diff --git a/tests/t400-qfunction.c b/tests/t400-qfunction.c index ff7035f6ed..c7d0b05605 100644 --- a/tests/t400-qfunction.c +++ b/tests/t400-qfunction.c @@ -56,7 +56,7 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] v %f != vv %f\n",i, v[i], vv[i]); + printf("[%" CeedInt_FMT "] v %f != vv %f\n",i, v[i], vv[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(V, &vv); diff --git a/tests/t403-qfunction.c b/tests/t403-qfunction.c index 8dd3c339d2..6a92e20c0b 100644 --- a/tests/t403-qfunction.c +++ b/tests/t403-qfunction.c @@ -17,7 +17,7 @@ int main(int argc, char **argv) { CeedQFunctionReferenceCopy(qf, &qf_2); // This destroys the previous qf_2 if (qf != qf_2) // LCOV_EXCL_START - printf("Error copying CeedQFunction reference."); + printf("Error copying CeedQFunction reference\n"); // LCOV_EXCL_STOP CeedQFunctionContextCreate(ceed, &ctx); @@ -26,7 +26,7 @@ int main(int argc, char **argv) { CeedQFunctionContextReferenceCopy(ctx, &ctx_2); if (ctx != ctx_2) // LCOV_EXCL_START - printf("Error copying CeedQFunctionContext reference."); + printf("Error copying CeedQFunctionContext reference\n"); // LCOV_EXCL_STOP CeedQFunctionDestroy(&qf); 
diff --git a/tests/t405-qfunction.c b/tests/t405-qfunction.c index 8d662e6349..8bb419868a 100644 --- a/tests/t405-qfunction.c +++ b/tests/t405-qfunction.c @@ -56,7 +56,7 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 1E3*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] v %f != vv %f\n",i, 5*v[i]*sqrt(2.), vv[i]); + printf("[%" CeedInt_FMT "] v %f != vv %f\n",i, 5*v[i]*sqrt(2.), vv[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(V, &vv); diff --git a/tests/t407-qfunction.c b/tests/t407-qfunction.c index a65120d62f..57b3c89215 100644 --- a/tests/t407-qfunction.c +++ b/tests/t407-qfunction.c @@ -37,7 +37,7 @@ int main(int argc, char **argv) { CeedQFunctionContextGetAllFieldLabels(ctx, &field_labels, &num_fields); if (num_fields != 2) // LCOV_EXCL_START - printf("Incorrect number of fields set: %d != 2\n", num_fields); + printf("Incorrect number of fields set: %" CeedInt_FMT " != 2\n", num_fields); // LCOV_EXCL_STOP const char *name; @@ -92,11 +92,13 @@ int main(int argc, char **argv) { CeedQFunctionContextSetInt32(ctx, count_label, (int *)&values_count); if (ctx_data.count[0] != 14) // LCOV_EXCL_START - printf("Incorrect context data for count[0]: %d != 14\n", ctx_data.count[0]); + printf("Incorrect context data for count[0]: %" CeedInt_FMT " != 14\n", + ctx_data.count[0]); // LCOV_EXCL_STOP if (ctx_data.count[1] != 43) // LCOV_EXCL_START - printf("Incorrect context data for count[1]: %d != 43\n", ctx_data.count[1]); + printf("Incorrect context data for count[1]: %" CeedInt_FMT " != 43\n", + ctx_data.count[1]); // LCOV_EXCL_STOP CeedQFunctionContextDestroy(&ctx); diff --git a/tests/t409-qfunction.c b/tests/t409-qfunction.c index edbe120ba1..ae7b77114d 100644 --- a/tests/t409-qfunction.c +++ b/tests/t409-qfunction.c @@ -44,7 +44,7 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("v[%d] %f != 2.0\n", i, v[i]); + printf("v[%" CeedInt_FMT "] %f != 2.0\n", i, v[i]); // LCOV_EXCL_STOP 
CeedVectorRestoreArrayRead(V, &v); diff --git a/tests/t410-qfunction.c b/tests/t410-qfunction.c index 8e28d400cf..f29fb949a0 100644 --- a/tests/t410-qfunction.c +++ b/tests/t410-qfunction.c @@ -53,7 +53,7 @@ int main(int argc, char **argv) { for (CeedInt i=0; i1e-14) // LCOV_EXCL_START - printf("[%d] v %f != u %f\n",i, v[i], u[i]); + printf("[%" CeedInt_FMT "] v %f != u %f\n",i, v[i], u[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(V, &v); diff --git a/tests/t412-qfunction.c b/tests/t412-qfunction.c index b025fdecd2..2abaa137d5 100644 --- a/tests/t412-qfunction.c +++ b/tests/t412-qfunction.c @@ -36,7 +36,7 @@ int main(int argc, char **argv) { for (CeedInt i=0; i1e-12) // LCOV_EXCL_START - printf("[%d] v %f != u %f\n",i, v[i], u[i]); + printf("[%" CeedInt_FMT "] v %f != u %f\n",i, v[i], u[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(V, &v); diff --git a/tests/t414-qfunction.c b/tests/t414-qfunction.c index 6d50279935..e5a4f9d48d 100644 --- a/tests/t414-qfunction.c +++ b/tests/t414-qfunction.c @@ -21,7 +21,7 @@ int main(int argc, char **argv) { CeedScalar j[num_qpts * dim * dim], w[num_qpts], u[num_qpts * num_comp]; char name[13] = ""; - snprintf(name, sizeof name, "Mass%dDBuild", dim); + snprintf(name, sizeof name, "Mass%" CeedInt_FMT "DBuild", dim); CeedQFunctionCreateInteriorByName(ceed, name, &qf_setup); CeedQFunctionCreateInteriorByName(ceed, "Vector3MassApply", &qf_mass); @@ -72,8 +72,8 @@ int main(int argc, char **argv) { sum += vv[i + c * num_qpts]; if (fabs(sum - (c + 1)) > 10*CEED_EPSILON) // LCOV_EXCL_START - printf("%dD volume error in component %d: %f != %f\n", - dim, c, sum, (c + 1.0)); + printf("%" CeedInt_FMT "D volume error in component %" CeedInt_FMT + ": %f != %f\n", dim, c, sum, (c + 1.0)); // LCOV_EXCL_STOP } CeedVectorRestoreArrayRead(V, &vv); diff --git a/tests/t415-qfunction.c b/tests/t415-qfunction.c index b840ff6ff1..ffec354639 100644 --- a/tests/t415-qfunction.c +++ b/tests/t415-qfunction.c @@ -22,9 +22,10 @@ int main(int argc, 
char **argv) { du[num_qpts * dim * num_comp]; char name_setup[26] = "", name_apply[26] = ""; - snprintf(name_setup, sizeof name_setup, "Poisson%dDBuild", dim); + snprintf(name_setup, sizeof name_setup, "Poisson%" CeedInt_FMT "DBuild", dim); CeedQFunctionCreateInteriorByName(ceed, name_setup, &qf_setup); - snprintf(name_apply, sizeof name_apply, "Vector3Poisson%dDApply", dim); + snprintf(name_apply, sizeof name_apply, "Vector3Poisson%" CeedInt_FMT "DApply", + dim); CeedQFunctionCreateInteriorByName(ceed, name_apply, &qf_diff); for (CeedInt i=0; i 10*CEED_EPSILON) // LCOV_EXCL_START - printf("%dD volume error in component %d: %f != %f\n", - dim, c, sum, dim * (c + 1.0)); + printf("%" CeedInt_FMT "D volume error in component %" CeedInt_FMT + ": %f != %f\n", dim, c, sum, dim * (c + 1.0)); // LCOV_EXCL_STOP } CeedVectorRestoreArrayRead(dV, &vv); diff --git a/tests/t500-operator.c b/tests/t500-operator.c index cc03f4dbb2..d16c273369 100644 --- a/tests/t500-operator.c +++ b/tests/t500-operator.c @@ -106,7 +106,7 @@ int main(int argc, char **argv) { CeedVectorGetArrayRead(V, CEED_MEM_HOST, &hv); for (CeedInt i=0; i 1e-14) printf("[%d] v %g != 0.0\n",i, hv[i]); + if (fabs(hv[i]) > 1e-14) printf("[%" CeedInt_FMT "] v %g != 0.0\n",i, hv[i]); CeedVectorRestoreArrayRead(V, &hv); CeedQFunctionDestroy(&qf_setup); diff --git a/tests/t508-operator.c b/tests/t508-operator.c index 7f4502029d..481d7c4b4a 100644 --- a/tests/t508-operator.c +++ b/tests/t508-operator.c @@ -24,7 +24,7 @@ int main(int argc, char **argv) { CeedOperatorReferenceCopy(op, &op_2); // This destroys the previous op_2 if (op != op_2) // LCOV_EXCL_START - printf("Error copying CeedOperator reference."); + printf("Error copying CeedOperator reference\n"); // LCOV_EXCL_STOP CeedQFunctionDestroy(&qf); diff --git a/tests/t509-operator.c b/tests/t509-operator.c index c247a30102..04c392836d 100644 --- a/tests/t509-operator.c +++ b/tests/t509-operator.c @@ -60,7 +60,7 @@ int main(int argc, char **argv) { for (CeedInt i=0; 
i1e-14) // LCOV_EXCL_START - printf("%d: Computed Value: %f != True Value: 1.0\n", i, hv[i]); + printf("[%" CeedInt_FMT "] Computed Value: %f != True Value: 1.0\n", i, hv[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(V, &hv); diff --git a/tests/t510-operator.c b/tests/t510-operator.c index ecb3a1d70f..3ec1a5e5ee 100644 --- a/tests/t510-operator.c +++ b/tests/t510-operator.c @@ -114,7 +114,7 @@ int main(int argc, char **argv) { // Check output CeedVectorGetArrayRead(V, CEED_MEM_HOST, &hv); for (CeedInt i=0; i 1e-14) printf("[%d] v %g != 0.0\n",i, hv[i]); + if (fabs(hv[i]) > 1e-14) printf("[%" CeedInt_FMT "] v %g != 0.0\n",i, hv[i]); CeedVectorRestoreArrayRead(V, &hv); CeedQFunctionDestroy(&qf_setup); diff --git a/tests/t520-operator.c b/tests/t520-operator.c index 72e8bb2da9..2561cc14ee 100644 --- a/tests/t520-operator.c +++ b/tests/t520-operator.c @@ -224,7 +224,7 @@ int main(int argc, char **argv) { // Check output CeedVectorGetArrayRead(V, CEED_MEM_HOST, &hv); for (CeedInt i=0; i 1e-14) printf("[%d] v %g != 0.0\n",i, hv[i]); + if (fabs(hv[i]) > 1e-14) printf("[%" CeedInt_FMT "] v %g != 0.0\n",i, hv[i]); CeedVectorRestoreArrayRead(V, &hv); // Cleanup diff --git a/tests/t525-operator.c b/tests/t525-operator.c index 3b206b4582..0683da4fec 100644 --- a/tests/t525-operator.c +++ b/tests/t525-operator.c @@ -56,7 +56,8 @@ int main(int argc, char **argv) { CeedOperatorContextSetInt32(op_sub_1, count_label, &value_count); if (ctx_data_1.count != 43) // LCOV_EXCL_START - printf("Incorrect context data for count: %d != 43", ctx_data_1.count); + printf("Incorrect context data for count: %" CeedInt_FMT " != 43", + ctx_data_1.count); // LCOV_EXCL_STOP // Second sub-operator @@ -87,7 +88,7 @@ int main(int argc, char **argv) { CeedOperatorContextSetDouble(op_composite, time_label, &value_time); if (ctx_data_2.time != 2.0) // LCOV_EXCL_START - printf("Incorrect context data for time: %f != 2.0", ctx_data_2.time); + printf("Incorrect context data for time: %f != 2.0\n", 
ctx_data_2.time); // LCOV_EXCL_STOP // Check setting field in context of multiple sub-operators for composite operator @@ -98,18 +99,18 @@ int main(int argc, char **argv) { CeedOperatorContextSetDouble(op_composite, other_label, &value_other); if (ctx_data_1.other != 9000.0) // LCOV_EXCL_START - printf("Incorrect context data for other: %f != 2.0", ctx_data_1.other); + printf("Incorrect context data for other: %f != 2.0\n", ctx_data_1.other); // LCOV_EXCL_STOP if (ctx_data_2.other != 9000.0) // LCOV_EXCL_START - printf("Incorrect context data for other: %f != 2.0", ctx_data_2.other); + printf("Incorrect context data for other: %f != 2.0\n", ctx_data_2.other); // LCOV_EXCL_STOP // Check requesting label for field that doesn't exist returns NULL CeedOperatorContextGetFieldLabel(op_composite, "bad", &bad_label); if (bad_label) // LCOV_EXCL_START - printf("Incorrect context label returned"); + printf("Incorrect context label returned\n"); // LCOV_EXCL_STOP CeedQFunctionContextDestroy(&qf_ctx_sub_1); diff --git a/tests/t530-operator.c b/tests/t530-operator.c index 4001da6461..bd9d086086 100644 --- a/tests/t530-operator.c +++ b/tests/t530-operator.c @@ -106,7 +106,7 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 1e-9) // LCOV_EXCL_START - printf("Error: A[%d] = %f != %f\n", i, a[i], q[i]); + printf("Error: A[%" CeedInt_FMT "] = %f != %f\n", i, a[i], q[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(A, &a); CeedVectorRestoreArrayRead(q_data, &q); diff --git a/tests/t531-operator.c b/tests/t531-operator.c index 053301690b..3a826e1b47 100644 --- a/tests/t531-operator.c +++ b/tests/t531-operator.c @@ -105,7 +105,7 @@ int main(int argc, char **argv) { for (CeedInt i=0; i 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("Error: Operator computed v[i] = %f != 0.0\n", vv[i]); + printf("Error: Operator computed v[%" CeedInt_FMT "] = %f != 0.0\n", i, vv[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(v, &vv); diff --git a/tests/t533-operator.c b/tests/t533-operator.c 
index 80bed6f464..e798646f5d 100644 --- a/tests/t533-operator.c +++ b/tests/t533-operator.c @@ -121,7 +121,8 @@ int main(int argc, char **argv) { for (int i=0; i 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] Error in assembly: %f != %f\n", i, a[i], assembled_true[i]); + printf("[%" CeedInt_FMT "] Error in assembly: %f != %f\n", i, a[i], + assembled_true[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(A, &a); diff --git a/tests/t534-operator.c b/tests/t534-operator.c index e6591253bf..ec0525a189 100644 --- a/tests/t534-operator.c +++ b/tests/t534-operator.c @@ -124,7 +124,8 @@ int main(int argc, char **argv) { for (int i=0; i 1000.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] Error in assembly: %f != %f\n", i, a[i], assembled_true[i]); + printf("[%" CeedInt_FMT "] Error in assembly: %f != %f\n", i, a[i], + assembled_true[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(A, &a); diff --git a/tests/t535-operator.c b/tests/t535-operator.c index 93d38a7c49..c69cce9567 100644 --- a/tests/t535-operator.c +++ b/tests/t535-operator.c @@ -159,7 +159,8 @@ int main(int argc, char **argv) { for (int i=0; i 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] Error in assembly: %f != %f\n", i, a[i], assembled_true[i]); + printf("[%" CeedInt_FMT "] Error in assembly: %f != %f\n", i, a[i], + assembled_true[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(A, &a); diff --git a/tests/t536-operator.c b/tests/t536-operator.c index deb64712e8..b1ad3d99f0 100644 --- a/tests/t536-operator.c +++ b/tests/t536-operator.c @@ -177,7 +177,8 @@ int main(int argc, char **argv) { for (int i=0; i 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] Error in assembly: %f != %f\n", i, a[i], assembled_true[i]); + printf("[%" CeedInt_FMT "] Error in assembly: %f != %f\n", i, a[i], + assembled_true[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(A, &a); diff --git a/tests/t537-operator.c b/tests/t537-operator.c index b4104281da..c0fe3dc43b 100644 --- a/tests/t537-operator.c +++ 
b/tests/t537-operator.c @@ -131,7 +131,8 @@ int main(int argc, char **argv) { for (int i=0; i 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] Error in assembly: %f != %f\n", i, a[i], assembled_true[i]); + printf("[%" CeedInt_FMT "] Error in assembly: %f != %f\n", i, a[i], + assembled_true[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(A, &a); diff --git a/tests/t538-operator.c b/tests/t538-operator.c index f90310d6e7..f950c3c11e 100644 --- a/tests/t538-operator.c +++ b/tests/t538-operator.c @@ -156,7 +156,8 @@ int main(int argc, char **argv) { for (int i=0; i 1000.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] Error in assembly: %f != %f\n", i, a[i], assembled_true[i]); + printf("[%" CeedInt_FMT "] Error in assembly: %f != %f\n", i, a[i], + assembled_true[i]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(A, &a); diff --git a/tests/t540-operator.c b/tests/t540-operator.c index aae4ea442a..76c428da13 100644 --- a/tests/t540-operator.c +++ b/tests/t540-operator.c @@ -105,7 +105,8 @@ int main(int argc, char **argv) { for (int i=0; i 500.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d] Error in inverse: %e - 1.0 = %e\n", i, u[i], u[i] - 1.); + printf("[%" CeedInt_FMT "] Error in inverse: %e - 1.0 = %e\n", + i, u[i], u[i] - 1.); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(U, &u); diff --git a/tests/t541-operator.c b/tests/t541-operator.c index 3fba9e4d23..9d806d5a67 100644 --- a/tests/t541-operator.c +++ b/tests/t541-operator.c @@ -318,7 +318,8 @@ int main(int argc, char **argv) { for (CeedInt j=0; j 2e-3) // LCOV_EXCL_START - printf("[%d, %d] Error in inverse: %e != %e\n", i, j, w[i*P+j], u[i*P+j]); + printf("[%" CeedInt_FMT ", %" CeedInt_FMT "] Error in inverse: %e != %e\n", + i, j, w[i*P+j], u[i*P+j]); // LCOV_EXCL_STOP CeedVectorRestoreArrayRead(U, &u); CeedVectorRestoreArrayRead(W, &w); diff --git a/tests/t560-operator.c b/tests/t560-operator.c index 6b9e00dbc1..6e53660443 100644 --- a/tests/t560-operator.c +++ b/tests/t560-operator.c @@ -140,8 +140,8 @@ 
int main(int argc, char **argv) { if (fabs(assembled[j*num_dofs+i] - assembled_true[j*num_dofs+i]) > 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d,%d] Error in assembly: %f != %f\n", i, j, - assembled[j*num_dofs+i], assembled_true[j*num_dofs+i]); + printf("[%" CeedInt_FMT ", %" CeedInt_FMT "] Error in assembly: %f != %f\n", + i, j, assembled[j*num_dofs+i], assembled_true[j*num_dofs+i]); // LCOV_EXCL_STOP // Cleanup diff --git a/tests/t561-operator.c b/tests/t561-operator.c index 169d7f4fbf..56c5454db8 100644 --- a/tests/t561-operator.c +++ b/tests/t561-operator.c @@ -143,8 +143,8 @@ int main(int argc, char **argv) { if (fabs(assembled[j*num_dofs+i] - assembled_true[j*num_dofs+i]) > 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d,%d] Error in assembly: %f != %f\n", i, j, - assembled[j*num_dofs+i], assembled_true[j*num_dofs+i]); + printf("[%" CeedInt_FMT ", %" CeedInt_FMT "] Error in assembly: %f != %f\n", + i, j, assembled[j*num_dofs+i], assembled_true[j*num_dofs+i]); // LCOV_EXCL_STOP // Cleanup diff --git a/tests/t562-operator.c b/tests/t562-operator.c index ae58a8e89f..0ef7c96c6a 100644 --- a/tests/t562-operator.c +++ b/tests/t562-operator.c @@ -178,8 +178,8 @@ int main(int argc, char **argv) { if (fabs(assembled[j*num_dofs+i] - assembled_true[j*num_dofs+i]) > 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d,%d] Error in assembly: %f != %f\n", i, j, - assembled[j*num_dofs+i], assembled_true[j*num_dofs+i]); + printf("[%" CeedInt_FMT ", %" CeedInt_FMT "] Error in assembly: %f != %f\n", + i, j, assembled[j*num_dofs+i], assembled_true[j*num_dofs+i]); // LCOV_EXCL_STOP // Cleanup diff --git a/tests/t563-operator.c b/tests/t563-operator.c index a0dbbcd9ef..2501dcabad 100644 --- a/tests/t563-operator.c +++ b/tests/t563-operator.c @@ -193,8 +193,8 @@ int main(int argc, char **argv) { if (fabs(assembled[j*num_dofs+i] - assembled_true[j*num_dofs+i]) > 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d,%d] Error in assembly: %f != %f\n", i, j, - 
assembled[j*num_dofs+i], assembled_true[j*num_dofs+i]); + printf("[%" CeedInt_FMT ", %" CeedInt_FMT "] Error in assembly: %f != %f\n", + i, j, assembled[j*num_dofs+i], assembled_true[j*num_dofs+i]); // LCOV_EXCL_STOP // Cleanup diff --git a/tests/t564-operator.c b/tests/t564-operator.c index 6728712b8e..586add1400 100644 --- a/tests/t564-operator.c +++ b/tests/t564-operator.c @@ -145,8 +145,8 @@ int main(int argc, char **argv) { +i]) > 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d,%d] Error in assembly: %f != %f\n", i, j, - assembled[j*num_dofs*num_comp+i], assembled_true[j*num_dofs*num_comp+i]); + printf("[%" CeedInt_FMT ", %" CeedInt_FMT "] Error in assembly: %f != %f\n", + i, j, assembled[j*num_dofs*num_comp+i], assembled_true[j*num_dofs*num_comp+i]); // LCOV_EXCL_STOP // Cleanup diff --git a/tests/t565-operator.c b/tests/t565-operator.c index 67d38da341..b10f9e996b 100644 --- a/tests/t565-operator.c +++ b/tests/t565-operator.c @@ -175,8 +175,8 @@ int main(int argc, char **argv) { if (fabs(assembled[j*num_dofs+i] - assembled_true[j*num_dofs+i]) > 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[%d,%d] Error in assembly: %f != %f\n", i, j, - assembled[j*num_dofs+i], assembled_true[j*num_dofs+i]); + printf("[%" CeedInt_FMT ", %" CeedInt_FMT "] Error in assembly: %f != %f\n", + i, j, assembled[j*num_dofs+i], assembled_true[j*num_dofs+i]); // LCOV_EXCL_STOP // Cleanup diff --git a/tests/t566-operator.c b/tests/t566-operator.c index a30c5d7795..f2db62437d 100644 --- a/tests/t566-operator.c +++ b/tests/t566-operator.c @@ -153,7 +153,8 @@ int main(int argc, char **argv) { if (fabs(assembled_value - assembled_true_value) > 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[(%d, %d), (%d, %d)] Error in assembly: %f != %f\n", + printf("[(%" CeedInt_FMT ", %" CeedInt_FMT "), (%" CeedInt_FMT + ", %" CeedInt_FMT ")] Error in assembly: %f != %f\n", node_out, comp_out, node_in, comp_in, assembled_value, assembled_true_value); // LCOV_EXCL_STOP diff --git 
a/tests/t567-operator.c b/tests/t567-operator.c index 7f8ea402fc..600c47a9bf 100644 --- a/tests/t567-operator.c +++ b/tests/t567-operator.c @@ -153,7 +153,8 @@ int main(int argc, char **argv) { if (fabs(assembled_value - assembled_true_value) > 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[(%d, %d), (%d, %d)] Error in assembly: %f != %f\n", + printf("[(%" CeedInt_FMT ", %" CeedInt_FMT "), (%" CeedInt_FMT + ", %" CeedInt_FMT ")] Error in assembly: %f != %f\n", node_out, comp_out, node_in, comp_in, assembled_value, assembled_true_value); // LCOV_EXCL_STOP diff --git a/tests/t568-operator.c b/tests/t568-operator.c index d1d832c2cc..48ed0c311e 100644 --- a/tests/t568-operator.c +++ b/tests/t568-operator.c @@ -156,7 +156,8 @@ int main(int argc, char **argv) { if (fabs(assembled_value - assembled_true_value) > 100.*CEED_EPSILON) // LCOV_EXCL_START - printf("[(%d, %d), (%d, %d)] Error in assembly: %f != %f\n", + printf("[(%" CeedInt_FMT ", %" CeedInt_FMT "), (%" CeedInt_FMT + ", %" CeedInt_FMT ")] Error in assembly: %f != %f\n", node_out, comp_out, node_in, comp_in, assembled_value, assembled_true_value); // LCOV_EXCL_STOP From 0a6353c2e77191a3193107576006f2135ccdbe4d Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 23 Jun 2022 16:36:01 -0600 Subject: [PATCH 108/172] fluids: Create QFs for strong STG w/ QFunctions --- .../fluids/qfunctions/dirichlet_boundary.h | 32 ++++++++++ examples/fluids/qfunctions/stg_shur14.h | 59 +++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 examples/fluids/qfunctions/dirichlet_boundary.h diff --git a/examples/fluids/qfunctions/dirichlet_boundary.h b/examples/fluids/qfunctions/dirichlet_boundary.h new file mode 100644 index 0000000000..5e7d23dfd8 --- /dev/null +++ b/examples/fluids/qfunctions/dirichlet_boundary.h @@ -0,0 +1,32 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
+// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +#ifndef dirichlet_boundary_h +#define dirichlet_boundary_h + +#include + +CEED_QFUNCTION(SetupDirichletBC)(void *ctx, CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out) { + // Inputs + const CeedScalar (*coords)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0]; + const CeedScalar (*multiplicity) = (const CeedScalar(*))in[1]; + + // Outputs + CeedScalar (*coords_stored)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + CeedScalar (*scale_stored) = (CeedScalar(*))out[1]; + + CeedPragmaSIMD + for(CeedInt i=0; i `bcFunc` method. + */ +CEED_QFUNCTION(STGShur14_Inflow_StrongQF)(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) { + + //*INDENT-OFF* + const CeedScalar (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[0], + (*coords)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[1], + (*scale) = (const CeedScalar(*)) in[2]; + + CeedScalar(*bcval)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0]; + //*INDENT-ON* + + const STGShur14Context stg_ctx = (STGShur14Context) ctx; + CeedScalar qn[STG_NMODES_MAX], u[3], ubar[3], cij[6], eps, lt; + const bool mean_only = stg_ctx->mean_only; + const CeedScalar dx = stg_ctx->dx; + const CeedScalar mu = stg_ctx->newtonian_ctx.mu; + const CeedScalar time = stg_ctx->time; + const CeedScalar theta0 = stg_ctx->theta0; + const CeedScalar P0 = stg_ctx->P0; + const CeedScalar cv = stg_ctx->newtonian_ctx.cv; + const CeedScalar cp = stg_ctx->newtonian_ctx.cp; + const CeedScalar Rd = cp - cv; + const CeedScalar rho = P0 / (Rd * theta0); + + CeedPragmaSIMD + for(CeedInt i=0; i Date: Wed, 29 Jun 2022 16:20:21 -0600 Subject: [PATCH 109/172] memcheck - use QFGet/RestoreContextData --- backends/memcheck/ceed-memcheck-qfunction.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/backends/memcheck/ceed-memcheck-qfunction.c 
b/backends/memcheck/ceed-memcheck-qfunction.c index 9b41096352..94330dcea1 100644 --- a/backends/memcheck/ceed-memcheck-qfunction.c +++ b/backends/memcheck/ceed-memcheck-qfunction.c @@ -20,13 +20,9 @@ static int CeedQFunctionApply_Memcheck(CeedQFunction qf, CeedInt Q, CeedQFunction_Memcheck *impl; ierr = CeedQFunctionGetData(qf, &impl); CeedChkBackend(ierr); - CeedQFunctionContext ctx; - ierr = CeedQFunctionGetContext(qf, &ctx); CeedChkBackend(ierr); - void *ctxData = NULL; - if (ctx) { - ierr = CeedQFunctionContextGetData(ctx, CEED_MEM_HOST, &ctxData); - CeedChkBackend(ierr); - } + void *ctx_data = NULL; + ierr = CeedQFunctionGetContextData(qf, CEED_MEM_HOST, &ctx_data); + CeedChkBackend(ierr); CeedQFunctionUser f = NULL; ierr = CeedQFunctionGetUserFunction(qf, &f); CeedChkBackend(ierr); @@ -53,7 +49,7 @@ static int CeedQFunctionApply_Memcheck(CeedQFunction qf, CeedInt Q, mem_block_ids[i] = VALGRIND_CREATE_BLOCK(impl->outputs[i], len, name); } - ierr = f(ctxData, Q, impl->inputs, impl->outputs); CeedChkBackend(ierr); + ierr = f(ctx_data, Q, impl->inputs, impl->outputs); CeedChkBackend(ierr); for (CeedInt i = 0; iinputs[i]); CeedChkBackend(ierr); @@ -62,9 +58,7 @@ static int CeedQFunctionApply_Memcheck(CeedQFunction qf, CeedInt Q, ierr = CeedVectorRestoreArray(V[i], &impl->outputs[i]); CeedChkBackend(ierr); VALGRIND_DISCARD(mem_block_ids[i]); } - if (ctx) { - ierr = CeedQFunctionContextRestoreData(ctx, &ctxData); CeedChkBackend(ierr); - } + ierr = CeedQFunctionRestoreContextData(qf, &ctx_data); CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } From 75a19770cda24d00f4da8d6890f256f14dd4565b Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 29 Jun 2022 16:26:18 -0600 Subject: [PATCH 110/172] vec/ctx - backend Restore*Read only after last reader --- interface/ceed-qfunctioncontext.c | 5 +++-- interface/ceed-vector.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/interface/ceed-qfunctioncontext.c b/interface/ceed-qfunctioncontext.c 
index 29bf20b957..5eba10a3a5 100644 --- a/interface/ceed-qfunctioncontext.c +++ b/interface/ceed-qfunctioncontext.c @@ -724,11 +724,12 @@ int CeedQFunctionContextRestoreDataRead(CeedQFunctionContext ctx, void *data) { "access was not granted"); // LCOV_EXCL_STOP - if (ctx->RestoreDataRead) { + ctx->num_readers--; + if (ctx->num_readers == 0 && ctx->RestoreDataRead) { ierr = ctx->RestoreData(ctx); CeedChk(ierr); } *(void **)data = NULL; - ctx->num_readers--; + return CEED_ERROR_SUCCESS; } diff --git a/interface/ceed-vector.c b/interface/ceed-vector.c index 29b3c3cd35..65a9ebd14d 100644 --- a/interface/ceed-vector.c +++ b/interface/ceed-vector.c @@ -594,11 +594,12 @@ int CeedVectorRestoreArrayRead(CeedVector vec, const CeedScalar **array) { "access was not granted"); // LCOV_EXCL_STOP - if (vec->RestoreArrayRead) { + vec->num_readers--; + if (vec->num_readers == 0 && vec->RestoreArrayRead) { ierr = vec->RestoreArrayRead(vec); CeedChk(ierr); } *array = NULL; - vec->num_readers--; + return CEED_ERROR_SUCCESS; } From 493d886db8007834484176765a8b93e115697155 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 29 Jun 2022 16:52:43 -0600 Subject: [PATCH 111/172] ctx - fix using backend RestoreData vs RestoreDataRead --- interface/ceed-qfunctioncontext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interface/ceed-qfunctioncontext.c b/interface/ceed-qfunctioncontext.c index 5eba10a3a5..788c686d5d 100644 --- a/interface/ceed-qfunctioncontext.c +++ b/interface/ceed-qfunctioncontext.c @@ -726,7 +726,7 @@ int CeedQFunctionContextRestoreDataRead(CeedQFunctionContext ctx, void *data) { ctx->num_readers--; if (ctx->num_readers == 0 && ctx->RestoreDataRead) { - ierr = ctx->RestoreData(ctx); CeedChk(ierr); + ierr = ctx->RestoreDataRead(ctx); CeedChk(ierr); } *(void **)data = NULL; From 8e45746727b209a3fedd7b4b820ee2a9aeea7f9a Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 29 Jun 2022 16:54:01 -0600 Subject: [PATCH 112/172] memcheck - verify 
ctx read-only access --- .../memcheck/ceed-memcheck-qfunctioncontext.c | 54 ++++++++++++++++++- backends/memcheck/ceed-memcheck.h | 3 +- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/backends/memcheck/ceed-memcheck-qfunctioncontext.c b/backends/memcheck/ceed-memcheck-qfunctioncontext.c index cc8416596b..51e0933032 100644 --- a/backends/memcheck/ceed-memcheck-qfunctioncontext.c +++ b/backends/memcheck/ceed-memcheck-qfunctioncontext.c @@ -154,6 +154,31 @@ static int CeedQFunctionContextGetData_Memcheck(CeedQFunctionContext ctx, return CEED_ERROR_SUCCESS; } +//------------------------------------------------------------------------------ +// QFunctionContext Get Data Read-Only +//------------------------------------------------------------------------------ +static int CeedQFunctionContextGetDataRead_Memcheck(CeedQFunctionContext ctx, + CeedMemType mem_type, void *data) { + int ierr; + CeedQFunctionContext_Memcheck *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, (void *)&impl); + CeedChkBackend(ierr); + size_t ctx_size; + ierr = CeedQFunctionContextGetContextSize(ctx, &ctx_size); CeedChkBackend(ierr); + Ceed ceed; + ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + + ierr = CeedQFunctionContextGetData_Memcheck(ctx, mem_type, data); + CeedChkBackend(ierr); + + // Make copy to verify no write occured + ierr = CeedMallocArray(1, ctx_size, &impl->data_read_only_copy); + CeedChkBackend(ierr); + memcpy(impl->data_read_only_copy, *(void **)data, ctx_size); + + return CEED_ERROR_SUCCESS; +} + //------------------------------------------------------------------------------ // QFunctionContext Restore Data //------------------------------------------------------------------------------ @@ -175,6 +200,31 @@ static int CeedQFunctionContextRestoreData_Memcheck(CeedQFunctionContext ctx) { return CEED_ERROR_SUCCESS; } +//------------------------------------------------------------------------------ +// QFunctionContext Restore Data 
Read-Only +//------------------------------------------------------------------------------ +static int CeedQFunctionContextRestoreDataRead_Memcheck( + CeedQFunctionContext ctx) { + int ierr; + size_t ctx_size; + ierr = CeedQFunctionContextGetContextSize(ctx, &ctx_size); CeedChkBackend(ierr); + CeedQFunctionContext_Memcheck *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, (void *)&impl); + CeedChkBackend(ierr); + Ceed ceed; + ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + + if (memcmp(impl->data, impl->data_read_only_copy, ctx_size)) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "Context data changed while accessed in read-only mode"); + // LCOV_EXCL_STOP + + ierr = CeedFree(&impl->data_read_only_copy); + + return CEED_ERROR_SUCCESS; +} + //------------------------------------------------------------------------------ // QFunctionContext destroy user data //------------------------------------------------------------------------------ @@ -241,11 +291,11 @@ int CeedQFunctionContextCreate_Memcheck(CeedQFunctionContext ctx) { ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetData", CeedQFunctionContextGetData_Memcheck); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetDataRead", - CeedQFunctionContextGetData_Memcheck); CeedChkBackend(ierr); + CeedQFunctionContextGetDataRead_Memcheck); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "RestoreData", CeedQFunctionContextRestoreData_Memcheck); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "RestoreDataRead", - CeedQFunctionContextRestoreData_Memcheck); CeedChkBackend(ierr); + CeedQFunctionContextRestoreDataRead_Memcheck); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "DataDestroy", CeedQFunctionContextDataDestroy_Memcheck); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, 
"Destroy", diff --git a/backends/memcheck/ceed-memcheck.h b/backends/memcheck/ceed-memcheck.h index ca97f16012..5dc710ca66 100644 --- a/backends/memcheck/ceed-memcheck.h +++ b/backends/memcheck/ceed-memcheck.h @@ -21,8 +21,9 @@ typedef struct { int mem_block_id; void *data; void *data_allocated; - void *data_borrowed; void *data_owned; + void *data_borrowed; + void *data_read_only_copy; } CeedQFunctionContext_Memcheck; CEED_INTERN int CeedQFunctionCreate_Memcheck(CeedQFunction qf); From 2ae780f285b973adc2f17a8658b271f19558a5ad Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 30 Jun 2022 09:42:34 -0600 Subject: [PATCH 113/172] test - verify that memcheck catches ctx writes in ci --- tests/junit.py | 2 ++ tests/t409-qfunction.c | 8 +++++++- tests/tap.sh | 9 +++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/junit.py b/tests/junit.py index 746781c670..59dbbaaba8 100755 --- a/tests/junit.py +++ b/tests/junit.py @@ -133,6 +133,8 @@ def run(test, backends): check_required_failure(case, proc.stderr, 'Length of input/output vectors incompatible with basis dimensions') if test[:4] in 't408'.split(): check_required_failure(case, proc.stderr, 'CeedQFunctionContextGetData(): Cannot grant CeedQFunctionContext data access, a process has read access') + if test[:4] in 't409'.split() and contains_any(ceed_resource, ['memcheck']): + check_required_failure(case, proc.stderr, 'Context data changed while accessed in read-only mode') if not case.is_skipped() and not case.status: if proc.stderr: diff --git a/tests/t409-qfunction.c b/tests/t409-qfunction.c index ae7b77114d..b94b27857e 100644 --- a/tests/t409-qfunction.c +++ b/tests/t409-qfunction.c @@ -61,12 +61,18 @@ int main(int argc, char **argv) { // Note: The interface cannot enforce this in user code // so setting is_writable == false and then calling // CeedQFunctionApply to mutate the context would lead - // to inconsistent data on the GPU + // to inconsistent data on the GPU. 
+ // Only the `/cpu/self/memcheck/*` backends verify that + // read-only access resulted in no changes to the context data + CeedQFunctionContextGetData(ctx, CEED_MEM_HOST, &ctx_data_new); + ctx_data_new[0] = 5; + CeedQFunctionContextRestoreData(ctx, &ctx_data_new); is_writable = false; CeedQFunctionSetContextWritable(qf, is_writable); { in[0] = U; out[0] = V; + // Will only error in `/cpu/self/memcheck/*` backends CeedQFunctionApply(qf, Q, in, out); } diff --git a/tests/tap.sh b/tests/tap.sh index b0c49be14e..f085cb8cf0 100755 --- a/tests/tap.sh +++ b/tests/tap.sh @@ -175,6 +175,15 @@ for ((i=0;i<${#backends[@]};++i)); do continue fi + # t409 must fail for memcheck backends + if grep -F -q -e 'Context data changed while accessed in read-only mode' ${output}.err \ + && [[ "$backend" = *memcheck* && "$1" = "t409"* ]] ; then + printf "ok $i0 PASS - expected failure $1 $backend\n" + printf "ok $i1 PASS - expected failure $1 $backend stdout\n" + printf "ok $i2 PASS - expected failure $1 $backend stderr\n" + continue + fi + # grep to pass test t541 for single precision if grep -F -q -e 'Test not implemented in single precision' ${output}.err \ && [[ "$1" = "t541"* ]] ; then From 5571c6fd979b2d8a02ec737d0c535858266a543d Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 30 Jun 2022 11:53:25 -0600 Subject: [PATCH 114/172] fluids: Fix post-processing in DMPlexInsertBoundaryValues_NS --- examples/fluids/src/misc.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/examples/fluids/src/misc.c b/examples/fluids/src/misc.c index 7edfceb9e2..02b37f768b 100644 --- a/examples/fluids/src/misc.c +++ b/examples/fluids/src/misc.c @@ -98,18 +98,20 @@ PetscErrorCode ICs_FixMultiplicity(DM dm, CeedData ceed_data, User user, PetscFunctionReturn(0); } - -// Note: The BCs must be inserted *before* the other values are inserted into Q_loc PetscErrorCode DMPlexInsertBoundaryValues_NS(DM dm, PetscBool insert_essential, Vec Q_loc, PetscReal time, Vec 
face_geom_FVM, Vec cell_geom_FVM, Vec grad_FVM) { - Vec Qbc; + Vec Qbc, boundary_mask; PetscErrorCode ierr; PetscFunctionBegin; + // Mask (zero) Dirichlet entries + PetscCall(DMGetNamedLocalVector(dm, "boundary mask", &boundary_mask)); + PetscCall(VecPointwiseMult(Q_loc, Q_loc, boundary_mask)); + PetscCall(DMRestoreNamedLocalVector(dm, "boundary mask", &boundary_mask)); + ierr = DMGetNamedLocalVector(dm, "Qbc", &Qbc); CHKERRQ(ierr); - ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); ierr = VecAXPY(Q_loc, 1., Qbc); CHKERRQ(ierr); ierr = DMRestoreNamedLocalVector(dm, "Qbc", &Qbc); CHKERRQ(ierr); @@ -246,7 +248,7 @@ PetscErrorCode SetupICsFromBinary(MPI_Comm comm, AppCtx app_ctx, Vec Q) { // Record boundary values from initial condition PetscErrorCode SetBCsFromICs_NS(DM dm, Vec Q, Vec Q_loc) { - Vec Qbc; + Vec Qbc, boundary_mask; PetscErrorCode ierr; PetscFunctionBegin; @@ -260,6 +262,13 @@ PetscErrorCode SetBCsFromICs_NS(DM dm, Vec Q, Vec Q_loc) { "DMPlexInsertBoundaryValues_C", DMPlexInsertBoundaryValues_NS); CHKERRQ(ierr); + PetscCall(DMGetNamedLocalVector(dm, "boundary mask", &boundary_mask)); + PetscCall(DMGetGlobalVector(dm, &Q)); + PetscCall(VecZeroEntries(boundary_mask)); + PetscCall(VecSet(Q, 1.0)); + PetscCall(DMGlobalToLocal(dm, Q, INSERT_VALUES, boundary_mask)); + PetscCall(DMRestoreNamedLocalVector(dm, "boundary mask", &boundary_mask)); + PetscFunctionReturn(0); } From dada6cc03e18f0f18a02732451145fd10701e640 Mon Sep 17 00:00:00 2001 From: James Wright Date: Fri, 24 Jun 2022 17:53:15 -0600 Subject: [PATCH 115/172] fluids: Implement dirichlet bcs via libCEED --- examples/fluids/navierstokes.c | 1 + examples/fluids/navierstokes.h | 14 +- examples/fluids/problems/stg_shur14.c | 25 +++- examples/fluids/problems/stg_shur14.h | 4 + examples/fluids/src/dirichlet.c | 176 ++++++++++++++++++++++++++ examples/fluids/src/setuplibceed.c | 4 + 6 files changed, 219 insertions(+), 5 deletions(-) create mode 100644 examples/fluids/src/dirichlet.c diff --git 
a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index f0a237efb0..b00ea8e4ac 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -128,6 +128,7 @@ int main(int argc, char **argv) { else mat_type = MATAIJ; ierr = CreateDM(comm, problem, mat_type, vec_type, &dm); CHKERRQ(ierr); user->dm = dm; + PetscCall(DMSetApplicationContext(dm, user)); // --------------------------------------------------------------------------- // Choose the problem from the list of registered problems diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h index cd6bafa179..85f4c6d6f7 100644 --- a/examples/fluids/navierstokes.h +++ b/examples/fluids/navierstokes.h @@ -147,8 +147,9 @@ struct User_private { Vec M, Q_loc, Q_dot_loc; Physics phys; AppCtx app_ctx; - CeedVector q_ceed, q_dot_ceed, g_ceed, coo_values; - CeedOperator op_rhs_vol, op_rhs, op_ifunction_vol, op_ifunction, op_ijacobian; + CeedVector q_ceed, q_dot_ceed, g_ceed, coo_values, x_ceed; + CeedOperator op_rhs_vol, op_rhs, op_ifunction_vol, op_ifunction, op_ijacobian, + op_dirichlet; bool matrices_set_up; CeedScalar time, dt; }; @@ -215,7 +216,7 @@ struct ProblemData_private { PetscErrorCode (*bc)(PetscInt, PetscReal, const PetscReal[], PetscInt, PetscScalar[], void *); void *bc_ctx; - PetscBool bc_from_ics; + PetscBool bc_from_ics, use_dirichlet_ceed; PetscErrorCode (*print_info)(ProblemData*, AppCtx); }; // *INDENT-ON* @@ -368,5 +369,12 @@ PetscErrorCode SetupICsFromBinary(MPI_Comm comm, AppCtx app_ctx, Vec Q); PetscErrorCode SetBCsFromICs_NS(DM dm, Vec Q, Vec Q_loc); // ----------------------------------------------------------------------------- +// Boundary Condition Related Functions +// ----------------------------------------------------------------------------- + +// Setup StrongBCs that use QFunctions +PetscErrorCode SetupStrongBC_Ceed(Ceed ceed, CeedData ceed_data, DM dm, + User user, AppCtx app_ctx, ProblemData *problem, + SimpleBC bc, CeedInt Q_sur, 
CeedInt q_data_size_sur); #endif // libceed_fluids_examples_navier_stokes_h diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c index 5d7f9841fa..6882daf66e 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -399,7 +399,8 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, if (use_stgstrong) { // Use default boundary integral QF (BoundaryIntegral) in newtonian.h - problem->bc_from_ics = PETSC_FALSE; + problem->use_dirichlet_ceed = PETSC_TRUE; + problem->bc_from_ics = PETSC_FALSE; } else { problem->apply_inflow.qfunction = STGShur14_Inflow; problem->apply_inflow.qfunction_loc = STGShur14_Inflow_loc; @@ -409,7 +410,7 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, &problem->apply_inflow.qfunction_context); CeedQFunctionContextReferenceCopy(stg_context, &problem->apply_inflow_jacobian.qfunction_context); - problem->bc_from_ics = PETSC_TRUE; + problem->bc_from_ics = PETSC_TRUE; } PetscFunctionReturn(0); @@ -433,6 +434,7 @@ static inline PetscScalar FindDy(const PetscScalar ynodes[], } // Function passed to DMAddBoundary +// NOTE: Not used in favor of QFunction-based method PetscErrorCode StrongSTGbcFunc(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar bcval[], void *ctx) { PetscFunctionBeginUser; @@ -486,3 +488,22 @@ PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem) { PetscFunctionReturn(0); } + +PetscErrorCode SetupStrongSTG_QF(Ceed ceed, ProblemData *problem, + CeedInt num_comp_x, CeedInt num_comp_q, CeedInt q_data_size_sur, + CeedQFunction *pqf_strongbc) { + + CeedQFunction qf_strongbc; + PetscFunctionBeginUser; + CeedQFunctionCreateInterior(ceed, 1, STGShur14_Inflow_StrongQF, + STGShur14_Inflow_StrongQF_loc, &qf_strongbc); + CeedQFunctionAddInput(qf_strongbc, "surface qdata", q_data_size_sur, + CEED_EVAL_NONE); + CeedQFunctionAddInput(qf_strongbc, "x", num_comp_x, 
CEED_EVAL_NONE); + CeedQFunctionAddInput(qf_strongbc, "scale", 1, CEED_EVAL_NONE); + CeedQFunctionAddOutput(qf_strongbc, "q", num_comp_q, CEED_EVAL_NONE); + + CeedQFunctionSetContext(qf_strongbc, problem->ics.qfunction_context); + *pqf_strongbc = qf_strongbc; + PetscFunctionReturn(0); +} diff --git a/examples/fluids/problems/stg_shur14.h b/examples/fluids/problems/stg_shur14.h index 09f84031b1..03cc010743 100644 --- a/examples/fluids/problems/stg_shur14.h +++ b/examples/fluids/problems/stg_shur14.h @@ -17,3 +17,7 @@ extern PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, const CeedScalar ynodes[], const CeedInt nynodes); extern PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem); + +extern PetscErrorCode SetupStrongSTG_QF(Ceed ceed, ProblemData *problem, + CeedInt num_comp_x, CeedInt num_comp_q, + CeedInt q_data_size_sur, CeedQFunction *qf_strongbc); diff --git a/examples/fluids/src/dirichlet.c b/examples/fluids/src/dirichlet.c new file mode 100644 index 0000000000..8ba01f392a --- /dev/null +++ b/examples/fluids/src/dirichlet.c @@ -0,0 +1,176 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
+// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +#include "../navierstokes.h" +#include "../qfunctions/dirichlet_boundary.h" +#include "../problems/stg_shur14.h" + +PetscErrorCode SetupStrongSTG_Ceed(Ceed ceed, CeedData ceed_data, DM dm, + AppCtx app_ctx, ProblemData *problem, SimpleBC bc, + CeedInt Q_sur, CeedInt q_data_size_sur, CeedOperator op_dirichlet) { + CeedInt num_comp_x=problem->dim, num_comp_q = 5, num_elem, + elem_size; + CeedVector multiplicity, x_stored, scale_stored, q_data_sur; + CeedBasis basis_x_to_q_sur; + CeedElemRestriction elem_restr_x_sur, elem_restr_q_sur, elem_restr_x_stored, + elem_restr_scale, elem_restr_qd_sur; + CeedQFunction qf_setup, qf_strongbc; + CeedOperator op_setup, op_dirichlet_sub, op_setup_sur; + PetscFunctionBeginUser; + + DMLabel domain_label; + PetscCall(DMGetLabel(dm, "Face Sets", &domain_label)); + + // Basis + CeedInt height = 1; + PetscCall(CeedBasisCreateProjection(ceed_data->basis_x_sur, + ceed_data->basis_q_sur, &basis_x_to_q_sur)); + + // Setup QFunction + CeedQFunctionCreateInterior(ceed, 1, SetupDirichletBC, SetupDirichletBC_loc, + &qf_setup); + CeedQFunctionAddInput(qf_setup, "x", num_comp_x, CEED_EVAL_INTERP); + CeedQFunctionAddInput(qf_setup, "multiplicity", num_comp_q, CEED_EVAL_NONE); + CeedQFunctionAddOutput(qf_setup, "x stored", num_comp_x, CEED_EVAL_NONE); + CeedQFunctionAddOutput(qf_setup, "scale", 1, CEED_EVAL_NONE); + + // Compute contribution on each boundary face + for (CeedInt i=0; i < bc->num_inflow; i++) { + // -- Restrictions + PetscCall(GetRestrictionForDomain(ceed, dm, height, domain_label, + bc->inflows[i], + Q_sur, q_data_size_sur, &elem_restr_q_sur, &elem_restr_x_sur, + &elem_restr_qd_sur)); + CeedElemRestrictionCreateVector(elem_restr_q_sur, &multiplicity, NULL); + CeedElemRestrictionGetMultiplicity(elem_restr_q_sur, multiplicity); + CeedElemRestrictionGetNumElements(elem_restr_q_sur, &num_elem); + 
CeedElemRestrictionGetElementSize(elem_restr_q_sur, &elem_size); + CeedElemRestrictionCreateStrided(ceed, num_elem, elem_size, num_comp_x, + num_elem * elem_size * num_comp_x, + CEED_STRIDES_BACKEND, &elem_restr_x_stored); + CeedElemRestrictionCreateVector(elem_restr_x_stored, &x_stored, NULL); + + CeedElemRestrictionCreateStrided(ceed, num_elem, elem_size, 1, + num_elem * elem_size, + CEED_STRIDES_BACKEND, &elem_restr_scale); + CeedElemRestrictionCreateVector(elem_restr_scale, &scale_stored, NULL); + + CeedVectorCreate(ceed, q_data_size_sur*num_elem*elem_size, &q_data_sur); + + // -- Setup Operator + CeedOperatorCreate(ceed, qf_setup, NULL, NULL, &op_setup); + CeedOperatorSetName(op_setup, "surface geometric data"); + CeedOperatorSetField(op_setup, "x", elem_restr_x_sur, basis_x_to_q_sur, + CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_setup, "multiplicity", elem_restr_q_sur, + CEED_BASIS_COLLOCATED, multiplicity); + CeedOperatorSetField(op_setup, "x stored", elem_restr_x_stored, + CEED_BASIS_COLLOCATED, x_stored); + CeedOperatorSetField(op_setup, "scale", elem_restr_scale, CEED_BASIS_COLLOCATED, + scale_stored); + + // -- Compute geometric factors + CeedOperatorApply(op_setup, ceed_data->x_coord, CEED_VECTOR_NONE, + CEED_REQUEST_IMMEDIATE); + + // -- Compute QData for the surface + CeedOperatorCreate(ceed, ceed_data->qf_setup_sur, NULL, NULL, &op_setup_sur); + CeedOperatorSetField(op_setup_sur, "dx", elem_restr_x_sur, + ceed_data->basis_x_sur, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_setup_sur, "weight", CEED_ELEMRESTRICTION_NONE, + ceed_data->basis_x_sur, CEED_VECTOR_NONE); + CeedOperatorSetField(op_setup_sur, "surface qdata", elem_restr_qd_sur, + CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + + CeedOperatorApply(op_setup_sur, ceed_data->x_coord, q_data_sur, + CEED_REQUEST_IMMEDIATE); + + // -- Setup BC QFunctions + SetupStrongSTG_QF(ceed, problem, num_comp_x, num_comp_q, q_data_size_sur, + &qf_strongbc); + CeedOperatorCreate(ceed, qf_strongbc, NULL, NULL, 
&op_dirichlet_sub); + CeedOperatorSetName(op_dirichlet_sub, "Strong STG"); + + CeedOperatorSetField(op_dirichlet_sub, "surface qdata", elem_restr_qd_sur, + CEED_BASIS_COLLOCATED, q_data_sur); + CeedOperatorSetField(op_dirichlet_sub, "x", elem_restr_x_stored, + CEED_BASIS_COLLOCATED, x_stored); + CeedOperatorSetField(op_dirichlet_sub, "scale", elem_restr_scale, + CEED_BASIS_COLLOCATED, scale_stored); + CeedOperatorSetField(op_dirichlet_sub, "q", elem_restr_q_sur, + CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + CeedOperatorSetNumQuadraturePoints(op_dirichlet_sub, elem_size); + + // -- Add to composite operator + CeedCompositeOperatorAddSub(op_dirichlet, op_dirichlet_sub); + + CeedVectorDestroy(&q_data_sur); + CeedVectorDestroy(&multiplicity); + CeedVectorDestroy(&x_stored); + CeedVectorDestroy(&scale_stored); + CeedElemRestrictionDestroy(&elem_restr_x_sur); + CeedElemRestrictionDestroy(&elem_restr_q_sur); + CeedElemRestrictionDestroy(&elem_restr_qd_sur); + CeedElemRestrictionDestroy(&elem_restr_x_stored); + CeedElemRestrictionDestroy(&elem_restr_scale); + CeedQFunctionDestroy(&qf_strongbc); + CeedOperatorDestroy(&op_setup_sur); + CeedOperatorDestroy(&op_dirichlet_sub); + CeedOperatorDestroy(&op_setup); + } + + CeedBasisDestroy(&basis_x_to_q_sur); + CeedQFunctionDestroy(&qf_setup); + + PetscFunctionReturn(0); +} + +PetscErrorCode DMPlexInsertBoundaryValues_StrongBCCeed(DM dm, + PetscBool insert_essential, Vec Q_loc, PetscReal time, Vec face_geom_FVM, + Vec cell_geom_FVM, Vec grad_FVM) { + + User user; + PetscScalar *q; + PetscMemType q_mem_type; + PetscFunctionBeginUser; + + PetscCall(DMGetApplicationContext(dm, &user)); + // Setup libCEED vector + PetscCall(VecGetArrayAndMemType(Q_loc, &q, &q_mem_type)); + CeedVectorSetArray(user->q_ceed, MemTypeP2C(q_mem_type), CEED_USE_POINTER, q); + + // Apply libCEED operator + CeedOperatorApply(user->op_dirichlet, CEED_VECTOR_NONE, user->q_ceed, + CEED_REQUEST_IMMEDIATE); + + // Restore PETSc vectors + 
CeedVectorTakeArray(user->q_ceed, MemTypeP2C(q_mem_type), NULL); + PetscCall(VecRestoreArrayAndMemType(Q_loc, &q)); + + PetscFunctionReturn(0); +} + +PetscErrorCode SetupStrongBC_Ceed(Ceed ceed, CeedData ceed_data, DM dm, + User user, AppCtx app_ctx, ProblemData *problem, + SimpleBC bc, CeedInt Q_sur, CeedInt q_data_size_sur) { + PetscFunctionBeginUser; + + CeedCompositeOperatorCreate(ceed, &user->op_dirichlet); + { + PetscBool use_strongstg = PETSC_FALSE; + PetscCall(PetscOptionsGetBool(NULL, NULL, "-stg_strong", &use_strongstg, NULL)); + + if (use_strongstg) + PetscCall(SetupStrongSTG_Ceed(ceed, ceed_data, dm, app_ctx, problem, bc, + Q_sur, q_data_size_sur, user->op_dirichlet)); + } + + PetscCall(PetscObjectComposeFunction((PetscObject)dm, + "DMPlexInsertBoundaryValues_C", + DMPlexInsertBoundaryValues_StrongBCCeed)); + PetscFunctionReturn(0); +} diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c index c2f6fea306..8ef8aa7345 100644 --- a/examples/fluids/src/setuplibceed.c +++ b/examples/fluids/src/setuplibceed.c @@ -716,6 +716,10 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CeedOperatorContextGetFieldLabel(user->op_ijacobian, "ijacobian time shift", &user->phys->ijacobian_time_shift_label); } + if (problem->use_dirichlet_ceed) { + PetscCall(SetupStrongBC_Ceed(ceed, ceed_data, dm, user, app_ctx, problem, bc, + Q_sur, q_data_size_sur)); + } } CeedElemRestrictionDestroy(&elem_restr_jd_i); From 72d852595123308417bcd99e2ca111c71d29cbff Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 28 Jun 2022 16:38:22 -0600 Subject: [PATCH 116/172] test: Update data for strong_stg test - Previous data was from using `bcFunc` and was actually incorrect. 
--- ...uids-navierstokes-blasius_STG_strongBC.bin | Bin 6360 -> 6360 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin index 27137a7e621b46bc239326fc497b0b4769d3cd26..fba7ca225f661bf683eff427b8555f67fa2a29c3 100644 GIT binary patch literal 6360 zcmWldcRbZ^6vt71$@nUxL5UXGgoJ#buXV3kWL3(jl$ETAC`B?ak|-5Y6bXe(_}Vg( zEh}3^R#u_>j=%2f`+COvJkN8^`J8hZxO@y47&h<`m>-8_c4!pe^_ zue5^iJA&J59SE%aO1#0_#Xoj$IOiaqri3(;CF|Xm*K7)g2G)`?AWg3g@fba-e9JC2$B^-@**M!}Uy) z>KSI-+P%p8?%fjv)|#C6p`(Yu5(17(FxkOA-7ol2Ln%|n%a<%Hr^zzAteb^_7GRJ4 zll(q$7aRfTTz4#WGR*N98IJURS{881=BranfJ5nj=v9vY5O!vk3|+%-Jv&mY z^llnBCug_Wz`PWmwG*=sVV{AW2aeX2GWpxSGF?g_3%7V~N~4nm)L3_=h439QO9AWAa_I64WF+3fg|iqA^l)w_6df0l&bk|A)|D}hz>Hly+Fz&UwV*rXP1;M?BI)HI1_DTdzTzZ3^{_At9>DiGM)9DCcq z0N@;6wP+C_hqg84KGoGHOHJAM{s{{a*yr79t4ElDqnRUfTp#T!=zk??f0j(+`}W(^ z5dkMhw1#R19E(S5yA2Wh((-_$$TqxC^AmmIqziCJTX!vZ$M>6)4y&pF$IU!;@j3ph z`-9R_2LoCDz4(Mx(G3D?j)H?+NNA6_$C}e?GDmm+xWeaV0;|z6nUE?2 z&gFe)p3)FcTEU_|KObJM6;Q63^&fC5^zvi=5?Fp}k0Zq$IP{BggHI9L8Lc#*HG-@# zE~DJHa3io!#-jU0bb-TBwUmK z^`DRQ?}-6F&@gO~yh~ukkE~t#eSvf8M#R=2w0Ea&+n6Sutk%4y?-!Q@=i#i$G3*0Q zPk>AJW+Z+w@O*=W3t6?Ji0sj{C$QRxpRt1Le(k4XT5=Q#p9y4tu5b!(?jJam{22VC z^~?UaAc)se%Z2TV`+$?7G@O`@M2eYCue6zxHQ9a6j>~DliON^>C4tlMS-YwLNjLA< zeEoY4`LV)IGx%Kufz=fpxG6ZhejfRO%acfgts)>+{2bm^P{dNr{TkwrK4Ev`DC7+t zK1zSJ#^%(_K^4`b@KB-#%7Cru(wg2 zc=X~na4s}{yYm^z+g&nAcOS&NmF?NL=-CliXKBa54eRGVq8!q91vokUf1Xz(#gL_U zGy3{uz3jX6D$^P8mteM+yR?DR`1CKA5mHv?{I+Xj5ZT?I;O%s>*JY4*0U!x z;bS-9+{r~FzpH@L+*NP1hHx+C%tFl|vgMCcMn;e@+&}BJpYBb-(OfwqfT3ftiv!*wy`+l32^%Jk49G@b;)1OE8a0=U1QP3_!xh%=f;d?SSWBtmMOh@Nd0Gy zVz+!P*=}>|rXF7;f%UvM;g^N@qKQT^-Q0!LJ$mO-lEd*qL08k79p4FTWG#PFBKUuI z$rI<)L72yOdi3NDq_H+Q-?zd=*8j2j`&?v^zy=Md9EHDtGa0P*W)IR#3f6zQ%Zu!E z+@i)44?5Ye>r|Y>4(s<2%zZDA=Eb!F8MEK`AZ6xP_NXXD-7=C9>q!x@+3`6)sLTsw5E>D ze(@P1YabTbGUY%%9^)`}m(KuB$EfSmzlh>Dux(*SEZJptt1go52!Rc|*hxt>!}{&t 
z_VwBzN_q|xzo0GNUrV`vbdL#vP4WD&)V=_m11mw-dtjciWgce%qDY(VSiSL_ti1R; zzJ`?!oK?41u|dFTsw)y;L6q_yEob#ZWM@yuGa~&waQ@yK{aFj^d5YS?cOoj&GhSgj z58k`zK&3hq0jDF6gTe*(cJd*&=mD4~Xl|Vpizw>!vA*i_WVM-W`b<10fqnChTDT?z zoGdH(GHpcZ80%Q-JVACl*q=Hey8xWkbfe)kSkG&A+W#V=@&>cX)bGQ)8O2Twot_2G zvVq40{C59Zo;&pq=Bc)tF~uMXg*2FwwjxR6LA@wl(D&~GVyserbSTT3$D-AuZ>I-mO@tG=NL}mZFWcA#R?3HPL+1C&ZoUoltwV;zUi3_ogig0hUlP-04 z@sF}^*d~}J32a)T;r%{w;E2Avr*I88lRl14rifxQWY*WPn{1SMuXWMl34x8fRDLn6 z1WqAmUfndJ3K=}eKRiKpN|=}WZi75I!XhMXx(ztnb<5)e5LIyhR_yT-UK%*9@>><` zn3l`U6gC9T-Y~^0kT)V~UJb~bA&Sqgm<+GWWKYC%gP~QZA11Ca>?|ArPJ`HG<9S3C z*Eu@2U4!gDIDh@H&%#e#z~6L1E@oK%ISJ`h-&Pm**?FU{F*}`+b)8Tq=;TyXJ9-bg@b!)KDAxka`>9w* z4MY@+eH-#mnv-)b)EL8ui-9`NLh4l+SzNUhR$< zMc)Jcy0ArHANYy8q!~Y#7CxZ!%HfB&2CQ#&Y3I}?us0tJzq5h<&;+@S#9&?5d!P7T z2Kbf<{i7QXI=06hI|21sm?&f85y*!P4nnVlhY5^c_9@P7C(QGZ+Q0M=ugURq&Ze%B z6^}Oj{1n~=I-tilItu5c@n2NtSHOR6bTh5s&c$E6?hf0cvJv)ecIyzZg?;6o_+2OP zzM!iUQM=p72aKFn9)o$n;n_dKAV*+jgH%Zy57eeAx8u#ll@_Io*J zpnpO1ji#J1E^+<35;wkTXaBe)X)Au$t!l6NZAJoXe!e%7@8Y`s4|vSI@R@hgvR|1c z$cu9_MH_u-1XiDYUu+2Cq1K?+5Oi~nW|&-D{}`W-`$)6FSjnRE7s?O30zGfZYngnz zUN;)E2_^N)nzutv^q}O>0(aZJ} zI39Wozb-@Ft-*Gb^$zT#R67wKg_s1UEtU@EkXheb=L-_y{hPPsa)!N!aaoZQaVv=9 zO=n+Kbpd|!Wx@bYv^sFAS`0g z-=F4Y9`FyE5>4=tBVxZOuKw+3ESdLA&N%dgD%i1@QJ5bLW=~T{~k$w1W zNA{9@bI5zG%1&I|t-uj%G4OeXFqTKIXmJ(~k#d*|KI{P;DY?6TP-k|&w%@|01pA1c z%$=S_Jkb-(tCIF)_DN}_jr*SSB=N(yM7+G$ABZVVOt~A7u__v{+kupM=0rG2`k#QJMhnI za*fQ1ern>~4DZ)i-m=Vn1jf}5N79DT?kdmSV&iP2XI`l)Pmdn>$)2NDm7q6aT-8BW zrxBm`=%Iq>0X#9@off5W(O z$`wC$bP#Pl%P81G-j^?|573ABYFFj;a$5Im#t;18DChvUPwNwdLOd-bj9p#r7{m`2 zV4eqYHjs7H))@3LV)0v9NG%di-R$8e`hiUQbE`Yw5B&P`qi@IAd4Y4*qYCs-j6p({ z+WVb+#QxveH7C&f4o=ax&%L4V`u&b4p#~lLo|vvIHGrq*FFJKDFaxKg@^(rS)H(MX z-_W7|jTpbfeDfBPIUD!SOm>t^-SU==@#`~)_tf0lZI-~fm>?zc1xahyG{ve4lZiQg zt5KVv4(dEqX7M`)IMeZaFDN1@*B31dFQxI!d+%R5o_Y;^N|CJy_fN>vbgqh|8sH4d zg{o~uav!^oNEio_&)L7+Qu1Yh_>g(3o}UaHN4c@uE_C?Lr_FJx-^i4q>_2W%kT1Tx zekQAN3^-$54gbQC?9lLE`e{`>tJ}d?C@&iBL%?vRk_qY)ogKGX!+_JpJn6=Q6ce0B 
zqoQ-kB$`I-21^5YXQLksMQ4HI#;bLi7afuMu;W5fJejW4>$=rO7yLC-ZXbCGIA8OR z_}d}{&(|L&@B?_xY_s#60OZ|fhR>NU&<~C~G7hvr9*t-_+rpZLRN8uT-5=MIk0Xpu z8)bvObsiLQXDGhtnWR_2Y?};GL`#vLPKgBiRbhgiK5=TlNo)a$) zUcz&2t)d_NErE(*p@@)Iwt8F*g<|9NaPS{fVv{ImA0`%8mZRdx@v>};qtNOKR<9s1rVGCGyE`O!)T)F-Ct5)KsLM1}M_#UqWQkJdJQ*-DaE zyOIT?c0;|E$(fK$1x|aYiA)kwZz-5j_07dg9i-K>aDEe)(}JS*0@MQ4tPx?6fVPp%K(Cp+6cr zEtn8RP|f43_BOmCmizaEt?)i$JcD}~-vg&)T2K-Pj{mmWSbjupNp{%vBLu(oU&(l) zDi7G}{3w-uy?(sOO3#!+l;uPLrqZ8eRgo2+mIOP2&HNjB)2<1eIaSTIZej2vXRtx+hY3h`=TdFO`-peRQ>M!} zq8e_^&U{irew_MT9rW)wa6*n<7lFPhjK}ur#)pV{PLy!iTaQ=lt|+~d4*C9v)t1{m z*#x#M;k^HW0C1Eo^)myJwlvlE??N~plFwXSk_x&pWpF?(qY*d~HCFGf5Y?O>vAN!a zY%FC>kXMI3eU{MEqKN}X$=O(r2{;R5WyPU*MfFm!w~`-lo_%0c2hQ*LGf!HfKaP-{ z4UfHuwB;^vRNi}vza)-1v>XFnm?{Z3x!ehy-S@SY3=q|Zx2T!#4cSn7yW$}`#ws1~|SQ z+b?w>s+DvkH9rWiGAl@u*SZV-DpTt;y@9}%&PS{0taH+C1}V%UZJlFRmt6z#cjmlz z&KW>GIZepUJh%azewXily@={2>ijvbk!(n)P+fi{3!Kxfm1etuLmSI9X-3owOlxuS zLwMyS5xZlNt-x_}-~9sW_r=`7UHzbA;q2i$?d?e0x;@2A`2n8$<+5rt`+ndkzr`q} zzp`URO+EeJGfR}fSoM~NM0nWQ38@3eSP)fZ` zO(D)~sD))i`eapL}L-vTX9FdYaiYuW36rH z0CnKhafLwtNZ?f3aLH2;)xK@k5uL{?c5G5fw_yVg-85qf`m2RL%czxj;2hXuG!c!o zQ|(=M?xEtd9Vyuf+)!uyG%#Z*(}H_FD^7VWhp1<*IQwNb<8u#N1dBh4!}zUVO4ZN@ hhRqr3*+O4+E~D|bbqm>48X#cS(+!*^&8;op{|5}p4jcdg literal 6360 zcmW-lcOX~a|Hq9)$$cYKDk5nZg+iorZuVA0DJ?0Ilo92VO=cNcp+Z?D$*RzO+o9}? 
zkdkb|mC?}e_5J;I|2vQSe4XcOoEHnfhY<@4yD&*ueV5%V=ZR>mQx;KHP9$+_!Hv~Z z7SYi~3WbHoNTR*j)J)(fqHhg;x*}su61hi$s(K|z!cJqy-ps9twzhh+r)?ee_rOZ+ zi^GZ3oUBA4-(fD22$12lP5%z(Ptb-)}>wR=BdlhiXqj>VRQ{TqNH#dDUN4~ySE_;U8l0?U`r)E3x$4oRQpVMjvp73l|&RsE!lQRNpy&qkP{# zHv9b8A2U>c=0Ys665sfK&A=mL(O@o;)2pu02L0A%>^>f5cITbeD$x$S=Ujy61c2CQ00B ze6mAw3DIXl8dvDQpqA{)1aG>gQ~f>VdYKZsB;h9Su%sjj9MMgIw=AfB&39Avq2H+t z_D88+JxwIh#kezS<^mj<&z2rj=zl!Gar0&28T8MSzalZ0#B$L&qJz_E~Ku-`>XH*z~V*Ai4w`fRUP(g)!9B@W!)37q7s z+9^C(;J>tF356&sw⪼ZQ8T{>3Qjj`bi?8#V>1A4$H zvtx3obX#TQbma(e-WW&>Ws^iwdz}vdG~}tIsn$zHDkxpH}NkMIAJNK z+=9SSeWau&frYkt3k00JOEJrPB=@>NT)L$`?9aADw3o_;(&Z?;)lm2?Z(Jnxl5@Uo zW3M>)de^cg%}){!2S;4){zmk9pEiZ)CMPc_)EJa_fRvlhH0{252ok&qabVR_L z808Cii|u8_e|H_JiU_{k1nL!VDocVgBY=~Tf6{(G-cftnQKov4dd+Py=GxK&oG;fe z$iX*F`PABY6GYow8|rJlh2^Shq=j^u)X#k-#=;Abhi*1$%=!dG-|f;-&XvZJ87_?4 z&n8rjd00~&*Kd;elxF|WGz&QNfk%H|V_DkwQ_Y?^imBYYLHx%s)VB#K+n^wl$Y8xB zU6Fz43+c-t%OY5Dy~uF?$W`j+&{fl=t$mO`ht8~O903m7S<69DEYr^4v%R>EsufPG zJ7oWuBzk6>*%UJo?QMSRi;@PGpSsrmQ591a1(tp5cN>sI-o?TE11u!*)Gon!+h;`E z9$cH&w}@4e+x@>EixGY2l1MOHAeJBE;e8eRk9wc*Dlpx( zmn8a1ew{7?KYAxgw^^UUif=D!v9A`Rs+~R8nT-UHME=DS$f%ekUU;UlYit3|ugZt# zwqay(&+tcqCAGlKzi_)A&hhNK-dTMG(cz)j7bL5(lCoLB<+QU@138~!y;_VUzU&IM zo&-O7f7hH@a=mo6Wcb`@w`qGfy0}&FF;|4u~ z0tB`6UC7g-8T9%ds`F;O030*oO;kKqjhub_IQ~DXrO4&9yeRnS8^?gbh6dnF$n8e) z7;Rvjwm14s)yi=s>@YJTiPsWQpTEtJL`m0RpQ|>a?FCpq^*3P+t{F%F?g!L@bnMEB zzI7xKsJ2yk%PgY9?yQL^(88$Xt?QqGt5j16t?9Q#mL!IBHe1)fK(vp*`kRT;n7n_= za{0_As>;)nX`%^!Eqx|NJ3^ggPG5Jo{ekGqX34iz1hHoNuJhLFRn+gx9Aj)xVxZ0{ zj<~3K1E=Qk*{%OD8D^U#{q+#_v2mHI&G|zTqhc8fS@%HC@L11)J61mvl`v)#NmVyg zDDG)Ag!9+xCFEw1L{;2=&Wb|d=mjQ9(O9eLtGC=~CN=N#>nYzgh;P`6A+{3e%VAP& z5!|8pP$j!IgZ~!Q`ty(QZ2DJ{78$~$I+1vrNc&KvS#ZJV~@T^F0FKb5~$gjiIOL^z++;pGM3I4hbwD#hwn zw4q;gGSwqdoc+awmn0^x3mQ&C-SVD0zFfnHHC=n>)C60pDxXO)d%7NUyr_~A@Fs~` zagS{k)4=JgW~Bea+L=pbM5ZeB^PuNqn;tjxjq4)pJ@0_Sh~a;pfYnnMZ=Kw?k?LIk zzS-#u=$V*MUrdI$`55nKRiLnDePN@-sddy__4VhEw}77NB5oFY$kUpsnWPp9e6X;8 
zCczeKSI7@M*d|LY+ip+!JJA4~gpNd$m%tHns&D&;)t@;soU+rXFUNg^Fqau{-k=Hg zZs1@)!rBFE_6=M7_93co$DnIzX(({w2`Ro&;N*943UXl0 zaf9Z4)jBqwI`Z7gFzR+tH!cE#O)E=uf8~@6OjEhXpeMxghR#GGpe_y+)`U!Bd-5!^?VfBBa zZCaZusEJFXRkF{$&_AB*1VRD+LHVAk|kwof&tD4iTz-cgO)^@=f4&Cb`9+*?p zM&u>!pZ9@NR#DgS3eo;Y&UuBt#ahNH1zjl#)K^`u2hIX8r!+DzXl>8{U3pp0$b*P> zU-c{Xt|7!tzHyJnJaa>ChEmgGW9Ykk(j@Vv*P@UQj(w-e9s>`0sXBgt0rom_UIzSfw`WjTIoR#2Zf z%auSfRBN2Q3ZW19=M8*_yo%XXWD`O&KA|Vs^HuuQ`h{xX#8Ve z0rx^P75yPUOEDbgWje9QZ+ijgY|a18=Y(kLx!hHgr)b{l;IiOmF*H1}Vx@;<4arcb zE;zp)^1;86l{F&{uRSKV6k;5T0(LCF`}_S0Nxao=9_CmE&J@RmW+wXU+-<=zYJ^oss@ zwtI*=<4ubNw%=rPP`t{Sv~hwa2})3b(u8-{gDsb^gvBxCDvl2Kf z#iL1fl9*GHY);vNXy(4?uPJXa_nRTMLVt18KNz4{bT61>$lVXry&ecY6bKXJ`iZwn zbo*~Q`337ymjQLx@vF5`3;lMH9&%^%$Z;!C{^PXcMhmnUrfVIZ*nNWb9t3YYAKn9D2<3)Em6}C8tHEQzsSsp~8)?9Qt2{+h^M{sEZG*5~mNJ z!D6D5j;_Vm(7S8wuLrtk;eNzlYF(@*8LWk7--f`q)Tz>?pfy-L_~2C|DhGWpkLTJa zb`UtlT??L8hz>OV?|ip1mg`Wg6{0^Z(xbBEc0pp z)Yd<>KtQsM`h7R6Zic1 z-d*4`;rOcDPb7nbN4lz}9XQ*A+TfKfC2;Somv|DIxb{~+sW%!pfoXJDD< z8!H~K9$3LMLFHY(>Dv$s{ieFNz4q-X;M_PX{5cuRs(bM|+O(oiW)JT~spSJ_HnLSR zlw_=o!4aFA5ncLOu4A|w@7eVznsakBnmV{{f%^#!eseBMy8(F^B&Tmzv;ptm(9fAH zFF_Txrmo)R0ClJ~>{THj%;m}KgVh7}Sf2f9P~lu6>ep=u6_!3p67ySTS0WX~lDQ6&SSUaBo8<_}SglFvX=ot& zO^Uh4q!U)GEn$=Csz5UdU+%0*NrZY(u5iWUE~0~nHbgAdW7K1~OU6-@VxCC%R||mo z=Dn*?vC2{4tTyFUDZu+TPp=n$7=Z@QRZoPpLZ4ouZ7$tfNix>t=0u0WJMN9tp_6H! 
zuuA`wo2wZoqVFG;-*n^=$x!mq{H+CX3wb29JH-)`Qx_U?Hx*LU&mD6CK2Ud>#w6FI zfZoJs)pna_vD&HYIdKtsXej>bg`bzfCx5z!4*ZrU84N#0L}vh^i-%WA4oqTXrqvNH zCy!>cUadV@5A|AQ>YYz5%(#t)`fsT%GB7qf#%iw=JS&hM{he|2IlL;U!_mP=HnkvQRA5AUk352;)9Ax{%`2edu?i`5;B zo}S>bLZh>5uh(5hBx40FqT+s@WC)(gFW}`xbfLH7?iv-W3;W~vVw zixC#ewERjkR@K|D%g7@c>y9Ngy%&SH-Kq7MSHfCa&4k1I-Dp0X=gQn(F_NJ(ns?iO zA8^W=9;mWoy}cpRynLZl_1;7B3HH#(28<6R7sY`diL#zPU##6#&sUaXg~p}B)Wj{1 zkqqL$J=;S5k&N{$&rd${Ky(3buwuX)to`U+9=`_%npM_~h4dpC2HH`5y9|LdIQ+fx zAlAPw9U(;?qZ)tgI~7$4dcNOZ9k{O!^eo>iXxG5Hr;nX88c9QAQaiEX5S-61`7`x6 z7wEBBT^kt-dg8a1E?vMnrv*$i@+Q#2+|KwX`B0aP8cNPqHv;FEzA-7`?=91L0RpW4fiQC`vKe57k8f9^t;+~<=D58poJ(FYzSZFFuPm&BFF$H;? zgy{UZHM|m2SSKyz()y3#Xl9>pm;3&6B*XY9Yx%`hz{$^u=rF|wZjpOA(x<2oHL=6d zFJV3zQ9ZHgeHrMPa(kL_9qT1u94a|ZqG5|i4$qR);Qm)8qzYUCUc+rIHf5Bps@@OgNuHC=e43c58@f1rA2XK~JrXnt2gL`9Q3LP#~$?Z77#r6u|cnK(B zsI&JMiX%trvEIwh>A`F7(NLxZ)yii89In&68+MY6wOon5`ATpQQPc;b@CTeol5f1AJz7EM(@Ia zW8>uJPz-un2B#vtu|fKd=e7zaRP1#AEuW3Cz%e*qHv@I+eh97O1o!i>|I_bCcl0e; zX)b$o9CW-pdRRkUh*dXZff3rvCsOM*nakCG<0Uvz*oN)x^o%<3er9?Mjy`Tv-M%@q^kTLB&o4yJB z_FwWvm2At@fC2fbWfe9C{Y=7Y$3w7>%gVp+*pnG>z zO6*^(n|00fuWtqxPrcl*>RS%U5b?(*IVP~LSrdFz@GRC>c`D#FIF0^!2;Vf*j|4qF zEAO^ezAqqP=BWkqC%T_uzEUu|^vVt187f_~M|rWH zjMOpqQU+GqoT#txT?YEDy>Uad1?*LYZr Date: Tue, 28 Jun 2022 17:25:38 -0600 Subject: [PATCH 117/172] fluids: Use ApplyAdd for dirichlet_libCEED BCs --- examples/fluids/src/dirichlet.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/examples/fluids/src/dirichlet.c b/examples/fluids/src/dirichlet.c index 8ba01f392a..f4837567c4 100644 --- a/examples/fluids/src/dirichlet.c +++ b/examples/fluids/src/dirichlet.c @@ -132,20 +132,26 @@ PetscErrorCode SetupStrongSTG_Ceed(Ceed ceed, CeedData ceed_data, DM dm, PetscErrorCode DMPlexInsertBoundaryValues_StrongBCCeed(DM dm, PetscBool insert_essential, Vec Q_loc, PetscReal time, Vec face_geom_FVM, Vec cell_geom_FVM, Vec grad_FVM) { - + Vec boundary_mask; User user; PetscScalar *q; PetscMemType 
q_mem_type; PetscFunctionBeginUser; PetscCall(DMGetApplicationContext(dm, &user)); + + // Mask Dirichlet entries + PetscCall(DMGetNamedLocalVector(dm, "boundary mask", &boundary_mask)); + PetscCall(VecPointwiseMult(Q_loc, Q_loc, boundary_mask)); + PetscCall(DMRestoreNamedLocalVector(dm, "boundary mask", &boundary_mask)); + // Setup libCEED vector PetscCall(VecGetArrayAndMemType(Q_loc, &q, &q_mem_type)); CeedVectorSetArray(user->q_ceed, MemTypeP2C(q_mem_type), CEED_USE_POINTER, q); // Apply libCEED operator - CeedOperatorApply(user->op_dirichlet, CEED_VECTOR_NONE, user->q_ceed, - CEED_REQUEST_IMMEDIATE); + CeedOperatorApplyAdd(user->op_dirichlet, CEED_VECTOR_NONE, user->q_ceed, + CEED_REQUEST_IMMEDIATE); // Restore PETSc vectors CeedVectorTakeArray(user->q_ceed, MemTypeP2C(q_mem_type), NULL); @@ -159,6 +165,18 @@ PetscErrorCode SetupStrongBC_Ceed(Ceed ceed, CeedData ceed_data, DM dm, SimpleBC bc, CeedInt Q_sur, CeedInt q_data_size_sur) { PetscFunctionBeginUser; + { + Vec boundary_mask, global_vec; + + PetscCall(DMGetNamedLocalVector(dm, "boundary mask", &boundary_mask)); + PetscCall(DMGetGlobalVector(dm, &global_vec)); + PetscCall(VecZeroEntries(boundary_mask)); + PetscCall(VecSet(global_vec, 1.0)); + PetscCall(DMGlobalToLocal(dm, global_vec, INSERT_VALUES, boundary_mask)); + PetscCall(DMRestoreNamedLocalVector(dm, "boundary mask", &boundary_mask)); + PetscCall(DMRestoreGlobalVector(dm, &global_vec)); + } + CeedCompositeOperatorCreate(ceed, &user->op_dirichlet); { PetscBool use_strongstg = PETSC_FALSE; From dcd0d0f3054a9244665ebd1f4480c81f2d63fee8 Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 28 Jun 2022 17:36:24 -0600 Subject: [PATCH 118/172] doc: Update release notes --- doc/sphinx/source/releasenotes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index 1dc6d48be7..f73c5cff24 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -28,6 +28,7 @@ 
On this page we provide a summary of the main API changes, new features and exam - Added various performance enhancements for {ref}`example-petsc-navier-stokes`. - Refactored {ref}`example-petsc-navier-stokes` to improve code reuse. - Added Shock Tube, Channel, and Flat Plate boundary layer problems to {ref}`example-petsc-navier-stokes`. +- Added ability to use QFunctions for strong STG inflow in {ref}`example-petsc-navier-stokes`. (v0-10-1)= From cf3d54fffc9ba47e12bc2058a7d1606296c22d88 Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 30 Jun 2022 12:06:17 -0600 Subject: [PATCH 119/172] fluids: Set all outputs in STG QF --- examples/fluids/qfunctions/stg_shur14.h | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index 0224e59897..a852a0bf8a 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -428,6 +428,7 @@ CEED_QFUNCTION(STGShur14_Inflow_StrongQF)(void *ctx, CeedInt Q, bcval[1][i] = scale[i] * rho * u[0]; bcval[2][i] = scale[i] * rho * u[1]; bcval[3][i] = scale[i] * rho * u[2]; + bcval[4][i] = 0.; } return 0; } From a939fbaba829436be4179987cb24722f19485c20 Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 30 Jun 2022 14:08:35 -0600 Subject: [PATCH 120/172] fluids: Minor improvements for dirichlet and stg --- examples/fluids/qfunctions/dirichlet_boundary.h | 8 +++++--- examples/fluids/qfunctions/stg_shur14.h | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/examples/fluids/qfunctions/dirichlet_boundary.h b/examples/fluids/qfunctions/dirichlet_boundary.h index 5e7d23dfd8..0e87d3731d 100644 --- a/examples/fluids/qfunctions/dirichlet_boundary.h +++ b/examples/fluids/qfunctions/dirichlet_boundary.h @@ -14,8 +14,10 @@ CEED_QFUNCTION(SetupDirichletBC)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // Inputs - const CeedScalar (*coords)[CEED_Q_VLA] = (const 
CeedScalar(*)[CEED_Q_VLA])in[0]; - const CeedScalar (*multiplicity) = (const CeedScalar(*))in[1]; + // *INDENT-OFF* + const CeedScalar (*coords)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0]; + const CeedScalar (*multiplicity)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1]; + // *INDENT-ON* // Outputs CeedScalar (*coords_stored)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; @@ -24,7 +26,7 @@ CEED_QFUNCTION(SetupDirichletBC)(void *ctx, CeedInt Q, CeedPragmaSIMD for(CeedInt i=0; i Date: Wed, 6 Jul 2022 16:53:17 -0600 Subject: [PATCH 121/172] fix(fluids): Create and use basis_xc_sur for dirichlet bcs --- examples/fluids/navierstokes.h | 3 ++- examples/fluids/src/dirichlet.c | 4 ++-- examples/fluids/src/setuplibceed.c | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h index 85f4c6d6f7..8612a7d3f4 100644 --- a/examples/fluids/navierstokes.h +++ b/examples/fluids/navierstokes.h @@ -131,7 +131,8 @@ struct CeedData_private { qf_setup_sur, qf_apply_inflow, qf_apply_inflow_jacobian, qf_apply_outflow, qf_apply_outflow_jacobian; - CeedBasis basis_x, basis_xc, basis_q, basis_x_sur, basis_q_sur; + CeedBasis basis_x, basis_xc, basis_q, basis_x_sur, basis_q_sur, + basis_xc_sur; CeedElemRestriction elem_restr_x, elem_restr_q, elem_restr_qd_i; CeedOperator op_setup_vol, op_ics; }; diff --git a/examples/fluids/src/dirichlet.c b/examples/fluids/src/dirichlet.c index f4837567c4..18e0c7c391 100644 --- a/examples/fluids/src/dirichlet.c +++ b/examples/fluids/src/dirichlet.c @@ -80,9 +80,9 @@ PetscErrorCode SetupStrongSTG_Ceed(Ceed ceed, CeedData ceed_data, DM dm, // -- Compute QData for the surface CeedOperatorCreate(ceed, ceed_data->qf_setup_sur, NULL, NULL, &op_setup_sur); CeedOperatorSetField(op_setup_sur, "dx", elem_restr_x_sur, - ceed_data->basis_x_sur, CEED_VECTOR_ACTIVE); + ceed_data->basis_xc_sur, CEED_VECTOR_ACTIVE); CeedOperatorSetField(op_setup_sur, "weight", 
CEED_ELEMRESTRICTION_NONE, - ceed_data->basis_x_sur, CEED_VECTOR_NONE); + ceed_data->basis_xc_sur, CEED_VECTOR_NONE); CeedOperatorSetField(op_setup_sur, "surface qdata", elem_restr_qd_sur, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c index 8ef8aa7345..05d3dc9985 100644 --- a/examples/fluids/src/setuplibceed.c +++ b/examples/fluids/src/setuplibceed.c @@ -585,6 +585,8 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, CEED_GAUSS, &ceed_data->basis_q_sur); CeedBasisCreateTensorH1Lagrange(ceed, dim_sur, num_comp_x, 2, Q_sur, CEED_GAUSS, &ceed_data->basis_x_sur); + CeedBasisCreateTensorH1Lagrange(ceed, dim_sur, num_comp_x, 2, P_sur, + CEED_GAUSS_LOBATTO, &ceed_data->basis_xc_sur); // ----------------------------------------------------------------------------- // CEED QFunctions From 3b0d37b7b48e6b9e226afa94eeb793b3e14bfbcc Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Wed, 6 Jul 2022 19:58:55 -0600 Subject: [PATCH 122/172] {cuda,hip}/gen: fix incorrect quadrature points when all bases are collocated https://github.com/CEED/libCEED/pull/1009#issuecomment-1176751436 Co-authored-by: Natalie Beams --- backends/cuda-gen/ceed-cuda-gen-operator-build.cpp | 5 +++++ backends/hip-gen/ceed-hip-gen-operator-build.cpp | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp index 5a78bd8d15..8fc35bfc38 100644 --- a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp +++ b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp @@ -815,10 +815,12 @@ extern "C" int CeedCudaGenOperatorBuild(CeedOperator op) { // Find dim and Q1d bool useCollograd = true; + bool allCollograd = true; data->maxP1d = 0; for (CeedInt i = 0; i < numinputfields; i++) { ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChkBackend(ierr); if (basis != CEED_BASIS_COLLOCATED) { + 
allCollograd = false; ierr = CeedBasisGetData(basis, &basis_data); CeedChkBackend(ierr); ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); CeedChkBackend(ierr); @@ -847,6 +849,7 @@ extern "C" int CeedCudaGenOperatorBuild(CeedOperator op) { ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); CeedChkBackend(ierr); if (basis != CEED_BASIS_COLLOCATED) { + allCollograd = false; ierr = CeedBasisGetData(basis, &basis_data); CeedChkBackend(ierr); ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); CeedChkBackend(ierr); @@ -869,6 +872,8 @@ extern "C" int CeedCudaGenOperatorBuild(CeedOperator op) { } data->dim = dim; data->Q1d = Q1d; + // TODO: https://github.com/CEED/libCEED/pull/1009#issuecomment-1176751436 + useCollograd &= !allCollograd; // Define CEED_Q_VLA if (dim != 3 || useCollograd) { diff --git a/backends/hip-gen/ceed-hip-gen-operator-build.cpp b/backends/hip-gen/ceed-hip-gen-operator-build.cpp index 9db75a5ae2..b04f589092 100644 --- a/backends/hip-gen/ceed-hip-gen-operator-build.cpp +++ b/backends/hip-gen/ceed-hip-gen-operator-build.cpp @@ -814,10 +814,12 @@ extern "C" int CeedHipGenOperatorBuild(CeedOperator op) { // Find dim and Q1d bool useCollograd = true; + bool allCollograd = true; data->maxP1d = 0; for (CeedInt i = 0; i < numinputfields; i++) { ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChkBackend(ierr); if (basis != CEED_BASIS_COLLOCATED) { + allCollograd = false; ierr = CeedBasisGetData(basis, &basis_data); CeedChkBackend(ierr); ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); CeedChkBackend(ierr); @@ -846,6 +848,7 @@ extern "C" int CeedHipGenOperatorBuild(CeedOperator op) { ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis); CeedChkBackend(ierr); if (basis != CEED_BASIS_COLLOCATED) { + allCollograd = false; ierr = CeedBasisGetData(basis, &basis_data); CeedChkBackend(ierr); ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); CeedChkBackend(ierr); @@ -868,6 +871,8 
@@ extern "C" int CeedHipGenOperatorBuild(CeedOperator op) { } data->dim = dim; data->Q1d = Q1d; + // TODO: https://github.com/CEED/libCEED/pull/1009#issuecomment-1176751436 + useCollograd &= !allCollograd; // Define CEED_Q_VLA if (dim != 3 || useCollograd) { From 86c7fc999aa022469f08d96430426945f8749adb Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Wed, 6 Jul 2022 20:00:56 -0600 Subject: [PATCH 123/172] examples/fluids: fix spelling in output --- examples/fluids/problems/eulervortex.c | 2 +- examples/fluids/problems/stg_shur14.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/fluids/problems/eulervortex.c b/examples/fluids/problems/eulervortex.c index 7c9748ca57..a1fc15436e 100644 --- a/examples/fluids/problems/eulervortex.c +++ b/examples/fluids/problems/eulervortex.c @@ -164,7 +164,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *ctx) { CeedQFunctionContextSetDataDestroy(euler_context, CEED_MEM_HOST, FreeContextPetsc); CeedQFunctionContextRegisterDouble(euler_context, "solution time", - offsetof(struct EulerContext_, curr_time), 1, "Phyiscal time of the solution"); + offsetof(struct EulerContext_, curr_time), 1, "Physical time of the solution"); CeedQFunctionContextReferenceCopy(euler_context, &problem->ics.qfunction_context); CeedQFunctionContextReferenceCopy(euler_context, diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c index 6882daf66e..ce19df942a 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -390,7 +390,7 @@ PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem, FreeContextPetsc); CeedQFunctionContextRegisterDouble(stg_context, "solution time", offsetof(struct STGShur14Context_, time), 1, - "Phyiscal time of the solution"); + "Physical time of the solution"); CeedQFunctionContextDestroy(&problem->ics.qfunction_context); problem->ics.qfunction = ICsSTG; From 
3796c4882ea508e8c50f59304fb2ef31152c921a Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Tue, 5 Jul 2022 13:10:36 -0600 Subject: [PATCH 124/172] examples/fluids: use DMGetCellCoordinateDM to handle periodicity Note that without -dm_sparse_localize 0 (default=1), the cell DM will only have cells with localized coordinates (and there is no vector representation that contains both). --- examples/fluids/src/setupdm.c | 1 + examples/fluids/src/setuplibceed.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/examples/fluids/src/setupdm.c b/examples/fluids/src/setupdm.c index a8a81d6c7b..e6aec20af3 100644 --- a/examples/fluids/src/setupdm.c +++ b/examples/fluids/src/setupdm.c @@ -25,6 +25,7 @@ PetscErrorCode CreateDM(MPI_Comm comm, ProblemData *problem, // Set Tensor elements ierr = PetscOptionsSetValue(NULL, "-dm_plex_simplex", "0"); CHKERRQ(ierr); + ierr = PetscOptionsSetValue(NULL, "-dm_sparse_localize", "0"); CHKERRQ(ierr); // Set CL options ierr = DMSetFromOptions(*dm); CHKERRQ(ierr); ierr = DMViewFromOptions(*dm, NULL, "-dm_view"); CHKERRQ(ierr); diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c index 05d3dc9985..488d9fde64 100644 --- a/examples/fluids/src/setuplibceed.c +++ b/examples/fluids/src/setuplibceed.c @@ -56,7 +56,10 @@ PetscErrorCode GetRestrictionForDomain(Ceed ceed, DM dm, CeedInt height, CHKERRQ(ierr); if (elem_restr_q) *elem_restr_q = elem_restr_tmp; if (elem_restr_x) { - ierr = DMGetCoordinateDM(dm, &dm_coord); CHKERRQ(ierr); + ierr = DMGetCellCoordinateDM(dm, &dm_coord); CHKERRQ(ierr); + if (!dm_coord) { + ierr = DMGetCoordinateDM(dm, &dm_coord); CHKERRQ(ierr); + } ierr = DMPlexSetClosurePermutationTensor(dm_coord, PETSC_DETERMINE, NULL); CHKERRQ(ierr); ierr = CreateRestrictionFromPlex(ceed, dm_coord, height, domain_label, value, @@ -462,7 +465,12 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user, // -- Copy PETSc vector in CEED vector Vec X_loc; const 
PetscScalar *X_loc_array; - ierr = DMGetCoordinatesLocal(dm, &X_loc); CHKERRQ(ierr); + { + DM cdm; + ierr = DMGetCellCoordinateDM(dm, &cdm); CHKERRQ(ierr); + if (cdm) {ierr = DMGetCellCoordinatesLocal(dm, &X_loc); CHKERRQ(ierr);} + else {ierr = DMGetCoordinatesLocal(dm, &X_loc); CHKERRQ(ierr);} + } ierr = VecScale(X_loc, problem->dm_scale); CHKERRQ(ierr); ierr = VecGetArrayRead(X_loc, &X_loc_array); CHKERRQ(ierr); CeedVectorSetArray(ceed_data->x_coord, CEED_MEM_HOST, CEED_COPY_VALUES, From e159aeac15e8e644cec5ea034206454e5074ed03 Mon Sep 17 00:00:00 2001 From: James Wright Date: Wed, 22 Jun 2022 17:14:14 -0600 Subject: [PATCH 125/172] fluids: Abstract out spectrum calculation --- examples/fluids/qfunctions/stg_shur14.h | 49 +++++++++++++++++++------ 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index edc1302cad..acf329b005 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -87,6 +87,39 @@ CEED_QFUNCTION_HELPER void InterpolateProfile(const CeedScalar dw, } } +/* + * @brief Calculate spectrum coefficient, qn + * + * Calculates q_n at a given distance to the wall + * + * @param[in] kappa nth wavenumber + * @param[in] dkappa Difference between wavenumbers + * @param[in] keta Dissipation wavenumber + * @param[in] kcut Mesh-induced cutoff wavenumber + * @param[in] ke Energy-containing wavenumber + * @param[in] Ektot Total turbulent kinetic energy of spectrum + * @returns qn Spectrum coefficient + */ +CeedScalar CEED_QFUNCTION_HELPER(Calc_qn)(const CeedScalar kappa, + const CeedScalar dkappa, const CeedScalar keta, const CeedScalar kcut, + const CeedScalar ke, const CeedScalar Ektot) { + const CeedScalar feta_x_fcut = exp(-Square(12*kappa/keta) + -Cube(4*Max(kappa - 0.9*kcut, 0)/kcut) ); + return pow(kappa/ke, 4.) 
* pow(1 + 2.4*Square(kappa/ke),-17./6) + *feta_x_fcut*dkappa/Ektot; +} + +// Calculate hmax, ke, keta, and kcut +void CEED_QFUNCTION_HELPER(SpectrumConstants)(const CeedScalar dw, + const CeedScalar eps, const CeedScalar lt, const CeedScalar h[3], + const CeedScalar nu, CeedScalar *hmax, CeedScalar *ke, + CeedScalar *keta, CeedScalar *kcut) { + *hmax = Max( Max(h[0], h[1]), h[2]); + *ke = dw==0 ? 1e16 : 2*M_PI/Min(2*dw, 3*lt); + *keta = 2*M_PI*pow(Cube(nu)/eps, -0.25); + *kcut = M_PI/ Min( Max(Max(h[1], h[2]), 0.3*(*hmax)) + 0.1*dw, *hmax ); +} + /* * @brief Calculate spectrum coefficients for STG * @@ -106,20 +139,12 @@ void CEED_QFUNCTION_HELPER(CalcSpectrum)(const CeedScalar dw, const CeedInt nmodes = stg_ctx->nmodes; const CeedScalar *kappa = &stg_ctx->data[stg_ctx->offsets.kappa]; - - const CeedScalar hmax = Max( Max(h[0], h[1]), h[2]); - const CeedScalar ke = dw==0 ? 1e16 : 2*M_PI/Min(2*dw, 3*lt); - const CeedScalar keta = 2*M_PI*pow(Cube(nu)/eps, -0.25); - const CeedScalar kcut = - M_PI/ Min( Max(Max(h[1], h[2]), 0.3*hmax) + 0.1*dw, hmax ); - CeedScalar fcut, feta, Ektot=0.0; + CeedScalar hmax, ke, keta, kcut, Ektot=0.0; + SpectrumConstants(dw, eps, lt, h, nu, &hmax, &ke, &keta, &kcut); for(CeedInt n=0; n Date: Thu, 30 Jun 2022 16:07:02 -0600 Subject: [PATCH 126/172] fluids: Add STG Preprocessing and Calc QFs --- examples/fluids/qfunctions/stg_shur14.h | 102 +++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index acf329b005..ce1b12aa6c 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -195,6 +195,106 @@ void CEED_QFUNCTION_HELPER(STGShur14_Calc)(const CeedScalar X[3], u[2] = ubar[2] + cij[4]*vp[0] + cij[5]*vp[1] + cij[2]*vp[2]; } +/****************************************************** + * @brief Calculate u(x,t) for STG inflow condition + * + * @param[in] X Location to evaluate u(X,t) + * 
@param[in] t Time to evaluate u(X,t) + * @param[in] ubar Mean velocity at X + * @param[in] cij Cholesky decomposition at X + * @param[in] qn Wavemode amplitudes at X, [nmodes] + * @param[out] u Velocity at X and t + * @param[in] stg_ctx STGShur14Context for the problem + */ +void CEED_QFUNCTION_HELPER(STGShur14_Calc_PrecompEktot)(const CeedScalar X[3], + const CeedScalar t, const CeedScalar ubar[3], const CeedScalar cij[6], + const CeedScalar Ektot, const CeedScalar h[3], const CeedScalar dw, + const CeedScalar eps, const CeedScalar lt, const CeedScalar nu, CeedScalar u[3], + const STGShur14Context stg_ctx) { + + //*INDENT-OFF* + const CeedInt nmodes = stg_ctx->nmodes; + const CeedScalar *kappa = &stg_ctx->data[stg_ctx->offsets.kappa]; + const CeedScalar *phi = &stg_ctx->data[stg_ctx->offsets.phi]; + const CeedScalar *sigma = &stg_ctx->data[stg_ctx->offsets.sigma]; + const CeedScalar *d = &stg_ctx->data[stg_ctx->offsets.d]; + //*INDENT-ON* + CeedScalar hmax, ke, keta, kcut; + SpectrumConstants(dw, eps, lt, h, nu, &hmax, &ke, &keta, &kcut); + CeedScalar xdotd, vp[3] = {0.}; + CeedScalar xhat[] = {0., X[1], X[2]}; + + CeedPragmaSIMD + for(CeedInt n=0; n<nmodes; n++) { + xhat[0] = (X[0] - stg_ctx->u0*t)*Max(2*kappa[0]/kappa[n], 0.1); + xdotd = 0.; + for(CeedInt i=0; i<3; i++) xdotd += d[i*nmodes+n]*xhat[i]; + const CeedScalar cos_kxdp = cos(kappa[n]*xdotd + phi[n]); + const CeedScalar dkappa = n==0 ? 
kappa[0] : kappa[n] - kappa[n-1]; + const CeedScalar qn = Calc_qn(kappa[n], dkappa, keta, kcut, ke, Ektot); + vp[0] += sqrt(qn)*sigma[0*nmodes+n] * cos_kxdp; + vp[1] += sqrt(qn)*sigma[1*nmodes+n] * cos_kxdp; + vp[2] += sqrt(qn)*sigma[2*nmodes+n] * cos_kxdp; + } + for(CeedInt i=0; i<3; i++) vp[i] *= 2*sqrt(1.5); + + u[0] = ubar[0] + cij[0]*vp[0]; + u[1] = ubar[1] + cij[3]*vp[0] + cij[1]*vp[1]; + u[2] = ubar[2] + cij[4]*vp[0] + cij[5]*vp[1] + cij[2]*vp[2]; +} + +CEED_QFUNCTION(Preprocess_STGShur14)(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) { + //*INDENT-OFF* + const CeedScalar (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[0], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[1]; + + CeedScalar (*stg_data) = (CeedScalar(*)) out[0]; + + //*INDENT-ON* + CeedScalar ubar[3], cij[6], eps, lt; + const STGShur14Context stg_ctx = (STGShur14Context) ctx; + const CeedScalar dx = stg_ctx->dx; + const CeedScalar mu = stg_ctx->newtonian_ctx.mu; + const CeedScalar theta0 = stg_ctx->theta0; + const CeedScalar P0 = stg_ctx->P0; + const CeedScalar cv = stg_ctx->newtonian_ctx.cv; + const CeedScalar cp = stg_ctx->newtonian_ctx.cp; + const CeedScalar Rd = cp - cv; + const CeedScalar rho = P0 / (Rd * theta0); + const CeedScalar nu = mu / rho; + + const CeedInt nmodes = stg_ctx->nmodes; + const CeedScalar *kappa = &stg_ctx->data[stg_ctx->offsets.kappa]; + CeedScalar hmax, ke, keta, kcut, Ektot=0.0; + + CeedPragmaSIMD + for(CeedInt i=0; inewtonian_ctx.cv; const CeedScalar cp = stg_ctx->newtonian_ctx.cp; const CeedScalar Rd = cp - cv; - const CeedScalar rho = P0 / (Rd * theta0); + const CeedScalar rho = P0 / (Rd * theta0); CeedPragmaSIMD for(CeedInt i=0; i Date: Fri, 1 Jul 2022 09:20:29 -0600 Subject: [PATCH 127/172] fluids: Implement stgdata for ektot --- examples/fluids/problems/stg_shur14.c | 29 ++++++++++++++--- examples/fluids/problems/stg_shur14.h | 6 +++- examples/fluids/qfunctions/stg_shur14.h | 13 +++++--- 
examples/fluids/src/dirichlet.c | 41 ++++++++++++++++++++----- 4 files changed, 71 insertions(+), 18 deletions(-) diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c index ce19df942a..e267d6ba79 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -490,8 +490,8 @@ PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem) { } PetscErrorCode SetupStrongSTG_QF(Ceed ceed, ProblemData *problem, - CeedInt num_comp_x, CeedInt num_comp_q, CeedInt q_data_size_sur, - CeedQFunction *pqf_strongbc) { + CeedInt num_comp_x, CeedInt num_comp_q, CeedInt stg_data_size, + CeedInt q_data_size_sur, CeedQFunction *pqf_strongbc) { CeedQFunction qf_strongbc; PetscFunctionBeginUser; @@ -499,9 +499,28 @@ PetscErrorCode SetupStrongSTG_QF(Ceed ceed, ProblemData *problem, STGShur14_Inflow_StrongQF_loc, &qf_strongbc); CeedQFunctionAddInput(qf_strongbc, "surface qdata", q_data_size_sur, CEED_EVAL_NONE); - CeedQFunctionAddInput(qf_strongbc, "x", num_comp_x, CEED_EVAL_NONE); - CeedQFunctionAddInput(qf_strongbc, "scale", 1, CEED_EVAL_NONE); - CeedQFunctionAddOutput(qf_strongbc, "q", num_comp_q, CEED_EVAL_NONE); + CeedQFunctionAddInput(qf_strongbc, "x", num_comp_x, CEED_EVAL_NONE); + CeedQFunctionAddInput(qf_strongbc, "scale", 1, CEED_EVAL_NONE); + CeedQFunctionAddInput(qf_strongbc, "stg data", stg_data_size, CEED_EVAL_NONE); + CeedQFunctionAddOutput(qf_strongbc, "q", num_comp_q, CEED_EVAL_NONE); + + CeedQFunctionSetContext(qf_strongbc, problem->ics.qfunction_context); + *pqf_strongbc = qf_strongbc; + PetscFunctionReturn(0); +} + +PetscErrorCode SetupStrongSTG_PreProcessing(Ceed ceed, ProblemData *problem, + CeedInt num_comp_x, CeedInt stg_data_size, CeedInt q_data_size_sur, + CeedQFunction *pqf_strongbc) { + + CeedQFunction qf_strongbc; + PetscFunctionBeginUser; + CeedQFunctionCreateInterior(ceed, 1, Preprocess_STGShur14, + Preprocess_STGShur14_loc, &qf_strongbc); + CeedQFunctionAddInput(qf_strongbc, 
"surface qdata", q_data_size_sur, + CEED_EVAL_NONE); + CeedQFunctionAddInput(qf_strongbc, "x", num_comp_x, CEED_EVAL_NONE); + CeedQFunctionAddOutput(qf_strongbc, "stg data", stg_data_size, CEED_EVAL_NONE); CeedQFunctionSetContext(qf_strongbc, problem->ics.qfunction_context); *pqf_strongbc = qf_strongbc; diff --git a/examples/fluids/problems/stg_shur14.h b/examples/fluids/problems/stg_shur14.h index 03cc010743..af7fb58874 100644 --- a/examples/fluids/problems/stg_shur14.h +++ b/examples/fluids/problems/stg_shur14.h @@ -19,5 +19,9 @@ extern PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, extern PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem); extern PetscErrorCode SetupStrongSTG_QF(Ceed ceed, ProblemData *problem, - CeedInt num_comp_x, CeedInt num_comp_q, + CeedInt num_comp_x, CeedInt num_comp_q, CeedInt stg_data_size, CeedInt q_data_size_sur, CeedQFunction *qf_strongbc); + +extern PetscErrorCode SetupStrongSTG_PreProcessing(Ceed ceed, + ProblemData *problem, CeedInt num_comp_x, CeedInt stg_data_size, + CeedInt q_data_size_sur, CeedQFunction *pqf_strongbc); diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index ce1b12aa6c..f5cc8a788f 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -244,7 +244,7 @@ void CEED_QFUNCTION_HELPER(STGShur14_Calc_PrecompEktot)(const CeedScalar X[3], } CEED_QFUNCTION(Preprocess_STGShur14)(void *ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) { + const CeedScalar *const *in, CeedScalar *const *out) { //*INDENT-OFF* const CeedScalar (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[0], (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[1]; @@ -266,7 +266,7 @@ CEED_QFUNCTION(Preprocess_STGShur14)(void *ctx, CeedInt Q, const CeedInt nmodes = stg_ctx->nmodes; const CeedScalar *kappa = &stg_ctx->data[stg_ctx->offsets.kappa]; - CeedScalar hmax, ke, keta, kcut, Ektot=0.0; + 
CeedScalar hmax, ke, keta, kcut; CeedPragmaSIMD for(CeedInt i=0; idim, num_comp_q = 5, num_elem, - elem_size; - CeedVector multiplicity, x_stored, scale_stored, q_data_sur; + elem_size, stg_data_size=1; + CeedVector multiplicity, x_stored, scale_stored, q_data_sur, stg_data; CeedBasis basis_x_to_q_sur; CeedElemRestriction elem_restr_x_sur, elem_restr_q_sur, elem_restr_x_stored, - elem_restr_scale, elem_restr_qd_sur; - CeedQFunction qf_setup, qf_strongbc; - CeedOperator op_setup, op_dirichlet_sub, op_setup_sur; + elem_restr_scale, elem_restr_qd_sur, elem_restr_stgdata; + CeedQFunction qf_setup, qf_strongbc, qf_stgdata; + CeedOperator op_setup, op_dirichlet_sub, op_setup_sur, op_stgdata; PetscFunctionBeginUser; DMLabel domain_label; @@ -38,6 +38,10 @@ PetscErrorCode SetupStrongSTG_Ceed(Ceed ceed, CeedData ceed_data, DM dm, CeedQFunctionAddOutput(qf_setup, "x stored", num_comp_x, CEED_EVAL_NONE); CeedQFunctionAddOutput(qf_setup, "scale", 1, CEED_EVAL_NONE); + // Setup STG Setup QFunction + PetscCall(SetupStrongSTG_PreProcessing(ceed, problem, num_comp_x, stg_data_size, + q_data_size_sur, &qf_stgdata)); + // Compute contribution on each boundary face for (CeedInt i=0; i < bc->num_inflow; i++) { // -- Restrictions @@ -59,6 +63,11 @@ PetscErrorCode SetupStrongSTG_Ceed(Ceed ceed, CeedData ceed_data, DM dm, CEED_STRIDES_BACKEND, &elem_restr_scale); CeedElemRestrictionCreateVector(elem_restr_scale, &scale_stored, NULL); + CeedElemRestrictionCreateStrided(ceed, num_elem, elem_size, stg_data_size, + num_elem * elem_size, + CEED_STRIDES_BACKEND, &elem_restr_stgdata); + CeedElemRestrictionCreateVector(elem_restr_stgdata, &stg_data, NULL); + CeedVectorCreate(ceed, q_data_size_sur*num_elem*elem_size, &q_data_sur); // -- Setup Operator @@ -89,9 +98,21 @@ PetscErrorCode SetupStrongSTG_Ceed(Ceed ceed, CeedData ceed_data, DM dm, CeedOperatorApply(op_setup_sur, ceed_data->x_coord, q_data_sur, CEED_REQUEST_IMMEDIATE); + // -- Compute STGData + CeedOperatorCreate(ceed, qf_stgdata, NULL, 
NULL, &op_stgdata); + CeedOperatorSetField(op_stgdata, "surface qdata", elem_restr_qd_sur, + CEED_BASIS_COLLOCATED, q_data_sur); + CeedOperatorSetField(op_stgdata, "x", elem_restr_x_stored, + CEED_BASIS_COLLOCATED, x_stored); + CeedOperatorSetField(op_stgdata, "stg data", elem_restr_stgdata, + CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + CeedOperatorSetNumQuadraturePoints(op_stgdata, elem_size); + + CeedOperatorApply(op_stgdata, NULL, stg_data, CEED_REQUEST_IMMEDIATE); + // -- Setup BC QFunctions - SetupStrongSTG_QF(ceed, problem, num_comp_x, num_comp_q, q_data_size_sur, - &qf_strongbc); + SetupStrongSTG_QF(ceed, problem, num_comp_x, num_comp_q, stg_data_size, + q_data_size_sur, &qf_strongbc); CeedOperatorCreate(ceed, qf_strongbc, NULL, NULL, &op_dirichlet_sub); CeedOperatorSetName(op_dirichlet_sub, "Strong STG"); @@ -101,6 +122,8 @@ PetscErrorCode SetupStrongSTG_Ceed(Ceed ceed, CeedData ceed_data, DM dm, CEED_BASIS_COLLOCATED, x_stored); CeedOperatorSetField(op_dirichlet_sub, "scale", elem_restr_scale, CEED_BASIS_COLLOCATED, scale_stored); + CeedOperatorSetField(op_dirichlet_sub, "stg data", elem_restr_stgdata, + CEED_BASIS_COLLOCATED, stg_data); CeedOperatorSetField(op_dirichlet_sub, "q", elem_restr_q_sur, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); CeedOperatorSetNumQuadraturePoints(op_dirichlet_sub, elem_size); @@ -112,15 +135,19 @@ PetscErrorCode SetupStrongSTG_Ceed(Ceed ceed, CeedData ceed_data, DM dm, CeedVectorDestroy(&multiplicity); CeedVectorDestroy(&x_stored); CeedVectorDestroy(&scale_stored); + CeedVectorDestroy(&stg_data); CeedElemRestrictionDestroy(&elem_restr_x_sur); CeedElemRestrictionDestroy(&elem_restr_q_sur); CeedElemRestrictionDestroy(&elem_restr_qd_sur); CeedElemRestrictionDestroy(&elem_restr_x_stored); CeedElemRestrictionDestroy(&elem_restr_scale); + CeedElemRestrictionDestroy(&elem_restr_stgdata); CeedQFunctionDestroy(&qf_strongbc); + CeedQFunctionDestroy(&qf_stgdata); CeedOperatorDestroy(&op_setup_sur); 
CeedOperatorDestroy(&op_dirichlet_sub); CeedOperatorDestroy(&op_setup); + CeedOperatorDestroy(&op_stgdata); } CeedBasisDestroy(&basis_x_to_q_sur); From 62e628f8051c3e9ced0bda56777e9b0e78a2a3fb Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 7 Jul 2022 08:09:59 -0600 Subject: [PATCH 128/172] fluids: Store 1/Ektot --- examples/fluids/qfunctions/stg_shur14.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index f5cc8a788f..c92710e2a4 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -102,11 +102,11 @@ CEED_QFUNCTION_HELPER void InterpolateProfile(const CeedScalar dw, */ CeedScalar CEED_QFUNCTION_HELPER(Calc_qn)(const CeedScalar kappa, const CeedScalar dkappa, const CeedScalar keta, const CeedScalar kcut, - const CeedScalar ke, const CeedScalar Ektot) { + const CeedScalar ke, const CeedScalar Ektot_inv) { const CeedScalar feta_x_fcut = exp(-Square(12*kappa/keta) -Cube(4*Max(kappa - 0.9*kcut, 0)/kcut) ); return pow(kappa/ke, 4.) * pow(1 + 2.4*Square(kappa/ke),-17./6) - *feta_x_fcut*dkappa/Ektot; + *feta_x_fcut*dkappa * Ektot_inv; } // Calculate hmax, ke, keta, and kcut @@ -243,6 +243,9 @@ void CEED_QFUNCTION_HELPER(STGShur14_Calc_PrecompEktot)(const CeedScalar X[3], u[2] = ubar[2] + cij[4]*vp[0] + cij[5]*vp[1] + cij[2]*vp[2]; } +// Create preprocessed input for the stg calculation +// +// stg_data[0] = 1 / Ektot (inverse of total spectrum energy) CEED_QFUNCTION(Preprocess_STGShur14)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { //*INDENT-OFF* @@ -291,6 +294,7 @@ CEED_QFUNCTION(Preprocess_STGShur14)(void *ctx, CeedInt Q, const CeedScalar dkappa = n==0 ? 
kappa[0] : kappa[n] - kappa[n-1]; stg_data[i] += Calc_qn(kappa[n], dkappa, keta, kcut, ke, 1.0); } + stg_data[i] = 1/stg_data[i]; } return 0; } @@ -517,7 +521,7 @@ CEED_QFUNCTION(STGShur14_Inflow_StrongQF)(void *ctx, CeedInt Q, //*INDENT-ON* const STGShur14Context stg_ctx = (STGShur14Context) ctx; - CeedScalar qn[STG_NMODES_MAX], u[3], ubar[3], cij[6], eps, lt; + CeedScalar u[3], ubar[3], cij[6], eps, lt; const bool mean_only = stg_ctx->mean_only; const CeedScalar dx = stg_ctx->dx; const CeedScalar mu = stg_ctx->newtonian_ctx.mu; @@ -544,10 +548,14 @@ CEED_QFUNCTION(STGShur14_Inflow_StrongQF)(void *ctx, CeedInt Q, InterpolateProfile(coords[1][i], ubar, cij, &eps, &lt, stg_ctx); if (!mean_only) { - STGShur14_Calc_PrecompEktot(x, time, ubar, cij, stg_data[0][i], - h, x[1], eps, lt, mu/rho, u, stg_ctx); - // CalcSpectrum(coords[1][i], eps, lt, h, mu/rho, qn, stg_ctx); - // STGShur14_Calc(x, time, ubar, cij, qn, u, stg_ctx); + if (1) { + STGShur14_Calc_PrecompEktot(x, time, ubar, cij, stg_data[0][i], + h, x[1], eps, lt, mu/rho, u, stg_ctx); + } else { // Original way + CeedScalar qn[STG_NMODES_MAX]; + CalcSpectrum(coords[1][i], eps, lt, h, mu/rho, qn, stg_ctx); + STGShur14_Calc(x, time, ubar, cij, qn, u, stg_ctx); + } } else { for (CeedInt j=0; j<3; j++) u[j] = ubar[j]; } From 41bdf1c9f3b677ca0cd3704eec13319d41b7356c Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 7 Jul 2022 11:42:32 -0600 Subject: [PATCH 129/172] op - early check that CEED_BASIS_COLLOCATED only has CEED_EVAL_NONE --- interface/ceed-operator.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c index 420e7d29cf..fb055dbd13 100644 --- a/interface/ceed-operator.c +++ b/interface/ceed-operator.c @@ -64,8 +64,8 @@ static int CeedOperatorCheckField(Ceed ceed, CeedQFunctionField qf_field, return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Field '%s' configured with CEED_EVAL_NONE must " "be used with CEED_BASIS_COLLOCATED", 
- // LCOV_EXCL_STOP qf_field->field_name); + // LCOV_EXCL_STOP ierr = CeedBasisGetDimension(b, &dim); CeedChk(ierr); ierr = CeedBasisGetNumComponents(b, &num_comp); CeedChk(ierr); if (r != CEED_ELEMRESTRICTION_NONE && restr_num_comp != num_comp) { @@ -78,6 +78,14 @@ static int CeedOperatorCheckField(Ceed ceed, CeedQFunctionField qf_field, num_comp); // LCOV_EXCL_STOP } + } else if (eval_mode != CEED_EVAL_NONE) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, + "Field '%s' configured with %s cannot " + "be used with CEED_BASIS_COLLOCATED", + qf_field->field_name, CeedEvalModes[eval_mode]); + // LCOV_EXCL_STOP + } // Field size switch(eval_mode) { From 151157ab1cfc57bfbb9fa07ee95eb9b023c32737 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 7 Jul 2022 11:55:44 -0600 Subject: [PATCH 130/172] doc - fix CEED_BASIS_COLLOCATED documentation --- include/ceed/ceed.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h index 59e67b457b..6a3b24926e 100644 --- a/include/ceed/ceed.h +++ b/include/ceed/ceed.h @@ -382,9 +382,7 @@ CEED_EXTERN CeedRequest *const CEED_REQUEST_ORDERED; CEED_EXTERN int CeedRequestWait(CeedRequest *req); /// Argument for CeedOperatorSetField that vector is collocated with -/// quadrature points, used with QFunction eval mode CEED_EVAL_NONE -/// or CEED_EVAL_INTERP only, not with CEED_EVAL_GRAD, CEED_EVAL_DIV, -/// or CEED_EVAL_CURL +/// quadrature points, only used with CeedEvalMode CEED_EVAL_NONE /// @ingroup CeedBasis CEED_EXTERN const CeedBasis CEED_BASIS_COLLOCATED; From 6d1815bbd7f0a859b76376071bc4958493de0d4d Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 7 Jul 2022 12:21:47 -0600 Subject: [PATCH 131/172] memcheck - add missing CeedChk --- backends/memcheck/ceed-memcheck-qfunctioncontext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/memcheck/ceed-memcheck-qfunctioncontext.c 
b/backends/memcheck/ceed-memcheck-qfunctioncontext.c index 51e0933032..54ac17cb4e 100644 --- a/backends/memcheck/ceed-memcheck-qfunctioncontext.c +++ b/backends/memcheck/ceed-memcheck-qfunctioncontext.c @@ -220,7 +220,7 @@ static int CeedQFunctionContextRestoreDataRead_Memcheck( "Context data changed while accessed in read-only mode"); // LCOV_EXCL_STOP - ierr = CeedFree(&impl->data_read_only_copy); + ierr = CeedFree(&impl->data_read_only_copy); CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } From a0c16c2cb731ed3a5ef5d20de4d6a11a20c753d3 Mon Sep 17 00:00:00 2001 From: rezgarshakeri Date: Fri, 8 Jul 2022 09:39:38 -0600 Subject: [PATCH 132/172] ceed-operator.c: Update CeedOperatorCheckField function for H(div) --- interface/ceed-operator.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c index fb055dbd13..3173d59b3d 100644 --- a/interface/ceed-operator.c +++ b/interface/ceed-operator.c @@ -88,6 +88,8 @@ static int CeedOperatorCheckField(Ceed ceed, CeedQFunctionField qf_field, } // Field size + CeedInt Q_comp; + ierr = CeedBasisGetNumQuadratureComponents(b, &Q_comp); CeedChk(ierr); switch(eval_mode) { case CEED_EVAL_NONE: if (size != restr_num_comp) @@ -100,14 +102,14 @@ static int CeedOperatorCheckField(Ceed ceed, CeedQFunctionField qf_field, // LCOV_EXCL_STOP break; case CEED_EVAL_INTERP: - if (size != num_comp) + if (size != num_comp*Q_comp) // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_DIMENSION, "Field '%s' of size %" CeedInt_FMT " and EvalMode %s: ElemRestriction/Basis has " CeedInt_FMT " components", qf_field->field_name, qf_field->size, CeedEvalModes[qf_field->eval_mode], - num_comp); + num_comp*Q_comp); // LCOV_EXCL_STOP break; case CEED_EVAL_GRAD: @@ -125,7 +127,15 @@ static int CeedOperatorCheckField(Ceed ceed, CeedQFunctionField qf_field, // No additional checks required break; case CEED_EVAL_DIV: - // Not implemented + if (size != num_comp) + // 
LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_DIMENSION, + "Field '%s' of size %" CeedInt_FMT + " and EvalMode %s: ElemRestriction/Basis has " + CeedInt_FMT " components", + qf_field->field_name, qf_field->size, CeedEvalModes[qf_field->eval_mode], + num_comp); + // LCOV_EXCL_STOP break; case CEED_EVAL_CURL: // Not implemented From a76a04e77a9db9309c4310fb27f031a0ea1f3c06 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 7 Jul 2022 14:38:44 -0600 Subject: [PATCH 133/172] basis - make CreateProjectionMatrix internal fn --- include/ceed/ceed.h | 1 - interface/ceed-basis.c | 194 ++++++++++++++++++++--------------------- 2 files changed, 97 insertions(+), 98 deletions(-) diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h index 6a3b24926e..322e68f879 100644 --- a/include/ceed/ceed.h +++ b/include/ceed/ceed.h @@ -552,7 +552,6 @@ CEED_EXTERN int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, const CeedScalar *q_ref, const CeedScalar *q_weights, CeedBasis *basis); CEED_EXTERN int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project); -CEED_EXTERN int CeedBasisCreateProjectionMatrix(CeedBasis basis_from, CeedBasis basis_to, CeedScalar **interp_project); CEED_EXTERN int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy); CEED_EXTERN int CeedBasisView(CeedBasis basis, FILE *stream); CEED_EXTERN int CeedBasisApply(CeedBasis basis, CeedInt num_elem, diff --git a/interface/ceed-basis.c b/interface/ceed-basis.c index 7fa279d247..865b2e25a6 100644 --- a/interface/ceed-basis.c +++ b/interface/ceed-basis.c @@ -170,6 +170,103 @@ static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, return CEED_ERROR_SUCCESS; } +/** + @brief Create the interpolation matrix for projection from the nodes of + `basis_from` to the nodes of `basis_to`. This projection is + given by `interp_project = interp_to^+ * interp_from`, where + the pesudoinverse `interp_to^+` is given by QR factorization. 
+ Note: `basis_from` and `basis_to` must have compatible quadrature + spaces. + + @param[in] basis_from CeedBasis to project from + @param[in] basis_to CeedBasis to project to + @param[out] interp_project Address of the variable where the newly created + projection matrix will be stored. + + @return An error code: 0 - success, otherwise - failure + + @ref Developer +**/ +static int CeedBasisCreateProjectionMatrix(CeedBasis basis_from, + CeedBasis basis_to, + CeedScalar **interp_project) { + int ierr; + Ceed ceed; + ierr = CeedBasisGetCeed(basis_to, &ceed); CeedChk(ierr); + + // Check for compatible quadrature spaces + CeedInt Q_to, Q_from; + ierr = CeedBasisGetNumQuadraturePoints(basis_to, &Q_to); CeedChk(ierr); + ierr = CeedBasisGetNumQuadraturePoints(basis_from, &Q_from); CeedChk(ierr); + if (Q_to != Q_from) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_DIMENSION, + "Bases must have compatible quadrature spaces"); + // LCOV_EXCL_STOP + + // Coarse to fine basis + CeedInt P_to, P_from, Q = Q_to; + bool is_tensor_to, is_tensor_from; + ierr = CeedBasisIsTensor(basis_to, &is_tensor_to); CeedChk(ierr); + ierr = CeedBasisIsTensor(basis_from, &is_tensor_from); CeedChk(ierr); + CeedScalar *interp_to, *interp_from, *tau; + if (is_tensor_to && is_tensor_from) { + ierr = CeedBasisGetNumNodes1D(basis_to, &P_to); CeedChk(ierr); + ierr = CeedBasisGetNumNodes1D(basis_from, &P_from); CeedChk(ierr); + ierr = CeedBasisGetNumQuadraturePoints1D(basis_from, &Q); CeedChk(ierr); + } else if (!is_tensor_to && !is_tensor_from) { + ierr = CeedBasisGetNumNodes(basis_to, &P_to); CeedChk(ierr); + ierr = CeedBasisGetNumNodes(basis_from, &P_from); CeedChk(ierr); + } else { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_MINOR, + "Bases must both be tensor or non-tensor"); + // LCOV_EXCL_STOP + } + + ierr = CeedMalloc(Q * P_from, &interp_from); CeedChk(ierr); + ierr = CeedMalloc(Q * P_to, &interp_to); CeedChk(ierr); + ierr = CeedCalloc(P_to * P_from, interp_project); 
CeedChk(ierr); + ierr = CeedMalloc(Q, &tau); CeedChk(ierr); + const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL; + if (is_tensor_to) { + ierr = CeedBasisGetInterp1D(basis_to, &interp_to_source); CeedChk(ierr); + ierr = CeedBasisGetInterp1D(basis_from, &interp_from_source); CeedChk(ierr); + } else { + ierr = CeedBasisGetInterp(basis_to, &interp_to_source); CeedChk(ierr); + ierr = CeedBasisGetInterp(basis_from, &interp_from_source); CeedChk(ierr); + } + memcpy(interp_to, interp_to_source, Q * P_to * sizeof(interp_to_source[0])); + memcpy(interp_from, interp_from_source, + Q * P_from * sizeof(interp_from_source[0])); + + // -- QR Factorization, interp_to = Q R + ierr = CeedQRFactorization(ceed, interp_to, tau, Q, P_to); CeedChk(ierr); + + // -- Apply Qtranspose, interp_to = Qtranspose interp_from + ierr = CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, + Q, P_from, P_to, P_from, 1); CeedChk(ierr); + + // -- Apply Rinv, interp_project = Rinv interp_c + for (CeedInt j = 0; j < P_from; j++) { // Column j + (*interp_project)[j + P_from * (P_to - 1)] = interp_from[j + P_from * + (P_to - 1)] / interp_to[P_to * P_to - 1]; + for (CeedInt i = P_to - 2; i >= 0; i--) { // Row i + (*interp_project)[j + P_from * i] = interp_from[j + P_from * i]; + for (CeedInt k = i+1; k < P_to; k++) { + (*interp_project)[j + P_from * i] -= interp_to[k + P_to * i]* + (*interp_project)[j + P_from * k]; + } + (*interp_project)[j + P_from * i] /= interp_to[i + P_to * i]; + } + } + ierr = CeedFree(&tau); CeedChk(ierr); + ierr = CeedFree(&interp_to); CeedChk(ierr); + ierr = CeedFree(&interp_from); CeedChk(ierr); + + return CEED_ERROR_SUCCESS; +} + /// @} /// ---------------------------------------------------------------------------- @@ -902,103 +999,6 @@ int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, return CEED_ERROR_SUCCESS; } -/** - @brief Create the interpolation matrix for projection from the nodes of - `basis_from` to the nodes of 
`basis_to`. This projection is - given by `interp_project = interp_to^+ * interp_from`, where - the pesudoinverse `interp_to^+` is given by QR factorization. - Note: `basis_from` and `basis_to` must have compatible quadrature - spaces. - - @param[in] basis_from CeedBasis to project from - @param[in] basis_to CeedBasis to project to - @param[out] interp_project Address of the variable where the newly created - projection matrix will be stored. - - @return An error code: 0 - success, otherwise - failure - - @ref User -**/ -int CeedBasisCreateProjectionMatrix(CeedBasis basis_from, - CeedBasis basis_to, - CeedScalar **interp_project) { - int ierr; - Ceed ceed; - ierr = CeedBasisGetCeed(basis_to, &ceed); CeedChk(ierr); - - // Check for compatible quadrature spaces - CeedInt Q_to, Q_from; - ierr = CeedBasisGetNumQuadraturePoints(basis_to, &Q_to); CeedChk(ierr); - ierr = CeedBasisGetNumQuadraturePoints(basis_from, &Q_from); CeedChk(ierr); - if (Q_to != Q_from) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_DIMENSION, - "Bases must have compatible quadrature spaces"); - // LCOV_EXCL_STOP - - // Coarse to fine basis - CeedInt P_to, P_from, Q = Q_to; - bool is_tensor_to, is_tensor_from; - ierr = CeedBasisIsTensor(basis_to, &is_tensor_to); CeedChk(ierr); - ierr = CeedBasisIsTensor(basis_from, &is_tensor_from); CeedChk(ierr); - CeedScalar *interp_to, *interp_from, *tau; - if (is_tensor_to && is_tensor_from) { - ierr = CeedBasisGetNumNodes1D(basis_to, &P_to); CeedChk(ierr); - ierr = CeedBasisGetNumNodes1D(basis_from, &P_from); CeedChk(ierr); - ierr = CeedBasisGetNumQuadraturePoints1D(basis_from, &Q); CeedChk(ierr); - } else if (!is_tensor_to && !is_tensor_from) { - ierr = CeedBasisGetNumNodes(basis_to, &P_to); CeedChk(ierr); - ierr = CeedBasisGetNumNodes(basis_from, &P_from); CeedChk(ierr); - } else { - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_MINOR, - "Bases must both be tensor or non-tensor"); - // LCOV_EXCL_STOP - } - - ierr = CeedMalloc(Q * P_from, 
&interp_from); CeedChk(ierr); - ierr = CeedMalloc(Q * P_to, &interp_to); CeedChk(ierr); - ierr = CeedCalloc(P_to * P_from, interp_project); CeedChk(ierr); - ierr = CeedMalloc(Q, &tau); CeedChk(ierr); - const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL; - if (is_tensor_to) { - ierr = CeedBasisGetInterp1D(basis_to, &interp_to_source); CeedChk(ierr); - ierr = CeedBasisGetInterp1D(basis_from, &interp_from_source); CeedChk(ierr); - } else { - ierr = CeedBasisGetInterp(basis_to, &interp_to_source); CeedChk(ierr); - ierr = CeedBasisGetInterp(basis_from, &interp_from_source); CeedChk(ierr); - } - memcpy(interp_to, interp_to_source, Q * P_to * sizeof(interp_to_source[0])); - memcpy(interp_from, interp_from_source, - Q * P_from * sizeof(interp_from_source[0])); - - // -- QR Factorization, interp_to = Q R - ierr = CeedQRFactorization(ceed, interp_to, tau, Q, P_to); CeedChk(ierr); - - // -- Apply Qtranspose, interp_to = Qtranspose interp_from - ierr = CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, - Q, P_from, P_to, P_from, 1); CeedChk(ierr); - - // -- Apply Rinv, interp_project = Rinv interp_c - for (CeedInt j = 0; j < P_from; j++) { // Column j - (*interp_project)[j + P_from * (P_to - 1)] = interp_from[j + P_from * - (P_to - 1)] / interp_to[P_to * P_to - 1]; - for (CeedInt i = P_to - 2; i >= 0; i--) { // Row i - (*interp_project)[j + P_from * i] = interp_from[j + P_from * i]; - for (CeedInt k = i+1; k < P_to; k++) { - (*interp_project)[j + P_from * i] -= interp_to[k + P_to * i]* - (*interp_project)[j + P_from * k]; - } - (*interp_project)[j + P_from * i] /= interp_to[i + P_to * i]; - } - } - ierr = CeedFree(&tau); CeedChk(ierr); - ierr = CeedFree(&interp_to); CeedChk(ierr); - ierr = CeedFree(&interp_from); CeedChk(ierr); - - return CEED_ERROR_SUCCESS; -} - /** @brief Copy the pointer to a CeedBasis. 
Both pointers should be destroyed with `CeedBasisDestroy()`; From 14556e6313a53166f141434c5f96b2c98b83e854 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 7 Jul 2022 17:05:15 -0600 Subject: [PATCH 134/172] basis - add gradient to projection bases --- interface/ceed-basis.c | 112 ++++++++++++++++++------------- tests/t319-basis.c | 145 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+), 45 deletions(-) create mode 100644 tests/t319-basis.c diff --git a/interface/ceed-basis.c b/interface/ceed-basis.c index 865b2e25a6..e47d9b7020 100644 --- a/interface/ceed-basis.c +++ b/interface/ceed-basis.c @@ -171,25 +171,27 @@ static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, } /** - @brief Create the interpolation matrix for projection from the nodes of - `basis_from` to the nodes of `basis_to`. This projection is - given by `interp_project = interp_to^+ * interp_from`, where - the pesudoinverse `interp_to^+` is given by QR factorization. + @brief Create the interpolation and gradient matrices for projection from + the nodes of `basis_from` to the nodes of `basis_to`. + The interpolation is given by `interp_project = interp_to^+ * interp_from`, + where the pesudoinverse `interp_to^+` is given by QR factorization. + The gradient is given by `grad_project = interp_to^+ * grad_from`. Note: `basis_from` and `basis_to` must have compatible quadrature spaces. @param[in] basis_from CeedBasis to project from @param[in] basis_to CeedBasis to project to @param[out] interp_project Address of the variable where the newly created - projection matrix will be stored. + interpolation matrix will be stored. + @param[out] grad_project Address of the variable where the newly created + gradient matrix will be stored. 
@return An error code: 0 - success, otherwise - failure @ref Developer **/ -static int CeedBasisCreateProjectionMatrix(CeedBasis basis_from, - CeedBasis basis_to, - CeedScalar **interp_project) { +static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from, + CeedBasis basis_to, CeedScalar **interp_project, CeedScalar **grad_project) { int ierr; Ceed ceed; ierr = CeedBasisGetCeed(basis_to, &ceed); CeedChk(ierr); @@ -204,12 +206,11 @@ static int CeedBasisCreateProjectionMatrix(CeedBasis basis_from, "Bases must have compatible quadrature spaces"); // LCOV_EXCL_STOP - // Coarse to fine basis + // Check for matching tensor or non-tensor CeedInt P_to, P_from, Q = Q_to; bool is_tensor_to, is_tensor_from; ierr = CeedBasisIsTensor(basis_to, &is_tensor_to); CeedChk(ierr); ierr = CeedBasisIsTensor(basis_from, &is_tensor_from); CeedChk(ierr); - CeedScalar *interp_to, *interp_from, *tau; if (is_tensor_to && is_tensor_from) { ierr = CeedBasisGetNumNodes1D(basis_to, &P_to); CeedChk(ierr); ierr = CeedBasisGetNumNodes1D(basis_from, &P_from); CeedChk(ierr); @@ -224,42 +225,63 @@ static int CeedBasisCreateProjectionMatrix(CeedBasis basis_from, // LCOV_EXCL_STOP } + // Get source matrices + CeedInt dim; + CeedScalar *interp_to, *interp_from, *tau; + ierr = CeedBasisGetDimension(basis_to, &dim); CeedChk(ierr); ierr = CeedMalloc(Q * P_from, &interp_from); CeedChk(ierr); ierr = CeedMalloc(Q * P_to, &interp_to); CeedChk(ierr); ierr = CeedCalloc(P_to * P_from, interp_project); CeedChk(ierr); + ierr = CeedCalloc(P_to * P_from * (is_tensor_to ? 
1 : dim), grad_project); + CeedChk(ierr); ierr = CeedMalloc(Q, &tau); CeedChk(ierr); - const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL; + const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL, + *grad_from_source; if (is_tensor_to) { ierr = CeedBasisGetInterp1D(basis_to, &interp_to_source); CeedChk(ierr); ierr = CeedBasisGetInterp1D(basis_from, &interp_from_source); CeedChk(ierr); + ierr = CeedBasisGetGrad1D(basis_from, &grad_from_source); CeedChk(ierr); } else { ierr = CeedBasisGetInterp(basis_to, &interp_to_source); CeedChk(ierr); ierr = CeedBasisGetInterp(basis_from, &interp_from_source); CeedChk(ierr); + ierr = CeedBasisGetGrad(basis_from, &grad_from_source); CeedChk(ierr); + } + + // Build matrices + CeedInt num_matrices = 1 + (is_tensor_to ? 1 : dim); + CeedScalar *input_from[num_matrices], *output_project[num_matrices]; + input_from[0] = (CeedScalar *)interp_from_source; + output_project[0] = *interp_project; + for (CeedInt m = 1; m < num_matrices; m++) { + input_from[m] = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from]; + output_project[m] = &(*grad_project[(m - 1) * P_to * P_from]); } - memcpy(interp_to, interp_to_source, Q * P_to * sizeof(interp_to_source[0])); - memcpy(interp_from, interp_from_source, - Q * P_from * sizeof(interp_from_source[0])); - - // -- QR Factorization, interp_to = Q R - ierr = CeedQRFactorization(ceed, interp_to, tau, Q, P_to); CeedChk(ierr); - - // -- Apply Qtranspose, interp_to = Qtranspose interp_from - ierr = CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, - Q, P_from, P_to, P_from, 1); CeedChk(ierr); - - // -- Apply Rinv, interp_project = Rinv interp_c - for (CeedInt j = 0; j < P_from; j++) { // Column j - (*interp_project)[j + P_from * (P_to - 1)] = interp_from[j + P_from * - (P_to - 1)] / interp_to[P_to * P_to - 1]; - for (CeedInt i = P_to - 2; i >= 0; i--) { // Row i - (*interp_project)[j + P_from * i] = interp_from[j + P_from * i]; - for (CeedInt k = i+1; k < 
P_to; k++) { - (*interp_project)[j + P_from * i] -= interp_to[k + P_to * i]* - (*interp_project)[j + P_from * k]; + for (CeedInt m = 0; m < num_matrices; m++) { + // -- QR Factorization, interp_to = Q R + memcpy(interp_to, interp_to_source, Q * P_to * sizeof(interp_to_source[0])); + ierr = CeedQRFactorization(ceed, interp_to, tau, Q, P_to); CeedChk(ierr); + + // -- Apply Qtranspose, interp_to = Qtranspose interp_from + memcpy(interp_from, input_from[m], Q * P_from * sizeof(input_from[m][0])); + ierr = CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, + Q, P_from, P_to, P_from, 1); CeedChk(ierr); + + // -- Apply Rinv, interp_project = Rinv interp_c + for (CeedInt j = 0; j < P_from; j++) { // Column j + output_project[m][j + P_from * (P_to - 1)] = interp_from[j + P_from * + (P_to - 1)] / interp_to[P_to * P_to - 1]; + for (CeedInt i = P_to - 2; i >= 0; i--) { // Row i + output_project[m][j + P_from * i] = interp_from[j + P_from * i]; + for (CeedInt k = i+1; k < P_to; k++) { + output_project[m][j + P_from * i] -= interp_to[k + P_to * i]* + output_project[m][j + P_from * k]; + } + output_project[m][j + P_from * i] /= interp_to[i + P_to * i]; } - (*interp_project)[j + P_from * i] /= interp_to[i + P_to * i]; } } + + // Cleanup ierr = CeedFree(&tau); CeedChk(ierr); ierr = CeedFree(&interp_to); CeedChk(ierr); ierr = CeedFree(&interp_from); CeedChk(ierr); @@ -928,10 +950,11 @@ int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, /** @brief Create a CeedBasis for projection from the nodes of `basis_from` - to the nodes of `basis_to`. Only `CEED_EVAL_INTERP` will be - valid for the new basis, `basis_project`. This projection is - given by `interp_project = interp_to^+ * interp_from`, where - the pesudoinverse `interp_to^+` is given by QR factorization. + to the nodes of `basis_to`. Only `CEED_EVAL_INTERP` and + `CEED_EVAL_GRAD` will be valid for the new basis, `basis_project`. 
+ The interpolation is given by `interp_project = interp_to^+ * interp_from`, + where the pesudoinverse `interp_to^+` is given by QR factorization. + The gradient is given by `grad_project = interp_to^+ * grad_from`. Note: `basis_from` and `basis_to` must have compatible quadrature spaces. Note: `basis_project` will have the same number of components as @@ -955,14 +978,15 @@ int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, ierr = CeedBasisGetCeed(basis_to, &ceed); CeedChk(ierr); // Create projectior matrix - CeedScalar *interp_project; - ierr = CeedBasisCreateProjectionMatrix(basis_from, basis_to, - &interp_project); CeedChk(ierr); + CeedScalar *interp_project, *grad_project; + ierr = CeedBasisCreateProjectionMatrices(basis_from, basis_to, + &interp_project, &grad_project); + CeedChk(ierr); // Build basis bool is_tensor; CeedInt dim, num_comp; - CeedScalar *q_ref, *q_weight, *grad; + CeedScalar *q_ref, *q_weight; ierr = CeedBasisIsTensor(basis_to, &is_tensor); CeedChk(ierr); ierr = CeedBasisGetDimension(basis_to, &dim); CeedChk(ierr); ierr = CeedBasisGetNumComponents(basis_from, &num_comp); CeedChk(ierr); @@ -972,9 +996,8 @@ int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, ierr = CeedBasisGetNumNodes1D(basis_to, &P_1d_to); CeedChk(ierr); ierr = CeedCalloc(P_1d_to, &q_ref); CeedChk(ierr); ierr = CeedCalloc(P_1d_to, &q_weight); CeedChk(ierr); - ierr = CeedCalloc(P_1d_to * P_1d_from * dim, &grad); CeedChk(ierr); ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to, - interp_project, grad, q_ref, q_weight, basis_project); + interp_project, grad_project, q_ref, q_weight, basis_project); CeedChk(ierr); } else { CeedElemTopology topo; @@ -984,17 +1007,16 @@ int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, ierr = CeedBasisGetNumNodes(basis_to, &num_nodes_to); CeedChk(ierr); ierr = CeedCalloc(num_nodes_to * dim, &q_ref); CeedChk(ierr); ierr = CeedCalloc(num_nodes_to, &q_weight); 
CeedChk(ierr); - ierr = CeedCalloc(num_nodes_to * num_nodes_from * dim, &grad); CeedChk(ierr); ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to, - interp_project, grad, q_ref, q_weight, basis_project); + interp_project, grad_project, q_ref, q_weight, basis_project); CeedChk(ierr); } // Cleanup ierr = CeedFree(&interp_project); CeedChk(ierr); + ierr = CeedFree(&grad_project); CeedChk(ierr); ierr = CeedFree(&q_ref); CeedChk(ierr); ierr = CeedFree(&q_weight); CeedChk(ierr); - ierr = CeedFree(&grad); CeedChk(ierr); return CEED_ERROR_SUCCESS; } diff --git a/tests/t319-basis.c b/tests/t319-basis.c new file mode 100644 index 0000000000..37c6f74718 --- /dev/null +++ b/tests/t319-basis.c @@ -0,0 +1,145 @@ +/// @file +/// Test projection interp and grad in multiple dimensions +/// \test Test projection interp and grad in multiple dimensions +#include +#include + +static CeedScalar Eval(CeedInt dim, const CeedScalar x[]) { + CeedScalar result = (x[0] + 0.1) * (x[0] + 0.1); + if (dim > 1) result += (x[1] + 0.2) * (x[1] + 0.2); + if (dim > 2) result += -(x[2] + 0.3) * (x[2] + 0.3); + return result; +} + +static CeedScalar EvalGrad(CeedInt dim, const CeedScalar x[]) { + switch (dim) { + case 0: return 2 * x[0] + 0.2; + case 1: return 2 * x[1] + 0.4; + default: return -2 * x[2] - 0.6; + } +} + +static CeedScalar GetTolerance(CeedScalarType scalar_type, int dim) { + CeedScalar tol; + if (scalar_type == CEED_SCALAR_FP32) { + if (dim == 3) + tol = 1.e-4; + else + tol = 1.e-5; + } else { + tol = 1.e-11; + } + return tol; +} + +int main(int argc, char **argv) { + Ceed ceed; + + CeedInit(argv[1], &ceed); + + for (CeedInt dim = 1; dim <= 3; dim++) { + CeedVector X_corners, X_from, X_to, U_from, U_to, dU_to; + CeedBasis basis_x, basis_from, basis_to, basis_project; + CeedInt P_from = 5, P_to = 6, Q = 7, X_dim = CeedIntPow(2, dim), + P_from_dim = CeedIntPow(P_from, dim), P_to_dim = CeedIntPow(P_to, dim); + CeedScalar x[X_dim * dim], u_from[P_from_dim]; + const 
CeedScalar *u_to, *du_to, *x_from, *x_to; + + for (CeedInt d=0; d tol) + // LCOV_EXCL_START + printf("[%" CeedInt_FMT ", %" CeedInt_FMT "] %f != %f\n", dim, i, u_to[i], u); + // LCOV_EXCL_STOP + } + CeedVectorRestoreArrayRead(X_to, &x_to); + CeedVectorRestoreArrayRead(U_to, &u_to); + + // Project and take gradient + CeedBasisApply(basis_project, 1, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, U_from, + dU_to); + + // Check solution + CeedVectorGetArrayRead(dU_to, CEED_MEM_HOST, &du_to); + CeedVectorGetArrayRead(X_to, CEED_MEM_HOST, &x_to); + for (CeedInt i=0; i tol) + // LCOV_EXCL_START + printf("[%" CeedInt_FMT ", %" CeedInt_FMT ", %" CeedInt_FMT "] %f != %f\n", dim, + i, d, du_to[P_to_dim * (dim - 1 - d) + i], du); + // LCOV_EXCL_STOP + } + } + CeedVectorRestoreArrayRead(X_to, &x_to); + CeedVectorRestoreArrayRead(dU_to, &du_to); + + CeedVectorDestroy(&X_corners); + CeedVectorDestroy(&X_from); + CeedVectorDestroy(&X_to); + CeedVectorDestroy(&U_from); + CeedVectorDestroy(&U_to); + CeedVectorDestroy(&dU_to); + CeedBasisDestroy(&basis_from); + CeedBasisDestroy(&basis_to); + CeedBasisDestroy(&basis_project); + } + CeedDestroy(&ceed); + return 0; +} From 4d5194fa3c1fae197ed44574f4321969d70d5a61 Mon Sep 17 00:00:00 2001 From: rezgarshakeri Date: Fri, 8 Jul 2022 11:16:27 -0600 Subject: [PATCH 135/172] moved CeedBasisGetNumQuadratureComponents to line 72 --- interface/ceed-operator.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c index 3173d59b3d..02b730fe92 100644 --- a/interface/ceed-operator.c +++ b/interface/ceed-operator.c @@ -37,7 +37,8 @@ static int CeedOperatorCheckField(Ceed ceed, CeedQFunctionField qf_field, CeedElemRestriction r, CeedBasis b) { int ierr; CeedEvalMode eval_mode = qf_field->eval_mode; - CeedInt dim = 1, num_comp = 1, restr_num_comp = 1, size = qf_field->size; + CeedInt dim = 1, num_comp = 1, Q_comp = 1, restr_num_comp = 1, + size = qf_field->size; // Restriction if (r != 
CEED_ELEMRESTRICTION_NONE) { if (eval_mode == CEED_EVAL_WEIGHT) { @@ -68,14 +69,14 @@ static int CeedOperatorCheckField(Ceed ceed, CeedQFunctionField qf_field, // LCOV_EXCL_STOP ierr = CeedBasisGetDimension(b, &dim); CeedChk(ierr); ierr = CeedBasisGetNumComponents(b, &num_comp); CeedChk(ierr); + ierr = CeedBasisGetNumQuadratureComponents(b, &Q_comp); CeedChk(ierr); if (r != CEED_ELEMRESTRICTION_NONE && restr_num_comp != num_comp) { // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_DIMENSION, "Field '%s' of size %" CeedInt_FMT " and EvalMode %s: ElemRestriction " "has %" CeedInt_FMT " components, but Basis has %" CeedInt_FMT " components", qf_field->field_name, qf_field->size, CeedEvalModes[qf_field->eval_mode], - restr_num_comp, - num_comp); + restr_num_comp, num_comp); // LCOV_EXCL_STOP } } else if (eval_mode != CEED_EVAL_NONE) { @@ -88,8 +89,6 @@ static int CeedOperatorCheckField(Ceed ceed, CeedQFunctionField qf_field, } // Field size - CeedInt Q_comp; - ierr = CeedBasisGetNumQuadratureComponents(b, &Q_comp); CeedChk(ierr); switch(eval_mode) { case CEED_EVAL_NONE: if (size != restr_num_comp) From 02af40365181b955bbc0cd1b2932ebe55916c777 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Fri, 8 Jul 2022 15:55:03 -0600 Subject: [PATCH 136/172] basis - fix pointer pointer mistake --- interface/ceed-basis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interface/ceed-basis.c b/interface/ceed-basis.c index e47d9b7020..1e4e179554 100644 --- a/interface/ceed-basis.c +++ b/interface/ceed-basis.c @@ -254,7 +254,7 @@ static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from, output_project[0] = *interp_project; for (CeedInt m = 1; m < num_matrices; m++) { input_from[m] = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from]; - output_project[m] = &(*grad_project[(m - 1) * P_to * P_from]); + output_project[m] = &((*grad_project)[(m - 1) * P_to * P_from]); } for (CeedInt m = 0; m < num_matrices; m++) { // -- QR Factorization, interp_to = Q 
R From 16efe9613178a5046128a487694c679a19b22026 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Fri, 8 Jul 2022 16:01:24 -0600 Subject: [PATCH 137/172] pc - fix small leak --- interface/ceed-preconditioning.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index 33143da699..0d0e5be8c3 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -1455,6 +1455,8 @@ int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) { ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr); ierr = CeedBasisDestroy(&(*data)->basis_in); CeedChk(ierr); ierr = CeedBasisDestroy(&(*data)->basis_out); CeedChk(ierr); + ierr = CeedFree(&(*data)->eval_mode_in); CeedChk(ierr); + ierr = CeedFree(&(*data)->eval_mode_out); CeedChk(ierr); ierr = CeedFree(&(*data)->B_in); CeedChk(ierr); ierr = CeedFree(&(*data)->B_out); CeedChk(ierr); From 996d9ab5c54332ca0e6e2ccc7c2249de90aa4aa0 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Fri, 17 Jun 2022 00:06:33 -0600 Subject: [PATCH 138/172] CeedCompositeOperatorCreate: fix initial reference count --- interface/ceed-operator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c index 02b730fe92..e7cdf1352c 100644 --- a/interface/ceed-operator.c +++ b/interface/ceed-operator.c @@ -645,6 +645,7 @@ int CeedCompositeOperatorCreate(Ceed ceed, CeedOperator *op) { ierr = CeedCalloc(1, op); CeedChk(ierr); (*op)->ceed = ceed; ierr = CeedReference(ceed); CeedChk(ierr); + (*op)->ref_count = 1; (*op)->is_composite = true; ierr = CeedCalloc(CEED_COMPOSITE_MAX, &(*op)->sub_operators); CeedChk(ierr); (*op)->input_size = -1; From 544be873e4f477177e7f8c17bfd35ebcd452b114 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Sun, 10 Jul 2022 15:47:02 -0600 Subject: [PATCH 139/172] examples/fluids: use shell for Amat, block diagonal aij* for Pmat This adds two independent options for tuning solves. 
-amat_type shell: causes Amat and Pmat to be different matrices, with shell used for Amat. It's technically possible to use other matrix formats here. -pmat_pbdiagonal: causes Pmat to be assembled as point-block diagonal. Note that this usually only makes sense in addition to -amat_type shell because otherwise your KSP is only working with the point-block diagonal. --- examples/fluids/navierstokes.c | 5 +- examples/fluids/navierstokes.h | 6 ++- examples/fluids/src/cloptions.c | 13 +++++ examples/fluids/src/setupts.c | 91 ++++++++++++++++++++++++++------- 4 files changed, 94 insertions(+), 21 deletions(-) diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index b00ea8e4ac..243f8c407e 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -307,7 +307,8 @@ int main(int argc, char **argv) { CeedVectorDestroy(&user->q_ceed); CeedVectorDestroy(&user->q_dot_ceed); CeedVectorDestroy(&user->g_ceed); - CeedVectorDestroy(&user->coo_values); + CeedVectorDestroy(&user->coo_values_amat); + CeedVectorDestroy(&user->coo_values_pmat); // -- QFunctions CeedQFunctionDestroy(&ceed_data->qf_setup_vol); @@ -366,6 +367,8 @@ int main(int argc, char **argv) { // -- Function list ierr = PetscFunctionListDestroy(&app_ctx->problems); CHKERRQ(ierr); + PetscCall(PetscFree(app_ctx->amat_type)); + // -- Structs ierr = PetscFree(units); CHKERRQ(ierr); ierr = PetscFree(user); CHKERRQ(ierr); diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h index 8612a7d3f4..d90e59fb59 100644 --- a/examples/fluids/navierstokes.h +++ b/examples/fluids/navierstokes.h @@ -110,6 +110,9 @@ struct AppCtx_private { char ceed_resource[PETSC_MAX_PATH_LEN]; // libCEED backend PetscInt degree; PetscInt q_extra; + // Solver arguments + MatType amat_type; + PetscBool pmat_pbdiagonal; // Post-processing arguments PetscInt output_freq; PetscInt viz_refine; @@ -148,7 +151,8 @@ struct User_private { Vec M, Q_loc, Q_dot_loc; Physics phys; AppCtx 
app_ctx; - CeedVector q_ceed, q_dot_ceed, g_ceed, coo_values, x_ceed; + CeedVector q_ceed, q_dot_ceed, g_ceed, coo_values_amat, coo_values_pmat, + x_ceed; CeedOperator op_rhs_vol, op_rhs, op_ifunction_vol, op_ifunction, op_ijacobian, op_dirichlet; bool matrices_set_up; diff --git a/examples/fluids/src/cloptions.c b/examples/fluids/src/cloptions.c index fd1471912d..baea78d4d5 100644 --- a/examples/fluids/src/cloptions.c +++ b/examples/fluids/src/cloptions.c @@ -98,6 +98,19 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx, ierr = PetscOptionsInt("-q_extra", "Number of extra quadrature points", NULL, app_ctx->q_extra, &app_ctx->q_extra, NULL); CHKERRQ(ierr); + { + PetscBool option_set; + char amat_type[256] = ""; + PetscCall(PetscOptionsFList("-amat_type", + "Set the type of Amat distinct from Pmat (-dm_mat_type)", + NULL, MatList, amat_type, amat_type, sizeof(amat_type), &option_set)); + if (option_set) PetscCall(PetscStrallocpy(amat_type, + (char **)&app_ctx->amat_type)); + } + PetscCall(PetscOptionsBool("-pmat_pbdiagonal", + "Assemble only point-block diagonal for Pmat", NULL, app_ctx->pmat_pbdiagonal, + &app_ctx->pmat_pbdiagonal, NULL)); + ierr = PetscStrncpy(app_ctx->output_dir, ".", 2); CHKERRQ(ierr); ierr = PetscOptionsString("-output_dir", "Output directory", NULL, app_ctx->output_dir, app_ctx->output_dir, diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c index e3206cd188..194b57e880 100644 --- a/examples/fluids/src/setupts.c +++ b/examples/fluids/src/setupts.c @@ -281,6 +281,59 @@ PetscErrorCode MatGetDiagonal_NS_IJacobian(Mat A, Vec D) { PetscFunctionReturn(0); } +static PetscErrorCode FormPreallocation(User user, PetscBool pbdiagonal, Mat J, + CeedVector *coo_values) { + PetscCount ncoo; + PetscInt *rows, *cols; + + PetscFunctionBeginUser; + if (pbdiagonal) { + CeedSize l_size; + CeedOperatorGetActiveVectorLengths(user->op_ijacobian, &l_size, NULL); + ncoo = l_size * 5; + rows = malloc(ncoo*sizeof(rows[0])); 
+ cols = malloc(ncoo*sizeof(cols[0])); + for (PetscCount n=0; nop_ijacobian, &ncoo, &rows, + &cols)); + } + PetscCall(MatSetPreallocationCOOLocal(J, ncoo, rows, cols)); + free(rows); + free(cols); + CeedVectorCreate(user->ceed, ncoo, coo_values); + PetscFunctionReturn(0); +} + +static PetscErrorCode FormSetValues(User user, PetscBool pbdiagonal, Mat J, + CeedVector coo_values) { + CeedMemType mem_type = CEED_MEM_HOST; + const PetscScalar *values; + MatType mat_type; + + PetscFunctionBeginUser; + PetscCall(MatGetType(J, &mat_type)); + if (strstr(mat_type, "kokkos") || strstr(mat_type, "cusparse")) + mem_type = CEED_MEM_DEVICE; + if (user->app_ctx->pmat_pbdiagonal) { + CeedOperatorLinearAssemblePointBlockDiagonal(user->op_ijacobian, + coo_values, CEED_REQUEST_IMMEDIATE); + } else { + CeedOperatorLinearAssemble(user->op_ijacobian, coo_values); + } + CeedVectorGetArrayRead(coo_values, mem_type, &values); + PetscCall(MatSetValuesCOO(J, values, INSERT_VALUES)); + CeedVectorRestoreArrayRead(coo_values, &values); + PetscFunctionReturn(0); +} + PetscErrorCode FormIJacobian_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, PetscReal shift, Mat J, Mat J_pre, void *user_data) { @@ -305,29 +358,20 @@ PetscErrorCode FormIJacobian_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, PetscCall(MatSetUp(J)); } if (!J_pre_is_shell) { - PetscCount ncoo; - PetscInt *rows, *cols; - PetscCall(CeedOperatorLinearAssembleSymbolic(user->op_ijacobian, &ncoo, &rows, - &cols)); - PetscCall(MatSetPreallocationCOOLocal(J_pre, ncoo, rows, cols)); - free(rows); - free(cols); - CeedVectorCreate(user->ceed, ncoo, &user->coo_values); - user->matrices_set_up = true; + PetscCall(FormPreallocation(user,user->app_ctx->pmat_pbdiagonal,J_pre, + &user->coo_values_pmat)); + } + if (J != J_pre && !J_is_shell) { + PetscCall(FormPreallocation(user,PETSC_FALSE,J, &user->coo_values_amat)); } + user->matrices_set_up = true; } if (!J_pre_is_shell) { - CeedMemType mem_type = CEED_MEM_HOST; - const PetscScalar *values; - MatType 
mat_type; - PetscCall(MatGetType(J_pre, &mat_type)); - if (strstr(mat_type, "kokkos") || strstr(mat_type, "cusparse")) - mem_type = CEED_MEM_DEVICE; - CeedOperatorLinearAssemble(user->op_ijacobian, user->coo_values); - CeedVectorGetArrayRead(user->coo_values, mem_type, &values); - PetscCall(MatSetValuesCOO(J_pre, values, INSERT_VALUES)); - CeedVectorRestoreArrayRead(user->coo_values, &values); + PetscCall(FormSetValues(user, user->app_ctx->pmat_pbdiagonal, J_pre, + user->coo_values_pmat)); } + if (user->coo_values_amat) PetscCall(FormSetValues(user, PETSC_FALSE, J, + user->coo_values_amat)); PetscFunctionReturn(0); } @@ -432,6 +476,15 @@ PetscErrorCode TSSolve_NS(DM dm, User user, AppCtx app_ctx, Physics phys, } if (user->op_ijacobian) { ierr = DMTSSetIJacobian(dm, FormIJacobian_NS, &user); CHKERRQ(ierr); + if (app_ctx->amat_type) { + Mat Pmat,Amat; + ierr = DMCreateMatrix(dm, &Pmat); CHKERRQ(ierr); + ierr = DMSetMatType(dm, app_ctx->amat_type); CHKERRQ(ierr); + ierr = DMCreateMatrix(dm, &Amat); CHKERRQ(ierr); + ierr = TSSetIJacobian(*ts, Amat, Pmat, NULL, NULL); CHKERRQ(ierr); + ierr = MatDestroy(&Amat); CHKERRQ(ierr); + ierr = MatDestroy(&Pmat); CHKERRQ(ierr); + } } } else { if (!user->op_rhs) SETERRQ(comm, PETSC_ERR_ARG_NULL, From d6bf345c94a7d0e998bf5c2e97981488148a2433 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Sun, 10 Jul 2022 16:21:06 -0600 Subject: [PATCH 140/172] examples/fluids: explicitly handle MFFD --- examples/fluids/src/setupts.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c index 194b57e880..5e446c64da 100644 --- a/examples/fluids/src/setupts.c +++ b/examples/fluids/src/setupts.c @@ -338,13 +338,12 @@ PetscErrorCode FormIJacobian_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, PetscReal shift, Mat J, Mat J_pre, void *user_data) { User user = *(User *)user_data; - PetscBool J_is_shell, J_pre_is_shell; + PetscBool J_is_shell, J_is_mffd, J_pre_is_shell; 
PetscFunctionBeginUser; if (user->phys->ijacobian_time_shift_label) CeedOperatorContextSetDouble(user->op_ijacobian, user->phys->ijacobian_time_shift_label, &shift); - PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); - PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); + PetscCall(PetscObjectTypeCompare((PetscObject)J, MATMFFD, &J_is_mffd)); PetscCall(PetscObjectTypeCompare((PetscObject)J, MATSHELL, &J_is_shell)); PetscCall(PetscObjectTypeCompare((PetscObject)J_pre, MATSHELL, &J_pre_is_shell)); @@ -361,7 +360,7 @@ PetscErrorCode FormIJacobian_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, PetscCall(FormPreallocation(user,user->app_ctx->pmat_pbdiagonal,J_pre, &user->coo_values_pmat)); } - if (J != J_pre && !J_is_shell) { + if (J != J_pre && !J_is_shell && !J_is_mffd) { PetscCall(FormPreallocation(user,PETSC_FALSE,J, &user->coo_values_amat)); } user->matrices_set_up = true; @@ -370,8 +369,12 @@ PetscErrorCode FormIJacobian_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, PetscCall(FormSetValues(user, user->app_ctx->pmat_pbdiagonal, J_pre, user->coo_values_pmat)); } - if (user->coo_values_amat) PetscCall(FormSetValues(user, PETSC_FALSE, J, - user->coo_values_amat)); + if (user->coo_values_amat) { + PetscCall(FormSetValues(user, PETSC_FALSE, J, user->coo_values_amat)); + } else if (J_is_mffd) { + PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); + PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); + } PetscFunctionReturn(0); } From c32b0260484249f899a1ea0fdf06d545baadc9ad Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Fri, 17 Jun 2022 00:06:51 -0600 Subject: [PATCH 141/172] examples/fluids: skip legacy preallocation (we use COO) When using -snes_fd_color or running a model that doesn't provide a Jacobian (in which case coloring will be used automatically), we still need to preallocate. 
--- examples/fluids/navierstokes.c | 16 ++++++++-------- examples/fluids/src/setupdm.c | 6 ++++++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index 243f8c407e..f4c9da6143 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -25,17 +25,17 @@ //TESTARGS(name="blasius_STG") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 2E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin //TESTARGS(name="blasius_STG_weakT") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin -weakT //TESTARGS(name="blasius_STG_strongBC") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-10 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin -stg_strong true -//TESTARGS(name="channel") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml -compare_final_state_atol 2e-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel.bin -snes_fd_color +//TESTARGS(name="channel") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml -compare_final_state_atol 2e-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel.bin //TESTARGS(name="dc_explicit") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. 
-mu 75 -ts_dt 1e-3 -units_meter 1e-2 -units_second 1e-2 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-explicit.bin -//TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -units_meter 1e-2 -units_second 1e-2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin -snes_fd_color +//TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -units_meter 1e-2 -units_second 1e-2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin //TESTARGS(name="adv_rotation_explicit_strong") -ceed {ceed_resource} -test -problem advection -strong_form 1 -degree 3 -dm_plex_box_faces 2,2,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. 
-ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-explicit-strong.bin -//TESTARGS(name="adv_rotation_implicit_sharp_cylinder") -ceed {ceed_resource} -test -problem advection -bubble_type cylinder -bubble_continuity back_sharp -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_Slip_z 1,2 -bc_wall 3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-sharp-cylinder.bin -//TESTARGS(name="adv_rotation_implicit_stab_supg") -ceed {ceed_resource} -test -problem advection -CtauS .3 -stab supg -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-stab-supg.bin -//TESTARGS(name="adv_translation_implicit_stab_su") -ceed {ceed_resource} -test -problem advection -CtauS .3 -stab su -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_kilogram 1e-9 -rc 100. 
-ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -wind_type translation -wind_translation .53,-1.33,-2.65 -bc_inflow 1,2,3,4,5,6 -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-translation-implicit-stab-su.bin +//TESTARGS(name="adv_rotation_implicit_sharp_cylinder") -ceed {ceed_resource} -test -problem advection -bubble_type cylinder -bubble_continuity back_sharp -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_Slip_z 1,2 -bc_wall 3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-sharp-cylinder.bin +//TESTARGS(name="adv_rotation_implicit_stab_supg") -ceed {ceed_resource} -test -problem advection -CtauS .3 -stab supg -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-stab-supg.bin +//TESTARGS(name="adv_translation_implicit_stab_su") -ceed {ceed_resource} -test -problem advection -CtauS .3 -stab su -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_kilogram 1e-9 -rc 100. 
-ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -wind_type translation -wind_translation .53,-1.33,-2.65 -bc_inflow 1,2,3,4,5,6 -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-translation-implicit-stab-su.bin //TESTARGS(name="adv2d_rotation_explicit_strong") -ceed {ceed_resource} -test -problem advection2d -strong_form 1 -degree 3 -dm_plex_box_faces 2,2 -dm_plex_box_lower 0,0 -dm_plex_box_upper 125,125 -bc_wall 1,2,3,4 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-rotation-explicit-strong.bin -//TESTARGS(name="adv2d_rotation_implicit_stab_supg") -ceed {ceed_resource} -test -problem advection2d -CtauS .3 -stab supg -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0 -dm_plex_box_upper 125,125 -bc_wall 1,2,3,4 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-rotation-implicit-stab-supg.bin -//TESTARGS(name="adv2d_translation_implicit_stab_su") -ceed {ceed_resource} -test -problem advection2d -CtauS .3 -stab su -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0 -dm_plex_box_upper 125,125 -units_kilogram 1e-9 -rc 100. 
-ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -wind_type translation -wind_translation .53,-1.33,0 -bc_inflow 1,2,3,4 -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-translation-implicit-stab-su.bin -//TESTARGS(name="euler_implicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-implicit.bin +//TESTARGS(name="adv2d_rotation_implicit_stab_supg") -ceed {ceed_resource} -test -problem advection2d -CtauS .3 -stab supg -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0 -dm_plex_box_upper 125,125 -bc_wall 1,2,3,4 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-rotation-implicit-stab-supg.bin +//TESTARGS(name="adv2d_translation_implicit_stab_su") -ceed {ceed_resource} -test -problem advection2d -CtauS .3 -stab su -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0 -dm_plex_box_upper 125,125 -units_kilogram 1e-9 -rc 100. 
-ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -wind_type translation -wind_translation .53,-1.33,0 -bc_inflow 1,2,3,4 -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-translation-implicit-stab-su.bin +//TESTARGS(name="euler_implicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-implicit.bin //TESTARGS(name="euler_explicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ts_dt 1e-7 -ts_rk_type 5bs -ts_rtol 1e-10 -ts_atol 1e-10 -compare_final_state_atol 1E-7 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-explicit.bin //TESTARGS(name="shocktube_explicit_su_yzb") -ceed {ceed_resource} -test -problem shocktube -degree 1 -dm_plex_box_faces 50,1,1 -units_meter 1e-2 units_second 1e-2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,20,20 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -yzb -stab su -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin diff 
--git a/examples/fluids/src/setupdm.c b/examples/fluids/src/setupdm.c index e6aec20af3..d06270edab 100644 --- a/examples/fluids/src/setupdm.c +++ b/examples/fluids/src/setupdm.c @@ -20,6 +20,12 @@ PetscErrorCode CreateDM(MPI_Comm comm, ProblemData *problem, // Create DMPLEX ierr = DMCreate(comm, dm); CHKERRQ(ierr); ierr = DMSetType(*dm, DMPLEX); CHKERRQ(ierr); + { + PetscBool skip = PETSC_TRUE; + PetscCall(PetscOptionsGetBool(NULL, NULL, "-dm_mat_preallocate_skip", &skip, + NULL)); + PetscCall(DMSetMatrixPreallocateSkip(*dm, skip)); + } ierr = DMSetMatType(*dm, mat_type); CHKERRQ(ierr); ierr = DMSetVecType(*dm, vec_type); CHKERRQ(ierr); From 7b37518f4460a55fba94758f9e1a3003479d95d9 Mon Sep 17 00:00:00 2001 From: James Wright Date: Thu, 14 Jul 2022 11:04:09 -0600 Subject: [PATCH 142/172] fluids: Skip I/O if output_freq == 0 (#1027) --- examples/fluids/README.md | 2 +- examples/fluids/problems/blasius.c | 2 +- examples/fluids/src/setupts.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/fluids/README.md b/examples/fluids/README.md index 0fc784f7d9..25e0834d1f 100644 --- a/examples/fluids/README.md +++ b/examples/fluids/README.md @@ -69,7 +69,7 @@ The following options are common among all problem types: - `0` * - `-output_freq` - - Frequency of output, in number of steps + - Frequency of output, in number of steps. 
`0` results in no output - `10` * - `-continue` diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c index 56ce3085b6..4243d66b74 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -176,7 +176,7 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) { PetscReal mesh_top_angle = 5; // degrees char mesh_ynodes_path[PETSC_MAX_PATH_LEN] = ""; - PetscOptionsBegin(comm, NULL, "Options for CHANNEL problem", NULL); + PetscOptionsBegin(comm, NULL, "Options for BLASIUS problem", NULL); ierr = PetscOptionsBool("-weakT", "Change from rho weak to T weak at inflow", NULL, weakT, &weakT, NULL); CHKERRQ(ierr); ierr = PetscOptionsScalar("-Uinf", "Velocity at boundary layer edge", diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c index 5e446c64da..60dd6cb3e0 100644 --- a/examples/fluids/src/setupts.c +++ b/examples/fluids/src/setupts.c @@ -389,7 +389,8 @@ PetscErrorCode TSMonitor_NS(TS ts, PetscInt step_no, PetscReal time, PetscFunctionBeginUser; // Print every 'output_freq' steps - if (step_no % user->app_ctx->output_freq != 0) + if (user->app_ctx->output_freq == 0 + || step_no % user->app_ctx->output_freq != 0) PetscFunctionReturn(0); // Set up output From f14660a459b2af900ad20fa71d89acec187abe2a Mon Sep 17 00:00:00 2001 From: James Wright Date: Fri, 15 Jul 2022 11:17:05 -0600 Subject: [PATCH 143/172] fluids: Force final output except if output_freq=0 --- examples/fluids/src/setupts.c | 127 +++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 55 deletions(-) diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c index 60dd6cb3e0..e66fd7f0fc 100644 --- a/examples/fluids/src/setupts.c +++ b/examples/fluids/src/setupts.c @@ -378,84 +378,94 @@ PetscErrorCode FormIJacobian_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, PetscFunctionReturn(0); } -// User provided TS Monitor -PetscErrorCode TSMonitor_NS(TS ts, PetscInt step_no, PetscReal time, - 
Vec Q, void *ctx) { - User user = ctx; +PetscErrorCode WriteOutput(User user, Vec Q, PetscInt step_no, + PetscScalar time) { Vec Q_loc; char file_path[PETSC_MAX_PATH_LEN]; PetscViewer viewer; - PetscErrorCode ierr; PetscFunctionBeginUser; - // Print every 'output_freq' steps - if (user->app_ctx->output_freq == 0 - || step_no % user->app_ctx->output_freq != 0) - PetscFunctionReturn(0); - // Set up output - ierr = DMGetLocalVector(user->dm, &Q_loc); CHKERRQ(ierr); - ierr = PetscObjectSetName((PetscObject)Q_loc, "StateVec"); CHKERRQ(ierr); - ierr = VecZeroEntries(Q_loc); CHKERRQ(ierr); - ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Q_loc); CHKERRQ(ierr); + PetscCall(DMGetLocalVector(user->dm, &Q_loc)); + PetscCall(PetscObjectSetName((PetscObject)Q_loc, "StateVec")); + PetscCall(VecZeroEntries(Q_loc)); + PetscCall(DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Q_loc)); // Output - ierr = PetscSNPrintf(file_path, sizeof file_path, - "%s/ns-%03" PetscInt_FMT ".vtu", - user->app_ctx->output_dir, step_no + user->app_ctx->cont_steps); - CHKERRQ(ierr); - ierr = PetscViewerVTKOpen(PetscObjectComm((PetscObject)Q), file_path, - FILE_MODE_WRITE, &viewer); CHKERRQ(ierr); - ierr = VecView(Q_loc, viewer); CHKERRQ(ierr); - ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr); + PetscCall(PetscSNPrintf(file_path, sizeof file_path, + "%s/ns-%03" PetscInt_FMT ".vtu", + user->app_ctx->output_dir, step_no + user->app_ctx->cont_steps)); + + PetscCall(PetscViewerVTKOpen(PetscObjectComm((PetscObject)Q), file_path, + FILE_MODE_WRITE, &viewer)); + PetscCall(VecView(Q_loc, viewer)); + PetscCall(PetscViewerDestroy(&viewer)); if (user->dm_viz) { Vec Q_refined, Q_refined_loc; char file_path_refined[PETSC_MAX_PATH_LEN]; PetscViewer viewer_refined; - ierr = DMGetGlobalVector(user->dm_viz, &Q_refined); CHKERRQ(ierr); - ierr = DMGetLocalVector(user->dm_viz, &Q_refined_loc); CHKERRQ(ierr); - ierr = PetscObjectSetName((PetscObject)Q_refined_loc, "Refined"); - CHKERRQ(ierr); - ierr = 
MatInterpolate(user->interp_viz, Q, Q_refined); CHKERRQ(ierr); - ierr = VecZeroEntries(Q_refined_loc); CHKERRQ(ierr); - ierr = DMGlobalToLocal(user->dm_viz, Q_refined, INSERT_VALUES, Q_refined_loc); - CHKERRQ(ierr); - ierr = PetscSNPrintf(file_path_refined, sizeof file_path_refined, - "%s/nsrefined-%03" PetscInt_FMT ".vtu", user->app_ctx->output_dir, - step_no + user->app_ctx->cont_steps); - CHKERRQ(ierr); - ierr = PetscViewerVTKOpen(PetscObjectComm((PetscObject)Q_refined), - file_path_refined, FILE_MODE_WRITE, &viewer_refined); CHKERRQ(ierr); - ierr = VecView(Q_refined_loc, viewer_refined); CHKERRQ(ierr); - ierr = DMRestoreLocalVector(user->dm_viz, &Q_refined_loc); CHKERRQ(ierr); - ierr = DMRestoreGlobalVector(user->dm_viz, &Q_refined); CHKERRQ(ierr); - ierr = PetscViewerDestroy(&viewer_refined); CHKERRQ(ierr); + PetscCall(DMGetGlobalVector(user->dm_viz, &Q_refined)); + PetscCall(DMGetLocalVector(user->dm_viz, &Q_refined_loc)); + PetscCall(PetscObjectSetName((PetscObject)Q_refined_loc, "Refined")); + + PetscCall(MatInterpolate(user->interp_viz, Q, Q_refined)); + PetscCall(VecZeroEntries(Q_refined_loc)); + PetscCall(DMGlobalToLocal(user->dm_viz, Q_refined, INSERT_VALUES, + Q_refined_loc)); + + PetscCall(PetscSNPrintf(file_path_refined, sizeof file_path_refined, + "%s/nsrefined-%03" PetscInt_FMT ".vtu", user->app_ctx->output_dir, + step_no + user->app_ctx->cont_steps)); + + PetscCall(PetscViewerVTKOpen(PetscObjectComm((PetscObject)Q_refined), + file_path_refined, FILE_MODE_WRITE, &viewer_refined)); + PetscCall(VecView(Q_refined_loc, viewer_refined)); + PetscCall(DMRestoreLocalVector(user->dm_viz, &Q_refined_loc)); + PetscCall(DMRestoreGlobalVector(user->dm_viz, &Q_refined)); + PetscCall(PetscViewerDestroy(&viewer_refined)); } - ierr = DMRestoreLocalVector(user->dm, &Q_loc); CHKERRQ(ierr); + PetscCall(DMRestoreLocalVector(user->dm, &Q_loc)); // Save data in a binary file for continuation of simulations - ierr = PetscSNPrintf(file_path, sizeof file_path, 
"%s/ns-solution.bin", - user->app_ctx->output_dir); CHKERRQ(ierr); - ierr = PetscViewerBinaryOpen(user->comm, file_path, FILE_MODE_WRITE, &viewer); - CHKERRQ(ierr); - ierr = VecView(Q, viewer); CHKERRQ(ierr); - ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr); + PetscCall(PetscSNPrintf(file_path, sizeof file_path, "%s/ns-solution.bin", + user->app_ctx->output_dir)); + PetscCall(PetscViewerBinaryOpen(user->comm, file_path, FILE_MODE_WRITE, + &viewer)); + + PetscCall(VecView(Q, viewer)); + PetscCall(PetscViewerDestroy(&viewer)); // Save time stamp // Dimensionalize time back time /= user->units->second; - ierr = PetscSNPrintf(file_path, sizeof file_path, "%s/ns-time.bin", - user->app_ctx->output_dir); CHKERRQ(ierr); - ierr = PetscViewerBinaryOpen(user->comm, file_path, FILE_MODE_WRITE, &viewer); - CHKERRQ(ierr); + PetscCall(PetscSNPrintf(file_path, sizeof file_path, "%s/ns-time.bin", + user->app_ctx->output_dir)); + PetscCall(PetscViewerBinaryOpen(user->comm, file_path, FILE_MODE_WRITE, + &viewer)); + #if PETSC_VERSION_GE(3,13,0) - ierr = PetscViewerBinaryWrite(viewer, &time, 1, PETSC_REAL); + PetscCall(PetscViewerBinaryWrite(viewer, &time, 1, PETSC_REAL)); #else - ierr = PetscViewerBinaryWrite(viewer, &time, 1, PETSC_REAL, true); + PetscCall(PetscViewerBinaryWrite(viewer, &time, 1, PETSC_REAL, true)); #endif - CHKERRQ(ierr); - ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr); + PetscCall(PetscViewerDestroy(&viewer)); + + PetscFunctionReturn(0); +} + +// User provided TS Monitor +PetscErrorCode TSMonitor_NS(TS ts, PetscInt step_no, PetscReal time, + Vec Q, void *ctx) { + User user = ctx; + PetscFunctionBeginUser; + + // Print every 'output_freq' steps + if (user->app_ctx->output_freq <= 0 + || step_no % user->app_ctx->output_freq != 0) + PetscFunctionReturn(0); + + PetscCall(WriteOutput(user, Q, step_no, time)); PetscFunctionReturn(0); } @@ -563,7 +573,14 @@ PetscErrorCode TSSolve_NS(DM dm, User user, AppCtx app_ctx, Physics phys, PetscCall(TSGetSolveTime(*ts, 
&final_time)); *f_time = final_time; + if (!app_ctx->test_mode) { + if (user->app_ctx->output_freq > 0 || user->app_ctx->output_freq == -1) { + PetscInt step_no; + PetscCall(TSGetStepNumber(*ts, &step_no)); + PetscCall(WriteOutput(user, *Q, step_no, final_time)); + } + PetscLogEvent stage_id; PetscStageLog stage_log; From 0b22eb44be7c6b7434e6bc4a1e1128f52e461d62 Mon Sep 17 00:00:00 2001 From: James Wright Date: Fri, 15 Jul 2022 11:19:16 -0600 Subject: [PATCH 144/172] doc: State output_freq=-1 -> output final state only --- examples/fluids/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/fluids/README.md b/examples/fluids/README.md index 25e0834d1f..16df8a11e2 100644 --- a/examples/fluids/README.md +++ b/examples/fluids/README.md @@ -69,7 +69,7 @@ The following options are common among all problem types: - `0` * - `-output_freq` - - Frequency of output, in number of steps. `0` results in no output + - Frequency of output, in number of steps. `0` has no output, `-1` outputs final state only - `10` * - `-continue` From a6ea5fbef97e8d672d22d8f160c9adfbdff3664d Mon Sep 17 00:00:00 2001 From: James Wright Date: Fri, 15 Jul 2022 11:19:47 -0600 Subject: [PATCH 145/172] tests: Shrink fluids test run time --- examples/fluids/navierstokes.c | 5 +---- .../fluids/tests-output/blasius_stgtest.yaml | 9 ++++----- ...ierstokes-adv-rotation-explicit-strong.bin | Bin 11984 -> 0 bytes ...s-adv-rotation-implicit-sharp-cylinder.bin | Bin 3560 -> 0 bytes ...kes-adv2d-translation-implicit-stab-su.bin | Bin 648 -> 0 bytes .../fluids-navierstokes-blasius_STG.bin | Bin 7816 -> 5256 bytes ...uids-navierstokes-blasius_STG_strongBC.bin | Bin 6360 -> 4280 bytes .../fluids-navierstokes-blasius_STG_weakT.bin | Bin 7816 -> 5256 bytes .../fluids-navierstokes-channel.bin | Bin 6408 -> 1608 bytes 9 files changed, 5 insertions(+), 9 deletions(-) delete mode 100644 examples/fluids/tests-output/fluids-navierstokes-adv-rotation-explicit-strong.bin delete mode 100644 
examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-sharp-cylinder.bin delete mode 100644 examples/fluids/tests-output/fluids-navierstokes-adv2d-translation-implicit-stab-su.bin diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index f4c9da6143..f42703a1f6 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -25,16 +25,13 @@ //TESTARGS(name="blasius_STG") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 2E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin //TESTARGS(name="blasius_STG_weakT") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin -weakT //TESTARGS(name="blasius_STG_strongBC") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-10 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin -stg_strong true -//TESTARGS(name="channel") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml -compare_final_state_atol 2e-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel.bin +//TESTARGS(name="channel") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml -compare_final_state_atol 2e-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel.bin -dm_plex_box_faces 5,5,1 -ts_max_steps 5 //TESTARGS(name="dc_explicit") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. 
-mu 75 -ts_dt 1e-3 -units_meter 1e-2 -units_second 1e-2 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-explicit.bin //TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -units_meter 1e-2 -units_second 1e-2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin -//TESTARGS(name="adv_rotation_explicit_strong") -ceed {ceed_resource} -test -problem advection -strong_form 1 -degree 3 -dm_plex_box_faces 2,2,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-explicit-strong.bin -//TESTARGS(name="adv_rotation_implicit_sharp_cylinder") -ceed {ceed_resource} -test -problem advection -bubble_type cylinder -bubble_continuity back_sharp -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_Slip_z 1,2 -bc_wall 3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. 
-ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-sharp-cylinder.bin //TESTARGS(name="adv_rotation_implicit_stab_supg") -ceed {ceed_resource} -test -problem advection -CtauS .3 -stab supg -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-stab-supg.bin //TESTARGS(name="adv_translation_implicit_stab_su") -ceed {ceed_resource} -test -problem advection -CtauS .3 -stab su -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -wind_type translation -wind_translation .53,-1.33,-2.65 -bc_inflow 1,2,3,4,5,6 -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-translation-implicit-stab-su.bin //TESTARGS(name="adv2d_rotation_explicit_strong") -ceed {ceed_resource} -test -problem advection2d -strong_form 1 -degree 3 -dm_plex_box_faces 2,2 -dm_plex_box_lower 0,0 -dm_plex_box_upper 125,125 -bc_wall 1,2,3,4 -wall_comps 4 -units_kilogram 1e-9 -rc 100. 
-ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-rotation-explicit-strong.bin //TESTARGS(name="adv2d_rotation_implicit_stab_supg") -ceed {ceed_resource} -test -problem advection2d -CtauS .3 -stab supg -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0 -dm_plex_box_upper 125,125 -bc_wall 1,2,3,4 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-rotation-implicit-stab-supg.bin -//TESTARGS(name="adv2d_translation_implicit_stab_su") -ceed {ceed_resource} -test -problem advection2d -CtauS .3 -stab su -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0 -dm_plex_box_upper 125,125 -units_kilogram 1e-9 -rc 100. 
-ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -wind_type translation -wind_translation .53,-1.33,0 -bc_inflow 1,2,3,4 -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-translation-implicit-stab-su.bin //TESTARGS(name="euler_implicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-implicit.bin //TESTARGS(name="euler_explicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ts_dt 1e-7 -ts_rk_type 5bs -ts_rtol 1e-10 -ts_atol 1e-10 -compare_final_state_atol 1E-7 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-explicit.bin //TESTARGS(name="shocktube_explicit_su_yzb") -ceed {ceed_resource} -test -problem shocktube -degree 1 -dm_plex_box_faces 50,1,1 -units_meter 1e-2 units_second 1e-2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,20,20 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -yzb -stab su -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin diff 
--git a/examples/fluids/tests-output/blasius_stgtest.yaml b/examples/fluids/tests-output/blasius_stgtest.yaml index 7da68de046..a50a5d6072 100644 --- a/examples/fluids/tests-output/blasius_stgtest.yaml +++ b/examples/fluids/tests-output/blasius_stgtest.yaml @@ -5,13 +5,12 @@ ts: adapt_type: 'none' type: 'beuler' dt: 2e-6 - max_steps: 10 -output_freq: 10 + max_steps: 5 -dm_plex_box_faces: 3,30,1 +dm_plex_box_faces: 3,20,1 platemesh: - Ndelta: 22 - growth: 1.1664 # 1.08^2 + Ndelta: 15 + growth: 1.3 stab: 'supg' Ctau_t: 1 diff --git a/examples/fluids/tests-output/fluids-navierstokes-adv-rotation-explicit-strong.bin b/examples/fluids/tests-output/fluids-navierstokes-adv-rotation-explicit-strong.bin deleted file mode 100644 index 1cc3ce90e76bbb6bb80d64398406e93c5e6d049e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11984 zcmeHNeN5I>6n_06UG}Ah)=K7*O`BlO$|RZBX63?#(^5&(NnyE4QxcmWpi@&ZomNQ5 zKYFqiw9lz%}&pG!w_nzO& zi=K8f!aW|pVs*je`=^dw+}89`^`0zE?b0)BEb)Z;)WMbCt(kZ=!QwGJm0#UEGi=8w;tBPs ztdPL)A>;a4Jg#?W_w+j#6bvJtsZTMUlQ^sYywb=pi^qLVers*rh^wuLXLPupnie#3 zXx^}Li^ud-Wc7vaVcuoL6Y5ikgR6Fwo%`M5alP!5UGq=%P9~mIUvd13+_bBu7LV(d zBy9SA$+FqtIYy747qvL%azR_w(&90nRm##=Qb%-HOFWa$(pz1(aAi@k$Kr9ltS1IH z+vC5Jcv5}4{TKE5p%eK+``~&d3qrHbubNDHQhmiy5xwpS9BuKq-qDwqu6|=oA@K}u zotG_%13FesRu+%<KlY8eX68$Z#p8MP ze2F8T!7V*KU!p7?*VFSQl6aw~; zMbDRp_X7IH`=RGc4$o`p>G^WT;&DAaU+BI`_38OMHl6f@`W)YU{xg+$#t*AL-+Ues zYVmk|zWMz5XW%(PKa`#?85WQALzVdE%j?85`S^KJdcM%P0*?H2?)7}3egFrbc)t1E zuZ;3yAG|)_eD?p5cv5}7`F!M}#p8N@g zdV0Q;5KpSlH=ifb9D>g{ujI3y&oqb72dh5ceE#(-%8PyQ`t*Fh4?I<_GdX6Zy!d>` zo=Z4%IF6qsec*gWeRwYELl-OYP5u12giHE(&UH{9aOh&?IOhQeU(yH8S5?_Oq}J>A z0oRf8Kp!jg5YL}WxTKHgTnGBVp^KH{oCh3yNgp_0dAy_#U98YSJby0Xl0NP=*MUCn zvxu8}EaGO)h&Y=AsNY$$>YFhO$2GhM*&HR_r12haR^c0&ngkz3M_zlKm{!0E~?uxjJ|Nk@|eq&V|*0*?VeoyPY101W$UKSA$ zy*C1WVL!mJs>;r79v!%CKk=kKmi3!oxaUAG;<@;b{Ym{pJtFSbKZ8sD->&;(&U?f6 z5PyH(m%~rY1M~%}zK>DX@A<&7I==guHQ9tTxY&`PucxU+? 
zQz}a82O$sfSU**E(#-Y)*3PkbX5WZYSruz?I({e{8XFHkt$2OjzlBf4<8`p} zPCVw9zK?bEIgfZ;Pv6Js$P53i`k4RHe7Vth_%HRBaJFC6PdMKv>z@+h;ggkLpZD>| zLp-jd&wHlDWA*6!SP*XUxSqa`8^MFWR(-5L(*ABV9=g)`Ae`+N^`mYv-o!alyz$41 zFYuePC}c2w&nDK9+iiAvY~0hX^{l$Kab5iKhVMtEx{l1vkKcUs3h7Dp;rs!Y_J=(D z93URJ6mR0-Go4e!19#CE<4s=pE9&$53h!qp!6W>qEBD3@n;Z?F z5Ra936%Qt5ZypmsJhKl=@5H*hE+#)e-{SFoRm_OF@Jw(L@ud1pe^3urstdSO565{O zz|C{zhD-f3&&7D)QoNM^d}z=)|K8iUPm1eQWq3zrSBDS}`S^ZLUz!#l+PBu?alJ$F z8QX_fG<4oRI4;EF{cfE%6HhqbFTD1DEm>F6I8l=4IWy8X}T`*xqvQ`g}R(0si1;t1s++S^eNR?}Nc_yZ@#x)PenC|Jav= z!xs^E(`UX&`SBbp>O+1!$4a=HzQL9B{l2-65{~>L?xt^W$-mp?|E|z8tq*sufsd#I z{$f={A5A;Hb7#Zff7$#Z-q}`DA8I>`KG*VjL%h@V_dilI;W^6d=HGv{KW2W2eZhQ@ z`T^Yd3;(hHYy8LiTe_dO2!}6N$sbnN&BCSmg6CMJ{Efw-=dgWZ|F;T99;_<8E9JxecDWyj!`j$*#9@U$T=!<- LO7Iu?n;QQMXX=st diff --git a/examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-sharp-cylinder.bin b/examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-sharp-cylinder.bin deleted file mode 100644 index 248d10e5ace9e21f9d57fbaf53877c94606cc91f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3560 zcmcJRJxE+Z6op^Va&3h48v;?+&PGV{x-HWL3&9Ax5bGq3MJllkgpfc8DMAFXq zueGbZ%rE4d`3TQ){XgIQ<9Eu-{6fA*JMjAFYi}qo^NaiU310tvi*J;d`Q7IGzSoA= zKVNG~d6{3xH#-ThZ@$K_56a8@!v56`;Fag`Bvn?m4^AH*Rr|{4GyHUJIseSHsyI5T zhcicU`sm3^KUK>Ej|xA1CNKH<|Ml`myo)}|HzIv`$9xaxe0n(N&BM`Gcg*+jQR%&W z_su`B-qvk%I@$MYxUI8yZ*zWz)U&VNd~k0*_@H|B&C`Q>^T7wz51W5jy{#*~kM?di zxHrIAe;MxcxtHL!?(`mpcY40LuXxgj_vQT>IR56-JAdK#L+ea=_@r|h-HTl_GJMX7 z^3|XHLI;jcaeOPA=i#0nfArp5`Z`bh%%N|eJLu@!y!5Wohfgm0@U;&g|7`vDd_VE^ w^d7GBH7~j7!^e-lyFTuBtC|lV=8Owxj^{rvoOMxkK7H!A@Y30T=8hV_0s49LtpET3 diff --git a/examples/fluids/tests-output/fluids-navierstokes-adv2d-translation-implicit-stab-su.bin b/examples/fluids/tests-output/fluids-navierstokes-adv2d-translation-implicit-stab-su.bin deleted file mode 100644 index 9a5d4bb9d6c8dcdf63f2011817fb0c317bb9fab1..0000000000000000000000000000000000000000 GIT binary patch 
literal 0 HcmV?d00001 literal 648 zcmZP&s`g`GUCsOs$10o6(KlOW#&WDI61l8RNd)o8S zbpkp0!_9Pf(px nR_w{mt>y&P?X-2^R(f)Rpt{pqA2!{4=1x%EBl#74QHdS^!tv<~ diff --git a/examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin index dbdc3bfe0e5535846670a0c2e2c5c2d565e206d6..c9489ab30cc0d7f63ad4aa25503906e3e1ee39a8 100644 GIT binary patch literal 5256 zcmWlccRbc#6vqjfJw#E;3K3;wWTbnai03C6CGw*bLPSEg6q5CbqL7g?BFV}szSd)B z^~;t~L`0>m(C_&B^E&5!$GPWxE*-b8J{{ebXtWTPtH!5xj7*}r4|(Y6poMU&Ll!;D zWKv_$$=wdu(LzM2RpTpOGO6Xvjq#xbv_NI!zI(cmOnP@>a6yO{;`78?s(H|-tw(f! z(~O76@Gu$RDhw5_)$t{|!$a z7BrqBYo~nuHX&1#rY+l73_N2hI%Nwy*6GTD95k9)#KE?fiv*9*l}kxefv=srTo(xZ zV&c#EUbLuTw#xP+oNz4Yk@gg`2EL#8sNewbErV_hb)m&01zYVr`H&>X?l+AE<-ia9 zsp>%4hYJGd~-F!`)DC_b6KQNKEzp$- z2fnW7uE-AH*T&jU?8Xeavh;Fnn|`HNr%t+uE^dMiQ=P5zZ*@&LXq=d~?ez-KYgj}XW77T2v7 zE>EG;vaQmq)>nXUoTqM`2>gLP^Z&YIPRZNF`qGTVwWm`h!HS;1fBwTQbR77`9ZOGx zFzHKjpnpm~VKHLm?IJ1y{DNVBg-+m?Q{O*x!X)$fy=xxLgzEYfThRiXZ_Djm!rcM* zThk+5A7S>4m*`wvBeHU1MMkG~0pGFQ`|T3&*Rx-D@5X#bbX)ASJBUkdT9RWa6~Lc- zx}TdK_$Pzt-+Ex)DSxhSe`<-VsXzSsf|`IoBhtd|4*WO1zjSIa?+vZ6|Ll9v9=`zZ zf!6Q9FW>&rc{lJGy;C@z;T;u1F?&Vikjpo_g%nR);6J`=(fJqh%W#cnd1B#IcCI{G z7PMcOc{H){H<{F69xABo3p(0IUR}n*XX-u+Disp9)R!!Gmjb`Z^e{2-3HX&Y)~D>T z(EK~P49|wDF$d12oM{F!sa1sAx%V3I>BmD?hq1swzp-=oZ{+i0KDkV21^COF`l?SL zzmtFsV>Ol#IhSkUw1M0r#`FqeEnt6gzq2prgD&?ytMCinpRu~|Pcq8CHlLmgxObovhmZ5)OH5hsb$@xxN^_!fA{Ye`gqrVFL zwVM|XY{ds7$9T1$M4-rB(_hnShRLLk53>TCToBJg-WC#rl{nZQ95AUwv0W8XWIsPx zzm!g4U?uRmV?`1Qv10h|LzGz`;^DacE!h~jpSG^nxWjW0U&~%ttA!PrDvxLBbs?8y zGggt=P+uLk^elEhz+b%>_e~co9N2T9{^)fSTfiewFmRVl>Y6<46QTw2PC1zgs#rZ_ znBPD(1f^X)_UOFxBZyZ{vg?QdKJ%~nf3{$?p+7gqH5G|;jV%iiy18UhXTlo2{2;_v z*+&n4#A?cNRKd`#$nK|pkP(Ld@P6x0DZ$#!{pXmM)MM2nbipd6KIrk<%NKEfNMzCn zz0k_(tq}iVxO@8;CcEr%NSxh*3RY_LZ-1!*T|!e`T>$u;MpgX|n9Ls5B>k(1NdF@K z@U9NjPmeN%rBe#xD{i`Q^s)diWj;NDTH<1k z=uB#WzD8SBfcp!V-%-jci*-cy`G3kNA&TW~wJa3Rl1byOWzNl~f#3R0v*HQXUS7-T 
z)eS(8>xX9AcV8rvCe9jEP09hEBY%$Og|*Y~YU-6qqC)ATZH8Sh$fPgM>Gt(3z`y&5 z^kx)O6huvi=7dnk_gpn`@x5fy=YzIIH1*B>M+!w>!4wG%y8THOMD@AosT#gK$d8&o zu&x2WO}j!$2~)(RZzfT8qA*sAc*PklGHGi6@z?pVJ}M7aONT9{(AA75JKjdcOd6Ga zb7^GK*TUXNk_hl)7Skci{CN$MX28#A4r%@geC~y8{bb1h z!(ez_8kHN(&>GLZC6i{%nsqH3fPcM;W_1@+D2b7ET$SkE`Er2==l=kIJ)xU41bk_4 zk-_7ba)#b!*!CCE5LG>Yj&cO}Zx0MF^Z>tsopE6|rkEZh<=XB+w^STDTs;Wj$3Kh> zmjFJgd-3-;rs#c1os$^wIL z`7`wHiDU!cVNJ_)3{%XC=9e_miMmna-qYs!WYVOvY-kefFXDYlBg+t`{QFlXWbh_R z=~SF6GYJNMwVs7P^bacQtK?JzSpTgl#&{!C+wN@sHzO4Ib|tO$dm+9lJgha857waUOhOkQHDYWWKMEML783Z~fb%^tM=K{V^0`C!Ru0D8Ge*xCCKKia9E zq>d?;EU)7u5sLP0aQ!d={by?O%L_X-;M4sWtUd|ra}qzJ-H+;D?zZ)2?gzeop|ukD zp9t3yUH*NT61k&Gh+7SHKJyJ*wR{5cZnwu={6SBh(i~LA6c4ek|6E=YPjlkBz1M|6 zZ=3dY2!``B(>6~y zg!TM^?=BmlJKXq;Nj)pZ|RiCx;D2YEIm!u+9bNj`a&9aEfJ znGSe=LiGpVG2TjA0zD<`)pn?lu(I%{iE%LR@3%ivxee#-+3frX-dAJco~*(U*ngRS za@`J?Hzrb7^*A6ONA1nv1(+` z06bHztx8s#yoAt9IyCxp%hOUC-l$&S|rFAL`8#)00<$|6GJ~k^uhoJ+p`JV1_4YxlBSENXh&k=C5_&zZ>gw$G$>; zi73Cj`g{U27*H#Ga^4ZgALcbAHuM1BzTeOh>Wj*@8hxG))0^3SlS7X{kP@q z)+IIr;4^dO#HQjMrE|JIOIgSjC#Ja6h5$cJBR8M|@~iJ2aW%uj2^`_uStJPmKU;#0 z-vt2QtD;_XKj_A6|0TO%;j^fwI7N)`JIoQSQw{yODMVC7bo2fRnc(z?SZHZmVE!dz z;#hEFLBJ5ySF4!VE5AD6Gg`Y2v|)k1dyUy$?a1%RdsZpa`@mnHKN6q{`F)Rk^eM&? zdyTYwZOsU`*^>NU$KZS#^$#vH!}||Eq0y_e4e!so^CjxWSt4-Mk%w!>8T_MO?^%P* z{!;VXS~(Bz7ov|7sy~LbblqFj;?{vbmU&$e{5_T4XriDMi;bLLs9C}24zc*>wuuG! 
z$LxofPF#ZgH1GB&;#k)A4Qd#EOSnGfDv*AZ1pA{U7tX@@hAUp{Eb_rJO{SAbI+A$c zoja?X1oOKkJ7F?Y4ES|jHq883hDlpM`mr!^d|6q(h2#eNld0TU(Y)Ee%0#aG#0Mj4 z*}BSV5OubjQ`{B&TgRZs-mDmi7p;`3mB31DUun*6^XRJGB~v>|_&>Gk(Ds^`0AJi> z=~g~gj95IY5g#3qEp3_WP{wPDM*Y{ww8Y zo`S!RT=ThPdK4>2M!T=F$fCG`FcnAjRPc{8$&&DdNv?E~soy}V2>znb_IXsFHSWSib@{~s+ zIwx9N)4Tc{{3AQPzbw>8E*aH60sC|zj{2wAFI}0z8 z!9ViyZdqPK8E;O_4Bsb%uI)-F1^{0qX0h=dCUcyYYpa(bp1tYB?D=cpA8nlyF8+e} zx9JWQ_TV3Xrt4mQjP7o1*tX9xfq(SVcT;JH`)7C^RaON4F{aweiiR?tCZ1fDf&ce_ zzrv^6)v!MK=KgXs@Q;@bzxYL=*H6mw$URX1Lsk!hj%@Zn`RvEc=fFSqea>1uOBALC zm_;Q)e;QU`xwgX<_-|I~-h-~4OUqt!M4vd@g*|2AQ6(^Rtp zd=hm{;XL@qYfq{5&Zw|kA)~R&75t-XfsxvG;8V4~+rI<gp(=bU+51J?Gi%5i{KwSYITOjQSeTN z)1t<8Fy9aUA$7|De@D!ji>lxsD^kL4$f06ljv+0A1oORwyD1pvVPyQg)wv1qZ|kIF zn;6s<`tQ-nB)Ff?{T?4gxPh<1m6aX?{_34XyMYu@Mdvs2SCt$1lV)a`vB2+kF7heE z6chVr+ArkLV~5iZFRMWPPQ4r$H;)27&rtDqZt#zDEmvO)q6(&$b8b)OVSbyJ994(@ z80oxPS?dJ(M+QawU!n$Cr{DwEtRTOxhLe{i@KM;j`6uwFF}7FDE)h)^WCxku;r&lc z*U;FjfZuGSI?f0FF;H){T@-okl1OG0LX= z^#%SVjrUTez*lm$Xy*n06=H2}!GjtajXK-6HGn@8m=qg_`Uwwmc0KqCQ!YqwA1b{- z)Q@ym3z7RkcVQM0fclK+v;5N%h$-emeFy7*pybgPFF(Ek|1{M$c`_H~JC#MFr49Vu z8Nb#G;NR*P7YW;ne}QlFnsZTq!n+3#Q`*THR8dKt*>7CS-L?7fUWQJ#< zzfXO-opo)qzn5&dCCp)pkM(l0N<0yLY!B}t1wJxqn)%jD^5*@WA4*6Qg!M(;XP9I` zCE?0n&9v>HzPLxs`k;SCuG5xGhrvHKKX7b&j%bzs4Xd7tz8Fz{ToWME+IL`$@tL{6P3nbb_n-D8{%zrFc4 z(9+1HR#Tx0=}5E`t60+@oKGgT-P@*gpd9pfp)!GgLGNcBX#WGh)h>K4HALgw+seXN zH3{eJgSWRaZiRYrJ2ei3gYK337;&SqikpsmnQYOHz0&*fyg9x8L`CUsdi}4XXY*5IkXo7^ewX#eYM(O_Zot`bAYxOoi z5JyWLG#{NUbA*S}lkncV*5H5Z<$vdSK|iI84!EGD0A_a~=dWmkxt=%^M?LV=z~?Fd z=>Fp$2JWC`w$2qU@>2pIbt9Racm@6s)sMI+)BVMz7Gu%ky$k*3>8eP>Wtn#V#TDR( z?WHQ5Kqo6!&GBFchrh8C&k%7^e$b1j8OGOqcQe1%5a=FB&NEh+ft&rg+!tAb{CShm zvr?W_s70pj0nM#J_I^|EIN!EMmYnWze=eqP2i$c`Iqo z$P7yi=SZyrKY5QYvl?`pwinIPcoX;VG4gL`!dbh$|6kEE_!st!ee?m{ExrBNCd~SP zgR^<}Gs5xi{#vN_%EnC$Z0Y#@eEh9sFObm#`E8-`F?j z(T!R3+oVJ^Zz8(@=buYQf`DHf%8*e3-ShYCgO8Z|L9Y4+A6;Y-qc)(B7zzGOpM~D) zfqq15LR}1VAB^<6b=#ltK0AmUI2eIH|5dLL);Ct^?_ZGZcEYqXggw=|N*?em% 
z_6{3;cf;8PjY*KY083Sw75HCbBoq=@)UnAxl&I?FsPDVOSf5eG#jirWm`5`JN6i z=(hF|RtXt=dY`n&q;`gOxe@yQsekk77cb^TAwg2Jb;#3TD89NN82pt!@bF}U{^HJ@ z%p)v%zvsp@xei4Q{&J+EJ7iLeUIXVY`ubVh>D<%BqTJKbyh=}q2+QU(U1a!wTE}WG zZ3I46MZ5HeC>9x$NWEQmgVsonYN z;D3DY;XQ$%Pm~Bgkie2M-2tm1cTr-rU`vpMHu&c_w#+PozT1q~Kp9KCHTQ_S!9vhb zsb4_75Afu~z^2o5|D@gA8CW8Lru*X3Bw;+*nUym69{lg#74cc6`zuW;p1|8<>q->1 zen7!SUSmmzVu3H;>@W2kcv1Jo;W8|nIX_xEvJGWqsuozqc*6X2XU`)9yvbYVq%|xn zHkqyP-iJuMz@C7Do&(>ml>YfE-T$!i%P}m|imq>V)Fvc;Hl*HPgYk7p`$SwDr~4z~ z`3Wq2U+<0MO#)r1H~3f}aRbJue)WNPG4O}#sf%J*u~U0FH0}k;<(L{M8M+Adw<>jS zWdfdTs+8x16%}nxx-j1767ljDcF!3E0hQ{|6piGs(ZQF{;lo> z|EbI8qPc*t*Kzvr1j`@4-^2OM6kU^$3gSAk8TiR~f4vppQWJ0=$A1!!{GFsuJH) zoV<)|7ID5?#x*jj^S4vQ|M!2Rp=I$pR++e+t~mP&#a`-VU+8xSek`V&@*4Qw;1qXD ztYOPvS@*CIy6o!_ohO6;-q?TZ1z5eiTY9r&DUn;b)T(XSO(u02hVOe! z?>{o{47@q9x;Is4qQwk3Ds0gYA!m?D@6WIf6BWQW$8je;!@JT|TK>`^(JjgftuhPN zw+r3OTgpdEaa#?){*A^u%4#AkYyGIan>NekdJK5UosSLpfj^?D@iG-_CwDSjJtjot z4>btCN~|H1x~)&u@K%Dq_)r3t!`cGwno=SD$g5*?TcuY!nbhM$Q&!&wd|4 zk=@|kzk|SkFj=@y!Fnpfk-jg6&>f$PPP-=Jq5iFtQ&iYrahHU6zmYKx-XP{eI*97h ztHYm@VSasC%YJI_fY0Eox_JU)exY*KbyK25>14iK?R7HglV_LXxE1*Ge64i5hY>B2 z!-Vq!x}3Ds!sJm+CiUA_DW=TW2Du>=1$|J=V**m%E`T8{4)qP%*Kz`14Uk3&us zty+TMPde=;=8BD(6CdblTA;9u>Kvn9UNUJ|+f)4i_s4koUG=M&e5J8vQgIaBUzlq9 zy)Xv#gBklO{K3EV`?np**z{xS=lau6P^W%QhR%ztWYU;OnzAel_|Jc4p;lp2pC^wi zx~zz*kYK+#=?|bsMX_O6zgQj<`^r3Qx7BM!D2;rN3Hio+IZnqlbhyFb=E7=t_ z3iTOn-ZTbabJ_49)$C!Sruk-w`}HI;X>yMvsrU`(d?6K*m$2E-#F?B_4HSNHv$$Y| zH<>iGyYQutGx*P_DeH`2v!sg~A5DKl1uD(ik(!sF{sV`B7mDD&#wy;>hbgH}I$Hb3 zP;X=n8|Nu|GHE)YGRTC!zPuS9Q&TX-Zo7j}OFr>NUtPr@6Yke99Mg`;hM;dhQbBIS z6obx|-4`5CMEH&~h50c5uQ+q*HoU)Rvm<4FAF=t$hQnn8-%!z}Rbuo-4AdWd5PM<( zE!|{x6wqzJRKet-tVT1`_xs?@{+i!Ve@3m)Wf1FaTS~`LpJS?= zX+?2jJ5l#%W=EBK9Ml)J%}k<#E>R}Bn1ZPq%Dk&bbqNbjlU5D80y1ef`N#!9`2T2& zN_`Q0m@4bAsYpf%J?~sG&=QCHbLQEu2jSAN{{Oz^m60*kWV&lx{8uzEuvOIh(|+(z z>i>6%e!pz8jOeh0`m^WR#J3WS9G?_>wygkfe(p1mE9gSpuQy5npW^eDKZdx$?4Q== z#1H;us#6K{{;@>mUbBb!m7Wn**n&znFGtn%?E-$T;Dv4#egAz@Nu*+`*~PK?fC4nM 
z`v58R9n9}*CsXxPH~RcL$Dso?PNjwLuqc)*2rKejv|N zQISZrY7aQr_8R=lKTN=Td5QLG|H$%fOf`0G5jf+I%7nTMBDX#Oe&v#+s0R4=S=-Kr zU@CR6&G$ni8qQwg5^{S3I{%e)b~dQL`Dx|8MNGxHGX<}?h?e&4-fr^HpJx*P)20F+ zD{xg+avf7CQe_Snmx=oV;l2+{aBlEkUJF|5dZ24GsKPRNgYT0DsF-OMnqo zzU2}~9A5*SUq)sn0Q?hVw{CaB)WbYBlF_DU#Kp@#djanEnP&Otw@yNR{*8tyBuuq& z57X*sBs$$DAIS^b0nh6%pDa%I54CH4gsJ`9LQX5h5j0+w&C&8<0cMuU^7-?S#{eRXtTx{nI=szmIg#E_U$Tt%%`rNIxKDCwTU^h42ym}XO;#2lc=-;vO170k%nCfV} zJ@j!g@pLXp+RQi|^bcQ;{QX4d4J)tJVCsRI9!urji1>S3@w7Yizgg$I!m&3%KUwl> z(G^qgeD5z56h*@Yd(*6N6`3^0N|hASqVM122rkfrgZ&wEt%$d~$#Z+ApnuKI#BO%> zqWe?J*%79m(B|nhx=mz7TykKShx0SrmHYNb80Z`lSBO+hJ?5qJZS*&)aY#I>6ZaML ztM?;l^!>R;A$ZF$HRaD^pUxmO{#yQrSP<-wIllT2y!8IhAX#v&8&d<>(j+A#iFmzz zT!Eo*f6P|fHyzWV*H3t?bO2Mm6B0>n>O|Vzy1Ya0BS6oIva4bSowf0}|2|m1eonWL z^QhWXk#9lS5A=sawNvo^qp8i0OnrpBkrHUxxCeembf4as1MlC!Pp+T6fmd65yk#EV zqeY%BV~wD!^s5zqt3R?iY=FkzOAC7rpCH`YR`)$O7KVDM|84ooNUvx2 z*7pD!Yhd<%U2zC$SuY$4l}!OY=s`-=MyS7w<#iS_8mIXcUih6!s6VNSs}qI&*H#>Q z#*JRTU0~C9Z8WA!-F&$w7l~ilwNbs13HZq^GbQkTr0J|HJ}pN}N&Brt>g$O!pZKo5 zy;lhScTW~yZU+6z^zNN-4~Ly}6~7>eH1(%-{-VC~pKQ9nbWPD?v@9(fqR(-G zF!4%sJxGK6&{jg@3@ih_t(|$j3oRCE>|%c?fOL4dm9{$d0ske0f8SZqP4K3~H_x3># zbI|R+8thZXtV5ILyn`Wxe>#`dlfZr8Un=Xx3imhdqIsKd9cC@spFZJtg|O4k5Kp@L z6!=@_-V$nb|HjTQEqD`C+49It1YwgezQVEt`eR#bMf*Ye`nO({e{lh`nA9fq4W2{$ z7x$k|`V9M}b(JS#fu8TYw}ied#N5#g0p42jgnOg*gIz}$z`tw0!L|qVbMHUclw)q^ zKdo6c1B6HVGC6N-0QiD#h3YKuS3ejXGKIM>s5zXgR3q$)@ZPXgfMXYkixHQve(x@$+T%@x94ujy|SAqV+q zbI#4yuSv1nh7AvqS3{9%axnbAZQ8=7<1wJW?2nM&f<+@8t#3YQLV;n) zvz=CuZ(2;`s{`6V_sAU`UBIFOe;U)xuM=11sg+X=@cpTc_r2qS4c-6sSwRUb^5gFH zbFVfKww`M1r~iZZOS`4F2Tu<0?;I3DCb962x$3ii6DZ(_>#b6`PT+mh%eJS0KCPT^ z{34bV*i2?~XhqSJ>nr!{&wziv=Ld8Z^rKe(YC2e=p{wP1z)>P*Qld4|9P(9Lz^iJ; zQ1I8(ll*WMOQdf}on>4jtfJe4V@hSfzhKZf{yXqZPi3_);O#fN8hu%^QAkK7@j2KH z_%|ny9qb_EH#MtP^6D4#tXaqr>HP+M%q$+@)`?Pr&$uA8$+31pc6P z3~I%SDnIjf4zm)svmG9=d&B+F-r}<7kP`43Yzl|tutL>Xzx3KxLNog1KJy>~{C`=n zs{{f6E_}^i63e^4G!dZbq39VlEIAG5w|#+G(gfb`v3I7QZ7syAqg6G;-&g2i?118{ 
zPI|uHyHPN5H}DS3f|<)$^@@4>ktlZ}JNs;@U^Oe${~{R3ItIL2G^a@sR#o3dpdxY{NW;Wcu=1$XD;>Pi(W~0N$_7*N6>kr^!B(`K>_Y zzjsYl?S=cV`znp+J=Bkt3*4+lU~Q3QQR2;ZJfxnHE+l~xua7rwz<~SFsB#Xsve5nojmSmaac@X#; z!b3KbSf3E{erPyEJY)IV=D%+l^0Qt+O&okbj@@ospS}<4t6Ue2b2y7U_Arz9&qfP=HZ zNa068gX3(OdC*_`-B)dz=<7FP&1cJijUqKwWiyA+y^j_jGVuh|uW@a2Rt5ipYG%0s zZ2USa-X^aEHGJprknHA#eA~5?Nn;K8UOuyYV{Clj>8^MuEuw;I9Go+?1@d!3afEp- z`17@8dUs%B*10mCs-Nh5@3+YzIdRC(du)Xc{0ID$E&2KkCWl`cn~66?x!eAz^!|YV ze=t(?aN}X{?@|pE@y4bf{6mFy*rV3IwD?~JbC8cE-Ce{Cp#BEq16K}W)6o3CQ-xTbv@eb*BCEboZxr5Vzjf%V{^HM`sw4vL~Y=TlmeS0%>BlIDT}f%Z~W*Xjx#CGd94n@`;bZL)94`hFFHwlYsZw1L-~BNf#_K_i(OF7 zLEy*5f021%{_AN6E7&pc=!2S9)fXzgGo<6@nCU+pe?3gL^Q%oTm zQZ+pr%RLEy=>_DDc`GiH~ss;PnmIT$RBdY zF2{=-pcm=+YGq6|z*o*!tHbx(xaB&pU-ghbL$7D>O`}ge<7(%s;r;Nn;L^$5dr)7P zax!ubQ?>c8obMbV-uas-UVE1h{P}`|67>IHJgGG%3HY!*XT{fv1dAuSR$6erW?Nfc!r9Xk$a+;@fbooVK1)={L--2-B)Jp~zgqE1YQG9P-<$BJO#1xU-P2%v4jGRmr)G#Q z$5ZL$=J5Sv)>{3p9{v3*tG8eC5T;tCnudy`5k)}(9{C#3zh*!1=^McOVi{@$793%G zF_f3-TTr#1&GBaEAkbB`%JLxJ#Hss)QT#FW%C{3sEjDQQ8=FT{{&mo!^wVQ{ffs(c zs9lb!?)5U_m!*h~4;%6>RMN+He^FvjJLswjMz)a84k)?)JH4v)rp0 z7topSK2L`HXLG)pb1WVa(LowaGWMWTSI@@6_wCqW@__|KOiiW4w{TdZkwuPycjMil z|1lVGh5ZxD!S{Zx6H^05T>4+A5-k_j-}#-V&;N(+^$CLR|JS!a2vePA#KKkMiGrq* zD79WE(5KS9N=HCv)2*@DS;_y z)c%k@zF`3QRWxN!4gLGiN)k&Ke1Eu9e)*F1GooTj*5w%e{+eSC5J-UgJyuzvVEP)Y z&#N1$3z0-5KQGjNUtC5tcDJ&S<4w@7bE=+%_jxRn znA7RUm>R`1)Oz(B8n@weGF=fK`MqRY zlOptwShgkx73iNn%KrCSHxSn)Rtj7j>E9pjr=Q*Y4D?O=GJ>l?U-1xrTaT*0Cr?VL MmVy3AQ#P3SfB(yc9{>OV diff --git a/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin index fba7ca225f661bf683eff427b8555f67fa2a29c3..4ac8cfd10fde3df4ac3cd4dee68b2ccbbc9a8ffb 100644 GIT binary patch literal 4280 zcmWlcc|4U{7srvA9wIl2NK%rakSOi_l$?X&c8SuZ$xW}ZZlP2vQ|5%qT#~62NhQ-3 z&Ozpx29cQzbx>F6UH&?s^I3cUzI(4{O`A*NvKbc_&mT19Xwp?7>xDReS)X}?bZM$> ze?-VNHsS=9?k~RFMpH%h*IR5}AkMAE$GkSGG}S&*<(o85Q|{i{-QsPqZ$6_$Se6X7 
zyDzq8%94uDLN{x}V`!?mhWnECPSAO>cP`)+no`%*d&e3<=`uCvZaw7CRIyxqVUIKD zez@7+H;3NI`Ca0*{y^@`ef6s0TM3--8NJ9oH06D>H2Zf2a9Rx*{in#c70U79RXHR_ zi`$B1C(%^nDn6a*3E;evo3b~<+&U?qgLjP4&2Jw0JdOsy8AHmk2sjfGhUsTDkJ~K zn)urAG1gfO04KK#kDLO|JSXc$GF}nh@Yg^3?I^2xq%OW|1~_HE`937jRMg^2c|l*` zTwWYmla3|g*V{nd}OI{O-vhb;;sDJ>B+ zRqEstGEj;*5fVl^;vsnT{KP_5`Ve~kRjK~?e_Fu#uFf&!p{aYNX=`@$0H^V{%Cfsy zVfAoc?+Z;*;ThyEqPG_~N#(U&y1>z!CTDKqjVZCN%0!(!PN_^gb_e#qb&=E_1iwe9 ztoAp5gV)E|x)=-gpwd8%xpP9v5C<1>wisTasrX?pw)!IC1gP}fD4fBYvZCj+wuF&N z**lL*6z72ROXmygGjOC#_v3%SI8EWps9|_1Li9mXexOH&x@39;M}))dCnVeeW`n)#jcpJ&-lE};H`o< zuzsOv)POiQzm#bzyvEuAH^Qd`Y)F2h$C1@TTWN}QAxTGcGjJ?YPpePiZBNc0{Zm98 z)r3|bkUM#mrn>$f$OsCdsr0%ax9Dcjk$(OAV*%c=OaALt4?$8ZJ=zwfJVjFtVtMv| zya9W$4VTz9tbbcu#%j$lDIK3U(xSIOQ`Kgu1dcM)-i^|W$ok$ul9Q<>!3^-k}^YyM>zzIyU6=GvU<9xB+7B;EW z|Lh+dngQ`N&=>d?@*?8c1Z(6o-kZ6{rE<0cb@0YN5oSRi4~U&L$T!G`b z;T%^kHd2k7dgOVTRN2ZfWX~(oRCoJ>9fs1tx%c6K+bV1{y3Ij-l?!1vuW@8sd;`C% zi)>r57B~hmLtO*dDDq(3xp+C$`9xJ_a5#;o#=8Sm#Go#)%O~Sp#epN2Ki27k8T-{9 zCVG7&E&7a&*9+Zgs;BHUbKg7Ql&*SW@dz^oY)<|Yz#?_*rveZDG^44{icLPbD}W=c zl=bKeW=!7;ij@&Uo$d3pK7CMUr=It3FU*7QD0#7b^-?`vFV~T4fEhs_y`|oWkxJe_ zwi((8fPc4?M@-xW&h%doUNSIqyEwCMbt9=hT>0mCzbKqT?5wq?64ZmzMUEsJGxeIi z)V7A8j>rnjMO9ni6sPUBHif$4CG=0gFz6JSM2YBPM)J+$eI}NqEhsjP)dhLl^EyDU zND9tjD(~v^7iQ|(UfYm!oYZ?>sy|u-@znE5dG@O;a15RsYG1-kgRyFEHH2DO4odwN z;xsjx*KN#VLcBPM>n$3Ajzs^@w0|%o+po^0=>TCns-9dt(ggQ8EMgy>#Ec+ z(R~1to~8d|!u|CV_VGD>_N} zl=;-&Zuri9>3NO0*MY+w=jPjunWkcj9oJr>+M{a~zg_HrI53Fxer-)topxMohqs6m ztbl|1=P{#inci*Pagu8<-p!erq^W_>v_XFi_DQzEb4$!TbHg!*=QVniel@zgTbZT? zdn&3Y`7S88}@70G-mIF`gXPL&VGNy3E3Ha*WxElwYKs@D#Cv! 
zy8v6Tv%UB=1Y$v_th8GL@~zQX-0S06n#$UjGV{OA~4g8<&doP9){yNio+x zorLeSM;Ykf9l%cIUZ5$~rV zo5`J9axSwo%YY+w^to+3P30M74E&o8IyM&nW+tQ2xar!HO6y2^)EIxL3dC6>U-r25 z(s}JW$w&;E7qYN8`{*h1&$gKmsty89A5TtT0&rLgC)mblGTrdoRkO?F)LOHUmZ)D) zSIKtT8v03rLd@;5a?m;JTU%?1=6@_GKhv@&MZXe?Uj;sbI8JJP%kKnsyWlDdT`V9} zBi1|bi{d`Y4gGI}JWW*{or#f122N$Qmy#t~RO=7Pyl{Yc*iQ&8_lNjuh!QlX;J4&R z_l}_`&~aczukppgdtQCtcVijJR-;P#VUH>siS*|W0RN>$;8r<16`X^3A!=uvrEhrLmk>O+XfhSuj-0>I2Nbbw)#H z)gj(P{_F@_8HAM|jeb{iawnlT69;}I55qi=>zotCy~HWM^L8Uv3Orb){j3^2F|+k( zsCo}^)Qv*4WJV-p+Y*bdPej4xMgkR`7uLxiHw5~2NuuQHbegmVThg% z55?-9Xf`8MgTz~D_X?iOg8QzWeYH7giF5syP9old(%8oRQYg33{{9;`xNp;~`&si# zeqsGA{ogjwi7Swjx5nC$hn&@RohA7i!3+C}Uch<&{@*5rIk4|}Q#x9RwRM|CsWXZs z^X$jtUI$>Vt9L)>F$42uSf!TCp|x0xf6Bcg;X5iw=Zx*+e+_*+Tp-v{67o>la_%Sn&L}&x_zVU8Yb315qdA#RG8!lY26sr z-wOM8B@MrTIV!B&dXl#d>q!T@npYe|rR{mq?g?Hnk9}B`+W7_gdFq9YUzYsxXwEb4 zAf`2?1Uj_*3Hg!STeuAT+qHeMZ?^^5(QG|$AjTyoN10Xno zPD!x0Z4SoL4GYU7i&3RfVx|!56x747I*ca~&<|X>_C18Y74r0Ok@!z+*rN6&-qDj3 z{?hiDuLaKMPssF;tt+h z;@O%0w*sp16K_2D{yXTOWfV7o|3|q)e%xO2|5?jxuWK;lVyd5kvoc|2h9!p=LVgS! z%j&#-4eVD+24qfQqnWm;GB*iQUa@mJ(Pa%y^>fa@FfIm8>`tw~8Eo{>h)eUEBx*X{ z?XbC!sgZ^a}ZKWj$I!3G&l?#{|%ex_B3nt}sw^6*-D9ky}JoP&Qz@Ac^ zkpC}cN;e%8PDi9-SCg1rkU!{%>Xs-#-3oKF`cajRnLN6ul~OuU)2Z%NpHtyoHRf%) z&Gjqv{SYZuYdrL&>D1lp<1w>6p`QVB8~T=dVAF0T%w(iCIR4Q|*gG=oukmR? 
z-}2?>JcfBJw7%m?M=@p|nA6&!la88JI(59#f_NL9Sf?_cXHw53iHTq9J zHs;ZIp_ROxH2wDZY0UE8J4w3Lqn`6H#W`JLUE9#Rp3$U@Y)qg+vZArO+ z!o=Sg-g!egEOS-Judsjzk3#J+)7CiVFt-NDb7T9KCR##2ziKYm9|L|3n|-qG1!me5 z+&_ZVQ00t+QYg9xem%Eq&J!Jr0{IX}8>FwV^o0HN?kFo6%yf$M``cF*HA-mu z{kDYfKeSMt9ybIW{d%ourm)VHj=%2f`+COvJkN8^`J8hZxO@y47&h<`m>-8_c4!pe^_ zue5^iJA&J59SE%aO1#0_#Xoj$IOiaqri3(;CF|Xm*K7)g2G)`?AWg3g@fba-e9JC2$B^-@**M!}Uy) z>KSI-+P%p8?%fjv)|#C6p`(Yu5(17(FxkOA-7ol2Ln%|n%a<%Hr^zzAteb^_7GRJ4 zll(q$7aRfTTz4#WGR*N98IJURS{881=BranfJ5nj=v9vY5O!vk3|+%-Jv&mY z^llnBCug_Wz`PWmwG*=sVV{AW2aeX2GWpxSGF?g_3%7V~N~4nm)L3_=h439QO9AWAa_I64WF+3fg|iqA^l)w_6df0l&bk|A)|D}hz>Hly+Fz&UwV*rXP1;M?BI)HI1_DTdzTzZ3^{_At9>DiGM)9DCcq z0N@;6wP+C_hqg84KGoGHOHJAM{s{{a*yr79t4ElDqnRUfTp#T!=zk??f0j(+`}W(^ z5dkMhw1#R19E(S5yA2Wh((-_$$TqxC^AmmIqziCJTX!vZ$M>6)4y&pF$IU!;@j3ph z`-9R_2LoCDz4(Mx(G3D?j)H?+NNA6_$C}e?GDmm+xWeaV0;|z6nUE?2 z&gFe)p3)FcTEU_|KObJM6;Q63^&fC5^zvi=5?Fp}k0Zq$IP{BggHI9L8Lc#*HG-@# zE~DJHa3io!#-jU0bb-TBwUmK z^`DRQ?}-6F&@gO~yh~ukkE~t#eSvf8M#R=2w0Ea&+n6Sutk%4y?-!Q@=i#i$G3*0Q zPk>AJW+Z+w@O*=W3t6?Ji0sj{C$QRxpRt1Le(k4XT5=Q#p9y4tu5b!(?jJam{22VC z^~?UaAc)se%Z2TV`+$?7G@O`@M2eYCue6zxHQ9a6j>~DliON^>C4tlMS-YwLNjLA< zeEoY4`LV)IGx%Kufz=fpxG6ZhejfRO%acfgts)>+{2bm^P{dNr{TkwrK4Ev`DC7+t zK1zSJ#^%(_K^4`b@KB-#%7Cru(wg2 zc=X~na4s}{yYm^z+g&nAcOS&NmF?NL=-CliXKBa54eRGVq8!q91vokUf1Xz(#gL_U zGy3{uz3jX6D$^P8mteM+yR?DR`1CKA5mHv?{I+Xj5ZT?I;O%s>*JY4*0U!x z;bS-9+{r~FzpH@L+*NP1hHx+C%tFl|vgMCcMn;e@+&}BJpYBb-(OfwqfT3ftiv!*wy`+l32^%Jk49G@b;)1OE8a0=U1QP3_!xh%=f;d?SSWBtmMOh@Nd0Gy zVz+!P*=}>|rXF7;f%UvM;g^N@qKQT^-Q0!LJ$mO-lEd*qL08k79p4FTWG#PFBKUuI z$rI<)L72yOdi3NDq_H+Q-?zd=*8j2j`&?v^zy=Md9EHDtGa0P*W)IR#3f6zQ%Zu!E z+@i)44?5Ye>r|Y>4(s<2%zZDA=Eb!F8MEK`AZ6xP_NXXD-7=C9>q!x@+3`6)sLTsw5E>D ze(@P1YabTbGUY%%9^)`}m(KuB$EfSmzlh>Dux(*SEZJptt1go52!Rc|*hxt>!}{&t z_VwBzN_q|xzo0GNUrV`vbdL#vP4WD&)V=_m11mw-dtjciWgce%qDY(VSiSL_ti1R; zzJ`?!oK?41u|dFTsw)y;L6q_yEob#ZWM@yuGa~&waQ@yK{aFj^d5YS?cOoj&GhSgj z58k`zK&3hq0jDF6gTe*(cJd*&=mD4~Xl|Vpizw>!vA*i_WVM-W`b<10fqnChTDT?z 
zoGdH(GHpcZ80%Q-JVACl*q=Hey8xWkbfe)kSkG&A+W#V=@&>cX)bGQ)8O2Twot_2G zvVq40{C59Zo;&pq=Bc)tF~uMXg*2FwwjxR6LA@wl(D&~GVyserbSTT3$D-AuZ>I-mO@tG=NL}mZFWcA#R?3HPL+1C&ZoUoltwV;zUi3_ogig0hUlP-04 z@sF}^*d~}J32a)T;r%{w;E2Avr*I88lRl14rifxQWY*WPn{1SMuXWMl34x8fRDLn6 z1WqAmUfndJ3K=}eKRiKpN|=}WZi75I!XhMXx(ztnb<5)e5LIyhR_yT-UK%*9@>><` zn3l`U6gC9T-Y~^0kT)V~UJb~bA&Sqgm<+GWWKYC%gP~QZA11Ca>?|ArPJ`HG<9S3C z*Eu@2U4!gDIDh@H&%#e#z~6L1E@oK%ISJ`h-&Pm**?FU{F*}`+b)8Tq=;TyXJ9-bg@b!)KDAxka`>9w* z4MY@+eH-#mnv-)b)EL8ui-9`NLh4l+SzNUhR$< zMc)Jcy0ArHANYy8q!~Y#7CxZ!%HfB&2CQ#&Y3I}?us0tJzq5h<&;+@S#9&?5d!P7T z2Kbf<{i7QXI=06hI|21sm?&f85y*!P4nnVlhY5^c_9@P7C(QGZ+Q0M=ugURq&Ze%B z6^}Oj{1n~=I-tilItu5c@n2NtSHOR6bTh5s&c$E6?hf0cvJv)ecIyzZg?;6o_+2OP zzM!iUQM=p72aKFn9)o$n;n_dKAV*+jgH%Zy57eeAx8u#ll@_Io*J zpnpO1ji#J1E^+<35;wkTXaBe)X)Au$t!l6NZAJoXe!e%7@8Y`s4|vSI@R@hgvR|1c z$cu9_MH_u-1XiDYUu+2Cq1K?+5Oi~nW|&-D{}`W-`$)6FSjnRE7s?O30zGfZYngnz zUN;)E2_^N)nzutv^q}O>0(aZJ} zI39Wozb-@Ft-*Gb^$zT#R67wKg_s1UEtU@EkXheb=L-_y{hPPsa)!N!aaoZQaVv=9 zO=n+Kbpd|!Wx@bYv^sFAS`0g z-=F4Y9`FyE5>4=tBVxZOuKw+3ESdLA&N%dgD%i1@QJ5bLW=~T{~k$w1W zNA{9@bI5zG%1&I|t-uj%G4OeXFqTKIXmJ(~k#d*|KI{P;DY?6TP-k|&w%@|01pA1c z%$=S_Jkb-(tCIF)_DN}_jr*SSB=N(yM7+G$ABZVVOt~A7u__v{+kupM=0rG2`k#QJMhnI za*fQ1ern>~4DZ)i-m=Vn1jf}5N79DT?kdmSV&iP2XI`l)Pmdn>$)2NDm7q6aT-8BW zrxBm`=%Iq>0X#9@off5W(O z$`wC$bP#Pl%P81G-j^?|573ABYFFj;a$5Im#t;18DChvUPwNwdLOd-bj9p#r7{m`2 zV4eqYHjs7H))@3LV)0v9NG%di-R$8e`hiUQbE`Yw5B&P`qi@IAd4Y4*qYCs-j6p({ z+WVb+#QxveH7C&f4o=ax&%L4V`u&b4p#~lLo|vvIHGrq*FFJKDFaxKg@^(rS)H(MX z-_W7|jTpbfeDfBPIUD!SOm>t^-SU==@#`~)_tf0lZI-~fm>?zc1xahyG{ve4lZiQg zt5KVv4(dEqX7M`)IMeZaFDN1@*B31dFQxI!d+%R5o_Y;^N|CJy_fN>vbgqh|8sH4d zg{o~uav!^oNEio_&)L7+Qu1Yh_>g(3o}UaHN4c@uE_C?Lr_FJx-^i4q>_2W%kT1Tx zekQAN3^-$54gbQC?9lLE`e{`>tJ}d?C@&iBL%?vRk_qY)ogKGX!+_JpJn6=Q6ce0B zqoQ-kB$`I-21^5YXQLksMQ4HI#;bLi7afuMu;W5fJejW4>$=rO7yLC-ZXbCGIA8OR z_}d}{&(|L&@B?_xY_s#60OZ|fhR>NU&<~C~G7hvr9*t-_+rpZLRN8uT-5=MIk0Xpu z8)bvObsiLQXDGhtnWR_2Y?};GL`#vLPKgBiRbhgiK5=TlNo)a$) 
zUcz&2t)d_NErE(*p@@)Iwt8F*g<|9NaPS{fVv{ImA0`%8mZRdx@v>};qtNOKR<9s1rVGCGyE`O!)T)F-Ct5)KsLM1}M_#UqWQkJdJQ*-DaE zyOIT?c0;|E$(fK$1x|aYiA)kwZz-5j_07dg9i-K>aDEe)(}JS*0@MQ4tPx?6fVPp%K(Cp+6cr zEtn8RP|f43_BOmCmizaEt?)i$JcD}~-vg&)T2K-Pj{mmWSbjupNp{%vBLu(oU&(l) zDi7G}{3w-uy?(sOO3#!+l;uPLrqZ8eRgo2+mIOP2&HNjB)2<1eIaSTIZej2vXRtx+hY3h`=TdFO`-peRQ>M!} zq8e_^&U{irew_MT9rW)wa6*n<7lFPhjK}ur#)pV{PLy!iTaQ=lt|+~d4*C9v)t1{m z*#x#M;k^HW0C1Eo^)myJwlvlE??N~plFwXSk_x&pWpF?(qY*d~HCFGf5Y?O>vAN!a zY%FC>kXMI3eU{MEqKN}X$=O(r2{;R5WyPU*MfFm!w~`-lo_%0c2hQ*LGf!HfKaP-{ z4UfHuwB;^vRNi}vza)-1v>XFnm?{Z3x!ehy-S@SY3=q|Zx2T!#4cSn7yW$}`#ws1~|SQ z+b?w>s+DvkH9rWiGAl@u*SZV-DpTt;y@9}%&PS{0taH+C1}V%UZJlFRmt6z#cjmlz z&KW>GIZepUJh%azewXily@={2>ijvbk!(n)P+fi{3!Kxfm1etuLmSI9X-3owOlxuS zLwMyS5xZlNt-x_}-~9sW_r=`7UHzbA;q2i$?d?e0x;@2A`2n8$<+5rt`+ndkzr`q} zzp`URO+EeJGfR}fSoM~NM0nWQ38@3eSP)fZ` zO(D)~sD))i`eapL}L-vTX9FdYaiYuW36rH z0CnKhafLwtNZ?f3aLH2;)xK@k5uL{?c5G5fw_yVg-85qf`m2RL%czxj;2hXuG!c!o zQ|(=M?xEtd9Vyuf+)!uyG%#Z*(}H_FD^7VWhp1<*IQwNb<8u#N1dBh4!}zUVO4ZN@ hhRqr3*+O4+E~D|bbqm>48X#cS(+!*^&8;op{|5}p4jcdg diff --git a/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin index 08debb98d5e32d1fd344236ebc7ce782664556d3..2da707106e6e1ee7776c1acdc5c16e94c4079fe2 100644 GIT binary patch literal 5256 zcmWmHc|2837zgl>QgjnSN{H~1eJyI{cEz``o{u&w0KxXXecO&K5RL-7Q<_AE3pc$Mj!9B*~=O^Z}8C`)DzgJp5VV zESc1#oqF*03|b7C%rJMYBa<4V>NQ&Tqs8!<_3&rbWYT*{&&uUAc>YnvsqRfQDeWAX z<$dzcRCh)O>7d`D~vPAQyBq-%*_@3^dxTm?{pw)M=(KzrjzO za#c!4D_6|ouWp?uT$An2UimNse(u`6JtUgn;u}UrOWlDVPFgh}eP=yMEy{E7^}?md zF*ILC_QZa)Yhu&UY$}ueAo;8gr7;8(KJ84;Jqwbv{ak0kt8FXR?EuIg5BWD zZJ*@$0e*>VaRAl9`hO{?wmH zq6#y6@kTlz1$0BHd+kR|3iyV@Dml5}`z=PugkkPnj@$ZU(nMgvQ@xB!F#ozkD|Cs+ zX#RiRu{oF%eSdGxNI}*$#6Oe?c=Ot}p48Itd&Fe^un= zDR1y6-P-t*Fh@qv2m72fp@cpR0%m09X(>cdA9Sa!mJhj3!NrZ3K;oZDz z;3p>+tna1yX_w3NFyE#XS?^yea>|sB-p}`jOlszImb+d>^BcDJFkpV(pa!N-C4}dO 
zYt>RjGmOvXva6mT_~W`<*aq{~CB2`}T}Gk&QQ{}&`@kPco-W}A-)`_t@d6fU@jsNL zYeqyiExol*Tp*L`J#uecVFRDMw6o+67B*%$?RTygxivUD)k(brzt8d)qZ9a(zZZry zurOLQiX81G?!8NycTRi?^+TSeeY41p< zKhvgVSb(KjEGYUrUlTF0goNY-yq|{Oi;}GY;Nt^jp{`i+PI9-s!!dNxOg?c=<`S9I zDy!~iOKT6fJ|!l-|qHR8~mBAzE1D3Jg1zM%V;(ce6c5?o@I$lYRg~Dv#tTZ zPbQD$D3&|C&#`)F1*P|Qj$IAkO(u0Wmu=SW0iRiTVyhNbsZjgDQ(8gm8^F^M}API;%S*X#Otw=bQMbJh$SFatZXz!k=Srs3n>7Nh*3x@jO}#oWZ?9R801> zu@4SiLa`!OtLp#`*K5B6=T6UfsFDIR5&C&XfH%hCVes6 z$ucPker=7<`HT3}cCkk{eios8$E};&F2ecIpQS`nwE^G6be+!zs|EKncLeGZaZg*c z_G!H*lLj*bip%H0pU>Fa8Hd%h@yVYCj6`%@e1pq^1erAOh#UL5)7HP)KXU?88iJYY zCC5-Hn}|!>vOk$LMz>&j70$)LKSvi$g|UX?aq-p1ji~0QZ&9skC7CpAFD2L*4Zf-K z?I;>d18n=1j(=Z5i=CSONXITC3#g~UJcoUwD=HrJY;rtjY z;%jAv_Y?kUqgev#S&nn5g2zxr|F#qRn}^Ay8H(+jVNqCr-tCnXS4>s(=3Gp;jq0@@ zc$~eRKqieoD7kHU0s0>=;2l%MRKDecY}Z&KzlGd6AO+voWLG2Cqz=?c*N2&|Vyeg+ zmj}icL`v_0SB+-&P!B4z9DNLa|BvS<-eM}fXyI1xYV_aTn-^*~tI4Eq-nApwV1EQT zh+fI@!&Iu!Wod&)sL`)@^dUPpjGrScyFDNJD>t)d9fbP514A9G#Iv0Zj=@2t_U^V-4B4AQJs!&FT}eu*7@sD@#`On|;I_+D#t?y!GCB^g$PZ87!qE#3&W z%f#zS=C}TwaQ{t71Us?nLw%p9x*i{hhJ4C|}r_0EBS z{<^5+(kDzccDeZG0V`4BJa>AJ;CATmw=YRG6za!6_A_!|>Ur{NLe?e`BVVGtcE1AZ zck5Xcj)I@|fo1F!rW$E7Nr}3kMqZK4dDge!S08<(lmveKax%vWOuaNJmt0+j3T%E* zua92@e{Eeop#b{dQthz(h^ZG`9ZI=1h^oI`9e?++K>sDfH*v3^Zl3;E;TWb`KR51- z>mV|n^%ReE3q!qEMabkd`1kQ2xg>Z$MSM+d;i&n(6?cEaP4NAk#9Ow&_cL%YrgQ*P z!-nTtycbZbZ1y#|B-p=GMho+%B4{y8T7^L&A5+~g{$$|(OgK|H%^n`v2m7~_C&uU; z)VGE(53^#b!*9KL|TFbBYq(a?UWMXj+{9U~@dkQesxtxbb zlLysvGVFYv=?wLvEL|?z{g-FwRR-r-ZPPnbOUNsshI7oi@P3+ZbQy1O($2S6-Z_vj z8xx&2Jq)1V*HZqyvv7Y(3ryDZ(Vjol5~+nI+4`%8Tm=ZbZ0lwoH(GvcsozjNL+dx5 z*OPzZi!5bTKH$*+TO!W*ioa9WNCk z{C2$iw~PYcf9=@EkCi+$f7Pew3Z_r_`qrB}7};vGZ_|Ex3w-}8R&yt4{(+X9|1d+9 z6Tk9wFk#{9$uJXC3x4jr)I6L&;WLG7>=aCQRIIW^`v|gewfejwUbl2(5z_^DBm zW0jb7a;Nsok9boE@7%2=l=_)oGP zvcdTtGF_8q;E72K%zDXj})+aG)^Tp@eWX6kv#piG(^G6NR`5{uCScKqBTyYhLEn z`HFn=Et(4HGr{*Cil?xGAG}^46o3WJc$xZJpCsb@3myj_dj@`O^I>IK+WT+u4Bw9V z>9Vdd{mwx*?nkZP)`a}qe4-=C5AuJ=j1rs4CCtxX5_~J-IN|-{%xrWp5BOKCJ$z{Q 
z=Xh7`3Nz+?+YqYhcMpZ_DheH)(*b`(f_V<|O^|16)75D#($KQA?%!JC;i@IMItKDv zeafJ@G34J+1KXM{T3FbW`LMp-dE{BVAhaMC4*863N${sX_+Otr(&xa!6v-V;XPSxo zyMx3(k7&X8{z=H?gnSb|a`{fS6Bha)W;S7QAI16{*15If1NkvbOivlk_n=F0by+@G zdgs*}7TQKc+9AISAunCYq<6fH0rww)f2OTp=pvT%DpIgXQbSJeM@Vcsb&wy=_0z3E zJ`S1fcCJXrk|LG2TpG_3VRZpVH1$?se77TerbECVm=Mox$4BmGD@teRprp9JDNf6W zAYWCP60hNZGDt68OZfp-IL*Uf8Es0WD!-}}Imid^FULIHT@(D%8rr z^8QB0YM(F<`7w|6qpBtN->Jn$+ps*>0Jq02ev9)v?0)5i%Rz`Lq`wQ-7wHe#M%IGq1@PsIY+f zol8+WLK|4H}s{)zy9TkLT$HOP5EWVJ_hpU2{`+#4b?y}W{{#-~ zSK>9or?z>2&53kGv5x0CDxRf5ery$H(NqK9)~R!l9;=1)r3%_#CE_SYqRT^HTaUuhQ0k@Xj43$d8v#(?4TGrDb75 z4JW-JpU@W^_-`-FpQ}K4@DJ9w`lDO@?=UJqzP2dF8G($7jv&+4eqIy z*SMTW@P3~}K1B!jdw5$K{{g7)F1k6xi`6&SV)u0n6MkaHP|jy3=wDG%Wi~*YKjS^| z9>|YpeTYme^d`!qN624^mLFrzH7$d$9J;Vff&5tfy;#c>RqgyY>&jVJ|Iunb+g)(K zhU&bz{yG&?1q^F#SFDLbyLZ9jrm#OJSBHy@cS4@ZbZzO(F+Bzi0Q zANpG`2je%g&pUS;`ctvE^><9wI{)X;nMmS!gqXD<^B2e$HVb-jcc6aoR?2Q+$d7)h zPb}q$2)Dmd?5$%^7hDR6CxPGbwJV7k^5gjvvk&=D%~D}uH5aV!chVbu?|AUroQ-9~ zA-`EN{MtBzUN(fX(=+}A|9!97Nqgw8TF_n%`BzVwJM30E@urF{NpU0)`kVNrjM4Hn z?_DKB7sy8^a&y!3iDy!Y{-JwEp)N&P*9&Glrx-RR3P#?t&ZSi*HMY zji=!J47NQ|EV~N%tEoDLl8j1U2qsiZWjEMsAE2mzl_usg4+dA zqh*x7I|=g1w{rWDBopvIWQnMBVycHheR|#ydSUIK&tW<`UkozmbPK4 z-J@QPR6~N$QhBJl6aFu!g*61IWl*=y->>rk#_!~xzGs@q^yJKSz7OZ))Kr{#oEG?Z zbH@WEVElRLPq;Yb$J^a5ZEWCs-lK|n!SlSC-@D-bJ-m=$m{yLuX6Nx%QCj|PQ5}B^ z_g@&c{gpcm<7>2K9g##~pFF=Dbn%7#`z>p&_Z`%C^qbnxVd@pmLWMqARHWW6uswYh z>fVmBKZL;7@=c|?0Qu2IdrMXndN=+gG2>Sl_y;IzqUYfGHtDjVbWHV^mix!L2#xB; zOK#l)>o;qwtiIJ1>c9L_ED|u))wXAP`3R~x^?JSMFXZ>xTlqJ8cR+nsq`^xb@*%tY wCq|5_h`0Ab9fqLp=x!sM0Y2$u7*jFSE8TTI>_YYXC*>mwCZS$jw<&t=e+q>4P5=M^ literal 7816 zcmWldcRZC}9LE(YsVg&zvR76{$~x!XjH}W>(Xy32Qe;H3Lny0k*}KR{JVLTZNq$1n z;v$6xlHd3Kd0(H``@GM6p7WgZ{T>Dm-{TAnjQwaaN<#N_qZ66b)e%3%IS9YyxgLG~ z2frNxWqtL~V)Su;J?jZFsh834u7wxmXHx~Q!9dusG3lr#zm{8J3A5Fq? 
z?@1li>es+`MZ}hxLoT=Cw7`xgHXr_=cF_Q7*!?-g==%Y3NBvP@PWt%nLLU{-r02M9 z*^v=KJ<>Anb|?WpCbJuHKj^-?dpWz%gp4Lv{+(VVdn&F!!_A!TU-;?A1G%`g-BUaC zd*Hoo#@uGYJw*n*|XJskW*#%RQF>M# zIdBW>%d>2PJa~hnV<6qXu_x_2-m*<9yTNud;gG;IUbg-O{DMWKdIWsx$;r4_%&b^^ z=~`f{NV%Yx;c>ZJs5Fg!W<1YZwI3@h~VJ044EE3$hoqXMT;SqUC!M( zj@jy)Sx-FAKvz7kea*PU3I0^6B_av@2H*C4al<=qDKb-*Ith=Ay59I_{oq%t{kZuh z@F_>~3nnosxQ?-<*B@OurPU>>Zw=$8|NHUiALQ)3`mXyhuXS^JY_<#$I`=o1=&pcV zuVGNrm+nugEKX@tadCPXiyttjIb$i1*C*vsan! zCc?MG-mEt;hWwtds*yY0&o6mE1@8_BsAMg=jQovNU#pd=L%tzkRFsYGulLGZ$NU@p zcU`cSByK4!uch>cga09q%?SPfC+q!`zl(SAZ+}@EavKFZ+r+lBo&|pS&zZl~Xfbk` z>%LJ07SWb=n*Bl$VYZ6D4BI2fq@E3T=iLtjFQX&EJcosRGBYmLg`mJ@b>Fv4@cg^Y zT~%zqga0Jkwzg6%ls$JoE9)r{=B6fYUu!@n^)bBS>8F3ci6$z2+*mNe`njVb1x2aU zj)kPe!1xB4IP?tor~g%xzha3JNoy6YpG0urz2nN~1tHInF1hOpy!1@*fIJo(KAQK9 z^)3qfLw-5n-2r}@{DS)e_zeyFSgvESG4i34MpZ=A+Na`1?VI3_5ciU&03R=<i|8(L(Q7V>h6<}R`c9@8J)ECiv^%CWkns#Fa%R2UXmMj#UJW{&0i4XYhuglL0z<+j1=xaY#Q29Ch zJmf49|K!Z7JO4g1Y2c!yLCJ66V|*elC9&LuRNjEBBuZO2CLSZM4dZ*()P0c$e}&l4 zejlt{LA(A*tsMCb4Se6*@*DUIX-ghYfEQ7IPpZO7bz1A$?*~xe=IV?^>8tSnjJXa- zPl4a;Ypb9MR=V;}ack8qk!{<>WF#GrE;|(se1qf8SBt>kb=kc{#acmE^zG05LS>^(jJw$$0Z*;i6anuqQb+2a z?Hf$#7#J|~xr(0dY}&0q3F~uwAA{dCeg74HXVhAbDJM}LGAkqE5_FFRNl21OqpsI) zYR7`#^ZTT;38pZ|bWBtm6InqbRV^Rh1D~1A%`gpoveSBr8YTzMEsnSyLXYZfu0Hnq z0{k|z%-46|*Rpfj!-2Km))_aXv!UkFDrZhBINv7SYFICF125)r*qRe-JH|cobbXXb^_0P zNwQ1>>q^)dY5q(=o!!;Z0EFIPuYhocDzS3>8Pk{aN zOQlCjpd9eoCh?W-SSO)?cISLIs#eS1lz+SpcqOx`n_Iwdf6lAG4C{rPPhU9GgZjAA zPCU93N+wP9tD77R0G`{LZ>}Bdor!9wKCVQRgofuIKk7&(eHD&~^m+>ZgjWfR_E=BN zyH-}$610eF71-2-_@KV{YQN1qR~h_~ zZst-wn97uLbbLAxy*hDR;;fGnnM4!07I5+t@Ql4RwnbQPDWvjVNH&GKZ6uA8s7&Z7tCe1Pth7Q5N7oV;8 z*^H@@mty+^xzSSx^SxGm!7#oGchv3A;P2XH`xpFj?X@-{{%B}dpwGTmPsoRZ_9W5y zP1@158!=U#`*88baN_CD-R}!sH-i7vXEho;pXf+Ax$_iEMO{|R`|lB@9jC0@DNrBH zN_Q-p!~CL(JXE}|LY_P9USNiv#xG@Vdr~iZqPT-ScW=N{JXGPzLm`N#Oio2dj)UJt`O1x*;J?EqKwZI9ikRG+ z_$Z=$kC(xQ7f`>;bRqp<%*$uf4| zw#3u}H)1))s?kWK*u2Fp8Sp!O&b${6xu8vuZz`thzV5SLt0h`}?#Fai!1p&DuWGsN 
zH26b!T+Vr6>fxgP2SGQ8$|HH5p$>39%qot&4!S}2i(RBNVycesyLHY}=$TYdK>Ckb z@Ozy}Y{PkYQ5lTBPhN%YEGe7NpO}tb( z{i9>X4gCGpe?E(WKPcSky+6$FvqPH03Q^geespbO4gAV=DYu${ulN(zt&geHfG13{ zF{q)u6uoKN3jPRR&i&1hC$0aq*o3La_}EP@>_el2truokOM&O=8mMN1-28LOmw%Xg zrYz~(@F39@W4|Uc1^o?8dQ{W4ao{FVW2JYS=QY$go2bin)BZTZoK3;afFBS-*K&uJ<1 z)_f*vYd9Vx8A5+H%P*>OR1o-)=LXka!t=XEdG2%{HNk#TkSq&4%S#Cx3_MFt``@%)kRe_5gKdmM~VdhZXbqOsQ;t42FI_Rz*J8%%j2_KP)7UD5WzV( z-)3rGM>0r4&YExil@(K+WQ0}TuM(AV&yt6^;C!9Q`%kxA8gi!97sE1`>bz1mcj7L3 zZWN!}@TwT{tOfBAsGp-G6v-oS9(1))FUZfs?<30XuVH`h%3Bm{wgldAMJ*l9o8Ikw z0k5H6j>-v=Xz_%+)Fh|hGSB%X$qQiS-dZl+dK%W1MBOk_#HvG0<#tKc|t&pztxJhXFF7>L6LVeLolBA`7 zqx;;cEq-W1_4%)SWd@`mx-0B~)Fk)=#%~qUzi;uKB;#Q8J7TG3-zIy)v#_h6Bp2$h zu3oAcFBNkAiEf1rXyL`V@U63>$V4??+V_(p0u zd(er%FW~3UIGQO(_bZ;*Ad43A(+~8-2P5jA#Rr#<>;r$09!b>-a*?)=ZYr2@`w+)h zVO=6{hiKtoi!+Qra$%c!J>8#~cR35KlkT&5di+M#9kW?^D(@hF9P)BI58YpNuD%>I zL|DzK${mL{rr8{L|Ruw|Gb9J*R=Pdv*Wz$<=pi9gWCzEGxYzz zx9y?9Y0T=qNwpQJ5}_$igZKSB597~^Cr%X5{l}sk#V|9iNa@81A>{lx>5L@bG02{mcyKnJ}|j_?>6}c+urEave|2 z>HSwyPtxfVkn`1D+0l(TQgHTU(MBRNgI&TT0p5T24&(6;cOj?J?w`-YY+YIzePbNx zs_Tecsq_W#5BHB6!g`A~jOh5U7Vn7N$8dVYpSXH9$z`#n0Q}mcYsG!QXZ_u4REbH^ zJnlW6R>)&FzsFA}SpVH+jBec_kh424lMOI0Di?N4RUu-=G&b5Y6+wP@o1M;Wy8m)_ zbq?lMnK@e2r-OWS?*<&mOoW`lBqr5>?r(jp#EH4z-SW8*_L1<@coRz@H-O*8Kvba@ z_{@~l9p5mQddEP{U^%*>W~XlXYZ3T?!f)yMkTZY2GLnz?beV*ikMF7}{XH&PU$RH6RqPPP3q(*b_t z?|v+nSg82Ol6m|SBJ4-tdAq&ap+92{SB;^6KdH3rykl4}(l5vq$D?Rr(Kq{=HNb!2 zd0)UP_-AEAF7aZC6zAU1gH6O;byC%4XC}z+YfA;v*PnJpaQ99uHk$pW%{{&@a#Ro8%jP}uk74HnIknT+?~ zK=+Q(E@@m30)AM^BLm)FF zk`D-4cws54c}saFXOvu|wy5R?`+M(gG1t3T3f3PF4^(YI!N#F+_D!%q_ieq%v`HQO<|oW@RWy-?rwg)$rGYy^Kz*h$odm5Vb% zVxD~`d|a6Vn6YszvO;REtp*cBwV zpuFPUOsc1NfhTAI?oi)Eh74F~X<>~#8fAkJ7ra zdU3k`4qQVR^D8@Bwo;(~HJRZy=mEcL8b0&{t1HDMv>bRwWZaB6D^&~ge;c4|Y?lOl z{EwcM46G(++?m%Ph>E&jIIdO+1Fy1vA`6ictc|`|HP0BGat( z=gUTK;Jfx%OVjT!lCPFCCt$7MsvZkZ7F1sRM^>>EzOP|ZXX7La_>BzTDl}nAduyq` zP9duNy`p4&t_u2JG2+=Ay?<0)c`!1DDW@{RB({XX`YPsAsf7A{v`q6(`Y8DQ4s6%N 
zn6hP+OJep0k*zis{yiG*kH^c_*q-G9pMHMsLlq|9N|$%Pq>UcSyl)sMpg$Sq7xiU? z`a9C#`j@J1tUZ)#+SaxSHTp2k$;ZI^n{-Tkv<&@ult$IcU^&)y`iV~1W)c}UykwKg zCZPZQ@5%>Rm|t`lub%W?tSucUslIKV$llK1YyH3j_^}@%0vf3g~}j)erm7&nNl0f9qyg zS2CB9#(9w_im)WlPQ&^ADP3UmMS6X8hh6iV0M_|caQU=NG?CL0=WjL)^}(0sRVyAc z@C62cY+JET;*I`2nio;^3un1gA(g-%j&NOp^EvWb>xTj*tQY#HHaemKb&0ojt~fk~ z{@ZVbwF#3k6 zjAZ9(^;Fbvs`sDjJu1}Kg4azyodaH=(x=e|>-{W>P!qExs{O2SMVWWKVV=YKbjM zd4;flO*9>W#C!qf2dU(*E0REB07zb>nQfG(S zSb9+X=D_rUY3NUBI-)C|==Zlnrh^-yzuB*wRWX}`1~t7W)H&}%K5-^7Iv)7FrT$T- zn5xOTB3!kPXcG3!b#!Y4f6VbQV|x7^J*4!VfA)52U7$i#j!C(GF={>!Fh&AOm{s8^4oS6S?HBmvm zTUPX<6a3`O8Z)iHS5zp7w?IBMvyFWiJsTqDS{TClNDDHsd`zz|kmQBrozQ>VCRH9B zNB{9?ZW~fv2LGMQL4x%4qpo12Q3n0r#VbeM3W#<=iMREQaDOuW*74qBxZjEn6L)!2 z1O4&+qK)|ni7FNrFO_yqxmU2VS|1LdKRHwv+@$oPg>~JTiYZd zzi!W4)DHb!?xBa0@6kxw#U}GfxSyE$>gI0P1o?%(Onr|r)u{CsRe_P{Sl-FW*ahpG z#(yj{a1Z!{qaOd-0sZm6^IK|Q{mfFlXL*K!ul{&%`!c5Li;K|iFrY@( zMpMkVD`4eMhTuL+fB0N)V1mDd%X z-39ujRXHawX3KN`seQpD&3)|DVp-4#Q6{KJXJ@nwSnhrsd@*`KTTL$ zOz$KbKiYEAhzIJY*}IXQhkioNX}QUL2KxIbb&uy?h!P9#(JL+V{=0=Xlwu6I!uO!m z5=_11H=4e9k$CjIq)j*%_NSR2#S079!M}QISFaTG&*2)h0Ugx5q?q}tlpAtq-G{s1 z08f%1QRTzb5XxTln=EKzm!)kcC)C$7^QUKgrXgpx@jYdQsUBs`(p|l%m`D8Jhcobg zXG$A_DheTI-RsW`yi>i&mdHM$0+l2+Jx7oi%(LdKK)#91_r-fmb=jj@|7rj|Z(xkE OorV5&CQDafy6}G>-6%GI&POn5DH7j!NL%ug-FitS+SIr~w%!a0+D$S!NIhbYv7f z&aenGs6{D90vrbcLAc0{=wT6}(?Vz>1JdH9Faosi`(FP3?tRbiyuaspndQj=48wFF zLASkef5;HWDm07tMmr(ltZmgwQwqnbBd<>s<|E-uT<`sx4IG=W*&(EE4-#_qCf?GX zW7C?`o0CeAaN2TTaJY(NW6s(4rryBs?|TIwcawRR%^^|oW&X&HGNZ}$I>&0v6$Rm; zc>d_Fb+4Ocp4F30*Zwvm?Pz6ZQ|TIx)$P0*X#54|$y}PSK3(QnZ{BM$I>*0V6rQY| zE5LayYRA|ZB;*Mz4t^XV^Q^ND8ecJjG*d~%Nj{4?Hv8^LY2phwZ&rEGnNFE!v-0uf zZ>M;7=0Df>^f4T(N(@<6_7?u%X{N?vNGA7b`JvorBPuu?pSoz(eU2@BIO3>oL_(pZ zyD7pblLtt?PYF`;xi?e1jk8`HTijBVAso zmT=(@B;*S=4Xb_e`<6!Ofo?v1de@(&sRJB)e$l4iI;@bO^$pJ)ag@oo+1KAbFGq^B z9Oql2CXUtG4YYRLqxq`;38DEa@?&|N@1)f24&;l9g?yi@X+DMVDZ=^uepO^aKChjS z?=uIQ&tkU?kMp&^tLTS(+7pmZbeQIojCUiPZ@{Il7xHD_rTH|X?;xMQkyoVY9_`rg zOY^nawKW!Fdgu` 
z*&BG4ECik#l)!V%8^Ck51bEhOq&znaHUiJ90)XdhFH@fTTnAv@c08Z>;myFj8*MPp zD1~|T_@a71&>9Gu>aQoeul) z@2RfwY1p4$4g5Vj27c_85I$zyRUPmbeu?szqJNY4;j6111OB#N1pW?rQ2v;; zR>Y6K=i*KQf8W>wf8r&SKj~Qm@xy#V;s^X)`T+d+y&m{eX^0=<)!x8g>raD_@9{&( zH~13qBU=)P?-B9$;1b9ed57jxy0gR&|E=Aaf8wvN0rG_r{;|JBSBM`CPPG>yUv4qv zTQ*Jetx0yIegs*)3i%9}f9$VDR5I_!E#k*LLq-qy@uEHSt8=3LE_fw``0?|Q>=Wqs zD88TM{yFEeEs>sq&rm1~98n93^-HfA!(hTN+qKL%lL zZl~s#iTRz4(ZWlibx+qNE`?)cl6BcdugiW5t#P&yS$f)1Bju^{9a9>G?agg{kCaI8 z_kCZUYcj$8UQB)d<&D(Pt$mp4pGYCF;f3nF{`NA-&ZW_6NBp68Jd>ELCpGHw%cm&Z zURb|j<7~ajDbPpUtP1lMS`(CQuMLq&4c#?*Un?Cg_`4|8Pf+iG@9*_9JiZ+I<%d2h z4TS!=!pxzsgn$FVYj5U9Uq zU8*PiiNpPmI#V#zM%b_@De^#}1?Dkj&sX^ft$VlqvNc^OHT1ZAW!1v#LP(E@58B4( za2|_iYc3L?cj#j>s!?jBB9Xir94c%*Zy@R(uY-Q$*dk8>BuFaHR+t{ ze0y!FX@K}NzmK&LG)MMzvf^f$WN*dueR}JlpHbMk;WhLNJonERg>`NjlVzX0pr3Cv zx-Xt@LTkj$R%Pg2Qlor+3hSGHhOg+TE|Wy%Z49qeg?`TR#);3RrtJ;xA2k;WUKM4F zdg{)F-ZNhIY6Rw!yxdEQp&vNL+oDd`B0RD$h}?zeSD3mrF%I*;_4RWXL4Wmq)r#f9 z>Nu_8zWt{`zoGhI7S1O&c)@{+Ea)?aiH}l*u;H#J9Za_4`CWQBE%Gk(Hm7Yi^uql| z-{_=PDr}7n?>u7UE$CM{kIxB&A1|I#x_Mt}x_0NyPF7_?1QnSo37p0IEVP)8C&r|I0KRx*Va}&+)pLBZ$ z{p0~9rJBO}7?W48Z@c327pyvaa2)36-)+w8484=8d3vZ4e&;1oUHB{K4aw&7>b-d62SSYU!9s+;p=L~dAlFnLvv23QVl@NB? 
z-P%dnQ6@8w%T-P#+T2!0>5f7t6p$|UJE2`X`}&`+*ztnLSW zbMC~5CxVY8c!c7>8_@6ZGVI`s^J^I2?apZEqawy!Xc9aV4OS~Q*T^Jkr=MjOOn~0( z{07rK&`->@IN%~IR#)3=drTGj>=M_{_b}J7i*h9H7QCMxgV)%5LmyI`b1GC=9bV(# z>ua=35`Ql8p~4pEXV~cuDuVvak)v@^VYT$(f%vc2q0cI=H#v;+JC@tu`x@?F>6Wsa zq7Y?8aegdCdwk$v%6>5Z&=Ozw#UX zLN%loT4SFqR(xlgeCYcu{|3wtcc1^pqzZa{OVN+I`b9aqywv_b2U$VU` zaelw&yCy!CT4<(+cT4@R^k_(6@ys&J544ZjnKu`D<9p6qjiK*qe&(G*>Cx!jw>_7V zJ~d6<;RfbYng{MLkXrP;8@=nhP%2wFY3#T5&+z}JvE6UhV?J?V*41pO#gZJC{pbHR zjq`nyq&k=Mv6F3{I^g^QhG}@7gg*LrV&)-JFVkllHx-b-iO=-^-ToAMU7h!Xb)o-n z^Nr zbN_jD6yZNBtH>@mfAXI-_n*fs?!U*;BKfb2$t$`4==sBcRjS;7VKE~7H|nX2AN1ru z>^t&bl?wOY*%c!E7vO!eyWD?0pojma@8teVX%OMR6b0M9(3AgaF^B&acj5kX?k>WA zcIB5}%Kc}EIsDghoBQwKcoF{7xAKjF9{#gd;r{#EQ0~70BK)@|W8oFK|H7ds|1IJE ziy11C{{p-hLr?xImHY1!_urEBBKglJ3;ToluP5Gr%0E4xf1{3zlz%f{Zo~N^|3V6Q z{%IBS{M*+{ME-^6EIY&V&mZ|l`Io27^Uo?(ME;GBsn`HL^6%^qo_}9+c>dLaSpFd&DgOqS@cjEVRHXd-edB>V|AL`M{xuDe=ift-@^6aAUg#z zzvNfu%lB^?^t6Ak^ZmQ-lZgFm{GjMK^w__B{^0r2{yoe0@Ak(c_HRUQ!(#dVrToYK zoiv>9-ytVN?BB(KIq(PV-VKzje&~NXF}(l%oGMcP^SJRpdH;J0J^Ekg$GrcU z^%K$m*3HcKgdY7b{ubuc{~CDz3z;WU|JxyIVNU%I=fnD66YqbM_KVd2(y!9rr~an| z|4{!k=l##$K&1ZXtkFx}|8V})|MZsd{x|W7Nc~Tt82gj@pQeSp|JCvSXTMma{%7+7 z{-pk=3V%|+AAml_`aSvr_4`4C@cpRYPsJSle&le>soz`Td7-)}{}vVQ*+ zbM*V=*Ktk#9{qs&{UP*A>h~|e3#{K8%lrKR-tWKf!5sa5^-9dq@1^K_)bGz=f3kl6 z2y^0(o0x+?f?o6c8=S`PF9rET_ov&8Iqt7~How2U*spYd;iE7If6()!`&%{`*SJ5W zp}5BVWqIP7?ynoJ!5?18E8-8kiTwWlpkFZlKpxZmnIbRf{?39Q=>9H$#rea3qdIZ_ zrOjack*c#Ae;@v9_2d5g{FM8z+fv3K)BVw3h(Gpsf*$_cH=p}&mnGv5!+mKCR;lIwi^BIM|8;5L{u`;o_~WofEA;SRvO4DQU(iYJKOberAKMDY zVGjS*7hw+mX#XVVk37a7yC->L4*#t|zbF4q$>9ETe!=*|ARqmN_`@E2NB+}S=l&ab zpYcckr|rV;#@GKCX;E6!)!x9{HF3g6H4u z9z6f%?_m7V$88VJ7yMDb6?*L79!`A!PQJkSWBVoWH0|H8V|@SiT_)#`YQ`TXZOA9$ z5AQC1Y=Y5p%hxuDMe^_es{^uFU```0Ej6cF^b9w(uih>^f z?{){?|F#7af9N_!`?CHwwtYR$kNRI%-v2}c;t#*K?%jF+E7}e{`rowQy#F0AVEnP! 
z0PjEbzt-EBqyIUBe~3Rk-ZK8U&<*)V{Bij)=IDP-xIf~Lhn2)1`p2!nchvtZ>Gucl z$3h?8|IWHG{*a7X!24h4ZqTFu4eT!Gj~R?Vw0`6Lr~YS*^CAB5eariwrxWpqL}B@Q z-v1ow{=pv&=-y95JJ@{i4{yyc4pTQrgDV#sr)JwOOD=Yszi~djiQ9=C|{Bff%=Z`XV)16@kaWPv+ z5B_*&i}M42xKHH#aoD>wywe@!XEZ1N2o8fD{Gp@8`9rBkX=-4RmRKo91HT5j|DIz``~kip|JAxO{;;k={xSc7zsY~| z%DDduf*F6jiRmx*AALUjH+ZC+Kgt<@9Lga*{5M6J`){Q^_n+$w#vj)Gi{$>>h4&x% zr}mlW-&h;QA5%-f@05SrCi48#u;lrdw43qA>we$l`Bw}*Aj?!x!)jM0oglAj}A==Z-dZO~)?*0=Ee8yLd)<6h!S=&^q_uz!d@J}>0^H+m)G zkI~awq%l7Xf_?!CQBi{dN(;0un`MY3_{`U#L?+|}{KF#~zqQ4k_ zjCKDh?|;Z2>VK)k+1WBd_Y3I3w~7h#C=L;o98%lqH29L66B(de`E`*Rff6ZJo3 zN8bNtnKS+vc5gfM=zl55XV(A1c>minl<`Mmsx9W!|M32@{@2X=pYeOfA0?V*n4|ys zpN409??*hw9R2>H8lD&UBc>RiPyOBqbNc@R z@eA=s7hF@nx5qX0`+8hczlUF_-+RNa)bDMpF From dc805cc4a09d29f27b3febd084feb659e74b9d08 Mon Sep 17 00:00:00 2001 From: Leila Ghaffari <49916147+LeilaGhaffari@users.noreply.github.com> Date: Mon, 18 Jul 2022 09:46:29 -0600 Subject: [PATCH 146/172] Fluids - Newtonian with Primitive variables (#1011) * Fluids - Initial commit for Newtonian primitive variables * Fluids - include A0 (dU/dY) in the output * Fluids - added ICs for IG in primitive variables * Fluids - added Jacobean QFunction for primitive variables * Fluids - added RHS QFunction for primitive variables * Fluids - fixed compilation errors and warnings * Fluids - added ICs for density_current with primitive variables * Fluids - In/OutFlow BCs for channel in primitive variables * Adding the missing parts after rebasing onto main * Fluids - Use correct component names for primitive variables * Fluids - Primitive variables supported only with implicit scheme * Fluids - drop in/outflow for channel flow and call Exact_Channel_Prim() in Exact_Channel() * Fluids - Set solver's QFunction data in an if-else statement * Fluids - style * Fluids - add a comment to explain why the the gravity body force is excluded from the potential energy. 
* Fluids - Exact_Channel return State * Fluids - density_current: some style and cleanup * Fluids - DC: refactor & cleanup * Fluids - Singel QFunction for prim&cons * Fluids - Use absolute temperature * Fluids - DC: Fix pressure --- examples/fluids/navierstokes.h | 16 +- examples/fluids/problems/channel.c | 14 +- examples/fluids/problems/densitycurrent.c | 61 +-- examples/fluids/problems/newtonian.c | 81 ++-- examples/fluids/qfunctions/channel.h | 83 ++-- examples/fluids/qfunctions/densitycurrent.h | 86 ++-- examples/fluids/qfunctions/newtonian.h | 398 ++++++++++++++++++- examples/fluids/qfunctions/newtonian_state.h | 61 +++ examples/fluids/qfunctions/newtonian_types.h | 1 + examples/fluids/src/setupdm.c | 22 +- 10 files changed, 670 insertions(+), 153 deletions(-) diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h index d90e59fb59..e3ee6a3960 100644 --- a/examples/fluids/navierstokes.h +++ b/examples/fluids/navierstokes.h @@ -194,6 +194,7 @@ struct Physics_private { EulerTestType euler_test; StabilizationType stab; PetscBool implicit; + PetscBool primitive; PetscBool has_curr_time; PetscBool has_neumann; CeedContextFieldLabel solution_time_label; @@ -251,20 +252,15 @@ extern PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *ctx); // Print function for each problem -extern PetscErrorCode PRINT_DENSITY_CURRENT(ProblemData *problem, - AppCtx app_ctx); +extern PetscErrorCode PRINT_NEWTONIAN(ProblemData *problem, AppCtx app_ctx); -extern PetscErrorCode PRINT_EULER_VORTEX(ProblemData *problem, - AppCtx app_ctx); +extern PetscErrorCode PRINT_EULER_VORTEX(ProblemData *problem, AppCtx app_ctx); -extern PetscErrorCode PRINT_SHOCKTUBE(ProblemData *problem, - AppCtx app_ctx); +extern PetscErrorCode PRINT_SHOCKTUBE(ProblemData *problem, AppCtx app_ctx); -extern PetscErrorCode PRINT_ADVECTION(ProblemData *problem, - AppCtx app_ctx); +extern PetscErrorCode PRINT_ADVECTION(ProblemData *problem, AppCtx app_ctx); -extern PetscErrorCode 
PRINT_ADVECTION2D(ProblemData *problem, - AppCtx app_ctx); +extern PetscErrorCode PRINT_ADVECTION2D(ProblemData *problem, AppCtx app_ctx); // ----------------------------------------------------------------------------- // libCEED functions diff --git a/examples/fluids/problems/channel.c b/examples/fluids/problems/channel.c index 53350370b2..8063b95ba1 100644 --- a/examples/fluids/problems/channel.c +++ b/examples/fluids/problems/channel.c @@ -28,12 +28,14 @@ PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, void *ctx) { // SET UP Channel // ------------------------------------------------------ CeedQFunctionContextDestroy(&problem->ics.qfunction_context); - problem->ics.qfunction = ICsChannel; - problem->ics.qfunction_loc = ICsChannel_loc; - problem->apply_inflow.qfunction = Channel_Inflow; - problem->apply_inflow.qfunction_loc = Channel_Inflow_loc; - problem->apply_outflow.qfunction = Channel_Outflow; - problem->apply_outflow.qfunction_loc = Channel_Outflow_loc; + problem->ics.qfunction = ICsChannel; + problem->ics.qfunction_loc = ICsChannel_loc; + if (!user->phys->primitive) { + problem->apply_inflow.qfunction = Channel_Inflow; + problem->apply_inflow.qfunction_loc = Channel_Inflow_loc; + problem->apply_outflow.qfunction = Channel_Outflow; + problem->apply_outflow.qfunction_loc = Channel_Outflow_loc; + } // -- Command Line Options CeedScalar umax = 10.; // m/s diff --git a/examples/fluids/problems/densitycurrent.c b/examples/fluids/problems/densitycurrent.c index 27c4c40078..869d1ee213 100644 --- a/examples/fluids/problems/densitycurrent.c +++ b/examples/fluids/problems/densitycurrent.c @@ -14,21 +14,22 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *ctx) { - PetscInt ierr; - SetupContext setup_context; - User user = *(User *)ctx; - MPI_Comm comm = PETSC_COMM_WORLD; + PetscInt ierr; + MPI_Comm comm = PETSC_COMM_WORLD; + User user = *(User *)ctx; + DensityCurrentContext dc_ctx; + CeedQFunctionContext density_current_context; + 
NewtonianIdealGasContext newtonian_ig_ctx; PetscFunctionBeginUser; ierr = NS_NEWTONIAN_IG(problem, dm, ctx); CHKERRQ(ierr); + ierr = PetscCalloc1(1, &dc_ctx); CHKERRQ(ierr); // ------------------------------------------------------ // SET UP DENSITY_CURRENT // ------------------------------------------------------ - problem->ics.qfunction = ICsDC; + CeedQFunctionContextDestroy(&problem->ics.qfunction_context); + problem->ics.qfunction = ICsDC; problem->ics.qfunction_loc = ICsDC_loc; - problem->bc = Exact_DC; - CeedQFunctionContextGetData(problem->ics.qfunction_context, CEED_MEM_HOST, - &setup_context); // ------------------------------------------------------ // Create the libCEED context @@ -85,10 +86,10 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *ctx) { PetscOptionsEnd(); - PetscScalar meter = user->units->meter; - PetscScalar second = user->units->second; - PetscScalar Kelvin = user->units->Kelvin; - PetscScalar Pascal = user->units->Pascal; + PetscScalar meter = user->units->meter; + PetscScalar second = user->units->second; + PetscScalar Kelvin = user->units->Kelvin; + PetscScalar Pascal = user->units->Pascal; rc = fabs(rc) * meter; theta0 *= Kelvin; thetaC *= Kelvin; @@ -97,21 +98,29 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *ctx) { for (PetscInt i = 0; i < 3; i++) center[i] *= meter; - setup_context->theta0 = theta0; - setup_context->thetaC = thetaC; - setup_context->P0 = P0; - setup_context->N = N; - setup_context->rc = rc; - setup_context->center[0] = center[0]; - setup_context->center[1] = center[1]; - setup_context->center[2] = center[2]; - setup_context->dc_axis[0] = dc_axis[0]; - setup_context->dc_axis[1] = dc_axis[1]; - setup_context->dc_axis[2] = dc_axis[2]; + dc_ctx->theta0 = theta0; + dc_ctx->thetaC = thetaC; + dc_ctx->P0 = P0; + dc_ctx->N = N; + dc_ctx->rc = rc; + dc_ctx->center[0] = center[0]; + dc_ctx->center[1] = center[1]; + dc_ctx->center[2] = center[2]; + dc_ctx->dc_axis[0] = 
dc_axis[0]; + dc_ctx->dc_axis[1] = dc_axis[1]; + dc_ctx->dc_axis[2] = dc_axis[2]; - problem->bc_ctx = - setup_context; // This is bad, context data should only be accessed via Get/Restore - CeedQFunctionContextRestoreData(problem->ics.qfunction_context, &setup_context); + CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context, + CEED_MEM_HOST, &newtonian_ig_ctx); + dc_ctx->newtonian_ctx = *newtonian_ig_ctx; + CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context, + &newtonian_ig_ctx); + CeedQFunctionContextCreate(user->ceed, &density_current_context); + CeedQFunctionContextSetData(density_current_context, CEED_MEM_HOST, + CEED_USE_POINTER, sizeof(*dc_ctx), dc_ctx); + CeedQFunctionContextSetDataDestroy(density_current_context, CEED_MEM_HOST, + FreeContextPetsc); + problem->ics.qfunction_context = density_current_context; PetscFunctionReturn(0); } diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index 16f2dffa88..bd07858d76 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -38,12 +38,14 @@ static PetscErrorCode UnitTests_Newtonian(User user, NewtonianIdealGasContext gas) { Units units = user->units; - const CeedScalar eps = 1e-6; - const CeedScalar kg = units->kilogram, m = units->meter, sec = units->second, + const CeedScalar eps = 1e-6; + const CeedScalar kg = units->kilogram, + m = units->meter, + sec = units->second, Pascal = units->Pascal; - PetscFunctionBeginUser; - const CeedScalar rho = 1.2 * kg / (m*m*m), u = 40 * m/sec; + const CeedScalar rho = 1.2 * kg / (m*m*m), + u = 40 * m/sec; CeedScalar U[5] = {rho, rho*u, rho *u*1.1, rho *u*1.2, 250e3*Pascal + .5*rho *u*u}; const CeedScalar x[3] = {.1, .2, .3}; State s = StateFromU(gas, U, x); @@ -74,7 +76,8 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { StabilizationType stab; MPI_Comm comm = PETSC_COMM_WORLD; PetscBool implicit; - PetscBool has_curr_time = PETSC_FALSE, 
unit_tests; + PetscBool has_curr_time = PETSC_FALSE, + prim_var, unit_tests; PetscInt ierr; NewtonianIdealGasContext newtonian_ig_ctx; CeedQFunctionContext newtonian_ig_context; @@ -92,28 +95,12 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { problem->jac_data_size_sur = 11; problem->setup_vol.qfunction = Setup; problem->setup_vol.qfunction_loc = Setup_loc; - problem->ics.qfunction = ICsNewtonianIG; - problem->ics.qfunction_loc = ICsNewtonianIG_loc; problem->setup_sur.qfunction = SetupBoundary; problem->setup_sur.qfunction_loc = SetupBoundary_loc; - problem->apply_vol_rhs.qfunction = RHSFunction_Newtonian; - problem->apply_vol_rhs.qfunction_loc = RHSFunction_Newtonian_loc; - problem->apply_vol_ifunction.qfunction = IFunction_Newtonian; - problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc; - problem->apply_vol_ijacobian.qfunction = IJacobian_Newtonian; - problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_loc; - problem->apply_inflow.qfunction = BoundaryIntegral; - problem->apply_inflow.qfunction_loc = BoundaryIntegral_loc; - problem->apply_inflow_jacobian.qfunction = BoundaryIntegral_Jacobian; - problem->apply_inflow_jacobian.qfunction_loc = BoundaryIntegral_Jacobian_loc; - problem->apply_outflow.qfunction = PressureOutflow; - problem->apply_outflow.qfunction_loc = PressureOutflow_loc; - problem->apply_outflow_jacobian.qfunction = PressureOutflow_Jacobian; - problem->apply_outflow_jacobian.qfunction_loc = PressureOutflow_Jacobian_loc; problem->bc = NULL; problem->bc_ctx = setup_context; problem->non_zero_time = PETSC_FALSE; - problem->print_info = PRINT_DENSITY_CURRENT; + problem->print_info = PRINT_NEWTONIAN; // ------------------------------------------------------ // Create the libCEED context @@ -125,11 +112,11 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { CeedScalar mu = 1.8e-5; // Pa s, dynamic viscosity CeedScalar k = 0.02638; // W/(m K) CeedScalar c_tau = 0.5; // - - 
CeedScalar Ctau_t = 1.0; // - - CeedScalar Ctau_v = 36.0; // TODO make function of degree - CeedScalar Ctau_C = 1.0; // TODO make function of degree - CeedScalar Ctau_M = 1.0; // TODO make function of degree - CeedScalar Ctau_E = 1.0; // TODO make function of degree + CeedScalar Ctau_t = 1.0; // - + CeedScalar Ctau_v = 36.0; // TODO make function of degree + CeedScalar Ctau_C = 1.0; // TODO make function of degree + CeedScalar Ctau_M = 1.0; // TODO make function of degree + CeedScalar Ctau_E = 1.0; // TODO make function of degree PetscReal domain_min[3], domain_max[3], domain_size[3]; ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr); for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i]; @@ -140,7 +127,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { PetscScalar meter = 1; // 1 meter in scaled length units PetscScalar kilogram = 1; // 1 kilogram in scaled mass units PetscScalar second = 1; // 1 second in scaled time units - PetscScalar Kelvin = 1; // 1 Kelvin in scaled temperature units + PetscScalar Kelvin = 1; // 1 Kelvin in scaled temperature units PetscScalar W_per_m_K, Pascal, J_per_kg_K, m_per_squared_s; // ------------------------------------------------------ @@ -148,6 +135,34 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { // ------------------------------------------------------ PetscOptionsBegin(comm, NULL, "Options for Newtonian Ideal Gas based problem", NULL); + // -- Conservative vs Primitive variables + ierr = PetscOptionsBool("-primitive", "Use primitive variables", + NULL, prim_var=PETSC_FALSE, &prim_var, NULL); CHKERRQ(ierr); + if (prim_var) { + problem->ics.qfunction = ICsNewtonianIG_Prim; + problem->ics.qfunction_loc = ICsNewtonianIG_Prim_loc; + problem->apply_vol_ifunction.qfunction = IFunction_Newtonian_Prim; + problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_Prim_loc; + problem->apply_vol_ijacobian.qfunction = 
IJacobian_Newtonian_Prim; + problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_Prim_loc; + } else { + problem->ics.qfunction = ICsNewtonianIG; + problem->ics.qfunction_loc = ICsNewtonianIG_loc; + problem->apply_vol_rhs.qfunction = RHSFunction_Newtonian; + problem->apply_vol_rhs.qfunction_loc = RHSFunction_Newtonian_loc; + problem->apply_vol_ifunction.qfunction = IFunction_Newtonian; + problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc; + problem->apply_vol_ijacobian.qfunction = IJacobian_Newtonian; + problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_loc; + problem->apply_inflow.qfunction = BoundaryIntegral; + problem->apply_inflow.qfunction_loc = BoundaryIntegral_loc; + problem->apply_inflow_jacobian.qfunction = BoundaryIntegral_Jacobian; + problem->apply_inflow_jacobian.qfunction_loc = BoundaryIntegral_Jacobian_loc; + problem->apply_outflow.qfunction = PressureOutflow; + problem->apply_outflow.qfunction_loc = PressureOutflow_loc; + problem->apply_outflow_jacobian.qfunction = PressureOutflow_Jacobian; + problem->apply_outflow_jacobian.qfunction_loc = PressureOutflow_Jacobian_loc; + } // -- Physics ierr = PetscOptionsScalar("-cv", "Heat capacity at constant volume", @@ -208,6 +223,10 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { "Warning! 
Use -stab supg only with -implicit\n"); CHKERRQ(ierr); } + if (prim_var && !implicit) { + SETERRQ(comm, PETSC_ERR_ARG_NULL, + "RHSFunction is not provided for primitive variables (use -primitive only with -implicit)\n"); + } PetscOptionsEnd(); // ------------------------------------------------------ @@ -252,6 +271,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { // -- Solver Settings user->phys->stab = stab; user->phys->implicit = implicit; + user->phys->primitive = prim_var; user->phys->has_curr_time = has_curr_time; // -- QFunction Context @@ -268,6 +288,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { newtonian_ig_ctx->Ctau_E = Ctau_E; newtonian_ig_ctx->stabilization = stab; newtonian_ig_ctx->is_implicit = implicit; + newtonian_ig_ctx->primitive = prim_var; ierr = PetscArraycpy(newtonian_ig_ctx->g, g, 3); CHKERRQ(ierr); CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context); @@ -311,8 +332,8 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { PetscFunctionReturn(0); } -PetscErrorCode PRINT_DENSITY_CURRENT(ProblemData *problem, - AppCtx app_ctx) { +PetscErrorCode PRINT_NEWTONIAN(ProblemData *problem, AppCtx app_ctx) { + MPI_Comm comm = PETSC_COMM_WORLD; PetscErrorCode ierr; NewtonianIdealGasContext newtonian_ctx; diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h index 39b1c6872f..96306663b4 100644 --- a/examples/fluids/qfunctions/channel.h +++ b/examples/fluids/qfunctions/channel.h @@ -15,6 +15,7 @@ #include #include #include "newtonian_types.h" +#include "newtonian_state.h" #include "utils.h" typedef struct ChannelContext_ *ChannelContext; @@ -29,43 +30,42 @@ struct ChannelContext_ { struct NewtonianIdealGasContext_ newtonian_ctx; }; -CEED_QFUNCTION_HELPER CeedInt Exact_Channel(CeedInt dim, CeedScalar time, - const CeedScalar X[], CeedInt Nf, CeedScalar q[], void *ctx) { +CEED_QFUNCTION_HELPER State Exact_Channel(CeedInt dim, 
CeedScalar time, + const CeedScalar X[], CeedInt Nf, void *ctx) { const ChannelContext context = (ChannelContext)ctx; - const CeedScalar theta0 = context->theta0; - const CeedScalar P0 = context->P0; - const CeedScalar umax = context->umax; - const CeedScalar center = context->center; - const CeedScalar H = context->H; - const CeedScalar cv = context->newtonian_ctx.cv; - const CeedScalar cp = context->newtonian_ctx.cp; - const CeedScalar Rd = cp - cv; - const CeedScalar mu = context->newtonian_ctx.mu; - const CeedScalar k = context->newtonian_ctx.k; - - const CeedScalar y=X[1]; - + const CeedScalar theta0 = context->theta0; + const CeedScalar P0 = context->P0; + const CeedScalar umax = context->umax; + const CeedScalar center = context->center; + const CeedScalar H = context->H; + NewtonianIdealGasContext gas = &context->newtonian_ctx; + const CeedScalar cp = gas->cp; + const CeedScalar mu = gas->mu; + const CeedScalar k = gas->k; + // There is a gravity body force but it is excluded from + // the potential energy due to periodicity. 
+ gas->g[0] = 0.; + gas->g[1] = 0.; + gas->g[2] = 0.; + + const CeedScalar y = X[1]; const CeedScalar Pr = mu / (cp*k); const CeedScalar Ec = (umax*umax) / (cp*theta0); const CeedScalar theta = theta0*(1 + (Pr*Ec/3) * (1 - Square(Square((y-center)/H)))); - - const CeedScalar p = P0; - - const CeedScalar rho = p / (Rd*theta); - - q[0] = rho; - q[1] = rho * umax*(1 - Square((y-center)/H)); - q[2] = 0; - q[3] = 0; - q[4] = rho * (cv*theta) + .5 * (q[1]*q[1] + q[2]*q[2] + q[3]*q[3]) / rho; - - return 0; + CeedScalar Y[5] = {0.}; + Y[0] = P0; + Y[1] = umax*(1 - Square((y-center)/H)); + Y[2] = 0.; + Y[3] = 0.; + Y[4] = theta; + + return StateFromY(gas, Y, X); } // ***************************************************************************** -// This QFunction sets the initial condition +// This QFunction set the initial condition // ***************************************************************************** CEED_QFUNCTION(ICsChannel)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { @@ -75,15 +75,26 @@ CEED_QFUNCTION(ICsChannel)(void *ctx, CeedInt Q, // Outputs CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + // Context + const ChannelContext context = (ChannelContext)ctx; + // Quadrature Point Loop CeedPragmaSIMD for (CeedInt i=0; inewtonian_ctx.primitive) { + q0[0][i] = s.Y.pressure; + for (CeedInt j=0; j<3; j++) + q0[j+1][i] = s.Y.velocity[j]; + q0[4][i] = s.Y.temperature; + } else { + q0[0][i] = s.U.density; + for (CeedInt j=0; j<3; j++) + q0[j+1][i] = s.U.momentum[j]; + q0[4][i] = s.U.E_total; + } - for (CeedInt j=0; j<5; j++) - q0[j][i] = q[j]; } // End of Quadrature Point Loop return 0; } @@ -119,8 +130,12 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q, // Calcualte prescribed inflow values const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]}; + State s = Exact_Channel(3, 0., x, 5, ctx); CeedScalar q_exact[5] = {0.}; - Exact_Channel(3, 0., x, 5, q_exact, ctx); + q_exact[0] = s.U.density; + for (CeedInt j=0; 
j<3; j++) + q_exact[j+1] = s.U.momentum[j]; + q_exact[4] = s.U.E_total; const CeedScalar E_kinetic_exact = 0.5*Dot3(&q_exact[1], &q_exact[1]) / q_exact[0]; const CeedScalar velocity[3] = {q_exact[1]/q_exact[0], @@ -230,4 +245,6 @@ CEED_QFUNCTION(Channel_Outflow)(void *ctx, CeedInt Q, } // End Quadrature Point Loop return 0; } + +// ***************************************************************************** #endif // channel_h diff --git a/examples/fluids/qfunctions/densitycurrent.h b/examples/fluids/qfunctions/densitycurrent.h index 4405bb4975..8f503a8692 100644 --- a/examples/fluids/qfunctions/densitycurrent.h +++ b/examples/fluids/qfunctions/densitycurrent.h @@ -18,8 +18,21 @@ #include #include #include "newtonian_types.h" +#include "newtonian_state.h" #include "utils.h" +typedef struct DensityCurrentContext_ *DensityCurrentContext; +struct DensityCurrentContext_ { + CeedScalar theta0; + CeedScalar thetaC; + CeedScalar P0; + CeedScalar N; + CeedScalar rc; + CeedScalar center[3]; + CeedScalar dc_axis[3]; + struct NewtonianIdealGasContext_ newtonian_ctx; +}; + // ***************************************************************************** // This function sets the initial conditions and the boundary conditions // @@ -72,23 +85,23 @@ // This helper function provides support for the exact, time-dependent solution // (currently not implemented) and IC formulation for density current // ***************************************************************************** -CEED_QFUNCTION_HELPER int Exact_DC(CeedInt dim, CeedScalar time, - const CeedScalar X[], CeedInt Nf, CeedScalar q[], - void *ctx) { +CEED_QFUNCTION_HELPER State Exact_DC(CeedInt dim, CeedScalar time, + const CeedScalar X[], CeedInt Nf, void *ctx) { // Context - const SetupContext context = (SetupContext)ctx; - const CeedScalar theta0 = context->theta0; - const CeedScalar thetaC = context->thetaC; - const CeedScalar P0 = context->P0; - const CeedScalar N = context->N; - const CeedScalar cv = context->cv; - 
const CeedScalar cp = context->cp; - const CeedScalar *g_vec = context->g; - const CeedScalar rc = context->rc; - const CeedScalar *center = context->center; - const CeedScalar *dc_axis = context->dc_axis; - const CeedScalar Rd = cp - cv; - const CeedScalar g = -g_vec[2]; + const DensityCurrentContext context = (DensityCurrentContext)ctx; + const CeedScalar theta0 = context->theta0; + const CeedScalar thetaC = context->thetaC; + const CeedScalar P0 = context->P0; + const CeedScalar N = context->N; + const CeedScalar rc = context->rc; + const CeedScalar *center = context->center; + const CeedScalar *dc_axis = context->dc_axis; + NewtonianIdealGasContext gas = &context->newtonian_ctx; + const CeedScalar cp = gas->cp; + const CeedScalar cv = gas->cv; + const CeedScalar Rd = cp - cv; + const CeedScalar *g_vec = gas->g; + const CeedScalar g = -g_vec[2]; // Setup // -- Coordinates @@ -108,18 +121,16 @@ CEED_QFUNCTION_HELPER int Exact_DC(CeedInt dim, CeedScalar time, // -- Exner pressure, hydrostatic balance const CeedScalar Pi = 1. + g*g*(exp(-N*N*z/g) - 1.) 
/ (cp*theta0*N*N); - // -- Density - - const CeedScalar rho = P0 * pow(Pi, cv/Rd) / (Rd*theta); // Initial Conditions - q[0] = rho; - q[1] = 0.0; - q[2] = 0.0; - q[3] = 0.0; - q[4] = rho * (cv*theta*Pi + g*z); - - return 0; + CeedScalar Y[5] = {0.}; + Y[0] = P0 * pow(Pi, cp/Rd); + Y[1] = 0.0; + Y[2] = 0.0; + Y[3] = 0.0; + Y[4] = Pi * theta; + + return StateFromY(gas, Y, X); } // ***************************************************************************** @@ -133,19 +144,30 @@ CEED_QFUNCTION(ICsDC)(void *ctx, CeedInt Q, // Outputs CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + // Context + const DensityCurrentContext context = (DensityCurrentContext)ctx; + CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; inewtonian_ctx.primitive) { + q0[0][i] = s.Y.pressure; + for (CeedInt j=0; j<3; j++) + q0[j+1][i] = s.Y.velocity[j]; + q0[4][i] = s.Y.temperature; + } else { + q0[0][i] = s.U.density; + for (CeedInt j=0; j<3; j++) + q0[j+1][i] = s.U.momentum[j]; + q0[4][i] = s.U.E_total; + } } // End of Quadrature Point Loop return 0; } +// ***************************************************************************** + #endif // densitycurrent_h diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index fa6c3d769f..ffd03d39b8 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -222,6 +222,39 @@ CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q, return 0; } +// ***************************************************************************** +// This QFunction sets a "still" initial condition for generic Newtonian IG +// problems in primitive variables +// ***************************************************************************** +CEED_QFUNCTION(ICsNewtonianIG_Prim)(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) { + // Outputs + CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + + // Context + const SetupContext context 
= (SetupContext)ctx; + const CeedScalar theta0 = context->theta0; + const CeedScalar P0 = context->P0; + + // Quadrature Point Loop + CeedPragmaSIMD + for (CeedInt i=0; ig; const CeedScalar dt = context->dt; const CeedScalar gamma = cp / cv; - const CeedScalar Rd = cp-cv; + const CeedScalar Rd = cp - cv; CeedPragmaSIMD // Quadrature Point Loop @@ -575,13 +606,7 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, tau_strong_res[2] = Tau_d[1] * strong_res[2]; tau_strong_res[3] = Tau_d[1] * strong_res[3]; tau_strong_res[4] = Tau_d[2] * strong_res[4]; -// Alternate route (useful later with primitive variable code) -// this function was verified against PHASTA for as IC that was as close as possible -// computeFluxJacobian_NSp(jacob_F_conv_p, rho, u, E, Rd, cv); -// it has also been verified to compute a correct through the following -// stab[k][j] += jacob_F_conv_p[j][k][l] * tau_strong_res[l] // flux Jacobian wrt primitive -// applied in the triple loop below -// However, it is more flops than using the existing Jacobian wrt q after q_{,Y} viz + PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, tau_strong_res, tau_strong_res_conservative); for (CeedInt j=0; j<3; j++) @@ -606,6 +631,11 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, return 0; } +// ***************************************************************************** +// This QFunction implements the jacobean of the Navier-Stokes equations +// for implicit time stepping method. 
+// +// ***************************************************************************** CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { @@ -1071,4 +1101,350 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, } // ***************************************************************************** +// This QFunction implements the Navier-Stokes equations (mentioned above) in +// primitive variables and with implicit time stepping method +// +// ***************************************************************************** +CEED_QFUNCTION(IFunction_Newtonian_Prim)(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) { + // *INDENT-OFF* + // Inputs + const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], + (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + // Outputs + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], + (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1], + (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2]; + // *INDENT-ON* + // Context + NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; + const CeedScalar mu = context->mu; + const CeedScalar cv = context->cv; + const CeedScalar cp = context->cp; + const CeedScalar *g = context->g; + const CeedScalar dt = context->dt; + const CeedScalar gamma = cp / cv; + const CeedScalar Rd = cp - cv; + + CeedPragmaSIMD + // Quadrature Point Loop + for (CeedInt i=0; istabilization) { + case STAB_NONE: // Galerkin + break; + case STAB_SU: // SU + Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density); + tau_strong_conv[0] = Tau_d[0] * strong_conv[0]; + tau_strong_conv[1] = Tau_d[1] * strong_conv[1]; + tau_strong_conv[2] = 
Tau_d[1] * strong_conv[2]; + tau_strong_conv[3] = Tau_d[1] * strong_conv[3]; + tau_strong_conv[4] = Tau_d[2] * strong_conv[4]; + PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, + tau_strong_conv, tau_strong_conv_conservative); + for (CeedInt j=0; j<3; j++) + for (CeedInt k=0; k<5; k++) + for (CeedInt l=0; l<5; l++) + stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l]; + + for (CeedInt j=0; j<5; j++) + for (CeedInt k=0; k<3; k++) + Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] + + stab[j][1] * dXdx[k][1] + + stab[j][2] * dXdx[k][2]); + + break; + case STAB_SUPG: // SUPG + Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density); + tau_strong_res[0] = Tau_d[0] * strong_res[0]; + tau_strong_res[1] = Tau_d[1] * strong_res[1]; + tau_strong_res[2] = Tau_d[1] * strong_res[2]; + tau_strong_res[3] = Tau_d[1] * strong_res[3]; + tau_strong_res[4] = Tau_d[2] * strong_res[4]; + + PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, + tau_strong_res, tau_strong_res_conservative); + for (CeedInt j=0; j<3; j++) + for (CeedInt k=0; k<5; k++) + for (CeedInt l=0; l<5; l++) + stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l]; + + for (CeedInt j=0; j<5; j++) + for (CeedInt k=0; k<3; k++) + Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] + + stab[j][1] * dXdx[k][1] + + stab[j][2] * dXdx[k][2]); + break; + } + for (CeedInt j=0; j<5; j++) jac_data[j][i] = Y[j]; + for (CeedInt j=0; j<6; j++) jac_data[5+j][i] = kmstress[j]; + for (CeedInt j=0; j<3; j++) jac_data[5+6+j][i] = Tau_d[j]; + + } // End Quadrature Point Loop + + // Return + return 0; +} + +// ***************************************************************************** +// This QFunction implements the jacobean of the Navier-Stokes equations +// in primitive variables for implicit time stepping method. 
+// +// ***************************************************************************** +CEED_QFUNCTION(IJacobian_Newtonian_Prim)(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) { + // *INDENT-OFF* + // Inputs + const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], + (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], + (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + // Outputs + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], + (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; + // *INDENT-ON* + // Context + NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; + const CeedScalar *g = context->g; + const CeedScalar cp = context->cp; + const CeedScalar cv = context->cv; + const CeedScalar Rd = cp - cv; + const CeedScalar gamma = cp / cv; + + CeedPragmaSIMD + // Quadrature Point Loop + for (CeedInt i=0; iijacobian_time_shift * dU[j] - dbody_force[j]); + + if (1) { + CeedScalar jacob_F_conv[3][5][5] = {0}; + computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total, + gamma, g, x_i); + CeedScalar grad_dU[5][3]; + for (int j=0; j<3; j++) { + grad_dU[0][j] = grad_ds[j].U.density; + for (int k=0; k<3; k++) grad_dU[k+1][j] = grad_ds[j].U.momentum[k]; + grad_dU[4][j] = grad_ds[j].U.E_total; + } + CeedScalar dstrong_conv[5] = {0.}; + for (int j=0; j<3; j++) + for (int k=0; k<5; k++) + for (int l=0; l<5; l++) + dstrong_conv[k] += jacob_F_conv[j][k][l] * grad_dU[l][j]; + + CeedScalar dstrong_res[5]; + for (int j=0; j<5; j++) + dstrong_res[j] = context->ijacobian_time_shift * dU[j] + + dstrong_conv[j] - + dbody_force[j]; + + CeedScalar dtau_strong_res[5] = {0.}, + dtau_strong_res_conservative[5] = {0.}; + dtau_strong_res[0] = Tau_d[0] * dstrong_res[0]; + dtau_strong_res[1] = Tau_d[1] * dstrong_res[1]; + 
dtau_strong_res[2] = Tau_d[1] * dstrong_res[2]; + dtau_strong_res[3] = Tau_d[1] * dstrong_res[3]; + dtau_strong_res[4] = Tau_d[2] * dstrong_res[4]; + PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, + dtau_strong_res, dtau_strong_res_conservative); + CeedScalar dstab[5][3] = {0}; + for (int j=0; j<3; j++) + for (int k=0; k<5; k++) + for (int l=0; l<5; l++) + dstab[k][j] += jacob_F_conv[j][k][l] * dtau_strong_res_conservative[l]; + + for (int j=0; j<5; j++) + for (int k=0; k<3; k++) + Grad_v[k][j][i] += wdetJ*(dstab[j][0] * dXdx[k][0] + + dstab[j][1] * dXdx[k][1] + + dstab[j][2] * dXdx[k][2]); + + } + } // End Quadrature Point Loop + return 0; +} +// ***************************************************************************** + #endif // newtonian_h diff --git a/examples/fluids/qfunctions/newtonian_state.h b/examples/fluids/qfunctions/newtonian_state.h index 47ec180d3e..959eb9c33f 100644 --- a/examples/fluids/qfunctions/newtonian_state.h +++ b/examples/fluids/qfunctions/newtonian_state.h @@ -68,6 +68,42 @@ CEED_QFUNCTION_HELPER StatePrimitive StatePrimitiveFromConservative_fwd( return dY; } +CEED_QFUNCTION_HELPER StateConservative StateConservativeFromPrimitive( + NewtonianIdealGasContext gas, StatePrimitive Y, const CeedScalar x[3]) { + StateConservative U; + CeedScalar R = gas->cp - gas->cv; + U.density = Y.pressure / (R * Y.temperature); + for (int i=0; i<3; i++) U.momentum[i] = U.density*Y.velocity[i]; + CeedScalar e_internal = gas->cv * Y.temperature; + CeedScalar e_kinetic = .5 * Dot3(Y.velocity, Y.velocity); + CeedScalar e_potential = -Dot3(gas->g, x); + CeedScalar e_total = e_internal + e_kinetic + e_potential; + U.E_total = U.density*e_total; + return U; +} + +CEED_QFUNCTION_HELPER StateConservative StateConservativeFromPrimitive_fwd( + NewtonianIdealGasContext gas, State s, StatePrimitive dY, + const CeedScalar x[3], const CeedScalar dx[3]) { + StateConservative dU; + CeedScalar R = gas->cp - gas->cv; + dU.density = (dY.pressure 
* s.Y.temperature - s.Y.pressure * dY.temperature) / + (R * s.Y.temperature * s.Y.temperature); + for (int i=0; i<3; i++) { + dU.momentum[i] = dU.density * s.Y.velocity[i] + s.U.density * dY.velocity[i]; + } + CeedScalar e_kinetic = .5 * Dot3(s.Y.velocity, s.Y.velocity); + CeedScalar de_kinetic = Dot3(dY.velocity, s.Y.velocity); + CeedScalar e_potential = -Dot3(gas->g, x); + CeedScalar de_potential = -Dot3(gas->g, dx); + CeedScalar e_internal = gas->cv * s.Y.temperature; + CeedScalar de_internal = gas->cv * dY.temperature; + CeedScalar e_total = e_internal + e_kinetic + e_potential; + CeedScalar de_total = de_internal + de_kinetic + de_potential; + dU.E_total = dU.density*e_total + s.U.density*de_total; + return dU; +} + CEED_QFUNCTION_HELPER State StateFromU(NewtonianIdealGasContext gas, const CeedScalar U[5], const CeedScalar x[3]) { State s; @@ -93,6 +129,31 @@ CEED_QFUNCTION_HELPER State StateFromU_fwd(NewtonianIdealGasContext gas, return ds; } +CEED_QFUNCTION_HELPER State StateFromY(NewtonianIdealGasContext gas, + const CeedScalar Y[5], const CeedScalar x[3]) { + State s; + s.Y.pressure = Y[0]; + s.Y.velocity[0] = Y[1]; + s.Y.velocity[1] = Y[2]; + s.Y.velocity[2] = Y[3]; + s.Y.temperature = Y[4]; + s.U = StateConservativeFromPrimitive(gas, s.Y, x); + return s; +} + +CEED_QFUNCTION_HELPER State StateFromY_fwd(NewtonianIdealGasContext gas, + State s, const CeedScalar dY[5], + const CeedScalar x[3], const CeedScalar dx[3]) { + State ds; + ds.Y.pressure = dY[0]; + ds.Y.velocity[0] = dY[1]; + ds.Y.velocity[1] = dY[2]; + ds.Y.velocity[2] = dY[3]; + ds.Y.temperature = dY[4]; + ds.U = StateConservativeFromPrimitive_fwd(gas, s, ds.Y, x, dx); + return ds; +} + CEED_QFUNCTION_HELPER void FluxInviscid(NewtonianIdealGasContext gas, State s, StateConservative Flux[3]) { for (CeedInt i=0; i<3; i++) { diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h index 3ea539caa4..5b5d3af457 100644 --- 
a/examples/fluids/qfunctions/newtonian_types.h +++ b/examples/fluids/qfunctions/newtonian_types.h @@ -50,6 +50,7 @@ struct NewtonianIdealGasContext_ { CeedScalar ijacobian_time_shift; CeedScalar P0; bool is_implicit; + bool primitive; StabilizationType stabilization; }; diff --git a/examples/fluids/src/setupdm.c b/examples/fluids/src/setupdm.c index d06270edab..6659686f5b 100644 --- a/examples/fluids/src/setupdm.c +++ b/examples/fluids/src/setupdm.c @@ -97,11 +97,23 @@ PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree, CHKERRQ(ierr); ierr = PetscFEDestroy(&fe); CHKERRQ(ierr); } - { - // Empty name for conserved field (because there is only one field) - PetscSection section; - ierr = DMGetLocalSection(dm, &section); CHKERRQ(ierr); - ierr = PetscSectionSetFieldName(section, 0, ""); CHKERRQ(ierr); + + // Empty name for conserved field (because there is only one field) + PetscSection section; + ierr = DMGetLocalSection(dm, &section); CHKERRQ(ierr); + ierr = PetscSectionSetFieldName(section, 0, ""); CHKERRQ(ierr); + if (phys->primitive) { + ierr = PetscSectionSetComponentName(section, 0, 0, "Pressure"); + CHKERRQ(ierr); + ierr = PetscSectionSetComponentName(section, 0, 1, "Velocity X"); + CHKERRQ(ierr); + ierr = PetscSectionSetComponentName(section, 0, 2, "Velocity Y"); + CHKERRQ(ierr); + ierr = PetscSectionSetComponentName(section, 0, 3, "Velocity Z"); + CHKERRQ(ierr); + ierr = PetscSectionSetComponentName(section, 0, 4, "Temperature"); + CHKERRQ(ierr); + } else { ierr = PetscSectionSetComponentName(section, 0, 0, "Density"); CHKERRQ(ierr); ierr = PetscSectionSetComponentName(section, 0, 1, "Momentum X"); From c10dcd270d49709391078e668f60e2f13d7bc4a8 Mon Sep 17 00:00:00 2001 From: James Wright Date: Mon, 18 Jul 2022 12:22:12 -0600 Subject: [PATCH 147/172] fluids: Add missing \n to blasius warning message --- examples/fluids/problems/blasius.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/fluids/problems/blasius.c
b/examples/fluids/problems/blasius.c index 4243d66b74..b990bc2f77 100644 --- a/examples/fluids/problems/blasius.c +++ b/examples/fluids/problems/blasius.c @@ -126,7 +126,7 @@ static PetscErrorCode ModifyMesh(MPI_Comm comm, DM dm, PetscInt dim, faces[1]+1, *num_node_locs); if (*num_node_locs > faces[1] +1) { ierr = PetscPrintf(comm, "WARNING: y_node_locs_path has more locations (%d) " - "than the mesh has nodes (%d). This maybe unintended.", + "than the mesh has nodes (%d). This maybe unintended.\n", *num_node_locs, faces[1]+1); CHKERRQ(ierr); } PetscScalar max_y = (*node_locs)[faces[1]]; From 6f4983205ad3149e8beeb8d60d860bbd9002156c Mon Sep 17 00:00:00 2001 From: James Wright Date: Mon, 18 Jul 2022 11:31:56 -0600 Subject: [PATCH 148/172] fluids: Change newt->primitive to is_primitive --- examples/fluids/problems/newtonian.c | 2 +- examples/fluids/qfunctions/channel.h | 2 +- examples/fluids/qfunctions/densitycurrent.h | 2 +- examples/fluids/qfunctions/newtonian_types.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index bd07858d76..081af8200d 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -288,7 +288,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { newtonian_ig_ctx->Ctau_E = Ctau_E; newtonian_ig_ctx->stabilization = stab; newtonian_ig_ctx->is_implicit = implicit; - newtonian_ig_ctx->primitive = prim_var; + newtonian_ig_ctx->is_primitive = prim_var; ierr = PetscArraycpy(newtonian_ig_ctx->g, g, 3); CHKERRQ(ierr); CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context); diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h index 96306663b4..b877d98841 100644 --- a/examples/fluids/qfunctions/channel.h +++ b/examples/fluids/qfunctions/channel.h @@ -83,7 +83,7 @@ CEED_QFUNCTION(ICsChannel)(void *ctx, CeedInt Q, for (CeedInt i=0; 
inewtonian_ctx.primitive) { + if (context->newtonian_ctx.is_primitive) { q0[0][i] = s.Y.pressure; for (CeedInt j=0; j<3; j++) q0[j+1][i] = s.Y.velocity[j]; diff --git a/examples/fluids/qfunctions/densitycurrent.h b/examples/fluids/qfunctions/densitycurrent.h index 8f503a8692..22a10d99ae 100644 --- a/examples/fluids/qfunctions/densitycurrent.h +++ b/examples/fluids/qfunctions/densitycurrent.h @@ -152,7 +152,7 @@ CEED_QFUNCTION(ICsDC)(void *ctx, CeedInt Q, for (CeedInt i=0; inewtonian_ctx.primitive) { + if (context->newtonian_ctx.is_primitive) { q0[0][i] = s.Y.pressure; for (CeedInt j=0; j<3; j++) q0[j+1][i] = s.Y.velocity[j]; diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h index 5b5d3af457..51d43f5877 100644 --- a/examples/fluids/qfunctions/newtonian_types.h +++ b/examples/fluids/qfunctions/newtonian_types.h @@ -50,7 +50,7 @@ struct NewtonianIdealGasContext_ { CeedScalar ijacobian_time_shift; CeedScalar P0; bool is_implicit; - bool primitive; + bool is_primitive; StabilizationType stabilization; }; From 57e55a1c25e80345e382270f49941411c9f30449 Mon Sep 17 00:00:00 2001 From: James Wright Date: Mon, 18 Jul 2022 11:53:52 -0600 Subject: [PATCH 149/172] fluids: Make newtonian boundary QFs primitive compatible --- examples/fluids/qfunctions/newtonian.h | 78 +++++++++++++++++--------- 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index ffd03d39b8..ea832beadb 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -793,9 +793,10 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, CeedPragmaSIMD for(CeedInt i=0; iis_primitive ? StateFromY(context, solution_state, x_i) + : StateFromU(context, solution_state, x_i); const CeedScalar wdetJb = (is_implicit ? -1. : 1.) 
* q_data_sur[0][i]; // ---- Normal vect @@ -846,11 +847,15 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, // -- Total Energy Density v[4][i] = -wdetJb * Flux[4]; - jac_data_sur[0][i] = s.U.density; - jac_data_sur[1][i] = s.Y.velocity[0]; - jac_data_sur[2][i] = s.Y.velocity[1]; - jac_data_sur[3][i] = s.Y.velocity[2]; - jac_data_sur[4][i] = s.U.E_total; + if (context->is_primitive) { + jac_data_sur[0][i] = s.Y.pressure; + for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.Y.velocity[j]; + jac_data_sur[4][i] = s.Y.temperature; + } else { + jac_data_sur[0][i] = s.U.density; + for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.U.momentum[j]; + jac_data_sur[4][i] = s.U.E_total; + } for (int j=0; j<6; j++) jac_data_sur[5+j][i] = kmstress[j]; } return 0; @@ -888,13 +893,19 @@ CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q, {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]} }; - CeedScalar U[5], kmstress[6], dU[5], dx_i[3] = {0.}; - for (int j=0; j<5; j++) U[j] = jac_data_sur[j][i]; - for (int j=0; j<6; j++) kmstress[j] = jac_data_sur[5+j][i]; - for (int j=0; j<3; j++) U[j+1] *= U[0]; - for (int j=0; j<5; j++) dU[j] = dq[j][i]; - State s = StateFromU(context, U, x_i); - State ds = StateFromU_fwd(context, s, dU, x_i, dx_i); + CeedScalar state[5], kmstress[6], dstate[5], dx_i[3] = {0.}; + for (int j=0; j<5; j++) state[j] = jac_data_sur[j][i]; + for (int j=0; j<6; j++) kmstress[j] = jac_data_sur[5+j][i]; + for (int j=0; j<5; j++) dstate[j] = dq[j][i]; + + State s, ds; + if (context->is_primitive) { + s = StateFromY(context, state, x_i); + ds = StateFromY_fwd(context, s, dstate, x_i, dx_i); + } else { + s = StateFromU(context, state, x_i); + ds = StateFromU_fwd(context, s, dstate, x_i, dx_i); + } State grad_ds[3]; for (CeedInt j=0; j<3; j++) { @@ -954,9 +965,10 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, for (CeedInt i=0; iis_primitive ? 
StateFromY(context, solution_state, x_i) + : StateFromU(context, solution_state, x_i); s.Y.pressure = P0; // -- Interp-to-Interp q_data @@ -1014,11 +1026,15 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, v[4][i] = -wdetJb * Flux[4]; // Save values for Jacobian - jac_data_sur[0][i] = s.U.density; - jac_data_sur[1][i] = s.Y.velocity[0]; - jac_data_sur[2][i] = s.Y.velocity[1]; - jac_data_sur[3][i] = s.Y.velocity[2]; - jac_data_sur[4][i] = s.U.E_total; + if (context->is_primitive) { + jac_data_sur[0][i] = s.Y.pressure; + for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.Y.velocity[j]; + jac_data_sur[4][i] = s.Y.temperature; + } else { + jac_data_sur[0][i] = s.U.density; + for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.U.momentum[j]; + jac_data_sur[4][i] = s.U.E_total; + } for (int j=0; j<6; j++) jac_data_sur[5+j][i] = kmstress[j]; } // End Quadrature Point Loop return 0; @@ -1056,13 +1072,19 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]} }; - CeedScalar U[5], kmstress[6], dU[5], dx_i[3] = {0.}; - for (int j=0; j<5; j++) U[j] = jac_data_sur[j][i]; - for (int j=0; j<6; j++) kmstress[j] = jac_data_sur[5+j][i]; - for (int j=0; j<3; j++) U[j+1] *= U[0]; - for (int j=0; j<5; j++) dU[j] = dq[j][i]; - State s = StateFromU(context, U, x_i); - State ds = StateFromU_fwd(context, s, dU, x_i, dx_i); + CeedScalar state[5], kmstress[6], dstate[5], dx_i[3] = {0.}; + for (int j=0; j<5; j++) state[j] = jac_data_sur[j][i]; + for (int j=0; j<6; j++) kmstress[j] = jac_data_sur[5+j][i]; + for (int j=0; j<5; j++) dstate[j] = dq[j][i]; + + State s, ds; + if (context->is_primitive) { + s = StateFromY(context, state, x_i); + ds = StateFromY_fwd(context, s, dstate, x_i, dx_i); + } else { + s = StateFromU(context, state, x_i); + ds = StateFromU_fwd(context, s, dstate, x_i, dx_i); + } s.Y.pressure = context->P0; ds.Y.pressure = 0.; From 192a7459e0ceeee0ec4cde222205ce48f3d6dc48 Mon Sep 17 00:00:00 2001 From: James 
Wright Date: Mon, 18 Jul 2022 15:49:24 -0600 Subject: [PATCH 150/172] fluids: Make stg components dependent on primitive --- examples/fluids/problems/stg_shur14.c | 14 +++++++++++--- examples/fluids/problems/stg_shur14.h | 3 ++- examples/fluids/src/setupdm.c | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c index e267d6ba79..8cae0a5f3d 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -470,13 +470,21 @@ PetscErrorCode StrongSTGbcFunc(PetscInt dim, PetscReal time, PetscFunctionReturn(0); } -PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem) { +PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem, + Physics phys) { PetscErrorCode ierr; DMLabel label; - const PetscInt comps[] = {0, 1, 2, 3}; - const PetscInt num_comps = 4; PetscFunctionBeginUser; + PetscInt comps[5], num_comps=4; + if (phys->primitive) { + // {1,2,3,4} for u, v, w, T + for(int i=0; i<4; i++) comps[i] = i+1; + } else { + // {0,1,2,3} for rho, rho*u, rho*v, rho*w + for(int i=0; i<4; i++) comps[i] = i; + } + ierr = DMGetLabel(dm, "Face Sets", &label); CHKERRQ(ierr); // Set wall BCs if (bc->num_inflow > 0) { diff --git a/examples/fluids/problems/stg_shur14.h b/examples/fluids/problems/stg_shur14.h index af7fb58874..e7be94741a 100644 --- a/examples/fluids/problems/stg_shur14.h +++ b/examples/fluids/problems/stg_shur14.h @@ -16,7 +16,8 @@ extern PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, const CeedScalar P0, const CeedScalar ynodes[], const CeedInt nynodes); -extern PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem); +extern PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem, + Physics phys); extern PetscErrorCode SetupStrongSTG_QF(Ceed ceed, ProblemData *problem, CeedInt num_comp_x, CeedInt num_comp_q, CeedInt stg_data_size, diff --git a/examples/fluids/src/setupdm.c 
b/examples/fluids/src/setupdm.c index 6659686f5b..26fa2c349a 100644 --- a/examples/fluids/src/setupdm.c +++ b/examples/fluids/src/setupdm.c @@ -89,7 +89,7 @@ PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree, CHKERRQ(ierr); if (use_strongstg) { - ierr = SetupStrongSTG(dm, bc, problem); CHKERRQ(ierr); + ierr = SetupStrongSTG(dm, bc, problem, phys); CHKERRQ(ierr); } } From 7c4551aa234e0eff480e1dd63d526514ef84b00e Mon Sep 17 00:00:00 2001 From: James Wright Date: Mon, 18 Jul 2022 15:50:19 -0600 Subject: [PATCH 151/172] fluids: Make STG QFs primitive compatible --- examples/fluids/qfunctions/stg_shur14.h | 37 ++++++++++++++++++------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index c92710e2a4..6201f43afd 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -321,11 +321,19 @@ CEED_QFUNCTION(ICsSTG)(void *ctx, CeedInt Q, for(CeedInt i=0; inewtonian_ctx.is_primitive) { + q0[0][i] = P0; + q0[1][i] = u[0]; + q0[2][i] = u[1]; + q0[3][i] = u[2]; + q0[4][i] = theta0; + } else { + q0[0][i] = rho; + q0[1][i] = u[0] * rho; + q0[2][i] = u[1] * rho; + q0[3][i] = u[2] * rho; + q0[4][i] = rho * (0.5 * Dot3(u, u) + cv * theta0); + } } // End of Quadrature Point Loop return 0; } @@ -560,11 +568,20 @@ CEED_QFUNCTION(STGShur14_Inflow_StrongQF)(void *ctx, CeedInt Q, for (CeedInt j=0; j<3; j++) u[j] = ubar[j]; } - bcval[0][i] = scale[i] * rho; - bcval[1][i] = scale[i] * rho * u[0]; - bcval[2][i] = scale[i] * rho * u[1]; - bcval[3][i] = scale[i] * rho * u[2]; - bcval[4][i] = 0.; + if (stg_ctx->newtonian_ctx.is_primitive) { + bcval[0][i] = 0; + bcval[1][i] = scale[i] * u[0]; + bcval[2][i] = scale[i] * u[1]; + bcval[3][i] = scale[i] * u[2]; + bcval[4][i] = scale[i] * theta0; + + } else { + bcval[0][i] = scale[i] * rho; + bcval[1][i] = scale[i] * rho * u[0]; + bcval[2][i] = scale[i] * rho * u[1]; + bcval[3][i] = scale[i] * rho 
* u[2]; + bcval[4][i] = 0.; + } } return 0; } From 89da5dba42bf2ecbd957712d1512324c59154052 Mon Sep 17 00:00:00 2001 From: James Wright Date: Mon, 18 Jul 2022 15:50:34 -0600 Subject: [PATCH 152/172] fluids: Use standard boundary integrals for primitive --- examples/fluids/problems/newtonian.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index 081af8200d..e1bee405fa 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -145,6 +145,14 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_Prim_loc; problem->apply_vol_ijacobian.qfunction = IJacobian_Newtonian_Prim; problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_Prim_loc; + problem->apply_inflow.qfunction = BoundaryIntegral; + problem->apply_inflow.qfunction_loc = BoundaryIntegral_loc; + problem->apply_inflow_jacobian.qfunction = BoundaryIntegral_Jacobian; + problem->apply_inflow_jacobian.qfunction_loc = BoundaryIntegral_Jacobian_loc; + problem->apply_outflow.qfunction = PressureOutflow; + problem->apply_outflow.qfunction_loc = PressureOutflow_loc; + problem->apply_outflow_jacobian.qfunction = PressureOutflow_Jacobian; + problem->apply_outflow_jacobian.qfunction_loc = PressureOutflow_Jacobian_loc; } else { problem->ics.qfunction = ICsNewtonianIG; problem->ics.qfunction_loc = ICsNewtonianIG_loc; From de2fdd78376714e0de2d5f1b5aa98b2670032e5c Mon Sep 17 00:00:00 2001 From: James Wright Date: Mon, 18 Jul 2022 16:00:22 -0600 Subject: [PATCH 153/172] doc: Document primitive variable options for newtonian solver --- examples/fluids/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/fluids/README.md b/examples/fluids/README.md index 16df8a11e2..0a7bce7e9f 100644 --- a/examples/fluids/README.md +++ b/examples/fluids/README.md @@ -534,6 +534,11 @@ For the Density Current, 
Channel, and Blasius problems, the following common com - Developer option to test properties - `false` - boolean + +* - `-primitive` + - Use primitive variables (pressure, velocity, temperature) instead of conservative variables (density, momentum, total energy) + - `false` + - boolean ::: From efe9d856d652f85e9e9f33171e1b615db5aa2505 Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 19 Jul 2022 14:35:40 -0600 Subject: [PATCH 154/172] fluids: Use function pointers for StateFrom*, Misc fixes - Fixes: I missed some StateFrom* functions that needed to be changed --- examples/fluids/qfunctions/newtonian.h | 108 ++++++++++++++----------- 1 file changed, 62 insertions(+), 46 deletions(-) diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index ea832beadb..9fddd27555 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -790,13 +790,20 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, const NewtonianIdealGasContext context = (NewtonianIdealGasContext) ctx; const bool is_implicit = context->is_implicit; + State (*StateFromQi)(NewtonianIdealGasContext gas, + const CeedScalar qi[5], const CeedScalar x[3]); + State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, + State s, const CeedScalar dqi[5], + const CeedScalar x[3], const CeedScalar dx[3]); + StateFromQi = context->is_primitive ? &StateFromY : &StateFromU; + StateFromQi_fwd = context->is_primitive ? &StateFromY_fwd : &StateFromU_fwd; + CeedPragmaSIMD for(CeedInt i=0; iis_primitive ? StateFromY(context, solution_state, x_i) - : StateFromU(context, solution_state, x_i); + const CeedScalar qi[5] = {q[0][i], q[1][i], q[2][i], q[3][i], q[4][i]}; + State s = StateFromQi(context, qi, x_i); const CeedScalar wdetJb = (is_implicit ? -1. : 1.) 
* q_data_sur[0][i]; // ---- Normal vect @@ -812,12 +819,12 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, State grad_s[3]; for (CeedInt j=0; j<3; j++) { - CeedScalar dx_i[3] = {0}, dU[5]; + CeedScalar dx_i[3] = {0}, dqi[5]; for (CeedInt k=0; k<5; k++) - dU[k] = Grad_q[0][k][i] * dXdx[0][j] + - Grad_q[1][k][i] * dXdx[1][j]; + dqi[k] = Grad_q[0][k][i] * dXdx[0][j] + + Grad_q[1][k][i] * dXdx[1][j]; dx_i[j] = 1.; - grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i); + grad_s[j] = StateFromQi_fwd(context, s, dqi, x_i, dx_i); } CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3]; @@ -878,6 +885,13 @@ CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q, const NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; const bool implicit = context->is_implicit; + State (*StateFromQi)(NewtonianIdealGasContext gas, + const CeedScalar qi[5], const CeedScalar x[3]); + State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, + State s, const CeedScalar dqi[5], + const CeedScalar x[3], const CeedScalar dx[3]); + StateFromQi = context->is_primitive ? &StateFromY : &StateFromU; + StateFromQi_fwd = context->is_primitive ? 
&StateFromY_fwd : &StateFromU_fwd; CeedPragmaSIMD // Quadrature Point Loop @@ -893,28 +907,22 @@ CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q, {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]} }; - CeedScalar state[5], kmstress[6], dstate[5], dx_i[3] = {0.}; - for (int j=0; j<5; j++) state[j] = jac_data_sur[j][i]; + CeedScalar qi[5], kmstress[6], dqi[5], dx_i[3] = {0.}; + for (int j=0; j<5; j++) qi[j] = jac_data_sur[j][i]; for (int j=0; j<6; j++) kmstress[j] = jac_data_sur[5+j][i]; - for (int j=0; j<5; j++) dstate[j] = dq[j][i]; + for (int j=0; j<5; j++) dqi[j] = dq[j][i]; - State s, ds; - if (context->is_primitive) { - s = StateFromY(context, state, x_i); - ds = StateFromY_fwd(context, s, dstate, x_i, dx_i); - } else { - s = StateFromU(context, state, x_i); - ds = StateFromU_fwd(context, s, dstate, x_i, dx_i); - } + State s = StateFromQi(context, qi, x_i); + State ds = StateFromQi_fwd(context, s, dqi, x_i, dx_i); State grad_ds[3]; for (CeedInt j=0; j<3; j++) { - CeedScalar dx_i[3] = {0}, dUj[5]; + CeedScalar dx_i[3] = {0}, dqi_j[5]; for (CeedInt k=0; k<5; k++) - dUj[k] = Grad_dq[0][k][i] * dXdx[0][j] + - Grad_dq[1][k][i] * dXdx[1][j]; + dqi_j[k] = Grad_dq[0][k][i] * dXdx[0][j] + + Grad_dq[1][k][i] * dXdx[1][j]; dx_i[j] = 1.; - grad_ds[j] = StateFromU_fwd(context, s, dUj, x_i, dx_i); + grad_ds[j] = StateFromQi_fwd(context, s, dqi_j, x_i, dx_i); } CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3]; @@ -960,15 +968,22 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, const bool implicit = context->is_implicit; const CeedScalar P0 = context->P0; + State (*StateFromQi)(NewtonianIdealGasContext gas, + const CeedScalar qi[5], const CeedScalar x[3]); + State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, + State s, const CeedScalar dqi[5], + const CeedScalar x[3], const CeedScalar dx[3]); + StateFromQi = context->is_primitive ? &StateFromY : &StateFromU; + StateFromQi_fwd = context->is_primitive ? 
&StateFromY_fwd : &StateFromU_fwd; + CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; iis_primitive ? StateFromY(context, solution_state, x_i) - : StateFromU(context, solution_state, x_i); + const CeedScalar qi[5] = {q[0][i], q[1][i], q[2][i], q[3][i], q[4][i]}; + State s = StateFromQi(context, qi, x_i); s.Y.pressure = P0; // -- Interp-to-Interp q_data @@ -990,12 +1005,12 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, State grad_s[3]; for (CeedInt j=0; j<3; j++) { - CeedScalar dx_i[3] = {0}, dU[5]; + CeedScalar dx_i[3] = {0}, dqi[5]; for (CeedInt k=0; k<5; k++) - dU[k] = Grad_q[0][k][i] * dXdx[0][j] + - Grad_q[1][k][i] * dXdx[1][j]; + dqi[k] = Grad_q[0][k][i] * dXdx[0][j] + + Grad_q[1][k][i] * dXdx[1][j]; dx_i[j] = 1.; - grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i); + grad_s[j] = StateFromQi_fwd(context, s, dqi, x_i, dx_i); } CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3]; @@ -1042,8 +1057,7 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, // Jacobian for weak-pressure outflow boundary condition CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) { + const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], @@ -1058,6 +1072,14 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, const NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; const bool implicit = context->is_implicit; + State (*StateFromQi)(NewtonianIdealGasContext gas, + const CeedScalar qi[5], const CeedScalar x[3]); + State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, + State s, const CeedScalar dQi[5], + const CeedScalar x[3], const CeedScalar dx[3]); + StateFromQi = context->is_primitive ? &StateFromY : &StateFromU; + StateFromQi_fwd = context->is_primitive ? 
&StateFromY_fwd : &StateFromU_fwd; + CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; iis_primitive) { - s = StateFromY(context, state, x_i); - ds = StateFromY_fwd(context, s, dstate, x_i, dx_i); - } else { - s = StateFromU(context, state, x_i); - ds = StateFromU_fwd(context, s, dstate, x_i, dx_i); - } + State s = StateFromQi(context, qi, x_i); + State ds = StateFromQi_fwd(context, s, dqi, x_i, dx_i); s.Y.pressure = context->P0; ds.Y.pressure = 0.; State grad_ds[3]; for (CeedInt j=0; j<3; j++) { - CeedScalar dx_i[3] = {0}, dUj[5]; + CeedScalar dx_i[3] = {0}, dqi_j[5]; for (CeedInt k=0; k<5; k++) - dUj[k] = Grad_dq[0][k][i] * dXdx[0][j] + - Grad_dq[1][k][i] * dXdx[1][j]; + dqi_j[k] = Grad_dq[0][k][i] * dXdx[0][j] + + Grad_dq[1][k][i] * dXdx[1][j]; dx_i[j] = 1.; - grad_ds[j] = StateFromU_fwd(context, s, dUj, x_i, dx_i); + grad_ds[j] = StateFromQi_fwd(context, s, dqi_j, x_i, dx_i); } CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3]; From 2b89d87e089857a3cae26709579e6a27709cba4a Mon Sep 17 00:00:00 2001 From: Leila Ghaffari <49916147+LeilaGhaffari@users.noreply.github.com> Date: Fri, 22 Jul 2022 19:11:53 -0600 Subject: [PATCH 155/172] Fluids - Minor Refactor (#995) * Fluids - remove *.bin* since it exists in libCEED/.gitignore * cleanup * WIP: Fluids - use FluxInviscid_fwd() for computing the flux jacobean strong_conv is not reproduced and stab is not computed correctly * WIP: working on variational notation for stabilization * Fluids - fix missing parts after rebasing onto main * Fluids - fix SU in the IFunction * Fluids - refactor stabilization * Fluids - move stab helper functions to stabilization.h * Fluids - minor style * Fluids - minor refactor * Fluids - add regression test for primitive variables * Fluids - use UnpackState_*() * Fluids - more refactoring of stabilization * Fluids - DC: use utility functions * Fluids - refactor total flux * Fluids - minor cleanup * Fluids - set x[0] = 0 for channel to exclude gravitational 
potential * Fluids - updated stabilization.h * Fluids - zero strong_conv before pass it to FluxInviscidStrong() to be safe * Fluids - move switch-case for STAB to a helper function * Fluids - rename Y -> R in Stabilization*() * Fluids - fix stab sign in RHS Co-authored-by: Jed Brown --- examples/fluids/.gitignore | 1 - examples/fluids/navierstokes.c | 1 + examples/fluids/qfunctions/channel.h | 89 ++- examples/fluids/qfunctions/densitycurrent.h | 30 +- examples/fluids/qfunctions/newtonian.h | 610 +++--------------- examples/fluids/qfunctions/newtonian_state.h | 35 + examples/fluids/qfunctions/stabilization.h | 155 +++++ .../fluids-navierstokes-channel-prim.bin | Bin 0 -> 1608 bytes 8 files changed, 333 insertions(+), 588 deletions(-) create mode 100644 examples/fluids/qfunctions/stabilization.h create mode 100644 examples/fluids/tests-output/fluids-navierstokes-channel-prim.bin diff --git a/examples/fluids/.gitignore b/examples/fluids/.gitignore index 00888303ef..d1f637561a 100644 --- a/examples/fluids/.gitignore +++ b/examples/fluids/.gitignore @@ -1,7 +1,6 @@ navierstokes *.vtr *.vts -*.bin* *.vtu *.log *.png diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c index f42703a1f6..4b20d67571 100644 --- a/examples/fluids/navierstokes.c +++ b/examples/fluids/navierstokes.c @@ -26,6 +26,7 @@ //TESTARGS(name="blasius_STG_weakT") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin -weakT //TESTARGS(name="blasius_STG_strongBC") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-10 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_strongBC.bin -stg_strong true //TESTARGS(name="channel") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml 
-compare_final_state_atol 2e-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel.bin -dm_plex_box_faces 5,5,1 -ts_max_steps 5 +//TESTARGS(name="channel-primitive") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml -compare_final_state_atol 2e-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel-prim.bin -dm_plex_box_faces 5,5,1 -ts_max_steps 5 -primitive //TESTARGS(name="dc_explicit") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -ts_dt 1e-3 -units_meter 1e-2 -units_second 1e-2 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-explicit.bin //TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -units_meter 1e-2 -units_second 1e-2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin //TESTARGS(name="adv_rotation_implicit_stab_supg") -ceed {ceed_resource} -test -problem advection -CtauS .3 -stab supg -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. 
-ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -dm_mat_preallocate_skip 0 -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-stab-supg.bin diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h index b877d98841..5ef6804509 100644 --- a/examples/fluids/qfunctions/channel.h +++ b/examples/fluids/qfunctions/channel.h @@ -45,23 +45,23 @@ CEED_QFUNCTION_HELPER State Exact_Channel(CeedInt dim, CeedScalar time, const CeedScalar k = gas->k; // There is a gravity body force but it is excluded from // the potential energy due to periodicity. - gas->g[0] = 0.; - gas->g[1] = 0.; - gas->g[2] = 0.; + // g = (g, 0, 0) + // x = (0, x_2, x_3) + // e_potential = dot(g, x) = 0 + const CeedScalar x[3] = {0, X[1], X[2]}; - const CeedScalar y = X[1]; const CeedScalar Pr = mu / (cp*k); const CeedScalar Ec = (umax*umax) / (cp*theta0); const CeedScalar theta = theta0*(1 + (Pr*Ec/3) - * (1 - Square(Square((y-center)/H)))); + * (1 - Square(Square((x[1]-center)/H)))); CeedScalar Y[5] = {0.}; Y[0] = P0; - Y[1] = umax*(1 - Square((y-center)/H)); + Y[1] = umax*(1 - Square((x[1]-center)/H)); Y[2] = 0.; Y[3] = 0.; Y[4] = theta; - return StateFromY(gas, Y, X); + return StateFromY(gas, Y, x); } // ***************************************************************************** @@ -83,22 +83,21 @@ CEED_QFUNCTION(ICsChannel)(void *ctx, CeedInt Q, for (CeedInt i=0; inewtonian_ctx.is_primitive) { - q0[0][i] = s.Y.pressure; - for (CeedInt j=0; j<3; j++) - q0[j+1][i] = s.Y.velocity[j]; - q0[4][i] = s.Y.temperature; - } else { - q0[0][i] = s.U.density; - for (CeedInt j=0; j<3; j++) - q0[j+1][i] = s.U.momentum[j]; - q0[4][i] = s.U.E_total; - } + CeedScalar q[5] = {0}; + if (context->newtonian_ctx.is_primitive) + UnpackState_Y(s.Y, q); + else + UnpackState_U(s.U, q); + + for 
(CeedInt j=0; j<5; j++) + q0[j][i] = q[j]; } // End of Quadrature Point Loop return 0; } +// ***************************************************************************** +// This QFunction set the inflow boundary condition for conservative variables // ***************************************************************************** CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q, const CeedScalar *const *in, @@ -113,10 +112,11 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q, CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; // *INDENT-ON* const ChannelContext context = (ChannelContext)ctx; - const bool implicit = context->implicit; - const CeedScalar cv = context->newtonian_ctx.cv; - const CeedScalar cp = context->newtonian_ctx.cp; - const CeedScalar gamma = cp/cv; + const bool implicit = context->implicit; + NewtonianIdealGasContext gas = &context->newtonian_ctx; + const CeedScalar cv = gas->cv; + const CeedScalar cp = gas->cp; + const CeedScalar gamma = cp / cv; CeedPragmaSIMD // Quadrature Point Loop @@ -128,44 +128,42 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q, // We can effect this by swapping the sign on this weight const CeedScalar wdetJb = (implicit ? -1. : 1.) * q_data_sur[0][i]; + // There is a gravity body force but it is excluded from + // the potential energy due to periodicity. 
+ // g = (g, 0, 0) + // x = (0, x_2, x_3) + // e_potential = dot(g, x) = 0 + const CeedScalar x[3] = {0, X[1][i], X[2][i]}; + // Calcualte prescribed inflow values - const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]}; - State s = Exact_Channel(3, 0., x, 5, ctx); + State s_exact = Exact_Channel(3, 0., x, 5, ctx); CeedScalar q_exact[5] = {0.}; - q_exact[0] = s.U.density; - for (CeedInt j=0; j<3; j++) - q_exact[j+1] = s.U.momentum[j]; - q_exact[4] = s.U.E_total; - const CeedScalar E_kinetic_exact = 0.5*Dot3(&q_exact[1], &q_exact[1]) - / q_exact[0]; - const CeedScalar velocity[3] = {q_exact[1]/q_exact[0], - q_exact[2]/q_exact[0], - q_exact[3]/q_exact[0] - }; - const CeedScalar theta = (q_exact[4] - E_kinetic_exact) / (q_exact[0]*cv); + UnpackState_U(s_exact.U, q_exact); // Find pressure using state inside the domain - const CeedScalar rho = q[0][i]; - const CeedScalar u[3] = {q[1][i]/rho, q[2][i]/rho, q[3][i]/rho}; - const CeedScalar E_internal = q[4][i] - .5 * rho * Dot3(u,u); - const CeedScalar P = E_internal * (gamma - 1.); + CeedScalar q_inside[5] = {0}; + for (CeedInt j; j<5; j++) + q_inside[j] = q[j][i]; + State s_inside = StateFromU(gas, q_inside, x); + const CeedScalar P = s_inside.Y.pressure; // Find inflow state using calculated P and prescribed velocity, theta0 - const CeedScalar e_internal = cv * theta; + const CeedScalar e_internal = cv * s_exact.Y.temperature; const CeedScalar rho_in = P / ((gamma - 1) * e_internal); - const CeedScalar E_kinetic = .5 * rho_in * Dot3(velocity, velocity); + const CeedScalar E_kinetic = .5 * rho_in * Dot3(s_exact.Y.velocity, + s_exact.Y.velocity); const CeedScalar E = rho_in * e_internal + E_kinetic; + // ---- Normal vect const CeedScalar norm[3] = {q_data_sur[1][i], q_data_sur[2][i], q_data_sur[3][i] }; - // The Physics // Zero v so all future terms can safely sum into it for (CeedInt j=0; j<5; j++) v[j][i] = 0.; - const CeedScalar u_normal = Dot3(norm, velocity); + const CeedScalar u_normal = Dot3(norm, 
s_exact.Y.velocity); // The Physics // -- Density @@ -173,7 +171,7 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q, // -- Momentum for (CeedInt j=0; j<3; j++) - v[j+1][i] -= wdetJb * (rho_in * u_normal * velocity[j] + + v[j+1][i] -= wdetJb * (rho_in * u_normal * s_exact.Y.velocity[j] + norm[j] * P); // -- Total Energy Density @@ -183,6 +181,8 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q, return 0; } +// ***************************************************************************** +// This QFunction sets the outflow boundary condition for conservative variables // ***************************************************************************** CEED_QFUNCTION(Channel_Outflow)(void *ctx, CeedInt Q, const CeedScalar *const *in, @@ -223,7 +223,6 @@ CEED_QFUNCTION(Channel_Outflow)(void *ctx, CeedInt Q, q_data_sur[2][i], q_data_sur[3][i] }; - // The Physics // Zero v so all future terms can safely sum into it for (CeedInt j=0; j<5; j++) v[j][i] = 0.; diff --git a/examples/fluids/qfunctions/densitycurrent.h b/examples/fluids/qfunctions/densitycurrent.h index 22a10d99ae..46ded17076 100644 --- a/examples/fluids/qfunctions/densitycurrent.h +++ b/examples/fluids/qfunctions/densitycurrent.h @@ -113,14 +113,14 @@ CEED_QFUNCTION_HELPER State Exact_DC(CeedInt dim, CeedScalar time, CeedScalar rr[3] = {x - center[0], y - center[1], z - center[2]}; // (I - q q^T) r: distance from dc_axis (or from center if dc_axis is the zero vector) for (CeedInt i=0; i<3; i++) - rr[i] -= dc_axis[i] * - (dc_axis[0]*rr[0] + dc_axis[1]*rr[1] + dc_axis[2]*rr[2]); - const CeedScalar r = sqrt(rr[0]*rr[0] + rr[1]*rr[1] + rr[2]*rr[2]); + rr[i] -= dc_axis[i] * Dot3(dc_axis, rr); + const CeedScalar r = sqrt(Dot3(rr, rr)); const CeedScalar delta_theta = r <= rc ? thetaC*(1. + cos(M_PI*r/rc))/2. : 0.; - const CeedScalar theta = theta0*exp(N*N*z/g) + delta_theta; + const CeedScalar theta = theta0*exp(Square(N)*z/g) + delta_theta; // -- Exner pressure, hydrostatic balance - const CeedScalar Pi = 1. 
+ g*g*(exp(-N*N*z/g) - 1.) / (cp*theta0*N*N); + const CeedScalar Pi = 1. + Square(g)*(exp(-Square(N)*z/g) - 1.) / + (cp*theta0*Square(N)); // Initial Conditions CeedScalar Y[5] = {0.}; @@ -152,17 +152,15 @@ CEED_QFUNCTION(ICsDC)(void *ctx, CeedInt Q, for (CeedInt i=0; inewtonian_ctx.is_primitive) { - q0[0][i] = s.Y.pressure; - for (CeedInt j=0; j<3; j++) - q0[j+1][i] = s.Y.velocity[j]; - q0[4][i] = s.Y.temperature; - } else { - q0[0][i] = s.U.density; - for (CeedInt j=0; j<3; j++) - q0[j+1][i] = s.U.momentum[j]; - q0[4][i] = s.U.E_total; - } + CeedScalar q[5] = {0}; + if (context->newtonian_ctx.is_primitive) + UnpackState_Y(s.Y, q); + else + UnpackState_U(s.U, q); + + for (CeedInt j=0; j<5; j++) + q0[j][i] = q[j]; + } // End of Quadrature Point Loop return 0; diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 9fddd27555..3ca74f003a 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -17,164 +17,7 @@ #include "newtonian_types.h" #include "newtonian_state.h" #include "utils.h" - -// ***************************************************************************** -// Helper function for computing flux Jacobian -// ***************************************************************************** -CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5], - const CeedScalar rho, const CeedScalar u[3], const CeedScalar E, - const CeedScalar gamma, const CeedScalar g[3], const CeedScalar x[3]) { - CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square - CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]); - for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions - for (CeedInt j=0; j<3; j++) { // Rows of each Jacobian matrix - dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - e_potential)) : 0.) - - u[i]*u[j]; - for (CeedInt k=0; k<3; k++) { // Columns of each Jacobian matrix - dF[i][0][k+1] = ((i==k) ? 1. 
: 0.); - dF[i][j+1][k+1] = ((j==k) ? u[i] : 0.) + - ((i==k) ? u[j] : 0.) - - ((i==j) ? u[k] : 0.) * (gamma-1.); - dF[i][4][k+1] = ((i==k) ? (E*gamma/rho - (gamma-1.)*u_sq/2.) : 0.) - - (gamma-1.)*u[i]*u[k]; - } - dF[i][j+1][4] = ((i==j) ? (gamma-1.) : 0.); - } - dF[i][4][0] = u[i] * ((gamma-1.)*u_sq - E*gamma/rho); - dF[i][4][4] = u[i] * gamma; - } -} - -// ***************************************************************************** -// Helper function for computing flux Jacobian of Primitive variables -// ***************************************************************************** -CEED_QFUNCTION_HELPER void computeFluxJacobian_NSp(CeedScalar dF[3][5][5], - const CeedScalar rho, const CeedScalar u[3], const CeedScalar E, - const CeedScalar Rd, const CeedScalar cv) { - CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square - // TODO Add in gravity's contribution - - CeedScalar T = ( E / rho - u_sq / 2. ) / cv; - CeedScalar drdT = -rho / T; - CeedScalar drdP = 1. / ( Rd * T); - CeedScalar etot = E / rho ; - CeedScalar e2p = drdP * etot + 1. ; - CeedScalar e3p = ( E + rho * Rd * T ); - CeedScalar e4p = drdT * etot + rho * cv ; - - for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions - for (CeedInt j=0; j<3; j++) { // j counts F^{m_j} -// [row][col] of A_i - dF[i][j+1][0] = drdP * u[i] * u[j] + ((i==j) ? 1. : 0.); // F^{{m_j} wrt p - for (CeedInt k=0; k<3; k++) { // k counts the wrt vel_k - dF[i][0][k+1] = ((i==k) ? rho : 0.); // F^c wrt u_k - dF[i][j+1][k+1] = (((j==k) ? u[i] : 0.) + // F^m_j wrt u_k - ((i==k) ? u[j] : 0.) ) * rho; - dF[i][4][k+1] = rho * u[i] * u[k] - + ((i==k) ? e3p : 0.) 
; // F^e wrt u_k - } - dF[i][j+1][4] = drdT * u[i] * u[j]; // F^{m_j} wrt T - } - dF[i][4][0] = u[i] * e2p; // F^e wrt p - dF[i][4][4] = u[i] * e4p; // F^e wrt T - dF[i][0][0] = u[i] * drdP; // F^c wrt p - dF[i][0][4] = u[i] * drdT; // F^c wrt T - } -} - -CEED_QFUNCTION_HELPER void PrimitiveToConservative_fwd(const CeedScalar rho, - const CeedScalar u[3], const CeedScalar E, const CeedScalar Rd, - const CeedScalar cv, const CeedScalar dY[5], CeedScalar dU[5]) { - CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; - CeedScalar T = ( E / rho - u_sq / 2. ) / cv; - CeedScalar drdT = -rho / T; - CeedScalar drdP = 1. / ( Rd * T); - dU[0] = drdP * dY[0] + drdT * dY[4]; - CeedScalar de_kinetic = 0; - for (CeedInt i=0; i<3; i++) { - dU[1+i] = dU[0] * u[i] + rho * dY[1+i]; - de_kinetic += u[i] * dY[1+i]; - } - dU[4] = rho * cv * dY[4] + dU[0] * cv * T // internal energy: rho * e - + rho * de_kinetic + .5 * dU[0] * u_sq; // kinetic energy: .5 * rho * |u|^2 -} - -// ***************************************************************************** -// Helper function for computing Tau elements (stabilization constant) -// Model from: -// PHASTA -// -// Tau[i] = itau=0 which is diagonal-Shakib (3 values still but not spatial) -// -// Where NOT UPDATED YET -// ***************************************************************************** -CEED_QFUNCTION_HELPER void Tau_diagPrim(CeedScalar Tau_d[3], - const CeedScalar dXdx[3][3], const CeedScalar u[3], - const CeedScalar cv, const NewtonianIdealGasContext newt_ctx, - const CeedScalar mu, const CeedScalar dt, - const CeedScalar rho) { - // Context - const CeedScalar Ctau_t = newt_ctx->Ctau_t; - const CeedScalar Ctau_v = newt_ctx->Ctau_v; - const CeedScalar Ctau_C = newt_ctx->Ctau_C; - const CeedScalar Ctau_M = newt_ctx->Ctau_M; - const CeedScalar Ctau_E = newt_ctx->Ctau_E; - CeedScalar gijd[6]; - CeedScalar tau; - CeedScalar dts; - CeedScalar fact; - - //*INDENT-OFF* - gijd[0] = dXdx[0][0] * dXdx[0][0] - + dXdx[1][0] * dXdx[1][0] - + 
dXdx[2][0] * dXdx[2][0]; - - gijd[1] = dXdx[0][0] * dXdx[0][1] - + dXdx[1][0] * dXdx[1][1] - + dXdx[2][0] * dXdx[2][1]; - - gijd[2] = dXdx[0][1] * dXdx[0][1] - + dXdx[1][1] * dXdx[1][1] - + dXdx[2][1] * dXdx[2][1]; - - gijd[3] = dXdx[0][0] * dXdx[0][2] - + dXdx[1][0] * dXdx[1][2] - + dXdx[2][0] * dXdx[2][2]; - - gijd[4] = dXdx[0][1] * dXdx[0][2] - + dXdx[1][1] * dXdx[1][2] - + dXdx[2][1] * dXdx[2][2]; - - gijd[5] = dXdx[0][2] * dXdx[0][2] - + dXdx[1][2] * dXdx[1][2] - + dXdx[2][2] * dXdx[2][2]; - //*INDENT-ON* - - dts = Ctau_t / dt ; - - tau = rho*rho*((4. * dts * dts) - + u[0] * ( u[0] * gijd[0] + 2. * ( u[1] * gijd[1] + u[2] * gijd[3])) - + u[1] * ( u[1] * gijd[2] + 2. * u[2] * gijd[4]) - + u[2] * u[2] * gijd[5]) - + Ctau_v* mu * mu * - (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] + - + 2. * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4])); - - fact=sqrt(tau); - - Tau_d[0] = Ctau_C * fact / (rho*(gijd[0] + gijd[2] + gijd[5]))*0.125; - - Tau_d[1] = Ctau_M / fact; - Tau_d[2] = Ctau_E / ( fact * cv ); - -// consider putting back the way I initially had it Ctau_E * Tau_d[1] /cv -// to avoid a division if the compiler is smart enough to see that cv IS -// a constant that it could invert once for all elements -// but in that case energy tau is scaled by the product of Ctau_E * Ctau_M -// OR we could absorb cv into Ctau_E but this puts more burden on user to -// know how to change constants with a change of fluid or units. 
Same for -// Ctau_v * mu * mu IF AND ONLY IF we don't add viscosity law =f(T) -} +#include "stabilization.h" // ***************************************************************************** // This QFunction sets a "still" initial condition for generic Newtonian IG problems @@ -204,7 +47,7 @@ CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q, // Setup // -- Coordinates const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]}; - const CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]); + const CeedScalar e_potential = -Dot3(g, x); // -- Density const CeedScalar rho = P0 / (Rd*theta0); @@ -218,6 +61,7 @@ CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q, for (CeedInt j=0; j<5; j++) q0[j][i] = q[j]; + } // End of Quadrature Point Loop return 0; } @@ -319,13 +163,8 @@ CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; - const CeedScalar mu = context->mu; - const CeedScalar cv = context->cv; - const CeedScalar cp = context->cp; - const CeedScalar *g = context->g; - const CeedScalar dt = context->dt; - const CeedScalar gamma = cp / cv; - const CeedScalar Rd = cp - cv; + const CeedScalar *g = context->g; + const CeedScalar dt = context->dt; CeedPragmaSIMD // Quadrature Point Loop @@ -373,74 +212,28 @@ CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, // Total flux CeedScalar Flux[5][3]; - for (CeedInt j=0; j<3; j++) { - Flux[0][j] = F_inviscid[j].density; - for (CeedInt k=0; k<3; k++) - Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j]; - Flux[4][j] = F_inviscid[j].E_total + Fe[j]; - } + FluxTotal(F_inviscid, stress, Fe, Flux); - for (CeedInt j=0; j<3; j++) { - for (CeedInt k=0; k<5; k++) { + for (CeedInt j=0; j<3; j++) + for (CeedInt k=0; k<5; k++) Grad_v[j][k][i] = wdetJ * (dXdx[j][0] * Flux[k][0] + dXdx[j][1] * Flux[k][1] + dXdx[j][2] * Flux[k][2]); - } - } const CeedScalar body_force[5] = {0, s.U.density *g[0], s.U.density *g[1], s.U.density *g[2], 0}; for 
(int j=0; j<5; j++) v[j][i] = wdetJ * body_force[j]; - // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction - CeedScalar jacob_F_conv[3][5][5] = {0}; - computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total, - gamma, g, x_i); - CeedScalar grad_U[5][3]; - for (CeedInt j=0; j<3; j++) { - grad_U[0][j] = grad_s[j].U.density; - for (CeedInt k=0; k<3; k++) grad_U[k+1][j] = grad_s[j].U.momentum[k]; - grad_U[4][j] = grad_s[j].U.E_total; - } + // -- Stabilization method: none (Galerkin), SU, or SUPG + CeedScalar Tau_d[3], stab[5][3], U_dot[5] = {0}; + Tau_diagPrim(context, s, dXdx, dt, Tau_d); + Stabilization(context, s, Tau_d, grad_s, U_dot, body_force, x_i, stab); - // strong_conv = dF/dq * dq/dx (Strong convection) - CeedScalar strong_conv[5] = {0}; - for (CeedInt j=0; j<3; j++) - for (CeedInt k=0; k<5; k++) - for (CeedInt l=0; l<5; l++) - strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j]; - - // -- Stabilization method: none, SU, or SUPG - CeedScalar stab[5][3] = {{0.}}; - CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0}; - CeedScalar Tau_d[3] = {0.}; - switch (context->stabilization) { - case STAB_NONE: // Galerkin - break; - case STAB_SU: // SU - Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density); - tau_strong_conv[0] = Tau_d[0] * strong_conv[0]; - tau_strong_conv[1] = Tau_d[1] * strong_conv[1]; - tau_strong_conv[2] = Tau_d[1] * strong_conv[2]; - tau_strong_conv[3] = Tau_d[1] * strong_conv[3]; - tau_strong_conv[4] = Tau_d[2] * strong_conv[4]; - PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, - tau_strong_conv, - tau_strong_conv_conservative); - for (CeedInt j=0; j<3; j++) - for (CeedInt k=0; k<5; k++) - for (CeedInt l=0; l<5; l++) - stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l]; - - for (CeedInt j=0; j<5; j++) - for (CeedInt k=0; k<3; k++) - Grad_v[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] + - stab[j][1] * dXdx[k][1] + - stab[j][2] 
* dXdx[k][2]); - break; - case STAB_SUPG: // SUPG is not implemented for explicit scheme - break; - } + for (CeedInt j=0; j<5; j++) + for (CeedInt k=0; k<3; k++) + Grad_v[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] + + stab[j][1] * dXdx[k][1] + + stab[j][2] * dXdx[k][2]); } // End Quadrature Point Loop @@ -473,13 +266,8 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, // *INDENT-ON* // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; - const CeedScalar mu = context->mu; - const CeedScalar cv = context->cv; - const CeedScalar cp = context->cp; const CeedScalar *g = context->g; const CeedScalar dt = context->dt; - const CeedScalar gamma = cp / cv; - const CeedScalar Rd = cp - cv; CeedPragmaSIMD // Quadrature Point Loop @@ -525,102 +313,32 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, StateConservative F_inviscid[3]; FluxInviscid(context, s, F_inviscid); - // Total flux CeedScalar Flux[5][3]; - for (CeedInt j=0; j<3; j++) { - Flux[0][j] = F_inviscid[j].density; - for (CeedInt k=0; k<3; k++) - Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j]; - Flux[4][j] = F_inviscid[j].E_total + Fe[j]; - } + FluxTotal(F_inviscid, stress, Fe, Flux); - for (CeedInt j=0; j<3; j++) { - for (CeedInt k=0; k<5; k++) { + for (CeedInt j=0; j<3; j++) + for (CeedInt k=0; k<5; k++) Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * Flux[k][0] + dXdx[j][1] * Flux[k][1] + dXdx[j][2] * Flux[k][2]); - } - } const CeedScalar body_force[5] = {0, s.U.density *g[0], s.U.density *g[1], s.U.density *g[2], 0}; for (CeedInt j=0; j<5; j++) v[j][i] = wdetJ * (q_dot[j][i] - body_force[j]); - // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction - CeedScalar jacob_F_conv[3][5][5] = {0}; - computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total, - gamma, g, x_i); - CeedScalar grad_U[5][3]; - for (CeedInt j=0; j<3; j++) { - grad_U[0][j] = grad_s[j].U.density; - for (CeedInt k=0; k<3; k++) grad_U[k+1][j] = grad_s[j].U.momentum[k]; - 
grad_U[4][j] = grad_s[j].U.E_total; - } - - // strong_conv = dF/dq * dq/dx (Strong convection) - CeedScalar strong_conv[5] = {0}; - for (CeedInt j=0; j<3; j++) - for (CeedInt k=0; k<5; k++) - for (CeedInt l=0; l<5; l++) - strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j]; + // -- Stabilization method: none (Galerkin), SU, or SUPG + CeedScalar Tau_d[3], stab[5][3], U_dot[5] = {0}; + for (CeedInt j=0; j<5; j++) U_dot[j] = q_dot[j][i]; + Tau_diagPrim(context, s, dXdx, dt, Tau_d); + Stabilization(context, s, Tau_d, grad_s, U_dot, body_force, x_i, stab); - // Strong residual - CeedScalar strong_res[5]; for (CeedInt j=0; j<5; j++) - strong_res[j] = q_dot[j][i] + strong_conv[j] - body_force[j]; - - // -- Stabilization method: none, SU, or SUPG - CeedScalar stab[5][3] = {{0.}}; - CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0}; - CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0}; - CeedScalar Tau_d[3] = {0.}; - switch (context->stabilization) { - case STAB_NONE: // Galerkin - break; - case STAB_SU: // SU - Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density); - tau_strong_conv[0] = Tau_d[0] * strong_conv[0]; - tau_strong_conv[1] = Tau_d[1] * strong_conv[1]; - tau_strong_conv[2] = Tau_d[1] * strong_conv[2]; - tau_strong_conv[3] = Tau_d[1] * strong_conv[3]; - tau_strong_conv[4] = Tau_d[2] * strong_conv[4]; - PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, - tau_strong_conv, tau_strong_conv_conservative); - for (CeedInt j=0; j<3; j++) - for (CeedInt k=0; k<5; k++) - for (CeedInt l=0; l<5; l++) - stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l]; - - for (CeedInt j=0; j<5; j++) - for (CeedInt k=0; k<3; k++) - Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] + - stab[j][1] * dXdx[k][1] + - stab[j][2] * dXdx[k][2]); - - break; - case STAB_SUPG: // SUPG - Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density); - tau_strong_res[0] = 
Tau_d[0] * strong_res[0]; - tau_strong_res[1] = Tau_d[1] * strong_res[1]; - tau_strong_res[2] = Tau_d[1] * strong_res[2]; - tau_strong_res[3] = Tau_d[1] * strong_res[3]; - tau_strong_res[4] = Tau_d[2] * strong_res[4]; - - PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, - tau_strong_res, tau_strong_res_conservative); - for (CeedInt j=0; j<3; j++) - for (CeedInt k=0; k<5; k++) - for (CeedInt l=0; l<5; l++) - stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l]; - - for (CeedInt j=0; j<5; j++) - for (CeedInt k=0; k<3; k++) - Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] + - stab[j][1] * dXdx[k][1] + - stab[j][2] * dXdx[k][2]); - break; - } + for (CeedInt k=0; k<3; k++) + Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] + + stab[j][1] * dXdx[k][1] + + stab[j][2] * dXdx[k][2]); + for (CeedInt j=0; j<5; j++) jac_data[j][i] = U[j]; for (CeedInt j=0; j<6; j++) jac_data[5+j][i] = kmstress[j]; for (CeedInt j=0; j<3; j++) jac_data[5+6+j][i] = Tau_d[j]; @@ -653,10 +371,6 @@ CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q, // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; const CeedScalar *g = context->g; - const CeedScalar cp = context->cp; - const CeedScalar cv = context->cv; - const CeedScalar Rd = cp - cv; - const CeedScalar gamma = cp / cv; CeedPragmaSIMD // Quadrature Point Loop @@ -692,9 +406,10 @@ CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q, State grad_ds[3]; for (int j=0; j<3; j++) { CeedScalar dUj[5]; - for (int k=0; k<5; k++) dUj[k] = Grad_dq[0][k][i] * dXdx[0][j] - + Grad_dq[1][k][i] * dXdx[1][j] - + Grad_dq[2][k][i] * dXdx[2][j]; + for (int k=0; k<5; k++) + dUj[k] = Grad_dq[0][k][i] * dXdx[0][j] + + Grad_dq[1][k][i] * dXdx[1][j] + + Grad_dq[2][k][i] * dXdx[2][j]; grad_ds[j] = StateFromU_fwd(context, s, dUj, x_i, dx0); } @@ -710,69 +425,36 @@ CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q, // Total flux CeedScalar dFlux[5][3]; - for (int j=0; j<3; j++) { - 
dFlux[0][j] = dF_inviscid[j].density; - for (int k=0; k<3; k++) - dFlux[k+1][j] = dF_inviscid[j].momentum[k] - dstress[k][j]; - dFlux[4][j] = dF_inviscid[j].E_total + dFe[j]; - } + FluxTotal(dF_inviscid, dstress, dFe, dFlux); - for (int j=0; j<3; j++) { - for (int k=0; k<5; k++) { + for (int j=0; j<3; j++) + for (int k=0; k<5; k++) Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * dFlux[k][0] + dXdx[j][1] * dFlux[k][1] + dXdx[j][2] * dFlux[k][2]); - } - } const CeedScalar dbody_force[5] = {0, ds.U.density *g[0], ds.U.density *g[1], ds.U.density *g[2], 0}; for (int j=0; j<5; j++) v[j][i] = wdetJ * (context->ijacobian_time_shift * dU[j] - dbody_force[j]); - if (1) { - CeedScalar jacob_F_conv[3][5][5] = {0}; - computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total, - gamma, g, x_i); - CeedScalar grad_dU[5][3]; - for (int j=0; j<3; j++) { - grad_dU[0][j] = grad_ds[j].U.density; - for (int k=0; k<3; k++) grad_dU[k+1][j] = grad_ds[j].U.momentum[k]; - grad_dU[4][j] = grad_ds[j].U.E_total; - } - CeedScalar dstrong_conv[5] = {0}; - for (int j=0; j<3; j++) - for (int k=0; k<5; k++) - for (int l=0; l<5; l++) - dstrong_conv[k] += jacob_F_conv[j][k][l] * grad_dU[l][j]; - CeedScalar dstrong_res[5]; - for (int j=0; j<5; j++) - dstrong_res[j] = context->ijacobian_time_shift * dU[j] + dstrong_conv[j] - - dbody_force[j]; - CeedScalar dtau_strong_res[5] = {0.}, dtau_strong_res_conservative[5] = {0}; - dtau_strong_res[0] = Tau_d[0] * dstrong_res[0]; - dtau_strong_res[1] = Tau_d[1] * dstrong_res[1]; - dtau_strong_res[2] = Tau_d[1] * dstrong_res[2]; - dtau_strong_res[3] = Tau_d[1] * dstrong_res[3]; - dtau_strong_res[4] = Tau_d[2] * dstrong_res[4]; - PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, - dtau_strong_res, dtau_strong_res_conservative); - CeedScalar dstab[5][3] = {0}; - for (int j=0; j<3; j++) - for (int k=0; k<5; k++) - for (int l=0; l<5; l++) - dstab[k][j] += jacob_F_conv[j][k][l] * dtau_strong_res_conservative[l]; - for (int j=0; j<5; 
j++) - for (int k=0; k<3; k++) - Grad_v[k][j][i] += wdetJ*(dstab[j][0] * dXdx[k][0] + - dstab[j][1] * dXdx[k][1] + - dstab[j][2] * dXdx[k][2]); + // -- Stabilization method: none (Galerkin), SU, or SUPG + CeedScalar dstab[5][3], U_dot[5] = {0}; + for (CeedInt j=0; j<5; j++) U_dot[j] = context->ijacobian_time_shift * dU[j]; + Stabilization(context, s, Tau_d, grad_ds, U_dot, dbody_force, x_i, dstab); + + for (int j=0; j<5; j++) + for (int k=0; k<3; k++) + Grad_v[k][j][i] += wdetJ*(dstab[j][0] * dXdx[k][0] + + dstab[j][1] * dXdx[k][1] + + dstab[j][2] * dXdx[k][2]); - } } // End Quadrature Point Loop return 0; } +// ***************************************************************************** // Compute boundary integral (ie. for strongly set inflows) +// ***************************************************************************** CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { @@ -841,7 +523,7 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, Flux[0] += F_inviscid[j].density * norm[j]; for (int k=0; k<3; k++) Flux[k+1] += (F_inviscid[j].momentum[k] - stress[k][j]) * norm[j]; - Flux[4] += (F_inviscid[j].E_total + Fe[j])*norm[j]; + Flux[4] += (F_inviscid[j].E_total + Fe[j]) * norm[j]; } // -- Density @@ -868,7 +550,9 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, return 0; } +// ***************************************************************************** // Jacobian for "set nothing" boundary integral +// ***************************************************************************** CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { @@ -949,7 +633,9 @@ CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q, return 0; } +// ***************************************************************************** // Outflow boundary condition, weakly setting a constant pressure +// 
***************************************************************************** CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { @@ -1055,7 +741,9 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, return 0; } +// ***************************************************************************** // Jacobian for weak-pressure outflow boundary condition +// ***************************************************************************** CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* @@ -1159,13 +847,8 @@ CEED_QFUNCTION(IFunction_Newtonian_Prim)(void *ctx, CeedInt Q, // *INDENT-ON* // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; - const CeedScalar mu = context->mu; - const CeedScalar cv = context->cv; - const CeedScalar cp = context->cp; const CeedScalar *g = context->g; const CeedScalar dt = context->dt; - const CeedScalar gamma = cp / cv; - const CeedScalar Rd = cp - cv; CeedPragmaSIMD // Quadrature Point Loop @@ -1213,20 +896,13 @@ CEED_QFUNCTION(IFunction_Newtonian_Prim)(void *ctx, CeedInt Q, // Total flux CeedScalar Flux[5][3]; - for (CeedInt j=0; j<3; j++) { - Flux[0][j] = F_inviscid[j].density; - for (CeedInt k=0; k<3; k++) - Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j]; - Flux[4][j] = F_inviscid[j].E_total + Fe[j]; - } + FluxTotal(F_inviscid, stress, Fe, Flux); - for (CeedInt j=0; j<3; j++) { - for (CeedInt k=0; k<5; k++) { + for (CeedInt j=0; j<3; j++) + for (CeedInt k=0; k<5; k++) Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * Flux[k][0] + dXdx[j][1] * Flux[k][1] + dXdx[j][2] * Flux[k][2]); - } - } const CeedScalar body_force[5] = {0, s.U.density *g[0], s.U.density *g[1], s.U.density *g[2], 0}; @@ -1235,88 +911,22 @@ CEED_QFUNCTION(IFunction_Newtonian_Prim)(void *ctx, CeedInt Q, State s_dot = StateFromY_fwd(context, s, Y_dot, x_i, dx0); CeedScalar U_dot[5] = {0.}; - U_dot[0] = 
s_dot.U.density; - for (CeedInt j=0; j<3; j++) - U_dot[j+1] = s_dot.U.momentum[j]; - U_dot[4] = s_dot.U.E_total; + UnpackState_U(s_dot.U, U_dot); for (CeedInt j=0; j<5; j++) v[j][i] = wdetJ * (U_dot[j] - body_force[j]); - // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction - CeedScalar jacob_F_conv[3][5][5] = {0}; - computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total, - gamma, g, x_i); - CeedScalar grad_U[5][3]; - for (CeedInt j=0; j<3; j++) { - grad_U[0][j] = grad_s[j].U.density; - for (CeedInt k=0; k<3; k++) grad_U[k+1][j] = grad_s[j].U.momentum[k]; - grad_U[4][j] = grad_s[j].U.E_total; - } - - // strong_conv = dF/dq * dq/dx (Strong convection) - CeedScalar strong_conv[5] = {0}; - for (CeedInt j=0; j<3; j++) - for (CeedInt k=0; k<5; k++) - for (CeedInt l=0; l<5; l++) - strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j]; + // -- Stabilization method: none (Galerkin), SU, or SUPG + CeedScalar Tau_d[3], stab[5][3]; + Tau_diagPrim(context, s, dXdx, dt, Tau_d); + Stabilization(context, s, Tau_d, grad_s, U_dot, body_force, x_i, stab); - // Strong residual - CeedScalar strong_res[5]; for (CeedInt j=0; j<5; j++) - strong_res[j] = U_dot[j] + strong_conv[j] - body_force[j]; - - // -- Stabilization method: none, SU, or SUPG - CeedScalar stab[5][3] = {{0.}}; - CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0}; - CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0}; - CeedScalar Tau_d[3] = {0.}; - switch (context->stabilization) { - case STAB_NONE: // Galerkin - break; - case STAB_SU: // SU - Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density); - tau_strong_conv[0] = Tau_d[0] * strong_conv[0]; - tau_strong_conv[1] = Tau_d[1] * strong_conv[1]; - tau_strong_conv[2] = Tau_d[1] * strong_conv[2]; - tau_strong_conv[3] = Tau_d[1] * strong_conv[3]; - tau_strong_conv[4] = Tau_d[2] * strong_conv[4]; - PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, - 
tau_strong_conv, tau_strong_conv_conservative); - for (CeedInt j=0; j<3; j++) - for (CeedInt k=0; k<5; k++) - for (CeedInt l=0; l<5; l++) - stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l]; - - for (CeedInt j=0; j<5; j++) - for (CeedInt k=0; k<3; k++) - Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] + - stab[j][1] * dXdx[k][1] + - stab[j][2] * dXdx[k][2]); - - break; - case STAB_SUPG: // SUPG - Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density); - tau_strong_res[0] = Tau_d[0] * strong_res[0]; - tau_strong_res[1] = Tau_d[1] * strong_res[1]; - tau_strong_res[2] = Tau_d[1] * strong_res[2]; - tau_strong_res[3] = Tau_d[1] * strong_res[3]; - tau_strong_res[4] = Tau_d[2] * strong_res[4]; - - PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, - tau_strong_res, tau_strong_res_conservative); - for (CeedInt j=0; j<3; j++) - for (CeedInt k=0; k<5; k++) - for (CeedInt l=0; l<5; l++) - stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l]; - - for (CeedInt j=0; j<5; j++) - for (CeedInt k=0; k<3; k++) - Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] + - stab[j][1] * dXdx[k][1] + - stab[j][2] * dXdx[k][2]); - break; - } + for (CeedInt k=0; k<3; k++) + Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] + + stab[j][1] * dXdx[k][1] + + stab[j][2] * dXdx[k][2]); + for (CeedInt j=0; j<5; j++) jac_data[j][i] = Y[j]; for (CeedInt j=0; j<6; j++) jac_data[5+j][i] = kmstress[j]; for (CeedInt j=0; j<3; j++) jac_data[5+6+j][i] = Tau_d[j]; @@ -1348,10 +958,6 @@ CEED_QFUNCTION(IJacobian_Newtonian_Prim)(void *ctx, CeedInt Q, // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; const CeedScalar *g = context->g; - const CeedScalar cp = context->cp; - const CeedScalar cv = context->cv; - const CeedScalar Rd = cp - cv; - const CeedScalar gamma = cp / cv; CeedPragmaSIMD // Quadrature Point Loop @@ -1406,80 +1012,32 @@ CEED_QFUNCTION(IJacobian_Newtonian_Prim)(void *ctx, CeedInt Q, // Total 
flux CeedScalar dFlux[5][3]; - for (int j=0; j<3; j++) { - dFlux[0][j] = dF_inviscid[j].density; - for (int k=0; k<3; k++) - dFlux[k+1][j] = dF_inviscid[j].momentum[k] - dstress[k][j]; - dFlux[4][j] = dF_inviscid[j].E_total + dFe[j]; - } + FluxTotal(dF_inviscid, dstress, dFe, dFlux); - for (int j=0; j<3; j++) { - for (int k=0; k<5; k++) { + for (int j=0; j<3; j++) + for (int k=0; k<5; k++) Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * dFlux[k][0] + dXdx[j][1] * dFlux[k][1] + dXdx[j][2] * dFlux[k][2]); - } - } - const CeedScalar dbody_force[5] = {0, - ds.U.density *g[0], - ds.U.density *g[1], - ds.U.density *g[2], - 0 - }; + const CeedScalar dbody_force[5] = {0, ds.U.density *g[0], ds.U.density *g[1], ds.U.density *g[2], 0}; CeedScalar dU[5] = {0.}; - dU[0] = ds.U.density; - for (CeedInt j=0; j<3; j++) - dU[j+1] = ds.U.momentum[j]; - dU[4] = ds.U.E_total; + UnpackState_U(ds.U, dU); for (int j=0; j<5; j++) v[j][i] = wdetJ * (context->ijacobian_time_shift * dU[j] - dbody_force[j]); - if (1) { - CeedScalar jacob_F_conv[3][5][5] = {0}; - computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total, - gamma, g, x_i); - CeedScalar grad_dU[5][3]; - for (int j=0; j<3; j++) { - grad_dU[0][j] = grad_ds[j].U.density; - for (int k=0; k<3; k++) grad_dU[k+1][j] = grad_ds[j].U.momentum[k]; - grad_dU[4][j] = grad_ds[j].U.E_total; - } - CeedScalar dstrong_conv[5] = {0.}; - for (int j=0; j<3; j++) - for (int k=0; k<5; k++) - for (int l=0; l<5; l++) - dstrong_conv[k] += jacob_F_conv[j][k][l] * grad_dU[l][j]; - - CeedScalar dstrong_res[5]; - for (int j=0; j<5; j++) - dstrong_res[j] = context->ijacobian_time_shift * dU[j] + - dstrong_conv[j] - - dbody_force[j]; - - CeedScalar dtau_strong_res[5] = {0.}, - dtau_strong_res_conservative[5] = {0.}; - dtau_strong_res[0] = Tau_d[0] * dstrong_res[0]; - dtau_strong_res[1] = Tau_d[1] * dstrong_res[1]; - dtau_strong_res[2] = Tau_d[1] * dstrong_res[2]; - dtau_strong_res[3] = Tau_d[1] * dstrong_res[3]; - dtau_strong_res[4] = Tau_d[2] * 
dstrong_res[4]; - PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv, - dtau_strong_res, dtau_strong_res_conservative); - CeedScalar dstab[5][3] = {0}; - for (int j=0; j<3; j++) - for (int k=0; k<5; k++) - for (int l=0; l<5; l++) - dstab[k][j] += jacob_F_conv[j][k][l] * dtau_strong_res_conservative[l]; - - for (int j=0; j<5; j++) - for (int k=0; k<3; k++) - Grad_v[k][j][i] += wdetJ*(dstab[j][0] * dXdx[k][0] + - dstab[j][1] * dXdx[k][1] + - dstab[j][2] * dXdx[k][2]); + // -- Stabilization method: none (Galerkin), SU, or SUPG + CeedScalar dstab[5][3], U_dot[5] = {0}; + for (CeedInt j=0; j<5; j++) U_dot[j] = context->ijacobian_time_shift * dU[j]; + Stabilization(context, s, Tau_d, grad_ds, U_dot, dbody_force, x_i, dstab); + + for (int j=0; j<5; j++) + for (int k=0; k<3; k++) + Grad_v[k][j][i] += wdetJ*(dstab[j][0] * dXdx[k][0] + + dstab[j][1] * dXdx[k][1] + + dstab[j][2] * dXdx[k][2]); - } } // End Quadrature Point Loop return 0; } diff --git a/examples/fluids/qfunctions/newtonian_state.h b/examples/fluids/qfunctions/newtonian_state.h index 959eb9c33f..e1d80e9e84 100644 --- a/examples/fluids/qfunctions/newtonian_state.h +++ b/examples/fluids/qfunctions/newtonian_state.h @@ -34,6 +34,18 @@ typedef struct { StatePrimitive Y; } State; +CEED_QFUNCTION_HELPER void UnpackState_U(StateConservative s, CeedScalar U[5]) { + U[0] = s.density; + for (int i=0; i<3; i++) U[i+1] = s.momentum[i]; + U[4] = s.E_total; +} + +CEED_QFUNCTION_HELPER void UnpackState_Y(StatePrimitive s, CeedScalar Y[5]) { + Y[0] = s.pressure; + for (int i=0; i<3; i++) Y[i+1] = s.velocity[i]; + Y[4] = s.temperature; +} + CEED_QFUNCTION_HELPER StatePrimitive StatePrimitiveFromConservative( NewtonianIdealGasContext gas, StateConservative U, const CeedScalar x[3]) { StatePrimitive Y; @@ -177,6 +189,29 @@ CEED_QFUNCTION_HELPER void FluxInviscid_fwd(NewtonianIdealGasContext gas, } } +CEED_QFUNCTION_HELPER void FluxInviscidStrong(NewtonianIdealGasContext gas, + State s, State ds[3], 
CeedScalar strong_conv[5]) { + for (CeedInt i=0; i<5; i++) strong_conv[i] = 0; + for (CeedInt i=0; i<3; i++) { + StateConservative dF[3]; + FluxInviscid_fwd(gas, s, ds[i], dF); + CeedScalar dF_i[5]; + UnpackState_U(dF[i], dF_i); + for (CeedInt j=0; j<5; j++) + strong_conv[j] += dF_i[j]; + } +} + +CEED_QFUNCTION_HELPER void FluxTotal(StateConservative F_inviscid[3], + CeedScalar stress[3][3], CeedScalar Fe[3], CeedScalar Flux[5][3]) { + for (CeedInt j=0; j<3; j++) { + Flux[0][j] = F_inviscid[j].density; + for (CeedInt k=0; k<3; k++) + Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j]; + Flux[4][j] = F_inviscid[j].E_total + Fe[j]; + } +} + // Kelvin-Mandel notation CEED_QFUNCTION_HELPER void KMStrainRate(const State grad_s[3], CeedScalar strain_rate[6]) { diff --git a/examples/fluids/qfunctions/stabilization.h b/examples/fluids/qfunctions/stabilization.h new file mode 100644 index 0000000000..b472ee66e8 --- /dev/null +++ b/examples/fluids/qfunctions/stabilization.h @@ -0,0 +1,155 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
+// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +/// @file +/// Helper functions for computing stabilization terms of a newtonian simulation + + +#ifndef stabilization_h +#define stabilization_h + +#include "newtonian_state.h" +#include + +// ***************************************************************************** +// Helper function for computing the variation in primitive variables, +// given Tau_d +// ***************************************************************************** +CEED_QFUNCTION_HELPER void dYFromTau(CeedScalar Y[5], CeedScalar Tau_d[3], + CeedScalar dY[5]) { + dY[0] = Tau_d[0] * Y[0]; + dY[1] = Tau_d[1] * Y[1]; + dY[2] = Tau_d[1] * Y[2]; + dY[3] = Tau_d[1] * Y[3]; + dY[4] = Tau_d[2] * Y[4]; +} + +// ***************************************************************************** +// Helper functions for computing the stabilization terms +// ***************************************************************************** +CEED_QFUNCTION_HELPER void StabilizationMatrix(NewtonianIdealGasContext gas, + State s, CeedScalar Tau_d[3], CeedScalar R[5], const CeedScalar x[3], + CeedScalar stab[5][3]) { + CeedScalar dY[5]; + const CeedScalar dx_i[3] = {0}; + StateConservative dF[3]; + // Zero stab so all future terms can safely sum into it + for (CeedInt i=0; i<5; i++) + for (CeedInt j=0; j<3; j++) + stab[i][j] = 0; + dYFromTau(R, Tau_d, dY); + State ds = StateFromY_fwd(gas, s, dY, x, dx_i); + FluxInviscid_fwd(gas, s, ds, dF); + for (CeedInt i=0; i<3; i++) { + CeedScalar dF_i[5]; + UnpackState_U(dF[i], dF_i); + for (CeedInt j=0; j<5; j++) + stab[j][i] += dF_i[j]; + } +} + +CEED_QFUNCTION_HELPER void Stabilization(NewtonianIdealGasContext gas, State s, + CeedScalar Tau_d[3], State ds[3], CeedScalar U_dot[5], + const CeedScalar body_force[5], const CeedScalar x[3], CeedScalar stab[5][3]) { + // -- Stabilization method: none (Galerkin), SU, or SUPG + CeedScalar R[5] = {0}; + switch (gas->stabilization) 
{ + case STAB_NONE: + break; + case STAB_SU: + FluxInviscidStrong(gas, s, ds, R); + break; + case STAB_SUPG: + FluxInviscidStrong(gas, s, ds, R); + for (CeedInt j=0; j<5; j++) R[j] += U_dot[j] - body_force[j]; + break; + } + StabilizationMatrix(gas, s, Tau_d, R, x, stab); +} + +// ***************************************************************************** +// Helper function for computing Tau elements (stabilization constant) +// Model from: +// PHASTA +// +// Tau[i] = itau=0 which is diagonal-Shakib (3 values still but not spatial) +// +// ***************************************************************************** +CEED_QFUNCTION_HELPER void Tau_diagPrim(NewtonianIdealGasContext gas, State s, + const CeedScalar dXdx[3][3], + const CeedScalar dt, CeedScalar Tau_d[3]) { + // Context + const CeedScalar Ctau_t = gas->Ctau_t; + const CeedScalar Ctau_v = gas->Ctau_v; + const CeedScalar Ctau_C = gas->Ctau_C; + const CeedScalar Ctau_M = gas->Ctau_M; + const CeedScalar Ctau_E = gas->Ctau_E; + const CeedScalar cv = gas->cv; + const CeedScalar mu = gas->mu; + const CeedScalar u[3] = {s.Y.velocity[0], s.Y.velocity[1], s.Y.velocity[2]}; + const CeedScalar rho = s.U.density; + + CeedScalar gijd[6]; + CeedScalar tau; + CeedScalar dts; + CeedScalar fact; + + //*INDENT-OFF* + gijd[0] = dXdx[0][0] * dXdx[0][0] + + dXdx[1][0] * dXdx[1][0] + + dXdx[2][0] * dXdx[2][0]; + + gijd[1] = dXdx[0][0] * dXdx[0][1] + + dXdx[1][0] * dXdx[1][1] + + dXdx[2][0] * dXdx[2][1]; + + gijd[2] = dXdx[0][1] * dXdx[0][1] + + dXdx[1][1] * dXdx[1][1] + + dXdx[2][1] * dXdx[2][1]; + + gijd[3] = dXdx[0][0] * dXdx[0][2] + + dXdx[1][0] * dXdx[1][2] + + dXdx[2][0] * dXdx[2][2]; + + gijd[4] = dXdx[0][1] * dXdx[0][2] + + dXdx[1][1] * dXdx[1][2] + + dXdx[2][1] * dXdx[2][2]; + + gijd[5] = dXdx[0][2] * dXdx[0][2] + + dXdx[1][2] * dXdx[1][2] + + dXdx[2][2] * dXdx[2][2]; + //*INDENT-ON* + + dts = Ctau_t / dt ; + + tau = rho*rho*((4. * dts * dts) + + u[0] * ( u[0] * gijd[0] + 2. 
* ( u[1] * gijd[1] + u[2] * gijd[3])) + + u[1] * ( u[1] * gijd[2] + 2. * u[2] * gijd[4]) + + u[2] * u[2] * gijd[5]) + + Ctau_v* mu * mu * + (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] + + + 2. * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4])); + + fact = sqrt(tau); + + Tau_d[0] = Ctau_C * fact / (rho*(gijd[0] + gijd[2] + gijd[5]))*0.125; + + Tau_d[1] = Ctau_M / fact; + Tau_d[2] = Ctau_E / ( fact * cv ); + + // consider putting back the way I initially had it Ctau_E * Tau_d[1] /cv + // to avoid a division if the compiler is smart enough to see that cv IS + // a constant that it could invert once for all elements + // but in that case energy tau is scaled by the product of Ctau_E * Ctau_M + // OR we could absorb cv into Ctau_E but this puts more burden on user to + // know how to change constants with a change of fluid or units. Same for + // Ctau_v * mu * mu IF AND ONLY IF we don't add viscosity law =f(T) +} + +// ***************************************************************************** + +#endif // stabilization_h diff --git a/examples/fluids/tests-output/fluids-navierstokes-channel-prim.bin b/examples/fluids/tests-output/fluids-navierstokes-channel-prim.bin new file mode 100644 index 0000000000000000000000000000000000000000..5f52daa6bf85207aaf98b625abddb592eb3c3dc8 GIT binary patch literal 1608 zcmXw)drXsO6vpce4*$Ri(;`cpQwJL3oB{y~y6-gwwqR(Z;$}c5R7@yLkh&Mrwlu>bCg0tYY-viR$?D zJnH_jsjf!!|Lys&EC%(}%3XnfpnmR<@x#wGr{&1WuEO!c(KPgrmOBOW z6Q)!po)spy&l~3#pgF(YUKHPP67ze%j!HO;`RB8e&UuCDsPW4?*;lmPj)4vB;eqI9 z#Z^ySUoB5Y;=npUC#l1m)IC!w;x-cF0bVS}+rj>*@ z9CpvG7miEOEjPdW4DTBflRbXI9I?i8V|0>M(rBIYI^~3L{MMY@>}o}ShRaqNEzF-Z zP6+;Lu5NqaNpky2v47alTCp7apKjRt{IoDfHfgr~!iLXCoNb$ASp-=h-D1E0M(n;U;ySG{2By*CZmg}UicsaoO zR$!j=O<{d^=%lZ&YYgjar2Wu0UFmy&P$zxcR!#;V={t%(>6@kW{d1>I`tJWYh4qa? 
zAAZhn#y)BR3!J_cky zgq_}}e7wEi8Fjy0Kbenfzt2XW@?oO=xL>}y-@`POkHr;yKios!?`L(tzEGKu1Jif$ z{akn-`KZ6e{<|Ra@#=#g*?)oPQ$E6#|I$ZfK8jOo*?+m%Px+{1{|(4|^pD*RKFY^D z%u_z{l>asvWj>zuZDjw2(LC~Tp8e-9^HH^z_$VI+=KGuZhE+aJS(tAH^TjK^j8vJA zZyqHu-+JaNQ+)0MnGX@xzEnvQbiZ55ld~{ERzlpE)9{Qy3 zJf-in5t$EfTMg?QiT$LnN9p@+x6FrDn#TH`z&z!nSm|r;kooA`@r3n#n~!z;{o5z= zak;h=e8|T_`o2&;D*5|2r1BAJXMHK(l#hGZM}EflQu_G{YVvb3`jn5SsL9U-=u Date: Mon, 25 Jul 2022 15:09:21 -0600 Subject: [PATCH 156/172] debug - fix error message int format --- interface/ceed-operator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c index e7cdf1352c..5087ad8f85 100644 --- a/interface/ceed-operator.c +++ b/interface/ceed-operator.c @@ -745,7 +745,7 @@ int CeedOperatorSetField(CeedOperator op, const char *field_name, op->num_elem != num_elem) // LCOV_EXCL_START return CeedError(op->ceed, CEED_ERROR_DIMENSION, - "ElemRestriction with %" CeedInt_FMT " elements incompatible with prior " + "ElemRestriction with %" CeedInt_FMT " elements incompatible with prior %" CeedInt_FMT " elements", num_elem, op->num_elem); // LCOV_EXCL_STOP From 43622462ff3b1e6f216f011e0b35418088f3edba Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 28 Jul 2022 11:34:46 -0600 Subject: [PATCH 157/172] op - allow empty composite operators --- interface/ceed-operator.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c index 5087ad8f85..98238f36ab 100644 --- a/interface/ceed-operator.c +++ b/interface/ceed-operator.c @@ -1006,18 +1006,20 @@ int CeedOperatorCheckReady(CeedOperator op) { CeedQFunction qf = op->qf; if (op->is_composite) { - if (!op->num_suboperators) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_INCOMPLETE, "No sub_operators set"); - // LCOV_EXCL_STOP - for (CeedInt i = 0; i < op->num_suboperators; i++) { - ierr = CeedOperatorCheckReady(op->sub_operators[i]); 
CeedChk(ierr); + if (!op->num_suboperators) { + // Empty operator setup + op->input_size = 0; + op->output_size = 0; + } else { + for (CeedInt i = 0; i < op->num_suboperators; i++) { + ierr = CeedOperatorCheckReady(op->sub_operators[i]); CeedChk(ierr); + } + // Sub-operators could be modified after adding to composite operator + // Need to verify no lvec incompatibility from any changes + CeedSize input_size, output_size; + ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); + CeedChk(ierr); } - // Sub-operators could be modified after adding to composite operator - // Need to verify no lvec incompatibility from any changes - CeedSize input_size, output_size; - ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size); - CeedChk(ierr); } else { if (op->num_fields == 0) // LCOV_EXCL_START From 9b7dec89cc3aeb8c2b16858150bbff3f71cd9c48 Mon Sep 17 00:00:00 2001 From: James Wright Date: Fri, 29 Jul 2022 14:18:31 -0600 Subject: [PATCH 158/172] fluids: Fix for loop variable initialization --- examples/fluids/qfunctions/channel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h index 5ef6804509..5b95052f41 100644 --- a/examples/fluids/qfunctions/channel.h +++ b/examples/fluids/qfunctions/channel.h @@ -142,7 +142,7 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q, // Find pressure using state inside the domain CeedScalar q_inside[5] = {0}; - for (CeedInt j; j<5; j++) + for (CeedInt j=0; j<5; j++) q_inside[j] = q[j][i]; State s_inside = StateFromU(gas, q_inside, x); const CeedScalar P = s_inside.Y.pressure; From 43e9189f11ddca69aef92630b754e68f7908534b Mon Sep 17 00:00:00 2001 From: Rezgar Shakeri <42816410+rezgarshakeri@users.noreply.github.com> Date: Mon, 1 Aug 2022 11:37:10 -0600 Subject: [PATCH 159/172] Fixed dXdx definition in setupgeo2d.h (#1034) --- examples/fluids/qfunctions/setupgeo2d.h | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/examples/fluids/qfunctions/setupgeo2d.h b/examples/fluids/qfunctions/setupgeo2d.h index 2a3c715f3b..55a4b56976 100644 --- a/examples/fluids/qfunctions/setupgeo2d.h +++ b/examples/fluids/qfunctions/setupgeo2d.h @@ -74,8 +74,8 @@ CEED_QFUNCTION(Setup2d)(void *ctx, CeedInt Q, // -- Interp-to-Grad q_data // Inverse of change of coordinate matrix: X_i,j q_data[1][i] = J22 / detJ; - q_data[2][i] = -J21 / detJ; - q_data[3][i] = -J12 / detJ; + q_data[2][i] = -J12 / detJ; + q_data[3][i] = -J21 / detJ; q_data[4][i] = J11 / detJ; } // End of Quadrature Point Loop From c9c2c07970382857cc7b4a28d359710237b91a3e Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Fri, 5 Aug 2022 14:10:01 -0600 Subject: [PATCH 160/172] QF headers for typedefs and macros (#1036) * jit - qf headers for typedefs and macros * jit - smaller list of permitted files * ceed - only include ceed.h in QF source --- .gitlab-ci.yml | 4 +- Makefile | 4 +- .../cuda-gen/ceed-cuda-gen-operator-build.cpp | 5 - .../cuda-ref/ceed-cuda-ref-qfunction-load.cpp | 6 - backends/cuda/ceed-cuda-common.h | 2 +- ...d-cuda-compile.c => ceed-cuda-compile.cpp} | 61 +++++---- backends/cuda/ceed-cuda-compile.h | 2 +- .../hip-gen/ceed-hip-gen-operator-build.cpp | 5 - .../hip-ref/ceed-hip-ref-qfunction-load.cpp | 6 - backends/hip/ceed-hip-compile.cpp | 43 +++--- backends/hip/ceed-hip-compile.h | 2 +- examples/ceed/ex1-volume.h | 2 + examples/ceed/ex2-surface.h | 2 + examples/fluids/qfunctions/advection.h | 2 +- examples/fluids/qfunctions/advection2d.h | 2 +- examples/fluids/qfunctions/blasius.h | 2 +- examples/fluids/qfunctions/channel.h | 4 +- examples/fluids/qfunctions/densitycurrent.h | 4 +- examples/fluids/qfunctions/eulervortex.h | 2 +- examples/fluids/qfunctions/mass.h | 2 +- examples/fluids/qfunctions/newtonian.h | 6 +- examples/fluids/qfunctions/newtonian_state.h | 2 +- examples/fluids/qfunctions/newtonian_types.h | 2 +- examples/fluids/qfunctions/setupgeo.h | 2 +- 
examples/fluids/qfunctions/setupgeo2d.h | 2 +- examples/fluids/qfunctions/shocktube.h | 2 +- examples/fluids/qfunctions/stabilization.h | 2 +- examples/fluids/qfunctions/stg_shur14.h | 2 +- examples/fluids/qfunctions/utils.h | 2 +- examples/mfem/bp1.h | 2 + examples/mfem/bp3.h | 2 + examples/nek/bps/bps.h | 1 + examples/petsc/qfunctions/area/areacube.h | 1 + examples/petsc/qfunctions/area/areasphere.h | 1 + examples/petsc/qfunctions/bps/bp1.h | 1 + examples/petsc/qfunctions/bps/bp1sphere.h | 1 + examples/petsc/qfunctions/bps/bp2.h | 1 + examples/petsc/qfunctions/bps/bp2sphere.h | 1 + examples/petsc/qfunctions/bps/bp3.h | 1 + examples/petsc/qfunctions/bps/bp3sphere.h | 1 + examples/petsc/qfunctions/bps/bp4.h | 1 + examples/petsc/qfunctions/bps/bp4sphere.h | 1 + examples/petsc/qfunctions/bps/common.h | 2 + examples/solids/qfunctions/common.h | 2 + examples/solids/qfunctions/constant-force.h | 1 + .../finite-strain-mooney-rivlin-initial-1.h | 1 + .../finite-strain-neo-hookean-current-1.h | 1 + .../finite-strain-neo-hookean-current-2.h | 1 + .../finite-strain-neo-hookean-initial-1.h | 1 + .../finite-strain-neo-hookean-initial-2.h | 1 + examples/solids/qfunctions/linear.h | 1 + .../solids/qfunctions/manufactured-force.h | 1 + .../solids/qfunctions/manufactured-true.h | 1 + .../qfunctions/small-strain-neo-hookean.h | 1 + .../solids/qfunctions/traction-boundary.h | 2 + include/ceed/ceed-f32.h | 12 +- include/ceed/ceed-f64.h | 12 +- include/ceed/ceed.h | 114 +--------------- include/ceed/jit-source/cuda/cuda-jit.h | 22 +++ .../cuda/cuda-ref-basis-nontensor.h | 2 +- .../jit-source/cuda/cuda-ref-basis-tensor.h | 2 +- .../cuda-ref-operator-assemble-diagonal.h | 2 +- .../cuda/cuda-ref-operator-assemble.h | 2 +- .../ceed/jit-source/cuda/cuda-ref-qfunction.h | 2 +- .../jit-source/cuda/cuda-ref-restriction.h | 2 +- .../ceed/jit-source/cuda/cuda-shared-basis.h | 2 +- .../ceed/jit-source/gallery/ceed-identity.h | 2 + .../jit-source/gallery/ceed-mass1dbuild.h | 2 + 
.../jit-source/gallery/ceed-mass2dbuild.h | 2 + .../jit-source/gallery/ceed-mass3dbuild.h | 2 + .../ceed/jit-source/gallery/ceed-massapply.h | 2 + .../jit-source/gallery/ceed-poisson1dapply.h | 2 + .../jit-source/gallery/ceed-poisson1dbuild.h | 2 + .../jit-source/gallery/ceed-poisson2dapply.h | 2 + .../jit-source/gallery/ceed-poisson2dbuild.h | 2 + .../jit-source/gallery/ceed-poisson3dapply.h | 2 + .../jit-source/gallery/ceed-poisson3dbuild.h | 2 + include/ceed/jit-source/gallery/ceed-scale.h | 2 + .../jit-source/gallery/ceed-vectormassapply.h | 2 + .../gallery/ceed-vectorpoisson1dapply.h | 2 + .../gallery/ceed-vectorpoisson2dapply.h | 2 + .../gallery/ceed-vectorpoisson3dapply.h | 2 + include/ceed/jit-source/hip/hip-jit.h | 22 +++ .../jit-source/hip/hip-ref-basis-nontensor.h | 2 +- .../jit-source/hip/hip-ref-basis-tensor.h | 3 +- .../hip/hip-ref-operator-assemble-diagonal.h | 2 +- .../hip/hip-ref-operator-assemble.h | 2 +- .../ceed/jit-source/hip/hip-ref-qfunction.h | 2 +- .../ceed/jit-source/hip/hip-ref-restriction.h | 2 +- .../ceed/jit-source/hip/hip-shared-basis.h | 2 +- include/ceed/types.h | 128 ++++++++++++++++++ interface/ceed-jit-tools.c | 54 +++++--- python/build_ceed_cffi.py | 69 +++++----- rust/libceed-sys/build.rs | 1 + tests/t400-qfunction.h | 2 + tests/t401-qfunction.h | 2 + tests/t405-qfunction.h | 2 + tests/t406-qfunction-helper.h | 1 + tests/t406-qfunction.h | 1 + tests/t409-qfunction.h | 2 + tests/t500-operator.h | 2 + tests/t502-operator.h | 2 + tests/t507-operator.h | 2 + tests/t510-operator.h | 2 + tests/t522-operator.h | 2 + tests/t530-operator.h | 2 + tests/t531-operator.h | 2 + tests/t532-operator.h | 2 + tests/t534-operator.h | 2 + tests/t535-operator.h | 2 + tests/t537-operator.h | 2 + tests/t540-operator.h | 2 + tests/t541-operator.h | 2 + tests/t566-operator.h | 2 + tests/t567-operator.h | 2 + 115 files changed, 472 insertions(+), 278 deletions(-) rename backends/cuda/{ceed-cuda-compile.c => ceed-cuda-compile.cpp} (80%) create mode 
100644 include/ceed/jit-source/cuda/cuda-jit.h create mode 100644 include/ceed/jit-source/hip/hip-jit.h create mode 100644 include/ceed/types.h diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 18ae891f7c..45759833cd 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -171,7 +171,7 @@ noether-float: - rm -f .SUCCESS # libCEED # Change to single precision - - sed -i 's/ceed-f64/ceed-f32/1' include/ceed/ceed.h + - sed -i 's/ceed-f64/ceed-f32/1' include/ceed/types.h # Build libCEED - make configure HIP_DIR=/opt/rocm OPT='-O -march=native -ffp-contract=fast' - BACKENDS_CPU=$(make info-backends-all | grep -o '/cpu[^ ]*') && BACKENDS_GPU=$(make info-backends | grep -o '/gpu[^ ]*') @@ -198,7 +198,7 @@ noether-float: bash <(curl -s https://codecov.io/bash) -f coverage.info -t ${CODECOV_ACCESS_TOKEN} -F tests; bash <(curl -s https://codecov.io/bash) -f coverage.info -t ${CODECOV_ACCESS_TOKEN} -F examples; fi - - sed -i 's/ceed-f32/ceed-f64/1' include/ceed/ceed.h + - sed -i 's/ceed-f32/ceed-f64/1' include/ceed/types.h artifacts: paths: - build/*.junit diff --git a/Makefile b/Makefile index 4180414352..4348541bfe 100644 --- a/Makefile +++ b/Makefile @@ -224,6 +224,7 @@ opt.c := $(sort $(wildcard backends/opt/*.c)) avx.c := $(sort $(wildcard backends/avx/*.c)) xsmm.c := $(sort $(wildcard backends/xsmm/*.c)) cuda.c := $(sort $(wildcard backends/cuda/*.c)) +cuda.cpp := $(sort $(wildcard backends/cuda/*.cpp)) cuda-ref.c := $(sort $(wildcard backends/cuda-ref/*.c)) cuda-ref.cpp := $(sort $(wildcard backends/cuda-ref/*.cpp)) cuda-ref.cu := $(sort $(wildcard backends/cuda-ref/kernels/*.cu)) @@ -391,7 +392,7 @@ ifneq ($(CUDA_LIB_DIR),) LIBCEED_CONTAINS_CXX = 1 libceed.c += interface/ceed-cuda.c libceed.c += $(cuda.c) $(cuda-ref.c) $(cuda-shared.c) $(cuda-gen.c) - libceed.cpp += $(cuda-ref.cpp) $(cuda-gen.cpp) + libceed.cpp += $(cuda.cpp) $(cuda-ref.cpp) $(cuda-gen.cpp) libceed.cu += $(cuda-ref.cu) $(cuda-shared.cu) $(cuda-gen.cu) BACKENDS_MAKE += $(CUDA_BACKENDS) endif @@ -635,6 
+636,7 @@ install : $(libceed) $(OBJDIR)/ceed.pc "$(includedir)/ceed/jit-source/cuda/" "$(includedir)/ceed/jit-source/hip/"\ "$(includedir)/ceed/jit-source/gallery/" "$(libdir)" "$(pkgconfigdir)") $(INSTALL_DATA) include/ceed/ceed.h "$(DESTDIR)$(includedir)/ceed/" + $(INSTALL_DATA) include/ceed/types.h "$(DESTDIR)$(includedir)/ceed/" $(INSTALL_DATA) include/ceed/ceed-f32.h "$(DESTDIR)$(includedir)/ceed/" $(INSTALL_DATA) include/ceed/ceed-f64.h "$(DESTDIR)$(includedir)/ceed/" $(INSTALL_DATA) include/ceed/fortran.h "$(DESTDIR)$(includedir)/ceed/" diff --git a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp index 8fc35bfc38..fa73a0ede7 100644 --- a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp +++ b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp @@ -808,11 +808,6 @@ extern "C" int CeedCudaGenOperatorBuild(CeedOperator op) { string oper; oper = "CeedKernel_Cuda_gen_" + qFunctionName; - code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n"; - code << "#define CEED_QFUNCTION_HELPER inline __device__\n"; - code << "#define CeedPragmaSIMD\n"; - code << "#define CEED_ERROR_SUCCESS 0\n\n"; - // Find dim and Q1d bool useCollograd = true; bool allCollograd = true; diff --git a/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp b/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp index aee2038a34..02f83943d0 100644 --- a/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp +++ b/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp @@ -58,12 +58,6 @@ extern "C" int CeedCudaBuildQFunction(CeedQFunction qf) { ostringstream code; // Defintions - code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n"; - code << "#define CEED_QFUNCTION_HELPER inline __device__\n"; - code << "#define CeedPragmaSIMD\n"; - code << "#define CEED_ERROR_SUCCESS 0\n"; - code << "#define CEED_Q_VLA 1\n\n"; - code << "typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Cuda;\n"; code << 
read_write; code << qfunction_source; code << "\n"; diff --git a/backends/cuda/ceed-cuda-common.h b/backends/cuda/ceed-cuda-common.h index 98d47b55fc..48e8cbaabf 100644 --- a/backends/cuda/ceed-cuda-common.h +++ b/backends/cuda/ceed-cuda-common.h @@ -18,7 +18,7 @@ #define CeedChk_Cu(ceed, x) \ do { \ - CUresult cuda_result = x; \ + CUresult cuda_result = (CUresult)x; \ if (cuda_result != CUDA_SUCCESS) { \ const char *msg; \ cuGetErrorName(cuda_result, &msg); \ diff --git a/backends/cuda/ceed-cuda-compile.c b/backends/cuda/ceed-cuda-compile.cpp similarity index 80% rename from backends/cuda/ceed-cuda-compile.c rename to backends/cuda/ceed-cuda-compile.cpp index 92f6668149..7c5175830e 100644 --- a/backends/cuda/ceed-cuda-compile.c +++ b/backends/cuda/ceed-cuda-compile.cpp @@ -7,16 +7,19 @@ #include #include +#include #include #include #include +#include +#include #include #include "ceed-cuda-common.h" #include "ceed-cuda-compile.h" #define CeedChk_Nvrtc(ceed, x) \ do { \ - nvrtcResult result = x; \ + nvrtcResult result = static_cast(x); \ if (result != NVRTC_SUCCESS) \ return CeedError((ceed), CEED_ERROR_BACKEND, nvrtcGetErrorString(result)); \ } while (0) @@ -25,50 +28,60 @@ do { \ // Compile CUDA kernel //------------------------------------------------------------------------------ int CeedCompileCuda(Ceed ceed, const char *source, CUmodule *module, - const CeedInt num_opts, ...) { + const CeedInt num_defines, ...) 
{ int ierr; cudaFree(0); // Make sure a Context exists for nvrtc nvrtcProgram prog; - CeedChk_Nvrtc(ceed, nvrtcCreateProgram(&prog, source, NULL, 0, NULL, NULL)); + + std::ostringstream code; // Get kernel specific options, such as kernel constants - const int opts_len = 32; - const int opts_extra = 4; - const char *opts[num_opts + opts_extra]; - char buf[num_opts][opts_len]; - if (num_opts > 0) { + if (num_defines > 0) { va_list args; - va_start(args, num_opts); + va_start(args, num_defines); char *name; int val; - for (int i = 0; i < num_opts; i++) { + for (int i = 0; i < num_defines; i++) { name = va_arg(args, char *); val = va_arg(args, int); - snprintf(&buf[i][0], opts_len,"-D%s=%d", name, val); - opts[i] = &buf[i][0]; + code << "#define " << name << " " << val << "\n"; } va_end(args); } - // Standard backend options - if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { - opts[num_opts] = "-DCeedScalar=float"; - } else { - opts[num_opts] = "-DCeedScalar=double"; - } - opts[num_opts + 1] = "-DCeedInt=int"; - opts[num_opts + 2] = "-default-device"; + // Standard libCEED definitions for CUDA backends + char *jit_defs_path, *jit_defs_source; + ierr = CeedGetJitAbsolutePath(ceed, + "ceed/jit-source/cuda/cuda-jit.h", + &jit_defs_path); CeedChkBackend(ierr); + ierr = CeedLoadSourceToBuffer(ceed, jit_defs_path, &jit_defs_source); + CeedChkBackend(ierr); + code << jit_defs_source; + code << "\n\n"; + ierr = CeedFree(&jit_defs_path); CeedChkBackend(ierr); + ierr = CeedFree(&jit_defs_source); CeedChkBackend(ierr); + + // Non-macro options + const int num_opts = 3; + const char *opts[num_opts]; + opts[0] = "-default-device"; struct cudaDeviceProp prop; Ceed_Cuda *ceed_data; ierr = CeedGetData(ceed, &ceed_data); CeedChkBackend(ierr); ierr = cudaGetDeviceProperties(&prop, ceed_data->device_id); CeedChk_Cu(ceed, ierr); - char buff[opts_len]; - snprintf(buff, opts_len,"-arch=compute_%d%d", prop.major, prop.minor); - opts[num_opts + 3] = buff; + std::string arch_arg = "-arch=compute_" 
+ std::to_string(prop.major) + std::to_string(prop.minor); + opts[1] = arch_arg.c_str(); + opts[2] = "-Dint32_t=int"; + + // Add string source argument provided in call + code << source; + + // Create Program + CeedChk_Nvrtc(ceed, nvrtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL)); // Compile kernel - nvrtcResult result = nvrtcCompileProgram(prog, num_opts + opts_extra, opts); + nvrtcResult result = nvrtcCompileProgram(prog, num_opts, opts); if (result != NVRTC_SUCCESS) { size_t log_size; CeedChk_Nvrtc(ceed, nvrtcGetProgramLogSize(prog, &log_size)); diff --git a/backends/cuda/ceed-cuda-compile.h b/backends/cuda/ceed-cuda-compile.h index c68a8bb97b..b6c465b383 100644 --- a/backends/cuda/ceed-cuda-compile.h +++ b/backends/cuda/ceed-cuda-compile.h @@ -17,7 +17,7 @@ static inline CeedInt CeedDivUpInt(CeedInt numerator, CeedInt denominator) { } CEED_INTERN int CeedCompileCuda(Ceed ceed, const char *source, CUmodule *module, - const CeedInt num_opts, ...); + const CeedInt num_defines, ...); CEED_INTERN int CeedGetKernelCuda(Ceed ceed, CUmodule module, const char *name, CUfunction *kernel); diff --git a/backends/hip-gen/ceed-hip-gen-operator-build.cpp b/backends/hip-gen/ceed-hip-gen-operator-build.cpp index b04f589092..bd5a0e3d6e 100644 --- a/backends/hip-gen/ceed-hip-gen-operator-build.cpp +++ b/backends/hip-gen/ceed-hip-gen-operator-build.cpp @@ -807,11 +807,6 @@ extern "C" int CeedHipGenOperatorBuild(CeedOperator op) { string oper; oper = "CeedKernel_Hip_gen_" + qFunctionName; - code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n"; - code << "#define CEED_QFUNCTION_HELPER __device__ __forceinline__\n"; - code << "#define CeedPragmaSIMD\n"; - code << "#define CEED_ERROR_SUCCESS 0\n\n"; - // Find dim and Q1d bool useCollograd = true; bool allCollograd = true; diff --git a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp index 3d81f0ee26..01632d8458 100644 --- 
a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp +++ b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp @@ -60,12 +60,6 @@ extern "C" int CeedHipBuildQFunction(CeedQFunction qf) { ostringstream code; // Defintions - code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n"; - code << "#define CEED_QFUNCTION_HELPER __device__ __forceinline__\n"; - code << "#define CeedPragmaSIMD\n"; - code << "#define CEED_ERROR_SUCCESS 0\n"; - code << "#define CEED_Q_VLA 1\n\n"; - code << "typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Hip;\n"; code << read_write; code << qfunction_source; code << "\n"; diff --git a/backends/hip/ceed-hip-compile.cpp b/backends/hip/ceed-hip-compile.cpp index 4fceec5f2a..250ab855c5 100644 --- a/backends/hip/ceed-hip-compile.cpp +++ b/backends/hip/ceed-hip-compile.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -25,12 +26,13 @@ do { \ // Compile HIP kernel //------------------------------------------------------------------------------ int CeedCompileHip(Ceed ceed, const char *source, hipModule_t *module, - const CeedInt num_opts, ...) { + const CeedInt num_defines, ...) 
{ int ierr; - hipFree(0); // Make sure a Context exists for hiprtc + hipFree(0); // Make sure a Context exists for hiprtc hiprtcProgram prog; std::ostringstream code; + // Add hip runtime include statement for generation if runtime < 40400000 // (implies ROCm < 4.5) int runtime_version; @@ -46,16 +48,13 @@ int CeedCompileHip(Ceed ceed, const char *source, hipModule_t *module, code << "#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[];\n"; } - // Macro definitions - // Get kernel specific options, such as kernel constants - const int opts_size = 3; - const char *opts[opts_size]; - if (num_opts > 0) { + // Kernel specific options, such as kernel constants + if (num_defines > 0) { va_list args; - va_start(args, num_opts); + va_start(args, num_defines); char *name; int val; - for (int i = 0; i < num_opts; i++) { + for (int i = 0; i < num_defines; i++) { name = va_arg(args, char *); val = va_arg(args, int); code << "#define " << name << " " << val << "\n"; @@ -63,17 +62,21 @@ int CeedCompileHip(Ceed ceed, const char *source, hipModule_t *module, va_end(args); } - // Standard backend options - if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { - code << "#define CeedScalar float\n"; - } - else { - code << "#define CeedScalar double\n"; - } - code << "#define CeedInt int\n"; - code << "#define CEED_ERROR_SUCCESS 0\n\n"; + // Standard libCEED definitions for HIP backends + char *jit_defs_path, *jit_defs_source; + ierr = CeedGetJitAbsolutePath(ceed, + "ceed/jit-source/hip/hip-jit.h", + &jit_defs_path); CeedChkBackend(ierr); + ierr = CeedLoadSourceToBuffer(ceed, jit_defs_path, &jit_defs_source); + CeedChkBackend(ierr); + code << jit_defs_source; + code << "\n\n"; + ierr = CeedFree(&jit_defs_path); CeedChkBackend(ierr); + ierr = CeedFree(&jit_defs_source); CeedChkBackend(ierr); - // Non-macro options + // Non-macro options + const int num_opts = 3; + const char *opts[num_opts]; opts[0] = "-default-device"; struct hipDeviceProp_t prop; Ceed_Hip *ceed_data; @@ -90,7 
+93,7 @@ int CeedCompileHip(Ceed ceed, const char *source, hipModule_t *module, CeedChk_hiprtc(ceed, hiprtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL)); // Compile kernel - hiprtcResult result = hiprtcCompileProgram(prog, opts_size, opts); + hiprtcResult result = hiprtcCompileProgram(prog, num_opts, opts); if (result != HIPRTC_SUCCESS) { size_t log_size; CeedChk_hiprtc(ceed, hiprtcGetProgramLogSize(prog, &log_size)); diff --git a/backends/hip/ceed-hip-compile.h b/backends/hip/ceed-hip-compile.h index d15325a62b..abcb260270 100644 --- a/backends/hip/ceed-hip-compile.h +++ b/backends/hip/ceed-hip-compile.h @@ -16,7 +16,7 @@ static inline CeedInt CeedDivUpInt(CeedInt numerator, CeedInt denominator) { } CEED_INTERN int CeedCompileHip(Ceed ceed, const char *source, - hipModule_t *module, const CeedInt numopts, ...); + hipModule_t *module, const CeedInt num_defines, ...); CEED_INTERN int CeedGetKernelHip(Ceed ceed, hipModule_t module, const char *name, hipFunction_t *kernel); diff --git a/examples/ceed/ex1-volume.h b/examples/ceed/ex1-volume.h index 7bbc10c46f..a9bf4f9349 100644 --- a/examples/ceed/ex1-volume.h +++ b/examples/ceed/ex1-volume.h @@ -8,6 +8,8 @@ #ifndef ex1_volume_h #define ex1_volume_h +#include + /// A structure used to pass additional data to f_build_mass struct BuildContext { CeedInt dim, space_dim; }; diff --git a/examples/ceed/ex2-surface.h b/examples/ceed/ex2-surface.h index 875bb35e6e..26ff69fd12 100644 --- a/examples/ceed/ex2-surface.h +++ b/examples/ceed/ex2-surface.h @@ -8,6 +8,8 @@ #ifndef ex2_surface_h #define ex2_surface_h +#include + /// A structure used to pass additional data to f_build_diff struct BuildContext { CeedInt dim, space_dim; }; diff --git a/examples/fluids/qfunctions/advection.h b/examples/fluids/qfunctions/advection.h index 293547c384..5922446a37 100644 --- a/examples/fluids/qfunctions/advection.h +++ b/examples/fluids/qfunctions/advection.h @@ -11,8 +11,8 @@ #ifndef advection_h #define advection_h -#include 
#include +#include typedef struct SetupContext_ *SetupContext; struct SetupContext_ { diff --git a/examples/fluids/qfunctions/advection2d.h b/examples/fluids/qfunctions/advection2d.h index 6aef50d1fc..b324255454 100644 --- a/examples/fluids/qfunctions/advection2d.h +++ b/examples/fluids/qfunctions/advection2d.h @@ -11,8 +11,8 @@ #ifndef advection2d_h #define advection2d_h -#include #include +#include #include "utils.h" typedef struct SetupContext_ *SetupContext; diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h index b4b99bc463..6fb44bc529 100644 --- a/examples/fluids/qfunctions/blasius.h +++ b/examples/fluids/qfunctions/blasius.h @@ -12,8 +12,8 @@ #ifndef blasius_h #define blasius_h -#include #include +#include #include "newtonian_types.h" #include "utils.h" diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h index 5b95052f41..443f18da30 100644 --- a/examples/fluids/qfunctions/channel.h +++ b/examples/fluids/qfunctions/channel.h @@ -12,10 +12,10 @@ #ifndef channel_h #define channel_h +#include #include -#include -#include "newtonian_types.h" #include "newtonian_state.h" +#include "newtonian_types.h" #include "utils.h" typedef struct ChannelContext_ *ChannelContext; diff --git a/examples/fluids/qfunctions/densitycurrent.h b/examples/fluids/qfunctions/densitycurrent.h index 46ded17076..cb431c3eaa 100644 --- a/examples/fluids/qfunctions/densitycurrent.h +++ b/examples/fluids/qfunctions/densitycurrent.h @@ -15,10 +15,10 @@ #ifndef densitycurrent_h #define densitycurrent_h -#include #include -#include "newtonian_types.h" +#include #include "newtonian_state.h" +#include "newtonian_types.h" #include "utils.h" typedef struct DensityCurrentContext_ *DensityCurrentContext; diff --git a/examples/fluids/qfunctions/eulervortex.h b/examples/fluids/qfunctions/eulervortex.h index be4937378b..ef6c55cbbd 100644 --- a/examples/fluids/qfunctions/eulervortex.h +++ b/examples/fluids/qfunctions/eulervortex.h @@ 
-16,8 +16,8 @@ #ifndef eulervortex_h #define eulervortex_h -#include #include +#include #include "utils.h" typedef struct EulerContext_ *EulerContext; diff --git a/examples/fluids/qfunctions/mass.h b/examples/fluids/qfunctions/mass.h index 45ae3198b1..ab8c1d23d2 100644 --- a/examples/fluids/qfunctions/mass.h +++ b/examples/fluids/qfunctions/mass.h @@ -11,8 +11,8 @@ #ifndef mass_h #define mass_h -#include #include +#include // ***************************************************************************** // This QFunction applies the mass matrix to five interlaced fields. diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 3ca74f003a..cbe455a6c5 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -12,12 +12,12 @@ #ifndef newtonian_h #define newtonian_h -#include #include -#include "newtonian_types.h" +#include #include "newtonian_state.h" -#include "utils.h" +#include "newtonian_types.h" #include "stabilization.h" +#include "utils.h" // ***************************************************************************** // This QFunction sets a "still" initial condition for generic Newtonian IG problems diff --git a/examples/fluids/qfunctions/newtonian_state.h b/examples/fluids/qfunctions/newtonian_state.h index e1d80e9e84..776c90f03f 100644 --- a/examples/fluids/qfunctions/newtonian_state.h +++ b/examples/fluids/qfunctions/newtonian_state.h @@ -12,8 +12,8 @@ #ifndef newtonian_state_h #define newtonian_state_h -#include #include +#include #include "newtonian_types.h" #include "utils.h" diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h index 51d43f5877..879add7d13 100644 --- a/examples/fluids/qfunctions/newtonian_types.h +++ b/examples/fluids/qfunctions/newtonian_types.h @@ -8,7 +8,7 @@ #ifndef newtonian_types_h #define newtonian_types_h -#include +#include #include "stabilization_types.h" typedef struct SetupContext_ 
*SetupContext; diff --git a/examples/fluids/qfunctions/setupgeo.h b/examples/fluids/qfunctions/setupgeo.h index 01406fd941..c45c094a94 100644 --- a/examples/fluids/qfunctions/setupgeo.h +++ b/examples/fluids/qfunctions/setupgeo.h @@ -11,8 +11,8 @@ #ifndef setup_geo_h #define setup_geo_h -#include #include +#include // ***************************************************************************** // This QFunction sets up the geometric factors required for integration and diff --git a/examples/fluids/qfunctions/setupgeo2d.h b/examples/fluids/qfunctions/setupgeo2d.h index 55a4b56976..f88fa135e2 100644 --- a/examples/fluids/qfunctions/setupgeo2d.h +++ b/examples/fluids/qfunctions/setupgeo2d.h @@ -11,8 +11,8 @@ #ifndef setup_geo_2d_h #define setup_geo_2d_h -#include #include +#include // ***************************************************************************** // This QFunction sets up the geometric factors required for integration and diff --git a/examples/fluids/qfunctions/shocktube.h b/examples/fluids/qfunctions/shocktube.h index e8137f3d94..a074bf8306 100644 --- a/examples/fluids/qfunctions/shocktube.h +++ b/examples/fluids/qfunctions/shocktube.h @@ -25,8 +25,8 @@ #ifndef shocktube_h #define shocktube_h -#include #include +#include #include "utils.h" typedef struct SetupContext_ *SetupContext; diff --git a/examples/fluids/qfunctions/stabilization.h b/examples/fluids/qfunctions/stabilization.h index b472ee66e8..0299612246 100644 --- a/examples/fluids/qfunctions/stabilization.h +++ b/examples/fluids/qfunctions/stabilization.h @@ -12,8 +12,8 @@ #ifndef stabilization_h #define stabilization_h -#include "newtonian_state.h" #include +#include "newtonian_state.h" // ***************************************************************************** // Helper function for computing the variation in primitive variables, diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index 6201f43afd..043328f55f 100644 --- 
a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -16,8 +16,8 @@ #ifndef stg_shur14_h #define stg_shur14_h -#include #include +#include #include #include "stg_shur14_type.h" #include "utils.h" diff --git a/examples/fluids/qfunctions/utils.h b/examples/fluids/qfunctions/utils.h index 25dd2005ed..de8efa7afe 100644 --- a/examples/fluids/qfunctions/utils.h +++ b/examples/fluids/qfunctions/utils.h @@ -8,8 +8,8 @@ #ifndef utils_h #define utils_h -#include #include +#include #ifndef M_PI #define M_PI 3.14159265358979323846 diff --git a/examples/mfem/bp1.h b/examples/mfem/bp1.h index 8fafb4e306..fdfb020d6b 100644 --- a/examples/mfem/bp1.h +++ b/examples/mfem/bp1.h @@ -8,6 +8,8 @@ #ifndef bp1_h #define bp1_h +#include + /// A structure used to pass additional data to f_build_mass struct BuildContext { CeedInt dim, space_dim; }; diff --git a/examples/mfem/bp3.h b/examples/mfem/bp3.h index fe21b74562..10c95fae8c 100644 --- a/examples/mfem/bp3.h +++ b/examples/mfem/bp3.h @@ -8,6 +8,8 @@ #ifndef bp3_h #define bp3_h +#include + /// A structure used to pass additional data to f_build_diff and f_apply_diff struct BuildContext { CeedInt dim, space_dim; }; diff --git a/examples/nek/bps/bps.h b/examples/nek/bps/bps.h index b2622bab92..e878d5dac8 100644 --- a/examples/nek/bps/bps.h +++ b/examples/nek/bps/bps.h @@ -8,6 +8,7 @@ #ifndef bps_h #define bps_h +#include #include #ifndef M_PI diff --git a/examples/petsc/qfunctions/area/areacube.h b/examples/petsc/qfunctions/area/areacube.h index 5762b71441..5cbc30d3d9 100644 --- a/examples/petsc/qfunctions/area/areacube.h +++ b/examples/petsc/qfunctions/area/areacube.h @@ -11,6 +11,7 @@ #ifndef areacube_h #define areacube_h +#include #include // ----------------------------------------------------------------------------- diff --git a/examples/petsc/qfunctions/area/areasphere.h b/examples/petsc/qfunctions/area/areasphere.h index 3af4af097f..fc2b6bc80d 100644 --- 
a/examples/petsc/qfunctions/area/areasphere.h +++ b/examples/petsc/qfunctions/area/areasphere.h @@ -11,6 +11,7 @@ #ifndef areasphere_h #define areasphere_h +#include #include // ----------------------------------------------------------------------------- diff --git a/examples/petsc/qfunctions/bps/bp1.h b/examples/petsc/qfunctions/bps/bp1.h index 83e3300301..a9a632767c 100644 --- a/examples/petsc/qfunctions/bps/bp1.h +++ b/examples/petsc/qfunctions/bps/bp1.h @@ -11,6 +11,7 @@ #ifndef bp1_h #define bp1_h +#include #include // ----------------------------------------------------------------------------- diff --git a/examples/petsc/qfunctions/bps/bp1sphere.h b/examples/petsc/qfunctions/bps/bp1sphere.h index febe0aad9a..a7eb660990 100644 --- a/examples/petsc/qfunctions/bps/bp1sphere.h +++ b/examples/petsc/qfunctions/bps/bp1sphere.h @@ -11,6 +11,7 @@ #ifndef bp1sphere_h #define bp1sphere_h +#include #include // ----------------------------------------------------------------------------- diff --git a/examples/petsc/qfunctions/bps/bp2.h b/examples/petsc/qfunctions/bps/bp2.h index 92a2affdf1..3a94a46114 100644 --- a/examples/petsc/qfunctions/bps/bp2.h +++ b/examples/petsc/qfunctions/bps/bp2.h @@ -11,6 +11,7 @@ #ifndef bp2_h #define bp2_h +#include #include // ----------------------------------------------------------------------------- diff --git a/examples/petsc/qfunctions/bps/bp2sphere.h b/examples/petsc/qfunctions/bps/bp2sphere.h index ce090c67f0..4150e9888d 100644 --- a/examples/petsc/qfunctions/bps/bp2sphere.h +++ b/examples/petsc/qfunctions/bps/bp2sphere.h @@ -11,6 +11,7 @@ #ifndef bp2sphere_h #define bp2sphere_h +#include #include // ----------------------------------------------------------------------------- diff --git a/examples/petsc/qfunctions/bps/bp3.h b/examples/petsc/qfunctions/bps/bp3.h index ae247ca5c1..3086af17b7 100644 --- a/examples/petsc/qfunctions/bps/bp3.h +++ b/examples/petsc/qfunctions/bps/bp3.h @@ -11,6 +11,7 @@ #ifndef bp3_h #define bp3_h 
+#include #include // ----------------------------------------------------------------------------- diff --git a/examples/petsc/qfunctions/bps/bp3sphere.h b/examples/petsc/qfunctions/bps/bp3sphere.h index 7d9539b030..b12fa27aea 100644 --- a/examples/petsc/qfunctions/bps/bp3sphere.h +++ b/examples/petsc/qfunctions/bps/bp3sphere.h @@ -20,6 +20,7 @@ #ifndef bp3sphere_h #define bp3sphere_h +#include #include // ----------------------------------------------------------------------------- diff --git a/examples/petsc/qfunctions/bps/bp4.h b/examples/petsc/qfunctions/bps/bp4.h index 5561cda5e8..8f57030854 100644 --- a/examples/petsc/qfunctions/bps/bp4.h +++ b/examples/petsc/qfunctions/bps/bp4.h @@ -11,6 +11,7 @@ #ifndef bp4_h #define bp4_h +#include #include // ----------------------------------------------------------------------------- diff --git a/examples/petsc/qfunctions/bps/bp4sphere.h b/examples/petsc/qfunctions/bps/bp4sphere.h index e44e6c96cf..5858351bc9 100644 --- a/examples/petsc/qfunctions/bps/bp4sphere.h +++ b/examples/petsc/qfunctions/bps/bp4sphere.h @@ -11,6 +11,7 @@ #ifndef bp4sphere_h #define bp4sphere_h +#include #include // ----------------------------------------------------------------------------- diff --git a/examples/petsc/qfunctions/bps/common.h b/examples/petsc/qfunctions/bps/common.h index bfdfe3d36c..40da6af0fa 100644 --- a/examples/petsc/qfunctions/bps/common.h +++ b/examples/petsc/qfunctions/bps/common.h @@ -11,6 +11,8 @@ #ifndef common_h #define common_h +#include + // ----------------------------------------------------------------------------- CEED_QFUNCTION(Error)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/examples/solids/qfunctions/common.h b/examples/solids/qfunctions/common.h index 04e69846e5..b49a8ea256 100644 --- a/examples/solids/qfunctions/common.h +++ b/examples/solids/qfunctions/common.h @@ -11,6 +11,8 @@ #ifndef COMMON_H #define COMMON_H +#include + // 
----------------------------------------------------------------------------- // This QFunction sets up the geometric factors required for integration and // coordinate transformations diff --git a/examples/solids/qfunctions/constant-force.h b/examples/solids/qfunctions/constant-force.h index 6f958e90ea..8cc51a2227 100644 --- a/examples/solids/qfunctions/constant-force.h +++ b/examples/solids/qfunctions/constant-force.h @@ -11,6 +11,7 @@ #ifndef CONSTANT_H #define CONSTANT_H +#include #include #ifndef PHYSICS_STRUCT diff --git a/examples/solids/qfunctions/finite-strain-mooney-rivlin-initial-1.h b/examples/solids/qfunctions/finite-strain-mooney-rivlin-initial-1.h index 9f9bcea518..c12a752049 100644 --- a/examples/solids/qfunctions/finite-strain-mooney-rivlin-initial-1.h +++ b/examples/solids/qfunctions/finite-strain-mooney-rivlin-initial-1.h @@ -11,6 +11,7 @@ #ifndef ELAS_FSInitialMR1_H #define ELAS_FSInitialMR1_H +#include #include // ----------------------------------------------------------------------------- diff --git a/examples/solids/qfunctions/finite-strain-neo-hookean-current-1.h b/examples/solids/qfunctions/finite-strain-neo-hookean-current-1.h index 567fe19bc5..35b26930db 100644 --- a/examples/solids/qfunctions/finite-strain-neo-hookean-current-1.h +++ b/examples/solids/qfunctions/finite-strain-neo-hookean-current-1.h @@ -11,6 +11,7 @@ #ifndef ELAS_FSCurrentNH1_H #define ELAS_FSCurrentNH1_H +#include #include #ifndef PHYSICS_STRUCT diff --git a/examples/solids/qfunctions/finite-strain-neo-hookean-current-2.h b/examples/solids/qfunctions/finite-strain-neo-hookean-current-2.h index 0bfaed022a..3ae4291fc5 100644 --- a/examples/solids/qfunctions/finite-strain-neo-hookean-current-2.h +++ b/examples/solids/qfunctions/finite-strain-neo-hookean-current-2.h @@ -11,6 +11,7 @@ #ifndef ELAS_FSCurrentNH2_H #define ELAS_FSCurrentNH2_H +#include #include #ifndef PHYSICS_STRUCT diff --git a/examples/solids/qfunctions/finite-strain-neo-hookean-initial-1.h 
b/examples/solids/qfunctions/finite-strain-neo-hookean-initial-1.h index 74b0db2c00..c44eb463e3 100644 --- a/examples/solids/qfunctions/finite-strain-neo-hookean-initial-1.h +++ b/examples/solids/qfunctions/finite-strain-neo-hookean-initial-1.h @@ -11,6 +11,7 @@ #ifndef ELAS_FSInitialNH1_H #define ELAS_FSInitialNH1_H +#include #include #ifndef PHYSICS_STRUCT diff --git a/examples/solids/qfunctions/finite-strain-neo-hookean-initial-2.h b/examples/solids/qfunctions/finite-strain-neo-hookean-initial-2.h index 3a55f1471e..09eee03306 100644 --- a/examples/solids/qfunctions/finite-strain-neo-hookean-initial-2.h +++ b/examples/solids/qfunctions/finite-strain-neo-hookean-initial-2.h @@ -11,6 +11,7 @@ #ifndef ELAS_FSInitialNH2_H #define ELAS_FSInitialNH2_H +#include #include #ifndef PHYSICS_STRUCT diff --git a/examples/solids/qfunctions/linear.h b/examples/solids/qfunctions/linear.h index b1d1cd91b8..3b11d3dc1a 100644 --- a/examples/solids/qfunctions/linear.h +++ b/examples/solids/qfunctions/linear.h @@ -11,6 +11,7 @@ #ifndef ELAS_LINEAR_H #define ELAS_LINEAR_H +#include #include #ifndef PHYSICS_STRUCT diff --git a/examples/solids/qfunctions/manufactured-force.h b/examples/solids/qfunctions/manufactured-force.h index 433fd5c293..586a8f675c 100644 --- a/examples/solids/qfunctions/manufactured-force.h +++ b/examples/solids/qfunctions/manufactured-force.h @@ -11,6 +11,7 @@ #ifndef MANUFACTURED_H #define MANUFACTURED_H +#include #include #ifndef PHYSICS_STRUCT diff --git a/examples/solids/qfunctions/manufactured-true.h b/examples/solids/qfunctions/manufactured-true.h index 59c9c3a84a..0268e89e40 100644 --- a/examples/solids/qfunctions/manufactured-true.h +++ b/examples/solids/qfunctions/manufactured-true.h @@ -11,6 +11,7 @@ #ifndef MANUFACTURED_TRUE_H #define MANUFACTURED_TRUE_H +#include #include // ----------------------------------------------------------------------------- diff --git a/examples/solids/qfunctions/small-strain-neo-hookean.h 
b/examples/solids/qfunctions/small-strain-neo-hookean.h index a29b4ad283..79bce231b5 100644 --- a/examples/solids/qfunctions/small-strain-neo-hookean.h +++ b/examples/solids/qfunctions/small-strain-neo-hookean.h @@ -11,6 +11,7 @@ #ifndef ELAS_SS_NH_H #define ELAS_SS_NH_H +#include #include #ifndef PHYSICS_STRUCT diff --git a/examples/solids/qfunctions/traction-boundary.h b/examples/solids/qfunctions/traction-boundary.h index b223a54b6b..d2326ebe4c 100644 --- a/examples/solids/qfunctions/traction-boundary.h +++ b/examples/solids/qfunctions/traction-boundary.h @@ -11,6 +11,8 @@ #ifndef TRACTION_BOUNDARY_H #define TRACTION_BOUNDARY_H +#include + // ----------------------------------------------------------------------------- // This QFunction computes the surface integral of the user traction vector on // the constrained faces. diff --git a/include/ceed/ceed-f32.h b/include/ceed/ceed-f32.h index 81b5c668c9..7ee7b43f8c 100644 --- a/include/ceed/ceed-f32.h +++ b/include/ceed/ceed-f32.h @@ -1,9 +1,9 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed +/// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +/// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +/// +/// SPDX-License-Identifier: BSD-2-Clause +/// +/// This file is part of CEED: http://github.com/ceed /// @file /// Public header for definitions related to using FP32 floating point (single diff --git a/include/ceed/ceed-f64.h b/include/ceed/ceed-f64.h index d1fec2d68b..fb557df17d 100644 --- a/include/ceed/ceed-f64.h +++ b/include/ceed/ceed-f64.h @@ -1,9 +1,9 @@ -// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. -// All Rights Reserved. 
See the top-level LICENSE and NOTICE files for details. -// -// SPDX-License-Identifier: BSD-2-Clause -// -// This file is part of CEED: http://github.com/ceed +/// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +/// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +/// +/// SPDX-License-Identifier: BSD-2-Clause +/// +/// This file is part of CEED: http://github.com/ceed /// @file /// Public header for definitions related to using FP64 floating point (double diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h index 322e68f879..47f52a7b10 100644 --- a/include/ceed/ceed.h +++ b/include/ceed/ceed.h @@ -64,94 +64,12 @@ # define CEED_EXTERN extern CEED_VISIBILITY(default) #endif -/** - @ingroup CeedQFunction - This macro populates the correct function annotations for User QFunction - source for code generation backends or populates default values for CPU - backends. It also creates a variable `name_loc` populated with the correct - source path for creating the respective User QFunction. -**/ -#ifndef CEED_QFUNCTION -#define CEED_QFUNCTION(name) \ - static const char name ## _loc[] = __FILE__ ":" #name; \ - static int name -#endif - -/** - @ingroup CeedQFunction - This macro populates the correct function annotations for User QFunction - helper function source for code generation backends or populates default - values for CPU backends. -**/ -#ifndef CEED_QFUNCTION_HELPER -#define CEED_QFUNCTION_HELPER static inline -#endif - -/** - @ingroup CeedQFunction - Using VLA syntax to reshape User QFunction inputs and outputs can make - user code more readable. VLA is a C99 feature that is not supported by - the C++ dialect used by CUDA. This macro allows users to use the VLA - syntax with the CUDA backends. -**/ -#ifndef CEED_Q_VLA -# define CEED_Q_VLA Q -#endif - -/** - @ingroup Ceed - This macro provides the appropriate SIMD Pragma for the compilation - environment. 
Code generation backends may redefine this macro, as needed. -**/ -#ifndef CeedPragmaSIMD -# if defined(__INTEL_COMPILER) -# define CeedPragmaSIMD _Pragma("vector") -// Cannot use Intel pragma ivdep because it miscompiles unpacking symmetric tensors, as in -// Poisson2DApply, where the SIMD loop body contains temporaries such as the following. -// -// const CeedScalar dXdxdXdxT[2][2] = {{qd[i+0*Q], qd[i+2*Q]}, -// {qd[i+2*Q], qd[i+1*Q]}}; -// for (int j=0; j<2; j++) -// vg[i+j*Q] = (du[0] * dXdxdXdxT[0][j] + du[1] * dXdxdXdxT[1][j]); -// -// Miscompilation with pragma ivdep observed with icc (ICC) 19.0.5.281 20190815 -// at -O2 and above. -# elif defined(__GNUC__) && __GNUC__ >= 5 -# define CeedPragmaSIMD _Pragma("GCC ivdep") -# elif defined(_OPENMP) && _OPENMP >= 201307 // OpenMP-4.0 (July, 2013) -# define CeedPragmaSIMD _Pragma("omp simd") -# else -# define CeedPragmaSIMD -# endif -#endif - #include #include -#include -#include #include -/// Integer type, used for indexing -/// @ingroup Ceed -typedef int32_t CeedInt; -#define CeedInt_FMT "d" - -/// Integer type, used array sizes -/// @ingroup Ceed -typedef ptrdiff_t CeedSize; - -/// Scalar (floating point) types -/// -/// @ingroup Ceed -typedef enum { - /// Single precision - CEED_SCALAR_FP32, - /// Double precision - CEED_SCALAR_FP64 -} CeedScalarType; -/// Base scalar type for the library to use: change which header is -/// included to change the precision. -#include "ceed-f64.h" +/// Typedefs and macros used in public interfaces and user QFunction source +#include "types.h" /// Library context created by CeedInit() /// @ingroup CeedUser @@ -275,34 +193,6 @@ CEED_EXTERN int CeedGetVersion(int *major, int *minor, int *patch, CEED_EXTERN int CeedGetScalarType(CeedScalarType *scalar_type); -/// Ceed Errors -/// -/// This enum is used to specify the type of error returned by a function. 
-/// A zero error code is success, negative error codes indicate terminal errors -/// and positive error codes indicate nonterminal errors. With nonterminal errors -/// the object state has not been modifiend, but with terminal errors the object -/// data is likely modified or corrupted. -/// @ingroup Ceed -typedef enum { - /// Success error code - CEED_ERROR_SUCCESS = 0, - /// Minor error, generic - CEED_ERROR_MINOR = 1, - /// Minor error, dimension mismatch in inputs - CEED_ERROR_DIMENSION = 2, - /// Minor error, incomplete object setup - CEED_ERROR_INCOMPLETE = 3, - /// Minor error, incompatible arguments/configuration - CEED_ERROR_INCOMPATIBLE = 4, - /// Minor error, access lock problem - CEED_ERROR_ACCESS = 5, - /// Major error, generic - CEED_ERROR_MAJOR = -1, - /// Major error, internal backend error - CEED_ERROR_BACKEND = -2, - /// Major error, operation unsupported by current backend - CEED_ERROR_UNSUPPORTED = -3, -} CeedErrorType; CEED_EXTERN const char *const *CeedErrorTypes; /// Specify memory type diff --git a/include/ceed/jit-source/cuda/cuda-jit.h b/include/ceed/jit-source/cuda/cuda-jit.h new file mode 100644 index 0000000000..d0ebc2d842 --- /dev/null +++ b/include/ceed/jit-source/cuda/cuda-jit.h @@ -0,0 +1,22 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
+// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +/// @file +/// Internal header for CUDA backend macro and type definitions for JiT source +#ifndef _ceed_cuda_jit_defs_h +#define _ceed_cuda_jit_defs_h + +#define CEED_QFUNCTION(name) inline __device__ int name +#define CEED_QFUNCTION_HELPER inline __device__ +#define CeedPragmaSIMD +#define CEED_Q_VLA 1 + +#include + +typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Cuda; + +#endif diff --git a/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h b/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h index 63fa1b08aa..1a21204720 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h +++ b/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include //------------------------------------------------------------------------------ // Non-Tensor Basis Kernels diff --git a/include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h b/include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h index 446a871d4c..6898ef8987 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h +++ b/include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include //------------------------------------------------------------------------------ // Tensor Basis Kernels diff --git a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h index 6955d74efb..b7c69b0e33 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h +++ b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include //------------------------------------------------------------------------------ // 
Diagonal assembly kernels diff --git a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h index 2cbc185ecd..6f975c924b 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h +++ b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include //------------------------------------------------------------------------------ // Matrix assembly kernel for low-order elements (2D thread block) diff --git a/include/ceed/jit-source/cuda/cuda-ref-qfunction.h b/include/ceed/jit-source/cuda/cuda-ref-qfunction.h index 80dbd982ab..6ad3676282 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-qfunction.h +++ b/include/ceed/jit-source/cuda/cuda-ref-qfunction.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include template //------------------------------------------------------------------------------ diff --git a/include/ceed/jit-source/cuda/cuda-ref-restriction.h b/include/ceed/jit-source/cuda/cuda-ref-restriction.h index 81f9f24ba2..0f465d8f05 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-restriction.h +++ b/include/ceed/jit-source/cuda/cuda-ref-restriction.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include //------------------------------------------------------------------------------ // L-vector -> E-vector, strided diff --git a/include/ceed/jit-source/cuda/cuda-shared-basis.h b/include/ceed/jit-source/cuda/cuda-shared-basis.h index 79825ea6a9..5f9185b4b7 100644 --- a/include/ceed/jit-source/cuda/cuda-shared-basis.h +++ b/include/ceed/jit-source/cuda/cuda-shared-basis.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include //------------------------------------------------------------------------------ // Shared mem kernels diff --git a/include/ceed/jit-source/gallery/ceed-identity.h 
b/include/ceed/jit-source/gallery/ceed-identity.h index d8c0502840..e7dbc2ddca 100644 --- a/include/ceed/jit-source/gallery/ceed-identity.h +++ b/include/ceed/jit-source/gallery/ceed-identity.h @@ -12,6 +12,8 @@ #ifndef identity_h #define identity_h +#include + typedef struct { CeedInt size; } IdentityCtx; diff --git a/include/ceed/jit-source/gallery/ceed-mass1dbuild.h b/include/ceed/jit-source/gallery/ceed-mass1dbuild.h index 48e9fa2068..e9034b0db0 100644 --- a/include/ceed/jit-source/gallery/ceed-mass1dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-mass1dbuild.h @@ -12,6 +12,8 @@ #ifndef mass1dbuild_h #define mass1dbuild_h +#include + CEED_QFUNCTION(Mass1DBuild)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is Jacobians, size (Q) diff --git a/include/ceed/jit-source/gallery/ceed-mass2dbuild.h b/include/ceed/jit-source/gallery/ceed-mass2dbuild.h index d8135727b0..3ba4563450 100644 --- a/include/ceed/jit-source/gallery/ceed-mass2dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-mass2dbuild.h @@ -12,6 +12,8 @@ #ifndef mass2dbuild_h #define mass2dbuild_h +#include + CEED_QFUNCTION(Mass2DBuild)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* diff --git a/include/ceed/jit-source/gallery/ceed-mass3dbuild.h b/include/ceed/jit-source/gallery/ceed-mass3dbuild.h index 30a06bb773..dc3ce56da5 100644 --- a/include/ceed/jit-source/gallery/ceed-mass3dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-mass3dbuild.h @@ -12,6 +12,8 @@ #ifndef mass3dbuild_h #define mass3dbuild_h +#include + CEED_QFUNCTION(Mass3DBuild)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* diff --git a/include/ceed/jit-source/gallery/ceed-massapply.h b/include/ceed/jit-source/gallery/ceed-massapply.h index 1c48186b0d..ff4e6d8ec0 100644 --- a/include/ceed/jit-source/gallery/ceed-massapply.h +++ b/include/ceed/jit-source/gallery/ceed-massapply.h @@ -12,6 
+12,8 @@ #ifndef massapply_h #define massapply_h +#include + CEED_QFUNCTION(MassApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is u, size (Q) diff --git a/include/ceed/jit-source/gallery/ceed-poisson1dapply.h b/include/ceed/jit-source/gallery/ceed-poisson1dapply.h index 3f9aaa12fa..9a6b885ec8 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson1dapply.h +++ b/include/ceed/jit-source/gallery/ceed-poisson1dapply.h @@ -12,6 +12,8 @@ #ifndef poisson1dapply_h #define poisson1dapply_h +#include + CEED_QFUNCTION(Poisson1DApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/include/ceed/jit-source/gallery/ceed-poisson1dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson1dbuild.h index 1d15ee76ee..e96300e608 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson1dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-poisson1dbuild.h @@ -12,6 +12,8 @@ #ifndef poisson1dbuild_h #define poisson1dbuild_h +#include + CEED_QFUNCTION(Poisson1DBuild)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/include/ceed/jit-source/gallery/ceed-poisson2dapply.h b/include/ceed/jit-source/gallery/ceed-poisson2dapply.h index a5c030fdc7..d3b1c14b0c 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson2dapply.h +++ b/include/ceed/jit-source/gallery/ceed-poisson2dapply.h @@ -12,6 +12,8 @@ #ifndef poisson2dapply_h #define poisson2dapply_h +#include + CEED_QFUNCTION(Poisson2DApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h index 6361ff1e5b..69d55aeac1 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h @@ -12,6 +12,8 @@ #ifndef poisson2dbuild_h #define poisson2dbuild_h +#include + 
CEED_QFUNCTION(Poisson2DBuild)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/include/ceed/jit-source/gallery/ceed-poisson3dapply.h b/include/ceed/jit-source/gallery/ceed-poisson3dapply.h index 4280cf2a7f..48d9216a7d 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson3dapply.h +++ b/include/ceed/jit-source/gallery/ceed-poisson3dapply.h @@ -13,6 +13,8 @@ #ifndef poisson3dapply_h #define poisson3dapply_h +#include + CEED_QFUNCTION(Poisson3DApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h index f453f998f6..7dbbe57167 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h @@ -13,6 +13,8 @@ #ifndef poisson3dbuild_h #define poisson3dbuild_h +#include + CEED_QFUNCTION(Poisson3DBuild)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/include/ceed/jit-source/gallery/ceed-scale.h b/include/ceed/jit-source/gallery/ceed-scale.h index 3ae6da19c7..249fac095d 100644 --- a/include/ceed/jit-source/gallery/ceed-scale.h +++ b/include/ceed/jit-source/gallery/ceed-scale.h @@ -12,6 +12,8 @@ #ifndef scale_h #define scale_h +#include + CEED_QFUNCTION(Scale)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // Ctx holds field size diff --git a/include/ceed/jit-source/gallery/ceed-vectormassapply.h b/include/ceed/jit-source/gallery/ceed-vectormassapply.h index f7dd19121b..b20218175d 100644 --- a/include/ceed/jit-source/gallery/ceed-vectormassapply.h +++ b/include/ceed/jit-source/gallery/ceed-vectormassapply.h @@ -13,6 +13,8 @@ #ifndef vectormassapply_h #define vectormassapply_h +#include + CEED_QFUNCTION(Vector3MassApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* diff 
--git a/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h index 548f0d4c10..19d9b278fc 100644 --- a/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h +++ b/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h @@ -13,6 +13,8 @@ #ifndef vectorpoisson1dapply_h #define vectorpoisson1dapply_h +#include + CEED_QFUNCTION(Vector3Poisson1DApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h index 0d375c4544..6588e65e7f 100644 --- a/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h +++ b/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h @@ -13,6 +13,8 @@ #ifndef vectorpoisson2dapply_h #define vectorpoisson2dapply_h +#include + CEED_QFUNCTION(Vector3Poisson2DApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h index 4f4be5dfb3..541723dec6 100644 --- a/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h +++ b/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h @@ -14,6 +14,8 @@ #ifndef vectorpoisson3dapply_h #define vectorpoisson3dapply_h +#include + CEED_QFUNCTION(Vector3Poisson3DApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/include/ceed/jit-source/hip/hip-jit.h b/include/ceed/jit-source/hip/hip-jit.h new file mode 100644 index 0000000000..874362fc1d --- /dev/null +++ b/include/ceed/jit-source/hip/hip-jit.h @@ -0,0 +1,22 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
+// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +/// @file +/// Internal header for HIP backend macro and type definitions for JiT source +#ifndef _ceed_hip_jit_h +#define _ceed_hip_jit_h + +#define CEED_QFUNCTION(name) inline __device__ int name +#define CEED_QFUNCTION_HELPER inline __device__ +#define CeedPragmaSIMD +#define CEED_Q_VLA 1 + +#include + +typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Hip; + +#endif diff --git a/include/ceed/jit-source/hip/hip-ref-basis-nontensor.h b/include/ceed/jit-source/hip/hip-ref-basis-nontensor.h index 89d59c4c98..bc74ed38a9 100644 --- a/include/ceed/jit-source/hip/hip-ref-basis-nontensor.h +++ b/include/ceed/jit-source/hip/hip-ref-basis-nontensor.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include //------------------------------------------------------------------------------ // Non-Tensor Basis Kernels diff --git a/include/ceed/jit-source/hip/hip-ref-basis-tensor.h b/include/ceed/jit-source/hip/hip-ref-basis-tensor.h index f865431e88..2dc68743cc 100644 --- a/include/ceed/jit-source/hip/hip-ref-basis-tensor.h +++ b/include/ceed/jit-source/hip/hip-ref-basis-tensor.h @@ -5,7 +5,8 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include + //------------------------------------------------------------------------------ // Tensor Basis Kernels //------------------------------------------------------------------------------ diff --git a/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h b/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h index 622a159bad..e2ac083b52 100644 --- a/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h +++ b/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include 
//------------------------------------------------------------------------------ // Diagonal assembly kernels diff --git a/include/ceed/jit-source/hip/hip-ref-operator-assemble.h b/include/ceed/jit-source/hip/hip-ref-operator-assemble.h index 2cbc185ecd..6f975c924b 100644 --- a/include/ceed/jit-source/hip/hip-ref-operator-assemble.h +++ b/include/ceed/jit-source/hip/hip-ref-operator-assemble.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include //------------------------------------------------------------------------------ // Matrix assembly kernel for low-order elements (2D thread block) diff --git a/include/ceed/jit-source/hip/hip-ref-qfunction.h b/include/ceed/jit-source/hip/hip-ref-qfunction.h index 80dbd982ab..6ad3676282 100644 --- a/include/ceed/jit-source/hip/hip-ref-qfunction.h +++ b/include/ceed/jit-source/hip/hip-ref-qfunction.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include template //------------------------------------------------------------------------------ diff --git a/include/ceed/jit-source/hip/hip-ref-restriction.h b/include/ceed/jit-source/hip/hip-ref-restriction.h index 6b09e64d09..598bb6b402 100644 --- a/include/ceed/jit-source/hip/hip-ref-restriction.h +++ b/include/ceed/jit-source/hip/hip-ref-restriction.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include //------------------------------------------------------------------------------ // L-vector -> E-vector, strided diff --git a/include/ceed/jit-source/hip/hip-shared-basis.h b/include/ceed/jit-source/hip/hip-shared-basis.h index 769419473f..c9076260a2 100644 --- a/include/ceed/jit-source/hip/hip-shared-basis.h +++ b/include/ceed/jit-source/hip/hip-shared-basis.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include //------------------------------------------------------------------------------ // Shared mem kernels diff --git 
a/include/ceed/types.h b/include/ceed/types.h new file mode 100644 index 0000000000..c7cc95de9a --- /dev/null +++ b/include/ceed/types.h @@ -0,0 +1,128 @@ +/// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +/// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +/// +/// SPDX-License-Identifier: BSD-2-Clause +/// +/// This file is part of CEED: http://github.com/ceed + +/// @file +/// Public header for types and macros used in user QFunction source code +#ifndef _ceed_qfunction_defs_h +#define _ceed_qfunction_defs_h + +#include +#include + +/** + @ingroup CeedQFunction + This macro populates the correct function annotations for User QFunction + source for code generation backends or populates default values for CPU + backends. It also creates a variable `name_loc` populated with the correct + source path for creating the respective User QFunction. +**/ +#ifndef CEED_QFUNCTION +#define CEED_QFUNCTION(name) \ + static const char name ## _loc[] = __FILE__ ":" #name; \ + static int name +#endif + +/** + @ingroup CeedQFunction + This macro populates the correct function annotations for User QFunction + helper function source for code generation backends or populates default + values for CPU backends. +**/ +#ifndef CEED_QFUNCTION_HELPER +#define CEED_QFUNCTION_HELPER static inline +#endif + +/** + @ingroup CeedQFunction + Using VLA syntax to reshape User QFunction inputs and outputs can make + user code more readable. VLA is a C99 feature that is not supported by + the C++ dialect used by CUDA. This macro allows users to use the VLA + syntax with the CUDA backends. +**/ +#ifndef CEED_Q_VLA +# define CEED_Q_VLA Q +#endif + +/** + @ingroup Ceed + This macro provides the appropriate SIMD Pragma for the compilation + environment. Code generation backends may redefine this macro, as needed. 
+**/ +#ifndef CeedPragmaSIMD +# if defined(__INTEL_COMPILER) +# define CeedPragmaSIMD _Pragma("vector") +/// Cannot use Intel pragma ivdep because it miscompiles unpacking symmetric tensors, as in +/// Poisson2DApply, where the SIMD loop body contains temporaries such as the following. +/// +/// const CeedScalar dXdxdXdxT[2][2] = {{qd[i+0*Q], qd[i+2*Q]}, +/// {qd[i+2*Q], qd[i+1*Q]}}; +/// for (int j=0; j<2; j++) +/// vg[i+j*Q] = (du[0] * dXdxdXdxT[0][j] + du[1] * dXdxdXdxT[1][j]); +/// +/// Miscompilation with pragma ivdep observed with icc (ICC) 19.0.5.281 20190815 +/// at -O2 and above. +# elif defined(__GNUC__) && __GNUC__ >= 5 +# define CeedPragmaSIMD _Pragma("GCC ivdep") +# elif defined(_OPENMP) && _OPENMP >= 201307 // OpenMP-4.0 (July, 2013) +# define CeedPragmaSIMD _Pragma("omp simd") +# else +# define CeedPragmaSIMD +# endif +#endif + +/// Integer type, used for indexing +/// @ingroup Ceed +typedef int32_t CeedInt; +#define CeedInt_FMT "d" + +/// Integer type, used for array sizes +/// @ingroup Ceed +typedef ptrdiff_t CeedSize; + +/// Scalar (floating point) types +/// +/// @ingroup Ceed +typedef enum { + /// Single precision + CEED_SCALAR_FP32, + /// Double precision + CEED_SCALAR_FP64 +} CeedScalarType; +/// Base scalar type for the library to use: change which header is +/// included to change the precision. +#include "ceed-f64.h" + +/// Ceed Errors +/// +/// This enum is used to specify the type of error returned by a function. +/// A zero error code is success, negative error codes indicate terminal errors +/// and positive error codes indicate nonterminal errors. With nonterminal errors +/// the object state has not been modified, but with terminal errors the object +/// data is likely modified or corrupted. 
+/// @ingroup Ceed +typedef enum { + /// Success error code + CEED_ERROR_SUCCESS = 0, + /// Minor error, generic + CEED_ERROR_MINOR = 1, + /// Minor error, dimension mismatch in inputs + CEED_ERROR_DIMENSION = 2, + /// Minor error, incomplete object setup + CEED_ERROR_INCOMPLETE = 3, + /// Minor error, incompatible arguments/configuration + CEED_ERROR_INCOMPATIBLE = 4, + /// Minor error, access lock problem + CEED_ERROR_ACCESS = 5, + /// Major error, generic + CEED_ERROR_MAJOR = -1, + /// Major error, internal backend error + CEED_ERROR_BACKEND = -2, + /// Major error, operation unsupported by current backend + CEED_ERROR_UNSUPPORTED = -3, +} CeedErrorType; + +#endif diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c index 3de7d75a41..00f6097a6c 100644 --- a/interface/ceed-jit-tools.c +++ b/interface/ceed-jit-tools.c @@ -122,13 +122,14 @@ int CeedLoadSourceToInitializedBuffer(Ceed ceed, // -- Check for 'include' keyword const char *next_e = strchr(first_hash, 'e'); char keyword[8] = ""; - if (next_e && next_e - first_hash >= 7) - memcpy(keyword, &next_e[-6], 7); + if (next_e && next_e - first_hash >= 7) memcpy(keyword, &next_e[-6], 7); bool is_hash_include = !strcmp(keyword, "include"); // ---- Spaces allowed in '# include ' - if (next_e) - for (CeedInt i = 1; first_hash - next_e + i < -6; i++) + if (next_e) { + for (CeedInt i = 1; first_hash - next_e + i < -6; i++) { is_hash_include &= first_hash[i] == ' '; + } + } if (is_hash_include) { // -- Copy into buffer all preceding # long current_size = strlen(*buffer); @@ -140,23 +141,42 @@ int CeedLoadSourceToInitializedBuffer(Ceed ceed, // -- Load local "header.h" char *next_quote = strchr(first_hash, '"'); char *next_new_line = strchr(first_hash, '\n'); - bool is_local_header = is_hash_include && next_quote - && (next_new_line - next_quote > 0); - if (is_local_header) { + bool is_local_header = is_hash_include && next_quote && + (next_new_line - next_quote > 0); + char *next_left_chevron = 
strchr(first_hash, '<'); + bool is_ceed_header = is_hash_include && next_left_chevron && + (next_new_line - next_left_chevron > 0) && + (!strncmp(next_left_chevron, "<ceed/types.h>", 14) || + !strncmp(next_left_chevron, "<ceed/ceed-f32.h>", 17) || + !strncmp(next_left_chevron, "<ceed/ceed-f64.h>", 17)); + if (is_local_header || is_ceed_header) { // ---- Build source path char *include_source_path; - long root_length = strrchr(source_file_path, '/') - source_file_path; - long include_file_name_len = strchr(&next_quote[1], '"') - next_quote - 1; - ierr = CeedCalloc(root_length + include_file_name_len + 2, - &include_source_path); CeedChk(ierr); - memcpy(include_source_path, source_file_path, root_length + 1); - memcpy(&include_source_path[root_length + 1], &next_quote[1], - include_file_name_len); - memcpy(&include_source_path[root_length + include_file_name_len + 1], "", 1); + if (is_local_header) { + long root_length = strrchr(source_file_path, '/') - source_file_path; + long include_file_name_len = strchr(&next_quote[1], '"') - next_quote - 1; + ierr = CeedCalloc(root_length + include_file_name_len + 2, + &include_source_path); CeedChk(ierr); + memcpy(include_source_path, source_file_path, root_length + 1); + memcpy(&include_source_path[root_length + 1], &next_quote[1], + include_file_name_len); + memcpy(&include_source_path[root_length + include_file_name_len + 1], "", 1); + } else { + char *next_right_chevron = strchr(first_hash, '>'); + char *ceed_relative_path; + long ceed_relative_path_length = next_right_chevron - next_left_chevron - 1; + ierr = CeedCalloc(ceed_relative_path_length + 1, &ceed_relative_path); + CeedChk(ierr); + memcpy(ceed_relative_path, &next_left_chevron[1], ceed_relative_path_length); + ierr = CeedGetJitAbsolutePath(ceed, ceed_relative_path, &include_source_path); + CeedChk(ierr); + } // ---- Recursive call to load source to buffer - ierr = CeedLoadSourceToInitializedBuffer(ceed, include_source_path, buffer); CeedDebug256(ceed, 2, "JiT Including: %s\n", include_source_path); 
CeedChk(ierr); + ierr = CeedLoadSourceToInitializedBuffer(ceed, include_source_path, buffer); + CeedChk(ierr); ierr = CeedFree(&include_source_path); CeedChk(ierr); } file_offset = strchr(first_hash, '\n') - temp_buffer + 1; @@ -295,7 +315,7 @@ int CeedGetJitAbsolutePath(Ceed ceed, const char *relative_file_path, CeedDebug256(ceed, 1, "Checking JiT root: "); CeedDebug(ceed, "%s\n", ceed_parent->jit_source_roots[i]); - // Build and check absolute path with current root + // Build and check absolute path with current root ierr = CeedPathConcatenate(ceed, ceed_parent->jit_source_roots[i], relative_file_path, absolute_file_path); CeedChk(ierr); diff --git a/python/build_ceed_cffi.py b/python/build_ceed_cffi.py index f467eff697..1b82fd811e 100644 --- a/python/build_ceed_cffi.py +++ b/python/build_ceed_cffi.py @@ -15,37 +15,44 @@ # ------------------------------------------------------------------------------ # Provide C definitions to CFFI # ------------------------------------------------------------------------------ -with open(os.path.abspath("include/ceed/ceed.h")) as f: - lines = [line.strip() for line in f if - not (line.startswith("#") and not line.startswith("#include")) and - not line.startswith(" static") and - "CeedErrorImpl" not in line and - "const char *, ...);" not in line and - not line.startswith("CEED_EXTERN const char *const") and - not ceed_version_ge.match(line)] - lines = [line.replace("CEED_EXTERN", "extern") for line in lines] - # Find scalar type inclusion line and insert definitions - for line in lines: - if re.search("ceed-f32.h", line) is not None: - insert_index = lines.index(line) + 1 - extra_lines = ['typedef float CeedScalar;'] - extra_lines.append('static const int CEED_SCALAR_TYPE;') - extra_lines.append('static const double CEED_EPSILON;') - elif re.search("ceed-f64.h", line) is not None: - insert_index = lines.index(line) + 1 - extra_lines = ['typedef double CeedScalar;'] - extra_lines.append('static const int CEED_SCALAR_TYPE;') - 
extra_lines.append('static const double CEED_EPSILON;') - lines[insert_index: insert_index] = extra_lines - # Remove all include statements now that scalar type has been dealt with - lines = [line for line in lines if not line.startswith("#include")] - # Build header from lines - header = '\n'.join(lines) - header = header.split("static inline CeedInt CeedIntPow", 1)[0] - header += '\nextern int CeedVectorGetState(CeedVector, uint64_t*);' - header += '\nextern int CeedElemRestrictionGetELayout(CeedElemRestriction, CeedInt *layout);' - # Note: cffi cannot handle vargs - header = re.sub("va_list", "const char *", header) +lines = [] +for header_path in ["include/ceed/types.h", "include/ceed/ceed.h"]: + with open(os.path.abspath(header_path)) as f: + lines += [line.strip() for line in f if + not (line.startswith("#") and not line.startswith("#include")) and + not line.startswith(" static") and + "CeedErrorImpl" not in line and + "const char *, ...);" not in line and + not line.startswith("CEED_EXTERN const char *const") and + not ceed_version_ge.match(line)] +lines = [line.replace("CEED_EXTERN", "extern") for line in lines] + +# Find scalar type inclusion line and insert definitions +for line in lines: + if re.search("ceed-f32.h", line) is not None: + insert_index = lines.index(line) + 1 + extra_lines = ['typedef float CeedScalar;'] + extra_lines.append('static const int CEED_SCALAR_TYPE;') + extra_lines.append('static const double CEED_EPSILON;') + elif re.search("ceed-f64.h", line) is not None: + insert_index = lines.index(line) + 1 + extra_lines = ['typedef double CeedScalar;'] + extra_lines.append('static const int CEED_SCALAR_TYPE;') + extra_lines.append('static const double CEED_EPSILON;') +lines[insert_index: insert_index] = extra_lines + +# Remove all include statements now that scalar type has been dealt with +lines = [line for line in lines if not line.startswith("#include")] + +# Build header from lines +header = '\n'.join(lines) +header = 
header.split("static inline CeedInt CeedIntPow", 1)[0] +header += '\nextern int CeedVectorGetState(CeedVector, uint64_t*);' +header += '\nextern int CeedElemRestrictionGetELayout(CeedElemRestriction, CeedInt *layout);' + +# Note: cffi cannot handle vargs +header = re.sub("va_list", "const char *", header) + ffibuilder.cdef(header) ffibuilder.set_source("_ceed_cffi", diff --git a/rust/libceed-sys/build.rs b/rust/libceed-sys/build.rs index 8f5d90b079..fe5a50d2df 100644 --- a/rust/libceed-sys/build.rs +++ b/rust/libceed-sys/build.rs @@ -47,6 +47,7 @@ fn main() { // Tell cargo to invalidate the built crate whenever the wrapper changes println!("cargo:rerun-if-changed=c-src/include/ceed/ceed.h"); + println!("cargo:rerun-if-changed=c-src/include/ceed/types.h"); println!("cargo:rerun-if-changed=c-src/Makefile"); if Path::new("c-src/config.mk").is_file() { println!("cargo:rerun-if-changed=c-src/config.mk"); diff --git a/tests/t400-qfunction.h b/tests/t400-qfunction.h index fec131229a..456f1e60e9 100644 --- a/tests/t400-qfunction.h +++ b/tests/t400-qfunction.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t401-qfunction.h b/tests/t401-qfunction.h index 48b0257c0a..b14b4a31d4 100644 --- a/tests/t401-qfunction.h +++ b/tests/t401-qfunction.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t405-qfunction.h b/tests/t405-qfunction.h index 05a1e753fd..029c18c8f7 100644 --- a/tests/t405-qfunction.h +++ b/tests/t405-qfunction.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t406-qfunction-helper.h 
b/tests/t406-qfunction-helper.h index f7a3d46828..e800d7b064 100644 --- a/tests/t406-qfunction-helper.h +++ b/tests/t406-qfunction-helper.h @@ -8,6 +8,7 @@ #ifndef _helper_h #define _helper_h +#include #include "t406-qfunction-scales.h" CEED_QFUNCTION_HELPER CeedScalar times_two(CeedScalar x) { diff --git a/tests/t406-qfunction.h b/tests/t406-qfunction.h index 80ffb79078..da7dd540f7 100644 --- a/tests/t406-qfunction.h +++ b/tests/t406-qfunction.h @@ -7,6 +7,7 @@ // Note: intentionally testing strange spacing in '#include's #include +#include #include "./t406-qfunction-scales.h" # include "t406-qfunction-helper.h" diff --git a/tests/t409-qfunction.h b/tests/t409-qfunction.h index 70f60bc7b9..1e4291ff61 100644 --- a/tests/t409-qfunction.h +++ b/tests/t409-qfunction.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(scale)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { CeedScalar *scale = (CeedScalar *)ctx; diff --git a/tests/t500-operator.h b/tests/t500-operator.h index 06a18c0484..00c9d52bba 100644 --- a/tests/t500-operator.h +++ b/tests/t500-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t502-operator.h b/tests/t502-operator.h index 3746dc3e7b..fac104dd2a 100644 --- a/tests/t502-operator.h +++ b/tests/t502-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t507-operator.h b/tests/t507-operator.h index cdc8e39be1..2927435d47 100644 --- a/tests/t507-operator.h +++ b/tests/t507-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, 
CeedScalar *const *out) { diff --git a/tests/t510-operator.h b/tests/t510-operator.h index 2270680b77..0bbb388102 100644 --- a/tests/t510-operator.h +++ b/tests/t510-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t522-operator.h b/tests/t522-operator.h index bb761e0d06..3d163c7d4b 100644 --- a/tests/t522-operator.h +++ b/tests/t522-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t530-operator.h b/tests/t530-operator.h index 2270680b77..0bbb388102 100644 --- a/tests/t530-operator.h +++ b/tests/t530-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t531-operator.h b/tests/t531-operator.h index ba009ac2fe..4e02a4ada2 100644 --- a/tests/t531-operator.h +++ b/tests/t531-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t532-operator.h b/tests/t532-operator.h index 88d0dae44d..9ad804ebe7 100644 --- a/tests/t532-operator.h +++ b/tests/t532-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup_mass)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t534-operator.h b/tests/t534-operator.h index ff2e3102c9..7a76f45ac4 100644 --- a/tests/t534-operator.h +++ b/tests/t534-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, 
const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t535-operator.h b/tests/t535-operator.h index eb79c03c7e..3fee235b51 100644 --- a/tests/t535-operator.h +++ b/tests/t535-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup_mass)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t537-operator.h b/tests/t537-operator.h index e4e3ab9ee8..05f5fbc881 100644 --- a/tests/t537-operator.h +++ b/tests/t537-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t540-operator.h b/tests/t540-operator.h index 7eb6b114ed..91c10583f8 100644 --- a/tests/t540-operator.h +++ b/tests/t540-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup_mass)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t541-operator.h b/tests/t541-operator.h index e002fca8c8..303febe433 100644 --- a/tests/t541-operator.h +++ b/tests/t541-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup_diff)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t566-operator.h b/tests/t566-operator.h index d8d07a9d53..4b00885864 100644 --- a/tests/t566-operator.h +++ b/tests/t566-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/tests/t567-operator.h b/tests/t567-operator.h index a83f391405..56b8132380 100644 --- a/tests/t567-operator.h +++ b/tests/t567-operator.h @@ -5,6 +5,8 @@ // // This file is part of CEED: 
http://github.com/ceed +#include + CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { From 0df8cb37b9aaa76209859e0efa175841728630e5 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 16 Aug 2022 14:38:57 -0600 Subject: [PATCH 161/172] hip - guard hipblas header include for HIP_VERSION --- backends/hip-ref/ceed-hip-ref-vector.c | 1 - backends/hip-ref/ceed-hip-ref.h | 1 - backends/hip/ceed-hip-common.h | 4 ++++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/backends/hip-ref/ceed-hip-ref-vector.c b/backends/hip-ref/ceed-hip-ref-vector.c index 49c6494025..c86a9dd1fc 100644 --- a/backends/hip-ref/ceed-hip-ref-vector.c +++ b/backends/hip-ref/ceed-hip-ref-vector.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include "ceed-hip-ref.h" diff --git a/backends/hip-ref/ceed-hip-ref.h b/backends/hip-ref/ceed-hip-ref.h index a925acde42..7a6e3e1488 100644 --- a/backends/hip-ref/ceed-hip-ref.h +++ b/backends/hip-ref/ceed-hip-ref.h @@ -11,7 +11,6 @@ #include #include #include -#include #include "../hip/ceed-hip-common.h" typedef struct { diff --git a/backends/hip/ceed-hip-common.h b/backends/hip/ceed-hip-common.h index b624ac8bed..f95bccec66 100644 --- a/backends/hip/ceed-hip-common.h +++ b/backends/hip/ceed-hip-common.h @@ -11,7 +11,11 @@ #include #include #include +#if (HIP_VERSION >= 50200000) +#include +#else #include +#endif #define QUOTE(...) 
#__VA_ARGS__ From 51dd2dfabd269bdf77ee724e13831d4500b8d47c Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Fri, 19 Aug 2022 09:28:39 -0600 Subject: [PATCH 162/172] mailmap - minor formatting fix (#1042) * mailmap - minor formatting fix * mailmap - update for Yohann --- .mailmap | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index ca99bc5295..61c3cab2b6 100644 --- a/.mailmap +++ b/.mailmap @@ -21,7 +21,7 @@ Leila Ghaffari <246972+nbeams@users.noreply.github.com> Rey Koki <36133157+reykoki@users.noreply.github.com> Rezgar Shakeri <42816410+rezgarshakeri@users.noreply.github.com> -Thilina Ratnayaka +Thilina Ratnayaka Tzanio Kolev Valeria Barra Valeria Barra <39932030+valeriabarra@users.noreply.github.com> @@ -29,5 +29,5 @@ Valeria Barra Valeria Barra Will Pazner <11493037+pazner@users.noreply.github.com> -Yohann Dudouit -Yohann Dudouit \ No newline at end of file +Yohann Dudouit +Yohann Dudouit From b2aba9807d625f02ac00f4555d4ea9fdfc631e0a Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Mon, 22 Aug 2022 15:29:41 -0700 Subject: [PATCH 163/172] Makefile: support icc_orig (icc --version on quartz) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4348541bfe..cb75f926b5 100644 --- a/Makefile +++ b/Makefile @@ -90,7 +90,7 @@ endif AFLAGS = -fsanitize=address #-fsanitize=undefined -fno-omit-frame-pointer # Note: Intel oneAPI C/C++ compiler is now icx/icpx -CC_VENDOR := $(subst oneAPI,icc,$(firstword $(filter gcc clang icc oneAPI XL,$(subst -, ,$(shell $(CC) --version))))) +CC_VENDOR := $(subst icc_orig,icc,$(subst oneAPI,icc,$(firstword $(filter gcc clang icc icc_orig oneAPI XL,$(subst -, ,$(shell $(CC) --version)))))) FC_VENDOR := $(if $(FC),$(firstword $(filter GNU ifort XL,$(shell $(FC) --version 2>&1 || $(FC) -qversion)))) # Default extra flags by vendor From 0d627ac128dbf360e78b18bad1cfd2089ff3d6d8 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Tue, 23 Aug 2022 
11:51:11 -0600 Subject: [PATCH 164/172] doc: pin sphinxcontrib-bibtex==2.4.2 We can update to 2.5 when we upgrade to docutils-0.18 (which I think needs sphinx-5). --- doc/sphinx/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx/requirements.txt b/doc/sphinx/requirements.txt index c5ce4642c0..6ceb051f5b 100644 --- a/doc/sphinx/requirements.txt +++ b/doc/sphinx/requirements.txt @@ -5,7 +5,7 @@ sphinx-hoverxref>=0.3b1 sphinx-panels sphinx>=4.3 sphinx_rtd_theme -sphinxcontrib-bibtex>=2.1.1 +sphinxcontrib-bibtex==2.4.2 sphinxcontrib-katex sphinxcontrib-mermaid sphinxcontrib-svg2pdfconverter From 8f0d9e130bae647d792e3311e024988851753d8a Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Tue, 23 Aug 2022 11:52:26 -0600 Subject: [PATCH 165/172] doc: fix warnings and malformed hyperlink target --- doc/sphinx/source/api/CeedBasis.rst | 2 +- doc/sphinx/source/conf.py | 12 +----------- doc/sphinx/source/precision.md | 4 ++-- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/doc/sphinx/source/api/CeedBasis.rst b/doc/sphinx/source/api/CeedBasis.rst index b1affdf2ea..ed04540b27 100644 --- a/doc/sphinx/source/api/CeedBasis.rst +++ b/doc/sphinx/source/api/CeedBasis.rst @@ -14,7 +14,7 @@ Discrete element bases and quadrature :content-only: :members: -.. _CeedBasis-typedefs and enumerations +.. 
_CeedBasis-typedefs and enumerations: Typedefs and Enumerations -------------------------------------- diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py index ebe6857d3d..9da10a0a52 100755 --- a/doc/sphinx/source/conf.py +++ b/doc/sphinx/source/conf.py @@ -129,7 +129,7 @@ ] myst_heading_anchors = 2 -myst_url_schemes = ["http", "https", "mailto"] +myst_url_schemes = ("http", "https", "mailto") # -- Options for HTML output ---------------------------------------------- @@ -175,16 +175,6 @@ katex_macros = katex.latex_defs_to_katex_macros(latex_macros) katex_options = 'macros: {' + katex_macros + '}' - -def katex_cdn(path): - katex_gitcommit = '7c696bb7ac8995f177676d62be09ceefa37d66e3' - return f'https://cdn.jsdelivr.net/gh/jedbrown/katex@{katex_gitcommit}/' + path - - -katex_css_path = katex_cdn('dist/katex.min.css') -katex_js_path = katex_cdn('dist/katex.min.js') -katex_autorender_path = katex_cdn('dist/contrib/auto-render.min.js') - # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. diff --git a/doc/sphinx/source/precision.md b/doc/sphinx/source/precision.md index 72f9558909..1a80eb562e 100644 --- a/doc/sphinx/source/precision.md +++ b/doc/sphinx/source/precision.md @@ -2,9 +2,9 @@ Currently, libCEED supports two options for {code}`CeedScalar` : double and single. The default is to use double precision. 
-Users wishing to set {code}`CeedScalar` to single precision should edit `include/ceed/ceed.h` and change +Users wishing to set {code}`CeedScalar` to single precision should edit `include/ceed/types.h` and change -```{literalinclude} ../../../include/ceed/ceed.h +```{literalinclude} ../../../include/ceed/types.h :end-at: "#include \"ceed-f64.h\"" :language: c :start-at: "#include \"ceed-f64.h\"" From 73d4748a9c512fdf3b31504d85ffa9864302697f Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 23 Aug 2022 12:27:18 -0600 Subject: [PATCH 166/172] doc - fix ref warning for hoverxref_role_types --- doc/sphinx/source/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py index 9da10a0a52..b9a3b4c164 100755 --- a/doc/sphinx/source/conf.py +++ b/doc/sphinx/source/conf.py @@ -164,6 +164,9 @@ # hoverxref options hoverxref_auto_ref = True hoverxref_mathjax = True +hoverxref_role_types = { + 'ref': 'modal', +} latex_macros = r""" \def \diff {\operatorname{d}\!} From 228d9efb602501654fc8a4ffc9c266287bd4349c Mon Sep 17 00:00:00 2001 From: James Wright Date: Wed, 24 Aug 2022 09:56:00 -0600 Subject: [PATCH 167/172] ceed: Add CEED_QFUNCTION_ATTR for inlining GCC doesn't like to inline all Qfunction helpers, so this forces it do so if inlining is allowed at all. --- include/ceed/types.h | 26 ++++++++++++++++++++++++-- python/build_ceed_cffi.py | 1 + 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/include/ceed/types.h b/include/ceed/types.h index c7cc95de9a..f9e74e7afa 100644 --- a/include/ceed/types.h +++ b/include/ceed/types.h @@ -13,6 +13,28 @@ #include #include +/** + @ingroup CeedQFunction + This macro defines compiler attributes to the CEED_QFUNCTION to force inlining + for called functions. The `inline` declaration does not necessarily enforce a + compiler to inline a function. 
This can be detrimental to performance, so + here we force inlining to occur unless inlining has been forced off (like + during debugging). +**/ +#ifndef CEED_QFUNCTION_ATTR +#ifndef __NO_INLINE__ +# if defined(__GNUC__) || defined(__clang__) +# define CEED_QFUNCTION_ATTR __attribute__((flatten)) +# elif defined(__INTEL_COMPILER) +# define CEED_QFUNCTION_ATTR _Pragma("forceinline") +# else +# define CEED_QFUNCTION_ATTR +# endif +#else +# define CEED_QFUNCTION_ATTR +#endif +#endif + /** @ingroup CeedQFunction This macro populates the correct function annotations for User QFunction @@ -23,7 +45,7 @@ #ifndef CEED_QFUNCTION #define CEED_QFUNCTION(name) \ static const char name ## _loc[] = __FILE__ ":" #name; \ - static int name + CEED_QFUNCTION_ATTR static int name #endif /** @@ -33,7 +55,7 @@ values for CPU backends. **/ #ifndef CEED_QFUNCTION_HELPER -#define CEED_QFUNCTION_HELPER static inline +#define CEED_QFUNCTION_HELPER CEED_QFUNCTION_ATTR static inline #endif /** diff --git a/python/build_ceed_cffi.py b/python/build_ceed_cffi.py index 1b82fd811e..11ea5bf0a2 100644 --- a/python/build_ceed_cffi.py +++ b/python/build_ceed_cffi.py @@ -21,6 +21,7 @@ lines += [line.strip() for line in f if not (line.startswith("#") and not line.startswith("#include")) and not line.startswith(" static") and + not line.startswith(" CEED_QFUNCTION_ATTR") and "CeedErrorImpl" not in line and "const char *, ...);" not in line and not line.startswith("CEED_EXTERN const char *const") and From fb24771e6c3a1e16c424ee212294f6a511b7d452 Mon Sep 17 00:00:00 2001 From: James Wright Date: Wed, 24 Aug 2022 15:38:15 -0600 Subject: [PATCH 168/172] doc: Add CEED_QFUNCTION_ATTR to releasenotes - Also add CEED_QFUNCTION_ATTR to the backend documentation --- doc/sphinx/source/api/backend/CeedQFunction.rst | 6 ++++++ doc/sphinx/source/releasenotes.md | 1 + 2 files changed, 7 insertions(+) diff --git a/doc/sphinx/source/api/backend/CeedQFunction.rst b/doc/sphinx/source/api/backend/CeedQFunction.rst index
d506798fc7..2953ff5651 100644 --- a/doc/sphinx/source/api/backend/CeedQFunction.rst +++ b/doc/sphinx/source/api/backend/CeedQFunction.rst @@ -6,3 +6,9 @@ CeedQFunction :path: ../../../../xml :content-only: :members: + +Macros +-------------------------------------- + +.. doxygendefine:: CEED_QFUNCTION_ATTR + :project: libCEED diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index f73c5cff24..ae846d9e96 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -15,6 +15,7 @@ On this page we provide a summary of the main API changes, new features and exam - Update `/cpu/self/memcheck/*` backends to help verify `CeedQFunctionContext` data sizes provided by user. - Added `CeedInt_FMT` to support potential future use of larger interger sizes. +- Added CEED_QFUNCTION_ATTR for setting compiler attributes/pragmas to CEED_QFUNCTION_HELPER and CEED_QFUNCTION ### Bugfix From 23d6ba15ce2709c4ef8d39cdb3938232a70f8a28 Mon Sep 17 00:00:00 2001 From: James Wright Date: Fri, 22 Jul 2022 15:50:31 -0600 Subject: [PATCH 169/172] fluids: Rename primitive booleans, misc formatting --- examples/fluids/navierstokes.h | 2 +- examples/fluids/problems/channel.c | 2 +- examples/fluids/problems/newtonian.c | 12 +- examples/fluids/problems/stg_shur14.c | 2 +- examples/fluids/qfunctions/channel.h | 2 +- examples/fluids/qfunctions/densitycurrent.h | 2 +- examples/fluids/qfunctions/newtonian.h | 164 ++++++++----------- examples/fluids/qfunctions/newtonian_types.h | 2 +- examples/fluids/qfunctions/stg_shur14.h | 4 +- examples/fluids/src/setupdm.c | 2 +- 10 files changed, 82 insertions(+), 112 deletions(-) diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h index e3ee6a3960..55245de126 100644 --- a/examples/fluids/navierstokes.h +++ b/examples/fluids/navierstokes.h @@ -194,7 +194,7 @@ struct Physics_private { EulerTestType euler_test; StabilizationType stab; PetscBool implicit; - PetscBool primitive; + 
PetscBool use_primitive; PetscBool has_curr_time; PetscBool has_neumann; CeedContextFieldLabel solution_time_label; diff --git a/examples/fluids/problems/channel.c b/examples/fluids/problems/channel.c index 8063b95ba1..20f944e833 100644 --- a/examples/fluids/problems/channel.c +++ b/examples/fluids/problems/channel.c @@ -30,7 +30,7 @@ PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, void *ctx) { CeedQFunctionContextDestroy(&problem->ics.qfunction_context); problem->ics.qfunction = ICsChannel; problem->ics.qfunction_loc = ICsChannel_loc; - if (!user->phys->primitive) { + if (!user->phys->use_primitive) { problem->apply_inflow.qfunction = Channel_Inflow; problem->apply_inflow.qfunction_loc = Channel_Inflow_loc; problem->apply_outflow.qfunction = Channel_Outflow; diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index e1bee405fa..499d183954 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -77,7 +77,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { MPI_Comm comm = PETSC_COMM_WORLD; PetscBool implicit; PetscBool has_curr_time = PETSC_FALSE, - prim_var, unit_tests; + use_primitive, unit_tests; PetscInt ierr; NewtonianIdealGasContext newtonian_ig_ctx; CeedQFunctionContext newtonian_ig_context; @@ -137,8 +137,8 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { NULL); // -- Conservative vs Primitive variables ierr = PetscOptionsBool("-primitive", "Use primitive variables", - NULL, prim_var=PETSC_FALSE, &prim_var, NULL); CHKERRQ(ierr); - if (prim_var) { + NULL, use_primitive=PETSC_FALSE, &use_primitive, NULL); CHKERRQ(ierr); + if (use_primitive) { problem->ics.qfunction = ICsNewtonianIG_Prim; problem->ics.qfunction_loc = ICsNewtonianIG_Prim_loc; problem->apply_vol_ifunction.qfunction = IFunction_Newtonian_Prim; @@ -231,7 +231,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { "Warning! 
Use -stab supg only with -implicit\n"); CHKERRQ(ierr); } - if (prim_var && !implicit) { + if (use_primitive && !implicit) { SETERRQ(comm, PETSC_ERR_ARG_NULL, "RHSFunction is not provided for primitive variables (use -primitive only with -implicit)\n"); } @@ -279,7 +279,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { // -- Solver Settings user->phys->stab = stab; user->phys->implicit = implicit; - user->phys->primitive = prim_var; + user->phys->use_primitive = use_primitive; user->phys->has_curr_time = has_curr_time; // -- QFunction Context @@ -296,7 +296,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { newtonian_ig_ctx->Ctau_E = Ctau_E; newtonian_ig_ctx->stabilization = stab; newtonian_ig_ctx->is_implicit = implicit; - newtonian_ig_ctx->is_primitive = prim_var; + newtonian_ig_ctx->use_primitive = use_primitive; ierr = PetscArraycpy(newtonian_ig_ctx->g, g, 3); CHKERRQ(ierr); CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context); diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c index 8cae0a5f3d..b237c3668e 100644 --- a/examples/fluids/problems/stg_shur14.c +++ b/examples/fluids/problems/stg_shur14.c @@ -477,7 +477,7 @@ PetscErrorCode SetupStrongSTG(DM dm, SimpleBC bc, ProblemData *problem, PetscFunctionBeginUser; PetscInt comps[5], num_comps=4; - if (phys->primitive) { + if (phys->use_primitive) { // {1,2,3,4} for u, v, w, T for(int i=0; i<4; i++) comps[i] = i+1; } else { diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h index 443f18da30..44da0f9e6a 100644 --- a/examples/fluids/qfunctions/channel.h +++ b/examples/fluids/qfunctions/channel.h @@ -84,7 +84,7 @@ CEED_QFUNCTION(ICsChannel)(void *ctx, CeedInt Q, const CeedScalar x[] = {X[0][i], X[1][i], X[2][i]}; State s = Exact_Channel(3, 0., x, 5, ctx); CeedScalar q[5] = {0}; - if (context->newtonian_ctx.is_primitive) + if (context->newtonian_ctx.use_primitive) 
UnpackState_Y(s.Y, q); else UnpackState_U(s.U, q); diff --git a/examples/fluids/qfunctions/densitycurrent.h b/examples/fluids/qfunctions/densitycurrent.h index cb431c3eaa..cd85c1d90a 100644 --- a/examples/fluids/qfunctions/densitycurrent.h +++ b/examples/fluids/qfunctions/densitycurrent.h @@ -153,7 +153,7 @@ CEED_QFUNCTION(ICsDC)(void *ctx, CeedInt Q, const CeedScalar x[] = {X[0][i], X[1][i], X[2][i]}; State s = Exact_DC(3, 0., x, 5, ctx); CeedScalar q[5] = {0}; - if (context->newtonian_ctx.is_primitive) + if (context->newtonian_ctx.use_primitive) UnpackState_Y(s.Y, q); else UnpackState_U(s.U, q); diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index cbe455a6c5..85e9266270 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -152,12 +152,12 @@ CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; + (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; // *INDENT-ON* @@ -179,15 +179,9 @@ CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, // -- Interp-to-Grad q_data // ---- Inverse of change of coordinate matrix: X_i,j // *INDENT-OFF* - const CeedScalar dXdx[3][3] = {{q_data[1][i], - q_data[2][i], - q_data[3][i]}, - {q_data[4][i], - q_data[5][i], - 
q_data[6][i]}, - {q_data[7][i], - q_data[8][i], - q_data[9][i]} + const CeedScalar dXdx[3][3] = {{q_data[1][i], q_data[2][i], q_data[3][i]}, + {q_data[4][i], q_data[5][i], q_data[6][i]}, + {q_data[7][i], q_data[8][i], q_data[9][i]} }; // *INDENT-ON* State grad_s[3]; @@ -254,20 +248,20 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1], - (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2]; + (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2]; // *INDENT-ON* // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; - const CeedScalar *g = context->g; - const CeedScalar dt = context->dt; + const CeedScalar *g = context->g; + const CeedScalar dt = context->dt; CeedPragmaSIMD // Quadrature Point Loop @@ -282,15 +276,9 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, // -- Interp-to-Grad q_data // ---- Inverse of change of coordinate matrix: X_i,j // *INDENT-OFF* - const CeedScalar dXdx[3][3] = {{q_data[1][i], - q_data[2][i], - q_data[3][i]}, - {q_data[4][i], - q_data[5][i], - q_data[6][i]}, - {q_data[7][i], - q_data[8][i], - 
q_data[9][i]} + const CeedScalar dXdx[3][3] = {{q_data[1][i], q_data[2][i], q_data[3][i]}, + {q_data[4][i], q_data[5][i], q_data[6][i]}, + {q_data[7][i], q_data[8][i], q_data[9][i]} }; // *INDENT-ON* State grad_s[3]; @@ -339,8 +327,8 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, stab[j][1] * dXdx[k][1] + stab[j][2] * dXdx[k][2]); - for (CeedInt j=0; j<5; j++) jac_data[j][i] = U[j]; - for (CeedInt j=0; j<6; j++) jac_data[5+j][i] = kmstress[j]; + for (CeedInt j=0; j<5; j++) jac_data[j][i] = U[j]; + for (CeedInt j=0; j<6; j++) jac_data[5+j][i] = kmstress[j]; for (CeedInt j=0; j<3; j++) jac_data[5+6+j][i] = Tau_d[j]; } // End Quadrature Point Loop @@ -359,13 +347,13 @@ CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], - (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], + (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; // *INDENT-ON* // Context @@ -380,22 +368,16 @@ CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q, // -- Interp-to-Grad q_data // ---- Inverse of change of coordinate matrix: X_i,j // *INDENT-OFF* - const CeedScalar dXdx[3][3] = {{q_data[1][i], - q_data[2][i], - q_data[3][i]}, - {q_data[4][i], - q_data[5][i], - q_data[6][i]}, - {q_data[7][i], - q_data[8][i], - 
q_data[9][i]} + const CeedScalar dXdx[3][3] = {{q_data[1][i], q_data[2][i], q_data[3][i]}, + {q_data[4][i], q_data[5][i], q_data[6][i]}, + {q_data[7][i], q_data[8][i], q_data[9][i]} }; // *INDENT-ON* CeedScalar U[5], kmstress[6], Tau_d[3] __attribute((unused)); - for (int j=0; j<5; j++) U[j] = jac_data[j][i]; + for (int j=0; j<5; j++) U[j] = jac_data[j][i]; for (int j=0; j<6; j++) kmstress[j] = jac_data[5+j][i]; - for (int j=0; j<3; j++) Tau_d[j] = jac_data[5+6+j][i]; + for (int j=0; j<3; j++) Tau_d[j] = jac_data[5+6+j][i]; const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]}; State s = StateFromU(context, U, x_i); @@ -477,8 +459,8 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, State s, const CeedScalar dqi[5], const CeedScalar x[3], const CeedScalar dx[3]); - StateFromQi = context->is_primitive ? &StateFromY : &StateFromU; - StateFromQi_fwd = context->is_primitive ? &StateFromY_fwd : &StateFromU_fwd; + StateFromQi = context->use_primitive ? &StateFromY : &StateFromU; + StateFromQi_fwd = context->use_primitive ? &StateFromY_fwd : &StateFromU_fwd; CeedPragmaSIMD @@ -536,7 +518,7 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, // -- Total Energy Density v[4][i] = -wdetJb * Flux[4]; - if (context->is_primitive) { + if (context->use_primitive) { jac_data_sur[0][i] = s.Y.pressure; for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.Y.velocity[j]; jac_data_sur[4][i] = s.Y.temperature; @@ -574,8 +556,8 @@ CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q, State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, State s, const CeedScalar dqi[5], const CeedScalar x[3], const CeedScalar dx[3]); - StateFromQi = context->is_primitive ? &StateFromY : &StateFromU; - StateFromQi_fwd = context->is_primitive ? &StateFromY_fwd : &StateFromU_fwd; + StateFromQi = context->use_primitive ? &StateFromY : &StateFromU; + StateFromQi_fwd = context->use_primitive ? 
&StateFromY_fwd : &StateFromU_fwd; CeedPragmaSIMD // Quadrature Point Loop @@ -592,9 +574,9 @@ CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q, }; CeedScalar qi[5], kmstress[6], dqi[5], dx_i[3] = {0.}; - for (int j=0; j<5; j++) qi[j] = jac_data_sur[j][i]; + for (int j=0; j<5; j++) qi[j] = jac_data_sur[j][i]; for (int j=0; j<6; j++) kmstress[j] = jac_data_sur[5+j][i]; - for (int j=0; j<5; j++) dqi[j] = dq[j][i]; + for (int j=0; j<5; j++) dqi[j] = dq[j][i]; State s = StateFromQi(context, qi, x_i); State ds = StateFromQi_fwd(context, s, dqi, x_i, dx_i); @@ -659,8 +641,8 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, State s, const CeedScalar dqi[5], const CeedScalar x[3], const CeedScalar dx[3]); - StateFromQi = context->is_primitive ? &StateFromY : &StateFromU; - StateFromQi_fwd = context->is_primitive ? &StateFromY_fwd : &StateFromU_fwd; + StateFromQi = context->use_primitive ? &StateFromY : &StateFromU; + StateFromQi_fwd = context->use_primitive ? &StateFromY_fwd : &StateFromU_fwd; CeedPragmaSIMD // Quadrature Point Loop @@ -727,7 +709,7 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, v[4][i] = -wdetJb * Flux[4]; // Save values for Jacobian - if (context->is_primitive) { + if (context->use_primitive) { jac_data_sur[0][i] = s.Y.pressure; for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.Y.velocity[j]; jac_data_sur[4][i] = s.Y.temperature; @@ -765,8 +747,8 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, State s, const CeedScalar dQi[5], const CeedScalar x[3], const CeedScalar dx[3]); - StateFromQi = context->is_primitive ? &StateFromY : &StateFromU; - StateFromQi_fwd = context->is_primitive ? &StateFromY_fwd : &StateFromU_fwd; + StateFromQi = context->use_primitive ? &StateFromY : &StateFromU; + StateFromQi_fwd = context->use_primitive ? 
&StateFromY_fwd : &StateFromU_fwd; CeedPragmaSIMD // Quadrature Point Loop @@ -783,9 +765,9 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, }; CeedScalar qi[5], kmstress[6], dqi[5], dx_i[3] = {0.}; - for (int j=0; j<5; j++) qi[j] = jac_data_sur[j][i]; + for (int j=0; j<5; j++) qi[j] = jac_data_sur[j][i]; for (int j=0; j<6; j++) kmstress[j] = jac_data_sur[5+j][i]; - for (int j=0; j<5; j++) dqi[j] = dq[j][i]; + for (int j=0; j<5; j++) dqi[j] = dq[j][i]; State s = StateFromQi(context, qi, x_i); State ds = StateFromQi_fwd(context, s, dqi, x_i, dx_i); @@ -835,20 +817,20 @@ CEED_QFUNCTION(IFunction_Newtonian_Prim)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1], - (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2]; + (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2]; // *INDENT-ON* // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; - const CeedScalar *g = context->g; - const CeedScalar dt = context->dt; + const CeedScalar *g = context->g; + const CeedScalar dt = context->dt; CeedPragmaSIMD // Quadrature Point Loop @@ -863,15 +845,9 @@ 
CEED_QFUNCTION(IFunction_Newtonian_Prim)(void *ctx, CeedInt Q, // -- Interp-to-Grad q_data // ---- Inverse of change of coordinate matrix: X_i,j // *INDENT-OFF* - const CeedScalar dXdx[3][3] = {{q_data[1][i], - q_data[2][i], - q_data[3][i]}, - {q_data[4][i], - q_data[5][i], - q_data[6][i]}, - {q_data[7][i], - q_data[8][i], - q_data[9][i]} + const CeedScalar dXdx[3][3] = {{q_data[1][i], q_data[2][i], q_data[3][i]}, + {q_data[4][i], q_data[5][i], q_data[6][i]}, + {q_data[7][i], q_data[8][i], q_data[9][i]} }; // *INDENT-ON* State grad_s[3]; @@ -927,8 +903,8 @@ CEED_QFUNCTION(IFunction_Newtonian_Prim)(void *ctx, CeedInt Q, stab[j][1] * dXdx[k][1] + stab[j][2] * dXdx[k][2]); - for (CeedInt j=0; j<5; j++) jac_data[j][i] = Y[j]; - for (CeedInt j=0; j<6; j++) jac_data[5+j][i] = kmstress[j]; + for (CeedInt j=0; j<5; j++) jac_data[j][i] = Y[j]; + for (CeedInt j=0; j<6; j++) jac_data[5+j][i] = kmstress[j]; for (CeedInt j=0; j<3; j++) jac_data[5+6+j][i] = Tau_d[j]; } // End Quadrature Point Loop @@ -946,13 +922,13 @@ CEED_QFUNCTION(IJacobian_Newtonian_Prim)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], + const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], - (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], + (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], + (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], + CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; // 
*INDENT-ON* // Context @@ -967,22 +943,16 @@ CEED_QFUNCTION(IJacobian_Newtonian_Prim)(void *ctx, CeedInt Q, // -- Interp-to-Grad q_data // ---- Inverse of change of coordinate matrix: X_i,j // *INDENT-OFF* - const CeedScalar dXdx[3][3] = {{q_data[1][i], - q_data[2][i], - q_data[3][i]}, - {q_data[4][i], - q_data[5][i], - q_data[6][i]}, - {q_data[7][i], - q_data[8][i], - q_data[9][i]} + const CeedScalar dXdx[3][3] = {{q_data[1][i], q_data[2][i], q_data[3][i]}, + {q_data[4][i], q_data[5][i], q_data[6][i]}, + {q_data[7][i], q_data[8][i], q_data[9][i]} }; // *INDENT-ON* CeedScalar Y[5], kmstress[6], Tau_d[3] __attribute((unused)); - for (int j=0; j<5; j++) Y[j] = jac_data[j][i]; + for (int j=0; j<5; j++) Y[j] = jac_data[j][i]; for (int j=0; j<6; j++) kmstress[j] = jac_data[5+j][i]; - for (int j=0; j<3; j++) Tau_d[j] = jac_data[5+6+j][i]; + for (int j=0; j<3; j++) Tau_d[j] = jac_data[5+6+j][i]; const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]}; State s = StateFromY(context, Y, x_i); diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h index 879add7d13..574c4ab01b 100644 --- a/examples/fluids/qfunctions/newtonian_types.h +++ b/examples/fluids/qfunctions/newtonian_types.h @@ -50,7 +50,7 @@ struct NewtonianIdealGasContext_ { CeedScalar ijacobian_time_shift; CeedScalar P0; bool is_implicit; - bool is_primitive; + bool use_primitive; StabilizationType stabilization; }; diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index 043328f55f..bbab48a554 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -321,7 +321,7 @@ CEED_QFUNCTION(ICsSTG)(void *ctx, CeedInt Q, for(CeedInt i=0; inewtonian_ctx.is_primitive) { + if (stg_ctx->newtonian_ctx.use_primitive) { q0[0][i] = P0; q0[1][i] = u[0]; q0[2][i] = u[1]; @@ -568,7 +568,7 @@ CEED_QFUNCTION(STGShur14_Inflow_StrongQF)(void *ctx, CeedInt Q, for (CeedInt j=0; j<3; j++) u[j] = ubar[j]; } - 
if (stg_ctx->newtonian_ctx.is_primitive) { + if (stg_ctx->newtonian_ctx.use_primitive) { bcval[0][i] = 0; bcval[1][i] = scale[i] * u[0]; bcval[2][i] = scale[i] * u[1]; diff --git a/examples/fluids/src/setupdm.c b/examples/fluids/src/setupdm.c index 26fa2c349a..b0c56802b0 100644 --- a/examples/fluids/src/setupdm.c +++ b/examples/fluids/src/setupdm.c @@ -102,7 +102,7 @@ PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree, PetscSection section; ierr = DMGetLocalSection(dm, §ion); CHKERRQ(ierr); ierr = PetscSectionSetFieldName(section, 0, ""); CHKERRQ(ierr); - if (phys->primitive) { + if (phys->use_primitive) { ierr = PetscSectionSetComponentName(section, 0, 0, "Pressure"); CHKERRQ(ierr); ierr = PetscSectionSetComponentName(section, 0, 1, "Velocity X"); From 5bce47c78373471bb00dacdcfd196665461caef2 Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 26 Jul 2022 16:11:11 -0600 Subject: [PATCH 170/172] fluids: Add FluxTotal_Boundary, Refactor newt BI QFs - Refactoring the newtonian boundary integral QFunctions --- examples/fluids/qfunctions/newtonian.h | 86 ++++---------------- examples/fluids/qfunctions/newtonian_state.h | 14 ++++ 2 files changed, 30 insertions(+), 70 deletions(-) diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 85e9266270..5a398009d1 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -470,7 +470,7 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, State s = StateFromQi(context, qi, x_i); const CeedScalar wdetJb = (is_implicit ? -1. : 1.) 
* q_data_sur[0][i]; - // ---- Normal vect + // ---- Normal vector const CeedScalar norm[3] = {q_data_sur[1][i], q_data_sur[2][i], q_data_sur[3][i] @@ -500,33 +500,12 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, StateConservative F_inviscid[3]; FluxInviscid(context, s, F_inviscid); - CeedScalar Flux[5] = {0.}; - for (int j=0; j<3; j++) { - Flux[0] += F_inviscid[j].density * norm[j]; - for (int k=0; k<3; k++) - Flux[k+1] += (F_inviscid[j].momentum[k] - stress[k][j]) * norm[j]; - Flux[4] += (F_inviscid[j].E_total + Fe[j]) * norm[j]; - } + CeedScalar Flux[5]; + FluxTotal_Boundary(F_inviscid, stress, Fe, norm, Flux); - // -- Density - v[0][i] = -wdetJb * Flux[0]; + for (CeedInt j=0; j<5; j++) v[j][i] = -wdetJb * Flux[j]; - // -- Momentum - for (CeedInt j=0; j<3; j++) - v[j+1][i] = -wdetJb * Flux[j+1]; - - // -- Total Energy Density - v[4][i] = -wdetJb * Flux[4]; - - if (context->use_primitive) { - jac_data_sur[0][i] = s.Y.pressure; - for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.Y.velocity[j]; - jac_data_sur[4][i] = s.Y.temperature; - } else { - jac_data_sur[0][i] = s.U.density; - for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.U.momentum[j]; - jac_data_sur[4][i] = s.U.E_total; - } + for (int j=0; j<5; j++) jac_data_sur[j][i] = qi[j]; for (int j=0; j<6; j++) jac_data_sur[5+j][i] = kmstress[j]; } return 0; @@ -601,16 +580,10 @@ CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q, StateConservative dF_inviscid[3]; FluxInviscid_fwd(context, s, ds, dF_inviscid); - CeedScalar dFlux[5] = {0.}; - for (int j=0; j<3; j++) { - dFlux[0] += dF_inviscid[j].density * norm[j]; - for (int k=0; k<3; k++) - dFlux[k+1] += (dF_inviscid[j].momentum[k] - dstress[k][j]) * norm[j]; - dFlux[4] += (dF_inviscid[j].E_total + dFe[j]) * norm[j]; - } + CeedScalar dFlux[5]; + FluxTotal_Boundary(dF_inviscid, dstress, dFe, norm, dFlux); - for (int j=0; j<5; j++) - v[j][i] = -wdetJb * dFlux[j]; + for (int j=0; j<5; j++) v[j][i] = -wdetJb * dFlux[j]; } // End Quadrature Point 
Loop return 0; } @@ -660,7 +633,7 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, // We can effect this by swapping the sign on this weight const CeedScalar wdetJb = (implicit ? -1. : 1.) * q_data_sur[0][i]; - // ---- Normal vect + // ---- Normal vector const CeedScalar norm[3] = {q_data_sur[1][i], q_data_sur[2][i], q_data_sur[3][i] @@ -690,34 +663,13 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, StateConservative F_inviscid[3]; FluxInviscid(context, s, F_inviscid); - CeedScalar Flux[5] = {0.}; - for (int j=0; j<3; j++) { - Flux[0] += F_inviscid[j].density * norm[j]; - for (int k=0; k<3; k++) - Flux[k+1] += (F_inviscid[j].momentum[k] - stress[k][j]) * norm[j]; - Flux[4] += (F_inviscid[j].E_total + Fe[j])*norm[j]; - } - - // -- Density - v[0][i] = -wdetJb * Flux[0]; - - // -- Momentum - for (CeedInt j=0; j<3; j++) - v[j+1][i] = -wdetJb * Flux[j+1]; + CeedScalar Flux[5]; + FluxTotal_Boundary(F_inviscid, stress, Fe, norm, Flux); - // -- Total Energy Density - v[4][i] = -wdetJb * Flux[4]; + for (CeedInt j=0; j<5; j++) v[j][i] = -wdetJb * Flux[j]; // Save values for Jacobian - if (context->use_primitive) { - jac_data_sur[0][i] = s.Y.pressure; - for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.Y.velocity[j]; - jac_data_sur[4][i] = s.Y.temperature; - } else { - jac_data_sur[0][i] = s.U.density; - for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.U.momentum[j]; - jac_data_sur[4][i] = s.U.E_total; - } + for (int j=0; j<5; j++) jac_data_sur[j][i] = qi[j]; for (int j=0; j<6; j++) jac_data_sur[5+j][i] = kmstress[j]; } // End Quadrature Point Loop return 0; @@ -794,16 +746,10 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, StateConservative dF_inviscid[3]; FluxInviscid_fwd(context, s, ds, dF_inviscid); - CeedScalar dFlux[5] = {0.}; - for (int j=0; j<3; j++) { - dFlux[0] += dF_inviscid[j].density * norm[j]; - for (int k=0; k<3; k++) - dFlux[k+1] += (dF_inviscid[j].momentum[k] - dstress[k][j]) * norm[j]; - dFlux[4] += (dF_inviscid[j].E_total + 
dFe[j]) * norm[j]; - } + CeedScalar dFlux[5]; + FluxTotal_Boundary(dF_inviscid, dstress, dFe, norm, dFlux); - for (int j=0; j<5; j++) - v[j][i] = -wdetJb * dFlux[j]; + for (int j=0; j<5; j++) v[j][i] = -wdetJb * dFlux[j]; } // End Quadrature Point Loop return 0; } diff --git a/examples/fluids/qfunctions/newtonian_state.h b/examples/fluids/qfunctions/newtonian_state.h index 776c90f03f..f29a64b0f2 100644 --- a/examples/fluids/qfunctions/newtonian_state.h +++ b/examples/fluids/qfunctions/newtonian_state.h @@ -212,6 +212,20 @@ CEED_QFUNCTION_HELPER void FluxTotal(StateConservative F_inviscid[3], } } +CEED_QFUNCTION_HELPER void FluxTotal_Boundary( + const StateConservative F_inviscid[3], const CeedScalar stress[3][3], + const CeedScalar Fe[3], const CeedScalar normal[3], CeedScalar Flux[5]) { + + for(CeedInt j=0; j<5; j++) Flux[j] = 0.; + for (CeedInt j=0; j<3; j++) { + Flux[0] += F_inviscid[j].density * normal[j]; + for (CeedInt k=0; k<3; k++) { + Flux[k+1] += (F_inviscid[j].momentum[k] - stress[k][j]) * normal[j]; + } + Flux[4] += (F_inviscid[j].E_total + Fe[j]) * normal[j]; + } +} + // Kelvin-Mandel notation CEED_QFUNCTION_HELPER void KMStrainRate(const State grad_s[3], CeedScalar strain_rate[6]) { From 20840d5027c5dd695d8ebd45bd884db127586bc6 Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 26 Jul 2022 08:54:14 -0600 Subject: [PATCH 171/172] fluids: Use StateFromQi* for newt boundary QFs --- examples/fluids/problems/newtonian.c | 34 +++--- examples/fluids/qfunctions/newtonian.h | 104 ++++++++++--------- examples/fluids/qfunctions/newtonian_state.h | 9 +- 3 files changed, 80 insertions(+), 67 deletions(-) diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index 499d183954..e736c4927a 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -138,6 +138,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { // -- Conservative vs Primitive variables ierr = 
PetscOptionsBool("-primitive", "Use primitive variables", NULL, use_primitive=PETSC_FALSE, &use_primitive, NULL); CHKERRQ(ierr); + // *INDENT-OFF* if (use_primitive) { problem->ics.qfunction = ICsNewtonianIG_Prim; problem->ics.qfunction_loc = ICsNewtonianIG_Prim_loc; @@ -145,14 +146,14 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_Prim_loc; problem->apply_vol_ijacobian.qfunction = IJacobian_Newtonian_Prim; problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_Prim_loc; - problem->apply_inflow.qfunction = BoundaryIntegral; - problem->apply_inflow.qfunction_loc = BoundaryIntegral_loc; - problem->apply_inflow_jacobian.qfunction = BoundaryIntegral_Jacobian; - problem->apply_inflow_jacobian.qfunction_loc = BoundaryIntegral_Jacobian_loc; - problem->apply_outflow.qfunction = PressureOutflow; - problem->apply_outflow.qfunction_loc = PressureOutflow_loc; - problem->apply_outflow_jacobian.qfunction = PressureOutflow_Jacobian; - problem->apply_outflow_jacobian.qfunction_loc = PressureOutflow_Jacobian_loc; + problem->apply_inflow.qfunction = BoundaryIntegral_Prim; + problem->apply_inflow.qfunction_loc = BoundaryIntegral_Prim_loc; + problem->apply_inflow_jacobian.qfunction = BoundaryIntegral_Jacobian_Prim; + problem->apply_inflow_jacobian.qfunction_loc = BoundaryIntegral_Jacobian_Prim_loc; + problem->apply_outflow.qfunction = PressureOutflow_Prim; + problem->apply_outflow.qfunction_loc = PressureOutflow_Prim_loc; + problem->apply_outflow_jacobian.qfunction = PressureOutflow_Jacobian_Prim; + problem->apply_outflow_jacobian.qfunction_loc = PressureOutflow_Jacobian_Prim_loc; } else { problem->ics.qfunction = ICsNewtonianIG; problem->ics.qfunction_loc = ICsNewtonianIG_loc; @@ -162,15 +163,16 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc; problem->apply_vol_ijacobian.qfunction = 
IJacobian_Newtonian; problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_loc; - problem->apply_inflow.qfunction = BoundaryIntegral; - problem->apply_inflow.qfunction_loc = BoundaryIntegral_loc; - problem->apply_inflow_jacobian.qfunction = BoundaryIntegral_Jacobian; - problem->apply_inflow_jacobian.qfunction_loc = BoundaryIntegral_Jacobian_loc; - problem->apply_outflow.qfunction = PressureOutflow; - problem->apply_outflow.qfunction_loc = PressureOutflow_loc; - problem->apply_outflow_jacobian.qfunction = PressureOutflow_Jacobian; - problem->apply_outflow_jacobian.qfunction_loc = PressureOutflow_Jacobian_loc; + problem->apply_inflow.qfunction = BoundaryIntegral_Conserv; + problem->apply_inflow.qfunction_loc = BoundaryIntegral_Conserv_loc; + problem->apply_inflow_jacobian.qfunction = BoundaryIntegral_Jacobian_Conserv; + problem->apply_inflow_jacobian.qfunction_loc = BoundaryIntegral_Jacobian_Conserv_loc; + problem->apply_outflow.qfunction = PressureOutflow_Conserv; + problem->apply_outflow.qfunction_loc = PressureOutflow_Conserv_loc; + problem->apply_outflow_jacobian.qfunction = PressureOutflow_Jacobian_Conserv; + problem->apply_outflow_jacobian.qfunction_loc = PressureOutflow_Jacobian_Conserv_loc; } + // *INDENT-ON* // -- Physics ierr = PetscOptionsScalar("-cv", "Heat capacity at constant volume", diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 5a398009d1..f5e91501fe 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -437,9 +437,9 @@ CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q, // ***************************************************************************** // Compute boundary integral (ie. 
for strongly set inflows) // ***************************************************************************** -CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) { +CEED_QFUNCTION_HELPER int BoundaryIntegral(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out, + StateFromQi_t StateFromQi, StateFromQi_fwd_t StateFromQi_fwd) { //*INDENT-OFF* const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], @@ -454,14 +454,6 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, const NewtonianIdealGasContext context = (NewtonianIdealGasContext) ctx; const bool is_implicit = context->is_implicit; - State (*StateFromQi)(NewtonianIdealGasContext gas, - const CeedScalar qi[5], const CeedScalar x[3]); - State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, - State s, const CeedScalar dqi[5], - const CeedScalar x[3], const CeedScalar dx[3]); - StateFromQi = context->use_primitive ? &StateFromY : &StateFromU; - StateFromQi_fwd = context->use_primitive ? &StateFromY_fwd : &StateFromU_fwd; - CeedPragmaSIMD for(CeedInt i=0; iis_implicit; - State (*StateFromQi)(NewtonianIdealGasContext gas, - const CeedScalar qi[5], const CeedScalar x[3]); - State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, - State s, const CeedScalar dqi[5], - const CeedScalar x[3], const CeedScalar dx[3]); - StateFromQi = context->use_primitive ? &StateFromY : &StateFromU; - StateFromQi_fwd = context->use_primitive ? 
&StateFromY_fwd : &StateFromU_fwd; CeedPragmaSIMD // Quadrature Point Loop @@ -588,12 +583,22 @@ CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q, return 0; } +CEED_QFUNCTION(BoundaryIntegral_Jacobian_Conserv)(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) { + return BoundaryIntegral_Jacobian(ctx, Q, in, out, StateFromU, StateFromU_fwd); +} + +CEED_QFUNCTION(BoundaryIntegral_Jacobian_Prim)(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) { + return BoundaryIntegral_Jacobian(ctx, Q, in, out, StateFromY, StateFromY_fwd); +} + // ***************************************************************************** // Outflow boundary condition, weakly setting a constant pressure // ***************************************************************************** -CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) { +CEED_QFUNCTION_HELPER int PressureOutflow(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out, + StateFromQi_t StateFromQi, StateFromQi_fwd_t StateFromQi_fwd) { // *INDENT-OFF* // Inputs const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], @@ -609,14 +614,6 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, const bool implicit = context->is_implicit; const CeedScalar P0 = context->P0; - State (*StateFromQi)(NewtonianIdealGasContext gas, - const CeedScalar qi[5], const CeedScalar x[3]); - State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, - State s, const CeedScalar dqi[5], - const CeedScalar x[3], const CeedScalar dx[3]); - StateFromQi = context->use_primitive ? &StateFromY : &StateFromU; - StateFromQi_fwd = context->use_primitive ? 
&StateFromY_fwd : &StateFromU_fwd; - CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; iis_implicit; - State (*StateFromQi)(NewtonianIdealGasContext gas, - const CeedScalar qi[5], const CeedScalar x[3]); - State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, - State s, const CeedScalar dQi[5], - const CeedScalar x[3], const CeedScalar dx[3]); - StateFromQi = context->use_primitive ? &StateFromY : &StateFromU; - StateFromQi_fwd = context->use_primitive ? &StateFromY_fwd : &StateFromU_fwd; - CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; i State struct functions +typedef State (*StateFromQi_t)(NewtonianIdealGasContext gas, + const CeedScalar qi[5], const CeedScalar x[3]); +typedef State (*StateFromQi_fwd_t)(NewtonianIdealGasContext gas, + State s, const CeedScalar dqi[5], + const CeedScalar x[3], const CeedScalar dx[3]); + CEED_QFUNCTION_HELPER State StateFromU(NewtonianIdealGasContext gas, const CeedScalar U[5], const CeedScalar x[3]) { State s; @@ -202,7 +209,7 @@ CEED_QFUNCTION_HELPER void FluxInviscidStrong(NewtonianIdealGasContext gas, } } -CEED_QFUNCTION_HELPER void FluxTotal(StateConservative F_inviscid[3], +CEED_QFUNCTION_HELPER void FluxTotal(const StateConservative F_inviscid[3], CeedScalar stress[3][3], CeedScalar Fe[3], CeedScalar Flux[5][3]) { for (CeedInt j=0; j<3; j++) { Flux[0][j] = F_inviscid[j].density; From 3d02368af63ebc423f2fb3d1e98bcce242e9653b Mon Sep 17 00:00:00 2001 From: James Wright Date: Fri, 19 Aug 2022 11:57:47 -0600 Subject: [PATCH 172/172] fluids: Use newtonian with StateFromQi*_t func pointers This to "unify" the primitive and conservative formulations of the newtonian solver. By passing function pointers to a helper function, the compiler can optimize for a given state function(s). 
--- examples/fluids/problems/newtonian.c | 8 +- examples/fluids/qfunctions/newtonian.h | 292 +++++-------------------- 2 files changed, 60 insertions(+), 240 deletions(-) diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index e736c4927a..72da1079fc 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -159,10 +159,10 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) { problem->ics.qfunction_loc = ICsNewtonianIG_loc; problem->apply_vol_rhs.qfunction = RHSFunction_Newtonian; problem->apply_vol_rhs.qfunction_loc = RHSFunction_Newtonian_loc; - problem->apply_vol_ifunction.qfunction = IFunction_Newtonian; - problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc; - problem->apply_vol_ijacobian.qfunction = IJacobian_Newtonian; - problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_loc; + problem->apply_vol_ifunction.qfunction = IFunction_Newtonian_Conserv; + problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_Conserv_loc; + problem->apply_vol_ijacobian.qfunction = IJacobian_Newtonian_Conserv; + problem->apply_vol_ijacobian.qfunction_loc = IJacobian_Newtonian_Conserv_loc; problem->apply_inflow.qfunction = BoundaryIntegral_Conserv; problem->apply_inflow.qfunction_loc = BoundaryIntegral_Conserv_loc; problem->apply_inflow_jacobian.qfunction = BoundaryIntegral_Jacobian_Conserv; diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index f5e91501fe..04ed51ea2e 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -244,8 +244,9 @@ CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, // (diffussive terms will be added later) // // ***************************************************************************** -CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) { +CEED_QFUNCTION_HELPER int 
IFunction_Newtonian(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out, + StateFromQi_t StateFromQi, StateFromQi_fwd_t StateFromQi_fwd) { // *INDENT-OFF* // Inputs const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], @@ -266,10 +267,10 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; iijacobian_time_shift * dU[j] - dbody_force[j]); @@ -434,6 +449,16 @@ CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q, return 0; } +CEED_QFUNCTION(IJacobian_Newtonian_Conserv)(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) { + return IJacobian_Newtonian(ctx, Q, in, out, StateFromU, StateFromU_fwd); +} + +CEED_QFUNCTION(IJacobian_Newtonian_Prim)(void *ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) { + return IJacobian_Newtonian(ctx, Q, in, out, StateFromY, StateFromY_fwd); +} + // ***************************************************************************** // Compute boundary integral (ie. 
for strongly set inflows) // ***************************************************************************** @@ -758,209 +783,4 @@ CEED_QFUNCTION(PressureOutflow_Jacobian_Prim)(void *ctx, CeedInt Q, return PressureOutflow_Jacobian(ctx, Q, in, out, StateFromY, StateFromY_fwd); } -// ***************************************************************************** -// This QFunction implements the Navier-Stokes equations (mentioned above) in -// primitive variables and with implicit time stepping method -// -// ***************************************************************************** -CEED_QFUNCTION(IFunction_Newtonian_Prim)(void *ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) { - // *INDENT-OFF* - // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; - // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1], - (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2]; - // *INDENT-ON* - // Context - NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; - const CeedScalar *g = context->g; - const CeedScalar dt = context->dt; - - CeedPragmaSIMD - // Quadrature Point Loop - for (CeedInt i=0; ig; - - CeedPragmaSIMD - // Quadrature Point Loop - for (CeedInt i=0; iijacobian_time_shift * dU[j] - dbody_force[j]); - - // -- Stabilization method: none (Galerkin), SU, or SUPG - CeedScalar dstab[5][3], U_dot[5] = {0}; - for (CeedInt j=0; j<5; j++) U_dot[j] = context->ijacobian_time_shift * dU[j]; - Stabilization(context, s, Tau_d, grad_ds, U_dot, dbody_force, x_i, dstab); - - for (int j=0; j<5; j++) - for (int k=0; k<3; k++) - Grad_v[k][j][i] += wdetJ*(dstab[j][0] 
* dXdx[k][0] + - dstab[j][1] * dXdx[k][1] + - dstab[j][2] * dXdx[k][2]); - - } // End Quadrature Point Loop - return 0; -} -// ***************************************************************************** - #endif // newtonian_h