From 3aab95c02208308af393c9014d0e46d9d208d4a6 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Mon, 5 Aug 2024 09:56:54 -0600 Subject: [PATCH] op - minor performance improvement for op with repeat input rstr --- backends/blocked/ceed-blocked-operator.c | 38 +++++++++--- backends/blocked/ceed-blocked.h | 5 +- backends/cuda-ref/ceed-cuda-ref-operator.c | 42 ++++++++++--- backends/cuda-ref/ceed-cuda-ref.h | 3 +- backends/hip-ref/ceed-hip-ref-operator.c | 42 ++++++++++--- backends/hip-ref/ceed-hip-ref.h | 3 +- backends/opt/ceed-opt-operator.c | 33 ++++++++-- backends/ref/ceed-ref-operator.c | 70 ++++++++++++++++++---- backends/ref/ceed-ref.h | 3 +- 9 files changed, 195 insertions(+), 44 deletions(-) diff --git a/backends/blocked/ceed-blocked-operator.c b/backends/blocked/ceed-blocked-operator.c index 0df7846d57..788533cbff 100644 --- a/backends/blocked/ceed-blocked-operator.c +++ b/backends/blocked/ceed-blocked-operator.c @@ -16,7 +16,7 @@ //------------------------------------------------------------------------------ // Setup Input/Output Fields //------------------------------------------------------------------------------ -static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, CeedOperator op, bool is_input, const CeedInt block_size, +static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, CeedOperator op, bool is_input, bool *skip_rstr, const CeedInt block_size, CeedElemRestriction *block_rstr, CeedVector *e_vecs_full, CeedVector *e_vecs, CeedVector *q_vecs, CeedInt start_e, CeedInt num_fields, CeedInt Q) { Ceed ceed; @@ -135,6 +135,28 @@ static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, CeedOperator op, bo break; } } + // Drop duplicate input restrictions + if (is_input) { + for (CeedInt i = 0; i < num_fields; i++) { + CeedVector vec_i; + CeedElemRestriction rstr_i; + + CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec_i)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr_i)); + for (CeedInt j = i + 1; j < num_fields; j++) { + CeedVector vec_j; + CeedElemRestriction rstr_j; + + CeedCallBackend(CeedOperatorFieldGetVector(op_fields[j], &vec_j)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[j], &rstr_j)); + if (vec_i == vec_j && rstr_i == rstr_j) { + CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &e_vecs[j])); + CeedCallBackend(CeedVectorReferenceCopy(e_vecs_full[i], &e_vecs_full[j])); + skip_rstr[j] = true; + } + } + } + } return CEED_ERROR_SUCCESS; } @@ -166,6 +188,7 @@ static int CeedOperatorSetup_Blocked(CeedOperator op) { CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->block_rstr)); CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full)); + CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->skip_rstr_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->input_states)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out)); @@ -177,11 +200,11 @@ static int CeedOperatorSetup_Blocked(CeedOperator op) { // Set up infield and outfield pointer arrays // Infields - CeedCallBackend(CeedOperatorSetupFields_Blocked(qf, op, true, block_size, impl->block_rstr, impl->e_vecs_full, impl->e_vecs_in, impl->q_vecs_in, 0, - num_input_fields, Q)); + CeedCallBackend(CeedOperatorSetupFields_Blocked(qf, op, true, impl->skip_rstr_in, block_size, impl->block_rstr, impl->e_vecs_full, impl->e_vecs_in, + impl->q_vecs_in, 0, num_input_fields, Q)); // Outfields - CeedCallBackend(CeedOperatorSetupFields_Blocked(qf, op, false, block_size, impl->block_rstr, impl->e_vecs_full, impl->e_vecs_out, impl->q_vecs_out, - num_input_fields, num_output_fields, Q)); + CeedCallBackend(CeedOperatorSetupFields_Blocked(qf, op, false, NULL, block_size, impl->block_rstr, impl->e_vecs_full, impl->e_vecs_out, + impl->q_vecs_out, num_input_fields, num_output_fields, Q)); // Identity QFunctions if (impl->is_identity_qf) { @@ -226,10 +249,10 @@ static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields, Ceed } else { // Restrict CeedCallBackend(CeedVectorGetState(vec, &state)); - if (state != impl->input_states[i] || vec == in_vec) { + if ((state != impl->input_states[i] || vec == in_vec) && !impl->skip_rstr_in[i]) { CeedCallBackend(CeedElemRestrictionApply(impl->block_rstr[i], CEED_NOTRANSPOSE, vec, impl->e_vecs_full[i], request)); - impl->input_states[i] = state; } + impl->input_states[i] = state; // Get evec CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, (const CeedScalar **)&e_data_full[i])); } @@ -647,6 +670,7 @@ static int CeedOperatorDestroy_Blocked(CeedOperator op) { CeedCallBackend(CeedOperatorGetData(op, &impl)); + CeedCallBackend(CeedFree(&impl->skip_rstr_in)); for (CeedInt i = 0; i < impl->num_inputs + impl->num_outputs; i++) { CeedCallBackend(CeedElemRestrictionDestroy(&impl->block_rstr[i])); CeedCallBackend(CeedVectorDestroy(&impl->e_vecs_full[i])); diff --git a/backends/blocked/ceed-blocked.h b/backends/blocked/ceed-blocked.h index fef7967518..5876e969b7 100644 --- a/backends/blocked/ceed-blocked.h +++ b/backends/blocked/ceed-blocked.h @@ -17,13 +17,14 @@ typedef struct { typedef struct { bool is_identity_qf, is_identity_rstr_op; - CeedElemRestriction *block_rstr; /* Blocked versions of restrictions */ - CeedVector *e_vecs_full; /* Full E-vectors, inputs followed by outputs */ + bool *skip_rstr_in; uint64_t *input_states; /* State counter of inputs */ + CeedVector *e_vecs_full; /* Full E-vectors, inputs followed by outputs */ CeedVector *e_vecs_in; /* Element block input E-vectors */ CeedVector *e_vecs_out; /* Element block output E-vectors */ CeedVector *q_vecs_in; /* Element block input Q-vectors */ CeedVector *q_vecs_out; /* Element block output Q-vectors */ + CeedElemRestriction *block_rstr; /* Blocked versions of restrictions */ CeedInt num_inputs, num_outputs; CeedInt qf_size_in, qf_size_out; CeedVector qf_l_vec; diff --git a/backends/cuda-ref/ceed-cuda-ref-operator.c b/backends/cuda-ref/ceed-cuda-ref-operator.c index 98d4dfe406..9f6d3d14b0 100644 --- a/backends/cuda-ref/ceed-cuda-ref-operator.c +++ b/backends/cuda-ref/ceed-cuda-ref-operator.c @@ -27,6 +27,7 @@ static int CeedOperatorDestroy_Cuda(CeedOperator op) { CeedCallBackend(CeedOperatorGetData(op, &impl)); // Apply data + CeedCallBackend(CeedFree(&impl->skip_rstr_in)); for (CeedInt i = 0; i < impl->num_inputs + impl->num_outputs; i++) { CeedCallBackend(CeedVectorDestroy(&impl->e_vecs[i])); } @@ -96,8 +97,8 @@ static int CeedOperatorDestroy_Cuda(CeedOperator op) { //------------------------------------------------------------------------------ // Setup infields or outfields //------------------------------------------------------------------------------ -static int CeedOperatorSetupFields_Cuda(CeedQFunction qf, CeedOperator op, bool is_input, bool is_at_points, CeedVector *e_vecs, CeedVector *q_vecs, - CeedInt start_e, CeedInt num_fields, CeedInt Q, CeedInt num_elem) { +static int CeedOperatorSetupFields_Cuda(CeedQFunction qf, CeedOperator op, bool is_input, bool is_at_points, bool *skip_rstr, CeedVector *e_vecs, + CeedVector *q_vecs, CeedInt start_e, CeedInt num_fields, CeedInt Q, CeedInt num_elem) { Ceed ceed; CeedQFunctionField *qf_fields; CeedOperatorField *op_fields; @@ -183,6 +184,27 @@ static int CeedOperatorSetupFields_Cuda(CeedQFunction qf, CeedOperator op, bool break; } } + // Drop duplicate input restrictions + if (is_input) { + for (CeedInt i = 0; i < num_fields; i++) { + CeedVector vec_i; + CeedElemRestriction rstr_i; + + CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec_i)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr_i)); + for (CeedInt j = i + 1; j < num_fields; j++) { + CeedVector vec_j; + CeedElemRestriction rstr_j; + + CeedCallBackend(CeedOperatorFieldGetVector(op_fields[j], &vec_j)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[j], &rstr_j)); + if (vec_i == vec_j && rstr_i == rstr_j) { + CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &e_vecs[j])); + skip_rstr[j] = true; + } + } + } + } return CEED_ERROR_SUCCESS; } @@ -211,6 +233,7 @@ static int CeedOperatorSetup_Cuda(CeedOperator op) { // Allocate CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs)); + CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->skip_rstr_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->input_states)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out)); @@ -219,10 +242,11 @@ static int CeedOperatorSetup_Cuda(CeedOperator op) { // Set up infield and outfield e_vecs and q_vecs // Infields - CeedCallBackend(CeedOperatorSetupFields_Cuda(qf, op, true, false, impl->e_vecs, impl->q_vecs_in, 0, num_input_fields, Q, num_elem)); + CeedCallBackend( + CeedOperatorSetupFields_Cuda(qf, op, true, false, impl->skip_rstr_in, impl->e_vecs, impl->q_vecs_in, 0, num_input_fields, Q, num_elem)); // Outfields CeedCallBackend( - CeedOperatorSetupFields_Cuda(qf, op, false, false, impl->e_vecs, impl->q_vecs_out, num_input_fields, num_output_fields, Q, num_elem)); + CeedOperatorSetupFields_Cuda(qf, op, false, false, NULL, impl->e_vecs, impl->q_vecs_out, num_input_fields, num_output_fields, Q, num_elem)); CeedCallBackend(CeedOperatorSetSetupDone(op)); return CEED_ERROR_SUCCESS; @@ -262,10 +286,10 @@ static inline int CeedOperatorSetupInputs_Cuda(CeedInt num_input_fields, CeedQFu uint64_t state; CeedCallBackend(CeedVectorGetState(vec, &state)); - if (state != impl->input_states[i]) { + if (state != impl->input_states[i] && !impl->skip_rstr_in[i]) { CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, vec, impl->e_vecs[i], request)); - impl->input_states[i] = state; } + impl->input_states[i] = state; // Get evec CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_DEVICE, (const CeedScalar **)&e_data[i])); } @@ -474,6 +498,7 @@ static int CeedOperatorSetupAtPoints_Cuda(CeedOperator op) { // Allocate CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs)); + CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->skip_rstr_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->input_states)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out)); @@ -482,9 +507,10 @@ static int CeedOperatorSetupAtPoints_Cuda(CeedOperator op) { // Set up infield and outfield e_vecs and q_vecs // Infields - CeedCallBackend(CeedOperatorSetupFields_Cuda(qf, op, true, true, impl->e_vecs, impl->q_vecs_in, 0, num_input_fields, max_num_points, num_elem)); + CeedCallBackend(CeedOperatorSetupFields_Cuda(qf, op, true, true, impl->skip_rstr_in, impl->e_vecs, impl->q_vecs_in, 0, num_input_fields, + max_num_points, num_elem)); // Outfields - CeedCallBackend(CeedOperatorSetupFields_Cuda(qf, op, false, true, impl->e_vecs, impl->q_vecs_out, num_input_fields, num_output_fields, + CeedCallBackend(CeedOperatorSetupFields_Cuda(qf, op, false, true, NULL, impl->e_vecs, impl->q_vecs_out, num_input_fields, num_output_fields, max_num_points, num_elem)); CeedCallBackend(CeedOperatorSetSetupDone(op)); diff --git a/backends/cuda-ref/ceed-cuda-ref.h b/backends/cuda-ref/ceed-cuda-ref.h index 0c2c5b4972..f8430a1b12 100644 --- a/backends/cuda-ref/ceed-cuda-ref.h +++ b/backends/cuda-ref/ceed-cuda-ref.h @@ -128,8 +128,9 @@ typedef struct { } CeedOperatorAssemble_Cuda; typedef struct { - CeedVector *e_vecs; // E-vectors, inputs followed by outputs + bool *skip_rstr_in; uint64_t *input_states; // State tracking for passive inputs + CeedVector *e_vecs; // E-vectors, inputs followed by outputs CeedVector *q_vecs_in; // Input Q-vectors needed to apply operator CeedVector *q_vecs_out; // Output Q-vectors needed to apply operator CeedInt num_inputs, num_outputs; diff --git a/backends/hip-ref/ceed-hip-ref-operator.c b/backends/hip-ref/ceed-hip-ref-operator.c index 34fb349f0d..bb5d09816d 100644 --- a/backends/hip-ref/ceed-hip-ref-operator.c +++ b/backends/hip-ref/ceed-hip-ref-operator.c @@ -26,6 +26,7 @@ static int CeedOperatorDestroy_Hip(CeedOperator op) { CeedCallBackend(CeedOperatorGetData(op, &impl)); // Apply data + CeedCallBackend(CeedFree(&impl->skip_rstr_in)); for (CeedInt i = 0; i < impl->num_inputs + impl->num_outputs; i++) { CeedCallBackend(CeedVectorDestroy(&impl->e_vecs[i])); } @@ -95,8 +96,8 @@ static int CeedOperatorDestroy_Hip(CeedOperator op) { //------------------------------------------------------------------------------ // Setup infields or outfields //------------------------------------------------------------------------------ -static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, bool is_input, bool is_at_points, CeedVector *e_vecs, CeedVector *q_vecs, - CeedInt start_e, CeedInt num_fields, CeedInt Q, CeedInt num_elem) { +static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, bool is_input, bool is_at_points, bool *skip_rstr, CeedVector *e_vecs, + CeedVector *q_vecs, CeedInt start_e, CeedInt num_fields, CeedInt Q, CeedInt num_elem) { Ceed ceed; CeedQFunctionField *qf_fields; CeedOperatorField *op_fields; @@ -182,6 +183,27 @@ static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, bool i break; } } + // Drop duplicate input restrictions + if (is_input) { + for (CeedInt i = 0; i < num_fields; i++) { + CeedVector vec_i; + CeedElemRestriction rstr_i; + + CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec_i)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr_i)); + for (CeedInt j = i + 1; j < num_fields; j++) { + CeedVector vec_j; + CeedElemRestriction rstr_j; + + CeedCallBackend(CeedOperatorFieldGetVector(op_fields[j], &vec_j)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[j], &rstr_j)); + if (vec_i == vec_j && rstr_i == rstr_j) { + CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &e_vecs[j])); + skip_rstr[j] = true; + } + } + } + } return CEED_ERROR_SUCCESS; } @@ -210,6 +232,7 @@ static int CeedOperatorSetup_Hip(CeedOperator op) { // Allocate CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs)); + CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->skip_rstr_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->input_states)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out)); @@ -218,10 +241,11 @@ static int CeedOperatorSetup_Hip(CeedOperator op) { // Set up infield and outfield e_vecs and q_vecs // Infields - CeedCallBackend(CeedOperatorSetupFields_Hip(qf, op, true, false, impl->e_vecs, impl->q_vecs_in, 0, num_input_fields, Q, num_elem)); + CeedCallBackend( + CeedOperatorSetupFields_Hip(qf, op, true, false, impl->skip_rstr_in, impl->e_vecs, impl->q_vecs_in, 0, num_input_fields, Q, num_elem)); // Outfields CeedCallBackend( - CeedOperatorSetupFields_Hip(qf, op, false, false, impl->e_vecs, impl->q_vecs_out, num_input_fields, num_output_fields, Q, num_elem)); + CeedOperatorSetupFields_Hip(qf, op, false, false, NULL, impl->e_vecs, impl->q_vecs_out, num_input_fields, num_output_fields, Q, num_elem)); CeedCallBackend(CeedOperatorSetSetupDone(op)); return CEED_ERROR_SUCCESS; @@ -261,10 +285,10 @@ static inline int CeedOperatorSetupInputs_Hip(CeedInt num_input_fields, CeedQFun uint64_t state; CeedCallBackend(CeedVectorGetState(vec, &state)); - if (state != impl->input_states[i]) { + if (state != impl->input_states[i] && !impl->skip_rstr_in[i]) { CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, vec, impl->e_vecs[i], request)); - impl->input_states[i] = state; } + impl->input_states[i] = state; // Get evec CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_DEVICE, (const CeedScalar **)&e_data[i])); } @@ -473,6 +497,7 @@ static int CeedOperatorSetupAtPoints_Hip(CeedOperator op) { // Allocate CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs)); + CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->skip_rstr_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->input_states)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out)); @@ -481,9 +506,10 @@ static int CeedOperatorSetupAtPoints_Hip(CeedOperator op) { // Set up infield and outfield e_vecs and q_vecs // Infields - CeedCallBackend(CeedOperatorSetupFields_Hip(qf, op, true, true, impl->e_vecs, impl->q_vecs_in, 0, num_input_fields, max_num_points, num_elem)); + CeedCallBackend(CeedOperatorSetupFields_Hip(qf, op, true, true, impl->skip_rstr_in, impl->e_vecs, impl->q_vecs_in, 0, num_input_fields, + max_num_points, num_elem)); // Outfields - CeedCallBackend(CeedOperatorSetupFields_Hip(qf, op, false, true, impl->e_vecs, impl->q_vecs_out, num_input_fields, num_output_fields, + CeedCallBackend(CeedOperatorSetupFields_Hip(qf, op, false, true, NULL, impl->e_vecs, impl->q_vecs_out, num_input_fields, num_output_fields, max_num_points, num_elem)); CeedCallBackend(CeedOperatorSetSetupDone(op)); diff --git a/backends/hip-ref/ceed-hip-ref.h b/backends/hip-ref/ceed-hip-ref.h index b0df567c55..5199ce8767 100644 --- a/backends/hip-ref/ceed-hip-ref.h +++ b/backends/hip-ref/ceed-hip-ref.h @@ -132,8 +132,9 @@ typedef struct { } CeedOperatorAssemble_Hip; typedef struct { - CeedVector *e_vecs; // E-vectors, inputs followed by outputs + bool *skip_rstr_in; uint64_t *input_states; // State tracking for passive inputs + CeedVector *e_vecs; // E-vectors, inputs followed by outputs CeedVector *q_vecs_in; // Input Q-vectors needed to apply operator CeedVector *q_vecs_out; // Output Q-vectors needed to apply operator CeedInt num_inputs, num_outputs; diff --git a/backends/opt/ceed-opt-operator.c b/backends/opt/ceed-opt-operator.c index db292879ce..eaacaedc12 100644 --- a/backends/opt/ceed-opt-operator.c +++ b/backends/opt/ceed-opt-operator.c @@ -139,6 +139,28 @@ static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, bool i // Initialize E-vec arrays if (e_vecs[i]) CeedCallBackend(CeedVectorSetValue(e_vecs[i], 0.0)); } + // Drop duplicate input restrictions + if (is_input) { + for (CeedInt i = 0; i < num_fields; i++) { + CeedVector vec_i; + CeedElemRestriction rstr_i; + + CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec_i)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr_i)); + for (CeedInt j = i + 1; j < num_fields; j++) { + CeedVector vec_j; + CeedElemRestriction rstr_j; + + CeedCallBackend(CeedOperatorFieldGetVector(op_fields[j], &vec_j)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[j], &rstr_j)); + if (vec_i == vec_j && rstr_i == rstr_j) { + CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &e_vecs[j])); + CeedCallBackend(CeedVectorReferenceCopy(e_vecs_full[i], &e_vecs_full[j])); + CeedCallBackend(CeedElemRestrictionDestroy(&block_rstr[j])); + } + } + } + } return CEED_ERROR_SUCCESS; } @@ -216,22 +238,23 @@ static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields, CeedQFun CeedVector in_vec, CeedScalar *e_data[2 * CEED_FIELD_MAX], CeedOperator_Opt *impl, CeedRequest *request) { for (CeedInt i = 0; i < num_input_fields; i++) { - uint64_t state; CeedEvalMode eval_mode; - CeedVector vec; CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); if (eval_mode == CEED_EVAL_WEIGHT) { // Skip } else { + uint64_t state; + CeedVector vec; + // Get input vector CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); if (vec != CEED_VECTOR_ACTIVE) { // Restrict CeedCallBackend(CeedVectorGetState(vec, &state)); - if (state != impl->input_states[i]) { + if (state != impl->input_states[i] && impl->block_rstr[i]) { CeedCallBackend(CeedElemRestrictionApply(impl->block_rstr[i], CEED_NOTRANSPOSE, vec, impl->e_vecs_full[i], request)); - impl->input_states[i] = state; } + impl->input_states[i] = state; // Get evec CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, (const CeedScalar **)&e_data[i])); } else { @@ -272,7 +295,7 @@ static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, CeedQFunction CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size)); // Restrict block active input - if (is_active_input) { + if (is_active_input && impl->block_rstr[i]) { CeedCallBackend(CeedElemRestrictionApplyBlock(impl->block_rstr[i], e / block_size, CEED_NOTRANSPOSE, in_vec, impl->e_vecs_in[i], request)); } // Basis action diff --git a/backends/ref/ceed-ref-operator.c b/backends/ref/ceed-ref-operator.c index 0c472d1d3c..d605c943e7 100644 --- a/backends/ref/ceed-ref-operator.c +++ b/backends/ref/ceed-ref-operator.c @@ -16,7 +16,7 @@ //------------------------------------------------------------------------------ // Setup Input/Output Fields //------------------------------------------------------------------------------ -static int CeedOperatorSetupFields_Ref(CeedQFunction qf, CeedOperator op, bool is_input, CeedVector *e_vecs_full, CeedVector *e_vecs, +static int CeedOperatorSetupFields_Ref(CeedQFunction qf, CeedOperator op, bool is_input, bool *skip_rstr, CeedVector *e_vecs_full, CeedVector *e_vecs, CeedVector *q_vecs, CeedInt start_e, CeedInt num_fields, CeedInt Q) { Ceed ceed; CeedSize e_size, q_size; @@ -78,6 +78,28 @@ static int CeedOperatorSetupFields_Ref(CeedQFunction qf, CeedOperator op, bool i break; } } + // Drop duplicate input restrictions + if (is_input) { + for (CeedInt i = 0; i < num_fields; i++) { + CeedVector vec_i; + CeedElemRestriction rstr_i; + + CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec_i)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr_i)); + for (CeedInt j = i + 1; j < num_fields; j++) { + CeedVector vec_j; + CeedElemRestriction rstr_j; + + CeedCallBackend(CeedOperatorFieldGetVector(op_fields[j], &vec_j)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[j], &rstr_j)); + if (vec_i == vec_j && rstr_i == rstr_j) { + CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &e_vecs[j])); + CeedCallBackend(CeedVectorReferenceCopy(e_vecs_full[i], &e_vecs_full[j])); + skip_rstr[j] = true; + } + } + } + } return CEED_ERROR_SUCCESS; } @@ -105,6 +127,7 @@ static int CeedOperatorSetup_Ref(CeedOperator op) { // Allocate CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full)); + CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->skip_rstr_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->input_states)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out)); @@ -116,10 +139,11 @@ static int CeedOperatorSetup_Ref(CeedOperator op) { // Set up infield and outfield e_vecs and q_vecs // Infields - CeedCallBackend(CeedOperatorSetupFields_Ref(qf, op, true, impl->e_vecs_full, impl->e_vecs_in, impl->q_vecs_in, 0, num_input_fields, Q)); - // Outfields CeedCallBackend( - CeedOperatorSetupFields_Ref(qf, op, false, impl->e_vecs_full, impl->e_vecs_out, impl->q_vecs_out, num_input_fields, num_output_fields, Q)); + CeedOperatorSetupFields_Ref(qf, op, true, impl->skip_rstr_in, impl->e_vecs_full, impl->e_vecs_in, impl->q_vecs_in, 0, num_input_fields, Q)); + // Outfields + CeedCallBackend(CeedOperatorSetupFields_Ref(qf, op, false, NULL, impl->e_vecs_full, impl->e_vecs_out, impl->q_vecs_out, num_input_fields, + num_output_fields, Q)); // Identity QFunctions if (impl->is_identity_qf) { @@ -167,11 +191,11 @@ static inline int CeedOperatorSetupInputs_Ref(CeedInt num_input_fields, CeedQFun // Restrict CeedCallBackend(CeedVectorGetState(vec, &state)); // Skip restriction if input is unchanged - if (state != impl->input_states[i] || vec == in_vec) { + if ((state != impl->input_states[i] || vec == in_vec) && !impl->skip_rstr_in[i]) { CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, vec, impl->e_vecs_full[i], request)); - impl->input_states[i] = state; } + impl->input_states[i] = state; // Get evec CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, (const CeedScalar **)&e_data_full[i])); } @@ -566,8 +590,8 @@ static int CeedOperatorLinearAssembleQFunctionUpdate_Ref(CeedOperator op, CeedVe //------------------------------------------------------------------------------ // Setup Input/Output Fields //------------------------------------------------------------------------------ -static int CeedOperatorSetupFieldsAtPoints_Ref(CeedQFunction qf, CeedOperator op, bool is_input, CeedVector *e_vecs_full, CeedVector *e_vecs, - CeedVector *q_vecs, CeedInt start_e, CeedInt num_fields, CeedInt Q) { +static int CeedOperatorSetupFieldsAtPoints_Ref(CeedQFunction qf, CeedOperator op, bool is_input, bool *skip_rstr, CeedVector *e_vecs_full, + CeedVector *e_vecs, CeedVector *q_vecs, CeedInt start_e, CeedInt num_fields, CeedInt Q) { Ceed ceed; CeedSize e_size, q_size; CeedInt max_num_points, num_comp, size, P; @@ -661,6 +685,27 @@ static int CeedOperatorSetupFieldsAtPoints_Ref(CeedQFunction qf, CeedOperator op if (e_vecs[i]) CeedCallBackend(CeedVectorSetValue(e_vecs[i], 0.0)); if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorSetValue(q_vecs[i], 0.0)); } + // Drop duplicate input restrictions + if (is_input) { + for (CeedInt i = 0; i < num_fields; i++) { + CeedVector vec_i; + CeedElemRestriction rstr_i; + + CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec_i)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr_i)); + for (CeedInt j = i + 1; j < num_fields; j++) { + CeedVector vec_j; + CeedElemRestriction rstr_j; + + CeedCallBackend(CeedOperatorFieldGetVector(op_fields[j], &vec_j)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[j], &rstr_j)); + if (vec_i == vec_j && rstr_i == rstr_j) { + CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &e_vecs[j])); + skip_rstr[j] = true; + } + } + } + } return CEED_ERROR_SUCCESS; } @@ -688,6 +733,7 @@ static int CeedOperatorSetupAtPoints_Ref(CeedOperator op) { // Allocate CeedCallBackend(CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full)); + CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->skip_rstr_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->input_states)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in)); CeedCallBackend(CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out)); @@ -699,9 +745,10 @@ static int CeedOperatorSetupAtPoints_Ref(CeedOperator op) { // Set up infield and outfield pointer arrays // Infields - CeedCallBackend(CeedOperatorSetupFieldsAtPoints_Ref(qf, op, true, impl->e_vecs_full, impl->e_vecs_in, impl->q_vecs_in, 0, num_input_fields, Q)); + CeedCallBackend(CeedOperatorSetupFieldsAtPoints_Ref(qf, op, true, impl->skip_rstr_in, impl->e_vecs_full, impl->e_vecs_in, impl->q_vecs_in, 0, + num_input_fields, Q)); // Outfields - CeedCallBackend(CeedOperatorSetupFieldsAtPoints_Ref(qf, op, false, impl->e_vecs_full, impl->e_vecs_out, impl->q_vecs_out, num_input_fields, + CeedCallBackend(CeedOperatorSetupFieldsAtPoints_Ref(qf, op, false, NULL, impl->e_vecs_full, impl->e_vecs_out, impl->q_vecs_out, num_input_fields, num_output_fields, Q)); // Identity QFunctions @@ -741,7 +788,7 @@ static inline int CeedOperatorInputBasisAtPoints_Ref(CeedInt e, CeedInt num_poin CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size)); // Restrict block active input - if (is_active_input) { + if (is_active_input && !impl->skip_rstr_in[i]) { if (rstr_type == CEED_RESTRICTION_POINTS) { CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement(elem_rstr, e, CEED_NOTRANSPOSE, in_vec, impl->e_vecs_in[i], request)); } else { @@ -1341,6 +1388,7 @@ static int CeedOperatorDestroy_Ref(CeedOperator op) { CeedOperator_Ref *impl; CeedCallBackend(CeedOperatorGetData(op, &impl)); + CeedCallBackend(CeedFree(&impl->skip_rstr_in)); for (CeedInt i = 0; i < impl->num_inputs + impl->num_outputs; i++) { CeedCallBackend(CeedVectorDestroy(&impl->e_vecs_full[i])); } diff --git a/backends/ref/ceed-ref.h b/backends/ref/ceed-ref.h index 369a27c049..ff8e9fa773 100644 --- a/backends/ref/ceed-ref.h +++ b/backends/ref/ceed-ref.h @@ -49,8 +49,9 @@ typedef struct { typedef struct { bool is_identity_qf, is_identity_rstr_op; - CeedVector *e_vecs_full; /* Full E-vectors, inputs followed by outputs */ + bool *skip_rstr_in; uint64_t *input_states; /* State counter of inputs */ + CeedVector *e_vecs_full; /* Full E-vectors, inputs followed by outputs */ CeedVector *e_vecs_in; /* Single element input E-vectors */ CeedVector *e_vecs_out; /* Single element output E-vectors */ CeedVector *q_vecs_in; /* Single element input Q-vectors */