From 681d0ea73ee05192cf73f31e6e4a886b41175395 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Mon, 19 Aug 2024 14:05:02 -0600 Subject: [PATCH 1/6] op - ReferenceCopy for CeedOperatorFieldGet* --- backends/blocked/ceed-blocked-operator.c | 56 ++- .../cuda-gen/ceed-cuda-gen-operator-build.cpp | 17 + backends/cuda-gen/ceed-cuda-gen-operator.c | 36 +- backends/cuda-ref/ceed-cuda-ref-operator.c | 230 +++++++--- .../hip-gen/ceed-hip-gen-operator-build.cpp | 2 + backends/hip-ref/ceed-hip-ref-operator.c | 236 +++++++--- backends/occa/ceed-occa-basis.cpp | 8 +- backends/occa/ceed-occa-elem-restriction.cpp | 6 +- backends/occa/ceed-occa-operator-field.cpp | 6 +- backends/opt/ceed-opt-operator.c | 74 +-- backends/ref/ceed-ref-operator.c | 132 ++++-- .../ceed-sycl-gen-operator-build.sycl.cpp | 16 +- .../sycl-gen/ceed-sycl-gen-operator.sycl.cpp | 20 +- .../sycl-ref/ceed-sycl-ref-operator.sycl.cpp | 428 ++++++++++-------- backends/sycl-ref/ceed-sycl-ref.hpp | 8 +- doc/sphinx/source/releasenotes.md | 1 + examples/fluids/problems/advection.c | 10 +- examples/fluids/problems/newtonian.c | 10 +- examples/fluids/src/differential_filter.c | 5 +- examples/fluids/src/setuplibceed.c | 12 +- interface/ceed-operator.c | 74 ++- interface/ceed-preconditioning.c | 59 ++- 22 files changed, 989 insertions(+), 457 deletions(-) diff --git a/backends/blocked/ceed-blocked-operator.c b/backends/blocked/ceed-blocked-operator.c index 80b1f44865..c9f5ccfd46 100644 --- a/backends/blocked/ceed-blocked-operator.c +++ b/backends/blocked/ceed-blocked-operator.c @@ -105,6 +105,7 @@ static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, CeedOperator op, bo // Empty case - won't occur break; } + CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); CeedCallBackend(CeedElemRestrictionCreateVector(block_rstr[i + start_e], NULL, &e_vecs_full[i + start_e])); } @@ -122,6 +123,7 @@ static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, CeedOperator op, bo CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size)); CeedCallBackend(CeedBasisGetNumNodes(basis, &P)); CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); + CeedCallBackend(CeedBasisDestroy(&basis)); e_size = (CeedSize)P * num_comp * block_size; CeedCallBackend(CeedVectorCreate(ceed, e_size, &e_vecs[i])); q_size = (CeedSize)Q * size * block_size; @@ -132,6 +134,7 @@ static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, CeedOperator op, bo q_size = (CeedSize)Q * block_size; CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); CeedCallBackend(CeedBasisApply(basis, block_size, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; } } @@ -154,7 +157,11 @@ static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, CeedOperator op, bo CeedCallBackend(CeedVectorReferenceCopy(e_vecs_full[i + start_e], &e_vecs_full[j + start_e])); skip_rstr[j] = true; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } else { for (CeedInt i = num_fields - 1; i >= 0; i--) { @@ -176,7 +183,11 @@ static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, CeedOperator op, bo apply_add_basis[i] = true; e_data_out_indices[j] = i; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } return CEED_ERROR_SUCCESS; @@ -259,13 +270,15 @@ static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields, Ceed CeedVector in_vec, bool skip_active, CeedScalar *e_data_full[2 * CEED_FIELD_MAX], CeedOperator_Blocked *impl, CeedRequest *request) { for (CeedInt i = 0; i < num_input_fields; i++) { + bool is_active; uint64_t state; CeedEvalMode eval_mode; CeedVector vec; // Get input vector CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) { + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) { if (skip_active) continue; else vec = in_vec; } @@ -282,6 +295,7 @@ static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields, Ceed // Get evec CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, (const CeedScalar **)&e_data_full[i])); } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } return CEED_ERROR_SUCCESS; } @@ -300,15 +314,19 @@ static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, CeedQFunc // Skip active input if (skip_active) { + bool is_active; CeedVector vec; CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) continue; + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (is_active) continue; } // Get elem_size, eval_mode, size CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size)); // Basis action @@ -324,6 +342,7 @@ static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, CeedQFunc CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); CeedCallBackend(CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data_full[i][(CeedSize)e * elem_size * num_comp])); CeedCallBackend(CeedBasisApply(basis, block_size, CEED_NOTRANSPOSE, eval_mode, impl->e_vecs_in[i], impl->q_vecs_in[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; case CEED_EVAL_WEIGHT: break; // No action @@ -347,6 +366,7 @@ static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, CeedQFun // Get elem_size, eval_mode, size CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); // Basis action switch (eval_mode) { @@ -365,6 +385,7 @@ static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, CeedQFun } else { CeedCallBackend(CeedBasisApply(basis, block_size, CEED_TRANSPOSE, eval_mode, impl->q_vecs_out[i], impl->e_vecs_out[i])); } + CeedCallBackend(CeedBasisDestroy(&basis)); break; // LCOV_EXCL_START case CEED_EVAL_WEIGHT: { @@ -386,10 +407,13 @@ static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields, Ce // Skip active inputs if (skip_active) { + bool is_active; CeedVector vec; CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) continue; + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (is_active) continue; } CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); if (eval_mode == CEED_EVAL_WEIGHT) { // Skip @@ -470,6 +494,7 @@ static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec, Ceed // Output restriction for (CeedInt i = 0; i < num_output_fields; i++) { + bool is_active; CeedVector vec; if (impl->skip_rstr_out[i]) continue; @@ -477,11 +502,13 @@ static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec, Ceed CeedCallBackend(CeedVectorRestoreArray(impl->e_vecs_full[i + impl->num_inputs], &e_data_full[i + num_input_fields])); // Get output vector CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); + is_active = vec == CEED_VECTOR_ACTIVE; // Active - if (vec == CEED_VECTOR_ACTIVE) vec = out_vec; + if (is_active) vec = out_vec; // Restrict CeedCallBackend( CeedElemRestrictionApply(impl->block_rstr[i + impl->num_inputs], CEED_TRANSPOSE, impl->e_vecs_full[i + impl->num_inputs], vec, request)); + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } // Restore input arrays @@ -533,14 +560,14 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked(CeedOperator o CeedInt field_size; CeedVector vec; - // Get input vector - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); // Check if active input + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size)); CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); qf_size_in += field_size; } + CeedCallBackend(CeedVectorDestroy(&vec)); } CeedCheck(qf_size_in > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs"); impl->qf_size_in = qf_size_in; @@ -552,13 +579,13 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked(CeedOperator o CeedInt field_size; CeedVector vec; - // Get output vector - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); // Check if active output + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &field_size)); qf_size_out += field_size; } + CeedCallBackend(CeedVectorDestroy(&vec)); } CeedCheck(qf_size_out > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs"); impl->qf_size_out = qf_size_out; @@ -601,13 +628,15 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked(CeedOperator o // Assemble QFunction for (CeedInt i = 0; i < num_input_fields; i++) { + bool is_active; CeedInt field_size; CeedVector vec; - // Get input vector - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); // Check if active input - if (vec != CEED_VECTOR_ACTIVE) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (!is_active) continue; CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size)); for (CeedInt field = 0; field < field_size; field++) { // Set current portion of input to 1.0 @@ -633,6 +662,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked(CeedOperator o CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[out], &field_size)); l_vec_array += field_size * Q * block_size; // Advance the pointer by the size of the output } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Apply QFunction CeedCallBackend(CeedQFunctionApply(qf, Q * block_size, impl->q_vecs_in, impl->q_vecs_out)); @@ -664,12 +694,12 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked(CeedOperator o for (CeedInt out = 0; out < num_output_fields; out++) { CeedVector vec; - // Get output vector - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); // Check if active output + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[out], CEED_MEM_HOST, NULL)); } + CeedCallBackend(CeedVectorDestroy(&vec)); } } diff --git a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp index 40ddf59ad1..c744ea4254 100644 --- a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp +++ b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp @@ -55,6 +55,7 @@ static int CeedOperatorBuildKernelData_Cuda_gen(Ceed ceed, CeedInt num_input_fie CeedCheck(*Q_1d == 0 || field_Q_1d == *Q_1d, ceed, CEED_ERROR_BACKEND, "Quadrature spaces must be compatible"); *Q_1d = field_Q_1d; } + CeedCallBackend(CeedBasisDestroy(&basis)); } for (CeedInt i = 0; i < num_output_fields; i++) { CeedBasis basis; @@ -77,6 +78,7 @@ static int CeedOperatorBuildKernelData_Cuda_gen(Ceed ceed, CeedInt num_input_fie CeedCheck(*Q_1d == 0 || field_Q_1d == *Q_1d, ceed, CEED_ERROR_BACKEND, "Quadrature spaces must be compatible"); *Q_1d = field_Q_1d; } + CeedCallBackend(CeedBasisDestroy(&basis)); } // Only use 3D collocated gradient parallelization strategy when gradient is computed @@ -96,6 +98,7 @@ static int CeedOperatorBuildKernelData_Cuda_gen(Ceed ceed, CeedInt num_input_fie CeedCallBackend(CeedBasisGetData(basis, &basis_data)); *use_3d_slices = basis_data->d_collo_grad_1d && (was_grad_found ? *use_3d_slices : true); was_grad_found = true; + CeedCallBackend(CeedBasisDestroy(&basis)); } } for (CeedInt i = 0; i < num_output_fields; i++) { @@ -110,6 +113,7 @@ static int CeedOperatorBuildKernelData_Cuda_gen(Ceed ceed, CeedInt num_input_fie CeedCallBackend(CeedBasisGetData(basis, &basis_data)); *use_3d_slices = basis_data->d_collo_grad_1d && (was_grad_found ? *use_3d_slices : true); was_grad_found = true; + CeedCallBackend(CeedBasisDestroy(&basis)); } } } @@ -138,6 +142,7 @@ static int CeedOperatorBuildKernelFieldData_Cuda_gen(std::ostringstream &code, C CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp)); } + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedOperatorFieldGetBasis(op_field, &basis)); if (basis != CEED_BASIS_NONE) { CeedCallBackend(CeedBasisGetData(basis, &basis_data)); @@ -150,6 +155,7 @@ static int CeedOperatorBuildKernelFieldData_Cuda_gen(std::ostringstream &code, C code << " const CeedInt " << P_name << " = " << (basis == CEED_BASIS_NONE ? Q_1d : P_1d) << ";\n"; code << " const CeedInt num_comp" << var_suffix << " = " << num_comp << ";\n"; } + CeedCallBackend(CeedBasisDestroy(&basis)); // Load basis data code << " // EvalMode: " << CeedEvalModes[eval_mode] << "\n"; @@ -224,6 +230,7 @@ static int CeedOperatorBuildKernelRestriction_Cuda_gen(std::ostringstream &code, if (basis != CEED_BASIS_NONE) { CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d)); } + CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_field, &eval_mode)); // Restriction @@ -302,6 +309,7 @@ static int CeedOperatorBuildKernelRestriction_Cuda_gen(std::ostringstream &code, << strides[2] << ">(data, elem, r_e" << var_suffix << ", d" << var_suffix << ");\n"; } } + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); return CEED_ERROR_SUCCESS; } @@ -324,6 +332,7 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp)); } + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedOperatorFieldGetBasis(op_field, &basis)); if (basis != CEED_BASIS_NONE) { CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d)); @@ -401,6 +410,7 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO // LCOV_EXCL_STOP } } + CeedCallBackend(CeedBasisDestroy(&basis)); return CEED_ERROR_SUCCESS; } @@ -489,6 +499,7 @@ static int CeedOperatorBuildKernelQFunction_Cuda_gen(std::ostringstream &code, C code << " readSliceQuadsOffset3d(data, l_size" << var_suffix << ", elem, q, indices.inputs[" << i << "], d" << var_suffix << ", r_s" << var_suffix << ");\n"; } + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); break; case CEED_EVAL_INTERP: code << " CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "];\n"; @@ -809,6 +820,7 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op) { CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); max_rstr_buffer_size = CeedIntMax(max_rstr_buffer_size, num_comp * elem_size); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } for (CeedInt i = 0; i < num_output_fields; i++) { CeedInt num_comp, elem_size; @@ -818,6 +830,7 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op) { CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); max_rstr_buffer_size = CeedIntMax(max_rstr_buffer_size, num_comp * elem_size); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } code << " // Scratch restriction buffer space\n"; code << " CeedScalar r_e_scratch[" << max_rstr_buffer_size << "];\n"; @@ -858,7 +871,11 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op) { input_field_order[curr_index] = j; curr_index++; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } diff --git a/backends/cuda-gen/ceed-cuda-gen-operator.c b/backends/cuda-gen/ceed-cuda-gen-operator.c index 840d97afb9..4c235984fd 100644 --- a/backends/cuda-gen/ceed-cuda-gen-operator.c +++ b/backends/cuda-gen/ceed-cuda-gen-operator.c @@ -133,30 +133,35 @@ static int CeedOperatorApplyAdd_Cuda_gen(CeedOperator op, CeedVector input_vec, // Input vectors for (CeedInt i = 0; i < num_input_fields; i++) { - CeedVector vec; - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); if (eval_mode == CEED_EVAL_WEIGHT) { // Skip data->fields.inputs[i] = NULL; } else { + bool is_active; + CeedVector vec; + // Get input vector CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) vec = input_vec; + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) vec = input_vec; CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i])); + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } } // Output vectors for (CeedInt i = 0; i < num_output_fields; i++) { - CeedVector vec; - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); if (eval_mode == CEED_EVAL_WEIGHT) { // Skip data->fields.outputs[i] = NULL; } else { + bool is_active; + CeedVector vec; + // Get output vector CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) vec = output_vec; + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) vec = output_vec; output_vecs[i] = vec; // Check for multiple output modes CeedInt index = -1; @@ -172,6 +177,7 @@ static int CeedOperatorApplyAdd_Cuda_gen(CeedOperator op, CeedVector input_vec, } else { data->fields.outputs[i] = data->fields.outputs[index]; } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } } @@ -203,26 +209,31 @@ static int CeedOperatorApplyAdd_Cuda_gen(CeedOperator op, CeedVector input_vec, // Restore input arrays for (CeedInt i = 0; i < num_input_fields; i++) { - CeedVector vec; - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); if (eval_mode == CEED_EVAL_WEIGHT) { // Skip } else { + bool is_active; + CeedVector vec; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) vec = input_vec; + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) vec = input_vec; CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i])); + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } } // Restore output arrays for (CeedInt i = 0; i < num_output_fields; i++) { - CeedVector vec; - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); if (eval_mode == CEED_EVAL_WEIGHT) { // Skip } else { + bool is_active; + CeedVector vec; + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) vec = output_vec; + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) vec = output_vec; // Check for multiple output modes CeedInt index = -1; for (CeedInt j = 0; j < i; j++) { @@ -234,6 +245,7 @@ static int CeedOperatorApplyAdd_Cuda_gen(CeedOperator op, CeedVector input_vec, if (index == -1) { CeedCallBackend(CeedVectorRestoreArray(vec, &data->fields.outputs[i])); } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } } diff --git a/backends/cuda-ref/ceed-cuda-ref-operator.c b/backends/cuda-ref/ceed-cuda-ref-operator.c index 986d240dff..ceb940dad6 100644 --- a/backends/cuda-ref/ceed-cuda-ref-operator.c +++ b/backends/cuda-ref/ceed-cuda-ref-operator.c @@ -133,6 +133,7 @@ static int CeedOperatorSetupFields_Cuda(CeedQFunction qf, CeedOperator op, bool // Input passive vectorr with CEED_EVAL_NONE and strided restriction with CEED_STRIDES_BACKEND CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &l_vec)); is_active = l_vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&l_vec)); CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); skip_e_vec = (is_input && is_active) || (is_active && eval_mode != CEED_EVAL_NONE) || (eval_mode == CEED_EVAL_WEIGHT); @@ -145,6 +146,7 @@ static int CeedOperatorSetupFields_Cuda(CeedQFunction qf, CeedOperator op, bool } else { CeedCallBackend(CeedElemRestrictionCreateVector(elem_rstr, NULL, &e_vecs[i])); } + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); switch (eval_mode) { case CEED_EVAL_NONE: @@ -171,6 +173,7 @@ static int CeedOperatorSetupFields_Cuda(CeedQFunction qf, CeedOperator op, bool } else { CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i])); } + CeedCallBackend(CeedBasisDestroy(&basis)); break; } } @@ -193,7 +196,11 @@ static int CeedOperatorSetupFields_Cuda(CeedQFunction qf, CeedOperator op, bool if (e_vecs[i]) CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &e_vecs[j])); skip_rstr[j] = true; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } else { for (CeedInt i = num_fields - 1; i >= 0; i--) { @@ -213,7 +220,11 @@ static int CeedOperatorSetupFields_Cuda(CeedQFunction qf, CeedOperator op, bool skip_rstr[j] = true; apply_add_basis[i] = true; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } return CEED_ERROR_SUCCESS; @@ -279,7 +290,11 @@ static int CeedOperatorSetup_Cuda(CeedOperator op) { impl->input_field_order[curr_index] = i; curr_index++; CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec_i)); - if (vec_i == CEED_VECTOR_NONE) continue; // CEED_EVAL_WEIGHT + if (vec_i == CEED_VECTOR_NONE) { + // CEED_EVAL_WEIGHT + CeedCallBackend(CeedVectorDestroy(&vec_i)); + continue; + }; CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &rstr_i)); CeedCallBackend(CeedElemRestrictionGetEVectorSize(rstr_i, &e_vec_len_i)); impl->max_active_e_vec_len = e_vec_len_i > impl->max_active_e_vec_len ? e_vec_len_i : impl->max_active_e_vec_len; @@ -294,7 +309,11 @@ static int CeedOperatorSetup_Cuda(CeedOperator op) { impl->input_field_order[curr_index] = j; curr_index++; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } { @@ -326,7 +345,11 @@ static int CeedOperatorSetup_Cuda(CeedOperator op) { impl->output_field_order[curr_index] = j; curr_index++; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } CeedCallBackend(CeedOperatorSetSetupDone(op)); @@ -363,10 +386,12 @@ static inline int CeedOperatorInputRestrict_Cuda(CeedOperatorField op_input_fiel CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_field, &elem_rstr)); CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, l_vec, e_vec, request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } impl->input_states[input_field] = state; } } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); return CEED_ERROR_SUCCESS; } @@ -411,11 +436,13 @@ static inline int CeedOperatorInputBasis_Cuda(CeedOperatorField op_input_field, CeedCallBackend(CeedOperatorFieldGetBasis(op_input_field, &basis)); CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, eval_mode, e_vec, q_vec)); + CeedCallBackend(CeedBasisDestroy(&basis)); break; } case CEED_EVAL_WEIGHT: break; // No action } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); return CEED_ERROR_SUCCESS; } @@ -449,6 +476,7 @@ static inline int CeedOperatorInputRestore_Cuda(CeedOperatorField op_input_field CeedCallBackend(CeedVectorRestoreArrayRead(l_vec, &e_vec_array)); } } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); return CEED_ERROR_SUCCESS; } @@ -510,12 +538,10 @@ static int CeedOperatorApplyAdd_Cuda(CeedOperator op, CeedVector in_vec, CeedVec // Output basis and restriction for (CeedInt i = 0; i < num_output_fields; i++) { - bool is_active = false; - CeedInt field = impl->output_field_order[i]; - CeedEvalMode eval_mode; - CeedVector l_vec, e_vec = impl->e_vecs_out[field], q_vec = impl->q_vecs_out[field]; - CeedElemRestriction elem_rstr; - CeedBasis basis; + bool is_active = false; + CeedInt field = impl->output_field_order[i]; + CeedEvalMode eval_mode; + CeedVector l_vec, e_vec = impl->e_vecs_out[field], q_vec = impl->q_vecs_out[field]; // Output vector CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[field], &l_vec)); @@ -533,14 +559,18 @@ static int CeedOperatorApplyAdd_Cuda(CeedOperator op, CeedVector in_vec, CeedVec case CEED_EVAL_INTERP: case CEED_EVAL_GRAD: case CEED_EVAL_DIV: - case CEED_EVAL_CURL: + case CEED_EVAL_CURL: { + CeedBasis basis; + CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[field], &basis)); if (impl->apply_add_basis_out[field]) { CeedCallBackend(CeedBasisApplyAdd(basis, num_elem, CEED_TRANSPOSE, eval_mode, q_vec, e_vec)); } else { CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_TRANSPOSE, eval_mode, q_vec, e_vec)); } + CeedCallBackend(CeedBasisDestroy(&basis)); break; + } // LCOV_EXCL_START case CEED_EVAL_WEIGHT: { return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); @@ -557,9 +587,18 @@ static int CeedOperatorApplyAdd_Cuda(CeedOperator op, CeedVector in_vec, CeedVec } // Restrict - if (impl->skip_rstr_out[field]) continue; - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[field], &elem_rstr)); - CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, e_vec, l_vec, request)); + if (impl->skip_rstr_out[field]) { + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); + continue; + } + { + CeedElemRestriction elem_rstr; + + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[field], &elem_rstr)); + CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, e_vec, l_vec, request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); + } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); } // Return work vector @@ -641,7 +680,11 @@ static int CeedOperatorSetupAtPoints_Cuda(CeedOperator op) { impl->input_field_order[curr_index] = i; curr_index++; CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec_i)); - if (vec_i == CEED_VECTOR_NONE) continue; // CEED_EVAL_WEIGHT + if (vec_i == CEED_VECTOR_NONE) { + // CEED_EVAL_WEIGHT + CeedCallBackend(CeedVectorDestroy(&vec_i)); + continue; + }; CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &rstr_i)); CeedCallBackend(CeedElemRestrictionGetEVectorSize(rstr_i, &e_vec_len_i)); impl->max_active_e_vec_len = e_vec_len_i > impl->max_active_e_vec_len ? e_vec_len_i : impl->max_active_e_vec_len; @@ -656,7 +699,11 @@ static int CeedOperatorSetupAtPoints_Cuda(CeedOperator op) { impl->input_field_order[curr_index] = j; curr_index++; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } { @@ -688,10 +735,13 @@ static int CeedOperatorSetupAtPoints_Cuda(CeedOperator op) { impl->output_field_order[curr_index] = j; curr_index++; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } - CeedCallBackend(CeedOperatorSetSetupDone(op)); return CEED_ERROR_SUCCESS; } @@ -737,11 +787,13 @@ static inline int CeedOperatorInputBasisAtPoints_Cuda(CeedOperatorField op_input CeedCallBackend(CeedOperatorFieldGetBasis(op_input_field, &basis)); CeedCallBackend(CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_NOTRANSPOSE, eval_mode, impl->point_coords_elem, e_vec, q_vec)); + CeedCallBackend(CeedBasisDestroy(&basis)); break; } case CEED_EVAL_WEIGHT: break; // No action } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); return CEED_ERROR_SUCCESS; } @@ -817,12 +869,10 @@ static int CeedOperatorApplyAddAtPoints_Cuda(CeedOperator op, CeedVector in_vec, // Output basis and restriction for (CeedInt i = 0; i < num_output_fields; i++) { - bool is_active = false; - CeedInt field = impl->output_field_order[i]; - CeedEvalMode eval_mode; - CeedVector l_vec, e_vec = impl->e_vecs_out[field], q_vec = impl->q_vecs_out[field]; - CeedElemRestriction elem_rstr; - CeedBasis basis; + bool is_active = false; + CeedInt field = impl->output_field_order[i]; + CeedEvalMode eval_mode; + CeedVector l_vec, e_vec = impl->e_vecs_out[field], q_vec = impl->q_vecs_out[field]; // Output vector CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[field], &l_vec)); @@ -840,14 +890,18 @@ static int CeedOperatorApplyAddAtPoints_Cuda(CeedOperator op, CeedVector in_vec, case CEED_EVAL_INTERP: case CEED_EVAL_GRAD: case CEED_EVAL_DIV: - case CEED_EVAL_CURL: + case CEED_EVAL_CURL: { + CeedBasis basis; + CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[field], &basis)); if (impl->apply_add_basis_out[field]) { CeedCallBackend(CeedBasisApplyAddAtPoints(basis, num_elem, num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, q_vec, e_vec)); } else { CeedCallBackend(CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, q_vec, e_vec)); } + CeedCallBackend(CeedBasisDestroy(&basis)); break; + } // LCOV_EXCL_START case CEED_EVAL_WEIGHT: { return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); @@ -864,9 +918,18 @@ static int CeedOperatorApplyAddAtPoints_Cuda(CeedOperator op, CeedVector in_vec, } // Restrict - if (impl->skip_rstr_out[field]) continue; - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[field], &elem_rstr)); - CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, e_vec, l_vec, request)); + if (impl->skip_rstr_out[field]) { + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); + continue; + } + { + CeedElemRestriction elem_rstr; + + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[field], &elem_rstr)); + CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, e_vec, l_vec, request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); + } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); } // Restore work vector @@ -931,6 +994,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Cuda(CeedOperator op, num_active_in += size; CeedCallBackend(CeedVectorRestoreArray(impl->q_vecs_in[i], &q_vec_array)); } + CeedCallBackend(CeedVectorDestroy(&l_vec)); } impl->num_active_in = num_active_in; impl->qf_active_in = active_inputs; @@ -947,6 +1011,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Cuda(CeedOperator op, CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &size)); num_active_out += size; } + CeedCallBackend(CeedVectorDestroy(&l_vec)); } impl->num_active_out = num_active_out; } @@ -980,14 +1045,14 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Cuda(CeedOperator op, for (CeedInt out = 0; out < num_output_fields; out++) { CeedVector l_vec; - // Get output vector - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &l_vec)); // Check if active output + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &l_vec)); if (l_vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_DEVICE, CEED_USE_POINTER, assembled_array)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[out], &size)); assembled_array += size * Q * num_elem; // Advance the pointer by the size of the output } + CeedCallBackend(CeedVectorDestroy(&l_vec)); } // Apply QFunction CeedCallBackend(CeedQFunctionApply(qf, Q * num_elem, impl->q_vecs_in, impl->q_vecs_out)); @@ -1001,6 +1066,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Cuda(CeedOperator op, if (l_vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[out], CEED_MEM_DEVICE, NULL)); } + CeedCallBackend(CeedVectorDestroy(&l_vec)); } // Restore input arrays @@ -1053,13 +1119,14 @@ static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op) { CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - CeedBasis basis; CeedEvalMode eval_mode; + CeedBasis basis; CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); CeedCheck(!basis_in || basis_in == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator diagonal assembly with multiple active bases"); - basis_in = basis; + if (!basis_in) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); if (eval_mode != CEED_EVAL_WEIGHT) { @@ -1069,6 +1136,7 @@ static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op) { num_eval_modes_in += q_comp; } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Determine active output basis @@ -1085,7 +1153,8 @@ static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op) { CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); CeedCheck(!basis_out || basis_out == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator diagonal assembly with multiple active bases"); - basis_out = basis; + if (!basis_out) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_out)); + CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); if (eval_mode != CEED_EVAL_WEIGHT) { @@ -1095,6 +1164,7 @@ static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op) { num_eval_modes_out += q_comp; } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Operator data struct @@ -1206,6 +1276,8 @@ static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op) { CeedCallCuda(ceed, cudaMemcpy(diag->d_eval_modes_out, eval_modes_out, num_eval_modes_out * eval_modes_bytes, cudaMemcpyHostToDevice)); CeedCallBackend(CeedFree(&eval_modes_in)); CeedCallBackend(CeedFree(&eval_modes_out)); + CeedCallBackend(CeedBasisDestroy(&basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis_out)); return CEED_ERROR_SUCCESS; } @@ -1237,14 +1309,18 @@ static inline int CeedOperatorAssembleDiagonalSetupCompile_Cuda(CeedOperator op, CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedEvalMode eval_mode; + CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); + CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); + if (!basis_in) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); if (eval_mode != CEED_EVAL_WEIGHT) { num_eval_modes_in += q_comp; } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Determine active output basis @@ -1256,14 +1332,18 @@ static inline int CeedOperatorAssembleDiagonalSetupCompile_Cuda(CeedOperator op, CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedEvalMode eval_mode; + CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); + CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); + if (!basis_out) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_out)); + CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); if (eval_mode != CEED_EVAL_WEIGHT) { num_eval_modes_out += q_comp; } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Operator data struct @@ -1287,6 +1367,8 @@ static inline int CeedOperatorAssembleDiagonalSetupCompile_Cuda(CeedOperator op, CeedCallCuda(ceed, CeedGetKernel_Cuda(ceed, *module, "LinearDiagonal", is_point_block ? &diag->LinearPointBlock : &diag->LinearDiagonal)); CeedCallBackend(CeedFree(&diagonal_kernel_path)); CeedCallBackend(CeedFree(&diagonal_kernel_source)); + CeedCallBackend(CeedBasisDestroy(&basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis_out)); return CEED_ERROR_SUCCESS; } @@ -1338,6 +1420,8 @@ static inline int CeedOperatorAssembleDiagonalCore_Cuda(CeedOperator op, CeedVec CeedCallBackend(CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag->point_block_diag_rstr)); CeedCallBackend(CeedElemRestrictionCreateVector(diag->point_block_diag_rstr, NULL, &diag->point_block_elem_diag)); } + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_in)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_out)); diag_rstr = is_point_block ? diag->point_block_diag_rstr : diag->diag_rstr; elem_diag = is_point_block ? diag->point_block_elem_diag : diag->elem_diag; CeedCallBackend(CeedVectorSetValue(elem_diag, 0.0)); @@ -1423,13 +1507,17 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op, CeedInt use_cee CeedCallBackend(CeedOperatorFieldGetVector(input_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - CeedBasis basis; - CeedEvalMode eval_mode; + CeedEvalMode eval_mode; + CeedElemRestriction elem_rstr; + CeedBasis basis; CeedCallBackend(CeedOperatorFieldGetBasis(input_fields[i], &basis)); CeedCheck(!basis_in || basis_in == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator assembly with multiple active bases"); - basis_in = basis; - CeedCallBackend(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in)); + if (!basis_in) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(input_fields[i], &elem_rstr)); + if (!rstr_in) CeedCallBackend(CeedElemRestrictionReferenceCopy(elem_rstr, &rstr_in)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in)); if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in; else CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in)); @@ -1444,6 +1532,7 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op, CeedInt use_cee num_eval_modes_in += q_comp; } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Determine active output basis; basis_out and rstr_out only used if same as input, TODO @@ -1453,14 +1542,18 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op, CeedInt use_cee CeedCallBackend(CeedOperatorFieldGetVector(output_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - CeedBasis basis; - CeedEvalMode eval_mode; + CeedEvalMode eval_mode; + CeedElemRestriction elem_rstr; + CeedBasis basis; CeedCallBackend(CeedOperatorFieldGetBasis(output_fields[i], &basis)); CeedCheck(!basis_out || basis_out == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator assembly with multiple active bases"); - basis_out = basis; - CeedCallBackend(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out)); + if (!basis_out) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_out)); + CeedCallBackend(CeedBasisDestroy(&basis)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(output_fields[i], &elem_rstr)); + if (!rstr_out) CeedCallBackend(CeedElemRestrictionReferenceCopy(elem_rstr, &rstr_out)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out)); if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out; else CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out)); @@ -1477,6 +1570,7 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op, CeedInt use_cee num_eval_modes_out += q_comp; } } + CeedCallBackend(CeedVectorDestroy(&vec)); } CeedCheck(num_eval_modes_in > 0 && num_eval_modes_out > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs"); @@ -1579,6 +1673,10 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op, CeedInt use_cee CeedCallBackend(CeedFree(&identity)); } CeedCallBackend(CeedFree(&eval_modes_out)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_in)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_out)); + CeedCallBackend(CeedBasisDestroy(&basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis_out)); return CEED_ERROR_SUCCESS; } @@ -1683,6 +1781,8 @@ static int CeedSingleOperatorAssemble_Cuda(CeedOperator op, CeedInt offset, Ceed CeedCallBackend(CeedElemRestrictionRestoreCurlOrientations(rstr_out, &curl_orients_out)); } } + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_in)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_out)); return CEED_ERROR_SUCCESS; } @@ -1754,10 +1854,13 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C // Clear active input Qvecs for (CeedInt i = 0; i < num_input_fields; i++) { - CeedVector vec; + bool is_active = false; + CeedVector l_vec; - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec != CEED_VECTOR_ACTIVE) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); + is_active = l_vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&l_vec)); + if (!is_active) continue; CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); } @@ -1776,15 +1879,17 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C // Loop over active fields for (CeedInt i = 0; i < num_input_fields; i++) { - bool is_active_at_points = true; + bool is_active = false, is_active_at_points = true; CeedInt elem_size = 1, num_comp_active = 1, e_vec_size = 0; CeedRestrictionType rstr_type; CeedVector l_vec; CeedElemRestriction elem_rstr; - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); // -- Skip non-active input - if (l_vec != CEED_VECTOR_ACTIVE) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); + is_active = l_vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&l_vec)); + if (!is_active) continue; // -- Get active restriction type CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); @@ -1793,18 +1898,19 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C if (!is_active_at_points) CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); else elem_size = max_num_points; CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp_active)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); e_vec_size = elem_size * num_comp_active; for (CeedInt s = 0; s < e_vec_size; s++) { - bool is_active_input = false; + bool is_active = false; CeedEvalMode eval_mode; CeedVector l_vec, q_vec = impl->q_vecs_in[i]; - CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); // Skip non-active input - is_active_input = l_vec == CEED_VECTOR_ACTIVE; - if (!is_active_input) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); + is_active = l_vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&l_vec)); + if (!is_active) continue; // Update unit vector if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0)); @@ -1824,11 +1930,15 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C case CEED_EVAL_INTERP: case CEED_EVAL_GRAD: case CEED_EVAL_DIV: - case CEED_EVAL_CURL: + case CEED_EVAL_CURL: { + CeedBasis basis; + CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); CeedCallBackend( CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_NOTRANSPOSE, eval_mode, impl->point_coords_elem, active_e_vec_in, q_vec)); + CeedCallBackend(CeedBasisDestroy(&basis)); break; + } case CEED_EVAL_WEIGHT: break; // No action } @@ -1838,18 +1948,18 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C // Output basis apply if needed for (CeedInt j = 0; j < num_output_fields; j++) { - bool is_active_output = false; - CeedInt elem_size = 0; + bool is_active = false; + CeedInt elem_size = 0; CeedRestrictionType rstr_type; CeedEvalMode eval_mode; CeedVector l_vec, e_vec = impl->e_vecs_out[j], q_vec = impl->q_vecs_out[j]; CeedElemRestriction elem_rstr; - CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[j], &l_vec)); // ---- Skip non-active output - is_active_output = l_vec == CEED_VECTOR_ACTIVE; - if (!is_active_output) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[j], &l_vec)); + is_active = l_vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&l_vec)); + if (!is_active) continue; if (!e_vec) e_vec = active_e_vec_out; // ---- Check if elem size matches @@ -1881,10 +1991,14 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C case CEED_EVAL_INTERP: case CEED_EVAL_GRAD: case CEED_EVAL_DIV: - case CEED_EVAL_CURL: + case CEED_EVAL_CURL: { + CeedBasis basis; + CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[j], &basis)); CeedCallBackend(CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, q_vec, e_vec)); + CeedCallBackend(CeedBasisDestroy(&basis)); break; + } // LCOV_EXCL_START case CEED_EVAL_WEIGHT: { return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); @@ -1896,8 +2010,8 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C CeedCallBackend(CeedVectorPointwiseMult(e_vec, active_e_vec_in, e_vec)); // Restrict - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[j], &elem_rstr)); CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, e_vec, assembled, request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); // Reset q_vec for if (eval_mode == CEED_EVAL_NONE) { diff --git a/backends/hip-gen/ceed-hip-gen-operator-build.cpp b/backends/hip-gen/ceed-hip-gen-operator-build.cpp index c3298b1b27..ee0dea2609 100644 --- a/backends/hip-gen/ceed-hip-gen-operator-build.cpp +++ b/backends/hip-gen/ceed-hip-gen-operator-build.cpp @@ -818,6 +818,7 @@ extern "C" int CeedOperatorBuildKernel_Hip_gen(CeedOperator op) { CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); max_rstr_buffer_size = CeedIntMax(max_rstr_buffer_size, num_comp * elem_size); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } for (CeedInt i = 0; i < num_output_fields; i++) { CeedInt num_comp, elem_size; @@ -827,6 +828,7 @@ extern "C" int CeedOperatorBuildKernel_Hip_gen(CeedOperator op) { CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); max_rstr_buffer_size = CeedIntMax(max_rstr_buffer_size, num_comp * elem_size); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } code << " // Scratch restriction buffer space\n"; code << " CeedScalar r_e_scratch[" << max_rstr_buffer_size << "];\n"; diff --git a/backends/hip-ref/ceed-hip-ref-operator.c b/backends/hip-ref/ceed-hip-ref-operator.c index 6525c0b47a..c6307037fa 100644 --- a/backends/hip-ref/ceed-hip-ref-operator.c +++ b/backends/hip-ref/ceed-hip-ref-operator.c @@ -132,6 +132,7 @@ static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, bool i // Input passive vectorr with CEED_EVAL_NONE and strided restriction with CEED_STRIDES_BACKEND CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &l_vec)); is_active = l_vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&l_vec)); CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); skip_e_vec = (is_input && is_active) || (is_active && eval_mode != CEED_EVAL_NONE) || (eval_mode == CEED_EVAL_WEIGHT); @@ -144,6 +145,7 @@ static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, bool i } else { CeedCallBackend(CeedElemRestrictionCreateVector(elem_rstr, NULL, &e_vecs[i])); } + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); switch (eval_mode) { case CEED_EVAL_NONE: @@ -170,6 +172,7 @@ static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, bool i } else { CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i])); } + CeedCallBackend(CeedBasisDestroy(&basis)); break; } } @@ -192,7 +195,11 @@ static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, bool i if (e_vecs[i]) CeedCallBackend(CeedVectorReferenceCopy(e_vecs[i], &e_vecs[j])); skip_rstr[j] = true; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } else { for (CeedInt i = num_fields - 1; i >= 0; i--) { @@ -212,7 +219,11 @@ static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, bool i skip_rstr[j] = true; apply_add_basis[i] = true; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } return CEED_ERROR_SUCCESS; @@ -278,7 +289,11 @@ static int CeedOperatorSetup_Hip(CeedOperator op) { impl->input_field_order[curr_index] = i; curr_index++; CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec_i)); - if (vec_i == CEED_VECTOR_NONE) continue; // CEED_EVAL_WEIGHT + if (vec_i == CEED_VECTOR_NONE) { + // CEED_EVAL_WEIGHT + CeedCallBackend(CeedVectorDestroy(&vec_i)); + continue; + }; CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &rstr_i)); CeedCallBackend(CeedElemRestrictionGetEVectorSize(rstr_i, &e_vec_len_i)); impl->max_active_e_vec_len = e_vec_len_i > impl->max_active_e_vec_len ? e_vec_len_i : impl->max_active_e_vec_len; @@ -293,7 +308,11 @@ static int CeedOperatorSetup_Hip(CeedOperator op) { impl->input_field_order[curr_index] = j; curr_index++; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } { @@ -325,7 +344,11 @@ static int CeedOperatorSetup_Hip(CeedOperator op) { impl->output_field_order[curr_index] = j; curr_index++; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } CeedCallBackend(CeedOperatorSetSetupDone(op)); @@ -362,10 +385,12 @@ static inline int CeedOperatorInputRestrict_Hip(CeedOperatorField op_input_field CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_field, &elem_rstr)); CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, l_vec, e_vec, request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } impl->input_states[input_field] = state; } } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); return CEED_ERROR_SUCCESS; } @@ -410,11 +435,13 @@ static inline int CeedOperatorInputBasis_Hip(CeedOperatorField op_input_field, C CeedCallBackend(CeedOperatorFieldGetBasis(op_input_field, &basis)); CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, eval_mode, e_vec, q_vec)); + CeedCallBackend(CeedBasisDestroy(&basis)); break; } case CEED_EVAL_WEIGHT: break; // No action } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); return CEED_ERROR_SUCCESS; } @@ -448,6 +475,7 @@ static inline int CeedOperatorInputRestore_Hip(CeedOperatorField op_input_field, CeedCallBackend(CeedVectorRestoreArrayRead(l_vec, &e_vec_array)); } } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); return CEED_ERROR_SUCCESS; } @@ -508,12 +536,10 @@ static int CeedOperatorApplyAdd_Hip(CeedOperator op, CeedVector in_vec, CeedVect // Output basis and restriction for (CeedInt i = 0; i < num_output_fields; i++) { - bool is_active = false; - CeedInt field = impl->output_field_order[i]; - CeedEvalMode eval_mode; - CeedVector l_vec, e_vec = impl->e_vecs_out[field], q_vec = impl->q_vecs_out[field]; - CeedElemRestriction elem_rstr; - CeedBasis basis; + bool is_active = false; + CeedInt field = impl->output_field_order[i]; + CeedEvalMode eval_mode; + CeedVector l_vec, e_vec = impl->e_vecs_out[field], q_vec = impl->q_vecs_out[field]; // Output vector CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[field], &l_vec)); @@ -531,14 +557,18 @@ static int CeedOperatorApplyAdd_Hip(CeedOperator op, CeedVector in_vec, CeedVect case CEED_EVAL_INTERP: case CEED_EVAL_GRAD: case CEED_EVAL_DIV: - case CEED_EVAL_CURL: + case CEED_EVAL_CURL: { + CeedBasis basis; + CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[field], &basis)); if (impl->apply_add_basis_out[field]) { CeedCallBackend(CeedBasisApplyAdd(basis, num_elem, CEED_TRANSPOSE, eval_mode, q_vec, e_vec)); } else { CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_TRANSPOSE, eval_mode, q_vec, e_vec)); } + CeedCallBackend(CeedBasisDestroy(&basis)); break; + } // LCOV_EXCL_START case CEED_EVAL_WEIGHT: { return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); @@ -555,9 +585,18 @@ static int CeedOperatorApplyAdd_Hip(CeedOperator op, CeedVector in_vec, CeedVect } // Restrict - if (impl->skip_rstr_out[field]) continue; - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[field], &elem_rstr)); - CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, e_vec, l_vec, request)); + if (impl->skip_rstr_out[field]) { + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); + continue; + } + { + CeedElemRestriction elem_rstr; + + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[field], &elem_rstr)); + CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, e_vec, l_vec, request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); + } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); } // Return work vector @@ -639,7 +678,11 @@ static int CeedOperatorSetupAtPoints_Hip(CeedOperator op) { impl->input_field_order[curr_index] = i; curr_index++; CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec_i)); - if (vec_i == CEED_VECTOR_NONE) continue; // CEED_EVAL_WEIGHT + if (vec_i == CEED_VECTOR_NONE) { + // CEED_EVAL_WEIGHT + CeedCallBackend(CeedVectorDestroy(&vec_i)); + continue; + }; CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &rstr_i)); CeedCallBackend(CeedElemRestrictionGetEVectorSize(rstr_i, &e_vec_len_i)); impl->max_active_e_vec_len = e_vec_len_i > impl->max_active_e_vec_len ? e_vec_len_i : impl->max_active_e_vec_len; @@ -654,7 +697,11 @@ static int CeedOperatorSetupAtPoints_Hip(CeedOperator op) { impl->input_field_order[curr_index] = j; curr_index++; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } { @@ -686,10 +733,13 @@ static int CeedOperatorSetupAtPoints_Hip(CeedOperator op) { impl->output_field_order[curr_index] = j; curr_index++; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } - CeedCallBackend(CeedOperatorSetSetupDone(op)); return CEED_ERROR_SUCCESS; } @@ -735,11 +785,13 @@ static inline int CeedOperatorInputBasisAtPoints_Hip(CeedOperatorField op_input_ CeedCallBackend(CeedOperatorFieldGetBasis(op_input_field, &basis)); CeedCallBackend(CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_NOTRANSPOSE, eval_mode, impl->point_coords_elem, e_vec, q_vec)); + CeedCallBackend(CeedBasisDestroy(&basis)); break; } case CEED_EVAL_WEIGHT: break; // No action } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); return CEED_ERROR_SUCCESS; } @@ -814,12 +866,10 @@ static int CeedOperatorApplyAddAtPoints_Hip(CeedOperator op, CeedVector in_vec, // Output basis and restriction for (CeedInt i = 0; i < num_output_fields; i++) { - bool is_active = false; - CeedInt field = impl->output_field_order[i]; - CeedEvalMode eval_mode; - CeedVector l_vec, e_vec = impl->e_vecs_out[field], q_vec = impl->q_vecs_out[field]; - CeedElemRestriction elem_rstr; - CeedBasis basis; + bool is_active = false; + CeedInt field = impl->output_field_order[i]; + CeedEvalMode eval_mode; + CeedVector l_vec, e_vec = impl->e_vecs_out[field], q_vec = impl->q_vecs_out[field]; // Output vector CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[field], &l_vec)); @@ -837,14 +887,18 @@ static int CeedOperatorApplyAddAtPoints_Hip(CeedOperator op, CeedVector in_vec, case CEED_EVAL_INTERP: case CEED_EVAL_GRAD: case CEED_EVAL_DIV: - case CEED_EVAL_CURL: + case CEED_EVAL_CURL: { + CeedBasis basis; + CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[field], &basis)); if (impl->apply_add_basis_out[field]) { CeedCallBackend(CeedBasisApplyAddAtPoints(basis, num_elem, num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, q_vec, e_vec)); } else { CeedCallBackend(CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, q_vec, e_vec)); } + CeedCallBackend(CeedBasisDestroy(&basis)); break; + } // LCOV_EXCL_START case CEED_EVAL_WEIGHT: { return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); @@ -861,9 +915,18 @@ static int CeedOperatorApplyAddAtPoints_Hip(CeedOperator op, CeedVector in_vec, } // Restrict - if (impl->skip_rstr_out[field]) continue; - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[field], &elem_rstr)); - CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, e_vec, l_vec, request)); + if (impl->skip_rstr_out[field]) { + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); + continue; + } + { + CeedElemRestriction elem_rstr; + + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[field], &elem_rstr)); + CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, e_vec, l_vec, request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); + } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&l_vec)); } // Restore work vector @@ -928,6 +991,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Hip(CeedOperator op, b num_active_in += size; CeedCallBackend(CeedVectorRestoreArray(impl->q_vecs_in[i], &q_vec_array)); } + CeedCallBackend(CeedVectorDestroy(&l_vec)); } impl->num_active_in = num_active_in; impl->qf_active_in = active_inputs; @@ -944,6 +1008,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Hip(CeedOperator op, b CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &size)); num_active_out += size; } + CeedCallBackend(CeedVectorDestroy(&l_vec)); } impl->num_active_out = num_active_out; } @@ -977,14 +1042,14 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Hip(CeedOperator op, b for (CeedInt out = 0; out < num_output_fields; out++) { CeedVector l_vec; - // Get output vector - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &l_vec)); // Check if active output + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &l_vec)); if (l_vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_DEVICE, CEED_USE_POINTER, assembled_array)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[out], &size)); assembled_array += size * Q * num_elem; // Advance the pointer by the size of the output } + CeedCallBackend(CeedVectorDestroy(&l_vec)); } // Apply QFunction CeedCallBackend(CeedQFunctionApply(qf, Q * num_elem, impl->q_vecs_in, impl->q_vecs_out)); @@ -998,6 +1063,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Hip(CeedOperator op, b if (l_vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[out], CEED_MEM_DEVICE, NULL)); } + CeedCallBackend(CeedVectorDestroy(&l_vec)); } // Restore input arrays @@ -1050,13 +1116,14 @@ static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op) { CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - CeedBasis basis; CeedEvalMode eval_mode; + CeedBasis basis; CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); CeedCheck(!basis_in || basis_in == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator diagonal assembly with multiple active bases"); - basis_in = basis; + if (!basis_in) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); if (eval_mode != CEED_EVAL_WEIGHT) { @@ -1066,6 +1133,7 @@ static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op) { num_eval_modes_in += q_comp; } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Determine active output basis @@ -1082,7 +1150,8 @@ static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op) { CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); CeedCheck(!basis_out || basis_out == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator diagonal assembly with multiple active bases"); - basis_out = basis; + if (!basis_out) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_out)); + CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); if (eval_mode != CEED_EVAL_WEIGHT) { @@ -1092,6 +1161,7 @@ static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op) { num_eval_modes_out += q_comp; } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Operator data struct @@ -1203,6 +1273,8 @@ static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op) { CeedCallHip(ceed, hipMemcpy(diag->d_eval_modes_out, eval_modes_out, num_eval_modes_out * eval_modes_bytes, hipMemcpyHostToDevice)); CeedCallBackend(CeedFree(&eval_modes_in)); CeedCallBackend(CeedFree(&eval_modes_out)); + CeedCallBackend(CeedBasisDestroy(&basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis_out)); return CEED_ERROR_SUCCESS; } @@ -1234,14 +1306,18 @@ static inline int CeedOperatorAssembleDiagonalSetupCompile_Hip(CeedOperator op, CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedEvalMode eval_mode; + CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); + CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); + if (!basis_in) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp)); if (eval_mode != CEED_EVAL_WEIGHT) { num_eval_modes_in += q_comp; } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Determine active output basis @@ -1253,14 +1329,18 @@ static inline int CeedOperatorAssembleDiagonalSetupCompile_Hip(CeedOperator op, CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedEvalMode eval_mode; + CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); + CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); + if (!basis_out) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_out)); + CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp)); if (eval_mode != CEED_EVAL_WEIGHT) { num_eval_modes_out += q_comp; } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Operator data struct @@ -1284,6 +1364,8 @@ static inline int CeedOperatorAssembleDiagonalSetupCompile_Hip(CeedOperator op, CeedCallHip(ceed, CeedGetKernel_Hip(ceed, *module, "LinearDiagonal", is_point_block ? &diag->LinearPointBlock : &diag->LinearDiagonal)); CeedCallBackend(CeedFree(&diagonal_kernel_path)); CeedCallBackend(CeedFree(&diagonal_kernel_source)); + CeedCallBackend(CeedBasisDestroy(&basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis_out)); return CEED_ERROR_SUCCESS; } @@ -1335,6 +1417,8 @@ static inline int CeedOperatorAssembleDiagonalCore_Hip(CeedOperator op, CeedVect CeedCallBackend(CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag->point_block_diag_rstr)); CeedCallBackend(CeedElemRestrictionCreateVector(diag->point_block_diag_rstr, NULL, &diag->point_block_elem_diag)); } + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_in)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_out)); diag_rstr = is_point_block ? diag->point_block_diag_rstr : diag->diag_rstr; elem_diag = is_point_block ? diag->point_block_elem_diag : diag->elem_diag; CeedCallBackend(CeedVectorSetValue(elem_diag, 0.0)); @@ -1393,7 +1477,7 @@ static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Hip(CeedOperator op, //------------------------------------------------------------------------------ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op, CeedInt use_ceedsize_idx) { Ceed ceed; - Ceed_Hip *Hip_data; + Ceed_Hip *hip_data; char *assembly_kernel_source; const char *assembly_kernel_path; CeedInt num_input_fields, num_output_fields, num_eval_modes_in = 0, num_eval_modes_out = 0; @@ -1420,13 +1504,17 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op, CeedInt use_ceed CeedCallBackend(CeedOperatorFieldGetVector(input_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - CeedBasis basis; - CeedEvalMode eval_mode; + CeedEvalMode eval_mode; + CeedElemRestriction elem_rstr; + CeedBasis basis; CeedCallBackend(CeedOperatorFieldGetBasis(input_fields[i], &basis)); CeedCheck(!basis_in || basis_in == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator assembly with multiple active bases"); - basis_in = basis; - CeedCallBackend(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in)); + if (!basis_in) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(input_fields[i], &elem_rstr)); + if (!rstr_in) CeedCallBackend(CeedElemRestrictionReferenceCopy(elem_rstr, &rstr_in)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in)); if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in; else CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in)); @@ -1441,6 +1529,7 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op, CeedInt use_ceed num_eval_modes_in += q_comp; } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Determine active output basis; basis_out and rstr_out only used if same as input, TODO @@ -1450,14 +1539,18 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op, CeedInt use_ceed CeedCallBackend(CeedOperatorFieldGetVector(output_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - CeedBasis basis; - CeedEvalMode eval_mode; + CeedEvalMode eval_mode; + CeedElemRestriction elem_rstr; + CeedBasis basis; CeedCallBackend(CeedOperatorFieldGetBasis(output_fields[i], &basis)); CeedCheck(!basis_out || basis_out == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator assembly with multiple active bases"); - basis_out = basis; - CeedCallBackend(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out)); + if (!basis_out) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_out)); + CeedCallBackend(CeedBasisDestroy(&basis)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(output_fields[i], &elem_rstr)); + if (!rstr_out) CeedCallBackend(CeedElemRestrictionReferenceCopy(elem_rstr, &rstr_out)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out)); if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out; else CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out)); @@ -1474,6 +1567,7 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op, CeedInt use_ceed num_eval_modes_out += q_comp; } } + CeedCallBackend(CeedVectorDestroy(&vec)); } CeedCheck(num_eval_modes_in > 0 && num_eval_modes_out > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs"); @@ -1483,8 +1577,8 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op, CeedInt use_ceed asmb->block_size_x = elem_size_in; asmb->block_size_y = elem_size_out; - CeedCallBackend(CeedGetData(ceed, &Hip_data)); - bool fallback = asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block > Hip_data->device_prop.maxThreadsPerBlock; + CeedCallBackend(CeedGetData(ceed, &hip_data)); + bool fallback = asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block > hip_data->device_prop.maxThreadsPerBlock; if (fallback) { // Use fallback kernel with 1D threadblock @@ -1576,6 +1670,10 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op, CeedInt use_ceed CeedCallBackend(CeedFree(&identity)); } CeedCallBackend(CeedFree(&eval_modes_out)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_in)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_out)); + CeedCallBackend(CeedBasisDestroy(&basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis_out)); return CEED_ERROR_SUCCESS; } @@ -1680,6 +1778,8 @@ static int CeedSingleOperatorAssemble_Hip(CeedOperator op, CeedInt offset, CeedV CeedCallBackend(CeedElemRestrictionRestoreCurlOrientations(rstr_out, &curl_orients_out)); } } + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_in)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_out)); return CEED_ERROR_SUCCESS; } @@ -1751,10 +1851,13 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce // Clear active input Qvecs for (CeedInt i = 0; i < num_input_fields; i++) { - CeedVector vec; + bool is_active = false; + CeedVector l_vec; - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec != CEED_VECTOR_ACTIVE) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); + is_active = l_vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&l_vec)); + if (!is_active) continue; CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); } @@ -1773,15 +1876,17 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce // Loop over active fields for (CeedInt i = 0; i < num_input_fields; i++) { - bool is_active_at_points = true; + bool is_active = false, is_active_at_points = true; CeedInt elem_size = 1, num_comp_active = 1, e_vec_size = 0; CeedRestrictionType rstr_type; CeedVector l_vec; CeedElemRestriction elem_rstr; - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); // -- Skip non-active input - if (l_vec != CEED_VECTOR_ACTIVE) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); + is_active = l_vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&l_vec)); + if (!is_active) continue; // -- Get active restriction type CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); @@ -1790,18 +1895,19 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce if (!is_active_at_points) CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); else elem_size = max_num_points; CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp_active)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); e_vec_size = elem_size * num_comp_active; for (CeedInt s = 0; s < e_vec_size; s++) { - bool is_active_input = false; + bool is_active = false; CeedEvalMode eval_mode; CeedVector l_vec, q_vec = impl->q_vecs_in[i]; - CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); // Skip non-active input - is_active_input = l_vec == CEED_VECTOR_ACTIVE; - if (!is_active_input) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); + is_active = l_vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&l_vec)); + if (!is_active) continue; // Update unit vector if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0)); @@ -1821,11 +1927,15 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce case CEED_EVAL_INTERP: case CEED_EVAL_GRAD: case CEED_EVAL_DIV: - case CEED_EVAL_CURL: + case CEED_EVAL_CURL: { + CeedBasis basis; + CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); CeedCallBackend( CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_NOTRANSPOSE, eval_mode, impl->point_coords_elem, active_e_vec_in, q_vec)); + CeedCallBackend(CeedBasisDestroy(&basis)); break; + } case CEED_EVAL_WEIGHT: break; // No action } @@ -1835,18 +1945,18 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce // Output basis apply if needed for (CeedInt j = 0; j < num_output_fields; j++) { - bool is_active_output = false; - CeedInt elem_size = 0; + bool is_active = false; + CeedInt elem_size = 0; CeedRestrictionType rstr_type; CeedEvalMode eval_mode; CeedVector l_vec, e_vec = impl->e_vecs_out[j], q_vec = impl->q_vecs_out[j]; CeedElemRestriction elem_rstr; - CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[j], &l_vec)); // ---- Skip non-active output - is_active_output = l_vec == CEED_VECTOR_ACTIVE; - if (!is_active_output) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[j], &l_vec)); + is_active = l_vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&l_vec)); + if (!is_active) continue; if (!e_vec) e_vec = active_e_vec_out; // ---- Check if elem size matches @@ -1878,10 +1988,14 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce case CEED_EVAL_INTERP: case CEED_EVAL_GRAD: case CEED_EVAL_DIV: - case CEED_EVAL_CURL: + case CEED_EVAL_CURL: { + CeedBasis basis; + CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[j], &basis)); CeedCallBackend(CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, q_vec, e_vec)); + CeedCallBackend(CeedBasisDestroy(&basis)); break; + } // LCOV_EXCL_START case CEED_EVAL_WEIGHT: { return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); @@ -1893,8 +2007,8 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce CeedCallBackend(CeedVectorPointwiseMult(e_vec, active_e_vec_in, e_vec)); // Restrict - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[j], &elem_rstr)); CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, e_vec, assembled, request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); // Reset q_vec for if (eval_mode == CEED_EVAL_NONE) { diff --git a/backends/occa/ceed-occa-basis.cpp b/backends/occa/ceed-occa-basis.cpp index 0c33da5453..c9f94d18d0 100644 --- a/backends/occa/ceed-occa-basis.cpp +++ b/backends/occa/ceed-occa-basis.cpp @@ -43,9 +43,11 @@ Basis *Basis::from(CeedBasis basis) { } Basis *Basis::from(CeedOperatorField operatorField) { - CeedBasis basis; - CeedCallOcca(CeedOperatorFieldGetBasis(operatorField, &basis)); - return from(basis); + CeedBasis ceedBasis; + CeedCallOcca(CeedOperatorFieldGetBasis(operatorField, &ceedBasis)); + Basis *basis = from(ceedBasis); + CeedCallOcca(CeedBasisDestroy(&ceedBasis)); + return basis; } int Basis::setCeedFields(CeedBasis basis) { diff --git a/backends/occa/ceed-occa-elem-restriction.cpp b/backends/occa/ceed-occa-elem-restriction.cpp index 7bfae3d87f..140041cb1d 100644 --- a/backends/occa/ceed-occa-elem-restriction.cpp +++ b/backends/occa/ceed-occa-elem-restriction.cpp @@ -200,10 +200,10 @@ ElemRestriction *ElemRestriction::from(CeedElemRestriction r) { ElemRestriction *ElemRestriction::from(CeedOperatorField operatorField) { CeedElemRestriction ceedElemRestriction; - CeedCallOcca(CeedOperatorFieldGetElemRestriction(operatorField, &ceedElemRestriction)); - - return from(ceedElemRestriction); + ElemRestriction *elemRestriction = from(ceedElemRestriction); + CeedCallOcca(CeedElemRestrictionDestroy(&ceedElemRestriction)); + return elemRestriction; } ElemRestriction *ElemRestriction::setupFrom(CeedElemRestriction r) { diff --git a/backends/occa/ceed-occa-operator-field.cpp b/backends/occa/ceed-occa-operator-field.cpp index 6716d11e06..4745b8dfc0 100644 --- a/backends/occa/ceed-occa-operator-field.cpp +++ b/backends/occa/ceed-occa-operator-field.cpp @@ -19,9 +19,7 @@ OperatorField::OperatorField(CeedOperatorField opField) : _isValid(false), _uses CeedElemRestriction ceedElemRestriction; CeedCallOccaValid(_isValid, CeedOperatorFieldGetBasis(opField, &ceedBasis)); - CeedCallOccaValid(_isValid, CeedOperatorFieldGetVector(opField, &ceedVector)); - CeedCallOccaValid(_isValid, CeedOperatorFieldGetElemRestriction(opField, &ceedElemRestriction)); _isValid = true; @@ -30,6 +28,10 @@ OperatorField::OperatorField(CeedOperatorField opField) : _isValid(false), _uses vec = Vector::from(ceedVector); basis = Basis::from(ceedBasis); elemRestriction = ElemRestriction::from(ceedElemRestriction); + + CeedCallOccaValid(_isValid, CeedBasisDestroy(&ceedBasis)); + CeedCallOccaValid(_isValid, CeedVectorDestroy(&ceedVector)); + CeedCallOccaValid(_isValid, CeedElemRestrictionDestroy(&ceedElemRestriction)); } bool OperatorField::isValid() const { return _isValid; } diff --git a/backends/opt/ceed-opt-operator.c b/backends/opt/ceed-opt-operator.c index 4de37eaf66..8057741208 100644 --- a/backends/opt/ceed-opt-operator.c +++ b/backends/opt/ceed-opt-operator.c @@ -105,6 +105,7 @@ static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, bool i // Empty case - won't occur break; } + CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); CeedCallBackend(CeedElemRestrictionCreateVector(block_rstr[i + start_e], NULL, &e_vecs_full[i + start_e])); } @@ -124,6 +125,7 @@ static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, bool i CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size)); CeedCallBackend(CeedBasisGetNumNodes(basis, &P)); CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); + CeedCallBackend(CeedBasisDestroy(&basis)); e_size = (CeedSize)P * num_comp * block_size; CeedCallBackend(CeedVectorCreate(ceed, e_size, &e_vecs[i])); q_size = (CeedSize)Q * size * block_size; @@ -134,6 +136,7 @@ static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, bool i q_size = (CeedSize)Q * block_size; CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); CeedCallBackend(CeedBasisApply(basis, block_size, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; } // Initialize E-vec arrays @@ -158,7 +161,11 @@ static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, bool i CeedCallBackend(CeedVectorReferenceCopy(e_vecs_full[i + start_e], &e_vecs_full[j + start_e])); skip_rstr[j] = true; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } else { for (CeedInt i = num_fields - 1; i >= 0; i--) { @@ -179,7 +186,11 @@ static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, bool i skip_rstr[j] = true; apply_add_basis[i] = true; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } return CEED_ERROR_SUCCESS; @@ -289,6 +300,7 @@ static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields, CeedQFun CeedCallBackend(CeedVectorRestoreArrayRead(impl->e_vecs_in[i], (const CeedScalar **)&e_data[i])); } } + CeedCallBackend(CeedVectorDestroy(&vec)); } } return CEED_ERROR_SUCCESS; @@ -301,31 +313,33 @@ static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, CeedQFunction CeedInt num_input_fields, CeedInt block_size, CeedVector in_vec, bool skip_active, CeedScalar *e_data[2 * CEED_FIELD_MAX], CeedOperator_Opt *impl, CeedRequest *request) { for (CeedInt i = 0; i < num_input_fields; i++) { - bool is_active_input = false; + bool is_active; CeedInt elem_size, size, num_comp; CeedEvalMode eval_mode; CeedVector vec; CeedElemRestriction elem_rstr; CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); // Skip active input - is_active_input = vec == CEED_VECTOR_ACTIVE; - if (skip_active && is_active_input) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (skip_active && is_active) continue; // Get elem_size, eval_mode, size CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size)); // Restrict block active input - if (is_active_input && impl->block_rstr[i]) { + if (is_active && impl->block_rstr[i]) { CeedCallBackend(CeedElemRestrictionApplyBlock(impl->block_rstr[i], e / block_size, CEED_NOTRANSPOSE, in_vec, impl->e_vecs_in[i], request)); } // Basis action switch (eval_mode) { case CEED_EVAL_NONE: - if (!is_active_input) { + if (!is_active) { CeedCallBackend(CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data[i][(CeedSize)e * Q * size])); } break; @@ -334,11 +348,12 @@ static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q, CeedQFunction case CEED_EVAL_DIV: case CEED_EVAL_CURL: CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); - if (!is_active_input) { + if (!is_active) { CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); CeedCallBackend(CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data[i][(CeedSize)e * elem_size * num_comp])); } CeedCallBackend(CeedBasisApply(basis, block_size, CEED_NOTRANSPOSE, eval_mode, impl->e_vecs_in[i], impl->q_vecs_in[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; case CEED_EVAL_WEIGHT: break; // No action @@ -354,13 +369,12 @@ static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, CeedQFunctio CeedInt block_size, CeedInt num_input_fields, CeedInt num_output_fields, bool *apply_add_basis, bool *skip_rstr, CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl, CeedRequest *request) { for (CeedInt i = 0; i < num_output_fields; i++) { - CeedEvalMode eval_mode; - CeedVector vec; - CeedElemRestriction elem_rstr; - CeedBasis basis; + bool is_active; + CeedEvalMode eval_mode; + CeedVector vec; + CeedBasis basis; - // Get elem_size, eval_mode, size - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); + // Get eval_mode CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); // Basis action switch (eval_mode) { @@ -376,6 +390,7 @@ static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, CeedQFunctio } else { CeedCallBackend(CeedBasisApply(basis, block_size, CEED_TRANSPOSE, eval_mode, impl->q_vecs_out[i], impl->e_vecs_out[i])); } + CeedCallBackend(CeedBasisDestroy(&basis)); break; // LCOV_EXCL_START case CEED_EVAL_WEIGHT: { @@ -387,10 +402,12 @@ static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q, CeedQFunctio if (skip_rstr[i]) continue; // Get output vector CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) vec = out_vec; + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) vec = out_vec; // Restrict CeedCallBackend( CeedElemRestrictionApplyBlock(impl->block_rstr[i + impl->num_inputs], e / block_size, CEED_TRANSPOSE, impl->e_vecs_out[i], vec, request)); + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } return CEED_ERROR_SUCCESS; } @@ -409,6 +426,7 @@ static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields, CeedQF if (eval_mode != CEED_EVAL_WEIGHT && vec != CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedVectorRestoreArrayRead(impl->e_vecs_full[i], (const CeedScalar **)&e_data[i])); } + CeedCallBackend(CeedVectorDestroy(&vec)); } return CEED_ERROR_SUCCESS; } @@ -531,14 +549,14 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, b CeedInt field_size; CeedVector vec; - // Get input vector - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); // Check if active input + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size)); CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); qf_size_in += field_size; } + CeedCallBackend(CeedVectorDestroy(&vec)); } CeedCheck(qf_size_in > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs"); impl->qf_size_in = qf_size_in; @@ -550,13 +568,13 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, b CeedInt field_size; CeedVector vec; - // Get output vector - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); // Check if active output + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &field_size)); qf_size_out += field_size; } + CeedCallBackend(CeedVectorDestroy(&vec)); } CeedCheck(qf_size_out > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs"); impl->qf_size_out = qf_size_out; @@ -603,13 +621,15 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, b // Assemble QFunction for (CeedInt i = 0; i < num_input_fields; i++) { + bool is_active; CeedInt field_size; CeedVector vec; - // Get input vector - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); // Check if active input - if (vec != CEED_VECTOR_ACTIVE) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (!is_active) continue; CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size)); for (CeedInt field = 0; field < field_size; field++) { // Set current portion of input to 1.0 @@ -626,9 +646,8 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, b for (CeedInt out = 0; out < num_output_fields; out++) { CeedVector vec; - // Get output vector - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); // Check if active output + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedInt field_size; @@ -636,6 +655,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, b CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[out], &field_size)); l_vec_array += field_size * Q * block_size; // Advance the pointer by the size of the output } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Apply QFunction CeedCallBackend(CeedQFunctionApply(qf, Q * block_size, impl->q_vecs_in, impl->q_vecs_out)); @@ -666,12 +686,12 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, b for (CeedInt out = 0; out < num_output_fields; out++) { CeedVector vec; - // Get output vector - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); // Check if active output + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); if (vec == CEED_VECTOR_ACTIVE && num_elem > 0) { CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[out], CEED_MEM_HOST, NULL)); } + CeedCallBackend(CeedVectorDestroy(&vec)); } } @@ -684,10 +704,10 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, b for (CeedInt out = 0; out < num_output_fields; out++) { CeedVector vec; - // Get output vector - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); // Initialize array if active output + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); if (vec == CEED_VECTOR_ACTIVE) CeedCallBackend(CeedVectorSetValue(impl->q_vecs_out[out], 0.0)); + CeedCallBackend(CeedVectorDestroy(&vec)); } // Restore input arrays diff --git a/backends/ref/ceed-ref-operator.c b/backends/ref/ceed-ref-operator.c index de79e96d5b..4714387744 100644 --- a/backends/ref/ceed-ref-operator.c +++ b/backends/ref/ceed-ref-operator.c @@ -50,6 +50,7 @@ static int CeedOperatorSetupFields_Ref(CeedQFunction qf, CeedOperator op, bool i if (eval_mode != CEED_EVAL_WEIGHT) { CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionCreateVector(elem_rstr, NULL, &e_vecs_full[i + start_e])); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } switch (eval_mode) { @@ -70,12 +71,14 @@ static int CeedOperatorSetupFields_Ref(CeedQFunction qf, CeedOperator op, bool i CeedCallBackend(CeedVectorCreate(ceed, e_size, &e_vecs[i])); q_size = (CeedSize)Q * size; CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; case CEED_EVAL_WEIGHT: // Only on input fields CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); q_size = (CeedSize)Q; CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); CeedCallBackend(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; } } @@ -98,7 +101,11 @@ static int CeedOperatorSetupFields_Ref(CeedQFunction qf, CeedOperator op, bool i CeedCallBackend(CeedVectorReferenceCopy(e_vecs_full[i + start_e], &e_vecs_full[j + start_e])); skip_rstr[j] = true; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } else { for (CeedInt i = num_fields - 1; i >= 0; i--) { @@ -120,7 +127,11 @@ static int CeedOperatorSetupFields_Ref(CeedQFunction qf, CeedOperator op, bool i apply_add_basis[i] = true; e_data_out_indices[j] = i; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } return CEED_ERROR_SUCCESS; @@ -198,14 +209,15 @@ static inline int CeedOperatorSetupInputs_Ref(CeedInt num_input_fields, CeedQFun CeedVector in_vec, const bool skip_active, CeedScalar *e_data_full[2 * CEED_FIELD_MAX], CeedOperator_Ref *impl, CeedRequest *request) { for (CeedInt i = 0; i < num_input_fields; i++) { - uint64_t state; - CeedEvalMode eval_mode; - CeedVector vec; - CeedElemRestriction elem_rstr; + bool is_active; + uint64_t state; + CeedEvalMode eval_mode; + CeedVector vec; // Get input vector CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) { + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) { if (skip_active) continue; else vec = in_vec; } @@ -218,13 +230,17 @@ static inline int CeedOperatorSetupInputs_Ref(CeedInt num_input_fields, CeedQFun CeedCallBackend(CeedVectorGetState(vec, &state)); // Skip restriction if input is unchanged if ((state != impl->input_states[i] || vec == in_vec) && !impl->skip_rstr_in[i]) { + CeedElemRestriction elem_rstr; + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, vec, impl->e_vecs_full[i], request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } impl->input_states[i] = state; // Get evec CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, (const CeedScalar **)&e_data_full[i])); } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } return CEED_ERROR_SUCCESS; } @@ -243,14 +259,18 @@ static inline int CeedOperatorInputBasis_Ref(CeedInt e, CeedInt Q, CeedQFunction // Skip active input if (skip_active) { + bool is_active; CeedVector vec; CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) continue; + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (is_active) continue; } // Get elem_size, eval_mode, size CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size)); // Basis action @@ -266,6 +286,7 @@ static inline int CeedOperatorInputBasis_Ref(CeedInt e, CeedInt Q, CeedQFunction CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); CeedCallBackend(CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data_full[i][(CeedSize)e * elem_size * num_comp])); CeedCallBackend(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, eval_mode, impl->e_vecs_in[i], impl->q_vecs_in[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; case CEED_EVAL_WEIGHT: break; // No action @@ -289,6 +310,7 @@ static inline int CeedOperatorOutputBasis_Ref(CeedInt e, CeedInt Q, CeedQFunctio // Get elem_size, eval_mode CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); // Basis action switch (eval_mode) { @@ -307,6 +329,7 @@ static inline int CeedOperatorOutputBasis_Ref(CeedInt e, CeedInt Q, CeedQFunctio } else { CeedCallBackend(CeedBasisApply(basis, 1, CEED_TRANSPOSE, eval_mode, impl->q_vecs_out[i], impl->e_vecs_out[i])); } + CeedCallBackend(CeedBasisDestroy(&basis)); break; // LCOV_EXCL_START case CEED_EVAL_WEIGHT: { @@ -328,10 +351,13 @@ static inline int CeedOperatorRestoreInputs_Ref(CeedInt num_input_fields, CeedQF // Skip active inputs if (skip_active) { + bool is_active; CeedVector vec; CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) continue; + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (is_active) continue; } // Restore input CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); @@ -371,8 +397,10 @@ static int CeedOperatorApplyAdd_Ref(CeedOperator op, CeedVector in_vec, CeedVect CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[0], &elem_rstr)); CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, in_vec, impl->e_vecs_full[0], request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[0], &elem_rstr)); CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, impl->e_vecs_full[0], out_vec, request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); return CEED_ERROR_SUCCESS; } @@ -415,6 +443,7 @@ static int CeedOperatorApplyAdd_Ref(CeedOperator op, CeedVector in_vec, CeedVect // Output restriction for (CeedInt i = 0; i < num_output_fields; i++) { + bool is_active; CeedVector vec; CeedElemRestriction elem_rstr; @@ -424,10 +453,13 @@ static int CeedOperatorApplyAdd_Ref(CeedOperator op, CeedVector in_vec, CeedVect // Get output vector CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); // Active - if (vec == CEED_VECTOR_ACTIVE) vec = out_vec; + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) vec = out_vec; // Restrict CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, impl->e_vecs_full[i + impl->num_inputs], vec, request)); + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } // Restore input arrays @@ -482,6 +514,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Ref(CeedOperator op, b CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); qf_size_in += field_size; } + CeedCallBackend(CeedVectorDestroy(&vec)); } CeedCheck(qf_size_in > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs"); impl->qf_size_in = qf_size_in; @@ -500,6 +533,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Ref(CeedOperator op, b CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &field_size)); qf_size_out += field_size; } + CeedCallBackend(CeedVectorDestroy(&vec)); } CeedCheck(qf_size_out > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs"); impl->qf_size_out = qf_size_out; @@ -528,12 +562,15 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Ref(CeedOperator op, b // Assemble QFunction for (CeedInt i = 0; i < num_input_fields; i++) { + bool is_active; CeedInt field_size; CeedVector vec; // Set Inputs CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec != CEED_VECTOR_ACTIVE) continue; + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (!is_active) continue; CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size)); for (CeedInt field = 0; field < field_size; field++) { // Set current portion of input to 1.0 @@ -560,6 +597,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Ref(CeedOperator op, b CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[out], &field_size)); assembled_array += field_size * Q; // Advance the pointer by the size of the output } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Apply QFunction CeedCallBackend(CeedQFunctionApply(qf, Q, impl->q_vecs_in, impl->q_vecs_out)); @@ -597,6 +635,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Ref(CeedOperator op, b if (vec == CEED_VECTOR_ACTIVE && num_elem > 0) { CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[out], CEED_MEM_HOST, NULL)); } + CeedCallBackend(CeedVectorDestroy(&vec)); } } @@ -677,6 +716,7 @@ static int CeedOperatorSetupFieldsAtPoints_Ref(CeedQFunction qf, CeedOperator op CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionCreateVector(elem_rstr, NULL, &e_vecs_full[i + start_e])); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedVectorSetValue(e_vecs_full[i + start_e], 0.0)); } @@ -694,6 +734,7 @@ static int CeedOperatorSetupFieldsAtPoints_Ref(CeedQFunction qf, CeedOperator op q_size = (CeedSize)max_num_points * size; CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); } + CeedCallBackend(CeedVectorDestroy(&vec)); break; } case CEED_EVAL_INTERP: @@ -708,6 +749,7 @@ static int CeedOperatorSetupFieldsAtPoints_Ref(CeedQFunction qf, CeedOperator op CeedCallBackend(CeedVectorCreate(ceed, e_size, &e_vecs[i])); q_size = (CeedSize)max_num_points * size; CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; case CEED_EVAL_WEIGHT: // Only on input fields CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); @@ -715,6 +757,7 @@ static int CeedOperatorSetupFieldsAtPoints_Ref(CeedQFunction qf, CeedOperator op CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); CeedCallBackend( CeedBasisApplyAtPoints(basis, 1, &max_num_points, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, CEED_VECTOR_NONE, q_vecs[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; } // Initialize full arrays for E-vectors and Q-vectors @@ -740,7 +783,11 @@ static int CeedOperatorSetupFieldsAtPoints_Ref(CeedQFunction qf, CeedOperator op CeedCallBackend(CeedVectorReferenceCopy(e_vecs_full[i + start_e], &e_vecs_full[j + start_e])); skip_rstr[j] = true; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } else { for (CeedInt i = num_fields - 1; i >= 0; i--) { @@ -761,7 +808,11 @@ static int CeedOperatorSetupFieldsAtPoints_Ref(CeedQFunction qf, CeedOperator op skip_rstr[j] = true; apply_add_basis[i] = true; } + CeedCallBackend(CeedVectorDestroy(&vec_j)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_j)); } + CeedCallBackend(CeedVectorDestroy(&vec_i)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_i)); } } return CEED_ERROR_SUCCESS; @@ -829,7 +880,7 @@ static inline int CeedOperatorInputBasisAtPoints_Ref(CeedInt e, CeedInt num_poin CeedVector point_coords_elem, bool skip_active, CeedScalar *e_data[2 * CEED_FIELD_MAX], CeedOperator_Ref *impl, CeedRequest *request) { for (CeedInt i = 0; i < num_input_fields; i++) { - bool is_active_input = false; + bool is_active; CeedInt elem_size, size, num_comp; CeedRestrictionType rstr_type; CeedEvalMode eval_mode; @@ -837,10 +888,11 @@ static inline int CeedOperatorInputBasisAtPoints_Ref(CeedInt e, CeedInt num_poin CeedElemRestriction elem_rstr; CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); // Skip active input - is_active_input = vec == CEED_VECTOR_ACTIVE; - if (skip_active && is_active_input) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (skip_active && is_active) continue; // Get elem_size, eval_mode, size CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); @@ -848,7 +900,7 @@ static inline int CeedOperatorInputBasisAtPoints_Ref(CeedInt e, CeedInt num_poin CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size)); // Restrict block active input - if (is_active_input && !impl->skip_rstr_in[i]) { + if (is_active && !impl->skip_rstr_in[i]) { if (rstr_type == CEED_RESTRICTION_POINTS) { CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement(elem_rstr, e, CEED_NOTRANSPOSE, in_vec, impl->e_vecs_in[i], request)); } else { @@ -858,7 +910,7 @@ static inline int CeedOperatorInputBasisAtPoints_Ref(CeedInt e, CeedInt num_poin // Basis action switch (eval_mode) { case CEED_EVAL_NONE: - if (!is_active_input) { + if (!is_active) { CeedCallBackend(CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data[i][num_points_offset * size])); } break; @@ -868,17 +920,19 @@ static inline int CeedOperatorInputBasisAtPoints_Ref(CeedInt e, CeedInt num_poin case CEED_EVAL_DIV: case CEED_EVAL_CURL: CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); - if (!is_active_input) { + if (!is_active) { CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); CeedCallBackend(CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, &e_data[i][(CeedSize)e * elem_size * num_comp])); } CeedCallBackend( CeedBasisApplyAtPoints(basis, 1, &num_points, CEED_NOTRANSPOSE, eval_mode, point_coords_elem, impl->e_vecs_in[i], impl->q_vecs_in[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; case CEED_EVAL_WEIGHT: break; // No action } + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } return CEED_ERROR_SUCCESS; } @@ -891,6 +945,7 @@ static inline int CeedOperatorOutputBasisAtPoints_Ref(CeedInt e, CeedInt num_poi bool *apply_add_basis, bool *skip_rstr, CeedOperator op, CeedVector out_vec, CeedVector point_coords_elem, CeedOperator_Ref *impl, CeedRequest *request) { for (CeedInt i = 0; i < num_output_fields; i++) { + bool is_active; CeedRestrictionType rstr_type; CeedEvalMode eval_mode; CeedVector vec; @@ -916,6 +971,7 @@ static inline int CeedOperatorOutputBasisAtPoints_Ref(CeedInt e, CeedInt num_poi CeedCallBackend( CeedBasisApplyAtPoints(basis, 1, &num_points, CEED_TRANSPOSE, eval_mode, point_coords_elem, impl->q_vecs_out[i], impl->e_vecs_out[i])); } + CeedCallBackend(CeedBasisDestroy(&basis)); break; // LCOV_EXCL_START case CEED_EVAL_WEIGHT: { @@ -928,13 +984,16 @@ static inline int CeedOperatorOutputBasisAtPoints_Ref(CeedInt e, CeedInt num_poi // Get output vector CeedCallBackend(CeedElemRestrictionGetType(elem_rstr, &rstr_type)); CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) vec = out_vec; + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) vec = out_vec; // Restrict if (rstr_type == CEED_RESTRICTION_POINTS) { CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement(elem_rstr, e, CEED_TRANSPOSE, impl->e_vecs_out[i], vec, request)); } else { CeedCallBackend(CeedElemRestrictionApplyBlock(elem_rstr, e, CEED_TRANSPOSE, impl->e_vecs_out[i], vec, request)); } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } return CEED_ERROR_SUCCESS; } @@ -1055,12 +1114,14 @@ static inline int CeedOperatorLinearAssembleQFunctionAtPointsCore_Ref(CeedOperat CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionIsAtPoints(elem_rstr, &is_at_points)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCheck(!is_at_points, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction with active input at points"); } // Get size of active input CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size)); qf_size_in += field_size; } + CeedCallBackend(CeedVectorDestroy(&vec)); } CeedCheck(qf_size_in, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs"); impl->qf_size_in = qf_size_in; @@ -1083,6 +1144,7 @@ static inline int CeedOperatorLinearAssembleQFunctionAtPointsCore_Ref(CeedOperat CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionIsAtPoints(elem_rstr, &is_at_points)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCheck(!is_at_points, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction with active input at points"); } // Get size of active output @@ -1090,6 +1152,7 @@ static inline int CeedOperatorLinearAssembleQFunctionAtPointsCore_Ref(CeedOperat CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); qf_size_out += field_size; } + CeedCallBackend(CeedVectorDestroy(&vec)); } CeedCheck(qf_size_out > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs"); impl->qf_size_out = qf_size_out; @@ -1129,13 +1192,16 @@ static inline int CeedOperatorLinearAssembleQFunctionAtPointsCore_Ref(CeedOperat // Assemble QFunction for (CeedInt i = 0; i < num_input_fields; i++) { + bool is_active; CeedInt field_size; CeedVector vec; // Get input vector CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); // Check if active input - if (vec != CEED_VECTOR_ACTIVE) continue; + if (!is_active) continue; // Get size of active input CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size)); for (CeedInt field = 0; field < field_size; field++) { @@ -1162,6 +1228,7 @@ static inline int CeedOperatorLinearAssembleQFunctionAtPointsCore_Ref(CeedOperat CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[out], &field_size)); assembled_array += field_size * num_points; // Advance the pointer by the size of the output } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Apply QFunction CeedCallBackend(CeedQFunctionApply(qf, num_points, impl->q_vecs_in, impl->q_vecs_out)); @@ -1200,6 +1267,7 @@ static inline int CeedOperatorLinearAssembleQFunctionAtPointsCore_Ref(CeedOperat if (vec == CEED_VECTOR_ACTIVE && num_elem > 0) { CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[out], CEED_MEM_HOST, NULL)); } + CeedCallBackend(CeedVectorDestroy(&vec)); } } @@ -1277,10 +1345,13 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce // Clear input Qvecs for (CeedInt i = 0; i < num_input_fields; i++) { + bool is_active; CeedVector vec; CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec != CEED_VECTOR_ACTIVE) continue; + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (!is_active) continue; CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); } @@ -1301,15 +1372,17 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce // Loop over points on element for (CeedInt i = 0; i < num_input_fields; i++) { - bool is_active_at_points = true; + bool is_active_at_points = true, is_active; CeedInt elem_size_active = 1; CeedRestrictionType rstr_type; CeedVector vec; CeedElemRestriction elem_rstr; - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); // -- Skip non-active input - if (vec != CEED_VECTOR_ACTIVE) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (!is_active) continue; // -- Get active restriction type CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); @@ -1318,6 +1391,7 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce if (!is_active_at_points) CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size_active)); else elem_size_active = num_points; CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp_active)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); e_vec_size = elem_size_active * num_comp_active; for (CeedInt s = 0; s < e_vec_size; s++) { @@ -1347,6 +1421,7 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); CeedCallBackend(CeedBasisApplyAtPoints(basis, 1, &num_points, CEED_NOTRANSPOSE, eval_mode, impl->point_coords_elem, impl->e_vecs_in[i], impl->q_vecs_in[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; case CEED_EVAL_WEIGHT: break; // No action @@ -1364,18 +1439,19 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce // -- Grab diagonal value for (CeedInt j = 0; j < num_output_fields; j++) { - bool is_active_output = false; - CeedInt elem_size = 0; + bool is_active; + CeedInt elem_size = 0; CeedRestrictionType rstr_type; CeedEvalMode eval_mode; CeedVector vec; CeedElemRestriction elem_rstr; CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[j], &vec)); // ---- Skip non-active output - is_active_output = vec == CEED_VECTOR_ACTIVE; - if (!is_active_output) continue; + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[j], &vec)); + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (!is_active) continue; // ---- Check if elem size matches CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[j], &elem_rstr)); @@ -1405,6 +1481,7 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[j], &basis)); CeedCallBackend(CeedBasisApplyAtPoints(basis, 1, &num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, impl->q_vecs_out[j], impl->e_vecs_out[j])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; // LCOV_EXCL_START case CEED_EVAL_WEIGHT: { @@ -1430,6 +1507,7 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce } else { CeedCallBackend(CeedElemRestrictionApplyBlock(elem_rstr, e, CEED_TRANSPOSE, impl->e_vecs_out[j], assembled, request)); } + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } // -- Reset unit vector if (s == e_vec_size - 1) CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); diff --git a/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp b/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp index a4edb6fc2b..b14e155124 100644 --- a/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp +++ b/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp @@ -155,12 +155,12 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { // LCOV_EXCL_STOP } } + CeedCallBackend(CeedBasisDestroy(&basis)); } // Check output bases for Q_1d, dim as well // The only input basis might be CEED_BASIS_NONE for (CeedInt i = 0; i < num_output_fields; i++) { CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis)); - if (basis != CEED_BASIS_NONE) { bool is_tensor; @@ -178,6 +178,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { // LCOV_EXCL_STOP } } + CeedCallBackend(CeedBasisDestroy(&basis)); } impl->dim = dim; impl->Q_1d = Q_1d; @@ -196,6 +197,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { CeedCallBackend(CeedBasisGetData(basis, &basis_impl)); use_collograd_parallelization = basis_impl->d_collo_grad_1d && (was_grad_found ? use_collograd_parallelization : true); was_grad_found = true; + CeedCallBackend(CeedBasisDestroy(&basis)); } } for (CeedInt i = 0; i < num_output_fields; i++) { @@ -205,6 +207,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { CeedCallBackend(CeedBasisGetData(basis, &basis_impl)); use_collograd_parallelization = basis_impl->d_collo_grad_1d && (was_grad_found ? use_collograd_parallelization : true); was_grad_found = true; + CeedCallBackend(CeedBasisDestroy(&basis)); } } } @@ -273,6 +276,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); // Set field constants if (eval_mode != CEED_EVAL_WEIGHT) { @@ -321,6 +325,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { case CEED_EVAL_CURL: break; // TODO: Not implemented } + CeedCallBackend(CeedBasisDestroy(&basis)); } code << "\n // -- Output field constants and basis data --\n"; @@ -331,6 +336,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); // Set field constants CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis)); @@ -382,6 +388,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { } // LCOV_EXCL_STOP } + CeedCallBackend(CeedBasisDestroy(&basis)); } code << "\n // -- Element loop --\n"; code << " work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"; @@ -431,6 +438,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { << ", num_elem, d_u_" << i << ", r_u_" << i << ");\n"; } } + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); // Basis action code << " // EvalMode: " << CeedEvalModes[eval_mode] << "\n"; @@ -452,12 +460,14 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { << i << ", r_t_" << i << ", elem_scratch);\n"; } else { CeedInt P_1d; + CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d)); code << " CeedScalar r_t_" << i << "[num_comp_in_" << i << "*DIM*Q_1D];\n"; code << " Grad" << (dim > 1 ? "Tensor" : "") << (dim == 3 && Q_1d >= P_1d ? "Collocated" : "") << dim << "d(num_comp_in_" << i << ", P_in_" << i << ", Q_1D, r_u_" << i << (dim > 1 ? ", s_B_in_" : "") << (dim > 1 ? std::to_string(i) : "") << ", s_G_in_" << i << ", r_t_" << i << ", elem_scratch);\n"; + CeedCallBackend(CeedBasisDestroy(&basis)); } break; case CEED_EVAL_WEIGHT: @@ -466,6 +476,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { CeedCallBackend(CeedBasisGetData(basis, &basis_impl)); impl->W = basis_impl->d_q_weight_1d; code << " Weight" << (dim > 1 ? "Tensor" : "") << dim << "d(Q_1D, W, r_t_" << i << ");\n"; + CeedCallBackend(CeedBasisDestroy(&basis)); break; // No action case CEED_EVAL_DIV: break; // TODO: Not implemented @@ -544,6 +555,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { << "3d(num_comp_in_" << i << ", Q_1D," << strides[0] << ", " << strides[1] << ", " << strides[2] << ", num_elem, q, d_u_" << i << ", r_q_" << i << ");\n"; } + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); break; case CEED_EVAL_INTERP: code << " CeedScalar r_q_" << i << "[num_comp_in_" << i << "];\n"; @@ -690,6 +702,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { code << " GradTranspose" << (dim > 1 ? "Tensor" : "") << (dim == 3 && Q_1d >= P_1d ? "Collocated" : "") << dim << "d(num_comp_out_" << i << ", P_out_" << i << ", Q_1D, r_tt_" << i << (dim > 1 ? ", s_B_out_" : "") << (dim > 1 ? std::to_string(i) : "") << ", s_G_out_" << i << ", r_v_" << i << ", elem_scratch);\n"; + CeedCallBackend(CeedBasisDestroy(&basis)); } break; // LCOV_EXCL_START @@ -734,6 +747,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { code << " writeDofsStrided" << dim << "d(num_comp_out_" << i << ",P_out_" << i << "," << strides[0] << "," << strides[1] << "," << strides[2] << ", num_elem, r_v_" << i << ", d_v_" << i << ");\n"; } + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } code << " }\n"; diff --git a/backends/sycl-gen/ceed-sycl-gen-operator.sycl.cpp b/backends/sycl-gen/ceed-sycl-gen-operator.sycl.cpp index 100176b2d7..650a52cf4d 100644 --- a/backends/sycl-gen/ceed-sycl-gen-operator.sycl.cpp +++ b/backends/sycl-gen/ceed-sycl-gen-operator.sycl.cpp @@ -73,12 +73,15 @@ static int CeedOperatorApplyAdd_Sycl_gen(CeedOperator op, CeedVector input_vec, if (eval_mode == CEED_EVAL_WEIGHT) { // Skip impl->fields->inputs[i] = NULL; } else { + bool is_active; CeedVector vec; // Get input vector CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) vec = input_vec; + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) vec = input_vec; CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &impl->fields->inputs[i])); + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } } @@ -88,11 +91,13 @@ static int CeedOperatorApplyAdd_Sycl_gen(CeedOperator op, CeedVector input_vec, if (eval_mode == CEED_EVAL_WEIGHT) { // Skip impl->fields->outputs[i] = NULL; } else { + bool is_active; CeedVector vec; // Get output vector CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) vec = output_vec; + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) vec = output_vec; output_vecs[i] = vec; // Check for multiple output modes CeedInt index = -1; @@ -102,6 +107,7 @@ static int CeedOperatorApplyAdd_Sycl_gen(CeedOperator op, CeedVector input_vec, break; } } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); if (index == -1) { CeedCallBackend(CeedVectorGetArray(vec, CEED_MEM_DEVICE, &impl->fields->outputs[i])); } else { @@ -152,11 +158,14 @@ static int CeedOperatorApplyAdd_Sycl_gen(CeedOperator op, CeedVector input_vec, CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); if (eval_mode == CEED_EVAL_WEIGHT) { // Skip } else { + bool is_active; CeedVector vec; CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) vec = input_vec; + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) vec = input_vec; CeedCallBackend(CeedVectorRestoreArrayRead(vec, &impl->fields->inputs[i])); + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } } @@ -165,10 +174,12 @@ static int CeedOperatorApplyAdd_Sycl_gen(CeedOperator op, CeedVector input_vec, CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); if (eval_mode == CEED_EVAL_WEIGHT) { // Skip } else { + bool is_active; CeedVector vec; CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) vec = output_vec; + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) vec = output_vec; // Check for multiple output modes CeedInt index = -1; @@ -178,6 +189,7 @@ static int CeedOperatorApplyAdd_Sycl_gen(CeedOperator op, CeedVector input_vec, break; } } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); if (index == -1) { CeedCallBackend(CeedVectorRestoreArray(vec, &impl->fields->outputs[i])); } diff --git a/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp b/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp index 8939d84a26..35c0fd5097 100644 --- a/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp +++ b/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp @@ -23,9 +23,9 @@ class CeedOperatorSyclLinearAssembleFallback; //------------------------------------------------------------------------------ // Get Basis Emode Pointer //------------------------------------------------------------------------------ -void CeedOperatorGetBasisPointer_Sycl(const CeedScalar **basis_ptr, CeedEvalMode e_mode, const CeedScalar *identity, const CeedScalar *interp, +void CeedOperatorGetBasisPointer_Sycl(const CeedScalar **basis_ptr, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad) { - switch (e_mode) { + switch (eval_mode) { case CEED_EVAL_NONE: *basis_ptr = identity; break; @@ -70,7 +70,7 @@ static int CeedOperatorDestroy_Sycl(CeedOperator op) { } CeedCallBackend(CeedFree(&impl->q_vecs_out)); - // QFunction assembly data + // QFunction assembly dataf for (CeedInt i = 0; i < impl->num_active_in; i++) { CeedCallBackend(CeedVectorDestroy(&impl->qf_active_in[i])); } @@ -78,12 +78,12 @@ static int CeedOperatorDestroy_Sycl(CeedOperator op) { // Diag data if (impl->diag) { - CeedCallBackend(CeedFree(&impl->diag->h_e_mode_in)); - CeedCallBackend(CeedFree(&impl->diag->h_e_mode_out)); + CeedCallBackend(CeedFree(&impl->diag->h_eval_mode_in)); + CeedCallBackend(CeedFree(&impl->diag->h_eval_mode_out)); CeedCallSycl(ceed, sycl_data->sycl_queue.wait_and_throw()); - CeedCallSycl(ceed, sycl::free(impl->diag->d_e_mode_in, sycl_data->sycl_context)); - CeedCallSycl(ceed, sycl::free(impl->diag->d_e_mode_out, sycl_data->sycl_context)); + CeedCallSycl(ceed, sycl::free(impl->diag->d_eval_mode_in, sycl_data->sycl_context)); + CeedCallSycl(ceed, sycl::free(impl->diag->d_eval_mode_out, sycl_data->sycl_context)); CeedCallSycl(ceed, sycl::free(impl->diag->d_identity, sycl_data->sycl_context)); CeedCallSycl(ceed, sycl::free(impl->diag->d_interp_in, sycl_data->sycl_context)); CeedCallSycl(ceed, sycl::free(impl->diag->d_interp_out, sycl_data->sycl_context)); @@ -130,28 +130,28 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool // Loop over fields for (CeedInt i = 0; i < num_fields; i++) { - CeedEvalMode e_mode; + CeedEvalMode eval_mode; CeedVector vec; CeedElemRestriction rstr; CeedBasis basis; - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &e_mode)); + CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); is_strided = false; skip_restriction = false; - if (e_mode != CEED_EVAL_WEIGHT) { + if (eval_mode != CEED_EVAL_WEIGHT) { CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); // Check whether this field can skip the element restriction: - // must be passive input, with e_mode NONE, and have a strided restriction with CEED_STRIDES_BACKEND. + // must be passive input, with eval_mode NONE, and have a strided restriction with CEED_STRIDES_BACKEND. // First, check whether the field is input or output: if (is_input) { // Check for passive input: CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); if (vec != CEED_VECTOR_ACTIVE) { - // Check e_mode - if (e_mode == CEED_EVAL_NONE) { + // Check eval_mode + if (eval_mode == CEED_EVAL_NONE) { // Check for is_strided restriction CeedCallBackend(CeedElemRestrictionIsStrided(rstr, &is_strided)); if (is_strided) { @@ -160,6 +160,7 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool } } } + CeedCallBackend(CeedVectorDestroy(&vec)); } if (skip_restriction) { // We do not need an E-Vector, but will use the input field vector's data directly in the operator application @@ -167,9 +168,10 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool } else { CeedCallBackend(CeedElemRestrictionCreateVector(rstr, NULL, &e_vecs[i + start_e])); } + CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); } - switch (e_mode) { + switch (eval_mode) { case CEED_EVAL_NONE: CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size)); q_size = (CeedSize)num_elem * Q * size; @@ -184,6 +186,7 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size)); CeedCallBackend(CeedBasisGetDimension(basis, &dim)); + CeedCallBackend(CeedBasisDestroy(&basis)); q_size = (CeedSize)num_elem * Q * size; CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); break; @@ -192,6 +195,7 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool q_size = (CeedSize)num_elem * Q; CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; case CEED_EVAL_DIV: break; // TODO: Not implemented @@ -252,35 +256,35 @@ static inline int CeedOperatorSetupInputs_Sycl(CeedInt num_input_fields, CeedQFu CeedVector in_vec, const bool skip_active, CeedScalar *e_data[2 * CEED_FIELD_MAX], CeedOperator_Sycl *impl, CeedRequest *request) { for (CeedInt i = 0; i < num_input_fields; i++) { - CeedEvalMode e_mode; - CeedVector vec; - CeedElemRestriction rstr; + bool is_active; + CeedEvalMode eval_mode; + CeedVector vec; // Get input vector CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) { + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) { if (skip_active) continue; else vec = in_vec; } - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &e_mode)); - if (e_mode == CEED_EVAL_WEIGHT) { // Skip + CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); + if (eval_mode == CEED_EVAL_WEIGHT) { // Skip } else { - // Get input vector - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - // Get input element restriction - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &rstr)); - if (vec == CEED_VECTOR_ACTIVE) vec = in_vec; // Restrict, if necessary if (!impl->e_vecs[i]) { // No restriction for this field; read data directly from vec. CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, (const CeedScalar **)&e_data[i])); } else { + CeedElemRestriction rstr; + + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &rstr)); CeedCallBackend(CeedElemRestrictionApply(rstr, CEED_NOTRANSPOSE, vec, impl->e_vecs[i], request)); - // Get evec + CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_DEVICE, (const CeedScalar **)&e_data[i])); } } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } return CEED_ERROR_SUCCESS; } @@ -292,35 +296,30 @@ static inline int CeedOperatorInputBasis_Sycl(CeedInt num_elem, CeedQFunctionFie CeedInt num_input_fields, const bool skip_active, CeedScalar *e_data[2 * CEED_FIELD_MAX], CeedOperator_Sycl *impl) { for (CeedInt i = 0; i < num_input_fields; i++) { - CeedInt elem_size, size; - CeedElemRestriction rstr; - CeedEvalMode e_mode; - CeedBasis basis; + CeedEvalMode eval_mode; + CeedBasis basis; // Skip active input if (skip_active) { + bool is_active; CeedVector vec; CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) continue; + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (is_active) continue; } - // Get elem_size, e_mode, size - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &rstr)); - CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &e_mode)); - CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size)); // Basis action - switch (e_mode) { + CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); + switch (eval_mode) { case CEED_EVAL_NONE: CeedCallBackend(CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_DEVICE, CEED_USE_POINTER, e_data[i])); break; case CEED_EVAL_INTERP: - CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); - CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, impl->e_vecs[i], impl->q_vecs_in[i])); - break; case CEED_EVAL_GRAD: CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); - CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, impl->e_vecs[i], impl->q_vecs_in[i])); + CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, eval_mode, impl->e_vecs[i], impl->q_vecs_in[i])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; case CEED_EVAL_WEIGHT: break; // No action @@ -339,24 +338,26 @@ static inline int CeedOperatorInputBasis_Sycl(CeedInt num_elem, CeedQFunctionFie static inline int CeedOperatorRestoreInputs_Sycl(CeedInt num_input_fields, CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, const bool skip_active, CeedScalar *e_data[2 * CEED_FIELD_MAX], CeedOperator_Sycl *impl) { for (CeedInt i = 0; i < num_input_fields; i++) { - CeedEvalMode e_mode; + bool is_active; + CeedEvalMode eval_mode; CeedVector vec; + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); + is_active = vec == CEED_VECTOR_ACTIVE; // Skip active input if (skip_active) { - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) continue; + if (is_active) continue; } - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &e_mode)); - if (e_mode == CEED_EVAL_WEIGHT) { // Skip + CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); + if (eval_mode == CEED_EVAL_WEIGHT) { // Skip } else { if (!impl->e_vecs[i]) { // This was a skip_restriction case - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); CeedCallBackend(CeedVectorRestoreArrayRead(vec, (const CeedScalar **)&e_data[i])); } else { CeedCallBackend(CeedVectorRestoreArrayRead(impl->e_vecs[i], (const CeedScalar **)&e_data[i])); } } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } return CEED_ERROR_SUCCESS; } @@ -407,9 +408,10 @@ static int CeedOperatorApplyAdd_Sycl(CeedOperator op, CeedVector in_vec, CeedVec CeedElemRestriction rstr; CeedBasis basis; - // Get elem_size, eval_mode, size + // Get elem_size, eval_mode, size CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &size)); // Basis action @@ -417,12 +419,10 @@ static int CeedOperatorApplyAdd_Sycl(CeedOperator op, CeedVector in_vec, CeedVec case CEED_EVAL_NONE: break; case CEED_EVAL_INTERP: - CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis)); - CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_TRANSPOSE, CEED_EVAL_INTERP, impl->q_vecs_out[i], impl->e_vecs[i + impl->num_e_in])); - break; case CEED_EVAL_GRAD: CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis)); - CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_TRANSPOSE, CEED_EVAL_GRAD, impl->q_vecs_out[i], impl->e_vecs[i + impl->num_e_in])); + CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_TRANSPOSE, eval_mode, impl->q_vecs_out[i], impl->e_vecs[i + impl->num_e_in])); + CeedCallBackend(CeedBasisDestroy(&basis)); break; // LCOV_EXCL_START case CEED_EVAL_WEIGHT: @@ -445,6 +445,8 @@ static int CeedOperatorApplyAdd_Sycl(CeedOperator op, CeedVector in_vec, CeedVec // Output restriction for (CeedInt i = 0; i < num_output_fields; i++) { + bool is_active; + CeedEvalMode eval_mode; CeedVector vec; CeedElemRestriction rstr; @@ -453,14 +455,14 @@ static int CeedOperatorApplyAdd_Sycl(CeedOperator op, CeedVector in_vec, CeedVec if (eval_mode == CEED_EVAL_NONE) { CeedCallBackend(CeedVectorRestoreArray(impl->e_vecs[i + impl->num_e_in], &e_data[i + num_input_fields])); } - // Get output vector - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); // Restrict + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); + is_active = vec == CEED_VECTOR_ACTIVE; CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &rstr)); - // Active - if (vec == CEED_VECTOR_ACTIVE) vec = out_vec; - - CeedCallBackend(CeedElemRestrictionApply(rstr, CEED_TRANSPOSE, impl->e_vecs[i + impl->num_e_in], vec, request)); + if (is_active) vec = out_vec; + CeedCallBackend(CeedElemRestrictionApply(rstr, CEED_TRANSPOSE, impl->e_vecs[i + impl->num_inputs], vec, request)); + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); } // Restore input arrays @@ -506,9 +508,8 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Sycl(CeedOperator op, CeedScalar *q_vec_array; CeedVector vec; - // Get input vector - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); // Check if active input + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size)); CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); @@ -523,6 +524,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Sycl(CeedOperator op, num_active_in += size; CeedCallBackend(CeedVectorRestoreArray(impl->q_vecs_in[i], &q_vec_array)); } + CeedCallBackend(CeedVectorDestroy(&vec)); } impl->num_active_in = num_active_in; impl->qf_active_in = active_in; @@ -533,13 +535,13 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Sycl(CeedOperator op, for (CeedInt i = 0; i < num_output_fields; i++) { CeedVector vec; - // Get output vector - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); // Check if active output + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &size)); num_active_out += size; } + CeedCallBackend(CeedVectorDestroy(&vec)); } impl->num_active_out = num_active_out; } @@ -574,14 +576,14 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Sycl(CeedOperator op, for (CeedInt out = 0; out < num_output_fields; out++) { CeedVector vec; - // Get output vector - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); // Check if active output + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_DEVICE, CEED_USE_POINTER, assembled_array)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[out], &size)); assembled_array += size * Q * num_elem; // Advance the pointer by the size of the output } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Apply QFunction CeedCallBackend(CeedQFunctionApply(qf, Q * num_elem, impl->q_vecs_in, impl->q_vecs_out)); @@ -591,12 +593,12 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Sycl(CeedOperator op, for (CeedInt out = 0; out < num_output_fields; out++) { CeedVector vec; - // Get output vector - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); // Check if active output + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[out], &vec)); if (vec == CEED_VECTOR_ACTIVE) { CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[out], CEED_MEM_DEVICE, NULL)); } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Restore input arrays @@ -627,10 +629,9 @@ static int CeedOperatorLinearAssembleQFunctionUpdate_Sycl(CeedOperator op, CeedV static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { Ceed ceed; Ceed_Sycl *sycl_data; - CeedInt num_input_fields, num_output_fields, num_e_mode_in = 0, num_comp = 0, dim = 1, num_e_mode_out = 0; - CeedEvalMode *e_mode_in = NULL, *e_mode_out = NULL; + CeedInt num_input_fields, num_output_fields, num_eval_mode_in = 0, num_comp = 0, dim = 1, num_eval_mode_out = 0; + CeedEvalMode *eval_mode_in = NULL, *eval_mode_out = NULL; CeedBasis basis_in = NULL, basis_out = NULL; - CeedElemRestriction rstr_in = NULL, rstr_out = NULL; CeedQFunctionField *qf_fields; CeedQFunction qf; CeedOperatorField *op_fields; @@ -648,28 +649,26 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - CeedEvalMode e_mode; - CeedElemRestriction rstr; - - CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis_in)); - CeedCallBackend(CeedBasisGetNumComponents(basis_in, &num_comp)); - CeedCallBackend(CeedBasisGetDimension(basis_in, &dim)); - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); - CeedCheck(!rstr_in || rstr_in == rstr, ceed, CEED_ERROR_BACKEND, - "Backend does not implement multi-field non-composite operator diagonal assembly"); - rstr_in = rstr; - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &e_mode)); - switch (e_mode) { + CeedEvalMode eval_mode; + CeedBasis basis; + + CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); + CeedCheck(!basis_in || basis_in == basis, ceed, CEED_ERROR_BACKEND, + "Backend does not implement operator diagonal assembly with multiple active bases"); + if (!basis_in) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis)); + CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); + switch (eval_mode) { case CEED_EVAL_NONE: case CEED_EVAL_INTERP: - CeedCallBackend(CeedRealloc(num_e_mode_in + 1, &e_mode_in)); - e_mode_in[num_e_mode_in] = e_mode; - num_e_mode_in += 1; + CeedCallBackend(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in)); + eval_mode_in[num_eval_mode_in] = eval_mode; + num_eval_mode_in += 1; break; case CEED_EVAL_GRAD: - CeedCallBackend(CeedRealloc(num_e_mode_in + dim, &e_mode_in)); - for (CeedInt d = 0; d < dim; d++) e_mode_in[num_e_mode_in + d] = e_mode; - num_e_mode_in += dim; + CeedCallBackend(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in)); + for (CeedInt d = 0; d < dim; d++) eval_mode_in[num_eval_mode_in + d] = eval_mode; + num_eval_mode_in += dim; break; case CEED_EVAL_WEIGHT: case CEED_EVAL_DIV: @@ -677,6 +676,7 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { break; // Caught by QF Assembly } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Determine active output basis @@ -687,26 +687,26 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - CeedEvalMode e_mode; - CeedElemRestriction rstr; - - CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis_out)); - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); - CeedCheck(!rstr_out || rstr_out == rstr, ceed, CEED_ERROR_BACKEND, - "Backend does not implement multi-field non-composite operator diagonal assembly"); - rstr_out = rstr; - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &e_mode)); - switch (e_mode) { + CeedEvalMode eval_mode; + CeedBasis basis; + + CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); + CeedCheck(!basis_out || basis_out == basis, ceed, CEED_ERROR_BACKEND, + "Backend does not implement operator diagonal assembly with multiple active bases"); + if (!basis_out) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_out)); + CeedCallBackend(CeedBasisDestroy(&basis)); + CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); + switch (eval_mode) { case CEED_EVAL_NONE: case CEED_EVAL_INTERP: - CeedCallBackend(CeedRealloc(num_e_mode_out + 1, &e_mode_out)); - e_mode_out[num_e_mode_out] = e_mode; - num_e_mode_out += 1; + CeedCallBackend(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in)); + eval_mode_in[num_eval_mode_in] = eval_mode; + num_eval_mode_in += 1; break; case CEED_EVAL_GRAD: - CeedCallBackend(CeedRealloc(num_e_mode_out + dim, &e_mode_out)); - for (CeedInt d = 0; d < dim; d++) e_mode_out[num_e_mode_out + d] = e_mode; - num_e_mode_out += dim; + CeedCallBackend(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in)); + for (CeedInt d = 0; d < dim; d++) eval_mode_in[num_eval_mode_in + d] = eval_mode; + num_eval_mode_in += dim; break; case CEED_EVAL_WEIGHT: case CEED_EVAL_DIV: @@ -714,6 +714,7 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { break; // Caught by QF Assembly } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Operator data struct @@ -722,12 +723,12 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { CeedCallBackend(CeedCalloc(1, &impl->diag)); CeedOperatorDiag_Sycl *diag = impl->diag; - diag->basis_in = basis_in; - diag->basis_out = basis_out; - diag->h_e_mode_in = e_mode_in; - diag->h_e_mode_out = e_mode_out; - diag->num_e_mode_in = num_e_mode_in; - diag->num_e_mode_out = num_e_mode_out; + diag->basis_in = basis_in; + diag->basis_out = basis_out; + diag->h_eval_mode_in = eval_mode_in; + diag->h_eval_mode_out = eval_mode_out; + diag->num_eval_mode_in = num_eval_mode_in; + diag->num_eval_mode_out = num_eval_mode_out; // Kernel parameters CeedInt num_nodes, num_qpts; @@ -745,8 +746,8 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { // CEED_EVAL_NONE CeedScalar *identity = NULL; bool has_eval_none = false; - for (CeedInt i = 0; i < num_e_mode_in; i++) has_eval_none = has_eval_none || (e_mode_in[i] == CEED_EVAL_NONE); - for (CeedInt i = 0; i < num_e_mode_out; i++) has_eval_none = has_eval_none || (e_mode_out[i] == CEED_EVAL_NONE); + for (CeedInt i = 0; i < num_eval_mode_in; i++) has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE); + for (CeedInt i = 0; i < num_eval_mode_out; i++) has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE); std::vector e; @@ -784,17 +785,23 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { sycl::event grad_out_copy = sycl_data->sycl_queue.copy(grad_out, diag->d_grad_out, g_len, e); copy_events.push_back(grad_out_copy); - // Arrays of e_modes - CeedCallSycl(ceed, diag->d_e_mode_in = sycl::malloc_device(num_e_mode_in, sycl_data->sycl_device, sycl_data->sycl_context)); - sycl::event e_mode_in_copy = sycl_data->sycl_queue.copy(e_mode_in, diag->d_e_mode_in, num_e_mode_in, e); - copy_events.push_back(e_mode_in_copy); + // Arrays of eval_modes + CeedCallSycl(ceed, diag->d_eval_mode_in = sycl::malloc_device(num_eval_mode_in, sycl_data->sycl_device, sycl_data->sycl_context)); + sycl::event eval_mode_in_copy = sycl_data->sycl_queue.copy(eval_mode_in, diag->d_eval_mode_in, num_eval_mode_in, e); + copy_events.push_back(eval_mode_in_copy); - CeedCallSycl(ceed, diag->d_e_mode_out = sycl::malloc_device(num_e_mode_out, sycl_data->sycl_device, sycl_data->sycl_context)); - sycl::event e_mode_out_copy = sycl_data->sycl_queue.copy(e_mode_out, diag->d_e_mode_out, num_e_mode_out, e); - copy_events.push_back(e_mode_out_copy); + CeedCallSycl(ceed, diag->d_eval_mode_out = sycl::malloc_device(num_eval_mode_out, sycl_data->sycl_device, sycl_data->sycl_context)); + sycl::event eval_mode_out_copy = sycl_data->sycl_queue.copy(eval_mode_out, diag->d_eval_mode_out, num_eval_mode_out, e); + copy_events.push_back(eval_mode_out_copy); // Restriction - diag->diag_rstr = rstr_out; + { + CeedElemRestriction rstr_out; + + CeedCallBackend(CeedOperatorGetActiveElemRestrictions(op, NULL, &rstr_out)); + diag->diag_rstr = rstr_out; + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_out)); + } // Wait for all copies to complete and handle exceptions CeedCallSycl(ceed, sycl::event::wait_and_throw(copy_events)); @@ -806,18 +813,18 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { //------------------------------------------------------------------------------ static int CeedOperatorLinearDiagonal_Sycl(sycl::queue &sycl_queue, const bool is_point_block, const CeedInt num_elem, const CeedOperatorDiag_Sycl *diag, const CeedScalar *assembled_qf_array, CeedScalar *elem_diag_array) { - const CeedSize num_nodes = diag->num_nodes; - const CeedSize num_qpts = diag->num_qpts; - const CeedSize num_comp = diag->num_comp; - const CeedSize num_e_mode_in = diag->num_e_mode_in; - const CeedSize num_e_mode_out = diag->num_e_mode_out; - const CeedScalar *identity = diag->d_identity; - const CeedScalar *interp_in = diag->d_interp_in; - const CeedScalar *grad_in = diag->d_grad_in; - const CeedScalar *interp_out = diag->d_interp_out; - const CeedScalar *grad_out = diag->d_grad_out; - const CeedEvalMode *e_mode_in = diag->d_e_mode_in; - const CeedEvalMode *e_mode_out = diag->d_e_mode_out; + const CeedSize num_nodes = diag->num_nodes; + const CeedSize num_qpts = diag->num_qpts; + const CeedSize num_comp = diag->num_comp; + const CeedSize num_eval_mode_in = diag->num_eval_mode_in; + const CeedSize num_eval_mode_out = diag->num_eval_mode_out; + const CeedScalar *identity = diag->d_identity; + const CeedScalar *interp_in = diag->d_interp_in; + const CeedScalar *grad_in = diag->d_grad_in; + const CeedScalar *interp_out = diag->d_interp_out; + const CeedScalar *grad_out = diag->d_grad_out; + const CeedEvalMode *eval_mode_in = diag->d_eval_mode_in; + const CeedEvalMode *eval_mode_out = diag->d_eval_mode_out; sycl::range<1> kernel_range(num_elem * num_nodes); @@ -833,18 +840,18 @@ static int CeedOperatorLinearDiagonal_Sycl(sycl::queue &sycl_queue, const bool i // Each element CeedInt d_out = -1; // Each basis eval mode pair - for (CeedSize e_out = 0; e_out < num_e_mode_out; e_out++) { + for (CeedSize e_out = 0; e_out < num_eval_mode_out; e_out++) { const CeedScalar *bt = NULL; - if (e_mode_out[e_out] == CEED_EVAL_GRAD) ++d_out; - CeedOperatorGetBasisPointer_Sycl(&bt, e_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes]); + if (eval_mode_out[e_out] == CEED_EVAL_GRAD) ++d_out; + CeedOperatorGetBasisPointer_Sycl(&bt, eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes]); CeedInt d_in = -1; - for (CeedSize e_in = 0; e_in < num_e_mode_in; e_in++) { + for (CeedSize e_in = 0; e_in < num_eval_mode_in; e_in++) { const CeedScalar *b = NULL; - if (e_mode_in[e_in] == CEED_EVAL_GRAD) ++d_in; - CeedOperatorGetBasisPointer_Sycl(&b, e_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes]); + if (eval_mode_in[e_in] == CEED_EVAL_GRAD) ++d_in; + CeedOperatorGetBasisPointer_Sycl(&b, eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes]); // Each component for (CeedSize comp_out = 0; comp_out < num_comp; comp_out++) { // Each qpoint/node pair @@ -855,7 +862,7 @@ static int CeedOperatorLinearDiagonal_Sycl(sycl::queue &sycl_queue, const bool i for (CeedSize q = 0; q < num_qpts; q++) { const CeedScalar qf_value = - assembled_qf_array[((((e_in * num_comp + comp_in) * num_e_mode_out + e_out) * num_comp + comp_out) * num_elem + e) * num_qpts + + assembled_qf_array[((((e_in * num_comp + comp_in) * num_eval_mode_out + e_out) * num_comp + comp_out) * num_elem + e) * num_qpts + q]; e_value += bt[q * num_nodes + tid] * qf_value * b[q * num_nodes + tid]; @@ -868,7 +875,8 @@ static int CeedOperatorLinearDiagonal_Sycl(sycl::queue &sycl_queue, const bool i for (CeedSize q = 0; q < num_qpts; q++) { const CeedScalar qf_value = - assembled_qf_array[((((e_in * num_comp + comp_out) * num_e_mode_out + e_out) * num_comp + comp_out) * num_elem + e) * num_qpts + q]; + assembled_qf_array[((((e_in * num_comp + comp_out) * num_eval_mode_out + e_out) * num_comp + comp_out) * num_elem + e) * num_qpts + + q]; e_value += bt[q * num_nodes + tid] * qf_value * b[q * num_nodes + tid]; } elem_diag_array[(comp_out * num_elem + e) * num_nodes + tid] += e_value; @@ -969,7 +977,7 @@ static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Sycl(CeedOperator op, //------------------------------------------------------------------------------ static int CeedSingleOperatorAssembleSetup_Sycl(CeedOperator op) { Ceed ceed; - CeedInt num_input_fields, num_output_fields, num_e_mode_in = 0, dim = 1, num_B_in_mats_to_load = 0, size_B_in = 0, num_e_mode_out = 0, + CeedInt num_input_fields, num_output_fields, num_eval_mode_in = 0, dim = 1, num_B_in_mats_to_load = 0, size_B_in = 0, num_eval_mode_out = 0, num_B_out_mats_to_load = 0, size_B_out = 0, num_qpts = 0, elem_size = 0, num_elem, num_comp, mat_start = 0; CeedEvalMode *eval_mode_in = NULL, *eval_mode_out = NULL; @@ -991,19 +999,27 @@ static int CeedSingleOperatorAssembleSetup_Sycl(CeedOperator op) { CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL)); // Note that the kernel will treat each dimension of a gradient action separately; - // i.e., when an active input has a CEED_EVAL_GRAD mode, num_ e_mode_in will increment by dim. - // However, for the purposes of load_ing the B matrices, it will be treated as one mode, and we will load/copy the entire gradient matrix at once, + // i.e., when an active input has a CEED_EVAL_GRAD mode, num_ eval_mode_in will increment by dim. + // However, for the purposes of loading the B matrices, it will be treated as one mode, and we will load/copy the entire gradient matrix at once, // so num_B_in_mats_to_load will be incremented by 1. for (CeedInt i = 0; i < num_input_fields; i++) { - CeedEvalMode eval_mode; - CeedVector vec; + CeedVector vec; CeedCallBackend(CeedOperatorFieldGetVector(input_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - CeedCallBackend(CeedOperatorFieldGetBasis(input_fields[i], &basis_in)); + CeedEvalMode eval_mode; + CeedElemRestriction elem_rstr; + CeedBasis basis; + + CeedCallBackend(CeedOperatorFieldGetBasis(input_fields[i], &basis)); + CeedCheck(!basis_in || basis_in == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator assembly with multiple active bases"); + if (!basis_in) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedBasisGetDimension(basis_in, &dim)); CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); - CeedCallBackend(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(input_fields[i], &elem_rstr)); + if (!rstr_in) CeedCallBackend(CeedElemRestrictionReferenceCopy(elem_rstr, &rstr_in)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); if (eval_mode != CEED_EVAL_NONE) { @@ -1011,43 +1027,53 @@ static int CeedSingleOperatorAssembleSetup_Sycl(CeedOperator op) { eval_mode_in[num_B_in_mats_to_load] = eval_mode; num_B_in_mats_to_load += 1; if (eval_mode == CEED_EVAL_GRAD) { - num_e_mode_in += dim; + num_eval_mode_in += dim; size_B_in += dim * elem_size * num_qpts; } else { - num_e_mode_in += 1; + num_eval_mode_in += 1; size_B_in += elem_size * num_qpts; } } } + CeedCallBackend(CeedVectorDestroy(&vec)); } // Determine active output basis; basis_out and rstr_out only used if same as input, TODO CeedCallBackend(CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields)); for (CeedInt i = 0; i < num_output_fields; i++) { - CeedEvalMode eval_mode; - CeedVector vec; + CeedVector vec; CeedCallBackend(CeedOperatorFieldGetVector(output_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - CeedCallBackend(CeedOperatorFieldGetBasis(output_fields[i], &basis_out)); - CeedCallBackend(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out)); - CeedCheck(!rstr_out || rstr_out == rstr_in, ceed, CEED_ERROR_BACKEND, "Backend does not implement multi-field non-composite operator assembly"); + CeedEvalMode eval_mode; + CeedElemRestriction elem_rstr; + CeedBasis basis; + + CeedCallBackend(CeedOperatorFieldGetBasis(output_fields[i], &basis)); + CeedCheck(!basis_out || basis_out == basis, ceed, CEED_ERROR_BACKEND, + "Backend does not implement operator assembly with multiple active bases"); + if (!basis_out) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_out)); + CeedCallBackend(CeedBasisDestroy(&basis)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(output_fields[i], &elem_rstr)); + if (!rstr_out) CeedCallBackend(CeedElemRestrictionReferenceCopy(elem_rstr, &rstr_out)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); if (eval_mode != CEED_EVAL_NONE) { CeedCallBackend(CeedRealloc(num_B_out_mats_to_load + 1, &eval_mode_out)); eval_mode_out[num_B_out_mats_to_load] = eval_mode; num_B_out_mats_to_load += 1; if (eval_mode == CEED_EVAL_GRAD) { - num_e_mode_out += dim; + num_eval_mode_out += dim; size_B_out += dim * elem_size * num_qpts; } else { - num_e_mode_out += 1; + num_eval_mode_out += 1; size_B_out += elem_size * num_qpts; } } } + CeedCallBackend(CeedVectorDestroy(&vec)); } - CeedCheck(num_e_mode_in > 0 && num_e_mode_out > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs"); + CeedCheck(num_eval_mode_in > 0 && num_eval_mode_out > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs"); CeedCallBackend(CeedElemRestrictionGetNumElements(rstr_in, &num_elem)); CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp)); @@ -1060,16 +1086,16 @@ static int CeedSingleOperatorAssembleSetup_Sycl(CeedOperator op) { CeedCallBackend(CeedGetData(ceed, &sycl_data)); // Kernel setup - int elems_per_block = 1; - asmb->elems_per_block = elems_per_block; - asmb->block_size_x = elem_size; - asmb->block_size_y = elem_size; - asmb->num_e_mode_in = num_e_mode_in; - asmb->num_e_mode_out = num_e_mode_out; - asmb->num_qpts = num_qpts; - asmb->num_nodes = elem_size; - asmb->block_size = elem_size * elem_size * elems_per_block; - asmb->num_comp = num_comp; + int elems_per_block = 1; + asmb->elems_per_block = elems_per_block; + asmb->block_size_x = elem_size; + asmb->block_size_y = elem_size; + asmb->num_eval_mode_in = num_eval_mode_in; + asmb->num_eval_mode_out = num_eval_mode_out; + asmb->num_qpts = num_qpts; + asmb->num_nodes = elem_size; + asmb->block_size = elem_size * elem_size * elems_per_block; + asmb->num_comp = num_comp; // Build 'full' B matrices (not 1D arrays used for tensor-product matrices CeedCallBackend(CeedBasisGetInterp(basis_in, &interp_in)); @@ -1126,6 +1152,10 @@ static int CeedSingleOperatorAssembleSetup_Sycl(CeedOperator op) { mat_start += dim * elem_size * num_qpts; } } + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_in)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_out)); + CeedCallBackend(CeedBasisDestroy(&basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis_out)); return CEED_ERROR_SUCCESS; } @@ -1136,25 +1166,25 @@ static int CeedOperatorLinearAssemble_Sycl(sycl::queue &sycl_queue, const CeedOp CeedScalar *values_array) { // This kernels assumes B_in and B_out have the same number of quadrature points and basis points. // TODO: expand to more general cases - CeedOperatorAssemble_Sycl *asmb = impl->asmb; - const CeedInt num_elem = asmb->num_elem; - const CeedSize num_nodes = asmb->num_nodes; - const CeedSize num_comp = asmb->num_comp; - const CeedSize num_qpts = asmb->num_qpts; - const CeedSize num_e_mode_in = asmb->num_e_mode_in; - const CeedSize num_e_mode_out = asmb->num_e_mode_out; + CeedOperatorAssemble_Sycl *asmb = impl->asmb; + const CeedInt num_elem = asmb->num_elem; + const CeedSize num_nodes = asmb->num_nodes; + const CeedSize num_comp = asmb->num_comp; + const CeedSize num_qpts = asmb->num_qpts; + const CeedSize num_eval_mode_in = asmb->num_eval_mode_in; + const CeedSize num_eval_mode_out = asmb->num_eval_mode_out; // Strides for final output ordering, determined by the reference (inference) implementation of the symbolic assembly, slowest --> fastest: element, // comp_in, comp_out, node_row, node_col const CeedSize comp_out_stride = num_nodes * num_nodes; const CeedSize comp_in_stride = comp_out_stride * num_comp; const CeedSize e_stride = comp_in_stride * num_comp; - // Strides for QF array, slowest --> fastest: e_mode_in, comp_in, e_mode_out, comp_out, elem, qpt - const CeedSize q_e_stride = num_qpts; - const CeedSize q_comp_out_stride = num_elem * q_e_stride; - const CeedSize q_e_mode_out_stride = q_comp_out_stride * num_comp; - const CeedSize q_comp_in_stride = q_e_mode_out_stride * num_e_mode_out; - const CeedSize q_e_mode_in_stride = q_comp_in_stride * num_comp; + // Strides for QF array, slowest --> fastest: eval_mode_in, comp_in, eval_mode_out, comp_out, elem, qpt + const CeedSize q_e_stride = num_qpts; + const CeedSize q_comp_out_stride = num_elem * q_e_stride; + const CeedSize q_eval_mode_out_stride = q_comp_out_stride * num_comp; + const CeedSize q_comp_in_stride = q_eval_mode_out_stride * num_eval_mode_out; + const CeedSize q_eval_mode_in_stride = q_comp_in_stride * num_comp; CeedScalar *B_in, *B_out; B_in = asmb->d_B_in; @@ -1177,19 +1207,19 @@ static int CeedOperatorLinearAssemble_Sycl(sycl::queue &sycl_queue, const CeedOp CeedScalar result = 0.0; CeedSize qf_index_comp = q_comp_in_stride * comp_in + q_comp_out_stride * comp_out + q_e_stride * e; - for (CeedSize e_mode_in = 0; e_mode_in < num_e_mode_in; e_mode_in++) { - CeedSize b_in_index = e_mode_in * num_qpts * num_nodes; + for (CeedSize eval_mode_in = 0; eval_mode_in < num_eval_mode_in; eval_mode_in++) { + CeedSize b_in_index = eval_mode_in * num_qpts * num_nodes; - for (CeedSize e_mode_out = 0; e_mode_out < num_e_mode_out; e_mode_out++) { - CeedSize b_out_index = e_mode_out * num_qpts * num_nodes; - CeedSize qf_index = qf_index_comp + q_e_mode_out_stride * e_mode_out + q_e_mode_in_stride * e_mode_in; + for (CeedSize eval_mode_out = 0; eval_mode_out < num_eval_mode_out; eval_mode_out++) { + CeedSize b_out_index = eval_mode_out * num_qpts * num_nodes; + CeedSize qf_index = qf_index_comp + q_eval_mode_out_stride * eval_mode_out + q_eval_mode_in_stride * eval_mode_in; // Perform the B^T D B operation for this 'chunk' of D (the qf_array) for (CeedSize j = 0; j < num_qpts; j++) { result += B_out[b_out_index + j * num_nodes + i] * qf_array[qf_index + j] * B_in[b_in_index + j * num_nodes + l]; } - } // end of e_mode_out - } // end of e_mode_in + } // end of eval_mode_out + } // end of eval_mode_in CeedSize val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + num_nodes * i + l; values_array[val_index] = result; @@ -1212,20 +1242,20 @@ static int CeedOperatorLinearAssembleFallback_Sycl(sycl::queue &sycl_queue, cons const CeedInt num_nodes = asmb->num_nodes; const CeedInt num_comp = asmb->num_comp; const CeedInt num_qpts = asmb->num_qpts; - const CeedInt num_e_mode_in = asmb->num_e_mode_in; - const CeedInt num_e_mode_out = asmb->num_e_mode_out; + const CeedInt num_eval_mode_in = asmb->num_eval_mode_in; + const CeedInt num_eval_mode_out = asmb->num_eval_mode_out; // Strides for final output ordering, determined by the reference (interface) implementation of the symbolic assembly, slowest --> fastest: elememt, // comp_in, comp_out, node_row, node_col const CeedInt comp_out_stride = num_nodes * num_nodes; const CeedInt comp_in_stride = comp_out_stride * num_comp; const CeedInt e_stride = comp_in_stride * num_comp; - // Strides for QF array, slowest --> fastest: e_mode_in, comp_in, e_mode_out, comp_out, elem, qpt + // Strides for QF array, slowest --> fastest: eval_mode_in, comp_in, eval_mode_out, comp_out, elem, qpt const CeedInt q_e_stride = num_qpts; const CeedInt q_comp_out_stride = num_elem * q_e_stride; - const CeedInt q_e_mode_out_stride = q_comp_out_stride * num_comp; - const CeedInt q_comp_in_stride = q_e_mode_out_stride * num_e_mode_out; - const CeedInt q_e_mode_in_stride = q_comp_in_stride * num_comp; + const CeedInt q_eval_mode_out_stride = q_comp_out_stride * num_comp; + const CeedInt q_comp_in_stride = q_eval_mode_out_stride * num_eval_mode_out; + const CeedInt q_eval_mode_in_stride = q_comp_in_stride * num_comp; CeedScalar *B_in, *B_out; B_in = asmb->d_B_in; @@ -1254,17 +1284,17 @@ static int CeedOperatorLinearAssembleFallback_Sycl(sycl::queue &sycl_queue, cons for (CeedInt i = 0; i < num_nodes; i++) { CeedScalar result = 0.0; CeedInt qf_index_comp = q_comp_in_stride * comp_in + q_comp_out_stride * comp_out + q_e_stride * e; - for (CeedInt e_mode_in = 0; e_mode_in < num_e_mode_in; e_mode_in++) { - CeedInt b_in_index = e_mode_in * num_qpts * num_nodes; - for (CeedInt e_mode_out = 0; e_mode_out < num_e_mode_out; e_mode_out++) { - CeedInt b_out_index = e_mode_out * num_qpts * num_nodes; - CeedInt qf_index = qf_index_comp + q_e_mode_out_stride * e_mode_out + q_e_mode_in_stride * e_mode_in; + for (CeedInt eval_mode_in = 0; eval_mode_in < num_eval_mode_in; eval_mode_in++) { + CeedInt b_in_index = eval_mode_in * num_qpts * num_nodes; + for (CeedInt eval_mode_out = 0; eval_mode_out < num_eval_mode_out; eval_mode_out++) { + CeedInt b_out_index = eval_mode_out * num_qpts * num_nodes; + CeedInt qf_index = qf_index_comp + q_eval_mode_out_stride * eval_mode_out + q_eval_mode_in_stride * eval_mode_in; // Perform the B^T D B operation for this 'chunk' of D (the qf_array) for (CeedInt j = 0; j < num_qpts; j++) { result += B_out[b_out_index + j * num_nodes + i] * qf_array[qf_index + j] * B_in[b_in_index + j * num_nodes + l]; } - } // end of e_mode_out - } // end of e_mode_in + } // end of eval_mode_out + } // end of eval_mode_in CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + num_nodes * i + l; values_array[val_index] = result; } // end of loop over element node index, i diff --git a/backends/sycl-ref/ceed-sycl-ref.hpp b/backends/sycl-ref/ceed-sycl-ref.hpp index ae765dbafc..c435e65e4f 100644 --- a/backends/sycl-ref/ceed-sycl-ref.hpp +++ b/backends/sycl-ref/ceed-sycl-ref.hpp @@ -86,16 +86,16 @@ typedef struct { CeedBasis basis_in, basis_out; CeedElemRestriction diag_rstr, point_block_diag_rstr; CeedVector elem_diag, point_block_elem_diag; - CeedInt num_e_mode_in, num_e_mode_out, num_nodes; + CeedInt num_eval_mode_in, num_eval_mode_out, num_nodes; CeedInt num_qpts, num_comp; // Kernel parameters - CeedEvalMode *h_e_mode_in, *h_e_mode_out; - CeedEvalMode *d_e_mode_in, *d_e_mode_out; + CeedEvalMode *h_eval_mode_in, *h_eval_mode_out; + CeedEvalMode *d_eval_mode_in, *d_eval_mode_out; CeedScalar *d_identity, *d_interp_in, *d_interp_out, *d_grad_in, *d_grad_out; } CeedOperatorDiag_Sycl; typedef struct { CeedInt num_elem, block_size_x, block_size_y, elems_per_block; - CeedInt num_e_mode_in, num_e_mode_out, num_qpts, num_nodes, block_size, num_comp; // Kernel parameters + CeedInt num_eval_mode_in, num_eval_mode_out, num_qpts, num_nodes, block_size, num_comp; // Kernel parameters bool fallback; CeedScalar *d_B_in, *d_B_out; } CeedOperatorAssemble_Sycl; diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index 19300bc7c5..764e2949ea 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -11,6 +11,7 @@ On this page we provide a summary of the main API changes, new features and exam - Add `bool` field type for `CeedQFunctionContext` and related interfaces to use `bool` fields. - `CEED_BASIS_COLLOCATED` removed; users should only use `CEED_BASIS_NONE`. - Remove unneeded pointer for `CeedElemRestrictionGetELayout`. +- Require use of `Ceed*Destroy()` on Ceed objects returned from `CeedOperatorFieldGet*()`; ### New features diff --git a/examples/fluids/problems/advection.c b/examples/fluids/problems/advection.c index 1795d6db0b..d1ce21755b 100644 --- a/examples/fluids/problems/advection.c +++ b/examples/fluids/problems/advection.c @@ -37,12 +37,10 @@ PetscErrorCode CreateKSPMassOperator_AdvectionStabilized(User user, CeedOperator PetscCallCeed(ceed, CeedCompositeOperatorGetSubList(user->op_rhs_ctx->op, &sub_ops)); PetscCallCeed(ceed, CeedOperatorGetFieldByName(sub_ops[sub_op_index], "q", &field)); - PetscCallCeed(ceed, CeedOperatorFieldGetElemRestriction(field, &elem_restr_q)); - PetscCallCeed(ceed, CeedOperatorFieldGetBasis(field, &basis_q)); + PetscCallCeed(ceed, CeedOperatorFieldGetData(field, NULL, &elem_restr_q, &basis_q, NULL)); PetscCallCeed(ceed, CeedOperatorGetFieldByName(sub_ops[sub_op_index], "qdata", &field)); - PetscCallCeed(ceed, CeedOperatorFieldGetElemRestriction(field, &elem_restr_qd_i)); - PetscCallCeed(ceed, CeedOperatorFieldGetVector(field, &q_data)); + PetscCallCeed(ceed, CeedOperatorFieldGetData(field, NULL, &elem_restr_qd_i, NULL, &q_data)); PetscCallCeed(ceed, CeedOperatorGetContext(sub_ops[sub_op_index], &qf_ctx)); } @@ -74,6 +72,10 @@ PetscErrorCode CreateKSPMassOperator_AdvectionStabilized(User user, CeedOperator PetscCallCeed(ceed, CeedOperatorSetField(*op_mass, "v", elem_restr_q, basis_q, CEED_VECTOR_ACTIVE)); PetscCallCeed(ceed, CeedOperatorSetField(*op_mass, "Grad_v", elem_restr_q, basis_q, CEED_VECTOR_ACTIVE)); + PetscCallCeed(ceed, CeedVectorDestroy(&q_data)); + PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_q)); + PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_qd_i)); + PetscCallCeed(ceed, CeedBasisDestroy(&basis_q)); PetscCallCeed(ceed, CeedQFunctionContextDestroy(&qf_ctx)); PetscCallCeed(ceed, CeedQFunctionDestroy(&qf_mass)); PetscFunctionReturn(PETSC_SUCCESS); diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c index b7349474e0..f6033cd392 100644 --- a/examples/fluids/problems/newtonian.c +++ b/examples/fluids/problems/newtonian.c @@ -173,12 +173,10 @@ PetscErrorCode CreateKSPMassOperator_NewtonianStabilized(User user, CeedOperator PetscCallCeed(ceed, CeedCompositeOperatorGetSubList(user->op_rhs_ctx->op, &sub_ops)); PetscCallCeed(ceed, CeedOperatorGetFieldByName(sub_ops[sub_op_index], "q", &field)); - PetscCallCeed(ceed, CeedOperatorFieldGetElemRestriction(field, &elem_restr_q)); - PetscCallCeed(ceed, CeedOperatorFieldGetBasis(field, &basis_q)); + PetscCallCeed(ceed, CeedOperatorFieldGetData(field, NULL, &elem_restr_q, &basis_q, NULL)); PetscCallCeed(ceed, CeedOperatorGetFieldByName(sub_ops[sub_op_index], "qdata", &field)); - PetscCallCeed(ceed, CeedOperatorFieldGetElemRestriction(field, &elem_restr_qd_i)); - PetscCallCeed(ceed, CeedOperatorFieldGetVector(field, &q_data)); + PetscCallCeed(ceed, CeedOperatorFieldGetData(field, NULL, &elem_restr_qd_i, NULL, &q_data)); PetscCallCeed(ceed, CeedOperatorGetContext(sub_ops[sub_op_index], &qf_ctx)); } @@ -203,6 +201,10 @@ PetscErrorCode CreateKSPMassOperator_NewtonianStabilized(User user, CeedOperator PetscCallCeed(ceed, CeedOperatorSetField(*op_mass, "v", elem_restr_q, basis_q, CEED_VECTOR_ACTIVE)); PetscCallCeed(ceed, CeedOperatorSetField(*op_mass, "Grad_v", elem_restr_q, basis_q, CEED_VECTOR_ACTIVE)); + PetscCallCeed(ceed, CeedVectorDestroy(&q_data)); + PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_q)); + PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_qd_i)); + PetscCallCeed(ceed, CeedBasisDestroy(&basis_q)); PetscCallCeed(ceed, CeedQFunctionContextDestroy(&qf_ctx)); PetscCallCeed(ceed, CeedQFunctionDestroy(&qf_mass)); PetscFunctionReturn(PETSC_SUCCESS); diff --git a/examples/fluids/src/differential_filter.c b/examples/fluids/src/differential_filter.c index c476f1896c..12c8771ca3 100644 --- a/examples/fluids/src/differential_filter.c +++ b/examples/fluids/src/differential_filter.c @@ -137,8 +137,7 @@ PetscErrorCode DifferentialFilterCreateOperators(Ceed ceed, User user, CeedData char field_name[PETSC_MAX_PATH_LEN]; PetscCall(PetscSNPrintf(field_name, PETSC_MAX_PATH_LEN, "v%" PetscInt_FMT, i)); PetscCallCeed(ceed, CeedOperatorGetFieldByName(diff_filter->op_rhs_ctx->op, field_name, &op_field)); - PetscCallCeed(ceed, CeedOperatorFieldGetElemRestriction(op_field, &elem_restr_filter)); - PetscCallCeed(ceed, CeedOperatorFieldGetBasis(op_field, &basis_filter)); + PetscCallCeed(ceed, CeedOperatorFieldGetData(op_field, NULL, &elem_restr_filter, &basis_filter, NULL)); } PetscCallCeed(ceed, CeedOperatorCreate(ceed, qf_lhs, NULL, NULL, &op_lhs_sub)); @@ -151,6 +150,8 @@ PetscErrorCode DifferentialFilterCreateOperators(Ceed ceed, User user, CeedData PetscCallCeed(ceed, CeedOperatorSetField(op_lhs_sub, "Grad_v", elem_restr_filter, basis_filter, CEED_VECTOR_ACTIVE)); PetscCallCeed(ceed, CeedCompositeOperatorAddSub(op_lhs, op_lhs_sub)); + PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_filter)); + PetscCallCeed(ceed, CeedBasisDestroy(&basis_filter)); PetscCallCeed(ceed, CeedQFunctionDestroy(&qf_lhs)); PetscCallCeed(ceed, CeedOperatorDestroy(&op_lhs_sub)); } diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c index a5bdfe1bb1..1754d5e521 100644 --- a/examples/fluids/src/setuplibceed.c +++ b/examples/fluids/src/setuplibceed.c @@ -30,12 +30,10 @@ static PetscErrorCode CreateKSPMassOperator_Unstabilized(User user, CeedOperator PetscCallCeed(ceed, CeedCompositeOperatorGetSubList(user->op_rhs_ctx->op, &sub_ops)); PetscCallCeed(ceed, CeedOperatorGetFieldByName(sub_ops[sub_op_index], "q", &field)); - PetscCallCeed(ceed, CeedOperatorFieldGetElemRestriction(field, &elem_restr_q)); - PetscCallCeed(ceed, CeedOperatorFieldGetBasis(field, &basis_q)); + PetscCallCeed(ceed, CeedOperatorFieldGetData(field, NULL, &elem_restr_q, &basis_q, NULL)); PetscCallCeed(ceed, CeedOperatorGetFieldByName(sub_ops[sub_op_index], "qdata", &field)); - PetscCallCeed(ceed, CeedOperatorFieldGetElemRestriction(field, &elem_restr_qd_i)); - PetscCallCeed(ceed, CeedOperatorFieldGetVector(field, &q_data)); + PetscCallCeed(ceed, CeedOperatorFieldGetData(field, NULL, &elem_restr_qd_i, NULL, &q_data)); } PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(elem_restr_q, &num_comp_q)); @@ -47,6 +45,10 @@ static PetscErrorCode CreateKSPMassOperator_Unstabilized(User user, CeedOperator PetscCallCeed(ceed, CeedOperatorSetField(*op_mass, "qdata", elem_restr_qd_i, CEED_BASIS_NONE, q_data)); PetscCallCeed(ceed, CeedOperatorSetField(*op_mass, "v", elem_restr_q, basis_q, CEED_VECTOR_ACTIVE)); + PetscCallCeed(ceed, CeedVectorDestroy(&q_data)); + PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_q)); + PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_qd_i)); + PetscCallCeed(ceed, CeedBasisDestroy(&basis_q)); PetscCallCeed(ceed, CeedQFunctionDestroy(&qf_mass)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -199,10 +201,12 @@ static PetscErrorCode AddBCSubOperators(User user, Ceed ceed, DM dm, SimpleBC bc PetscCallCeed(ceed, CeedOperatorGetFieldByName(sub_ops[sub_op_index], "q", &field)); PetscCallCeed(ceed, CeedOperatorFieldGetElemRestriction(field, &elem_restr_q)); PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(elem_restr_q, &num_comp_q)); + PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_q)); PetscCallCeed(ceed, CeedOperatorGetFieldByName(sub_ops[sub_op_index], "x", &field)); PetscCallCeed(ceed, CeedOperatorFieldGetElemRestriction(field, &elem_restr_x)); PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(elem_restr_x, &num_comp_x)); + PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_x)); } { // Get bases diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c index 5881846665..66c952117c 100644 --- a/interface/ceed-operator.c +++ b/interface/ceed-operator.c @@ -118,6 +118,9 @@ static int CeedOperatorFieldView(CeedOperatorField op_field, CeedQFunctionField if (basis == CEED_BASIS_NONE) fprintf(stream, "%s No basis\n", pre); if (vec == CEED_VECTOR_ACTIVE) fprintf(stream, "%s Active vector\n", pre); else if (vec == CEED_VECTOR_NONE) fprintf(stream, "%s No vector\n", pre); + + CeedCall(CeedVectorDestroy(&vec)); + CeedCall(CeedBasisDestroy(&basis)); return CEED_ERROR_SUCCESS; } @@ -160,7 +163,9 @@ int CeedOperatorSingleView(CeedOperator op, bool sub, FILE *stream) { } /** - @brief Find the active input vector `CeedBasis` for a non-composite `CeedOperator` + @brief Find the active input vector `CeedBasis` for a non-composite `CeedOperator`. + + Note: Caller is responsible for destroying the `active_basis` with @ref CeedBasisDestroy(). @param[in] op `CeedOperator` to find active `CeedBasis` for @param[out] active_basis `CeedBasis` for active input vector or `NULL` for composite operator @@ -175,7 +180,9 @@ int CeedOperatorGetActiveBasis(CeedOperator op, CeedBasis *active_basis) { } /** - @brief Find the active input and output vector `CeedBasis` for a non-composite `CeedOperator` + @brief Find the active input and output vector `CeedBasis` for a non-composite `CeedOperator`. + + Note: Caller is responsible for destroying the bases with @ref CeedBasisDestroy(). @param[in] op `CeedOperator` to find active `CeedBasis` for @param[out] active_input_basis `CeedBasis` for active input vector or `NULL` for composite operator @@ -207,8 +214,10 @@ int CeedOperatorGetActiveBases(CeedOperator op, CeedBasis *active_input_basis, C CeedCall(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); CeedCheck(!*active_input_basis || *active_input_basis == basis, ceed, CEED_ERROR_MINOR, "Multiple active input CeedBases found"); - *active_input_basis = basis; + if (!*active_input_basis) CeedCall(CeedBasisReferenceCopy(basis, active_input_basis)); + CeedCall(CeedBasisDestroy(&basis)); } + CeedCall(CeedVectorDestroy(&vec)); } CeedCheck(*active_input_basis, ceed, CEED_ERROR_INCOMPLETE, "No active input CeedBasis found"); } @@ -225,8 +234,10 @@ int CeedOperatorGetActiveBases(CeedOperator op, CeedBasis *active_input_basis, C CeedCall(CeedOperatorFieldGetBasis(op_output_fields[i], &basis)); CeedCheck(!*active_output_basis || *active_output_basis == basis, ceed, CEED_ERROR_MINOR, "Multiple active output CeedBases found"); - *active_output_basis = basis; + if (!*active_output_basis) CeedCall(CeedBasisReferenceCopy(basis, active_output_basis)); + CeedCall(CeedBasisDestroy(&basis)); } + CeedCall(CeedVectorDestroy(&vec)); } CeedCheck(*active_output_basis, ceed, CEED_ERROR_INCOMPLETE, "No active output CeedBasis found"); } @@ -235,7 +246,9 @@ int CeedOperatorGetActiveBases(CeedOperator op, CeedBasis *active_input_basis, C } /** - @brief Find the active vector `CeedElemRestriction` for a non-composite `CeedOperator` + @brief Find the active vector `CeedElemRestriction` for a non-composite `CeedOperator`. + + Note: Caller is responsible for destroying the `active_rstr` with @ref CeedElemRestrictionDestroy(). @param[in] op `CeedOperator` to find active `CeedElemRestriction` for @param[out] active_rstr `CeedElemRestriction` for active input vector or NULL for composite operator @@ -250,7 +263,9 @@ int CeedOperatorGetActiveElemRestriction(CeedOperator op, CeedElemRestriction *a } /** - @brief Find the active input and output vector `CeedElemRestriction` for a non-composite `CeedOperator` + @brief Find the active input and output vector `CeedElemRestriction` for a non-composite `CeedOperator`. + + Note: Caller is responsible for destroying the restrictions with @ref CeedElemRestrictionDestroy(). @param[in] op `CeedOperator` to find active `CeedElemRestriction` for @param[out] active_input_rstr `CeedElemRestriction` for active input vector or NULL for composite operator @@ -282,8 +297,10 @@ int CeedOperatorGetActiveElemRestrictions(CeedOperator op, CeedElemRestriction * CeedCall(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &rstr)); CeedCheck(!*active_input_rstr || *active_input_rstr == rstr, ceed, CEED_ERROR_MINOR, "Multiple active input CeedElemRestrictions found"); - *active_input_rstr = rstr; + if (!*active_input_rstr) CeedCall(CeedElemRestrictionReferenceCopy(rstr, active_input_rstr)); + CeedCall(CeedElemRestrictionDestroy(&rstr)); } + CeedCall(CeedVectorDestroy(&vec)); } CeedCheck(*active_input_rstr, ceed, CEED_ERROR_INCOMPLETE, "No active input CeedElemRestriction found"); } @@ -300,8 +317,10 @@ int CeedOperatorGetActiveElemRestrictions(CeedOperator op, CeedElemRestriction * CeedCall(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &rstr)); CeedCheck(!*active_output_rstr || *active_output_rstr == rstr, ceed, CEED_ERROR_MINOR, "Multiple active output CeedElemRestrictions found"); - *active_output_rstr = rstr; + if (!*active_output_rstr) CeedCall(CeedElemRestrictionReferenceCopy(rstr, active_output_rstr)); + CeedCall(CeedElemRestrictionDestroy(&rstr)); } + CeedCall(CeedVectorDestroy(&vec)); } CeedCheck(*active_output_rstr, ceed, CEED_ERROR_INCOMPLETE, "No active output CeedElemRestriction found"); } @@ -563,6 +582,7 @@ int CeedOperatorHasTensorBases(CeedOperator op, bool *has_tensor_bases) { CeedCall(CeedBasisIsTensor(basis, &is_tensor)); *has_tensor_bases &= is_tensor; } + CeedCall(CeedBasisDestroy(&basis)); } for (CeedInt i = 0; i < num_outputs; i++) { bool is_tensor; @@ -573,6 +593,7 @@ int CeedOperatorHasTensorBases(CeedOperator op, bool *has_tensor_bases) { CeedCall(CeedBasisIsTensor(basis, &is_tensor)); *has_tensor_bases &= is_tensor; } + CeedCall(CeedBasisDestroy(&basis)); } return CEED_ERROR_SUCCESS; } @@ -1138,7 +1159,9 @@ int CeedOperatorFieldGetName(CeedOperatorField op_field, const char **field_name } /** - @brief Get the `CeedElemRestriction` of a `CeedOperator` Field + @brief Get the `CeedElemRestriction` of a `CeedOperator` Field. + + Note: Caller is responsible for destroying the `rstr` with @ref CeedElemRestrictionDestroy(). @param[in] op_field `CeedOperator` Field @param[out] rstr Variable to store `CeedElemRestriction` @@ -1148,12 +1171,15 @@ int CeedOperatorFieldGetName(CeedOperatorField op_field, const char **field_name @ref Advanced **/ int CeedOperatorFieldGetElemRestriction(CeedOperatorField op_field, CeedElemRestriction *rstr) { - *rstr = op_field->elem_rstr; + *rstr = NULL; + CeedCall(CeedElemRestrictionReferenceCopy(op_field->elem_rstr, rstr)); return CEED_ERROR_SUCCESS; } /** - @brief Get the `CeedBasis` of a `CeedOperator` Field + @brief Get the `CeedBasis` of a `CeedOperator` Field. + + Note: Caller is responsible for destroying the `basis` with @ref CeedBasisDestroy(). @param[in] op_field `CeedOperator` Field @param[out] basis Variable to store `CeedBasis` @@ -1163,12 +1189,15 @@ int CeedOperatorFieldGetElemRestriction(CeedOperatorField op_field, CeedElemRest @ref Advanced **/ int CeedOperatorFieldGetBasis(CeedOperatorField op_field, CeedBasis *basis) { - *basis = op_field->basis; + *basis = NULL; + CeedCall(CeedBasisReferenceCopy(op_field->basis, basis)); return CEED_ERROR_SUCCESS; } /** - @brief Get the `CeedVector` of a `CeedOperator` Field + @brief Get the `CeedVector` of a `CeedOperator` Field. + + Note: Caller is responsible for destroying the `vec` with @ref CeedVectorDestroy(). @param[in] op_field `CeedOperator` Field @param[out] vec Variable to store `CeedVector` @@ -1178,14 +1207,17 @@ int CeedOperatorFieldGetBasis(CeedOperatorField op_field, CeedBasis *basis) { @ref Advanced **/ int CeedOperatorFieldGetVector(CeedOperatorField op_field, CeedVector *vec) { - *vec = op_field->vec; + *vec = NULL; + CeedCall(CeedVectorReferenceCopy(op_field->vec, vec)); return CEED_ERROR_SUCCESS; } /** @brief Get the data of a `CeedOperator` Field. - Any arguments set as `NULL` are ignored. + Any arguments set as `NULL` are ignored.. + + Note: Caller is responsible for destroying the `rstr`, `basis`, and `vec`. @param[in] op_field `CeedOperator` Field @param[out] field_name Variable to store the field name @@ -1652,12 +1684,15 @@ int CeedOperatorGetFlopsEstimate(CeedOperator op, CeedSize *flops) { CeedCall(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &rstr)); CeedCall(CeedElemRestrictionGetFlopsEstimate(rstr, CEED_NOTRANSPOSE, &rstr_flops)); + CeedCall(CeedElemRestrictionDestroy(&rstr)); *flops += rstr_flops; CeedCall(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); CeedCall(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); CeedCall(CeedBasisGetFlopsEstimate(basis, CEED_NOTRANSPOSE, eval_mode, &basis_flops)); + CeedCall(CeedBasisDestroy(&basis)); *flops += basis_flops * num_elem; } + CeedCall(CeedVectorDestroy(&vec)); } // QF FLOPs { @@ -1686,12 +1721,15 @@ int CeedOperatorGetFlopsEstimate(CeedOperator op, CeedSize *flops) { CeedCall(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &rstr)); CeedCall(CeedElemRestrictionGetFlopsEstimate(rstr, CEED_TRANSPOSE, &rstr_flops)); + CeedCall(CeedElemRestrictionDestroy(&rstr)); *flops += rstr_flops; CeedCall(CeedOperatorFieldGetBasis(op_output_fields[i], &basis)); CeedCall(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); CeedCall(CeedBasisGetFlopsEstimate(basis, CEED_TRANSPOSE, eval_mode, &basis_flops)); + CeedCall(CeedBasisDestroy(&basis)); *flops += basis_flops * num_elem; } + CeedCall(CeedVectorDestroy(&vec)); } } return CEED_ERROR_SUCCESS; @@ -2036,6 +2074,7 @@ int CeedOperatorApply(CeedOperator op, CeedVector in, CeedVector out, CeedReques if (vec != CEED_VECTOR_ACTIVE && vec != CEED_VECTOR_NONE) { CeedCall(CeedVectorSetValue(vec, 0.0)); } + CeedCall(CeedVectorDestroy(&vec)); } } // Apply @@ -2054,11 +2093,14 @@ int CeedOperatorApply(CeedOperator op, CeedVector in, CeedVector out, CeedReques CeedCall(CeedOperatorGetFields(op, NULL, NULL, &num_output_fields, &output_fields)); // Zero all output vectors for (CeedInt i = 0; i < num_output_fields; i++) { + bool is_active; CeedVector vec; CeedCall(CeedOperatorFieldGetVector(output_fields[i], &vec)); - if (vec == CEED_VECTOR_ACTIVE) vec = out; + is_active = vec == CEED_VECTOR_ACTIVE; + if (is_active) vec = out; if (vec != CEED_VECTOR_NONE) CeedCall(CeedVectorSetValue(vec, 0.0)); + if (!is_active) CeedCall(CeedVectorDestroy(&vec)); } // Apply if (op->num_elem > 0) CeedCall(op->ApplyAdd(op, in, out, request)); diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index c7f97ae721..346135ebdf 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -150,6 +150,9 @@ static int CeedOperatorCreateFallback(CeedOperator op) { CeedCall(CeedOperatorFieldGetData(input_fields[i], &field_name, &rstr, &basis, &vec)); CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec)); + CeedCall(CeedVectorDestroy(&vec)); + CeedCall(CeedElemRestrictionDestroy(&rstr)); + CeedCall(CeedBasisDestroy(&basis)); } for (CeedInt i = 0; i < num_output_fields; i++) { const char *field_name; @@ -159,6 +162,9 @@ static int CeedOperatorCreateFallback(CeedOperator op) { CeedCall(CeedOperatorFieldGetData(output_fields[i], &field_name, &rstr, &basis, &vec)); CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec)); + CeedCall(CeedVectorDestroy(&vec)); + CeedCall(CeedElemRestrictionDestroy(&rstr)); + CeedCall(CeedBasisDestroy(&basis)); } { CeedQFunctionAssemblyData data; @@ -528,6 +534,8 @@ static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, C CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out)); CeedCall(CeedVectorDestroy(&elem_dof_out)); } + CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in)); + CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out)); return CEED_ERROR_SUCCESS; } @@ -778,6 +786,8 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVecto } CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array)); CeedCall(CeedVectorDestroy(&assembled_qf)); + CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in)); + CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out)); return CEED_ERROR_SUCCESS; } @@ -818,6 +828,8 @@ static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num elem_size_out = elem_size_in; num_comp_out = num_comp_in; } + CeedCall(CeedElemRestrictionDestroy(&rstr_in)); + CeedCall(CeedElemRestrictionDestroy(&rstr_out)); *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in; return CEED_ERROR_SUCCESS; } @@ -860,39 +872,45 @@ static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_m for (CeedInt i = 0; i < num_input_fields; i++) { const char *field_name; CeedVector vec; - CeedElemRestriction rstr; - CeedBasis basis; + CeedElemRestriction rstr = NULL; + CeedBasis basis = NULL; CeedCall(CeedOperatorFieldGetName(input_fields[i], &field_name)); CeedCall(CeedOperatorFieldGetVector(input_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - rstr = rstr_coarse; - basis = basis_coarse; - CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_fine)); + CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr)); + CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis)); + if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_fine)); } else { CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr)); CeedCall(CeedOperatorFieldGetBasis(input_fields[i], &basis)); } CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec)); + CeedCall(CeedVectorDestroy(&vec)); + CeedCall(CeedElemRestrictionDestroy(&rstr)); + CeedCall(CeedBasisDestroy(&basis)); } // -- Clone output fields for (CeedInt i = 0; i < num_output_fields; i++) { const char *field_name; CeedVector vec; - CeedElemRestriction rstr; - CeedBasis basis; + CeedElemRestriction rstr = NULL; + CeedBasis basis = NULL; CeedCall(CeedOperatorFieldGetName(output_fields[i], &field_name)); CeedCall(CeedOperatorFieldGetVector(output_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - rstr = rstr_coarse; - basis = basis_coarse; - CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_fine)); + CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr)); + CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis)); + if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_fine)); } else { CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr)); CeedCall(CeedOperatorFieldGetBasis(output_fields[i], &basis)); } CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec)); + CeedCall(CeedVectorDestroy(&vec)); + CeedCall(CeedElemRestrictionDestroy(&rstr)); + CeedCall(CeedBasisDestroy(&basis)); } // -- Clone QFunctionAssemblyData { @@ -1014,6 +1032,7 @@ static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_m // Cleanup CeedCall(CeedVectorDestroy(&mult_vec)); + CeedCall(CeedElemRestrictionDestroy(&rstr_fine)); CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine)); CeedCall(CeedBasisDestroy(&basis_c_to_f)); return CEED_ERROR_SUCCESS; @@ -1429,6 +1448,7 @@ int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssem (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL; CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in)); CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in])); + CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in)); CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in)); num_eval_modes_in[index] = 0; CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in)); @@ -1450,7 +1470,9 @@ int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssem } num_eval_modes_in[index] += q_comp; } + CeedCall(CeedBasisDestroy(&basis_in)); } + CeedCall(CeedVectorDestroy(&vec)); } // Determine active output basis @@ -1484,6 +1506,7 @@ int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssem (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL; CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out)); CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out])); + CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out)); CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out)); num_eval_modes_out[index] = 0; CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out)); @@ -1505,7 +1528,9 @@ int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssem } num_eval_modes_out[index] += q_comp; } + CeedCall(CeedBasisDestroy(&basis_out)); } + CeedCall(CeedVectorDestroy(&vec)); } (*data)->num_active_bases_in = num_active_bases_in; (*data)->num_eval_modes_in = num_eval_modes_in; @@ -2166,6 +2191,7 @@ int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSi CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets)); CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets)); + CeedCall(CeedElemRestrictionDestroy(&active_elem_rstr)); CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr)); } return CEED_ERROR_SUCCESS; @@ -2494,6 +2520,7 @@ int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indic // -- Sub operator multiplicity CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr)); CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr)); + CeedCall(CeedElemRestrictionDestroy(&elem_rstr)); CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec)); CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0)); CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE)); @@ -2542,6 +2569,7 @@ int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fin CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine)); CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f)); + CeedCall(CeedBasisDestroy(&basis_fine)); } // Core code @@ -2597,6 +2625,7 @@ int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_ CeedCall(CeedBasisGetDimension(basis_fine, &dim)); CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f)); + CeedCall(CeedBasisDestroy(&basis_fine)); CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c); CeedCall(CeedCalloc(P_1d_f, &q_ref)); @@ -2660,6 +2689,7 @@ int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_f CeedCall(CeedBasisGetDimension(basis_fine, &dim)); CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp)); CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f)); + CeedCall(CeedBasisDestroy(&basis_fine)); CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c)); CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref)); CeedCall(CeedCalloc(num_nodes_f, &q_weight)); @@ -2743,9 +2773,10 @@ int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); interp = interp || eval_mode == CEED_EVAL_INTERP; grad = grad || eval_mode == CEED_EVAL_GRAD; - CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); - CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); + if (!basis) CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis)); + if (!rstr) CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); } + CeedCall(CeedVectorDestroy(&vec)); } CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set"); CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); @@ -2906,8 +2937,10 @@ int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, // Cleanup CeedCall(CeedVectorDestroy(&q_data)); - CeedCall(CeedBasisDestroy(&fdm_basis)); + CeedCall(CeedElemRestrictionDestroy(&rstr)); CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i)); + CeedCall(CeedBasisDestroy(&basis)); + CeedCall(CeedBasisDestroy(&fdm_basis)); CeedCall(CeedQFunctionDestroy(&qf_fdm)); return CEED_ERROR_SUCCESS; } From e03fef56705b317edc4a39dfee40c8366660a6d6 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Mon, 9 Sep 2024 12:26:07 -0600 Subject: [PATCH 2/6] rust - fix CeedOperatorFieldGet* --- rust/libceed/src/basis.rs | 7 ++ rust/libceed/src/elem_restriction.rs | 7 ++ rust/libceed/src/operator.rs | 129 ++++++++++++++++++--------- 3 files changed, 100 insertions(+), 43 deletions(-) diff --git a/rust/libceed/src/basis.rs b/rust/libceed/src/basis.rs index 4c11fb79b4..b308076ab6 100644 --- a/rust/libceed/src/basis.rs +++ b/rust/libceed/src/basis.rs @@ -173,6 +173,13 @@ impl<'a> Basis<'a> { }) } + pub(crate) fn from_raw(ptr: bind_ceed::CeedBasis) -> crate::Result { + Ok(Self { + ptr, + _lifeline: PhantomData, + }) + } + pub fn create_tensor_H1_Lagrange( ceed: &crate::Ceed, dim: usize, diff --git a/rust/libceed/src/elem_restriction.rs b/rust/libceed/src/elem_restriction.rs index 950a840403..e800f56c1e 100644 --- a/rust/libceed/src/elem_restriction.rs +++ b/rust/libceed/src/elem_restriction.rs @@ -193,6 +193,13 @@ impl<'a> ElemRestriction<'a> { }) } + pub(crate) fn from_raw(ptr: bind_ceed::CeedElemRestriction) -> crate::Result { + Ok(Self { + ptr, + _lifeline: PhantomData, + }) + } + pub fn create_oriented( ceed: &crate::Ceed, nelem: usize, diff --git a/rust/libceed/src/operator.rs b/rust/libceed/src/operator.rs index bd5d7f37f4..a1e5db55cb 100644 --- a/rust/libceed/src/operator.rs +++ b/rust/libceed/src/operator.rs @@ -17,6 +17,9 @@ use crate::prelude::*; #[derive(Debug)] pub struct OperatorField<'a> { pub(crate) ptr: bind_ceed::CeedOperatorField, + pub(crate) vector: crate::Vector<'a>, + pub(crate) elem_restriction: crate::ElemRestriction<'a>, + pub(crate) basis: crate::Basis<'a>, _lifeline: PhantomData<&'a ()>, } @@ -24,6 +27,39 @@ pub struct OperatorField<'a> { // Implementations // ----------------------------------------------------------------------------- impl<'a> OperatorField<'a> { + pub(crate) fn from_raw( + ptr: bind_ceed::CeedOperatorField, + ceed: crate::Ceed, + ) -> crate::Result { + let vector = { + let mut vector_ptr = std::ptr::null_mut(); + let ierr = unsafe { bind_ceed::CeedOperatorFieldGetVector(ptr, &mut vector_ptr) }; + ceed.check_error(ierr)?; + crate::Vector::from_raw(vector_ptr)? + }; + let elem_restriction = { + let mut elem_restriction_ptr = std::ptr::null_mut(); + let ierr = unsafe { + bind_ceed::CeedOperatorFieldGetElemRestriction(ptr, &mut elem_restriction_ptr) + }; + ceed.check_error(ierr)?; + crate::ElemRestriction::from_raw(elem_restriction_ptr)? + }; + let basis = { + let mut basis_ptr = std::ptr::null_mut(); + let ierr = unsafe { bind_ceed::CeedOperatorFieldGetBasis(ptr, &mut basis_ptr) }; + ceed.check_error(ierr)?; + crate::Basis::from_raw(basis_ptr)? + }; + Ok(Self { + ptr, + vector, + elem_restriction, + basis, + _lifeline: PhantomData, + }) + } + /// Get the name of an OperatorField /// /// ``` @@ -110,24 +146,21 @@ impl<'a> OperatorField<'a> { /// inputs[1].elem_restriction().is_none(), /// "Incorrect field ElemRestriction" /// ); + /// + /// let outputs = op.outputs()?; + /// + /// assert!( + /// outputs[0].elem_restriction().is_some(), + /// "Incorrect field ElemRestriction" + /// ); /// # Ok(()) /// # } /// ``` pub fn elem_restriction(&self) -> ElemRestrictionOpt { - let mut ptr = std::ptr::null_mut(); - unsafe { - bind_ceed::CeedOperatorFieldGetElemRestriction(self.ptr, &mut ptr); - } - if ptr == unsafe { bind_ceed::CEED_ELEMRESTRICTION_NONE } { + if self.elem_restriction.ptr == unsafe { bind_ceed::CEED_ELEMRESTRICTION_NONE } { ElemRestrictionOpt::None } else { - let slice = unsafe { - std::slice::from_raw_parts( - &ptr as *const bind_ceed::CeedElemRestriction as *const crate::ElemRestriction, - 1 as usize, - ) - }; - ElemRestrictionOpt::Some(&slice[0]) + ElemRestrictionOpt::Some(&self.elem_restriction) } } @@ -172,20 +205,10 @@ impl<'a> OperatorField<'a> { /// # } /// ``` pub fn basis(&self) -> BasisOpt { - let mut ptr = std::ptr::null_mut(); - unsafe { - bind_ceed::CeedOperatorFieldGetBasis(self.ptr, &mut ptr); - } - if ptr == unsafe { bind_ceed::CEED_BASIS_NONE } { + if self.basis.ptr == unsafe { bind_ceed::CEED_BASIS_NONE } { BasisOpt::None } else { - let slice = unsafe { - std::slice::from_raw_parts( - &ptr as *const bind_ceed::CeedBasis as *const crate::Basis, - 1 as usize, - ) - }; - BasisOpt::Some(&slice[0]) + BasisOpt::Some(&self.basis) } } @@ -222,26 +245,20 @@ impl<'a> OperatorField<'a> { /// /// assert!(inputs[0].vector().is_active(), "Incorrect field Vector"); /// assert!(inputs[1].vector().is_none(), "Incorrect field Vector"); + /// + /// let outputs = op.outputs()?; + /// + /// assert!(outputs[0].vector().is_active(), "Incorrect field Vector"); /// # Ok(()) /// # } /// ``` pub fn vector(&self) -> VectorOpt { - let mut ptr = std::ptr::null_mut(); - unsafe { - bind_ceed::CeedOperatorFieldGetVector(self.ptr, &mut ptr); - } - if ptr == unsafe { bind_ceed::CEED_VECTOR_ACTIVE } { + if self.vector.ptr == unsafe { bind_ceed::CEED_VECTOR_ACTIVE } { VectorOpt::Active - } else if ptr == unsafe { bind_ceed::CEED_VECTOR_NONE } { + } else if self.vector.ptr == unsafe { bind_ceed::CEED_VECTOR_NONE } { VectorOpt::None } else { - let slice = unsafe { - std::slice::from_raw_parts( - &ptr as *const bind_ceed::CeedVector as *const crate::Vector, - 1 as usize, - ) - }; - VectorOpt::Some(&slice[0]) + VectorOpt::Some(&self.vector) } } } @@ -814,7 +831,7 @@ impl<'a> Operator<'a> { /// # Ok(()) /// # } /// ``` - pub fn inputs(&self) -> crate::Result<&[crate::OperatorField]> { + pub fn inputs(&self) -> crate::Result> { // Get array of raw C pointers for inputs let mut num_inputs = 0; let mut inputs_ptr = std::ptr::null_mut(); @@ -831,11 +848,24 @@ impl<'a> Operator<'a> { // Convert raw C pointers to fixed length slice let inputs_slice = unsafe { std::slice::from_raw_parts( - inputs_ptr as *const crate::OperatorField, + inputs_ptr as *mut bind_ceed::CeedOperatorField, num_inputs as usize, ) }; - Ok(inputs_slice) + // And finally build vec + let ceed = { + let mut ptr = std::ptr::null_mut(); + let mut ptr_copy = std::ptr::null_mut(); + unsafe { + bind_ceed::CeedOperatorGetCeed(self.op_core.ptr, &mut ptr); + bind_ceed::CeedReferenceCopy(ptr, &mut ptr_copy); // refcount + } + crate::Ceed { ptr } + }; + let inputs = (0..num_inputs as usize) + .map(|i| crate::OperatorField::from_raw(inputs_slice[i], ceed.clone())) + .collect::>>()?; + Ok(inputs) } /// Get a slice of Operator outputs @@ -873,7 +903,7 @@ impl<'a> Operator<'a> { /// # Ok(()) /// # } /// ``` - pub fn outputs(&self) -> crate::Result<&[crate::OperatorField]> { + pub fn outputs(&self) -> crate::Result> { // Get array of raw C pointers for outputs let mut num_outputs = 0; let mut outputs_ptr = std::ptr::null_mut(); @@ -890,11 +920,24 @@ impl<'a> Operator<'a> { // Convert raw C pointers to fixed length slice let outputs_slice = unsafe { std::slice::from_raw_parts( - outputs_ptr as *const crate::OperatorField, + outputs_ptr as *mut bind_ceed::CeedOperatorField, num_outputs as usize, ) }; - Ok(outputs_slice) + // And finally build vec + let ceed = { + let mut ptr = std::ptr::null_mut(); + let mut ptr_copy = std::ptr::null_mut(); + unsafe { + bind_ceed::CeedOperatorGetCeed(self.op_core.ptr, &mut ptr); + bind_ceed::CeedReferenceCopy(ptr, &mut ptr_copy); // refcount + } + crate::Ceed { ptr } + }; + let outputs = (0..num_outputs as usize) + .map(|i| crate::OperatorField::from_raw(outputs_slice[i], ceed.clone())) + .collect::>>()?; + Ok(outputs) } /// Check if Operator is setup correctly From 567c3c299f2b1379a2173048e3e4ce3fc0fd52d0 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Mon, 9 Sep 2024 14:48:28 -0600 Subject: [PATCH 3/6] rust - restore tests from #1451 --- rust/libceed/src/operator.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/rust/libceed/src/operator.rs b/rust/libceed/src/operator.rs index a1e5db55cb..809c78f6e6 100644 --- a/rust/libceed/src/operator.rs +++ b/rust/libceed/src/operator.rs @@ -142,6 +142,14 @@ impl<'a> OperatorField<'a> { /// inputs[0].elem_restriction().is_some(), /// "Incorrect field ElemRestriction" /// ); + /// if let ElemRestrictionOpt::Some(r) = inputs[0].elem_restriction() { + /// assert_eq!( + /// r.num_elements(), + /// ne, + /// "Incorrect field ElemRestriction number of elements" + /// ); + /// } + /// /// assert!( /// inputs[1].elem_restriction().is_none(), /// "Incorrect field ElemRestriction" @@ -153,6 +161,13 @@ impl<'a> OperatorField<'a> { /// outputs[0].elem_restriction().is_some(), /// "Incorrect field ElemRestriction" /// ); + /// if let ElemRestrictionOpt::Some(r) = outputs[0].elem_restriction() { + /// assert_eq!( + /// r.num_elements(), + /// ne, + /// "Incorrect field ElemRestriction number of elements" + /// ); + /// } /// # Ok(()) /// # } /// ``` @@ -196,7 +211,21 @@ impl<'a> OperatorField<'a> { /// let inputs = op.inputs()?; /// /// assert!(inputs[0].basis().is_some(), "Incorrect field Basis"); + /// if let BasisOpt::Some(b) = inputs[0].basis() { + /// assert_eq!( + /// b.num_quadrature_points(), + /// q, + /// "Incorrect field Basis number of quadrature points" + /// ); + /// } /// assert!(inputs[1].basis().is_some(), "Incorrect field Basis"); + /// if let BasisOpt::Some(b) = inputs[1].basis() { + /// assert_eq!( + /// b.num_quadrature_points(), + /// q, + /// "Incorrect field Basis number of quadrature points" + /// ); + /// } /// /// let outputs = op.outputs()?; /// From ac2269bd6b9128b5262c5b76aac90f6c3e7aeebf Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 11 Sep 2024 11:03:32 -0600 Subject: [PATCH 4/6] sycl - consistency and correctness fixes --- .../sycl-ref/ceed-sycl-ref-operator.sycl.cpp | 91 ++++++++++--------- backends/sycl-ref/ceed-sycl-ref.hpp | 1 - 2 files changed, 50 insertions(+), 42 deletions(-) diff --git a/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp b/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp index 35c0fd5097..a046690420 100644 --- a/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp +++ b/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp @@ -70,7 +70,7 @@ static int CeedOperatorDestroy_Sycl(CeedOperator op) { } CeedCallBackend(CeedFree(&impl->q_vecs_out)); - // QFunction assembly dataf + // QFunction assembly data for (CeedInt i = 0; i < impl->num_active_in; i++) { CeedCallBackend(CeedVectorDestroy(&impl->qf_active_in[i])); } @@ -132,7 +132,7 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool for (CeedInt i = 0; i < num_fields; i++) { CeedEvalMode eval_mode; CeedVector vec; - CeedElemRestriction rstr; + CeedElemRestriction elem_rstr; CeedBasis basis; CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); @@ -140,7 +140,7 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool is_strided = false; skip_restriction = false; if (eval_mode != CEED_EVAL_WEIGHT) { - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr)); // Check whether this field can skip the element restriction: // must be passive input, with eval_mode NONE, and have a strided restriction with CEED_STRIDES_BACKEND. @@ -153,10 +153,10 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool // Check eval_mode if (eval_mode == CEED_EVAL_NONE) { // Check for is_strided restriction - CeedCallBackend(CeedElemRestrictionIsStrided(rstr, &is_strided)); + CeedCallBackend(CeedElemRestrictionIsStrided(elem_rstr, &is_strided)); if (is_strided) { // Check if vector is already in preferred backend ordering - CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &skip_restriction)); + CeedCallBackend(CeedElemRestrictionHasBackendStrides(elem_rstr, &skip_restriction)); } } } @@ -166,9 +166,9 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool // We do not need an E-Vector, but will use the input field vector's data directly in the operator application e_vecs[i + start_e] = NULL; } else { - CeedCallBackend(CeedElemRestrictionCreateVector(rstr, NULL, &e_vecs[i + start_e])); + CeedCallBackend(CeedElemRestrictionCreateVector(elem_rstr, NULL, &e_vecs[i + start_e])); } - CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } switch (eval_mode) { @@ -276,11 +276,11 @@ static inline int CeedOperatorSetupInputs_Sycl(CeedInt num_input_fields, CeedQFu // No restriction for this field; read data directly from vec. CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, (const CeedScalar **)&e_data[i])); } else { - CeedElemRestriction rstr; + CeedElemRestriction elem_rstr; - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &rstr)); - CeedCallBackend(CeedElemRestrictionApply(rstr, CEED_NOTRANSPOSE, vec, impl->e_vecs[i], request)); - CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); + CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, vec, impl->e_vecs[i], request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_DEVICE, (const CeedScalar **)&e_data[i])); } } @@ -297,7 +297,6 @@ static inline int CeedOperatorInputBasis_Sycl(CeedInt num_elem, CeedQFunctionFie CeedOperator_Sycl *impl) { for (CeedInt i = 0; i < num_input_fields; i++) { CeedEvalMode eval_mode; - CeedBasis basis; // Skip active input if (skip_active) { @@ -316,11 +315,14 @@ static inline int CeedOperatorInputBasis_Sycl(CeedInt num_elem, CeedQFunctionFie CeedCallBackend(CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_DEVICE, CEED_USE_POINTER, e_data[i])); break; case CEED_EVAL_INTERP: - case CEED_EVAL_GRAD: + case CEED_EVAL_GRAD: { + CeedBasis basis; + CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, eval_mode, impl->e_vecs[i], impl->q_vecs_in[i])); CeedCallBackend(CeedBasisDestroy(&basis)); break; + } case CEED_EVAL_WEIGHT: break; // No action case CEED_EVAL_DIV: @@ -405,13 +407,12 @@ static int CeedOperatorApplyAdd_Sycl(CeedOperator op, CeedVector in_vec, CeedVec // Output basis apply if needed for (CeedInt i = 0; i < num_output_fields; i++) { - CeedElemRestriction rstr; - CeedBasis basis; + CeedElemRestriction elem_rstr; // Get elem_size, eval_mode, size - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &rstr)); - CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); - CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); + CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &size)); // Basis action @@ -419,18 +420,22 @@ static int CeedOperatorApplyAdd_Sycl(CeedOperator op, CeedVector in_vec, CeedVec case CEED_EVAL_NONE: break; case CEED_EVAL_INTERP: - case CEED_EVAL_GRAD: + case CEED_EVAL_GRAD: { + CeedBasis basis; + CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis)); CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_TRANSPOSE, eval_mode, impl->q_vecs_out[i], impl->e_vecs[i + impl->num_e_in])); CeedCallBackend(CeedBasisDestroy(&basis)); break; + } // LCOV_EXCL_START - case CEED_EVAL_WEIGHT: + case CEED_EVAL_WEIGHT: { Ceed ceed; CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); break; // Should not occur + } case CEED_EVAL_DIV: case CEED_EVAL_CURL: { Ceed ceed; @@ -448,7 +453,7 @@ static int CeedOperatorApplyAdd_Sycl(CeedOperator op, CeedVector in_vec, CeedVec bool is_active; CeedEvalMode eval_mode; CeedVector vec; - CeedElemRestriction rstr; + CeedElemRestriction elem_rstr; // Restore evec CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); @@ -458,11 +463,11 @@ static int CeedOperatorApplyAdd_Sycl(CeedOperator op, CeedVector in_vec, CeedVec // Restrict CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); is_active = vec == CEED_VECTOR_ACTIVE; - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &rstr)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); if (is_active) vec = out_vec; - CeedCallBackend(CeedElemRestrictionApply(rstr, CEED_TRANSPOSE, impl->e_vecs[i + impl->num_inputs], vec, request)); + CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, impl->e_vecs[i + impl->num_e_in], vec, request)); if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); - CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } // Restore input arrays @@ -473,8 +478,8 @@ static int CeedOperatorApplyAdd_Sycl(CeedOperator op, CeedVector in_vec, CeedVec //------------------------------------------------------------------------------ // Core code for assembling linear QFunction //------------------------------------------------------------------------------ -static inline int CeedOperatorLinearAssembleQFunctionCore_Sycl(CeedOperator op, bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr, - CeedRequest *request) { +static inline int CeedOperatorLinearAssembleQFunctionCore_Sycl(CeedOperator op, bool build_objects, CeedVector *assembled, + CeedElemRestriction *elem_rstr, CeedRequest *request) { Ceed ceed, ceed_parent; CeedSize q_size; CeedInt num_active_in, num_active_out, Q, num_elem, num_input_fields, num_output_fields, size; @@ -555,7 +560,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Sycl(CeedOperator op, CeedInt strides[3] = {1, num_elem * Q, Q}; /* *NOPAD* */ // Create output restriction - CeedCallBackend(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, Q, num_active_in * num_active_out, l_size, strides, rstr)); + CeedCallBackend(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, Q, num_active_in * num_active_out, l_size, strides, elem_rstr)); // Create assembled vector CeedCallBackend(CeedVectorCreate(ceed_parent, l_size, assembled)); } @@ -612,15 +617,16 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Sycl(CeedOperator op, //------------------------------------------------------------------------------ // Assemble Linear QFunction //------------------------------------------------------------------------------ -static int CeedOperatorLinearAssembleQFunction_Sycl(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { - return CeedOperatorLinearAssembleQFunctionCore_Sycl(op, true, assembled, rstr, request); +static int CeedOperatorLinearAssembleQFunction_Sycl(CeedOperator op, CeedVector *assembled, CeedElemRestriction *elem_rstr, CeedRequest *request) { + return CeedOperatorLinearAssembleQFunctionCore_Sycl(op, true, assembled, elem_rstr, request); } //------------------------------------------------------------------------------ // Update Assembled Linear QFunction //------------------------------------------------------------------------------ -static int CeedOperatorLinearAssembleQFunctionUpdate_Sycl(CeedOperator op, CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { - return CeedOperatorLinearAssembleQFunctionCore_Sycl(op, false, &assembled, &rstr, request); +static int CeedOperatorLinearAssembleQFunctionUpdate_Sycl(CeedOperator op, CeedVector assembled, CeedElemRestriction elem_rstr, + CeedRequest *request) { + return CeedOperatorLinearAssembleQFunctionCore_Sycl(op, false, &assembled, &elem_rstr, request); } //------------------------------------------------------------------------------ @@ -892,22 +898,25 @@ static int CeedOperatorLinearDiagonal_Sycl(sycl::queue &sycl_queue, const bool i // Assemble diagonal common code //------------------------------------------------------------------------------ static inline int CeedOperatorAssembleDiagonalCore_Sycl(CeedOperator op, CeedVector assembled, CeedRequest *request, const bool is_point_block) { - Ceed ceed; - Ceed_Sycl *sycl_data; - CeedInt num_elem; - CeedScalar *elem_diag_array; - const CeedScalar *assembled_qf_array; - CeedVector assembled_qf = NULL; - CeedElemRestriction rstr = NULL; - CeedOperator_Sycl *impl; + Ceed ceed; + Ceed_Sycl *sycl_data; + CeedInt num_elem; + CeedScalar *elem_diag_array; + const CeedScalar *assembled_qf_array; + CeedVector assembled_qf = NULL; + CeedOperator_Sycl *impl; CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); CeedCallBackend(CeedOperatorGetData(op, &impl)); CeedCallBackend(CeedGetData(ceed, &sycl_data)); // Assemble QFunction - CeedCallBackend(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request)); - CeedCallBackend(CeedElemRestrictionDestroy(&rstr)); + { + CeedElemRestriction elem_rstr = NULL; + + CeedCallBackend(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &elem_rstr, request)); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); + } // Setup if (!impl->diag) { diff --git a/backends/sycl-ref/ceed-sycl-ref.hpp b/backends/sycl-ref/ceed-sycl-ref.hpp index c435e65e4f..46123d5569 100644 --- a/backends/sycl-ref/ceed-sycl-ref.hpp +++ b/backends/sycl-ref/ceed-sycl-ref.hpp @@ -106,7 +106,6 @@ typedef struct { CeedVector *q_vecs_out; // Output Q-vectors needed to apply operator CeedInt num_e_in; CeedInt num_e_out; - CeedInt num_inputs, num_outputs; CeedInt num_active_in, num_active_out; CeedVector *qf_active_in; CeedOperatorDiag_Sycl *diag; From 85938a6d1dd5e68e6deadca612b182f7422c5a77 Mon Sep 17 00:00:00 2001 From: James Wright Date: Wed, 11 Sep 2024 17:26:41 +0000 Subject: [PATCH 5/6] sycl - Misc fixes --- backends/sycl-gen/ceed-sycl-gen-operator.sycl.cpp | 4 ++-- backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backends/sycl-gen/ceed-sycl-gen-operator.sycl.cpp b/backends/sycl-gen/ceed-sycl-gen-operator.sycl.cpp index 650a52cf4d..0736446f4c 100644 --- a/backends/sycl-gen/ceed-sycl-gen-operator.sycl.cpp +++ b/backends/sycl-gen/ceed-sycl-gen-operator.sycl.cpp @@ -107,12 +107,12 @@ static int CeedOperatorApplyAdd_Sycl_gen(CeedOperator op, CeedVector input_vec, break; } } - if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); if (index == -1) { CeedCallBackend(CeedVectorGetArray(vec, CEED_MEM_DEVICE, &impl->fields->outputs[i])); } else { impl->fields->outputs[i] = impl->fields->outputs[index]; } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } } @@ -189,10 +189,10 @@ static int CeedOperatorApplyAdd_Sycl_gen(CeedOperator op, CeedVector input_vec, break; } } - if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); if (index == -1) { CeedCallBackend(CeedVectorRestoreArray(vec, &impl->fields->outputs[i])); } + if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); } } diff --git a/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp b/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp index a046690420..69761c340e 100644 --- a/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp +++ b/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp @@ -1023,9 +1023,9 @@ static int CeedSingleOperatorAssembleSetup_Sycl(CeedOperator op) { CeedCallBackend(CeedOperatorFieldGetBasis(input_fields[i], &basis)); CeedCheck(!basis_in || basis_in == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator assembly with multiple active bases"); if (!basis_in) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_in)); + CeedCallBackend(CeedBasisGetDimension(basis, &dim)); + CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); CeedCallBackend(CeedBasisDestroy(&basis)); - CeedCallBackend(CeedBasisGetDimension(basis_in, &dim)); - CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); CeedCallBackend(CeedOperatorFieldGetElemRestriction(input_fields[i], &elem_rstr)); if (!rstr_in) CeedCallBackend(CeedElemRestrictionReferenceCopy(elem_rstr, &rstr_in)); CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); From 6782e2f8092b6c5e48173c28c35c372c61b457a7 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 17 Sep 2024 11:39:38 -0600 Subject: [PATCH 6/6] sycl - fix regresions --- .../ceed-sycl-gen-operator-build.sycl.cpp | 8 +- .../sycl-ref/ceed-sycl-ref-operator.sycl.cpp | 98 +++++++++++-------- 2 files changed, 60 insertions(+), 46 deletions(-) diff --git a/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp b/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp index b14e155124..ee7aab812c 100644 --- a/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp +++ b/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp @@ -274,9 +274,9 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { // Get elem_size, eval_mode, num_comp CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp)); CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); + CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); // Set field constants if (eval_mode != CEED_EVAL_WEIGHT) { @@ -334,9 +334,9 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { // Get elem_size, eval_mode, num_comp CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp)); CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); + CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); // Set field constants CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis)); @@ -401,8 +401,8 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { // Get elem_size, eval_mode, num_comp CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp)); + CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); // Restriction if (eval_mode != CEED_EVAL_WEIGHT && !((eval_mode == CEED_EVAL_NONE) && use_collograd_parallelization)) { @@ -677,8 +677,8 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { // Get elem_size, eval_mode, num_comp CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); CeedCallBackend(CeedElemRestrictionGetNumComponents(elem_rstr, &num_comp)); + CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); // Basis action code << " // EvalMode: " << CeedEvalModes[eval_mode] << "\n"; switch (eval_mode) { diff --git a/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp b/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp index 69761c340e..e43981c217 100644 --- a/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp +++ b/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp @@ -89,10 +89,13 @@ static int CeedOperatorDestroy_Sycl(CeedOperator op) { CeedCallSycl(ceed, sycl::free(impl->diag->d_interp_out, sycl_data->sycl_context)); CeedCallSycl(ceed, sycl::free(impl->diag->d_grad_in, sycl_data->sycl_context)); CeedCallSycl(ceed, sycl::free(impl->diag->d_grad_out, sycl_data->sycl_context)); - CeedCallBackend(CeedElemRestrictionDestroy(&impl->diag->point_block_diag_rstr)); CeedCallBackend(CeedVectorDestroy(&impl->diag->elem_diag)); CeedCallBackend(CeedVectorDestroy(&impl->diag->point_block_elem_diag)); + CeedCallBackend(CeedElemRestrictionDestroy(&impl->diag->diag_rstr)); + CeedCallBackend(CeedElemRestrictionDestroy(&impl->diag->point_block_diag_rstr)); + CeedCallBackend(CeedBasisDestroy(&impl->diag->basis_in)); + CeedCallBackend(CeedBasisDestroy(&impl->diag->basis_out)); } CeedCallBackend(CeedFree(&impl->diag)); @@ -115,7 +118,7 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool Ceed ceed; CeedSize q_size; bool is_strided, skip_restriction; - CeedInt dim, size; + CeedInt size; CeedOperatorField *op_fields; CeedQFunctionField *qf_fields; @@ -133,7 +136,6 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool CeedEvalMode eval_mode; CeedVector vec; CeedElemRestriction elem_rstr; - CeedBasis basis; CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); @@ -183,20 +185,21 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); break; case CEED_EVAL_GRAD: - CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_fields[i], &size)); - CeedCallBackend(CeedBasisGetDimension(basis, &dim)); - CeedCallBackend(CeedBasisDestroy(&basis)); q_size = (CeedSize)num_elem * Q * size; CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); break; - case CEED_EVAL_WEIGHT: // Only on input fields - CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); + case CEED_EVAL_WEIGHT: { + CeedBasis basis; + + // Note: only on input fields q_size = (CeedSize)num_elem * Q; CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); + CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i])); CeedCallBackend(CeedBasisDestroy(&basis)); break; + } case CEED_EVAL_DIV: break; // TODO: Not implemented case CEED_EVAL_CURL: @@ -463,8 +466,8 @@ static int CeedOperatorApplyAdd_Sycl(CeedOperator op, CeedVector in_vec, CeedVec // Restrict CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); is_active = vec == CEED_VECTOR_ACTIVE; - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); if (is_active) vec = out_vec; + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); CeedCallBackend(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, impl->e_vecs[i + impl->num_e_in], vec, request)); if (!is_active) CeedCallBackend(CeedVectorDestroy(&vec)); CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); @@ -637,6 +640,7 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { Ceed_Sycl *sycl_data; CeedInt num_input_fields, num_output_fields, num_eval_mode_in = 0, num_comp = 0, dim = 1, num_eval_mode_out = 0; CeedEvalMode *eval_mode_in = NULL, *eval_mode_out = NULL; + CeedElemRestriction rstr_in = NULL, rstr_out = NULL; CeedBasis basis_in = NULL, basis_out = NULL; CeedQFunctionField *qf_fields; CeedQFunction qf; @@ -655,14 +659,19 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - CeedEvalMode eval_mode; - CeedBasis basis; + CeedEvalMode eval_mode; + CeedElemRestriction elem_rstr; + CeedBasis basis; + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr)); + if (!rstr_in) CeedCallBackend(CeedElemRestrictionReferenceCopy(elem_rstr, &rstr_in)); + CeedCheck(rstr_in == elem_rstr, ceed, CEED_ERROR_BACKEND, "Backend does not implement multi-field non-composite operator diagonal assembly"); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); - CeedCheck(!basis_in || basis_in == basis, ceed, CEED_ERROR_BACKEND, - "Backend does not implement operator diagonal assembly with multiple active bases"); if (!basis_in) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_in)); + CeedCheck(basis_in == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator diagonal assembly with multiple active bases"); CeedCallBackend(CeedBasisDestroy(&basis)); + CeedCallBackend(CeedBasisGetDimension(basis_in, &dim)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); switch (eval_mode) { case CEED_EVAL_NONE: @@ -684,6 +693,7 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { } CeedCallBackend(CeedVectorDestroy(&vec)); } + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_in)); // Determine active output basis CeedCallBackend(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields)); @@ -693,26 +703,30 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { CeedCallBackend(CeedOperatorFieldGetVector(op_fields[i], &vec)); if (vec == CEED_VECTOR_ACTIVE) { - CeedEvalMode eval_mode; - CeedBasis basis; + CeedEvalMode eval_mode; + CeedElemRestriction elem_rstr; + CeedBasis basis; + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr)); + if (!rstr_out) CeedCallBackend(CeedElemRestrictionReferenceCopy(elem_rstr, &rstr_out)); + CeedCheck(rstr_out == elem_rstr, ceed, CEED_ERROR_BACKEND, "Backend does not implement multi-field non-composite operator diagonal assembly"); + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); - CeedCheck(!basis_out || basis_out == basis, ceed, CEED_ERROR_BACKEND, - "Backend does not implement operator diagonal assembly with multiple active bases"); if (!basis_out) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_out)); + CeedCheck(basis_out == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator diagonal assembly with multiple active bases"); CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); switch (eval_mode) { case CEED_EVAL_NONE: case CEED_EVAL_INTERP: - CeedCallBackend(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in)); - eval_mode_in[num_eval_mode_in] = eval_mode; - num_eval_mode_in += 1; + CeedCallBackend(CeedRealloc(num_eval_mode_out + 1, &eval_mode_out)); + eval_mode_out[num_eval_mode_out] = eval_mode; + num_eval_mode_out += 1; break; case CEED_EVAL_GRAD: - CeedCallBackend(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in)); - for (CeedInt d = 0; d < dim; d++) eval_mode_in[num_eval_mode_in + d] = eval_mode; - num_eval_mode_in += dim; + CeedCallBackend(CeedRealloc(num_eval_mode_out + dim, &eval_mode_out)); + for (CeedInt d = 0; d < dim; d++) eval_mode_out[num_eval_mode_out + d] = eval_mode; + num_eval_mode_out += dim; break; case CEED_EVAL_WEIGHT: case CEED_EVAL_DIV: @@ -729,8 +743,8 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { CeedCallBackend(CeedCalloc(1, &impl->diag)); CeedOperatorDiag_Sycl *diag = impl->diag; - diag->basis_in = basis_in; - diag->basis_out = basis_out; + CeedCallBackend(CeedBasisReferenceCopy(basis_in, &diag->basis_in)); + CeedCallBackend(CeedBasisReferenceCopy(basis_out, &diag->basis_out)); diag->h_eval_mode_in = eval_mode_in; diag->h_eval_mode_out = eval_mode_out; diag->num_eval_mode_in = num_eval_mode_in; @@ -740,6 +754,7 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { CeedInt num_nodes, num_qpts; CeedCallBackend(CeedBasisGetNumNodes(basis_in, &num_nodes)); CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); + CeedCallBackend(CeedBasisGetNumComponents(basis_in, &num_comp)); diag->num_nodes = num_nodes; diag->num_qpts = num_qpts; diag->num_comp = num_comp; @@ -801,13 +816,12 @@ static inline int CeedOperatorAssembleDiagonalSetup_Sycl(CeedOperator op) { copy_events.push_back(eval_mode_out_copy); // Restriction - { - CeedElemRestriction rstr_out; + CeedCallBackend(CeedElemRestrictionReferenceCopy(rstr_out, &diag->diag_rstr)); + CeedCallBackend(CeedElemRestrictionDestroy(&rstr_out)); - CeedCallBackend(CeedOperatorGetActiveElemRestrictions(op, NULL, &rstr_out)); - diag->diag_rstr = rstr_out; - CeedCallBackend(CeedElemRestrictionDestroy(&rstr_out)); - } + // Cleanup + CeedCallBackend(CeedBasisDestroy(&basis_in)); + CeedCallBackend(CeedBasisDestroy(&basis_out)); // Wait for all copies to complete and handle exceptions CeedCallSycl(ceed, sycl::event::wait_and_throw(copy_events)); @@ -1020,16 +1034,16 @@ static int CeedSingleOperatorAssembleSetup_Sycl(CeedOperator op) { CeedElemRestriction elem_rstr; CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetBasis(input_fields[i], &basis)); - CeedCheck(!basis_in || basis_in == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator assembly with multiple active bases"); - if (!basis_in) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_in)); - CeedCallBackend(CeedBasisGetDimension(basis, &dim)); - CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); - CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedOperatorFieldGetElemRestriction(input_fields[i], &elem_rstr)); if (!rstr_in) CeedCallBackend(CeedElemRestrictionReferenceCopy(elem_rstr, &rstr_in)); CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); CeedCallBackend(CeedElemRestrictionGetElementSize(rstr_in, &elem_size)); + CeedCallBackend(CeedOperatorFieldGetBasis(input_fields[i], &basis)); + if (!basis_in) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_in)); + CeedCheck(basis_in == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator assembly with multiple active bases"); + CeedCallBackend(CeedBasisDestroy(&basis)); + CeedCallBackend(CeedBasisGetDimension(basis_in, &dim)); + CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); if (eval_mode != CEED_EVAL_NONE) { CeedCallBackend(CeedRealloc(num_B_in_mats_to_load + 1, &eval_mode_in)); @@ -1058,14 +1072,14 @@ static int CeedSingleOperatorAssembleSetup_Sycl(CeedOperator op) { CeedElemRestriction elem_rstr; CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetBasis(output_fields[i], &basis)); - CeedCheck(!basis_out || basis_out == basis, ceed, CEED_ERROR_BACKEND, - "Backend does not implement operator assembly with multiple active bases"); - if (!basis_out) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_out)); - CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedOperatorFieldGetElemRestriction(output_fields[i], &elem_rstr)); if (!rstr_out) CeedCallBackend(CeedElemRestrictionReferenceCopy(elem_rstr, &rstr_out)); + CeedCheck(rstr_out == rstr_in, ceed, CEED_ERROR_BACKEND, "Backend does not implement multi-field non-composite operator assembly"); CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); + CeedCallBackend(CeedOperatorFieldGetBasis(output_fields[i], &basis)); + if (!basis_out) CeedCallBackend(CeedBasisReferenceCopy(basis, &basis_out)); + CeedCheck(basis_out == basis, ceed, CEED_ERROR_BACKEND, "Backend does not implement operator assembly with multiple active bases"); + CeedCallBackend(CeedBasisDestroy(&basis)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode)); if (eval_mode != CEED_EVAL_NONE) { CeedCallBackend(CeedRealloc(num_B_out_mats_to_load + 1, &eval_mode_out));