Skip to content

Commit

Permalink
basis - add CeedBasisApplyAdd + CPU impl
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremylt committed Aug 6, 2024
1 parent 50ca0d9 commit 855975c
Show file tree
Hide file tree
Showing 11 changed files with 383 additions and 30 deletions.
48 changes: 30 additions & 18 deletions backends/ref/ceed-ref-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
//------------------------------------------------------------------------------
// Basis Apply
//------------------------------------------------------------------------------
static int CeedBasisApply_Ref(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector U, CeedVector V) {
static int CeedBasisApplyCore_Ref(CeedBasis basis, bool apply_add, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector U,
CeedVector V) {
Ceed ceed;
bool is_tensor_basis;
bool is_tensor_basis, add = apply_add || (t_mode == CEED_TRANSPOSE);
CeedInt dim, num_comp, q_comp, num_nodes, num_qpts;
const CeedInt add = (t_mode == CEED_TRANSPOSE);
const CeedScalar *u;
CeedScalar *v;
CeedTensorContract contract;
Expand All @@ -36,14 +36,10 @@ static int CeedBasisApply_Ref(CeedBasis basis, CeedInt num_elem, CeedTransposeMo
CeedCallBackend(CeedBasisGetTensorContract(basis, &contract));
if (U != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(U, CEED_MEM_HOST, &u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
CeedCallBackend(CeedVectorGetArrayWrite(V, CEED_MEM_HOST, &v));

// Clear v if operating in transpose
if (t_mode == CEED_TRANSPOSE) {
const CeedInt v_size = num_elem * num_comp * num_nodes;

for (CeedInt i = 0; i < v_size; i++) v[i] = (CeedScalar)0.0;
}
if (t_mode == CEED_TRANSPOSE && !apply_add) CeedCallBackend(CeedVectorSetValue(V, 0.0));
if (apply_add) CeedCallBackend(CeedVectorGetArray(V, CEED_MEM_HOST, &v));
else CeedCallBackend(CeedVectorGetArrayWrite(V, CEED_MEM_HOST, &v));

CeedCallBackend(CeedBasisIsTensor(basis, &is_tensor_basis));
if (is_tensor_basis) {
Expand Down Expand Up @@ -101,12 +97,13 @@ static int CeedBasisApply_Ref(CeedBasis basis, CeedInt num_elem, CeedTransposeMo
// or Grad to quadrature points (Transpose)
for (CeedInt d = 0; d < dim; d++) {
CeedCallBackend(CeedTensorContractApply(contract, pre, P, post, Q, (t_mode == CEED_NOTRANSPOSE ? interp_1d : impl->collo_grad_1d), t_mode,
add && (d > 0),
(t_mode == CEED_NOTRANSPOSE ? (d == 0 ? u : tmp[d % 2]) : u + d * num_qpts * num_comp * num_elem),
(t_mode == CEED_TRANSPOSE) && (d > 0),
(t_mode == CEED_NOTRANSPOSE ? (d == 0 ? u : tmp[d % 2]) : &u[d * num_qpts * num_comp * num_elem]),
(t_mode == CEED_NOTRANSPOSE ? (d == dim - 1 ? interp : tmp[(d + 1) % 2]) : interp)));
pre /= P;
post *= Q;
}

// Grad to quadrature points (NoTranspose)
// or Interpolate to nodes (Transpose)
P = Q_1d, Q = Q_1d;
Expand All @@ -117,9 +114,10 @@ static int CeedBasisApply_Ref(CeedBasis basis, CeedInt num_elem, CeedTransposeMo
pre = num_comp * CeedIntPow(P, dim - 1), post = num_elem;
for (CeedInt d = 0; d < dim; d++) {
CeedCallBackend(CeedTensorContractApply(
contract, pre, P, post, Q, (t_mode == CEED_NOTRANSPOSE ? impl->collo_grad_1d : interp_1d), t_mode, add && (d == dim - 1),
contract, pre, P, post, Q, (t_mode == CEED_NOTRANSPOSE ? impl->collo_grad_1d : interp_1d), t_mode,
(t_mode == CEED_NOTRANSPOSE && apply_add) || (t_mode == CEED_TRANSPOSE && (d == dim - 1)),
(t_mode == CEED_NOTRANSPOSE ? interp : (d == 0 ? interp : tmp[d % 2])),
(t_mode == CEED_NOTRANSPOSE ? v + d * num_qpts * num_comp * num_elem : (d == dim - 1 ? v : tmp[(d + 1) % 2]))));
(t_mode == CEED_NOTRANSPOSE ? &v[d * num_qpts * num_comp * num_elem] : (d == dim - 1 ? v : tmp[(d + 1) % 2]))));
pre /= P;
post *= Q;
}
Expand All @@ -133,8 +131,8 @@ static int CeedBasisApply_Ref(CeedBasis basis, CeedInt num_elem, CeedTransposeMo

for (CeedInt d = 0; d < dim; d++) {
CeedCallBackend(CeedTensorContractApply(contract, pre, P, post, Q, grad_1d, t_mode, add && (d > 0),
t_mode == CEED_NOTRANSPOSE ? u : u + d * num_comp * num_qpts * num_elem,
t_mode == CEED_TRANSPOSE ? v : v + d * num_comp * num_qpts * num_elem));
t_mode == CEED_NOTRANSPOSE ? u : &u[d * num_comp * num_qpts * num_elem],
t_mode == CEED_TRANSPOSE ? v : &v[d * num_comp * num_qpts * num_elem]));
pre /= P;
post *= Q;
}
Expand All @@ -156,8 +154,8 @@ static int CeedBasisApply_Ref(CeedBasis basis, CeedInt num_elem, CeedTransposeMo
for (CeedInt d = 0; d < dim; d++) {
CeedCallBackend(CeedTensorContractApply(
contract, pre, P, post, Q, (p == d) ? grad_1d : interp_1d, t_mode, add && (d == dim - 1),
(d == 0 ? (t_mode == CEED_NOTRANSPOSE ? u : u + p * num_comp * num_qpts * num_elem) : tmp[d % 2]),
(d == dim - 1 ? (t_mode == CEED_TRANSPOSE ? v : v + p * num_comp * num_qpts * num_elem) : tmp[(d + 1) % 2])));
(d == 0 ? (t_mode == CEED_NOTRANSPOSE ? u : &u[p * num_comp * num_qpts * num_elem]) : tmp[d % 2]),
(d == dim - 1 ? (t_mode == CEED_TRANSPOSE ? v : &v[p * num_comp * num_qpts * num_elem]) : tmp[(d + 1) % 2])));
pre /= P;
post *= Q;
}
Expand Down Expand Up @@ -249,6 +247,16 @@ static int CeedBasisApply_Ref(CeedBasis basis, CeedInt num_elem, CeedTransposeMo
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApply_Ref(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector U, CeedVector V) {
CeedCallBackend(CeedBasisApplyCore_Ref(basis, false, num_elem, t_mode, eval_mode, U, V));
return CEED_ERROR_SUCCESS;
}

static int CeedBasisApplyAdd_Ref(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector U, CeedVector V) {
CeedCallBackend(CeedBasisApplyCore_Ref(basis, true, num_elem, t_mode, eval_mode, U, V));
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Basis Destroy Tensor
//------------------------------------------------------------------------------
Expand Down Expand Up @@ -297,6 +305,7 @@ int CeedBasisCreateTensorH1_Ref(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const C
CeedCallBackend(CeedBasisSetTensorContract(basis, contract));

CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Ref));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Ref));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyTensor_Ref));
return CEED_ERROR_SUCCESS;
}
Expand All @@ -316,6 +325,7 @@ int CeedBasisCreateH1_Ref(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes,
CeedCallBackend(CeedBasisSetTensorContract(basis, contract));

CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Ref));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Ref));
return CEED_ERROR_SUCCESS;
}

Expand All @@ -334,6 +344,7 @@ int CeedBasisCreateHdiv_Ref(CeedElemTopology topo, CeedInt dim, CeedInt num_node
CeedCallBackend(CeedBasisSetTensorContract(basis, contract));

CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Ref));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Ref));
return CEED_ERROR_SUCCESS;
}

Expand All @@ -352,6 +363,7 @@ int CeedBasisCreateHcurl_Ref(CeedElemTopology topo, CeedInt dim, CeedInt num_nod
CeedCallBackend(CeedBasisSetTensorContract(basis, contract));

CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Ref));
CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Ref));
return CEED_ERROR_SUCCESS;
}

Expand Down
1 change: 1 addition & 0 deletions include/ceed-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ struct CeedElemRestriction_private {
struct CeedBasis_private {
Ceed ceed;
int (*Apply)(CeedBasis, CeedInt, CeedTransposeMode, CeedEvalMode, CeedVector, CeedVector);
int (*ApplyAdd)(CeedBasis, CeedInt, CeedTransposeMode, CeedEvalMode, CeedVector, CeedVector);
int (*ApplyAtPoints)(CeedBasis, CeedInt, const CeedInt *, CeedTransposeMode, CeedEvalMode, CeedVector, CeedVector, CeedVector);
int (*Destroy)(CeedBasis);
int ref_count;
Expand Down
1 change: 1 addition & 0 deletions include/ceed/ceed.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ CEED_EXTERN int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_
CEED_EXTERN int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy);
CEED_EXTERN int CeedBasisView(CeedBasis basis, FILE *stream);
CEED_EXTERN int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v);
CEED_EXTERN int CeedBasisApplyAdd(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v);
CEED_EXTERN int CeedBasisApplyAtPoints(CeedBasis basis, CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, CeedEvalMode eval_mode,
CeedVector x_ref, CeedVector u, CeedVector v);
CEED_EXTERN int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed);
Expand Down
12 changes: 6 additions & 6 deletions include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ extern "C" __global__ void Interp(const CeedInt num_elem, const CeedInt is_trans

// Contract along middle index
for (CeedInt k = i; k < writeLen; k += blockDim.x) {
const CeedInt c = k % post;
const CeedInt j = (k / post) % Q;
const CeedInt a = k / (post * Q);
CeedScalar vk = 0;
const CeedInt c = k % post;
const CeedInt j = (k / post) % Q;
const CeedInt a = k / (post * Q);
CeedScalar v_k = 0;

for (CeedInt b = 0; b < P; b++) vk += s_interp_1d[j * stride_0 + b * stride_1] * in[(a * P + b) * post + c];
out[k] = vk;
for (CeedInt b = 0; b < P; b++) v_k += s_interp_1d[j * stride_0 + b * stride_1] * in[(a * P + b) * post + c];
out[k] = v_k;
}
post *= Q;
}
Expand Down
62 changes: 57 additions & 5 deletions interface/ceed-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -1486,7 +1486,7 @@ int CeedBasisView(CeedBasis basis, FILE *stream) {
}

/**
@brief Apply basis evaluation from nodes to quadrature points or vice versa
@brief Check input vector dimensions for CeedBasisApply[Add]
@param[in] basis `CeedBasis` to evaluate
@param[in] num_elem The number of elements to apply the basis evaluation to;
Expand All @@ -1504,9 +1504,9 @@ int CeedBasisView(CeedBasis basis, FILE *stream) {
@return An error code: 0 - success, otherwise - failure
@ref User
@ref Developer
**/
int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
static int CeedBasisApplyCheckDims(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
CeedInt dim, num_comp, q_comp, num_nodes, num_qpts;
CeedSize u_length = 0, v_length;
Ceed ceed;
Expand All @@ -1520,8 +1520,6 @@ int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode,
CeedCall(CeedVectorGetLength(v, &v_length));
if (u) CeedCall(CeedVectorGetLength(u, &u_length));

CeedCheck(basis->Apply, ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedBasisApply");

// Check compatibility of topological and geometrical dimensions
CeedCheck((t_mode == CEED_TRANSPOSE && v_length % num_nodes == 0 && u_length % num_qpts == 0) ||
(t_mode == CEED_NOTRANSPOSE && u_length % num_nodes == 0 && v_length % num_qpts == 0),
Expand All @@ -1544,11 +1542,65 @@ int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode,
break;
}
CeedCheck(has_good_dims, ceed, CEED_ERROR_DIMENSION, "Input/output vectors too short for basis and evaluation mode");
return CEED_ERROR_SUCCESS;
}

/**
@brief Apply basis evaluation from nodes to quadrature points or vice versa
@param[in] basis `CeedBasis` to evaluate
@param[in] num_elem The number of elements to apply the basis evaluation to;
the backend will specify the ordering in @ref CeedElemRestrictionCreate()
@param[in] t_mode @ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature points;
@ref CEED_TRANSPOSE to apply the transpose, mapping from quadrature points to nodes
@param[in] eval_mode @ref CEED_EVAL_NONE to use values directly,
@ref CEED_EVAL_INTERP to use interpolated values,
@ref CEED_EVAL_GRAD to use gradients,
@ref CEED_EVAL_DIV to use divergence,
@ref CEED_EVAL_CURL to use curl,
@ref CEED_EVAL_WEIGHT to use quadrature weights
@param[in] u Input `CeedVector`
@param[out] v Output `CeedVector`
@return An error code: 0 - success, otherwise - failure
@ref User
**/
int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
CeedCall(CeedBasisApplyCheckDims(basis, num_elem, t_mode, eval_mode, u, v));
CeedCheck(basis->Apply, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedBasisApply");
CeedCall(basis->Apply(basis, num_elem, t_mode, eval_mode, u, v));
return CEED_ERROR_SUCCESS;
}

/**
@brief Apply basis evaluation from quadrature points to nodes and sum into target vector
@param[in] basis `CeedBasis` to evaluate
@param[in] num_elem The number of elements to apply the basis evaluation to;
the backend will specify the ordering in @ref CeedElemRestrictionCreate()
@param[in] t_mode @ref CEED_TRANSPOSE to apply the transpose, mapping from quadrature points to nodes
@param[in] eval_mode @ref CEED_EVAL_NONE to use values directly,
@ref CEED_EVAL_INTERP to use interpolated values,
@ref CEED_EVAL_GRAD to use gradients,
@ref CEED_EVAL_DIV to use divergence,
@ref CEED_EVAL_CURL to use curl,
@ref CEED_EVAL_WEIGHT to use quadrature weights
@param[in] u Input `CeedVector`
@param[out] v Output `CeedVector` to sum into
@return An error code: 0 - success, otherwise - failure
@ref User
**/
int CeedBasisApplyAdd(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
CeedCheck(t_mode == CEED_TRANSPOSE, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED, "CeedBasisApplyAdd only supports CEED_TRANSPOSE");
CeedCall(CeedBasisApplyCheckDims(basis, num_elem, t_mode, eval_mode, u, v));
CeedCheck(basis->ApplyAdd, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED, "Backend does not implement CeedBasisApplyAdd");
CeedCall(basis->ApplyAdd(basis, num_elem, t_mode, eval_mode, u, v));
return CEED_ERROR_SUCCESS;
}

/**
@brief Apply basis evaluation from nodes to arbitrary points
Expand Down
1 change: 1 addition & 0 deletions interface/ceed.c
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,7 @@ int CeedInit(const char *resource, Ceed *ceed) {
CEED_FTABLE_ENTRY(CeedElemRestriction, GetAtPointsElementOffset),
CEED_FTABLE_ENTRY(CeedElemRestriction, Destroy),
CEED_FTABLE_ENTRY(CeedBasis, Apply),
CEED_FTABLE_ENTRY(CeedBasis, ApplyAdd),
CEED_FTABLE_ENTRY(CeedBasis, ApplyAtPoints),
CEED_FTABLE_ENTRY(CeedBasis, Destroy),
CEED_FTABLE_ENTRY(CeedTensorContract, Apply),
Expand Down
3 changes: 2 additions & 1 deletion tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ The tests are organized by API object, and some tests are further organized, as
    2. CeedBasis simplex basis tests\
    3. CeedBasis non-tensor H(div) basis tests\
    4. CeedBasis non-tensor H(curl) basis tests\
    5. CeedBasis evaluation at arbitrary points tests
    5. CeedBasis evaluation at arbitrary points tests\
6. CeedBasis ApplyAdd tests
4. CeedQFunction Tests\
    0. CeedQFunction user code tests\
    1. CeedQFunction gallery code tests
Expand Down
56 changes: 56 additions & 0 deletions tests/t360-basis.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/// @file
/// Test interpolation ApplyAdd in multiple dimensions
/// \test Test interpolation ApplyAdd in multiple dimensions
#include <ceed.h>
#include <math.h>
#include <stdio.h>

int main(int argc, char **argv) {
Ceed ceed;

CeedInit(argv[1], &ceed);

for (CeedInt dim = 1; dim <= 3; dim++) {
CeedVector u, u_q, v, v_q, w_q;
CeedBasis basis;
CeedInt p = 4, q = 5, p_dim = CeedIntPow(p, dim), q_dim = CeedIntPow(q, dim);

CeedVectorCreate(ceed, p_dim, &u);
CeedVectorCreate(ceed, p_dim, &v);
CeedVectorSetValue(u, 1.0);
CeedVectorSetValue(v, 0.0);
CeedVectorCreate(ceed, q_dim, &u_q);
CeedVectorCreate(ceed, q_dim, &v_q);
CeedVectorCreate(ceed, q_dim, &w_q);

CeedBasisCreateTensorH1Lagrange(ceed, dim, 1, p, q, CEED_GAUSS, &basis);

// Compute area
CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, u_q);
CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, w_q);
CeedVectorPointwiseMult(v_q, u_q, w_q);
CeedBasisApply(basis, 1, CEED_TRANSPOSE, CEED_EVAL_INTERP, v_q, v);
// Double area computed
CeedBasisApplyAdd(basis, 1, CEED_TRANSPOSE, CEED_EVAL_INTERP, v_q, v);

// Check area
{
const CeedScalar *v_array;
CeedScalar area = 0.0;

CeedVectorGetArrayRead(v, CEED_MEM_HOST, &v_array);
for (CeedInt i = 0; i < p_dim; i++) area += v_array[i];
if (fabs(area - 2.0 * CeedIntPow(2, dim)) > 1E-6) printf("Incorrect area computed %f != %f\n", area, 2.0 * CeedIntPow(2, dim));
CeedVectorRestoreArrayRead(v, &v_array);
}

CeedVectorDestroy(&u);
CeedVectorDestroy(&v);
CeedVectorDestroy(&u_q);
CeedVectorDestroy(&v_q);
CeedVectorDestroy(&w_q);
CeedBasisDestroy(&basis);
}
CeedDestroy(&ceed);
return 0;
}
Loading

0 comments on commit 855975c

Please sign in to comment.