diff --git a/Makefile b/Makefile index cb25f658b5..023d1ed559 100644 --- a/Makefile +++ b/Makefile @@ -572,7 +572,7 @@ $(OBJDIR)/%.o : $(CURDIR)/%.sycl.cpp | $$(@D)/.DIR $(call quiet,SYCLCXX) $(SYCLFLAGS) $(CPPFLAGS) -c -o $@ $(abspath $<) $(OBJDIR)/%$(EXE_SUFFIX) : tests/%.c | $$(@D)/.DIR - $(call quiet,LINK.c) $(CEED_LDFLAGS) -o $@ $(abspath $<) $(CEED_LIBS) $(CEED_LDLIBS) $(LDLIBS) + $(call quiet,LINK.c) $(CEED_LDFLAGS) -o $@ $(abspath $<) $(CEED_LIBS) $(CEED_LDLIBS) $(LDLIBS) -I./tests/test-include $(OBJDIR)/%$(EXE_SUFFIX) : tests/%.f90 | $$(@D)/.DIR $(call quiet,LINK.F) -DSOURCE_DIR='"$(abspath $( #include -#ifdef CEED_F64_H +#ifdef CEED_SCALAR_IS_FP64 #define rtype __m256d #define loadu _mm256_loadu_pd #define storeu _mm256_storeu_pd diff --git a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp index c744ea4254..315db3844f 100644 --- a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp +++ b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp @@ -696,42 +696,17 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op) { CeedCallBackend(CeedGetData(ceed, &ceed_data)); CeedCallBackend(cudaGetDeviceProperties(&prop, ceed_data->device_id)); if ((prop.major < 6) && (CEED_SCALAR_TYPE != CEED_SCALAR_FP32)) { - char *atomic_add_source; - const char *atomic_add_path; - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-atomic-add-fallback.h", &atomic_add_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Atomic Add Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, atomic_add_path, &atomic_add_source)); - code << atomic_add_source; - CeedCallBackend(CeedFree(&atomic_add_path)); - CeedCallBackend(CeedFree(&atomic_add_source)); + code << "// AtomicAdd fallback source\n"; + code << "#include \n\n"; } } // Load basis source files // TODO: Add non-tensor, AtPoints - { - char *tensor_basis_kernel_source; - const char *tensor_basis_kernel_path; - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-shared-basis-tensor-templates.h", &tensor_basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Tensor Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, tensor_basis_kernel_path, &tensor_basis_kernel_source)); - code << tensor_basis_kernel_source; - CeedCallBackend(CeedFree(&tensor_basis_kernel_path)); - CeedCallBackend(CeedFree(&tensor_basis_kernel_source)); - } - { - char *cuda_gen_template_source; - const char *cuda_gen_template_path; - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-gen-templates.h", &cuda_gen_template_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Cuda-Gen Template Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, cuda_gen_template_path, &cuda_gen_template_source)); - code << cuda_gen_template_source; - CeedCallBackend(CeedFree(&cuda_gen_template_path)); - CeedCallBackend(CeedFree(&cuda_gen_template_source)); - } + code << "// Tensor basis source\n"; + code << "#include \n\n"; + code << "// CodeGen operator source\n"; + code << "#include \n\n"; // Get QFunction name std::string qfunction_name(qf_data->qfunction_name); @@ -749,9 +724,13 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op) { // Add user QFunction source { - std::string qfunction_source(qf_data->qfunction_source); + const char *source_path; + + CeedCallBackend(CeedQFunctionGetSourcePath(qf, &source_path)); + CeedCheck(source_path, ceed, CEED_ERROR_UNSUPPORTED, "/gpu/cuda/gen backend requires QFunction source code file"); - code << qfunction_source; + code << "// User QFunction source\n"; + code << "#include \"" << source_path << "\"\n\n"; } // Setup diff --git a/backends/cuda-gen/ceed-cuda-gen-qfunction.c b/backends/cuda-gen/ceed-cuda-gen-qfunction.c index ccff67a476..aec5294a8d 100644 --- a/backends/cuda-gen/ceed-cuda-gen-qfunction.c +++ b/backends/cuda-gen/ceed-cuda-gen-qfunction.c @@ -27,7 +27,6 @@ static int CeedQFunctionDestroy_Cuda_gen(CeedQFunction qf) { CeedCallBackend(CeedQFunctionGetData(qf, &data)); CeedCallCuda(CeedQFunctionReturnCeed(qf), cudaFree(data->d_c)); - CeedCallBackend(CeedFree(&data->qfunction_source)); CeedCallBackend(CeedFree(&data)); return CEED_ERROR_SUCCESS; } @@ -45,10 +44,6 @@ int CeedQFunctionCreate_Cuda_gen(CeedQFunction qf) { // Read QFunction source CeedCallBackend(CeedQFunctionGetKernelName(qf, &data->qfunction_name)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source -----\n"); - CeedCallBackend(CeedQFunctionLoadSourceToBuffer(qf, &data->qfunction_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source Complete! -----\n"); - CeedCheck(data->qfunction_source, ceed, CEED_ERROR_UNSUPPORTED, "/gpu/cuda/gen backend requires QFunction source code file"); CeedCallBackend(CeedSetBackendFunction(ceed, "QFunction", qf, "Apply", CeedQFunctionApply_Cuda_gen)); CeedCallBackend(CeedSetBackendFunction(ceed, "QFunction", qf, "Destroy", CeedQFunctionDestroy_Cuda_gen)); diff --git a/backends/cuda-gen/ceed-cuda-gen.h b/backends/cuda-gen/ceed-cuda-gen.h index d10dece242..bd0c76e671 100644 --- a/backends/cuda-gen/ceed-cuda-gen.h +++ b/backends/cuda-gen/ceed-cuda-gen.h @@ -26,7 +26,6 @@ typedef struct { typedef struct { const char *qfunction_name; - const char *qfunction_source; void *d_c; } CeedQFunction_Cuda_gen; diff --git a/backends/cuda-ref/ceed-cuda-ref-basis.c b/backends/cuda-ref/ceed-cuda-ref-basis.c index 1c38ce002c..3eca8134c3 100644 --- a/backends/cuda-ref/ceed-cuda-ref-basis.c +++ b/backends/cuda-ref/ceed-cuda-ref-basis.c @@ -182,24 +182,17 @@ static int CeedBasisApplyAtPointsCore_Cuda(CeedBasis basis, bool apply_add, cons } // -- Compile kernels - char *basis_kernel_source; - const char *basis_kernel_path; - CeedInt num_comp; + const char basis_kernel_source[] = "// AtPoints basis source\n#include \n"; + CeedInt num_comp; if (data->moduleAtPoints) CeedCallCuda(ceed, cuModuleUnload(data->moduleAtPoints)); CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-tensor-at-points.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS", max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1))); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); } // Get read/write access to u, v @@ -419,8 +412,6 @@ static int CeedBasisDestroyNonTensor_Cuda(CeedBasis basis) { int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) { Ceed ceed; - char *basis_kernel_source; - const char *basis_kernel_path; CeedInt num_comp; const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); const CeedInt interp_bytes = q_bytes * P_1d; @@ -440,19 +431,15 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedCallCuda(ceed, cudaMemcpy(data->d_grad_1d, grad_1d, interp_bytes, cudaMemcpyHostToDevice)); // Compile basis kernels + const char basis_kernel_source[] = "// Tensor basis source\n#include \n"; + CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-tensor.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim))); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Grad", &data->Grad)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); CeedCallBackend(CeedBasisSetData(basis, data)); @@ -471,8 +458,6 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { Ceed ceed; - char *basis_kernel_source; - const char *basis_kernel_path; CeedInt num_comp, q_comp_interp, q_comp_grad; const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); CeedBasisNonTensor_Cuda *data; @@ -501,11 +486,9 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes } // Compile basis kernels + const char basis_kernel_source[] = "// Nontensor basis source\n#include \n"; + CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp)); @@ -513,8 +496,6 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Deriv", &data->Deriv)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); CeedCallBackend(CeedBasisSetData(basis, data)); @@ -531,8 +512,6 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes int CeedBasisCreateHdiv_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { Ceed ceed; - char *basis_kernel_source; - const char *basis_kernel_path; CeedInt num_comp, q_comp_interp, q_comp_div; const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); CeedBasisNonTensor_Cuda *data; @@ -561,11 +540,9 @@ int CeedBasisCreateHdiv_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nod } // Compile basis kernels + const char basis_kernel_source[] = "// Nontensor basis source\n#include \n"; + CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp)); @@ -573,8 +550,6 @@ int CeedBasisCreateHdiv_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nod CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Deriv", &data->Deriv)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); CeedCallBackend(CeedBasisSetData(basis, data)); @@ -591,8 +566,6 @@ int CeedBasisCreateHdiv_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nod int CeedBasisCreateHcurl_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { Ceed ceed; - char *basis_kernel_source; - const char *basis_kernel_path; CeedInt num_comp, q_comp_interp, q_comp_curl; const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); CeedBasisNonTensor_Cuda *data; @@ -621,11 +594,9 @@ int CeedBasisCreateHcurl_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_no } // Compile basis kernels + const char basis_kernel_source[] = "// Nontensor basis source\n#include \n"; + CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp)); @@ -633,8 +604,6 @@ int CeedBasisCreateHcurl_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_no CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Deriv", &data->Deriv)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); CeedCallBackend(CeedBasisSetData(basis, data)); diff --git a/backends/cuda-ref/ceed-cuda-ref-operator.c b/backends/cuda-ref/ceed-cuda-ref-operator.c index ceb940dad6..8cb8855ba7 100644 --- a/backends/cuda-ref/ceed-cuda-ref-operator.c +++ b/backends/cuda-ref/ceed-cuda-ref-operator.c @@ -1286,8 +1286,6 @@ static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op) { //------------------------------------------------------------------------------ static inline int CeedOperatorAssembleDiagonalSetupCompile_Cuda(CeedOperator op, CeedInt use_ceedsize_idx, const bool is_point_block) { Ceed ceed; - char *diagonal_kernel_source; - const char *diagonal_kernel_path; CeedInt num_input_fields, num_output_fields, num_eval_modes_in = 0, num_eval_modes_out = 0; CeedInt num_comp, q_comp, num_nodes, num_qpts; CeedBasis basis_in = NULL, basis_out = NULL; @@ -1351,22 +1349,18 @@ static inline int CeedOperatorAssembleDiagonalSetupCompile_Cuda(CeedOperator op, CeedOperatorDiag_Cuda *diag = impl->diag; // Assemble kernel - CUmodule *module = is_point_block ? &diag->module_point_block : &diag->module; - CeedInt elems_per_block = 1; + const char diagonal_kernel_source[] = "// Diagonal assembly source\n#include \n"; + CUmodule *module = is_point_block ? &diag->module_point_block : &diag->module; + CeedInt elems_per_block = 1; + CeedCallBackend(CeedBasisGetNumNodes(basis_in, &num_nodes)); CeedCallBackend(CeedBasisGetNumComponents(basis_in, &num_comp)); if (basis_in == CEED_BASIS_NONE) num_qpts = num_nodes; else CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h", &diagonal_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Diagonal Assembly Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, diagonal_kernel_path, &diagonal_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Diagonal Assembly Source Complete! -----\n"); CeedCallCuda(ceed, CeedCompile_Cuda(ceed, diagonal_kernel_source, module, 8, "NUM_EVAL_MODES_IN", num_eval_modes_in, "NUM_EVAL_MODES_OUT", num_eval_modes_out, "NUM_COMP", num_comp, "NUM_NODES", num_nodes, "NUM_QPTS", num_qpts, "USE_CEEDSIZE", use_ceedsize_idx, "USE_POINT_BLOCK", is_point_block ? 1 : 0, "BLOCK_SIZE", num_nodes * elems_per_block)); CeedCallCuda(ceed, CeedGetKernel_Cuda(ceed, *module, "LinearDiagonal", is_point_block ? &diag->LinearPointBlock : &diag->LinearDiagonal)); - CeedCallBackend(CeedFree(&diagonal_kernel_path)); - CeedCallBackend(CeedFree(&diagonal_kernel_source)); CeedCallBackend(CeedBasisDestroy(&basis_in)); CeedCallBackend(CeedBasisDestroy(&basis_out)); return CEED_ERROR_SUCCESS; @@ -1481,8 +1475,6 @@ static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Cuda(CeedOperator op, static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op, CeedInt use_ceedsize_idx) { Ceed ceed; Ceed_Cuda *cuda_data; - char *assembly_kernel_source; - const char *assembly_kernel_path; CeedInt num_input_fields, num_output_fields, num_eval_modes_in = 0, num_eval_modes_out = 0; CeedInt elem_size_in, num_qpts_in = 0, num_comp_in, elem_size_out, num_qpts_out, num_comp_out, q_comp; CeedEvalMode *eval_modes_in = NULL, *eval_modes_out = NULL; @@ -1589,20 +1581,16 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op, CeedInt use_cee } // Compile kernels + const char assembly_kernel_source[] = "// Full assembly source\n#include \n"; + CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in)); CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-operator-assemble.h", &assembly_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Assembly Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, assembly_kernel_path, &assembly_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Assembly Source Complete! -----\n"); CeedCallBackend(CeedCompile_Cuda(ceed, assembly_kernel_source, &asmb->module, 10, "NUM_EVAL_MODES_IN", num_eval_modes_in, "NUM_EVAL_MODES_OUT", num_eval_modes_out, "NUM_COMP_IN", num_comp_in, "NUM_COMP_OUT", num_comp_out, "NUM_NODES_IN", elem_size_in, "NUM_NODES_OUT", elem_size_out, "NUM_QPTS", num_qpts_in, "BLOCK_SIZE", asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block, "BLOCK_SIZE_Y", asmb->block_size_y, "USE_CEEDSIZE", use_ceedsize_idx)); CeedCallBackend(CeedGetKernel_Cuda(ceed, asmb->module, "LinearAssemble", &asmb->LinearAssemble)); - CeedCallBackend(CeedFree(&assembly_kernel_path)); - CeedCallBackend(CeedFree(&assembly_kernel_source)); // Load into B_in, in order that they will be used in eval_modes_in { diff --git a/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp b/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp index ed40b1fca9..2d5540ead8 100644 --- a/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp +++ b/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp @@ -24,7 +24,6 @@ extern "C" int CeedQFunctionBuildKernel_Cuda_ref(CeedQFunction qf) { using std::string; Ceed ceed; - const char *read_write_kernel_path, *read_write_kernel_source; CeedInt num_input_fields, num_output_fields, size; CeedQFunctionField *input_fields, *output_fields; CeedQFunction_Cuda *data; @@ -35,31 +34,26 @@ extern "C" int CeedQFunctionBuildKernel_Cuda_ref(CeedQFunction qf) { // QFunction is built if (data->QFunction) return CEED_ERROR_SUCCESS; - CeedCheck(data->qfunction_source, ceed, CEED_ERROR_BACKEND, "No QFunction source or CUfunction provided."); - // QFunction kernel generation CeedCallBackend(CeedQFunctionGetFields(qf, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); // Build strings for final kernel - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-qfunction.h", &read_write_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction Read/Write Kernel Source -----\n"); - { - char *source; - - CeedCallBackend(CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &source)); - read_write_kernel_source = source; - } - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction Read/Write Kernel Source Complete! -----\n"); - string qfunction_source(data->qfunction_source); string qfunction_name(data->qfunction_name); - string read_write(read_write_kernel_source); string kernel_name = "CeedKernelCudaRefQFunction_" + qfunction_name; ostringstream code; - // Defintions - code << read_write; - code << qfunction_source; - code << "\n"; + // Definitions + code << "// QFunction source\n"; + code << "#include \n\n"; + { + const char *source_path; + + CeedCallBackend(CeedQFunctionGetSourcePath(qf, &source_path)); + CeedCheck(source_path, ceed, CEED_ERROR_BACKEND, "No QFunction source or CUfunction provided."); + + code << "// User QFunction source\n"; + code << "#include \"" << source_path << "\"\n\n"; + } code << "extern \"C\" __global__ void " << kernel_name << "(void *ctx, CeedInt Q, Fields_Cuda fields) {\n"; // Inputs @@ -118,11 +112,6 @@ extern "C" int CeedQFunctionBuildKernel_Cuda_ref(CeedQFunction qf) { // Compile kernel CeedCallBackend(CeedCompile_Cuda(ceed, code.str().c_str(), &data->module, 0)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, kernel_name.c_str(), &data->QFunction)); - - // Cleanup - CeedCallBackend(CeedFree(&data->qfunction_source)); - CeedCallBackend(CeedFree(&read_write_kernel_path)); - CeedCallBackend(CeedFree(&read_write_kernel_source)); return CEED_ERROR_SUCCESS; } diff --git a/backends/cuda-ref/ceed-cuda-ref-qfunction.c b/backends/cuda-ref/ceed-cuda-ref-qfunction.c index 957f02cbbe..eec4aea26c 100644 --- a/backends/cuda-ref/ceed-cuda-ref-qfunction.c +++ b/backends/cuda-ref/ceed-cuda-ref-qfunction.c @@ -68,7 +68,6 @@ static int CeedQFunctionDestroy_Cuda(CeedQFunction qf) { CeedQFunction_Cuda *data; CeedCallBackend(CeedQFunctionGetData(qf, &data)); - CeedCallBackend(CeedFree(&data->qfunction_source)); if (data->module) CeedCallCuda(CeedQFunctionReturnCeed(qf), cuModuleUnload(data->module)); CeedCallBackend(CeedFree(&data)); return CEED_ERROR_SUCCESS; @@ -96,11 +95,8 @@ int CeedQFunctionCreate_Cuda(CeedQFunction qf) { CeedCallBackend(CeedCalloc(1, &data)); CeedCallBackend(CeedQFunctionSetData(qf, data)); - // Read QFunction source + // Read QFunction name CeedCallBackend(CeedQFunctionGetKernelName(qf, &data->qfunction_name)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source -----\n"); - CeedCallBackend(CeedQFunctionLoadSourceToBuffer(qf, &data->qfunction_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source Complete! -----\n"); // Register backend functions CeedCallBackend(CeedSetBackendFunction(ceed, "QFunction", qf, "Apply", CeedQFunctionApply_Cuda)); diff --git a/backends/cuda-ref/ceed-cuda-ref-restriction.c b/backends/cuda-ref/ceed-cuda-ref-restriction.c index f89e1f694d..c4a5c22dda 100644 --- a/backends/cuda-ref/ceed-cuda-ref-restriction.c +++ b/backends/cuda-ref/ceed-cuda-ref-restriction.c @@ -24,8 +24,6 @@ static inline int CeedElemRestrictionSetupCompile_Cuda(CeedElemRestriction rstr) { Ceed ceed; bool is_deterministic; - char *restriction_kernel_source; - const char *restriction_kernel_path; CeedInt num_elem, num_comp, elem_size, comp_stride; CeedRestrictionType rstr_type; CeedElemRestriction_Cuda *impl; @@ -46,67 +44,45 @@ static inline int CeedElemRestrictionSetupCompile_Cuda(CeedElemRestriction rstr) // Compile CUDA kernels switch (rstr_type) { case CEED_RESTRICTION_STRIDED: { - bool has_backend_strides; - CeedInt strides[3] = {1, num_elem * elem_size, elem_size}; + const char restriction_kernel_source[] = "// Strided restriction source\n#include \n"; + bool has_backend_strides; + CeedInt strides[3] = {1, num_elem * elem_size, elem_size}; CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); if (!has_backend_strides) { CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); } - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-strided.h", &restriction_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, restriction_kernel_path, &restriction_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Cuda(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, "RSTR_NUM_COMP", num_comp, "RSTR_STRIDE_NODES", strides[0], "RSTR_STRIDE_COMP", strides[1], "RSTR_STRIDE_ELEM", strides[2])); CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "StridedNoTranspose", &impl->ApplyNoTranspose)); CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "StridedTranspose", &impl->ApplyTranspose)); } break; - case CEED_RESTRICTION_POINTS: { - const char *offset_kernel_path; - char **file_paths = NULL; - CeedInt num_file_paths = 0; - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-at-points.h", &restriction_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceAndInitializeBuffer(ceed, restriction_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-offset.h", &offset_kernel_path)); - CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, offset_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); + case CEED_RESTRICTION_STANDARD: { + const char restriction_kernel_source[] = "// Standard restriction source\n#include \n"; + CeedCallBackend(CeedCompile_Cuda(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose)); - CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "AtPointsTranspose", &impl->ApplyTranspose)); - // Cleanup - CeedCallBackend(CeedFree(&offset_kernel_path)); - for (CeedInt i = 0; i < num_file_paths; i++) CeedCallBackend(CeedFree(&file_paths[i])); - CeedCallBackend(CeedFree(&file_paths)); + CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTranspose", &impl->ApplyTranspose)); } break; - case CEED_RESTRICTION_STANDARD: { - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-offset.h", &restriction_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, restriction_kernel_path, &restriction_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); + case CEED_RESTRICTION_POINTS: { + const char restriction_kernel_source[] = + "// AtPoints restriction source\n#include \n\n" + "// Standard restriction source\n#include \n"; + CeedCallBackend(CeedCompile_Cuda(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose)); - CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTranspose", &impl->ApplyTranspose)); + CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "AtPointsTranspose", &impl->ApplyTranspose)); } break; case CEED_RESTRICTION_ORIENTED: { - const char *offset_kernel_path; - char **file_paths = NULL; - CeedInt num_file_paths = 0; - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-oriented.h", &restriction_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceAndInitializeBuffer(ceed, restriction_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-offset.h", &offset_kernel_path)); - CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, offset_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); + const char restriction_kernel_source[] = + "// Oriented restriction source\n#include \n\n" + "// Standard restriction source\n#include \n"; + CeedCallBackend(CeedCompile_Cuda(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); @@ -114,22 +90,11 @@ static inline int CeedElemRestrictionSetupCompile_Cuda(CeedElemRestriction rstr) CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyUnsignedNoTranspose)); CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OrientedTranspose", &impl->ApplyTranspose)); CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTranspose", &impl->ApplyUnsignedTranspose)); - // Cleanup - CeedCallBackend(CeedFree(&offset_kernel_path)); - for (CeedInt i = 0; i < num_file_paths; i++) CeedCallBackend(CeedFree(&file_paths[i])); - CeedCallBackend(CeedFree(&file_paths)); } break; case CEED_RESTRICTION_CURL_ORIENTED: { - const char *offset_kernel_path; - char **file_paths = NULL; - CeedInt num_file_paths = 0; - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-curl-oriented.h", &restriction_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceAndInitializeBuffer(ceed, restriction_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-offset.h", &offset_kernel_path)); - CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, offset_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); + const char restriction_kernel_source[] = + "// Curl oriented restriction source\n#include \n\n" + "// Standard restriction source\n#include \n"; CeedCallBackend(CeedCompile_Cuda(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); @@ -139,14 +104,8 @@ static inline int CeedElemRestrictionSetupCompile_Cuda(CeedElemRestriction rstr) CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedTranspose", &impl->ApplyTranspose)); CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedUnsignedTranspose", &impl->ApplyUnsignedTranspose)); CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTranspose", &impl->ApplyUnorientedTranspose)); - // Cleanup - CeedCallBackend(CeedFree(&offset_kernel_path)); - for (CeedInt i = 0; i < num_file_paths; i++) CeedCallBackend(CeedFree(&file_paths[i])); - CeedCallBackend(CeedFree(&file_paths)); } break; } - CeedCallBackend(CeedFree(&restriction_kernel_path)); - CeedCallBackend(CeedFree(&restriction_kernel_source)); return CEED_ERROR_SUCCESS; } diff --git a/backends/cuda-ref/ceed-cuda-ref.h b/backends/cuda-ref/ceed-cuda-ref.h index 9e167463bd..0f6ca9d1cb 100644 --- a/backends/cuda-ref/ceed-cuda-ref.h +++ b/backends/cuda-ref/ceed-cuda-ref.h @@ -97,7 +97,6 @@ typedef struct { typedef struct { CUmodule module; const char *qfunction_name; - const char *qfunction_source; CUfunction QFunction; Fields_Cuda fields; void *d_c; diff --git a/backends/cuda-shared/ceed-cuda-shared-basis.c b/backends/cuda-shared/ceed-cuda-shared-basis.c index fcd09b10f3..b1709787ec 100644 --- a/backends/cuda-shared/ceed-cuda-shared-basis.c +++ b/backends/cuda-shared/ceed-cuda-shared-basis.c @@ -283,24 +283,17 @@ static int CeedBasisApplyAtPointsCore_Cuda_shared(CeedBasis basis, bool apply_ad } // -- Compile kernels - char *basis_kernel_source; - const char *basis_kernel_path; - CeedInt num_comp; + const char basis_kernel_source[] = "// AtPoints basis source\n#include \n"; + CeedInt num_comp; if (data->moduleAtPoints) CeedCallCuda(ceed, cuModuleUnload(data->moduleAtPoints)); CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-tensor-at-points.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS", max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1))); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); } // Get read/write access to u, v @@ -395,8 +388,6 @@ static int CeedBasisDestroy_Cuda_shared(CeedBasis basis) { int CeedBasisCreateTensorH1_Cuda_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) { Ceed ceed; - char *basis_kernel_source; - const char *basis_kernel_path; CeedInt num_comp; const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); const CeedInt interp_bytes = q_bytes * P_1d; @@ -430,11 +421,9 @@ int CeedBasisCreateTensorH1_Cuda_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, } // Compile basis kernels + const char basis_kernel_source[] = "// Tensor basis source\n#include \n"; + CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-shared-basis-tensor.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete -----\n"); CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 8, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "T_1D", CeedIntMax(Q_1d, P_1d), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_HAS_COLLOCATED_GRAD", has_collocated_grad)); @@ -445,8 +434,6 @@ int CeedBasisCreateTensorH1_Cuda_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "GradTranspose", &data->GradTranspose)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "GradTransposeAdd", &data->GradTransposeAdd)); CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); CeedCallBackend(CeedBasisSetData(basis, data)); diff --git a/backends/cuda/ceed-cuda-compile.cpp b/backends/cuda/ceed-cuda-compile.cpp index 9bd433fd01..6d3faf4cd1 100644 --- a/backends/cuda/ceed-cuda-compile.cpp +++ b/backends/cuda/ceed-cuda-compile.cpp @@ -37,9 +37,9 @@ int CeedCompile_Cuda(Ceed ceed, const char *source, CUmodule *module, const CeedInt num_defines, ...) { size_t ptx_size; char *ptx; - const char *jit_defs_path, *jit_defs_source; - const int num_opts = 3; - const char *opts[num_opts]; + const int num_opts = 4; + CeedInt num_jit_source_dirs = 0; + const char **opts; nvrtcProgram prog; struct cudaDeviceProp prop; Ceed_Cuda *ceed_data; @@ -64,19 +64,10 @@ int CeedCompile_Cuda(Ceed ceed, const char *source, CUmodule *module, const Ceed } // Standard libCEED definitions for CUDA backends - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-jit.h", &jit_defs_path)); - { - char *source; - - CeedCallBackend(CeedLoadSourceToBuffer(ceed, jit_defs_path, &source)); - jit_defs_source = source; - } - code << jit_defs_source; - code << "\n\n"; - CeedCallBackend(CeedFree(&jit_defs_path)); - CeedCallBackend(CeedFree(&jit_defs_source)); + code << "#include \n\n"; // Non-macro options + CeedCallBackend(CeedCalloc(num_opts, &opts)); opts[0] = "-default-device"; CeedCallBackend(CeedGetData(ceed, &ceed_data)); CeedCallCuda(ceed, cudaGetDeviceProperties(&prop, ceed_data->device_id)); @@ -93,6 +84,20 @@ int CeedCompile_Cuda(Ceed ceed, const char *source, CUmodule *module, const Ceed + std::to_string(prop.major) + std::to_string(prop.minor); opts[1] = arch_arg.c_str(); opts[2] = "-Dint32_t=int"; + opts[3] = "-DCEED_RUNNING_JIT_PASS=1"; + { + const char **jit_source_dirs; + + CeedCallBackend(CeedGetJitSourceRoots(ceed, &num_jit_source_dirs, &jit_source_dirs)); + CeedCallBackend(CeedRealloc(num_opts + num_jit_source_dirs, &opts)); + for (CeedInt i = 0; i < num_jit_source_dirs; i++) { + std::ostringstream include_dirs_arg; + + include_dirs_arg << "-I" << jit_source_dirs[i]; + CeedCallBackend(CeedStringAllocCopy(include_dirs_arg.str().c_str(), (char **)&opts[num_opts + i])); + } + CeedCallBackend(CeedRestoreJitSourceRoots(ceed, &jit_source_dirs)); + } // Add string source argument provided in call code << source; @@ -101,8 +106,12 @@ int CeedCompile_Cuda(Ceed ceed, const char *source, CUmodule *module, const Ceed CeedCallNvrtc(ceed, nvrtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL)); // Compile kernel - nvrtcResult result = nvrtcCompileProgram(prog, num_opts, opts); + nvrtcResult result = nvrtcCompileProgram(prog, num_opts + num_jit_source_dirs, opts); + for (CeedInt i = 0; i < num_jit_source_dirs; i++) { + CeedCallBackend(CeedFree(&opts[num_opts + i])); + } + CeedCallBackend(CeedFree(&opts)); if (result != NVRTC_SUCCESS) { char *log; size_t log_size; diff --git a/backends/hip-gen/ceed-hip-gen-operator-build.cpp b/backends/hip-gen/ceed-hip-gen-operator-build.cpp index ee0dea2609..f1a876ce26 100644 --- a/backends/hip-gen/ceed-hip-gen-operator-build.cpp +++ b/backends/hip-gen/ceed-hip-gen-operator-build.cpp @@ -707,28 +707,10 @@ extern "C" int CeedOperatorBuildKernel_Hip_gen(CeedOperator op) { // Load basis source files // TODO: Add non-tensor, AtPoints - { - char *tensor_basis_kernel_source; - const char *tensor_basis_kernel_path; - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-shared-basis-tensor-templates.h", &tensor_basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Tensor Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, tensor_basis_kernel_path, &tensor_basis_kernel_source)); - code << tensor_basis_kernel_source; - CeedCallBackend(CeedFree(&tensor_basis_kernel_path)); - CeedCallBackend(CeedFree(&tensor_basis_kernel_source)); - } - { - char *hip_gen_template_source; - const char *hip_gen_template_path; - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-gen-templates.h", &hip_gen_template_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Hip-Gen Template Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, hip_gen_template_path, &hip_gen_template_source)); - code << hip_gen_template_source; - CeedCallBackend(CeedFree(&hip_gen_template_path)); - CeedCallBackend(CeedFree(&hip_gen_template_source)); - } + code << "// Tensor basis source\n"; + code << "#include \n\n"; + code << "// CodeGen operator source\n"; + code << "#include \n\n"; // Get QFunction name std::string qfunction_name(qf_data->qfunction_name); @@ -746,9 +728,13 @@ extern "C" int CeedOperatorBuildKernel_Hip_gen(CeedOperator op) { // Add user QFunction source { - std::string qfunction_source(qf_data->qfunction_source); + const char *source_path; + + CeedCallBackend(CeedQFunctionGetSourcePath(qf, &source_path)); + CeedCheck(source_path, ceed, CEED_ERROR_UNSUPPORTED, "/gpu/hip/gen backend requires QFunction source code file"); - code << qfunction_source; + code << "// User QFunction source\n"; + code << "#include \"" << source_path << "\"\n\n"; } // Setup diff --git a/backends/hip-gen/ceed-hip-gen-qfunction.c b/backends/hip-gen/ceed-hip-gen-qfunction.c index ed10d81ad3..32d5653b98 100644 --- a/backends/hip-gen/ceed-hip-gen-qfunction.c +++ b/backends/hip-gen/ceed-hip-gen-qfunction.c @@ -27,7 +27,6 @@ static int CeedQFunctionDestroy_Hip_gen(CeedQFunction qf) { CeedCallBackend(CeedQFunctionGetData(qf, &data)); CeedCallHip(CeedQFunctionReturnCeed(qf), hipFree(data->d_c)); - CeedCallBackend(CeedFree(&data->qfunction_source)); CeedCallBackend(CeedFree(&data)); return CEED_ERROR_SUCCESS; } @@ -45,10 +44,6 @@ int CeedQFunctionCreate_Hip_gen(CeedQFunction qf) { // Read QFunction source CeedCallBackend(CeedQFunctionGetKernelName(qf, &data->qfunction_name)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source -----\n"); - CeedCallBackend(CeedQFunctionLoadSourceToBuffer(qf, &data->qfunction_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source Complete! -----\n"); - CeedCheck(data->qfunction_source, ceed, CEED_ERROR_UNSUPPORTED, "/gpu/hip/gen backend requires QFunction source code file"); CeedCallBackend(CeedSetBackendFunction(ceed, "QFunction", qf, "Apply", CeedQFunctionApply_Hip_gen)); CeedCallBackend(CeedSetBackendFunction(ceed, "QFunction", qf, "Destroy", CeedQFunctionDestroy_Hip_gen)); diff --git a/backends/hip-gen/ceed-hip-gen.h b/backends/hip-gen/ceed-hip-gen.h index a0a8ac5511..139bab43bb 100644 --- a/backends/hip-gen/ceed-hip-gen.h +++ b/backends/hip-gen/ceed-hip-gen.h @@ -26,7 +26,6 @@ typedef struct { typedef struct { const char *qfunction_name; - const char *qfunction_source; void *d_c; } CeedQFunction_Hip_gen; diff --git a/backends/hip-ref/ceed-hip-ref-basis.c b/backends/hip-ref/ceed-hip-ref-basis.c index f54184f28d..7e7f0e97e4 100644 --- a/backends/hip-ref/ceed-hip-ref-basis.c +++ b/backends/hip-ref/ceed-hip-ref-basis.c @@ -180,24 +180,17 @@ static int CeedBasisApplyAtPointsCore_Hip(CeedBasis basis, bool apply_add, const } // -- Compile kernels - char *basis_kernel_source; - const char *basis_kernel_path; - CeedInt num_comp; + const char basis_kernel_source[] = "// AtPoints basis source\n#include \n"; + CeedInt num_comp; if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS", max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1))); CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); } // Get read/write access to u, v @@ -414,8 +407,6 @@ static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) { int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) { Ceed ceed; - char *basis_kernel_source; - const char *basis_kernel_path; CeedInt num_comp; const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); const CeedInt interp_bytes = q_bytes * P_1d; @@ -435,19 +426,15 @@ int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const C CeedCallHip(ceed, hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, hipMemcpyHostToDevice)); // Compile basis kernels + const char basis_kernel_source[] = "// Tensor basis source\n#include \n"; + CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-tensor.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim))); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Grad", &data->Grad)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); CeedCallBackend(CeedBasisSetData(basis, data)); @@ -466,8 +453,6 @@ int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const C int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { Ceed ceed; - char *basis_kernel_source; - const char *basis_kernel_path; CeedInt num_comp, q_comp_interp, q_comp_grad; const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); CeedBasisNonTensor_Hip *data; @@ -496,11 +481,9 @@ int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, } // Compile basis kernels + const char basis_kernel_source[] = "// Nontensor basis source\n#include \n"; + CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); @@ -508,8 +491,6 @@ int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); CeedCallBackend(CeedBasisSetData(basis, data)); @@ -526,8 +507,6 @@ int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, int CeedBasisCreateHdiv_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { Ceed ceed; - char *basis_kernel_source; - const char *basis_kernel_path; CeedInt num_comp, q_comp_interp, q_comp_div; const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); CeedBasisNonTensor_Hip *data; @@ -556,11 +535,9 @@ int CeedBasisCreateHdiv_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_node } // Compile basis kernels + const char basis_kernel_source[] = "// Nontensor basis source\n#include \n"; + CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); @@ -568,8 +545,6 @@ int CeedBasisCreateHdiv_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_node CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); CeedCallBackend(CeedBasisSetData(basis, data)); @@ -586,8 +561,6 @@ int CeedBasisCreateHdiv_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_node int CeedBasisCreateHcurl_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { Ceed ceed; - char *basis_kernel_source; - const char *basis_kernel_path; CeedInt num_comp, q_comp_interp, q_comp_curl; const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); CeedBasisNonTensor_Hip *data; @@ -616,11 +589,9 @@ int CeedBasisCreateHcurl_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nod } // Compile basis kernels + const char basis_kernel_source[] = "// Nontensor basis source\n#include \n"; + CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); @@ -628,8 +599,6 @@ int CeedBasisCreateHcurl_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nod CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); CeedCallBackend(CeedBasisSetData(basis, data)); diff --git a/backends/hip-ref/ceed-hip-ref-operator.c b/backends/hip-ref/ceed-hip-ref-operator.c index c6307037fa..c2e58a1e7f 100644 --- a/backends/hip-ref/ceed-hip-ref-operator.c +++ b/backends/hip-ref/ceed-hip-ref-operator.c @@ -1283,8 +1283,6 @@ static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op) { //------------------------------------------------------------------------------ static inline int CeedOperatorAssembleDiagonalSetupCompile_Hip(CeedOperator op, CeedInt use_ceedsize_idx, const bool is_point_block) { Ceed ceed; - char *diagonal_kernel_source; - const char *diagonal_kernel_path; CeedInt num_input_fields, num_output_fields, num_eval_modes_in = 0, num_eval_modes_out = 0; CeedInt num_comp, q_comp, num_nodes, num_qpts; CeedBasis basis_in = NULL, basis_out = NULL; @@ -1348,22 +1346,18 @@ static inline int CeedOperatorAssembleDiagonalSetupCompile_Hip(CeedOperator op, CeedOperatorDiag_Hip *diag = impl->diag; // Assemble kernel - hipModule_t *module = is_point_block ? &diag->module_point_block : &diag->module; - CeedInt elems_per_block = 1; + const char diagonal_kernel_source[] = "// Diagonal assembly source\n#include \n"; + hipModule_t *module = is_point_block ? &diag->module_point_block : &diag->module; + CeedInt elems_per_block = 1; + CeedCallBackend(CeedBasisGetNumNodes(basis_in, &num_nodes)); CeedCallBackend(CeedBasisGetNumComponents(basis_in, &num_comp)); if (basis_in == CEED_BASIS_NONE) num_qpts = num_nodes; else CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h", &diagonal_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Diagonal Assembly Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, diagonal_kernel_path, &diagonal_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Diagonal Assembly Source Complete! -----\n"); CeedCallHip(ceed, CeedCompile_Hip(ceed, diagonal_kernel_source, module, 8, "NUM_EVAL_MODES_IN", num_eval_modes_in, "NUM_EVAL_MODES_OUT", num_eval_modes_out, "NUM_COMP", num_comp, "NUM_NODES", num_nodes, "NUM_QPTS", num_qpts, "USE_CEEDSIZE", use_ceedsize_idx, "USE_POINT_BLOCK", is_point_block ? 1 : 0, "BLOCK_SIZE", num_nodes * elems_per_block)); CeedCallHip(ceed, CeedGetKernel_Hip(ceed, *module, "LinearDiagonal", is_point_block ? &diag->LinearPointBlock : &diag->LinearDiagonal)); - CeedCallBackend(CeedFree(&diagonal_kernel_path)); - CeedCallBackend(CeedFree(&diagonal_kernel_source)); CeedCallBackend(CeedBasisDestroy(&basis_in)); CeedCallBackend(CeedBasisDestroy(&basis_out)); return CEED_ERROR_SUCCESS; @@ -1478,8 +1472,6 @@ static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Hip(CeedOperator op, static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op, CeedInt use_ceedsize_idx) { Ceed ceed; Ceed_Hip *hip_data; - char *assembly_kernel_source; - const char *assembly_kernel_path; CeedInt num_input_fields, num_output_fields, num_eval_modes_in = 0, num_eval_modes_out = 0; CeedInt elem_size_in, num_qpts_in = 0, num_comp_in, elem_size_out, num_qpts_out, num_comp_out, q_comp; CeedEvalMode *eval_modes_in = NULL, *eval_modes_out = NULL; @@ -1586,20 +1578,16 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op, CeedInt use_ceed } // Compile kernels + const char assembly_kernel_source[] = "// Full assembly source\n#include \n"; + CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in)); CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-operator-assemble.h", &assembly_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Assembly Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, assembly_kernel_path, &assembly_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Assembly Source Complete! -----\n"); CeedCallBackend(CeedCompile_Hip(ceed, assembly_kernel_source, &asmb->module, 10, "NUM_EVAL_MODES_IN", num_eval_modes_in, "NUM_EVAL_MODES_OUT", num_eval_modes_out, "NUM_COMP_IN", num_comp_in, "NUM_COMP_OUT", num_comp_out, "NUM_NODES_IN", elem_size_in, "NUM_NODES_OUT", elem_size_out, "NUM_QPTS", num_qpts_in, "BLOCK_SIZE", asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block, "BLOCK_SIZE_Y", asmb->block_size_y, "USE_CEEDSIZE", use_ceedsize_idx)); CeedCallBackend(CeedGetKernel_Hip(ceed, asmb->module, "LinearAssemble", &asmb->LinearAssemble)); - CeedCallBackend(CeedFree(&assembly_kernel_path)); - CeedCallBackend(CeedFree(&assembly_kernel_source)); // Load into B_in, in order that they will be used in eval_modes_in { diff --git a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp index 3ba4f23266..2311f8a332 100644 --- a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp +++ b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp @@ -25,8 +25,6 @@ extern "C" int CeedQFunctionBuildKernel_Hip_ref(CeedQFunction qf) { using std::string; Ceed ceed; - char *read_write_kernel_source; - const char *read_write_kernel_path; Ceed_Hip *ceed_Hip; CeedInt num_input_fields, num_output_fields, size; CeedQFunctionField *input_fields, *output_fields; @@ -39,26 +37,26 @@ extern "C" int CeedQFunctionBuildKernel_Hip_ref(CeedQFunction qf) { // QFunction is built if (data->QFunction) return CEED_ERROR_SUCCESS; - CeedCheck(data->qfunction_source, ceed, CEED_ERROR_BACKEND, "No QFunction source or hipFunction_t provided."); - // QFunction kernel generation CeedCallBackend(CeedQFunctionGetFields(qf, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); // Build strings for final kernel - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-qfunction.h", &read_write_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction Read/Write Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &read_write_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction Read/Write Kernel Source Complete! -----\n"); - string qfunction_source(data->qfunction_source); string qfunction_name(data->qfunction_name); - string read_write(read_write_kernel_source); string kernel_name = "CeedKernelHipRefQFunction_" + qfunction_name; ostringstream code; - // Defintions - code << read_write; - code << qfunction_source; - code << "\n"; + // Definitions + code << "// QFunction source\n"; + code << "#include \n\n"; + { + const char *source_path; + + CeedCallBackend(CeedQFunctionGetSourcePath(qf, &source_path)); + CeedCheck(source_path, ceed, CEED_ERROR_BACKEND, "No QFunction source or hipFunction_t provided."); + + code << "// User QFunction source\n"; + code << "#include \"" << source_path << "\"\n\n"; + } code << "extern \"C\" __launch_bounds__(BLOCK_SIZE)\n"; code << "__global__ void " << kernel_name << "(void *ctx, CeedInt Q, Fields_Hip fields) {\n"; @@ -118,11 +116,6 @@ extern "C" int CeedQFunctionBuildKernel_Hip_ref(CeedQFunction qf) { // Compile kernel CeedCallBackend(CeedCompile_Hip(ceed, code.str().c_str(), &data->module, 1, "BLOCK_SIZE", ceed_Hip->opt_block_size)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, kernel_name.c_str(), &data->QFunction)); - - // Cleanup - CeedCallBackend(CeedFree(&data->qfunction_source)); - CeedCallBackend(CeedFree(&read_write_kernel_path)); - CeedCallBackend(CeedFree(&read_write_kernel_source)); return CEED_ERROR_SUCCESS; } diff --git a/backends/hip-ref/ceed-hip-ref-qfunction.c b/backends/hip-ref/ceed-hip-ref-qfunction.c index 15d2dc7ae4..18d531ac11 100644 --- a/backends/hip-ref/ceed-hip-ref-qfunction.c +++ b/backends/hip-ref/ceed-hip-ref-qfunction.c @@ -70,7 +70,6 @@ static int CeedQFunctionDestroy_Hip(CeedQFunction qf) { CeedQFunction_Hip *data; CeedCallBackend(CeedQFunctionGetData(qf, &data)); - CeedCallBackend(CeedFree(&data->qfunction_source)); if (data->module) CeedCallHip(CeedQFunctionReturnCeed(qf), hipModuleUnload(data->module)); CeedCallBackend(CeedFree(&data)); return CEED_ERROR_SUCCESS; @@ -89,11 +88,8 @@ int CeedQFunctionCreate_Hip(CeedQFunction qf) { CeedCallBackend(CeedQFunctionSetData(qf, data)); CeedCallBackend(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields)); - // Read QFunction source + // Read QFunction name CeedCallBackend(CeedQFunctionGetKernelName(qf, &data->qfunction_name)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source -----\n"); - CeedCallBackend(CeedQFunctionLoadSourceToBuffer(qf, &data->qfunction_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source Complete! -----\n"); // Register backend functions CeedCallBackend(CeedSetBackendFunction(ceed, "QFunction", qf, "Apply", CeedQFunctionApply_Hip)); diff --git a/backends/hip-ref/ceed-hip-ref-restriction.c b/backends/hip-ref/ceed-hip-ref-restriction.c index 95b0961387..ca1d19d7a6 100644 --- a/backends/hip-ref/ceed-hip-ref-restriction.c +++ b/backends/hip-ref/ceed-hip-ref-restriction.c @@ -23,8 +23,6 @@ static inline int CeedElemRestrictionSetupCompile_Hip(CeedElemRestriction rstr) { Ceed ceed; bool is_deterministic; - char *restriction_kernel_source; - const char *restriction_kernel_path; CeedInt num_elem, num_comp, elem_size, comp_stride; CeedRestrictionType rstr_type; CeedElemRestriction_Hip *impl; @@ -45,67 +43,46 @@ static inline int CeedElemRestrictionSetupCompile_Hip(CeedElemRestriction rstr) // Compile HIP kernels switch (rstr_type) { case CEED_RESTRICTION_STRIDED: { - bool has_backend_strides; - CeedInt strides[3] = {1, num_elem * elem_size, elem_size}; + const char restriction_kernel_source[] = "// Strided restriction source\n#include \n"; + bool has_backend_strides; + CeedInt strides[3] = {1, num_elem * elem_size, elem_size}; CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); if (!has_backend_strides) { CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); } - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-restriction-strided.h", &restriction_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, restriction_kernel_path, &restriction_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, "RSTR_NUM_COMP", num_comp, "RSTR_STRIDE_NODES", strides[0], "RSTR_STRIDE_COMP", strides[1], "RSTR_STRIDE_ELEM", strides[2])); CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "StridedNoTranspose", &impl->ApplyNoTranspose)); CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "StridedTranspose", &impl->ApplyTranspose)); } break; - case CEED_RESTRICTION_POINTS: { - const char *offset_kernel_path; - char **file_paths = NULL; - CeedInt num_file_paths = 0; - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-restriction-at-points.h", &restriction_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceAndInitializeBuffer(ceed, restriction_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-restriction-offset.h", &offset_kernel_path)); - CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, offset_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); + case CEED_RESTRICTION_STANDARD: { + const char restriction_kernel_source[] = "// Standard restriction source\n#include \n"; + CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose)); - CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "AtPointsTranspose", &impl->ApplyTranspose)); - // Cleanup - CeedCallBackend(CeedFree(&offset_kernel_path)); - for (CeedInt i = 0; i < num_file_paths; i++) CeedCallBackend(CeedFree(&file_paths[i])); - CeedCallBackend(CeedFree(&file_paths)); + CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyTranspose)); } break; - case CEED_RESTRICTION_STANDARD: { - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-restriction-offset.h", &restriction_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, restriction_kernel_path, &restriction_kernel_source)); + case CEED_RESTRICTION_POINTS: { + const char restriction_kernel_source[] = + "// AtPoints restriction source\n#include \n\n" + "// Standard restriction source\n#include \n"; + CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose)); - CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyTranspose)); + CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "AtPointsTranspose", &impl->ApplyTranspose)); } break; case CEED_RESTRICTION_ORIENTED: { - const char *offset_kernel_path; - char **file_paths = NULL; - CeedInt num_file_paths = 0; - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-restriction-oriented.h", &restriction_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceAndInitializeBuffer(ceed, restriction_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-restriction-offset.h", &offset_kernel_path)); - CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, offset_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); + const char restriction_kernel_source[] = + "// Oriented restriction source\n#include \n\n" + "// Standard restriction source\n#include \n"; + CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); @@ -113,22 +90,12 @@ static inline int CeedElemRestrictionSetupCompile_Hip(CeedElemRestriction rstr) CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyUnsignedNoTranspose)); CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OrientedTranspose", &impl->ApplyTranspose)); CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyUnsignedTranspose)); - // Cleanup - CeedCallBackend(CeedFree(&offset_kernel_path)); - for (CeedInt i = 0; i < num_file_paths; i++) CeedCallBackend(CeedFree(&file_paths[i])); - CeedCallBackend(CeedFree(&file_paths)); } break; case CEED_RESTRICTION_CURL_ORIENTED: { - const char *offset_kernel_path; - char **file_paths = NULL; - CeedInt num_file_paths = 0; - - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-restriction-curl-oriented.h", &restriction_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceAndInitializeBuffer(ceed, restriction_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-restriction-offset.h", &offset_kernel_path)); - CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, offset_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); + const char restriction_kernel_source[] = + "// Curl oriented restriction source\n#include \n\n" + "// Standard restriction source\n#include \n"; + CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); @@ -138,14 +105,9 @@ static inline int CeedElemRestrictionSetupCompile_Hip(CeedElemRestriction rstr) CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedTranspose", &impl->ApplyTranspose)); CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedUnsignedTranspose", &impl->ApplyUnsignedTranspose)); CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyUnorientedTranspose)); - // Cleanup - CeedCallBackend(CeedFree(&offset_kernel_path)); - for (CeedInt i = 0; i < num_file_paths; i++) CeedCallBackend(CeedFree(&file_paths[i])); - CeedCallBackend(CeedFree(&file_paths)); + } break; } - CeedCallBackend(CeedFree(&restriction_kernel_path)); - CeedCallBackend(CeedFree(&restriction_kernel_source)); return CEED_ERROR_SUCCESS; } diff --git a/backends/hip-ref/ceed-hip-ref.h b/backends/hip-ref/ceed-hip-ref.h index 52e88129a1..5a695761a9 100644 --- a/backends/hip-ref/ceed-hip-ref.h +++ b/backends/hip-ref/ceed-hip-ref.h @@ -101,7 +101,6 @@ typedef struct { typedef struct { hipModule_t module; const char *qfunction_name; - const char *qfunction_source; hipFunction_t QFunction; Fields_Hip fields; void *d_c; diff --git a/backends/hip-shared/ceed-hip-shared-basis.c b/backends/hip-shared/ceed-hip-shared-basis.c index 05b564e7f2..cdcc28ce07 100644 --- a/backends/hip-shared/ceed-hip-shared-basis.c +++ b/backends/hip-shared/ceed-hip-shared-basis.c @@ -342,24 +342,17 @@ static int CeedBasisApplyAtPointsCore_Hip_shared(CeedBasis basis, bool apply_add } // -- Compile kernels - char *basis_kernel_source; - const char *basis_kernel_path; - CeedInt num_comp; + const char basis_kernel_source[] = "// AtPoints basis source\n#include \n"; + CeedInt num_comp; if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS", max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1))); CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); } // Get read/write access to u, v @@ -454,8 +447,6 @@ static int CeedBasisDestroy_Hip_shared(CeedBasis basis) { int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) { Ceed ceed; - char *basis_kernel_source; - const char *basis_kernel_path; CeedInt num_comp; const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); const CeedInt interp_bytes = q_bytes * P_1d; @@ -493,10 +484,8 @@ int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, CeedCallBackend(ComputeBasisThreadBlockSizes(dim, P_1d, Q_1d, num_comp, data->block_sizes)); // Compile basis kernels - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-shared-basis-tensor.h", &basis_kernel_path)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); + const char basis_kernel_source[] = "// Tensor basis source\n#include \n"; + CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 11, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "T_1D", CeedIntMax(Q_1d, P_1d), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_INTERP_BLOCK_SIZE", data->block_sizes[0], "BASIS_GRAD_BLOCK_SIZE", @@ -509,8 +498,6 @@ int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "GradTranspose", &data->GradTranspose)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "GradTransposeAdd", &data->GradTransposeAdd)); CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); - CeedCallBackend(CeedFree(&basis_kernel_path)); - CeedCallBackend(CeedFree(&basis_kernel_source)); CeedCallBackend(CeedBasisSetData(basis, data)); diff --git a/backends/hip/ceed-hip-compile.cpp b/backends/hip/ceed-hip-compile.cpp index cafb79ed7f..582c0a2e24 100644 --- a/backends/hip/ceed-hip-compile.cpp +++ b/backends/hip/ceed-hip-compile.cpp @@ -35,10 +35,10 @@ //------------------------------------------------------------------------------ int CeedCompile_Hip(Ceed ceed, const char *source, hipModule_t *module, const CeedInt num_defines, ...) { size_t ptx_size; - char *jit_defs_source, *ptx; - const char *jit_defs_path; - const int num_opts = 3; - const char *opts[num_opts]; + char *ptx; + const int num_opts = 4; + CeedInt num_jit_source_dirs = 0; + const char **opts; int runtime_version; hiprtcProgram prog; struct hipDeviceProp_t prop; @@ -76,20 +76,30 @@ int CeedCompile_Hip(Ceed ceed, const char *source, hipModule_t *module, const Ce } // Standard libCEED definitions for HIP backends - CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-jit.h", &jit_defs_path)); - CeedCallBackend(CeedLoadSourceToBuffer(ceed, jit_defs_path, &jit_defs_source)); - code << jit_defs_source; - code << "\n\n"; - CeedCallBackend(CeedFree(&jit_defs_path)); - CeedCallBackend(CeedFree(&jit_defs_source)); + code << "#include \n\n"; // Non-macro options + CeedCallBackend(CeedCalloc(num_opts, &opts)); opts[0] = "-default-device"; CeedCallBackend(CeedGetData(ceed, (void **)&ceed_data)); CeedCallHip(ceed, hipGetDeviceProperties(&prop, ceed_data->device_id)); std::string arch_arg = "--gpu-architecture=" + std::string(prop.gcnArchName); opts[1] = arch_arg.c_str(); opts[2] = "-munsafe-fp-atomics"; + opts[3] = "-DCEED_RUNNING_JIT_PASS=1"; + { + const char **jit_source_dirs; + + CeedCallBackend(CeedGetJitSourceRoots(ceed, &num_jit_source_dirs, &jit_source_dirs)); + CeedCallBackend(CeedRealloc(num_opts + num_jit_source_dirs, &opts)); + for (CeedInt i = 0; i < num_jit_source_dirs; i++) { + std::ostringstream include_dirs_arg; + + include_dirs_arg << "-I" << jit_source_dirs[i]; + CeedCallBackend(CeedStringAllocCopy(include_dirs_arg.str().c_str(), (char **)&opts[num_opts + i])); + } + CeedCallBackend(CeedRestoreJitSourceRoots(ceed, &jit_source_dirs)); + } // Add string source argument provided in call code << source; @@ -98,8 +108,12 @@ int CeedCompile_Hip(Ceed ceed, const char *source, hipModule_t *module, const Ce CeedCallHiprtc(ceed, hiprtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL)); // Compile kernel - hiprtcResult result = hiprtcCompileProgram(prog, num_opts, opts); + hiprtcResult result = hiprtcCompileProgram(prog, num_opts + num_jit_source_dirs, opts); + for (CeedInt i = 0; i < num_jit_source_dirs; i++) { + CeedCallBackend(CeedFree(&opts[num_opts + i])); + } + CeedCallBackend(CeedFree(&opts)); if (result != HIPRTC_SUCCESS) { size_t log_size; char *log; diff --git a/examples/ceed/ex1-volume.h b/examples/ceed/ex1-volume.h index d78ea16c6f..3ec78c4366 100644 --- a/examples/ceed/ex1-volume.h +++ b/examples/ceed/ex1-volume.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include /// A structure used to pass additional data to f_build_mass struct BuildContext { diff --git a/examples/ceed/ex2-surface.h b/examples/ceed/ex2-surface.h index 4258a1e944..1355918b70 100644 --- a/examples/ceed/ex2-surface.h +++ b/examples/ceed/ex2-surface.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include /// A structure used to pass additional data to f_build_diff struct BuildContext { diff --git a/examples/deal.II/bps-qfunctions.h b/examples/deal.II/bps-qfunctions.h index 6161fdf840..b6a0c498c7 100644 --- a/examples/deal.II/bps-qfunctions.h +++ b/examples/deal.II/bps-qfunctions.h @@ -15,7 +15,7 @@ // // --------------------------------------------------------------------- -#include +#include diff --git a/examples/fluids/qfunctions/advection.h b/examples/fluids/qfunctions/advection.h index 43b5293837..41f6b249e7 100644 --- a/examples/fluids/qfunctions/advection.h +++ b/examples/fluids/qfunctions/advection.h @@ -7,8 +7,11 @@ /// @file /// Advection initial condition and operator for Navier-Stokes example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#include +#endif #include "advection_types.h" #include "newtonian_state.h" diff --git a/examples/fluids/qfunctions/advection_types.h b/examples/fluids/qfunctions/advection_types.h index 838995191c..daaee10bf7 100644 --- a/examples/fluids/qfunctions/advection_types.h +++ b/examples/fluids/qfunctions/advection_types.h @@ -6,7 +6,11 @@ // This file is part of CEED: http://github.com/ceed #pragma once -#include +#include +#ifndef CEED_RUNNING_JIT_PASS +#include +#endif + #include "stabilization_types.h" typedef enum { diff --git a/examples/fluids/qfunctions/bc_freestream.h b/examples/fluids/qfunctions/bc_freestream.h index 90700496e0..b6c0aa33cf 100644 --- a/examples/fluids/qfunctions/bc_freestream.h +++ b/examples/fluids/qfunctions/bc_freestream.h @@ -7,6 +7,10 @@ /// @file /// QFunctions for the `bc_freestream` and `bc_outflow` boundary conditions +#ifndef CEED_RUNNING_JIT_PASS +#include +#endif + #include "bc_freestream_type.h" #include "newtonian_state.h" #include "newtonian_types.h" diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h index 738af58898..e372aeedfb 100644 --- a/examples/fluids/qfunctions/blasius.h +++ b/examples/fluids/qfunctions/blasius.h @@ -7,7 +7,10 @@ /// @file /// Operator for Navier-Stokes example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS +#include +#endif #include "newtonian_state.h" #include "newtonian_types.h" diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h index 21db7c8dd6..9d458b0f31 100644 --- a/examples/fluids/qfunctions/channel.h +++ b/examples/fluids/qfunctions/channel.h @@ -7,8 +7,11 @@ /// @file /// Operator for Navier-Stokes example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#include +#endif #include "newtonian_state.h" #include "newtonian_types.h" diff --git a/examples/fluids/qfunctions/densitycurrent.h b/examples/fluids/qfunctions/densitycurrent.h index c5fe3752a0..d1e61a0a10 100644 --- a/examples/fluids/qfunctions/densitycurrent.h +++ b/examples/fluids/qfunctions/densitycurrent.h @@ -11,8 +11,10 @@ // Model from: // Semi-Implicit Formulations of the Navier-Stokes Equations: Application to // Nonhydrostatic Atmospheric Modeling, Giraldo, Restelli, and Lauter (2010). -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif #include "newtonian_state.h" #include "newtonian_types.h" diff --git a/examples/fluids/qfunctions/differential_filter.h b/examples/fluids/qfunctions/differential_filter.h index 10b89b70c7..36b4cfa2a5 100644 --- a/examples/fluids/qfunctions/differential_filter.h +++ b/examples/fluids/qfunctions/differential_filter.h @@ -7,7 +7,10 @@ // /// @file /// Implementation of differential filtering -#include +#include +#ifndef CEED_RUNNING_JIT_PASS +#include +#endif #include "differential_filter_enums.h" #include "newtonian_state.h" diff --git a/examples/fluids/qfunctions/eulervortex.h b/examples/fluids/qfunctions/eulervortex.h index 308cb50cea..878c5f615c 100644 --- a/examples/fluids/qfunctions/eulervortex.h +++ b/examples/fluids/qfunctions/eulervortex.h @@ -11,8 +11,11 @@ // Model from: // On the Order of Accuracy and Numerical Performance of Two Classes of Finite Volume WENO Schemes, Zhang, Zhang, and Shu (2011). -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#include +#endif #include "utils.h" diff --git a/examples/fluids/qfunctions/gaussianwave.h b/examples/fluids/qfunctions/gaussianwave.h index 88f9feb126..f48de3bcf2 100644 --- a/examples/fluids/qfunctions/gaussianwave.h +++ b/examples/fluids/qfunctions/gaussianwave.h @@ -7,8 +7,10 @@ /// @file /// Thermodynamic wave propogation for testing freestream/non-reflecting boundary conditions. Proposed in Mengaldo et. al. 2014 -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif #include "newtonian_state.h" #include "utils.h" diff --git a/examples/fluids/qfunctions/grid_anisotropy_tensor.h b/examples/fluids/qfunctions/grid_anisotropy_tensor.h index ef59a54c6d..cea712726f 100644 --- a/examples/fluids/qfunctions/grid_anisotropy_tensor.h +++ b/examples/fluids/qfunctions/grid_anisotropy_tensor.h @@ -8,7 +8,7 @@ /// @file /// Element anisotropy tensor, as defined in 'Invariant data-driven subgrid stress modeling in the strain-rate eigenframe for large eddy simulation' /// Prakash et al. 2022 -#include +#include #include "utils.h" #include "utils_eigensolver_jacobi.h" diff --git a/examples/fluids/qfunctions/inverse_multiplicity.h b/examples/fluids/qfunctions/inverse_multiplicity.h index c51fc0586b..6f83c7b39c 100644 --- a/examples/fluids/qfunctions/inverse_multiplicity.h +++ b/examples/fluids/qfunctions/inverse_multiplicity.h @@ -4,7 +4,7 @@ // SPDX-License-Identifier: BSD-2-Clause // // This file is part of CEED: http://github.com/ceed -#include +#include // @brief Calculate the inverse of the multiplicity, reducing to a single component CEED_QFUNCTION(InverseMultiplicity)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/examples/fluids/qfunctions/mass.h b/examples/fluids/qfunctions/mass.h index 1147a2bb31..42d27b2f68 100644 --- a/examples/fluids/qfunctions/mass.h +++ b/examples/fluids/qfunctions/mass.h @@ -7,8 +7,10 @@ /// @file /// Mass operator for Navier-Stokes example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ***************************************************************************** // This QFunction applies the mass matrix to five interlaced fields. diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index 66fc309018..a5ca161b3b 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -7,9 +7,11 @@ /// @file /// Operator for Navier-Stokes example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include #include +#endif #include "newtonian_state.h" #include "newtonian_types.h" diff --git a/examples/fluids/qfunctions/newtonian_state.h b/examples/fluids/qfunctions/newtonian_state.h index fa38c45e68..ab49f0d2c2 100644 --- a/examples/fluids/qfunctions/newtonian_state.h +++ b/examples/fluids/qfunctions/newtonian_state.h @@ -9,8 +9,10 @@ /// Structs and helper functions regarding the state of a newtonian simulation #pragma once -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif #include "newtonian_types.h" #include "utils.h" diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h index 60478c397b..b7c4e7e36e 100644 --- a/examples/fluids/qfunctions/newtonian_types.h +++ b/examples/fluids/qfunctions/newtonian_types.h @@ -6,7 +6,10 @@ // This file is part of CEED: http://github.com/ceed #pragma once -#include +#include +#ifndef CEED_RUNNING_JIT_PASS +#include +#endif #include "stabilization_types.h" diff --git a/examples/fluids/qfunctions/setupgeo.h b/examples/fluids/qfunctions/setupgeo.h index a4d5181ad7..4e8e9cf8f4 100644 --- a/examples/fluids/qfunctions/setupgeo.h +++ b/examples/fluids/qfunctions/setupgeo.h @@ -7,8 +7,10 @@ /// @file /// Geometric factors (3D) for Navier-Stokes example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif #include "setupgeo_helpers.h" #include "utils.h" diff --git a/examples/fluids/qfunctions/setupgeo2d.h b/examples/fluids/qfunctions/setupgeo2d.h index 4bbb39795c..0c2662906c 100644 --- a/examples/fluids/qfunctions/setupgeo2d.h +++ b/examples/fluids/qfunctions/setupgeo2d.h @@ -7,7 +7,8 @@ /// @file /// Geometric factors (2D) for Navier-Stokes example using PETSc -#include +#include + #include "setupgeo_helpers.h" #include "utils.h" diff --git a/examples/fluids/qfunctions/setupgeo_helpers.h b/examples/fluids/qfunctions/setupgeo_helpers.h index 930ff7bb72..b52c3cdcff 100644 --- a/examples/fluids/qfunctions/setupgeo_helpers.h +++ b/examples/fluids/qfunctions/setupgeo_helpers.h @@ -9,8 +9,10 @@ /// Geometric factors (3D) for Navier-Stokes example using PETSc #pragma once -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif #include "utils.h" diff --git a/examples/fluids/qfunctions/shocktube.h b/examples/fluids/qfunctions/shocktube.h index 87cdf73d4d..3ff908f4af 100644 --- a/examples/fluids/qfunctions/shocktube.h +++ b/examples/fluids/qfunctions/shocktube.h @@ -10,8 +10,11 @@ // Model from: // On the Order of Accuracy and Numerical Performance of Two Classes of Finite Volume WENO Schemes, Zhang, Zhang, and Shu (2011). -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#include +#endif #include "utils.h" diff --git a/examples/fluids/qfunctions/stabilization.h b/examples/fluids/qfunctions/stabilization.h index 55d99820c3..655d49e7a8 100644 --- a/examples/fluids/qfunctions/stabilization.h +++ b/examples/fluids/qfunctions/stabilization.h @@ -7,7 +7,7 @@ /// @file /// Helper functions for computing stabilization terms of a newtonian simulation -#include +#include #include "newtonian_state.h" diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index d1fec17ce5..28a779aa8c 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -12,9 +12,11 @@ /// SetupSTG_Rand reads in the input files and fills in STGShur14Context. /// Then STGShur14_CalcQF is run over quadrature points. /// Before the program exits, TearDownSTG is run to free the memory of the allocated arrays. -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include #include +#endif #include "newtonian_state.h" #include "setupgeo_helpers.h" diff --git a/examples/fluids/qfunctions/stg_shur14_type.h b/examples/fluids/qfunctions/stg_shur14_type.h index 5e369cd702..f7c8942614 100644 --- a/examples/fluids/qfunctions/stg_shur14_type.h +++ b/examples/fluids/qfunctions/stg_shur14_type.h @@ -6,7 +6,10 @@ // This file is part of CEED: http://github.com/ceed #pragma once -#include +#include +#ifndef CEED_RUNNING_JIT_PASS +#include +#endif #include "newtonian_types.h" diff --git a/examples/fluids/qfunctions/strong_boundary_conditions.h b/examples/fluids/qfunctions/strong_boundary_conditions.h index a503a236d9..7bb0453796 100644 --- a/examples/fluids/qfunctions/strong_boundary_conditions.h +++ b/examples/fluids/qfunctions/strong_boundary_conditions.h @@ -4,7 +4,7 @@ // SPDX-License-Identifier: BSD-2-Clause // // This file is part of CEED: http://github.com/ceed -#include +#include #include "setupgeo_helpers.h" diff --git a/examples/fluids/qfunctions/taylorgreen.h b/examples/fluids/qfunctions/taylorgreen.h index ddf33e665b..3b42fe18d8 100644 --- a/examples/fluids/qfunctions/taylorgreen.h +++ b/examples/fluids/qfunctions/taylorgreen.h @@ -4,8 +4,10 @@ // SPDX-License-Identifier: BSD-2-Clause // // This file is part of CEED: http://github.com/ceed -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif #include "newtonian_state.h" #include "newtonian_types.h" diff --git a/examples/fluids/qfunctions/turb_spanstats.h b/examples/fluids/qfunctions/turb_spanstats.h index 344adeebaa..377a0bbf75 100644 --- a/examples/fluids/qfunctions/turb_spanstats.h +++ b/examples/fluids/qfunctions/turb_spanstats.h @@ -4,7 +4,7 @@ // SPDX-License-Identifier: BSD-2-Clause // // This file is part of CEED: http://github.com/ceed -#include +#include #include "newtonian_state.h" #include "turb_stats_types.h" diff --git a/examples/fluids/qfunctions/utils.h b/examples/fluids/qfunctions/utils.h index f414e14e9c..90f67fad24 100644 --- a/examples/fluids/qfunctions/utils.h +++ b/examples/fluids/qfunctions/utils.h @@ -6,8 +6,10 @@ // This file is part of CEED: http://github.com/ceed #pragma once -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif #ifndef M_PI #define M_PI 3.14159265358979323846 diff --git a/examples/fluids/qfunctions/utils_eigensolver_jacobi.h b/examples/fluids/qfunctions/utils_eigensolver_jacobi.h index b8236789d2..1c0390d3b9 100644 --- a/examples/fluids/qfunctions/utils_eigensolver_jacobi.h +++ b/examples/fluids/qfunctions/utils_eigensolver_jacobi.h @@ -9,8 +9,11 @@ /// Eigen system solver for symmetric NxN matrices. Modified from the CC0 code provided at https://github.com/jewettaij/jacobi_pd #pragma once -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#include +#endif #include "utils.h" diff --git a/examples/fluids/qfunctions/velocity_gradient_projection.h b/examples/fluids/qfunctions/velocity_gradient_projection.h index c21bb68adc..28914c13d9 100644 --- a/examples/fluids/qfunctions/velocity_gradient_projection.h +++ b/examples/fluids/qfunctions/velocity_gradient_projection.h @@ -4,7 +4,7 @@ // SPDX-License-Identifier: BSD-2-Clause // // This file is part of CEED: http://github.com/ceed -#include +#include #include "newtonian_state.h" #include "newtonian_types.h" diff --git a/examples/mfem/bp1.h b/examples/mfem/bp1.h index 332340340f..df23dd4b51 100644 --- a/examples/mfem/bp1.h +++ b/examples/mfem/bp1.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include /// A structure used to pass additional data to f_build_mass struct BuildContext { diff --git a/examples/mfem/bp3.h b/examples/mfem/bp3.h index dde37b7446..a546d8aea6 100644 --- a/examples/mfem/bp3.h +++ b/examples/mfem/bp3.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include /// A structure used to pass additional data to f_build_diff and f_apply_diff struct BuildContext { diff --git a/examples/nek/bps/bps.h b/examples/nek/bps/bps.h index 446377b044..a0b6a022c1 100644 --- a/examples/nek/bps/bps.h +++ b/examples/nek/bps/bps.h @@ -4,12 +4,12 @@ // SPDX-License-Identifier: BSD-2-Clause // // This file is part of CEED: http://github.com/ceed +#pragma once -#ifndef bps_h -#define bps_h - -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif #ifndef M_PI #define M_PI 3.14159265358979323846 @@ -110,5 +110,3 @@ CEED_QFUNCTION(diffusionf)(void *ctx, CeedInt Q, const CeedScalar *const *in, Ce } // End of Quadrature Point Loop return 0; } - -#endif // bps_h diff --git a/examples/petsc/qfunctions/area/areacube.h b/examples/petsc/qfunctions/area/areacube.h index 93be0594b6..1cc7fcccab 100644 --- a/examples/petsc/qfunctions/area/areacube.h +++ b/examples/petsc/qfunctions/area/areacube.h @@ -8,8 +8,10 @@ /// @file /// libCEED QFunctions for mass operator example for a scalar field on the sphere using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ----------------------------------------------------------------------------- // This QFunction sets up the geometric factor required for integration when reference coordinates have a different dimension than the one of physical diff --git a/examples/petsc/qfunctions/area/areasphere.h b/examples/petsc/qfunctions/area/areasphere.h index 7cd73ca354..88ee221a7f 100644 --- a/examples/petsc/qfunctions/area/areasphere.h +++ b/examples/petsc/qfunctions/area/areasphere.h @@ -8,8 +8,10 @@ /// @file /// libCEED QFunctions for mass operator example for a scalar field on the sphere using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ----------------------------------------------------------------------------- // This QFunction sets up the geometric factor required for integration when reference coordinates have a different dimension than the one of physical diff --git a/examples/petsc/qfunctions/bps/bp1.h b/examples/petsc/qfunctions/bps/bp1.h index a902b29f7c..b5a1f0ad11 100644 --- a/examples/petsc/qfunctions/bps/bp1.h +++ b/examples/petsc/qfunctions/bps/bp1.h @@ -8,8 +8,10 @@ /// @file /// libCEED QFunctions for mass operator example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ----------------------------------------------------------------------------- // This QFunction sets up the geometric factors required to apply the mass operator diff --git a/examples/petsc/qfunctions/bps/bp1sphere.h b/examples/petsc/qfunctions/bps/bp1sphere.h index d604406f29..0129a3ba66 100644 --- a/examples/petsc/qfunctions/bps/bp1sphere.h +++ b/examples/petsc/qfunctions/bps/bp1sphere.h @@ -8,8 +8,10 @@ /// @file /// libCEED QFunctions for mass operator example for a scalar field on the sphere using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ----------------------------------------------------------------------------- // This QFunction sets up the geometric factors required for integration and coordinate transformations when reference coordinates have a different diff --git a/examples/petsc/qfunctions/bps/bp2.h b/examples/petsc/qfunctions/bps/bp2.h index 22ba9fb788..12c5fc3521 100644 --- a/examples/petsc/qfunctions/bps/bp2.h +++ b/examples/petsc/qfunctions/bps/bp2.h @@ -8,8 +8,10 @@ /// @file /// libCEED QFunctions for mass operator example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ----------------------------------------------------------------------------- // This QFunction sets up the rhs and true solution for the problem diff --git a/examples/petsc/qfunctions/bps/bp2sphere.h b/examples/petsc/qfunctions/bps/bp2sphere.h index 36a8e95778..2ebff9ef91 100644 --- a/examples/petsc/qfunctions/bps/bp2sphere.h +++ b/examples/petsc/qfunctions/bps/bp2sphere.h @@ -8,8 +8,10 @@ /// @file /// libCEED QFunctions for mass operator example for a vector field on the sphere using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ----------------------------------------------------------------------------- // This QFunction sets up the rhs and true solution for the problem diff --git a/examples/petsc/qfunctions/bps/bp3.h b/examples/petsc/qfunctions/bps/bp3.h index dcf84defae..a3674ed031 100644 --- a/examples/petsc/qfunctions/bps/bp3.h +++ b/examples/petsc/qfunctions/bps/bp3.h @@ -8,8 +8,10 @@ /// @file /// libCEED QFunctions for diffusion operator example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ----------------------------------------------------------------------------- // This QFunction sets up the geometric factors required to apply the diffusion operator diff --git a/examples/petsc/qfunctions/bps/bp3sphere.h b/examples/petsc/qfunctions/bps/bp3sphere.h index 1f901dd97a..fdc16b4c84 100644 --- a/examples/petsc/qfunctions/bps/bp3sphere.h +++ b/examples/petsc/qfunctions/bps/bp3sphere.h @@ -8,8 +8,10 @@ /// @file /// libCEED QFunctions for diffusion operator example for a scalar field on the sphere using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ----------------------------------------------------------------------------- // This QFunction sets up the geometric factors required for integration and coordinate transformations when reference coordinates have a different diff --git a/examples/petsc/qfunctions/bps/bp4.h b/examples/petsc/qfunctions/bps/bp4.h index 46307c338a..4f8f6fd58d 100644 --- a/examples/petsc/qfunctions/bps/bp4.h +++ b/examples/petsc/qfunctions/bps/bp4.h @@ -8,8 +8,10 @@ /// @file /// libCEED QFunctions for diffusion operator example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ----------------------------------------------------------------------------- // This QFunction sets up the rhs and true solution for the problem diff --git a/examples/petsc/qfunctions/bps/bp4sphere.h b/examples/petsc/qfunctions/bps/bp4sphere.h index 517f353371..39b631173b 100644 --- a/examples/petsc/qfunctions/bps/bp4sphere.h +++ b/examples/petsc/qfunctions/bps/bp4sphere.h @@ -8,8 +8,10 @@ /// @file /// libCEED QFunctions for mass operator example for a vector field on the sphere using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ----------------------------------------------------------------------------- // This QFunction sets up the rhs and true solution for the problem diff --git a/examples/petsc/qfunctions/bps/common.h b/examples/petsc/qfunctions/bps/common.h index 26f374d5d4..fd38dbc13d 100644 --- a/examples/petsc/qfunctions/bps/common.h +++ b/examples/petsc/qfunctions/bps/common.h @@ -8,7 +8,7 @@ /// @file /// libCEED QFunctions for BP examples using PETSc -#include +#include // ----------------------------------------------------------------------------- CEED_QFUNCTION(Error)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { diff --git a/examples/petsc/qfunctions/swarm/swarmmass.h b/examples/petsc/qfunctions/swarm/swarmmass.h index e355eff8d7..4c321871fe 100644 --- a/examples/petsc/qfunctions/swarm/swarmmass.h +++ b/examples/petsc/qfunctions/swarm/swarmmass.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(SetupMass)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar(*J)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[0]; diff --git a/examples/solids/qfunctions/common.h b/examples/solids/qfunctions/common.h index bfdb92522f..acaa815cc5 100644 --- a/examples/solids/qfunctions/common.h +++ b/examples/solids/qfunctions/common.h @@ -8,7 +8,7 @@ /// @file /// Geometric factors for solid mechanics example using PETSc -#include +#include // ----------------------------------------------------------------------------- // This QFunction sets up the geometric factors required for integration and coordinate transformations diff --git a/examples/solids/qfunctions/constant-force.h b/examples/solids/qfunctions/constant-force.h index a94dc4f3bf..232f97588e 100644 --- a/examples/solids/qfunctions/constant-force.h +++ b/examples/solids/qfunctions/constant-force.h @@ -8,8 +8,10 @@ /// @file /// Constant forcing term for solid mechanics example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif #ifndef PHYSICS_STRUCT #define PHYSICS_STRUCT diff --git a/examples/solids/qfunctions/finite-strain-mooney-rivlin.h b/examples/solids/qfunctions/finite-strain-mooney-rivlin.h index f9c19e81b1..7a802693d2 100644 --- a/examples/solids/qfunctions/finite-strain-mooney-rivlin.h +++ b/examples/solids/qfunctions/finite-strain-mooney-rivlin.h @@ -8,8 +8,10 @@ /// @file /// Hyperelasticity, finite strain for solid mechanics example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ----------------------------------------------------------------------------- // Mooney-Rivlin context diff --git a/examples/solids/qfunctions/finite-strain-neo-hookean.h b/examples/solids/qfunctions/finite-strain-neo-hookean.h index 42b2b46e2c..9b1ff27979 100644 --- a/examples/solids/qfunctions/finite-strain-neo-hookean.h +++ b/examples/solids/qfunctions/finite-strain-neo-hookean.h @@ -8,8 +8,10 @@ /// @file /// Hyperelasticity, finite strain for solid mechanics example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif #ifndef PHYSICS_STRUCT #define PHYSICS_STRUCT diff --git a/examples/solids/qfunctions/linear.h b/examples/solids/qfunctions/linear.h index 20b293b6f1..57f5fe4f61 100644 --- a/examples/solids/qfunctions/linear.h +++ b/examples/solids/qfunctions/linear.h @@ -8,8 +8,10 @@ /// @file /// Linear elasticity for solid mechanics example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif #ifndef PHYSICS_STRUCT #define PHYSICS_STRUCT diff --git a/examples/solids/qfunctions/manufactured-force.h b/examples/solids/qfunctions/manufactured-force.h index 0764d103e3..de48be4ba3 100644 --- a/examples/solids/qfunctions/manufactured-force.h +++ b/examples/solids/qfunctions/manufactured-force.h @@ -8,8 +8,10 @@ /// @file /// Linear elasticity manufactured solution forcing term for solid mechanics example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif #ifndef PHYSICS_STRUCT #define PHYSICS_STRUCT diff --git a/examples/solids/qfunctions/manufactured-true.h b/examples/solids/qfunctions/manufactured-true.h index 389fb6596f..6fd97c1b13 100644 --- a/examples/solids/qfunctions/manufactured-true.h +++ b/examples/solids/qfunctions/manufactured-true.h @@ -8,8 +8,10 @@ /// @file /// Linear elasticity manufactured solution true solution for solid mechanics example using PETSc -#include +#include +#ifndef CEED_RUNNING_JIT_PASS #include +#endif // ----------------------------------------------------------------------------- // True solution for linear elasticity manufactured solution diff --git a/examples/solids/qfunctions/traction-boundary.h b/examples/solids/qfunctions/traction-boundary.h index 181b176d0a..7fd59c742c 100644 --- a/examples/solids/qfunctions/traction-boundary.h +++ b/examples/solids/qfunctions/traction-boundary.h @@ -8,7 +8,7 @@ /// @file /// Geometric factors for solid mechanics example using PETSc -#include +#include // ----------------------------------------------------------------------------- // This QFunction computes the surface integral of the user traction vector on the constrained faces. diff --git a/include/ceed.h b/include/ceed.h index effe28eaf1..b905b30851 100644 --- a/include/ceed.h +++ b/include/ceed.h @@ -1 +1,5 @@ +#ifdef CEED_RUNNING_JIT_PASS +#include "ceed/types.h" +#else #include "ceed/ceed.h" +#endif diff --git a/include/ceed/backend.h b/include/ceed/backend.h index 05da6f8981..43f2d52d20 100644 --- a/include/ceed/backend.h +++ b/include/ceed/backend.h @@ -254,6 +254,8 @@ CEED_EXTERN int CeedSetData(Ceed ceed, void *data); CEED_EXTERN int CeedReference(Ceed ceed); CEED_EXTERN int CeedGetWorkVector(Ceed ceed, CeedSize len, CeedVector *vec); CEED_EXTERN int CeedRestoreWorkVector(Ceed ceed, CeedVector *vec); +CEED_EXTERN int CeedGetJitSourceRoots(Ceed ceed, CeedInt *num_source_roots, const char ***jit_source_roots); +CEED_EXTERN int CeedRestoreJitSourceRoots(Ceed ceed, const char ***jit_source_roots); CEED_EXTERN int CeedVectorHasValidArray(CeedVector vec, bool *has_valid_array); CEED_EXTERN int CeedVectorHasBorrowedArrayOfType(CeedVector vec, CeedMemType mem_type, bool *has_borrowed_array_of_type); diff --git a/include/ceed/ceed-f32.h b/include/ceed/ceed-f32.h index e605c47a4b..d928f5158a 100644 --- a/include/ceed/ceed-f32.h +++ b/include/ceed/ceed-f32.h @@ -8,8 +8,9 @@ /// @file /// Public header for definitions related to using FP32 floating point (single precision) for CeedScalar. /// Include this header in ceed.h to use float instead of double. -#ifndef CEED_F32_H -#define CEED_F32_H +#pragma once + +#define CEED_SCALAR_IS_FP32 /// Set base scalar type to FP32. (See CeedScalarType enum in ceed.h for all options.) #define CEED_SCALAR_TYPE CEED_SCALAR_FP32 @@ -17,5 +18,3 @@ typedef float CeedScalar; /// Machine epsilon #define CEED_EPSILON 6e-08 - -#endif // CEED_F32_H diff --git a/include/ceed/ceed-f64.h b/include/ceed/ceed-f64.h index 3e6876cc19..bcab40cfd2 100644 --- a/include/ceed/ceed-f64.h +++ b/include/ceed/ceed-f64.h @@ -8,8 +8,9 @@ /// @file /// Public header for definitions related to using FP64 floating point (double precision) for CeedScalar. /// This is the default header included in ceed.h. -#ifndef CEED_F64_H -#define CEED_F64_H +#pragma once + +#define CEED_SCALAR_IS_FP64 /// Set base scalar type to FP64. (See CeedScalarType enum in ceed.h for all options.) #define CEED_SCALAR_TYPE CEED_SCALAR_FP64 @@ -17,5 +18,3 @@ typedef double CeedScalar; /// Machine epsilon #define CEED_EPSILON 1e-16 - -#endif // CEED_F64_H diff --git a/include/ceed/jit-source/cuda/cuda-atomic-add-fallback.h b/include/ceed/jit-source/cuda/cuda-atomic-add-fallback.h index da92667707..6c3712c36b 100644 --- a/include/ceed/jit-source/cuda/cuda-atomic-add-fallback.h +++ b/include/ceed/jit-source/cuda/cuda-atomic-add-fallback.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA atomic add fallback definition - -#include +#include //------------------------------------------------------------------------------ // Atomic add, for older CUDA diff --git a/include/ceed/jit-source/cuda/cuda-gen-templates.h b/include/ceed/jit-source/cuda/cuda-gen-templates.h index f3d7052e3c..eb566137ee 100644 --- a/include/ceed/jit-source/cuda/cuda-gen-templates.h +++ b/include/ceed/jit-source/cuda/cuda-gen-templates.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA backend macro and type definitions for JiT source - -#include +#include //------------------------------------------------------------------------------ // Load matrices for basis actions diff --git a/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor-templates.h b/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor-templates.h index 6b19ad448d..9f0fa61b49 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor-templates.h +++ b/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor-templates.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA non-tensor product basis templates - -#include +#include //------------------------------------------------------------------------------ // Tensor contraction diff --git a/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h b/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h index 6dbf8771d8..afee25eb8d 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h +++ b/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA non-tensor product basis - -#include +#include #include "cuda-ref-basis-nontensor-templates.h" diff --git a/include/ceed/jit-source/cuda/cuda-ref-basis-tensor-at-points.h b/include/ceed/jit-source/cuda/cuda-ref-basis-tensor-at-points.h index 7355705660..2d17b55b2c 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-basis-tensor-at-points.h +++ b/include/ceed/jit-source/cuda/cuda-ref-basis-tensor-at-points.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA tensor product basis with AtPoints evaluation - -#include +#include //------------------------------------------------------------------------------ // Chebyshev values diff --git a/include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h b/include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h index 4c8c2f447c..a5ed841a11 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h +++ b/include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA tensor product basis - -#include +#include //------------------------------------------------------------------------------ // Tensor Basis Kernels diff --git a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h index df5b9ad338..7a74ea9723 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h +++ b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h @@ -7,7 +7,7 @@ /// @file /// Internal header for CUDA operator diagonal assembly -#include +#include #if USE_CEEDSIZE typedef CeedSize IndexType; diff --git a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h index 9fc02a1c7a..1de68e76c8 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h +++ b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA operator full assembly - -#include +#include #if USE_CEEDSIZE typedef CeedSize IndexType; diff --git a/include/ceed/jit-source/cuda/cuda-ref-qfunction.h b/include/ceed/jit-source/cuda/cuda-ref-qfunction.h index 7fbf7901bc..6b26aee037 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-qfunction.h +++ b/include/ceed/jit-source/cuda/cuda-ref-qfunction.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA backend QFunction read/write kernels - -#include +#include //------------------------------------------------------------------------------ // Read from quadrature points diff --git a/include/ceed/jit-source/cuda/cuda-ref-restriction-at-points.h b/include/ceed/jit-source/cuda/cuda-ref-restriction-at-points.h index 87aeda2e3b..039eab8cbf 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-restriction-at-points.h +++ b/include/ceed/jit-source/cuda/cuda-ref-restriction-at-points.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA offset element restriction kernels - -#include +#include //------------------------------------------------------------------------------ // E-vector -> L-vector, standard (with offsets) diff --git a/include/ceed/jit-source/cuda/cuda-ref-restriction-curl-oriented.h b/include/ceed/jit-source/cuda/cuda-ref-restriction-curl-oriented.h index 86a4b53545..48d8bda313 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-restriction-curl-oriented.h +++ b/include/ceed/jit-source/cuda/cuda-ref-restriction-curl-oriented.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA curl-oriented element restriction kernels - -#include +#include //------------------------------------------------------------------------------ // L-vector -> E-vector, curl-oriented diff --git a/include/ceed/jit-source/cuda/cuda-ref-restriction-offset.h b/include/ceed/jit-source/cuda/cuda-ref-restriction-offset.h index 9492b31984..50c0ddbe92 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-restriction-offset.h +++ b/include/ceed/jit-source/cuda/cuda-ref-restriction-offset.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA offset element restriction kernels - -#include +#include //------------------------------------------------------------------------------ // L-vector -> E-vector, standard (with offsets) diff --git a/include/ceed/jit-source/cuda/cuda-ref-restriction-oriented.h b/include/ceed/jit-source/cuda/cuda-ref-restriction-oriented.h index 7c667922bf..dca2dbb6c7 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-restriction-oriented.h +++ b/include/ceed/jit-source/cuda/cuda-ref-restriction-oriented.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA oriented element restriction kernels - -#include +#include //------------------------------------------------------------------------------ // L-vector -> E-vector, oriented diff --git a/include/ceed/jit-source/cuda/cuda-ref-restriction-strided.h b/include/ceed/jit-source/cuda/cuda-ref-restriction-strided.h index d10f73c11d..4d297b09c3 100644 --- a/include/ceed/jit-source/cuda/cuda-ref-restriction-strided.h +++ b/include/ceed/jit-source/cuda/cuda-ref-restriction-strided.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA strided element restriction kernels - -#include +#include //------------------------------------------------------------------------------ // L-vector -> E-vector, strided diff --git a/include/ceed/jit-source/cuda/cuda-shared-basis-read-write-templates.h b/include/ceed/jit-source/cuda/cuda-shared-basis-read-write-templates.h index 56234c28e4..8671dc6423 100644 --- a/include/ceed/jit-source/cuda/cuda-shared-basis-read-write-templates.h +++ b/include/ceed/jit-source/cuda/cuda-shared-basis-read-write-templates.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA shared memory basis read/write templates - -#include +#include //------------------------------------------------------------------------------ // 1D diff --git a/include/ceed/jit-source/cuda/cuda-shared-basis-tensor-templates.h b/include/ceed/jit-source/cuda/cuda-shared-basis-tensor-templates.h index 56989f2b69..ba2a273a40 100644 --- a/include/ceed/jit-source/cuda/cuda-shared-basis-tensor-templates.h +++ b/include/ceed/jit-source/cuda/cuda-shared-basis-tensor-templates.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA shared memory tensor product basis templates - -#include +#include //------------------------------------------------------------------------------ // 1D diff --git a/include/ceed/jit-source/cuda/cuda-shared-basis-tensor.h b/include/ceed/jit-source/cuda/cuda-shared-basis-tensor.h index c295362978..9b80043996 100644 --- a/include/ceed/jit-source/cuda/cuda-shared-basis-tensor.h +++ b/include/ceed/jit-source/cuda/cuda-shared-basis-tensor.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA shared memory tensor product basis - -#include +#include #include "cuda-shared-basis-read-write-templates.h" #include "cuda-shared-basis-tensor-templates.h" diff --git a/include/ceed/jit-source/cuda/cuda-types.h b/include/ceed/jit-source/cuda/cuda-types.h index 9863caa7e0..3410286f78 100644 --- a/include/ceed/jit-source/cuda/cuda-types.h +++ b/include/ceed/jit-source/cuda/cuda-types.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA type definitions -#ifndef CEED_CUDA_TYPES_H -#define CEED_CUDA_TYPES_H +#pragma once #include @@ -31,5 +30,3 @@ typedef struct { CeedInt t_id; CeedScalar *slice; } SharedData_Cuda; - -#endif // CEED_CUDA_TYPES_H diff --git a/include/ceed/jit-source/gallery/ceed-identity.h b/include/ceed/jit-source/gallery/ceed-identity.h index 1a84718f4a..81a005d664 100644 --- a/include/ceed/jit-source/gallery/ceed-identity.h +++ b/include/ceed/jit-source/gallery/ceed-identity.h @@ -8,8 +8,7 @@ /** @brief Identity QFunction that copies inputs directly into outputs **/ - -#include +#include typedef struct { CeedInt size; diff --git a/include/ceed/jit-source/gallery/ceed-mass1dbuild.h b/include/ceed/jit-source/gallery/ceed-mass1dbuild.h index c266beff64..4db3634acd 100644 --- a/include/ceed/jit-source/gallery/ceed-mass1dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-mass1dbuild.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for building the geometric data for the 1D mass matrix **/ - -#include +#include CEED_QFUNCTION(Mass1DBuild)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is Jacobians, size (Q) diff --git a/include/ceed/jit-source/gallery/ceed-mass2dbuild.h b/include/ceed/jit-source/gallery/ceed-mass2dbuild.h index 7e5f6fbd34..583441007a 100644 --- a/include/ceed/jit-source/gallery/ceed-mass2dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-mass2dbuild.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for building the geometric data for the 2D mass matrix **/ - -#include +#include CEED_QFUNCTION(Mass2DBuild)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is Jacobians with shape [2, nc=2, Q] diff --git a/include/ceed/jit-source/gallery/ceed-mass3dbuild.h b/include/ceed/jit-source/gallery/ceed-mass3dbuild.h index 71dc961215..855f48682c 100644 --- a/include/ceed/jit-source/gallery/ceed-mass3dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-mass3dbuild.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for building the geometric data for the 3D mass matrix **/ - -#include +#include CEED_QFUNCTION(Mass3DBuild)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is Jacobians with shape [2, nc=3, Q] diff --git a/include/ceed/jit-source/gallery/ceed-massapply.h b/include/ceed/jit-source/gallery/ceed-massapply.h index 8559ce8a26..4ec920ac7a 100644 --- a/include/ceed/jit-source/gallery/ceed-massapply.h +++ b/include/ceed/jit-source/gallery/ceed-massapply.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for applying the mass matrix **/ - -#include +#include CEED_QFUNCTION(MassApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is u, size (Q) diff --git a/include/ceed/jit-source/gallery/ceed-poisson1dapply.h b/include/ceed/jit-source/gallery/ceed-poisson1dapply.h index dc38d4f21a..3d6bbfe513 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson1dapply.h +++ b/include/ceed/jit-source/gallery/ceed-poisson1dapply.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for applying the 1D Poisson operator **/ - -#include +#include CEED_QFUNCTION(Poisson1DApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is gradient u, size (Q) diff --git a/include/ceed/jit-source/gallery/ceed-poisson1dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson1dbuild.h index dce08aabb2..07096cca96 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson1dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-poisson1dbuild.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for building the geometric data for the 1D Poisson operator **/ - -#include +#include CEED_QFUNCTION(Poisson1DBuild)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // At every quadrature point, compute w/det(J).adj(J).adj(J)^T and store diff --git a/include/ceed/jit-source/gallery/ceed-poisson2dapply.h b/include/ceed/jit-source/gallery/ceed-poisson2dapply.h index dab64be671..5c46422ecf 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson2dapply.h +++ b/include/ceed/jit-source/gallery/ceed-poisson2dapply.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for applying the 2D Poisson operator **/ - -#include +#include CEED_QFUNCTION(Poisson2DApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is gradient u, shape [2, nc=1, Q] diff --git a/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h index 11e15255ad..0f4e0b3f54 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for building the geometric data for the 2D Poisson operator **/ - -#include +#include CEED_QFUNCTION(Poisson2DBuild)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // At every quadrature point, compute w/det(J).adj(J).adj(J)^T and store diff --git a/include/ceed/jit-source/gallery/ceed-poisson3dapply.h b/include/ceed/jit-source/gallery/ceed-poisson3dapply.h index 71e76926e7..c78c2ecbf4 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson3dapply.h +++ b/include/ceed/jit-source/gallery/ceed-poisson3dapply.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for applying the geometric data for the 3D Poisson operator **/ - -#include +#include CEED_QFUNCTION(Poisson3DApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is gradient u, shape [3, nc=1, Q] diff --git a/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h index 2d4e0621e4..b2013de28b 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for building the geometric data for the 3D Poisson operator **/ - -#include +#include CEED_QFUNCTION(Poisson3DBuild)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // At every quadrature point, compute w/det(J).adj(J).adj(J)^T and store the symmetric part of the result. diff --git a/include/ceed/jit-source/gallery/ceed-scale.h b/include/ceed/jit-source/gallery/ceed-scale.h index 1249810987..6ffe081815 100644 --- a/include/ceed/jit-source/gallery/ceed-scale.h +++ b/include/ceed/jit-source/gallery/ceed-scale.h @@ -8,8 +8,7 @@ /** @brief Scaling QFunction that scales inputs **/ - -#include +#include CEED_QFUNCTION(Scale)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // Ctx holds field size diff --git a/include/ceed/jit-source/gallery/ceed-vectormassapply.h b/include/ceed/jit-source/gallery/ceed-vectormassapply.h index 70a2f3e25c..40825f77f2 100644 --- a/include/ceed/jit-source/gallery/ceed-vectormassapply.h +++ b/include/ceed/jit-source/gallery/ceed-vectormassapply.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for applying the mass matrix on a vector system with three components **/ - -#include +#include CEED_QFUNCTION(Vector3MassApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is u, size (Q) diff --git a/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h index e056729422..4101c7f886 100644 --- a/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h +++ b/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for applying the 1D Poisson operator on a vector system with three components **/ - -#include +#include CEED_QFUNCTION(Vector3Poisson1DApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is gradient u, shape [1, nc=3, Q] diff --git a/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h index 1b56240048..061fe75355 100644 --- a/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h +++ b/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for applying the 2D Poisson operator on a vector system with three components **/ - -#include +#include CEED_QFUNCTION(Vector3Poisson2DApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is gradient u, shape [2, nc=3, Q] diff --git a/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h index 9ca86dba01..7aabaa9025 100644 --- a/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h +++ b/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h @@ -8,8 +8,7 @@ /** @brief Ceed QFunction for applying the geometric data for the 3D Poisson on a vector system with three components operator **/ - -#include +#include CEED_QFUNCTION(Vector3Poisson3DApply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is gradient u, shape [3, nc=3, Q] diff --git a/include/ceed/jit-source/hip/hip-gen-templates.h b/include/ceed/jit-source/hip/hip-gen-templates.h index 812e901866..02b4a7fd51 100644 --- a/include/ceed/jit-source/hip/hip-gen-templates.h +++ b/include/ceed/jit-source/hip/hip-gen-templates.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP backend macro and type definitions for JiT source - -#include +#include //------------------------------------------------------------------------------ // Load matrices for basis actions diff --git a/include/ceed/jit-source/hip/hip-ref-basis-nontensor-templates.h b/include/ceed/jit-source/hip/hip-ref-basis-nontensor-templates.h index 0374d459d5..9d840f1edd 100644 --- a/include/ceed/jit-source/hip/hip-ref-basis-nontensor-templates.h +++ b/include/ceed/jit-source/hip/hip-ref-basis-nontensor-templates.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP non-tensor product basis templates - -#include +#include //------------------------------------------------------------------------------ // Tensor contraction diff --git a/include/ceed/jit-source/hip/hip-ref-basis-nontensor.h b/include/ceed/jit-source/hip/hip-ref-basis-nontensor.h index 953f6f48e3..6efbf47054 100644 --- a/include/ceed/jit-source/hip/hip-ref-basis-nontensor.h +++ b/include/ceed/jit-source/hip/hip-ref-basis-nontensor.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP non-tensor product basis - -#include +#include #include "hip-ref-basis-nontensor-templates.h" diff --git a/include/ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h b/include/ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h index 4744b17eb2..9ce63a38de 100644 --- a/include/ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h +++ b/include/ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h @@ -7,8 +7,7 @@ /// @file /// Internal header for CUDA tensor product basis with AtPoints evaluation - -#include +#include //------------------------------------------------------------------------------ // Chebyshev values diff --git a/include/ceed/jit-source/hip/hip-ref-basis-tensor.h b/include/ceed/jit-source/hip/hip-ref-basis-tensor.h index db509ac2a0..e5cf318dc1 100644 --- a/include/ceed/jit-source/hip/hip-ref-basis-tensor.h +++ b/include/ceed/jit-source/hip/hip-ref-basis-tensor.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP tensor product basis - -#include +#include //------------------------------------------------------------------------------ // Tensor Basis Kernels diff --git a/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h b/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h index e6a8b6e6a1..c9eed447e6 100644 --- a/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h +++ b/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP operator diagonal assembly - -#include +#include #if USE_CEEDSIZE typedef CeedSize IndexType; diff --git a/include/ceed/jit-source/hip/hip-ref-operator-assemble.h b/include/ceed/jit-source/hip/hip-ref-operator-assemble.h index 838dcfd4a5..38625c7c3d 100644 --- a/include/ceed/jit-source/hip/hip-ref-operator-assemble.h +++ b/include/ceed/jit-source/hip/hip-ref-operator-assemble.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP operator full assembly - -#include +#include #if USE_CEEDSIZE typedef CeedSize IndexType; diff --git a/include/ceed/jit-source/hip/hip-ref-qfunction.h b/include/ceed/jit-source/hip/hip-ref-qfunction.h index 1b423072af..f0d436572d 100644 --- a/include/ceed/jit-source/hip/hip-ref-qfunction.h +++ b/include/ceed/jit-source/hip/hip-ref-qfunction.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP backend QFunction read/write kernels - -#include +#include //------------------------------------------------------------------------------ // Read from quadrature points diff --git a/include/ceed/jit-source/hip/hip-ref-restriction-at-points.h b/include/ceed/jit-source/hip/hip-ref-restriction-at-points.h index f4cb95993b..cdc06d6061 100644 --- a/include/ceed/jit-source/hip/hip-ref-restriction-at-points.h +++ b/include/ceed/jit-source/hip/hip-ref-restriction-at-points.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP offset element restriction kernels - -#include +#include //------------------------------------------------------------------------------ // E-vector -> L-vector, standard (with offsets) diff --git a/include/ceed/jit-source/hip/hip-ref-restriction-curl-oriented.h b/include/ceed/jit-source/hip/hip-ref-restriction-curl-oriented.h index 76d9758828..12b3a0250b 100644 --- a/include/ceed/jit-source/hip/hip-ref-restriction-curl-oriented.h +++ b/include/ceed/jit-source/hip/hip-ref-restriction-curl-oriented.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP curl-oriented element restriction kernels - -#include +#include //------------------------------------------------------------------------------ // L-vector -> E-vector, curl-oriented diff --git a/include/ceed/jit-source/hip/hip-ref-restriction-offset.h b/include/ceed/jit-source/hip/hip-ref-restriction-offset.h index 65283b7193..3d0d68cb10 100644 --- a/include/ceed/jit-source/hip/hip-ref-restriction-offset.h +++ b/include/ceed/jit-source/hip/hip-ref-restriction-offset.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP offset element restriction kernels - -#include +#include //------------------------------------------------------------------------------ // L-vector -> E-vector, standard (with offsets) diff --git a/include/ceed/jit-source/hip/hip-ref-restriction-oriented.h b/include/ceed/jit-source/hip/hip-ref-restriction-oriented.h index f983a24fc0..155173de63 100644 --- a/include/ceed/jit-source/hip/hip-ref-restriction-oriented.h +++ b/include/ceed/jit-source/hip/hip-ref-restriction-oriented.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP oriented element restriction kernels - -#include +#include //------------------------------------------------------------------------------ // L-vector -> E-vector, oriented diff --git a/include/ceed/jit-source/hip/hip-ref-restriction-strided.h b/include/ceed/jit-source/hip/hip-ref-restriction-strided.h index de1335c117..8af0528756 100644 --- a/include/ceed/jit-source/hip/hip-ref-restriction-strided.h +++ b/include/ceed/jit-source/hip/hip-ref-restriction-strided.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP strided element restriction kernels - -#include +#include //------------------------------------------------------------------------------ // L-vector -> E-vector, strided diff --git a/include/ceed/jit-source/hip/hip-shared-basis-read-write-templates.h b/include/ceed/jit-source/hip/hip-shared-basis-read-write-templates.h index 379d52d13b..8691a92710 100644 --- a/include/ceed/jit-source/hip/hip-shared-basis-read-write-templates.h +++ b/include/ceed/jit-source/hip/hip-shared-basis-read-write-templates.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP shared memory basis read/write templates - -#include +#include //------------------------------------------------------------------------------ // Helper function: load matrices for basis actions diff --git a/include/ceed/jit-source/hip/hip-shared-basis-tensor-templates.h b/include/ceed/jit-source/hip/hip-shared-basis-tensor-templates.h index 8dc50e4ed8..4f4cc58e78 100644 --- a/include/ceed/jit-source/hip/hip-shared-basis-tensor-templates.h +++ b/include/ceed/jit-source/hip/hip-shared-basis-tensor-templates.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP shared memory tensor product basis templates - -#include +#include //------------------------------------------------------------------------------ // 1D diff --git a/include/ceed/jit-source/hip/hip-shared-basis-tensor.h b/include/ceed/jit-source/hip/hip-shared-basis-tensor.h index d052e53bf1..d84f5555c8 100644 --- a/include/ceed/jit-source/hip/hip-shared-basis-tensor.h +++ b/include/ceed/jit-source/hip/hip-shared-basis-tensor.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP shared memory tensor product basis - -#include +#include #include "hip-shared-basis-read-write-templates.h" #include "hip-shared-basis-tensor-templates.h" diff --git a/include/ceed/jit-source/hip/hip-types.h b/include/ceed/jit-source/hip/hip-types.h index 0042199c8b..418e6fb02c 100644 --- a/include/ceed/jit-source/hip/hip-types.h +++ b/include/ceed/jit-source/hip/hip-types.h @@ -7,8 +7,7 @@ /// @file /// Internal header for HIP type definitions -#ifndef CEED_HIP_TYPES_H -#define CEED_HIP_TYPES_H +#pragma once #include @@ -31,5 +30,3 @@ typedef struct { CeedInt t_id; CeedScalar *slice; } SharedData_Hip; - -#endif // CEED_HIP_TYPES_H diff --git a/include/ceed/jit-source/magma/magma-basis-grad-1d.h b/include/ceed/jit-source/magma/magma-basis-grad-1d.h index cd6f8548fb..998b0d5020 100644 --- a/include/ceed/jit-source/magma/magma-basis-grad-1d.h +++ b/include/ceed/jit-source/magma/magma-basis-grad-1d.h @@ -7,7 +7,6 @@ /// @file /// Internal header for MAGMA tensor basis gradient in 1D - #include "magma-common-tensor.h" // macros to abstract access of shared memory and reg. file diff --git a/include/ceed/jit-source/magma/magma-basis-grad-2d.h b/include/ceed/jit-source/magma/magma-basis-grad-2d.h index b4e7e2981a..b9fedf5c8e 100644 --- a/include/ceed/jit-source/magma/magma-basis-grad-2d.h +++ b/include/ceed/jit-source/magma/magma-basis-grad-2d.h @@ -7,7 +7,6 @@ /// @file /// Internal header for MAGMA tensor basis gradient in 2D - #include "magma-common-tensor.h" // macros to abstract access of shared memory and reg. file diff --git a/include/ceed/jit-source/magma/magma-basis-grad-3d.h b/include/ceed/jit-source/magma/magma-basis-grad-3d.h index c8028be756..64572a6510 100644 --- a/include/ceed/jit-source/magma/magma-basis-grad-3d.h +++ b/include/ceed/jit-source/magma/magma-basis-grad-3d.h @@ -7,7 +7,6 @@ /// @file /// Internal header for MAGMA tensor basis gradient in 3D - #include "magma-common-tensor.h" // macros to abstract access of shared memory and reg. file diff --git a/include/ceed/jit-source/magma/magma-basis-interp-1d.h b/include/ceed/jit-source/magma/magma-basis-interp-1d.h index 02f894ecce..c281d430dc 100644 --- a/include/ceed/jit-source/magma/magma-basis-interp-1d.h +++ b/include/ceed/jit-source/magma/magma-basis-interp-1d.h @@ -7,7 +7,6 @@ /// @file /// Internal header for MAGMA tensor basis interpolation in 1D - #include "magma-common-tensor.h" // macros to abstract access of shared memory and reg. file diff --git a/include/ceed/jit-source/magma/magma-basis-interp-2d.h b/include/ceed/jit-source/magma/magma-basis-interp-2d.h index 56c8081c83..fc2bba223a 100644 --- a/include/ceed/jit-source/magma/magma-basis-interp-2d.h +++ b/include/ceed/jit-source/magma/magma-basis-interp-2d.h @@ -7,7 +7,6 @@ /// @file /// Internal header for MAGMA tensor basis interpolation in 1D - #include "magma-common-tensor.h" // macros to abstract access of shared memory and reg. file diff --git a/include/ceed/jit-source/magma/magma-basis-interp-3d.h b/include/ceed/jit-source/magma/magma-basis-interp-3d.h index ac11e3f8df..7c214c8624 100644 --- a/include/ceed/jit-source/magma/magma-basis-interp-3d.h +++ b/include/ceed/jit-source/magma/magma-basis-interp-3d.h @@ -7,7 +7,6 @@ /// @file /// Internal header for MAGMA tensor basis interpolation in 3D - #include "magma-common-tensor.h" // macros to abstract access of shared memory and reg. file diff --git a/include/ceed/jit-source/magma/magma-basis-interp-deriv-nontensor.h b/include/ceed/jit-source/magma/magma-basis-interp-deriv-nontensor.h index 0614732f02..07b4386c07 100644 --- a/include/ceed/jit-source/magma/magma-basis-interp-deriv-nontensor.h +++ b/include/ceed/jit-source/magma/magma-basis-interp-deriv-nontensor.h @@ -7,7 +7,6 @@ /// @file /// Internal header for MAGMA non-tensor basis interpolation - #include "magma-common-nontensor.h" //////////////////////////////////////////////////////////////////////////////// diff --git a/include/ceed/jit-source/magma/magma-basis-weight-1d.h b/include/ceed/jit-source/magma/magma-basis-weight-1d.h index 431fbb6d03..8333a3cfc4 100644 --- a/include/ceed/jit-source/magma/magma-basis-weight-1d.h +++ b/include/ceed/jit-source/magma/magma-basis-weight-1d.h @@ -7,7 +7,6 @@ /// @file /// Internal header for MAGMA tensor basis weight in 1D - #include "magma-common-tensor.h" //////////////////////////////////////////////////////////////////////////////// diff --git a/include/ceed/jit-source/magma/magma-basis-weight-2d.h b/include/ceed/jit-source/magma/magma-basis-weight-2d.h index 034992e8f1..8fa903096b 100644 --- a/include/ceed/jit-source/magma/magma-basis-weight-2d.h +++ b/include/ceed/jit-source/magma/magma-basis-weight-2d.h @@ -7,7 +7,6 @@ /// @file /// Internal header for MAGMA tensor basis weight in 2D - #include "magma-common-tensor.h" //////////////////////////////////////////////////////////////////////////////// diff --git a/include/ceed/jit-source/magma/magma-basis-weight-3d.h b/include/ceed/jit-source/magma/magma-basis-weight-3d.h index a5ee73bd96..2405188dcc 100644 --- a/include/ceed/jit-source/magma/magma-basis-weight-3d.h +++ b/include/ceed/jit-source/magma/magma-basis-weight-3d.h @@ -7,7 +7,6 @@ /// @file /// Internal header for MAGMA tensor basis weight in 3D - #include "magma-common-tensor.h" //////////////////////////////////////////////////////////////////////////////// diff --git a/include/ceed/jit-source/magma/magma-basis-weight-nontensor.h b/include/ceed/jit-source/magma/magma-basis-weight-nontensor.h index 6a20ecefd6..4052025c91 100644 --- a/include/ceed/jit-source/magma/magma-basis-weight-nontensor.h +++ b/include/ceed/jit-source/magma/magma-basis-weight-nontensor.h @@ -7,7 +7,6 @@ /// @file /// Internal header for MAGMA non-tensor basis weight - #include "magma-common-nontensor.h" //////////////////////////////////////////////////////////////////////////////// diff --git a/include/ceed/jit-source/magma/magma-common-defs.h b/include/ceed/jit-source/magma/magma-common-defs.h index a4913c2082..5dc3550b76 100644 --- a/include/ceed/jit-source/magma/magma-common-defs.h +++ b/include/ceed/jit-source/magma/magma-common-defs.h @@ -7,8 +7,7 @@ /// @file /// Internal header for MAGMA backend common definitions -#ifndef CEED_MAGMA_COMMON_DEFS_H -#define CEED_MAGMA_COMMON_DEFS_H +#pragma once #define MAGMA_DEVICE_SHARED(type, name) extern __shared__ type name[]; @@ -21,5 +20,3 @@ // Define macro for computing the total threads in a block for use with __launch_bounds__() #define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt)) - -#endif // CEED_MAGMA_COMMON_DEFS_H diff --git a/include/ceed/jit-source/sycl/sycl-gen-templates.h b/include/ceed/jit-source/sycl/sycl-gen-templates.h index aa54232c2d..cf6f6cbc15 100644 --- a/include/ceed/jit-source/sycl/sycl-gen-templates.h +++ b/include/ceed/jit-source/sycl/sycl-gen-templates.h @@ -7,7 +7,7 @@ /// @file /// Internal header for SYCL backend macro and type definitions for JiT source -#include +#include #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable diff --git a/include/ceed/jit-source/sycl/sycl-ref-qfunction.h b/include/ceed/jit-source/sycl/sycl-ref-qfunction.h index d62de2533a..24b7de724f 100644 --- a/include/ceed/jit-source/sycl/sycl-ref-qfunction.h +++ b/include/ceed/jit-source/sycl/sycl-ref-qfunction.h @@ -7,8 +7,7 @@ /// @file /// Internal header for SYCL backend QFunction read/write kernels - -#include +#include //------------------------------------------------------------------------------ // Read from quadrature points diff --git a/include/ceed/jit-source/sycl/sycl-shared-basis-read-write-templates.h b/include/ceed/jit-source/sycl/sycl-shared-basis-read-write-templates.h index 421875b509..06587592da 100644 --- a/include/ceed/jit-source/sycl/sycl-shared-basis-read-write-templates.h +++ b/include/ceed/jit-source/sycl/sycl-shared-basis-read-write-templates.h @@ -7,9 +7,7 @@ /// @file /// Internal header for SYCL shared memory basis read/write templates - -#include -#include "sycl-types.h" +#include //------------------------------------------------------------------------------ // Helper function: load matrices for basis actions diff --git a/include/ceed/jit-source/sycl/sycl-shared-basis-tensor-templates.h b/include/ceed/jit-source/sycl/sycl-shared-basis-tensor-templates.h index 28bd24d9f9..bd6ec34052 100644 --- a/include/ceed/jit-source/sycl/sycl-shared-basis-tensor-templates.h +++ b/include/ceed/jit-source/sycl/sycl-shared-basis-tensor-templates.h @@ -7,8 +7,7 @@ /// @file /// Internal header for SYCL shared memory tensor product basis templates - -#include +#include //------------------------------------------------------------------------------ // 1D diff --git a/include/ceed/jit-source/sycl/sycl-shared-basis-tensor.h b/include/ceed/jit-source/sycl/sycl-shared-basis-tensor.h index f8e4ccdc0a..fc38b00351 100644 --- a/include/ceed/jit-source/sycl/sycl-shared-basis-tensor.h +++ b/include/ceed/jit-source/sycl/sycl-shared-basis-tensor.h @@ -7,8 +7,7 @@ /// @file /// Internal header for SYCL shared memory tensor product basis - -#include +#include #include "sycl-shared-basis-read-write-templates.h" #include "sycl-shared-basis-tensor-templates.h" diff --git a/include/ceed/jit-source/sycl/sycl-types.h b/include/ceed/jit-source/sycl/sycl-types.h index 58938a4b2a..b42ad10385 100644 --- a/include/ceed/jit-source/sycl/sycl-types.h +++ b/include/ceed/jit-source/sycl/sycl-types.h @@ -7,8 +7,7 @@ /// @file /// Internal header for SYCL type definitions -#ifndef CEED_SYCL_TYPES_H -#define CEED_SYCL_TYPES_H +#pragma once #include @@ -35,5 +34,3 @@ typedef struct { CeedInt *outputs[CEED_SYCL_NUMBER_FIELDS]; } FieldsInt_Sycl; #endif - -#endif // CEED_SYCL_TYPES_H diff --git a/include/ceed/types.h b/include/ceed/types.h index 6817a73322..3f858a7ca4 100644 --- a/include/ceed/types.h +++ b/include/ceed/types.h @@ -7,11 +7,12 @@ /// @file /// Public header for types and macros used in user QFunction source code -#ifndef CEED_QFUNCTION_DEFS_H -#define CEED_QFUNCTION_DEFS_H +#pragma once +#ifndef CEED_RUNNING_JIT_PASS #include #include +#endif /** @ingroup CeedQFunction @@ -251,5 +252,3 @@ typedef enum { /// Boolean value CEED_CONTEXT_FIELD_BOOL = 3, } CeedContextFieldType; - -#endif // CEED_QFUNCTION_DEFS_H diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c index a45054cfc9..d89f713dfe 100644 --- a/interface/ceed-jit-tools.c +++ b/interface/ceed-jit-tools.c @@ -216,6 +216,9 @@ int CeedLoadSourceToInitializedBuffer(Ceed ceed, const char *source_file_path, C bool is_ceed_header = next_left_chevron && (next_new_line - next_left_chevron > 0) && (!strncmp(next_left_chevron, "", 14) || !strncmp(next_left_chevron, "", 17) || !strncmp(next_left_chevron, "", 17)); + bool is_std_header = + next_left_chevron && (next_new_line - next_left_chevron > 0) && + (!strncmp(next_left_chevron, "", 8) || !strncmp(next_left_chevron, "num_jit_source_roots; i++) { + CeedCallBackend(CeedGetJitSourceRoots(ceed, &num_source_dirs, &jit_source_dirs)); + for (CeedInt i = 0; i < num_source_dirs; i++) { bool is_valid; // Debug CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "Checking JiT root: "); - CeedDebug(ceed, "%s\n", ceed_parent->jit_source_roots[i]); + CeedDebug(ceed, "%s\n", jit_source_dirs[i]); // Build and check absolute path with current root - CeedCall(CeedPathConcatenate(ceed, ceed_parent->jit_source_roots[i], relative_file_path, (char **)absolute_file_path)); + CeedCall(CeedPathConcatenate(ceed, jit_source_dirs[i], relative_file_path, (char **)absolute_file_path)); CeedCall(CeedCheckFilePath(ceed, *absolute_file_path, &is_valid)); - if (is_valid) return CEED_ERROR_SUCCESS; + if (is_valid) { + CeedCallBackend(CeedRestoreJitSourceRoots(ceed, &jit_source_dirs)); + return CEED_ERROR_SUCCESS; + } // LCOV_EXCL_START - else CeedCall(CeedFree(absolute_file_path)); + else + CeedCall(CeedFree(absolute_file_path)); // LCOV_EXCL_STOP } // LCOV_EXCL_START diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index 346135ebdf..3e82b38493 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -49,10 +49,13 @@ static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, Cee if (qf->source_path) { size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name); + CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name)); memcpy(source_path_with_name, qf->source_path, path_len); memcpy(&source_path_with_name[path_len], ":", 1); memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len); + } else if (qf->user_source) { + CeedCall(CeedStringAllocCopy(qf->user_source, &source_path_with_name)); } else { CeedCall(CeedCalloc(1, &source_path_with_name)); } diff --git a/interface/ceed-qfunction.c b/interface/ceed-qfunction.c index 02715249d4..7c0cf3a285 100644 --- a/interface/ceed-qfunction.c +++ b/interface/ceed-qfunction.c @@ -608,6 +608,8 @@ int CeedQFunctionGetFlopsEstimate(CeedQFunction qf, CeedSize *flops) { @param[in] source Absolute path to source of `CeedQFunctionUser`, "\abs_path\file.h:function_name". The entire source file must only contain constructs supported by all targeted backends (i.e. CUDA for `/gpu/cuda`, OpenCL/SYCL for `/gpu/sycl`, etc.). The entire contents of this file and all locally included files are used during JiT compilation for GPU backends. + The header `ceed/types.h` is preferred over `ceed.h` or `ceed/ceed.h` for `CeedQFunction` source files. + The macro `CEED_RUNNING_JIT_PASS` is set during JiT and can be used to guard include statements that JiT compilers cannot use, such as `math.h` or `std*.h`. All source files must be at the provided filepath at runtime for JiT to function. @param[out] qf Address of the variable where the newly created `CeedQFunction` will be stored diff --git a/interface/ceed.c b/interface/ceed.c index 1becb3de14..e214b6eb85 100644 --- a/interface/ceed.c +++ b/interface/ceed.c @@ -658,6 +658,16 @@ int CeedGetOperatorFallbackCeed(Ceed ceed, Ceed *fallback_ceed) { fallback_ceed->op_fallback_parent = ceed; fallback_ceed->Error = ceed->Error; ceed->op_fallback_ceed = fallback_ceed; + { + const char **jit_source_dirs; + CeedInt num_jit_source_dirs = 0; + + CeedCall(CeedGetJitSourceRoots(ceed, &num_jit_source_dirs, &jit_source_dirs)); + for (CeedInt i = 0; i < num_jit_source_dirs; i++) { + CeedCall(CeedAddJitSourceRoot(fallback_ceed, jit_source_dirs[i])); + } + CeedCall(CeedRestoreJitSourceRoots(ceed, &jit_source_dirs)); + } } *fallback_ceed = ceed->op_fallback_ceed; return CEED_ERROR_SUCCESS; @@ -863,6 +873,43 @@ int CeedRestoreWorkVector(Ceed ceed, CeedVector *vec) { // LCOV_EXCL_STOP } +/** + @brief Retrieve list ofadditional JiT source roots from `Ceed` context. + + Note: The caller is responsible for restoring `jit_source_roots` with @ref CeedRestoreJitSourceRoots(). + + @param[in] ceed `Ceed` context + @param[out] num_source_roots Number of JiT source directories + @param[out] jit_source_roots Absolute paths to additional JiT source directories + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedGetJitSourceRoots(Ceed ceed, CeedInt *num_source_roots, const char ***jit_source_roots) { + Ceed ceed_parent; + + CeedCall(CeedGetParent(ceed, &ceed_parent)); + *num_source_roots = ceed_parent->num_jit_source_roots; + *jit_source_roots = (const char **)ceed_parent->jit_source_roots; + return CEED_ERROR_SUCCESS; +} + +/** + @brief Restore list of additional JiT source roots from with @ref CeedGetJitSourceRoots() + + @param[in] ceed `Ceed` context + @param[out] jit_source_roots Absolute paths to additional JiT source directories + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedRestoreJitSourceRoots(Ceed ceed, const char ***jit_source_roots) { + *jit_source_roots = NULL; + return CEED_ERROR_SUCCESS; +} + /// @} /// ---------------------------------------------------------------------------- diff --git a/tests/t400-qfunction.h b/tests/t400-qfunction.h index 1fb64842fd..b3e226df14 100644 --- a/tests/t400-qfunction.h +++ b/tests/t400-qfunction.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *w = in[0]; diff --git a/tests/t401-qfunction.h b/tests/t401-qfunction.h index c61cdb8ac6..465ec0b119 100644 --- a/tests/t401-qfunction.h +++ b/tests/t401-qfunction.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *w = in[0]; diff --git a/tests/t405-qfunction.h b/tests/t405-qfunction.h index eaf261791f..40be19b47d 100644 --- a/tests/t405-qfunction.h +++ b/tests/t405-qfunction.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *w = in[0]; diff --git a/tests/t406-qfunction-helper.h b/tests/t406-qfunction-helper.h index 9db4901023..85fdf9999c 100644 --- a/tests/t406-qfunction-helper.h +++ b/tests/t406-qfunction-helper.h @@ -10,11 +10,15 @@ # pragma once // clang-format on +// Note - ceed/types.h should be used over ceed.h #include // Test include path with "/./" #include "./t406-qfunction-scales.h" -CEED_QFUNCTION_HELPER CeedScalar times_two(CeedScalar x) { return SCALE_TWO * x; } +// Test include via -I.... +#include -CEED_QFUNCTION_HELPER CeedScalar times_three(CeedScalar x) { return SCALE_THREE * x; } +CEED_QFUNCTION_HELPER CeedScalar times_two(CeedScalar x) { return FAKE_SYS_SCALE_ONE * SCALE_TWO * x; } + +CEED_QFUNCTION_HELPER CeedScalar times_three(CeedScalar x) { return FAKE_SYS_SCALE_ONE * SCALE_THREE * x; } diff --git a/tests/t406-qfunction.c b/tests/t406-qfunction.c index 201c3782a4..e558139146 100644 --- a/tests/t406-qfunction.c +++ b/tests/t406-qfunction.c @@ -18,6 +18,13 @@ int main(int argc, char **argv) { CeedScalar v_true[q]; CeedInit(argv[1], &ceed); + { + char file_path[2056] = __FILE__; + char *last_slash = strrchr(file_path, '/'); + + memcpy(&file_path[last_slash - file_path], "/test-include/", 15); + CeedAddJitSourceRoot(ceed, file_path); + } CeedVectorCreate(ceed, q, &w); CeedVectorCreate(ceed, q, &u); diff --git a/tests/t406-qfunction.h b/tests/t406-qfunction.h index f4782f7029..642de84a67 100644 --- a/tests/t406-qfunction.h +++ b/tests/t406-qfunction.h @@ -5,10 +5,14 @@ // // This file is part of CEED: http://github.com/ceed -// Note: intentionally testing strange spacing in '#include's +// Note: intentionally testing strange spacing in include's // clang-format off +// Note - ceed/types.h should be used over ceed.h #include -# include +// Note - system headers like math.h and std*.h should be guarded +#ifndef CEED_RUNNING_JIT_PASS +# include +#endif #include "t406-qfunction-helper.h" // Test duplicate includes of guarded files diff --git a/tests/t409-qfunction.h b/tests/t409-qfunction.h index 27e2c6585e..5348ffeb9d 100644 --- a/tests/t409-qfunction.h +++ b/tests/t409-qfunction.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(scale)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { CeedScalar *scale = (CeedScalar *)ctx; diff --git a/tests/t500-operator.h b/tests/t500-operator.h index de9ca8966a..777978bc34 100644 --- a/tests/t500-operator.h +++ b/tests/t500-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *weight = in[0], *dxdX = in[1]; diff --git a/tests/t502-operator.h b/tests/t502-operator.h index 9d343b5ab9..9915ee4282 100644 --- a/tests/t502-operator.h +++ b/tests/t502-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *weight = in[0], *dxdX = in[1]; diff --git a/tests/t507-operator.h b/tests/t507-operator.h index 5d245534be..3166f2ee69 100644 --- a/tests/t507-operator.h +++ b/tests/t507-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *weight = in[0], *dxdX = in[1]; diff --git a/tests/t510-operator.h b/tests/t510-operator.h index 01cf47450c..20677b157a 100644 --- a/tests/t510-operator.h +++ b/tests/t510-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *weight = in[0], *J = in[1]; diff --git a/tests/t522-operator.h b/tests/t522-operator.h index 3f70b7d354..52aa9bae28 100644 --- a/tests/t522-operator.h +++ b/tests/t522-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *qw = in[0], *J = in[1]; diff --git a/tests/t530-operator.h b/tests/t530-operator.h index 01cf47450c..20677b157a 100644 --- a/tests/t530-operator.h +++ b/tests/t530-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *weight = in[0], *J = in[1]; diff --git a/tests/t531-operator.h b/tests/t531-operator.h index 4050ca35dc..79a083f032 100644 --- a/tests/t531-operator.h +++ b/tests/t531-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // At every quadrature point, compute qw/det(J).adj(J).adj(J)^T and store diff --git a/tests/t532-operator.h b/tests/t532-operator.h index e15e3aed19..6de6e8b669 100644 --- a/tests/t532-operator.h +++ b/tests/t532-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup_mass)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *J = in[0], *weight = in[1]; diff --git a/tests/t534-operator.h b/tests/t534-operator.h index 3fc4c58887..cfe2bf73ac 100644 --- a/tests/t534-operator.h +++ b/tests/t534-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // At every quadrature point, compute qw/det(J).adj(J).adj(J)^T and store diff --git a/tests/t535-operator.h b/tests/t535-operator.h index 7f6797608c..ba3d5498cb 100644 --- a/tests/t535-operator.h +++ b/tests/t535-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup_mass)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *J = in[0], *weight = in[1]; diff --git a/tests/t537-operator.h b/tests/t537-operator.h index 80b2d22d73..f42f4fc1e4 100644 --- a/tests/t537-operator.h +++ b/tests/t537-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *weight = in[0], *J = in[1]; diff --git a/tests/t539-operator.h b/tests/t539-operator.h index 3a4fda2475..c51487250b 100644 --- a/tests/t539-operator.h +++ b/tests/t539-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(apply)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is gradient u_0, shape [2, num_comp=2, Q] diff --git a/tests/t540-operator.h b/tests/t540-operator.h index 79f5006719..f6052946aa 100644 --- a/tests/t540-operator.h +++ b/tests/t540-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup_mass)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *J = in[0], *weight = in[1]; diff --git a/tests/t541-operator.h b/tests/t541-operator.h index 7eaa675c97..2f588f76be 100644 --- a/tests/t541-operator.h +++ b/tests/t541-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup_diff)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // in[0] is Jacobians with shape [2, nc=2, Q] diff --git a/tests/t566-operator.h b/tests/t566-operator.h index dfd0da43a2..a1c57cae55 100644 --- a/tests/t566-operator.h +++ b/tests/t566-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *weight = in[0], *J = in[1]; diff --git a/tests/t567-operator.h b/tests/t567-operator.h index 6b645272dc..faee0aa5ac 100644 --- a/tests/t567-operator.h +++ b/tests/t567-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *w = in[0], (*J)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1]; diff --git a/tests/t568-operator.h b/tests/t568-operator.h index d52bc2d800..8cbb0ba8bf 100644 --- a/tests/t568-operator.h +++ b/tests/t568-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // At every quadrature point, compute qw/det(J).adj(J).adj(J)^T and store diff --git a/tests/t580-operator.h b/tests/t580-operator.h index 940a3605fc..e53f7817de 100644 --- a/tests/t580-operator.h +++ b/tests/t580-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include // Compute det(A) CEED_QFUNCTION_HELPER CeedScalar MatDet2x2(const CeedScalar A[2][2]) { return A[0][0] * A[1][1] - A[1][0] * A[0][1]; } diff --git a/tests/t590-operator.h b/tests/t590-operator.h index a2018718f8..d4c45b3735 100644 --- a/tests/t590-operator.h +++ b/tests/t590-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(mass)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar *u = in[0]; diff --git a/tests/t591-operator.h b/tests/t591-operator.h index 1c64f1181f..3385bf9dcc 100644 --- a/tests/t591-operator.h +++ b/tests/t591-operator.h @@ -5,7 +5,7 @@ // // This file is part of CEED: http://github.com/ceed -#include +#include CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { const CeedScalar(*J)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[0]; diff --git a/tests/test-include/fake-sys-include.h b/tests/test-include/fake-sys-include.h new file mode 100644 index 0000000000..edb954cb54 --- /dev/null +++ b/tests/test-include/fake-sys-include.h @@ -0,0 +1,14 @@ +#define FAKE_SYS_SCALE_ONE 1 + +// Note - files included this way cannot transitively include any files CUDA/ROCm won't compile +// These are bad and need to be guarded +#ifndef CEED_RUNNING_JIT_PASS +#include +#include +#endif + +// These are ok +// Note - ceed/types.h should be used over ceed.h +// ceed.h is replaced with ceed/types.h during JiT +#include +#include