Skip to content

Commit

Permalink
Merge pull request #1382 from CEED/sjg/magma-formatting-dev
Browse files Browse the repository at this point in the history
Lazy RTC compilation of MAGMA basis kernels for small P, Q
  • Loading branch information
jeremylt authored Oct 17, 2023
2 parents 645ad15 + 913f846 commit db7ade3
Show file tree
Hide file tree
Showing 48 changed files with 3,187 additions and 2,318 deletions.
6 changes: 0 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,6 @@ cuda-gen.cu := $(sort $(wildcard backends/cuda-gen/kernels/*.cu))
occa.cpp := $(sort $(shell find backends/occa -type f -name *.cpp))
magma.c := $(sort $(wildcard backends/magma/*.c))
magma.cpp := $(sort $(wildcard backends/magma/*.cpp))
magma.cu := $(sort $(wildcard backends/magma/kernels/cuda/*.cu))
magma.hip := $(sort $(wildcard backends/magma/kernels/hip/*.hip.cpp))
hip.c := $(sort $(wildcard backends/hip/*.c))
hip.cpp := $(sort $(wildcard backends/hip/*.cpp))
hip-ref.c := $(sort $(wildcard backends/hip-ref/*.c))
Expand Down Expand Up @@ -491,10 +489,8 @@ ifneq ($(wildcard $(MAGMA_DIR)/lib/libmagma.*),)
PKG_LIBS += $(magma_link)
libceed.c += $(magma.c)
libceed.cpp += $(magma.cpp)
libceed.cu += $(magma.cu)
$(magma.c:%.c=$(OBJDIR)/%.o) $(magma.c:%=%.tidy) : CPPFLAGS += -DADD_ -I$(MAGMA_DIR)/include -I$(CUDA_DIR)/include
$(magma.cpp:%.cpp=$(OBJDIR)/%.o) $(magma.cpp:%=%.tidy) : CPPFLAGS += -DADD_ -I$(MAGMA_DIR)/include -I$(CUDA_DIR)/include
$(magma.cu:%.cu=$(OBJDIR)/%.o) : CPPFLAGS += --compiler-options=-fPIC -DADD_ -I$(MAGMA_DIR)/include -I$(MAGMA_DIR)/magmablas -I$(CUDA_DIR)/include
MAGMA_BACKENDS = /gpu/cuda/magma /gpu/cuda/magma/det
endif
else # HIP MAGMA
Expand All @@ -507,10 +503,8 @@ ifneq ($(wildcard $(MAGMA_DIR)/lib/libmagma.*),)
PKG_LIBS += $(magma_link)
libceed.c += $(magma.c)
libceed.cpp += $(magma.cpp)
libceed.hip += $(magma.hip)
$(magma.c:%.c=$(OBJDIR)/%.o) $(magma.c:%=%.tidy) : CPPFLAGS += $(HIPCONFIG_CPPFLAGS) -I$(MAGMA_DIR)/include -I$(ROCM_DIR)/include -DCEED_MAGMA_USE_HIP -DADD_
$(magma.cpp:%.cpp=$(OBJDIR)/%.o) $(magma.cpp:%=%.tidy) : CPPFLAGS += $(HIPCONFIG_CPPFLAGS) -I$(MAGMA_DIR)/include -I$(ROCM_DIR)/include -DCEED_MAGMA_USE_HIP -DADD_
$(magma.hip:%.hip.cpp=$(OBJDIR)/%.o) : CPPFLAGS += -I$(MAGMA_DIR)/include -I$(MAGMA_DIR)/magmablas -I$(ROCM_DIR)/include -DCEED_MAGMA_USE_HIP -DADD_
MAGMA_BACKENDS = /gpu/hip/magma /gpu/hip/magma/det
endif
endif
Expand Down
663 changes: 312 additions & 351 deletions backends/magma/ceed-magma-basis.c

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion backends/magma/ceed-magma-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
// Device information backend init
//------------------------------------------------------------------------------
int CeedInit_Magma_common(Ceed ceed, const char *resource) {
Ceed_Magma *data;
const char *device_spec = strstr(resource, ":device_id=");
const int device_id = (device_spec) ? atoi(device_spec + 11) : -1;
int current_device_id;
Ceed_Magma *data;

CeedCallBackend(magma_init());

Expand All @@ -28,6 +28,7 @@ int CeedInit_Magma_common(Ceed ceed, const char *resource) {
magma_setdevice(device_id);
current_device_id = device_id;
}

CeedCallBackend(CeedGetData(ceed, &data));
data->device_id = current_device_id;
#ifdef CEED_MAGMA_USE_HIP
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
//
// This file is part of CEED: http://github.com/ceed

#include "ceed-magma.h"
#include "ceed-magma-gemm-nontensor.h"
#include "ceed-magma-gemm-selector.h"

#ifdef CEED_MAGMA_USE_HIP
#define devblasDgemmStridedBatched hipblasDgemmStridedBatched
Expand All @@ -20,9 +21,9 @@
#endif

////////////////////////////////////////////////////////////////////////////////
static int magmablas_gemm(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k, CeedScalar alpha,
const CeedScalar *d_A, magma_int_t ldda, const CeedScalar *d_B, magma_int_t lddb, CeedScalar beta, CeedScalar *d_C,
magma_int_t lddc, magma_queue_t queue) {
static inline int magmablas_gemm(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k, CeedScalar alpha,
const CeedScalar *d_A, magma_int_t ldda, const CeedScalar *d_B, magma_int_t lddb, CeedScalar beta, CeedScalar *d_C,
magma_int_t lddc, magma_queue_t queue) {
if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
magmablas_sgemm(trans_A, trans_B, m, n, k, (float)alpha, (const float *)d_A, ldda, (const float *)d_B, lddb, (float)beta, (float *)d_C, lddc,
queue);
Expand All @@ -34,10 +35,10 @@ static int magmablas_gemm(magma_trans_t trans_A, magma_trans_t trans_B, magma_in
}

////////////////////////////////////////////////////////////////////////////////
static int magmablas_gemm_batched_strided(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k, CeedScalar alpha,
const CeedScalar *d_A, magma_int_t ldda, magma_int_t strideA, const CeedScalar *d_B, magma_int_t lddb,
magma_int_t strideB, CeedScalar beta, CeedScalar *d_C, magma_int_t lddc, magma_int_t strideC,
magma_int_t batchCount, magma_queue_t queue) {
static inline int magmablas_gemm_batched_strided(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k,
CeedScalar alpha, const CeedScalar *d_A, magma_int_t ldda, magma_int_t strideA,
const CeedScalar *d_B, magma_int_t lddb, magma_int_t strideB, CeedScalar beta, CeedScalar *d_C,
magma_int_t lddc, magma_int_t strideC, magma_int_t batchCount, magma_queue_t queue) {
if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
magmablas_sgemm_batched_strided(trans_A, trans_B, m, n, k, (float)alpha, (const float *)d_A, ldda, strideA, (const float *)d_B, lddb, strideB,
(float)beta, (float *)d_C, lddc, strideC, batchCount, queue);
Expand All @@ -49,9 +50,9 @@ static int magmablas_gemm_batched_strided(magma_trans_t trans_A, magma_trans_t t
}

////////////////////////////////////////////////////////////////////////////////
static int devblas_gemm(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k, CeedScalar alpha,
const CeedScalar *d_A, magma_int_t ldda, const CeedScalar *d_B, magma_int_t lddb, CeedScalar beta, CeedScalar *d_C,
magma_int_t lddc, magma_queue_t queue) {
static inline int devblas_gemm(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k, CeedScalar alpha,
const CeedScalar *d_A, magma_int_t ldda, const CeedScalar *d_B, magma_int_t lddb, CeedScalar beta, CeedScalar *d_C,
magma_int_t lddc, magma_queue_t queue) {
if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
magma_sgemm(trans_A, trans_B, m, n, k, (float)alpha, (const float *)d_A, ldda, (const float *)d_B, lddb, (float)beta, (float *)d_C, lddc, queue);
} else {
Expand All @@ -62,10 +63,10 @@ static int devblas_gemm(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_
}

////////////////////////////////////////////////////////////////////////////////
static int devblas_gemm_batched_strided(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k, CeedScalar alpha,
const CeedScalar *d_A, magma_int_t ldda, magma_int_t strideA, const CeedScalar *d_B, magma_int_t lddb,
magma_int_t strideB, CeedScalar beta, CeedScalar *d_C, magma_int_t lddc, magma_int_t strideC,
magma_int_t batchCount, magma_queue_t queue) {
static inline int devblas_gemm_batched_strided(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k,
CeedScalar alpha, const CeedScalar *d_A, magma_int_t ldda, magma_int_t strideA, const CeedScalar *d_B,
magma_int_t lddb, magma_int_t strideB, CeedScalar beta, CeedScalar *d_C, magma_int_t lddc,
magma_int_t strideC, magma_int_t batchCount, magma_queue_t queue) {
if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
devblasSgemmStridedBatched(magma_queue_get_devblas_handle(queue), devblas_trans_const(trans_A), devblas_trans_const(trans_B), (int)m, (int)n,
(int)k, (const float *)&alpha, (const float *)d_A, (int)ldda, strideA, (const float *)d_B, (int)lddb, strideB,
Expand Down
18 changes: 18 additions & 0 deletions backends/magma/ceed-magma-gemm-nontensor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

#ifndef CEED_MAGMA_GEMM_NONTENSOR_H
#define CEED_MAGMA_GEMM_NONTENSOR_H

#include "ceed-magma.h"

////////////////////////////////////////////////////////////////////////////////
CEED_INTERN int magma_gemm_nontensor(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k, CeedScalar alpha,
const CeedScalar *d_A, magma_int_t ldda, const CeedScalar *d_B, magma_int_t lddb, CeedScalar beta,
CeedScalar *d_C, magma_int_t lddc, magma_queue_t queue);

#endif // CEED_MAGMA_GEMM_NONTENSOR_H
139 changes: 139 additions & 0 deletions backends/magma/ceed-magma-gemm-selector.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

#include <array>
#include <limits>
#include <vector>

#include "ceed-magma-gemm-selector.h"

#include "tuning/indices.h"
#ifdef CEED_MAGMA_USE_HIP
#include "tuning/mi100.h"
#include "tuning/mi250x.h"
#include "tuning/mi250x_grad_rtc.h"
#include "tuning/mi250x_interp_rtc.h"
#else
#include "tuning/a100.h"
#include "tuning/a100_grad_rtc.h"
#include "tuning/a100_interp_rtc.h"
#include "tuning/v100.h"
#endif

////////////////////////////////////////////////////////////////////////////////
#ifdef CEED_MAGMA_USE_HIP
static inline auto gemm_selector_get_data(int gpu_arch, char precision, char trans_A) -> decltype(dgemm_nn_mi250x) {
if (gpu_arch >= 910) {
// gfx90a or newer
return (precision == 's') ? ((trans_A == 'n') ? sgemm_nn_mi250x : sgemm_tn_mi250x) : ((trans_A == 'n') ? dgemm_nn_mi250x : dgemm_tn_mi250x);
} else {
// gfx908 or older
return (precision == 's') ? ((trans_A == 'n') ? sgemm_nn_mi100 : sgemm_tn_mi100) : ((trans_A == 'n') ? dgemm_nn_mi100 : dgemm_tn_mi100);
}
}
#else
static inline auto gemm_selector_get_data(int gpu_arch, char precision, char trans_A) -> decltype(dgemm_nn_a100) {
if (gpu_arch >= 800) {
// sm80 or newer
return (precision == 's') ? ((trans_A == 'n') ? sgemm_nn_a100 : sgemm_tn_a100) : ((trans_A == 'n') ? dgemm_nn_a100 : dgemm_tn_a100);
} else {
// sm70 or older
return (precision == 's') ? ((trans_A == 'n') ? sgemm_nn_v100 : sgemm_tn_v100) : ((trans_A == 'n') ? dgemm_nn_v100 : dgemm_tn_v100);
}
}
#endif

////////////////////////////////////////////////////////////////////////////////
void gemm_selector(int gpu_arch, char precision, char trans_A, int m, int n, int k, int *n_batch, int *use_magma) {
const auto &data = gemm_selector_get_data(gpu_arch, precision, trans_A);
int ir = -1;
double norm = std::numeric_limits<double>::max();

for (size_t i = 0; i < data.size(); i++) {
const int &im = data[i][M_INDEX];
const int &in = data[i][N_INDEX];
const int &ik = data[i][K_INDEX];

double mdiff = (double)(im - m);
double ndiff = (double)(in - n);
double kdiff = (double)(ik - k);
double nrm = mdiff * mdiff + ndiff * ndiff + kdiff * kdiff;

if (nrm < norm) {
norm = nrm;
ir = i;
}

if (im == m && in == n && ik == k) {
// The input (m, n, k) exactly matches a record in `data`, no need to search further
break;
}
}

if (ir >= 0) {
// If the closest match indicates that n = n_batch, that means calling the regular non-batch GEMM.
// So n_batch is set to n instead of the 'n_batch' entry of the matching record.
int n_ = data[ir][N_INDEX];
int n_batch_ = data[ir][N_BATCH_INDEX];
*n_batch = (n_ == n_batch_) ? n : n_batch_;
*use_magma = data[ir][USE_MAGMA_INDEX];
} else {
*n_batch = n;
*use_magma = 0;
}
}

//////////////////////////////////////////////////////////////////////////////
#ifdef CEED_MAGMA_USE_HIP
static inline auto nontensor_rtc_get_data(int gpu_arch, char trans_A, int q_comp) -> decltype(dinterp_n_mi250x) {
if (q_comp == 1) {
return (trans_A == 'n') ? dinterp_n_mi250x : dinterp_t_mi250x;
} else {
return (trans_A == 'n') ? dgrad_n_mi250x : dgrad_t_mi250x;
}
}
#else
static inline auto nontensor_rtc_get_data(int gpu_arch, char trans_A, int q_comp) -> decltype(dinterp_n_a100) {
if (q_comp == 1) {
return (trans_A == 'n') ? dinterp_n_a100 : dinterp_t_a100;
} else {
return (trans_A == 'n') ? dgrad_n_a100 : dgrad_t_a100;
}
}
#endif

////////////////////////////////////////////////////////////////////////////////
CeedInt nontensor_rtc_get_nb(int gpu_arch, char trans_A, int q_comp, int P, int Q, int n) {
const auto &data = nontensor_rtc_get_data(gpu_arch, trans_A, q_comp);
int ir = -1;
double norm = std::numeric_limits<double>::max();
CeedInt m = (trans_A == 'n') ? Q : P;
CeedInt k = (trans_A == 'n') ? P : Q;

for (size_t i = 0; i < data.size(); i++) {
const int &im = data[i][M_INDEX_RTC];
const int &in = data[i][N_INDEX_RTC];
const int &ik = data[i][K_INDEX_RTC];

double mdiff = (double)(im - m);
double ndiff = (double)(in - n);
double kdiff = (double)(ik - k);
double nrm = mdiff * mdiff + ndiff * ndiff + kdiff * kdiff;

if (nrm < norm) {
norm = nrm;
ir = i;
}

if (im == m && in == n && ik == k) {
// The input (m, n, k) exactly matches a record in `data`, no need to search further
break;
}
}

return (ir >= 0) ? data[ir][NB_INDEX_RTC] : 1;
}
19 changes: 19 additions & 0 deletions backends/magma/ceed-magma-gemm-selector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

#ifndef CEED_MAGMA_GEMM_SELECTOR_H
#define CEED_MAGMA_GEMM_SELECTOR_H

#include "ceed-magma.h"

////////////////////////////////////////////////////////////////////////////////
CEED_INTERN void gemm_selector(int gpu_arch, char precision, char trans_A, int m, int n, int k, int *n_batch, int *use_magma);

////////////////////////////////////////////////////////////////////////////////
CEED_INTERN CeedInt nontensor_rtc_get_nb(int gpu_arch, char trans_A, int q_comp, int P, int Q, int n);

#endif // CEED_MAGMA_GEMM_SELECTOR_H
63 changes: 22 additions & 41 deletions backends/magma/ceed-magma.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,15 @@
#define MAGMA_MAXTHREADS_1D 128
#define MAGMA_MAXTHREADS_2D 128
#define MAGMA_MAXTHREADS_3D 64
#define MAGMA_NONTENSOR_MAXTHREADS (128)

// Define macro for determining number of threads in y-direction
// for basis kernels
// Define macro for determining number of threads in y-direction for basis kernels
#define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x)))
#define MAGMA_NONTENSOR_BASIS_NTCOL(N) (CeedIntMax(1, (MAGMA_NONTENSOR_MAXTHREADS / (N))))
#define MAGMA_CEILDIV(A, B) (((A) + (B)-1) / (B))

#define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P (40)
#define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q (40)

// Define macro for computing the total threads in a block
// for use with __launch_bounds__()
#define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt))

// Define macro for non-tensor kernel instances
#define MAGMA_NONTENSOR_KERNEL_INSTANCES (5)
#define MAGMA_NONTENSOR_N_VALUES 10240, 51200, 102400, 512000, 1024000
// Define macros for non-tensor kernel instances
#define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P 40
#define MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q 40
#define MAGMA_NONTENSOR_KERNEL_INSTANCES 5
#define MAGMA_NONTENSOR_KERNEL_N_VALUES 10240, 51200, 102400, 512000, 1024000

#ifdef CEED_MAGMA_USE_HIP
typedef hipModule_t CeedMagmaModule;
Expand All @@ -55,48 +46,38 @@ typedef CUfunction CeedMagmaFunction;

typedef struct {
CeedMagmaModule module;
CeedMagmaFunction magma_interp;
CeedMagmaFunction magma_interp_tr;
CeedMagmaFunction magma_grad;
CeedMagmaFunction magma_grad_tr;
CeedMagmaFunction magma_weight;
CeedScalar *d_q_ref_1d;
CeedMagmaFunction Interp;
CeedMagmaFunction InterpTranspose;
CeedMagmaFunction Grad;
CeedMagmaFunction GradTranspose;
CeedMagmaFunction Weight;
CeedScalar *d_interp_1d;
CeedScalar *d_grad_1d;
CeedScalar *d_q_weight_1d;
} CeedBasis_Magma;

typedef struct {
CeedMagmaModule module[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction magma_interp_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction magma_interp_tr_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction magma_grad_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction magma_grad_tr_nontensor[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedScalar *d_q_ref;
CeedMagmaModule module_weight, module_interp[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction Interp[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction InterpTranspose[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction Grad[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction GradTranspose[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedMagmaFunction Weight;
CeedInt NB_interp[MAGMA_NONTENSOR_KERNEL_INSTANCES], NB_interp_t[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedInt NB_grad[MAGMA_NONTENSOR_KERNEL_INSTANCES], NB_grad_t[MAGMA_NONTENSOR_KERNEL_INSTANCES];
CeedScalar *d_interp;
CeedScalar *d_grad;
CeedScalar *d_q_weight;
} CeedBasisNonTensor_Magma;

CEED_INTERN void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t num_elem, magma_int_t Q, CeedScalar *d_q_weight,
CeedScalar *d_v, magma_queue_t queue);

CEED_INTERN int magma_gemm_nontensor(magma_trans_t trans_A, magma_trans_t trans_B, magma_int_t m, magma_int_t n, magma_int_t k, CeedScalar alpha,
const CeedScalar *d_A, magma_int_t ldda, const CeedScalar *d_B, magma_int_t lddb, CeedScalar beta,
CeedScalar *d_C, magma_int_t lddc, magma_queue_t queue);

CEED_INTERN void gemm_selector(int gpu_arch, char precision, char trans_A, int m, int n, int k, int *n_batch, int *use_magma);

CEED_INTERN CeedInt nontensor_rtc_get_nb(int gpu_arch, char precision, CeedEvalMode e_mode, CeedTransposeMode t_mode, int P_, int N, int Q_);

CEED_INTERN magma_int_t magma_isdevptr(const void *A);

CEED_INTERN int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis);

CEED_INTERN int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_dof, CeedInt num_qpts, const CeedScalar *interp,
CEED_INTERN int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis);

CEED_INTERN magma_int_t magma_isdevptr(const void *);

// Comment the line below to use the default magma_is_devptr function
#define magma_is_devptr magma_isdevptr

Expand Down
Loading

0 comments on commit db7ade3

Please sign in to comment.