From 747da2ef60accfa78a71077abe2786bc3a532b59 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <19565938+yhmtsai@users.noreply.github.com>
Date: Thu, 26 Oct 2023 23:05:37 +0200
Subject: [PATCH] Add Ginkgo dpcpp into example (#351)

This PR adds support for the Ginkgo DPCPP backend to the examples:
sunmatrix, sunlinsol, and cvode/cv_heat2D.

The only way I currently know to synchronize is `queue->wait_and_throw()`,
which requires a queue, unlike `cudaDeviceSynchronize` or
`hipDeviceSynchronize`.
In sunlinsol and cv_heat2D, some function signatures are also used in
other files, so I cannot pass an additional parameter.
I use a global variable to store the Ginkgo executor and then get the queue
from it when SYCL needs to synchronize or submit a kernel in those functions.

---------

Signed-off-by: Yu-Hsiang M. Tsai <yhmtsai@gmail.com>
Co-authored-by: Cody Balos <balos1@llnl.gov>
---
 .clang-format                                 |   1 -
 cmake/macros/SundialsAddExamplesGinkgo.cmake  |   2 +
 examples/cvode/ginkgo/CMakeLists.txt          |   5 +-
 .../cvode/ginkgo/cv_heat2D_ginkgo.DPCPP.out   |  77 +++
 .../cvode/ginkgo/cv_heat2D_ginkgo.HIP.out     |  77 +++
 examples/cvode/ginkgo/cv_heat2D_ginkgo.cpp    | 339 ++++++++---
 examples/cvode/ginkgo/cv_heat2D_ginkgo.hpp    | 224 +++++---
 examples/sunlinsol/ginkgo/CMakeLists.txt      |   5 +-
 .../ginkgo/test_sunlinsol_ginkgo.cpp          | 543 ++++++++++++------
 examples/sunmatrix/ginkgo/CMakeLists.txt      |   5 +-
 .../ginkgo/test_sunmatrix_ginkgo.cpp          | 349 ++++++-----
 11 files changed, 1163 insertions(+), 464 deletions(-)
 create mode 100644 examples/cvode/ginkgo/cv_heat2D_ginkgo.DPCPP.out
 create mode 100644 examples/cvode/ginkgo/cv_heat2D_ginkgo.HIP.out

diff --git a/.clang-format b/.clang-format
index 35fa8341a5..c7692ae54e 100644
--- a/.clang-format
+++ b/.clang-format
@@ -140,7 +140,6 @@ SpacesInConditionalStatement        : false
 SpacesInContainerLiterals           : true
 SpacesInParentheses                 : false
 SpacesInSquareBrackets              : false
-SpaceBeforeSquareBrackets           : false
 Standard                            : c++14
 TabWidth: 2
 UseCRLF : false
diff --git a/cmake/macros/SundialsAddExamplesGinkgo.cmake b/cmake/macros/SundialsAddExamplesGinkgo.cmake
index 1e56b34230..7fc5ef2d5f 100644
--- a/cmake/macros/SundialsAddExamplesGinkgo.cmake
+++ b/cmake/macros/SundialsAddExamplesGinkgo.cmake
@@ -63,6 +63,8 @@ macro(sundials_add_examples_ginkgo EXAMPLES_VAR)
       elseif(backend MATCHES "HIP")
         set_source_files_properties(${example} PROPERTIES LANGUAGE CXX)
         set(vector nvechip)
+      elseif(backend MATCHES "DPCPP")
+        set(vector nvecsycl)
       elseif(backend MATCHES "OMP")
         set(vector nvecopenmp)
       elseif(backend MATCHES "REF")
diff --git a/examples/cvode/ginkgo/CMakeLists.txt b/examples/cvode/ginkgo/CMakeLists.txt
index c993e377c4..6c876d691c 100644
--- a/examples/cvode/ginkgo/CMakeLists.txt
+++ b/examples/cvode/ginkgo/CMakeLists.txt
@@ -20,7 +20,7 @@ set(cpu_gpu_examples
 
 sundials_add_examples_ginkgo(cpu_gpu_examples
   TARGETS sundials_cvode
-  BACKENDS REF OMP CUDA HIP)
+  BACKENDS REF OMP CUDA HIP DPCPP)
 
 # Examples that only support CPU Ginkgo backends
 set(cpu_examples
@@ -39,6 +39,9 @@ if(EXAMPLES_INSTALL)
   if(SUNDIALS_GINKGO_BACKENDS MATCHES "HIP")
     list(APPEND vectors nvechip)
   endif()
+  if(SUNDIALS_GINKGO_BACKENDS MATCHES "DPCPP")
+    list(APPEND vectors nvecsycl)
+  endif()
   if((SUNDIALS_GINKGO_BACKENDS MATCHES "OMP") OR
       (SUNDIALS_GINKGO_BACKENDS MATCHES "REF"))
     list(APPEND vectors nvecserial)
diff --git a/examples/cvode/ginkgo/cv_heat2D_ginkgo.DPCPP.out b/examples/cvode/ginkgo/cv_heat2D_ginkgo.DPCPP.out
new file mode 100644
index 0000000000..a843e62f28
--- /dev/null
+++ b/examples/cvode/ginkgo/cv_heat2D_ginkgo.DPCPP.out
@@ -0,0 +1,77 @@
+
+2D Heat problem:
+ ----------------------------
+  kx        = 1
+  ky        = 1
+  tf        = 1
+  xu        = 1
+  yu        = 1
+  nx        = 64
+  ny        = 64
+  dx        = 0.015873
+  dy        = 0.015873
+ ----------------------------
+  rtol      = 0.0001
+  atol      = 1e-08
+ ----------------------------
+  lin iters = 20
+  eps lin   = 0
+ ----------------------------
+  output    = 0
+ ----------------------------
+
+          t                     ||u||_rms                max error
+ -----------------------------------------------------------------------
+ 0.000000000000000e+00    1.273091462283009e+00    0.000000000000000e+00
+ 5.000000000000000e-02    1.265953031236337e+00    5.779434661301597e-04
+ 1.000000000000000e-01    1.245126467995815e+00    8.596410825743028e-04
+ 1.500000000000000e-01    1.212971698816507e+00    1.027071183737238e-03
+ 2.000000000000000e-01    1.173149707911348e+00    1.049506292939650e-03
+ 2.500000000000000e-01    1.129970993609124e+00    7.767258516966358e-04
+ 3.000000000000000e-01    1.088067923761304e+00    3.857233565973672e-04
+ 3.500000000000000e-01    1.051569245238569e+00    2.296842605538085e-04
+ 4.000000000000000e-01    1.023519508142414e+00    1.160865021105906e-04
+ 4.500000000000000e-01    1.005965995289331e+00    3.382124480899584e-05
+ 4.999999999999999e-01    9.999934385586851e-01    6.776957530241212e-05
+ 5.499999999999999e-01    1.005920028619227e+00    1.074298825753939e-04
+ 6.000000000000000e-01    1.023439646225216e+00    1.256532195708093e-04
+ 6.500000000000000e-01    1.051474380012092e+00    4.493207354094864e-05
+ 7.000000000000001e-01    1.087965937316374e+00    8.048432853913212e-05
+ 7.500000000000001e-01    1.129792873621271e+00    2.390181284921411e-04
+ 8.000000000000002e-01    1.172918427971992e+00    4.322892993839922e-04
+ 8.500000000000002e-01    1.212840417005807e+00    6.887143222911174e-04
+ 9.000000000000002e-01    1.245177171528506e+00    9.911108446238881e-04
+ 9.500000000000003e-01    1.266240637720725e+00    1.309517693130591e-03
+ 1.000000000000000e+00    1.273471195699113e+00    1.189685923645323e-03
+ -----------------------------------------------------------------------
+
+Final integrator statistics:
+Current time                 = 1
+Steps                        = 41
+Error test fails             = 0
+NLS step fails               = 0
+Initial step size            = 0.002110117778857815
+Last step size               = 0.02437232616233551
+Current step size            = 0.02437232616233551
+Last method order            = 3
+Current method order         = 3
+Stab. lim. order reductions  = 0
+RHS fn evals                 = 52
+NLS iters                    = 49
+NLS fails                    = 0
+NLS iters per step           = 1.195121951219512
+LS setups                    = 7
+Jac fn evals                 = 1
+LS RHS fn evals              = 0
+Prec setup evals             = 0
+Prec solves                  = 0
+LS iters                     = 873
+LS fails                     = 0
+Jac-times setups             = 0
+Jac-times evals              = 0
+LS iters per NLS iter        = 17.81632653061224
+Jac evals per NLS iter       = 0.02040816326530612
+Prec evals per NLS iter      = 0
+Root fn evals                = 0
+
+Max error = 1.189685923645323e-03
diff --git a/examples/cvode/ginkgo/cv_heat2D_ginkgo.HIP.out b/examples/cvode/ginkgo/cv_heat2D_ginkgo.HIP.out
new file mode 100644
index 0000000000..4022dba817
--- /dev/null
+++ b/examples/cvode/ginkgo/cv_heat2D_ginkgo.HIP.out
@@ -0,0 +1,77 @@
+
+2D Heat problem:
+ ----------------------------
+  kx        = 1
+  ky        = 1
+  tf        = 1
+  xu        = 1
+  yu        = 1
+  nx        = 64
+  ny        = 64
+  dx        = 0.015873
+  dy        = 0.015873
+ ----------------------------
+  rtol      = 0.0001
+  atol      = 1e-08
+ ----------------------------
+  lin iters = 20
+  eps lin   = 0
+ ----------------------------
+  output    = 0
+ ----------------------------
+
+          t                     ||u||_rms                max error
+ -----------------------------------------------------------------------
+ 0.000000000000000e+00    1.273091462283009e+00    0.000000000000000e+00
+ 5.000000000000000e-02    1.265953031236678e+00    5.779434664550109e-04
+ 1.000000000000000e-01    1.245126468025294e+00    8.596397006188639e-04
+ 1.500000000000000e-01    1.212971692633635e+00    1.027175433592653e-03
+ 2.000000000000000e-01    1.173149607363054e+00    1.048511182224932e-03
+ 2.500000000000000e-01    1.129971118809724e+00    7.777031646749588e-04
+ 3.000000000000000e-01    1.088068652479702e+00    3.867786296469777e-04
+ 3.500000000000000e-01    1.051569453796381e+00    2.291655197173004e-04
+ 4.000000000000000e-01    1.023519691519509e+00    1.152507676536185e-04
+ 4.500000000000000e-01    1.005966466128100e+00    3.464712602863074e-05
+ 4.999999999999999e-01    9.999941074735683e-01    6.549023902890916e-05
+ 5.499999999999999e-01    1.005920139252285e+00    1.085862394125670e-04
+ 6.000000000000000e-01    1.023440066617863e+00    1.253667245226797e-04
+ 6.500000000000000e-01    1.051474489311814e+00    4.368064039983466e-05
+ 7.000000000000001e-01    1.087966430721224e+00    8.251704806849780e-05
+ 7.500000000000001e-01    1.129793211633907e+00    2.403035068856418e-04
+ 8.000000000000002e-01    1.172918720617323e+00    4.322501964297842e-04
+ 8.500000000000002e-01    1.212839862652567e+00    6.883283219258907e-04
+ 9.000000000000002e-01    1.245175128903723e+00    9.857170108882318e-04
+ 9.500000000000003e-01    1.266235911062742e+00    1.301833676780495e-03
+ 1.000000000000000e+00    1.273469281873646e+00    1.183015268845011e-03
+ -----------------------------------------------------------------------
+
+Final integrator statistics:
+Current time                 = 1
+Steps                        = 41
+Error test fails             = 0
+NLS step fails               = 0
+Initial step size            = 0.002110117764420172
+Last step size               = 0.02782878040979117
+Current step size            = 0.02782878040979117
+Last method order            = 3
+Current method order         = 3
+Stab. lim. order reductions  = 0
+RHS fn evals                 = 52
+NLS iters                    = 49
+NLS fails                    = 0
+NLS iters per step           = 1.195121951219512
+LS setups                    = 7
+Jac fn evals                 = 1
+LS RHS fn evals              = 0
+Prec setup evals             = 0
+Prec solves                  = 0
+LS iters                     = 875
+LS fails                     = 0
+Jac-times setups             = 0
+Jac-times evals              = 0
+LS iters per NLS iter        = 17.85714285714286
+Jac evals per NLS iter       = 0.02040816326530612
+Prec evals per NLS iter      = 0
+Root fn evals                = 0
+
+Max error = 1.183015268845011e-03
diff --git a/examples/cvode/ginkgo/cv_heat2D_ginkgo.cpp b/examples/cvode/ginkgo/cv_heat2D_ginkgo.cpp
index 2671c77934..e979d60459 100644
--- a/examples/cvode/ginkgo/cv_heat2D_ginkgo.cpp
+++ b/examples/cvode/ginkgo/cv_heat2D_ginkgo.cpp
@@ -38,8 +38,9 @@
  * The spatial derivatives are computed using second-order centered differences,
  * with the data distributed over nx * ny points on a uniform spatial grid. The
  * problem is advanced in time with BDF methods using an inexact Newton method
- * paired with the CG linear solver from Ginkgo. Several command line options are
- * available to change the problem parameters and CVODE settings. Use the flag
+ * paired with the CG linear solver from Ginkgo. Several command line options
+ * are available to change the problem parameters and CVODE settings. Use the
+ * flag
  * --help for more information.
  * ---------------------------------------------------------------------------*/
 
@@ -53,19 +54,23 @@
 
 #if defined(USE_CUDA)
 #include <nvector/nvector_cuda.h>
-#define HIP_OR_CUDA(a, b) b
+#define HIP_OR_CUDA_OR_SYCL(a, b, c) b
 constexpr auto N_VNew = N_VNew_Cuda;
 #elif defined(USE_HIP)
 #include <nvector/nvector_hip.h>
-#define HIP_OR_CUDA(a, b) a
+#define HIP_OR_CUDA_OR_SYCL(a, b, c) a
 constexpr auto N_VNew = N_VNew_Hip;
+#elif defined(USE_DPCPP)
+#include <nvector/nvector_sycl.h>
+#define HIP_OR_CUDA_OR_SYCL(a, b, c) c
+constexpr auto N_VNew = N_VNew_Sycl;
 #elif defined(USE_OMP)
 #include <nvector/nvector_serial.h>
-#define HIP_OR_CUDA(a, b)
+#define HIP_OR_CUDA_OR_SYCL(a, b, c)
 constexpr auto N_VNew = N_VNew_Serial;
 #else
 #include <nvector/nvector_serial.h>
-#define HIP_OR_CUDA(a, b)
+#define HIP_OR_CUDA_OR_SYCL(a, b, c)
 constexpr auto N_VNew = N_VNew_Serial;
 #endif
 
@@ -73,7 +78,8 @@ using GkoMatrixType = gko::matrix::Csr<sunrealtype, sunindextype>;
 using GkoSolverType = gko::solver::Cg<sunrealtype>;
 
 using SUNGkoMatrixType = sundials::ginkgo::Matrix<GkoMatrixType>;
-using SUNGkoSolverType = sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>;
+using SUNGkoSolverType =
+  sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>;
 
 // -----------------------------------------------------------------------------
 // Functions provided to the SUNDIALS integrator
@@ -83,7 +89,8 @@ using SUNGkoSolverType = sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrix
 int f(sunrealtype t, N_Vector u, N_Vector f, void* user_data);
 
 // Jacobian of RHS function
-int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, N_Vector tmp1, N_Vector tmp2, N_Vector tmp3);
+int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data,
+      N_Vector tmp1, N_Vector tmp2, N_Vector tmp3);
 
 // -----------------------------------------------------------------------------
 // Main Program
@@ -104,12 +111,36 @@ int main(int argc, char* argv[])
   if (ReadInputs(args, udata)) return 1;
   PrintUserData(udata);
 
+  // ---------------------------------------
+  // Create Ginkgo matrix and linear solver
+  // ---------------------------------------
+
+#if defined(USE_CUDA)
+  auto gko_exec{gko::CudaExecutor::create(0, gko::OmpExecutor::create(), false,
+                                          gko::allocation_mode::device)};
+#elif defined(USE_HIP)
+  auto gko_exec{gko::HipExecutor::create(0, gko::OmpExecutor::create(), false,
+                                         gko::allocation_mode::device)};
+#elif defined(USE_DPCPP)
+  auto gko_exec{gko::DpcppExecutor::create(0, gko::ReferenceExecutor::create())};
+#elif defined(USE_OMP)
+  auto gko_exec{gko::OmpExecutor::create()};
+#else
+  auto gko_exec{gko::ReferenceExecutor::create()};
+#endif
+
+  udata.exec = gko_exec;
+
   // ---------------
   // Create vectors
   // ---------------
 
   // Create solution vector
+#if defined(USE_DPCPP)
+  N_Vector u = N_VNew(udata.nodes, gko_exec->get_queue(), sunctx);
+#else
   N_Vector u = N_VNew(udata.nodes, sunctx);
+#endif
   if (check_ptr(u, "N_VNew")) return 1;
 
   // Set initial condition
@@ -120,34 +151,26 @@ int main(int argc, char* argv[])
   N_Vector e = N_VClone(u);
   if (check_ptr(e, "N_VClone")) return 1;
 
-    // ---------------------------------------
-    // Create Ginkgo matrix and linear solver
-    // ---------------------------------------
-
-#if defined(USE_CUDA)
-  auto gko_exec{gko::CudaExecutor::create(0, gko::OmpExecutor::create(), false, gko::allocation_mode::device)};
-#elif defined(USE_HIP)
-  auto gko_exec{gko::HipExecutor::create(0, gko::OmpExecutor::create(), false, gko::allocation_mode::device)};
-#elif defined(USE_OMP)
-  auto gko_exec{gko::OmpExecutor::create()};
-#else
-  auto gko_exec{gko::ReferenceExecutor::create()};
-#endif
-
   auto gko_matrix_dim = gko::dim<2>(udata.nodes, udata.nodes);
   auto gko_matrix_nnz{(5 * (udata.nx - 2) + 2) * (udata.ny - 2) + 2 * udata.nx};
-  auto gko_matrix = gko::share(GkoMatrixType::create(gko_exec, gko_matrix_dim, gko_matrix_nnz));
+  auto gko_matrix =
+    gko::share(GkoMatrixType::create(gko_exec, gko_matrix_dim, gko_matrix_nnz));
 
   SUNGkoMatrixType A{gko_matrix, sunctx};
 
   // Use default stopping criteria
-  auto crit{sundials::ginkgo::DefaultStop::build().with_max_iters(static_cast<gko::uint64>(udata.liniters)).on(gko_exec)};
+  auto crit{sundials::ginkgo::DefaultStop::build()
+              .with_max_iters(static_cast<gko::uint64>(udata.liniters))
+              .on(gko_exec)};
 
   // Use Jacobi preconditioner
-  auto precon{gko::preconditioner::Jacobi<sunrealtype, sunindextype>::build().on(gko_exec)};
+  auto precon{
+    gko::preconditioner::Jacobi<sunrealtype, sunindextype>::build().on(gko_exec)};
 
-  auto gko_solver_factory = gko::share(
-      GkoSolverType::build().with_criteria(std::move(crit)).with_preconditioner(std::move(precon)).on(gko_exec));
+  auto gko_solver_factory = gko::share(GkoSolverType::build()
+                                         .with_criteria(std::move(crit))
+                                         .with_preconditioner(std::move(precon))
+                                         .on(gko_exec));
 
   SUNGkoSolverType LS{gko_solver_factory, sunctx};
 
@@ -206,7 +229,8 @@ int main(int argc, char* argv[])
   flag = WriteOutput(t, u, e, udata);
   if (check_flag(flag, "WriteOutput")) return 1;
 
-  for (int iout = 0; iout < udata.nout; iout++) {
+  for (int iout = 0; iout < udata.nout; iout++)
+  {
     // Evolve in time
     flag = CVode(cvode_mem, tout, u, &t, CV_NORMAL);
     if (check_flag(flag, "CVode")) break;
@@ -239,13 +263,15 @@ int main(int argc, char* argv[])
 
   sunrealtype maxerr = N_VMaxNorm(e);
 
-  std::cout << std::scientific << std::setprecision(std::numeric_limits<sunrealtype>::digits10)
+  std::cout << std::scientific
+            << std::setprecision(std::numeric_limits<sunrealtype>::digits10)
             << "\nMax error = " << maxerr << std::endl;
 
   // --------------------
   // Clean up and return
   // --------------------
 
+  udata.exec = nullptr;
   CVodeFree(&cvode_mem); // Free integrator memory
   N_VDestroy(u);         // Free vectors
   N_VDestroy(e);
@@ -259,15 +285,19 @@ int main(int argc, char* argv[])
 
 #if defined(USE_CUDA) || defined(USE_HIP)
 // GPU kernel to compute the ODE RHS function f(t,y).
-__global__ void f_kernel(const sunindextype nx, const sunindextype ny, const sunrealtype dx, const sunrealtype dy,
-                         const sunrealtype cx, const sunrealtype cy, const sunrealtype cc, const sunrealtype bx,
-                         const sunrealtype by, const sunrealtype sin_t_cos_t, const sunrealtype cos_sqr_t,
-                         sunrealtype* uarray, sunrealtype* farray)
+__global__ void f_kernel(const sunindextype nx, const sunindextype ny,
+                         const sunrealtype dx, const sunrealtype dy,
+                         const sunrealtype cx, const sunrealtype cy,
+                         const sunrealtype cc, const sunrealtype bx,
+                         const sunrealtype by, const sunrealtype sin_t_cos_t,
+                         const sunrealtype cos_sqr_t, sunrealtype* uarray,
+                         sunrealtype* farray)
 {
   const sunindextype i = blockIdx.x * blockDim.x + threadIdx.x;
   const sunindextype j = blockIdx.y * blockDim.y + threadIdx.y;
 
-  if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1) {
+  if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1)
+  {
     auto x = i * dx;
     auto y = j * dy;
 
@@ -284,7 +314,8 @@ __global__ void f_kernel(const sunindextype nx, const sunindextype ny, const sun
     auto idx_e = (i + 1) + j * nx;
     auto idx_w = (i - 1) + j * nx;
 
-    farray[idx_c] = cc * uarray[idx_c] + cx * (uarray[idx_w] + uarray[idx_e]) + cy * (uarray[idx_s] + uarray[idx_n]) -
+    farray[idx_c] = cc * uarray[idx_c] + cx * (uarray[idx_w] + uarray[idx_e]) +
+                    cy * (uarray[idx_s] + uarray[idx_n]) -
                     TWO * PI * sin_sqr_x * sin_sqr_y * sin_t_cos_t -
                     bx * (cos_sqr_x - sin_sqr_x) * sin_sqr_y * cos_sqr_t -
                     by * (cos_sqr_y - sin_sqr_y) * sin_sqr_x * cos_sqr_t;
@@ -328,14 +359,67 @@ int f(sunrealtype t, N_Vector u, N_Vector f, void* user_data)
   if (check_ptr(farray, "N_VGetDeviceArrayPointer")) return -1;
 
   dim3 threads_per_block{16, 16};
-  const auto nbx{(static_cast<unsigned int>(nx) + threads_per_block.x - 1) / threads_per_block.x};
-  const auto nby{(static_cast<unsigned int>(ny) + threads_per_block.y - 1) / threads_per_block.y};
+  const auto nbx{(static_cast<unsigned int>(nx) + threads_per_block.x - 1) /
+                 threads_per_block.x};
+  const auto nby{(static_cast<unsigned int>(ny) + threads_per_block.y - 1) /
+                 threads_per_block.y};
   dim3 num_blocks{nbx, nby};
 
-  f_kernel<<<num_blocks, threads_per_block>>>(nx, ny, dx, dy, cx, cy, cc, bx, by, sin_t_cos_t, cos_sqr_t, uarray, farray);
+  f_kernel<<<num_blocks, threads_per_block>>>(nx, ny, dx, dy, cx, cy, cc, bx,
+                                              by, sin_t_cos_t, cos_sqr_t,
+                                              uarray, farray);
 
-  HIP_OR_CUDA(hipDeviceSynchronize();, cudaDeviceSynchronize(););
+  HIP_OR_CUDA_OR_SYCL(hipDeviceSynchronize(), cudaDeviceSynchronize(), );
 
+#elif defined(USE_DPCPP)
+  // Access device data arrays
+  sunrealtype* uarray = N_VGetDeviceArrayPointer(u);
+  if (check_ptr(uarray, "N_VGetDeviceArrayPointer")) return -1;
+
+  sunrealtype* farray = N_VGetDeviceArrayPointer(f);
+  if (check_ptr(farray, "N_VGetDeviceArrayPointer")) return -1;
+
+  std::dynamic_pointer_cast<const gko::DpcppExecutor>(udata->exec)
+    ->get_queue()
+    ->submit(
+      [&](sycl::handler& cgh)
+      {
+        cgh.parallel_for(sycl::range<2>(ny, nx),
+                         [=](sycl::id<2> id)
+                         {
+                           const sunindextype i = id[1];
+                           const sunindextype j = id[0];
+                           if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1)
+                           {
+                             auto x = i * dx;
+                             auto y = j * dy;
+
+                             auto sin_sqr_x = sin(PI * x) * sin(PI * x);
+                             auto sin_sqr_y = sin(PI * y) * sin(PI * y);
+
+                             auto cos_sqr_x = cos(PI * x) * cos(PI * x);
+                             auto cos_sqr_y = cos(PI * y) * cos(PI * y);
+
+                             // center, north, south, east, and west indices
+                             auto idx_c = i + j * nx;
+                             auto idx_n = i + (j + 1) * nx;
+                             auto idx_s = i + (j - 1) * nx;
+                             auto idx_e = (i + 1) + j * nx;
+                             auto idx_w = (i - 1) + j * nx;
+
+                             farray[idx_c] =
+                               cc * uarray[idx_c] +
+                               cx * (uarray[idx_w] + uarray[idx_e]) +
+                               cy * (uarray[idx_s] + uarray[idx_n]) -
+                               TWO * PI * sin_sqr_x * sin_sqr_y * sin_t_cos_t -
+                               bx * (cos_sqr_x - sin_sqr_x) * sin_sqr_y *
+                                 cos_sqr_t -
+                               by * (cos_sqr_y - sin_sqr_y) * sin_sqr_x *
+                                 cos_sqr_t;
+                           }
+                         });
+      });
+  udata->exec->synchronize();
 #else
 
   // Access host data arrays
@@ -346,8 +430,10 @@ int f(sunrealtype t, N_Vector u, N_Vector f, void* user_data)
   if (check_ptr(farray, "N_VGetArrayPointer")) return -1;
 
   // Iterate over domain interior and fill the RHS vector
-  for (sunindextype j = 1; j < ny - 1; j++) {
-    for (sunindextype i = 1; i < nx - 1; i++) {
+  for (sunindextype j = 1; j < ny - 1; j++)
+  {
+    for (sunindextype i = 1; i < nx - 1; i++)
+    {
       auto x = i * dx;
       auto y = j * dy;
 
@@ -364,7 +450,8 @@ int f(sunrealtype t, N_Vector u, N_Vector f, void* user_data)
       auto idx_e = (i + 1) + j * nx;
       auto idx_w = (i - 1) + j * nx;
 
-      farray[idx_c] = cc * uarray[idx_c] + cx * (uarray[idx_w] + uarray[idx_e]) + cy * (uarray[idx_s] + uarray[idx_n]) -
+      farray[idx_c] = cc * uarray[idx_c] + cx * (uarray[idx_w] + uarray[idx_e]) +
+                      cy * (uarray[idx_s] + uarray[idx_n]) -
                       TWO * PI * sin_sqr_x * sin_sqr_y * sin_t_cos_t -
                       bx * (cos_sqr_x - sin_sqr_x) * sin_sqr_y * cos_sqr_t -
                       by * (cos_sqr_y - sin_sqr_y) * sin_sqr_x * cos_sqr_t;
@@ -380,12 +467,14 @@ int f(sunrealtype t, N_Vector u, N_Vector f, void* user_data)
 #if defined(USE_CUDA) || defined(USE_HIP)
 // GPU kernel to fill southern (j = 0) and northern (j = nx - 1) boundary
 // entries including the corners.
-__global__ void J_sn_kernel(const sunindextype nx, const sunindextype ny, sunindextype* row_ptrs,
-                            sunindextype* col_idxs, sunrealtype* mat_data)
+__global__ void J_sn_kernel(const sunindextype nx, const sunindextype ny,
+                            sunindextype* row_ptrs, sunindextype* col_idxs,
+                            sunrealtype* mat_data)
 {
   const sunindextype i = blockIdx.x * blockDim.x + threadIdx.x;
 
-  if (i >= 0 && i < nx) {
+  if (i >= 0 && i < nx)
+  {
     // Southern face
     mat_data[i] = ZERO;
     col_idxs[i] = i;
@@ -404,12 +493,14 @@ __global__ void J_sn_kernel(const sunindextype nx, const sunindextype ny, sunind
 
 // GPU kernel to fill western (i = 0) and eastern (i = nx - 1) boundary entries
 // excluding the corners (set by J_sn_kernel).
-__global__ void J_we_kernel(const sunindextype nx, const sunrealtype ny, sunindextype* row_ptrs, sunindextype* col_idxs,
+__global__ void J_we_kernel(const sunindextype nx, const sunrealtype ny,
+                            sunindextype* row_ptrs, sunindextype* col_idxs,
                             sunrealtype* mat_data)
 {
   const sunindextype j = blockIdx.x * blockDim.x + threadIdx.x;
 
-  if (j > 0 && j < ny - 1) {
+  if (j > 0 && j < ny - 1)
+  {
     // Western face
     auto col      = j * nx;
     auto idx      = (5 * (nx - 2) + 2) * (j - 1) + nx;
@@ -427,13 +518,16 @@ __global__ void J_we_kernel(const sunindextype nx, const sunrealtype ny, suninde
 }
 
 // GPU kernel to compute the ODE RHS Jacobian function df/dy(t,y).
-__global__ void J_kernel(const sunindextype nx, const sunindextype ny, const sunrealtype cx, const sunrealtype cy,
-                         const sunrealtype cc, sunindextype* row_ptrs, sunindextype* col_idxs, sunrealtype* mat_data)
+__global__ void J_kernel(const sunindextype nx, const sunindextype ny,
+                         const sunrealtype cx, const sunrealtype cy,
+                         const sunrealtype cc, sunindextype* row_ptrs,
+                         sunindextype* col_idxs, sunrealtype* mat_data)
 {
   const sunindextype i = blockIdx.x * blockDim.x + threadIdx.x;
   const sunindextype j = blockIdx.y * blockDim.y + threadIdx.y;
 
-  if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1) {
+  if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1)
+  {
     auto row   = i + j * nx;
     auto col_s = row - nx;
     auto col_w = row - 1;
@@ -467,7 +561,8 @@ __global__ void J_kernel(const sunindextype nx, const sunindextype ny, const sun
 // J routine to compute the ODE RHS Jacobian function df/dy(t,y). This
 // explicitly set boundary entries to zero so J(t,y) has the same sparsity
 // pattern as A = I - gamma * J(t,y).
-int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, N_Vector tmp1, N_Vector tmp2, N_Vector tmp3)
+int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data,
+      N_Vector tmp1, N_Vector tmp2, N_Vector tmp3)
 {
   // Access problem data
   auto udata    = static_cast<UserData*>(user_data);
@@ -492,35 +587,141 @@ int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, N_Ve
 #if defined(USE_CUDA) || defined(USE_HIP)
 
   unsigned threads_per_block_bx = 16;
-  unsigned num_blocks_bx        = ((nx + threads_per_block_bx - 1) / threads_per_block_bx);
+  unsigned num_blocks_bx =
+    ((nx + threads_per_block_bx - 1) / threads_per_block_bx);
 
-  J_sn_kernel<<<num_blocks_bx, threads_per_block_bx>>>(nx, ny, row_ptrs, col_idxs, mat_data);
+  J_sn_kernel<<<num_blocks_bx, threads_per_block_bx>>>(nx, ny, row_ptrs,
+                                                       col_idxs, mat_data);
 
   unsigned threads_per_block_by = 16;
-  unsigned num_blocks_by        = ((ny + threads_per_block_by - 1) / threads_per_block_by);
+  unsigned num_blocks_by =
+    ((ny + threads_per_block_by - 1) / threads_per_block_by);
 
-  J_we_kernel<<<num_blocks_by, threads_per_block_by>>>(nx, ny, row_ptrs, col_idxs, mat_data);
+  J_we_kernel<<<num_blocks_by, threads_per_block_by>>>(nx, ny, row_ptrs,
+                                                       col_idxs, mat_data);
 
   dim3 threads_per_block_i{16, 16};
-  const auto nbx{(static_cast<unsigned int>(nx) + threads_per_block_i.x - 1) / threads_per_block_i.x};
-  const auto nby{(static_cast<unsigned int>(ny) + threads_per_block_i.y - 1) / threads_per_block_i.y};
+  const auto nbx{(static_cast<unsigned int>(nx) + threads_per_block_i.x - 1) /
+                 threads_per_block_i.x};
+  const auto nby{(static_cast<unsigned int>(ny) + threads_per_block_i.y - 1) /
+                 threads_per_block_i.y};
   dim3 num_blocks_i{nbx, nby};
 
-  J_kernel<<<num_blocks_i, threads_per_block_i>>>(nx, ny, cx, cy, cc, row_ptrs, col_idxs, mat_data);
-
-  HIP_OR_CUDA(hipDeviceSynchronize();, cudaDeviceSynchronize(););
-
+  J_kernel<<<num_blocks_i, threads_per_block_i>>>(nx, ny, cx, cy, cc, row_ptrs,
+                                                  col_idxs, mat_data);
+
+  HIP_OR_CUDA_OR_SYCL(hipDeviceSynchronize(), cudaDeviceSynchronize(), );
+#elif defined(USE_DPCPP)
+  auto queue =
+    std::dynamic_pointer_cast<const gko::DpcppExecutor>(udata->exec)->get_queue();
+  // J_sn_kernel
+  queue->submit(
+    [&](sycl::handler& cgh)
+    {
+      cgh.parallel_for(nx,
+                       [=](sycl::id<1> id)
+                       {
+                         const sunindextype i = id[0];
+
+                         // Southern face
+                         mat_data[i] = ZERO;
+                         col_idxs[i] = i;
+                         row_ptrs[i] = i;
+
+                         // Northern face
+                         auto col      = i + (ny - 1) * nx;
+                         auto idx      = (5 * (nx - 2) + 2) * (ny - 2) + nx + i;
+                         mat_data[idx] = ZERO;
+                         col_idxs[idx] = col;
+                         row_ptrs[col] = idx;
+
+                         if (i == nx - 1)
+                           row_ptrs[nx * ny] = (5 * (nx - 2) + 2) * (ny - 2) +
+                                               2 * nx;
+                       });
+    });
+  // J_we_kernel
+  queue->submit(
+    [&](sycl::handler& cgh)
+    {
+      cgh.parallel_for(ny,
+                       [=](sycl::id<1> id)
+                       {
+                         const sunindextype j = id[0];
+                         if (j > 0 && j < ny - 1)
+                         {
+                           // Western face
+                           auto col      = j * nx;
+                           auto idx      = (5 * (nx - 2) + 2) * (j - 1) + nx;
+                           mat_data[idx] = ZERO;
+                           col_idxs[idx] = col;
+                           row_ptrs[col] = idx;
+
+                           // Eastern face
+                           col = (nx - 1) + j * nx;
+                           idx = (5 * (nx - 2) + 2) * (j - 1) + nx + 1 +
+                                 5 * (nx - 2);
+                           mat_data[idx] = ZERO;
+                           col_idxs[idx] = col;
+                           row_ptrs[col] = idx;
+                         }
+                       });
+    });
+  // J_kernel
+  queue->submit(
+    [&](sycl::handler& cgh)
+    {
+      cgh.parallel_for(sycl::range<2>(ny, nx),
+                       [=](sycl::id<2> id)
+                       {
+                         const sunindextype i = id[1];
+                         const sunindextype j = id[0];
+
+                         if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1)
+                         {
+                           auto row   = i + j * nx;
+                           auto col_s = row - nx;
+                           auto col_w = row - 1;
+                           auto col_c = row;
+                           auto col_e = row + 1;
+                           auto col_n = row + nx;
+
+                           // Number of non-zero entries from preceding rows
+                           auto prior_nnz = (5 * (nx - 2) + 2) * (j - 1) + nx;
+
+                           // Starting index for this row
+                           auto idx = prior_nnz + 1 + 5 * (i - 1);
+
+                           mat_data[idx]     = cy;
+                           mat_data[idx + 1] = cx;
+                           mat_data[idx + 2] = cc;
+                           mat_data[idx + 3] = cx;
+                           mat_data[idx + 4] = cy;
+
+                           col_idxs[idx]     = col_s;
+                           col_idxs[idx + 1] = col_w;
+                           col_idxs[idx + 2] = col_c;
+                           col_idxs[idx + 3] = col_e;
+                           col_idxs[idx + 4] = col_n;
+
+                           row_ptrs[row] = idx;
+                         }
+                       });
+    });
+  udata->exec->synchronize();
 #else
 
   // Fill southern boundary entries (j = 0)
-  for (sunindextype i = 0; i < nx; i++) {
+  for (sunindextype i = 0; i < nx; i++)
+  {
     mat_data[i] = ZERO;
     col_idxs[i] = i;
     row_ptrs[i] = i;
   }
 
   // Fill western boundary entries (i = 0)
-  for (sunindextype j = 1; j < ny - 1; j++) {
+  for (sunindextype j = 1; j < ny - 1; j++)
+  {
     auto col      = j * nx;
     auto idx      = (5 * (nx - 2) + 2) * (j - 1) + nx;
     mat_data[idx] = ZERO;
@@ -529,7 +730,8 @@ int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, N_Ve
   }
 
   // Fill eastern boundary entries (i = nx - 1)
-  for (sunindextype j = 1; j < ny - 1; j++) {
+  for (sunindextype j = 1; j < ny - 1; j++)
+  {
     auto col      = (nx - 1) + j * nx;
     auto idx      = (5 * (nx - 2) + 2) * (j - 1) + nx + 1 + 5 * (nx - 2);
     mat_data[idx] = ZERO;
@@ -538,7 +740,8 @@ int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, N_Ve
   }
 
   // Fill northern boundary entries (j = ny - 1)
-  for (sunindextype i = 0; i < nx; i++) {
+  for (sunindextype i = 0; i < nx; i++)
+  {
     auto col      = i + (ny - 1) * nx;
     auto idx      = (5 * (nx - 2) + 2) * (ny - 2) + nx + i;
     mat_data[idx] = ZERO;
@@ -548,8 +751,10 @@ int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, N_Ve
   row_ptrs[nx * ny] = (5 * (nx - 2) + 2) * (ny - 2) + 2 * nx;
 
   // Fill interior entries
-  for (sunindextype j = 1; j < ny - 1; j++) {
-    for (sunindextype i = 1; i < nx - 1; i++) {
+  for (sunindextype j = 1; j < ny - 1; j++)
+  {
+    for (sunindextype i = 1; i < nx - 1; i++)
+    {
       auto row   = i + j * nx;
       auto col_s = row - nx;
       auto col_w = row - 1;
diff --git a/examples/cvode/ginkgo/cv_heat2D_ginkgo.hpp b/examples/cvode/ginkgo/cv_heat2D_ginkgo.hpp
index bc459c68d1..a363985c7b 100644
--- a/examples/cvode/ginkgo/cv_heat2D_ginkgo.hpp
+++ b/examples/cvode/ginkgo/cv_heat2D_ginkgo.hpp
@@ -15,24 +15,30 @@
  * See cv_heat2D_ginkgo.cpp for more information.
  * ---------------------------------------------------------------------------*/
 
+#include <cmath>
 #include <cstdio>
-#include <iostream>
-#include <iomanip>
 #include <fstream>
+#include <iomanip>
+#include <iostream>
 #include <limits>
-#include <cmath>
+#include <memory>
 #include <string>
 
 // SUNDIALS types
-#include <sundials/sundials_types.h>
 #include <sundials/sundials_nvector.h>
+#include <sundials/sundials_types.h>
 
 #if defined(USE_CUDA)
 #include <nvector/nvector_cuda.h>
 #elif defined(USE_HIP)
 #include <nvector/nvector_hip.h>
+#elif defined(USE_DPCPP)
+#include <nvector/nvector_sycl.h>
 #endif
 
+// Ginkgo Type
+#include <ginkgo/ginkgo.hpp>
+
 // Common utility functions
 #include <example_utilities.hpp>
 
@@ -72,19 +78,22 @@ struct UserData
   sunrealtype dy = yu / (ny - 1);
 
   // Integrator settings
-  sunrealtype rtol     = SUN_RCONST(1.0e-4); // relative tolerance
-  sunrealtype atol     = SUN_RCONST(1.0e-8); // absolute tolerance
-  int         maxsteps = 0;                  // max number of steps between outputs
+  sunrealtype rtol = SUN_RCONST(1.0e-4); // relative tolerance
+  sunrealtype atol = SUN_RCONST(1.0e-8); // absolute tolerance
+  int maxsteps     = 0;                  // max number of steps between outputs
 
   // Linear solver settings
-  int         liniters = 20;   // number of linear iterations
-  sunrealtype epslin   = ZERO; // linear solver tolerance factor
+  int liniters       = 20;   // number of linear iterations
+  sunrealtype epslin = ZERO; // linear solver tolerance factor
 
   // Ouput variables
-  bool          output = false; // write solution to disk
-  int           nout   = 20;    // number of output times
-  std::ofstream uout;           // output file stream
-  std::ofstream eout;           // error file stream
+  bool output = false; // write solution to disk
+  int nout    = 20;    // number of output times
+  std::ofstream uout;  // output file stream
+  std::ofstream eout;  // error file stream
+
+  // Ginkgo executor for synchronization on sycl
+  std::shared_ptr<const gko::Executor> exec;
 };
 
 // -----------------------------------------------------------------------------
@@ -93,11 +102,9 @@ struct UserData
 
 #if defined(USE_CUDA) || defined(USE_HIP)
 // GPU kernel to compute the ODE RHS function f(t,y).
-__global__
-void solution_kernel(const sunindextype nx, const sunindextype ny,
-                     const sunrealtype dx, const sunrealtype dy,
-                     const sunrealtype cos_sqr_t,
-                     sunrealtype* uarray)
+__global__ void solution_kernel(const sunindextype nx, const sunindextype ny,
+                                const sunrealtype dx, const sunrealtype dy,
+                                const sunrealtype cos_sqr_t, sunrealtype* uarray)
 {
   const sunindextype i = blockIdx.x * blockDim.x + threadIdx.x;
   const sunindextype j = blockIdx.y * blockDim.y + threadIdx.y;
@@ -110,14 +117,14 @@ void solution_kernel(const sunindextype nx, const sunindextype ny,
     auto sin_sqr_x = sin(PI * x) * sin(PI * x);
     auto sin_sqr_y = sin(PI * y) * sin(PI * y);
 
-    auto idx = i + j * nx;
+    auto idx    = i + j * nx;
     uarray[idx] = sin_sqr_x * sin_sqr_y * cos_sqr_t + ONE;
   }
 }
 #endif
 
 // Compute the exact solution
-int Solution(sunrealtype t, N_Vector u, UserData &udata)
+int Solution(sunrealtype t, N_Vector u, UserData& udata)
 {
   // Access problem data and set shortcuts
   const auto nx = udata.nx;
@@ -133,22 +140,49 @@ int Solution(sunrealtype t, N_Vector u, UserData &udata)
 
 #if defined(USE_CUDA) || defined(USE_HIP)
 
-  sunrealtype *uarray = N_VGetDeviceArrayPointer(u);
+  sunrealtype* uarray = N_VGetDeviceArrayPointer(u);
   if (check_ptr(uarray, "N_VGetDeviceArrayPointer")) return -1;
 
   dim3 threads_per_block{16, 16};
-  const auto nbx{(static_cast<unsigned int>(nx) + threads_per_block.x - 1)
-      / threads_per_block.x};
-  const auto nby{(static_cast<unsigned int>(ny) + threads_per_block.y - 1)
-      / threads_per_block.y};
+  const auto nbx{(static_cast<unsigned int>(nx) + threads_per_block.x - 1) /
+                 threads_per_block.x};
+  const auto nby{(static_cast<unsigned int>(ny) + threads_per_block.y - 1) /
+                 threads_per_block.y};
   dim3 num_blocks{nbx, nby};
 
-  solution_kernel<<<num_blocks, threads_per_block>>>
-    (nx, ny, dx, dy, cos_sqr_t, uarray);
-
+  solution_kernel<<<num_blocks, threads_per_block>>>(nx, ny, dx, dy, cos_sqr_t,
+                                                     uarray);
+#elif defined(USE_DPCPP)
+  sunrealtype* uarray = N_VGetDeviceArrayPointer(u);
+  if (check_ptr(uarray, "N_VGetDeviceArrayPointer")) return -1;
+  std::dynamic_pointer_cast<const gko::DpcppExecutor>(udata.exec)
+    ->get_queue()
+    ->submit(
+      [&](sycl::handler& cgh)
+      {
+        cgh.parallel_for(sycl::range<2>(ny, nx),
+                         [=](sycl::id<2> id)
+                         {
+                           const sunindextype i = id[1];
+                           const sunindextype j = id[0];
+
+                           if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1)
+                           {
+                             auto x = i * dx;
+                             auto y = j * dy;
+
+                             auto sin_sqr_x = sin(PI * x) * sin(PI * x);
+                             auto sin_sqr_y = sin(PI * y) * sin(PI * y);
+
+                             auto idx    = i + j * nx;
+                             uarray[idx] = sin_sqr_x * sin_sqr_y * cos_sqr_t +
+                                           ONE;
+                           }
+                         });
+      });
 #else
 
-  sunrealtype *uarray = N_VGetArrayPointer(u);
+  sunrealtype* uarray = N_VGetArrayPointer(u);
   if (check_ptr(uarray, "N_VGetArrayPointer")) return -1;
 
   for (sunindextype j = 1; j < ny - 1; j++)
@@ -161,18 +195,18 @@ int Solution(sunrealtype t, N_Vector u, UserData &udata)
       auto sin_sqr_x = sin(PI * x) * sin(PI * x);
       auto sin_sqr_y = sin(PI * y) * sin(PI * y);
 
-      auto idx = i + j * nx;
+      auto idx    = i + j * nx;
       uarray[idx] = sin_sqr_x * sin_sqr_y * cos_sqr_t + ONE;
     }
   }
 
 #endif
-
+  udata.exec->synchronize();
   return 0;
 }
 
 // Compute the solution error
-int SolutionError(sunrealtype t, N_Vector u, N_Vector e, UserData &udata)
+int SolutionError(sunrealtype t, N_Vector u, N_Vector e, UserData& udata)
 {
   // Compute true solution
   int flag = Solution(t, e, udata);
@@ -188,29 +222,28 @@ int SolutionError(sunrealtype t, N_Vector u, N_Vector e, UserData &udata)
 // Print command line options
 void InputHelp()
 {
-  std::cout
-    << std::endl
-    << "Command line options:\n"
-    << "  --nx <nx>          : number of x mesh points\n"
-    << "  --nx <nx>          : number of y mesh points\n"
-    << "  --xu <xu>          : x upper bound\n"
-    << "  --yu <yu>          : y upper bound\n"
-    << "  --kx <kx>          : x diffusion coefficient\n"
-    << "  --kx <ky>          : y diffusion coefficient\n"
-    << "  --tf <time>        : final time\n"
-    << "  --rtol <rtol>      : relative tolerance\n"
-    << "  --atol <atol>      : absoltue tolerance\n"
-    << "  --liniters <iters> : max number of iterations\n"
-    << "  --epslin <factor>  : linear tolerance factor\n"
-    << "  --msbp <steps>     : max steps between prec setups\n"
-    << "  --output           : write solution to disk\n"
-    << "  --nout <nout>      : number of outputs\n"
-    << "  --maxsteps <steps> : max steps between outputs\n"
-    << "  --help             : print this message and exit\n";
+  std::cout << std::endl
+            << "Command line options:\n"
+            << "  --nx <nx>          : number of x mesh points\n"
+            << "  --ny <ny>          : number of y mesh points\n"
+            << "  --xu <xu>          : x upper bound\n"
+            << "  --yu <yu>          : y upper bound\n"
+            << "  --kx <kx>          : x diffusion coefficient\n"
+            << "  --ky <ky>          : y diffusion coefficient\n"
+            << "  --tf <time>        : final time\n"
+            << "  --rtol <rtol>      : relative tolerance\n"
+            << "  --atol <atol>      : absolute tolerance\n"
+            << "  --liniters <iters> : max number of iterations\n"
+            << "  --epslin <factor>  : linear tolerance factor\n"
+            << "  --msbp <steps>     : max steps between prec setups\n"
+            << "  --output           : write solution to disk\n"
+            << "  --nout <nout>      : number of outputs\n"
+            << "  --maxsteps <steps> : max steps between outputs\n"
+            << "  --help             : print this message and exit\n";
 }
 
 // Read command line inputs
-int ReadInputs(std::vector<std::string> &args, UserData &udata)
+int ReadInputs(std::vector<std::string>& args, UserData& udata)
 {
   if (find(args.begin(), args.end(), "--help") != args.end())
   {
@@ -242,46 +275,43 @@ int ReadInputs(std::vector<std::string> &args, UserData &udata)
   return 0;
 }
 
-
 // Print user data
-void PrintUserData(UserData &udata)
+void PrintUserData(UserData& udata)
 {
-  std::cout
-    << std::endl
-    << "2D Heat problem:\n"
-    << " ----------------------------\n"
-    << "  kx        = " << udata.kx << "\n"
-    << "  ky        = " << udata.ky << "\n"
-    << "  tf        = " << udata.tf << "\n"
-    << "  xu        = " << udata.xu << "\n"
-    << "  yu        = " << udata.yu << "\n"
-    << "  nx        = " << udata.nx << "\n"
-    << "  ny        = " << udata.ny << "\n"
-    << "  dx        = " << udata.dx << "\n"
-    << "  dy        = " << udata.dy << "\n"
-    << " ----------------------------\n"
-    << "  rtol      = " << udata.rtol << "\n"
-    << "  atol      = " << udata.atol << "\n"
-    << " ----------------------------\n"
-    << "  lin iters = " << udata.liniters << "\n"
-    << "  eps lin   = " << udata.epslin << "\n"
-    << " ----------------------------\n"
-    << "  output    = " << udata.output << "\n"
-    << " ----------------------------\n"
-    << std::endl;
+  std::cout << std::endl
+            << "2D Heat problem:\n"
+            << " ----------------------------\n"
+            << "  kx        = " << udata.kx << "\n"
+            << "  ky        = " << udata.ky << "\n"
+            << "  tf        = " << udata.tf << "\n"
+            << "  xu        = " << udata.xu << "\n"
+            << "  yu        = " << udata.yu << "\n"
+            << "  nx        = " << udata.nx << "\n"
+            << "  ny        = " << udata.ny << "\n"
+            << "  dx        = " << udata.dx << "\n"
+            << "  dy        = " << udata.dy << "\n"
+            << " ----------------------------\n"
+            << "  rtol      = " << udata.rtol << "\n"
+            << "  atol      = " << udata.atol << "\n"
+            << " ----------------------------\n"
+            << "  lin iters = " << udata.liniters << "\n"
+            << "  eps lin   = " << udata.epslin << "\n"
+            << " ----------------------------\n"
+            << "  output    = " << udata.output << "\n"
+            << " ----------------------------\n"
+            << std::endl;
 }
 
 // Initialize output
-int OpenOutput(UserData &udata)
+int OpenOutput(UserData& udata)
 {
   // Header for status output
-  std::cout
-    << std::scientific
-    << std::setprecision(std::numeric_limits<sunrealtype>::digits10)
-    << "          t                     ||u||_rms      "
-    << "          max error\n"
-    << " ----------------------------------------------"
-    << "-------------------------\n";
+  std::cout << std::scientific
+            << std::setprecision(std::numeric_limits<sunrealtype>::digits10)
+            << "          t                     ||u||_rms      "
+            << "          max error\n"
+            << " ----------------------------------------------"
+            << "-------------------------\n";
 
   // Output problem information and open output streams
   if (udata.output)
@@ -289,11 +319,11 @@ int OpenOutput(UserData &udata)
     // Each processor outputs subdomain information
     std::ofstream dout;
     dout.open("heat2d_info.txt");
-    dout <<  "xu  " << udata.xu       << std::endl;
-    dout <<  "yu  " << udata.yu       << std::endl;
-    dout <<  "nx  " << udata.nx       << std::endl;
-    dout <<  "ny  " << udata.ny       << std::endl;
-    dout <<  "nt  " << udata.nout + 1 << std::endl;
+    dout << "xu  " << udata.xu << std::endl;
+    dout << "yu  " << udata.yu << std::endl;
+    dout << "nx  " << udata.nx << std::endl;
+    dout << "ny  " << udata.ny << std::endl;
+    dout << "nt  " << udata.nout + 1 << std::endl;
     dout.close();
 
     // Open output streams for solution and error
@@ -310,7 +340,7 @@ int OpenOutput(UserData &udata)
 }
 
 // Write output
-int WriteOutput(sunrealtype t, N_Vector u, N_Vector e, UserData &udata)
+int WriteOutput(sunrealtype t, N_Vector u, N_Vector e, UserData& udata)
 {
   // Compute the error
   int flag = SolutionError(t, u, e, udata);
@@ -323,7 +353,8 @@ int WriteOutput(sunrealtype t, N_Vector u, N_Vector e, UserData &udata)
   sunrealtype urms = sqrt(N_VDotProd(u, u) / udata.nx / udata.ny);
 
   // Output current status
-  std::cout << std::setw(22) << t << std::setw(25) << urms << std::setw(25) << max << std::endl;
+  std::cout << std::setw(22) << t << std::setw(25) << urms << std::setw(25)
+            << max << std::endl;
 
   // Write solution and error to disk
   if (udata.output)
@@ -335,10 +366,13 @@ int WriteOutput(sunrealtype t, N_Vector u, N_Vector e, UserData &udata)
 #elif defined(USE_HIP)
     N_VCopyFromDevice_Hip(u);
     N_VCopyFromDevice_Hip(e);
+#elif defined(USE_DPCPP)
+    N_VCopyFromDevice_Sycl(u);
+    N_VCopyFromDevice_Sycl(e);
 #endif
 
     // Access host data array
-    sunrealtype *uarray = N_VGetArrayPointer(u);
+    sunrealtype* uarray = N_VGetArrayPointer(u);
     if (check_ptr(uarray, "N_VGetArrayPointer")) return -1;
 
     udata.uout << t << " ";
@@ -349,7 +383,7 @@ int WriteOutput(sunrealtype t, N_Vector u, N_Vector e, UserData &udata)
     udata.uout << std::endl;
 
     // Access host data array
-    sunrealtype *earray = N_VGetArrayPointer(e);
+    sunrealtype* earray = N_VGetArrayPointer(e);
     if (check_ptr(earray, "N_VGetArrayPointer")) return -1;
 
     udata.eout << t << " ";
@@ -364,7 +398,7 @@ int WriteOutput(sunrealtype t, N_Vector u, N_Vector e, UserData &udata)
 }
 
 // Finalize output
-int CloseOutput(UserData &udata)
+int CloseOutput(UserData& udata)
 {
   // Footer for status output
   std::cout << " ----------------------------------------------"
diff --git a/examples/sunlinsol/ginkgo/CMakeLists.txt b/examples/sunlinsol/ginkgo/CMakeLists.txt
index 3f616192e0..f64df0912b 100644
--- a/examples/sunlinsol/ginkgo/CMakeLists.txt
+++ b/examples/sunlinsol/ginkgo/CMakeLists.txt
@@ -32,7 +32,7 @@ include_directories(..)
 
 sundials_add_examples_ginkgo(examples
   TARGETS test_sunlinsol_obj
-  BACKENDS REF OMP CUDA HIP
+  BACKENDS REF OMP CUDA HIP DPCPP
   UNIT_TEST)
 
 # Install the targets
@@ -44,6 +44,9 @@ if(EXAMPLES_INSTALL)
   if(SUNDIALS_GINKGO_BACKENDS MATCHES "HIP")
     list(APPEND vectors nvechip)
   endif()
+  if(SUNDIALS_GINKGO_BACKENDS MATCHES "DPCPP")
+    list(APPEND vectors nvecsycl)
+  endif()
   if(SUNDIALS_GINKGO_BACKENDS MATCHES "OMP")
     list(APPEND vectors nvecopenmp)
   endif()
diff --git a/examples/sunlinsol/ginkgo/test_sunlinsol_ginkgo.cpp b/examples/sunlinsol/ginkgo/test_sunlinsol_ginkgo.cpp
index beb26f7c69..b5549c7e4e 100644
--- a/examples/sunlinsol/ginkgo/test_sunlinsol_ginkgo.cpp
+++ b/examples/sunlinsol/ginkgo/test_sunlinsol_ginkgo.cpp
@@ -20,6 +20,7 @@
 #include <cstdlib>
 #include <ginkgo/ginkgo.hpp>
 #include <map>
+#include <memory>
 #include <random>
 #include <sundials/sundials_context.hpp>
 #include <sundials/sundials_math.h>
@@ -31,26 +32,29 @@
 
 #if defined(USE_HIP)
 #include <nvector/nvector_hip.h>
-#define HIP_OR_CUDA(a, b) a
+#define HIP_OR_CUDA_OR_SYCL(a, b, c) a
 constexpr auto N_VNew = N_VNew_Hip;
 #elif defined(USE_CUDA)
 #include <nvector/nvector_cuda.h>
-#define HIP_OR_CUDA(a, b) b
+#define HIP_OR_CUDA_OR_SYCL(a, b, c) b
 constexpr auto N_VNew = N_VNew_Cuda;
+#elif defined(USE_DPCPP)
+#include <nvector/nvector_sycl.h>
+#define HIP_OR_CUDA_OR_SYCL(a, b, c) c
+constexpr auto N_VNew = N_VNew_Sycl;
 #elif defined(USE_OMP)
 #include <nvector/nvector_openmp.h>
-#define HIP_OR_CUDA(a, b)
-auto N_VNew = [](sunindextype length, SUNContext sunctx) {
+#define HIP_OR_CUDA_OR_SYCL(a, b, c)
+auto N_VNew = [](sunindextype length, SUNContext sunctx)
+{
   auto omp_num_threads_var{std::getenv("OMP_NUM_THREADS")};
   int num_threads{1};
-  if (omp_num_threads_var) {
-    num_threads = std::atoi(omp_num_threads_var);
-  }
+  if (omp_num_threads_var) { num_threads = std::atoi(omp_num_threads_var); }
   return N_VNew_OpenMP(length, num_threads, sunctx);
 };
 #else
 #include <nvector/nvector_serial.h>
-#define HIP_OR_CUDA(a, b)
+#define HIP_OR_CUDA_OR_SYCL(a, b, c)
 constexpr auto N_VNew = N_VNew_Serial;
 #endif
 
@@ -60,8 +64,10 @@ constexpr auto N_VNew = N_VNew_Serial;
 
 // "multigrid" does not support the combined stopping criteria we use
 // "cbgmres" does not support setting stopping criteria
-const std::unordered_map<std::string, int> methods{{"bicg", 0}, {"bicgstab", 1}, {"cg", 2}, {"cgs", 3},
-                                                   {"fcg", 4},  {"gmres", 5},    {"idr", 6}};
+const std::unordered_map<std::string, int> methods{{"bicg", 0}, {"bicgstab", 1},
+                                                   {"cg", 2},   {"cgs", 3},
+                                                   {"fcg", 4},  {"gmres", 5},
+                                                   {"idr", 6}};
 
 const std::unordered_map<std::string, int> matrix_types{{"csr", 0}, {"dense", 1}};
 
@@ -70,13 +76,15 @@ const std::unordered_map<std::string, int> matrix_types{{"csr", 0}, {"dense", 1}
  * -------------------------------------------------------------------------- */
 
 #if defined(USE_CUDA) || defined(USE_HIP)
-__global__ void fill_kernel(sunindextype mat_rows, sunindextype mat_cols, sunindextype* row_ptrs,
-                            sunindextype* col_idxs, sunrealtype* mat_data)
+__global__ void fill_kernel(sunindextype mat_rows, sunindextype mat_cols,
+                            sunindextype* row_ptrs, sunindextype* col_idxs,
+                            sunrealtype* mat_data)
 {
   const sunindextype row = blockIdx.x * blockDim.x + threadIdx.x;
   const sunindextype nnz = 3 * mat_rows - 2;
 
-  if (row == 0) {
+  if (row == 0)
+  {
     // first row
     mat_data[0] = 2;
     mat_data[1] = -1;
@@ -84,7 +92,8 @@ __global__ void fill_kernel(sunindextype mat_rows, sunindextype mat_cols, sunind
     col_idxs[1] = 1;
     row_ptrs[0] = 0;
   }
-  else if (row == mat_rows - 1) {
+  else if (row == mat_rows - 1)
+  {
     // last row
     mat_data[nnz - 2]      = -1;
     mat_data[nnz - 1]      = 2;
@@ -93,7 +102,8 @@ __global__ void fill_kernel(sunindextype mat_rows, sunindextype mat_cols, sunind
     row_ptrs[mat_rows - 1] = nnz - 2;
     row_ptrs[mat_rows]     = nnz;
   }
-  else if (row < mat_rows) {
+  else if (row < mat_rows)
+  {
     // other rows
     sunindextype idx  = 3 * row - 1;
     mat_data[idx]     = -1;
@@ -106,21 +116,25 @@ __global__ void fill_kernel(sunindextype mat_rows, sunindextype mat_cols, sunind
   }
 }
 
-__global__ void fill_kernel(sunindextype mat_rows, sunindextype mat_cols, sunrealtype* mat_data)
+__global__ void fill_kernel(sunindextype mat_rows, sunindextype mat_cols,
+                            sunrealtype* mat_data)
 {
   const sunindextype row = blockIdx.x * blockDim.x + threadIdx.x;
 
-  if (row == 0) {
+  if (row == 0)
+  {
     // first row
     mat_data[0] = 2;
     mat_data[1] = -1;
   }
-  else if (row == mat_rows - 1) {
+  else if (row == mat_rows - 1)
+  {
     // last row
     mat_data[mat_cols * mat_rows - 2] = -1;
     mat_data[mat_cols * mat_rows - 1] = 2;
   }
-  else if (row < mat_rows) {
+  else if (row < mat_rows)
+  {
     // other rows
     sunindextype idx  = mat_cols * row + row;
     mat_data[idx - 1] = -1;
@@ -140,10 +154,58 @@ void fill_matrix(gko::matrix::Csr<sunrealtype, sunindextype>* matrix)
 
 #if defined(USE_CUDA) || defined(USE_HIP)
   unsigned threads_per_block = 256;
-  unsigned num_blocks        = (mat_rows + threads_per_block - 1) / threads_per_block;
-
-  fill_kernel<<<num_blocks, threads_per_block>>>(mat_rows, mat_cols, row_ptrs, col_idxs, mat_data);
-  HIP_OR_CUDA(hipDeviceSynchronize();, cudaDeviceSynchronize(););
+  unsigned num_blocks = (mat_rows + threads_per_block - 1) / threads_per_block;
+
+  fill_kernel<<<num_blocks, threads_per_block>>>(mat_rows, mat_cols, row_ptrs,
+                                                 col_idxs, mat_data);
+  HIP_OR_CUDA_OR_SYCL(hipDeviceSynchronize(), cudaDeviceSynchronize(), );
+#elif defined(USE_DPCPP)
+  std::dynamic_pointer_cast<const gko::DpcppExecutor>(matrix->get_executor())
+    ->get_queue()
+    ->submit(
+      [&](sycl::handler& cgh)
+      {
+        cgh.parallel_for(mat_rows,
+                         [=](sycl::id<1> id)
+                         {
+                           const sunindextype row = id[0];
+                           // copied from fill_kernel for csr
+                           const sunindextype nnz = 3 * mat_rows - 2;
+
+                           if (row == 0)
+                           {
+                             // first row
+                             mat_data[0] = 2;
+                             mat_data[1] = -1;
+                             col_idxs[0] = 0;
+                             col_idxs[1] = 1;
+                             row_ptrs[0] = 0;
+                           }
+                           else if (row == mat_rows - 1)
+                           {
+                             // last row
+                             mat_data[nnz - 2]      = -1;
+                             mat_data[nnz - 1]      = 2;
+                             col_idxs[nnz - 2]      = mat_rows - 2;
+                             col_idxs[nnz - 1]      = mat_rows - 1;
+                             row_ptrs[mat_rows - 1] = nnz - 2;
+                             row_ptrs[mat_rows]     = nnz;
+                           }
+                           else if (row < mat_rows)
+                           {
+                             // other rows
+                             sunindextype idx  = 3 * row - 1;
+                             mat_data[idx]     = -1;
+                             mat_data[idx + 1] = 2;
+                             mat_data[idx + 2] = -1;
+                             col_idxs[idx]     = row - 1;
+                             col_idxs[idx + 1] = row;
+                             col_idxs[idx + 2] = row + 1;
+                             row_ptrs[row]     = idx;
+                           }
+                         });
+      });
+  matrix->get_executor()->synchronize();
 #else
   // Matrix entries
   const sunrealtype vals[] = {-1, 2, -1};
@@ -151,10 +213,13 @@ void fill_matrix(gko::matrix::Csr<sunrealtype, sunindextype>* matrix)
   // Fill matrix
   int idx     = 0;
   row_ptrs[0] = idx;
-  for (auto row = 0; row < mat_rows; ++row) {
-    for (auto diag_offset : {-1, 0, 1}) {
+  for (auto row = 0; row < mat_rows; ++row)
+  {
+    for (auto diag_offset : {-1, 0, 1})
+    {
       auto col = row + diag_offset;
-      if (0 <= col && col < mat_cols) {
+      if (0 <= col && col < mat_cols)
+      {
         mat_data[idx] = vals[diag_offset + 1];
         col_idxs[idx] = col;
         ++idx;
@@ -171,21 +236,58 @@ void fill_matrix(gko::matrix::Dense<sunrealtype>* matrix)
   sunindextype mat_cols = matrix->get_size()[1];
   sunrealtype* mat_data = matrix->get_values();
 
-#if defined(USE_CUDA)
+#if defined(USE_CUDA) || defined(USE_HIP)
   unsigned threads_per_block = 256;
-  unsigned num_blocks        = (mat_rows + threads_per_block - 1) / threads_per_block;
+  unsigned num_blocks = (mat_rows + threads_per_block - 1) / threads_per_block;
 
   fill_kernel<<<num_blocks, threads_per_block>>>(mat_rows, mat_cols, mat_data);
-  HIP_OR_CUDA(hipDeviceSynchronize();, cudaDeviceSynchronize(););
+  HIP_OR_CUDA_OR_SYCL(hipDeviceSynchronize(), cudaDeviceSynchronize(), );
+#elif defined(USE_DPCPP)
+  std::dynamic_pointer_cast<const gko::DpcppExecutor>(matrix->get_executor())
+    ->get_queue()
+    ->submit(
+      [&](sycl::handler& cgh)
+      {
+        cgh.parallel_for(mat_rows,
+                         [=](sycl::id<1> id)
+                         {
+                           const sunindextype row = id[0];
+                           // copied from fill_kernel for dense
+                           if (row == 0)
+                           {
+                             // first row
+                             mat_data[0] = 2;
+                             mat_data[1] = -1;
+                           }
+                           else if (row == mat_rows - 1)
+                           {
+                             // last row
+                             mat_data[mat_cols * mat_rows - 2] = -1;
+                             mat_data[mat_cols * mat_rows - 1] = 2;
+                           }
+                           else if (row < mat_rows)
+                           {
+                             // other rows
+                             sunindextype idx  = mat_cols * row + row;
+                             mat_data[idx - 1] = -1;
+                             mat_data[idx]     = 2;
+                             mat_data[idx + 1] = -1;
+                           }
+                         });
+      });
+  matrix->get_executor()->synchronize();
 #else
   // Matrix entries
   const sunrealtype vals[] = {-1, 2, -1};
 
   // Fill matrix
-  for (auto row = 0; row < mat_rows; ++row) {
-    for (auto diag_offset : {-1, 0, 1}) {
+  for (auto row = 0; row < mat_rows; ++row)
+  {
+    for (auto diag_offset : {-1, 0, 1})
+    {
       auto col = row + diag_offset;
-      if (0 <= col && col < mat_cols) {
+      if (0 <= col && col < mat_cols)
+      {
         // Data stored in row-major format
         auto idx      = row * mat_cols + col;
         mat_data[idx] = vals[diag_offset + 1];
@@ -200,11 +302,14 @@ void fill_matrix(gko::matrix::Dense<sunrealtype>* matrix)
  * -------------------------------------------------------------------------- */
 
 template<class GkoSolverType, class GkoMatrixType>
-void Test_Move(std::unique_ptr<typename GkoSolverType::Factory>&& gko_solver_factory, sundials::Context& sunctx)
+void Test_Move(std::unique_ptr<typename GkoSolverType::Factory>&& gko_solver_factory,
+               sundials::Context& sunctx)
 {
   // Move constructor
-  sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType> solver{std::move(gko_solver_factory), sunctx};
-  sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType> solver2{std::move(solver)};
+  sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>
+    solver{std::move(gko_solver_factory), sunctx};
+  sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType> solver2{
+    std::move(solver)};
   assert(solver2.GkoFactory());
   assert(solver2.GkoExec());
   assert(SUNLinSolNumIters(solver2) == 0);
@@ -219,6 +324,12 @@ void Test_Move(std::unique_ptr<typename GkoSolverType::Factory>&& gko_solver_fac
   std::cout << "    PASSED test -- Test_Move\n";
 }
 
+/* -------------------------------------------------------------------------- *
+ * Global Executor for sync_device                                            *
+ * -------------------------------------------------------------------------- */
+// sycl only provides synchronize on queue
+std::shared_ptr<const gko::Executor> global_exec;
+
 /* -------------------------------------------------------------------------- *
  * SUNLinSol_Ginkgo Testing Routine                                           *
  * -------------------------------------------------------------------------- */
@@ -231,20 +342,28 @@ int main(int argc, char* argv[])
   sundials::Context sunctx;
 
 #if defined(USE_HIP)
-  auto gko_exec{gko::HipExecutor::create(0, gko::OmpExecutor::create(), false, gko::allocation_mode::device)};
+  auto gko_exec{gko::HipExecutor::create(0, gko::OmpExecutor::create(), false,
+                                         gko::allocation_mode::device)};
 #elif defined(USE_CUDA)
-  auto gko_exec{gko::CudaExecutor::create(0, gko::OmpExecutor::create(), false, gko::allocation_mode::device)};
+  auto gko_exec{gko::CudaExecutor::create(0, gko::OmpExecutor::create(), false,
+                                          gko::allocation_mode::device)};
+#elif defined(USE_DPCPP)
+  auto gko_exec{gko::DpcppExecutor::create(0, gko::ReferenceExecutor::create())};
 #elif defined(USE_OMP)
   auto gko_exec{gko::OmpExecutor::create()};
 #else
   auto gko_exec{gko::ReferenceExecutor::create()};
 #endif
 
+  // For sync_device
+  global_exec = gko_exec;
+
   /* ------------ *
    * Check inputs *
    * ------------ */
 
-  if (argc < 7) {
+  if (argc < 7)
+  {
     std::cerr << "ERROR: SIX (6) inputs required:\n"
               << "  1) method\n"
               << "  2) matrix type\n"
@@ -256,13 +375,13 @@ int main(int argc, char* argv[])
   }
 
   std::string method{argv[++argi]};
-  std::transform(method.begin(), method.end(), method.begin(), [](unsigned char c) { return std::tolower(c); });
+  std::transform(method.begin(), method.end(), method.begin(),
+                 [](unsigned char c) { return std::tolower(c); });
 
-  if (!methods.count(method)) {
+  if (!methods.count(method))
+  {
     std::cerr << "ERROR: method must be one of ";
-    for (const auto& m : methods) {
-      std::cout << m.first << ", ";
-    }
+    for (const auto& m : methods) { std::cout << m.first << ", "; }
     std::cout << std::endl;
     return 1;
   }
@@ -271,31 +390,33 @@ int main(int argc, char* argv[])
   std::transform(matrix_type.begin(), matrix_type.end(), matrix_type.begin(),
                  [](unsigned char c) { return std::tolower(c); });
 
-  if (!matrix_types.count(matrix_type)) {
+  if (!matrix_types.count(matrix_type))
+  {
     std::cerr << "ERROR: matrix type must be one of ";
-    for (const auto& m : matrix_types) {
-      std::cout << m.first << ", ";
-    }
-    std::cout << std::endl;
+    for (const auto& m : matrix_types) { std::cerr << m.first << ", "; }
+    std::cerr << std::endl;
     return 1;
   }
 
   const auto matcols{static_cast<sunindextype>(atoll(argv[++argi]))};
-  if (matcols <= 0) {
+  if (matcols <= 0)
+  {
     std::cerr << "ERROR: number of matrix columns must be a positive integer\n";
     return 1;
   }
   const auto matrows{matcols};
 
   const auto matcond{static_cast<sunrealtype>(atof(argv[++argi]))};
-  if (matcond < 0) {
+  if (matcond < 0)
+  {
     std::cerr << "ERROR: matrix condition number must be positive or 0 "
                  "(poisson test)\n";
     return 1;
   }
 
   const auto max_iters{static_cast<unsigned long>(atoll(argv[++argi]))};
-  if (max_iters <= 0) {
+  if (max_iters <= 0)
+  {
     std::cerr << "ERROR: max iterations must be a positive integer\n";
     return 1;
   }
@@ -314,7 +435,11 @@ int main(int argc, char* argv[])
    * Create solution and RHS vectors *
    * ------------------------------- */
 
+#if defined(USE_DPCPP)
+  N_Vector x{N_VNew(matcols, gko_exec->get_queue(), sunctx)};
+#else
   N_Vector x{N_VNew(matcols, sunctx)};
+#endif
   N_Vector b{N_VClone(x)};
 
   /* Fill x with random data */
@@ -322,10 +447,12 @@ int main(int argc, char* argv[])
   std::uniform_real_distribution<sunrealtype> distribution_real(8, 10);
 
   auto xdata{N_VGetArrayPointer(x)};
-  for (sunindextype i = 0; i < matcols; i++) {
+  for (sunindextype i = 0; i < matcols; i++)
+  {
     xdata[i] = distribution_real(engine);
   }
-  HIP_OR_CUDA(N_VCopyToDevice_Hip(x), N_VCopyToDevice_Cuda(x));
+  HIP_OR_CUDA_OR_SYCL(N_VCopyToDevice_Hip(x), N_VCopyToDevice_Cuda(x),
+                      N_VCopyToDevice_Sycl(x));
 
   /* -------------------- *
    * Create system matrix *
@@ -338,44 +465,55 @@ int main(int argc, char* argv[])
 
   auto matrix_dim{gko::dim<2>(matrows, matcols)};
 
-  if (matrix_type == "csr") {
+  if (matrix_type == "csr")
+  {
     using GkoMatrixType = gko::matrix::Csr<sunrealtype, sunindextype>;
     auto matrix_nnz{3 * matrows - 2};
-    auto gko_matrix = gko::share(GkoMatrixType::create(gko_exec, matrix_dim, matrix_nnz));
-
-    if (matcond) {
-      auto gko_matdata{gko::matrix_data<sunrealtype, sunindextype>::cond(matrows,
-                                                                         gko::remove_complex<sunrealtype>{matcond},
-                                                                         distribution_real, engine)};
+    auto gko_matrix =
+      gko::share(GkoMatrixType::create(gko_exec, matrix_dim, matrix_nnz));
+
+    if (matcond)
+    {
+      auto gko_matdata{gko::matrix_data<
+        sunrealtype, sunindextype>::cond(matrows,
+                                         gko::remove_complex<sunrealtype>{matcond},
+                                         distribution_real, engine)};
       gko_matdata.remove_zeros();
       gko_matrix->read(gko_matdata);
     }
-    else {
-      fill_matrix(gko::lend(gko_matrix));
-    }
-    A = std::make_unique<sundials::ginkgo::Matrix<GkoMatrixType>>(std::move(gko_matrix), sunctx);
+    else { fill_matrix(gko::lend(gko_matrix)); }
+    A = std::make_unique<sundials::ginkgo::Matrix<GkoMatrixType>>(std::move(
+                                                                    gko_matrix),
+                                                                  sunctx);
   }
-  else if (matrix_type == "dense") {
+  else if (matrix_type == "dense")
+  {
     using GkoMatrixType = gko::matrix::Dense<sunrealtype>;
-    auto gko_matrix     = gko::share(GkoMatrixType::create(gko_exec, matrix_dim));
-    if (matcond) {
-      auto gko_matdata{gko::matrix_data<sunrealtype, sunindextype>::cond(matrows,
-                                                                         gko::remove_complex<sunrealtype>{matcond},
-                                                                         distribution_real, engine)};
+    auto gko_matrix = gko::share(GkoMatrixType::create(gko_exec, matrix_dim));
+    if (matcond)
+    {
+      auto gko_matdata{gko::matrix_data<
+        sunrealtype, sunindextype>::cond(matrows,
+                                         gko::remove_complex<sunrealtype>{matcond},
+                                         distribution_real, engine)};
       gko_matdata.remove_zeros();
       gko_matrix->read(gko_matdata);
     }
-    else {
+    else
+    {
       gko_matrix->fill(0.0);
       fill_matrix(gko::lend(gko_matrix));
     }
-    A = std::make_unique<sundials::ginkgo::Matrix<GkoMatrixType>>(std::move(gko_matrix), sunctx);
+    A = std::make_unique<sundials::ginkgo::Matrix<GkoMatrixType>>(std::move(
+                                                                    gko_matrix),
+                                                                  sunctx);
   }
 
   /* Create right-hand side vector for linear solve */
   fails += SUNMatMatvecSetup(A->Convert());
   fails += SUNMatMatvec(A->Convert(), x, b);
-  if (fails) {
+  if (fails)
+  {
     std::cerr << "FAIL: SUNLinSol SUNMatMatvec failure\n";
     N_VDestroy(x);
     N_VDestroy(b);
@@ -387,154 +525,211 @@ int main(int argc, char* argv[])
    * -------------------- */
 
   /* Use default stopping criteria */
-  auto crit{sundials::ginkgo::DefaultStop::build().with_max_iters(max_iters).on(gko_exec)};
+  auto crit{sundials::ginkgo::DefaultStop::build().with_max_iters(max_iters).on(
+    gko_exec)};
 
   /* Use a Jacobi preconditioner */
-  auto precon{gko::preconditioner::Jacobi<sunrealtype, sunindextype>::build().on(gko_exec)};
+  auto precon{
+    gko::preconditioner::Jacobi<sunrealtype, sunindextype>::build().on(gko_exec)};
 
   /* Wrap ginkgo matrix for SUNDIALS,
      Matrix is overloaded to a SUNLinearSolver. */
   std::unique_ptr<sundials::ConvertibleTo<SUNLinearSolver>> LS;
 
-  if (method == "bicg") {
+  if (method == "bicg")
+  {
     using GkoSolverType = gko::solver::Bicg<sunrealtype>;
-    auto gko_solver_factory{
-        GkoSolverType::build().with_criteria(std::move(crit)).with_preconditioner(std::move(precon)).on(gko_exec)};
-    if (matrix_type == "csr") {
+    auto gko_solver_factory{GkoSolverType::build()
+                              .with_criteria(std::move(crit))
+                              .with_preconditioner(std::move(precon))
+                              .on(gko_exec)};
+    if (matrix_type == "csr")
+    {
       using GkoMatrixType = gko::matrix::Csr<sunrealtype, sunindextype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
-    else if (matrix_type == "dense") {
+    else if (matrix_type == "dense")
+    {
       using GkoMatrixType = gko::matrix::Dense<sunrealtype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
   }
-  else if (method == "bicgstab") {
+  else if (method == "bicgstab")
+  {
     using GkoSolverType = gko::solver::Bicgstab<sunrealtype>;
-    auto gko_solver_factory{
-        GkoSolverType::build().with_criteria(std::move(crit)).with_preconditioner(std::move(precon)).on(gko_exec)};
-    if (matrix_type == "csr") {
+    auto gko_solver_factory{GkoSolverType::build()
+                              .with_criteria(std::move(crit))
+                              .with_preconditioner(std::move(precon))
+                              .on(gko_exec)};
+    if (matrix_type == "csr")
+    {
       using GkoMatrixType = gko::matrix::Csr<sunrealtype, sunindextype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
-    else if (matrix_type == "dense") {
+    else if (matrix_type == "dense")
+    {
       using GkoMatrixType = gko::matrix::Dense<sunrealtype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
   }
-  else if (method == "cg") {
+  else if (method == "cg")
+  {
     using GkoSolverType = gko::solver::Cg<sunrealtype>;
-    auto gko_solver_factory{
-        GkoSolverType::build().with_criteria(std::move(crit)).with_preconditioner(std::move(precon)).on(gko_exec)};
-    if (matrix_type == "csr") {
+    auto gko_solver_factory{GkoSolverType::build()
+                              .with_criteria(std::move(crit))
+                              .with_preconditioner(std::move(precon))
+                              .on(gko_exec)};
+    if (matrix_type == "csr")
+    {
       using GkoMatrixType = gko::matrix::Csr<sunrealtype, sunindextype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
-    else if (matrix_type == "dense") {
+    else if (matrix_type == "dense")
+    {
       using GkoMatrixType = gko::matrix::Dense<sunrealtype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
   }
-  else if (method == "cgs") {
+  else if (method == "cgs")
+  {
     using GkoSolverType = gko::solver::Cgs<sunrealtype>;
-    auto gko_solver_factory{
-        GkoSolverType::build().with_criteria(std::move(crit)).with_preconditioner(std::move(precon)).on(gko_exec)};
-    if (matrix_type == "csr") {
+    auto gko_solver_factory{GkoSolverType::build()
+                              .with_criteria(std::move(crit))
+                              .with_preconditioner(std::move(precon))
+                              .on(gko_exec)};
+    if (matrix_type == "csr")
+    {
       using GkoMatrixType = gko::matrix::Csr<sunrealtype, sunindextype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
-    else if (matrix_type == "dense") {
+    else if (matrix_type == "dense")
+    {
       using GkoMatrixType = gko::matrix::Dense<sunrealtype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
   }
-  else if (method == "fcg") {
+  else if (method == "fcg")
+  {
     using GkoSolverType = gko::solver::Fcg<sunrealtype>;
-    auto gko_solver_factory{
-        GkoSolverType::build().with_criteria(std::move(crit)).with_preconditioner(std::move(precon)).on(gko_exec)};
-    if (matrix_type == "csr") {
+    auto gko_solver_factory{GkoSolverType::build()
+                              .with_criteria(std::move(crit))
+                              .with_preconditioner(std::move(precon))
+                              .on(gko_exec)};
+    if (matrix_type == "csr")
+    {
       using GkoMatrixType = gko::matrix::Csr<sunrealtype, sunindextype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
-    else if (matrix_type == "dense") {
+    else if (matrix_type == "dense")
+    {
       using GkoMatrixType = gko::matrix::Dense<sunrealtype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
   }
-  else if (method == "gmres") {
+  else if (method == "gmres")
+  {
     using GkoSolverType = gko::solver::Gmres<sunrealtype>;
-    auto gko_solver_factory{
-        GkoSolverType::build().with_criteria(std::move(crit)).with_preconditioner(std::move(precon)).on(gko_exec)};
-    if (matrix_type == "csr") {
+    auto gko_solver_factory{GkoSolverType::build()
+                              .with_criteria(std::move(crit))
+                              .with_preconditioner(std::move(precon))
+                              .on(gko_exec)};
+    if (matrix_type == "csr")
+    {
       using GkoMatrixType = gko::matrix::Csr<sunrealtype, sunindextype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
-    else if (matrix_type == "dense") {
+    else if (matrix_type == "dense")
+    {
       using GkoMatrixType = gko::matrix::Dense<sunrealtype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
   }
-  else if (method == "idr") {
+  else if (method == "idr")
+  {
     using GkoSolverType = gko::solver::Idr<sunrealtype>;
-    auto gko_solver_factory{
-        GkoSolverType::build().with_criteria(std::move(crit)).with_preconditioner(std::move(precon)).on(gko_exec)};
-    if (matrix_type == "csr") {
+    auto gko_solver_factory{GkoSolverType::build()
+                              .with_criteria(std::move(crit))
+                              .with_preconditioner(std::move(precon))
+                              .on(gko_exec)};
+    if (matrix_type == "csr")
+    {
       using GkoMatrixType = gko::matrix::Csr<sunrealtype, sunindextype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
-    else if (matrix_type == "dense") {
+    else if (matrix_type == "dense")
+    {
       using GkoMatrixType = gko::matrix::Dense<sunrealtype>;
-      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(), sunctx);
-      LS = std::make_unique<sundials::ginkgo::LinearSolver<GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory),
-                                                                                          sunctx);
+      Test_Move<GkoSolverType, GkoMatrixType>(gko_solver_factory->clone(),
+                                              sunctx);
+      LS = std::make_unique<sundials::ginkgo::LinearSolver<
+        GkoSolverType, GkoMatrixType>>(std::move(gko_solver_factory), sunctx);
     }
   }
 
   /* Run Tests */
   fails += Test_SUNLinSolGetID(LS->Convert(), SUNLINEARSOLVER_GINKGO, 0);
-  fails += Test_SUNLinSolGetType(LS->Convert(), SUNLINEARSOLVER_MATRIX_ITERATIVE, 0);
+  fails += Test_SUNLinSolGetType(LS->Convert(),
+                                 SUNLINEARSOLVER_MATRIX_ITERATIVE, 0);
   fails += Test_SUNLinSolInitialize(LS->Convert(), 0);
   fails += Test_SUNLinSolSetup(LS->Convert(), A->Convert(), 0);
-  fails += Test_SUNLinSolSolve(LS->Convert(), A->Convert(), x, b, 1e4 * SUN_UNIT_ROUNDOFF, SUNTRUE, 0);
+  fails += Test_SUNLinSolSolve(LS->Convert(), A->Convert(), x, b,
+                               1e4 * SUN_UNIT_ROUNDOFF, SUNTRUE, 0);
 
   /* Print result */
-  if (fails) {
+  if (fails)
+  {
     std::cerr << "FAIL: SUNLinSol module failed " << fails << " tests\n\n";
   }
-  else {
-    std::cout << "\nSUCCESS: SUNLinSol module passed all tests\n\n";
-  }
+  else { std::cout << "\nSUCCESS: SUNLinSol module passed all tests\n\n"; }
 
   /* Print solve information */
-  std::cout << "Number of linear solver iterations: " << static_cast<long int>(SUNLinSolNumIters(LS->Convert()))
+  std::cout << "Number of linear solver iterations: "
+            << static_cast<long int>(SUNLinSolNumIters(LS->Convert()))
             << std::endl;
-  std::cout << "Final residual norm: " << SUNLinSolResNorm(LS->Convert()) << std::endl;
+  std::cout << "Final residual norm: " << SUNLinSolResNorm(LS->Convert())
+            << std::endl;
+
+  // clear global_exec
+  global_exec = nullptr;
 
   /* Free solver, matrix and vectors */
   N_VDestroy(x);
@@ -552,8 +747,12 @@ int check_vector(N_Vector expected, N_Vector actual, sunrealtype check_tol)
   int failure{0};
 
   /* copy vectors to host */
-  HIP_OR_CUDA(N_VCopyFromDevice_Hip(actual), N_VCopyFromDevice_Cuda(actual));
-  HIP_OR_CUDA(N_VCopyFromDevice_Hip(expected), N_VCopyFromDevice_Cuda(expected));
+  HIP_OR_CUDA_OR_SYCL(N_VCopyFromDevice_Hip(actual),
+                      N_VCopyFromDevice_Cuda(actual),
+                      N_VCopyFromDevice_Sycl(actual));
+  HIP_OR_CUDA_OR_SYCL(N_VCopyFromDevice_Hip(expected),
+                      N_VCopyFromDevice_Cuda(expected),
+                      N_VCopyFromDevice_Sycl(expected));
 
   /* get vector data */
   auto xdata{N_VGetArrayPointer(actual)};
@@ -563,22 +762,27 @@ int check_vector(N_Vector expected, N_Vector actual, sunrealtype check_tol)
   auto xldata{N_VGetLength(actual)};
   auto yldata{N_VGetLength(expected)};
 
-  if (xldata != yldata) {
+  if (xldata != yldata)
+  {
     std::cerr << ">>> ERROR: check_vector: Different data array lengths\n";
     return 1;
   }
 
   /* check vector data */
-  for (sunindextype i = 0; i < xldata; i++) {
+  for (sunindextype i = 0; i < xldata; i++)
+  {
     failure += SUNRCompareTol(xdata[i], ydata[i], check_tol);
   }
 
-  if (failure > ZERO) {
+  if (failure > ZERO)
+  {
     std::cerr << "check_vector failures:\n";
-    for (sunindextype i = 0; i < xldata; i++) {
-      if (SUNRCompareTol(xdata[i], ydata[i], check_tol) != 0) {
-        std::cerr << "  x[" << i << "] = " << xdata[i] << " != " << ydata[i] << " (err = " << abs(xdata[i] - ydata[i])
-                  << ")\n";
+    for (sunindextype i = 0; i < xldata; i++)
+    {
+      if (SUNRCompareTol(xdata[i], ydata[i], check_tol) != 0)
+      {
+        std::cerr << "  x[" << i << "] = " << xdata[i] << " != " << ydata[i]
+                  << " (err = " << std::abs(xdata[i] - ydata[i]) << ")\n";
       }
     }
   }
@@ -588,5 +792,6 @@ int check_vector(N_Vector expected, N_Vector actual, sunrealtype check_tol)
 
 void sync_device()
 {
-  HIP_OR_CUDA(hipDeviceSynchronize(), cudaDeviceSynchronize());
+  HIP_OR_CUDA_OR_SYCL(hipDeviceSynchronize(), cudaDeviceSynchronize(),
+                      global_exec->synchronize());
 }
diff --git a/examples/sunmatrix/ginkgo/CMakeLists.txt b/examples/sunmatrix/ginkgo/CMakeLists.txt
index 29b861e408..09474ac97b 100644
--- a/examples/sunmatrix/ginkgo/CMakeLists.txt
+++ b/examples/sunmatrix/ginkgo/CMakeLists.txt
@@ -28,7 +28,7 @@ include_directories(..)
 
 sundials_add_examples_ginkgo(examples
   TARGETS test_sunmatrix_obj sundials_sunmatrixdense
-  BACKENDS REF OMP CUDA HIP
+  BACKENDS REF OMP CUDA HIP DPCPP
   UNIT_TEST)
 
 # Install the targets
@@ -40,6 +40,9 @@ if(EXAMPLES_INSTALL)
   if(SUNDIALS_GINKGO_BACKENDS MATCHES "HIP")
     list(APPEND vectors nvechip)
   endif()
+  if(SUNDIALS_GINKGO_BACKENDS MATCHES "DPCPP")
+    list(APPEND vectors nvecsycl)
+  endif()
   if(SUNDIALS_GINKGO_BACKENDS MATCHES "OMP")
     list(APPEND vectors nvecopenmp)
   endif()
diff --git a/examples/sunmatrix/ginkgo/test_sunmatrix_ginkgo.cpp b/examples/sunmatrix/ginkgo/test_sunmatrix_ginkgo.cpp
index 6cb291b273..f0e3bc2685 100644
--- a/examples/sunmatrix/ginkgo/test_sunmatrix_ginkgo.cpp
+++ b/examples/sunmatrix/ginkgo/test_sunmatrix_ginkgo.cpp
@@ -25,13 +25,15 @@
 #include "test_sunmatrix.h"
 
 #if defined(USE_HIP)
-#define REF_OR_OMP_OR_HIP_OR_CUDA(a, b, c, d) c
+#define REF_OR_OMP_OR_HIP_OR_CUDA_OR_SYCL(a, b, c, d, e) c
 #elif defined(USE_CUDA)
-#define REF_OR_OMP_OR_HIP_OR_CUDA(a, b, c, d) d
+#define REF_OR_OMP_OR_HIP_OR_CUDA_OR_SYCL(a, b, c, d, e) d
+#elif defined(USE_DPCPP)
+#define REF_OR_OMP_OR_HIP_OR_CUDA_OR_SYCL(a, b, c, d, e) e
 #elif defined(USE_OMP)
-#define REF_OR_OMP_OR_HIP_OR_CUDA(a, b, c, d) b
+#define REF_OR_OMP_OR_HIP_OR_CUDA_OR_SYCL(a, b, c, d, e) b
 #else
-#define REF_OR_OMP_OR_HIP_OR_CUDA(a, b, c, d) a
+#define REF_OR_OMP_OR_HIP_OR_CUDA_OR_SYCL(a, b, c, d, e) a
 #endif
 
 #if defined(USE_CUDA)
@@ -40,6 +42,8 @@
 #include <nvector/nvector_hip.h>
 #elif defined(USE_OMP)
 #include <nvector/nvector_openmp.h>
+#elif defined(USE_DPCPP)
+#include <nvector/nvector_sycl.h>
 #else
 #include <nvector/nvector_serial.h>
 #endif
@@ -92,73 +96,91 @@ int main(int argc, char* argv[])
   /* Create SUNDIALS context before calling any other SUNDIALS function*/
   sundials::Context sunctx;
 
-  auto gko_exec{REF_OR_OMP_OR_HIP_OR_CUDA(gko::ReferenceExecutor::create(), gko::OmpExecutor::create(),
-                                          gko::HipExecutor::create(0, gko::OmpExecutor::create(), true),
-                                          gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true))};
+  auto gko_exec{
+    REF_OR_OMP_OR_HIP_OR_CUDA_OR_SYCL(gko::ReferenceExecutor::create(),
+                                      gko::OmpExecutor::create(),
+                                      gko::HipExecutor::create(0,
+                                                               gko::OmpExecutor::create(),
+                                                               true),
+                                      gko::CudaExecutor::create(0,
+                                                                gko::OmpExecutor::create(),
+                                                                true),
+                                      gko::DpcppExecutor::
+                                        create(0,
+                                               gko::ReferenceExecutor::create()))};
 
   /* check input and set vector length */
-  if (argc < 4) {
-    std::cerr << "ERROR: THREE (3) Input required: matrix rows, matrix cols, format (0 = csr, 1 = dense)\n";
+  if (argc < 4)
+  {
+    std::cerr << "ERROR: THREE (3) Input required: matrix rows, matrix cols, "
+                 "format (0 = csr, 1 = dense)\n";
     return 1;
   }
 
   int argi{0};
 
   auto matrows{static_cast<sunindextype>(atol(argv[++argi]))};
-  if (matrows <= 0) {
+  if (matrows <= 0)
+  {
     std::cerr << "ERROR: number of rows must be a positive integer \n";
     return 1;
   }
 
   auto matcols{static_cast<sunindextype>(atol(argv[++argi]))};
-  if (matcols <= 0) {
+  if (matcols <= 0)
+  {
     std::cerr << "ERROR: number of cols must be a positive integer \n";
     return 1;
   }
 
   auto format{static_cast<int>(atoi(argv[++argi]))};
-  if (format != 0 && format != 1) {
+  if (format != 0 && format != 1)
+  {
     std::cerr << "ERROR: format must be 0 (csr) or 1 (dense) \n";
     return 1;
   }
 
-  if (format == 0) {
-    using_csr_matrix_type = true;
-  }
-  else if (format == 1) {
-    using_dense_matrix_type = true;
-  }
+  if (format == 0) { using_csr_matrix_type = true; }
+  else if (format == 1) { using_dense_matrix_type = true; }
 
 #if defined(USE_OMP)
   int num_threads{1};
   auto omp_num_threads_var{std::getenv("OMP_NUM_THREADS")};
-  if (omp_num_threads_var) {
-    num_threads = std::atoi(omp_num_threads_var);
-  }
+  if (omp_num_threads_var) { num_threads = std::atoi(omp_num_threads_var); }
 #endif
 
   SetTiming(0);
 
   int square{matrows == matcols ? 1 : 0};
-  std::cout << "\n SUNMATRIX_GINKGO test: size " << matrows << " x " << matcols << ", format ";
-  if (using_csr_matrix_type) {
-    std::cout << "csr\n";
-  }
-  else if (using_dense_matrix_type) {
-    std::cout << "dense\n";
-  }
+  std::cout << "\n SUNMATRIX_GINKGO test: size " << matrows << " x " << matcols
+            << ", format ";
+  if (using_csr_matrix_type) { std::cout << "csr\n"; }
+  else if (using_dense_matrix_type) { std::cout << "dense\n"; }
 
   /* Create vectors and matrices */
   std::default_random_engine generator;
-  std::uniform_real_distribution<sunrealtype> distribution{0.0, static_cast<sunrealtype>(matrows)};
-
-  N_Vector x{REF_OR_OMP_OR_HIP_OR_CUDA(N_VNew_Serial(matcols, sunctx), N_VNew_OpenMP(matcols, num_threads, sunctx),
-                                       N_VNew_Hip(matcols, sunctx), N_VNew_Cuda(matcols, sunctx))};
-  N_Vector y{REF_OR_OMP_OR_HIP_OR_CUDA(N_VNew_Serial(matrows, sunctx), N_VNew_OpenMP(matrows, num_threads, sunctx),
-                                       N_VNew_Hip(matrows, sunctx), N_VNew_Cuda(matrows, sunctx))};
+  std::uniform_real_distribution<sunrealtype>
+    distribution{0.0, static_cast<sunrealtype>(matrows)};
+
+  N_Vector x{
+    REF_OR_OMP_OR_HIP_OR_CUDA_OR_SYCL(N_VNew_Serial(matcols, sunctx),
+                                      N_VNew_OpenMP(matcols, num_threads, sunctx),
+                                      N_VNew_Hip(matcols, sunctx),
+                                      N_VNew_Cuda(matcols, sunctx),
+                                      N_VNew_Sycl(matcols, gko_exec->get_queue(),
+                                                  sunctx))};
+  N_Vector y{
+    REF_OR_OMP_OR_HIP_OR_CUDA_OR_SYCL(N_VNew_Serial(matrows, sunctx),
+                                      N_VNew_OpenMP(matrows, num_threads, sunctx),
+                                      N_VNew_Hip(matrows, sunctx),
+                                      N_VNew_Cuda(matrows, sunctx),
+                                      N_VNew_Sycl(matrows, gko_exec->get_queue(),
+                                                  sunctx))};
 
   auto matrix_dim{gko::dim<2>(matrows, matcols)};
-  auto gko_matdata{gko::matrix_data<sunrealtype, sunindextype>(matrix_dim, distribution, generator)};
+  auto gko_matdata{gko::matrix_data<sunrealtype, sunindextype>(matrix_dim,
+                                                               distribution,
+                                                               generator)};
 
   /* Wrap ginkgo matrices for SUNDIALS.
      sundials::ginkgo::Matrix is overloaded to a SUNMatrix. */
@@ -167,84 +189,105 @@ int main(int argc, char* argv[])
   std::unique_ptr<sundials::ConvertibleTo<SUNMatrix>> I;
 
   auto xdata{N_VGetArrayPointer(x)};
-  for (sunindextype i = 0; i < matcols; i++) {
+  for (sunindextype i = 0; i < matcols; i++)
+  {
     xdata[i] = distribution(generator);
   }
-  REF_OR_OMP_OR_HIP_OR_CUDA(, , N_VCopyToDevice_Hip(x), N_VCopyToDevice_Cuda(x));
+  REF_OR_OMP_OR_HIP_OR_CUDA_OR_SYCL(, , N_VCopyToDevice_Hip(x),
+                                    N_VCopyToDevice_Cuda(x),
+                                    N_VCopyToDevice_Sycl(x));
 
   /* Compute true solution */
   SUNMatrix Aref{SUNDenseMatrix(matrows, matcols, sunctx)};
-  if (using_csr_matrix_type) {
+  if (using_csr_matrix_type)
+  {
     auto gko_matrix{GkoCsrMat::create(gko_exec, matrix_dim)};
     gko_matrix->read(gko_matdata);
     auto gko_ident{GkoCsrMat::create(gko_exec, matrix_dim)};
-    if (square) {
-      gko_ident->read(gko::matrix_data<sunrealtype, sunindextype>::diag(matrix_dim, 1.0));
+    if (square)
+    {
+      gko_ident->read(
+        gko::matrix_data<sunrealtype, sunindextype>::diag(matrix_dim, 1.0));
     }
 
     auto Arowptrs{gko_matrix->get_const_row_ptrs()};
     auto Acolidxs{gko_matrix->get_const_col_idxs()};
     auto Avalues{gko_matrix->get_const_values()};
-    for (auto irow = 0; irow < gko_matrix->get_size()[0]; irow++) {
-      for (auto inz = Arowptrs[irow]; inz < Arowptrs[irow + 1]; inz++) {
-        SM_ELEMENT_D(Aref, irow, Acolidxs[inz]) = Avalues[inz];
+    for (auto irow = 0; irow < gko_matrix->get_size()[0]; irow++)
+    {
+      for (auto inz = gko_exec->copy_val_to_host(Arowptrs + irow);
+           inz < gko_exec->copy_val_to_host(Arowptrs + irow + 1); inz++)
+      {
+        SM_ELEMENT_D(Aref, irow, gko_exec->copy_val_to_host(Acolidxs + inz)) =
+          gko_exec->copy_val_to_host(Avalues + inz);
       }
     }
 
     fails += Test_CopyAndMove(gko_matrix->clone(), sunctx);
 
-    A = std::make_unique<sundials::ginkgo::Matrix<GkoCsrMat>>(std::move(gko_matrix), sunctx);
-    I = std::make_unique<sundials::ginkgo::Matrix<GkoCsrMat>>(std::move(gko_ident), sunctx);
+    A = std::make_unique<sundials::ginkgo::Matrix<GkoCsrMat>>(std::move(gko_matrix),
+                                                              sunctx);
+    I = std::make_unique<sundials::ginkgo::Matrix<GkoCsrMat>>(std::move(gko_ident),
+                                                              sunctx);
   }
-  else if (using_dense_matrix_type) {
+  else if (using_dense_matrix_type)
+  {
     auto gko_matrix{GkoDenseMat::create(gko_exec, matrix_dim)};
     gko_matrix->read(gko_matdata);
     auto gko_ident{GkoDenseMat::create(gko_exec, matrix_dim)};
-    if (square) {
-      gko_ident->read(gko::matrix_data<sunrealtype, sunindextype>::diag(matrix_dim, 1.0));
+    if (square)
+    {
+      gko_ident->read(
+        gko::matrix_data<sunrealtype, sunindextype>::diag(matrix_dim, 1.0));
     }
 
-    for (sunindextype j = 0; j < matcols; j++) {
-      for (sunindextype i = 0; i < matrows; i++) {
-        SM_ELEMENT_D(Aref, i, j) = gko_matrix->at(i, j);
+    auto Avalues{gko_matrix->get_const_values()};
+    for (sunindextype j = 0; j < matcols; j++)
+    {
+      for (sunindextype i = 0; i < matrows; i++)
+      {
+        SM_ELEMENT_D(Aref, i, j) =
+          gko_exec->copy_val_to_host(Avalues + i * gko_matrix->get_stride() + j);
       }
     }
 
     fails += Test_CopyAndMove(gko_matrix->clone(), sunctx);
 
-    A = std::make_unique<sundials::ginkgo::Matrix<GkoDenseMat>>(std::move(gko_matrix), sunctx);
-    I = std::make_unique<sundials::ginkgo::Matrix<GkoDenseMat>>(std::move(gko_ident), sunctx);
+    A = std::make_unique<sundials::ginkgo::Matrix<GkoDenseMat>>(std::move(
+                                                                  gko_matrix),
+                                                                sunctx);
+    I = std::make_unique<sundials::ginkgo::Matrix<GkoDenseMat>>(std::move(
+                                                                  gko_ident),
+                                                                sunctx);
   }
   SUNMatMatvec_Dense(Aref, x, y);
   SUNMatDestroy(Aref);
-
-  REF_OR_OMP_OR_HIP_OR_CUDA(, , N_VCopyToDevice_Hip(y), N_VCopyToDevice_Cuda(y));
-
+  REF_OR_OMP_OR_HIP_OR_CUDA_OR_SYCL(, , N_VCopyToDevice_Hip(y),
+                                    N_VCopyToDevice_Cuda(y),
+                                    N_VCopyToDevice_Sycl(y));
   /* SUNMatrix Tests */
   fails += Test_SUNMatGetID(*A, SUNMATRIX_GINKGO, 0);
   fails += Test_SUNMatClone(*A, 0);
   fails += Test_SUNMatCopy(*A, 0);
   fails += Test_SUNMatZero(*A, 0);
-  if (square) {
+  if (square)
+  {
 #if !defined(USE_OMP)
     // TODO(CJB): ScaleAdd with a dense matrix is not supported in develop
     // branch, possibly supported on the batch-develop branch. CSR matrix with
     // OMP executor currently fails on the develop branch.
-    if (!using_dense_matrix_type) {
-      fails += Test_SUNMatScaleAdd(*A, *I, 0);
-    }
+    if (!using_dense_matrix_type) { fails += Test_SUNMatScaleAdd(*A, *I, 0); }
 #endif
     fails += Test_SUNMatScaleAddI(*A, *I, 0);
   }
   fails += Test_SUNMatMatvec(*A, x, y, 0);
 
   /* Print result */
-  if (fails) {
+  if (fails)
+  {
     std::cerr << " FAIL: SUNMatrix module failed " << fails << " tests \n\n";
   }
-  else {
-    std::cout << " SUCCESS: SUNMatrix module passed all tests \n\n";
-  }
+  else { std::cout << " SUCCESS: SUNMatrix module passed all tests \n\n"; }
 
   /* Free vectors */
   N_VDestroy(x);
@@ -259,24 +302,31 @@ int main(int argc, char* argv[])
 int check_matrix_csr(SUNMatrix A, SUNMatrix B, realtype tol)
 {
   int failure{0};
-  auto Amat{static_cast<sundials::ginkgo::Matrix<GkoCsrMat>*>(A->content)->GkoMtx()};
-  auto Bmat{static_cast<sundials::ginkgo::Matrix<GkoCsrMat>*>(B->content)->GkoMtx()};
-  auto Arowptrs{Amat->get_const_row_ptrs()};
-  auto Acolidxs{Amat->get_const_col_idxs()};
-  auto Avalues{Amat->get_const_values()};
-  auto Browptrs{Bmat->get_const_row_ptrs()};
-  auto Bcolidxs{Bmat->get_const_col_idxs()};
-  auto Bvalues{Bmat->get_const_values()};
+  auto Amat{
+    static_cast<sundials::ginkgo::Matrix<GkoCsrMat>*>(A->content)->GkoMtx()};
+  auto Bmat{
+    static_cast<sundials::ginkgo::Matrix<GkoCsrMat>*>(B->content)->GkoMtx()};
+  auto Amat_ref = Amat->clone(Amat->get_executor()->get_master());
+  auto Bmat_ref = Bmat->clone(Bmat->get_executor()->get_master());
+  auto Arowptrs{Amat_ref->get_const_row_ptrs()};
+  auto Acolidxs{Amat_ref->get_const_col_idxs()};
+  auto Avalues{Amat_ref->get_const_values()};
+  auto Browptrs{Bmat_ref->get_const_row_ptrs()};
+  auto Bcolidxs{Bmat_ref->get_const_col_idxs()};
+  auto Bvalues{Bmat_ref->get_const_values()};
 
   /* check lengths */
-  if (Amat->get_size() != Bmat->get_size()) {
+  if (Amat_ref->get_size() != Bmat_ref->get_size())
+  {
     std::cerr << ">>> ERROR: check_matrix: Different data array lengths \n";
     return 1;
   }
 
   /* compare data */
-  for (sunindextype irow = 0; irow < Amat->get_size()[0]; irow++) {
-    for (sunindextype inz = Arowptrs[irow]; inz < Arowptrs[irow + 1]; inz++) {
+  for (sunindextype irow = 0; irow < Amat_ref->get_size()[0]; irow++)
+  {
+    for (sunindextype inz = Arowptrs[irow]; inz < Arowptrs[irow + 1]; inz++)
+    {
       failure += SUNRCompareTol(Avalues[inz], Bvalues[inz], tol);
     }
   }
@@ -287,21 +337,28 @@ int check_matrix_csr(SUNMatrix A, SUNMatrix B, realtype tol)
 int check_matrix_dense(SUNMatrix A, SUNMatrix B, realtype tol)
 {
   int failure{0};
-  auto Amat{static_cast<sundials::ginkgo::Matrix<GkoDenseMat>*>(A->content)->GkoMtx()};
-  auto Bmat{static_cast<sundials::ginkgo::Matrix<GkoDenseMat>*>(B->content)->GkoMtx()};
+  auto Amat{
+    static_cast<sundials::ginkgo::Matrix<GkoDenseMat>*>(A->content)->GkoMtx()};
+  auto Bmat{
+    static_cast<sundials::ginkgo::Matrix<GkoDenseMat>*>(B->content)->GkoMtx()};
+  auto Amat_ref = Amat->clone(Amat->get_executor()->get_master());
+  auto Bmat_ref = Bmat->clone(Bmat->get_executor()->get_master());
   auto rows{Amat->get_size()[0]};
   auto cols{Amat->get_size()[1]};
 
   /* check lengths */
-  if (Amat->get_size() != Bmat->get_size()) {
+  if (Amat->get_size() != Bmat->get_size())
+  {
     std::cerr << ">>> ERROR: check_matrix: Different data array lengths \n";
     return 1;
   }
 
   /* compare data */
-  for (sunindextype i = 0; i < rows; i++) {
-    for (sunindextype j = 0; j < cols; j++) {
-      failure += SUNRCompareTol(Amat->at(i, j), Bmat->at(i, j), tol);
+  for (sunindextype i = 0; i < rows; i++)
+  {
+    for (sunindextype j = 0; j < cols; j++)
+    {
+      failure += SUNRCompareTol(Amat_ref->at(i, j), Bmat_ref->at(i, j), tol);
     }
   }
 
@@ -318,17 +375,23 @@ extern "C" int check_matrix(SUNMatrix A, SUNMatrix B, realtype tol)
 int check_matrix_entry_csr(SUNMatrix A, realtype val, realtype tol)
 {
   int failure{0};
-  auto Amat{static_cast<sundials::ginkgo::Matrix<GkoCsrMat>*>(A->content)->GkoMtx()};
-  auto Arowptrs{Amat->get_const_row_ptrs()};
-  auto Acolidxs{Amat->get_const_col_idxs()};
-  auto Avalues{Amat->get_const_values()};
+  auto Amat{
+    static_cast<sundials::ginkgo::Matrix<GkoCsrMat>*>(A->content)->GkoMtx()};
+  auto Amat_ref = Amat->clone(Amat->get_executor()->get_master());
+  auto Arowptrs{Amat_ref->get_const_row_ptrs()};
+  auto Acolidxs{Amat_ref->get_const_col_idxs()};
+  auto Avalues{Amat_ref->get_const_values()};
 
   /* compare data */
-  for (sunindextype irow = 0; irow < Amat->get_size()[0]; irow++) {
-    for (sunindextype inz = Arowptrs[irow]; inz < Arowptrs[irow + 1]; inz++) {
+  for (sunindextype irow = 0; irow < Amat_ref->get_size()[0]; irow++)
+  {
+    for (sunindextype inz = Arowptrs[irow]; inz < Arowptrs[irow + 1]; inz++)
+    {
       int check = SUNRCompareTol(Avalues[inz], val, tol);
-      if (check) {
-        std::cerr << "  actual = " << Avalues[inz] << " != " << val << " (err = " << SUNRabs(Avalues[inz] - val) << ")\n";
+      if (check)
+      {
+        std::cerr << "  actual = " << Avalues[inz] << " != " << val
+                  << " (err = " << SUNRabs(Avalues[inz] - val) << ")\n";
         failure += check;
       }
     }
@@ -340,17 +403,23 @@ int check_matrix_entry_csr(SUNMatrix A, realtype val, realtype tol)
 int check_matrix_entry_dense(SUNMatrix A, realtype val, realtype tol)
 {
   int failure{0};
-  auto Amat{static_cast<sundials::ginkgo::Matrix<GkoDenseMat>*>(A->content)->GkoMtx()};
+  auto Amat{
+    static_cast<sundials::ginkgo::Matrix<GkoDenseMat>*>(A->content)->GkoMtx()};
   auto rows{Amat->get_size()[0]};
   auto cols{Amat->get_size()[1]};
 
+  auto Amat_ref = Amat->clone(Amat->get_executor()->get_master());
   /* compare data */
-  for (sunindextype i = 0; i < rows; i++) {
-    for (sunindextype j = 0; j < cols; j++) {
-      int check = SUNRCompareTol(Amat->at(i, j), val, tol);
-      if (check) {
-        std::cerr << "  actual[" << i << "," << j << "] = " << Amat->at(i, j) << " != " << val
-                  << " (err = " << SUNRabs(Amat->at(i, j) - val) << ")\n";
+  for (sunindextype i = 0; i < rows; i++)
+  {
+    for (sunindextype j = 0; j < cols; j++)
+    {
+      int check = SUNRCompareTol(Amat_ref->at(i, j), val, tol);
+      if (check)
+      {
+        std::cerr << "  actual[" << i << "," << j
+                  << "] = " << Amat_ref->at(i, j) << " != " << val
+                  << " (err = " << SUNRabs(Amat_ref->at(i, j) - val) << ")\n";
         failure += check;
       }
     }
@@ -361,15 +430,12 @@ int check_matrix_entry_dense(SUNMatrix A, realtype val, realtype tol)
 
 extern "C" int check_matrix_entry(SUNMatrix A, realtype val, realtype tol)
 {
-  if (using_csr_matrix_type) {
-    return check_matrix_entry_csr(A, val, tol);
-  }
-  else if (using_dense_matrix_type) {
-    return check_matrix_entry_dense(A, val, tol);
-  }
-  else {
-    return 1;
-  }
+  /* Hand the comparison to the checker for the matrix format in use: the
+     CSR flag is consulted first, then the dense flag. If neither flag is
+     set, 1 is returned. */
+  if (using_csr_matrix_type) { return check_matrix_entry_csr(A, val, tol); }
+  if (using_dense_matrix_type) { return check_matrix_entry_dense(A, val, tol); }
+  return 1;
 }
 
 extern "C" int check_vector(N_Vector expected, N_Vector computed, realtype tol)
@@ -377,8 +443,12 @@ extern "C" int check_vector(N_Vector expected, N_Vector computed, realtype tol)
   int failure{0};
 
   /* copy vectors to host */
-  REF_OR_OMP_OR_HIP_OR_CUDA(, , N_VCopyFromDevice_Hip(computed), N_VCopyFromDevice_Cuda(computed));
-  REF_OR_OMP_OR_HIP_OR_CUDA(, , N_VCopyFromDevice_Hip(expected), N_VCopyFromDevice_Cuda(expected));
+  REF_OR_OMP_OR_HIP_OR_CUDA_OR_SYCL(, , N_VCopyFromDevice_Hip(computed),
+                                    N_VCopyFromDevice_Cuda(computed),
+                                    N_VCopyFromDevice_Sycl(computed));
+  REF_OR_OMP_OR_HIP_OR_CUDA_OR_SYCL(, , N_VCopyFromDevice_Hip(expected),
+                                    N_VCopyFromDevice_Cuda(expected),
+                                    N_VCopyFromDevice_Sycl(expected));
 
   /* get vector data */
   auto xdata{N_VGetArrayPointer(computed)};
@@ -388,19 +458,23 @@ extern "C" int check_vector(N_Vector expected, N_Vector computed, realtype tol)
   auto xldata{N_VGetLength(computed)};
   auto yldata{N_VGetLength(expected)};
 
-  if (xldata != yldata) {
+  if (xldata != yldata)
+  {
     std::cerr << "ERROR check_vector: different vector lengths\n";
     return 1;
   }
 
   /* check vector data */
-  for (sunindextype i = 0; i < xldata; i++) failure += SUNRCompareTol(xdata[i], ydata[i], tol);
+  for (sunindextype i = 0; i < xldata; i++)
+    failure += SUNRCompareTol(xdata[i], ydata[i], tol);
 
-  if (failure > ZERO) {
+  if (failure > ZERO)
+  {
     std::cerr << "Check_vector failures:\n";
     for (sunindextype i = 0; i < xldata; i++)
       if (SUNRCompareTol(xdata[i], ydata[i], tol) != 0)
-        std::cerr << "  computed[" << i << "] = " << xdata[i] << " != " << ydata[i]
+        std::cerr << "  computed[" << i << "] = " << xdata[i]
+                  << " != " << ydata[i]
                   << " (err = " << SUNRabs(xdata[i] - ydata[i]) << ")\n";
   }
 
@@ -409,35 +483,52 @@ extern "C" int check_vector(N_Vector expected, N_Vector computed, realtype tol)
 
 extern "C" booleantype has_data(SUNMatrix A)
 {
-  if (using_csr_matrix_type) {
-    auto Amat{static_cast<sundials::ginkgo::Matrix<GkoCsrMat>*>(A->content)->GkoMtx()};
-    return !(Amat->get_values() == nullptr || Amat->get_size()[0] == 0 || Amat->get_size()[1] == 0);
-  }
-  else if (using_dense_matrix_type) {
-    auto Amat{static_cast<sundials::ginkgo::Matrix<GkoDenseMat>*>(A->content)->GkoMtx()};
-    return !(Amat->get_values() == nullptr || Amat->get_size()[0] == 0 || Amat->get_size()[1] == 0);
-  }
-  else {
-    return SUNFALSE;
-  }
+  /* A has data when its Ginkgo matrix exposes a non-null value array and
+     both of its dimensions are nonzero; SUNFALSE if no format flag is set. */
+  if (using_csr_matrix_type)
+  {
+    auto csr{
+      static_cast<sundials::ginkgo::Matrix<GkoCsrMat>*>(A->content)->GkoMtx()};
+    return csr->get_values() && csr->get_size()[0] && csr->get_size()[1];
+  }
+  if (using_dense_matrix_type)
+  {
+    auto dense{
+      static_cast<sundials::ginkgo::Matrix<GkoDenseMat>*>(A->content)->GkoMtx()};
+    return dense->get_values() && dense->get_size()[0] && dense->get_size()[1];
+  }
+  return SUNFALSE;
 }
 
 extern "C" booleantype is_square(SUNMatrix A)
 {
-  if (using_csr_matrix_type) {
-    auto Amat{static_cast<sundials::ginkgo::Matrix<GkoCsrMat>*>(A->content)->GkoMtx()};
-    return Amat->get_size()[0] == Amat->get_size()[1];
-  }
-  else if (using_dense_matrix_type) {
-    auto Amat{static_cast<sundials::ginkgo::Matrix<GkoDenseMat>*>(A->content)->GkoMtx()};
-    return Amat->get_size()[0] == Amat->get_size()[1];
-  }
-  else {
-    return SUNTRUE;
-  }
+  if (using_csr_matrix_type) /* CSR-backed wrapper */
+  {
+    auto* csr = static_cast<sundials::ginkgo::Matrix<GkoCsrMat>*>(A->content);
+    const auto dims = csr->GkoMtx()->get_size();
+    return dims[0] == dims[1]; /* square iff both extents agree */
+  }
+  if (using_dense_matrix_type) /* dense-backed wrapper */
+  {
+    auto* dns = static_cast<sundials::ginkgo::Matrix<GkoDenseMat>*>(A->content);
+    const auto dims = dns->GkoMtx()->get_size();
+    return dims[0] == dims[1];
+  }
+  return SUNTRUE; /* no matrix format flag set: SUNTRUE is the fallback */
 }
 
 extern "C" void sync_device(SUNMatrix A)
 {
-  REF_OR_OMP_OR_HIP_OR_CUDA(, , hipDeviceSynchronize(), cudaDeviceSynchronize());
+  /* Synchronize through the Ginkgo executor stored in A's wrapper instead of
+     a backend-specific call; nothing is done if no format flag is set. */
+  if (using_csr_matrix_type)
+  {
+    auto* csr = static_cast<sundials::ginkgo::Matrix<GkoCsrMat>*>(A->content);
+    csr->GkoExec()->synchronize();
+  }
+  else if (using_dense_matrix_type)
+  {
+    auto* dns = static_cast<sundials::ginkgo::Matrix<GkoDenseMat>*>(A->content);
+    dns->GkoExec()->synchronize();
+  }
 }