diff --git a/.github/workflows/c-fortran-test-linux-osx.yml b/.github/workflows/c-fortran-test-linux-osx.yml
index 87d62af5c4..d7433397f1 100644
--- a/.github/workflows/c-fortran-test-linux-osx.yml
+++ b/.github/workflows/c-fortran-test-linux-osx.yml
@@ -20,6 +20,7 @@ jobs:
         CC: ${{ matrix.compiler }}
         FC: gfortran-9
       run: |
+        make -v
         make info
         make -j2
         PROVE_OPTS=-v make prove -j2
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7cc9afbd4b..18ae891f7c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -55,7 +55,7 @@ noether-cpu:
     - export COVERAGE=0
     - cd .. && export NEK5K_VERSION=Nek5000-19.0 && { [[ -d $NEK5K_VERSION ]] || { git clone --depth 1 --branch v19.0 https://github.com/Nek5000/Nek5000.git $NEK5K_VERSION && cd $NEK5K_VERSION/tools && ./maketools genbox genmap reatore2 && cd ../..; }; } && export NEK5K_DIR=$PWD/$NEK5K_VERSION && export PATH=$NEK5K_DIR/bin:$PATH MPI=0 && cd libCEED
     - echo "-------------- Nek5000 -------------" && git -C $NEK5K_DIR describe --tags
-    - make -k -j$NPROC_CPU BACKENDS="$BACKENDS_CPU" JUNIT_BATCH="cpu" junit search=nek
+    - make -k -j$NPROC_CPU BACKENDS="$BACKENDS_CPU" JUNIT_BATCH="cpu" junit search=nek NEK5K_DIR=$NEK5K_DIR
 # Clang-tidy
     - echo "-------------- clang-tidy ----------" && clang-tidy --version
     - TIDY_OPTS="-fix-errors" make -j$NPROC_CPU tidy && git diff --exit-code
@@ -123,7 +123,7 @@ noether-rocm:
     - export COVERAGE=0
     - cd .. && export NEK5K_VERSION=Nek5000-19.0 && { [[ -d $NEK5K_VERSION ]] || { git clone --depth 1 --branch v19.0 https://github.com/Nek5000/Nek5000.git $NEK5K_VERSION && cd $NEK5K_VERSION/tools && ./maketools genbox genmap reatore2 && cd ../..; }; } && export NEK5K_DIR=$PWD/$NEK5K_VERSION && export PATH=$NEK5K_DIR/bin:$PATH MPI=0 && cd libCEED
     - echo "-------------- Nek5000 -------------" && git -C $NEK5K_DIR describe --tags
-    - make -k -j$NPROC_GPU BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="hip" junit search=nek
+    - make -k -j$NPROC_GPU BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="hip" junit search=nek NEK5K_DIR=$NEK5K_DIR
 # Clang-tidy
     - echo "-------------- clang-tidy ----------" && clang-tidy --version
     - TIDY_OPTS="-fix-errors" make -j$NPROC_CPU tidy && git diff --exit-code
@@ -240,8 +240,7 @@ lv-cuda:
 # -- PETSc with CUDA (minimal)
     - export PETSC_DIR=/home/jeth8984/petsc PETSC_ARCH=cuda-O && git -C $PETSC_DIR describe
     - echo "-------------- PETSc ---------------" && make -C $PETSC_DIR info
-#     Note: Skipping fluids and solids due to CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES on RTX 2080 Super
-    - nice make -k -j$NPROC_GPU JUNIT_BATCH="cuda" junit BACKENDS="$BACKENDS_GPU" search="petsc"
+    - nice make -k -j$NPROC_GPU JUNIT_BATCH="cuda" junit BACKENDS="$BACKENDS_GPU" search="petsc fluids solids"
 # Report status
     - touch .SUCCESS
   after_script:
diff --git a/CITATION.cff b/CITATION.cff
index 9324c1ead8..bdbf150421 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -1,6 +1,6 @@
 cff-version: 1.2.0
 title: "libCEED: Efficient Extensible Discretization"
-version: 0.10.0
+version: 0.10.1
 date-released: 2021-07-07
 license:  BSD-2-Clause
 message: "Please cite the following works when using this software."
diff --git a/Doxyfile b/Doxyfile
index 839bde80d3..8f0be2cdea 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = libCEED
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = v0.10.0
+PROJECT_NUMBER         = v0.10.1
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/Makefile b/Makefile
index 102f4edab2..54c997698c 100644
--- a/Makefile
+++ b/Makefile
@@ -151,7 +151,9 @@ CEED_LDFLAGS += $(if $(ASAN),$(AFLAGS))
 CPPFLAGS += -I./include
 CEED_LDLIBS = -lm
 OBJDIR := build
-LIBDIR := lib
+# Build into $(OBJDIR) when installing (goal `install` or for_install=1)
+for_install := $(filter install,$(MAKECMDGOALS))
+LIBDIR := $(if $(for_install),$(OBJDIR),lib)
 
 # Installation variables
 prefix ?= /usr/local
@@ -180,7 +182,7 @@ libceed.so := $(LIBDIR)/libceed.$(SO_EXT)
 libceed.a := $(LIBDIR)/libceed.a
 libceed := $(if $(STATIC),$(libceed.a),$(libceed.so))
 CEED_LIBS = -lceed
-libceed.c := $(filter-out interface/ceed-cuda.c interface/ceed-hip.c, $(wildcard interface/ceed*.c backends/*.c gallery/*.c))
+libceed.c := $(filter-out interface/ceed-cuda.c interface/ceed-hip.c interface/ceed-jit-source-root-$(if $(for_install),default,install).c, $(wildcard interface/ceed*.c backends/*.c gallery/*.c))
 gallery.c := $(wildcard gallery/*/ceed*.c)
 libceed.c += $(gallery.c)
 libceeds = $(libceed)
@@ -624,9 +626,14 @@ $(OBJDIR)/ceed.pc : pkgconfig-prefix = $(prefix)
 	    -e "s:%prefix%:$(pkgconfig-prefix):" \
 	    -e "s:%libs_private%:$(pkgconfig-libs-private):" $< > $@
 
+$(OBJDIR)/interface/ceed-jit-source-root-default.o : CPPFLAGS += -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath ./include)/\""
+$(OBJDIR)/interface/ceed-jit-source-root-install.o : CPPFLAGS += -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath $(includedir))/\""
+
 install : $(libceed) $(OBJDIR)/ceed.pc
 	$(INSTALL) -d $(addprefix $(if $(DESTDIR),"$(DESTDIR)"),"$(includedir)"\
-	  "$(includedir)/ceed/" "$(libdir)" "$(pkgconfigdir)")
+	  "$(includedir)/ceed/" "$(includedir)/ceed/jit-source/"\
+	  "$(includedir)/ceed/jit-source/cuda/" "$(includedir)/ceed/jit-source/hip/"\
+	  "$(includedir)/ceed/jit-source/gallery/" "$(libdir)" "$(pkgconfigdir)")
 	$(INSTALL_DATA) include/ceed/ceed.h "$(DESTDIR)$(includedir)/ceed/"
 	$(INSTALL_DATA) include/ceed/ceed-f32.h "$(DESTDIR)$(includedir)/ceed/"
 	$(INSTALL_DATA) include/ceed/ceed-f64.h "$(DESTDIR)$(includedir)/ceed/"
@@ -640,6 +647,9 @@ install : $(libceed) $(OBJDIR)/ceed.pc
 	$(INSTALL_DATA) $(OBJDIR)/ceed.pc "$(DESTDIR)$(pkgconfigdir)/"
 	$(INSTALL_DATA) include/ceed.h "$(DESTDIR)$(includedir)/"
 	$(INSTALL_DATA) include/ceedf.h "$(DESTDIR)$(includedir)/"
+	$(INSTALL_DATA) $(wildcard include/ceed/jit-source/cuda/*.h) "$(DESTDIR)$(includedir)/ceed/jit-source/cuda/"
+	$(INSTALL_DATA) $(wildcard include/ceed/jit-source/hip/*.h) "$(DESTDIR)$(includedir)/ceed/jit-source/hip/"
+	$(INSTALL_DATA) $(wildcard include/ceed/jit-source/gallery/*.h) "$(DESTDIR)$(includedir)/ceed/jit-source/gallery/"
 
 .PHONY : all cln clean doxygen doc lib install par print test tst prove prv prove-all junit examples style style-c style-py tidy iwyu info info-backends info-backends-all
 
@@ -677,7 +687,7 @@ style : style-c style-py
 CLANG_TIDY ?= clang-tidy
 
 %.c.tidy : %.c
-	$(CLANG_TIDY) $(TIDY_OPTS) $^ -- $(CPPFLAGS) --std=c99 -I$(CUDA_DIR)/include -I$(HIP_DIR)/include
+	$(CLANG_TIDY) $(TIDY_OPTS) $^ -- $(CPPFLAGS) --std=c99 -I$(CUDA_DIR)/include -I$(HIP_DIR)/include -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath ./include)/\""
 
 %.cpp.tidy : %.cpp
 	$(CLANG_TIDY) $(TIDY_OPTS) $^ -- $(CPPFLAGS) --std=c++11 -I$(CUDA_DIR)/include -I$(OCCA_DIR)/include -I$(HIP_DIR)/include
diff --git a/README.md b/README.md
index e479a25b42..fee7ed6c0d 100644
--- a/README.md
+++ b/README.md
@@ -375,7 +375,7 @@ For more details about the benchmarks, see the `benchmarks/README.md` file.
 To install libCEED, run:
 
 ```
-make install prefix=/usr/local
+make install prefix=/path/to/install/dir
 ```
 
 or (e.g., if creating packages):
@@ -384,6 +384,13 @@ or (e.g., if creating packages):
 make install prefix=/usr DESTDIR=/packaging/path
 ```
 
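+To build and install in separate steps, pass `for_install=1` to the build step (so the library uses the installed JIT source path) and then install: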
+
+```
+make for_install=1 prefix=/path/to/install/dir
+make install prefix=/path/to/install/dir
+```
+
 The usual variables like `CC` and `CFLAGS` are used, and optimization flags
 for all languages can be set using the likes of `OPT='-O3 -march=native'`. Use
 `STATIC=1` to build static libraries (`libceed.a`).
diff --git a/backends/cuda-ref/ceed-cuda-ref-basis.c b/backends/cuda-ref/ceed-cuda-ref-basis.c
index dbe1221559..d9343a7839 100644
--- a/backends/cuda-ref/ceed-cuda-ref-basis.c
+++ b/backends/cuda-ref/ceed-cuda-ref-basis.c
@@ -266,8 +266,9 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   CeedInt num_comp;
   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/cuda-ref-basis-tensor.h",
-                             &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/cuda/cuda-ref-basis-tensor.h",
+                                &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
@@ -335,8 +336,9 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim,
   CeedInt num_comp;
   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/cuda-ref-basis-nontensor.h",
-                             &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h",
+                                &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/cuda-ref/ceed-cuda-ref-operator.c b/backends/cuda-ref/ceed-cuda-ref-operator.c
index cbe2f812d8..b0691a3662 100644
--- a/backends/cuda-ref/ceed-cuda-ref-operator.c
+++ b/backends/cuda-ref/ceed-cuda-ref-operator.c
@@ -7,6 +7,7 @@
 
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
+#include <ceed/jit-tools.h>
 #include <assert.h>
 #include <cuda.h>
 #include <cuda_runtime.h>
@@ -719,149 +720,6 @@ static int CeedOperatorLinearAssembleQFunctionUpdate_Cuda(CeedOperator op,
          &rstr, request);
 }
 
-//------------------------------------------------------------------------------
-// Diagonal assembly kernels
-//------------------------------------------------------------------------------
-// *INDENT-OFF*
-static const char *diagonalkernels = QUOTE(
-
-typedef enum {
-  /// Perform no evaluation (either because there is no data or it is already at
-  /// quadrature points)
-  CEED_EVAL_NONE   = 0,
-  /// Interpolate from nodes to quadrature points
-  CEED_EVAL_INTERP = 1,
-  /// Evaluate gradients at quadrature points from input in a nodal basis
-  CEED_EVAL_GRAD   = 2,
-  /// Evaluate divergence at quadrature points from input in a nodal basis
-  CEED_EVAL_DIV    = 4,
-  /// Evaluate curl at quadrature points from input in a nodal basis
-  CEED_EVAL_CURL   = 8,
-  /// Using no input, evaluate quadrature weights on the reference element
-  CEED_EVAL_WEIGHT = 16,
-} CeedEvalMode;
-
-//------------------------------------------------------------------------------
-// Get Basis Emode Pointer
-//------------------------------------------------------------------------------
-extern "C" __device__ void CeedOperatorGetBasisPointer_Cuda(const CeedScalar **basisptr,
-    CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp,
-    const CeedScalar *grad) {
-  switch (emode) {
-  case CEED_EVAL_NONE:
-    *basisptr = identity;
-    break;
-  case CEED_EVAL_INTERP:
-    *basisptr = interp;
-    break;
-  case CEED_EVAL_GRAD:
-    *basisptr = grad;
-    break;
-  case CEED_EVAL_WEIGHT:
-  case CEED_EVAL_DIV:
-  case CEED_EVAL_CURL:
-    break; // Caught by QF Assembly
-  }
-}
-
-//------------------------------------------------------------------------------
-// Core code for diagonal assembly
-//------------------------------------------------------------------------------
-__device__ void diagonalCore(const CeedInt nelem,
-    const CeedScalar maxnorm, const bool pointBlock,
-    const CeedScalar *identity,
-    const CeedScalar *interpin, const CeedScalar *gradin,
-    const CeedScalar *interpout, const CeedScalar *gradout,
-    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
-    const CeedScalar *__restrict__ assembledqfarray,
-    CeedScalar *__restrict__ elemdiagarray) {
-  const int tid = threadIdx.x; // running with P threads, tid is evec node
-  const CeedScalar qfvaluebound = maxnorm*1e-12;
-
-  // Compute the diagonal of B^T D B
-  // Each element
-  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem;
-       e += gridDim.x*blockDim.z) {
-    CeedInt dout = -1;
-    // Each basis eval mode pair
-    for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) {
-      const CeedScalar *bt = NULL;
-      if (emodeout[eout] == CEED_EVAL_GRAD)
-        dout += 1;
-      CeedOperatorGetBasisPointer_Cuda(&bt, emodeout[eout], identity, interpout,
-                                      &gradout[dout*NQPTS*NNODES]);
-      CeedInt din = -1;
-      for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) {
-        const CeedScalar *b = NULL;
-        if (emodein[ein] == CEED_EVAL_GRAD)
-          din += 1;
-        CeedOperatorGetBasisPointer_Cuda(&b, emodein[ein], identity, interpin,
-                                        &gradin[din*NQPTS*NNODES]);
-        // Each component
-        for (CeedInt compOut = 0; compOut < NCOMP; compOut++) {
-          // Each qpoint/node pair
-          if (pointBlock) {
-            // Point Block Diagonal
-            for (CeedInt compIn = 0; compIn < NCOMP; compIn++) {
-              CeedScalar evalue = 0.;
-              for (CeedInt q = 0; q < NQPTS; q++) {
-                const CeedScalar qfvalue =
-                  assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)*
-                                     NCOMP+compOut)*nelem+e)*NQPTS+q];
-                if (abs(qfvalue) > qfvaluebound)
-                  evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
-              }
-              elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue;
-            }
-          } else {
-            // Diagonal Only
-            CeedScalar evalue = 0.;
-            for (CeedInt q = 0; q < NQPTS; q++) {
-              const CeedScalar qfvalue =
-                assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)*
-                                   NCOMP+compOut)*nelem+e)*NQPTS+q];
-              if (abs(qfvalue) > qfvaluebound)
-                evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
-            }
-            elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue;
-          }
-        }
-      }
-    }
-  }
-}
-
-//------------------------------------------------------------------------------
-// Linear diagonal
-//------------------------------------------------------------------------------
-extern "C" __global__ void linearDiagonal(const CeedInt nelem,
-    const CeedScalar maxnorm, const CeedScalar *identity,
-    const CeedScalar *interpin, const CeedScalar *gradin,
-    const CeedScalar *interpout, const CeedScalar *gradout,
-    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
-    const CeedScalar *__restrict__ assembledqfarray,
-    CeedScalar *__restrict__ elemdiagarray) {
-  diagonalCore(nelem, maxnorm, false, identity, interpin, gradin, interpout,
-               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
-}
-
-//------------------------------------------------------------------------------
-// Linear point block diagonal
-//------------------------------------------------------------------------------
-extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem,
-    const CeedScalar maxnorm, const CeedScalar *identity,
-    const CeedScalar *interpin, const CeedScalar *gradin,
-    const CeedScalar *interpout, const CeedScalar *gradout,
-    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
-    const CeedScalar *__restrict__ assembledqfarray,
-    CeedScalar *__restrict__ elemdiagarray) {
-  diagonalCore(nelem, maxnorm, true, identity, interpin, gradin, interpout,
-               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
-}
-
-);
-// *INDENT-ON*
-
 //------------------------------------------------------------------------------
 // Create point block restriction
 //------------------------------------------------------------------------------
@@ -1027,11 +885,21 @@ static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op,
   diag->numemodeout = numemodeout;
 
   // Assemble kernel
+  char *diagonal_kernel_path, *diagonal_kernel_source;
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h",
+                                &diagonal_kernel_path); CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2, "----- Loading Diagonal Assembly Kernel Source -----\n");
+  ierr = CeedLoadSourceToBuffer(ceed, diagonal_kernel_path,
+                                &diagonal_kernel_source);
+  CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2,
+               "----- Loading Diagonal Assembly Source Complete! -----\n");
   CeedInt nnodes, nqpts;
   ierr = CeedBasisGetNumNodes(basisin, &nnodes); CeedChkBackend(ierr);
   ierr = CeedBasisGetNumQuadraturePoints(basisin, &nqpts); CeedChkBackend(ierr);
   diag->nnodes = nnodes;
-  ierr = CeedCompileCuda(ceed, diagonalkernels, &diag->module, 5,
+  ierr = CeedCompileCuda(ceed, diagonal_kernel_source, &diag->module, 5,
                          "NUMEMODEIN", numemodein,
                          "NUMEMODEOUT", numemodeout,
                          "NNODES", nnodes,
@@ -1043,6 +911,8 @@ static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op,
   ierr = CeedGetKernelCuda(ceed, diag->module, "linearPointBlockDiagonal",
                            &diag->linearPointBlock);
   CeedChk_Cu(ceed, ierr);
+  ierr = CeedFree(&diagonal_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedFree(&diagonal_kernel_source); CeedChkBackend(ierr);
 
   // Basis matrices
   const CeedInt qBytes = nqpts * sizeof(CeedScalar);
@@ -1246,119 +1116,6 @@ static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Cuda(CeedOperator op,
   }
 }
 
-//------------------------------------------------------------------------------
-// Matrix assembly kernel for low-order elements (2D thread block)
-//------------------------------------------------------------------------------
-// *INDENT-OFF*
-static const char *assemblykernel = QUOTE(
-extern "C" __launch_bounds__(BLOCK_SIZE) 
-           __global__ void linearAssemble(const CeedScalar *B_in, const CeedScalar *B_out,
-                   const CeedScalar *__restrict__ qf_array,
-                   CeedScalar *__restrict__ values_array) {
-
-  // This kernel assumes B_in and B_out have the same number of quadrature points and 
-  // basis points. 
-  // TODO: expand to more general cases
-  const int i = threadIdx.x; // The output row index of each B^TDB operation 
-  const int l = threadIdx.y; // The output column index of each B^TDB operation
-			     // such that we have (Bout^T)_ij D_jk Bin_kl = C_il
-
-  // Strides for final output ordering, determined by the reference (interface) implementation of
-  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col 
-  const CeedInt comp_out_stride = NNODES * NNODES;
-  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
-  const CeedInt e_stride = comp_in_stride * NCOMP;
-  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt 
-  const CeedInt qe_stride = NQPTS;
-  const CeedInt qcomp_out_stride = NELEM * qe_stride;
-  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
-  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
-  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
-
-  // Loop over each element (if necessary)
-  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
-         e += gridDim.x*blockDim.z) {
-    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
-      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
-        CeedScalar result = 0.0;
-        CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
-        for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
-          CeedInt b_in_index = emode_in * NQPTS * NNODES;
-      	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
-             CeedInt b_out_index = emode_out * NQPTS * NNODES;
-             CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
- 	     // Perform the B^T D B operation for this 'chunk' of D (the qf_array)
-            for (CeedInt j = 0; j < NQPTS; j++) {
-     	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
-	    }
-
-          }// end of emode_out 
-        } // end of emode_in
-        CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
-   	values_array[val_index] = result;
-      } // end of out component
-    } // end of in component
-  } // end of element loop
-}
-);
-
-//------------------------------------------------------------------------------
-// Fallback kernel for larger orders (1D thread block)
-//------------------------------------------------------------------------------
-static const char *assemblykernelbigelem = QUOTE(
-extern "C" __launch_bounds__(BLOCK_SIZE) 
-           __global__ void linearAssemble(const CeedScalar *B_in, const CeedScalar *B_out,
-                   const CeedScalar *__restrict__ qf_array,
-                   CeedScalar *__restrict__ values_array) {
-
-  // This kernel assumes B_in and B_out have the same number of quadrature points and 
-  // basis points. 
-  // TODO: expand to more general cases
-  const int l = threadIdx.x; // The output column index of each B^TDB operation
-			     // such that we have (Bout^T)_ij D_jk Bin_kl = C_il
-
-  // Strides for final output ordering, determined by the reference (interface) implementation of
-  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col 
-  const CeedInt comp_out_stride = NNODES * NNODES;
-  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
-  const CeedInt e_stride = comp_in_stride * NCOMP;
-  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt 
-  const CeedInt qe_stride = NQPTS;
-  const CeedInt qcomp_out_stride = NELEM * qe_stride;
-  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
-  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
-  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
-
-    // Loop over each element (if necessary)
-  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
-         e += gridDim.x*blockDim.z) {
-    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
-      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
-        for (CeedInt i = 0; i < NNODES; i++) {
-          CeedScalar result = 0.0;
-          CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
-          for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
-            CeedInt b_in_index = emode_in * NQPTS * NNODES;
-        	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
-               CeedInt b_out_index = emode_out * NQPTS * NNODES;
-               CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
-   	     // Perform the B^T D B operation for this 'chunk' of D (the qf_array)
-              for (CeedInt j = 0; j < NQPTS; j++) {
-       	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
-  	    }
-
-            }// end of emode_out 
-          } // end of emode_in
-          CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
-     	  values_array[val_index] = result;
-        } // end of loop over element node index, i
-      } // end of out component
-    } // end of in component
-  } // end of element loop
-}
-);
-// *INDENT-ON*
-
 //------------------------------------------------------------------------------
 // Single operator assembly setup
 //------------------------------------------------------------------------------
@@ -1482,35 +1239,39 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op) {
   CeedInt block_size = esize * esize * elemsPerBlock;
   Ceed_Cuda *cuda_data;
   ierr = CeedGetData(ceed, &cuda_data); CeedChkBackend(ierr);
-  if (block_size > cuda_data->device_prop.maxThreadsPerBlock) {
+  char *assembly_kernel_path, *assembly_kernel_source;
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/cuda/cuda-ref-operator-assemble.h",
+                                &assembly_kernel_path); CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2, "----- Loading Assembly Kernel Source -----\n");
+  ierr = CeedLoadSourceToBuffer(ceed, assembly_kernel_path,
+                                &assembly_kernel_source);
+  CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2, "----- Loading Assembly Source Complete! -----\n");
+  bool fallback = block_size > cuda_data->device_prop.maxThreadsPerBlock;
+  if (fallback) {
     // Use fallback kernel with 1D threadblock
     block_size = esize * elemsPerBlock;
     asmb->block_size_x = esize;
     asmb->block_size_y = 1;
-    ierr = CeedCompileCuda(ceed, assemblykernelbigelem, &asmb->module, 7,
-                           "NELEM", nelem,
-                           "NUMEMODEIN", num_emode_in,
-                           "NUMEMODEOUT", num_emode_out,
-                           "NQPTS", nqpts,
-                           "NNODES", esize,
-                           "BLOCK_SIZE", block_size,
-                           "NCOMP", ncomp
-                          ); CeedChk_Cu(ceed, ierr);
   } else {  // Use kernel with 2D threadblock
     asmb->block_size_x = esize;
     asmb->block_size_y = esize;
-    ierr = CeedCompileCuda(ceed, assemblykernel, &asmb->module, 7,
-                           "NELEM", nelem,
-                           "NUMEMODEIN", num_emode_in,
-                           "NUMEMODEOUT", num_emode_out,
-                           "NQPTS", nqpts,
-                           "NNODES", esize,
-                           "BLOCK_SIZE", block_size,
-                           "NCOMP", ncomp
-                          ); CeedChk_Cu(ceed, ierr);
   }
-  ierr = CeedGetKernelCuda(ceed, asmb->module, "linearAssemble",
+  ierr = CeedCompileCuda(ceed, assembly_kernel_source, &asmb->module, 7,
+                         "NELEM", nelem,
+                         "NUMEMODEIN", num_emode_in,
+                         "NUMEMODEOUT", num_emode_out,
+                         "NQPTS", nqpts,
+                         "NNODES", esize,
+                         "BLOCK_SIZE", block_size,
+                         "NCOMP", ncomp
+                        ); CeedChk_Cu(ceed, ierr);
+  ierr = CeedGetKernelCuda(ceed, asmb->module,
+                           fallback ? "linearAssembleFallback" : "linearAssemble",
                            &asmb->linearAssemble); CeedChk_Cu(ceed, ierr);
+  ierr = CeedFree(&assembly_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedFree(&assembly_kernel_source); CeedChkBackend(ierr);
 
   // Build 'full' B matrices (not 1D arrays used for tensor-product matrices)
   const CeedScalar *interp_in, *grad_in;
diff --git a/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp b/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
index eb00f21c6c..aee2038a34 100644
--- a/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
+++ b/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
@@ -45,8 +45,8 @@ extern "C" int CeedCudaBuildQFunction(CeedQFunction qf) {
 
   // Build strings for final kernel
   char *read_write_kernel_path, *read_write_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/cuda-ref-qfunction.h",
-                             &read_write_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-qfunction.h",
+                                &read_write_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &read_write_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c b/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c
index 2a9a584b26..0bb14e0089 100644
--- a/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c
+++ b/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c
@@ -72,7 +72,7 @@ static inline int CeedQFunctionContextSyncD2H_Cuda(
   } else if (impl->h_data_owned) {
     impl->h_data = impl->h_data_owned;
   } else {
-    ierr = CeedMalloc(ctxsize, &impl->h_data_owned);
+    ierr = CeedMallocArray(1, ctxsize, &impl->h_data_owned);
     CeedChkBackend(ierr);
     impl->h_data = impl->h_data_owned;
   }
@@ -184,7 +184,8 @@ static int CeedQFunctionContextSetDataHost_Cuda(const CeedQFunctionContext ctx,
   case CEED_COPY_VALUES: {
     size_t ctxsize;
     ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr);
-    ierr = CeedMalloc(ctxsize, &impl->h_data_owned); CeedChkBackend(ierr);
+    ierr = CeedMallocArray(1, ctxsize, &impl->h_data_owned);
+    CeedChkBackend(ierr);
     impl->h_data_borrowed = NULL;
     impl->h_data = impl->h_data_owned;
     memcpy(impl->h_data, data, ctxsize);
diff --git a/backends/cuda-ref/ceed-cuda-restriction.c b/backends/cuda-ref/ceed-cuda-restriction.c
index 8bcd779949..57b92e36c9 100644
--- a/backends/cuda-ref/ceed-cuda-restriction.c
+++ b/backends/cuda-ref/ceed-cuda-restriction.c
@@ -341,8 +341,9 @@ int CeedElemRestrictionCreate_Cuda(CeedMemType m_type, CeedCopyMode copy_mode,
   // Compile CUDA kernels
   CeedInt num_nodes = impl->num_nodes;
   char *restriction_kernel_path, *restriction_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/cuda-ref-restriction.h",
-                             &restriction_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/cuda/cuda-ref-restriction.h",
+                                &restriction_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Restriction Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, restriction_kernel_path,
                                 &restriction_kernel_source);
diff --git a/backends/cuda-ref/ceed-cuda-vector.c b/backends/cuda-ref/ceed-cuda-vector.c
index 3e605abb68..2ec324c2ff 100644
--- a/backends/cuda-ref/ceed-cuda-vector.c
+++ b/backends/cuda-ref/ceed-cuda-vector.c
@@ -13,6 +13,30 @@
 #include <string.h>
 #include "ceed-cuda-ref.h"
 
+//------------------------------------------------------------------------------
+// Check if host/device sync is needed
+//------------------------------------------------------------------------------
+static inline int CeedVectorNeedSync_Cuda(const CeedVector vec,
+    CeedMemType mem_type, bool *need_sync) {
+  int ierr;
+  CeedVector_Cuda *impl;
+  ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr);
+
+  bool has_valid_array = false;
+  ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChkBackend(ierr);
+  // Sync is needed when the vector has a valid array but none for mem_type
+  switch (mem_type) {
+  case CEED_MEM_HOST:
+    *need_sync = has_valid_array && !impl->h_array;
+    break;
+  case CEED_MEM_DEVICE:
+    *need_sync = has_valid_array && !impl->d_array;
+    break;
+  }
+
+  return CEED_ERROR_SUCCESS;
+}
+
 //------------------------------------------------------------------------------
 // Sync host to device
 //------------------------------------------------------------------------------
@@ -88,12 +112,20 @@ static inline int CeedVectorSyncD2H_Cuda(const CeedVector vec) {
 //------------------------------------------------------------------------------
 // Sync arrays
 //------------------------------------------------------------------------------
-static inline int CeedVectorSync_Cuda(const CeedVector vec,
-                                      CeedScalarType prec,
-                                      CeedMemType mem_type) {
+static int CeedVectorSyncArrayGeneric_Cuda(const CeedVector vec,
+                                           CeedMemType mem_type,
+                                           CeedScalarType prec) {
+  // prec is unused: the D2H/H2D sync helpers take only the vector
+  int ierr;
+  bool need_sync = false;
+  ierr = CeedVectorNeedSync_Cuda(vec, mem_type, &need_sync); CeedChkBackend(ierr);
+  // Skip the copy unless a valid array exists and mem_type's pointer is unset
+  if (!need_sync)
+    return CEED_ERROR_SUCCESS;
+
   switch (mem_type) {
   case CEED_MEM_HOST: return CeedVectorSyncD2H_Cuda(vec);
   case CEED_MEM_DEVICE: return CeedVectorSyncH2D_Cuda(vec);
   }
   return CEED_ERROR_UNSUPPORTED;
 }
@@ -169,29 +201,6 @@ static inline int CeedVectorHasBorrowedArrayOfType_Cuda(const CeedVector vec,
   return CEED_ERROR_SUCCESS;
 }
 
-//------------------------------------------------------------------------------
-// Check if is any array of given type
-//------------------------------------------------------------------------------
-static inline int CeedVectorNeedSync_Cuda(const CeedVector vec,
-    CeedMemType mem_type, bool *need_sync) {
-  int ierr;
-  CeedVector_Cuda *impl;
-  ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr);
-
-  bool has_valid_array = false;
-  ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChkBackend(ierr);
-  switch (mem_type) {
-  case CEED_MEM_HOST:
-    *need_sync = has_valid_array && !impl->h_array;
-    break;
-  case CEED_MEM_DEVICE:
-    *need_sync = has_valid_array && !impl->d_array;
-    break;
-  }
-
-  return CEED_ERROR_SUCCESS;
-}
-
 //------------------------------------------------------------------------------
 // Set array from host
 //------------------------------------------------------------------------------
@@ -372,11 +381,7 @@ static int CeedVectorTakeArrayGeneric_Cuda(CeedVector vec, CeedMemType mem_type,
   ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr);
 
   // Sync array to requested mem_type
-  bool need_sync = false;
-  ierr = CeedVectorNeedSync_Cuda(vec, mem_type, &need_sync); CeedChkBackend(ierr);
-  if (need_sync) {
-    ierr = CeedVectorSync_Cuda(vec, prec, mem_type); CeedChkBackend(ierr);
-  }
+  ierr = CeedVectorSyncArray(vec, mem_type); CeedChkBackend(ierr);
 
   // Update pointer
   switch (mem_type) {
@@ -409,14 +414,8 @@ static int CeedVectorGetArrayCore_Cuda(const CeedVector vec,
   CeedVector_Cuda *impl;
   ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr);
 
-  bool need_sync = false, has_array_of_type = true;
-  ierr = CeedVectorNeedSync_Cuda(vec, mem_type, &need_sync); CeedChkBackend(ierr);
-  ierr = CeedVectorHasArrayOfType_Cuda(vec, mem_type, &has_array_of_type);
-  CeedChkBackend(ierr);
-  if (need_sync) {
-    // Sync array to requested mem_type
-    ierr = CeedVectorSync_Cuda(vec, prec, mem_type); CeedChkBackend(ierr);
-  }
+  // Sync array to requested mem_type
+  ierr = CeedVectorSyncArray(vec, mem_type); CeedChkBackend(ierr);
 
   // Update pointer
   switch (mem_type) {
@@ -776,6 +775,8 @@ int CeedVectorCreate_Cuda(CeedSize n, CeedVector vec) {
   ierr = CeedSetBackendFunction(ceed, "Vector", vec, "SetValue",
                                 (int (*)())(CeedVectorSetValue_Cuda));
   CeedChkBackend(ierr);
+  ierr = CeedSetBackendFunction(ceed, "Vector", vec, "SyncArrayGeneric",
+                                CeedVectorSyncArrayGeneric_Cuda); CeedChkBackend(ierr);
   ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayGeneric",
                                 CeedVectorGetArrayGeneric_Cuda); CeedChkBackend(ierr);
   ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayReadGeneric",
diff --git a/backends/cuda-shared/ceed-cuda-shared-basis.c b/backends/cuda-shared/ceed-cuda-shared-basis.c
index 77657a0c3a..dd20b8fa4c 100644
--- a/backends/cuda-shared/ceed-cuda-shared-basis.c
+++ b/backends/cuda-shared/ceed-cuda-shared-basis.c
@@ -270,8 +270,9 @@ int CeedBasisCreateTensorH1_Cuda_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   CeedInt num_comp;
   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/cuda-shared-basis.h",
-                             &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/cuda/cuda-shared-basis.h",
+                                &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/hip-gen/ceed-hip-gen-operator-build.cpp b/backends/hip-gen/ceed-hip-gen-operator-build.cpp
index 3ede5d090d..10364f84f8 100644
--- a/backends/hip-gen/ceed-hip-gen-operator-build.cpp
+++ b/backends/hip-gen/ceed-hip-gen-operator-build.cpp
@@ -807,7 +807,7 @@ extern "C" int CeedHipGenOperatorBuild(CeedOperator op) {
   oper = "CeedKernel_Hip_gen_" + qFunctionName;
 
   code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n";
-  code << "#define CEED_QFUNCTION_HELPER inline __device__ __forceinline__\n";
+  code << "#define CEED_QFUNCTION_HELPER __device__ __forceinline__\n";
   code << "#define CeedPragmaSIMD\n";
   code << "#define CEED_ERROR_SUCCESS 0\n\n";
 
diff --git a/backends/hip-ref/ceed-hip-ref-basis.c b/backends/hip-ref/ceed-hip-ref-basis.c
index 6e0b3de402..ee535012fa 100644
--- a/backends/hip-ref/ceed-hip-ref-basis.c
+++ b/backends/hip-ref/ceed-hip-ref-basis.c
@@ -276,8 +276,9 @@ int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   CeedInt ncomp;
   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-basis-tensor.h",
-                             &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/hip/hip-ref-basis-tensor.h",
+                                &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
@@ -344,8 +345,9 @@ int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes,
   CeedInt ncomp;
   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-basis-nontensor.h",
-                             &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/hip/hip-ref-basis-nontensor.h",
+                                &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/hip-ref/ceed-hip-ref-operator.c b/backends/hip-ref/ceed-hip-ref-operator.c
index 809575caf9..0a8236a807 100644
--- a/backends/hip-ref/ceed-hip-ref-operator.c
+++ b/backends/hip-ref/ceed-hip-ref-operator.c
@@ -7,6 +7,7 @@
 
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
+#include <ceed/jit-tools.h>
 #include <hip/hip_runtime.h>
 #include <assert.h>
 #include <stdbool.h>
@@ -731,149 +732,6 @@ static int CeedOperatorLinearAssembleQFunctionUpdate_Hip(CeedOperator op,
          request);
 }
 
-//------------------------------------------------------------------------------
-// Diagonal assembly kernels
-//------------------------------------------------------------------------------
-// *INDENT-OFF*
-static const char *diagonalkernels = QUOTE(
-
-typedef enum {
-  /// Perform no evaluation (either because there is no data or it is already at
-  /// quadrature points)
-  CEED_EVAL_NONE   = 0,
-  /// Interpolate from nodes to quadrature points
-  CEED_EVAL_INTERP = 1,
-  /// Evaluate gradients at quadrature points from input in a nodal basis
-  CEED_EVAL_GRAD   = 2,
-  /// Evaluate divergence at quadrature points from input in a nodal basis
-  CEED_EVAL_DIV    = 4,
-  /// Evaluate curl at quadrature points from input in a nodal basis
-  CEED_EVAL_CURL   = 8,
-  /// Using no input, evaluate quadrature weights on the reference element
-  CEED_EVAL_WEIGHT = 16,
-} CeedEvalMode;
-
-//------------------------------------------------------------------------------
-// Get Basis Emode Pointer
-//------------------------------------------------------------------------------
-extern "C" __device__ void CeedOperatorGetBasisPointer_Hip(const CeedScalar **basisptr,
-    CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp,
-    const CeedScalar *grad) {
-  switch (emode) {
-  case CEED_EVAL_NONE:
-    *basisptr = identity;
-    break;
-  case CEED_EVAL_INTERP:
-    *basisptr = interp;
-    break;
-  case CEED_EVAL_GRAD:
-    *basisptr = grad;
-    break;
-  case CEED_EVAL_WEIGHT:
-  case CEED_EVAL_DIV:
-  case CEED_EVAL_CURL:
-    break; // Caught by QF Assembly
-  }
-}
-
-//------------------------------------------------------------------------------
-// Core code for diagonal assembly
-//------------------------------------------------------------------------------
-__device__ void diagonalCore(const CeedInt nelem,
-    const CeedScalar maxnorm, const bool pointBlock,
-    const CeedScalar *identity,
-    const CeedScalar *interpin, const CeedScalar *gradin,
-    const CeedScalar *interpout, const CeedScalar *gradout,
-    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
-    const CeedScalar *__restrict__ assembledqfarray,
-    CeedScalar *__restrict__ elemdiagarray) {
-  const int tid = threadIdx.x; // running with P threads, tid is evec node
-  const CeedScalar qfvaluebound = maxnorm*1e-12;
-
-  // Compute the diagonal of B^T D B
-  // Each element
-  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem;
-       e += gridDim.x*blockDim.z) {
-    CeedInt dout = -1;
-    // Each basis eval mode pair
-    for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) {
-      const CeedScalar *bt = NULL;
-      if (emodeout[eout] == CEED_EVAL_GRAD)
-        dout += 1;
-      CeedOperatorGetBasisPointer_Hip(&bt, emodeout[eout], identity, interpout,
-                                      &gradout[dout*NQPTS*NNODES]);
-      CeedInt din = -1;
-      for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) {
-        const CeedScalar *b = NULL;
-        if (emodein[ein] == CEED_EVAL_GRAD)
-          din += 1;
-        CeedOperatorGetBasisPointer_Hip(&b, emodein[ein], identity, interpin,
-                                        &gradin[din*NQPTS*NNODES]);
-        // Each component
-        for (CeedInt compOut = 0; compOut < NCOMP; compOut++) {
-          // Each qpoint/node pair
-          if (pointBlock) {
-            // Point Block Diagonal
-            for (CeedInt compIn = 0; compIn < NCOMP; compIn++) {
-              CeedScalar evalue = 0.;
-              for (CeedInt q = 0; q < NQPTS; q++) {
-                const CeedScalar qfvalue =
-                  assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)*
-                                     NCOMP+compOut)*nelem+e)*NQPTS+q];
-                if (abs(qfvalue) > qfvaluebound)
-                  evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
-              }
-              elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue;
-            }
-          } else {
-            // Diagonal Only
-            CeedScalar evalue = 0.;
-            for (CeedInt q = 0; q < NQPTS; q++) {
-              const CeedScalar qfvalue =
-                assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)*
-                                   NCOMP+compOut)*nelem+e)*NQPTS+q];
-              if (abs(qfvalue) > qfvaluebound)
-                evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
-            }
-            elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue;
-          }
-        }
-      }
-    }
-  }
-}
-
-//------------------------------------------------------------------------------
-// Linear diagonal
-//------------------------------------------------------------------------------
-extern "C" __global__ void linearDiagonal(const CeedInt nelem,
-    const CeedScalar maxnorm, const CeedScalar *identity,
-    const CeedScalar *interpin, const CeedScalar *gradin,
-    const CeedScalar *interpout, const CeedScalar *gradout,
-    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
-    const CeedScalar *__restrict__ assembledqfarray,
-    CeedScalar *__restrict__ elemdiagarray) {
-  diagonalCore(nelem, maxnorm, false, identity, interpin, gradin, interpout,
-               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
-}
-
-//------------------------------------------------------------------------------
-// Linear point block diagonal
-//------------------------------------------------------------------------------
-extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem,
-    const CeedScalar maxnorm, const CeedScalar *identity,
-    const CeedScalar *interpin, const CeedScalar *gradin,
-    const CeedScalar *interpout, const CeedScalar *gradout,
-    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
-    const CeedScalar *__restrict__ assembledqfarray,
-    CeedScalar *__restrict__ elemdiagarray) {
-  diagonalCore(nelem, maxnorm, true, identity, interpin, gradin, interpout,
-               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
-}
-
-);
-// *INDENT-ON*
-
 //------------------------------------------------------------------------------
 // Create point block restriction
 //------------------------------------------------------------------------------
@@ -1039,11 +897,22 @@ static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op,
   diag->numemodeout = numemodeout;
 
   // Assemble kernel
+
+  char *diagonal_kernel_path, *diagonal_kernel_source;
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h",
+                                &diagonal_kernel_path); CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2, "----- Loading Diagonal Assembly Kernel Source -----\n");
+  ierr = CeedLoadSourceToBuffer(ceed, diagonal_kernel_path,
+                                &diagonal_kernel_source);
+  CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2,
+               "----- Loading Diagonal Assembly Source Complete! -----\n");
   CeedInt nnodes, nqpts;
   ierr = CeedBasisGetNumNodes(basisin, &nnodes); CeedChkBackend(ierr);
   ierr = CeedBasisGetNumQuadraturePoints(basisin, &nqpts); CeedChkBackend(ierr);
   diag->nnodes = nnodes;
-  ierr = CeedCompileHip(ceed, diagonalkernels, &diag->module, 5,
+  ierr = CeedCompileHip(ceed, diagonal_kernel_source, &diag->module, 5,
                         "NUMEMODEIN", numemodein,
                         "NUMEMODEOUT", numemodeout,
                         "NNODES", nnodes,
@@ -1055,6 +924,8 @@ static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op,
   ierr = CeedGetKernelHip(ceed, diag->module, "linearPointBlockDiagonal",
                           &diag->linearPointBlock);
   CeedChk_Hip(ceed, ierr);
+  ierr = CeedFree(&diagonal_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedFree(&diagonal_kernel_source); CeedChkBackend(ierr);
 
   // Basis matrices
   const CeedInt qBytes = nqpts * sizeof(CeedScalar);
@@ -1263,119 +1134,6 @@ static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Hip(CeedOperator op,
   }
 }
 
-//------------------------------------------------------------------------------
-// Matrix assembly kernel for low-order elements (2D thread block)
-//------------------------------------------------------------------------------
-// *INDENT-OFF*
-static const char *assemblykernel = QUOTE(
-extern "C" __launch_bounds__(BLOCK_SIZE) 
-           __global__ void linearAssemble(const CeedScalar *B_in, const CeedScalar *B_out,
-                   const CeedScalar *__restrict__ qf_array,
-                   CeedScalar *__restrict__ values_array) {
-
-  // This kernel assumes B_in and B_out have the same number of quadrature points and 
-  // basis points. 
-  // TODO: expand to more general cases
-  const int i = threadIdx.x; // The output row index of each B^TDB operation 
-  const int l = threadIdx.y; // The output column index of each B^TDB operation
-			     // such that we have (Bout^T)_ij D_jk Bin_kl = C_il
-
-  // Strides for final output ordering, determined by the reference (interface) implementation of
-  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col 
-  const CeedInt comp_out_stride = NNODES * NNODES;
-  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
-  const CeedInt e_stride = comp_in_stride * NCOMP;
-  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt 
-  const CeedInt qe_stride = NQPTS;
-  const CeedInt qcomp_out_stride = NELEM * qe_stride;
-  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
-  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
-  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
-
-  // Loop over each element (if necessary)
-  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
-         e += gridDim.x*blockDim.z) {
-    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
-      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
-        CeedScalar result = 0.0;
-        CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
-        for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
-          CeedInt b_in_index = emode_in * NQPTS * NNODES;
-      	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
-             CeedInt b_out_index = emode_out * NQPTS * NNODES;
-             CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
- 	     // Perform the B^T D B operation for this 'chunk' of D (the qf_array)
-            for (CeedInt j = 0; j < NQPTS; j++) {
-     	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
-	    }
-
-          }// end of emode_out 
-        } // end of emode_in
-        CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
-   	values_array[val_index] = result;
-      } // end of out component
-    } // end of in component
-  } // end of element loop
-}
-);
-
-//------------------------------------------------------------------------------
-// Fallback kernel for larger orders (1D thread block)
-//------------------------------------------------------------------------------
-static const char *assemblykernelbigelem = QUOTE(
-extern "C" __launch_bounds__(BLOCK_SIZE) 
-           __global__ void linearAssemble(const CeedScalar *B_in, const CeedScalar *B_out,
-                   const CeedScalar *__restrict__ qf_array,
-                   CeedScalar *__restrict__ values_array) {
-
-  // This kernel assumes B_in and B_out have the same number of quadrature points and 
-  // basis points. 
-  // TODO: expand to more general cases
-  const int l = threadIdx.x; // The output column index of each B^TDB operation
-			     // such that we have (Bout^T)_ij D_jk Bin_kl = C_il
-
-  // Strides for final output ordering, determined by the reference (interface) implementation of
-  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col 
-  const CeedInt comp_out_stride = NNODES * NNODES;
-  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
-  const CeedInt e_stride = comp_in_stride * NCOMP;
-  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt 
-  const CeedInt qe_stride = NQPTS;
-  const CeedInt qcomp_out_stride = NELEM * qe_stride;
-  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
-  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
-  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
-
-    // Loop over each element (if necessary)
-  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
-         e += gridDim.x*blockDim.z) {
-    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
-      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
-        for (CeedInt i = 0; i < NNODES; i++) {
-          CeedScalar result = 0.0;
-          CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
-          for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
-            CeedInt b_in_index = emode_in * NQPTS * NNODES;
-        	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
-               CeedInt b_out_index = emode_out * NQPTS * NNODES;
-               CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
-   	     // Perform the B^T D B operation for this 'chunk' of D (the qf_array)
-              for (CeedInt j = 0; j < NQPTS; j++) {
-       	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
-  	    }
-
-            }// end of emode_out 
-          } // end of emode_in
-          CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
-     	  values_array[val_index] = result;
-        } // end of loop over element node index, i
-      } // end of out component
-    } // end of in component
-  } // end of element loop
-}
-);
-// *INDENT-ON*
-
 //------------------------------------------------------------------------------
 // Single operator assembly setup
 //------------------------------------------------------------------------------
@@ -1497,34 +1255,38 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op) {
   int elemsPerBlock = 1;
   asmb->elemsPerBlock = elemsPerBlock;
   CeedInt block_size = esize * esize * elemsPerBlock;
-  if (block_size > 1024) { // Use fallback kernel with 1D threadblock
+  char *assembly_kernel_path, *assembly_kernel_source;
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/hip/hip-ref-operator-assemble.h",
+                                &assembly_kernel_path); CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2, "----- Loading Assembly Kernel Source -----\n");
+  ierr = CeedLoadSourceToBuffer(ceed, assembly_kernel_path,
+                                &assembly_kernel_source);
+  CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2, "----- Loading Assembly Source Complete! -----\n");
+  bool fallback = block_size > 1024;
+  if (fallback) { // Use fallback kernel with 1D threadblock
     block_size = esize * elemsPerBlock;
     asmb->block_size_x = esize;
     asmb->block_size_y = 1;
-    ierr = CeedCompileHip(ceed, assemblykernelbigelem, &asmb->module, 7,
-                          "NELEM", nelem,
-                          "NUMEMODEIN", num_emode_in,
-                          "NUMEMODEOUT", num_emode_out,
-                          "NQPTS", nqpts,
-                          "NNODES", esize,
-                          "BLOCK_SIZE", block_size,
-                          "NCOMP", ncomp
-                         ); CeedChk_Hip(ceed, ierr);
   } else {  // Use kernel with 2D threadblock
     asmb->block_size_x = esize;
     asmb->block_size_y = esize;
-    ierr = CeedCompileHip(ceed, assemblykernel, &asmb->module, 7,
-                          "NELEM", nelem,
-                          "NUMEMODEIN", num_emode_in,
-                          "NUMEMODEOUT", num_emode_out,
-                          "NQPTS", nqpts,
-                          "NNODES", esize,
-                          "BLOCK_SIZE", block_size,
-                          "NCOMP", ncomp
-                         ); CeedChk_Hip(ceed, ierr);
   }
-  ierr = CeedGetKernelHip(ceed, asmb->module, "linearAssemble",
+  ierr = CeedCompileHip(ceed, assembly_kernel_source, &asmb->module, 7,
+                        "NELEM", nelem,
+                        "NUMEMODEIN", num_emode_in,
+                        "NUMEMODEOUT", num_emode_out,
+                        "NQPTS", nqpts,
+                        "NNODES", esize,
+                        "BLOCK_SIZE", block_size,
+                        "NCOMP", ncomp
+                       ); CeedChk_Hip(ceed, ierr);
+  ierr = CeedGetKernelHip(ceed, asmb->module,
+                          fallback ? "linearAssembleFallback" : "linearAssemble",
                           &asmb->linearAssemble); CeedChk_Hip(ceed, ierr);
+  ierr = CeedFree(&assembly_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedFree(&assembly_kernel_source); CeedChkBackend(ierr);
 
   // Build 'full' B matrices (not 1D arrays used for tensor-product matrices)
   const CeedScalar *interp_in, *grad_in;
diff --git a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
index 4666dbcffb..3d81f0ee26 100644
--- a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
+++ b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
@@ -47,8 +47,8 @@ extern "C" int CeedHipBuildQFunction(CeedQFunction qf) {
 
   // Build strings for final kernel
   char *read_write_kernel_path, *read_write_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-qfunction.h",
-                             &read_write_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-qfunction.h",
+                                &read_write_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &read_write_kernel_source);
   CeedChkBackend(ierr);
@@ -61,7 +61,7 @@ extern "C" int CeedHipBuildQFunction(CeedQFunction qf) {
 
   // Defintions
   code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n";
-  code << "#define CEED_QFUNCTION_HELPER inline __device__ __forceinline__\n";
+  code << "#define CEED_QFUNCTION_HELPER __device__ __forceinline__\n";
   code << "#define CeedPragmaSIMD\n";
   code << "#define CEED_ERROR_SUCCESS 0\n";
   code << "#define CEED_Q_VLA 1\n\n";
diff --git a/backends/hip-ref/ceed-hip-ref-qfunctioncontext.c b/backends/hip-ref/ceed-hip-ref-qfunctioncontext.c
index 46cf1b13f5..e4c71e21c1 100644
--- a/backends/hip-ref/ceed-hip-ref-qfunctioncontext.c
+++ b/backends/hip-ref/ceed-hip-ref-qfunctioncontext.c
@@ -72,7 +72,7 @@ static inline int CeedQFunctionContextSyncD2H_Hip(
   } else if (impl->h_data_owned) {
     impl->h_data = impl->h_data_owned;
   } else {
-    ierr = CeedMalloc(ctxsize, &impl->h_data_owned);
+    ierr = CeedMallocArray(1, ctxsize, &impl->h_data_owned);
     CeedChkBackend(ierr);
     impl->h_data = impl->h_data_owned;
   }
@@ -184,7 +184,8 @@ static int CeedQFunctionContextSetDataHost_Hip(const CeedQFunctionContext ctx,
   case CEED_COPY_VALUES: {
     size_t ctxsize;
     ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr);
-    ierr = CeedMalloc(ctxsize, &impl->h_data_owned); CeedChkBackend(ierr);
+    ierr = CeedMallocArray(1, ctxsize, &impl->h_data_owned);
+    CeedChkBackend(ierr);
     impl->h_data_borrowed = NULL;
     impl->h_data = impl->h_data_owned;
     memcpy(impl->h_data, data, ctxsize);
diff --git a/backends/hip-ref/ceed-hip-ref-restriction.c b/backends/hip-ref/ceed-hip-ref-restriction.c
index 01f275e9ac..9915a0e37f 100644
--- a/backends/hip-ref/ceed-hip-ref-restriction.c
+++ b/backends/hip-ref/ceed-hip-ref-restriction.c
@@ -344,8 +344,9 @@ int CeedElemRestrictionCreate_Hip(CeedMemType mtype, CeedCopyMode cmode,
   // Compile HIP kernels
   CeedInt num_nodes = impl->num_nodes;
   char *restriction_kernel_path, *restriction_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-restriction.h",
-                             &restriction_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/hip/hip-ref-restriction.h",
+                                &restriction_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Restriction Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, restriction_kernel_path,
                                 &restriction_kernel_source);
diff --git a/backends/hip-ref/ceed-hip-ref-vector.c b/backends/hip-ref/ceed-hip-ref-vector.c
index 68b9e8a617..1102b1eb8f 100644
--- a/backends/hip-ref/ceed-hip-ref-vector.c
+++ b/backends/hip-ref/ceed-hip-ref-vector.c
@@ -13,6 +13,7 @@
 #include <string.h>
 #include "ceed-hip-ref.h"
 
+
 //------------------------------------------------------------------------------
 // Get size of the scalar type
 // TODO: move to interface level for all backends?
diff --git a/backends/hip-shared/ceed-hip-shared-basis.c b/backends/hip-shared/ceed-hip-shared-basis.c
index 2fcb6569cc..2cd202d0cb 100644
--- a/backends/hip-shared/ceed-hip-shared-basis.c
+++ b/backends/hip-shared/ceed-hip-shared-basis.c
@@ -326,8 +326,9 @@ int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
 
   // Compile basis kernels
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-shared-basis.h",
-                             &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/hip/hip-shared-basis.h",
+                                &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
diff --git a/benchmarks/petsc-bps.sh b/benchmarks/petsc-bps.sh
index e589b36412..36f900cc16 100755
--- a/benchmarks/petsc-bps.sh
+++ b/benchmarks/petsc-bps.sh
@@ -21,7 +21,7 @@ function run_tests()
 
    # Some of the available options are:
    # -degree <1>: Polynomial degree of tensor product basis
-   # -qextra <1>: Number of extra quadrature points
+   # -q_extra <1>: Number of extra quadrature points
    # -ceed </cpu/self>: CEED resource specifier
    # -local_nodes <1000>: Target number of locally (per rank) owned nodes
 
diff --git a/benchmarks/petsc-bpsraw.sh b/benchmarks/petsc-bpsraw.sh
index f39f7ac521..bcfc97c16e 100755
--- a/benchmarks/petsc-bpsraw.sh
+++ b/benchmarks/petsc-bpsraw.sh
@@ -21,7 +21,7 @@ function run_tests()
 
    # Some of the available options are:
    # -degree <1>: Polynomial degree of tensor product basis
-   # -qextra <1>: Number of extra quadrature points
+   # -q_extra <1>: Number of extra quadrature points
    # -ceed </cpu/self>: CEED resource specifier
    # -local <1000>: Target number of locally (per rank) owned nodes
 
diff --git a/ceed.pc.template b/ceed.pc.template
index f884794467..bd8485ed88 100644
--- a/ceed.pc.template
+++ b/ceed.pc.template
@@ -4,7 +4,7 @@ libdir=${prefix}/lib
 
 Name: CEED
 Description: Code for Efficient Extensible Discretization
-Version: 0.10.0
+Version: 0.10.1
 Cflags: -I${includedir}
 Libs: -L${libdir} -lceed
 Libs.private: %libs_private%
diff --git a/doc/sphinx/source/libCEEDapi.md b/doc/sphinx/source/libCEEDapi.md
index 18699021a7..5e0ba6b2e3 100644
--- a/doc/sphinx/source/libCEEDapi.md
+++ b/doc/sphinx/source/libCEEDapi.md
@@ -470,7 +470,7 @@ be added according to demand.
 There are two common approaches for supporting non-conforming elements: applying the node constraints via $\bm P$ so that the **L-vector** can be processed uniformly and applying the constraints via $\bm{\mathcal{E}}$ so that the **E-vector** is uniform.
 The former can be done with the existing interface while the latter will require a generalization to element restriction that would define field values at constrained nodes as linear combinations of the values at primary nodes.
 
-These operations, $\bm{P}$, $\bm{B}$, and $\bm{D}$,
+These operations, $\bm{\mathcal{E}}$, $\bm{B}$, and $\bm{D}$,
 are combined with a {ref}`CeedOperator`. As with {ref}`CeedQFunction`s, operator fields are added
 separately with a matching field name, basis ($\bm{B}$), element restriction
 ($\bm{\mathcal{E}}$), and **L-vector**. The flag
diff --git a/doc/sphinx/source/references.bib b/doc/sphinx/source/references.bib
index 5a7829a4e1..a1f6d382a9 100644
--- a/doc/sphinx/source/references.bib
+++ b/doc/sphinx/source/references.bib
@@ -163,3 +163,42 @@ @book{toro2009
   publisher={Springer, Berlin, Heidelberg},
   isbn={978-3-540-49834-6}
 }
+
+@misc{sodshocktubewiki,
+  title={Sod shock tube},
+  howpublished={\url{https://en.wikipedia.org/wiki/Sod_shock_tube}},
+  note={Accessed: 01-30-2022}
+}
+
+@article{tezduyar2007yzb,
+  title={{SUPG} finite element computation of inviscid supersonic flows with $YZ\beta$ shock capturing},
+  author={Tezduyar, Tayfun E and Senga, Masayoshi},
+  journal={Computers and Fluids},
+  volume={36},
+  number={1},
+  pages={147-159},
+  year={2007},
+  publisher={Elsevier},
+  doi={10.1016/j.compfluid.2005.07.009}
+}
+
+@phdthesis{whitingStabilizedFEM1999,
+  title = {Stabilized {{Finite Element Methods}} for {{Fluid Dynamics}} Using a {{Hierarchical Basis}}},
+  author = {Whiting, Christian H},
+  year = {1999},
+  address = {{Troy, NY}},
+  langid = {english},
+  school = {Rensselaer Polytechnic Institute},
+}
+
+@article{shurSTG2014,
+  title = {Synthetic Turbulence Generators for {RANS-LES} Interfaces in Zonal Simulations of Aerodynamic and Aeroacoustic Problems},
+  author = {Shur, Michael L. and Spalart, Philippe R. and Strelets, Michael K. and Travin, Andrey K.},
+  year = {2014},
+  journal = {Flow, Turbulence and Combustion},
+  volume = {93},
+  number = {1},
+  pages = {63--92},
+  doi = {10.1007/s10494-014-9534-8},
+  langid = {english},
+}
diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md
index ccfcda1470..1c0b7a93f0 100644
--- a/doc/sphinx/source/releasenotes.md
+++ b/doc/sphinx/source/releasenotes.md
@@ -1,7 +1,6 @@
 # Changes/Release Notes
 
-On this page we provide a summary of the main API changes, new features and examples
-for each release of libCEED.
+On this page we provide a summary of the main API changes, new features and examples for each release of libCEED.
 
 (main)=
 
@@ -9,8 +8,20 @@ for each release of libCEED.
 
 ### Interface changes
 
+- Added {c:func}`CeedOperatorSetName` for more readable {c:func}`CeedOperatorView` output.
+
+(v0-10-1)=
+
+## v0.10.1 (Apr 11, 2022)
+
+### Interface changes
+
 - Added {c:func}`CeedQFunctionSetUserFlopsEstimate` and {c:func}`CeedOperatorGetFlopsEstimate` to facilitate estimating FLOPs in operator application.
 
+### Bugfix
+
+- Install JiT source files in install directory to fix GPU functionality for installed libCEED.
+
 (v0-10)=
 
 ## v0.10 (Mar 21, 2022)
diff --git a/examples/fluids/README.md b/examples/fluids/README.md
index 43b41194cf..602803fc1e 100644
--- a/examples/fluids/README.md
+++ b/examples/fluids/README.md
@@ -60,7 +60,7 @@ The following options are common among all problem types:
   - Polynomial degree of tensor product basis (must be >= 1)
   - `1`
 
-* - `-qextra`
+* - `-q_extra`
   - Number of extra quadrature points
   - `2`
 
@@ -123,18 +123,65 @@ The following options are common among all problem types:
 
 For the case of a square/cubic mesh, the list of face indices to be used with `-bc_wall`, `bc_inflow`, `bc_outflow` and/or `-bc_slip_x`, `-bc_slip_y`, and `-bc_slip_z` are:
 
-* 2D:
-  - faceMarkerBottom = 1
-  - faceMarkerRight  = 2
-  - faceMarkerTop    = 3
-  - faceMarkerLeft   = 4
-* 3D:
-  - faceMarkerBottom = 1
-  - faceMarkerTop    = 2
-  - faceMarkerFront  = 3
-  - faceMarkerBack   = 4
-  - faceMarkerRight  = 5
-  - faceMarkerLeft   = 6
+:::{list-table} 2D Face ID Labels
+:header-rows: 1
+* - PETSc Face Name
+  - Cartesian direction
+  - Face ID
+
+* - faceMarkerBottom
+  - -z
+  - 1
+
+* - faceMarkerRight
+  - +x
+  - 2
+
+* - faceMarkerTop
+  - +z
+  - 3
+
+* - faceMarkerLeft
+  - -x
+  - 4
+:::
+
+:::{list-table} 3D Face ID Labels
+:header-rows: 1
+* - PETSc Face Name
+  - Cartesian direction
+  - Face ID
+
+* - faceMarkerBottom
+  - -z
+  - 1
+
+* - faceMarkerTop
+  - +z
+  - 2
+
+* - faceMarkerFront
+  - -y
+  - 3
+
+* - faceMarkerBack
+  - +y
+  - 4
+
+* - faceMarkerRight
+  - +x
+  - 5
+
+* - faceMarkerLeft
+  - -x
+  - 6
+:::
+
+### Advection
+
+For testing purposes, there is a reduced mode for pure advection, which holds density $\rho$ and momentum density $\rho \bm u$ constant while advecting "total energy density" $E$. These are available in 2D and 3D.
+
+#### 2D advection
 
 For the 2D advection problem, the following additional command-line options are available:
 
@@ -210,6 +257,8 @@ and the `translation` mode with:
 ```
 Note the lengths in `-dm_plex_box_upper` are given in meters, and will be nondimensionalized according to `-units_meter`.
 
+#### 3D advection
+
 For the 3D advection problem, the following additional command-line options are available:
 
 :::{list-table} Advection3D Runtime Options
@@ -293,6 +342,10 @@ and the `translation` mode with:
 ./navierstokes -problem advection -dm_plex_box_faces 10,10,10 -dm_plex_dim 3 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 8000,8000,8000 -wind_type translation -wind_translation .5,-1,0 -bc_inflow 1,2,3,4,5,6
 ```
 
+### Inviscid Ideal Gas
+
+#### Isentropic Euler vortex
+
 For the Isentropic Vortex problem, the following additional command-line options are available:
 
 :::{list-table} Isentropic Vortex Runtime Options
@@ -340,9 +393,11 @@ This problem can be run with:
 ./navierstokes -problem euler_vortex -dm_plex_box_faces 20,20,1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,1000,50 -dm_plex_dim 3 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -mean_velocity .5,-.8,0.
 ```
 
-For the Density Current problem, the following additional command-line options are available:
+#### Sod shock tube
+
+For the Shock Tube problem, the following additional command-line options are available:
 
-:::{list-table} Euler Vortex Runtime Options
+:::{list-table} Shock Tube Runtime Options
 :header-rows: 1
 
 * - Option
@@ -350,34 +405,58 @@ For the Density Current problem, the following additional command-line options a
   - Default value
   - Unit
 
-* - `-center`
-  - Location of bubble center
-  - `(lx,ly,lz)/2`
-  - `(m,m,m)`
+* - `-units_meter`
+  - 1 meter in scaled length units
+  - `1E-2`
+  -
 
-* - `-dc_axis`
-  - Axis of density current cylindrical anomaly, or `(0,0,0)` for spherically symmetric
-  - `(0,0,0)`
+* - `-units_second`
+  - 1 second in scaled time units
+  - `1E-2`
   -
 
-* - `-rc`
-  - Characteristic radius of thermal bubble
-  - `1000`
-  - `m`
+* - `-yzb`
+  - Use YZB discontinuity capturing
+  - `none`
+  -
+
+* - `-stab`
+  - Stabilization method (`none`, `su`, or `supg`)
+  - `none`
+  -
+:::
+
+This problem can be run with:
+
+```
+./navierstokes -problem shocktube -yzb -stab su -bc_slip_z 3,4 -bc_slip_y 1,2 -bc_wall 5,6 -dm_plex_dim 3 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,100,100 -dm_plex_box_faces 200,1,1 -units_second 0.1 
+```
+
+### Newtonian viscosity, Ideal Gas
+
+For the Density Current, Channel, and Blasius problems, the following common command-line options are available:
+
+:::{list-table} Newtonian Ideal Gas problems Runtime Options
+:header-rows: 1
+
+* - Option
+  - Description
+  - Default value
+  - Unit
 
 * - `-units_meter`
   - 1 meter in scaled length units
-  - `1E-2`
+  - `1`
   -
 
 * - `-units_second`
   - 1 second in scaled time units
-  - `1E-2`
+  - `1`
   -
 
 * - `-units_kilogram`
   - 1 kilogram in scaled mass units
-  - `1E-6`
+  - `1`
   -
 
 * - `-units_Kelvin`
@@ -391,29 +470,34 @@ For the Density Current problem, the following additional command-line options a
   -
 
 * - `-c_tau`
-  - Stabilization constant
+  - Stabilization constant, $c_\tau$
   - `0.5`
   -
 
-* - `-theta0`
-  - Reference potential temperature
-  - `300`
-  - `K`
+* - `-Ctau_t`
+  - Stabilization time constant, $C_t$
+  - `1.0`
+  -
 
-* - `-thetaC`
-  - Perturbation of potential temperature
-  - `-15`
-  - `K`
+* - `-Ctau_v`
+  - Stabilization viscous constant, $C_v$
+  - `36.0`
+  -
 
-* - `-P0`
-  - Atmospheric pressure
-  - `1E5`
-  - `Pa`
+* - `-Ctau_C`
+  - Stabilization continuity constant, $C_c$
+  - `1.0`
+  -
 
-* - `-N`
-  - Brunt-Vaisala frequency
-  - `0.01`
-  - `1/s`
+* - `-Ctau_M`
+  - Stabilization momentum constant, $C_m$
+  - `1.0`
+  -
+
+* - `-Ctau_E`
+  - Stabilization energy constant, $C_E$
+  - `1.0`
+  -
 
 * - `-cv`
   - Heat capacity at constant volume
@@ -446,8 +530,218 @@ For the Density Current problem, the following additional command-line options a
   - `W/(m K)`
 :::
 
+#### Density current
+
+For the Density Current problem, the following command-line options are available in
+addition to the Newtonian Ideal Gas options:
+
+:::{list-table} Density Current Runtime Options
+:header-rows: 1
+
+* - Option
+  - Description
+  - Default value
+  - Unit
+
+* - `-center`
+  - Location of bubble center
+  - `(lx,ly,lz)/2`
+  - `(m,m,m)`
+
+* - `-dc_axis`
+  - Axis of density current cylindrical anomaly, or `(0,0,0)` for spherically symmetric
+  - `(0,0,0)`
+  -
+
+* - `-rc`
+  - Characteristic radius of thermal bubble
+  - `1000`
+  - `m`
+
+* - `-theta0`
+  - Reference potential temperature
+  - `300`
+  - `K`
+
+* - `-thetaC`
+  - Perturbation of potential temperature
+  - `-15`
+  - `K`
+
+* - `-P0`
+  - Atmospheric pressure
+  - `1E5`
+  - `Pa`
+
+* - `-N`
+  - Brunt-Vaisala frequency
+  - `0.01`
+  - `1/s`
+:::
+
 This problem can be run with:
 
 ```
-./navierstokes -problem density_current -dm_plex_box_faces 16,1,8 -degree 1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 2000,125,1000 -dm_plex_dim 3 -rc 400. -bc_wall 1,2,5,6 -wall_comps 1,2,3 -bc_slip_y 3,4 -viz_refine 2
+./navierstokes -problem density_current -dm_plex_box_faces 16,1,8 -degree 1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 2000,125,1000 -dm_plex_dim 3 -rc 400. -bc_wall 1,2,5,6 -wall_comps 1,2,3 -bc_slip_y 3,4 -mu 75
+```
+
+#### Channel flow
+
+For the Channel problem, the following command-line options are available in
+addition to the Newtonian Ideal Gas options:
+
+:::{list-table} Channel Runtime Options
+:header-rows: 1
+
+* - Option
+  - Description
+  - Default value
+  - Unit
+
+* - `-umax`
+  - Maximum/centerline velocity of the flow
+  - `10`
+  - `m/s`
+
+* - `-theta0`
+  - Reference potential temperature
+  - `300`
+  - `K`
+
+* - `-P0`
+  - Atmospheric pressure
+  - `1E5`
+  - `Pa`
+:::
+
+This problem can be run with the `channel.yaml` file via:
+
+```
+./navierstokes -options_file channel.yaml
+```
+```{literalinclude} ../../../../../examples/fluids/channel.yaml
+:language: yaml
+```
+
+#### Blasius boundary layer
+
+For the Blasius problem, the following command-line options are available in
+addition to the Newtonian Ideal Gas options:
+
+:::{list-table} Blasius Runtime Options
+:header-rows: 1
+
+* - Option
+  - Description
+  - Default value
+  - Unit
+
+* - `-Uinf`
+  - Freestream velocity
+  - `40`
+  - `m/s`
+
+* - `-delta0`
+  - Boundary layer height at the inflow
+  - `4.2e-4`
+  - `m`
+
+* - `-theta0`
+  - Reference potential temperature
+  - `288`
+  - `K`
+
+* - `-P0`
+  - Atmospheric pressure
+  - `1.01E5`
+  - `Pa`
+
+* - `-platemesh_refine_height`
+  - Height of the region into which `-platemesh_Ndelta` elements should be refined
+  - `5.9E-4`
+  - `m`
+
+* - `-platemesh_Ndelta`
+  - Number of elements to keep below `-platemesh_refine_height`
+  - `45`
+  -
+
+* - `-platemesh_growth`
+  - Growth rate of the elements in the refinement region
+  - `1.08`
+  -
+
+* - `-platemesh_top_angle`
+  - Downward angle of the top face of the domain. This face serves as an outlet.
+  - `5`
+  - `degrees`
+
+* - `-stg_use`
+  - Whether to use synthetic turbulence generation (STG) for the inflow conditions
+  - `false`
+  -
+
+* - `-platemesh_y_node_locs_path`
+  - Path to file with y node locations. If empty, will use mesh warping instead.
+  - `""`
+  -
+:::
+
+This problem can be run with the `blasius.yaml` file via:
+
+```
+./navierstokes -options_file blasius.yaml
 ```
+
+```{literalinclude} ../../../../../examples/fluids/blasius.yaml
+:language: yaml
+```
+
+#### STG Inflow for Flat Plate
+
+Using the STG Inflow for the Blasius problem adds the following command-line
+options:
+
+:::{list-table} STG Inflow Runtime Options
+:header-rows: 1
+
+* - Option
+  - Description
+  - Default value
+  - Unit
+
+* - `-stg_inflow_path`
+  - Path to the STGInflow file
+  - `./STGInflow.dat`
+  -
+
+* - `-stg_rand_path`
+  - Path to the STGRand file
+  - `./STGRand.dat`
+  -
+
+* - `-stg_alpha`
+  - Growth rate of the wavemodes
+  - `1.01`
+  -
+
+* - `-stg_u0`
+  - Convective velocity, $U_0$
+  - `0.0`
+  - `m/s`
+
+* - `-stg_mean_only`
+  - Only impose the mean velocity (no fluctuations)
+  - `false`
+  -
+
+:::
+
+This problem can be run with the `blasius.yaml` file via:
+
+```
+./navierstokes -options_file blasius.yaml -stg_use true
+```
+
+Note the added `-stg_use true` flag. This overrides the `stg: use: false`
+setting in the `blasius.yaml` file, enabling the use of the STG inflow.
diff --git a/examples/fluids/STGInflow_blasius.dat b/examples/fluids/STGInflow_blasius.dat
new file mode 100644
index 0000000000..2b12fb7e88
--- /dev/null
+++ b/examples/fluids/STGInflow_blasius.dat
@@ -0,0 +1,102 @@
+101 14
+0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00
+4.200000000000002986e-06 6.641099321171224368e-01 -2.688275721802099928e-10 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.503522332443360197e-06 7.121033120206911038e-01 5.625993976502234810e-06 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.828979380670493501e-06 7.635650401647769980e-01 1.250142628597567334e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.177956305656680519e-06 8.187457644416862301e-01 2.084248362952813662e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.552152823557324425e-06 8.779142463706052224e-01 3.090016074070208580e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.953391484292584302e-06 9.413586701190954642e-01 4.296531750310341824e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+6.383626548402324046e-06 1.009388046123934402e+00 5.737481358311177256e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+6.844953505406707854e-06 1.082333716147739100e+00 7.451857365815305122e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+7.339619280032274129e-06 1.160550967101845909e+00 9.484772346308667436e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+7.870033176013613616e-06 1.244420761495690142e+00 1.188839579386735820e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+8.438778610773199320e-06 1.334350999618601818e+00 1.458894855932344988e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+9.048625698133911575e-06 1.430771551613953863e+00 1.581560748020152847e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+9.702544740349270590e-06 1.534160149046711830e+00 1.747102975094025161e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.040372069516518436e-05 1.645020353203479058e+00 1.963713567554627713e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.115556868837608125e-05 1.763892116346153394e+00 2.240939354861193116e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.196175064743038823e-05 1.891354411586620987e+00 2.589894105302736720e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.282619313710034132e-05 2.028028052815169957e+00 3.023503103593241275e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.375310648408648365e-05 2.174578718417349066e+00 3.556785072386167831e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.474700528370354663e-05 2.331720193506353400e+00 4.207177082015844298e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.581273038852838925e-05 2.500217846462653437e+00 4.994908941792155924e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.695547248610555476e-05 2.680883861457040496e+00 5.889655958320385437e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.818079738054887299e-05 2.874548263010431093e+00 6.602281079523934521e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.949467310117369896e-05 3.082208252866376785e+00 7.503663649287806939e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.090349897019489394e-05 3.304875253311903460e+00 8.627999991332345117e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.241413677106653097e-05 3.543633779312999721e+00 1.001503600974398660e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.403394416927094298e-05 3.799646720719475290e+00 1.171092923974609594e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.577081054833535852e-05 4.074092395254440113e+00 1.357158033230107053e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.763319543561788559e-05 4.368221208458913374e+00 1.540937148582322697e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.963016970501833084e-05 4.683605893906867657e+00 1.769681736351030443e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.177145975729444162e-05 5.021782554436745372e+00 2.051386266834596614e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.406749489316689041e-05 5.384299303649559221e+00 2.379111642902919863e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.652945810994693139e-05 5.772598926784612061e+00 2.710603356932465152e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.916934056909526795e-05 6.188959884991291460e+00 3.121345968641189156e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.200000000000003156e-05 6.635410093001123499e+00 3.625347520186018510e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.503522332443360367e-05 7.113125338991705071e+00 4.133124687953142543e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.828979380670492993e-05 7.625363738722001017e+00 4.761465522541206800e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.177956305656680519e-05 8.173969710347833484e+00 5.483355699626112773e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.552152823557324933e-05 8.761155029286637586e+00 6.288849893152771361e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.953391484292584133e-05 9.390306008098571411e+00 7.253785738742655163e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+6.383626548402323707e-05 1.006267945968176925e+01 8.310353563038722774e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+6.844953505406707345e-05 1.078260765264970900e+01 9.566162359885154245e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+7.339619280032274467e-05 1.155157312880497855e+01 1.097617661229051932e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+7.870033176013614294e-05 1.237288013889156701e+01 1.259644675763084941e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+8.438778610773198642e-05 1.324659660273999329e+01 1.442147547686566111e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+9.048625698133931227e-05 1.411776521727952627e+01 1.550008743586907094e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+9.702544740349271267e-05 1.505189076749435273e+01 1.696394574577553271e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.040372069516519487e-04 1.605352297623211655e+01 1.888691112446131207e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.115556868837609277e-04 1.712754036217140907e+01 2.135507382076490615e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.196175064743037705e-04 1.827917400098332124e+01 2.446866950728195447e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.282619313710035521e-04 1.951403300364581384e+01 2.834428819338968841e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.375310648408647010e-04 2.083813183600515728e+01 3.311742048129684424e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.474700528370356052e-04 2.225791961264622643e+01 3.894539215743021698e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.581273038852837570e-04 2.378031150774935654e+01 4.601074578209482513e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.695547248610555476e-04 2.535075036351783240e+01 5.382694402282042073e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.818079738054887231e-04 2.661271076395308555e+01 5.835740987402229796e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.949467310117369964e-04 2.796586953681483223e+01 6.410968369375426301e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.090349897019489259e-04 2.941681733513018671e+01 7.130600537355311075e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.241413677106652961e-04 3.097262110011179459e+01 8.020473439929037829e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.403394416927094298e-04 3.264085848118182298e+01 9.110596407971430222e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.577081054833535445e-04 3.413934342938573963e+01 1.014467216134999905e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.763319543561788762e-04 3.511021163702044134e+01 1.073549573507629751e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.963016970501833151e-04 3.615124179295903417e+01 1.147360075583040984e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.177145975729444433e-04 3.726750430629629562e+01 1.238529307048257255e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.406749489316688770e-04 3.830092385213693262e+01 1.332273456401334155e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.652945810994693275e-04 3.872325220338986185e+01 1.367304294075183113e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.916934056909526524e-04 3.917610105136403575e+01 1.410880782566600322e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.200000000000002885e-04 3.966167602757187893e+01 1.464521141298004425e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.503522332443360638e-04 3.976907916245198749e+01 1.475593379115296422e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.828979380670493128e-04 3.988424402352507769e+01 1.489351353721966587e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.177956305656680790e-04 3.996514744096749894e+01 1.500061967534003227e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.552152823557325069e-04 3.998205278113282901e+01 1.502314641381908289e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.953391484292584268e-04 3.999712512512702034e+01 1.504595355677841584e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+6.383626548402323436e-04 3.999865492740099882e+01 1.504828737190639643e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+6.844953505406714935e-04 3.999990491833290207e+01 1.505044551060463753e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+7.339619280032281786e-04 3.999998824873846814e+01 1.505059777264049770e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+7.870033176013621883e-04 3.999999720599615216e+01 1.505061502639564730e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+8.438778610773207045e-04 3.999999992423261119e+01 1.505062073695294655e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+9.048625698133920927e-04 3.999999998123077205e+01 1.505062086204041749e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+9.702544740349281025e-04 3.999999999957353225e+01 1.505062090609098069e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.040372069516519622e-03 3.999999999999438671e+01 1.505062090717198819e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.115556868837609249e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.196175064743037596e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.282619313710035413e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.375310648408646901e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.474700528370356161e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.581273038852837407e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.695547248610553741e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.818079738054885226e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.949467310117369801e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.090349897019489530e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.241413677106653124e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.403394416927094081e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.577081054833535879e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.763319543561788653e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.963016970501833151e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.177145975729444433e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.406749489316688879e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.652945810994693166e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.916934056909526958e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.200000000000003210e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
diff --git a/examples/fluids/STGRand.dat b/examples/fluids/STGRand.dat
new file mode 100644
index 0000000000..9711655715
--- /dev/null
+++ b/examples/fluids/STGRand.dat
@@ -0,0 +1,3 @@
+2 7
+1.0E0 0.0E0                0.0E0                1.4E0 0.0E0 7.071067811865475E-1 7.071067811865475E-1
+0.0E0 7.071067811865475E-1 7.071067811865475E-1 2.4E0 1.0E0 0.0E0                0.0E0
diff --git a/examples/fluids/blasius.yaml b/examples/fluids/blasius.yaml
new file mode 100644
index 0000000000..cf3056b1ed
--- /dev/null
+++ b/examples/fluids/blasius.yaml
@@ -0,0 +1,53 @@
+problem: 'blasius'
+
+implicit: true
+ts:
+  adapt_type: 'none'
+  type: 'beuler'
+  dt: 0.2e-5
+  max_time: 1.0e-3
+output_freq: 10
+
+## Linear Settings:
+degree: 1
+dm_plex_box_faces: 40,60,1
+platemesh_nDelta: 45
+
+# # Quadratic Settings:
+# degree: 2
+# dm_plex_box_faces: 20,30,1
+# platemesh:
+#   nDelta: 22
+#   growth: 1.1664 # 1.08^2
+
+stab: 'supg'
+Ctau_t: 1
+#Ctau_v: 36,60,128 is what PHASTA has for p=1,2,3
+# Linear Settings:
+Ctau_v: 36
+Ctau_C: 0.25
+Ctau_M: 0.25
+Ctau_E: 0.125
+# # Quadratic Settings:
+# Ctau_v: 60
+# Ctau_C: 0.125
+# Ctau_M: 0.125
+# Ctau_E: 0.125
+
+q_extra: 0
+
+dm_plex_box_lower: 0,0,0
+dm_plex_box_upper: 4.2e-3,4.2e-3,5.e-5
+dm_plex_dim: 3
+# Faces labeled 1=z- 2=z+ 3=y- 4=y+ 5=x+ 6=x-
+bc_slip_z: 1,2
+bc_wall: 3
+wall_comps: 1,2,3
+bc_inflow: 6
+bc_outflow: 5,4
+g: 0,0,0
+
+stg:
+  use: false
+  inflow_path: "./STGInflow_blasius.dat"
+  mean_only: true
diff --git a/examples/fluids/channel.yaml b/examples/fluids/channel.yaml
new file mode 100644
index 0000000000..0c7e89d7f5
--- /dev/null
+++ b/examples/fluids/channel.yaml
@@ -0,0 +1,19 @@
+problem: 'channel'
+mu: .01
+
+umax: 40
+implicit: true
+ts:
+  type: 'beuler'
+  adapt_type: 'none'
+  dt: 5e-8
+
+dm_plex_box_lower: 0,0,0
+dm_plex_box_upper: 1,1,.1
+dm_plex_dim: 3
+degree: 1
+dm_plex_box_faces: 10,10,1
+bc_slip_z: 1,2
+bc_wall: 3,4
+wall_comps: 1,2,3
+dm_plex_box_bd: 'periodic,none,none'
diff --git a/examples/fluids/index.md b/examples/fluids/index.md
index 5f355cb17d..d4ccbac231 100644
--- a/examples/fluids/index.md
+++ b/examples/fluids/index.md
@@ -173,7 +173,7 @@ Our formulation follows {cite}`hughesetal2010`, which offers a comprehensive rev
   \int_{\Omega} \bm v \cdot \left( \frac{\partial \bm{q}_N}{\partial t} - \bm{S}(\bm{q}_N) \right)  \,dV
   - \int_{\Omega} \nabla \bm v \!:\! \bm{F}(\bm{q}_N)\,dV & \\
   + \int_{\partial \Omega} \bm v \cdot \bm{F}(\bm{q}_N) \cdot \widehat{\bm{n}} \,dS & \\
-  + \int_{\Omega} \bm{P}(\bm v)^T \, \left( \frac{\partial \bm{q}_N}{\partial t} \, + \,
+  + \int_{\Omega} \mathcal{P}(\bm v)^T \, \left( \frac{\partial \bm{q}_N}{\partial t} \, + \,
   \nabla \cdot \bm{F} \, (\bm{q}_N) - \bm{S}(\bm{q}_N) \right) \,dV &= 0
   \, , \; \forall \bm v \in \mathcal{V}_p
   \end{aligned}
@@ -201,11 +201,13 @@ In both {eq}`eq-weak-vector-ns-su` and {eq}`eq-weak-vector-ns-supg`, $\mathcal P
 It is defined as
 
 $$
-\mathcal P(\bm v) \equiv \left(\bm{\tau} \cdot \frac{\partial \bm{F}_{\text{adv}} (\bm{q}_N)}{\partial \bm{q}_N} \right)^T \, \nabla \bm v\,,
-$$
+\mathcal P(\bm v) \equiv \bm{\tau} \left(\frac{\partial \bm{F}_{\text{adv}} (\bm{q}_N)}{\partial \bm{q}_N} \right) \, \nabla \bm v\,,
+$$ (eq-streamline-P)
 
-where parameter $\bm{\tau} \in \mathbb R^{3\times 3}$ (spatial indices) or $\bm \tau \in \mathbb R^{5\times 5}$ (field indices) is an intrinsic time scale matrix.
-This expression contains the flux Jacobian, which we express in variational notation by differentiating the advective flux $\bm F_{\text{adv}}$ of {eq}`eq-ns-flux`
+where parameter $\bm{\tau} \in \mathbb R^{3}$ (spatial index) or $\bm \tau \in \mathbb R^{5\times 5}$ (field indices) is an intrinsic time scale matrix.
+Most generally, we consider $\bm\tau \in \mathbb R^{3,5,5}$.
+This expression contains the advective flux Jacobian, which may be thought of as mapping from a 5-vector (state) to a $(5,3)$ tensor (flux) or from a $(5,3)$ tensor (gradient of state) to a 5-vector (time derivative of state); the latter is used in {eq}`eq-streamline-P` because it's applied to $\nabla\bm v$.
+The forward variational form can be readily expressed by differentiating $\bm F_{\text{adv}}$ of {eq}`eq-ns-flux`
 
 $$
 \begin{aligned}
@@ -219,14 +221,14 @@ $$
 $$
 
 where $\diff P$ is defined by differentiating {eq}`eq-state`.
-In this notation, we may equivalently write the stabilization term as
+This action is also readily computed by forward-mode AD, but since $\bm v$ is a test function, we actually need the action of the adjoint to use {eq}`eq-streamline-P` in finite element computation; that can be computed by reverse-mode AD.
+We may equivalently write the stabilization term as
 
 $$
-\mathcal P(\bm v)^T \bm r = \nabla \bm v \bm\tau \diff\bm F_{\text{adv}}(\bm r),
+\mathcal P(\bm v)^T \bm r = \nabla \bm v \tcolon \left(\frac{\partial \bm F_{\text{adv}}}{\partial \bm q}\right)^T \, \bm\tau \bm r,
 $$
 
-where $\bm r$ is the strong form residual.
-Note that both $\nabla \bm v$ and $\diff \bm F$ are $5\times 3$ matrices and that $\bm\tau$ can be defined with spatial indices, or field indices, leading to a stabilization term of $(\nabla \bm v)_{\alpha i} \tau_{ij} \diff \bm F_{\alpha j}$ for spatial or $(\nabla \bm v)_{\alpha i} \tau_{\alpha \beta} \diff \bm F_{\beta i}$ for field, where $\alpha,\beta$ are field indices and $i,j$ are spatial indices.
+where $\bm r$ is the strong form residual and $\bm\tau$ is a $5\times 5$ matrix.
 
 :::{dropdown} Stabilization scale $\bm\tau$
 A velocity vector $\bm u$ can be pulled back to the reference element as $\bm u_{\bm X} = \nabla_{\bm x}\bm X \cdot \bm u$, with units of reference length (non-dimensional) per second.
@@ -260,12 +262,32 @@ $$ (eq-test-perturbation-advdiff)
 
 See {cite}`hughesetal2010` equations 15-17 and 34-36 for further discussion of this formulation.
 
-For the Navier-Stokes and Euler equations in primitive variables, {cite}`whiting2003hierarchical` defines a $5\times 5$ diagonal stabilization consisting of
+For the Navier-Stokes and Euler equations, {cite}`whiting2003hierarchical` defines a $5\times 5$ diagonal stabilization $\mathrm{diag}(\tau_c, \tau_m, \tau_m, \tau_m, \tau_E)$ consisting of
 1. continuity stabilization $\tau_c$
 2. momentum stabilization $\tau_m$
 3. energy stabilization $\tau_E$
 
-However, since our equations are in conservative form, we follow {cite}`hughesetal2010` in defining a $3\times 3$ diagonal stabilization according to spatial criterion 2 (equation 27) as follows.
+The Navier-Stokes code in this example uses the following formulation for $\tau_c$, $\tau_m$, $\tau_E$:
+
+$$ 
+\begin{aligned}
+
+\tau_c &= \frac{C_c \mathcal{F}}{8\rho \trace(\bm g)} \\
+\tau_m &= \frac{C_m}{\mathcal{F}} \\
+\tau_E &= \frac{C_E}{\mathcal{F} c_v} \\
+\end{aligned}
+$$
+
+$$
+\mathcal{F} = \sqrt{ \rho^2 \left [ \left(\frac{2C_t}{\Delta t}\right)^2
++ \bm u \cdot (\bm u \cdot  \bm g)
++ C_v \mu^2 \Vert \bm g \Vert_F ^2\right]}
+$$
+
+where $\bm g = \nabla_{\bm x} \bm{X} \cdot \nabla_{\bm x} \bm{X}$ is the metric tensor and $\Vert \cdot \Vert_F$ is the Frobenius norm.
+This formulation is currently not available in the Euler code.
+
+In the Euler code, we follow {cite}`hughesetal2010` in defining a $3\times 3$ diagonal stabilization according to spatial criterion 2 (equation 27) as follows.
 
 $$
 \tau_{ii} = c_{\tau} \frac{2 \xi(\mathrm{Pe})}{(\lambda_{\max \text{abs}})_i \lVert \nabla_{x_i} \bm X \rVert}
@@ -351,6 +373,44 @@ $$
 where $(\bar{x}, \, \bar{y}) = (x-x_c, \, y-y_c)$, $(x_c, \, y_c)$ represents the center of the domain, $r^2=\bar{x}^2 + \bar{y}^2$, and $\epsilon$ is the vortex strength ($\epsilon$ < 10).
 There is no perturbation in the entropy $S=P/\rho^\gamma$ ($\delta S=0)$.
 
+(problem-shock-tube)=
+
+## Shock Tube
+
+This test problem is based on Sod's Shock Tube (from {cite}`sodshocktubewiki`), a canonical test case for discontinuity capturing in one dimension. For this problem, the three-dimensional Euler equations are formulated exactly as in the Isentropic Vortex problem. The default initial conditions are $P=1$, $\rho=1$ for the driver section and $P=0.1$, $\rho=0.125$ for the driven section. The initial velocity is zero in both sections. Slip boundary conditions are applied to the side walls and wall boundary conditions are applied at the end walls.
+
+SU upwinding and discontinuity capturing have been implemented into the explicit timestepping operator for this problem. Discontinuity capturing is accomplished using a modified version of the $YZ\beta$ operator described in {cite}`tezduyar2007yzb`. This discontinuity capturing scheme involves the introduction of a dissipation term of the form
+
+$$
+\int_{\Omega} \nu_{SHOCK} \nabla \bm v \!:\! \nabla \bm q dV
+$$
+
+The shock capturing viscosity is implemented following the first formulation described in {cite}`tezduyar2007yzb`. The characteristic velocity $u_{cha}$ is taken to be the acoustic speed while the reference density $\rho_{ref}$ is just the local density. Shock capturing viscosity is defined by the following
+
+$$
+\nu_{SHOCK} = \tau_{SHOCK} u_{cha}^2
+$$
+
+where,
+
+$$
+\tau_{SHOCK} = \frac{h_{SHOCK}}{2u_{cha}} \left( \frac{ \,|\, \nabla \rho \,|\, h_{SHOCK}}{\rho_{ref}} \right)^{\beta}
+$$
+
+$\beta$ is a tuning parameter set between 1 (smoother shocks) and 2 (sharper shocks). The parameter $h_{SHOCK}$ is a length scale that is proportional to the element length in the direction of the density gradient unit vector. This density gradient unit vector is defined as $\hat{\bm j} = \frac{\nabla \rho}{|\nabla \rho|}$. The original formulation of Tezduyar and Senga relies on the shape function gradient to define the element length scale, but this gradient is not available to qFunctions in libCEED. To avoid this problem, $h_{SHOCK}$ is defined in the current implementation as
+
+$$
+h_{SHOCK} = 2 \left( C_{YZB} \,|\, \bm p \,|\, \right)^{-1}
+$$
+
+where
+
+$$
+p_k = \hat{j}_i \frac{\partial \xi_i}{\partial x_k}
+$$
+
+The constant $C_{YZB}$ is set to 0.1 for piecewise linear elements in the current implementation. Larger values approaching unity are expected with more robust stabilization and implicit timestepping.
+
 (problem-density-current)=
 
 ## Density Current
@@ -364,3 +424,223 @@ $$
 
 where $P_0$ is the atmospheric pressure.
 For this problem, we have used no-slip and non-penetration boundary conditions for $\bm{u}$, and no-flux for mass and energy densities.
+
+## Channel
+
+A compressible channel flow. Analytical solution given in
+{cite}`whitingStabilizedFEM1999`:
+
+$$ u_1 = u_{\max} \left [ 1 - \left ( \frac{x_2}{H}\right)^2 \right] \quad \quad u_2 = u_3 = 0$$
+$$T = T_w \left [ 1 + \frac{Pr \hat{E}_c}{3} \left \{1 - \left(\frac{x_2}{H}\right)^4  \right \} \right]$$
+$$p = p_0 - \frac{2\rho_0 u_{\max}^2 x_1}{Re_H H}$$
+
+where $H$ is the channel half-height, $u_{\max}$ is the center velocity, $T_w$ is the temperature at the wall, $Pr=\frac{\mu}{c_p \kappa}$ is the Prandtl number, $\hat E_c = \frac{u_{\max}^2}{c_p T_w}$ is the modified Eckert number, and $Re_H = \frac{u_{\max}H}{\nu}$ is the Reynolds number.
+
+Boundary conditions are periodic in the streamwise direction, with no-slip and non-penetration boundary conditions at the walls.
+The flow is driven by a body force.
+
+## Flat Plate Boundary Layer
+
+### Laminar Boundary Layer - Blasius
+
+Simulation of a laminar boundary layer flow, with the inflow being prescribed
+by a [Blasius similarity
+solution](https://en.wikipedia.org/wiki/Blasius_boundary_layer). At the inflow,
+the velocity is prescribed by the Blasius solution profile, density is set
+constant, and temperature is allowed to float. Using `weakT: true`, density is
+allowed to float and temperature is set constant. At the outlet, a user-set
+pressure is used for pressure in the inviscid flux terms (all other inviscid
+flux terms use interior solution values). The viscous traction is also set to
+the analytic Blasius profile value at both the inflow and the outflow. The wall
+is a no-slip, no-penetration, no-heat flux condition. The top of the domain is
+treated as an outflow and is tilted at a downward angle to ensure that flow is
+always exiting it.
+
+### Turbulent Boundary Layer
+
+Simulating a turbulent boundary layer without modeling the turbulence requires
+resolving the turbulent flow structures. These structures may be introduced
+into the simulations either by allowing a laminar boundary layer to naturally
+transition to turbulence, or by imposing turbulent structures at the inflow. The
+latter approach has been taken here, specifically using a *synthetic turbulence
+generation* (STG) method.
+
+#### Synthetic Turbulence Generation (STG) Boundary Condition
+
+We use the STG method described in
+{cite}`shurSTG2014`. Below follows a re-description of the formulation to match
+the present notation, and then a description of the implementation and usage.
+
+##### Equation Formulation
+
+$$
+\bm{u}(\bm{x}, t) = \bm{\overline{u}}(\bm{x}) + \bm{C}(\bm{x}) \cdot \bm{v}'
+$$
+
+$$
+\begin{aligned}
+\bm{v}' &= 2 \sqrt{3/2} \sum^N_{n=1} \sqrt{q^n(\bm{x})} \bm{\sigma}^n \cos(\kappa^n \bm{d}^n \cdot \bm{\hat{x}}^n(\bm{x}, t) + \phi^n ) \\
+\bm{\hat{x}}^n &= \left[(x - U_0 t)\max(2\kappa_{\min}/\kappa^n, 0.1) , y, z  \right]^T
+\end{aligned}
+$$
+
+Here, we define the number of wavemodes $N$, set of random numbers $ \{\bm{\sigma}^n,
+\bm{d}^n, \phi^n\}_{n=1}^N$, the Cholesky decomposition of the Reynolds stress
+tensor $\bm{C}$ (such that $\bm{R} = \bm{CC}^T$ ), bulk velocity $U_0$,
+wavemode amplitude $q^n$, wavemode frequency $\kappa^n$, and $\kappa_{\min} =
+0.5 \min_{\bm{x}} (\kappa_e)$.
+
+$$
+\kappa_e = \frac{2\pi}{\min(2d_w, 3.0 l_t)}
+$$
+
+where $l_t$ is the turbulence length scale, and $d_w$ is the distance to the
+nearest wall.
+
+
+The set of wavemode frequencies is defined by a geometric distribution:
+
+$$
+\kappa^n = \kappa_{\min} (1 + \alpha)^{n-1} \ , \quad \forall n=1, 2, ... , N
+$$
+
+The wavemode amplitudes $q^n$ are defined by a model energy spectrum $E(\kappa)$:
+
+$$
+q^n = \frac{E(\kappa^n) \Delta \kappa^n}{\sum^N_{n=1} E(\kappa^n)\Delta \kappa^n} \ ,\quad \Delta \kappa^n = \kappa^n - \kappa^{n-1}
+$$
+
+$$ E(\kappa) = \frac{(\kappa/\kappa_e)^4}{[1 + 2.4(\kappa/\kappa_e)^2]^{17/6}} f_\eta f_{\mathrm{cut}} $$
+
+$$
+f_\eta = \exp \left[-(12\kappa /\kappa_\eta)^2 \right], \quad
+f_\mathrm{cut} = \exp \left( - \left [ \frac{4\max(\kappa-0.9\kappa_\mathrm{cut}, 0)}{\kappa_\mathrm{cut}} \right]^3 \right)
+$$
+
+$\kappa_\eta$ represents turbulent dissipation frequency, and is given as $2\pi
+(\nu^3/\varepsilon)^{-1/4}$ with $\nu$ the kinematic viscosity and
+$\varepsilon$ the turbulent dissipation. $\kappa_\mathrm{cut}$ approximates the
+effective cutoff frequency of the mesh (viewing the mesh as a filter on
+solution over $\Omega$) and is given by:
+
+$$
+\kappa_\mathrm{cut} = \frac{2\pi}{ 2\min\{ [\max(h_y, h_z, 0.3h_{\max}) + 0.1 d_w], h_{\max} \} }
+$$
+
+The enforcement of the boundary condition is identical to the Blasius inflow;
+it weakly enforces velocity, with the option of weakly enforcing either density
+or temperature using the `-weakT` flag.
+
+##### Initialization Data Flow
+
+The data flow for the initialization function (which creates the context data struct) is
+given below:
+```{mermaid}
+flowchart LR
+    subgraph STGInflow.dat
+    y
+    lt[l_t]
+    eps
+    Rij[R_ij]
+    ubar
+    end
+
+    subgraph STGRand.dat
+    rand[RN Set];
+    end
+
+    subgraph User Input
+    u0[U0];
+    end
+
+    subgraph init[Create Context Function]
+    ke[k_e]
+    N;
+    end
+    lt --Calc-->ke --Calc-->kn
+    y --Calc-->ke
+
+    subgraph context[Context Data]
+    yC[y]
+    randC[RN Set]
+    Cij[C_ij]
+    u0 --Copy--> u0C[U0]
+    kn[k^n];
+    ubarC[ubar]
+    ltC[l_t]
+    epsC[eps]
+    end
+    ubar --Copy--> ubarC;
+    y --Copy--> yC;
+    lt --Copy--> ltC;
+    eps --Copy--> epsC;
+
+    rand --Copy--> randC;
+    rand --> N --Calc--> kn;
+    Rij --Calc--> Cij[C_ij]
+```
+
+This is done once at runtime. The spatially-varying terms are then evaluated at
+each quadrature point on-the-fly, either by interpolation (for $l_t$,
+$\varepsilon$, $C_{ij}$, and $\overline{\bm u}$) or by calculation (for $q^n$).
+
+The `STGInflow.dat` file is a table of values at given distances from the wall.
+These values are then interpolated to a physical location (node or quadrature
+point). It has the following format:
+```
+[Total number of locations] 14
+[d_w] [u_1] [u_2] [u_3] [R_11] [R_22] [R_33] [R_12] [R_13] [R_23] [sclr_1] [sclr_2] [l_t] [eps]
+```
+where each `[  ]` item is a number in scientific notation (i.e., `3.1415E0`), and `sclr_1` and
+`sclr_2` are reserved for turbulence modeling variables. They are not used in
+this example.
+
+The `STGRand.dat` file is the table of the random number set, $\{\bm{\sigma}^n,
+\bm{d}^n, \phi^n\}_{n=1}^N$. It has the format:
+```
+[Number of wavemodes] 7
+[d_1] [d_2] [d_3] [phi] [sigma_1] [sigma_2] [sigma_3]
+```
+
+The following table is presented to help clarify the dimensionality of the
+numerous terms in the STG formulation.
+
+| Math            | Label  | $f(\bm{x})$? | $f(n)$? |
+|-----------------|--------|--------------|---------|
+| $ \{\bm{\sigma}^n, \bm{d}^n, \phi^n\}_{n=1}^N$        | RN Set | No           | Yes     |
+| $\bm{\overline{u}}$ | ubar | Yes | No |
+| $U_0$           | U0     | No           | No      |
+| $l_t$           | l_t    | Yes          | No   |
+| $\varepsilon$   | eps    | Yes          | No   |
+| $\bm{R}$        | R_ij   | Yes          | No      |
+| $\bm{C}$        | C_ij   | Yes          | No      |
+| $q^n$           | q^n    | Yes           | Yes     |
+| $\{\kappa^n\}_{n=1}^N$ | k^n  | No           | Yes      |
+| $h_i$           | h_i    | Yes          | No   |
+| $d_w$           | d_w    | Yes          | No   |
+
+### Meshing
+
+The flat plate boundary layer example has custom meshing features to better
+resolve the flow. One of those is tilting the top of the domain, allowing for
+it to be an outflow boundary condition. The angle of this tilt is controlled by
+`-platemesh_top_angle`.
+
+The primary meshing feature is the ability to grade the mesh, providing better
+resolution near the wall. There are two methods to do this; algorithmically, or
+specifying the node locations via a file. Algorithmically, a base node
+distribution is defined at the inlet (assumed to be $\min(x)$) and then
+linearly stretched/squeezed to match the slanted top boundary condition. Nodes
+are placed such that `-platemesh_Ndelta` elements are within
+`-platemesh_refine_height` of the wall. They are placed such that the element
+height matches a geometric growth ratio defined by `-platemesh_growth`. The
+remaining elements are then distributed from `-platemesh_refine_height` to the
+top of the domain linearly in logarithmic space.
+
+Alternatively, a file may be specified containing the locations of each node.
+The file should be newline delimited, with the first line specifying the number
+of points and the rest being the locations of the nodes. The node locations are
+used exactly at the inlet (assumed to be $\min(x)$) and linearly
+stretched/squeezed to match the slanted top boundary condition. The file is
+specified via `-platemesh_y_node_locs_path`. If this flag is given an empty
+string, then the algorithmic approach will be performed.
diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c
index c3f638c5b7..05d4a75bc8 100644
--- a/examples/fluids/navierstokes.c
+++ b/examples/fluids/navierstokes.c
@@ -22,8 +22,9 @@
 //     ./navierstokes -ceed /cpu/self -problem density_current -degree 1
 //     ./navierstokes -ceed /gpu/cuda -problem advection -degree 1
 //
-//TESTARGS(name="dc_explicit") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-explicit.bin
-//TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin
+//TESTARGS(name="channel") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel.bin
+//TESTARGS(name="dc_explicit") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -ts_dt 1e-3 -units_meter 1e-2 -units_second 1e-2 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-explicit.bin
+//TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -units_meter 1e-2 -units_second 1e-2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin
 //TESTARGS(name="adv_rotation_explicit_strong") -ceed {ceed_resource} -test -problem advection -strong_form 1 -degree 3 -dm_plex_box_faces 2,2,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-explicit-strong.bin
 //TESTARGS(name="adv_rotation_implicit_sharp_cylinder") -ceed {ceed_resource} -test -problem advection -bubble_type cylinder -bubble_continuity back_sharp -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_Slip_z 1,2 -bc_wall 3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-sharp-cylinder.bin
 //TESTARGS(name="adv_rotation_implicit_stab_supg") -ceed {ceed_resource} -test -problem advection -CtauS .3 -stab supg -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-stab-supg.bin
@@ -33,6 +34,9 @@
 //TESTARGS(name="adv2d_translation_implicit_stab_su") -ceed {ceed_resource} -test -problem advection2d -CtauS .3 -stab su -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0 -dm_plex_box_upper 125,125 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -wind_type translation -wind_translation .53,-1.33,0 -bc_inflow 1,2,3,4 -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-translation-implicit-stab-su.bin
 //TESTARGS(name="euler_implicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-implicit.bin
 //TESTARGS(name="euler_explicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ts_dt 1e-7 -ts_rk_type 5bs -ts_rtol 1e-10 -ts_atol 1e-10 -compare_final_state_atol 1E-7 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-explicit.bin
+//TESTARGS(name="shocktube_explicit_su_yzb") -ceed {ceed_resource} -test -problem shocktube -degree 1 -dm_plex_box_faces 50,1,1 -units_meter 1e-2 units_second 1e-2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,20,20 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -yzb -stab su -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin
+//TESTARGS(name="blasius_STG") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin
+//TESTARGS(name="blasius_STG_weakT") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin -weakT
 
 /// @file
 /// Navier-Stokes example using PETSc
@@ -67,9 +71,6 @@ int main(int argc, char **argv) {
   SimpleBC bc;
   ierr = PetscCalloc1(1, &bc); CHKERRQ(ierr);
 
-  SetupContext setup_ctx;
-  ierr = PetscCalloc1(1, &setup_ctx); CHKERRQ(ierr);
-
   Physics phys_ctx;
   ierr = PetscCalloc1(1, &phys_ctx); CHKERRQ(ierr);
 
@@ -127,21 +128,21 @@ int main(int argc, char **argv) {
   // Choose the problem from the list of registered problems
   // ---------------------------------------------------------------------------
   {
-    PetscErrorCode (*p)(ProblemData *, DM, void *, void *);
+    PetscErrorCode (*p)(ProblemData *, DM, void *);
     ierr = PetscFunctionListFind(app_ctx->problems, app_ctx->problem_name, &p);
     CHKERRQ(ierr);
     if (!p) SETERRQ(PETSC_COMM_SELF, 1, "Problem '%s' not found",
                       app_ctx->problem_name);
-    ierr = (*p)(problem, dm, &setup_ctx, &user); CHKERRQ(ierr);
+    ierr = (*p)(problem, dm, &user); CHKERRQ(ierr);
   }
 
   // -- Set up DM
-  ierr = SetUpDM(dm, problem, app_ctx->degree, bc, phys_ctx, setup_ctx);
+  ierr = SetUpDM(dm, problem, app_ctx->degree, bc, phys_ctx);
   CHKERRQ(ierr);
 
   // -- Refine DM for high-order viz
   if (app_ctx->viz_refine) {
-    ierr = VizRefineDM(dm, user, problem, bc, phys_ctx, setup_ctx);
+    ierr = VizRefineDM(dm, user, problem, bc, phys_ctx);
     CHKERRQ(ierr);
   }
 
@@ -152,10 +153,6 @@ int main(int argc, char **argv) {
   ierr = SetupLibceed(ceed, ceed_data, dm, user, app_ctx, problem, bc);
   CHKERRQ(ierr);
 
-  // -- Set up context for QFunctions
-  ierr = problem->setup_ctx(ceed, ceed_data, app_ctx, setup_ctx, phys_ctx);
-  CHKERRQ(ierr);
-
   // ---------------------------------------------------------------------------
   // Set up ICs
   // ---------------------------------------------------------------------------
@@ -169,7 +166,7 @@ int main(int argc, char **argv) {
   ierr = DMGetLocalVector(dm, &Q_loc); CHKERRQ(ierr);
 
   // -- Fix multiplicity for ICs
-  ierr = ICs_FixMultiplicity(dm, ceed_data, Q_loc, Q, 0.0); CHKERRQ(ierr);
+  ierr = ICs_FixMultiplicity(dm, ceed_data, user, Q_loc, Q, 0.0); CHKERRQ(ierr);
 
   // ---------------------------------------------------------------------------
   // Set up lumped mass matrix
@@ -222,7 +219,7 @@ int main(int argc, char **argv) {
                        host_name, comm_size); CHKERRQ(ierr);
 
     // Problem specific info
-    ierr = problem->print_info(phys_ctx, setup_ctx, app_ctx); CHKERRQ(ierr);
+    ierr = problem->print_info(problem, app_ctx); CHKERRQ(ierr);
 
     // libCEED
     const char *used_resource;
@@ -259,11 +256,11 @@ int main(int argc, char **argv) {
                        "  Mesh:\n"
                        "    Number of 1D Basis Nodes (P)       : %d\n"
                        "    Number of 1D Quadrature Points (Q) : %d\n"
-                       "    Global DoFs                        : %D\n"
-                       "    Owned DoFs                         : %D\n"
-                       "    DoFs per node                      : %D\n"
-                       "    Global nodes                       : %D\n"
-                       "    Owned nodes                        : %D\n",
+                       "    Global DoFs                        : %" PetscInt_FMT "\n"
+                       "    Owned DoFs                         : %" PetscInt_FMT "\n"
+                       "    DoFs per node                      : %" PetscInt_FMT "\n"
+                       "    Global nodes                       : %" PetscInt_FMT "\n"
+                       "    Owned nodes                        : %" PetscInt_FMT "\n",
                        num_P, num_Q, glob_dofs, owned_dofs, num_comp_q,
                        glob_nodes, owned_nodes); CHKERRQ(ierr);
   }
@@ -281,7 +278,7 @@ int main(int argc, char **argv) {
   // ---------------------------------------------------------------------------
   // Post-processing
   // ---------------------------------------------------------------------------
-  ierr = PostProcess_NS(ts, ceed_data, dm, problem, app_ctx, Q, final_time);
+  ierr = PostProcess_NS(ts, ceed_data, dm, problem, user, Q, final_time);
   CHKERRQ(ierr);
 
   // ---------------------------------------------------------------------------
@@ -294,12 +291,6 @@ int main(int argc, char **argv) {
   CeedVectorDestroy(&user->q_dot_ceed);
   CeedVectorDestroy(&user->g_ceed);
 
-  // -- Contexts
-  CeedQFunctionContextDestroy(&ceed_data->setup_context);
-  CeedQFunctionContextDestroy(&ceed_data->newt_ig_context);
-  CeedQFunctionContextDestroy(&ceed_data->advection_context);
-  CeedQFunctionContextDestroy(&ceed_data->euler_context);
-
   // -- QFunctions
   CeedQFunctionDestroy(&ceed_data->qf_setup_vol);
   CeedQFunctionDestroy(&ceed_data->qf_ics);
@@ -352,15 +343,13 @@ int main(int argc, char **argv) {
   // -- Function list
   ierr = PetscFunctionListDestroy(&app_ctx->problems); CHKERRQ(ierr);
 
+  ierr = PetscFree(problem->bc_ctx); CHKERRQ(ierr);
+
   // -- Structs
   ierr = PetscFree(units); CHKERRQ(ierr);
   ierr = PetscFree(user); CHKERRQ(ierr);
   ierr = PetscFree(problem); CHKERRQ(ierr);
   ierr = PetscFree(bc); CHKERRQ(ierr);
-  ierr = PetscFree(setup_ctx); CHKERRQ(ierr);
-  ierr = PetscFree(phys_ctx->newtonian_ig_ctx); CHKERRQ(ierr);
-  ierr = PetscFree(phys_ctx->euler_ctx); CHKERRQ(ierr);
-  ierr = PetscFree(phys_ctx->advection_ctx); CHKERRQ(ierr);
   ierr = PetscFree(phys_ctx); CHKERRQ(ierr);
   ierr = PetscFree(app_ctx); CHKERRQ(ierr);
   ierr = PetscFree(ceed_data); CHKERRQ(ierr);
diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h
index 973c0efd1b..9275ea4f38 100644
--- a/examples/fluids/navierstokes.h
+++ b/examples/fluids/navierstokes.h
@@ -14,6 +14,7 @@
 #include <petscsys.h>
 #include <petscts.h>
 #include <stdbool.h>
+#include "qfunctions/stabilization_types.h"
 
 // -----------------------------------------------------------------------------
 // PETSc Version
@@ -85,11 +86,6 @@ static const char *const EulerTestTypes[] = {
 };
 
 // Stabilization methods
-typedef enum {
-  STAB_NONE = 0,
-  STAB_SU   = 1, // Streamline Upwind
-  STAB_SUPG = 2, // Streamline Upwind Petrov-Galerkin
-} StabilizationType;
 static const char *const StabilizationTypes[] = {
   "none",
   "SU",
@@ -131,8 +127,6 @@ struct AppCtx_private {
 // libCEED data struct
 struct CeedData_private {
   CeedVector           x_coord, q_data;
-  CeedQFunctionContext setup_context, newt_ig_context, advection_context,
-                       euler_context;
   CeedQFunction        qf_setup_vol, qf_ics, qf_rhs_vol, qf_ifunction_vol,
                        qf_setup_sur, qf_apply_inflow, qf_apply_outflow;
   CeedBasis            basis_x, basis_xc, basis_q, basis_x_sur, basis_q_sur;
@@ -182,98 +176,8 @@ struct SimpleBC_private {
   PetscBool user_bc;
 };
 
-// Initial conditions
-#ifndef setup_context_struct
-#define setup_context_struct
-typedef struct SetupContext_ *SetupContext;
-struct SetupContext_ {
-  CeedScalar theta0;
-  CeedScalar thetaC;
-  CeedScalar P0;
-  CeedScalar N;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g;
-  CeedScalar rc;
-  CeedScalar lx;
-  CeedScalar ly;
-  CeedScalar lz;
-  CeedScalar center[3];
-  CeedScalar dc_axis[3];
-  CeedScalar wind[3];
-  CeedScalar time;
-  int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
-  int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
-  int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
-};
-#endif
-
-// DENSITY_CURRENT
-#ifndef dc_context_struct
-#define dc_context_struct
-typedef struct DCContext_ *DCContext;
-struct DCContext_ {
-  CeedScalar lambda;
-  CeedScalar mu;
-  CeedScalar k;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g;
-  CeedScalar c_tau;
-  int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
-};
-#endif
-
-// EULER_VORTEX
-#ifndef euler_context_struct
-#define euler_context_struct
-typedef struct EulerContext_ *EulerContext;
-struct EulerContext_ {
-  CeedScalar center[3];
-  CeedScalar curr_time;
-  CeedScalar vortex_strength;
-  CeedScalar c_tau;
-  CeedScalar mean_velocity[3];
-  bool implicit;
-  int euler_test;
-  int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
-};
-#endif
-
-// ADVECTION and ADVECTION2D
-#ifndef advection_context_struct
-#define advection_context_struct
-typedef struct AdvectionContext_ *AdvectionContext;
-struct AdvectionContext_ {
-  CeedScalar CtauS;
-  CeedScalar strong_form;
-  CeedScalar E_wind;
-  bool implicit;
-  int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
-};
-#endif
-
-// Newtonian Ideal Gas
-#ifndef newtonian_context_struct
-#define newtonian_context_struct
-typedef struct NewtonianIdealGasContext_ *NewtonianIdealGasContext;
-struct NewtonianIdealGasContext_ {
-  CeedScalar lambda;
-  CeedScalar mu;
-  CeedScalar k;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g;
-  CeedScalar c_tau;
-  StabilizationType stabilization;
-};
-#endif
-
 // Struct that contains all enums and structs used for the physics of all problems
 struct Physics_private {
-  NewtonianIdealGasContext newtonian_ig_ctx;
-  EulerContext             euler_ctx;
-  AdvectionContext         advection_ctx;
   WindType                 wind_type;
   BubbleType               bubble_type;
   BubbleContinuityType     bubble_continuity_type;
@@ -282,80 +186,71 @@ struct Physics_private {
   PetscBool                implicit;
   PetscBool                has_curr_time;
   PetscBool                has_neumann;
+  CeedContextFieldLabel    solution_time_label;
+  CeedContextFieldLabel    timestep_size_label;
+  CeedContextFieldLabel    ics_time_label;
 };
 
+typedef struct {                           // one QFunction as described by a problem
+  CeedQFunctionUser    qfunction;          // QFunction entry point, e.g. Setup
+  const char           *qfunction_loc;     // matching `*_loc` symbol, e.g. Setup_loc
+  CeedQFunctionContext qfunction_context;  // context for this QFunction, filled in by the problem setup (e.g. NS_ADVECTION)
+} ProblemQFunctionSpec;
+
 // Problem specific data
 // *INDENT-OFF*
-typedef struct {
+typedef struct ProblemData_private ProblemData;  // typedef first so members below can refer to ProblemData
+struct ProblemData_private {
   CeedInt           dim, q_data_size_vol, q_data_size_sur;
   CeedScalar        dm_scale;
-  CeedQFunctionUser setup_vol, setup_sur, ics, apply_vol_rhs, apply_vol_ifunction,
-                    apply_inflow, apply_outflow;
-  const char        *setup_vol_loc, *setup_sur_loc, *ics_loc,
-                    *apply_vol_rhs_loc, *apply_vol_ifunction_loc, *apply_inflow_loc, *apply_outflow_loc;
+  ProblemQFunctionSpec setup_vol, setup_sur, ics, apply_vol_rhs, apply_vol_ifunction,
+    apply_inflow, apply_outflow;  // each: QFunction, its *_loc string, and its context
   bool              non_zero_time;
   PetscErrorCode    (*bc)(PetscInt, PetscReal, const PetscReal[], PetscInt,
                           PetscScalar[], void *);
-  PetscErrorCode    (*setup_ctx)(Ceed, CeedData, AppCtx, SetupContext, Physics);
-  PetscErrorCode    (*print_info)(Physics, SetupContext, AppCtx);
-} ProblemData;
+  void *bc_ctx;  // set by the problem setup (e.g. NS_ADVECTION); main() releases it with PetscFree(); presumably the `void *` handed to `bc` - confirm
+  PetscErrorCode    (*print_info)(ProblemData*, AppCtx);  // problem summary printer, e.g. PRINT_ADVECTION
+};
 // *INDENT-ON*
 
+extern int FreeContextPetsc(void *);
+
 // -----------------------------------------------------------------------------
 // Set up problems
 // -----------------------------------------------------------------------------
 // Set up function for each problem
+extern PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm,
+                                 void *ctx);
+extern PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm,
+                                 void *ctx);
 extern PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm,
-                                      void *setup_ctx, void *ctx);
+                                      void *ctx);
 extern PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm,
-    void *setup_ctx, void *ctx);
+    void *ctx);
+
 extern PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm,
-                                      void *setup_ctx, void *ctx);
-extern PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
+                                      void *ctx);
+extern PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm,
+                                   void *ctx);
+extern PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm,
                                    void *ctx);
 extern PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm,
-                                     void *setup_ctx, void *ctx);
-
-// Set up context for each problem
-extern PetscErrorCode SetupContext_NEWTONIAN_IG(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-extern PetscErrorCode SetupContext_DENSITY_CURRENT(Ceed ceed,
-    CeedData ceed_data, AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-extern PetscErrorCode SetupContext_EULER_VORTEX(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-extern PetscErrorCode SetupContext_ADVECTION(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-extern PetscErrorCode SetupContext_ADVECTION2D(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-// Boundary condition function for each problem
-extern PetscErrorCode BC_DENSITY_CURRENT(DM dm, SimpleBC bc, Physics phys,
-    void *setup_ctx);
-
-extern PetscErrorCode BC_EULER_VORTEX(DM dm, SimpleBC bc, Physics phys,
-                                      void *setup_ctx);
-
-extern PetscErrorCode BC_ADVECTION(DM dm, SimpleBC bc, Physics phys,
-                                   void *setup_ctx);
-
-extern PetscErrorCode BC_ADVECTION2D(DM dm, SimpleBC bc, Physics phys,
-                                     void *setup_ctx);
+                                     void *ctx);
 
 // Print function for each problem
-extern PetscErrorCode PRINT_DENSITY_CURRENT(Physics phys,
-    SetupContext setup_ctx, AppCtx app_ctx);
+extern PetscErrorCode PRINT_DENSITY_CURRENT(ProblemData *problem,
+    AppCtx app_ctx);
 
-extern PetscErrorCode PRINT_EULER_VORTEX(Physics phys, SetupContext setup_ctx,
+extern PetscErrorCode PRINT_EULER_VORTEX(ProblemData *problem,
     AppCtx app_ctx);
 
-extern PetscErrorCode PRINT_ADVECTION(Physics phys, SetupContext setup_ctx,
+extern PetscErrorCode PRINT_SHOCKTUBE(ProblemData *problem,
+                                      AppCtx app_ctx);
+
+extern PetscErrorCode PRINT_ADVECTION(ProblemData *problem,
                                       AppCtx app_ctx);
 
-extern PetscErrorCode PRINT_ADVECTION2D(Physics phys, SetupContext setup_ctx,
+extern PetscErrorCode PRINT_ADVECTION2D(ProblemData *problem,
                                         AppCtx app_ctx);
 
 // -----------------------------------------------------------------------------
@@ -416,11 +311,11 @@ PetscErrorCode CreateDM(MPI_Comm comm, ProblemData *problem, DM *dm);
 
 // Set up DM
 PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree,
-                       SimpleBC bc, Physics phys, void *setup_ctx);
+                       SimpleBC bc, Physics phys);
 
 // Refine DM for high-order viz
 PetscErrorCode VizRefineDM(DM dm, User user, ProblemData *problem,
-                           SimpleBC bc, Physics phys, void *setup_ctx);
+                           SimpleBC bc, Physics phys);
 
 // -----------------------------------------------------------------------------
 // Process command line options
@@ -435,7 +330,8 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx,
 // -----------------------------------------------------------------------------
 // Miscellaneous utility functions
 // -----------------------------------------------------------------------------
-PetscErrorCode ICs_FixMultiplicity(DM dm, CeedData ceed_data, Vec Q_loc, Vec Q,
+PetscErrorCode ICs_FixMultiplicity(DM dm, CeedData ceed_data, User user,
+                                   Vec Q_loc, Vec Q,
                                    CeedScalar time);
 
 PetscErrorCode DMPlexInsertBoundaryValues_NS(DM dm,
@@ -446,12 +342,12 @@ PetscErrorCode DMPlexInsertBoundaryValues_NS(DM dm,
 PetscErrorCode RegressionTests_NS(AppCtx app_ctx, Vec Q);
 
 // Get error for problems with exact solutions
-PetscErrorCode GetError_NS(CeedData ceed_data, DM dm, AppCtx app_ctx, Vec Q,
+PetscErrorCode GetError_NS(CeedData ceed_data, DM dm, User user, Vec Q,
                            PetscScalar final_time);
 
 // Post-processing
 PetscErrorCode PostProcess_NS(TS ts, CeedData ceed_data, DM dm,
-                              ProblemData *problem, AppCtx app_ctx,
+                              ProblemData *problem, User user,
                               Vec Q, PetscScalar final_time);
 
 // -- Gather initial Q values in case of continuation of simulation
diff --git a/examples/fluids/problems/advection.c b/examples/fluids/problems/advection.c
index 78e6f4f314..830b41fad9 100644
--- a/examples/fluids/problems/advection.c
+++ b/examples/fluids/problems/advection.c
@@ -12,44 +12,47 @@
 #include "../qfunctions/setupgeo.h"
 #include "../qfunctions/advection.h"
 
-PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
+PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm,
                             void *ctx) {
   WindType             wind_type;
   BubbleType           bubble_type;
   BubbleContinuityType bubble_continuity_type;
   StabilizationType    stab;
-  SetupContext         setup_context = *(SetupContext *)setup_ctx;
+  SetupContext         setup_context;
   User                 user = *(User *)ctx;
   MPI_Comm             comm = PETSC_COMM_WORLD;
   PetscBool            implicit;
   PetscBool            has_curr_time = PETSC_FALSE;
   PetscInt             ierr;
-  PetscFunctionBeginUser;
+  AdvectionContext     advection_ctx;
+  CeedQFunctionContext advection_context;
 
-  ierr = PetscCalloc1(1, &user->phys->advection_ctx); CHKERRQ(ierr);
+  PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &setup_context); CHKERRQ(ierr);
+  ierr = PetscCalloc1(1, &advection_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
   //               SET UP ADVECTION
   // ------------------------------------------------------
-  problem->dim                     = 3;
-  problem->q_data_size_vol         = 10;
-  problem->q_data_size_sur         = 4;
-  problem->setup_vol               = Setup;
-  problem->setup_vol_loc           = Setup_loc;
-  problem->setup_sur               = SetupBoundary;
-  problem->setup_sur_loc           = SetupBoundary_loc;
-  problem->ics                     = ICsAdvection;
-  problem->ics_loc                 = ICsAdvection_loc;
-  problem->apply_vol_rhs           = Advection;
-  problem->apply_vol_rhs_loc       = Advection_loc;
-  problem->apply_vol_ifunction     = IFunction_Advection;
-  problem->apply_vol_ifunction_loc = IFunction_Advection_loc;
-  problem->apply_inflow            = Advection_InOutFlow;
-  problem->apply_inflow_loc        = Advection_InOutFlow_loc;
-  problem->bc                      = Exact_Advection;
-  problem->setup_ctx               = SetupContext_ADVECTION;
-  problem->non_zero_time           = PETSC_FALSE;
-  problem->print_info              = PRINT_ADVECTION;
+  problem->dim                               = 3;
+  problem->q_data_size_vol                   = 10;
+  problem->q_data_size_sur                   = 10;
+  problem->setup_vol.qfunction               = Setup;
+  problem->setup_vol.qfunction_loc           = Setup_loc;
+  problem->setup_sur.qfunction               = SetupBoundary;
+  problem->setup_sur.qfunction_loc           = SetupBoundary_loc;
+  problem->ics.qfunction                     = ICsAdvection;
+  problem->ics.qfunction_loc                 = ICsAdvection_loc;
+  problem->apply_vol_rhs.qfunction           = Advection;
+  problem->apply_vol_rhs.qfunction_loc       = Advection_loc;
+  problem->apply_vol_ifunction.qfunction     = IFunction_Advection;
+  problem->apply_vol_ifunction.qfunction_loc = IFunction_Advection_loc;
+  problem->apply_inflow.qfunction            = Advection_InOutFlow;
+  problem->apply_inflow.qfunction_loc        = Advection_InOutFlow_loc;
+  problem->bc                                = Exact_Advection;
+  problem->bc_ctx                            = setup_context;
+  problem->non_zero_time                     = PETSC_FALSE;
+  problem->print_info                        = PRINT_ADVECTION;
 
   // ------------------------------------------------------
   //             Create the libCEED context
@@ -61,7 +64,7 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   PetscReal wind[3]      = {1., 0, 0}; // m/s
   PetscReal domain_min[3], domain_max[3], domain_size[3];
   ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-  for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
 
 
   // ------------------------------------------------------
@@ -75,8 +78,7 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //              Command line Options
   // ------------------------------------------------------
-  ierr = PetscOptionsBegin(comm, NULL, "Options for ADVECTION problem",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Options for ADVECTION problem", NULL);
   // -- Physics
   ierr = PetscOptionsScalar("-rc", "Characteristic radius of thermal bubble",
                             NULL, rc, &rc, NULL); CHKERRQ(ierr);
@@ -148,7 +150,7 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
     CHKERRQ(ierr);
   }
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // ------------------------------------------------------
   //           Set up the PETSc context
@@ -167,7 +169,7 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   // -- Scale variables to desired units
   E_wind *= Joule;
   rc = fabs(rc) * meter;
-  for (int i=0; i<3; i++) {
+  for (PetscInt i=0; i<3; i++) {
     wind[i] *= (meter/second);
     domain_size[i] *= meter;
   }
@@ -194,43 +196,41 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   //  if passed correctly
   user->phys->implicit                     = implicit;
   user->phys->has_curr_time                = has_curr_time;
-  user->phys->advection_ctx->CtauS         = CtauS;
-  user->phys->advection_ctx->E_wind        = E_wind;
-  user->phys->advection_ctx->implicit      = implicit;
-  user->phys->advection_ctx->strong_form   = strong_form;
-  user->phys->advection_ctx->stabilization = stab;
+  advection_ctx->CtauS         = CtauS;
+  advection_ctx->E_wind        = E_wind;
+  advection_ctx->implicit      = implicit;
+  advection_ctx->strong_form   = strong_form;
+  advection_ctx->stabilization = stab;
 
-  PetscFunctionReturn(0);
-}
+  CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context);
+  CeedQFunctionContextSetData(problem->ics.qfunction_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, sizeof(*setup_context), setup_context);
 
-PetscErrorCode SetupContext_ADVECTION(Ceed ceed, CeedData ceed_data,
-                                      AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
-  PetscFunctionBeginUser;
-  CeedQFunctionContextCreate(ceed, &ceed_data->setup_context);
-  CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER, sizeof(*setup_ctx), setup_ctx);
-  CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->setup_context);
-  CeedQFunctionContextCreate(ceed, &ceed_data->advection_context);
-  CeedQFunctionContextSetData(ceed_data->advection_context, CEED_MEM_HOST,
+  CeedQFunctionContextCreate(user->ceed, &advection_context);
+  CeedQFunctionContextSetData(advection_context, CEED_MEM_HOST,
                               CEED_USE_POINTER,
-                              sizeof(*phys->advection_ctx), phys->advection_ctx);
-  if (ceed_data->qf_rhs_vol)
-    CeedQFunctionSetContext(ceed_data->qf_rhs_vol, ceed_data->advection_context);
-  if (ceed_data->qf_ifunction_vol)
-    CeedQFunctionSetContext(ceed_data->qf_ifunction_vol,
-                            ceed_data->advection_context);
-  if (ceed_data->qf_apply_inflow)
-    CeedQFunctionSetContext(ceed_data->qf_apply_inflow,
-                            ceed_data->advection_context);
+                              sizeof(*advection_ctx), advection_ctx);
+  CeedQFunctionContextSetDataDestroy(advection_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+  problem->apply_vol_rhs.qfunction_context = advection_context;
+  CeedQFunctionContextReferenceCopy(advection_context,
+                                    &problem->apply_vol_ifunction.qfunction_context);
+  CeedQFunctionContextReferenceCopy(advection_context,
+                                    &problem->apply_inflow.qfunction_context);
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode PRINT_ADVECTION(Physics phys, SetupContext setup_ctx,
-                               AppCtx app_ctx) {
+PetscErrorCode PRINT_ADVECTION(ProblemData *problem, AppCtx app_ctx) {
   MPI_Comm       comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
-  PetscFunctionBeginUser;
+  SetupContext   setup_ctx;
+  AdvectionContext advection_ctx;
 
+  PetscFunctionBeginUser;
+  CeedQFunctionContextGetData(problem->ics.qfunction_context,
+                              CEED_MEM_HOST, &setup_ctx);
+  CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
+                              CEED_MEM_HOST, &advection_ctx);
   ierr = PetscPrintf(comm,
                      "  Problem:\n"
                      "    Problem Name                       : %s\n"
@@ -238,16 +238,20 @@ PetscErrorCode PRINT_ADVECTION(Physics phys, SetupContext setup_ctx,
                      "    Bubble Type                        : %s (%dD)\n"
                      "    Bubble Continuity                  : %s\n"
                      "    Wind Type                          : %s\n",
-                     app_ctx->problem_name, StabilizationTypes[phys->stab],
-                     BubbleTypes[phys->bubble_type],
-                     phys->bubble_type == BUBBLE_SPHERE ? 3 : 2,
-                     BubbleContinuityTypes[phys->bubble_continuity_type],
-                     WindTypes[phys->wind_type]); CHKERRQ(ierr);
+                     app_ctx->problem_name, StabilizationTypes[advection_ctx->stabilization],
+                     BubbleTypes[setup_ctx->bubble_type],
+                     setup_ctx->bubble_type == BUBBLE_SPHERE ? 3 : 2,
+                     BubbleContinuityTypes[setup_ctx->bubble_continuity_type],
+                     WindTypes[setup_ctx->wind_type]); CHKERRQ(ierr);
 
-  if (phys->wind_type == WIND_TRANSLATION) {
+  if (setup_ctx->wind_type == WIND_TRANSLATION) {
     ierr = PetscPrintf(comm,
                        "    Background Wind                    : %f,%f,%f\n",
                        setup_ctx->wind[0], setup_ctx->wind[1], setup_ctx->wind[2]); CHKERRQ(ierr);
   }
+  CeedQFunctionContextRestoreData(problem->ics.qfunction_context,
+                                  &setup_ctx);
+  CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
+                                  &advection_ctx);
   PetscFunctionReturn(0);
 }
diff --git a/examples/fluids/problems/advection2d.c b/examples/fluids/problems/advection2d.c
index f8014956c1..44f6e3547e 100644
--- a/examples/fluids/problems/advection2d.c
+++ b/examples/fluids/problems/advection2d.c
@@ -12,42 +12,45 @@
 #include "../qfunctions/setupgeo2d.h"
 #include "../qfunctions/advection2d.h"
 
-PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
-                              void *ctx) {
+PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *ctx) {
   WindType          wind_type;
   StabilizationType stab;
-  SetupContext      setup_context = *(SetupContext *)setup_ctx;
+  SetupContext      setup_context;
   User              user = *(User *)ctx;
   MPI_Comm          comm = PETSC_COMM_WORLD;
   PetscBool         implicit;
   PetscBool         has_curr_time = PETSC_FALSE;
   PetscInt          ierr;
-  PetscFunctionBeginUser;
+  AdvectionContext     advection_ctx;
+  CeedQFunctionContext advection_context;
+
 
-  ierr = PetscCalloc1(1, &user->phys->advection_ctx); CHKERRQ(ierr);
+  PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &advection_ctx); CHKERRQ(ierr);
+  ierr = PetscCalloc1(1, &setup_context); CHKERRQ(ierr);
 
   // ------------------------------------------------------
   //               SET UP ADVECTION2D
   // ------------------------------------------------------
-  problem->dim                     = 2;
-  problem->q_data_size_vol         = 5;
-  problem->q_data_size_sur         = 3;
-  problem->setup_vol               = Setup2d;
-  problem->setup_vol_loc           = Setup2d_loc;
-  problem->setup_sur               = SetupBoundary2d;
-  problem->setup_sur_loc           = SetupBoundary2d_loc;
-  problem->ics                     = ICsAdvection2d;
-  problem->ics_loc                 = ICsAdvection2d_loc;
-  problem->apply_vol_rhs           = Advection2d;
-  problem->apply_vol_rhs_loc       = Advection2d_loc;
-  problem->apply_vol_ifunction     = IFunction_Advection2d;
-  problem->apply_vol_ifunction_loc = IFunction_Advection2d_loc;
-  problem->apply_inflow            = Advection2d_InOutFlow;
-  problem->apply_inflow_loc        = Advection2d_InOutFlow_loc;
-  problem->bc                      = Exact_Advection2d;
-  problem->setup_ctx               = SetupContext_ADVECTION2D;
-  problem->non_zero_time           = PETSC_TRUE;
-  problem->print_info              = PRINT_ADVECTION2D;
+  problem->dim                               = 2;
+  problem->q_data_size_vol                   = 5;
+  problem->q_data_size_sur                   = 3;
+  problem->setup_vol.qfunction               = Setup2d;
+  problem->setup_vol.qfunction_loc           = Setup2d_loc;
+  problem->setup_sur.qfunction               = SetupBoundary2d;
+  problem->setup_sur.qfunction_loc           = SetupBoundary2d_loc;
+  problem->ics.qfunction                     = ICsAdvection2d;
+  problem->ics.qfunction_loc                 = ICsAdvection2d_loc;
+  problem->apply_vol_rhs.qfunction           = Advection2d;
+  problem->apply_vol_rhs.qfunction_loc       = Advection2d_loc;
+  problem->apply_vol_ifunction.qfunction     = IFunction_Advection2d;
+  problem->apply_vol_ifunction.qfunction_loc = IFunction_Advection2d_loc;
+  problem->apply_inflow.qfunction            = Advection2d_InOutFlow;
+  problem->apply_inflow.qfunction_loc        = Advection2d_InOutFlow_loc;
+  problem->bc                                = Exact_Advection2d;
+  problem->bc_ctx                            = setup_context;
+  problem->non_zero_time                     = PETSC_TRUE;
+  problem->print_info                        = PRINT_ADVECTION2D;
 
   // ------------------------------------------------------
   //             Create the libCEED context
@@ -59,7 +62,7 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
   PetscReal wind[2]      = {1., 0.};   // m/s
   PetscReal domain_min[2], domain_max[2], domain_size[2];
   ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-  for (int i=0; i<2; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  for (PetscInt i=0; i<2; i++) domain_size[i] = domain_max[i] - domain_min[i];
 
 
   // ------------------------------------------------------
@@ -73,8 +76,7 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //              Command line Options
   // ------------------------------------------------------
-  ierr = PetscOptionsBegin(comm, NULL, "Options for ADVECTION2D problem",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Options for ADVECTION2D problem", NULL);
   // -- Physics
   ierr = PetscOptionsScalar("-rc", "Characteristic radius of thermal bubble",
                             NULL, rc, &rc, NULL); CHKERRQ(ierr);
@@ -130,7 +132,7 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
     CHKERRQ(ierr);
   }
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // ------------------------------------------------------
   //           Set up the PETSc context
@@ -149,7 +151,7 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
   // -- Scale variables to desired units
   E_wind *= Joule;
   rc = fabs(rc) * meter;
-  for (int i=0; i<2; i++) {
+  for (PetscInt i=0; i<2; i++) {
     wind[i] *= (meter/second);
     domain_size[i] *= meter;
   }
@@ -169,55 +171,58 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
   user->phys->wind_type                    = wind_type;
   user->phys->implicit                     = implicit;
   user->phys->has_curr_time                = has_curr_time;
-  user->phys->advection_ctx->CtauS         = CtauS;
-  user->phys->advection_ctx->E_wind        = E_wind;
-  user->phys->advection_ctx->implicit      = implicit;
-  user->phys->advection_ctx->strong_form   = strong_form;
-  user->phys->advection_ctx->stabilization = stab;
-
-  PetscFunctionReturn(0);
-}
-
-PetscErrorCode SetupContext_ADVECTION2D(Ceed ceed, CeedData ceed_data,
-                                        AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
-  PetscFunctionBeginUser;
-  CeedQFunctionContextCreate(ceed, &ceed_data->setup_context);
-  CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER, sizeof(*setup_ctx), setup_ctx);
-  CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->setup_context);
-  CeedQFunctionContextCreate(ceed, &ceed_data->advection_context);
-  CeedQFunctionContextSetData(ceed_data->advection_context, CEED_MEM_HOST,
+  advection_ctx->CtauS         = CtauS;
+  advection_ctx->E_wind        = E_wind;
+  advection_ctx->implicit      = implicit;
+  advection_ctx->strong_form   = strong_form;
+  advection_ctx->stabilization = stab;
+
+  CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context);
+  CeedQFunctionContextSetData(problem->ics.qfunction_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, sizeof(*setup_context), setup_context);
+
+  CeedQFunctionContextCreate(user->ceed, &advection_context);
+  CeedQFunctionContextSetData(advection_context, CEED_MEM_HOST,
                               CEED_USE_POINTER,
-                              sizeof(*phys->advection_ctx), phys->advection_ctx);
-  if (ceed_data->qf_rhs_vol)
-    CeedQFunctionSetContext(ceed_data->qf_rhs_vol, ceed_data->advection_context);
-  if (ceed_data->qf_ifunction_vol)
-    CeedQFunctionSetContext(ceed_data->qf_ifunction_vol,
-                            ceed_data->advection_context);
-  if (ceed_data->qf_apply_inflow)
-    CeedQFunctionSetContext(ceed_data->qf_apply_inflow,
-                            ceed_data->advection_context);
+                              sizeof(*advection_ctx), advection_ctx);
+  CeedQFunctionContextSetDataDestroy(advection_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+  problem->apply_vol_rhs.qfunction_context = advection_context;
+  CeedQFunctionContextReferenceCopy(advection_context,
+                                    &problem->apply_vol_ifunction.qfunction_context);
+  CeedQFunctionContextReferenceCopy(advection_context,
+                                    &problem->apply_inflow.qfunction_context);
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode PRINT_ADVECTION2D(Physics phys, SetupContext setup_ctx,
+PetscErrorCode PRINT_ADVECTION2D(ProblemData *problem,
                                  AppCtx app_ctx) {
   MPI_Comm       comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
-  PetscFunctionBeginUser;
+  SetupContext setup_ctx;          // host data of problem->ics.qfunction_context
+  AdvectionContext advection_ctx;  // host data of problem->apply_vol_rhs.qfunction_context
 
+  PetscFunctionBeginUser;
+  CeedQFunctionContextGetData(problem->ics.qfunction_context,
+                              CEED_MEM_HOST, &setup_ctx);
+  CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
+                              CEED_MEM_HOST, &advection_ctx);
   ierr = PetscPrintf(comm,
                      "  Problem:\n"
                      "    Problem Name                       : %s\n"
                      "    Stabilization                      : %s\n"
                      "    Wind Type                          : %s\n",
-                     app_ctx->problem_name, StabilizationTypes[phys->stab],
-                     WindTypes[phys->wind_type]); CHKERRQ(ierr);
+                     app_ctx->problem_name, StabilizationTypes[advection_ctx->stabilization],
+                     WindTypes[setup_ctx->wind_type]); CHKERRQ(ierr);
 
-  if (phys->wind_type == WIND_TRANSLATION) {
+  if (setup_ctx->wind_type == WIND_TRANSLATION) {  // wind vector is printed only for this wind type
     ierr = PetscPrintf(comm,
                        "    Background Wind                    : %f,%f\n",
                        setup_ctx->wind[0], setup_ctx->wind[1]); CHKERRQ(ierr);
   }
+  CeedQFunctionContextRestoreData(problem->ics.qfunction_context,
+                                  &setup_ctx);  // pairs with the first GetData above
+  CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
+                                  &advection_ctx);  // pairs with the second GetData above
   PetscFunctionReturn(0);
 }
diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c
new file mode 100644
index 0000000000..730e9e533b
--- /dev/null
+++ b/examples/fluids/problems/blasius.c
@@ -0,0 +1,258 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Utility functions for setting up Blasius Boundary Layer
+
+#include "../navierstokes.h"
+#include "../qfunctions/blasius.h"
+#include "stg_shur14.h"
+
+// Read y-node locations from the text file `path`: line 1 is "<nrows> [<ncols>]"
+// and column 0 of each later row is returned in `*pynodes` (caller PetscFree()s).
+static PetscErrorCode GetYNodeLocs(const MPI_Comm comm,
+                                   const char path[PETSC_MAX_PATH_LEN], PetscReal **pynodes,
+                                   PetscInt *nynodes) {
+  PetscErrorCode ierr;
+  int ndims, dims[2] = {0, 1}; // int: PetscStrToArray() counts tokens as int
+  FILE *fp;
+  const PetscInt char_array_len = 512;
+  char line[char_array_len];
+  char **array;
+  PetscReal *node_locs;
+  PetscFunctionBeginUser;
+  ierr = PetscFOpen(comm, path, "r", &fp); CHKERRQ(ierr);
+  ierr = PetscSynchronizedFGets(comm, fp, char_array_len, line); CHKERRQ(ierr);
+  ierr = PetscStrToArray(line, ' ', &ndims, &array); CHKERRQ(ierr);
+  // dims[] has two slots: ignore extra header tokens; ncols defaults to 1
+  for (int i=0; i<ndims && i<2; i++) dims[i] = atoi(array[i]);
+  ierr = PetscStrToArrayDestroy(ndims, array); CHKERRQ(ierr);
+  *nynodes = dims[0];
+  ierr = PetscMalloc1(*nynodes, &node_locs); CHKERRQ(ierr);
+  for (PetscInt i=0; i<dims[0]; i++) {
+    ierr = PetscSynchronizedFGets(comm, fp, char_array_len, line); CHKERRQ(ierr);
+    ierr = PetscStrToArray(line, ' ', &ndims, &array); CHKERRQ(ierr);
+    if (ndims < 1 || ndims < dims[1]) SETERRQ(comm, PETSC_ERR_FILE_UNEXPECTED,
+          "Line %" PetscInt_FMT " of %s does not contain enough columns (%d instead of %d)",
+          i, path, ndims, dims[1]);
+    node_locs[i] = (PetscReal) atof(array[0]);
+    ierr = PetscStrToArrayDestroy(ndims, array); CHKERRQ(ierr);
+  }
+  ierr = PetscFClose(comm, fp); CHKERRQ(ierr);
+  *pynodes = node_locs;
+  PetscFunctionReturn(0);
+}
+
+/* \brief Modify the domain and mesh for blasius
+ *
+ * Modifies mesh such that `N` elements are within `refine_height` with a
+ * geometric growth ratio of `growth`. Excess elements are then distributed
+ * linearly in logspace to the top surface.
+ *
+ * The top surface is also angled downwards, so that it may be used as an
+ * outflow. Its angle is controlled by `top_angle` (in units of degrees).
+ *
+ * If `node_locs` is not NULL, then the nodes will be placed at `node_locs`
+ * locations.
+ */
+static PetscErrorCode ModifyMesh(MPI_Comm comm, DM dm, PetscInt dim,
+                                 PetscReal growth, PetscInt N,
+                                 PetscReal refine_height, PetscReal top_angle,
+                                 PetscReal node_locs[], PetscInt num_node_locs) {
+  PetscErrorCode ierr;
+  PetscInt narr, ncoords;
+  PetscReal domain_min[3], domain_max[3], domain_size[3];
+  PetscScalar *arr_coords;
+  Vec vec_coords;
+  PetscBool faces_set = PETSC_FALSE;
+  PetscFunctionBeginUser;
+  // Slope of the top surface: y is scaled by (1 - x_fraction*angle_coeff)
+  PetscReal angle_coeff = tan(top_angle*(M_PI/180));
+  // Get domain boundary information
+  ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
+  for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  // Get mesh information; faces[] stays uninitialized if the option is absent
+  PetscInt nmax = 3, faces[3];
+  ierr = PetscOptionsGetIntArray(NULL, NULL, "-dm_plex_box_faces", faces, &nmax,
+                                 &faces_set); CHKERRQ(ierr);
+  if (!faces_set || nmax < 2 || faces[1] < 1)
+    SETERRQ(comm, PETSC_ERR_USER_INPUT,
+            "-dm_plex_box_faces must provide a positive number of faces in y");
+  // Get element size of the box mesh, for indexing each node
+  const PetscReal dybox = domain_size[1]/faces[1];
+
+  // Validate node_locs before VecGetArray so an error cannot skip the restore
+  if (node_locs && num_node_locs < faces[1]+1)
+    SETERRQ(comm, PETSC_ERR_USER_INPUT,
+            "The y_node_locs_path has too few locations; There are %" PetscInt_FMT
+            " + 1 nodes, but only %" PetscInt_FMT " locations given",
+            faces[1], num_node_locs);
+  if (node_locs && num_node_locs > faces[1]+1) {
+    ierr = PetscPrintf(comm, "WARNING: y_node_locs_path has more locations (%"
+                       PetscInt_FMT ") than the mesh has nodes (%" PetscInt_FMT
+                       "). This may be unintended.\n",
+                       num_node_locs, faces[1]+1); CHKERRQ(ierr);
+  }
+
+  // Get coords array from DM
+  ierr = DMGetCoordinatesLocal(dm, &vec_coords); CHKERRQ(ierr);
+  ierr = VecGetLocalSize(vec_coords, &narr); CHKERRQ(ierr);
+  ierr = VecGetArray(vec_coords, &arr_coords); CHKERRQ(ierr);
+  PetscScalar (*coords)[dim] = (PetscScalar(*)[dim]) arr_coords;
+  ncoords = narr/dim;
+
+  if (!node_locs) {
+    // Calculate the first element height
+    PetscReal dy1   = refine_height*(growth-1)/(pow(growth, N)-1);
+    // Calculate log of sizing outside BL
+    PetscReal logdy = (log(domain_max[1]) - log(refine_height)) / (faces[1] - N);
+
+    for (PetscInt i=0; i<ncoords; i++) {
+      PetscInt y_box_index = round(coords[i][1]/dybox);
+      if (y_box_index <= N) {
+        coords[i][1] = (1 - ((coords[i][0] - domain_min[0])/domain_size[0])*angle_coeff)
+                       * dy1 * (pow(growth, coords[i][1]/dybox)-1)/(growth-1);
+      } else {
+        PetscInt j = y_box_index - N;
+        coords[i][1] = (1 - ((coords[i][0] - domain_min[0])/domain_size[0])*angle_coeff)
+                       * exp(log(refine_height) + logdy*j);
+      }
+    }
+  } else {
+    for (PetscInt i=0; i<ncoords; i++) {
+      // Determine which y-node we're at
+      PetscInt y_box_index = round(coords[i][1]/dybox);
+      coords[i][1] = (1 - ((coords[i][0] - domain_min[0])/domain_size[0])*angle_coeff)
+                     * node_locs[y_box_index];
+    }
+  }
+  ierr = VecRestoreArray(vec_coords, &arr_coords); CHKERRQ(ierr);
+  ierr = DMSetCoordinatesLocal(dm, vec_coords); CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+// Configure `problem` for the Blasius boundary layer: run NS_NEWTONIAN_IG(),
+// swap in the Blasius IC/inflow/outflow QFunctions, read the options, warp the
+// mesh of `dm`, and build one BlasiusContext shared by those QFunctions.
+PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) {
+  PetscErrorCode ierr;
+  User           user    = *(User *)ctx;
+  MPI_Comm       comm    = PETSC_COMM_WORLD;
+  PetscBool      use_stg = PETSC_FALSE;
+  BlasiusContext blasius_ctx;
+  NewtonianIdealGasContext newtonian_ig_ctx;
+  CeedQFunctionContext blasius_context;
+
+  PetscFunctionBeginUser;
+  ierr = NS_NEWTONIAN_IG(problem, dm, ctx); CHKERRQ(ierr);
+  ierr = PetscCalloc1(1, &blasius_ctx); CHKERRQ(ierr);
+
+  // ------------------------------------------------------
+  //               SET UP Blasius
+  // ------------------------------------------------------
+  // Release the existing IC context (presumably from NS_NEWTONIAN_IG); replaced below
+  CeedQFunctionContextDestroy(&problem->ics.qfunction_context);
+  problem->ics.qfunction               = ICsBlasius;
+  problem->ics.qfunction_loc           = ICsBlasius_loc;
+  problem->apply_outflow.qfunction     = Blasius_Outflow;
+  problem->apply_outflow.qfunction_loc = Blasius_Outflow_loc;
+  problem->apply_inflow.qfunction      = Blasius_Inflow;
+  problem->apply_inflow.qfunction_loc  = Blasius_Inflow_loc;
+
+  CeedScalar Uinf   = 40;          // m/s
+  CeedScalar delta0 = 4.2e-4;      // m
+  CeedScalar theta0 = 288.;        // K
+  CeedScalar P0     = 1.01e5;      // Pa
+  PetscBool  weakT  = PETSC_FALSE; // weak density or temperature
+  PetscReal  mesh_refine_height = 5.9e-4; // m
+  PetscReal  mesh_growth        = 1.08;   // [-]
+  PetscInt   mesh_Ndelta        = 45;     // [-]
+  PetscReal  mesh_top_angle     = 5;      // degrees
+  char mesh_ynodes_path[PETSC_MAX_PATH_LEN] = "";
+
+  PetscOptionsBegin(comm, NULL, "Options for BLASIUS problem", NULL);
+  ierr = PetscOptionsBool("-weakT", "Change from rho weak to T weak at inflow",
+                          NULL, weakT, &weakT, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-Uinf", "Velocity at boundary layer edge",
+                            NULL, Uinf, &Uinf, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-delta0", "Boundary layer height at inflow",
+                            NULL, delta0, &delta0, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-theta0", "Wall temperature",
+                            NULL, theta0, &theta0, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-P0", "Pressure at outflow",
+                            NULL, P0, &P0, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsBoundedInt("-platemesh_Ndelta",
+                                "Number of elements within the refinement height",
+                                NULL, mesh_Ndelta, &mesh_Ndelta, NULL, 1); CHKERRQ(ierr);
+  ierr = PetscOptionsReal("-platemesh_refine_height",
+                          "Height of boundary layer mesh refinement",
+                          NULL, mesh_refine_height, &mesh_refine_height, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsReal("-platemesh_growth",
+                          "Geometric growth rate of boundary layer mesh",
+                          NULL, mesh_growth, &mesh_growth, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsReal("-platemesh_top_angle",
+                          "Downward angle of the mesh top surface, in degrees",
+                          NULL, mesh_top_angle, &mesh_top_angle, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsString("-platemesh_y_node_locs_path",
+                            "Path to file with y node locations. "
+                            "If empty, will use the algorithmic mesh warping.", NULL,
+                            mesh_ynodes_path, mesh_ynodes_path,
+                            sizeof(mesh_ynodes_path), NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsBool("-stg_use", "Use STG inflow boundary condition",
+                          NULL, use_stg, &use_stg, NULL); CHKERRQ(ierr);
+  PetscOptionsEnd();
+
+  PetscScalar meter  = user->units->meter;
+  PetscScalar second = user->units->second;
+  PetscScalar Kelvin = user->units->Kelvin;
+  PetscScalar Pascal = user->units->Pascal;
+
+  theta0 *= Kelvin;
+  P0     *= Pascal;
+  Uinf   *= meter / second;
+  delta0 *= meter;
+
+  PetscReal *mesh_ynodes = NULL;
+  PetscInt  mesh_nynodes = 0;
+  if (strcmp(mesh_ynodes_path, "")) {
+    ierr = GetYNodeLocs(comm, mesh_ynodes_path, &mesh_ynodes, &mesh_nynodes);
+    CHKERRQ(ierr);
+  }
+  ierr = ModifyMesh(comm, dm, problem->dim, mesh_growth, mesh_Ndelta,
+                    mesh_refine_height, mesh_top_angle, mesh_ynodes,
+                    mesh_nynodes); CHKERRQ(ierr);
+  ierr = PetscFree(mesh_ynodes); CHKERRQ(ierr);
+
+  // Some properties depend on parameters from NewtonianIdealGas
+  CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
+                              CEED_MEM_HOST, &newtonian_ig_ctx);
+  blasius_ctx->weakT     = weakT;
+  blasius_ctx->Uinf      = Uinf;
+  blasius_ctx->delta0    = delta0;
+  blasius_ctx->theta0    = theta0;
+  blasius_ctx->P0        = P0;
+  blasius_ctx->implicit  = user->phys->implicit;
+  blasius_ctx->newtonian_ctx = *newtonian_ig_ctx;
+  CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
+                                  &newtonian_ig_ctx);
+  CeedQFunctionContextCreate(user->ceed, &blasius_context);
+  CeedQFunctionContextSetData(blasius_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER,
+                              sizeof(*blasius_ctx), blasius_ctx);
+  CeedQFunctionContextSetDataDestroy(blasius_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+
+  problem->ics.qfunction_context = blasius_context;
+  CeedQFunctionContextReferenceCopy(blasius_context,
+                                    &problem->apply_inflow.qfunction_context);
+  CeedQFunctionContextReferenceCopy(blasius_context,
+                                    &problem->apply_outflow.qfunction_context);
+  if (use_stg) {
+    ierr = SetupSTG(comm, dm, problem, user, weakT, theta0, P0); CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
diff --git a/examples/fluids/problems/channel.c b/examples/fluids/problems/channel.c
new file mode 100644
index 0000000000..388ea8a4ec
--- /dev/null
+++ b/examples/fluids/problems/channel.c
@@ -0,0 +1,110 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Utility functions for setting up Channel flow
+
+#include "../navierstokes.h"
+#include "../qfunctions/channel.h"
+
+// Configure `problem` for channel flow: run NS_NEWTONIAN_IG(), swap in the
+// channel IC/inflow/outflow QFunctions, read the options, and build one
+// ChannelContext shared by those QFunctions.
+PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm,
+                          void *ctx) {
+  PetscErrorCode ierr;
+  User              user = *(User *)ctx;
+  MPI_Comm          comm = PETSC_COMM_WORLD;
+  ChannelContext    channel_ctx;
+  NewtonianIdealGasContext newtonian_ig_ctx;
+  CeedQFunctionContext channel_context;
+
+  PetscFunctionBeginUser;
+  ierr = NS_NEWTONIAN_IG(problem, dm, ctx); CHKERRQ(ierr);
+  ierr = PetscCalloc1(1, &channel_ctx); CHKERRQ(ierr);
+
+  // ------------------------------------------------------
+  //               SET UP Channel
+  // ------------------------------------------------------
+  CeedQFunctionContextDestroy(&problem->ics.qfunction_context);
+  problem->ics.qfunction               = ICsChannel;
+  problem->ics.qfunction_loc           = ICsChannel_loc;
+  problem->apply_inflow.qfunction      = Channel_Inflow;
+  problem->apply_inflow.qfunction_loc  = Channel_Inflow_loc;
+  problem->apply_outflow.qfunction     = Channel_Outflow;
+  problem->apply_outflow.qfunction_loc = Channel_Outflow_loc;
+
+  // -- Command Line Options
+  CeedScalar umax   = 10.;  // m/s
+  CeedScalar theta0 = 300.; // K
+  CeedScalar P0     = 1.e5; // Pa
+  PetscOptionsBegin(comm, NULL, "Options for CHANNEL problem", NULL);
+  ierr = PetscOptionsScalar("-umax", "Centerline velocity of the Channel",
+                            NULL, umax, &umax, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-theta0", "Wall temperature",
+                            NULL, theta0, &theta0, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-P0", "Pressure at outflow",
+                            NULL, P0, &P0, NULL); CHKERRQ(ierr);
+  PetscOptionsEnd();
+
+  PetscScalar meter  = user->units->meter;
+  PetscScalar second = user->units->second;
+  PetscScalar Kelvin = user->units->Kelvin;
+  PetscScalar Pascal = user->units->Pascal;
+
+  theta0 *= Kelvin;
+  P0     *= Pascal;
+  umax   *= meter / second;
+
+  //-- Setup Problem information
+  CeedScalar H, center;
+  {
+    PetscReal domain_min[3], domain_max[3], domain_size[3];
+    ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
+    for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+    H      = 0.5*domain_size[1]*meter;
+    center = H + domain_min[1]*meter;
+  }
+
+  // Some properties depend on parameters from NewtonianIdealGas
+  CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
+                              CEED_MEM_HOST, &newtonian_ig_ctx);
+
+  channel_ctx->center   = center;
+  channel_ctx->H        = H;
+  channel_ctx->theta0   = theta0;
+  channel_ctx->P0       = P0;
+  channel_ctx->umax     = umax;
+  channel_ctx->implicit = user->phys->implicit;
+  channel_ctx->B = -2*umax*newtonian_ig_ctx->mu/H;
+  {
+    // Calculate Body force; written into the apply_vol_rhs context data itself
+    CeedScalar cv  = newtonian_ig_ctx->cv,
+               cp  = newtonian_ig_ctx->cp;
+    CeedScalar Rd  = cp - cv;
+    CeedScalar rho = P0 / (Rd*theta0);
+    CeedScalar g[] = {channel_ctx->B / rho, 0., 0.};
+    ierr = PetscArraycpy(newtonian_ig_ctx->g, g, 3); CHKERRQ(ierr);
+  }
+  channel_ctx->newtonian_ctx = *newtonian_ig_ctx;
+  CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
+                                  &newtonian_ig_ctx);
+
+  CeedQFunctionContextCreate(user->ceed, &channel_context);
+  CeedQFunctionContextSetData(channel_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER,
+                              sizeof(*channel_ctx), channel_ctx);
+  CeedQFunctionContextSetDataDestroy(channel_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+
+  problem->ics.qfunction_context = channel_context;
+  CeedQFunctionContextReferenceCopy(channel_context,
+                                    &problem->apply_inflow.qfunction_context);
+  CeedQFunctionContextReferenceCopy(channel_context,
+                                    &problem->apply_outflow.qfunction_context);
+  PetscFunctionReturn(0);
+}
diff --git a/examples/fluids/problems/densitycurrent.c b/examples/fluids/problems/densitycurrent.c
index 1ad9a3f22b..92bc14d5c9 100644
--- a/examples/fluids/problems/densitycurrent.c
+++ b/examples/fluids/problems/densitycurrent.c
@@ -1,5 +1,6 @@
-// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
-// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other
+// CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE
+// files for details.
 //
 // SPDX-License-Identifier: BSD-2-Clause
 //
@@ -8,96 +9,104 @@
 /// @file
 /// Utility functions for setting up DENSITY_CURRENT
 
-#include "../navierstokes.h"
 #include "../qfunctions/densitycurrent.h"
+#include "../navierstokes.h"
 
-PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *setup_ctx,
-                                  void *ctx) {
+PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *ctx) {
 
   PetscInt ierr;
-  ierr = NS_NEWTONIAN_IG(problem, dm, setup_ctx, ctx); CHKERRQ(ierr);
-  SetupContext setup_context = *(SetupContext *)setup_ctx;
-  User         user          = *(User *)ctx;
-  MPI_Comm     comm          = PETSC_COMM_WORLD;
-  PetscFunctionBeginUser;
+  SetupContext setup_context;
+  User user = *(User *)ctx;
+  MPI_Comm comm = PETSC_COMM_WORLD;
 
+  PetscFunctionBeginUser;
+  ierr = NS_NEWTONIAN_IG(problem, dm, ctx); CHKERRQ(ierr);
   // ------------------------------------------------------
   //               SET UP DENSITY_CURRENT
   // ------------------------------------------------------
-  problem->ics     = ICsDC;
-  problem->ics_loc = ICsDC_loc;
-  problem->bc      = Exact_DC;
+  problem->ics.qfunction = ICsDC;
+  problem->ics.qfunction_loc = ICsDC_loc;
+  problem->bc = Exact_DC;
+  setup_context = problem->bc_ctx;
 
   // ------------------------------------------------------
   //             Create the libCEED context
   // ------------------------------------------------------
-  CeedScalar rc     = 1000.;   // m (Radius of bubble)
+  CeedScalar theta0 = 300.; // K
+  CeedScalar thetaC = -15.; // K
+  CeedScalar P0 = 1.e5;     // Pa
+  CeedScalar N = 0.01;      // 1/s
+  CeedScalar rc = 1000.;    // m (Radius of bubble)
   PetscReal center[3], dc_axis[3] = {0, 0, 0};
   PetscReal domain_min[3], domain_max[3], domain_size[3];
-  ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-  for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  ierr = DMGetBoundingBox(dm, domain_min, domain_max);
+  CHKERRQ(ierr);
+  for (PetscInt i = 0; i < 3; i++)
+    domain_size[i] = domain_max[i] - domain_min[i];
 
   // ------------------------------------------------------
   //              Command line Options
   // ------------------------------------------------------
-  ierr = PetscOptionsBegin(comm, NULL, "Options for DENSITY_CURRENT problem",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Options for DENSITY_CURRENT problem", NULL);
+  ierr = PetscOptionsScalar("-theta0", "Reference potential temperature", NULL,
+                            theta0, &theta0, NULL);
+  CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-thetaC", "Perturbation of potential temperature",
+                            NULL, thetaC, &thetaC, NULL);
+  CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-P0", "Atmospheric pressure", NULL, P0, &P0, NULL);
+  CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-N", "Brunt-Vaisala frequency", NULL, N, &N, NULL);
+  CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-rc", "Characteristic radius of thermal bubble",
-                            NULL, rc, &rc, NULL); CHKERRQ(ierr);
-  for (int i=0; i<3; i++) center[i] = .5*domain_size[i];
+                            NULL, rc, &rc, NULL);
+  CHKERRQ(ierr);
+  for (PetscInt i = 0; i < 3; i++)
+    center[i] = .5 * domain_size[i];
   PetscInt n = problem->dim;
-  ierr = PetscOptionsRealArray("-center", "Location of bubble center",
-                               NULL, center, &n, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsRealArray("-center", "Location of bubble center", NULL,
+                               center, &n, NULL);
+  CHKERRQ(ierr);
   n = problem->dim;
   ierr = PetscOptionsRealArray("-dc_axis",
-                               "Axis of density current cylindrical anomaly, or {0,0,0} for spherically symmetric",
-                               NULL, dc_axis, &n, NULL); CHKERRQ(ierr);
+                               "Axis of density current cylindrical anomaly, "
+                               "or {0,0,0} for spherically symmetric",
+                               NULL, dc_axis, &n, NULL);
+  CHKERRQ(ierr);
   {
     PetscReal norm = PetscSqrtReal(PetscSqr(dc_axis[0]) + PetscSqr(dc_axis[1]) +
                                    PetscSqr(dc_axis[2]));
     if (norm > 0) {
-      for (int i=0; i<3; i++)  dc_axis[i] /= norm;
+      for (PetscInt i = 0; i < 3; i++)
+        dc_axis[i] /= norm;
     }
   }
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
-  PetscScalar meter = user->units->meter;
+  PetscScalar meter           = user->units->meter;
+  PetscScalar second          = user->units->second;
+  PetscScalar Kelvin          = user->units->Kelvin;
+  PetscScalar Pascal          = user->units->Pascal;
   rc = fabs(rc) * meter;
-  for (int i=0; i<3; i++) center[i] *= meter;
+  theta0 *= Kelvin;
+  thetaC *= Kelvin;
+  P0 *= Pascal;
+  N *= (1. / second);
+  for (PetscInt i = 0; i < 3; i++)
+    center[i] *= meter;
 
-  setup_context->rc         = rc;
-  setup_context->center[0]  = center[0];
-  setup_context->center[1]  = center[1];
-  setup_context->center[2]  = center[2];
+  setup_context->theta0 = theta0;
+  setup_context->thetaC = thetaC;
+  setup_context->P0 = P0;
+  setup_context->N = N;
+  setup_context->rc = rc;
+  setup_context->center[0] = center[0];
+  setup_context->center[1] = center[1];
+  setup_context->center[2] = center[2];
   setup_context->dc_axis[0] = dc_axis[0];
   setup_context->dc_axis[1] = dc_axis[1];
   setup_context->dc_axis[2] = dc_axis[2];
 
   PetscFunctionReturn(0);
 }
-
-PetscErrorCode SetupContext_DENSITY_CURRENT(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
-  PetscFunctionBeginUser;
-  PetscInt ierr = SetupContext_NEWTONIAN_IG(ceed, ceed_data, app_ctx, setup_ctx,
-                  phys);
-  CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
-PetscErrorCode PRINT_DENSITY_CURRENT(Physics phys, SetupContext setup_ctx,
-                                     AppCtx app_ctx) {
-  MPI_Comm       comm = PETSC_COMM_WORLD;
-  PetscErrorCode ierr;
-  PetscFunctionBeginUser;
-
-  ierr = PetscPrintf(comm,
-                     "  Problem:\n"
-                     "    Problem Name                       : %s\n"
-                     "    Stabilization                      : %s\n",
-                     app_ctx->problem_name, StabilizationTypes[phys->stab]);
-  CHKERRQ(ierr);
-
-  PetscFunctionReturn(0);
-}
diff --git a/examples/fluids/problems/eulervortex.c b/examples/fluids/problems/eulervortex.c
index 65007d6a2a..b851afb6ee 100644
--- a/examples/fluids/problems/eulervortex.c
+++ b/examples/fluids/problems/eulervortex.c
@@ -12,10 +12,8 @@
 #include "../qfunctions/setupgeo.h"
 #include "../qfunctions/eulervortex.h"
 
-PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
-                               void *ctx) {
+PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *ctx) {
   EulerTestType     euler_test;
-  SetupContext      setup_context = *(SetupContext *)setup_ctx;
   User              user = *(User *)ctx;
   StabilizationType stab;
   MPI_Comm          comm = PETSC_COMM_WORLD;
@@ -23,34 +21,36 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   PetscBool         has_curr_time = PETSC_TRUE;
   PetscBool         has_neumann = PETSC_TRUE;
   PetscInt          ierr;
-  PetscFunctionBeginUser;
+  EulerContext      euler_ctx;
+  CeedQFunctionContext euler_context;
 
-  ierr = PetscCalloc1(1, &user->phys->euler_ctx); CHKERRQ(ierr);
+  PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &euler_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
   //               SET UP DENSITY_CURRENT
   // ------------------------------------------------------
-  problem->dim                     = 3;
-  problem->q_data_size_vol         = 10;
-  problem->q_data_size_sur         = 4;
-  problem->setup_vol               = Setup;
-  problem->setup_vol_loc           = Setup_loc;
-  problem->setup_sur               = SetupBoundary;
-  problem->setup_sur_loc           = SetupBoundary_loc;
-  problem->ics                     = ICsEuler;
-  problem->ics_loc                 = ICsEuler_loc;
-  problem->apply_vol_rhs           = Euler;
-  problem->apply_vol_rhs_loc       = Euler_loc;
-  problem->apply_vol_ifunction     = IFunction_Euler;
-  problem->apply_vol_ifunction_loc = IFunction_Euler_loc;
-  problem->apply_inflow            = TravelingVortex_Inflow;
-  problem->apply_inflow_loc        = TravelingVortex_Inflow_loc;
-  problem->apply_outflow           = Euler_Outflow;
-  problem->apply_outflow_loc       = Euler_Outflow_loc;
-  problem->bc                      = Exact_Euler;
-  problem->setup_ctx               = SetupContext_EULER_VORTEX;
-  problem->non_zero_time           = PETSC_TRUE;
-  problem->print_info              = PRINT_EULER_VORTEX;
+  problem->dim                               = 3;
+  problem->q_data_size_vol                   = 10;
+  problem->q_data_size_sur                   = 10;
+  problem->setup_vol.qfunction               = Setup;
+  problem->setup_vol.qfunction_loc           = Setup_loc;
+  problem->setup_sur.qfunction               = SetupBoundary;
+  problem->setup_sur.qfunction_loc           = SetupBoundary_loc;
+  problem->ics.qfunction                     = ICsEuler;
+  problem->ics.qfunction_loc                 = ICsEuler_loc;
+  problem->apply_vol_rhs.qfunction           = Euler;
+  problem->apply_vol_rhs.qfunction_loc       = Euler_loc;
+  problem->apply_vol_ifunction.qfunction     = IFunction_Euler;
+  problem->apply_vol_ifunction.qfunction_loc = IFunction_Euler_loc;
+  problem->apply_inflow.qfunction            = TravelingVortex_Inflow;
+  problem->apply_inflow.qfunction_loc        = TravelingVortex_Inflow_loc;
+  problem->apply_outflow.qfunction           = Euler_Outflow;
+  problem->apply_outflow.qfunction_loc       = Euler_Outflow_loc;
+  problem->bc                                = Exact_Euler;
+  problem->bc_ctx                            = euler_ctx;
+  problem->non_zero_time                     = PETSC_TRUE;
+  problem->print_info                        = PRINT_EULER_VORTEX;
 
   // ------------------------------------------------------
   //             Create the libCEED context
@@ -62,7 +62,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
             mean_velocity[3] = {1., 1., 0}; // m/s
   PetscReal domain_min[3], domain_max[3], domain_size[3];
   ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-  for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
 
   // ------------------------------------------------------
   //             Create the PETSc context
@@ -73,8 +73,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //              Command line Options
   // ------------------------------------------------------
-  ierr = PetscOptionsBegin(comm, NULL, "Options for EULER_VORTEX problem",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Options for EULER_VORTEX problem", NULL);
   // -- Physics
   ierr = PetscOptionsScalar("-vortex_strength", "Strength of Vortex",
                             NULL, vortex_strength, &vortex_strength, NULL);
@@ -84,7 +83,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   ierr = PetscOptionsRealArray("-mean_velocity", "Background velocity vector",
                                NULL, mean_velocity, &n, &user_velocity);
   CHKERRQ(ierr);
-  for (int i=0; i<3; i++) center[i] = .5*domain_size[i];
+  for (PetscInt i=0; i<3; i++) center[i] = .5*domain_size[i];
   n = problem->dim;
   ierr = PetscOptionsRealArray("-center", "Location of vortex center",
                                NULL, center, &n, NULL); CHKERRQ(ierr);
@@ -120,7 +119,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
     CHKERRQ(ierr);
   }
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // ------------------------------------------------------
   //           Set up the PETSc context
@@ -132,85 +131,73 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   //           Set up the libCEED context
   // ------------------------------------------------------
   // -- Scale variables to desired units
-  for (int i=0; i<3; i++) {
+  for (PetscInt i=0; i<3; i++) {
     center[i] *= meter;
     domain_size[i] *= meter;
     mean_velocity[i] *= (meter/second);
   }
   problem->dm_scale = meter;
 
-  // -- Setup Context
-  setup_context->lx        = domain_size[0];
-  setup_context->ly        = domain_size[1];
-  setup_context->lz        = domain_size[2];
-  setup_context->center[0] = center[0];
-  setup_context->center[1] = center[1];
-  setup_context->center[2] = center[2];
-  setup_context->time      = 0;
-
   // -- QFunction Context
   user->phys->stab                        = stab;
   user->phys->euler_test                  = euler_test;
   user->phys->implicit                    = implicit;
   user->phys->has_curr_time               = has_curr_time;
   user->phys->has_neumann                 = has_neumann;
-  user->phys->euler_ctx->curr_time        = 0.;
-  user->phys->euler_ctx->implicit         = implicit;
-  user->phys->euler_ctx->euler_test       = euler_test;
-  user->phys->euler_ctx->center[0]        = center[0];
-  user->phys->euler_ctx->center[1]        = center[1];
-  user->phys->euler_ctx->center[2]        = center[2];
-  user->phys->euler_ctx->vortex_strength  = vortex_strength;
-  user->phys->euler_ctx->c_tau            = c_tau;
-  user->phys->euler_ctx->mean_velocity[0] = mean_velocity[0];
-  user->phys->euler_ctx->mean_velocity[1] = mean_velocity[1];
-  user->phys->euler_ctx->mean_velocity[2] = mean_velocity[2];
-  user->phys->euler_ctx->stabilization    = stab;
-
-  PetscFunctionReturn(0);
-}
-
-PetscErrorCode SetupContext_EULER_VORTEX(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
-  PetscFunctionBeginUser;
-  CeedQFunctionContextCreate(ceed, &ceed_data->setup_context);
-  CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER,
-                              sizeof(*setup_ctx), setup_ctx);
-  CeedQFunctionContextCreate(ceed, &ceed_data->euler_context);
-  CeedQFunctionContextSetData(ceed_data->euler_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER,
-                              sizeof(*phys->euler_ctx), phys->euler_ctx);
-  if (ceed_data->qf_ics)
-    CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->euler_context);
-  if (ceed_data->qf_rhs_vol)
-    CeedQFunctionSetContext(ceed_data->qf_rhs_vol, ceed_data->euler_context);
-  if (ceed_data->qf_ifunction_vol)
-    CeedQFunctionSetContext(ceed_data->qf_ifunction_vol, ceed_data->euler_context);
-  if (ceed_data->qf_apply_inflow)
-    CeedQFunctionSetContext(ceed_data->qf_apply_inflow, ceed_data->euler_context);
-  if (ceed_data->qf_apply_outflow)
-    CeedQFunctionSetContext(ceed_data->qf_apply_outflow, ceed_data->euler_context);
+  euler_ctx->curr_time        = 0.;
+  euler_ctx->implicit         = implicit;
+  euler_ctx->euler_test       = euler_test;
+  euler_ctx->center[0]        = center[0];
+  euler_ctx->center[1]        = center[1];
+  euler_ctx->center[2]        = center[2];
+  euler_ctx->vortex_strength  = vortex_strength;
+  euler_ctx->c_tau            = c_tau;
+  euler_ctx->mean_velocity[0] = mean_velocity[0];
+  euler_ctx->mean_velocity[1] = mean_velocity[1];
+  euler_ctx->mean_velocity[2] = mean_velocity[2];
+  euler_ctx->stabilization    = stab;
+
+  CeedQFunctionContextCreate(user->ceed, &euler_context);
+  CeedQFunctionContextSetData(euler_context, CEED_MEM_HOST, CEED_USE_POINTER,
+                              sizeof(*euler_ctx), euler_ctx);
+  CeedQFunctionContextSetDataDestroy(euler_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+  CeedQFunctionContextRegisterDouble(euler_context, "solution time",
+                                     offsetof(struct EulerContext_, curr_time), 1, "Physical time of the solution");
+  CeedQFunctionContextReferenceCopy(euler_context,
+                                    &problem->ics.qfunction_context);
+  CeedQFunctionContextReferenceCopy(euler_context,
+                                    &problem->apply_vol_rhs.qfunction_context);
+  CeedQFunctionContextReferenceCopy(euler_context,
+                                    &problem->apply_vol_ifunction.qfunction_context);
+  CeedQFunctionContextReferenceCopy(euler_context,
+                                    &problem->apply_inflow.qfunction_context);
+  CeedQFunctionContextReferenceCopy(euler_context,
+                                    &problem->apply_outflow.qfunction_context);
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode PRINT_EULER_VORTEX(Physics phys, SetupContext setup_ctx,
+PetscErrorCode PRINT_EULER_VORTEX(ProblemData *problem,
                                   AppCtx app_ctx) {
   MPI_Comm       comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
-  PetscFunctionBeginUser;
+  EulerContext   euler_ctx;
 
+  PetscFunctionBeginUser;
+  CeedQFunctionContextGetData(problem->ics.qfunction_context, CEED_MEM_HOST,
+                              &euler_ctx);
   ierr = PetscPrintf(comm,
                      "  Problem:\n"
                      "    Problem Name                       : %s\n"
                      "    Test Case                          : %s\n"
                      "    Background Velocity                : %f,%f,%f\n"
                      "    Stabilization                      : %s\n",
-                     app_ctx->problem_name, EulerTestTypes[phys->euler_test],
-                     phys->euler_ctx->mean_velocity[0],
-                     phys->euler_ctx->mean_velocity[1],
-                     phys->euler_ctx->mean_velocity[2],
-                     StabilizationTypes[phys->stab]); CHKERRQ(ierr);
+                     app_ctx->problem_name, EulerTestTypes[euler_ctx->euler_test],
+                     euler_ctx->mean_velocity[0],
+                     euler_ctx->mean_velocity[1],
+                     euler_ctx->mean_velocity[2],
+                     StabilizationTypes[euler_ctx->stabilization]); CHKERRQ(ierr);
 
+  CeedQFunctionContextRestoreData(problem->ics.qfunction_context, &euler_ctx);
   PetscFunctionReturn(0);
 }
diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c
index 5c5b2880d5..66028b3807 100644
--- a/examples/fluids/problems/newtonian.c
+++ b/examples/fluids/problems/newtonian.c
@@ -12,89 +12,81 @@
 #include "../qfunctions/setupgeo.h"
 #include "../qfunctions/newtonian.h"
 
-PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
-                               void *ctx) {
-  SetupContext      setup_context = *(SetupContext *)setup_ctx;
+PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) {
+  SetupContext      setup_context;
   User              user = *(User *)ctx;
   StabilizationType stab;
   MPI_Comm          comm = PETSC_COMM_WORLD;
   PetscBool         implicit;
   PetscBool         has_curr_time = PETSC_FALSE;
   PetscInt          ierr;
-  PetscFunctionBeginUser;
+  NewtonianIdealGasContext newtonian_ig_ctx;
+  CeedQFunctionContext newtonian_ig_context;
 
-  ierr = PetscCalloc1(1, &user->phys->newtonian_ig_ctx); CHKERRQ(ierr);
+  PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &setup_context); CHKERRQ(ierr);
+  ierr = PetscCalloc1(1, &newtonian_ig_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
   //           Setup Generic Newtonian IG Problem
   // ------------------------------------------------------
-  problem->dim                     = 3;
-  problem->q_data_size_vol         = 10;
-  problem->q_data_size_sur         = 4;
-  problem->setup_vol               = Setup;
-  problem->setup_vol_loc           = Setup_loc;
-  problem->ics                     = ICsNewtonianIG;
-  problem->ics_loc                 = ICsNewtonianIG_loc;
-  problem->setup_sur               = SetupBoundary;
-  problem->setup_sur_loc           = SetupBoundary_loc;
-  problem->apply_vol_rhs           = Newtonian;
-  problem->apply_vol_rhs_loc       = Newtonian_loc;
-  problem->apply_vol_ifunction     = IFunction_Newtonian;
-  problem->apply_vol_ifunction_loc = IFunction_Newtonian_loc;
-  problem->setup_ctx               = SetupContext_DENSITY_CURRENT;
-  problem->non_zero_time           = PETSC_FALSE;
-  problem->print_info              = PRINT_DENSITY_CURRENT;
+  problem->dim                               = 3;
+  problem->q_data_size_vol                   = 10;
+  problem->q_data_size_sur                   = 10;
+  problem->setup_vol.qfunction               = Setup;
+  problem->setup_vol.qfunction_loc           = Setup_loc;
+  problem->ics.qfunction                     = ICsNewtonianIG;
+  problem->ics.qfunction_loc                 = ICsNewtonianIG_loc;
+  problem->setup_sur.qfunction               = SetupBoundary;
+  problem->setup_sur.qfunction_loc           = SetupBoundary_loc;
+  problem->apply_vol_rhs.qfunction           = Newtonian;
+  problem->apply_vol_rhs.qfunction_loc       = Newtonian_loc;
+  problem->apply_vol_ifunction.qfunction     = IFunction_Newtonian;
+  problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc;
+  problem->bc                                = NULL;
+  problem->bc_ctx                            = setup_context;
+  problem->non_zero_time                     = PETSC_FALSE;
+  problem->print_info                        = PRINT_DENSITY_CURRENT;
 
   // ------------------------------------------------------
   //             Create the libCEED context
   // ------------------------------------------------------
-  CeedScalar theta0 = 300.;    // K
-  CeedScalar thetaC = -15.;    // K
-  CeedScalar P0     = 1.e5;    // Pa
-  CeedScalar N      = 0.01;    // 1/s
-  CeedScalar cv     = 717.;    // J/(kg K)
-  CeedScalar cp     = 1004.;   // J/(kg K)
-  CeedScalar g      = 9.81;    // m/s^2
-  CeedScalar lambda = -2./3.;  // -
-  CeedScalar mu     = 75.;     // Pa s, dynamic viscosity
-  // mu = 75 is not physical for air, but is good for numerical stability
-  CeedScalar k      = 0.02638; // W/(m K)
-  CeedScalar c_tau  = 0.5;     // -
-  // c_tau = 0.5 is reported as "optimal" in Hughes et al 2010
+  CeedScalar cv     = 717.;          // J/(kg K)
+  CeedScalar cp     = 1004.;         // J/(kg K)
+  CeedScalar g[3]   = {0, 0, -9.81}; // m/s^2
+  CeedScalar lambda = -2./3.;        // -
+  CeedScalar mu     = 1.8e-5;        // Pa s, dynamic viscosity
+  CeedScalar k      = 0.02638;       // W/(m K)
+  CeedScalar c_tau  = 0.5;           // -
+  CeedScalar Ctau_t  = 1.0;          // -
+  CeedScalar Ctau_v  = 36.0;         // TODO make function of degree
+  CeedScalar Ctau_C  = 1.0;          // TODO make function of degree
+  CeedScalar Ctau_M  = 1.0;          // TODO make function of degree
+  CeedScalar Ctau_E  = 1.0;          // TODO make function of degree
   PetscReal domain_min[3], domain_max[3], domain_size[3];
   ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-  for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
 
   // ------------------------------------------------------
   //             Create the PETSc context
   // ------------------------------------------------------
-  PetscScalar meter    = 1e-2;  // 1 meter in scaled length units
-  PetscScalar kilogram = 1e-6;  // 1 kilogram in scaled mass units
-  PetscScalar second   = 1e-2;  // 1 second in scaled time units
+  PetscScalar meter    = 1;  // 1 meter in scaled length units
+  PetscScalar kilogram = 1;  // 1 kilogram in scaled mass units
+  PetscScalar second   = 1;  // 1 second in scaled time units
   PetscScalar Kelvin   = 1;     // 1 Kelvin in scaled temperature units
   PetscScalar W_per_m_K, Pascal, J_per_kg_K, m_per_squared_s;
 
   // ------------------------------------------------------
   //              Command line Options
   // ------------------------------------------------------
-  ierr = PetscOptionsBegin(comm, NULL,
-                           "Options for Newtonian Ideal Gas based problem",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Options for Newtonian Ideal Gas based problem",
+                    NULL);
+
   // -- Physics
-  ierr = PetscOptionsScalar("-theta0", "Reference potential temperature",
-                            NULL, theta0, &theta0, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsScalar("-thetaC", "Perturbation of potential temperature",
-                            NULL, thetaC, &thetaC, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsScalar("-P0", "Atmospheric pressure",
-                            NULL, P0, &P0, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsScalar("-N", "Brunt-Vaisala frequency",
-                            NULL, N, &N, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-cv", "Heat capacity at constant volume",
                             NULL, cv, &cv, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-cp", "Heat capacity at constant pressure",
                             NULL, cp, &cp, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsScalar("-g", "Gravitational acceleration",
-                            NULL, g, &g, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-lambda",
                             "Stokes hypothesis second viscosity coefficient",
                             NULL, lambda, &lambda, NULL); CHKERRQ(ierr);
@@ -103,11 +95,24 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   ierr = PetscOptionsScalar("-k", "Thermal conductivity",
                             NULL, k, &k, NULL); CHKERRQ(ierr);
 
+  PetscInt dim = problem->dim;
+  ierr = PetscOptionsRealArray("-g", "Gravitational acceleration",
+                               NULL, g, &dim, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsEnum("-stab", "Stabilization method", NULL,
                           StabilizationTypes, (PetscEnum)(stab = STAB_NONE),
                           (PetscEnum *)&stab, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-c_tau", "Stabilization constant",
                             NULL, c_tau, &c_tau, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-Ctau_t", "Stabilization time constant",
+                            NULL, Ctau_t, &Ctau_t, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-Ctau_v", "Stabilization viscous constant",
+                            NULL, Ctau_v, &Ctau_v, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-Ctau_C", "Stabilization continuity constant",
+                            NULL, Ctau_C, &Ctau_C, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-Ctau_M", "Stabilization momentum constant",
+                            NULL, Ctau_M, &Ctau_M, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-Ctau_E", "Stabilization energy constant",
+                            NULL, Ctau_E, &Ctau_E, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsBool("-implicit", "Use implicit (IFunction) formulation",
                           NULL, implicit=PETSC_FALSE, &implicit, NULL);
   CHKERRQ(ierr);
@@ -133,7 +138,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
                        "Warning! Use -stab supg only with -implicit\n");
     CHKERRQ(ierr);
   }
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // ------------------------------------------------------
   //           Set up the PETSc context
@@ -157,30 +162,22 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   //           Set up the libCEED context
   // ------------------------------------------------------
   // -- Scale variables to desired units
-  theta0 *= Kelvin;
-  thetaC *= Kelvin;
-  P0     *= Pascal;
-  N      *= (1./second);
   cv     *= J_per_kg_K;
   cp     *= J_per_kg_K;
-  g      *= m_per_squared_s;
   mu     *= Pascal * second;
   k      *= W_per_m_K;
-  for (int i=0; i<3; i++) domain_size[i] *= meter;
+  for (PetscInt i=0; i<3; i++) domain_size[i] *= meter;
+  for (PetscInt i=0; i<3; i++) g[i]           *= m_per_squared_s;
   problem->dm_scale = meter;
 
   // -- Setup Context
-  setup_context->theta0     = theta0;
-  setup_context->thetaC     = thetaC;
-  setup_context->P0         = P0;
-  setup_context->N          = N;
   setup_context->cv         = cv;
   setup_context->cp         = cp;
-  setup_context->g          = g;
   setup_context->lx         = domain_size[0];
   setup_context->ly         = domain_size[1];
   setup_context->lz         = domain_size[2];
   setup_context->time       = 0;
+  ierr = PetscArraycpy(setup_context->g, g, 3); CHKERRQ(ierr);
 
   // -- Solver Settings
   user->phys->stab          = stab;
@@ -188,33 +185,57 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   user->phys->has_curr_time = has_curr_time;
 
   // -- QFunction Context
-  user->phys->newtonian_ig_ctx->lambda        = lambda;
-  user->phys->newtonian_ig_ctx->mu            = mu;
-  user->phys->newtonian_ig_ctx->k             = k;
-  user->phys->newtonian_ig_ctx->cv            = cv;
-  user->phys->newtonian_ig_ctx->cp            = cp;
-  user->phys->newtonian_ig_ctx->g             = g;
-  user->phys->newtonian_ig_ctx->c_tau         = c_tau;
-  user->phys->newtonian_ig_ctx->stabilization = stab;
-
+  newtonian_ig_ctx->lambda        = lambda;
+  newtonian_ig_ctx->mu            = mu;
+  newtonian_ig_ctx->k             = k;
+  newtonian_ig_ctx->cv            = cv;
+  newtonian_ig_ctx->cp            = cp;
+  newtonian_ig_ctx->c_tau         = c_tau;
+  newtonian_ig_ctx->Ctau_t        = Ctau_t;
+  newtonian_ig_ctx->Ctau_v        = Ctau_v;
+  newtonian_ig_ctx->Ctau_C        = Ctau_C;
+  newtonian_ig_ctx->Ctau_M        = Ctau_M;
+  newtonian_ig_ctx->Ctau_E        = Ctau_E;
+  newtonian_ig_ctx->stabilization = stab;
+  ierr = PetscArraycpy(newtonian_ig_ctx->g, g, 3); CHKERRQ(ierr);
+
+  CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context);
+  CeedQFunctionContextSetData(problem->ics.qfunction_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, sizeof(*setup_context), setup_context);
+  CeedQFunctionContextRegisterDouble(problem->ics.qfunction_context,
+                                     "evaluation time",
+                                     (char *)&setup_context->time - (char *)setup_context, 1, "Time of evaluation");
+
+  CeedQFunctionContextCreate(user->ceed, &newtonian_ig_context);
+  CeedQFunctionContextSetData(newtonian_ig_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER,
+                              sizeof(*newtonian_ig_ctx), newtonian_ig_ctx);
+  CeedQFunctionContextSetDataDestroy(newtonian_ig_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+  CeedQFunctionContextRegisterDouble(newtonian_ig_context, "timestep size",
+                                     offsetof(struct NewtonianIdealGasContext_, dt), 1, "Size of timestep, delta t");
+  problem->apply_vol_rhs.qfunction_context = newtonian_ig_context;
+  CeedQFunctionContextReferenceCopy(newtonian_ig_context,
+                                    &problem->apply_vol_ifunction.qfunction_context);
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode SetupContext_NEWTONIAN_IG(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
+PetscErrorCode PRINT_DENSITY_CURRENT(ProblemData *problem,
+                                     AppCtx app_ctx) {
+  MPI_Comm comm = PETSC_COMM_WORLD;
+  PetscErrorCode ierr;
+  NewtonianIdealGasContext newtonian_ctx;
+
   PetscFunctionBeginUser;
-  CeedQFunctionContextCreate(ceed, &ceed_data->setup_context);
-  CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER, sizeof(*setup_ctx), setup_ctx);
-  CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->setup_context);
-  CeedQFunctionContextCreate(ceed, &ceed_data->newt_ig_context);
-  CeedQFunctionContextSetData(ceed_data->newt_ig_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER,
-                              sizeof(*phys->newtonian_ig_ctx), phys->newtonian_ig_ctx);
-  if (ceed_data->qf_rhs_vol)
-    CeedQFunctionSetContext(ceed_data->qf_rhs_vol, ceed_data->newt_ig_context);
-  if (ceed_data->qf_ifunction_vol)
-    CeedQFunctionSetContext(ceed_data->qf_ifunction_vol,
-                            ceed_data->newt_ig_context);
+  CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
+                              CEED_MEM_HOST, &newtonian_ctx);
+  ierr = PetscPrintf(comm,
+                     "  Problem:\n"
+                     "    Problem Name                       : %s\n"
+                     "    Stabilization                      : %s\n",
+                     app_ctx->problem_name, StabilizationTypes[newtonian_ctx->stabilization]);
+  CHKERRQ(ierr);
+  CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
+                                  &newtonian_ctx);
   PetscFunctionReturn(0);
 }
diff --git a/examples/fluids/problems/shocktube.c b/examples/fluids/problems/shocktube.c
new file mode 100644
index 0000000000..0e234556a5
--- /dev/null
+++ b/examples/fluids/problems/shocktube.c
@@ -0,0 +1,186 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Utility functions for setting up SHOCKTUBE
+
+#include "../navierstokes.h"
+#include "../qfunctions/setupgeo.h"
+#include "../qfunctions/shocktube.h"
+
+// Set up the SHOCKTUBE problem: QFunctions, options, units, and QFunction contexts
+PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *ctx) {
+  SetupContext      setup_context;
+  User              user = *(User *)ctx;
+  MPI_Comm          comm = PETSC_COMM_WORLD;
+  PetscBool         implicit;
+  PetscBool         yzb;
+  StabilizationType stab;
+  PetscBool         has_curr_time = PETSC_FALSE;
+  PetscErrorCode    ierr;
+  ShockTubeContext  shocktube_ctx;
+  CeedQFunctionContext shocktube_context;
+
+  PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &setup_context); CHKERRQ(ierr);
+  ierr = PetscCalloc1(1, &shocktube_ctx); CHKERRQ(ierr);
+
+  // ------------------------------------------------------
+  //               SET UP SHOCKTUBE
+  // ------------------------------------------------------
+  problem->dim                               = 3;
+  problem->q_data_size_vol                   = 10;
+  problem->q_data_size_sur                   = 4;
+  problem->setup_vol.qfunction               = Setup;
+  problem->setup_vol.qfunction_loc           = Setup_loc;
+  problem->setup_sur.qfunction               = SetupBoundary;
+  problem->setup_sur.qfunction_loc           = SetupBoundary_loc;
+  problem->ics.qfunction                     = ICsShockTube;
+  problem->ics.qfunction_loc                 = ICsShockTube_loc;
+  problem->apply_vol_rhs.qfunction           = EulerShockTube;
+  problem->apply_vol_rhs.qfunction_loc       = EulerShockTube_loc;
+  problem->apply_vol_ifunction.qfunction     = NULL;
+  problem->apply_vol_ifunction.qfunction_loc = NULL;
+  problem->bc                                = Exact_ShockTube;
+  problem->bc_ctx                            = setup_context;
+  problem->non_zero_time                     = PETSC_FALSE;
+  problem->print_info                        = PRINT_SHOCKTUBE;
+
+  // ------------------------------------------------------
+  //             Create the libCEED context
+  // ------------------------------------------------------
+  // Driver section initial conditions
+  CeedScalar P_high          = 1.0;     // Pa
+  CeedScalar rho_high        = 1.0;     // kg/m^3
+  // Driven section initial conditions
+  CeedScalar P_low           = 0.1;     // Pa
+  CeedScalar rho_low         = 0.125;   // kg/m^3
+  // Stabilization parameter
+  CeedScalar c_tau           = 0.5;     // -, based on Hughes et al (2010)
+  // Tuning parameters for the YZB shock capturing
+  CeedScalar Cyzb            = 0.1;     // -, used in approximation of (Na),x
+  CeedScalar Byzb            = 2.0;     // -, 1 for smooth shocks
+  //                                          2 for sharp shocks
+  PetscReal domain_min[3], domain_max[3], domain_size[3];
+  ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
+  for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+
+  // ------------------------------------------------------
+  //             Create the PETSc context
+  // ------------------------------------------------------
+  PetscScalar meter    = 1e-2; // 1 meter in scaled length units
+  PetscScalar second   = 1e-2; // 1 second in scaled time units
+
+  // ------------------------------------------------------
+  //              Command line Options
+  // ------------------------------------------------------
+  PetscOptionsBegin(comm, NULL, "Options for SHOCKTUBE problem", NULL);
+
+  // -- Numerical formulation options
+  ierr = PetscOptionsBool("-implicit", "Use implicit (IFunction) formulation",
+                          NULL, implicit=PETSC_FALSE, &implicit, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsEnum("-stab", "Stabilization method", NULL,
+                          StabilizationTypes, (PetscEnum)(stab = STAB_NONE),
+                          (PetscEnum *)&stab, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-c_tau", "Stabilization constant",
+                            NULL, c_tau, &c_tau, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsBool("-yzb", "Use YZB discontinuity capturing",
+                          NULL, yzb=PETSC_FALSE, &yzb, NULL); CHKERRQ(ierr);
+
+  // -- Units
+  ierr = PetscOptionsScalar("-units_meter", "1 meter in scaled length units",
+                            NULL, meter, &meter, NULL); CHKERRQ(ierr);
+  meter = fabs(meter);
+  ierr = PetscOptionsScalar("-units_second","1 second in scaled time units",
+                            NULL, second, &second, NULL); CHKERRQ(ierr);
+  second = fabs(second);
+
+  // -- Warnings
+  if (stab == STAB_SUPG) {
+    ierr = PetscPrintf(comm,
+                       "Warning! -stab supg not implemented for the shocktube problem. \n");
+    CHKERRQ(ierr);
+  }
+  if (yzb && implicit) {
+    ierr = PetscPrintf(comm,
+                       "Warning! -yzb only implemented for explicit timestepping. \n");
+    CHKERRQ(ierr);
+  }
+
+  PetscOptionsEnd();
+
+  // ------------------------------------------------------
+  //           Set up the PETSc context
+  // ------------------------------------------------------
+  user->units->meter  = meter;
+  user->units->second = second;
+
+  // ------------------------------------------------------
+  //           Set up the libCEED context
+  // ------------------------------------------------------
+  // -- Scale variables to desired units
+  for (PetscInt i=0; i<3; i++) {
+    domain_size[i] *= meter;
+    domain_min[i] *= meter;
+  }
+  problem->dm_scale = meter;
+  // Midpoint along x is domain_min + domain_size/2 (correct for any origin)
+  CeedScalar mid_point = domain_min[0] + 0.5*domain_size[0];
+
+  // -- Setup Context
+  setup_context->mid_point = mid_point;
+  setup_context->time      = 0.0;
+  setup_context->P_high    = P_high;
+  setup_context->rho_high  = rho_high;
+  setup_context->P_low     = P_low;
+  setup_context->rho_low   = rho_low;
+
+  // -- QFunction Context
+  user->phys->implicit                      = implicit;
+  user->phys->has_curr_time                 = has_curr_time;
+  shocktube_ctx->implicit       = implicit;
+  shocktube_ctx->stabilization  = stab;
+  shocktube_ctx->yzb            = yzb;
+  shocktube_ctx->Cyzb           = Cyzb;
+  shocktube_ctx->Byzb           = Byzb;
+  shocktube_ctx->c_tau          = c_tau;
+
+  CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context);
+  CeedQFunctionContextSetData(problem->ics.qfunction_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, sizeof(*setup_context), setup_context);
+
+  CeedQFunctionContextCreate(user->ceed, &shocktube_context);
+  CeedQFunctionContextSetData(shocktube_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER,
+                              sizeof(*shocktube_ctx), shocktube_ctx);
+  CeedQFunctionContextSetDataDestroy(shocktube_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+  problem->apply_vol_rhs.qfunction_context = shocktube_context;
+  PetscFunctionReturn(0);
+}
+
+// Print the SHOCKTUBE problem summary (problem name only) with PetscPrintf
+PetscErrorCode PRINT_SHOCKTUBE(ProblemData *problem, AppCtx app_ctx) {
+  PetscErrorCode ierr;
+
+  PetscFunctionBeginUser;
+  ierr = PetscPrintf(PETSC_COMM_WORLD,
+                     "  Problem:\n"
+                     "    Problem Name                       : %s\n",
+                     app_ctx->problem_name); CHKERRQ(ierr);
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c
new file mode 100644
index 0000000000..1be568a104
--- /dev/null
+++ b/examples/fluids/problems/stg_shur14.c
@@ -0,0 +1,388 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Implementation of the Synthetic Turbulence Generation (STG) algorithm
+/// presented in Shur et al. 2014
+
+#include <stdlib.h>
+#include <math.h>
+#include <petsc.h>
+#include "../navierstokes.h"
+#include "stg_shur14.h"
+#include "../qfunctions/stg_shur14.h"
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846
+#endif
+
+/*
+ * @brief Perform Cholesky decomposition on array of symmetric 3x3 matrices
+ *
+ * This assumes the input matrices are in order [11,22,33,12,13,23]. This
+ * format is also used for the output.
+ *
+ * @param[in]  comm   MPI_Comm
+ * @param[in]  nprofs Number of matrices in Rij
+ * @param[in]  Rij    Array of the symmetric matrices [6,nprofs]
+ * @param[out] Cij    Array of the Cholesky Decomposition matrices, [6,nprofs]
+ */
+PetscErrorCode CalcCholeskyDecomp(MPI_Comm comm, PetscInt nprofs,
+                                  const CeedScalar Rij[6][nprofs], CeedScalar Cij[6][nprofs]) {
+  PetscFunctionBeginUser;
+  // Factor one matrix per column p; rows of Rij/Cij hold entries [11,22,33,12,13,23]
+  for (PetscInt p=0; p<nprofs; p++) {
+    Cij[0][p] = sqrt(Rij[0][p]);                                          // C11
+    Cij[3][p] = Rij[3][p] / Cij[0][p];                                    // C12
+    Cij[1][p] = sqrt(Rij[1][p] - pow(Cij[3][p], 2) );                     // C22
+    Cij[4][p] = Rij[4][p] / Cij[0][p];                                    // C13
+    Cij[5][p] = (Rij[5][p] - Cij[3][p]*Cij[4][p]) / Cij[1][p];            // C23
+    Cij[2][p] = sqrt(Rij[2][p] - pow(Cij[4][p], 2) - pow(Cij[5][p], 2));  // C33
+    if (isnan(Cij[0][p]) || isnan(Cij[1][p]) || isnan(Cij[2][p])) {
+      SETERRQ(comm, -1, "Cholesky decomposition failed at profile point %d. "
+              "Either STGInflow has non-SPD matrix or contains nan.", p+1);
+    }
+  }
+  PetscFunctionReturn(0);
+}
+
+
+/*
+ * @brief Open a PHASTA *.dat file, grabbing dimensions and file pointer
+ *
+ * This function opens the file specified by `path` using `PetscFOpen` and
+ * passes the file pointer in `fp`. It is not closed in this function, thus
+ * `fp` must be closed sometime after this function has been called (using
+ * `PetscFClose` for example).
+ *
+ * Assumes that the first line of the file has the number of rows and columns
+ * as the only two entries, separated by a single space
+ *
+ * @param[in] comm MPI_Comm for the program
+ * @param[in] path Path to the file
+ * @param[in] char_array_len Length of the character array that should contain each line
+ * @param[out] dims Dimensions of the file, taken from the first line of the file
+ * @param[out] fp File pointer to the opened file
+ */
+static PetscErrorCode OpenPHASTADatFile(const MPI_Comm comm,
+                                        const char path[PETSC_MAX_PATH_LEN], const PetscInt char_array_len,
+                                        PetscInt dims[2], FILE **fp) {
+  PetscErrorCode ierr;
+  PetscInt num_entries;
+  char header[char_array_len];
+  char **entries;
+  PetscFunctionBeginUser;
+  // Open the file (left open for the caller) and split its first line on spaces
+  ierr = PetscFOpen(comm, path, "r", fp); CHKERRQ(ierr);
+  ierr = PetscSynchronizedFGets(comm, *fp, char_array_len, header); CHKERRQ(ierr);
+  ierr = PetscStrToArray(header, ' ', &num_entries, &entries); CHKERRQ(ierr);
+  if (num_entries != 2) SETERRQ(comm, -1,
+                                  "Found %d dimensions instead of 2 on the first line of %s",
+                                  num_entries, path);
+  dims[0] = atoi(entries[0]);  // number of rows
+  dims[1] = atoi(entries[1]);  // number of columns
+  ierr = PetscStrToArrayDestroy(num_entries, entries); CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+/*
+ * @brief Get the number of rows for the PHASTA file at path
+ *
+ * Assumes that the first line of the file has the number of rows and columns
+ * as the only two entries, separated by a single space
+ *
+ * @param[in] comm MPI_Comm for the program
+ * @param[in] path Path to the file
+ * @param[out] nrows Number of rows
+ */
+static PetscErrorCode GetNRows(const MPI_Comm comm,
+                               const char path[PETSC_MAX_PATH_LEN], PetscInt *nrows) {
+  PetscErrorCode ierr;
+  const PetscInt line_len = 512;
+  PetscInt file_dims[2];
+  FILE *file;
+  PetscFunctionBeginUser;
+  // Only the header is needed: parse it, keep the row count, close the file again
+  ierr = OpenPHASTADatFile(comm, path, line_len, file_dims, &file); CHKERRQ(ierr);
+  *nrows = file_dims[0];
+  ierr = PetscFClose(comm, file); CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+/*
+ * @brief Read the STGInflow file and load the contents into stg_ctx
+ *
+ * Assumes that the first line of the file has the number of rows and columns
+ * as the only two entries, separated by a single space.
+ * Assumes there are 14 columns in the file
+ *
+ * Function calculates the Cholesky decomposition from the Reynolds stress
+ * profile found in the file
+ *
+ * @param[in] comm MPI_Comm for the program
+ * @param[in] path Path to the STGInflow.dat file
+ * @param[inout] stg_ctx STGShur14Context where the data will be loaded into
+ */
+static PetscErrorCode ReadSTGInflow(const MPI_Comm comm,
+                                    const char path[PETSC_MAX_PATH_LEN], STGShur14Context stg_ctx) {
+  PetscErrorCode ierr;
+  PetscInt ndims, dims[2];
+  FILE *fp;
+  const PetscInt char_array_len=512;
+  char line[char_array_len];
+  char **array;
+  PetscFunctionBeginUser;
+  ierr = OpenPHASTADatFile(comm, path, char_array_len, dims, &fp); CHKERRQ(ierr);
+  // Columns up to index 13 are read below, so the header must announce at least 14
+  if (dims[1] < 14) SETERRQ(comm, -1,
+                              "%s declares %d columns but at least 14 are required", path, dims[1]);
+
+  CeedScalar rij[6][stg_ctx->nprofs];  // scratch: only the Cholesky factor is stored in stg_ctx
+  CeedScalar *prof_dw = &stg_ctx->data[stg_ctx->offsets.prof_dw];
+  CeedScalar *eps = &stg_ctx->data[stg_ctx->offsets.eps];
+  CeedScalar *lt = &stg_ctx->data[stg_ctx->offsets.lt];
+  CeedScalar (*ubar)[stg_ctx->nprofs] = (CeedScalar (*)[stg_ctx->nprofs])
+                                        &stg_ctx->data[stg_ctx->offsets.ubar];
+  for (PetscInt i=0; i<stg_ctx->nprofs; i++) {
+    ierr = PetscSynchronizedFGets(comm, fp, char_array_len, line); CHKERRQ(ierr);
+    ierr = PetscStrToArray(line, ' ', &ndims, &array); CHKERRQ(ierr);
+    if (ndims < dims[1]) SETERRQ(comm, -1,
+                                   "Line %d of %s does not contain enough columns (%d instead of %d)", i,
+                                   path, ndims, dims[1]);
+    // Column layout: 0 = wall distance, 1-3 = ubar, 4-9 = rij, 12 = lt, 13 = eps
+    prof_dw[i] = (CeedScalar) atof(array[0]);
+    ubar[0][i] = (CeedScalar) atof(array[1]);
+    ubar[1][i] = (CeedScalar) atof(array[2]);
+    ubar[2][i] = (CeedScalar) atof(array[3]);
+    rij[0][i]  = (CeedScalar) atof(array[4]);
+    rij[1][i]  = (CeedScalar) atof(array[5]);
+    rij[2][i]  = (CeedScalar) atof(array[6]);
+    rij[3][i]  = (CeedScalar) atof(array[7]);
+    rij[4][i]  = (CeedScalar) atof(array[8]);
+    rij[5][i]  = (CeedScalar) atof(array[9]);
+    lt[i]      = (CeedScalar) atof(array[12]);
+    eps[i]     = (CeedScalar) atof(array[13]);
+    // Release this row's tokens; each PetscStrToArray() call hands back a fresh array
+    ierr = PetscStrToArrayDestroy(ndims, array); CHKERRQ(ierr);
+    if (prof_dw[i] < 0) SETERRQ(comm, -1,
+                                  "Distance to wall in %s cannot be negative", path);
+    if (lt[i] < 0) SETERRQ(comm, -1,
+                             "Turbulent length scale in %s cannot be negative", path);
+    if (eps[i] < 0) SETERRQ(comm, -1,
+                              "Turbulent dissipation in %s cannot be negative", path);
+  }
+  CeedScalar (*cij)[stg_ctx->nprofs]  = (CeedScalar (*)[stg_ctx->nprofs])
+                                        &stg_ctx->data[stg_ctx->offsets.cij];
+  ierr = CalcCholeskyDecomp(comm, stg_ctx->nprofs, rij, cij); CHKERRQ(ierr);
+  ierr = PetscFClose(comm, fp); CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+/*
+ * @brief Read the STGRand file and load the contents into stg_ctx
+ *
+ * Assumes that the first line of the file has the number of rows and columns
+ * as the only two entries, separated by a single space.
+ * Assumes there are 7 columns in the file
+ *
+ * @param[in]    comm    MPI_Comm for the program
+ * @param[in]    path    Path to the STGRand.dat file
+ * @param[inout] stg_ctx STGShur14Context where the data will be loaded into
+ */
+static PetscErrorCode ReadSTGRand(const MPI_Comm comm,
+                                  const char path[PETSC_MAX_PATH_LEN],
+                                  STGShur14Context stg_ctx) {
+  PetscErrorCode ierr;
+  PetscInt ndims, dims[2];
+  FILE *fp;
+  const PetscInt char_array_len = 512;
+  char line[char_array_len];
+  char **array;
+  PetscFunctionBeginUser;
+  ierr = OpenPHASTADatFile(comm, path, char_array_len, dims, &fp); CHKERRQ(ierr);
+  // Columns 0-6 are read below, so the header must announce at least 7 columns
+  if (dims[1] < 7) SETERRQ(comm, -1,
+                             "%s declares %d columns but at least 7 are required", path, dims[1]);
+
+  CeedScalar *phi = &stg_ctx->data[stg_ctx->offsets.phi];
+  CeedScalar (*d)[stg_ctx->nmodes]     = (CeedScalar (*)[stg_ctx->nmodes])
+                                         &stg_ctx->data[stg_ctx->offsets.d];
+  CeedScalar (*sigma)[stg_ctx->nmodes] = (CeedScalar (*)[stg_ctx->nmodes])
+                                         &stg_ctx->data[stg_ctx->offsets.sigma];
+  for (PetscInt i=0; i<stg_ctx->nmodes; i++) {
+    ierr = PetscSynchronizedFGets(comm, fp, char_array_len, line); CHKERRQ(ierr);
+    ierr = PetscStrToArray(line, ' ', &ndims, &array); CHKERRQ(ierr);
+    if (ndims < dims[1]) SETERRQ(comm, -1,
+                                   "Line %d of %s does not contain enough columns (%d instead of %d)", i,
+                                   path, ndims, dims[1]);
+    d[0][i]     = (CeedScalar) atof(array[0]);
+    d[1][i]     = (CeedScalar) atof(array[1]);
+    d[2][i]     = (CeedScalar) atof(array[2]);
+    phi[i]      = (CeedScalar) atof(array[3]);
+    sigma[0][i] = (CeedScalar) atof(array[4]);
+    sigma[1][i] = (CeedScalar) atof(array[5]);
+    sigma[2][i] = (CeedScalar) atof(array[6]);
+    ierr = PetscStrToArrayDestroy(ndims, array); CHKERRQ(ierr); // free this row's tokens
+  }
+  ierr = PetscFClose(comm, fp); CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+/*
+ * @brief Read STG data from input paths and put in STGShur14Context
+ *
+ * Reads data from input paths and puts them into a STGShur14Context object.
+ * Data stored initially in `*pstg_ctx` will be copied over to the new
+ * STGShur14Context instance.
+ *
+ * @param[in]    comm            MPI_Comm for the program
+ * @param[in]    dm              DM for the program
+ * @param[in]    stg_inflow_path Path to STGInflow.dat file
+ * @param[in]    stg_rand_path   Path to STGRand.dat file
+ * @param[inout] pstg_ctx        Pointer to STGShur14Context where the data will be loaded into
+ */
+PetscErrorCode GetSTGContextData(const MPI_Comm comm, const DM dm,
+                                 char stg_inflow_path[PETSC_MAX_PATH_LEN],
+                                 char stg_rand_path[PETSC_MAX_PATH_LEN],
+                                 STGShur14Context *pstg_ctx) {
+  PetscErrorCode ierr;
+  PetscInt nmodes, nprofs;
+  STGShur14Context stg_ctx;
+  PetscFunctionBeginUser;
+
+  // Array lengths are the row counts announced in the two file headers
+  ierr = GetNRows(comm, stg_rand_path, &nmodes); CHKERRQ(ierr);
+  ierr = GetNRows(comm, stg_inflow_path, &nprofs); CHKERRQ(ierr);
+  if (nmodes > STG_NMODES_MAX)
+    SETERRQ(comm, 1, "Number of wavemodes in %s (%d) exceeds STG_NMODES_MAX (%d). "
+            "Change size of STG_NMODES_MAX and recompile", stg_rand_path, nmodes,
+            STG_NMODES_MAX);
+
+  { // Fill the header in a scratch copy, then allocate header + data as one chunk
+    STGShur14Context s;
+    ierr = PetscCalloc1(1, &s); CHKERRQ(ierr);
+    *s = **pstg_ctx;
+    s->nmodes = nmodes;
+    s->nprofs = nprofs;
+    s->offsets.sigma   = 0;
+    s->offsets.d       = nmodes*3;
+    s->offsets.phi     = s->offsets.d       + nmodes*3;
+    s->offsets.kappa   = s->offsets.phi     + nmodes;
+    s->offsets.prof_dw = s->offsets.kappa   + nmodes;
+    s->offsets.ubar    = s->offsets.prof_dw + nprofs;
+    s->offsets.cij     = s->offsets.ubar    + nprofs*3;
+    s->offsets.eps     = s->offsets.cij     + nprofs*6;
+    s->offsets.lt      = s->offsets.eps     + nprofs;
+    PetscInt total_num_scalars = s->offsets.lt + nprofs;
+    s->total_bytes = sizeof(*stg_ctx) + total_num_scalars*sizeof(stg_ctx->data[0]);
+    ierr = PetscMalloc(s->total_bytes, &stg_ctx); CHKERRQ(ierr);
+    *stg_ctx = *s;
+    ierr = PetscFree(s); CHKERRQ(ierr);
+  }
+
+  ierr = ReadSTGInflow(comm, stg_inflow_path, stg_ctx); CHKERRQ(ierr);
+  ierr = ReadSTGRand(comm, stg_rand_path, stg_ctx); CHKERRQ(ierr);
+
+  // -- Calculate kappa
+  {
+    CeedScalar *kappa = &stg_ctx->data[stg_ctx->offsets.kappa];
+    CeedScalar *prof_dw = &stg_ctx->data[stg_ctx->offsets.prof_dw];
+    CeedScalar *lt = &stg_ctx->data[stg_ctx->offsets.lt];
+    CeedScalar le_max=0;
+    // Max-reduction carries le_max across iterations, so no CeedPragmaSIMD here
+    for (PetscInt i=0; i<stg_ctx->nprofs; i++) {
+      const CeedScalar le = PetscMin(2*prof_dw[i], 3*lt[i]);
+      if (le_max < le) le_max = le;
+    }
+    if (le_max <= 0) SETERRQ(comm, 1,
+                               "Largest eddy length scale from %s is not positive, cannot compute kappa", stg_inflow_path);
+    CeedScalar kmin = M_PI/le_max;
+    CeedPragmaSIMD
+    for (PetscInt i=0; i<stg_ctx->nmodes; i++) {
+      kappa[i] = kmin*pow(stg_ctx->alpha, i);
+    }
+  } //end calculate kappa
+  ierr = PetscFree(*pstg_ctx); CHKERRQ(ierr); // contents were copied into stg_ctx above; input must be PETSc-allocated
+  *pstg_ctx = stg_ctx;
+  PetscFunctionReturn(0);
+}
+
+// Build the STG context from options and the two data files, then install it as the inflow QFunction context
+PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem,
+                        User user, const bool prescribe_T,
+                        const CeedScalar theta0, const CeedScalar P0) {
+  PetscErrorCode ierr;
+  char stg_inflow_path[PETSC_MAX_PATH_LEN] = "./STGInflow.dat";
+  char stg_rand_path[PETSC_MAX_PATH_LEN] = "./STGRand.dat";
+  PetscBool mean_only = PETSC_FALSE;
+  CeedScalar u0=0.0, alpha=1.01;
+  STGShur14Context stg_ctx;
+  CeedQFunctionContext stg_context;
+  NewtonianIdealGasContext newtonian_ig_ctx;
+  PetscFunctionBeginUser;
+  // Get options
+  PetscOptionsBegin(comm, NULL, "STG Boundary Condition Options", NULL);
+  ierr = PetscOptionsString("-stg_inflow_path", "Path to STGInflow.dat", NULL,
+                            stg_inflow_path, stg_inflow_path,
+                            sizeof(stg_inflow_path), NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsString("-stg_rand_path", "Path to STGRand.dat", NULL,
+                            stg_rand_path,stg_rand_path,
+                            sizeof(stg_rand_path), NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsReal("-stg_alpha", "Growth rate of the wavemodes", NULL,
+                          alpha, &alpha, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsReal("-stg_u0", "Advective velocity for the fluctuations",
+                          NULL, u0, &u0, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsBool("-stg_mean_only", "Only apply mean profile",
+                          NULL, mean_only, &mean_only, NULL); CHKERRQ(ierr);
+  PetscOptionsEnd();
+
+  ierr = PetscCalloc1(1, &stg_ctx); CHKERRQ(ierr);
+  stg_ctx->alpha         = alpha;
+  stg_ctx->u0            = u0;
+  stg_ctx->is_implicit   = user->phys->implicit;
+  stg_ctx->prescribe_T   = prescribe_T;
+  stg_ctx->mean_only     = mean_only;
+  stg_ctx->theta0        = theta0;
+  stg_ctx->P0            = P0;
+
+  { // Calculate dx assuming constant spacing
+    PetscReal domain_min[3], domain_max[3], domain_size[3];
+    ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
+    for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+    PetscInt nmax = 3, faces[3] = {0, 0, 0};  // stays zero if the option is absent
+    ierr = PetscOptionsGetIntArray(NULL, NULL, "-dm_plex_box_faces", faces, &nmax,
+                                   NULL); CHKERRQ(ierr);
+    if (faces[0] < 1) SETERRQ(comm, -1,
+                                "STG needs -dm_plex_box_faces with a positive first entry to compute dx");
+    stg_ctx->dx = domain_size[0]/faces[0];
+  }
+
+  CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
+                              CEED_MEM_HOST, &newtonian_ig_ctx);
+  stg_ctx->newtonian_ctx = *newtonian_ig_ctx;
+  CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
+                                  &newtonian_ig_ctx);
+
+  ierr = GetSTGContextData(comm, dm, stg_inflow_path, stg_rand_path, &stg_ctx);
+  CHKERRQ(ierr);  // stg_ctx now points at the allocation that also holds the file data
+
+  CeedQFunctionContextDestroy(&problem->apply_inflow.qfunction_context);
+  CeedQFunctionContextCreate(user->ceed, &stg_context);
+  CeedQFunctionContextSetData(stg_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, stg_ctx->total_bytes, stg_ctx);
+  CeedQFunctionContextSetDataDestroy(stg_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+  CeedQFunctionContextRegisterDouble(stg_context, "solution time",
+                                     offsetof(struct STGShur14Context_, time), 1,
+                                     "Physical time of the solution");
+
+  problem->apply_inflow.qfunction         = STGShur14_Inflow;
+  problem->apply_inflow.qfunction_loc     = STGShur14_Inflow_loc;
+  problem->apply_inflow.qfunction_context = stg_context;
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/fluids/problems/stg_shur14.h b/examples/fluids/problems/stg_shur14.h
new file mode 100644
index 0000000000..7d5dc90bfd
--- /dev/null
+++ b/examples/fluids/problems/stg_shur14.h
@@ -0,0 +1,22 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#ifndef problems_stg_shur14_h
+#define problems_stg_shur14_h
+
+#include <ceed.h>
+#include <petsc.h>
+#include "../qfunctions/stg_shur14_type.h"
+#include "../navierstokes.h"
+
+// Set up the STG (Shur et al. 2014) inflow boundary condition on `problem`
+extern PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm,
+                               ProblemData *problem, User user,
+                               const bool prescribe_T, const CeedScalar theta0,
+                               const CeedScalar P0);
+
+#endif // problems_stg_shur14_h
diff --git a/examples/fluids/qfunctions/advection.h b/examples/fluids/qfunctions/advection.h
index ad69ff0762..06560167e0 100644
--- a/examples/fluids/qfunctions/advection.h
+++ b/examples/fluids/qfunctions/advection.h
@@ -12,34 +12,21 @@
 #define advection_h
 
 #include <math.h>
+#include <ceed.h>
 
-#ifndef setup_context_struct
-#define setup_context_struct
 typedef struct SetupContext_ *SetupContext;
 struct SetupContext_ {
-  CeedScalar theta0;
-  CeedScalar thetaC;
-  CeedScalar P0;
-  CeedScalar N;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g;
   CeedScalar rc;
   CeedScalar lx;
   CeedScalar ly;
   CeedScalar lz;
-  CeedScalar center[3];
-  CeedScalar dc_axis[3];
   CeedScalar wind[3];
   CeedScalar time;
   int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
   int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
   int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
 };
-#endif
 
-#ifndef advection_context_struct
-#define advection_context_struct
 typedef struct AdvectionContext_ *AdvectionContext;
 struct AdvectionContext_ {
   CeedScalar CtauS;
@@ -48,7 +35,8 @@ struct AdvectionContext_ {
   bool implicit;
   int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
 };
-#endif
+
+CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; }
 
 // *****************************************************************************
 // This QFunction sets the initial conditions and the boundary conditions
@@ -101,7 +89,7 @@ struct AdvectionContext_ {
 // This helper function provides support for the exact, time-dependent solution
 //   (currently not implemented) and IC formulation for 3D advection
 // *****************************************************************************
-CEED_QFUNCTION_HELPER int Exact_Advection(CeedInt dim, CeedScalar time,
+CEED_QFUNCTION_HELPER CeedInt Exact_Advection(CeedInt dim, CeedScalar time,
     const CeedScalar X[], CeedInt Nf, CeedScalar q[], void *ctx) {
   const SetupContext context = (SetupContext)ctx;
   const CeedScalar rc    = context->rc;
@@ -124,14 +112,13 @@ CEED_QFUNCTION_HELPER int Exact_Advection(CeedInt dim, CeedScalar time,
   switch (context->bubble_type) {
   //  original sphere
   case 0: { // (dim=3)
-    r = sqrt(pow((x - x0[0]), 2) +
-             pow((y - x0[1]), 2) +
-             pow((z - x0[2]), 2));
+    r = sqrt(Square(x - x0[0]) +
+             Square(y - x0[1]) +
+             Square(z - x0[2]));
   } break;
   // cylinder (needs periodicity to work properly)
   case 1: { // (dim=2)
-    r = sqrt(pow((x - x0[0]), 2) +
-             pow((y - x0[1]), 2) );
+    r = sqrt(Square(x - x0[0]) + Square(y - x0[1]));
   } break;
   }
 
diff --git a/examples/fluids/qfunctions/advection2d.h b/examples/fluids/qfunctions/advection2d.h
index f828f83f15..40867fb82b 100644
--- a/examples/fluids/qfunctions/advection2d.h
+++ b/examples/fluids/qfunctions/advection2d.h
@@ -12,38 +12,22 @@
 #define advection2d_h
 
 #include <math.h>
+#include <ceed.h>
 
 #ifndef M_PI
 #define M_PI    3.14159265358979323846
 #endif
 
-#ifndef setup_context_struct
-#define setup_context_struct
 typedef struct SetupContext_ *SetupContext;
 struct SetupContext_ {
-  CeedScalar theta0;
-  CeedScalar thetaC;
-  CeedScalar P0;
-  CeedScalar N;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g;
   CeedScalar rc;
   CeedScalar lx;
   CeedScalar ly;
-  CeedScalar lz;
-  CeedScalar center[3];
-  CeedScalar dc_axis[3];
   CeedScalar wind[3];
   CeedScalar time;
   int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
-  int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
-  int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
 };
-#endif
 
-#ifndef advection_context_struct
-#define advection_context_struct
 typedef struct AdvectionContext_ *AdvectionContext;
 struct AdvectionContext_ {
   CeedScalar CtauS;
@@ -52,7 +36,8 @@ struct AdvectionContext_ {
   bool implicit;
   int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
 };
-#endif
+
+CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; }
 
 // *****************************************************************************
 // This QFunction sets the initial conditions and the boundary conditions
@@ -105,7 +90,7 @@ struct AdvectionContext_ {
 // This helper function provides the exact, time-dependent solution
 //   and IC formulation for 2D advection
 // *****************************************************************************
-CEED_QFUNCTION_HELPER int Exact_Advection2d(CeedInt dim, CeedScalar time,
+CEED_QFUNCTION_HELPER CeedInt Exact_Advection2d(CeedInt dim, CeedScalar time,
     const CeedScalar X[], CeedInt Nf, CeedScalar q[], void *ctx) {
   const SetupContext context = (SetupContext)ctx;
   const CeedScalar rc    = context->rc;
@@ -142,15 +127,15 @@ CEED_QFUNCTION_HELPER int Exact_Advection2d(CeedInt dim, CeedScalar time,
     return 1;
   }
 
-  CeedScalar r = sqrt(pow(x - x0[0], 2) + pow(y - x0[1], 2));
+  CeedScalar r = sqrt(Square(x - x0[0]) + Square(y - x0[1]));
   CeedScalar E = 1 - r/rc;
 
   if (0) { // non-smooth initial conditions
     if (q[4] < E) q[4] = E;
-    r = sqrt(pow(x - x1[0], 2) + pow(y - x1[1], 2));
+    r = sqrt(Square(x - x1[0]) + Square(y - x1[1]));
     if (r <= rc) q[4] = 1;
   }
-  r = sqrt(pow(x - x2[0], 2) + pow(y - x2[1], 2));
+  r = sqrt(Square(x - x2[0]) + Square(y - x2[1]));
   E = (r <= rc) ? .5 + .5*cos(r*M_PI/rc) : 0;
   if (q[4] < E) q[4] = E;
 
diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h
new file mode 100644
index 0000000000..06b13aef11
--- /dev/null
+++ b/examples/fluids/qfunctions/blasius.h
@@ -0,0 +1,347 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Operator for Navier-Stokes example using PETSc
+
+
+#ifndef blasius_h
+#define blasius_h
+
+#include <math.h>
+#include <ceed.h>
+#include "newtonian_types.h"
+
+typedef struct BlasiusContext_ *BlasiusContext;
+struct BlasiusContext_ {
+  bool       implicit;  // !< Using implicit timestepping or not
+  bool       weakT;     // !< flag to set Temperature weakly at inflow
+  CeedScalar delta0;    // !< Boundary layer height at inflow
+  CeedScalar Uinf;      // !< Velocity at boundary layer edge
+  CeedScalar P0;        // !< Pressure at outflow
+  CeedScalar theta0;    // !< Temperature at inflow
+  struct NewtonianIdealGasContext_ newtonian_ctx;  // !< Newtonian ideal gas parameters, stored by value (cv, cp, mu are read by the QFunctions in this file)
+};
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846
+#endif
+
+// Evaluate the tabulated Blasius profile at (x, y): writes velocities *u, *v and shear stress *t12
+CEED_QFUNCTION_HELPER void BlasiusSolution(const CeedScalar y,
+    const CeedScalar Uinf, const CeedScalar x0, const CeedScalar x,
+    const CeedScalar rho, CeedScalar *u, CeedScalar *v, CeedScalar *t12,
+    const NewtonianIdealGasContext newt_ctx) {
+  const CeedInt nprofs = 50;  // number of entries in each table below
+  // *INDENT-OFF*
+  const CeedScalar eta_table[] = {
+    0.000000000000000000e+00, 1.282051282051281937e-01, 2.564102564102563875e-01, 3.846153846153845812e-01, 5.128205128205127750e-01,
+    6.410256410256409687e-01, 7.692307692307691624e-01, 8.974358974358973562e-01, 1.025641025641025550e+00, 1.153846153846153744e+00,
+    1.282051282051281937e+00, 1.410256410256410131e+00, 1.538461538461538325e+00, 1.666666666666666519e+00, 1.794871794871794712e+00,
+    1.923076923076922906e+00, 2.051282051282051100e+00, 2.179487179487179294e+00, 2.307692307692307487e+00, 2.435897435897435681e+00,
+    2.564102564102563875e+00, 2.692307692307692069e+00, 2.820512820512820262e+00, 2.948717948717948456e+00, 3.076923076923076650e+00,
+    3.205128205128204844e+00, 3.333333333333333037e+00, 3.461538461538461231e+00, 3.589743589743589425e+00, 3.717948717948717618e+00,
+    3.846153846153845812e+00, 3.974358974358974006e+00, 4.102564102564102200e+00, 4.230769230769229949e+00, 4.358974358974358587e+00,
+    4.487179487179487225e+00, 4.615384615384614975e+00, 4.743589743589742724e+00, 4.871794871794871362e+00, 5.000000000000000000e+00,
+    5.500000000000000000e+00, 6.000000000000000000e+00, 6.500000000000000000e+00, 7.000000000000000000e+00, 7.500000000000000000e+00,
+    8.000000000000000000e+00, 8.500000000000000000e+00, 9.000000000000000000e+00, 9.500000000000000000e+00, 1.000000000000000000e+01};
+
+  const CeedScalar f_table[] = {
+    0.000000000000000000e+00, 2.728923405566200267e-03, 1.091524811461423369e-02, 2.455658828897525764e-02, 4.364674649279581820e-02,
+    6.817382707725749835e-02, 9.811838418932711248e-02, 1.334516294237205192e-01, 1.741337304561980659e-01, 2.201122374410622862e-01,
+    2.713206781625860375e-01, 3.276773654929600599e-01, 3.890844612583744255e-01, 4.554273387986328414e-01, 5.265742820946719416e-01,
+    6.023765522220410062e-01, 6.826688421431770237e-01, 7.672701287583111318e-01, 8.559849171804534418e-01, 9.486048570979430661e-01,
+    1.044910695686512625e+00, 1.144674516826549082e+00, 1.247662203367335465e+00, 1.353636048811749593e+00, 1.462357437868362364e+00,
+    1.573589512396551759e+00, 1.687099740622293842e+00, 1.802662313062363353e+00, 1.920060297987626230e+00, 2.039087501786055245e+00,
+    2.159549994377929050e+00, 2.281267275838891884e+00, 2.404073076539093190e+00, 2.527815798402052838e+00, 2.652358618452637540e+00,
+    2.777579287003750341e+00, 2.903369661199559637e+00, 3.029635020019957992e+00, 3.156293209307130088e+00, 3.283273665161465349e+00,
+    3.780571892998292771e+00, 4.279620922520262383e+00, 4.779322325882148448e+00, 5.279238811036782053e+00, 5.779218028455369804e+00,
+    6.279213431354994768e+00, 6.779212528163703233e+00, 7.279212370655419484e+00, 7.779212346288013613e+00, 8.279212342945751146e+00};
+
+  const CeedScalar fp_table[] = {
+    0.000000000000000000e+00, 4.257083277988830267e-02, 8.513297869782740501e-02, 1.276641169537044151e-01, 1.701271279078802878e-01,
+    2.124702831905590783e-01, 2.546276046951935212e-01, 2.965194442747576264e-01, 3.380533304776729975e-01, 3.791251204629754179e-01,
+    4.196204840172004791e-01, 4.594167322894788796e-01, 4.983849866855867838e-01, 5.363926638765821320e-01, 5.733062319885513514e-01,
+    6.089941719927144392e-01, 6.433300586189647507e-01, 6.761956584341198839e-01, 7.074839307288774970e-01, 7.371018110314454530e-01,
+    7.649726585225528064e-01, 7.910382579383948842e-01, 8.152602836158657773e-01, 8.376211573266827415e-01, 8.581242609418713307e-01,
+    8.767934976651666767e-01, 8.936722290953328374e-01, 9.088216471306606037e-01, 9.223186672607004422e-01, 9.342534510898168332e-01,
+    9.447266795705382414e-01, 9.538467037387058367e-01, 9.617266968332524035e-01, 9.684819213624265011e-01, 9.742272083384174719e-01,
+    9.790747253056680810e-01, 9.831320868743089747e-01, 9.865008381344084754e-01, 9.892753192614093249e-01, 9.915419001656551323e-01,
+    9.968788209317821503e-01, 9.989728724371175206e-01, 9.996990677381791812e-01, 9.999216041491896245e-01, 9.999818594083667023e-01,
+    9.999962745365539307e-01, 9.999993214550036980e-01, 9.999998904550418954e-01, 9.999999843329338001e-01, 9.999999980166356384e-01};
+
+  const CeedScalar fpp_table[] = {
+    3.320573362157903663e-01, 3.320379743512646420e-01, 3.319024760665882368e-01, 3.315350015070190337e-01, 3.308206767975666041e-01,
+    3.296466995822193158e-01, 3.279038639411161471e-01, 3.254884713737624113e-01, 3.223045750196085746e-01, 3.182664816607024272e-01,
+    3.133014118810801829e-01, 3.073521951089355775e-01, 3.003798556086043625e-01, 2.923659305537876785e-01, 2.833143548208253981e-01,
+    2.732527514995234941e-01, 2.622329840371728227e-01, 2.503308560706500874e-01, 2.376448876931176457e-01, 2.242941499773744018e-01,
+    2.104151994284793603e-01, 1.961582158440171031e-01, 1.816825052623964043e-01, 1.671515786102889534e-01, 1.527280512426029968e-01,
+    1.385686249977987894e-01, 1.248194106805364800e-01, 1.116118251613979206e-01, 9.905925581301598670e-02, 8.725462988794610575e-02,
+    7.626896310981794158e-02, 6.615089622448211415e-02, 5.692716644118058639e-02, 4.860390768479891377e-02, 4.116863313890323922e-02,
+    3.459272784597366285e-02, 2.883426862493499582e-02, 2.384099224121952881e-02, 1.955324839409207718e-02, 1.590679868531958210e-02,
+    6.578593141419011685e-03, 2.402039843751689954e-03, 7.741093231657678389e-04, 2.201689553063347941e-04, 5.526217815680267893e-05,
+    1.224092624232004387e-05, 2.392841910090350858e-06, 4.127879363882133676e-07, 6.284244603762621373e-08, 8.442944409712819646e-09};
+  // *INDENT-ON*
+
+  CeedScalar nu = newt_ctx->mu / rho;
+  CeedScalar eta = y*sqrt(Uinf/(nu*(x0+x)));
+  CeedInt idx=-1;
+  // Find the first table entry strictly above eta; idx stays -1 if there is none
+  for(CeedInt i=0; i<nprofs; i++) {
+    if (eta < eta_table[i]) {
+      idx = i;
+      break;
+    }
+  }
+  CeedScalar f, fp, fpp;
+
+  if (idx > 0) { // eta within the bounds of eta_table: interpolate linearly
+    CeedScalar coeff = (eta - eta_table[idx-1]) /
+                       (eta_table[idx] - eta_table[idx-1]);
+    f   = f_table[idx-1]   + coeff*( f_table[idx]   - f_table[idx-1] );
+    fp  = fp_table[idx-1]  + coeff*( fp_table[idx]  - fp_table[idx-1] );
+    fpp = fpp_table[idx-1] + coeff*( fpp_table[idx] - fpp_table[idx-1] );
+  } else { // eta outside bounds of eta_table: clamp to the nearest end of the table
+    const CeedInt end = (idx == 0) ? 0 : nprofs-1; // idx == 0 means eta < eta_table[0]
+    f   = f_table[end];
+    fp  = fp_table[end];
+    fpp = fpp_table[end];
+    eta = eta_table[end];
+  }
+
+  *u = Uinf*fp;
+  *t12 = rho*nu*Uinf*fpp*sqrt(Uinf/(nu*(x0+x)));
+  *v = 0.5*sqrt(nu*Uinf/(x0+x))*(eta*fp - f);
+}
+
+// *****************************************************************************
+// This QFunction sets a Blasius boundary layer for the initial condition
+// *****************************************************************************
+CEED_QFUNCTION(ICsBlasius)(void *ctx, CeedInt Q,
+                           const CeedScalar *const *in, CeedScalar *const *out) {
+  // Input: quadrature point coordinates; output: initial state vector
+  const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
+  CeedScalar (*q0)[CEED_Q_VLA]      = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  // Gas properties from the embedded Newtonian context
+  const BlasiusContext blasius = (BlasiusContext)ctx;
+  const CeedScalar cv     = blasius->newtonian_ctx.cv;
+  const CeedScalar cp     = blasius->newtonian_ctx.cp;
+  const CeedScalar mu     = blasius->newtonian_ctx.mu;
+  const CeedScalar gamma  = cp/cv;
+
+  // Reference values of the boundary layer
+  const CeedScalar theta0 = blasius->theta0;
+  const CeedScalar P0     = blasius->P0;
+  const CeedScalar delta0 = blasius->delta0;
+  const CeedScalar Uinf   = blasius->Uinf;
+
+  // Constants shared by every quadrature point
+  const CeedScalar e_internal = cv * theta0;
+  const CeedScalar rho        = P0 / ((gamma - 1) * e_internal);
+  const CeedScalar x0         = Uinf*rho / (mu*25/ (delta0*delta0) );
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD
+  for (CeedInt i=0; i<Q; i++) {
+    CeedScalar u, v, t12;
+    // t12 is filled in by the helper but is not needed for the initial condition
+    BlasiusSolution(X[1][i], Uinf, x0, X[0][i], rho, &u, &v, &t12,
+                    &blasius->newtonian_ctx);
+
+    q0[0][i] = rho;
+    q0[1][i] = u * rho;
+    q0[2][i] = v * rho;
+    q0[3][i] = 0.;
+    q0[4][i] = rho * e_internal + 0.5*(u*u + v*v)*rho;
+  } // End of Quadrature Point Loop
+  return 0;
+}
+
+// ********* Inflow boundary flux using velocity and t12 from BlasiusSolution ***
+CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q,
+                               const CeedScalar *const *in,
+                               CeedScalar *const *out) {
+  // *INDENT-OFF*
+  // Inputs: q (current state), q_data_sur (surface weight, normal), X (coordinates)
+  const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[0],
+                   (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1],
+                   (*X)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+
+  // Outputs: v (flux contribution for each of the five state components)
+  CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+  // *INDENT-ON*
+  const BlasiusContext context = (BlasiusContext)ctx;
+  const bool implicit     = context->implicit;
+  const CeedScalar mu     = context->newtonian_ctx.mu;
+  const CeedScalar cv     = context->newtonian_ctx.cv;
+  const CeedScalar cp     = context->newtonian_ctx.cp;
+  const CeedScalar Rd     = cp - cv;
+  const CeedScalar gamma  = cp/cv;
+
+  const CeedScalar theta0 = context->theta0;
+  const CeedScalar P0     = context->P0;
+  const CeedScalar delta0 = context->delta0;
+  const CeedScalar Uinf   = context->Uinf;
+  const bool weakT        = context->weakT; // selects the branch taken in the loop
+  const CeedScalar rho_0  = P0 / (Rd * theta0); // density from P0 and theta0
+  const CeedScalar x0     = Uinf*rho_0 / (mu*25/ (delta0*delta0) );
+
+  CeedPragmaSIMD
+  // Quadrature Point Loop
+  for (CeedInt i=0; i<Q; i++) {
+    // Setup
+    // -- Interp-to-Interp q_data
+    // For explicit mode, the surface integral is on the RHS of ODE q_dot = f(q).
+    // For implicit mode, it gets pulled to the LHS of implicit ODE/DAE g(q_dot, q).
+    // We can effect this by swapping the sign on this weight
+    const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
+
+    // Calculate inflow values from BlasiusSolution at this point
+    const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
+    CeedScalar velocity[3] = {0.}; // only [0] and [1] are passed below; [2] stays 0
+    CeedScalar t12;
+    BlasiusSolution(x[1], Uinf, x0, x[0], rho_0, &velocity[0], &velocity[1],
+                    &t12, &context->newtonian_ctx);
+
+    // Enable the user to choose between weak T and weak rho inflow
+    CeedScalar rho,E_internal, P, E_kinetic;
+    if (weakT) {
+      // rho is taken from the current solution
+      rho = q[0][i];
+      // Temperature is being set weakly (theta0) and for constant cv this sets E_internal
+      E_internal = rho * cv * theta0;
+      // Find pressure from rho, Rd and theta0
+      P=rho*Rd*theta0; // interior rho with exterior T
+      E_kinetic = .5 * rho * (velocity[0]*velocity[0] +
+                              velocity[1]*velocity[1] +
+                              velocity[2]*velocity[2]);
+    } else {
+      // Fix rho weakly on the inflow to a value consistent with theta0 and P0
+      rho =  rho_0;
+      E_kinetic = .5 * rho * (velocity[0]*velocity[0] +
+                              velocity[1]*velocity[1] +
+                              velocity[2]*velocity[2]);
+      E_internal = q[4][i] - E_kinetic; // uses set rho and u but E from solution
+      P = E_internal * (gamma - 1.);
+    }
+    const CeedScalar E = E_internal + E_kinetic;
+    // ---- Normal vect
+    const CeedScalar norm[3] = {q_data_sur[1][i],
+                                q_data_sur[2][i],
+                                q_data_sur[3][i]
+                               };
+
+    // The Physics
+    // Zero v so all future terms can safely sum into it
+    for (CeedInt j=0; j<5; j++) v[j][i] = 0.;
+
+    const CeedScalar u_normal = norm[0]*velocity[0] +
+                                norm[1]*velocity[1] +
+                                norm[2]*velocity[2];
+
+    // Flux terms, all built from the prescribed velocity
+    // -- Density
+    v[0][i] -= wdetJb * rho * u_normal; // rho as selected by the branch above
+
+    // -- Momentum
+    for (CeedInt j=0; j<3; j++)
+      v[j+1][i] -= wdetJb * (rho * u_normal * velocity[j] + // rho as selected above
+                             norm[j] * P); // P as computed by the branch above
+    v[2][i] -= wdetJb * t12  ; // t12 enters the second momentum component only
+
+    // -- Total Energy Density
+    v[4][i] -= wdetJb * u_normal * (E + P);
+    v[4][i] -= wdetJb * t12 * velocity[1]; // t12 times the second velocity component
+
+  } // End Quadrature Point Loop
+  return 0;
+}
+
+// ********* Outflow boundary flux with pressure P0 and t12 from BlasiusSolution
+CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q,
+                                const CeedScalar *const *in,
+                                CeedScalar *const *out) {
+  // *INDENT-OFF*
+  // Inputs: q (current state), q_data_sur (surface weight, normal), X (coordinates)
+  const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[0],
+                   (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1],
+                   (*X)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+  // Outputs: v (flux contribution for each of the five state components)
+  CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+  // *INDENT-ON*
+
+  const BlasiusContext context = (BlasiusContext)ctx;
+  const bool implicit     = context->implicit;
+  const CeedScalar mu     = context->newtonian_ctx.mu;
+  const CeedScalar cv     = context->newtonian_ctx.cv;
+  const CeedScalar cp     = context->newtonian_ctx.cp;
+  const CeedScalar Rd     = cp - cv;
+
+  const CeedScalar theta0 = context->theta0;
+  const CeedScalar P0     = context->P0;
+  const CeedScalar rho_0  = P0 / (Rd*theta0); // density from P0 and theta0
+  const CeedScalar delta0 = context->delta0;
+  const CeedScalar Uinf   = context->Uinf;
+  const CeedScalar x0     = Uinf*rho_0 / (mu*25/ (delta0*delta0) );
+
+  CeedPragmaSIMD
+  // Quadrature Point Loop
+  for (CeedInt i=0; i<Q; i++) {
+    // Setup
+    // -- Interp in: density, velocity and total energy of the current solution
+    const CeedScalar rho      =  q[0][i];
+    const CeedScalar u[3]     = {q[1][i] / rho,
+                                 q[2][i] / rho,
+                                 q[3][i] / rho
+                                };
+    const CeedScalar E        =  q[4][i];
+
+    // -- Interp-to-Interp q_data
+    // For explicit mode, the surface integral is on the RHS of ODE q_dot = f(q).
+    // For implicit mode, it gets pulled to the LHS of implicit ODE/DAE g(q_dot, q).
+    // We can effect this by swapping the sign on this weight
+    const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
+
+    // ---- Normal vect
+    const CeedScalar norm[3] = {q_data_sur[1][i],
+                                q_data_sur[2][i],
+                                q_data_sur[3][i]
+                               };
+
+    // The Physics
+    // Zero v so all future terms can safely sum into it
+    for (CeedInt j=0; j<5; j++) v[j][i] = 0.;
+
+    // Implementing outflow condition
+    const CeedScalar P         = P0; // pressure prescribed from the context
+    const CeedScalar u_normal  = norm[0]*u[0] + norm[1]*u[1] +
+                                 norm[2]*u[2]; // Normal velocity
+
+    // Calculate prescribed outflow traction values
+    const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
+    CeedScalar velocity[3] = {0.}; // only [0] and [1] are passed below; [2] stays 0
+    CeedScalar t12;
+    BlasiusSolution(x[1], Uinf, x0, x[0], rho_0, &velocity[0], &velocity[1],
+                    &t12, &context->newtonian_ctx);
+    // Flux terms: advection uses the interior state, t12 terms use BlasiusSolution
+    // -- Density
+    v[0][i] -= wdetJb * rho * u_normal;
+
+    // -- Momentum
+    for (CeedInt j=0; j<3; j++)
+      v[j+1][i] -= wdetJb *(rho * u_normal * u[j] + norm[j] * P);
+    v[2][i] += wdetJb * t12  ; // t12 is added here, not subtracted
+
+    // -- Total Energy Density
+    v[4][i] -= wdetJb * u_normal * (E + P);
+    v[4][i] += wdetJb * t12 * velocity[1]; // uses the BlasiusSolution velocity, not u
+
+  } // End Quadrature Point Loop
+  return 0;
+}
+#endif // blasius_h
diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h
new file mode 100644
index 0000000000..3d8d60e948
--- /dev/null
+++ b/examples/fluids/qfunctions/channel.h
@@ -0,0 +1,238 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Operator for Navier-Stokes example using PETSc
+
+
+#ifndef channel_h
+#define channel_h
+
+#include <math.h>
+#include <ceed/ceed.h>
+#include "newtonian_types.h"
+
+typedef struct ChannelContext_ *ChannelContext; // !< Pointer handle used by the QFunctions
+struct ChannelContext_ {
+  bool       implicit; // !< Using implicit timestepping or not
+  CeedScalar theta0;   // !< Reference temperature
+  CeedScalar P0;       // !< Reference pressure
+  CeedScalar umax;     // !< Centerline velocity
+  CeedScalar center;   // !< Y coordinate for center of channel
+  CeedScalar H;        // !< Channel half-height
+  CeedScalar B;        // !< Body force driving the flow
+  struct NewtonianIdealGasContext_ newtonian_ctx; // !< Embedded by value; cv, cp, mu, k are read from it
+};
+
+CEED_QFUNCTION_HELPER CeedInt Exact_Channel(CeedInt dim, CeedScalar time,
+    const CeedScalar X[], CeedInt Nf, CeedScalar q[], void *ctx) {
+  // Exact channel state at X: fills q[0..4] from the ChannelContext in ctx and
+  // returns 0. dim, time and Nf are accepted but not read.
+  const ChannelContext channel = (ChannelContext)ctx;
+  // Fluid properties
+  const CeedScalar cp = channel->newtonian_ctx.cp;
+  const CeedScalar cv = channel->newtonian_ctx.cv;
+  const CeedScalar mu = channel->newtonian_ctx.mu;
+  const CeedScalar k  = channel->newtonian_ctx.k;
+  // Reference temperature and centerline velocity
+  const CeedScalar theta0 = channel->theta0;
+  const CeedScalar umax   = channel->umax;
+  // Offset of X[1] from the channel center, scaled by the half-height
+  const CeedScalar eta = (X[1] - channel->center) / channel->H;
+
+  // Temperature profile built from Pr, Ec and eta
+  // NOTE(review): Pr is evaluated as mu/(cp*k) — confirm the intended definition
+  const CeedScalar Pr    = mu / (cp*k);
+  const CeedScalar Ec    = (umax*umax) / (cp*theta0);
+  const CeedScalar theta = theta0*(1 + (Pr*Ec/3) * (1 - Square(Square(eta))));
+
+  // Density from the reference pressure P0 with gas constant cp - cv
+  const CeedScalar rho = channel->P0 / ((cp - cv)*theta);
+  // Momentum: only the first component is nonzero
+  const CeedScalar mom[3] = {rho * umax*(1 - Square(eta)), 0, 0};
+  q[0] = rho;
+  q[1] = mom[0];
+  q[2] = mom[1];
+  q[3] = mom[2];
+  // Total energy density: internal plus kinetic
+  q[4] = rho * (cv*theta) + .5 * (mom[0]*mom[0] + mom[1]*mom[1] + mom[2]*mom[2]) / rho;
+  return 0;
+}
+
+// *****************************************************************************
+// ICsChannel: fills out[0] with Exact_Channel evaluated at each point of in[0]
+// *****************************************************************************
+CEED_QFUNCTION(ICsChannel)(void *ctx, CeedInt Q,
+                           const CeedScalar *const *in, CeedScalar *const *out) {
+  // in[0]: coordinates; out[0]: initial state (5 components per point)
+  const CeedScalar (*coords)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
+  CeedScalar (*state0)[CEED_Q_VLA]       = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  CeedPragmaSIMD
+  for (CeedInt pt=0; pt<Q; pt++) {
+    // Gather the point, evaluate the exact solution at time 0, then scatter
+    const CeedScalar point[] = {coords[0][pt], coords[1][pt], coords[2][pt]};
+    CeedScalar exact[5] = {0.};
+    Exact_Channel(3, 0., point, 5, exact, ctx);
+    state0[0][pt] = exact[0];
+    state0[1][pt] = exact[1];
+    state0[2][pt] = exact[2];
+    state0[3][pt] = exact[3];
+    state0[4][pt] = exact[4];
+  } // End of Quadrature Point Loop
+  return 0;
+}
+
+// ********* Inflow boundary flux using velocity and theta from Exact_Channel ***
+CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q,
+                               const CeedScalar *const *in,
+                               CeedScalar *const *out) {
+  // *INDENT-OFF*
+  // Inputs: q (current state), q_data_sur (surface weight, normal), X (coordinates)
+  const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[0],
+                   (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1],
+                   (*X)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+
+  // Outputs: v (flux contribution for each of the five state components)
+  CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+  // *INDENT-ON*
+  const ChannelContext context = (ChannelContext)ctx;
+  const bool implicit     = context->implicit;
+  const CeedScalar cv     = context->newtonian_ctx.cv;
+  const CeedScalar cp     = context->newtonian_ctx.cp;
+  const CeedScalar gamma  = cp/cv;
+
+  CeedPragmaSIMD
+  // Quadrature Point Loop
+  for (CeedInt i=0; i<Q; i++) {
+    // Setup
+    // -- Interp-to-Interp q_data
+    // For explicit mode, the surface integral is on the RHS of ODE q_dot = f(q).
+    // For implicit mode, it gets pulled to the LHS of implicit ODE/DAE g(q_dot, q).
+    // We can effect this by swapping the sign on this weight
+    const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
+
+    // Calculate prescribed inflow values from the exact solution at time 0
+    const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
+    CeedScalar q_exact[5] = {0.};
+    Exact_Channel(3, 0., x, 5, q_exact, ctx);
+    const CeedScalar E_kinetic_exact = 0.5*(q_exact[1]*q_exact[1] +
+                                            q_exact[2]*q_exact[2] +
+                                            q_exact[3]*q_exact[3]) / q_exact[0];
+    const CeedScalar velocity[3] = {q_exact[1]/q_exact[0],
+                                    q_exact[2]/q_exact[0],
+                                    q_exact[3]/q_exact[0]
+                                   };
+    const CeedScalar theta = (q_exact[4] - E_kinetic_exact) / (q_exact[0]*cv);
+
+    // Find pressure using state inside the domain
+    const CeedScalar rho = q[0][i];
+    const CeedScalar u[3] = {q[1][i]/rho, q[2][i]/rho, q[3][i]/rho};
+    const CeedScalar E_internal = q[4][i] - .5 * rho * (u[0]*u[0] + u[1]*u[1] +
+                                  u[2]*u[2]);
+    const CeedScalar P = E_internal * (gamma - 1.);
+
+    // Find inflow state using calculated P and prescribed velocity, theta
+    const CeedScalar e_internal = cv * theta;
+    const CeedScalar rho_in = P / ((gamma - 1) * e_internal);
+    const CeedScalar E_kinetic = .5 * rho_in * (velocity[0]*velocity[0] +
+                                 velocity[1]*velocity[1] +
+                                 velocity[2]*velocity[2]);
+    const CeedScalar E = rho_in * e_internal + E_kinetic;
+    // ---- Normal vect
+    const CeedScalar norm[3] = {q_data_sur[1][i],
+                                q_data_sur[2][i],
+                                q_data_sur[3][i]
+                               };
+
+    // The Physics
+    // Zero v so all future terms can safely sum into it
+    for (CeedInt j=0; j<5; j++) v[j][i] = 0.;
+
+    const CeedScalar u_normal = norm[0]*velocity[0] +
+                                norm[1]*velocity[1] +
+                                norm[2]*velocity[2];
+
+    // Flux terms, all built from the inflow state (rho_in, velocity, E, P)
+    // -- Density
+    v[0][i] -= wdetJb * rho_in * u_normal;
+
+    // -- Momentum
+    for (CeedInt j=0; j<3; j++)
+      v[j+1][i] -= wdetJb * (rho_in * u_normal * velocity[j] +
+                             norm[j] * P);
+
+    // -- Total Energy Density
+    v[4][i] -= wdetJb * u_normal * (E + P);
+
+  } // End Quadrature Point Loop
+  return 0;
+}
+
+// *****************************************************************************
+CEED_QFUNCTION(Channel_Outflow)(void *ctx, CeedInt Q,
+                                const CeedScalar *const *in,
+                                CeedScalar *const *out) {
+  // Outflow boundary flux: the interior state is advected through the face
+  // while the pressure is taken from the context value P0. Returns 0.
+  //   in[0]  - q, current state
+  //   in[1]  - q_data_sur, surface weight followed by the normal vector
+  //   out[0] - v, flux contribution for each of the five state components
+  // *INDENT-OFF*
+  const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[0],
+                   (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1];
+  CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+  // *INDENT-ON*
+
+  const ChannelContext channel = (ChannelContext)ctx;
+  const bool flip_sign = channel->implicit;
+  // Prescribed outflow pressure
+  const CeedScalar P = channel->P0;
+
+  CeedPragmaSIMD
+  // Quadrature Point Loop
+  for (CeedInt i=0; i<Q; i++) {
+    // Interior state: density, velocity and total energy density
+    const CeedScalar rho    = q[0][i];
+    const CeedScalar E      = q[4][i];
+    const CeedScalar vel[3] = {q[1][i] / rho, q[2][i] / rho, q[3][i] / rho};
+
+    // Surface weight. In explicit mode the surface integral sits on the RHS
+    // of the ODE q_dot = f(q); in implicit mode it moves to the LHS of the
+    // implicit ODE/DAE g(q_dot, q), which is effected by negating the weight
+    const CeedScalar weight = (flip_sign ? -1. : 1.) * q_data_sur[0][i];
+
+    // Normal vector and normal velocity
+    const CeedScalar n[3] = {q_data_sur[1][i],
+                             q_data_sur[2][i],
+                             q_data_sur[3][i]
+                            };
+    const CeedScalar vel_n = n[0]*vel[0] + n[1]*vel[1] + n[2]*vel[2];
+
+    // The Physics
+    // Start every component from zero so the terms below can subtract from it
+    v[0][i] = 0.;
+    v[1][i] = 0.;
+    v[2][i] = 0.;
+    v[3][i] = 0.;
+    v[4][i] = 0.;
+
+    // -- Density
+    v[0][i] -= weight * rho * vel_n;
+
+    // -- Momentum: advected momentum plus pressure acting along the normal
+    v[1][i] -= weight *(rho * vel_n * vel[0] + n[0] * P);
+    v[2][i] -= weight *(rho * vel_n * vel[1] + n[1] * P);
+    v[3][i] -= weight *(rho * vel_n * vel[2] + n[2] * P);
+
+    // -- Total Energy Density
+    v[4][i] -= weight * vel_n * (E + P);
+
+  } // End Quadrature Point Loop
+  return 0;
+}
+#endif // channel_h
diff --git a/examples/fluids/qfunctions/densitycurrent.h b/examples/fluids/qfunctions/densitycurrent.h
index 0bc1fbfb94..2d503d8165 100644
--- a/examples/fluids/qfunctions/densitycurrent.h
+++ b/examples/fluids/qfunctions/densitycurrent.h
@@ -17,36 +17,12 @@
 
 #include <math.h>
 #include <ceed.h>
+#include "newtonian_types.h"
 
 #ifndef M_PI
 #define M_PI    3.14159265358979323846
 #endif
 
-#ifndef setup_context_struct
-#define setup_context_struct
-typedef struct SetupContext_ *SetupContext;
-struct SetupContext_ {
-  CeedScalar theta0;
-  CeedScalar thetaC;
-  CeedScalar P0;
-  CeedScalar N;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g;
-  CeedScalar rc;
-  CeedScalar lx;
-  CeedScalar ly;
-  CeedScalar lz;
-  CeedScalar center[3];
-  CeedScalar dc_axis[3];
-  CeedScalar wind[3];
-  CeedScalar time;
-  int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
-  int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
-  int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
-};
-#endif
-
 // *****************************************************************************
 // This function sets the initial conditions and the boundary conditions
 //
@@ -110,11 +86,12 @@ CEED_QFUNCTION_HELPER int Exact_DC(CeedInt dim, CeedScalar time,
   const CeedScalar N        = context->N;
   const CeedScalar cv       = context->cv;
   const CeedScalar cp       = context->cp;
-  const CeedScalar g        = context->g;
+  const CeedScalar *g_vec   = context->g;
   const CeedScalar rc       = context->rc;
   const CeedScalar *center  = context->center;
   const CeedScalar *dc_axis = context->dc_axis;
   const CeedScalar Rd       = cp - cv;
+  const CeedScalar g = -g_vec[2];
 
   // Setup
   // -- Coordinates
diff --git a/examples/fluids/qfunctions/eulervortex.h b/examples/fluids/qfunctions/eulervortex.h
index 65efe2c20d..ebe425d05d 100644
--- a/examples/fluids/qfunctions/eulervortex.h
+++ b/examples/fluids/qfunctions/eulervortex.h
@@ -23,8 +23,6 @@
 #define M_PI    3.14159265358979323846
 #endif
 
-#ifndef euler_context_struct
-#define euler_context_struct
 typedef struct EulerContext_ *EulerContext;
 struct EulerContext_ {
   CeedScalar center[3];
@@ -36,7 +34,6 @@ struct EulerContext_ {
   int euler_test;
   int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
 };
-#endif
 
 // *****************************************************************************
 // This function sets the initial conditions
@@ -234,7 +231,7 @@ CEED_QFUNCTION_HELPER void ConvectiveFluxJacobian_Euler(CeedScalar dF[3][5][5],
 CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3],
                                        const CeedScalar dXdx[3][3], const CeedScalar u[3],
                                        const CeedScalar sound_speed, const CeedScalar c_tau) {
-  for (int i=0; i<3; i++) {
+  for (CeedInt i=0; i<3; i++) {
     // length of element in direction i
     CeedScalar h = 2 / sqrt(dXdx[0][i]*dXdx[0][i] + dXdx[1][i]*dXdx[1][i] +
                             dXdx[2][i]*dXdx[2][i]);
@@ -365,11 +362,11 @@ CEED_QFUNCTION(Euler)(void *ctx, CeedInt Q,
     CeedScalar dEdx[3] = {0.};
     CeedScalar dUdx[3][3] = {{0.}};
     CeedScalar dXdxdXdxT[3][3] = {{0.}};
-    for (int j=0; j<3; j++) {
-      for (int k=0; k<3; k++) {
+    for (CeedInt j=0; j<3; j++) {
+      for (CeedInt k=0; k<3; k++) {
         drhodx[j] += drho[k] * dXdx[k][j];
         dEdx[j] += dE[k] * dXdx[k][j];
-        for (int l=0; l<3; l++) {
+        for (CeedInt l=0; l<3; l++) {
           dUdx[j][k] += dU[j][l] * dXdx[l][k];
           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
         }
@@ -383,27 +380,27 @@ CEED_QFUNCTION(Euler)(void *ctx, CeedInt Q,
 
     // The Physics
     // Zero v and dv so all future terms can safely sum into it
-    for (int j=0; j<5; j++) {
+    for (CeedInt j=0; j<5; j++) {
       v[j][i] = 0.;
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j][i] = 0.;
     }
 
     // -- Density
     // ---- u rho
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][0][i]  += wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
                              rho*u[2]*dXdx[j][2]);
     // -- Momentum
     // ---- rho (u x u) + P I3
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i]  += wdetJ*((rho*u[j]*u[0] + (j==0?P:0.))*dXdx[k][0] +
                                  (rho*u[j]*u[1] + (j==1?P:0.))*dXdx[k][1] +
                                  (rho*u[j]*u[2] + (j==2?P:0.))*dXdx[k][2]);
     // -- Total Energy Density
     // ---- (E + P) u
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i]  += wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
                                          u[2]*dXdx[j][2]);
 
@@ -412,27 +409,20 @@ CEED_QFUNCTION(Euler)(void *ctx, CeedInt Q,
     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
     ConvectiveFluxJacobian_Euler(jacob_F_conv, rho, u, E, gamma);
 
-    // ---- Transpose of the Jacobian
-    CeedScalar jacob_F_conv_T[3][5][5];
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
-          jacob_F_conv_T[j][k][l] = jacob_F_conv[j][l][k];
-
     // ---- dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
-    for (int j=0; j<3; j++) {
+    for (CeedInt j=0; j<3; j++) {
       dqdx[0][j] = drhodx[j];
       dqdx[4][j] = dEdx[j];
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dqdx[k+1][j] = dUdx[k][j];
     }
 
     // ---- strong_conv = dF/dq * dq/dx    (Strong convection)
     CeedScalar strong_conv[5] = {0.};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<5; k++)
+        for (CeedInt l=0; l<5; l++)
           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
 
     // Stabilization
@@ -442,18 +432,18 @@ CEED_QFUNCTION(Euler)(void *ctx, CeedInt Q,
     Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau);
 
     // -- Stabilization method: none or SU
-    CeedScalar stab[5][3];
+    CeedScalar stab[5][3] = {{0.}};
     switch (context->stabilization) {
     case 0:        // Galerkin
       break;
     case 1:        // SU
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++)
-            stab[k][j] = jacob_F_conv_T[j][k][l] * Tau_x[j] * strong_conv[l];
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++)
+            stab[k][j] += jacob_F_conv[j][k][l] * Tau_x[j] * strong_conv[l];
 
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
@@ -540,11 +530,11 @@ CEED_QFUNCTION(IFunction_Euler)(void *ctx, CeedInt Q,
     CeedScalar dEdx[3] = {0.};
     CeedScalar dUdx[3][3] = {{0.}};
     CeedScalar dXdxdXdxT[3][3] = {{0.}};
-    for (int j=0; j<3; j++) {
-      for (int k=0; k<3; k++) {
+    for (CeedInt j=0; j<3; j++) {
+      for (CeedInt k=0; k<3; k++) {
         drhodx[j] += drho[k] * dXdx[k][j];
         dEdx[j] += dE[k] * dXdx[k][j];
-        for (int l=0; l<3; l++) {
+        for (CeedInt l=0; l<3; l++) {
           dUdx[j][k] += dU[j][l] * dXdx[l][k];
           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
         }
@@ -557,30 +547,30 @@ CEED_QFUNCTION(IFunction_Euler)(void *ctx, CeedInt Q,
 
     // The Physics
     // Zero v and dv so all future terms can safely sum into it
-    for (int j=0; j<5; j++) {
+    for (CeedInt j=0; j<5; j++) {
       v[j][i] = 0.;
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j][i] = 0.;
     }
     //-----mass matrix
-    for (int j=0; j<5; j++)
+    for (CeedInt j=0; j<5; j++)
       v[j][i] += wdetJ*q_dot[j][i];
 
     // -- Density
     // ---- u rho
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][0][i]  -= wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
                              rho*u[2]*dXdx[j][2]);
     // -- Momentum
     // ---- rho (u x u) + P I3
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i]  -= wdetJ*((rho*u[j]*u[0] + (j==0?P:0.))*dXdx[k][0] +
                                  (rho*u[j]*u[1] + (j==1?P:0.))*dXdx[k][1] +
                                  (rho*u[j]*u[2] + (j==2?P:0.))*dXdx[k][2]);
     // -- Total Energy Density
     // ---- (E + P) u
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i]  -= wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
                                          u[2]*dXdx[j][2]);
 
@@ -589,32 +579,25 @@ CEED_QFUNCTION(IFunction_Euler)(void *ctx, CeedInt Q,
     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
     ConvectiveFluxJacobian_Euler(jacob_F_conv, rho, u, E, gamma);
 
-    // ---- Transpose of the Jacobian
-    CeedScalar jacob_F_conv_T[3][5][5];
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
-          jacob_F_conv_T[j][k][l] = jacob_F_conv[j][l][k];
-
     // ---- dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
-    for (int j=0; j<3; j++) {
+    for (CeedInt j=0; j<3; j++) {
       dqdx[0][j] = drhodx[j];
       dqdx[4][j] = dEdx[j];
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dqdx[k+1][j] = dUdx[k][j];
     }
 
     // ---- strong_conv = dF/dq * dq/dx    (Strong convection)
     CeedScalar strong_conv[5] = {0.};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<5; k++)
+        for (CeedInt l=0; l<5; l++)
           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
 
     // ---- Strong residual
     CeedScalar strong_res[5];
-    for (int j=0; j<5; j++)
+    for (CeedInt j=0; j<5; j++)
       strong_res[j] = q_dot[j][i] + strong_conv[j];
 
     // Stabilization
@@ -624,30 +607,30 @@ CEED_QFUNCTION(IFunction_Euler)(void *ctx, CeedInt Q,
     Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau);
 
     // -- Stabilization method: none, SU, or SUPG
-    CeedScalar stab[5][3];
+    CeedScalar stab[5][3] = {{0.}};
     switch (context->stabilization) {
     case 0:        // Galerkin
       break;
     case 1:        // SU
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++)
-            stab[k][j] = jacob_F_conv_T[j][k][l] * Tau_x[j] * strong_conv[l];
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++)
+            stab[k][j] += jacob_F_conv[j][k][l] * Tau_x[j] * strong_conv[l];
 
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
       break;
     case 2:        // SUPG
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++)
-            stab[k][j] = jacob_F_conv_T[j][k][l] * Tau_x[j] * strong_res[l];
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++)
+            stab[k][j] = jacob_F_conv[j][k][l] * Tau_x[j] * strong_res[l];
 
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
@@ -713,7 +696,7 @@ CEED_QFUNCTION(TravelingVortex_Inflow)(void *ctx, CeedInt Q,
                                    norm[2]*mean_velocity[2];
     // The Physics
     // Zero v so all future terms can safely sum into it
-    for (int j=0; j<5; j++) v[j][i] = 0.;
+    for (CeedInt j=0; j<5; j++) v[j][i] = 0.;
 
     // Implementing in/outflow BCs
     if (face_normal > 0) {
@@ -729,7 +712,7 @@ CEED_QFUNCTION(TravelingVortex_Inflow)(void *ctx, CeedInt Q,
       v[0][i] -= wdetJb * rho_inlet * face_normal;
 
       // -- Momentum
-      for (int j=0; j<3; j++)
+      for (CeedInt j=0; j<3; j++)
         v[j+1][i] -= wdetJb *(rho_inlet * face_normal * mean_velocity[j] +
                               norm[j] * P_inlet);
 
@@ -795,7 +778,7 @@ CEED_QFUNCTION(Euler_Outflow)(void *ctx, CeedInt Q,
                                    norm[2]*mean_velocity[2];
     // The Physics
     // Zero v so all future terms can safely sum into it
-    for (int j=0; j<5; j++) v[j][i] = 0;
+    for (CeedInt j=0; j<5; j++) v[j][i] = 0;
 
     // Implementing in/outflow BCs
     if (face_normal > 0) { // outflow
@@ -808,7 +791,7 @@ CEED_QFUNCTION(Euler_Outflow)(void *ctx, CeedInt Q,
       v[0][i] -= wdetJb * rho * u_normal;
 
       // -- Momentum
-      for (int j=0; j<3; j++)
+      for (CeedInt j=0; j<3; j++)
         v[j+1][i] -= wdetJb *(rho * u_normal * u[j] + norm[j] * P);
 
       // -- Total Energy Density
diff --git a/examples/fluids/qfunctions/mass.h b/examples/fluids/qfunctions/mass.h
index 94d1daafd5..45ae3198b1 100644
--- a/examples/fluids/qfunctions/mass.h
+++ b/examples/fluids/qfunctions/mass.h
@@ -12,6 +12,7 @@
 #define mass_h
 
 #include <math.h>
+#include <ceed.h>
 
 // *****************************************************************************
 // This QFunction applies the mass matrix to five interlaced fields.
diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h
index 2396e58cf2..9c77601d84 100644
--- a/examples/fluids/qfunctions/newtonian.h
+++ b/examples/fluids/qfunctions/newtonian.h
@@ -14,67 +14,24 @@
 
 #include <math.h>
 #include <ceed.h>
+#include "newtonian_types.h"
 
 #ifndef M_PI
 #define M_PI    3.14159265358979323846
 #endif
 
-#ifndef setup_context_struct
-#define setup_context_struct
-typedef struct SetupContext_ *SetupContext;
-struct SetupContext_ {
-  CeedScalar theta0;
-  CeedScalar thetaC;
-  CeedScalar P0;
-  CeedScalar N;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g;
-  CeedScalar rc;
-  CeedScalar lx;
-  CeedScalar ly;
-  CeedScalar lz;
-  CeedScalar center[3];
-  CeedScalar dc_axis[3];
-  CeedScalar wind[3];
-  CeedScalar time;
-  int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
-  int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
-  int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
-};
-#endif
-
-#ifndef newtonian_context_struct
-#define newtonian_context_struct
-typedef enum {
-  STAB_NONE = 0,
-  STAB_SU   = 1, // Streamline Upwind
-  STAB_SUPG = 2, // Streamline Upwind Petrov-Galerkin
-} StabilizationType;
-
-typedef struct NewtonianIdealGasContext_ *NewtonianIdealGasContext;
-struct NewtonianIdealGasContext_ {
-  CeedScalar lambda;
-  CeedScalar mu;
-  CeedScalar k;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g;
-  CeedScalar c_tau;
-  StabilizationType stabilization;
-};
-#endif
-
 // *****************************************************************************
 // Helper function for computing flux Jacobian
 // *****************************************************************************
 CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
-    const CeedScalar gamma, const CeedScalar g, CeedScalar z) {
+    const CeedScalar gamma, const CeedScalar g[3], const CeedScalar x[3]) {
   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
+  CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
     for (CeedInt j=0; j<3; j++) { // Rows of each Jacobian matrix
-      dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - g*z)) : 0.) - u[i]*u[j];
+      dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - e_potential)) : 0.) -
+                      u[i]*u[j];
       for (CeedInt k=0; k<3; k++) { // Columns of each Jacobian matrix
         dF[i][0][k+1]   = ((i==k) ? 1. : 0.);
         dF[i][j+1][k+1] = ((j==k) ? u[i] : 0.) +
@@ -90,6 +47,137 @@ CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
   }
 }
 
+// *****************************************************************************
+// Helper function for computing flux Jacobian wrt primitive variables (p, u, T)
+// *****************************************************************************
+CEED_QFUNCTION_HELPER void computeFluxJacobian_NSp(CeedScalar dF[3][5][5],
+    const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
+    const CeedScalar Rd, const CeedScalar cv) {
+  CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
+  // TODO Add in gravity's contribution
+
+  CeedScalar T    = ( E / rho - u_sq / 2. ) / cv; // temperature; E treated as internal + kinetic only
+  CeedScalar drdT = -rho / T; // d(rho)/dT, consistent with rho = p / (Rd T)
+  CeedScalar drdP = 1. / ( Rd * T); // d(rho)/dp, consistent with rho = p / (Rd T)
+  CeedScalar etot =  E / rho ; // total energy per unit mass
+  CeedScalar e2p  = drdP * etot + 1. ; // d(E + p)/dp
+  CeedScalar e3p  = ( E  + rho * Rd * T ); // E + p
+  CeedScalar e4p  = drdT * etot + rho * cv ; // d(E + p)/dT
+
+  for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
+    for (CeedInt j=0; j<3; j++) { // j counts F^{m_j}
+//        [row][col] of A_i: rows are (F^c, F^{m_1..3}, F^e), cols are (p, u_1..3, T)
+      dF[i][j+1][0] = drdP * u[i] * u[j] + ((i==j) ? 1. : 0.); // F^{m_j} wrt p
+      for (CeedInt k=0; k<3; k++) { // k counts the wrt vel_k
+        dF[i][0][k+1]   =  ((i==k) ? rho  : 0.);   // F^c wrt u_k
+        dF[i][j+1][k+1] = (((j==k) ? u[i] : 0.) +  // F^m_j wrt u_k
+                           ((i==k) ? u[j] : 0.) ) * rho;
+        dF[i][4][k+1]   = rho * u[i] * u[k]
+                          + ((i==k) ? e3p  : 0.) ; // F^e wrt u_k
+      }
+      dF[i][j+1][4] = drdT * u[i] * u[j]; // F^{m_j} wrt T
+    }
+    dF[i][4][0] = u[i] * e2p; // F^e wrt p
+    dF[i][4][4] = u[i] * e4p; // F^e wrt T
+    dF[i][0][0] = u[i] * drdP; // F^c wrt p
+    dF[i][0][4] = u[i] * drdT; // F^c wrt T
+  }
+}
+
+CEED_QFUNCTION_HELPER void PrimitiveToConservative_fwd(const CeedScalar rho,
+    const CeedScalar u[3], const CeedScalar E, const CeedScalar Rd,
+    const CeedScalar cv, const CeedScalar dY[5], CeedScalar dU[5]) { // dU = (dU/dY) dY, Y = (p, u, T)
+  CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // velocity square
+  CeedScalar T    = ( E / rho - u_sq / 2. ) / cv; // temperature; no potential energy term
+  CeedScalar drdT = -rho / T; // d(rho)/dT, consistent with rho = p / (Rd T)
+  CeedScalar drdP = 1. / ( Rd * T); // d(rho)/dp, consistent with rho = p / (Rd T)
+  dU[0] = drdP * dY[0] + drdT * dY[4]; // density perturbation from dp (dY[0]) and dT (dY[4])
+  CeedScalar de_kinetic = 0; // accumulates u . du
+  for (CeedInt i=0; i<3; i++) {
+    dU[1+i] = dU[0] * u[i] + rho * dY[1+i]; // d(rho u_i) = drho u_i + rho du_i
+    de_kinetic += u[i] * dY[1+i];
+  }
+  dU[4] = rho * cv * dY[4] + dU[0] * cv * T // internal energy: rho * e
+          + rho * de_kinetic + .5 * dU[0] * u_sq; // kinetic energy: .5 * rho * |u|^2
+}
+
+// *****************************************************************************
+// Helper function for computing Tau elements (stabilization constant)
+//   Model from:
+//     PHASTA, itau=0: diagonal-Shakib (3 values, not per spatial direction)
+//
+//   Tau_d[0] is applied to the continuity residual, Tau_d[1] to the three
+//     momentum residuals and Tau_d[2] to the energy residual. The Ctau_*
+//     scaling constants are read from newt_ctx.
+// *****************************************************************************
+CEED_QFUNCTION_HELPER void Tau_diagPrim(CeedScalar Tau_d[3],
+                                        const CeedScalar dXdx[3][3], const CeedScalar u[3],
+                                        const CeedScalar cv, const NewtonianIdealGasContext newt_ctx,
+                                        const CeedScalar mu, const CeedScalar dt,
+                                        const CeedScalar rho) {
+  // Context
+  const CeedScalar Ctau_t = newt_ctx->Ctau_t;
+  const CeedScalar Ctau_v = newt_ctx->Ctau_v;
+  const CeedScalar Ctau_C = newt_ctx->Ctau_C;
+  const CeedScalar Ctau_M = newt_ctx->Ctau_M;
+  const CeedScalar Ctau_E = newt_ctx->Ctau_E;
+  CeedScalar gijd[6]; // symmetric dXdx^T dXdx, stored as [00, 01, 11, 02, 12, 22]
+  CeedScalar tau;
+  CeedScalar dts;
+  CeedScalar fact;
+
+  //*INDENT-OFF*
+  gijd[0] =   dXdx[0][0] * dXdx[0][0]
+            + dXdx[1][0] * dXdx[1][0]
+            + dXdx[2][0] * dXdx[2][0];
+
+  gijd[1] =   dXdx[0][0] * dXdx[0][1]
+            + dXdx[1][0] * dXdx[1][1]
+            + dXdx[2][0] * dXdx[2][1];
+
+  gijd[2] =   dXdx[0][1] * dXdx[0][1]
+            + dXdx[1][1] * dXdx[1][1]
+            + dXdx[2][1] * dXdx[2][1];
+
+  gijd[3] =   dXdx[0][0] * dXdx[0][2]
+            + dXdx[1][0] * dXdx[1][2]
+            + dXdx[2][0] * dXdx[2][2];
+
+  gijd[4] =   dXdx[0][1] * dXdx[0][2]
+            + dXdx[1][1] * dXdx[1][2]
+            + dXdx[2][1] * dXdx[2][2];
+
+  gijd[5] =   dXdx[0][2] * dXdx[0][2]
+            + dXdx[1][2] * dXdx[1][2]
+            + dXdx[2][2] * dXdx[2][2];
+  //*INDENT-ON*
+
+  dts = Ctau_t / dt ; // NOTE(review): divides by dt; confirm callers never pass dt == 0
+
+  tau = rho*rho*((4. * dts * dts)
+                 + u[0] * ( u[0] * gijd[0] + 2. * ( u[1] * gijd[1] + u[2] * gijd[3]))
+                 + u[1] * ( u[1] * gijd[2] + 2. *   u[2] * gijd[4])
+                 + u[2] *   u[2] * gijd[5])
+        + Ctau_v* mu * mu *
+        (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5]
+         + 2. * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4]));
+
+  fact=sqrt(tau);
+
+  Tau_d[0] = Ctau_C * fact / (rho*(gijd[0] + gijd[2] + gijd[5]))*0.125;
+
+  Tau_d[1] = Ctau_M / fact;
+  Tau_d[2] = Ctau_E / ( fact * cv );
+
+// NOTE: Tau_d[2] could instead be computed as Ctau_E * Tau_d[1] / cv to avoid
+// a division, if the compiler is smart enough to see that cv is a constant
+// it can invert once for all elements; but then the energy tau is scaled by
+// the product Ctau_E * Ctau_M. Alternatively, cv could be absorbed into
+// Ctau_E, but that puts more burden on the user to know how to change the
+// constants with a change of fluid or units. The same applies to
+// Ctau_v * mu * mu IF AND ONLY IF we don't add a viscosity law = f(T).
+}
+
 // *****************************************************************************
 // Helper function for computing Tau elements (stabilization constant)
 //   Model from:
@@ -108,14 +196,20 @@ CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
 // *****************************************************************************
 CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3],
                                        const CeedScalar dXdx[3][3], const CeedScalar u[3],
-                                       const CeedScalar sound_speed, const CeedScalar c_tau) {
-  for (int i=0; i<3; i++) {
+                                       const CeedScalar sound_speed, const CeedScalar c_tau,
+                                       const CeedScalar viscosity) {
+  const CeedScalar mag_u_visc = sqrt(u[0]*u[0] +u[1]*u[1] +u[2]*u[2]) /
+                                (2*viscosity);
+  for (CeedInt i=0; i<3; i++) {
     // length of element in direction i
     CeedScalar h = 2 / sqrt(dXdx[0][i]*dXdx[0][i] + dXdx[1][i]*dXdx[1][i] +
                             dXdx[2][i]*dXdx[2][i]);
+    CeedScalar Pe = mag_u_visc*h; // |u| h / (2 viscosity)
+    // Xi = coth(Pe) - 1/Pe; use its small-Pe limit Pe/3 to avoid inf - inf (NaN) when u = 0
+    CeedScalar Xi = Pe < 1e-3 ? Pe/3 : 1/tanh(Pe) - 1/Pe;
     // fastest wave in direction i
     CeedScalar fastest_wave = fabs(u[i]) + sound_speed;
-    Tau_x[i] = c_tau * h / fastest_wave;
+    Tau_x[i] = c_tau * h * Xi / fastest_wave;
   }
 }
 
@@ -130,37 +224,34 @@ CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q,
   // Outputs
   CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
 
+  // Context
+  const SetupContext context = (SetupContext)ctx;
+  const CeedScalar theta0    = context->theta0;
+  const CeedScalar P0        = context->P0;
+  const CeedScalar cv        = context->cv;
+  const CeedScalar cp        = context->cp;
+  const CeedScalar *g        = context->g;
+  const CeedScalar Rd        = cp - cv;
+
   // Quadrature Point Loop
   CeedPragmaSIMD
   for (CeedInt i=0; i<Q; i++) {
     CeedScalar q[5] = {0.};
 
-    // Context
-    const SetupContext context = (SetupContext)ctx;
-    const CeedScalar theta0    = context->theta0;
-    const CeedScalar P0        = context->P0;
-    const CeedScalar N         = context->N;
-    const CeedScalar cv        = context->cv;
-    const CeedScalar cp        = context->cp;
-    const CeedScalar g         = context->g;
-    const CeedScalar Rd        = cp - cv;
-
     // Setup
     // -- Coordinates
-    const CeedScalar z = X[2][i];
-
-    // -- Exner pressure, hydrostatic balance
-    const CeedScalar Pi = 1. + g*g*(exp(-N*N*z/g) - 1.) / (cp*theta0*N*N);
+    const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
+    const CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
 
     // -- Density
-    const CeedScalar rho = P0 * pow(Pi, cv/Rd) / (Rd*theta0);
+    const CeedScalar rho = P0 / (Rd*theta0);
 
     // Initial Conditions
     q[0] = rho;
     q[1] = 0.0;
     q[2] = 0.0;
     q[3] = 0.0;
-    q[4] = rho * (cv*theta0*Pi + g*z);
+    q[4] = rho * (cv*theta0 + e_potential);
 
     for (CeedInt j=0; j<5; j++)
       q0[j][i] = q[j];
@@ -190,8 +281,7 @@ CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q,
 //
 // Thermal Stress:
 //   Fe = u Fu + k grad( T )
-//
-// Equation of State:
+// Equation of State
 //   P = (gamma - 1) (E - rho (u u) / 2 - rho g z)
 //
 // Stabilization:
@@ -238,9 +328,10 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
   const CeedScalar k      = context->k;
   const CeedScalar cv     = context->cv;
   const CeedScalar cp     = context->cp;
-  const CeedScalar g      = context->g;
-  const CeedScalar c_tau  = context->c_tau;
+  const CeedScalar *g     = context->g;
+  const CeedScalar dt     = context->dt;
   const CeedScalar gamma  = cp / cv;
+  const CeedScalar Rd     = cp - cv;
 
   CeedPragmaSIMD
   // Quadrature Point Loop
@@ -288,6 +379,7 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
                                     q_data[8][i],
                                     q_data[9][i]}
                                   };
+    const CeedScalar x_i[3]       = {x[0][i], x[1][i], x[2][i]};
     // *INDENT-ON*
     // -- Grad-to-Grad q_data
     // dU/dx
@@ -296,29 +388,29 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
     CeedScalar dEdx[3] = {0};
     CeedScalar dUdx[3][3] = {{0}};
     CeedScalar dXdxdXdxT[3][3] = {{0}};
-    for (int j=0; j<3; j++) {
-      for (int k=0; k<3; k++) {
+    for (CeedInt j=0; j<3; j++) {
+      for (CeedInt k=0; k<3; k++) {
         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
         drhodx[j] += drho[k] * dXdx[k][j];
         dEdx[j] += dE[k] * dXdx[k][j];
-        for (int l=0; l<3; l++) {
+        for (CeedInt l=0; l<3; l++) {
           dUdx[j][k] += dU[j][l] * dXdx[l][k];
           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
         }
       }
     }
     CeedScalar dudx[3][3] = {{0}};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
-        for (int l=0; l<3; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
+        for (CeedInt l=0; l<3; l++)
           dudx[j][k] += du[j][l] * dXdx[l][k];
     // -- grad_T
     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
-                                    (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]))/cv,
+                                    (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]) + g[0])/cv,
                                    (dEdx[1]/rho - E*drhodx[1]/(rho*rho) - /* *NOPAD* */
-                                    (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]))/cv,
+                                    (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]) + g[1])/cv,
                                    (dEdx[2]/rho - E*drhodx[2]/(rho*rho) - /* *NOPAD* */
-                                    (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) - g)/cv
+                                    (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) + g[2])/cv
                                   };
 
     // -- Fuvisc
@@ -344,97 +436,100 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
     // Pressure
     const CeedScalar
     E_kinetic   = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
-    E_potential = rho*g*x[2][i],
+    E_potential = -rho*(g[0]*x_i[0] + g[1]*x_i[1] + g[2]*x_i[2]),
     E_internal  = E - E_kinetic - E_potential,
     P           = E_internal * (gamma - 1.); // P = pressure
 
     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
-    computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x[2][i]);
-
-    // jacob_F_conv_T = jacob_F_conv^T
-    CeedScalar jacob_F_conv_T[3][5][5];
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
-          jacob_F_conv_T[j][k][l] = jacob_F_conv[j][l][k];
+    computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x_i);
 
     // dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
-    for (int j=0; j<3; j++) {
+    for (CeedInt j=0; j<3; j++) {
       dqdx[0][j] = drhodx[j];
       dqdx[4][j] = dEdx[j];
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dqdx[k+1][j] = dUdx[k][j];
     }
 
     // strong_conv = dF/dq * dq/dx    (Strong convection)
     CeedScalar strong_conv[5] = {0};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<5; k++)
+        for (CeedInt l=0; l<5; l++)
           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
 
     // Body force
-    const CeedScalar body_force[5] = {0, 0, 0, -rho*g, 0};
+    const CeedScalar body_force[5] = {0, rho *g[0], rho *g[1], rho *g[2], 0};
 
     // The Physics
     // Zero dv so all future terms can safely sum into it
-    for (int j=0; j<5; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<5; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j][i] = 0;
 
     // -- Density
     // ---- u rho
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][0][i]  += wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
                              rho*u[2]*dXdx[j][2]);
     // -- Momentum
     // ---- rho (u x u) + P I3
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i]  += wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
     // ---- Fuvisc
     const CeedInt Fuviscidx[3][3] = {{0, 1, 2}, {1, 3, 4}, {2, 4, 5}}; // symmetric matrix indices
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i] -= wdetJ*(Fu[Fuviscidx[j][0]]*dXdx[k][0] +
                                 Fu[Fuviscidx[j][1]]*dXdx[k][1] +
                                 Fu[Fuviscidx[j][2]]*dXdx[k][2]);
     // -- Total Energy Density
     // ---- (E + P) u
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i]  += wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
                                          u[2]*dXdx[j][2]);
     // ---- Fevisc
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i] -= wdetJ * (Fe[0]*dXdx[j][0] + Fe[1]*dXdx[j][1] +
                               Fe[2]*dXdx[j][2]);
     // Body Force
-    for (int j=0; j<5; j++)
+    for (CeedInt j=0; j<5; j++)
       v[j][i] = wdetJ * body_force[j];
 
-    // Stabilization
-    // -- Tau elements
-    const CeedScalar sound_speed = sqrt(gamma * P / rho);
-    CeedScalar Tau_x[3] = {0.};
-    Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau);
+    // Spatial Stabilization
+    // -- Not used in favor of diagonal tau. Kept for future testing
+    // const CeedScalar sound_speed = sqrt(gamma * P / rho);
+    // CeedScalar Tau_x[3] = {0.};
+    // Tau_spatial(Tau_x, dXdx, u, sound_speed, context->c_tau, mu);
 
-    // -- Stabilization method: none or SU
-    CeedScalar stab[5][3];
+    // -- Stabilization method: none, SU, or SUPG
+    CeedScalar stab[5][3] = {{0.}};
+    CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
+    CeedScalar Tau_d[3] = {0.};
     switch (context->stabilization) {
     case STAB_NONE:        // Galerkin
       break;
     case STAB_SU:        // SU
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++)
-            stab[k][j] = jacob_F_conv_T[j][k][l] * Tau_x[j] * strong_conv[l];
-
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
+      tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
+      tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
+      tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
+      tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
+      tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
+      PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
+                                  tau_strong_conv_conservative);
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++)
+            stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
+
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
@@ -479,9 +574,10 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
   const CeedScalar k      = context->k;
   const CeedScalar cv     = context->cv;
   const CeedScalar cp     = context->cp;
-  const CeedScalar g      = context->g;
-  const CeedScalar c_tau  = context->c_tau;
+  const CeedScalar *g     = context->g;
+  const CeedScalar dt     = context->dt;
   const CeedScalar gamma  = cp / cv;
+  const CeedScalar Rd     = cp-cv;
 
   CeedPragmaSIMD
   // Quadrature Point Loop
@@ -530,6 +626,7 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
                                     q_data[8][i],
                                     q_data[9][i]}
                                   };
+    const CeedScalar x_i[3]     = {x[0][i], x[1][i], x[2][i]};
     // *INDENT-ON*
     // -- Grad-to-Grad q_data
     // dU/dx
@@ -538,29 +635,29 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
     CeedScalar dEdx[3] = {0};
     CeedScalar dUdx[3][3] = {{0}};
     CeedScalar dXdxdXdxT[3][3] = {{0}};
-    for (int j=0; j<3; j++) {
-      for (int k=0; k<3; k++) {
+    for (CeedInt j=0; j<3; j++) {
+      for (CeedInt k=0; k<3; k++) {
         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
         drhodx[j] += drho[k] * dXdx[k][j];
         dEdx[j] += dE[k] * dXdx[k][j];
-        for (int l=0; l<3; l++) {
+        for (CeedInt l=0; l<3; l++) {
           dUdx[j][k] += dU[j][l] * dXdx[l][k];
           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
         }
       }
     }
     CeedScalar dudx[3][3] = {{0}};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
-        for (int l=0; l<3; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
+        for (CeedInt l=0; l<3; l++)
           dudx[j][k] += du[j][l] * dXdx[l][k];
     // -- grad_T
     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
-                                    (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]))/cv,
+                                    (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]) + g[0])/cv,
                                    (dEdx[1]/rho - E*drhodx[1]/(rho*rho) - /* *NOPAD* */
-                                    (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]))/cv,
+                                    (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]) + g[1])/cv,
                                    (dEdx[2]/rho - E*drhodx[2]/(rho*rho) - /* *NOPAD* */
-                                    (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) - g)/cv
+                                    (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) + g[2])/cv
                                   };
     // -- Fuvisc
     // ---- Symmetric 3x3 matrix
@@ -585,116 +682,136 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
     // Pressure
     const CeedScalar
     E_kinetic   = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
-    E_potential = rho*g*x[2][i],
+    E_potential = -rho*(g[0]*x_i[0] + g[1]*x_i[1] + g[2]*x_i[2]),
     E_internal  = E - E_kinetic - E_potential,
     P           = E_internal * (gamma - 1.); // P = pressure
 
     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
-    computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x[2][i]);
-
-    // jacob_F_conv_T = jacob_F_conv^T
-    CeedScalar jacob_F_conv_T[3][5][5];
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
-          jacob_F_conv_T[j][k][l] = jacob_F_conv[j][l][k];
+    computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x_i);
+
     // dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
-    for (int j=0; j<3; j++) {
+    for (CeedInt j=0; j<3; j++) {
       dqdx[0][j] = drhodx[j];
       dqdx[4][j] = dEdx[j];
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dqdx[k+1][j] = dUdx[k][j];
     }
     // strong_conv = dF/dq * dq/dx    (Strong convection)
     CeedScalar strong_conv[5] = {0};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<5; k++)
+        for (CeedInt l=0; l<5; l++)
           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
 
     // Body force
-    const CeedScalar body_force[5] = {0, 0, 0, -rho*g, 0};
+    const CeedScalar body_force[5] = {0, rho *g[0], rho *g[1], rho *g[2], 0};
 
     // Strong residual
     CeedScalar strong_res[5];
-    for (int j=0; j<5; j++)
+    for (CeedInt j=0; j<5; j++)
       strong_res[j] = q_dot[j][i] + strong_conv[j] - body_force[j];
 
     // The Physics
     //-----mass matrix
-    for (int j=0; j<5; j++)
+    for (CeedInt j=0; j<5; j++)
       v[j][i] = wdetJ*q_dot[j][i];
 
     // Zero dv so all future terms can safely sum into it
-    for (int j=0; j<5; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<5; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j][i] = 0;
 
     // -- Density
     // ---- u rho
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][0][i]  -= wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
                              rho*u[2]*dXdx[j][2]);
     // -- Momentum
     // ---- rho (u x u) + P I3
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i]  -= wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
     // ---- Fuvisc
     const CeedInt Fuviscidx[3][3] = {{0, 1, 2}, {1, 3, 4}, {2, 4, 5}}; // symmetric matrix indices
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i] += wdetJ*(Fu[Fuviscidx[j][0]]*dXdx[k][0] +
                                 Fu[Fuviscidx[j][1]]*dXdx[k][1] +
                                 Fu[Fuviscidx[j][2]]*dXdx[k][2]);
     // -- Total Energy Density
     // ---- (E + P) u
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i]  -= wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
                                          u[2]*dXdx[j][2]);
     // ---- Fevisc
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i] += wdetJ * (Fe[0]*dXdx[j][0] + Fe[1]*dXdx[j][1] +
                               Fe[2]*dXdx[j][2]);
     // Body Force
-    for (int j=0; j<5; j++)
+    for (CeedInt j=0; j<5; j++)
       v[j][i] -= wdetJ*body_force[j];
 
-    // Stabilization
-    // -- Tau elements
-    const CeedScalar sound_speed = sqrt(gamma * P / rho);
-    CeedScalar Tau_x[3] = {0.};
-    Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau);
+    // Spatial Stabilization
+    // -- Not used in favor of diagonal tau. Kept for future testing
+    // const CeedScalar sound_speed = sqrt(gamma * P / rho);
+    // CeedScalar Tau_x[3] = {0.};
+    // Tau_spatial(Tau_x, dXdx, u, sound_speed, context->c_tau, mu);
 
     // -- Stabilization method: none, SU, or SUPG
-    CeedScalar stab[5][3];
+    CeedScalar stab[5][3] = {{0.}};
+    CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0};
+    CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
+    CeedScalar Tau_d[3] = {0.};
     switch (context->stabilization) {
     case STAB_NONE:        // Galerkin
       break;
     case STAB_SU:        // SU
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++)
-            stab[k][j] = jacob_F_conv_T[j][k][l] * Tau_x[j] * strong_conv[l];
-
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
+      tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
+      tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
+      tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
+      tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
+      tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
+      PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
+                                  tau_strong_conv_conservative);
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++)
+            stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
+
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
       break;
     case STAB_SUPG:        // SUPG
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++)
-            stab[k][j] = jacob_F_conv_T[j][k][l] * Tau_x[j] * strong_res[l];
-
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
+      tau_strong_res[0] = Tau_d[0] * strong_res[0];
+      tau_strong_res[1] = Tau_d[1] * strong_res[1];
+      tau_strong_res[2] = Tau_d[1] * strong_res[2];
+      tau_strong_res[3] = Tau_d[1] * strong_res[3];
+      tau_strong_res[4] = Tau_d[2] * strong_res[4];
+// Alternate route (useful later with primitive variable code):
+// this function was verified against PHASTA for an IC that was as close as possible
+//    computeFluxJacobian_NSp(jacob_F_conv_p, rho, u, E, Rd, cv);
+// it has also been verified to compute a correct result through the following
+//   stab[k][j] += jacob_F_conv_p[j][k][l] * tau_strong_res[l] // flux Jacobian wrt primitive
+// applied in the triple loop below.
+// However, it takes more flops than using the existing Jacobian wrt q after q_{,Y}, viz.
+      PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_res,
+                                  tau_strong_res_conservative);
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++)
+            stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l];
+
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h
new file mode 100644
index 0000000000..31c198f38d
--- /dev/null
+++ b/examples/fluids/qfunctions/newtonian_types.h
@@ -0,0 +1,48 @@
+#ifndef newtonian_types_h
+#define newtonian_types_h
+
+#include <ceed/ceed.h>
+#include "stabilization_types.h"
+
+typedef struct SetupContext_ *SetupContext; // ICsNewtonianIG casts its ctx pointer to this
+struct SetupContext_ {
+  CeedScalar theta0; // ICsNewtonianIG: rho = P0 / (Rd*theta0), internal energy cv*theta0
+  CeedScalar thetaC;
+  CeedScalar P0;     // ICsNewtonianIG: pressure in rho = P0 / (Rd*theta0)
+  CeedScalar N;
+  CeedScalar cv;     // ICsNewtonianIG forms Rd = cp - cv
+  CeedScalar cp;
+  CeedScalar g[3];   // acceleration vector; ICsNewtonianIG uses e_potential = -(g . x)
+  CeedScalar rc;
+  CeedScalar lx;
+  CeedScalar ly;
+  CeedScalar lz;
+  CeedScalar center[3];
+  CeedScalar dc_axis[3];
+  CeedScalar time;
+  int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
+  int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
+  int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
+};
+
+typedef struct NewtonianIdealGasContext_ *NewtonianIdealGasContext; // handle type taken by Tau_diagPrim
+struct NewtonianIdealGasContext_ {
+  CeedScalar lambda;
+  CeedScalar mu;
+  CeedScalar k;
+  CeedScalar cv;     // QFunctions form gamma = cp / cv and Rd = cp - cv
+  CeedScalar cp;
+  CeedScalar g[3];   // acceleration vector; body force is rho*g, potential energy is -rho*(g . x)
+  CeedScalar c_tau;  // NOTE(review): appears to be used only by the disabled Tau_spatial path - confirm
+  CeedScalar Ctau_t; // Tau_diagPrim: dts = Ctau_t / dt
+  CeedScalar Ctau_v; // Tau_diagPrim: weight on the mu*mu term of tau
+  CeedScalar Ctau_C; // Tau_diagPrim: scales Tau_d[0]
+  CeedScalar Ctau_M; // Tau_diagPrim: scales Tau_d[1]
+  CeedScalar Ctau_E; // Tau_diagPrim: scales Tau_d[2]
+  CeedScalar dt;     // Tau_diagPrim: divisor in dts = Ctau_t / dt
+  StabilizationType stabilization; // selects the STAB_NONE / STAB_SU / STAB_SUPG branch
+};
+
+CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; } // returns x squared
+
+#endif // newtonian_types_h
diff --git a/examples/fluids/qfunctions/setupgeo.h b/examples/fluids/qfunctions/setupgeo.h
index 0c90de0112..01406fd941 100644
--- a/examples/fluids/qfunctions/setupgeo.h
+++ b/examples/fluids/qfunctions/setupgeo.h
@@ -113,12 +113,19 @@ CEED_QFUNCTION(Setup)(void *ctx, CeedInt Q,
 // Physical (current) 3D coordinates: x
 // Change of coordinate matrix:
 //   dxdX_{i,j} = dx_i/dX_j (indicial notation) [3 * 2]
+// Inverse change of coordinate matrix:
+//   dXdx_{i,j} = dX_i/dx_j (indicial notation) [2 * 3]
 //
 // (J1,J2,J3) is given by the cross product of the columns of dxdX_{i,j}
 //
 // detJb is the magnitude of (J1,J2,J3)
 //
-// All quadrature data is stored in 4 field vector of quadrature data.
+// dXdx is calculated via Moore–Penrose inverse:
+//
+//   dX_i/dx_j = (dxdX^T dxdX)^(-1) dxdX^T
+//             = (dx_l/dX_i * dx_l/dX_k)^(-1) dx_j/dX_k
+//
+// All quadrature data is stored in 10 field vector of quadrature data.
 //
 // We require the determinant of the Jacobian to properly compute integrals of
 //   the form: int( u v )
@@ -128,12 +135,18 @@ CEED_QFUNCTION(Setup)(void *ctx, CeedInt Q,
 //
 // Normal vector = (J1,J2,J3) / detJb
 //
+//   - TODO Could possibly remove normal vector, as it could be calculated in the Qfunction from dXdx
 // Stored: (J1,J2,J3) / detJb
 //   in q_data_sur[1:3] as
 //   (detJb^-1) * [ J1 ]
 //                [ J2 ]
 //                [ J3 ]
 //
+// Stored: dXdx_{i,j}
+//   in q_data_sur[4:9] as
+//    [dXdx_11 dXdx_12 dXdx_13]
+//    [dXdx_21 dXdx_22 dXdx_23]
+//
 // *****************************************************************************
 CEED_QFUNCTION(SetupBoundary)(void *ctx, CeedInt Q,
                               const CeedScalar *const *in, CeedScalar *const *out) {
@@ -170,6 +183,37 @@ CEED_QFUNCTION(SetupBoundary)(void *ctx, CeedInt Q,
     q_data_sur[2][i] = J2 / detJb;
     q_data_sur[3][i] = J3 / detJb;
 
+    // dxdX_k,j * dxdX_j,k
+    CeedScalar dxdXTdxdX[2][2] = {{ 0. }};
+    for (CeedInt j=0; j<2; j++)
+      for (CeedInt k=0; k<2; k++)
+        for (CeedInt l=0; l<3; l++)
+          dxdXTdxdX[j][k] += dxdX[l][j]*dxdX[l][k];
+
+    const CeedScalar detdxdXTdxdX =  dxdXTdxdX[0][0] * dxdXTdxdX[1][1]
+                                     -dxdXTdxdX[1][0] * dxdXTdxdX[0][1];
+
+    // Compute inverse of dxdXTdxdX
+    CeedScalar dxdXTdxdX_inv[2][2];
+    dxdXTdxdX_inv[0][0] =  dxdXTdxdX[1][1] / detdxdXTdxdX;
+    dxdXTdxdX_inv[0][1] = -dxdXTdxdX[0][1] / detdxdXTdxdX;
+    dxdXTdxdX_inv[1][0] = -dxdXTdxdX[1][0] / detdxdXTdxdX;
+    dxdXTdxdX_inv[1][1] =  dxdXTdxdX[0][0] / detdxdXTdxdX;
+
+    // Compute dXdx from dxdXTdxdX^-1 and dxdX
+    CeedScalar dXdx[2][3] = {{ 0. }};
+    for (CeedInt j=0; j<2; j++)
+      for (CeedInt k=0; k<3; k++)
+        for (CeedInt l=0; l<2; l++)
+          dXdx[j][k] += dxdXTdxdX_inv[l][j] * dxdX[k][l];
+
+    q_data_sur[4][i] = dXdx[0][0];
+    q_data_sur[5][i] = dXdx[0][1];
+    q_data_sur[6][i] = dXdx[0][2];
+    q_data_sur[7][i] = dXdx[1][0];
+    q_data_sur[8][i] = dXdx[1][1];
+    q_data_sur[9][i] = dXdx[1][2];
+
   } // End of Quadrature Point Loop
 
   // Return
diff --git a/examples/fluids/qfunctions/setupgeo2d.h b/examples/fluids/qfunctions/setupgeo2d.h
index b1272f1a8b..2a3c715f3b 100644
--- a/examples/fluids/qfunctions/setupgeo2d.h
+++ b/examples/fluids/qfunctions/setupgeo2d.h
@@ -12,6 +12,7 @@
 #define setup_geo_2d_h
 
 #include <math.h>
+#include <ceed.h>
 
 // *****************************************************************************
 // This QFunction sets up the geometric factors required for integration and
diff --git a/examples/fluids/qfunctions/shocktube.h b/examples/fluids/qfunctions/shocktube.h
new file mode 100644
index 0000000000..3da66cbba1
--- /dev/null
+++ b/examples/fluids/qfunctions/shocktube.h
@@ -0,0 +1,468 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Shock tube initial condition and Euler equation operator for Navier-Stokes
+/// example using PETSc - modified from eulervortex.h
+
+// Model from:
+//   On the Order of Accuracy and Numerical Performance of Two Classes of
+//   Finite Volume WENO Schemes, Zhang, Zhang, and Shu (2011).
+
+#ifndef shocktube_h
+#define shocktube_h
+
+#include <math.h>
+#include <ceed.h>
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846
+#endif
+
+typedef struct SetupContext_ *SetupContext; // NOTE(review): newtonian_types.h declares a different SetupContext_
+struct SetupContext_ {
+  CeedScalar theta0;
+  CeedScalar thetaC;
+  CeedScalar P0;
+  CeedScalar N;
+  CeedScalar cv;              // Specific heat, constant volume
+  CeedScalar cp;              // Specific heat, constant pressure
+  CeedScalar time;
+  CeedScalar mid_point;       // Location of the initial domain mid point (compared against x)
+  CeedScalar P_high;          // Driver section pressure   (used where x <= mid_point)
+  CeedScalar rho_high;        // Driver section density    (used where x <= mid_point)
+  CeedScalar P_low;           // Driven section pressure   (used where x >  mid_point)
+  CeedScalar rho_low;         // Driven section density    (used where x >  mid_point)
+  int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
+  int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
+  int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP, 2=THICK
+};
+
+typedef struct ShockTubeContext_ *ShockTubeContext; // Pointer handle read by EulerShockTube
+struct ShockTubeContext_ {
+  CeedScalar Cyzb;   // YZB: the shock length scale h_shock is divided by this
+  CeedScalar Byzb;   // YZB: exponent applied to (|grad rho| h_shock / rho)
+  CeedScalar c_tau;  // Stabilization constant passed to Tau_spatial
+  bool implicit;     // NOTE(review): not read by the QFunctions in this header — confirm use
+  bool yzb;          // Enables the YZB stabilization branch
+  int stabilization; // 0 = Galerkin (nothing added), 1 = SU; other values add nothing
+};
+
+// *****************************************************************************
+// This function sets the initial conditions
+//
+//   Temperature:
+//     T   = P / (rho * R)
+//   Density:
+//     rho = 1.0        if x <= mid_point
+//         = 0.125      if x >  mid_point
+//   Pressure:
+//     P   = 1.0        if x <= mid_point
+//         = 0.1        if x >  mid_point
+//   Velocity:
+//     u   = 0
+//   Velocity/Momentum Density:
+//     Ui  = rho ui
+//   Total Energy:
+//     E   = P / (gamma - 1) + rho (u u)/2
+//
+// Constants:
+//   cv              ,  Specific heat, constant volume
+//   cp              ,  Specific heat, constant pressure
+//   mid_point       ,  Location of initial domain mid_point
+//   gamma  = cp / cv,  Specific heat ratio
+//
+// *****************************************************************************
+
+// *****************************************************************************
+// This helper function provides support for the exact, time-dependent solution
+//   (currently not implemented) and IC formulation for the shock tube problem
+// *****************************************************************************
+CEED_QFUNCTION_HELPER CeedInt Exact_ShockTube(CeedInt dim, CeedScalar time,
+    const CeedScalar X[], CeedInt Nf, CeedScalar q[],
+    void *ctx) {
+  // Fills q[0..4] = (rho, U1, U2, U3, E) with the piecewise-constant shock tube
+  // state at X[0]; dim, time and Nf are accepted but not read. Always returns 0.
+  const SetupContext context = (SetupContext)ctx;
+  const CeedScalar mid_point = context->mid_point;      // Midpoint of the domain
+  const CeedScalar P_high = context->P_high;            // Driver section pressure
+  const CeedScalar rho_high = context->rho_high;        // Driver section density
+  const CeedScalar P_low = context->P_low;              // Driven section pressure
+  const CeedScalar rho_low = context->rho_low;          // Driven section density
+
+  // Setup
+  const CeedScalar gamma = 1.4;    // ratio of specific heats (hard-coded here)
+  const CeedScalar x     = X[0];   // Only the first coordinate selects the state
+
+  CeedScalar rho, P, u[3] = {0.};  // Velocity is zero everywhere
+
+  // Initial Conditions: driver (high) state up to and including mid_point
+  if (x <= mid_point) {
+    rho = rho_high;
+    P   = P_high;
+  } else {
+    rho = rho_low;
+    P   = P_low;
+  }
+
+  // Assign exact solution; E = P / (gamma - 1) + rho (u u)/2 with the full u.u
+  q[0] = rho;
+  q[1] = rho * u[0];
+  q[2] = rho * u[1];
+  q[3] = rho * u[2];
+  q[4] = P / (gamma-1.0) + rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]) / 2.;
+
+  // Return
+  return 0;
+}
+
+// *****************************************************************************
+// Helper function for computing flux Jacobian
+// *****************************************************************************
+CEED_QFUNCTION_HELPER void ConvectiveFluxJacobian_Euler(CeedScalar dF[3][5][5],
+    const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
+    const CeedScalar gamma) { // dF[dir][0][0] and dF[dir][0][4] are left as passed in
+  const CeedScalar vel_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
+  for (CeedInt dir=0; dir<3; dir++) { // One Jacobian matrix per direction
+    for (CeedInt col=0; col<3; col++) { // Momentum columns of the mass and energy rows
+      dF[dir][0][col+1] = ((dir==col) ? 1. : 0.);
+      dF[dir][4][col+1] = ((dir==col) ? (E*gamma/rho - (gamma-1.)*vel_sq/2.) : 0.) -
+                          (gamma-1.)*u[dir]*u[col];
+    }
+    for (CeedInt row=0; row<3; row++) { // Momentum rows
+      dF[dir][row+1][0] = ((dir==row) ? ((gamma-1.)*(vel_sq/2.)) : 0.) - u[dir]*u[row];
+      for (CeedInt col=0; col<3; col++) // Momentum columns
+        dF[dir][row+1][col+1] = ((row==col) ? u[dir] : 0.) + ((dir==col) ? u[row] : 0.) -
+                                ((dir==row) ? u[col] : 0.) * (gamma-1.);
+      dF[dir][row+1][4] = ((dir==row) ? (gamma-1.) : 0.);
+    }
+    dF[dir][4][0] = u[dir] * ((gamma-1.)*vel_sq - E*gamma/rho);
+    dF[dir][4][4] = u[dir] * gamma;
+  }
+}
+
+// *****************************************************************************
+// Helper function for calculating the covariant length scale in the direction
+// of some 3 element input vector
+//
+// Where
+//  vec         = vector that length is measured in the direction of
+//  h           = covariant element length along vec
+// *****************************************************************************
+CEED_QFUNCTION_HELPER CeedScalar Covariant_length_along_vector(
+  CeedScalar vec[3], const CeedScalar dXdx[3][3]) {
+  // Returns h = 2 |vec| / |dXdx vec|, where (dXdx vec)[j] = sum_i dXdx[j][i] vec[i]
+  CeedScalar vec_norm = sqrt(vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2]);
+  CeedScalar vec_dot_jacobian[3] = {0.0};
+  for (CeedInt i=0; i<3; i++) {
+    for (CeedInt j=0; j<3; j++) {
+      vec_dot_jacobian[j] += dXdx[j][i]*vec[i]; // sum over i into component j
+    }
+  }
+  CeedScalar norm_vec_dot_jacobian = sqrt(vec_dot_jacobian[0]*vec_dot_jacobian[0]+
+                                          vec_dot_jacobian[1]*vec_dot_jacobian[1]+
+                                          vec_dot_jacobian[2]*vec_dot_jacobian[2]);
+  CeedScalar h = 2.0 * vec_norm / norm_vec_dot_jacobian; // a zero vec gives 0/0
+  return h;
+}
+
+
+// *****************************************************************************
+// Helper function for computing Tau elements (stabilization constant)
+//   Model from:
+//     Stabilized Methods for Compressible Flows, Hughes et al 2010
+//
+//   Spatial criterion #2 - Tau is a 3x3 diagonal matrix
+//   Tau[i] = c_tau h[i] Xi(Pe) / rho(A[i]) (no sum)
+//
+// Where
+//   c_tau     = stabilization constant (0.5 is reported as "optimal")
+//   h[i]      = 2 length(dxdX[i])
+//   Pe        = Peclet number ( Pe = sqrt(u u) / dot(dXdx,u) diffusivity )
+//   Xi(Pe)    = coth Pe - 1. / Pe (1. at large local Peclet number )
+//   rho(A[i]) = spectral radius of the convective flux Jacobian i,
+//               wave speed in direction i
+// *****************************************************************************
+CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3],
+                                       const CeedScalar dXdx[3][3], const CeedScalar u[3],
+                                       const CeedScalar sound_speed, const CeedScalar c_tau) {
+  for (CeedInt dir=0; dir<3; dir++) {
+    // squared norm of column dir of dXdx; the element length is 2 over its root
+    const CeedScalar col_sq = dXdx[0][dir]*dXdx[0][dir] + dXdx[1][dir]*dXdx[1][dir] +
+                              dXdx[2][dir]*dXdx[2][dir];
+    const CeedScalar elem_len = 2 / sqrt(col_sq);
+    // divide by the fastest wave speed in direction dir: |u| plus sound speed
+    Tau_x[dir] = c_tau * elem_len / (fabs(u[dir]) + sound_speed);
+  }
+}
+
+// *****************************************************************************
+// This QFunction sets the initial conditions for shock tube
+// *****************************************************************************
+CEED_QFUNCTION(ICsShockTube)(void *ctx, CeedInt Q,
+                             const CeedScalar *const *in, CeedScalar *const *out) {
+  // in[0]: point coordinates (3 components); out[0]: initial state (5 components)
+  const CeedScalar (*coords)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
+  CeedScalar (*state0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  CeedPragmaSIMD
+  // Evaluate the time-zero shock tube state at every quadrature point
+  for (CeedInt i=0; i<Q; i++) {
+    CeedScalar q_pt[5];
+    const CeedScalar x_pt[] = {coords[0][i], coords[1][i], coords[2][i]};
+
+    // Arguments: dim=3, time=0, Nf=5; the returned status is not checked
+    Exact_ShockTube(3, 0., x_pt, 5, q_pt, ctx);
+
+    // Copy the point state into the component-major output array
+    for (CeedInt comp=0; comp<5; comp++) {
+      state0[comp][i] = q_pt[comp];
+    }
+  }
+
+  return 0;
+}
+
+// *****************************************************************************
+// This QFunction implements the following formulation of Euler equations
+//   with explicit time stepping method
+//
+// This is 3D Euler for compressible gas dynamics in conservation
+//   form with state variables of density, momentum density, and total
+//   energy density.
+//
+// State Variables: q = ( rho, U1, U2, U3, E )
+//   rho - Mass Density
+//   Ui  - Momentum Density,      Ui = rho ui
+//   E   - Total Energy Density,  E  = P / (gamma - 1) + rho (u u)/2
+//
+// Euler Equations:
+//   drho/dt + div( U )                   = 0
+//   dU/dt   + div( rho (u x u) + P I3 )  = 0
+//   dE/dt   + div( (E + P) u )           = 0
+//
+// Equation of State:
+//   P = (gamma - 1) (E - rho (u u) / 2)
+//
+// Constants:
+//   cv              ,  Specific heat, constant volume
+//   cp              ,  Specific heat, constant pressure
+//   g               ,  Gravity
+//   gamma  = cp / cv,  Specific heat ratio
+// *****************************************************************************
+CEED_QFUNCTION(EulerShockTube)(void *ctx, CeedInt Q,
+                               const CeedScalar *const *in, CeedScalar *const *out) {
+  // *INDENT-OFF*
+  // Inputs: in[0] state q, in[1] reference-space gradient dq, in[2] quadrature data
+  const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
+                   (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
+                   (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+  // Outputs: out[0] value term v, out[1] reference-space gradient term dv
+  CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
+             (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
+
+  const CeedScalar gamma = 1.4;
+
+  ShockTubeContext context = (ShockTubeContext)ctx;
+  const CeedScalar Cyzb  = context->Cyzb;
+  const CeedScalar Byzb  = context->Byzb;
+  const CeedScalar c_tau = context->c_tau;
+
+  CeedPragmaSIMD
+  // Quadrature Point Loop
+  for (CeedInt i=0; i<Q; i++) {
+    // *INDENT-OFF*
+    // Setup
+    // -- Interp in
+    const CeedScalar rho        =   q[0][i];
+    const CeedScalar u[3]       =  {q[1][i] / rho,
+                                    q[2][i] / rho,
+                                    q[3][i] / rho
+                                   };
+    const CeedScalar E          =   q[4][i];
+    const CeedScalar drho[3]    =  {dq[0][0][i],
+                                    dq[1][0][i],
+                                    dq[2][0][i]
+                                   };
+    const CeedScalar dU[3][3]   = {{dq[0][1][i],
+                                    dq[1][1][i],
+                                    dq[2][1][i]},
+                                   {dq[0][2][i],
+                                    dq[1][2][i],
+                                    dq[2][2][i]},
+                                   {dq[0][3][i],
+                                    dq[1][3][i],
+                                    dq[2][3][i]}
+                                  };
+    const CeedScalar dE[3]      =  {dq[0][4][i],
+                                    dq[1][4][i],
+                                    dq[2][4][i]
+                                   };
+    // -- Interp-to-Interp q_data
+    const CeedScalar wdetJ      =   q_data[0][i];
+    // -- Interp-to-Grad q_data
+    // ---- Inverse of change of coordinate matrix: X_i,j
+    // *INDENT-OFF*
+    const CeedScalar dXdx[3][3] = {{q_data[1][i],
+                                    q_data[2][i],
+                                    q_data[3][i]},
+                                   {q_data[4][i],
+                                    q_data[5][i],
+                                    q_data[6][i]},
+                                   {q_data[7][i],
+                                    q_data[8][i],
+                                    q_data[9][i]}
+                                  };
+    // dU/dx
+    CeedScalar du[3][3] = {{0}};
+    CeedScalar drhodx[3] = {0};
+    CeedScalar dEdx[3] = {0};
+    CeedScalar dUdx[3][3] = {{0}};
+    // du keeps reference-space velocity gradients: du[j][k] = d(u_j)/dX_k
+    for (CeedInt j=0; j<3; j++) {
+      for (CeedInt k=0; k<3; k++) {
+        du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
+        drhodx[j] += drho[k] * dXdx[k][j];
+        dEdx[j] += dE[k] * dXdx[k][j];
+        // dUdx[j][k] = d(U_j)/dx_k via the chain rule through dXdx
+        for (CeedInt l=0; l<3; l++) {
+          dUdx[j][k] += dU[j][l] * dXdx[l][k];
+        }
+      }
+    }
+
+    // *INDENT-ON*
+    const CeedScalar
+    E_kinetic  = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
+    E_internal = E - E_kinetic,
+    P          = E_internal * (gamma - 1); // P = pressure
+
+    // The Physics
+    // Zero v and dv so all future terms can safely sum into it
+    for (CeedInt j=0; j<5; j++) {
+      v[j][i] = 0;
+      for (CeedInt k=0; k<3; k++)
+        dv[k][j][i] = 0;
+    }
+
+    // -- Density
+    // ---- u rho
+    for (CeedInt j=0; j<3; j++)
+      dv[j][0][i]  += wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
+                             rho*u[2]*dXdx[j][2]);
+    // -- Momentum
+    // ---- rho (u x u) + P I3
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
+        dv[k][j+1][i]  += wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
+                                 (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
+                                 (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
+    // -- Total Energy Density
+    // ---- (E + P) u
+    for (CeedInt j=0; j<3; j++)
+      dv[j][4][i]  += wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
+                                         u[2]*dXdx[j][2]);
+
+    // -- YZB stabilization
+    if (context->yzb) {
+      CeedScalar drho_norm = 0.0;         // magnitude of the density gradient
+      CeedScalar j_vec[3] = {0.0};        // unit vector aligned with the density gradient
+      CeedScalar h_shock = 0.0;           // element lengthscale
+      CeedScalar acoustic_vel = 0.0;      // characteristic velocity, acoustic speed
+      CeedScalar tau_shock = 0.0;         // timescale
+      CeedScalar nu_shock = 0.0;          // artificial diffusion
+
+      // Unit vector aligned with the density gradient
+      drho_norm = sqrt(drhodx[0]*drhodx[0] + drhodx[1]*drhodx[1] +
+                       drhodx[2]*drhodx[2]);
+      for (CeedInt j=0; j<3; j++)
+        j_vec[j] = drhodx[j] / (drho_norm + 1e-20);
+
+      if (drho_norm == 0.0) {
+        nu_shock = 0.0;
+      } else {
+        h_shock = Covariant_length_along_vector(j_vec, dXdx);
+        h_shock /= Cyzb;
+        acoustic_vel = sqrt(gamma*P/rho);
+        tau_shock = h_shock / (2*acoustic_vel) * pow(drho_norm * h_shock / rho, Byzb);
+        nu_shock = fabs(tau_shock * acoustic_vel * acoustic_vel);
+      }
+      // NOTE(review): drhodx/dEdx are physical gradients but du is reference-space — confirm
+      for (CeedInt j=0; j<3; j++)
+        dv[j][0][i] -= wdetJ * nu_shock * drhodx[j];
+      // Momentum rows are state components 1..3 (0 is density, 4 is total energy)
+      for (CeedInt k=0; k<3; k++)
+        for (CeedInt j=0; j<3; j++)
+          dv[j][k+1][i] -= wdetJ * nu_shock * du[k][j];
+
+      for (CeedInt j=0; j<3; j++)
+        dv[j][4][i] -= wdetJ * nu_shock * dEdx[j];
+    }
+
+    // Stabilization
+    // Need the Jacobian for the advective fluxes for stabilization
+    //    indexed as: jacob_F_conv[direction][flux component][solution component]
+    CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
+    ConvectiveFluxJacobian_Euler(jacob_F_conv, rho, u, E, gamma);
+
+
+    // dqdx collects drhodx, dUdx and dEdx in one vector
+    CeedScalar dqdx[5][3];
+    for (CeedInt j=0; j<3; j++) {
+      dqdx[0][j] = drhodx[j];
+      dqdx[4][j] = dEdx[j];
+      for (CeedInt k=0; k<3; k++)
+        dqdx[k+1][j] = dUdx[k][j];
+    }
+
+    // strong_conv = dF/dq * dq/dx    (Strong convection)
+    CeedScalar strong_conv[5] = {0};
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<5; k++)
+        for (CeedInt l=0; l<5; l++)
+          strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
+
+    // Stabilization
+    // -- Tau elements
+    const CeedScalar sound_speed = sqrt(gamma * P / rho);
+    CeedScalar Tau_x[3] = {0.};
+    Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau);
+
+    CeedScalar stab[5][3] = {0};
+    switch (context->stabilization) {
+    case 0:        // Galerkin
+      break;
+    case 1:        // SU
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++) {
+            stab[k][j] += jacob_F_conv[j][k][l] * Tau_x[j] * strong_conv[l];
+          }
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
+          dv[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
+                                stab[j][1] * dXdx[k][1] +
+                                stab[j][2] * dXdx[k][2]);
+      break;
+    }
+
+  } // End Quadrature Point Loop
+
+  // Return
+  return 0;
+}
+
+#endif // shocktube_h
diff --git a/examples/fluids/qfunctions/stabilization_types.h b/examples/fluids/qfunctions/stabilization_types.h
new file mode 100644
index 0000000000..7e484df200
--- /dev/null
+++ b/examples/fluids/qfunctions/stabilization_types.h
@@ -0,0 +1,10 @@
+#ifndef stabilization_types_h
+#define stabilization_types_h
+
+typedef enum { // Selects which stabilization term a QFunction adds
+  STAB_NONE = 0, // presumably no stabilization term (plain Galerkin) — TODO confirm
+  STAB_SU   = 1, // Streamline Upwind
+  STAB_SUPG = 2, // Streamline Upwind Petrov-Galerkin
+} StabilizationType;
+
+#endif // stabilization_types_h
diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h
new file mode 100644
index 0000000000..e8909d1f2e
--- /dev/null
+++ b/examples/fluids/qfunctions/stg_shur14.h
@@ -0,0 +1,281 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Implementation of the Synthetic Turbulence Generation (STG) algorithm
+/// presented in Shur et al. 2014
+///
+/// SetupSTG_Rand reads in the input files and fills in STGShur14Context. Then
+/// STGShur14_CalcQF is run over quadrature points. Before the program exits,
+/// TearDownSTG is run to free the memory of the allocated arrays.
+
+#ifndef stg_shur14_h
+#define stg_shur14_h
+
+#include <math.h>
+#include <ceed.h>
+#include <stdlib.h>
+#include "stg_shur14_type.h"
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846
+#endif
+
+#define STG_NMODES_MAX 1024
+
+CEED_QFUNCTION_HELPER CeedScalar Max(CeedScalar a, CeedScalar b) { return b > a ? b : a; } // larger of a and b
+CEED_QFUNCTION_HELPER CeedScalar Min(CeedScalar a, CeedScalar b) { return b > a ? a : b; } // smaller of a and b
+
+/*
+ * @brief Interpolate quantities from input profile to given location
+ *
+ * Assumed that prof_dw[i+1] > prof_dw[i] and prof_dw[0] = 0
+ * If dw > prof_dw[-1], then the interpolation takes the values at prof_dw[-1]
+ *
+ * @param[in]  dw      Distance to the nearest wall
+ * @param[out] ubar    Mean velocity at dw
+ * @param[out] cij     Cholesky decomposition at dw
+ * @param[out] eps     Turbulent dissipation at dw
+ * @param[out] lt      Turbulent length scale at dw
+ * @param[in]  stg_ctx STGShur14Context for the problem
+ */
+CEED_QFUNCTION_HELPER void InterpolateProfile(const CeedScalar dw,
+    CeedScalar ubar[3], CeedScalar cij[6], CeedScalar *eps, CeedScalar *lt,
+    const STGShur14Context stg_ctx) {
+
+  // Every profile table is a slice of stg_ctx->data located by stg_ctx->offsets
+  const CeedInt    n_pts      = stg_ctx->nprofs;
+  const CeedScalar *wall_dist = &stg_ctx->data[stg_ctx->offsets.prof_dw];
+  const CeedScalar *eps_tab   = &stg_ctx->data[stg_ctx->offsets.eps];
+  const CeedScalar *lt_tab    = &stg_ctx->data[stg_ctx->offsets.lt];
+  const CeedScalar *ubar_tab  = &stg_ctx->data[stg_ctx->offsets.ubar];
+  const CeedScalar *cij_tab   = &stg_ctx->data[stg_ctx->offsets.cij];
+
+  // First profile point lying strictly beyond dw; stays -1 when there is none
+  CeedInt upper = -1;
+  for (CeedInt p=0; p<n_pts && upper<0; p++) {
+    if (dw < wall_dist[p]) upper = p;
+  }
+
+  if (upper <= 0) {
+    // No bracketing pair of points: copy the last entry of each profile row.
+    // Row r of a table occupies entries [r*n_pts, (r+1)*n_pts).
+    const CeedInt last = n_pts - 1;
+    for (CeedInt r=0; r<3; r++)
+      ubar[r] = ubar_tab[r*n_pts + last];
+    for (CeedInt r=0; r<6; r++)
+      cij[r] = cij_tab[r*n_pts + last];
+    *eps = eps_tab[last];
+    *lt  = lt_tab[last];
+    return;
+  }
+
+  // Linear interpolation between the bracketing points lower and upper
+  const CeedInt    lower  = upper - 1;
+  const CeedScalar weight = (dw - wall_dist[lower]) /
+                            (wall_dist[upper] - wall_dist[lower]);
+
+  //*INDENT-OFF*
+  for (CeedInt r=0; r<3; r++) {
+    const CeedScalar *row = &ubar_tab[r*n_pts];
+    ubar[r] = row[lower] + weight*( row[upper] - row[lower] );
+  }
+  for (CeedInt r=0; r<6; r++) {
+    const CeedScalar *row = &cij_tab[r*n_pts];
+    cij[r] = row[lower] + weight*( row[upper] - row[lower] );
+  }
+  *eps = eps_tab[lower] + weight*( eps_tab[upper] - eps_tab[lower] );
+  *lt  = lt_tab[lower]  + weight*( lt_tab[upper]  - lt_tab[lower] );
+  //*INDENT-ON*
+}
+
+/*
+ * @brief Calculate spectrum coefficients for STG
+ *
+ * Calculates q_n at a given distance to the wall, normalized to sum to one
+ *
+ * @param[in]  dw      Distance to the nearest wall
+ * @param[in]  eps     Turbulent dissipation w/rt dw
+ * @param[in]  lt      Turbulent length scale w/rt dw
+ * @param[in]  h       Element lengths in coordinate directions
+ * @param[in]  nu      Kinematic viscosity (the caller passes mu/rho)
+ * @param[out] qn      Spectrum coefficients, [nmodes]
+ * @param[in]  stg_ctx STGShur14Context for the problem
+ */
+CEED_QFUNCTION_HELPER void CalcSpectrum(const CeedScalar dw,
+    const CeedScalar eps, const CeedScalar lt, const CeedScalar h[3],
+    const CeedScalar nu, CeedScalar qn[], const STGShur14Context stg_ctx) {
+
+  const CeedInt    nmodes = stg_ctx->nmodes;
+  const CeedScalar *kappa = &stg_ctx->data[stg_ctx->offsets.kappa];
+
+  const CeedScalar hmax = Max( Max(h[0], h[1]), h[2]);
+  const CeedScalar ke   = 2*M_PI/Min(2*dw, 3*lt);
+  const CeedScalar keta = 2*M_PI*pow(pow(nu,3.0)/eps, -0.25);
+  const CeedScalar kcut =
+    M_PI/ Min( Max(Max(h[1], h[2]), 0.3*hmax) + 0.1*dw, hmax );
+  CeedScalar fcut, feta, Ektot=0.0;
+
+  for(CeedInt n=0; n<nmodes; n++) {
+    feta   = exp(-Square(12*kappa[n]/keta));
+    fcut   = exp( -pow(4*Max(kappa[n] - 0.9*kcut, 0)/kcut, 3.) );
+    qn[n]  = pow(kappa[n]/ke, 4.)
+             * pow(1 + 2.4*Square(kappa[n]/ke),-17./6)*feta*fcut;
+    qn[n] *= n==0 ? kappa[0] : kappa[n] - kappa[n-1]; // weight by the wavenumber spacing
+    Ektot += qn[n];
+  }
+
+  for(CeedInt n=0; n<nmodes; n++) qn[n] /= Ektot; // normalize by the running total
+}
+
+/******************************************************
+ * @brief Calculate u(x,t) for STG inflow condition
+ *
+ * @param[in]  X       Location to evaluate u(X,t)
+ * @param[in]  t       Time to evaluate u(X,t)
+ * @param[in]  ubar    Mean velocity at X
+ * @param[in]  cij     Cholesky decomposition at X, packed (11, 22, 33, 21, 31, 32)
+ * @param[in]  qn      Wavemode amplitudes at X, [nmodes]
+ * @param[out] u       Velocity at X and t
+ * @param[in]  stg_ctx STGShur14Context for the problem
+ */
+CEED_QFUNCTION_HELPER void STGShur14_Calc(const CeedScalar X[3],
+    const CeedScalar t, const CeedScalar ubar[3], const CeedScalar cij[6],
+    const CeedScalar qn[], CeedScalar u[3],
+    const STGShur14Context stg_ctx) {
+  //*INDENT-OFF*
+  const CeedInt    nmodes = stg_ctx->nmodes;
+  const CeedScalar *kappa = &stg_ctx->data[stg_ctx->offsets.kappa];
+  const CeedScalar *phi   = &stg_ctx->data[stg_ctx->offsets.phi];
+  const CeedScalar *sigma = &stg_ctx->data[stg_ctx->offsets.sigma];
+  const CeedScalar *d     = &stg_ctx->data[stg_ctx->offsets.d];
+  //*INDENT-ON*
+  const CeedScalar tworoot1p5 = 2*sqrt(1.5);
+  CeedScalar xdotd, vp[3] = {0.};
+  CeedScalar xhat[] = {0., X[1], X[2]};
+
+  CeedPragmaSIMD
+  for(CeedInt n=0; n<nmodes; n++) {
+    xhat[0] = (X[0] - stg_ctx->u0*t)*Max(2*kappa[0]/kappa[n], 0.1);
+    xdotd = 0.;
+    for(CeedInt i=0; i<3; i++) xdotd += d[i*nmodes+n]*xhat[i];
+    const CeedScalar cos_kxdp = cos(kappa[n]*xdotd + phi[n]);
+    const CeedScalar amp = tworoot1p5*sqrt(qn[n]); // shared by the three components
+    vp[0] += amp*sigma[0*nmodes+n] * cos_kxdp;
+    vp[1] += amp*sigma[1*nmodes+n] * cos_kxdp;
+    vp[2] += amp*sigma[2*nmodes+n] * cos_kxdp;
+  }
+
+  u[0] = ubar[0] + cij[0]*vp[0];
+  u[1] = ubar[1] + cij[3]*vp[0] + cij[1]*vp[1];
+  u[2] = ubar[2] + cij[4]*vp[0] + cij[5]*vp[1] + cij[2]*vp[2];
+}
+
+/********************************************************************
+ * @brief QFunction to calculate the inflow boundary condition
+ *
+ * This will loop through quadrature points, calculate the wavemode amplitudes
+ * at each location, then calculate the actual velocity.
+ */
+CEED_QFUNCTION(STGShur14_Inflow)(void *ctx, CeedInt Q,
+                                 const CeedScalar *const *in,
+                                 CeedScalar *const *out) {
+
+  //*INDENT-OFF*
+  // in[0]: state, in[1]: surface quadrature data, in[2]: coordinates
+  const CeedScalar (*state)[CEED_Q_VLA]  = (const CeedScalar(*)[CEED_Q_VLA]) in[0],
+                   (*qd_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[1],
+                   (*coords)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[2];
+  // out[0]: boundary term, 5 components
+  CeedScalar (*flux)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0];
+  //*INDENT-ON*
+
+  // Scalar settings copied out of the context once, ahead of the point loop
+  const STGShur14Context stg = (STGShur14Context) ctx;
+  const CeedScalar cv        = stg->newtonian_ctx.cv;
+  const CeedScalar cp        = stg->newtonian_ctx.cp;
+  const CeedScalar mu        = stg->newtonian_ctx.mu;
+  const CeedScalar gas_const = cp - cv;
+  const CeedScalar gamma     = cp/cv;
+  const CeedScalar P0        = stg->P0;
+  const CeedScalar theta0    = stg->theta0;
+  const CeedScalar time      = stg->time;
+  const CeedScalar dx        = stg->dx;
+  const bool prescribe_T     = stg->prescribe_T;
+  const bool mean_only       = stg->mean_only;
+
+  // Implicit time integration flips the sign of the quadrature weight
+  const CeedScalar weight_sign = stg->is_implicit ? -1. : 1.;
+  // Work arrays declared once, outside the point loop
+  CeedScalar qn[STG_NMODES_MAX], u[3], ubar[3], cij[6], eps, lt;
+
+  CeedPragmaSIMD
+  for(CeedInt i=0; i<Q; i++) {
+    const CeedScalar x[] = { coords[0][i], coords[1][i], coords[2][i] };
+    const CeedScalar wall_dist = coords[1][i];
+
+    // Density: from the interior state when T is prescribed, else from P0 and theta0
+    const CeedScalar rho = prescribe_T ? state[0][i] : P0 / (gas_const * theta0);
+
+    // Element sizes from the column norms of the 2x3 dXdx held in qd_sur[4..9];
+    // the first entry is then replaced by the context value dx
+    CeedScalar h[3];
+    for (CeedInt c=0; c<3; c++) {
+      const CeedScalar dXdx_0c = qd_sur[4+c][i], dXdx_1c = qd_sur[7+c][i];
+      h[c] = 2/sqrt(dXdx_0c*dXdx_0c + dXdx_1c*dXdx_1c);
+    }
+    h[0] = dx;
+
+    // Mean profile at this wall distance, plus fluctuations unless mean_only
+    InterpolateProfile(wall_dist, ubar, cij, &eps, &lt, stg);
+    if (mean_only) {
+      for (CeedInt c=0; c<3; c++) u[c] = ubar[c];
+    } else {
+      CalcSpectrum(wall_dist, eps, lt, h, mu/rho, qn, stg);
+      STGShur14_Calc(x, time, ubar, cij, qn, u, stg);
+    }
+
+    const CeedScalar E_kinetic = .5 * rho * (u[0]*u[0] +
+                                 u[1]*u[1] +
+                                 u[2]*u[2]);
+    // prescribe_T: internal energy and pressure from interior rho with exterior theta0.
+    // Otherwise: internal energy is the solution's E minus the prescribed kinetic part.
+    const CeedScalar E_internal = prescribe_T ? rho * cv * theta0
+                                  : state[4][i] - E_kinetic;
+    const CeedScalar P = prescribe_T ? rho * gas_const * theta0
+                         : E_internal * (gamma - 1.);
+    const CeedScalar E = E_internal + E_kinetic;
+
+    // Signed quadrature weight and the normal vector stored in qd_sur[1..3]
+    const CeedScalar wdetJb  = weight_sign * qd_sur[0][i];
+    const CeedScalar norm[3] = {qd_sur[1][i], qd_sur[2][i], qd_sur[3][i]};
+
+    // Velocity normal to the boundary
+    const CeedScalar u_normal = norm[0]*u[0] +
+                                norm[1]*u[1] +
+                                norm[2]*u[2];
+
+    // Zero the output so every term below can subtract into it
+    for (CeedInt c=0; c<5; c++) flux[c][i] = 0.;
+
+    // -- Density
+    flux[0][i] -= wdetJb * rho * u_normal;
+
+    // -- Momentum
+    for (CeedInt c=0; c<3; c++)
+      flux[c+1][i] -= wdetJb *(rho * u_normal * u[c] +
+                               norm[c] * P);
+
+    // -- Total Energy Density
+    flux[4][i] -= wdetJb * u_normal * (E + P);
+  }
+
+  return 0;
+}
+
+
+#endif // stg_shur14_h
diff --git a/examples/fluids/qfunctions/stg_shur14_type.h b/examples/fluids/qfunctions/stg_shur14_type.h
new file mode 100644
index 0000000000..da63979ca1
--- /dev/null
+++ b/examples/fluids/qfunctions/stg_shur14_type.h
@@ -0,0 +1,44 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#ifndef stg_shur14_type_h
+#define stg_shur14_type_h
+
+#include <ceed.h>
+#include "newtonian_types.h"
+
+/* Access data arrays via:
+ *  CeedScalar (*sigma)[ctx->nmodes] = (CeedScalar (*)[ctx->nmodes])&ctx->data[ctx->offsets.sigma]; */
+typedef struct STGShur14Context_ *STGShur14Context;
+struct STGShur14Context_ {
+  CeedInt    nmodes;      // !< Number of wavemodes
+  CeedInt    nprofs;      // !< Number of profile points in STGInflow.dat
+  CeedScalar alpha;       // !< Geometric growth rate of kappa
+  CeedScalar u0;          // !< Convective velocity
+  CeedScalar time;        // !< Solution time
+  CeedScalar P0;          // !< Inlet pressure
+  CeedScalar theta0;      // !< Inlet temperature
+  bool       is_implicit; // !< Whether using implicit time integration
+  bool       mean_only;   // !< Only apply the mean profile
+  CeedScalar dx;          // !< dx used for h calculation
+  bool       prescribe_T; // !< Prescribe temperature weakly
+  struct NewtonianIdealGasContext_ newtonian_ctx; // !< Newtonian ideal gas context, stored by value
+
+  struct {
+    size_t sigma, d, phi; // !< Random number set, [nmodes,3], [nmodes,3], [nmodes]
+    size_t kappa;     // !< Wavemode frequencies in increasing order, [nmodes]
+    size_t prof_dw;   // !< Distance to wall for Inflow Profile, [nprof]
+    size_t ubar;      // !< Mean velocity, [nprof, 3]
+    size_t cij;       // !< Cholesky decomposition [nprof, 6]
+    size_t eps;       // !< Turbulent Dissipation [nprof, 6]
+    size_t lt;        // !< Turbulent Length Scale [nprof, 6]
+  } offsets;          // !< Holds offsets for each array in data
+  size_t total_bytes; // !< Total size of struct plus array
+  CeedScalar data[1]; // !< Holds concatenated scalar array data
+};
+
+#endif
diff --git a/examples/fluids/src/cloptions.c b/examples/fluids/src/cloptions.c
index 065c548361..fd1471912d 100644
--- a/examples/fluids/src/cloptions.c
+++ b/examples/fluids/src/cloptions.c
@@ -24,12 +24,21 @@ PetscErrorCode RegisterProblems_NS(AppCtx app_ctx) {
   ierr = PetscFunctionListAdd(&app_ctx->problems, "euler_vortex",
                               NS_EULER_VORTEX); CHKERRQ(ierr);
 
+  ierr = PetscFunctionListAdd(&app_ctx->problems, "shocktube",
+                              NS_SHOCKTUBE); CHKERRQ(ierr);
+
   ierr = PetscFunctionListAdd(&app_ctx->problems, "advection",
                               NS_ADVECTION); CHKERRQ(ierr);
 
   ierr = PetscFunctionListAdd(&app_ctx->problems, "advection2d",
                               NS_ADVECTION2D); CHKERRQ(ierr);
 
+  ierr = PetscFunctionListAdd(&app_ctx->problems, "blasius",
+                              NS_BLASIUS); CHKERRQ(ierr);
+
+  ierr = PetscFunctionListAdd(&app_ctx->problems, "channel",
+                              NS_CHANNEL); CHKERRQ(ierr);
+
   PetscFunctionReturn(0);
 }
 
@@ -42,8 +51,8 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx,
   PetscErrorCode ierr;
   PetscFunctionBeginUser;
 
-  ierr = PetscOptionsBegin(comm, NULL, "Navier-Stokes in PETSc with libCEED",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Navier-Stokes in PETSc with libCEED",
+                    NULL);
 
   ierr = PetscOptionsString("-ceed", "CEED resource specifier",
                             NULL, app_ctx->ceed_resource, app_ctx->ceed_resource,
@@ -134,7 +143,7 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx,
         for (PetscInt w = 0; w < bc->num_wall; w++)
           if (bc->slips[c][s] == bc->walls[w])
             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,
-                    "Boundary condition already set on face %D!\n",
+                    "Boundary condition already set on face %" PetscInt_FMT "!\n",
                     bc->walls[w]);
 
   // Inflow BCs
@@ -148,7 +157,7 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx,
                               "Face IDs to apply outflow BC",
                               NULL, bc->outflows, &bc->num_outflow, NULL); CHKERRQ(ierr);
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   PetscFunctionReturn(0);
 }
diff --git a/examples/fluids/src/misc.c b/examples/fluids/src/misc.c
index 94c189c89c..d7b63e1581 100644
--- a/examples/fluids/src/misc.c
+++ b/examples/fluids/src/misc.c
@@ -10,19 +10,18 @@
 
 #include "../navierstokes.h"
 
-PetscErrorCode ICs_FixMultiplicity(DM dm, CeedData ceed_data, Vec Q_loc, Vec Q,
+PetscErrorCode ICs_FixMultiplicity(DM dm, CeedData ceed_data, User user,
+                                   Vec Q_loc, Vec Q,
                                    CeedScalar time) {
   PetscErrorCode ierr;
   PetscFunctionBeginUser;
 
   // ---------------------------------------------------------------------------
-  // Update SetupContext
+  // Update time for evaluation
   // ---------------------------------------------------------------------------
-  SetupContext setup_ctx;
-  CeedQFunctionContextGetData(ceed_data->setup_context, CEED_MEM_HOST,
-                              (void **)&setup_ctx);
-  setup_ctx->time = time;
-  CeedQFunctionContextRestoreData(ceed_data->setup_context, (void **)&setup_ctx);
+  if (user->phys->ics_time_label)
+    CeedOperatorContextSetDouble(ceed_data->op_ics, user->phys->ics_time_label,
+                                 &time);
 
   // ---------------------------------------------------------------------------
   // ICs
@@ -151,7 +150,7 @@ PetscErrorCode RegressionTests_NS(AppCtx app_ctx, Vec Q) {
 }
 
 // Get error for problems with exact solutions
-PetscErrorCode GetError_NS(CeedData ceed_data, DM dm, AppCtx app_ctx, Vec Q,
+PetscErrorCode GetError_NS(CeedData ceed_data, DM dm, User user, Vec Q,
                            PetscScalar final_time) {
   PetscInt       loc_nodes;
   Vec            Q_exact, Q_exact_loc;
@@ -163,7 +162,8 @@ PetscErrorCode GetError_NS(CeedData ceed_data, DM dm, AppCtx app_ctx, Vec Q,
   ierr = DMCreateGlobalVector(dm, &Q_exact); CHKERRQ(ierr);
   ierr = DMGetLocalVector(dm, &Q_exact_loc); CHKERRQ(ierr);
   ierr = VecGetSize(Q_exact_loc, &loc_nodes); CHKERRQ(ierr);
-  ierr = ICs_FixMultiplicity(dm, ceed_data, Q_exact_loc, Q_exact, final_time);
+  ierr = ICs_FixMultiplicity(dm, ceed_data, user, Q_exact_loc, Q_exact,
+                             final_time);
   CHKERRQ(ierr);
 
   // Get |exact solution - obtained solution|
@@ -187,22 +187,22 @@ PetscErrorCode GetError_NS(CeedData ceed_data, DM dm, AppCtx app_ctx, Vec Q,
 
 // Post-processing
 PetscErrorCode PostProcess_NS(TS ts, CeedData ceed_data, DM dm,
-                              ProblemData *problem, AppCtx app_ctx,
+                              ProblemData *problem, User user,
                               Vec Q, PetscScalar final_time) {
   PetscInt       steps;
   PetscErrorCode ierr;
   PetscFunctionBegin;
 
   // Print relative error
-  if (problem->non_zero_time && !app_ctx->test_mode) {
-    ierr = GetError_NS(ceed_data, dm, app_ctx, Q, final_time); CHKERRQ(ierr);
+  if (problem->non_zero_time && !user->app_ctx->test_mode) {
+    ierr = GetError_NS(ceed_data, dm, user, Q, final_time); CHKERRQ(ierr);
   }
 
   // Print final time and number of steps
   ierr = TSGetStepNumber(ts, &steps); CHKERRQ(ierr);
-  if (!app_ctx->test_mode) {
+  if (!user->app_ctx->test_mode) {
     ierr = PetscPrintf(PETSC_COMM_WORLD,
-                       "Time integrator took %D time steps to reach final time %g\n",
+                       "Time integrator took %" PetscInt_FMT " time steps to reach final time %g\n",
                        steps, (double)final_time); CHKERRQ(ierr);
   }
 
@@ -210,8 +210,8 @@ PetscErrorCode PostProcess_NS(TS ts, CeedData ceed_data, DM dm,
   ierr = VecViewFromOptions(Q, NULL, "-vec_view"); CHKERRQ(ierr);
 
   // Compare reference solution values with current test run for CI
-  if (app_ctx->test_mode) {
-    ierr = RegressionTests_NS(app_ctx, Q); CHKERRQ(ierr);
+  if (user->app_ctx->test_mode) {
+    ierr = RegressionTests_NS(user->app_ctx, Q); CHKERRQ(ierr);
   }
 
   PetscFunctionReturn(0);
@@ -259,3 +259,10 @@ PetscErrorCode SetBCsFromICs_NS(DM dm, Vec Q, Vec Q_loc) {
 
   PetscFunctionReturn(0);
 }
+
+// Release a PETSc-allocated plain data context, reporting failure as a libCEED error code
+int FreeContextPetsc(void *data) {
+  PetscErrorCode petsc_status = PetscFree(data);
+  if (!petsc_status) return CEED_ERROR_SUCCESS;
+  return CeedError(NULL, CEED_ERROR_ACCESS, "PetscFree failed");
+}
diff --git a/examples/fluids/src/setupdm.c b/examples/fluids/src/setupdm.c
index 54846182d6..cefea0b012 100644
--- a/examples/fluids/src/setupdm.c
+++ b/examples/fluids/src/setupdm.c
@@ -27,7 +27,7 @@ PetscErrorCode CreateDM(MPI_Comm comm, ProblemData *problem, DM *dm) {
 
 // Setup DM
 PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree,
-                       SimpleBC bc, Physics phys, void *setup_ctx) {
+                       SimpleBC bc, Physics phys) {
   PetscErrorCode ierr;
   PetscFunctionBeginUser;
   {
@@ -41,44 +41,34 @@ PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree,
     ierr = PetscObjectSetName((PetscObject)fe, "Q"); CHKERRQ(ierr);
     ierr = DMAddField(dm, NULL,(PetscObject)fe); CHKERRQ(ierr);
     ierr = DMCreateDS(dm); CHKERRQ(ierr);
-    {
-      /* create FE field for coordinates */
-      PetscFE fe_coords;
-      PetscInt num_comp_coord;
-      ierr = DMGetCoordinateDim(dm, &num_comp_coord); CHKERRQ(ierr);
-      ierr = PetscFECreateLagrange(PETSC_COMM_SELF, problem->dim, num_comp_coord,
-                                   PETSC_FALSE, 1, 1, &fe_coords); CHKERRQ(ierr);
-      ierr = DMProjectCoordinates(dm, fe_coords); CHKERRQ(ierr);
-      ierr = PetscFEDestroy(&fe_coords); CHKERRQ(ierr);
-    }
     ierr = DMGetLabel(dm, "Face Sets", &label); CHKERRQ(ierr);
     // Set wall BCs
     if (bc->num_wall > 0) {
       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label,
                            bc->num_wall, bc->walls, 0, bc->num_comps,
                            bc->wall_comps, (void(*)(void))problem->bc,
-                           NULL, setup_ctx, NULL);  CHKERRQ(ierr);
+                           NULL, problem->bc_ctx, NULL);  CHKERRQ(ierr);
     }
     // Set slip BCs in the x direction
     if (bc->num_slip[0] > 0) {
       PetscInt comps[1] = {1};
       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "slipx", label,
                            bc->num_slip[0], bc->slips[0], 0, 1, comps,
-                           (void(*)(void))NULL, NULL, setup_ctx, NULL); CHKERRQ(ierr);
+                           (void(*)(void))NULL, NULL, problem->bc_ctx, NULL); CHKERRQ(ierr);
     }
     // Set slip BCs in the y direction
     if (bc->num_slip[1] > 0) {
       PetscInt comps[1] = {2};
       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "slipy", label,
                            bc->num_slip[1], bc->slips[1], 0, 1, comps,
-                           (void(*)(void))NULL, NULL, setup_ctx, NULL); CHKERRQ(ierr);
+                           (void(*)(void))NULL, NULL, problem->bc_ctx, NULL); CHKERRQ(ierr);
     }
     // Set slip BCs in the z direction
     if (bc->num_slip[2] > 0) {
       PetscInt comps[1] = {3};
       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "slipz", label,
                            bc->num_slip[2], bc->slips[2], 0, 1, comps,
-                           (void(*)(void))NULL, NULL, setup_ctx, NULL); CHKERRQ(ierr);
+                           (void(*)(void))NULL, NULL, problem->bc_ctx, NULL); CHKERRQ(ierr);
     }
     ierr = DMPlexSetClosurePermutationTensor(dm, PETSC_DETERMINE, NULL);
     CHKERRQ(ierr);
@@ -105,7 +95,7 @@ PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree,
 
 // Refine DM for high-order viz
 PetscErrorCode VizRefineDM(DM dm, User user, ProblemData *problem,
-                           SimpleBC bc, Physics phys, void *setup_ctx) {
+                           SimpleBC bc, Physics phys) {
   PetscErrorCode ierr;
   DM             dm_hierarchy[user->app_ctx->viz_refine + 1];
   VecType        vec_type;
@@ -126,7 +116,7 @@ PetscErrorCode VizRefineDM(DM dm, User user, ProblemData *problem,
     if (i + 1 == user->app_ctx->viz_refine) d = 1;
     ierr = DMGetVecType(dm, &vec_type); CHKERRQ(ierr);
     ierr = DMSetVecType(dm_hierarchy[i+1], vec_type); CHKERRQ(ierr);
-    ierr = SetUpDM(dm_hierarchy[i+1], problem, d, bc, phys, setup_ctx);
+    ierr = SetUpDM(dm_hierarchy[i+1], problem, d, bc, phys);
     CHKERRQ(ierr);
     ierr = DMCreateInterpolation(dm_hierarchy[i], dm_hierarchy[i+1], &interp_next,
                                  NULL); CHKERRQ(ierr);
diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c
index cae34465cb..84972e3456 100644
--- a/examples/fluids/src/setuplibceed.c
+++ b/examples/fluids/src/setuplibceed.c
@@ -84,7 +84,7 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc,
   CeedCompositeOperatorAddSub(*op_apply, op_apply_vol);
 
   // -- Create Sub-Operator for in/outflow BCs
-  if (phys->has_neumann) {
+  if (phys->has_neumann || 1) {
     // --- Setup
     ierr = DMGetLabel(dm, "Face Sets", &domain_label); CHKERRQ(ierr);
     //ierr = DMGetDimension(dm, &dim); CHKERRQ(ierr);
@@ -205,6 +205,13 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc,
       CeedOperatorDestroy(&op_apply_outflow);
     }
   }
+
+  // ----- Get Context Labels for Operator
+  CeedOperatorContextGetFieldLabel(*op_apply, "solution time",
+                                   &phys->solution_time_label);
+  CeedOperatorContextGetFieldLabel(*op_apply, "timestep size",
+                                   &phys->timestep_size_label);
+
   PetscFunctionReturn(0);
 }
 
@@ -250,8 +257,14 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   // CEED QFunctions
   // -----------------------------------------------------------------------------
   // -- Create QFunction for quadrature data
-  CeedQFunctionCreateInterior(ceed, 1, problem->setup_vol, problem->setup_vol_loc,
+  CeedQFunctionCreateInterior(ceed, 1, problem->setup_vol.qfunction,
+                              problem->setup_vol.qfunction_loc,
                               &ceed_data->qf_setup_vol);
+  if (problem->setup_vol.qfunction_context) {
+    CeedQFunctionSetContext(ceed_data->qf_setup_vol,
+                            problem->setup_vol.qfunction_context);
+    CeedQFunctionContextDestroy(&problem->setup_vol.qfunction_context);
+  }
   CeedQFunctionAddInput(ceed_data->qf_setup_vol, "dx", num_comp_x*dim,
                         CEED_EVAL_GRAD);
   CeedQFunctionAddInput(ceed_data->qf_setup_vol, "weight", 1, CEED_EVAL_WEIGHT);
@@ -259,15 +272,21 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
                          CEED_EVAL_NONE);
 
   // -- Create QFunction for ICs
-  CeedQFunctionCreateInterior(ceed, 1, problem->ics, problem->ics_loc,
+  CeedQFunctionCreateInterior(ceed, 1, problem->ics.qfunction,
+                              problem->ics.qfunction_loc,
                               &ceed_data->qf_ics);
+  CeedQFunctionSetContext(ceed_data->qf_ics, problem->ics.qfunction_context);
+  CeedQFunctionContextDestroy(&problem->ics.qfunction_context);
   CeedQFunctionAddInput(ceed_data->qf_ics, "x", num_comp_x, CEED_EVAL_INTERP);
   CeedQFunctionAddOutput(ceed_data->qf_ics, "q0", num_comp_q, CEED_EVAL_NONE);
 
   // -- Create QFunction for RHS
-  if (problem->apply_vol_rhs) {
-    CeedQFunctionCreateInterior(ceed, 1, problem->apply_vol_rhs,
-                                problem->apply_vol_rhs_loc, &ceed_data->qf_rhs_vol);
+  if (problem->apply_vol_rhs.qfunction) {
+    CeedQFunctionCreateInterior(ceed, 1, problem->apply_vol_rhs.qfunction,
+                                problem->apply_vol_rhs.qfunction_loc, &ceed_data->qf_rhs_vol);
+    CeedQFunctionSetContext(ceed_data->qf_rhs_vol,
+                            problem->apply_vol_rhs.qfunction_context);
+    CeedQFunctionContextDestroy(&problem->apply_vol_rhs.qfunction_context);
     CeedQFunctionAddInput(ceed_data->qf_rhs_vol, "q", num_comp_q, CEED_EVAL_INTERP);
     CeedQFunctionAddInput(ceed_data->qf_rhs_vol, "dq", num_comp_q*dim,
                           CEED_EVAL_GRAD);
@@ -281,9 +300,12 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   }
 
   // -- Create QFunction for IFunction
-  if (problem->apply_vol_ifunction) {
-    CeedQFunctionCreateInterior(ceed, 1, problem->apply_vol_ifunction,
-                                problem->apply_vol_ifunction_loc, &ceed_data->qf_ifunction_vol);
+  if (problem->apply_vol_ifunction.qfunction) {
+    CeedQFunctionCreateInterior(ceed, 1, problem->apply_vol_ifunction.qfunction,
+                                problem->apply_vol_ifunction.qfunction_loc, &ceed_data->qf_ifunction_vol);
+    CeedQFunctionSetContext(ceed_data->qf_ifunction_vol,
+                            problem->apply_vol_ifunction.qfunction_context);
+    CeedQFunctionContextDestroy(&problem->apply_vol_ifunction.qfunction_context);
     CeedQFunctionAddInput(ceed_data->qf_ifunction_vol, "q", num_comp_q,
                           CEED_EVAL_INTERP);
     CeedQFunctionAddInput(ceed_data->qf_ifunction_vol, "dq", num_comp_q*dim,
@@ -347,6 +369,8 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
                        ceed_data->basis_xc, CEED_VECTOR_ACTIVE);
   CeedOperatorSetField(ceed_data->op_ics, "q0", ceed_data->elem_restr_q,
                        CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
+  CeedOperatorContextGetFieldLabel(ceed_data->op_ics, "evaluation time",
+                                   &user->phys->ics_time_label);
 
   // Create CEED operator for RHS
   if (ceed_data->qf_rhs_vol) {
@@ -409,8 +433,14 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   // CEED QFunctions
   // -----------------------------------------------------------------------------
   // -- Create QFunction for quadrature data
-  CeedQFunctionCreateInterior(ceed, 1, problem->setup_sur, problem->setup_sur_loc,
+  CeedQFunctionCreateInterior(ceed, 1, problem->setup_sur.qfunction,
+                              problem->setup_sur.qfunction_loc,
                               &ceed_data->qf_setup_sur);
+  if (problem->setup_sur.qfunction_context) {
+    CeedQFunctionSetContext(ceed_data->qf_setup_sur,
+                            problem->setup_sur.qfunction_context);
+    CeedQFunctionContextDestroy(&problem->setup_sur.qfunction_context);
+  }
   CeedQFunctionAddInput(ceed_data->qf_setup_sur, "dx", num_comp_x*dim_sur,
                         CEED_EVAL_GRAD);
   CeedQFunctionAddInput(ceed_data->qf_setup_sur, "weight", 1, CEED_EVAL_WEIGHT);
@@ -418,9 +448,12 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
                          q_data_size_sur, CEED_EVAL_NONE);
 
   // -- Creat QFunction for inflow boundaries
-  if (problem->apply_inflow) {
-    CeedQFunctionCreateInterior(ceed, 1, problem->apply_inflow,
-                                problem->apply_inflow_loc, &ceed_data->qf_apply_inflow);
+  if (problem->apply_inflow.qfunction) {
+    CeedQFunctionCreateInterior(ceed, 1, problem->apply_inflow.qfunction,
+                                problem->apply_inflow.qfunction_loc, &ceed_data->qf_apply_inflow);
+    CeedQFunctionSetContext(ceed_data->qf_apply_inflow,
+                            problem->apply_inflow.qfunction_context);
+    CeedQFunctionContextDestroy(&problem->apply_inflow.qfunction_context);
     CeedQFunctionAddInput(ceed_data->qf_apply_inflow, "q", num_comp_q,
                           CEED_EVAL_INTERP);
     CeedQFunctionAddInput(ceed_data->qf_apply_inflow, "surface qdata",
@@ -432,9 +465,12 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   }
 
   // -- Creat QFunction for outflow boundaries
-  if (problem->apply_outflow) {
-    CeedQFunctionCreateInterior(ceed, 1, problem->apply_outflow,
-                                problem->apply_outflow_loc, &ceed_data->qf_apply_outflow);
+  if (problem->apply_outflow.qfunction) {
+    CeedQFunctionCreateInterior(ceed, 1, problem->apply_outflow.qfunction,
+                                problem->apply_outflow.qfunction_loc, &ceed_data->qf_apply_outflow);
+    CeedQFunctionSetContext(ceed_data->qf_apply_outflow,
+                            problem->apply_outflow.qfunction_context);
+    CeedQFunctionContextDestroy(&problem->apply_outflow.qfunction_context);
     CeedQFunctionAddInput(ceed_data->qf_apply_outflow, "q", num_comp_q,
                           CEED_EVAL_INTERP);
     CeedQFunctionAddInput(ceed_data->qf_apply_outflow, "surface qdata",
diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c
index 49374a1524..8bc6dedb1e 100644
--- a/examples/fluids/src/setupts.c
+++ b/examples/fluids/src/setupts.c
@@ -89,8 +89,15 @@ PetscErrorCode RHS_NS(TS ts, PetscReal t, Vec Q, Vec G, void *user_data) {
   PetscErrorCode ierr;
   PetscFunctionBeginUser;
 
-  // Update EulerContext
-  if (user->phys->has_curr_time) user->phys->euler_ctx->curr_time = t;
+  // Update context field labels
+  if (user->phys->solution_time_label)
+    CeedOperatorContextSetDouble(user->op_rhs, user->phys->solution_time_label, &t);
+  if (user->phys->timestep_size_label) {
+    PetscScalar dt;
+    ierr = TSGetTimeStep(ts,&dt); CHKERRQ(ierr);
+    CeedOperatorContextSetDouble(user->op_rhs, user->phys->timestep_size_label,
+                                 &dt);
+  }
 
   // Get local vectors
   ierr = DMGetLocalVector(user->dm, &Q_loc); CHKERRQ(ierr);
@@ -146,8 +153,16 @@ PetscErrorCode IFunction_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, Vec G,
   PetscErrorCode    ierr;
   PetscFunctionBeginUser;
 
-  // Update EulerContext
-  if (user->phys->has_curr_time) user->phys->euler_ctx->curr_time = t;
+  // Update context field labels
+  if (user->phys->solution_time_label)
+    CeedOperatorContextSetDouble(user->op_ifunction,
+                                 user->phys->solution_time_label, &t);
+  if (user->phys->timestep_size_label) {
+    PetscScalar dt;
+    ierr = TSGetTimeStep(ts,&dt); CHKERRQ(ierr);
+    CeedOperatorContextSetDouble(user->op_ifunction,
+                                 user->phys->timestep_size_label, &dt);
+  }
 
   // Get local vectors
   ierr = DMGetLocalVector(user->dm, &Q_loc); CHKERRQ(ierr);
@@ -220,7 +235,8 @@ PetscErrorCode TSMonitor_NS(TS ts, PetscInt step_no, PetscReal time,
   ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Q_loc); CHKERRQ(ierr);
 
   // Output
-  ierr = PetscSNPrintf(file_path, sizeof file_path, "%s/ns-%03D.vtu",
+  ierr = PetscSNPrintf(file_path, sizeof file_path,
+                       "%s/ns-%03" PetscInt_FMT ".vtu",
                        user->app_ctx->output_dir, step_no + user->app_ctx->cont_steps);
   CHKERRQ(ierr);
   ierr = PetscViewerVTKOpen(PetscObjectComm((PetscObject)Q), file_path,
@@ -241,7 +257,7 @@ PetscErrorCode TSMonitor_NS(TS ts, PetscInt step_no, PetscReal time,
     ierr = DMGlobalToLocal(user->dm_viz, Q_refined, INSERT_VALUES, Q_refined_loc);
     CHKERRQ(ierr);
     ierr = PetscSNPrintf(file_path_refined, sizeof file_path_refined,
-                         "%s/nsrefined-%03D.vtu", user->app_ctx->output_dir,
+                         "%s/nsrefined-%03" PetscInt_FMT ".vtu", user->app_ctx->output_dir,
                          step_no + user->app_ctx->cont_steps);
     CHKERRQ(ierr);
     ierr = PetscViewerVTKOpen(PetscObjectComm((PetscObject)Q_refined),
diff --git a/examples/fluids/tests-output/blasius_stgtest.yaml b/examples/fluids/tests-output/blasius_stgtest.yaml
new file mode 100644
index 0000000000..7da68de046
--- /dev/null
+++ b/examples/fluids/tests-output/blasius_stgtest.yaml
@@ -0,0 +1,40 @@
+problem: 'blasius'
+
+implicit: true
+ts:
+  adapt_type: 'none'
+  type: 'beuler'
+  dt: 2e-6
+  max_steps: 10
+output_freq: 10
+
+dm_plex_box_faces: 3,30,1
+platemesh:
+  Ndelta: 22
+  growth: 1.1664 # 1.08^2
+
+stab: 'supg'
+Ctau_t: 1
+Ctau_v: 36
+Ctau_C: 0.25
+Ctau_M: 0.25
+Ctau_E: 0.125
+
+q_extra: 0
+
+dm_plex_box_lower: 0,0,0
+dm_plex_box_upper: 4.2e-3,4.2e-3,5.e-4
+dm_plex_dim: 3
+# Faces labeled 1=z- 2=z+ 3=y- 4=y+ 5=x+ 6=x-
+bc_slip_z: 1,2
+bc_wall: 3
+wall_comps: 1,2,3
+bc_inflow: 6
+bc_outflow: 5,4
+g: 0,0,0
+
+stg:
+  use: true
+  inflow_path: "./examples/fluids/STGInflow_blasius.dat"
+  rand_path: "./examples/fluids/STGRand.dat"
+  mean_only: false
diff --git a/examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin
new file mode 100644
index 0000000000..e46c092fc4
Binary files /dev/null and b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin differ
diff --git a/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin
new file mode 100644
index 0000000000..698d50c7bf
Binary files /dev/null and b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin differ
diff --git a/examples/fluids/tests-output/fluids-navierstokes-channel.bin b/examples/fluids/tests-output/fluids-navierstokes-channel.bin
new file mode 100644
index 0000000000..23d6f0e271
Binary files /dev/null and b/examples/fluids/tests-output/fluids-navierstokes-channel.bin differ
diff --git a/examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin b/examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin
new file mode 100644
index 0000000000..b42c972cc5
Binary files /dev/null and b/examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin differ
diff --git a/examples/petsc/README.md b/examples/petsc/README.md
index 6ac521ff7b..9339cb791a 100644
--- a/examples/petsc/README.md
+++ b/examples/petsc/README.md
@@ -61,7 +61,7 @@ The following arguments can be specified for all of the above examples:
 - `-ceed`              - CEED resource specifier
 - `-problem`           - CEED benchmark problem to solve
 - `-degree`            - Polynomial degree of tensor product basis
-- `-qextra`            - Number of extra quadrature points
+- `-q_extra`           - Number of extra quadrature points
 - `-test`              - Testing mode (do not print unless error is large)
 - `-benchmark`         - Benchmarking mode (prints benchmark statistics)
 
@@ -84,7 +84,7 @@ The following arguments can be specified for the area example:
 - `-ceed`              - CEED resource specifier
 - `-problem`           - Problem to solve, either 'cube' or 'sphere'
 - `-petscspace_degree` - Polynomial degree of tensor product basis
-- `-qextra`            - Number of extra quadrature points
+- `-q_extra`           - Number of extra quadrature points
 - `-test`              - Testing mode (do not print unless error is large)
 - `-mesh`              - Read mesh from file
 
diff --git a/examples/petsc/area.c b/examples/petsc/area.c
index d929b8d1cb..35828cbeb4 100644
--- a/examples/petsc/area.c
+++ b/examples/petsc/area.c
@@ -92,9 +92,7 @@ int main(int argc, char **argv) {
   comm = PETSC_COMM_WORLD;
 
   // Read command line options
-  ierr = PetscOptionsBegin(comm, NULL, "CEED surface area problem with PETSc",
-                           NULL);
-  CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "CEED surface area problem with PETSc", NULL);
   problem_choice = SPHERE;
   ierr = PetscOptionsEnum("-problem",
                           "Problem to solve", NULL,
@@ -116,7 +114,7 @@ int main(int argc, char **argv) {
                           NULL, simplex, &simplex, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis",
                          NULL, degree, &degree, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // Setup DM
   if (read_mesh) {
@@ -192,9 +190,9 @@ int main(int argc, char **argv) {
                        "  Mesh:\n"
                        "    Number of 1D Basis Nodes (p)       : %d\n"
                        "    Number of 1D Quadrature Points (q) : %d\n"
-                       "    Global nodes                       : %D\n"
-                       "    DoF per node                       : %D\n"
-                       "    Global DoFs                        : %D\n",
+                       "    Global nodes                       : %" PetscInt_FMT "\n"
+                       "    DoF per node                       : %" PetscInt_FMT "\n"
+                       "    Global DoFs                        : %" PetscInt_FMT "\n",
                        used_resource, CeedMemTypes[mem_type_backend], P, Q,
                        g_size/num_comp_u, num_comp_u, g_size); CHKERRQ(ierr);
   }
diff --git a/examples/petsc/bps.c b/examples/petsc/bps.c
index ce72b1e144..3d3c66da91 100644
--- a/examples/petsc/bps.c
+++ b/examples/petsc/bps.c
@@ -183,10 +183,10 @@ static PetscErrorCode RunWithDM(RunParams rp, DM dm,
                        "  Mesh:\n"
                        "    Number of 1D Basis Nodes (P)       : %d\n"
                        "    Number of 1D Quadrature Points (Q) : %d\n"
-                       "    Global nodes                       : %D\n"
-                       "    Local Elements                     : %D\n"
-                       "    Owned nodes                        : %D\n"
-                       "    DoF per node                       : %D\n",
+                       "    Global nodes                       : %" PetscInt_FMT "\n"
+                       "    Local Elements                     : %" PetscInt_FMT "\n"
+                       "    Owned nodes                        : %" PetscInt_FMT "\n"
+                       "    DoF per node                       : %" PetscInt_FMT "\n",
                        rp->bp_choice+1, rp->hostname, comm_size,
                        rp->ranks_per_node, vec_type, used_resource,
                        CeedMemTypes[mem_type_backend],
@@ -308,7 +308,7 @@ static PetscErrorCode RunWithDM(RunParams rp, DM dm,
                          "  KSP:\n"
                          "    KSP Type                           : %s\n"
                          "    KSP Convergence                    : %s\n"
-                         "    Total KSP Iterations               : %D\n"
+                         "    Total KSP Iterations               : %" PetscInt_FMT "\n"
                          "    Final rnorm                        : %e\n",
                          ksp_type, KSPConvergedReasons[reason], its,
                          (double)rnorm); CHKERRQ(ierr);
@@ -469,8 +469,7 @@ int main(int argc, char **argv) {
   rp->comm = comm;
 
   // Read command line options
-  ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
-  CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
   {
     PetscBool set;
     ierr = PetscOptionsEnumArray("-problem", "CEED benchmark problem to solve",
@@ -558,8 +557,7 @@ int main(int argc, char **argv) {
     if (flg) ranks_per_node = p;
   }
 
-  ierr = PetscOptionsEnd();
-  CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // Register PETSc logging stage
   ierr = PetscLogStageRegister("Solve Stage", &rp->solve_stage);
diff --git a/examples/petsc/bpsraw.c b/examples/petsc/bpsraw.c
index d7e350005d..166516f8ba 100644
--- a/examples/petsc/bpsraw.c
+++ b/examples/petsc/bpsraw.c
@@ -439,7 +439,7 @@ int main(int argc, char **argv) {
   comm = PETSC_COMM_WORLD;
 
   // Read command line options
-  ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
   bp_choice = CEED_BP1;
   ierr = PetscOptionsEnum("-problem",
                           "CEED benchmark problem to solve", NULL,
@@ -480,7 +480,7 @@ int main(int argc, char **argv) {
                               "Min and max number of iterations to use during benchmarking",
                               NULL, ksp_max_it_clip, &two, NULL);
   CHKERRQ(ierr);
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
   P = degree + 1;
   Q = P + q_extra;
 
@@ -555,11 +555,14 @@ int main(int argc, char **argv) {
                        "  Mesh:\n"
                        "    Number of 1D Basis Nodes (P)       : %d\n"
                        "    Number of 1D Quadrature Points (Q) : %d\n"
-                       "    Global nodes                       : %D\n"
-                       "    Process Decomposition              : %D %D %D\n"
-                       "    Local Elements                     : %D = %D %D %D\n"
-                       "    Owned nodes                        : %D = %D %D %D\n"
-                       "    DoF per node                       : %D\n",
+                       "    Global nodes                       : %" PetscInt_FMT "\n"
+                       "    Process Decomposition              : %" PetscInt_FMT
+                       " %" PetscInt_FMT " %" PetscInt_FMT "\n"
+                       "    Local Elements                     : %" PetscInt_FMT
+                       " = %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n"
+                       "    Owned nodes                        : %" PetscInt_FMT
+                       " = %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n"
+                       "    DoF per node                       : %" PetscInt_FMT "\n",
                        bp_choice+1, vec_type, used_resource,
                        CeedMemTypes[mem_type_backend],
                        P, Q,  gsize/num_comp_u, p[0], p[1], p[2], local_elem,
@@ -903,7 +906,7 @@ int main(int argc, char **argv) {
                          "  KSP:\n"
                          "    KSP Type                           : %s\n"
                          "    KSP Convergence                    : %s\n"
-                         "    Total KSP Iterations               : %D\n"
+                         "    Total KSP Iterations               : %" PetscInt_FMT "\n"
                          "    Final rnorm                        : %e\n",
                          ksp_type, KSPConvergedReasons[reason], its,
                          (double)rnorm); CHKERRQ(ierr);
diff --git a/examples/petsc/bpssphere.c b/examples/petsc/bpssphere.c
index 6c7576b9c6..79a4019261 100644
--- a/examples/petsc/bpssphere.c
+++ b/examples/petsc/bpssphere.c
@@ -87,7 +87,7 @@ int main(int argc, char **argv) {
   comm = PETSC_COMM_WORLD;
 
   // Read command line options
-  ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
   bp_choice = CEED_BP1;
   ierr = PetscOptionsEnum("-problem",
                           "CEED benchmark problem to solve", NULL,
@@ -124,7 +124,7 @@ int main(int argc, char **argv) {
   simplex = PETSC_FALSE;
   ierr = PetscOptionsBool("-simplex", "Use simplices, or tensor product cells",
                           NULL, simplex, &simplex, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // Setup DM
   if (read_mesh) {
@@ -200,7 +200,7 @@ int main(int argc, char **argv) {
                        "  Mesh:\n"
                        "    Number of 1D Basis Nodes (p)       : %d\n"
                        "    Number of 1D Quadrature Points (q) : %d\n"
-                       "    Global nodes                       : %D\n",
+                       "    Global nodes                       : %" PetscInt_FMT "\n",
                        bp_choice+1, ceed_resource, CeedMemTypes[mem_type_backend], P, Q,
                        g_size/num_comp_u); CHKERRQ(ierr);
   }
@@ -326,7 +326,7 @@ int main(int argc, char **argv) {
                          "  KSP:\n"
                          "    KSP Type                           : %s\n"
                          "    KSP Convergence                    : %s\n"
-                         "    Total KSP Iterations               : %D\n"
+                         "    Total KSP Iterations               : %" PetscInt_FMT "\n"
                          "    Final rnorm                        : %e\n",
                          ksp_type, KSPConvergedReasons[reason], its,
                          (double)rnorm); CHKERRQ(ierr);
diff --git a/examples/petsc/multigrid.c b/examples/petsc/multigrid.c
index 41b9fe26c7..c32fd93524 100644
--- a/examples/petsc/multigrid.c
+++ b/examples/petsc/multigrid.c
@@ -87,7 +87,7 @@ int main(int argc, char **argv) {
   comm = PETSC_COMM_WORLD;
 
   // Parse command line options
-  ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
   bp_choice = CEED_BP3;
   ierr = PetscOptionsEnum("-problem",
                           "CEED benchmark problem to solve", NULL,
@@ -112,12 +112,12 @@ int main(int argc, char **argv) {
                             "Epsilon parameter for Kershaw mesh transformation",
                             NULL, eps, &eps, NULL);
   if (eps > 1 || eps <= 0) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE,
-                                     "-eps %D must be (0,1]", eps);
+                                     "-eps %g must be (0,1]", (double)PetscRealPart(eps));
   degree = test_mode ? 3 : 2;
   ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis",
                          NULL, degree, &degree, NULL); CHKERRQ(ierr);
   if (degree < 1) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE,
-                            "-degree %D must be at least 1", degree);
+                            "-degree %" PetscInt_FMT " must be at least 1", degree);
   q_extra = bp_options[bp_choice].q_extra;
   ierr = PetscOptionsInt("-q_extra", "Number of extra quadrature points",
                          NULL, q_extra, &q_extra, NULL); CHKERRQ(ierr);
@@ -138,7 +138,7 @@ int main(int argc, char **argv) {
     ierr = PetscOptionsIntArray("-cells","Number of cells per dimension", NULL,
                                 mesh_elem, &tmp, NULL); CHKERRQ(ierr);
   }
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // Set up libCEED
   CeedInit(ceed_resource, &ceed);
@@ -273,9 +273,9 @@ int main(int argc, char **argv) {
                        "  Mesh:\n"
                        "    Number of 1D Basis Nodes (p)       : %d\n"
                        "    Number of 1D Quadrature Points (q) : %d\n"
-                       "    Global Nodes                       : %D\n"
-                       "    Owned Nodes                        : %D\n"
-                       "    DoF per node                       : %D\n"
+                       "    Global Nodes                       : %" PetscInt_FMT "\n"
+                       "    Owned Nodes                        : %" PetscInt_FMT "\n"
+                       "    DoF per node                       : %" PetscInt_FMT "\n"
                        "  Multigrid:\n"
                        "    Number of Levels                   : %d\n",
                        bp_choice+1, vec_type, used_resource,
@@ -296,10 +296,10 @@ int main(int argc, char **argv) {
   for (int i=0; i<num_levels; i++) {
     // Print level information
     if (!test_mode && (i == 0 || i == fine_level)) {
-      ierr = PetscPrintf(comm,"    Level %D (%s):\n"
+      ierr = PetscPrintf(comm,"    Level %" PetscInt_FMT " (%s):\n"
                          "      Number of 1D Basis Nodes (p)     : %d\n"
-                         "      Global Nodes                     : %D\n"
-                         "      Owned Nodes                      : %D\n",
+                         "      Global Nodes                     : %" PetscInt_FMT "\n"
+                         "      Owned Nodes                      : %" PetscInt_FMT "\n",
                          i, (i? "fine" : "coarse"), level_degrees[i] + 1,
                          g_size[i]/num_comp_u, l_size[i]/num_comp_u); CHKERRQ(ierr);
     }
@@ -562,7 +562,7 @@ int main(int argc, char **argv) {
                          "  KSP:\n"
                          "    KSP Type                           : %s\n"
                          "    KSP Convergence                    : %s\n"
-                         "    Total KSP Iterations               : %D\n"
+                         "    Total KSP Iterations               : %" PetscInt_FMT "\n"
                          "    Final rnorm                        : %e\n",
                          ksp_type, KSPConvergedReasons[reason], its,
                          (double)rnorm); CHKERRQ(ierr);
diff --git a/examples/rust/ex1-volume/src/main.rs b/examples/rust/ex1-volume/src/main.rs
index 60af355708..8efda23bdc 100644
--- a/examples/rust/ex1-volume/src/main.rs
+++ b/examples/rust/ex1-volume/src/main.rs
@@ -178,6 +178,7 @@ fn example_1(options: opt::Opt) -> libceed::Result<()> {
     // Operator that build the quadrature data for the mass operator
     let op_build = ceed
         .operator(qf_build, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("build qdata")?
         .field("dx", &restr_mesh, &basis_mesh, VectorOpt::Active)?
         .field(
             "weights",
@@ -226,6 +227,7 @@ fn example_1(options: opt::Opt) -> libceed::Result<()> {
     // Mass Operator
     let op_mass = ceed
         .operator(qf_mass, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("mass")?
         .field("u", &restr_solution, &basis_solution, VectorOpt::Active)?
         .field("qdata", &restr_qdata, BasisOpt::Collocated, &qdata)?
         .field("v", &restr_solution, &basis_solution, VectorOpt::Active)?
diff --git a/examples/rust/ex2-surface/src/main.rs b/examples/rust/ex2-surface/src/main.rs
index 8cd8a6b203..42882590a5 100644
--- a/examples/rust/ex2-surface/src/main.rs
+++ b/examples/rust/ex2-surface/src/main.rs
@@ -220,6 +220,7 @@ fn example_2(options: opt::Opt) -> libceed::Result<()> {
     // Operator that build the quadrature data for the diff operator
     let op_build = ceed
         .operator(qf_build, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("build qdata")?
         .field("dx", &restr_mesh, &basis_mesh, VectorOpt::Active)?
         .field(
             "weights",
@@ -305,6 +306,7 @@ fn example_2(options: opt::Opt) -> libceed::Result<()> {
     // Diff Operator
     let op_diff = ceed
         .operator(qf_diff, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("Poisson")?
         .field("du", &restr_solution, &basis_solution, VectorOpt::Active)?
         .field("qdata", &restr_qdata, BasisOpt::Collocated, &qdata)?
         .field("dv", &restr_solution, &basis_solution, VectorOpt::Active)?
diff --git a/examples/rust/ex3-vector-volume/src/main.rs b/examples/rust/ex3-vector-volume/src/main.rs
index 20f3ff92ff..518aa47a52 100644
--- a/examples/rust/ex3-vector-volume/src/main.rs
+++ b/examples/rust/ex3-vector-volume/src/main.rs
@@ -187,6 +187,7 @@ fn example_3(options: opt::Opt) -> libceed::Result<()> {
     // Operator that build the quadrature data for the mass operator
     let op_build = ceed
         .operator(qf_build, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("build qdata")?
         .field("dx", &restr_mesh, &basis_mesh, VectorOpt::Active)?
         .field(
             "weights",
@@ -239,6 +240,7 @@ fn example_3(options: opt::Opt) -> libceed::Result<()> {
     // Mass Operator
     let op_mass = ceed
         .operator(qf_mass, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("mass")?
         .field("u", &restr_solution, &basis_solution, VectorOpt::Active)?
         .field("qdata", &restr_qdata, BasisOpt::Collocated, &qdata)?
         .field("v", &restr_solution, &basis_solution, VectorOpt::Active)?
diff --git a/examples/rust/ex4-vector-surface/src/main.rs b/examples/rust/ex4-vector-surface/src/main.rs
index 5583235f87..5a0c1e25dd 100644
--- a/examples/rust/ex4-vector-surface/src/main.rs
+++ b/examples/rust/ex4-vector-surface/src/main.rs
@@ -227,6 +227,7 @@ fn example_4(options: opt::Opt) -> libceed::Result<()> {
     // Operator that build the quadrature data for the diff operator
     let op_build = ceed
         .operator(qf_build, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("build qdata")?
         .field("dx", &restr_mesh, &basis_mesh, VectorOpt::Active)?
         .field(
             "weights",
@@ -326,6 +327,7 @@ fn example_4(options: opt::Opt) -> libceed::Result<()> {
     // Diff Operator
     let op_diff = ceed
         .operator(qf_diff, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("Poisson")?
         .field("du", &restr_solution, &basis_solution, VectorOpt::Active)?
         .field("qdata", &restr_qdata, BasisOpt::Collocated, &qdata)?
         .field("dv", &restr_solution, &basis_solution, VectorOpt::Active)?
diff --git a/examples/solids/README.md b/examples/solids/README.md
index 0a1fd57629..c204333939 100644
--- a/examples/solids/README.md
+++ b/examples/solids/README.md
@@ -96,7 +96,7 @@ The command line options just shown are the minimum requirements to run the mini
   - CEED resource specifier
   - `/cpu/self`
 
-* - `-qextra`
+* - `-q_extra`
   - Number of extra quadrature points
   - `0`
 
diff --git a/examples/solids/elasticity.c b/examples/solids/elasticity.c
index d5c763357a..71a6cb0313 100644
--- a/examples/solids/elasticity.c
+++ b/examples/solids/elasticity.c
@@ -388,9 +388,9 @@ int main(int argc, char **argv) {
                        "    File                               : %s\n"
                        "    Number of 1D Basis Nodes (p)       : %d\n"
                        "    Number of 1D Quadrature Points (q) : %d\n"
-                       "    Global nodes                       : %D\n"
-                       "    Owned nodes                        : %D\n"
-                       "    DoF per node                       : %D\n"
+                       "    Global nodes                       : %" PetscInt_FMT "\n"
+                       "    Owned nodes                        : %" PetscInt_FMT "\n"
+                       "    DoF per node                       : %" PetscInt_FMT "\n"
                        "  Multigrid:\n"
                        "    Type                               : %s\n"
                        "    Number of Levels                   : %d\n",
@@ -412,10 +412,10 @@ int main(int argc, char **argv) {
       for (PetscInt i = 0; i < 2; i++) {
         CeedInt level = i ? fine_level : 0;
         ierr = PetscPrintf(comm,
-                           "    Level %D (%s):\n"
+                           "    Level %" PetscInt_FMT " (%s):\n"
                            "      Number of 1D Basis Nodes (p)     : %d\n"
-                           "      Global Nodes                     : %D\n"
-                           "      Owned Nodes                      : %D\n",
+                           "      Global Nodes                     : %" PetscInt_FMT "\n"
+                           "      Owned Nodes                      : %" PetscInt_FMT "\n",
                            level, i ? "fine" : "coarse",
                            app_ctx->level_degrees[level] + 1,
                            U_g_size[level]/num_comp_u, U_l_size[level]/num_comp_u);
@@ -763,7 +763,7 @@ int main(int argc, char **argv) {
                        "    SNES Convergence                   : %s\n"
                        "    Number of Load Increments          : %d\n"
                        "    Completed Load Increments          : %d\n"
-                       "    Total SNES Iterations              : %D\n"
+                       "    Total SNES Iterations              : %" PetscInt_FMT "\n"
                        "    Final rnorm                        : %e\n",
                        snes_type, SNESConvergedReasons[reason],
                        app_ctx->num_increments, increment - 1,
@@ -777,7 +777,7 @@ int main(int argc, char **argv) {
     ierr = PetscPrintf(comm,
                        "  Linear Solver:\n"
                        "    KSP Type                           : %s\n"
-                       "    Total KSP Iterations               : %D\n",
+                       "    Total KSP Iterations               : %" PetscInt_FMT "\n",
                        ksp_type, ksp_its); CHKERRQ(ierr);
 
     // -- PC
diff --git a/examples/solids/problems/mooney-rivlin.c b/examples/solids/problems/mooney-rivlin.c
index 58a4eb33d3..712a6b707f 100644
--- a/examples/solids/problems/mooney-rivlin.c
+++ b/examples/solids/problems/mooney-rivlin.c
@@ -38,9 +38,8 @@ PetscErrorCode PhysicsSmootherContext_MR(MPI_Comm comm, Ceed ceed,
 
   PetscFunctionBegin;
 
-  ierr = PetscOptionsBegin(comm, NULL,
-                           "Mooney Rivlin physical parameters for smoother", NULL);
-  CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Mooney Rivlin physical parameters for smoother",
+                    NULL);
 
   ierr = PetscOptionsScalar("-nu_smoother", "Poisson's ratio for smoother",
                             NULL, nu_smoother, &nu_smoother, &nu_flag);
@@ -49,7 +48,7 @@ PetscErrorCode PhysicsSmootherContext_MR(MPI_Comm comm, Ceed ceed,
       nu_smoother >= 0.5) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP,
                                     "Mooney-Rivlin model requires Poisson ratio -nu option in [0, .5)");
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr); // End of setting Physics
+  PetscOptionsEnd(); // End of setting Physics
 
   if (nu_flag) {
     // Copy context
@@ -84,8 +83,7 @@ PetscErrorCode ProcessPhysics_MR(MPI_Comm comm, Physics_MR phys, Units units) {
 
   PetscFunctionBeginUser;
 
-  ierr = PetscOptionsBegin(comm, NULL, "Mooney Rivlin physical parameters", NULL);
-  CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Mooney Rivlin physical parameters", NULL);
 
   ierr = PetscOptionsScalar("-mu_1", "Material Property mu_1", NULL,
                             phys->mu_1, &phys->mu_1, NULL); CHKERRQ(ierr);
@@ -118,7 +116,7 @@ PetscErrorCode ProcessPhysics_MR(MPI_Comm comm, Physics_MR phys, Units units) {
   CHKERRQ(ierr);
   units->kilogram = fabs(units->kilogram);
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr); // End of setting Physics
+  PetscOptionsEnd(); // End of setting Physics
 
   // Define derived units
   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
diff --git a/examples/solids/problems/neo-hookean.c b/examples/solids/problems/neo-hookean.c
index 5d4fa2ba67..5fd17a8c52 100644
--- a/examples/solids/problems/neo-hookean.c
+++ b/examples/solids/problems/neo-hookean.c
@@ -38,15 +38,14 @@ PetscErrorCode PhysicsSmootherContext_NH(MPI_Comm comm, Ceed ceed,
 
   PetscFunctionBegin;
 
-  ierr = PetscOptionsBegin(comm, NULL,
-                           "Neo-Hookean physical parameters for smoother", NULL);
-  CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Neo-Hookean physical parameters for smoother",
+                    NULL);
 
   ierr = PetscOptionsScalar("-nu_smoother", "Poisson's ratio for smoother",
                             NULL, nu_smoother, &nu_smoother, &nu_flag);
   CHKERRQ(ierr);
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr); // End of setting Physics
+  PetscOptionsEnd(); // End of setting Physics
 
   if (nu_flag) {
     // Copy context
@@ -80,8 +79,7 @@ PetscErrorCode ProcessPhysics_NH(MPI_Comm comm, Physics_NH phys, Units units) {
 
   PetscFunctionBeginUser;
 
-  ierr = PetscOptionsBegin(comm, NULL, "Neo-Hookean physical parameters", NULL);
-  CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Neo-Hookean physical parameters", NULL);
 
   ierr = PetscOptionsScalar("-nu", "Poisson's ratio", NULL, phys->nu, &phys->nu,
                             &nu_flag); CHKERRQ(ierr);
@@ -104,7 +102,7 @@ PetscErrorCode ProcessPhysics_NH(MPI_Comm comm, Physics_NH phys, Units units) {
   CHKERRQ(ierr);
   units->kilogram = fabs(units->kilogram);
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr); // End of setting Physics
+  PetscOptionsEnd(); // End of setting Physics
 
   // Check for all required options to be set
   if (!nu_flag) {
diff --git a/examples/solids/src/cl-options.c b/examples/solids/src/cl-options.c
index d4510f8a3b..3e5a0df1ed 100644
--- a/examples/solids/src/cl-options.c
+++ b/examples/solids/src/cl-options.c
@@ -20,9 +20,8 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx) {
 
   PetscFunctionBeginUser;
 
-  ierr = PetscOptionsBegin(comm, NULL,
-                           "Elasticity / Hyperelasticity in PETSc with libCEED",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL,
+                    "Elasticity / Hyperelasticity in PETSc with libCEED", NULL);
 
   ierr = PetscOptionsString("-ceed", "CEED resource specifier",
                             NULL, app_ctx->ceed_resource, app_ctx->ceed_resource,
@@ -41,7 +40,7 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx) {
   CHKERRQ(ierr);
 
   app_ctx->q_extra         = 0;
-  ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points",
+  ierr = PetscOptionsInt("-q_extra", "Number of extra quadrature points",
                          NULL, app_ctx->q_extra, &app_ctx->q_extra, NULL);
   CHKERRQ(ierr);
 
@@ -210,7 +209,7 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx) {
     ierr = PetscViewerASCIIPrintf(app_ctx->energy_viewer, "%f,%e\n", 0., 0.);
     CHKERRQ(ierr);
   }
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr); // End of setting AppCtx
+  PetscOptionsEnd(); // End of setting AppCtx
 
   // Check for all required values set
   if (app_ctx->test_mode) {
diff --git a/examples/solids/src/misc.c b/examples/solids/src/misc.c
index 3bb593316d..8658d4c17e 100644
--- a/examples/solids/src/misc.c
+++ b/examples/solids/src/misc.c
@@ -131,7 +131,7 @@ PetscErrorCode ViewSolution(MPI_Comm comm, AppCtx app_ctx, Vec U,
 
   // Build file name
   ierr = PetscSNPrintf(output_filename, sizeof output_filename,
-                       "%s/solution-%03D.vtu", app_ctx->output_dir,
+                       "%s/solution-%03" PetscInt_FMT ".vtu", app_ctx->output_dir,
                        increment); CHKERRQ(ierr);
 
   // Increment sequence
diff --git a/gallery/identity/ceed-identity.c b/gallery/identity/ceed-identity.c
index a18b8d9fd7..857a2ed808 100644
--- a/gallery/identity/ceed-identity.c
+++ b/gallery/identity/ceed-identity.c
@@ -9,7 +9,7 @@
 #include <ceed/backend.h>
 #include <stddef.h>
 #include <string.h>
-#include "ceed-identity.h"
+#include <ceed/jit-source/gallery/ceed-identity.h>
 
 /**
   @brief Set fields identity QFunction that copies inputs directly into outputs
diff --git a/gallery/mass-vector/ceed-vectormassapply.c b/gallery/mass-vector/ceed-vectormassapply.c
index d08fa2a70c..29d9e5e0a7 100644
--- a/gallery/mass-vector/ceed-vectormassapply.c
+++ b/gallery/mass-vector/ceed-vectormassapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-vectormassapply.h"
+#include <ceed/jit-source/gallery/ceed-vectormassapply.h>
 
 /**
   @brief Set fields for Ceed QFunction for applying the mass matrix
diff --git a/gallery/mass/ceed-mass1dbuild.c b/gallery/mass/ceed-mass1dbuild.c
index 10c3a80103..6506125a82 100644
--- a/gallery/mass/ceed-mass1dbuild.c
+++ b/gallery/mass/ceed-mass1dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-mass1dbuild.h"
+#include <ceed/jit-source/gallery/ceed-mass1dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 1D
diff --git a/gallery/mass/ceed-mass2dbuild.c b/gallery/mass/ceed-mass2dbuild.c
index 554583b3da..ce3aeb00f2 100644
--- a/gallery/mass/ceed-mass2dbuild.c
+++ b/gallery/mass/ceed-mass2dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-mass2dbuild.h"
+#include <ceed/jit-source/gallery/ceed-mass2dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 2D
diff --git a/gallery/mass/ceed-mass3dbuild.c b/gallery/mass/ceed-mass3dbuild.c
index 66ef311006..ec8de0c671 100644
--- a/gallery/mass/ceed-mass3dbuild.c
+++ b/gallery/mass/ceed-mass3dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-mass3dbuild.h"
+#include <ceed/jit-source/gallery/ceed-mass3dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 3D
diff --git a/gallery/mass/ceed-massapply.c b/gallery/mass/ceed-massapply.c
index d30cc89f33..a110cfd52f 100644
--- a/gallery/mass/ceed-massapply.c
+++ b/gallery/mass/ceed-massapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-massapply.h"
+#include <ceed/jit-source/gallery/ceed-massapply.h>
 
 /**
   @brief Set fields for Ceed QFunction for applying the mass matrix
diff --git a/gallery/poisson-vector/ceed-vectorpoisson1dapply.c b/gallery/poisson-vector/ceed-vectorpoisson1dapply.c
index 93ba30cd35..cd54a03446 100644
--- a/gallery/poisson-vector/ceed-vectorpoisson1dapply.c
+++ b/gallery/poisson-vector/ceed-vectorpoisson1dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-vectorpoisson1dapply.h"
+#include <ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 1D Poisson operator
diff --git a/gallery/poisson-vector/ceed-vectorpoisson2dapply.c b/gallery/poisson-vector/ceed-vectorpoisson2dapply.c
index 6bd65ae256..66fc448ae5 100644
--- a/gallery/poisson-vector/ceed-vectorpoisson2dapply.c
+++ b/gallery/poisson-vector/ceed-vectorpoisson2dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-vectorpoisson2dapply.h"
+#include <ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 2D Poisson operator
diff --git a/gallery/poisson-vector/ceed-vectorpoisson3dapply.c b/gallery/poisson-vector/ceed-vectorpoisson3dapply.c
index e2d8b1b169..bf924d492b 100644
--- a/gallery/poisson-vector/ceed-vectorpoisson3dapply.c
+++ b/gallery/poisson-vector/ceed-vectorpoisson3dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-vectorpoisson3dapply.h"
+#include <ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 3D Poisson operator
diff --git a/gallery/poisson/ceed-poisson1dapply.c b/gallery/poisson/ceed-poisson1dapply.c
index 93d5354817..c6e7d7cdfa 100644
--- a/gallery/poisson/ceed-poisson1dapply.c
+++ b/gallery/poisson/ceed-poisson1dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-poisson1dapply.h"
+#include <ceed/jit-source/gallery/ceed-poisson1dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 1D Poisson operator
diff --git a/gallery/poisson/ceed-poisson1dbuild.c b/gallery/poisson/ceed-poisson1dbuild.c
index 98bd7f7c4e..20d418aa68 100644
--- a/gallery/poisson/ceed-poisson1dbuild.c
+++ b/gallery/poisson/ceed-poisson1dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-poisson1dbuild.h"
+#include <ceed/jit-source/gallery/ceed-poisson1dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 1D
diff --git a/gallery/poisson/ceed-poisson2dapply.c b/gallery/poisson/ceed-poisson2dapply.c
index 9b121f5517..e1f47b359d 100644
--- a/gallery/poisson/ceed-poisson2dapply.c
+++ b/gallery/poisson/ceed-poisson2dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-poisson2dapply.h"
+#include <ceed/jit-source/gallery/ceed-poisson2dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 2D Poisson operator
diff --git a/gallery/poisson/ceed-poisson2dbuild.c b/gallery/poisson/ceed-poisson2dbuild.c
index e2cc0f3e39..f79896baca 100644
--- a/gallery/poisson/ceed-poisson2dbuild.c
+++ b/gallery/poisson/ceed-poisson2dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-poisson2dbuild.h"
+#include <ceed/jit-source/gallery/ceed-poisson2dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 2D
diff --git a/gallery/poisson/ceed-poisson3dapply.c b/gallery/poisson/ceed-poisson3dapply.c
index 20a371ab0f..682c1ee3c3 100644
--- a/gallery/poisson/ceed-poisson3dapply.c
+++ b/gallery/poisson/ceed-poisson3dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-poisson3dapply.h"
+#include <ceed/jit-source/gallery/ceed-poisson3dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 3D Poisson operator
diff --git a/gallery/poisson/ceed-poisson3dbuild.c b/gallery/poisson/ceed-poisson3dbuild.c
index 82bf3163b4..5bed48856a 100644
--- a/gallery/poisson/ceed-poisson3dbuild.c
+++ b/gallery/poisson/ceed-poisson3dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-poisson3dbuild.h"
+#include <ceed/jit-source/gallery/ceed-poisson3dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 3D
diff --git a/gallery/scale/ceed-scale.c b/gallery/scale/ceed-scale.c
index 90d738f7e2..14d24f9084 100644
--- a/gallery/scale/ceed-scale.c
+++ b/gallery/scale/ceed-scale.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-scale.h"
+#include <ceed/jit-source/gallery/ceed-scale.h>
 
 /**
   @brief  Set fields for vector scaling QFunction that scales inputs
diff --git a/include/ceed-impl.h b/include/ceed-impl.h
index 993d3088e8..05dcf286f6 100644
--- a/include/ceed-impl.h
+++ b/include/ceed-impl.h
@@ -14,6 +14,8 @@
 #include <ceed/backend.h>
 #include <stdbool.h>
 
+CEED_INTERN const char CeedJitSourceRootDefault[];
+
 /** @defgroup CeedUser Public API for Ceed
     @ingroup Ceed
 */
@@ -89,6 +91,8 @@ struct Ceed_private {
   int obj_delegate_count;
   Ceed op_fallback_ceed, op_fallback_parent;
   const char *op_fallback_resource;
+  char **jit_source_roots;
+  CeedInt num_jit_source_roots;
   int (*Error)(Ceed, const char *, int, const char *, int, const char *,
                va_list *);
   int (*GetPreferredMemType)(CeedMemType *);
@@ -275,6 +279,8 @@ struct CeedQFunctionContext_private {
   int (*RestoreData)(CeedQFunctionContext);
   int (*RestoreDataRead)(CeedQFunctionContext);
   int (*Destroy)(CeedQFunctionContext);
+  CeedQFunctionContextDataDestroyUser data_destroy_function;
+  CeedMemType data_destroy_mem_type;
   CeedInt num_fields;
   CeedInt max_fields;
   CeedContextFieldLabel *field_labels;
@@ -371,6 +377,7 @@ struct CeedOperator_private {
   CeedQFunction qf;
   CeedQFunction dqf;
   CeedQFunction dqfT;
+  const char *name;
   bool is_immutable;
   bool is_interface_setup;
   bool is_backend_setup;
diff --git a/include/ceed/backend.h b/include/ceed/backend.h
index aae6b1fcea..fa9079b0cd 100644
--- a/include/ceed/backend.h
+++ b/include/ceed/backend.h
@@ -119,6 +119,7 @@ CEED_EXTERN int CeedOperatorGetActiveBasis(CeedOperator op,
 CEED_EXTERN int CeedOperatorGetActiveElemRestriction(CeedOperator op, CeedElemRestriction *active_rstr);
 CEED_EXTERN int CeedGetOperatorFallbackResource(Ceed ceed,
     const char **resource);
+CEED_EXTERN int CeedGetOperatorFallbackCeed(Ceed ceed, Ceed *fallback_ceed);
 CEED_EXTERN int CeedSetOperatorFallbackResource(Ceed ceed,
     const char *resource);
 CEED_EXTERN int CeedGetOperatorFallbackParentCeed(Ceed ceed, Ceed *parent);
@@ -265,9 +266,15 @@ CEED_EXTERN int CeedQFunctionContextGetBackendData(CeedQFunctionContext ctx,
     void *data);
 CEED_EXTERN int CeedQFunctionContextSetBackendData(CeedQFunctionContext ctx,
     void *data);
+CEED_EXTERN int CeedQFunctionContextGetFieldLabel(CeedQFunctionContext ctx,
+    const char *field_name, CeedContextFieldLabel *field_label);
 CEED_EXTERN int CeedQFunctionContextSetGeneric(CeedQFunctionContext ctx,
                                    CeedContextFieldLabel field_label,
                                    CeedContextFieldType field_type, void *value);
+CEED_EXTERN int CeedQFunctionContextSetDouble(CeedQFunctionContext ctx,
+    CeedContextFieldLabel field_label, double *values);
+CEED_EXTERN int CeedQFunctionContextSetInt32(CeedQFunctionContext ctx,
+    CeedContextFieldLabel field_label, int *values);
 CEED_EXTERN int CeedQFunctionContextReference(CeedQFunctionContext ctx);
 
 CEED_EXTERN int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data);
diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h
index 2c4ef57bc0..6a3d068ff5 100644
--- a/include/ceed/ceed.h
+++ b/include/ceed/ceed.h
@@ -203,6 +203,7 @@ CEED_EXTERN int CeedInit(const char *resource, Ceed *ceed);
 CEED_EXTERN int CeedReferenceCopy(Ceed ceed, Ceed *ceed_copy);
 CEED_EXTERN int CeedGetResource(Ceed ceed, const char **resource);
 CEED_EXTERN int CeedIsDeterministic(Ceed ceed, bool *is_deterministic);
+CEED_EXTERN int CeedAddJitSourceRoot(Ceed ceed, const char *jit_source_root);
 CEED_EXTERN int CeedView(Ceed ceed, FILE *stream);
 CEED_EXTERN int CeedDestroy(Ceed *ceed);
 
@@ -246,7 +247,7 @@ CEED_EXTERN int CeedResetErrorMessage(Ceed, const char **err_msg);
 /// @ingroup Ceed
 #define CEED_VERSION_MAJOR 0
 #define CEED_VERSION_MINOR 10
-#define CEED_VERSION_PATCH 0
+#define CEED_VERSION_PATCH 1
 #define CEED_VERSION_RELEASE false
 
 /// Compile-time check that the the current library version is at least as
@@ -691,6 +692,16 @@ typedef enum {
 } CeedContextFieldType;
 CEED_EXTERN const char *const CeedContextFieldTypes[];
 
+/** Function pointer type for the user provided CeedQFunctionContextDataDestroy callback function (the type accepted by CeedQFunctionContextSetDataDestroy())
+
+ @param[in,out] data  Pointer to the user's CeedQFunctionContext data
+
+ @return An error code: 0 - success, otherwise - failure
+
+ @ingroup CeedQFunction
+**/
+typedef int (*CeedQFunctionContextDataDestroyUser)(void *data);
+
 CEED_EXTERN int CeedQFunctionContextCreate(Ceed ceed,
     CeedQFunctionContext *ctx);
 CEED_EXTERN int CeedQFunctionContextReferenceCopy(CeedQFunctionContext ctx,
@@ -713,21 +724,16 @@ CEED_EXTERN int CeedQFunctionContextRegisterDouble(CeedQFunctionContext ctx,
 CEED_EXTERN int CeedQFunctionContextRegisterInt32(CeedQFunctionContext ctx,
     const char *field_name, size_t field_offset, size_t num_values,
     const char *field_description);
-CEED_EXTERN int CeedQFunctionContextGetFieldLabel(CeedQFunctionContext ctx,
-    const char *field_name, CeedContextFieldLabel *field_label);
 CEED_EXTERN int CeedQFunctionContextGetAllFieldLabels(CeedQFunctionContext ctx,
     const CeedContextFieldLabel **field_labels, CeedInt *num_fields);
 CEED_EXTERN int CeedContextFieldLabelGetDescription(CeedContextFieldLabel label,
     const char **field_name, const char **field_description, size_t *num_values,
     CeedContextFieldType *field_type);
-CEED_EXTERN int CeedQFunctionContextSetDouble(CeedQFunctionContext ctx,
-    CeedContextFieldLabel field_label, double *values);
-CEED_EXTERN int CeedQFunctionContextSetInt32(CeedQFunctionContext ctx,
-    CeedContextFieldLabel field_label, int *values);
 CEED_EXTERN int CeedQFunctionContextGetContextSize(CeedQFunctionContext ctx,
     size_t *ctx_size);
 CEED_EXTERN int CeedQFunctionContextView(CeedQFunctionContext ctx,
     FILE *stream);
+CEED_EXTERN int CeedQFunctionContextSetDataDestroy(CeedQFunctionContext ctx, CeedMemType f_mem_type, CeedQFunctionContextDataDestroyUser f);
 CEED_EXTERN int CeedQFunctionContextDestroy(CeedQFunctionContext *ctx);
 
 CEED_EXTERN int CeedOperatorCreate(Ceed ceed, CeedQFunction qf,
@@ -778,6 +784,7 @@ CEED_EXTERN int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
 CEED_EXTERN int CeedOperatorCreateFDMElementInverse(CeedOperator op,
     CeedOperator *fdm_inv, CeedRequest *request);
 CEED_EXTERN int CeedOperatorSetNumQuadraturePoints(CeedOperator op, CeedInt num_qpts);
+CEED_EXTERN int CeedOperatorSetName(CeedOperator op, const char *name);
 CEED_EXTERN int CeedOperatorView(CeedOperator op, FILE *stream);
 CEED_EXTERN int CeedOperatorGetCeed(CeedOperator op, Ceed *ceed);
 CEED_EXTERN int CeedOperatorGetNumElements(CeedOperator op, CeedInt *num_elem);
diff --git a/backends/cuda-ref/kernels/cuda-ref-basis-nontensor.h b/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h
similarity index 100%
rename from backends/cuda-ref/kernels/cuda-ref-basis-nontensor.h
rename to include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h
diff --git a/backends/cuda-ref/kernels/cuda-ref-basis-tensor.h b/include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h
similarity index 100%
rename from backends/cuda-ref/kernels/cuda-ref-basis-tensor.h
rename to include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h
diff --git a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h
new file mode 100644
index 0000000000..ecaca9d444
--- /dev/null
+++ b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h
@@ -0,0 +1,148 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed/ceed.h>
+
+//------------------------------------------------------------------------------
+// Diagonal assembly kernels
+//------------------------------------------------------------------------------
+
+typedef enum {
+  /// No evaluation is performed: either there is no data, or the data is
+  /// already given at quadrature points
+  CEED_EVAL_NONE = 0,
+  /// Interpolation from nodes to quadrature points
+  CEED_EVAL_INTERP = 1,
+  /// Gradients at quadrature points, computed from input in a nodal basis
+  CEED_EVAL_GRAD = 2,
+  /// Divergence at quadrature points, computed from input in a nodal basis
+  CEED_EVAL_DIV = 4,
+  /// Curl at quadrature points, computed from input in a nodal basis
+  CEED_EVAL_CURL = 8,
+  /// Quadrature weights on the reference element; takes no input
+  CEED_EVAL_WEIGHT = 16,
+} CeedEvalMode;
+
+//------------------------------------------------------------------------------
+// Get Basis Emode Pointer
+//   Stores in *basisptr the matrix matching emode: identity for
+//   CEED_EVAL_NONE, interp for CEED_EVAL_INTERP, grad for CEED_EVAL_GRAD.
+//   For CEED_EVAL_WEIGHT, CEED_EVAL_DIV and CEED_EVAL_CURL nothing is
+//   stored, so *basisptr keeps whatever value the caller put there.
+//------------------------------------------------------------------------------
+extern "C" __device__ void CeedOperatorGetBasisPointer_Cuda(const CeedScalar **basisptr,
+    CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp,
+    const CeedScalar *grad) {
+  if (emode == CEED_EVAL_NONE) {
+    *basisptr = identity;
+  } else if (emode == CEED_EVAL_INTERP) {
+    *basisptr = interp;
+  } else if (emode == CEED_EVAL_GRAD) {
+    *basisptr = grad;
+  } else {
+    // CEED_EVAL_WEIGHT, CEED_EVAL_DIV, CEED_EVAL_CURL: nothing to select
+    // Caught by QF Assembly
+    return;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Core code for diagonal assembly
+//   Accumulates (+=) into elemdiagarray the diagonal of B_out^T D B_in, or,
+//   when pointBlock is true, the NCOMP x NCOMP block per node. D is read from
+//   assembledqfarray, flattened as [ein][comp_in][eout][comp_out][elem][qpt];
+//   entries with magnitude <= maxnorm*1e-12 are skipped. threadIdx.x is the
+//   node index; elements start at blockIdx.x*blockDim.z + threadIdx.z.
+//   NUMEMODEIN, NUMEMODEOUT, NCOMP, NQPTS and NNODES are not defined in this
+//   file; they are presumably supplied when the kernel is compiled.
+//------------------------------------------------------------------------------
+__device__ void diagonalCore(const CeedInt nelem,
+    const CeedScalar maxnorm, const bool pointBlock, const CeedScalar *identity,
+    const CeedScalar *interpin, const CeedScalar *gradin,
+    const CeedScalar *interpout, const CeedScalar *gradout,
+    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
+    const CeedScalar *__restrict__ assembledqfarray,
+    CeedScalar *__restrict__ elemdiagarray) {
+  const int tid = threadIdx.x; // running with P threads, tid is evec node
+  const CeedScalar qfvaluebound = maxnorm*1e-12;
+
+  // Compute the diagonal of B^T D B
+  // Each element
+  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem; e += gridDim.x*blockDim.z) {
+    CeedInt dout = -1;
+    // Each basis eval mode pair
+    for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) {
+      const CeedScalar *bt = NULL;
+      // Each GRAD mode selects the next NQPTS*NNODES slab; gradout is only offset for GRAD modes (no out-of-range pointer)
+      if (emodeout[eout] == CEED_EVAL_GRAD) dout += 1;
+      CeedOperatorGetBasisPointer_Cuda(&bt, emodeout[eout], identity, interpout,
+          emodeout[eout] == CEED_EVAL_GRAD ? gradout + dout*NQPTS*NNODES : gradout);
+      CeedInt din = -1;
+      for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) {
+        const CeedScalar *b = NULL;
+        if (emodein[ein] == CEED_EVAL_GRAD) din += 1; // same slab selection as for gradout
+        CeedOperatorGetBasisPointer_Cuda(&b, emodein[ein], identity, interpin,
+            emodein[ein] == CEED_EVAL_GRAD ? gradin + din*NQPTS*NNODES : gradin);
+        // Each component
+        for (CeedInt compOut = 0; compOut < NCOMP; compOut++) {
+          // Each qpoint/node pair
+          if (pointBlock) {
+            // Point Block Diagonal
+            for (CeedInt compIn = 0; compIn < NCOMP; compIn++) {
+              CeedScalar evalue = 0.;
+              for (CeedInt q = 0; q < NQPTS; q++) {
+                const CeedScalar qfvalue = assembledqfarray[
+                    ((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)*NCOMP+compOut)*nelem+e)*NQPTS+q];
+                if (fabs(qfvalue) > qfvaluebound) evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; // fabs: floating-point magnitude
+              }
+              elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue;
+            }
+          } else {
+            // Diagonal Only
+            CeedScalar evalue = 0.;
+            for (CeedInt q = 0; q < NQPTS; q++) {
+              const CeedScalar qfvalue = assembledqfarray[
+                  ((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)*NCOMP+compOut)*nelem+e)*NQPTS+q];
+              if (fabs(qfvalue) > qfvaluebound) evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
+            }
+            elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue;
+          }
+        }
+      }
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Linear diagonal: kernel entry point that runs diagonalCore with pointBlock = false
+//------------------------------------------------------------------------------
+extern "C" __global__ void linearDiagonal(const CeedInt nelem, const CeedScalar maxnorm,
+    const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin,
+    const CeedScalar *interpout, const CeedScalar *gradout,
+    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
+    const CeedScalar *__restrict__ assembledqfarray,
+    CeedScalar *__restrict__ elemdiagarray) {
+  const bool point_block = false; // all other arguments are forwarded unchanged
+  diagonalCore(nelem, maxnorm, point_block, identity, interpin, gradin, interpout,
+               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
+}
+
+//------------------------------------------------------------------------------
+// Linear point block diagonal: kernel entry point that runs diagonalCore with pointBlock = true
+//------------------------------------------------------------------------------
+extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, const CeedScalar maxnorm,
+    const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin,
+    const CeedScalar *interpout, const CeedScalar *gradout,
+    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
+    const CeedScalar *__restrict__ assembledqfarray,
+    CeedScalar *__restrict__ elemdiagarray) {
+  const bool point_block = true; // all other arguments are forwarded unchanged
+  diagonalCore(nelem, maxnorm, point_block, identity, interpin, gradin, interpout,
+               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
+}
+
+//------------------------------------------------------------------------------
diff --git a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h
new file mode 100644
index 0000000000..cb75ddc7ea
--- /dev/null
+++ b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h
@@ -0,0 +1,117 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed/ceed.h>
+
+//------------------------------------------------------------------------------
+// Matrix assembly kernel for low-order elements (2D thread block)
+//------------------------------------------------------------------------------
+extern "C" __launch_bounds__(BLOCK_SIZE)
+           __global__ void linearAssemble(const CeedScalar *B_in, const CeedScalar *B_out,
+                   const CeedScalar *__restrict__ qf_array,
+                   CeedScalar *__restrict__ values_array) {
+
+  // B_in and B_out are assumed to share the same number of quadrature points
+  // and the same number of basis points.
+  // TODO: expand to more general cases
+  // Each thread computes one entry C_il of (Bout^T)_ij D_jk Bin_kl = C_il
+  const int row = threadIdx.x; // output row index (i) of the B^T D B product
+  const int col = threadIdx.y; // output column index (l) of the B^T D B product
+
+  // Output strides follow the reference (interface) symbolic assembly ordering,
+  // slowest --> fastest: element, comp_in, comp_out, node_row, node_col
+  const CeedInt out_stride_comp_out = NNODES * NNODES;
+  const CeedInt out_stride_comp_in = out_stride_comp_out * NCOMP;
+  const CeedInt out_stride_elem = out_stride_comp_in * NCOMP;
+  // QF array strides, slowest --> fastest: emode_in, comp_in, emode_out, comp_out, elem, qpt
+  const CeedInt qf_stride_elem = NQPTS;
+  const CeedInt qf_stride_comp_out = NELEM * qf_stride_elem;
+  const CeedInt qf_stride_emode_out = qf_stride_comp_out * NCOMP;
+  const CeedInt qf_stride_comp_in = qf_stride_emode_out * NUMEMODEOUT;
+  const CeedInt qf_stride_emode_in = qf_stride_comp_in * NCOMP;
+
+  // Loop over each element (if necessary)
+  const CeedInt elem_step = gridDim.x*blockDim.z;
+  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM; e += elem_step) {
+    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
+      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
+        CeedScalar sum = 0.0;
+        const CeedInt qf_base = qf_stride_comp_in * comp_in + qf_stride_comp_out * comp_out + qf_stride_elem * e;
+        for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
+          const CeedInt b_in_offset = emode_in * NQPTS * NNODES;
+          for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
+            const CeedInt b_out_offset = emode_out * NQPTS * NNODES;
+            const CeedInt qf_offset = qf_base + qf_stride_emode_out * emode_out + qf_stride_emode_in * emode_in;
+            // B^T D B contribution of this 'chunk' of D (the qf_array)
+            for (CeedInt q = 0; q < NQPTS; q++) {
+              sum += B_out[b_out_offset + q * NNODES + row] * qf_array[qf_offset + q] * B_in[b_in_offset + q * NNODES + col];
+            }
+
+          } // end of emode_out
+        } // end of emode_in
+        const CeedInt out_index = out_stride_comp_in * comp_in + out_stride_comp_out * comp_out + out_stride_elem * e + NNODES * row + col;
+        values_array[out_index] = sum;
+      } // end of out component
+    } // end of in component
+  } // end of element loop
+}
+
+//------------------------------------------------------------------------------
+// Fallback kernel for larger orders (1D thread block)
+//------------------------------------------------------------------------------
+extern "C" __launch_bounds__(BLOCK_SIZE)
+           __global__ void linearAssembleFallback(const CeedScalar *B_in, const CeedScalar *B_out,
+                   const CeedScalar *__restrict__ qf_array,
+                   CeedScalar *__restrict__ values_array) {
+
+  // B_in and B_out are assumed to share the same number of quadrature points
+  // and the same number of basis points.
+  // TODO: expand to more general cases
+  // Each thread owns one column l of (Bout^T)_ij D_jk Bin_kl = C_il and loops over the rows i
+  const int col = threadIdx.x; // output column index (l) of the B^T D B product
+
+  // Output strides follow the reference (interface) symbolic assembly ordering,
+  // slowest --> fastest: element, comp_in, comp_out, node_row, node_col
+  const CeedInt out_stride_comp_out = NNODES * NNODES;
+  const CeedInt out_stride_comp_in = out_stride_comp_out * NCOMP;
+  const CeedInt out_stride_elem = out_stride_comp_in * NCOMP;
+  // QF array strides, slowest --> fastest: emode_in, comp_in, emode_out, comp_out, elem, qpt
+  const CeedInt qf_stride_elem = NQPTS;
+  const CeedInt qf_stride_comp_out = NELEM * qf_stride_elem;
+  const CeedInt qf_stride_emode_out = qf_stride_comp_out * NCOMP;
+  const CeedInt qf_stride_comp_in = qf_stride_emode_out * NUMEMODEOUT;
+  const CeedInt qf_stride_emode_in = qf_stride_comp_in * NCOMP;
+
+  // Loop over each element (if necessary)
+  const CeedInt elem_step = gridDim.x*blockDim.z;
+  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM; e += elem_step) {
+    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
+      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
+        const CeedInt qf_base = qf_stride_comp_in * comp_in + qf_stride_comp_out * comp_out + qf_stride_elem * e;
+        for (CeedInt row = 0; row < NNODES; row++) {
+          CeedScalar sum = 0.0;
+          for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
+            const CeedInt b_in_offset = emode_in * NQPTS * NNODES;
+            for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
+              const CeedInt b_out_offset = emode_out * NQPTS * NNODES;
+              const CeedInt qf_offset = qf_base + qf_stride_emode_out * emode_out + qf_stride_emode_in * emode_in;
+              // B^T D B contribution of this 'chunk' of D (the qf_array)
+              for (CeedInt q = 0; q < NQPTS; q++) {
+                sum += B_out[b_out_offset + q * NNODES + row] * qf_array[qf_offset + q] * B_in[b_in_offset + q * NNODES + col];
+              }
+
+            } // end of emode_out
+          } // end of emode_in
+          const CeedInt out_index = out_stride_comp_in * comp_in + out_stride_comp_out * comp_out + out_stride_elem * e + NNODES * row + col;
+          values_array[out_index] = sum;
+        } // end of loop over element node index, row
+      } // end of out component
+    } // end of in component
+  } // end of element loop
+}
+
+//------------------------------------------------------------------------------
diff --git a/backends/cuda-ref/kernels/cuda-ref-qfunction.h b/include/ceed/jit-source/cuda/cuda-ref-qfunction.h
similarity index 100%
rename from backends/cuda-ref/kernels/cuda-ref-qfunction.h
rename to include/ceed/jit-source/cuda/cuda-ref-qfunction.h
diff --git a/backends/cuda-ref/kernels/cuda-ref-restriction.h b/include/ceed/jit-source/cuda/cuda-ref-restriction.h
similarity index 100%
rename from backends/cuda-ref/kernels/cuda-ref-restriction.h
rename to include/ceed/jit-source/cuda/cuda-ref-restriction.h
diff --git a/backends/cuda-shared/kernels/cuda-shared-basis.h b/include/ceed/jit-source/cuda/cuda-shared-basis.h
similarity index 100%
rename from backends/cuda-shared/kernels/cuda-shared-basis.h
rename to include/ceed/jit-source/cuda/cuda-shared-basis.h
diff --git a/gallery/identity/ceed-identity.h b/include/ceed/jit-source/gallery/ceed-identity.h
similarity index 100%
rename from gallery/identity/ceed-identity.h
rename to include/ceed/jit-source/gallery/ceed-identity.h
diff --git a/gallery/mass/ceed-mass1dbuild.h b/include/ceed/jit-source/gallery/ceed-mass1dbuild.h
similarity index 100%
rename from gallery/mass/ceed-mass1dbuild.h
rename to include/ceed/jit-source/gallery/ceed-mass1dbuild.h
diff --git a/gallery/mass/ceed-mass2dbuild.h b/include/ceed/jit-source/gallery/ceed-mass2dbuild.h
similarity index 100%
rename from gallery/mass/ceed-mass2dbuild.h
rename to include/ceed/jit-source/gallery/ceed-mass2dbuild.h
diff --git a/gallery/mass/ceed-mass3dbuild.h b/include/ceed/jit-source/gallery/ceed-mass3dbuild.h
similarity index 100%
rename from gallery/mass/ceed-mass3dbuild.h
rename to include/ceed/jit-source/gallery/ceed-mass3dbuild.h
diff --git a/gallery/mass/ceed-massapply.h b/include/ceed/jit-source/gallery/ceed-massapply.h
similarity index 100%
rename from gallery/mass/ceed-massapply.h
rename to include/ceed/jit-source/gallery/ceed-massapply.h
diff --git a/gallery/poisson/ceed-poisson1dapply.h b/include/ceed/jit-source/gallery/ceed-poisson1dapply.h
similarity index 100%
rename from gallery/poisson/ceed-poisson1dapply.h
rename to include/ceed/jit-source/gallery/ceed-poisson1dapply.h
diff --git a/gallery/poisson/ceed-poisson1dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson1dbuild.h
similarity index 100%
rename from gallery/poisson/ceed-poisson1dbuild.h
rename to include/ceed/jit-source/gallery/ceed-poisson1dbuild.h
diff --git a/gallery/poisson/ceed-poisson2dapply.h b/include/ceed/jit-source/gallery/ceed-poisson2dapply.h
similarity index 100%
rename from gallery/poisson/ceed-poisson2dapply.h
rename to include/ceed/jit-source/gallery/ceed-poisson2dapply.h
diff --git a/gallery/poisson/ceed-poisson2dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h
similarity index 100%
rename from gallery/poisson/ceed-poisson2dbuild.h
rename to include/ceed/jit-source/gallery/ceed-poisson2dbuild.h
diff --git a/gallery/poisson/ceed-poisson3dapply.h b/include/ceed/jit-source/gallery/ceed-poisson3dapply.h
similarity index 100%
rename from gallery/poisson/ceed-poisson3dapply.h
rename to include/ceed/jit-source/gallery/ceed-poisson3dapply.h
diff --git a/gallery/poisson/ceed-poisson3dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h
similarity index 100%
rename from gallery/poisson/ceed-poisson3dbuild.h
rename to include/ceed/jit-source/gallery/ceed-poisson3dbuild.h
diff --git a/gallery/scale/ceed-scale.h b/include/ceed/jit-source/gallery/ceed-scale.h
similarity index 100%
rename from gallery/scale/ceed-scale.h
rename to include/ceed/jit-source/gallery/ceed-scale.h
diff --git a/gallery/mass-vector/ceed-vectormassapply.h b/include/ceed/jit-source/gallery/ceed-vectormassapply.h
similarity index 100%
rename from gallery/mass-vector/ceed-vectormassapply.h
rename to include/ceed/jit-source/gallery/ceed-vectormassapply.h
diff --git a/gallery/poisson-vector/ceed-vectorpoisson1dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h
similarity index 100%
rename from gallery/poisson-vector/ceed-vectorpoisson1dapply.h
rename to include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h
diff --git a/gallery/poisson-vector/ceed-vectorpoisson2dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h
similarity index 100%
rename from gallery/poisson-vector/ceed-vectorpoisson2dapply.h
rename to include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h
diff --git a/gallery/poisson-vector/ceed-vectorpoisson3dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h
similarity index 100%
rename from gallery/poisson-vector/ceed-vectorpoisson3dapply.h
rename to include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h
diff --git a/backends/hip-ref/kernels/hip-ref-basis-nontensor.h b/include/ceed/jit-source/hip/hip-ref-basis-nontensor.h
similarity index 100%
rename from backends/hip-ref/kernels/hip-ref-basis-nontensor.h
rename to include/ceed/jit-source/hip/hip-ref-basis-nontensor.h
diff --git a/backends/hip-ref/kernels/hip-ref-basis-tensor.h b/include/ceed/jit-source/hip/hip-ref-basis-tensor.h
similarity index 100%
rename from backends/hip-ref/kernels/hip-ref-basis-tensor.h
rename to include/ceed/jit-source/hip/hip-ref-basis-tensor.h
diff --git a/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h b/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h
new file mode 100644
index 0000000000..f5fd171c2d
--- /dev/null
+++ b/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h
@@ -0,0 +1,147 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed/ceed.h>
+
+//------------------------------------------------------------------------------
+// Diagonal assembly kernels
+//------------------------------------------------------------------------------
+typedef enum {
+  /// No evaluation is performed: either there is no data, or the data is
+  /// already given at quadrature points
+  CEED_EVAL_NONE = 0,
+  /// Interpolation from nodes to quadrature points
+  CEED_EVAL_INTERP = 1,
+  /// Gradients at quadrature points, computed from input in a nodal basis
+  CEED_EVAL_GRAD = 2,
+  /// Divergence at quadrature points, computed from input in a nodal basis
+  CEED_EVAL_DIV = 4,
+  /// Curl at quadrature points, computed from input in a nodal basis
+  CEED_EVAL_CURL = 8,
+  /// Quadrature weights on the reference element; takes no input
+  CEED_EVAL_WEIGHT = 16,
+} CeedEvalMode;
+
+//------------------------------------------------------------------------------
+// Get Basis Emode Pointer
+//   Stores in *basisptr the matrix matching emode: identity for
+//   CEED_EVAL_NONE, interp for CEED_EVAL_INTERP, grad for CEED_EVAL_GRAD.
+//   For CEED_EVAL_WEIGHT, CEED_EVAL_DIV and CEED_EVAL_CURL nothing is
+//   stored, so *basisptr keeps whatever value the caller put there.
+//------------------------------------------------------------------------------
+extern "C" __device__ void CeedOperatorGetBasisPointer_Hip(const CeedScalar **basisptr,
+    CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp,
+    const CeedScalar *grad) {
+  if (emode == CEED_EVAL_NONE) {
+    *basisptr = identity;
+  } else if (emode == CEED_EVAL_INTERP) {
+    *basisptr = interp;
+  } else if (emode == CEED_EVAL_GRAD) {
+    *basisptr = grad;
+  } else {
+    // CEED_EVAL_WEIGHT, CEED_EVAL_DIV, CEED_EVAL_CURL: nothing to select
+    // Caught by QF Assembly
+    return;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Core code for diagonal assembly
+//   Accumulates (+=) into elemdiagarray the diagonal of B_out^T D B_in, or,
+//   when pointBlock is true, the NCOMP x NCOMP block per node. D is read from
+//   assembledqfarray, flattened as [ein][comp_in][eout][comp_out][elem][qpt];
+//   entries with magnitude <= maxnorm*1e-12 are skipped. threadIdx.x is the
+//   node index; elements start at blockIdx.x*blockDim.z + threadIdx.z.
+//   NUMEMODEIN, NUMEMODEOUT, NCOMP, NQPTS and NNODES are not defined in this
+//   file; they are presumably supplied when the kernel is compiled.
+//------------------------------------------------------------------------------
+__device__ void diagonalCore(const CeedInt nelem,
+    const CeedScalar maxnorm, const bool pointBlock, const CeedScalar *identity,
+    const CeedScalar *interpin, const CeedScalar *gradin,
+    const CeedScalar *interpout, const CeedScalar *gradout,
+    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
+    const CeedScalar *__restrict__ assembledqfarray,
+    CeedScalar *__restrict__ elemdiagarray) {
+  const int tid = threadIdx.x; // running with P threads, tid is evec node
+  const CeedScalar qfvaluebound = maxnorm*1e-12;
+
+  // Compute the diagonal of B^T D B
+  // Each element
+  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem; e += gridDim.x*blockDim.z) {
+    CeedInt dout = -1;
+    // Each basis eval mode pair
+    for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) {
+      const CeedScalar *bt = NULL;
+      // Each GRAD mode selects the next NQPTS*NNODES slab; gradout is only offset for GRAD modes (no out-of-range pointer)
+      if (emodeout[eout] == CEED_EVAL_GRAD) dout += 1;
+      CeedOperatorGetBasisPointer_Hip(&bt, emodeout[eout], identity, interpout,
+          emodeout[eout] == CEED_EVAL_GRAD ? gradout + dout*NQPTS*NNODES : gradout);
+      CeedInt din = -1;
+      for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) {
+        const CeedScalar *b = NULL;
+        if (emodein[ein] == CEED_EVAL_GRAD) din += 1; // same slab selection as for gradout
+        CeedOperatorGetBasisPointer_Hip(&b, emodein[ein], identity, interpin,
+            emodein[ein] == CEED_EVAL_GRAD ? gradin + din*NQPTS*NNODES : gradin);
+        // Each component
+        for (CeedInt compOut = 0; compOut < NCOMP; compOut++) {
+          // Each qpoint/node pair
+          if (pointBlock) {
+            // Point Block Diagonal
+            for (CeedInt compIn = 0; compIn < NCOMP; compIn++) {
+              CeedScalar evalue = 0.;
+              for (CeedInt q = 0; q < NQPTS; q++) {
+                const CeedScalar qfvalue = assembledqfarray[
+                    ((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)*NCOMP+compOut)*nelem+e)*NQPTS+q];
+                if (fabs(qfvalue) > qfvaluebound) evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid]; // fabs: floating-point magnitude
+              }
+              elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue;
+            }
+          } else {
+            // Diagonal Only
+            CeedScalar evalue = 0.;
+            for (CeedInt q = 0; q < NQPTS; q++) {
+              const CeedScalar qfvalue = assembledqfarray[
+                  ((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)*NCOMP+compOut)*nelem+e)*NQPTS+q];
+              if (fabs(qfvalue) > qfvaluebound) evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
+            }
+            elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue;
+          }
+        }
+      }
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Linear diagonal: kernel entry point that runs diagonalCore with pointBlock = false
+//------------------------------------------------------------------------------
+extern "C" __global__ void linearDiagonal(const CeedInt nelem, const CeedScalar maxnorm,
+    const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin,
+    const CeedScalar *interpout, const CeedScalar *gradout,
+    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
+    const CeedScalar *__restrict__ assembledqfarray,
+    CeedScalar *__restrict__ elemdiagarray) {
+  const bool point_block = false; // all other arguments are forwarded unchanged
+  diagonalCore(nelem, maxnorm, point_block, identity, interpin, gradin, interpout,
+               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
+}
+
+//------------------------------------------------------------------------------
+// Linear point block diagonal: kernel entry point that runs diagonalCore with pointBlock = true
+//------------------------------------------------------------------------------
+extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem, const CeedScalar maxnorm,
+    const CeedScalar *identity, const CeedScalar *interpin, const CeedScalar *gradin,
+    const CeedScalar *interpout, const CeedScalar *gradout,
+    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
+    const CeedScalar *__restrict__ assembledqfarray,
+    CeedScalar *__restrict__ elemdiagarray) {
+  const bool point_block = true; // all other arguments are forwarded unchanged
+  diagonalCore(nelem, maxnorm, point_block, identity, interpin, gradin, interpout,
+               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
+}
+
+//------------------------------------------------------------------------------
diff --git a/include/ceed/jit-source/hip/hip-ref-operator-assemble.h b/include/ceed/jit-source/hip/hip-ref-operator-assemble.h
new file mode 100644
index 0000000000..cb75ddc7ea
--- /dev/null
+++ b/include/ceed/jit-source/hip/hip-ref-operator-assemble.h
@@ -0,0 +1,117 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed/ceed.h>
+
+//------------------------------------------------------------------------------
+// Matrix assembly kernel for low-order elements (2D thread block)
+//------------------------------------------------------------------------------
+extern "C" __launch_bounds__(BLOCK_SIZE) 
+           __global__ void linearAssemble(const CeedScalar *B_in, const CeedScalar *B_out,
+                   const CeedScalar *__restrict__ qf_array,
+                   CeedScalar *__restrict__ values_array) {
+  // Each thread computes the single (i, l) entry of every element matrix block it visits.
+  // This kernel assumes B_in and B_out have the same number of quadrature points (NQPTS)
+  // and basis points (NNODES); both are indexed below as [emode][qpt][node].
+  // TODO: expand to more general cases
+  const int i = threadIdx.x; // The output row index of each B^TDB operation
+  const int l = threadIdx.y; // The output column index of each B^TDB operation
+			     // such that C_il = sum_j (Bout^T)_ij D_j Bin_jl, with D applied per quadrature point j
+  // NNODES, NQPTS, NCOMP, NELEM, NUMEMODEIN, NUMEMODEOUT, BLOCK_SIZE are not defined in this file; presumably JiT compile-time defines - confirm
+  // Strides for final output ordering, determined by the reference (interface) implementation of
+  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col
+  const CeedInt comp_out_stride = NNODES * NNODES;
+  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
+  const CeedInt e_stride = comp_in_stride * NCOMP;
+  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt
+  const CeedInt qe_stride = NQPTS;
+  const CeedInt qcomp_out_stride = NELEM * qe_stride;
+  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
+  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
+  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
+  // NOTE(review): i and l are not checked against NNODES; presumably blockDim.x == blockDim.y == NNODES - confirm at the launch site
+  // Loop over each element (if necessary): one element per threadIdx.z slice, stepping by gridDim.x*blockDim.z
+  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
+         e += gridDim.x*blockDim.z) {
+    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
+      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
+        CeedScalar result = 0.0;
+        CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
+        for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
+          CeedInt b_in_index = emode_in * NQPTS * NNODES;
+      	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
+             CeedInt b_out_index = emode_out * NQPTS * NNODES;
+             CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
+ 	     // Perform the B^T D B operation for this 'chunk' of D (the qf_array); contributions of all emode pairs are summed into result
+            for (CeedInt j = 0; j < NQPTS; j++) {
+     	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
+	    }
+
+          }// end of emode_out
+        } // end of emode_in
+        CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
+   	values_array[val_index] = result;
+      } // end of out component
+    } // end of in component
+  } // end of element loop
+}
+
+//------------------------------------------------------------------------------
+// Fallback kernel for larger orders (1D thread block)
+//------------------------------------------------------------------------------
+extern "C" __launch_bounds__(BLOCK_SIZE) 
+           __global__ void linearAssembleFallback(const CeedScalar *B_in, const CeedScalar *B_out,
+                   const CeedScalar *__restrict__ qf_array,
+                   CeedScalar *__restrict__ values_array) {
+  // Each thread owns output column l and loops over every output row i itself.
+  // This kernel assumes B_in and B_out have the same number of quadrature points (NQPTS)
+  // and basis points (NNODES); both are indexed below as [emode][qpt][node].
+  // TODO: expand to more general cases
+  const int l = threadIdx.x; // The output column index of each B^TDB operation
+			     // such that C_il = sum_j (Bout^T)_ij D_j Bin_jl, with D applied per quadrature point j
+  // NNODES, NQPTS, NCOMP, NELEM, NUMEMODEIN, NUMEMODEOUT, BLOCK_SIZE are not defined in this file; presumably JiT compile-time defines - confirm
+  // Strides for final output ordering, determined by the reference (interface) implementation of
+  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col
+  const CeedInt comp_out_stride = NNODES * NNODES;
+  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
+  const CeedInt e_stride = comp_in_stride * NCOMP;
+  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt
+  const CeedInt qe_stride = NQPTS;
+  const CeedInt qcomp_out_stride = NELEM * qe_stride;
+  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
+  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
+  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
+  // NOTE(review): l is not checked against NNODES; presumably blockDim.x == NNODES - confirm at the launch site
+    // Loop over each element (if necessary): one element per threadIdx.z slice, stepping by gridDim.x*blockDim.z
+  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
+         e += gridDim.x*blockDim.z) {
+    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
+      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
+        for (CeedInt i = 0; i < NNODES; i++) {
+          CeedScalar result = 0.0;
+          CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
+          for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
+            CeedInt b_in_index = emode_in * NQPTS * NNODES;
+        	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
+               CeedInt b_out_index = emode_out * NQPTS * NNODES;
+               CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
+   	     // Perform the B^T D B operation for this 'chunk' of D (the qf_array); contributions of all emode pairs are summed into result
+              for (CeedInt j = 0; j < NQPTS; j++) {
+       	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
+  	    }
+
+            }// end of emode_out
+          } // end of emode_in
+          CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
+     	  values_array[val_index] = result;
+        } // end of loop over element node index, i
+      } // end of out component
+    } // end of in component
+  } // end of element loop
+}
+
+//------------------------------------------------------------------------------
diff --git a/backends/hip-ref/kernels/hip-ref-qfunction.h b/include/ceed/jit-source/hip/hip-ref-qfunction.h
similarity index 100%
rename from backends/hip-ref/kernels/hip-ref-qfunction.h
rename to include/ceed/jit-source/hip/hip-ref-qfunction.h
diff --git a/backends/hip-ref/kernels/hip-ref-restriction.h b/include/ceed/jit-source/hip/hip-ref-restriction.h
similarity index 100%
rename from backends/hip-ref/kernels/hip-ref-restriction.h
rename to include/ceed/jit-source/hip/hip-ref-restriction.h
diff --git a/backends/hip-shared/kernels/hip-shared-basis.h b/include/ceed/jit-source/hip/hip-shared-basis.h
similarity index 100%
rename from backends/hip-shared/kernels/hip-shared-basis.h
rename to include/ceed/jit-source/hip/hip-shared-basis.h
diff --git a/include/ceed/jit-tools.h b/include/ceed/jit-tools.h
index ffd19e6f5d..90d6a7712a 100644
--- a/include/ceed/jit-tools.h
+++ b/include/ceed/jit-tools.h
@@ -19,9 +19,13 @@
 
 #include <ceed/ceed.h>
 
+CEED_EXTERN int CeedCheckFilePath(Ceed ceed, const char *source_file_path, bool *is_valid);
 CEED_EXTERN int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path, char **buffer);
-
 CEED_EXTERN int CeedPathConcatenate(Ceed ceed, const char *base_file_path,
                                     const char *relative_file_path, char **new_file_path);
+CEED_EXTERN int CeedGetJitRelativePath(const char *absolute_file_path,
+                                       const char **relative_file_path);
+CEED_EXTERN int CeedGetJitAbsolutePath(Ceed ceed, const char *relative_file_path,
+                                       char **absolute_file_path);
 
 #endif
diff --git a/include/ceed/khash.h b/include/ceed/khash.h
index 1e71f6dd2a..3a3dd4d91d 100644
--- a/include/ceed/khash.h
+++ b/include/ceed/khash.h
@@ -82,8 +82,8 @@ int main() {
 	* Added destructor
 */
 
-#ifndef _ceed_khash_h
-#define _ceed_khash_h
+#ifndef __AC_KHASH_H
+#define __AC_KHASH_H
 
 /*!
   @header
@@ -589,4 +589,4 @@ typedef const char *kh_cstr_t;
 #define KHASH_MAP_INIT_STR(name, khval_t)								\
 	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
 
-#endif /* _ceed_khash_h */
+#endif /* __AC_KHASH_H */
diff --git a/interface/ceed-basis.c b/interface/ceed-basis.c
index fb5b8b0d44..8c8d09879a 100644
--- a/interface/ceed-basis.c
+++ b/interface/ceed-basis.c
@@ -158,12 +158,12 @@ static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s,
 **/
 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m,
                           CeedInt n, const CeedScalar *a, FILE *stream) {
-  for (int i=0; i<m; i++) {
+  for (CeedInt i=0; i<m; i++) {
     if (m > 1)
       fprintf(stream, "%12s[%d]:", name, i);
     else
       fprintf(stream, "%12s:", name);
-    for (int j=0; j<n; j++)
+    for (CeedInt j=0; j<n; j++)
       fprintf(stream, fp_fmt, fabs(a[i*n+j]) > 1E-14 ? a[i*n+j] : 0);
     fputs("\n", stream);
   }
@@ -485,11 +485,30 @@ int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp,
     return CEED_ERROR_SUCCESS;
   }
 
-  if (dim<1)
+  if (dim < 1)
     // LCOV_EXCL_START
     return CeedError(ceed, CEED_ERROR_DIMENSION,
                      "Basis dimension must be a positive value");
   // LCOV_EXCL_STOP
+
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (P_1d < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 node");
+  // LCOV_EXCL_STOP
+
+  if (Q_1d < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 quadrature point");
+  // LCOV_EXCL_STOP
+
   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE
                           : dim == 2 ? CEED_TOPOLOGY_QUAD
                           : CEED_TOPOLOGY_HEX;
@@ -549,12 +568,30 @@ int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp,
   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d,
              *q_weight_1d;
 
-  if (dim<1)
+  if (dim < 1)
     // LCOV_EXCL_START
     return CeedError(ceed, CEED_ERROR_DIMENSION,
                      "Basis dimension must be a positive value");
   // LCOV_EXCL_STOP
 
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (P < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 node");
+  // LCOV_EXCL_STOP
+
+  if (Q < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 quadrature point");
+  // LCOV_EXCL_STOP
+
   // Get Nodes and Weights
   ierr = CeedCalloc(P*Q, &interp_1d); CeedChk(ierr);
   ierr = CeedCalloc(P*Q, &grad_1d); CeedChk(ierr);
@@ -655,6 +692,24 @@ int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (num_nodes < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 node");
+  // LCOV_EXCL_STOP
+
+  if (num_qpts < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 quadrature point");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, basis); CeedChk(ierr);
 
   ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr);
@@ -730,6 +785,24 @@ int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (num_nodes < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 node");
+  // LCOV_EXCL_STOP
+
+  if (num_qpts < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 quadrature point");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, basis); CeedChk(ierr);
 
   (*basis)->ceed = ceed;
@@ -1293,14 +1366,14 @@ int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
   // Allocate
   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0*atan(1.0);
   // Build q_ref_1d, q_weight_1d
-  for (int i = 0; i <= Q/2; i++) {
+  for (CeedInt i = 0; i <= Q/2; i++) {
     // Guess
     xi = cos(PI*(CeedScalar)(2*i+1)/((CeedScalar)(2*Q)));
     // Pn(xi)
     P0 = 1.0;
     P1 = xi;
     P2 = 0.0;
-    for (int j = 2; j <= Q; j++) {
+    for (CeedInt j = 2; j <= Q; j++) {
       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
       P0 = P1;
       P1 = P2;
@@ -1309,10 +1382,10 @@ int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
     dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
     xi = xi-P2/dP2;
     // Newton to convergence
-    for (int k=0; k<100 && fabs(P2)>10*CEED_EPSILON; k++) {
+    for (CeedInt k=0; k<100 && fabs(P2)>10*CEED_EPSILON; k++) {
       P0 = 1.0;
       P1 = xi;
-      for (int j = 2; j <= Q; j++) {
+      for (CeedInt j = 2; j <= Q; j++) {
         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
         P0 = P1;
         P1 = P2;
@@ -1361,14 +1434,14 @@ int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
   q_ref_1d[0] = -1.0;
   q_ref_1d[Q-1] = 1.0;
   // Interior
-  for (int i = 1; i <= (Q-1)/2; i++) {
+  for (CeedInt i = 1; i <= (Q-1)/2; i++) {
     // Guess
     xi = cos(PI*(CeedScalar)(i)/(CeedScalar)(Q-1));
     // Pn(xi)
     P0 = 1.0;
     P1 = xi;
     P2 = 0.0;
-    for (int j = 2; j < Q; j++) {
+    for (CeedInt j = 2; j < Q; j++) {
       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
       P0 = P1;
       P1 = P2;
@@ -1378,10 +1451,10 @@ int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
     d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi);
     xi = xi-dP2/d2P2;
     // Newton to convergence
-    for (int k=0; k<100 && fabs(dP2)>10*CEED_EPSILON; k++) {
+    for (CeedInt k=0; k<100 && fabs(dP2)>10*CEED_EPSILON; k++) {
       P0 = 1.0;
       P1 = xi;
-      for (int j = 2; j < Q; j++) {
+      for (CeedInt j = 2; j < Q; j++) {
         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
         P0 = P1;
         P1 = P2;
diff --git a/interface/ceed-elemrestriction.c b/interface/ceed-elemrestriction.c
index 49d960c444..21d785e472 100644
--- a/interface/ceed-elemrestriction.c
+++ b/interface/ceed-elemrestriction.c
@@ -43,8 +43,8 @@ int CeedPermutePadOffsets(const CeedInt *offsets, CeedInt *blk_offsets,
                           CeedInt num_blk, CeedInt num_elem, CeedInt blk_size,
                           CeedInt elem_size) {
   for (CeedInt e=0; e<num_blk*blk_size; e+=blk_size)
-    for (int j=0; j<blk_size; j++)
-      for (int k=0; k<elem_size; k++)
+    for (CeedInt j=0; j<blk_size; j++)
+      for (CeedInt k=0; k<elem_size; k++)
         blk_offsets[e*elem_size + k*blk_size + j]
           = offsets[CeedIntMin(e+j,num_elem-1)*elem_size + k];
   return CEED_ERROR_SUCCESS;
@@ -77,7 +77,7 @@ int CeedElemRestrictionGetStrides(CeedElemRestriction rstr,
                      "ElemRestriction has no stride data");
   // LCOV_EXCL_STOP
 
-  for (int i=0; i<3; i++)
+  for (CeedInt i=0; i<3; i++)
     (*strides)[i] = rstr->strides[i];
   return CEED_ERROR_SUCCESS;
 }
@@ -205,7 +205,7 @@ int CeedElemRestrictionGetELayout(CeedElemRestriction rstr,
                      "ElemRestriction has no layout data");
   // LCOV_EXCL_STOP
 
-  for (int i=0; i<3; i++)
+  for (CeedInt i=0; i<3; i++)
     (*layout)[i] = rstr->layout[i];
   return CEED_ERROR_SUCCESS;
 }
@@ -227,7 +227,7 @@ int CeedElemRestrictionGetELayout(CeedElemRestriction rstr,
 **/
 int CeedElemRestrictionSetELayout(CeedElemRestriction rstr,
                                   CeedInt layout[3]) {
-  for (int i = 0; i<3; i++)
+  for (CeedInt i = 0; i<3; i++)
     rstr->layout[i] = layout[i];
   return CEED_ERROR_SUCCESS;
 }
@@ -374,6 +374,24 @@ int CeedElemRestrictionCreate(Ceed ceed, CeedInt num_elem, CeedInt elem_size,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (elem_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Element size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (comp_stride < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction component stride must be at least 1");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, rstr); CeedChk(ierr);
   (*rstr)->ceed = ceed;
   ierr = CeedReference(ceed); CeedChk(ierr);
@@ -446,6 +464,24 @@ int CeedElemRestrictionCreateOriented(Ceed ceed, CeedInt num_elem,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (elem_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Element size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (comp_stride < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction component stride must be at least 1");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, rstr); CeedChk(ierr);
   (*rstr)->ceed = ceed;
   ierr = CeedReference(ceed); CeedChk(ierr);
@@ -510,6 +546,18 @@ int CeedElemRestrictionCreateStrided(Ceed ceed, CeedInt num_elem,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (elem_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Element size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction must have at least 1 component");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, rstr); CeedChk(ierr);
   (*rstr)->ceed = ceed;
   ierr = CeedReference(ceed); CeedChk(ierr);
@@ -522,7 +570,7 @@ int CeedElemRestrictionCreateStrided(Ceed ceed, CeedInt num_elem,
   (*rstr)->blk_size = 1;
   (*rstr)->is_oriented = 0;
   ierr = CeedMalloc(3, &(*rstr)->strides); CeedChk(ierr);
-  for (int i=0; i<3; i++)
+  for (CeedInt i=0; i<3; i++)
     (*rstr)->strides[i] = strides[i];
   ierr = ceed->ElemRestrictionCreate(CEED_MEM_HOST, CEED_OWN_POINTER, NULL,
                                      *rstr);
@@ -591,6 +639,30 @@ int CeedElemRestrictionCreateBlocked(Ceed ceed, CeedInt num_elem,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (elem_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Element size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (blk_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Block size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (comp_stride < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction component stride must be at least 1");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, rstr); CeedChk(ierr);
 
   ierr = CeedCalloc(num_blk*blk_size*elem_size, &blk_offsets); CeedChk(ierr);
@@ -662,6 +734,24 @@ int CeedElemRestrictionCreateBlockedStrided(Ceed ceed, CeedInt num_elem,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (elem_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Element size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (blk_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Block size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction must have at least 1 component");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, rstr); CeedChk(ierr);
 
   (*rstr)->ceed = ceed;
@@ -675,7 +765,7 @@ int CeedElemRestrictionCreateBlockedStrided(Ceed ceed, CeedInt num_elem,
   (*rstr)->blk_size = blk_size;
   (*rstr)->is_oriented = 0;
   ierr = CeedMalloc(3, &(*rstr)->strides); CeedChk(ierr);
-  for (int i=0; i<3; i++)
+  for (CeedInt i=0; i<3; i++)
     (*rstr)->strides[i] = strides[i];
   ierr = ceed->ElemRestrictionCreateBlocked(CEED_MEM_HOST, CEED_OWN_POINTER,
          NULL, *rstr); CeedChk(ierr);
diff --git a/interface/ceed-fortran.c b/interface/ceed-fortran.c
index 88f5635be0..2153518784 100644
--- a/interface/ceed-fortran.c
+++ b/interface/ceed-fortran.c
@@ -1074,8 +1074,8 @@ CEED_EXTERN void fCeedCompositeOperatorCreate(int *ceed, int *op, int *err) {
 #define fCeedOperatorSetField \
     FORTRAN_NAME(ceedoperatorsetfield,CEEDOPERATORSETFIELD)
 CEED_EXTERN void fCeedOperatorSetField(int *op, const char *field_name, int *r,
-                                       int *b,
-                                       int *v, int *err, fortran_charlen_t field_name_len) {
+                                       int *b, int *v, int *err,
+                                       fortran_charlen_t field_name_len) {
   FIX_STRING(field_name);
   CeedElemRestriction r_;
   CeedBasis b_;
@@ -1119,7 +1119,16 @@ CEED_EXTERN void fCeedCompositeOperatorAddSub(int *compositeop, int *subop,
   CeedOperator subop_ = CeedOperator_dict[*subop];
 
   *err = CeedCompositeOperatorAddSub(compositeop_, subop_);
-  if (*err) return;
+}
+
+#define fCeedOperatorSetName \
+    FORTRAN_NAME(ceedoperatorsetname, CEEDOPERATORSETNAME)
+CEED_EXTERN void fCeedOperatorSetName(int *op, const char *name, int *err,
+                                      fortran_charlen_t name_len) {
+  FIX_STRING(name);
+  CeedOperator op_ = CeedOperator_dict[*op];
+
+  *err = CeedOperatorSetName(op_, name_c);
 }
 
 #define fCeedOperatorLinearAssembleQFunction \
diff --git a/interface/ceed-jit-source-root-default.c b/interface/ceed-jit-source-root-default.c
new file mode 100644
index 0000000000..26a3348405
--- /dev/null
+++ b/interface/ceed-jit-source-root-default.c
@@ -0,0 +1,12 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed-impl.h>
+
+// This file and definition are used for in-source builds.
+// The definition for installs is in ceed-jit-source-root-install.c.
+const char CeedJitSourceRootDefault[] = CEED_JIT_SOUCE_ROOT_DEFAULT;
diff --git a/interface/ceed-jit-source-root-install.c b/interface/ceed-jit-source-root-install.c
new file mode 100644
index 0000000000..e25679e7e9
--- /dev/null
+++ b/interface/ceed-jit-source-root-install.c
@@ -0,0 +1,12 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed-impl.h>
+
+// This file and definition are used for installs.
+// The definition for in-source is in ceed-jit-source-root-default.c.
+const char CeedJitSourceRootDefault[] = CEED_JIT_SOUCE_ROOT_DEFAULT;
diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c
index 17a110f849..e994aee245 100644
--- a/interface/ceed-jit-tools.c
+++ b/interface/ceed-jit-tools.c
@@ -8,17 +8,71 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <ceed/jit-tools.h>
+#include <ceed-impl.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
 
+/**
+  @brief Check if a file can be opened for reading at the path given
+
+  @param ceed                  A Ceed object for error handling
+  @param[in]  source_file_path Absolute path to source file; text from the last ':' onward is ignored
+  @param[out] is_valid         Boolean flag indicating if file can be opened
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref Backend
+**/
+int CeedCheckFilePath(Ceed ceed, const char *source_file_path, bool *is_valid) {
+  int ierr;
+
+  // Sometimes we have path/to/file.h:function_name
+  // Create temporary file path without name, if needed
+  char *source_file_path_only;
+  char *last_colon = strrchr(source_file_path, ':');
+  if (last_colon) {
+    size_t source_file_path_length = (last_colon - source_file_path + 1);
+    // Length is the path before ':' plus one byte for the terminator (presumably zero-filled by CeedCalloc - confirm)
+    ierr = CeedCalloc(source_file_path_length, &source_file_path_only);
+    CeedChk(ierr);
+    memcpy(source_file_path_only, source_file_path, source_file_path_length - 1);
+  } else {
+    source_file_path_only = (char *)source_file_path; // No ':' present: use the caller's string as-is (not freed below)
+  }
+
+  // Debug
+  CeedDebug256(ceed, 1, "Checking for source file: ");
+  CeedDebug256(ceed, 255, "%s\n", source_file_path_only);
+
+  // Check for valid file path: valid means fopen in "rb" mode succeeds
+  FILE *source_file;
+  source_file = fopen(source_file_path_only, "rb");
+  *is_valid = !!source_file;
+
+  if (*is_valid) {
+    // Debug
+    CeedDebug256(ceed, 1, "Found JiT source file: ");
+    CeedDebug256(ceed, 255, "%s\n", source_file_path_only);
+
+    fclose(source_file);
+  }
+
+  // Free temp file path, if used (only allocated when a ':' was found)
+  if (last_colon) {
+    ierr = CeedFree(&source_file_path_only); CeedChk(ierr);
+  }
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief Load source file into initalized string buffer, including full text
            of local files in place of `#include "local.h"`
 
-  @param ceed                   A Ceed object for error handling
-  @param[in]  source_file_path  Absolute path to source file
-  @param[out] buffer            String buffer for source file contents
+  @param ceed                  A Ceed object for error handling
+  @param[in]  source_file_path Absolute path to source file
+  @param[out] buffer           String buffer for source file contents
 
   @return An error code: 0 - success, otherwise - failure
 
@@ -69,7 +123,7 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed,
     const char *next_e = strchr(first_hash, 'e');
     char keyword[8] = "";
     if (next_e)
-      strncpy(keyword, &next_e[-6], 7);
+      memcpy(keyword, &next_e[-6], 7);
     bool is_hash_include = !strcmp(keyword, "include");
     // ---- Spaces allowed in '#  include <header.h>'
     if (next_e)
@@ -80,9 +134,9 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed,
       long current_size = strlen(*buffer);
       long copy_size = first_hash - &temp_buffer[file_offset];
       ierr = CeedRealloc(current_size + copy_size + 2, buffer); CeedChk(ierr);
-      strncpy(&(*buffer)[current_size], "\n", 2);
-      strncpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size);
-      strncpy(&(*buffer)[current_size + copy_size], "", 1);
+      memcpy(&(*buffer)[current_size], "\n", 2);
+      memcpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size);
+      memcpy(&(*buffer)[current_size + copy_size], "", 1);
       // -- Load local "header.h"
       char *next_quote = strchr(first_hash, '"');
       char *next_new_line = strchr(first_hash, '\n');
@@ -95,12 +149,13 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed,
         long include_file_name_len = strchr(&next_quote[1], '"') - next_quote - 1;
         ierr = CeedCalloc(root_length + include_file_name_len + 2,
                           &include_source_path); CeedChk(ierr);
-        strncpy(include_source_path, source_file_path, root_length + 1);
-        strncpy(&include_source_path[root_length + 1], &next_quote[1],
-                include_file_name_len);
-        strncpy(&include_source_path[root_length + include_file_name_len + 1], "", 1);
+        memcpy(include_source_path, source_file_path, root_length + 1);
+        memcpy(&include_source_path[root_length + 1], &next_quote[1],
+               include_file_name_len);
+        memcpy(&include_source_path[root_length + include_file_name_len + 1], "", 1);
         // ---- Recursive call to load source to buffer
         ierr = CeedLoadSourceToInitalizedBuffer(ceed, include_source_path, buffer);
+        CeedDebug256(ceed, 2, "JiT Including: %s\n", include_source_path);
         CeedChk(ierr);
         ierr = CeedFree(&include_source_path); CeedChk(ierr);
       }
@@ -113,9 +168,9 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed,
   long current_size = strlen(*buffer);
   long copy_size = strlen(&temp_buffer[file_offset]);
   ierr = CeedRealloc(current_size + copy_size + 2, buffer); CeedChk(ierr);
-  strncpy(&(*buffer)[current_size], "\n", 2);
-  strncpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size);
-  strncpy(&(*buffer)[current_size + copy_size + 1], "", 1);
+  memcpy(&(*buffer)[current_size], "\n", 2);
+  memcpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size);
+  memcpy(&(*buffer)[current_size + copy_size + 1], "", 1);
 
   // Cleanup
   ierr = CeedFree(&temp_buffer); CeedChk(ierr);
@@ -135,9 +190,9 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed,
            of local files in place of `#include "local.h"`.
          Note: Caller is responsible for freeing the string buffer with `CeedFree()`.
 
-  @param ceed                   A Ceed object for error handling
-  @param[in]  source_file_path  Absolute path to source file
-  @param[out] buffer            String buffer for source file contents
+  @param ceed                  A Ceed object for error handling
+  @param[in]  source_file_path Absolute path to source file
+  @param[out] buffer           String buffer for source file contents
 
   @return An error code: 0 - success, otherwise - failure
 
@@ -185,3 +240,77 @@ int CeedPathConcatenate(Ceed ceed, const char *base_file_path,
 
   return CEED_ERROR_SUCCESS;
 }
+
+/**
+  @brief Find the relative filepath to an installed JiT file
+
+  @param[in]  absolute_file_path Absolute path to installed JiT file
+  @param[out] relative_file_path Pointer into absolute_file_path, not a copy
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref Backend
+**/
+int CeedGetJitRelativePath(const char *absolute_file_path,
+                           const char **relative_file_path) {
+  *(relative_file_path) = strstr(absolute_file_path, "ceed/jit-source");
+  // strstr() returns NULL when the path has no "ceed/jit-source" component
+  if (!*relative_file_path)
+    // LCOV_EXCL_START
+    return CeedError(NULL, CEED_ERROR_MAJOR,
+                     "Couldn't find relative path including "
+                     "'ceed/jit-source' for: %s", absolute_file_path);
+  // LCOV_EXCL_STOP
+
+  return CEED_ERROR_SUCCESS;
+}
+
+/**
+  @brief Build an absolute filepath to a JiT file
+
+  @param ceed                    A Ceed object for error handling
+  @param[in]  relative_file_path Relative path to installed JiT file
+  @param[out] absolute_file_path Path to first match; free with CeedFree()
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref Backend
+**/
+int CeedGetJitAbsolutePath(Ceed ceed, const char *relative_file_path,
+                           char **absolute_file_path) {
+  int ierr;
+  Ceed ceed_parent;
+
+  // Debug
+  CeedDebug256(ceed, 1, "---------- Ceed JiT ----------\n");
+  CeedDebug256(ceed, 1, "Relative JiT source file: ");
+  CeedDebug256(ceed, 255, "%s\n", relative_file_path);
+
+  // Try the parent Ceed's JiT source roots in order; first valid path wins
+  ierr = CeedGetParent(ceed, &ceed_parent); CeedChk(ierr);
+  for (CeedInt i = 0; i < ceed_parent->num_jit_source_roots; i++) {
+    bool is_valid;
+
+    // Debug
+    CeedDebug256(ceed, 1, "Checking JiT root: ");
+    CeedDebug256(ceed, 255, "%s\n", ceed_parent->jit_source_roots[i]);
+
+    // Build and check absolute path with current root
+    ierr = CeedPathConcatenate(ceed, ceed_parent->jit_source_roots[i],
+                               relative_file_path, absolute_file_path);
+    CeedChk(ierr);
+    ierr = CeedCheckFilePath(ceed, *absolute_file_path, &is_valid); CeedChk(ierr);
+
+    if (is_valid) {
+      return CEED_ERROR_SUCCESS;
+    } else { // discard this candidate before trying the next root
+      ierr = CeedFree(absolute_file_path); CeedChk(ierr);
+    }
+  }
+
+  // LCOV_EXCL_START
+  return CeedError(ceed, CEED_ERROR_MAJOR,
+                   "Couldn't find matching JiT source file: %s",
+                   relative_file_path);
+  // LCOV_EXCL_STOP
+}
diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c
index d400677d7d..28f3ca5632 100644
--- a/interface/ceed-operator.c
+++ b/interface/ceed-operator.c
@@ -143,7 +143,7 @@ static int CeedOperatorFieldView(CeedOperatorField field,
   const char *pre = sub ? "  " : "";
   const char *in_out = input ? "Input" : "Output";
 
-  fprintf(stream, "%s    %s Field [%d]:\n"
+  fprintf(stream, "%s    %s field %d:\n"
           "%s      Name: \"%s\"\n",
           pre, in_out, field_number, pre, qf_field->field_name);
 
@@ -172,20 +172,26 @@ int CeedOperatorSingleView(CeedOperator op, bool sub, FILE *stream) {
   int ierr;
   const char *pre = sub ? "  " : "";
 
+  CeedInt num_elem, num_qpts;
+  ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChk(ierr);
+  ierr = CeedOperatorGetNumQuadraturePoints(op, &num_qpts); CeedChk(ierr);
+
   CeedInt total_fields = 0;
   ierr = CeedOperatorGetNumArgs(op, &total_fields); CeedChk(ierr);
+  fprintf(stream, "%s  %d elements with %d quadrature points each\n",
+          pre, num_elem, num_qpts);
 
-  fprintf(stream, "%s  %d Field%s\n", pre, total_fields,
+  fprintf(stream, "%s  %d field%s\n", pre, total_fields,
           total_fields>1 ? "s" : "");
 
-  fprintf(stream, "%s  %d Input Field%s:\n", pre, op->qf->num_input_fields,
+  fprintf(stream, "%s  %d input field%s:\n", pre, op->qf->num_input_fields,
           op->qf->num_input_fields>1 ? "s" : "");
   for (CeedInt i=0; i<op->qf->num_input_fields; i++) {
     ierr = CeedOperatorFieldView(op->input_fields[i], op->qf->input_fields[i],
                                  i, sub, 1, stream); CeedChk(ierr);
   }
 
-  fprintf(stream, "%s  %d Output Field%s:\n", pre, op->qf->num_output_fields,
+  fprintf(stream, "%s  %d output field%s:\n", pre, op->qf->num_output_fields,
           op->qf->num_output_fields>1 ? "s" : "");
   for (CeedInt i=0; i<op->qf->num_output_fields; i++) {
     ierr = CeedOperatorFieldView(op->output_fields[i], op->qf->output_fields[i],
@@ -207,7 +213,7 @@ int CeedOperatorSingleView(CeedOperator op, bool sub, FILE *stream) {
 int CeedOperatorGetActiveBasis(CeedOperator op, CeedBasis *active_basis) {
   *active_basis = NULL;
   if (op->is_composite) return CEED_ERROR_SUCCESS;
-  for (int i = 0; i < op->qf->num_input_fields; i++)
+  for (CeedInt i = 0; i < op->qf->num_input_fields; i++)
     if (op->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
       *active_basis = op->input_fields[i]->basis;
       break;
@@ -239,7 +245,7 @@ int CeedOperatorGetActiveElemRestriction(CeedOperator op,
     CeedElemRestriction *active_rstr) {
   *active_rstr = NULL;
   if (op->is_composite) return CEED_ERROR_SUCCESS;
-  for (int i = 0; i < op->qf->num_input_fields; i++)
+  for (CeedInt i = 0; i < op->qf->num_input_fields; i++)
     if (op->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
       *active_rstr = op->input_fields[i]->elem_restr;
       break;
@@ -653,7 +659,8 @@ int CeedOperatorReferenceCopy(CeedOperator op, CeedOperator *op_copy) {
 
   Active fields must be specified using this function, but their data (in a
   CeedVector) is passed in CeedOperatorApply().  There can be at most one active
-  input and at most one active output.
+  input CeedVector and at most one active output CeedVector passed to
+  CeedOperatorApply().
 
   @param op          CeedOperator on which to provide the field
   @param field_name  Name of the field (to be matched with the name used by
@@ -1163,6 +1170,31 @@ int CeedOperatorSetNumQuadraturePoints(CeedOperator op, CeedInt num_qpts) {
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Set name of CeedOperator for CeedOperatorView output
+
+  @param op    CeedOperator
+  @param name  Name to copy, or NULL or "" to remove previously set name
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref User
+**/
+int CeedOperatorSetName(CeedOperator op, const char *name) {
+  int ierr;
+  char *name_copy;
+  size_t name_len = name ? strlen(name) : 0;
+
+  ierr = CeedFree(&op->name); CeedChk(ierr); // drop any previous name
+  if (name_len > 0) {
+    ierr = CeedCalloc(name_len + 1, &name_copy); CeedChk(ierr);
+    memcpy(name_copy, name, name_len);
+    op->name = name_copy;
+  }
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief View a CeedOperator
 
@@ -1175,17 +1207,23 @@ int CeedOperatorSetNumQuadraturePoints(CeedOperator op, CeedInt num_qpts) {
 **/
 int CeedOperatorView(CeedOperator op, FILE *stream) {
   int ierr;
+  bool has_name = op->name;
 
   if (op->is_composite) {
-    fprintf(stream, "Composite CeedOperator\n");
+    fprintf(stream, "Composite CeedOperator%s%s\n",
+            has_name ? " - " : "", has_name ? op->name : "");
 
     for (CeedInt i=0; i<op->num_suboperators; i++) {
-      fprintf(stream, "  SubOperator [%d]:\n", i);
+      has_name = op->sub_operators[i]->name;
+      fprintf(stream, "  SubOperator %d%s%s:\n", i,
+              has_name ? " - " : "",
+              has_name ? op->sub_operators[i]->name : "");
       ierr = CeedOperatorSingleView(op->sub_operators[i], 1, stream);
       CeedChk(ierr);
     }
   } else {
-    fprintf(stream, "CeedOperator\n");
+    fprintf(stream, "CeedOperator%s%s\n",
+            has_name ? " - " : "", has_name ? op->name : "");
     ierr = CeedOperatorSingleView(op, 0, stream); CeedChk(ierr);
   }
   return CEED_ERROR_SUCCESS;
@@ -1655,12 +1693,7 @@ int CeedOperatorDestroy(CeedOperator *op) {
   ierr = CeedFree(&(*op)->context_labels); CeedChk(ierr);
 
   // Destroy fallback
-  if ((*op)->op_fallback) {
-    ierr = (*op)->qf_fallback->Destroy((*op)->qf_fallback); CeedChk(ierr);
-    ierr = CeedFree(&(*op)->qf_fallback); CeedChk(ierr);
-    ierr = (*op)->op_fallback->Destroy((*op)->op_fallback); CeedChk(ierr);
-    ierr = CeedFree(&(*op)->op_fallback); CeedChk(ierr);
-  }
+  ierr = CeedOperatorDestroy(&(*op)->op_fallback); CeedChk(ierr);
 
   // Destroy QF assembly cache
   ierr = CeedQFunctionAssemblyDataDestroy(&(*op)->qf_assembled); CeedChk(ierr);
@@ -1668,6 +1701,7 @@ int CeedOperatorDestroy(CeedOperator *op) {
   ierr = CeedFree(&(*op)->input_fields); CeedChk(ierr);
   ierr = CeedFree(&(*op)->output_fields); CeedChk(ierr);
   ierr = CeedFree(&(*op)->sub_operators); CeedChk(ierr);
+  ierr = CeedFree(&(*op)->name); CeedChk(ierr);
   ierr = CeedFree(op); CeedChk(ierr);
   return CEED_ERROR_SUCCESS;
 }
diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c
index 46235e787f..6789a90aee 100644
--- a/interface/ceed-preconditioning.c
+++ b/interface/ceed-preconditioning.c
@@ -34,51 +34,49 @@
 **/
 int CeedOperatorCreateFallback(CeedOperator op) {
   int ierr;
+  Ceed fallback_ceed;
 
-  // Fallback Ceed
-  const char *resource, *fallback_resource;
-  ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
-  ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
-  CeedChk(ierr);
-  if (!strcmp(resource, fallback_resource))
-    // LCOV_EXCL_START
-    return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
-                     "Backend %s cannot create an operator"
-                     "fallback to resource %s", resource, fallback_resource);
-  // LCOV_EXCL_STOP
+  // Check not already created
+  if (op->op_fallback) return CEED_ERROR_SUCCESS;
 
   // Fallback Ceed
-  Ceed ceed_ref;
-  if (!op->ceed->op_fallback_ceed) {
-    ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr);
-    ceed_ref->op_fallback_parent = op->ceed;
-    ceed_ref->Error = op->ceed->Error;
-    op->ceed->op_fallback_ceed = ceed_ref;
-  }
-  ceed_ref = op->ceed->op_fallback_ceed;
+  ierr = CeedGetOperatorFallbackCeed(op->ceed, &fallback_ceed); CeedChk(ierr);
 
   // Clone Op
-  CeedOperator op_ref;
-  ierr = CeedCalloc(1, &op_ref); CeedChk(ierr);
-  memcpy(op_ref, op, sizeof(*op_ref));
-  op_ref->data = NULL;
-  op_ref->is_interface_setup = false;
-  op_ref->is_backend_setup = false;
-  op_ref->ceed = ceed_ref;
-  ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr);
-  ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled,
-         &op_ref->qf_assembled); CeedChk(ierr);
-  op->op_fallback = op_ref;
-
-  // Clone QF
-  CeedQFunction qf_ref;
-  ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr);
-  memcpy(qf_ref, (op->qf), sizeof(*qf_ref));
-  qf_ref->data = NULL;
-  qf_ref->ceed = ceed_ref;
-  ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr);
-  op_ref->qf = qf_ref;
-  op->qf_fallback = qf_ref;
+  CeedOperator op_fallback;
+  if (op->is_composite) {
+    ierr = CeedCompositeOperatorCreate(fallback_ceed, &op_fallback);
+    CeedChk(ierr);
+    for (CeedInt i = 0; i < op->num_suboperators; i++) {
+      ierr = CeedCompositeOperatorAddSub(op_fallback, op->sub_operators[i]);
+      CeedChk(ierr);
+    }
+  } else {
+    ierr = CeedOperatorCreate(fallback_ceed, op->qf, op->dqf, op->dqfT,
+                              &op_fallback); CeedChk(ierr);
+    for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
+      ierr = CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name,
+                                  op->input_fields[i]->elem_restr,
+                                  op->input_fields[i]->basis,
+                                  op->input_fields[i]->vec); CeedChk(ierr);
+    }
+    for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
+      ierr = CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name,
+                                  op->output_fields[i]->elem_restr,
+                                  op->output_fields[i]->basis,
+                                  op->output_fields[i]->vec); CeedChk(ierr);
+    }
+    ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled,
+           &op_fallback->qf_assembled); CeedChk(ierr);
+    if (op_fallback->num_qpts == 0) {
+      ierr = CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts);
+      CeedChk(ierr);
+    }
+  }
+  ierr = CeedOperatorSetName(op_fallback, op->name); CeedChk(ierr);
+  ierr = CeedOperatorCheckReady(op_fallback); CeedChk(ierr);
+  op->op_fallback = op_fallback;
+
   return CEED_ERROR_SUCCESS;
 }
 
@@ -485,11 +483,11 @@ static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
 
   // Determine i, j locations for element matrices
   CeedInt count = 0;
-  for (int e = 0; e < num_elem; ++e) {
-    for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
-      for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
-        for (int i = 0; i < elem_size; ++i) {
-          for (int j = 0; j < elem_size; ++j) {
+  for (CeedInt e = 0; e < num_elem; ++e) {
+    for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) {
+      for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) {
+        for (CeedInt i = 0; i < elem_size; ++i) {
+          for (CeedInt j = 0; j < elem_size; ++j) {
             const CeedInt elem_dof_index_row = (i)*layout_er[0] +
                                                (comp_out)*layout_er[1] + e*layout_er[2];
             const CeedInt elem_dof_index_col = (j)*layout_er[0] +
@@ -670,31 +668,32 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
                                      num_qpts]; // logically 3-tensor
   CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
   CeedScalar elem_mat[elem_size * elem_size];
-  int count = 0;
+  CeedInt count = 0;
   CeedScalar *vals;
   ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
-  for (int e = 0; e < num_elem; ++e) {
-    for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
-      for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
-        for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
+  for (CeedInt e = 0; e < num_elem; ++e) {
+    for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) {
+      for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) {
+        for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
           B_mat_in[ell] = 0.0;
         }
-        for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
+        for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
           B_mat_out[ell] = 0.0;
         }
         // Store block-diagonal D matrix as collection of small dense blocks
-        for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
+        for (CeedInt ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts;
+             ++ell) {
           D_mat[ell] = 0.0;
         }
         // form element matrix itself (for each block component)
-        for (int ell = 0; ell < elem_size*elem_size; ++ell) {
+        for (CeedInt ell = 0; ell < elem_size*elem_size; ++ell) {
           elem_mat[ell] = 0.0;
         }
-        for (int q = 0; q < num_qpts; ++q) {
-          for (int n = 0; n < elem_size; ++n) {
+        for (CeedInt q = 0; q < num_qpts; ++q) {
+          for (CeedInt n = 0; n < elem_size; ++n) {
             CeedInt d_in = -1;
-            for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
-              const int qq = num_eval_mode_in*q;
+            for (CeedInt e_in = 0; e_in < num_eval_mode_in; ++e_in) {
+              const CeedInt qq = num_eval_mode_in*q;
               if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
                 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
               } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
@@ -708,8 +707,8 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
               }
             }
             CeedInt d_out = -1;
-            for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
-              const int qq = num_eval_mode_out*q;
+            for (CeedInt e_out = 0; e_out < num_eval_mode_out; ++e_out) {
+              const CeedInt qq = num_eval_mode_out*q;
               if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
                 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
               } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
@@ -723,25 +722,26 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
               }
             }
           }
-          for (int ei = 0; ei < num_eval_mode_out; ++ei) {
-            for (int ej = 0; ej < num_eval_mode_in; ++ej) {
-              const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
-                                          +comp_out;
-              const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
-                                e*layout_qf[2];
+          for (CeedInt ei = 0; ei < num_eval_mode_out; ++ei) {
+            for (CeedInt ej = 0; ej < num_eval_mode_in; ++ej) {
+              const CeedInt eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)
+                                              *num_comp
+                                              +comp_out;
+              const CeedInt index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
+                                    e*layout_qf[2];
               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
             }
           }
         }
         // Compute B^T*D
-        for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
+        for (CeedInt ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
           BTD[ell] = 0.0;
         }
-        for (int j = 0; j<elem_size; ++j) {
-          for (int q = 0; q<num_qpts; ++q) {
-            int qq = num_eval_mode_out*q;
-            for (int ei = 0; ei < num_eval_mode_in; ++ei) {
-              for (int ej = 0; ej < num_eval_mode_out; ++ej) {
+        for (CeedInt j = 0; j<elem_size; ++j) {
+          for (CeedInt q = 0; q<num_qpts; ++q) {
+            const CeedInt qq = num_eval_mode_out*q;
+            for (CeedInt ei = 0; ei < num_eval_mode_in; ++ei) {
+              for (CeedInt ej = 0; ej < num_eval_mode_out; ++ej) {
                 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
                   B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
               }
@@ -753,8 +753,8 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
                                   elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
 
         // put element matrix in coordinate data structure
-        for (int i = 0; i < elem_size; ++i) {
-          for (int j = 0; j < elem_size; ++j) {
+        for (CeedInt i = 0; i < elem_size; ++i) {
+          for (CeedInt j = 0; j < elem_size; ++j) {
             vals[offset + count] = elem_mat[i*elem_size + j];
             count++;
           }
@@ -846,7 +846,7 @@ static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
                             op_coarse); CeedChk(ierr);
   CeedElemRestriction rstr_fine = NULL;
   // -- Clone input fields
-  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
+  for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
       rstr_fine = op_fine->input_fields[i]->elem_restr;
       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
@@ -860,7 +860,7 @@ static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
     }
   }
   // -- Clone output fields
-  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
+  for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
@@ -959,11 +959,33 @@ static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
   CeedChk(ierr);
 
+  // Clone name; an unnamed op_fine contributes "" (NULL for %s is undefined)
+  bool has_name = op_fine->name;
+  size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
+  ierr = CeedOperatorSetName(*op_coarse, op_fine->name); CeedChk(ierr);
+  { // "prolongation" (12) + " for " (5) + NUL = 18 bytes besides the name
+    char *prolongation_name;
+    ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr);
+    sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "",
+            has_name ? op_fine->name : "");
+    ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr);
+    ierr = CeedFree(&prolongation_name); CeedChk(ierr);
+  }
+  { // "restriction" (11) + " for " (5) + NUL = 17 bytes besides the name
+    char *restriction_name;
+    ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr);
+    sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "",
+            has_name ? op_fine->name : "");
+    ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr);
+    ierr = CeedFree(&restriction_name); CeedChk(ierr);
+  }
+
   // Cleanup
   ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
   ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
   ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
   ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
+
   return CEED_ERROR_SUCCESS;
 }
 
@@ -1685,7 +1707,7 @@ int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
   if (is_composite) {
     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
-    for (int k = 0; k < num_suboperators; ++k) {
+    for (CeedInt k = 0; k < num_suboperators; ++k) {
       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
              &single_entries); CeedChk(ierr);
       *num_entries += single_entries;
@@ -1703,7 +1725,7 @@ int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
   if (is_composite) {
     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
-    for (int k = 0; k < num_suboperators; ++k) {
+    for (CeedInt k = 0; k < num_suboperators; ++k) {
       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
              *cols); CeedChk(ierr);
       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
@@ -1775,7 +1797,7 @@ int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
   if (is_composite) {
     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
-    for (int k = 0; k < num_suboperators; ++k) {
+    for (CeedInt k = 0; k < num_suboperators; ++k) {
       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
       CeedChk(ierr);
       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
@@ -1856,14 +1878,16 @@ int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
   ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
   ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
   ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
+  const CeedScalar *interp_f_source = NULL, *interp_c_source = NULL;
   if (is_tensor_f) {
-    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
-    memcpy(interp_c, basis_coarse->interp_1d,
-           Q*P_c*sizeof basis_coarse->interp_1d[0]);
+    ierr = CeedBasisGetInterp1D(basis_fine, &interp_f_source); CeedChk(ierr);
+    ierr = CeedBasisGetInterp1D(basis_coarse, &interp_c_source); CeedChk(ierr);
   } else {
-    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
-    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
+    ierr = CeedBasisGetInterp(basis_fine, &interp_f_source); CeedChk(ierr);
+    ierr = CeedBasisGetInterp(basis_coarse, &interp_c_source); CeedChk(ierr);
   }
+  memcpy(interp_f, interp_f_source, Q*P_f*sizeof interp_f_source[0]);
+  memcpy(interp_c, interp_c_source, Q*P_c*sizeof interp_c_source[0]);
 
   // -- QR Factorization, interp_f = Q R
   ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);
diff --git a/interface/ceed-qfunction.c b/interface/ceed-qfunction.c
index 8471d0a79f..d94f76af46 100644
--- a/interface/ceed-qfunction.c
+++ b/interface/ceed-qfunction.c
@@ -67,6 +67,8 @@ static size_t num_qfunctions;
 int CeedQFunctionRegister(const char *name, const char *source,
                           CeedInt vec_length, CeedQFunctionUser f,
                           int (*init)(Ceed, const char *, CeedQFunction)) {
+  int ierr;
+
   if (num_qfunctions >= sizeof(gallery_qfunctions) / sizeof(
         gallery_qfunctions[0]))
     // LCOV_EXCL_START
@@ -75,9 +77,12 @@ int CeedQFunctionRegister(const char *name, const char *source,
 
   CeedDebugEnv("Gallery Register: %s", name);
 
+  const char *relative_file_path;
+  ierr = CeedGetJitRelativePath(source, &relative_file_path); CeedChk(ierr);
+
   strncpy(gallery_qfunctions[num_qfunctions].name, name, CEED_MAX_RESOURCE_LEN);
   gallery_qfunctions[num_qfunctions].name[CEED_MAX_RESOURCE_LEN-1] = 0;
-  strncpy(gallery_qfunctions[num_qfunctions].source, source,
+  strncpy(gallery_qfunctions[num_qfunctions].source, relative_file_path,
           CEED_MAX_RESOURCE_LEN);
   gallery_qfunctions[num_qfunctions].source[CEED_MAX_RESOURCE_LEN-1] = 0;
   gallery_qfunctions[num_qfunctions].vec_length = vec_length;
@@ -138,7 +143,7 @@ static int CeedQFunctionFieldView(CeedQFunctionField field,
   ierr = CeedQFunctionFieldGetSize(field, &size); CeedChk(ierr);
   CeedEvalMode eval_mode;
   ierr = CeedQFunctionFieldGetEvalMode(field, &eval_mode); CeedChk(ierr);
-  fprintf(stream, "    %s Field [%d]:\n"
+  fprintf(stream, "    %s field %d:\n"
           "      Name: \"%s\"\n"
           "      Size: %d\n"
           "      EvalMode: \"%s\"\n",
@@ -598,16 +603,30 @@ int CeedQFunctionCreateInterior(Ceed ceed, CeedInt vec_length,
   (*qf)->function = f;
   (*qf)->user_flop_estimate = -1;
   if (strlen(source)) {
-    const char *kernel_name = strrchr(source, ':') + 1;
+    bool is_absolute_path;
+    char *absolute_path;
+
+    ierr = CeedCheckFilePath(ceed, source, &is_absolute_path); CeedChk(ierr);
+    if (is_absolute_path) {
+      absolute_path = (char *)source;
+    } else {
+      ierr = CeedGetJitAbsolutePath(ceed, source, &absolute_path); CeedChk(ierr);
+    }
+
+    const char *kernel_name = strrchr(absolute_path, ':') + 1;
     size_t kernel_name_len = strlen(kernel_name);
     ierr = CeedCalloc(kernel_name_len + 1, &kernel_name_copy); CeedChk(ierr);
-    strncpy(kernel_name_copy, kernel_name, kernel_name_len);
+    memcpy(kernel_name_copy, kernel_name, kernel_name_len);
     (*qf)->kernel_name = kernel_name_copy;
 
-    size_t source_len = strlen(source) - kernel_name_len - 1;
+    size_t source_len = strlen(absolute_path) - kernel_name_len - 1;
     ierr = CeedCalloc(source_len + 1, &source_copy); CeedChk(ierr);
-    strncpy(source_copy, source, source_len);
+    memcpy(source_copy, absolute_path, source_len);
     (*qf)->source_path = source_copy;
+
+    if (!is_absolute_path) {
+      ierr = CeedFree(&absolute_path); CeedChk(ierr);
+    }
   }
   ierr = CeedCalloc(CEED_FIELD_MAX, &(*qf)->input_fields); CeedChk(ierr);
   ierr = CeedCalloc(CEED_FIELD_MAX, &(*qf)->output_fields); CeedChk(ierr);
@@ -949,18 +968,18 @@ int CeedQFunctionSetUserFlopsEstimate(CeedQFunction qf, CeedSize flops) {
 int CeedQFunctionView(CeedQFunction qf, FILE *stream) {
   int ierr;
 
-  fprintf(stream, "%sCeedQFunction %s\n",
+  fprintf(stream, "%sCeedQFunction - %s\n",
           qf->is_gallery ? "Gallery " : "User ",
           qf->is_gallery ? qf->gallery_name : qf->kernel_name);
 
-  fprintf(stream, "  %d Input Field%s:\n", qf->num_input_fields,
+  fprintf(stream, "  %d input field%s:\n", qf->num_input_fields,
           qf->num_input_fields>1 ? "s" : "");
   for (CeedInt i=0; i<qf->num_input_fields; i++) {
     ierr = CeedQFunctionFieldView(qf->input_fields[i], i, 1, stream);
     CeedChk(ierr);
   }
 
-  fprintf(stream, "  %d Output Field%s:\n", qf->num_output_fields,
+  fprintf(stream, "  %d output field%s:\n", qf->num_output_fields,
           qf->num_output_fields>1 ? "s" : "");
   for (CeedInt i=0; i<qf->num_output_fields; i++) {
     ierr = CeedQFunctionFieldView(qf->output_fields[i], i, 0, stream);
@@ -1036,11 +1055,11 @@ int CeedQFunctionDestroy(CeedQFunction *qf) {
     ierr = (*qf)->Destroy(*qf); CeedChk(ierr);
   }
   // Free fields
-  for (int i=0; i<(*qf)->num_input_fields; i++) {
+  for (CeedInt i=0; i<(*qf)->num_input_fields; i++) {
     ierr = CeedFree(&(*(*qf)->input_fields[i]).field_name); CeedChk(ierr);
     ierr = CeedFree(&(*qf)->input_fields[i]); CeedChk(ierr);
   }
-  for (int i=0; i<(*qf)->num_output_fields; i++) {
+  for (CeedInt i=0; i<(*qf)->num_output_fields; i++) {
     ierr = CeedFree(&(*(*qf)->output_fields[i]).field_name); CeedChk(ierr);
     ierr = CeedFree(&(*qf)->output_fields[i]); CeedChk(ierr);
   }
diff --git a/interface/ceed-qfunctioncontext.c b/interface/ceed-qfunctioncontext.c
index 9e51406e6f..8967953a6b 100644
--- a/interface/ceed-qfunctioncontext.c
+++ b/interface/ceed-qfunctioncontext.c
@@ -221,6 +221,36 @@ int CeedQFunctionContextSetBackendData(CeedQFunctionContext ctx, void *data) {
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Get label for a registered QFunctionContext field, or `NULL` if no
+           field has been registered with this `field_name`
+
+  @param[in] ctx           CeedQFunctionContext
+  @param[in] field_name    Name of field to retrieve label
+  @param[out] field_label  Variable to store field label
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref Backend
+**/
+int CeedQFunctionContextGetFieldLabel(CeedQFunctionContext ctx,
+                                      const char *field_name,
+                                      CeedContextFieldLabel *field_label) {
+  int ierr;
+
+  CeedInt field_index;
+  ierr = CeedQFunctionContextGetFieldIndex(ctx, field_name, &field_index);
+  CeedChk(ierr);
+
+  if (field_index != -1) {
+    *field_label = ctx->field_labels[field_index];
+  } else { // index -1: hand back a NULL label and still return success
+    *field_label = NULL;
+  }
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief Set QFunctionContext field
 
@@ -231,7 +261,7 @@ int CeedQFunctionContextSetBackendData(CeedQFunctionContext ctx, void *data) {
 
   @return An error code: 0 - success, otherwise - failure
 
-  @ref User
+  @ref Backend
 **/
 int CeedQFunctionContextSetGeneric(CeedQFunctionContext ctx,
                                    CeedContextFieldLabel field_label,
@@ -257,6 +287,62 @@ int CeedQFunctionContextSetGeneric(CeedQFunctionContext ctx,
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Set QFunctionContext field holding a double precision value
+
+  @param ctx         CeedQFunctionContext
+  @param field_label Label of field to set; a NULL label is rejected
+  @param values      Values to set
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref Backend
+**/
+int CeedQFunctionContextSetDouble(CeedQFunctionContext ctx,
+                                  CeedContextFieldLabel field_label, double *values) {
+  int ierr;
+
+  if (!field_label)
+    // LCOV_EXCL_START
+    return CeedError(ctx->ceed, CEED_ERROR_UNSUPPORTED,
+                     "Invalid field label");
+  // LCOV_EXCL_STOP
+
+  ierr = CeedQFunctionContextSetGeneric(ctx, field_label,
+                                        CEED_CONTEXT_FIELD_DOUBLE,
+                                        values); CeedChk(ierr);
+
+  return CEED_ERROR_SUCCESS;
+}
+
+/**
+  @brief Set QFunctionContext field holding an int32 value
+
+  @param ctx         CeedQFunctionContext
+  @param field_label Label of field to set; a NULL label is rejected
+  @param values      Values to set
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref Backend
+**/
+int CeedQFunctionContextSetInt32(CeedQFunctionContext ctx,
+                                 CeedContextFieldLabel field_label, int *values) {
+  int ierr;
+
+  if (!field_label)
+    // LCOV_EXCL_START
+    return CeedError(ctx->ceed, CEED_ERROR_UNSUPPORTED,
+                     "Invalid field label");
+  // LCOV_EXCL_STOP
+
+  ierr = CeedQFunctionContextSetGeneric(ctx, field_label,
+                                        CEED_CONTEXT_FIELD_INT32,
+                                        values); CeedChk(ierr);
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief Increment the reference counter for a CeedQFunctionContext
 
@@ -653,36 +739,6 @@ int CeedQFunctionContextGetAllFieldLabels(CeedQFunctionContext ctx,
   return CEED_ERROR_SUCCESS;
 }
 
-/**
-  @brief Get label for a registered QFunctionContext field, or `NULL` if no
-           field has been registered with this `field_name`
-
-  @param[in] ctx           CeedQFunctionContext
-  @param[in] field_name    Name of field to retrieve label
-  @param[out] field_label  Variable to field label
-
-  @return An error code: 0 - success, otherwise - failure
-
-  @ref User
-**/
-int CeedQFunctionContextGetFieldLabel(CeedQFunctionContext ctx,
-                                      const char *field_name,
-                                      CeedContextFieldLabel *field_label) {
-  int ierr;
-
-  CeedInt field_index;
-  ierr = CeedQFunctionContextGetFieldIndex(ctx, field_name, &field_index);
-  CeedChk(ierr);
-
-  if (field_index != -1) {
-    *field_label = ctx->field_labels[field_index];
-  } else {
-    *field_label = NULL;
-  }
-
-  return CEED_ERROR_SUCCESS;
-}
-
 /**
   @brief Get the descriptive information about a CeedContextFieldLabel
 
@@ -708,62 +764,6 @@ int CeedContextFieldLabelGetDescription(CeedContextFieldLabel label,
   return CEED_ERROR_SUCCESS;
 }
 
-/**
-  @brief Set QFunctionContext field holding a double precision value
-
-  @param ctx         CeedQFunctionContext
-  @param field_label Label for field to register
-  @param values      Values to set
-
-  @return An error code: 0 - success, otherwise - failure
-
-  @ref User
-**/
-int CeedQFunctionContextSetDouble(CeedQFunctionContext ctx,
-                                  CeedContextFieldLabel field_label, double *values) {
-  int ierr;
-
-  if (!field_label)
-    // LCOV_EXCL_START
-    return CeedError(ctx->ceed, CEED_ERROR_UNSUPPORTED,
-                     "Invalid field label");
-  // LCOV_EXCL_STOP
-
-  ierr = CeedQFunctionContextSetGeneric(ctx, field_label,
-                                        CEED_CONTEXT_FIELD_DOUBLE,
-                                        values); CeedChk(ierr);
-
-  return CEED_ERROR_SUCCESS;
-}
-
-/**
-  @brief Set QFunctionContext field holding an int32 value
-
-  @param ctx         CeedQFunctionContext
-  @param field_label Label for field to register
-  @param values      Values to set
-
-  @return An error code: 0 - success, otherwise - failure
-
-  @ref User
-**/
-int CeedQFunctionContextSetInt32(CeedQFunctionContext ctx,
-                                 CeedContextFieldLabel field_label, int *values) {
-  int ierr;
-
-  if (!field_label)
-    // LCOV_EXCL_START
-    return CeedError(ctx->ceed, CEED_ERROR_UNSUPPORTED,
-                     "Invalid field label");
-  // LCOV_EXCL_STOP
-
-  ierr = CeedQFunctionContextSetGeneric(ctx, field_label,
-                                        CEED_CONTEXT_FIELD_INT32,
-                                        values); CeedChk(ierr);
-
-  return CEED_ERROR_SUCCESS;
-}
-
 /**
   @brief Get data size for a Context
 
@@ -804,6 +804,30 @@ int CeedQFunctionContextView(CeedQFunctionContext ctx, FILE *stream) {
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Set additional destroy routine for CeedQFunctionContext user data
+
+  @param[in,out] ctx    CeedQFunctionContext to set user destroy function
+  @param[in] f_mem_type Memory type to use when passing data into `f`
+  @param[in] f          Routine to use to destroy user data, must not be NULL
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref User
+**/
+
+int CeedQFunctionContextSetDataDestroy(CeedQFunctionContext ctx,
+                                       CeedMemType f_mem_type, CeedQFunctionContextDataDestroyUser f) {
+  if (!f)
+    // LCOV_EXCL_START
+    return CeedError(ctx->ceed, 1,
+                     "Must provide valid callback function for destroying user data");
+  // LCOV_EXCL_STOP
+  ctx->data_destroy_mem_type = f_mem_type;
+  ctx->data_destroy_function = f;
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief Destroy a CeedQFunctionContext
 
@@ -826,6 +850,14 @@ int CeedQFunctionContextDestroy(CeedQFunctionContext *ctx) {
                      "lock is in use");
   // LCOV_EXCL_STOP
 
+  if ((*ctx)->data_destroy_function) {
+    void *data;
+
+    ierr = CeedQFunctionContextGetData(*ctx, (*ctx)->data_destroy_mem_type, &data);
+    CeedChk(ierr);
+    ierr = (*ctx)->data_destroy_function(data); CeedChk(ierr);
+    ierr = CeedQFunctionContextRestoreData(*ctx, &data); CeedChk(ierr);
+  }
   if ((*ctx)->Destroy) {
     ierr = (*ctx)->Destroy(*ctx); CeedChk(ierr);
   }
diff --git a/interface/ceed-vector.c b/interface/ceed-vector.c
index 2896fe97ef..c79e784bd6 100644
--- a/interface/ceed-vector.c
+++ b/interface/ceed-vector.c
@@ -375,7 +375,7 @@ int CeedVectorSetValue(CeedVector vec, CeedScalar value) {
   } else {
     CeedScalar *array;
     ierr = CeedVectorGetArrayWrite(vec, CEED_MEM_HOST, &array); CeedChk(ierr);
-    for (int i=0; i<vec->length; i++) array[i] = value;
+    for (CeedInt i=0; i<vec->length; i++) array[i] = value;
     ierr = CeedVectorRestoreArray(vec, &array); CeedChk(ierr);
   }
   vec->state += 2;
@@ -1142,17 +1142,17 @@ int CeedVectorNorm(CeedVector vec, CeedNormType norm_type, CeedScalar *norm) {
   *norm = 0.;
   switch (norm_type) {
   case CEED_NORM_1:
-    for (int i=0; i<vec->length; i++) {
+    for (CeedInt i=0; i<vec->length; i++) {
       *norm += fabs(array[i]);
     }
     break;
   case CEED_NORM_2:
-    for (int i=0; i<vec->length; i++) {
+    for (CeedInt i=0; i<vec->length; i++) {
       *norm += fabs(array[i])*fabs(array[i]);
     }
     break;
   case CEED_NORM_MAX:
-    for (int i=0; i<vec->length; i++) {
+    for (CeedInt i=0; i<vec->length; i++) {
       const CeedScalar abs_v_i = fabs(array[i]);
       *norm = *norm > abs_v_i ? *norm : abs_v_i;
     }
diff --git a/interface/ceed.c b/interface/ceed.c
index f4869fed8e..0391504671 100644
--- a/interface/ceed.c
+++ b/interface/ceed.c
@@ -302,7 +302,7 @@ int CeedStringAllocCopy(const char *source, char **copy) {
   int ierr;
   size_t len = strlen(source);
   ierr = CeedCalloc(len + 1, copy); CeedChk(ierr);
-  memcpy(*copy, source, len + 1);
+  memcpy(*copy, source, len);
   return CEED_ERROR_SUCCESS;
 }
 
@@ -491,6 +491,45 @@ int CeedGetOperatorFallbackResource(Ceed ceed, const char **resource) {
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Get the fallback Ceed for CeedOperators, creating it on first use
+
+  @param[in,out] ceed        Ceed context; caches the fallback once created
+  @param[out] fallback_ceed  Variable to store fallback Ceed
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref Backend
+**/
+
+int CeedGetOperatorFallbackCeed(Ceed ceed, Ceed *fallback_ceed) {
+  int ierr;
+
+  // Create fallback Ceed if uninitialized
+  if (!ceed->op_fallback_ceed) {
+    // A backend may not name its own resource as the fallback
+    const char *resource, *fallback_resource;
+    ierr = CeedGetResource(ceed, &resource); CeedChk(ierr);
+    ierr = CeedGetOperatorFallbackResource(ceed, &fallback_resource); CeedChk(ierr);
+    if (!strcmp(resource, fallback_resource))
+      // LCOV_EXCL_START
+      return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
+                       "Backend %s cannot create an operator "
+                       "fallback to resource %s", resource, fallback_resource);
+    // LCOV_EXCL_STOP
+
+    // Create fallback; it shares the parent's error handler
+    Ceed fallback;
+    ierr = CeedInit(fallback_resource, &fallback); CeedChk(ierr);
+    fallback->op_fallback_parent = ceed;
+    fallback->Error = ceed->Error;
+    ceed->op_fallback_ceed = fallback;
+  }
+  *fallback_ceed = ceed->op_fallback_ceed;
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief Set the fallback resource for CeedOperators. The current resource, if
            any, is freed by calling this function. This string is freed upon the
@@ -807,6 +846,7 @@ int CeedInit(const char *resource, Ceed *ceed) {
 
   // Setup Ceed
   ierr = CeedCalloc(1, ceed); CeedChk(ierr);
+  ierr = CeedCalloc(1, &(*ceed)->jit_source_roots); CeedChk(ierr);
   const char *ceed_error_handler = getenv("CEED_ERROR_HANDLER");
   if (!ceed_error_handler)
     ceed_error_handler = "abort";
@@ -913,6 +953,13 @@ int CeedInit(const char *resource, Ceed *ceed) {
   ierr = CeedStringAllocCopy(backends[match_index].prefix,
                              (char **)&(*ceed)->resource);
   CeedChk(ierr);
+
+  // Set default JiT source root
+  // Note: there will always be the default root for every Ceed
+  // but all additional paths are added to the top-most parent
+  ierr = CeedAddJitSourceRoot(*ceed, (char *)CeedJitSourceRootDefault);
+  CeedChk(ierr);
+
   return CEED_ERROR_SUCCESS;
 }
 
@@ -998,6 +1045,33 @@ int CeedIsDeterministic(Ceed ceed, bool *is_deterministic) {
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Add a JiT source root; it is stored on the Ceed from CeedGetParent()
+
+  @param[in,out] ceed        Ceed
+  @param[in] jit_source_root Absolute path to additional JiT source directory (copied)
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref User
+**/
+int CeedAddJitSourceRoot(Ceed ceed, const char *jit_source_root) {
+  int ierr;
+  Ceed ceed_parent;
+
+  ierr = CeedGetParent(ceed, &ceed_parent); CeedChk(ierr);
+
+  CeedInt index = ceed_parent->num_jit_source_roots;
+  size_t path_length = strlen(jit_source_root);
+  ierr = CeedRealloc(index + 1, &ceed_parent->jit_source_roots); CeedChk(ierr);
+  ierr = CeedCalloc(path_length + 1, &ceed_parent->jit_source_roots[index]);
+  CeedChk(ierr);
+  memcpy(ceed_parent->jit_source_roots[index], jit_source_root, path_length);
+  ceed_parent->num_jit_source_roots++;
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief View a Ceed
 
@@ -1038,7 +1112,7 @@ int CeedDestroy(Ceed *ceed) {
   }
 
   if ((*ceed)->obj_delegate_count > 0) {
-    for (int i=0; i<(*ceed)->obj_delegate_count; i++) {
+    for (CeedInt i = 0; i < (*ceed)->obj_delegate_count; i++) {
       ierr = CeedDestroy(&((*ceed)->obj_delegates[i].delegate)); CeedChk(ierr);
       ierr = CeedFree(&(*ceed)->obj_delegates[i].obj_name); CeedChk(ierr);
     }
@@ -1049,6 +1123,11 @@ int CeedDestroy(Ceed *ceed) {
     ierr = (*ceed)->Destroy(*ceed); CeedChk(ierr);
   }
 
+  for (CeedInt i = 0; i < (*ceed)->num_jit_source_roots; i++) {
+    ierr = CeedFree(&(*ceed)->jit_source_roots[i]); CeedChk(ierr);
+  }
+  ierr = CeedFree(&(*ceed)->jit_source_roots); CeedChk(ierr);
+
   ierr = CeedFree(&(*ceed)->f_offsets); CeedChk(ierr);
   ierr = CeedFree(&(*ceed)->resource); CeedChk(ierr);
   ierr = CeedDestroy(&(*ceed)->op_fallback_ceed); CeedChk(ierr);
@@ -1194,7 +1273,7 @@ int CeedErrorExit(Ceed ceed, const char *filename, int line_no,
 int CeedSetErrorHandler(Ceed ceed, CeedErrorHandler handler) {
   ceed->Error = handler;
   if (ceed->delegate) CeedSetErrorHandler(ceed->delegate, handler);
-  for (int i=0; i<ceed->obj_delegate_count; i++)
+  for (CeedInt i=0; i<ceed->obj_delegate_count; i++)
     CeedSetErrorHandler(ceed->obj_delegates[i].delegate, handler);
   return CEED_ERROR_SUCCESS;
 }
diff --git a/julia/LibCEED.jl/test/rundevtests.jl b/julia/LibCEED.jl/test/rundevtests.jl
index 75d0f78410..e80f9ba9dc 100644
--- a/julia/LibCEED.jl/test/rundevtests.jl
+++ b/julia/LibCEED.jl/test/rundevtests.jl
@@ -1,3 +1,59 @@
 using Test, LibCEED, LinearAlgebra, StaticArrays
 
-@testset "LibCEED Development Tests" begin end
+@testset "LibCEED Development Tests" begin
+    @testset "QFunction" begin
+        c = Ceed()
+        @test showstr(create_interior_qfunction(c, "Poisson3DApply")) == """
+             Gallery CeedQFunction - Poisson3DApply
+               2 input fields:
+                 Input field 0:
+                   Name: "du"
+                   Size: 3
+                   EvalMode: "gradient"
+                 Input field 1:
+                   Name: "qdata"
+                   Size: 6
+                   EvalMode: "none"
+               1 output field:
+                 Output field 0:
+                   Name: "dv"
+                   Size: 3
+                   EvalMode: "gradient\""""
+    end
+
+    @testset "Operator" begin
+        c = Ceed()
+        @interior_qf id = (
+            c,
+            (input, :in, EVAL_INTERP),
+            (output, :out, EVAL_INTERP),
+            begin
+                output[] = input
+            end,
+        )
+        b = create_tensor_h1_lagrange_basis(c, 3, 1, 3, 3, GAUSS_LOBATTO)
+        n = getnumnodes(b)
+        offsets = Vector{CeedInt}(0:n-1)
+        r = create_elem_restriction(c, 1, n, 1, 1, n, offsets)
+        op = Operator(
+            c;
+            qf=id,
+            fields=[
+                (:input, r, b, CeedVectorActive()),
+                (:output, r, b, CeedVectorActive()),
+            ],
+        )
+        @test showstr(op) == """
+             CeedOperator
+               1 elements with 27 quadrature points each
+               2 fields
+               1 input field:
+                 Input field 0:
+                   Name: "input"
+                   Active vector
+               1 output field:
+                 Output field 0:
+                   Name: "output"
+                   Active vector"""
+    end
+end
diff --git a/julia/LibCEED.jl/test/runtests.jl b/julia/LibCEED.jl/test/runtests.jl
index 0473802cd0..ac45f73423 100644
--- a/julia/LibCEED.jl/test/runtests.jl
+++ b/julia/LibCEED.jl/test/runtests.jl
@@ -221,23 +221,6 @@ else
             apply!(id, Q, [v1], [v2])
             @test @witharray(a = v2, a == v)
 
-            @test showstr(create_interior_qfunction(c, "Poisson3DApply")) == """
-                Gallery CeedQFunction Poisson3DApply
-                  2 Input Fields:
-                    Input Field [0]:
-                      Name: "du"
-                      Size: 3
-                      EvalMode: "gradient"
-                    Input Field [1]:
-                      Name: "qdata"
-                      Size: 6
-                      EvalMode: "none"
-                  1 Output Field:
-                    Output Field [0]:
-                      Name: "dv"
-                      Size: 3
-                      EvalMode: "gradient\""""
-
             @interior_qf id2 = (c, (a, :in, EVAL_INTERP), (b, :out, EVAL_INTERP), b .= a)
             v2[] = 0.0
             apply!(id2, Q, [v1], [v2])
@@ -296,17 +279,6 @@ else
                     (:output, r, b, CeedVectorActive()),
                 ],
             )
-            @test showstr(op) == """
-                CeedOperator
-                  2 Fields
-                  1 Input Field:
-                    Input Field [0]:
-                      Name: "input"
-                      Active vector
-                  1 Output Field:
-                    Output Field [0]:
-                      Name: "output"
-                      Active vector"""
 
             v = rand(CeedScalar, n)
             v1 = CeedVector(c, v)
diff --git a/python/ceed_operator.py b/python/ceed_operator.py
index f487f76b3a..80412f96a0 100644
--- a/python/ceed_operator.py
+++ b/python/ceed_operator.py
@@ -108,6 +108,17 @@ def linear_assemble_add_point_block_diagonal(
                                                                        d._pointer[0], request)
         self._ceed._check_error(err_code)
 
+    # Set name
+    def name(self, name):
+        """Set name of Operator for print output
+
+           Args:
+             name: Name to set; must be an ASCII-only string"""
+
+        name = ffi.new("char[]", name.encode('ascii'))
+        err_code = lib.CeedOperatorSetName(self._pointer[0], name)
+        self._ceed._check_error(err_code)
+
     # Apply CeedOperator
     def apply(self, u, v, request=REQUEST_IMMEDIATE):
         """Apply Operator to a vector.
diff --git a/python/tests/output/test_402.out b/python/tests/output/test_402.out
index 38bb67d83f..e64504af50 100644
--- a/python/tests/output/test_402.out
+++ b/python/tests/output/test_402.out
@@ -1,31 +1,31 @@
-User CeedQFunction setup_mass
-  2 Input Fields:
-    Input Field [0]:
+User CeedQFunction - setup_mass
+  2 input fields:
+    Input field 0:
       Name: "w"
       Size: 1
       EvalMode: "quadrature weights"
-    Input Field [1]:
+    Input field 1:
       Name: "dx"
       Size: 1
       EvalMode: "gradient"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
 
-User CeedQFunction apply_mass
-  2 Input Fields:
-    Input Field [0]:
+User CeedQFunction - apply_mass
+  2 input fields:
+    Input field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-    Input Field [1]:
+    Input field 1:
       Name: "u"
       Size: 1
       EvalMode: "interpolation"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Size: 1
       EvalMode: "interpolation"
diff --git a/python/tests/output/test_413.out b/python/tests/output/test_413.out
index 01e8b7a62f..059f32cc38 100644
--- a/python/tests/output/test_413.out
+++ b/python/tests/output/test_413.out
@@ -1,31 +1,31 @@
-Gallery CeedQFunction Mass1DBuild
-  2 Input Fields:
-    Input Field [0]:
+Gallery CeedQFunction - Mass1DBuild
+  2 input fields:
+    Input field 0:
       Name: "dx"
       Size: 1
       EvalMode: "gradient"
-    Input Field [1]:
+    Input field 1:
       Name: "weights"
       Size: 1
       EvalMode: "quadrature weights"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
 
-Gallery CeedQFunction MassApply
-  2 Input Fields:
-    Input Field [0]:
+Gallery CeedQFunction - MassApply
+  2 input fields:
+    Input field 0:
       Name: "u"
       Size: 1
       EvalMode: "interpolation"
-    Input Field [1]:
+    Input field 1:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Size: 1
       EvalMode: "interpolation"
diff --git a/python/tests/output/test_504.out b/python/tests/output/test_504.out
index 5b7190df5e..e956387c9b 100644
--- a/python/tests/output/test_504.out
+++ b/python/tests/output/test_504.out
@@ -1,29 +1,31 @@
 CeedOperator
-  3 Fields
-  2 Input Fields:
-    Input Field [0]:
+  15 elements with 8 quadrature points each
+  3 fields
+  2 input fields:
+    Input field 0:
       Name: "weights"
       No vector
-    Input Field [1]:
+    Input field 1:
       Name: "dx"
       Active vector
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "rho"
       Collocated basis
       Active vector
 
 CeedOperator
-  3 Fields
-  2 Input Fields:
-    Input Field [0]:
+  15 elements with 8 quadrature points each
+  3 fields
+  2 input fields:
+    Input field 0:
       Name: "rho"
       Collocated basis
-    Input Field [1]:
+    Input field 1:
       Name: "u"
       Active vector
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Active vector
 
diff --git a/python/tests/output/test_523.out b/python/tests/output/test_523.out
index b709e59849..8721114d9f 100644
--- a/python/tests/output/test_523.out
+++ b/python/tests/output/test_523.out
@@ -1,56 +1,60 @@
-Composite CeedOperator
-  SubOperator [0]:
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+Composite CeedOperator - setup
+  SubOperator 0 - triangle elements:
+    6 elements with 4 quadrature points each
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "weights"
         No vector
-      Input Field [1]:
+      Input field 1:
         Name: "dx"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "rho"
         Collocated basis
-  SubOperator [1]:
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+  SubOperator 1 - quadralateral elements:
+    6 elements with 16 quadrature points each
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "weights"
         No vector
-      Input Field [1]:
+      Input field 1:
         Name: "dx"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "rho"
         Collocated basis
 
-Composite CeedOperator
-  SubOperator [0]:
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+Composite CeedOperator - mass
+  SubOperator 0 - triangle elements:
+    6 elements with 4 quadrature points each
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "rho"
         Collocated basis
-      Input Field [1]:
+      Input field 1:
         Name: "u"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "v"
         Active vector
-  SubOperator [1]:
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+  SubOperator 1 - quadralateral elements:
+    6 elements with 16 quadrature points each
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "rho"
         Collocated basis
-      Input Field [1]:
+      Input field 1:
         Name: "u"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "v"
         Active vector
 
diff --git a/python/tests/test-5-operator.py b/python/tests/test-5-operator.py
index 044d77cdf2..78b455779d 100644
--- a/python/tests/test-5-operator.py
+++ b/python/tests/test-5-operator.py
@@ -1224,6 +1224,7 @@ def test_523(ceed_resource, capsys):
 
     # Operators
     op_setup_tet = ceed.Operator(qf_setup_tet)
+    op_setup_tet.name('triangle elements')
     op_setup_tet.set_field("weights", libceed.ELEMRESTRICTION_NONE, bx_tet,
                            libceed.VECTOR_NONE)
     op_setup_tet.set_field("dx", rx_tet, bx_tet, libceed.VECTOR_ACTIVE)
@@ -1231,6 +1232,7 @@ def test_523(ceed_resource, capsys):
                            qdata_tet)
 
     op_mass_tet = ceed.Operator(qf_mass_tet)
+    op_mass_tet.name('triangle elements')
     op_mass_tet.set_field("rho", rui_tet, libceed.BASIS_COLLOCATED, qdata_tet)
     op_mass_tet.set_field("u", ru_tet, bu_tet, libceed.VECTOR_ACTIVE)
     op_mass_tet.set_field("v", ru_tet, bu_tet, libceed.VECTOR_ACTIVE)
@@ -1278,6 +1280,7 @@ def test_523(ceed_resource, capsys):
 
     # Operators
     op_setup_hex = ceed.Operator(qf_setup_tet)
+    op_setup_hex.name("quadralateral elements")
     op_setup_hex.set_field("weights", libceed.ELEMRESTRICTION_NONE, bx_hex,
                            libceed.VECTOR_NONE)
     op_setup_hex.set_field("dx", rx_hex, bx_hex, libceed.VECTOR_ACTIVE)
@@ -1285,6 +1288,7 @@ def test_523(ceed_resource, capsys):
                            qdata_hex)
 
     op_mass_hex = ceed.Operator(qf_mass_hex)
+    op_mass_hex.name("quadralateral elements")
     op_mass_hex.set_field("rho", rui_hex, libceed.BASIS_COLLOCATED, qdata_hex)
     op_mass_hex.set_field("u", ru_hex, bu_hex, libceed.VECTOR_ACTIVE)
     op_mass_hex.set_field("v", ru_hex, bu_hex, libceed.VECTOR_ACTIVE)
@@ -1293,11 +1297,13 @@ def test_523(ceed_resource, capsys):
 
     # Setup
     op_setup = ceed.CompositeOperator()
+    op_setup.name('setup')
     op_setup.add_sub(op_setup_tet)
     op_setup.add_sub(op_setup_hex)
 
     # Apply mass matrix
     op_mass = ceed.CompositeOperator()
+    op_mass.name('mass')
     op_mass.add_sub(op_mass_tet)
     op_mass.add_sub(op_mass_hex)
 
diff --git a/rust/libceed/src/operator.rs b/rust/libceed/src/operator.rs
index 12b1df4fd4..084162e086 100644
--- a/rust/libceed/src/operator.rs
+++ b/rust/libceed/src/operator.rs
@@ -337,6 +337,7 @@ impl<'a> fmt::Display for OperatorCore<'a> {
 /// // Operator fields
 /// let op = ceed
 ///     .operator(&qf, QFunctionOpt::None, QFunctionOpt::None)?
+///     .name("mass")?
 ///     .field("dx", &r, &b, VectorOpt::Active)?
 ///     .field("weights", ElemRestrictionOpt::None, &b, VectorOpt::None)?
 ///     .field("qdata", &rq, BasisOpt::Collocated, VectorOpt::Active)?;
@@ -378,6 +379,7 @@ impl<'a> fmt::Display for Operator<'a> {
 /// let qf_mass = ceed.q_function_interior_by_name("MassApply")?;
 /// let op_mass = ceed
 ///     .operator(&qf_mass, QFunctionOpt::None, QFunctionOpt::None)?
+///     .name("Mass term")?
 ///     .field("u", &r, &b, VectorOpt::Active)?
 ///     .field("qdata", &rq, BasisOpt::Collocated, &qdata_mass)?
 ///     .field("v", &r, &b, VectorOpt::Active)?;
@@ -385,12 +387,14 @@ impl<'a> fmt::Display for Operator<'a> {
 /// let qf_diff = ceed.q_function_interior_by_name("Poisson1DApply")?;
 /// let op_diff = ceed
 ///     .operator(&qf_diff, QFunctionOpt::None, QFunctionOpt::None)?
+///     .name("Poisson term")?
 ///     .field("du", &r, &b, VectorOpt::Active)?
 ///     .field("qdata", &rq, BasisOpt::Collocated, &qdata_diff)?
 ///     .field("dv", &r, &b, VectorOpt::Active)?;
 ///
 /// let op = ceed
 ///     .composite_operator()?
+///     .name("Screened Poisson")?
 ///     .sub_operator(&op_mass)?
 ///     .sub_operator(&op_diff)?;
 ///
@@ -424,6 +428,12 @@ impl<'a> OperatorCore<'a> {
         self.check_error(ierr)
     }
 
+    pub fn name(&self, name: &str) -> crate::Result<i32> {
+        let name_c = CString::new(name).expect("CString::new failed");
+        let ierr = unsafe { bind_ceed::CeedOperatorSetName(self.ptr, name_c.as_ptr()) };
+        self.check_error(ierr)
+    }
+
     pub fn apply(&self, input: &Vector, output: &mut Vector) -> crate::Result<i32> {
         let ierr = unsafe {
             bind_ceed::CeedOperatorApply(
@@ -538,6 +548,46 @@ impl<'a> Operator<'a> {
         })
     }
 
+    /// Set name for Operator printing
+    ///
+    /// * `name` - Name to set
+    ///
+    /// ```
+    /// # use libceed::prelude::*;
+    /// # fn main() -> libceed::Result<()> {
+    /// # let ceed = libceed::Ceed::default_init();
+    /// let qf = ceed.q_function_interior_by_name("Mass1DBuild")?;
+    ///
+    /// // Operator field arguments
+    /// let ne = 3;
+    /// let q = 4 as usize;
+    /// let mut ind: Vec<i32> = vec![0; 2 * ne];
+    /// for i in 0..ne {
+    ///     ind[2 * i + 0] = i as i32;
+    ///     ind[2 * i + 1] = (i + 1) as i32;
+    /// }
+    /// let r = ceed.elem_restriction(ne, 2, 1, 1, ne + 1, MemType::Host, &ind)?;
+    /// let strides: [i32; 3] = [1, q as i32, q as i32];
+    /// let rq = ceed.strided_elem_restriction(ne, 2, 1, q * ne, strides)?;
+    ///
+    /// let b = ceed.basis_tensor_H1_Lagrange(1, 1, 2, q, QuadMode::Gauss)?;
+    ///
+    /// // Operator fields
+    /// let op = ceed
+    ///     .operator(&qf, QFunctionOpt::None, QFunctionOpt::None)?
+    ///     .name("mass")?
+    ///     .field("dx", &r, &b, VectorOpt::Active)?
+    ///     .field("weights", ElemRestrictionOpt::None, &b, VectorOpt::None)?
+    ///     .field("qdata", &rq, BasisOpt::Collocated, VectorOpt::Active)?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    #[allow(unused_mut)]
+    pub fn name(mut self, name: &str) -> crate::Result<Self> {
+        self.op_core.name(name)?;
+        Ok(self)
+    }
+
     /// Apply Operator to a vector
     ///
     /// * `input`  - Input Vector
@@ -2035,9 +2085,65 @@ impl<'a> CompositeOperator<'a> {
         })
     }
 
+    /// Set name for CompositeOperator printing
+    ///
+    /// * `name` - Name to set
+    ///
+    /// ```
+    /// # use libceed::prelude::*;
+    /// # fn main() -> libceed::Result<()> {
+    /// # let ceed = libceed::Ceed::default_init();
+    ///
+    /// // Sub operator field arguments
+    /// let ne = 3;
+    /// let q = 4 as usize;
+    /// let mut ind: Vec<i32> = vec![0; 2 * ne];
+    /// for i in 0..ne {
+    ///     ind[2 * i + 0] = i as i32;
+    ///     ind[2 * i + 1] = (i + 1) as i32;
+    /// }
+    /// let r = ceed.elem_restriction(ne, 2, 1, 1, ne + 1, MemType::Host, &ind)?;
+    /// let strides: [i32; 3] = [1, q as i32, q as i32];
+    /// let rq = ceed.strided_elem_restriction(ne, 2, 1, q * ne, strides)?;
+    ///
+    /// let b = ceed.basis_tensor_H1_Lagrange(1, 1, 2, q, QuadMode::Gauss)?;
+    ///
+    /// let qdata_mass = ceed.vector(q * ne)?;
+    /// let qdata_diff = ceed.vector(q * ne)?;
+    ///
+    /// let qf_mass = ceed.q_function_interior_by_name("MassApply")?;
+    /// let op_mass = ceed
+    ///     .operator(&qf_mass, QFunctionOpt::None, QFunctionOpt::None)?
+    ///     .name("Mass term")?
+    ///     .field("u", &r, &b, VectorOpt::Active)?
+    ///     .field("qdata", &rq, BasisOpt::Collocated, &qdata_mass)?
+    ///     .field("v", &r, &b, VectorOpt::Active)?;
+    ///
+    /// let qf_diff = ceed.q_function_interior_by_name("Poisson1DApply")?;
+    /// let op_diff = ceed
+    ///     .operator(&qf_diff, QFunctionOpt::None, QFunctionOpt::None)?
+    ///     .name("Poisson term")?
+    ///     .field("du", &r, &b, VectorOpt::Active)?
+    ///     .field("qdata", &rq, BasisOpt::Collocated, &qdata_diff)?
+    ///     .field("dv", &r, &b, VectorOpt::Active)?;
+    ///
+    /// let op = ceed
+    ///     .composite_operator()?
+    ///     .name("Screened Poisson")?
+    ///     .sub_operator(&op_mass)?
+    ///     .sub_operator(&op_diff)?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    #[allow(unused_mut)]
+    pub fn name(mut self, name: &str) -> crate::Result<Self> {
+        self.op_core.name(name)?;
+        Ok(self)
+    }
+
     /// Apply Operator to a vector
     ///
-    /// * `input`  - Input Vector
+    /// * `input`  - Input Vector
     /// * `output` - Output Vector
     ///
     /// ```
diff --git a/rust/libceed/src/qfunction.rs b/rust/libceed/src/qfunction.rs
index baba3ddd75..efcbe5dc44 100644
--- a/rust/libceed/src/qfunction.rs
+++ b/rust/libceed/src/qfunction.rs
@@ -568,6 +568,12 @@ unsafe extern "C" fn trampoline(
     (trampoline_data.get_unchecked_mut().user_f)(inputs_array, outputs_array)
 }
 
+unsafe extern "C" fn destroy_trampoline(ctx: *mut ::std::os::raw::c_void) -> ::std::os::raw::c_int {
+    let trampoline_data: Pin<&mut QFunctionTrampolineData> = std::mem::transmute(ctx);
+    drop(trampoline_data); // NOTE(review): drops the Pin<&mut _> only, not the pointee
+    0 // Clean error code
+}
+
 // -----------------------------------------------------------------------------
 // QFunction
 // -----------------------------------------------------------------------------
@@ -623,6 +629,14 @@ impl<'a> QFunction<'a> {
             )
         };
         ceed.check_error(ierr)?;
+        ierr = unsafe {
+            bind_ceed::CeedQFunctionContextSetDataDestroy(
+                qf_ctx_ptr,
+                crate::MemType::Host as bind_ceed::CeedMemType,
+                Some(destroy_trampoline),
+            )
+        };
+        ceed.check_error(ierr)?;
         ierr = unsafe { bind_ceed::CeedQFunctionSetContext(ptr, qf_ctx_ptr) };
         ceed.check_error(ierr)?;
         Ok(Self {
diff --git a/tests/output/t402-qfunction-f.out b/tests/output/t402-qfunction-f.out
index be66f7e3c0..7163a434f1 100644
--- a/tests/output/t402-qfunction-f.out
+++ b/tests/output/t402-qfunction-f.out
@@ -1,26 +1,26 @@
-User CeedQFunction setup
-  1 Input Field:
-    Input Field [0]:
+User CeedQFunction - setup
+  1 input field:
+    Input field 0:
       Name: "w"
       Size: 1
       EvalMode: "quadrature weights"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-User CeedQFunction mass
-  2 Input Fields:
-    Input Field [0]:
+User CeedQFunction - mass
+  2 input fields:
+    Input field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-    Input Field [1]:
+    Input field 1:
       Name: "u"
       Size: 1
       EvalMode: "interpolation"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Size: 1
       EvalMode: "interpolation"
diff --git a/tests/output/t402-qfunction.out b/tests/output/t402-qfunction.out
index be66f7e3c0..7163a434f1 100644
--- a/tests/output/t402-qfunction.out
+++ b/tests/output/t402-qfunction.out
@@ -1,26 +1,26 @@
-User CeedQFunction setup
-  1 Input Field:
-    Input Field [0]:
+User CeedQFunction - setup
+  1 input field:
+    Input field 0:
       Name: "w"
       Size: 1
       EvalMode: "quadrature weights"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-User CeedQFunction mass
-  2 Input Fields:
-    Input Field [0]:
+User CeedQFunction - mass
+  2 input fields:
+    Input field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-    Input Field [1]:
+    Input field 1:
       Name: "u"
       Size: 1
       EvalMode: "interpolation"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Size: 1
       EvalMode: "interpolation"
diff --git a/tests/output/t413-qfunction-f.out b/tests/output/t413-qfunction-f.out
index 460f580fbf..ffee1bdca7 100644
--- a/tests/output/t413-qfunction-f.out
+++ b/tests/output/t413-qfunction-f.out
@@ -1,30 +1,30 @@
-Gallery CeedQFunction Mass1DBuild
-  2 Input Fields:
-    Input Field [0]:
+Gallery CeedQFunction - Mass1DBuild
+  2 input fields:
+    Input field 0:
       Name: "dx"
       Size: 1
       EvalMode: "gradient"
-    Input Field [1]:
+    Input field 1:
       Name: "weights"
       Size: 1
       EvalMode: "quadrature weights"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-Gallery CeedQFunction MassApply
-  2 Input Fields:
-    Input Field [0]:
+Gallery CeedQFunction - MassApply
+  2 input fields:
+    Input field 0:
       Name: "u"
       Size: 1
       EvalMode: "interpolation"
-    Input Field [1]:
+    Input field 1:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Size: 1
       EvalMode: "interpolation"
diff --git a/tests/output/t413-qfunction.out b/tests/output/t413-qfunction.out
index 460f580fbf..ffee1bdca7 100644
--- a/tests/output/t413-qfunction.out
+++ b/tests/output/t413-qfunction.out
@@ -1,30 +1,30 @@
-Gallery CeedQFunction Mass1DBuild
-  2 Input Fields:
-    Input Field [0]:
+Gallery CeedQFunction - Mass1DBuild
+  2 input fields:
+    Input field 0:
       Name: "dx"
       Size: 1
       EvalMode: "gradient"
-    Input Field [1]:
+    Input field 1:
       Name: "weights"
       Size: 1
       EvalMode: "quadrature weights"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-Gallery CeedQFunction MassApply
-  2 Input Fields:
-    Input Field [0]:
+Gallery CeedQFunction - MassApply
+  2 input fields:
+    Input field 0:
       Name: "u"
       Size: 1
       EvalMode: "interpolation"
-    Input Field [1]:
+    Input field 1:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Size: 1
       EvalMode: "interpolation"
diff --git a/tests/output/t504-operator-f.out b/tests/output/t504-operator-f.out
index 7a3265af0c..91d47589d4 100644
--- a/tests/output/t504-operator-f.out
+++ b/tests/output/t504-operator-f.out
@@ -1,27 +1,29 @@
 CeedOperator
-  3 Fields
-  2 Input Fields:
-    Input Field [0]:
+  15 elements with 8 quadrature points each
+  3 fields
+  2 input fields:
+    Input field 0:
       Name: "weight"
       No vector
-    Input Field [1]:
+    Input field 1:
       Name: "dx"
       Active vector
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "rho"
       Collocated basis
       Active vector
 CeedOperator
-  3 Fields
-  2 Input Fields:
-    Input Field [0]:
+  15 elements with 8 quadrature points each
+  3 fields
+  2 input fields:
+    Input field 0:
       Name: "rho"
       Collocated basis
-    Input Field [1]:
+    Input field 1:
       Name: "u"
       Active vector
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Active vector
diff --git a/tests/output/t504-operator.out b/tests/output/t504-operator.out
index 7a3265af0c..91d47589d4 100644
--- a/tests/output/t504-operator.out
+++ b/tests/output/t504-operator.out
@@ -1,27 +1,29 @@
 CeedOperator
-  3 Fields
-  2 Input Fields:
-    Input Field [0]:
+  15 elements with 8 quadrature points each
+  3 fields
+  2 input fields:
+    Input field 0:
       Name: "weight"
       No vector
-    Input Field [1]:
+    Input field 1:
       Name: "dx"
       Active vector
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "rho"
       Collocated basis
       Active vector
 CeedOperator
-  3 Fields
-  2 Input Fields:
-    Input Field [0]:
+  15 elements with 8 quadrature points each
+  3 fields
+  2 input fields:
+    Input field 0:
       Name: "rho"
       Collocated basis
-    Input Field [1]:
+    Input field 1:
       Name: "u"
       Active vector
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Active vector
diff --git a/tests/output/t523-operator-f.out b/tests/output/t523-operator-f.out
index 49528cb6e6..79b6ac9151 100644
--- a/tests/output/t523-operator-f.out
+++ b/tests/output/t523-operator-f.out
@@ -1,54 +1,58 @@
-Composite CeedOperator
-  SubOperator [0]:
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+Composite CeedOperator - setup
+  SubOperator 0 - triangle elements:
+    6 elements with 4 quadrature points each
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "weight"
         No vector
-      Input Field [1]:
+      Input field 1:
         Name: "dx"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "rho"
         Collocated basis
-  SubOperator [1]:
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+  SubOperator 1 - quadralateral elements:
+    6 elements with 16 quadrature points each
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "weight"
         No vector
-      Input Field [1]:
+      Input field 1:
         Name: "dx"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "rho"
         Collocated basis
-Composite CeedOperator
-  SubOperator [0]:
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+Composite CeedOperator - mass
+  SubOperator 0 - triangle elements:
+    6 elements with 4 quadrature points each
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "rho"
         Collocated basis
-      Input Field [1]:
+      Input field 1:
         Name: "u"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "v"
         Active vector
-  SubOperator [1]:
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+  SubOperator 1 - quadralateral elements:
+    6 elements with 16 quadrature points each
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "rho"
         Collocated basis
-      Input Field [1]:
+      Input field 1:
         Name: "u"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "v"
         Active vector
diff --git a/tests/output/t523-operator.out b/tests/output/t523-operator.out
index 49528cb6e6..79b6ac9151 100644
--- a/tests/output/t523-operator.out
+++ b/tests/output/t523-operator.out
@@ -1,54 +1,58 @@
-Composite CeedOperator
-  SubOperator [0]:
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+Composite CeedOperator - setup
+  SubOperator 0 - triangle elements:
+    6 elements with 4 quadrature points each
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "weight"
         No vector
-      Input Field [1]:
+      Input field 1:
         Name: "dx"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "rho"
         Collocated basis
-  SubOperator [1]:
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+  SubOperator 1 - quadralateral elements:
+    6 elements with 16 quadrature points each
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "weight"
         No vector
-      Input Field [1]:
+      Input field 1:
         Name: "dx"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "rho"
         Collocated basis
-Composite CeedOperator
-  SubOperator [0]:
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+Composite CeedOperator - mass
+  SubOperator 0 - triangle elements:
+    6 elements with 4 quadrature points each
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "rho"
         Collocated basis
-      Input Field [1]:
+      Input field 1:
         Name: "u"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "v"
         Active vector
-  SubOperator [1]:
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+  SubOperator 1 - quadralateral elements:
+    6 elements with 16 quadrature points each
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "rho"
         Collocated basis
-      Input Field [1]:
+      Input field 1:
         Name: "u"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "v"
         Active vector
diff --git a/tests/t407-qfunction.c b/tests/t407-qfunction.c
index d024ba6679..a65120d62f 100644
--- a/tests/t407-qfunction.c
+++ b/tests/t407-qfunction.c
@@ -2,6 +2,7 @@
 /// Test registering and setting QFunctionContext fields
 /// \test Test registering and setting QFunctionContext fields
 #include <ceed.h>
+#include <ceed/backend.h>
 #include <stddef.h>
 #include <string.h>
 
diff --git a/tests/t523-operator-f.f90 b/tests/t523-operator-f.f90
index 6770f5b4be..9e5d402bc6 100644
--- a/tests/t523-operator-f.f90
+++ b/tests/t523-operator-f.f90
@@ -121,7 +121,8 @@ program test
 ! ---- Setup Tet
       call ceedoperatorcreate(ceed,qf_setuptet,ceed_qfunction_none,&
      & ceed_qfunction_none,op_setuptet,err)
-      call ceedoperatorsetfield(op_setuptet, 'weight',&
+      call ceedoperatorsetname(op_setuptet,'triangle elements',err)
+      call ceedoperatorsetfield(op_setuptet,'weight',&
      & ceed_elemrestriction_none,bxtet,ceed_vector_none,err)
       call ceedoperatorsetfield(op_setuptet,'dx',erestrictxtet,&
      & bxtet,ceed_vector_active,err)
@@ -130,6 +131,7 @@ program test
 ! ---- Mass Tet
       call ceedoperatorcreate(ceed,qf_masstet,ceed_qfunction_none,&
      & ceed_qfunction_none,op_masstet,err)
+      call ceedoperatorsetname(op_masstet,'triangle elements',err)
       call ceedoperatorsetfield(op_masstet,'rho',erestrictuitet,&
      & ceed_basis_collocated,qdatatet,err)
       call ceedoperatorsetfield(op_masstet,'u',erestrictutet,&
@@ -184,6 +186,7 @@ program test
 ! ---- Setup Hex
       call ceedoperatorcreate(ceed,qf_setuphex,ceed_qfunction_none,&
      & ceed_qfunction_none,op_setuphex,err)
+      call ceedoperatorsetname(op_setuphex,'quadralateral elements',err)
       call ceedoperatorsetfield(op_setuphex,'weight',&
      & ceed_elemrestriction_none,bxhex,ceed_vector_none,err)
       call ceedoperatorsetfield(op_setuphex,'dx',erestrictxhex,&
@@ -193,6 +196,7 @@ program test
 ! ---- Mass Hex
       call ceedoperatorcreate(ceed,qf_masshex,ceed_qfunction_none,&
      & ceed_qfunction_none,op_masshex,err)
+      call ceedoperatorsetname(op_masshex,'quadralateral elements',err)
       call ceedoperatorsetfield(op_masshex,'rho',erestrictuihex,&
      & ceed_basis_collocated,qdatahex,err)
       call ceedoperatorsetfield(op_masshex,'u',erestrictuhex,&
@@ -202,10 +206,12 @@ program test
 
 ! Composite Operators
       call ceedcompositeoperatorcreate(ceed,op_setup,err)
+      call ceedoperatorsetname(op_setup,'setup',err)
       call ceedcompositeoperatoraddsub(op_setup,op_setuptet,err)
       call ceedcompositeoperatoraddsub(op_setup,op_setuphex,err)
 
       call ceedcompositeoperatorcreate(ceed,op_mass,err)
+      call ceedoperatorsetname(op_mass,'mass',err)
       call ceedcompositeoperatoraddsub(op_mass,op_masstet,err)
       call ceedcompositeoperatoraddsub(op_mass,op_masshex,err)
 
diff --git a/tests/t523-operator.c b/tests/t523-operator.c
index a8159ec70b..47630cecba 100644
--- a/tests/t523-operator.c
+++ b/tests/t523-operator.c
@@ -109,6 +109,7 @@ int main(int argc, char **argv) {
   // ---- Setup _tet
   CeedOperatorCreate(ceed, qf_setup_tet, CEED_QFUNCTION_NONE,
                      CEED_QFUNCTION_NONE, &op_setup_tet);
+  CeedOperatorSetName(op_setup_tet, "triangle elements");
   CeedOperatorSetField(op_setup_tet, "weight", CEED_ELEMRESTRICTION_NONE,
                        basis_x_tet,
                        CEED_VECTOR_NONE);
@@ -119,6 +120,7 @@ int main(int argc, char **argv) {
   // ---- Mass _tet
   CeedOperatorCreate(ceed, qf_mass_tet, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE,
                      &op_mass_tet);
+  CeedOperatorSetName(op_mass_tet, "triangle elements");
   CeedOperatorSetField(op_mass_tet, "rho", elem_restr_qd_tet,
                        CEED_BASIS_COLLOCATED,
                        q_data_tet);
@@ -170,6 +172,7 @@ int main(int argc, char **argv) {
   // -- Operators
   CeedOperatorCreate(ceed, qf_setup_hex, CEED_QFUNCTION_NONE,
                      CEED_QFUNCTION_NONE, &op_setup_hex);
+  CeedOperatorSetName(op_setup_hex, "quadralateral elements");
   CeedOperatorSetField(op_setup_hex, "weight", CEED_ELEMRESTRICTION_NONE,
                        basis_x_hex,
                        CEED_VECTOR_NONE);
@@ -180,6 +183,7 @@ int main(int argc, char **argv) {
 
   CeedOperatorCreate(ceed, qf_mass_hex, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE,
                      &op_mass_hex);
+  CeedOperatorSetName(op_mass_hex, "quadralateral elements");
   CeedOperatorSetField(op_mass_hex, "rho", elem_restr_qd_i_hex,
                        CEED_BASIS_COLLOCATED,
                        q_data_hex);
@@ -191,12 +195,14 @@ int main(int argc, char **argv) {
   // Set up Composite Operators
   // -- Create
   CeedCompositeOperatorCreate(ceed, &op_setup);
+  CeedOperatorSetName(op_setup, "setup");
   // -- Add SubOperators
   CeedCompositeOperatorAddSub(op_setup, op_setup_tet);
   CeedCompositeOperatorAddSub(op_setup, op_setup_hex);
 
   // -- Create
   CeedCompositeOperatorCreate(ceed, &op_mass);
+  CeedOperatorSetName(op_mass, "mass");
   // -- Add SubOperators
   CeedCompositeOperatorAddSub(op_mass, op_mass_tet);
   CeedCompositeOperatorAddSub(op_mass, op_mass_hex);