From 6eb0d8b4aff72517bac7a1ace48e04610a8fe084 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Thu, 31 Mar 2022 18:29:25 -0600
Subject: [PATCH 01/59] jit - use relpath from include/ceed-jit-source for jit
 source files

---
 Makefile                                      |  2 +-
 backends/cuda-ref/ceed-cuda-ref-basis.c       | 10 ++-
 .../cuda-ref/ceed-cuda-ref-qfunction-load.cpp |  4 +-
 backends/cuda-ref/ceed-cuda-restriction.c     |  5 +-
 backends/cuda-shared/ceed-cuda-shared-basis.c |  5 +-
 backends/hip-ref/ceed-hip-ref-basis.c         | 10 ++-
 .../hip-ref/ceed-hip-ref-qfunction-load.cpp   |  4 +-
 backends/hip-ref/ceed-hip-ref-restriction.c   |  5 +-
 backends/hip-shared/ceed-hip-shared-basis.c   |  5 +-
 gallery/identity/ceed-identity.c              |  2 +-
 gallery/mass-vector/ceed-vectormassapply.c    |  2 +-
 gallery/mass/ceed-mass1dbuild.c               |  2 +-
 gallery/mass/ceed-mass2dbuild.c               |  2 +-
 gallery/mass/ceed-mass3dbuild.c               |  2 +-
 gallery/mass/ceed-massapply.c                 |  2 +-
 .../ceed-vectorpoisson1dapply.c               |  2 +-
 .../ceed-vectorpoisson2dapply.c               |  2 +-
 .../ceed-vectorpoisson3dapply.c               |  2 +-
 gallery/poisson/ceed-poisson1dapply.c         |  2 +-
 gallery/poisson/ceed-poisson1dbuild.c         |  2 +-
 gallery/poisson/ceed-poisson2dapply.c         |  2 +-
 gallery/poisson/ceed-poisson2dbuild.c         |  2 +-
 gallery/poisson/ceed-poisson3dapply.c         |  2 +-
 gallery/poisson/ceed-poisson3dbuild.c         |  2 +-
 gallery/scale/ceed-scale.c                    |  2 +-
 include/ceed-impl.h                           |  1 +
 .../cuda}/cuda-ref-basis-nontensor.h          |  0
 .../cuda}/cuda-ref-basis-tensor.h             |  0
 .../cuda}/cuda-ref-qfunction.h                |  0
 .../cuda}/cuda-ref-restriction.h              |  0
 .../ceed-jit-source/cuda}/cuda-shared-basis.h |  0
 .../ceed-jit-source/gallery}/ceed-identity.h  |  0
 .../gallery}/ceed-mass1dbuild.h               |  0
 .../gallery}/ceed-mass2dbuild.h               |  0
 .../gallery}/ceed-mass3dbuild.h               |  0
 .../ceed-jit-source/gallery}/ceed-massapply.h |  0
 .../gallery}/ceed-poisson1dapply.h            |  0
 .../gallery}/ceed-poisson1dbuild.h            |  0
 .../gallery}/ceed-poisson2dapply.h            |  0
 .../gallery}/ceed-poisson2dbuild.h            |  0
 .../gallery}/ceed-poisson3dapply.h            |  0
 .../gallery}/ceed-poisson3dbuild.h            |  0
 .../ceed-jit-source/gallery}/ceed-scale.h     |  0
 .../gallery}/ceed-vectormassapply.h           |  0
 .../gallery}/ceed-vectorpoisson1dapply.h      |  0
 .../gallery}/ceed-vectorpoisson2dapply.h      |  0
 .../gallery}/ceed-vectorpoisson3dapply.h      |  0
 .../hip}/hip-ref-basis-nontensor.h            |  0
 .../hip}/hip-ref-basis-tensor.h               |  0
 .../ceed-jit-source/hip}/hip-ref-qfunction.h  |  0
 .../hip}/hip-ref-restriction.h                |  0
 .../ceed-jit-source/hip}/hip-shared-basis.h   |  0
 include/ceed/jit-tools.h                      |  6 +-
 interface/ceed-jit-tools.c                    | 84 +++++++++++++++++--
 interface/ceed-qfunction.c                    | 15 +++-
 interface/ceed.c                              |  1 +
 56 files changed, 143 insertions(+), 46 deletions(-)
 rename {backends/cuda-ref/kernels => include/ceed-jit-source/cuda}/cuda-ref-basis-nontensor.h (100%)
 rename {backends/cuda-ref/kernels => include/ceed-jit-source/cuda}/cuda-ref-basis-tensor.h (100%)
 rename {backends/cuda-ref/kernels => include/ceed-jit-source/cuda}/cuda-ref-qfunction.h (100%)
 rename {backends/cuda-ref/kernels => include/ceed-jit-source/cuda}/cuda-ref-restriction.h (100%)
 rename {backends/cuda-shared/kernels => include/ceed-jit-source/cuda}/cuda-shared-basis.h (100%)
 rename {gallery/identity => include/ceed-jit-source/gallery}/ceed-identity.h (100%)
 rename {gallery/mass => include/ceed-jit-source/gallery}/ceed-mass1dbuild.h (100%)
 rename {gallery/mass => include/ceed-jit-source/gallery}/ceed-mass2dbuild.h (100%)
 rename {gallery/mass => include/ceed-jit-source/gallery}/ceed-mass3dbuild.h (100%)
 rename {gallery/mass => include/ceed-jit-source/gallery}/ceed-massapply.h (100%)
 rename {gallery/poisson => include/ceed-jit-source/gallery}/ceed-poisson1dapply.h (100%)
 rename {gallery/poisson => include/ceed-jit-source/gallery}/ceed-poisson1dbuild.h (100%)
 rename {gallery/poisson => include/ceed-jit-source/gallery}/ceed-poisson2dapply.h (100%)
 rename {gallery/poisson => include/ceed-jit-source/gallery}/ceed-poisson2dbuild.h (100%)
 rename {gallery/poisson => include/ceed-jit-source/gallery}/ceed-poisson3dapply.h (100%)
 rename {gallery/poisson => include/ceed-jit-source/gallery}/ceed-poisson3dbuild.h (100%)
 rename {gallery/scale => include/ceed-jit-source/gallery}/ceed-scale.h (100%)
 rename {gallery/mass-vector => include/ceed-jit-source/gallery}/ceed-vectormassapply.h (100%)
 rename {gallery/poisson-vector => include/ceed-jit-source/gallery}/ceed-vectorpoisson1dapply.h (100%)
 rename {gallery/poisson-vector => include/ceed-jit-source/gallery}/ceed-vectorpoisson2dapply.h (100%)
 rename {gallery/poisson-vector => include/ceed-jit-source/gallery}/ceed-vectorpoisson3dapply.h (100%)
 rename {backends/hip-ref/kernels => include/ceed-jit-source/hip}/hip-ref-basis-nontensor.h (100%)
 rename {backends/hip-ref/kernels => include/ceed-jit-source/hip}/hip-ref-basis-tensor.h (100%)
 rename {backends/hip-ref/kernels => include/ceed-jit-source/hip}/hip-ref-qfunction.h (100%)
 rename {backends/hip-ref/kernels => include/ceed-jit-source/hip}/hip-ref-restriction.h (100%)
 rename {backends/hip-shared/kernels => include/ceed-jit-source/hip}/hip-shared-basis.h (100%)

diff --git a/Makefile b/Makefile
index 102f4edab2..4c86f5daf0 100644
--- a/Makefile
+++ b/Makefile
@@ -145,7 +145,7 @@ ifeq ($(COVERAGE), 1)
   CEED_LDFLAGS += --coverage
 endif
 
-CFLAGS += $(if $(ASAN),$(AFLAGS))
+CFLAGS += $(if $(ASAN),$(AFLAGS)) -DCEED_JIT_SOURCE_ROOT="\"$(abspath ./include)/\""
 FFLAGS += $(if $(ASAN),$(AFLAGS))
 CEED_LDFLAGS += $(if $(ASAN),$(AFLAGS))
 CPPFLAGS += -I./include
diff --git a/backends/cuda-ref/ceed-cuda-ref-basis.c b/backends/cuda-ref/ceed-cuda-ref-basis.c
index dbe1221559..71af75a7ad 100644
--- a/backends/cuda-ref/ceed-cuda-ref-basis.c
+++ b/backends/cuda-ref/ceed-cuda-ref-basis.c
@@ -266,8 +266,9 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   CeedInt num_comp;
   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/cuda-ref-basis-tensor.h",
-                             &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetInstalledJitPath(ceed,
+                                 "ceed-jit-source/cuda/cuda-ref-basis-tensor.h",
+                                 &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
@@ -335,8 +336,9 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim,
   CeedInt num_comp;
   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/cuda-ref-basis-nontensor.h",
-                             &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetInstalledJitPath(ceed,
+                                 "ceed-jit-source/cuda/cuda-ref-basis-nontensor.h",
+                                 &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp b/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
index eb00f21c6c..ca28216a8a 100644
--- a/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
+++ b/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
@@ -45,8 +45,8 @@ extern "C" int CeedCudaBuildQFunction(CeedQFunction qf) {
 
   // Build strings for final kernel
   char *read_write_kernel_path, *read_write_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/cuda-ref-qfunction.h",
-                             &read_write_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetInstalledJitPath(ceed, "ceed-jit-source/cuda/cuda-ref-qfunction.h",
+                                 &read_write_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &read_write_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/cuda-ref/ceed-cuda-restriction.c b/backends/cuda-ref/ceed-cuda-restriction.c
index 8bcd779949..a3cdc28e01 100644
--- a/backends/cuda-ref/ceed-cuda-restriction.c
+++ b/backends/cuda-ref/ceed-cuda-restriction.c
@@ -341,8 +341,9 @@ int CeedElemRestrictionCreate_Cuda(CeedMemType m_type, CeedCopyMode copy_mode,
   // Compile CUDA kernels
   CeedInt num_nodes = impl->num_nodes;
   char *restriction_kernel_path, *restriction_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/cuda-ref-restriction.h",
-                             &restriction_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetInstalledJitPath(ceed,
+                                 "ceed-jit-source/cuda/cuda-ref-restriction.h",
+                                 &restriction_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Restriction Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, restriction_kernel_path,
                                 &restriction_kernel_source);
diff --git a/backends/cuda-shared/ceed-cuda-shared-basis.c b/backends/cuda-shared/ceed-cuda-shared-basis.c
index 77657a0c3a..bf924ebf6f 100644
--- a/backends/cuda-shared/ceed-cuda-shared-basis.c
+++ b/backends/cuda-shared/ceed-cuda-shared-basis.c
@@ -270,8 +270,9 @@ int CeedBasisCreateTensorH1_Cuda_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   CeedInt num_comp;
   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/cuda-shared-basis.h",
-                             &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetInstalledJitPath(ceed,
+                                 "ceed-jit-source/cuda/cuda-shared-basis.h",
+                                 &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/hip-ref/ceed-hip-ref-basis.c b/backends/hip-ref/ceed-hip-ref-basis.c
index 16a509b81d..f0fade6509 100644
--- a/backends/hip-ref/ceed-hip-ref-basis.c
+++ b/backends/hip-ref/ceed-hip-ref-basis.c
@@ -268,8 +268,9 @@ int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   CeedInt ncomp;
   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-basis-tensor.h",
-                             &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetInstalledJitPath(ceed,
+                                 "ceed-jit-source/hip/hip-ref-basis-tensor.h",
+                                 &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
@@ -336,8 +337,9 @@ int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes,
   CeedInt ncomp;
   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-basis-nontensor.h",
-                             &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetInstalledJitPath(ceed,
+                                 "ceed-jit-source/hip/hip-ref-basis-nontensor.h",
+                                 &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
index 4666dbcffb..1faf40852e 100644
--- a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
+++ b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
@@ -47,8 +47,8 @@ extern "C" int CeedHipBuildQFunction(CeedQFunction qf) {
 
   // Build strings for final kernel
   char *read_write_kernel_path, *read_write_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-qfunction.h",
-                             &read_write_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetInstalledJitPath(ceed, "ceed-jit-source/hip/hip-ref-qfunction.h",
+                                 &read_write_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &read_write_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/hip-ref/ceed-hip-ref-restriction.c b/backends/hip-ref/ceed-hip-ref-restriction.c
index f332ee4568..615ae0e0f4 100644
--- a/backends/hip-ref/ceed-hip-ref-restriction.c
+++ b/backends/hip-ref/ceed-hip-ref-restriction.c
@@ -339,8 +339,9 @@ int CeedElemRestrictionCreate_Hip(CeedMemType mtype, CeedCopyMode cmode,
   // Compile HIP kernels
   CeedInt num_nodes = impl->num_nodes;
   char *restriction_kernel_path, *restriction_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-restriction.h",
-                             &restriction_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetInstalledJitPath(ceed,
+                                 "ceed-jit-source/hip/hip-ref-restriction.h",
+                                 &restriction_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Restriction Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, restriction_kernel_path,
                                 &restriction_kernel_source);
diff --git a/backends/hip-shared/ceed-hip-shared-basis.c b/backends/hip-shared/ceed-hip-shared-basis.c
index 2fcb6569cc..3ae5547ee8 100644
--- a/backends/hip-shared/ceed-hip-shared-basis.c
+++ b/backends/hip-shared/ceed-hip-shared-basis.c
@@ -326,8 +326,9 @@ int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
 
   // Compile basis kernels
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-shared-basis.h",
-                             &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetInstalledJitPath(ceed,
+                                 "ceed-jit-source/hip/hip-shared-basis.h",
+                                 &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
diff --git a/gallery/identity/ceed-identity.c b/gallery/identity/ceed-identity.c
index a18b8d9fd7..c407f92108 100644
--- a/gallery/identity/ceed-identity.c
+++ b/gallery/identity/ceed-identity.c
@@ -9,7 +9,7 @@
 #include <ceed/backend.h>
 #include <stddef.h>
 #include <string.h>
-#include "ceed-identity.h"
+#include <ceed-jit-source/gallery/ceed-identity.h>
 
 /**
   @brief Set fields identity QFunction that copies inputs directly into outputs
diff --git a/gallery/mass-vector/ceed-vectormassapply.c b/gallery/mass-vector/ceed-vectormassapply.c
index d08fa2a70c..8fd9be381a 100644
--- a/gallery/mass-vector/ceed-vectormassapply.c
+++ b/gallery/mass-vector/ceed-vectormassapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-vectormassapply.h"
+#include <ceed-jit-source/gallery/ceed-vectormassapply.h>
 
 /**
   @brief Set fields for Ceed QFunction for applying the mass matrix
diff --git a/gallery/mass/ceed-mass1dbuild.c b/gallery/mass/ceed-mass1dbuild.c
index 10c3a80103..11d58da24e 100644
--- a/gallery/mass/ceed-mass1dbuild.c
+++ b/gallery/mass/ceed-mass1dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-mass1dbuild.h"
+#include <ceed-jit-source/gallery/ceed-mass1dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 1D
diff --git a/gallery/mass/ceed-mass2dbuild.c b/gallery/mass/ceed-mass2dbuild.c
index 554583b3da..a4cdc96123 100644
--- a/gallery/mass/ceed-mass2dbuild.c
+++ b/gallery/mass/ceed-mass2dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-mass2dbuild.h"
+#include <ceed-jit-source/gallery/ceed-mass2dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 2D
diff --git a/gallery/mass/ceed-mass3dbuild.c b/gallery/mass/ceed-mass3dbuild.c
index 66ef311006..7a66e850c6 100644
--- a/gallery/mass/ceed-mass3dbuild.c
+++ b/gallery/mass/ceed-mass3dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-mass3dbuild.h"
+#include <ceed-jit-source/gallery/ceed-mass3dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 3D
diff --git a/gallery/mass/ceed-massapply.c b/gallery/mass/ceed-massapply.c
index d30cc89f33..79b41d3285 100644
--- a/gallery/mass/ceed-massapply.c
+++ b/gallery/mass/ceed-massapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-massapply.h"
+#include <ceed-jit-source/gallery/ceed-massapply.h>
 
 /**
   @brief Set fields for Ceed QFunction for applying the mass matrix
diff --git a/gallery/poisson-vector/ceed-vectorpoisson1dapply.c b/gallery/poisson-vector/ceed-vectorpoisson1dapply.c
index 93ba30cd35..89be5ef18e 100644
--- a/gallery/poisson-vector/ceed-vectorpoisson1dapply.c
+++ b/gallery/poisson-vector/ceed-vectorpoisson1dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-vectorpoisson1dapply.h"
+#include <ceed-jit-source/gallery/ceed-vectorpoisson1dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 1D Poisson operator
diff --git a/gallery/poisson-vector/ceed-vectorpoisson2dapply.c b/gallery/poisson-vector/ceed-vectorpoisson2dapply.c
index 6bd65ae256..bb9e90aa93 100644
--- a/gallery/poisson-vector/ceed-vectorpoisson2dapply.c
+++ b/gallery/poisson-vector/ceed-vectorpoisson2dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-vectorpoisson2dapply.h"
+#include <ceed-jit-source/gallery/ceed-vectorpoisson2dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 2D Poisson operator
diff --git a/gallery/poisson-vector/ceed-vectorpoisson3dapply.c b/gallery/poisson-vector/ceed-vectorpoisson3dapply.c
index e2d8b1b169..413b9a1f1c 100644
--- a/gallery/poisson-vector/ceed-vectorpoisson3dapply.c
+++ b/gallery/poisson-vector/ceed-vectorpoisson3dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-vectorpoisson3dapply.h"
+#include <ceed-jit-source/gallery/ceed-vectorpoisson3dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 3D Poisson operator
diff --git a/gallery/poisson/ceed-poisson1dapply.c b/gallery/poisson/ceed-poisson1dapply.c
index 93d5354817..500f844552 100644
--- a/gallery/poisson/ceed-poisson1dapply.c
+++ b/gallery/poisson/ceed-poisson1dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-poisson1dapply.h"
+#include <ceed-jit-source/gallery/ceed-poisson1dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 1D Poisson operator
diff --git a/gallery/poisson/ceed-poisson1dbuild.c b/gallery/poisson/ceed-poisson1dbuild.c
index 98bd7f7c4e..45e28ecddd 100644
--- a/gallery/poisson/ceed-poisson1dbuild.c
+++ b/gallery/poisson/ceed-poisson1dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-poisson1dbuild.h"
+#include <ceed-jit-source/gallery/ceed-poisson1dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 1D
diff --git a/gallery/poisson/ceed-poisson2dapply.c b/gallery/poisson/ceed-poisson2dapply.c
index 9b121f5517..9090da8e6c 100644
--- a/gallery/poisson/ceed-poisson2dapply.c
+++ b/gallery/poisson/ceed-poisson2dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-poisson2dapply.h"
+#include <ceed-jit-source/gallery/ceed-poisson2dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 2D Poisson operator
diff --git a/gallery/poisson/ceed-poisson2dbuild.c b/gallery/poisson/ceed-poisson2dbuild.c
index e2cc0f3e39..cb3d8f3076 100644
--- a/gallery/poisson/ceed-poisson2dbuild.c
+++ b/gallery/poisson/ceed-poisson2dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-poisson2dbuild.h"
+#include <ceed-jit-source/gallery/ceed-poisson2dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 2D
diff --git a/gallery/poisson/ceed-poisson3dapply.c b/gallery/poisson/ceed-poisson3dapply.c
index 20a371ab0f..fba742982c 100644
--- a/gallery/poisson/ceed-poisson3dapply.c
+++ b/gallery/poisson/ceed-poisson3dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-poisson3dapply.h"
+#include <ceed-jit-source/gallery/ceed-poisson3dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 3D Poisson operator
diff --git a/gallery/poisson/ceed-poisson3dbuild.c b/gallery/poisson/ceed-poisson3dbuild.c
index 82bf3163b4..b86b9d9d2b 100644
--- a/gallery/poisson/ceed-poisson3dbuild.c
+++ b/gallery/poisson/ceed-poisson3dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-poisson3dbuild.h"
+#include <ceed-jit-source/gallery/ceed-poisson3dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 3D
diff --git a/gallery/scale/ceed-scale.c b/gallery/scale/ceed-scale.c
index 90d738f7e2..be834536a1 100644
--- a/gallery/scale/ceed-scale.c
+++ b/gallery/scale/ceed-scale.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include "ceed-scale.h"
+#include <ceed-jit-source/gallery/ceed-scale.h>
 
 /**
   @brief  Set fields for vector scaling QFunction that scales inputs
diff --git a/include/ceed-impl.h b/include/ceed-impl.h
index a1985e81b8..172353df58 100644
--- a/include/ceed-impl.h
+++ b/include/ceed-impl.h
@@ -89,6 +89,7 @@ struct Ceed_private {
   int obj_delegate_count;
   Ceed op_fallback_ceed, op_fallback_parent;
   const char *op_fallback_resource;
+  const char *jit_source_root;
   int (*Error)(Ceed, const char *, int, const char *, int, const char *,
                va_list *);
   int (*GetPreferredMemType)(CeedMemType *);
diff --git a/backends/cuda-ref/kernels/cuda-ref-basis-nontensor.h b/include/ceed-jit-source/cuda/cuda-ref-basis-nontensor.h
similarity index 100%
rename from backends/cuda-ref/kernels/cuda-ref-basis-nontensor.h
rename to include/ceed-jit-source/cuda/cuda-ref-basis-nontensor.h
diff --git a/backends/cuda-ref/kernels/cuda-ref-basis-tensor.h b/include/ceed-jit-source/cuda/cuda-ref-basis-tensor.h
similarity index 100%
rename from backends/cuda-ref/kernels/cuda-ref-basis-tensor.h
rename to include/ceed-jit-source/cuda/cuda-ref-basis-tensor.h
diff --git a/backends/cuda-ref/kernels/cuda-ref-qfunction.h b/include/ceed-jit-source/cuda/cuda-ref-qfunction.h
similarity index 100%
rename from backends/cuda-ref/kernels/cuda-ref-qfunction.h
rename to include/ceed-jit-source/cuda/cuda-ref-qfunction.h
diff --git a/backends/cuda-ref/kernels/cuda-ref-restriction.h b/include/ceed-jit-source/cuda/cuda-ref-restriction.h
similarity index 100%
rename from backends/cuda-ref/kernels/cuda-ref-restriction.h
rename to include/ceed-jit-source/cuda/cuda-ref-restriction.h
diff --git a/backends/cuda-shared/kernels/cuda-shared-basis.h b/include/ceed-jit-source/cuda/cuda-shared-basis.h
similarity index 100%
rename from backends/cuda-shared/kernels/cuda-shared-basis.h
rename to include/ceed-jit-source/cuda/cuda-shared-basis.h
diff --git a/gallery/identity/ceed-identity.h b/include/ceed-jit-source/gallery/ceed-identity.h
similarity index 100%
rename from gallery/identity/ceed-identity.h
rename to include/ceed-jit-source/gallery/ceed-identity.h
diff --git a/gallery/mass/ceed-mass1dbuild.h b/include/ceed-jit-source/gallery/ceed-mass1dbuild.h
similarity index 100%
rename from gallery/mass/ceed-mass1dbuild.h
rename to include/ceed-jit-source/gallery/ceed-mass1dbuild.h
diff --git a/gallery/mass/ceed-mass2dbuild.h b/include/ceed-jit-source/gallery/ceed-mass2dbuild.h
similarity index 100%
rename from gallery/mass/ceed-mass2dbuild.h
rename to include/ceed-jit-source/gallery/ceed-mass2dbuild.h
diff --git a/gallery/mass/ceed-mass3dbuild.h b/include/ceed-jit-source/gallery/ceed-mass3dbuild.h
similarity index 100%
rename from gallery/mass/ceed-mass3dbuild.h
rename to include/ceed-jit-source/gallery/ceed-mass3dbuild.h
diff --git a/gallery/mass/ceed-massapply.h b/include/ceed-jit-source/gallery/ceed-massapply.h
similarity index 100%
rename from gallery/mass/ceed-massapply.h
rename to include/ceed-jit-source/gallery/ceed-massapply.h
diff --git a/gallery/poisson/ceed-poisson1dapply.h b/include/ceed-jit-source/gallery/ceed-poisson1dapply.h
similarity index 100%
rename from gallery/poisson/ceed-poisson1dapply.h
rename to include/ceed-jit-source/gallery/ceed-poisson1dapply.h
diff --git a/gallery/poisson/ceed-poisson1dbuild.h b/include/ceed-jit-source/gallery/ceed-poisson1dbuild.h
similarity index 100%
rename from gallery/poisson/ceed-poisson1dbuild.h
rename to include/ceed-jit-source/gallery/ceed-poisson1dbuild.h
diff --git a/gallery/poisson/ceed-poisson2dapply.h b/include/ceed-jit-source/gallery/ceed-poisson2dapply.h
similarity index 100%
rename from gallery/poisson/ceed-poisson2dapply.h
rename to include/ceed-jit-source/gallery/ceed-poisson2dapply.h
diff --git a/gallery/poisson/ceed-poisson2dbuild.h b/include/ceed-jit-source/gallery/ceed-poisson2dbuild.h
similarity index 100%
rename from gallery/poisson/ceed-poisson2dbuild.h
rename to include/ceed-jit-source/gallery/ceed-poisson2dbuild.h
diff --git a/gallery/poisson/ceed-poisson3dapply.h b/include/ceed-jit-source/gallery/ceed-poisson3dapply.h
similarity index 100%
rename from gallery/poisson/ceed-poisson3dapply.h
rename to include/ceed-jit-source/gallery/ceed-poisson3dapply.h
diff --git a/gallery/poisson/ceed-poisson3dbuild.h b/include/ceed-jit-source/gallery/ceed-poisson3dbuild.h
similarity index 100%
rename from gallery/poisson/ceed-poisson3dbuild.h
rename to include/ceed-jit-source/gallery/ceed-poisson3dbuild.h
diff --git a/gallery/scale/ceed-scale.h b/include/ceed-jit-source/gallery/ceed-scale.h
similarity index 100%
rename from gallery/scale/ceed-scale.h
rename to include/ceed-jit-source/gallery/ceed-scale.h
diff --git a/gallery/mass-vector/ceed-vectormassapply.h b/include/ceed-jit-source/gallery/ceed-vectormassapply.h
similarity index 100%
rename from gallery/mass-vector/ceed-vectormassapply.h
rename to include/ceed-jit-source/gallery/ceed-vectormassapply.h
diff --git a/gallery/poisson-vector/ceed-vectorpoisson1dapply.h b/include/ceed-jit-source/gallery/ceed-vectorpoisson1dapply.h
similarity index 100%
rename from gallery/poisson-vector/ceed-vectorpoisson1dapply.h
rename to include/ceed-jit-source/gallery/ceed-vectorpoisson1dapply.h
diff --git a/gallery/poisson-vector/ceed-vectorpoisson2dapply.h b/include/ceed-jit-source/gallery/ceed-vectorpoisson2dapply.h
similarity index 100%
rename from gallery/poisson-vector/ceed-vectorpoisson2dapply.h
rename to include/ceed-jit-source/gallery/ceed-vectorpoisson2dapply.h
diff --git a/gallery/poisson-vector/ceed-vectorpoisson3dapply.h b/include/ceed-jit-source/gallery/ceed-vectorpoisson3dapply.h
similarity index 100%
rename from gallery/poisson-vector/ceed-vectorpoisson3dapply.h
rename to include/ceed-jit-source/gallery/ceed-vectorpoisson3dapply.h
diff --git a/backends/hip-ref/kernels/hip-ref-basis-nontensor.h b/include/ceed-jit-source/hip/hip-ref-basis-nontensor.h
similarity index 100%
rename from backends/hip-ref/kernels/hip-ref-basis-nontensor.h
rename to include/ceed-jit-source/hip/hip-ref-basis-nontensor.h
diff --git a/backends/hip-ref/kernels/hip-ref-basis-tensor.h b/include/ceed-jit-source/hip/hip-ref-basis-tensor.h
similarity index 100%
rename from backends/hip-ref/kernels/hip-ref-basis-tensor.h
rename to include/ceed-jit-source/hip/hip-ref-basis-tensor.h
diff --git a/backends/hip-ref/kernels/hip-ref-qfunction.h b/include/ceed-jit-source/hip/hip-ref-qfunction.h
similarity index 100%
rename from backends/hip-ref/kernels/hip-ref-qfunction.h
rename to include/ceed-jit-source/hip/hip-ref-qfunction.h
diff --git a/backends/hip-ref/kernels/hip-ref-restriction.h b/include/ceed-jit-source/hip/hip-ref-restriction.h
similarity index 100%
rename from backends/hip-ref/kernels/hip-ref-restriction.h
rename to include/ceed-jit-source/hip/hip-ref-restriction.h
diff --git a/backends/hip-shared/kernels/hip-shared-basis.h b/include/ceed-jit-source/hip/hip-shared-basis.h
similarity index 100%
rename from backends/hip-shared/kernels/hip-shared-basis.h
rename to include/ceed-jit-source/hip/hip-shared-basis.h
diff --git a/include/ceed/jit-tools.h b/include/ceed/jit-tools.h
index ffd19e6f5d..40d36502f3 100644
--- a/include/ceed/jit-tools.h
+++ b/include/ceed/jit-tools.h
@@ -20,8 +20,12 @@
 #include <ceed/ceed.h>
 
 CEED_EXTERN int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path, char **buffer);
-
+CEED_EXTERN int CeedGetJitSourceRoot(Ceed ceed, const char **jit_source_root);
+CEED_EXTERN int CeedGetJitRelativePath(const char *absolute_file_path,
+                                       const char **relative_file_path);
 CEED_EXTERN int CeedPathConcatenate(Ceed ceed, const char *base_file_path,
                                     const char *relative_file_path, char **new_file_path);
+CEED_EXTERN int CeedGetInstalledJitPath(Ceed ceed, const char *relative_file_path,
+                                        char **jit_file_path);
 
 #endif
diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c
index 17a110f849..a9f3a223a1 100644
--- a/interface/ceed-jit-tools.c
+++ b/interface/ceed-jit-tools.c
@@ -8,6 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <ceed/jit-tools.h>
+#include <ceed-impl.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
@@ -16,9 +17,9 @@
   @brief Load source file into initalized string buffer, including full text
            of local files in place of `#include "local.h"`
 
-  @param ceed                   A Ceed object for error handling
-  @param[in]  source_file_path  Absolute path to source file
-  @param[out] buffer            String buffer for source file contents
+  @param ceed                  A Ceed object for error handling
+  @param[in]  source_file_path Absolute path to source file
+  @param[out] buffer           String buffer for source file contents
 
   @return An error code: 0 - success, otherwise - failure
 
@@ -135,9 +136,9 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed,
            of local files in place of `#include "local.h"`.
          Note: Caller is responsible for freeing the string buffer with `CeedFree()`.
 
-  @param ceed                   A Ceed object for error handling
-  @param[in]  source_file_path  Absolute path to source file
-  @param[out] buffer            String buffer for source file contents
+  @param ceed                  A Ceed object for error handling
+  @param[in]  source_file_path Absolute path to source file
+  @param[out] buffer           String buffer for source file contents
 
   @return An error code: 0 - success, otherwise - failure
 
@@ -157,6 +158,47 @@ int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path,
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Get root of search path for installed files for JiT
+
+  @param ceed                 Ceed object whose JiT source root is returned
+  @param[out] jit_source_root String for search path root
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref Backend
+**/
+int CeedGetJitSourceRoot(Ceed ceed, const char **jit_source_root) {
+  CeedDebug256(ceed, 1, "JiT Source Root: ");
+  CeedDebug256(ceed, 255, "%s\n", ceed->jit_source_root);
+  *jit_source_root = ceed->jit_source_root;
+  return CEED_ERROR_SUCCESS;
+}
+
+/**
+  @brief Find the relative filepath to an installed JiT file
+
+  @param[in]  absolute_file_path Absolute path to installed JiT file
+  @param[out] relative_file_path Relative path to installed JiT file
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref Backend
+**/
+int CeedGetJitRelativePath(const char *absolute_file_path,
+                           const char **relative_file_path) {
+  *(relative_file_path) = strstr(absolute_file_path, "ceed-jit-source");
+
+  if (!*relative_file_path)
+    // LCOV_EXCL_START
+    return CeedError(NULL, CEED_ERROR_MAJOR,
+                     "Couldn't find relative path including "
+                     "'ceed-jit-source' for: %s", absolute_file_path);
+  // LCOV_EXCL_STOP
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief Build an absolute filepath from a base filepath and an absolute filepath.
            This helps construct source file paths for `CeedLoadSourceToBuffer()`.
@@ -185,3 +227,33 @@ int CeedPathConcatenate(Ceed ceed, const char *base_file_path,
 
   return CEED_ERROR_SUCCESS;
 }
+
+/**
+  @brief Build an absolute filepath to an installed JiT file
+
+  @param ceed                     A Ceed object for error handling
+  @param[in]  relative_file_path  Relative path to installed JiT file
+  @param[out] jit_file_path       String buffer for absolute path to target file
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref Backend
+**/
+int CeedGetInstalledJitPath(Ceed ceed, const char *relative_file_path,
+                            char **jit_file_path) {
+  int ierr;
+  const char *jit_source_root;
+
+  ierr = CeedGetJitSourceRoot(ceed, &jit_source_root); CeedChk(ierr);
+
+  char *last_slash = strrchr(jit_source_root, '/');
+  size_t base_length = (last_slash - jit_source_root + 1),
+         relative_length = strlen(relative_file_path),
+         new_file_path_length = base_length + relative_length + 1;
+
+  ierr = CeedCalloc(new_file_path_length, jit_file_path); CeedChk(ierr);
+  memcpy(*jit_file_path, jit_source_root, base_length);
+  memcpy(&((*jit_file_path)[base_length]), relative_file_path, relative_length);
+
+  return CEED_ERROR_SUCCESS;
+}
diff --git a/interface/ceed-qfunction.c b/interface/ceed-qfunction.c
index 8471d0a79f..fc66c825aa 100644
--- a/interface/ceed-qfunction.c
+++ b/interface/ceed-qfunction.c
@@ -67,6 +67,8 @@ static size_t num_qfunctions;
 int CeedQFunctionRegister(const char *name, const char *source,
                           CeedInt vec_length, CeedQFunctionUser f,
                           int (*init)(Ceed, const char *, CeedQFunction)) {
+  int ierr;
+
   if (num_qfunctions >= sizeof(gallery_qfunctions) / sizeof(
         gallery_qfunctions[0]))
     // LCOV_EXCL_START
@@ -75,9 +77,12 @@ int CeedQFunctionRegister(const char *name, const char *source,
 
   CeedDebugEnv("Gallery Register: %s", name);
 
+  const char *relative_file_path;
+  ierr = CeedGetJitRelativePath(source, &relative_file_path); CeedChk(ierr);
+
   strncpy(gallery_qfunctions[num_qfunctions].name, name, CEED_MAX_RESOURCE_LEN);
   gallery_qfunctions[num_qfunctions].name[CEED_MAX_RESOURCE_LEN-1] = 0;
-  strncpy(gallery_qfunctions[num_qfunctions].source, source,
+  strncpy(gallery_qfunctions[num_qfunctions].source, relative_file_path,
           CEED_MAX_RESOURCE_LEN);
   gallery_qfunctions[num_qfunctions].source[CEED_MAX_RESOURCE_LEN-1] = 0;
   gallery_qfunctions[num_qfunctions].vec_length = vec_length;
@@ -650,12 +655,18 @@ int CeedQFunctionCreateInteriorByName(Ceed ceed,  const char *name,
     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "No suitable gallery QFunction");
   // LCOV_EXCL_STOP
 
+  // Build source path
+  char *gallery_qfunction_source_path;
+
   // Create QFunction
+  ierr = CeedGetInstalledJitPath(ceed, gallery_qfunctions[match_index].source,
+                                 &gallery_qfunction_source_path); CeedChk(ierr);
   ierr = CeedQFunctionCreateInterior(ceed,
                                      gallery_qfunctions[match_index].vec_length,
                                      gallery_qfunctions[match_index].f,
-                                     gallery_qfunctions[match_index].source, qf);
+                                     gallery_qfunction_source_path, qf);
   CeedChk(ierr);
+  ierr = CeedFree(&gallery_qfunction_source_path); CeedChkBackend(ierr);
 
   // QFunction specific setup
   ierr = gallery_qfunctions[match_index].init(ceed, name, *qf); CeedChk(ierr);
diff --git a/interface/ceed.c b/interface/ceed.c
index 52961af6e9..f6e067aba4 100644
--- a/interface/ceed.c
+++ b/interface/ceed.c
@@ -807,6 +807,7 @@ int CeedInit(const char *resource, Ceed *ceed) {
 
   // Setup Ceed
   ierr = CeedCalloc(1, ceed); CeedChk(ierr);
+  (*ceed)->jit_source_root = CEED_JIT_SOURCE_ROOT;
   const char *ceed_error_handler = getenv("CEED_ERROR_HANDLER");
   if (!ceed_error_handler)
     ceed_error_handler = "abort";

From ec9662f473a541d91a40b6d595ea8775a4c1973d Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Fri, 1 Apr 2022 10:47:27 -0600
Subject: [PATCH 02/59] jit - include jit files in install data

---
 Makefile | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 4c86f5daf0..3c6cbbc350 100644
--- a/Makefile
+++ b/Makefile
@@ -626,7 +626,9 @@ $(OBJDIR)/ceed.pc : pkgconfig-prefix = $(prefix)
 
 install : $(libceed) $(OBJDIR)/ceed.pc
 	$(INSTALL) -d $(addprefix $(if $(DESTDIR),"$(DESTDIR)"),"$(includedir)"\
-	  "$(includedir)/ceed/" "$(libdir)" "$(pkgconfigdir)")
+	  "$(includedir)/ceed/" "$(includedir)/ceed-jit-source/"\
+	  "$(includedir)/ceed-jit-source/cuda/" "$(includedir)/ceed-jit-source/hip/"\
+	  "$(includedir)/ceed-jit-source/gallery/" "$(libdir)" "$(pkgconfigdir)")
 	$(INSTALL_DATA) include/ceed/ceed.h "$(DESTDIR)$(includedir)/ceed/"
 	$(INSTALL_DATA) include/ceed/ceed-f32.h "$(DESTDIR)$(includedir)/ceed/"
 	$(INSTALL_DATA) include/ceed/ceed-f64.h "$(DESTDIR)$(includedir)/ceed/"
@@ -640,6 +642,32 @@ install : $(libceed) $(OBJDIR)/ceed.pc
 	$(INSTALL_DATA) $(OBJDIR)/ceed.pc "$(DESTDIR)$(pkgconfigdir)/"
 	$(INSTALL_DATA) include/ceed.h "$(DESTDIR)$(includedir)/"
 	$(INSTALL_DATA) include/ceedf.h "$(DESTDIR)$(includedir)/"
+	$(INSTALL_DATA) include/ceed-jit-source/cuda/cuda-ref-basis-nontensor.h "$(DESTDIR)$(includedir)/ceed-jit-source/cuda/"
+	$(INSTALL_DATA) include/ceed-jit-source/cuda/cuda-ref-basis-tensor.h "$(DESTDIR)$(includedir)/ceed-jit-source/cuda/"
+	$(INSTALL_DATA) include/ceed-jit-source/cuda/cuda-ref-qfunction.h "$(DESTDIR)$(includedir)/ceed-jit-source/cuda/"
+	$(INSTALL_DATA) include/ceed-jit-source/cuda/cuda-ref-restriction.h "$(DESTDIR)$(includedir)/ceed-jit-source/cuda/"
+	$(INSTALL_DATA) include/ceed-jit-source/cuda/cuda-shared-basis.h "$(DESTDIR)$(includedir)/ceed-jit-source/cuda/"
+	$(INSTALL_DATA) include/ceed-jit-source/hip/hip-ref-basis-nontensor.h "$(DESTDIR)$(includedir)/ceed-jit-source/hip/"
+	$(INSTALL_DATA) include/ceed-jit-source/hip/hip-ref-basis-tensor.h "$(DESTDIR)$(includedir)/ceed-jit-source/hip/"
+	$(INSTALL_DATA) include/ceed-jit-source/hip/hip-ref-qfunction.h "$(DESTDIR)$(includedir)/ceed-jit-source/hip/"
+	$(INSTALL_DATA) include/ceed-jit-source/hip/hip-ref-restriction.h "$(DESTDIR)$(includedir)/ceed-jit-source/hip/"
+	$(INSTALL_DATA) include/ceed-jit-source/hip/hip-shared-basis.h "$(DESTDIR)$(includedir)/ceed-jit-source/hip/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-identity.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-mass1dbuild.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-mass2dbuild.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-mass3dbuild.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-massapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-poisson1dapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-poisson1dbuild.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-poisson2dapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-poisson2dbuild.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-poisson3dapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-poisson3dbuild.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-scale.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-vectormassapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-vectorpoisson1dapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-vectorpoisson2dapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-vectorpoisson3dapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
 
 .PHONY : all cln clean doxygen doc lib install par print test tst prove prv prove-all junit examples style style-c style-py tidy iwyu info info-backends info-backends-all
 

From 5766aa57233d93ab7d3cb5b89af9d21f6061682c Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Fri, 1 Apr 2022 11:22:43 -0600
Subject: [PATCH 03/59] make - rebuild ceed.o on install

---
 Makefile                          | 15 ++++++++++++++-
 doc/sphinx/source/releasenotes.md |  4 ++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 3c6cbbc350..be4c483f82 100644
--- a/Makefile
+++ b/Makefile
@@ -145,7 +145,7 @@ ifeq ($(COVERAGE), 1)
   CEED_LDFLAGS += --coverage
 endif
 
-CFLAGS += $(if $(ASAN),$(AFLAGS)) -DCEED_JIT_SOURCE_ROOT="\"$(abspath ./include)/\""
+CFLAGS += $(if $(ASAN),$(AFLAGS))
 FFLAGS += $(if $(ASAN),$(AFLAGS))
 CEED_LDFLAGS += $(if $(ASAN),$(AFLAGS))
 CPPFLAGS += -I./include
@@ -624,6 +624,19 @@ $(OBJDIR)/ceed.pc : pkgconfig-prefix = $(prefix)
 	    -e "s:%prefix%:$(pkgconfig-prefix):" \
 	    -e "s:%libs_private%:$(pkgconfig-libs-private):" $< > $@
 
+ifeq ($(filter install,$(MAKECMDGOALS)),install)
+  CPPFLAGS += $(if $(ASAN),$(AFLAGS)) -DCEED_JIT_SOURCE_ROOT="\"$(abspath $(DESTDIR)$(includedir))/\""
+else
+  CPPFLAGS += -DCEED_JIT_SOURCE_ROOT="\"$(abspath ./include)/\""
+endif
+
+$(OBJDIR)/interface/ceed.o: .FORCE
+.FORCE:
+
+ifeq ($(filter install,$(MAKECMDGOALS)),install)
+	$(MAKE) $(OBJDIR)/interface/ceed.o
+endif
+
 install : $(libceed) $(OBJDIR)/ceed.pc
 	$(INSTALL) -d $(addprefix $(if $(DESTDIR),"$(DESTDIR)"),"$(includedir)"\
 	  "$(includedir)/ceed/" "$(includedir)/ceed-jit-source/"\
diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md
index ccfcda1470..ec2fa884dc 100644
--- a/doc/sphinx/source/releasenotes.md
+++ b/doc/sphinx/source/releasenotes.md
@@ -11,6 +11,10 @@ for each release of libCEED.
 
 - Added {c:func}`CeedQFunctionSetUserFlopsEstimate` and {c:func}`CeedOperatorGetFlopsEstimate` to facilitate estimating FLOPs in operator application.
 
+### Bugfix
+
+- Install JiT source files in install directory to fix GPU functionality for installed libCEED.
+
 (v0-10)=
 
 ## v0.10 (Mar 21, 2022)

From a0154adecfab8547cdc0febbbf40ac009dbe9d1d Mon Sep 17 00:00:00 2001
From: Jed Brown <jed@jedbrown.org>
Date: Sun, 3 Apr 2022 18:23:29 -0600
Subject: [PATCH 04/59] move include/ceed-jit-source to include/ceed/jit-source

---
 Makefile                                      | 35 ++++---------------
 backends/cuda-ref/ceed-cuda-ref-basis.c       |  4 +--
 .../cuda-ref/ceed-cuda-ref-qfunction-load.cpp |  2 +-
 backends/cuda-ref/ceed-cuda-restriction.c     |  2 +-
 backends/cuda-shared/ceed-cuda-shared-basis.c |  2 +-
 backends/hip-ref/ceed-hip-ref-basis.c         |  4 +--
 .../hip-ref/ceed-hip-ref-qfunction-load.cpp   |  2 +-
 backends/hip-ref/ceed-hip-ref-restriction.c   |  2 +-
 backends/hip-shared/ceed-hip-shared-basis.c   |  2 +-
 gallery/identity/ceed-identity.c              |  2 +-
 gallery/mass-vector/ceed-vectormassapply.c    |  2 +-
 gallery/mass/ceed-mass1dbuild.c               |  2 +-
 gallery/mass/ceed-mass2dbuild.c               |  2 +-
 gallery/mass/ceed-mass3dbuild.c               |  2 +-
 gallery/mass/ceed-massapply.c                 |  2 +-
 .../ceed-vectorpoisson1dapply.c               |  2 +-
 .../ceed-vectorpoisson2dapply.c               |  2 +-
 .../ceed-vectorpoisson3dapply.c               |  2 +-
 gallery/poisson/ceed-poisson1dapply.c         |  2 +-
 gallery/poisson/ceed-poisson1dbuild.c         |  2 +-
 gallery/poisson/ceed-poisson2dapply.c         |  2 +-
 gallery/poisson/ceed-poisson2dbuild.c         |  2 +-
 gallery/poisson/ceed-poisson3dapply.c         |  2 +-
 gallery/poisson/ceed-poisson3dbuild.c         |  2 +-
 gallery/scale/ceed-scale.c                    |  2 +-
 .../cuda/cuda-ref-basis-nontensor.h           |  0
 .../jit-source}/cuda/cuda-ref-basis-tensor.h  |  0
 .../jit-source}/cuda/cuda-ref-qfunction.h     |  0
 .../jit-source}/cuda/cuda-ref-restriction.h   |  0
 .../jit-source}/cuda/cuda-shared-basis.h      |  0
 .../jit-source}/gallery/ceed-identity.h       |  0
 .../jit-source}/gallery/ceed-mass1dbuild.h    |  0
 .../jit-source}/gallery/ceed-mass2dbuild.h    |  0
 .../jit-source}/gallery/ceed-mass3dbuild.h    |  0
 .../jit-source}/gallery/ceed-massapply.h      |  0
 .../jit-source}/gallery/ceed-poisson1dapply.h |  0
 .../jit-source}/gallery/ceed-poisson1dbuild.h |  0
 .../jit-source}/gallery/ceed-poisson2dapply.h |  0
 .../jit-source}/gallery/ceed-poisson2dbuild.h |  0
 .../jit-source}/gallery/ceed-poisson3dapply.h |  0
 .../jit-source}/gallery/ceed-poisson3dbuild.h |  0
 .../jit-source}/gallery/ceed-scale.h          |  0
 .../gallery/ceed-vectormassapply.h            |  0
 .../gallery/ceed-vectorpoisson1dapply.h       |  0
 .../gallery/ceed-vectorpoisson2dapply.h       |  0
 .../gallery/ceed-vectorpoisson3dapply.h       |  0
 .../jit-source}/hip/hip-ref-basis-nontensor.h |  0
 .../jit-source}/hip/hip-ref-basis-tensor.h    |  0
 .../jit-source}/hip/hip-ref-qfunction.h       |  0
 .../jit-source}/hip/hip-ref-restriction.h     |  0
 .../jit-source}/hip/hip-shared-basis.h        |  0
 interface/ceed-jit-tools.c                    |  5 +--
 52 files changed, 35 insertions(+), 57 deletions(-)
 rename include/{ceed-jit-source => ceed/jit-source}/cuda/cuda-ref-basis-nontensor.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/cuda/cuda-ref-basis-tensor.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/cuda/cuda-ref-qfunction.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/cuda/cuda-ref-restriction.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/cuda/cuda-shared-basis.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-identity.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-mass1dbuild.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-mass2dbuild.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-mass3dbuild.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-massapply.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-poisson1dapply.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-poisson1dbuild.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-poisson2dapply.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-poisson2dbuild.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-poisson3dapply.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-poisson3dbuild.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-scale.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-vectormassapply.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-vectorpoisson1dapply.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-vectorpoisson2dapply.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/gallery/ceed-vectorpoisson3dapply.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/hip/hip-ref-basis-nontensor.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/hip/hip-ref-basis-tensor.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/hip/hip-ref-qfunction.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/hip/hip-ref-restriction.h (100%)
 rename include/{ceed-jit-source => ceed/jit-source}/hip/hip-shared-basis.h (100%)

diff --git a/Makefile b/Makefile
index be4c483f82..209693e7dc 100644
--- a/Makefile
+++ b/Makefile
@@ -639,9 +639,9 @@ endif
 
 install : $(libceed) $(OBJDIR)/ceed.pc
 	$(INSTALL) -d $(addprefix $(if $(DESTDIR),"$(DESTDIR)"),"$(includedir)"\
-	  "$(includedir)/ceed/" "$(includedir)/ceed-jit-source/"\
-	  "$(includedir)/ceed-jit-source/cuda/" "$(includedir)/ceed-jit-source/hip/"\
-	  "$(includedir)/ceed-jit-source/gallery/" "$(libdir)" "$(pkgconfigdir)")
+	  "$(includedir)/ceed/" "$(includedir)/ceed/jit-source/"\
+	  "$(includedir)/ceed/jit-source/cuda/" "$(includedir)/ceed/jit-source/hip/"\
+	  "$(includedir)/ceed/jit-source/gallery/" "$(libdir)" "$(pkgconfigdir)")
 	$(INSTALL_DATA) include/ceed/ceed.h "$(DESTDIR)$(includedir)/ceed/"
 	$(INSTALL_DATA) include/ceed/ceed-f32.h "$(DESTDIR)$(includedir)/ceed/"
 	$(INSTALL_DATA) include/ceed/ceed-f64.h "$(DESTDIR)$(includedir)/ceed/"
@@ -655,32 +655,9 @@ install : $(libceed) $(OBJDIR)/ceed.pc
 	$(INSTALL_DATA) $(OBJDIR)/ceed.pc "$(DESTDIR)$(pkgconfigdir)/"
 	$(INSTALL_DATA) include/ceed.h "$(DESTDIR)$(includedir)/"
 	$(INSTALL_DATA) include/ceedf.h "$(DESTDIR)$(includedir)/"
-	$(INSTALL_DATA) include/ceed-jit-source/cuda/cuda-ref-basis-nontensor.h "$(DESTDIR)$(includedir)/ceed-jit-source/cuda/"
-	$(INSTALL_DATA) include/ceed-jit-source/cuda/cuda-ref-basis-tensor.h "$(DESTDIR)$(includedir)/ceed-jit-source/cuda/"
-	$(INSTALL_DATA) include/ceed-jit-source/cuda/cuda-ref-qfunction.h "$(DESTDIR)$(includedir)/ceed-jit-source/cuda/"
-	$(INSTALL_DATA) include/ceed-jit-source/cuda/cuda-ref-restriction.h "$(DESTDIR)$(includedir)/ceed-jit-source/cuda/"
-	$(INSTALL_DATA) include/ceed-jit-source/cuda/cuda-shared-basis.h "$(DESTDIR)$(includedir)/ceed-jit-source/cuda/"
-	$(INSTALL_DATA) include/ceed-jit-source/hip/hip-ref-basis-nontensor.h "$(DESTDIR)$(includedir)/ceed-jit-source/hip/"
-	$(INSTALL_DATA) include/ceed-jit-source/hip/hip-ref-basis-tensor.h "$(DESTDIR)$(includedir)/ceed-jit-source/hip/"
-	$(INSTALL_DATA) include/ceed-jit-source/hip/hip-ref-qfunction.h "$(DESTDIR)$(includedir)/ceed-jit-source/hip/"
-	$(INSTALL_DATA) include/ceed-jit-source/hip/hip-ref-restriction.h "$(DESTDIR)$(includedir)/ceed-jit-source/hip/"
-	$(INSTALL_DATA) include/ceed-jit-source/hip/hip-shared-basis.h "$(DESTDIR)$(includedir)/ceed-jit-source/hip/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-identity.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-mass1dbuild.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-mass2dbuild.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-mass3dbuild.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-massapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-poisson1dapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-poisson1dbuild.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-poisson2dapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-poisson2dbuild.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-poisson3dapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-poisson3dbuild.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-scale.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-vectormassapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-vectorpoisson1dapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-vectorpoisson2dapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
-	$(INSTALL_DATA) include/ceed-jit-source/gallery/ceed-vectorpoisson3dapply.h "$(DESTDIR)$(includedir)/ceed-jit-source/gallery/"
+	$(INSTALL_DATA) $(wildcard include/ceed/jit-source/cuda/*.h) "$(DESTDIR)$(includedir)/ceed/jit-source/cuda/"
+	$(INSTALL_DATA) $(wildcard include/ceed/jit-source/hip/*.h) "$(DESTDIR)$(includedir)/ceed/jit-source/hip/"
+	$(INSTALL_DATA) $(wildcard include/ceed/jit-source/gallery/*.h) "$(DESTDIR)$(includedir)/ceed/jit-source/gallery/"
 
 .PHONY : all cln clean doxygen doc lib install par print test tst prove prv prove-all junit examples style style-c style-py tidy iwyu info info-backends info-backends-all
 
diff --git a/backends/cuda-ref/ceed-cuda-ref-basis.c b/backends/cuda-ref/ceed-cuda-ref-basis.c
index 71af75a7ad..2da1e2bc9b 100644
--- a/backends/cuda-ref/ceed-cuda-ref-basis.c
+++ b/backends/cuda-ref/ceed-cuda-ref-basis.c
@@ -267,7 +267,7 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
   ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed-jit-source/cuda/cuda-ref-basis-tensor.h",
+                                 "ceed/jit-source/cuda/cuda-ref-basis-tensor.h",
                                  &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
@@ -337,7 +337,7 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim,
   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
   ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed-jit-source/cuda/cuda-ref-basis-nontensor.h",
+                                 "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h",
                                  &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
diff --git a/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp b/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
index ca28216a8a..09cd152c9d 100644
--- a/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
+++ b/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
@@ -45,7 +45,7 @@ extern "C" int CeedCudaBuildQFunction(CeedQFunction qf) {
 
   // Build strings for final kernel
   char *read_write_kernel_path, *read_write_kernel_source;
-  ierr = CeedGetInstalledJitPath(ceed, "ceed-jit-source/cuda/cuda-ref-qfunction.h",
+  ierr = CeedGetInstalledJitPath(ceed, "ceed/jit-source/cuda/cuda-ref-qfunction.h",
                                  &read_write_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &read_write_kernel_source);
diff --git a/backends/cuda-ref/ceed-cuda-restriction.c b/backends/cuda-ref/ceed-cuda-restriction.c
index a3cdc28e01..e5719df36d 100644
--- a/backends/cuda-ref/ceed-cuda-restriction.c
+++ b/backends/cuda-ref/ceed-cuda-restriction.c
@@ -342,7 +342,7 @@ int CeedElemRestrictionCreate_Cuda(CeedMemType m_type, CeedCopyMode copy_mode,
   CeedInt num_nodes = impl->num_nodes;
   char *restriction_kernel_path, *restriction_kernel_source;
   ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed-jit-source/cuda/cuda-ref-restriction.h",
+                                 "ceed/jit-source/cuda/cuda-ref-restriction.h",
                                  &restriction_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Restriction Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, restriction_kernel_path,
diff --git a/backends/cuda-shared/ceed-cuda-shared-basis.c b/backends/cuda-shared/ceed-cuda-shared-basis.c
index bf924ebf6f..68f944481f 100644
--- a/backends/cuda-shared/ceed-cuda-shared-basis.c
+++ b/backends/cuda-shared/ceed-cuda-shared-basis.c
@@ -271,7 +271,7 @@ int CeedBasisCreateTensorH1_Cuda_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
   ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed-jit-source/cuda/cuda-shared-basis.h",
+                                 "ceed/jit-source/cuda/cuda-shared-basis.h",
                                  &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
diff --git a/backends/hip-ref/ceed-hip-ref-basis.c b/backends/hip-ref/ceed-hip-ref-basis.c
index f0fade6509..e69d3f459f 100644
--- a/backends/hip-ref/ceed-hip-ref-basis.c
+++ b/backends/hip-ref/ceed-hip-ref-basis.c
@@ -269,7 +269,7 @@ int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
   ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed-jit-source/hip/hip-ref-basis-tensor.h",
+                                 "ceed/jit-source/hip/hip-ref-basis-tensor.h",
                                  &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
@@ -338,7 +338,7 @@ int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes,
   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
   ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed-jit-source/hip/hip-ref-basis-nontensor.h",
+                                 "ceed/jit-source/hip/hip-ref-basis-nontensor.h",
                                  &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
diff --git a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
index 1faf40852e..af6c16e56b 100644
--- a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
+++ b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
@@ -47,7 +47,7 @@ extern "C" int CeedHipBuildQFunction(CeedQFunction qf) {
 
   // Build strings for final kernel
   char *read_write_kernel_path, *read_write_kernel_source;
-  ierr = CeedGetInstalledJitPath(ceed, "ceed-jit-source/hip/hip-ref-qfunction.h",
+  ierr = CeedGetInstalledJitPath(ceed, "ceed/jit-source/hip/hip-ref-qfunction.h",
                                  &read_write_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &read_write_kernel_source);
diff --git a/backends/hip-ref/ceed-hip-ref-restriction.c b/backends/hip-ref/ceed-hip-ref-restriction.c
index 615ae0e0f4..8857249466 100644
--- a/backends/hip-ref/ceed-hip-ref-restriction.c
+++ b/backends/hip-ref/ceed-hip-ref-restriction.c
@@ -340,7 +340,7 @@ int CeedElemRestrictionCreate_Hip(CeedMemType mtype, CeedCopyMode cmode,
   CeedInt num_nodes = impl->num_nodes;
   char *restriction_kernel_path, *restriction_kernel_source;
   ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed-jit-source/hip/hip-ref-restriction.h",
+                                 "ceed/jit-source/hip/hip-ref-restriction.h",
                                  &restriction_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Restriction Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, restriction_kernel_path,
diff --git a/backends/hip-shared/ceed-hip-shared-basis.c b/backends/hip-shared/ceed-hip-shared-basis.c
index 3ae5547ee8..f60f960cad 100644
--- a/backends/hip-shared/ceed-hip-shared-basis.c
+++ b/backends/hip-shared/ceed-hip-shared-basis.c
@@ -327,7 +327,7 @@ int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   // Compile basis kernels
   char *basis_kernel_path, *basis_kernel_source;
   ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed-jit-source/hip/hip-shared-basis.h",
+                                 "ceed/jit-source/hip/hip-shared-basis.h",
                                  &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
diff --git a/gallery/identity/ceed-identity.c b/gallery/identity/ceed-identity.c
index c407f92108..857a2ed808 100644
--- a/gallery/identity/ceed-identity.c
+++ b/gallery/identity/ceed-identity.c
@@ -9,7 +9,7 @@
 #include <ceed/backend.h>
 #include <stddef.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-identity.h>
+#include <ceed/jit-source/gallery/ceed-identity.h>
 
 /**
   @brief Set fields identity QFunction that copies inputs directly into outputs
diff --git a/gallery/mass-vector/ceed-vectormassapply.c b/gallery/mass-vector/ceed-vectormassapply.c
index 8fd9be381a..29d9e5e0a7 100644
--- a/gallery/mass-vector/ceed-vectormassapply.c
+++ b/gallery/mass-vector/ceed-vectormassapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-vectormassapply.h>
+#include <ceed/jit-source/gallery/ceed-vectormassapply.h>
 
 /**
   @brief Set fields for Ceed QFunction for applying the mass matrix
diff --git a/gallery/mass/ceed-mass1dbuild.c b/gallery/mass/ceed-mass1dbuild.c
index 11d58da24e..6506125a82 100644
--- a/gallery/mass/ceed-mass1dbuild.c
+++ b/gallery/mass/ceed-mass1dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-mass1dbuild.h>
+#include <ceed/jit-source/gallery/ceed-mass1dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 1D
diff --git a/gallery/mass/ceed-mass2dbuild.c b/gallery/mass/ceed-mass2dbuild.c
index a4cdc96123..ce3aeb00f2 100644
--- a/gallery/mass/ceed-mass2dbuild.c
+++ b/gallery/mass/ceed-mass2dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-mass2dbuild.h>
+#include <ceed/jit-source/gallery/ceed-mass2dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 2D
diff --git a/gallery/mass/ceed-mass3dbuild.c b/gallery/mass/ceed-mass3dbuild.c
index 7a66e850c6..ec8de0c671 100644
--- a/gallery/mass/ceed-mass3dbuild.c
+++ b/gallery/mass/ceed-mass3dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-mass3dbuild.h>
+#include <ceed/jit-source/gallery/ceed-mass3dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 3D
diff --git a/gallery/mass/ceed-massapply.c b/gallery/mass/ceed-massapply.c
index 79b41d3285..a110cfd52f 100644
--- a/gallery/mass/ceed-massapply.c
+++ b/gallery/mass/ceed-massapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-massapply.h>
+#include <ceed/jit-source/gallery/ceed-massapply.h>
 
 /**
   @brief Set fields for Ceed QFunction for applying the mass matrix
diff --git a/gallery/poisson-vector/ceed-vectorpoisson1dapply.c b/gallery/poisson-vector/ceed-vectorpoisson1dapply.c
index 89be5ef18e..cd54a03446 100644
--- a/gallery/poisson-vector/ceed-vectorpoisson1dapply.c
+++ b/gallery/poisson-vector/ceed-vectorpoisson1dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-vectorpoisson1dapply.h>
+#include <ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 1D Poisson operator
diff --git a/gallery/poisson-vector/ceed-vectorpoisson2dapply.c b/gallery/poisson-vector/ceed-vectorpoisson2dapply.c
index bb9e90aa93..66fc448ae5 100644
--- a/gallery/poisson-vector/ceed-vectorpoisson2dapply.c
+++ b/gallery/poisson-vector/ceed-vectorpoisson2dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-vectorpoisson2dapply.h>
+#include <ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 2D Poisson operator
diff --git a/gallery/poisson-vector/ceed-vectorpoisson3dapply.c b/gallery/poisson-vector/ceed-vectorpoisson3dapply.c
index 413b9a1f1c..bf924d492b 100644
--- a/gallery/poisson-vector/ceed-vectorpoisson3dapply.c
+++ b/gallery/poisson-vector/ceed-vectorpoisson3dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-vectorpoisson3dapply.h>
+#include <ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 3D Poisson operator
diff --git a/gallery/poisson/ceed-poisson1dapply.c b/gallery/poisson/ceed-poisson1dapply.c
index 500f844552..c6e7d7cdfa 100644
--- a/gallery/poisson/ceed-poisson1dapply.c
+++ b/gallery/poisson/ceed-poisson1dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-poisson1dapply.h>
+#include <ceed/jit-source/gallery/ceed-poisson1dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 1D Poisson operator
diff --git a/gallery/poisson/ceed-poisson1dbuild.c b/gallery/poisson/ceed-poisson1dbuild.c
index 45e28ecddd..20d418aa68 100644
--- a/gallery/poisson/ceed-poisson1dbuild.c
+++ b/gallery/poisson/ceed-poisson1dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-poisson1dbuild.h>
+#include <ceed/jit-source/gallery/ceed-poisson1dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 1D
diff --git a/gallery/poisson/ceed-poisson2dapply.c b/gallery/poisson/ceed-poisson2dapply.c
index 9090da8e6c..e1f47b359d 100644
--- a/gallery/poisson/ceed-poisson2dapply.c
+++ b/gallery/poisson/ceed-poisson2dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-poisson2dapply.h>
+#include <ceed/jit-source/gallery/ceed-poisson2dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 2D Poisson operator
diff --git a/gallery/poisson/ceed-poisson2dbuild.c b/gallery/poisson/ceed-poisson2dbuild.c
index cb3d8f3076..f79896baca 100644
--- a/gallery/poisson/ceed-poisson2dbuild.c
+++ b/gallery/poisson/ceed-poisson2dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-poisson2dbuild.h>
+#include <ceed/jit-source/gallery/ceed-poisson2dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 2D
diff --git a/gallery/poisson/ceed-poisson3dapply.c b/gallery/poisson/ceed-poisson3dapply.c
index fba742982c..682c1ee3c3 100644
--- a/gallery/poisson/ceed-poisson3dapply.c
+++ b/gallery/poisson/ceed-poisson3dapply.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-poisson3dapply.h>
+#include <ceed/jit-source/gallery/ceed-poisson3dapply.h>
 
 /**
   @brief Set fields for Ceed QFunction applying the 3D Poisson operator
diff --git a/gallery/poisson/ceed-poisson3dbuild.c b/gallery/poisson/ceed-poisson3dbuild.c
index b86b9d9d2b..5bed48856a 100644
--- a/gallery/poisson/ceed-poisson3dbuild.c
+++ b/gallery/poisson/ceed-poisson3dbuild.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-poisson3dbuild.h>
+#include <ceed/jit-source/gallery/ceed-poisson3dbuild.h>
 
 /**
   @brief Set fields for Ceed QFunction building the geometric data for the 3D
diff --git a/gallery/scale/ceed-scale.c b/gallery/scale/ceed-scale.c
index be834536a1..14d24f9084 100644
--- a/gallery/scale/ceed-scale.c
+++ b/gallery/scale/ceed-scale.c
@@ -8,7 +8,7 @@
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
 #include <string.h>
-#include <ceed-jit-source/gallery/ceed-scale.h>
+#include <ceed/jit-source/gallery/ceed-scale.h>
 
 /**
   @brief  Set fields for vector scaling QFunction that scales inputs
diff --git a/include/ceed-jit-source/cuda/cuda-ref-basis-nontensor.h b/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h
similarity index 100%
rename from include/ceed-jit-source/cuda/cuda-ref-basis-nontensor.h
rename to include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h
diff --git a/include/ceed-jit-source/cuda/cuda-ref-basis-tensor.h b/include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h
similarity index 100%
rename from include/ceed-jit-source/cuda/cuda-ref-basis-tensor.h
rename to include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h
diff --git a/include/ceed-jit-source/cuda/cuda-ref-qfunction.h b/include/ceed/jit-source/cuda/cuda-ref-qfunction.h
similarity index 100%
rename from include/ceed-jit-source/cuda/cuda-ref-qfunction.h
rename to include/ceed/jit-source/cuda/cuda-ref-qfunction.h
diff --git a/include/ceed-jit-source/cuda/cuda-ref-restriction.h b/include/ceed/jit-source/cuda/cuda-ref-restriction.h
similarity index 100%
rename from include/ceed-jit-source/cuda/cuda-ref-restriction.h
rename to include/ceed/jit-source/cuda/cuda-ref-restriction.h
diff --git a/include/ceed-jit-source/cuda/cuda-shared-basis.h b/include/ceed/jit-source/cuda/cuda-shared-basis.h
similarity index 100%
rename from include/ceed-jit-source/cuda/cuda-shared-basis.h
rename to include/ceed/jit-source/cuda/cuda-shared-basis.h
diff --git a/include/ceed-jit-source/gallery/ceed-identity.h b/include/ceed/jit-source/gallery/ceed-identity.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-identity.h
rename to include/ceed/jit-source/gallery/ceed-identity.h
diff --git a/include/ceed-jit-source/gallery/ceed-mass1dbuild.h b/include/ceed/jit-source/gallery/ceed-mass1dbuild.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-mass1dbuild.h
rename to include/ceed/jit-source/gallery/ceed-mass1dbuild.h
diff --git a/include/ceed-jit-source/gallery/ceed-mass2dbuild.h b/include/ceed/jit-source/gallery/ceed-mass2dbuild.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-mass2dbuild.h
rename to include/ceed/jit-source/gallery/ceed-mass2dbuild.h
diff --git a/include/ceed-jit-source/gallery/ceed-mass3dbuild.h b/include/ceed/jit-source/gallery/ceed-mass3dbuild.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-mass3dbuild.h
rename to include/ceed/jit-source/gallery/ceed-mass3dbuild.h
diff --git a/include/ceed-jit-source/gallery/ceed-massapply.h b/include/ceed/jit-source/gallery/ceed-massapply.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-massapply.h
rename to include/ceed/jit-source/gallery/ceed-massapply.h
diff --git a/include/ceed-jit-source/gallery/ceed-poisson1dapply.h b/include/ceed/jit-source/gallery/ceed-poisson1dapply.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-poisson1dapply.h
rename to include/ceed/jit-source/gallery/ceed-poisson1dapply.h
diff --git a/include/ceed-jit-source/gallery/ceed-poisson1dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson1dbuild.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-poisson1dbuild.h
rename to include/ceed/jit-source/gallery/ceed-poisson1dbuild.h
diff --git a/include/ceed-jit-source/gallery/ceed-poisson2dapply.h b/include/ceed/jit-source/gallery/ceed-poisson2dapply.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-poisson2dapply.h
rename to include/ceed/jit-source/gallery/ceed-poisson2dapply.h
diff --git a/include/ceed-jit-source/gallery/ceed-poisson2dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-poisson2dbuild.h
rename to include/ceed/jit-source/gallery/ceed-poisson2dbuild.h
diff --git a/include/ceed-jit-source/gallery/ceed-poisson3dapply.h b/include/ceed/jit-source/gallery/ceed-poisson3dapply.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-poisson3dapply.h
rename to include/ceed/jit-source/gallery/ceed-poisson3dapply.h
diff --git a/include/ceed-jit-source/gallery/ceed-poisson3dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-poisson3dbuild.h
rename to include/ceed/jit-source/gallery/ceed-poisson3dbuild.h
diff --git a/include/ceed-jit-source/gallery/ceed-scale.h b/include/ceed/jit-source/gallery/ceed-scale.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-scale.h
rename to include/ceed/jit-source/gallery/ceed-scale.h
diff --git a/include/ceed-jit-source/gallery/ceed-vectormassapply.h b/include/ceed/jit-source/gallery/ceed-vectormassapply.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-vectormassapply.h
rename to include/ceed/jit-source/gallery/ceed-vectormassapply.h
diff --git a/include/ceed-jit-source/gallery/ceed-vectorpoisson1dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-vectorpoisson1dapply.h
rename to include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h
diff --git a/include/ceed-jit-source/gallery/ceed-vectorpoisson2dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-vectorpoisson2dapply.h
rename to include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h
diff --git a/include/ceed-jit-source/gallery/ceed-vectorpoisson3dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h
similarity index 100%
rename from include/ceed-jit-source/gallery/ceed-vectorpoisson3dapply.h
rename to include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h
diff --git a/include/ceed-jit-source/hip/hip-ref-basis-nontensor.h b/include/ceed/jit-source/hip/hip-ref-basis-nontensor.h
similarity index 100%
rename from include/ceed-jit-source/hip/hip-ref-basis-nontensor.h
rename to include/ceed/jit-source/hip/hip-ref-basis-nontensor.h
diff --git a/include/ceed-jit-source/hip/hip-ref-basis-tensor.h b/include/ceed/jit-source/hip/hip-ref-basis-tensor.h
similarity index 100%
rename from include/ceed-jit-source/hip/hip-ref-basis-tensor.h
rename to include/ceed/jit-source/hip/hip-ref-basis-tensor.h
diff --git a/include/ceed-jit-source/hip/hip-ref-qfunction.h b/include/ceed/jit-source/hip/hip-ref-qfunction.h
similarity index 100%
rename from include/ceed-jit-source/hip/hip-ref-qfunction.h
rename to include/ceed/jit-source/hip/hip-ref-qfunction.h
diff --git a/include/ceed-jit-source/hip/hip-ref-restriction.h b/include/ceed/jit-source/hip/hip-ref-restriction.h
similarity index 100%
rename from include/ceed-jit-source/hip/hip-ref-restriction.h
rename to include/ceed/jit-source/hip/hip-ref-restriction.h
diff --git a/include/ceed-jit-source/hip/hip-shared-basis.h b/include/ceed/jit-source/hip/hip-shared-basis.h
similarity index 100%
rename from include/ceed-jit-source/hip/hip-shared-basis.h
rename to include/ceed/jit-source/hip/hip-shared-basis.h
diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c
index a9f3a223a1..f71ddf2b54 100644
--- a/interface/ceed-jit-tools.c
+++ b/interface/ceed-jit-tools.c
@@ -102,6 +102,7 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed,
         strncpy(&include_source_path[root_length + include_file_name_len + 1], "", 1);
         // ---- Recursive call to load source to buffer
         ierr = CeedLoadSourceToInitalizedBuffer(ceed, include_source_path, buffer);
+        CeedDebug256(ceed, 2, "JiT Including: %s\n", include_source_path);
         CeedChk(ierr);
         ierr = CeedFree(&include_source_path); CeedChk(ierr);
       }
@@ -187,13 +188,13 @@ int CeedGetJitSourceRoot(Ceed ceed, const char **jit_source_root) {
 **/
 int CeedGetJitRelativePath(const char *absolute_file_path,
                            const char **relative_file_path) {
-  *(relative_file_path) = strstr(absolute_file_path, "ceed-jit-source");
+  *(relative_file_path) = strstr(absolute_file_path, "ceed/jit-source");
 
   if (!*relative_file_path)
     // LCOV_EXCL_START
     return CeedError(NULL, CEED_ERROR_MAJOR,
                      "Couldn't find relative path including "
-                     "'ceed-jit-source' for: %s", absolute_file_path);
+                     "'ceed/jit-source' for: %s", absolute_file_path);
   // LCOV_EXCL_STOP
 
   return CEED_ERROR_SUCCESS;

From 032e71eaa1e750e7a66514b4e3c9cd7b57405aab Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Mon, 4 Apr 2022 11:37:34 -0600
Subject: [PATCH 05/59] jit - search array of jit source roots

---
 Makefile                                 |   8 +-
 include/ceed-impl.h                      |   5 +-
 include/ceed/jit-tools.h                 |   9 +-
 interface/ceed-jit-source-root-default.c |  12 +++
 interface/ceed-jit-tools.c               | 118 +++++++++++++----------
 interface/ceed-qfunction.c               |   4 +-
 interface/ceed.c                         |   9 +-
 7 files changed, 103 insertions(+), 62 deletions(-)
 create mode 100644 interface/ceed-jit-source-root-default.c

diff --git a/Makefile b/Makefile
index 209693e7dc..f0da690d00 100644
--- a/Makefile
+++ b/Makefile
@@ -625,16 +625,16 @@ $(OBJDIR)/ceed.pc : pkgconfig-prefix = $(prefix)
 	    -e "s:%libs_private%:$(pkgconfig-libs-private):" $< > $@
 
 ifeq ($(filter install,$(MAKECMDGOALS)),install)
-  CPPFLAGS += $(if $(ASAN),$(AFLAGS)) -DCEED_JIT_SOURCE_ROOT="\"$(abspath $(DESTDIR)$(includedir))/\""
+  CPPFLAGS += $(if $(ASAN),$(AFLAGS)) -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath $(DESTDIR)$(includedir))/\""
 else
-  CPPFLAGS += -DCEED_JIT_SOURCE_ROOT="\"$(abspath ./include)/\""
+  CPPFLAGS += -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath ./include)/\""
 endif
 
-$(OBJDIR)/interface/ceed.o: .FORCE
+$(OBJDIR)/interface/ceed-jit-source-root-default.o: .FORCE
 .FORCE:
 
 ifeq ($(filter install,$(MAKECMDGOALS)),install)
-	$(MAKE) $(OBJDIR)/interface/ceed.o
+	$(MAKE) $(OBJDIR)/interface/ceed-jit-source-root-default.o
 endif
 
 install : $(libceed) $(OBJDIR)/ceed.pc
diff --git a/include/ceed-impl.h b/include/ceed-impl.h
index 172353df58..07d1169125 100644
--- a/include/ceed-impl.h
+++ b/include/ceed-impl.h
@@ -14,6 +14,8 @@
 #include <ceed/backend.h>
 #include <stdbool.h>
 
+CEED_INTERN const char CeedJitSourceRootDefault[];
+
 /** @defgroup CeedUser Public API for Ceed
     @ingroup Ceed
 */
@@ -89,7 +91,8 @@ struct Ceed_private {
   int obj_delegate_count;
   Ceed op_fallback_ceed, op_fallback_parent;
   const char *op_fallback_resource;
-  const char *jit_source_root;
+  const char **jit_source_roots;
+  CeedInt num_jit_source_roots;
   int (*Error)(Ceed, const char *, int, const char *, int, const char *,
                va_list *);
   int (*GetPreferredMemType)(CeedMemType *);
diff --git a/include/ceed/jit-tools.h b/include/ceed/jit-tools.h
index 40d36502f3..7445760578 100644
--- a/include/ceed/jit-tools.h
+++ b/include/ceed/jit-tools.h
@@ -20,12 +20,11 @@
 #include <ceed/ceed.h>
 
 CEED_EXTERN int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path, char **buffer);
-CEED_EXTERN int CeedGetJitSourceRoot(Ceed ceed, const char **jit_source_root);
-CEED_EXTERN int CeedGetJitRelativePath(const char *absolute_file_path,
-                                       const char **relative_file_path);
 CEED_EXTERN int CeedPathConcatenate(Ceed ceed, const char *base_file_path,
                                     const char *relative_file_path, char **new_file_path);
-CEED_EXTERN int CeedGetInstalledJitPath(Ceed ceed, const char *relative_file_path,
-                                        char **jit_file_path);
+CEED_EXTERN int CeedGetJitRelativePath(const char *absolute_file_path,
+                                       const char **relative_file_path);
+CEED_EXTERN int CeedGetJitAbsolutePath(Ceed ceed, const char *relative_file_path,
+                                       char **absolute_file_path);
 
 #endif
diff --git a/interface/ceed-jit-source-root-default.c b/interface/ceed-jit-source-root-default.c
new file mode 100644
index 0000000000..03edb3afd0
--- /dev/null
+++ b/interface/ceed-jit-source-root-default.c
@@ -0,0 +1,12 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed.h>
+#include <ceed/backend.h>
+#include <ceed-impl.h>
+
+const char CeedJitSourceRootDefault[] = CEED_JIT_SOUCE_ROOT_DEFAULT;
diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c
index f71ddf2b54..13c5047d6a 100644
--- a/interface/ceed-jit-tools.c
+++ b/interface/ceed-jit-tools.c
@@ -160,19 +160,31 @@ int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path,
 }
 
 /**
-  @brief Get root of search path for installed files for JiT
+  @brief Build an absolute filepath from a base filepath and a relative filepath.
+           This helps construct source file paths for `CeedLoadSourceToBuffer()`.
+         Note: Caller is responsible for freeing the string buffer with `CeedFree()`.
 
-  @param ceed                 A Ceed object for error handling
-  @param[out] jit_source_root String for search path root
+  @param ceed                     A Ceed object for error handling
+  @param[in]  base_file_path      Absolute path to current file
+  @param[in]  relative_file_path  Relative path to target file
+  @param[out] new_file_path       String buffer for absolute path to target file
 
   @return An error code: 0 - success, otherwise - failure
 
   @ref Backend
 **/
-int CeedGetJitSourceRoot(Ceed ceed, const char **jit_source_root) {
-  CeedDebug256(ceed, 1, "JiT Source Root: ");
-  CeedDebug256(ceed, 255, "%s\n", ceed->jit_source_root);
-  *jit_source_root = ceed->jit_source_root;
+int CeedPathConcatenate(Ceed ceed, const char *base_file_path,
+                        const char *relative_file_path, char **new_file_path) {
+  int ierr;
+  char *last_slash = strrchr(base_file_path, '/');
+  size_t base_length = (last_slash - base_file_path + 1),
+         relative_length = strlen(relative_file_path),
+         new_file_path_length = base_length + relative_length + 1;
+
+  ierr = CeedCalloc(new_file_path_length, new_file_path); CeedChk(ierr);
+  memcpy(*new_file_path, base_file_path, base_length);
+  memcpy(&((*new_file_path)[base_length]), relative_file_path, relative_length);
+
   return CEED_ERROR_SUCCESS;
 }
 
@@ -201,60 +213,68 @@ int CeedGetJitRelativePath(const char *absolute_file_path,
 }
 
 /**
-  @brief Build an absolute filepath from a base filepath and an absolute filepath.
-           This helps construct source file paths for `CeedLoadSourceToBuffer()`.
-         Note: Caller is responsible for freeing the string buffer with `CeedFree()`.
+  @brief Build an absolute filepath to a JiT file
 
-  @param ceed                     A Ceed object for error handling
-  @param[in]  base_file_path      Absolute path to current file
-  @param[in]  relative_file_path  Relative path to target file
-  @param[out] new_file_path       String buffer for absolute path to target file
+  @param ceed                    A Ceed object for error handling
+  @param[in]  relative_file_path Relative path to installed JiT file
+  @param[out] absolute_file_path String buffer for absolute path to target file
 
   @return An error code: 0 - success, otherwise - failure
 
   @ref Backend
 **/
-int CeedPathConcatenate(Ceed ceed, const char *base_file_path,
-                        const char *relative_file_path, char **new_file_path) {
+int CeedGetJitAbsolutePath(Ceed ceed, const char *relative_file_path,
+                           char **absolute_file_path) {
   int ierr;
-  char *last_slash = strrchr(base_file_path, '/');
-  size_t base_length = (last_slash - base_file_path + 1),
-         relative_length = strlen(relative_file_path),
-         new_file_path_length = base_length + relative_length + 1;
-
-  ierr = CeedCalloc(new_file_path_length, new_file_path); CeedChk(ierr);
-  memcpy(*new_file_path, base_file_path, base_length);
-  memcpy(&((*new_file_path)[base_length]), relative_file_path, relative_length);
 
-  return CEED_ERROR_SUCCESS;
-}
-
-/**
-  @brief Build an absolute filepath to an installed JiT file
-
-  @param ceed                     A Ceed object for error handling
-  @param[in]  relative_file_path  Relative path to installed JiT file
-  @param[out] new_file_path       String buffer for absolute path to target file
+  // Debug
+  CeedDebug256(ceed, 1, "---------- Ceed JiT ----------\n");
+  CeedDebug256(ceed, 1, "Relative JiT source file: ");
+  CeedDebug256(ceed, 255, "%s\n", relative_file_path);
 
-  @return An error code: 0 - success, otherwise - failure
 
-  @ref Backend
-**/
-int CeedGetInstalledJitPath(Ceed ceed, const char *relative_file_path,
-                            char **jit_file_path) {
-  int ierr;
-  const char *jit_source_root;
+  for (CeedInt i = 0; i < ceed->num_jit_source_roots; i++) {
+    // Debug
+    CeedDebug256(ceed, 1, "Checking JiT root: ");
+    CeedDebug256(ceed, 255, "%s\n", ceed->jit_source_roots[i]);
 
-  ierr = CeedGetJitSourceRoot(ceed, &jit_source_root); CeedChk(ierr);
+    // Build absolute path with current root
+    ierr = CeedPathConcatenate(ceed, ceed->jit_source_roots[i],
+                               relative_file_path, absolute_file_path);
+    CeedChk(ierr);
 
-  char *last_slash = strrchr(jit_source_root, '/');
-  size_t base_length = (last_slash - jit_source_root + 1),
-         relative_length = strlen(relative_file_path),
-         new_file_path_length = base_length + relative_length + 1;
+    // Temporarily mask function name if included
+    char *last_colon = strrchr(*absolute_file_path, ':');
+    if (last_colon) {
+      *last_colon = '\0';
+    }
 
-  ierr = CeedCalloc(new_file_path_length, jit_file_path); CeedChk(ierr);
-  memcpy(*jit_file_path, jit_source_root, base_length);
-  memcpy(&((*jit_file_path)[base_length]), relative_file_path, relative_length);
+    // Debug
+    CeedDebug256(ceed, 1, "Checking for source file: ");
+    CeedDebug256(ceed, 255, "%s\n", *absolute_file_path);
+
+    // Check for valid file path
+    FILE *source_file;
+    source_file = fopen((const char *)*absolute_file_path, "rb");
+    if (source_file) {
+      // Debug
+      CeedDebug256(ceed, 1, "Found JiT source file: ");
+      CeedDebug256(ceed, 255, "%s\n", *absolute_file_path);
+
+      // Restore function name if included
+      if (last_colon) {
+        *last_colon = ':';
+      }
+      fclose(source_file);
+      return CEED_ERROR_SUCCESS;
+    } else {
+      ierr = CeedFree(absolute_file_path); CeedChk(ierr);
+    }
+  }
 
-  return CEED_ERROR_SUCCESS;
+  // LCOV_EXCL_START
+  return CeedError(ceed, CEED_ERROR_MAJOR,
+                   "Couldn't find matching JiT source file: %s",
+                   relative_file_path);
+  // LCOV_EXCL_STOP
 }
diff --git a/interface/ceed-qfunction.c b/interface/ceed-qfunction.c
index fc66c825aa..27ee39c2a9 100644
--- a/interface/ceed-qfunction.c
+++ b/interface/ceed-qfunction.c
@@ -659,8 +659,8 @@ int CeedQFunctionCreateInteriorByName(Ceed ceed,  const char *name,
   char *gallery_qfunction_source_path;
 
   // Create QFunction
-  ierr = CeedGetInstalledJitPath(ceed, gallery_qfunctions[match_index].source,
-                                 &gallery_qfunction_source_path); CeedChk(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed, gallery_qfunctions[match_index].source,
+                                &gallery_qfunction_source_path); CeedChk(ierr);
   ierr = CeedQFunctionCreateInterior(ceed,
                                      gallery_qfunctions[match_index].vec_length,
                                      gallery_qfunctions[match_index].f,
diff --git a/interface/ceed.c b/interface/ceed.c
index f6e067aba4..16668c882e 100644
--- a/interface/ceed.c
+++ b/interface/ceed.c
@@ -807,7 +807,9 @@ int CeedInit(const char *resource, Ceed *ceed) {
 
   // Setup Ceed
   ierr = CeedCalloc(1, ceed); CeedChk(ierr);
-  (*ceed)->jit_source_root = CEED_JIT_SOURCE_ROOT;
+  ierr = CeedCalloc(1, &(*ceed)->jit_source_roots); CeedChk(ierr);
+  (*ceed)->jit_source_roots[0] = CeedJitSourceRootDefault;
+  (*ceed)->num_jit_source_roots = 1;
   const char *ceed_error_handler = getenv("CEED_ERROR_HANDLER");
   if (!ceed_error_handler)
     ceed_error_handler = "abort";
@@ -1048,6 +1050,11 @@ int CeedDestroy(Ceed *ceed) {
     ierr = (*ceed)->Destroy(*ceed); CeedChk(ierr);
   }
 
+  for (int i=1; i < (*ceed)->num_jit_source_roots; i++) {
+    ierr = CeedFree(&(*ceed)->jit_source_roots[i]); CeedChk(ierr);
+  }
+  ierr = CeedFree(&(*ceed)->jit_source_roots); CeedChk(ierr);
+
   ierr = CeedFree(&(*ceed)->f_offsets); CeedChk(ierr);
   ierr = CeedFree(&(*ceed)->resource); CeedChk(ierr);
   ierr = CeedDestroy(&(*ceed)->op_fallback_ceed); CeedChk(ierr);

From ee5a26f213b810c411fd8b16edb331d78b70003c Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Mon, 4 Apr 2022 11:53:51 -0600
Subject: [PATCH 06/59] jit - add interface for adding additional jit source
 dirs

---
 backends/cuda-ref/ceed-cuda-ref-basis.c       | 12 +--
 .../cuda-ref/ceed-cuda-ref-qfunction-load.cpp |  4 +-
 backends/cuda-ref/ceed-cuda-restriction.c     |  6 +-
 backends/cuda-shared/ceed-cuda-shared-basis.c |  6 +-
 backends/hip-ref/ceed-hip-ref-basis.c         | 12 +--
 .../hip-ref/ceed-hip-ref-qfunction-load.cpp   |  4 +-
 backends/hip-ref/ceed-hip-ref-restriction.c   |  6 +-
 backends/hip-shared/ceed-hip-shared-basis.c   |  6 +-
 include/ceed-impl.h                           |  2 +-
 include/ceed/ceed.h                           |  1 +
 include/ceed/jit-tools.h                      |  1 +
 interface/ceed-jit-tools.c                    | 86 +++++++++++++------
 interface/ceed-qfunction.c                    | 28 +++---
 interface/ceed.c                              | 35 +++++++-
 14 files changed, 140 insertions(+), 69 deletions(-)

diff --git a/backends/cuda-ref/ceed-cuda-ref-basis.c b/backends/cuda-ref/ceed-cuda-ref-basis.c
index 2da1e2bc9b..d9343a7839 100644
--- a/backends/cuda-ref/ceed-cuda-ref-basis.c
+++ b/backends/cuda-ref/ceed-cuda-ref-basis.c
@@ -266,9 +266,9 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   CeedInt num_comp;
   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed/jit-source/cuda/cuda-ref-basis-tensor.h",
-                                 &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/cuda/cuda-ref-basis-tensor.h",
+                                &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
@@ -336,9 +336,9 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim,
   CeedInt num_comp;
   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h",
-                                 &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h",
+                                &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp b/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
index 09cd152c9d..aee2038a34 100644
--- a/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
+++ b/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
@@ -45,8 +45,8 @@ extern "C" int CeedCudaBuildQFunction(CeedQFunction qf) {
 
   // Build strings for final kernel
   char *read_write_kernel_path, *read_write_kernel_source;
-  ierr = CeedGetInstalledJitPath(ceed, "ceed/jit-source/cuda/cuda-ref-qfunction.h",
-                                 &read_write_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-qfunction.h",
+                                &read_write_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &read_write_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/cuda-ref/ceed-cuda-restriction.c b/backends/cuda-ref/ceed-cuda-restriction.c
index e5719df36d..57b92e36c9 100644
--- a/backends/cuda-ref/ceed-cuda-restriction.c
+++ b/backends/cuda-ref/ceed-cuda-restriction.c
@@ -341,9 +341,9 @@ int CeedElemRestrictionCreate_Cuda(CeedMemType m_type, CeedCopyMode copy_mode,
   // Compile CUDA kernels
   CeedInt num_nodes = impl->num_nodes;
   char *restriction_kernel_path, *restriction_kernel_source;
-  ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed/jit-source/cuda/cuda-ref-restriction.h",
-                                 &restriction_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/cuda/cuda-ref-restriction.h",
+                                &restriction_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Restriction Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, restriction_kernel_path,
                                 &restriction_kernel_source);
diff --git a/backends/cuda-shared/ceed-cuda-shared-basis.c b/backends/cuda-shared/ceed-cuda-shared-basis.c
index 68f944481f..dd20b8fa4c 100644
--- a/backends/cuda-shared/ceed-cuda-shared-basis.c
+++ b/backends/cuda-shared/ceed-cuda-shared-basis.c
@@ -270,9 +270,9 @@ int CeedBasisCreateTensorH1_Cuda_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   CeedInt num_comp;
   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed/jit-source/cuda/cuda-shared-basis.h",
-                                 &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/cuda/cuda-shared-basis.h",
+                                &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/hip-ref/ceed-hip-ref-basis.c b/backends/hip-ref/ceed-hip-ref-basis.c
index e69d3f459f..88184db967 100644
--- a/backends/hip-ref/ceed-hip-ref-basis.c
+++ b/backends/hip-ref/ceed-hip-ref-basis.c
@@ -268,9 +268,9 @@ int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
   CeedInt ncomp;
   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed/jit-source/hip/hip-ref-basis-tensor.h",
-                                 &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/hip/hip-ref-basis-tensor.h",
+                                &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
@@ -337,9 +337,9 @@ int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes,
   CeedInt ncomp;
   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed/jit-source/hip/hip-ref-basis-nontensor.h",
-                                 &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/hip/hip-ref-basis-nontensor.h",
+                                &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
index af6c16e56b..dddb6ae992 100644
--- a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
+++ b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
@@ -47,8 +47,8 @@ extern "C" int CeedHipBuildQFunction(CeedQFunction qf) {
 
   // Build strings for final kernel
   char *read_write_kernel_path, *read_write_kernel_source;
-  ierr = CeedGetInstalledJitPath(ceed, "ceed/jit-source/hip/hip-ref-qfunction.h",
-                                 &read_write_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-qfunction.h",
+                                &read_write_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &read_write_kernel_source);
   CeedChkBackend(ierr);
diff --git a/backends/hip-ref/ceed-hip-ref-restriction.c b/backends/hip-ref/ceed-hip-ref-restriction.c
index 8857249466..139af010c6 100644
--- a/backends/hip-ref/ceed-hip-ref-restriction.c
+++ b/backends/hip-ref/ceed-hip-ref-restriction.c
@@ -339,9 +339,9 @@ int CeedElemRestrictionCreate_Hip(CeedMemType mtype, CeedCopyMode cmode,
   // Compile HIP kernels
   CeedInt num_nodes = impl->num_nodes;
   char *restriction_kernel_path, *restriction_kernel_source;
-  ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed/jit-source/hip/hip-ref-restriction.h",
-                                 &restriction_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/hip/hip-ref-restriction.h",
+                                &restriction_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Restriction Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, restriction_kernel_path,
                                 &restriction_kernel_source);
diff --git a/backends/hip-shared/ceed-hip-shared-basis.c b/backends/hip-shared/ceed-hip-shared-basis.c
index f60f960cad..2cd202d0cb 100644
--- a/backends/hip-shared/ceed-hip-shared-basis.c
+++ b/backends/hip-shared/ceed-hip-shared-basis.c
@@ -326,9 +326,9 @@ int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
 
   // Compile basis kernels
   char *basis_kernel_path, *basis_kernel_source;
-  ierr = CeedGetInstalledJitPath(ceed,
-                                 "ceed/jit-source/hip/hip-shared-basis.h",
-                                 &basis_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/hip/hip-shared-basis.h",
+                                &basis_kernel_path); CeedChkBackend(ierr);
   CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n");
   ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source);
   CeedChkBackend(ierr);
diff --git a/include/ceed-impl.h b/include/ceed-impl.h
index 07d1169125..c02d5219a8 100644
--- a/include/ceed-impl.h
+++ b/include/ceed-impl.h
@@ -91,7 +91,7 @@ struct Ceed_private {
   int obj_delegate_count;
   Ceed op_fallback_ceed, op_fallback_parent;
   const char *op_fallback_resource;
-  const char **jit_source_roots;
+  char **jit_source_roots;
   CeedInt num_jit_source_roots;
   int (*Error)(Ceed, const char *, int, const char *, int, const char *,
                va_list *);
diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h
index 2f827c81d4..5c3c85f06d 100644
--- a/include/ceed/ceed.h
+++ b/include/ceed/ceed.h
@@ -197,6 +197,7 @@ CEED_EXTERN int CeedInit(const char *resource, Ceed *ceed);
 CEED_EXTERN int CeedReferenceCopy(Ceed ceed, Ceed *ceed_copy);
 CEED_EXTERN int CeedGetResource(Ceed ceed, const char **resource);
 CEED_EXTERN int CeedIsDeterministic(Ceed ceed, bool *is_deterministic);
+CEED_EXTERN int CeedAddJitSourceRoot(Ceed ceed, const char *jit_source_root);
 CEED_EXTERN int CeedView(Ceed ceed, FILE *stream);
 CEED_EXTERN int CeedDestroy(Ceed *ceed);
 
diff --git a/include/ceed/jit-tools.h b/include/ceed/jit-tools.h
index 7445760578..90d6a7712a 100644
--- a/include/ceed/jit-tools.h
+++ b/include/ceed/jit-tools.h
@@ -19,6 +19,7 @@
 
 #include <ceed/ceed.h>
 
+CEED_EXTERN int CeedCheckFilePath(Ceed ceed, const char *source_file_path, bool *is_valid);
 CEED_EXTERN int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path, char **buffer);
 CEED_EXTERN int CeedPathConcatenate(Ceed ceed, const char *base_file_path,
                                     const char *relative_file_path, char **new_file_path);
diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c
index 13c5047d6a..48f9158c95 100644
--- a/interface/ceed-jit-tools.c
+++ b/interface/ceed-jit-tools.c
@@ -13,6 +13,59 @@
 #include <stdio.h>
 #include <string.h>
 
+/**
+  @brief Check if valid file exists at path given
+
+  @param ceed                  A Ceed object for error handling
+  @param[in]  source_file_path Absolute path to source file
+  @param[out] is_valid         Boolean flag indicating if file can be opened
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref Backend
+**/
+int CeedCheckFilePath(Ceed ceed, const char *source_file_path, bool *is_valid) {
+  int ierr;
+
+  // Sometimes we have path/to/file.h:function_name
+  // Create temporary file path without name, if needed
+  char *source_file_path_only;
+  char *last_colon = strrchr(source_file_path, ':');
+  if (last_colon) {
+    size_t source_file_path_length = (last_colon - source_file_path + 1);
+
+    ierr = CeedCalloc(source_file_path_length, &source_file_path_only);
+    CeedChk(ierr);
+    strncpy(source_file_path_only, source_file_path, source_file_path_length - 1);
+  } else {
+    source_file_path_only = (char *)source_file_path;
+  }
+
+  // Debug
+  CeedDebug256(ceed, 1, "Checking for source file: ");
+  CeedDebug256(ceed, 255, "%s\n", source_file_path_only);
+
+  // Check for valid file path
+  FILE *source_file;
+  source_file = fopen(source_file_path_only, "rb");
+  *is_valid = !!source_file;
+
+  if (*is_valid) {
+    // Debug
+    CeedDebug256(ceed, 1, "Found JiT source file: ");
+    CeedDebug256(ceed, 255, "%s\n", source_file_path_only);
+
+    fclose(source_file);
+  }
+
+  // Free temp file path, if used
+  if (last_colon) {
+    ierr = CeedFree(&source_file_path_only); CeedChk(ierr);
+  }
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief Load source file into initalized string buffer, including full text
            of local files in place of `#include "local.h"`
@@ -182,8 +235,8 @@ int CeedPathConcatenate(Ceed ceed, const char *base_file_path,
          new_file_path_length = base_length + relative_length + 1;
 
   ierr = CeedCalloc(new_file_path_length, new_file_path); CeedChk(ierr);
-  memcpy(*new_file_path, base_file_path, base_length);
-  memcpy(&((*new_file_path)[base_length]), relative_file_path, relative_length);
+  strncpy(*new_file_path, base_file_path, base_length);
+  strncpy(&((*new_file_path)[base_length]), relative_file_path, relative_length);
 
   return CEED_ERROR_SUCCESS;
 }
@@ -234,38 +287,19 @@ int CeedGetJitAbsolutePath(Ceed ceed, const char *relative_file_path,
 
 
   for (CeedInt i = 0; i < ceed->num_jit_source_roots; i++) {
+    bool is_valid;
+
     // Debug
     CeedDebug256(ceed, 1, "Checking JiT root: ");
     CeedDebug256(ceed, 255, "%s\n", ceed->jit_source_roots[i]);
 
-    // Build absolute path with current root
+    // Build  and check absolute path with current root
     ierr = CeedPathConcatenate(ceed, ceed->jit_source_roots[i],
                                relative_file_path, absolute_file_path);
     CeedChk(ierr);
+    ierr = CeedCheckFilePath(ceed, *absolute_file_path, &is_valid); CeedChk(ierr);
 
-    // Temporarily mask function name if included
-    char *last_colon = strrchr(*absolute_file_path, ':');
-    if (last_colon) {
-      *last_colon = '\0';
-    }
-
-    // Debug
-    CeedDebug256(ceed, 1, "Checking for source file: ");
-    CeedDebug256(ceed, 255, "%s\n", *absolute_file_path);
-
-    // Check for valid file path
-    FILE *source_file;
-    source_file = fopen((const char *)*absolute_file_path, "rb");
-    if (source_file) {
-      // Debug
-      CeedDebug256(ceed, 1, "Found JiT source file: ");
-      CeedDebug256(ceed, 255, "%s\n", *absolute_file_path);
-
-      // Restore function name if included
-      if (last_colon) {
-        *last_colon = ':';
-      }
-      fclose(source_file);
+    if (is_valid) {
       return CEED_ERROR_SUCCESS;
     } else {
       ierr = CeedFree(absolute_file_path); CeedChk(ierr);
diff --git a/interface/ceed-qfunction.c b/interface/ceed-qfunction.c
index 27ee39c2a9..6aba938198 100644
--- a/interface/ceed-qfunction.c
+++ b/interface/ceed-qfunction.c
@@ -603,16 +603,30 @@ int CeedQFunctionCreateInterior(Ceed ceed, CeedInt vec_length,
   (*qf)->function = f;
   (*qf)->user_flop_estimate = -1;
   if (strlen(source)) {
-    const char *kernel_name = strrchr(source, ':') + 1;
+    bool is_absolute_path;
+    char *absolute_path;
+
+    ierr = CeedCheckFilePath(ceed, source, &is_absolute_path); CeedChk(ierr);
+    if (is_absolute_path) {
+      absolute_path = (char *)source;
+    } else {
+      ierr = CeedGetJitAbsolutePath(ceed, source, &absolute_path); CeedChk(ierr);
+    }
+
+    const char *kernel_name = strrchr(absolute_path, ':') + 1;
     size_t kernel_name_len = strlen(kernel_name);
     ierr = CeedCalloc(kernel_name_len + 1, &kernel_name_copy); CeedChk(ierr);
     strncpy(kernel_name_copy, kernel_name, kernel_name_len);
     (*qf)->kernel_name = kernel_name_copy;
 
-    size_t source_len = strlen(source) - kernel_name_len - 1;
+    size_t source_len = strlen(absolute_path) - kernel_name_len - 1;
     ierr = CeedCalloc(source_len + 1, &source_copy); CeedChk(ierr);
-    strncpy(source_copy, source, source_len);
+    strncpy(source_copy, absolute_path, source_len);
     (*qf)->source_path = source_copy;
+
+    if (!is_absolute_path) {
+      ierr = CeedFree(&absolute_path); CeedChk(ierr);
+    }
   }
   ierr = CeedCalloc(CEED_FIELD_MAX, &(*qf)->input_fields); CeedChk(ierr);
   ierr = CeedCalloc(CEED_FIELD_MAX, &(*qf)->output_fields); CeedChk(ierr);
@@ -655,18 +669,12 @@ int CeedQFunctionCreateInteriorByName(Ceed ceed,  const char *name,
     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "No suitable gallery QFunction");
   // LCOV_EXCL_STOP
 
-  // Build source path
-  char *gallery_qfunction_source_path;
-
   // Create QFunction
-  ierr = CeedGetJitAbsolutePath(ceed, gallery_qfunctions[match_index].source,
-                                &gallery_qfunction_source_path); CeedChk(ierr);
   ierr = CeedQFunctionCreateInterior(ceed,
                                      gallery_qfunctions[match_index].vec_length,
                                      gallery_qfunctions[match_index].f,
-                                     gallery_qfunction_source_path, qf);
+                                     gallery_qfunctions[match_index].source, qf);
   CeedChk(ierr);
-  ierr = CeedFree(&gallery_qfunction_source_path); CeedChkBackend(ierr);
 
   // QFunction specific setup
   ierr = gallery_qfunctions[match_index].init(ceed, name, *qf); CeedChk(ierr);
diff --git a/interface/ceed.c b/interface/ceed.c
index 16668c882e..7d6e712453 100644
--- a/interface/ceed.c
+++ b/interface/ceed.c
@@ -808,8 +808,6 @@ int CeedInit(const char *resource, Ceed *ceed) {
   // Setup Ceed
   ierr = CeedCalloc(1, ceed); CeedChk(ierr);
   ierr = CeedCalloc(1, &(*ceed)->jit_source_roots); CeedChk(ierr);
-  (*ceed)->jit_source_roots[0] = CeedJitSourceRootDefault;
-  (*ceed)->num_jit_source_roots = 1;
   const char *ceed_error_handler = getenv("CEED_ERROR_HANDLER");
   if (!ceed_error_handler)
     ceed_error_handler = "abort";
@@ -914,6 +912,11 @@ int CeedInit(const char *resource, Ceed *ceed) {
   ierr = CeedStringAllocCopy(backends[match_index].prefix,
                              (char **)&(*ceed)->resource);
   CeedChk(ierr);
+
+  // Set default JiT source root
+  ierr = CeedAddJitSourceRoot(*ceed, (char *)CeedJitSourceRootDefault);
+  CeedChk(ierr);
+
   return CEED_ERROR_SUCCESS;
 }
 
@@ -999,6 +1002,30 @@ int CeedIsDeterministic(Ceed ceed, bool *is_deterministic) {
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Set additional JiT source root for Ceed
+
+  @param[in] ceed            Ceed
+  @param[in] jit_source_root Absolute path to additional JiT source directory
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref User
+**/
+int CeedAddJitSourceRoot(Ceed ceed, const char *jit_source_root) {
+  int ierr;
+
+  CeedInt index = ceed->num_jit_source_roots;
+  size_t path_length = strlen(jit_source_root);
+  ierr = CeedRealloc(index + 1, &ceed->jit_source_roots); CeedChk(ierr);
+  ierr = CeedCalloc(path_length + 1, &ceed->jit_source_roots[index]);
+  CeedChk(ierr);
+  strncpy(ceed->jit_source_roots[index], jit_source_root, path_length);
+  ceed->num_jit_source_roots++;
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief View a Ceed
 
@@ -1039,7 +1066,7 @@ int CeedDestroy(Ceed *ceed) {
   }
 
   if ((*ceed)->obj_delegate_count > 0) {
-    for (int i=0; i<(*ceed)->obj_delegate_count; i++) {
+    for (int i = 0; i < (*ceed)->obj_delegate_count; i++) {
       ierr = CeedDestroy(&((*ceed)->obj_delegates[i].delegate)); CeedChk(ierr);
       ierr = CeedFree(&(*ceed)->obj_delegates[i].obj_name); CeedChk(ierr);
     }
@@ -1050,7 +1077,7 @@ int CeedDestroy(Ceed *ceed) {
     ierr = (*ceed)->Destroy(*ceed); CeedChk(ierr);
   }
 
-  for (int i=1; i < (*ceed)->num_jit_source_roots; i++) {
+  for (int i = 0; i < (*ceed)->num_jit_source_roots; i++) {
     ierr = CeedFree(&(*ceed)->jit_source_roots[i]); CeedChk(ierr);
   }
   ierr = CeedFree(&(*ceed)->jit_source_roots); CeedChk(ierr);

From 1799df4f378bfad15b9733036c48cf4b42fcd943 Mon Sep 17 00:00:00 2001
From: Jed Brown <jed@jedbrown.org>
Date: Tue, 5 Apr 2022 11:34:26 -0600
Subject: [PATCH 07/59] Makefile: avoid recursive make and DESTDIR misuse

DESTDIR is meant to be a staging place for package managers. Usually the
stuff put into DESTDIR is put into a tarball and unpacked at / for use.
---
 Makefile | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index f0da690d00..90537e10f8 100644
--- a/Makefile
+++ b/Makefile
@@ -625,17 +625,14 @@ $(OBJDIR)/ceed.pc : pkgconfig-prefix = $(prefix)
 	    -e "s:%libs_private%:$(pkgconfig-libs-private):" $< > $@
 
 ifeq ($(filter install,$(MAKECMDGOALS)),install)
-  CPPFLAGS += $(if $(ASAN),$(AFLAGS)) -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath $(DESTDIR)$(includedir))/\""
+  $(OBJDIR)/interface/ceed-jit-source-root-default.o : CPPFLAGS += -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath $(includedir))/\""
 else
-  CPPFLAGS += -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath ./include)/\""
+  $(OBJDIR)/interface/ceed-jit-source-root-default.o : CPPFLAGS += -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath ./include)/\""
 endif
 
-$(OBJDIR)/interface/ceed-jit-source-root-default.o: .FORCE
-.FORCE:
+$(OBJDIR)/interface/ceed-jit-source-root-default.o : $(if $(filter install,$(MAKECMDGOALS)),.FORCE)
 
-ifeq ($(filter install,$(MAKECMDGOALS)),install)
-	$(MAKE) $(OBJDIR)/interface/ceed-jit-source-root-default.o
-endif
+.FORCE:
 
 install : $(libceed) $(OBJDIR)/ceed.pc
 	$(INSTALL) -d $(addprefix $(if $(DESTDIR),"$(DESTDIR)"),"$(includedir)"\

From 72fa462db3bf4c0b3c87709899d3ead7ed7fec82 Mon Sep 17 00:00:00 2001
From: Jed Brown <jed@jedbrown.org>
Date: Tue, 5 Apr 2022 15:03:13 -0600
Subject: [PATCH 08/59] Revise default path build and install

---
 Makefile                                 | 19 +++++++------------
 interface/ceed-jit-source-root-default.c |  4 ++--
 interface/ceed-jit-source-root-install.c | 12 ++++++++++++
 3 files changed, 21 insertions(+), 14 deletions(-)
 create mode 100644 interface/ceed-jit-source-root-install.c

diff --git a/Makefile b/Makefile
index 90537e10f8..82619b8cae 100644
--- a/Makefile
+++ b/Makefile
@@ -151,7 +151,9 @@ CEED_LDFLAGS += $(if $(ASAN),$(AFLAGS))
 CPPFLAGS += -I./include
 CEED_LDLIBS = -lm
 OBJDIR := build
-LIBDIR := lib
+for_install := $(filter install,$(MAKECMDGOALS))
+LIBDIR := $(if $(for_install),$(OBJDIR),lib)
+
 
 # Installation variables
 prefix ?= /usr/local
@@ -178,9 +180,9 @@ SO_EXT := $(if $(DARWIN),dylib,so)
 ceed.pc := $(LIBDIR)/pkgconfig/ceed.pc
 libceed.so := $(LIBDIR)/libceed.$(SO_EXT)
 libceed.a := $(LIBDIR)/libceed.a
-libceed := $(if $(STATIC),$(libceed.a),$(libceed.so))
+libceed := $(if $(STATIC),$(libceed.a),$(libceed.$(SO_EXT)))
 CEED_LIBS = -lceed
-libceed.c := $(filter-out interface/ceed-cuda.c interface/ceed-hip.c, $(wildcard interface/ceed*.c backends/*.c gallery/*.c))
+libceed.c := $(filter-out interface/ceed-cuda.c interface/ceed-hip.c interface/ceed-jit-source-root-$(if $(for_install),default,install).c, $(wildcard interface/ceed*.c backends/*.c gallery/*.c))
 gallery.c := $(wildcard gallery/*/ceed*.c)
 libceed.c += $(gallery.c)
 libceeds = $(libceed)
@@ -624,15 +626,8 @@ $(OBJDIR)/ceed.pc : pkgconfig-prefix = $(prefix)
 	    -e "s:%prefix%:$(pkgconfig-prefix):" \
 	    -e "s:%libs_private%:$(pkgconfig-libs-private):" $< > $@
 
-ifeq ($(filter install,$(MAKECMDGOALS)),install)
-  $(OBJDIR)/interface/ceed-jit-source-root-default.o : CPPFLAGS += -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath $(includedir))/\""
-else
-  $(OBJDIR)/interface/ceed-jit-source-root-default.o : CPPFLAGS += -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath ./include)/\""
-endif
-
-$(OBJDIR)/interface/ceed-jit-source-root-default.o : $(if $(filter install,$(MAKECMDGOALS)),.FORCE)
-
-.FORCE:
+$(OBJDIR)/interface/ceed-jit-source-root-default.o : CPPFLAGS += -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath ./include)/\""
+$(OBJDIR)/interface/ceed-jit-source-root-install.o : CPPFLAGS += -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath $(includedir))/\""
 
 install : $(libceed) $(OBJDIR)/ceed.pc
 	$(INSTALL) -d $(addprefix $(if $(DESTDIR),"$(DESTDIR)"),"$(includedir)"\
diff --git a/interface/ceed-jit-source-root-default.c b/interface/ceed-jit-source-root-default.c
index 03edb3afd0..26a3348405 100644
--- a/interface/ceed-jit-source-root-default.c
+++ b/interface/ceed-jit-source-root-default.c
@@ -5,8 +5,8 @@
 //
 // This file is part of CEED:  http://github.com/ceed
 
-#include <ceed.h>
-#include <ceed/backend.h>
 #include <ceed-impl.h>
 
+// This file and definition are used for in-source builds.
+// The definition for installs is in ceed-jit-source-root-install.c.
 const char CeedJitSourceRootDefault[] = CEED_JIT_SOUCE_ROOT_DEFAULT;
diff --git a/interface/ceed-jit-source-root-install.c b/interface/ceed-jit-source-root-install.c
new file mode 100644
index 0000000000..e25679e7e9
--- /dev/null
+++ b/interface/ceed-jit-source-root-install.c
@@ -0,0 +1,12 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed-impl.h>
+
+// This file and definition are used for installs.
+// The definition for in-source is in ceed-jit-source-root-default.c.
+const char CeedJitSourceRootDefault[] = CEED_JIT_SOUCE_ROOT_DEFAULT;

From 78a9fcb6bae7d7246469592ed2b1811dbe8e744f Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Fri, 8 Apr 2022 08:39:49 -0600
Subject: [PATCH 09/59] make - minor fix

---
 .github/workflows/c-fortran-test-linux-osx.yml | 1 +
 Makefile                                       | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/c-fortran-test-linux-osx.yml b/.github/workflows/c-fortran-test-linux-osx.yml
index 87d62af5c4..d7433397f1 100644
--- a/.github/workflows/c-fortran-test-linux-osx.yml
+++ b/.github/workflows/c-fortran-test-linux-osx.yml
@@ -20,6 +20,7 @@ jobs:
         CC: ${{ matrix.compiler }}
         FC: gfortran-9
       run: |
+        make -v
         make info
         make -j2
         PROVE_OPTS=-v make prove -j2
diff --git a/Makefile b/Makefile
index 82619b8cae..1b57ca0e56 100644
--- a/Makefile
+++ b/Makefile
@@ -180,7 +180,7 @@ SO_EXT := $(if $(DARWIN),dylib,so)
 ceed.pc := $(LIBDIR)/pkgconfig/ceed.pc
 libceed.so := $(LIBDIR)/libceed.$(SO_EXT)
 libceed.a := $(LIBDIR)/libceed.a
-libceed := $(if $(STATIC),$(libceed.a),$(libceed.$(SO_EXT)))
+libceed := $(if $(STATIC),$(libceed.a),$(libceed.so))
 CEED_LIBS = -lceed
 libceed.c := $(filter-out interface/ceed-cuda.c interface/ceed-hip.c interface/ceed-jit-source-root-$(if $(for_install),default,install).c, $(wildcard interface/ceed*.c backends/*.c gallery/*.c))
 gallery.c := $(wildcard gallery/*/ceed*.c)

From d27ed4f31dc2f2431d97e2d56fc3a37c879fb4b3 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Fri, 8 Apr 2022 10:10:51 -0600
Subject: [PATCH 10/59] README - add note about separate build and install
 stages for installation

---
 README.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e479a25b42..fee7ed6c0d 100644
--- a/README.md
+++ b/README.md
@@ -375,7 +375,7 @@ For more details about the benchmarks, see the `benchmarks/README.md` file.
 To install libCEED, run:
 
 ```
-make install prefix=/usr/local
+make install prefix=/path/to/install/dir
 ```
 
 or (e.g., if creating packages):
@@ -384,6 +384,13 @@ or (e.g., if creating packages):
 make install prefix=/usr DESTDIR=/packaging/path
 ```
 
+To build and install in separate steps, run:
+
+```
+make for_install=1 prefix=/path/to/install/dir
+make install prefix=/path/to/install/dir
+```
+
 The usual variables like `CC` and `CFLAGS` are used, and optimization flags
 for all languages can be set using the likes of `OPT='-O3 -march=native'`. Use
 `STATIC=1` to build static libraries (`libceed.a`).

From 6155f12f9604e8e71de88f26602a4cc89885b3cf Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Fri, 8 Apr 2022 11:34:08 -0600
Subject: [PATCH 11/59] ceed - add JiT source root to topmost parent

---
 interface/ceed-jit-tools.c |  8 +++++---
 interface/ceed.c           | 15 ++++++++++-----
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c
index 48f9158c95..b082b6e4ce 100644
--- a/interface/ceed-jit-tools.c
+++ b/interface/ceed-jit-tools.c
@@ -279,6 +279,7 @@ int CeedGetJitRelativePath(const char *absolute_file_path,
 int CeedGetJitAbsolutePath(Ceed ceed, const char *relative_file_path,
                            char **absolute_file_path) {
   int ierr;
+  Ceed ceed_parent;
 
   // Debug
   CeedDebug256(ceed, 1, "---------- Ceed JiT ----------\n");
@@ -286,15 +287,16 @@ int CeedGetJitAbsolutePath(Ceed ceed, const char *relative_file_path,
   CeedDebug256(ceed, 255, "%s\n", relative_file_path);
 
 
-  for (CeedInt i = 0; i < ceed->num_jit_source_roots; i++) {
+  ierr = CeedGetParent(ceed, &ceed_parent); CeedChk(ierr);
+  for (CeedInt i = 0; i < ceed_parent->num_jit_source_roots; i++) {
     bool is_valid;
 
     // Debug
     CeedDebug256(ceed, 1, "Checking JiT root: ");
-    CeedDebug256(ceed, 255, "%s\n", ceed->jit_source_roots[i]);
+    CeedDebug256(ceed, 255, "%s\n", ceed_parent->jit_source_roots[i]);
 
     // Build  and check absolute path with current root
-    ierr = CeedPathConcatenate(ceed, ceed->jit_source_roots[i],
+    ierr = CeedPathConcatenate(ceed, ceed_parent->jit_source_roots[i],
                                relative_file_path, absolute_file_path);
     CeedChk(ierr);
     ierr = CeedCheckFilePath(ceed, *absolute_file_path, &is_valid); CeedChk(ierr);
diff --git a/interface/ceed.c b/interface/ceed.c
index 7d6e712453..1154b12795 100644
--- a/interface/ceed.c
+++ b/interface/ceed.c
@@ -914,6 +914,8 @@ int CeedInit(const char *resource, Ceed *ceed) {
   CeedChk(ierr);
 
   // Set default JiT source root
+  // Note: there will always be the default root for every Ceed
+  // but all additional paths are added to the top-most parent
   ierr = CeedAddJitSourceRoot(*ceed, (char *)CeedJitSourceRootDefault);
   CeedChk(ierr);
 
@@ -1014,14 +1016,17 @@ int CeedIsDeterministic(Ceed ceed, bool *is_deterministic) {
 **/
 int CeedAddJitSourceRoot(Ceed ceed, const char *jit_source_root) {
   int ierr;
+  Ceed ceed_parent;
 
-  CeedInt index = ceed->num_jit_source_roots;
+  ierr = CeedGetParent(ceed, &ceed_parent); CeedChk(ierr);
+
+  CeedInt index = ceed_parent->num_jit_source_roots;
   size_t path_length = strlen(jit_source_root);
-  ierr = CeedRealloc(index + 1, &ceed->jit_source_roots); CeedChk(ierr);
-  ierr = CeedCalloc(path_length + 1, &ceed->jit_source_roots[index]);
+  ierr = CeedRealloc(index + 1, &ceed_parent->jit_source_roots); CeedChk(ierr);
+  ierr = CeedCalloc(path_length + 1, &ceed_parent->jit_source_roots[index]);
   CeedChk(ierr);
-  strncpy(ceed->jit_source_roots[index], jit_source_root, path_length);
-  ceed->num_jit_source_roots++;
+  strncpy(ceed_parent->jit_source_roots[index], jit_source_root, path_length);
+  ceed_parent->num_jit_source_roots++;
 
   return CEED_ERROR_SUCCESS;
 }

From d602d780e9fc9e325096fde88897de7d87589857 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Fri, 8 Apr 2022 12:42:08 -0600
Subject: [PATCH 12/59] minor - unify strncpy vs memcpy, silence errors

---
 interface/ceed-jit-tools.c | 28 ++++++++++++++--------------
 interface/ceed-qfunction.c |  4 ++--
 interface/ceed.c           |  4 ++--
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c
index b082b6e4ce..e994aee245 100644
--- a/interface/ceed-jit-tools.c
+++ b/interface/ceed-jit-tools.c
@@ -36,7 +36,7 @@ int CeedCheckFilePath(Ceed ceed, const char *source_file_path, bool *is_valid) {
 
     ierr = CeedCalloc(source_file_path_length, &source_file_path_only);
     CeedChk(ierr);
-    strncpy(source_file_path_only, source_file_path, source_file_path_length - 1);
+    memcpy(source_file_path_only, source_file_path, source_file_path_length - 1);
   } else {
     source_file_path_only = (char *)source_file_path;
   }
@@ -123,7 +123,7 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed,
     const char *next_e = strchr(first_hash, 'e');
     char keyword[8] = "";
     if (next_e)
-      strncpy(keyword, &next_e[-6], 7);
+      memcpy(keyword, &next_e[-6], 7);
     bool is_hash_include = !strcmp(keyword, "include");
     // ---- Spaces allowed in '#  include <header.h>'
     if (next_e)
@@ -134,9 +134,9 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed,
       long current_size = strlen(*buffer);
       long copy_size = first_hash - &temp_buffer[file_offset];
       ierr = CeedRealloc(current_size + copy_size + 2, buffer); CeedChk(ierr);
-      strncpy(&(*buffer)[current_size], "\n", 2);
-      strncpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size);
-      strncpy(&(*buffer)[current_size + copy_size], "", 1);
+      memcpy(&(*buffer)[current_size], "\n", 2);
+      memcpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size);
+      memcpy(&(*buffer)[current_size + copy_size], "", 1);
       // -- Load local "header.h"
       char *next_quote = strchr(first_hash, '"');
       char *next_new_line = strchr(first_hash, '\n');
@@ -149,10 +149,10 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed,
         long include_file_name_len = strchr(&next_quote[1], '"') - next_quote - 1;
         ierr = CeedCalloc(root_length + include_file_name_len + 2,
                           &include_source_path); CeedChk(ierr);
-        strncpy(include_source_path, source_file_path, root_length + 1);
-        strncpy(&include_source_path[root_length + 1], &next_quote[1],
-                include_file_name_len);
-        strncpy(&include_source_path[root_length + include_file_name_len + 1], "", 1);
+        memcpy(include_source_path, source_file_path, root_length + 1);
+        memcpy(&include_source_path[root_length + 1], &next_quote[1],
+               include_file_name_len);
+        memcpy(&include_source_path[root_length + include_file_name_len + 1], "", 1);
         // ---- Recursive call to load source to buffer
         ierr = CeedLoadSourceToInitalizedBuffer(ceed, include_source_path, buffer);
         CeedDebug256(ceed, 2, "JiT Including: %s\n", include_source_path);
@@ -168,9 +168,9 @@ static inline int CeedLoadSourceToInitalizedBuffer(Ceed ceed,
   long current_size = strlen(*buffer);
   long copy_size = strlen(&temp_buffer[file_offset]);
   ierr = CeedRealloc(current_size + copy_size + 2, buffer); CeedChk(ierr);
-  strncpy(&(*buffer)[current_size], "\n", 2);
-  strncpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size);
-  strncpy(&(*buffer)[current_size + copy_size + 1], "", 1);
+  memcpy(&(*buffer)[current_size], "\n", 2);
+  memcpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size);
+  memcpy(&(*buffer)[current_size + copy_size + 1], "", 1);
 
   // Cleanup
   ierr = CeedFree(&temp_buffer); CeedChk(ierr);
@@ -235,8 +235,8 @@ int CeedPathConcatenate(Ceed ceed, const char *base_file_path,
          new_file_path_length = base_length + relative_length + 1;
 
   ierr = CeedCalloc(new_file_path_length, new_file_path); CeedChk(ierr);
-  strncpy(*new_file_path, base_file_path, base_length);
-  strncpy(&((*new_file_path)[base_length]), relative_file_path, relative_length);
+  memcpy(*new_file_path, base_file_path, base_length);
+  memcpy(&((*new_file_path)[base_length]), relative_file_path, relative_length);
 
   return CEED_ERROR_SUCCESS;
 }
diff --git a/interface/ceed-qfunction.c b/interface/ceed-qfunction.c
index 6aba938198..1039921656 100644
--- a/interface/ceed-qfunction.c
+++ b/interface/ceed-qfunction.c
@@ -616,12 +616,12 @@ int CeedQFunctionCreateInterior(Ceed ceed, CeedInt vec_length,
     const char *kernel_name = strrchr(absolute_path, ':') + 1;
     size_t kernel_name_len = strlen(kernel_name);
     ierr = CeedCalloc(kernel_name_len + 1, &kernel_name_copy); CeedChk(ierr);
-    strncpy(kernel_name_copy, kernel_name, kernel_name_len);
+    memcpy(kernel_name_copy, kernel_name, kernel_name_len);
     (*qf)->kernel_name = kernel_name_copy;
 
     size_t source_len = strlen(absolute_path) - kernel_name_len - 1;
     ierr = CeedCalloc(source_len + 1, &source_copy); CeedChk(ierr);
-    strncpy(source_copy, absolute_path, source_len);
+    memcpy(source_copy, absolute_path, source_len);
     (*qf)->source_path = source_copy;
 
     if (!is_absolute_path) {
diff --git a/interface/ceed.c b/interface/ceed.c
index 1154b12795..08ef354072 100644
--- a/interface/ceed.c
+++ b/interface/ceed.c
@@ -302,7 +302,7 @@ int CeedStringAllocCopy(const char *source, char **copy) {
   int ierr;
   size_t len = strlen(source);
   ierr = CeedCalloc(len + 1, copy); CeedChk(ierr);
-  memcpy(*copy, source, len + 1);
+  memcpy(*copy, source, len);
   return CEED_ERROR_SUCCESS;
 }
 
@@ -1025,7 +1025,7 @@ int CeedAddJitSourceRoot(Ceed ceed, const char *jit_source_root) {
   ierr = CeedRealloc(index + 1, &ceed_parent->jit_source_roots); CeedChk(ierr);
   ierr = CeedCalloc(path_length + 1, &ceed_parent->jit_source_roots[index]);
   CeedChk(ierr);
-  strncpy(ceed_parent->jit_source_roots[index], jit_source_root, path_length);
+  memcpy(ceed_parent->jit_source_roots[index], jit_source_root, path_length);
   ceed_parent->num_jit_source_roots++;
 
   return CEED_ERROR_SUCCESS;

From bb61b449606a9c0f246fe4353a57146e68950733 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Fri, 8 Apr 2022 13:36:59 -0600
Subject: [PATCH 13/59] tidy - fix flags for tidy

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1b57ca0e56..54c997698c 100644
--- a/Makefile
+++ b/Makefile
@@ -687,7 +687,7 @@ style : style-c style-py
 CLANG_TIDY ?= clang-tidy
 
 %.c.tidy : %.c
-	$(CLANG_TIDY) $(TIDY_OPTS) $^ -- $(CPPFLAGS) --std=c99 -I$(CUDA_DIR)/include -I$(HIP_DIR)/include
+	$(CLANG_TIDY) $(TIDY_OPTS) $^ -- $(CPPFLAGS) --std=c99 -I$(CUDA_DIR)/include -I$(HIP_DIR)/include -DCEED_JIT_SOUCE_ROOT_DEFAULT="\"$(abspath ./include)/\""
 
 %.cpp.tidy : %.cpp
 	$(CLANG_TIDY) $(TIDY_OPTS) $^ -- $(CPPFLAGS) --std=c++11 -I$(CUDA_DIR)/include -I$(OCCA_DIR)/include -I$(HIP_DIR)/include

From 71b2a0f6f4a99141ea55c8b15cf50f079031c343 Mon Sep 17 00:00:00 2001
From: James Wright <james@jameswright.xyz>
Date: Sun, 10 Apr 2022 11:39:21 -0600
Subject: [PATCH 14/59] doc: Fix Operator description

---
 doc/sphinx/source/libCEEDapi.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/sphinx/source/libCEEDapi.md b/doc/sphinx/source/libCEEDapi.md
index 18699021a7..5e0ba6b2e3 100644
--- a/doc/sphinx/source/libCEEDapi.md
+++ b/doc/sphinx/source/libCEEDapi.md
@@ -470,7 +470,7 @@ be added according to demand.
 There are two common approaches for supporting non-conforming elements: applying the node constraints via $\bm P$ so that the **L-vector** can be processed uniformly and applying the constraints via $\bm{\mathcal{E}}$ so that the **E-vector** is uniform.
 The former can be done with the existing interface while the latter will require a generalization to element restriction that would define field values at constrained nodes as linear combinations of the values at primary nodes.
 
-These operations, $\bm{P}$, $\bm{B}$, and $\bm{D}$,
+These operations, $\bm{\mathcal{E}}$, $\bm{B}$, and $\bm{D}$,
 are combined with a {ref}`CeedOperator`. As with {ref}`CeedQFunction`s, operator fields are added
 separately with a matching field name, basis ($\bm{B}$), element restriction
 ($\bm{\mathcal{E}}$), and **L-vector**. The flag

From f374d6a3d4bb58585c9e7362022a5daaf33a852a Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Tue, 29 Mar 2022 13:37:38 -0600
Subject: [PATCH 15/59] release - prepare for v0.10.1 patch

---
 CITATION.cff                      | 2 +-
 Doxyfile                          | 2 +-
 ceed.pc.template                  | 2 +-
 doc/sphinx/source/releasenotes.md | 9 +++++++--
 include/ceed/ceed.h               | 4 ++--
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/CITATION.cff b/CITATION.cff
index 9324c1ead8..bdbf150421 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -1,6 +1,6 @@
 cff-version: 1.2.0
 title: "libCEED: Efficient Extensible Discretization"
-version: 0.10.0
+version: 0.10.1
 date-released: 2021-07-07
 license:  BSD-2-Clause
 message: "Please cite the following works when using this software."
diff --git a/Doxyfile b/Doxyfile
index 839bde80d3..8f0be2cdea 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = libCEED
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = v0.10.0
+PROJECT_NUMBER         = v0.10.1
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/ceed.pc.template b/ceed.pc.template
index f884794467..bd8485ed88 100644
--- a/ceed.pc.template
+++ b/ceed.pc.template
@@ -4,7 +4,7 @@ libdir=${prefix}/lib
 
 Name: CEED
 Description: Code for Efficient Extensible Discretization
-Version: 0.10.0
+Version: 0.10.1
 Cflags: -I${includedir}
 Libs: -L${libdir} -lceed
 Libs.private: %libs_private%
diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md
index ec2fa884dc..9d5e6488ac 100644
--- a/doc/sphinx/source/releasenotes.md
+++ b/doc/sphinx/source/releasenotes.md
@@ -1,7 +1,6 @@
 # Changes/Release Notes
 
-On this page we provide a summary of the main API changes, new features and examples
-for each release of libCEED.
+On this page we provide a summary of the main API changes, new features and examples for each release of libCEED.
 
 (main)=
 
@@ -9,6 +8,12 @@ for each release of libCEED.
 
 ### Interface changes
 
+(v0-10-1)=
+
+## v0.10.1 (Apr 11, 2022)
+
+### Interface changes
+
 - Added {c:func}`CeedQFunctionSetUserFlopsEstimate` and {c:func}`CeedOperatorGetFlopsEstimate` to facilitate estimating FLOPs in operator application.
 
 ### Bugfix
diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h
index 5c3c85f06d..7146214124 100644
--- a/include/ceed/ceed.h
+++ b/include/ceed/ceed.h
@@ -241,8 +241,8 @@ CEED_EXTERN int CeedResetErrorMessage(Ceed, const char **err_msg);
 /// @ingroup Ceed
 #define CEED_VERSION_MAJOR 0
 #define CEED_VERSION_MINOR 10
-#define CEED_VERSION_PATCH 0
-#define CEED_VERSION_RELEASE false
+#define CEED_VERSION_PATCH 1
+#define CEED_VERSION_RELEASE true
 
 /// Compile-time check that the the current library version is at least as
 /// recent as the specified version. This macro is typically used in

From 38faa2d678bf97988f8285cd62d2c5ebde8c0404 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Mon, 11 Apr 2022 17:15:16 -0600
Subject: [PATCH 16/59] minor - toggle release flag

---
 include/ceed/ceed.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h
index 7146214124..e67df5577e 100644
--- a/include/ceed/ceed.h
+++ b/include/ceed/ceed.h
@@ -242,7 +242,7 @@ CEED_EXTERN int CeedResetErrorMessage(Ceed, const char **err_msg);
 #define CEED_VERSION_MAJOR 0
 #define CEED_VERSION_MINOR 10
 #define CEED_VERSION_PATCH 1
-#define CEED_VERSION_RELEASE true
+#define CEED_VERSION_RELEASE false
 
 /// Compile-time check that the the current library version is at least as
 /// recent as the specified version. This macro is typically used in

From 67490bc6de48fa3615ca135c74fbc2a7c9584dee Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Tue, 12 Apr 2022 20:35:40 -0600
Subject: [PATCH 17/59] petsc - update for PetscOptionsBegin/End

---
 examples/fluids/problems/advection.c      |  5 ++---
 examples/fluids/problems/advection2d.c    |  5 ++---
 examples/fluids/problems/densitycurrent.c |  5 ++---
 examples/fluids/problems/eulervortex.c    |  5 ++---
 examples/fluids/problems/newtonian.c      |  8 ++++----
 examples/fluids/src/cloptions.c           |  6 +++---
 examples/petsc/area.c                     |  6 ++----
 examples/petsc/bps.c                      |  6 ++----
 examples/petsc/bpsraw.c                   |  4 ++--
 examples/petsc/bpssphere.c                |  4 ++--
 examples/petsc/multigrid.c                |  4 ++--
 examples/solids/problems/mooney-rivlin.c  | 12 +++++-------
 examples/solids/problems/neo-hookean.c    | 12 +++++-------
 examples/solids/src/cl-options.c          |  7 +++----
 14 files changed, 38 insertions(+), 51 deletions(-)

diff --git a/examples/fluids/problems/advection.c b/examples/fluids/problems/advection.c
index 78e6f4f314..59651967c8 100644
--- a/examples/fluids/problems/advection.c
+++ b/examples/fluids/problems/advection.c
@@ -75,8 +75,7 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //              Command line Options
   // ------------------------------------------------------
-  ierr = PetscOptionsBegin(comm, NULL, "Options for ADVECTION problem",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Options for ADVECTION problem", NULL);
   // -- Physics
   ierr = PetscOptionsScalar("-rc", "Characteristic radius of thermal bubble",
                             NULL, rc, &rc, NULL); CHKERRQ(ierr);
@@ -148,7 +147,7 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
     CHKERRQ(ierr);
   }
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // ------------------------------------------------------
   //           Set up the PETSc context
diff --git a/examples/fluids/problems/advection2d.c b/examples/fluids/problems/advection2d.c
index f8014956c1..6c29be22f2 100644
--- a/examples/fluids/problems/advection2d.c
+++ b/examples/fluids/problems/advection2d.c
@@ -73,8 +73,7 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //              Command line Options
   // ------------------------------------------------------
-  ierr = PetscOptionsBegin(comm, NULL, "Options for ADVECTION2D problem",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Options for ADVECTION2D problem", NULL);
   // -- Physics
   ierr = PetscOptionsScalar("-rc", "Characteristic radius of thermal bubble",
                             NULL, rc, &rc, NULL); CHKERRQ(ierr);
@@ -130,7 +129,7 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
     CHKERRQ(ierr);
   }
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // ------------------------------------------------------
   //           Set up the PETSc context
diff --git a/examples/fluids/problems/densitycurrent.c b/examples/fluids/problems/densitycurrent.c
index 1ad9a3f22b..62dc438429 100644
--- a/examples/fluids/problems/densitycurrent.c
+++ b/examples/fluids/problems/densitycurrent.c
@@ -40,8 +40,7 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //              Command line Options
   // ------------------------------------------------------
-  ierr = PetscOptionsBegin(comm, NULL, "Options for DENSITY_CURRENT problem",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Options for DENSITY_CURRENT problem", NULL);
   ierr = PetscOptionsScalar("-rc", "Characteristic radius of thermal bubble",
                             NULL, rc, &rc, NULL); CHKERRQ(ierr);
   for (int i=0; i<3; i++) center[i] = .5*domain_size[i];
@@ -60,7 +59,7 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *setup_ctx,
     }
   }
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   PetscScalar meter = user->units->meter;
   rc = fabs(rc) * meter;
diff --git a/examples/fluids/problems/eulervortex.c b/examples/fluids/problems/eulervortex.c
index 65007d6a2a..a31d9fd2b1 100644
--- a/examples/fluids/problems/eulervortex.c
+++ b/examples/fluids/problems/eulervortex.c
@@ -73,8 +73,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //              Command line Options
   // ------------------------------------------------------
-  ierr = PetscOptionsBegin(comm, NULL, "Options for EULER_VORTEX problem",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Options for EULER_VORTEX problem", NULL);
   // -- Physics
   ierr = PetscOptionsScalar("-vortex_strength", "Strength of Vortex",
                             NULL, vortex_strength, &vortex_strength, NULL);
@@ -120,7 +119,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
     CHKERRQ(ierr);
   }
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // ------------------------------------------------------
   //           Set up the PETSc context
diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c
index 5c5b2880d5..1265c564ef 100644
--- a/examples/fluids/problems/newtonian.c
+++ b/examples/fluids/problems/newtonian.c
@@ -77,9 +77,9 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //              Command line Options
   // ------------------------------------------------------
-  ierr = PetscOptionsBegin(comm, NULL,
-                           "Options for Newtonian Ideal Gas based problem",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Options for Newtonian Ideal Gas based problem",
+                    NULL);
+
   // -- Physics
   ierr = PetscOptionsScalar("-theta0", "Reference potential temperature",
                             NULL, theta0, &theta0, NULL); CHKERRQ(ierr);
@@ -133,7 +133,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
                        "Warning! Use -stab supg only with -implicit\n");
     CHKERRQ(ierr);
   }
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // ------------------------------------------------------
   //           Set up the PETSc context
diff --git a/examples/fluids/src/cloptions.c b/examples/fluids/src/cloptions.c
index 065c548361..e1af3dbc39 100644
--- a/examples/fluids/src/cloptions.c
+++ b/examples/fluids/src/cloptions.c
@@ -42,8 +42,8 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx,
   PetscErrorCode ierr;
   PetscFunctionBeginUser;
 
-  ierr = PetscOptionsBegin(comm, NULL, "Navier-Stokes in PETSc with libCEED",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Navier-Stokes in PETSc with libCEED",
+                    NULL);
 
   ierr = PetscOptionsString("-ceed", "CEED resource specifier",
                             NULL, app_ctx->ceed_resource, app_ctx->ceed_resource,
@@ -148,7 +148,7 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx,
                               "Face IDs to apply outflow BC",
                               NULL, bc->outflows, &bc->num_outflow, NULL); CHKERRQ(ierr);
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   PetscFunctionReturn(0);
 }
diff --git a/examples/petsc/area.c b/examples/petsc/area.c
index d929b8d1cb..e5291d93a0 100644
--- a/examples/petsc/area.c
+++ b/examples/petsc/area.c
@@ -92,9 +92,7 @@ int main(int argc, char **argv) {
   comm = PETSC_COMM_WORLD;
 
   // Read command line options
-  ierr = PetscOptionsBegin(comm, NULL, "CEED surface area problem with PETSc",
-                           NULL);
-  CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "CEED surface area problem with PETSc", NULL);
   problem_choice = SPHERE;
   ierr = PetscOptionsEnum("-problem",
                           "Problem to solve", NULL,
@@ -116,7 +114,7 @@ int main(int argc, char **argv) {
                           NULL, simplex, &simplex, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis",
                          NULL, degree, &degree, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // Setup DM
   if (read_mesh) {
diff --git a/examples/petsc/bps.c b/examples/petsc/bps.c
index ce72b1e144..9f34fb7ccb 100644
--- a/examples/petsc/bps.c
+++ b/examples/petsc/bps.c
@@ -469,8 +469,7 @@ int main(int argc, char **argv) {
   rp->comm = comm;
 
   // Read command line options
-  ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
-  CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
   {
     PetscBool set;
     ierr = PetscOptionsEnumArray("-problem", "CEED benchmark problem to solve",
@@ -558,8 +557,7 @@ int main(int argc, char **argv) {
     if (flg) ranks_per_node = p;
   }
 
-  ierr = PetscOptionsEnd();
-  CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // Register PETSc logging stage
   ierr = PetscLogStageRegister("Solve Stage", &rp->solve_stage);
diff --git a/examples/petsc/bpsraw.c b/examples/petsc/bpsraw.c
index d7e350005d..ba53d07c7e 100644
--- a/examples/petsc/bpsraw.c
+++ b/examples/petsc/bpsraw.c
@@ -439,7 +439,7 @@ int main(int argc, char **argv) {
   comm = PETSC_COMM_WORLD;
 
   // Read command line options
-  ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
   bp_choice = CEED_BP1;
   ierr = PetscOptionsEnum("-problem",
                           "CEED benchmark problem to solve", NULL,
@@ -480,7 +480,7 @@ int main(int argc, char **argv) {
                               "Min and max number of iterations to use during benchmarking",
                               NULL, ksp_max_it_clip, &two, NULL);
   CHKERRQ(ierr);
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
   P = degree + 1;
   Q = P + q_extra;
 
diff --git a/examples/petsc/bpssphere.c b/examples/petsc/bpssphere.c
index 6c7576b9c6..9723a69f99 100644
--- a/examples/petsc/bpssphere.c
+++ b/examples/petsc/bpssphere.c
@@ -87,7 +87,7 @@ int main(int argc, char **argv) {
   comm = PETSC_COMM_WORLD;
 
   // Read command line options
-  ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
   bp_choice = CEED_BP1;
   ierr = PetscOptionsEnum("-problem",
                           "CEED benchmark problem to solve", NULL,
@@ -124,7 +124,7 @@ int main(int argc, char **argv) {
   simplex = PETSC_FALSE;
   ierr = PetscOptionsBool("-simplex", "Use simplices, or tensor product cells",
                           NULL, simplex, &simplex, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // Setup DM
   if (read_mesh) {
diff --git a/examples/petsc/multigrid.c b/examples/petsc/multigrid.c
index 41b9fe26c7..bcb519d75a 100644
--- a/examples/petsc/multigrid.c
+++ b/examples/petsc/multigrid.c
@@ -87,7 +87,7 @@ int main(int argc, char **argv) {
   comm = PETSC_COMM_WORLD;
 
   // Parse command line options
-  ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
   bp_choice = CEED_BP3;
   ierr = PetscOptionsEnum("-problem",
                           "CEED benchmark problem to solve", NULL,
@@ -138,7 +138,7 @@ int main(int argc, char **argv) {
     ierr = PetscOptionsIntArray("-cells","Number of cells per dimension", NULL,
                                 mesh_elem, &tmp, NULL); CHKERRQ(ierr);
   }
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr);
+  PetscOptionsEnd();
 
   // Set up libCEED
   CeedInit(ceed_resource, &ceed);
diff --git a/examples/solids/problems/mooney-rivlin.c b/examples/solids/problems/mooney-rivlin.c
index 58a4eb33d3..712a6b707f 100644
--- a/examples/solids/problems/mooney-rivlin.c
+++ b/examples/solids/problems/mooney-rivlin.c
@@ -38,9 +38,8 @@ PetscErrorCode PhysicsSmootherContext_MR(MPI_Comm comm, Ceed ceed,
 
   PetscFunctionBegin;
 
-  ierr = PetscOptionsBegin(comm, NULL,
-                           "Mooney Rivlin physical parameters for smoother", NULL);
-  CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Mooney Rivlin physical parameters for smoother",
+                    NULL);
 
   ierr = PetscOptionsScalar("-nu_smoother", "Poisson's ratio for smoother",
                             NULL, nu_smoother, &nu_smoother, &nu_flag);
@@ -49,7 +48,7 @@ PetscErrorCode PhysicsSmootherContext_MR(MPI_Comm comm, Ceed ceed,
       nu_smoother >= 0.5) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP,
                                     "Mooney-Rivlin model requires Poisson ratio -nu option in [0, .5)");
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr); // End of setting Physics
+  PetscOptionsEnd(); // End of setting Physics
 
   if (nu_flag) {
     // Copy context
@@ -84,8 +83,7 @@ PetscErrorCode ProcessPhysics_MR(MPI_Comm comm, Physics_MR phys, Units units) {
 
   PetscFunctionBeginUser;
 
-  ierr = PetscOptionsBegin(comm, NULL, "Mooney Rivlin physical parameters", NULL);
-  CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Mooney Rivlin physical parameters", NULL);
 
   ierr = PetscOptionsScalar("-mu_1", "Material Property mu_1", NULL,
                             phys->mu_1, &phys->mu_1, NULL); CHKERRQ(ierr);
@@ -118,7 +116,7 @@ PetscErrorCode ProcessPhysics_MR(MPI_Comm comm, Physics_MR phys, Units units) {
   CHKERRQ(ierr);
   units->kilogram = fabs(units->kilogram);
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr); // End of setting Physics
+  PetscOptionsEnd(); // End of setting Physics
 
   // Define derived units
   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
diff --git a/examples/solids/problems/neo-hookean.c b/examples/solids/problems/neo-hookean.c
index 5d4fa2ba67..5fd17a8c52 100644
--- a/examples/solids/problems/neo-hookean.c
+++ b/examples/solids/problems/neo-hookean.c
@@ -38,15 +38,14 @@ PetscErrorCode PhysicsSmootherContext_NH(MPI_Comm comm, Ceed ceed,
 
   PetscFunctionBegin;
 
-  ierr = PetscOptionsBegin(comm, NULL,
-                           "Neo-Hookean physical parameters for smoother", NULL);
-  CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Neo-Hookean physical parameters for smoother",
+                    NULL);
 
   ierr = PetscOptionsScalar("-nu_smoother", "Poisson's ratio for smoother",
                             NULL, nu_smoother, &nu_smoother, &nu_flag);
   CHKERRQ(ierr);
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr); // End of setting Physics
+  PetscOptionsEnd(); // End of setting Physics
 
   if (nu_flag) {
     // Copy context
@@ -80,8 +79,7 @@ PetscErrorCode ProcessPhysics_NH(MPI_Comm comm, Physics_NH phys, Units units) {
 
   PetscFunctionBeginUser;
 
-  ierr = PetscOptionsBegin(comm, NULL, "Neo-Hookean physical parameters", NULL);
-  CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL, "Neo-Hookean physical parameters", NULL);
 
   ierr = PetscOptionsScalar("-nu", "Poisson's ratio", NULL, phys->nu, &phys->nu,
                             &nu_flag); CHKERRQ(ierr);
@@ -104,7 +102,7 @@ PetscErrorCode ProcessPhysics_NH(MPI_Comm comm, Physics_NH phys, Units units) {
   CHKERRQ(ierr);
   units->kilogram = fabs(units->kilogram);
 
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr); // End of setting Physics
+  PetscOptionsEnd(); // End of setting Physics
 
   // Check for all required options to be set
   if (!nu_flag) {
diff --git a/examples/solids/src/cl-options.c b/examples/solids/src/cl-options.c
index d4510f8a3b..1849fae7b5 100644
--- a/examples/solids/src/cl-options.c
+++ b/examples/solids/src/cl-options.c
@@ -20,9 +20,8 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx) {
 
   PetscFunctionBeginUser;
 
-  ierr = PetscOptionsBegin(comm, NULL,
-                           "Elasticity / Hyperelasticity in PETSc with libCEED",
-                           NULL); CHKERRQ(ierr);
+  PetscOptionsBegin(comm, NULL,
+                    "Elasticity / Hyperelasticity in PETSc with libCEED", NULL);
 
   ierr = PetscOptionsString("-ceed", "CEED resource specifier",
                             NULL, app_ctx->ceed_resource, app_ctx->ceed_resource,
@@ -210,7 +209,7 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx) {
     ierr = PetscViewerASCIIPrintf(app_ctx->energy_viewer, "%f,%e\n", 0., 0.);
     CHKERRQ(ierr);
   }
-  ierr = PetscOptionsEnd(); CHKERRQ(ierr); // End of setting AppCtx
+  PetscOptionsEnd(); // End of setting AppCtx
 
   // Check for all required values set
   if (app_ctx->test_mode) {

From 979e564e8d444b7d9c22148107323e4e570b83c8 Mon Sep 17 00:00:00 2001
From: James Wright <james@jameswright.xyz>
Date: Tue, 12 Apr 2022 14:29:08 -0600
Subject: [PATCH 18/59] docs: Improve wording in CeedOperatorSetField
 description

---
 interface/ceed-operator.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c
index d400677d7d..37aade4069 100644
--- a/interface/ceed-operator.c
+++ b/interface/ceed-operator.c
@@ -653,7 +653,8 @@ int CeedOperatorReferenceCopy(CeedOperator op, CeedOperator *op_copy) {
 
   Active fields must be specified using this function, but their data (in a
   CeedVector) is passed in CeedOperatorApply().  There can be at most one active
-  input and at most one active output.
+  input CeedVector and at most one active output CeedVector passed to
+  CeedOperatorApply().
 
   @param op          CeedOperator on which to provide the field
   @param field_name  Name of the field (to be matched with the name used by

From 0814089585cdf806de225f3491b42521824423b0 Mon Sep 17 00:00:00 2001
From: Jed Brown <jed@jedbrown.org>
Date: Sun, 17 Apr 2022 14:41:29 -0600
Subject: [PATCH 19/59] examples: use PetscInt_FMT instead of %D

This fixes many warnings now that PETSc uses printf format string
attribute checkers.
---
 examples/fluids/navierstokes.c  | 10 +++++-----
 examples/fluids/src/cloptions.c |  2 +-
 examples/fluids/src/misc.c      |  2 +-
 examples/fluids/src/setupts.c   |  5 +++--
 examples/petsc/area.c           |  6 +++---
 examples/petsc/bps.c            | 10 +++++-----
 examples/petsc/bpsraw.c         | 15 +++++++++------
 examples/petsc/bpssphere.c      |  4 ++--
 examples/petsc/multigrid.c      | 18 +++++++++---------
 examples/solids/elasticity.c    | 16 ++++++++--------
 examples/solids/src/misc.c      |  2 +-
 11 files changed, 47 insertions(+), 43 deletions(-)

diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c
index c3f638c5b7..29f321d19d 100644
--- a/examples/fluids/navierstokes.c
+++ b/examples/fluids/navierstokes.c
@@ -259,11 +259,11 @@ int main(int argc, char **argv) {
                        "  Mesh:\n"
                        "    Number of 1D Basis Nodes (P)       : %d\n"
                        "    Number of 1D Quadrature Points (Q) : %d\n"
-                       "    Global DoFs                        : %D\n"
-                       "    Owned DoFs                         : %D\n"
-                       "    DoFs per node                      : %D\n"
-                       "    Global nodes                       : %D\n"
-                       "    Owned nodes                        : %D\n",
+                       "    Global DoFs                        : %" PetscInt_FMT "\n"
+                       "    Owned DoFs                         : %" PetscInt_FMT "\n"
+                       "    DoFs per node                      : %" PetscInt_FMT "\n"
+                       "    Global nodes                       : %" PetscInt_FMT "\n"
+                       "    Owned nodes                        : %" PetscInt_FMT "\n",
                        num_P, num_Q, glob_dofs, owned_dofs, num_comp_q,
                        glob_nodes, owned_nodes); CHKERRQ(ierr);
   }
diff --git a/examples/fluids/src/cloptions.c b/examples/fluids/src/cloptions.c
index e1af3dbc39..fcb6b3560c 100644
--- a/examples/fluids/src/cloptions.c
+++ b/examples/fluids/src/cloptions.c
@@ -134,7 +134,7 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx,
         for (PetscInt w = 0; w < bc->num_wall; w++)
           if (bc->slips[c][s] == bc->walls[w])
             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,
-                    "Boundary condition already set on face %D!\n",
+                    "Boundary condition already set on face %" PetscInt_FMT "!\n",
                     bc->walls[w]);
 
   // Inflow BCs
diff --git a/examples/fluids/src/misc.c b/examples/fluids/src/misc.c
index 94c189c89c..b7b83748e4 100644
--- a/examples/fluids/src/misc.c
+++ b/examples/fluids/src/misc.c
@@ -202,7 +202,7 @@ PetscErrorCode PostProcess_NS(TS ts, CeedData ceed_data, DM dm,
   ierr = TSGetStepNumber(ts, &steps); CHKERRQ(ierr);
   if (!app_ctx->test_mode) {
     ierr = PetscPrintf(PETSC_COMM_WORLD,
-                       "Time integrator took %D time steps to reach final time %g\n",
+                       "Time integrator took %" PetscInt_FMT " time steps to reach final time %g\n",
                        steps, (double)final_time); CHKERRQ(ierr);
   }
 
diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c
index 49374a1524..75713c32b0 100644
--- a/examples/fluids/src/setupts.c
+++ b/examples/fluids/src/setupts.c
@@ -220,7 +220,8 @@ PetscErrorCode TSMonitor_NS(TS ts, PetscInt step_no, PetscReal time,
   ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Q_loc); CHKERRQ(ierr);
 
   // Output
-  ierr = PetscSNPrintf(file_path, sizeof file_path, "%s/ns-%03D.vtu",
+  ierr = PetscSNPrintf(file_path, sizeof file_path,
+                       "%s/ns-%03" PetscInt_FMT ".vtu",
                        user->app_ctx->output_dir, step_no + user->app_ctx->cont_steps);
   CHKERRQ(ierr);
   ierr = PetscViewerVTKOpen(PetscObjectComm((PetscObject)Q), file_path,
@@ -241,7 +242,7 @@ PetscErrorCode TSMonitor_NS(TS ts, PetscInt step_no, PetscReal time,
     ierr = DMGlobalToLocal(user->dm_viz, Q_refined, INSERT_VALUES, Q_refined_loc);
     CHKERRQ(ierr);
     ierr = PetscSNPrintf(file_path_refined, sizeof file_path_refined,
-                         "%s/nsrefined-%03D.vtu", user->app_ctx->output_dir,
+                         "%s/nsrefined-%03" PetscInt_FMT ".vtu", user->app_ctx->output_dir,
                          step_no + user->app_ctx->cont_steps);
     CHKERRQ(ierr);
     ierr = PetscViewerVTKOpen(PetscObjectComm((PetscObject)Q_refined),
diff --git a/examples/petsc/area.c b/examples/petsc/area.c
index e5291d93a0..35828cbeb4 100644
--- a/examples/petsc/area.c
+++ b/examples/petsc/area.c
@@ -190,9 +190,9 @@ int main(int argc, char **argv) {
                        "  Mesh:\n"
                        "    Number of 1D Basis Nodes (p)       : %d\n"
                        "    Number of 1D Quadrature Points (q) : %d\n"
-                       "    Global nodes                       : %D\n"
-                       "    DoF per node                       : %D\n"
-                       "    Global DoFs                        : %D\n",
+                       "    Global nodes                       : %" PetscInt_FMT "\n"
+                       "    DoF per node                       : %" PetscInt_FMT "\n"
+                       "    Global DoFs                        : %" PetscInt_FMT "\n",
                        used_resource, CeedMemTypes[mem_type_backend], P, Q,
                        g_size/num_comp_u, num_comp_u, g_size); CHKERRQ(ierr);
   }
diff --git a/examples/petsc/bps.c b/examples/petsc/bps.c
index 9f34fb7ccb..3d3c66da91 100644
--- a/examples/petsc/bps.c
+++ b/examples/petsc/bps.c
@@ -183,10 +183,10 @@ static PetscErrorCode RunWithDM(RunParams rp, DM dm,
                        "  Mesh:\n"
                        "    Number of 1D Basis Nodes (P)       : %d\n"
                        "    Number of 1D Quadrature Points (Q) : %d\n"
-                       "    Global nodes                       : %D\n"
-                       "    Local Elements                     : %D\n"
-                       "    Owned nodes                        : %D\n"
-                       "    DoF per node                       : %D\n",
+                       "    Global nodes                       : %" PetscInt_FMT "\n"
+                       "    Local Elements                     : %" PetscInt_FMT "\n"
+                       "    Owned nodes                        : %" PetscInt_FMT "\n"
+                       "    DoF per node                       : %" PetscInt_FMT "\n",
                        rp->bp_choice+1, rp->hostname, comm_size,
                        rp->ranks_per_node, vec_type, used_resource,
                        CeedMemTypes[mem_type_backend],
@@ -308,7 +308,7 @@ static PetscErrorCode RunWithDM(RunParams rp, DM dm,
                          "  KSP:\n"
                          "    KSP Type                           : %s\n"
                          "    KSP Convergence                    : %s\n"
-                         "    Total KSP Iterations               : %D\n"
+                         "    Total KSP Iterations               : %" PetscInt_FMT "\n"
                          "    Final rnorm                        : %e\n",
                          ksp_type, KSPConvergedReasons[reason], its,
                          (double)rnorm); CHKERRQ(ierr);
diff --git a/examples/petsc/bpsraw.c b/examples/petsc/bpsraw.c
index ba53d07c7e..166516f8ba 100644
--- a/examples/petsc/bpsraw.c
+++ b/examples/petsc/bpsraw.c
@@ -555,11 +555,14 @@ int main(int argc, char **argv) {
                        "  Mesh:\n"
                        "    Number of 1D Basis Nodes (P)       : %d\n"
                        "    Number of 1D Quadrature Points (Q) : %d\n"
-                       "    Global nodes                       : %D\n"
-                       "    Process Decomposition              : %D %D %D\n"
-                       "    Local Elements                     : %D = %D %D %D\n"
-                       "    Owned nodes                        : %D = %D %D %D\n"
-                       "    DoF per node                       : %D\n",
+                       "    Global nodes                       : %" PetscInt_FMT "\n"
+                       "    Process Decomposition              : %" PetscInt_FMT
+                       " %" PetscInt_FMT " %" PetscInt_FMT "\n"
+                       "    Local Elements                     : %" PetscInt_FMT
+                       " = %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n"
+                       "    Owned nodes                        : %" PetscInt_FMT
+                       " = %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n"
+                       "    DoF per node                       : %" PetscInt_FMT "\n",
                        bp_choice+1, vec_type, used_resource,
                        CeedMemTypes[mem_type_backend],
                        P, Q,  gsize/num_comp_u, p[0], p[1], p[2], local_elem,
@@ -903,7 +906,7 @@ int main(int argc, char **argv) {
                          "  KSP:\n"
                          "    KSP Type                           : %s\n"
                          "    KSP Convergence                    : %s\n"
-                         "    Total KSP Iterations               : %D\n"
+                         "    Total KSP Iterations               : %" PetscInt_FMT "\n"
                          "    Final rnorm                        : %e\n",
                          ksp_type, KSPConvergedReasons[reason], its,
                          (double)rnorm); CHKERRQ(ierr);
diff --git a/examples/petsc/bpssphere.c b/examples/petsc/bpssphere.c
index 9723a69f99..79a4019261 100644
--- a/examples/petsc/bpssphere.c
+++ b/examples/petsc/bpssphere.c
@@ -200,7 +200,7 @@ int main(int argc, char **argv) {
                        "  Mesh:\n"
                        "    Number of 1D Basis Nodes (p)       : %d\n"
                        "    Number of 1D Quadrature Points (q) : %d\n"
-                       "    Global nodes                       : %D\n",
+                       "    Global nodes                       : %" PetscInt_FMT "\n",
                        bp_choice+1, ceed_resource, CeedMemTypes[mem_type_backend], P, Q,
                        g_size/num_comp_u); CHKERRQ(ierr);
   }
@@ -326,7 +326,7 @@ int main(int argc, char **argv) {
                          "  KSP:\n"
                          "    KSP Type                           : %s\n"
                          "    KSP Convergence                    : %s\n"
-                         "    Total KSP Iterations               : %D\n"
+                         "    Total KSP Iterations               : %" PetscInt_FMT "\n"
                          "    Final rnorm                        : %e\n",
                          ksp_type, KSPConvergedReasons[reason], its,
                          (double)rnorm); CHKERRQ(ierr);
diff --git a/examples/petsc/multigrid.c b/examples/petsc/multigrid.c
index bcb519d75a..c32fd93524 100644
--- a/examples/petsc/multigrid.c
+++ b/examples/petsc/multigrid.c
@@ -112,12 +112,12 @@ int main(int argc, char **argv) {
                             "Epsilon parameter for Kershaw mesh transformation",
                             NULL, eps, &eps, NULL);
   if (eps > 1 || eps <= 0) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE,
-                                     "-eps %D must be (0,1]", eps);
+                                     "-eps %g must be (0,1]", (double)PetscRealPart(eps));
   degree = test_mode ? 3 : 2;
   ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis",
                          NULL, degree, &degree, NULL); CHKERRQ(ierr);
   if (degree < 1) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE,
-                            "-degree %D must be at least 1", degree);
+                            "-degree %" PetscInt_FMT " must be at least 1", degree);
   q_extra = bp_options[bp_choice].q_extra;
   ierr = PetscOptionsInt("-q_extra", "Number of extra quadrature points",
                          NULL, q_extra, &q_extra, NULL); CHKERRQ(ierr);
@@ -273,9 +273,9 @@ int main(int argc, char **argv) {
                        "  Mesh:\n"
                        "    Number of 1D Basis Nodes (p)       : %d\n"
                        "    Number of 1D Quadrature Points (q) : %d\n"
-                       "    Global Nodes                       : %D\n"
-                       "    Owned Nodes                        : %D\n"
-                       "    DoF per node                       : %D\n"
+                       "    Global Nodes                       : %" PetscInt_FMT "\n"
+                       "    Owned Nodes                        : %" PetscInt_FMT "\n"
+                       "    DoF per node                       : %" PetscInt_FMT "\n"
                        "  Multigrid:\n"
                        "    Number of Levels                   : %d\n",
                        bp_choice+1, vec_type, used_resource,
@@ -296,10 +296,10 @@ int main(int argc, char **argv) {
   for (int i=0; i<num_levels; i++) {
     // Print level information
     if (!test_mode && (i == 0 || i == fine_level)) {
-      ierr = PetscPrintf(comm,"    Level %D (%s):\n"
+      ierr = PetscPrintf(comm,"    Level %d (%s):\n"
                          "      Number of 1D Basis Nodes (p)     : %d\n"
-                         "      Global Nodes                     : %D\n"
-                         "      Owned Nodes                      : %D\n",
+                         "      Global Nodes                     : %" PetscInt_FMT "\n"
+                         "      Owned Nodes                      : %" PetscInt_FMT "\n",
                          i, (i? "fine" : "coarse"), level_degrees[i] + 1,
                          g_size[i]/num_comp_u, l_size[i]/num_comp_u); CHKERRQ(ierr);
     }
@@ -562,7 +562,7 @@ int main(int argc, char **argv) {
                          "  KSP:\n"
                          "    KSP Type                           : %s\n"
                          "    KSP Convergence                    : %s\n"
-                         "    Total KSP Iterations               : %D\n"
+                         "    Total KSP Iterations               : %" PetscInt_FMT "\n"
                          "    Final rnorm                        : %e\n",
                          ksp_type, KSPConvergedReasons[reason], its,
                          (double)rnorm); CHKERRQ(ierr);
diff --git a/examples/solids/elasticity.c b/examples/solids/elasticity.c
index d5c763357a..71a6cb0313 100644
--- a/examples/solids/elasticity.c
+++ b/examples/solids/elasticity.c
@@ -388,9 +388,9 @@ int main(int argc, char **argv) {
                        "    File                               : %s\n"
                        "    Number of 1D Basis Nodes (p)       : %d\n"
                        "    Number of 1D Quadrature Points (q) : %d\n"
-                       "    Global nodes                       : %D\n"
-                       "    Owned nodes                        : %D\n"
-                       "    DoF per node                       : %D\n"
+                       "    Global nodes                       : %" PetscInt_FMT "\n"
+                       "    Owned nodes                        : %" PetscInt_FMT "\n"
+                       "    DoF per node                       : %" PetscInt_FMT "\n"
                        "  Multigrid:\n"
                        "    Type                               : %s\n"
                        "    Number of Levels                   : %d\n",
@@ -412,10 +412,10 @@ int main(int argc, char **argv) {
       for (PetscInt i = 0; i < 2; i++) {
         CeedInt level = i ? fine_level : 0;
         ierr = PetscPrintf(comm,
-                           "    Level %D (%s):\n"
+                           "    Level %d (%s):\n"
                            "      Number of 1D Basis Nodes (p)     : %d\n"
-                           "      Global Nodes                     : %D\n"
-                           "      Owned Nodes                      : %D\n",
+                           "      Global Nodes                     : %" PetscInt_FMT "\n"
+                           "      Owned Nodes                      : %" PetscInt_FMT "\n",
                            level, i ? "fine" : "coarse",
                            app_ctx->level_degrees[level] + 1,
                            U_g_size[level]/num_comp_u, U_l_size[level]/num_comp_u);
@@ -763,7 +763,7 @@ int main(int argc, char **argv) {
                        "    SNES Convergence                   : %s\n"
                        "    Number of Load Increments          : %d\n"
                        "    Completed Load Increments          : %d\n"
-                       "    Total SNES Iterations              : %D\n"
+                       "    Total SNES Iterations              : %" PetscInt_FMT "\n"
                        "    Final rnorm                        : %e\n",
                        snes_type, SNESConvergedReasons[reason],
                        app_ctx->num_increments, increment - 1,
@@ -777,7 +777,7 @@ int main(int argc, char **argv) {
     ierr = PetscPrintf(comm,
                        "  Linear Solver:\n"
                        "    KSP Type                           : %s\n"
-                       "    Total KSP Iterations               : %D\n",
+                       "    Total KSP Iterations               : %" PetscInt_FMT "\n",
                        ksp_type, ksp_its); CHKERRQ(ierr);
 
     // -- PC
diff --git a/examples/solids/src/misc.c b/examples/solids/src/misc.c
index 3bb593316d..8658d4c17e 100644
--- a/examples/solids/src/misc.c
+++ b/examples/solids/src/misc.c
@@ -131,7 +131,7 @@ PetscErrorCode ViewSolution(MPI_Comm comm, AppCtx app_ctx, Vec U,
 
   // Build file name
   ierr = PetscSNPrintf(output_filename, sizeof output_filename,
-                       "%s/solution-%03D.vtu", app_ctx->output_dir,
+                       "%s/solution-%03" PetscInt_FMT ".vtu", app_ctx->output_dir,
                        increment); CHKERRQ(ierr);
 
   // Increment sequence

From 07b31e0ef78c1ae2337ae2e7912515a2998488a7 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Wed, 20 Apr 2022 17:20:18 -0600
Subject: [PATCH 20/59] gpu - remove 'quoted' operator assembly kernels

---
 backends/cuda-ref/ceed-cuda-ref-operator.c    | 315 +++--------------
 backends/hip-ref/ceed-hip-ref-operator.c      | 316 +++---------------
 .../cuda-ref-operator-assemble-diagonal.h     | 148 ++++++++
 .../cuda/cuda-ref-operator-assemble.h         | 117 +++++++
 .../hip/hip-ref-operator-assemble-diagonal.h  | 147 ++++++++
 .../hip/hip-ref-operator-assemble.h           | 117 +++++++
 6 files changed, 606 insertions(+), 554 deletions(-)
 create mode 100644 include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h
 create mode 100644 include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h
 create mode 100644 include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h
 create mode 100644 include/ceed/jit-source/hip/hip-ref-operator-assemble.h

diff --git a/backends/cuda-ref/ceed-cuda-ref-operator.c b/backends/cuda-ref/ceed-cuda-ref-operator.c
index cbe2f812d8..b0691a3662 100644
--- a/backends/cuda-ref/ceed-cuda-ref-operator.c
+++ b/backends/cuda-ref/ceed-cuda-ref-operator.c
@@ -7,6 +7,7 @@
 
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
+#include <ceed/jit-tools.h>
 #include <assert.h>
 #include <cuda.h>
 #include <cuda_runtime.h>
@@ -719,149 +720,6 @@ static int CeedOperatorLinearAssembleQFunctionUpdate_Cuda(CeedOperator op,
          &rstr, request);
 }
 
-//------------------------------------------------------------------------------
-// Diagonal assembly kernels
-//------------------------------------------------------------------------------
-// *INDENT-OFF*
-static const char *diagonalkernels = QUOTE(
-
-typedef enum {
-  /// Perform no evaluation (either because there is no data or it is already at
-  /// quadrature points)
-  CEED_EVAL_NONE   = 0,
-  /// Interpolate from nodes to quadrature points
-  CEED_EVAL_INTERP = 1,
-  /// Evaluate gradients at quadrature points from input in a nodal basis
-  CEED_EVAL_GRAD   = 2,
-  /// Evaluate divergence at quadrature points from input in a nodal basis
-  CEED_EVAL_DIV    = 4,
-  /// Evaluate curl at quadrature points from input in a nodal basis
-  CEED_EVAL_CURL   = 8,
-  /// Using no input, evaluate quadrature weights on the reference element
-  CEED_EVAL_WEIGHT = 16,
-} CeedEvalMode;
-
-//------------------------------------------------------------------------------
-// Get Basis Emode Pointer
-//------------------------------------------------------------------------------
-extern "C" __device__ void CeedOperatorGetBasisPointer_Cuda(const CeedScalar **basisptr,
-    CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp,
-    const CeedScalar *grad) {
-  switch (emode) {
-  case CEED_EVAL_NONE:
-    *basisptr = identity;
-    break;
-  case CEED_EVAL_INTERP:
-    *basisptr = interp;
-    break;
-  case CEED_EVAL_GRAD:
-    *basisptr = grad;
-    break;
-  case CEED_EVAL_WEIGHT:
-  case CEED_EVAL_DIV:
-  case CEED_EVAL_CURL:
-    break; // Caught by QF Assembly
-  }
-}
-
-//------------------------------------------------------------------------------
-// Core code for diagonal assembly
-//------------------------------------------------------------------------------
-__device__ void diagonalCore(const CeedInt nelem,
-    const CeedScalar maxnorm, const bool pointBlock,
-    const CeedScalar *identity,
-    const CeedScalar *interpin, const CeedScalar *gradin,
-    const CeedScalar *interpout, const CeedScalar *gradout,
-    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
-    const CeedScalar *__restrict__ assembledqfarray,
-    CeedScalar *__restrict__ elemdiagarray) {
-  const int tid = threadIdx.x; // running with P threads, tid is evec node
-  const CeedScalar qfvaluebound = maxnorm*1e-12;
-
-  // Compute the diagonal of B^T D B
-  // Each element
-  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem;
-       e += gridDim.x*blockDim.z) {
-    CeedInt dout = -1;
-    // Each basis eval mode pair
-    for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) {
-      const CeedScalar *bt = NULL;
-      if (emodeout[eout] == CEED_EVAL_GRAD)
-        dout += 1;
-      CeedOperatorGetBasisPointer_Cuda(&bt, emodeout[eout], identity, interpout,
-                                      &gradout[dout*NQPTS*NNODES]);
-      CeedInt din = -1;
-      for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) {
-        const CeedScalar *b = NULL;
-        if (emodein[ein] == CEED_EVAL_GRAD)
-          din += 1;
-        CeedOperatorGetBasisPointer_Cuda(&b, emodein[ein], identity, interpin,
-                                        &gradin[din*NQPTS*NNODES]);
-        // Each component
-        for (CeedInt compOut = 0; compOut < NCOMP; compOut++) {
-          // Each qpoint/node pair
-          if (pointBlock) {
-            // Point Block Diagonal
-            for (CeedInt compIn = 0; compIn < NCOMP; compIn++) {
-              CeedScalar evalue = 0.;
-              for (CeedInt q = 0; q < NQPTS; q++) {
-                const CeedScalar qfvalue =
-                  assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)*
-                                     NCOMP+compOut)*nelem+e)*NQPTS+q];
-                if (abs(qfvalue) > qfvaluebound)
-                  evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
-              }
-              elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue;
-            }
-          } else {
-            // Diagonal Only
-            CeedScalar evalue = 0.;
-            for (CeedInt q = 0; q < NQPTS; q++) {
-              const CeedScalar qfvalue =
-                assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)*
-                                   NCOMP+compOut)*nelem+e)*NQPTS+q];
-              if (abs(qfvalue) > qfvaluebound)
-                evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
-            }
-            elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue;
-          }
-        }
-      }
-    }
-  }
-}
-
-//------------------------------------------------------------------------------
-// Linear diagonal
-//------------------------------------------------------------------------------
-extern "C" __global__ void linearDiagonal(const CeedInt nelem,
-    const CeedScalar maxnorm, const CeedScalar *identity,
-    const CeedScalar *interpin, const CeedScalar *gradin,
-    const CeedScalar *interpout, const CeedScalar *gradout,
-    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
-    const CeedScalar *__restrict__ assembledqfarray,
-    CeedScalar *__restrict__ elemdiagarray) {
-  diagonalCore(nelem, maxnorm, false, identity, interpin, gradin, interpout,
-               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
-}
-
-//------------------------------------------------------------------------------
-// Linear point block diagonal
-//------------------------------------------------------------------------------
-extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem,
-    const CeedScalar maxnorm, const CeedScalar *identity,
-    const CeedScalar *interpin, const CeedScalar *gradin,
-    const CeedScalar *interpout, const CeedScalar *gradout,
-    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
-    const CeedScalar *__restrict__ assembledqfarray,
-    CeedScalar *__restrict__ elemdiagarray) {
-  diagonalCore(nelem, maxnorm, true, identity, interpin, gradin, interpout,
-               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
-}
-
-);
-// *INDENT-ON*
-
 //------------------------------------------------------------------------------
 // Create point block restriction
 //------------------------------------------------------------------------------
@@ -1027,11 +885,21 @@ static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op,
   diag->numemodeout = numemodeout;
 
   // Assemble kernel
+  char *diagonal_kernel_path, *diagonal_kernel_source;
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h",
+                                &diagonal_kernel_path); CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2, "----- Loading Diagonal Assembly Kernel Source -----\n");
+  ierr = CeedLoadSourceToBuffer(ceed, diagonal_kernel_path,
+                                &diagonal_kernel_source);
+  CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2,
+               "----- Loading Diagonal Assembly Source Complete! -----\n");
   CeedInt nnodes, nqpts;
   ierr = CeedBasisGetNumNodes(basisin, &nnodes); CeedChkBackend(ierr);
   ierr = CeedBasisGetNumQuadraturePoints(basisin, &nqpts); CeedChkBackend(ierr);
   diag->nnodes = nnodes;
-  ierr = CeedCompileCuda(ceed, diagonalkernels, &diag->module, 5,
+  ierr = CeedCompileCuda(ceed, diagonal_kernel_source, &diag->module, 5,
                          "NUMEMODEIN", numemodein,
                          "NUMEMODEOUT", numemodeout,
                          "NNODES", nnodes,
@@ -1043,6 +911,8 @@ static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op,
   ierr = CeedGetKernelCuda(ceed, diag->module, "linearPointBlockDiagonal",
                            &diag->linearPointBlock);
   CeedChk_Cu(ceed, ierr);
+  ierr = CeedFree(&diagonal_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedFree(&diagonal_kernel_source); CeedChkBackend(ierr);
 
   // Basis matrices
   const CeedInt qBytes = nqpts * sizeof(CeedScalar);
@@ -1246,119 +1116,6 @@ static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Cuda(CeedOperator op,
   }
 }
 
-//------------------------------------------------------------------------------
-// Matrix assembly kernel for low-order elements (2D thread block)
-//------------------------------------------------------------------------------
-// *INDENT-OFF*
-static const char *assemblykernel = QUOTE(
-extern "C" __launch_bounds__(BLOCK_SIZE) 
-           __global__ void linearAssemble(const CeedScalar *B_in, const CeedScalar *B_out,
-                   const CeedScalar *__restrict__ qf_array,
-                   CeedScalar *__restrict__ values_array) {
-
-  // This kernel assumes B_in and B_out have the same number of quadrature points and 
-  // basis points. 
-  // TODO: expand to more general cases
-  const int i = threadIdx.x; // The output row index of each B^TDB operation 
-  const int l = threadIdx.y; // The output column index of each B^TDB operation
-			     // such that we have (Bout^T)_ij D_jk Bin_kl = C_il
-
-  // Strides for final output ordering, determined by the reference (interface) implementation of
-  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col 
-  const CeedInt comp_out_stride = NNODES * NNODES;
-  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
-  const CeedInt e_stride = comp_in_stride * NCOMP;
-  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt 
-  const CeedInt qe_stride = NQPTS;
-  const CeedInt qcomp_out_stride = NELEM * qe_stride;
-  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
-  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
-  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
-
-  // Loop over each element (if necessary)
-  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
-         e += gridDim.x*blockDim.z) {
-    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
-      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
-        CeedScalar result = 0.0;
-        CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
-        for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
-          CeedInt b_in_index = emode_in * NQPTS * NNODES;
-      	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
-             CeedInt b_out_index = emode_out * NQPTS * NNODES;
-             CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
- 	     // Perform the B^T D B operation for this 'chunk' of D (the qf_array)
-            for (CeedInt j = 0; j < NQPTS; j++) {
-     	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
-	    }
-
-          }// end of emode_out 
-        } // end of emode_in
-        CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
-   	values_array[val_index] = result;
-      } // end of out component
-    } // end of in component
-  } // end of element loop
-}
-);
-
-//------------------------------------------------------------------------------
-// Fallback kernel for larger orders (1D thread block)
-//------------------------------------------------------------------------------
-static const char *assemblykernelbigelem = QUOTE(
-extern "C" __launch_bounds__(BLOCK_SIZE) 
-           __global__ void linearAssemble(const CeedScalar *B_in, const CeedScalar *B_out,
-                   const CeedScalar *__restrict__ qf_array,
-                   CeedScalar *__restrict__ values_array) {
-
-  // This kernel assumes B_in and B_out have the same number of quadrature points and 
-  // basis points. 
-  // TODO: expand to more general cases
-  const int l = threadIdx.x; // The output column index of each B^TDB operation
-			     // such that we have (Bout^T)_ij D_jk Bin_kl = C_il
-
-  // Strides for final output ordering, determined by the reference (interface) implementation of
-  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col 
-  const CeedInt comp_out_stride = NNODES * NNODES;
-  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
-  const CeedInt e_stride = comp_in_stride * NCOMP;
-  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt 
-  const CeedInt qe_stride = NQPTS;
-  const CeedInt qcomp_out_stride = NELEM * qe_stride;
-  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
-  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
-  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
-
-    // Loop over each element (if necessary)
-  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
-         e += gridDim.x*blockDim.z) {
-    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
-      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
-        for (CeedInt i = 0; i < NNODES; i++) {
-          CeedScalar result = 0.0;
-          CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
-          for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
-            CeedInt b_in_index = emode_in * NQPTS * NNODES;
-        	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
-               CeedInt b_out_index = emode_out * NQPTS * NNODES;
-               CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
-   	     // Perform the B^T D B operation for this 'chunk' of D (the qf_array)
-              for (CeedInt j = 0; j < NQPTS; j++) {
-       	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
-  	    }
-
-            }// end of emode_out 
-          } // end of emode_in
-          CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
-     	  values_array[val_index] = result;
-        } // end of loop over element node index, i
-      } // end of out component
-    } // end of in component
-  } // end of element loop
-}
-);
-// *INDENT-ON*
-
 //------------------------------------------------------------------------------
 // Single operator assembly setup
 //------------------------------------------------------------------------------
@@ -1482,35 +1239,39 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op) {
   CeedInt block_size = esize * esize * elemsPerBlock;
   Ceed_Cuda *cuda_data;
   ierr = CeedGetData(ceed, &cuda_data); CeedChkBackend(ierr);
-  if (block_size > cuda_data->device_prop.maxThreadsPerBlock) {
+  char *assembly_kernel_path, *assembly_kernel_source;
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/cuda/cuda-ref-operator-assemble.h",
+                                &assembly_kernel_path); CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2, "----- Loading Assembly Kernel Source -----\n");
+  ierr = CeedLoadSourceToBuffer(ceed, assembly_kernel_path,
+                                &assembly_kernel_source);
+  CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2, "----- Loading Assembly Source Complete! -----\n");
+  bool fallback = block_size > cuda_data->device_prop.maxThreadsPerBlock;
+  if (fallback) {
     // Use fallback kernel with 1D threadblock
     block_size = esize * elemsPerBlock;
     asmb->block_size_x = esize;
     asmb->block_size_y = 1;
-    ierr = CeedCompileCuda(ceed, assemblykernelbigelem, &asmb->module, 7,
-                           "NELEM", nelem,
-                           "NUMEMODEIN", num_emode_in,
-                           "NUMEMODEOUT", num_emode_out,
-                           "NQPTS", nqpts,
-                           "NNODES", esize,
-                           "BLOCK_SIZE", block_size,
-                           "NCOMP", ncomp
-                          ); CeedChk_Cu(ceed, ierr);
   } else {  // Use kernel with 2D threadblock
     asmb->block_size_x = esize;
     asmb->block_size_y = esize;
-    ierr = CeedCompileCuda(ceed, assemblykernel, &asmb->module, 7,
-                           "NELEM", nelem,
-                           "NUMEMODEIN", num_emode_in,
-                           "NUMEMODEOUT", num_emode_out,
-                           "NQPTS", nqpts,
-                           "NNODES", esize,
-                           "BLOCK_SIZE", block_size,
-                           "NCOMP", ncomp
-                          ); CeedChk_Cu(ceed, ierr);
   }
-  ierr = CeedGetKernelCuda(ceed, asmb->module, "linearAssemble",
+  ierr = CeedCompileCuda(ceed, assembly_kernel_source, &asmb->module, 7,
+                         "NELEM", nelem,
+                         "NUMEMODEIN", num_emode_in,
+                         "NUMEMODEOUT", num_emode_out,
+                         "NQPTS", nqpts,
+                         "NNODES", esize,
+                         "BLOCK_SIZE", block_size,
+                         "NCOMP", ncomp
+                        ); CeedChk_Cu(ceed, ierr);
+  ierr = CeedGetKernelCuda(ceed, asmb->module,
+                           fallback ? "linearAssembleFallback" : "linearAssemble",
                            &asmb->linearAssemble); CeedChk_Cu(ceed, ierr);
+  ierr = CeedFree(&assembly_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedFree(&assembly_kernel_source); CeedChkBackend(ierr);
 
   // Build 'full' B matrices (not 1D arrays used for tensor-product matrices)
   const CeedScalar *interp_in, *grad_in;
diff --git a/backends/hip-ref/ceed-hip-ref-operator.c b/backends/hip-ref/ceed-hip-ref-operator.c
index c3d1a4f324..9cecbec4f4 100644
--- a/backends/hip-ref/ceed-hip-ref-operator.c
+++ b/backends/hip-ref/ceed-hip-ref-operator.c
@@ -7,6 +7,7 @@
 
 #include <ceed/ceed.h>
 #include <ceed/backend.h>
+#include <ceed/jit-tools.h>
 #include <hip/hip_runtime.h>
 #include <assert.h>
 #include <stdbool.h>
@@ -715,149 +716,6 @@ static int CeedOperatorLinearAssembleQFunctionUpdate_Hip(CeedOperator op,
          request);
 }
 
-//------------------------------------------------------------------------------
-// Diagonal assembly kernels
-//------------------------------------------------------------------------------
-// *INDENT-OFF*
-static const char *diagonalkernels = QUOTE(
-
-typedef enum {
-  /// Perform no evaluation (either because there is no data or it is already at
-  /// quadrature points)
-  CEED_EVAL_NONE   = 0,
-  /// Interpolate from nodes to quadrature points
-  CEED_EVAL_INTERP = 1,
-  /// Evaluate gradients at quadrature points from input in a nodal basis
-  CEED_EVAL_GRAD   = 2,
-  /// Evaluate divergence at quadrature points from input in a nodal basis
-  CEED_EVAL_DIV    = 4,
-  /// Evaluate curl at quadrature points from input in a nodal basis
-  CEED_EVAL_CURL   = 8,
-  /// Using no input, evaluate quadrature weights on the reference element
-  CEED_EVAL_WEIGHT = 16,
-} CeedEvalMode;
-
-//------------------------------------------------------------------------------
-// Get Basis Emode Pointer
-//------------------------------------------------------------------------------
-extern "C" __device__ void CeedOperatorGetBasisPointer_Hip(const CeedScalar **basisptr,
-    CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp,
-    const CeedScalar *grad) {
-  switch (emode) {
-  case CEED_EVAL_NONE:
-    *basisptr = identity;
-    break;
-  case CEED_EVAL_INTERP:
-    *basisptr = interp;
-    break;
-  case CEED_EVAL_GRAD:
-    *basisptr = grad;
-    break;
-  case CEED_EVAL_WEIGHT:
-  case CEED_EVAL_DIV:
-  case CEED_EVAL_CURL:
-    break; // Caught by QF Assembly
-  }
-}
-
-//------------------------------------------------------------------------------
-// Core code for diagonal assembly
-//------------------------------------------------------------------------------
-__device__ void diagonalCore(const CeedInt nelem,
-    const CeedScalar maxnorm, const bool pointBlock,
-    const CeedScalar *identity,
-    const CeedScalar *interpin, const CeedScalar *gradin,
-    const CeedScalar *interpout, const CeedScalar *gradout,
-    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
-    const CeedScalar *__restrict__ assembledqfarray,
-    CeedScalar *__restrict__ elemdiagarray) {
-  const int tid = threadIdx.x; // running with P threads, tid is evec node
-  const CeedScalar qfvaluebound = maxnorm*1e-12;
-
-  // Compute the diagonal of B^T D B
-  // Each element
-  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem;
-       e += gridDim.x*blockDim.z) {
-    CeedInt dout = -1;
-    // Each basis eval mode pair
-    for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) {
-      const CeedScalar *bt = NULL;
-      if (emodeout[eout] == CEED_EVAL_GRAD)
-        dout += 1;
-      CeedOperatorGetBasisPointer_Hip(&bt, emodeout[eout], identity, interpout,
-                                      &gradout[dout*NQPTS*NNODES]);
-      CeedInt din = -1;
-      for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) {
-        const CeedScalar *b = NULL;
-        if (emodein[ein] == CEED_EVAL_GRAD)
-          din += 1;
-        CeedOperatorGetBasisPointer_Hip(&b, emodein[ein], identity, interpin,
-                                        &gradin[din*NQPTS*NNODES]);
-        // Each component
-        for (CeedInt compOut = 0; compOut < NCOMP; compOut++) {
-          // Each qpoint/node pair
-          if (pointBlock) {
-            // Point Block Diagonal
-            for (CeedInt compIn = 0; compIn < NCOMP; compIn++) {
-              CeedScalar evalue = 0.;
-              for (CeedInt q = 0; q < NQPTS; q++) {
-                const CeedScalar qfvalue =
-                  assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)*
-                                     NCOMP+compOut)*nelem+e)*NQPTS+q];
-                if (abs(qfvalue) > qfvaluebound)
-                  evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
-              }
-              elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue;
-            }
-          } else {
-            // Diagonal Only
-            CeedScalar evalue = 0.;
-            for (CeedInt q = 0; q < NQPTS; q++) {
-              const CeedScalar qfvalue =
-                assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)*
-                                   NCOMP+compOut)*nelem+e)*NQPTS+q];
-              if (abs(qfvalue) > qfvaluebound)
-                evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
-            }
-            elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue;
-          }
-        }
-      }
-    }
-  }
-}
-
-//------------------------------------------------------------------------------
-// Linear diagonal
-//------------------------------------------------------------------------------
-extern "C" __global__ void linearDiagonal(const CeedInt nelem,
-    const CeedScalar maxnorm, const CeedScalar *identity,
-    const CeedScalar *interpin, const CeedScalar *gradin,
-    const CeedScalar *interpout, const CeedScalar *gradout,
-    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
-    const CeedScalar *__restrict__ assembledqfarray,
-    CeedScalar *__restrict__ elemdiagarray) {
-  diagonalCore(nelem, maxnorm, false, identity, interpin, gradin, interpout,
-               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
-}
-
-//------------------------------------------------------------------------------
-// Linear point block diagonal
-//------------------------------------------------------------------------------
-extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem,
-    const CeedScalar maxnorm, const CeedScalar *identity,
-    const CeedScalar *interpin, const CeedScalar *gradin,
-    const CeedScalar *interpout, const CeedScalar *gradout,
-    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
-    const CeedScalar *__restrict__ assembledqfarray,
-    CeedScalar *__restrict__ elemdiagarray) {
-  diagonalCore(nelem, maxnorm, true, identity, interpin, gradin, interpout,
-               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
-}
-
-);
-// *INDENT-ON*
-
 //------------------------------------------------------------------------------
 // Create point block restriction
 //------------------------------------------------------------------------------
@@ -1023,11 +881,22 @@ static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op,
   diag->numemodeout = numemodeout;
 
   // Assemble kernel
+
+  char *diagonal_kernel_path, *diagonal_kernel_source;
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h",
+                                &diagonal_kernel_path); CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2, "----- Loading Diagonal Assembly Kernel Source -----\n");
+  ierr = CeedLoadSourceToBuffer(ceed, diagonal_kernel_path,
+                                &diagonal_kernel_source);
+  CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2,
+               "----- Loading Diagonal Assembly Source Complete! -----\n");
   CeedInt nnodes, nqpts;
   ierr = CeedBasisGetNumNodes(basisin, &nnodes); CeedChkBackend(ierr);
   ierr = CeedBasisGetNumQuadraturePoints(basisin, &nqpts); CeedChkBackend(ierr);
   diag->nnodes = nnodes;
-  ierr = CeedCompileHip(ceed, diagonalkernels, &diag->module, 5,
+  ierr = CeedCompileHip(ceed, diagonal_kernel_source, &diag->module, 5,
                         "NUMEMODEIN", numemodein,
                         "NUMEMODEOUT", numemodeout,
                         "NNODES", nnodes,
@@ -1039,6 +908,8 @@ static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op,
   ierr = CeedGetKernelHip(ceed, diag->module, "linearPointBlockDiagonal",
                           &diag->linearPointBlock);
   CeedChk_Hip(ceed, ierr);
+  ierr = CeedFree(&diagonal_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedFree(&diagonal_kernel_source); CeedChkBackend(ierr);
 
   // Basis matrices
   const CeedInt qBytes = nqpts * sizeof(CeedScalar);
@@ -1243,119 +1114,6 @@ static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Hip(CeedOperator op,
   }
 }
 
-//------------------------------------------------------------------------------
-// Matrix assembly kernel for low-order elements (2D thread block)
-//------------------------------------------------------------------------------
-// *INDENT-OFF*
-static const char *assemblykernel = QUOTE(
-extern "C" __launch_bounds__(BLOCK_SIZE) 
-           __global__ void linearAssemble(const CeedScalar *B_in, const CeedScalar *B_out,
-                   const CeedScalar *__restrict__ qf_array,
-                   CeedScalar *__restrict__ values_array) {
-
-  // This kernel assumes B_in and B_out have the same number of quadrature points and 
-  // basis points. 
-  // TODO: expand to more general cases
-  const int i = threadIdx.x; // The output row index of each B^TDB operation 
-  const int l = threadIdx.y; // The output column index of each B^TDB operation
-			     // such that we have (Bout^T)_ij D_jk Bin_kl = C_il
-
-  // Strides for final output ordering, determined by the reference (interface) implementation of
-  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col 
-  const CeedInt comp_out_stride = NNODES * NNODES;
-  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
-  const CeedInt e_stride = comp_in_stride * NCOMP;
-  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt 
-  const CeedInt qe_stride = NQPTS;
-  const CeedInt qcomp_out_stride = NELEM * qe_stride;
-  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
-  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
-  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
-
-  // Loop over each element (if necessary)
-  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
-         e += gridDim.x*blockDim.z) {
-    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
-      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
-        CeedScalar result = 0.0;
-        CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
-        for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
-          CeedInt b_in_index = emode_in * NQPTS * NNODES;
-      	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
-             CeedInt b_out_index = emode_out * NQPTS * NNODES;
-             CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
- 	     // Perform the B^T D B operation for this 'chunk' of D (the qf_array)
-            for (CeedInt j = 0; j < NQPTS; j++) {
-     	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
-	    }
-
-          }// end of emode_out 
-        } // end of emode_in
-        CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
-   	values_array[val_index] = result;
-      } // end of out component
-    } // end of in component
-  } // end of element loop
-}
-);
-
-//------------------------------------------------------------------------------
-// Fallback kernel for larger orders (1D thread block)
-//------------------------------------------------------------------------------
-static const char *assemblykernelbigelem = QUOTE(
-extern "C" __launch_bounds__(BLOCK_SIZE) 
-           __global__ void linearAssemble(const CeedScalar *B_in, const CeedScalar *B_out,
-                   const CeedScalar *__restrict__ qf_array,
-                   CeedScalar *__restrict__ values_array) {
-
-  // This kernel assumes B_in and B_out have the same number of quadrature points and 
-  // basis points. 
-  // TODO: expand to more general cases
-  const int l = threadIdx.x; // The output column index of each B^TDB operation
-			     // such that we have (Bout^T)_ij D_jk Bin_kl = C_il
-
-  // Strides for final output ordering, determined by the reference (interface) implementation of
-  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col 
-  const CeedInt comp_out_stride = NNODES * NNODES;
-  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
-  const CeedInt e_stride = comp_in_stride * NCOMP;
-  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt 
-  const CeedInt qe_stride = NQPTS;
-  const CeedInt qcomp_out_stride = NELEM * qe_stride;
-  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
-  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
-  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
-
-    // Loop over each element (if necessary)
-  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
-         e += gridDim.x*blockDim.z) {
-    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
-      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
-        for (CeedInt i = 0; i < NNODES; i++) {
-          CeedScalar result = 0.0;
-          CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
-          for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
-            CeedInt b_in_index = emode_in * NQPTS * NNODES;
-        	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
-               CeedInt b_out_index = emode_out * NQPTS * NNODES;
-               CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
-   	     // Perform the B^T D B operation for this 'chunk' of D (the qf_array)
-              for (CeedInt j = 0; j < NQPTS; j++) {
-       	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
-  	    }
-
-            }// end of emode_out 
-          } // end of emode_in
-          CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
-     	  values_array[val_index] = result;
-        } // end of loop over element node index, i
-      } // end of out component
-    } // end of in component
-  } // end of element loop
-}
-);
-// *INDENT-ON*
-
 //------------------------------------------------------------------------------
 // Single operator assembly setup
 //------------------------------------------------------------------------------
@@ -1477,34 +1235,38 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op) {
   int elemsPerBlock = 1;
   asmb->elemsPerBlock = elemsPerBlock;
   CeedInt block_size = esize * esize * elemsPerBlock;
-  if (block_size > 1024) { // Use fallback kernel with 1D threadblock
+  char *assembly_kernel_path, *assembly_kernel_source;
+  ierr = CeedGetJitAbsolutePath(ceed,
+                                "ceed/jit-source/hip/hip-ref-operator-assemble.h",
+                                &assembly_kernel_path); CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2, "----- Loading Assembly Kernel Source -----\n");
+  ierr = CeedLoadSourceToBuffer(ceed, assembly_kernel_path,
+                                &assembly_kernel_source);
+  CeedChkBackend(ierr);
+  CeedDebug256(ceed, 2, "----- Loading Assembly Source Complete! -----\n");
+  bool fallback = block_size > 1024;
+  if (fallback) { // Use fallback kernel with 1D threadblock
     block_size = esize * elemsPerBlock;
     asmb->block_size_x = esize;
     asmb->block_size_y = 1;
-    ierr = CeedCompileHip(ceed, assemblykernelbigelem, &asmb->module, 7,
-                          "NELEM", nelem,
-                          "NUMEMODEIN", num_emode_in,
-                          "NUMEMODEOUT", num_emode_out,
-                          "NQPTS", nqpts,
-                          "NNODES", esize,
-                          "BLOCK_SIZE", block_size,
-                          "NCOMP", ncomp
-                         ); CeedChk_Hip(ceed, ierr);
   } else {  // Use kernel with 2D threadblock
     asmb->block_size_x = esize;
     asmb->block_size_y = esize;
-    ierr = CeedCompileHip(ceed, assemblykernel, &asmb->module, 7,
-                          "NELEM", nelem,
-                          "NUMEMODEIN", num_emode_in,
-                          "NUMEMODEOUT", num_emode_out,
-                          "NQPTS", nqpts,
-                          "NNODES", esize,
-                          "BLOCK_SIZE", block_size,
-                          "NCOMP", ncomp
-                         ); CeedChk_Hip(ceed, ierr);
   }
-  ierr = CeedGetKernelHip(ceed, asmb->module, "linearAssemble",
+  ierr = CeedCompileHip(ceed, assembly_kernel_source, &asmb->module, 7,
+                        "NELEM", nelem,
+                        "NUMEMODEIN", num_emode_in,
+                        "NUMEMODEOUT", num_emode_out,
+                        "NQPTS", nqpts,
+                        "NNODES", esize,
+                        "BLOCK_SIZE", block_size,
+                        "NCOMP", ncomp
+                       ); CeedChk_Hip(ceed, ierr);
+  ierr = CeedGetKernelHip(ceed, asmb->module,
+                          fallback ? "linearAssembleFallback" : "linearAssemble",
                           &asmb->linearAssemble); CeedChk_Hip(ceed, ierr);
+  ierr = CeedFree(&assembly_kernel_path); CeedChkBackend(ierr);
+  ierr = CeedFree(&assembly_kernel_source); CeedChkBackend(ierr);
 
   // Build 'full' B matrices (not 1D arrays used for tensor-product matrices)
   const CeedScalar *interp_in, *grad_in;
diff --git a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h
new file mode 100644
index 0000000000..ecaca9d444
--- /dev/null
+++ b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h
@@ -0,0 +1,148 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed/ceed.h>
+
+//------------------------------------------------------------------------------
+// Diagonal assembly kernels
+//------------------------------------------------------------------------------
+
+typedef enum { // Basis evaluation modes; every nonzero value is a distinct power of two
+  /// Perform no evaluation (either because there is no data or it is already at
+  /// quadrature points)
+  CEED_EVAL_NONE   = 0,
+  /// Interpolate from nodes to quadrature points
+  CEED_EVAL_INTERP = 1,
+  /// Evaluate gradients at quadrature points from input in a nodal basis
+  CEED_EVAL_GRAD   = 2,
+  /// Evaluate divergence at quadrature points from input in a nodal basis
+  CEED_EVAL_DIV    = 4,
+  /// Evaluate curl at quadrature points from input in a nodal basis
+  CEED_EVAL_CURL   = 8,
+  /// Using no input, evaluate quadrature weights on the reference element
+  CEED_EVAL_WEIGHT = 16,
+} CeedEvalMode; // NOTE(review): presumably must stay in sync with the host-side CeedEvalMode - confirm
+
+//------------------------------------------------------------------------------
+// Get Basis Emode Pointer: store in *basisptr the array that matches emode
+//------------------------------------------------------------------------------
+extern "C" __device__ void CeedOperatorGetBasisPointer_Cuda(const CeedScalar **basisptr,
+    CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp,
+    const CeedScalar *grad) {
+  switch (emode) {
+  case CEED_EVAL_NONE: // no basis action: use the identity array
+    *basisptr = identity;
+    break;
+  case CEED_EVAL_INTERP: // interpolation array
+    *basisptr = interp;
+    break;
+  case CEED_EVAL_GRAD: // gradient array, passed in already offset by the caller
+    *basisptr = grad;
+    break;
+  case CEED_EVAL_WEIGHT:
+  case CEED_EVAL_DIV:
+  case CEED_EVAL_CURL:
+    break; // *basisptr is left untouched; these modes are caught by QF Assembly
+  }
+}
+
+//------------------------------------------------------------------------------
+// Core code for diagonal assembly, called by both __global__ kernels below
+//------------------------------------------------------------------------------
+__device__ void diagonalCore(const CeedInt nelem,
+    const CeedScalar maxnorm, const bool pointBlock, // pointBlock: all (compOut, compIn) pairs, not just (compOut, compOut)
+    const CeedScalar *identity,
+    const CeedScalar *interpin, const CeedScalar *gradin,
+    const CeedScalar *interpout, const CeedScalar *gradout,
+    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
+    const CeedScalar *__restrict__ assembledqfarray,
+    CeedScalar *__restrict__ elemdiagarray) {
+  const int tid = threadIdx.x; // running with P threads, tid is evec node
+  const CeedScalar qfvaluebound = maxnorm*1e-12; // qf values with magnitude <= this bound are skipped
+
+  // Compute the diagonal of B^T D B
+  // Each element
+  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem;
+       e += gridDim.x*blockDim.z) {
+    CeedInt dout = -1; // index of the current output gradient block; advanced on each CEED_EVAL_GRAD
+    // Each basis eval mode pair
+    for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) {
+      const CeedScalar *bt = NULL; // stays NULL if the mode is WEIGHT, DIV, or CURL
+      if (emodeout[eout] == CEED_EVAL_GRAD)
+        dout += 1;
+      CeedOperatorGetBasisPointer_Cuda(&bt, emodeout[eout], identity, interpout,
+                                      &gradout[dout*NQPTS*NNODES]); // only stored when the mode is GRAD (dout >= 0 then)
+      CeedInt din = -1; // index of the current input gradient block; restarted for every output mode
+      for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) {
+        const CeedScalar *b = NULL; // stays NULL if the mode is WEIGHT, DIV, or CURL
+        if (emodein[ein] == CEED_EVAL_GRAD)
+          din += 1;
+        CeedOperatorGetBasisPointer_Cuda(&b, emodein[ein], identity, interpin,
+                                        &gradin[din*NQPTS*NNODES]); // only stored when the mode is GRAD (din >= 0 then)
+        // Each component
+        for (CeedInt compOut = 0; compOut < NCOMP; compOut++) {
+          // Each qpoint/node pair
+          if (pointBlock) {
+            // Point Block Diagonal: every (compOut, compIn) pair for node tid
+            for (CeedInt compIn = 0; compIn < NCOMP; compIn++) {
+              CeedScalar evalue = 0.;
+              for (CeedInt q = 0; q < NQPTS; q++) {
+                const CeedScalar qfvalue =
+                  assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)*
+                                     NCOMP+compOut)*nelem+e)*NQPTS+q]; // indexed as [ein][compIn][eout][compOut][e][q]
+                if (abs(qfvalue) > qfvaluebound)
+                  evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
+              }
+              elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue; // adds onto the existing entry
+            }
+          } else {
+            // Diagonal Only: component pair (compOut, compOut)
+            CeedScalar evalue = 0.;
+            for (CeedInt q = 0; q < NQPTS; q++) {
+              const CeedScalar qfvalue =
+                assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)*
+                                   NCOMP+compOut)*nelem+e)*NQPTS+q]; // indexed as [ein][compOut][eout][compOut][e][q]
+              if (abs(qfvalue) > qfvaluebound)
+                evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
+            }
+            elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue; // adds onto the existing entry
+          }
+        }
+      }
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Linear diagonal: kernel entry point that forwards to diagonalCore
+//------------------------------------------------------------------------------
+extern "C" __global__ void linearDiagonal(const CeedInt nelem,
+    const CeedScalar maxnorm, const CeedScalar *identity,
+    const CeedScalar *interpin, const CeedScalar *gradin,
+    const CeedScalar *interpout, const CeedScalar *gradout,
+    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
+    const CeedScalar *__restrict__ assembledqfarray,
+    CeedScalar *__restrict__ elemdiagarray) {
+  diagonalCore(nelem, maxnorm, false, identity, interpin, gradin, interpout,
+               gradout, emodein, emodeout, assembledqfarray, elemdiagarray); // third argument: pointBlock = false
+}
+
+//------------------------------------------------------------------------------
+// Linear point block diagonal: kernel entry point that forwards to diagonalCore
+//------------------------------------------------------------------------------
+extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem,
+    const CeedScalar maxnorm, const CeedScalar *identity,
+    const CeedScalar *interpin, const CeedScalar *gradin,
+    const CeedScalar *interpout, const CeedScalar *gradout,
+    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
+    const CeedScalar *__restrict__ assembledqfarray,
+    CeedScalar *__restrict__ elemdiagarray) {
+  diagonalCore(nelem, maxnorm, true, identity, interpin, gradin, interpout,
+               gradout, emodein, emodeout, assembledqfarray, elemdiagarray); // third argument: pointBlock = true
+}
+
+//------------------------------------------------------------------------------
diff --git a/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h
new file mode 100644
index 0000000000..cb75ddc7ea
--- /dev/null
+++ b/include/ceed/jit-source/cuda/cuda-ref-operator-assemble.h
@@ -0,0 +1,117 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed/ceed.h>
+
+//------------------------------------------------------------------------------
+// Matrix assembly kernel for low-order elements (2D thread block): thread (x, y) computes entry C_il with i = x, l = y
+//------------------------------------------------------------------------------
+extern "C" __launch_bounds__(BLOCK_SIZE) 
+           __global__ void linearAssemble(const CeedScalar *B_in, const CeedScalar *B_out,
+                   const CeedScalar *__restrict__ qf_array,
+                   CeedScalar *__restrict__ values_array) {
+
+  // This kernel assumes B_in and B_out have the same number of quadrature points and
+  // basis points.
+  // TODO: expand to more general cases
+  const int i = threadIdx.x; // The output row index of each B^TDB operation
+  const int l = threadIdx.y; // The output column index of each B^TDB operation
+                             // such that we have (Bout^T)_ij D_jk Bin_kl = C_il
+
+  // Strides for final output ordering, determined by the reference (interface) implementation of
+  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col
+  const CeedInt comp_out_stride = NNODES * NNODES; // one NNODES x NNODES block per (comp_in, comp_out) pair
+  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
+  const CeedInt e_stride = comp_in_stride * NCOMP;
+  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt
+  const CeedInt qe_stride = NQPTS;
+  const CeedInt qcomp_out_stride = NELEM * qe_stride;
+  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
+  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
+  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
+
+  // Loop over each element (if necessary)
+  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
+         e += gridDim.x*blockDim.z) {
+    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
+      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
+        CeedScalar result = 0.0; // accumulates C_il over every (emode_in, emode_out) pair
+        CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
+        for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
+          CeedInt b_in_index = emode_in * NQPTS * NNODES; // offset of this eval mode's NQPTS x NNODES block in B_in
+      	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
+             CeedInt b_out_index = emode_out * NQPTS * NNODES; // offset of this eval mode's NQPTS x NNODES block in B_out
+             CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
+            // Perform the B^T D B operation for this 'chunk' of D (the qf_array)
+            for (CeedInt j = 0; j < NQPTS; j++) {
+     	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
+	    }
+
+          }// end of emode_out 
+        } // end of emode_in
+        CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
+   	values_array[val_index] = result; // plain store: replaces any previous value
+      } // end of out component
+    } // end of in component
+  } // end of element loop
+}
+
+//------------------------------------------------------------------------------
+// Fallback kernel for larger orders (1D thread block): each thread owns column l and loops over the rows i
+//------------------------------------------------------------------------------
+extern "C" __launch_bounds__(BLOCK_SIZE) 
+           __global__ void linearAssembleFallback(const CeedScalar *B_in, const CeedScalar *B_out,
+                   const CeedScalar *__restrict__ qf_array,
+                   CeedScalar *__restrict__ values_array) {
+
+  // This kernel assumes B_in and B_out have the same number of quadrature points and
+  // basis points.
+  // TODO: expand to more general cases
+  const int l = threadIdx.x; // The output column index of each B^TDB operation
+                             // such that we have (Bout^T)_ij D_jk Bin_kl = C_il
+
+  // Strides for final output ordering, determined by the reference (interface) implementation of
+  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col
+  const CeedInt comp_out_stride = NNODES * NNODES; // one NNODES x NNODES block per (comp_in, comp_out) pair
+  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
+  const CeedInt e_stride = comp_in_stride * NCOMP;
+  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt
+  const CeedInt qe_stride = NQPTS;
+  const CeedInt qcomp_out_stride = NELEM * qe_stride;
+  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
+  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
+  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
+
+  // Loop over each element (if necessary)
+  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
+         e += gridDim.x*blockDim.z) {
+    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
+      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
+        for (CeedInt i = 0; i < NNODES; i++) { // row index i is looped here rather than taken from a thread index
+          CeedScalar result = 0.0; // accumulates C_il over every (emode_in, emode_out) pair
+          CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
+          for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
+            CeedInt b_in_index = emode_in * NQPTS * NNODES; // offset of this eval mode's NQPTS x NNODES block in B_in
+        	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
+               CeedInt b_out_index = emode_out * NQPTS * NNODES; // offset of this eval mode's NQPTS x NNODES block in B_out
+               CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
+              // Perform the B^T D B operation for this 'chunk' of D (the qf_array)
+              for (CeedInt j = 0; j < NQPTS; j++) {
+       	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
+  	    }
+
+            }// end of emode_out 
+          } // end of emode_in
+          CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
+     	  values_array[val_index] = result; // plain store: replaces any previous value
+        } // end of loop over element node index, i
+      } // end of out component
+    } // end of in component
+  } // end of element loop
+}
+
+//------------------------------------------------------------------------------
diff --git a/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h b/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h
new file mode 100644
index 0000000000..f5fd171c2d
--- /dev/null
+++ b/include/ceed/jit-source/hip/hip-ref-operator-assemble-diagonal.h
@@ -0,0 +1,147 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed/ceed.h>
+
+//------------------------------------------------------------------------------
+// Diagonal assembly kernels
+//------------------------------------------------------------------------------
+typedef enum { // Basis evaluation modes used by the diagonal assembly kernels below
+  /// Perform no evaluation (either because there is no data or it is already at
+  /// quadrature points)
+  CEED_EVAL_NONE   = 0,
+  /// Interpolate from nodes to quadrature points
+  CEED_EVAL_INTERP = 1,
+  /// Evaluate gradients at quadrature points from input in a nodal basis
+  CEED_EVAL_GRAD   = 2,
+  /// Evaluate divergence at quadrature points from input in a nodal basis
+  CEED_EVAL_DIV    = 4,
+  /// Evaluate curl at quadrature points from input in a nodal basis
+  CEED_EVAL_CURL   = 8,
+  /// Using no input, evaluate quadrature weights on the reference element
+  CEED_EVAL_WEIGHT = 16,
+} CeedEvalMode; // NOTE(review): presumably mirrors the host-side CeedEvalMode values - confirm they stay in sync
+
+//------------------------------------------------------------------------------
+// Get Basis Emode Pointer: point *basisptr at the matrix that matches emode
+//------------------------------------------------------------------------------
+extern "C" __device__ void CeedOperatorGetBasisPointer_Hip(const CeedScalar **basisptr,
+    CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp,
+    const CeedScalar *grad) {
+  switch (emode) {
+  case CEED_EVAL_NONE: // no basis action: use the identity matrix supplied by the caller
+    *basisptr = identity;
+    break;
+  case CEED_EVAL_INTERP: // interpolation matrix
+    *basisptr = interp;
+    break;
+  case CEED_EVAL_GRAD: // gradient matrix, used exactly as passed (no offset is applied here)
+    *basisptr = grad;
+    break;
+  case CEED_EVAL_WEIGHT:
+  case CEED_EVAL_DIV:
+  case CEED_EVAL_CURL:
+    break; // Caught by QF Assembly; *basisptr is left unmodified for these modes
+  }
+}
+
+//------------------------------------------------------------------------------
+// Core code for diagonal assembly: each thread accumulates the (tid, tid) entries of B_out^T D B_in
+//------------------------------------------------------------------------------
+__device__ void diagonalCore(const CeedInt nelem,
+    const CeedScalar maxnorm, const bool pointBlock,
+    const CeedScalar *identity,
+    const CeedScalar *interpin, const CeedScalar *gradin,
+    const CeedScalar *interpout, const CeedScalar *gradout,
+    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
+    const CeedScalar *__restrict__ assembledqfarray,
+    CeedScalar *__restrict__ elemdiagarray) {
+  const int tid = threadIdx.x; // running with P threads, tid is evec node
+  const CeedScalar qfvaluebound = maxnorm*1e-12; // QF values with magnitude at or below this bound are skipped
+
+  // Compute the diagonal of B^T D B
+  // Each element: start at blockIdx.x*blockDim.z + threadIdx.z, advance by gridDim.x*blockDim.z
+  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < nelem;
+       e += gridDim.x*blockDim.z) {
+    CeedInt dout = -1; // counts CEED_EVAL_GRAD output modes seen so far; selects the block of gradout
+    // Each basis eval mode pair
+    for (CeedInt eout = 0; eout < NUMEMODEOUT; eout++) {
+      const CeedScalar *bt = NULL; // stays NULL if the helper does not handle this mode
+      if (emodeout[eout] == CEED_EVAL_GRAD)
+        dout += 1;
+      CeedOperatorGetBasisPointer_Hip(&bt, emodeout[eout], identity, interpout,
+                                      &gradout[dout*NQPTS*NNODES]); // this address is only selected for CEED_EVAL_GRAD, when dout >= 0
+      CeedInt din = -1; // same counter for the input modes, restarted for every output mode
+      for (CeedInt ein = 0; ein < NUMEMODEIN; ein++) {
+        const CeedScalar *b = NULL; // stays NULL if the helper does not handle this mode
+        if (emodein[ein] == CEED_EVAL_GRAD)
+          din += 1;
+        CeedOperatorGetBasisPointer_Hip(&b, emodein[ein], identity, interpin,
+                                        &gradin[din*NQPTS*NNODES]); // this address is only selected for CEED_EVAL_GRAD, when din >= 0
+        // Each component
+        for (CeedInt compOut = 0; compOut < NCOMP; compOut++) {
+          // Each qpoint/node pair
+          if (pointBlock) {
+            // Point Block Diagonal: one entry per (compOut, compIn) pair at this node
+            for (CeedInt compIn = 0; compIn < NCOMP; compIn++) {
+              CeedScalar evalue = 0.;
+              for (CeedInt q = 0; q < NQPTS; q++) {
+                const CeedScalar qfvalue = // index order, slowest to fastest: ein, compIn, eout, compOut, e, q
+                  assembledqfarray[((((ein*NCOMP+compIn)*NUMEMODEOUT+eout)*
+                                     NCOMP+compOut)*nelem+e)*NQPTS+q];
+                if (abs(qfvalue) > qfvaluebound)
+                  evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
+              }
+              elemdiagarray[((compOut*NCOMP+compIn)*nelem+e)*NNODES+tid] += evalue; // NOTE(review): accumulates with +=, so the output is presumably zeroed by the caller - confirm
+            }
+          } else {
+            // Diagonal Only: uses the compIn == compOut entries of the assembled QFunction
+            CeedScalar evalue = 0.;
+            for (CeedInt q = 0; q < NQPTS; q++) {
+              const CeedScalar qfvalue = // same index order as above, with compOut in both component slots
+                assembledqfarray[((((ein*NCOMP+compOut)*NUMEMODEOUT+eout)*
+                                   NCOMP+compOut)*nelem+e)*NQPTS+q];
+              if (abs(qfvalue) > qfvaluebound)
+                evalue += bt[q*NNODES+tid] * qfvalue * b[q*NNODES+tid];
+            }
+            elemdiagarray[(compOut*nelem+e)*NNODES+tid] += evalue; // NOTE(review): accumulates with +=, so the output is presumably zeroed by the caller - confirm
+          }
+        }
+      }
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Linear diagonal: kernel entry point that runs diagonalCore with pointBlock = false
+//------------------------------------------------------------------------------
+extern "C" __global__ void linearDiagonal(const CeedInt nelem,
+    const CeedScalar maxnorm, const CeedScalar *identity,
+    const CeedScalar *interpin, const CeedScalar *gradin,
+    const CeedScalar *interpout, const CeedScalar *gradout,
+    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
+    const CeedScalar *__restrict__ assembledqfarray,
+    CeedScalar *__restrict__ elemdiagarray) {
+  diagonalCore(nelem, maxnorm, false, identity, interpin, gradin, interpout, // false selects the "Diagonal Only" branch
+               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
+}
+
+//------------------------------------------------------------------------------
+// Linear point block diagonal: kernel entry point that runs diagonalCore with pointBlock = true
+//------------------------------------------------------------------------------
+extern "C" __global__ void linearPointBlockDiagonal(const CeedInt nelem,
+    const CeedScalar maxnorm, const CeedScalar *identity,
+    const CeedScalar *interpin, const CeedScalar *gradin,
+    const CeedScalar *interpout, const CeedScalar *gradout,
+    const CeedEvalMode *emodein, const CeedEvalMode *emodeout,
+    const CeedScalar *__restrict__ assembledqfarray,
+    CeedScalar *__restrict__ elemdiagarray) {
+  diagonalCore(nelem, maxnorm, true, identity, interpin, gradin, interpout, // true selects the "Point Block Diagonal" branch
+               gradout, emodein, emodeout, assembledqfarray, elemdiagarray);
+}
+
+//------------------------------------------------------------------------------
diff --git a/include/ceed/jit-source/hip/hip-ref-operator-assemble.h b/include/ceed/jit-source/hip/hip-ref-operator-assemble.h
new file mode 100644
index 0000000000..cb75ddc7ea
--- /dev/null
+++ b/include/ceed/jit-source/hip/hip-ref-operator-assemble.h
@@ -0,0 +1,117 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed/ceed.h>
+
+//------------------------------------------------------------------------------
+// Matrix assembly kernel for low-order elements (2D thread block): one thread per output entry (i, l)
+//------------------------------------------------------------------------------
+extern "C" __launch_bounds__(BLOCK_SIZE) 
+           __global__ void linearAssemble(const CeedScalar *B_in, const CeedScalar *B_out,
+                   const CeedScalar *__restrict__ qf_array,
+                   CeedScalar *__restrict__ values_array) {
+
+  // This kernel assumes B_in and B_out have the same number of quadrature points and
+  // basis points (both are indexed with NQPTS and NNODES).
+  // TODO: expand to more general cases
+  const int i = threadIdx.x; // The output row index of each B^TDB operation
+  const int l = threadIdx.y; // The output column index of each B^TDB operation
+                             // such that we have (Bout^T)_ij D_jk Bin_kl = C_il
+
+  // Strides for final output ordering, determined by the reference (interface) implementation of
+  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col
+  const CeedInt comp_out_stride = NNODES * NNODES;
+  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
+  const CeedInt e_stride = comp_in_stride * NCOMP;
+  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt
+  const CeedInt qe_stride = NQPTS;
+  const CeedInt qcomp_out_stride = NELEM * qe_stride;
+  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
+  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
+  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
+
+  // Loop over each element (if necessary): start at blockIdx.x*blockDim.z + threadIdx.z, advance by gridDim.x*blockDim.z
+  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
+         e += gridDim.x*blockDim.z) {
+    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
+      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
+        CeedScalar result = 0.0;
+        CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
+        for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
+          CeedInt b_in_index = emode_in * NQPTS * NNODES;
+      	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
+             CeedInt b_out_index = emode_out * NQPTS * NNODES;
+             CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
+             // Perform the B^T D B operation for this 'chunk' of D (the qf_array); result sums over every emode pair
+            for (CeedInt j = 0; j < NQPTS; j++) {
+     	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
+	    }
+
+          }// end of emode_out
+        } // end of emode_in
+        CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
+   	values_array[val_index] = result;
+      } // end of out component
+    } // end of in component
+  } // end of element loop
+}
+
+//------------------------------------------------------------------------------
+// Fallback kernel for larger orders (1D thread block): one thread per output column l, rows i looped serially
+//------------------------------------------------------------------------------
+extern "C" __launch_bounds__(BLOCK_SIZE) 
+           __global__ void linearAssembleFallback(const CeedScalar *B_in, const CeedScalar *B_out,
+                   const CeedScalar *__restrict__ qf_array,
+                   CeedScalar *__restrict__ values_array) {
+
+  // This kernel assumes B_in and B_out have the same number of quadrature points and
+  // basis points (both are indexed with NQPTS and NNODES).
+  // TODO: expand to more general cases
+  const int l = threadIdx.x; // The output column index of each B^TDB operation
+                             // such that we have (Bout^T)_ij D_jk Bin_kl = C_il
+
+  // Strides for final output ordering, determined by the reference (interface) implementation of
+  // the symbolic assembly, slowest --> fastest: element, comp_in, comp_out, node_row, node_col
+  const CeedInt comp_out_stride = NNODES * NNODES;
+  const CeedInt comp_in_stride = comp_out_stride * NCOMP;
+  const CeedInt e_stride = comp_in_stride * NCOMP;
+  // Strides for QF array, slowest --> fastest:  emode_in, comp_in, emode_out, comp_out, elem, qpt
+  const CeedInt qe_stride = NQPTS;
+  const CeedInt qcomp_out_stride = NELEM * qe_stride;
+  const CeedInt qemode_out_stride = qcomp_out_stride * NCOMP;
+  const CeedInt qcomp_in_stride = qemode_out_stride * NUMEMODEOUT;
+  const CeedInt qemode_in_stride = qcomp_in_stride * NCOMP;
+
+  // Loop over each element (if necessary): start at blockIdx.x*blockDim.z + threadIdx.z, advance by gridDim.x*blockDim.z
+  for (CeedInt e = blockIdx.x*blockDim.z + threadIdx.z; e < NELEM;
+         e += gridDim.x*blockDim.z) {
+    for (CeedInt comp_in = 0; comp_in < NCOMP; comp_in++) {
+      for (CeedInt comp_out = 0; comp_out < NCOMP; comp_out++) {
+        for (CeedInt i = 0; i < NNODES; i++) {
+          CeedScalar result = 0.0;
+          CeedInt qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e; 
+          for (CeedInt emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
+            CeedInt b_in_index = emode_in * NQPTS * NNODES;
+        	  for (CeedInt emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
+               CeedInt b_out_index = emode_out * NQPTS * NNODES;
+               CeedInt qf_index = qf_index_comp + qemode_out_stride * emode_out + qemode_in_stride * emode_in;
+               // Perform the B^T D B operation for this 'chunk' of D (the qf_array); result sums over every emode pair
+              for (CeedInt j = 0; j < NQPTS; j++) {
+       	      result += B_out[b_out_index + j * NNODES  + i] * qf_array[qf_index + j] * B_in[b_in_index + j * NNODES + l];  
+  	    }
+
+            }// end of emode_out
+          } // end of emode_in
+          CeedInt val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
+     	  values_array[val_index] = result;
+        } // end of loop over element node index, i
+      } // end of out component
+    } // end of in component
+  } // end of element loop
+}
+
+//------------------------------------------------------------------------------

From e6a0ab89c38f1283091e68ca521745ca21d629af Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Thu, 21 Apr 2022 09:21:22 -0600
Subject: [PATCH 21/59] ctx - move CeedQFunctionContextGetFieldLabel to backend
 interface

---
 include/ceed/backend.h            |  2 ++
 include/ceed/ceed.h               |  2 --
 interface/ceed-qfunctioncontext.c | 60 +++++++++++++++----------------
 tests/t407-qfunction.c            |  1 +
 4 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/include/ceed/backend.h b/include/ceed/backend.h
index 6156a1f881..8c6a21ee06 100644
--- a/include/ceed/backend.h
+++ b/include/ceed/backend.h
@@ -250,6 +250,8 @@ CEED_EXTERN int CeedQFunctionContextGetBackendData(CeedQFunctionContext ctx,
     void *data);
 CEED_EXTERN int CeedQFunctionContextSetBackendData(CeedQFunctionContext ctx,
     void *data);
+CEED_EXTERN int CeedQFunctionContextGetFieldLabel(CeedQFunctionContext ctx,
+    const char *field_name, CeedContextFieldLabel *field_label);
 CEED_EXTERN int CeedQFunctionContextSetGeneric(CeedQFunctionContext ctx,
                                    CeedContextFieldLabel field_label,
                                    CeedContextFieldType field_type, void *value);
diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h
index e67df5577e..da00e32cad 100644
--- a/include/ceed/ceed.h
+++ b/include/ceed/ceed.h
@@ -677,8 +677,6 @@ CEED_EXTERN int CeedQFunctionContextRegisterDouble(CeedQFunctionContext ctx,
 CEED_EXTERN int CeedQFunctionContextRegisterInt32(CeedQFunctionContext ctx,
     const char *field_name, size_t field_offset, size_t num_values,
     const char *field_description);
-CEED_EXTERN int CeedQFunctionContextGetFieldLabel(CeedQFunctionContext ctx,
-    const char *field_name, CeedContextFieldLabel *field_label);
 CEED_EXTERN int CeedQFunctionContextGetAllFieldLabels(CeedQFunctionContext ctx,
     const CeedContextFieldLabel **field_labels, CeedInt *num_fields);
 CEED_EXTERN int CeedContextFieldLabelGetDescription(CeedContextFieldLabel label,
diff --git a/interface/ceed-qfunctioncontext.c b/interface/ceed-qfunctioncontext.c
index 9e51406e6f..5551236b47 100644
--- a/interface/ceed-qfunctioncontext.c
+++ b/interface/ceed-qfunctioncontext.c
@@ -221,6 +221,36 @@ int CeedQFunctionContextSetBackendData(CeedQFunctionContext ctx, void *data) {
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Get label for a registered QFunctionContext field, or `NULL` if no
+           field has been registered with this `field_name`
+
+  @param[in] ctx           CeedQFunctionContext
+  @param[in] field_name    Name of field whose label is retrieved
+  @param[out] field_label  Variable to store field label, set to `NULL` if no field matches
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref User
+**/
+int CeedQFunctionContextGetFieldLabel(CeedQFunctionContext ctx,
+                                      const char *field_name,
+                                      CeedContextFieldLabel *field_label) {
+  int ierr;
+
+  CeedInt field_index;
+  ierr = CeedQFunctionContextGetFieldIndex(ctx, field_name, &field_index);
+  CeedChk(ierr);
+
+  if (field_index != -1) { // an index of -1 is treated as "no such field"
+    *field_label = ctx->field_labels[field_index];
+  } else {
+    *field_label = NULL;
+  }
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief Set QFunctionContext field
 
@@ -653,36 +683,6 @@ int CeedQFunctionContextGetAllFieldLabels(CeedQFunctionContext ctx,
   return CEED_ERROR_SUCCESS;
 }
 
-/**
-  @brief Get label for a registered QFunctionContext field, or `NULL` if no
-           field has been registered with this `field_name`
-
-  @param[in] ctx           CeedQFunctionContext
-  @param[in] field_name    Name of field to retrieve label
-  @param[out] field_label  Variable to field label
-
-  @return An error code: 0 - success, otherwise - failure
-
-  @ref User
-**/
-int CeedQFunctionContextGetFieldLabel(CeedQFunctionContext ctx,
-                                      const char *field_name,
-                                      CeedContextFieldLabel *field_label) {
-  int ierr;
-
-  CeedInt field_index;
-  ierr = CeedQFunctionContextGetFieldIndex(ctx, field_name, &field_index);
-  CeedChk(ierr);
-
-  if (field_index != -1) {
-    *field_label = ctx->field_labels[field_index];
-  } else {
-    *field_label = NULL;
-  }
-
-  return CEED_ERROR_SUCCESS;
-}
-
 /**
   @brief Get the descriptive information about a CeedContextFieldLabel
 
diff --git a/tests/t407-qfunction.c b/tests/t407-qfunction.c
index d024ba6679..a65120d62f 100644
--- a/tests/t407-qfunction.c
+++ b/tests/t407-qfunction.c
@@ -2,6 +2,7 @@
 /// Test registering and setting QFunctionContext fields
 /// \test Test registering and setting QFunctionContext fields
 #include <ceed.h>
+#include <ceed/backend.h>
 #include <stddef.h>
 #include <string.h>
 

From bfacc300f2c0575b0498d737853db8ffd1d5be6d Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Thu, 21 Apr 2022 09:24:59 -0600
Subject: [PATCH 22/59] ctx - move CeedQFunctionContextSet* to backend
 interface

---
 include/ceed/backend.h            |   4 ++
 include/ceed/ceed.h               |   4 --
 interface/ceed-qfunctioncontext.c | 112 +++++++++++++++---------------
 3 files changed, 60 insertions(+), 60 deletions(-)

diff --git a/include/ceed/backend.h b/include/ceed/backend.h
index 8c6a21ee06..3b67db3343 100644
--- a/include/ceed/backend.h
+++ b/include/ceed/backend.h
@@ -255,6 +255,10 @@ CEED_EXTERN int CeedQFunctionContextGetFieldLabel(CeedQFunctionContext ctx,
 CEED_EXTERN int CeedQFunctionContextSetGeneric(CeedQFunctionContext ctx,
                                    CeedContextFieldLabel field_label,
                                    CeedContextFieldType field_type, void *value);
+CEED_EXTERN int CeedQFunctionContextSetDouble(CeedQFunctionContext ctx,
+    CeedContextFieldLabel field_label, double *values);
+CEED_EXTERN int CeedQFunctionContextSetInt32(CeedQFunctionContext ctx,
+    CeedContextFieldLabel field_label, int *values);
 CEED_EXTERN int CeedQFunctionContextReference(CeedQFunctionContext ctx);
 
 CEED_EXTERN int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data);
diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h
index da00e32cad..e7fbaaf9e3 100644
--- a/include/ceed/ceed.h
+++ b/include/ceed/ceed.h
@@ -682,10 +682,6 @@ CEED_EXTERN int CeedQFunctionContextGetAllFieldLabels(CeedQFunctionContext ctx,
 CEED_EXTERN int CeedContextFieldLabelGetDescription(CeedContextFieldLabel label,
     const char **field_name, const char **field_description, size_t *num_values,
     CeedContextFieldType *field_type);
-CEED_EXTERN int CeedQFunctionContextSetDouble(CeedQFunctionContext ctx,
-    CeedContextFieldLabel field_label, double *values);
-CEED_EXTERN int CeedQFunctionContextSetInt32(CeedQFunctionContext ctx,
-    CeedContextFieldLabel field_label, int *values);
 CEED_EXTERN int CeedQFunctionContextGetContextSize(CeedQFunctionContext ctx,
     size_t *ctx_size);
 CEED_EXTERN int CeedQFunctionContextView(CeedQFunctionContext ctx,
diff --git a/interface/ceed-qfunctioncontext.c b/interface/ceed-qfunctioncontext.c
index 5551236b47..3a930c2688 100644
--- a/interface/ceed-qfunctioncontext.c
+++ b/interface/ceed-qfunctioncontext.c
@@ -287,6 +287,62 @@ int CeedQFunctionContextSetGeneric(CeedQFunctionContext ctx,
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Set QFunctionContext field holding a double precision value
+
+  @param ctx         CeedQFunctionContext
+  @param field_label Label of field to set; must not be `NULL`
+  @param values      Values to set, forwarded to CeedQFunctionContextSetGeneric
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref User
+**/
+int CeedQFunctionContextSetDouble(CeedQFunctionContext ctx,
+                                  CeedContextFieldLabel field_label, double *values) {
+  int ierr;
+
+  if (!field_label) // reject a NULL label before delegating
+    // LCOV_EXCL_START
+    return CeedError(ctx->ceed, CEED_ERROR_UNSUPPORTED,
+                     "Invalid field label");
+  // LCOV_EXCL_STOP
+
+  ierr = CeedQFunctionContextSetGeneric(ctx, field_label,
+                                        CEED_CONTEXT_FIELD_DOUBLE,
+                                        values); CeedChk(ierr);
+
+  return CEED_ERROR_SUCCESS;
+}
+
+/**
+  @brief Set QFunctionContext field holding an int32 value
+
+  @param ctx         CeedQFunctionContext
+  @param field_label Label of field to set; must not be `NULL`
+  @param values      Values to set, forwarded to CeedQFunctionContextSetGeneric
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref User
+**/
+int CeedQFunctionContextSetInt32(CeedQFunctionContext ctx,
+                                 CeedContextFieldLabel field_label, int *values) {
+  int ierr;
+
+  if (!field_label) // reject a NULL label before delegating
+    // LCOV_EXCL_START
+    return CeedError(ctx->ceed, CEED_ERROR_UNSUPPORTED,
+                     "Invalid field label");
+  // LCOV_EXCL_STOP
+
+  ierr = CeedQFunctionContextSetGeneric(ctx, field_label,
+                                        CEED_CONTEXT_FIELD_INT32,
+                                        values); CeedChk(ierr);
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief Increment the reference counter for a CeedQFunctionContext
 
@@ -708,62 +764,6 @@ int CeedContextFieldLabelGetDescription(CeedContextFieldLabel label,
   return CEED_ERROR_SUCCESS;
 }
 
-/**
-  @brief Set QFunctionContext field holding a double precision value
-
-  @param ctx         CeedQFunctionContext
-  @param field_label Label for field to register
-  @param values      Values to set
-
-  @return An error code: 0 - success, otherwise - failure
-
-  @ref User
-**/
-int CeedQFunctionContextSetDouble(CeedQFunctionContext ctx,
-                                  CeedContextFieldLabel field_label, double *values) {
-  int ierr;
-
-  if (!field_label)
-    // LCOV_EXCL_START
-    return CeedError(ctx->ceed, CEED_ERROR_UNSUPPORTED,
-                     "Invalid field label");
-  // LCOV_EXCL_STOP
-
-  ierr = CeedQFunctionContextSetGeneric(ctx, field_label,
-                                        CEED_CONTEXT_FIELD_DOUBLE,
-                                        values); CeedChk(ierr);
-
-  return CEED_ERROR_SUCCESS;
-}
-
-/**
-  @brief Set QFunctionContext field holding an int32 value
-
-  @param ctx         CeedQFunctionContext
-  @param field_label Label for field to register
-  @param values      Values to set
-
-  @return An error code: 0 - success, otherwise - failure
-
-  @ref User
-**/
-int CeedQFunctionContextSetInt32(CeedQFunctionContext ctx,
-                                 CeedContextFieldLabel field_label, int *values) {
-  int ierr;
-
-  if (!field_label)
-    // LCOV_EXCL_START
-    return CeedError(ctx->ceed, CEED_ERROR_UNSUPPORTED,
-                     "Invalid field label");
-  // LCOV_EXCL_STOP
-
-  ierr = CeedQFunctionContextSetGeneric(ctx, field_label,
-                                        CEED_CONTEXT_FIELD_INT32,
-                                        values); CeedChk(ierr);
-
-  return CEED_ERROR_SUCCESS;
-}
-
 /**
   @brief Get data size for a Context
 

From 3e1e85abf6c24fac2b39e0494a6c97716f813df0 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Thu, 21 Apr 2022 09:59:09 -0600
Subject: [PATCH 23/59] ctx - update references to function usage level

---
 interface/ceed-qfunctioncontext.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/interface/ceed-qfunctioncontext.c b/interface/ceed-qfunctioncontext.c
index 3a930c2688..b72fc34d75 100644
--- a/interface/ceed-qfunctioncontext.c
+++ b/interface/ceed-qfunctioncontext.c
@@ -231,7 +231,7 @@ int CeedQFunctionContextSetBackendData(CeedQFunctionContext ctx, void *data) {
 
   @return An error code: 0 - success, otherwise - failure
 
-  @ref User
+  @ref Backend
 **/
 int CeedQFunctionContextGetFieldLabel(CeedQFunctionContext ctx,
                                       const char *field_name,
@@ -261,7 +261,7 @@ int CeedQFunctionContextGetFieldLabel(CeedQFunctionContext ctx,
 
   @return An error code: 0 - success, otherwise - failure
 
-  @ref User
+  @ref Backend
 **/
 int CeedQFunctionContextSetGeneric(CeedQFunctionContext ctx,
                                    CeedContextFieldLabel field_label,
@@ -296,7 +296,7 @@ int CeedQFunctionContextSetGeneric(CeedQFunctionContext ctx,
 
   @return An error code: 0 - success, otherwise - failure
 
-  @ref User
+  @ref Backend
 **/
 int CeedQFunctionContextSetDouble(CeedQFunctionContext ctx,
                                   CeedContextFieldLabel field_label, double *values) {
@@ -324,7 +324,7 @@ int CeedQFunctionContextSetDouble(CeedQFunctionContext ctx,
 
   @return An error code: 0 - success, otherwise - failure
 
-  @ref User
+  @ref Backend
 **/
 int CeedQFunctionContextSetInt32(CeedQFunctionContext ctx,
                                  CeedContextFieldLabel field_label, int *values) {

From 11436a05c428446f6e40fe7eea93d96698ed3bc1 Mon Sep 17 00:00:00 2001
From: James Wright <james@jameswright.xyz>
Date: Wed, 20 Apr 2022 14:13:29 -0600
Subject: [PATCH 24/59] examples/fluids: Set ctx->time via context fields

---
 examples/fluids/navierstokes.c         |  6 +-----
 examples/fluids/navierstokes.h         |  3 ++-
 examples/fluids/problems/eulervortex.c |  3 +++
 examples/fluids/src/setuplibceed.c     | 11 ++++++++++-
 examples/fluids/src/setupts.c          | 11 +++++++----
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c
index 29f321d19d..f7021ab29b 100644
--- a/examples/fluids/navierstokes.c
+++ b/examples/fluids/navierstokes.c
@@ -149,11 +149,7 @@ int main(int argc, char **argv) {
   // Set up libCEED
   // ---------------------------------------------------------------------------
   // -- Set up libCEED objects
-  ierr = SetupLibceed(ceed, ceed_data, dm, user, app_ctx, problem, bc);
-  CHKERRQ(ierr);
-
-  // -- Set up context for QFunctions
-  ierr = problem->setup_ctx(ceed, ceed_data, app_ctx, setup_ctx, phys_ctx);
+  ierr = SetupLibceed(ceed, ceed_data, dm, user, app_ctx, problem, bc, setup_ctx);
   CHKERRQ(ierr);
 
   // ---------------------------------------------------------------------------
diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h
index 973c0efd1b..157222b5fd 100644
--- a/examples/fluids/navierstokes.h
+++ b/examples/fluids/navierstokes.h
@@ -282,6 +282,7 @@ struct Physics_private {
   PetscBool                implicit;
   PetscBool                has_curr_time;
   PetscBool                has_neumann;
+  CeedContextFieldLabel    solution_time_label;
 };
 
 // Problem specific data
@@ -384,7 +385,7 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc,
                                        CeedOperator *op_apply);
 
 PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
-                            AppCtx app_ctx, ProblemData *problem, SimpleBC bc);
+                            AppCtx app_ctx, ProblemData *problem, SimpleBC bc, SetupContext setup_ctx);
 
 // -----------------------------------------------------------------------------
 // Time-stepping functions
diff --git a/examples/fluids/problems/eulervortex.c b/examples/fluids/problems/eulervortex.c
index a31d9fd2b1..7760ef6412 100644
--- a/examples/fluids/problems/eulervortex.c
+++ b/examples/fluids/problems/eulervortex.c
@@ -180,6 +180,9 @@ PetscErrorCode SetupContext_EULER_VORTEX(Ceed ceed, CeedData ceed_data,
   CeedQFunctionContextSetData(ceed_data->euler_context, CEED_MEM_HOST,
                               CEED_USE_POINTER,
                               sizeof(*phys->euler_ctx), phys->euler_ctx);
+  CeedQFunctionContextRegisterDouble(ceed_data->euler_context, "solution time",
+                                     offsetof(struct EulerContext_, curr_time), 1, "Physical time of the solution");
+
   if (ceed_data->qf_ics)
     CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->euler_context);
   if (ceed_data->qf_rhs_vol)
diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c
index cae34465cb..eb8b5ed09a 100644
--- a/examples/fluids/src/setuplibceed.c
+++ b/examples/fluids/src/setuplibceed.c
@@ -205,11 +205,16 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc,
       CeedOperatorDestroy(&op_apply_outflow);
     }
   }
+
+  // ----- Get Context Labels for Operator
+  CeedOperatorContextGetFieldLabel(*op_apply, "solution time",
+                                   &phys->solution_time_label);
+
   PetscFunctionReturn(0);
 }
 
 PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
-                            AppCtx app_ctx, ProblemData *problem, SimpleBC bc) {
+                            AppCtx app_ctx, ProblemData *problem, SimpleBC bc, SetupContext setup_ctx) {
   PetscErrorCode ierr;
   PetscFunctionBeginUser;
 
@@ -452,6 +457,10 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   CeedOperatorApply(ceed_data->op_setup_vol, ceed_data->x_coord,
                     ceed_data->q_data, CEED_REQUEST_IMMEDIATE);
 
+  // -- Set up context for QFunctions
+  ierr = problem->setup_ctx(ceed, ceed_data, app_ctx, setup_ctx, user->phys);
+  CHKERRQ(ierr);
+
   // -- Create and apply CEED Composite Operator for the entire domain
   if (!user->phys->implicit) { // RHS
     ierr = CreateOperatorForDomain(ceed, dm, bc, ceed_data, user->phys,
diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c
index 75713c32b0..511666684e 100644
--- a/examples/fluids/src/setupts.c
+++ b/examples/fluids/src/setupts.c
@@ -89,8 +89,9 @@ PetscErrorCode RHS_NS(TS ts, PetscReal t, Vec Q, Vec G, void *user_data) {
   PetscErrorCode ierr;
   PetscFunctionBeginUser;
 
-  // Update EulerContext
-  if (user->phys->has_curr_time) user->phys->euler_ctx->curr_time = t;
+  // Update solution time
+  if (user->phys->solution_time_label)
+    CeedOperatorContextSetDouble(user->op_rhs, user->phys->solution_time_label, &t);
 
   // Get local vectors
   ierr = DMGetLocalVector(user->dm, &Q_loc); CHKERRQ(ierr);
@@ -146,8 +147,10 @@ PetscErrorCode IFunction_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, Vec G,
   PetscErrorCode    ierr;
   PetscFunctionBeginUser;
 
-  // Update EulerContext
-  if (user->phys->has_curr_time) user->phys->euler_ctx->curr_time = t;
+  // Update solution time
+  if (user->phys->solution_time_label)
+    CeedOperatorContextSetDouble(user->op_ifunction,
+                                 user->phys->solution_time_label, &t);
 
   // Get local vectors
   ierr = DMGetLocalVector(user->dm, &Q_loc); CHKERRQ(ierr);

From 2288fb5222bbca88523f94a06377dc76b8b46264 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Mon, 25 Apr 2022 11:03:55 -0600
Subject: [PATCH 25/59] minor - doc update and consistency for qextra

---
 benchmarks/petsc-bps.sh          | 2 +-
 benchmarks/petsc-bpsraw.sh       | 2 +-
 examples/fluids/README.md        | 2 +-
 examples/petsc/README.md         | 4 ++--
 examples/solids/README.md        | 2 +-
 examples/solids/src/cl-options.c | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/benchmarks/petsc-bps.sh b/benchmarks/petsc-bps.sh
index e589b36412..36f900cc16 100755
--- a/benchmarks/petsc-bps.sh
+++ b/benchmarks/petsc-bps.sh
@@ -21,7 +21,7 @@ function run_tests()
 
    # Some of the available options are:
    # -degree <1>: Polynomial degree of tensor product basis
-   # -qextra <1>: Number of extra quadrature points
+   # -q_extra <1>: Number of extra quadrature points
    # -ceed </cpu/self>: CEED resource specifier
    # -local_nodes <1000>: Target number of locally (per rank) owned nodes
 
diff --git a/benchmarks/petsc-bpsraw.sh b/benchmarks/petsc-bpsraw.sh
index f39f7ac521..bcfc97c16e 100755
--- a/benchmarks/petsc-bpsraw.sh
+++ b/benchmarks/petsc-bpsraw.sh
@@ -21,7 +21,7 @@ function run_tests()
 
    # Some of the available options are:
    # -degree <1>: Polynomial degree of tensor product basis
-   # -qextra <1>: Number of extra quadrature points
+   # -q_extra <1>: Number of extra quadrature points
    # -ceed </cpu/self>: CEED resource specifier
    # -local <1000>: Target number of locally (per rank) owned nodes
 
diff --git a/examples/fluids/README.md b/examples/fluids/README.md
index 43b41194cf..2ef4ca990a 100644
--- a/examples/fluids/README.md
+++ b/examples/fluids/README.md
@@ -60,7 +60,7 @@ The following options are common among all problem types:
   - Polynomial degree of tensor product basis (must be >= 1)
   - `1`
 
-* - `-qextra`
+* - `-q_extra`
   - Number of extra quadrature points
   - `2`
 
diff --git a/examples/petsc/README.md b/examples/petsc/README.md
index 6ac521ff7b..9339cb791a 100644
--- a/examples/petsc/README.md
+++ b/examples/petsc/README.md
@@ -61,7 +61,7 @@ The following arguments can be specified for all of the above examples:
 - `-ceed`              - CEED resource specifier
 - `-problem`           - CEED benchmark problem to solve
 - `-degree`            - Polynomial degree of tensor product basis
-- `-qextra`            - Number of extra quadrature points
+- `-q_extra`           - Number of extra quadrature points
 - `-test`              - Testing mode (do not print unless error is large)
 - `-benchmark`         - Benchmarking mode (prints benchmark statistics)
 
@@ -84,7 +84,7 @@ The following arguments can be specified for the area example:
 - `-ceed`              - CEED resource specifier
 - `-problem`           - Problem to solve, either 'cube' or 'sphere'
 - `-petscspace_degree` - Polynomial degree of tensor product basis
-- `-qextra`            - Number of extra quadrature points
+- `-q_extra`           - Number of extra quadrature points
 - `-test`              - Testing mode (do not print unless error is large)
 - `-mesh`              - Read mesh from file
 
diff --git a/examples/solids/README.md b/examples/solids/README.md
index 0a1fd57629..c204333939 100644
--- a/examples/solids/README.md
+++ b/examples/solids/README.md
@@ -96,7 +96,7 @@ The command line options just shown are the minimum requirements to run the mini
   - CEED resource specifier
   - `/cpu/self`
 
-* - `-qextra`
+* - `-q_extra`
   - Number of extra quadrature points
   - `0`
 
diff --git a/examples/solids/src/cl-options.c b/examples/solids/src/cl-options.c
index 1849fae7b5..3e5a0df1ed 100644
--- a/examples/solids/src/cl-options.c
+++ b/examples/solids/src/cl-options.c
@@ -40,7 +40,7 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx) {
   CHKERRQ(ierr);
 
   app_ctx->q_extra         = 0;
-  ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points",
+  ierr = PetscOptionsInt("-q_extra", "Number of extra quadrature points",
                          NULL, app_ctx->q_extra, &app_ctx->q_extra, NULL);
   CHKERRQ(ierr);
 

From f48ed27d17d281fc05396017c95057b07eeb82f6 Mon Sep 17 00:00:00 2001
From: nbeams <246972+nbeams@users.noreply.github.com>
Date: Mon, 25 Apr 2022 13:01:32 -0600
Subject: [PATCH 26/59] Use backend functions for SyncArray in CUDA and HIP

---
 backends/cuda-ref/ceed-cuda-vector.c   | 77 +++++++++++++-------------
 backends/hip-ref/ceed-hip-ref-vector.c | 76 ++++++++++++-------------
 interface/ceed.c                       |  1 +
 3 files changed, 79 insertions(+), 75 deletions(-)

diff --git a/backends/cuda-ref/ceed-cuda-vector.c b/backends/cuda-ref/ceed-cuda-vector.c
index bb373dd037..a6dcb2462c 100644
--- a/backends/cuda-ref/ceed-cuda-vector.c
+++ b/backends/cuda-ref/ceed-cuda-vector.c
@@ -13,6 +13,30 @@
 #include <string.h>
 #include "ceed-cuda-ref.h"
 
+
+//------------------------------------------------------------------------------
+// Check if host/device sync is needed
+//------------------------------------------------------------------------------
+static inline int CeedVectorNeedSync_Cuda(const CeedVector vec,
+    CeedMemType mem_type, bool *need_sync) {
+  int ierr;
+  CeedVector_Cuda *impl;
+  ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr);
+
+  bool has_valid_array = false;
+  ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChkBackend(ierr);
+  switch (mem_type) {
+  case CEED_MEM_HOST:
+    *need_sync = has_valid_array && !impl->h_array;
+    break;
+  case CEED_MEM_DEVICE:
+    *need_sync = has_valid_array && !impl->d_array;
+    break;
+  }
+
+  return CEED_ERROR_SUCCESS;
+}
+
 //------------------------------------------------------------------------------
 // Sync host to device
 //------------------------------------------------------------------------------
@@ -88,8 +112,16 @@ static inline int CeedVectorSyncD2H_Cuda(const CeedVector vec) {
 //------------------------------------------------------------------------------
 // Sync arrays
 //------------------------------------------------------------------------------
-static inline int CeedVectorSync_Cuda(const CeedVector vec,
-                                      CeedMemType mem_type) {
+static int CeedVectorSyncArray_Cuda(const CeedVector vec,
+                                    CeedMemType mem_type) {
+  int ierr;
+  // Check whether device/host sync is needed
+  bool need_sync = false;
+  ierr = CeedVectorNeedSync_Cuda(vec, mem_type, &need_sync);
+  CeedChkBackend(ierr);
+  if (!need_sync)
+    return CEED_ERROR_SUCCESS;
+
   switch (mem_type) {
   case CEED_MEM_HOST: return CeedVectorSyncD2H_Cuda(vec);
   case CEED_MEM_DEVICE: return CeedVectorSyncH2D_Cuda(vec);
@@ -167,29 +199,6 @@ static inline int CeedVectorHasBorrowedArrayOfType_Cuda(const CeedVector vec,
   return CEED_ERROR_SUCCESS;
 }
 
-//------------------------------------------------------------------------------
-// Check if is any array of given type
-//------------------------------------------------------------------------------
-static inline int CeedVectorNeedSync_Cuda(const CeedVector vec,
-    CeedMemType mem_type, bool *need_sync) {
-  int ierr;
-  CeedVector_Cuda *impl;
-  ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr);
-
-  bool has_valid_array = false;
-  ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChkBackend(ierr);
-  switch (mem_type) {
-  case CEED_MEM_HOST:
-    *need_sync = has_valid_array && !impl->h_array;
-    break;
-  case CEED_MEM_DEVICE:
-    *need_sync = has_valid_array && !impl->d_array;
-    break;
-  }
-
-  return CEED_ERROR_SUCCESS;
-}
-
 //------------------------------------------------------------------------------
 // Set array from host
 //------------------------------------------------------------------------------
@@ -368,11 +377,7 @@ static int CeedVectorTakeArray_Cuda(CeedVector vec, CeedMemType mem_type,
   ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr);
 
   // Sync array to requested mem_type
-  bool need_sync = false;
-  ierr = CeedVectorNeedSync_Cuda(vec, mem_type, &need_sync); CeedChkBackend(ierr);
-  if (need_sync) {
-    ierr = CeedVectorSync_Cuda(vec, mem_type); CeedChkBackend(ierr);
-  }
+  ierr = CeedVectorSyncArray(vec, mem_type); CeedChkBackend(ierr);
 
   // Update pointer
   switch (mem_type) {
@@ -403,14 +408,8 @@ static int CeedVectorGetArrayCore_Cuda(const CeedVector vec,
   CeedVector_Cuda *impl;
   ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr);
 
-  bool need_sync = false, has_array_of_type = true;
-  ierr = CeedVectorNeedSync_Cuda(vec, mem_type, &need_sync); CeedChkBackend(ierr);
-  ierr = CeedVectorHasArrayOfType_Cuda(vec, mem_type, &has_array_of_type);
-  CeedChkBackend(ierr);
-  if (need_sync) {
-    // Sync array to requested mem_type
-    ierr = CeedVectorSync_Cuda(vec, mem_type); CeedChkBackend(ierr);
-  }
+  // Sync array to requested mem_type
+  ierr = CeedVectorSyncArray(vec, mem_type); CeedChkBackend(ierr);
 
   // Update pointer
   switch (mem_type) {
@@ -763,6 +762,8 @@ int CeedVectorCreate_Cuda(CeedSize n, CeedVector vec) {
   ierr = CeedSetBackendFunction(ceed, "Vector", vec, "SetValue",
                                 (int (*)())(CeedVectorSetValue_Cuda));
   CeedChkBackend(ierr);
+  ierr = CeedSetBackendFunction(ceed, "Vector", vec, "SyncArray",
+                                CeedVectorSyncArray_Cuda); CeedChkBackend(ierr);
   ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArray",
                                 CeedVectorGetArray_Cuda); CeedChkBackend(ierr);
   ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayRead",
diff --git a/backends/hip-ref/ceed-hip-ref-vector.c b/backends/hip-ref/ceed-hip-ref-vector.c
index b8371225cb..49c6494025 100644
--- a/backends/hip-ref/ceed-hip-ref-vector.c
+++ b/backends/hip-ref/ceed-hip-ref-vector.c
@@ -13,6 +13,30 @@
 #include <string.h>
 #include "ceed-hip-ref.h"
 
+
+//------------------------------------------------------------------------------
+// Check if host/device sync is needed
+//------------------------------------------------------------------------------
+static inline int CeedVectorNeedSync_Hip(const CeedVector vec,
+    CeedMemType mem_type, bool *need_sync) {
+  int ierr;
+  CeedVector_Hip *impl;
+  ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr);
+
+  bool has_valid_array = false;
+  ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChkBackend(ierr);
+  switch (mem_type) {
+  case CEED_MEM_HOST:
+    *need_sync = has_valid_array && !impl->h_array;
+    break;
+  case CEED_MEM_DEVICE:
+    *need_sync = has_valid_array && !impl->d_array;
+    break;
+  }
+
+  return CEED_ERROR_SUCCESS;
+}
+
 //------------------------------------------------------------------------------
 // Sync host to device
 //------------------------------------------------------------------------------
@@ -88,8 +112,16 @@ static inline int CeedVectorSyncD2H_Hip(const CeedVector vec) {
 //------------------------------------------------------------------------------
 // Sync arrays
 //------------------------------------------------------------------------------
-static inline int CeedVectorSync_Hip(const CeedVector vec,
-                                     CeedMemType mem_type) {
+static int CeedVectorSyncArray_Hip(const CeedVector vec,
+                                   CeedMemType mem_type) {
+  int ierr;
+  // Check whether device/host sync is needed
+  bool need_sync = false;
+  ierr = CeedVectorNeedSync_Hip(vec, mem_type, &need_sync);
+  CeedChkBackend(ierr);
+  if (!need_sync)
+    return CEED_ERROR_SUCCESS;
+
   switch (mem_type) {
   case CEED_MEM_HOST: return CeedVectorSyncD2H_Hip(vec);
   case CEED_MEM_DEVICE: return CeedVectorSyncH2D_Hip(vec);
@@ -167,29 +199,6 @@ static inline int CeedVectorHasBorrowedArrayOfType_Hip(const CeedVector vec,
   return CEED_ERROR_SUCCESS;
 }
 
-//------------------------------------------------------------------------------
-// Sync array of given type
-//------------------------------------------------------------------------------
-static inline int CeedVectorNeedSync_Hip(const CeedVector vec,
-    CeedMemType mem_type, bool *need_sync) {
-  int ierr;
-  CeedVector_Hip *impl;
-  ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr);
-
-  bool has_valid_array = false;
-  ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChkBackend(ierr);
-  switch (mem_type) {
-  case CEED_MEM_HOST:
-    *need_sync = has_valid_array && !impl->h_array;
-    break;
-  case CEED_MEM_DEVICE:
-    *need_sync = has_valid_array && !impl->d_array;
-    break;
-  }
-
-  return CEED_ERROR_SUCCESS;
-}
-
 //------------------------------------------------------------------------------
 // Set array from host
 //------------------------------------------------------------------------------
@@ -363,11 +372,7 @@ static int CeedVectorTakeArray_Hip(CeedVector vec, CeedMemType mem_type,
   ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr);
 
   // Sync array to requested mem_type
-  bool need_sync = false;
-  ierr = CeedVectorNeedSync_Hip(vec, mem_type, &need_sync); CeedChkBackend(ierr);
-  if (need_sync) {
-    ierr = CeedVectorSync_Hip(vec, mem_type); CeedChkBackend(ierr);
-  }
+  ierr = CeedVectorSyncArray(vec, mem_type); CeedChkBackend(ierr);
 
   // Update pointer
   switch (mem_type) {
@@ -398,13 +403,8 @@ static int CeedVectorGetArrayCore_Hip(const CeedVector vec,
   CeedVector_Hip *impl;
   ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr);
 
-  bool need_sync = false;
-  ierr = CeedVectorNeedSync_Hip(vec, mem_type, &need_sync); CeedChkBackend(ierr);
-  CeedChkBackend(ierr);
-  if (need_sync) {
-    // Sync array to requested mem_type
-    ierr = CeedVectorSync_Hip(vec, mem_type); CeedChkBackend(ierr);
-  }
+  // Sync array to requested mem_type
+  ierr = CeedVectorSyncArray(vec, mem_type); CeedChkBackend(ierr);
 
   // Update pointer
   switch (mem_type) {
@@ -758,6 +758,8 @@ int CeedVectorCreate_Hip(CeedSize n, CeedVector vec) {
                                 CeedVectorTakeArray_Hip); CeedChkBackend(ierr);
   ierr = CeedSetBackendFunction(ceed, "Vector", vec, "SetValue",
                                 (int (*)())(CeedVectorSetValue_Hip)); CeedChkBackend(ierr);
+  ierr = CeedSetBackendFunction(ceed, "Vector", vec, "SyncArray",
+                                CeedVectorSyncArray_Hip); CeedChkBackend(ierr);
   ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArray",
                                 CeedVectorGetArray_Hip); CeedChkBackend(ierr);
   ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayRead",
diff --git a/interface/ceed.c b/interface/ceed.c
index 08ef354072..34635cdf4a 100644
--- a/interface/ceed.c
+++ b/interface/ceed.c
@@ -843,6 +843,7 @@ int CeedInit(const char *resource, Ceed *ceed) {
     CEED_FTABLE_ENTRY(CeedVector, SetArray),
     CEED_FTABLE_ENTRY(CeedVector, TakeArray),
     CEED_FTABLE_ENTRY(CeedVector, SetValue),
+    CEED_FTABLE_ENTRY(CeedVector, SyncArray),
     CEED_FTABLE_ENTRY(CeedVector, GetArray),
     CEED_FTABLE_ENTRY(CeedVector, GetArrayRead),
     CEED_FTABLE_ENTRY(CeedVector, GetArrayWrite),

From c470c2d95a90dfacf862d2ae1d87105c88be1edc Mon Sep 17 00:00:00 2001
From: nbeams <246972+nbeams@users.noreply.github.com>
Date: Wed, 27 Apr 2022 14:16:27 -0600
Subject: [PATCH 27/59] Use CeedMallocArray for void pointers in CUDA/HIP
 QFunctionContext

---
 backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c | 5 +++--
 backends/hip-ref/ceed-hip-ref-qfunctioncontext.c   | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c b/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c
index 2a9a584b26..0bb14e0089 100644
--- a/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c
+++ b/backends/cuda-ref/ceed-cuda-ref-qfunctioncontext.c
@@ -72,7 +72,7 @@ static inline int CeedQFunctionContextSyncD2H_Cuda(
   } else if (impl->h_data_owned) {
     impl->h_data = impl->h_data_owned;
   } else {
-    ierr = CeedMalloc(ctxsize, &impl->h_data_owned);
+    ierr = CeedMallocArray(1, ctxsize, &impl->h_data_owned);
     CeedChkBackend(ierr);
     impl->h_data = impl->h_data_owned;
   }
@@ -184,7 +184,8 @@ static int CeedQFunctionContextSetDataHost_Cuda(const CeedQFunctionContext ctx,
   case CEED_COPY_VALUES: {
     size_t ctxsize;
     ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr);
-    ierr = CeedMalloc(ctxsize, &impl->h_data_owned); CeedChkBackend(ierr);
+    ierr = CeedMallocArray(1, ctxsize, &impl->h_data_owned);
+    CeedChkBackend(ierr);
     impl->h_data_borrowed = NULL;
     impl->h_data = impl->h_data_owned;
     memcpy(impl->h_data, data, ctxsize);
diff --git a/backends/hip-ref/ceed-hip-ref-qfunctioncontext.c b/backends/hip-ref/ceed-hip-ref-qfunctioncontext.c
index 46cf1b13f5..e4c71e21c1 100644
--- a/backends/hip-ref/ceed-hip-ref-qfunctioncontext.c
+++ b/backends/hip-ref/ceed-hip-ref-qfunctioncontext.c
@@ -72,7 +72,7 @@ static inline int CeedQFunctionContextSyncD2H_Hip(
   } else if (impl->h_data_owned) {
     impl->h_data = impl->h_data_owned;
   } else {
-    ierr = CeedMalloc(ctxsize, &impl->h_data_owned);
+    ierr = CeedMallocArray(1, ctxsize, &impl->h_data_owned);
     CeedChkBackend(ierr);
     impl->h_data = impl->h_data_owned;
   }
@@ -184,7 +184,8 @@ static int CeedQFunctionContextSetDataHost_Hip(const CeedQFunctionContext ctx,
   case CEED_COPY_VALUES: {
     size_t ctxsize;
     ierr = CeedQFunctionContextGetContextSize(ctx, &ctxsize); CeedChkBackend(ierr);
-    ierr = CeedMalloc(ctxsize, &impl->h_data_owned); CeedChkBackend(ierr);
+    ierr = CeedMallocArray(1, ctxsize, &impl->h_data_owned);
+    CeedChkBackend(ierr);
     impl->h_data_borrowed = NULL;
     impl->h_data = impl->h_data_owned;
     memcpy(impl->h_data, data, ctxsize);

From 2790b72b4f43887fa8322363f29d18765b4e7e19 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Thu, 28 Apr 2022 09:50:37 -0600
Subject: [PATCH 28/59] ctx - add interface for additional destroy

---
 include/ceed-impl.h               |  2 ++
 include/ceed/ceed.h               | 11 +++++++++++
 interface/ceed-qfunctioncontext.c | 32 +++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+)

diff --git a/include/ceed-impl.h b/include/ceed-impl.h
index c02d5219a8..66409ddba8 100644
--- a/include/ceed-impl.h
+++ b/include/ceed-impl.h
@@ -275,6 +275,8 @@ struct CeedQFunctionContext_private {
   int (*RestoreData)(CeedQFunctionContext);
   int (*RestoreDataRead)(CeedQFunctionContext);
   int (*Destroy)(CeedQFunctionContext);
+  CeedQFunctionContextDataDestroyUser data_destroy_function;
+  CeedMemType data_destroy_mem_type;
   CeedInt num_fields;
   CeedInt max_fields;
   CeedContextFieldLabel *field_labels;
diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h
index e7fbaaf9e3..6b87d7514f 100644
--- a/include/ceed/ceed.h
+++ b/include/ceed/ceed.h
@@ -655,6 +655,16 @@ typedef enum {
 } CeedContextFieldType;
 CEED_EXTERN const char *const CeedContextFieldTypes[];
 
+/** Handle for the user provided CeedQFunctionContextDataDestroy callback function
+
+ @param[in,out] data  User data held by the CeedQFunctionContext
+
+ @return An error code: 0 - success, otherwise - failure
+
+ @ingroup CeedQFunction
+**/
+typedef int (*CeedQFunctionContextDataDestroyUser)(void *data);
+
 CEED_EXTERN int CeedQFunctionContextCreate(Ceed ceed,
     CeedQFunctionContext *ctx);
 CEED_EXTERN int CeedQFunctionContextReferenceCopy(CeedQFunctionContext ctx,
@@ -686,6 +696,7 @@ CEED_EXTERN int CeedQFunctionContextGetContextSize(CeedQFunctionContext ctx,
     size_t *ctx_size);
 CEED_EXTERN int CeedQFunctionContextView(CeedQFunctionContext ctx,
     FILE *stream);
+CEED_EXTERN int CeedQFunctionContextSetDataDestroy(CeedQFunctionContext ctx, CeedMemType f_mem_type, CeedQFunctionContextDataDestroyUser f);
 CEED_EXTERN int CeedQFunctionContextDestroy(CeedQFunctionContext *ctx);
 
 CEED_EXTERN int CeedOperatorCreate(Ceed ceed, CeedQFunction qf,
diff --git a/interface/ceed-qfunctioncontext.c b/interface/ceed-qfunctioncontext.c
index b72fc34d75..8967953a6b 100644
--- a/interface/ceed-qfunctioncontext.c
+++ b/interface/ceed-qfunctioncontext.c
@@ -804,6 +804,30 @@ int CeedQFunctionContextView(CeedQFunctionContext ctx, FILE *stream) {
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Set additional destroy routine for CeedQFunctionContext user data
+
+  @param ctx        CeedQFunctionContext to set user destroy function
+  @param f_mem_type Memory type to use when passing data into `f`
+  @param f          Additional routine to use to destroy user data
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref User
+**/
+
+int CeedQFunctionContextSetDataDestroy(CeedQFunctionContext ctx,
+                                       CeedMemType f_mem_type, CeedQFunctionContextDataDestroyUser f) {
+  if (!f)
+    // LCOV_EXCL_START
+    return CeedError(ctx->ceed, 1,
+                     "Must provide valid callback function for destroying user data");
+  // LCOV_EXCL_STOP
+  ctx->data_destroy_mem_type = f_mem_type;
+  ctx->data_destroy_function = f;
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief Destroy a CeedQFunctionContext
 
@@ -826,6 +850,14 @@ int CeedQFunctionContextDestroy(CeedQFunctionContext *ctx) {
                      "lock is in use");
   // LCOV_EXCL_STOP
 
+  if ((*ctx)->data_destroy_function) {
+    void *data;
+
+    ierr = CeedQFunctionContextGetData(*ctx, (*ctx)->data_destroy_mem_type, &data);
+    CeedChk(ierr);
+    ierr = (*ctx)->data_destroy_function(data); CeedChk(ierr);
+    ierr = CeedQFunctionContextRestoreData(*ctx, &data); CeedChk(ierr);
+  }
   if ((*ctx)->Destroy) {
     ierr = (*ctx)->Destroy(*ctx); CeedChk(ierr);
   }

From a71fcd9fac4e7a8dfa69a197fd7b41b8f31fd6a3 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Thu, 28 Apr 2022 10:34:13 -0600
Subject: [PATCH 29/59] rust - drop pinned trampoline data

---
 rust/libceed/src/qfunction.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/rust/libceed/src/qfunction.rs b/rust/libceed/src/qfunction.rs
index baba3ddd75..efcbe5dc44 100644
--- a/rust/libceed/src/qfunction.rs
+++ b/rust/libceed/src/qfunction.rs
@@ -568,6 +568,12 @@ unsafe extern "C" fn trampoline(
     (trampoline_data.get_unchecked_mut().user_f)(inputs_array, outputs_array)
 }
 
+unsafe extern "C" fn destroy_trampoline(ctx: *mut ::std::os::raw::c_void) -> ::std::os::raw::c_int {
+    let trampoline_data: Pin<&mut QFunctionTrampolineData> = std::mem::transmute(ctx);
+    drop(trampoline_data);
+    0 // Clean error code
+}
+
 // -----------------------------------------------------------------------------
 // QFunction
 // -----------------------------------------------------------------------------
@@ -623,6 +629,14 @@ impl<'a> QFunction<'a> {
             )
         };
         ceed.check_error(ierr)?;
+        ierr = unsafe {
+            bind_ceed::CeedQFunctionContextSetDataDestroy(
+                qf_ctx_ptr,
+                crate::MemType::Host as bind_ceed::CeedMemType,
+                Some(destroy_trampoline),
+            )
+        };
+        ceed.check_error(ierr)?;
         ierr = unsafe { bind_ceed::CeedQFunctionSetContext(ptr, qf_ctx_ptr) };
         ceed.check_error(ierr)?;
         Ok(Self {

From 88626eed6564cd43033d3137230605fb5f962840 Mon Sep 17 00:00:00 2001
From: James Wright <james@jameswright.xyz>
Date: Fri, 29 Apr 2022 18:12:39 -0600
Subject: [PATCH 30/59] feat: Add blasius boundary layer and channel flow
 examples to examples/fluids (#942)

* examples/fluids: Upload current case for collaboration

* examples/fluids: Replace developing BL with duct

* examples/fluids: Allow density to "float" at inflow

* fix: Add u_normal

* examples/fluids: bug fix in density flux

* examples/fluids: implement "floating" pressure; prescribed u, T

* examples/fluids: fixing initialization

We need has_neumann=true to get the weak boundary integrals called. It
is initialized in problem->setup_ctx and used in SetupLibceed(). This is
a hack to always apply.

* examples/fluids: include kinetic energy in Blasius IC

* examples/fluids: Blasius quasi-2D (slip in z planes)

* feat: Add Exact_Channel function

* feat: Use Exact_Channel for IC and Inflow

* examples/fluids: Add channel example

* examples/fluids: Define mu in blasius.c, cleanup

* examples/fluid: Add Blasius solution calculator

* examples/fluids: Add blasius IC and BCs

* Convert to implicit TS

* examples/fluids: Update blasius.yaml

* Move to ChannelContext, refactor DC and newtonian

 - Added a dedicated ChannelContext, which allows for user setting of
   flow parameters and consistent sharing of parameters between
   QFunctions (instead of hardcoding)
 - Moved density current (DC) specific settings from newtonian.c
   densitycurrent.c

* Make gravity into vector quantity

* Add in body force for channel

* examples/fluids: remove explicitly setting coordinate field

This is created automatically now by DMPlex and DMProjectCoordinates
breaks (localized) periodicity, resulting in a tangled mesh.

* examples/fluids: transpose flux Jacobian in SUPG stabilization

* examples/fluids: update docs commensurate with flux Jacobian transpose fix

* examples/fluids: work on docs for tau

* examples/fluids: Increase blasius Re, implement Xi to SUPG

 - Increased the Uinf and mu by 4x for the blasius problem
 - Implemented the calculation of Xi for the SUPG term in newtonian.h

* examples/fluids: Remove flux jacobian transpose

 - Removed for both eulervortex.h and newtonian.h

* examples/fluids: Reset Newtonian unit scaling to neutral

 - Note that this resets the scaling for the densitycurrent problem,
   thus the examples (and possibly tests) will probably need to be
   changed

* examples/fluids: fix PetscOptionsBegin for new PETSc

* feat: Add slanted domain top surface

* feat: Add graded mesh option for blasius

* fix: Correct inflow boundary condition handling

* feat: Move to blasius_context, add CLI options

* feat: Move Blasius to physical air properties

* fix: Blasius profile and mesh generation

 - Add the correct float->int rounding
 - Fix eta to a value after exceeding the table's bounds

fix: Add fix eta after exceeding table

* examples/fluids: Move to physical mu default, fix tests

* doc: Add minimum documentation for channel and blasius

* examples/fluids: Add primitive jacobian and tau

 - Also add misc comments

* examples/fluids: propose alternative flux Jacobian via prim-to-conservative

* examples/fluids: Add PHASTA diagonal tau

* fix: Add back in timestep obtaining

* examples/fluids: Add analytic tractions to inflow and outflow

- This is a stop-gap until viscous flux can be computed properly on the
  boundary from current solution.
- Tau constants in a state of flux still but this produced a decent v at
  inflow and outflow, removing the steep dive caused by forcing
  dv/dx=-du/dy from a zero traction of omission.

* examples/fluids: CLI options for diagonal c_tau's

* examples/fluids: Update dt via context labels

* feat: Add more blasius table points

* examples/fluids: Cleanup stab and adjust Ctau_E

* examples/fluids: Replace spatial tau with diagonal

* examples/fluids: Fix stab in Euler

 - Initialize `stab` to zero, add sum over loop

* examples/fluids: Document diagonal tau formulation

* examples/fluids: Fix compiler warnings

* examples/fluids: check error codes and elide unused header

* examples/fluids: Correct blasius example CLI

 - Also add mention of the `blasius.yaml` file

* examples/fluids: Add supg to default blasius.yaml

* examples/fluids: Use yaml for channel and blasius

 - Instead of long CLI options
 - Also cleaned up the yaml formatting
 - Added Cartesian directions to the faceMarker names

* examples/fluids: Show example yamls in README

Co-authored-by: Jed Brown <jed@jedbrown.org>
Co-authored-by: Kenneth E. Jansen <Kenneth.Jansen@colorado.edu>
---
 doc/sphinx/source/references.bib            |   9 +
 examples/fluids/README.md                   | 276 +++++++++++++---
 examples/fluids/blasius.yaml                |  37 +++
 examples/fluids/channel.yaml                |  18 ++
 examples/fluids/index.md                    |  71 +++-
 examples/fluids/navierstokes.c              |   6 +-
 examples/fluids/navierstokes.h              |  53 ++-
 examples/fluids/problems/blasius.c          | 196 ++++++++++++
 examples/fluids/problems/channel.c          | 128 ++++++++
 examples/fluids/problems/densitycurrent.c   |  90 ++++--
 examples/fluids/problems/newtonian.c        |  99 +++---
 examples/fluids/qfunctions/advection.h      |   2 +-
 examples/fluids/qfunctions/advection2d.h    |   2 +-
 examples/fluids/qfunctions/blasius.h        | 338 ++++++++++++++++++++
 examples/fluids/qfunctions/channel.h        | 240 ++++++++++++++
 examples/fluids/qfunctions/densitycurrent.h |   5 +-
 examples/fluids/qfunctions/eulervortex.h    |  24 +-
 examples/fluids/qfunctions/newtonian.h      | 307 ++++++++++++++----
 examples/fluids/src/cloptions.c             |   6 +
 examples/fluids/src/setupdm.c               |  10 -
 examples/fluids/src/setuplibceed.c          |   4 +-
 examples/fluids/src/setupts.c               |  16 +-
 22 files changed, 1702 insertions(+), 235 deletions(-)
 create mode 100644 examples/fluids/blasius.yaml
 create mode 100644 examples/fluids/channel.yaml
 create mode 100644 examples/fluids/problems/blasius.c
 create mode 100644 examples/fluids/problems/channel.c
 create mode 100644 examples/fluids/qfunctions/blasius.h
 create mode 100644 examples/fluids/qfunctions/channel.h

diff --git a/doc/sphinx/source/references.bib b/doc/sphinx/source/references.bib
index 5a7829a4e1..d29d912f11 100644
--- a/doc/sphinx/source/references.bib
+++ b/doc/sphinx/source/references.bib
@@ -163,3 +163,12 @@ @book{toro2009
   publisher={Springer, Berlin, Heidelberg},
   isbn={978-3-540-49834-6}
 }
+
+@phdthesis{whitingStabilizedFEM1999,
+  title = {Stabilized {{Finite Element Methods}} for {{Fluid Dynamics}} Using a {{Hierarchical Basis}}},
+  author = {Whiting, Christian H},
+  year = {1999},
+  address = {{Troy, NY}},
+  langid = {english},
+  school = {Rensselaer Polytechnic Institute},
+}
diff --git a/examples/fluids/README.md b/examples/fluids/README.md
index 2ef4ca990a..1b068836aa 100644
--- a/examples/fluids/README.md
+++ b/examples/fluids/README.md
@@ -123,18 +123,59 @@ The following options are common among all problem types:
 
 For the case of a square/cubic mesh, the list of face indices to be used with `-bc_wall`, `bc_inflow`, `bc_outflow` and/or `-bc_slip_x`, `-bc_slip_y`, and `-bc_slip_z` are:
 
-* 2D:
-  - faceMarkerBottom = 1
-  - faceMarkerRight  = 2
-  - faceMarkerTop    = 3
-  - faceMarkerLeft   = 4
-* 3D:
-  - faceMarkerBottom = 1
-  - faceMarkerTop    = 2
-  - faceMarkerFront  = 3
-  - faceMarkerBack   = 4
-  - faceMarkerRight  = 5
-  - faceMarkerLeft   = 6
+:::{list-table} 2D Face ID Labels
+:header-rows: 1
+* - PETSc Face Name
+  - Cartesian direction
+  - Face ID
+
+* - faceMarkerBottom
+  - -z
+  - 1
+
+* - faceMarkerRight
+  - +x
+  - 2
+
+* - faceMarkerTop
+  - +z
+  - 3
+
+* - faceMarkerLeft
+  - -x
+  - 4
+:::
+
+:::{list-table} 3D Face ID Labels
+:header-rows: 1
+* - PETSc Face Name
+  - Cartesian direction
+  - Face ID
+
+* - faceMarkerBottom
+  - -z
+  - 1
+
+* - faceMarkerTop
+  - +z
+  - 2
+
+* - faceMarkerFront
+  - -y
+  - 3
+
+* - faceMarkerBack
+  - +y
+  - 4
+
+* - faceMarkerRight
+  - +x
+  - 5
+
+* - faceMarkerLeft
+  - -x
+  - 6
+:::
 
 For the 2D advection problem, the following additional command-line options are available:
 
@@ -340,9 +381,9 @@ This problem can be run with:
 ./navierstokes -problem euler_vortex -dm_plex_box_faces 20,20,1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,1000,50 -dm_plex_dim 3 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -mean_velocity .5,-.8,0.
 ```
 
-For the Density Current problem, the following additional command-line options are available:
+For the Density Current, Channel, and Blasius problems, the following common command-line options are available:
 
-:::{list-table} Euler Vortex Runtime Options
+:::{list-table} Newtonian Ideal Gas problems Runtime Options
 :header-rows: 1
 
 * - Option
@@ -350,34 +391,19 @@ For the Density Current problem, the following additional command-line options a
   - Default value
   - Unit
 
-* - `-center`
-  - Location of bubble center
-  - `(lx,ly,lz)/2`
-  - `(m,m,m)`
-
-* - `-dc_axis`
-  - Axis of density current cylindrical anomaly, or `(0,0,0)` for spherically symmetric
-  - `(0,0,0)`
-  -
-
-* - `-rc`
-  - Characteristic radius of thermal bubble
-  - `1000`
-  - `m`
-
 * - `-units_meter`
   - 1 meter in scaled length units
-  - `1E-2`
+  - `1`
   -
 
 * - `-units_second`
   - 1 second in scaled time units
-  - `1E-2`
+  - `1`
   -
 
 * - `-units_kilogram`
   - 1 kilogram in scaled mass units
-  - `1E-6`
+  - `1`
   -
 
 * - `-units_Kelvin`
@@ -391,29 +417,34 @@ For the Density Current problem, the following additional command-line options a
   -
 
 * - `-c_tau`
-  - Stabilization constant
+  - Stabilization constant, $c_\tau$
   - `0.5`
   -
 
-* - `-theta0`
-  - Reference potential temperature
-  - `300`
-  - `K`
+* - `-Ctau_t`
+  - Stabilization time constant, $C_t$
+  - `1.0`
+  -
 
-* - `-thetaC`
-  - Perturbation of potential temperature
-  - `-15`
-  - `K`
+* - `-Ctau_v`
+  - Stabilization viscous constant, $C_v$
+  - `36.0`
+  -
 
-* - `-P0`
-  - Atmospheric pressure
-  - `1E5`
-  - `Pa`
+* - `-Ctau_C`
+  - Stabilization continuity constant, $C_c$
+  - `1.0`
+  -
 
-* - `-N`
-  - Brunt-Vaisala frequency
-  - `0.01`
-  - `1/s`
+* - `-Ctau_M`
+  - Stabilization momentum constant, $C_m$
+  - `1.0`
+  -
+
+* - `-Ctau_E`
+  - Stabilization energy constant, $C_E$
+  - `1.0`
+  -
 
 * - `-cv`
   - Heat capacity at constant volume
@@ -446,8 +477,153 @@ For the Density Current problem, the following additional command-line options a
   - `W/(m K)`
 :::
 
+For the Density Current problem, the following command-line options are available in
+addition to the Newtonian Ideal Gas options:
+
+:::{list-table} Density Current Runtime Options
+:header-rows: 1
+
+* - Option
+  - Description
+  - Default value
+  - Unit
+
+* - `-center`
+  - Location of bubble center
+  - `(lx,ly,lz)/2`
+  - `(m,m,m)`
+
+* - `-dc_axis`
+  - Axis of density current cylindrical anomaly, or `(0,0,0)` for spherically symmetric
+  - `(0,0,0)`
+  -
+
+* - `-rc`
+  - Characteristic radius of thermal bubble
+  - `1000`
+  - `m`
+
+* - `-theta0`
+  - Reference potential temperature
+  - `300`
+  - `K`
+
+* - `-thetaC`
+  - Perturbation of potential temperature
+  - `-15`
+  - `K`
+
+* - `-P0`
+  - Atmospheric pressure
+  - `1E5`
+  - `Pa`
+
+* - `-N`
+  - Brunt-Vaisala frequency
+  - `0.01`
+  - `1/s`
+:::
+
 This problem can be run with:
 
 ```
-./navierstokes -problem density_current -dm_plex_box_faces 16,1,8 -degree 1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 2000,125,1000 -dm_plex_dim 3 -rc 400. -bc_wall 1,2,5,6 -wall_comps 1,2,3 -bc_slip_y 3,4 -viz_refine 2
+./navierstokes -problem density_current -dm_plex_box_faces 16,1,8 -degree 1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 2000,125,1000 -dm_plex_dim 3 -rc 400. -bc_wall 1,2,5,6 -wall_comps 1,2,3 -bc_slip_y 3,4 -mu 75
+```
+
+For the Channel problem, the following command-line options are available in
+addition to the Newtonian Ideal Gas options:
+
+:::{list-table} Channel Runtime Options
+:header-rows: 1
+
+* - Option
+  - Description
+  - Default value
+  - Unit
+
+* - `-umax`
+  - Maximum/centerline velocity of the flow
+  - `10`
+  - `m/s`
+
+* - `-theta0`
+  - Reference potential temperature
+  - `300`
+  - `K`
+
+* - `-P0`
+  - Atmospheric pressure
+  - `1E5`
+  - `Pa`
+:::
+
+This problem can be run with the `channel.yaml` file via:
+
+```
+./navierstokes -options_file channel.yaml
+```
+```{literalinclude} ../../../../../examples/fluids/channel.yaml
+:language: yaml
+```
+
+For the Blasius problem, the following command-line options are available in
+addition to the Newtonian Ideal Gas options:
+
+:::{list-table} Blasius Runtime Options
+:header-rows: 1
+
+* - Option
+  - Description
+  - Default value
+  - Unit
+
+* - `-Uinf`
+  - Freestream velocity
+  - `40`
+  - `m/s`
+
+* - `-delta0`
+  - Boundary layer height at the inflow
+  - `4.2e-4`
+  - `m`
+
+* - `-theta0`
+  - Reference potential temperature
+  - `288`
+  - `K`
+
+* - `-P0`
+  - Atmospheric pressure
+  - `1.01E5`
+  - `Pa`
+
+* - `-refine_height`
+  - Height into which `-Ndelta` elements should be refined
+  - `5.9E-4`
+  - `m`
+
+* - `-Ndelta`
+  - Number of elements to keep below `-refine_height`
+  - `45`
+  -
+
+* - `-growth`
+  - Growth rate of the elements in the refinement region
+  - `1.08`
+  -
+
+* - `-top_angle`
+  - Downward angle of the top face of the domain. This face serves as an outlet.
+  - `5`
+  - `degrees`
+:::
+
+This problem can be run with the `blasius.yaml` file via:
+
+```
+./navierstokes -options_file blasius.yaml
+```
+
+```{literalinclude} ../../../../../examples/fluids/blasius.yaml
+:language: yaml
 ```
diff --git a/examples/fluids/blasius.yaml b/examples/fluids/blasius.yaml
new file mode 100644
index 0000000000..85be3b40de
--- /dev/null
+++ b/examples/fluids/blasius.yaml
@@ -0,0 +1,37 @@
+problem: 'blasius'
+
+implicit: true
+ts:
+  adapt_type: 'none'
+  type: 'beuler'
+  dt: 5.e-8
+
+## Linear Settings:
+degree: 1
+dm_plex_box_faces: 40,60,1
+nDelta: 45
+
+## Quadratic Settings:
+#degree: 2
+#dm_plex_box_faces: 20,30,1
+#nDelta: 22
+#growth: 1.1664 # 1.08^2
+
+stab: 'supg'
+Ctau_t: 1
+#Ctau_v: 36,60,128 is what PHASTA has for p=1,2, 3
+Ctau_v: 36
+Ctau_C: 0.125
+Ctau_M: 1.0
+Ctau_E: 0.125
+q_extra: 0
+
+dm_plex_box_lower: 0,0,0
+dm_plex_box_upper: 4.2e-3,4.2e-3,5.e-5
+dm_plex_dim: 3
+bc_slip_z: 1,2
+bc_wall: 3
+wall_comps: 1,2,3
+bc_inflow: 6
+bc_outflow: 5,4
+g: 0,0,0
diff --git a/examples/fluids/channel.yaml b/examples/fluids/channel.yaml
new file mode 100644
index 0000000000..8e2cf6d6bb
--- /dev/null
+++ b/examples/fluids/channel.yaml
@@ -0,0 +1,18 @@
+problem: 'channel'
+
+umax: 40
+implicit: true
+ts:
+  type: 'beuler'
+  adapt_type: 'none'
+  dt: 5e-8
+
+dm_plex_box_lower: 0,0,0
+dm_plex_box_upper: 1,1,.1
+dm_plex_dim: 3
+degree: 1
+dm_plex_box_faces: 10,10,1
+bc_slip_z: 1,2
+bc_wall: 3,4
+wall_comps: 1,2,3
+dm_plex_box_bd: 'periodic,none,none'
diff --git a/examples/fluids/index.md b/examples/fluids/index.md
index 5f355cb17d..45d2364975 100644
--- a/examples/fluids/index.md
+++ b/examples/fluids/index.md
@@ -173,7 +173,7 @@ Our formulation follows {cite}`hughesetal2010`, which offers a comprehensive rev
   \int_{\Omega} \bm v \cdot \left( \frac{\partial \bm{q}_N}{\partial t} - \bm{S}(\bm{q}_N) \right)  \,dV
   - \int_{\Omega} \nabla \bm v \!:\! \bm{F}(\bm{q}_N)\,dV & \\
   + \int_{\partial \Omega} \bm v \cdot \bm{F}(\bm{q}_N) \cdot \widehat{\bm{n}} \,dS & \\
-  + \int_{\Omega} \bm{P}(\bm v)^T \, \left( \frac{\partial \bm{q}_N}{\partial t} \, + \,
+  + \int_{\Omega} \mathcal{P}(\bm v)^T \, \left( \frac{\partial \bm{q}_N}{\partial t} \, + \,
   \nabla \cdot \bm{F} \, (\bm{q}_N) - \bm{S}(\bm{q}_N) \right) \,dV &= 0
   \, , \; \forall \bm v \in \mathcal{V}_p
   \end{aligned}
@@ -201,11 +201,13 @@ In both {eq}`eq-weak-vector-ns-su` and {eq}`eq-weak-vector-ns-supg`, $\mathcal P
 It is defined as
 
 $$
-\mathcal P(\bm v) \equiv \left(\bm{\tau} \cdot \frac{\partial \bm{F}_{\text{adv}} (\bm{q}_N)}{\partial \bm{q}_N} \right)^T \, \nabla \bm v\,,
-$$
+\mathcal P(\bm v) \equiv \bm{\tau} \left(\frac{\partial \bm{F}_{\text{adv}} (\bm{q}_N)}{\partial \bm{q}_N} \right) \, \nabla \bm v\,,
+$$ (eq-streamline-P)
 
-where parameter $\bm{\tau} \in \mathbb R^{3\times 3}$ (spatial indices) or $\bm \tau \in \mathbb R^{5\times 5}$ (field indices) is an intrinsic time scale matrix.
-This expression contains the flux Jacobian, which we express in variational notation by differentiating the advective flux $\bm F_{\text{adv}}$ of {eq}`eq-ns-flux`
+where parameter $\bm{\tau} \in \mathbb R^{3}$ (spatial index) or $\bm \tau \in \mathbb R^{5\times 5}$ (field indices) is an intrinsic time scale matrix.
+Most generally, we consider $\bm\tau \in \mathbb R^{3,5,5}$.
+This expression contains the advective flux Jacobian, which may be thought of as mapping from a 5-vector (state) to a $(5,3)$ tensor (flux) or from a $(5,3)$ tensor (gradient of state) to a 5-vector (time derivative of state); the latter is used in {eq}`eq-streamline-P` because it's applied to $\nabla\bm v$.
+The forward variational form can be readily expressed by differentiating $\bm F_{\text{adv}}$ of {eq}`eq-ns-flux`
 
 $$
 \begin{aligned}
@@ -219,14 +221,14 @@ $$
 $$
 
 where $\diff P$ is defined by differentiating {eq}`eq-state`.
-In this notation, we may equivalently write the stabilization term as
+This action is also readily computed by forward-mode AD, but since $\bm v$ is a test function, we actually need the action of the adjoint to use {eq}`eq-streamline-P` in finite element computation; that can be computed by reverse-mode AD.
+We may equivalently write the stabilization term as
 
 $$
-\mathcal P(\bm v)^T \bm r = \nabla \bm v \bm\tau \diff\bm F_{\text{adv}}(\bm r),
+\mathcal P(\bm v)^T \bm r = \nabla \bm v \tcolon \left(\frac{\partial \bm F_{\text{adv}}}{\partial \bm q}\right)^T \, \bm\tau \bm r,
 $$
 
-where $\bm r$ is the strong form residual.
-Note that both $\nabla \bm v$ and $\diff \bm F$ are $5\times 3$ matrices and that $\bm\tau$ can be defined with spatial indices, or field indices, leading to a stabilization term of $(\nabla \bm v)_{\alpha i} \tau_{ij} \diff \bm F_{\alpha j}$ for spatial or $(\nabla \bm v)_{\alpha i} \tau_{\alpha \beta} \diff \bm F_{\beta i}$ for field, where $\alpha,\beta$ are field indices and $i,j$ are spatial indices.
+where $\bm r$ is the strong form residual and $\bm\tau$ is a $5\times 5$ matrix.
 
 :::{dropdown} Stabilization scale $\bm\tau$
 A velocity vector $\bm u$ can be pulled back to the reference element as $\bm u_{\bm X} = \nabla_{\bm x}\bm X \cdot \bm u$, with units of reference length (non-dimensional) per second.
@@ -260,12 +262,32 @@ $$ (eq-test-perturbation-advdiff)
 
 See {cite}`hughesetal2010` equations 15-17 and 34-36 for further discussion of this formulation.
 
-For the Navier-Stokes and Euler equations in primitive variables, {cite}`whiting2003hierarchical` defines a $5\times 5$ diagonal stabilization consisting of
+For the Navier-Stokes and Euler equations, {cite}`whiting2003hierarchical` defines a $5\times 5$ diagonal stabilization $\mathrm{diag}(\tau_c, \tau_m, \tau_m, \tau_m, \tau_E)$ consisting of
 1. continuity stabilization $\tau_c$
 2. momentum stabilization $\tau_m$
 3. energy stabilization $\tau_E$
 
-However, since our equations are in conservative form, we follow {cite}`hughesetal2010` in defining a $3\times 3$ diagonal stabilization according to spatial criterion 2 (equation 27) as follows.
+The Navier-Stokes code in this example uses the following formulation for $\tau_c$, $\tau_m$, $\tau_E$:
+
+$$ 
+\begin{aligned}
+
+\tau_c &= \frac{C_c \mathcal{F}}{8\rho \trace(\bm g)} \\
+\tau_m &= \frac{C_m}{\mathcal{F}} \\
+\tau_E &= \frac{C_E}{\mathcal{F} c_v} \\
+\end{aligned}
+$$
+
+$$
+\mathcal{F} = \sqrt{ \rho^2 \left [ \left(\frac{2C_t}{\Delta t}\right)^2
++ \bm u \cdot (\bm u \cdot  \bm g)
++ C_v \mu^2 \Vert \bm g \Vert_F ^2\right]}
+$$
+
+where $\bm g = \nabla_{\bm x} \bm{X} \cdot \nabla_{\bm x} \bm{X}$ is the metric tensor and $\Vert \cdot \Vert_F$ is the Frobenius norm.
+This formulation is currently not available in the Euler code.
+
+In the Euler code, we follow {cite}`hughesetal2010` in defining a $3\times 3$ diagonal stabilization according to spatial criterion 2 (equation 27) as follows.
 
 $$
 \tau_{ii} = c_{\tau} \frac{2 \xi(\mathrm{Pe})}{(\lambda_{\max \text{abs}})_i \lVert \nabla_{x_i} \bm X \rVert}
@@ -364,3 +386,30 @@ $$
 
 where $P_0$ is the atmospheric pressure.
 For this problem, we have used no-slip and non-penetration boundary conditions for $\bm{u}$, and no-flux for mass and energy densities.
+
+## Channel
+
+A compressible channel flow. Analytical solution given in
+{cite}`whitingStabilizedFEM1999`:
+
+$$ u_1 = u_{\max} \left [ 1 - \left ( \frac{x_2}{H}\right)^2 \right] \quad \quad u_2 = u_3 = 0$$
+$$T = T_w \left [ 1 + \frac{Pr \hat{E}_c}{3} \left \{1 - \left(\frac{x_2}{H}\right)^4  \right \} \right]$$
+$$p = p_0 - \frac{2\rho_0 u_{\max}^2 x_1}{Re_H H}$$
+
+where $H$ is the channel half-height, $u_{\max}$ is the center velocity, $T_w$ is the temperature at the wall, $Pr=\frac{\mu}{c_p \kappa}$ is the Prandtl number, $\hat E_c = \frac{u_{\max}^2}{c_p T_w}$ is the modified Eckert number, and $Re_H = \frac{u_{\max}H}{\nu}$ is the Reynolds number.
+
+Boundary conditions are periodic in the streamwise direction, with no-slip and non-penetration conditions at the walls.
+The flow is driven by a body force.
+
+## Blasius
+
+Simulation of a laminar boundary layer flow, with the inflow being prescribed
+by a [Blasius similarity
+solution](https://en.wikipedia.org/wiki/Blasius_boundary_layer). At the inflow,
+the velocity is prescribed by the Blasius solution profile, temperature is set
+constant, and density is allowed to float. At the outlet, only the density is
+prescribed based on the user-set pressure. The wall is a no-slip,
+no-penetration, no-heat flux condition. The top of the domain is treated as an
+outflow and is tilted at a downward angle to ensure that flow is always exiting
+it.
+
diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c
index f7021ab29b..466c8e5749 100644
--- a/examples/fluids/navierstokes.c
+++ b/examples/fluids/navierstokes.c
@@ -22,8 +22,8 @@
 //     ./navierstokes -ceed /cpu/self -problem density_current -degree 1
 //     ./navierstokes -ceed /gpu/cuda -problem advection -degree 1
 //
-//TESTARGS(name="dc_explicit") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-explicit.bin
-//TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin
+//TESTARGS(name="dc_explicit") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -ts_dt 1e-3 -units_meter 1e-2 -units_second 1e-2 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-explicit.bin
+//TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -units_meter 1e-2 -units_second 1e-2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin
 //TESTARGS(name="adv_rotation_explicit_strong") -ceed {ceed_resource} -test -problem advection -strong_form 1 -degree 3 -dm_plex_box_faces 2,2,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-explicit-strong.bin
 //TESTARGS(name="adv_rotation_implicit_sharp_cylinder") -ceed {ceed_resource} -test -problem advection -bubble_type cylinder -bubble_continuity back_sharp -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_Slip_z 1,2 -bc_wall 3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-sharp-cylinder.bin
 //TESTARGS(name="adv_rotation_implicit_stab_supg") -ceed {ceed_resource} -test -problem advection -CtauS .3 -stab supg -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-stab-supg.bin
@@ -357,6 +357,8 @@ int main(int argc, char **argv) {
   ierr = PetscFree(phys_ctx->newtonian_ig_ctx); CHKERRQ(ierr);
   ierr = PetscFree(phys_ctx->euler_ctx); CHKERRQ(ierr);
   ierr = PetscFree(phys_ctx->advection_ctx); CHKERRQ(ierr);
+  ierr = PetscFree(phys_ctx->channel_ctx); CHKERRQ(ierr);
+  ierr = PetscFree(phys_ctx->blasius_ctx); CHKERRQ(ierr);
   ierr = PetscFree(phys_ctx); CHKERRQ(ierr);
   ierr = PetscFree(app_ctx); CHKERRQ(ierr);
   ierr = PetscFree(ceed_data); CHKERRQ(ierr);
diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h
index 157222b5fd..2c840be401 100644
--- a/examples/fluids/navierstokes.h
+++ b/examples/fluids/navierstokes.h
@@ -132,7 +132,7 @@ struct AppCtx_private {
 struct CeedData_private {
   CeedVector           x_coord, q_data;
   CeedQFunctionContext setup_context, newt_ig_context, advection_context,
-                       euler_context;
+                       euler_context, channel_context, blasius_context;
   CeedQFunction        qf_setup_vol, qf_ics, qf_rhs_vol, qf_ifunction_vol,
                        qf_setup_sur, qf_apply_inflow, qf_apply_outflow;
   CeedBasis            basis_x, basis_xc, basis_q, basis_x_sur, basis_q_sur;
@@ -193,7 +193,7 @@ struct SetupContext_ {
   CeedScalar N;
   CeedScalar cv;
   CeedScalar cp;
-  CeedScalar g;
+  CeedScalar g[3];
   CeedScalar rc;
   CeedScalar lx;
   CeedScalar ly;
@@ -263,14 +263,50 @@ struct NewtonianIdealGasContext_ {
   CeedScalar k;
   CeedScalar cv;
   CeedScalar cp;
-  CeedScalar g;
+  CeedScalar g[3];
   CeedScalar c_tau;
+  CeedScalar Ctau_t;
+  CeedScalar Ctau_v;
+  CeedScalar Ctau_C;
+  CeedScalar Ctau_M;
+  CeedScalar Ctau_E;
+  CeedScalar dt;
   StabilizationType stabilization;
 };
 #endif
 
+#ifndef channel_context_struct // guard: problems/channel.c carries an identical guarded copy
+#define channel_context_struct
+typedef struct ChannelContext_ *ChannelContext;
+struct ChannelContext_ {
+  bool       implicit; // !< Using implicit timestepping or not
+  CeedScalar theta0;   // !< Reference temperature
+  CeedScalar P0;       // !< Reference Pressure
+  CeedScalar umax;     // !< Centerline velocity
+  CeedScalar center;   // !< Y Coordinate for center of channel
+  CeedScalar H;        // !< Channel half-height
+  CeedScalar B;        // !< Body-force driving the flow
+  struct NewtonianIdealGasContext_ newtonian_ctx; // !< Newtonian ideal gas parameters, embedded by value
+};
+#endif
+
+#ifndef blasius_context_struct // guard: problems/blasius.c carries an identical guarded copy
+#define blasius_context_struct
+typedef struct BlasiusContext_ *BlasiusContext;
+struct BlasiusContext_ {
+  bool       implicit;  // !< Using implicit timestepping or not
+  CeedScalar delta0;    // !< Boundary layer height at inflow
+  CeedScalar Uinf;      // !< Velocity at boundary layer edge
+  CeedScalar P0;        // !< Pressure at outflow
+  CeedScalar theta0;    // !< Temperature at inflow
+  struct NewtonianIdealGasContext_ newtonian_ctx; // !< Newtonian ideal gas parameters, embedded by value
+};
+#endif
+
 // Struct that contains all enums and structs used for the physics of all problems
 struct Physics_private {
+  BlasiusContext           blasius_ctx;
+  ChannelContext           channel_ctx;
   NewtonianIdealGasContext newtonian_ig_ctx;
   EulerContext             euler_ctx;
   AdvectionContext         advection_ctx;
@@ -283,6 +319,7 @@ struct Physics_private {
   PetscBool                has_curr_time;
   PetscBool                has_neumann;
   CeedContextFieldLabel    solution_time_label;
+  CeedContextFieldLabel    timestep_size_label;
 };
 
 // Problem specific data
@@ -306,6 +343,10 @@ typedef struct {
 // Set up problems
 // -----------------------------------------------------------------------------
 // Set up function for each problem
+extern PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm,
+                                 void *setup_ctx, void *ctx);
+extern PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm,
+                                 void *setup_ctx, void *ctx);
 extern PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm,
                                       void *setup_ctx, void *ctx);
 extern PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm,
@@ -318,6 +359,12 @@ extern PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm,
                                      void *setup_ctx, void *ctx);
 
 // Set up context for each problem
+extern PetscErrorCode SetupContext_CHANNEL(Ceed ceed, CeedData ceed_data,
+    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
+
+extern PetscErrorCode SetupContext_BLASIUS(Ceed ceed, CeedData ceed_data,
+    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
+
 extern PetscErrorCode SetupContext_NEWTONIAN_IG(Ceed ceed, CeedData ceed_data,
     AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
 
diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c
new file mode 100644
index 0000000000..c6b4b7d9d3
--- /dev/null
+++ b/examples/fluids/problems/blasius.c
@@ -0,0 +1,196 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Utility functions for setting up Blasius Boundary Layer
+
+#include "../navierstokes.h"
+#include "../qfunctions/blasius.h"
+
+#ifndef blasius_context_struct // normally already defined by ../navierstokes.h (included above), so this copy is skipped
+#define blasius_context_struct
+typedef struct BlasiusContext_ *BlasiusContext;
+struct BlasiusContext_ {
+  bool       implicit;  // !< Using implicit timestepping or not
+  CeedScalar delta0;    // !< Boundary layer height at inflow
+  CeedScalar Uinf;      // !< Velocity at boundary layer edge
+  CeedScalar P0;        // !< Pressure at outflow
+  CeedScalar theta0;    // !< Temperature at inflow
+  struct NewtonianIdealGasContext_ newtonian_ctx; // !< Newtonian ideal gas parameters, embedded by value
+};
+#endif
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846
+#endif
+
+/* \brief Modify the domain and mesh for blasius
+ *
+ * Modifies mesh such that `N` elements are within `refine_height` with a geometric
+ * growth ratio of `growth`. Excess elements are then geometrically distributed
+ * to the top surface.
+ *
+ * The top surface is also angled downwards, so that it may be used as an
+ * outflow. Its angle is controlled by top_angle (in units of degrees).
+ */
+PetscErrorCode modifyMesh(DM dm, PetscInt dim, PetscReal growth, PetscInt N,
+                          PetscReal refine_height, PetscReal top_angle) {
+
+  PetscInt ierr, narr, ncoords;
+  PetscReal domain_min[3], domain_max[3], domain_size[3];
+  PetscScalar *arr_coords;
+  Vec vec_coords;
+  PetscFunctionBeginUser;
+
+  PetscReal angle_coeff = tan(top_angle*(M_PI/180)); // tangent of top_angle (degrees -> radians)
+
+  // Get domain boundary information
+  ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
+  for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+
+  // Get coords array from DM
+  ierr = DMGetCoordinatesLocal(dm, &vec_coords); CHKERRQ(ierr);
+  ierr = VecGetLocalSize(vec_coords, &narr); CHKERRQ(ierr);
+  ierr = VecGetArray(vec_coords, &arr_coords); CHKERRQ(ierr);
+
+  PetscScalar (*coords)[dim] = (PetscScalar(*)[dim]) arr_coords; // view the flat array as ncoords rows of dim entries
+  ncoords = narr/dim;
+
+  // Get mesh information (NOTE(review): faces[] looks unset if -dm_plex_box_faces is absent — confirm it is always given)
+  PetscInt nmax = 3, faces[3];
+  ierr = PetscOptionsGetIntArray(NULL, NULL, "-dm_plex_box_faces", faces, &nmax,
+                                 NULL); CHKERRQ(ierr);
+
+  // First element height, chosen so N layers growing by `growth` sum to refine_height
+  PetscReal dybox = domain_size[1]/faces[1];
+  PetscReal dy1   = refine_height*(growth-1)/(pow(growth, N)-1);
+
+  // Step in log(y) used to place the remaining faces[1]-N layers up to domain_max[1]
+  PetscReal logdy = (log(domain_max[1]) - log(refine_height)) / (faces[1] - N);
+
+  for(int i=0; i<ncoords; i++) { // each new height is scaled by (1 - x/domain_max[0]*angle_coeff) to tilt the top
+    PetscInt y_box_index = round(coords[i][1]/dybox); // nearest multiple of the uniform box spacing dybox
+    if(y_box_index <= N) { // refinement region: geometric growth starting from dy1
+      coords[i][1] = (1 - (coords[i][0]/domain_max[0])*angle_coeff) *
+                     dy1*(pow(growth, coords[i][1]/dybox)-1)/(growth-1);
+    } else { // above refine_height: uniform spacing in log(y)
+      PetscInt j = y_box_index - N;
+      coords[i][1] = (1 - (coords[i][0]/domain_max[0])*angle_coeff) *
+                     exp(log(refine_height) + logdy*j);
+    }
+  }
+
+  ierr = VecRestoreArray(vec_coords, &arr_coords); CHKERRQ(ierr);
+  ierr = DMSetCoordinatesLocal(dm, vec_coords); CHKERRQ(ierr);
+
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *setup_ctx,
+                          void *ctx) {
+  // Newtonian ideal gas setup, then Blasius QFunctions, options, and mesh stretching
+  User              user = *(User *)ctx;
+  MPI_Comm          comm = PETSC_COMM_WORLD;
+  PetscErrorCode    ierr;
+  PetscFunctionBeginUser; // must come before the first CHKERRQ
+  ierr = NS_NEWTONIAN_IG(problem, dm, setup_ctx, ctx); CHKERRQ(ierr);
+  ierr = PetscCalloc1(1, &user->phys->blasius_ctx); CHKERRQ(ierr);
+
+  // ------------------------------------------------------
+  //               SET UP Blasius
+  // ------------------------------------------------------
+  problem->ics                     = ICsBlasius;
+  problem->ics_loc                 = ICsBlasius_loc;
+  problem->apply_inflow            = Blasius_Inflow;
+  problem->apply_inflow_loc        = Blasius_Inflow_loc;
+  problem->apply_outflow           = Blasius_Outflow;
+  problem->apply_outflow_loc       = Blasius_Outflow_loc;
+  problem->setup_ctx               = SetupContext_BLASIUS;
+
+  // -- Defaults; every value except mu can be overridden by the options below
+  CeedScalar mu            = 1.8e-5;   // Pa s, dynamic viscosity
+  CeedScalar Uinf          = 40;   // m/s
+  CeedScalar delta0        = 4.2e-4;    // m
+  PetscReal  refine_height = 5.9e-4;    // m
+  PetscReal  growth        = 1.08; // [-]
+  PetscInt   Ndelta        = 45;   // [-]
+  PetscReal  top_angle     = 5;    // degrees
+  CeedScalar theta0        = 288.; // K
+  CeedScalar P0            = 1.01e5; // Pa
+
+  PetscOptionsBegin(comm, NULL, "Options for BLASIUS problem", NULL);
+  ierr = PetscOptionsScalar("-Uinf", "Velocity at boundary layer edge",
+                            NULL, Uinf, &Uinf, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-delta0", "Boundary layer height at inflow",
+                            NULL, delta0, &delta0, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-theta0", "Wall temperature",
+                            NULL, theta0, &theta0, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-P0", "Pressure at outflow",
+                            NULL, P0, &P0, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsBoundedInt("-Ndelta", "Number of elements below refine_height",
+                                NULL, Ndelta, &Ndelta, NULL, 1); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-refine_height",
+                            "Height of boundary layer mesh refinement",
+                            NULL, refine_height, &refine_height, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-growth",
+                            "Geometric growth rate of boundary layer mesh",
+                            NULL, growth, &growth, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-top_angle",
+                            "Downward angle of the top face of the domain (degrees)",
+                            NULL, top_angle, &top_angle, NULL); CHKERRQ(ierr);
+  PetscOptionsEnd();
+
+  PetscScalar meter           = user->units->meter;
+  PetscScalar second          = user->units->second;
+  PetscScalar Kelvin          = user->units->Kelvin;
+  PetscScalar Pascal          = user->units->Pascal;
+
+  mu     *= Pascal * second; // convert the physical values to the scaled unit system
+  theta0 *= Kelvin;
+  P0     *= Pascal;
+  Uinf   *= meter / second;
+  delta0 *= meter;
+
+  ierr = modifyMesh(dm, problem->dim, growth, Ndelta, refine_height, top_angle);
+  CHKERRQ(ierr);
+
+  user->phys->blasius_ctx->Uinf      = Uinf;
+  user->phys->blasius_ctx->delta0    = delta0;
+  user->phys->blasius_ctx->theta0    = theta0;
+  user->phys->blasius_ctx->P0        = P0;
+  user->phys->blasius_ctx->implicit  = user->phys->implicit;
+
+  user->phys->newtonian_ig_ctx->mu = mu; // overwrites mu in the shared Newtonian context
+  user->phys->blasius_ctx->newtonian_ctx = *user->phys->newtonian_ig_ctx; // by-value snapshot
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode SetupContext_BLASIUS(Ceed ceed, CeedData ceed_data,
+                                    AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
+  PetscFunctionBeginUser; // builds the setup, Newtonian, and Blasius QFunction contexts
+  PetscErrorCode ierr;
+  CeedQFunctionContextCreate(ceed, &ceed_data->setup_context);
+  CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, // context borrows setup_ctx; no copy is made
+                              sizeof(*setup_ctx), setup_ctx);
+  ierr = SetupContext_NEWTONIAN_IG(ceed, ceed_data, app_ctx, setup_ctx, phys);
+  CHKERRQ(ierr);
+  // Wrap the host BlasiusContext (by pointer) and attach it to the ICs and inflow/outflow QFunctions
+  CeedQFunctionContextCreate(ceed, &ceed_data->blasius_context);
+  CeedQFunctionContextSetData(ceed_data->blasius_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, sizeof(*phys->blasius_ctx), phys->blasius_ctx);
+  phys->has_neumann = PETSC_TRUE;
+  if (ceed_data->qf_ics) // each QFunction is optional; only set the context on those that exist
+    CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->blasius_context);
+  if (ceed_data->qf_apply_inflow)
+    CeedQFunctionSetContext(ceed_data->qf_apply_inflow, ceed_data->blasius_context);
+  if (ceed_data->qf_apply_outflow)
+    CeedQFunctionSetContext(ceed_data->qf_apply_outflow,
+                            ceed_data->blasius_context);
+  PetscFunctionReturn(0);
+}
+
diff --git a/examples/fluids/problems/channel.c b/examples/fluids/problems/channel.c
new file mode 100644
index 0000000000..3c23bd9047
--- /dev/null
+++ b/examples/fluids/problems/channel.c
@@ -0,0 +1,128 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Utility functions for setting up Channel flow
+
+#include "../navierstokes.h"
+#include "../qfunctions/channel.h"
+// Parameters read by the Channel QFunctions; NOTE(review): the #ifndef guard suggests an included header may declare the same struct - confirm
+#ifndef channel_context_struct
+#define channel_context_struct
+typedef struct ChannelContext_ *ChannelContext;
+struct ChannelContext_ {
+  bool       implicit; // !< Using implicit time stepping or not
+  CeedScalar theta0;   // !< Reference temperature
+  CeedScalar P0;       // !< Reference Pressure
+  CeedScalar umax;     // !< Centerline velocity
+  CeedScalar center;   // !< Y Coordinate for center of channel
+  CeedScalar H;        // !< Channel half-height
+  CeedScalar B;        // !< Body-force driving the flow
+  struct NewtonianIdealGasContext_ newtonian_ctx; // !< By-value copy of the Newtonian ideal gas context
+};
+#endif
+
+PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, void *setup_ctx,
+                          void *ctx) {
+  // Sets up the Channel problem on top of the Newtonian ideal gas setup: QFunction pointers, -umax/-theta0/-P0 options, and user->phys->channel_ctx.
+  PetscInt ierr; // NOTE(review): holds PetscErrorCode return values; consider declaring it as PetscErrorCode
+  ierr = NS_NEWTONIAN_IG(problem, dm, setup_ctx, ctx); CHKERRQ(ierr);
+  User              user = *(User *)ctx;
+  MPI_Comm          comm = PETSC_COMM_WORLD;
+  PetscFunctionBeginUser; // NOTE(review): placed after a CHKERRQ that can already return; PETSc expects this as the first executable line
+  ierr = PetscCalloc1(1, &user->phys->channel_ctx); CHKERRQ(ierr);
+
+  // ------------------------------------------------------
+  //               SET UP Channel
+  // ------------------------------------------------------
+  problem->ics               = ICsChannel;
+  problem->ics_loc           = ICsChannel_loc;
+  problem->apply_inflow      = Channel_Inflow;
+  problem->apply_inflow_loc  = Channel_Inflow_loc;
+  problem->apply_outflow     = Channel_Outflow;
+  problem->apply_outflow_loc = Channel_Outflow_loc;
+  problem->setup_ctx         = SetupContext_CHANNEL;
+
+  // -- Command Line Options
+  CeedScalar umax   = 10.;  // m/s
+  CeedScalar mu     = .01;  // Pa s, dynamic viscosity
+  //TODO ^^ make optional/respect explicit user set
+  CeedScalar theta0 = 300.; // K
+  CeedScalar P0     = 1.e5; // Pa
+  PetscOptionsBegin(comm, NULL, "Options for CHANNEL problem", NULL);
+  ierr = PetscOptionsScalar("-umax", "Centerline velocity of the Channel",
+                            NULL, umax, &umax, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-theta0", "Wall temperature",
+                            NULL, theta0, &theta0, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-P0", "Pressure at outflow",
+                            NULL, P0, &P0, NULL); CHKERRQ(ierr);
+  PetscOptionsEnd();
+
+  PetscScalar meter  = user->units->meter;
+  PetscScalar second = user->units->second;
+  PetscScalar Kelvin = user->units->Kelvin;
+  PetscScalar Pascal = user->units->Pascal;
+
+  mu     *= Pascal * second;
+  theta0 *= Kelvin;
+  P0     *= Pascal;
+  umax   *= meter / second;
+
+  //-- Setup Problem information
+  CeedScalar H, center;
+  {
+    PetscReal domain_min[3], domain_max[3], domain_size[3];
+    ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
+    for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+    // H is half the bounding-box extent in y; center is domain_min[1] plus H, both scaled by meter
+    H      = 0.5*domain_size[1]*meter;
+    center = H + domain_min[1]*meter;
+  }
+
+  user->phys->channel_ctx->center   = center;
+  user->phys->channel_ctx->H        = H;
+  user->phys->channel_ctx->theta0   = theta0;
+  user->phys->channel_ctx->P0       = P0;
+  user->phys->channel_ctx->umax     = umax;
+  user->phys->channel_ctx->implicit = user->phys->implicit;
+  user->phys->channel_ctx->B = -2*umax*mu/H;
+
+  {
+    // Calculate Body force
+    CeedScalar cv  = user->phys->newtonian_ig_ctx->cv,
+               cp  = user->phys->newtonian_ig_ctx->cp;
+    CeedScalar Rd  = cp - cv;
+    CeedScalar rho = P0 / (Rd*theta0);
+    CeedScalar g[] = {user->phys->channel_ctx->B / rho, 0., 0.};
+    ierr = PetscArraycpy(user->phys->newtonian_ig_ctx->g, g, 3); CHKERRQ(ierr);
+  }
+  user->phys->newtonian_ig_ctx->mu = mu; // stored unconditionally (default .01 Pa s); see the TODO above
+  user->phys->channel_ctx->newtonian_ctx = *user->phys->newtonian_ig_ctx;
+
+  PetscFunctionReturn(0);
+}
+// Runs the Newtonian ideal gas context setup, then wraps phys->channel_ctx and attaches it to whichever of the ICs/inflow/outflow QFunctions exist.
+PetscErrorCode SetupContext_CHANNEL(Ceed ceed, CeedData ceed_data,
+                                    AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
+  PetscFunctionBeginUser;
+  PetscInt ierr; // NOTE(review): holds PetscErrorCode return values; consider declaring it as PetscErrorCode
+  ierr = SetupContext_NEWTONIAN_IG(ceed, ceed_data, app_ctx, setup_ctx, phys);
+  CHKERRQ(ierr);
+  CeedQFunctionContextCreate(ceed, &ceed_data->channel_context);
+  CeedQFunctionContextSetData(ceed_data->channel_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER,
+                              sizeof(*phys->channel_ctx), phys->channel_ctx);
+  phys->has_neumann = PETSC_TRUE;
+  if (ceed_data->qf_ics) // each QFunction is only touched when its handle is non-NULL
+    CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->channel_context);
+  if (ceed_data->qf_apply_inflow)
+    CeedQFunctionSetContext(ceed_data->qf_apply_inflow, ceed_data->channel_context);
+  if (ceed_data->qf_apply_outflow)
+    CeedQFunctionSetContext(ceed_data->qf_apply_outflow,
+                            ceed_data->channel_context);
+  PetscFunctionReturn(0);
+}
diff --git a/examples/fluids/problems/densitycurrent.c b/examples/fluids/problems/densitycurrent.c
index 62dc438429..57fe457e6d 100644
--- a/examples/fluids/problems/densitycurrent.c
+++ b/examples/fluids/problems/densitycurrent.c
@@ -1,5 +1,6 @@
-// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
-// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other
+// CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE
+// files for details.
 //
 // SPDX-License-Identifier: BSD-2-Clause
 //
@@ -8,67 +9,102 @@
 /// @file
 /// Utility functions for setting up DENSITY_CURRENT
 
-#include "../navierstokes.h"
 #include "../qfunctions/densitycurrent.h"
+#include "../navierstokes.h"
 
 PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *setup_ctx,
                                   void *ctx) {
 
   PetscInt ierr;
-  ierr = NS_NEWTONIAN_IG(problem, dm, setup_ctx, ctx); CHKERRQ(ierr);
+  ierr = NS_NEWTONIAN_IG(problem, dm, setup_ctx, ctx);
+  CHKERRQ(ierr);
   SetupContext setup_context = *(SetupContext *)setup_ctx;
-  User         user          = *(User *)ctx;
-  MPI_Comm     comm          = PETSC_COMM_WORLD;
+  User user = *(User *)ctx;
+  MPI_Comm comm = PETSC_COMM_WORLD;
   PetscFunctionBeginUser;
 
   // ------------------------------------------------------
   //               SET UP DENSITY_CURRENT
   // ------------------------------------------------------
-  problem->ics     = ICsDC;
+  problem->ics = ICsDC;
   problem->ics_loc = ICsDC_loc;
-  problem->bc      = Exact_DC;
+  problem->bc = Exact_DC;
 
   // ------------------------------------------------------
   //             Create the libCEED context
   // ------------------------------------------------------
-  CeedScalar rc     = 1000.;   // m (Radius of bubble)
+  CeedScalar theta0 = 300.; // K
+  CeedScalar thetaC = -15.; // K
+  CeedScalar P0 = 1.e5;     // Pa
+  CeedScalar N = 0.01;      // 1/s
+  CeedScalar rc = 1000.;    // m (Radius of bubble)
   PetscReal center[3], dc_axis[3] = {0, 0, 0};
   PetscReal domain_min[3], domain_max[3], domain_size[3];
-  ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-  for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  ierr = DMGetBoundingBox(dm, domain_min, domain_max);
+  CHKERRQ(ierr);
+  for (int i = 0; i < 3; i++)
+    domain_size[i] = domain_max[i] - domain_min[i];
 
   // ------------------------------------------------------
   //              Command line Options
   // ------------------------------------------------------
   PetscOptionsBegin(comm, NULL, "Options for DENSITY_CURRENT problem", NULL);
+  ierr = PetscOptionsScalar("-theta0", "Reference potential temperature", NULL,
+                            theta0, &theta0, NULL);
+  CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-thetaC", "Perturbation of potential temperature",
+                            NULL, thetaC, &thetaC, NULL);
+  CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-P0", "Atmospheric pressure", NULL, P0, &P0, NULL);
+  CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-N", "Brunt-Vaisala frequency", NULL, N, &N, NULL);
+  CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-rc", "Characteristic radius of thermal bubble",
-                            NULL, rc, &rc, NULL); CHKERRQ(ierr);
-  for (int i=0; i<3; i++) center[i] = .5*domain_size[i];
+                            NULL, rc, &rc, NULL);
+  CHKERRQ(ierr);
+  for (int i = 0; i < 3; i++)
+    center[i] = .5 * domain_size[i];
   PetscInt n = problem->dim;
-  ierr = PetscOptionsRealArray("-center", "Location of bubble center",
-                               NULL, center, &n, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsRealArray("-center", "Location of bubble center", NULL,
+                               center, &n, NULL);
+  CHKERRQ(ierr);
   n = problem->dim;
   ierr = PetscOptionsRealArray("-dc_axis",
-                               "Axis of density current cylindrical anomaly, or {0,0,0} for spherically symmetric",
-                               NULL, dc_axis, &n, NULL); CHKERRQ(ierr);
+                               "Axis of density current cylindrical anomaly, "
+                               "or {0,0,0} for spherically symmetric",
+                               NULL, dc_axis, &n, NULL);
+  CHKERRQ(ierr);
   {
     PetscReal norm = PetscSqrtReal(PetscSqr(dc_axis[0]) + PetscSqr(dc_axis[1]) +
                                    PetscSqr(dc_axis[2]));
     if (norm > 0) {
-      for (int i=0; i<3; i++)  dc_axis[i] /= norm;
+      for (int i = 0; i < 3; i++)
+        dc_axis[i] /= norm;
     }
   }
 
   PetscOptionsEnd();
 
-  PetscScalar meter = user->units->meter;
+  PetscScalar meter           = user->units->meter;
+  PetscScalar second          = user->units->second;
+  PetscScalar Kelvin          = user->units->Kelvin;
+  PetscScalar Pascal          = user->units->Pascal;
   rc = fabs(rc) * meter;
-  for (int i=0; i<3; i++) center[i] *= meter;
+  theta0 *= Kelvin;
+  thetaC *= Kelvin;
+  P0 *= Pascal;
+  N *= (1. / second);
+  for (int i = 0; i < 3; i++)
+    center[i] *= meter;
 
-  setup_context->rc         = rc;
-  setup_context->center[0]  = center[0];
-  setup_context->center[1]  = center[1];
-  setup_context->center[2]  = center[2];
+  setup_context->theta0 = theta0;
+  setup_context->thetaC = thetaC;
+  setup_context->P0 = P0;
+  setup_context->N = N;
+  setup_context->rc = rc;
+  setup_context->center[0] = center[0];
+  setup_context->center[1] = center[1];
+  setup_context->center[2] = center[2];
   setup_context->dc_axis[0] = dc_axis[0];
   setup_context->dc_axis[1] = dc_axis[1];
   setup_context->dc_axis[2] = dc_axis[2];
@@ -79,15 +115,15 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *setup_ctx,
 PetscErrorCode SetupContext_DENSITY_CURRENT(Ceed ceed, CeedData ceed_data,
     AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
   PetscFunctionBeginUser;
-  PetscInt ierr = SetupContext_NEWTONIAN_IG(ceed, ceed_data, app_ctx, setup_ctx,
-                  phys);
+  PetscInt ierr =
+    SetupContext_NEWTONIAN_IG(ceed, ceed_data, app_ctx, setup_ctx, phys);
   CHKERRQ(ierr);
   PetscFunctionReturn(0);
 }
 
 PetscErrorCode PRINT_DENSITY_CURRENT(Physics phys, SetupContext setup_ctx,
                                      AppCtx app_ctx) {
-  MPI_Comm       comm = PETSC_COMM_WORLD;
+  MPI_Comm comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
   PetscFunctionBeginUser;
 
diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c
index 1265c564ef..8422ce5cbf 100644
--- a/examples/fluids/problems/newtonian.c
+++ b/examples/fluids/problems/newtonian.c
@@ -12,6 +12,28 @@
 #include "../qfunctions/setupgeo.h"
 #include "../qfunctions/newtonian.h"
 
+// Newtonian ideal gas parameters filled in by NS_NEWTONIAN_IG; NOTE(review): the #ifndef guard suggests an included header may declare the same struct - confirm
+#ifndef newtonian_context_struct
+#define newtonian_context_struct
+typedef struct NewtonianIdealGasContext_ *NewtonianIdealGasContext;
+struct NewtonianIdealGasContext_ {
+  CeedScalar lambda; // Stokes hypothesis second viscosity coefficient (-lambda)
+  CeedScalar mu;     // Dynamic viscosity
+  CeedScalar k;      // Thermal conductivity (-k)
+  CeedScalar cv;     // Heat capacity at constant volume (-cv)
+  CeedScalar cp;     // Heat capacity at constant pressure (-cp)
+  CeedScalar g[3];   // Gravitational acceleration vector (-g)
+  CeedScalar c_tau;  // Stabilization constant (-c_tau)
+  CeedScalar Ctau_t; // Stabilization time constant (-Ctau_t)
+  CeedScalar Ctau_v; // Stabilization viscous constant (-Ctau_v)
+  CeedScalar Ctau_C; // Stabilization continuity constant (-Ctau_C)
+  CeedScalar Ctau_M; // Stabilization momentum constant (-Ctau_M)
+  CeedScalar Ctau_E; // Stabilization energy constant (-Ctau_E)
+  CeedScalar dt;     // Size of timestep; registered on the context as "timestep size"
+  StabilizationType stabilization; // Stabilization method (-stab)
+};
+#endif
+
 PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
                                void *ctx) {
   SetupContext      setup_context = *(SetupContext *)setup_ctx;
@@ -48,19 +70,18 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //             Create the libCEED context
   // ------------------------------------------------------
-  CeedScalar theta0 = 300.;    // K
-  CeedScalar thetaC = -15.;    // K
-  CeedScalar P0     = 1.e5;    // Pa
-  CeedScalar N      = 0.01;    // 1/s
-  CeedScalar cv     = 717.;    // J/(kg K)
-  CeedScalar cp     = 1004.;   // J/(kg K)
-  CeedScalar g      = 9.81;    // m/s^2
-  CeedScalar lambda = -2./3.;  // -
-  CeedScalar mu     = 75.;     // Pa s, dynamic viscosity
-  // mu = 75 is not physical for air, but is good for numerical stability
-  CeedScalar k      = 0.02638; // W/(m K)
-  CeedScalar c_tau  = 0.5;     // -
-  // c_tau = 0.5 is reported as "optimal" in Hughes et al 2010
+  CeedScalar cv     = 717.;          // J/(kg K)
+  CeedScalar cp     = 1004.;         // J/(kg K)
+  CeedScalar g[3]   = {0, 0, -9.81}; // m/s^2
+  CeedScalar lambda = -2./3.;        // -
+  CeedScalar mu     = 1.8e-5;        // Pa s, dynamic viscosity
+  CeedScalar k      = 0.02638;       // W/(m K)
+  CeedScalar c_tau  = 0.5;           // -
+  CeedScalar Ctau_t  = 1.0;          // -
+  CeedScalar Ctau_v  = 36.0;         // TODO make function of degree
+  CeedScalar Ctau_C  = 1.0;          // TODO make function of degree
+  CeedScalar Ctau_M  = 1.0;          // TODO make function of degree
+  CeedScalar Ctau_E  = 1.0;          // TODO make function of degree
   PetscReal domain_min[3], domain_max[3], domain_size[3];
   ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
   for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
@@ -68,9 +89,9 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //             Create the PETSc context
   // ------------------------------------------------------
-  PetscScalar meter    = 1e-2;  // 1 meter in scaled length units
-  PetscScalar kilogram = 1e-6;  // 1 kilogram in scaled mass units
-  PetscScalar second   = 1e-2;  // 1 second in scaled time units
+  PetscScalar meter    = 1;  // 1 meter in scaled length units
+  PetscScalar kilogram = 1;  // 1 kilogram in scaled mass units
+  PetscScalar second   = 1;  // 1 second in scaled time units
   PetscScalar Kelvin   = 1;     // 1 Kelvin in scaled temperature units
   PetscScalar W_per_m_K, Pascal, J_per_kg_K, m_per_squared_s;
 
@@ -81,20 +102,10 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
                     NULL);
 
   // -- Physics
-  ierr = PetscOptionsScalar("-theta0", "Reference potential temperature",
-                            NULL, theta0, &theta0, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsScalar("-thetaC", "Perturbation of potential temperature",
-                            NULL, thetaC, &thetaC, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsScalar("-P0", "Atmospheric pressure",
-                            NULL, P0, &P0, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsScalar("-N", "Brunt-Vaisala frequency",
-                            NULL, N, &N, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-cv", "Heat capacity at constant volume",
                             NULL, cv, &cv, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-cp", "Heat capacity at constant pressure",
                             NULL, cp, &cp, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsScalar("-g", "Gravitational acceleration",
-                            NULL, g, &g, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-lambda",
                             "Stokes hypothesis second viscosity coefficient",
                             NULL, lambda, &lambda, NULL); CHKERRQ(ierr);
@@ -103,11 +114,24 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   ierr = PetscOptionsScalar("-k", "Thermal conductivity",
                             NULL, k, &k, NULL); CHKERRQ(ierr);
 
+  PetscInt dim = problem->dim;
+  ierr = PetscOptionsRealArray("-g", "Gravitational acceleration",
+                               NULL, g, &dim, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsEnum("-stab", "Stabilization method", NULL,
                           StabilizationTypes, (PetscEnum)(stab = STAB_NONE),
                           (PetscEnum *)&stab, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-c_tau", "Stabilization constant",
                             NULL, c_tau, &c_tau, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-Ctau_t", "Stabilization time constant",
+                            NULL, Ctau_t, &Ctau_t, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-Ctau_v", "Stabilization viscous constant",
+                            NULL, Ctau_v, &Ctau_v, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-Ctau_C", "Stabilization continuity constant",
+                            NULL, Ctau_C, &Ctau_C, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-Ctau_M", "Stabilization momentum constant",
+                            NULL, Ctau_M, &Ctau_M, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-Ctau_E", "Stabilization energy constant",
+                            NULL, Ctau_E, &Ctau_E, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsBool("-implicit", "Use implicit (IFunction) formulation",
                           NULL, implicit=PETSC_FALSE, &implicit, NULL);
   CHKERRQ(ierr);
@@ -157,30 +181,22 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   //           Set up the libCEED context
   // ------------------------------------------------------
   // -- Scale variables to desired units
-  theta0 *= Kelvin;
-  thetaC *= Kelvin;
-  P0     *= Pascal;
-  N      *= (1./second);
   cv     *= J_per_kg_K;
   cp     *= J_per_kg_K;
-  g      *= m_per_squared_s;
   mu     *= Pascal * second;
   k      *= W_per_m_K;
   for (int i=0; i<3; i++) domain_size[i] *= meter;
+  for (int i=0; i<3; i++) g[i]           *= m_per_squared_s;
   problem->dm_scale = meter;
 
   // -- Setup Context
-  setup_context->theta0     = theta0;
-  setup_context->thetaC     = thetaC;
-  setup_context->P0         = P0;
-  setup_context->N          = N;
   setup_context->cv         = cv;
   setup_context->cp         = cp;
-  setup_context->g          = g;
   setup_context->lx         = domain_size[0];
   setup_context->ly         = domain_size[1];
   setup_context->lz         = domain_size[2];
   setup_context->time       = 0;
+  ierr = PetscArraycpy(setup_context->g, g, 3); CHKERRQ(ierr);
 
   // -- Solver Settings
   user->phys->stab          = stab;
@@ -193,9 +209,14 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   user->phys->newtonian_ig_ctx->k             = k;
   user->phys->newtonian_ig_ctx->cv            = cv;
   user->phys->newtonian_ig_ctx->cp            = cp;
-  user->phys->newtonian_ig_ctx->g             = g;
   user->phys->newtonian_ig_ctx->c_tau         = c_tau;
+  user->phys->newtonian_ig_ctx->Ctau_t        = Ctau_t;
+  user->phys->newtonian_ig_ctx->Ctau_v        = Ctau_v;
+  user->phys->newtonian_ig_ctx->Ctau_C        = Ctau_C;
+  user->phys->newtonian_ig_ctx->Ctau_M        = Ctau_M;
+  user->phys->newtonian_ig_ctx->Ctau_E        = Ctau_E;
   user->phys->newtonian_ig_ctx->stabilization = stab;
+  ierr = PetscArraycpy(user->phys->newtonian_ig_ctx->g, g, 3); CHKERRQ(ierr);
 
   PetscFunctionReturn(0);
 }
@@ -207,10 +228,14 @@ PetscErrorCode SetupContext_NEWTONIAN_IG(Ceed ceed, CeedData ceed_data,
   CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
                               CEED_USE_POINTER, sizeof(*setup_ctx), setup_ctx);
   CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->setup_context);
+
   CeedQFunctionContextCreate(ceed, &ceed_data->newt_ig_context);
   CeedQFunctionContextSetData(ceed_data->newt_ig_context, CEED_MEM_HOST,
                               CEED_USE_POINTER,
                               sizeof(*phys->newtonian_ig_ctx), phys->newtonian_ig_ctx);
+  CeedQFunctionContextRegisterDouble(ceed_data->newt_ig_context, "timestep size",
+                                     offsetof(struct NewtonianIdealGasContext_, dt), 1, "Size of timestep, delta t");
+
   if (ceed_data->qf_rhs_vol)
     CeedQFunctionSetContext(ceed_data->qf_rhs_vol, ceed_data->newt_ig_context);
   if (ceed_data->qf_ifunction_vol)
diff --git a/examples/fluids/qfunctions/advection.h b/examples/fluids/qfunctions/advection.h
index ad69ff0762..9d9e2bdbfa 100644
--- a/examples/fluids/qfunctions/advection.h
+++ b/examples/fluids/qfunctions/advection.h
@@ -23,7 +23,7 @@ struct SetupContext_ {
   CeedScalar N;
   CeedScalar cv;
   CeedScalar cp;
-  CeedScalar g;
+  CeedScalar g[3];
   CeedScalar rc;
   CeedScalar lx;
   CeedScalar ly;
diff --git a/examples/fluids/qfunctions/advection2d.h b/examples/fluids/qfunctions/advection2d.h
index f828f83f15..08c4dc163f 100644
--- a/examples/fluids/qfunctions/advection2d.h
+++ b/examples/fluids/qfunctions/advection2d.h
@@ -27,7 +27,7 @@ struct SetupContext_ {
   CeedScalar N;
   CeedScalar cv;
   CeedScalar cp;
-  CeedScalar g;
+  CeedScalar g[3];
   CeedScalar rc;
   CeedScalar lx;
   CeedScalar ly;
diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h
new file mode 100644
index 0000000000..5399ff638d
--- /dev/null
+++ b/examples/fluids/qfunctions/blasius.h
@@ -0,0 +1,338 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Operator for Navier-Stokes example using PETSc
+
+
+#ifndef blasius_h
+#define blasius_h
+
+#include <math.h>
+#include <ceed.h>
+#include "../navierstokes.h"
+// Parameters read by the Blasius QFunctions in this header
+#ifndef blasius_context_struct
+#define blasius_context_struct
+typedef struct BlasiusContext_ *BlasiusContext;
+struct BlasiusContext_ {
+  bool       implicit;  // !< Using implicit time stepping or not
+  CeedScalar delta0;    // !< Boundary layer height at inflow
+  CeedScalar Uinf;      // !< Velocity at boundary layer edge
+  CeedScalar P0;        // !< Pressure at outflow
+  CeedScalar theta0;    // !< Temperature at inflow
+  struct NewtonianIdealGasContext_ newtonian_ctx; // !< By-value copy of the Newtonian ideal gas context
+};
+#endif
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846
+#endif
+// Interpolates the tabulated eta/f/fp/fpp profiles at eta = y*sqrt(Uinf/(nu*(x0+x))) and writes u, v and t12 through the output pointers.
+void CEED_QFUNCTION_HELPER(BlasiusSolution)(const CeedScalar y,
+    const CeedScalar Uinf, const CeedScalar x0, const CeedScalar x,
+    const CeedScalar rho, CeedScalar *u, CeedScalar *v, CeedScalar *t12,
+    const NewtonianIdealGasContext newt_ctx) {
+  // Each of the four tables below holds nprofs entries; presumably f, f', f'' of the Blasius similarity solution sampled at eta_table - confirm
+  CeedInt nprofs = 50;
+  // *INDENT-OFF*
+  CeedScalar eta_table[] = {
+    0.000000000000000000e+00, 1.282051282051281937e-01, 2.564102564102563875e-01, 3.846153846153845812e-01, 5.128205128205127750e-01,
+    6.410256410256409687e-01, 7.692307692307691624e-01, 8.974358974358973562e-01, 1.025641025641025550e+00, 1.153846153846153744e+00,
+    1.282051282051281937e+00, 1.410256410256410131e+00, 1.538461538461538325e+00, 1.666666666666666519e+00, 1.794871794871794712e+00,
+    1.923076923076922906e+00, 2.051282051282051100e+00, 2.179487179487179294e+00, 2.307692307692307487e+00, 2.435897435897435681e+00,
+    2.564102564102563875e+00, 2.692307692307692069e+00, 2.820512820512820262e+00, 2.948717948717948456e+00, 3.076923076923076650e+00,
+    3.205128205128204844e+00, 3.333333333333333037e+00, 3.461538461538461231e+00, 3.589743589743589425e+00, 3.717948717948717618e+00,
+    3.846153846153845812e+00, 3.974358974358974006e+00, 4.102564102564102200e+00, 4.230769230769229949e+00, 4.358974358974358587e+00,
+    4.487179487179487225e+00, 4.615384615384614975e+00, 4.743589743589742724e+00, 4.871794871794871362e+00, 5.000000000000000000e+00,
+    5.500000000000000000e+00, 6.000000000000000000e+00, 6.500000000000000000e+00, 7.000000000000000000e+00, 7.500000000000000000e+00,
+    8.000000000000000000e+00, 8.500000000000000000e+00, 9.000000000000000000e+00, 9.500000000000000000e+00, 1.000000000000000000e+01};
+
+  CeedScalar f_table[] = {
+    0.000000000000000000e+00, 2.728923405566200267e-03, 1.091524811461423369e-02, 2.455658828897525764e-02, 4.364674649279581820e-02,
+    6.817382707725749835e-02, 9.811838418932711248e-02, 1.334516294237205192e-01, 1.741337304561980659e-01, 2.201122374410622862e-01,
+    2.713206781625860375e-01, 3.276773654929600599e-01, 3.890844612583744255e-01, 4.554273387986328414e-01, 5.265742820946719416e-01,
+    6.023765522220410062e-01, 6.826688421431770237e-01, 7.672701287583111318e-01, 8.559849171804534418e-01, 9.486048570979430661e-01,
+    1.044910695686512625e+00, 1.144674516826549082e+00, 1.247662203367335465e+00, 1.353636048811749593e+00, 1.462357437868362364e+00,
+    1.573589512396551759e+00, 1.687099740622293842e+00, 1.802662313062363353e+00, 1.920060297987626230e+00, 2.039087501786055245e+00,
+    2.159549994377929050e+00, 2.281267275838891884e+00, 2.404073076539093190e+00, 2.527815798402052838e+00, 2.652358618452637540e+00,
+    2.777579287003750341e+00, 2.903369661199559637e+00, 3.029635020019957992e+00, 3.156293209307130088e+00, 3.283273665161465349e+00,
+    3.780571892998292771e+00, 4.279620922520262383e+00, 4.779322325882148448e+00, 5.279238811036782053e+00, 5.779218028455369804e+00,
+    6.279213431354994768e+00, 6.779212528163703233e+00, 7.279212370655419484e+00, 7.779212346288013613e+00, 8.279212342945751146e+00};
+
+  CeedScalar fp_table[] = {
+    0.000000000000000000e+00, 4.257083277988830267e-02, 8.513297869782740501e-02, 1.276641169537044151e-01, 1.701271279078802878e-01,
+    2.124702831905590783e-01, 2.546276046951935212e-01, 2.965194442747576264e-01, 3.380533304776729975e-01, 3.791251204629754179e-01,
+    4.196204840172004791e-01, 4.594167322894788796e-01, 4.983849866855867838e-01, 5.363926638765821320e-01, 5.733062319885513514e-01,
+    6.089941719927144392e-01, 6.433300586189647507e-01, 6.761956584341198839e-01, 7.074839307288774970e-01, 7.371018110314454530e-01,
+    7.649726585225528064e-01, 7.910382579383948842e-01, 8.152602836158657773e-01, 8.376211573266827415e-01, 8.581242609418713307e-01,
+    8.767934976651666767e-01, 8.936722290953328374e-01, 9.088216471306606037e-01, 9.223186672607004422e-01, 9.342534510898168332e-01,
+    9.447266795705382414e-01, 9.538467037387058367e-01, 9.617266968332524035e-01, 9.684819213624265011e-01, 9.742272083384174719e-01,
+    9.790747253056680810e-01, 9.831320868743089747e-01, 9.865008381344084754e-01, 9.892753192614093249e-01, 9.915419001656551323e-01,
+    9.968788209317821503e-01, 9.989728724371175206e-01, 9.996990677381791812e-01, 9.999216041491896245e-01, 9.999818594083667023e-01,
+    9.999962745365539307e-01, 9.999993214550036980e-01, 9.999998904550418954e-01, 9.999999843329338001e-01, 9.999999980166356384e-01};
+
+  CeedScalar fpp_table[] = {
+    3.320573362157903663e-01, 3.320379743512646420e-01, 3.319024760665882368e-01, 3.315350015070190337e-01, 3.308206767975666041e-01,
+    3.296466995822193158e-01, 3.279038639411161471e-01, 3.254884713737624113e-01, 3.223045750196085746e-01, 3.182664816607024272e-01,
+    3.133014118810801829e-01, 3.073521951089355775e-01, 3.003798556086043625e-01, 2.923659305537876785e-01, 2.833143548208253981e-01,
+    2.732527514995234941e-01, 2.622329840371728227e-01, 2.503308560706500874e-01, 2.376448876931176457e-01, 2.242941499773744018e-01,
+    2.104151994284793603e-01, 1.961582158440171031e-01, 1.816825052623964043e-01, 1.671515786102889534e-01, 1.527280512426029968e-01,
+    1.385686249977987894e-01, 1.248194106805364800e-01, 1.116118251613979206e-01, 9.905925581301598670e-02, 8.725462988794610575e-02,
+    7.626896310981794158e-02, 6.615089622448211415e-02, 5.692716644118058639e-02, 4.860390768479891377e-02, 4.116863313890323922e-02,
+    3.459272784597366285e-02, 2.883426862493499582e-02, 2.384099224121952881e-02, 1.955324839409207718e-02, 1.590679868531958210e-02,
+    6.578593141419011685e-03, 2.402039843751689954e-03, 7.741093231657678389e-04, 2.201689553063347941e-04, 5.526217815680267893e-05,
+    1.224092624232004387e-05, 2.392841910090350858e-06, 4.127879363882133676e-07, 6.284244603762621373e-08, 8.442944409712819646e-09};
+  // *INDENT-ON*
+
+  CeedScalar nu = newt_ctx->mu / rho;
+  CeedScalar eta = y*sqrt(Uinf/(nu*(x0+x)));
+  CeedInt idx=-1;
+  // Find the first table entry strictly greater than eta; idx stays -1 when there is none
+  for(CeedInt i=0; i<nprofs; i++) {
+    if (eta < eta_table[i]) {
+      idx = i;
+      break;
+    }
+  }
+  CeedScalar f, fp, fpp;
+
+  if (idx > 0) { // eta within the bounds of eta_table: linear interpolation between entries idx-1 and idx
+    CeedScalar coeff = (eta - eta_table[idx-1]) / (eta_table[idx] - eta_table[idx
+                       -1]);
+
+    f   = f_table[idx-1]   + coeff*( f_table[idx]   - f_table[idx-1] );
+    fp  = fp_table[idx-1]  + coeff*( fp_table[idx]  - fp_table[idx-1] );
+    fpp = fpp_table[idx-1] + coeff*( fpp_table[idx] - fpp_table[idx-1] );
+  } else { // eta outside bounds of eta_table; NOTE(review): eta < eta_table[0] (idx == 0) also lands here and gets the last table row - confirm intended
+    f   = f_table[nprofs-1];
+    fp  = fp_table[nprofs-1];
+    fpp = fpp_table[nprofs-1];
+    eta = eta_table[nprofs-1];
+  }
+  // Outputs are formed from the looked-up f, fp, fpp and the (possibly clamped) eta
+  *u = Uinf*fp;
+  *t12 = rho*nu*Uinf*fpp*sqrt(Uinf/(nu*(x0+x)));
+  *v = 0.5*sqrt(nu*Uinf/(x0+x))*(eta*fp - f);
+}
+
+// *****************************************************************************
+// This QFunction sets a Blasius boundary layer for the initial condition: in[0] holds coordinates, out[0] receives the 5-component state
+// *****************************************************************************
+CEED_QFUNCTION(ICsBlasius)(void *ctx, CeedInt Q,
+                           const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs
+  const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
+
+  // Outputs
+  CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+  // ctx is interpreted as a BlasiusContext
+  const BlasiusContext context = (BlasiusContext)ctx;
+  const CeedScalar cv     = context->newtonian_ctx.cv;
+  const CeedScalar cp     = context->newtonian_ctx.cp;
+  const CeedScalar gamma  = cp/cv;
+  const CeedScalar mu     = context->newtonian_ctx.mu;
+
+  const CeedScalar theta0 = context->theta0;
+  const CeedScalar P0     = context->P0;
+  const CeedScalar delta0 = context->delta0;
+  const CeedScalar Uinf   = context->Uinf;
+  // rho follows from P0 = (gamma - 1)*rho*e_internal; x0 = Uinf*rho*delta0^2/(25*mu), presumably where 5*sqrt(nu*x0/Uinf) equals delta0 - confirm
+  const CeedScalar e_internal = cv * theta0;
+  const CeedScalar rho        = P0 / ((gamma - 1) * e_internal);
+  const CeedScalar x0         = Uinf*rho / (mu*25/ (delta0*delta0) );
+  CeedScalar u, v, t12; // t12 is filled by BlasiusSolution but not used in this QFunction
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD
+  for (CeedInt i=0; i<Q; i++) {
+    const CeedScalar x[] = {X[0][i], X[1][i], X[2][i]};
+
+    BlasiusSolution(x[1], Uinf, x0, x[0], rho, &u, &v, &t12,
+                    &context->newtonian_ctx);
+    // q0 = {rho, rho*u, rho*v, 0, rho*e_internal + kinetic energy}
+    q0[0][i] = rho;
+    q0[1][i] = u * rho;
+    q0[2][i] = v * rho;
+    q0[3][i] = 0.;
+    q0[4][i] = rho * e_internal + 0.5*(u*u + v*v)*rho;
+  } // End of Quadrature Point Loop
+  return 0;
+}
+
+// *****************************************************************************
+CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q,
+                               const CeedScalar *const *in,
+                               CeedScalar *const *out) {
+  // *INDENT-OFF*
+  // Inputs: interior state, surface quadrature data, coordinates
+  const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[0],
+                   (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1],
+                   (*X)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+
+  // Outputs: boundary flux for each of the 5 state components
+  CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+  // *INDENT-ON*
+  const BlasiusContext context = (BlasiusContext)ctx;
+  const bool implicit     = context->implicit;
+  const CeedScalar mu     = context->newtonian_ctx.mu;
+  const CeedScalar cv     = context->newtonian_ctx.cv;
+  const CeedScalar cp     = context->newtonian_ctx.cp;
+  const CeedScalar Rd     = cp - cv;
+
+  const CeedScalar theta0 = context->theta0;
+  const CeedScalar P0     = context->P0;
+  const CeedScalar delta0 = context->delta0;
+  const CeedScalar Uinf   = context->Uinf;
+  const CeedScalar rho_0  = P0 / (Rd * theta0);
+  const CeedScalar x0     = Uinf*rho_0 / (mu*25/ (delta0*delta0) );
+
+  CeedPragmaSIMD
+  // Quadrature Point Loop
+  for (CeedInt i=0; i<Q; i++) {
+    // Setup
+    // -- Interp-to-Interp q_data
+    // For explicit mode, the surface integral is on the RHS of ODE q_dot = f(q).
+    // For implicit mode, it gets pulled to the LHS of implicit ODE/DAE g(q_dot, q).
+    // We can effect this by swapping the sign on this weight
+    const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
+
+    // Calculate prescribed inflow values
+    const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
+
+    // Find pressure using state inside the domain
+    const CeedScalar rho = q[0][i];
+    const CeedScalar P = rho * Rd * theta0; // interior rho with exterior T
+
+    // Find inflow state using calculated P and prescribed velocity, theta0
+    const CeedScalar e_internal = cv * theta0;
+
+    CeedScalar velocity[3] = {0.}; // only components 0 and 1 are passed to BlasiusSolution
+    CeedScalar t12;
+    BlasiusSolution(x[1], Uinf, x0, x[0], rho_0, &velocity[0], &velocity[1],
+                    &t12, &context->newtonian_ctx);
+
+    const CeedScalar E_kinetic = .5 * rho * (velocity[0]*velocity[0] +
+                                 velocity[1]*velocity[1] +
+                                 velocity[2]*velocity[2]);
+    const CeedScalar E = rho * e_internal + E_kinetic;  // use interior rho
+    // together with the prescribed (exterior) temperature theta0 and velocity
+    // ---- Normal vect
+    const CeedScalar norm[3] = {q_data_sur[1][i],
+                                q_data_sur[2][i],
+                                q_data_sur[3][i]
+                               };
+
+    // The Physics
+    // Zero v so all future terms can safely sum into it
+    for (int j=0; j<5; j++) v[j][i] = 0.;
+
+    const CeedScalar u_normal = norm[0]*velocity[0] +
+                                norm[1]*velocity[1] +
+                                norm[2]*velocity[2];
+
+    // Boundary flux terms, summed into v
+    // -- Density
+    v[0][i] -= wdetJb * rho * u_normal; // interior rho
+
+    // -- Momentum
+    for (int j=0; j<3; j++)
+      v[j+1][i] -= wdetJb * (rho * u_normal * velocity[j] + // interior rho
+                             norm[j] * P); // mixed P
+    v[2][i] -= wdetJb * t12  ; // extra t12 term on momentum component 1 (v[1+1])
+
+    // -- Total Energy Density
+    v[4][i] -= wdetJb * u_normal * (E + P);
+    v[4][i] -= wdetJb * t12 * velocity[1];
+
+  } // End Quadrature Point Loop
+  return 0;
+}
+
+// *****************************************************************************
+CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q,
+                                const CeedScalar *const *in,
+                                CeedScalar *const *out) {
+  // *INDENT-OFF*
+  // Inputs: interior state, surface quadrature data, coordinates
+  const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[0],
+                   (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1],
+                   (*X)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+  // Outputs: boundary flux for each of the 5 state components
+  CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+  // *INDENT-ON*
+
+  const BlasiusContext context = (BlasiusContext)ctx;
+  const bool implicit     = context->implicit;
+  const CeedScalar mu     = context->newtonian_ctx.mu;
+  const CeedScalar cv     = context->newtonian_ctx.cv;
+  const CeedScalar cp     = context->newtonian_ctx.cp;
+  const CeedScalar Rd     = cp - cv;
+
+  const CeedScalar theta0 = context->theta0;
+  const CeedScalar P0     = context->P0;
+  const CeedScalar rho_0  = P0 / (Rd*theta0);
+  const CeedScalar delta0 = context->delta0;
+  const CeedScalar Uinf   = context->Uinf;
+  const CeedScalar x0     = Uinf*rho_0 / (mu*25/ (delta0*delta0) );
+
+  CeedPragmaSIMD
+  // Quadrature Point Loop
+  for (CeedInt i=0; i<Q; i++) {
+    // Setup
+    // -- Interp in
+    const CeedScalar rho      =  q[0][i];
+    const CeedScalar u[3]     = {q[1][i] / rho,
+                                 q[2][i] / rho,
+                                 q[3][i] / rho
+                                };
+    const CeedScalar E        =  q[4][i];
+
+    // -- Interp-to-Interp q_data
+    // For explicit mode, the surface integral is on the RHS of ODE q_dot = f(q).
+    // For implicit mode, it gets pulled to the LHS of implicit ODE/DAE g(q_dot, q).
+    // We can effect this by swapping the sign on this weight
+    const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
+
+    // ---- Normal vect
+    const CeedScalar norm[3] = {q_data_sur[1][i],
+                                q_data_sur[2][i],
+                                q_data_sur[3][i]
+                               };
+
+    // The Physics
+    // Zero v so all future terms can safely sum into it
+    for (int j=0; j<5; j++) v[j][i] = 0.;
+
+    // Implementing outflow condition
+    const CeedScalar P         = P0; // pressure is fixed to the reference P0
+    const CeedScalar u_normal  = norm[0]*u[0] + norm[1]*u[1] +
+                                 norm[2]*u[2]; // Normal velocity
+
+    // Calculate prescribed outflow traction values
+    const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
+    CeedScalar velocity[3] = {0.}; // only velocity[1] and t12 are used below
+    CeedScalar t12;
+    BlasiusSolution(x[1], Uinf, x0, x[0], rho_0, &velocity[0], &velocity[1],
+                    &t12, &context->newtonian_ctx);
+    // Boundary flux terms, summed into v (interior rho, u, E with fixed P)
+    // -- Density
+    v[0][i] -= wdetJb * rho * u_normal;
+
+    // -- Momentum
+    for (int j=0; j<3; j++)
+      v[j+1][i] -= wdetJb *(rho * u_normal * u[j] + norm[j] * P);
+    v[2][i] += wdetJb * t12  ; // t12 term enters with the opposite sign to Blasius_Inflow
+
+    // -- Total Energy Density
+    v[4][i] -= wdetJb * u_normal * (E + P);
+    v[4][i] += wdetJb * t12 * velocity[1];
+
+  } // End Quadrature Point Loop
+  return 0;
+}
+#endif // blasius_h
diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h
new file mode 100644
index 0000000000..14f8feeaf8
--- /dev/null
+++ b/examples/fluids/qfunctions/channel.h
@@ -0,0 +1,240 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Operator for Navier-Stokes example using PETSc
+
+
+#ifndef channel_h
+#define channel_h
+
+#include <math.h>
+#include <ceed.h>
+#include "../navierstokes.h"
+
+#ifndef channel_context_struct
+#define channel_context_struct
+typedef struct ChannelContext_ *ChannelContext;
+struct ChannelContext_ {
+  bool       implicit; // !< Using implicit timestepping or not
+  CeedScalar theta0;   // !< Reference temperature
+  CeedScalar P0;       // !< Reference Pressure
+  CeedScalar umax;     // !< Centerline velocity
+  CeedScalar center;   // !< Y Coordinate for center of channel
+  CeedScalar H;        // !< Channel half-height
+  CeedScalar B;        // !< Body-force driving the flow
+  struct NewtonianIdealGasContext_ newtonian_ctx; // !< Fluid properties (cv, cp, mu, k are read in this file)
+};
+#endif
+
+CEED_QFUNCTION_HELPER int Exact_Channel(CeedInt dim, CeedScalar time,
+                                        const CeedScalar X[], CeedInt Nf, CeedScalar q[], void *ctx) {
+  // Writes q[0..4] from X[1] and the context; dim, time and Nf are not used
+  const ChannelContext context = (ChannelContext)ctx;
+  const CeedScalar theta0 = context->theta0;
+  const CeedScalar P0     = context->P0;
+  const CeedScalar umax   = context->umax;
+  const CeedScalar center = context->center;
+  const CeedScalar H      = context->H;
+  const CeedScalar cv     = context->newtonian_ctx.cv;
+  const CeedScalar cp     = context->newtonian_ctx.cp;
+  const CeedScalar Rd     = cp - cv;
+  const CeedScalar mu     = context->newtonian_ctx.mu;
+  const CeedScalar k      = context->newtonian_ctx.k;
+
+  const CeedScalar y=X[1];
+  // Temperature: quartic profile in the normalized wall distance (y-center)/H
+  const CeedScalar Pr    = mu / (cp*k); // NOTE(review): Prandtl number is usually cp*mu/k - confirm
+  const CeedScalar Ec    = (umax*umax) / (cp*theta0);
+  const CeedScalar theta = theta0*( 1 + (Pr*Ec/3)*(1 - pow((y-center)/H,4)));
+
+  const CeedScalar p = P0;
+
+  const CeedScalar rho = p / (Rd*theta);
+  // State: parabolic x-momentum profile, zero y/z momentum, internal + kinetic energy
+  q[0] = rho;
+  q[1] = rho * umax*(1 - pow((y-center)/H,2));
+  q[2] = 0;
+  q[3] = 0;
+  q[4] = rho * (cv*theta) + .5 * (q[1]*q[1] + q[2]*q[2] + q[3]*q[3]) / rho;
+
+  return 0;
+}
+
+// *****************************************************************************
+// This QFunction sets the initial condition
+// *****************************************************************************
+CEED_QFUNCTION(ICsChannel)(void *ctx, CeedInt Q,
+                           const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: quadrature point coordinates
+  const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
+
+  // Outputs: initial state, 5 components per quadrature point
+  CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD
+  for (CeedInt i=0; i<Q; i++) {
+    const CeedScalar x[] = {X[0][i], X[1][i], X[2][i]};
+    CeedScalar q[5] = {0.};
+    Exact_Channel(3, 0., x, 5, q, ctx); // dim=3, time=0, Nf=5
+
+    for (CeedInt j=0; j<5; j++)
+      q0[j][i] = q[j];
+  } // End of Quadrature Point Loop
+  return 0;
+}
+
+// *****************************************************************************
+CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q,
+                               const CeedScalar *const *in,
+                               CeedScalar *const *out) {
+  // *INDENT-OFF*
+  // Inputs: interior state, surface quadrature data, coordinates
+  const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[0],
+                   (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1],
+                   (*X)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+
+  // Outputs: boundary flux for each of the 5 state components
+  CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+  // *INDENT-ON*
+  const ChannelContext context = (ChannelContext)ctx;
+  const bool implicit     = context->implicit;
+  const CeedScalar cv     = context->newtonian_ctx.cv;
+  const CeedScalar cp     = context->newtonian_ctx.cp;
+  const CeedScalar gamma  = cp/cv;
+
+  CeedPragmaSIMD
+  // Quadrature Point Loop
+  for (CeedInt i=0; i<Q; i++) {
+    // Setup
+    // -- Interp-to-Interp q_data
+    // For explicit mode, the surface integral is on the RHS of ODE q_dot = f(q).
+    // For implicit mode, it gets pulled to the LHS of implicit ODE/DAE g(q_dot, q).
+    // We can effect this by swapping the sign on this weight
+    const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
+
+    // Calculate prescribed inflow values (velocity and temperature from Exact_Channel)
+    const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
+    CeedScalar q_exact[5] = {0.};
+    Exact_Channel(3, 0., x, 5, q_exact, ctx);
+    const CeedScalar E_kinetic_exact = 0.5*(q_exact[1]*q_exact[1] +
+                                            q_exact[2]*q_exact[2] +
+                                            q_exact[3]*q_exact[3]) / q_exact[0];
+    const CeedScalar velocity[3] = {q_exact[1]/q_exact[0],
+                                    q_exact[2]/q_exact[0],
+                                    q_exact[3]/q_exact[0]
+                                   };
+    const CeedScalar theta = (q_exact[4] - E_kinetic_exact) / (q_exact[0]*cv);
+
+    // Find pressure using state inside the domain
+    const CeedScalar rho = q[0][i];
+    const CeedScalar u[3] = {q[1][i]/rho, q[2][i]/rho, q[3][i]/rho};
+    const CeedScalar E_internal = q[4][i] - .5 * rho * (u[0]*u[0] + u[1]*u[1] +
+                                  u[2]*u[2]);
+    const CeedScalar P = E_internal * (gamma - 1.);
+
+    // Find inflow state using calculated P and prescribed velocity, theta
+    const CeedScalar e_internal = cv * theta;
+    const CeedScalar rho_in = P / ((gamma - 1) * e_internal);
+    const CeedScalar E_kinetic = .5 * rho_in * (velocity[0]*velocity[0] +
+                                 velocity[1]*velocity[1] +
+                                 velocity[2]*velocity[2]);
+    const CeedScalar E = rho_in * e_internal + E_kinetic;
+    // ---- Normal vect
+    const CeedScalar norm[3] = {q_data_sur[1][i],
+                                q_data_sur[2][i],
+                                q_data_sur[3][i]
+                               };
+
+    // The Physics
+    // Zero v so all future terms can safely sum into it
+    for (int j=0; j<5; j++) v[j][i] = 0.;
+
+    const CeedScalar u_normal = norm[0]*velocity[0] +
+                                norm[1]*velocity[1] +
+                                norm[2]*velocity[2];
+
+    // Boundary flux terms, summed into v (inflow state rho_in, velocity, E with interior P)
+    // -- Density
+    v[0][i] -= wdetJb * rho_in * u_normal;
+
+    // -- Momentum
+    for (int j=0; j<3; j++)
+      v[j+1][i] -= wdetJb * (rho_in * u_normal * velocity[j] +
+                             norm[j] * P);
+
+    // -- Total Energy Density
+    v[4][i] -= wdetJb * u_normal * (E + P);
+
+  } // End Quadrature Point Loop
+  return 0;
+}
+
+// *****************************************************************************
+CEED_QFUNCTION(Channel_Outflow)(void *ctx, CeedInt Q,
+                                const CeedScalar *const *in,
+                                CeedScalar *const *out) {
+  // *INDENT-OFF*
+  // Inputs: interior state and surface quadrature data
+  const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[0],
+                   (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1];
+  // Outputs: boundary flux for each of the 5 state components
+  CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+  // *INDENT-ON*
+
+  const ChannelContext context = (ChannelContext)ctx;
+  const bool implicit     = context->implicit;
+  const CeedScalar P0     = context->P0;
+
+  CeedPragmaSIMD
+  // Quadrature Point Loop
+  for (CeedInt i=0; i<Q; i++) {
+    // Setup
+    // -- Interp in
+    const CeedScalar rho      =  q[0][i];
+    const CeedScalar u[3]     = {q[1][i] / rho,
+                                 q[2][i] / rho,
+                                 q[3][i] / rho
+                                };
+    const CeedScalar E        =  q[4][i];
+
+    // -- Interp-to-Interp q_data
+    // For explicit mode, the surface integral is on the RHS of ODE q_dot = f(q).
+    // For implicit mode, it gets pulled to the LHS of implicit ODE/DAE g(q_dot, q).
+    // We can effect this by swapping the sign on this weight
+    const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
+
+    // ---- Normal vect
+    const CeedScalar norm[3] = {q_data_sur[1][i],
+                                q_data_sur[2][i],
+                                q_data_sur[3][i]
+                               };
+
+    // The Physics
+    // Zero v so all future terms can safely sum into it
+    for (int j=0; j<5; j++) v[j][i] = 0.;
+
+    // Implementing outflow condition
+    const CeedScalar P         = P0; // pressure is fixed to the reference P0
+    const CeedScalar u_normal  = norm[0]*u[0] + norm[1]*u[1] +
+                                 norm[2]*u[2]; // Normal velocity
+    // Boundary flux terms, summed into v (interior rho, u, E with fixed P)
+    // -- Density
+    v[0][i] -= wdetJb * rho * u_normal;
+
+    // -- Momentum
+    for (int j=0; j<3; j++)
+      v[j+1][i] -= wdetJb *(rho * u_normal * u[j] + norm[j] * P);
+
+    // -- Total Energy Density
+    v[4][i] -= wdetJb * u_normal * (E + P);
+
+  } // End Quadrature Point Loop
+  return 0;
+}
+#endif // channel_h
diff --git a/examples/fluids/qfunctions/densitycurrent.h b/examples/fluids/qfunctions/densitycurrent.h
index 0bc1fbfb94..3aed68e123 100644
--- a/examples/fluids/qfunctions/densitycurrent.h
+++ b/examples/fluids/qfunctions/densitycurrent.h
@@ -32,7 +32,7 @@ struct SetupContext_ {
   CeedScalar N;
   CeedScalar cv;
   CeedScalar cp;
-  CeedScalar g;
+  CeedScalar g[3];
   CeedScalar rc;
   CeedScalar lx;
   CeedScalar ly;
@@ -110,11 +110,12 @@ CEED_QFUNCTION_HELPER int Exact_DC(CeedInt dim, CeedScalar time,
   const CeedScalar N        = context->N;
   const CeedScalar cv       = context->cv;
   const CeedScalar cp       = context->cp;
-  const CeedScalar g        = context->g;
+  const CeedScalar *g_vec   = context->g;
   const CeedScalar rc       = context->rc;
   const CeedScalar *center  = context->center;
   const CeedScalar *dc_axis = context->dc_axis;
   const CeedScalar Rd       = cp - cv;
+  const CeedScalar g = -g_vec[2];
 
   // Setup
   // -- Coordinates
diff --git a/examples/fluids/qfunctions/eulervortex.h b/examples/fluids/qfunctions/eulervortex.h
index 65efe2c20d..578ba6f27b 100644
--- a/examples/fluids/qfunctions/eulervortex.h
+++ b/examples/fluids/qfunctions/eulervortex.h
@@ -412,13 +412,6 @@ CEED_QFUNCTION(Euler)(void *ctx, CeedInt Q,
     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
     ConvectiveFluxJacobian_Euler(jacob_F_conv, rho, u, E, gamma);
 
-    // ---- Transpose of the Jacobian
-    CeedScalar jacob_F_conv_T[3][5][5];
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
-          jacob_F_conv_T[j][k][l] = jacob_F_conv[j][l][k];
-
     // ---- dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
     for (int j=0; j<3; j++) {
@@ -442,7 +435,7 @@ CEED_QFUNCTION(Euler)(void *ctx, CeedInt Q,
     Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau);
 
     // -- Stabilization method: none or SU
-    CeedScalar stab[5][3];
+    CeedScalar stab[5][3] = {{0.}};
     switch (context->stabilization) {
     case 0:        // Galerkin
       break;
@@ -450,7 +443,7 @@ CEED_QFUNCTION(Euler)(void *ctx, CeedInt Q,
       for (int j=0; j<3; j++)
         for (int k=0; k<5; k++)
           for (int l=0; l<5; l++)
-            stab[k][j] = jacob_F_conv_T[j][k][l] * Tau_x[j] * strong_conv[l];
+            stab[k][j] += jacob_F_conv[j][k][l] * Tau_x[j] * strong_conv[l];
 
       for (int j=0; j<5; j++)
         for (int k=0; k<3; k++)
@@ -589,13 +582,6 @@ CEED_QFUNCTION(IFunction_Euler)(void *ctx, CeedInt Q,
     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
     ConvectiveFluxJacobian_Euler(jacob_F_conv, rho, u, E, gamma);
 
-    // ---- Transpose of the Jacobian
-    CeedScalar jacob_F_conv_T[3][5][5];
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
-          jacob_F_conv_T[j][k][l] = jacob_F_conv[j][l][k];
-
     // ---- dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
     for (int j=0; j<3; j++) {
@@ -624,7 +610,7 @@ CEED_QFUNCTION(IFunction_Euler)(void *ctx, CeedInt Q,
     Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau);
 
     // -- Stabilization method: none, SU, or SUPG
-    CeedScalar stab[5][3];
+    CeedScalar stab[5][3] = {{0.}};
     switch (context->stabilization) {
     case 0:        // Galerkin
       break;
@@ -632,7 +618,7 @@ CEED_QFUNCTION(IFunction_Euler)(void *ctx, CeedInt Q,
       for (int j=0; j<3; j++)
         for (int k=0; k<5; k++)
           for (int l=0; l<5; l++)
-            stab[k][j] = jacob_F_conv_T[j][k][l] * Tau_x[j] * strong_conv[l];
+            stab[k][j] += jacob_F_conv[j][k][l] * Tau_x[j] * strong_conv[l];
 
       for (int j=0; j<5; j++)
         for (int k=0; k<3; k++)
@@ -644,7 +630,7 @@ CEED_QFUNCTION(IFunction_Euler)(void *ctx, CeedInt Q,
       for (int j=0; j<3; j++)
         for (int k=0; k<5; k++)
           for (int l=0; l<5; l++)
-            stab[k][j] = jacob_F_conv_T[j][k][l] * Tau_x[j] * strong_res[l];
+            stab[k][j] += jacob_F_conv[j][k][l] * Tau_x[j] * strong_res[l];
 
       for (int j=0; j<5; j++)
         for (int k=0; k<3; k++)
diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h
index 2396e58cf2..4c84ee7225 100644
--- a/examples/fluids/qfunctions/newtonian.h
+++ b/examples/fluids/qfunctions/newtonian.h
@@ -29,7 +29,7 @@ struct SetupContext_ {
   CeedScalar N;
   CeedScalar cv;
   CeedScalar cp;
-  CeedScalar g;
+  CeedScalar g[3];
   CeedScalar rc;
   CeedScalar lx;
   CeedScalar ly;
@@ -59,8 +59,14 @@ struct NewtonianIdealGasContext_ {
   CeedScalar k;
   CeedScalar cv;
   CeedScalar cp;
-  CeedScalar g;
+  CeedScalar g[3];
   CeedScalar c_tau;
+  CeedScalar Ctau_t;
+  CeedScalar Ctau_v;
+  CeedScalar Ctau_C;
+  CeedScalar Ctau_M;
+  CeedScalar Ctau_E;
+  CeedScalar dt;
   StabilizationType stabilization;
 };
 #endif
@@ -70,11 +76,13 @@ struct NewtonianIdealGasContext_ {
 // *****************************************************************************
 CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
-    const CeedScalar gamma, const CeedScalar g, CeedScalar z) {
+    const CeedScalar gamma, const CeedScalar g[3], const CeedScalar x[3]) {
   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
+  CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
     for (CeedInt j=0; j<3; j++) { // Rows of each Jacobian matrix
-      dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - g*z)) : 0.) - u[i]*u[j];
+      dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - e_potential)) : 0.) -
+                      u[i]*u[j];
       for (CeedInt k=0; k<3; k++) { // Columns of each Jacobian matrix
         dF[i][0][k+1]   = ((i==k) ? 1. : 0.);
         dF[i][j+1][k+1] = ((j==k) ? u[i] : 0.) +
@@ -90,6 +98,138 @@ CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
   }
 }
 
+// *****************************************************************************
+// Helper function for computing flux Jacobian of Primitive variables
+// *****************************************************************************
+CEED_QFUNCTION_HELPER void computeFluxJacobian_NSp(CeedScalar dF[3][5][5],
+    const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
+    const CeedScalar Rd, const CeedScalar cv) {
+  CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
+  // TODO Add in gravity's contribution
+  // T is recovered from total energy with no potential-energy term (see TODO)
+  CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
+  CeedScalar drdT = -rho / T;          // d(rho)/dT at fixed p, for rho = p/(Rd T)
+  CeedScalar drdP = 1. / ( Rd * T);    // d(rho)/dp at fixed T
+  CeedScalar etot =  E / rho ;         // total energy per unit mass
+  CeedScalar e2p  = drdP * etot + 1. ;
+  CeedScalar e3p  = ( E  + rho * Rd * T ); // E + p
+  CeedScalar e4p  = drdT * etot + rho * cv ;
+
+  for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
+    for (CeedInt j=0; j<3; j++) { // j counts F^{m_j}
+      // dF[i][row][col]: row = flux component, col = primitive variable (p, vel, T)
+      dF[i][j+1][0] = drdP * u[i] * u[j] + ((i==j) ? 1. : 0.); // F^{m_j} wrt p
+      for (CeedInt k=0; k<3; k++) { // k counts the wrt vel_k
+        // this loop handles middle columns for all 5 rows
+        dF[i][0][k+1]   =  ((i==k) ? rho  : 0.);   // F^c wrt vel_k
+        dF[i][j+1][k+1] = (((j==k) ? u[i] : 0.) +  // F^m_j wrt u_k
+                           ((i==k) ? u[j] : 0.) ) * rho;
+        dF[i][4][k+1]   = rho * u[i] * u[k]
+                          + ((i==k) ? e3p  : 0.) ; // F^e wrt u_k
+      }
+      dF[i][j+1][4] = drdT * u[i] * u[j]; // F^{m_j} wrt T
+    }
+    dF[i][4][0] = u[i] * e2p; // F^e wrt p
+    dF[i][4][4] = u[i] * e4p; // F^e wrt T
+    dF[i][0][0] = u[i] * drdP; // F^c wrt p
+    dF[i][0][4] = u[i] * drdT; // F^c wrt T
+  }
+}
+
+CEED_QFUNCTION_HELPER void PrimitiveToConservative_fwd(const CeedScalar rho,
+    const CeedScalar u[3], const CeedScalar E, const CeedScalar Rd,
+    const CeedScalar cv, const CeedScalar dY[5], CeedScalar dU[5]) {
+  CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2];
+  CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
+  CeedScalar drdT = -rho / T;        // d(rho)/dT for rho = p/(Rd T)
+  CeedScalar drdP = 1. / ( Rd * T);  // d(rho)/dp
+  dU[0] = drdP * dY[0] + drdT * dY[4]; // d(rho): dY[0] is the p increment, dY[4] the T increment
+  CeedScalar de_kinetic = 0;
+  for (int i=0; i<3; i++) {
+    dU[1+i] = dU[0] * u[i] + rho * dY[1+i]; // d(rho u_i) by the product rule; dY[1..3] are velocity increments
+    de_kinetic += u[i] * dY[1+i];
+  }
+  dU[4] = rho * cv * dY[4] + dU[0] * cv * T // internal energy: rho * e
+          + rho * de_kinetic + .5 * dU[0] * u_sq; // kinetic energy: .5 * rho * |u|^2
+}
+
+// *****************************************************************************
+// Helper function for computing Tau elements (stabilization constant)
+//   Model from:
+//     PHASTA
+//
+//   Tau[i] = itau=0 which is diagonal-Shakib (3 values still but not spatial)
+//
+// Where NOT UPDATED YET
+// *****************************************************************************
+CEED_QFUNCTION_HELPER void Tau_diagPrim(CeedScalar Tau_d[3],
+                                        const CeedScalar dXdx[3][3], const CeedScalar u[3],
+                                        const CeedScalar cv, const NewtonianIdealGasContext newt_ctx,
+                                        const CeedScalar mu, const CeedScalar dt,
+                                        const CeedScalar rho) {
+  // Context
+  const CeedScalar Ctau_t = newt_ctx->Ctau_t;
+  const CeedScalar Ctau_v = newt_ctx->Ctau_v;
+  const CeedScalar Ctau_C = newt_ctx->Ctau_C;
+  const CeedScalar Ctau_M = newt_ctx->Ctau_M;
+  const CeedScalar Ctau_E = newt_ctx->Ctau_E;
+  CeedScalar gijd[6];
+  CeedScalar tau;
+  CeedScalar dts;
+  CeedScalar fact;
+  // gijd = dXdx^T dXdx (symmetric), packed as [00, 01, 11, 02, 12, 22]
+  //*INDENT-OFF*
+  gijd[0] =   dXdx[0][0] * dXdx[0][0]
+            + dXdx[1][0] * dXdx[1][0]
+            + dXdx[2][0] * dXdx[2][0];
+
+  gijd[1] =   dXdx[0][0] * dXdx[0][1]
+            + dXdx[1][0] * dXdx[1][1]
+            + dXdx[2][0] * dXdx[2][1];
+
+  gijd[2] =   dXdx[0][1] * dXdx[0][1]
+            + dXdx[1][1] * dXdx[1][1]
+            + dXdx[2][1] * dXdx[2][1];
+
+  gijd[3] =   dXdx[0][0] * dXdx[0][2]
+            + dXdx[1][0] * dXdx[1][2]
+            + dXdx[2][0] * dXdx[2][2];
+
+  gijd[4] =   dXdx[0][1] * dXdx[0][2]
+            + dXdx[1][1] * dXdx[1][2]
+            + dXdx[2][1] * dXdx[2][2];
+
+  gijd[5] =   dXdx[0][2] * dXdx[0][2]
+            + dXdx[1][2] * dXdx[1][2]
+            + dXdx[2][2] * dXdx[2][2];
+  //*INDENT-ON*
+  // Time-step term; dt is the function argument (newt_ctx->dt is not read here)
+  dts = Ctau_t / dt ;
+  // tau = rho^2 (4 dts^2 + u.g.u) + Ctau_v mu^2 (g:g), with g the metric gijd
+  tau = rho*rho*((4. * dts * dts)
+                 + u[0] * ( u[0] * gijd[0] + 2. * ( u[1] * gijd[1] + u[2] * gijd[3]))
+                 + u[1] * ( u[1] * gijd[2] + 2. *   u[2] * gijd[4])
+                 + u[2] *   u[2] * gijd[5])
+        + Ctau_v* mu * mu *
+        (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] +
+         + 2. * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4]));
+
+  fact=sqrt(tau);
+  // Tau_d[0] is scaled by 1/(rho * trace(g)) and a fixed factor 0.125
+  Tau_d[0] = Ctau_C * fact / (rho*(gijd[0] + gijd[2] + gijd[5]))*0.125;
+
+  Tau_d[1] = Ctau_M / fact;
+  Tau_d[2] = Ctau_E / ( fact * cv );
+
+// consider putting back the way I initially had it  Ctau_E * Tau_d[1] /cv
+//  to avoid a division if the compiler is smart enough to see that cv IS
+// a constant that it could invert once for all elements
+// but in that case energy tau is scaled by the product of Ctau_E * Ctau_M
+// OR we could absorb cv into Ctau_E but this puts more burden on user to
+// know how to change constants with a change of fluid or units.  Same for
+// Ctau_v * mu * mu IF AND ONLY IF we don't add viscosity law =f(T)
+}
+
 // *****************************************************************************
 // Helper function for computing Tau elements (stabilization constant)
 //   Model from:
@@ -108,14 +248,20 @@ CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
 // *****************************************************************************
 CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3],
                                        const CeedScalar dXdx[3][3], const CeedScalar u[3],
-                                       const CeedScalar sound_speed, const CeedScalar c_tau) {
+                                       /* viscosity feeds the Peclet-number factor Xi */
+                                       const CeedScalar sound_speed, const CeedScalar c_tau,
+                                       const CeedScalar viscosity) {
+  const CeedScalar mag_u_visc = sqrt(u[0]*u[0] +u[1]*u[1] +u[2]*u[2]) /
+                                (2*viscosity);
   for (int i=0; i<3; i++) {
     // length of element in direction i
     CeedScalar h = 2 / sqrt(dXdx[0][i]*dXdx[0][i] + dXdx[1][i]*dXdx[1][i] +
                             dXdx[2][i]*dXdx[2][i]);
+    CeedScalar Pe = mag_u_visc*h; // |u| h / (2 viscosity)
+    CeedScalar Xi = (Pe > 0.) ? 1/tanh(Pe) - 1/Pe : 0.; // limit is 0 at Pe = 0 (avoids inf - inf)
     // fastest wave in direction i
     CeedScalar fastest_wave = fabs(u[i]) + sound_speed;
-    Tau_x[i] = c_tau * h / fastest_wave;
+    Tau_x[i] = c_tau * h * Xi / fastest_wave;
   }
 }
 
@@ -130,37 +276,34 @@ CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q,
   // Outputs
   CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
 
+  // Context
+  const SetupContext context = (SetupContext)ctx;
+  const CeedScalar theta0    = context->theta0;
+  const CeedScalar P0        = context->P0;
+  const CeedScalar cv        = context->cv;
+  const CeedScalar cp        = context->cp;
+  const CeedScalar *g        = context->g;
+  const CeedScalar Rd        = cp - cv;
+
   // Quadrature Point Loop
   CeedPragmaSIMD
   for (CeedInt i=0; i<Q; i++) {
     CeedScalar q[5] = {0.};
 
-    // Context
-    const SetupContext context = (SetupContext)ctx;
-    const CeedScalar theta0    = context->theta0;
-    const CeedScalar P0        = context->P0;
-    const CeedScalar N         = context->N;
-    const CeedScalar cv        = context->cv;
-    const CeedScalar cp        = context->cp;
-    const CeedScalar g         = context->g;
-    const CeedScalar Rd        = cp - cv;
-
     // Setup
     // -- Coordinates
-    const CeedScalar z = X[2][i];
-
-    // -- Exner pressure, hydrostatic balance
-    const CeedScalar Pi = 1. + g*g*(exp(-N*N*z/g) - 1.) / (cp*theta0*N*N);
+    const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
+    const CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
 
     // -- Density
-    const CeedScalar rho = P0 * pow(Pi, cv/Rd) / (Rd*theta0);
+    const CeedScalar rho = P0 / (Rd*theta0);
 
     // Initial Conditions
     q[0] = rho;
     q[1] = 0.0;
     q[2] = 0.0;
     q[3] = 0.0;
-    q[4] = rho * (cv*theta0*Pi + g*z);
+    q[4] = rho * (cv*theta0 + e_potential);
 
     for (CeedInt j=0; j<5; j++)
       q0[j][i] = q[j];
@@ -190,8 +333,7 @@ CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q,
 //
 // Thermal Stress:
 //   Fe = u Fu + k grad( T )
-//
-// Equation of State:
+// Equation of State
 //   P = (gamma - 1) (E - rho (u u) / 2 - rho g z)
 //
 // Stabilization:
@@ -238,9 +380,10 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
   const CeedScalar k      = context->k;
   const CeedScalar cv     = context->cv;
   const CeedScalar cp     = context->cp;
-  const CeedScalar g      = context->g;
-  const CeedScalar c_tau  = context->c_tau;
+  const CeedScalar *g     = context->g;
+  const CeedScalar dt     = context->dt;
   const CeedScalar gamma  = cp / cv;
+  const CeedScalar Rd     = cp - cv;
 
   CeedPragmaSIMD
   // Quadrature Point Loop
@@ -288,6 +431,7 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
                                     q_data[8][i],
                                     q_data[9][i]}
                                   };
+    const CeedScalar x_i[3]       = {x[0][i], x[1][i], x[2][i]};
     // *INDENT-ON*
     // -- Grad-to-Grad q_data
     // dU/dx
@@ -314,11 +458,11 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
           dudx[j][k] += du[j][l] * dXdx[l][k];
     // -- grad_T
     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
-                                    (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]))/cv,
+                                    (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]) + g[0])/cv,
                                    (dEdx[1]/rho - E*drhodx[1]/(rho*rho) - /* *NOPAD* */
-                                    (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]))/cv,
+                                    (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]) + g[1])/cv,
                                    (dEdx[2]/rho - E*drhodx[2]/(rho*rho) - /* *NOPAD* */
-                                    (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) - g)/cv
+                                    (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) + g[2])/cv
                                   };
 
     // -- Fuvisc
@@ -344,20 +488,13 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
     // Pressure
     const CeedScalar
     E_kinetic   = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
-    E_potential = rho*g*x[2][i],
+    E_potential = -rho*(g[0]*x_i[0] + g[1]*x_i[1] + g[2]*x_i[2]),
     E_internal  = E - E_kinetic - E_potential,
     P           = E_internal * (gamma - 1.); // P = pressure
 
     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
-    computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x[2][i]);
-
-    // jacob_F_conv_T = jacob_F_conv^T
-    CeedScalar jacob_F_conv_T[3][5][5];
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
-          jacob_F_conv_T[j][k][l] = jacob_F_conv[j][l][k];
+    computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x_i);
 
     // dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
@@ -376,7 +513,7 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
 
     // Body force
-    const CeedScalar body_force[5] = {0, 0, 0, -rho*g, 0};
+    const CeedScalar body_force[5] = {0, rho *g[0], rho *g[1], rho *g[2], 0};
 
     // The Physics
     // Zero dv so all future terms can safely sum into it
@@ -416,22 +553,32 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
     for (int j=0; j<5; j++)
       v[j][i] = wdetJ * body_force[j];
 
-    // Stabilization
-    // -- Tau elements
-    const CeedScalar sound_speed = sqrt(gamma * P / rho);
-    CeedScalar Tau_x[3] = {0.};
-    Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau);
+    // Spatial Stabilization
+    // -- Not used in favor of diagonal tau. Kept for future testing
+    // const CeedScalar sound_speed = sqrt(gamma * P / rho);
+    // CeedScalar Tau_x[3] = {0.};
+    // Tau_spatial(Tau_x, dXdx, u, sound_speed, context->c_tau, mu);
 
-    // -- Stabilization method: none or SU
-    CeedScalar stab[5][3];
+    // -- Stabilization method: none, SU, or SUPG
+    CeedScalar stab[5][3] = {{0.}};
+    CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
+    CeedScalar Tau_d[3] = {0.};
     switch (context->stabilization) {
     case STAB_NONE:        // Galerkin
       break;
     case STAB_SU:        // SU
+      Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
+      tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
+      tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
+      tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
+      tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
+      tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
+      PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
+                                  tau_strong_conv_conservative);
       for (int j=0; j<3; j++)
         for (int k=0; k<5; k++)
           for (int l=0; l<5; l++)
-            stab[k][j] = jacob_F_conv_T[j][k][l] * Tau_x[j] * strong_conv[l];
+            stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
 
       for (int j=0; j<5; j++)
         for (int k=0; k<3; k++)
@@ -479,9 +626,10 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
   const CeedScalar k      = context->k;
   const CeedScalar cv     = context->cv;
   const CeedScalar cp     = context->cp;
-  const CeedScalar g      = context->g;
-  const CeedScalar c_tau  = context->c_tau;
+  const CeedScalar *g     = context->g;
+  const CeedScalar dt     = context->dt;
   const CeedScalar gamma  = cp / cv;
+  const CeedScalar Rd     = cp-cv;
 
   CeedPragmaSIMD
   // Quadrature Point Loop
@@ -530,6 +678,7 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
                                     q_data[8][i],
                                     q_data[9][i]}
                                   };
+    const CeedScalar x_i[3]     = {x[0][i], x[1][i], x[2][i]};
     // *INDENT-ON*
     // -- Grad-to-Grad q_data
     // dU/dx
@@ -556,11 +705,11 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
           dudx[j][k] += du[j][l] * dXdx[l][k];
     // -- grad_T
     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
-                                    (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]))/cv,
+                                    (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]) + g[0])/cv,
                                    (dEdx[1]/rho - E*drhodx[1]/(rho*rho) - /* *NOPAD* */
-                                    (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]))/cv,
+                                    (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]) + g[1])/cv,
                                    (dEdx[2]/rho - E*drhodx[2]/(rho*rho) - /* *NOPAD* */
-                                    (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) - g)/cv
+                                    (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) + g[2])/cv
                                   };
     // -- Fuvisc
     // ---- Symmetric 3x3 matrix
@@ -585,20 +734,14 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
     // Pressure
     const CeedScalar
     E_kinetic   = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
-    E_potential = rho*g*x[2][i],
+    E_potential = -rho*(g[0]*x_i[0] + g[1]*x_i[1] + g[2]*x_i[2]),
     E_internal  = E - E_kinetic - E_potential,
     P           = E_internal * (gamma - 1.); // P = pressure
 
     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
-    computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x[2][i]);
+    computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x_i);
 
-    // jacob_F_conv_T = jacob_F_conv^T
-    CeedScalar jacob_F_conv_T[3][5][5];
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
-          jacob_F_conv_T[j][k][l] = jacob_F_conv[j][l][k];
     // dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
     for (int j=0; j<3; j++) {
@@ -615,7 +758,7 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
 
     // Body force
-    const CeedScalar body_force[5] = {0, 0, 0, -rho*g, 0};
+    const CeedScalar body_force[5] = {0, rho *g[0], rho *g[1], rho *g[2], 0};
 
     // Strong residual
     CeedScalar strong_res[5];
@@ -664,22 +807,33 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
     for (int j=0; j<5; j++)
       v[j][i] -= wdetJ*body_force[j];
 
-    // Stabilization
-    // -- Tau elements
-    const CeedScalar sound_speed = sqrt(gamma * P / rho);
-    CeedScalar Tau_x[3] = {0.};
-    Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau);
+    // Spatial Stabilization
+    // -- Not used in favor of diagonal tau. Kept for future testing
+    // const CeedScalar sound_speed = sqrt(gamma * P / rho);
+    // CeedScalar Tau_x[3] = {0.};
+    // Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau, mu);
 
     // -- Stabilization method: none, SU, or SUPG
-    CeedScalar stab[5][3];
+    CeedScalar stab[5][3] = {{0.}};
+    CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0};
+    CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
+    CeedScalar Tau_d[3] = {0.};
     switch (context->stabilization) {
     case STAB_NONE:        // Galerkin
       break;
     case STAB_SU:        // SU
+      Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
+      tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
+      tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
+      tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
+      tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
+      tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
+      PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
+                                  tau_strong_conv_conservative);
       for (int j=0; j<3; j++)
         for (int k=0; k<5; k++)
           for (int l=0; l<5; l++)
-            stab[k][j] = jacob_F_conv_T[j][k][l] * Tau_x[j] * strong_conv[l];
+            stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
 
       for (int j=0; j<5; j++)
         for (int k=0; k<3; k++)
@@ -688,10 +842,25 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
                                 stab[j][2] * dXdx[k][2]);
       break;
     case STAB_SUPG:        // SUPG
+      Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
+      tau_strong_res[0] = Tau_d[0] * strong_res[0];
+      tau_strong_res[1] = Tau_d[1] * strong_res[1];
+      tau_strong_res[2] = Tau_d[1] * strong_res[2];
+      tau_strong_res[3] = Tau_d[1] * strong_res[3];
+      tau_strong_res[4] = Tau_d[2] * strong_res[4];
+// Alternate route (useful later with primitive variable code)
+// this function was verified against PHASTA for an IC that was as close as possible
+//    computeFluxJacobian_NSp(jacob_F_conv_p, rho, u, E, Rd, cv);
+// it has also been verified to compute a correct result through the following
+//   stab[k][j] += jacob_F_conv_p[j][k][l] * tau_strong_res[l] // flux Jacobian wrt primitive
+// applied in the triple loop below
+//  However, it is more flops than using the existing Jacobian wrt q after q_{,Y} viz
+      PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_res,
+                                  tau_strong_res_conservative);
       for (int j=0; j<3; j++)
         for (int k=0; k<5; k++)
           for (int l=0; l<5; l++)
-            stab[k][j] = jacob_F_conv_T[j][k][l] * Tau_x[j] * strong_res[l];
+            stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l];
 
       for (int j=0; j<5; j++)
         for (int k=0; k<3; k++)
diff --git a/examples/fluids/src/cloptions.c b/examples/fluids/src/cloptions.c
index fcb6b3560c..ce049d2ed4 100644
--- a/examples/fluids/src/cloptions.c
+++ b/examples/fluids/src/cloptions.c
@@ -30,6 +30,12 @@ PetscErrorCode RegisterProblems_NS(AppCtx app_ctx) {
   ierr = PetscFunctionListAdd(&app_ctx->problems, "advection2d",
                               NS_ADVECTION2D); CHKERRQ(ierr);
 
+  ierr = PetscFunctionListAdd(&app_ctx->problems, "blasius",
+                              NS_BLASIUS); CHKERRQ(ierr);
+
+  ierr = PetscFunctionListAdd(&app_ctx->problems, "channel",
+                              NS_CHANNEL); CHKERRQ(ierr);
+
   PetscFunctionReturn(0);
 }
 
diff --git a/examples/fluids/src/setupdm.c b/examples/fluids/src/setupdm.c
index 54846182d6..4f1a3538d3 100644
--- a/examples/fluids/src/setupdm.c
+++ b/examples/fluids/src/setupdm.c
@@ -41,16 +41,6 @@ PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree,
     ierr = PetscObjectSetName((PetscObject)fe, "Q"); CHKERRQ(ierr);
     ierr = DMAddField(dm, NULL,(PetscObject)fe); CHKERRQ(ierr);
     ierr = DMCreateDS(dm); CHKERRQ(ierr);
-    {
-      /* create FE field for coordinates */
-      PetscFE fe_coords;
-      PetscInt num_comp_coord;
-      ierr = DMGetCoordinateDim(dm, &num_comp_coord); CHKERRQ(ierr);
-      ierr = PetscFECreateLagrange(PETSC_COMM_SELF, problem->dim, num_comp_coord,
-                                   PETSC_FALSE, 1, 1, &fe_coords); CHKERRQ(ierr);
-      ierr = DMProjectCoordinates(dm, fe_coords); CHKERRQ(ierr);
-      ierr = PetscFEDestroy(&fe_coords); CHKERRQ(ierr);
-    }
     ierr = DMGetLabel(dm, "Face Sets", &label); CHKERRQ(ierr);
     // Set wall BCs
     if (bc->num_wall > 0) {
diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c
index eb8b5ed09a..2379477f95 100644
--- a/examples/fluids/src/setuplibceed.c
+++ b/examples/fluids/src/setuplibceed.c
@@ -84,7 +84,7 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc,
   CeedCompositeOperatorAddSub(*op_apply, op_apply_vol);
 
   // -- Create Sub-Operator for in/outflow BCs
-  if (phys->has_neumann) {
+  if (phys->has_neumann || 1) {
     // --- Setup
     ierr = DMGetLabel(dm, "Face Sets", &domain_label); CHKERRQ(ierr);
     //ierr = DMGetDimension(dm, &dim); CHKERRQ(ierr);
@@ -209,6 +209,8 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc,
   // ----- Get Context Labels for Operator
   CeedOperatorContextGetFieldLabel(*op_apply, "solution time",
                                    &phys->solution_time_label);
+  CeedOperatorContextGetFieldLabel(*op_apply, "timestep size",
+                                   &phys->timestep_size_label);
 
   PetscFunctionReturn(0);
 }
diff --git a/examples/fluids/src/setupts.c b/examples/fluids/src/setupts.c
index 511666684e..8bc6dedb1e 100644
--- a/examples/fluids/src/setupts.c
+++ b/examples/fluids/src/setupts.c
@@ -89,9 +89,15 @@ PetscErrorCode RHS_NS(TS ts, PetscReal t, Vec Q, Vec G, void *user_data) {
   PetscErrorCode ierr;
   PetscFunctionBeginUser;
 
-  // Update solution time
+  // Update context field labels
   if (user->phys->solution_time_label)
     CeedOperatorContextSetDouble(user->op_rhs, user->phys->solution_time_label, &t);
+  if (user->phys->timestep_size_label) {
+    PetscScalar dt;
+    ierr = TSGetTimeStep(ts,&dt); CHKERRQ(ierr);
+    CeedOperatorContextSetDouble(user->op_rhs, user->phys->timestep_size_label,
+                                 &dt);
+  }
 
   // Get local vectors
   ierr = DMGetLocalVector(user->dm, &Q_loc); CHKERRQ(ierr);
@@ -147,10 +153,16 @@ PetscErrorCode IFunction_NS(TS ts, PetscReal t, Vec Q, Vec Q_dot, Vec G,
   PetscErrorCode    ierr;
   PetscFunctionBeginUser;
 
-  // Update solution time
+  // Update context field labels
   if (user->phys->solution_time_label)
     CeedOperatorContextSetDouble(user->op_ifunction,
                                  user->phys->solution_time_label, &t);
+  if (user->phys->timestep_size_label) {
+    PetscScalar dt;
+    ierr = TSGetTimeStep(ts,&dt); CHKERRQ(ierr);
+    CeedOperatorContextSetDouble(user->op_ifunction,
+                                 user->phys->timestep_size_label, &dt);
+  }
 
   // Get local vectors
   ierr = DMGetLocalVector(user->dm, &Q_loc); CHKERRQ(ierr);

From 871db79f2f3e2ed5b6659a8b21aea7cbbaad5cb5 Mon Sep 17 00:00:00 2001
From: "Kenneth E. Jansen" <Kenneth.Jansen@colorado.edu>
Date: Mon, 2 May 2022 19:10:39 -0600
Subject: [PATCH 31/59] Cleaner density inflow (#953)

* density weakly set on the inflow....also seems to have a commit intended for ken/primitiveSUPGterm  OOOP

Merged onto main this density ONLY inflow step...fixed E_kinetic bug along the way which used interior velocity.

* first cut at choose your weak inflow rho or T

* cleanup and a missing gamma

* dropped time step to 2e-6, added time limit of 0.001, added suggested parameters for tau constants for linear vs. quadratic.  Note this is far from certain, especially quadratic.  Also left commented out the required settings to force SNES to do a fixed number of NL iterations. The current settings do 2 NL iterations for a while then drop to 1 which is ok for steady state but might not be good for SRS.

* potentially better Ctau_{C,M,E} that have much lower oscillations in the free stream even at large time steps like 3.2e-5

* examples/fluids: fix output_freq

* examples/fluids: use bool for weakT

Suggested-by: James Wright <james@jameswright.xyz>

Co-authored-by: Jed Brown <jed@jedbrown.org>
---
 examples/fluids/blasius.yaml           | 19 ++++++++++--
 examples/fluids/navierstokes.h         |  1 +
 examples/fluids/problems/blasius.c     |  5 ++++
 examples/fluids/qfunctions/blasius.h   | 40 +++++++++++++++++---------
 examples/fluids/qfunctions/newtonian.h |  3 +-
 5 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/examples/fluids/blasius.yaml b/examples/fluids/blasius.yaml
index 85be3b40de..4d4cef22a7 100644
--- a/examples/fluids/blasius.yaml
+++ b/examples/fluids/blasius.yaml
@@ -4,7 +4,12 @@ implicit: true
 ts:
   adapt_type: 'none'
   type: 'beuler'
-  dt: 5.e-8
+  dt: 0.2e-5
+  max_time: 1.0e-3
+output_freq: 10
+
+#snes_max_it: 4 
+#snes_convergence_test: skip
 
 ## Linear Settings:
 degree: 1
@@ -20,15 +25,23 @@ nDelta: 45
 stab: 'supg'
 Ctau_t: 1
 #Ctau_v: 36,60,128 is what PHASTA has for p=1,2, 3
+## linear Settings:
 Ctau_v: 36
-Ctau_C: 0.125
-Ctau_M: 1.0
+Ctau_C: 0.25
+Ctau_M: 0.25
 Ctau_E: 0.125
+## Quadratic Settings:
+#Ctau_v: 60
+#Ctau_C: 0.125
+#Ctau_M: 0.125
+#Ctau_E: 0.125
+
 q_extra: 0
 
 dm_plex_box_lower: 0,0,0
 dm_plex_box_upper: 4.2e-3,4.2e-3,5.e-5
 dm_plex_dim: 3
+# Faces labeled 1=z- 2=z+ 3=y- 4=y+ 5=x+ 6=x-
 bc_slip_z: 1,2
 bc_wall: 3
 wall_comps: 1,2,3
diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h
index 2c840be401..7af5d86c6f 100644
--- a/examples/fluids/navierstokes.h
+++ b/examples/fluids/navierstokes.h
@@ -295,6 +295,7 @@ struct ChannelContext_ {
 typedef struct BlasiusContext_ *BlasiusContext;
 struct BlasiusContext_ {
   bool       implicit;  // !< Using implicit timesteping or not
+  bool       weakT;     // !< flag to set Temperature at inflow
   CeedScalar delta0;    // !< Boundary layer height at inflow
   CeedScalar Uinf;      // !< Velocity at boundary layer edge
   CeedScalar P0;        // !< Pressure at outflow
diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c
index c6b4b7d9d3..19ce6ef1ee 100644
--- a/examples/fluids/problems/blasius.c
+++ b/examples/fluids/problems/blasius.c
@@ -20,6 +20,7 @@ struct BlasiusContext_ {
   CeedScalar Uinf;      // !< Velocity at boundary layer edge
   CeedScalar P0;        // !< Pressure at outflow
   CeedScalar theta0;    // !< Temperature at inflow
+  CeedInt weakT;        // !< flag to weakly set Temperature at inflow if not set weak rho instead
   struct NewtonianIdealGasContext_ newtonian_ctx;
 };
 #endif
@@ -121,8 +122,11 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *setup_ctx,
   PetscReal  top_angle     = 5;    // degrees
   CeedScalar theta0        = 288.; // K
   CeedScalar P0            = 1.01e5; // Pa
+  PetscBool  weakT         = PETSC_FALSE; // weak density or temperature
 
   PetscOptionsBegin(comm, NULL, "Options for CHANNEL problem", NULL);
+  ierr = PetscOptionsBool("-weakT", "Change from rho weak to T weak at inflow",
+                          NULL, weakT, &weakT, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-Uinf", "Velocity at boundary layer edge",
                             NULL, Uinf, &Uinf, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-delta0", "Boundary layer height at inflow",
@@ -158,6 +162,7 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *setup_ctx,
   ierr = modifyMesh(dm, problem->dim, growth, Ndelta, refine_height, top_angle);
   CHKERRQ(ierr);
 
+  user->phys->blasius_ctx->weakT     = !!weakT;
   user->phys->blasius_ctx->Uinf      = Uinf;
   user->phys->blasius_ctx->delta0    = delta0;
   user->phys->blasius_ctx->theta0    = theta0;
diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h
index 5399ff638d..e8c0261a0c 100644
--- a/examples/fluids/qfunctions/blasius.h
+++ b/examples/fluids/qfunctions/blasius.h
@@ -21,6 +21,7 @@
 typedef struct BlasiusContext_ *BlasiusContext;
 struct BlasiusContext_ {
   bool       implicit;  // !< Using implicit timesteping or not
+  bool       weakT;     // !< flag to set Temperature weakly at inflow
   CeedScalar delta0;    // !< Boundary layer height at inflow
   CeedScalar Uinf;      // !< Velocity at boundary layer edge
   CeedScalar P0;        // !< Pressure at outflow
@@ -183,11 +184,13 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q,
   const CeedScalar cv     = context->newtonian_ctx.cv;
   const CeedScalar cp     = context->newtonian_ctx.cp;
   const CeedScalar Rd     = cp - cv;
+  const CeedScalar gamma  = cp/cv;
 
   const CeedScalar theta0 = context->theta0;
   const CeedScalar P0     = context->P0;
   const CeedScalar delta0 = context->delta0;
   const CeedScalar Uinf   = context->Uinf;
+  const bool weakT        = context->weakT;
   const CeedScalar rho_0  = P0 / (Rd * theta0);
   const CeedScalar x0     = Uinf*rho_0 / (mu*25/ (delta0*delta0) );
 
@@ -201,26 +204,35 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q,
     // We can effect this by swapping the sign on this weight
     const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
 
-    // Calcualte prescribed inflow values
+    // Calculate inflow values
     const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
-
-    // Find pressure using state inside the domain
-    const CeedScalar rho = q[0][i];
-    const CeedScalar P = rho * Rd * theta0; // interior rho with exterior T
-
-    // Find inflow state using calculated P and prescribed velocity, theta0
-    const CeedScalar e_internal = cv * theta0;
-
     CeedScalar velocity[3] = {0.};
     CeedScalar t12;
     BlasiusSolution(x[1], Uinf, x0, x[0], rho_0, &velocity[0], &velocity[1],
                     &t12, &context->newtonian_ctx);
 
-    const CeedScalar E_kinetic = .5 * rho * (velocity[0]*velocity[0] +
-                                 velocity[1]*velocity[1] +
-                                 velocity[2]*velocity[2]);
-    const CeedScalar E = rho * e_internal + E_kinetic;  // use interior rho
-    // from T       and  u exterior
+    // enabling user to choose between weak T and weak rho inflow
+    CeedScalar rho,E_internal, P, E_kinetic;
+    if (weakT) {
+      // rho should be from the current solution
+      rho = q[0][i];
+      // Temperature is being set weakly (theta0) and for constant cv this sets E_internal
+      E_internal = rho * cv * theta0;
+      // Find pressure using the ideal gas law
+      P=rho*Rd*theta0; // interior rho with exterior T
+      E_kinetic = .5 * rho * (velocity[0]*velocity[0] +
+                              velocity[1]*velocity[1] +
+                              velocity[2]*velocity[2]);
+    } else {
+      //  Fixing rho weakly on the inflow to a value  consistent with theta0 and P0
+      rho =  rho_0;
+      E_kinetic = .5 * rho * (velocity[0]*velocity[0] +
+                              velocity[1]*velocity[1] +
+                              velocity[2]*velocity[2]);
+      E_internal = q[4][i] - E_kinetic; // uses set rho and u but E from solution
+      P = E_internal * (gamma - 1.);
+    }
+    const CeedScalar E = E_internal + E_kinetic;
     // ---- Normal vect
     const CeedScalar norm[3] = {q_data_sur[1][i],
                                 q_data_sur[2][i],
diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h
index 4c84ee7225..d1467cec57 100644
--- a/examples/fluids/qfunctions/newtonian.h
+++ b/examples/fluids/qfunctions/newtonian.h
@@ -120,8 +120,7 @@ CEED_QFUNCTION_HELPER void computeFluxJacobian_NSp(CeedScalar dF[3][5][5],
 //        [row][col] of A_i
       dF[i][j+1][0] = drdP * u[i] * u[j] + ((i==j) ? 1. : 0.); // F^{{m_j} wrt p
       for (CeedInt k=0; k<3; k++) { // k counts the wrt vel_k
-        // this loop handles middle columns for all 5 rows
-        dF[i][0][k+1]   =  ((i==k) ? rho  : 0.);   // F^c wrt vel_k
+        dF[i][0][k+1]   =  ((i==k) ? rho  : 0.);   // F^c wrt u_k
         dF[i][j+1][k+1] = (((j==k) ? u[i] : 0.) +  // F^m_j wrt u_k
                            ((i==k) ? u[j] : 0.) ) * rho;
         dF[i][4][k+1]   = rho * u[i] * u[k]

From 019b76820d7ff306c177822c4e76ffe5939c204b Mon Sep 17 00:00:00 2001
From: Timothy Aiken <55306867+tt-aiken@users.noreply.github.com>
Date: Wed, 4 May 2022 20:08:11 -0600
Subject: [PATCH 32/59] Shock Tube + YZB Discontinuity Capturing (#851)

* Implemented the Sod Shock Tube problem with YZB discontinuity capturing for the explicit formulation. SU stabilization is also implemented for the explicit formulation.

* Changed variable name midpt to mid_point, removed redundant slip BC declarations.

* Fixed style error, made variable names clearer, and removed dead code.

* Missed a style fix

* Rebased with main

* Implemented the Sod Shock Tube problem with YZB discontinuity capturing. for the explicit formulation. SU stabilization is also implemented for the explicit formulation.

* Fixed style error, made variable names clearer, and removed dead code.

* Missed a style fix

* Implemented fixes to the flux jacobians

* Updated documentation, tau definition in SU, and recommended fixes to the flux Jacobian.

* Implemented the Sod Shock Tube problem with YZB discontinuity capturing. for the explicit formulation. SU stabilization is also implemented for the explicit formulation.

* Missed a style fix

* Implemented the Sod Shock Tube problem with YZB discontinuity capturing. for the explicit formulation. SU stabilization is also implemented for the explicit formulation.

* Fixes to style and documentation.

* Fixing unnecessary problem->dim declaration and missing field in comment describing initial condition density.

* Added warnings for incompatible user input combinations.

* Added comments on beta to documentation, fixed j_gradn style issues.

* Compilation errors on my end. Committing to see if it's local or not.

* Style fixes

* Implemented the Sod Shock Tube problem with YZB discontinuity capturing. for the explicit formulation. SU stabilization is also implemented for the explicit formulation.

* Changed variable name midpt to mid_point, removed redundant slip BC declarations.

* Fixed style error, made variable names clearer, and removed dead code.

* Missed a style fix

* Rebased with main

* Implemented the Sod Shock Tube problem with YZB discontinuity capturing. for the explicit formulation. SU stabilization is also implemented for the explicit formulation.

* Fixed style error, made variable names clearer, and removed dead code.

* Missed a style fix

* Implemented fixes to the flux jacobians

* Updated documentation, tau definition in SU, and recommended fixes to the flux Jacobian.

* Implemented the Sod Shock Tube problem with YZB discontinuity capturing. for the explicit formulation. SU stabilization is also implemented for the explicit formulation.

* Missed a style fix

* Implemented the Sod Shock Tube problem with YZB discontinuity capturing. for the explicit formulation. SU stabilization is also implemented for the explicit formulation.

* Fixes to style and documentation.

* Fixing unnecessary problem->dim declaration and missing field in comment describing initial condition density.

* Added warnings for incompatible user input combinations.

* Added comments on beta to documentation, fixed j_gradn style issues.

* Compilation errors on my end. Committing to see if it's local or not.

* Fluids: SETERRQ1 -> SETERRQ to be compatible with PETSc 3.17

* Fluids - Update shock-tube with #881

* Small fix to mid_point definition and removed repeat entries for Shock Tube problem in README.md.

* Including test output.

* Implemented the Sod Shock Tube problem with YZB discontinuity capturing. for the explicit formulation. SU stabilization is also implemented for the explicit formulation.

* Changed variable name midpt to mid_point, removed redundant slip BC declarations.

* Fixed style error, made variable names clearer, and removed dead code.

* Missed a style fix

* Rebased with main

* Implemented the Sod Shock Tube problem with YZB discontinuity capturing. for the explicit formulation. SU stabilization is also implemented for the explicit formulation.

* Fixed style error, made variable names clearer, and removed dead code.

* Missed a style fix

* Implemented fixes to the flux jacobians

* Updated documentation, tau definition in SU, and recommended fixes to the flux Jacobian.

* Implemented the Sod Shock Tube problem with YZB discontinuity capturing. for the explicit formulation. SU stabilization is also implemented for the explicit formulation.

* Missed a style fix

* Implemented the Sod Shock Tube problem with YZB discontinuity capturing. for the explicit formulation. SU stabilization is also implemented for the explicit formulation.

* Fixes to style and documentation.

* Fixing unnecessary problem->dim declaration and missing field in comment describing initial condition density.

* Added warnings for incompatible user input combinations.

* Added comments on beta to documentation, fixed j_gradn style issues.

* Compilation errors on my end. Committing to see if it's local or not.

* Fluids: SETERRQ1 -> SETERRQ to be compatible with PETSc 3.17

* Fluids - Update shock-tube with #881

* Fluids - added test output file for shocktube

* Small fix to mid_point definition and removed repeat entries for Shock Tube problem in README.md.

* Including test output.

* Update doc/sphinx/source/references.bib

Co-authored-by: Jed Brown <jed@jedbrown.org>

* Update examples/fluids/README.md

Co-authored-by: Jed Brown <jed@jedbrown.org>

* Update examples/fluids/index.md

Co-authored-by: Jed Brown <jed@jedbrown.org>

* Update examples/fluids/navierstokes.h

Co-authored-by: Jed Brown <jed@jedbrown.org>

* Update examples/fluids/problems/shocktube.c

Co-authored-by: Jed Brown <jed@jedbrown.org>

* Update examples/fluids/qfunctions/shocktube.h

Co-authored-by: Jed Brown <jed@jedbrown.org>

* Removed unused IFunction

* Added shocktube variables to SetupContext in advection.h, advection2d.h, and densitycurrent.h

* Missed some merge conflicts that have now been cleared up.

* Added helper function for covariant element length along a given vector.

* Added SetupContext to shocktube.h

* Fix to regression test path

* remove variable transpose of the Jacobian

* Fix index swap in implicit SUPG

* examples/fluids shocktube: fix stab summation and transpose

* delete IFunction

* example/fluids- updates reference solution

Co-authored-by: Timothy Aiken <timothyaiken@Timothys-MacBook-Pro.local>
Co-authored-by: Timothy Aiken <timothyaiken@cu-biot-10-10.203.167.69.int.colorado.edu>
Co-authored-by: Leila Ghaffari <Leila.Ghaffari@colorado.edu>
Co-authored-by: Jed Brown <jed@jedbrown.org>
Co-authored-by: Timothy Aiken <timothyaiken@cu-genvpn-tcom-10.180.164.177.int.colorado.edu>
Co-authored-by: AdelekeBankole <adeleke.bankole@colorado.edu>
---
 doc/sphinx/source/references.bib              |  18 +
 examples/fluids/README.md                     |  59 +++
 examples/fluids/index.md                      |  34 ++
 examples/fluids/navierstokes.c                |   2 +
 examples/fluids/navierstokes.h                |  37 +-
 examples/fluids/problems/shocktube.c          | 197 +++++++
 examples/fluids/qfunctions/advection.h        |   5 +
 examples/fluids/qfunctions/advection2d.h      |   5 +
 examples/fluids/qfunctions/densitycurrent.h   |   5 +
 examples/fluids/qfunctions/shocktube.h        | 481 ++++++++++++++++++
 examples/fluids/src/cloptions.c               |   3 +
 ...navierstokes-shocktube-explicit-su-yzb.bin | Bin 0 -> 4840 bytes
 12 files changed, 844 insertions(+), 2 deletions(-)
 create mode 100644 examples/fluids/problems/shocktube.c
 create mode 100644 examples/fluids/qfunctions/shocktube.h
 create mode 100644 examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin

diff --git a/doc/sphinx/source/references.bib b/doc/sphinx/source/references.bib
index d29d912f11..f0cabd7df6 100644
--- a/doc/sphinx/source/references.bib
+++ b/doc/sphinx/source/references.bib
@@ -164,6 +164,24 @@ @book{toro2009
   isbn={978-3-540-49834-6}
 }
 
+@misc{sodshocktubewiki,
+  title={Sod shock tube},
+  howpublished={\url{https://en.wikipedia.org/wiki/Sod_shock_tube}},
+  note={Accessed: 01-30-2022}
+}
+
+@article{tezduyar2007yzb,
+  title={{SUPG} finite element computation of inviscid supersonic flows with $YZ\beta$ shock capturing},
+  author={Tezduyar, Tayfun E and Senga, Masayoshi},
+  journal={Computers and Fluids},
+  volume={36},
+  number={1},
+  pages={147-159},
+  year={2007},
+  publisher={Elsevier},
+  doi={10.1016/j.compfluid.2005.07.009}
+}
+
 @phdthesis{whitingStabilizedFEM1999,
   title = {Stabilized {{Finite Element Methods}} for {{Fluid Dynamics}} Using a {{Hierarchical Basis}}},
   author = {Whiting, Christian H},
diff --git a/examples/fluids/README.md b/examples/fluids/README.md
index 1b068836aa..2774d30e6c 100644
--- a/examples/fluids/README.md
+++ b/examples/fluids/README.md
@@ -177,6 +177,12 @@ For the case of a square/cubic mesh, the list of face indices to be used with `-
   - 6
 :::
 
+### Advection
+
+For testing purposes, there is a reduced mode for pure advection, which holds density $\rho$ and momentum density $\rho \bm u$ constant while advecting "total energy density" $E$. These are available in 2D and 3D.
+
+#### 2D advection
+
 For the 2D advection problem, the following additional command-line options are available:
 
 :::{list-table} Advection2D Runtime Options
@@ -251,6 +257,8 @@ and the `translation` mode with:
 ```
 Note the lengths in `-dm_plex_box_upper` are given in meters, and will be nondimensionalized according to `-units_meter`.
 
+#### 3D advection
+
 For the 3D advection problem, the following additional command-line options are available:
 
 :::{list-table} Advection3D Runtime Options
@@ -334,6 +342,10 @@ and the `translation` mode with:
 ./navierstokes -problem advection -dm_plex_box_faces 10,10,10 -dm_plex_dim 3 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 8000,8000,8000 -wind_type translation -wind_translation .5,-1,0 -bc_inflow 1,2,3,4,5,6
 ```
 
+### Inviscid Ideal Gas
+
+#### Isentropic Euler vortex
+
 For the Isentropic Vortex problem, the following additional command-line options are available:
 
 :::{list-table} Isentropic Vortex Runtime Options
@@ -381,6 +393,47 @@ This problem can be run with:
 ./navierstokes -problem euler_vortex -dm_plex_box_faces 20,20,1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,1000,50 -dm_plex_dim 3 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -mean_velocity .5,-.8,0.
 ```
 
+#### Sod shock tube
+
+For the Shock Tube problem, the following additional command-line options are available:
+
+:::{list-table} Shock Tube Runtime Options
+:header-rows: 1
+
+* - Option
+  - Description
+  - Default value
+  - Unit
+
+* - `-units_meter`
+  - 1 meter in scaled length units
+  - `1E-2`
+  -
+
+* - `-units_second`
+  - 1 second in scaled time units
+  - `1E-2`
+  -
+
+* - `-yzb`
+  - Use YZB discontinuity capturing
+  - `false`
+  -
+
+* - `-stab`
+  - Stabilization method (`none`, `su`, or `supg`)
+  - `none`
+  -
+:::
+
+This problem can be run with:
+
+```
+./navierstokes -problem shocktube -yzb -stab su -bc_slip_z 3,4 -bc_slip_y 1,2 -bc_wall 5,6 -dm_plex_dim 3 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,100,100 -dm_plex_box_faces 200,1,1 -units_second 0.1 
+```
+
+### Newtonian viscosity, Ideal Gas
+
 For the Density Current, Channel, and Blasius problems, the following common command-line options are available:
 
 :::{list-table} Newtonian Ideal Gas problems Runtime Options
@@ -477,6 +530,8 @@ For the Density Current, Channel, and Blasius problems, the following common com
   - `W/(m K)`
 :::
 
+#### Density current
+
 The Density Current problem the following command-line options are available in
 addition to the Newtonian Ideal Gas options:
 
@@ -530,6 +585,8 @@ This problem can be run with:
 ./navierstokes -problem density_current -dm_plex_box_faces 16,1,8 -degree 1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 2000,125,1000 -dm_plex_dim 3 -rc 400. -bc_wall 1,2,5,6 -wall_comps 1,2,3 -bc_slip_y 3,4 -mu 75
 ```
 
+#### Channel flow
+
 The Channel problem the following command-line options are available in
 addition to the Newtonian Ideal Gas options:
 
@@ -566,6 +623,8 @@ This problem can be run with the `channel.yaml` file via:
 :language: yaml
 ```
 
+#### Blasius boundary layer
+
 The Blasius problem the following command-line options are available in
 addition to the Newtonian Ideal Gas options:
 
diff --git a/examples/fluids/index.md b/examples/fluids/index.md
index 45d2364975..3eca2bb1f2 100644
--- a/examples/fluids/index.md
+++ b/examples/fluids/index.md
@@ -373,6 +373,40 @@ $$
 where $(\bar{x}, \, \bar{y}) = (x-x_c, \, y-y_c)$, $(x_c, \, y_c)$ represents the center of the domain, $r^2=\bar{x}^2 + \bar{y}^2$, and $\epsilon$ is the vortex strength ($\epsilon$ < 10).
 There is no perturbation in the entropy $S=P/\rho^\gamma$ ($\delta S=0)$.
 
+(problem-shock-tube)=
+
+## Shock Tube
+
+This test problem is based on Sod's Shock Tube (from {cite}`sodshocktubewiki`), a canonical test case for discontinuity capturing in one dimension. For this problem, the three-dimensional Euler equations are formulated exactly as in the Isentropic Vortex problem. The default initial conditions are $P=1$, $\rho=1$ for the driver section and $P=0.1$, $\rho=0.125$ for the driven section. The initial velocity is zero in both sections. Slip boundary conditions are applied to the side walls and wall boundary conditions are applied at the end walls.
+
+SU upwinding and discontinuity capturing have been implemented into the explicit timestepping operator for this problem. Discontinuity capturing is accomplished using a modified version of the $YZ\beta$ operator described in {cite}`tezduyar2007yzb`. This discontinuity capturing scheme involves the introduction of a dissipation term of the form
+
+$$
+\int_{\Omega} \nu_{SHOCK} \nabla \bm v \!:\! \nabla \bm q dV
+$$
+
+The shock capturing viscosity is implemented following the first formulation described in {cite}`tezduyar2007yzb`. The characteristic velocity $u_{cha}$ is taken to be the acoustic speed while the reference density $\rho_{ref}$ is just the local density. Shock capturing viscosity is defined by the following
+
+$$
+\nu_{SHOCK} = \tau_{SHOCK} u_{cha}^2
+$$
+where,
+$$
+\tau_{SHOCK} = \frac{h_{SHOCK}}{2u_{cha}} \left( \frac{ \,|\, \nabla \rho \,|\, h_{SHOCK}}{\rho_{ref}} \right)^{\beta}
+$$
+
+$\beta$ is a tuning parameter set between 1 (smoother shocks) and 2 (sharper shocks). The parameter $h_{SHOCK}$ is a length scale that is proportional to the element length in the direction of the density gradient unit vector. This density gradient unit vector is defined as $\hat{\bm j} = \frac{\nabla \rho}{|\nabla \rho|}$. The original formulation of Tezduyar and Senga relies on the shape function gradient to define the element length scale, but this gradient is not available to QFunctions in libCEED. To avoid this problem, $h_{SHOCK}$ is defined in the current implementation as
+
+$$
+h_{SHOCK} = 2 \left( C_{YZB} \,|\, \bm p \,|\, \right)^{-1}
+$$
+where
+$$
+p_k = \hat{j}_i \frac{\partial \xi_i}{\partial x_k}
+$$
+
+The constant $C_{YZB}$ is set to 0.1 for piecewise linear elements in the current implementation. Larger values approaching unity are expected with more robust stabilization and implicit timestepping.
+
 (problem-density-current)=
 
 ## Density Current
diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c
index 466c8e5749..ea7fc9ae3f 100644
--- a/examples/fluids/navierstokes.c
+++ b/examples/fluids/navierstokes.c
@@ -33,6 +33,7 @@
 //TESTARGS(name="adv2d_translation_implicit_stab_su") -ceed {ceed_resource} -test -problem advection2d -CtauS .3 -stab su -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0 -dm_plex_box_upper 125,125 -units_kilogram 1e-9 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -wind_type translation -wind_translation .53,-1.33,0 -bc_inflow 1,2,3,4 -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-translation-implicit-stab-su.bin
 //TESTARGS(name="euler_implicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-implicit.bin
 //TESTARGS(name="euler_explicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ts_dt 1e-7 -ts_rk_type 5bs -ts_rtol 1e-10 -ts_atol 1e-10 -compare_final_state_atol 1E-7 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-explicit.bin
+//TESTARGS(name="shocktube_explicit_su_yzb") -ceed {ceed_resource} -test -problem shocktube -degree 1 -dm_plex_box_faces 50,1,1 -units_meter 1e-2 -units_second 1e-2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,20,20 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -yzb -stab su -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin
 
 /// @file
 /// Navier-Stokes example using PETSc
@@ -356,6 +357,7 @@ int main(int argc, char **argv) {
   ierr = PetscFree(setup_ctx); CHKERRQ(ierr);
   ierr = PetscFree(phys_ctx->newtonian_ig_ctx); CHKERRQ(ierr);
   ierr = PetscFree(phys_ctx->euler_ctx); CHKERRQ(ierr);
+  ierr = PetscFree(phys_ctx->shocktube_ctx); CHKERRQ(ierr);
   ierr = PetscFree(phys_ctx->advection_ctx); CHKERRQ(ierr);
   ierr = PetscFree(phys_ctx->channel_ctx); CHKERRQ(ierr);
   ierr = PetscFree(phys_ctx->blasius_ctx); CHKERRQ(ierr);
diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h
index 7af5d86c6f..8236002cd0 100644
--- a/examples/fluids/navierstokes.h
+++ b/examples/fluids/navierstokes.h
@@ -132,7 +132,7 @@ struct AppCtx_private {
 struct CeedData_private {
   CeedVector           x_coord, q_data;
   CeedQFunctionContext setup_context, newt_ig_context, advection_context,
-                       euler_context, channel_context, blasius_context;
+                       euler_context, shocktube_context, channel_context, blasius_context;
   CeedQFunction        qf_setup_vol, qf_ics, qf_rhs_vol, qf_ifunction_vol,
                        qf_setup_sur, qf_apply_inflow, qf_apply_outflow;
   CeedBasis            basis_x, basis_xc, basis_q, basis_x_sur, basis_q_sur;
@@ -202,6 +202,11 @@ struct SetupContext_ {
   CeedScalar dc_axis[3];
   CeedScalar wind[3];
   CeedScalar time;
+  CeedScalar mid_point;
+  CeedScalar P_high;
+  CeedScalar rho_high;
+  CeedScalar P_low;
+  CeedScalar rho_low;
   int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
   int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
   int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
@@ -240,6 +245,20 @@ struct EulerContext_ {
 };
 #endif
 
+// SHOCKTUBE (guarded: qfunctions/shocktube.h declares the same struct layout)
+#ifndef shocktube_context_struct
+#define shocktube_context_struct
+typedef struct ShockTubeContext_ *ShockTubeContext;
+struct ShockTubeContext_ {
+  CeedScalar Cyzb;   // YZB tuning constant (NS_SHOCKTUBE default 0.1)
+  CeedScalar Byzb;   // YZB tuning parameter: 1 for smooth shocks, 2 for sharp
+  CeedScalar c_tau;  // Stabilization constant (-c_tau)
+  bool implicit;     // Use implicit (IFunction) formulation (-implicit)
+  bool yzb;          // Use YZB discontinuity capturing (-yzb)
+  int stabilization; // Stabilization method selected with -stab
+};
+#endif
+
 // ADVECTION and ADVECTION2D
 #ifndef advection_context_struct
 #define advection_context_struct
@@ -310,6 +329,7 @@ struct Physics_private {
   ChannelContext           channel_ctx;
   NewtonianIdealGasContext newtonian_ig_ctx;
   EulerContext             euler_ctx;
+  ShockTubeContext         shocktube_ctx;
   AdvectionContext         advection_ctx;
   WindType                 wind_type;
   BubbleType               bubble_type;
@@ -351,9 +371,13 @@ extern PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm,
 extern PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm,
                                       void *setup_ctx, void *ctx);
 extern PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm,
-    void *setup_ctx, void *ctx);
+    void *setup_ctx,
+    void *ctx);
+
 extern PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm,
                                       void *setup_ctx, void *ctx);
+extern PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
+                                   void *ctx);
 extern PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
                                    void *ctx);
 extern PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm,
@@ -375,6 +399,9 @@ extern PetscErrorCode SetupContext_DENSITY_CURRENT(Ceed ceed,
 extern PetscErrorCode SetupContext_EULER_VORTEX(Ceed ceed, CeedData ceed_data,
     AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
 
+extern PetscErrorCode SetupContext_SHOCKTUBE(Ceed ceed, CeedData ceed_data,
+    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
+
 extern PetscErrorCode SetupContext_ADVECTION(Ceed ceed, CeedData ceed_data,
     AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
 
@@ -388,6 +415,9 @@ extern PetscErrorCode BC_DENSITY_CURRENT(DM dm, SimpleBC bc, Physics phys,
 extern PetscErrorCode BC_EULER_VORTEX(DM dm, SimpleBC bc, Physics phys,
                                       void *setup_ctx);
 
+extern PetscErrorCode BC_SHOCKTUBE(DM dm, SimpleBC bc, Physics phys,
+                                   void *setup_ctx);
+
 extern PetscErrorCode BC_ADVECTION(DM dm, SimpleBC bc, Physics phys,
                                    void *setup_ctx);
 
@@ -401,6 +431,9 @@ extern PetscErrorCode PRINT_DENSITY_CURRENT(Physics phys,
 extern PetscErrorCode PRINT_EULER_VORTEX(Physics phys, SetupContext setup_ctx,
     AppCtx app_ctx);
 
+extern PetscErrorCode PRINT_SHOCKTUBE(Physics phys, SetupContext setup_ctx,
+                                      AppCtx app_ctx);
+
 extern PetscErrorCode PRINT_ADVECTION(Physics phys, SetupContext setup_ctx,
                                       AppCtx app_ctx);
 
diff --git a/examples/fluids/problems/shocktube.c b/examples/fluids/problems/shocktube.c
new file mode 100644
index 0000000000..679ff515df
--- /dev/null
+++ b/examples/fluids/problems/shocktube.c
@@ -0,0 +1,197 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Utility functions for setting up SHOCKTUBE
+
+#include "../navierstokes.h"
+#include "../qfunctions/setupgeo.h"
+#include "../qfunctions/shocktube.h"
+
+PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
+                            void *ctx) {
+  SetupContext      setup_context = *(SetupContext *)setup_ctx; // in: &SetupContext
+  User              user = *(User *)ctx;                        // in: &User
+  MPI_Comm          comm = PETSC_COMM_WORLD;
+  PetscBool         implicit;
+  PetscBool         yzb;
+  PetscInt          stab; // NOTE(review): written through a PetscEnum* below
+  PetscBool         has_curr_time = PETSC_FALSE;
+  PetscErrorCode    ierr;
+  PetscFunctionBeginUser;
+  // Registers the SHOCKTUBE QFunctions, parses options, fills setup/QFunction ctx
+  ierr = PetscCalloc1(1, &user->phys->shocktube_ctx); CHKERRQ(ierr);
+
+  // ------------------------------------------------------
+  //               SET UP SHOCKTUBE
+  // ------------------------------------------------------
+  problem->dim                     = 3;
+  problem->q_data_size_vol         = 10;
+  problem->q_data_size_sur         = 4;
+  problem->setup_vol               = Setup;
+  problem->setup_vol_loc           = Setup_loc;
+  problem->setup_sur               = SetupBoundary;
+  problem->setup_sur_loc           = SetupBoundary_loc;
+  problem->ics                     = ICsShockTube;
+  problem->ics_loc                 = ICsShockTube_loc;
+  problem->apply_vol_rhs           = EulerShockTube;
+  problem->apply_vol_rhs_loc       = EulerShockTube_loc;
+  problem->apply_vol_ifunction     = NULL;
+  problem->apply_vol_ifunction_loc = NULL;
+  problem->bc                      = Exact_ShockTube;
+  problem->setup_ctx               = SetupContext_SHOCKTUBE;
+  problem->non_zero_time           = PETSC_FALSE;
+  problem->print_info              = PRINT_SHOCKTUBE;
+
+  // ------------------------------------------------------
+  //             Create the libCEED context
+  // ------------------------------------------------------
+  // Driver section initial conditions
+  CeedScalar P_high          = 1.0;     // Pa
+  CeedScalar rho_high        = 1.0;     // kg/m^3
+  // Driven section initial conditions
+  CeedScalar P_low           = 0.1;     // Pa
+  CeedScalar rho_low         = 0.125;   // kg/m^3
+  // Stabilization parameter
+  CeedScalar c_tau           = 0.5;     // -, based on Hughes et al (2010)
+  // Tuning parameters for the YZB shock capturing
+  CeedScalar Cyzb            = 0.1;     // -, used in approximation of (Na),x
+  CeedScalar Byzb            = 2.0;     // -, 1 for smooth shocks
+  //                                          2 for sharp shocks
+  PetscReal domain_min[3], domain_max[3], domain_size[3];
+  ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
+  for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+
+  // ------------------------------------------------------
+  //             Create the PETSc context
+  // ------------------------------------------------------
+  PetscScalar meter    = 1e-2; // 1 meter in scaled length units
+  PetscScalar second   = 1e-2; // 1 second in scaled time units
+
+  // ------------------------------------------------------
+  //              Command line Options
+  // ------------------------------------------------------
+  PetscOptionsBegin(comm, NULL, "Options for SHOCKTUBE problem", NULL);
+
+  // -- Numerical formulation options
+  ierr = PetscOptionsBool("-implicit", "Use implicit (IFunction) formulation",
+                          NULL, implicit=PETSC_FALSE, &implicit, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsEnum("-stab", "Stabilization method", NULL,
+                          StabilizationTypes, (PetscEnum)(stab = STAB_NONE),
+                          (PetscEnum *)&stab, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-c_tau", "Stabilization constant",
+                            NULL, c_tau, &c_tau, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsBool("-yzb", "Use YZB discontinuity capturing",
+                          NULL, yzb=PETSC_FALSE, &yzb, NULL); CHKERRQ(ierr);
+
+  // -- Units
+  ierr = PetscOptionsScalar("-units_meter", "1 meter in scaled length units",
+                            NULL, meter, &meter, NULL); CHKERRQ(ierr);
+  meter = fabs(meter);
+  ierr = PetscOptionsScalar("-units_second","1 second in scaled time units",
+                            NULL, second, &second, NULL); CHKERRQ(ierr);
+  second = fabs(second);
+
+  // -- Warnings
+  if (stab == STAB_SUPG) {
+    ierr = PetscPrintf(comm,
+                       "Warning! -stab supg not implemented for the shocktube problem. \n");
+    CHKERRQ(ierr);
+  }
+  if (yzb && implicit) {
+    ierr = PetscPrintf(comm,
+                       "Warning! -yzb only implemented for explicit timestepping. \n");
+    CHKERRQ(ierr);
+  }
+
+
+  PetscOptionsEnd();
+
+  // ------------------------------------------------------
+  //           Set up the PETSc context
+  // ------------------------------------------------------
+  user->units->meter  = meter;
+  user->units->second = second;
+
+  // ------------------------------------------------------
+  //           Set up the libCEED context
+  // ------------------------------------------------------
+  // -- Scale variables to desired units
+  for (int i=0; i<3; i++) {
+    domain_size[i] *= meter;
+    domain_min[i] *= meter;
+  }
+  problem->dm_scale = meter;
+  // Midpoint of the x-extent; also correct when the box does not start at x = 0
+  CeedScalar mid_point = domain_min[0] + 0.5*domain_size[0];
+  // -- Setup Context
+  setup_context->lx        = domain_size[0];
+  setup_context->ly        = domain_size[1];
+  setup_context->lz        = domain_size[2];
+  setup_context->mid_point = mid_point;
+  setup_context->time      = 0.0;
+  setup_context->P_high    = P_high;
+  setup_context->rho_high  = rho_high;
+  setup_context->P_low     = P_low;
+  setup_context->rho_low   = rho_low;
+
+  // -- QFunction Context
+  user->phys->implicit                      = implicit;
+  user->phys->has_curr_time                 = has_curr_time;
+  user->phys->shocktube_ctx->implicit       = implicit;
+  user->phys->shocktube_ctx->stabilization  = stab;
+  user->phys->shocktube_ctx->yzb            = yzb;
+  user->phys->shocktube_ctx->Cyzb           = Cyzb;
+  user->phys->shocktube_ctx->Byzb           = Byzb;
+  user->phys->shocktube_ctx->c_tau          = c_tau;
+
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode SetupContext_SHOCKTUBE(Ceed ceed, CeedData ceed_data,
+                                      AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
+  PetscFunctionBeginUser;
+  // setup_ctx -> qf_ics; phys->shocktube_ctx -> qf_rhs_vol / qf_ifunction_vol
+  CeedQFunctionContextCreate(ceed, &ceed_data->setup_context);
+  CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, sizeof(*setup_ctx), setup_ctx);
+  CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->setup_context);
+  CeedQFunctionContextCreate(ceed, &ceed_data->shocktube_context);
+  CeedQFunctionContextSetData(ceed_data->shocktube_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER,
+                              sizeof(*phys->shocktube_ctx), phys->shocktube_ctx);
+  if (ceed_data->qf_rhs_vol) // Attach only to the volume QFunctions that exist
+    CeedQFunctionSetContext(ceed_data->qf_rhs_vol, ceed_data->shocktube_context);
+  if (ceed_data->qf_ifunction_vol)
+    CeedQFunctionSetContext(ceed_data->qf_ifunction_vol,
+                            ceed_data->shocktube_context);
+  // NOTE(review): Ceed return codes are not checked here; app_ctx is unused
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode PRINT_SHOCKTUBE(Physics phys, SetupContext setup_ctx,
+                               AppCtx app_ctx) {
+  MPI_Comm       comm = PETSC_COMM_WORLD;
+  PetscErrorCode ierr;
+  PetscFunctionBeginUser;
+  // Only the problem name is printed; phys and setup_ctx are not read
+  ierr = PetscPrintf(comm,
+                     "  Problem:\n"
+                     "    Problem Name                       : %s\n",
+                     app_ctx->problem_name); CHKERRQ(ierr);
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/fluids/qfunctions/advection.h b/examples/fluids/qfunctions/advection.h
index 9d9e2bdbfa..2ed50f8335 100644
--- a/examples/fluids/qfunctions/advection.h
+++ b/examples/fluids/qfunctions/advection.h
@@ -32,6 +32,11 @@ struct SetupContext_ {
   CeedScalar dc_axis[3];
   CeedScalar wind[3];
   CeedScalar time;
+  CeedScalar mid_point;
+  CeedScalar P_high;
+  CeedScalar rho_high;
+  CeedScalar P_low;
+  CeedScalar rho_low;
   int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
   int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
   int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
diff --git a/examples/fluids/qfunctions/advection2d.h b/examples/fluids/qfunctions/advection2d.h
index 08c4dc163f..11e00dc5a8 100644
--- a/examples/fluids/qfunctions/advection2d.h
+++ b/examples/fluids/qfunctions/advection2d.h
@@ -36,6 +36,11 @@ struct SetupContext_ {
   CeedScalar dc_axis[3];
   CeedScalar wind[3];
   CeedScalar time;
+  CeedScalar mid_point;
+  CeedScalar P_high;
+  CeedScalar rho_high;
+  CeedScalar P_low;
+  CeedScalar rho_low;
   int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
   int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
   int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
diff --git a/examples/fluids/qfunctions/densitycurrent.h b/examples/fluids/qfunctions/densitycurrent.h
index 3aed68e123..4b8691e3ab 100644
--- a/examples/fluids/qfunctions/densitycurrent.h
+++ b/examples/fluids/qfunctions/densitycurrent.h
@@ -41,6 +41,11 @@ struct SetupContext_ {
   CeedScalar dc_axis[3];
   CeedScalar wind[3];
   CeedScalar time;
+  CeedScalar mid_point;
+  CeedScalar P_high;
+  CeedScalar rho_high;
+  CeedScalar P_low;
+  CeedScalar rho_low;
   int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
   int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
   int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
diff --git a/examples/fluids/qfunctions/shocktube.h b/examples/fluids/qfunctions/shocktube.h
new file mode 100644
index 0000000000..c9e0c9498b
--- /dev/null
+++ b/examples/fluids/qfunctions/shocktube.h
@@ -0,0 +1,481 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Shock tube initial condition and Euler equation operator for Navier-Stokes
+/// example using PETSc - modified from eulervortex.h
+
+// Model from:
+//   On the Order of Accuracy and Numerical Performance of Two Classes of
+//   Finite Volume WENO Schemes, Zhang, Zhang, and Shu (2011).
+
+#ifndef shocktube_h
+#define shocktube_h
+
+#include <math.h>
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846
+#endif
+
+#ifndef setup_context_struct
+#define setup_context_struct
+typedef struct SetupContext_ *SetupContext;
+struct SetupContext_ {        // Same guarded layout is declared in sibling headers
+  CeedScalar theta0;
+  CeedScalar thetaC;
+  CeedScalar P0;
+  CeedScalar N;
+  CeedScalar cv;
+  CeedScalar cp;
+  CeedScalar g[3];
+  CeedScalar rc;
+  CeedScalar lx;              // Domain size in x (scaled bounding-box extent)
+  CeedScalar ly;              // Domain size in y (scaled bounding-box extent)
+  CeedScalar lz;              // Domain size in z (scaled bounding-box extent)
+  CeedScalar center[3];
+  CeedScalar dc_axis[3];
+  CeedScalar wind[3];
+  CeedScalar time;            // Set to 0.0 by NS_SHOCKTUBE
+  CeedScalar mid_point;       // x-location separating driver and driven sections
+  CeedScalar P_high;          // Driver section pressure
+  CeedScalar rho_high;        // Driver section density
+  CeedScalar P_low;           // Driven section pressure
+  CeedScalar rho_low;         // Driven section density
+  int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
+  int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
+  int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
+};
+#endif
+
+#ifndef shocktube_context_struct
+#define shocktube_context_struct
+typedef struct ShockTubeContext_ *ShockTubeContext;
+struct ShockTubeContext_ {
+  CeedScalar Cyzb;   // YZB tuning constant
+  CeedScalar Byzb;   // YZB tuning parameter: 1 for smooth shocks, 2 for sharp
+  CeedScalar c_tau;  // Stabilization constant
+  bool implicit;     // Use implicit (IFunction) formulation
+  bool yzb;          // Use YZB discontinuity capturing
+  int stabilization; // Stabilization method
+};
+#endif
+
+// *****************************************************************************
+// This function sets the initial conditions
+//
+//   Temperature:
+//     T   = P / (rho * R)
+//   Density:
+//     rho = 1.0        if x <= mid_point
+//         = 0.125      if x >  mid_point
+//   Pressure:
+//     P   = 1.0        if x <= mid_point
+//         = 0.1        if x >  mid_point
+//   Velocity:
+//     u   = 0
+//   Velocity/Momentum Density:
+//     Ui  = rho ui
+//   Total Energy:
+//     E   = P / (gamma - 1) + rho (u u)/2
+//
+// Constants:
+//   cv              ,  Specific heat, constant volume
+//   cp              ,  Specific heat, constant pressure
+//   mid_point       ,  Location of initial domain mid_point
+//   gamma  = cp / cv,  Specific heat ratio
+//
+// *****************************************************************************
+
+// *****************************************************************************
+// This helper function provides the IC formulation for the Sod shock tube; the
+//   exact, time-dependent solution is not implemented (dim, time, Nf are unused)
+// *****************************************************************************
+CEED_QFUNCTION_HELPER int Exact_ShockTube(CeedInt dim, CeedScalar time,
+    const CeedScalar X[], CeedInt Nf, CeedScalar q[],
+    void *ctx) {
+  // Writes q[0..4] = (rho, rho*u, E) for the side of mid_point X[0] lies on
+  // Context
+  const SetupContext context = (SetupContext)ctx;
+  const CeedScalar mid_point = context->mid_point;      // Midpoint of the domain
+  const CeedScalar P_high = context->P_high;            // Driver section pressure
+  const CeedScalar rho_high = context->rho_high;        // Driver section density
+  const CeedScalar P_low = context->P_low;              // Driven section pressure
+  const CeedScalar rho_low = context->rho_low;          // Driven section density
+
+  // Setup
+  const CeedScalar gamma = 1.4;    // ratio of specific heats
+  const CeedScalar x     = X[0];   // Coordinates
+
+  CeedScalar rho, P, u[3] = {0.};
+
+  // Initial Conditions
+  if (x <= mid_point) {
+    rho = rho_high;
+    P   = P_high;
+  } else {
+    rho = rho_low;
+    P   = P_low;
+  }
+
+  // Assign conserved state; E = P / (gamma - 1) + rho (u u)/2, with u = 0 here
+  q[0] = rho;
+  q[1] = rho * u[0];
+  q[2] = rho * u[1];
+  q[3] = rho * u[2];
+  q[4] = P / (gamma-1.0) + rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]) / 2.;
+
+  // Return (always 0)
+  return 0;
+}
+
+// *****************************************************************************
+// Helper function for computing flux Jacobian (dF[i][0][0], dF[i][0][4] not set)
+// *****************************************************************************
+CEED_QFUNCTION_HELPER void ConvectiveFluxJacobian_Euler(CeedScalar dF[3][5][5],
+    const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
+    const CeedScalar gamma) {
+  CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Squared velocity magnitude
+  for (CeedInt i=0; i<3; i++) { // One 5x5 Jacobian matrix per direction i
+    for (CeedInt j=0; j<3; j++) { // Rows 1..3 of each Jacobian matrix
+      dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2.)) : 0.) - u[i]*u[j];
+      for (CeedInt k=0; k<3; k++) { // Columns 1..3; rows 0 and 4 rewritten per j
+        dF[i][0][k+1]   = ((i==k) ? 1. : 0.);
+        dF[i][j+1][k+1] = ((j==k) ? u[i] : 0.) +
+                          ((i==k) ? u[j] : 0.) -
+                          ((i==j) ? u[k] : 0.) * (gamma-1.);
+        dF[i][4][k+1]   = ((i==k) ? (E*gamma/rho - (gamma-1.)*u_sq/2.) : 0.) -
+                          (gamma-1.)*u[i]*u[k];
+      }
+      dF[i][j+1][4] = ((i==j) ? (gamma-1.) : 0.);
+    }
+    dF[i][4][0] = u[i] * ((gamma-1.)*u_sq - E*gamma/rho);
+    dF[i][4][4] = u[i] * gamma;
+  }
+}
+
+// *****************************************************************************
+// Helper function for calculating the covariant length scale in the direction
+// of some 3 element input vector: h = 2 |vec| / |p|, p_i = sum_j vec[j] dXdx[j][i]
+//
+//  vec         = vector that length is measured in the direction of
+//  dXdx        = 3x3 matrix contracted with vec over its first index
+//  h           = covariant element length along vec (0/0 if vec is all zeros)
+// *****************************************************************************
+CEED_QFUNCTION_HELPER CeedScalar Covariant_length_along_vector(
+  CeedScalar vec[3], const CeedScalar dXdx[3][3]) {
+  // Each component of p sums over j, so the vec factor must be indexed by j
+  CeedScalar vec_norm = sqrt(vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2]);
+  CeedScalar vec_dot_jacobian[3] = {0.0};
+  for (CeedInt i=0; i<3; i++) {
+    for (CeedInt j=0; j<3; j++) {
+      vec_dot_jacobian[i] += dXdx[j][i]*vec[j];
+    }
+  }
+  CeedScalar norm_vec_dot_jacobian = sqrt(vec_dot_jacobian[0]*vec_dot_jacobian[0]+
+                                          vec_dot_jacobian[1]*vec_dot_jacobian[1]+
+                                          vec_dot_jacobian[2]*vec_dot_jacobian[2]);
+  CeedScalar h = 2.0 * vec_norm / norm_vec_dot_jacobian;
+  return h;
+}
+
+
+// *****************************************************************************
+// Helper function for computing Tau elements (stabilization constant)
+//   Model from:
+//     Stabilized Methods for Compressible Flows, Hughes et al 2010
+//
+//   Spatial criterion #2 - Tau is a 3x3 diagonal matrix
+//   Tau[i] = c_tau h[i] Xi(Pe) / rho(A[i]) (no sum); stored as the 3 diagonal entries Tau_x[i]
+//
+// Where
+//   c_tau     = stabilization constant (0.5 is reported as "optimal")
+//   h[i]      = 2 / length of column i of dXdx (dXdx[0][i], dXdx[1][i], dXdx[2][i])
+//   Pe        = Peclet number ( Pe = sqrt(u u) / dot(dXdx,u) diffusivity ); not computed in the code below
+//   Xi(Pe)    = coth Pe - 1. / Pe (1. at large local Peclet number ); not evaluated below, i.e. taken as 1
+//   rho(A[i]) = spectral radius of the convective flux Jacobian i,
+//               wave speed in direction i, computed below as |u[i]| + sound_speed
+// *****************************************************************************
+CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3],
+                                       const CeedScalar dXdx[3][3], const CeedScalar u[3],
+                                       const CeedScalar sound_speed, const CeedScalar c_tau) {
+  for (int i=0; i<3; i++) {
+    // length of element in direction i
+    CeedScalar h = 2 / sqrt(dXdx[0][i]*dXdx[0][i] + dXdx[1][i]*dXdx[1][i] +
+                            dXdx[2][i]*dXdx[2][i]);
+    // fastest wave in direction i
+    CeedScalar fastest_wave = fabs(u[i]) + sound_speed;
+    Tau_x[i] = c_tau * h / fastest_wave; // overwrites Tau_x[i]; prior contents are not used
+  }
+}
+
+// *****************************************************************************
+// This QFunction sets the initial conditions for shock tube: out[0] = Exact_ShockTube state at each of the Q points
+// *****************************************************************************
+CEED_QFUNCTION(ICsShockTube)(void *ctx, CeedInt Q,
+                             const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: in[0] holds the coordinates, read as 3 components per quadrature point
+  const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
+
+  // Outputs: out[0] receives 5 state components per quadrature point
+  CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  CeedPragmaSIMD
+  // Quadrature Point Loop
+  for (CeedInt i=0; i<Q; i++) {
+    const CeedScalar x[] = {X[0][i], X[1][i], X[2][i]}; // gather point i into a contiguous coordinate triple
+    CeedScalar q[5];
+
+    Exact_ShockTube(3, 0., x, 5, q, ctx); // fills q; arguments 3, 0., 5 are presumably dim, time, number of fields - signature not visible here, confirm
+
+    for (CeedInt j=0; j<5; j++)
+      q0[j][i] = q[j]; // scatter back into the component-major output layout
+  } // End of Quadrature Point Loop
+
+  // Return
+  return 0;
+}
+
+// *****************************************************************************
+// This QFunction implements the following formulation of Euler equations
+//   with explicit time stepping method
+//
+// This is 3D Euler for compressible gas dynamics in conservation
+//   form with state variables of density, momentum density, and total
+//   energy density.
+//
+// State Variables: q = ( rho, U1, U2, U3, E )
+//   rho - Mass Density
+//   Ui  - Momentum Density,      Ui = rho ui
+//   E   - Total Energy Density,  E  = P / (gamma - 1) + rho (u u)/2
+//
+// Euler Equations:
+//   drho/dt + div( U )                   = 0
+//   dU/dt   + div( rho (u x u) + P I3 )  = 0
+//   dE/dt   + div( (E + P) u )           = 0
+//
+// Equation of State:
+//   P = (gamma - 1) (E - rho (u u) / 2)
+//
+// Constants and context values read by the code below:
+//   gamma  = cp / cv,  Specific heat ratio (hard-coded to 1.4)
+//   Cyzb, Byzb      ,  YZB parameters (used only when context->yzb is set)
+//   c_tau           ,  Stabilization constant passed to Tau_spatial
+//   stabilization   ,  0 = Galerkin (no extra term), 1 = SU
+// *****************************************************************************
+CEED_QFUNCTION(EulerShockTube)(void *ctx, CeedInt Q,
+                               const CeedScalar *const *in, CeedScalar *const *out) {
+  // *INDENT-OFF*
+  // Inputs: in[0] = state q (5 components), in[1] = dq indexed [direction][component], in[2] = q_data (wdetJ, then 9 dXdx entries)
+  const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
+                   (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
+                   (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+  // Outputs: out[0] = v (5 components; only zeroed in this function), out[1] = dv indexed [direction][component]
+  CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
+             (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
+
+  const CeedScalar gamma = 1.4;
+
+  ShockTubeContext context = (ShockTubeContext)ctx;
+  const CeedScalar Cyzb  = context->Cyzb;
+  const CeedScalar Byzb  = context->Byzb;
+  const CeedScalar c_tau = context->c_tau;
+
+  CeedPragmaSIMD
+  // Quadrature Point Loop
+  for (CeedInt i=0; i<Q; i++) {
+    // *INDENT-OFF*
+    // Setup
+    // -- Interp in
+    const CeedScalar rho        =   q[0][i];
+    const CeedScalar u[3]       =  {q[1][i] / rho,
+                                    q[2][i] / rho,
+                                    q[3][i] / rho
+                                   };
+    const CeedScalar E          =   q[4][i];
+    const CeedScalar drho[3]    =  {dq[0][0][i],
+                                    dq[1][0][i],
+                                    dq[2][0][i]
+                                   };
+    const CeedScalar dU[3][3]   = {{dq[0][1][i],
+                                    dq[1][1][i],
+                                    dq[2][1][i]},
+                                   {dq[0][2][i],
+                                    dq[1][2][i],
+                                    dq[2][2][i]},
+                                   {dq[0][3][i],
+                                    dq[1][3][i],
+                                    dq[2][3][i]}
+                                  };
+    const CeedScalar dE[3]      =  {dq[0][4][i],
+                                    dq[1][4][i],
+                                    dq[2][4][i]
+                                   };
+    // -- Interp-to-Interp q_data
+    const CeedScalar wdetJ      =   q_data[0][i];
+    // -- Interp-to-Grad q_data
+    // ---- Inverse of change of coordinate matrix: X_i,j
+    // *INDENT-OFF*
+    const CeedScalar dXdx[3][3] = {{q_data[1][i],
+                                    q_data[2][i],
+                                    q_data[3][i]},
+                                   {q_data[4][i],
+                                    q_data[5][i],
+                                    q_data[6][i]},
+                                   {q_data[7][i],
+                                    q_data[8][i],
+                                    q_data[9][i]}
+                                  };
+    // du: velocity gradient built from the untransformed dq entries; drhodx, dEdx, dUdx: dq entries contracted with dXdx
+    CeedScalar du[3][3] = {{0}};
+    CeedScalar drhodx[3] = {0};
+    CeedScalar dEdx[3] = {0};
+    CeedScalar dUdx[3][3] = {{0}};
+    CeedScalar dXdxdXdxT[3][3] = {{0}};
+    for (int j=0; j<3; j++) {
+      for (int k=0; k<3; k++) {
+        du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
+        drhodx[j] += drho[k] * dXdx[k][j];
+        dEdx[j] += dE[k] * dXdx[k][j];
+        for (int l=0; l<3; l++) {
+          dUdx[j][k] += dU[j][l] * dXdx[l][k];
+          dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  // (dXdx dXdx^T)_j,k; NOTE(review): dXdxdXdxT is not read anywhere else in this function
+        }
+      }
+    }
+
+    // *INDENT-ON*
+    const CeedScalar
+    E_kinetic  = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
+    E_internal = E - E_kinetic,
+    P          = E_internal * (gamma - 1); // P = pressure
+
+    // The Physics
+    // Zero v and dv so all future terms can safely sum into it
+    for (int j=0; j<5; j++) {
+      v[j][i] = 0;
+      for (int k=0; k<3; k++)
+        dv[k][j][i] = 0;
+    }
+
+    // -- Density
+    // ---- u rho
+    for (int j=0; j<3; j++)
+      dv[j][0][i]  += wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
+                             rho*u[2]*dXdx[j][2]);
+    // -- Momentum
+    // ---- rho (u x u) + P I3
+    for (int j=0; j<3; j++)
+      for (int k=0; k<3; k++)
+        dv[k][j+1][i]  += wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
+                                 (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
+                                 (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
+    // -- Total Energy Density
+    // ---- (E + P) u
+    for (int j=0; j<3; j++)
+      dv[j][4][i]  += wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
+                                         u[2]*dXdx[j][2]);
+
+    // -- YZB stabilization: subtracts nu_shock-scaled gradients from dv, only when context->yzb is set
+    if (context->yzb) {
+      CeedScalar drho_norm = 0.0;         // magnitude of the density gradient
+      CeedScalar j_vec[3] = {0.0};        // unit vector aligned with the density gradient
+      CeedScalar h_shock = 0.0;           // element lengthscale
+      CeedScalar acoustic_vel = 0.0;      // characteristic velocity, acoustic speed
+      CeedScalar tau_shock = 0.0;         // timescale
+      CeedScalar nu_shock = 0.0;          // artificial diffusion
+
+      // Unit vector aligned with the density gradient
+      drho_norm = sqrt(drhodx[0]*drhodx[0] + drhodx[1]*drhodx[1] +
+                       drhodx[2]*drhodx[2]);
+      for (int j=0; j<3; j++)
+        j_vec[j] = drhodx[j] / (drho_norm + 1e-20); // 1e-20 keeps the division finite when drho_norm is 0
+
+      if (drho_norm == 0.0) {
+        nu_shock = 0.0;
+      } else {
+        h_shock = Covariant_length_along_vector(j_vec, dXdx);
+        h_shock /= Cyzb;
+        acoustic_vel = sqrt(gamma*P/rho);
+        tau_shock = h_shock / (2*acoustic_vel) * pow(drho_norm * h_shock / rho, Byzb);
+        nu_shock = fabs(tau_shock * acoustic_vel * acoustic_vel);
+      }
+
+      for (int j=0; j<3; j++)
+        dv[j][0][i] -= wdetJ * nu_shock * drhodx[j];
+
+      for (int k=0; k<3; k++)
+        for (int j=0; j<3; j++)
+          dv[j][k][i] -= wdetJ * nu_shock * du[k][j]; // NOTE(review): k is 0..2, so this updates components 0..2 (0 is density, already updated above) and never component 3 - confirm whether dv[j][k+1][i] was intended
+
+      for (int j=0; j<3; j++)
+        dv[j][4][i] -= wdetJ * nu_shock * dEdx[j];
+    }
+
+    // Stabilization inputs (computed for every stabilization mode, including Galerkin)
+    // Need the Jacobian for the advective fluxes for stabilization
+    //    indexed as: jacob_F_conv[direction][flux component][solution component]
+    CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
+    ConvectiveFluxJacobian_Euler(jacob_F_conv, rho, u, E, gamma);
+
+
+    // dqdx collects drhodx, dUdx and dEdx in one vector
+    CeedScalar dqdx[5][3];
+    for (int j=0; j<3; j++) {
+      dqdx[0][j] = drhodx[j];
+      dqdx[4][j] = dEdx[j];
+      for (int k=0; k<3; k++)
+        dqdx[k+1][j] = dUdx[k][j];
+    }
+
+    // strong_conv = dF/dq * dq/dx    (Strong convection)
+    CeedScalar strong_conv[5] = {0};
+    for (int j=0; j<3; j++)
+      for (int k=0; k<5; k++)
+        for (int l=0; l<5; l++)
+          strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
+
+    // Stabilization
+    // -- Tau elements
+    const CeedScalar sound_speed = sqrt(gamma * P / rho);
+    CeedScalar Tau_x[3] = {0.};
+    Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau);
+
+    CeedScalar stab[5][3] = {0};
+    switch (context->stabilization) { // no default case: any value other than 0 or 1 adds no stabilization term
+    case 0:        // Galerkin
+      break;
+    case 1:        // SU
+      for (int j=0; j<3; j++)
+        for (int k=0; k<5; k++)
+          for (int l=0; l<5; l++) {
+            stab[k][j] += jacob_F_conv[j][k][l] * Tau_x[j] * strong_conv[l];
+          }
+      for (int j=0; j<5; j++)
+        for (int k=0; k<3; k++)
+          dv[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
+                                stab[j][1] * dXdx[k][1] +
+                                stab[j][2] * dXdx[k][2]);
+      break;
+    }
+
+  } // End Quadrature Point Loop
+
+  // Return
+  return 0;
+}
+
+#endif // shocktube_h
diff --git a/examples/fluids/src/cloptions.c b/examples/fluids/src/cloptions.c
index ce049d2ed4..fd1471912d 100644
--- a/examples/fluids/src/cloptions.c
+++ b/examples/fluids/src/cloptions.c
@@ -24,6 +24,9 @@ PetscErrorCode RegisterProblems_NS(AppCtx app_ctx) {
   ierr = PetscFunctionListAdd(&app_ctx->problems, "euler_vortex",
                               NS_EULER_VORTEX); CHKERRQ(ierr);
 
+  ierr = PetscFunctionListAdd(&app_ctx->problems, "shocktube",
+                              NS_SHOCKTUBE); CHKERRQ(ierr);
+
   ierr = PetscFunctionListAdd(&app_ctx->problems, "advection",
                               NS_ADVECTION); CHKERRQ(ierr);
 
diff --git a/examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin b/examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b42c972cc57920d88156b70efc7b01b5c3964819
GIT binary patch
literal 4840
zcmcJSeN0nV7>7?Oeh`BKL7mI2GUGtlTE!`fZ4b-brsC+FR1w`Wm1t0>$;J>zbua{G
z*mPq3*pTV`MpU+`kY;5;#mx~1#gA!KHnaqmXlxY&xB=b0=e+$qJ(Di6|1^E_=6BC+
z?)&6-QCM~oLII1g4UtbmAo-FwejAIXOh?PITNu7o8l966oWbFl9G=DDIUK&hPu}+&
z{fdcbc2h7XznH@}Go0oO<Zvm=2WijM1=yY3Y8OAf%DwfWvWDOq;0RN$Q9T<Tc4>+H
za)Ol1EvNj{o2r7e2^Z~>KPj(1Pq<~zK$Wd7;O5=*lZ3a$j1zf>X;qwBYrOtx%O5vI
z-r4bPqriLCzx19#T~&K5HFL%_f%p8jX}eM2YfL}vXuv~fAJS$QrwBZ^HFf4#Y&EZG
zeNfwr(-x-eF33j-QcFMO+uAaG_gDEl;YakiFl+1NUQ>R81h;H>d6x{ggswPlJ#!Fm
zlUPGn>^I}C@_G3gSBK*pj|-=j<eTso91~l)rwH2;W~K%iHsZ$OqT<9U?Kro*BUU<o
zDQ-!dF<85^6aO+<@%oOb&3N^X?V7dw)Yv?yz5d}Ri*U{D{T+&+?O3B$oQUl1!i}RV
zDt>Qk!!-es&C4?;;*f<OmfhFaVN*cYX-&{sT!Z>Ayz|UT_0hZbpgY}_*n|eQ#YZ0&
zI10I17o>irafrhFexSh7@U4|{k)sy^>s41|BMoE6sYQ+^O}$SzN*thCYHkck>_3li
zls<@Zl&5^4Ihhe9E2A74fJ>Ag=&l-0rGx_y0FI1{;|gStRwCdM7bp95fWu2T{1C&_
z$a7B5kB}>$@@1U)EDqN(d@(s^8^dY-Y7XDb;k@(2de1w5^SqT!yw&R5Gng}O6#tJ~
z4>`P@!#f#H@^y&eG{46`-sESF4{wp{HVtF0OV7!2I?sBGa@JdU$Fd6Fh<fbIr+%_4
z55``oz94b@Z*JZiIsc6O!U*CayWQcfB*9y+E<2DBPPvD-r2VYrV+eP7Yl`Ttye$!C
zkqh3s<l(KX2i*eq;jPjVfxCOl4&G9Lw~m6hj)S*yz*`FN)>-h@E%26-daD3|xAua!
z_ET?74F_+nqu$yTLA@0p2;Ry7Z*2l^?WW%9nS-gf1h2SzYsV~~-m-k&B|P`6x9rL3
z;ywu8QV@TDx9Y7DE7=oIZ`}yhpZS7t4{vGz)~`}J_R{68wPVutn{ym{?DE!J@p%|G
zisra^B~jcvT|9@s$na$1r|}G@IdeGtGY;R&aGJlK;WYm+!|A-^eqFw#^VQ5e<-GH}
z<~{HH?XS0}&+a+*&*3x5Z5-~{OIM%cd5h-sa&o$Tb07ZW<xsB-=H$pY{ONh?(7wz^
zt3R3r*LCyObC2Tfqin=Op5B`Nb&8>wa!+rCSQMp%dwc8K=~(29x7N)Ub9{L#Q{cY5
zC4Yjq<X+xt^75AC3Euj+kKQT;Z@q<mdrRfpTRXoKo_p3?JwGRj`{3=ZhKO3)hdjM?
z-G<9@=>E9(E$!U4yxT)4ck5eg4gZ9WsOSXl@>cRtMQw%U3UJqc)4h-8@CXiPy+v~{
zXP)ghbe`>7l(T({a@J>*ui{+S;dSr(P2<e_-M5r<@Bi}NW93TH!x(P?XS@ZR@fPK*
zZ(yGB)_-$<b97<8?@Pyc3(m=S3-};T4%4?_zQ1$c(YMN%CfkNRybs*XTT$2dESgqH
zJmm70M(A7GBhQbJQSRxjIAbK~Rdz3LZH&GAl{hbW>#|4R(k-|saM4@U@SbR*?}=q5
zfxCI@Jc7QJ4&Ew<zEul-Yd!QW6?p45^sQ#_))MGj6Tn+l(6`P)-<kw{>r3!fBJ{0D
z=vybCZ=Hs|wGX`YJ*M93^z2(Z^L=`&C3>B(AB?xg$H?{K-ne<IL3~fN-0QkV_Q}&*
zmI1oO-GqDetp&~ZTIX(~+|66Mb1^n!$xpzY-ck;)(u}Um{RX&mkA=5V^64u#P9yz{
cc^~vQhc^%T2mL$``9Fos|9xM)^El4&FNQ_O2mk;8

literal 0
HcmV?d00001


From 381e65939e85104561074440c4dd3dd99bd0efff Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Thu, 5 May 2022 08:35:39 -0600
Subject: [PATCH 33/59] op - include num_elem and num_qpts in view

---
 interface/ceed-operator.c         | 6 ++++++
 julia/LibCEED.jl/test/runtests.jl | 1 +
 python/tests/output/test_504.out  | 2 ++
 python/tests/output/test_523.out  | 4 ++++
 tests/output/t504-operator-f.out  | 2 ++
 tests/output/t504-operator.out    | 2 ++
 tests/output/t523-operator-f.out  | 4 ++++
 tests/output/t523-operator.out    | 4 ++++
 8 files changed, 25 insertions(+)

diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c
index 37aade4069..8dbe51f58e 100644
--- a/interface/ceed-operator.c
+++ b/interface/ceed-operator.c
@@ -172,8 +172,14 @@ int CeedOperatorSingleView(CeedOperator op, bool sub, FILE *stream) {
   int ierr;
   const char *pre = sub ? "  " : "";
 
+  CeedInt num_elem, num_qpts;
+  ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChk(ierr);
+  ierr = CeedOperatorGetNumQuadraturePoints(op, &num_qpts); CeedChk(ierr);
+
   CeedInt total_fields = 0;
   ierr = CeedOperatorGetNumArgs(op, &total_fields); CeedChk(ierr);
+  fprintf(stream, "%s  %d elements with %d quadrature points each\n",
+          pre, num_elem, num_qpts);
 
   fprintf(stream, "%s  %d Field%s\n", pre, total_fields,
           total_fields>1 ? "s" : "");
diff --git a/julia/LibCEED.jl/test/runtests.jl b/julia/LibCEED.jl/test/runtests.jl
index 0473802cd0..6fa8b2c6d3 100644
--- a/julia/LibCEED.jl/test/runtests.jl
+++ b/julia/LibCEED.jl/test/runtests.jl
@@ -298,6 +298,7 @@ else
             )
             @test showstr(op) == """
                 CeedOperator
+                  1 elements with 27 quadrature points each
                   2 Fields
                   1 Input Field:
                     Input Field [0]:
diff --git a/python/tests/output/test_504.out b/python/tests/output/test_504.out
index 5b7190df5e..9361d24566 100644
--- a/python/tests/output/test_504.out
+++ b/python/tests/output/test_504.out
@@ -1,4 +1,5 @@
 CeedOperator
+  15 elements with 8 quadrature points each
   3 Fields
   2 Input Fields:
     Input Field [0]:
@@ -14,6 +15,7 @@ CeedOperator
       Active vector
 
 CeedOperator
+  15 elements with 8 quadrature points each
   3 Fields
   2 Input Fields:
     Input Field [0]:
diff --git a/python/tests/output/test_523.out b/python/tests/output/test_523.out
index b709e59849..af833a3206 100644
--- a/python/tests/output/test_523.out
+++ b/python/tests/output/test_523.out
@@ -1,5 +1,6 @@
 Composite CeedOperator
   SubOperator [0]:
+    6 elements with 4 quadrature points each
     3 Fields
     2 Input Fields:
       Input Field [0]:
@@ -13,6 +14,7 @@ Composite CeedOperator
         Name: "rho"
         Collocated basis
   SubOperator [1]:
+    6 elements with 16 quadrature points each
     3 Fields
     2 Input Fields:
       Input Field [0]:
@@ -28,6 +30,7 @@ Composite CeedOperator
 
 Composite CeedOperator
   SubOperator [0]:
+    6 elements with 4 quadrature points each
     3 Fields
     2 Input Fields:
       Input Field [0]:
@@ -41,6 +44,7 @@ Composite CeedOperator
         Name: "v"
         Active vector
   SubOperator [1]:
+    6 elements with 16 quadrature points each
     3 Fields
     2 Input Fields:
       Input Field [0]:
diff --git a/tests/output/t504-operator-f.out b/tests/output/t504-operator-f.out
index 7a3265af0c..168a7794d9 100644
--- a/tests/output/t504-operator-f.out
+++ b/tests/output/t504-operator-f.out
@@ -1,4 +1,5 @@
 CeedOperator
+  15 elements with 8 quadrature points each
   3 Fields
   2 Input Fields:
     Input Field [0]:
@@ -13,6 +14,7 @@ CeedOperator
       Collocated basis
       Active vector
 CeedOperator
+  15 elements with 8 quadrature points each
   3 Fields
   2 Input Fields:
     Input Field [0]:
diff --git a/tests/output/t504-operator.out b/tests/output/t504-operator.out
index 7a3265af0c..168a7794d9 100644
--- a/tests/output/t504-operator.out
+++ b/tests/output/t504-operator.out
@@ -1,4 +1,5 @@
 CeedOperator
+  15 elements with 8 quadrature points each
   3 Fields
   2 Input Fields:
     Input Field [0]:
@@ -13,6 +14,7 @@ CeedOperator
       Collocated basis
       Active vector
 CeedOperator
+  15 elements with 8 quadrature points each
   3 Fields
   2 Input Fields:
     Input Field [0]:
diff --git a/tests/output/t523-operator-f.out b/tests/output/t523-operator-f.out
index 49528cb6e6..b199df2f0d 100644
--- a/tests/output/t523-operator-f.out
+++ b/tests/output/t523-operator-f.out
@@ -1,5 +1,6 @@
 Composite CeedOperator
   SubOperator [0]:
+    6 elements with 4 quadrature points each
     3 Fields
     2 Input Fields:
       Input Field [0]:
@@ -13,6 +14,7 @@ Composite CeedOperator
         Name: "rho"
         Collocated basis
   SubOperator [1]:
+    6 elements with 16 quadrature points each
     3 Fields
     2 Input Fields:
       Input Field [0]:
@@ -27,6 +29,7 @@ Composite CeedOperator
         Collocated basis
 Composite CeedOperator
   SubOperator [0]:
+    6 elements with 4 quadrature points each
     3 Fields
     2 Input Fields:
       Input Field [0]:
@@ -40,6 +43,7 @@ Composite CeedOperator
         Name: "v"
         Active vector
   SubOperator [1]:
+    6 elements with 16 quadrature points each
     3 Fields
     2 Input Fields:
       Input Field [0]:
diff --git a/tests/output/t523-operator.out b/tests/output/t523-operator.out
index 49528cb6e6..b199df2f0d 100644
--- a/tests/output/t523-operator.out
+++ b/tests/output/t523-operator.out
@@ -1,5 +1,6 @@
 Composite CeedOperator
   SubOperator [0]:
+    6 elements with 4 quadrature points each
     3 Fields
     2 Input Fields:
       Input Field [0]:
@@ -13,6 +14,7 @@ Composite CeedOperator
         Name: "rho"
         Collocated basis
   SubOperator [1]:
+    6 elements with 16 quadrature points each
     3 Fields
     2 Input Fields:
       Input Field [0]:
@@ -27,6 +29,7 @@ Composite CeedOperator
         Collocated basis
 Composite CeedOperator
   SubOperator [0]:
+    6 elements with 4 quadrature points each
     3 Fields
     2 Input Fields:
       Input Field [0]:
@@ -40,6 +43,7 @@ Composite CeedOperator
         Name: "v"
         Active vector
   SubOperator [1]:
+    6 elements with 16 quadrature points each
     3 Fields
     2 Input Fields:
       Input Field [0]:

From 4b62541956ab0845c36cb260ae3f20ae6805fd3f Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Thu, 5 May 2022 08:47:26 -0600
Subject: [PATCH 34/59] julia - move OperatorView test to Dev

---
 julia/LibCEED.jl/test/rundevtests.jl | 38 +++++++++++++++++++++++++++-
 julia/LibCEED.jl/test/runtests.jl    | 12 ---------
 2 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/julia/LibCEED.jl/test/rundevtests.jl b/julia/LibCEED.jl/test/rundevtests.jl
index 75d0f78410..111c139f4b 100644
--- a/julia/LibCEED.jl/test/rundevtests.jl
+++ b/julia/LibCEED.jl/test/rundevtests.jl
@@ -1,3 +1,39 @@
 using Test, LibCEED, LinearAlgebra, StaticArrays
 
-@testset "LibCEED Development Tests" begin end
+@testset "LibCEED Development Tests" begin
+    @testset "Operator" begin
+        c = Ceed()
+        @interior_qf id = (
+            c,
+            (input, :in, EVAL_INTERP),
+            (output, :out, EVAL_INTERP),
+            begin
+                output[] = input
+            end,
+        )
+        b = create_tensor_h1_lagrange_basis(c, 3, 1, 3, 3, GAUSS_LOBATTO)
+        n = getnumnodes(b)
+        offsets = Vector{CeedInt}(0:n-1)
+        r = create_elem_restriction(c, 1, n, 1, 1, n, offsets)
+        op = Operator(
+            c;
+            qf=id,
+            fields=[
+                (:input, r, b, CeedVectorActive()),
+                (:output, r, b, CeedVectorActive()),
+            ],
+        )
+        @test showstr(op) == """
+             CeedOperator
+               1 elements with 27 quadrature points each
+               2 Fields
+               1 Input Field:
+                 Input Field [0]:
+                   Name: "input"
+                   Active vector
+               1 Output Field:
+                 Output Field [0]:
+                   Name: "output"
+                   Active vector"""
+    end
+end
diff --git a/julia/LibCEED.jl/test/runtests.jl b/julia/LibCEED.jl/test/runtests.jl
index 6fa8b2c6d3..1c87f965fd 100644
--- a/julia/LibCEED.jl/test/runtests.jl
+++ b/julia/LibCEED.jl/test/runtests.jl
@@ -296,18 +296,6 @@ else
                     (:output, r, b, CeedVectorActive()),
                 ],
             )
-            @test showstr(op) == """
-                CeedOperator
-                  1 elements with 27 quadrature points each
-                  2 Fields
-                  1 Input Field:
-                    Input Field [0]:
-                      Name: "input"
-                      Active vector
-                  1 Output Field:
-                    Output Field [0]:
-                      Name: "output"
-                      Active vector"""
 
             v = rand(CeedScalar, n)
             v1 = CeedVector(c, v)

From b8bf0bca2754e3c4d98d2d0a42f776e4e8976ea4 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Fri, 6 May 2022 07:54:45 -0600
Subject: [PATCH 35/59] op/qf - consistent viewing output

---
 interface/ceed-operator.c            | 10 ++---
 interface/ceed-qfunction.c           |  6 +--
 julia/LibCEED.jl/test/rundevtests.jl | 30 ++++++++++++---
 julia/LibCEED.jl/test/runtests.jl    | 17 ---------
 python/tests/output/test_402.out     | 20 +++++-----
 python/tests/output/test_413.out     | 20 +++++-----
 python/tests/output/test_504.out     | 24 ++++++------
 python/tests/output/test_523.out     | 56 ++++++++++++++--------------
 tests/output/t402-qfunction-f.out    | 18 ++++-----
 tests/output/t402-qfunction.out      | 18 ++++-----
 tests/output/t413-qfunction-f.out    | 20 +++++-----
 tests/output/t413-qfunction.out      | 20 +++++-----
 tests/output/t504-operator-f.out     | 24 ++++++------
 tests/output/t504-operator.out       | 24 ++++++------
 tests/output/t523-operator-f.out     | 56 ++++++++++++++--------------
 tests/output/t523-operator.out       | 56 ++++++++++++++--------------
 16 files changed, 211 insertions(+), 208 deletions(-)

diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c
index 8dbe51f58e..2a450e0d70 100644
--- a/interface/ceed-operator.c
+++ b/interface/ceed-operator.c
@@ -143,7 +143,7 @@ static int CeedOperatorFieldView(CeedOperatorField field,
   const char *pre = sub ? "  " : "";
   const char *in_out = input ? "Input" : "Output";
 
-  fprintf(stream, "%s    %s Field [%d]:\n"
+  fprintf(stream, "%s    %s field %d:\n"
           "%s      Name: \"%s\"\n",
           pre, in_out, field_number, pre, qf_field->field_name);
 
@@ -181,17 +181,17 @@ int CeedOperatorSingleView(CeedOperator op, bool sub, FILE *stream) {
   fprintf(stream, "%s  %d elements with %d quadrature points each\n",
           pre, num_elem, num_qpts);
 
-  fprintf(stream, "%s  %d Field%s\n", pre, total_fields,
+  fprintf(stream, "%s  %d field%s\n", pre, total_fields,
           total_fields>1 ? "s" : "");
 
-  fprintf(stream, "%s  %d Input Field%s:\n", pre, op->qf->num_input_fields,
+  fprintf(stream, "%s  %d input field%s:\n", pre, op->qf->num_input_fields,
           op->qf->num_input_fields>1 ? "s" : "");
   for (CeedInt i=0; i<op->qf->num_input_fields; i++) {
     ierr = CeedOperatorFieldView(op->input_fields[i], op->qf->input_fields[i],
                                  i, sub, 1, stream); CeedChk(ierr);
   }
 
-  fprintf(stream, "%s  %d Output Field%s:\n", pre, op->qf->num_output_fields,
+  fprintf(stream, "%s  %d output field%s:\n", pre, op->qf->num_output_fields,
           op->qf->num_output_fields>1 ? "s" : "");
   for (CeedInt i=0; i<op->qf->num_output_fields; i++) {
     ierr = CeedOperatorFieldView(op->output_fields[i], op->qf->output_fields[i],
@@ -1187,7 +1187,7 @@ int CeedOperatorView(CeedOperator op, FILE *stream) {
     fprintf(stream, "Composite CeedOperator\n");
 
     for (CeedInt i=0; i<op->num_suboperators; i++) {
-      fprintf(stream, "  SubOperator [%d]:\n", i);
+      fprintf(stream, "  SubOperator %d:\n", i);
       ierr = CeedOperatorSingleView(op->sub_operators[i], 1, stream);
       CeedChk(ierr);
     }
diff --git a/interface/ceed-qfunction.c b/interface/ceed-qfunction.c
index 1039921656..910226c30f 100644
--- a/interface/ceed-qfunction.c
+++ b/interface/ceed-qfunction.c
@@ -143,7 +143,7 @@ static int CeedQFunctionFieldView(CeedQFunctionField field,
   ierr = CeedQFunctionFieldGetSize(field, &size); CeedChk(ierr);
   CeedEvalMode eval_mode;
   ierr = CeedQFunctionFieldGetEvalMode(field, &eval_mode); CeedChk(ierr);
-  fprintf(stream, "    %s Field [%d]:\n"
+  fprintf(stream, "    %s field %d:\n"
           "      Name: \"%s\"\n"
           "      Size: %d\n"
           "      EvalMode: \"%s\"\n",
@@ -972,14 +972,14 @@ int CeedQFunctionView(CeedQFunction qf, FILE *stream) {
           qf->is_gallery ? "Gallery " : "User ",
           qf->is_gallery ? qf->gallery_name : qf->kernel_name);
 
-  fprintf(stream, "  %d Input Field%s:\n", qf->num_input_fields,
+  fprintf(stream, "  %d input field%s:\n", qf->num_input_fields,
           qf->num_input_fields>1 ? "s" : "");
   for (CeedInt i=0; i<qf->num_input_fields; i++) {
     ierr = CeedQFunctionFieldView(qf->input_fields[i], i, 1, stream);
     CeedChk(ierr);
   }
 
-  fprintf(stream, "  %d Output Field%s:\n", qf->num_output_fields,
+  fprintf(stream, "  %d output field%s:\n", qf->num_output_fields,
           qf->num_output_fields>1 ? "s" : "");
   for (CeedInt i=0; i<qf->num_output_fields; i++) {
     ierr = CeedQFunctionFieldView(qf->output_fields[i], i, 0, stream);
diff --git a/julia/LibCEED.jl/test/rundevtests.jl b/julia/LibCEED.jl/test/rundevtests.jl
index 111c139f4b..dd77e7daa5 100644
--- a/julia/LibCEED.jl/test/rundevtests.jl
+++ b/julia/LibCEED.jl/test/rundevtests.jl
@@ -1,6 +1,26 @@
 using Test, LibCEED, LinearAlgebra, StaticArrays
 
 @testset "LibCEED Development Tests" begin
+    @testset "QFunction" begin
+        c = Ceed()
+        @test showstr(create_interior_qfunction(c, "Poisson3DApply")) == """
+             Gallery CeedQFunction Poisson3DApply
+               2 input fields:
+                 Input field 0:
+                   Name: "du"
+                   Size: 3
+                   EvalMode: "gradient"
+                 Input field 1:
+                   Name: "qdata"
+                   Size: 6
+                   EvalMode: "none"
+               1 output field:
+                 Output field 0:
+                   Name: "dv"
+                   Size: 3
+                   EvalMode: "gradient\""""
+    end
+
     @testset "Operator" begin
         c = Ceed()
         @interior_qf id = (
@@ -26,13 +46,13 @@ using Test, LibCEED, LinearAlgebra, StaticArrays
         @test showstr(op) == """
              CeedOperator
                1 elements with 27 quadrature points each
-               2 Fields
-               1 Input Field:
-                 Input Field [0]:
+               2 fields
+               1 input field:
+                 Input field 0:
                    Name: "input"
                    Active vector
-               1 Output Field:
-                 Output Field [0]:
+               1 output field:
+                 Output field 0:
                    Name: "output"
                    Active vector"""
     end
diff --git a/julia/LibCEED.jl/test/runtests.jl b/julia/LibCEED.jl/test/runtests.jl
index 1c87f965fd..ac45f73423 100644
--- a/julia/LibCEED.jl/test/runtests.jl
+++ b/julia/LibCEED.jl/test/runtests.jl
@@ -221,23 +221,6 @@ else
             apply!(id, Q, [v1], [v2])
             @test @witharray(a = v2, a == v)
 
-            @test showstr(create_interior_qfunction(c, "Poisson3DApply")) == """
-                Gallery CeedQFunction Poisson3DApply
-                  2 Input Fields:
-                    Input Field [0]:
-                      Name: "du"
-                      Size: 3
-                      EvalMode: "gradient"
-                    Input Field [1]:
-                      Name: "qdata"
-                      Size: 6
-                      EvalMode: "none"
-                  1 Output Field:
-                    Output Field [0]:
-                      Name: "dv"
-                      Size: 3
-                      EvalMode: "gradient\""""
-
             @interior_qf id2 = (c, (a, :in, EVAL_INTERP), (b, :out, EVAL_INTERP), b .= a)
             v2[] = 0.0
             apply!(id2, Q, [v1], [v2])
diff --git a/python/tests/output/test_402.out b/python/tests/output/test_402.out
index 38bb67d83f..ba1f4c22d9 100644
--- a/python/tests/output/test_402.out
+++ b/python/tests/output/test_402.out
@@ -1,31 +1,31 @@
 User CeedQFunction setup_mass
-  2 Input Fields:
-    Input Field [0]:
+  2 input fields:
+    Input field 0:
       Name: "w"
       Size: 1
       EvalMode: "quadrature weights"
-    Input Field [1]:
+    Input field 1:
       Name: "dx"
       Size: 1
       EvalMode: "gradient"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
 
 User CeedQFunction apply_mass
-  2 Input Fields:
-    Input Field [0]:
+  2 input fields:
+    Input field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-    Input Field [1]:
+    Input field 1:
       Name: "u"
       Size: 1
       EvalMode: "interpolation"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Size: 1
       EvalMode: "interpolation"
diff --git a/python/tests/output/test_413.out b/python/tests/output/test_413.out
index 01e8b7a62f..7e8806fa4c 100644
--- a/python/tests/output/test_413.out
+++ b/python/tests/output/test_413.out
@@ -1,31 +1,31 @@
 Gallery CeedQFunction Mass1DBuild
-  2 Input Fields:
-    Input Field [0]:
+  2 input fields:
+    Input field 0:
       Name: "dx"
       Size: 1
       EvalMode: "gradient"
-    Input Field [1]:
+    Input field 1:
       Name: "weights"
       Size: 1
       EvalMode: "quadrature weights"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
 
 Gallery CeedQFunction MassApply
-  2 Input Fields:
-    Input Field [0]:
+  2 input fields:
+    Input field 0:
       Name: "u"
       Size: 1
       EvalMode: "interpolation"
-    Input Field [1]:
+    Input field 1:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Size: 1
       EvalMode: "interpolation"
diff --git a/python/tests/output/test_504.out b/python/tests/output/test_504.out
index 9361d24566..e956387c9b 100644
--- a/python/tests/output/test_504.out
+++ b/python/tests/output/test_504.out
@@ -1,31 +1,31 @@
 CeedOperator
   15 elements with 8 quadrature points each
-  3 Fields
-  2 Input Fields:
-    Input Field [0]:
+  3 fields
+  2 input fields:
+    Input field 0:
       Name: "weights"
       No vector
-    Input Field [1]:
+    Input field 1:
       Name: "dx"
       Active vector
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "rho"
       Collocated basis
       Active vector
 
 CeedOperator
   15 elements with 8 quadrature points each
-  3 Fields
-  2 Input Fields:
-    Input Field [0]:
+  3 fields
+  2 input fields:
+    Input field 0:
       Name: "rho"
       Collocated basis
-    Input Field [1]:
+    Input field 1:
       Name: "u"
       Active vector
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Active vector
 
diff --git a/python/tests/output/test_523.out b/python/tests/output/test_523.out
index af833a3206..577676498d 100644
--- a/python/tests/output/test_523.out
+++ b/python/tests/output/test_523.out
@@ -1,60 +1,60 @@
 Composite CeedOperator
-  SubOperator [0]:
+  SubOperator 0:
     6 elements with 4 quadrature points each
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "weights"
         No vector
-      Input Field [1]:
+      Input field 1:
         Name: "dx"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "rho"
         Collocated basis
-  SubOperator [1]:
+  SubOperator 1:
     6 elements with 16 quadrature points each
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "weights"
         No vector
-      Input Field [1]:
+      Input field 1:
         Name: "dx"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "rho"
         Collocated basis
 
 Composite CeedOperator
-  SubOperator [0]:
+  SubOperator 0:
     6 elements with 4 quadrature points each
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "rho"
         Collocated basis
-      Input Field [1]:
+      Input field 1:
         Name: "u"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "v"
         Active vector
-  SubOperator [1]:
+  SubOperator 1:
     6 elements with 16 quadrature points each
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "rho"
         Collocated basis
-      Input Field [1]:
+      Input field 1:
         Name: "u"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "v"
         Active vector
 
diff --git a/tests/output/t402-qfunction-f.out b/tests/output/t402-qfunction-f.out
index be66f7e3c0..62f024ffdf 100644
--- a/tests/output/t402-qfunction-f.out
+++ b/tests/output/t402-qfunction-f.out
@@ -1,26 +1,26 @@
 User CeedQFunction setup
-  1 Input Field:
-    Input Field [0]:
+  1 input field:
+    Input field 0:
       Name: "w"
       Size: 1
       EvalMode: "quadrature weights"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
 User CeedQFunction mass
-  2 Input Fields:
-    Input Field [0]:
+  2 input fields:
+    Input field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-    Input Field [1]:
+    Input field 1:
       Name: "u"
       Size: 1
       EvalMode: "interpolation"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Size: 1
       EvalMode: "interpolation"
diff --git a/tests/output/t402-qfunction.out b/tests/output/t402-qfunction.out
index be66f7e3c0..62f024ffdf 100644
--- a/tests/output/t402-qfunction.out
+++ b/tests/output/t402-qfunction.out
@@ -1,26 +1,26 @@
 User CeedQFunction setup
-  1 Input Field:
-    Input Field [0]:
+  1 input field:
+    Input field 0:
       Name: "w"
       Size: 1
       EvalMode: "quadrature weights"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
 User CeedQFunction mass
-  2 Input Fields:
-    Input Field [0]:
+  2 input fields:
+    Input field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-    Input Field [1]:
+    Input field 1:
       Name: "u"
       Size: 1
       EvalMode: "interpolation"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Size: 1
       EvalMode: "interpolation"
diff --git a/tests/output/t413-qfunction-f.out b/tests/output/t413-qfunction-f.out
index 460f580fbf..03c41e1634 100644
--- a/tests/output/t413-qfunction-f.out
+++ b/tests/output/t413-qfunction-f.out
@@ -1,30 +1,30 @@
 Gallery CeedQFunction Mass1DBuild
-  2 Input Fields:
-    Input Field [0]:
+  2 input fields:
+    Input field 0:
       Name: "dx"
       Size: 1
       EvalMode: "gradient"
-    Input Field [1]:
+    Input field 1:
       Name: "weights"
       Size: 1
       EvalMode: "quadrature weights"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
 Gallery CeedQFunction MassApply
-  2 Input Fields:
-    Input Field [0]:
+  2 input fields:
+    Input field 0:
       Name: "u"
       Size: 1
       EvalMode: "interpolation"
-    Input Field [1]:
+    Input field 1:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Size: 1
       EvalMode: "interpolation"
diff --git a/tests/output/t413-qfunction.out b/tests/output/t413-qfunction.out
index 460f580fbf..03c41e1634 100644
--- a/tests/output/t413-qfunction.out
+++ b/tests/output/t413-qfunction.out
@@ -1,30 +1,30 @@
 Gallery CeedQFunction Mass1DBuild
-  2 Input Fields:
-    Input Field [0]:
+  2 input fields:
+    Input field 0:
       Name: "dx"
       Size: 1
       EvalMode: "gradient"
-    Input Field [1]:
+    Input field 1:
       Name: "weights"
       Size: 1
       EvalMode: "quadrature weights"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
 Gallery CeedQFunction MassApply
-  2 Input Fields:
-    Input Field [0]:
+  2 input fields:
+    Input field 0:
       Name: "u"
       Size: 1
       EvalMode: "interpolation"
-    Input Field [1]:
+    Input field 1:
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Size: 1
       EvalMode: "interpolation"
diff --git a/tests/output/t504-operator-f.out b/tests/output/t504-operator-f.out
index 168a7794d9..91d47589d4 100644
--- a/tests/output/t504-operator-f.out
+++ b/tests/output/t504-operator-f.out
@@ -1,29 +1,29 @@
 CeedOperator
   15 elements with 8 quadrature points each
-  3 Fields
-  2 Input Fields:
-    Input Field [0]:
+  3 fields
+  2 input fields:
+    Input field 0:
       Name: "weight"
       No vector
-    Input Field [1]:
+    Input field 1:
       Name: "dx"
       Active vector
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "rho"
       Collocated basis
       Active vector
 CeedOperator
   15 elements with 8 quadrature points each
-  3 Fields
-  2 Input Fields:
-    Input Field [0]:
+  3 fields
+  2 input fields:
+    Input field 0:
       Name: "rho"
       Collocated basis
-    Input Field [1]:
+    Input field 1:
       Name: "u"
       Active vector
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Active vector
diff --git a/tests/output/t504-operator.out b/tests/output/t504-operator.out
index 168a7794d9..91d47589d4 100644
--- a/tests/output/t504-operator.out
+++ b/tests/output/t504-operator.out
@@ -1,29 +1,29 @@
 CeedOperator
   15 elements with 8 quadrature points each
-  3 Fields
-  2 Input Fields:
-    Input Field [0]:
+  3 fields
+  2 input fields:
+    Input field 0:
       Name: "weight"
       No vector
-    Input Field [1]:
+    Input field 1:
       Name: "dx"
       Active vector
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "rho"
       Collocated basis
       Active vector
 CeedOperator
   15 elements with 8 quadrature points each
-  3 Fields
-  2 Input Fields:
-    Input Field [0]:
+  3 fields
+  2 input fields:
+    Input field 0:
       Name: "rho"
       Collocated basis
-    Input Field [1]:
+    Input field 1:
       Name: "u"
       Active vector
-  1 Output Field:
-    Output Field [0]:
+  1 output field:
+    Output field 0:
       Name: "v"
       Active vector
diff --git a/tests/output/t523-operator-f.out b/tests/output/t523-operator-f.out
index b199df2f0d..fdae3fc01f 100644
--- a/tests/output/t523-operator-f.out
+++ b/tests/output/t523-operator-f.out
@@ -1,58 +1,58 @@
 Composite CeedOperator
-  SubOperator [0]:
+  SubOperator 0:
     6 elements with 4 quadrature points each
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "weight"
         No vector
-      Input Field [1]:
+      Input field 1:
         Name: "dx"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "rho"
         Collocated basis
-  SubOperator [1]:
+  SubOperator 1:
     6 elements with 16 quadrature points each
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "weight"
         No vector
-      Input Field [1]:
+      Input field 1:
         Name: "dx"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "rho"
         Collocated basis
 Composite CeedOperator
-  SubOperator [0]:
+  SubOperator 0:
     6 elements with 4 quadrature points each
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "rho"
         Collocated basis
-      Input Field [1]:
+      Input field 1:
         Name: "u"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "v"
         Active vector
-  SubOperator [1]:
+  SubOperator 1:
     6 elements with 16 quadrature points each
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "rho"
         Collocated basis
-      Input Field [1]:
+      Input field 1:
         Name: "u"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "v"
         Active vector
diff --git a/tests/output/t523-operator.out b/tests/output/t523-operator.out
index b199df2f0d..fdae3fc01f 100644
--- a/tests/output/t523-operator.out
+++ b/tests/output/t523-operator.out
@@ -1,58 +1,58 @@
 Composite CeedOperator
-  SubOperator [0]:
+  SubOperator 0:
     6 elements with 4 quadrature points each
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "weight"
         No vector
-      Input Field [1]:
+      Input field 1:
         Name: "dx"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "rho"
         Collocated basis
-  SubOperator [1]:
+  SubOperator 1:
     6 elements with 16 quadrature points each
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "weight"
         No vector
-      Input Field [1]:
+      Input field 1:
         Name: "dx"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "rho"
         Collocated basis
 Composite CeedOperator
-  SubOperator [0]:
+  SubOperator 0:
     6 elements with 4 quadrature points each
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "rho"
         Collocated basis
-      Input Field [1]:
+      Input field 1:
         Name: "u"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "v"
         Active vector
-  SubOperator [1]:
+  SubOperator 1:
     6 elements with 16 quadrature points each
-    3 Fields
-    2 Input Fields:
-      Input Field [0]:
+    3 fields
+    2 input fields:
+      Input field 0:
         Name: "rho"
         Collocated basis
-      Input Field [1]:
+      Input field 1:
         Name: "u"
         Active vector
-    1 Output Field:
-      Output Field [0]:
+    1 output field:
+      Output field 0:
         Name: "v"
         Active vector

From ea6b58218a3c4883c2efd66165b4d6b684f89f5a Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Fri, 6 May 2022 11:50:31 -0600
Subject: [PATCH 36/59] op - add optional name for Operators

---
 doc/sphinx/source/releasenotes.md            |   2 +
 examples/rust/ex1-volume/src/main.rs         |   2 +
 examples/rust/ex2-surface/src/main.rs        |   2 +
 examples/rust/ex3-vector-volume/src/main.rs  |   2 +
 examples/rust/ex4-vector-surface/src/main.rs |   2 +
 include/ceed-impl.h                          |   1 +
 include/ceed/ceed.h                          |   1 +
 interface/ceed-fortran.c                     |  15 ++-
 interface/ceed-operator.c                    |  38 ++++++-
 interface/ceed-preconditioning.c             |  22 ++++
 interface/ceed-qfunction.c                   |   2 +-
 julia/LibCEED.jl/test/rundevtests.jl         |   2 +-
 python/ceed_operator.py                      |  11 ++
 python/tests/output/test_402.out             |   4 +-
 python/tests/output/test_413.out             |   4 +-
 python/tests/output/test_523.out             |  12 +--
 python/tests/test-5-operator.py              |   6 ++
 rust/libceed/src/operator.rs                 | 108 ++++++++++++++++++-
 tests/output/t402-qfunction-f.out            |   4 +-
 tests/output/t402-qfunction.out              |   4 +-
 tests/output/t413-qfunction-f.out            |   4 +-
 tests/output/t413-qfunction.out              |   4 +-
 tests/output/t523-operator-f.out             |  12 +--
 tests/output/t523-operator.out               |  12 +--
 tests/t523-operator-f.f90                    |   8 +-
 tests/t523-operator.c                        |   6 ++
 26 files changed, 250 insertions(+), 40 deletions(-)

diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md
index 9d5e6488ac..1c0b7a93f0 100644
--- a/doc/sphinx/source/releasenotes.md
+++ b/doc/sphinx/source/releasenotes.md
@@ -8,6 +8,8 @@ On this page we provide a summary of the main API changes, new features and exam
 
 ### Interface changes
 
+- Added {c:func}`CeedOperatorSetName` for more readable {c:func}`CeedOperatorView` output.
+
 (v0-10-1)=
 
 ## v0.10.1 (Apr 11, 2022)
diff --git a/examples/rust/ex1-volume/src/main.rs b/examples/rust/ex1-volume/src/main.rs
index 60af355708..8efda23bdc 100644
--- a/examples/rust/ex1-volume/src/main.rs
+++ b/examples/rust/ex1-volume/src/main.rs
@@ -178,6 +178,7 @@ fn example_1(options: opt::Opt) -> libceed::Result<()> {
     // Operator that build the quadrature data for the mass operator
     let op_build = ceed
         .operator(qf_build, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("build qdata")?
         .field("dx", &restr_mesh, &basis_mesh, VectorOpt::Active)?
         .field(
             "weights",
@@ -226,6 +227,7 @@ fn example_1(options: opt::Opt) -> libceed::Result<()> {
     // Mass Operator
     let op_mass = ceed
         .operator(qf_mass, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("mass")?
         .field("u", &restr_solution, &basis_solution, VectorOpt::Active)?
         .field("qdata", &restr_qdata, BasisOpt::Collocated, &qdata)?
         .field("v", &restr_solution, &basis_solution, VectorOpt::Active)?
diff --git a/examples/rust/ex2-surface/src/main.rs b/examples/rust/ex2-surface/src/main.rs
index 8cd8a6b203..42882590a5 100644
--- a/examples/rust/ex2-surface/src/main.rs
+++ b/examples/rust/ex2-surface/src/main.rs
@@ -220,6 +220,7 @@ fn example_2(options: opt::Opt) -> libceed::Result<()> {
     // Operator that build the quadrature data for the diff operator
     let op_build = ceed
         .operator(qf_build, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("build qdata")?
         .field("dx", &restr_mesh, &basis_mesh, VectorOpt::Active)?
         .field(
             "weights",
@@ -305,6 +306,7 @@ fn example_2(options: opt::Opt) -> libceed::Result<()> {
     // Diff Operator
     let op_diff = ceed
         .operator(qf_diff, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("Poisson")?
         .field("du", &restr_solution, &basis_solution, VectorOpt::Active)?
         .field("qdata", &restr_qdata, BasisOpt::Collocated, &qdata)?
         .field("dv", &restr_solution, &basis_solution, VectorOpt::Active)?
diff --git a/examples/rust/ex3-vector-volume/src/main.rs b/examples/rust/ex3-vector-volume/src/main.rs
index 20f3ff92ff..518aa47a52 100644
--- a/examples/rust/ex3-vector-volume/src/main.rs
+++ b/examples/rust/ex3-vector-volume/src/main.rs
@@ -187,6 +187,7 @@ fn example_3(options: opt::Opt) -> libceed::Result<()> {
     // Operator that build the quadrature data for the mass operator
     let op_build = ceed
         .operator(qf_build, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("build qdata")?
         .field("dx", &restr_mesh, &basis_mesh, VectorOpt::Active)?
         .field(
             "weights",
@@ -239,6 +240,7 @@ fn example_3(options: opt::Opt) -> libceed::Result<()> {
     // Mass Operator
     let op_mass = ceed
         .operator(qf_mass, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("mass")?
         .field("u", &restr_solution, &basis_solution, VectorOpt::Active)?
         .field("qdata", &restr_qdata, BasisOpt::Collocated, &qdata)?
         .field("v", &restr_solution, &basis_solution, VectorOpt::Active)?
diff --git a/examples/rust/ex4-vector-surface/src/main.rs b/examples/rust/ex4-vector-surface/src/main.rs
index 5583235f87..5a0c1e25dd 100644
--- a/examples/rust/ex4-vector-surface/src/main.rs
+++ b/examples/rust/ex4-vector-surface/src/main.rs
@@ -227,6 +227,7 @@ fn example_4(options: opt::Opt) -> libceed::Result<()> {
     // Operator that build the quadrature data for the diff operator
     let op_build = ceed
         .operator(qf_build, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("build qdata")?
         .field("dx", &restr_mesh, &basis_mesh, VectorOpt::Active)?
         .field(
             "weights",
@@ -326,6 +327,7 @@ fn example_4(options: opt::Opt) -> libceed::Result<()> {
     // Diff Operator
     let op_diff = ceed
         .operator(qf_diff, QFunctionOpt::None, QFunctionOpt::None)?
+        .name("Poisson")?
         .field("du", &restr_solution, &basis_solution, VectorOpt::Active)?
         .field("qdata", &restr_qdata, BasisOpt::Collocated, &qdata)?
         .field("dv", &restr_solution, &basis_solution, VectorOpt::Active)?
diff --git a/include/ceed-impl.h b/include/ceed-impl.h
index 66409ddba8..ea49f0e998 100644
--- a/include/ceed-impl.h
+++ b/include/ceed-impl.h
@@ -373,6 +373,7 @@ struct CeedOperator_private {
   CeedQFunction qf;
   CeedQFunction dqf;
   CeedQFunction dqfT;
+  const char *name;
   bool is_immutable;
   bool is_interface_setup;
   bool is_backend_setup;
diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h
index 6b87d7514f..9f46ee843c 100644
--- a/include/ceed/ceed.h
+++ b/include/ceed/ceed.h
@@ -747,6 +747,7 @@ CEED_EXTERN int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
 CEED_EXTERN int CeedOperatorCreateFDMElementInverse(CeedOperator op,
     CeedOperator *fdm_inv, CeedRequest *request);
 CEED_EXTERN int CeedOperatorSetNumQuadraturePoints(CeedOperator op, CeedInt num_qpts);
+CEED_EXTERN int CeedOperatorSetName(CeedOperator op, const char *name);
 CEED_EXTERN int CeedOperatorView(CeedOperator op, FILE *stream);
 CEED_EXTERN int CeedOperatorGetCeed(CeedOperator op, Ceed *ceed);
 CEED_EXTERN int CeedOperatorGetNumElements(CeedOperator op, CeedInt *num_elem);
diff --git a/interface/ceed-fortran.c b/interface/ceed-fortran.c
index 88f5635be0..2153518784 100644
--- a/interface/ceed-fortran.c
+++ b/interface/ceed-fortran.c
@@ -1074,8 +1074,8 @@ CEED_EXTERN void fCeedCompositeOperatorCreate(int *ceed, int *op, int *err) {
 #define fCeedOperatorSetField \
     FORTRAN_NAME(ceedoperatorsetfield,CEEDOPERATORSETFIELD)
 CEED_EXTERN void fCeedOperatorSetField(int *op, const char *field_name, int *r,
-                                       int *b,
-                                       int *v, int *err, fortran_charlen_t field_name_len) {
+                                       int *b, int *v, int *err,
+                                       fortran_charlen_t field_name_len) {
   FIX_STRING(field_name);
   CeedElemRestriction r_;
   CeedBasis b_;
@@ -1119,7 +1119,16 @@ CEED_EXTERN void fCeedCompositeOperatorAddSub(int *compositeop, int *subop,
   CeedOperator subop_ = CeedOperator_dict[*subop];
 
   *err = CeedCompositeOperatorAddSub(compositeop_, subop_);
-  if (*err) return;
+}
+
+#define fCeedOperatorSetName \
+    FORTRAN_NAME(ceedoperatorsetname, CEEDOPERATORSETNAME)
+CEED_EXTERN void fCeedOperatorSetName(int *op, const char *name, int *err,
+                                      fortran_charlen_t name_len) {
+  FIX_STRING(name); // NOTE(review): presumably defines name_c - confirm
+  CeedOperator op_ = CeedOperator_dict[*op]; // integer handle -> operator
+
+  *err = CeedOperatorSetName(op_, name_c); // error code reported via err
 }
 
 #define fCeedOperatorLinearAssembleQFunction \
diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c
index 2a450e0d70..a715e8f5d3 100644
--- a/interface/ceed-operator.c
+++ b/interface/ceed-operator.c
@@ -1170,6 +1170,31 @@ int CeedOperatorSetNumQuadraturePoints(CeedOperator op, CeedInt num_qpts) {
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Set name of CeedOperator for CeedOperatorView output
+
+  @param op    CeedOperator
+  @param name  Name to set (copied), or NULL or "" to remove previously set name
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref User
+**/
+int CeedOperatorSetName(CeedOperator op, const char *name) {
+  int ierr;
+  char *name_copy;
+  size_t name_len = name ? strlen(name) : 0;
+
+  ierr = CeedFree(&op->name); CeedChk(ierr); // release any previous name
+  if (name_len > 0) {
+    ierr = CeedCalloc(name_len + 1, &name_copy); CeedChk(ierr); // +1 for NUL
+    memcpy(name_copy, name, name_len);
+    op->name = name_copy;
+  }
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief View a CeedOperator
 
@@ -1182,17 +1207,23 @@ int CeedOperatorSetNumQuadraturePoints(CeedOperator op, CeedInt num_qpts) {
 **/
 int CeedOperatorView(CeedOperator op, FILE *stream) {
   int ierr;
+  bool has_name = op->name;
 
   if (op->is_composite) {
-    fprintf(stream, "Composite CeedOperator\n");
+    fprintf(stream, "Composite CeedOperator%s%s\n",
+            has_name ? " - " : "", has_name ? op->name : "");
 
     for (CeedInt i=0; i<op->num_suboperators; i++) {
-      fprintf(stream, "  SubOperator %d:\n", i);
+      has_name = op->sub_operators[i]->name;
+      fprintf(stream, "  SubOperator %d%s%s:\n", i,
+              has_name ? " - " : "",
+              has_name ? op->sub_operators[i]->name : "");
       ierr = CeedOperatorSingleView(op->sub_operators[i], 1, stream);
       CeedChk(ierr);
     }
   } else {
-    fprintf(stream, "CeedOperator\n");
+    fprintf(stream, "CeedOperator%s%s\n",
+            has_name ? " - " : "", has_name ? op->name : "");
     ierr = CeedOperatorSingleView(op, 0, stream); CeedChk(ierr);
   }
   return CEED_ERROR_SUCCESS;
@@ -1675,6 +1706,7 @@ int CeedOperatorDestroy(CeedOperator *op) {
   ierr = CeedFree(&(*op)->input_fields); CeedChk(ierr);
   ierr = CeedFree(&(*op)->output_fields); CeedChk(ierr);
   ierr = CeedFree(&(*op)->sub_operators); CeedChk(ierr);
+  ierr = CeedFree(&(*op)->name); CeedChk(ierr);
   ierr = CeedFree(op); CeedChk(ierr);
   return CEED_ERROR_SUCCESS;
 }
diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c
index 46235e787f..24d31e90d9 100644
--- a/interface/ceed-preconditioning.c
+++ b/interface/ceed-preconditioning.c
@@ -68,6 +68,7 @@ int CeedOperatorCreateFallback(CeedOperator op) {
   ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr);
   ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled,
          &op_ref->qf_assembled); CeedChk(ierr);
+  ierr = CeedOperatorSetName(op_ref, op->name); CeedChk(ierr);
   op->op_fallback = op_ref;
 
   // Clone QF
@@ -959,6 +960,27 @@ static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
   CeedChk(ierr);
 
+  // Clone name; transfer operators get "prolongation|restriction for <name>"
+  bool has_name = op_fine->name;
+  size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
+  ierr = CeedOperatorSetName(*op_coarse, op_fine->name); CeedChk(ierr);
+  { // buffer holds "prolongation" (12) + " for " (5) + name + NUL (1)
+    char *prolongation_name;
+    ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr);
+    sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "",
+            has_name ? op_fine->name : ""); // NULL with %s is undefined
+    ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr);
+    ierr = CeedFree(&prolongation_name); CeedChk(ierr);
+  }
+  { // buffer holds "restriction" (11) + " for " (5) + name + NUL (1)
+    char *restriction_name;
+    ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr);
+    sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "",
+            has_name ? op_fine->name : ""); // NULL with %s is undefined
+    ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr);
+    ierr = CeedFree(&restriction_name); CeedChk(ierr);
+  }
+
   // Cleanup
   ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
   ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
diff --git a/interface/ceed-qfunction.c b/interface/ceed-qfunction.c
index 910226c30f..8e6c096785 100644
--- a/interface/ceed-qfunction.c
+++ b/interface/ceed-qfunction.c
@@ -968,7 +968,7 @@ int CeedQFunctionSetUserFlopsEstimate(CeedQFunction qf, CeedSize flops) {
 int CeedQFunctionView(CeedQFunction qf, FILE *stream) {
   int ierr;
 
-  fprintf(stream, "%sCeedQFunction %s\n",
+  fprintf(stream, "%sCeedQFunction - %s\n",
           qf->is_gallery ? "Gallery " : "User ",
           qf->is_gallery ? qf->gallery_name : qf->kernel_name);
 
diff --git a/julia/LibCEED.jl/test/rundevtests.jl b/julia/LibCEED.jl/test/rundevtests.jl
index dd77e7daa5..e80f9ba9dc 100644
--- a/julia/LibCEED.jl/test/rundevtests.jl
+++ b/julia/LibCEED.jl/test/rundevtests.jl
@@ -4,7 +4,7 @@ using Test, LibCEED, LinearAlgebra, StaticArrays
     @testset "QFunction" begin
         c = Ceed()
         @test showstr(create_interior_qfunction(c, "Poisson3DApply")) == """
-             Gallery CeedQFunction Poisson3DApply
+             Gallery CeedQFunction - Poisson3DApply
                2 input fields:
                  Input field 0:
                    Name: "du"
diff --git a/python/ceed_operator.py b/python/ceed_operator.py
index f487f76b3a..80412f96a0 100644
--- a/python/ceed_operator.py
+++ b/python/ceed_operator.py
@@ -108,6 +108,17 @@ def linear_assemble_add_point_block_diagonal(
                                                                        d._pointer[0], request)
         self._ceed._check_error(err_code)
 
+    # Set name
+    def name(self, name):
+        """Set name of Operator for print output
+
+           Args:
+             name: Name to set; must contain only ASCII characters"""
+        # Encode to ASCII bytes and wrap in a C char[] for the library call
+        name = ffi.new("char[]", name.encode('ascii'))
+        err_code = lib.CeedOperatorSetName(self._pointer[0], name)
+        self._ceed._check_error(err_code)
+
     # Apply CeedOperator
     def apply(self, u, v, request=REQUEST_IMMEDIATE):
         """Apply Operator to a vector.
diff --git a/python/tests/output/test_402.out b/python/tests/output/test_402.out
index ba1f4c22d9..e64504af50 100644
--- a/python/tests/output/test_402.out
+++ b/python/tests/output/test_402.out
@@ -1,4 +1,4 @@
-User CeedQFunction setup_mass
+User CeedQFunction - setup_mass
   2 input fields:
     Input field 0:
       Name: "w"
@@ -14,7 +14,7 @@ User CeedQFunction setup_mass
       Size: 1
       EvalMode: "none"
 
-User CeedQFunction apply_mass
+User CeedQFunction - apply_mass
   2 input fields:
     Input field 0:
       Name: "qdata"
diff --git a/python/tests/output/test_413.out b/python/tests/output/test_413.out
index 7e8806fa4c..059f32cc38 100644
--- a/python/tests/output/test_413.out
+++ b/python/tests/output/test_413.out
@@ -1,4 +1,4 @@
-Gallery CeedQFunction Mass1DBuild
+Gallery CeedQFunction - Mass1DBuild
   2 input fields:
     Input field 0:
       Name: "dx"
@@ -14,7 +14,7 @@ Gallery CeedQFunction Mass1DBuild
       Size: 1
       EvalMode: "none"
 
-Gallery CeedQFunction MassApply
+Gallery CeedQFunction - MassApply
   2 input fields:
     Input field 0:
       Name: "u"
diff --git a/python/tests/output/test_523.out b/python/tests/output/test_523.out
index 577676498d..8721114d9f 100644
--- a/python/tests/output/test_523.out
+++ b/python/tests/output/test_523.out
@@ -1,5 +1,5 @@
-Composite CeedOperator
-  SubOperator 0:
+Composite CeedOperator - setup
+  SubOperator 0 - triangle elements:
     6 elements with 4 quadrature points each
     3 fields
     2 input fields:
@@ -13,7 +13,7 @@ Composite CeedOperator
       Output field 0:
         Name: "rho"
         Collocated basis
-  SubOperator 1:
+  SubOperator 1 - quadralateral elements:
     6 elements with 16 quadrature points each
     3 fields
     2 input fields:
@@ -28,8 +28,8 @@ Composite CeedOperator
         Name: "rho"
         Collocated basis
 
-Composite CeedOperator
-  SubOperator 0:
+Composite CeedOperator - mass
+  SubOperator 0 - triangle elements:
     6 elements with 4 quadrature points each
     3 fields
     2 input fields:
@@ -43,7 +43,7 @@ Composite CeedOperator
       Output field 0:
         Name: "v"
         Active vector
-  SubOperator 1:
+  SubOperator 1 - quadralateral elements:
     6 elements with 16 quadrature points each
     3 fields
     2 input fields:
diff --git a/python/tests/test-5-operator.py b/python/tests/test-5-operator.py
index 044d77cdf2..78b455779d 100644
--- a/python/tests/test-5-operator.py
+++ b/python/tests/test-5-operator.py
@@ -1224,6 +1224,7 @@ def test_523(ceed_resource, capsys):
 
     # Operators
     op_setup_tet = ceed.Operator(qf_setup_tet)
+    op_setup_tet.name('triangle elements')
     op_setup_tet.set_field("weights", libceed.ELEMRESTRICTION_NONE, bx_tet,
                            libceed.VECTOR_NONE)
     op_setup_tet.set_field("dx", rx_tet, bx_tet, libceed.VECTOR_ACTIVE)
@@ -1231,6 +1232,7 @@ def test_523(ceed_resource, capsys):
                            qdata_tet)
 
     op_mass_tet = ceed.Operator(qf_mass_tet)
+    op_mass_tet.name('triangle elements')
     op_mass_tet.set_field("rho", rui_tet, libceed.BASIS_COLLOCATED, qdata_tet)
     op_mass_tet.set_field("u", ru_tet, bu_tet, libceed.VECTOR_ACTIVE)
     op_mass_tet.set_field("v", ru_tet, bu_tet, libceed.VECTOR_ACTIVE)
@@ -1278,6 +1280,7 @@ def test_523(ceed_resource, capsys):
 
     # Operators
     op_setup_hex = ceed.Operator(qf_setup_tet)
+    op_setup_hex.name("quadralateral elements")
     op_setup_hex.set_field("weights", libceed.ELEMRESTRICTION_NONE, bx_hex,
                            libceed.VECTOR_NONE)
     op_setup_hex.set_field("dx", rx_hex, bx_hex, libceed.VECTOR_ACTIVE)
@@ -1285,6 +1288,7 @@ def test_523(ceed_resource, capsys):
                            qdata_hex)
 
     op_mass_hex = ceed.Operator(qf_mass_hex)
+    op_mass_hex.name("quadralateral elements")
     op_mass_hex.set_field("rho", rui_hex, libceed.BASIS_COLLOCATED, qdata_hex)
     op_mass_hex.set_field("u", ru_hex, bu_hex, libceed.VECTOR_ACTIVE)
     op_mass_hex.set_field("v", ru_hex, bu_hex, libceed.VECTOR_ACTIVE)
@@ -1293,11 +1297,13 @@ def test_523(ceed_resource, capsys):
 
     # Setup
     op_setup = ceed.CompositeOperator()
+    op_setup.name('setup')
     op_setup.add_sub(op_setup_tet)
     op_setup.add_sub(op_setup_hex)
 
     # Apply mass matrix
     op_mass = ceed.CompositeOperator()
+    op_mass.name('mass')
     op_mass.add_sub(op_mass_tet)
     op_mass.add_sub(op_mass_hex)
 
diff --git a/rust/libceed/src/operator.rs b/rust/libceed/src/operator.rs
index 12b1df4fd4..084162e086 100644
--- a/rust/libceed/src/operator.rs
+++ b/rust/libceed/src/operator.rs
@@ -337,6 +337,7 @@ impl<'a> fmt::Display for OperatorCore<'a> {
 /// // Operator fields
 /// let op = ceed
 ///     .operator(&qf, QFunctionOpt::None, QFunctionOpt::None)?
+///     .name("mass")?
 ///     .field("dx", &r, &b, VectorOpt::Active)?
 ///     .field("weights", ElemRestrictionOpt::None, &b, VectorOpt::None)?
 ///     .field("qdata", &rq, BasisOpt::Collocated, VectorOpt::Active)?;
@@ -378,6 +379,7 @@ impl<'a> fmt::Display for Operator<'a> {
 /// let qf_mass = ceed.q_function_interior_by_name("MassApply")?;
 /// let op_mass = ceed
 ///     .operator(&qf_mass, QFunctionOpt::None, QFunctionOpt::None)?
+///     .name("Mass term")?
 ///     .field("u", &r, &b, VectorOpt::Active)?
 ///     .field("qdata", &rq, BasisOpt::Collocated, &qdata_mass)?
 ///     .field("v", &r, &b, VectorOpt::Active)?;
@@ -385,12 +387,14 @@ impl<'a> fmt::Display for Operator<'a> {
 /// let qf_diff = ceed.q_function_interior_by_name("Poisson1DApply")?;
 /// let op_diff = ceed
 ///     .operator(&qf_diff, QFunctionOpt::None, QFunctionOpt::None)?
+///     .name("Poisson term")?
 ///     .field("du", &r, &b, VectorOpt::Active)?
 ///     .field("qdata", &rq, BasisOpt::Collocated, &qdata_diff)?
 ///     .field("dv", &r, &b, VectorOpt::Active)?;
 ///
 /// let op = ceed
 ///     .composite_operator()?
+///     .name("Screened Poisson")?
 ///     .sub_operator(&op_mass)?
 ///     .sub_operator(&op_diff)?;
 ///
@@ -424,6 +428,12 @@ impl<'a> OperatorCore<'a> {
         self.check_error(ierr)
     }
 
+    pub fn name(&self, name: &str) -> crate::Result<i32> {
+        let name_c = CString::new(name).expect("CString::new failed");
+        let ierr = unsafe { bind_ceed::CeedOperatorSetName(self.ptr, name_c.as_ptr()) };
+        self.check_error(ierr)
+    }
+
     pub fn apply(&self, input: &Vector, output: &mut Vector) -> crate::Result<i32> {
         let ierr = unsafe {
             bind_ceed::CeedOperatorApply(
@@ -538,6 +548,46 @@ impl<'a> Operator<'a> {
         })
     }
 
+    /// Set name for Operator printing
+    ///
+    /// * `name` - Name to set
+    ///
+    /// ```
+    /// # use libceed::prelude::*;
+    /// # fn main() -> libceed::Result<()> {
+    /// # let ceed = libceed::Ceed::default_init();
+    /// let qf = ceed.q_function_interior_by_name("Mass1DBuild")?;
+    ///
+    /// // Operator field arguments
+    /// let ne = 3;
+    /// let q = 4 as usize;
+    /// let mut ind: Vec<i32> = vec![0; 2 * ne];
+    /// for i in 0..ne {
+    ///     ind[2 * i + 0] = i as i32;
+    ///     ind[2 * i + 1] = (i + 1) as i32;
+    /// }
+    /// let r = ceed.elem_restriction(ne, 2, 1, 1, ne + 1, MemType::Host, &ind)?;
+    /// let strides: [i32; 3] = [1, q as i32, q as i32];
+    /// let rq = ceed.strided_elem_restriction(ne, 2, 1, q * ne, strides)?;
+    ///
+    /// let b = ceed.basis_tensor_H1_Lagrange(1, 1, 2, q, QuadMode::Gauss)?;
+    ///
+    /// // Operator fields
+    /// let op = ceed
+    ///     .operator(&qf, QFunctionOpt::None, QFunctionOpt::None)?
+    ///     .name("mass")?
+    ///     .field("dx", &r, &b, VectorOpt::Active)?
+    ///     .field("weights", ElemRestrictionOpt::None, &b, VectorOpt::None)?
+    ///     .field("qdata", &rq, BasisOpt::Collocated, VectorOpt::Active)?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    #[allow(unused_mut)]
+    pub fn name(mut self, name: &str) -> crate::Result<Self> {
+        self.op_core.name(name)?;
+        Ok(self)
+    }
+
     /// Apply Operator to a vector
     ///
     /// * `input`  - Input Vector
@@ -2035,9 +2085,65 @@ impl<'a> CompositeOperator<'a> {
         })
     }
 
+    /// Set name for CompositeOperator printing
+    ///
+    /// * `name` - Name to set
+    ///
+    /// ```
+    /// # use libceed::prelude::*;
+    /// # fn main() -> libceed::Result<()> {
+    /// # let ceed = libceed::Ceed::default_init();
+    ///
+    /// // Sub operator field arguments
+    /// let ne = 3;
+    /// let q = 4 as usize;
+    /// let mut ind: Vec<i32> = vec![0; 2 * ne];
+    /// for i in 0..ne {
+    ///     ind[2 * i + 0] = i as i32;
+    ///     ind[2 * i + 1] = (i + 1) as i32;
+    /// }
+    /// let r = ceed.elem_restriction(ne, 2, 1, 1, ne + 1, MemType::Host, &ind)?;
+    /// let strides: [i32; 3] = [1, q as i32, q as i32];
+    /// let rq = ceed.strided_elem_restriction(ne, 2, 1, q * ne, strides)?;
+    ///
+    /// let b = ceed.basis_tensor_H1_Lagrange(1, 1, 2, q, QuadMode::Gauss)?;
+    ///
+    /// let qdata_mass = ceed.vector(q * ne)?;
+    /// let qdata_diff = ceed.vector(q * ne)?;
+    ///
+    /// let qf_mass = ceed.q_function_interior_by_name("MassApply")?;
+    /// let op_mass = ceed
+    ///     .operator(&qf_mass, QFunctionOpt::None, QFunctionOpt::None)?
+    ///     .name("Mass term")?
+    ///     .field("u", &r, &b, VectorOpt::Active)?
+    ///     .field("qdata", &rq, BasisOpt::Collocated, &qdata_mass)?
+    ///     .field("v", &r, &b, VectorOpt::Active)?;
+    ///
+    /// let qf_diff = ceed.q_function_interior_by_name("Poisson1DApply")?;
+    /// let op_diff = ceed
+    ///     .operator(&qf_diff, QFunctionOpt::None, QFunctionOpt::None)?
+    ///     .name("Poisson term")?
+    ///     .field("du", &r, &b, VectorOpt::Active)?
+    ///     .field("qdata", &rq, BasisOpt::Collocated, &qdata_diff)?
+    ///     .field("dv", &r, &b, VectorOpt::Active)?;
+    ///
+    /// let op = ceed
+    ///     .composite_operator()?
+    ///     .name("Screened Poisson")?
+    ///     .sub_operator(&op_mass)?
+    ///     .sub_operator(&op_diff)?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    #[allow(unused_mut)]
+    pub fn name(mut self, name: &str) -> crate::Result<Self> {
+        self.op_core.name(name)?;
+        Ok(self)
+    }
+
     /// Apply Operator to a vector
     ///
-    /// * `input`  - Input Vector
+    /// * `input`  - Input Vector
     /// * `output` - Output Vector
     ///
     /// ```
diff --git a/tests/output/t402-qfunction-f.out b/tests/output/t402-qfunction-f.out
index 62f024ffdf..7163a434f1 100644
--- a/tests/output/t402-qfunction-f.out
+++ b/tests/output/t402-qfunction-f.out
@@ -1,4 +1,4 @@
-User CeedQFunction setup
+User CeedQFunction - setup
   1 input field:
     Input field 0:
       Name: "w"
@@ -9,7 +9,7 @@ User CeedQFunction setup
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-User CeedQFunction mass
+User CeedQFunction - mass
   2 input fields:
     Input field 0:
       Name: "qdata"
diff --git a/tests/output/t402-qfunction.out b/tests/output/t402-qfunction.out
index 62f024ffdf..7163a434f1 100644
--- a/tests/output/t402-qfunction.out
+++ b/tests/output/t402-qfunction.out
@@ -1,4 +1,4 @@
-User CeedQFunction setup
+User CeedQFunction - setup
   1 input field:
     Input field 0:
       Name: "w"
@@ -9,7 +9,7 @@ User CeedQFunction setup
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-User CeedQFunction mass
+User CeedQFunction - mass
   2 input fields:
     Input field 0:
       Name: "qdata"
diff --git a/tests/output/t413-qfunction-f.out b/tests/output/t413-qfunction-f.out
index 03c41e1634..ffee1bdca7 100644
--- a/tests/output/t413-qfunction-f.out
+++ b/tests/output/t413-qfunction-f.out
@@ -1,4 +1,4 @@
-Gallery CeedQFunction Mass1DBuild
+Gallery CeedQFunction - Mass1DBuild
   2 input fields:
     Input field 0:
       Name: "dx"
@@ -13,7 +13,7 @@ Gallery CeedQFunction Mass1DBuild
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-Gallery CeedQFunction MassApply
+Gallery CeedQFunction - MassApply
   2 input fields:
     Input field 0:
       Name: "u"
diff --git a/tests/output/t413-qfunction.out b/tests/output/t413-qfunction.out
index 03c41e1634..ffee1bdca7 100644
--- a/tests/output/t413-qfunction.out
+++ b/tests/output/t413-qfunction.out
@@ -1,4 +1,4 @@
-Gallery CeedQFunction Mass1DBuild
+Gallery CeedQFunction - Mass1DBuild
   2 input fields:
     Input field 0:
       Name: "dx"
@@ -13,7 +13,7 @@ Gallery CeedQFunction Mass1DBuild
       Name: "qdata"
       Size: 1
       EvalMode: "none"
-Gallery CeedQFunction MassApply
+Gallery CeedQFunction - MassApply
   2 input fields:
     Input field 0:
       Name: "u"
diff --git a/tests/output/t523-operator-f.out b/tests/output/t523-operator-f.out
index fdae3fc01f..79b6ac9151 100644
--- a/tests/output/t523-operator-f.out
+++ b/tests/output/t523-operator-f.out
@@ -1,5 +1,5 @@
-Composite CeedOperator
-  SubOperator 0:
+Composite CeedOperator - setup
+  SubOperator 0 - triangle elements:
     6 elements with 4 quadrature points each
     3 fields
     2 input fields:
@@ -13,7 +13,7 @@ Composite CeedOperator
       Output field 0:
         Name: "rho"
         Collocated basis
-  SubOperator 1:
+  SubOperator 1 - quadralateral elements:
     6 elements with 16 quadrature points each
     3 fields
     2 input fields:
@@ -27,8 +27,8 @@ Composite CeedOperator
       Output field 0:
         Name: "rho"
         Collocated basis
-Composite CeedOperator
-  SubOperator 0:
+Composite CeedOperator - mass
+  SubOperator 0 - triangle elements:
     6 elements with 4 quadrature points each
     3 fields
     2 input fields:
@@ -42,7 +42,7 @@ Composite CeedOperator
       Output field 0:
         Name: "v"
         Active vector
-  SubOperator 1:
+  SubOperator 1 - quadralateral elements:
     6 elements with 16 quadrature points each
     3 fields
     2 input fields:
diff --git a/tests/output/t523-operator.out b/tests/output/t523-operator.out
index fdae3fc01f..79b6ac9151 100644
--- a/tests/output/t523-operator.out
+++ b/tests/output/t523-operator.out
@@ -1,5 +1,5 @@
-Composite CeedOperator
-  SubOperator 0:
+Composite CeedOperator - setup
+  SubOperator 0 - triangle elements:
     6 elements with 4 quadrature points each
     3 fields
     2 input fields:
@@ -13,7 +13,7 @@ Composite CeedOperator
       Output field 0:
         Name: "rho"
         Collocated basis
-  SubOperator 1:
+  SubOperator 1 - quadralateral elements:
     6 elements with 16 quadrature points each
     3 fields
     2 input fields:
@@ -27,8 +27,8 @@ Composite CeedOperator
       Output field 0:
         Name: "rho"
         Collocated basis
-Composite CeedOperator
-  SubOperator 0:
+Composite CeedOperator - mass
+  SubOperator 0 - triangle elements:
     6 elements with 4 quadrature points each
     3 fields
     2 input fields:
@@ -42,7 +42,7 @@ Composite CeedOperator
       Output field 0:
         Name: "v"
         Active vector
-  SubOperator 1:
+  SubOperator 1 - quadralateral elements:
     6 elements with 16 quadrature points each
     3 fields
     2 input fields:
diff --git a/tests/t523-operator-f.f90 b/tests/t523-operator-f.f90
index 6770f5b4be..9e5d402bc6 100644
--- a/tests/t523-operator-f.f90
+++ b/tests/t523-operator-f.f90
@@ -121,7 +121,8 @@ program test
 ! ---- Setup Tet
       call ceedoperatorcreate(ceed,qf_setuptet,ceed_qfunction_none,&
      & ceed_qfunction_none,op_setuptet,err)
-      call ceedoperatorsetfield(op_setuptet, 'weight',&
+      call ceedoperatorsetname(op_setuptet,'triangle elements',err)
+      call ceedoperatorsetfield(op_setuptet,'weight',&
      & ceed_elemrestriction_none,bxtet,ceed_vector_none,err)
       call ceedoperatorsetfield(op_setuptet,'dx',erestrictxtet,&
      & bxtet,ceed_vector_active,err)
@@ -130,6 +131,7 @@ program test
 ! ---- Mass Tet
       call ceedoperatorcreate(ceed,qf_masstet,ceed_qfunction_none,&
      & ceed_qfunction_none,op_masstet,err)
+      call ceedoperatorsetname(op_masstet,'triangle elements',err)
       call ceedoperatorsetfield(op_masstet,'rho',erestrictuitet,&
      & ceed_basis_collocated,qdatatet,err)
       call ceedoperatorsetfield(op_masstet,'u',erestrictutet,&
@@ -184,6 +186,7 @@ program test
 ! ---- Setup Hex
       call ceedoperatorcreate(ceed,qf_setuphex,ceed_qfunction_none,&
      & ceed_qfunction_none,op_setuphex,err)
+      call ceedoperatorsetname(op_setuphex,'quadralateral elements',err)
       call ceedoperatorsetfield(op_setuphex,'weight',&
      & ceed_elemrestriction_none,bxhex,ceed_vector_none,err)
       call ceedoperatorsetfield(op_setuphex,'dx',erestrictxhex,&
@@ -193,6 +196,7 @@ program test
 ! ---- Mass Hex
       call ceedoperatorcreate(ceed,qf_masshex,ceed_qfunction_none,&
      & ceed_qfunction_none,op_masshex,err)
+      call ceedoperatorsetname(op_masshex,'quadralateral elements',err)
       call ceedoperatorsetfield(op_masshex,'rho',erestrictuihex,&
      & ceed_basis_collocated,qdatahex,err)
       call ceedoperatorsetfield(op_masshex,'u',erestrictuhex,&
@@ -202,10 +206,12 @@ program test
 
 ! Composite Operators
       call ceedcompositeoperatorcreate(ceed,op_setup,err)
+      call ceedoperatorsetname(op_setup,'setup',err)
       call ceedcompositeoperatoraddsub(op_setup,op_setuptet,err)
       call ceedcompositeoperatoraddsub(op_setup,op_setuphex,err)
 
       call ceedcompositeoperatorcreate(ceed,op_mass,err)
+      call ceedoperatorsetname(op_mass,'mass',err)
       call ceedcompositeoperatoraddsub(op_mass,op_masstet,err)
       call ceedcompositeoperatoraddsub(op_mass,op_masshex,err)
 
diff --git a/tests/t523-operator.c b/tests/t523-operator.c
index a8159ec70b..47630cecba 100644
--- a/tests/t523-operator.c
+++ b/tests/t523-operator.c
@@ -109,6 +109,7 @@ int main(int argc, char **argv) {
   // ---- Setup _tet
   CeedOperatorCreate(ceed, qf_setup_tet, CEED_QFUNCTION_NONE,
                      CEED_QFUNCTION_NONE, &op_setup_tet);
+  CeedOperatorSetName(op_setup_tet, "triangle elements");
   CeedOperatorSetField(op_setup_tet, "weight", CEED_ELEMRESTRICTION_NONE,
                        basis_x_tet,
                        CEED_VECTOR_NONE);
@@ -119,6 +120,7 @@ int main(int argc, char **argv) {
   // ---- Mass _tet
   CeedOperatorCreate(ceed, qf_mass_tet, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE,
                      &op_mass_tet);
+  CeedOperatorSetName(op_mass_tet, "triangle elements");
   CeedOperatorSetField(op_mass_tet, "rho", elem_restr_qd_tet,
                        CEED_BASIS_COLLOCATED,
                        q_data_tet);
@@ -170,6 +172,7 @@ int main(int argc, char **argv) {
   // -- Operators
   CeedOperatorCreate(ceed, qf_setup_hex, CEED_QFUNCTION_NONE,
                      CEED_QFUNCTION_NONE, &op_setup_hex);
+  CeedOperatorSetName(op_setup_hex, "quadralateral elements");
   CeedOperatorSetField(op_setup_hex, "weight", CEED_ELEMRESTRICTION_NONE,
                        basis_x_hex,
                        CEED_VECTOR_NONE);
@@ -180,6 +183,7 @@ int main(int argc, char **argv) {
 
   CeedOperatorCreate(ceed, qf_mass_hex, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE,
                      &op_mass_hex);
+  CeedOperatorSetName(op_mass_hex, "quadralateral elements");
   CeedOperatorSetField(op_mass_hex, "rho", elem_restr_qd_i_hex,
                        CEED_BASIS_COLLOCATED,
                        q_data_hex);
@@ -191,12 +195,14 @@ int main(int argc, char **argv) {
   // Set up Composite Operators
   // -- Create
   CeedCompositeOperatorCreate(ceed, &op_setup);
+  CeedOperatorSetName(op_setup, "setup");
   // -- Add SubOperators
   CeedCompositeOperatorAddSub(op_setup, op_setup_tet);
   CeedCompositeOperatorAddSub(op_setup, op_setup_hex);
 
   // -- Create
   CeedCompositeOperatorCreate(ceed, &op_mass);
+  CeedOperatorSetName(op_mass, "mass");
   // -- Add SubOperators
   CeedCompositeOperatorAddSub(op_mass, op_mass_tet);
   CeedCompositeOperatorAddSub(op_mass, op_mass_hex);

From baf62c763fe782181c5fba1d862615c9ba148e1b Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Fri, 6 May 2022 17:48:56 -0600
Subject: [PATCH 37/59] op - fix small bug in operator cloning

---
 interface/ceed-preconditioning.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c
index 24d31e90d9..f99ef82305 100644
--- a/interface/ceed-preconditioning.c
+++ b/interface/ceed-preconditioning.c
@@ -68,7 +68,6 @@ int CeedOperatorCreateFallback(CeedOperator op) {
   ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr);
   ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled,
          &op_ref->qf_assembled); CeedChk(ierr);
-  ierr = CeedOperatorSetName(op_ref, op->name); CeedChk(ierr);
   op->op_fallback = op_ref;
 
   // Clone QF

From 91e5af17cc38f7d4d16af29119518d78e5298d3f Mon Sep 17 00:00:00 2001
From: Jed Brown <jed@jedbrown.org>
Date: Sun, 8 May 2022 21:29:31 -0600
Subject: [PATCH 38/59] examples/fluids: create struct in Problem for each
 qfunction + related

This is a cosmetic step toward better modularity.
---
 examples/fluids/navierstokes.h            | 11 +++---
 examples/fluids/problems/advection.c      | 38 ++++++++++----------
 examples/fluids/problems/advection2d.c    | 38 ++++++++++----------
 examples/fluids/problems/blasius.c        | 14 ++++----
 examples/fluids/problems/channel.c        | 14 ++++----
 examples/fluids/problems/densitycurrent.c |  4 +--
 examples/fluids/problems/eulervortex.c    | 42 +++++++++++------------
 examples/fluids/problems/newtonian.c      | 32 ++++++++---------
 examples/fluids/problems/shocktube.c      | 34 +++++++++---------
 examples/fluids/src/setuplibceed.c        | 33 ++++++++++--------
 10 files changed, 133 insertions(+), 127 deletions(-)

diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h
index 8236002cd0..d53a89f8ab 100644
--- a/examples/fluids/navierstokes.h
+++ b/examples/fluids/navierstokes.h
@@ -343,15 +343,18 @@ struct Physics_private {
   CeedContextFieldLabel    timestep_size_label;
 };
 
+typedef struct {
+  CeedQFunctionUser    qfunction;
+  const char           *qfunction_loc;
+} ProblemQFunctionSpec;
+
 // Problem specific data
 // *INDENT-OFF*
 typedef struct {
   CeedInt           dim, q_data_size_vol, q_data_size_sur;
   CeedScalar        dm_scale;
-  CeedQFunctionUser setup_vol, setup_sur, ics, apply_vol_rhs, apply_vol_ifunction,
-                    apply_inflow, apply_outflow;
-  const char        *setup_vol_loc, *setup_sur_loc, *ics_loc,
-                    *apply_vol_rhs_loc, *apply_vol_ifunction_loc, *apply_inflow_loc, *apply_outflow_loc;
+  ProblemQFunctionSpec setup_vol, setup_sur, ics, apply_vol_rhs, apply_vol_ifunction,
+    apply_inflow, apply_outflow;
   bool              non_zero_time;
   PetscErrorCode    (*bc)(PetscInt, PetscReal, const PetscReal[], PetscInt,
                           PetscScalar[], void *);
diff --git a/examples/fluids/problems/advection.c b/examples/fluids/problems/advection.c
index 59651967c8..51a77930eb 100644
--- a/examples/fluids/problems/advection.c
+++ b/examples/fluids/problems/advection.c
@@ -31,25 +31,25 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //               SET UP ADVECTION
   // ------------------------------------------------------
-  problem->dim                     = 3;
-  problem->q_data_size_vol         = 10;
-  problem->q_data_size_sur         = 4;
-  problem->setup_vol               = Setup;
-  problem->setup_vol_loc           = Setup_loc;
-  problem->setup_sur               = SetupBoundary;
-  problem->setup_sur_loc           = SetupBoundary_loc;
-  problem->ics                     = ICsAdvection;
-  problem->ics_loc                 = ICsAdvection_loc;
-  problem->apply_vol_rhs           = Advection;
-  problem->apply_vol_rhs_loc       = Advection_loc;
-  problem->apply_vol_ifunction     = IFunction_Advection;
-  problem->apply_vol_ifunction_loc = IFunction_Advection_loc;
-  problem->apply_inflow            = Advection_InOutFlow;
-  problem->apply_inflow_loc        = Advection_InOutFlow_loc;
-  problem->bc                      = Exact_Advection;
-  problem->setup_ctx               = SetupContext_ADVECTION;
-  problem->non_zero_time           = PETSC_FALSE;
-  problem->print_info              = PRINT_ADVECTION;
+  problem->dim                               = 3;
+  problem->q_data_size_vol                   = 10;
+  problem->q_data_size_sur                   = 4;
+  problem->setup_vol.qfunction               = Setup;
+  problem->setup_vol.qfunction_loc           = Setup_loc;
+  problem->setup_sur.qfunction               = SetupBoundary;
+  problem->setup_sur.qfunction_loc           = SetupBoundary_loc;
+  problem->ics.qfunction                     = ICsAdvection;
+  problem->ics.qfunction_loc                 = ICsAdvection_loc;
+  problem->apply_vol_rhs.qfunction           = Advection;
+  problem->apply_vol_rhs.qfunction_loc       = Advection_loc;
+  problem->apply_vol_ifunction.qfunction     = IFunction_Advection;
+  problem->apply_vol_ifunction.qfunction_loc = IFunction_Advection_loc;
+  problem->apply_inflow.qfunction            = Advection_InOutFlow;
+  problem->apply_inflow.qfunction_loc        = Advection_InOutFlow_loc;
+  problem->bc                                = Exact_Advection;
+  problem->setup_ctx                         = SetupContext_ADVECTION;
+  problem->non_zero_time                     = PETSC_FALSE;
+  problem->print_info                        = PRINT_ADVECTION;
 
   // ------------------------------------------------------
   //             Create the libCEED context
diff --git a/examples/fluids/problems/advection2d.c b/examples/fluids/problems/advection2d.c
index 6c29be22f2..6ae6b1550b 100644
--- a/examples/fluids/problems/advection2d.c
+++ b/examples/fluids/problems/advection2d.c
@@ -29,25 +29,25 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //               SET UP ADVECTION2D
   // ------------------------------------------------------
-  problem->dim                     = 2;
-  problem->q_data_size_vol         = 5;
-  problem->q_data_size_sur         = 3;
-  problem->setup_vol               = Setup2d;
-  problem->setup_vol_loc           = Setup2d_loc;
-  problem->setup_sur               = SetupBoundary2d;
-  problem->setup_sur_loc           = SetupBoundary2d_loc;
-  problem->ics                     = ICsAdvection2d;
-  problem->ics_loc                 = ICsAdvection2d_loc;
-  problem->apply_vol_rhs           = Advection2d;
-  problem->apply_vol_rhs_loc       = Advection2d_loc;
-  problem->apply_vol_ifunction     = IFunction_Advection2d;
-  problem->apply_vol_ifunction_loc = IFunction_Advection2d_loc;
-  problem->apply_inflow            = Advection2d_InOutFlow;
-  problem->apply_inflow_loc        = Advection2d_InOutFlow_loc;
-  problem->bc                      = Exact_Advection2d;
-  problem->setup_ctx               = SetupContext_ADVECTION2D;
-  problem->non_zero_time           = PETSC_TRUE;
-  problem->print_info              = PRINT_ADVECTION2D;
+  problem->dim                               = 2;
+  problem->q_data_size_vol                   = 5;
+  problem->q_data_size_sur                   = 3;
+  problem->setup_vol.qfunction               = Setup2d;
+  problem->setup_vol.qfunction_loc           = Setup2d_loc;
+  problem->setup_sur.qfunction               = SetupBoundary2d;
+  problem->setup_sur.qfunction_loc           = SetupBoundary2d_loc;
+  problem->ics.qfunction                     = ICsAdvection2d;
+  problem->ics.qfunction_loc                 = ICsAdvection2d_loc;
+  problem->apply_vol_rhs.qfunction           = Advection2d;
+  problem->apply_vol_rhs.qfunction_loc       = Advection2d_loc;
+  problem->apply_vol_ifunction.qfunction     = IFunction_Advection2d;
+  problem->apply_vol_ifunction.qfunction_loc = IFunction_Advection2d_loc;
+  problem->apply_inflow.qfunction            = Advection2d_InOutFlow;
+  problem->apply_inflow.qfunction_loc        = Advection2d_InOutFlow_loc;
+  problem->bc                                = Exact_Advection2d;
+  problem->setup_ctx                         = SetupContext_ADVECTION2D;
+  problem->non_zero_time                     = PETSC_TRUE;
+  problem->print_info                        = PRINT_ADVECTION2D;
 
   // ------------------------------------------------------
   //             Create the libCEED context
diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c
index 19ce6ef1ee..e3e1498c8f 100644
--- a/examples/fluids/problems/blasius.c
+++ b/examples/fluids/problems/blasius.c
@@ -104,13 +104,13 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //               SET UP Blasius
   // ------------------------------------------------------
-  problem->ics                     = ICsBlasius;
-  problem->ics_loc                 = ICsBlasius_loc;
-  problem->apply_inflow            = Blasius_Inflow;
-  problem->apply_inflow_loc        = Blasius_Inflow_loc;
-  problem->apply_outflow           = Blasius_Outflow;
-  problem->apply_outflow_loc       = Blasius_Outflow_loc;
-  problem->setup_ctx               = SetupContext_BLASIUS;
+  problem->ics.qfunction               = ICsBlasius;
+  problem->ics.qfunction_loc           = ICsBlasius_loc;
+  problem->apply_inflow.qfunction      = Blasius_Inflow;
+  problem->apply_inflow.qfunction_loc  = Blasius_Inflow_loc;
+  problem->apply_outflow.qfunction     = Blasius_Outflow;
+  problem->apply_outflow.qfunction_loc = Blasius_Outflow_loc;
+  problem->setup_ctx                   = SetupContext_BLASIUS;
 
   // CeedScalar mu = .04; // Pa s, dynamic viscosity
   CeedScalar mu            = 1.8e-5;   // Pa s, dynamic viscosity
diff --git a/examples/fluids/problems/channel.c b/examples/fluids/problems/channel.c
index 3c23bd9047..5b184beb66 100644
--- a/examples/fluids/problems/channel.c
+++ b/examples/fluids/problems/channel.c
@@ -39,13 +39,13 @@ PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //               SET UP Channel
   // ------------------------------------------------------
-  problem->ics               = ICsChannel;
-  problem->ics_loc           = ICsChannel_loc;
-  problem->apply_inflow      = Channel_Inflow;
-  problem->apply_inflow_loc  = Channel_Inflow_loc;
-  problem->apply_outflow     = Channel_Outflow;
-  problem->apply_outflow_loc = Channel_Outflow_loc;
-  problem->setup_ctx         = SetupContext_CHANNEL;
+  problem->ics.qfunction               = ICsChannel;
+  problem->ics.qfunction_loc           = ICsChannel_loc;
+  problem->apply_inflow.qfunction      = Channel_Inflow;
+  problem->apply_inflow.qfunction_loc  = Channel_Inflow_loc;
+  problem->apply_outflow.qfunction     = Channel_Outflow;
+  problem->apply_outflow.qfunction_loc = Channel_Outflow_loc;
+  problem->setup_ctx                   = SetupContext_CHANNEL;
 
   // -- Command Line Options
   CeedScalar umax   = 10.;  // m/s
diff --git a/examples/fluids/problems/densitycurrent.c b/examples/fluids/problems/densitycurrent.c
index 57fe457e6d..4ab836dd0c 100644
--- a/examples/fluids/problems/densitycurrent.c
+++ b/examples/fluids/problems/densitycurrent.c
@@ -26,8 +26,8 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //               SET UP DENSITY_CURRENT
   // ------------------------------------------------------
-  problem->ics = ICsDC;
-  problem->ics_loc = ICsDC_loc;
+  problem->ics.qfunction = ICsDC;
+  problem->ics.qfunction_loc = ICsDC_loc;
   problem->bc = Exact_DC;
 
   // ------------------------------------------------------
diff --git a/examples/fluids/problems/eulervortex.c b/examples/fluids/problems/eulervortex.c
index 7760ef6412..4c90ea1c6c 100644
--- a/examples/fluids/problems/eulervortex.c
+++ b/examples/fluids/problems/eulervortex.c
@@ -30,27 +30,27 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //               SET UP DENSITY_CURRENT
   // ------------------------------------------------------
-  problem->dim                     = 3;
-  problem->q_data_size_vol         = 10;
-  problem->q_data_size_sur         = 4;
-  problem->setup_vol               = Setup;
-  problem->setup_vol_loc           = Setup_loc;
-  problem->setup_sur               = SetupBoundary;
-  problem->setup_sur_loc           = SetupBoundary_loc;
-  problem->ics                     = ICsEuler;
-  problem->ics_loc                 = ICsEuler_loc;
-  problem->apply_vol_rhs           = Euler;
-  problem->apply_vol_rhs_loc       = Euler_loc;
-  problem->apply_vol_ifunction     = IFunction_Euler;
-  problem->apply_vol_ifunction_loc = IFunction_Euler_loc;
-  problem->apply_inflow            = TravelingVortex_Inflow;
-  problem->apply_inflow_loc        = TravelingVortex_Inflow_loc;
-  problem->apply_outflow           = Euler_Outflow;
-  problem->apply_outflow_loc       = Euler_Outflow_loc;
-  problem->bc                      = Exact_Euler;
-  problem->setup_ctx               = SetupContext_EULER_VORTEX;
-  problem->non_zero_time           = PETSC_TRUE;
-  problem->print_info              = PRINT_EULER_VORTEX;
+  problem->dim                               = 3;
+  problem->q_data_size_vol                   = 10;
+  problem->q_data_size_sur                   = 4;
+  problem->setup_vol.qfunction               = Setup;
+  problem->setup_vol.qfunction_loc           = Setup_loc;
+  problem->setup_sur.qfunction               = SetupBoundary;
+  problem->setup_sur.qfunction_loc           = SetupBoundary_loc;
+  problem->ics.qfunction                     = ICsEuler;
+  problem->ics.qfunction_loc                 = ICsEuler_loc;
+  problem->apply_vol_rhs.qfunction           = Euler;
+  problem->apply_vol_rhs.qfunction_loc       = Euler_loc;
+  problem->apply_vol_ifunction.qfunction     = IFunction_Euler;
+  problem->apply_vol_ifunction.qfunction_loc = IFunction_Euler_loc;
+  problem->apply_inflow.qfunction            = TravelingVortex_Inflow;
+  problem->apply_inflow.qfunction_loc        = TravelingVortex_Inflow_loc;
+  problem->apply_outflow.qfunction           = Euler_Outflow;
+  problem->apply_outflow.qfunction_loc       = Euler_Outflow_loc;
+  problem->bc                                = Exact_Euler;
+  problem->setup_ctx                         = SetupContext_EULER_VORTEX;
+  problem->non_zero_time                     = PETSC_TRUE;
+  problem->print_info                        = PRINT_EULER_VORTEX;
 
   // ------------------------------------------------------
   //             Create the libCEED context
diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c
index 8422ce5cbf..df610759c7 100644
--- a/examples/fluids/problems/newtonian.c
+++ b/examples/fluids/problems/newtonian.c
@@ -50,22 +50,22 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //           Setup Generic Newtonian IG Problem
   // ------------------------------------------------------
-  problem->dim                     = 3;
-  problem->q_data_size_vol         = 10;
-  problem->q_data_size_sur         = 4;
-  problem->setup_vol               = Setup;
-  problem->setup_vol_loc           = Setup_loc;
-  problem->ics                     = ICsNewtonianIG;
-  problem->ics_loc                 = ICsNewtonianIG_loc;
-  problem->setup_sur               = SetupBoundary;
-  problem->setup_sur_loc           = SetupBoundary_loc;
-  problem->apply_vol_rhs           = Newtonian;
-  problem->apply_vol_rhs_loc       = Newtonian_loc;
-  problem->apply_vol_ifunction     = IFunction_Newtonian;
-  problem->apply_vol_ifunction_loc = IFunction_Newtonian_loc;
-  problem->setup_ctx               = SetupContext_DENSITY_CURRENT;
-  problem->non_zero_time           = PETSC_FALSE;
-  problem->print_info              = PRINT_DENSITY_CURRENT;
+  problem->dim                               = 3;
+  problem->q_data_size_vol                   = 10;
+  problem->q_data_size_sur                   = 4;
+  problem->setup_vol.qfunction               = Setup;
+  problem->setup_vol.qfunction_loc           = Setup_loc;
+  problem->ics.qfunction                     = ICsNewtonianIG;
+  problem->ics.qfunction_loc                 = ICsNewtonianIG_loc;
+  problem->setup_sur.qfunction               = SetupBoundary;
+  problem->setup_sur.qfunction_loc           = SetupBoundary_loc;
+  problem->apply_vol_rhs.qfunction           = Newtonian;
+  problem->apply_vol_rhs.qfunction_loc       = Newtonian_loc;
+  problem->apply_vol_ifunction.qfunction     = IFunction_Newtonian;
+  problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc;
+  problem->setup_ctx                         = SetupContext_DENSITY_CURRENT;
+  problem->non_zero_time                     = PETSC_FALSE;
+  problem->print_info                        = PRINT_DENSITY_CURRENT;
 
   // ------------------------------------------------------
   //             Create the libCEED context
diff --git a/examples/fluids/problems/shocktube.c b/examples/fluids/problems/shocktube.c
index 679ff515df..7492311efe 100644
--- a/examples/fluids/problems/shocktube.c
+++ b/examples/fluids/problems/shocktube.c
@@ -38,23 +38,23 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
   // ------------------------------------------------------
   //               SET UP SHOCKTUBE
   // ------------------------------------------------------
-  problem->dim                     = 3;
-  problem->q_data_size_vol         = 10;
-  problem->q_data_size_sur         = 4;
-  problem->setup_vol               = Setup;
-  problem->setup_vol_loc           = Setup_loc;
-  problem->setup_sur               = SetupBoundary;
-  problem->setup_sur_loc           = SetupBoundary_loc;
-  problem->ics                     = ICsShockTube;
-  problem->ics_loc                 = ICsShockTube_loc;
-  problem->apply_vol_rhs           = EulerShockTube;
-  problem->apply_vol_rhs_loc       = EulerShockTube_loc;
-  problem->apply_vol_ifunction     = NULL;
-  problem->apply_vol_ifunction_loc = NULL;
-  problem->bc                      = Exact_ShockTube;
-  problem->setup_ctx               = SetupContext_SHOCKTUBE;
-  problem->non_zero_time           = PETSC_FALSE;
-  problem->print_info              = PRINT_SHOCKTUBE;
+  problem->dim                               = 3;
+  problem->q_data_size_vol                   = 10;
+  problem->q_data_size_sur                   = 4;
+  problem->setup_vol.qfunction               = Setup;
+  problem->setup_vol.qfunction_loc           = Setup_loc;
+  problem->setup_sur.qfunction               = SetupBoundary;
+  problem->setup_sur.qfunction_loc           = SetupBoundary_loc;
+  problem->ics.qfunction                     = ICsShockTube;
+  problem->ics.qfunction_loc                 = ICsShockTube_loc;
+  problem->apply_vol_rhs.qfunction           = EulerShockTube;
+  problem->apply_vol_rhs.qfunction_loc       = EulerShockTube_loc;
+  problem->apply_vol_ifunction.qfunction     = NULL;
+  problem->apply_vol_ifunction.qfunction_loc = NULL;
+  problem->bc                                = Exact_ShockTube;
+  problem->setup_ctx                         = SetupContext_SHOCKTUBE;
+  problem->non_zero_time                     = PETSC_FALSE;
+  problem->print_info                        = PRINT_SHOCKTUBE;
 
   // ------------------------------------------------------
   //             Create the libCEED context
diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c
index 2379477f95..c640ed0ca4 100644
--- a/examples/fluids/src/setuplibceed.c
+++ b/examples/fluids/src/setuplibceed.c
@@ -257,7 +257,8 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   // CEED QFunctions
   // -----------------------------------------------------------------------------
   // -- Create QFunction for quadrature data
-  CeedQFunctionCreateInterior(ceed, 1, problem->setup_vol, problem->setup_vol_loc,
+  CeedQFunctionCreateInterior(ceed, 1, problem->setup_vol.qfunction,
+                              problem->setup_vol.qfunction_loc,
                               &ceed_data->qf_setup_vol);
   CeedQFunctionAddInput(ceed_data->qf_setup_vol, "dx", num_comp_x*dim,
                         CEED_EVAL_GRAD);
@@ -266,15 +267,16 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
                          CEED_EVAL_NONE);
 
   // -- Create QFunction for ICs
-  CeedQFunctionCreateInterior(ceed, 1, problem->ics, problem->ics_loc,
+  CeedQFunctionCreateInterior(ceed, 1, problem->ics.qfunction,
+                              problem->ics.qfunction_loc,
                               &ceed_data->qf_ics);
   CeedQFunctionAddInput(ceed_data->qf_ics, "x", num_comp_x, CEED_EVAL_INTERP);
   CeedQFunctionAddOutput(ceed_data->qf_ics, "q0", num_comp_q, CEED_EVAL_NONE);
 
   // -- Create QFunction for RHS
-  if (problem->apply_vol_rhs) {
-    CeedQFunctionCreateInterior(ceed, 1, problem->apply_vol_rhs,
-                                problem->apply_vol_rhs_loc, &ceed_data->qf_rhs_vol);
+  if (problem->apply_vol_rhs.qfunction) {
+    CeedQFunctionCreateInterior(ceed, 1, problem->apply_vol_rhs.qfunction,
+                                problem->apply_vol_rhs.qfunction_loc, &ceed_data->qf_rhs_vol);
     CeedQFunctionAddInput(ceed_data->qf_rhs_vol, "q", num_comp_q, CEED_EVAL_INTERP);
     CeedQFunctionAddInput(ceed_data->qf_rhs_vol, "dq", num_comp_q*dim,
                           CEED_EVAL_GRAD);
@@ -288,9 +290,9 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   }
 
   // -- Create QFunction for IFunction
-  if (problem->apply_vol_ifunction) {
-    CeedQFunctionCreateInterior(ceed, 1, problem->apply_vol_ifunction,
-                                problem->apply_vol_ifunction_loc, &ceed_data->qf_ifunction_vol);
+  if (problem->apply_vol_ifunction.qfunction) {
+    CeedQFunctionCreateInterior(ceed, 1, problem->apply_vol_ifunction.qfunction,
+                                problem->apply_vol_ifunction.qfunction_loc, &ceed_data->qf_ifunction_vol);
     CeedQFunctionAddInput(ceed_data->qf_ifunction_vol, "q", num_comp_q,
                           CEED_EVAL_INTERP);
     CeedQFunctionAddInput(ceed_data->qf_ifunction_vol, "dq", num_comp_q*dim,
@@ -416,7 +418,8 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   // CEED QFunctions
   // -----------------------------------------------------------------------------
   // -- Create QFunction for quadrature data
-  CeedQFunctionCreateInterior(ceed, 1, problem->setup_sur, problem->setup_sur_loc,
+  CeedQFunctionCreateInterior(ceed, 1, problem->setup_sur.qfunction,
+                              problem->setup_sur.qfunction_loc,
                               &ceed_data->qf_setup_sur);
   CeedQFunctionAddInput(ceed_data->qf_setup_sur, "dx", num_comp_x*dim_sur,
                         CEED_EVAL_GRAD);
@@ -425,9 +428,9 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
                          q_data_size_sur, CEED_EVAL_NONE);
 
   // -- Creat QFunction for inflow boundaries
-  if (problem->apply_inflow) {
-    CeedQFunctionCreateInterior(ceed, 1, problem->apply_inflow,
-                                problem->apply_inflow_loc, &ceed_data->qf_apply_inflow);
+  if (problem->apply_inflow.qfunction) {
+    CeedQFunctionCreateInterior(ceed, 1, problem->apply_inflow.qfunction,
+                                problem->apply_inflow.qfunction_loc, &ceed_data->qf_apply_inflow);
     CeedQFunctionAddInput(ceed_data->qf_apply_inflow, "q", num_comp_q,
                           CEED_EVAL_INTERP);
     CeedQFunctionAddInput(ceed_data->qf_apply_inflow, "surface qdata",
@@ -439,9 +442,9 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   }
 
   // -- Creat QFunction for outflow boundaries
-  if (problem->apply_outflow) {
-    CeedQFunctionCreateInterior(ceed, 1, problem->apply_outflow,
-                                problem->apply_outflow_loc, &ceed_data->qf_apply_outflow);
+  if (problem->apply_outflow.qfunction) {
+    CeedQFunctionCreateInterior(ceed, 1, problem->apply_outflow.qfunction,
+                                problem->apply_outflow.qfunction_loc, &ceed_data->qf_apply_outflow);
     CeedQFunctionAddInput(ceed_data->qf_apply_outflow, "q", num_comp_q,
                           CEED_EVAL_INTERP);
     CeedQFunctionAddInput(ceed_data->qf_apply_outflow, "surface qdata",

From cffd6fdce1224b9ad441ed4022faa643463969db Mon Sep 17 00:00:00 2001
From: Jed Brown <jed@jedbrown.org>
Date: Sun, 8 May 2022 21:37:51 -0600
Subject: [PATCH 39/59] examples/fluids: add viscous channel flow test

---
 examples/fluids/navierstokes.c                   |   1 +
 .../tests-output/fluids-navierstokes-channel.bin | Bin 0 -> 6408 bytes
 2 files changed, 1 insertion(+)
 create mode 100644 examples/fluids/tests-output/fluids-navierstokes-channel.bin

diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c
index ea7fc9ae3f..a10747402d 100644
--- a/examples/fluids/navierstokes.c
+++ b/examples/fluids/navierstokes.c
@@ -22,6 +22,7 @@
 //     ./navierstokes -ceed /cpu/self -problem density_current -degree 1
 //     ./navierstokes -ceed /gpu/cuda -problem advection -degree 1
 //
+//TESTARGS(name="channel") -ceed {ceed_resource} -test -options_file examples/fluids/channel.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-channel.bin
 //TESTARGS(name="dc_explicit") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -ts_dt 1e-3 -units_meter 1e-2 -units_second 1e-2 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-explicit.bin
 //TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -units_kilogram 1e-9 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -mu 75 -units_meter 1e-2 -units_second 1e-2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin
 //TESTARGS(name="adv_rotation_explicit_strong") -ceed {ceed_resource} -test -problem advection -strong_form 1 -degree 3 -dm_plex_box_faces 2,2,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -bc_wall 1,2,3,4,5,6 -wall_comps 4 -units_kilogram 1e-9 -rc 100. -ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-explicit-strong.bin
diff --git a/examples/fluids/tests-output/fluids-navierstokes-channel.bin b/examples/fluids/tests-output/fluids-navierstokes-channel.bin
new file mode 100644
index 0000000000000000000000000000000000000000..23d6f0e271adff05b32e7d9a62b07e0a8d89d012
GIT binary patch
literal 6408
zcmdVedtA+F8UXM^BheJ05lKYR2W5;kqnl3Xq6=dWa*Jwd!Z1uTMMa&KTvmgF(N^O&
zG}4Gds#A@O7;_3WT`ZAO&U8dtql{#q=XsCS^uFz9|J?oa_xYaZ{r&o#=RB7r{la}D
z5}jdFn)kVQSEoIe8g4S2es!gV_WF_b+C_W)Tzfr9d#&#D`DX3)`<>1k*Xepfr|VIj
zt|x1+-`w8>uB9dAA%4!|vtmD)IirWu^GSZscC`(c8p>}b<+(p5FAI2x{z`3-1|BGz
zWm84o^a#(BmQ?7bqL-DPI1lb`KX2O&^7>FWcD|hzdbOh77rk=HEH85Px*^PChoM&;
z3wA(1e)-*O<|YT2hb%<Dd+?h&^o(x9gPFTdXYL=z;%)Y#e|Pw=cgY_Xe8l|JS9Jfq
ziI(WuTlLS8{~B3DUUt77z3gW7W^{k6mJ;&X^3miKn=hlk(gaq62UNu`t|kAitQ!45
zgq_scF(=`4aOo+hx)-h&%qrWYhTEGPZ0{SAhp*EIPc&;todYgSY`+*n{x|l14n!QA
zhc4OId>Gti=WwHC<X<!?*!g{qqU(L0@DknLM8}pqd}#`Ex8KlpeGV-^m-YSO3i$@T
zOzrbcM(^3<^!MnV7fjlj$LcU28^+?NnUL4-%OGF>(?aHdDX05~WNFBojcmvxEX~Qo
zuN$LF6eGSxpPivQgFK}BCi2Mce&}XTG-tr2E!(%pknidjfG#hx{?vJD%+TREy$YS~
zui3b@_w%VzL&XkD1OFG~D(_hI#kaE#g3C3d%$Ub|(DTcS4t7KLzBsBHJlb@w&n$`$
zwqUOMJsy2_zod)kNl&(UlYbWH!S4T22>PtLsqdhN8LT}^zB+BL_W72gf9z4P2R%k^
z{S*0`g;$vmp2p(e@kd`@{Vbft2k&L>JC5$(e@Q*M!alN$eC=|2KXO$@I=XlMl#}S=
zUauZa9@O23;){EXL0^2&qX1l<TfS)~`3~n&^vEKQZO*P<XLEC#8=P)9s?yyD?3NnF
z-^i>u<Vvnu*M{D@ym1luwc6T3h>yQ<A#i#Ki&xsAtIy1xg5DG{yp-J8C!4ux1^T0h
zhg{IrHz%AT_vkgBxvven=3<6Bx~61H4!LWwjJd}KiXV}^id=8pW^%W9tXJ^-c3CWb
z)m-$3_lyf!{C27jkwu|HDPA2h61}n1G@0H1&Pj?_m8YVs)5girYc^b-MDAoilf2lx
z4!w27^C0xL+rIGq!QUTb<wqX6GvC?GZg*mu=~9Q9{x8doS_VrE69<O-7WE_l^4tpa
ziZZMF%9{2az8f#yByW%nBoFHrgnp&ndZF@~<%u$b;1}eVudih1FZmu_<>>#7vM{$L
z?wSSUKb&9uPpXfRp^012e>T>Ape%{AHC`S#hFq2KRQr7C<e^nH%EE|)-jah*A0WQy
zvqj7|mXZ%pURC}WGiShuUwX3mdDwrXk)b(xbblwe1<HbBlKEanjpWy_Zeu=3gMQ`Z
z!!yd_MT<|RK9G@L%A7$Sw(KE##Vx-v%3GfoUU_`@HTh%58{{4GFAwtX%#U9u;rkWk
zANaKg#xkgX;`~EzI_n4Z3tW(Y=;~y<Z^;GuhyJMNKr?bd{-J9IO;?c%@(;aXpb5)=
zS3dvHA6qR8V)y6s54|QMHH%!3f9P$mlA)i!&A$%y?@}Jrzu0^eclv$v^)C@zuJQ7Y
zB<Jhj9q?$=_51pe^Yt$uJ?S4QCgg(phaNV*W(B#R{-MXjWo{+s>)#Ue_4UsbES|4_
z=nB8}`^owGmx69%l?~r7)PF(!1DEHj_4CQK^{+$!J&_0fH&e$8>VvTV&H|Svjn?re
z7xZ6nmz_V1jUpHHUv#?<DzA|X`Y*a{lYt$%p#P$Ko}I(``v$K68j;5*!25yc=lgFx
zx>rKz$K-tfO+^2*`=p8Fg8mCGZHdo>-zS`}?Y|x7Kb<_B|K=YZMe_|m|D}Tm$^v43
zA?N2mbbtG(?fT^Pp;D|5CFO7DKlDl$>tgb^^Pf4J-+~>`kJrC?!knM~&@=QM*D&Yi
zzdh*RCHvTt3+6xc?37+j<ox^>jqdNM5A_e`U%~tb9#D1U++lJy|NS@f{hRs!%{~D2
zm-d0q^STARua4*8{P~}*JAZzn_VXR*`;PH%KL5sDw4eW9**|ohF#i<!?EB^O?+kdL
zY|g7ndjEX>WrF+LFETtr-qrrGyzV^t+x+|E{&6Kx!ko`PJU>J4D%3~F4>gy6Z2vfB
z8%-|AKa9`b<qGo+#Pj*b_K#8HKOq<7AH+lcnfD>@YX2C0;7|1XwiWLmNxEy7k_+me
z6V#8LV~)do4fR1#|IqD+4Tb#<Tu}eeWkW0`kPGS`x@WE%+aLJ)$M%m#S5}|+`bYbR
zm!#h&I-jq9+WkXznp{x-;QrE<uYdhJIlq5ga>D(i&(smrU-<sZ_K%4UY(D4vFWW!-
zl6zA;-+$Tu(P|Ix5Bi&+|HA#F!`?H8{TN)(f6-%h*ueY&&i7xof2i-AVDWta#dyVx
z+I{4F|E2xIM&f2qF6h4yFVDRzPbKea{|M~qgY%7O{&Rx)r}oZQ@b`)5KlG+OQ{ej%
z&wuFZJFyyae*W`7*VOFIB^S(p<o)%azQFlzUF;tt=eV+XZvL~x_{J84+wA`Q{D)q%
zCJyo+&KJyo=xr|ot;t3E$CK90!IGy&G+*=k$35j`qb;#VDxjah{4dx)9P%gcu;1Ns
zh+MFLIGk+l9p(P=DY;<(a8RVKoY$@DcXGl0;ZU&uwXW0abaK)Dk&+XpoAna<D?Fcy
z-#;8w6MLq&`$K+$i}sJAf|S-CtGbi(`^Q*^3qJ?FyXuJrc~|>~YO)o+Us3+$!TWz8
z@mxUfU$Fn8H)U*w`B<EP=<3~r`jYec=Z>y<miYm>Apgkwnr|c*<R5y2epD!n=kjkP
zdSiPO^lNxN4?h3UYw|}dCl}-&dRtvv1GzT;xc%e8rFHcC=IbBZKOEDhk@NMhDi7+5
z=@-YwlJoVC?H@(^q5q2OAI67S{{qh^u7Bt;F&{$z64pPqe>~{>h{f~u591ZHS6h<v
z^^ffzBlWtG3+f-l%X2G_!tWFA-^KoM{&oIw>Mwl%W&4MD{6KO+|INexo3qf2T+n~f
z?Z!k6CKvQybXjOE<PY>`LH|YfOq&AVFM1dIhvLXBa=!nv{UgqJ2sz(>+5X{mALf5J
zU(kQy{?eAMdp{-bYX6v91NA}p|7W&;EKY{~7@VK~*!~fbSwP;^{^4i|`ybA4|Nj5;
zANP-1-xTKj{FjIGU(fe9Fz4n!wtpO1%l0FF{<FgP?87f6Q#?QavHfGlbvZdd|J6c#
PKvlx{d*uHI`^UckpxjUK

literal 0
HcmV?d00001


From 805fe78e79a8894810ad102692aced56c4202f41 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Mon, 9 May 2022 10:32:15 -0600
Subject: [PATCH 40/59] pc - use refcounting in fallback clone creation

---
 interface/ceed-operator.c        |  7 +---
 interface/ceed-preconditioning.c | 62 ++++++++++++++++++++------------
 2 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c
index a715e8f5d3..dc67767b39 100644
--- a/interface/ceed-operator.c
+++ b/interface/ceed-operator.c
@@ -1693,12 +1693,7 @@ int CeedOperatorDestroy(CeedOperator *op) {
   ierr = CeedFree(&(*op)->context_labels); CeedChk(ierr);
 
   // Destroy fallback
-  if ((*op)->op_fallback) {
-    ierr = (*op)->qf_fallback->Destroy((*op)->qf_fallback); CeedChk(ierr);
-    ierr = CeedFree(&(*op)->qf_fallback); CeedChk(ierr);
-    ierr = (*op)->op_fallback->Destroy((*op)->op_fallback); CeedChk(ierr);
-    ierr = CeedFree(&(*op)->op_fallback); CeedChk(ierr);
-  }
+  ierr = CeedOperatorDestroy(&(*op)->op_fallback); CeedChk(ierr);
 
   // Destroy QF assembly cache
   ierr = CeedQFunctionAssemblyDataDestroy(&(*op)->qf_assembled); CeedChk(ierr);
diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c
index f99ef82305..6367d70ba9 100644
--- a/interface/ceed-preconditioning.c
+++ b/interface/ceed-preconditioning.c
@@ -35,6 +35,9 @@
 int CeedOperatorCreateFallback(CeedOperator op) {
   int ierr;
 
+  // Check not already created
+  if (op->op_fallback) return CEED_ERROR_SUCCESS;
+
   // Fallback Ceed
   const char *resource, *fallback_resource;
   ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
@@ -48,37 +51,49 @@ int CeedOperatorCreateFallback(CeedOperator op) {
   // LCOV_EXCL_STOP
 
   // Fallback Ceed
-  Ceed ceed_ref;
   if (!op->ceed->op_fallback_ceed) {
+    Ceed ceed_ref;
     ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr);
     ceed_ref->op_fallback_parent = op->ceed;
     ceed_ref->Error = op->ceed->Error;
     op->ceed->op_fallback_ceed = ceed_ref;
   }
-  ceed_ref = op->ceed->op_fallback_ceed;
 
   // Clone Op
-  CeedOperator op_ref;
-  ierr = CeedCalloc(1, &op_ref); CeedChk(ierr);
-  memcpy(op_ref, op, sizeof(*op_ref));
-  op_ref->data = NULL;
-  op_ref->is_interface_setup = false;
-  op_ref->is_backend_setup = false;
-  op_ref->ceed = ceed_ref;
-  ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr);
-  ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled,
-         &op_ref->qf_assembled); CeedChk(ierr);
-  op->op_fallback = op_ref;
-
-  // Clone QF
-  CeedQFunction qf_ref;
-  ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr);
-  memcpy(qf_ref, (op->qf), sizeof(*qf_ref));
-  qf_ref->data = NULL;
-  qf_ref->ceed = ceed_ref;
-  ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr);
-  op_ref->qf = qf_ref;
-  op->qf_fallback = qf_ref;
+  CeedOperator op_fallback;
+  if (op->is_composite) {
+    ierr = CeedCompositeOperatorCreate(op->ceed->op_fallback_ceed, &op_fallback);
+    CeedChk(ierr);
+    for (CeedInt i = 0; i < op->num_suboperators; i++) {
+      ierr = CeedCompositeOperatorAddSub(op_fallback, op->sub_operators[i]);
+      CeedChk(ierr);
+    }
+  } else {
+    ierr = CeedOperatorCreate(op->ceed->op_fallback_ceed, op->qf, op->dqf, op->dqfT,
+                              &op_fallback); CeedChk(ierr);
+    for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
+      ierr = CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name,
+                                  op->input_fields[i]->elem_restr,
+                                  op->input_fields[i]->basis,
+                                  op->input_fields[i]->vec); CeedChk(ierr);
+    }
+    for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
+      ierr = CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name,
+                                  op->output_fields[i]->elem_restr,
+                                  op->output_fields[i]->basis,
+                                  op->output_fields[i]->vec); CeedChk(ierr);
+    }
+    ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled,
+           &op_fallback->qf_assembled); CeedChk(ierr);
+    if (op_fallback->num_qpts == 0) {
+      ierr = CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts);
+      CeedChk(ierr);
+    }
+  }
+  ierr = CeedOperatorSetName(op_fallback, op->name); CeedChk(ierr);
+  ierr = CeedOperatorCheckReady(op_fallback); CeedChk(ierr);
+  op->op_fallback = op_fallback;
+
   return CEED_ERROR_SUCCESS;
 }
 
@@ -985,6 +1000,7 @@ static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
   ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
   ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
   ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
+
   return CEED_ERROR_SUCCESS;
 }
 

From 841e4c7362a2acf3a6f116f4961b1eb52fa410fc Mon Sep 17 00:00:00 2001
From: Jed Brown <jed@jedbrown.org>
Date: Mon, 9 May 2022 17:02:12 -0600
Subject: [PATCH 41/59] examples/fluids: consolidate two-step setup

Contexts are created in one place now and we have removed duplicate
struct definitions for physics.
---
 examples/fluids/channel.yaml                  |   1 +
 examples/fluids/navierstokes.c                |  15 +-
 examples/fluids/navierstokes.h                | 199 ++----------------
 examples/fluids/problems/advection.c          |  75 ++++---
 examples/fluids/problems/advection2d.c        |  70 +++---
 examples/fluids/problems/blasius.c            |  89 +++-----
 examples/fluids/problems/channel.c            |  90 ++++----
 examples/fluids/problems/densitycurrent.c     |  25 ---
 examples/fluids/problems/eulervortex.c        |  92 ++++----
 examples/fluids/problems/newtonian.c          | 111 +++++-----
 examples/fluids/problems/shocktube.c          |  54 ++---
 examples/fluids/qfunctions/advection.h        |   3 -
 examples/fluids/qfunctions/advection2d.h      |   3 -
 examples/fluids/qfunctions/blasius.h          |   5 +-
 examples/fluids/qfunctions/channel.h          |   5 +-
 examples/fluids/qfunctions/eulervortex.h      |   3 -
 examples/fluids/qfunctions/newtonian.h        |  28 +--
 examples/fluids/qfunctions/newtonian_types.h  |  25 +++
 examples/fluids/qfunctions/shocktube.h        |   3 -
 .../fluids/qfunctions/stabilization_types.h   |  10 +
 examples/fluids/src/misc.c                    |  35 +--
 examples/fluids/src/setuplibceed.c            |  30 ++-
 22 files changed, 371 insertions(+), 600 deletions(-)
 create mode 100644 examples/fluids/qfunctions/newtonian_types.h
 create mode 100644 examples/fluids/qfunctions/stabilization_types.h

diff --git a/examples/fluids/channel.yaml b/examples/fluids/channel.yaml
index 8e2cf6d6bb..0c7e89d7f5 100644
--- a/examples/fluids/channel.yaml
+++ b/examples/fluids/channel.yaml
@@ -1,4 +1,5 @@
 problem: 'channel'
+mu: .01
 
 umax: 40
 implicit: true
diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c
index a10747402d..84ec0fdfcb 100644
--- a/examples/fluids/navierstokes.c
+++ b/examples/fluids/navierstokes.c
@@ -167,7 +167,7 @@ int main(int argc, char **argv) {
   ierr = DMGetLocalVector(dm, &Q_loc); CHKERRQ(ierr);
 
   // -- Fix multiplicity for ICs
-  ierr = ICs_FixMultiplicity(dm, ceed_data, Q_loc, Q, 0.0); CHKERRQ(ierr);
+  ierr = ICs_FixMultiplicity(dm, ceed_data, user, Q_loc, Q, 0.0); CHKERRQ(ierr);
 
   // ---------------------------------------------------------------------------
   // Set up lumped mass matrix
@@ -220,7 +220,7 @@ int main(int argc, char **argv) {
                        host_name, comm_size); CHKERRQ(ierr);
 
     // Problem specific info
-    ierr = problem->print_info(phys_ctx, setup_ctx, app_ctx); CHKERRQ(ierr);
+    ierr = problem->print_info(problem, setup_ctx, app_ctx); CHKERRQ(ierr);
 
     // libCEED
     const char *used_resource;
@@ -279,7 +279,7 @@ int main(int argc, char **argv) {
   // ---------------------------------------------------------------------------
   // Post-processing
   // ---------------------------------------------------------------------------
-  ierr = PostProcess_NS(ts, ceed_data, dm, problem, app_ctx, Q, final_time);
+  ierr = PostProcess_NS(ts, ceed_data, dm, problem, user, Q, final_time);
   CHKERRQ(ierr);
 
   // ---------------------------------------------------------------------------
@@ -294,9 +294,6 @@ int main(int argc, char **argv) {
 
   // -- Contexts
   CeedQFunctionContextDestroy(&ceed_data->setup_context);
-  CeedQFunctionContextDestroy(&ceed_data->newt_ig_context);
-  CeedQFunctionContextDestroy(&ceed_data->advection_context);
-  CeedQFunctionContextDestroy(&ceed_data->euler_context);
 
   // -- QFunctions
   CeedQFunctionDestroy(&ceed_data->qf_setup_vol);
@@ -356,12 +353,6 @@ int main(int argc, char **argv) {
   ierr = PetscFree(problem); CHKERRQ(ierr);
   ierr = PetscFree(bc); CHKERRQ(ierr);
   ierr = PetscFree(setup_ctx); CHKERRQ(ierr);
-  ierr = PetscFree(phys_ctx->newtonian_ig_ctx); CHKERRQ(ierr);
-  ierr = PetscFree(phys_ctx->euler_ctx); CHKERRQ(ierr);
-  ierr = PetscFree(phys_ctx->shocktube_ctx); CHKERRQ(ierr);
-  ierr = PetscFree(phys_ctx->advection_ctx); CHKERRQ(ierr);
-  ierr = PetscFree(phys_ctx->channel_ctx); CHKERRQ(ierr);
-  ierr = PetscFree(phys_ctx->blasius_ctx); CHKERRQ(ierr);
   ierr = PetscFree(phys_ctx); CHKERRQ(ierr);
   ierr = PetscFree(app_ctx); CHKERRQ(ierr);
   ierr = PetscFree(ceed_data); CHKERRQ(ierr);
diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h
index d53a89f8ab..8b92c8e5b7 100644
--- a/examples/fluids/navierstokes.h
+++ b/examples/fluids/navierstokes.h
@@ -14,6 +14,7 @@
 #include <petscsys.h>
 #include <petscts.h>
 #include <stdbool.h>
+#include "qfunctions/stabilization_types.h"
 
 // -----------------------------------------------------------------------------
 // PETSc Version
@@ -85,11 +86,6 @@ static const char *const EulerTestTypes[] = {
 };
 
 // Stabilization methods
-typedef enum {
-  STAB_NONE = 0,
-  STAB_SU   = 1, // Streamline Upwind
-  STAB_SUPG = 2, // Streamline Upwind Petrov-Galerkin
-} StabilizationType;
 static const char *const StabilizationTypes[] = {
   "none",
   "SU",
@@ -131,8 +127,7 @@ struct AppCtx_private {
 // libCEED data struct
 struct CeedData_private {
   CeedVector           x_coord, q_data;
-  CeedQFunctionContext setup_context, newt_ig_context, advection_context,
-                       euler_context, shocktube_context, channel_context, blasius_context;
+  CeedQFunctionContext setup_context;
   CeedQFunction        qf_setup_vol, qf_ics, qf_rhs_vol, qf_ifunction_vol,
                        qf_setup_sur, qf_apply_inflow, qf_apply_outflow;
   CeedBasis            basis_x, basis_xc, basis_q, basis_x_sur, basis_q_sur;
@@ -213,124 +208,8 @@ struct SetupContext_ {
 };
 #endif
 
-// DENSITY_CURRENT
-#ifndef dc_context_struct
-#define dc_context_struct
-typedef struct DCContext_ *DCContext;
-struct DCContext_ {
-  CeedScalar lambda;
-  CeedScalar mu;
-  CeedScalar k;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g;
-  CeedScalar c_tau;
-  int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
-};
-#endif
-
-// EULER_VORTEX
-#ifndef euler_context_struct
-#define euler_context_struct
-typedef struct EulerContext_ *EulerContext;
-struct EulerContext_ {
-  CeedScalar center[3];
-  CeedScalar curr_time;
-  CeedScalar vortex_strength;
-  CeedScalar c_tau;
-  CeedScalar mean_velocity[3];
-  bool implicit;
-  int euler_test;
-  int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
-};
-#endif
-
-// SHOCKTUBE
-#ifndef shocktube_context_struct
-#define shocktube_context_struct
-typedef struct ShockTubeContext_ *ShockTubeContext;
-struct ShockTubeContext_ {
-  CeedScalar Cyzb;
-  CeedScalar Byzb;
-  CeedScalar c_tau;
-  bool implicit;
-  bool yzb;
-  int stabilization;
-};
-#endif
-
-// ADVECTION and ADVECTION2D
-#ifndef advection_context_struct
-#define advection_context_struct
-typedef struct AdvectionContext_ *AdvectionContext;
-struct AdvectionContext_ {
-  CeedScalar CtauS;
-  CeedScalar strong_form;
-  CeedScalar E_wind;
-  bool implicit;
-  int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
-};
-#endif
-
-// Newtonian Ideal Gas
-#ifndef newtonian_context_struct
-#define newtonian_context_struct
-typedef struct NewtonianIdealGasContext_ *NewtonianIdealGasContext;
-struct NewtonianIdealGasContext_ {
-  CeedScalar lambda;
-  CeedScalar mu;
-  CeedScalar k;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g[3];
-  CeedScalar c_tau;
-  CeedScalar Ctau_t;
-  CeedScalar Ctau_v;
-  CeedScalar Ctau_C;
-  CeedScalar Ctau_M;
-  CeedScalar Ctau_E;
-  CeedScalar dt;
-  StabilizationType stabilization;
-};
-#endif
-
-#ifndef channel_context_struct
-#define channel_context_struct
-typedef struct ChannelContext_ *ChannelContext;
-struct ChannelContext_ {
-  bool       implicit; // !< Using implicit timesteping or not
-  CeedScalar theta0;   // !< Reference temperature
-  CeedScalar P0;       // !< Reference Pressure
-  CeedScalar umax;     // !< Centerline velocity
-  CeedScalar center;   // !< Y Coordinate for center of channel
-  CeedScalar H;        // !< Channel half-height
-  CeedScalar B;        // !< Body-force driving the flow
-  struct NewtonianIdealGasContext_ newtonian_ctx;
-};
-#endif
-
-#ifndef blasius_context_struct
-#define blasius_context_struct
-typedef struct BlasiusContext_ *BlasiusContext;
-struct BlasiusContext_ {
-  bool       implicit;  // !< Using implicit timesteping or not
-  bool       weakT;     // !< flag to set Temperature at inflow
-  CeedScalar delta0;    // !< Boundary layer height at inflow
-  CeedScalar Uinf;      // !< Velocity at boundary layer edge
-  CeedScalar P0;        // !< Pressure at outflow
-  CeedScalar theta0;    // !< Temperature at inflow
-  struct NewtonianIdealGasContext_ newtonian_ctx;
-};
-#endif
-
 // Struct that contains all enums and structs used for the physics of all problems
 struct Physics_private {
-  BlasiusContext           blasius_ctx;
-  ChannelContext           channel_ctx;
-  NewtonianIdealGasContext newtonian_ig_ctx;
-  EulerContext             euler_ctx;
-  ShockTubeContext         shocktube_ctx;
-  AdvectionContext         advection_ctx;
   WindType                 wind_type;
   BubbleType               bubble_type;
   BubbleContinuityType     bubble_continuity_type;
@@ -341,16 +220,19 @@ struct Physics_private {
   PetscBool                has_neumann;
   CeedContextFieldLabel    solution_time_label;
   CeedContextFieldLabel    timestep_size_label;
+  CeedContextFieldLabel    ics_time_label;
 };
 
 typedef struct {
   CeedQFunctionUser    qfunction;
   const char           *qfunction_loc;
+  CeedQFunctionContext qfunction_context;
 } ProblemQFunctionSpec;
 
 // Problem specific data
 // *INDENT-OFF*
-typedef struct {
+typedef struct ProblemData_private ProblemData;
+struct ProblemData_private {
   CeedInt           dim, q_data_size_vol, q_data_size_sur;
   CeedScalar        dm_scale;
   ProblemQFunctionSpec setup_vol, setup_sur, ics, apply_vol_rhs, apply_vol_ifunction,
@@ -358,11 +240,12 @@ typedef struct {
   bool              non_zero_time;
   PetscErrorCode    (*bc)(PetscInt, PetscReal, const PetscReal[], PetscInt,
                           PetscScalar[], void *);
-  PetscErrorCode    (*setup_ctx)(Ceed, CeedData, AppCtx, SetupContext, Physics);
-  PetscErrorCode    (*print_info)(Physics, SetupContext, AppCtx);
-} ProblemData;
+  PetscErrorCode    (*print_info)(ProblemData*, SetupContext, AppCtx);
+};
 // *INDENT-ON*
 
+extern int FreeContextPetsc(void *);
+
 // -----------------------------------------------------------------------------
 // Set up problems
 // -----------------------------------------------------------------------------
@@ -386,61 +269,24 @@ extern PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
 extern PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm,
                                      void *setup_ctx, void *ctx);
 
-// Set up context for each problem
-extern PetscErrorCode SetupContext_CHANNEL(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-extern PetscErrorCode SetupContext_BLASIUS(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-extern PetscErrorCode SetupContext_NEWTONIAN_IG(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-extern PetscErrorCode SetupContext_DENSITY_CURRENT(Ceed ceed,
-    CeedData ceed_data, AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-extern PetscErrorCode SetupContext_EULER_VORTEX(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-extern PetscErrorCode SetupContext_SHOCKTUBE(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-extern PetscErrorCode SetupContext_ADVECTION(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-extern PetscErrorCode SetupContext_ADVECTION2D(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys);
-
-// Boundary condition function for each problem
-extern PetscErrorCode BC_DENSITY_CURRENT(DM dm, SimpleBC bc, Physics phys,
-    void *setup_ctx);
-
-extern PetscErrorCode BC_EULER_VORTEX(DM dm, SimpleBC bc, Physics phys,
-                                      void *setup_ctx);
-
-extern PetscErrorCode BC_SHOCKTUBE(DM dm, SimpleBC bc, Physics phys,
-                                   void *setup_ctx);
-
-extern PetscErrorCode BC_ADVECTION(DM dm, SimpleBC bc, Physics phys,
-                                   void *setup_ctx);
-
-extern PetscErrorCode BC_ADVECTION2D(DM dm, SimpleBC bc, Physics phys,
-                                     void *setup_ctx);
-
 // Print function for each problem
-extern PetscErrorCode PRINT_DENSITY_CURRENT(Physics phys,
+extern PetscErrorCode PRINT_DENSITY_CURRENT(ProblemData *problem,
     SetupContext setup_ctx, AppCtx app_ctx);
 
-extern PetscErrorCode PRINT_EULER_VORTEX(Physics phys, SetupContext setup_ctx,
+extern PetscErrorCode PRINT_EULER_VORTEX(ProblemData *problem,
+    SetupContext setup_ctx,
     AppCtx app_ctx);
 
-extern PetscErrorCode PRINT_SHOCKTUBE(Physics phys, SetupContext setup_ctx,
+extern PetscErrorCode PRINT_SHOCKTUBE(ProblemData *problem,
+                                      SetupContext setup_ctx,
                                       AppCtx app_ctx);
 
-extern PetscErrorCode PRINT_ADVECTION(Physics phys, SetupContext setup_ctx,
+extern PetscErrorCode PRINT_ADVECTION(ProblemData *problem,
+                                      SetupContext setup_ctx,
                                       AppCtx app_ctx);
 
-extern PetscErrorCode PRINT_ADVECTION2D(Physics phys, SetupContext setup_ctx,
+extern PetscErrorCode PRINT_ADVECTION2D(ProblemData *problem,
+                                        SetupContext setup_ctx,
                                         AppCtx app_ctx);
 
 // -----------------------------------------------------------------------------
@@ -520,7 +366,8 @@ PetscErrorCode ProcessCommandLineOptions(MPI_Comm comm, AppCtx app_ctx,
 // -----------------------------------------------------------------------------
 // Miscellaneous utility functions
 // -----------------------------------------------------------------------------
-PetscErrorCode ICs_FixMultiplicity(DM dm, CeedData ceed_data, Vec Q_loc, Vec Q,
+PetscErrorCode ICs_FixMultiplicity(DM dm, CeedData ceed_data, User user,
+                                   Vec Q_loc, Vec Q,
                                    CeedScalar time);
 
 PetscErrorCode DMPlexInsertBoundaryValues_NS(DM dm,
@@ -531,12 +378,12 @@ PetscErrorCode DMPlexInsertBoundaryValues_NS(DM dm,
 PetscErrorCode RegressionTests_NS(AppCtx app_ctx, Vec Q);
 
 // Get error for problems with exact solutions
-PetscErrorCode GetError_NS(CeedData ceed_data, DM dm, AppCtx app_ctx, Vec Q,
+PetscErrorCode GetError_NS(CeedData ceed_data, DM dm, User user, Vec Q,
                            PetscScalar final_time);
 
 // Post-processing
 PetscErrorCode PostProcess_NS(TS ts, CeedData ceed_data, DM dm,
-                              ProblemData *problem, AppCtx app_ctx,
+                              ProblemData *problem, User user,
                               Vec Q, PetscScalar final_time);
 
 // -- Gather initial Q values in case of continuation of simulation
diff --git a/examples/fluids/problems/advection.c b/examples/fluids/problems/advection.c
index 51a77930eb..bed865d370 100644
--- a/examples/fluids/problems/advection.c
+++ b/examples/fluids/problems/advection.c
@@ -24,9 +24,11 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   PetscBool            implicit;
   PetscBool            has_curr_time = PETSC_FALSE;
   PetscInt             ierr;
-  PetscFunctionBeginUser;
+  AdvectionContext     advection_ctx;
+  CeedQFunctionContext advection_context;
 
-  ierr = PetscCalloc1(1, &user->phys->advection_ctx); CHKERRQ(ierr);
+  PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &advection_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
   //               SET UP ADVECTION
@@ -47,7 +49,6 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_inflow.qfunction            = Advection_InOutFlow;
   problem->apply_inflow.qfunction_loc        = Advection_InOutFlow_loc;
   problem->bc                                = Exact_Advection;
-  problem->setup_ctx                         = SetupContext_ADVECTION;
   problem->non_zero_time                     = PETSC_FALSE;
   problem->print_info                        = PRINT_ADVECTION;
 
@@ -193,43 +194,39 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   //  if passed correctly
   user->phys->implicit                     = implicit;
   user->phys->has_curr_time                = has_curr_time;
-  user->phys->advection_ctx->CtauS         = CtauS;
-  user->phys->advection_ctx->E_wind        = E_wind;
-  user->phys->advection_ctx->implicit      = implicit;
-  user->phys->advection_ctx->strong_form   = strong_form;
-  user->phys->advection_ctx->stabilization = stab;
-
-  PetscFunctionReturn(0);
-}
-
-PetscErrorCode SetupContext_ADVECTION(Ceed ceed, CeedData ceed_data,
-                                      AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
-  PetscFunctionBeginUser;
-  CeedQFunctionContextCreate(ceed, &ceed_data->setup_context);
-  CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER, sizeof(*setup_ctx), setup_ctx);
-  CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->setup_context);
-  CeedQFunctionContextCreate(ceed, &ceed_data->advection_context);
-  CeedQFunctionContextSetData(ceed_data->advection_context, CEED_MEM_HOST,
+  advection_ctx->CtauS         = CtauS;
+  advection_ctx->E_wind        = E_wind;
+  advection_ctx->implicit      = implicit;
+  advection_ctx->strong_form   = strong_form;
+  advection_ctx->stabilization = stab;
+
+  CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context);
+  CeedQFunctionContextSetData(problem->ics.qfunction_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, sizeof(*setup_context), setup_context);
+
+  CeedQFunctionContextCreate(user->ceed, &advection_context);
+  CeedQFunctionContextSetData(advection_context, CEED_MEM_HOST,
                               CEED_USE_POINTER,
-                              sizeof(*phys->advection_ctx), phys->advection_ctx);
-  if (ceed_data->qf_rhs_vol)
-    CeedQFunctionSetContext(ceed_data->qf_rhs_vol, ceed_data->advection_context);
-  if (ceed_data->qf_ifunction_vol)
-    CeedQFunctionSetContext(ceed_data->qf_ifunction_vol,
-                            ceed_data->advection_context);
-  if (ceed_data->qf_apply_inflow)
-    CeedQFunctionSetContext(ceed_data->qf_apply_inflow,
-                            ceed_data->advection_context);
+                              sizeof(*advection_ctx), advection_ctx);
+  CeedQFunctionContextSetDataDestroy(advection_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+  problem->apply_vol_rhs.qfunction_context = advection_context;
+  CeedQFunctionContextReferenceCopy(advection_context,
+                                    &problem->apply_vol_ifunction.qfunction_context);
+  CeedQFunctionContextReferenceCopy(advection_context,
+                                    &problem->apply_inflow.qfunction_context);
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode PRINT_ADVECTION(Physics phys, SetupContext setup_ctx,
+PetscErrorCode PRINT_ADVECTION(ProblemData *problem, SetupContext setup_ctx,
                                AppCtx app_ctx) {
   MPI_Comm       comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
-  PetscFunctionBeginUser;
+  AdvectionContext advection_ctx;
 
+  PetscFunctionBeginUser;
+  CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
+                              CEED_MEM_HOST, &advection_ctx);
   ierr = PetscPrintf(comm,
                      "  Problem:\n"
                      "    Problem Name                       : %s\n"
@@ -237,16 +234,18 @@ PetscErrorCode PRINT_ADVECTION(Physics phys, SetupContext setup_ctx,
                      "    Bubble Type                        : %s (%dD)\n"
                      "    Bubble Continuity                  : %s\n"
                      "    Wind Type                          : %s\n",
-                     app_ctx->problem_name, StabilizationTypes[phys->stab],
-                     BubbleTypes[phys->bubble_type],
-                     phys->bubble_type == BUBBLE_SPHERE ? 3 : 2,
-                     BubbleContinuityTypes[phys->bubble_continuity_type],
-                     WindTypes[phys->wind_type]); CHKERRQ(ierr);
+                     app_ctx->problem_name, StabilizationTypes[advection_ctx->stabilization],
+                     BubbleTypes[setup_ctx->bubble_type],
+                     setup_ctx->bubble_type == BUBBLE_SPHERE ? 3 : 2,
+                     BubbleContinuityTypes[setup_ctx->bubble_continuity_type],
+                     WindTypes[setup_ctx->wind_type]); CHKERRQ(ierr);
 
-  if (phys->wind_type == WIND_TRANSLATION) {
+  if (setup_ctx->wind_type == WIND_TRANSLATION) {
     ierr = PetscPrintf(comm,
                        "    Background Wind                    : %f,%f,%f\n",
                        setup_ctx->wind[0], setup_ctx->wind[1], setup_ctx->wind[2]); CHKERRQ(ierr);
   }
+  CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
+                                  &advection_ctx);
   PetscFunctionReturn(0);
 }
diff --git a/examples/fluids/problems/advection2d.c b/examples/fluids/problems/advection2d.c
index 6ae6b1550b..632bb522f9 100644
--- a/examples/fluids/problems/advection2d.c
+++ b/examples/fluids/problems/advection2d.c
@@ -22,9 +22,12 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
   PetscBool         implicit;
   PetscBool         has_curr_time = PETSC_FALSE;
   PetscInt          ierr;
-  PetscFunctionBeginUser;
+  AdvectionContext     advection_ctx;
+  CeedQFunctionContext advection_context;
+
 
-  ierr = PetscCalloc1(1, &user->phys->advection_ctx); CHKERRQ(ierr);
+  PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &advection_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
   //               SET UP ADVECTION2D
@@ -45,7 +48,6 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_inflow.qfunction            = Advection2d_InOutFlow;
   problem->apply_inflow.qfunction_loc        = Advection2d_InOutFlow_loc;
   problem->bc                                = Exact_Advection2d;
-  problem->setup_ctx                         = SetupContext_ADVECTION2D;
   problem->non_zero_time                     = PETSC_TRUE;
   problem->print_info                        = PRINT_ADVECTION2D;
 
@@ -168,55 +170,53 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
   user->phys->wind_type                    = wind_type;
   user->phys->implicit                     = implicit;
   user->phys->has_curr_time                = has_curr_time;
-  user->phys->advection_ctx->CtauS         = CtauS;
-  user->phys->advection_ctx->E_wind        = E_wind;
-  user->phys->advection_ctx->implicit      = implicit;
-  user->phys->advection_ctx->strong_form   = strong_form;
-  user->phys->advection_ctx->stabilization = stab;
-
-  PetscFunctionReturn(0);
-}
-
-PetscErrorCode SetupContext_ADVECTION2D(Ceed ceed, CeedData ceed_data,
-                                        AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
-  PetscFunctionBeginUser;
-  CeedQFunctionContextCreate(ceed, &ceed_data->setup_context);
-  CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER, sizeof(*setup_ctx), setup_ctx);
-  CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->setup_context);
-  CeedQFunctionContextCreate(ceed, &ceed_data->advection_context);
-  CeedQFunctionContextSetData(ceed_data->advection_context, CEED_MEM_HOST,
+  advection_ctx->CtauS         = CtauS;
+  advection_ctx->E_wind        = E_wind;
+  advection_ctx->implicit      = implicit;
+  advection_ctx->strong_form   = strong_form;
+  advection_ctx->stabilization = stab;
+
+  CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context);
+  CeedQFunctionContextSetData(problem->ics.qfunction_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, sizeof(*setup_context), setup_context);
+
+  CeedQFunctionContextCreate(user->ceed, &advection_context);
+  CeedQFunctionContextSetData(advection_context, CEED_MEM_HOST,
                               CEED_USE_POINTER,
-                              sizeof(*phys->advection_ctx), phys->advection_ctx);
-  if (ceed_data->qf_rhs_vol)
-    CeedQFunctionSetContext(ceed_data->qf_rhs_vol, ceed_data->advection_context);
-  if (ceed_data->qf_ifunction_vol)
-    CeedQFunctionSetContext(ceed_data->qf_ifunction_vol,
-                            ceed_data->advection_context);
-  if (ceed_data->qf_apply_inflow)
-    CeedQFunctionSetContext(ceed_data->qf_apply_inflow,
-                            ceed_data->advection_context);
+                              sizeof(*advection_ctx), advection_ctx);
+  CeedQFunctionContextSetDataDestroy(advection_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+  problem->apply_vol_rhs.qfunction_context = advection_context;
+  CeedQFunctionContextReferenceCopy(advection_context,
+                                    &problem->apply_vol_ifunction.qfunction_context);
+  CeedQFunctionContextReferenceCopy(advection_context,
+                                    &problem->apply_inflow.qfunction_context);
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode PRINT_ADVECTION2D(Physics phys, SetupContext setup_ctx,
+PetscErrorCode PRINT_ADVECTION2D(ProblemData *problem, SetupContext setup_ctx,
                                  AppCtx app_ctx) {
   MPI_Comm       comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
-  PetscFunctionBeginUser;
+  AdvectionContext advection_ctx;
 
+  PetscFunctionBeginUser;
+  CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
+                              CEED_MEM_HOST, &advection_ctx);
   ierr = PetscPrintf(comm,
                      "  Problem:\n"
                      "    Problem Name                       : %s\n"
                      "    Stabilization                      : %s\n"
                      "    Wind Type                          : %s\n",
-                     app_ctx->problem_name, StabilizationTypes[phys->stab],
-                     WindTypes[phys->wind_type]); CHKERRQ(ierr);
+                     app_ctx->problem_name, StabilizationTypes[advection_ctx->stabilization],
+                     WindTypes[setup_ctx->wind_type]); CHKERRQ(ierr);
 
-  if (phys->wind_type == WIND_TRANSLATION) {
+  if (setup_ctx->wind_type == WIND_TRANSLATION) {
     ierr = PetscPrintf(comm,
                        "    Background Wind                    : %f,%f\n",
                        setup_ctx->wind[0], setup_ctx->wind[1]); CHKERRQ(ierr);
   }
+  CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
+                                  &advection_ctx);
   PetscFunctionReturn(0);
 }
diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c
index e3e1498c8f..ee37cb0036 100644
--- a/examples/fluids/problems/blasius.c
+++ b/examples/fluids/problems/blasius.c
@@ -11,24 +11,6 @@
 #include "../navierstokes.h"
 #include "../qfunctions/blasius.h"
 
-#ifndef blasius_context_struct
-#define blasius_context_struct
-typedef struct BlasiusContext_ *BlasiusContext;
-struct BlasiusContext_ {
-  bool       implicit;  // !< Using implicit timesteping or not
-  CeedScalar delta0;    // !< Boundary layer height at inflow
-  CeedScalar Uinf;      // !< Velocity at boundary layer edge
-  CeedScalar P0;        // !< Pressure at outflow
-  CeedScalar theta0;    // !< Temperature at inflow
-  CeedInt weakT;        // !< flag to weakly set Temperature at inflow if not set weak rho instead
-  struct NewtonianIdealGasContext_ newtonian_ctx;
-};
-#endif
-
-#ifndef M_PI
-#define M_PI    3.14159265358979323846
-#endif
-
 /* \brief Modify the domain and mesh for blasius
  *
  * Modifies mesh such that `N` elements are within 1.2*`delta0` with a geometric
@@ -95,25 +77,28 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *setup_ctx,
                           void *ctx) {
 
   PetscInt ierr;
-  ierr = NS_NEWTONIAN_IG(problem, dm, setup_ctx, ctx); CHKERRQ(ierr);
   User              user = *(User *)ctx;
   MPI_Comm          comm = PETSC_COMM_WORLD;
+  BlasiusContext    blasius_ctx;
+  NewtonianIdealGasContext newtonian_ig_ctx;
+  CeedQFunctionContext blasius_context;
+
   PetscFunctionBeginUser;
-  ierr = PetscCalloc1(1, &user->phys->blasius_ctx); CHKERRQ(ierr);
+  ierr = NS_NEWTONIAN_IG(problem, dm, setup_ctx, ctx); CHKERRQ(ierr);
+  ierr = PetscCalloc1(1, &blasius_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
   //               SET UP Blasius
   // ------------------------------------------------------
+  CeedQFunctionContextDestroy(&problem->ics.qfunction_context);
   problem->ics.qfunction               = ICsBlasius;
   problem->ics.qfunction_loc           = ICsBlasius_loc;
   problem->apply_inflow.qfunction      = Blasius_Inflow;
   problem->apply_inflow.qfunction_loc  = Blasius_Inflow_loc;
   problem->apply_outflow.qfunction     = Blasius_Outflow;
   problem->apply_outflow.qfunction_loc = Blasius_Outflow_loc;
-  problem->setup_ctx                   = SetupContext_BLASIUS;
 
   // CeedScalar mu = .04; // Pa s, dynamic viscosity
-  CeedScalar mu            = 1.8e-5;   // Pa s, dynamic viscosity
   CeedScalar Uinf          = 40;   // m/s
   CeedScalar delta0        = 4.2e-4;    // m
   PetscReal  refine_height = 5.9e-4;    // m
@@ -153,7 +138,6 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *setup_ctx,
   PetscScalar Kelvin          = user->units->Kelvin;
   PetscScalar Pascal          = user->units->Pascal;
 
-  mu     *= Pascal * second;
   theta0 *= Kelvin;
   P0     *= Pascal;
   Uinf   *= meter / second;
@@ -162,40 +146,31 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *setup_ctx,
   ierr = modifyMesh(dm, problem->dim, growth, Ndelta, refine_height, top_angle);
   CHKERRQ(ierr);
 
-  user->phys->blasius_ctx->weakT     = !!weakT;
-  user->phys->blasius_ctx->Uinf      = Uinf;
-  user->phys->blasius_ctx->delta0    = delta0;
-  user->phys->blasius_ctx->theta0    = theta0;
-  user->phys->blasius_ctx->P0        = P0;
-  user->phys->blasius_ctx->implicit  = user->phys->implicit;
-
-  user->phys->newtonian_ig_ctx->mu = mu;
-  user->phys->blasius_ctx->newtonian_ctx = *user->phys->newtonian_ig_ctx;
-  PetscFunctionReturn(0);
-}
-
-PetscErrorCode SetupContext_BLASIUS(Ceed ceed, CeedData ceed_data,
-                                    AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
-  PetscFunctionBeginUser;
-  PetscInt ierr;
-  CeedQFunctionContextCreate(ceed, &ceed_data->setup_context);
-  CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
+  // Some properties depend on parameters from NewtonianIdealGas
+  CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
+                              CEED_MEM_HOST, &newtonian_ig_ctx);
+
+  blasius_ctx->weakT     = !!weakT;
+  blasius_ctx->Uinf      = Uinf;
+  blasius_ctx->delta0    = delta0;
+  blasius_ctx->theta0    = theta0;
+  blasius_ctx->P0        = P0;
+  blasius_ctx->implicit  = user->phys->implicit;
+  blasius_ctx->newtonian_ctx = *newtonian_ig_ctx;
+  CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
+                                  &newtonian_ig_ctx);
+
+  CeedQFunctionContextCreate(user->ceed, &blasius_context);
+  CeedQFunctionContextSetData(blasius_context, CEED_MEM_HOST,
                               CEED_USE_POINTER,
-                              sizeof(*setup_ctx), setup_ctx);
-  ierr = SetupContext_NEWTONIAN_IG(ceed, ceed_data, app_ctx, setup_ctx, phys);
-  CHKERRQ(ierr);
-
-  CeedQFunctionContextCreate(ceed, &ceed_data->blasius_context);
-  CeedQFunctionContextSetData(ceed_data->blasius_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER, sizeof(*phys->blasius_ctx), phys->blasius_ctx);
-  phys->has_neumann = PETSC_TRUE;
-  if (ceed_data->qf_ics)
-    CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->blasius_context);
-  if (ceed_data->qf_apply_inflow)
-    CeedQFunctionSetContext(ceed_data->qf_apply_inflow, ceed_data->blasius_context);
-  if (ceed_data->qf_apply_outflow)
-    CeedQFunctionSetContext(ceed_data->qf_apply_outflow,
-                            ceed_data->blasius_context);
+                              sizeof(*blasius_ctx), blasius_ctx);
+  CeedQFunctionContextSetDataDestroy(blasius_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+
+  problem->ics.qfunction_context = blasius_context;
+  CeedQFunctionContextReferenceCopy(blasius_context,
+                                    &problem->apply_inflow.qfunction_context);
+  CeedQFunctionContextReferenceCopy(blasius_context,
+                                    &problem->apply_outflow.qfunction_context);
   PetscFunctionReturn(0);
 }
-
diff --git a/examples/fluids/problems/channel.c b/examples/fluids/problems/channel.c
index 5b184beb66..2e1c5b64cb 100644
--- a/examples/fluids/problems/channel.c
+++ b/examples/fluids/problems/channel.c
@@ -11,46 +11,33 @@
 #include "../navierstokes.h"
 #include "../qfunctions/channel.h"
 
-#ifndef channel_context_struct
-#define channel_context_struct
-typedef struct ChannelContext_ *ChannelContext;
-struct ChannelContext_ {
-  bool       implicit; // !< Using implicit timesteping or not
-  CeedScalar theta0;   // !< Reference temperature
-  CeedScalar P0;       // !< Reference Pressure
-  CeedScalar umax;     // !< Centerline velocity
-  CeedScalar center;   // !< Y Coordinate for center of channel
-  CeedScalar H;        // !< Channel half-height
-  CeedScalar B;        // !< Body-force driving the flow
-  struct NewtonianIdealGasContext_ newtonian_ctx;
-};
-#endif
-
 PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, void *setup_ctx,
                           void *ctx) {
 
   PetscInt ierr;
-  ierr = NS_NEWTONIAN_IG(problem, dm, setup_ctx, ctx); CHKERRQ(ierr);
   User              user = *(User *)ctx;
   MPI_Comm          comm = PETSC_COMM_WORLD;
+  ChannelContext    channel_ctx;
+  NewtonianIdealGasContext newtonian_ig_ctx;
+  CeedQFunctionContext channel_context;
+
   PetscFunctionBeginUser;
-  ierr = PetscCalloc1(1, &user->phys->channel_ctx); CHKERRQ(ierr);
+  ierr = NS_NEWTONIAN_IG(problem, dm, setup_ctx, ctx); CHKERRQ(ierr);
+  ierr = PetscCalloc1(1, &channel_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
   //               SET UP Channel
   // ------------------------------------------------------
+  CeedQFunctionContextDestroy(&problem->ics.qfunction_context);
   problem->ics.qfunction               = ICsChannel;
   problem->ics.qfunction_loc           = ICsChannel_loc;
   problem->apply_inflow.qfunction      = Channel_Inflow;
   problem->apply_inflow.qfunction_loc  = Channel_Inflow_loc;
   problem->apply_outflow.qfunction     = Channel_Outflow;
   problem->apply_outflow.qfunction_loc = Channel_Outflow_loc;
-  problem->setup_ctx                   = SetupContext_CHANNEL;
 
   // -- Command Line Options
   CeedScalar umax   = 10.;  // m/s
-  CeedScalar mu     = .01;  // Pa s, dynamic viscosity
-  //TODO ^^ make optional/respect explicit user set
   CeedScalar theta0 = 300.; // K
   CeedScalar P0     = 1.e5; // Pa
   PetscOptionsBegin(comm, NULL, "Options for CHANNEL problem", NULL);
@@ -67,7 +54,6 @@ PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, void *setup_ctx,
   PetscScalar Kelvin = user->units->Kelvin;
   PetscScalar Pascal = user->units->Pascal;
 
-  mu     *= Pascal * second;
   theta0 *= Kelvin;
   P0     *= Pascal;
   umax   *= meter / second;
@@ -83,46 +69,42 @@ PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, void *setup_ctx,
     center = H + domain_min[1]*meter;
   }
 
-  user->phys->channel_ctx->center   = center;
-  user->phys->channel_ctx->H        = H;
-  user->phys->channel_ctx->theta0   = theta0;
-  user->phys->channel_ctx->P0       = P0;
-  user->phys->channel_ctx->umax     = umax;
-  user->phys->channel_ctx->implicit = user->phys->implicit;
-  user->phys->channel_ctx->B = -2*umax*mu/H;
+  // Some properties depend on parameters from NewtonianIdealGas
+  CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
+                              CEED_MEM_HOST, &newtonian_ig_ctx);
+
+  channel_ctx->center   = center;
+  channel_ctx->H        = H;
+  channel_ctx->theta0   = theta0;
+  channel_ctx->P0       = P0;
+  channel_ctx->umax     = umax;
+  channel_ctx->implicit = user->phys->implicit;
+  channel_ctx->B = -2*umax*newtonian_ig_ctx->mu/H;
 
   {
     // Calculate Body force
-    CeedScalar cv  = user->phys->newtonian_ig_ctx->cv,
-               cp  = user->phys->newtonian_ig_ctx->cp;
+    CeedScalar cv  = newtonian_ig_ctx->cv,
+               cp  = newtonian_ig_ctx->cp;
     CeedScalar Rd  = cp - cv;
     CeedScalar rho = P0 / (Rd*theta0);
-    CeedScalar g[] = {user->phys->channel_ctx->B / rho, 0., 0.};
-    ierr = PetscArraycpy(user->phys->newtonian_ig_ctx->g, g, 3); CHKERRQ(ierr);
+    CeedScalar g[] = {channel_ctx->B / rho, 0., 0.};
+    ierr = PetscArraycpy(newtonian_ig_ctx->g, g, 3); CHKERRQ(ierr);
   }
-  user->phys->newtonian_ig_ctx->mu = mu;
-  user->phys->channel_ctx->newtonian_ctx = *user->phys->newtonian_ig_ctx;
+  channel_ctx->newtonian_ctx = *newtonian_ig_ctx;
+  CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
+                                  &newtonian_ig_ctx);
 
-  PetscFunctionReturn(0);
-}
-
-PetscErrorCode SetupContext_CHANNEL(Ceed ceed, CeedData ceed_data,
-                                    AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
-  PetscFunctionBeginUser;
-  PetscInt ierr;
-  ierr = SetupContext_NEWTONIAN_IG(ceed, ceed_data, app_ctx, setup_ctx, phys);
-  CHKERRQ(ierr);
-  CeedQFunctionContextCreate(ceed, &ceed_data->channel_context);
-  CeedQFunctionContextSetData(ceed_data->channel_context, CEED_MEM_HOST,
+  CeedQFunctionContextCreate(user->ceed, &channel_context);
+  CeedQFunctionContextSetData(channel_context, CEED_MEM_HOST,
                               CEED_USE_POINTER,
-                              sizeof(*phys->channel_ctx), phys->channel_ctx);
-  phys->has_neumann = PETSC_TRUE;
-  if (ceed_data->qf_ics)
-    CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->channel_context);
-  if (ceed_data->qf_apply_inflow)
-    CeedQFunctionSetContext(ceed_data->qf_apply_inflow, ceed_data->channel_context);
-  if (ceed_data->qf_apply_outflow)
-    CeedQFunctionSetContext(ceed_data->qf_apply_outflow,
-                            ceed_data->channel_context);
+                              sizeof(*channel_ctx), channel_ctx);
+  CeedQFunctionContextSetDataDestroy(channel_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+
+  problem->ics.qfunction_context = channel_context;
+  CeedQFunctionContextReferenceCopy(channel_context,
+                                    &problem->apply_inflow.qfunction_context);
+  CeedQFunctionContextReferenceCopy(channel_context,
+                                    &problem->apply_outflow.qfunction_context);
   PetscFunctionReturn(0);
 }
diff --git a/examples/fluids/problems/densitycurrent.c b/examples/fluids/problems/densitycurrent.c
index 4ab836dd0c..2d66cf6d57 100644
--- a/examples/fluids/problems/densitycurrent.c
+++ b/examples/fluids/problems/densitycurrent.c
@@ -111,28 +111,3 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *setup_ctx,
 
   PetscFunctionReturn(0);
 }
-
-PetscErrorCode SetupContext_DENSITY_CURRENT(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
-  PetscFunctionBeginUser;
-  PetscInt ierr =
-    SetupContext_NEWTONIAN_IG(ceed, ceed_data, app_ctx, setup_ctx, phys);
-  CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
-PetscErrorCode PRINT_DENSITY_CURRENT(Physics phys, SetupContext setup_ctx,
-                                     AppCtx app_ctx) {
-  MPI_Comm comm = PETSC_COMM_WORLD;
-  PetscErrorCode ierr;
-  PetscFunctionBeginUser;
-
-  ierr = PetscPrintf(comm,
-                     "  Problem:\n"
-                     "    Problem Name                       : %s\n"
-                     "    Stabilization                      : %s\n",
-                     app_ctx->problem_name, StabilizationTypes[phys->stab]);
-  CHKERRQ(ierr);
-
-  PetscFunctionReturn(0);
-}
diff --git a/examples/fluids/problems/eulervortex.c b/examples/fluids/problems/eulervortex.c
index 4c90ea1c6c..c54c921e15 100644
--- a/examples/fluids/problems/eulervortex.c
+++ b/examples/fluids/problems/eulervortex.c
@@ -23,9 +23,11 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   PetscBool         has_curr_time = PETSC_TRUE;
   PetscBool         has_neumann = PETSC_TRUE;
   PetscInt          ierr;
-  PetscFunctionBeginUser;
+  EulerContext      euler_ctx;
+  CeedQFunctionContext euler_context;
 
-  ierr = PetscCalloc1(1, &user->phys->euler_ctx); CHKERRQ(ierr);
+  PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &euler_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
   //               SET UP DENSITY_CURRENT
@@ -48,7 +50,6 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_outflow.qfunction           = Euler_Outflow;
   problem->apply_outflow.qfunction_loc       = Euler_Outflow_loc;
   problem->bc                                = Exact_Euler;
-  problem->setup_ctx                         = SetupContext_EULER_VORTEX;
   problem->non_zero_time                     = PETSC_TRUE;
   problem->print_info                        = PRINT_EULER_VORTEX;
 
@@ -153,66 +154,59 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   user->phys->implicit                    = implicit;
   user->phys->has_curr_time               = has_curr_time;
   user->phys->has_neumann                 = has_neumann;
-  user->phys->euler_ctx->curr_time        = 0.;
-  user->phys->euler_ctx->implicit         = implicit;
-  user->phys->euler_ctx->euler_test       = euler_test;
-  user->phys->euler_ctx->center[0]        = center[0];
-  user->phys->euler_ctx->center[1]        = center[1];
-  user->phys->euler_ctx->center[2]        = center[2];
-  user->phys->euler_ctx->vortex_strength  = vortex_strength;
-  user->phys->euler_ctx->c_tau            = c_tau;
-  user->phys->euler_ctx->mean_velocity[0] = mean_velocity[0];
-  user->phys->euler_ctx->mean_velocity[1] = mean_velocity[1];
-  user->phys->euler_ctx->mean_velocity[2] = mean_velocity[2];
-  user->phys->euler_ctx->stabilization    = stab;
-
-  PetscFunctionReturn(0);
-}
-
-PetscErrorCode SetupContext_EULER_VORTEX(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
-  PetscFunctionBeginUser;
-  CeedQFunctionContextCreate(ceed, &ceed_data->setup_context);
-  CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER,
-                              sizeof(*setup_ctx), setup_ctx);
-  CeedQFunctionContextCreate(ceed, &ceed_data->euler_context);
-  CeedQFunctionContextSetData(ceed_data->euler_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER,
-                              sizeof(*phys->euler_ctx), phys->euler_ctx);
-  CeedQFunctionContextRegisterDouble(ceed_data->euler_context, "solution time",
+  euler_ctx->curr_time        = 0.;
+  euler_ctx->implicit         = implicit;
+  euler_ctx->euler_test       = euler_test;
+  euler_ctx->center[0]        = center[0];
+  euler_ctx->center[1]        = center[1];
+  euler_ctx->center[2]        = center[2];
+  euler_ctx->vortex_strength  = vortex_strength;
+  euler_ctx->c_tau            = c_tau;
+  euler_ctx->mean_velocity[0] = mean_velocity[0];
+  euler_ctx->mean_velocity[1] = mean_velocity[1];
+  euler_ctx->mean_velocity[2] = mean_velocity[2];
+  euler_ctx->stabilization    = stab;
+
+  CeedQFunctionContextCreate(user->ceed, &euler_context);
+  CeedQFunctionContextSetData(euler_context, CEED_MEM_HOST, CEED_USE_POINTER,
+                              sizeof(*euler_ctx), euler_ctx);
+  CeedQFunctionContextSetDataDestroy(euler_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+  CeedQFunctionContextRegisterDouble(euler_context, "solution time",
                                      offsetof(struct EulerContext_, curr_time), 1, "Phyiscal time of the solution");
-
-  if (ceed_data->qf_ics)
-    CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->euler_context);
-  if (ceed_data->qf_rhs_vol)
-    CeedQFunctionSetContext(ceed_data->qf_rhs_vol, ceed_data->euler_context);
-  if (ceed_data->qf_ifunction_vol)
-    CeedQFunctionSetContext(ceed_data->qf_ifunction_vol, ceed_data->euler_context);
-  if (ceed_data->qf_apply_inflow)
-    CeedQFunctionSetContext(ceed_data->qf_apply_inflow, ceed_data->euler_context);
-  if (ceed_data->qf_apply_outflow)
-    CeedQFunctionSetContext(ceed_data->qf_apply_outflow, ceed_data->euler_context);
+  problem->ics.qfunction_context = euler_context;
+  CeedQFunctionContextReferenceCopy(euler_context,
+                                    &problem->apply_vol_rhs.qfunction_context);
+  CeedQFunctionContextReferenceCopy(euler_context,
+                                    &problem->apply_vol_ifunction.qfunction_context);
+  CeedQFunctionContextReferenceCopy(euler_context,
+                                    &problem->apply_inflow.qfunction_context);
+  CeedQFunctionContextReferenceCopy(euler_context,
+                                    &problem->apply_outflow.qfunction_context);
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode PRINT_EULER_VORTEX(Physics phys, SetupContext setup_ctx,
+PetscErrorCode PRINT_EULER_VORTEX(ProblemData *problem, SetupContext setup_ctx, // Prints the Euler vortex problem summary; setup_ctx is not referenced in the body
                                   AppCtx app_ctx) {
   MPI_Comm       comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
-  PetscFunctionBeginUser;
+  EulerContext   euler_ctx; // Host view of the data held by problem->ics.qfunction_context
 
+  PetscFunctionBeginUser;
+  CeedQFunctionContextGetData(problem->ics.qfunction_context, CEED_MEM_HOST,
+                              &euler_ctx); // Borrow the context data; paired with the RestoreData call below
   ierr = PetscPrintf(comm,
                      "  Problem:\n"
                      "    Problem Name                       : %s\n"
                      "    Test Case                          : %s\n"
                      "    Background Velocity                : %f,%f,%f\n"
                      "    Stabilization                      : %s\n",
-                     app_ctx->problem_name, EulerTestTypes[phys->euler_test],
-                     phys->euler_ctx->mean_velocity[0],
-                     phys->euler_ctx->mean_velocity[1],
-                     phys->euler_ctx->mean_velocity[2],
-                     StabilizationTypes[phys->stab]); CHKERRQ(ierr);
+                     app_ctx->problem_name, EulerTestTypes[euler_ctx->euler_test],
+                     euler_ctx->mean_velocity[0],
+                     euler_ctx->mean_velocity[1],
+                     euler_ctx->mean_velocity[2],
+                     StabilizationTypes[euler_ctx->stabilization]); CHKERRQ(ierr);
 
+  CeedQFunctionContextRestoreData(problem->ics.qfunction_context, &euler_ctx); // Hand back the data borrowed by GetData above
   PetscFunctionReturn(0);
 }
diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c
index df610759c7..f6a842f813 100644
--- a/examples/fluids/problems/newtonian.c
+++ b/examples/fluids/problems/newtonian.c
@@ -12,28 +12,6 @@
 #include "../qfunctions/setupgeo.h"
 #include "../qfunctions/newtonian.h"
 
-
-#ifndef newtonian_context_struct
-#define newtonian_context_struct
-typedef struct NewtonianIdealGasContext_ *NewtonianIdealGasContext;
-struct NewtonianIdealGasContext_ {
-  CeedScalar lambda;
-  CeedScalar mu;
-  CeedScalar k;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g[3];
-  CeedScalar c_tau;
-  CeedScalar Ctau_t;
-  CeedScalar Ctau_v;
-  CeedScalar Ctau_C;
-  CeedScalar Ctau_M;
-  CeedScalar Ctau_E;
-  CeedScalar dt;
-  StabilizationType stabilization;
-};
-#endif
-
 PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
                                void *ctx) {
   SetupContext      setup_context = *(SetupContext *)setup_ctx;
@@ -43,9 +21,11 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   PetscBool         implicit;
   PetscBool         has_curr_time = PETSC_FALSE;
   PetscInt          ierr;
-  PetscFunctionBeginUser;
+  NewtonianIdealGasContext newtonian_ig_ctx;
+  CeedQFunctionContext newtonian_ig_context;
 
-  ierr = PetscCalloc1(1, &user->phys->newtonian_ig_ctx); CHKERRQ(ierr);
+  PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &newtonian_ig_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
   //           Setup Generic Newtonian IG Problem
@@ -63,7 +43,6 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_vol_rhs.qfunction_loc       = Newtonian_loc;
   problem->apply_vol_ifunction.qfunction     = IFunction_Newtonian;
   problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc;
-  problem->setup_ctx                         = SetupContext_DENSITY_CURRENT;
   problem->non_zero_time                     = PETSC_FALSE;
   problem->print_info                        = PRINT_DENSITY_CURRENT;
 
@@ -204,42 +183,58 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   user->phys->has_curr_time = has_curr_time;
 
   // -- QFunction Context
-  user->phys->newtonian_ig_ctx->lambda        = lambda;
-  user->phys->newtonian_ig_ctx->mu            = mu;
-  user->phys->newtonian_ig_ctx->k             = k;
-  user->phys->newtonian_ig_ctx->cv            = cv;
-  user->phys->newtonian_ig_ctx->cp            = cp;
-  user->phys->newtonian_ig_ctx->c_tau         = c_tau;
-  user->phys->newtonian_ig_ctx->Ctau_t        = Ctau_t;
-  user->phys->newtonian_ig_ctx->Ctau_v        = Ctau_v;
-  user->phys->newtonian_ig_ctx->Ctau_C        = Ctau_C;
-  user->phys->newtonian_ig_ctx->Ctau_M        = Ctau_M;
-  user->phys->newtonian_ig_ctx->Ctau_E        = Ctau_E;
-  user->phys->newtonian_ig_ctx->stabilization = stab;
-  ierr = PetscArraycpy(user->phys->newtonian_ig_ctx->g, g, 3); CHKERRQ(ierr);
-
+  newtonian_ig_ctx->lambda        = lambda;
+  newtonian_ig_ctx->mu            = mu;
+  newtonian_ig_ctx->k             = k;
+  newtonian_ig_ctx->cv            = cv;
+  newtonian_ig_ctx->cp            = cp;
+  newtonian_ig_ctx->c_tau         = c_tau;
+  newtonian_ig_ctx->Ctau_t        = Ctau_t;
+  newtonian_ig_ctx->Ctau_v        = Ctau_v;
+  newtonian_ig_ctx->Ctau_C        = Ctau_C;
+  newtonian_ig_ctx->Ctau_M        = Ctau_M;
+  newtonian_ig_ctx->Ctau_E        = Ctau_E;
+  newtonian_ig_ctx->stabilization = stab;
+  ierr = PetscArraycpy(newtonian_ig_ctx->g, g, 3); CHKERRQ(ierr);
+
+  CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context);
+  CeedQFunctionContextSetData(problem->ics.qfunction_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, sizeof(*setup_context), setup_context);
+  CeedQFunctionContextRegisterDouble(problem->ics.qfunction_context,
+                                     "evaluation time",
+                                     (char *)&setup_context->time - (char *)setup_context, 1, "Time of evaluation");
+
+  CeedQFunctionContextCreate(user->ceed, &newtonian_ig_context);
+  CeedQFunctionContextSetData(newtonian_ig_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER,
+                              sizeof(*newtonian_ig_ctx), newtonian_ig_ctx);
+  CeedQFunctionContextSetDataDestroy(newtonian_ig_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+  CeedQFunctionContextRegisterDouble(newtonian_ig_context, "timestep size",
+                                     offsetof(struct NewtonianIdealGasContext_, dt), 1, "Size of timestep, delta t");
+  problem->apply_vol_rhs.qfunction_context = newtonian_ig_context;
+  CeedQFunctionContextReferenceCopy(newtonian_ig_context,
+                                    &problem->apply_vol_ifunction.qfunction_context);
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode SetupContext_NEWTONIAN_IG(Ceed ceed, CeedData ceed_data,
-    AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
-  PetscFunctionBeginUser;
-  CeedQFunctionContextCreate(ceed, &ceed_data->setup_context);
-  CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER, sizeof(*setup_ctx), setup_ctx);
-  CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->setup_context);
-
-  CeedQFunctionContextCreate(ceed, &ceed_data->newt_ig_context);
-  CeedQFunctionContextSetData(ceed_data->newt_ig_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER,
-                              sizeof(*phys->newtonian_ig_ctx), phys->newtonian_ig_ctx);
-  CeedQFunctionContextRegisterDouble(ceed_data->newt_ig_context, "timestep size",
-                                     offsetof(struct NewtonianIdealGasContext_, dt), 1, "Size of timestep, delta t");
+PetscErrorCode PRINT_DENSITY_CURRENT(ProblemData *problem, // Prints the problem name and stabilization type
+                                     SetupContext setup_ctx, // Not referenced in the body
+                                     AppCtx app_ctx) {
+  MPI_Comm comm = PETSC_COMM_WORLD;
+  PetscErrorCode ierr;
+  NewtonianIdealGasContext newtonian_ctx; // Host view of the data held by problem->apply_vol_rhs.qfunction_context
 
-  if (ceed_data->qf_rhs_vol)
-    CeedQFunctionSetContext(ceed_data->qf_rhs_vol, ceed_data->newt_ig_context);
-  if (ceed_data->qf_ifunction_vol)
-    CeedQFunctionSetContext(ceed_data->qf_ifunction_vol,
-                            ceed_data->newt_ig_context);
+  PetscFunctionBeginUser;
+  CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
+                              CEED_MEM_HOST, &newtonian_ctx); // Borrow the context data; paired with the RestoreData call below
+  ierr = PetscPrintf(comm,
+                     "  Problem:\n"
+                     "    Problem Name                       : %s\n"
+                     "    Stabilization                      : %s\n",
+                     app_ctx->problem_name, StabilizationTypes[newtonian_ctx->stabilization]);
+  CHKERRQ(ierr);
+  CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
+                                  &newtonian_ctx); // Hand back the data borrowed by GetData above
   PetscFunctionReturn(0);
 }
diff --git a/examples/fluids/problems/shocktube.c b/examples/fluids/problems/shocktube.c
index 7492311efe..a5e98fa519 100644
--- a/examples/fluids/problems/shocktube.c
+++ b/examples/fluids/problems/shocktube.c
@@ -31,9 +31,12 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
   PetscInt          stab;
   PetscBool         has_curr_time = PETSC_FALSE;
   PetscInt          ierr;
-  PetscFunctionBeginUser;
+  ShockTubeContext  shocktube_ctx;
+  CeedQFunctionContext shocktube_context;
+
 
-  ierr = PetscCalloc1(1, &user->phys->shocktube_ctx); CHKERRQ(ierr);
+  PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &shocktube_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
   //               SET UP SHOCKTUBE
@@ -52,7 +55,6 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_vol_ifunction.qfunction     = NULL;
   problem->apply_vol_ifunction.qfunction_loc = NULL;
   problem->bc                                = Exact_ShockTube;
-  problem->setup_ctx                         = SetupContext_SHOCKTUBE;
   problem->non_zero_time                     = PETSC_FALSE;
   problem->print_info                        = PRINT_SHOCKTUBE;
 
@@ -151,38 +153,28 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
   // -- QFunction Context
   user->phys->implicit                      = implicit;
   user->phys->has_curr_time                 = has_curr_time;
-  user->phys->shocktube_ctx->implicit       = implicit;
-  user->phys->shocktube_ctx->stabilization  = stab;
-  user->phys->shocktube_ctx->yzb            = yzb;
-  user->phys->shocktube_ctx->Cyzb           = Cyzb;
-  user->phys->shocktube_ctx->Byzb           = Byzb;
-  user->phys->shocktube_ctx->c_tau          = c_tau;
-
-  PetscFunctionReturn(0);
-}
-
-PetscErrorCode SetupContext_SHOCKTUBE(Ceed ceed, CeedData ceed_data,
-                                      AppCtx app_ctx, SetupContext setup_ctx, Physics phys) {
-  PetscFunctionBeginUser;
-
-  CeedQFunctionContextCreate(ceed, &ceed_data->setup_context);
-  CeedQFunctionContextSetData(ceed_data->setup_context, CEED_MEM_HOST,
-                              CEED_USE_POINTER, sizeof(*setup_ctx), setup_ctx);
-  CeedQFunctionSetContext(ceed_data->qf_ics, ceed_data->setup_context);
-  CeedQFunctionContextCreate(ceed, &ceed_data->shocktube_context);
-  CeedQFunctionContextSetData(ceed_data->shocktube_context, CEED_MEM_HOST,
+  shocktube_ctx->implicit       = implicit;
+  shocktube_ctx->stabilization  = stab;
+  shocktube_ctx->yzb            = yzb;
+  shocktube_ctx->Cyzb           = Cyzb;
+  shocktube_ctx->Byzb           = Byzb;
+  shocktube_ctx->c_tau          = c_tau;
+
+  CeedQFunctionContextCreate(user->ceed, &problem->ics.qfunction_context);
+  CeedQFunctionContextSetData(problem->ics.qfunction_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, sizeof(*setup_context), setup_context);
+
+  CeedQFunctionContextCreate(user->ceed, &shocktube_context);
+  CeedQFunctionContextSetData(shocktube_context, CEED_MEM_HOST,
                               CEED_USE_POINTER,
-                              sizeof(*phys->shocktube_ctx), phys->shocktube_ctx);
-  if (ceed_data->qf_rhs_vol)
-    CeedQFunctionSetContext(ceed_data->qf_rhs_vol, ceed_data->shocktube_context);
-  if (ceed_data->qf_ifunction_vol)
-    CeedQFunctionSetContext(ceed_data->qf_ifunction_vol,
-                            ceed_data->shocktube_context);
-
+                              sizeof(*shocktube_ctx), shocktube_ctx);
+  CeedQFunctionContextSetDataDestroy(shocktube_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+  problem->apply_vol_rhs.qfunction_context = shocktube_context;
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode PRINT_SHOCKTUBE(Physics phys, SetupContext setup_ctx,
+PetscErrorCode PRINT_SHOCKTUBE(ProblemData *problem, SetupContext setup_ctx,
                                AppCtx app_ctx) {
   MPI_Comm       comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
diff --git a/examples/fluids/qfunctions/advection.h b/examples/fluids/qfunctions/advection.h
index 2ed50f8335..3b6de2ae39 100644
--- a/examples/fluids/qfunctions/advection.h
+++ b/examples/fluids/qfunctions/advection.h
@@ -43,8 +43,6 @@ struct SetupContext_ {
 };
 #endif
 
-#ifndef advection_context_struct
-#define advection_context_struct
 typedef struct AdvectionContext_ *AdvectionContext;
 struct AdvectionContext_ {
   CeedScalar CtauS;
@@ -53,7 +51,6 @@ struct AdvectionContext_ {
   bool implicit;
   int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
 };
-#endif
 
 // *****************************************************************************
 // This QFunction sets the initial conditions and the boundary conditions
diff --git a/examples/fluids/qfunctions/advection2d.h b/examples/fluids/qfunctions/advection2d.h
index 11e00dc5a8..41d23cee1b 100644
--- a/examples/fluids/qfunctions/advection2d.h
+++ b/examples/fluids/qfunctions/advection2d.h
@@ -47,8 +47,6 @@ struct SetupContext_ {
 };
 #endif
 
-#ifndef advection_context_struct
-#define advection_context_struct
 typedef struct AdvectionContext_ *AdvectionContext;
 struct AdvectionContext_ {
   CeedScalar CtauS;
@@ -57,7 +55,6 @@ struct AdvectionContext_ {
   bool implicit;
   int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
 };
-#endif
 
 // *****************************************************************************
 // This QFunction sets the initial conditions and the boundary conditions
diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h
index e8c0261a0c..99279ae68d 100644
--- a/examples/fluids/qfunctions/blasius.h
+++ b/examples/fluids/qfunctions/blasius.h
@@ -14,10 +14,8 @@
 
 #include <math.h>
 #include <ceed.h>
-#include "../navierstokes.h"
+#include "newtonian_types.h"
 
-#ifndef blasius_context_struct
-#define blasius_context_struct
 typedef struct BlasiusContext_ *BlasiusContext;
 struct BlasiusContext_ {
   bool       implicit;  // !< Using implicit timesteping or not
@@ -28,7 +26,6 @@ struct BlasiusContext_ {
   CeedScalar theta0;    // !< Temperature at inflow
   struct NewtonianIdealGasContext_ newtonian_ctx;
 };
-#endif
 
 #ifndef M_PI
 #define M_PI    3.14159265358979323846
diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h
index 14f8feeaf8..31ed0c2925 100644
--- a/examples/fluids/qfunctions/channel.h
+++ b/examples/fluids/qfunctions/channel.h
@@ -14,10 +14,8 @@
 
 #include <math.h>
 #include <ceed.h>
-#include "../navierstokes.h"
+#include "newtonian_types.h"
 
-#ifndef channel_context_struct
-#define channel_context_struct
 typedef struct ChannelContext_ *ChannelContext;
 struct ChannelContext_ {
   bool       implicit; // !< Using implicit timesteping or not
@@ -29,7 +27,6 @@ struct ChannelContext_ {
   CeedScalar B;        // !< Body-force driving the flow
   struct NewtonianIdealGasContext_ newtonian_ctx;
 };
-#endif
 
 CEED_QFUNCTION_HELPER int Exact_Channel(CeedInt dim, CeedScalar time,
                                         const CeedScalar X[], CeedInt Nf, CeedScalar q[], void *ctx) {
diff --git a/examples/fluids/qfunctions/eulervortex.h b/examples/fluids/qfunctions/eulervortex.h
index 578ba6f27b..c705e14941 100644
--- a/examples/fluids/qfunctions/eulervortex.h
+++ b/examples/fluids/qfunctions/eulervortex.h
@@ -23,8 +23,6 @@
 #define M_PI    3.14159265358979323846
 #endif
 
-#ifndef euler_context_struct
-#define euler_context_struct
 typedef struct EulerContext_ *EulerContext;
 struct EulerContext_ {
   CeedScalar center[3];
@@ -36,7 +34,6 @@ struct EulerContext_ {
   int euler_test;
   int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
 };
-#endif
 
 // *****************************************************************************
 // This function sets the initial conditions
diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h
index d1467cec57..05fd2b093f 100644
--- a/examples/fluids/qfunctions/newtonian.h
+++ b/examples/fluids/qfunctions/newtonian.h
@@ -14,6 +14,7 @@
 
 #include <math.h>
 #include <ceed.h>
+#include "newtonian_types.h"
 
 #ifndef M_PI
 #define M_PI    3.14159265358979323846
@@ -44,33 +45,6 @@ struct SetupContext_ {
 };
 #endif
 
-#ifndef newtonian_context_struct
-#define newtonian_context_struct
-typedef enum {
-  STAB_NONE = 0,
-  STAB_SU   = 1, // Streamline Upwind
-  STAB_SUPG = 2, // Streamline Upwind Petrov-Galerkin
-} StabilizationType;
-
-typedef struct NewtonianIdealGasContext_ *NewtonianIdealGasContext;
-struct NewtonianIdealGasContext_ {
-  CeedScalar lambda;
-  CeedScalar mu;
-  CeedScalar k;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g[3];
-  CeedScalar c_tau;
-  CeedScalar Ctau_t;
-  CeedScalar Ctau_v;
-  CeedScalar Ctau_C;
-  CeedScalar Ctau_M;
-  CeedScalar Ctau_E;
-  CeedScalar dt;
-  StabilizationType stabilization;
-};
-#endif
-
 // *****************************************************************************
 // Helper function for computing flux Jacobian
 // *****************************************************************************
diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h
new file mode 100644
index 0000000000..7fbaa4575a
--- /dev/null
+++ b/examples/fluids/qfunctions/newtonian_types.h
@@ -0,0 +1,25 @@
+#ifndef newtonian_types_h
+#define newtonian_types_h
+
+#include <ceed/ceed.h>
+#include "stabilization_types.h" // Provides StabilizationType used by the struct below
+
+typedef struct NewtonianIdealGasContext_ *NewtonianIdealGasContext; // Pointer handle to the context struct below
+struct NewtonianIdealGasContext_ { // Plain-data QFunction context; also embedded by value in BlasiusContext_ and ChannelContext_
+  CeedScalar lambda; // NOTE(review): presumably the Stokes-hypothesis coefficient - confirm
+  CeedScalar mu;     // NOTE(review): presumably dynamic viscosity - confirm
+  CeedScalar k;      // NOTE(review): presumably thermal conductivity - confirm
+  CeedScalar cv;     // NOTE(review): presumably specific heat at constant volume - confirm
+  CeedScalar cp;     // NOTE(review): presumably specific heat at constant pressure - confirm
+  CeedScalar g[3];   // Three-component vector; NOTE(review): presumably gravitational acceleration - confirm
+  CeedScalar c_tau;  // NOTE(review): c_tau and the Ctau_* fields look like stabilization tuning coefficients - confirm
+  CeedScalar Ctau_t;
+  CeedScalar Ctau_v;
+  CeedScalar Ctau_C;
+  CeedScalar Ctau_M;
+  CeedScalar Ctau_E;
+  CeedScalar dt;     // Size of timestep, delta t
+  StabilizationType stabilization; // Selected stabilization scheme; see stabilization_types.h
+};
+
+#endif // newtonian_types_h
diff --git a/examples/fluids/qfunctions/shocktube.h b/examples/fluids/qfunctions/shocktube.h
index c9e0c9498b..f0b5cc6b35 100644
--- a/examples/fluids/qfunctions/shocktube.h
+++ b/examples/fluids/qfunctions/shocktube.h
@@ -61,8 +61,6 @@ struct SetupContext_ {
 };
 #endif
 
-#ifndef shocktube_context_struct
-#define shocktube_context_struct
 typedef struct ShockTubeContext_ *ShockTubeContext;
 struct ShockTubeContext_ {
   CeedScalar Cyzb;
@@ -72,7 +70,6 @@ struct ShockTubeContext_ {
   bool yzb;
   int stabilization;
 };
-#endif
 
 // *****************************************************************************
 // This function sets the initial conditions
diff --git a/examples/fluids/qfunctions/stabilization_types.h b/examples/fluids/qfunctions/stabilization_types.h
new file mode 100644
index 0000000000..7e484df200
--- /dev/null
+++ b/examples/fluids/qfunctions/stabilization_types.h
@@ -0,0 +1,10 @@
+#ifndef stabilization_types_h
+#define stabilization_types_h
+
+typedef enum { // Stabilization scheme selector stored in the QFunction context structs
+  STAB_NONE = 0, // No stabilization
+  STAB_SU   = 1, // Streamline Upwind
+  STAB_SUPG = 2, // Streamline Upwind Petrov-Galerkin
+} StabilizationType; // Values also index the StabilizationTypes name table when problem info is printed
+
+#endif // stabilization_types_h
diff --git a/examples/fluids/src/misc.c b/examples/fluids/src/misc.c
index b7b83748e4..2a7398ed89 100644
--- a/examples/fluids/src/misc.c
+++ b/examples/fluids/src/misc.c
@@ -10,7 +10,8 @@
 
 #include "../navierstokes.h"
 
-PetscErrorCode ICs_FixMultiplicity(DM dm, CeedData ceed_data, Vec Q_loc, Vec Q,
+PetscErrorCode ICs_FixMultiplicity(DM dm, CeedData ceed_data, User user,
+                                   Vec Q_loc, Vec Q,
                                    CeedScalar time) {
   PetscErrorCode ierr;
   PetscFunctionBeginUser;
@@ -18,11 +19,9 @@ PetscErrorCode ICs_FixMultiplicity(DM dm, CeedData ceed_data, Vec Q_loc, Vec Q,
   // ---------------------------------------------------------------------------
   // Update SetupContext
   // ---------------------------------------------------------------------------
-  SetupContext setup_ctx;
-  CeedQFunctionContextGetData(ceed_data->setup_context, CEED_MEM_HOST,
-                              (void **)&setup_ctx);
-  setup_ctx->time = time;
-  CeedQFunctionContextRestoreData(ceed_data->setup_context, (void **)&setup_ctx);
+  if (user->phys->ics_time_label)
+    CeedOperatorContextSetDouble(ceed_data->op_ics, user->phys->ics_time_label,
+                                 &time);
 
   // ---------------------------------------------------------------------------
   // ICs
@@ -151,7 +150,7 @@ PetscErrorCode RegressionTests_NS(AppCtx app_ctx, Vec Q) {
 }
 
 // Get error for problems with exact solutions
-PetscErrorCode GetError_NS(CeedData ceed_data, DM dm, AppCtx app_ctx, Vec Q,
+PetscErrorCode GetError_NS(CeedData ceed_data, DM dm, User user, Vec Q,
                            PetscScalar final_time) {
   PetscInt       loc_nodes;
   Vec            Q_exact, Q_exact_loc;
@@ -163,7 +162,8 @@ PetscErrorCode GetError_NS(CeedData ceed_data, DM dm, AppCtx app_ctx, Vec Q,
   ierr = DMCreateGlobalVector(dm, &Q_exact); CHKERRQ(ierr);
   ierr = DMGetLocalVector(dm, &Q_exact_loc); CHKERRQ(ierr);
   ierr = VecGetSize(Q_exact_loc, &loc_nodes); CHKERRQ(ierr);
-  ierr = ICs_FixMultiplicity(dm, ceed_data, Q_exact_loc, Q_exact, final_time);
+  ierr = ICs_FixMultiplicity(dm, ceed_data, user, Q_exact_loc, Q_exact,
+                             final_time);
   CHKERRQ(ierr);
 
   // Get |exact solution - obtained solution|
@@ -187,20 +187,20 @@ PetscErrorCode GetError_NS(CeedData ceed_data, DM dm, AppCtx app_ctx, Vec Q,
 
 // Post-processing
 PetscErrorCode PostProcess_NS(TS ts, CeedData ceed_data, DM dm,
-                              ProblemData *problem, AppCtx app_ctx,
+                              ProblemData *problem, User user,
                               Vec Q, PetscScalar final_time) {
   PetscInt       steps;
   PetscErrorCode ierr;
   PetscFunctionBegin;
 
   // Print relative error
-  if (problem->non_zero_time && !app_ctx->test_mode) {
-    ierr = GetError_NS(ceed_data, dm, app_ctx, Q, final_time); CHKERRQ(ierr);
+  if (problem->non_zero_time && !user->app_ctx->test_mode) {
+    ierr = GetError_NS(ceed_data, dm, user, Q, final_time); CHKERRQ(ierr);
   }
 
   // Print final time and number of steps
   ierr = TSGetStepNumber(ts, &steps); CHKERRQ(ierr);
-  if (!app_ctx->test_mode) {
+  if (!user->app_ctx->test_mode) {
     ierr = PetscPrintf(PETSC_COMM_WORLD,
                        "Time integrator took %" PetscInt_FMT " time steps to reach final time %g\n",
                        steps, (double)final_time); CHKERRQ(ierr);
@@ -210,8 +210,8 @@ PetscErrorCode PostProcess_NS(TS ts, CeedData ceed_data, DM dm,
   ierr = VecViewFromOptions(Q, NULL, "-vec_view"); CHKERRQ(ierr);
 
   // Compare reference solution values with current test run for CI
-  if (app_ctx->test_mode) {
-    ierr = RegressionTests_NS(app_ctx, Q); CHKERRQ(ierr);
+  if (user->app_ctx->test_mode) {
+    ierr = RegressionTests_NS(user->app_ctx, Q); CHKERRQ(ierr);
   }
 
   PetscFunctionReturn(0);
@@ -259,3 +259,10 @@ PetscErrorCode SetBCsFromICs_NS(DM dm, Vec Q, Vec Q_loc) {
 
   PetscFunctionReturn(0);
 }
+
+// Release context data that was allocated with PETSc, reporting any failure as a libCEED error code
+int FreeContextPetsc(void *data) {
+  PetscErrorCode ierr = PetscFree(data);
+  if (ierr) return CeedError(NULL, CEED_ERROR_ACCESS, "PetscFree failed");
+  return CEED_ERROR_SUCCESS;
+}
diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c
index c640ed0ca4..a9e5eac1bd 100644
--- a/examples/fluids/src/setuplibceed.c
+++ b/examples/fluids/src/setuplibceed.c
@@ -260,6 +260,11 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   CeedQFunctionCreateInterior(ceed, 1, problem->setup_vol.qfunction,
                               problem->setup_vol.qfunction_loc,
                               &ceed_data->qf_setup_vol);
+  if (problem->setup_vol.qfunction_context) {
+    CeedQFunctionSetContext(ceed_data->qf_setup_vol,
+                            problem->setup_vol.qfunction_context);
+    CeedQFunctionContextDestroy(&problem->setup_vol.qfunction_context);
+  }
   CeedQFunctionAddInput(ceed_data->qf_setup_vol, "dx", num_comp_x*dim,
                         CEED_EVAL_GRAD);
   CeedQFunctionAddInput(ceed_data->qf_setup_vol, "weight", 1, CEED_EVAL_WEIGHT);
@@ -270,6 +275,8 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   CeedQFunctionCreateInterior(ceed, 1, problem->ics.qfunction,
                               problem->ics.qfunction_loc,
                               &ceed_data->qf_ics);
+  CeedQFunctionSetContext(ceed_data->qf_ics, problem->ics.qfunction_context);
+  CeedQFunctionContextDestroy(&problem->ics.qfunction_context);
   CeedQFunctionAddInput(ceed_data->qf_ics, "x", num_comp_x, CEED_EVAL_INTERP);
   CeedQFunctionAddOutput(ceed_data->qf_ics, "q0", num_comp_q, CEED_EVAL_NONE);
 
@@ -277,6 +284,9 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   if (problem->apply_vol_rhs.qfunction) {
     CeedQFunctionCreateInterior(ceed, 1, problem->apply_vol_rhs.qfunction,
                                 problem->apply_vol_rhs.qfunction_loc, &ceed_data->qf_rhs_vol);
+    CeedQFunctionSetContext(ceed_data->qf_rhs_vol,
+                            problem->apply_vol_rhs.qfunction_context);
+    CeedQFunctionContextDestroy(&problem->apply_vol_rhs.qfunction_context);
     CeedQFunctionAddInput(ceed_data->qf_rhs_vol, "q", num_comp_q, CEED_EVAL_INTERP);
     CeedQFunctionAddInput(ceed_data->qf_rhs_vol, "dq", num_comp_q*dim,
                           CEED_EVAL_GRAD);
@@ -293,6 +303,9 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   if (problem->apply_vol_ifunction.qfunction) {
     CeedQFunctionCreateInterior(ceed, 1, problem->apply_vol_ifunction.qfunction,
                                 problem->apply_vol_ifunction.qfunction_loc, &ceed_data->qf_ifunction_vol);
+    CeedQFunctionSetContext(ceed_data->qf_ifunction_vol,
+                            problem->apply_vol_ifunction.qfunction_context);
+    CeedQFunctionContextDestroy(&problem->apply_vol_ifunction.qfunction_context);
     CeedQFunctionAddInput(ceed_data->qf_ifunction_vol, "q", num_comp_q,
                           CEED_EVAL_INTERP);
     CeedQFunctionAddInput(ceed_data->qf_ifunction_vol, "dq", num_comp_q*dim,
@@ -356,6 +369,8 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
                        ceed_data->basis_xc, CEED_VECTOR_ACTIVE);
   CeedOperatorSetField(ceed_data->op_ics, "q0", ceed_data->elem_restr_q,
                        CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
+  CeedOperatorContextGetFieldLabel(ceed_data->op_ics, "evaluation time",
+                                   &user->phys->ics_time_label);
 
   // Create CEED operator for RHS
   if (ceed_data->qf_rhs_vol) {
@@ -421,6 +436,11 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   CeedQFunctionCreateInterior(ceed, 1, problem->setup_sur.qfunction,
                               problem->setup_sur.qfunction_loc,
                               &ceed_data->qf_setup_sur);
+  if (problem->setup_sur.qfunction_context) {
+    CeedQFunctionSetContext(ceed_data->qf_setup_sur,
+                            problem->setup_sur.qfunction_context);
+    CeedQFunctionContextDestroy(&problem->setup_sur.qfunction_context);
+  }
   CeedQFunctionAddInput(ceed_data->qf_setup_sur, "dx", num_comp_x*dim_sur,
                         CEED_EVAL_GRAD);
   CeedQFunctionAddInput(ceed_data->qf_setup_sur, "weight", 1, CEED_EVAL_WEIGHT);
@@ -431,6 +451,9 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   if (problem->apply_inflow.qfunction) {
     CeedQFunctionCreateInterior(ceed, 1, problem->apply_inflow.qfunction,
                                 problem->apply_inflow.qfunction_loc, &ceed_data->qf_apply_inflow);
+    CeedQFunctionSetContext(ceed_data->qf_apply_inflow,
+                            problem->apply_inflow.qfunction_context);
+    CeedQFunctionContextDestroy(&problem->apply_inflow.qfunction_context);
     CeedQFunctionAddInput(ceed_data->qf_apply_inflow, "q", num_comp_q,
                           CEED_EVAL_INTERP);
     CeedQFunctionAddInput(ceed_data->qf_apply_inflow, "surface qdata",
@@ -445,6 +468,9 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   if (problem->apply_outflow.qfunction) {
     CeedQFunctionCreateInterior(ceed, 1, problem->apply_outflow.qfunction,
                                 problem->apply_outflow.qfunction_loc, &ceed_data->qf_apply_outflow);
+    CeedQFunctionSetContext(ceed_data->qf_apply_outflow,
+                            problem->apply_outflow.qfunction_context);
+    CeedQFunctionContextDestroy(&problem->apply_outflow.qfunction_context);
     CeedQFunctionAddInput(ceed_data->qf_apply_outflow, "q", num_comp_q,
                           CEED_EVAL_INTERP);
     CeedQFunctionAddInput(ceed_data->qf_apply_outflow, "surface qdata",
@@ -462,10 +488,6 @@ PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
   CeedOperatorApply(ceed_data->op_setup_vol, ceed_data->x_coord,
                     ceed_data->q_data, CEED_REQUEST_IMMEDIATE);
 
-  // -- Set up context for QFunctions
-  ierr = problem->setup_ctx(ceed, ceed_data, app_ctx, setup_ctx, user->phys);
-  CHKERRQ(ierr);
-
   // -- Create and apply CEED Composite Operator for the entire domain
   if (!user->phys->implicit) { // RHS
     ierr = CreateOperatorForDomain(ceed, dm, bc, ceed_data, user->phys,

From c95f9967850094821b23829a90999d6fd511c6a5 Mon Sep 17 00:00:00 2001
From: Jed Brown <jed@jedbrown.org>
Date: Tue, 10 May 2022 07:22:57 -0600
Subject: [PATCH 42/59] examples/fluids: reduce prevalence of top-level
 setup_ctx

---
 examples/fluids/navierstokes.c            |  6 +++---
 examples/fluids/navierstokes.h            |  7 ++++---
 examples/fluids/problems/advection.c      |  1 +
 examples/fluids/problems/advection2d.c    |  1 +
 examples/fluids/problems/densitycurrent.c |  1 +
 examples/fluids/problems/eulervortex.c    |  1 +
 examples/fluids/problems/shocktube.c      |  1 +
 examples/fluids/src/setupdm.c             | 14 +++++++-------
 examples/fluids/src/setuplibceed.c        |  2 +-
 9 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c
index 84ec0fdfcb..9a43722008 100644
--- a/examples/fluids/navierstokes.c
+++ b/examples/fluids/navierstokes.c
@@ -138,12 +138,12 @@ int main(int argc, char **argv) {
   }
 
   // -- Set up DM
-  ierr = SetUpDM(dm, problem, app_ctx->degree, bc, phys_ctx, setup_ctx);
+  ierr = SetUpDM(dm, problem, app_ctx->degree, bc, phys_ctx);
   CHKERRQ(ierr);
 
   // -- Refine DM for high-order viz
   if (app_ctx->viz_refine) {
-    ierr = VizRefineDM(dm, user, problem, bc, phys_ctx, setup_ctx);
+    ierr = VizRefineDM(dm, user, problem, bc, phys_ctx);
     CHKERRQ(ierr);
   }
 
@@ -151,7 +151,7 @@ int main(int argc, char **argv) {
   // Set up libCEED
   // ---------------------------------------------------------------------------
   // -- Set up libCEED objects
-  ierr = SetupLibceed(ceed, ceed_data, dm, user, app_ctx, problem, bc, setup_ctx);
+  ierr = SetupLibceed(ceed, ceed_data, dm, user, app_ctx, problem, bc);
   CHKERRQ(ierr);
 
   // ---------------------------------------------------------------------------
diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h
index 8b92c8e5b7..c1890dc662 100644
--- a/examples/fluids/navierstokes.h
+++ b/examples/fluids/navierstokes.h
@@ -240,6 +240,7 @@ struct ProblemData_private {
   bool              non_zero_time;
   PetscErrorCode    (*bc)(PetscInt, PetscReal, const PetscReal[], PetscInt,
                           PetscScalar[], void *);
+  void *bc_ctx;
   PetscErrorCode    (*print_info)(ProblemData*, SetupContext, AppCtx);
 };
 // *INDENT-ON*
@@ -315,7 +316,7 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc,
                                        CeedOperator *op_apply);
 
 PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
-                            AppCtx app_ctx, ProblemData *problem, SimpleBC bc, SetupContext setup_ctx);
+                            AppCtx app_ctx, ProblemData *problem, SimpleBC bc);
 
 // -----------------------------------------------------------------------------
 // Time-stepping functions
@@ -347,11 +348,11 @@ PetscErrorCode CreateDM(MPI_Comm comm, ProblemData *problem, DM *dm);
 
 // Set up DM
 PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree,
-                       SimpleBC bc, Physics phys, void *setup_ctx);
+                       SimpleBC bc, Physics phys);
 
 // Refine DM for high-order viz
 PetscErrorCode VizRefineDM(DM dm, User user, ProblemData *problem,
-                           SimpleBC bc, Physics phys, void *setup_ctx);
+                           SimpleBC bc, Physics phys);
 
 // -----------------------------------------------------------------------------
 // Process command line options
diff --git a/examples/fluids/problems/advection.c b/examples/fluids/problems/advection.c
index bed865d370..e5ec5474a9 100644
--- a/examples/fluids/problems/advection.c
+++ b/examples/fluids/problems/advection.c
@@ -49,6 +49,7 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_inflow.qfunction            = Advection_InOutFlow;
   problem->apply_inflow.qfunction_loc        = Advection_InOutFlow_loc;
   problem->bc                                = Exact_Advection;
+  problem->bc_ctx                            = setup_ctx;
   problem->non_zero_time                     = PETSC_FALSE;
   problem->print_info                        = PRINT_ADVECTION;
 
diff --git a/examples/fluids/problems/advection2d.c b/examples/fluids/problems/advection2d.c
index 632bb522f9..759e8e53af 100644
--- a/examples/fluids/problems/advection2d.c
+++ b/examples/fluids/problems/advection2d.c
@@ -48,6 +48,7 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_inflow.qfunction            = Advection2d_InOutFlow;
   problem->apply_inflow.qfunction_loc        = Advection2d_InOutFlow_loc;
   problem->bc                                = Exact_Advection2d;
+  problem->bc_ctx                            = setup_ctx;
   problem->non_zero_time                     = PETSC_TRUE;
   problem->print_info                        = PRINT_ADVECTION2D;
 
diff --git a/examples/fluids/problems/densitycurrent.c b/examples/fluids/problems/densitycurrent.c
index 2d66cf6d57..11d67a81fd 100644
--- a/examples/fluids/problems/densitycurrent.c
+++ b/examples/fluids/problems/densitycurrent.c
@@ -29,6 +29,7 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *setup_ctx,
   problem->ics.qfunction = ICsDC;
   problem->ics.qfunction_loc = ICsDC_loc;
   problem->bc = Exact_DC;
+  problem->bc_ctx = setup_ctx;
 
   // ------------------------------------------------------
   //             Create the libCEED context
diff --git a/examples/fluids/problems/eulervortex.c b/examples/fluids/problems/eulervortex.c
index c54c921e15..a0b094ed42 100644
--- a/examples/fluids/problems/eulervortex.c
+++ b/examples/fluids/problems/eulervortex.c
@@ -50,6 +50,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_outflow.qfunction           = Euler_Outflow;
   problem->apply_outflow.qfunction_loc       = Euler_Outflow_loc;
   problem->bc                                = Exact_Euler;
+  problem->bc_ctx                            = setup_ctx;
   problem->non_zero_time                     = PETSC_TRUE;
   problem->print_info                        = PRINT_EULER_VORTEX;
 
diff --git a/examples/fluids/problems/shocktube.c b/examples/fluids/problems/shocktube.c
index a5e98fa519..fefea2d651 100644
--- a/examples/fluids/problems/shocktube.c
+++ b/examples/fluids/problems/shocktube.c
@@ -55,6 +55,7 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_vol_ifunction.qfunction     = NULL;
   problem->apply_vol_ifunction.qfunction_loc = NULL;
   problem->bc                                = Exact_ShockTube;
+  problem->bc_ctx                            = setup_ctx;
   problem->non_zero_time                     = PETSC_FALSE;
   problem->print_info                        = PRINT_SHOCKTUBE;
 
diff --git a/examples/fluids/src/setupdm.c b/examples/fluids/src/setupdm.c
index 4f1a3538d3..cefea0b012 100644
--- a/examples/fluids/src/setupdm.c
+++ b/examples/fluids/src/setupdm.c
@@ -27,7 +27,7 @@ PetscErrorCode CreateDM(MPI_Comm comm, ProblemData *problem, DM *dm) {
 
 // Setup DM
 PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree,
-                       SimpleBC bc, Physics phys, void *setup_ctx) {
+                       SimpleBC bc, Physics phys) {
   PetscErrorCode ierr;
   PetscFunctionBeginUser;
   {
@@ -47,28 +47,28 @@ PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree,
       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label,
                            bc->num_wall, bc->walls, 0, bc->num_comps,
                            bc->wall_comps, (void(*)(void))problem->bc,
-                           NULL, setup_ctx, NULL);  CHKERRQ(ierr);
+                           NULL, problem->bc_ctx, NULL);  CHKERRQ(ierr);
     }
     // Set slip BCs in the x direction
     if (bc->num_slip[0] > 0) {
       PetscInt comps[1] = {1};
       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "slipx", label,
                            bc->num_slip[0], bc->slips[0], 0, 1, comps,
-                           (void(*)(void))NULL, NULL, setup_ctx, NULL); CHKERRQ(ierr);
+                           (void(*)(void))NULL, NULL, problem->bc_ctx, NULL); CHKERRQ(ierr);
     }
     // Set slip BCs in the y direction
     if (bc->num_slip[1] > 0) {
       PetscInt comps[1] = {2};
       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "slipy", label,
                            bc->num_slip[1], bc->slips[1], 0, 1, comps,
-                           (void(*)(void))NULL, NULL, setup_ctx, NULL); CHKERRQ(ierr);
+                           (void(*)(void))NULL, NULL, problem->bc_ctx, NULL); CHKERRQ(ierr);
     }
     // Set slip BCs in the z direction
     if (bc->num_slip[2] > 0) {
       PetscInt comps[1] = {3};
       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "slipz", label,
                            bc->num_slip[2], bc->slips[2], 0, 1, comps,
-                           (void(*)(void))NULL, NULL, setup_ctx, NULL); CHKERRQ(ierr);
+                           (void(*)(void))NULL, NULL, problem->bc_ctx, NULL); CHKERRQ(ierr);
     }
     ierr = DMPlexSetClosurePermutationTensor(dm, PETSC_DETERMINE, NULL);
     CHKERRQ(ierr);
@@ -95,7 +95,7 @@ PetscErrorCode SetUpDM(DM dm, ProblemData *problem, PetscInt degree,
 
 // Refine DM for high-order viz
 PetscErrorCode VizRefineDM(DM dm, User user, ProblemData *problem,
-                           SimpleBC bc, Physics phys, void *setup_ctx) {
+                           SimpleBC bc, Physics phys) {
   PetscErrorCode ierr;
   DM             dm_hierarchy[user->app_ctx->viz_refine + 1];
   VecType        vec_type;
@@ -116,7 +116,7 @@ PetscErrorCode VizRefineDM(DM dm, User user, ProblemData *problem,
     if (i + 1 == user->app_ctx->viz_refine) d = 1;
     ierr = DMGetVecType(dm, &vec_type); CHKERRQ(ierr);
     ierr = DMSetVecType(dm_hierarchy[i+1], vec_type); CHKERRQ(ierr);
-    ierr = SetUpDM(dm_hierarchy[i+1], problem, d, bc, phys, setup_ctx);
+    ierr = SetUpDM(dm_hierarchy[i+1], problem, d, bc, phys);
     CHKERRQ(ierr);
     ierr = DMCreateInterpolation(dm_hierarchy[i], dm_hierarchy[i+1], &interp_next,
                                  NULL); CHKERRQ(ierr);
diff --git a/examples/fluids/src/setuplibceed.c b/examples/fluids/src/setuplibceed.c
index a9e5eac1bd..84972e3456 100644
--- a/examples/fluids/src/setuplibceed.c
+++ b/examples/fluids/src/setuplibceed.c
@@ -216,7 +216,7 @@ PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc,
 }
 
 PetscErrorCode SetupLibceed(Ceed ceed, CeedData ceed_data, DM dm, User user,
-                            AppCtx app_ctx, ProblemData *problem, SimpleBC bc, SetupContext setup_ctx) {
+                            AppCtx app_ctx, ProblemData *problem, SimpleBC bc) {
   PetscErrorCode ierr;
   PetscFunctionBeginUser;
 

From b41f14d561fc54523b3795f9add587407a364e80 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Tue, 10 May 2022 11:05:35 -0600
Subject: [PATCH 43/59] ci - fix Nek5000 testing

---
 .gitlab-ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7cc9afbd4b..c455c4cd08 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -55,7 +55,7 @@ noether-cpu:
     - export COVERAGE=0
     - cd .. && export NEK5K_VERSION=Nek5000-19.0 && { [[ -d $NEK5K_VERSION ]] || { git clone --depth 1 --branch v19.0 https://github.com/Nek5000/Nek5000.git $NEK5K_VERSION && cd $NEK5K_VERSION/tools && ./maketools genbox genmap reatore2 && cd ../..; }; } && export NEK5K_DIR=$PWD/$NEK5K_VERSION && export PATH=$NEK5K_DIR/bin:$PATH MPI=0 && cd libCEED
     - echo "-------------- Nek5000 -------------" && git -C $NEK5K_DIR describe --tags
-    - make -k -j$NPROC_CPU BACKENDS="$BACKENDS_CPU" JUNIT_BATCH="cpu" junit search=nek
+    - make -k -j$NPROC_CPU BACKENDS="$BACKENDS_CPU" JUNIT_BATCH="cpu" junit search=nek NEK5K_DIR=$NEK5K_DIR
 # Clang-tidy
     - echo "-------------- clang-tidy ----------" && clang-tidy --version
     - TIDY_OPTS="-fix-errors" make -j$NPROC_CPU tidy && git diff --exit-code
@@ -123,7 +123,7 @@ noether-rocm:
     - export COVERAGE=0
     - cd .. && export NEK5K_VERSION=Nek5000-19.0 && { [[ -d $NEK5K_VERSION ]] || { git clone --depth 1 --branch v19.0 https://github.com/Nek5000/Nek5000.git $NEK5K_VERSION && cd $NEK5K_VERSION/tools && ./maketools genbox genmap reatore2 && cd ../..; }; } && export NEK5K_DIR=$PWD/$NEK5K_VERSION && export PATH=$NEK5K_DIR/bin:$PATH MPI=0 && cd libCEED
     - echo "-------------- Nek5000 -------------" && git -C $NEK5K_DIR describe --tags
-    - make -k -j$NPROC_GPU BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="hip" junit search=nek
+    - make -k -j$NPROC_GPU BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="hip" junit search=nek NEK5K_DIR=$NEK5K_DIR
 # Clang-tidy
     - echo "-------------- clang-tidy ----------" && clang-tidy --version
     - TIDY_OPTS="-fix-errors" make -j$NPROC_CPU tidy && git diff --exit-code

From b1289648340bfca1d477ff4687c2ef2a2cabee2a Mon Sep 17 00:00:00 2001
From: Jed Brown <jed@jedbrown.org>
Date: Tue, 10 May 2022 11:55:59 -0600
Subject: [PATCH 44/59] examples/fluids: remove setup_ctx from printing

---
 examples/fluids/navierstokes.c         | 2 +-
 examples/fluids/navierstokes.h         | 8 ++------
 examples/fluids/problems/advection.c   | 8 ++++++--
 examples/fluids/problems/advection2d.c | 7 ++++++-
 examples/fluids/problems/eulervortex.c | 2 +-
 examples/fluids/problems/newtonian.c   | 1 -
 examples/fluids/problems/shocktube.c   | 3 +--
 7 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c
index 9a43722008..710455bd2a 100644
--- a/examples/fluids/navierstokes.c
+++ b/examples/fluids/navierstokes.c
@@ -220,7 +220,7 @@ int main(int argc, char **argv) {
                        host_name, comm_size); CHKERRQ(ierr);
 
     // Problem specific info
-    ierr = problem->print_info(problem, setup_ctx, app_ctx); CHKERRQ(ierr);
+    ierr = problem->print_info(problem, app_ctx); CHKERRQ(ierr);
 
     // libCEED
     const char *used_resource;
diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h
index c1890dc662..f6e56e7342 100644
--- a/examples/fluids/navierstokes.h
+++ b/examples/fluids/navierstokes.h
@@ -241,7 +241,7 @@ struct ProblemData_private {
   PetscErrorCode    (*bc)(PetscInt, PetscReal, const PetscReal[], PetscInt,
                           PetscScalar[], void *);
   void *bc_ctx;
-  PetscErrorCode    (*print_info)(ProblemData*, SetupContext, AppCtx);
+  PetscErrorCode    (*print_info)(ProblemData*, AppCtx);
 };
 // *INDENT-ON*
 
@@ -272,22 +272,18 @@ extern PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm,
 
 // Print function for each problem
 extern PetscErrorCode PRINT_DENSITY_CURRENT(ProblemData *problem,
-    SetupContext setup_ctx, AppCtx app_ctx);
+    AppCtx app_ctx);
 
 extern PetscErrorCode PRINT_EULER_VORTEX(ProblemData *problem,
-    SetupContext setup_ctx,
     AppCtx app_ctx);
 
 extern PetscErrorCode PRINT_SHOCKTUBE(ProblemData *problem,
-                                      SetupContext setup_ctx,
                                       AppCtx app_ctx);
 
 extern PetscErrorCode PRINT_ADVECTION(ProblemData *problem,
-                                      SetupContext setup_ctx,
                                       AppCtx app_ctx);
 
 extern PetscErrorCode PRINT_ADVECTION2D(ProblemData *problem,
-                                        SetupContext setup_ctx,
                                         AppCtx app_ctx);
 
 // -----------------------------------------------------------------------------
diff --git a/examples/fluids/problems/advection.c b/examples/fluids/problems/advection.c
index e5ec5474a9..1c3c482eed 100644
--- a/examples/fluids/problems/advection.c
+++ b/examples/fluids/problems/advection.c
@@ -219,13 +219,15 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode PRINT_ADVECTION(ProblemData *problem, SetupContext setup_ctx,
-                               AppCtx app_ctx) {
+PetscErrorCode PRINT_ADVECTION(ProblemData *problem, AppCtx app_ctx) {
   MPI_Comm       comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
+  SetupContext   setup_ctx;
   AdvectionContext advection_ctx;
 
   PetscFunctionBeginUser;
+  CeedQFunctionContextGetData(problem->ics.qfunction_context,
+                              CEED_MEM_HOST, &setup_ctx);
   CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
                               CEED_MEM_HOST, &advection_ctx);
   ierr = PetscPrintf(comm,
@@ -246,6 +248,8 @@ PetscErrorCode PRINT_ADVECTION(ProblemData *problem, SetupContext setup_ctx,
                        "    Background Wind                    : %f,%f,%f\n",
                        setup_ctx->wind[0], setup_ctx->wind[1], setup_ctx->wind[2]); CHKERRQ(ierr);
   }
+  CeedQFunctionContextRestoreData(problem->ics.qfunction_context,
+                                  &setup_ctx);
   CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
                                   &advection_ctx);
   PetscFunctionReturn(0);
diff --git a/examples/fluids/problems/advection2d.c b/examples/fluids/problems/advection2d.c
index 759e8e53af..ee6a4c355c 100644
--- a/examples/fluids/problems/advection2d.c
+++ b/examples/fluids/problems/advection2d.c
@@ -195,13 +195,16 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode PRINT_ADVECTION2D(ProblemData *problem, SetupContext setup_ctx,
+PetscErrorCode PRINT_ADVECTION2D(ProblemData *problem,
                                  AppCtx app_ctx) {
   MPI_Comm       comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
+  SetupContext setup_ctx;
   AdvectionContext advection_ctx;
 
   PetscFunctionBeginUser;
+  CeedQFunctionContextGetData(problem->ics.qfunction_context,
+                              CEED_MEM_HOST, &setup_ctx);
   CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
                               CEED_MEM_HOST, &advection_ctx);
   ierr = PetscPrintf(comm,
@@ -217,6 +220,8 @@ PetscErrorCode PRINT_ADVECTION2D(ProblemData *problem, SetupContext setup_ctx,
                        "    Background Wind                    : %f,%f\n",
                        setup_ctx->wind[0], setup_ctx->wind[1]); CHKERRQ(ierr);
   }
+  CeedQFunctionContextRestoreData(problem->ics.qfunction_context,
+                                  &setup_ctx);
   CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
                                   &advection_ctx);
   PetscFunctionReturn(0);
diff --git a/examples/fluids/problems/eulervortex.c b/examples/fluids/problems/eulervortex.c
index a0b094ed42..3c3484fabf 100644
--- a/examples/fluids/problems/eulervortex.c
+++ b/examples/fluids/problems/eulervortex.c
@@ -187,7 +187,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode PRINT_EULER_VORTEX(ProblemData *problem, SetupContext setup_ctx,
+PetscErrorCode PRINT_EULER_VORTEX(ProblemData *problem,
                                   AppCtx app_ctx) {
   MPI_Comm       comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c
index f6a842f813..bfab2a1d75 100644
--- a/examples/fluids/problems/newtonian.c
+++ b/examples/fluids/problems/newtonian.c
@@ -219,7 +219,6 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
 }
 
 PetscErrorCode PRINT_DENSITY_CURRENT(ProblemData *problem,
-                                     SetupContext setup_ctx,
                                      AppCtx app_ctx) {
   MPI_Comm comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
diff --git a/examples/fluids/problems/shocktube.c b/examples/fluids/problems/shocktube.c
index fefea2d651..5983d7410e 100644
--- a/examples/fluids/problems/shocktube.c
+++ b/examples/fluids/problems/shocktube.c
@@ -175,8 +175,7 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode PRINT_SHOCKTUBE(ProblemData *problem, SetupContext setup_ctx,
-                               AppCtx app_ctx) {
+PetscErrorCode PRINT_SHOCKTUBE(ProblemData *problem, AppCtx app_ctx) {
   MPI_Comm       comm = PETSC_COMM_WORLD;
   PetscErrorCode ierr;
   PetscFunctionBeginUser;

From a0add3c91c6ddcd77c0d376840911ac920dd4230 Mon Sep 17 00:00:00 2001
From: Jed Brown <jed@jedbrown.org>
Date: Tue, 10 May 2022 12:38:17 -0600
Subject: [PATCH 45/59] examples/fluids: make setup contexts independent

These are now private to each example and can be developed independently.
---
 examples/fluids/navierstokes.c               | 13 ++----
 examples/fluids/navierstokes.h               | 47 +++-----------------
 examples/fluids/problems/advection.c         |  7 +--
 examples/fluids/problems/advection2d.c       |  8 ++--
 examples/fluids/problems/blasius.c           |  5 +--
 examples/fluids/problems/channel.c           |  4 +-
 examples/fluids/problems/densitycurrent.c    | 12 +++--
 examples/fluids/problems/eulervortex.c       | 18 ++------
 examples/fluids/problems/newtonian.c         |  8 ++--
 examples/fluids/problems/shocktube.c         | 11 ++---
 examples/fluids/qfunctions/advection.h       | 17 -------
 examples/fluids/qfunctions/advection2d.h     | 20 ---------
 examples/fluids/qfunctions/densitycurrent.h  | 31 +------------
 examples/fluids/qfunctions/newtonian.h       | 25 -----------
 examples/fluids/qfunctions/newtonian_types.h | 21 +++++++++
 examples/fluids/qfunctions/shocktube.h       | 11 -----
 examples/fluids/src/misc.c                   |  2 +-
 17 files changed, 64 insertions(+), 196 deletions(-)

diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c
index 710455bd2a..d3a05d798b 100644
--- a/examples/fluids/navierstokes.c
+++ b/examples/fluids/navierstokes.c
@@ -69,9 +69,6 @@ int main(int argc, char **argv) {
   SimpleBC bc;
   ierr = PetscCalloc1(1, &bc); CHKERRQ(ierr);
 
-  SetupContext setup_ctx;
-  ierr = PetscCalloc1(1, &setup_ctx); CHKERRQ(ierr);
-
   Physics phys_ctx;
   ierr = PetscCalloc1(1, &phys_ctx); CHKERRQ(ierr);
 
@@ -129,12 +126,12 @@ int main(int argc, char **argv) {
   // Choose the problem from the list of registered problems
   // ---------------------------------------------------------------------------
   {
-    PetscErrorCode (*p)(ProblemData *, DM, void *, void *);
+    PetscErrorCode (*p)(ProblemData *, DM, void *);
     ierr = PetscFunctionListFind(app_ctx->problems, app_ctx->problem_name, &p);
     CHKERRQ(ierr);
     if (!p) SETERRQ(PETSC_COMM_SELF, 1, "Problem '%s' not found",
                       app_ctx->problem_name);
-    ierr = (*p)(problem, dm, &setup_ctx, &user); CHKERRQ(ierr);
+    ierr = (*p)(problem, dm, &user); CHKERRQ(ierr);
   }
 
   // -- Set up DM
@@ -292,9 +289,6 @@ int main(int argc, char **argv) {
   CeedVectorDestroy(&user->q_dot_ceed);
   CeedVectorDestroy(&user->g_ceed);
 
-  // -- Contexts
-  CeedQFunctionContextDestroy(&ceed_data->setup_context);
-
   // -- QFunctions
   CeedQFunctionDestroy(&ceed_data->qf_setup_vol);
   CeedQFunctionDestroy(&ceed_data->qf_ics);
@@ -347,12 +341,13 @@ int main(int argc, char **argv) {
   // -- Function list
   ierr = PetscFunctionListDestroy(&app_ctx->problems); CHKERRQ(ierr);
 
+  ierr = PetscFree(problem->bc_ctx); CHKERRQ(ierr);
+
   // -- Structs
   ierr = PetscFree(units); CHKERRQ(ierr);
   ierr = PetscFree(user); CHKERRQ(ierr);
   ierr = PetscFree(problem); CHKERRQ(ierr);
   ierr = PetscFree(bc); CHKERRQ(ierr);
-  ierr = PetscFree(setup_ctx); CHKERRQ(ierr);
   ierr = PetscFree(phys_ctx); CHKERRQ(ierr);
   ierr = PetscFree(app_ctx); CHKERRQ(ierr);
   ierr = PetscFree(ceed_data); CHKERRQ(ierr);
diff --git a/examples/fluids/navierstokes.h b/examples/fluids/navierstokes.h
index f6e56e7342..9275ea4f38 100644
--- a/examples/fluids/navierstokes.h
+++ b/examples/fluids/navierstokes.h
@@ -127,7 +127,6 @@ struct AppCtx_private {
 // libCEED data struct
 struct CeedData_private {
   CeedVector           x_coord, q_data;
-  CeedQFunctionContext setup_context;
   CeedQFunction        qf_setup_vol, qf_ics, qf_rhs_vol, qf_ifunction_vol,
                        qf_setup_sur, qf_apply_inflow, qf_apply_outflow;
   CeedBasis            basis_x, basis_xc, basis_q, basis_x_sur, basis_q_sur;
@@ -177,37 +176,6 @@ struct SimpleBC_private {
   PetscBool user_bc;
 };
 
-// Initial conditions
-#ifndef setup_context_struct
-#define setup_context_struct
-typedef struct SetupContext_ *SetupContext;
-struct SetupContext_ {
-  CeedScalar theta0;
-  CeedScalar thetaC;
-  CeedScalar P0;
-  CeedScalar N;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g[3];
-  CeedScalar rc;
-  CeedScalar lx;
-  CeedScalar ly;
-  CeedScalar lz;
-  CeedScalar center[3];
-  CeedScalar dc_axis[3];
-  CeedScalar wind[3];
-  CeedScalar time;
-  CeedScalar mid_point;
-  CeedScalar P_high;
-  CeedScalar rho_high;
-  CeedScalar P_low;
-  CeedScalar rho_low;
-  int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
-  int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
-  int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
-};
-#endif
-
 // Struct that contains all enums and structs used for the physics of all problems
 struct Physics_private {
   WindType                 wind_type;
@@ -252,23 +220,22 @@ extern int FreeContextPetsc(void *);
 // -----------------------------------------------------------------------------
 // Set up function for each problem
 extern PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm,
-                                 void *setup_ctx, void *ctx);
+                                 void *ctx);
 extern PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm,
-                                 void *setup_ctx, void *ctx);
+                                 void *ctx);
 extern PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm,
-                                      void *setup_ctx, void *ctx);
+                                      void *ctx);
 extern PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm,
-    void *setup_ctx,
     void *ctx);
 
 extern PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm,
-                                      void *setup_ctx, void *ctx);
-extern PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
+                                      void *ctx);
+extern PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm,
                                    void *ctx);
-extern PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
+extern PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm,
                                    void *ctx);
 extern PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm,
-                                     void *setup_ctx, void *ctx);
+                                     void *ctx);
 
 // Print function for each problem
 extern PetscErrorCode PRINT_DENSITY_CURRENT(ProblemData *problem,
diff --git a/examples/fluids/problems/advection.c b/examples/fluids/problems/advection.c
index 1c3c482eed..5a3a748a58 100644
--- a/examples/fluids/problems/advection.c
+++ b/examples/fluids/problems/advection.c
@@ -12,13 +12,13 @@
 #include "../qfunctions/setupgeo.h"
 #include "../qfunctions/advection.h"
 
-PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
+PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm,
                             void *ctx) {
   WindType             wind_type;
   BubbleType           bubble_type;
   BubbleContinuityType bubble_continuity_type;
   StabilizationType    stab;
-  SetupContext         setup_context = *(SetupContext *)setup_ctx;
+  SetupContext         setup_context;
   User                 user = *(User *)ctx;
   MPI_Comm             comm = PETSC_COMM_WORLD;
   PetscBool            implicit;
@@ -28,6 +28,7 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   CeedQFunctionContext advection_context;
 
   PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &setup_context); CHKERRQ(ierr);
   ierr = PetscCalloc1(1, &advection_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
@@ -49,7 +50,7 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_inflow.qfunction            = Advection_InOutFlow;
   problem->apply_inflow.qfunction_loc        = Advection_InOutFlow_loc;
   problem->bc                                = Exact_Advection;
-  problem->bc_ctx                            = setup_ctx;
+  problem->bc_ctx                            = setup_context;
   problem->non_zero_time                     = PETSC_FALSE;
   problem->print_info                        = PRINT_ADVECTION;
 
diff --git a/examples/fluids/problems/advection2d.c b/examples/fluids/problems/advection2d.c
index ee6a4c355c..64a826dec0 100644
--- a/examples/fluids/problems/advection2d.c
+++ b/examples/fluids/problems/advection2d.c
@@ -12,11 +12,10 @@
 #include "../qfunctions/setupgeo2d.h"
 #include "../qfunctions/advection2d.h"
 
-PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
-                              void *ctx) {
+PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *ctx) {
   WindType          wind_type;
   StabilizationType stab;
-  SetupContext      setup_context = *(SetupContext *)setup_ctx;
+  SetupContext      setup_context;
   User              user = *(User *)ctx;
   MPI_Comm          comm = PETSC_COMM_WORLD;
   PetscBool         implicit;
@@ -28,6 +27,7 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
 
   PetscFunctionBeginUser;
   ierr = PetscCalloc1(1, &advection_ctx); CHKERRQ(ierr);
+  ierr = PetscCalloc1(1, &setup_context); CHKERRQ(ierr);
 
   // ------------------------------------------------------
   //               SET UP ADVECTION2D
@@ -48,7 +48,7 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_inflow.qfunction            = Advection2d_InOutFlow;
   problem->apply_inflow.qfunction_loc        = Advection2d_InOutFlow_loc;
   problem->bc                                = Exact_Advection2d;
-  problem->bc_ctx                            = setup_ctx;
+  problem->bc_ctx                            = setup_context;
   problem->non_zero_time                     = PETSC_TRUE;
   problem->print_info                        = PRINT_ADVECTION2D;
 
diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c
index ee37cb0036..4d5d20b0b4 100644
--- a/examples/fluids/problems/blasius.c
+++ b/examples/fluids/problems/blasius.c
@@ -73,8 +73,7 @@ PetscErrorCode modifyMesh(DM dm, PetscInt dim, PetscReal growth, PetscInt N,
   PetscFunctionReturn(0);
 }
 
-PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *setup_ctx,
-                          void *ctx) {
+PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) {
 
   PetscInt ierr;
   User              user = *(User *)ctx;
@@ -84,7 +83,7 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *setup_ctx,
   CeedQFunctionContext blasius_context;
 
   PetscFunctionBeginUser;
-  ierr = NS_NEWTONIAN_IG(problem, dm, setup_ctx, ctx); CHKERRQ(ierr);
+  ierr = NS_NEWTONIAN_IG(problem, dm, ctx); CHKERRQ(ierr);
   ierr = PetscCalloc1(1, &blasius_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
diff --git a/examples/fluids/problems/channel.c b/examples/fluids/problems/channel.c
index 2e1c5b64cb..eb1fb7bed7 100644
--- a/examples/fluids/problems/channel.c
+++ b/examples/fluids/problems/channel.c
@@ -11,7 +11,7 @@
 #include "../navierstokes.h"
 #include "../qfunctions/channel.h"
 
-PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, void *setup_ctx,
+PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm,
                           void *ctx) {
 
   PetscInt ierr;
@@ -22,7 +22,7 @@ PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm, void *setup_ctx,
   CeedQFunctionContext channel_context;
 
   PetscFunctionBeginUser;
-  ierr = NS_NEWTONIAN_IG(problem, dm, setup_ctx, ctx); CHKERRQ(ierr);
+  ierr = NS_NEWTONIAN_IG(problem, dm, ctx); CHKERRQ(ierr);
   ierr = PetscCalloc1(1, &channel_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
diff --git a/examples/fluids/problems/densitycurrent.c b/examples/fluids/problems/densitycurrent.c
index 11d67a81fd..b6a1f7de1a 100644
--- a/examples/fluids/problems/densitycurrent.c
+++ b/examples/fluids/problems/densitycurrent.c
@@ -12,24 +12,22 @@
 #include "../qfunctions/densitycurrent.h"
 #include "../navierstokes.h"
 
-PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *setup_ctx,
-                                  void *ctx) {
+PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *ctx) {
 
   PetscInt ierr;
-  ierr = NS_NEWTONIAN_IG(problem, dm, setup_ctx, ctx);
-  CHKERRQ(ierr);
-  SetupContext setup_context = *(SetupContext *)setup_ctx;
+  SetupContext setup_context;
   User user = *(User *)ctx;
   MPI_Comm comm = PETSC_COMM_WORLD;
-  PetscFunctionBeginUser;
 
+  PetscFunctionBeginUser;
+  ierr = NS_NEWTONIAN_IG(problem, dm, ctx); CHKERRQ(ierr);
   // ------------------------------------------------------
   //               SET UP DENSITY_CURRENT
   // ------------------------------------------------------
   problem->ics.qfunction = ICsDC;
   problem->ics.qfunction_loc = ICsDC_loc;
   problem->bc = Exact_DC;
-  problem->bc_ctx = setup_ctx;
+  setup_context = problem->bc_ctx;
 
   // ------------------------------------------------------
   //             Create the libCEED context
diff --git a/examples/fluids/problems/eulervortex.c b/examples/fluids/problems/eulervortex.c
index 3c3484fabf..97fe9337da 100644
--- a/examples/fluids/problems/eulervortex.c
+++ b/examples/fluids/problems/eulervortex.c
@@ -12,10 +12,8 @@
 #include "../qfunctions/setupgeo.h"
 #include "../qfunctions/eulervortex.h"
 
-PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
-                               void *ctx) {
+PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *ctx) {
   EulerTestType     euler_test;
-  SetupContext      setup_context = *(SetupContext *)setup_ctx;
   User              user = *(User *)ctx;
   StabilizationType stab;
   MPI_Comm          comm = PETSC_COMM_WORLD;
@@ -50,7 +48,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_outflow.qfunction           = Euler_Outflow;
   problem->apply_outflow.qfunction_loc       = Euler_Outflow_loc;
   problem->bc                                = Exact_Euler;
-  problem->bc_ctx                            = setup_ctx;
+  problem->bc_ctx                            = euler_ctx;
   problem->non_zero_time                     = PETSC_TRUE;
   problem->print_info                        = PRINT_EULER_VORTEX;
 
@@ -140,15 +138,6 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
   }
   problem->dm_scale = meter;
 
-  // -- Setup Context
-  setup_context->lx        = domain_size[0];
-  setup_context->ly        = domain_size[1];
-  setup_context->lz        = domain_size[2];
-  setup_context->center[0] = center[0];
-  setup_context->center[1] = center[1];
-  setup_context->center[2] = center[2];
-  setup_context->time      = 0;
-
   // -- QFunction Context
   user->phys->stab                        = stab;
   user->phys->euler_test                  = euler_test;
@@ -175,7 +164,8 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *setup_ctx,
                                      FreeContextPetsc);
   CeedQFunctionContextRegisterDouble(euler_context, "solution time",
                                      offsetof(struct EulerContext_, curr_time), 1, "Phyiscal time of the solution");
-  problem->ics.qfunction_context = euler_context;
+  CeedQFunctionContextReferenceCopy(euler_context,
+                                    &problem->ics.qfunction_context);
   CeedQFunctionContextReferenceCopy(euler_context,
                                     &problem->apply_vol_rhs.qfunction_context);
   CeedQFunctionContextReferenceCopy(euler_context,
diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c
index bfab2a1d75..11f9cc8487 100644
--- a/examples/fluids/problems/newtonian.c
+++ b/examples/fluids/problems/newtonian.c
@@ -12,9 +12,8 @@
 #include "../qfunctions/setupgeo.h"
 #include "../qfunctions/newtonian.h"
 
-PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
-                               void *ctx) {
-  SetupContext      setup_context = *(SetupContext *)setup_ctx;
+PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) {
+  SetupContext      setup_context;
   User              user = *(User *)ctx;
   StabilizationType stab;
   MPI_Comm          comm = PETSC_COMM_WORLD;
@@ -25,6 +24,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   CeedQFunctionContext newtonian_ig_context;
 
   PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &setup_context); CHKERRQ(ierr);
   ierr = PetscCalloc1(1, &newtonian_ig_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
@@ -43,6 +43,8 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_vol_rhs.qfunction_loc       = Newtonian_loc;
   problem->apply_vol_ifunction.qfunction     = IFunction_Newtonian;
   problem->apply_vol_ifunction.qfunction_loc = IFunction_Newtonian_loc;
+  problem->bc                                = NULL;
+  problem->bc_ctx                            = setup_context;
   problem->non_zero_time                     = PETSC_FALSE;
   problem->print_info                        = PRINT_DENSITY_CURRENT;
 
diff --git a/examples/fluids/problems/shocktube.c b/examples/fluids/problems/shocktube.c
index 5983d7410e..542c198c98 100644
--- a/examples/fluids/problems/shocktube.c
+++ b/examples/fluids/problems/shocktube.c
@@ -21,9 +21,8 @@
 #include "../qfunctions/setupgeo.h"
 #include "../qfunctions/shocktube.h"
 
-PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
-                            void *ctx) {
-  SetupContext      setup_context = *(SetupContext *)setup_ctx;
+PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *ctx) {
+  SetupContext      setup_context;
   User              user = *(User *)ctx;
   MPI_Comm          comm = PETSC_COMM_WORLD;
   PetscBool         implicit;
@@ -36,6 +35,7 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
 
 
   PetscFunctionBeginUser;
+  ierr = PetscCalloc1(1, &setup_context); CHKERRQ(ierr);
   ierr = PetscCalloc1(1, &shocktube_ctx); CHKERRQ(ierr);
 
   // ------------------------------------------------------
@@ -55,7 +55,7 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
   problem->apply_vol_ifunction.qfunction     = NULL;
   problem->apply_vol_ifunction.qfunction_loc = NULL;
   problem->bc                                = Exact_ShockTube;
-  problem->bc_ctx                            = setup_ctx;
+  problem->bc_ctx                            = setup_context;
   problem->non_zero_time                     = PETSC_FALSE;
   problem->print_info                        = PRINT_SHOCKTUBE;
 
@@ -141,9 +141,6 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *setup_ctx,
   CeedScalar mid_point = 0.5*(domain_size[0]+domain_min[0]);
 
   // -- Setup Context
-  setup_context->lx        = domain_size[0];
-  setup_context->ly        = domain_size[1];
-  setup_context->lz        = domain_size[2];
   setup_context->mid_point = mid_point;
   setup_context->time      = 0.0;
   setup_context->P_high    = P_high;
diff --git a/examples/fluids/qfunctions/advection.h b/examples/fluids/qfunctions/advection.h
index 3b6de2ae39..8e94a21e1e 100644
--- a/examples/fluids/qfunctions/advection.h
+++ b/examples/fluids/qfunctions/advection.h
@@ -13,35 +13,18 @@
 
 #include <math.h>
 
-#ifndef setup_context_struct
-#define setup_context_struct
 typedef struct SetupContext_ *SetupContext;
 struct SetupContext_ {
-  CeedScalar theta0;
-  CeedScalar thetaC;
-  CeedScalar P0;
-  CeedScalar N;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g[3];
   CeedScalar rc;
   CeedScalar lx;
   CeedScalar ly;
   CeedScalar lz;
-  CeedScalar center[3];
-  CeedScalar dc_axis[3];
   CeedScalar wind[3];
   CeedScalar time;
-  CeedScalar mid_point;
-  CeedScalar P_high;
-  CeedScalar rho_high;
-  CeedScalar P_low;
-  CeedScalar rho_low;
   int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
   int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
   int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
 };
-#endif
 
 typedef struct AdvectionContext_ *AdvectionContext;
 struct AdvectionContext_ {
diff --git a/examples/fluids/qfunctions/advection2d.h b/examples/fluids/qfunctions/advection2d.h
index 41d23cee1b..2ff88cbb33 100644
--- a/examples/fluids/qfunctions/advection2d.h
+++ b/examples/fluids/qfunctions/advection2d.h
@@ -17,35 +17,15 @@
 #define M_PI    3.14159265358979323846
 #endif
 
-#ifndef setup_context_struct
-#define setup_context_struct
 typedef struct SetupContext_ *SetupContext;
 struct SetupContext_ {
-  CeedScalar theta0;
-  CeedScalar thetaC;
-  CeedScalar P0;
-  CeedScalar N;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g[3];
   CeedScalar rc;
   CeedScalar lx;
   CeedScalar ly;
-  CeedScalar lz;
-  CeedScalar center[3];
-  CeedScalar dc_axis[3];
   CeedScalar wind[3];
   CeedScalar time;
-  CeedScalar mid_point;
-  CeedScalar P_high;
-  CeedScalar rho_high;
-  CeedScalar P_low;
-  CeedScalar rho_low;
   int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
-  int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
-  int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
 };
-#endif
 
 typedef struct AdvectionContext_ *AdvectionContext;
 struct AdvectionContext_ {
diff --git a/examples/fluids/qfunctions/densitycurrent.h b/examples/fluids/qfunctions/densitycurrent.h
index 4b8691e3ab..2d503d8165 100644
--- a/examples/fluids/qfunctions/densitycurrent.h
+++ b/examples/fluids/qfunctions/densitycurrent.h
@@ -17,41 +17,12 @@
 
 #include <math.h>
 #include <ceed.h>
+#include "newtonian_types.h"
 
 #ifndef M_PI
 #define M_PI    3.14159265358979323846
 #endif
 
-#ifndef setup_context_struct
-#define setup_context_struct
-typedef struct SetupContext_ *SetupContext;
-struct SetupContext_ {
-  CeedScalar theta0;
-  CeedScalar thetaC;
-  CeedScalar P0;
-  CeedScalar N;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g[3];
-  CeedScalar rc;
-  CeedScalar lx;
-  CeedScalar ly;
-  CeedScalar lz;
-  CeedScalar center[3];
-  CeedScalar dc_axis[3];
-  CeedScalar wind[3];
-  CeedScalar time;
-  CeedScalar mid_point;
-  CeedScalar P_high;
-  CeedScalar rho_high;
-  CeedScalar P_low;
-  CeedScalar rho_low;
-  int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
-  int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
-  int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
-};
-#endif
-
 // *****************************************************************************
 // This function sets the initial conditions and the boundary conditions
 //
diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h
index 05fd2b093f..008a5c8e46 100644
--- a/examples/fluids/qfunctions/newtonian.h
+++ b/examples/fluids/qfunctions/newtonian.h
@@ -20,31 +20,6 @@
 #define M_PI    3.14159265358979323846
 #endif
 
-#ifndef setup_context_struct
-#define setup_context_struct
-typedef struct SetupContext_ *SetupContext;
-struct SetupContext_ {
-  CeedScalar theta0;
-  CeedScalar thetaC;
-  CeedScalar P0;
-  CeedScalar N;
-  CeedScalar cv;
-  CeedScalar cp;
-  CeedScalar g[3];
-  CeedScalar rc;
-  CeedScalar lx;
-  CeedScalar ly;
-  CeedScalar lz;
-  CeedScalar center[3];
-  CeedScalar dc_axis[3];
-  CeedScalar wind[3];
-  CeedScalar time;
-  int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
-  int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
-  int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
-};
-#endif
-
 // *****************************************************************************
 // Helper function for computing flux Jacobian
 // *****************************************************************************
diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h
index 7fbaa4575a..0e7e552b73 100644
--- a/examples/fluids/qfunctions/newtonian_types.h
+++ b/examples/fluids/qfunctions/newtonian_types.h
@@ -4,6 +4,27 @@
 #include <ceed/ceed.h>
 #include "stabilization_types.h"
 
+typedef struct SetupContext_ *SetupContext;
+struct SetupContext_ {
+  CeedScalar theta0;
+  CeedScalar thetaC;
+  CeedScalar P0;
+  CeedScalar N;
+  CeedScalar cv;
+  CeedScalar cp;
+  CeedScalar g[3];
+  CeedScalar rc;
+  CeedScalar lx;
+  CeedScalar ly;
+  CeedScalar lz;
+  CeedScalar center[3];
+  CeedScalar dc_axis[3];
+  CeedScalar time;
+  int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
+  int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
+  int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
+};
+
 typedef struct NewtonianIdealGasContext_ *NewtonianIdealGasContext;
 struct NewtonianIdealGasContext_ {
   CeedScalar lambda;
diff --git a/examples/fluids/qfunctions/shocktube.h b/examples/fluids/qfunctions/shocktube.h
index f0b5cc6b35..115699c4c1 100644
--- a/examples/fluids/qfunctions/shocktube.h
+++ b/examples/fluids/qfunctions/shocktube.h
@@ -31,8 +31,6 @@
 #define M_PI    3.14159265358979323846
 #endif
 
-#ifndef setup_context_struct
-#define setup_context_struct
 typedef struct SetupContext_ *SetupContext;
 struct SetupContext_ {
   CeedScalar theta0;
@@ -41,14 +39,6 @@ struct SetupContext_ {
   CeedScalar N;
   CeedScalar cv;
   CeedScalar cp;
-  CeedScalar g[3];
-  CeedScalar rc;
-  CeedScalar lx;
-  CeedScalar ly;
-  CeedScalar lz;
-  CeedScalar center[3];
-  CeedScalar dc_axis[3];
-  CeedScalar wind[3];
   CeedScalar time;
   CeedScalar mid_point;
   CeedScalar P_high;
@@ -59,7 +49,6 @@ struct SetupContext_ {
   int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
   int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
 };
-#endif
 
 typedef struct ShockTubeContext_ *ShockTubeContext;
 struct ShockTubeContext_ {
diff --git a/examples/fluids/src/misc.c b/examples/fluids/src/misc.c
index 2a7398ed89..d7b63e1581 100644
--- a/examples/fluids/src/misc.c
+++ b/examples/fluids/src/misc.c
@@ -17,7 +17,7 @@ PetscErrorCode ICs_FixMultiplicity(DM dm, CeedData ceed_data, User user,
   PetscFunctionBeginUser;
 
   // ---------------------------------------------------------------------------
-  // Update SetupContext
+  // Update time for evaluation
   // ---------------------------------------------------------------------------
   if (user->phys->ics_time_label)
     CeedOperatorContextSetDouble(ceed_data->op_ics, user->phys->ics_time_label,

From 4030acc1f2b85b55be8bda075e87cd2ffe2103d0 Mon Sep 17 00:00:00 2001
From: Toby Isaac <toby.isaac@gmail.com>
Date: Tue, 10 May 2022 18:56:04 -0400
Subject: [PATCH 46/59] Use standard khash header guard

---
 include/ceed/khash.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/ceed/khash.h b/include/ceed/khash.h
index 1e71f6dd2a..3a3dd4d91d 100644
--- a/include/ceed/khash.h
+++ b/include/ceed/khash.h
@@ -82,8 +82,8 @@ int main() {
 	* Added destructor
 */
 
-#ifndef _ceed_khash_h
-#define _ceed_khash_h
+#ifndef __AC_KHASH_H
+#define __AC_KHASH_H
 
 /*!
   @header
@@ -589,4 +589,4 @@ typedef const char *kh_cstr_t;
 #define KHASH_MAP_INIT_STR(name, khval_t)								\
 	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
 
-#endif /* _ceed_khash_h */
+#endif /* __AC_KHASH_H */

From c32eb7cb37237000cbea5cacd49f757917c78e18 Mon Sep 17 00:00:00 2001
From: Jed Brown <jed@jedbrown.org>
Date: Tue, 10 May 2022 21:15:52 -0600
Subject: [PATCH 47/59] examples/fluids: avoid pow(double, int) due to
 CUDA_ERROR_INVALID_PTX

NVRTC with various CUDA versions 11.2 to 11.6 returns
CUDA_ERROR_INVALID_PTX when calling pow(double, int) from kernels.
Typically pow(x, 2.) is optimized to x*x at -O1 and above with gcc, but
pow(x, 4.) is not optimized.
---
 examples/fluids/qfunctions/advection.h       | 11 ++++++-----
 examples/fluids/qfunctions/advection2d.h     |  8 +++++---
 examples/fluids/qfunctions/channel.h         |  7 ++++---
 examples/fluids/qfunctions/newtonian_types.h |  2 ++
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/examples/fluids/qfunctions/advection.h b/examples/fluids/qfunctions/advection.h
index 8e94a21e1e..4e2bf6f39f 100644
--- a/examples/fluids/qfunctions/advection.h
+++ b/examples/fluids/qfunctions/advection.h
@@ -35,6 +35,8 @@ struct AdvectionContext_ {
   int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
 };
 
+CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; }
+
 // *****************************************************************************
 // This QFunction sets the initial conditions and the boundary conditions
 //   for two test cases: ROTATION and TRANSLATION
@@ -109,14 +111,13 @@ CEED_QFUNCTION_HELPER int Exact_Advection(CeedInt dim, CeedScalar time,
   switch (context->bubble_type) {
   //  original sphere
   case 0: { // (dim=3)
-    r = sqrt(pow((x - x0[0]), 2) +
-             pow((y - x0[1]), 2) +
-             pow((z - x0[2]), 2));
+    r = sqrt(Square(x - x0[0]) +
+             Square(y - x0[1]) +
+             Square(z - x0[2]));
   } break;
   // cylinder (needs periodicity to work properly)
   case 1: { // (dim=2)
-    r = sqrt(pow((x - x0[0]), 2) +
-             pow((y - x0[1]), 2) );
+    r = sqrt(Square(x - x0[0]) + Square(y - x0[1]));
   } break;
   }
 
diff --git a/examples/fluids/qfunctions/advection2d.h b/examples/fluids/qfunctions/advection2d.h
index 2ff88cbb33..2de60ec4cf 100644
--- a/examples/fluids/qfunctions/advection2d.h
+++ b/examples/fluids/qfunctions/advection2d.h
@@ -36,6 +36,8 @@ struct AdvectionContext_ {
   int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG
 };
 
+CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; }
+
 // *****************************************************************************
 // This QFunction sets the initial conditions and the boundary conditions
 //   for two test cases: ROTATION and TRANSLATION
@@ -124,15 +126,15 @@ CEED_QFUNCTION_HELPER int Exact_Advection2d(CeedInt dim, CeedScalar time,
     return 1;
   }
 
-  CeedScalar r = sqrt(pow(x - x0[0], 2) + pow(y - x0[1], 2));
+  CeedScalar r = sqrt(Square(x - x0[0]) + Square(y - x0[1]));
   CeedScalar E = 1 - r/rc;
 
   if (0) { // non-smooth initial conditions
     if (q[4] < E) q[4] = E;
-    r = sqrt(pow(x - x1[0], 2) + pow(y - x1[1], 2));
+    r = sqrt(Square(x - x1[0]) + Square(y - x1[1]));
     if (r <= rc) q[4] = 1;
   }
-  r = sqrt(pow(x - x2[0], 2) + pow(y - x2[1], 2));
+  r = sqrt(Square(x - x2[0]) + Square(y - x2[1]));
   E = (r <= rc) ? .5 + .5*cos(r*M_PI/rc) : 0;
   if (q[4] < E) q[4] = E;
 
diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h
index 31ed0c2925..e2a16721ad 100644
--- a/examples/fluids/qfunctions/channel.h
+++ b/examples/fluids/qfunctions/channel.h
@@ -13,7 +13,7 @@
 #define channel_h
 
 #include <math.h>
-#include <ceed.h>
+#include <ceed/ceed.h>
 #include "newtonian_types.h"
 
 typedef struct ChannelContext_ *ChannelContext;
@@ -47,14 +47,15 @@ CEED_QFUNCTION_HELPER int Exact_Channel(CeedInt dim, CeedScalar time,
 
   const CeedScalar Pr    = mu / (cp*k);
   const CeedScalar Ec    = (umax*umax) / (cp*theta0);
-  const CeedScalar theta = theta0*( 1 + (Pr*Ec/3)*(1 - pow((y-center)/H,4)));
+  const CeedScalar theta = theta0*(1 + (Pr*Ec/3)
+                                   * (1 - Square(Square((y-center)/H))));
 
   const CeedScalar p = P0;
 
   const CeedScalar rho = p / (Rd*theta);
 
   q[0] = rho;
-  q[1] = rho * umax*(1 - pow((y-center)/H,2));
+  q[1] = rho * umax*(1 - Square((y-center)/H));
   q[2] = 0;
   q[3] = 0;
   q[4] = rho * (cv*theta) + .5 * (q[1]*q[1] + q[2]*q[2] + q[3]*q[3]) / rho;
diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h
index 0e7e552b73..31c198f38d 100644
--- a/examples/fluids/qfunctions/newtonian_types.h
+++ b/examples/fluids/qfunctions/newtonian_types.h
@@ -43,4 +43,6 @@ struct NewtonianIdealGasContext_ {
   StabilizationType stabilization;
 };
 
+CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; }
+
 #endif // newtonian_types_h

From debc926d78630bf06c8f3261d1cba58925b5c534 Mon Sep 17 00:00:00 2001
From: Jed Brown <jed@jedbrown.org>
Date: Tue, 10 May 2022 21:25:39 -0600
Subject: [PATCH 48/59] CI: re-enable fluids and solids testing on lv-cuda

---
 .gitlab-ci.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7cc9afbd4b..fc9acde7d8 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -240,8 +240,7 @@ lv-cuda:
 # -- PETSc with CUDA (minimal)
     - export PETSC_DIR=/home/jeth8984/petsc PETSC_ARCH=cuda-O && git -C $PETSC_DIR describe
     - echo "-------------- PETSc ---------------" && make -C $PETSC_DIR info
-#     Note: Skipping fluids and solids due to CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES on RTX 2080 Super
-    - nice make -k -j$NPROC_GPU JUNIT_BATCH="cuda" junit BACKENDS="$BACKENDS_GPU" search="petsc"
+    - nice make -k -j$NPROC_GPU JUNIT_BATCH="cuda" junit BACKENDS="$BACKENDS_GPU" search="petsc fluids solids"
 # Report status
     - touch .SUCCESS
   after_script:

From ba6664ae303f5b2ef46b3df96973d9bdc665107c Mon Sep 17 00:00:00 2001
From: James Wright <james@jameswright.xyz>
Date: Sun, 22 May 2022 16:44:21 -0600
Subject: [PATCH 49/59] Fluids - Add STG inflow (#868)

* doc(fluids): Add STG equations

* doc(fluids): Add basic data flow for STG

* doc(fluids): Add Shur et al. 2014 STG paper to bib

* doc(fluids): Specify STG inputs files, misc additions

* doc(fluids): Add intro for STG section

* fix(fluids): Add #include ceed.h for qfunctions

 - In the spirit of "include what you use"

* feat(fluids): Start work on stg_shur14.h

* doc-fix: Correct kappa_min definition

* Move STG setup functions to problems/stg_shur14

* feat: Add cholesky decomposition function

* fix: Correct stg_ctx malloc, reorganize creation

Co-authored-by: Jed Brown <jed@jedbrown.org>

* fix(fluids): Correct return values of functions

* style: Fix up style

* feat(fluids): Get file paths from PetscOptions

 - Also convert SetupSTGContext to return PetscErrorCode

* fix(fluids): Correct stg_ctx dereferencing

 - Also move to size_t for type of the offsets

* feat(fluids): Add funcs for processing STG*.dat files

* feat(fluids): Move to PetscOptions* for STG flags

* feat: Use `PetscMax` instead of macro

* fix(fluids): Correct/Refactor file reading functions

 - Move to `PetscSynchronizedFGets`
 - Remove `inline`
 - Pass `comm` between functions
 - Add `OpenPHASTADatFile` to DRY

* docs(fluids): Fix equation typo

* fix(fluids): Correct calculation of kappa

* feat(fluids): Complete STGShur14_Calc

* feat(fluids): Add InterpolateProfile helper func

* feat(fluids): Add CalcSpectrum helper func

* feat(fluids): Add to STGShur14_Calc qfunction

* fix: Add M_PI, Update SETERRQ functions

- Also update style

* fix: Correct interpolation outside of datarange

* fix: Add missing definition for ke in CalcSpectrum

* feat: Migrate context and func signatures, Misc

 - Create SetupSTGContext to be run in another Setup_____Context
   function
 - Migrate STGShur14Context, CreateSTGContext, and SetupSTGContext
   signatures to navierstokes.h
 - Add STG contexts to Physics and CeedData
 - Add missing CHKERRQ to PetscFClose
 - Move to SPDX license headers

* examples/fluids: Pass solution time via context label

* feat: Implement STG boundary integral

 - Add theta0 and implicit members to STGShur14Context
 - Tested via implementation to the blasius BL problem (though this will
   probably go against the code history)

* feat: Fix STG Stuff

* feat: Implement STG inflow for blasius BL

 - Note that fluctuations are turned off in this case

* examples/fluids: Add stg_mean_only flag

* examples/fluids: Check cholesky decomp for nans

 - Also correct location of cholesky decomposition in ReadSTGInflow()

* examples/fluids: Correct STG documentation

 - Missing a 2 sqrt(3/2) factor and didn't take square root of q

* examples/fluids: Fix STGShur14_Calc

 - Given the calculated spectrum, calculation of v' and u' verified
   against python implementation (which was validated previously against
   PHASTA)

* examples/fluids: Calc dXdx for boundary QFunctions

 - Also calculate h from the dXdx in STGShur14_Inflow
    - Replace h[0] result with constant dx spacing

* examples/fluids: Fix STG Spectra calculation

* examples/fluids: Fix build errors

 - Ran into an include cycle collision that resulted in over-defining
   SetupContext in advection.c
    - newtonian_types.h (which has SetupContext defined) ->
      stg_shur14_type.h -> navierstokes.h -> advection.c

* examples/fluids: Update and fix documentation

* examples/fluids: Correct dXdx comment, leave TODO

Co-authored-by: Jed Brown <jed@jedbrown.org>

* examples/fluids: Minor bib citation edits

Co-authored-by: Jed Brown <jed@jedbrown.org>

* examples/fluids: Add STGInflow.dat, fix blasius.yaml

* examples/fluids: int -> PetscInt | CeedInt

* examples/fluids: Style

* examples/fluids: Make Boolean names verb_noun format

 - Also changes the stg flag to `-stg_use`

* examples/fluids: Add STG test

* examples/fluids: Style fix up

* examples/fluids: Update docs

* examples/fluids: Implement weakT option for STG

* examples/fluids: Fix casting for ROCm

* examples/fluids: avoid PETSc dependency in qfunctions

* examples/fluids: header cleanup

* backends/hip: avoid redundant inline

* examples/fluids: avoid VLA in qfunctions

GPUs don't like VLA and some compilers reject it when targeting GPUs.

* examples/fluids: Create STG_NMODES_MAX

* examples/fluids: Refactor stg setup out of blasius.c

* examples/fluids: Fix misc GPU bugs

Co-authored-by: Jed Brown <jed@jedbrown.org>
---
 .../hip-gen/ceed-hip-gen-operator-build.cpp   |   2 +-
 .../hip-ref/ceed-hip-ref-qfunction-load.cpp   |   2 +-
 doc/sphinx/source/references.bib              |  12 +
 examples/fluids/README.md                     |  54 +++
 examples/fluids/STGInflow_blasius.dat         | 102 +++++
 examples/fluids/STGRand.dat                   |   3 +
 examples/fluids/blasius.yaml                  |   7 +-
 examples/fluids/index.md                      | 187 ++++++++-
 examples/fluids/navierstokes.c                |   2 +
 examples/fluids/problems/advection.c          |   6 +-
 examples/fluids/problems/advection2d.c        |   4 +-
 examples/fluids/problems/blasius.c            |  32 +-
 examples/fluids/problems/channel.c            |   2 +-
 examples/fluids/problems/densitycurrent.c     |   8 +-
 examples/fluids/problems/eulervortex.c        |   8 +-
 examples/fluids/problems/newtonian.c          |   8 +-
 examples/fluids/problems/shocktube.c          |   4 +-
 examples/fluids/problems/stg_shur14.c         | 388 ++++++++++++++++++
 examples/fluids/problems/stg_shur14.h         |  16 +
 examples/fluids/qfunctions/advection.h        |   3 +-
 examples/fluids/qfunctions/advection2d.h      |   3 +-
 examples/fluids/qfunctions/blasius.h          |   8 +-
 examples/fluids/qfunctions/channel.h          |  12 +-
 examples/fluids/qfunctions/eulervortex.h      | 100 ++---
 examples/fluids/qfunctions/mass.h             |   1 +
 examples/fluids/qfunctions/newtonian.h        | 122 +++---
 examples/fluids/qfunctions/setupgeo.h         |  46 ++-
 examples/fluids/qfunctions/setupgeo2d.h       |   1 +
 examples/fluids/qfunctions/shocktube.h        |  53 +--
 examples/fluids/qfunctions/stg_shur14.h       | 281 +++++++++++++
 examples/fluids/qfunctions/stg_shur14_type.h  |  44 ++
 .../fluids/tests-output/blasius_stgtest.yaml  |  39 ++
 .../fluids-navierstokes-blasius_STG.bin       | Bin 0 -> 7816 bytes
 .../fluids-navierstokes-blasius_STG_weakT.bin | Bin 0 -> 7816 bytes
 34 files changed, 1367 insertions(+), 193 deletions(-)
 create mode 100644 examples/fluids/STGInflow_blasius.dat
 create mode 100644 examples/fluids/STGRand.dat
 create mode 100644 examples/fluids/problems/stg_shur14.c
 create mode 100644 examples/fluids/problems/stg_shur14.h
 create mode 100644 examples/fluids/qfunctions/stg_shur14.h
 create mode 100644 examples/fluids/qfunctions/stg_shur14_type.h
 create mode 100644 examples/fluids/tests-output/blasius_stgtest.yaml
 create mode 100644 examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin
 create mode 100644 examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin

diff --git a/backends/hip-gen/ceed-hip-gen-operator-build.cpp b/backends/hip-gen/ceed-hip-gen-operator-build.cpp
index 3ede5d090d..10364f84f8 100644
--- a/backends/hip-gen/ceed-hip-gen-operator-build.cpp
+++ b/backends/hip-gen/ceed-hip-gen-operator-build.cpp
@@ -807,7 +807,7 @@ extern "C" int CeedHipGenOperatorBuild(CeedOperator op) {
   oper = "CeedKernel_Hip_gen_" + qFunctionName;
 
   code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n";
-  code << "#define CEED_QFUNCTION_HELPER inline __device__ __forceinline__\n";
+  code << "#define CEED_QFUNCTION_HELPER __device__ __forceinline__\n";
   code << "#define CeedPragmaSIMD\n";
   code << "#define CEED_ERROR_SUCCESS 0\n\n";
 
diff --git a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
index dddb6ae992..3d81f0ee26 100644
--- a/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
+++ b/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
@@ -61,7 +61,7 @@ extern "C" int CeedHipBuildQFunction(CeedQFunction qf) {
 
   // Defintions
   code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n";
-  code << "#define CEED_QFUNCTION_HELPER inline __device__ __forceinline__\n";
+  code << "#define CEED_QFUNCTION_HELPER __device__ __forceinline__\n";
   code << "#define CeedPragmaSIMD\n";
   code << "#define CEED_ERROR_SUCCESS 0\n";
   code << "#define CEED_Q_VLA 1\n\n";
diff --git a/doc/sphinx/source/references.bib b/doc/sphinx/source/references.bib
index f0cabd7df6..a1f6d382a9 100644
--- a/doc/sphinx/source/references.bib
+++ b/doc/sphinx/source/references.bib
@@ -190,3 +190,15 @@ @phdthesis{whitingStabilizedFEM1999
   langid = {english},
   school = {Rennselear Polytechnic Institute},
 }
+
+@article{shurSTG2014,
+  title = {Synthetic Turbulence Generators for {RANS-LES} Interfaces in Zonal Simulations of Aerodynamic and Aeroacoustic Problems},
+  author = {Shur, Michael L. and Spalart, Philippe R. and Strelets, Michael K. and Travin, Andrey K.},
+  year = {2014},
+  journal = {Flow, Turbulence and Combustion},
+  volume = {93},
+  number = {1},
+  pages = {63--92},
+  doi = {10.1007/s10494-014-9534-8},
+  langid = {english},
+}
diff --git a/examples/fluids/README.md b/examples/fluids/README.md
index 2774d30e6c..00bf3b2248 100644
--- a/examples/fluids/README.md
+++ b/examples/fluids/README.md
@@ -675,6 +675,11 @@ addition to the Newtonian Ideal Gas options:
   - Downward angle of the top face of the domain. This face serves as an outlet.
   - `5`
   - `degrees`
+
+* - `-stg_use`
+  - Whether to use the synthetic turbulence generator (STG) for the inflow conditions
+  - `false`
+  -
 :::
 
 This problem can be run with the `blasius.yaml` file via:
@@ -686,3 +691,52 @@ This problem can be run with the `blasius.yaml` file via:
 ```{literalinclude} ../../../../../examples/fluids/blasius.yaml
 :language: yaml
 ```
+
+#### STG Inflow for Flat Plate
+
+Using the STG inflow for the Blasius problem adds the following command-line
+options:
+
+:::{list-table} STG Inflow Runtime Options
+:header-rows: 1
+
+* - Option
+  - Description
+  - Default value
+  - Unit
+
+* - `-stg_inflow_path`
+  - Path to the STGInflow file
+  - `./STGInflow.dat`
+  -
+
+* - `-stg_rand_path`
+  - Path to the STGRand file
+  - `./STGRand.dat`
+  -
+
+* - `-stg_alpha`
+  - Growth rate of the wavemodes
+  - `1.01`
+  -
+
+* - `-stg_u0`
+  - Convective velocity, $U_0$
+  - `0.0`
+  - `m/s`
+
+* - `-stg_mean_only`
+  - Only impose the mean velocity (no fluctuations)
+  - `false`
+  -
+
+:::
+
+This problem can be run with the `blasius.yaml` file via:
+
+```
+./navierstokes -options_file blasius.yaml -stg_use true
+```
+
+Note the added `-stg_use true` flag. This overrides the `stg: use: false`
+setting in the `blasius.yaml` file, enabling the use of the STG inflow.
diff --git a/examples/fluids/STGInflow_blasius.dat b/examples/fluids/STGInflow_blasius.dat
new file mode 100644
index 0000000000..2b12fb7e88
--- /dev/null
+++ b/examples/fluids/STGInflow_blasius.dat
@@ -0,0 +1,102 @@
+101 14
+0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00
+4.200000000000002986e-06 6.641099321171224368e-01 -2.688275721802099928e-10 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.503522332443360197e-06 7.121033120206911038e-01 5.625993976502234810e-06 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.828979380670493501e-06 7.635650401647769980e-01 1.250142628597567334e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.177956305656680519e-06 8.187457644416862301e-01 2.084248362952813662e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.552152823557324425e-06 8.779142463706052224e-01 3.090016074070208580e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.953391484292584302e-06 9.413586701190954642e-01 4.296531750310341824e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+6.383626548402324046e-06 1.009388046123934402e+00 5.737481358311177256e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+6.844953505406707854e-06 1.082333716147739100e+00 7.451857365815305122e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+7.339619280032274129e-06 1.160550967101845909e+00 9.484772346308667436e-05 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+7.870033176013613616e-06 1.244420761495690142e+00 1.188839579386735820e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+8.438778610773199320e-06 1.334350999618601818e+00 1.458894855932344988e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+9.048625698133911575e-06 1.430771551613953863e+00 1.581560748020152847e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+9.702544740349270590e-06 1.534160149046711830e+00 1.747102975094025161e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.040372069516518436e-05 1.645020353203479058e+00 1.963713567554627713e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.115556868837608125e-05 1.763892116346153394e+00 2.240939354861193116e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.196175064743038823e-05 1.891354411586620987e+00 2.589894105302736720e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.282619313710034132e-05 2.028028052815169957e+00 3.023503103593241275e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.375310648408648365e-05 2.174578718417349066e+00 3.556785072386167831e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.474700528370354663e-05 2.331720193506353400e+00 4.207177082015844298e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.581273038852838925e-05 2.500217846462653437e+00 4.994908941792155924e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.695547248610555476e-05 2.680883861457040496e+00 5.889655958320385437e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.818079738054887299e-05 2.874548263010431093e+00 6.602281079523934521e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.949467310117369896e-05 3.082208252866376785e+00 7.503663649287806939e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.090349897019489394e-05 3.304875253311903460e+00 8.627999991332345117e-04 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.241413677106653097e-05 3.543633779312999721e+00 1.001503600974398660e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.403394416927094298e-05 3.799646720719475290e+00 1.171092923974609594e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.577081054833535852e-05 4.074092395254440113e+00 1.357158033230107053e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.763319543561788559e-05 4.368221208458913374e+00 1.540937148582322697e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.963016970501833084e-05 4.683605893906867657e+00 1.769681736351030443e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.177145975729444162e-05 5.021782554436745372e+00 2.051386266834596614e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.406749489316689041e-05 5.384299303649559221e+00 2.379111642902919863e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.652945810994693139e-05 5.772598926784612061e+00 2.710603356932465152e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.916934056909526795e-05 6.188959884991291460e+00 3.121345968641189156e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.200000000000003156e-05 6.635410093001123499e+00 3.625347520186018510e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.503522332443360367e-05 7.113125338991705071e+00 4.133124687953142543e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.828979380670492993e-05 7.625363738722001017e+00 4.761465522541206800e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.177956305656680519e-05 8.173969710347833484e+00 5.483355699626112773e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.552152823557324933e-05 8.761155029286637586e+00 6.288849893152771361e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.953391484292584133e-05 9.390306008098571411e+00 7.253785738742655163e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+6.383626548402323707e-05 1.006267945968176925e+01 8.310353563038722774e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+6.844953505406707345e-05 1.078260765264970900e+01 9.566162359885154245e-03 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+7.339619280032274467e-05 1.155157312880497855e+01 1.097617661229051932e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+7.870033176013614294e-05 1.237288013889156701e+01 1.259644675763084941e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+8.438778610773198642e-05 1.324659660273999329e+01 1.442147547686566111e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+9.048625698133931227e-05 1.411776521727952627e+01 1.550008743586907094e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+9.702544740349271267e-05 1.505189076749435273e+01 1.696394574577553271e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.040372069516519487e-04 1.605352297623211655e+01 1.888691112446131207e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.115556868837609277e-04 1.712754036217140907e+01 2.135507382076490615e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.196175064743037705e-04 1.827917400098332124e+01 2.446866950728195447e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.282619313710035521e-04 1.951403300364581384e+01 2.834428819338968841e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.375310648408647010e-04 2.083813183600515728e+01 3.311742048129684424e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.474700528370356052e-04 2.225791961264622643e+01 3.894539215743021698e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.581273038852837570e-04 2.378031150774935654e+01 4.601074578209482513e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.695547248610555476e-04 2.535075036351783240e+01 5.382694402282042073e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.818079738054887231e-04 2.661271076395308555e+01 5.835740987402229796e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.949467310117369964e-04 2.796586953681483223e+01 6.410968369375426301e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.090349897019489259e-04 2.941681733513018671e+01 7.130600537355311075e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.241413677106652961e-04 3.097262110011179459e+01 8.020473439929037829e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.403394416927094298e-04 3.264085848118182298e+01 9.110596407971430222e-02 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.577081054833535445e-04 3.413934342938573963e+01 1.014467216134999905e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.763319543561788762e-04 3.511021163702044134e+01 1.073549573507629751e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.963016970501833151e-04 3.615124179295903417e+01 1.147360075583040984e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.177145975729444433e-04 3.726750430629629562e+01 1.238529307048257255e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.406749489316688770e-04 3.830092385213693262e+01 1.332273456401334155e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.652945810994693275e-04 3.872325220338986185e+01 1.367304294075183113e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.916934056909526524e-04 3.917610105136403575e+01 1.410880782566600322e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.200000000000002885e-04 3.966167602757187893e+01 1.464521141298004425e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.503522332443360638e-04 3.976907916245198749e+01 1.475593379115296422e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.828979380670493128e-04 3.988424402352507769e+01 1.489351353721966587e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.177956305656680790e-04 3.996514744096749894e+01 1.500061967534003227e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.552152823557325069e-04 3.998205278113282901e+01 1.502314641381908289e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+5.953391484292584268e-04 3.999712512512702034e+01 1.504595355677841584e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+6.383626548402323436e-04 3.999865492740099882e+01 1.504828737190639643e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+6.844953505406714935e-04 3.999990491833290207e+01 1.505044551060463753e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+7.339619280032281786e-04 3.999998824873846814e+01 1.505059777264049770e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+7.870033176013621883e-04 3.999999720599615216e+01 1.505061502639564730e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+8.438778610773207045e-04 3.999999992423261119e+01 1.505062073695294655e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+9.048625698133920927e-04 3.999999998123077205e+01 1.505062086204041749e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+9.702544740349281025e-04 3.999999999957353225e+01 1.505062090609098069e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.040372069516519622e-03 3.999999999999438671e+01 1.505062090717198819e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.115556868837609249e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.196175064743037596e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.282619313710035413e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.375310648408646901e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.474700528370356161e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.581273038852837407e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.695547248610553741e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.818079738054885226e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+1.949467310117369801e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.090349897019489530e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.241413677106653124e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.403394416927094081e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.577081054833535879e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.763319543561788653e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+2.963016970501833151e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.177145975729444433e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.406749489316688879e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.652945810994693166e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+3.916934056909526958e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
+4.200000000000003210e-03 4.000000000000000000e+01 1.505062090718746193e-01 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00
diff --git a/examples/fluids/STGRand.dat b/examples/fluids/STGRand.dat
new file mode 100644
index 0000000000..9711655715
--- /dev/null
+++ b/examples/fluids/STGRand.dat
@@ -0,0 +1,3 @@
+2 7
+1.0E0 0.0E0                0.0E0                1.4E0 0.0E0 7.071067811865475E-1 7.071067811865475E-1
+0.0E0 7.071067811865475E-1 7.071067811865475E-1 2.4E0 1.0E0 0.0E0                0.0E0
diff --git a/examples/fluids/blasius.yaml b/examples/fluids/blasius.yaml
index 4d4cef22a7..aff73182e6 100644
--- a/examples/fluids/blasius.yaml
+++ b/examples/fluids/blasius.yaml
@@ -8,7 +8,7 @@ ts:
   max_time: 1.0e-3
 output_freq: 10
 
-#snes_max_it: 4 
+#snes_max_it: 4
 #snes_convergence_test: skip
 
 ## Linear Settings:
@@ -48,3 +48,8 @@ wall_comps: 1,2,3
 bc_inflow: 6
 bc_outflow: 5,4
 g: 0,0,0
+
+stg:
+  use: false
+  inflow_path: "./STGInflow_blasius.dat"
+  mean_only: true
diff --git a/examples/fluids/index.md b/examples/fluids/index.md
index 3eca2bb1f2..1e0588d1dc 100644
--- a/examples/fluids/index.md
+++ b/examples/fluids/index.md
@@ -390,17 +390,21 @@ The shock capturing viscosity is implemented following the first formulation des
 $$
 \nu_{SHOCK} = \tau_{SHOCK} u_{cha}^2
 $$
+
 where,
+
 $$
 \tau_{SHOCK} = \frac{h_{SHOCK}}{2u_{cha}} \left( \frac{ \,|\, \nabla \rho \,|\, h_{SHOCK}}{\rho_{ref}} \right)^{\beta}
 $$
 
-$\beta$ is a tuning parameter set between 1 (smoother shocks) and 2 (sharper shocks. The parameter $h_{SHOCK}$ is a length scale that is proportional to the element length in the direction of the density gradient unit vector. This density gradient unit vector is defined as $\hat{\bm j} = \frac{\nabla \rho}{|\nabla \rho|}. The original formulation of Tezduyar and Senga relies on the shape function gradient to define the element length scale, but this gradient is not available to qFunctions in libCEED. To avoid this problem, $h_{SHOCK}$ is defined in the current implementation as
+$\beta$ is a tuning parameter set between 1 (smoother shocks) and 2 (sharper shocks). The parameter $h_{SHOCK}$ is a length scale that is proportional to the element length in the direction of the density gradient unit vector. This density gradient unit vector is defined as $\hat{\bm j} = \frac{\nabla \rho}{|\nabla \rho|}$. The original formulation of Tezduyar and Senga relies on the shape function gradient to define the element length scale, but this gradient is not available to qFunctions in libCEED. To avoid this problem, $h_{SHOCK}$ is defined in the current implementation as
 
 $$
 h_{SHOCK} = 2 \left( C_{YZB} \,|\, \bm p \,|\, \right)^{-1}
 $$
+
 where
+
 $$
 p_k = \hat{j}_i \frac{\partial \xi_i}{x_k}
 $$
@@ -435,15 +439,182 @@ where $H$ is the channel half-height, $u_{\max}$ is the center velocity, $T_w$ i
 Boundary conditions are periodic in the streamwise direction, and no-slip and non-penetration boundary conditions at the walls.
 The flow is driven by a body force.
 
-## Blasius
+## Flat Plate Boundary Layer
+
+### Laminar Boundary Layer - Blasius
 
 Simulation of a laminar boundary layer flow, with the inflow being prescribed
 by a [Blasius similarity
 solution](https://en.wikipedia.org/wiki/Blasius_boundary_layer). At the inflow,
-the velocity is prescribed by the Blasius soution profile, temperature is set
-constant, and density is allowed to float. At the outlet, only the density is
-prescribed based on the user-set pressure. The wall is a no-slip,
-no-penetration, no-heat flux condition. The top of the domain is treated as an
-outflow and is tilted at a downward angle to ensure that flow is always exiting
-it.
+the velocity is prescribed by the Blasius solution profile, density is set
+constant, and temperature is allowed to float. Using `weakT: true`, density is
+allowed to float and temperature is set constant. At the outlet, a user-set
+pressure is used for pressure in the inviscid flux terms (all other inviscid
+flux terms use interior solution values). The viscous traction is also set to
+the analytic Blasius profile value at both the inflow and the outflow. The wall
+is a no-slip, no-penetration, no-heat flux condition. The top of the domain is
+treated as an outflow and is tilted at a downward angle to ensure that flow is
+always exiting it.
+
+### Turbulent Boundary Layer
+
+Simulating a turbulent boundary layer without modeling the turbulence requires
+resolving the turbulent flow structures. These structures may be introduced
+into the simulations either by allowing a laminar boundary layer to naturally
+transition to turbulence, or by imposing turbulent structures at the inflow. The
+latter approach has been taken here, specifically using a *synthetic turbulence
+generation* (STG) method.
+
+#### Synthetic Turbulence Generation (STG) Boundary Condition
+
+We use the STG method described in
+{cite}`shurSTG2014`. Below follows a re-description of the formulation to match
+the present notation, and then a description of the implementation and usage.
+
+##### Equation Formulation
+
+$$
+\bm{u}(\bm{x}, t) = \bm{\overline{u}}(\bm{x}) + \bm{C}(\bm{x}) \cdot \bm{v}'
+$$
+
+$$
+\begin{aligned}
+\bm{v}' &= 2 \sqrt{3/2} \sum^N_{n=1} \sqrt{q^n(\bm{x})} \bm{\sigma}^n \cos(\kappa^n \bm{d}^n \cdot \bm{\hat{x}}^n(\bm{x}, t) + \phi^n ) \\
+\bm{\hat{x}}^n &= \left[(x - U_0 t)\max(2\kappa_{\min}/\kappa^n, 0.1) , y, z  \right]^T
+\end{aligned}
+$$
+
+Here, we define the number of wavemodes $N$, set of random numbers $ \{\bm{\sigma}^n,
+\bm{d}^n, \phi^n\}_{n=1}^N$, the Cholesky decomposition of the Reynolds stress
+tensor $\bm{C}$ (such that $\bm{R} = \bm{CC}^T$ ), bulk velocity $U_0$,
+wavemode amplitude $q^n$, wavemode frequency $\kappa^n$, and $\kappa_{\min} =
+0.5 \min_{\bm{x}} (\kappa_e)$.
+
+$$
+\kappa_e = \frac{2\pi}{\min(2d_w, 3.0 l_t)}
+$$
+
+where $l_t$ is the turbulence length scale, and $d_w$ is the distance to the
+nearest wall.
+
+
+The set of wavemode frequencies is defined by a geometric distribution:
+
+$$
+\kappa^n = \kappa_{\min} (1 + \alpha)^{n-1} \ , \quad \forall n=1, 2, ... , N
+$$
+
+The wavemode amplitudes $q^n$ are defined by a model energy spectrum $E(\kappa)$:
+
+$$
+q^n = \frac{E(\kappa^n) \Delta \kappa^n}{\sum^N_{n=1} E(\kappa^n)\Delta \kappa^n} \ ,\quad \Delta \kappa^n = \kappa^n - \kappa^{n-1}
+$$
+
+$$ E(\kappa) = \frac{(\kappa/\kappa_e)^4}{[1 + 2.4(\kappa/\kappa_e)^2]^{17/6}} f_\eta f_{\mathrm{cut}} $$
+
+$$
+f_\eta = \exp \left[-(12\kappa /\kappa_\eta)^2 \right], \quad
+f_\mathrm{cut} = \exp \left( - \left [ \frac{4\max(\kappa-0.9\kappa_\mathrm{cut}, 0)}{\kappa_\mathrm{cut}} \right]^3 \right)
+$$
+
+$\kappa_\eta$ represents turbulent dissipation frequency, and is given as $2\pi
+(\nu^3/\varepsilon)^{-1/4}$ with $\nu$ the kinematic viscosity and
+$\varepsilon$ the turbulent dissipation. $\kappa_\mathrm{cut}$ approximates the
+effective cutoff frequency of the mesh (viewing the mesh as a filter on
+solution over $\Omega$) and is given by:
+
+$$
+\kappa_\mathrm{cut} = \frac{2\pi}{ 2\min\{ [\max(h_y, h_z, 0.3h_{\max}) + 0.1 d_w], h_{\max} \} }
+$$
+
+The enforcement of the boundary condition is identical to the Blasius inflow;
+it weakly enforces velocity, with the option of weakly enforcing either density
+or temperature using the `-weakT` flag.
+
+##### Initialization Data Flow
+
+Data flow for the initializing function (which creates the context data struct) is
+given below:
+```{mermaid}
+flowchart LR
+    subgraph STGInflow.dat
+    y
+    lt[l_t]
+    eps
+    Rij[R_ij]
+    ubar
+    end
+
+    subgraph STGRand.dat
+    rand[RN Set];
+    end
+
+    subgraph User Input
+    u0[U0];
+    end
+
+    subgraph init[Create Context Function]
+    ke[k_e]
+    N;
+    end
+    lt --Calc-->ke --Calc-->kn
+    y --Calc-->ke
+
+    subgraph context[Context Data]
+    yC[y]
+    randC[RN Set]
+    Cij[C_ij]
+    u0 --Copy--> u0C[U0]
+    kn[k^n];
+    ubarC[ubar]
+    ltC[l_t]
+    epsC[eps]
+    end
+    ubar --Copy--> ubarC;
+    y --Copy--> yC;
+    lt --Copy--> ltC;
+    eps --Copy--> epsC;
+
+    rand --Copy--> randC;
+    rand --> N --Calc--> kn;
+    Rij --Calc--> Cij[C_ij]
+```
+
+This is done once at runtime. The spatially-varying terms are then evaluated at
+each quadrature point on-the-fly, either by interpolation (for $l_t$,
+$\varepsilon$, $C_{ij}$, and $\overline{\bm u}$) or by calculation (for $q^n$).
+
+The `STGInflow.dat` file is a table of values at given distances from the wall.
+These values are then interpolated to a physical location (node or quadrature
+point). It has the following format:
+```
+[Total number of locations] 14
+[d_w] [u_1] [u_2] [u_3] [R_11] [R_22] [R_33] [R_12] [R_13] [R_23] [sclr_1] [sclr_2] [l_t] [eps]
+```
+where each `[  ]` item is a number in scientific notation (i.e., `3.1415E0`), and `sclr_1` and
+`sclr_2` are reserved for turbulence modeling variables. They are not used in
+this example.
+
+The `STGRand.dat` file is the table of the random number set, $\{\bm{\sigma}^n,
+\bm{d}^n, \phi^n\}_{n=1}^N$. It has the format:
+```
+[Number of wavemodes] 7
+[d_1] [d_2] [d_3] [phi] [sigma_1] [sigma_2] [sigma_3]
+```
 
+The following table is presented to help clarify the dimensionality of the
+numerous terms in the STG formulation.
+
+| Math            | Label  | $f(\bm{x})$? | $f(n)$? |
+|-----------------|--------|--------------|---------|
+| $ \{\bm{\sigma}^n, \bm{d}^n, \phi^n\}_{n=1}^N$        | RN Set | No           | Yes     |
+| $\bm{\overline{u}}$ | ubar | Yes | No |
+| $U_0$           | U0     | No           | No      |
+| $l_t$           | l_t    | Yes          | No   |
+| $\varepsilon$   | eps    | Yes          | No   |
+| $\bm{R}$        | R_ij   | Yes          | No      |
+| $\bm{C}$        | C_ij   | Yes          | No      |
+| $q^n$           | q^n    | Yes           | Yes     |
+| $\{\kappa^n\}_{n=1}^N$ | k^n  | No           | Yes      |
+| $h_i$           | h_i    | Yes          | No   |
+| $d_w$           | d_w    | Yes          | No   |
diff --git a/examples/fluids/navierstokes.c b/examples/fluids/navierstokes.c
index d3a05d798b..05d4a75bc8 100644
--- a/examples/fluids/navierstokes.c
+++ b/examples/fluids/navierstokes.c
@@ -35,6 +35,8 @@
 //TESTARGS(name="euler_implicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 1,1,2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-implicit.bin
 //TESTARGS(name="euler_explicit") -ceed {ceed_resource} -test -problem euler_vortex -degree 3 -dm_plex_box_faces 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 125,125,250 -dm_plex_dim 3 -units_meter 1e-4 -units_second 1e-4 -mean_velocity 1.4,-2.,0 -bc_inflow 4,6 -bc_outflow 3,5 -bc_slip_z 1,2 -vortex_strength 2 -ts_dt 1e-7 -ts_rk_type 5bs -ts_rtol 1e-10 -ts_atol 1e-10 -compare_final_state_atol 1E-7 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-euler-explicit.bin
 //TESTARGS(name="shocktube_explicit_su_yzb") -ceed {ceed_resource} -test -problem shocktube -degree 1 -dm_plex_box_faces 50,1,1 -units_meter 1e-2 units_second 1e-2 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1000,20,20 -dm_plex_dim 3 -bc_slip_x 5,6 -bc_slip_y 3,4 -bc_Slip_z 1,2 -yzb -stab su -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-shocktube-explicit-su-yzb.bin
+//TESTARGS(name="blasius_STG") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin
+//TESTARGS(name="blasius_STG_weakT") -ceed {ceed_resource} -test -options_file examples/fluids/tests-output/blasius_stgtest.yaml -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin -weakT
 
 /// @file
 /// Navier-Stokes example using PETSc
diff --git a/examples/fluids/problems/advection.c b/examples/fluids/problems/advection.c
index 5a3a748a58..830b41fad9 100644
--- a/examples/fluids/problems/advection.c
+++ b/examples/fluids/problems/advection.c
@@ -36,7 +36,7 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm,
   // ------------------------------------------------------
   problem->dim                               = 3;
   problem->q_data_size_vol                   = 10;
-  problem->q_data_size_sur                   = 4;
+  problem->q_data_size_sur                   = 10;
   problem->setup_vol.qfunction               = Setup;
   problem->setup_vol.qfunction_loc           = Setup_loc;
   problem->setup_sur.qfunction               = SetupBoundary;
@@ -64,7 +64,7 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm,
   PetscReal wind[3]      = {1., 0, 0}; // m/s
   PetscReal domain_min[3], domain_max[3], domain_size[3];
   ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-  for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
 
 
   // ------------------------------------------------------
@@ -169,7 +169,7 @@ PetscErrorCode NS_ADVECTION(ProblemData *problem, DM dm,
   // -- Scale variables to desired units
   E_wind *= Joule;
   rc = fabs(rc) * meter;
-  for (int i=0; i<3; i++) {
+  for (PetscInt i=0; i<3; i++) {
     wind[i] *= (meter/second);
     domain_size[i] *= meter;
   }
diff --git a/examples/fluids/problems/advection2d.c b/examples/fluids/problems/advection2d.c
index 64a826dec0..44f6e3547e 100644
--- a/examples/fluids/problems/advection2d.c
+++ b/examples/fluids/problems/advection2d.c
@@ -62,7 +62,7 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *ctx) {
   PetscReal wind[2]      = {1., 0.};   // m/s
   PetscReal domain_min[2], domain_max[2], domain_size[2];
   ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-  for (int i=0; i<2; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  for (PetscInt i=0; i<2; i++) domain_size[i] = domain_max[i] - domain_min[i];
 
 
   // ------------------------------------------------------
@@ -151,7 +151,7 @@ PetscErrorCode NS_ADVECTION2D(ProblemData *problem, DM dm, void *ctx) {
   // -- Scale variables to desired units
   E_wind *= Joule;
   rc = fabs(rc) * meter;
-  for (int i=0; i<2; i++) {
+  for (PetscInt i=0; i<2; i++) {
     wind[i] *= (meter/second);
     domain_size[i] *= meter;
   }
diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c
index 4d5d20b0b4..94033dec8f 100644
--- a/examples/fluids/problems/blasius.c
+++ b/examples/fluids/problems/blasius.c
@@ -10,15 +10,16 @@
 
 #include "../navierstokes.h"
 #include "../qfunctions/blasius.h"
+#include "stg_shur14.h"
 
 /* \brief Modify the domain and mesh for blasius
  *
- * Modifies mesh such that `N` elements are within 1.2*`delta0` with a geometric
- * growth ratio of `growth`. Excess elements are then geometrically distributed
- * to the top surface.
+ * Modifies mesh such that `N` elements are within `refine_height` with a
+ * geometric growth ratio of `growth`. Excess elements are then distributed
+ * linearly in logspace to the top surface.
  *
  * The top surface is also angled downwards, so that it may be used as an
- * outflow. It's angle is controlled by top_angle (in units of degrees).
+ * outflow. Its angle is controlled by `top_angle` (in units of degrees).
  */
 PetscErrorCode modifyMesh(DM dm, PetscInt dim, PetscReal growth, PetscInt N,
                           PetscReal refine_height, PetscReal top_angle) {
@@ -33,7 +34,7 @@ PetscErrorCode modifyMesh(DM dm, PetscInt dim, PetscReal growth, PetscInt N,
 
   // Get domain boundary information
   ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-  for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
 
   // Get coords array from DM
   ierr = DMGetCoordinatesLocal(dm, &vec_coords); CHKERRQ(ierr);
@@ -55,7 +56,7 @@ PetscErrorCode modifyMesh(DM dm, PetscInt dim, PetscReal growth, PetscInt N,
   // Calculate log of sizing outside BL
   PetscReal logdy = (log(domain_max[1]) - log(refine_height)) / (faces[1] - N);
 
-  for(int i=0; i<ncoords; i++) {
+  for(PetscInt i=0; i<ncoords; i++) {
     PetscInt y_box_index = round(coords[i][1]/dybox);
     if(y_box_index <= N) {
       coords[i][1] = (1 - (coords[i][0]/domain_max[0])*angle_coeff) *
@@ -76,9 +77,10 @@ PetscErrorCode modifyMesh(DM dm, PetscInt dim, PetscReal growth, PetscInt N,
 PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) {
 
   PetscInt ierr;
-  User              user = *(User *)ctx;
-  MPI_Comm          comm = PETSC_COMM_WORLD;
-  BlasiusContext    blasius_ctx;
+  User           user    = *(User *)ctx;
+  MPI_Comm       comm    = PETSC_COMM_WORLD;
+  PetscBool      use_stg = PETSC_FALSE;
+  BlasiusContext blasius_ctx;
   NewtonianIdealGasContext newtonian_ig_ctx;
   CeedQFunctionContext blasius_context;
 
@@ -92,10 +94,10 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) {
   CeedQFunctionContextDestroy(&problem->ics.qfunction_context);
   problem->ics.qfunction               = ICsBlasius;
   problem->ics.qfunction_loc           = ICsBlasius_loc;
-  problem->apply_inflow.qfunction      = Blasius_Inflow;
-  problem->apply_inflow.qfunction_loc  = Blasius_Inflow_loc;
   problem->apply_outflow.qfunction     = Blasius_Outflow;
   problem->apply_outflow.qfunction_loc = Blasius_Outflow_loc;
+  problem->apply_inflow.qfunction      = Blasius_Inflow;
+  problem->apply_inflow.qfunction_loc  = Blasius_Inflow_loc;
 
   // CeedScalar mu = .04; // Pa s, dynamic viscosity
   CeedScalar Uinf          = 40;   // m/s
@@ -130,6 +132,8 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) {
   ierr = PetscOptionsScalar("-top_angle",
                             "Geometric top_angle rate of boundary layer mesh",
                             NULL, top_angle, &top_angle, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsBool("-stg_use", "Use STG inflow boundary condition",
+                          NULL, use_stg, &use_stg, NULL); CHKERRQ(ierr);
   PetscOptionsEnd();
 
   PetscScalar meter           = user->units->meter;
@@ -149,13 +153,14 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) {
   CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
                               CEED_MEM_HOST, &newtonian_ig_ctx);
 
-  blasius_ctx->weakT     = !!weakT;
+  blasius_ctx->weakT     = weakT;
   blasius_ctx->Uinf      = Uinf;
   blasius_ctx->delta0    = delta0;
   blasius_ctx->theta0    = theta0;
   blasius_ctx->P0        = P0;
   blasius_ctx->implicit  = user->phys->implicit;
   blasius_ctx->newtonian_ctx = *newtonian_ig_ctx;
+
   CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
                                   &newtonian_ig_ctx);
 
@@ -171,5 +176,8 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) {
                                     &problem->apply_inflow.qfunction_context);
   CeedQFunctionContextReferenceCopy(blasius_context,
                                     &problem->apply_outflow.qfunction_context);
+  if (use_stg) {
+    ierr = SetupSTG(comm, dm, problem, user, weakT, theta0, P0); CHKERRQ(ierr);
+  }
   PetscFunctionReturn(0);
 }
diff --git a/examples/fluids/problems/channel.c b/examples/fluids/problems/channel.c
index eb1fb7bed7..388ea8a4ec 100644
--- a/examples/fluids/problems/channel.c
+++ b/examples/fluids/problems/channel.c
@@ -63,7 +63,7 @@ PetscErrorCode NS_CHANNEL(ProblemData *problem, DM dm,
   {
     PetscReal domain_min[3], domain_max[3], domain_size[3];
     ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-    for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+    for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
 
     H      = 0.5*domain_size[1]*meter;
     center = H + domain_min[1]*meter;
diff --git a/examples/fluids/problems/densitycurrent.c b/examples/fluids/problems/densitycurrent.c
index b6a1f7de1a..92bc14d5c9 100644
--- a/examples/fluids/problems/densitycurrent.c
+++ b/examples/fluids/problems/densitycurrent.c
@@ -41,7 +41,7 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *ctx) {
   PetscReal domain_min[3], domain_max[3], domain_size[3];
   ierr = DMGetBoundingBox(dm, domain_min, domain_max);
   CHKERRQ(ierr);
-  for (int i = 0; i < 3; i++)
+  for (PetscInt i = 0; i < 3; i++)
     domain_size[i] = domain_max[i] - domain_min[i];
 
   // ------------------------------------------------------
@@ -61,7 +61,7 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *ctx) {
   ierr = PetscOptionsScalar("-rc", "Characteristic radius of thermal bubble",
                             NULL, rc, &rc, NULL);
   CHKERRQ(ierr);
-  for (int i = 0; i < 3; i++)
+  for (PetscInt i = 0; i < 3; i++)
     center[i] = .5 * domain_size[i];
   PetscInt n = problem->dim;
   ierr = PetscOptionsRealArray("-center", "Location of bubble center", NULL,
@@ -77,7 +77,7 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *ctx) {
     PetscReal norm = PetscSqrtReal(PetscSqr(dc_axis[0]) + PetscSqr(dc_axis[1]) +
                                    PetscSqr(dc_axis[2]));
     if (norm > 0) {
-      for (int i = 0; i < 3; i++)
+      for (PetscInt i = 0; i < 3; i++)
         dc_axis[i] /= norm;
     }
   }
@@ -93,7 +93,7 @@ PetscErrorCode NS_DENSITY_CURRENT(ProblemData *problem, DM dm, void *ctx) {
   thetaC *= Kelvin;
   P0 *= Pascal;
   N *= (1. / second);
-  for (int i = 0; i < 3; i++)
+  for (PetscInt i = 0; i < 3; i++)
     center[i] *= meter;
 
   setup_context->theta0 = theta0;
diff --git a/examples/fluids/problems/eulervortex.c b/examples/fluids/problems/eulervortex.c
index 97fe9337da..b851afb6ee 100644
--- a/examples/fluids/problems/eulervortex.c
+++ b/examples/fluids/problems/eulervortex.c
@@ -32,7 +32,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *ctx) {
   // ------------------------------------------------------
   problem->dim                               = 3;
   problem->q_data_size_vol                   = 10;
-  problem->q_data_size_sur                   = 4;
+  problem->q_data_size_sur                   = 10;
   problem->setup_vol.qfunction               = Setup;
   problem->setup_vol.qfunction_loc           = Setup_loc;
   problem->setup_sur.qfunction               = SetupBoundary;
@@ -62,7 +62,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *ctx) {
             mean_velocity[3] = {1., 1., 0}; // m/s
   PetscReal domain_min[3], domain_max[3], domain_size[3];
   ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-  for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
 
   // ------------------------------------------------------
   //             Create the PETSc context
@@ -83,7 +83,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *ctx) {
   ierr = PetscOptionsRealArray("-mean_velocity", "Background velocity vector",
                                NULL, mean_velocity, &n, &user_velocity);
   CHKERRQ(ierr);
-  for (int i=0; i<3; i++) center[i] = .5*domain_size[i];
+  for (PetscInt i=0; i<3; i++) center[i] = .5*domain_size[i];
   n = problem->dim;
   ierr = PetscOptionsRealArray("-center", "Location of vortex center",
                                NULL, center, &n, NULL); CHKERRQ(ierr);
@@ -131,7 +131,7 @@ PetscErrorCode NS_EULER_VORTEX(ProblemData *problem, DM dm, void *ctx) {
   //           Set up the libCEED context
   // ------------------------------------------------------
   // -- Scale variables to desired units
-  for (int i=0; i<3; i++) {
+  for (PetscInt i=0; i<3; i++) {
     center[i] *= meter;
     domain_size[i] *= meter;
     mean_velocity[i] *= (meter/second);
diff --git a/examples/fluids/problems/newtonian.c b/examples/fluids/problems/newtonian.c
index 11f9cc8487..66028b3807 100644
--- a/examples/fluids/problems/newtonian.c
+++ b/examples/fluids/problems/newtonian.c
@@ -32,7 +32,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) {
   // ------------------------------------------------------
   problem->dim                               = 3;
   problem->q_data_size_vol                   = 10;
-  problem->q_data_size_sur                   = 4;
+  problem->q_data_size_sur                   = 10;
   problem->setup_vol.qfunction               = Setup;
   problem->setup_vol.qfunction_loc           = Setup_loc;
   problem->ics.qfunction                     = ICsNewtonianIG;
@@ -65,7 +65,7 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) {
   CeedScalar Ctau_E  = 1.0;          // TODO make function of degree
   PetscReal domain_min[3], domain_max[3], domain_size[3];
   ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-  for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
 
   // ------------------------------------------------------
   //             Create the PETSc context
@@ -166,8 +166,8 @@ PetscErrorCode NS_NEWTONIAN_IG(ProblemData *problem, DM dm, void *ctx) {
   cp     *= J_per_kg_K;
   mu     *= Pascal * second;
   k      *= W_per_m_K;
-  for (int i=0; i<3; i++) domain_size[i] *= meter;
-  for (int i=0; i<3; i++) g[i]           *= m_per_squared_s;
+  for (PetscInt i=0; i<3; i++) domain_size[i] *= meter;
+  for (PetscInt i=0; i<3; i++) g[i]           *= m_per_squared_s;
   problem->dm_scale = meter;
 
   // -- Setup Context
diff --git a/examples/fluids/problems/shocktube.c b/examples/fluids/problems/shocktube.c
index 542c198c98..0e234556a5 100644
--- a/examples/fluids/problems/shocktube.c
+++ b/examples/fluids/problems/shocktube.c
@@ -76,7 +76,7 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *ctx) {
   //                                          2 for sharp shocks
   PetscReal domain_min[3], domain_max[3], domain_size[3];
   ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
-  for (int i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+  for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
 
   // ------------------------------------------------------
   //             Create the PETSc context
@@ -133,7 +133,7 @@ PetscErrorCode NS_SHOCKTUBE(ProblemData *problem, DM dm, void *ctx) {
   //           Set up the libCEED context
   // ------------------------------------------------------
   // -- Scale variables to desired units
-  for (int i=0; i<3; i++) {
+  for (PetscInt i=0; i<3; i++) {
     domain_size[i] *= meter;
     domain_min[i] *= meter;
   }
diff --git a/examples/fluids/problems/stg_shur14.c b/examples/fluids/problems/stg_shur14.c
new file mode 100644
index 0000000000..1be568a104
--- /dev/null
+++ b/examples/fluids/problems/stg_shur14.c
@@ -0,0 +1,388 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Implementation of the Synthetic Turbulence Generation (STG) algorithm
+/// presented in Shur et al. 2014
+
+#include <stdlib.h>
+#include <math.h>
+#include <petsc.h>
+#include "../navierstokes.h"
+#include "stg_shur14.h"
+#include "../qfunctions/stg_shur14.h"
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846
+#endif
+
+/*
+ * @brief Perform Cholesky decomposition on array of symmetric 3x3 matrices
+ *
+ * This assumes the input matrices are in order [11,22,33,12,13,23]. This
+ * format is also used for the output.
+ *
+ * @param[in]  comm   MPI_Comm
+ * @param[in]  nprofs Number of matrices in Rij
+ * @param[in]  Rij    Array of the symmetric matrices [6,nprofs]
+ * @param[out] Cij    Array of the Cholesky Decomposition matrices, [6,nprofs]
+ */
+PetscErrorCode CalcCholeskyDecomp(MPI_Comm comm, PetscInt nprofs,
+                                  const CeedScalar Rij[6][nprofs], CeedScalar Cij[6][nprofs]) {
+  // Factor each profile point independently; rows are ordered [11,22,33,12,13,23]
+  PetscFunctionBeginUser;
+  for (PetscInt i=0; i<nprofs; i++) {
+    Cij[0][i] = sqrt(Rij[0][i]);
+    Cij[3][i] = Rij[3][i] / Cij[0][i];
+    Cij[1][i] = sqrt(Rij[1][i] - pow(Cij[3][i], 2) );
+    Cij[4][i] = Rij[4][i] / Cij[0][i];
+    Cij[5][i] = (Rij[5][i] - Cij[3][i]*Cij[4][i]) / Cij[1][i];
+    Cij[2][i] = sqrt(Rij[2][i] - pow(Cij[4][i], 2) - pow(Cij[5][i], 2));
+
+    if (isnan(Cij[0][i]) || isnan(Cij[1][i]) || isnan(Cij[2][i]))
+      SETERRQ(comm, -1, "Cholesky decomposition failed at profile point %" PetscInt_FMT ". "
+              "Either STGInflow has non-SPD matrix or contains nan.", i+1);
+  }
+  PetscFunctionReturn(0);
+}
+
+
+/*
+ * @brief Open a PHASTA *.dat file, grabbing dimensions and file pointer
+ *
+ * This function opens the file specified by `path` using `PetscFOpen` and
+ * passes the file pointer in `fp`. It is not closed in this function, thus
+ * `fp` must be closed sometime after this function has been called (using
+ * `PetscFClose` for example).
+ *
+ * Assumes that the first line of the file has the number of rows and columns
+ * as the only two entries, separated by a single space
+ *
+ * @param[in] comm MPI_Comm for the program
+ * @param[in] path Path to the file
+ * @param[in] char_array_len Length of the character array that should contain each line
+ * @param[out] dims Dimensions of the file, taken from the first line of the file
+ * @param[out] fp File pointer to the opened file
+ */
+static PetscErrorCode OpenPHASTADatFile(const MPI_Comm comm,
+                                        const char path[PETSC_MAX_PATH_LEN], const PetscInt char_array_len,
+                                        PetscInt dims[2], FILE **fp) {
+  PetscErrorCode ierr;
+  int ndims; // PetscStrToArray() reports the token count through an int*, not a PetscInt*
+  char line[char_array_len];
+  char **array;
+
+  PetscFunctionBeginUser;
+  ierr = PetscFOpen(comm, path, "r", fp); CHKERRQ(ierr);
+  ierr = PetscSynchronizedFGets(comm, *fp, char_array_len, line); CHKERRQ(ierr);
+  ierr = PetscStrToArray(line, ' ', &ndims, &array); CHKERRQ(ierr);
+  if (ndims != 2) SETERRQ(comm, -1,
+                            "Found %d dimensions instead of 2 on the first line of %s",
+                            ndims, path);
+
+  for (PetscInt i=0; i<ndims; i++)  dims[i] = atoi(array[i]);
+  ierr = PetscStrToArrayDestroy(ndims, array); CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+/*
+ * @brief Get the number of rows for the PHASTA file at path
+ *
+ * Assumes that the first line of the file has the number of rows and columns
+ * as the only two entries, separated by a single space
+ *
+ * @param[in] comm MPI_Comm for the program
+ * @param[in] path Path to the file
+ * @param[out] nrows Number of rows
+ */
+static PetscErrorCode GetNRows(const MPI_Comm comm,
+                               const char path[PETSC_MAX_PATH_LEN], PetscInt *nrows) {
+  FILE *dat_file;
+  PetscInt file_dims[2];               // [rows, columns] from the first line
+  const PetscInt line_len = 512;
+  PetscErrorCode ierr;
+
+  PetscFunctionBeginUser;
+  ierr = OpenPHASTADatFile(comm, path, line_len, file_dims, &dat_file); CHKERRQ(ierr);
+  *nrows = file_dims[0];
+  ierr = PetscFClose(comm, dat_file); CHKERRQ(ierr);  // only the header line was needed
+  PetscFunctionReturn(0);
+}
+
+/*
+ * @brief Read the STGInflow file and load the contents into stg_ctx
+ *
+ * Assumes that the first line of the file has the number of rows and columns
+ * as the only two entries, separated by a single space.
+ * Assumes there are 14 columns in the file
+ *
+ * Function calculates the Cholesky decomposition from the Reynolds stress
+ * profile found in the file
+ *
+ * @param[in] comm MPI_Comm for the program
+ * @param[in] path Path to the STGInflow.dat file
+ * @param[inout] stg_ctx STGShur14Context where the data will be loaded into
+ */
+static PetscErrorCode ReadSTGInflow(const MPI_Comm comm,
+                                    const char path[PETSC_MAX_PATH_LEN], STGShur14Context stg_ctx) {
+  PetscErrorCode ierr;
+  PetscInt dims[2];
+  int ndims; // PetscStrToArray() reports the token count through an int*, not a PetscInt*
+  FILE *fp;
+  const PetscInt char_array_len=512;
+  char line[char_array_len];
+  char **array;
+
+  PetscFunctionBeginUser;
+  ierr = OpenPHASTADatFile(comm, path, char_array_len, dims, &fp); CHKERRQ(ierr);
+
+  CeedScalar rij[6][stg_ctx->nprofs];
+  CeedScalar *prof_dw = &stg_ctx->data[stg_ctx->offsets.prof_dw];
+  CeedScalar *eps = &stg_ctx->data[stg_ctx->offsets.eps];
+  CeedScalar *lt = &stg_ctx->data[stg_ctx->offsets.lt];
+  CeedScalar (*ubar)[stg_ctx->nprofs] = (CeedScalar (*)[stg_ctx->nprofs])
+                                        &stg_ctx->data[stg_ctx->offsets.ubar];
+
+  for (PetscInt i=0; i<stg_ctx->nprofs; i++) {
+    ierr = PetscSynchronizedFGets(comm, fp, char_array_len, line); CHKERRQ(ierr);
+    ierr = PetscStrToArray(line, ' ', &ndims, &array); CHKERRQ(ierr);
+    if (ndims < dims[1]) SETERRQ(comm, -1,
+                                   "Line %" PetscInt_FMT " of %s does not contain enough columns (%d instead of %" PetscInt_FMT ")",
+                                   i, path, ndims, dims[1]);
+
+    prof_dw[i] = (CeedScalar) atof(array[0]);
+    ubar[0][i] = (CeedScalar) atof(array[1]);
+    ubar[1][i] = (CeedScalar) atof(array[2]);
+    ubar[2][i] = (CeedScalar) atof(array[3]);
+    rij[0][i]  = (CeedScalar) atof(array[4]);
+    rij[1][i]  = (CeedScalar) atof(array[5]);
+    rij[2][i]  = (CeedScalar) atof(array[6]);
+    rij[3][i]  = (CeedScalar) atof(array[7]);
+    rij[4][i]  = (CeedScalar) atof(array[8]);
+    rij[5][i]  = (CeedScalar) atof(array[9]);
+    lt[i]      = (CeedScalar) atof(array[12]);
+    eps[i]     = (CeedScalar) atof(array[13]);
+
+    if (prof_dw[i] < 0) SETERRQ(comm, -1,
+                                  "Distance to wall in %s cannot be negative", path);
+    if (lt[i] < 0) SETERRQ(comm, -1,
+                             "Turbulent length scale in %s cannot be negative", path);
+    if (eps[i] < 0) SETERRQ(comm, -1,
+                              "Turbulent dissipation in %s cannot be negative", path);
+    ierr = PetscStrToArrayDestroy(ndims, array); CHKERRQ(ierr); // release this row's tokens
+  }
+  CeedScalar (*cij)[stg_ctx->nprofs]  = (CeedScalar (*)[stg_ctx->nprofs])
+                                        &stg_ctx->data[stg_ctx->offsets.cij];
+  ierr = CalcCholeskyDecomp(comm, stg_ctx->nprofs, rij, cij); CHKERRQ(ierr);
+  ierr = PetscFClose(comm, fp); CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+/*
+ * @brief Read the STGRand file and load the contents into stg_ctx
+ *
+ * Assumes that the first line of the file has the number of rows and columns
+ * as the only two entries, separated by a single space.
+ * Assumes there are 7 columns in the file
+ *
+ * @param[in]    comm    MPI_Comm for the program
+ * @param[in]    path    Path to the STGRand.dat file
+ * @param[inout] stg_ctx STGShur14Context where the data will be loaded into
+ */
+static PetscErrorCode ReadSTGRand(const MPI_Comm comm,
+                                  const char path[PETSC_MAX_PATH_LEN],
+                                  STGShur14Context stg_ctx) {
+  PetscErrorCode ierr;
+  PetscInt dims[2];
+  int ndims; // PetscStrToArray() reports the token count through an int*, not a PetscInt*
+  FILE *fp;
+  const PetscInt char_array_len = 512;
+  char line[char_array_len];
+  char **array;
+
+  PetscFunctionBeginUser;
+  ierr = OpenPHASTADatFile(comm, path, char_array_len, dims, &fp); CHKERRQ(ierr);
+  CeedScalar *phi = &stg_ctx->data[stg_ctx->offsets.phi];
+  CeedScalar (*d)[stg_ctx->nmodes]     = (CeedScalar (*)[stg_ctx->nmodes])
+                                         &stg_ctx->data[stg_ctx->offsets.d];
+  CeedScalar (*sigma)[stg_ctx->nmodes] = (CeedScalar (*)[stg_ctx->nmodes])
+                                         &stg_ctx->data[stg_ctx->offsets.sigma];
+
+  for (PetscInt i=0; i<stg_ctx->nmodes; i++) {
+    ierr = PetscSynchronizedFGets(comm, fp, char_array_len, line); CHKERRQ(ierr);
+    ierr = PetscStrToArray(line, ' ', &ndims, &array); CHKERRQ(ierr);
+    if (ndims < dims[1]) SETERRQ(comm, -1,
+                                   "Line %" PetscInt_FMT " of %s does not contain enough columns (%d instead of %" PetscInt_FMT ")",
+                                   i, path, ndims, dims[1]);
+
+    d[0][i]     = (CeedScalar) atof(array[0]);
+    d[1][i]     = (CeedScalar) atof(array[1]);
+    d[2][i]     = (CeedScalar) atof(array[2]);
+    phi[i]      = (CeedScalar) atof(array[3]);
+    sigma[0][i] = (CeedScalar) atof(array[4]);
+    sigma[1][i] = (CeedScalar) atof(array[5]);
+    sigma[2][i] = (CeedScalar) atof(array[6]);
+    ierr = PetscStrToArrayDestroy(ndims, array); CHKERRQ(ierr); // release this row's tokens
+  }
+  ierr = PetscFClose(comm, fp); CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+/*
+ * @brief Read STG data from input paths and put in STGShur14Context
+ *
+ * Reads data from input paths and puts them into a new STGShur14Context.
+ * Data stored initially in `*pstg_ctx` is copied over to the new instance;
+ * the original `*pstg_ctx` allocation is then freed and replaced by it.
+ *
+ * @param[in]    comm            MPI_Comm for the program
+ * @param[in]    dm              DM for the program
+ * @param[in]    stg_inflow_path Path to STGInflow.dat file
+ * @param[in]    stg_rand_path   Path to STGRand.dat file
+ * @param[inout] pstg_ctx        Pointer to STGShur14Context where the data will be loaded into
+ */
+PetscErrorCode GetSTGContextData(const MPI_Comm comm, const DM dm,
+                                 char stg_inflow_path[PETSC_MAX_PATH_LEN],
+                                 char stg_rand_path[PETSC_MAX_PATH_LEN],
+                                 STGShur14Context *pstg_ctx) {
+  PetscErrorCode ierr;
+  PetscInt nmodes, nprofs;
+  STGShur14Context stg_ctx;
+  PetscFunctionBeginUser;
+
+  // Row counts of the two input files determine the size of the trailing data array
+  ierr = GetNRows(comm, stg_rand_path, &nmodes); CHKERRQ(ierr);
+  ierr = GetNRows(comm, stg_inflow_path, &nprofs); CHKERRQ(ierr);
+  if (nmodes > STG_NMODES_MAX)
+    SETERRQ(comm, 1, "Number of wavemodes in %s (%" PetscInt_FMT ") exceeds STG_NMODES_MAX (%d). "
+            "Change size of STG_NMODES_MAX and recompile", stg_rand_path, nmodes,
+            STG_NMODES_MAX);
+
+  {
+    STGShur14Context s;
+    ierr = PetscCalloc1(1, &s); CHKERRQ(ierr);
+    *s = **pstg_ctx;
+    s->nmodes = nmodes;
+    s->nprofs = nprofs;
+    s->offsets.sigma   = 0;
+    s->offsets.d       = nmodes*3;
+    s->offsets.phi     = s->offsets.d       + nmodes*3;
+    s->offsets.kappa   = s->offsets.phi     + nmodes;
+    s->offsets.prof_dw = s->offsets.kappa   + nmodes;
+    s->offsets.ubar    = s->offsets.prof_dw + nprofs;
+    s->offsets.cij     = s->offsets.ubar    + nprofs*3;
+    s->offsets.eps     = s->offsets.cij     + nprofs*6;
+    s->offsets.lt      = s->offsets.eps     + nprofs;
+    PetscInt total_num_scalars = s->offsets.lt + nprofs;
+    s->total_bytes = sizeof(*stg_ctx) + total_num_scalars*sizeof(stg_ctx->data[0]);
+    ierr = PetscMalloc(s->total_bytes, &stg_ctx); CHKERRQ(ierr);
+    *stg_ctx = *s;
+    ierr = PetscFree(s); CHKERRQ(ierr);
+  }
+
+  ierr = ReadSTGInflow(comm, stg_inflow_path, stg_ctx); CHKERRQ(ierr);
+  ierr = ReadSTGRand(comm, stg_rand_path, stg_ctx); CHKERRQ(ierr);
+
+  // -- Calculate kappa
+  {
+    CeedScalar *kappa = &stg_ctx->data[stg_ctx->offsets.kappa];
+    CeedScalar *prof_dw = &stg_ctx->data[stg_ctx->offsets.prof_dw];
+    CeedScalar *lt = &stg_ctx->data[stg_ctx->offsets.lt];
+    CeedScalar le, le_max=0;
+    CeedPragmaSIMD
+    for (PetscInt i=0; i<stg_ctx->nprofs; i++) {
+      le = PetscMin(2*prof_dw[i], 3*lt[i]);
+      if (le_max < le) le_max = le;
+    }
+    CeedScalar kmin = M_PI/le_max;
+
+    CeedPragmaSIMD
+    for (PetscInt i=0; i<stg_ctx->nmodes; i++) {
+      kappa[i] = kmin*pow(stg_ctx->alpha, i);
+    }
+  } //end calculate kappa
+
+  ierr = PetscFree(*pstg_ctx); CHKERRQ(ierr); // contents were copied into stg_ctx above
+  *pstg_ctx = stg_ctx;
+  PetscFunctionReturn(0);
+}
+
+// Configure the STG inflow boundary: read the -stg_* options and input files,
+// build the STGShur14Context, and install it as problem->apply_inflow
+PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm, ProblemData *problem,
+                        User user, const bool prescribe_T,
+                        const CeedScalar theta0, const CeedScalar P0) {
+  PetscErrorCode ierr;
+  char stg_inflow_path[PETSC_MAX_PATH_LEN] = "./STGInflow.dat";
+  char stg_rand_path[PETSC_MAX_PATH_LEN] = "./STGRand.dat";
+  PetscBool mean_only = PETSC_FALSE;
+  CeedScalar u0=0.0, alpha=1.01;
+  STGShur14Context stg_ctx;
+  CeedQFunctionContext stg_context;
+  NewtonianIdealGasContext newtonian_ig_ctx;
+  PetscFunctionBeginUser;
+  // Get options
+  PetscOptionsBegin(comm, NULL, "STG Boundary Condition Options", NULL);
+  ierr = PetscOptionsString("-stg_inflow_path", "Path to STGInflow.dat", NULL,
+                            stg_inflow_path, stg_inflow_path,
+                            sizeof(stg_inflow_path), NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsString("-stg_rand_path", "Path to STGRand.dat", NULL,
+                            stg_rand_path,stg_rand_path,
+                            sizeof(stg_rand_path), NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsReal("-stg_alpha", "Growth rate of the wavemodes", NULL,
+                          alpha, &alpha, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsReal("-stg_u0", "Advective velocity for the fluctuations",
+                          NULL, u0, &u0, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsBool("-stg_mean_only", "Only apply mean profile",
+                          NULL, mean_only, &mean_only, NULL); CHKERRQ(ierr);
+  PetscOptionsEnd();
+
+  ierr = PetscCalloc1(1, &stg_ctx); CHKERRQ(ierr);
+  stg_ctx->alpha         = alpha;
+  stg_ctx->u0            = u0;
+  stg_ctx->is_implicit   = user->phys->implicit;
+  stg_ctx->prescribe_T   = prescribe_T;
+  stg_ctx->mean_only     = mean_only;
+  stg_ctx->theta0        = theta0;
+  stg_ctx->P0            = P0;
+
+  {
+    // Calculate dx assuming constant spacing
+    PetscReal domain_min[3], domain_max[3], domain_size[3];
+    ierr = DMGetBoundingBox(dm, domain_min, domain_max); CHKERRQ(ierr);
+    for (PetscInt i=0; i<3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+
+    PetscInt nmax = 3, faces[3] = {1, 1, 1}; // stays 1 if -dm_plex_box_faces is absent
+    ierr = PetscOptionsGetIntArray(NULL, NULL, "-dm_plex_box_faces", faces, &nmax,
+                                   NULL); CHKERRQ(ierr);
+    stg_ctx->dx = domain_size[0]/faces[0];
+  }
+  // Copy the Newtonian context by value into the STG context
+  CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,
+                              CEED_MEM_HOST, &newtonian_ig_ctx);
+  stg_ctx->newtonian_ctx = *newtonian_ig_ctx;
+  CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfunction_context,
+                                  &newtonian_ig_ctx);
+
+  ierr = GetSTGContextData(comm, dm, stg_inflow_path, stg_rand_path, &stg_ctx);
+  CHKERRQ(ierr);
+  // Replace the existing inflow context with one that wraps stg_ctx
+  CeedQFunctionContextDestroy(&problem->apply_inflow.qfunction_context);
+  CeedQFunctionContextCreate(user->ceed, &stg_context);
+  CeedQFunctionContextSetData(stg_context, CEED_MEM_HOST,
+                              CEED_USE_POINTER, stg_ctx->total_bytes, stg_ctx);
+  CeedQFunctionContextSetDataDestroy(stg_context, CEED_MEM_HOST,
+                                     FreeContextPetsc);
+  CeedQFunctionContextRegisterDouble(stg_context, "solution time",
+                                     offsetof(struct STGShur14Context_, time), 1,
+                                     "Physical time of the solution");
+
+  problem->apply_inflow.qfunction         = STGShur14_Inflow;
+  problem->apply_inflow.qfunction_loc     = STGShur14_Inflow_loc;
+  problem->apply_inflow.qfunction_context = stg_context;
+  PetscFunctionReturn(0);
+}
diff --git a/examples/fluids/problems/stg_shur14.h b/examples/fluids/problems/stg_shur14.h
new file mode 100644
index 0000000000..7d5dc90bfd
--- /dev/null
+++ b/examples/fluids/problems/stg_shur14.h
@@ -0,0 +1,16 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#include <ceed.h>
+#include <petsc.h>
+#include "../qfunctions/stg_shur14_type.h"
+#include "../navierstokes.h"
+
+extern PetscErrorCode SetupSTG(const MPI_Comm comm, const DM dm,
+                               ProblemData *problem, User user,
+                               const bool prescribe_T, const CeedScalar theta0,
+                               const CeedScalar P0);
diff --git a/examples/fluids/qfunctions/advection.h b/examples/fluids/qfunctions/advection.h
index 4e2bf6f39f..06560167e0 100644
--- a/examples/fluids/qfunctions/advection.h
+++ b/examples/fluids/qfunctions/advection.h
@@ -12,6 +12,7 @@
 #define advection_h
 
 #include <math.h>
+#include <ceed.h>
 
 typedef struct SetupContext_ *SetupContext;
 struct SetupContext_ {
@@ -88,7 +89,7 @@ CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; }
 // This helper function provides support for the exact, time-dependent solution
 //   (currently not implemented) and IC formulation for 3D advection
 // *****************************************************************************
-CEED_QFUNCTION_HELPER int Exact_Advection(CeedInt dim, CeedScalar time,
+CEED_QFUNCTION_HELPER CeedInt Exact_Advection(CeedInt dim, CeedScalar time,
     const CeedScalar X[], CeedInt Nf, CeedScalar q[], void *ctx) {
   const SetupContext context = (SetupContext)ctx;
   const CeedScalar rc    = context->rc;
diff --git a/examples/fluids/qfunctions/advection2d.h b/examples/fluids/qfunctions/advection2d.h
index 2de60ec4cf..40867fb82b 100644
--- a/examples/fluids/qfunctions/advection2d.h
+++ b/examples/fluids/qfunctions/advection2d.h
@@ -12,6 +12,7 @@
 #define advection2d_h
 
 #include <math.h>
+#include <ceed.h>
 
 #ifndef M_PI
 #define M_PI    3.14159265358979323846
@@ -89,7 +90,7 @@ CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; }
 // This helper function provides the exact, time-dependent solution
 //   and IC formulation for 2D advection
 // *****************************************************************************
-CEED_QFUNCTION_HELPER int Exact_Advection2d(CeedInt dim, CeedScalar time,
+CEED_QFUNCTION_HELPER CeedInt Exact_Advection2d(CeedInt dim, CeedScalar time,
     const CeedScalar X[], CeedInt Nf, CeedScalar q[], void *ctx) {
   const SetupContext context = (SetupContext)ctx;
   const CeedScalar rc    = context->rc;
diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h
index 99279ae68d..06b13aef11 100644
--- a/examples/fluids/qfunctions/blasius.h
+++ b/examples/fluids/qfunctions/blasius.h
@@ -238,7 +238,7 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q,
 
     // The Physics
     // Zero v so all future terms can safely sum into it
-    for (int j=0; j<5; j++) v[j][i] = 0.;
+    for (CeedInt j=0; j<5; j++) v[j][i] = 0.;
 
     const CeedScalar u_normal = norm[0]*velocity[0] +
                                 norm[1]*velocity[1] +
@@ -249,7 +249,7 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q,
     v[0][i] -= wdetJb * rho * u_normal; // interior rho
 
     // -- Momentum
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       v[j+1][i] -= wdetJb * (rho * u_normal * velocity[j] + // interior rho
                              norm[j] * P); // mixed P
     v[2][i] -= wdetJb * t12  ;
@@ -315,7 +315,7 @@ CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q,
 
     // The Physics
     // Zero v so all future terms can safely sum into it
-    for (int j=0; j<5; j++) v[j][i] = 0.;
+    for (CeedInt j=0; j<5; j++) v[j][i] = 0.;
 
     // Implementing outflow condition
     const CeedScalar P         = P0; // pressure
@@ -333,7 +333,7 @@ CEED_QFUNCTION(Blasius_Outflow)(void *ctx, CeedInt Q,
     v[0][i] -= wdetJb * rho * u_normal;
 
     // -- Momentum
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       v[j+1][i] -= wdetJb *(rho * u_normal * u[j] + norm[j] * P);
     v[2][i] += wdetJb * t12  ;
 
diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h
index e2a16721ad..3d8d60e948 100644
--- a/examples/fluids/qfunctions/channel.h
+++ b/examples/fluids/qfunctions/channel.h
@@ -28,8 +28,8 @@ struct ChannelContext_ {
   struct NewtonianIdealGasContext_ newtonian_ctx;
 };
 
-CEED_QFUNCTION_HELPER int Exact_Channel(CeedInt dim, CeedScalar time,
-                                        const CeedScalar X[], CeedInt Nf, CeedScalar q[], void *ctx) {
+CEED_QFUNCTION_HELPER CeedInt Exact_Channel(CeedInt dim, CeedScalar time,
+    const CeedScalar X[], CeedInt Nf, CeedScalar q[], void *ctx) {
 
   const ChannelContext context = (ChannelContext)ctx;
   const CeedScalar theta0 = context->theta0;
@@ -151,7 +151,7 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q,
 
     // The Physics
     // Zero v so all future terms can safely sum into it
-    for (int j=0; j<5; j++) v[j][i] = 0.;
+    for (CeedInt j=0; j<5; j++) v[j][i] = 0.;
 
     const CeedScalar u_normal = norm[0]*velocity[0] +
                                 norm[1]*velocity[1] +
@@ -162,7 +162,7 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q,
     v[0][i] -= wdetJb * rho_in * u_normal;
 
     // -- Momentum
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       v[j+1][i] -= wdetJb * (rho_in * u_normal * velocity[j] +
                              norm[j] * P);
 
@@ -215,7 +215,7 @@ CEED_QFUNCTION(Channel_Outflow)(void *ctx, CeedInt Q,
 
     // The Physics
     // Zero v so all future terms can safely sum into it
-    for (int j=0; j<5; j++) v[j][i] = 0.;
+    for (CeedInt j=0; j<5; j++) v[j][i] = 0.;
 
     // Implementing outflow condition
     const CeedScalar P         = P0; // pressure
@@ -226,7 +226,7 @@ CEED_QFUNCTION(Channel_Outflow)(void *ctx, CeedInt Q,
     v[0][i] -= wdetJb * rho * u_normal;
 
     // -- Momentum
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       v[j+1][i] -= wdetJb *(rho * u_normal * u[j] + norm[j] * P);
 
     // -- Total Energy Density
diff --git a/examples/fluids/qfunctions/eulervortex.h b/examples/fluids/qfunctions/eulervortex.h
index c705e14941..ebe425d05d 100644
--- a/examples/fluids/qfunctions/eulervortex.h
+++ b/examples/fluids/qfunctions/eulervortex.h
@@ -231,7 +231,7 @@ CEED_QFUNCTION_HELPER void ConvectiveFluxJacobian_Euler(CeedScalar dF[3][5][5],
 CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3],
                                        const CeedScalar dXdx[3][3], const CeedScalar u[3],
                                        const CeedScalar sound_speed, const CeedScalar c_tau) {
-  for (int i=0; i<3; i++) {
+  for (CeedInt i=0; i<3; i++) {
     // length of element in direction i
     CeedScalar h = 2 / sqrt(dXdx[0][i]*dXdx[0][i] + dXdx[1][i]*dXdx[1][i] +
                             dXdx[2][i]*dXdx[2][i]);
@@ -362,11 +362,11 @@ CEED_QFUNCTION(Euler)(void *ctx, CeedInt Q,
     CeedScalar dEdx[3] = {0.};
     CeedScalar dUdx[3][3] = {{0.}};
     CeedScalar dXdxdXdxT[3][3] = {{0.}};
-    for (int j=0; j<3; j++) {
-      for (int k=0; k<3; k++) {
+    for (CeedInt j=0; j<3; j++) {
+      for (CeedInt k=0; k<3; k++) {
         drhodx[j] += drho[k] * dXdx[k][j];
         dEdx[j] += dE[k] * dXdx[k][j];
-        for (int l=0; l<3; l++) {
+        for (CeedInt l=0; l<3; l++) {
           dUdx[j][k] += dU[j][l] * dXdx[l][k];
           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
         }
@@ -380,27 +380,27 @@ CEED_QFUNCTION(Euler)(void *ctx, CeedInt Q,
 
     // The Physics
     // Zero v and dv so all future terms can safely sum into it
-    for (int j=0; j<5; j++) {
+    for (CeedInt j=0; j<5; j++) {
       v[j][i] = 0.;
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j][i] = 0.;
     }
 
     // -- Density
     // ---- u rho
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][0][i]  += wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
                              rho*u[2]*dXdx[j][2]);
     // -- Momentum
     // ---- rho (u x u) + P I3
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i]  += wdetJ*((rho*u[j]*u[0] + (j==0?P:0.))*dXdx[k][0] +
                                  (rho*u[j]*u[1] + (j==1?P:0.))*dXdx[k][1] +
                                  (rho*u[j]*u[2] + (j==2?P:0.))*dXdx[k][2]);
     // -- Total Energy Density
     // ---- (E + P) u
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i]  += wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
                                          u[2]*dXdx[j][2]);
 
@@ -411,18 +411,18 @@ CEED_QFUNCTION(Euler)(void *ctx, CeedInt Q,
 
     // ---- dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
-    for (int j=0; j<3; j++) {
+    for (CeedInt j=0; j<3; j++) {
       dqdx[0][j] = drhodx[j];
       dqdx[4][j] = dEdx[j];
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dqdx[k+1][j] = dUdx[k][j];
     }
 
     // ---- strong_conv = dF/dq * dq/dx    (Strong convection)
     CeedScalar strong_conv[5] = {0.};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<5; k++)
+        for (CeedInt l=0; l<5; l++)
           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
 
     // Stabilization
@@ -437,13 +437,13 @@ CEED_QFUNCTION(Euler)(void *ctx, CeedInt Q,
     case 0:        // Galerkin
       break;
     case 1:        // SU
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++)
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++)
             stab[k][j] += jacob_F_conv[j][k][l] * Tau_x[j] * strong_conv[l];
 
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
@@ -530,11 +530,11 @@ CEED_QFUNCTION(IFunction_Euler)(void *ctx, CeedInt Q,
     CeedScalar dEdx[3] = {0.};
     CeedScalar dUdx[3][3] = {{0.}};
     CeedScalar dXdxdXdxT[3][3] = {{0.}};
-    for (int j=0; j<3; j++) {
-      for (int k=0; k<3; k++) {
+    for (CeedInt j=0; j<3; j++) {
+      for (CeedInt k=0; k<3; k++) {
         drhodx[j] += drho[k] * dXdx[k][j];
         dEdx[j] += dE[k] * dXdx[k][j];
-        for (int l=0; l<3; l++) {
+        for (CeedInt l=0; l<3; l++) {
           dUdx[j][k] += dU[j][l] * dXdx[l][k];
           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
         }
@@ -547,30 +547,30 @@ CEED_QFUNCTION(IFunction_Euler)(void *ctx, CeedInt Q,
 
     // The Physics
     // Zero v and dv so all future terms can safely sum into it
-    for (int j=0; j<5; j++) {
+    for (CeedInt j=0; j<5; j++) {
       v[j][i] = 0.;
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j][i] = 0.;
     }
     //-----mass matrix
-    for (int j=0; j<5; j++)
+    for (CeedInt j=0; j<5; j++)
       v[j][i] += wdetJ*q_dot[j][i];
 
     // -- Density
     // ---- u rho
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][0][i]  -= wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
                              rho*u[2]*dXdx[j][2]);
     // -- Momentum
     // ---- rho (u x u) + P I3
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i]  -= wdetJ*((rho*u[j]*u[0] + (j==0?P:0.))*dXdx[k][0] +
                                  (rho*u[j]*u[1] + (j==1?P:0.))*dXdx[k][1] +
                                  (rho*u[j]*u[2] + (j==2?P:0.))*dXdx[k][2]);
     // -- Total Energy Density
     // ---- (E + P) u
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i]  -= wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
                                          u[2]*dXdx[j][2]);
 
@@ -581,23 +581,23 @@ CEED_QFUNCTION(IFunction_Euler)(void *ctx, CeedInt Q,
 
     // ---- dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
-    for (int j=0; j<3; j++) {
+    for (CeedInt j=0; j<3; j++) {
       dqdx[0][j] = drhodx[j];
       dqdx[4][j] = dEdx[j];
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dqdx[k+1][j] = dUdx[k][j];
     }
 
     // ---- strong_conv = dF/dq * dq/dx    (Strong convection)
     CeedScalar strong_conv[5] = {0.};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<5; k++)
+        for (CeedInt l=0; l<5; l++)
           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
 
     // ---- Strong residual
     CeedScalar strong_res[5];
-    for (int j=0; j<5; j++)
+    for (CeedInt j=0; j<5; j++)
       strong_res[j] = q_dot[j][i] + strong_conv[j];
 
     // Stabilization
@@ -612,25 +612,25 @@ CEED_QFUNCTION(IFunction_Euler)(void *ctx, CeedInt Q,
     case 0:        // Galerkin
       break;
     case 1:        // SU
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++)
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++)
             stab[k][j] += jacob_F_conv[j][k][l] * Tau_x[j] * strong_conv[l];
 
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
       break;
     case 2:        // SUPG
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++)
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++)
             stab[k][j] = jacob_F_conv[j][k][l] * Tau_x[j] * strong_res[l];
 
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
@@ -696,7 +696,7 @@ CEED_QFUNCTION(TravelingVortex_Inflow)(void *ctx, CeedInt Q,
                                    norm[2]*mean_velocity[2];
     // The Physics
     // Zero v so all future terms can safely sum into it
-    for (int j=0; j<5; j++) v[j][i] = 0.;
+    for (CeedInt j=0; j<5; j++) v[j][i] = 0.;
 
     // Implementing in/outflow BCs
     if (face_normal > 0) {
@@ -712,7 +712,7 @@ CEED_QFUNCTION(TravelingVortex_Inflow)(void *ctx, CeedInt Q,
       v[0][i] -= wdetJb * rho_inlet * face_normal;
 
       // -- Momentum
-      for (int j=0; j<3; j++)
+      for (CeedInt j=0; j<3; j++)
         v[j+1][i] -= wdetJb *(rho_inlet * face_normal * mean_velocity[j] +
                               norm[j] * P_inlet);
 
@@ -778,7 +778,7 @@ CEED_QFUNCTION(Euler_Outflow)(void *ctx, CeedInt Q,
                                    norm[2]*mean_velocity[2];
     // The Physics
     // Zero v so all future terms can safely sum into it
-    for (int j=0; j<5; j++) v[j][i] = 0;
+    for (CeedInt j=0; j<5; j++) v[j][i] = 0;
 
     // Implementing in/outflow BCs
     if (face_normal > 0) { // outflow
@@ -791,7 +791,7 @@ CEED_QFUNCTION(Euler_Outflow)(void *ctx, CeedInt Q,
       v[0][i] -= wdetJb * rho * u_normal;
 
       // -- Momentum
-      for (int j=0; j<3; j++)
+      for (CeedInt j=0; j<3; j++)
         v[j+1][i] -= wdetJb *(rho * u_normal * u[j] + norm[j] * P);
 
       // -- Total Energy Density
diff --git a/examples/fluids/qfunctions/mass.h b/examples/fluids/qfunctions/mass.h
index 94d1daafd5..45ae3198b1 100644
--- a/examples/fluids/qfunctions/mass.h
+++ b/examples/fluids/qfunctions/mass.h
@@ -12,6 +12,7 @@
 #define mass_h
 
 #include <math.h>
+#include <ceed.h>
 
 // *****************************************************************************
 // This QFunction applies the mass matrix to five interlaced fields.
diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h
index 008a5c8e46..9c77601d84 100644
--- a/examples/fluids/qfunctions/newtonian.h
+++ b/examples/fluids/qfunctions/newtonian.h
@@ -93,7 +93,7 @@ CEED_QFUNCTION_HELPER void PrimitiveToConservative_fwd(const CeedScalar rho,
   CeedScalar drdP = 1. / ( Rd * T);
   dU[0] = drdP * dY[0] + drdT * dY[4];
   CeedScalar de_kinetic = 0;
-  for (int i=0; i<3; i++) {
+  for (CeedInt i=0; i<3; i++) {
     dU[1+i] = dU[0] * u[i] + rho * dY[1+i];
     de_kinetic += u[i] * dY[1+i];
   }
@@ -201,7 +201,7 @@ CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3],
                                        const CeedScalar viscosity) {
   const CeedScalar mag_u_visc = sqrt(u[0]*u[0] +u[1]*u[1] +u[2]*u[2]) /
                                 (2*viscosity);
-  for (int i=0; i<3; i++) {
+  for (CeedInt i=0; i<3; i++) {
     // length of element in direction i
     CeedScalar h = 2 / sqrt(dXdx[0][i]*dXdx[0][i] + dXdx[1][i]*dXdx[1][i] +
                             dXdx[2][i]*dXdx[2][i]);
@@ -388,21 +388,21 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
     CeedScalar dEdx[3] = {0};
     CeedScalar dUdx[3][3] = {{0}};
     CeedScalar dXdxdXdxT[3][3] = {{0}};
-    for (int j=0; j<3; j++) {
-      for (int k=0; k<3; k++) {
+    for (CeedInt j=0; j<3; j++) {
+      for (CeedInt k=0; k<3; k++) {
         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
         drhodx[j] += drho[k] * dXdx[k][j];
         dEdx[j] += dE[k] * dXdx[k][j];
-        for (int l=0; l<3; l++) {
+        for (CeedInt l=0; l<3; l++) {
           dUdx[j][k] += dU[j][l] * dXdx[l][k];
           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
         }
       }
     }
     CeedScalar dudx[3][3] = {{0}};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
-        for (int l=0; l<3; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
+        for (CeedInt l=0; l<3; l++)
           dudx[j][k] += du[j][l] * dXdx[l][k];
     // -- grad_T
     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
@@ -446,18 +446,18 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
 
     // dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
-    for (int j=0; j<3; j++) {
+    for (CeedInt j=0; j<3; j++) {
       dqdx[0][j] = drhodx[j];
       dqdx[4][j] = dEdx[j];
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dqdx[k+1][j] = dUdx[k][j];
     }
 
     // strong_conv = dF/dq * dq/dx    (Strong convection)
     CeedScalar strong_conv[5] = {0};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<5; k++)
+        for (CeedInt l=0; l<5; l++)
           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
 
     // Body force
@@ -465,40 +465,40 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
 
     // The Physics
     // Zero dv so all future terms can safely sum into it
-    for (int j=0; j<5; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<5; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j][i] = 0;
 
     // -- Density
     // ---- u rho
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][0][i]  += wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
                              rho*u[2]*dXdx[j][2]);
     // -- Momentum
     // ---- rho (u x u) + P I3
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i]  += wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
     // ---- Fuvisc
     const CeedInt Fuviscidx[3][3] = {{0, 1, 2}, {1, 3, 4}, {2, 4, 5}}; // symmetric matrix indices
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i] -= wdetJ*(Fu[Fuviscidx[j][0]]*dXdx[k][0] +
                                 Fu[Fuviscidx[j][1]]*dXdx[k][1] +
                                 Fu[Fuviscidx[j][2]]*dXdx[k][2]);
     // -- Total Energy Density
     // ---- (E + P) u
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i]  += wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
                                          u[2]*dXdx[j][2]);
     // ---- Fevisc
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i] -= wdetJ * (Fe[0]*dXdx[j][0] + Fe[1]*dXdx[j][1] +
                               Fe[2]*dXdx[j][2]);
     // Body Force
-    for (int j=0; j<5; j++)
+    for (CeedInt j=0; j<5; j++)
       v[j][i] = wdetJ * body_force[j];
 
     // Spatial Stabilization
@@ -523,13 +523,13 @@ CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
                                   tau_strong_conv_conservative);
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++)
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++)
             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
 
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
@@ -635,21 +635,21 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
     CeedScalar dEdx[3] = {0};
     CeedScalar dUdx[3][3] = {{0}};
     CeedScalar dXdxdXdxT[3][3] = {{0}};
-    for (int j=0; j<3; j++) {
-      for (int k=0; k<3; k++) {
+    for (CeedInt j=0; j<3; j++) {
+      for (CeedInt k=0; k<3; k++) {
         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
         drhodx[j] += drho[k] * dXdx[k][j];
         dEdx[j] += dE[k] * dXdx[k][j];
-        for (int l=0; l<3; l++) {
+        for (CeedInt l=0; l<3; l++) {
           dUdx[j][k] += dU[j][l] * dXdx[l][k];
           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
         }
       }
     }
     CeedScalar dudx[3][3] = {{0}};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
-        for (int l=0; l<3; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
+        for (CeedInt l=0; l<3; l++)
           dudx[j][k] += du[j][l] * dXdx[l][k];
     // -- grad_T
     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
@@ -692,17 +692,17 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
 
     // dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
-    for (int j=0; j<3; j++) {
+    for (CeedInt j=0; j<3; j++) {
       dqdx[0][j] = drhodx[j];
       dqdx[4][j] = dEdx[j];
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dqdx[k+1][j] = dUdx[k][j];
     }
     // strong_conv = dF/dq * dq/dx    (Strong convection)
     CeedScalar strong_conv[5] = {0};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<5; k++)
+        for (CeedInt l=0; l<5; l++)
           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
 
     // Body force
@@ -710,49 +710,49 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
 
     // Strong residual
     CeedScalar strong_res[5];
-    for (int j=0; j<5; j++)
+    for (CeedInt j=0; j<5; j++)
       strong_res[j] = q_dot[j][i] + strong_conv[j] - body_force[j];
 
     // The Physics
     //-----mass matrix
-    for (int j=0; j<5; j++)
+    for (CeedInt j=0; j<5; j++)
       v[j][i] = wdetJ*q_dot[j][i];
 
     // Zero dv so all future terms can safely sum into it
-    for (int j=0; j<5; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<5; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j][i] = 0;
 
     // -- Density
     // ---- u rho
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][0][i]  -= wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
                              rho*u[2]*dXdx[j][2]);
     // -- Momentum
     // ---- rho (u x u) + P I3
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i]  -= wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
     // ---- Fuvisc
     const CeedInt Fuviscidx[3][3] = {{0, 1, 2}, {1, 3, 4}, {2, 4, 5}}; // symmetric matrix indices
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i] += wdetJ*(Fu[Fuviscidx[j][0]]*dXdx[k][0] +
                                 Fu[Fuviscidx[j][1]]*dXdx[k][1] +
                                 Fu[Fuviscidx[j][2]]*dXdx[k][2]);
     // -- Total Energy Density
     // ---- (E + P) u
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i]  -= wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
                                          u[2]*dXdx[j][2]);
     // ---- Fevisc
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i] += wdetJ * (Fe[0]*dXdx[j][0] + Fe[1]*dXdx[j][1] +
                               Fe[2]*dXdx[j][2]);
     // Body Force
-    for (int j=0; j<5; j++)
+    for (CeedInt j=0; j<5; j++)
       v[j][i] -= wdetJ*body_force[j];
 
     // Spatial Stabilization
@@ -778,13 +778,13 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
                                   tau_strong_conv_conservative);
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++)
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++)
             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
 
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
@@ -805,13 +805,13 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
 //  However, it is more flops than using the existing Jacobian wrt q after q_{,Y} viz
       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_res,
                                   tau_strong_res_conservative);
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++)
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++)
             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l];
 
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
diff --git a/examples/fluids/qfunctions/setupgeo.h b/examples/fluids/qfunctions/setupgeo.h
index 0c90de0112..01406fd941 100644
--- a/examples/fluids/qfunctions/setupgeo.h
+++ b/examples/fluids/qfunctions/setupgeo.h
@@ -113,12 +113,19 @@ CEED_QFUNCTION(Setup)(void *ctx, CeedInt Q,
 // Physical (current) 3D coordinates: x
 // Change of coordinate matrix:
 //   dxdX_{i,j} = dx_i/dX_j (indicial notation) [3 * 2]
+// Inverse change of coordinate matrix:
+//   dXdx_{i,j} = dX_i/dx_j (indicial notation) [2 * 3]
 //
 // (J1,J2,J3) is given by the cross product of the columns of dxdX_{i,j}
 //
 // detJb is the magnitude of (J1,J2,J3)
 //
-// All quadrature data is stored in 4 field vector of quadrature data.
+// dXdx is calculated via Moore–Penrose inverse:
+//
+//   dX_i/dx_j = (dxdX^T dxdX)^(-1) dxdX^T
+//             = (dx_l/dX_i * dx_l/dX_k)^(-1) dx_j/dX_k
+//
+// All quadrature data is stored in 10 field vector of quadrature data.
 //
 // We require the determinant of the Jacobian to properly compute integrals of
 //   the form: int( u v )
@@ -128,12 +135,18 @@ CEED_QFUNCTION(Setup)(void *ctx, CeedInt Q,
 //
 // Normal vector = (J1,J2,J3) / detJb
 //
+//   - TODO Could possibly remove normal vector, as it could be calculated in the QFunction from dXdx
 // Stored: (J1,J2,J3) / detJb
 //   in q_data_sur[1:3] as
 //   (detJb^-1) * [ J1 ]
 //                [ J2 ]
 //                [ J3 ]
 //
+// Stored: dXdx_{i,j}
+//   in q_data_sur[4:9] as
+//    [dXdx_11 dXdx_12 dXdx_13]
+//    [dXdx_21 dXdx_22 dXdx_23]
+//
 // *****************************************************************************
 CEED_QFUNCTION(SetupBoundary)(void *ctx, CeedInt Q,
                               const CeedScalar *const *in, CeedScalar *const *out) {
@@ -170,6 +183,37 @@ CEED_QFUNCTION(SetupBoundary)(void *ctx, CeedInt Q,
     q_data_sur[2][i] = J2 / detJb;
     q_data_sur[3][i] = J3 / detJb;
 
+    // dxdX^T dxdX: (dxdX^T dxdX)_{j,k} = dxdX_{l,j} * dxdX_{l,k} (summed over l)
+    CeedScalar dxdXTdxdX[2][2] = {{ 0. }};
+    for (CeedInt j=0; j<2; j++)
+      for (CeedInt k=0; k<2; k++)
+        for (CeedInt l=0; l<3; l++)
+          dxdXTdxdX[j][k] += dxdX[l][j]*dxdX[l][k];
+
+    const CeedScalar detdxdXTdxdX =  dxdXTdxdX[0][0] * dxdXTdxdX[1][1]
+                                     -dxdXTdxdX[1][0] * dxdXTdxdX[0][1];
+
+    // Compute inverse of dxdXTdxdX
+    CeedScalar dxdXTdxdX_inv[2][2];
+    dxdXTdxdX_inv[0][0] =  dxdXTdxdX[1][1] / detdxdXTdxdX;
+    dxdXTdxdX_inv[0][1] = -dxdXTdxdX[0][1] / detdxdXTdxdX;
+    dxdXTdxdX_inv[1][0] = -dxdXTdxdX[1][0] / detdxdXTdxdX;
+    dxdXTdxdX_inv[1][1] =  dxdXTdxdX[0][0] / detdxdXTdxdX;
+
+    // Compute dXdx from dxdXTdxdX^-1 and dxdX
+    CeedScalar dXdx[2][3] = {{ 0. }};
+    for (CeedInt j=0; j<2; j++)
+      for (CeedInt k=0; k<3; k++)
+        for (CeedInt l=0; l<2; l++)
+          dXdx[j][k] += dxdXTdxdX_inv[l][j] * dxdX[k][l];
+
+    q_data_sur[4][i] = dXdx[0][0];
+    q_data_sur[5][i] = dXdx[0][1];
+    q_data_sur[6][i] = dXdx[0][2];
+    q_data_sur[7][i] = dXdx[1][0];
+    q_data_sur[8][i] = dXdx[1][1];
+    q_data_sur[9][i] = dXdx[1][2];
+
   } // End of Quadrature Point Loop
 
   // Return
diff --git a/examples/fluids/qfunctions/setupgeo2d.h b/examples/fluids/qfunctions/setupgeo2d.h
index b1272f1a8b..2a3c715f3b 100644
--- a/examples/fluids/qfunctions/setupgeo2d.h
+++ b/examples/fluids/qfunctions/setupgeo2d.h
@@ -12,6 +12,7 @@
 #define setup_geo_2d_h
 
 #include <math.h>
+#include <ceed.h>
 
 // *****************************************************************************
 // This QFunction sets up the geometric factors required for integration and
diff --git a/examples/fluids/qfunctions/shocktube.h b/examples/fluids/qfunctions/shocktube.h
index 115699c4c1..3da66cbba1 100644
--- a/examples/fluids/qfunctions/shocktube.h
+++ b/examples/fluids/qfunctions/shocktube.h
@@ -26,6 +26,7 @@
 #define shocktube_h
 
 #include <math.h>
+#include <ceed.h>
 
 #ifndef M_PI
 #define M_PI    3.14159265358979323846
@@ -90,7 +91,7 @@ struct ShockTubeContext_ {
 // This helper function provides support for the exact, time-dependent solution
 //   (currently not implemented) and IC formulation for Euler traveling vortex
 // *****************************************************************************
-CEED_QFUNCTION_HELPER int Exact_ShockTube(CeedInt dim, CeedScalar time,
+CEED_QFUNCTION_HELPER CeedInt Exact_ShockTube(CeedInt dim, CeedScalar time,
     const CeedScalar X[], CeedInt Nf, CeedScalar q[],
     void *ctx) {
 
@@ -198,7 +199,7 @@ CEED_QFUNCTION_HELPER CeedScalar Covariant_length_along_vector(
 CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3],
                                        const CeedScalar dXdx[3][3], const CeedScalar u[3],
                                        const CeedScalar sound_speed, const CeedScalar c_tau) {
-  for (int i=0; i<3; i++) {
+  for (CeedInt i=0; i<3; i++) {
     // length of element in direction i
     CeedScalar h = 2 / sqrt(dXdx[0][i]*dXdx[0][i] + dXdx[1][i]*dXdx[1][i] +
                             dXdx[2][i]*dXdx[2][i]);
@@ -331,12 +332,12 @@ CEED_QFUNCTION(EulerShockTube)(void *ctx, CeedInt Q,
     CeedScalar dEdx[3] = {0};
     CeedScalar dUdx[3][3] = {{0}};
     CeedScalar dXdxdXdxT[3][3] = {{0}};
-    for (int j=0; j<3; j++) {
-      for (int k=0; k<3; k++) {
+    for (CeedInt j=0; j<3; j++) {
+      for (CeedInt k=0; k<3; k++) {
         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
         drhodx[j] += drho[k] * dXdx[k][j];
         dEdx[j] += dE[k] * dXdx[k][j];
-        for (int l=0; l<3; l++) {
+        for (CeedInt l=0; l<3; l++) {
           dUdx[j][k] += dU[j][l] * dXdx[l][k];
           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
         }
@@ -351,27 +352,27 @@ CEED_QFUNCTION(EulerShockTube)(void *ctx, CeedInt Q,
 
     // The Physics
     // Zero v and dv so all future terms can safely sum into it
-    for (int j=0; j<5; j++) {
+    for (CeedInt j=0; j<5; j++) {
       v[j][i] = 0;
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j][i] = 0;
     }
 
     // -- Density
     // ---- u rho
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][0][i]  += wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
                              rho*u[2]*dXdx[j][2]);
     // -- Momentum
     // ---- rho (u x u) + P I3
-    for (int j=0; j<3; j++)
-      for (int k=0; k<3; k++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
         dv[k][j+1][i]  += wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
     // -- Total Energy Density
     // ---- (E + P) u
-    for (int j=0; j<3; j++)
+    for (CeedInt j=0; j<3; j++)
       dv[j][4][i]  += wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
                                          u[2]*dXdx[j][2]);
 
@@ -387,7 +388,7 @@ CEED_QFUNCTION(EulerShockTube)(void *ctx, CeedInt Q,
       // Unit vector aligned with the density gradient
       drho_norm = sqrt(drhodx[0]*drhodx[0] + drhodx[1]*drhodx[1] +
                        drhodx[2]*drhodx[2]);
-      for (int j=0; j<3; j++)
+      for (CeedInt j=0; j<3; j++)
         j_vec[j] = drhodx[j] / (drho_norm + 1e-20);
 
       if (drho_norm == 0.0) {
@@ -400,14 +401,14 @@ CEED_QFUNCTION(EulerShockTube)(void *ctx, CeedInt Q,
         nu_shock = fabs(tau_shock * acoustic_vel * acoustic_vel);
       }
 
-      for (int j=0; j<3; j++)
+      for (CeedInt j=0; j<3; j++)
         dv[j][0][i] -= wdetJ * nu_shock * drhodx[j];
 
-      for (int k=0; k<3; k++)
-        for (int j=0; j<3; j++)
+      for (CeedInt k=0; k<3; k++)
+        for (CeedInt j=0; j<3; j++)
           dv[j][k][i] -= wdetJ * nu_shock * du[k][j];
 
-      for (int j=0; j<3; j++)
+      for (CeedInt j=0; j<3; j++)
         dv[j][4][i] -= wdetJ * nu_shock * dEdx[j];
     }
 
@@ -420,18 +421,18 @@ CEED_QFUNCTION(EulerShockTube)(void *ctx, CeedInt Q,
 
     // dqdx collects drhodx, dUdx and dEdx in one vector
     CeedScalar dqdx[5][3];
-    for (int j=0; j<3; j++) {
+    for (CeedInt j=0; j<3; j++) {
       dqdx[0][j] = drhodx[j];
       dqdx[4][j] = dEdx[j];
-      for (int k=0; k<3; k++)
+      for (CeedInt k=0; k<3; k++)
         dqdx[k+1][j] = dUdx[k][j];
     }
 
     // strong_conv = dF/dq * dq/dx    (Strong convection)
     CeedScalar strong_conv[5] = {0};
-    for (int j=0; j<3; j++)
-      for (int k=0; k<5; k++)
-        for (int l=0; l<5; l++)
+    for (CeedInt j=0; j<3; j++)
+      for (CeedInt k=0; k<5; k++)
+        for (CeedInt l=0; l<5; l++)
           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
 
     // Stabilization
@@ -445,13 +446,13 @@ CEED_QFUNCTION(EulerShockTube)(void *ctx, CeedInt Q,
     case 0:        // Galerkin
       break;
     case 1:        // SU
-      for (int j=0; j<3; j++)
-        for (int k=0; k<5; k++)
-          for (int l=0; l<5; l++) {
+      for (CeedInt j=0; j<3; j++)
+        for (CeedInt k=0; k<5; k++)
+          for (CeedInt l=0; l<5; l++) {
             stab[k][j] += jacob_F_conv[j][k][l] * Tau_x[j] * strong_conv[l];
           }
-      for (int j=0; j<5; j++)
-        for (int k=0; k<3; k++)
+      for (CeedInt j=0; j<5; j++)
+        for (CeedInt k=0; k<3; k++)
           dv[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
                                 stab[j][1] * dXdx[k][1] +
                                 stab[j][2] * dXdx[k][2]);
diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h
new file mode 100644
index 0000000000..e8909d1f2e
--- /dev/null
+++ b/examples/fluids/qfunctions/stg_shur14.h
@@ -0,0 +1,281 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Implementation of the Synthetic Turbulence Generation (STG) algorithm
+/// presented in Shur et al. 2014
+///
+/// SetupSTG_Rand reads in the input files and fills in STGShur14Context. Then
+/// STGShur14_CalcQF is run over quadrature points. Before the program exits,
+/// TearDownSTG is run to free the memory of the allocated arrays.
+
+#ifndef stg_shur14_h
+#define stg_shur14_h
+
+#include <math.h>
+#include <ceed.h>
+#include <stdlib.h>
+#include "stg_shur14_type.h"
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846
+#endif
+
+#define STG_NMODES_MAX 1024
+
+CEED_QFUNCTION_HELPER CeedScalar Max(CeedScalar a, CeedScalar b) { return (b > a) ? b : a; }
+CEED_QFUNCTION_HELPER CeedScalar Min(CeedScalar a, CeedScalar b) { return (b > a) ? a : b; }
+
+/*
+ * @brief Linearly interpolate the input profile quantities to a wall distance
+ *
+ * Assumes prof_dw[i+1] > prof_dw[i] and prof_dw[0] = 0. If dw is not bracketed
+ * (e.g. dw >= prof_dw[nprofs-1]), the values at the last profile point are used
+ *
+ * @param[in]  dw      Distance to the nearest wall
+ * @param[out] ubar    Mean velocity at dw, [3]
+ * @param[out] cij     Cholesky decomposition at dw, [6]
+ * @param[out] eps     Turbulent dissipation at dw
+ * @param[out] lt      Turbulent length scale at dw
+ * @param[in]  stg_ctx STGShur14Context for the problem
+ */
+CEED_QFUNCTION_HELPER void InterpolateProfile(const CeedScalar dw,
+    CeedScalar ubar[3], CeedScalar cij[6], CeedScalar *eps, CeedScalar *lt,
+    const STGShur14Context stg_ctx) {
+
+  const CeedInt    nprofs    = stg_ctx->nprofs;
+  const CeedScalar *prof_dw  = &stg_ctx->data[stg_ctx->offsets.prof_dw];
+  const CeedScalar *prof_eps = &stg_ctx->data[stg_ctx->offsets.eps];
+  const CeedScalar *prof_lt  = &stg_ctx->data[stg_ctx->offsets.lt];
+  const CeedScalar *prof_ubar = &stg_ctx->data[stg_ctx->offsets.ubar];
+  const CeedScalar *prof_cij  = &stg_ctx->data[stg_ctx->offsets.cij];
+  CeedInt idx=-1;
+
+  for(CeedInt i=0; i<nprofs; i++) {
+    if (dw < prof_dw[i]) {
+      idx = i;
+      break;
+    }
+  }
+
+  if (idx > 0) { // dw lies in [prof_dw[idx-1], prof_dw[idx]): interpolate linearly
+    CeedScalar coeff = (dw - prof_dw[idx-1]) / (prof_dw[idx] - prof_dw[idx-1]);
+
+    //*INDENT-OFF*
+    ubar[0] = prof_ubar[0*nprofs+idx-1] + coeff*( prof_ubar[0*nprofs+idx] - prof_ubar[0*nprofs+idx-1] );
+    ubar[1] = prof_ubar[1*nprofs+idx-1] + coeff*( prof_ubar[1*nprofs+idx] - prof_ubar[1*nprofs+idx-1] );
+    ubar[2] = prof_ubar[2*nprofs+idx-1] + coeff*( prof_ubar[2*nprofs+idx] - prof_ubar[2*nprofs+idx-1] );
+    cij[0]  = prof_cij[0*nprofs+idx-1]  + coeff*( prof_cij[0*nprofs+idx]  - prof_cij[0*nprofs+idx-1] );
+    cij[1]  = prof_cij[1*nprofs+idx-1]  + coeff*( prof_cij[1*nprofs+idx]  - prof_cij[1*nprofs+idx-1] );
+    cij[2]  = prof_cij[2*nprofs+idx-1]  + coeff*( prof_cij[2*nprofs+idx]  - prof_cij[2*nprofs+idx-1] );
+    cij[3]  = prof_cij[3*nprofs+idx-1]  + coeff*( prof_cij[3*nprofs+idx]  - prof_cij[3*nprofs+idx-1] );
+    cij[4]  = prof_cij[4*nprofs+idx-1]  + coeff*( prof_cij[4*nprofs+idx]  - prof_cij[4*nprofs+idx-1] );
+    cij[5]  = prof_cij[5*nprofs+idx-1]  + coeff*( prof_cij[5*nprofs+idx]  - prof_cij[5*nprofs+idx-1] );
+    *eps    = prof_eps[idx-1]     + coeff*( prof_eps[idx]     - prof_eps[idx-1] );
+    *lt     = prof_lt[idx-1]      + coeff*( prof_lt[idx]      - prof_lt[idx-1] );
+    //*INDENT-ON*
+  } else { // dw not bracketed by prof_dw (idx <= 0): use the last profile point
+    ubar[0] = prof_ubar[1*nprofs-1];
+    ubar[1] = prof_ubar[2*nprofs-1];
+    ubar[2] = prof_ubar[3*nprofs-1];
+    cij[0]  = prof_cij[1*nprofs-1];
+    cij[1]  = prof_cij[2*nprofs-1];
+    cij[2]  = prof_cij[3*nprofs-1];
+    cij[3]  = prof_cij[4*nprofs-1];
+    cij[4]  = prof_cij[5*nprofs-1];
+    cij[5]  = prof_cij[6*nprofs-1];
+    *eps    = prof_eps[nprofs-1];
+    *lt     = prof_lt[nprofs-1];
+  }
+}
+
+/*
+ * @brief Calculate spectrum coefficients for STG
+ *
+ * Calculates q_n at a given distance to the wall, normalized by their sum
+ *
+ * @param[in]  dw      Distance to the nearest wall
+ * @param[in]  eps     Turbulent dissipation w/rt dw
+ * @param[in]  lt      Turbulent length scale w/rt dw
+ * @param[in]  h       Element lengths in coordinate directions
+ * @param[in]  nu      Viscosity (NOTE(review): dynamic or kinematic? confirm)
+ * @param[out] qn      Spectrum coefficients, [nmodes]
+ * @param[in]  stg_ctx STGShur14Context for the problem
+ */
+CEED_QFUNCTION_HELPER void CalcSpectrum(const CeedScalar dw,
+    const CeedScalar eps, const CeedScalar lt, const CeedScalar h[3],
+    const CeedScalar nu, CeedScalar qn[], const STGShur14Context stg_ctx) {
+
+  const CeedInt    nmodes = stg_ctx->nmodes;
+  const CeedScalar *kappa = &stg_ctx->data[stg_ctx->offsets.kappa];
+
+  const CeedScalar hmax = Max( Max(h[0], h[1]), h[2]);
+  const CeedScalar ke   = 2*M_PI/Min(2*dw, 3*lt);
+  const CeedScalar keta = 2*M_PI*pow(pow(nu,3.0)/eps, -0.25);
+  const CeedScalar kcut =
+    M_PI/ Min( Max(Max(h[1], h[2]), 0.3*hmax) + 0.1*dw, hmax );
+  CeedScalar fcut, feta, Ektot=0.0;
+
+  for(CeedInt n=0; n<nmodes; n++) {
+    feta   = exp(-Square(12*kappa[n]/keta));
+    fcut   = exp( -pow(4*Max(kappa[n] - 0.9*kcut, 0)/kcut, 3.) );
+    qn[n]  = pow(kappa[n]/ke, 4.)
+             * pow(1 + 2.4*Square(kappa[n]/ke),-17./6)*feta*fcut;
+    qn[n] *= n==0 ? kappa[0] : kappa[n] - kappa[n-1];
+    Ektot += qn[n];
+  }
+
+  for(CeedInt n=0; n<nmodes; n++) qn[n] /= Ektot;
+}
+
+/******************************************************
+ * @brief Calculate u(x,t) for STG inflow condition
+ *
+ * @param[in]  X       Location to evaluate u(X,t)
+ * @param[in]  t       Time to evaluate u(X,t)
+ * @param[in]  ubar    Mean velocity at X
+ * @param[in]  cij     Cholesky factor at X: diagonal [0..2], lower off-diagonal [3..5]
+ * @param[in]  qn      Wavemode amplitudes at X, [nmodes]
+ * @param[out] u       Velocity at X and t
+ * @param[in]  stg_ctx STGShur14Context for the problem
+ */
+CEED_QFUNCTION_HELPER void STGShur14_Calc(const CeedScalar X[3],
+    const CeedScalar t, const CeedScalar ubar[3], const CeedScalar cij[6],
+    const CeedScalar qn[], CeedScalar u[3],
+    const STGShur14Context stg_ctx) {
+
+  //*INDENT-OFF*
+  const CeedInt    nmodes = stg_ctx->nmodes;
+  const CeedScalar *kappa = &stg_ctx->data[stg_ctx->offsets.kappa];
+  const CeedScalar *phi   = &stg_ctx->data[stg_ctx->offsets.phi];
+  const CeedScalar *sigma = &stg_ctx->data[stg_ctx->offsets.sigma];
+  const CeedScalar *d     = &stg_ctx->data[stg_ctx->offsets.d];
+  //*INDENT-ON*
+  const CeedScalar tworoot1p5 = 2*sqrt(1.5);
+  CeedScalar xdotd, vp[3] = {0.};
+  CeedScalar xhat[] = {0., X[1], X[2]};
+
+  CeedPragmaSIMD
+  for(CeedInt n=0; n<nmodes; n++) {
+    xhat[0] = (X[0] - stg_ctx->u0*t)*Max(2*kappa[0]/kappa[n], 0.1);
+    xdotd = 0.;
+    for(CeedInt i=0; i<3; i++) xdotd += d[i*nmodes+n]*xhat[i];
+    const CeedScalar cos_kxdp = cos(kappa[n]*xdotd + phi[n]);
+    vp[0] += tworoot1p5*sqrt(qn[n])*sigma[0*nmodes+n] * cos_kxdp;
+    vp[1] += tworoot1p5*sqrt(qn[n])*sigma[1*nmodes+n] * cos_kxdp;
+    vp[2] += tworoot1p5*sqrt(qn[n])*sigma[2*nmodes+n] * cos_kxdp;
+  }
+
+  u[0] = ubar[0] + cij[0]*vp[0];
+  u[1] = ubar[1] + cij[3]*vp[0] + cij[1]*vp[1];
+  u[2] = ubar[2] + cij[4]*vp[0] + cij[5]*vp[1] + cij[2]*vp[2];
+}
+
+/********************************************************************
+ * @brief QFunction to calculate the inflow boundary condition
+ *
+ * This will loop through quadrature points, calculate the wavemode amplitudes
+ * at each location, then calculate the actual velocity.
+ */
+CEED_QFUNCTION(STGShur14_Inflow)(void *ctx, CeedInt Q,
+                                 const CeedScalar *const *in,
+                                 CeedScalar *const *out) {
+  // in[0]: solution state q, in[1]: surface quadrature data, in[2]: coordinates X; out[0]: v (5 components)
+  //*INDENT-OFF*
+  const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA]) in[0],
+                   (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[1],
+                   (*X)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA]) in[2];
+
+   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0];
+
+  //*INDENT-ON*
+
+  const STGShur14Context stg_ctx = (STGShur14Context) ctx;
+  CeedScalar qn[STG_NMODES_MAX], u[3], ubar[3], cij[6], eps, lt;
+  const bool is_implicit  = stg_ctx->is_implicit;
+  const bool mean_only    = stg_ctx->mean_only;
+  const bool prescribe_T  = stg_ctx->prescribe_T;
+  const CeedScalar dx     = stg_ctx->dx;
+  const CeedScalar mu     = stg_ctx->newtonian_ctx.mu;
+  const CeedScalar time   = stg_ctx->time;
+  const CeedScalar theta0 = stg_ctx->theta0;
+  const CeedScalar P0     = stg_ctx->P0;
+  const CeedScalar cv     = stg_ctx->newtonian_ctx.cv;
+  const CeedScalar cp     = stg_ctx->newtonian_ctx.cp;
+  const CeedScalar Rd     = cp - cv;
+  const CeedScalar gamma  = cp/cv;
+
+  CeedPragmaSIMD
+  for(CeedInt i=0; i<Q; i++) {
+    const CeedScalar rho = prescribe_T ? q[0][i] : P0 / (Rd * theta0); // weak T: rho from the solution; else rho from P0, theta0
+    const CeedScalar x[] = { X[0][i], X[1][i], X[2][i] };
+    const CeedScalar dXdx[2][3] = {
+      {q_data_sur[4][i], q_data_sur[5][i], q_data_sur[6][i]},
+      {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]}
+    };
+
+    CeedScalar h[3];
+    for (CeedInt j=0; j<3; j++)
+      h[j] = 2/sqrt(dXdx[0][j]*dXdx[0][j] + dXdx[1][j]*dXdx[1][j]);
+    h[0] = dx; // h[0] from the surface metric is replaced by the context value dx
+    // Profile quantities (ubar, cij, eps, lt) are looked up from the X[1] coordinate only
+    InterpolateProfile(X[1][i], ubar, cij, &eps, &lt, stg_ctx);
+    if (!mean_only) {
+      CalcSpectrum(X[1][i], eps, lt, h, mu/rho, qn, stg_ctx);
+      STGShur14_Calc(x, time, ubar, cij, qn, u, stg_ctx);
+    } else {
+      for (CeedInt j=0; j<3; j++) u[j] = ubar[j];
+    }
+
+    const CeedScalar E_kinetic = .5 * rho * (u[0]*u[0] +
+                                 u[1]*u[1] +
+                                 u[2]*u[2]);
+    CeedScalar E_internal, P;
+    if (prescribe_T) {
+      // Temperature is being set weakly (theta0) and for constant cv this sets E_internal
+      E_internal = rho * cv * theta0;
+      // Find pressure using the ideal gas law
+      P = rho * Rd * theta0; // interior rho with exterior T
+    } else {
+      E_internal = q[4][i] - E_kinetic; // uses prescribed rho and u, E from solution
+      P = E_internal * (gamma - 1.);
+    }
+
+    const CeedScalar wdetJb  = (is_implicit ? -1. : 1.) * q_data_sur[0][i]; // sign flipped for implicit time integration
+    // ---- Normal vect
+    const CeedScalar norm[3] = {q_data_sur[1][i],
+                                q_data_sur[2][i],
+                                q_data_sur[3][i]
+                               };
+
+    const CeedScalar E = E_internal + E_kinetic;
+
+    // Velocity normal to the boundary
+    const CeedScalar u_normal = norm[0]*u[0] +
+                                norm[1]*u[1] +
+                                norm[2]*u[2];
+    // Initialize the output
+    // Zero v so all future terms can safely sum into it
+    for (CeedInt j=0; j<5; j++) v[j][i] = 0.;
+
+    // The Physics
+    // -- Density
+    v[0][i] -= wdetJb * rho * u_normal;
+
+    // -- Momentum
+    for (CeedInt j=0; j<3; j++)
+      v[j+1][i] -= wdetJb *(rho * u_normal * u[j] +
+                            norm[j] * P);
+
+    // -- Total Energy Density
+    v[4][i] -= wdetJb * u_normal * (E + P);
+  }
+  return 0;
+}
+
+
+#endif // stg_shur14_h
diff --git a/examples/fluids/qfunctions/stg_shur14_type.h b/examples/fluids/qfunctions/stg_shur14_type.h
new file mode 100644
index 0000000000..da63979ca1
--- /dev/null
+++ b/examples/fluids/qfunctions/stg_shur14_type.h
@@ -0,0 +1,44 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+#ifndef stg_shur14_type_h
+#define stg_shur14_type_h
+
+#include <ceed.h>
+#include "newtonian_types.h"
+
+/* Access data arrays via:
+ *  CeedScalar (*sigma)[ctx->nmodes] = (CeedScalar (*)[ctx->nmodes])&ctx->data[ctx->offsets.sigma]; */
+typedef struct STGShur14Context_ *STGShur14Context;
+struct STGShur14Context_ {
+  CeedInt    nmodes;      // !< Number of wavemodes
+  CeedInt    nprofs;      // !< Number of profile points in STGInflow.dat
+  CeedScalar alpha;       // !< Geometric growth rate of kappa
+  CeedScalar u0;          // !< Convective velocity
+  CeedScalar time;        // !< Solution time
+  CeedScalar P0;          // !< Inlet pressure
+  CeedScalar theta0;      // !< Inlet temperature
+  bool       is_implicit; // !< Whether using implicit time integration
+  bool       mean_only;   // !< Only apply the mean profile
+  CeedScalar dx;          // !< dx used for h calculation
+  bool       prescribe_T; // !< Prescribe temperature weakly
+  struct NewtonianIdealGasContext_ newtonian_ctx; // !< Gas properties, embedded by value
+
+  struct {
+    size_t sigma, d, phi; // !< Random number sets; sigma and d are indexed [i*nmodes + n] (i < 3), phi is [nmodes]
+    size_t kappa;     // !< Wavemode frequencies in increasing order, [nmodes]
+    size_t prof_dw;   // !< Distance to wall for Inflow Profile, [nprof]
+    size_t ubar;      // !< Mean velocity, [nprof, 3]
+    size_t cij;       // !< Cholesky decomposition [nprof, 6]
+    size_t eps;       // !< Turbulent Dissipation [nprof, 6] (NOTE(review): shape may be [nprof]; confirm)
+    size_t lt;        // !< Turbulent Length Scale [nprof, 6] (NOTE(review): shape may be [nprof]; confirm)
+  } offsets;          // !< Holds element (not byte) offsets for each array in data
+  size_t total_bytes; // !< Total size of struct plus array
+  CeedScalar data[1]; // !< Holds concatenated scalar array data
+};
+
+#endif
diff --git a/examples/fluids/tests-output/blasius_stgtest.yaml b/examples/fluids/tests-output/blasius_stgtest.yaml
new file mode 100644
index 0000000000..e9f64e42be
--- /dev/null
+++ b/examples/fluids/tests-output/blasius_stgtest.yaml
@@ -0,0 +1,39 @@
+problem: 'blasius'
+
+implicit: true
+ts:
+  adapt_type: 'none'
+  type: 'beuler'
+  dt: 2e-6
+  max_steps: 10
+output_freq: 10
+
+dm_plex_box_faces: 3,30,1
+nDelta: 22
+growth: 1.1664 # 1.08^2
+
+stab: 'supg'
+Ctau_t: 1
+Ctau_v: 36
+Ctau_C: 0.25
+Ctau_M: 0.25
+Ctau_E: 0.125
+
+q_extra: 0
+
+dm_plex_box_lower: 0,0,0
+dm_plex_box_upper: 4.2e-3,4.2e-3,5.e-4
+dm_plex_dim: 3
+# Faces labeled 1=z- 2=z+ 3=y- 4=y+ 5=x+ 6=x-
+bc_slip_z: 1,2
+bc_wall: 3
+wall_comps: 1,2,3
+bc_inflow: 6
+bc_outflow: 5,4
+g: 0,0,0
+
+stg:
+  use: true
+  inflow_path: "./examples/fluids/STGInflow_blasius.dat"
+  rand_path: "./examples/fluids/STGRand.dat"
+  mean_only: false
diff --git a/examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e46c092fc4ab7c3d07b22d4c97f81d6eed4d3d3d
GIT binary patch
literal 7816
zcmWldcRZF~7{(<l<5dz$B9)!JlXG5$7l}evG8)JXQRLUKl9eJ`%4kriWQ4~|_H5X*
zWF#X^<99!Q`g~pYb)NG+=RWuK&~XJErK8)}ftDhxxpw+_P$|u=!gtrx(Nff=N0nKu
zR7z`7R!i{|{O#otu62@1X;aJ&nmz{qKU==pG7Y$3&6AlL_`BvgzY`}Ky|Wz8=fpzT
zltumS%jSZ5Z5chcWyyMqR~)?2sA^TaUV#bPd9a(Sq4_xIlTTMq2axrB?nzgpQE`@?
zA)loQrB9`zpRYNApHkehR0GoQd9qqD8j-*Gyh5!X@i%E*ilTo9dZD{hxy!&Co>VT5
zqa~B~Q&hza;vCDbi(?&|z`yIy4d*_n&-~TuY&cr%HFdVE%O`}cGAftv{t5cDvIExq
zq<_dd-x0K!-d;J0C5Xd&Bp#~nzYP9rn`*QsNdN1<S7XpZ;7JQ^ts*459C}MENC)(L
zQarmrAK7Wn)gyz}SEFe?|Ggkif37YpU~C2d{@=0o`B0zndT=2-T4%4HFjR6UXx~0<
z`)~~Qvo%|WL!p=S&pp$&j@G;~*{n3f2?KsX6}ol~@W;-wwGpJh)M%&!TDi*LASYgq
z(1m@EoaS6XFG0`!QULfUNAzY6%$!t@#h-`~)T+}Cp9&<ve^z;H=QPyc&}21n3^TQH
zhfWQ@CTuTll1FSppntjXz>E~>KbaOMkD1iOG$K8C2}4)={3N{(;BQ#reA5W(&*W{f
zpTi8*Jk-ogJ*3|sB<_u)K(BOD`R6I%zkZ83<>AdI6^#>EDCoTKzQMZ#;Z#Zsvu{YJ
z1@M)Mg6eF{8TjO-2;*VGEmxy|F7Y4eo9>FEgW&J2Do(^>N=V7!7bbm#Ud{Tv$g&^!
zpP`TV&xZ78|IuuS*#pkbParj9*4)G=p~we%T64#e7wCDG#{VA1+uAwbyZy>UL0d|8
z-}1-@e}zL;9Ic@L8Oi&n40BH(7-iPlNL-5I+9$R*6ZG?U18tv>{?XwYE0|mAVb3m)
zlZ0B{JBp~+NAM5x&EvfZ`e{#oT^r2RbA_tw&x-7MRl>G#-3Ps66zf^?ylApb&i%qW
zX-x-ui1+A@jG?vLlVR}p+hiG00{ZWtnpvVSKkE&h0Lz!ejop%Jb0MH_lPuTp7bN{F
zv;9Kx_TM{vOKXRb@>xwDbHj1q6*F%XHAsJ!xvArrPfPz-M%6ELp5>msr?fiw-&-rR
zf&Gti9r(7Kfp`B5lpJ*bh|&dDS5~?<fdAmJg_fhBpSomFHidT!Er{qA84%&lC%cmr
zZ$bUwnPU@|N&m2$V)wC-gKzIs7CvNkm93j`-*qabozrMiVTAPOlG7Z-yRuo_>I7Pl
z_bKg}2g;1#KO}DAVh{QUUGG(!u!x8F?_P=>$~p2{;F>boAH3x#>#spSQDt+xAMdjg
z+F0GTOx(7$?+SkH3H4d*)(YVLit_SzXxNYUS~W}Wc+iPl-whlY7L^1ZDm|Wdfb?%T
zdf*G*LkZ9`|G<u}9MV5)rhFCFuXA~ve=q0<Zc~!zv1Iz2I7yFGRKU*0_QJ0KxPD52
zj63MZbRJmJV~N9()~9{PiMX+2U$jkNf7|O{nbnf#)6>0jn-muNEqPtkstEZv3AE<F
z%A-;`WQ*_YBKLn*jrST47Ii$lgDc_%3f8;&Q)ayx>JQxdBbAJnqWRyvHGhF+-^r-g
zeAt7ELJd-CzqvqtKSPBEO!}AZ_F2U;KTf@6m#QNY?aJDoyMKcEUDj1^jKSYc$1vj~
zmOff1YVcAJg;KJ__v&#|DV@8KOrS03=NvgB*|F3G31srP1x1#WaUg%-?S)(wQ_#Po
zZ8Hr#eq%*Ru5Z^hde9rouhM07XF$JAYc!JV&!5w8Y_rD-2eYIOq%#sJ$(z3MB*6RK
zrE488RSN#jZ@kjhu)G@kON|S1DApuk+oR%Q;2t*d+})s`pK~3L#&SYx_s157P@Eac
zaqNwO_4g)zUK;}c#Q)YjXRs=>w%??IE+WzuQ{KCjQYqauI+ot+p#PC5u=pQVp40Rd
z=eHq}k{LGgb6uiRdZ<~8Kj*>U!NFTH3oDgKPVQn-M@iatbTS{H|Mn<<(@0hX{c_lo
z3VN*Q6}X{cVGN}v`fO*;Era!Wsu?zK1OGNfbB;o+VI#~p5*>qTtBTA$Zat?`KB#C5
z3BQE;l(L-zhwy=olB{vRxrn4%gY`Rc7l40yRDI|taEl>_glepob?=MA8&~u&o#pO6
zl~=&yU%5#pg8#ps%2WodDi?mfJ^cX6oD11T9Givp#m34AgrlVxk=F%ni5NS$^B8Yg
zMUCcE9Sa_1Dy8?8Ter3&)ZZB~a!w5+uk1<V{skhwVfl?$j5w9jC*qf41n+m0=^Fo*
z`}p8YvuoicQS{V2tz4fn0s3@wX2TqCy4b=yhp~nkYCVNBP_9rpPmc6sD&?cO7jOAy
z@ZTRZc7GCUWu#W$KSM>$e0GZqn}Vp6!CJl)*7u;NzOyPiiK#1%cUsH(h)AJSWcs3#
zN*QA3@_X|GxKXQvq#ULy;vM>115x46eEf1)74%0hM7Q|^XZoyvS{GwG`mV1Z8PN-~
zTLPtrw!-=w-!6Un1^!Myi+x_>!;RUm6u&5;j?@jqhURcS#uV6%j{AV#-J;|wKRzsY
zgLgpa4&h_JR(8#A4!AOBmyk4YgXY^zr|}^tI<4K#?x@&9Kq9c^J(V)*p^`on0Gv&C
z###((C6`RJ(%GP5Lchh~K@P0HAw^1Y5d1If_?u^hb<Ql0ntB(Yu5-FRdiXV!^0_B>
zTelwQL)EM|eZ|_}at7^%Cy}XLHTz4Jy;RB<{tXcu3V@q1M1SeV+SYx#F6>IEylb2M
z(*6q22UVKyy$75^C%AVBAI^O{XP5F6Rg5q0xL^kJ$=D*Z<8mnYUu^YN+kth%1D<O~
zhM^uK`i#eK;r{%ZVsPdw`TS2RTTObfu8c3!IfgZKW&QZAL~{S87~aPhg#kYiYF6Ql
zbv_0%o!w}LsxB{Qx#mKD{W|hr&A~gsIigz4b+L|B;ErkIc2u33?ign#59@#0;+|Fk
z{@yD1Z3d?C{@|~(h(RBGN4wf>;r*YcoUv<y^B0xhdOw^E>ki^|#eM7WPSrZ=?q3D|
z_)Nt0v%pV=ebAD^y5;$qC4H%=+A#ysX@dU@z0^*#OyHZhN;i~aUH=J>lW)GF+M2QB
z2iVE=n>-NO*$n=kd8_kQn1%wk74Bn1pVWU2Pffu4H66BPDYFXnWe+FQ|6!V7XU&=N
z^Mt~+{+#;Da6ir%J~cW-uJ449XuB$=33S#E*~+8peQtIRy>S1`%r4iRf&LoB#pTs8
zk7=wt%chcosIFM&_4mw&P`_|h-YyvY9ZmhBk6;=tR;^Yz4E6SxU0MnK3;cDz=eix}
z3$7j5osMa;F7M{*qll<T>AHvK)}X#$Y}qm=aI^F_rUgt>zmu-7bQV=@&vuxKh4V8j
zbE$Jm7&!ODzkhz1rV-+H%v&7QvzqE%r;+QIJ@hCE&Tq`gV!GZ0Ow+%npOkHf2E@-k
z9`IKL|IseNbr#SU{?hI+#Wdx-{&C4)L~3ngi?lH8|4c;ulni<PjlTxdCBgnh>5D&Q
zLvK_@ZQ}(U!C!&yl}{6J9!tl{O_-*8rWaW#p}J~jcE(eUpf^m9eQ^%_b*(%27BS7h
zcXGsn6%Fw`cJ~p5{yej3ATCZE^iN+YOjg44<-{-YWe`t`AD*DmEdo!xr_z`O+%U9g
z>uyZLPfd(o-$g~H=k{?M!TmHV-mqG)4xG1ynUxRrS9vwu<p`?dc9%79OaT3H>x&0q
zzKfCib1VN0_!Egw9#YV7>b0jjm43h_u9hiTf&R(j){kZId_GN^r+taS6#4PC)4af+
z$mdIv^RI5!kbDHD9TMsOs1$*o9pYjgSAh9<R!CcDz7ja!%~<uzn5K75Ic`%bs#V*1
zf4Hv*^ya@e7>j^E<E<8@Pq6<}!Uk_J8qxe<{Z%&}xb~it-emuOxX4y(glSshr}vKS
zBuX=G5n~3>e`oUUJ=P=NPrUqZSQpcDkbZJT7fN0D?$P|(7yP->rPD-7e~St`a(}-Z
za8&R`HLA6e`%TqAZ<{<?A_MnN!=t8)v#`DruImC@&`5|ylKS7xz>P5d9++RFl3XhN
zAHeT-(DWvE4N;YC{M6fA7I-1IknbvR<(GmCPEh~ky(zJ0L;$x!qxd%HpR@E=-9K9c
z=RG$53Em&0JpPR_eyDm!uzC0g^7nHtC=ZhWe{ba=W+A9w{&KoI3ypY-d9`e32X4Nn
zce4=aL%a6AvxoH^?rrIiB^o|7=sZsU3_L|dY*Y-mM7W_k{C>yTmR|5K5;?^xMgF{o
zz^8v7Pax;Bt+uC)c`)sS`FR<s9f%m7zROq-{cpx^v-=4r;FIgCcE`Z~Qd8!YRWy=z
zL78E;3b=v)X&VRNrvg&#Z$bT;75?jyL`VAbp~VvDk2AM-X>qUu=No>Zb{^ACHgF!8
zOeIRHUiNoH!~8nalBCN){yv;4y50epX4~ueYnT&NSXvd8sIvf%5Nqun11?(Gr3qZO
zish~MEi~A%k#1+HK5#RSTfs2@MRBLO{O!jyd&^@l@75BZTvo#k4WxnJxb!<+0XRcy
z@}v}|xiQpC?)pwt?{zlG*#!M#Ca3<2$U1PAzq$cxnC4#oFR`-(m9EU^=HDmx@3BOc
zZ6VAD6(vW~V4iKRU5IRi`#6eu<E-g6n1AC$-jpW5-=6jLagcY~WG9R->B9fd4m5uk
z1};RoX8`keBw^_?_!f-@dR_Xwd62M++w{pJIT7mByN&6lf}SacP39~bmHGR}$n6W-
zr(ZI!P@F;5pSaiy`69}*S-8?1jfy-EcbcLS%AfB9@`|{FpIp~q5GMWFrECPyh)n&L
z?~AEOVE(6psbntcKb#_U9C+Q?>EMfK>7c_WLWYZQ4t@XN<VkqHnj0%JdhbGg#)Ds<
zuc5^b&ElrU=atJ8dbxCoo1jlrdEO86X_V(+hFvULOih;J$QmKERsGMrW%~mDO1i$9
zk4gUrbKP}l!JoM1TI`49w`D%{IJyq{Jzq>8Xaa9jT>Z_4*8BeC2&6O-&R8Y!V7?&u
zcjdnFCi??@EB7-ew6@l-`>4Q_I5g8BT_jl#`uo{c0_6ODfl;k#2U_z;bI_5#NgQcf
zX28+#`?YCg-<f<z`WxiTu%MLy>stA*-3X_#N=*OR4SETKU^cklB8P;-3x+WBjf6`Q
z5AG4>=FhYZnV`QnkF~j+?Et>c#M5ksnd<79)CJ0klP99scmGfa{WA}1fmzaDn<HEW
zGpX1s{I_A1Fd~SziD#aIzus@&KvU4qG%`8yV1~*MzIl6u&=G-`AH?b#K(EvuGw%v~
z`IL7G7v6kyL(4CNcw|o}@+syu`TVPs@&hfv|BSV--^83=s_trKOoYp5OT)TZE9mRy
zIYVfqe{%2NE0_}0xuPmMLma8kTuE6L1%JB(md3Y9e^GyDe$0O9{k|V{x6z4*7oGgg
zvOur5+0%l2f4CbjTP)yhmEA7=GM?y~QNy|#g8<}5@f*`V8$rJm$NC9j?(qlzQ7`Zj
zeu)jUPO8eFpB|_UppyQ-W!5|~x8(B?xtFN~j#IT17j*#tplA1$=0QIlR=8q}xw?5X
zCJfY&{euU77O~}^cWg7RT>-s@XTbVjyi-M~>*22mbo(|(O}@o`@OPuwKi>lS*@_8v
zG0eXayYmdJ6W4RJ9>%l4{m{1Wo8vAP(0j>NA8f<hf9<#Jb#O-#X$$+h|MUVc6B-B?
zApHfbTCy=8?rD^%qDSXTw%x4KVFLdI4W=vApm&+OzEczL{$@EZLsdYjXysWhnJ-$p
zc^ZNtpGSUCDOBpiy9M}4UpZ+KHy5^0PTE94{i}5Cg)XH3M!uR2ke@d`*QruP=8Z2d
z=AJEte8v_e?DLKE-;)&e2k*)Zw>@mzgS=Zfm15iH!GFkMrxiK>Ja{5jDT_tUPKq9J
zaYNZ^#&1=JP6Kz!?~^nJ{iu~cn#cQ2>@8esQz34P@}?&AK)-3HKOvi*0(#H&9KLnP
z&lY(@kz0|AjNwM90ho{4Z`f9L%9H*dO!M;a9*(k`DfhpjE6pB;`x7AlwCOwxN?Qc|
zpmGeMjwR#02TdGV(et6K_OV##&uysnNv9|1hice<bYlt3%h?`nL&W8_?d~Xq`K+A?
zekPp`diRy_abqm@>$UV<_+D|j|I0atLOIBXQlHOX_X7Q_&Vrl)7CjfNdP3(T3QnzS
zVGS08`U54o8=yZ$3+&Iy;>NPIksM|!SJA5;X7kk48mR9o&bR>capbVD?3HvZGcS8i
zq}qo_l(`nZZ3pzPj^;IWPjWtU=?{7A3Hh1Urz^S>g#;(MXlc?z{uOZgtqSXloV)1x
z%mzz&x&}%YC!r{T+TcB%W>CLSEycA9{Ovzwb%$X^5w{5mTSrtDOJ!V3fcc<vQ^og4
za{tCyFPI^$a3FA-%%%V$<=ky!XVrs{KXF4WH#uK9nKb$aVR_Z2m?zm*D3+04{$Wuy
zaF-ia+X>Lmuea%|VL8D8s+Qgj6c?L#kTrCH%+JL|8g<~Ga-NMp3aiq`a%EJ-qsliE
ziB0p6-@7#!tLeEwzaVHFCXAJ5>HmeDa3YdC7*Zv*G2~D6ra)P8ez*JcZ|VnDDjwfk
z?aho5XM9?cx5IqZqrf_uOnyIIPCwYN1o?S)nF!B6lsck#!uJ8(A6=eDM<Qgwzq2y2
zd<)jFzzh9eJgD|iYO8`c?9T^@;vMITpg!C6Y+-eLfUa-XJsnXZDfail?ak+bcLr*f
z!~7j(?h>T_67sV)i_>;1^w4ExQpxNs@YqIrN-g;RTPli9h5Rf#Sjy9fviM}~x@1|B
z`8nlBSsu)963R~^Lov1;)3C3wNAFrvyl)DEfA6CM(soi%e|u4e9b9Cdih(Mpz7p~I
z`9;aTEs#HVjhNo;25xf2ZR9TGXSdZ`oO0;tt`CMUdf@wCZ?f*2d2)Z~O5TKxU=7ph
z$Ak4oDA&03BGnW6<41GPn_)BHFGo1MKY_K<78IsGRiLI|ksCHAen9>$QhS!n26}X3
z<Zl3`{=1gCzA8;be18yI$aDwtCj*nsV`tz;6(WLXAwRR`{F;kJg&*@Mc^BdQ4eGnf
ze1Q8tig|0*J3)B9KDpYT*U<}4i*I*ddXf40_0NZ}z8KdKwd@{z_-&oU1UE10IAUM9
zW(4_aY_Ex1VL0er<4Wta@!?&28U;mj37^&3)n{7_fbaM0R?q^j&mSyPfDbv=xQus}
zqGE&jNnZ2Yke^+IILu%^j$-q-5q$*t`Oec>Zz)tfFn2LO#fZ$$0Xg|M!2cqP+{Zzz
zWB-N2dvpWp;yD<xuDJyHx85XP(h~F`3IhxtSbJ9a#lEM?$Y?@FYS$>tKVP;CYVeTr
z@iC21wo1s)cfV_Yv_|E5)gDVqFn@l&ns8c)<P<r(9a@l|GsUr0II7%8skLN<{yDbr
zVMCQU_+MIE5n{%=q58uMrPZi=wZuA9hywXHBF-d^9`uigSA<2d?*1PGzD<?r@>>^+
z-0A_~|HJ~Q&|jlWUvIW`h5WpqZZdfXsyZ`0biYai_<+uZ=*_@6%3|f>AU~gED%?gz
z)p=#5Mw1uG{2aa^W*Pi_`$FLRDUG{K{c^M*`XFh<y!%oB<lD7&%ctb`k$mri#m-o_
zzwBoKt1dd>l%*DuGzIxC-J|{tdH$`6%7bq}eztIR$V*1m$FfFp;^F?9UY*y{B)@NL
zRtmFDf&5&rIN_L&YP-BShEDa7`I#0+=BpU*lrU-y<V%ZO+;JLxT(GgV%<csJB|5o6
z^7kw4^6ALNv|UDN)$0|6ePPSoYYkSIKM!3KU=IOqYQ<_<3i;V5Av5F#s%Af~l3zvU
zzprmLKZo-j#kJp(_6O58{T_a=e+bpRrPdaz!+bqm82$S>%y%*8Uw7#<Vw(1yhtuH~
zQE&1%)pLRY_~ZLgml;5xKUcT{^S$&RR=rqrB9_t3=iJ{qsP7h#F-iyAWUAcT9rC3j
zXCD6$BG$4Bo=(1l{32=c-(L8B7R9~Da`8SqpQHYQdJwAT%{ra_OcL}m<rN=6A7guu
z>N5@Vd&`Myp^m73)|w-G6WnjpJwG;4bU<IwwOeQ-rYV{J5WOHvWQcn%9G8OoapwA+
z|IU;1gVAxdjggQq_j|s+c^Z}dG8VKy59f1MdW~&P6*!N%OZ---uM@C^y9m{_Xm<a1
z)(P}Seq1gy1Al$F(urM=kNCIqp0PoL<NAM{O5y&QUQ4>q0{tQCNy^0UAWT!cEWIW!
zMdUToPxLv&fj&mof9VDABb>^IA)jMo_fd&BRK)hVK!_Fk`>Y7VuVM21^5$5642S&8
z`#tD=2CCa>IWcG~1o~rfH?vB?Uq<d$J>(0$_FqYia%kA8Z)H**=A#**?|Rb_pnrVH
zOZz*fX?pB!*(^l7SX$t9cYg#t<FsY49dMl!cPAlVYjt)<S8qjm3D>Xk%ftDY-B~Xr
z3i&XK&-J8W2-K$?C9+PVTFv<IZaXc|n+rF^!2BJ<jFrzr{?xl3)b1FAM#L(;Z*;-=
zoH^K@_Kx&_kSKWy@-fxz=<Q#f#2b3y>UuXW;Mu1lXCc2tp&m7{Ie35m`HGfqMJa)E
z#|9%WfInwS=ABW}UwO?v3G!_tt(3tO)u{iOe_FNz`cn;$hJi=F^Voj}&L8ckv`<1m
z8gY|0Vt)qtU`F5Ytp@a`sD!15_l`n7uL!l@*-KPAH}m?t%>&Op`igS_xYB#KxB{pz
za?yP{hPW2w5jt*e3;uM>L>l@2@=7OgZGik7r}wbd1Xc6b3v+a8g5K#*N6Ibm_xkF5
z{w%y7eL*5=&S=D8v-0=_1l%mKPm~J!kjor8Yp_3uA23Hyh<Dy1<@a5MfhQV7aFO{@
z{D+L9E2bS=_?B63Mm)05j2n~K2z<&c&yf87w^gUKZ4C0WmZ?eYDk26~a~|G;{ygJ*
zqau~;PhXzPDX+uxC%Ee0GDIU$x(c-?cK|okepAy1+(wtZiUZS3<imQDn2C;7(S+F%
z=pQpNhg|3(Uq$g1iO~l_elC@6rE4cj#^bo$>Pmq(*SOYu0p}D}&NhSm+$zlm_i2S$
zPq9;u5Aa)k&t!{%ixiFd!TR<0+SX{up&{<AAzszw`LGV*``iJXTd#nt3G#DS!WMr%
z;*)BwBDECq!%R?Xwd!Hu3~m3#^<tXqa+vSJZK7HtJE7t}`F$uSP9nJ;ILp)(Yj{81
Yixd7mQ9-3&ytgvrd%z#_SATr=KaC5E0ssI2

literal 0
HcmV?d00001

diff --git a/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin b/examples/fluids/tests-output/fluids-navierstokes-blasius_STG_weakT.bin
new file mode 100644
index 0000000000000000000000000000000000000000..698d50c7bfe706f9a27df7eeb15926e96b4fe3cb
GIT binary patch
literal 7816
zcmW-lcRbeL8^(zw@|01^h$tj`mc}`c`IIP0L@5-qXI55LWM`EI$xI|8gpZMsO(>%f
zO&ZAh8o&GV`_t=ko%eapxIg!G=s0|I>F5}`&}`&npvKM$Dy7}dUeKBi{*EvjSpQC?
zv`MB8j+&#{sKvu=hyGD1oz7_yc0HimbIxy&r&3y1j0=D3k$#&y%N%I(cPzK94j*C0
zd3~&S@)L~bVEjnt0Xd$kUEUxXy`jCgCZPl=Pl~oB4fVqKQs?E%TFLPxyG}5oF^NHu
z!nF?s_4@ivx|6lQa|!F@>>zopZaUIvRL*Ugi@yxX@SYwqZ;b{1s>~M6&!A8E95A_w
z7Lo(wyCt6z&Rp3|4kvB{-_2EJVvgkRc+~j<&G5|gzdK}t^mt7J|E{<IU-rsOVmrxa
zn>sj!W^%&bCi%M(23)b);zNPJzfM27Rt|o<uO`9P=&z@eN&QY6q#3n$huo7<@Y5*{
zd1-+@E_+CJ4Kq+~T^Y7{M0ggOESN=g0YAV)Lw`5u;eRhGcc9hZf;)m^ozP`becp4Q
zJivd@kZJNO@E3ZWtKOish-_wC(RISyJuNk6cM<S??ez8-k^J=6X$ok?Sz~fV`zt!<
z>=(c@HxGWM<gjsT&_C|E)Upq=+U`mSH=-x}ZaCX?gyjK0kt>%rK=L~z%}X&8M}GIk
zc0FWcythi>p)vS1&cszllKfD5Pe06bie*f;&V;a2o1O}<p#eW)RL|6%<V)XhmBNgr
z@{R^ZDafL1d-w37KlnMezCV!x`oNoaQtFr^JV?P*!;c7Po;9nAxdi;ujvQYulK(nl
zaTHVTKe=YHXpda)KmN~xRT=yyRo_E;Nxqz7LMNtVWv6LoED)}L9$wnFCI$R-<2jdC
z;J1yrvG4|Ou^F^6jTk|W{U$!Ht|Q>zckFq%KIk2j`XV3kP9awQ+WowQ{~x!xWz!qL
z|N5!8SexXxuD+_nTz0YX6qP5)dr8{FI|<HjTY%13rWEk!mn@VeF_$XqoKxx=;eDA|
zHE5Mw|3-fQuQlMesZTLt!yIn|0uI=7AWvj<B9Mdp{b~~rMacWmKw}TL!@D!?az<r!
z5?(&K58?+uP$?a9D$-r1B!7-lVTE}@dy*cyB%{Dpc^yx|X7E>hj@xvK<gflZFO7M&
zz1A_eog)I${H2vB7l8lYFUHd;;J?;x6C8(kDm$lWGQL8AkLP|ZJiy?0J@heP3G~uS
zAMH<JVP50o=gf+TD@R@*yrjqg{6}>9zN9~VVg8ge7W|a1ve`=kg(goaXlBO2`mj5{
z`Xf!QzXwxa5EgVi%WmbinFwl9NDx{nrBb?dis^16@LNBnZ41P^@A0%X85}{kO0+_b
z*i(RCNfRCG1U=JZmr5-by;*sg%P$&PNe#!V1|XQf%A$UqA<3`a-kgQ^@iBTQg`1+t
zHluHS)#rhK=VJ5yBfwwuzq-u=@3m5|;}rQrgoa6e*K&>k{_7vwX-eR?GIn1W!or&m
z8Ys&AMzQZc$cEgd2fvU%gSaN>iP>-OY{wF*d99fr+)?-i7vHStFW~3?7ZlMCde~Zp
z@+mCNQryXeC@4;_;@Gwjaaf;Ig6c1x0Dn1X|LtFRKSN_Ek69cMmazHOxviB{O1EP-
z??NE>uU3{#>R?fGYEZ;!Ka_OsvEQ%7Gr%u*xWBZE96x{Xw+fa$=tc_<r9&w~S>MC2
zq=KI|_ww)x=t1w<1{1M#ZT|Re#EcRvLQ7^X30NOvi}7>h`ma9o2+qP%=a1bfZa6@M
z?KB99)D)yrdM=l2h^qj<#fWQB1eUO~kg(oagC4k*cv!Pl!~7%LPV5K*{l;LVIyY7b
zH!|n*d4nF?IkY^RI|2T0IdA^dgYIL71evhB=<zw%95Zy!WKiW_FVxqr4lbh55zR(#
z7}$Cv49gvTvVGp6ow(WE@o(fa+>hQuEyuC*;5UokaH<N+^8c+H%j`oL@_bBIbc!(l
zQr6=As-R!>Wf*>s59;lSEY(v-nRPEuY--D)Qu=t98a%~7_w4<b$AOiG7SHAH(L;~@
z^4sg?z~9r9UKf%G{Ed@_eW6(K)#3}fmj?+?6ZWgY6iM)VP{V|oNd8I2?Jux`(<QC7
zdw)^RQsDHQj2x`5w%*fI2SBH#Gh9~3M^Z-)3hM@;=Z86LZU?~mde2H|junvnMONcA
ztTHnc6MdG7vY0HD^rWCZ^j$E1eeOH(nJ2}YL-3)T4ZV|8Ph`w+W*Z0RER{0QzxqUx
ztl#F5;e(y{ptO5oD_aG6&fD*zAqe}|YdgEm#ujudeSWDjjJ*PlbE2(K1sC1+ueal=
zl%ddz{bFSOxT2{eV~Ek!{T{89a+F`dDfg)x&hPu21PPru;BT8AF4e+Ee`O_oxwsvL
z*)ZFB?tVz642}jVh{66uUSZhCdLAFqr#-K^;EPHMZwkqpFi|P}GNX$EWc`yrymD?4
zt7ZiW?{3vam9e2x1=IVfluv#&gA6L5U$v8cWR9szte5l;uc4v<p9gCn^{ABL=+^G`
z3E=P8jL<DiRb<<ED>WFU?$OmYm1L(<KE}Pu;dcSQb@5z>7{-^Br}xKDP~}mda(OG5
z--q<xl09~ytNxY~tHtW=4;lwmZ=i+^JC=FK@BimNgUq47LBGaq*~x&_`OiL9D|?Pg
zkBmrOxyMMQjJ_P15|scx?`fOIU$C0>4(*<5Z<KZN(LH^o*Hp@Q@zo2-WPf7Uqkc*o
ztKL%`+xoc|)uJWwx8EG8l#z#9Z<@}4epsqc`!Lot9=h&r{sg^ITsSTs1?&6O=N*&H
zD(EI&uN{T4#;+t+K2tjMQc*bCahaJ)nRrm;-VEn6T5Mqb>N3_ao^Fffm_dbuQ!lra
zgi<MAT6XQ1{RVy~n(<N)R)1PkXj@*58WM#ziirDBDPwQA<a5dMFBSh`+8b+m?O{@W
zTZi82nx#*>E>kH#l>6RJl!0!GoN1|8OJ4ew^!y)G!5aQWKUWCm7k;*5djs$#`UHzp
zv1Z?;Hl}B?sMy0?<M+K{@Qd7Xk|gi9i`>H2KKOnA`QP$&KyRE{4s`hMpi(Ap@ZNaE
z3%bI}ZHfb?(Kp;#8QhCH<37wt^FjaiGxN#k!Whsm=qc9mVy(|!Vlnjpp)!X~kLP+4
zRLb`Q4a?WvfUnehVz(jIs&HMq5_b}n-mzY=W`g@QbwX&BfvkUSocnjMVlAH!!U-(B
zsM$SW`G62FmGVt+KIIqLA85a>?RtV~`(yv{9(|9x+p;OWN0q=YRMyzJ4|J<4m&wDJ
zCi2zvNWC^HwUqvwO2GN}-r-~~5D$E*hm*Pzn6_)Jadl%DDw#E`I`$-hN|{zRd;Bm9
z{MUslhmK+zt2SfAtq{~gyU1^u>;r!9)xIDb&@Hl$4EkZ3>?4`{A71F)@%2+fci?<Z
z84dRd6oG#Jl9cl(rYVm)Ycz4Af-<?stEUIS|95hM`ycRSP9HSZ!!)HpLSZ}`6=#SX
z%lEbfzMo2V57~cS*ZbyJf@w$V^S4Wcp%$?#lfeqmFMpq{3O`PM-xA!}>5^dliy^;0
z)Sv;Lb<4vc0>F=P)H_)Sx+z0=cs=<4^zIxlN0~tqZ5Q9{f&Hbw>(ML@eDOEoiL01)
zI4mQ-wjVtURhs-~2>UY~%1I1Qf#1DoB7Zlg(Og;oZoY+@t$k{_ul)o+iw*x9sGpIg
zTdQJzW7<KhjPry%8dU5EwUgWc{0cceF|t1}nzgns#5C$kL4@i$x-Rv+`%17D__yqN
z=yDbKA`$&NIWg_Xix#Yrfim9m=W`}$1K;2D=QbyjpHX%Y^y6&(qUWU0n_qsbDbKA)
zep!Nc9_YjNZwKPQU*ONYR)mIe4Xrm2&gWETWGp9n|IF)@EMfmO&xJWMU?Qf&tkURR
z2>AEw-<(?mK98HP_#mdC8<9!bZYUyZZ`UskSl?;KGwz%W;P(oh_qzf7c_HTZY}DwT
zn`@R?27az+pMs;H|Ng~1v<CdMC&c+0(8nz&p2-Wt{h4}~!)(_Gy5+ZvOzE(GU%Z7A
z_Y%)IT0QH_E`lE&$wvIZXNh?-mWpYrb?J%Qm56ZB*P=%}%z&>SQj??u{s0GF4s}dB
zd0a13`!{Mx9`R73{UYbTv#0h9e81^E51UHD{5Slq_<0t6{OeTDG8Y5<+%6hB)VD}q
zPN|c7!N2nEpl>`;w*2?-UTNsxez;y^|2_i#AAaNdrR4rLSN-%SvhVG;=t=wxd_k$a
zs?Xq$ZI-L6#<WxK%a5i!MRk=*%bz|#fA+&bzw`bblHc)yb{WoB#h(Gq5%fvF*zxWu
z(5D=@l*ib?AG0^Qbtk6jS{+twJV`VJ4A6}J9sqypdm{%y@Yi&0@0W$~Z(jCJG$%@|
zOib59c7eZZCT%Pk{5hUJ&5tncT)=*ri&}`_KVKV5%z*zM#re@Y(APCL<yOJ^?UEI6
zVnd@WU2HSU(7*nilnvZT?jMnsM-{^~gVm?GY!`_3Y{r^lRj8jo=`0!?$^P4AGv~xp
zOfxZ<Rv2X?YNdv5YbHZ~KXvAY;J_B}_vBbV)x@+*{r#~y#i)`mx}cq~1OJwXYHx+X
z&)gC0W&r1FFv(ht2Mre$6ux@|_4lW-Ame3n|JSat@qNcMvyNlc0i#5JRr>R-yJSHR
zq<U4r{fc7N7yq4yX?Atx6J6@WYsU}rm9cA}XFf9X-vBy`SJ%x&Omj$1HqGNirBVNF
z9!t~(J$L?*5qbZ{qkr1kLqBL~>++ZUJB+onb1(GAB0}t3kPoBgOJ)_Jo^?8$Oba9X
zL3=I}7x@3ymD8@N*Ga#J{GO|5a_YyQ12*M^dGFPkiH>y`&)Vqjr(DoO{|N>Qp;5=J
z^@*9<=<tge6jTl0_coD_YyvOH@w1k4^3m7<{m7czp@b@n?UooJec(CDC&-fhpFL~4
zTOS&gj;CgavLKn4FXIR1V1HUy_vOhkf<6}XqFx9s+-Yr^?QbJorjl2*bt8fA;P8Dl
znB+I>JuX8tY)5zAaSKBF0{!9q>`lPmKQXP8N%F(IG~b|^Y@NdagNuYgt*b7-LOJkV
zo=M!l27bG*>QV~mue+trLdj#Kb^J@C%EAEn>BJvOTY>&b_TvHzW?)(Dif=nh_}qP4
zL-k7lzPJ4pBELVOY4vYv(CXmstqx(L$gDPMLh1tCk2aZhd6{97ud}#c6s?7y3^mq@
zAuL$VFLhL}0^gf^!y&Rh+oe@YZbB>eO2VfDqL3law@$|dsPAn|5yh9u{%vTx(Uuv^
zdeupCt1&YXNDDVlZio8P9<zbo;2g>Ky3MA8nHclFyk^75E?HTMT?6*7?P$e50l1%0
z^BdySlQEO7$Hj5ES;Ahdpkm{}D&U7jJo`s}pY0@?D#9@1i*)0fl6&aN(cpE_<51t)
zIOr0ew1ECDch^H9%n=f<ziT#y2yz?xWZ<g`{K98})yGNxHL1i9Ot~E~`$=;hxwGdQ
ztMz{b|5-Nm)n(xSdlArY3i(+(v(?jra7#$upOOyy)A?|t&v{+&+eU0no5NeIKG=(W
z5kpR=CAX{h!})L9Cm|A62YPGK<Gd2QlW%6<6ZIG(NN=I5^N=m@$MQmA$od~@dE6%#
zbD6h|=C9A78*ekRYwo9j-!t%MHJp#Ae*qg4^f4E8O}G9(3gLq-CbUMUfnV3$jt+s}
zrhH-A6>~JdSNnPO67uZal3r-R34XQqHQP6!6DvXyjCl70U$I-3CPctH!`ef`@V?V=
z;OD8rb&~Jr`BonD+&C8L>u>}GZO{4pUvw_`i~ilAmXQ4D3c4xC&s|(GhN48Eg4u!P
zk1v3~crW|_*}q+@j@~<pcPbWrRL3#s=0+jb9Bw!8yY9Yzfc$+$E6Op7SeSb-P_n6(
zaQ&k3A=Z!Nr^G8dRgrw#%Xw5RI4H1q&WRU=P0pwse>?%}v-x7D;{WeAw``jk<mXG0
zGSjw1aIoh%J0S@9>cr*Bu{rQt$0xY2;N2-_L@_rL3RmLv{L!)i{7O;FD`bC=zLvag
z6BhOB%)Z|we{78LH)km88<@X>MbImP<Zsnyl*IdXIF9G|HlS#|V`tvUZUO#nflXUo
z;rA~nwo)ELem?o^di^yb%w>fmT?5WfS6xbHAAH{;trS}{E?{A11A4udG!!SsqRJlF
z4}Kw|vp%Ar$MYC6Jj4>oI$<B=%hAn;baRJy!TU@XyNpaDnLmT~^}T4q;u{4}ZlE@b
z-*{9ud7~4okLHW`JgBcxONQD9%ppHF7^U6+N!%(<xN@~f1@f7le<mBbe^<jKj3`*t
z%sgRti7HC!P7h6ZbqDz6RJ~BL9tP9rG4)_sB?cGYs%4a*La!ep5BIm5`g`g0KG6MJ
zW@i6k=~sMS`Q@ikQgwz@Zv(l%dezU*{{j9=w#3W_$j_3FR_QauEk_^W@@f;vmlw_6
z?@Iu`MN0dv`&hy@!asVX89kUyo2$DG{b5IBvJUSN(7l+}r~0u%=-oYv!XYS4#^U{s
z6&~=9ooSOJ`%e#kLHb`<es9pmnMDDV8d>|Pk_GN>SEJO;FsM(_^h*Y78z4W&EuaP`
zBKYd*L*`LX-+G_UM#bL)zu8gSHATqJ;mU8XTtylFs-JttI$-`K?S;p8k-F<En=|;J
z&gy{vlW>%wRO5A22Hq$7IB-PTd(hoXQjN}Ir4Q271S>j}A;K9_xh)muS7W1)J`enj
za>?7nAU_X2y&Ek<_^h2CPbq=-r9RgmLn35-yZk-))@!Wb_}?KSs}AMrGm1ro!uO$D
zn>n)}33PS!W5bZ2Q}|lMDj3nTbo@ov8v2L#47z;N<olJI^%VVktn!Dm^~<N{D65%4
zb3q^<#y``}8Atwp=9BYOOMEE1&%%C_BvS3peK)WU{n0>gdf9*{_{~LyeA^&Db91{a
z(WB?5ULLSJeT~e|_tz}Qd~Mca-MtlK59PyEmmi>)L6slOzwU(md}C>^vJdE&UZhOa
zVq|{lospIZdggR6HmA!2=Jzo9!Dq<-(c5fhZR8+7Up@VcqK!iMCPXaNiXcCa2>8FU
z2fxKb?p}R-<jmi#NzX2!lJ7l^v(hmCei`o$ap)f+C3Cs%oy4jce<d>n-l6izdpkmB
zX^@}Y!V1dC`I|B5$Xa6RLORnOj%rl&!(FcXRXpV9U_+@s=zpTO-wdoX!BmCuJt?s-
z(A|;zvwaq$ke{PZ$&|=~-`bc~oeueVYs0?K3#ig>=jf60$7Fs!|7lkm=!dH%|FL29
zx4sbu%DYg#;AqchNj1pNlZONxIzhiuFukV}tMh(My<T90N|Ww7B&O6se$F56Jq+Ks
zXr88}=5efMHS)@{lLcj_Np5l?p#K;zoYLWX0De3F5#AQa&jQycBa>0BxwEZ)Hq_sd
z2fou3GT+F*6blK(nufMNrhH3K<5?#4a}v)WpE?@qS2lrem~XFi4{J;dzw{m)MlZt$
zii&==Kz@!tGTT=Ud@%{*4fc?qv)}(>sYit(RuzqntdO4@6FwT91-}#H6Ji$fa~h9f
z3MXnPUv_iNa)<ofXtjOi6X+s4&aUmiTJGn{ODM-ti?h}k>r*nHh$pA1M1yX1Ed7BZ
z){?FAl!>cE6>|N??AQ8We!jZW3C+NlkR%#CAwRPeY^v8n#eDmmx11yWLjS#-J_UZ~
z9}^7Pke|mlRP+m@HwoJd&69USe)c&ztQ!Hk#GL-p0@hkxZoFhYfI8~g6K6bopubDW
zrk*6<PmG4K;sdNTzG+u?h94>mtli6R*9rMgz+Iq05BN&Eu9=lXe&+n5qjwLLD(h}U
z7oI}@s(JV1#1rtlHg%ciKz?qZDN4wo=42<zP6xQZ-vk--Y~w&zyS6t#5YzTmY6lcg
zpl<ssgwuo)_;+x#GN*uUQRx1n8T5d%8D1t-DjRCX#b*lqx_(LT_rRB2#dA84pOYl-
z7386kyIj}14$VP+ke_m4umivQ>o0@|<Y#3DHSQ$TVp&R&?@tH6*U;J?^80YXNw*>m
z)1-E+&-Brw-Z{>2V%re#bu52+k^P%NW{lxC$j6~$<4@R80soz*`XOHMPnxX3d&6wB
zbUC&CKhU{js`KukV%k|j`Z)M~)83!gf*{{Sx_=JT=7W4v;N&1rK`m;@mM_KNedYU%
zQ`>FGPmxh&!@^%MO@V8k$6W~Z_oTKpu@?hB#QE33P|(j;UT{AJ`L0B3PG=fDVODR*
z2q))1x14!Q3Hajr%fiu+pLvN+wQnfDmoL?`@-*;+=q{SJk$mBIJ_3-hE@sThE}`Zd
z;<?=!MsPlu-#zFe&sSlF-wSI@Q%vXa(Of|v7H#EfS2qLyc_TMF<nu@ay5RZ_Ov95-
zKh9Yo)5iVg7CnC8r#l<|q6GLNyK_4hV1BwIUo*r|##Y~FULW~@e<N_2`zXn0Il2xy
zZJvMnt^s<pV#O=*=L`5ZZz#%#^AkA`&ojb`X)0&>hel)3Fzfq*y|wMYZ?4QgT?)GC
zGh62&Ow+J3nG)0{5@pN8r=RzNfA?O!v(~`p5$vrz4*8Qmp>N0@g+1QvXZ9uy_;z)=
zhH~Kd9J{Pr1o`=+Rn6oV)EMx6=_Mn)&;H=3OBaRtM^5?9f1LyVbJU$+jYdX#%pUNY
z1Hbd!?>SPpC}s`sh50MX6tuSyh0S5P1CtHlS2CO%83I0wfbI@&$bX)KXO;|!NEE7I
zNkIQGttV!d_Zs~E-x9JRKb@3O+f(%sHQaswGfM~hn;)XF>yrMUuVtOw2>I^VW*N^r
z%jjd{ag77TP~WGr>?Y{bLHE`#Z@mTnTfaB|Z6?ZNYwu+Ixdnc^6K>bw`x*5^fRR}U
z=6BFaiJ^hWIhB*8QV#1oz559Im<#?`qa7P+Am284&8Pf8buS6EtB;w%e|C$<@FVck
z4Kef&!}%P!A|jKIKFO9jd+&DvzFkGE8u`8#HU4jBCgf*x|BF4n#OvMfPYkY-zdxnF
zJ1`3THL?{NF)+T3)aRlyqU4ocSG&bca{V+M8rZ;}bD`kS3gl;-rt`aQB7z(3`)vT{
z|HoZ_jt(+kueC%zamF;YgH6RI_UO}m_nA)x7Eph_@~WH*2c2L)wPFi?RfFz}Jw(UG
z8@}hYE5X0+ILQL<zfsObe|X&?Khs2m%K3>}e6v2OMF{wMk18g9fWId(zMBU5`7PhB
zZ|SIVmqZpbZ63zoa=LW`+>fZuDy*$8a6Sy`pUX(0;Xis=Oy*Gke_m#)Y=Zh4xz=b!
z59iZd?VU}IH_;!Wx4)If1^UhQ9bPTqXFluUrv&-Ac(}>Ei+JrFasKTp20ioiFQ$K>
gv$*A5(uVvTe{!eO1S-Ap&pSmu4)i<*zQ+Oo17|sz(f|Me

literal 0
HcmV?d00001


From 7b8d38910f090ec36d75f5ddf0ab25cdb6e25045 Mon Sep 17 00:00:00 2001
From: James Wright <james@jameswright.xyz>
Date: Mon, 23 May 2022 16:50:18 -0600
Subject: [PATCH 50/59] examples/fluids: Refactoring blasius mesh generation

---
 examples/fluids/problems/blasius.c | 51 +++++++++++++++---------------
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c
index 94033dec8f..f89d9f276a 100644
--- a/examples/fluids/problems/blasius.c
+++ b/examples/fluids/problems/blasius.c
@@ -21,8 +21,9 @@
  * The top surface is also angled downwards, so that it may be used as an
  * outflow. It's angle is controlled by `top_angle` (in units of degrees).
  */
-PetscErrorCode modifyMesh(DM dm, PetscInt dim, PetscReal growth, PetscInt N,
-                          PetscReal refine_height, PetscReal top_angle) {
+static PetscErrorCode ModifyMesh(DM dm, PetscInt dim, PetscReal growth,
+                                 PetscInt N, PetscReal refine_height,
+                                 PetscReal top_angle) {
 
   PetscInt ierr, narr, ncoords;
   PetscReal domain_min[3], domain_max[3], domain_size[3];
@@ -99,16 +100,15 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) {
   problem->apply_inflow.qfunction      = Blasius_Inflow;
   problem->apply_inflow.qfunction_loc  = Blasius_Inflow_loc;
 
-  // CeedScalar mu = .04; // Pa s, dynamic viscosity
-  CeedScalar Uinf          = 40;   // m/s
-  CeedScalar delta0        = 4.2e-4;    // m
-  PetscReal  refine_height = 5.9e-4;    // m
-  PetscReal  growth        = 1.08; // [-]
-  PetscInt   Ndelta        = 45;   // [-]
-  PetscReal  top_angle     = 5;    // degrees
-  CeedScalar theta0        = 288.; // K
-  CeedScalar P0            = 1.01e5; // Pa
-  PetscBool  weakT         = PETSC_FALSE; // weak density or temperature
+  CeedScalar Uinf   = 40;          // m/s
+  CeedScalar delta0 = 4.2e-4;      // m
+  CeedScalar theta0 = 288.;        // K
+  CeedScalar P0     = 1.01e5;      // Pa
+  PetscBool  weakT  = PETSC_FALSE; // weak density or temperature
+  PetscReal  mesh_refine_height = 5.9e-4; // m
+  PetscReal  mesh_growth        = 1.08;   // [-]
+  PetscInt   mesh_Ndelta        = 45;     // [-]
+  PetscReal  mesh_top_angle     = 5;      // degrees
 
   PetscOptionsBegin(comm, NULL, "Options for CHANNEL problem", NULL);
   ierr = PetscOptionsBool("-weakT", "Change from rho weak to T weak at inflow",
@@ -121,32 +121,33 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) {
                             NULL, theta0, &theta0, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsScalar("-P0", "Pressure at outflow",
                             NULL, P0, &P0, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsBoundedInt("-Ndelta", "Velocity at boundary layer edge",
-                                NULL, Ndelta, &Ndelta, NULL, 1); CHKERRQ(ierr);
-  ierr = PetscOptionsScalar("-refine_height",
+  ierr = PetscOptionsBoundedInt("-platemesh_Ndelta",
+                                "Velocity at boundary layer edge",
+                                NULL, mesh_Ndelta, &mesh_Ndelta, NULL, 1); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-platemesh_refine_height",
                             "Height of boundary layer mesh refinement",
-                            NULL, refine_height, &refine_height, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsScalar("-growth",
+                            NULL, mesh_refine_height, &mesh_refine_height, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-platemesh_growth",
                             "Geometric growth rate of boundary layer mesh",
-                            NULL, growth, &growth, NULL); CHKERRQ(ierr);
-  ierr = PetscOptionsScalar("-top_angle",
+                            NULL, mesh_growth, &mesh_growth, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsScalar("-platemesh_top_angle",
                             "Geometric top_angle rate of boundary layer mesh",
-                            NULL, top_angle, &top_angle, NULL); CHKERRQ(ierr);
+                            NULL, mesh_top_angle, &mesh_top_angle, NULL); CHKERRQ(ierr);
   ierr = PetscOptionsBool("-stg_use", "Use STG inflow boundary condition",
                           NULL, use_stg, &use_stg, NULL); CHKERRQ(ierr);
   PetscOptionsEnd();
 
-  PetscScalar meter           = user->units->meter;
-  PetscScalar second          = user->units->second;
-  PetscScalar Kelvin          = user->units->Kelvin;
-  PetscScalar Pascal          = user->units->Pascal;
+  PetscScalar meter  = user->units->meter;
+  PetscScalar second = user->units->second;
+  PetscScalar Kelvin = user->units->Kelvin;
+  PetscScalar Pascal = user->units->Pascal;
 
   theta0 *= Kelvin;
   P0     *= Pascal;
   Uinf   *= meter / second;
   delta0 *= meter;
 
-  ierr = modifyMesh(dm, problem->dim, growth, Ndelta, refine_height, top_angle);
+  ierr = ModifyMesh(dm, problem->dim, mesh_growth, mesh_Ndelta, mesh_refine_height, mesh_top_angle);
   CHKERRQ(ierr);
 
   // Some properties depend on parameters from NewtonianIdealGas

From d271451494a477190b967412c6bdc99f875ba666 Mon Sep 17 00:00:00 2001
From: James Wright <james@jameswright.xyz>
Date: Tue, 17 May 2022 11:35:56 -0600
Subject: [PATCH 51/59] examples/fluids: Specify FPBL mesh node locations via
 file

---
 examples/fluids/problems/blasius.c | 116 +++++++++++++++++++++++------
 1 file changed, 95 insertions(+), 21 deletions(-)

diff --git a/examples/fluids/problems/blasius.c b/examples/fluids/problems/blasius.c
index f89d9f276a..730e9e533b 100644
--- a/examples/fluids/problems/blasius.c
+++ b/examples/fluids/problems/blasius.c
@@ -12,6 +12,41 @@
 #include "../qfunctions/blasius.h"
 #include "stg_shur14.h"
 
+// Read y-node locations from the text file at `path`. The first line gives the
+// node count and, optionally, the column count (default 1); each later line
+// gives one location in its first column. Caller PetscFree()s `*pynodes`.
+static PetscErrorCode GetYNodeLocs(const MPI_Comm comm,
+                                   const char path[PETSC_MAX_PATH_LEN], PetscReal **pynodes,
+                                   PetscInt *nynodes) {
+  PetscErrorCode ierr;
+  int ndims, dims[2] = {0, 1}; // PetscStrToArray() reports its count as `int`
+  FILE *fp;
+  char line[512], **array;
+  PetscReal *node_locs;
+  PetscFunctionBeginUser;
+
+  ierr = PetscFOpen(comm, path, "r", &fp); CHKERRQ(ierr);
+  ierr = PetscSynchronizedFGets(comm, fp, sizeof(line), line); CHKERRQ(ierr);
+  ierr = PetscStrToArray(line, ' ', &ndims, &array); CHKERRQ(ierr);
+  // Only two header entries are used; extra tokens must not overflow dims[]
+  for (int i=0; i<ndims && i<2; i++) dims[i] = atoi(array[i]);
+  ierr = PetscStrToArrayDestroy(ndims, array); CHKERRQ(ierr);
+  if (dims[1] < 1) dims[1] = 1; // Each data line must supply the location column
+  *nynodes = dims[0];
+  ierr = PetscMalloc1(*nynodes, &node_locs); CHKERRQ(ierr);
+  for (int i=0; i<dims[0]; i++) {
+    ierr = PetscSynchronizedFGets(comm, fp, sizeof(line), line); CHKERRQ(ierr);
+    ierr = PetscStrToArray(line, ' ', &ndims, &array); CHKERRQ(ierr);
+    if (ndims >= dims[1]) node_locs[i] = (PetscReal) atof(array[0]);
+    ierr = PetscStrToArrayDestroy(ndims, array); CHKERRQ(ierr); // Free before erroring
+    if (ndims < dims[1]) SETERRQ(comm, PETSC_ERR_FILE_UNEXPECTED,
+        "Line %d of %s does not contain enough columns (%d instead of %d)", i, path, ndims, dims[1]);
+  }
+  ierr = PetscFClose(comm, fp); CHKERRQ(ierr);
+  *pynodes = node_locs;
+  PetscFunctionReturn(0);
+}
+
 /* \brief Modify the domain and mesh for blasius
  *
  * Modifies mesh such that `N` elements are within `refine_height` with a
@@ -20,10 +55,14 @@
  *
  * The top surface is also angled downwards, so that it may be used as an
  * outflow. It's angle is controlled by `top_angle` (in units of degrees).
+ *
+ * If `node_locs` is not NULL, then the nodes will be placed at `node_locs`
+ * locations.
  */
-static PetscErrorCode ModifyMesh(DM dm, PetscInt dim, PetscReal growth,
-                                 PetscInt N, PetscReal refine_height,
-                                 PetscReal top_angle) {
+static PetscErrorCode ModifyMesh(MPI_Comm comm, DM dm, PetscInt dim,
+                                 PetscReal growth, PetscInt N,
+                                 PetscReal refine_height, PetscReal top_angle,
+                                 PetscReal node_locs[], PetscInt num_node_locs) {
 
   PetscInt ierr, narr, ncoords;
   PetscReal domain_min[3], domain_max[3], domain_size[3];
@@ -49,23 +88,44 @@ static PetscErrorCode ModifyMesh(DM dm, PetscInt dim, PetscReal growth,
   PetscInt nmax = 3, faces[3];
   ierr = PetscOptionsGetIntArray(NULL, NULL, "-dm_plex_box_faces", faces, &nmax,
                                  NULL); CHKERRQ(ierr);
+  // Get element size of the box mesh, for indexing each node
+  const PetscReal dybox = domain_size[1]/faces[1];
+
+  if (!node_locs) {
+    // Calculate the first element height
+    PetscReal dy1   = refine_height*(growth-1)/(pow(growth, N)-1);
+
+    // Calculate log of sizing outside BL
+    PetscReal logdy = (log(domain_max[1]) - log(refine_height)) / (faces[1] - N);
 
-  // Calculate the first element height
-  PetscReal dybox = domain_size[1]/faces[1];
-  PetscReal dy1   = refine_height*(growth-1)/(pow(growth, N)-1);
-
-  // Calculate log of sizing outside BL
-  PetscReal logdy = (log(domain_max[1]) - log(refine_height)) / (faces[1] - N);
-
-  for(PetscInt i=0; i<ncoords; i++) {
-    PetscInt y_box_index = round(coords[i][1]/dybox);
-    if(y_box_index <= N) {
-      coords[i][1] = (1 - (coords[i][0]/domain_max[0])*angle_coeff) *
-                     dy1*(pow(growth, coords[i][1]/dybox)-1)/(growth-1);
-    } else {
-      PetscInt j = y_box_index - N;
-      coords[i][1] = (1 - (coords[i][0]/domain_max[0])*angle_coeff) *
-                     exp(log(refine_height) + logdy*j);
+    for (PetscInt i=0; i<ncoords; i++) {
+      PetscInt y_box_index = round(coords[i][1]/dybox);
+      if (y_box_index <= N) {
+        coords[i][1] = (1 - ((coords[i][0] - domain_min[0])/domain_size[0])*angle_coeff)
+                       * dy1 * (pow(growth, coords[i][1]/dybox)-1)/(growth-1);
+      } else {
+        PetscInt j = y_box_index - N;
+        coords[i][1] = (1 - ((coords[i][0] - domain_min[0])/domain_size[0])*angle_coeff)
+                       * exp(log(refine_height) + logdy*j);
+      }
+    }
+  } else {
+    // Error checking
+    if (num_node_locs < faces[1] +1)
+      SETERRQ(comm, -1, "The y_node_locs_path has too few locations; "
+              "There are %d + 1 nodes, but only %d locations given",
+              faces[1]+1, num_node_locs);
+    if (num_node_locs > faces[1] +1) {
+      ierr = PetscPrintf(comm, "WARNING: y_node_locs_path has more locations (%d) "
+                         "than the mesh has nodes (%d). This maybe unintended.",
+                         num_node_locs, faces[1]+1); CHKERRQ(ierr);
+    }
+
+    for (PetscInt i=0; i<ncoords; i++) {
+      // Determine which y-node we're at
+      PetscInt y_box_index = round(coords[i][1]/dybox);
+      coords[i][1] = (1 - ((coords[i][0] - domain_min[0])/domain_size[0])*angle_coeff)
+                     * node_locs[y_box_index];
     }
   }
 
@@ -109,6 +169,7 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) {
   PetscReal  mesh_growth        = 1.08;   // [-]
   PetscInt   mesh_Ndelta        = 45;     // [-]
   PetscReal  mesh_top_angle     = 5;      // degrees
+  char mesh_ynodes_path[PETSC_MAX_PATH_LEN] = "";
 
   PetscOptionsBegin(comm, NULL, "Options for CHANNEL problem", NULL);
   ierr = PetscOptionsBool("-weakT", "Change from rho weak to T weak at inflow",
@@ -133,6 +194,11 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) {
   ierr = PetscOptionsScalar("-platemesh_top_angle",
                             "Geometric top_angle rate of boundary layer mesh",
                             NULL, mesh_top_angle, &mesh_top_angle, NULL); CHKERRQ(ierr);
+  ierr = PetscOptionsString("-platemesh_y_node_locs_path",
+                            "Path to file with y node locations. "
+                            "If empty, will use the algorithmic mesh warping.", NULL,
+                            mesh_ynodes_path, mesh_ynodes_path,
+                            sizeof(mesh_ynodes_path), NULL); CHKERRQ(ierr);
   ierr = PetscOptionsBool("-stg_use", "Use STG inflow boundary condition",
                           NULL, use_stg, &use_stg, NULL); CHKERRQ(ierr);
   PetscOptionsEnd();
@@ -147,8 +213,16 @@ PetscErrorCode NS_BLASIUS(ProblemData *problem, DM dm, void *ctx) {
   Uinf   *= meter / second;
   delta0 *= meter;
 
-  ierr = ModifyMesh(dm, problem->dim, mesh_growth, mesh_Ndelta, mesh_refine_height, mesh_top_angle);
-  CHKERRQ(ierr);
+  PetscReal *mesh_ynodes = NULL;
+  PetscInt  mesh_nynodes = 0;
+  if (strcmp(mesh_ynodes_path, "")) {
+    ierr = GetYNodeLocs(comm, mesh_ynodes_path, &mesh_ynodes, &mesh_nynodes);
+    CHKERRQ(ierr);
+  }
+  ierr = ModifyMesh(comm, dm, problem->dim, mesh_growth, mesh_Ndelta,
+                    mesh_refine_height, mesh_top_angle, mesh_ynodes,
+                    mesh_nynodes); CHKERRQ(ierr);
+  ierr = PetscFree(mesh_ynodes); CHKERRQ(ierr);
 
   // Some properties depend on parameters from NewtonianIdealGas
   CeedQFunctionContextGetData(problem->apply_vol_rhs.qfunction_context,

From 91eaef80a564ba0fab6277aaac11ed9994a4b0d5 Mon Sep 17 00:00:00 2001
From: James Wright <james@jameswright.xyz>
Date: Tue, 17 May 2022 11:36:31 -0600
Subject: [PATCH 52/59] examples/fluids: Update docs for meshing from file

---
 examples/fluids/README.md    | 17 +++++++++++------
 examples/fluids/blasius.yaml | 30 ++++++++++++++----------------
 examples/fluids/index.md     | 26 ++++++++++++++++++++++++++
 3 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/examples/fluids/README.md b/examples/fluids/README.md
index 00bf3b2248..602803fc1e 100644
--- a/examples/fluids/README.md
+++ b/examples/fluids/README.md
@@ -656,22 +656,22 @@ addition to the Newtonian Ideal Gas options:
   - `1.01E5`
   - `Pa`
 
-* - `-refine_height`
-  - Height at which `-Ndelta` number of elements should refined into
+* - `-platemesh_refine_height`
+  - Height at which `-platemesh_Ndelta` number of elements should be refined into
   - `5.9E-4`
   - `m`
 
-* - `-Ndelta`
-  - Number of elements to keep below `-refine_height`
+* - `-platemesh_Ndelta`
+  - Number of elements to keep below `-platemesh_refine_height`
   - `45`
   -
 
-* - `-growth`
+* - `-platemesh_growth`
   - Growth rate of the elements in the refinement region
   - `1.08`
   -
 
-* - `-top_angle`
+* - `-platemesh_top_angle`
   - Downward angle of the top face of the domain. This face serves as an outlet.
   - `5`
   - `degrees`
@@ -680,6 +680,11 @@ addition to the Newtonian Ideal Gas options:
   - Whether to use stg for the inflow conditions
   - `false`
   -
+
+* - `-platemesh_y_node_locs_path`
+  - Path to file with y node locations. If empty, will use mesh warping instead.
+  - `""`
+  -
 :::
 
 This problem can be run with the `blasius.yaml` file via:
diff --git a/examples/fluids/blasius.yaml b/examples/fluids/blasius.yaml
index aff73182e6..cf3056b1ed 100644
--- a/examples/fluids/blasius.yaml
+++ b/examples/fluids/blasius.yaml
@@ -8,33 +8,31 @@ ts:
   max_time: 1.0e-3
 output_freq: 10
 
-#snes_max_it: 4
-#snes_convergence_test: skip
-
 ## Linear Settings:
 degree: 1
 dm_plex_box_faces: 40,60,1
-nDelta: 45
+platemesh_nDelta: 45
 
-## Quadratic Settings:
-#degree: 2
-#dm_plex_box_faces: 20,30,1
-#nDelta: 22
-#growth: 1.1664 # 1.08^2
+# # Quadratic Settings:
+# degree: 2
+# dm_plex_box_faces: 20,30,1
+# platemesh:
+#   nDelta: 22
+#   growth: 1.1664 # 1.08^2
 
 stab: 'supg'
 Ctau_t: 1
-#Ctau_v: 36,60,128 is what PHASTA has for p=1,2, 3
-## linear Settings:
+#Ctau_v: 36,60,128 is what PHASTA has for p=1,2,3
+# Linear Settings:
 Ctau_v: 36
 Ctau_C: 0.25
 Ctau_M: 0.25
 Ctau_E: 0.125
-## Quadratic Settings:
-#Ctau_v: 60
-#Ctau_C: 0.125
-#Ctau_M: 0.125
-#Ctau_E: 0.125
+# # Quadratic Settings:
+# Ctau_v: 60
+# Ctau_C: 0.125
+# Ctau_M: 0.125
+# Ctau_E: 0.125
 
 q_extra: 0
 
diff --git a/examples/fluids/index.md b/examples/fluids/index.md
index 1e0588d1dc..d4ccbac231 100644
--- a/examples/fluids/index.md
+++ b/examples/fluids/index.md
@@ -618,3 +618,29 @@ numerous terms in the STG formulation.
 | $\{\kappa^n\}_{n=1}^N$ | k^n  | No           | Yes      |
 | $h_i$           | h_i    | Yes          | No   |
 | $d_w$           | d_w    | Yes          | No   |
+
+### Meshing
+
+The flat plate boundary layer example has custom meshing features to better
+resolve the flow. One of those is tilting the top of the domain, allowing for
+it to be an outflow boundary condition. The angle of this tilt is controlled by
+`-platemesh_top_angle`.
+
+The primary meshing feature is the ability to grade the mesh, providing better
+resolution near the wall. There are two methods to do this; algorithmically, or
+specifying the node locations via a file. Algorithmically, a base node
+distribution is defined at the inlet (assumed to be $\min(x)$) and then
+linearly stretched/squeezed to match the slanted top boundary condition. Nodes
+are placed such that `-platemesh_Ndelta` elements are within
+`-platemesh_refine_height` of the wall. They are placed such that the element
+height matches a geometric growth ratio defined by `-platemesh_growth`. The
+remaining elements are then distributed from `-platemesh_refine_height` to the
+top of the domain linearly in logarithmic space.
+
+Alternatively, a file may be specified containing the locations of each node.
+The file should be newline delimited, with the first line specifying the number
+of points and the rest being the locations of the nodes. The node locations are
+used exactly at the inlet (assumed to be $\min(x)$) and linearly
+stretched/squeezed to match the slanted top boundary condition. The file is
+specified via `-platemesh_y_node_locs_path`. If this flag is given an empty
+string, then the algorithmic approach will be performed.

From b9f28b1952427c98cd3bf3ceefd1e9b11987fa4b Mon Sep 17 00:00:00 2001
From: James Wright <james@jameswright.xyz>
Date: Mon, 23 May 2022 18:20:33 -0600
Subject: [PATCH 53/59] examples/fluids: Update STG test for new mesh feats

---
 examples/fluids/tests-output/blasius_stgtest.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/fluids/tests-output/blasius_stgtest.yaml b/examples/fluids/tests-output/blasius_stgtest.yaml
index e9f64e42be..7da68de046 100644
--- a/examples/fluids/tests-output/blasius_stgtest.yaml
+++ b/examples/fluids/tests-output/blasius_stgtest.yaml
@@ -9,8 +9,9 @@ ts:
 output_freq: 10
 
 dm_plex_box_faces: 3,30,1
-nDelta: 22
-growth: 1.1664 # 1.08^2
+platemesh:
+  Ndelta: 22
+  growth: 1.1664 # 1.08^2
 
 stab: 'supg'
 Ctau_t: 1

From e022e1f89e85f2e46b1310d6193ff8d6a4674140 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Wed, 25 May 2022 16:56:13 -0600
Subject: [PATCH 54/59] restr - add input checks on element restrictions

---
 interface/ceed-elemrestriction.c | 90 ++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/interface/ceed-elemrestriction.c b/interface/ceed-elemrestriction.c
index 49d960c444..9ea35256a4 100644
--- a/interface/ceed-elemrestriction.c
+++ b/interface/ceed-elemrestriction.c
@@ -374,6 +374,24 @@ int CeedElemRestrictionCreate(Ceed ceed, CeedInt num_elem, CeedInt elem_size,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (elem_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Element size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (comp_stride < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction component stride must be at least 1");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, rstr); CeedChk(ierr);
   (*rstr)->ceed = ceed;
   ierr = CeedReference(ceed); CeedChk(ierr);
@@ -446,6 +464,24 @@ int CeedElemRestrictionCreateOriented(Ceed ceed, CeedInt num_elem,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (elem_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Element size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (comp_stride < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction component stride must be at least 1");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, rstr); CeedChk(ierr);
   (*rstr)->ceed = ceed;
   ierr = CeedReference(ceed); CeedChk(ierr);
@@ -510,6 +546,18 @@ int CeedElemRestrictionCreateStrided(Ceed ceed, CeedInt num_elem,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (elem_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Element size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction must have at least 1 component");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, rstr); CeedChk(ierr);
   (*rstr)->ceed = ceed;
   ierr = CeedReference(ceed); CeedChk(ierr);
@@ -591,6 +639,30 @@ int CeedElemRestrictionCreateBlocked(Ceed ceed, CeedInt num_elem,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (elem_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Element size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (blk_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Block size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (comp_stride < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction component stride must be at least 1");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, rstr); CeedChk(ierr);
 
   ierr = CeedCalloc(num_blk*blk_size*elem_size, &blk_offsets); CeedChk(ierr);
@@ -662,6 +734,24 @@ int CeedElemRestrictionCreateBlockedStrided(Ceed ceed, CeedInt num_elem,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (elem_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Element size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (blk_size < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Block size must be at least 1");
+  // LCOV_EXCL_STOP
+
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "ElemRestriction must have at least 1 component");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, rstr); CeedChk(ierr);
 
   (*rstr)->ceed = ceed;

From 227444bf53606b684b95466f0a013ac5dc8b8cba Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Wed, 25 May 2022 17:00:09 -0600
Subject: [PATCH 55/59] basis - add input checks

---
 interface/ceed-basis.c | 77 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 2 deletions(-)

diff --git a/interface/ceed-basis.c b/interface/ceed-basis.c
index fb5b8b0d44..f5897bd665 100644
--- a/interface/ceed-basis.c
+++ b/interface/ceed-basis.c
@@ -485,11 +485,30 @@ int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp,
     return CEED_ERROR_SUCCESS;
   }
 
-  if (dim<1)
+  if (dim < 1)
     // LCOV_EXCL_START
     return CeedError(ceed, CEED_ERROR_DIMENSION,
                      "Basis dimension must be a positive value");
   // LCOV_EXCL_STOP
+
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (P_1d < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 node");
+  // LCOV_EXCL_STOP
+
+  if (Q_1d < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 quadrature point");
+  // LCOV_EXCL_STOP
+
   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE
                           : dim == 2 ? CEED_TOPOLOGY_QUAD
                           : CEED_TOPOLOGY_HEX;
@@ -549,12 +568,30 @@ int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp,
   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d,
              *q_weight_1d;
 
-  if (dim<1)
+  if (dim < 1)
     // LCOV_EXCL_START
     return CeedError(ceed, CEED_ERROR_DIMENSION,
                      "Basis dimension must be a positive value");
   // LCOV_EXCL_STOP
 
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (P < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 node");
+  // LCOV_EXCL_STOP
+
+  if (Q < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 quadrature point");
+  // LCOV_EXCL_STOP
+
   // Get Nodes and Weights
   ierr = CeedCalloc(P*Q, &interp_1d); CeedChk(ierr);
   ierr = CeedCalloc(P*Q, &grad_1d); CeedChk(ierr);
@@ -655,6 +692,24 @@ int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (num_nodes < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 node");
+  // LCOV_EXCL_STOP
+
+  if (num_qpts < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 quadrature point");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, basis); CeedChk(ierr);
 
   ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr);
@@ -730,6 +785,24 @@ int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
     return CEED_ERROR_SUCCESS;
   }
 
+  if (num_comp < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 component");
+  // LCOV_EXCL_STOP
+
+  if (num_nodes < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 node");
+  // LCOV_EXCL_STOP
+
+  if (num_qpts < 1)
+    // LCOV_EXCL_START
+    return CeedError(ceed, CEED_ERROR_DIMENSION,
+                     "Basis must have at least 1 quadrature point");
+  // LCOV_EXCL_STOP
+
   ierr = CeedCalloc(1, basis); CeedChk(ierr);
 
   (*basis)->ceed = ceed;

From 92ae7e4738abdc83aced3d16a73cf4ec08460821 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Thu, 26 May 2022 09:54:15 -0600
Subject: [PATCH 56/59] minor - homogonize int vs CeedInt

---
 interface/ceed-basis.c           | 20 ++++-----
 interface/ceed-elemrestriction.c | 14 +++----
 interface/ceed-operator.c        |  4 +-
 interface/ceed-preconditioning.c | 72 ++++++++++++++++----------------
 interface/ceed-qfunction.c       |  4 +-
 interface/ceed-vector.c          |  8 ++--
 interface/ceed.c                 |  6 +--
 7 files changed, 64 insertions(+), 64 deletions(-)

diff --git a/interface/ceed-basis.c b/interface/ceed-basis.c
index f5897bd665..8c8d09879a 100644
--- a/interface/ceed-basis.c
+++ b/interface/ceed-basis.c
@@ -158,12 +158,12 @@ static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s,
 **/
 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m,
                           CeedInt n, const CeedScalar *a, FILE *stream) {
-  for (int i=0; i<m; i++) {
+  for (CeedInt i=0; i<m; i++) {
     if (m > 1)
       fprintf(stream, "%12s[%d]:", name, i);
     else
       fprintf(stream, "%12s:", name);
-    for (int j=0; j<n; j++)
+    for (CeedInt j=0; j<n; j++)
       fprintf(stream, fp_fmt, fabs(a[i*n+j]) > 1E-14 ? a[i*n+j] : 0);
     fputs("\n", stream);
   }
@@ -1366,14 +1366,14 @@ int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
   // Allocate
   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0*atan(1.0);
   // Build q_ref_1d, q_weight_1d
-  for (int i = 0; i <= Q/2; i++) {
+  for (CeedInt i = 0; i <= Q/2; i++) {
     // Guess
     xi = cos(PI*(CeedScalar)(2*i+1)/((CeedScalar)(2*Q)));
     // Pn(xi)
     P0 = 1.0;
     P1 = xi;
     P2 = 0.0;
-    for (int j = 2; j <= Q; j++) {
+    for (CeedInt j = 2; j <= Q; j++) {
       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
       P0 = P1;
       P1 = P2;
@@ -1382,10 +1382,10 @@ int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
     dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
     xi = xi-P2/dP2;
     // Newton to convergence
-    for (int k=0; k<100 && fabs(P2)>10*CEED_EPSILON; k++) {
+    for (CeedInt k=0; k<100 && fabs(P2)>10*CEED_EPSILON; k++) {
       P0 = 1.0;
       P1 = xi;
-      for (int j = 2; j <= Q; j++) {
+      for (CeedInt j = 2; j <= Q; j++) {
         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
         P0 = P1;
         P1 = P2;
@@ -1434,14 +1434,14 @@ int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
   q_ref_1d[0] = -1.0;
   q_ref_1d[Q-1] = 1.0;
   // Interior
-  for (int i = 1; i <= (Q-1)/2; i++) {
+  for (CeedInt i = 1; i <= (Q-1)/2; i++) {
     // Guess
     xi = cos(PI*(CeedScalar)(i)/(CeedScalar)(Q-1));
     // Pn(xi)
     P0 = 1.0;
     P1 = xi;
     P2 = 0.0;
-    for (int j = 2; j < Q; j++) {
+    for (CeedInt j = 2; j < Q; j++) {
       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
       P0 = P1;
       P1 = P2;
@@ -1451,10 +1451,10 @@ int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
     d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi);
     xi = xi-dP2/d2P2;
     // Newton to convergence
-    for (int k=0; k<100 && fabs(dP2)>10*CEED_EPSILON; k++) {
+    for (CeedInt k=0; k<100 && fabs(dP2)>10*CEED_EPSILON; k++) {
       P0 = 1.0;
       P1 = xi;
-      for (int j = 2; j < Q; j++) {
+      for (CeedInt j = 2; j < Q; j++) {
         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
         P0 = P1;
         P1 = P2;
diff --git a/interface/ceed-elemrestriction.c b/interface/ceed-elemrestriction.c
index 9ea35256a4..21d785e472 100644
--- a/interface/ceed-elemrestriction.c
+++ b/interface/ceed-elemrestriction.c
@@ -43,8 +43,8 @@ int CeedPermutePadOffsets(const CeedInt *offsets, CeedInt *blk_offsets,
                           CeedInt num_blk, CeedInt num_elem, CeedInt blk_size,
                           CeedInt elem_size) {
   for (CeedInt e=0; e<num_blk*blk_size; e+=blk_size)
-    for (int j=0; j<blk_size; j++)
-      for (int k=0; k<elem_size; k++)
+    for (CeedInt j=0; j<blk_size; j++)
+      for (CeedInt k=0; k<elem_size; k++)
         blk_offsets[e*elem_size + k*blk_size + j]
           = offsets[CeedIntMin(e+j,num_elem-1)*elem_size + k];
   return CEED_ERROR_SUCCESS;
@@ -77,7 +77,7 @@ int CeedElemRestrictionGetStrides(CeedElemRestriction rstr,
                      "ElemRestriction has no stride data");
   // LCOV_EXCL_STOP
 
-  for (int i=0; i<3; i++)
+  for (CeedInt i=0; i<3; i++)
     (*strides)[i] = rstr->strides[i];
   return CEED_ERROR_SUCCESS;
 }
@@ -205,7 +205,7 @@ int CeedElemRestrictionGetELayout(CeedElemRestriction rstr,
                      "ElemRestriction has no layout data");
   // LCOV_EXCL_STOP
 
-  for (int i=0; i<3; i++)
+  for (CeedInt i=0; i<3; i++)
     (*layout)[i] = rstr->layout[i];
   return CEED_ERROR_SUCCESS;
 }
@@ -227,7 +227,7 @@ int CeedElemRestrictionGetELayout(CeedElemRestriction rstr,
 **/
 int CeedElemRestrictionSetELayout(CeedElemRestriction rstr,
                                   CeedInt layout[3]) {
-  for (int i = 0; i<3; i++)
+  for (CeedInt i = 0; i<3; i++)
     rstr->layout[i] = layout[i];
   return CEED_ERROR_SUCCESS;
 }
@@ -570,7 +570,7 @@ int CeedElemRestrictionCreateStrided(Ceed ceed, CeedInt num_elem,
   (*rstr)->blk_size = 1;
   (*rstr)->is_oriented = 0;
   ierr = CeedMalloc(3, &(*rstr)->strides); CeedChk(ierr);
-  for (int i=0; i<3; i++)
+  for (CeedInt i=0; i<3; i++)
     (*rstr)->strides[i] = strides[i];
   ierr = ceed->ElemRestrictionCreate(CEED_MEM_HOST, CEED_OWN_POINTER, NULL,
                                      *rstr);
@@ -765,7 +765,7 @@ int CeedElemRestrictionCreateBlockedStrided(Ceed ceed, CeedInt num_elem,
   (*rstr)->blk_size = blk_size;
   (*rstr)->is_oriented = 0;
   ierr = CeedMalloc(3, &(*rstr)->strides); CeedChk(ierr);
-  for (int i=0; i<3; i++)
+  for (CeedInt i=0; i<3; i++)
     (*rstr)->strides[i] = strides[i];
   ierr = ceed->ElemRestrictionCreateBlocked(CEED_MEM_HOST, CEED_OWN_POINTER,
          NULL, *rstr); CeedChk(ierr);
diff --git a/interface/ceed-operator.c b/interface/ceed-operator.c
index dc67767b39..28f3ca5632 100644
--- a/interface/ceed-operator.c
+++ b/interface/ceed-operator.c
@@ -213,7 +213,7 @@ int CeedOperatorSingleView(CeedOperator op, bool sub, FILE *stream) {
 int CeedOperatorGetActiveBasis(CeedOperator op, CeedBasis *active_basis) {
   *active_basis = NULL;
   if (op->is_composite) return CEED_ERROR_SUCCESS;
-  for (int i = 0; i < op->qf->num_input_fields; i++)
+  for (CeedInt i = 0; i < op->qf->num_input_fields; i++)
     if (op->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
       *active_basis = op->input_fields[i]->basis;
       break;
@@ -245,7 +245,7 @@ int CeedOperatorGetActiveElemRestriction(CeedOperator op,
     CeedElemRestriction *active_rstr) {
   *active_rstr = NULL;
   if (op->is_composite) return CEED_ERROR_SUCCESS;
-  for (int i = 0; i < op->qf->num_input_fields; i++)
+  for (CeedInt i = 0; i < op->qf->num_input_fields; i++)
     if (op->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
       *active_rstr = op->input_fields[i]->elem_restr;
       break;
diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c
index 6367d70ba9..4aafc8e55d 100644
--- a/interface/ceed-preconditioning.c
+++ b/interface/ceed-preconditioning.c
@@ -500,11 +500,11 @@ static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
 
   // Determine i, j locations for element matrices
   CeedInt count = 0;
-  for (int e = 0; e < num_elem; ++e) {
-    for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
-      for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
-        for (int i = 0; i < elem_size; ++i) {
-          for (int j = 0; j < elem_size; ++j) {
+  for (CeedInt e = 0; e < num_elem; ++e) {
+    for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) {
+      for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) {
+        for (CeedInt i = 0; i < elem_size; ++i) {
+          for (CeedInt j = 0; j < elem_size; ++j) {
             const CeedInt elem_dof_index_row = (i)*layout_er[0] +
                                                (comp_out)*layout_er[1] + e*layout_er[2];
             const CeedInt elem_dof_index_col = (j)*layout_er[0] +
@@ -685,31 +685,31 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
                                      num_qpts]; // logically 3-tensor
   CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
   CeedScalar elem_mat[elem_size * elem_size];
-  int count = 0;
+  CeedInt count = 0;
   CeedScalar *vals;
   ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
-  for (int e = 0; e < num_elem; ++e) {
-    for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
-      for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
-        for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
+  for (CeedInt e = 0; e < num_elem; ++e) {
+    for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) {
+      for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) {
+        for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
           B_mat_in[ell] = 0.0;
         }
-        for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
+        for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
           B_mat_out[ell] = 0.0;
         }
         // Store block-diagonal D matrix as collection of small dense blocks
-        for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
+        for (CeedInt ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
           D_mat[ell] = 0.0;
         }
         // form element matrix itself (for each block component)
-        for (int ell = 0; ell < elem_size*elem_size; ++ell) {
+        for (CeedInt ell = 0; ell < elem_size*elem_size; ++ell) {
           elem_mat[ell] = 0.0;
         }
-        for (int q = 0; q < num_qpts; ++q) {
-          for (int n = 0; n < elem_size; ++n) {
+        for (CeedInt q = 0; q < num_qpts; ++q) {
+          for (CeedInt n = 0; n < elem_size; ++n) {
             CeedInt d_in = -1;
-            for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
-              const int qq = num_eval_mode_in*q;
+            for (CeedInt e_in = 0; e_in < num_eval_mode_in; ++e_in) {
+              const CeedInt qq = num_eval_mode_in*q;
               if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
                 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
               } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
@@ -723,8 +723,8 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
               }
             }
             CeedInt d_out = -1;
-            for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
-              const int qq = num_eval_mode_out*q;
+            for (CeedInt e_out = 0; e_out < num_eval_mode_out; ++e_out) {
+              const CeedInt qq = num_eval_mode_out*q;
               if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
                 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
               } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
@@ -738,25 +738,25 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
               }
             }
           }
-          for (int ei = 0; ei < num_eval_mode_out; ++ei) {
-            for (int ej = 0; ej < num_eval_mode_in; ++ej) {
-              const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
+          for (CeedInt ei = 0; ei < num_eval_mode_out; ++ei) {
+            for (CeedInt ej = 0; ej < num_eval_mode_in; ++ej) {
+              const CeedInt eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
                                           +comp_out;
-              const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
+              const CeedInt index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
                                 e*layout_qf[2];
               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
             }
           }
         }
         // Compute B^T*D
-        for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
+        for (CeedInt ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
           BTD[ell] = 0.0;
         }
-        for (int j = 0; j<elem_size; ++j) {
-          for (int q = 0; q<num_qpts; ++q) {
-            int qq = num_eval_mode_out*q;
-            for (int ei = 0; ei < num_eval_mode_in; ++ei) {
-              for (int ej = 0; ej < num_eval_mode_out; ++ej) {
+        for (CeedInt j = 0; j<elem_size; ++j) {
+          for (CeedInt q = 0; q<num_qpts; ++q) {
+            const CeedInt qq = num_eval_mode_out*q;
+            for (CeedInt ei = 0; ei < num_eval_mode_in; ++ei) {
+              for (CeedInt ej = 0; ej < num_eval_mode_out; ++ej) {
                 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
                   B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
               }
@@ -768,8 +768,8 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
                                   elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
 
         // put element matrix in coordinate data structure
-        for (int i = 0; i < elem_size; ++i) {
-          for (int j = 0; j < elem_size; ++j) {
+        for (CeedInt i = 0; i < elem_size; ++i) {
+          for (CeedInt j = 0; j < elem_size; ++j) {
             vals[offset + count] = elem_mat[i*elem_size + j];
             count++;
           }
@@ -861,7 +861,7 @@ static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
                             op_coarse); CeedChk(ierr);
   CeedElemRestriction rstr_fine = NULL;
   // -- Clone input fields
-  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
+  for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
       rstr_fine = op_fine->input_fields[i]->elem_restr;
       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
@@ -875,7 +875,7 @@ static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
     }
   }
   // -- Clone output fields
-  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
+  for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
@@ -1722,7 +1722,7 @@ int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
   if (is_composite) {
     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
-    for (int k = 0; k < num_suboperators; ++k) {
+    for (CeedInt k = 0; k < num_suboperators; ++k) {
       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
              &single_entries); CeedChk(ierr);
       *num_entries += single_entries;
@@ -1740,7 +1740,7 @@ int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
   if (is_composite) {
     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
-    for (int k = 0; k < num_suboperators; ++k) {
+    for (CeedInt k = 0; k < num_suboperators; ++k) {
       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
              *cols); CeedChk(ierr);
       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
@@ -1812,7 +1812,7 @@ int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
   if (is_composite) {
     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
-    for (int k = 0; k < num_suboperators; ++k) {
+    for (CeedInt k = 0; k < num_suboperators; ++k) {
       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
       CeedChk(ierr);
       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
diff --git a/interface/ceed-qfunction.c b/interface/ceed-qfunction.c
index 8e6c096785..d94f76af46 100644
--- a/interface/ceed-qfunction.c
+++ b/interface/ceed-qfunction.c
@@ -1055,11 +1055,11 @@ int CeedQFunctionDestroy(CeedQFunction *qf) {
     ierr = (*qf)->Destroy(*qf); CeedChk(ierr);
   }
   // Free fields
-  for (int i=0; i<(*qf)->num_input_fields; i++) {
+  for (CeedInt i=0; i<(*qf)->num_input_fields; i++) {
     ierr = CeedFree(&(*(*qf)->input_fields[i]).field_name); CeedChk(ierr);
     ierr = CeedFree(&(*qf)->input_fields[i]); CeedChk(ierr);
   }
-  for (int i=0; i<(*qf)->num_output_fields; i++) {
+  for (CeedInt i=0; i<(*qf)->num_output_fields; i++) {
     ierr = CeedFree(&(*(*qf)->output_fields[i]).field_name); CeedChk(ierr);
     ierr = CeedFree(&(*qf)->output_fields[i]); CeedChk(ierr);
   }
diff --git a/interface/ceed-vector.c b/interface/ceed-vector.c
index eb40a4ba46..29b3c3cd35 100644
--- a/interface/ceed-vector.c
+++ b/interface/ceed-vector.c
@@ -308,7 +308,7 @@ int CeedVectorSetValue(CeedVector vec, CeedScalar value) {
   } else {
     CeedScalar *array;
     ierr = CeedVectorGetArrayWrite(vec, CEED_MEM_HOST, &array); CeedChk(ierr);
-    for (int i=0; i<vec->length; i++) array[i] = value;
+    for (CeedInt i=0; i<vec->length; i++) array[i] = value;
     ierr = CeedVectorRestoreArray(vec, &array); CeedChk(ierr);
   }
   vec->state += 2;
@@ -641,17 +641,17 @@ int CeedVectorNorm(CeedVector vec, CeedNormType norm_type, CeedScalar *norm) {
   *norm = 0.;
   switch (norm_type) {
   case CEED_NORM_1:
-    for (int i=0; i<vec->length; i++) {
+    for (CeedInt i=0; i<vec->length; i++) {
       *norm += fabs(array[i]);
     }
     break;
   case CEED_NORM_2:
-    for (int i=0; i<vec->length; i++) {
+    for (CeedInt i=0; i<vec->length; i++) {
       *norm += fabs(array[i])*fabs(array[i]);
     }
     break;
   case CEED_NORM_MAX:
-    for (int i=0; i<vec->length; i++) {
+    for (CeedInt i=0; i<vec->length; i++) {
       const CeedScalar abs_v_i = fabs(array[i]);
       *norm = *norm > abs_v_i ? *norm : abs_v_i;
     }
diff --git a/interface/ceed.c b/interface/ceed.c
index 34635cdf4a..9f9248f964 100644
--- a/interface/ceed.c
+++ b/interface/ceed.c
@@ -1072,7 +1072,7 @@ int CeedDestroy(Ceed *ceed) {
   }
 
   if ((*ceed)->obj_delegate_count > 0) {
-    for (int i = 0; i < (*ceed)->obj_delegate_count; i++) {
+    for (CeedInt i = 0; i < (*ceed)->obj_delegate_count; i++) {
       ierr = CeedDestroy(&((*ceed)->obj_delegates[i].delegate)); CeedChk(ierr);
       ierr = CeedFree(&(*ceed)->obj_delegates[i].obj_name); CeedChk(ierr);
     }
@@ -1083,7 +1083,7 @@ int CeedDestroy(Ceed *ceed) {
     ierr = (*ceed)->Destroy(*ceed); CeedChk(ierr);
   }
 
-  for (int i = 0; i < (*ceed)->num_jit_source_roots; i++) {
+  for (CeedInt i = 0; i < (*ceed)->num_jit_source_roots; i++) {
     ierr = CeedFree(&(*ceed)->jit_source_roots[i]); CeedChk(ierr);
   }
   ierr = CeedFree(&(*ceed)->jit_source_roots); CeedChk(ierr);
@@ -1233,7 +1233,7 @@ int CeedErrorExit(Ceed ceed, const char *filename, int line_no,
 int CeedSetErrorHandler(Ceed ceed, CeedErrorHandler handler) {
   ceed->Error = handler;
   if (ceed->delegate) CeedSetErrorHandler(ceed->delegate, handler);
-  for (int i=0; i<ceed->obj_delegate_count; i++)
+  for (CeedInt i=0; i<ceed->obj_delegate_count; i++)
     CeedSetErrorHandler(ceed->obj_delegates[i].delegate, handler);
   return CEED_ERROR_SUCCESS;
 }

From 8687e1d445b8fb5c2aba1a76f10bd56e6cda067d Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Thu, 26 May 2022 10:03:25 -0600
Subject: [PATCH 57/59] ceed - refactor fallback Ceed creation into separate fn

---
 include/ceed/backend.h           |  1 +
 interface/ceed-preconditioning.c | 25 ++++----------------
 interface/ceed.c                 | 39 ++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/include/ceed/backend.h b/include/ceed/backend.h
index 3b67db3343..a9b248bb12 100644
--- a/include/ceed/backend.h
+++ b/include/ceed/backend.h
@@ -119,6 +119,7 @@ CEED_EXTERN int CeedOperatorGetActiveBasis(CeedOperator op,
 CEED_EXTERN int CeedOperatorGetActiveElemRestriction(CeedOperator op, CeedElemRestriction *active_rstr);
 CEED_EXTERN int CeedGetOperatorFallbackResource(Ceed ceed,
     const char **resource);
+CEED_EXTERN int CeedGetOperatorFallbackCeed(Ceed ceed, Ceed *fallback_ceed);
 CEED_EXTERN int CeedSetOperatorFallbackResource(Ceed ceed,
     const char *resource);
 CEED_EXTERN int CeedGetOperatorFallbackParentCeed(Ceed ceed, Ceed *parent);
diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c
index 4aafc8e55d..5869132b88 100644
--- a/interface/ceed-preconditioning.c
+++ b/interface/ceed-preconditioning.c
@@ -34,42 +34,25 @@
 **/
 int CeedOperatorCreateFallback(CeedOperator op) {
   int ierr;
+  Ceed fallback_ceed;
 
   // Check not already created
   if (op->op_fallback) return CEED_ERROR_SUCCESS;
 
   // Fallback Ceed
-  const char *resource, *fallback_resource;
-  ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
-  ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
-  CeedChk(ierr);
-  if (!strcmp(resource, fallback_resource))
-    // LCOV_EXCL_START
-    return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
-                     "Backend %s cannot create an operator"
-                     "fallback to resource %s", resource, fallback_resource);
-  // LCOV_EXCL_STOP
-
-  // Fallback Ceed
-  if (!op->ceed->op_fallback_ceed) {
-    Ceed ceed_ref;
-    ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr);
-    ceed_ref->op_fallback_parent = op->ceed;
-    ceed_ref->Error = op->ceed->Error;
-    op->ceed->op_fallback_ceed = ceed_ref;
-  }
+  ierr = CeedGetOperatorFallbackCeed(op->ceed, &fallback_ceed); CeedChk(ierr);
 
   // Clone Op
   CeedOperator op_fallback;
   if (op->is_composite) {
-    ierr = CeedCompositeOperatorCreate(op->ceed->op_fallback_ceed, &op_fallback);
+    ierr = CeedCompositeOperatorCreate(fallback_ceed, &op_fallback);
     CeedChk(ierr);
     for (CeedInt i = 0; i < op->num_suboperators; i++) {
       ierr = CeedCompositeOperatorAddSub(op_fallback, op->sub_operators[i]);
       CeedChk(ierr);
     }
   } else {
-    ierr = CeedOperatorCreate(op->ceed->op_fallback_ceed, op->qf, op->dqf, op->dqfT,
+    ierr = CeedOperatorCreate(fallback_ceed, op->qf, op->dqf, op->dqfT,
                               &op_fallback); CeedChk(ierr);
     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
       ierr = CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name,
diff --git a/interface/ceed.c b/interface/ceed.c
index 9f9248f964..899fb5ab82 100644
--- a/interface/ceed.c
+++ b/interface/ceed.c
@@ -491,6 +491,45 @@ int CeedGetOperatorFallbackResource(Ceed ceed, const char **resource) {
   return CEED_ERROR_SUCCESS;
 }
 
+/**
+  @brief Get the fallback Ceed for CeedOperators, creating it on first call
+
+  @param ceed                Ceed context to get the fallback Ceed for
+  @param[out] fallback_ceed  Variable to store fallback Ceed
+
+  @return An error code: 0 - success, otherwise - failure
+
+  @ref Backend
+**/
+
+int CeedGetOperatorFallbackCeed(Ceed ceed, Ceed *fallback_ceed) {
+  int ierr;
+
+  // Create fallback Ceed if uninitialized; later calls reuse the stored one
+  if (!ceed->op_fallback_ceed) {
+    // Check resource (NOTE: message below concatenates to "operatorfallback")
+    const char *resource, *fallback_resource;
+    ierr = CeedGetResource(ceed, &resource); CeedChk(ierr);
+    ierr = CeedGetOperatorFallbackResource(ceed, &fallback_resource); CeedChk(ierr);
+    if (!strcmp(resource, fallback_resource))
+      // LCOV_EXCL_START
+      return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
+                       "Backend %s cannot create an operator"
+                       "fallback to resource %s", resource, fallback_resource);
+      // LCOV_EXCL_STOP
+
+    // Create fallback
+    Ceed fallback_ceed;
+    ierr = CeedInit(fallback_resource, &fallback_ceed); CeedChk(ierr);
+    fallback_ceed->op_fallback_parent = ceed; // back-reference to parent Ceed
+    fallback_ceed->Error = ceed->Error; // reuse the parent's error handler
+    ceed->op_fallback_ceed = fallback_ceed;
+  }
+  *fallback_ceed = ceed->op_fallback_ceed; // out parameter, not the block-local
+
+  return CEED_ERROR_SUCCESS;
+}
+
 /**
   @brief Set the fallback resource for CeedOperators. The current resource, if
            any, is freed by calling this function. This string is freed upon the

From 8575dcac89db64491b0e0135db70cb2bd8cd0cb4 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Thu, 26 May 2022 10:16:30 -0600
Subject: [PATCH 58/59] pc - avoid direct access of basis data in multigrid
 creation

---
 interface/ceed-preconditioning.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c
index 5869132b88..478e1af9bb 100644
--- a/interface/ceed-preconditioning.c
+++ b/interface/ceed-preconditioning.c
@@ -1876,14 +1876,16 @@ int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
   ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
   ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
   ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
+  const CeedScalar *interp_f_source = NULL, *interp_c_source = NULL;
   if (is_tensor_f) {
-    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
-    memcpy(interp_c, basis_coarse->interp_1d,
-           Q*P_c*sizeof basis_coarse->interp_1d[0]);
+    ierr = CeedBasisGetInterp1D(basis_fine, &interp_f_source); CeedChk(ierr);
+    ierr = CeedBasisGetInterp1D(basis_coarse, &interp_c_source); CeedChk(ierr);
   } else {
-    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
-    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
+    ierr = CeedBasisGetInterp(basis_fine, &interp_f_source); CeedChk(ierr);
+    ierr = CeedBasisGetInterp(basis_coarse, &interp_c_source); CeedChk(ierr);
   }
+  memcpy(interp_f, interp_f_source, Q*P_f*sizeof interp_f_source[0]);
+  memcpy(interp_c, interp_c_source, Q*P_c*sizeof interp_c_source[0]);
 
   // -- QR Factorization, interp_f = Q R
   ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

From 21d1e94b4e1082dc96a715f0c99c51bf12f004b1 Mon Sep 17 00:00:00 2001
From: Jeremy L Thompson <jeremy@jeremylt.org>
Date: Thu, 26 May 2022 10:21:28 -0600
Subject: [PATCH 59/59] make style

---
 interface/ceed-preconditioning.c | 10 ++++++----
 interface/ceed.c                 |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c
index 478e1af9bb..6789a90aee 100644
--- a/interface/ceed-preconditioning.c
+++ b/interface/ceed-preconditioning.c
@@ -681,7 +681,8 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
           B_mat_out[ell] = 0.0;
         }
         // Store block-diagonal D matrix as collection of small dense blocks
-        for (CeedInt ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
+        for (CeedInt ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts;
+             ++ell) {
           D_mat[ell] = 0.0;
         }
         // form element matrix itself (for each block component)
@@ -723,10 +724,11 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
           }
           for (CeedInt ei = 0; ei < num_eval_mode_out; ++ei) {
             for (CeedInt ej = 0; ej < num_eval_mode_in; ++ej) {
-              const CeedInt eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
-                                          +comp_out;
+              const CeedInt eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)
+                                              *num_comp
+                                              +comp_out;
               const CeedInt index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
-                                e*layout_qf[2];
+                                    e*layout_qf[2];
               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
             }
           }
diff --git a/interface/ceed.c b/interface/ceed.c
index 899fb5ab82..da5f922e42 100644
--- a/interface/ceed.c
+++ b/interface/ceed.c
@@ -516,7 +516,7 @@ int CeedGetOperatorFallbackCeed(Ceed ceed, Ceed *fallback_ceed) {
       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                        "Backend %s cannot create an operator"
                        "fallback to resource %s", resource, fallback_resource);
-      // LCOV_EXCL_STOP
+    // LCOV_EXCL_STOP
 
     // Create fallback
     Ceed fallback_ceed;